M4RI 1.0.1: xor.h
#ifndef XOR_H
#define XOR_H

#ifdef HAVE_SSE2
#include <emmintrin.h>
#endif

/*******************************************************************
 *
 *                 M4RI: Linear Algebra over GF(2)
 *
 *    Copyright (C) 2008-2010 Martin Albrecht <martinralbrecht@googlemail.com>
 *
 *  Distributed under the terms of the GNU General Public License (GPL)
 *  version 2 or higher.
 *
 *    This code is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *    General Public License for more details.
 *
 *  The full text of the GPL is available at:
 *
 *                  http://www.gnu.org/licenses/
 *
 ********************************************************************/

/**
 * Compute c[i] ^= t1[i] ^ t2[i] ^ t3[i] ^ t4[i] ^ t5[i] ^ t6[i] ^ t7[i] ^ t8[i]
 * for 0 <= i < wide.
 */
#ifdef HAVE_SSE2
static inline void _mzd_combine8(word *c, word *t1, word *t2, word *t3, word *t4,
                                 word *t5, word *t6, word *t7, word *t8, size_t wide) {
  size_t i;
  /* assuming t1 ... t8 are aligned, but c might not be */
  if (ALIGNMENT(c,16)==0) {
    __m128i *__c  = (__m128i*)c;
    __m128i *__t1 = (__m128i*)t1;
    __m128i *__t2 = (__m128i*)t2;
    __m128i *__t3 = (__m128i*)t3;
    __m128i *__t4 = (__m128i*)t4;
    __m128i *__t5 = (__m128i*)t5;
    __m128i *__t6 = (__m128i*)t6;
    __m128i *__t7 = (__m128i*)t7;
    __m128i *__t8 = (__m128i*)t8;
    const __m128i *eof = (__m128i*)((unsigned long)(c + wide) & ~0xF);
    __m128i xmm1;

    while(__c < eof) {
      xmm1 = _mm_xor_si128(*__c, *__t1++);
      xmm1 = _mm_xor_si128(xmm1, *__t2++);
      xmm1 = _mm_xor_si128(xmm1, *__t3++);
      xmm1 = _mm_xor_si128(xmm1, *__t4++);
      xmm1 = _mm_xor_si128(xmm1, *__t5++);
      xmm1 = _mm_xor_si128(xmm1, *__t6++);
      xmm1 = _mm_xor_si128(xmm1, *__t7++);
      xmm1 = _mm_xor_si128(xmm1, *__t8++);
      *__c++ = xmm1;
    }
    c  = (word*)__c;
    t1 = (word*)__t1;
    t2 = (word*)__t2;
    t3 = (word*)__t3;
    t4 = (word*)__t4;
    t5 = (word*)__t5;
    t6 = (word*)__t6;
    t7 = (word*)__t7;
    t8 = (word*)__t8;
    /* words left over after the 128-bit wide loop */
    wide = ((sizeof(word)*wide)%16)/sizeof(word);
  }
  for(i=0; i<wide; i++) {
    c[i] ^= t1[i] ^ t2[i] ^ t3[i] ^ t4[i] ^ t5[i] ^ t6[i] ^ t7[i] ^ t8[i];
  }
}
#else

/* Plain C fallback; it expects a loop counter `ii` to be declared in the
 * calling scope. */
#define _mzd_combine8(c,t1,t2,t3,t4,t5,t6,t7,t8,wide) \
  for(ii=0; ii<wide; ii++) \
    c[ii] ^= t1[ii] ^ t2[ii] ^ t3[ii] ^ t4[ii] ^ t5[ii] ^ t6[ii] ^ t7[ii] ^ t8[ii]

#endif //HAVE_SSE2

/**
 * Compute c[i] ^= t1[i] ^ t2[i] ^ t3[i] ^ t4[i] for 0 <= i < wide.
 */
#ifdef HAVE_SSE2
static inline void _mzd_combine4(word *c, word *t1, word *t2, word *t3, word *t4, size_t wide) {
  size_t i;
  /* assuming t1 ... t4 are aligned, but c might not be */
  if (ALIGNMENT(c,16)==0) {
    __m128i *__c  = (__m128i*)c;
    __m128i *__t1 = (__m128i*)t1;
    __m128i *__t2 = (__m128i*)t2;
    __m128i *__t3 = (__m128i*)t3;
    __m128i *__t4 = (__m128i*)t4;
    const __m128i *eof = (__m128i*)((unsigned long)(c + wide) & ~0xF);
    __m128i xmm1;

    while(__c < eof) {
      xmm1 = _mm_xor_si128(*__c, *__t1++);
      xmm1 = _mm_xor_si128(xmm1, *__t2++);
      xmm1 = _mm_xor_si128(xmm1, *__t3++);
      xmm1 = _mm_xor_si128(xmm1, *__t4++);
      *__c++ = xmm1;
    }
    c  = (word*)__c;
    t1 = (word*)__t1;
    t2 = (word*)__t2;
    t3 = (word*)__t3;
    t4 = (word*)__t4;
    /* words left over after the 128-bit wide loop */
    wide = ((sizeof(word)*wide)%16)/sizeof(word);
  }
  for(i=0; i<wide; i++) {
    c[i] ^= t1[i] ^ t2[i] ^ t3[i] ^ t4[i];
  }
}
#else

#define _mzd_combine4(c, t1, t2, t3, t4, wide) \
  for(ii=0; ii<wide; ii++) \
    c[ii] ^= t1[ii] ^ t2[ii] ^ t3[ii] ^ t4[ii]

#endif //HAVE_SSE2

/**
 * Compute c[i] ^= t1[i] ^ t2[i] for 0 <= i < wide.
 */
#ifdef HAVE_SSE2
static inline void _mzd_combine2(word *c, word *t1, word *t2, size_t wide) {
  size_t i;
  /* assuming t1 and t2 are aligned, but c might not be */
  if (ALIGNMENT(c,16)==0) {
    __m128i *__c  = (__m128i*)c;
    __m128i *__t1 = (__m128i*)t1;
    __m128i *__t2 = (__m128i*)t2;
    const __m128i *eof = (__m128i*)((unsigned long)(c + wide) & ~0xF);
    __m128i xmm1;

    while(__c < eof) {
      xmm1 = _mm_xor_si128(*__c, *__t1++);
      xmm1 = _mm_xor_si128(xmm1, *__t2++);
      *__c++ = xmm1;
    }
    c  = (word*)__c;
    t1 = (word*)__t1;
    t2 = (word*)__t2;
    /* words left over after the 128-bit wide loop */
    wide = ((sizeof(word)*wide)%16)/sizeof(word);
  }
  for(i=0; i<wide; i++) {
    c[i] ^= t1[i] ^ t2[i];
  }
}
#else

#define _mzd_combine2(c, t1, t2, wide) \
  for(ii=0; ii<wide; ii++) \
    c[ii] ^= t1[ii] ^ t2[ii]

#endif //HAVE_SSE2

/* Select the combining routine matching the Gray code table count:
 * eight tables if M4RM_GRAY8 is defined, four otherwise. */
#ifdef M4RM_GRAY8
#define _MZD_COMBINE _mzd_combine8(c, t1, t2, t3, t4, t5, t6, t7, t8, wide)
#else //M4RM_GRAY8
#define _MZD_COMBINE _mzd_combine4(c, t1, t2, t3, t4, wide)
#endif //M4RM_GRAY8

#endif //XOR_H
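As a usage note beyond the header itself: below is a minimal, self-contained sketch of what these helpers compute. It deliberately does not include xor.h, since `word` and `ALIGNMENT` come from elsewhere in M4RI; the `word` typedef and the demo `main` here are stand-ins for illustration only. Inside M4RI one would call, e.g., `_mzd_combine2(c, t1, t2, wide)` on rows of width `wide` words, with a counter `ii` in scope when the non-SSE2 macro form is used.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t word;   /* stand-in for M4RI's word type, for illustration only */

int main(void) {
  size_t ii;             /* the macro fallbacks expect `ii` in the calling scope */
  word c[4]  = {1, 2, 3, 4};   /* destination row    */
  word t1[4] = {8, 8, 8, 8};   /* first source row   */
  word t2[4] = {4, 4, 4, 4};   /* second source row  */
  const size_t wide = 4;       /* words per row      */

  /* same effect as _mzd_combine2(c, t1, t2, wide): c[i] ^= t1[i] ^ t2[i] */
  for (ii = 0; ii < wide; ii++)
    c[ii] ^= t1[ii] ^ t2[ii];

  for (ii = 0; ii < wide; ii++)
    printf("%llu ", (unsigned long long)c[ii]);  /* prints: 13 14 15 8 */
  printf("\n");
  return 0;
}

The design choice the header encodes: as long as the destination row is 16-byte aligned, the SSE2 paths XOR 128 bits per step and fall back to word-wise XOR only for the unaligned tail; _MZD_COMBINE then selects eight or four source tables per step depending on the Gray code table size (M4RM_GRAY8).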