Crypto++
cpu.h
00001 #ifndef CRYPTOPP_CPU_H
00002 #define CRYPTOPP_CPU_H
00003 
00004 #ifdef CRYPTOPP_GENERATE_X64_MASM
00005 
// Minimal configuration for the mode in which this header is preprocessed to
// generate x64 MASM source. config.h is not included in this mode, so the few
// macros the rest of the header tests are defined by hand.
#define CRYPTOPP_X86_ASM_AVAILABLE
#define CRYPTOPP_BOOL_X64 1
#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1
// Defined empty so the NAMESPACE_END at the bottom of this header expands to nothing.
#define NAMESPACE_END
00010 
00011 #else
00012 
00013 #include "config.h"
00014 
00015 #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
00016 #include <emmintrin.h>
00017 #endif
00018 
00019 #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
00020 #if !defined(__GNUC__) || defined(__SSSE3__) || defined(__INTEL_COMPILER)
00021 #include <tmmintrin.h>
00022 #else
// Stand-in for the <tmmintrin.h> intrinsic, used when GCC is not compiling with
// SSSE3 enabled. Emits PSHUFB directly: `a` is the read/write XMM operand and
// `b` may be an XMM register or a memory operand.
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8 (__m128i a, __m128i b)
{
    __asm__ ("pshufb %[sel], %[dst]"
             : [dst] "+x" (a)
             : [sel] "xm" (b));
    return a;
}
00029 #endif
00030 #if !defined(__GNUC__) || defined(__SSE4_1__) || defined(__INTEL_COMPILER)
00031 #include <smmintrin.h>
00032 #else
// Stand-in for the <smmintrin.h> intrinsic, used when GCC is not compiling with
// SSE4.1 enabled. Emits PEXTRD; `i` is passed through an "i" (immediate)
// constraint, so it must be a compile-time constant at every call site.
__inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi32 (__m128i a, const int i)
{
    int lane;
    __asm__ ("pextrd %[idx], %[vec], %[out]"
             : [out] "=rm" (lane)
             : [vec] "x" (a), [idx] "i" (i));
    return lane;
}
// Stand-in for the <smmintrin.h> intrinsic, used when GCC is not compiling with
// SSE4.1 enabled. Emits PINSRD: `b` (register or memory) is written into `a`
// at the position given by the immediate `i`, which must be a compile-time constant.
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi32 (__m128i a, int b, const int i)
{
    __asm__ ("pinsrd %[idx], %[val], %[vec]"
             : [vec] "+x" (a)
             : [val] "rm" (b), [idx] "i" (i));
    return a;
}
00046 #endif
00047 #if !defined(__GNUC__) || (defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER)
00048 #include <wmmintrin.h>
00049 #else
// Stand-in for the <wmmintrin.h> intrinsic, used when GCC is not compiling with
// both AES and PCLMUL enabled. Emits PCLMULQDQ with `a` as the read/write
// operand; the selector `i` is an immediate and must be a compile-time constant.
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clmulepi64_si128 (__m128i a, __m128i b, const int i)
{
    __asm__ ("pclmulqdq %[sel], %[src], %[acc]"
             : [acc] "+x" (a)
             : [src] "xm" (b), [sel] "i" (i));
    return a;
}
// Stand-in for the <wmmintrin.h> intrinsic, used when GCC is not compiling with
// both AES and PCLMUL enabled. Emits AESKEYGENASSIST into a fresh XMM register;
// `i` is an immediate and must be a compile-time constant.
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aeskeygenassist_si128 (__m128i a, const int i)
{
    __m128i assist;
    __asm__ ("aeskeygenassist %[rcon], %[src], %[dst]"
             : [dst] "=x" (assist)
             : [src] "xm" (a), [rcon] "i" (i));
    return assist;
}
// Stand-in for the <wmmintrin.h> intrinsic, used when GCC is not compiling with
// both AES and PCLMUL enabled. Emits AESIMC, reading `a` (register or memory)
// and producing the result in a separate XMM register.
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesimc_si128 (__m128i a)
{
    __m128i mixed;
    __asm__ ("aesimc %[src], %[dst]"
             : [dst] "=x" (mixed)
             : [src] "xm" (a));
    return mixed;
}
// Stand-in for the <wmmintrin.h> intrinsic, used when GCC is not compiling with
// both AES and PCLMUL enabled. Emits AESENC with `a` as the read/write state
// operand and `b` (register or memory) as the second operand.
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenc_si128 (__m128i a, __m128i b)
{
    __asm__ ("aesenc %[key], %[state]"
             : [state] "+x" (a)
             : [key] "xm" (b));
    return a;
}
// Stand-in for the <wmmintrin.h> intrinsic, used when GCC is not compiling with
// both AES and PCLMUL enabled. Emits AESENCLAST with `a` as the read/write
// state operand and `b` (register or memory) as the second operand.
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenclast_si128 (__m128i a, __m128i b)
{
    __asm__ ("aesenclast %[key], %[state]"
             : [state] "+x" (a)
             : [key] "xm" (b));
    return a;
}
// Stand-in for the <wmmintrin.h> intrinsic, used when GCC is not compiling with
// both AES and PCLMUL enabled. Emits AESDEC with `a` as the read/write state
// operand and `b` (register or memory) as the second operand.
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdec_si128 (__m128i a, __m128i b)
{
    __asm__ ("aesdec %[key], %[state]"
             : [state] "+x" (a)
             : [key] "xm" (b));
    return a;
}
// Stand-in for the <wmmintrin.h> intrinsic, used when GCC is not compiling with
// both AES and PCLMUL enabled. Emits AESDECLAST with `a` as the read/write
// state operand and `b` (register or memory) as the second operand.
__inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdeclast_si128 (__m128i a, __m128i b)
{
    __asm__ ("aesdeclast %[key], %[state]"
             : [state] "+x" (a)
             : [key] "xm" (b));
    return a;
}
00094 #endif
00095 #endif
00096 
00097 NAMESPACE_BEGIN(CryptoPP)
00098 
00099 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
00100 
00101 #define CRYPTOPP_CPUID_AVAILABLE
00102 
// Cached state for run-time x86 feature detection. These should not be used
// directly: the inline accessors in this header read them and call
// DetectX86Features() first whenever g_x86DetectionDone is still false.
extern CRYPTOPP_DLL bool g_x86DetectionDone;
extern CRYPTOPP_DLL bool g_hasSSSE3;
extern CRYPTOPP_DLL bool g_hasAESNI;
extern CRYPTOPP_DLL bool g_hasCLMUL;
extern CRYPTOPP_DLL bool g_isP4;
extern CRYPTOPP_DLL word32 g_cacheLineSize;
// Defined elsewhere. NOTE(review): presumably fills in the g_* values above and
// sets g_x86DetectionDone -- confirm against the implementation.
CRYPTOPP_DLL void CRYPTOPP_API DetectX86Features();
// Defined elsewhere. NOTE(review): presumably runs CPUID for `input` and stores
// the resulting registers through `output` -- confirm the required size of `output`.
CRYPTOPP_DLL bool CRYPTOPP_API CpuId(word32 input, word32 *output);
00112 
00113 #if CRYPTOPP_BOOL_X64
// In x64 builds these three queries answer true unconditionally; no run-time
// detection is consulted.
inline bool HasSSE2()
{
    return true;
}

inline bool HasISSE()
{
    return true;
}

inline bool HasMMX()
{
    return true;
}
00117 #else
00118 
// Used only in the non-x64 branch: cached detection results for the features
// that the x64 branch reports as always present. Read them through
// HasSSE2(), HasISSE() and HasMMX() rather than directly.
extern CRYPTOPP_DLL bool g_hasSSE2;
extern CRYPTOPP_DLL bool g_hasISSE;
extern CRYPTOPP_DLL bool g_hasMMX;
00122 
00123 inline bool HasSSE2()
00124 {
00125     if (!g_x86DetectionDone)
00126         DetectX86Features();
00127     return g_hasSSE2;
00128 }
00129 
00130 inline bool HasISSE()
00131 {
00132     if (!g_x86DetectionDone)
00133         DetectX86Features();
00134     return g_hasISSE;
00135 }
00136 
00137 inline bool HasMMX()
00138 {
00139     if (!g_x86DetectionDone)
00140         DetectX86Features();
00141     return g_hasMMX;
00142 }
00143 
00144 #endif
00145 
00146 inline bool HasSSSE3()
00147 {
00148     if (!g_x86DetectionDone)
00149         DetectX86Features();
00150     return g_hasSSSE3;
00151 }
00152 
00153 inline bool HasAESNI()
00154 {
00155     if (!g_x86DetectionDone)
00156         DetectX86Features();
00157     return g_hasAESNI;
00158 }
00159 
00160 inline bool HasCLMUL()
00161 {
00162     if (!g_x86DetectionDone)
00163         DetectX86Features();
00164     return g_hasCLMUL;
00165 }
00166 
00167 inline bool IsP4()
00168 {
00169     if (!g_x86DetectionDone)
00170         DetectX86Features();
00171     return g_isP4;
00172 }
00173 
00174 inline int GetCacheLineSize()
00175 {
00176     if (!g_x86DetectionDone)
00177         DetectX86Features();
00178     return g_cacheLineSize;
00179 }
00180 
00181 #else
00182 
00183 inline int GetCacheLineSize()
00184 {
00185     return CRYPTOPP_L1_CACHE_LINE_SIZE;
00186 }
00187 
00188 #endif
00189 
00190 #endif
00191 
00192 #ifdef CRYPTOPP_GENERATE_X64_MASM
    // MASM-generation flavour of the assembly helper macros: each one expands
    // to raw assembler text followed by the literal marker *newline*.
    // NOTE(review): the marker is presumably turned into real line breaks by
    // the tool that post-processes the preprocessor output -- confirm.
    //   AS1/AS2/AS3 - instruction text with its operands, comma-separated
    //   ASS         - like AS2 with a third operand built as a*64+b*16+c*4+d
    //                 (arguments are not parenthesized in this flavour)
    //   ASL         - defines the label `label<x>:`
    //   ASJ/ASC     - instruction x targeting `label<y>` (ASJ ignores z)
    //   AS_HEX      - pastes y into the form 0<y>h
    #define AS1(x) x*newline*
    #define AS2(x, y) x, y*newline*
    #define AS3(x, y, z) x, y, z*newline*
    #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
    #define ASL(x) label##x:*newline*
    #define ASJ(x, y, z) x label##y*newline*
    #define ASC(x, y) x label##y*newline*
    #define AS_HEX(y) 0##y##h
00201 #elif defined(_MSC_VER) || defined(__BORLANDC__)
    // MSVC / Borland flavour: every helper wraps its text in an `__asm { }`
    // block.
    //   AS1/AS2/AS3 - one instruction with its operands
    //   ASS         - like AS2 with a third operand (a)*64+(b)*16+(c)*4+(d)
    //   ASL         - defines the label `label<x>:`
    //   ASJ/ASC     - instruction x targeting `label<y>` (ASJ ignores z)
    //   AS_HEX      - pastes y into the form 0x<y>
    #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
    #define AS1(x) __asm {x}
    #define AS2(x, y) __asm {x, y}
    #define AS3(x, y, z) __asm {x, y, z}
    #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
    #define ASL(x) __asm {label##x:}
    #define ASJ(x, y, z) __asm {x label##y}
    #define ASC(x, y) __asm {x label##y}
    // Function qualifier for hand-written assembly bodies in this flavour.
    #define CRYPTOPP_NAKED __declspec(naked)
    #define AS_HEX(y) 0x##y
00212 #else
    // GNU flavour: every helper expands to a string literal holding one
    // ';'-terminated statement, so adjacent helpers concatenate into a single
    // asm template string.
    #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
    // define these in two steps to allow arguments to be expanded
    // (the GNU_* layer applies '#'; the public layer forwards to it, so macro
    // arguments are expanded before being turned into text)
    #define GNU_AS1(x) #x ";"
    #define GNU_AS2(x, y) #x ", " #y ";"
    #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
    // Label definition: starts a new line, then `<x>:`.
    #define GNU_ASL(x) "\n" #x ":"
    // Jump: instruction x, a space, then y immediately followed by z.
    #define GNU_ASJ(x, y, z) #x " " #y #z ";"
    #define AS1(x) GNU_AS1(x)
    #define AS2(x, y) GNU_AS2(x, y)
    #define AS3(x, y, z) GNU_AS3(x, y, z)
    // ASS and ASC apply '#' directly, so their arguments are NOT macro-expanded.
    #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
    #define ASL(x) GNU_ASL(x)
    #define ASJ(x, y, z) GNU_ASJ(x, y, z)
    #define ASC(x, y) #x " " #y ";"
    // No function qualifier is used in this flavour.
    #define CRYPTOPP_NAKED
    #define AS_HEX(y) 0x##y
00229 #endif
00230 
// Conditional-inclusion helpers: IF0 discards its argument, IF1 keeps it.
#define IF0(y)
#define IF1(y) y
00233 
// ASM_MOD(x, y): remainder of x divided by y, written as an assembler expression.
// XMMWORD_PTR: size qualifier for 128-bit memory operands; empty outside MASM.
#ifdef CRYPTOPP_GENERATE_X64_MASM
#define ASM_MOD(x, y) ((x) MOD (y))
#define XMMWORD_PTR XMMWORD PTR
#else
// The GNU assembler doesn't seem to have a mod operator, so the remainder is
// spelled as x - (x/y)*y; note that both arguments appear more than once.
#define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
// GAS 2.15 doesn't support XMMWORD PTR; it seems to be necessary only for MASM.
#define XMMWORD_PTR
#endif
00243 
00244 #if CRYPTOPP_BOOL_X86
    // 32-bit x86 register assignment for the portable assembly macros.
    // AS_REG_n is the native-width register n; AS_REG_nd is its 32-bit form,
    // which on x86 is the same register.
    #define AS_REG_1 ecx
    #define AS_REG_2 edx
    #define AS_REG_3 esi
    #define AS_REG_4 edi
    #define AS_REG_5 eax
    #define AS_REG_6 ebx
    #define AS_REG_7 ebp
    #define AS_REG_1d ecx
    #define AS_REG_2d edx
    #define AS_REG_3d esi
    #define AS_REG_4d edi
    #define AS_REG_5d eax
    #define AS_REG_6d ebx
    #define AS_REG_7d ebp
    // Native word size in bytes.
    #define WORD_SZ 4
    // WORD_REG(x) turns a register suffix into the native-width name, e.g. ax -> eax.
    #define WORD_REG(x) e##x
    #define WORD_PTR DWORD PTR
    // Push/pop that are emitted only in the 32-bit build (the x64 definitions are empty).
    #define AS_PUSH_IF86(x) AS1(push e##x)
    #define AS_POP_IF86(x) AS1(pop e##x)
    // Jump-if-count-register-zero mnemonic for this word size.
    #define AS_JCXZ jecxz
00265 #elif CRYPTOPP_BOOL_X64
    // x64 register assignment for the portable assembly macros. AS_REG_n is the
    // 64-bit register n and AS_REG_nd is the 32-bit view of the same register.
    #ifdef CRYPTOPP_GENERATE_X64_MASM
        // NOTE(review): rcx, rdx, r8, r9 presumably follow the Microsoft x64
        // argument-register order -- confirm against the MASM callers.
        #define AS_REG_1 rcx
        #define AS_REG_2 rdx
        #define AS_REG_3 r8
        #define AS_REG_4 r9
        #define AS_REG_5 rax
        #define AS_REG_6 r10
        #define AS_REG_7 r11
        #define AS_REG_1d ecx
        #define AS_REG_2d edx
        #define AS_REG_3d r8d
        #define AS_REG_4d r9d
        #define AS_REG_5d eax
        #define AS_REG_6d r10d
        #define AS_REG_7d r11d
    #else
        // NOTE(review): rdi, rsi, rdx, rcx, r8, r9 presumably follow the
        // System V AMD64 argument-register order -- confirm against callers.
        #define AS_REG_1 rdi
        #define AS_REG_2 rsi
        #define AS_REG_3 rdx
        #define AS_REG_4 rcx
        #define AS_REG_5 r8
        #define AS_REG_6 r9
        #define AS_REG_7 r10
        #define AS_REG_1d edi
        #define AS_REG_2d esi
        #define AS_REG_3d edx
        #define AS_REG_4d ecx
        #define AS_REG_5d r8d
        #define AS_REG_6d r9d
        #define AS_REG_7d r10d
    #endif
    // Native word size in bytes.
    #define WORD_SZ 8
    // WORD_REG(x) turns a register suffix into the native-width name, e.g. ax -> rax.
    #define WORD_REG(x) r##x
    #define WORD_PTR QWORD PTR
    // Push/pop helpers expand to nothing on x64; they emit code only in the 32-bit build.
    #define AS_PUSH_IF86(x)
    #define AS_POP_IF86(x)
    // Jump-if-count-register-zero mnemonic for this word size.
    #define AS_JCXZ jrcxz
00303 #endif
00304 
// Helper macro for stream cipher output: emits assembly that optionally XORs
// four XMM registers with input data and then stores them to the output.
//
// Parameters:
//   labelPrefix     - prefix for the local labels; suffixes 3, 7, 8 and 9 are used
//   inputPtr        - register holding the input address, or zero for "no input"
//   outputPtr       - register holding the output address
//   x0..x3          - numbers of the XMM registers holding the four blocks
//   t               - number of a scratch XMM register (unaligned input path only)
//   p0..p3          - 16-byte slot index at which each block is read and written
//   increment       - number of 16-byte slots to advance the pointers by
//
// Flow:
//   1. If inputPtr is zero, skip straight to the store (label 3); inputPtr is
//      not advanced in that case.
//   2. If inputPtr is 16-byte aligned, pxor each register directly with memory;
//      otherwise (label 7) load each slot with movdqu into xmm<t> and pxor
//      from there. Either way inputPtr is then advanced by increment*16.
//   3. Label 3: store the four registers with movdqa when outputPtr is 16-byte
//      aligned, or with movdqu (label 8) when it is not.
//   4. Label 9: advance outputPtr by increment*16.
#define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\
    AS2(    test    inputPtr, inputPtr)\
    ASC(    jz,     labelPrefix##3)\
    AS2(    test    inputPtr, 15)\
    ASC(    jnz,    labelPrefix##7)\
    AS2(    pxor    xmm##x0, [inputPtr+p0*16])\
    AS2(    pxor    xmm##x1, [inputPtr+p1*16])\
    AS2(    pxor    xmm##x2, [inputPtr+p2*16])\
    AS2(    pxor    xmm##x3, [inputPtr+p3*16])\
    AS2(    add     inputPtr, increment*16)\
    ASC(    jmp,    labelPrefix##3)\
    ASL(labelPrefix##7)\
    AS2(    movdqu  xmm##t, [inputPtr+p0*16])\
    AS2(    pxor    xmm##x0, xmm##t)\
    AS2(    movdqu  xmm##t, [inputPtr+p1*16])\
    AS2(    pxor    xmm##x1, xmm##t)\
    AS2(    movdqu  xmm##t, [inputPtr+p2*16])\
    AS2(    pxor    xmm##x2, xmm##t)\
    AS2(    movdqu  xmm##t, [inputPtr+p3*16])\
    AS2(    pxor    xmm##x3, xmm##t)\
    AS2(    add     inputPtr, increment*16)\
    ASL(labelPrefix##3)\
    AS2(    test    outputPtr, 15)\
    ASC(    jnz,    labelPrefix##8)\
    AS2(    movdqa  [outputPtr+p0*16], xmm##x0)\
    AS2(    movdqa  [outputPtr+p1*16], xmm##x1)\
    AS2(    movdqa  [outputPtr+p2*16], xmm##x2)\
    AS2(    movdqa  [outputPtr+p3*16], xmm##x3)\
    ASC(    jmp,    labelPrefix##9)\
    ASL(labelPrefix##8)\
    AS2(    movdqu  [outputPtr+p0*16], xmm##x0)\
    AS2(    movdqu  [outputPtr+p1*16], xmm##x1)\
    AS2(    movdqu  [outputPtr+p2*16], xmm##x2)\
    AS2(    movdqu  [outputPtr+p3*16], xmm##x3)\
    ASL(labelPrefix##9)\
    AS2(    add     outputPtr, increment*16)
00342 
00343 NAMESPACE_END
00344 
00345 #endif