diff options
-rwxr-xr-x | cpu.h | 3 | ||||
-rw-r--r-- | panama.cpp | 56 |
2 files changed, 31 insertions, 28 deletions
@@ -148,9 +148,12 @@ inline bool HasMMX() {return false;} #ifdef CRYPTOPP_GENERATE_X64_MASM #define ASM_MOD(x, y) ((x) MOD (y)) +#define XMMWORD_PTR XMMWORD PTR #else // GNU assembler doesn't seem to have mod operator #define ASM_MOD(x, y) ((x)-((x)/(y))*(y)) +// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM +#define XMMWORD_PTR #endif #if CRYPTOPP_BOOL_X86 @@ -72,10 +72,10 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) AS1( push AS_REG_1) #endif - AS2( movdqa xmm0, XMMWORD PTR [AS_REG_2+0*16]) - AS2( movdqa xmm1, XMMWORD PTR [AS_REG_2+1*16]) - AS2( movdqa xmm2, XMMWORD PTR [AS_REG_2+2*16]) - AS2( movdqa xmm3, XMMWORD PTR [AS_REG_2+3*16]) + AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16]) + AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16]) + AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16]) + AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16]) AS2( mov eax, dword ptr [AS_REG_2+4*16]) ASL(4) @@ -163,13 +163,13 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) ASL(1) AS2( test AS_REG_3, 15) ASJ( jnz, 3, f) - AS2( movdqa XMMWORD PTR [AS_REG_3], xmm4) - AS2( movdqa XMMWORD PTR [AS_REG_3+16], xmm6) + AS2( movdqa XMMWORD_PTR [AS_REG_3], xmm4) + AS2( movdqa XMMWORD_PTR [AS_REG_3+16], xmm6) AS2( add AS_REG_3, 32) ASJ( jmp, 0, f) ASL(3) - AS2( movdqu XMMWORD PTR [AS_REG_3], xmm4) - AS2( movdqu XMMWORD PTR [AS_REG_3+16], xmm6) + AS2( movdqu XMMWORD_PTR [AS_REG_3], xmm4) + AS2( movdqu XMMWORD_PTR [AS_REG_3+16], xmm6) AS2( add AS_REG_3, 32) ASL(0) @@ -179,24 +179,24 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) AS2( lea AS_REG_7, [AS_REG_6 + (32-24)*32]) AS2( and AS_REG_7, 31*32) - AS2( movdqa xmm0, XMMWORD PTR [AS_REG_2+20*4+AS_REG_1+0*8]) + AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8]) AS2( pxor xmm3, xmm0) ASS( pshufd xmm0, xmm0, 2, 3, 0, 1) - AS2( movdqa XMMWORD PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3) - AS2( pxor xmm0, XMMWORD PTR [AS_REG_2+20*4+AS_REG_7+2*8]) - AS2( movdqa XMMWORD PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0) + AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*8], xmm3) + AS2( pxor xmm0, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8]) + AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+2*8], xmm0) - AS2( movdqa xmm4, XMMWORD PTR [AS_REG_2+20*4+AS_REG_1+2*8]) + AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8]) AS2( pxor xmm1, xmm4) - AS2( movdqa XMMWORD PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1) - AS2( pxor xmm4, XMMWORD PTR [AS_REG_2+20*4+AS_REG_7+0*8]) - AS2( movdqa XMMWORD PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4) + AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+2*8], xmm1) + AS2( pxor xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8]) + AS2( movdqa XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*8], xmm4) // theta - AS2( movdqa xmm3, XMMWORD PTR [AS_REG_2+3*16]) - AS2( movdqa xmm2, XMMWORD PTR [AS_REG_2+2*16]) - AS2( movdqa xmm1, XMMWORD PTR [AS_REG_2+1*16]) - AS2( movdqa xmm0, XMMWORD PTR [AS_REG_2+0*16]) + AS2( movdqa xmm3, XMMWORD_PTR [AS_REG_2+3*16]) + AS2( movdqa xmm2, XMMWORD_PTR [AS_REG_2+2*16]) + AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_2+1*16]) + AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_2+0*16]) #if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE AS2( test AS_REG_6, 1) @@ -250,16 +250,16 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) AS2( lea AS_REG_7, [AS_REG_6 + 16*32]) AS2( and AS_REG_7, 31*32) - AS2( movdqa xmm4, XMMWORD PTR [AS_REG_2+20*4+AS_REG_1+0*16]) - AS2( movdqa xmm5, XMMWORD PTR [AS_REG_2+20*4+AS_REG_7+0*16]) + AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+0*16]) + AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+0*16]) AS2( movdqa xmm6, xmm4) AS2( punpcklqdq xmm4, xmm5) AS2( punpckhqdq xmm6, xmm5) AS2( pxor xmm3, xmm4) AS2( pxor xmm2, xmm6) - AS2( movdqa xmm4, XMMWORD PTR [AS_REG_2+20*4+AS_REG_1+1*16]) - AS2( movdqa xmm5, XMMWORD PTR [AS_REG_2+20*4+AS_REG_7+1*16]) + AS2( movdqa xmm4, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_1+1*16]) + AS2( movdqa xmm5, XMMWORD_PTR [AS_REG_2+20*4+AS_REG_7+1*16]) AS2( movdqa xmm6, xmm4) AS2( punpcklqdq xmm4, xmm5) AS2( punpckhqdq xmm6, xmm5) @@ -273,10 +273,10 @@ void Panama_SSE2_Pull(size_t count, word32 *state, word32 *z, const word32 *y) // save state AS2( mov [AS_REG_2+4*16], eax) - AS2( movdqa XMMWORD PTR [AS_REG_2+3*16], xmm3) - AS2( movdqa XMMWORD PTR [AS_REG_2+2*16], xmm2) - AS2( movdqa XMMWORD PTR [AS_REG_2+1*16], xmm1) - AS2( movdqa XMMWORD PTR [AS_REG_2+0*16], xmm0) + AS2( movdqa XMMWORD_PTR [AS_REG_2+3*16], xmm3) + AS2( movdqa XMMWORD_PTR [AS_REG_2+2*16], xmm2) + AS2( movdqa XMMWORD_PTR [AS_REG_2+1*16], xmm1) + AS2( movdqa XMMWORD_PTR [AS_REG_2+0*16], xmm0) #if CRYPTOPP_BOOL_X86 AS2( add esp, 4) |