diff options
author | Torbjorn Granlund <tg@gmplib.org> | 2017-06-03 23:57:45 +0200 |
---|---|---|
committer | Torbjorn Granlund <tg@gmplib.org> | 2017-06-03 23:57:45 +0200 |
commit | 46617131b276147b3e6b6531b3e76376f9504e7a (patch) | |
tree | 588a263299690e0b77cdf36d271f93e9c0bfec4d /mpn/x86_64/bd1/hamdist.asm | |
parent | bea05b85192d3f18538ca8c51cbc2b8c60841d6a (diff) | |
download | gmp-46617131b276147b3e6b6531b3e76376f9504e7a.tar.gz |
Expand some instructions as .byte sequences.
Diffstat (limited to 'mpn/x86_64/bd1/hamdist.asm')
-rw-r--r-- | mpn/x86_64/bd1/hamdist.asm | 22 |
1 files changed, 11 insertions, 11 deletions
diff --git a/mpn/x86_64/bd1/hamdist.asm b/mpn/x86_64/bd1/hamdist.asm
index 67ee7b116..5282e7f1f 100644
--- a/mpn/x86_64/bd1/hamdist.asm
+++ b/mpn/x86_64/bd1/hamdist.asm
@@ -120,30 +120,30 @@ L(0): add $64, up
  ALIGN(32)
 L(top): lddqu (up), %xmm0
  pxor (vp), %xmm0
- vpshlb %xmm6, %xmm0, %xmm1
+ .byte 0x8f,0xe9,0x48,0x94,0xc8 C vpshlb %xmm6, %xmm0, %xmm1
  pand %xmm5, %xmm0
  pand %xmm5, %xmm1
- vpperm %xmm0, %xmm7, %xmm7, %xmm2
- vpperm %xmm1, %xmm7, %xmm7, %xmm3
+ .byte 0x8f,0xe8,0x40,0xa3,0xd7,0x00 C vpperm %xmm0,%xmm7,%xmm7,%xmm2
+ .byte 0x8f,0xe8,0x40,0xa3,0xdf,0x10 C vpperm %xmm1,%xmm7,%xmm7,%xmm3
  paddb %xmm2, %xmm3
  paddb %xmm3, %xmm4
 L(6): lddqu 16(up), %xmm0
  pxor 16(vp), %xmm0
- vpshlb %xmm6, %xmm0, %xmm1
+ .byte 0x8f,0xe9,0x48,0x94,0xc8 C vpshlb %xmm6, %xmm0, %xmm1
  pand %xmm5, %xmm0
  pand %xmm5, %xmm1
- vpperm %xmm0, %xmm7, %xmm7, %xmm2
- vpperm %xmm1, %xmm7, %xmm7, %xmm3
+ .byte 0x8f,0xe8,0x40,0xa3,0xd7,0x00 C vpperm %xmm0,%xmm7,%xmm7,%xmm2
+ .byte 0x8f,0xe8,0x40,0xa3,0xdf,0x10 C vpperm %xmm1,%xmm7,%xmm7,%xmm3
  paddb %xmm2, %xmm3
  paddb %xmm3, %xmm4
 L(4): lddqu 32(up), %xmm0
  pxor 32(vp), %xmm0
- vpshlb %xmm6, %xmm0, %xmm1
+ .byte 0x8f,0xe9,0x48,0x94,0xc8 C vpshlb %xmm6, %xmm0, %xmm1
  pand %xmm5, %xmm0
  pand %xmm5, %xmm1
- vpperm %xmm0, %xmm7, %xmm7, %xmm2
- vphaddubq %xmm4, %xmm0 C sum to 8 x 16-bit counts
- vpperm %xmm1, %xmm7, %xmm7, %xmm4
+ .byte 0x8f,0xe8,0x40,0xa3,0xd7,0x00 C vpperm %xmm0,%xmm7,%xmm7,%xmm2
+ .byte 0x8f,0xe9,0x78,0xd3,0xc4 C vphaddubq %xmm4, %xmm0
+ .byte 0x8f,0xe8,0x40,0xa3,0xe7,0x10 C vpperm %xmm1,%xmm7,%xmm7,%xmm4
  paddb %xmm2, %xmm3
  paddb %xmm2, %xmm4
  paddq %xmm0, %xmm8 C sum to 2 x 64-bit counts
@@ -166,7 +166,7 @@ L(2): mov 48(up), %r8
  xor (vp), %r8
  popcnt %r8, %r8
  add %r8, %r10
-L(x): vphaddubq %xmm4, %xmm0 C sum to 8 x 16-bit counts
+L(x): .byte 0x8f,0xe9,0x78,0xd3,0xc4 C vphaddubq %xmm4, %xmm0
  paddq %xmm0, %xmm8
  pshufd $14, %xmm8, %xmm0
  paddq %xmm8, %xmm0