Diffstat (limited to 'lib/accelerated/x86/coff/ghash-x86_64.s')
-rw-r--r-- | lib/accelerated/x86/coff/ghash-x86_64.s | 934
1 file changed, 727 insertions, 207 deletions
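Beyond the constant re-spelling (decimal immediates rewritten as hex) and the xmm register renumbering, the patch adds one piece of runtime dispatch worth noting: before entering the 4x-aggregated CLMUL loop, gcm_ghash_clmul now reads the cached CPUID word at _gnutls_x86_cpuid_s+4 and falls back to the single-block path when MOVBE is reported without XSAVE, i.e. on Silvermont-class Atoms where the aggregated loop is slower. A minimal C sketch of that gate, with a hypothetical helper name and an assumed cpuid-cache layout (leaf-1 ECX at word index 1, matching the +4 offset in the assembly):

#include <stdint.h>

extern uint32_t _gnutls_x86_cpuid_s[4];  /* assumed: cached CPUID leaf-1 words, ECX at index 1 */

#define CPUID_ECX_MOVBE (1u << 22)       /* 4194304 in the assembly              */
#define CPUID_ECX_XSAVE (1u << 26)       /* together: the 71303168 mask it ANDs  */

/* Hypothetical mirror of the new gate in gcm_ghash_clmul: take the
 * 4x-aggregated loop only when enough input is queued (0x30 bytes after the
 * first block has been peeled off) and the CPU is not a MOVBE-without-XSAVE
 * part, for which the wide loop is a net loss. */
static int use_4x_clmul(uint64_t remaining)
{
    uint32_t ecx = _gnutls_x86_cpuid_s[1];

    if (remaining < 0x30)
        return 0;
    if ((ecx & (CPUID_ECX_MOVBE | CPUID_ECX_XSAVE)) == CPUID_ECX_MOVBE)
        return 0;                        /* Silvermont: stay on the one-block path */
    return 1;
}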
diff --git a/lib/accelerated/x86/coff/ghash-x86_64.s b/lib/accelerated/x86/coff/ghash-x86_64.s index 221a226d6b..f4bcee28f0 100644 --- a/lib/accelerated/x86/coff/ghash-x86_64.s +++ b/lib/accelerated/x86/coff/ghash-x86_64.s @@ -39,6 +39,7 @@ # .text + .globl gcm_gmult_4bit .def gcm_gmult_4bit; .scl 2; .type 32; .endef .p2align 4 @@ -65,14 +66,14 @@ gcm_gmult_4bit: movq $14,%rcx movq 8(%rsi,%rax,1),%r8 movq (%rsi,%rax,1),%r9 - andb $240,%bl + andb $0xf0,%bl movq %r8,%rdx jmp .Loop1 .p2align 4 .Loop1: shrq $4,%r8 - andq $15,%rdx + andq $0xf,%rdx movq %r9,%r10 movb (%rdi,%rcx,1),%al shrq $4,%r9 @@ -88,13 +89,13 @@ gcm_gmult_4bit: js .Lbreak1 shrq $4,%r8 - andq $15,%rdx + andq $0xf,%rdx movq %r9,%r10 shrq $4,%r9 xorq 8(%rsi,%rax,1),%r8 shlq $60,%r10 xorq (%rsi,%rax,1),%r9 - andb $240,%bl + andb $0xf0,%bl xorq (%r11,%rdx,8),%r9 movq %r8,%rdx xorq %r10,%r8 @@ -103,19 +104,19 @@ gcm_gmult_4bit: .p2align 4 .Lbreak1: shrq $4,%r8 - andq $15,%rdx + andq $0xf,%rdx movq %r9,%r10 shrq $4,%r9 xorq 8(%rsi,%rax,1),%r8 shlq $60,%r10 xorq (%rsi,%rax,1),%r9 - andb $240,%bl + andb $0xf0,%bl xorq (%r11,%rdx,8),%r9 movq %r8,%rdx xorq %r10,%r8 shrq $4,%r8 - andq $15,%rdx + andq $0xf,%rdx movq %r9,%r10 shrq $4,%r9 xorq 8(%rsi,%rbx,1),%r8 @@ -720,8 +721,8 @@ gcm_init_clmul: .L_init_clmul: .LSEH_begin_gcm_init_clmul: -.byte 0x48,0x83,0xec,0x18 -.byte 0x0f,0x29,0x34,0x24 +.byte 0x48,0x83,0xec,0x18 +.byte 0x0f,0x29,0x34,0x24 movdqu (%rdx),%xmm2 pshufd $78,%xmm2,%xmm2 @@ -935,184 +936,188 @@ gcm_ghash_clmul: leaq -136(%rsp),%rax .LSEH_begin_gcm_ghash_clmul: -.byte 0x48,0x8d,0x60,0xe0 -.byte 0x0f,0x29,0x70,0xe0 -.byte 0x0f,0x29,0x78,0xf0 -.byte 0x44,0x0f,0x29,0x00 -.byte 0x44,0x0f,0x29,0x48,0x10 -.byte 0x44,0x0f,0x29,0x50,0x20 -.byte 0x44,0x0f,0x29,0x58,0x30 -.byte 0x44,0x0f,0x29,0x60,0x40 -.byte 0x44,0x0f,0x29,0x68,0x50 -.byte 0x44,0x0f,0x29,0x70,0x60 -.byte 0x44,0x0f,0x29,0x78,0x70 - movdqa .Lbswap_mask(%rip),%xmm5 - movq $11547335547999543296,%rax +.byte 0x48,0x8d,0x60,0xe0 +.byte 0x0f,0x29,0x70,0xe0 +.byte 0x0f,0x29,0x78,0xf0 +.byte 0x44,0x0f,0x29,0x00 +.byte 0x44,0x0f,0x29,0x48,0x10 +.byte 0x44,0x0f,0x29,0x50,0x20 +.byte 0x44,0x0f,0x29,0x58,0x30 +.byte 0x44,0x0f,0x29,0x60,0x40 +.byte 0x44,0x0f,0x29,0x68,0x50 +.byte 0x44,0x0f,0x29,0x70,0x60 +.byte 0x44,0x0f,0x29,0x78,0x70 + movdqa .Lbswap_mask(%rip),%xmm10 movdqu (%rcx),%xmm0 movdqu (%rdx),%xmm2 - movdqu 32(%rdx),%xmm10 -.byte 102,15,56,0,197 + movdqu 32(%rdx),%xmm7 +.byte 102,65,15,56,0,194 - subq $16,%r9 + subq $0x10,%r9 jz .Lodd_tail - movdqu 16(%rdx),%xmm9 - cmpq $48,%r9 + movdqu 16(%rdx),%xmm6 + movl _gnutls_x86_cpuid_s+4(%rip),%eax + cmpq $0x30,%r9 jb .Lskip4x - subq $48,%r9 + andl $71303168,%eax + cmpl $4194304,%eax + je .Lskip4x + + subq $0x30,%r9 + movq $0xA040608020C0E000,%rax movdqu 48(%rdx),%xmm14 movdqu 64(%rdx),%xmm15 - movdqu 48(%r8),%xmm6 + movdqu 48(%r8),%xmm3 movdqu 32(%r8),%xmm11 -.byte 102,15,56,0,245 -.byte 102,68,15,56,0,221 - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm7 - pxor %xmm6,%xmm7 -.byte 102,15,58,68,242,0 -.byte 102,68,15,58,68,194,17 -.byte 102,65,15,58,68,250,0 +.byte 102,65,15,56,0,218 +.byte 102,69,15,56,0,218 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 movdqa %xmm11,%xmm13 pshufd $78,%xmm11,%xmm12 pxor %xmm11,%xmm12 -.byte 102,69,15,58,68,217,0 -.byte 102,69,15,58,68,233,17 - xorps %xmm11,%xmm6 -.byte 102,69,15,58,68,226,16 - xorps %xmm13,%xmm8 - movups 80(%rdx),%xmm10 - xorps %xmm12,%xmm7 +.byte 102,68,15,58,68,222,0 +.byte 
102,68,15,58,68,238,17 +.byte 102,68,15,58,68,231,16 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 + movups 80(%rdx),%xmm7 + xorps %xmm12,%xmm4 movdqu 16(%r8),%xmm11 - movdqu 0(%r8),%xmm3 -.byte 102,68,15,56,0,221 -.byte 102,15,56,0,221 + movdqu 0(%r8),%xmm8 +.byte 102,69,15,56,0,218 +.byte 102,69,15,56,0,194 movdqa %xmm11,%xmm13 pshufd $78,%xmm11,%xmm12 - pxor %xmm3,%xmm0 + pxor %xmm8,%xmm0 pxor %xmm11,%xmm12 .byte 102,69,15,58,68,222,0 movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 .byte 102,69,15,58,68,238,17 - xorps %xmm11,%xmm6 -.byte 102,69,15,58,68,226,0 - xorps %xmm13,%xmm8 +.byte 102,68,15,58,68,231,0 + xorps %xmm11,%xmm3 + xorps %xmm13,%xmm5 leaq 64(%r8),%r8 - subq $64,%r9 + subq $0x40,%r9 jc .Ltail4x jmp .Lmod4_loop .p2align 5 .Lmod4_loop: .byte 102,65,15,58,68,199,0 - xorps %xmm12,%xmm7 + xorps %xmm12,%xmm4 movdqu 48(%r8),%xmm11 -.byte 102,68,15,56,0,221 +.byte 102,69,15,56,0,218 .byte 102,65,15,58,68,207,17 - xorps %xmm6,%xmm0 - movdqu 32(%r8),%xmm6 + xorps %xmm3,%xmm0 + movdqu 32(%r8),%xmm3 movdqa %xmm11,%xmm13 +.byte 102,68,15,58,68,199,16 pshufd $78,%xmm11,%xmm12 -.byte 102,65,15,58,68,218,16 - xorps %xmm8,%xmm1 + xorps %xmm5,%xmm1 pxor %xmm11,%xmm12 -.byte 102,15,56,0,245 - movups 32(%rdx),%xmm10 +.byte 102,65,15,56,0,218 + movups 32(%rdx),%xmm7 + xorps %xmm4,%xmm8 .byte 102,68,15,58,68,218,0 - xorps %xmm7,%xmm3 - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm7 + pshufd $78,%xmm3,%xmm4 - pxor %xmm0,%xmm3 - pxor %xmm6,%xmm7 - pxor %xmm1,%xmm3 - movdqa %xmm3,%xmm4 - pslldq $8,%xmm3 + pxor %xmm0,%xmm8 + movdqa %xmm3,%xmm5 + pxor %xmm1,%xmm8 + pxor %xmm3,%xmm4 + movdqa %xmm8,%xmm9 .byte 102,68,15,58,68,234,17 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - movdqa .L7_mask(%rip),%xmm3 - pxor %xmm4,%xmm1 -.byte 102,72,15,110,224 - - pand %xmm0,%xmm3 -.byte 102,15,56,0,227 -.byte 102,69,15,58,68,226,0 - pxor %xmm0,%xmm4 - psllq $57,%xmm4 - movdqa %xmm4,%xmm3 - pslldq $8,%xmm4 -.byte 102,65,15,58,68,241,0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - movdqu 0(%r8),%xmm3 + pslldq $8,%xmm8 + psrldq $8,%xmm9 + pxor %xmm8,%xmm0 + movdqa .L7_mask(%rip),%xmm8 + pxor %xmm9,%xmm1 +.byte 102,76,15,110,200 + + pand %xmm0,%xmm8 +.byte 102,69,15,56,0,200 + pxor %xmm0,%xmm9 +.byte 102,68,15,58,68,231,0 + psllq $57,%xmm9 + movdqa %xmm9,%xmm8 + pslldq $8,%xmm9 +.byte 102,15,58,68,222,0 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 + movdqu 0(%r8),%xmm8 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm9 psrlq $1,%xmm0 -.byte 102,69,15,58,68,193,17 - xorps %xmm11,%xmm6 +.byte 102,15,58,68,238,17 + xorps %xmm11,%xmm3 movdqu 16(%r8),%xmm11 -.byte 102,68,15,56,0,221 -.byte 102,65,15,58,68,250,16 - xorps %xmm13,%xmm8 - movups 80(%rdx),%xmm10 -.byte 102,15,56,0,221 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 +.byte 102,69,15,56,0,218 +.byte 102,15,58,68,231,16 + xorps %xmm13,%xmm5 + movups 80(%rdx),%xmm7 +.byte 102,69,15,56,0,194 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 psrlq $5,%xmm0 movdqa %xmm11,%xmm13 - pxor %xmm12,%xmm7 + pxor %xmm12,%xmm4 pshufd $78,%xmm11,%xmm12 + pxor %xmm9,%xmm0 + pxor %xmm8,%xmm1 pxor %xmm11,%xmm12 .byte 102,69,15,58,68,222,0 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 psrlq $1,%xmm0 -.byte 102,69,15,58,68,238,17 - xorps %xmm11,%xmm6 pxor %xmm1,%xmm0 - -.byte 102,69,15,58,68,226,0 - xorps %xmm13,%xmm8 - movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pxor %xmm0,%xmm3 +.byte 102,69,15,58,68,238,17 + xorps %xmm11,%xmm3 + pshufd $78,%xmm0,%xmm8 + pxor %xmm0,%xmm8 + +.byte 102,68,15,58,68,231,0 + xorps %xmm13,%xmm5 leaq 64(%r8),%r8 - subq 
$64,%r9 + subq $0x40,%r9 jnc .Lmod4_loop .Ltail4x: .byte 102,65,15,58,68,199,0 - xorps %xmm12,%xmm7 .byte 102,65,15,58,68,207,17 - xorps %xmm6,%xmm0 -.byte 102,65,15,58,68,218,16 - xorps %xmm8,%xmm1 +.byte 102,68,15,58,68,199,16 + xorps %xmm12,%xmm4 + xorps %xmm3,%xmm0 + xorps %xmm5,%xmm1 pxor %xmm0,%xmm1 - pxor %xmm7,%xmm3 + pxor %xmm4,%xmm8 - pxor %xmm1,%xmm3 + pxor %xmm1,%xmm8 pxor %xmm0,%xmm1 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm9 + psrldq $8,%xmm8 + pslldq $8,%xmm9 + pxor %xmm8,%xmm1 + pxor %xmm9,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 @@ -1136,10 +1141,10 @@ gcm_ghash_clmul: pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 - addq $64,%r9 + addq $0x40,%r9 jz .Ldone - movdqu 32(%rdx),%xmm10 - subq $16,%r9 + movdqu 32(%rdx),%xmm7 + subq $0x10,%r9 jz .Lodd_tail .Lskip4x: @@ -1147,102 +1152,106 @@ gcm_ghash_clmul: - movdqu (%r8),%xmm3 - movdqu 16(%r8),%xmm6 -.byte 102,15,56,0,221 -.byte 102,15,56,0,245 - pxor %xmm3,%xmm0 + movdqu (%r8),%xmm8 + movdqu 16(%r8),%xmm3 +.byte 102,69,15,56,0,194 +.byte 102,65,15,56,0,218 + pxor %xmm8,%xmm0 - movdqa %xmm6,%xmm8 - pshufd $78,%xmm6,%xmm3 - pxor %xmm6,%xmm3 -.byte 102,15,58,68,242,0 -.byte 102,68,15,58,68,194,17 -.byte 102,65,15,58,68,218,0 + movdqa %xmm3,%xmm5 + pshufd $78,%xmm3,%xmm4 + pxor %xmm3,%xmm4 +.byte 102,15,58,68,218,0 +.byte 102,15,58,68,234,17 +.byte 102,15,58,68,231,0 leaq 32(%r8),%r8 - subq $32,%r9 + nop + subq $0x20,%r9 jbe .Leven_tail + nop jmp .Lmod_loop .p2align 5 .Lmod_loop: movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 pshufd $78,%xmm0,%xmm4 pxor %xmm0,%xmm4 -.byte 102,65,15,58,68,193,0 -.byte 102,65,15,58,68,201,17 -.byte 102,65,15,58,68,226,16 +.byte 102,15,58,68,198,0 +.byte 102,15,58,68,206,17 +.byte 102,15,58,68,231,16 - pxor %xmm6,%xmm0 - pxor %xmm8,%xmm1 - movdqu (%r8),%xmm8 -.byte 102,68,15,56,0,197 - movdqu 16(%r8),%xmm6 - - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - pxor %xmm8,%xmm1 - pxor %xmm3,%xmm4 -.byte 102,15,56,0,245 - movdqa %xmm4,%xmm3 - psrldq $8,%xmm3 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + movdqu (%r8),%xmm9 + pxor %xmm0,%xmm8 +.byte 102,69,15,56,0,202 + movdqu 16(%r8),%xmm3 + + pxor %xmm1,%xmm8 + pxor %xmm9,%xmm1 + pxor %xmm8,%xmm4 +.byte 102,65,15,56,0,218 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 pslldq $8,%xmm4 - pxor %xmm3,%xmm1 + pxor %xmm8,%xmm1 pxor %xmm4,%xmm0 - movdqa %xmm6,%xmm8 + movdqa %xmm3,%xmm5 - movdqa %xmm0,%xmm4 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm9 + movdqa %xmm0,%xmm8 psllq $5,%xmm0 -.byte 102,15,58,68,242,0 - pxor %xmm0,%xmm3 + pxor %xmm0,%xmm8 +.byte 102,15,58,68,218,0 psllq $1,%xmm0 - pxor %xmm3,%xmm0 + pxor %xmm8,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm3 + movdqa %xmm0,%xmm8 pslldq $8,%xmm0 - psrldq $8,%xmm3 - pxor %xmm4,%xmm0 - pxor %xmm3,%xmm1 - pshufd $78,%xmm8,%xmm3 - pxor %xmm8,%xmm3 + psrldq $8,%xmm8 + pxor %xmm9,%xmm0 + pshufd $78,%xmm5,%xmm4 + pxor %xmm8,%xmm1 + pxor %xmm5,%xmm4 -.byte 102,68,15,58,68,194,17 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm9 psrlq $1,%xmm0 - pxor %xmm4,%xmm1 - pxor %xmm0,%xmm4 +.byte 102,15,58,68,234,17 + pxor %xmm9,%xmm1 + pxor %xmm0,%xmm9 psrlq $5,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm9,%xmm0 + leaq 32(%r8),%r8 psrlq $1,%xmm0 -.byte 102,65,15,58,68,218,0 +.byte 102,15,58,68,231,0 pxor %xmm1,%xmm0 - leaq 32(%r8),%r8 - subq $32,%r9 + subq $0x20,%r9 ja .Lmod_loop .Leven_tail: movdqa %xmm0,%xmm1 + movdqa %xmm4,%xmm8 pshufd $78,%xmm0,%xmm4 pxor %xmm0,%xmm4 -.byte 102,65,15,58,68,193,0 -.byte 102,65,15,58,68,201,17 -.byte 102,65,15,58,68,226,16 +.byte 102,15,58,68,198,0 +.byte 
102,15,58,68,206,17 +.byte 102,15,58,68,231,16 - pxor %xmm6,%xmm0 - pxor %xmm8,%xmm1 - pxor %xmm0,%xmm3 - pxor %xmm1,%xmm3 - pxor %xmm3,%xmm4 - movdqa %xmm4,%xmm3 - psrldq $8,%xmm3 + pxor %xmm3,%xmm0 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm8 + pxor %xmm1,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm8 + psrldq $8,%xmm8 pslldq $8,%xmm4 - pxor %xmm3,%xmm1 + pxor %xmm8,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 @@ -1271,15 +1280,15 @@ gcm_ghash_clmul: jnz .Ldone .Lodd_tail: - movdqu (%r8),%xmm3 -.byte 102,15,56,0,221 - pxor %xmm3,%xmm0 + movdqu (%r8),%xmm8 +.byte 102,69,15,56,0,194 + pxor %xmm8,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,65,15,58,68,218,0 +.byte 102,15,58,68,223,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 @@ -1312,7 +1321,7 @@ gcm_ghash_clmul: psrlq $1,%xmm0 pxor %xmm1,%xmm0 .Ldone: -.byte 102,15,56,0,197 +.byte 102,65,15,56,0,194 movdqu %xmm0,(%rcx) movaps (%rsp),%xmm6 movaps 16(%rsp),%xmm7 @@ -1332,7 +1341,115 @@ gcm_ghash_clmul: .def gcm_init_avx; .scl 2; .type 32; .endef .p2align 5 gcm_init_avx: - jmp .L_init_clmul +.LSEH_begin_gcm_init_avx: + +.byte 0x48,0x83,0xec,0x18 +.byte 0x0f,0x29,0x34,0x24 + vzeroupper + + vmovdqu (%rdx),%xmm2 + vpshufd $78,%xmm2,%xmm2 + + + vpshufd $255,%xmm2,%xmm4 + vpsrlq $63,%xmm2,%xmm3 + vpsllq $1,%xmm2,%xmm2 + vpxor %xmm5,%xmm5,%xmm5 + vpcmpgtd %xmm4,%xmm5,%xmm5 + vpslldq $8,%xmm3,%xmm3 + vpor %xmm3,%xmm2,%xmm2 + + + vpand .L0x1c2_polynomial(%rip),%xmm5,%xmm5 + vpxor %xmm5,%xmm2,%xmm2 + + vpunpckhqdq %xmm2,%xmm2,%xmm6 + vmovdqa %xmm2,%xmm0 + vpxor %xmm2,%xmm6,%xmm6 + movq $4,%r10 + jmp .Linit_start_avx +.p2align 5 +.Linit_loop_avx: + vpalignr $8,%xmm3,%xmm4,%xmm5 + vmovdqu %xmm5,-16(%rcx) + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 +.Linit_start_avx: + vmovdqa %xmm0,%xmm5 + vpunpckhqdq %xmm0,%xmm0,%xmm3 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 + vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 + vpxor %xmm0,%xmm1,%xmm4 + vpxor %xmm4,%xmm3,%xmm3 + + vpslldq $8,%xmm3,%xmm4 + vpsrldq $8,%xmm3,%xmm3 + vpxor %xmm4,%xmm0,%xmm0 + vpxor %xmm3,%xmm1,%xmm1 + vpsllq $57,%xmm0,%xmm3 + vpsllq $62,%xmm0,%xmm4 + vpxor %xmm3,%xmm4,%xmm4 + vpsllq $63,%xmm0,%xmm3 + vpxor %xmm3,%xmm4,%xmm4 + vpslldq $8,%xmm4,%xmm3 + vpsrldq $8,%xmm4,%xmm4 + vpxor %xmm3,%xmm0,%xmm0 + vpxor %xmm4,%xmm1,%xmm1 + + vpsrlq $1,%xmm0,%xmm4 + vpxor %xmm0,%xmm1,%xmm1 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $5,%xmm4,%xmm4 + vpxor %xmm4,%xmm0,%xmm0 + vpsrlq $1,%xmm0,%xmm0 + vpxor %xmm1,%xmm0,%xmm0 + vpshufd $78,%xmm5,%xmm3 + vpshufd $78,%xmm0,%xmm4 + vpxor %xmm5,%xmm3,%xmm3 + vmovdqu %xmm5,0(%rcx) + vpxor %xmm0,%xmm4,%xmm4 + vmovdqu %xmm0,16(%rcx) + leaq 48(%rcx),%rcx + subq $1,%r10 + jnz .Linit_loop_avx + + vpalignr $8,%xmm4,%xmm3,%xmm5 + vmovdqu %xmm5,-16(%rcx) + + vzeroupper + movaps (%rsp),%xmm6 + leaq 
24(%rsp),%rsp +.LSEH_end_gcm_init_avx: + .byte 0xf3,0xc3 .globl gcm_gmult_avx .def gcm_gmult_avx; .scl 2; .type 32; .endef @@ -1344,7 +1461,403 @@ gcm_gmult_avx: .def gcm_ghash_avx; .scl 2; .type 32; .endef .p2align 5 gcm_ghash_avx: - jmp .L_ghash_clmul + leaq -136(%rsp),%rax +.LSEH_begin_gcm_ghash_avx: + +.byte 0x48,0x8d,0x60,0xe0 +.byte 0x0f,0x29,0x70,0xe0 +.byte 0x0f,0x29,0x78,0xf0 +.byte 0x44,0x0f,0x29,0x00 +.byte 0x44,0x0f,0x29,0x48,0x10 +.byte 0x44,0x0f,0x29,0x50,0x20 +.byte 0x44,0x0f,0x29,0x58,0x30 +.byte 0x44,0x0f,0x29,0x60,0x40 +.byte 0x44,0x0f,0x29,0x68,0x50 +.byte 0x44,0x0f,0x29,0x70,0x60 +.byte 0x44,0x0f,0x29,0x78,0x70 + vzeroupper + + vmovdqu (%rcx),%xmm10 + leaq .L0x1c2_polynomial(%rip),%r10 + leaq 64(%rdx),%rdx + vmovdqu .Lbswap_mask(%rip),%xmm13 + vpshufb %xmm13,%xmm10,%xmm10 + cmpq $0x80,%r9 + jb .Lshort_avx + subq $0x80,%r9 + + vmovdqu 112(%r8),%xmm14 + vmovdqu 0-64(%rdx),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vmovdqu 32-64(%rdx),%xmm7 + + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vmovdqu 96(%r8),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm14,%xmm9,%xmm9 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 16-64(%rdx),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 80(%r8),%xmm14 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 48-64(%rdx),%xmm6 + vpxor %xmm14,%xmm9,%xmm9 + vmovdqu 64(%r8),%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rdx),%xmm7 + + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rdx),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 48(%r8),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rdx),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rdx),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 32(%r8),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rdx),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + + vmovdqu 16(%r8),%xmm14 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm4,%xmm1,%xmm1 + vpshufb %xmm13,%xmm14,%xmm14 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rdx),%xmm6 + vpxor %xmm5,%xmm2,%xmm2 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rdx),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu (%r8),%xmm15 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm1,%xmm4,%xmm4 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rdx),%xmm6 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + + leaq 128(%r8),%r8 + cmpq $0x80,%r9 + jb .Ltail_avx + + vpxor %xmm10,%xmm15,%xmm15 + subq $0x80,%r9 + jmp .Loop8x_avx + +.p2align 5 +.Loop8x_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vmovdqu 112(%r8),%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpxor %xmm15,%xmm8,%xmm8 + vpclmulqdq 
$0x00,%xmm6,%xmm15,%xmm10 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 + vmovdqu 0-64(%rdx),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 + vmovdqu 32-64(%rdx),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + + vmovdqu 96(%r8),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpxor %xmm3,%xmm10,%xmm10 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vxorps %xmm4,%xmm11,%xmm11 + vmovdqu 16-64(%rdx),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm5,%xmm12,%xmm12 + vxorps %xmm15,%xmm8,%xmm8 + + vmovdqu 80(%r8),%xmm14 + vpxor %xmm10,%xmm12,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpxor %xmm11,%xmm12,%xmm12 + vpslldq $8,%xmm12,%xmm9 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vpsrldq $8,%xmm12,%xmm12 + vpxor %xmm9,%xmm10,%xmm10 + vmovdqu 48-64(%rdx),%xmm6 + vpshufb %xmm13,%xmm14,%xmm14 + vxorps %xmm12,%xmm11,%xmm11 + vpxor %xmm1,%xmm4,%xmm4 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 80-64(%rdx),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 64(%r8),%xmm15 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 64-64(%rdx),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vxorps %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + + vmovdqu 48(%r8),%xmm14 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 96-64(%rdx),%xmm6 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 128-64(%rdx),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu 32(%r8),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpxor %xmm3,%xmm0,%xmm0 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 112-64(%rdx),%xmm6 + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm4,%xmm1,%xmm1 + vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm5,%xmm2,%xmm2 + vxorps %xmm12,%xmm10,%xmm10 + + vmovdqu 16(%r8),%xmm14 + vpalignr $8,%xmm10,%xmm10,%xmm12 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 + vpshufb %xmm13,%xmm14,%xmm14 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 + vmovdqu 144-64(%rdx),%xmm6 + vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 + vxorps %xmm11,%xmm12,%xmm12 + vpunpckhqdq %xmm14,%xmm14,%xmm9 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 + vmovdqu 176-64(%rdx),%xmm7 + vpxor %xmm14,%xmm9,%xmm9 + vpxor %xmm2,%xmm5,%xmm5 + + vmovdqu (%r8),%xmm15 + vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 + vpshufb %xmm13,%xmm15,%xmm15 + vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 + vmovdqu 160-64(%rdx),%xmm6 + vpxor %xmm12,%xmm15,%xmm15 + vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 + vpxor %xmm10,%xmm15,%xmm15 + + leaq 128(%r8),%r8 + subq $0x80,%r9 + jnc .Loop8x_avx + + addq $0x80,%r9 + jmp .Ltail_no_xor_avx + +.p2align 5 +.Lshort_avx: + vmovdqu -16(%r8,%r9,1),%xmm14 + leaq (%r8,%r9,1),%r8 + vmovdqu 0-64(%rdx),%xmm6 + vmovdqu 32-64(%rdx),%xmm7 + vpshufb %xmm13,%xmm14,%xmm15 + + vmovdqa %xmm0,%xmm3 + vmovdqa %xmm1,%xmm4 + vmovdqa %xmm2,%xmm5 + subq $0x10,%r9 + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor 
%xmm15,%xmm8,%xmm8 + vmovdqu -32(%r8),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 16-64(%rdx),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%r9 + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -48(%r8),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 48-64(%rdx),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 80-64(%rdx),%xmm7 + subq $0x10,%r9 + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -64(%r8),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 64-64(%rdx),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%r9 + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -80(%r8),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 96-64(%rdx),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovdqu 128-64(%rdx),%xmm7 + subq $0x10,%r9 + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -96(%r8),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 112-64(%rdx),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vpsrldq $8,%xmm7,%xmm7 + subq $0x10,%r9 + jz .Ltail_avx + + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vmovdqu -112(%r8),%xmm14 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vmovdqu 144-64(%rdx),%xmm6 + vpshufb %xmm13,%xmm14,%xmm15 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + vmovq 184-64(%rdx),%xmm7 + subq $0x10,%r9 + jmp .Ltail_avx + +.p2align 5 +.Ltail_avx: + vpxor %xmm10,%xmm15,%xmm15 +.Ltail_no_xor_avx: + vpunpckhqdq %xmm15,%xmm15,%xmm8 + vpxor %xmm0,%xmm3,%xmm3 + vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 + vpxor %xmm15,%xmm8,%xmm8 + vpxor %xmm1,%xmm4,%xmm4 + vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 + vpxor %xmm2,%xmm5,%xmm5 + vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 + + vmovdqu (%r10),%xmm12 + + vpxor %xmm0,%xmm3,%xmm10 + vpxor %xmm1,%xmm4,%xmm11 + vpxor %xmm2,%xmm5,%xmm5 + + vpxor %xmm10,%xmm5,%xmm5 + vpxor %xmm11,%xmm5,%xmm5 + vpslldq $8,%xmm5,%xmm9 + vpsrldq $8,%xmm5,%xmm5 + vpxor %xmm9,%xmm10,%xmm10 + vpxor %xmm5,%xmm11,%xmm11 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 + vpalignr $8,%xmm10,%xmm10,%xmm10 + vpxor %xmm11,%xmm10,%xmm10 + vpxor %xmm9,%xmm10,%xmm10 + + cmpq $0,%r9 + jne .Lshort_avx + + vpshufb %xmm13,%xmm10,%xmm10 + vmovdqu %xmm10,(%rcx) + vzeroupper + movaps (%rsp),%xmm6 + movaps 16(%rsp),%xmm7 + movaps 32(%rsp),%xmm8 + movaps 48(%rsp),%xmm9 + movaps 64(%rsp),%xmm10 + movaps 80(%rsp),%xmm11 + movaps 96(%rsp),%xmm12 + movaps 112(%rsp),%xmm13 + movaps 128(%rsp),%xmm14 + movaps 144(%rsp),%xmm15 + leaq 168(%rsp),%rsp +.LSEH_end_gcm_ghash_avx: + .byte 0xf3,0xc3 .p2align 6 
.Lbswap_mask: @@ -1451,7 +1964,7 @@ se_handler: movq 40(%r9),%rdi movq %r8,%rsi movl $154,%ecx -.long 0xa548f3fc +.long 0xa548f3fc movq %r9,%rsi xorq %rcx,%rcx @@ -1498,31 +2011,38 @@ se_handler: .rva .LSEH_begin_gcm_ghash_clmul .rva .LSEH_end_gcm_ghash_clmul .rva .LSEH_info_gcm_ghash_clmul +.rva .LSEH_begin_gcm_init_avx +.rva .LSEH_end_gcm_init_avx +.rva .LSEH_info_gcm_init_clmul + +.rva .LSEH_begin_gcm_ghash_avx +.rva .LSEH_end_gcm_ghash_avx +.rva .LSEH_info_gcm_ghash_clmul .section .xdata .p2align 3 .LSEH_info_gcm_gmult_4bit: .byte 9,0,0,0 .rva se_handler -.rva .Lgmult_prologue,.Lgmult_epilogue +.rva .Lgmult_prologue,.Lgmult_epilogue .LSEH_info_gcm_ghash_4bit: .byte 9,0,0,0 .rva se_handler -.rva .Lghash_prologue,.Lghash_epilogue +.rva .Lghash_prologue,.Lghash_epilogue .LSEH_info_gcm_init_clmul: .byte 0x01,0x08,0x03,0x00 -.byte 0x08,0x68,0x00,0x00 -.byte 0x04,0x22,0x00,0x00 +.byte 0x08,0x68,0x00,0x00 +.byte 0x04,0x22,0x00,0x00 .LSEH_info_gcm_ghash_clmul: .byte 0x01,0x33,0x16,0x00 -.byte 0x33,0xf8,0x09,0x00 -.byte 0x2e,0xe8,0x08,0x00 -.byte 0x29,0xd8,0x07,0x00 -.byte 0x24,0xc8,0x06,0x00 -.byte 0x1f,0xb8,0x05,0x00 -.byte 0x1a,0xa8,0x04,0x00 -.byte 0x15,0x98,0x03,0x00 -.byte 0x10,0x88,0x02,0x00 -.byte 0x0c,0x78,0x01,0x00 -.byte 0x08,0x68,0x00,0x00 -.byte 0x04,0x01,0x15,0x00 +.byte 0x33,0xf8,0x09,0x00 +.byte 0x2e,0xe8,0x08,0x00 +.byte 0x29,0xd8,0x07,0x00 +.byte 0x24,0xc8,0x06,0x00 +.byte 0x1f,0xb8,0x05,0x00 +.byte 0x1a,0xa8,0x04,0x00 +.byte 0x15,0x98,0x03,0x00 +.byte 0x10,0x88,0x02,0x00 +.byte 0x0c,0x78,0x01,0x00 +.byte 0x08,0x68,0x00,0x00 +.byte 0x04,0x01,0x15,0x00 |
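For orientation, every routine touched above (the 4-bit table code, the CLMUL paths, and the new gcm_init_avx/gcm_ghash_avx bodies) computes the same GF(2^128) product that GHASH is defined over. A deliberately unoptimized reference for that multiplication, written in plain C using the right-shift formulation from NIST SP 800-38D and independent of this file, is sketched below; the 0xE1 reduction byte encodes the same irreducible polynomial x^128 + x^7 + x^2 + x + 1 that .L0x1c2_polynomial and the 4-bit reduction tables represent in their own bit orderings.

#include <stdint.h>
#include <string.h>

/* Reference GF(2^128) multiply for GHASH (SP 800-38D, right-shift variant).
 * Z starts at zero, V at Y; for each bit of X, MSB first, conditionally add
 * V into Z, then multiply V by x, reducing with R = 0xE1 || 0^120. */
static void ghash_gfmul(uint8_t z[16], const uint8_t x[16], const uint8_t y[16])
{
    uint8_t v[16], r[16] = { 0 };

    memcpy(v, y, 16);
    for (int i = 0; i < 128; i++) {
        if ((x[i / 8] >> (7 - (i % 8))) & 1)     /* bits of X, MSB first      */
            for (int j = 0; j < 16; j++)
                r[j] ^= v[j];

        int carry = v[15] & 1;                   /* bit shifted out of V      */
        for (int j = 15; j > 0; j--)
            v[j] = (uint8_t)((v[j] >> 1) | (v[j - 1] << 7));
        v[0] >>= 1;
        if (carry)
            v[0] ^= 0xE1;                        /* fold back the reduction   */
    }
    memcpy(z, r, 16);
}

The assembly versions reach the same result by other means: precomputed nibble tables in gcm_gmult_4bit, and carry-less multiplication with a Karatsuba-style split plus a two-step reduction in the CLMUL and AVX paths.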