Diffstat (limited to 'lib/accelerated/x86/macosx/ghash-x86_64.s')
-rw-r--r--  lib/accelerated/x86/macosx/ghash-x86_64.s  68
1 file changed, 57 insertions(+), 11 deletions(-)
diff --git a/lib/accelerated/x86/macosx/ghash-x86_64.s b/lib/accelerated/x86/macosx/ghash-x86_64.s
index 8fe772fd35..5fd3216755 100644
--- a/lib/accelerated/x86/macosx/ghash-x86_64.s
+++ b/lib/accelerated/x86/macosx/ghash-x86_64.s
@@ -1,4 +1,4 @@
-# Copyright (c) 2011-2013, Andy Polyakov <appro@openssl.org>
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -44,9 +44,21 @@
.p2align 4
_gcm_gmult_4bit:
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
+ pushq %r13
+
+ pushq %r14
+
+ pushq %r15
+
+ subq $280,%rsp
+
L$gmult_prologue:
movzbq 15(%rdi),%r8
@@ -123,22 +135,35 @@ L$break1:
movq %r8,8(%rdi)
movq %r9,(%rdi)
- movq 16(%rsp),%rbx
- leaq 24(%rsp),%rsp
+ leaq 280+48(%rsp),%rsi
+
+ movq -8(%rsi),%rbx
+
+ leaq (%rsi),%rsp
+
L$gmult_epilogue:
.byte 0xf3,0xc3
+
.globl _gcm_ghash_4bit
.p2align 4
_gcm_ghash_4bit:
+
pushq %rbx
+
pushq %rbp
+
pushq %r12
+
pushq %r13
+
pushq %r14
+
pushq %r15
+
subq $280,%rsp
+
L$ghash_prologue:
movq %rdx,%r14
movq %rcx,%r15
@@ -683,21 +708,31 @@ L$outer_loop:
movq %r8,8(%rdi)
movq %r9,(%rdi)
- leaq 280(%rsp),%rsi
- movq 0(%rsi),%r15
- movq 8(%rsi),%r14
- movq 16(%rsi),%r13
- movq 24(%rsi),%r12
- movq 32(%rsi),%rbp
- movq 40(%rsi),%rbx
- leaq 48(%rsi),%rsp
+ leaq 280+48(%rsp),%rsi
+
+ movq -48(%rsi),%r15
+
+ movq -40(%rsi),%r14
+
+ movq -32(%rsi),%r13
+
+ movq -24(%rsi),%r12
+
+ movq -16(%rsi),%rbp
+
+ movq -8(%rsi),%rbx
+
+ leaq 0(%rsi),%rsp
+
L$ghash_epilogue:
.byte 0xf3,0xc3
+
.globl _gcm_init_clmul
.p2align 4
_gcm_init_clmul:
+
L$_init_clmul:
movdqu (%rsi),%xmm2
pshufd $78,%xmm2,%xmm2
@@ -850,10 +885,12 @@ L$_init_clmul:
movdqu %xmm4,80(%rdi)
.byte 0xf3,0xc3
+
.globl _gcm_gmult_clmul
.p2align 4
_gcm_gmult_clmul:
+
L$_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa L$bswap_mask(%rip),%xmm5
@@ -901,10 +938,12 @@ L$_gmult_clmul:
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
+
.globl _gcm_ghash_clmul
.p2align 5
_gcm_ghash_clmul:
+
L$_ghash_clmul:
movdqa L$bswap_mask(%rip),%xmm10
@@ -1284,10 +1323,12 @@ L$done:
movdqu %xmm0,(%rdi)
.byte 0xf3,0xc3
+
.globl _gcm_init_avx
.p2align 5
_gcm_init_avx:
+
vzeroupper
vmovdqu (%rsi),%xmm2
@@ -1391,16 +1432,20 @@ L$init_start_avx:
vzeroupper
.byte 0xf3,0xc3
+
.globl _gcm_gmult_avx
.p2align 5
_gcm_gmult_avx:
+
jmp L$_gmult_clmul
+
.globl _gcm_ghash_avx
.p2align 5
_gcm_ghash_avx:
+
vzeroupper
vmovdqu (%rdi),%xmm10
@@ -1773,6 +1818,7 @@ L$tail_no_xor_avx:
vzeroupper
.byte 0xf3,0xc3
+
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0