author     Niels Möller <nisse@lysator.liu.se>  2014-06-01 22:46:34 +0200
committer  Niels Möller <nisse@lysator.liu.se>  2014-06-01 22:46:34 +0200
commit     edf2b37fa461dc1b5919ce53f1c56cd0a449425e (patch)
tree       5600f12f3e55d0ad8e203c634bc9ca7280fd1261 /x86_64
parent     1851417eb69ca20c87167301aa20bf828fbdd316 (diff)
download   nettle-edf2b37fa461dc1b5919ce53f1c56cd0a449425e.tar.gz
Fixes for w64 ABI.
Diffstat (limited to 'x86_64')
-rw-r--r--  x86_64/README                       | 13
-rw-r--r--  x86_64/camellia-crypt-internal.asm  |  4
-rw-r--r--  x86_64/gcm-hash8.asm                |  2
-rw-r--r--  x86_64/machine.m4                   | 69
4 files changed, 48 insertions, 40 deletions
diff --git a/x86_64/README b/x86_64/README
index ae693be5..d04e5dfc 100644
--- a/x86_64/README
+++ b/x86_64/README
@@ -4,6 +4,8 @@ Up to 6 integer and pointer arguments are passed in registers. Nine
registers, %rax, %rcx, %rdx, %rsi, %rdi and %r8-%r11 can be used
freely. Integers and pointers are returned in %rax.
+At entry, it is required that %rsp == 8 (mod 16).
+
Registers       May be          Argument
                clobbered       number
@@ -51,14 +53,19 @@ Additional arguments are passed on the stack. "backing store" on the
stack for the four register arguments is also required. %xmm6 to
%xmm15 are callee-saved. The "long" type is just 32 bits.
-If we have five arguments, and push the additional callee-save
+If we have six arguments, and push the additional callee-save
registers %rdi and %rsi on the stack, we get a stack frame like
+64(%rsp): Sixth argument
56(%rsp): Fifth argument
48(%rsp): Space for fourth argument
40(%rsp): Space for third argument
32(%rsp): Space for second argument
24(%rsp): Space for first argument
16(%rsp): Return address
-8(%rsp): Saved %rsi
-(%rsp) : Saved %rdi
+8(%rsp) : Saved %rdi
+(%rsp): Saved %rsi
+
+If, in addition, we use more than 6 %xmm registers, we push them
+*after* %rdi (but before %rsi), so that they are stored at 16-byte
+aligned addresses.
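
To make the frame above concrete, here is a rough sketch of what the updated
W64_ENTRY(6, 0) / W64_EXIT(6, 0) macros expand to on W64 for a six-argument
function that uses no extra %xmm registers. This is paraphrased from the
machine.m4 changes below, not copied verbatim; the offsets match the frame
described above (saved %rsi at (%rsp), saved %rdi at 8(%rsp), fifth and sixth
arguments at 56(%rsp) and 64(%rsp)).

	C Approximate W64 expansion of W64_ENTRY(6, 0) (paraphrased):
	push	%rdi			C %rsp now 16-byte aligned
	mov	%rcx, %rdi		C argument 1
	push	%rsi			C breaks 16-byte alignment again
	mov	%rdx, %rsi		C argument 2
	mov	%r8, %rdx		C argument 3
	mov	%r9, %rcx		C argument 4
	mov	56(%rsp), %r8		C argument 5, from the stack
	mov	64(%rsp), %r9		C argument 6, from the stack

	C ... function body ...

	C Approximate W64 expansion of W64_EXIT(6, 0):
	pop	%rsi
	pop	%rdi
	ret				C ret follows the macro, as in the .asm files
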
diff --git a/x86_64/camellia-crypt-internal.asm b/x86_64/camellia-crypt-internal.asm
index c6032a5d..040e030f 100644
--- a/x86_64/camellia-crypt-internal.asm
+++ b/x86_64/camellia-crypt-internal.asm
@@ -138,7 +138,7 @@ C xorl XREG(TMP), XREG($1)
ALIGN(16)
PROLOGUE(_nettle_camellia_crypt)
- W64_ENTRY(5, 0)
+ W64_ENTRY(6, 0)
test LENGTH, LENGTH
jz .Lend
@@ -197,6 +197,6 @@ PROLOGUE(_nettle_camellia_crypt)
pop %rbp
pop %rbx
.Lend:
- W64_EXIT(5, 0)
+ W64_EXIT(6, 0)
ret
EPILOGUE(_nettle_camellia_crypt)
diff --git a/x86_64/gcm-hash8.asm b/x86_64/gcm-hash8.asm
index f74f2f4b..6dec3b8c 100644
--- a/x86_64/gcm-hash8.asm
+++ b/x86_64/gcm-hash8.asm
@@ -162,7 +162,7 @@ ALIGN(16)
pop %r12
pop %rbp
pop %rbx
- W64_EXIT(2, 0)
+ W64_EXIT(4, 0)
ret
.Lpartial:
diff --git a/x86_64/machine.m4 b/x86_64/machine.m4
index b9556a27..397e9b25 100644
--- a/x86_64/machine.m4
+++ b/x86_64/machine.m4
@@ -67,44 +67,48 @@ define(<XREG>,<ifelse(
dnl W64_ENTRY(nargs, xmm_used)
define(<W64_ENTRY>, <
changequote([,])dnl
- ifelse(<<<<<<<<<<<<<<<< ignored; only for balancing)
+ ifelse(<<<<<<<<<<<<<<<<<< ignored; only for balancing)
ifelse(W64_ABI,yes,[
+ dnl unconditionally push %rdi, making %rsp 16-byte aligned
+ push %rdi
+ dnl Save %xmm6, ..., if needed
ifelse(eval($2 > 6), 1, [
- sub [$]eval(8 + 16*($2 - 6)), %rsp
- movdqu %xmm6, 0(%rsp)
+ sub [$]eval(16*($2 - 6)), %rsp
+ movdqa %xmm6, 0(%rsp)
])
ifelse(eval($2 > 7), 1, [
- movdqu %xmm7, 16(%rsp)
+ movdqa %xmm7, 16(%rsp)
])
ifelse(eval($2 > 8), 1, [
- movdqu %xmm8, 32(%rsp)
+ movdqa %xmm8, 32(%rsp)
])
ifelse(eval($2 > 9), 1, [
- movdqu %xmm9, 48(%rsp)
+ movdqa %xmm9, 48(%rsp)
])
ifelse(eval($2 > 10), 1, [
- movdqu %xmm10, 64(%rsp)
+ movdqa %xmm10, 64(%rsp)
])
ifelse(eval($2 > 11), 1, [
- movdqu %xmm11, 80(%rsp)
+ movdqa %xmm11, 80(%rsp)
])
ifelse(eval($2 > 12), 1, [
- movdqu %xmm12, 96(%rsp)
+ movdqa %xmm12, 96(%rsp)
])
ifelse(eval($2 > 13), 1, [
- movdqu %xmm13, 112(%rsp)
+ movdqa %xmm13, 112(%rsp)
])
ifelse(eval($2 > 14), 1, [
- movdqu %xmm14, 128(%rsp)
+ movdqa %xmm14, 128(%rsp)
])
ifelse(eval($2 > 15), 1, [
- movdqu %xmm15, 144(%rsp)
+ movdqa %xmm15, 144(%rsp)
])
+ dnl Move around arguments
ifelse(eval($1 >= 1), 1, [
- push %rdi
mov %rcx, %rdi
])
ifelse(eval($1 >= 2), 1, [
+ dnl NOTE: Breaks 16-byte %rsp alignment
push %rsi
mov %rdx, %rsi
])
@@ -115,11 +119,10 @@ define(<W64_ENTRY>, <
mov %r9, %rcx
])
ifelse(eval($1 >= 5), 1, [
- ifelse(eval($2 > 6), 1, [
- mov eval(8 + 16*($2 - 6) + 56)(%rsp), %r8
- ], [
- mov 56(%rsp), %r8
- ])
+ mov ifelse(eval($2 > 6), 1, eval(16*($2-6)+56),56)(%rsp), %r8
+ ])
+ ifelse(eval($1 >= 6), 1, [
+ mov ifelse(eval($2 > 6), 1, eval(16*($2-6)+64),64)(%rsp), %r9
])
])
changequote(<,>)dnl
@@ -128,45 +131,43 @@ define(<W64_ENTRY>, <
dnl W64_EXIT(nargs, xmm_used)
define(<W64_EXIT>, <
changequote([,])dnl
- ifelse(<<<<<<<<<<<< ignored; only for balancing)
+ ifelse(<<<<<<<<<<< ignored; only for balancing)
ifelse(W64_ABI,yes,[
ifelse(eval($1 >= 2), 1, [
pop %rsi
- ])
- ifelse(eval($1 >= 1), 1, [
- pop %rdi
- ])
+ ])
ifelse(eval($2 > 15), 1, [
- movdqu 144(%rsp), %xmm15
+ movdqa 144(%rsp), %xmm15
])
ifelse(eval($2 > 14), 1, [
- movdqu 128(%rsp), %xmm14
+ movdqa 128(%rsp), %xmm14
])
ifelse(eval($2 > 13), 1, [
- movdqu 112(%rsp), %xmm13
+ movdqa 112(%rsp), %xmm13
])
ifelse(eval($2 > 12), 1, [
- movdqu 96(%rsp), %xmm12
+ movdqa 96(%rsp), %xmm12
])
ifelse(eval($2 > 11), 1, [
- movdqu 80(%rsp), %xmm11
+ movdqa 80(%rsp), %xmm11
])
ifelse(eval($2 > 10), 1, [
- movdqu 64(%rsp), %xmm10
+ movdqa 64(%rsp), %xmm10
])
ifelse(eval($2 > 9), 1, [
- movdqu 48(%rsp), %xmm9
+ movdqa 48(%rsp), %xmm9
])
ifelse(eval($2 > 8), 1, [
- movdqu 32(%rsp), %xmm8
+ movdqa 32(%rsp), %xmm8
])
ifelse(eval($2 > 7), 1, [
- movdqu 16(%rsp), %xmm7
+ movdqa 16(%rsp), %xmm7
])
ifelse(eval($2 > 6), 1, [
- movdqu 0(%rsp), %xmm6
- add [$]eval(8 + 16*($2 - 6)), %rsp
+ movdqa (%rsp), %xmm6
+ add [$]eval(16*($2 - 6)), %rsp
])
+ pop %rdi
])
changequote(<,>)dnl
>)
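
As a second illustration of the alignment fix, here is a rough, paraphrased
expansion for a hypothetical function that takes two arguments and uses
%xmm6-%xmm7, i.e. W64_ENTRY(2, 8) / W64_EXIT(2, 8). Since %rsp == 8 (mod 16)
at entry, the unconditional push %rdi restores 16-byte alignment, so the %xmm
save area is aligned and the former movdqu stores and loads can become movdqa.

	C Approximate W64 expansion of W64_ENTRY(2, 8) (hypothetical example):
	push	%rdi			C %rsp was 8 (mod 16); now 0 (mod 16)
	sub	$32, %rsp		C 16*(8-6) bytes, still 16-byte aligned
	movdqa	%xmm6, (%rsp)		C aligned stores, hence movdqa
	movdqa	%xmm7, 16(%rsp)
	mov	%rcx, %rdi		C argument 1
	push	%rsi
	mov	%rdx, %rsi		C argument 2

	C ... function body ...

	C Approximate W64 expansion of W64_EXIT(2, 8):
	pop	%rsi
	movdqa	16(%rsp), %xmm7
	movdqa	(%rsp), %xmm6
	add	$32, %rsp
	pop	%rdi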