summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2013-03-06 09:33:26 +0100
committerNiels Möller <nisse@lysator.liu.se>2013-03-06 09:33:26 +0100
commit59eb1aa12bbad340877f83f06eca1d47c10231c2 (patch)
treef21578b44c671a37ea36df97010679ed71747f7a
parent933c4ceff51ff57d78ff3a7df73eb052c20b06d7 (diff)
downloadnettle-59eb1aa12bbad340877f83f06eca1d47c10231c2.tar.gz
x86_64 assembly for ecc_224_modp.
-rw-r--r--ChangeLog5
-rw-r--r--x86_64/ecc-224-modp.asm115
2 files changed, 120 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index a8c23743..581d86ca 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2013-03-06 Niels Möller <nisse@lysator.liu.se>
+
+ * x86_64/ecc-224-modp.asm: New file, 5 times speedup over C
+ version.
+
2013-03-05 Niels Möller <nisse@lysator.liu.se>
* configure.ac (asm_optional_list): Added ecc-521-modp.asm.
diff --git a/x86_64/ecc-224-modp.asm b/x86_64/ecc-224-modp.asm
new file mode 100644
index 00000000..b759e1f2
--- /dev/null
+++ b/x86_64/ecc-224-modp.asm
@@ -0,0 +1,115 @@
+C nettle, low-level cryptographics library
+C
+C Copyright (C) 2013 Niels Möller
+C
+C The nettle library is free software; you can redistribute it and/or modify
+C it under the terms of the GNU Lesser General Public License as published by
+C the Free Software Foundation; either version 2.1 of the License, or (at your
+C option) any later version.
+C
+C The nettle library is distributed in the hope that it will be useful, but
+C WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+C or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+C License for more details.
+C
+C You should have received a copy of the GNU Lesser General Public License
+C along with the nettle library; see the file COPYING.LIB. If not, write to
+C the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+C MA 02111-1301, USA.
+
+ .file "ecc-224-modp.asm"
+
+C Register layout.  The first (ecc) argument in %rdi is not used,
+C so its register doubles as scratch T0 (see below).
+define(<RP>, <%rsi>)
+define(<T0>, <%rdi>) C Overlaps unused ecc input
+define(<T1>, <%rcx>)
+define(<H0>, <%rax>)
+define(<H1>, <%rdx>)
+define(<H2>, <%r8>)
+define(<F0>, <%r9>)
+define(<F1>, <%r10>)
+define(<F2>, <%r11>)
+
+C nettle_ecc_224_modp (const struct ecc_curve *ecc, mp_limb_t *rp)
+C
+C Folds the 8-limb (512-bit) number at rp modulo the P-224 prime
+C   p = 2^224 - 2^96 + 1,
+C storing the result in the low four limbs of rp.  Every folding
+C step below uses the congruence 2^224 == 2^96 - 1 (mod p); since
+C 224 bits is three and a half 64-bit limbs, the folds work with
+C 32-bit shifted copies of the high limbs.
+C NOTE(review): the stored result appears to be only partially
+C reduced (it fits in 4 limbs, but may still be >= p); confirm
+C against the C ecc_generic_modp convention before relying on
+C canonicity.
+PROLOGUE(nettle_ecc_224_modp)
+ W64_ENTRY(2, 0)
+ C (H1,H0) <-- (u7,u6), the two high input limbs (weight 2^384)
+ mov 48(RP), H0
+ mov 56(RP), H1
+ C Set (F2,F1,F0) <-- (H1,H0) << 32
+ mov H0, F0
+ mov H0, F1
+ shl $32, F0
+ shr $32, F1
+ mov H1, F2
+ mov H1, T0
+ shl $32, T0
+ shr $32, F2
+ or T0, F1
+
+ C First fold: (u7,u6)*2^384 == (u7,u6)*2^256 - (u7,u6)*2^160 (mod p).
+ C Subtract the shifted copy (F2,F1,F0) at limbs 2..4 and add the
+ C original (H1,H0) at limbs 4,5.  Afterwards (H2,H1,H0) holds
+ C limbs 4..6 and (T1,T0) limbs 2,3 of the partially reduced value.
+ xor H2, H2
+ mov 16(RP), T0
+ mov 24(RP), T1
+ sub F0, T0
+ sbb F1, T1
+ sbb F2, H0
+ sbb $0, H1 C No further borrow
+
+ adc 32(RP), H0
+ adc 40(RP), H1
+ adc $0, H2
+
+ C Second fold: (H2,H1,H0)*2^256 == (H2,H1,H0)*2^128 - (H2,H1,H0)*2^32.
+ C The 2^128 part lands on limbs 2..4, so (T1,T0) is added into the
+ C H registers; the 2^32 part is the shifted copy built next.
+ C Set (F2,F1,F0) <-- (H2,H1,H0) << 32
+ C To free registers, add in T1, T0 as soon as H0, H1 have been copied
+ mov H0, F0
+ mov H0, F1
+ add T0, H0
+ mov H1, F2
+ mov H1, T0
+ adc T1, H1
+ mov H2, T1
+ adc $0, H2
+
+ C Shift 32 bits
+ shl $32, F0
+ shr $32, F1
+ shl $32, T0
+ shr $32, F2
+ shl $32, T1
+ or T0, F1
+ or T1, F2
+
+ C Subtract the shifted copy from limbs 0..2 (limb 2 now lives in H0)
+ mov (RP), T0
+ mov 8(RP), T1
+ sub F0, T0
+ sbb F1, T1
+ sbb F2, H0
+ sbb $0, H1
+ sbb $0, H2
+
+ C We now have H2, H1, H0, T1, T0, with 33 bits left to reduce
+ C Set F0 <-- (H2, H1) >> 32
+ C Set (F2,F1) <-- (H2, H1 & 0xffffffff00000000)
+ C H1 <-- H1 & 0xffffffff
+
+ mov H1, F0
+ mov H1, F1
+ mov H2, F2
+ movl XREG(H1), XREG(H1) C Clears high 32 bits
+ sub H1, F1 C Clears low 32 bits
+ shr $32, F0
+ shl $32, H2
+ or H2, F0
+
+ C Last fold of the remaining high bits e = F0: e*2^224 == e*2^96 - e.
+ C Subtract e at limb 0; (F2,F1) is e*2^96 placed at limbs 1,2.
+ C The borrow out of limb 0 is propagated by decrementing (F2,F1)
+ C before they are added in.
+ sub F0, T0
+ sbb $0, F1
+ sbb $0, F2
+ add F1, T1
+ adc F2, H0
+ adc $0, H1
+
+ C Store the reduced result, limbs 0..3
+ mov T0, (RP)
+ mov T1, 8(RP)
+ mov H0, 16(RP)
+ mov H1, 24(RP)
+
+ W64_EXIT(2, 0)
+ ret
+EPILOGUE(nettle_ecc_224_modp)