summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2021-07-08 20:37:26 +0000
committerNiels Möller <nisse@lysator.liu.se>2021-07-08 20:37:26 +0000
commit6dfdd0bc63cbcf4d7eca01bfda81e02823a4796e (patch)
treea3f6cfe2a02b394dfa3c0d612ee0c73456bda3d0
parent61bcbbf830c28e8453e22db0bbdac174df442709 (diff)
parent6c84092d4d67b1a552ef287525be34e0474a9de5 (diff)
downloadnettle-6dfdd0bc63cbcf4d7eca01bfda81e02823a4796e.tar.gz
Merge branch 's390x-gcm' into 's390x'
[S390x] Optimize GHASH See merge request nettle/nettle!26
-rw-r--r--Makefile.in2
-rw-r--r--configure.ac2
-rw-r--r--s390x/machine.m421
-rw-r--r--s390x/msa_x4/gcm-hash.asm99
4 files changed, 121 insertions, 3 deletions
diff --git a/Makefile.in b/Makefile.in
index 87d193b6..5e0ffdd7 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -620,7 +620,7 @@ distdir: $(DISTFILES)
arm arm/neon arm/v6 arm/fat \
arm64 arm64/crypto arm64/fat \
powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat \
- s390x s390x/msa_x1 s390x/msa_x2 ; do \
+ s390x s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 ; do \
mkdir "$(distdir)/$$d" ; \
find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \
-exec cp '{}' "$(distdir)/$$d" ';' ; \
diff --git a/configure.ac b/configure.ac
index c0650ec2..64242bfd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -531,7 +531,7 @@ if test "x$enable_assembler" = xyes ; then
if test "$ABI" = 64 ; then
asm_path="s390x"
if test "$enable_s390x_msa" = yes ; then
- asm_path="s390x/msa_x1 s390x/msa_x2 $asm_path"
+ asm_path="s390x/msa_x1 s390x/msa_x2 s390x/msa_x4 $asm_path"
fi
fi
;;
diff --git a/s390x/machine.m4 b/s390x/machine.m4
index acd5e26c..8626cec1 100644
--- a/s390x/machine.m4
+++ b/s390x/machine.m4
@@ -1,2 +1,21 @@
C Register usage:
-define(`RA', `%r14')
+define(`RA', `%r14') C Return address
+define(`SP', `%r15') C Stack pointer
+
+define(`STANDARD_STACK_FRAME',`160') C Bytes kept between the new SP and the space handed out by ALLOC_STACK
+
+C Dynamic stack space allocation: lowers SP by STANDARD_STACK_FRAME+SPACE_LEN and stores the previous SP at 0(SP)
+C AP is a general register; on exit it holds the address of the allocated space, STANDARD_STACK_FRAME bytes above the new SP
+C SPACE_LEN is the length of the space in bytes, must be a multiple of 8
+C The FREE_STACK macro, given the same SPACE_LEN, releases the allocated space
+C ALLOC_STACK(AP, SPACE_LEN)
+define(`ALLOC_STACK',
+`lgr $1,SP
+ aghi SP,-(STANDARD_STACK_FRAME+$2)
+ stg $1,0(SP)
+ la $1,STANDARD_STACK_FRAME (SP)')
+
+C Free stack space allocated by ALLOC_STACK; SPACE_LEN must be the same value that was passed to ALLOC_STACK
+C FREE_STACK(SPACE_LEN)
+define(`FREE_STACK',
+`aghi SP,STANDARD_STACK_FRAME+$1')
diff --git a/s390x/msa_x4/gcm-hash.asm b/s390x/msa_x4/gcm-hash.asm
new file mode 100644
index 00000000..50d8b7c0
--- /dev/null
+++ b/s390x/msa_x4/gcm-hash.asm
@@ -0,0 +1,99 @@
+C s390x/msa_x4/gcm-hash.asm
+
+ifelse(`
+ Copyright (C) 2020 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) is specified in
+C "z/Architecture Principles of Operation SA22-7832-12" as follows:
+C A function specified by the function code in general register 0 is performed.
+C General register 1 contains the logical address of the leftmost byte of the parameter block in storage.
+C The second operand is processed as specified by the function code using an initial chaining value in
+C the parameter block, and the result replaces the chaining value.
+
+C This implementation uses KIMD-GHASH function.
+C The parameter block used for the KIMD-GHASH function has the following format:
+C *----------------------------------------------*
+C | Initial Chaining Value (16 bytes) |
+C |----------------------------------------------|
+C | Hash Subkey (16 bytes) |
+C *----------------------------------------------*
+
+C Size in bytes of the KIMD-GHASH parameter block (16-byte chaining value + 16-byte hash subkey)
+define(`PB_SIZE', `32')
+
+C gcm_set_key() assigns the H value to the middle element of the table; H_idx is its byte offset (128*16)
+define(`H_idx', `128*16')
+
+.file "gcm-hash.asm"
+
+.text
+
+C void gcm_init_key (union gcm_block *table)
+
+PROLOGUE(_nettle_gcm_init_key)
+ C Intentional no-op: apart from the Hash Subkey (H), KIMD-GHASH needs no pre-computed values, so table is left untouched and we just return to the caller.
+ br RA
+EPILOGUE(_nettle_gcm_init_key)
+
+C void gcm_hash (const struct gcm_key *key, union gcm_block *x,
+C size_t length, const uint8_t *data)
+
+PROLOGUE(_nettle_gcm_hash)
+ ldgr %f0,%r6 C save non-volatile general register 6 in volatile floating-point register 0
+ C --- allocate stack space for the parameter block plus a 16-byte buffer used to handle leftover bytes ---
+ ALLOC_STACK(%r1,PB_SIZE+16) C parameter block (must be general register 1)
+ lgr %r6,%r3 C keep the x pointer; %r3 is reused below as the KIMD length register
+ mvc 0(16,%r1),0(%r3) C copy x to Initial Chaining Value field
+ mvc 16(16,%r1),H_idx (%r2) C copy H to Hash Subkey field (the space after H_idx keeps m4 from taking the parenthesis as macro arguments)
+ lghi %r0,65 C GHASH function code (must be general register 0)
+ lgr %r2,%r5 C location of leftmost byte of data (must be an even-numbered general register other than general register 0)
+ C number of bytes (must be in the general register numbered data register + 1). The length given to KIMD must be a multiple of the data block size (16).
+ risbg %r3,%r4,0,187,0 C %r3 = length rounded down to a multiple of 16: insert bit offsets 0-59; 187 = 128+59, the high bit of the fourth operand clears the remaining bits.
+1: .long 0xb93e0002 C kimd %r0,%r2
+ brc 1,1b C safely branch back in case of partial completion
+ C --- handle leftovers ---
+ risbg %r5,%r4,60,191,0 C %r5 = length mod 16: insert bit offsets 60-63 and clear the remaining bits (191 = 128+63).
+ jz 4f C no leftover bytes (condition code left by risbg): skip straight to storing x
+ lgr %r4,%r2 C mvcle source address: position in data after the KIMD loop (%r5 already holds the source length)
+ C --- copy the leftovers to allocated stack buffer and pad the remaining bytes with zero ---
+ la %r2,PB_SIZE (%r1) C mvcle destination address: the 16-byte buffer following the parameter block
+ lghi %r3,16 C mvcle destination length
+2: mvcle %r2,%r4,0 C third operand 0 is the padding byte
+ brc 1,2b C branch back if the move was only partially completed
+ aghi %r2,-16 C rewind %r2 to the start of the padded buffer
+ aghi %r3,16 C set the length operand back to one block (16 bytes) for KIMD
+3: .long 0xb93e0002 C kimd %r0,%r2
+ brc 1,3b C safely branch back in case of partial completion
+4:
+ mvc 0(16,%r6),0(%r1) C store the updated chaining value back to x
+ xc 0(PB_SIZE+16,%r1),0(%r1) C wipe parameter block content and leftover bytes of data from stack
+ FREE_STACK(PB_SIZE+16)
+ lgdr %r6,%f0 C restore general register 6
+ br RA
+EPILOGUE(_nettle_gcm_hash)