Merge arm64 fat support into master.

author: Niels Möller <nisse@lysator.liu.se> 2021-03-22 19:08:14 +0100
committer: Niels Möller <nisse@lysator.liu.se> 2021-03-22 19:08:14 +0100
commit: a3e38b1d36d189834deaa626111faa93bee95ca9 (patch)
tree: b0d6e3040fb1c533ec82995cfd0e5a96943a3987
parent: 944881d7c7f321c6e4078f271e7e7be9b32aee07 (diff)
parent: 1585f6acd92508aef2988c362db598c2e35f56dd (diff)
download: nettle-a3e38b1d36d189834deaa626111faa93bee95ca9.tar.gz
6 files changed, 361 insertions, 113 deletions
diff --git a/Makefile.in b/Makefile.in
index 2274d8be..0ace35f7 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -616,7 +616,7 @@ distdir: $(DISTFILES)
 	set -e; for d in sparc32 sparc64 x86 \
 		x86_64 x86_64/aesni x86_64/sha_ni x86_64/fat \
 		arm arm/neon arm/v6 arm/fat \
-		arm64 arm64/crypto \
+		arm64 arm64/crypto arm64/fat \
 		powerpc64 powerpc64/p7 powerpc64/p8 powerpc64/fat ; do \
 	  mkdir "$(distdir)/$$d" ; \
 	  find "$(srcdir)/$$d" -maxdepth 1 '(' -name '*.asm' -o -name '*.m4' -o -name README ')' \
diff --git a/arm64/README b/arm64/README
index 139a3cc1..d2745d57 100644
--- a/arm64/README
+++ b/arm64/README
@@ -1,3 +1,42 @@
+General-purpose Registers[1]
+
+There are thirty-one, 64-bit, general-purpose (integer) registers visible to
+the A64 instruction set; these are labeled r0-r30. In a 64-bit context these
+registers are normally referred to using the names x0-x30; in a 32-bit context
+the registers are specified by using w0-w30. Additionally, a stack-pointer
+register, SP, can be used with a restricted number of instructions.
+
+The first eight registers, r0-r7, are used to pass argument values into
+a subroutine and to return result values from a function.
+
+Software developers creating platform-independent code are advised to avoid
+using r18 if at all possible. Most compilers provide a mechanism to prevent
+specific registers from being used for general allocation; portable hand-coded
+assembler should avoid it entirely. It should not be assumed that treating the
+register as callee-saved will be sufficient to satisfy the requirements of the
+platform. Virtualization code must, of course, treat the register as they would
+any other resource provided to the virtual machine.
+
+A subroutine invocation must preserve the contents of the registers r19-r29
+and SP. All 64 bits of each value stored in r19-r29 must be preserved, even
+when using the ILP32 data model.
+
+SIMD and Floating-Point Registers[1]
+
+Unlike in AArch32, in AArch64 the 128-bit and 64-bit views of a SIMD and
+Floating-Point register do not overlap multiple registers in a narrower view,
+so q1, d1 and s1 all refer to the same entry in the register bank.
+
+The first eight registers, v0-v7, are used to pass argument values into
+a subroutine and to return result values from a function. They may also
+be used to hold intermediate values within a routine (but, in general,
+only between subroutine calls).
+
+Registers v8-v15 must be preserved by a callee across subroutine calls;
+the remaining registers (v0-v7, v16-v31) do not need to be preserved
+(or should be preserved by the caller). Additionally, only the bottom 64 bits
+of each value stored in v8-v15 need to be preserved.
+
 Endianness
 
 Similar to arm, aarch64 can run with little-endian or big-endian memory
@@ -8,8 +47,8 @@ When writing SIMD code, endianness interaction with vector loads and stores may
 exhibit seemingly unintuitive behaviour, particularly when mixing normal and
 vector load/store operations.
 
-See https://llvm.org/docs/BigEndianNEON.html for a good overview, particularly
-into the pitfalls of using ldr/str vs. ld1/st1.
+See [2] for a good overview, particularly into the pitfalls of using
+ldr/str vs. ld1/st1.
 
 For example, ld1 {v1.2d,v2.2d},[x0] will load v1 and v2 with elements of a
 one-dimensional vector from consecutive memory locations. So v1.d[0] will be
@@ -43,3 +82,6 @@ quadword, they will apply endianness to the whole quadword. Therefore
 particular care must be taken if the loaded data is then to be regarded as
 elements of e.g. a doubleword vector. Indicies may appear reversed on
 big-endian systems (because they are).
+
+[1] https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aapcs64.pdf
+[2] https://llvm.org/docs/BigEndianNEON.html
diff --git a/arm64/crypto/gcm-hash.asm b/arm64/crypto/gcm-hash.asm
index b77b08d6..3e4c98d8 100644
--- a/arm64/crypto/gcm-hash.asm
+++ b/arm64/crypto/gcm-hash.asm
@@ -1,4 +1,4 @@
-C arm/v8/gcm-hash.asm
+C arm64/crypto/gcm-hash.asm
 
 ifelse(`
    Copyright (C) 2020 Niels Möller and Mamone Tarsha
@@ -38,30 +38,42 @@ ifelse(`
 C gcm_set_key() assigns H value in the middle element of the table
 define(`H_Idx', `128')
 
-C common register usage:
+C common SIMD register usage:
 define(`POLY', `v6')
+C temporary register that assists the reduction procedure
 define(`T', `v7')
+C permanent register that holds the 16-byte result of pmull
 define(`F', `v16')
+C permanent register that holds the 16-byte result of pmull2;
+C its value is accumulated into the 'F' register immediately
 define(`F1', `v17')
+C permanent register that holds the 16-byte result of pmull
 define(`R', `v18')
+C permanent register that holds the 16-byte result of pmull2;
+C its value is accumulated into the 'R' register immediately
 define(`R1', `v19')
 
 C common macros:
-.macro PMUL in, param1, param2
-    pmull          F.1q,\param2\().1d,\in\().1d
-    pmull2         F1.1q,\param2\().2d,\in\().2d
-    pmull          R.1q,\param1\().1d,\in\().1d
-    pmull2         R1.1q,\param1\().2d,\in\().2d
+C long multiply of six 64-bit polynomials and sum
+C R = (in.l × param1.l) + (in.h × param1.h)
+C F = (in.l × param2.l) + (in.h × param2.h)
+C PMUL(in, param1, param2)
+define(`PMUL', m4_assert_numargs(3)`
+    pmull          F.1q,$3.1d,$1.1d
+    pmull2         F1.1q,$3.2d,$1.2d
+    pmull          R.1q,$2.1d,$1.1d
+    pmull2         R1.1q,$2.2d,$1.2d
     eor            F.16b,F.16b,F1.16b
     eor            R.16b,R.16b,R1.16b
-.endm
-
-.macro REDUCTION out
+')
+C Reduce 'R' and 'F' values to 128-bit output
+C REDUCTION(out)
+define(`REDUCTION', m4_assert_numargs(1)`
     pmull          T.1q,F.1d,POLY.1d
     eor            R.16b,R.16b,T.16b
     ext            R.16b,R.16b,R.16b,#8
-    eor            \out\().16b,F.16b,R.16b
-.endm
+    eor            $1.16b,F.16b,R.16b
+')
 
     C void gcm_init_key (union gcm_block *table)
 
@@ -101,13 +113,14 @@ define(`H3L', `v28')
 define(`H4M', `v29')
 define(`H4L', `v30')
 
-.macro PMUL_PARAM in, param1, param2
-    pmull2         Hp.1q,\in\().2d,POLY.2d
-    eor            Hm.16b,\in\().16b,Hp.16b
-    ext            \param1\().16b,Hm.16b,\in\().16b,#8
-    ext            \param2\().16b,\in\().16b,Hm.16b,#8
-    ext            \param1\().16b,\param1\().16b,\param1\().16b,#8
-.endm
+C PMUL_PARAM(in, param1, param2)
+define(`PMUL_PARAM', m4_assert_numargs(3)`
+    pmull2         Hp.1q,$1.2d,POLY.2d
+    eor            Hm.16b,$1.16b,Hp.16b
+    ext            $2.16b,Hm.16b,$1.16b,#8
+    ext            $3.16b,$1.16b,Hm.16b,#8
+    ext            $2.16b,$2.16b,$2.16b,#8
+')
 
 PROLOGUE(_nettle_gcm_init_key)
     add            x1,TABLE,#16*H_Idx
@@ -120,6 +133,8 @@ PROLOGUE(_nettle_gcm_init_key)
 IF_LE(`
     rev64          H.16b,H.16b
 ')
+    C --- calculate H = H × x mod R(X); R(X) = (x¹²⁸+x¹²⁷+x¹²⁶+x¹²¹+1) ---
+
     dup            EMSB.16b,H.b[7]
     mov            x1,#0xC200000000000000
     mov            x2,#1
@@ -136,36 +151,36 @@ IF_LE(`
 
     dup            POLY.2d,POLY.d[0]
 
-    C --- calculate H^2 = H*H ---
+    C --- calculate H^2 = H × H ---
 
-    PMUL_PARAM H,H1M,H1L
+    PMUL_PARAM(H,H1M,H1L)
 
-    PMUL H,H1M,H1L
+    PMUL(H,H1M,H1L)
 
-    REDUCTION H2
+    REDUCTION(H2)
 
-    PMUL_PARAM H2,H2M,H2L
+    PMUL_PARAM(H2,H2M,H2L)
 
     C we store to the table as doubleword-vectors in current memory endianness
     C because it's our own strictly internal data structure and what gcm_hash
     C can most naturally use
     st1            {H1M.2d,H1L.2d,H2M.2d,H2L.2d},[TABLE],#64
 
-    C --- calculate H^3 = H^1*H^2 ---
+    C --- calculate H^3 = H^1 × H^2 ---
 
-    PMUL H2,H1M,H1L
+    PMUL(H2,H1M,H1L)
 
-    REDUCTION H3
+    REDUCTION(H3)
 
-    PMUL_PARAM H3,H3M,H3L
+    PMUL_PARAM(H3,H3M,H3L)
 
-    C --- calculate H^4 = H^2*H^2 ---
+    C --- calculate H^4 = H^2 × H^2 ---
 
-    PMUL H2,H2M,H2L
+    PMUL(H2,H2M,H2L)
 
-    REDUCTION H4
+    REDUCTION(H4)
 
-    PMUL_PARAM H4,H4M,H4L
+    PMUL_PARAM(H4,H4M,H4L)
 
     st1            {H3M.2d,H3L.2d,H4M.2d,H4L.2d},[TABLE]
 
@@ -180,7 +195,6 @@ define(`DATA', `x3')
 
 define(`D', `v0')
 define(`C0', `v1')
-define(`C0D', `d1')
 define(`C1', `v2')
 define(`C2', `v3')
 define(`C3', `v4')
@@ -197,16 +211,52 @@ define(`H3L', `v29')
 define(`H4M', `v30')
 define(`H4L', `v31')
 
-.macro PMUL_SUM in, param1, param2
-    pmull          F2.1q,\param2\().1d,\in\().1d
-    pmull2         F3.1q,\param2\().2d,\in\().2d
-    pmull          R2.1q,\param1\().1d,\in\().1d
-    pmull2         R3.1q,\param1\().2d,\in\().2d
+C PMUL_SUM(in, param1, param2)
+define(`PMUL_SUM', m4_assert_numargs(3)`
+    pmull          F2.1q,$3.1d,$1.1d
+    pmull2         F3.1q,$3.2d,$1.2d
+    pmull          R2.1q,$2.1d,$1.1d
+    pmull2         R3.1q,$2.2d,$1.2d
     eor            F2.16b,F2.16b,F3.16b
     eor            R2.16b,R2.16b,R3.16b
     eor            F.16b,F.16b,F2.16b
     eor            R.16b,R.16b,R2.16b
-.endm
+')
+
+C Load the final partial block into SIMD register,
+C stored in little-endian order for each 64-bit part
+C LOAD_REV_PARTIAL_BLOCK(out)
+define(`LOAD_REV_PARTIAL_BLOCK', m4_assert_numargs(1)`
+    tbz            LENGTH,3,Lless_8_bytes
+    ldr            `d'substr($1,1,len($1)),[DATA],#8
+IF_LE(`
+    rev64          $1.16b,$1.16b
+')
+    mov            x7,#0
+    mov            $1.d[1],x7
+    tst            LENGTH,#7
+    b.eq           Lload_done
+Lless_8_bytes:
+    mov            x6,#0
+    mov            x5,#64
+    and            x4,LENGTH,#7
+Lload_byte_loop:
+    mov            x7,#0
+    ldrb           w7,[DATA],#1
+    sub            x5,x5,#8
+    lsl            x7,x7,x5
+    orr            x6,x6,x7
+    subs           x4,x4,#1
+    b.ne           Lload_byte_loop
+    tbz            LENGTH,3,Lstore_hi_dw
+    mov            $1.d[1],x6
+    b              Lload_done
+Lstore_hi_dw:
+    mov            x7,#0
+    mov            $1.d[0],x6
+    mov            $1.d[1],x7
+Lload_done:
+')
 
     C void gcm_hash (const struct gcm_key *key, union gcm_block *x,
     C                size_t length, const uint8_t *data)
@@ -221,13 +271,13 @@ IF_LE(`
 ')
 
     ands           x4,LENGTH,#-64
-    b.eq           L2x
+    b.eq           L1_block
 
     add            x5,TABLE,#64
     ld1            {H1M.2d,H1L.2d,H2M.2d,H2L.2d},[TABLE]
     ld1            {H3M.2d,H3L.2d,H4M.2d,H4L.2d},[x5]
 
-L4x_loop:
+L4_blocks_loop:
     ld1            {C0.2d,C1.2d,C2.2d,C3.2d},[DATA],#64
 IF_LE(`
     rev64          C0.16b,C0.16b
@@ -238,45 +288,25 @@ IF_LE(`
 
     eor            C0.16b,C0.16b,D.16b
 
-    PMUL C1,H3M,H3L
-    PMUL_SUM C2,H2M,H2L
-    PMUL_SUM C3,H1M,H1L
-    PMUL_SUM C0,H4M,H4L
+    PMUL(C1,H3M,H3L)
+    PMUL_SUM(C2,H2M,H2L)
+    PMUL_SUM(C3,H1M,H1L)
+    PMUL_SUM(C0,H4M,H4L)
 
-    REDUCTION D
+    REDUCTION(D)
 
     subs           x4,x4,#64
-    b.ne           L4x_loop
+    b.ne           L4_blocks_loop
 
     and            LENGTH,LENGTH,#63
 
-L2x:
-    tst            LENGTH,#-32
-    b.eq           L1x
-
-    ld1            {H1M.2d,H1L.2d,H2M.2d,H2L.2d},[TABLE]
-
-    ld1            {C0.2d,C1.2d},[DATA],#32
-IF_LE(`
-    rev64          C0.16b,C0.16b
-    rev64          C1.16b,C1.16b
-')
-
-    eor            C0.16b,C0.16b,D.16b
-
-    PMUL C1,H1M,H1L
-    PMUL_SUM C0,H2M,H2L
-
-    REDUCTION D
-
-    and            LENGTH,LENGTH,#31
-
-L1x:
-    tst            LENGTH,#-16
-    b.eq           Lmod
+L1_block:
+    ands           x4,LENGTH,#-16
+    b.eq           Lpartial
 
     ld1            {H1M.2d,H1L.2d},[TABLE]
 
+L1_block_loop:
     ld1            {C0.2d},[DATA],#16
 IF_LE(`
     rev64          C0.16b,C0.16b
@@ -284,52 +314,28 @@ IF_LE(`
 
     eor            C0.16b,C0.16b,D.16b
 
-    PMUL C0,H1M,H1L
+    PMUL(C0,H1M,H1L)
+
+    REDUCTION(D)
 
-    REDUCTION D
+    subs           x4,x4,#16
+    b.ne           L1_block_loop
 
-Lmod:
+Lpartial:
     tst            LENGTH,#15
-    b.eq           Ldone
+    b.eq           Lghash_done
 
     ld1            {H1M.2d,H1L.2d},[TABLE]
 
-    tbz            LENGTH,3,Lmod_8
-    ldr            C0D,[DATA],#8
-IF_LE(`
-    rev64          C0.16b,C0.16b
-')
-    mov            x7,#0
-    mov            C0.d[1],x7
-Lmod_8:
-    tst            LENGTH,#7
-    b.eq           Lmod_8_done
-    mov            x6,#0
-    mov            x5,#64
-    and            x4,LENGTH,#7
-Lmod_8_loop:
-    mov            x7,#0
-    ldrb           w7,[DATA],#1
-    sub            x5,x5,#8
-    lsl            x7,x7,x5
-    orr            x6,x6,x7
-    subs           x4,x4,#1
-    b.ne           Lmod_8_loop
-    tbz            LENGTH,3,Lmod_8_load
-    mov            C0.d[1],x6
-    b              Lmod_8_done
-Lmod_8_load:
-    mov            x7,#0
-    mov            C0.d[0],x6
-    mov            C0.d[1],x7
-Lmod_8_done:
+    LOAD_REV_PARTIAL_BLOCK(C0)
+
     eor            C0.16b,C0.16b,D.16b
 
-    PMUL C0,H1M,H1L
+    PMUL(C0,H1M,H1L)
 
-    REDUCTION D
+    REDUCTION(D)
 
-Ldone:
+Lghash_done:
 IF_LE(`
     rev64          D.16b,D.16b
 ')
diff --git a/arm64/fat/gcm-hash.asm b/arm64/fat/gcm-hash.asm
new file mode 100644
index 00000000..5ef171b5
--- /dev/null
+++ b/arm64/fat/gcm-hash.asm
@@ -0,0 +1,38 @@
+C arm64/fat/gcm-hash.asm
+
+ifelse(`
+   Copyright (C) 2021 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+')
+
+dnl picked up by configure
+dnl PROLOGUE(_nettle_fat_gcm_init_key)
+dnl PROLOGUE(_nettle_fat_gcm_hash)
+
+define(`fat_transform', `$1_arm64')
+include_src(`arm64/crypto/gcm-hash.asm')
diff --git a/configure.ac b/configure.ac
index 6080a06a..026ae99d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -476,9 +476,15 @@ if test "x$enable_assembler" = xyes ; then
       ;;
     aarch64*)
       if test "$ABI" = 64 ; then
-	asm_path=arm64
-	if test "$enable_arm64_crypto" = yes ; then
-          asm_path="arm64/crypto $asm_path"
+        asm_path=arm64
+        if test "x$enable_fat" = xyes ; then
+          asm_path="arm64/fat $asm_path"
+          OPT_NETTLE_SOURCES="fat-arm64.c $OPT_NETTLE_SOURCES"
+          FAT_TEST_LIST="none pmull"
+        else
+          if test "$enable_arm64_crypto" = yes ; then
+            asm_path="arm64/crypto $asm_path"
+          fi
         fi
       else
 	# As far as I understand, Neon instructions are unlikely to be
diff --git a/fat-arm64.c b/fat-arm64.c
new file mode 100644
index 00000000..9f81951f
--- /dev/null
+++ b/fat-arm64.c
@@ -0,0 +1,156 @@
+/* fat-arm64.c
+
+   Copyright (C) 2021 Mamone Tarsha
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+*/
+
+#define _GNU_SOURCE
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__linux__) && defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+# if __GLIBC_PREREQ(2, 16)
+#  define USE_GETAUXVAL 1
+#  include <asm/hwcap.h>
+#  include <sys/auxv.h>
+# endif
+#endif
+
+#include "nettle-types.h"
+
+#include "gcm.h"
+#include "gcm-internal.h"
+#include "fat-setup.h"
+
+/* Defines from arch/arm64/include/uapi/asm/hwcap.h in Linux kernel */
+#ifndef HWCAP_ASIMD
+#define HWCAP_ASIMD (1 << 1)
+#endif
+#ifndef HWCAP_PMULL
+#define HWCAP_PMULL (1 << 4)
+#endif
+
+struct arm64_features
+{
+  int have_pmull;
+};
+
+#define MATCH(s, slen, literal, llen) \
+  ((slen) == (llen) && memcmp ((s), (literal), llen) == 0)
+
+static void
+get_arm64_features (struct arm64_features *features)
+{
+  const char *s;
+  features->have_pmull = 0;
+
+  s = secure_getenv (ENV_OVERRIDE);
+  if (s)
+    for (;;)
+      {
+	const char *sep = strchr (s, ',');
+	size_t length = sep ? (size_t) (sep - s) : strlen(s);
+
+	if (MATCH (s, length, "pmull", 5))
+	  features->have_pmull = 1;
+	if (!sep)
+	  break;
+	s = sep + 1;
+      }
+  else
+    {
+#if USE_GETAUXVAL
+      unsigned long hwcap = getauxval(AT_HWCAP);
+      features->have_pmull
+	= ((hwcap & (HWCAP_ASIMD | HWCAP_PMULL)) == (HWCAP_ASIMD | HWCAP_PMULL));
+#endif
+    }
+}
+
+#if GCM_TABLE_BITS == 8
+DECLARE_FAT_FUNC(_nettle_gcm_init_key, gcm_init_key_func)
+DECLARE_FAT_FUNC_VAR(gcm_init_key, gcm_init_key_func, c)
+DECLARE_FAT_FUNC_VAR(gcm_init_key, gcm_init_key_func, arm64)
+
+DECLARE_FAT_FUNC(_nettle_gcm_hash, gcm_hash_func)
+DECLARE_FAT_FUNC_VAR(gcm_hash, gcm_hash_func, c)
+DECLARE_FAT_FUNC_VAR(gcm_hash, gcm_hash_func, arm64)
+#endif /* GCM_TABLE_BITS == 8 */
+
+static void CONSTRUCTOR
+fat_init (void)
+{
+  struct arm64_features features;
+  int verbose;
+
+  get_arm64_features (&features);
+
+  verbose = getenv (ENV_VERBOSE) != NULL;
+  if (verbose)
+    fprintf (stderr, "libnettle: cpu features: %s\n",
+	     features.have_pmull ? "polynomial multiply long instructions (PMULL/PMULL2)" : "");
+
+  if (features.have_pmull)
+    {
+      if (verbose)
+	fprintf (stderr, "libnettle: enabling hardware-accelerated polynomial multiply code.\n");
+#if GCM_TABLE_BITS == 8
+      /* Make sure the _nettle_gcm_init_key_vec function is compatible
+         with the _nettle_gcm_hash_vec function; e.g., _nettle_gcm_init_key_c()
+         fills the gcm_key table with values that are incompatible with
+         _nettle_gcm_hash_arm64(). */
+      _nettle_gcm_init_key_vec = _nettle_gcm_init_key_arm64;
+      _nettle_gcm_hash_vec = _nettle_gcm_hash_arm64;
+#endif /* GCM_TABLE_BITS == 8 */
+    }
+  else
+    {
+#if GCM_TABLE_BITS == 8
+      _nettle_gcm_init_key_vec = _nettle_gcm_init_key_c;
+      _nettle_gcm_hash_vec = _nettle_gcm_hash_c;
+#endif /* GCM_TABLE_BITS == 8 */
+    }
+}
+
+#if GCM_TABLE_BITS == 8
+DEFINE_FAT_FUNC(_nettle_gcm_init_key, void,
+		(union nettle_block16 *table),
+		(table))
+
+DEFINE_FAT_FUNC(_nettle_gcm_hash, void,
+		(const struct gcm_key *key, union nettle_block16 *x,
+		 size_t length, const uint8_t *data),
+		(key, x, length, data))
+#endif /* GCM_TABLE_BITS == 8 */
author	Niels Möller <nisse@lysator.liu.se>	2021-03-22 19:08:14 +0100
committer	Niels Möller <nisse@lysator.liu.se>	2021-03-22 19:08:14 +0100
commit	a3e38b1d36d189834deaa626111faa93bee95ca9 (patch)
tree	b0d6e3040fb1c533ec82995cfd0e5a96943a3987
parent	944881d7c7f321c6e4078f271e7e7be9b32aee07 (diff)
parent	1585f6acd92508aef2988c362db598c2e35f56dd (diff)
download	nettle-a3e38b1d36d189834deaa626111faa93bee95ca9.tar.gz