-rw-r--r--  .gitlab-ci.yml | 4
-rw-r--r--  AUTHORS | 6
-rw-r--r--  ChangeLog | 238
-rw-r--r--  Makefile.in | 32
-rw-r--r--  aclocal.m4 | 4
-rw-r--r--  arm/fat/sha256-compress-n-2.asm (renamed from arm/fat/sha256-compress-2.asm) | 6
-rw-r--r--  arm/v6/sha256-compress-n.asm (renamed from arm/v6/sha256-compress.asm) | 110
-rw-r--r--  arm64/crypto/sha256-compress-n.asm (renamed from arm64/crypto/sha256-compress.asm) | 31
-rw-r--r--  arm64/fat/sha256-compress-n-2.asm (renamed from arm64/fat/sha256-compress-2.asm) | 6
-rw-r--r--  balloon-sha1.c | 55
-rw-r--r--  balloon-sha256.c | 55
-rw-r--r--  balloon-sha384.c | 55
-rw-r--r--  balloon-sha512.c | 55
-rw-r--r--  balloon.c | 149
-rw-r--r--  balloon.h | 98
-rw-r--r--  block-internal.h | 12
-rw-r--r--  blowfish-bcrypt.c | 25
-rw-r--r--  bswap-internal.h | 77
-rw-r--r--  chacha-poly1305.c | 3
-rw-r--r--  config.make.in | 1
-rw-r--r--  configure.ac | 16
-rw-r--r--  ecc-curve25519.c | 2
-rw-r--r--  ecc-curve448.c | 2
-rw-r--r--  ecc-ecdsa-sign.c | 10
-rw-r--r--  ecc-ecdsa-verify.c | 36
-rw-r--r--  ecc-gost-gc256b.c | 2
-rw-r--r--  ecc-gost-gc512a.c | 2
-rw-r--r--  ecc-gostdsa-sign.c | 7
-rw-r--r--  ecc-gostdsa-verify.c | 19
-rw-r--r--  ecc-internal.h | 25
-rw-r--r--  ecc-mod-arith.c | 15
-rw-r--r--  ecc-mul-a-eh.c | 4
-rw-r--r--  ecc-mul-a.c | 4
-rw-r--r--  ecc-mul-g-eh.c | 8
-rw-r--r--  ecc-mul-g.c | 8
-rw-r--r--  ecc-nonsec-add-jjj.c | 162
-rw-r--r--  ecc-secp192r1.c | 4
-rw-r--r--  ecc-secp224r1.c | 2
-rw-r--r--  ecc-secp256r1.c | 2
-rw-r--r--  ecc-secp384r1.c | 2
-rw-r--r--  ecc-secp521r1.c | 2
-rw-r--r--  eccdata.c | 187
-rw-r--r--  examples/ecc-benchmark.c | 21
-rw-r--r--  examples/nettle-benchmark.c | 2
-rw-r--r--  fat-arm.c | 17
-rw-r--r--  fat-arm64.c | 17
-rw-r--r--  fat-ppc.c | 13
-rw-r--r--  fat-s390x.c | 17
-rw-r--r--  fat-setup.h | 6
-rw-r--r--  fat-x86_64.c | 17
-rw-r--r--  gcm-sm4-meta.c (renamed from sec-tabselect.c) | 46
-rw-r--r--  gcm-sm4.c | 81
-rw-r--r--  gcm.c | 24
-rw-r--r--  gcm.h | 25
-rw-r--r--  ghash-internal.h | 13
-rw-r--r--  gmp-glue.c | 20
-rw-r--r--  gmp-glue.h | 4
-rw-r--r--  md-internal.h | 70
-rw-r--r--  nettle-internal.h | 5
-rw-r--r--  nettle-meta-aeads.c | 1
-rw-r--r--  nettle-meta-ciphers.c | 1
-rw-r--r--  nettle-meta.h | 3
-rw-r--r--  nettle.texinfo | 273
-rw-r--r--  nist-keywrap.c | 23
-rw-r--r--  poly1305-aes.c | 5
-rw-r--r--  poly1305-internal.h | 10
-rw-r--r--  poly1305-update.c | 78
-rw-r--r--  powerpc64/fat/poly1305-blocks.asm | 38
-rw-r--r--  powerpc64/machine.m4 | 12
-rw-r--r--  powerpc64/p7/chacha-2core.asm | 47
-rw-r--r--  powerpc64/p9/poly1305-blocks.asm | 434
-rw-r--r--  powerpc64/p9/poly1305-internal.asm | 94
-rw-r--r--  powerpc64/p9/poly1305.m4 | 102
-rw-r--r--  s390x/fat/sha256-compress-n-2.asm (renamed from s390x/fat/sha256-compress-2.asm) | 6
-rw-r--r--  s390x/msa_x1/sha256-compress-n.asm (renamed from s390x/msa_x1/sha256-compress.asm) | 24
-rw-r--r--  sha2-internal.h | 5
-rw-r--r--  sha256-compress-n.c (renamed from sha256-compress.c) | 132
-rw-r--r--  sha256.c | 30
-rw-r--r--  siv-gcm-aes128.c | 65
-rw-r--r--  siv-gcm-aes256.c | 65
-rw-r--r--  siv-gcm.c | 229
-rw-r--r--  siv-gcm.h | 107
-rw-r--r--  siv-ghash-set-key.c | 52
-rw-r--r--  siv-ghash-update.c | 65
-rw-r--r--  sm4-meta.c | 49
-rw-r--r--  sm4.c | 223
-rw-r--r--  sm4.h | 69
-rw-r--r--  testsuite/.gitignore | 3
-rw-r--r--  testsuite/Makefile.in | 10
-rw-r--r--  testsuite/balloon-test.c | 135
-rw-r--r--  testsuite/ecc-add-test.c | 44
-rw-r--r--  testsuite/ecc-mod-arith-test.c | 160
-rw-r--r--  testsuite/ecdsa-sign-test.c | 12
-rw-r--r--  testsuite/ecdsa-verify-test.c | 15
-rw-r--r--  testsuite/gcm-test.c | 18
-rw-r--r--  testsuite/meta-aead-test.c | 1
-rw-r--r--  testsuite/meta-cipher-test.c | 11
-rw-r--r--  testsuite/meta-hash-test.c | 1
-rw-r--r--  testsuite/siv-gcm-test.c | 731
-rw-r--r--  testsuite/sm4-test.c | 19
-rw-r--r--  testsuite/testutils.c | 7
-rw-r--r--  testsuite/testutils.h | 2
-rw-r--r--  x86_64/fat/sha256-compress-n-2.asm (renamed from x86_64/fat/sha256-compress-2.asm) | 4
-rw-r--r--  x86_64/fat/sha256-compress-n.asm (renamed from x86_64/fat/sha256-compress.asm) | 4
-rw-r--r--  x86_64/poly1305-blocks.asm | 128
-rw-r--r--  x86_64/poly1305-internal.asm | 2
-rw-r--r--  x86_64/sha256-compress-n.asm (renamed from x86_64/sha256-compress.asm) | 85
-rw-r--r--  x86_64/sha_ni/sha256-compress-n.asm (renamed from x86_64/sha_ni/sha256-compress.asm) | 42
108 files changed, 5011 insertions, 606 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ed15456f..0f10d9fd 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -144,8 +144,8 @@ Debian.cross.x86:
- apt-get update -q
# remove any previously installed nettle headers to avoid conflicts
- for arch in armhf arm64 ppc64el;do apt-get remove -y nettle-dev:$arch;done
- - if [ "$host" == "powerpc64-linux-gnu" ];then apt-get update && apt-get install -y gcc-$host g++-$host && export QEMU_LD_PREFIX=/usr/$host EXTRA_CONFIGURE_FLAGS='--enable-mini-gmp';fi
- - if [ "$host" == "powerpc64le-linux-gnu" ];then apt-get update && apt-get install -y gcc-$host g++-$host libgmp-dev:ppc64el && export QEMU_LD_PREFIX=/usr/$host;fi
+ - if [ "$host" == "powerpc64-linux-gnu" ];then apt-get install -y software-properties-common && add-apt-repository "deb http://deb.debian.org/debian bullseye-backports main" && apt-get update && apt-get install -y -t bullseye-backports binfmt-support qemu-user && apt-get install -y gcc-$host g++-$host && export QEMU_LD_PREFIX=/usr/$host EXTRA_CONFIGURE_FLAGS='--enable-mini-gmp';fi
+ - if [ "$host" == "powerpc64le-linux-gnu" ];then apt-get install -y software-properties-common && add-apt-repository "deb http://deb.debian.org/debian bullseye-backports main" && apt-get update && apt-get install -y -t bullseye-backports binfmt-support qemu-user && apt-get install -y gcc-$host g++-$host libgmp-dev:ppc64el && export QEMU_LD_PREFIX=/usr/$host;fi
- if [ "$host" == "s390x-linux-gnu" ];then apt-get update && apt-get install -y gcc-$host g++-$host libgmp-dev:s390x && export EXTRA_CONFIGURE_FLAGS='--disable-assembler';fi
script:
- build=$(dpkg-architecture -qDEB_HOST_GNU_TYPE)
diff --git a/AUTHORS b/AUTHORS
index 2caaf446..c4547b94 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -96,7 +96,7 @@ Amos Jeffries Implementation of base64url encoding.
Daiki Ueno Implementation of RSA-PSS signatures,
curve448, shake256, ed448-shake256 signatures,
chacha functions for 32-bit nonce, struct
- nettle_mac interface.
+ nettle_mac interface, siv-gcm.
Dmitry Baryshkov CFB and CFB8 modes, CMAC64. gosthash94cp and
Streebog hash functions, GOST DSA signatures
@@ -121,10 +121,12 @@ Mamone Tarsha Kurdi Powerpc64 assembly and fat build setup,
Nicolas Mora RFC 3394 keywrap.
-Tianjia Zhang SM3 hash function.
+Tianjia Zhang SM3 hash function, SM4 block cipher.
Amitay Isaacs Powerpc64 assembly for secp192r1, secp224r1
and secp256r1.
Martin Schwenke Powerpc64 assembly for secp384r1, secp521r1,
curve25519 and curve448.
+
+Zoltan Fridrich Ballon password hashing.
diff --git a/ChangeLog b/ChangeLog
index 7ce9d354..f1e5537d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,190 @@
+2022-11-09 Niels Möller <nisse@lysator.liu.se>
+
+ From Mamone Tarsha:
+ * powerpc64/p9/poly1305-blocks.asm: New file, multi-block radix
+ 2^44 implementation. Benchmarked to give a speedup of 3.2 times on
+ Power9.
+ * powerpc64/p9/poly1305.m4 (DEFINES_BLOCK_R64, BLOCK_R64): New
+ file, new macros.
+ * powerpc64/p9/poly1305-internal.asm: Use BLOCK_R64 macro.
+ * powerpc64/machine.m4 (INC_GPR, INC_VR): New macros.
+ * powerpc64/fat/poly1305-blocks.asm: New file.
+ * poly1305-update.c: Check HAVE_NATIVE_fat_poly1305_blocks, and
+ define _nettle_poly1305_blocks_c when needed.
+ * fat-ppc.c: Fat setup for _nettle_poly1305_blocks.
+
+2022-11-07 Niels Möller <nisse@lysator.liu.se>
+
+ * configure.ac (ASM_FLAGS): New configure environment variable.
+ * aclocal.m4 (GMP_TRY_ASSEMBLE): Use $ASM_FLAGS.
+ * config.make.in (ASM_FLAGS): Add substitution.
+ * Makefile.in: Use $(ASM_FLAGS) when compiling .asm files.
+
+2022-10-31 Niels Möller <nisse@lysator.liu.se>
+
+ * configure.ac: (asm_file_list): Add HAVE_NATIVE_poly1305_blocks.
+ (asm_nettle_optional_list): Add poly1305-blocks.asm.
+ * x86_64/poly1305-blocks.asm: New file.
+
+ * md-internal.h (MD_FILL_OR_RETURN_INDEX): New macro.
+ * poly1305-update.c (_nettle_poly1305_update): New file and
+ function.
+ * poly1305-internal.h: Declare _nettle_poly1305_blocks and
+ _nettle_poly1305_update.
+ * chacha-poly1305.c (poly1305_update): Use _nettle_poly1305_update.
+ * poly1305-aes.c (poly1305_aes_update): Likewise.
+ * Makefile.in (nettle_SOURCES): Add poly1305-update.c.
+
+2022-10-13 Niels Möller <nisse@lysator.liu.se>
+
+ * gmp-glue.c (mpn_sec_tabselect) [NETTLE_USE_MINI_GMP]: Add back
+ here, to support mini-gmp builds. Updated signature to be
+ compatible with the gmp version.
+ * gmp-glue.h: Add declaration.
+
+2022-10-11 Niels Möller <nisse@lysator.liu.se>
+
+ * sec-tabselect.c (sec_tabselect): Delete file and function. All
+ callers updated to use gmp's mpn_sec_tabselect instead, which is
+ implemented in assembly on many platforms.
+
+2022-10-02 Niels Möller <nisse@lysator.liu.se>
+
+ * examples/ecc-benchmark.c (bench_curve): Add benchmarking of
+ modulo q inversion.
+
+2022-09-29 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-ecdsa-verify.c (ecc_ecdsa_verify): Call ecc_mul_g and ecc_mul_a directly, not via
+ function pointers.
+ (ecc_ecdsa_verify_itch): Use ECC_MUL_A_ITCH
+ rather than ecc->mul_itch.
+ * ecc-gostdsa-verify.c (ecc_gostdsa_verify_itch)
+ (ecc_gostdsa_verify): Analogous changes.
+
+ * ecc-ecdsa-sign.c (ecc_ecdsa_sign): Call ecc_mul_g and ecc_j_to_a
+ directly, not via function pointers.
+ (ecc_ecdsa_sign_itch): Use ECC_MUL_G_ITCH rather than
+ ecc->mul_g_itch.
+ * ecc-gostdsa-sign.c (ecc_gostdsa_sign_itch, ecc_gostdsa_sign):
+ Analogous changes.
+
+2022-09-28 Niels Möller <nisse@lysator.liu.se>
+
+ * testsuite/meta-hash-test.c (test_main): Add check of
+ NETTLE_MAX_HASH_BLOCK_SIZE.
+ * nettle-internal.h (NETTLE_MAX_HASH_BLOCK_SIZE): Increase to 144,
+ to accommodate sha3_224.
+ * testsuite/meta-cipher-test.c (test_main): Check that cipher
+ metadata doesn't exceed NETTLE_MAX_CIPHER_BLOCK_SIZE or
+ NETTLE_MAX_CIPHER_KEY_SIZE.
+
+ From Daiki Ueno:
+ * siv-gcm.c (siv_gcm_encrypt_message, siv_gcm_decrypt_message):
+ New file, implementation of SIV-GCM.
+ * siv-gcm.h (SIV_GCM_BLOCK_SIZE, SIV_GCM_DIGEST_SIZE)
+ (SIV_GCM_NONCE_SIZE): New header file, new constants and
+ declarations.
+ * siv-gcm-aes128.c (siv_gcm_aes128_encrypt_message)
+ (siv_gcm_aes128_decrypt_message): New file and functions.
+ * siv-gcm-aes256.c (siv_gcm_aes256_encrypt_message)
+ (siv_gcm_aes256_decrypt_message): Likewise.
+ * siv-ghash-set-key.c (_siv_ghash_set_key): New file, new internal
+ function.
+ * siv-ghash-update.c (_siv_ghash_update): Likewise.
+ * block-internal.h (block16_bswap): New inline function.
+ * bswap-internal.h (bswap64_if_be): New macro.
+ * nettle-internal.h (NETTLE_MAX_CIPHER_KEY_SIZE): New constant.
+ * Makefile.in (nettle_SOURCES): Add new source files.
+ (HEADERS): Add siv-gcm.h.
+ * testsuite/siv-gcm-test.c: New tests.
+ * testsuite/Makefile.in (TS_NETTLE_SOURCES): Add siv-gcm-test.c.
+ * nettle.texinfo (SIV-GCM): Documentation.
+
+ From Zoltan Fridrich:
+ * balloon.c (balloon, balloon_itch): Implementation of balloon
+ password hash.
+ * balloon.h: New header file.
+ * balloon-sha1.c (balloon_sha1): New file and function.
+ * balloon-sha256.c (balloon_sha256): Likewise.
+ * balloon-sha384.c (balloon_sha384): Likewise.
+ * balloon-sha512.c (balloon_sha512): Likewise.
+ * Makefile.in (nettle_SOURCES): Add balloon source files.
+ (HEADERS): Add ballon.h.
+ * testsuite/balloon-test.c: New tests.
+ * testsuite/Makefile.in (TS_NETTLE_SOURCES): Add balloon-test.c.
+
+2022-09-14 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-nonsec-add-jjj.c (ecc_nonsec_add_jjj): New file and
+ function.
+ * ecc-internal.h: Declare it.
+ * Makefile.in (hogweed_SOURCES): Add ecc-nonsec-add-jjj.c.
+ * testsuite/ecc-add-test.c (test_main): Add tests for ecc_nonsec_add_jjj.
+
+ * ecc-ecdsa-verify.c (ecc_ecdsa_verify): Use ecc_nonsec_add_jjj,
+ to produce correct result in a corner case where point addition
+ needs to use point duplication. Also use ecc_j_to_a rather than
+ ecc->h_to_a, since ecdsa supports only weierstrass curves.
+ * ecc-gostdsa-verify.c (ecc_gostdsa_verify): Analogous change.
+
+ * testsuite/ecdsa-verify-test.c (test_main): Add corresponding test.
+ * testsuite/ecdsa-sign-test.c (test_main): And a test producing
+ the problematic signature.
+
+2022-09-08 Niels Möller <nisse@lysator.liu.se>
+
+ * eccdata.c (string_toupper): New utility function.
+ (output_modulo): Move more of the per-modulo output here.
+ (output_curve): Remove corresponding code.
+
+2022-08-31 Niels Möller <nisse@lysator.liu.se>
+
+ * bswap-internal.h (nettle_bswap64, nettle_bswap32)
+ (bswap64_if_le): New header file, new inline functions/macros.
+ * gcm.c (gcm_hash_sizes): Use bswap64_if_le, and bswap-internal.h,
+ replacing local definition of bswap_if_le.
+ * nist-keywrap.c (nist_keywrap16): Likewise.
+ * blowfish-bcrypt.c (swap32): Renamed function, to...
+ (bswap32_if_le): ...new name, rewritten to use nettle_bswap32.
+ Update call sites.
+ * Makefile.in (DISTFILES): Add bswap-internal.h.
+
+2022-08-18 Niels Möller <nisse@lysator.liu.se>
+
+ * Makefile.in (HEADERS): Add sm4.h.
+
+ From Tianjia Zhang: SM4 block cipher.
+ * sm4.c: New file.
+ * sm4.h: New file.
+ * sm4-meta.c: New file.
+ * gcm-sm4.c: New file
+ * gcm-sm4-meta.c: New file.
+ * nettle.texinfo: Document SM4.
+ * testsuite/gcm-test.c (test_main): Add SM4 tests.
+ * testsuite/sm4-test.c: New file.
+
+ * configure.ac (ABI): Change mips abi check to apply only to mips64.
+
+2022-08-17 Niels Möller <nisse@lysator.liu.se>
+
+ * testsuite/testutils.c (mpz_urandomm) [NETTLE_USE_MINI_GMP]: New
+ fallback definition when building with mini-gmp.
+
+2022-08-16 Niels Möller <nisse@lysator.liu.se>
+
+ * ecc-mod-arith.c (ecc_mod_sub): Ensure that if inputs are in the
+ range 0 <= a, b < 2m, then output is in the same range.
+ * eccdata.c (output_curve): New outputs ecc_Bm2p and ecc_Bm2q.
+ * ecc-internal.h (struct ecc_modulo): New member Bm2m (B^size -
+ 2m), needed by ecc_mod_sub. Update all curves.
+ * testsuite/ecc-mod-arith-test.c: New tests for ecc_mod_add and
+ ecc_mod_sub.
+
+ * eccdata.c (output_modulo): Output the limb size, delete return
+ value.
+ (output_curve): Update calls to output_modulo, other minor cleanup.
+
2022-08-07 Niels Möller <nisse@lysator.liu.se>
Delete all arcfour assembly code.
@@ -8,6 +195,15 @@
* x86/arcfour-crypt.asm: Deleted.
* asm.m4: Delete arcfour structure offsets.
+2022-08-07 Niels Möller <nisse@lysator.liu.se>
+
+ Based on patch from Corentin Labbe:
+ * nettle.texinfo: Document sha256_compress, sha512_compress,
+ md5_compress and sha1_compress.
+
+ * configure.ac: Refer to nettle-types.h, rather than arcfour.c,
+ for AC_CONFIG_SRCDIR.
+
2022-08-05 Niels Möller <nisse@lysator.liu.se>
* nettle-internal.h: Include stdlib.h, fix alloca warnings on BSD.
@@ -23,6 +219,48 @@
* aclocal.m4 (LSH_CCPIC): Use proper PIC flag for *BSD OS's.
* blowfish-bcrypt.c (swap32): Eliminate conflict with OpenBSD's swap32 macro.
+2022-07-29 Niels Möller <nisse@lysator.liu.se>
+
+ * s390x/msa_x1/sha256-compress-n.asm: New file. replacing...
+ * s390x/msa_x1/sha256-compress.asm: ...deleted file.
+ * s390x/fat/sha256-compress-n-2.asm: New file. replacing...
+ * s390x/fat/sha256-compress-2.asm: ...deleted file.
+ * fat-s390x.c: Update fat setup.
+
+2022-07-26 Niels Möller <nisse@lysator.liu.se>
+
+ * arm/v6/sha256-compress-n.asm: New file. replacing...
+ * arm/v6/sha256-compress.asm: ...deleted file.
+ * arm/fat/sha256-compress-n-2.asm: New file. replacing...
+ * arm/fat/sha256-compress-2.asm: ...deleted file.
+ * fat-arm.c: Update fat setup.
+
+2022-07-11 Niels Möller <nisse@lysator.liu.se>
+
+ * arm64/crypto/sha256-compress-n.asm: New file. replacing...
+ * arm64/crypto/sha256-compress.asm: ...deleted file.
+ * arm64/fat/sha256-compress-n-2.asm: New file. replacing...
+ * arm64/fat/sha256-compress-2.asm: ...deleted file.
+ * fat-arm64.c: Update fat setup.
+
+2022-07-05 Niels Möller <nisse@lysator.liu.se>
+
+ * md-internal.h (MD_FILL_OR_RETURN): New file, new macro.
+ * sha256-compress-n.c (_nettle_sha256_compress_n): New file and
+ function, replacing...
+ * sha256-compress.c (_nettle_sha256_compress): ...deleted file and
+ function.
+ * sha2-internal.h (_nettle_sha256_compress_n): Declare new function..
+ * sha256.c (sha256_compress): Update to use
+ _nettle_sha256_compress_n and MD_FILL_OR_RETURN.
+ * x86_64/sha256-compress-n.asm: New file. replacing...
+ * x86_64/sha256-compress.asm: ...deleted file.
+ * x86_64/sha_ni/sha256-compress-n.asm: New file. replacing...
+ * x86_64/sha_ni/sha256-compress.asm: ...deleted file.
+ * fat-setup.h (sha256_compress_n_func): New typedef, replacing...
+ (sha256_compress_func): ... deleted typedef.
+ * fat-x86_64.c: Update fat setup.
+
2022-06-20 Niels Möller <nisse@lysator.liu.se>
* testsuite/sha1-test.c (test_sha1_compress): New function.
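
The SIV-GCM ChangeLog entry above (2022-09-28, from Daiki Ueno) adds one-shot message functions rather than an incremental AEAD interface. A minimal usage sketch for AES-128 follows; it assumes the calling convention mirrors the existing siv_cmac_aes128_encrypt_message (the authoritative prototypes are in the new siv-gcm.h), and key, nonce, msg, ad and ciphertext are placeholder buffers:

    /* Illustrative sketch, not part of the patch.  The signature of
       siv_gcm_aes128_encrypt_message is assumed to mirror the siv-cmac
       message functions; see siv-gcm.h in this patch for the real one. */
    struct aes128_ctx cipher;
    uint8_t nonce[SIV_GCM_NONCE_SIZE];
    /* ... fill in key material and a per-message nonce ... */
    aes128_set_encrypt_key (&cipher, key);

    /* The ciphertext is the message plus a SIV_GCM_DIGEST_SIZE-byte tag. */
    size_t clength = msg_length + SIV_GCM_DIGEST_SIZE;
    siv_gcm_aes128_encrypt_message (&cipher, sizeof (nonce), nonce,
                                    ad_length, ad, clength, ciphertext, msg);
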
diff --git a/Makefile.in b/Makefile.in
index 4b4672fa..cd4993e8 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -83,6 +83,8 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \
nist-keywrap.c \
arcfour.c \
arctwo.c arctwo-meta.c blowfish.c blowfish-bcrypt.c \
+ balloon.c balloon-sha1.c balloon-sha256.c \
+ balloon-sha384.c balloon-sha512.c \
base16-encode.c base16-decode.c base16-meta.c \
base64-encode.c base64-decode.c base64-meta.c \
base64url-encode.c base64url-decode.c base64url-meta.c \
@@ -100,18 +102,22 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \
cbc.c cbc-aes128-encrypt.c cbc-aes192-encrypt.c cbc-aes256-encrypt.c \
ccm.c ccm-aes128.c ccm-aes192.c ccm-aes256.c cfb.c \
siv-cmac.c siv-cmac-aes128.c siv-cmac-aes256.c \
+ siv-gcm.c siv-gcm-aes128.c siv-gcm-aes256.c \
cnd-memcpy.c \
chacha-crypt.c chacha-core-internal.c \
chacha-poly1305.c chacha-poly1305-meta.c \
chacha-set-key.c chacha-set-nonce.c \
ctr.c ctr16.c des.c des3.c \
eax.c eax-aes128.c eax-aes128-meta.c \
- ghash-set-key.c ghash-update.c gcm.c gcm-aes.c \
+ ghash-set-key.c ghash-update.c \
+ siv-ghash-set-key.c siv-ghash-update.c \
+ gcm.c gcm-aes.c \
gcm-aes128.c gcm-aes128-meta.c \
gcm-aes192.c gcm-aes192-meta.c \
gcm-aes256.c gcm-aes256-meta.c \
gcm-camellia128.c gcm-camellia128-meta.c \
gcm-camellia256.c gcm-camellia256-meta.c \
+ gcm-sm4.c gcm-sm4-meta.c \
cmac.c cmac64.c cmac-aes128.c cmac-aes256.c cmac-des3.c \
cmac-aes128-meta.c cmac-aes256-meta.c cmac-des3-meta.c \
gost28147.c gosthash94.c gosthash94-meta.c \
@@ -130,7 +136,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \
nettle-meta-ciphers.c nettle-meta-hashes.c nettle-meta-macs.c \
pbkdf2.c pbkdf2-hmac-gosthash94.c pbkdf2-hmac-sha1.c \
pbkdf2-hmac-sha256.c pbkdf2-hmac-sha384.c pbkdf2-hmac-sha512.c \
- poly1305-aes.c poly1305-internal.c \
+ poly1305-aes.c poly1305-internal.c poly1305-update.c \
realloc.c \
ripemd160.c ripemd160-compress.c ripemd160-meta.c \
salsa20-core-internal.c salsa20-crypt-internal.c \
@@ -138,7 +144,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \
salsa20-set-nonce.c \
salsa20-128-set-key.c salsa20-256-set-key.c \
sha1.c sha1-compress.c sha1-meta.c \
- sha256.c sha256-compress.c sha224-meta.c sha256-meta.c \
+ sha256.c sha256-compress-n.c sha224-meta.c sha256-meta.c \
sha512.c sha512-compress.c sha384-meta.c sha512-meta.c \
sha512-224-meta.c sha512-256-meta.c \
sha3.c sha3-permute.c \
@@ -150,6 +156,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \
serpent-meta.c \
streebog.c streebog-meta.c \
twofish.c twofish-meta.c \
+ sm4.c sm4-meta.c \
umac-nh.c umac-nh-n.c umac-l2.c umac-l3.c \
umac-poly64.c umac-poly128.c umac-set-key.c \
umac32.c umac64.c umac96.c umac128.c \
@@ -187,7 +194,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \
dsa2sexp.c sexp2dsa.c \
pgp-encode.c rsa2openpgp.c \
der-iterator.c der2rsa.c der2dsa.c \
- sec-add-1.c sec-sub-1.c sec-tabselect.c \
+ sec-add-1.c sec-sub-1.c \
gmp-glue.c cnd-copy.c \
ecc-mod.c ecc-mod-inv.c \
ecc-mod-arith.c ecc-pp1-redc.c ecc-pm1-redc.c \
@@ -196,7 +203,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \
ecc-secp192r1.c ecc-secp224r1.c ecc-secp256r1.c \
ecc-secp384r1.c ecc-secp521r1.c \
ecc-size.c ecc-j-to-a.c ecc-a-to-j.c \
- ecc-dup-jj.c ecc-add-jja.c ecc-add-jjj.c \
+ ecc-dup-jj.c ecc-add-jja.c ecc-add-jjj.c ecc-nonsec-add-jjj.c \
ecc-eh-to-a.c \
ecc-dup-eh.c ecc-add-eh.c ecc-add-ehh.c \
ecc-dup-th.c ecc-add-th.c ecc-add-thh.c \
@@ -218,7 +225,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \
OPT_SOURCES = fat-arm.c fat-arm64.c fat-ppc.c fat-s390x.c fat-x86_64.c mini-gmp.c
-HEADERS = aes.h arcfour.h arctwo.h asn1.h blowfish.h \
+HEADERS = aes.h arcfour.h arctwo.h asn1.h blowfish.h balloon.h \
base16.h base64.h bignum.h buffer.h camellia.h cast128.h \
cbc.h ccm.h cfb.h chacha.h chacha-poly1305.h ctr.h \
curve25519.h curve448.h des.h dsa.h dsa-compat.h eax.h \
@@ -226,15 +233,15 @@ HEADERS = aes.h arcfour.h arctwo.h asn1.h blowfish.h \
gcm.h gostdsa.h gosthash94.h hmac.h \
knuth-lfib.h hkdf.h \
macros.h \
- cmac.h siv-cmac.h \
+ cmac.h siv-cmac.h siv-gcm.h \
md2.h md4.h \
md5.h md5-compat.h \
memops.h memxor.h \
nettle-meta.h nettle-types.h \
pbkdf2.h \
pgp.h pkcs1.h pss.h pss-mgf1.h realloc.h ripemd160.h rsa.h \
- salsa20.h sexp.h \
- serpent.h sha.h sha1.h sha2.h sha3.h sm3.h streebog.h twofish.h \
+ salsa20.h sexp.h serpent.h \
+ sha.h sha1.h sha2.h sha3.h sm3.h sm4.h streebog.h twofish.h \
umac.h yarrow.h xts.h poly1305.h nist-keywrap.h
INSTALL_HEADERS = $(HEADERS) version.h @IF_MINI_GMP@ mini-gmp.h
@@ -257,10 +264,11 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \
INSTALL NEWS ChangeLog \
nettle.pc.in hogweed.pc.in \
desdata.stamp $(des_headers) descore.README \
- aes-internal.h block-internal.h blowfish-internal.h camellia-internal.h \
+ aes-internal.h block-internal.h blowfish-internal.h bswap-internal.h \
+ camellia-internal.h \
ghash-internal.h gost28147-internal.h poly1305-internal.h \
serpent-internal.h cast128_sboxes.h desinfo.h desCode.h \
- ripemd160-internal.h sha2-internal.h \
+ ripemd160-internal.h md-internal.h sha2-internal.h \
memxor-internal.h nettle-internal.h nettle-write.h \
ctr-internal.h chacha-internal.h sha3-internal.h \
salsa20-internal.h umac-internal.h hogweed-internal.h \
@@ -290,7 +298,7 @@ libhogweed.a: $(hogweed_OBJS)
%.$(OBJEXT): %.asm $(srcdir)/m4-utils.m4 $(srcdir)/asm.m4 config.m4 machine.m4
$(M4) $(srcdir)/m4-utils.m4 $(srcdir)/asm.m4 config.m4 machine.m4 $< >$*.s
- $(COMPILE) -c $*.s
+ $(COMPILE) $(ASM_FLAGS) -c $*.s
%.$(OBJEXT): %.c
$(COMPILE) -c $< \
diff --git a/aclocal.m4 b/aclocal.m4
index a398d346..c87c3fa8 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -302,7 +302,7 @@ AC_DEFUN([GMP_TRY_ASSEMBLE],
[cat >conftest.s <<EOF
[$1]
EOF
-gmp_assemble="$CC $CFLAGS $CPPFLAGS -c conftest.s >conftest.out 2>&1"
+gmp_assemble="$CC $CFLAGS $CPPFLAGS $ASM_FLAGS -c conftest.s >conftest.out 2>&1"
if AC_TRY_EVAL(gmp_assemble); then
cat conftest.out >&AC_FD_CC
ifelse([$2],,:,[$2])
@@ -563,7 +563,7 @@ dnl Determine whether the assembler takes powerpc registers with an "r" as
dnl in "r6", or as plain "6". The latter is standard, but NeXT, Rhapsody,
dnl and MacOS-X require the "r" forms.
dnl
-dnl See also mpn/powerpc32/powerpc-defs.m4 which uses the result of this
+dnl See also powerpc64/machine.m4 which uses the result of this
dnl test.
AC_DEFUN([GMP_ASM_POWERPC_R_REGISTERS],
diff --git a/arm/fat/sha256-compress-2.asm b/arm/fat/sha256-compress-n-2.asm
index 36d55e4b..8834d93d 100644
--- a/arm/fat/sha256-compress-2.asm
+++ b/arm/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C arm/fat/sha256-compress-2.asm
+C arm/fat/sha256-compress-n-2.asm
ifelse(`
@@ -31,7 +31,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-dnl PROLOGUE(_nettle_sha256_compress) picked up by configure
+dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure
define(`fat_transform', `$1_armv6')
-include_src(`arm/v6/sha256-compress.asm')
+include_src(`arm/v6/sha256-compress-n.asm')
diff --git a/arm/v6/sha256-compress.asm b/arm/v6/sha256-compress-n.asm
index 3c021284..bf225bd8 100644
--- a/arm/v6/sha256-compress.asm
+++ b/arm/v6/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C arm/v6/sha256-compress.asm
+C arm/v6/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2013 Niels Möller
+ Copyright (C) 2013, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -30,13 +30,14 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
- .file "sha256-compress.asm"
+ .file "sha256-compress-n.asm"
.arch armv6
define(`STATE', `r0')
-define(`INPUT', `r1')
-define(`K', `r2')
-define(`SA', `r3')
+define(`K', `r1')
+define(`BLOCKS', `r2')
+define(`INPUT', `r3')
+define(`SA', `r2') C Overlap BLOCKS
define(`SB', `r4')
define(`SC', `r5')
define(`SD', `r6')
@@ -45,12 +46,12 @@ define(`SF', `r8')
define(`SG', `r10')
define(`SH', `r11')
define(`T0', `r12')
-define(`T1', `r1') C Overlap INPUT
+define(`T1', `r3') C Overlap INPUT
define(`COUNT', `r0') C Overlap STATE
define(`W', `r14')
-C Used for data load
-define(`I0', `r3')
+C Used for data load. Must not clobber STATE (r0), K (r1) or INPUT (r3)
+define(`I0', `r2')
define(`I1', `r4')
define(`I2', `r5')
define(`I3', `r6')
@@ -88,7 +89,7 @@ C S1(E) = E<<<26 ^ E<<<21 ^ E<<<7
C S0(A) = A<<<30 ^ A<<<19 ^ A<<<10
C Choice (E, F, G) = G^(E&(F^G))
C Majority (A,B,C) = (A&B) + (C&(A^B))
-
+
define(`ROUND', `
ror T0, $5, #6
eor T0, T0, $5, ror #11
@@ -117,16 +118,31 @@ define(`NOEXPN', `
ldr W, [sp, + $1]
add $1, $1, #4
')
- C void
- C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
-
.text
.align 2
-PROLOGUE(_nettle_sha256_compress)
- push {r4,r5,r6,r7,r8,r10,r11,r14}
- sub sp, sp, #68
- str STATE, [sp, #+64]
+define(`SHIFT_OFFSET', 64)
+define(`INPUT_OFFSET', 68)
+define(`I0_OFFSET', 72)
+define(`STATE_OFFSET', 76)
+define(`K_OFFSET', 80)
+define(`BLOCKS_OFFSET', 84)
+
+ C const uint8_t *
+ C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ C size_t blocks, const uint8_t *input)
+
+PROLOGUE(_nettle_sha256_compress_n)
+ cmp BLOCKS, #0
+ bne .Lwork
+
+ mov r0, INPUT
+ bx lr
+
+.Lwork:
+ C Also save STATE (r0), K (r1) and BLOCKS (r2)
+ push {r0,r1,r2,r4,r5,r6,r7,r8,r10,r11,r12,r14}
+ sub sp, sp, #STATE_OFFSET
C Load data up front, since we don't have enough registers
C to load and shift on-the-fly
@@ -144,6 +160,9 @@ IF_BE(` lsr I1, T0, SHIFT')
C because there is no rotate left
IF_BE(` rsb SHIFT, SHIFT, #32')
+ str SHIFT, [sp, #SHIFT_OFFSET]
+
+.Loop_block:
mov DST, sp
mov ILEFT, #4
.Lcopy:
@@ -164,7 +183,12 @@ IF_LE(` rev I3, I3')
stm DST!, {I0,I1,I2,I3}
mov I0, I4
bne .Lcopy
-
+
+ str INPUT, [sp, #INPUT_OFFSET]
+ str I0, [sp, #I0_OFFSET]
+
+ C Process block, with input at sp, expanded on the fly
+
ldm STATE, {SA,SB,SC,SD,SE,SF,SG,SH}
mov COUNT,#0
@@ -203,20 +227,40 @@ IF_LE(` rev I3, I3')
EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA)
bne .Loop2
- ldr STATE, [sp, #+64]
+ ldr STATE, [sp, #STATE_OFFSET]
C No longer needed registers
- ldm STATE, {r1,r2,r12,r14}
- add SA, SA, r1
- add SB, SB, r2
- add SC, SC, r12
- add SD, SD, r14
+ ldm STATE, {K, T1, T0, W}
+ add SA, SA, K
+ add SB, SB, T1
+ add SC, SC, T0
+ add SD, SD, W
stm STATE!, {SA,SB,SC,SD}
- ldm STATE, {r1,r2,r12,r14}
- add SE, SE, r1
- add SF, SF, r2
- add SG, SG, r12
- add SH, SH, r14
- stm STATE!, {SE,SF,SG,SH}
- add sp, sp, #68
- pop {r4,r5,r6,r7,r8,r10,r11,pc}
-EPILOGUE(_nettle_sha256_compress)
+ ldm STATE, {K, T1, T0, W}
+ add SE, SE, K
+ add SF, SF, T1
+ add SG, SG, T0
+ add SH, SH, W
+ stm STATE, {SE,SF,SG,SH}
+ sub STATE, STATE, #16
+
+ ldr BLOCKS, [sp, #BLOCKS_OFFSET]
+ subs BLOCKS, BLOCKS, #1
+ str BLOCKS, [sp, #BLOCKS_OFFSET]
+
+ ldr SHIFT, [sp, #SHIFT_OFFSET]
+ ldr K, [sp, #K_OFFSET]
+ ldr INPUT, [sp, #INPUT_OFFSET]
+ ldr I0, [sp, #I0_OFFSET]
+
+ bne .Loop_block
+
+ C Restore input pointer adjustment
+IF_BE(` rsbs SHIFT, SHIFT, #32')
+IF_LE(` cmp SHIFT, #0')
+ subne INPUT, INPUT, #4
+ orr r0, INPUT, SHIFT, lsr #3
+
+ C Discard saved STATE, K and BLOCKS.
+ add sp, sp, #STATE_OFFSET + 12
+ pop {r4,r5,r6,r7,r8,r10,r11,r12,pc}
+EPILOGUE(_nettle_sha256_compress_n)
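
The comment block in the hunk above gives the new prototype: _nettle_sha256_compress_n takes the round-constant table and a block count, consumes that many 64-byte blocks, and returns the advanced input pointer (unchanged when blocks is zero). A minimal C caller sketch, where K is assumed to be the table of 64 SHA-256 round constants that sha256.c already keeps:

    /* Illustrative caller, not part of the patch. */
    const uint8_t *
    compress_all (uint32_t state[8], const uint32_t K[64],
                  size_t length, const uint8_t *data)
    {
      /* Process every complete block; the return value points just past
         the last block consumed, so any partial block can be buffered. */
      return _nettle_sha256_compress_n (state, K, length / 64, data);
    }
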
diff --git a/arm64/crypto/sha256-compress.asm b/arm64/crypto/sha256-compress-n.asm
index 2bddea05..447dc590 100644
--- a/arm64/crypto/sha256-compress.asm
+++ b/arm64/crypto/sha256-compress-n.asm
@@ -1,4 +1,4 @@
-C arm64/crypto/sha256-compress.asm
+C arm64/crypto/sha256-compress-n.asm
ifelse(`
Copyright (C) 2021 Mamone Tarsha
@@ -37,7 +37,7 @@ C SHA256H2: SHA256 hash update (part 2)
C SHA256SU0: SHA256 schedule update 0
C SHA256SU1: SHA256 schedule update 1
-.file "sha256-compress.asm"
+.file "sha256-compress-n.asm"
.arch armv8-a+crypto
.text
@@ -45,8 +45,9 @@ C SHA256SU1: SHA256 schedule update 1
C Register usage:
define(`STATE', `x0')
-define(`INPUT', `x1')
-define(`K', `x2')
+define(`K', `x1')
+define(`BLOCKS', `x2')
+define(`INPUT', `x3')
define(`MSG0', `v0')
define(`MSG1', `v1')
@@ -59,19 +60,23 @@ define(`TMP', `v7')
define(`STATE0_SAVED', `v16')
define(`STATE1_SAVED', `v17')
-C void
-C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+C const uint8_t *
+C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+C size_t blocks, const uint8_t *input)
+
+PROLOGUE(_nettle_sha256_compress_n)
+ cbz BLOCKS, .Lend
-PROLOGUE(_nettle_sha256_compress)
C Load state
ld1 {STATE0.4s,STATE1.4s},[STATE]
+.Loop:
C Save state
mov STATE0_SAVED.16b,STATE0.16b
mov STATE1_SAVED.16b,STATE1.16b
C Load message
- ld1 {MSG0.16b,MSG1.16b,MSG2.16b,MSG3.16b},[INPUT]
+ ld1 {MSG0.16b,MSG1.16b,MSG2.16b,MSG3.16b},[INPUT],#64
C Reverse for little endian
rev32 MSG0.16b,MSG0.16b
@@ -217,9 +222,13 @@ PROLOGUE(_nettle_sha256_compress)
C Combine state
add STATE0.4s,STATE0.4s,STATE0_SAVED.4s
add STATE1.4s,STATE1.4s,STATE1_SAVED.4s
-
+ subs BLOCKS, BLOCKS, #1
+ sub K, K, #240
+ b.ne .Loop
+
C Store state
st1 {STATE0.4s,STATE1.4s},[STATE]
-
+.Lend:
+ mov x0, INPUT
ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)
diff --git a/arm64/fat/sha256-compress-2.asm b/arm64/fat/sha256-compress-n-2.asm
index 67590794..2f70686e 100644
--- a/arm64/fat/sha256-compress-2.asm
+++ b/arm64/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C arm64/fat/sha256-compress-2.asm
+C arm64/fat/sha256-compress-n-2.asm
ifelse(`
@@ -31,7 +31,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-dnl PROLOGUE(_nettle_sha256_compress) picked up by configure
+dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure
define(`fat_transform', `$1_arm64')
-include_src(`arm64/crypto/sha256-compress.asm')
+include_src(`arm64/crypto/sha256-compress-n.asm')
diff --git a/balloon-sha1.c b/balloon-sha1.c
new file mode 100644
index 00000000..71c86e1d
--- /dev/null
+++ b/balloon-sha1.c
@@ -0,0 +1,55 @@
+/* balloon-sha1.c
+
+ Balloon password-hashing algorithm.
+
+ Copyright (C) 2022 Zoltan Fridrich
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "balloon.h"
+#include "sha1.h"
+
+void
+balloon_sha1(size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst)
+{
+ struct sha1_ctx ctx;
+ sha1_init(&ctx);
+ balloon(&ctx,
+ (nettle_hash_update_func*)sha1_update,
+ (nettle_hash_digest_func*)sha1_digest,
+ SHA1_DIGEST_SIZE, s_cost, t_cost,
+ passwd_length, passwd, salt_length, salt, scratch, dst);
+}
diff --git a/balloon-sha256.c b/balloon-sha256.c
new file mode 100644
index 00000000..fe31a691
--- /dev/null
+++ b/balloon-sha256.c
@@ -0,0 +1,55 @@
+/* balloon-sha256.c
+
+ Balloon password-hashing algorithm.
+
+ Copyright (C) 2022 Zoltan Fridrich
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "balloon.h"
+#include "sha2.h"
+
+void
+balloon_sha256(size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst)
+{
+ struct sha256_ctx ctx;
+ sha256_init(&ctx);
+ balloon(&ctx,
+ (nettle_hash_update_func*)sha256_update,
+ (nettle_hash_digest_func*)sha256_digest,
+ SHA256_DIGEST_SIZE, s_cost, t_cost,
+ passwd_length, passwd, salt_length, salt, scratch, dst);
+}
diff --git a/balloon-sha384.c b/balloon-sha384.c
new file mode 100644
index 00000000..68294496
--- /dev/null
+++ b/balloon-sha384.c
@@ -0,0 +1,55 @@
+/* balloon-sha384.c
+
+ Balloon password-hashing algorithm.
+
+ Copyright (C) 2022 Zoltan Fridrich
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "balloon.h"
+#include "sha2.h"
+
+void
+balloon_sha384(size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst)
+{
+ struct sha384_ctx ctx;
+ sha384_init(&ctx);
+ balloon(&ctx,
+ (nettle_hash_update_func*)sha384_update,
+ (nettle_hash_digest_func*)sha384_digest,
+ SHA384_DIGEST_SIZE, s_cost, t_cost,
+ passwd_length, passwd, salt_length, salt, scratch, dst);
+}
diff --git a/balloon-sha512.c b/balloon-sha512.c
new file mode 100644
index 00000000..f19f8aa0
--- /dev/null
+++ b/balloon-sha512.c
@@ -0,0 +1,55 @@
+/* balloon-sha512.c
+
+ Balloon password-hashing algorithm.
+
+ Copyright (C) 2022 Zoltan Fridrich
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "balloon.h"
+#include "sha2.h"
+
+void
+balloon_sha512(size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst)
+{
+ struct sha512_ctx ctx;
+ sha512_init(&ctx);
+ balloon(&ctx,
+ (nettle_hash_update_func*)sha512_update,
+ (nettle_hash_digest_func*)sha512_digest,
+ SHA512_DIGEST_SIZE, s_cost, t_cost,
+ passwd_length, passwd, salt_length, salt, scratch, dst);
+}
diff --git a/balloon.c b/balloon.c
new file mode 100644
index 00000000..c744160a
--- /dev/null
+++ b/balloon.c
@@ -0,0 +1,149 @@
+/* balloon.c
+
+ Balloon password-hashing algorithm.
+
+ Copyright (C) 2022 Zoltan Fridrich
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+/* For a description of the algorithm, see:
+ * Boneh, D., Corrigan-Gibbs, H., Schechter, S. (2017, May 12). Balloon Hashing:
+ * A Memory-Hard Function Providing Provable Protection Against Sequential Attacks.
+ * Retrieved Sep 1, 2022, from https://eprint.iacr.org/2016/027.pdf
+ */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <string.h>
+
+#include "balloon.h"
+#include "macros.h"
+
+#define DELTA 3
+
+static void
+hash(void *ctx,
+ nettle_hash_update_func *update,
+ nettle_hash_digest_func *digest,
+ size_t digest_size,
+ uint64_t cnt,
+ size_t a_len, const uint8_t *a,
+ size_t b_len, const uint8_t *b,
+ uint8_t *dst)
+{
+ uint8_t tmp[8];
+ LE_WRITE_UINT64(tmp, cnt);
+ update(ctx, sizeof(tmp), tmp);
+ if (a && a_len)
+ update(ctx, a_len, a);
+ if (b && b_len)
+ update(ctx, b_len, b);
+ digest(ctx, digest_size, dst);
+}
+
+static void
+hash_ints(void *ctx,
+ nettle_hash_update_func *update,
+ nettle_hash_digest_func *digest,
+ size_t digest_size,
+ uint64_t i, uint64_t j, uint64_t k,
+ uint8_t *dst)
+{
+ uint8_t tmp[24];
+ LE_WRITE_UINT64(tmp, i);
+ LE_WRITE_UINT64(tmp + 8, j);
+ LE_WRITE_UINT64(tmp + 16, k);
+ update(ctx, sizeof(tmp), tmp);
+ digest(ctx, digest_size, dst);
+}
+
+/* Takes length bytes long big number stored
+ * in little endian format and computes modulus
+ */
+static size_t
+block_to_int(size_t length, const uint8_t *block, size_t mod)
+{
+ size_t i = length, r = 0;
+ while (i--)
+ {
+ r = (r << 8) + block[i];
+ r %= mod;
+ }
+ return r;
+}
+
+void
+balloon(void *hash_ctx,
+ nettle_hash_update_func *update,
+ nettle_hash_digest_func *digest,
+ size_t digest_size, size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst)
+{
+ const size_t BS = digest_size;
+ uint8_t *block = scratch;
+ uint8_t *buf = scratch + BS;
+ size_t i, j, k, cnt = 0;
+
+ hash(hash_ctx, update, digest, digest_size,
+ cnt++, passwd_length, passwd, salt_length, salt, buf);
+ for (i = 1; i < s_cost; ++i)
+ hash(hash_ctx, update, digest, digest_size,
+ cnt++, BS, buf + (i - 1) * BS, 0, NULL, buf + i * BS);
+
+ for (i = 0; i < t_cost; ++i)
+ {
+ for (j = 0; j < s_cost; ++j)
+ {
+ hash(hash_ctx, update, digest, digest_size,
+ cnt++, BS, buf + (j ? j - 1 : s_cost - 1) * BS,
+ BS, buf + j * BS, buf + j * BS);
+ for (k = 0; k < DELTA; ++k)
+ {
+ hash_ints(hash_ctx, update, digest, digest_size, i, j, k, block);
+ hash(hash_ctx, update, digest, digest_size,
+ cnt++, salt_length, salt, BS, block, block);
+ hash(hash_ctx, update, digest, digest_size,
+ cnt++, BS, buf + j * BS,
+ BS, buf + block_to_int(BS, block, s_cost) * BS,
+ buf + j * BS);
+ }
+ }
+ }
+ memcpy(dst, buf + (s_cost - 1) * BS, BS);
+}
+
+size_t
+balloon_itch(size_t digest_size, size_t s_cost)
+{
+ return (s_cost + 1) * digest_size;
+}
diff --git a/balloon.h b/balloon.h
new file mode 100644
index 00000000..9c021925
--- /dev/null
+++ b/balloon.h
@@ -0,0 +1,98 @@
+/* balloon.h
+
+ Balloon password-hashing algorithm.
+
+ Copyright (C) 2022 Zoltan Fridrich
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+/* For a description of the algorithm, see:
+ * Boneh, D., Corrigan-Gibbs, H., Schechter, S. (2017, May 12). Balloon Hashing:
+ * A Memory-Hard Function Providing Provable Protection Against Sequential Attacks.
+ * Retrieved Sep 1, 2022, from https://eprint.iacr.org/2016/027.pdf
+ */
+
+#ifndef NETTLE_BALLOON_H_INCLUDED
+#define NETTLE_BALLOON_H_INCLUDED
+
+#include "nettle-types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Name mangling */
+#define balloon nettle_balloon
+#define balloon_itch nettle_balloon_itch
+#define balloon_sha1 nettle_balloon_sha1
+#define balloon_sha256 nettle_balloon_sha256
+#define balloon_sha384 nettle_balloon_sha384
+#define balloon_sha512 nettle_balloon_sha512
+
+void
+balloon(void *hash_ctx,
+ nettle_hash_update_func *update,
+ nettle_hash_digest_func *digest,
+ size_t digest_size, size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst);
+
+size_t
+balloon_itch(size_t digest_size, size_t s_cost);
+
+void
+balloon_sha1(size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst);
+
+void
+balloon_sha256(size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst);
+
+void
+balloon_sha384(size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst);
+
+void
+balloon_sha512(size_t s_cost, size_t t_cost,
+ size_t passwd_length, const uint8_t *passwd,
+ size_t salt_length, const uint8_t *salt,
+ uint8_t *scratch, uint8_t *dst);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NETTLE_BALLOON_H_INCLUDED */
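
Together with balloon_itch, balloon.h fixes the calling convention: the caller supplies a scratch buffer of (s_cost + 1) * digest_size bytes and a dst buffer of digest_size bytes. A usage sketch for the SHA-256 variant, with illustrative cost parameters and error handling:

    /* Sketch only; cost values and the wrapper itself are illustrative. */
    #include <stdlib.h>
    #include <nettle/balloon.h>
    #include <nettle/sha2.h>

    int
    hash_password (size_t passwd_length, const uint8_t *passwd,
                   size_t salt_length, const uint8_t *salt,
                   uint8_t dst[SHA256_DIGEST_SIZE])
    {
      size_t s_cost = 1024;  /* space cost: number of digest-size blocks */
      size_t t_cost = 3;     /* time cost: number of mixing rounds */
      uint8_t *scratch = malloc (balloon_itch (SHA256_DIGEST_SIZE, s_cost));
      if (!scratch)
        return 0;
      balloon_sha256 (s_cost, t_cost, passwd_length, passwd,
                      salt_length, salt, scratch, dst);
      free (scratch);
      return 1;
    }
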
diff --git a/block-internal.h b/block-internal.h
index d7b0c315..e9c26ff6 100644
--- a/block-internal.h
+++ b/block-internal.h
@@ -40,6 +40,7 @@
#include <assert.h>
#include "nettle-types.h"
+#include "bswap-internal.h"
#include "memxor.h"
static inline void
@@ -197,4 +198,15 @@ block16_mulx_ghash (union nettle_block16 *r,
}
#endif /* ! WORDS_BIGENDIAN */
+/* Reverse bytes in X and store the result in R. This supports
+ in-place operation (R and X can overlap). */
+static inline void
+block16_bswap (union nettle_block16 *r,
+ const union nettle_block16 *x)
+{
+ uint64_t t = nettle_bswap64 (x->u64[0]);
+ r->u64[0] = nettle_bswap64 (x->u64[1]);
+ r->u64[1] = t;
+}
+
#endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */
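
The new block16_bswap helper reverses all 16 bytes of a block by byte-swapping each 64-bit half and exchanging the halves, and the comment guarantees in-place use, presumably so the new siv-ghash code can convert blocks between GHASH's byte order and the reversed order POLYVAL uses. A one-line illustration, where b is a hypothetical block:

    block16_bswap (&b, &b);   /* in place: b now holds its bytes reversed */
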
diff --git a/blowfish-bcrypt.c b/blowfish-bcrypt.c
index 800d1468..08b1e32e 100644
--- a/blowfish-bcrypt.c
+++ b/blowfish-bcrypt.c
@@ -42,7 +42,7 @@
#include "blowfish.h"
#include "blowfish-internal.h"
#include "base64.h"
-
+#include "bswap-internal.h"
#include "macros.h"
#define CRYPTPLEN 7
@@ -149,19 +149,16 @@ static uint32_t magic_w[6] = {
0x64657253, 0x63727944, 0x6F756274
};
-/* conflicts with OpenBSD's swap32 macro */
-#undef swap32
-
-static void swap32(uint32_t *x, int count)
+#if WORDS_BIGENDIAN
+#define bswap32_if_le(x, n)
+#else
+static void bswap32_if_le (uint32_t *x, unsigned n)
{
-#if !WORDS_BIGENDIAN
- do {
- uint32_t tmp = *x;
- tmp = (tmp << 16) | (tmp >> 16);
- *x++ = ((tmp & 0x00FF00FF) << 8) | ((tmp >> 8) & 0x00FF00FF);
- } while (--count);
-#endif
+ unsigned i;
+ for (i = 0; i < n; i++)
+ x[i] = nettle_bswap32 (x[i]);
}
+#endif
static void set_xkey(size_t lenkey, const uint8_t *key,
bf_key expanded, bf_key initial,
@@ -343,7 +340,7 @@ static int ibcrypt(uint8_t *dst,
else if (lenscheme < HASHOFFSET)
return 0;
memcpy(psalt, data.binary.salt, BLOWFISH_BCRYPT_BINSALT_SIZE);
- swap32(data.binary.salt, 4);
+ bswap32_if_le (data.binary.salt, 4);
if (log2rounds < minlog2rounds || log2rounds > 31)
return 0;
@@ -448,7 +445,7 @@ static int ibcrypt(uint8_t *dst,
dst = (uint8_t*)
encode_radix64((char*) dst, BLOWFISH_BCRYPT_BINSALT_SIZE, psalt) - 1;
- swap32(data.binary.output, 6);
+ bswap32_if_le (data.binary.output, 6);
/* This has to be bug-compatible with the original implementation, so
only encode 23 of the 24 bytes. */
encode_radix64((char*) dst, 23, (uint8_t *) data.binary.output);
diff --git a/bswap-internal.h b/bswap-internal.h
new file mode 100644
index 00000000..b9923f99
--- /dev/null
+++ b/bswap-internal.h
@@ -0,0 +1,77 @@
+/* bswap-internal.h
+
+ Copyright (C) 2022 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NETTLE_BSWAP_INTERNAL_H_INCLUDED
+#define NETTLE_BSWAP_INTERNAL_H_INCLUDED
+
+#include "nettle-types.h"
+
+/* Note that these definitions depend config.h, which should be
+ included first. */
+
+#if HAVE_BUILTIN_BSWAP64
+#define nettle_bswap64 __builtin_bswap64
+/* Assume bswap32 is also available. */
+#define nettle_bswap32 __builtin_bswap32
+#else
+static inline uint64_t
+nettle_bswap64 (uint64_t x)
+{
+ x = (x >> 32) | (x << 32);
+ x = ((x >> 16) & UINT64_C (0xffff0000ffff))
+ | ((x & UINT64_C (0xffff0000ffff)) << 16);
+ x = ((x >> 8) & UINT64_C (0xff00ff00ff00ff))
+ | ((x & UINT64_C (0xff00ff00ff00ff)) << 8);
+ return x;
+}
+
+static inline uint32_t
+nettle_bswap32 (uint32_t x)
+{
+ x = (x << 16) | (x >> 16);
+ x = ((x & 0x00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF);
+ return x;
+}
+#endif
+
+#if WORDS_BIGENDIAN
+#define bswap64_if_le(x) (x)
+#else
+#define bswap64_if_le nettle_bswap64
+#endif
+
+#if WORDS_BIGENDIAN
+#define bswap64_if_be nettle_bswap64
+#else
+#define bswap64_if_be(x) (x)
+#endif
+
+#endif /* NETTLE_BSWAP_INTERNAL_H_INCLUDED */
diff --git a/chacha-poly1305.c b/chacha-poly1305.c
index 7a423e1e..ea8b2952 100644
--- a/chacha-poly1305.c
+++ b/chacha-poly1305.c
@@ -97,7 +97,8 @@ static void
poly1305_update (struct chacha_poly1305_ctx *ctx,
size_t length, const uint8_t *data)
{
- MD_UPDATE (ctx, length, data, COMPRESS, (void) 0);
+ ctx->index = _nettle_poly1305_update (&(ctx)->poly1305,
+ ctx->block, ctx->index, length, data);
}
static void
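
The replacement above also pins down the contract of the new helper: _nettle_poly1305_update absorbs data through a 16-byte partial-block buffer, processes every complete block (via _nettle_poly1305_block, or the assembly _nettle_poly1305_blocks where available), and returns the buffer's new fill index. Per the ChangeLog, poly1305-aes.c is converted the same way; a sketch of such a caller, with the buffer and index passed explicitly since the exact context layout is not quoted here:

    /* Illustrative wrapper, not part of the patch. */
    static void
    mac_update (struct poly1305_ctx *state, uint8_t *block, unsigned *index,
                size_t length, const uint8_t *data)
    {
      /* Complete 16-byte blocks are compressed immediately; leftover
         bytes stay in block[] and their count is returned. */
      *index = _nettle_poly1305_update (state, block, *index, length, data);
    }
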
diff --git a/config.make.in b/config.make.in
index f8e1f74e..6aec7c73 100644
--- a/config.make.in
+++ b/config.make.in
@@ -8,6 +8,7 @@ CCPIC = @CCPIC@
CPPFLAGS = @CPPFLAGS@
DEFS = @DEFS@
LDFLAGS = @LDFLAGS@
+ASM_FLAGS = @ASM_FLAGS@
LIBS = @LIBS@
LIBOBJS = @LIBOBJS@
EMULATOR = @EMULATOR@
diff --git a/configure.ac b/configure.ac
index 73ce5764..92536fb0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4,7 +4,7 @@ dnl Process this file with autoconf to produce a configure script.
AC_INIT([nettle], [3.8], [nettle-bugs@lists.lysator.liu.se])
AC_PREREQ(2.61)
-AC_CONFIG_SRCDIR([arcfour.c])
+AC_CONFIG_SRCDIR([nettle-types.h])
# Needed to stop autoconf from looking for files in parent directories.
AC_CONFIG_AUX_DIR([.])
@@ -121,6 +121,8 @@ AC_ARG_ENABLE(mini-gmp,
AC_HELP_STRING([--enable-mini-gmp], [Enable mini-gmp, used instead of libgmp.]),,
[enable_mini_gmp=no])
+AC_ARG_VAR(ASM_FLAGS, [Extra flags for processing assembly source files])
+
if test "x$enable_mini_gmp" = xyes ; then
NETTLE_USE_MINI_GMP=1
HOGWEED_EXTRA_SYMBOLS="mpz_*;gmp_*;mpn_*;mp_*;"
@@ -345,7 +347,7 @@ case "$host_cpu" in
ABI=64
])
;;
- *mips*)
+ *mips64*)
AC_TRY_COMPILE([
#if defined(__mips64) || defined(__mips64__) || (defined(__sgi) && defined(__LP64__))
#error 64-bit mips
@@ -598,7 +600,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
chacha-core-internal.asm \
salsa20-crypt.asm salsa20-core-internal.asm \
serpent-encrypt.asm serpent-decrypt.asm \
- sha1-compress.asm sha256-compress.asm sha512-compress.asm \
+ sha1-compress.asm sha256-compress-n.asm sha512-compress.asm \
sha3-permute.asm umac-nh.asm umac-nh-n.asm machine.m4"
# Assembler files which generate additional object files if they are used.
@@ -612,10 +614,10 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \
aes256-encrypt-2.asm aes256-decrypt-2.asm \
cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \
chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \
- poly1305-internal-2.asm \
+ poly1305-blocks.asm poly1305-internal-2.asm \
ghash-set-key-2.asm ghash-update-2.asm \
salsa20-2core.asm salsa20-core-internal-2.asm \
- sha1-compress-2.asm sha256-compress-2.asm \
+ sha1-compress-2.asm sha256-compress-n-2.asm \
sha3-permute-2.asm sha512-compress-2.asm \
umac-nh-n-2.asm umac-nh-2.asm"
@@ -762,13 +764,15 @@ AH_VERBATIM([HAVE_NATIVE],
#undef HAVE_NATIVE_poly1305_set_key
#undef HAVE_NATIVE_poly1305_block
#undef HAVE_NATIVE_poly1305_digest
+#undef HAVE_NATIVE_poly1305_blocks
+#undef HAVE_NATIVE_fat_poly1305_blocks
#undef HAVE_NATIVE_ghash_set_key
#undef HAVE_NATIVE_ghash_update
#undef HAVE_NATIVE_salsa20_core
#undef HAVE_NATIVE_salsa20_2core
#undef HAVE_NATIVE_fat_salsa20_2core
#undef HAVE_NATIVE_sha1_compress
-#undef HAVE_NATIVE_sha256_compress
+#undef HAVE_NATIVE_sha256_compress_n
#undef HAVE_NATIVE_sha512_compress
#undef HAVE_NATIVE_sha3_permute
#undef HAVE_NATIVE_umac_nh
diff --git a/ecc-curve25519.c b/ecc-curve25519.c
index 56abcf23..539bff22 100644
--- a/ecc-curve25519.c
+++ b/ecc-curve25519.c
@@ -266,6 +266,7 @@ const struct ecc_curve _nettle_curve25519 =
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
+ ecc_Bm2p,
NULL,
ecc_pp1h,
@@ -287,6 +288,7 @@ const struct ecc_curve _nettle_curve25519 =
ecc_q,
ecc_Bmodq,
ecc_mBmodq_shifted, /* Use q - 2^{252} instead. */
+ ecc_Bm2q,
NULL,
ecc_qp1h,
diff --git a/ecc-curve448.c b/ecc-curve448.c
index 1bd4e11f..daef56cc 100644
--- a/ecc-curve448.c
+++ b/ecc-curve448.c
@@ -220,6 +220,7 @@ const struct ecc_curve _nettle_curve448 =
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
+ ecc_Bm2p,
NULL,
ecc_pp1h,
@@ -241,6 +242,7 @@ const struct ecc_curve _nettle_curve448 =
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
+ ecc_Bm2q,
NULL,
ecc_qp1h,
diff --git a/ecc-ecdsa-sign.c b/ecc-ecdsa-sign.c
index 4adee1d1..6a41c14c 100644
--- a/ecc-ecdsa-sign.c
+++ b/ecc-ecdsa-sign.c
@@ -46,9 +46,9 @@
mp_size_t
ecc_ecdsa_sign_itch (const struct ecc_curve *ecc)
{
- /* Needs 3*ecc->p.size + scratch for ecc->mul_g. Currently same for
- ecc_mul_g. */
- assert (ecc->p.size + ecc->p.invert_itch <= 3*ecc->p.size + ecc->mul_g_itch);
+ /* Needs 3*ecc->p.size + scratch for ecc_mul_g. */
+ assert (ecc->p.size + ecc->p.invert_itch
+ <= 3*ecc->p.size + ECC_MUL_G_ITCH (ecc->p.size));
return ECC_ECDSA_SIGN_ITCH (ecc->p.size);
}
@@ -79,9 +79,9 @@ ecc_ecdsa_sign (const struct ecc_curve *ecc,
4. s2 <-- (h + z*s1)/k mod q.
*/
- ecc->mul_g (ecc, P, kp, P + 3*ecc->p.size);
+ ecc_mul_g (ecc, P, kp, P + 3*ecc->p.size);
/* x coordinate only, modulo q */
- ecc->h_to_a (ecc, 2, rp, P, P + 3*ecc->p.size);
+ ecc_j_to_a (ecc, 2, rp, P, P + 3*ecc->p.size);
/* Invert k, uses up to 7 * ecc->p.size including scratch (for secp384). */
ecc->q.invert (&ecc->q, kinv, kp, tp);
diff --git a/ecc-ecdsa-verify.c b/ecc-ecdsa-verify.c
index f3b112b0..9e324ea2 100644
--- a/ecc-ecdsa-verify.c
+++ b/ecc-ecdsa-verify.c
@@ -53,8 +53,8 @@ ecdsa_in_range (const struct ecc_curve *ecc, const mp_limb_t *xp)
mp_size_t
ecc_ecdsa_verify_itch (const struct ecc_curve *ecc)
{
- /* Largest storage need is for the ecc->mul call. */
- return 5*ecc->p.size + ecc->mul_itch;
+ /* Largest storage need is for the ecc_mul_a call. */
+ return 5*ecc->p.size + ECC_MUL_A_ITCH (ecc->p.size);
}
/* FIXME: Use faster primitives, not requiring side-channel silence. */
@@ -107,35 +107,23 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc,
/* u2 = r / s, P2 = u2 * Y */
ecc_mod_mul_canonical (&ecc->q, u2, rp, sinv, u2);
- /* Total storage: 5*ecc->p.size + ecc->mul_itch */
- ecc->mul (ecc, P2, u2, pp, u2 + ecc->p.size);
+ /* Total storage: 5*ecc->p.size + ECC_MUL_A_ITCH */
+ ecc_mul_a (ecc, P2, u2, pp, u2 + ecc->p.size);
/* u = 0 can happen only if h = 0 or h = q, which is extremely
unlikely. */
if (!mpn_zero_p (u1, ecc->p.size))
{
- /* Total storage: 7*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */
- ecc->mul_g (ecc, P1, u1, P1 + 3*ecc->p.size);
-
- /* NOTE: ecc_add_jjj and/or ecc_j_to_a will produce garbage in
- case u1 G = +/- u2 V. However, anyone who gets his or her
- hands on a signature where this happens during verification,
- can also get the private key as z = +/- u1 / u_2 (mod q). And
- then it doesn't matter very much if verification of
- signatures with that key succeeds or fails.
-
- u1 G = - u2 V can never happen for a correctly generated
- signature, since it implies k = 0.
-
- u1 G = u2 V is possible, if we are unlucky enough to get h /
- s_1 = z. Hitting that is about as unlikely as finding the
- private key by guessing.
- */
- /* Total storage: 6*ecc->p.size + ecc->add_hhh_itch */
- ecc->add_hhh (ecc, P2, P2, P1, P1 + 3*ecc->p.size);
+ /* Total storage: 7*ecc->p.size + ECC_MUL_G_ITCH */
+ ecc_mul_g (ecc, P1, u1, P1 + 3*ecc->p.size);
+
+ /* Total storage: 6*ecc->p.size + ECC_ADD_JJJ_ITCH */
+ if (!ecc_nonsec_add_jjj (ecc, P2, P2, P1, P1 + 3*ecc->p.size))
+ /* Infinity point, not a valid signature. */
+ return 0;
}
/* x coordinate only, modulo q */
- ecc->h_to_a (ecc, 2, P1, P2, P1 + 3*ecc->p.size);
+ ecc_j_to_a (ecc, 2, P1, P2, P1 + 3*ecc->p.size);
return (mpn_cmp (rp, P1, ecc->p.size) == 0);
#undef P2
diff --git a/ecc-gost-gc256b.c b/ecc-gost-gc256b.c
index 0cf753e4..df9cbb58 100644
--- a/ecc-gost-gc256b.c
+++ b/ecc-gost-gc256b.c
@@ -71,6 +71,7 @@ const struct ecc_curve _nettle_gost_gc256b =
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
+ ecc_Bm2p,
ecc_redc_ppm1,
ecc_pp1h,
@@ -92,6 +93,7 @@ const struct ecc_curve _nettle_gost_gc256b =
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
+ ecc_Bm2q,
NULL,
ecc_qp1h,
diff --git a/ecc-gost-gc512a.c b/ecc-gost-gc512a.c
index 338ed001..3807b57e 100644
--- a/ecc-gost-gc512a.c
+++ b/ecc-gost-gc512a.c
@@ -71,6 +71,7 @@ const struct ecc_curve _nettle_gost_gc512a =
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
+ ecc_Bm2p,
ecc_redc_ppm1,
ecc_pp1h,
@@ -92,6 +93,7 @@ const struct ecc_curve _nettle_gost_gc512a =
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
+ ecc_Bm2q,
NULL,
ecc_qp1h,
diff --git a/ecc-gostdsa-sign.c b/ecc-gostdsa-sign.c
index c924122c..491a2281 100644
--- a/ecc-gostdsa-sign.c
+++ b/ecc-gostdsa-sign.c
@@ -45,8 +45,7 @@
mp_size_t
ecc_gostdsa_sign_itch (const struct ecc_curve *ecc)
{
- /* Needs 3*ecc->p.size + scratch for ecc->mul_g. Currently same for
- ecc_mul_g. */
+ /* Needs 3*ecc->p.size + scratch for ecc_mul_g. */
return ECC_GOSTDSA_SIGN_ITCH (ecc->p.size);
}
@@ -75,9 +74,9 @@ ecc_gostdsa_sign (const struct ecc_curve *ecc,
4. s <-- (r*z + k*h) mod q.
*/
- ecc->mul_g (ecc, P, kp, P + 3*ecc->p.size);
+ ecc_mul_g (ecc, P, kp, P + 3*ecc->p.size);
/* x coordinate only, modulo q */
- ecc->h_to_a (ecc, 2, rp, P, P + 3*ecc->p.size);
+ ecc_j_to_a (ecc, 2, rp, P, P + 3*ecc->p.size);
/* Process hash digest */
gost_hash (&ecc->q, hp, length, digest);
diff --git a/ecc-gostdsa-verify.c b/ecc-gostdsa-verify.c
index fcdd4644..0570af7e 100644
--- a/ecc-gostdsa-verify.c
+++ b/ecc-gostdsa-verify.c
@@ -52,8 +52,8 @@ ecdsa_in_range (const struct ecc_curve *ecc, const mp_limb_t *xp)
mp_size_t
ecc_gostdsa_verify_itch (const struct ecc_curve *ecc)
{
- /* Largest storage need is for the ecc->mul call. */
- return 5*ecc->p.size + ecc->mul_itch;
+ /* Largest storage need is for the ecc_mul_a call. */
+ return 5*ecc->p.size + ECC_MUL_A_ITCH (ecc->p.size);
}
/* FIXME: Use faster primitives, not requiring side-channel silence. */
@@ -108,17 +108,18 @@ ecc_gostdsa_verify (const struct ecc_curve *ecc,
mpn_sub_n (hp, ecc->q.m, rp, ecc->p.size);
ecc_mod_mul_canonical (&ecc->q, z2, hp, vp, z2);
- /* Total storage: 5*ecc->p.size + ecc->mul_itch */
- ecc->mul (ecc, P2, z2, pp, z2 + ecc->p.size);
+ /* Total storage: 5*ecc->p.size + ECC_MUL_A_ITCH */
+ ecc_mul_a (ecc, P2, z2, pp, z2 + ecc->p.size);
- /* Total storage: 7*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */
- ecc->mul_g (ecc, P1, z1, P1 + 3*ecc->p.size);
+ /* Total storage: 7*ecc->p.size + ECC_MUL_G_ITCH */
+ ecc_mul_g (ecc, P1, z1, P1 + 3*ecc->p.size);
- /* Total storage: 6*ecc->p.size + ecc->add_hhh_itch */
- ecc->add_hhh (ecc, P1, P1, P2, P1 + 3*ecc->p.size);
+ /* Total storage: 6*ecc->p.size + ECC_ADD_JJJ_ITCH */
+ if (!ecc_nonsec_add_jjj (ecc, P1, P1, P2, P1 + 3*ecc->p.size))
+ return 0;
/* x coordinate only, modulo q */
- ecc->h_to_a (ecc, 2, P2, P1, P1 + 3*ecc->p.size);
+ ecc_j_to_a (ecc, 2, P2, P1, P1 + 3*ecc->p.size);
return (mpn_cmp (rp, P2, ecc->p.size) == 0);
#undef P2
diff --git a/ecc-internal.h b/ecc-internal.h
index 2ea553b5..be02de5f 100644
--- a/ecc-internal.h
+++ b/ecc-internal.h
@@ -66,6 +66,7 @@
#define ecc_dup_jj _nettle_ecc_dup_jj
#define ecc_add_jja _nettle_ecc_add_jja
#define ecc_add_jjj _nettle_ecc_add_jjj
+#define ecc_nonsec_add_jjj _nettle_ecc_nonsec_add_jjj
#define ecc_dup_eh _nettle_ecc_dup_eh
#define ecc_add_eh _nettle_ecc_add_eh
#define ecc_add_ehh _nettle_ecc_add_ehh
@@ -80,7 +81,6 @@
#define cnd_copy _nettle_cnd_copy
#define sec_add_1 _nettle_sec_add_1
#define sec_sub_1 _nettle_sec_sub_1
-#define sec_tabselect _nettle_sec_tabselect
#define sec_modinv _nettle_sec_modinv
#define curve25519_eh_to_x _nettle_curve25519_eh_to_x
#define curve448_eh_to_x _nettle_curve448_eh_to_x
@@ -174,8 +174,14 @@ struct ecc_modulo
/* B^size mod m. Expected to have at least 32 leading zeros
(equality for secp_256r1). */
const mp_limb_t *B;
- /* 2^{bit_size} - m, same value as above, but shifted. */
+ /* 2^{bit_size} - m. When different from B above, for numbers of
+ interest, usually B has trailing zeros and this is B shifted
+ right. */
const mp_limb_t *B_shifted;
+ /* For ecc_mod_sub: B^size - 2m, if that doesn't underflow.
+ Otherwise, same as B. */
+ const mp_limb_t *Bm2m;
+
/* m +/- 1, for redc, excluding redc_size low limbs. */
const mp_limb_t *redc_mpm1;
/* (m+1)/2 */
@@ -258,6 +264,8 @@ ecc_mod_equal_p (const struct ecc_modulo *m, const mp_limb_t *a,
void
ecc_mod_add (const struct ecc_modulo *m, mp_limb_t *rp,
const mp_limb_t *ap, const mp_limb_t *bp);
+
+/* If inputs are in the range 0 <= a, b < 2m, then so is the output. */
void
ecc_mod_sub (const struct ecc_modulo *m, mp_limb_t *rp,
const mp_limb_t *ap, const mp_limb_t *bp);
@@ -382,6 +390,14 @@ ecc_add_jjj (const struct ecc_curve *ecc,
mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *q,
mp_limb_t *scratch);
+/* Variant that handles the checks for the special cases P = ±Q.
+ Returns 1 on success, 0 if the result is the infinity point. Not side-channel
+ silent, so must not be used with secret inputs. */
+int
+ecc_nonsec_add_jjj (const struct ecc_curve *ecc,
+ mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *q,
+ mp_limb_t *scratch);
+
/* Point doubling on a twisted Edwards curve, with homogeneous
cooordinates. */
void
@@ -458,11 +474,6 @@ mp_limb_t
sec_sub_1 (mp_limb_t *rp, mp_limb_t *ap, mp_size_t n, mp_limb_t b);
void
-sec_tabselect (mp_limb_t *rp, mp_size_t rn,
- const mp_limb_t *table, unsigned tn,
- unsigned k);
-
-void
curve25519_eh_to_x (mp_limb_t *xp, const mp_limb_t *p,
mp_limb_t *scratch);
diff --git a/ecc-mod-arith.c b/ecc-mod-arith.c
index 310cbb1d..d0137864 100644
--- a/ecc-mod-arith.c
+++ b/ecc-mod-arith.c
@@ -85,7 +85,20 @@ ecc_mod_sub (const struct ecc_modulo *m, mp_limb_t *rp,
{
mp_limb_t cy;
cy = mpn_sub_n (rp, ap, bp, m->size);
- cy = mpn_cnd_sub_n (cy, rp, rp, m->B, m->size);
+ /* The adjustments for this function work differently depending on
+ the value of the most significant bit of m.
+
+ If m has a most significant bit of zero, then the first
+ adjustment step conditionally adds 2m. If, in addition, the inputs
+ are in the range 0 <= a,b < 2m, then the first adjustment guarantees
+ that the result is in that same range. The second adjustment step
+ is needed only if b > 2m; it then ensures that the output is correct
+ modulo m, but nothing more.
+
+ If m has a most significant bit of one, Bm2m and B are the same,
+ and this function works analogously to ecc_mod_add.
+ */
+ cy = mpn_cnd_sub_n (cy, rp, rp, m->Bm2m, m->size);
cy = mpn_cnd_sub_n (cy, rp, rp, m->B, m->size);
assert (cy == 0);
}
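
A worked instance of the first adjustment above (for the case where m has a most
significant bit of zero): if a < b, mpn_sub_n leaves a - b + B^size in rp with the
borrow set; conditionally subtracting Bm2m = B^size - 2m then yields
(a - b + B^size) - (B^size - 2m) = a - b + 2m, which lies in [0, 2m) whenever
0 <= a, b < 2m. Only when b > 2m can that step itself borrow, triggering the second
conditional subtraction of B = B^size mod m, which keeps the result correct modulo m
but makes no further range guarantee.
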
diff --git a/ecc-mul-a-eh.c b/ecc-mul-a-eh.c
index 1eb3efcc..980fec3f 100644
--- a/ecc-mul-a-eh.c
+++ b/ecc-mul-a-eh.c
@@ -140,7 +140,7 @@ ecc_mul_a_eh (const struct ecc_curve *ecc,
assert (bits < TABLE_SIZE);
- sec_tabselect (r, 3*ecc->p.size, table, TABLE_SIZE, bits);
+ mpn_sec_tabselect (r, table, 3*ecc->p.size, TABLE_SIZE, bits);
for (;;)
{
@@ -166,7 +166,7 @@ ecc_mul_a_eh (const struct ecc_curve *ecc,
ecc->dup (ecc, r, r, scratch_out);
bits &= TABLE_MASK;
- sec_tabselect (tp, 3*ecc->p.size, table, TABLE_SIZE, bits);
+ mpn_sec_tabselect (tp, table, 3*ecc->p.size, TABLE_SIZE, bits);
ecc->add_hhh (ecc, r, r, tp, scratch_out);
}
#undef table
diff --git a/ecc-mul-a.c b/ecc-mul-a.c
index cb9c7d41..8e1355eb 100644
--- a/ecc-mul-a.c
+++ b/ecc-mul-a.c
@@ -144,7 +144,7 @@ ecc_mul_a (const struct ecc_curve *ecc,
assert (bits < TABLE_SIZE);
- sec_tabselect (r, 3*ecc->p.size, table, TABLE_SIZE, bits);
+ mpn_sec_tabselect (r, table, 3*ecc->p.size, TABLE_SIZE, bits);
is_zero = (bits == 0);
for (;;)
@@ -171,7 +171,7 @@ ecc_mul_a (const struct ecc_curve *ecc,
ecc_dup_jj (ecc, r, r, scratch_out);
bits &= TABLE_MASK;
- sec_tabselect (tp, 3*ecc->p.size, table, TABLE_SIZE, bits);
+ mpn_sec_tabselect (tp, table, 3*ecc->p.size, TABLE_SIZE, bits);
cnd_copy (is_zero, r, tp, 3*ecc->p.size);
ecc_add_jjj (ecc, tp, tp, r, scratch_out);
diff --git a/ecc-mul-g-eh.c b/ecc-mul-g-eh.c
index 8b3ca8f8..57df1c6d 100644
--- a/ecc-mul-g-eh.c
+++ b/ecc-mul-g-eh.c
@@ -88,10 +88,10 @@ ecc_mul_g_eh (const struct ecc_curve *ecc, mp_limb_t *r,
shift = bit_index % GMP_NUMB_BITS;
bits = (bits << 1) | ((np[limb_index] >> shift) & 1);
}
- sec_tabselect (tp, 2*ecc->p.size,
- (ecc->pippenger_table
- + (2*ecc->p.size * (mp_size_t) j << c)),
- 1<<c, bits);
+ mpn_sec_tabselect (tp,
+ (ecc->pippenger_table
+ + (2*ecc->p.size * (mp_size_t) j << c)),
+ 2*ecc->p.size, 1<<c, bits);
ecc->add_hh (ecc, r, r, tp, scratch_out);
}
diff --git a/ecc-mul-g.c b/ecc-mul-g.c
index dcc7c3ea..677a37e7 100644
--- a/ecc-mul-g.c
+++ b/ecc-mul-g.c
@@ -88,10 +88,10 @@ ecc_mul_g (const struct ecc_curve *ecc, mp_limb_t *r,
shift = bit_index % GMP_NUMB_BITS;
bits = (bits << 1) | ((np[limb_index] >> shift) & 1);
}
- sec_tabselect (tp, 2*ecc->p.size,
- (ecc->pippenger_table
- + (2*ecc->p.size * (mp_size_t) j << c)),
- 1<<c, bits);
+ mpn_sec_tabselect (tp,
+ (ecc->pippenger_table
+ + (2*ecc->p.size * (mp_size_t) j << c)),
+ 2*ecc->p.size, 1<<c, bits);
cnd_copy (is_zero, r, tp, 2*ecc->p.size);
cnd_copy (is_zero, r + 2*ecc->p.size, ecc->unit, ecc->p.size);
diff --git a/ecc-nonsec-add-jjj.c b/ecc-nonsec-add-jjj.c
new file mode 100644
index 00000000..439c0a52
--- /dev/null
+++ b/ecc-nonsec-add-jjj.c
@@ -0,0 +1,162 @@
+/* ecc-nonsec-add-jjj.c
+
+ Copyright (C) 2013, 2022 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "ecc.h"
+#include "ecc-internal.h"
+
+/* Similar to ecc_add_jjj, but checks below whether the x coordinates
+ are equal (H = 0), and if so, either performs doubling, when the y
+ coordinates are also equal, or returns 0 (failure), indicating that
+ the result is the infinity point. */
+int
+ecc_nonsec_add_jjj (const struct ecc_curve *ecc,
+ mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *q,
+ mp_limb_t *scratch)
+{
+#define x1 p
+#define y1 (p + ecc->p.size)
+#define z1 (p + 2*ecc->p.size)
+
+#define x2 q
+#define y2 (q + ecc->p.size)
+#define z2 (q + 2*ecc->p.size)
+
+#define x3 r
+#define y3 (r + ecc->p.size)
+#define z3 (r + 2*ecc->p.size)
+ /* Formulas, from djb,
+ http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl:
+
+ Computation Operation Live variables
+
+ Z1Z1 = Z1^2 sqr Z1Z1
+ Z2Z2 = Z2^2 sqr Z1Z1, Z2Z2
+ U1 = X1*Z2Z2 mul Z1Z1, Z2Z2, U1
+ U2 = X2*Z1Z1 mul Z1Z1, Z2Z2, U1, U2
+ H = U2-U1 Z1Z1, Z2Z2, U1, H
+ Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H sqr, mul Z1Z1, Z2Z2, U1, H
+ S1 = Y1*Z2*Z2Z2 mul, mul Z1Z1, U1, H, S1
+ S2 = Y2*Z1*Z1Z1 mul, mul U1, H, S1, S2
+ W = 2*(S2-S1) (djb: r) U1, H, S1, W
+ I = (2*H)^2 sqr U1, H, S1, W, I
+ J = H*I mul U1, S1, W, J, V
+ V = U1*I mul S1, W, J, V
+ X3 = W^2-J-2*V sqr S1, W, J, V
+ Y3 = W*(V-X3)-2*S1*J mul, mul
+ */
+
+#define h scratch
+#define z1z1 (scratch + ecc->p.size)
+#define z2z2 z1z1
+#define z1z2 (scratch + 2*ecc->p.size)
+
+#define w (scratch + ecc->p.size)
+#define i (scratch + 2*ecc->p.size)
+#define j h
+#define v i
+
+#define tp (scratch + 3*ecc->p.size)
+
+ ecc_mod_sqr (&ecc->p, z2z2, z2, tp); /* z2z2 */
+ /* Store u1 at x3 */
+ ecc_mod_mul (&ecc->p, x3, x1, z2z2, tp); /* z2z2 */
+
+ ecc_mod_add (&ecc->p, z1z2, z1, z2); /* z2z2, z1z2 */
+ ecc_mod_sqr (&ecc->p, z1z2, z1z2, tp);
+ ecc_mod_sub (&ecc->p, z1z2, z1z2, z2z2); /* z2z2, z1z2 */
+
+ /* Do s1 early, store at y3 */
+ ecc_mod_mul (&ecc->p, z2z2, z2z2, z2, tp); /* z2z2, z1z2 */
+ ecc_mod_mul (&ecc->p, y3, z2z2, y1, tp); /* z1z2 */
+
+ ecc_mod_sqr (&ecc->p, z1z1, z1, tp); /* z1z1, z1z2 */
+ ecc_mod_sub (&ecc->p, z1z2, z1z2, z1z1);
+ ecc_mod_mul (&ecc->p, h, x2, z1z1, tp); /* z1z1, z1z2, h */
+ ecc_mod_sub (&ecc->p, h, h, x3);
+
+ /* z1^3 */
+ ecc_mod_mul (&ecc->p, z1z1, z1z1, z1, tp);
+
+ /* z3 <-- h z1 z2 delayed until now, since that may clobber z1. */
+ ecc_mod_mul (&ecc->p, z3, z1z2, h, tp); /* z1z1, h */
+ /* w = 2 (s2 - s1) */
+ ecc_mod_mul (&ecc->p, w, z1z1, y2, tp); /* h, w */
+ ecc_mod_sub (&ecc->p, w, w, y3);
+
+ /* Note that use of ecc_mod_zero_p depends on 0 <= h, w < 2p. */
+ if (ecc_mod_zero_p (&ecc->p, h))
+ {
+ /* X1 == X2 */
+ if (ecc_mod_zero_p (&ecc->p, w)) {
+ /* Y1 == Y2. Do point duplication. Note that q input is
+ unclobbered, and that scratch need is smaller. Implies some
+ unnecessary recomputation, but performance is not so
+ important for this very unlikely corner case. */
+ ecc_dup_jj (ecc, r, q, scratch);
+ return 1;
+ }
+
+ /* We must have Y1 == -Y2, and then the result is the infinity
+ point. */
+ mpn_zero (r, 3*ecc->p.size);
+ return 0;
+ }
+ ecc_mod_add (&ecc->p, w, w, w);
+
+ /* i = (2h)^2 */
+ ecc_mod_add (&ecc->p, i, h, h); /* h, w, i */
+ ecc_mod_sqr (&ecc->p, i, i, tp);
+
+ /* j and h can overlap */
+ ecc_mod_mul (&ecc->p, j, h, i, tp); /* j, w, i */
+
+ /* v and i can overlap */
+ ecc_mod_mul (&ecc->p, v, x3, i, tp); /* j, w, v */
+
+ /* x3 <-- w^2 - j - 2v */
+ ecc_mod_sqr (&ecc->p, x3, w, tp);
+ ecc_mod_sub (&ecc->p, x3, x3, j);
+ ecc_mod_submul_1 (&ecc->p, x3, v, 2);
+
+ /* y3 <-- w (v - x3) - 2 s1 j */
+ ecc_mod_mul (&ecc->p, j, j, y3, tp);
+ ecc_mod_sub (&ecc->p, v, v, x3);
+ ecc_mod_mul (&ecc->p, y3, v, w, tp);
+ ecc_mod_submul_1 (&ecc->p, y3, j, 2);
+
+ return 1;
+}
diff --git a/ecc-secp192r1.c b/ecc-secp192r1.c
index 391ba528..4a07bca3 100644
--- a/ecc-secp192r1.c
+++ b/ecc-secp192r1.c
@@ -247,7 +247,8 @@ const struct ecc_curve _nettle_secp_192r1 =
ecc_p,
ecc_Bmodp,
- ecc_Bmodp_shifted,
+ ecc_Bmodp_shifted,
+ ecc_Bm2p,
ecc_redc_ppm1,
ecc_pp1h,
@@ -269,6 +270,7 @@ const struct ecc_curve _nettle_secp_192r1 =
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
+ ecc_Bm2q,
NULL,
ecc_qp1h,
diff --git a/ecc-secp224r1.c b/ecc-secp224r1.c
index bb321298..b2a335ec 100644
--- a/ecc-secp224r1.c
+++ b/ecc-secp224r1.c
@@ -223,6 +223,7 @@ const struct ecc_curve _nettle_secp_224r1 =
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
+ ecc_Bm2p,
ecc_redc_ppm1,
ecc_pp1h,
@@ -244,6 +245,7 @@ const struct ecc_curve _nettle_secp_224r1 =
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
+ ecc_Bm2q,
NULL,
ecc_qp1h,
diff --git a/ecc-secp256r1.c b/ecc-secp256r1.c
index e1a14b90..4848dfe3 100644
--- a/ecc-secp256r1.c
+++ b/ecc-secp256r1.c
@@ -343,6 +343,7 @@ const struct ecc_curve _nettle_secp_256r1 =
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
+ ecc_Bm2p,
ecc_redc_ppm1,
ecc_pp1h,
@@ -364,6 +365,7 @@ const struct ecc_curve _nettle_secp_256r1 =
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
+ ecc_Bm2q,
NULL,
ecc_qp1h,
diff --git a/ecc-secp384r1.c b/ecc-secp384r1.c
index 39716dff..abac5e6d 100644
--- a/ecc-secp384r1.c
+++ b/ecc-secp384r1.c
@@ -314,6 +314,7 @@ const struct ecc_curve _nettle_secp_384r1 =
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
+ ecc_Bm2p,
ecc_redc_ppm1,
ecc_pp1h,
@@ -335,6 +336,7 @@ const struct ecc_curve _nettle_secp_384r1 =
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
+ ecc_Bm2q,
NULL,
ecc_qp1h,
diff --git a/ecc-secp521r1.c b/ecc-secp521r1.c
index 24d0b53a..8ab7b4bf 100644
--- a/ecc-secp521r1.c
+++ b/ecc-secp521r1.c
@@ -169,6 +169,7 @@ const struct ecc_curve _nettle_secp_521r1 =
ecc_p,
ecc_Bmodp,
ecc_Bmodp_shifted,
+ ecc_Bm2p,
ecc_redc_ppm1,
ecc_pp1h,
@@ -190,6 +191,7 @@ const struct ecc_curve _nettle_secp_521r1 =
ecc_q,
ecc_Bmodq,
ecc_Bmodq_shifted,
+ ecc_Bm2q,
NULL,
ecc_qp1h,
diff --git a/eccdata.c b/eccdata.c
index a7e7e18a..e0726e8d 100644
--- a/eccdata.c
+++ b/eccdata.c
@@ -71,6 +71,7 @@ struct ecc_curve
/* Prime */
mpz_t p;
+ /* Curve constant */
mpz_t b;
/* Curve order */
@@ -626,15 +627,15 @@ ecc_curve_init (struct ecc_curve *ecc, const char *curve)
x^2 + y^2 = 1 + (121665/121666) x^2 y^2 (mod p).
- -x^2 + y^2 = 1 - (121665/121666) x^2 y^2, with p = 2^{255} - 19.
+ But instead of using this curve, we use a twisted curve, following RFC 7748,
+
+ -x^2 + y^2 = 1 - (121665/121666) x^2 y^2 (mod p)
+
+ (this is possible because -1 is a square modulo p).
The generator is
x = 0x216936d3cd6e53fec0a4e231fdd6dc5c692cc7609525a7b2c9562d608f25d51a
y = 0x6666666666666666666666666666666666666666666666666666666666666658
-
- Also birationally equivalent to the curve25519 Montgomery curve,
-
- y^2 = x^3 + 486662 x^2 + x (mod p)
*/
ecc_curve_init_str (ecc, ECC_TYPE_TWISTED_EDWARDS,
"7fffffffffffffffffffffffffffffff"
@@ -1151,98 +1152,99 @@ output_point (const struct ecc_curve *ecc,
mpz_clear (t);
}
-static unsigned
-output_modulo (const char *name, const mpz_t x,
- unsigned size, unsigned bits_per_limb)
+static void
+string_toupper (char *buf, size_t size, const char *s)
{
- mpz_t mod;
- unsigned bits;
-
- mpz_init (mod);
-
- mpz_setbit (mod, bits_per_limb * size);
- mpz_mod (mod, mod, x);
-
- bits = mpz_sizeinbase (mod, 2);
- output_bignum (name, mod, size, bits_per_limb);
-
- mpz_clear (mod);
- return bits;
+ size_t i;
+ for (i = 0; i < size; i++)
+ {
+ buf[i] = toupper ((int)s[i]);
+ if (!buf[i])
+ return;
+ }
+ fprintf (stderr, "string '%s' too large for buffer of size %u.\n",
+ s, (unsigned) size);
+ abort();
}
static void
-output_curve (const struct ecc_curve *ecc, unsigned bits_per_limb)
+output_modulo (const char *name, const mpz_t x,
+ unsigned size, unsigned bits_per_limb)
{
- unsigned limb_size = (ecc->bit_size + bits_per_limb - 1)/bits_per_limb;
- unsigned i;
- unsigned bits;
- int redc_limbs;
+ unsigned bit_size;
+ int shift;
+ char buf[20];
mpz_t t;
- mpz_t z;
+
+ snprintf (buf, sizeof (buf), "ecc_%s", name);
+ output_bignum (buf, x, size, bits_per_limb);
mpz_init (t);
- mpz_init (z);
- printf ("/* For NULL. */\n#include <stddef.h>\n");
+ mpz_setbit (t, bits_per_limb * size);
+ mpz_mod (t, t, x);
- printf ("#define ECC_LIMB_SIZE %u\n", limb_size);
- printf ("#define ECC_PIPPENGER_K %u\n", ecc->pippenger_k);
- printf ("#define ECC_PIPPENGER_C %u\n", ecc->pippenger_c);
-
- output_bignum ("ecc_p", ecc->p, limb_size, bits_per_limb);
- output_bignum ("ecc_b", ecc->b, limb_size, bits_per_limb);
- output_bignum ("ecc_q", ecc->q, limb_size, bits_per_limb);
+ snprintf (buf, sizeof (buf), "ecc_Bmod%s", name);
+ output_bignum (buf, t, size, bits_per_limb);
- bits = output_modulo ("ecc_Bmodp", ecc->p, limb_size, bits_per_limb);
- printf ("#define ECC_BMODP_SIZE %u\n",
- (bits + bits_per_limb - 1) / bits_per_limb);
- bits = output_modulo ("ecc_Bmodq", ecc->q, limb_size, bits_per_limb);
- printf ("#define ECC_BMODQ_SIZE %u\n",
- (bits + bits_per_limb - 1) / bits_per_limb);
- bits = mpz_sizeinbase (ecc->q, 2);
- if (bits < ecc->bit_size)
+ string_toupper (buf, sizeof (buf), name);
+ printf ("#define ECC_BMOD%s_SIZE %u\n", buf,
+ (unsigned) ((mpz_sizeinbase (t, 2) + bits_per_limb - 1)
+ / bits_per_limb));
+
+ bit_size = mpz_sizeinbase (x, 2);
+
+ shift = size * bits_per_limb - bit_size;
+ assert (shift >= 0);
+ if (shift > 0)
{
- /* for curve25519, with q = 2^k + q', with a much smaller q' */
- unsigned mbits;
- unsigned shift;
+ mpz_set_ui (t, 0);
+ mpz_setbit (t, size * bits_per_limb);
+ mpz_submul_ui (t, x, 2);
- /* Shift to align the one bit at B */
- shift = bits_per_limb * limb_size + 1 - bits;
-
- mpz_set (t, ecc->q);
- mpz_clrbit (t, bits-1);
- mbits = mpz_sizeinbase (t, 2);
+ snprintf (buf, sizeof (buf), "ecc_Bm2%s", name);
+ output_bignum (buf, t, size, bits_per_limb);
- /* The shifted value must be a limb smaller than q. */
- if (mbits + shift + bits_per_limb <= bits)
+ if (bit_size == 253)
{
+ /* For curve25519, with q = 2^k + q', with a much smaller q' */
+ unsigned mbits;
+ unsigned shift;
+
+ /* Shift to align the one bit at B */
+ shift = bits_per_limb * size + 1 - bit_size;
+
+ mpz_set (t, x);
+ mpz_clrbit (t, bit_size-1);
+ mbits = mpz_sizeinbase (t, 2);
+
+ /* The shifted value must be a limb smaller than q. */
+ assert (mbits + shift + bits_per_limb <= bit_size);
+
/* q of the form 2^k + q', with q' a limb smaller */
mpz_mul_2exp (t, t, shift);
- output_bignum ("ecc_mBmodq_shifted", t, limb_size, bits_per_limb);
- }
- }
+ snprintf (buf, sizeof (buf), "ecc_mBmod%s_shifted", name);
- if (ecc->bit_size < limb_size * bits_per_limb)
- {
- int shift;
+ output_bignum (buf, t, size, bits_per_limb);
+ }
+ else
+ {
+ mpz_set_ui (t, 0);
+ mpz_setbit (t, bit_size);
+ mpz_sub (t, t, x);
- mpz_set_ui (t, 0);
- mpz_setbit (t, ecc->bit_size);
- mpz_sub (t, t, ecc->p);
- output_bignum ("ecc_Bmodp_shifted", t, limb_size, bits_per_limb);
+ snprintf (buf, sizeof (buf), "ecc_Bmod%s_shifted", name);
+ output_bignum (buf, t, size, bits_per_limb);
- shift = limb_size * bits_per_limb - ecc->bit_size;
- if (shift > 0)
- {
/* Check condition for reducing hi limbs. If s is the
normalization shift and n is the bit size (so that s + n
- = limb_size * bite_per_limb), then we need
+ = limb_size * bits_per_limb), then we need
- (2^n - 1) + (2^s - 1) (2^n - p) < 2p
+ (2^n - 1) + (2^s - 1) (2^n - p) < 2p
or equivalently,
- 2^s (2^n - p) <= p
+ 2^s (2^n - p) <= p
To a allow a carry limb to be added in at the same time,
substitute s+1 for s.
@@ -1250,26 +1252,45 @@ output_curve (const struct ecc_curve *ecc, unsigned bits_per_limb)
/* FIXME: For ecdsa verify, we actually need the stricter
inequality < 2 q. */
mpz_mul_2exp (t, t, shift + 1);
- if (mpz_cmp (t, ecc->p) > 0)
+ if (mpz_cmp (t, x) > 0)
{
- fprintf (stderr, "Reduction condition failed for %u-bit curve.\n",
- ecc->bit_size);
+ fprintf (stderr, "Reduction condition failed for %u-bit %s.\n",
+ bit_size, name);
exit (EXIT_FAILURE);
}
}
}
else
- printf ("#define ecc_Bmodp_shifted ecc_Bmodp\n");
-
- if (bits < limb_size * bits_per_limb)
{
- mpz_set_ui (t, 0);
- mpz_setbit (t, bits);
- mpz_sub (t, t, ecc->q);
- output_bignum ("ecc_Bmodq_shifted", t, limb_size, bits_per_limb);
+ printf ("#define ecc_Bm2%s ecc_Bmod%s\n", name, name);
+ printf ("#define ecc_Bmod%s_shifted ecc_Bmod%s\n", name, name);
}
- else
- printf ("#define ecc_Bmodq_shifted ecc_Bmodq\n");
+
+ mpz_clear (t);
+}
+
+static void
+output_curve (const struct ecc_curve *ecc, unsigned bits_per_limb)
+{
+ unsigned limb_size = (ecc->bit_size + bits_per_limb - 1)/bits_per_limb;
+ unsigned i;
+ int redc_limbs;
+ mpz_t t;
+ mpz_t z;
+
+ mpz_init (t);
+ mpz_init (z);
+
+ printf ("/* For NULL. */\n#include <stddef.h>\n");
+
+ printf ("#define ECC_LIMB_SIZE %u\n", limb_size);
+ printf ("#define ECC_PIPPENGER_K %u\n", ecc->pippenger_k);
+ printf ("#define ECC_PIPPENGER_C %u\n", ecc->pippenger_c);
+
+ output_modulo ("p", ecc->p, limb_size, bits_per_limb);
+ output_modulo ("q", ecc->q, limb_size, bits_per_limb);
+
+ output_bignum ("ecc_b", ecc->b, limb_size, bits_per_limb);
mpz_add_ui (t, ecc->p, 1);
mpz_fdiv_q_2exp (t, t, 1);
diff --git a/examples/ecc-benchmark.c b/examples/ecc-benchmark.c
index 3ab269c7..7e857f80 100644
--- a/examples/ecc-benchmark.c
+++ b/examples/ecc-benchmark.c
@@ -159,11 +159,17 @@ bench_modq (void *p)
}
static void
-bench_modinv (void *p)
+bench_pinv (void *p)
{
struct ecc_ctx *ctx = (struct ecc_ctx *) p;
ctx->ecc->p.invert (&ctx->ecc->p, ctx->rp, ctx->ap, ctx->tp);
}
+static void
+bench_qinv (void *p)
+{
+ struct ecc_ctx *ctx = (struct ecc_ctx *) p;
+ ctx->ecc->q.invert (&ctx->ecc->q, ctx->rp, ctx->ap, ctx->tp);
+}
#if !NETTLE_USE_MINI_GMP
static void
@@ -239,7 +245,7 @@ static void
bench_curve (const struct ecc_curve *ecc)
{
struct ecc_ctx ctx;
- double modp, reduce, modq, modinv, modinv_gcd, modinv_powm,
+ double modp, reduce, modq, pinv, qinv, modinv_gcd, modinv_powm,
dup_hh, add_hh, add_hhh,
mul_g, mul_a;
@@ -277,7 +283,8 @@ bench_curve (const struct ecc_curve *ecc)
modq = time_function (bench_modq, &ctx);
- modinv = time_function (bench_modinv, &ctx);
+ pinv = time_function (bench_pinv, &ctx);
+ qinv = time_function (bench_qinv, &ctx);
#if !NETTLE_USE_MINI_GMP
modinv_gcd = time_function (bench_modinv_gcd, &ctx);
#else
@@ -299,9 +306,9 @@ bench_curve (const struct ecc_curve *ecc)
free (ctx.bp);
free (ctx.tp);
- printf ("%4d %6.4f %6.4f %6.4f %6.2f %6.3f %6.2f %6.3f %6.3f %6.3f %6.1f %6.1f\n",
+ printf ("%4d %6.4f %6.4f %6.4f %6.2f %6.2f %6.3f %6.2f %6.3f %6.3f %6.3f %6.1f %6.1f\n",
ecc->p.bit_size, 1e6 * modp, 1e6 * reduce, 1e6 * modq,
- 1e6 * modinv, 1e6 * modinv_gcd, 1e6 * modinv_powm,
+ 1e6 * pinv, 1e6 * qinv, 1e6 * modinv_gcd, 1e6 * modinv_powm,
1e6 * dup_hh, 1e6 * add_hh, 1e6 * add_hhh,
1e6 * mul_g, 1e6 * mul_a);
}
@@ -326,8 +333,8 @@ main (int argc UNUSED, char **argv UNUSED)
unsigned i;
time_init();
- printf ("%4s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s (us)\n",
- "size", "modp", "reduce", "modq", "modinv", "mi_gcd", "mi_pow",
+ printf ("%4s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s (us)\n",
+ "size", "modp", "reduce", "modq", "pinv", "qinv", "mi_gcd", "mi_pow",
"dup_hh", "add_hh", "ad_hhh",
"mul_g", "mul_a");
for (i = 0; i < numberof (curves); i++)
diff --git a/examples/nettle-benchmark.c b/examples/nettle-benchmark.c
index ba5dd284..802a7234 100644
--- a/examples/nettle-benchmark.c
+++ b/examples/nettle-benchmark.c
@@ -63,6 +63,7 @@
#include "sha1.h"
#include "sha2.h"
#include "sha3.h"
+#include "sm4.h"
#include "twofish.h"
#include "umac.h"
#include "cmac.h"
@@ -926,6 +927,7 @@ main(int argc, char **argv)
&nettle_des3,
&nettle_serpent256,
&nettle_twofish128, &nettle_twofish192, &nettle_twofish256,
+ &nettle_sm4,
NULL
};
diff --git a/fat-arm.c b/fat-arm.c
index 56647404..8133ca69 100644
--- a/fat-arm.c
+++ b/fat-arm.c
@@ -153,9 +153,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, armv6)
-DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, armv6)
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, armv6)
DECLARE_FAT_FUNC(_nettle_sha512_compress, sha512_compress_func)
DECLARE_FAT_FUNC_VAR(sha512_compress, sha512_compress_func, c)
@@ -202,7 +202,7 @@ fat_init (void)
_nettle_aes_encrypt_vec = _nettle_aes_encrypt_armv6;
_nettle_aes_decrypt_vec = _nettle_aes_decrypt_armv6;
nettle_sha1_compress_vec = _nettle_sha1_compress_armv6;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_armv6;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_armv6;
}
else
{
@@ -211,7 +211,7 @@ fat_init (void)
_nettle_aes_encrypt_vec = _nettle_aes_encrypt_arm;
_nettle_aes_decrypt_vec = _nettle_aes_decrypt_arm;
nettle_sha1_compress_vec = _nettle_sha1_compress_c;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_c;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c;
}
if (features.have_neon)
{
@@ -263,9 +263,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void,
(uint32_t *state, const uint8_t *input),
(state, input))
-DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
- (uint32_t *state, const uint8_t *input, const uint32_t *k),
- (state, input, k))
+DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *,
+ (uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input),
+ (state, k, blocks, input))
DEFINE_FAT_FUNC(_nettle_sha512_compress, void,
(uint64_t *state, const uint8_t *input, const uint64_t *k),
diff --git a/fat-arm64.c b/fat-arm64.c
index f2b8493d..aec99f66 100644
--- a/fat-arm64.c
+++ b/fat-arm64.c
@@ -178,9 +178,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, arm64)
-DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, arm64)
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, arm64)
static void CONSTRUCTOR
fat_init (void)
@@ -250,11 +250,11 @@ fat_init (void)
{
if (verbose)
fprintf (stderr, "libnettle: enabling hardware-accelerated sha256 compress code.\n");
- _nettle_sha256_compress_vec = _nettle_sha256_compress_arm64;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_arm64;
}
else
{
- _nettle_sha256_compress_vec = _nettle_sha256_compress_c;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c;
}
}
@@ -297,6 +297,7 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void,
(uint32_t *state, const uint8_t *input),
(state, input))
-DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
- (uint32_t *state, const uint8_t *input, const uint32_t *k),
- (state, input, k))
+DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *,
+ (uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input),
+ (state, k, blocks, input))
diff --git a/fat-ppc.c b/fat-ppc.c
index 7569e44d..b95365f6 100644
--- a/fat-ppc.c
+++ b/fat-ppc.c
@@ -195,6 +195,11 @@ DECLARE_FAT_FUNC(_nettle_poly1305_digest, poly1305_digest_func)
DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, c)
DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, ppc64)
+DECLARE_FAT_FUNC(_nettle_poly1305_blocks, poly1305_blocks_func)
+DECLARE_FAT_FUNC_VAR(poly1305_blocks, poly1305_blocks_func, c)
+DECLARE_FAT_FUNC_VAR(poly1305_blocks, poly1305_blocks_func, ppc64)
+
+
static void CONSTRUCTOR
fat_init (void)
{
@@ -251,12 +256,14 @@ fat_init (void)
_nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_ppc64;
_nettle_poly1305_block_vec = _nettle_poly1305_block_ppc64;
_nettle_poly1305_digest_vec = _nettle_poly1305_digest_ppc64;
+ _nettle_poly1305_blocks_vec = _nettle_poly1305_blocks_ppc64;
}
else
{
_nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_c;
_nettle_poly1305_block_vec = _nettle_poly1305_block_c;
_nettle_poly1305_digest_vec = _nettle_poly1305_digest_c;
+ _nettle_poly1305_blocks_vec = _nettle_poly1305_blocks_c;
}
}
@@ -315,3 +322,9 @@ DEFINE_FAT_FUNC(_nettle_poly1305_digest, void,
(struct poly1305_ctx *ctx,
union nettle_block16 *s),
(ctx, s))
+
+DEFINE_FAT_FUNC(_nettle_poly1305_blocks, const uint8_t *,
+ (struct poly1305_ctx *ctx,
+ size_t blocks,
+ const uint8_t *m),
+ (ctx, blocks, m))
diff --git a/fat-s390x.c b/fat-s390x.c
index fa026018..1bbd8e16 100644
--- a/fat-s390x.c
+++ b/fat-s390x.c
@@ -254,9 +254,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, s390x)
-DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, s390x)
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, s390x)
DECLARE_FAT_FUNC(_nettle_sha512_compress, sha512_compress_func)
DECLARE_FAT_FUNC_VAR(sha512_compress, sha512_compress_func, c)
@@ -398,11 +398,11 @@ fat_init (void)
{
if (verbose)
fprintf (stderr, "libnettle: enabling hardware accelerated SHA256 compress code.\n");
- _nettle_sha256_compress_vec = _nettle_sha256_compress_s390x;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_s390x;
}
else
{
- _nettle_sha256_compress_vec = _nettle_sha256_compress_c;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c;
}
/* SHA512 */
@@ -495,9 +495,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void,
(state, input))
/* SHA256 */
-DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
- (uint32_t *state, const uint8_t *input, const uint32_t *k),
- (state, input, k))
+DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *,
+ (uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input),
+ (state, k, blocks, input))
/* SHA512 */
DEFINE_FAT_FUNC(_nettle_sha512_compress, void,
diff --git a/fat-setup.h b/fat-setup.h
index ad3c10f0..6bf3e2fa 100644
--- a/fat-setup.h
+++ b/fat-setup.h
@@ -178,7 +178,9 @@ typedef void salsa20_crypt_func (struct salsa20_ctx *ctx, unsigned rounds,
const uint8_t *src);
typedef void sha1_compress_func(uint32_t *state, const uint8_t *input);
-typedef void sha256_compress_func(uint32_t *state, const uint8_t *input, const uint32_t *k);
+typedef const uint8_t *
+sha256_compress_n_func(uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input);
struct sha3_state;
typedef void sha3_permute_func (struct sha3_state *state);
@@ -201,6 +203,8 @@ typedef void poly1305_set_key_func(struct poly1305_ctx *ctx, const uint8_t *key)
typedef void poly1305_digest_func(struct poly1305_ctx *ctx, union nettle_block16 *s);
typedef void poly1305_block_func(struct poly1305_ctx *ctx, const uint8_t *m,
unsigned high);
+typedef const uint8_t * poly1305_blocks_func(struct poly1305_ctx *ctx, size_t blocks,
+ const uint8_t *m);
struct aes128_ctx;
typedef void aes128_set_key_func (struct aes128_ctx *ctx, const uint8_t *key);
diff --git a/fat-x86_64.c b/fat-x86_64.c
index 47cf78ae..0a2fedf4 100644
--- a/fat-x86_64.c
+++ b/fat-x86_64.c
@@ -155,9 +155,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, x86_64)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, sha_ni)
-DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, x86_64)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, sha_ni)
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, x86_64)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, sha_ni)
DECLARE_FAT_FUNC(_nettle_ghash_set_key, ghash_set_key_func)
DECLARE_FAT_FUNC_VAR(ghash_set_key, ghash_set_key_func, c)
@@ -228,14 +228,14 @@ fat_init (void)
if (verbose)
fprintf (stderr, "libnettle: using sha_ni instructions.\n");
nettle_sha1_compress_vec = _nettle_sha1_compress_sha_ni;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_sha_ni;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_sha_ni;
}
else
{
if (verbose)
fprintf (stderr, "libnettle: not using sha_ni instructions.\n");
nettle_sha1_compress_vec = _nettle_sha1_compress_x86_64;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_x86_64;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_x86_64;
}
if (features.have_pclmul)
@@ -315,9 +315,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void,
(uint32_t *state, const uint8_t *input),
(state, input))
-DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
- (uint32_t *state, const uint8_t *input, const uint32_t *k),
- (state, input, k))
+DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *,
+ (uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input),
+ (state, k, blocks, input))
DEFINE_FAT_FUNC(_nettle_ghash_set_key, void,
(struct gcm_key *ctx, const union nettle_block16 *key),
diff --git a/sec-tabselect.c b/gcm-sm4-meta.c
index e6bf2282..090460d3 100644
--- a/sec-tabselect.c
+++ b/gcm-sm4-meta.c
@@ -1,6 +1,6 @@
-/* sec-tabselect.c
+/* gcm-sm4-meta.c
- Copyright (C) 2013 Niels Möller
+ Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
This file is part of GNU Nettle.
@@ -29,34 +29,32 @@
not, see http://www.gnu.org/licenses/.
*/
-/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */
-
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include <assert.h>
-#include "ecc-internal.h"
+#include "nettle-meta.h"
+
+#include "gcm.h"
-/* Copy the k'th element of the table out tn elements, each of size
- rn. Always read complete table. Similar to gmp's mpn_tabselect. */
-/* FIXME: Should we need to volatile declare anything? */
-void
-sec_tabselect (mp_limb_t *rp, mp_size_t rn,
- const mp_limb_t *table, unsigned tn,
- unsigned k)
+static nettle_set_key_func gcm_sm4_set_nonce_wrapper;
+static void
+gcm_sm4_set_nonce_wrapper (void *ctx, const uint8_t *nonce)
{
- const mp_limb_t *end = table + tn * rn;
- const mp_limb_t *p;
- mp_size_t i;
-
- assert (k < tn);
- mpn_zero (rp, rn);
- for (p = table; p < end; p += rn, k--)
- {
- mp_limb_t mask = - (mp_limb_t) (k == 0);
- for (i = 0; i < rn; i++)
- rp[i] += mask & p[i];
- }
+ gcm_sm4_set_iv (ctx, GCM_IV_SIZE, nonce);
}
+
+const struct nettle_aead nettle_gcm_sm4 =
+ { "gcm_sm4", sizeof(struct gcm_sm4_ctx),
+ GCM_BLOCK_SIZE, SM4_KEY_SIZE,
+ GCM_IV_SIZE, GCM_DIGEST_SIZE,
+ (nettle_set_key_func *) gcm_sm4_set_key,
+ (nettle_set_key_func *) gcm_sm4_set_key,
+ gcm_sm4_set_nonce_wrapper,
+ (nettle_hash_update_func *) gcm_sm4_update,
+ (nettle_crypt_func *) gcm_sm4_encrypt,
+ (nettle_crypt_func *) gcm_sm4_decrypt,
+ (nettle_hash_digest_func *) gcm_sm4_digest,
+ };
diff --git a/gcm-sm4.c b/gcm-sm4.c
new file mode 100644
index 00000000..19d91ae9
--- /dev/null
+++ b/gcm-sm4.c
@@ -0,0 +1,81 @@
+/* gcm-sm4.c
+
+ Galois counter mode using SM4 as the underlying cipher.
+
+ Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+
+#include "gcm.h"
+
+void
+gcm_sm4_set_key(struct gcm_sm4_ctx *ctx, const uint8_t *key)
+{
+ GCM_SET_KEY(ctx, sm4_set_encrypt_key, sm4_crypt, key);
+}
+
+void
+gcm_sm4_set_iv(struct gcm_sm4_ctx *ctx,
+ size_t length, const uint8_t *iv)
+{
+ GCM_SET_IV (ctx, length, iv);
+}
+
+void
+gcm_sm4_update(struct gcm_sm4_ctx *ctx,
+ size_t length, const uint8_t *data)
+{
+ GCM_UPDATE (ctx, length, data);
+}
+
+void
+gcm_sm4_encrypt(struct gcm_sm4_ctx *ctx,
+ size_t length, uint8_t *dst, const uint8_t *src)
+{
+ GCM_ENCRYPT(ctx, sm4_crypt, length, dst, src);
+}
+
+void
+gcm_sm4_decrypt(struct gcm_sm4_ctx *ctx,
+ size_t length, uint8_t *dst, const uint8_t *src)
+{
+ GCM_DECRYPT(ctx, sm4_crypt, length, dst, src);
+}
+
+void
+gcm_sm4_digest(struct gcm_sm4_ctx *ctx,
+ size_t length, uint8_t *digest)
+{
+ GCM_DIGEST(ctx, sm4_crypt, length, digest);
+}
diff --git a/gcm.c b/gcm.c
index 5de8abb2..1e015b9d 100644
--- a/gcm.c
+++ b/gcm.c
@@ -55,25 +55,7 @@
#include "macros.h"
#include "ctr-internal.h"
#include "block-internal.h"
-
-/* FIXME: Duplicated in nist-keywrap.c */
-#if WORDS_BIGENDIAN
-#define bswap_if_le(x) (x)
-#elif HAVE_BUILTIN_BSWAP64
-#define bswap_if_le(x) (__builtin_bswap64 (x))
-#else
-static uint64_t
-bswap_if_le (uint64_t x)
-{
- x = ((x >> 32) & UINT64_C (0xffffffff))
- | ((x & UINT64_C (0xffffffff)) << 32);
- x = ((x >> 16) & UINT64_C (0xffff0000ffff))
- | ((x & UINT64_C (0xffff0000ffff)) << 16);
- x = ((x >> 8) & UINT64_C (0xff00ff00ff00ff))
- | ((x & UINT64_C (0xff00ff00ff00ff)) << 8);
- return x;
-}
-#endif
+#include "bswap-internal.h"
/* Initialization of GCM.
* @ctx: The context of GCM
@@ -115,8 +97,8 @@ gcm_hash_sizes(const struct gcm_key *key, union nettle_block16 *x,
data_size *= 8;
auth_size *= 8;
- buffer.u64[0] = bswap_if_le (auth_size);
- buffer.u64[1] = bswap_if_le (data_size);
+ buffer.u64[0] = bswap64_if_le (auth_size);
+ buffer.u64[1] = bswap64_if_le (data_size);
_ghash_update (key, x, 1, buffer.b);
}
diff --git a/gcm.h b/gcm.h
index 96578530..39af5ab0 100644
--- a/gcm.h
+++ b/gcm.h
@@ -40,6 +40,7 @@
#include "aes.h"
#include "camellia.h"
+#include "sm4.h"
#ifdef __cplusplus
extern "C" {
@@ -95,6 +96,13 @@ extern "C" {
#define gcm_camellia256_decrypt nettle_gcm_camellia256_decrypt
#define gcm_camellia256_digest nettle_gcm_camellia256_digest
+#define gcm_sm4_set_key nettle_gcm_sm4_set_key
+#define gcm_sm4_set_iv nettle_gcm_sm4_set_iv
+#define gcm_sm4_update nettle_gcm_sm4_update
+#define gcm_sm4_encrypt nettle_gcm_sm4_encrypt
+#define gcm_sm4_decrypt nettle_gcm_sm4_decrypt
+#define gcm_sm4_digest nettle_gcm_sm4_digest
+
#define GCM_BLOCK_SIZE 16
#define GCM_IV_SIZE (GCM_BLOCK_SIZE - 4)
#define GCM_DIGEST_SIZE 16
@@ -322,7 +330,22 @@ void gcm_camellia256_decrypt(struct gcm_camellia256_ctx *ctx,
void gcm_camellia256_digest(struct gcm_camellia256_ctx *ctx,
size_t length, uint8_t *digest);
-
+
+struct gcm_sm4_ctx GCM_CTX(struct sm4_ctx);
+
+void gcm_sm4_set_key(struct gcm_sm4_ctx *ctx, const uint8_t *key);
+void gcm_sm4_set_iv(struct gcm_sm4_ctx *ctx,
+ size_t length, const uint8_t *iv);
+void gcm_sm4_update(struct gcm_sm4_ctx *ctx,
+ size_t length, const uint8_t *data);
+void gcm_sm4_encrypt(struct gcm_sm4_ctx *ctx,
+ size_t length, uint8_t *dst, const uint8_t *src);
+void gcm_sm4_decrypt(struct gcm_sm4_ctx *ctx,
+ size_t length, uint8_t *dst, const uint8_t *src);
+void gcm_sm4_digest(struct gcm_sm4_ctx *ctx,
+ size_t length, uint8_t *digest);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/ghash-internal.h b/ghash-internal.h
index 97dff024..2504dc09 100644
--- a/ghash-internal.h
+++ b/ghash-internal.h
@@ -38,6 +38,8 @@
/* Name mangling */
#define _ghash_set_key _nettle_ghash_set_key
#define _ghash_update _nettle_ghash_update
+#define _siv_ghash_set_key _nettle_siv_ghash_set_key
+#define _siv_ghash_update _nettle_siv_ghash_update
#ifdef __cplusplus
extern "C" {
@@ -58,6 +60,17 @@ const uint8_t *
_ghash_update (const struct gcm_key *ctx, union nettle_block16 *state,
size_t blocks, const uint8_t *data);
+/* Expands KEY as needed, for corresponding _siv_ghash_update */
+void
+_siv_ghash_set_key (struct gcm_key *ctx, const union nettle_block16 *key);
+
+/* Updates STATE by hashing DATA, which must be an integral number of
+ blocks. For convenience, returns a pointer to the end of the
+ data. */
+const uint8_t *
+_siv_ghash_update (const struct gcm_key *ctx, union nettle_block16 *state,
+ size_t blocks, const uint8_t *data);
+
#ifdef __cplusplus
}
#endif
diff --git a/gmp-glue.c b/gmp-glue.c
index e75d678b..ffce6c30 100644
--- a/gmp-glue.c
+++ b/gmp-glue.c
@@ -99,6 +99,26 @@ mpn_cnd_swap (mp_limb_t cnd, volatile mp_limb_t *ap, volatile mp_limb_t *bp, mp_
}
}
+/* Copy the k'th element out of a table of tn elements, each of size
+ rn. Always reads the complete table. Similar to gmp's mpn_tabselect. */
+void
+mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *table,
+ mp_size_t rn, unsigned tn, unsigned k)
+{
+ volatile const mp_limb_t *end = table + tn * rn;
+ volatile const mp_limb_t *p;
+ mp_size_t i;
+
+ assert (k < tn);
+ for (p = table; p < end; p += rn, k--)
+ {
+ mp_limb_t mask = - (mp_limb_t) (k == 0);
+ for (i = 0; i < rn; i++)
+ rp[i] = (~mask & rp[i]) | (mask & p[i]);
+ }
+}
+
+
#endif /* NETTLE_USE_MINI_GMP */
int
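
A minimal usage sketch of the helper added above, assuming a mini-GMP build where
mpn_sec_tabselect is declared via gmp-glue.h; the table and row sizes are
illustrative only:

  #include "gmp-glue.h"

  /* Select row `k' from a 4-row table of 3-limb entries, reading every
     row so that the memory access pattern is independent of the secret
     index k. */
  static void
  pick_row (mp_limb_t *rp, const mp_limb_t *table, unsigned k)
  {
    mpn_sec_tabselect (rp, table, 3, 4, k);
  }
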
diff --git a/gmp-glue.h b/gmp-glue.h
index bc6dbf16..dc0ede2a 100644
--- a/gmp-glue.h
+++ b/gmp-glue.h
@@ -66,6 +66,10 @@ mpn_cnd_sub_n (mp_limb_t cnd, mp_limb_t *rp,
void
mpn_cnd_swap (mp_limb_t cnd, volatile mp_limb_t *ap, volatile mp_limb_t *bp, mp_size_t n);
+
+void
+mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *table,
+ mp_size_t rn, unsigned tn, unsigned k);
#endif
/* Side-channel silent variant of mpn_zero_p. */
diff --git a/md-internal.h b/md-internal.h
new file mode 100644
index 00000000..a97b7b90
--- /dev/null
+++ b/md-internal.h
@@ -0,0 +1,70 @@
+/* md-internal.h
+
+ Copyright (C) 2001, 2010, 2022 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NETTLE_MD_INTERNAL_H_INCLUDED
+#define NETTLE_MD_INTERNAL_H_INCLUDED
+
+#include <string.h>
+
+/* Internal helper macros for Merkle-Damgård hash functions. Assumes the context
+ struct includes the following fields:
+
+ uint8_t block[...]; // Buffer holding one block
+ unsigned int index; // Index into block
+*/
+
+#define MD_FILL_OR_RETURN(ctx, length, data) \
+ do { \
+ unsigned __md_left = sizeof((ctx)->block) - (ctx)->index; \
+ if ((length) < __md_left) \
+ { \
+ memcpy((ctx)->block + (ctx)->index, (data), (length)); \
+ (ctx)->index += (length); \
+ return; \
+ } \
+ memcpy((ctx)->block + (ctx)->index, (data), __md_left); \
+ (data) += __md_left; \
+ (length) -= __md_left; \
+ } while(0)
+
+#define MD_FILL_OR_RETURN_INDEX(block_size, block, index, length, data) \
+ do { \
+ unsigned __md_left = (block_size) - (index); \
+ if ((length) < __md_left) \
+ { \
+ memcpy((block) + (index), (data), (length)); \
+ return (index) + (length); \
+ } \
+ memcpy((block) + (index), (data), __md_left); \
+ (data) += __md_left; \
+ (length) -= __md_left; \
+ } while(0)
+#endif /* NETTLE_MD_INTERNAL_H_INCLUDED */
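
A sketch of the update pattern these macros are intended for, modelled on the
sha256_compress_n change elsewhere in this patch; the name K for the round-constant
table is an assumption and block-count bookkeeping is omitted, so this is
illustrative rather than a copy of sha256.c:

  static void
  md_update_sketch (struct sha256_ctx *ctx, size_t length, const uint8_t *data)
  {
    if (ctx->index > 0)
      {
        /* Top up the buffered partial block; returns early if the input
           ends before the block is complete. */
        MD_FILL_OR_RETURN (ctx, length, data);
        _nettle_sha256_compress_n (ctx->state, K, 1, ctx->block);
      }
    /* Compress whole blocks directly from the input; the helper returns
       a pointer just past the data it consumed. */
    data = _nettle_sha256_compress_n (ctx->state, K,
                                      length / SHA256_BLOCK_SIZE, data);
    /* Buffer whatever remains of a final partial block. */
    ctx->index = length % SHA256_BLOCK_SIZE;
    memcpy (ctx->block, data, ctx->index);
  }
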
diff --git a/nettle-internal.h b/nettle-internal.h
index 92416400..bf906c88 100644
--- a/nettle-internal.h
+++ b/nettle-internal.h
@@ -74,12 +74,13 @@
do { assert((size_t)(size) <= (sizeof(name))); } while (0)
#endif
-/* Arbitrary limits which apply to systems that don't have alloca */
-#define NETTLE_MAX_HASH_BLOCK_SIZE 128
+/* Limits that apply to systems that don't have alloca */
+#define NETTLE_MAX_HASH_BLOCK_SIZE 144 /* For sha3_224 */
#define NETTLE_MAX_HASH_DIGEST_SIZE 64
#define NETTLE_MAX_HASH_CONTEXT_SIZE (sizeof(struct sha3_224_ctx))
#define NETTLE_MAX_SEXP_ASSOC 17
#define NETTLE_MAX_CIPHER_BLOCK_SIZE 32
+#define NETTLE_MAX_CIPHER_KEY_SIZE 32
/* Doesn't quite fit with the other algorithms, because of the weak
* keys. Weak keys are not reported, the functions will simply crash
diff --git a/nettle-meta-aeads.c b/nettle-meta-aeads.c
index c99cc465..78f38a3c 100644
--- a/nettle-meta-aeads.c
+++ b/nettle-meta-aeads.c
@@ -43,6 +43,7 @@ const struct nettle_aead * const _nettle_aeads[] = {
&nettle_gcm_aes256,
&nettle_gcm_camellia128,
&nettle_gcm_camellia256,
+ &nettle_gcm_sm4,
&nettle_eax_aes128,
&nettle_chacha_poly1305,
NULL
diff --git a/nettle-meta-ciphers.c b/nettle-meta-ciphers.c
index 49cb47a7..f8d691cf 100644
--- a/nettle-meta-ciphers.c
+++ b/nettle-meta-ciphers.c
@@ -54,6 +54,7 @@ const struct nettle_cipher * const _nettle_ciphers[] = {
&nettle_arctwo64,
&nettle_arctwo128,
&nettle_arctwo_gutmann128,
+ &nettle_sm4,
NULL
};
diff --git a/nettle-meta.h b/nettle-meta.h
index d684947e..19dc96c5 100644
--- a/nettle-meta.h
+++ b/nettle-meta.h
@@ -89,6 +89,8 @@ extern const struct nettle_cipher nettle_arctwo64;
extern const struct nettle_cipher nettle_arctwo128;
extern const struct nettle_cipher nettle_arctwo_gutmann128;
+extern const struct nettle_cipher nettle_sm4;
+
struct nettle_hash
{
const char *name;
@@ -198,6 +200,7 @@ extern const struct nettle_aead nettle_gcm_aes192;
extern const struct nettle_aead nettle_gcm_aes256;
extern const struct nettle_aead nettle_gcm_camellia128;
extern const struct nettle_aead nettle_gcm_camellia256;
+extern const struct nettle_aead nettle_gcm_sm4;
extern const struct nettle_aead nettle_eax_aes128;
extern const struct nettle_aead nettle_chacha_poly1305;
diff --git a/nettle.texinfo b/nettle.texinfo
index 69f9bcaf..767ae718 100644
--- a/nettle.texinfo
+++ b/nettle.texinfo
@@ -105,6 +105,7 @@ Cipher functions
* DES3::
* Salsa20::
* Serpent::
+* SM4::
* Twofish::
* nettle_cipher abstraction::
@@ -122,6 +123,7 @@ Authenticated encryption with associated data
* CCM::
* ChaCha-Poly1305::
* SIV-CMAC::
+* SIV-GCM::
* nettle_aead abstraction::
Keyed Hash Functions
@@ -442,6 +444,14 @@ This function also resets the context in the same way as
@code{sha256_init}.
@end deftypefun
+@deftypefun void sha256_compress (uint32_t *@var{state}, const uint8_t *@var{input})
+Perform a raw SHA256 compress on SHA256_BLOCK_SIZE bytes from @var{input}
+using @var{state} as IV (an array of 8 uint32_t). The output is stored in @var{state}.
+This function provides access to the underlying compression function,
+for the rare applications that need it (e.g., using a different IV than
+standard SHA256).
+@end deftypefun
+
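A minimal sketch of driving the compression function directly, with a
caller-chosen IV (buffer contents elided):

@example
uint32_t state[8];                 /* custom IV */
uint8_t block[SHA256_BLOCK_SIZE];  /* one full input block */
/* ... fill state and block ... */
sha256_compress (state, block);    /* state now holds the chained value */
@end example
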
Earlier versions of nettle defined SHA256 in the header file
@file{<nettle/sha.h>}, which is now deprecated, but kept for
compatibility.
@@ -522,6 +532,14 @@ This function also resets the context in the same way as
@code{sha512_init}.
@end deftypefun
+@deftypefun void sha512_compress (uint64_t *@var{state}, const uint8_t *@var{input})
+Perform a raw SHA512 compress on SHA512_BLOCK_SIZE bytes from
+@var{input} using @var{state} as IV (an array of 8 uint64_t). The output is stored in @var{state}.
+This function provides access to the underlying compression function,
+for the rare applications that need it (e.g., using a different IV than
+standard SHA512).
+@end deftypefun
+
@subsubsection @acronym{SHA384 and other variants of SHA512}
Several variants of SHA512 have been defined, with a different initial
@@ -929,6 +947,14 @@ This function also resets the context in the same way as
@code{md5_init}.
@end deftypefun
+@deftypefun void md5_compress (uint32_t *@var{state}, const uint8_t *@var{input})
+Perform a raw MD5 compress on MD5_BLOCK_SIZE bytes from @var{input}
+using @var{state} as IV (an array of 4 uint32_t). The output is stored in @var{state}.
+This function provides access to the underlying compression function,
+for the rare applications that need it (e.g., using a different IV than
+standard MD5).
+@end deftypefun
+
The normal way to use MD5 is to call the functions in order: First
@code{md5_init}, then @code{md5_update} zero or more times, and finally
@code{md5_digest}. After @code{md5_digest}, the context is reset to
@@ -1083,6 +1109,13 @@ This function also resets the context in the same way as
@code{sha1_init}.
@end deftypefun
+@deftypefun void sha1_compress (uint32_t *@var{state}, const uint8_t *@var{input})
+Perform a raw SHA1 compress on SHA1_BLOCK_SIZE bytes from @var{input}
+using @var{state} as IV (an array of 5 uint32_t). The output is stored in @var{state}.
+This function provides access to the underlying compression function,
+for the rare applications that need it (e.g., using a different IV than
+standard SHA1).
+@end deftypefun
@subsubsection @acronym{GOSTHASH94 and GOSTHASH94CP}
@cindex GOST hash
@@ -1292,6 +1325,7 @@ decryption.
* DES3::
* Salsa20::
* Serpent::
+* SM4::
* Twofish::
* nettle_cipher abstraction::
@end menu
@@ -2114,6 +2148,42 @@ in any other way.
Analogous to @code{serpent_encrypt}
@end deftypefun
+@node SM4
+@subsection SM4
+@cindex SM4
+
+SM4 is a block cipher standard adopted by the government of the People's
+Republic of China, and it was issued by the State Cryptography Administration
+on March 21, 2012. The standard is GM/T 0002-2012 "SM4 block cipher algorithm".
+Nettle defines it in @file{<nettle/sm4.h>}.
+
+@deftp {Context struct} {struct sm4_ctx}
+@end deftp
+
+@defvr Constant SM4_BLOCK_SIZE
+The SM4 block-size, 16.
+@end defvr
+
+@defvr Constant SM4_KEY_SIZE
+Default SM4 key size, 16.
+@end defvr
+
+@deftypefun void sm4_set_encrypt_key (struct sm4_ctx *@var{ctx}, const uint8_t *@var{key})
+Initialize the cipher. The function is used for encryption.
+@end deftypefun
+
+@deftypefun void sm4_set_decrypt_key (struct sm4_ctx *@var{ctx}, const uint8_t *@var{key})
+Initialize the cipher. The function is used for decryption.
+@end deftypefun
+
+@deftypefun void sm4_crypt (const struct sm4_ctx *@var{ctx}, size_t @var{length}, uint8_t *@var{dst}, const uint8_t *@var{src})
+Encrypts or decrypts data. @var{length} must be an integral multiple of the
+block size. If it is more than one block, the data is processed in ECB
+mode. @code{src} and @code{dst} may be equal, but they must not overlap
+in any other way. The same function is used for both encryption and
+decryption.
+@end deftypefun
+
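A minimal usage sketch, encrypting one block in place (key material elided):

@example
struct sm4_ctx ctx;
uint8_t key[SM4_KEY_SIZE];
uint8_t block[SM4_BLOCK_SIZE];
/* ... set key and block ... */
sm4_set_encrypt_key (&ctx, key);
sm4_crypt (&ctx, SM4_BLOCK_SIZE, block, block);
@end example
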
@node Twofish
@subsection Twofish
@cindex Twofish
@@ -2811,6 +2881,7 @@ more adventurous alternative, in particular if performance is important.
* CCM::
* ChaCha-Poly1305::
* SIV-CMAC::
+* SIV-GCM::
* nettle_aead abstraction::
@end menu
@@ -3227,6 +3298,44 @@ that @var{length} is @code{GCM_DIGEST_SIZE}, but if you provide a smaller
value, only the first @var{length} octets of the digest are written.
@end deftypefun
+@subsubsection @acronym{GCM}-SM4 interface
+
+The following functions implement the case of @acronym{GCM} using
+SM4 as the underlying cipher.
+
+@deftp {Context struct} {struct gcm_sm4_ctx}
+Context structs, defined using @code{GCM_CTX}.
+@end deftp
+
+@deftypefun void gcm_sm4_set_key (struct gcm_sm4_ctx *@var{ctx}, const uint8_t *@var{key})
+Initializes @var{ctx} using the given key.
+@end deftypefun
+
+@deftypefun void gcm_sm4_set_iv (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, const uint8_t *@var{iv})
+Initializes the per-message state, using the given @acronym{IV}.
+@end deftypefun
+
+@deftypefun void gcm_sm4_update (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, const uint8_t *@var{data})
+Provides associated data to be authenticated. If used, must be called
+before @code{gcm_sm4_encrypt} or @code{gcm_sm4_decrypt}. All but the
+last call for each message @emph{must} use a length that is a multiple
+of the block size.
+@end deftypefun
+
+@deftypefun void gcm_sm4_encrypt (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, uint8_t *@var{dst}, const uint8_t *@var{src})
+@deftypefunx void gcm_sm4_decrypt (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, uint8_t *@var{dst}, const uint8_t *@var{src})
+Encrypts or decrypts the data of a message. All but the last call for
+each message @emph{must} use a length that is a multiple of the block
+size.
+@end deftypefun
+
+@deftypefun void gcm_sm4_digest (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, uint8_t *@var{digest})
+Extracts the message digest (also known as the ``authentication tag''). This is
+the final operation when processing a message. It's strongly recommended
+that @var{length} is @code{GCM_DIGEST_SIZE}, but if you provide a smaller
+value, only the first @var{length} octets of the digest are written.
+@end deftypefun
+
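A minimal sketch of the usual call sequence for a single message; the buffers
and their lengths are assumed to be set up by the caller:

@example
struct gcm_sm4_ctx ctx;
gcm_sm4_set_key (&ctx, key);
gcm_sm4_set_iv (&ctx, GCM_IV_SIZE, iv);
gcm_sm4_update (&ctx, alength, adata);
gcm_sm4_encrypt (&ctx, length, ciphertext, plaintext);
gcm_sm4_digest (&ctx, GCM_DIGEST_SIZE, digest);
@end example
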
@node CCM
@subsection Counter with CBC-MAC mode
@@ -3626,6 +3735,95 @@ are equal, this will return 1 indicating a valid and authenticated
message. Otherwise, this function will return zero.
@end deftypefun
+@node SIV-GCM
+@subsection SIV-GCM
+
+@acronym{SIV-GCM}, described in @cite{RFC 8452}, is an @acronym{AEAD}
+construction similar to @acronym{AES-GCM}, but provides protection against
+accidental nonce misuse like @acronym{SIV-CMAC} mode.
+
+It is constructed on top of a block cipher which must have a block size of 128
+bits, and uses a nonce of 12 bytes. Nettle's support for @acronym{SIV-GCM}
+consists of a message encryption and authentication interface, for
+@acronym{SIV-GCM} using AES as the underlying block cipher. These
+interfaces are defined in @file{<nettle/siv-gcm.h>}.
+
+Unlike other @acronym{AEAD} modes, in @acronym{SIV-GCM} the tag is calculated
+over the encoded additional authentication data and the plaintext, instead of
+the ciphertext.
+
+@subsubsection General interface
+
+@defvr Constant SIV_GCM_BLOCK_SIZE
+@acronym{SIV-GCM}'s block size, 16.
+@end defvr
+
+@defvr Constant SIV_GCM_DIGEST_SIZE
+Size of the @acronym{SIV-GCM} digest for tags, 16.
+@end defvr
+
+@defvr Constant SIV_GCM_NONCE_SIZE
+Size of the @acronym{SIV-GCM} nonce, 12.
+@end defvr
+
+@deftypefun void siv_gcm_encrypt_message (const struct nettle_cipher *@var{nc}, const void *@var{ctx}, void *@var{ctr_ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{clength}, uint8_t *@var{dst}, const uint8_t *@var{src})
+Computes the message digest from the @var{adata} and @var{src}
+parameters, encrypts the plaintext from @var{src}, appends the
+authentication tag to the ciphertext and outputs it to @var{dst}. The
+@var{clength} variable must be equal to the length of @var{src} plus
+@code{SIV_GCM_DIGEST_SIZE}.
+@end deftypefun
+
+@deftypefun int siv_gcm_decrypt_message (const struct nettle_cipher *@var{nc}, const void *@var{ctx}, void *@var{ctr_ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{mlength}, uint8_t *@var{dst}, const uint8_t *@var{src})
+Decrypts the ciphertext from @var{src}, outputs the plaintext to
+@var{dst}, and recalculates the authentication tag from @var{adata} and the
+plaintext. The @var{mlength} variable must be equal to the length of
+@var{src} minus @code{SIV_GCM_DIGEST_SIZE}. If the received and
+recalculated tags are equal, this will return 1, indicating a valid and
+authenticated message. Otherwise, this function returns zero.
+@end deftypefun
+
+In the above interface, @var{nc} must point to a cipher with a 16-byte
+block size and a key size that is a multiple of 8 bytes. The @var{ctx}
+context structure must be initialized for encryption mode using a set-key
+function, before using any of the functions in this interface. The
+@var{ctr_ctx} context structure must have the same size as @var{ctx}, but
+it does not need to be initialized before calling those functions, as it
+is used only as working storage. These structures can point to the same
+area; in that case the contents of *@var{ctx} are destroyed by the call.
+
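+As a brief sketch of the calling sequence (this mirrors what the
+@acronym{AES} wrappers of the next section do internally; the @code{key},
+@code{nonce}, @code{adata}, @code{src} and @code{dst} buffers and their
+lengths are assumed to be set up by the caller):
+
+@example
+struct aes128_ctx ctx, ctr_ctx;
+
+aes128_set_encrypt_key (&ctx, key);
+siv_gcm_encrypt_message (&nettle_aes128, &ctx, &ctr_ctx,
+                         SIV_GCM_NONCE_SIZE, nonce,
+                         alength, adata,
+                         length + SIV_GCM_DIGEST_SIZE, dst, src);
+@end example
+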
+For convenience, Nettle provides wrapper functions that work with
+@acronym{AES}, described in the following section.
+
+@subsubsection @acronym{SIV-GCM}-@acronym{AES} interface
+
+The following functions provide an API for using @acronym{SIV-GCM} mode
+with the @acronym{AES} block ciphers. The parameters all have the same
+meaning as in the general interface above, except that the @var{nc},
+@var{ctx} and @var{ctr_ctx} parameters are replaced with an @acronym{AES}
+context structure. The @acronym{AES} context structure must be initialized
+for encryption mode using a set-key function, before using any of the
+functions in this interface.
+
+@deftypefun void siv_gcm_aes128_encrypt_message (const struct aes128_ctx *@var{ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{clength}, uint8_t *@var{dst}, const uint8_t *@var{src})
+@deftypefunx void siv_gcm_aes256_encrypt_message (const struct aes256_ctx *@var{ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{clength}, uint8_t *@var{dst}, const uint8_t *@var{src})
+Computes the message digest from the @var{adata} and @var{src}
+parameters, encrypts the plaintext from @var{src}, appends the
+authentication tag to the ciphertext and outputs it to @var{dst}.
+The @var{clength} variable must be equal to the length of @var{src}
+plus @code{SIV_GCM_DIGEST_SIZE}.
+
+@end deftypefun
+
+@deftypefun int siv_gcm_aes128_decrypt_message (const struct aes128_ctx *@var{ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{mlength}, uint8_t *@var{dst}, const uint8_t *@var{src})
+@deftypefunx int siv_gcm_aes256_decrypt_message (const struct aes256_ctx *@var{ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{mlength}, uint8_t *@var{dst}, const uint8_t *@var{src})
+Decrypts the ciphertext from @var{src}, outputs the plaintext to
+@var{dst}, and recalculates the authentication tag from @var{adata} and the
+plaintext. The @var{mlength} variable must be equal to the length of
+@var{src} minus @code{SIV_GCM_DIGEST_SIZE}. If the received and
+recalculated tags are equal, this will return 1, indicating a valid and
+authenticated message. Otherwise, this function returns zero.
+@end deftypefun
+
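+As an illustration (a sketch only; @code{key}, @code{nonce}, @code{adata},
+@code{message}, @code{ciphertext}, @code{plaintext} and the associated
+lengths are assumed to be set up by the caller), a round trip with AES-128
+could look like:
+
+@example
+/* length is the plaintext length; the ciphertext is
+   length + SIV_GCM_DIGEST_SIZE octets long.  */
+struct aes128_ctx ctx;
+
+aes128_set_encrypt_key (&ctx, key);
+siv_gcm_aes128_encrypt_message (&ctx, SIV_GCM_NONCE_SIZE, nonce,
+                                alength, adata,
+                                length + SIV_GCM_DIGEST_SIZE,
+                                ciphertext, message);
+
+/* Returns 1 only if the authentication tag is valid.  */
+if (!siv_gcm_aes128_decrypt_message (&ctx, SIV_GCM_NONCE_SIZE, nonce,
+                                     alength, adata,
+                                     length, plaintext, ciphertext))
+  /* Reject the message.  */;
+@end example
+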
@node nettle_aead abstraction
@subsection The @code{struct nettle_aead} abstraction
@cindex nettle_aead
@@ -4348,6 +4546,81 @@ salt @var{salt} of length @var{salt_length}, with iteration counter
room for at least @var{length} octets.
@end deftypefun
+
+@subsection @acronym{BALLOON}
+@cindex Balloon password-hashing algorithm
+Balloon is a memory-hard password-hashing algorithm. An in-depth description
+of the algorithm and its properties can be found in an online research paper:
+Boneh, D., Corrigan-Gibbs, H., Schechter, S. (2017, May 12). Balloon Hashing:
+A Memory-Hard Function Providing Provable Protection Against Sequential Attacks.
+Retrieved Sep 1, 2022, from @url{https://eprint.iacr.org/2016/027.pdf}
+
+Nettle's interface to the @acronym{BALLOON} algorithm is defined in
+@file{<nettle/balloon.h>}. There is a general @acronym{BALLOON} function
+where the user can specify the hash algorithm to be used, as well as
+concrete, more convenient functions for the common hash algorithms SHA1,
+SHA256, SHA384 and SHA512. There is also a utility function which
+determines the size of the working buffer that must be provided as one of
+the inputs.
+
+Each @acronym{BALLOON} function takes as input a password and a salt of
+arbitrary lengths, time and space cost parameters, and a scratch buffer.
+The space parameter @var{s_cost} determines how many blocks of working
+space the algorithm requires during its computation. It is common to set
+@var{s_cost} to a high value in order to increase the cost of hardware
+accelerators built by the adversary. The time parameter @var{t_cost}
+determines the number of rounds of computation that the algorithm performs.
+This can be used to further increase the cost of computation without raising
+the memory requirement. The scratch buffer @var{scratch} is user-allocated
+working space required by the algorithm; use the utility function
+@code{balloon_itch} to determine its required size. The output of the
+@acronym{BALLOON} algorithm is written to the output buffer @var{dst},
+which must be at least @var{digest_size} bytes long. Note that it is safe
+to use the same buffer for both @var{scratch} and @var{dst}. Next follows
+the description of the general @acronym{BALLOON} function.
+
+@deftypefun void balloon (void *@var{hash_ctx}, nettle_hash_update_func *@var{update}, nettle_hash_digest_func *@var{digest}, size_t @var{digest_size}, size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst})
+Computes the hash of the given password @var{passwd} of length
+@var{passwd_length}, salted with @var{salt} of length @var{salt_length}, and
+writes @var{digest_size} bytes into the output buffer @var{dst}. The
+parameter @var{hash_ctx} is a context for the underlying hash function,
+which must be initialized by the caller. @var{update} and @var{digest} are
+the update and digest functions of the chosen hash algorithm.
+@var{digest_size} is the digest size of the chosen hash algorithm and
+determines the size of the output.
+@end deftypefun
+
+@deftypefun size_t balloon_itch (size_t @var{digest_size}, size_t @var{s_cost})
+Computes the required size, in bytes, of the scratch buffer @var{scratch}.
+@var{digest_size} is the digest size of the chosen hash algorithm, and
+@var{s_cost} is the space parameter passed to the @code{balloon} function.
+@end deftypefun
+
+@subsection Concrete @acronym{BALLOON} functions
+Here follows a list of the specialized @acronym{BALLOON} functions, which are
+more user-friendly variants of the general function.
+
+@subsubsection @acronym{BALLOON-SHA1}
+
+@deftypefun void balloon_sha1 (size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst})
+@acronym{BALLOON} algorithm using SHA1 as the underlying hash function.
+@end deftypefun
+
+@subsubsection @acronym{BALLOON-SHA256}
+
+@deftypefun void balloon_sha256 (size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst})
+@acronym{BALLOON} algorithm using SHA256 as the underlying hash function.
+@end deftypefun
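+
+A usage sketch (the password and salt buffers, their lengths, and the cost
+parameters @code{s_cost} and @code{t_cost} are assumed to be chosen by the
+caller; allocation error handling is omitted):
+
+@example
+uint8_t dst[SHA256_DIGEST_SIZE];
+uint8_t *scratch = malloc (balloon_itch (SHA256_DIGEST_SIZE, s_cost));
+
+balloon_sha256 (s_cost, t_cost,
+                passwd_length, passwd,
+                salt_length, salt,
+                scratch, dst);
+free (scratch);
+@end example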
+
+@subsubsection @acronym{BALLOON-SHA384}
+
+@deftypefun void balloon_sha384 (size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst})
+@acronym{BALLOON} algorithm using SHA384 as the underlying hash function.
+@end deftypefun
+
+@subsubsection @acronym{BALLOON-SHA512}
+
+@deftypefun void balloon_sha512 (size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst})
+@acronym{BALLOON} algorithm using SHA512 as the underlying hash function.
+@end deftypefun
+
+
@node Public-key algorithms
@section Public-key algorithms
diff --git a/nist-keywrap.c b/nist-keywrap.c
index 8fdd9335..2aca8423 100644
--- a/nist-keywrap.c
+++ b/nist-keywrap.c
@@ -44,24 +44,7 @@
#include "nist-keywrap.h"
#include "memops.h"
#include "macros.h"
-
-#if WORDS_BIGENDIAN
-#define bswap_if_le(x) (x)
-#elif HAVE_BUILTIN_BSWAP64
-#define bswap_if_le(x) (__builtin_bswap64 (x))
-#else
-static uint64_t
-bswap_if_le (uint64_t x)
-{
- x = ((x >> 32) & UINT64_C (0xffffffff))
- | ((x & UINT64_C (0xffffffff)) << 32);
- x = ((x >> 16) & UINT64_C (0xffff0000ffff))
- | ((x & UINT64_C (0xffff0000ffff)) << 16);
- x = ((x >> 8) & UINT64_C (0xff00ff00ff00ff))
- | ((x & UINT64_C (0xff00ff00ff00ff)) << 8);
- return x;
-}
-#endif
+#include "bswap-internal.h"
void
nist_keywrap16 (const void *ctx, nettle_cipher_func *encrypt,
@@ -94,7 +77,7 @@ nist_keywrap16 (const void *ctx, nettle_cipher_func *encrypt,
encrypt (ctx, 16, B.b, I.b);
/* A = MSB(64, B) ^ t where t = (n*j)+i */
- A.u64 = B.u64[0] ^ bswap_if_le ((n * j) + (i + 1));
+ A.u64 = B.u64[0] ^ bswap64_if_le ((n * j) + (i + 1));
/* R[i] = LSB(64, B) */
memcpy (R + (i * 8), B.b + 8, 8);
@@ -129,7 +112,7 @@ nist_keyunwrap16 (const void *ctx, nettle_cipher_func *decrypt,
for (i = n - 1; i >= 0; i--)
{
/* B = AES-1(K, (A ^ t) | R[i]) where t = n*j+i */
- I.u64[0] = A.u64 ^ bswap_if_le ((n * j) + (i + 1));
+ I.u64[0] = A.u64 ^ bswap64_if_le ((n * j) + (i + 1));
memcpy (I.b + 8, R + (i * 8), 8);
decrypt (ctx, 16, B.b, I.b);
diff --git a/poly1305-aes.c b/poly1305-aes.c
index a4050254..374d5a78 100644
--- a/poly1305-aes.c
+++ b/poly1305-aes.c
@@ -56,13 +56,12 @@ poly1305_aes_set_nonce (struct poly1305_aes_ctx *ctx,
memcpy (ctx->nonce, nonce, POLY1305_AES_NONCE_SIZE);
}
-#define COMPRESS(ctx, data) _nettle_poly1305_block(&(ctx)->pctx, (data), 1)
-
void
poly1305_aes_update (struct poly1305_aes_ctx *ctx,
size_t length, const uint8_t *data)
{
- MD_UPDATE (ctx, length, data, COMPRESS, (void) 0);
+ ctx->index = _nettle_poly1305_update (&(ctx)->pctx,
+ ctx->block, ctx->index, length, data);
}
void
diff --git a/poly1305-internal.h b/poly1305-internal.h
index 9932d524..a6afd466 100644
--- a/poly1305-internal.h
+++ b/poly1305-internal.h
@@ -53,7 +53,15 @@ void _nettle_poly1305_digest (struct poly1305_ctx *ctx, union nettle_block16 *s)
/* Process one block. */
void _nettle_poly1305_block (struct poly1305_ctx *ctx, const uint8_t *m,
unsigned high);
-
+/* Updates CTX by hashing M, which must be an integral number of
+   blocks. For convenience, returns a pointer to the end of the
+   data. The high pad bit (2^128) is implied set on all input blocks. */
+const uint8_t *
+_nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m);
+
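+/* Hashes the INDEX octets buffered in BUFFER together with LENGTH octets
+   from M, leaving any final partial block in BUFFER. Returns the new
+   number of buffered octets. */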
+unsigned
+_nettle_poly1305_update (struct poly1305_ctx *ctx, uint8_t *buffer, unsigned index,
+ size_t length, const uint8_t *m);
#ifdef __cplusplus
}
#endif
diff --git a/poly1305-update.c b/poly1305-update.c
new file mode 100644
index 00000000..15ee3231
--- /dev/null
+++ b/poly1305-update.c
@@ -0,0 +1,78 @@
+/* poly1305-update.c
+
+ Copyright (C) 2022 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "poly1305.h"
+#include "poly1305-internal.h"
+#include "md-internal.h"
+
+#if HAVE_NATIVE_fat_poly1305_blocks
+const uint8_t *
+_nettle_poly1305_blocks_c(struct poly1305_ctx *ctx,
+ size_t blocks, const uint8_t *m);
+
+const uint8_t *
+_nettle_poly1305_blocks_c(struct poly1305_ctx *ctx,
+ size_t blocks, const uint8_t *m)
+{
+ for (; blocks; blocks--, m += POLY1305_BLOCK_SIZE)
+ _nettle_poly1305_block(ctx, m, 1);
+ return m;
+}
+#endif
+
+unsigned
+_nettle_poly1305_update (struct poly1305_ctx *ctx,
+ uint8_t *block, unsigned index,
+ size_t length, const uint8_t *m)
+{
+ if (index > 0)
+ {
+ /* Try to fill partial block */
+ MD_FILL_OR_RETURN_INDEX (POLY1305_BLOCK_SIZE, block, index,
+ length, m);
+ _nettle_poly1305_block(ctx, block, 1);
+ }
+#if HAVE_NATIVE_poly1305_blocks
+ m = _nettle_poly1305_blocks (ctx, length >> 4, m);
+ length &= 15;
+#else
+ for (; length >= POLY1305_BLOCK_SIZE;
+ length -= POLY1305_BLOCK_SIZE, m += POLY1305_BLOCK_SIZE)
+ _nettle_poly1305_block (ctx, m, 1);
+#endif
+
+ memcpy (block, m, length);
+ return length;
+}
diff --git a/powerpc64/fat/poly1305-blocks.asm b/powerpc64/fat/poly1305-blocks.asm
new file mode 100644
index 00000000..9efef0a0
--- /dev/null
+++ b/powerpc64/fat/poly1305-blocks.asm
@@ -0,0 +1,38 @@
+C powerpc64/fat/poly1305-blocks.asm
+
+ifelse(`
+ Copyright (C) 2022 Mamone Tarsha
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl picked up by configure
+dnl PROLOGUE(_nettle_poly1305_blocks)
+dnl PROLOGUE(_nettle_fat_poly1305_blocks)
+
+define(`fat_transform', `$1_ppc64')
+include_src(`powerpc64/p9/poly1305-blocks.asm')
diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4
index b59f0863..8f28f295 100644
--- a/powerpc64/machine.m4
+++ b/powerpc64/machine.m4
@@ -51,3 +51,15 @@ forloop(i,0,63,`deflit(`vs'i,i)')
forloop(i,0,31,`deflit(`f'i,i)')
forloop(i,0,7, `deflit(`cr'i,i)')
')
+
+C Increase index of general-purpose register by specific value
+C INC_GPR(GPR, INC)
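+C e.g. INC_GPR(r3, 2) expands to r5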
+define(`INC_GPR',`ifelse(substr($1,0,1),`r',
+``r'eval($2+substr($1,1,len($1)))',
+`eval($2+$1)')')
+
+C Increase index of vector register by specific value
+C INC_VR(VR, INC)
+define(`INC_VR',`ifelse(substr($1,0,1),`v',
+``v'eval($2+substr($1,1,len($1)))',
+`eval($2+$1)')')
diff --git a/powerpc64/p7/chacha-2core.asm b/powerpc64/p7/chacha-2core.asm
index d5935263..ec20b4a5 100644
--- a/powerpc64/p7/chacha-2core.asm
+++ b/powerpc64/p7/chacha-2core.asm
@@ -60,6 +60,9 @@ define(`S3p1', `v16')
define(`T0', `v17')
+define(`EW_MASK', `v18')
+define(`OW_MASK', `v19')
+
.text
C _chacha_2core(uint32_t *dst, const uint32_t *src, unsigned rounds)
@@ -78,6 +81,9 @@ PROLOGUE(_nettle_chacha_2core)
vor Y3, Y3, X1
.Lshared_entry:
+ DATA_LOAD_VEC(EW_MASK,.even_word_mask,r6)
+ DATA_LOAD_VEC(OW_MASK,.odd_word_mask,r6)
+
vadduwm Y3, Y3, X3
li r6, 0x10 C set up some...
@@ -92,14 +98,14 @@ PROLOGUE(_nettle_chacha_2core)
vor S3, X3, X3
vor S3p1, Y3, Y3
- vmrgow Y0, X0, X0 C 1 1 3 3
- vmrgew X0, X0, X0 C 0 0 2 2
- vmrgow Y1, X1, X1 C 5 5 7 7
- vmrgew X1, X1, X1 C 4 4 6 6
- vmrgow Y2, X2, X2 C 9 9 11 11
- vmrgew X2, X2, X2 C 8 8 10 10
- vmrgow Y3, X3, S3p1 C 13 13 15 15
- vmrgew X3, X3, S3p1 C 12 12 14 14
+ vperm Y0, X0, X0, OW_MASK C 1 1 3 3
+ vperm X0, X0, X0, EW_MASK C 0 0 2 2
+ vperm Y1, X1, X1, OW_MASK C 5 5 7 7
+ vperm X1, X1, X1, EW_MASK C 4 4 6 6
+ vperm Y2, X2, X2, OW_MASK C 9 9 11 11
+ vperm X2, X2, X2, EW_MASK C 8 8 10 10
+ vperm Y3, X3, S3p1, OW_MASK C 13 13 15 15
+ vperm X3, X3, S3p1, EW_MASK C 12 12 14 14
vspltisw ROT16, -16 C -16 instead of 16 actually works!
vspltisw ROT12, 12
@@ -189,17 +195,17 @@ C Y3 A15 B15 A13 B13 X3 A12 B12 A14 B14 (Y3 swapped)
bdnz .Loop
- vmrgew T0, X0, Y0
- vmrgow Y0, X0, Y0
+ vperm T0, X0, Y0, EW_MASK
+ vperm Y0, X0, Y0, OW_MASK
- vmrgew X0, X1, Y1
- vmrgow Y1, X1, Y1
+ vperm X0, X1, Y1, EW_MASK
+ vperm Y1, X1, Y1, OW_MASK
- vmrgew X1, X2, Y2
- vmrgow Y2, X2, Y2
+ vperm X1, X2, Y2, EW_MASK
+ vperm Y2, X2, Y2, OW_MASK
- vmrgew X2, X3, Y3
- vmrgow Y3, X3, Y3
+ vperm X2, X3, Y3, EW_MASK
+ vperm Y3, X3, Y3, OW_MASK
vadduwm T0, T0, S0
vadduwm Y0, Y0, S0
@@ -251,6 +257,15 @@ PROLOGUE(_nettle_chacha_2core32)
b .Lshared_entry
EPILOGUE(_nettle_chacha_2core32)
+.rodata
+.align 4
+.even_word_mask:
+IF_LE(`.byte 27,26,25,24,11,10,9,8,19,18,17,16,3,2,1,0')
+IF_BE(`.byte 0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27')
+.odd_word_mask:
+IF_LE(`.byte 31,30,29,28,15,14,13,12,23,22,21,20,7,6,5,4')
+IF_BE(`.byte 4,5,6,7,20,21,22,23,12,13,14,15,28,29,30,31')
+
divert(-1)
define core2state
p/x $vs32.v4_int32
diff --git a/powerpc64/p9/poly1305-blocks.asm b/powerpc64/p9/poly1305-blocks.asm
new file mode 100644
index 00000000..90e3df7b
--- /dev/null
+++ b/powerpc64/p9/poly1305-blocks.asm
@@ -0,0 +1,434 @@
+C powerpc64/p9/poly1305-blocks.asm
+
+ifelse(`
+ Copyright (C) 2013, 2022 Niels Möller
+ Copyright (C) 2022 Mamone Tarsha
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+include_src(`powerpc64/p9/poly1305.m4')
+
+C Register usage:
+
+define(`SP', `r1')
+define(`TOCP', `r2')
+
+C Arguments
+define(`CTX', `r3')
+define(`BLOCKS', `r4')
+define(`DATA', `r5')
+
+define(`PADBYTE', `r6') C Padding byte register
+
+define(`DEFINES_BLOCK_R44', `
+ define(`R0', `v0')
+ define(`R1', `v1')
+ define(`R2', `v2')
+ define(`S1', `v3')
+ define(`S2', `v4')
+ define(`H0', `v5')
+ define(`H1', `v6')
+ define(`H2', `v7')
+
+ define(`R3', `v8')
+ define(`R4', `v9')
+ define(`R5', `v10')
+ define(`S4', `v11')
+ define(`S5', `v12')
+
+ define(`T0', `v13')
+ define(`T1', `v14')
+ define(`T2', `v15')
+ define(`T3', `v16')
+ define(`T4', `v17')
+ define(`T5', `v18')
+ define(`TMP', `v19')
+ define(`TMP2', `v20')
+
+ define(`ZERO', `v21')
+ define(`MASK44', `v22')
+ define(`MASK42L', `v23')
+ define(`MASK44L', `v24')
+ define(`T4PAD', `v25')
+ define(`D40', `v26')
+ define(`D20', `v27')
+ define(`D24', `v28')
+ define(`D44', `v29')
+ define(`D2', `v30')
+ define(`D4', `v31')
+ ')
+
+C Compute S_1 = 20 * R_1 and S_2 = 20 * R_2
+C COMPUTE_S(S1, S2, R1, R2)
+define(`COMPUTE_S', `
+ vsld $1, $3, D2
+ vsld $2, $4, D2
+ vaddudm $1, $1, $3
+ vaddudm $2, $2, $4
+ vsld $1, $1, D2
+ vsld $2, $2, D2
+ ')
+
+C Convert two-part radix 2^64 to three-part radix 2^44, for four blocks
+C R64_TO_R44_4B(VR0, VR1, VR2, VR3, VR4, VR5)
+define(`R64_TO_R44_4B', `
+ vsrd $3, $2, D24
+ vsrd $6, $5, D24
+ vsrd TMP, $1, D44
+ vsrd TMP2, $4, D44
+ vsld $2, $2, D20
+ vsld $5, $5, D20
+ vor $2, $2, TMP
+ vor $5, $5, TMP2
+ vand $1, $1, MASK44
+ vand $4, $4, MASK44
+ vand $2, $2, MASK44
+ vand $5, $5, MASK44
+ ')
+
+C T_0 = R_0 H_0 + S_2 H_1 + S_1 H_2
+C T_1 = R_1 H_0 + R_0 H_1 + S_2 H_2
+C T_2 = R_2 H_0 + R_1 H_1 + R_0 H_2
+C MUL(T0, T1, T2, H0, H1, H2)
+define(`MUL', `
+ vmsumudm $1, $4, R0, ZERO
+ vmsumudm $2, $4, R1, ZERO
+ vmsumudm $3, $4, R2, ZERO
+
+ vmsumudm $1, $5, S2, $1
+ vmsumudm $2, $5, R0, $2
+ vmsumudm $3, $5, R1, $3
+
+ vmsumudm $1, $6, S1, $1
+ vmsumudm $2, $6, S2, $2
+ vmsumudm $3, $6, R0, $3
+ ')
+
+C Apply the above equations to four blocks
+C Each pair of successive blocks is interleaved horizontally
+C MUL_4B(T0, T1, T2, H0, H1, H2, H3, H4, H5)
+define(`MUL_4B', `
+ vmsumudm $1, $7, R0, ZERO
+ vmsumudm $2, $7, R1, ZERO
+ vmsumudm $3, $7, R2, ZERO
+
+ vmsumudm $1, $8, S2, $1
+ vmsumudm $2, $8, R0, $2
+ vmsumudm $3, $8, R1, $3
+
+ vmsumudm $1, $9, S1, $1
+ vmsumudm $2, $9, S2, $2
+ vmsumudm $3, $9, R0, $3
+
+ vmsumudm $1, $4, R3, $1
+ vmsumudm $2, $4, R4, $2
+ vmsumudm $3, $4, R5, $3
+
+ vmsumudm $1, $5, S5, $1
+ vmsumudm $2, $5, R3, $2
+ vmsumudm $3, $5, R4, $3
+
+ vmsumudm $1, $6, S4, $1
+ vmsumudm $2, $6, S5, $2
+ vmsumudm $3, $6, R3, $3
+ ')
+
+C Reduction phase of two interleaved chains
+C RED(H0, H1, H2, T0, T1, T2)
+define(`RED', `
+ vand $1, $4, MASK44L
+ vsro $4, $4, D40
+ vsrd $4, $4, D4
+ vadduqm $5, $5, $4
+ vand $2, $5, MASK44L
+ vsro $5, $5, D40
+ vsrd $5, $5, D4
+ vadduqm $6, $6, $5
+ vand $3, $6, MASK42L
+ vsro $6, $6, D40
+ vsrd $6, $6, D2
+ vadduqm $1, $1, $6
+ vsld $6, $6, D2
+ vadduqm $1, $1, $6
+ vsrd TMP, $1, D44
+ vand $1, $1, MASK44L
+ vadduqm $2, $2, TMP
+ ')
+
+.text
+
+C void _nettle_poly1305_blocks(struct poly1305_ctx *ctx,
+C size_t length, const uint8_t *data)
+define(`FUNC_ALIGN', `5')
+PROLOGUE(_nettle_poly1305_blocks)
+ C Save non-volatile vector registers
+ std r31,-8(SP)
+ stxv VSR(v31),-32(SP)
+ stxv VSR(v30),-48(SP)
+ stxv VSR(v29),-64(SP)
+ stxv VSR(v28),-80(SP)
+ stxv VSR(v27),-96(SP)
+ stxv VSR(v26),-112(SP)
+ stxv VSR(v25),-128(SP)
+ stxv VSR(v24),-144(SP)
+ stxv VSR(v23),-160(SP)
+ stxv VSR(v22),-176(SP)
+ stxv VSR(v21),-192(SP)
+ stxv VSR(v20),-208(SP)
+
+ C Initialize padding byte register
+ li PADBYTE, 1
+
+C Process the data four blocks at a time when the block count reaches the threshold
+ DEFINES_BLOCK_R44()
+ cmpldi BLOCKS, POLY1305_BLOCK_THRESHOLD
+ blt Ldata_r64
+ srdi r9, BLOCKS, 2
+ andi. BLOCKS, BLOCKS, 3
+ mtctr r9
+
+ C Initialize constants
+
+ vxor ZERO, ZERO, ZERO
+ vspltisb D2, 2
+ vspltisb D4, 4
+ addis r9, TOCP, .mask44@got@ha
+ ld r9, .mask44@got@l(r9)
+ lxvd2x VSR(MASK44), 0, r9
+ addi r9, r9, 16
+ lxvd2x VSR(MASK42L), 0, r9
+ addi r9, r9, 16
+ lxvd2x VSR(D40), 0, r9
+ addi r9, r9, 16
+ lxvd2x VSR(D20), 0, r9
+ addi r9, r9, 16
+ lxvd2x VSR(D24), 0, r9
+ addi r9, r9, 16
+ lxvd2x VSR(D44), 0, r9
+ xxmrghd VSR(MASK44L), VSR(ZERO), VSR(MASK44)
+
+ sldi r10, PADBYTE, 40
+ mtvsrdd VSR(T4PAD), r10, r10
+
+ C Load key of radix 2^44
+ lxsd R0, 0(CTX)
+ lxsd R1, 8(CTX)
+ vsrd R2, R1, D24
+ vsrd TMP, R0, D44
+ vsld R1, R1, D20
+ vor R1, R1, TMP
+ vand R0, R0, MASK44
+ vand R1, R1, MASK44
+ xxmrghd VSR(R0), VSR(R0), VSR(ZERO)
+ xxmrghd VSR(R1), VSR(R1), VSR(ZERO)
+ xxmrghd VSR(R2), VSR(R2), VSR(ZERO)
+
+ COMPUTE_S(S1, S2, R1, R2)
+
+ C Calculate R^2 = R R
+
+ MUL(T0, T1, T2, R0, R1, R2)
+ RED(H0, H1, H2, T0, T1, T2)
+ xxpermdi VSR(R0), VSR(R0), VSR(H0), 0b01
+ xxpermdi VSR(R1), VSR(R1), VSR(H1), 0b01
+ xxpermdi VSR(R2), VSR(R2), VSR(H2), 0b01
+
+ COMPUTE_S(S1, S2, R1, R2)
+
+ C Calculate R^3 = R^2 R
+
+ xxmrghd VSR(R3), VSR(ZERO), VSR(R0)
+ xxmrghd VSR(R4), VSR(ZERO), VSR(R1)
+ xxmrghd VSR(R5), VSR(ZERO), VSR(R2)
+
+ MUL(T0, T1, T2, R3, R4, R5)
+ RED(H0, H1, H2, T0, T1, T2)
+
+ C Calculate R^4 = R^2 R^2
+
+ xxmrgld VSR(R3), VSR(ZERO), VSR(R0)
+ xxmrgld VSR(R4), VSR(ZERO), VSR(R1)
+ xxmrgld VSR(R5), VSR(ZERO), VSR(R2)
+
+ MUL(T0, T1, T2, R3, R4, R5)
+ RED(R3, R4, R5, T0, T1, T2)
+ xxmrgld VSR(R3), VSR(H0), VSR(R3)
+ xxmrgld VSR(R4), VSR(H1), VSR(R4)
+ xxmrgld VSR(R5), VSR(H2), VSR(R5)
+
+ COMPUTE_S(S4, S5, R4, R5)
+
+ C Load state
+ ld r7, 32(CTX)
+ ld r8, 40(CTX)
+ ld r31, 48(CTX)
+
+ C Fold high part of H2
+ srdi r9, r31, 2
+ sldi r10, r9, 2
+ add r10, r10, r9
+ andi. r31, r31, 3
+ li r9, 0
+ addc r7, r7, r10
+ adde r8, r8, r9
+ adde r31, r31, r9
+
+ mtvsrdd VSR(H0), 0, r7
+ mtvsrdd VSR(H1), 0, r8
+ mtvsrdd VSR(H2), 0, r31
+
+ C Convert state of radix 2^64 to 2^44
+ vsrd TMP, H1, D24
+ vsld H2, H2, D40
+ vor H2, H2, TMP
+ vsrd TMP2, H0, D44
+ vsld H1, H1, D20
+ vor H1, H1, TMP2
+ vand H0, H0, MASK44
+ vand H1, H1, MASK44
+
+ li r8, 0x10
+ li r9, 0x20
+ li r10, 0x30
+L4B_loop:
+ C Load four blocks
+ lxvd2x VSR(T3), 0, DATA
+ lxvd2x VSR(T4), r8, DATA
+ lxvd2x VSR(T5), r9, DATA
+ lxvd2x VSR(TMP), r10, DATA
+IF_BE(`
+ xxbrd VSR(T3), VSR(T3)
+ xxbrd VSR(T4), VSR(T4)
+ xxbrd VSR(T5), VSR(T5)
+ xxbrd VSR(TMP), VSR(TMP)
+')
+ C Permute blocks into little-endian order and interleave each pair of
+ C successive blocks horizontally
+ xxmrghd VSR(T0), VSR(T4), VSR(T3)
+ xxmrgld VSR(T1), VSR(T4), VSR(T3)
+ xxmrghd VSR(T3), VSR(TMP), VSR(T5)
+ xxmrgld VSR(T4), VSR(TMP), VSR(T5)
+ R64_TO_R44_4B(T0, T1, T2, T3, T4, T5)
+ vor T2, T2, T4PAD
+ vor T5, T5, T4PAD
+
+ C Combine first block with previous state
+ vaddudm H0, H0, T0
+ vaddudm H1, H1, T1
+ vaddudm H2, H2, T2
+
+ MUL_4B(T0, T1, T2, H0, H1, H2, T3, T4, T5)
+ RED(H0, H1, H2, T0, T1, T2)
+
+ addi DATA, DATA, 64
+ bdnz L4B_loop
+
+ C Propagate carries
+ vsrd TMP, H1, D44
+ vaddudm H2, H2, TMP
+ vsrd TMP2, H2, D40
+ vsrd TMP2, TMP2, D2
+ vsld TMP, TMP2, D2
+ vand H1, H1, MASK44
+ vaddudm TMP2, TMP2, TMP
+ vaddudm H0, H0, TMP2
+ vsrd TMP, H0, D44
+ vaddudm H1, H1, TMP
+ vand H2, H2, MASK42L
+ vand H0, H0, MASK44
+
+ C Convert state of radix 2^44 to 2^64
+ vsld TMP, H1, D44
+ vor H0, H0, TMP
+ vsrd H1, H1, D20
+ vsld TMP2, H2, D24
+ vor H1, H1, TMP2
+ vsrd H2, H2, D40
+
+ xxswapd VSR(H0), VSR(H0)
+ xxswapd VSR(H1), VSR(H1)
+ xxswapd VSR(H2), VSR(H2)
+
+ C Store state
+ stxsd H0, 32(CTX)
+ stxsd H1, 40(CTX)
+ stxsd H2, 48(CTX)
+
+Ldata_r64:
+ cmpldi BLOCKS, 0
+ beq Ldone
+ mtctr BLOCKS
+ mr r4, PADBYTE
+ ld r6, P1305_H0 (CTX)
+ ld r7, P1305_H1 (CTX)
+ ld r8, P1305_H2 (CTX)
+L1B_loop:
+ BLOCK_R64(CTX,DATA,r4,r6,v0)
+ mfvsrld r6, VSR(v0)
+ mfvsrld r7, VSR(v1)
+ mfvsrd r8, VSR(v1)
+ addi DATA, DATA, 16
+ bdnz L1B_loop
+ std r6, P1305_H0 (CTX)
+ std r7, P1305_H1 (CTX)
+ std r8, P1305_H2 (CTX)
+
+Ldone:
+ C Restore non-volatile vector registers
+ ld r31, -8(SP)
+ lxv VSR(v31),-32(SP)
+ lxv VSR(v30),-48(SP)
+ lxv VSR(v29),-64(SP)
+ lxv VSR(v28),-80(SP)
+ lxv VSR(v27),-96(SP)
+ lxv VSR(v26),-112(SP)
+ lxv VSR(v25),-128(SP)
+ lxv VSR(v24),-144(SP)
+ lxv VSR(v23),-160(SP)
+ lxv VSR(v22),-176(SP)
+ lxv VSR(v21),-192(SP)
+ lxv VSR(v20),-208(SP)
+
+ mr r3, DATA
+
+ blr
+EPILOGUE(_nettle_poly1305_blocks)
+
+.rodata
+.align 4
+.mask44:
+.quad 0x00000FFFFFFFFFFF,0x00000FFFFFFFFFFF
+.mask42l:
+.quad 0x0000000000000000,0x000003FFFFFFFFFF
+.d40:
+.quad 0x0000000000000028,0x0000000000000028
+.d20:
+.quad 0x0000000000000014,0x0000000000000014
+.d24:
+.quad 0x0000000000000018,0x0000000000000018
+.d44:
+.quad 0x000000000000002C,0x000000000000002C
diff --git a/powerpc64/p9/poly1305-internal.asm b/powerpc64/p9/poly1305-internal.asm
index a082fed2..c23e16fd 100644
--- a/powerpc64/p9/poly1305-internal.asm
+++ b/powerpc64/p9/poly1305-internal.asm
@@ -30,6 +30,8 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
+include_src(`powerpc64/p9/poly1305.m4')
+
C Register usage:
define(`SP', `r1')
@@ -37,36 +39,8 @@ define(`TOCP', `r2')
C Arguments
define(`CTX', `r3')
-define(`M', `r4')
-define(`M128', `r5')
-
-C Working state
-define(`H0', `r6')
-define(`H1', `r7')
-define(`H2', `r8')
-define(`T0', `r9')
-define(`T1', `r10')
-define(`T2', `r8')
-define(`T2A', `r9')
-define(`T2S', `r10')
-define(`IDX', `r6')
-define(`RZ', `r7')
-
-define(`ZERO', `v0')
-define(`F0', `v1')
-define(`F1', `v2')
-define(`F0S', `v3')
-define(`T', `v4')
-
-define(`R', `v5')
-define(`S', `v6')
-
-define(`T00', `v7')
-define(`T10', `v8')
-define(`T11', `v9')
-define(`MU0', `v10')
-define(`MU1', `v11')
-define(`TMP', `v12')
+define(`DATA', `r4')
+define(`PADBYTE', `r5') C Padding byte register
.text
@@ -114,59 +88,17 @@ EPILOGUE(_nettle_poly1305_set_key)
C void _nettle_poly1305_block(struct poly1305_ctx *ctx, const uint8_t *m, unsigned m128)
define(`FUNC_ALIGN', `5')
PROLOGUE(_nettle_poly1305_block)
- ld H0, P1305_H0 (CTX)
- ld H1, P1305_H1 (CTX)
- ld H2, P1305_H2 (CTX)
-IF_LE(`
- ld T0, 0(M)
- ld T1, 8(M)
-')
-IF_BE(`
- ldbrx T0, 0, M
- addi M, M, 8
- ldbrx T0, 0, M
-')
-
- addc T0, T0, H0
- adde T1, T1, H1
- adde T2, M128, H2
-
- mtvsrdd VSR(T), T0, T1
-
- li IDX, P1305_S0
- lxvd2x VSR(R), 0, CTX
- lxvd2x VSR(S), IDX, CTX
-
- andi. T2A, T2, 3
- srdi T2S, T2, 2
-
- li RZ, 0
- vxor ZERO, ZERO, ZERO
-
- xxpermdi VSR(MU0), VSR(R), VSR(S), 0b01
- xxswapd VSR(MU1), VSR(R)
-
- mtvsrdd VSR(T11), 0, T2A
- mtvsrdd VSR(T00), T2S, RZ
- mtvsrdd VSR(T10), 0, T2
-
- vmsumudm F0, T, MU0, ZERO
- vmsumudm F1, T, MU1, ZERO
- vmsumudm TMP, T11, MU1, ZERO
-
- vmsumudm F0, T00, S, F0
- vmsumudm F1, T10, MU0, F1
+ ld r6, P1305_H0 (CTX)
+ ld r7, P1305_H1 (CTX)
+ ld r8, P1305_H2 (CTX)
- xxmrgld VSR(TMP), VSR(TMP), VSR(ZERO)
- xxswapd VSR(F0S), VSR(F0)
- vadduqm F1, F1, TMP
- stxsd F0S, P1305_H0 (CTX)
+ BLOCK_R64(CTX,DATA,PADBYTE,r6,v0)
- li IDX, P1305_H1
- xxmrghd VSR(F0), VSR(ZERO), VSR(F0)
- vadduqm F1, F1, F0
- xxswapd VSR(F1), VSR(F1)
- stxvd2x VSR(F1), IDX, CTX
+ li r10, P1305_H1
+ xxswapd VSR(v0), VSR(v0)
+ xxswapd VSR(v1), VSR(v1)
+ stxsd v0, P1305_H0 (CTX)
+ stxvd2x VSR(v1), r10, CTX
blr
EPILOGUE(_nettle_poly1305_block)
diff --git a/powerpc64/p9/poly1305.m4 b/powerpc64/p9/poly1305.m4
new file mode 100644
index 00000000..13a57e83
--- /dev/null
+++ b/powerpc64/p9/poly1305.m4
@@ -0,0 +1,102 @@
+C Block-count threshold above which blocks are processed in parallel,
+C in groups of four
+define(`POLY1305_BLOCK_THRESHOLD', `12')
+
+C DEFINES_BLOCK_R64(GPR0, VR0)
+define(`DEFINES_BLOCK_R64', `
+ define(`H0', `$1')
+ define(`H1', `INC_GPR($1,1)')
+ define(`H2', `INC_GPR($1,2)')
+
+ define(`T0', `INC_GPR($1,3)')
+ define(`T1', `INC_GPR($1,4)')
+ define(`T2', `H2')
+ define(`T2A', `INC_GPR($1,3)')
+ define(`T2S', `INC_GPR($1,4)')
+ define(`RZ', `H0')
+ define(`IDX', `INC_GPR($1,4)')
+
+ define(`F0', `$2')
+ define(`F1', `INC_VR($2,1)')
+
+ define(`ZERO', `INC_VR($2,2)')
+ define(`F0S', `INC_VR($2,3)')
+ define(`F11', `INC_VR($2,4)')
+ define(`T', `INC_VR($2,5)')
+
+ define(`R', `INC_VR($2,6)')
+ define(`S', `INC_VR($2,7)')
+
+ define(`T00', `INC_VR($2,8)')
+ define(`T10', `INC_VR($2,9)')
+ define(`T11', `INC_VR($2,10)')
+ define(`MU0', `INC_VR($2,11)')
+ define(`MU1', `INC_VR($2,12)')
+ ')
+
+C CTX is the address of the context where the key and pre-computed values are stored
+C DATA is the address of the input block
+C PADBYTE is the padding byte for the input block
+C GPR0 is the first of a sequence of general-purpose registers used in
+C the macro, with the following layout:
+C GPR0, GPR1, GPR2 are inputs representing the previous state in radix 2^64
+C GPR3, GPR4 are temporary registers
+C VR0 is the first of a sequence of vector registers used in
+C the macro, with the following layout:
+C VR0, VR1 are outputs representing the resulting state in radix 2^64, laid out as
+C (low 64-bit of VR0) + (low 64-bit of VR1) + (high 64-bit of VR1)
+C VR2..VR12 are temporary registers
+C BLOCK_R64(CTX, DATA, PADBYTE, GPR0, VR0)
+define(`BLOCK_R64', `
+ DEFINES_BLOCK_R64($4,$5)
+ C Load 128-bit input block
+IF_LE(`
+ ld T0, 0($2)
+ ld T1, 8($2)
+')
+IF_BE(`
+ li IDX, 8
+ ldbrx T1, IDX, $2
+ ldbrx T0, 0, $2
+')
+ C Combine state with input block; the latter is padded to 17 bytes
+ C using the low-order byte of the PADBYTE register
+ addc T0, T0, H0
+ adde T1, T1, H1
+ adde T2, $3, H2
+
+ mtvsrdd VSR(T), T0, T1
+
+ C Load key and pre-computed values
+ li IDX, 16
+ lxvd2x VSR(R), 0, $1
+ lxvd2x VSR(S), IDX, $1
+
+ andi. T2A, T2, 3
+ srdi T2S, T2, 2
+
+ li RZ, 0
+ vxor ZERO, ZERO, ZERO
+
+ xxpermdi VSR(MU0), VSR(R), VSR(S), 0b01
+ xxswapd VSR(MU1), VSR(R)
+
+ mtvsrdd VSR(T11), 0, T2A
+ mtvsrdd VSR(T00), T2S, RZ
+ mtvsrdd VSR(T10), 0, T2
+
+ C Multiply key by combined state and block
+ vmsumudm F0, T, MU0, ZERO
+ vmsumudm F1, T, MU1, ZERO
+ vmsumudm F11, T11, MU1, ZERO
+
+ vmsumudm F0, T00, S, F0
+ vmsumudm F1, T10, MU0, F1
+
+ C Product addition
+ xxmrgld VSR(F11), VSR(F11), VSR(ZERO)
+ vadduqm F1, F1, F11
+
+ xxmrghd VSR(F0S), VSR(ZERO), VSR(F0)
+ vadduqm F1, F1, F0S
+ ')
diff --git a/s390x/fat/sha256-compress-2.asm b/s390x/fat/sha256-compress-n-2.asm
index f4b16181..06fb1014 100644
--- a/s390x/fat/sha256-compress-2.asm
+++ b/s390x/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C s390x/fat/sha256-compress-2.asm
+C s390x/fat/sha256-compress-n-2.asm
ifelse(`
Copyright (C) 2021 Mamone Tarsha
@@ -30,7 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-dnl PROLOGUE(_nettle_sha256_compress) picked up by configure
+dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure
define(`fat_transform', `$1_s390x')
-include_src(`s390x/msa_x1/sha256-compress.asm')
+include_src(`s390x/msa_x1/sha256-compress-n.asm')
diff --git a/s390x/msa_x1/sha256-compress.asm b/s390x/msa_x1/sha256-compress-n.asm
index 9a9511fb..51539927 100644
--- a/s390x/msa_x1/sha256-compress.asm
+++ b/s390x/msa_x1/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C s390x/msa_x1/sha256-compress.asm
+C s390x/msa_x1/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2021 Mamone Tarsha
+ Copyright (C) 2021, 2022 Mamone Tarsha, Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
@@ -56,25 +56,23 @@ C |----------------------------------------------|
C | H7 (4 bytes) |
C *----------------------------------------------*
-.file "sha256-compress.asm"
+.file "sha256-compress-n.asm"
.text
C SHA function code
define(`SHA256_FUNCTION_CODE', `2')
-C Size of block
-define(`SHA256_BLOCK_SIZE', `64')
-C void
-C _nettle_sha256_compress(uint32_t *state, const uint8_t *input,
-C const uint32_t *k)
+C const uint8_t *
+C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+C size_t blocks, const uint8_t *input)
-PROLOGUE(_nettle_sha256_compress)
+PROLOGUE(_nettle_sha256_compress_n)
lghi %r0,SHA256_FUNCTION_CODE C SHA-256 Function Code
lgr %r1,%r2
- lgr %r4,%r3
- lghi %r5,SHA256_BLOCK_SIZE
-1: .long 0xb93e0004 C kimd %r0,%r4. perform KIMD-SHA operation on data
+ lgr %r2, %r5
+ sllg %r3, %r4, 6 C 64 * block size
+1: .long 0xb93e0002 C kimd %r0,%r2. perform KIMD-SHA operation on data
brc 1,1b
br RA
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)
diff --git a/sha2-internal.h b/sha2-internal.h
index 40f25a5f..93080bee 100644
--- a/sha2-internal.h
+++ b/sha2-internal.h
@@ -39,8 +39,9 @@
/* Internal compression function. STATE points to 8 uint32_t words,
DATA points to 64 bytes of input data, possibly unaligned, and K
points to the table of constants. */
-void
-_nettle_sha256_compress(uint32_t *state, const uint8_t *data, const uint32_t *k);
+const uint8_t *
+_nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *data);
/* Internal compression function. STATE points to 8 uint64_t words,
DATA points to 128 bytes of input data, possibly unaligned, and K
diff --git a/sha256-compress.c b/sha256-compress-n.c
index cf17e3e1..d135d14f 100644
--- a/sha256-compress.c
+++ b/sha256-compress-n.c
@@ -1,8 +1,8 @@
-/* sha256-compress.c
+/* sha256-compress-n.c
The compression function of the sha256 hash function.
- Copyright (C) 2001, 2010 Niels Möller
+ Copyright (C) 2001, 2010, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -118,26 +118,19 @@
} while (0)
/* For fat builds */
-#if HAVE_NATIVE_sha256_compress
-void
-_nettle_sha256_compress_c(uint32_t *state, const uint8_t *input, const uint32_t *k);
-#define _nettle_sha256_compress _nettle_sha256_compress_c
+#if HAVE_NATIVE_sha256_compress_n
+const uint8_t *
+_nettle_sha256_compress_n_c(uint32_t *state, const uint32_t *table,
+ size_t blocks, const uint8_t *input);
+#define _nettle_sha256_compress_n _nettle_sha256_compress_n_c
#endif
-void
-_nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+const uint8_t *
+_nettle_sha256_compress_n(uint32_t *state, const uint32_t *table,
+ size_t blocks, const uint8_t *input)
{
- uint32_t data[SHA256_DATA_LENGTH];
uint32_t A, B, C, D, E, F, G, H; /* Local vars */
- unsigned i;
- uint32_t *d;
- for (i = 0; i < SHA256_DATA_LENGTH; i++, input+= 4)
- {
- data[i] = READ_UINT32(input);
- }
-
- /* Set up first buffer and local data buffer */
A = state[0];
B = state[1];
C = state[2];
@@ -146,55 +139,68 @@ _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k
F = state[5];
G = state[6];
H = state[7];
-
- /* Heavy mangling */
- /* First 16 subrounds that act on the original data */
- DEBUG(-1);
- for (i = 0, d = data; i<16; i+=8, k += 8, d+= 8)
+ for (; blocks > 0; blocks--)
{
- ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); DEBUG(i);
- ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); DEBUG(i+1);
- ROUND(G, H, A, B, C, D, E, F, k[2], d[2]);
- ROUND(F, G, H, A, B, C, D, E, k[3], d[3]);
- ROUND(E, F, G, H, A, B, C, D, k[4], d[4]);
- ROUND(D, E, F, G, H, A, B, C, k[5], d[5]);
- ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); DEBUG(i+6);
- ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); DEBUG(i+7);
- }
+ uint32_t data[SHA256_DATA_LENGTH];
+ unsigned i;
+ const uint32_t *k;
+ uint32_t *d;
+ for (i = 0; i < SHA256_DATA_LENGTH; i++, input+= 4)
+ {
+ data[i] = READ_UINT32(input);
+ }
+
+ /* Heavy mangling */
+ /* First 16 subrounds that act on the original data */
+
+ DEBUG(-1);
+ for (i = 0, d = data, k = table; i<16; i+=8, k += 8, d+= 8)
+ {
+ ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); DEBUG(i);
+ ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); DEBUG(i+1);
+ ROUND(G, H, A, B, C, D, E, F, k[2], d[2]);
+ ROUND(F, G, H, A, B, C, D, E, k[3], d[3]);
+ ROUND(E, F, G, H, A, B, C, D, k[4], d[4]);
+ ROUND(D, E, F, G, H, A, B, C, k[5], d[5]);
+ ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); DEBUG(i+6);
+ ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); DEBUG(i+7);
+ }
- for (; i<64; i += 16, k+= 16)
- {
- ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 0)); DEBUG(i);
- ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); DEBUG(i+1);
- ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); DEBUG(i+2);
- ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); DEBUG(i+3);
- ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); DEBUG(i+4);
- ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); DEBUG(i+5);
- ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); DEBUG(i+6);
- ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); DEBUG(i+7);
- ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); DEBUG(i+8);
- ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); DEBUG(i+9);
- ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); DEBUG(i+10);
- ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); DEBUG(i+11);
- ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); DEBUG(i+12);
- ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); DEBUG(i+13);
- ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); DEBUG(i+14);
- ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); DEBUG(i+15);
- }
-
- /* Update state */
- state[0] += A;
- state[1] += B;
- state[2] += C;
- state[3] += D;
- state[4] += E;
- state[5] += F;
- state[6] += G;
- state[7] += H;
+ for (; i<64; i += 16, k+= 16)
+ {
+ ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 0)); DEBUG(i);
+ ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); DEBUG(i+1);
+ ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); DEBUG(i+2);
+ ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); DEBUG(i+3);
+ ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); DEBUG(i+4);
+ ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); DEBUG(i+5);
+ ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); DEBUG(i+6);
+ ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); DEBUG(i+7);
+ ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); DEBUG(i+8);
+ ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); DEBUG(i+9);
+ ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); DEBUG(i+10);
+ ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); DEBUG(i+11);
+ ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); DEBUG(i+12);
+ ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); DEBUG(i+13);
+ ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); DEBUG(i+14);
+ ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); DEBUG(i+15);
+ }
+
+ /* Update state */
+ state[0] = A = state[0] + A;
+ state[1] = B = state[1] + B;
+ state[2] = C = state[2] + C;
+ state[3] = D = state[3] + D;
+ state[4] = E = state[4] + E;
+ state[5] = F = state[5] + F;
+ state[6] = G = state[6] + G;
+ state[7] = H = state[7] + H;
#if SHA256_DEBUG
- fprintf(stderr, "99: %8x %8x %8x %8x %8x %8x %8x %8x\n",
- state[0], state[1], state[2], state[3],
- state[4], state[5], state[6], state[7]);
+ fprintf(stderr, "99: %8x %8x %8x %8x %8x %8x %8x %8x\n",
+ state[0], state[1], state[2], state[3],
+ state[4], state[5], state[6], state[7]);
#endif
+ }
+ return input;
}
diff --git a/sha256.c b/sha256.c
index 3872ca6f..0c9c21a0 100644
--- a/sha256.c
+++ b/sha256.c
@@ -46,6 +46,7 @@
#include "sha2-internal.h"
#include "macros.h"
+#include "md-internal.h"
#include "nettle-write.h"
/* Generated by the shadata program. */
@@ -70,6 +71,12 @@ K[64] =
0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL,
};
+void
+sha256_compress(uint32_t *state, const uint8_t *input)
+{
+ _nettle_sha256_compress_n(state, K, 1, input);
+}
+
#define COMPRESS(ctx, data) (sha256_compress((ctx)->state, (data)))
/* Initialize the SHA values */
@@ -97,7 +104,22 @@ void
sha256_update(struct sha256_ctx *ctx,
size_t length, const uint8_t *data)
{
- MD_UPDATE (ctx, length, data, COMPRESS, ctx->count++);
+ size_t blocks;
+ if (ctx->index > 0)
+ {
+ /* Try to fill partial block */
+ MD_FILL_OR_RETURN (ctx, length, data);
+ sha256_compress (ctx->state, ctx->block);
+ ctx->count++;
+ }
+
+ blocks = length >> 6;
+ data = _nettle_sha256_compress_n (ctx->state, K, blocks, data);
+ ctx->count += blocks;
+ length &= 63;
+
+ memcpy (ctx->block, data, length);
+ ctx->index = length;
}
static void
@@ -161,9 +183,3 @@ sha224_digest(struct sha256_ctx *ctx,
sha256_write_digest(ctx, length, digest);
sha224_init(ctx);
}
-
-void
-sha256_compress(uint32_t *state, const uint8_t *input)
-{
- _nettle_sha256_compress(state, input, K);
-}
diff --git a/siv-gcm-aes128.c b/siv-gcm-aes128.c
new file mode 100644
index 00000000..4317d3d8
--- /dev/null
+++ b/siv-gcm-aes128.c
@@ -0,0 +1,65 @@
+/* siv-gcm-aes128.c
+
+ AES-GCM-SIV, RFC8452
+
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "nettle-meta.h"
+#include "siv-gcm.h"
+
+void
+siv_gcm_aes128_encrypt_message (const struct aes128_ctx *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t clength, uint8_t *dst, const uint8_t *src)
+{
+ struct aes128_ctx ctr_ctx;
+ siv_gcm_encrypt_message (&nettle_aes128, ctx, &ctr_ctx,
+ nlength, nonce,
+ alength, adata,
+ clength, dst, src);
+}
+
+int
+siv_gcm_aes128_decrypt_message (const struct aes128_ctx *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t mlength, uint8_t *dst, const uint8_t *src)
+{
+ struct aes128_ctx ctr_ctx;
+ return siv_gcm_decrypt_message (&nettle_aes128, ctx, &ctr_ctx,
+ nlength, nonce,
+ alength, adata,
+ mlength, dst, src);
+}
diff --git a/siv-gcm-aes256.c b/siv-gcm-aes256.c
new file mode 100644
index 00000000..70bf3f35
--- /dev/null
+++ b/siv-gcm-aes256.c
@@ -0,0 +1,65 @@
+/* siv-gcm-aes256.c
+
+ AES-GCM-SIV, RFC8452
+
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "nettle-meta.h"
+#include "siv-gcm.h"
+
+void
+siv_gcm_aes256_encrypt_message (const struct aes256_ctx *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t clength, uint8_t *dst, const uint8_t *src)
+{
+ struct aes256_ctx ctr_ctx;
+ siv_gcm_encrypt_message (&nettle_aes256, ctx, &ctr_ctx,
+ nlength, nonce,
+ alength, adata,
+ clength, dst, src);
+}
+
+int
+siv_gcm_aes256_decrypt_message (const struct aes256_ctx *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t mlength, uint8_t *dst, const uint8_t *src)
+{
+ struct aes256_ctx ctr_ctx;
+ return siv_gcm_decrypt_message (&nettle_aes256, ctx, &ctr_ctx,
+ nlength, nonce,
+ alength, adata,
+ mlength, dst, src);
+}
diff --git a/siv-gcm.c b/siv-gcm.c
new file mode 100644
index 00000000..332a7439
--- /dev/null
+++ b/siv-gcm.c
@@ -0,0 +1,229 @@
+/* siv-gcm.c
+
+ AES-GCM-SIV, RFC8452
+
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "siv-gcm.h"
+#include "ghash-internal.h"
+#include "block-internal.h"
+#include "nettle-internal.h"
+#include "macros.h"
+#include "memops.h"
+#include "ctr-internal.h"
+#include <string.h>
+#include <assert.h>
+
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+static void
+siv_gcm_derive_keys (const void *ctx,
+ nettle_cipher_func *f,
+ size_t key_size,
+ size_t nlength, const uint8_t *nonce,
+ union nettle_block16 *auth_key,
+ uint8_t *encryption_key)
+{
+ union nettle_block16 block;
+ union nettle_block16 out;
+ size_t i;
+
+ block16_zero (&block);
+ memcpy (block.b + 4, nonce, MIN(nlength, SIV_GCM_NONCE_SIZE));
+
+ f (ctx, SIV_GCM_BLOCK_SIZE, out.b, block.b);
+ auth_key->u64[0] = out.u64[0];
+
+ block.b[0] = 1;
+ f (ctx, SIV_GCM_BLOCK_SIZE, out.b, block.b);
+ auth_key->u64[1] = out.u64[0];
+
+ assert (key_size % 8 == 0 && key_size / 8 + 2 <= UINT8_MAX);
+
+ for (i = 0; i < key_size; i += 8)
+ {
+ block.b[0]++;
+ f (ctx, SIV_GCM_BLOCK_SIZE, out.b, block.b);
+ memcpy (encryption_key + i, out.b, 8);
+ }
+}
+
+static nettle_fill16_func siv_gcm_fill;
+
+static void
+siv_gcm_fill(uint8_t *ctr, size_t blocks, union nettle_block16 *buffer)
+{
+ uint32_t c;
+
+ c = LE_READ_UINT32(ctr);
+
+ for (; blocks-- > 0; buffer++, c++)
+ {
+ memcpy(buffer->b + 4, ctr + 4, SIV_GCM_BLOCK_SIZE - 4);
+ LE_WRITE_UINT32(buffer->b, c);
+ }
+
+ LE_WRITE_UINT32(ctr, c);
+}
+
+static void
+siv_ghash_pad_update (struct gcm_key *ctx,
+ union nettle_block16 *state,
+ size_t length, const uint8_t *data)
+{
+ size_t blocks;
+
+ blocks = length / SIV_GCM_BLOCK_SIZE;
+ if (blocks > 0)
+ {
+ data = _siv_ghash_update (ctx, state, blocks, data);
+ length &= 0xf;
+ }
+ if (length > 0)
+ {
+ uint8_t block[SIV_GCM_BLOCK_SIZE];
+
+ memset (block + length, 0, SIV_GCM_BLOCK_SIZE - length);
+ memcpy (block, data, length);
+ _siv_ghash_update (ctx, state, 1, block);
+ }
+}
+
+static void
+siv_gcm_authenticate (const void *ctx,
+ const struct nettle_cipher *nc,
+ const union nettle_block16 *authentication_key,
+ const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t mlength, const uint8_t *mdata,
+ uint8_t *tag)
+{
+ union nettle_block16 state;
+ struct gcm_key siv_ghash_key;
+ union nettle_block16 block;
+
+ _siv_ghash_set_key (&siv_ghash_key, authentication_key);
+
+ block16_zero (&state);
+ siv_ghash_pad_update (&siv_ghash_key, &state, alength, adata);
+ siv_ghash_pad_update (&siv_ghash_key, &state, mlength, mdata);
+
+ block.u64[0] = bswap64_if_be (alength * 8);
+ block.u64[1] = bswap64_if_be (mlength * 8);
+
+ _siv_ghash_update (&siv_ghash_key, &state, 1, block.b);
+ block16_bswap (&state, &state);
+
+ memxor (state.b, nonce, SIV_GCM_NONCE_SIZE);
+ state.b[15] &= 0x7f;
+ nc->encrypt (ctx, SIV_GCM_BLOCK_SIZE, tag, state.b);
+}
+
+void
+siv_gcm_encrypt_message (const struct nettle_cipher *nc,
+ const void *ctx,
+ void *ctr_ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t clength, uint8_t *dst, const uint8_t *src)
+{
+ union nettle_block16 authentication_key;
+ TMP_DECL(encryption_key, uint8_t, NETTLE_MAX_CIPHER_KEY_SIZE);
+ uint8_t ctr[SIV_GCM_DIGEST_SIZE];
+ uint8_t *tag = dst + clength - SIV_GCM_BLOCK_SIZE;
+
+ assert (clength >= SIV_GCM_DIGEST_SIZE);
+ assert (nlength == SIV_GCM_NONCE_SIZE);
+
+ TMP_ALLOC(encryption_key, nc->key_size);
+ siv_gcm_derive_keys (ctx, nc->encrypt, nc->key_size, nlength, nonce,
+ &authentication_key, encryption_key);
+
+ /* Calculate authentication tag. */
+ nc->set_encrypt_key (ctr_ctx, encryption_key);
+
+ siv_gcm_authenticate (ctr_ctx, nc,
+ &authentication_key,
+ nonce, alength, adata,
+ clength - SIV_GCM_BLOCK_SIZE, src,
+ tag);
+
+ /* Encrypt the plaintext. */
+
+ /* The initial counter block is the tag with the most significant
+ bit of the last byte set to one. */
+ memcpy (ctr, tag, SIV_GCM_DIGEST_SIZE);
+ ctr[15] |= 0x80;
+ _nettle_ctr_crypt16 (ctr_ctx, nc->encrypt, siv_gcm_fill, ctr,
+ clength - SIV_GCM_BLOCK_SIZE, dst, src);
+}
+
+int
+siv_gcm_decrypt_message (const struct nettle_cipher *nc,
+ const void *ctx,
+ void *ctr_ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t mlength, uint8_t *dst, const uint8_t *src)
+{
+ union nettle_block16 authentication_key;
+ TMP_DECL(encryption_key, uint8_t, NETTLE_MAX_CIPHER_KEY_SIZE);
+ union nettle_block16 state;
+ uint8_t tag[SIV_GCM_DIGEST_SIZE];
+
+ assert (nlength == SIV_GCM_NONCE_SIZE);
+
+ TMP_ALLOC(encryption_key, nc->key_size);
+ siv_gcm_derive_keys (ctx, nc->encrypt, nc->key_size, nlength, nonce,
+ &authentication_key, encryption_key);
+
+ memcpy (state.b, src + mlength, SIV_GCM_DIGEST_SIZE);
+ /* The initial counter block is the tag with the most significant
+ bit of the last byte set to one. */
+ state.b[15] |= 0x80;
+
+ /* Decrypt the ciphertext. */
+ nc->set_encrypt_key (ctr_ctx, encryption_key);
+
+ _nettle_ctr_crypt16 (ctr_ctx, nc->encrypt, siv_gcm_fill, state.b,
+ mlength, dst, src);
+
+ /* Calculate authentication tag. */
+ siv_gcm_authenticate (ctr_ctx, nc,
+ &authentication_key,
+ nonce, alength, adata,
+ mlength, dst,
+ tag);
+
+ return memeql_sec (tag, src + mlength, SIV_GCM_DIGEST_SIZE);
+}
diff --git a/siv-gcm.h b/siv-gcm.h
new file mode 100644
index 00000000..1a9e3084
--- /dev/null
+++ b/siv-gcm.h
@@ -0,0 +1,107 @@
+/* siv-gcm.h
+
+ AES-GCM-SIV, RFC8452
+
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NETTLE_SIV_GCM_H_INCLUDED
+#define NETTLE_SIV_GCM_H_INCLUDED
+
+#include "nettle-types.h"
+#include "nettle-meta.h"
+#include "aes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Name mangling */
+#define siv_gcm_encrypt_message nettle_siv_gcm_encrypt_message
+#define siv_gcm_decrypt_message nettle_siv_gcm_decrypt_message
+#define siv_gcm_aes128_encrypt_message nettle_siv_gcm_aes128_encrypt_message
+#define siv_gcm_aes128_decrypt_message nettle_siv_gcm_aes128_decrypt_message
+#define siv_gcm_aes256_encrypt_message nettle_siv_gcm_aes256_encrypt_message
+#define siv_gcm_aes256_decrypt_message nettle_siv_gcm_aes256_decrypt_message
+
+/* For AES-GCM-SIV, the block size of the underlying cipher shall be 128 bits. */
+#define SIV_GCM_BLOCK_SIZE 16
+#define SIV_GCM_DIGEST_SIZE 16
+#define SIV_GCM_NONCE_SIZE 12
+
+/* Generic interface. NC must be a block cipher with 128-bit block
+ size, and keysize that is a multiple of 64 bits, such as AES-128 or
+ AES-256. */
+void
+siv_gcm_encrypt_message (const struct nettle_cipher *nc,
+ const void *ctx,
+ void *ctr_ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t clength, uint8_t *dst, const uint8_t *src);
+
+int
+siv_gcm_decrypt_message (const struct nettle_cipher *nc,
+ const void *ctx,
+ void *ctr_ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t mlength, uint8_t *dst, const uint8_t *src);
+
+/* AEAD_AES_128_GCM_SIV */
+void
+siv_gcm_aes128_encrypt_message (const struct aes128_ctx *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t clength, uint8_t *dst, const uint8_t *src);
+
+int
+siv_gcm_aes128_decrypt_message (const struct aes128_ctx *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t mlength, uint8_t *dst, const uint8_t *src);
+
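+/* A minimal usage sketch (key, nonce, ad, msg, MSG_SIZE and AD_SIZE
+   are illustrative, caller-provided names); the ciphertext is the
+   message followed by a SIV_GCM_DIGEST_SIZE byte tag:
+
+     struct aes128_ctx ctx;
+     uint8_t cipher[MSG_SIZE + SIV_GCM_DIGEST_SIZE];
+
+     aes128_set_encrypt_key (&ctx, key);
+     siv_gcm_aes128_encrypt_message (&ctx, SIV_GCM_NONCE_SIZE, nonce,
+                                     AD_SIZE, ad,
+                                     sizeof (cipher), cipher, msg);
+
+   siv_gcm_aes128_decrypt_message returns 1 on success, and 0 if
+   authentication fails.  */
+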
+/* AEAD_AES_256_GCM_SIV */
+void
+siv_gcm_aes256_encrypt_message (const struct aes256_ctx *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t clength, uint8_t *dst, const uint8_t *src);
+
+int
+siv_gcm_aes256_decrypt_message (const struct aes256_ctx *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t mlength, uint8_t *dst, const uint8_t *src);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NETTLE_SIV_GCM_H_INCLUDED */
diff --git a/siv-ghash-set-key.c b/siv-ghash-set-key.c
new file mode 100644
index 00000000..b13d7495
--- /dev/null
+++ b/siv-ghash-set-key.c
@@ -0,0 +1,52 @@
+/* siv-ghash-set-key.c
+
+ POLYVAL implementation for AES-GCM-SIV, based on GHASH
+
+ Copyright (C) 2011 Katholieke Universiteit Leuven
+ Copyright (C) 2011, 2013, 2018, 2022 Niels Möller
+ Copyright (C) 2018, 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "ghash-internal.h"
+#include "block-internal.h"
+
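+/* POLYVAL is computed via GHASH, using the relation from RFC 8452,
+   Appendix A: the GHASH key is mulX_GHASH (ByteReverse (H)), and each
+   input block is byte-reversed before it is hashed (see
+   _siv_ghash_update).  */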
+void
+_siv_ghash_set_key (struct gcm_key *ctx, const union nettle_block16 *key)
+{
+ union nettle_block16 h;
+
+ block16_bswap (&h, key);
+ block16_mulx_ghash (&h, &h);
+
+ _ghash_set_key (ctx, &h);
+}
diff --git a/siv-ghash-update.c b/siv-ghash-update.c
new file mode 100644
index 00000000..21ce5c6e
--- /dev/null
+++ b/siv-ghash-update.c
@@ -0,0 +1,65 @@
+/* siv-ghash-update.c
+
+ POLYVAL implementation for AES-GCM-SIV, based on GHASH
+
+ Copyright (C) 2011 Katholieke Universiteit Leuven
+ Copyright (C) 2011, 2013, 2018, 2022 Niels Möller
+ Copyright (C) 2018, 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "ghash-internal.h"
+#include "block-internal.h"
+#include "macros.h"
+
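+/* Feed data to GHASH with each 16-byte block byte-reversed,
+   converting between the POLYVAL and GHASH block conventions
+   (RFC 8452, Appendix A).  */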
+const uint8_t *
+_siv_ghash_update (const struct gcm_key *ctx, union nettle_block16 *state,
+ size_t blocks, const uint8_t *data)
+{
+ for (; blocks-- > 0; data += GCM_BLOCK_SIZE)
+ {
+ union nettle_block16 b;
+
+#if WORDS_BIGENDIAN
+ b.u64[1] = LE_READ_UINT64(data);
+ b.u64[0] = LE_READ_UINT64(data + 8);
+#else
+ b.u64[1] = READ_UINT64(data);
+ b.u64[0] = READ_UINT64(data + 8);
+#endif
+
+ _ghash_update (ctx, state, 1, b.b);
+ }
+
+ return data;
+}
+
diff --git a/sm4-meta.c b/sm4-meta.c
new file mode 100644
index 00000000..d7234984
--- /dev/null
+++ b/sm4-meta.c
@@ -0,0 +1,49 @@
+/* sm4-meta.c
+
+ Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "nettle-meta.h"
+
+#include "sm4.h"
+
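+/* SM4 uses the same round function for encryption and decryption;
+   the direction is selected by the key schedule order, so both crypt
+   entries point to sm4_crypt.  */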
+const struct nettle_cipher nettle_sm4 = {
+ "sm4",
+ sizeof(struct sm4_ctx),
+ SM4_BLOCK_SIZE,
+ SM4_KEY_SIZE,
+ (nettle_set_key_func *) sm4_set_encrypt_key,
+ (nettle_set_key_func *) sm4_set_decrypt_key,
+ (nettle_cipher_func *) sm4_crypt,
+ (nettle_cipher_func *) sm4_crypt
+};
diff --git a/sm4.c b/sm4.c
new file mode 100644
index 00000000..7b3c049a
--- /dev/null
+++ b/sm4.c
@@ -0,0 +1,223 @@
+/* sm4.c
+
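+   The SM4 block cipher, specified in GB/T 32907-2016 and described in
+   draft-ribose-cfrg-sm4.
+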
+ Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#if HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <assert.h>
+#include <string.h>
+
+#include "sm4.h"
+
+#include "macros.h"
+
+
+static const uint32_t fk[4] =
+{
+ 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static const uint32_t ck[32] =
+{
+ 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+ 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+ 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+ 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+ 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+ 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+ 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+ 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+static const uint8_t sbox[256] =
+{
+ 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+ 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+ 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+ 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+ 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+ 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+ 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+ 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+ 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+ 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+ 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+ 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+ 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+ 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+ 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+ 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+ 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+ 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+ 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+ 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+ 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+ 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+ 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+ 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+ 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+ 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+ 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+ 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+ 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+ 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+ 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+ 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+};
+
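+/* The tau transform: apply the S-box to each byte of the 32-bit word.  */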
+static uint32_t
+sm4_t_non_lin_sub(uint32_t x)
+{
+ uint32_t out;
+
+ out = (uint32_t)sbox[x & 0xff];
+ out |= (uint32_t)sbox[(x >> 8) & 0xff] << 8;
+ out |= (uint32_t)sbox[(x >> 16) & 0xff] << 16;
+ out |= (uint32_t)sbox[(x >> 24) & 0xff] << 24;
+
+ return out;
+}
+
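+/* The L' linear transform, used in the key schedule.  */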
+static uint32_t
+sm4_key_lin_sub(uint32_t x)
+{
+ return x ^ ROTL32(13, x) ^ ROTL32(23, x);
+}
+
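+/* The L linear transform, used in the encryption rounds.  */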
+static uint32_t
+sm4_enc_lin_sub(uint32_t x)
+{
+ return x ^ ROTL32(2, x) ^ ROTL32(10, x) ^ ROTL32(18, x) ^ ROTL32(24, x);
+}
+
+static uint32_t
+sm4_key_sub(uint32_t x)
+{
+ return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static uint32_t
+sm4_enc_sub(uint32_t x)
+{
+ return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static uint32_t
+sm4_round(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t rk)
+{
+ return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
+}
+
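+/* Expand the 128-bit key into 32 round keys; each new round key
+   depends on the previous three and one of the CK constants.  For
+   decryption, the round keys are stored in reverse order, so that
+   sm4_crypt serves both directions.  */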
+static void
+sm4_set_key(struct sm4_ctx *ctx, const uint8_t *key, int encrypt)
+{
+ uint32_t rk0, rk1, rk2, rk3;
+ unsigned i;
+
+ rk0 = READ_UINT32(key + 0) ^ fk[0];
+ rk1 = READ_UINT32(key + 4) ^ fk[1];
+ rk2 = READ_UINT32(key + 8) ^ fk[2];
+ rk3 = READ_UINT32(key + 12) ^ fk[3];
+
+ for (i = 0; i < 32; i += 4)
+ {
+ rk0 ^= sm4_key_sub(rk1 ^ rk2 ^ rk3 ^ ck[i + 0]);
+ rk1 ^= sm4_key_sub(rk2 ^ rk3 ^ rk0 ^ ck[i + 1]);
+ rk2 ^= sm4_key_sub(rk3 ^ rk0 ^ rk1 ^ ck[i + 2]);
+ rk3 ^= sm4_key_sub(rk0 ^ rk1 ^ rk2 ^ ck[i + 3]);
+
+ if (encrypt)
+ {
+ ctx->rkey[i + 0] = rk0;
+ ctx->rkey[i + 1] = rk1;
+ ctx->rkey[i + 2] = rk2;
+ ctx->rkey[i + 3] = rk3;
+ }
+ else
+ {
+ ctx->rkey[31 - 0 - i] = rk0;
+ ctx->rkey[31 - 1 - i] = rk1;
+ ctx->rkey[31 - 2 - i] = rk2;
+ ctx->rkey[31 - 3 - i] = rk3;
+ }
+ }
+}
+
+void
+sm4_set_encrypt_key(struct sm4_ctx *ctx, const uint8_t *key)
+{
+ sm4_set_key(ctx, key, 1);
+}
+
+void
+sm4_set_decrypt_key(struct sm4_ctx *ctx, const uint8_t *key)
+{
+ sm4_set_key(ctx, key, 0);
+}
+
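+/* Process whole blocks: 32 rounds of the SM4 round function, followed
+   by the reverse transform (the state words are written back in
+   reverse order).  */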
+void
+sm4_crypt(const struct sm4_ctx *context,
+ size_t length,
+ uint8_t *dst,
+ const uint8_t *src)
+{
+ const uint32_t *rk = context->rkey;
+
+ assert( !(length % SM4_BLOCK_SIZE) );
+
+ for ( ; length; length -= SM4_BLOCK_SIZE)
+ {
+ uint32_t x0, x1, x2, x3;
+ unsigned i;
+
+ x0 = READ_UINT32(src + 0 * 4);
+ x1 = READ_UINT32(src + 1 * 4);
+ x2 = READ_UINT32(src + 2 * 4);
+ x3 = READ_UINT32(src + 3 * 4);
+
+ for (i = 0; i < 32; i += 4)
+ {
+ x0 = sm4_round(x0, x1, x2, x3, rk[i + 0]);
+ x1 = sm4_round(x1, x2, x3, x0, rk[i + 1]);
+ x2 = sm4_round(x2, x3, x0, x1, rk[i + 2]);
+ x3 = sm4_round(x3, x0, x1, x2, rk[i + 3]);
+ }
+
+ WRITE_UINT32(dst + 0 * 4, x3);
+ WRITE_UINT32(dst + 1 * 4, x2);
+ WRITE_UINT32(dst + 2 * 4, x1);
+ WRITE_UINT32(dst + 3 * 4, x0);
+
+ src += SM4_BLOCK_SIZE;
+ dst += SM4_BLOCK_SIZE;
+ }
+}
diff --git a/sm4.h b/sm4.h
new file mode 100644
index 00000000..608eb3f3
--- /dev/null
+++ b/sm4.h
@@ -0,0 +1,69 @@
+/* sm4.h
+
+ Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NETTLE_SM4_H_INCLUDED
+#define NETTLE_SM4_H_INCLUDED
+
+#include "nettle-types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Name mangling */
+#define sm4_set_encrypt_key nettle_sm4_set_encrypt_key
+#define sm4_set_decrypt_key nettle_sm4_set_decrypt_key
+#define sm4_crypt nettle_sm4_crypt
+
+#define SM4_BLOCK_SIZE 16
+#define SM4_KEY_SIZE 16
+
+struct sm4_ctx
+{
+ uint32_t rkey[32];
+};
+
+void
+sm4_set_encrypt_key(struct sm4_ctx *ctx, const uint8_t *key);
+
+void
+sm4_set_decrypt_key(struct sm4_ctx *ctx, const uint8_t *key);
+
+void
+sm4_crypt(const struct sm4_ctx *context,
+ size_t length, uint8_t *dst,
+ const uint8_t *src);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NETTLE_SM4_H_INCLUDED */
diff --git a/testsuite/.gitignore b/testsuite/.gitignore
index ca41472e..8c91d1af 100644
--- a/testsuite/.gitignore
+++ b/testsuite/.gitignore
@@ -4,6 +4,7 @@
/aes-keywrap-test
/arcfour-test
/arctwo-test
+/balloon-test
/base16-test
/base64-test
/bignum-test
@@ -98,6 +99,7 @@
/sha512-256-test
/sha512-test
/sm3-test
+/sm4-test
/streebog-test
/twofish-test
/umac-test
@@ -106,6 +108,7 @@
/xts-test
/cmac-test
/siv-test
+/siv-gcm-test
/bcrypt-test
/ed448-test
/shake256-test
diff --git a/testsuite/Makefile.in b/testsuite/Makefile.in
index 6734d3e6..025ab72d 100644
--- a/testsuite/Makefile.in
+++ b/testsuite/Makefile.in
@@ -11,7 +11,7 @@ PRE_CPPFLAGS = -I.. -I$(top_srcdir)
PRE_LDFLAGS = -L..
TS_NETTLE_SOURCES = aes-test.c aes-keywrap-test.c arcfour-test.c arctwo-test.c \
- blowfish-test.c bcrypt-test.c cast128-test.c \
+ balloon-test.c blowfish-test.c bcrypt-test.c cast128-test.c \
base16-test.c base64-test.c \
camellia-test.c chacha-test.c \
cnd-memcpy-test.c \
@@ -24,11 +24,11 @@ TS_NETTLE_SOURCES = aes-test.c aes-keywrap-test.c arcfour-test.c arctwo-test.c \
sha384-test.c sha512-test.c sha512-224-test.c sha512-256-test.c \
sha3-permute-test.c sha3-224-test.c sha3-256-test.c \
sha3-384-test.c sha3-512-test.c \
- shake256-test.c streebog-test.c sm3-test.c \
+ shake256-test.c streebog-test.c sm3-test.c sm4-test.c \
serpent-test.c twofish-test.c version-test.c \
knuth-lfib-test.c \
cbc-test.c cfb-test.c ctr-test.c gcm-test.c eax-test.c ccm-test.c \
- cmac-test.c siv-test.c \
+ cmac-test.c siv-test.c siv-gcm-test.c \
poly1305-test.c chacha-poly1305-test.c \
hmac-test.c umac-test.c \
meta-hash-test.c meta-cipher-test.c\
@@ -47,8 +47,8 @@ TS_HOGWEED_SOURCES = sexp-test.c sexp-format-test.c \
rsa-compute-root-test.c \
dsa-test.c dsa-keygen-test.c \
curve25519-dh-test.c curve448-dh-test.c \
- ecc-mod-test.c ecc-modinv-test.c ecc-redc-test.c \
- ecc-sqrt-test.c \
+ ecc-mod-arith-test.c ecc-mod-test.c ecc-modinv-test.c \
+ ecc-redc-test.c ecc-sqrt-test.c \
ecc-dup-test.c ecc-add-test.c \
ecc-mul-g-test.c ecc-mul-a-test.c \
ecdsa-sign-test.c ecdsa-verify-test.c \
diff --git a/testsuite/balloon-test.c b/testsuite/balloon-test.c
new file mode 100644
index 00000000..ad63c7a0
--- /dev/null
+++ b/testsuite/balloon-test.c
@@ -0,0 +1,135 @@
+/* balloon-test.c
+
+ Copyright (C) 2022 Zoltan Fridrich
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#include "testutils.h"
+#include "balloon.h"
+
+static void
+test_balloon(const struct nettle_hash *alg,
+ size_t password_len, const char *password,
+ size_t salt_len, const char *salt,
+ unsigned s_cost, unsigned t_cost,
+ const struct tstring *expected)
+{
+ void *ctx = xalloc(alg->context_size);
+ uint8_t *buf = xalloc(balloon_itch(alg->digest_size, s_cost));
+
+ alg->init(ctx);
+ balloon(ctx, alg->update, alg->digest, alg->digest_size,
+ s_cost, t_cost, password_len, (const uint8_t *)password,
+ salt_len, (const uint8_t *)salt, buf, buf);
+
+ if (!MEMEQ(alg->digest_size, buf, expected->data))
+ {
+ fprintf(stderr, "test_balloon: result doesn't match the expectation:");
+ fprintf(stderr, "\nOutput: ");
+ print_hex(alg->digest_size, buf);
+ fprintf(stderr, "\nExpected:");
+ tstring_print_hex(expected);
+ fprintf(stderr, "\n");
+ FAIL();
+ }
+
+ free(ctx);
+ free(buf);
+}
+
+static void
+test_balloon_sha(const struct nettle_hash *alg,
+ size_t password_len, const char *password,
+ size_t salt_len, const char *salt,
+ unsigned s_cost, unsigned t_cost,
+ const struct tstring *expected)
+{
+ uint8_t *buf = xalloc(balloon_itch(alg->digest_size, s_cost));
+
+ if (alg == &nettle_sha1)
+ balloon_sha1(s_cost, t_cost, password_len, (const uint8_t *)password,
+ salt_len, (const uint8_t *)salt, buf, buf);
+ else if (alg == &nettle_sha256)
+ balloon_sha256(s_cost, t_cost, password_len, (const uint8_t *)password,
+ salt_len, (const uint8_t *)salt, buf, buf);
+ else if (alg == &nettle_sha384)
+ balloon_sha384(s_cost, t_cost, password_len, (const uint8_t *)password,
+ salt_len, (const uint8_t *)salt, buf, buf);
+ else if (alg == &nettle_sha512)
+ balloon_sha512(s_cost, t_cost, password_len, (const uint8_t *)password,
+ salt_len, (const uint8_t *)salt, buf, buf);
+ else
+ {
+ fprintf(stderr, "test_balloon_sha: bad test\n");
+ FAIL();
+ }
+
+ if (!MEMEQ(alg->digest_size, buf, expected->data))
+ {
+ fprintf(stderr, "test_balloon_sha: result doesn't match the expectation:");
+ fprintf(stderr, "\nOutput: ");
+ print_hex(alg->digest_size, buf);
+ fprintf(stderr, "\nExpected:");
+ tstring_print_hex(expected);
+ fprintf(stderr, "\n");
+ FAIL();
+ }
+
+ free(buf);
+}
+
+/* Test vectors are taken from:
+ * <https://github.com/nachonavarro/balloon-hashing>
+ * <https://github.com/RustCrypto/password-hashes/tree/master/balloon-hash>
+ */
+void
+test_main(void)
+{
+ test_balloon(&nettle_sha256, 8, "hunter42", 11, "examplesalt", 1024, 3,
+ SHEX("716043dff777b44aa7b88dcbab12c078abecfac9d289c5b5195967aa63440dfb"));
+ test_balloon(&nettle_sha256, 0, "", 4, "salt", 3, 3,
+ SHEX("5f02f8206f9cd212485c6bdf85527b698956701ad0852106f94b94ee94577378"));
+ test_balloon(&nettle_sha256, 8, "password", 0, "", 3, 3,
+ SHEX("20aa99d7fe3f4df4bd98c655c5480ec98b143107a331fd491deda885c4d6a6cc"));
+ test_balloon(&nettle_sha256, 1, "", 1, "", 3, 3,
+ SHEX("4fc7e302ffa29ae0eac31166cee7a552d1d71135f4e0da66486fb68a749b73a4"));
+ test_balloon(&nettle_sha256, 8, "password", 4, "salt", 1, 1,
+ SHEX("eefda4a8a75b461fa389c1dcfaf3e9dfacbc26f81f22e6f280d15cc18c417545"));
+
+ test_balloon_sha(&nettle_sha1, 8, "password", 4, "salt", 3, 3,
+ SHEX("99393c091fdd3136f85864099ec49a439dcacc21"));
+ test_balloon_sha(&nettle_sha256, 8, "password", 4, "salt", 3, 3,
+ SHEX("a4df347f5a312e8b2b14c32164f61a81758c807f1bdcda44f4930e2b80ab2154"));
+ test_balloon_sha(&nettle_sha384, 8, "password", 4, "salt", 3, 3,
+ SHEX("78da235f7d0f84aba98b50a432fa6c8f7f3ecb7ea0858cfb316c7e5356aae6c8"
+ "d7e7b3924c54c4ed71a3d0d68cb0ad68"));
+ test_balloon_sha(&nettle_sha512, 8, "password", 4, "salt", 3, 3,
+ SHEX("9baf289dfa42990f4b189d96d4ede0f2610ba71fb644169427829d696f6866d8"
+ "7af41eb68f9e14fd4b1f1a7ce4832f1ed6117c16e8eae753f9e1d054a7c0a7eb"));
+}
diff --git a/testsuite/ecc-add-test.c b/testsuite/ecc-add-test.c
index 6f58a3bb..4793a4bf 100644
--- a/testsuite/ecc-add-test.c
+++ b/testsuite/ecc-add-test.c
@@ -19,6 +19,24 @@ test_main (void)
test_ecc_get_g (i, g);
+ ecc->dup (ecc, g2, g, scratch);
+ test_ecc_mul_h (i, 2, g2);
+
+ ecc->add_hhh (ecc, g3, g, g2, scratch);
+ test_ecc_mul_h (i, 3, g3);
+
+ ecc->add_hhh (ecc, g3, g2, g, scratch);
+ test_ecc_mul_h (i, 3, g3);
+
+ ecc->add_hhh (ecc, p, g, g3, scratch);
+ test_ecc_mul_h (i, 4, p);
+
+ ecc->add_hhh (ecc, p, g3, g, scratch);
+ test_ecc_mul_h (i, 4, p);
+
+ ecc->dup (ecc, p, g2, scratch);
+ test_ecc_mul_h (i, 4, p);
+
if (ecc->p.bit_size == 255 || ecc->p.bit_size == 448)
{
mp_limb_t *z = xalloc_limbs (ecc_size_j (ecc));
@@ -49,24 +67,20 @@ test_main (void)
free (z);
}
+ else
+ {
+ ASSERT (ecc_nonsec_add_jjj (ecc, g2, g, g, scratch));
+ test_ecc_mul_h (i, 2, g2);
- ecc->dup (ecc, g2, g, scratch);
- test_ecc_mul_h (i, 2, g2);
-
- ecc->add_hhh (ecc, g3, g, g2, scratch);
- test_ecc_mul_h (i, 3, g3);
-
- ecc->add_hhh (ecc, g3, g2, g, scratch);
- test_ecc_mul_h (i, 3, g3);
-
- ecc->add_hhh (ecc, p, g, g3, scratch);
- test_ecc_mul_h (i, 4, p);
+ ASSERT (ecc_nonsec_add_jjj (ecc, g3, g2, g, scratch));
+ test_ecc_mul_h (i, 3, g3);
- ecc->add_hhh (ecc, p, g3, g, scratch);
- test_ecc_mul_h (i, 4, p);
+ ASSERT (ecc_nonsec_add_jjj (ecc, p, g, g3, scratch));
+ test_ecc_mul_h (i, 4, p);
- ecc->dup (ecc, p, g2, scratch);
- test_ecc_mul_h (i, 4, p);
+ ASSERT (ecc_nonsec_add_jjj (ecc, p, g2, g2, scratch));
+ test_ecc_mul_h (i, 4, p);
+ }
free (g);
free (g2);
diff --git a/testsuite/ecc-mod-arith-test.c b/testsuite/ecc-mod-arith-test.c
new file mode 100644
index 00000000..14b3bd1c
--- /dev/null
+++ b/testsuite/ecc-mod-arith-test.c
@@ -0,0 +1,160 @@
+#include "testutils.h"
+
+#define MAX_SIZE (1 + 521 / GMP_NUMB_BITS)
+#define COUNT 50000
+
+static void
+test_add(const char *name,
+ const struct ecc_modulo *m,
+ const mpz_t az, const mpz_t bz)
+{
+ mp_limb_t a[MAX_SIZE];
+ mp_limb_t b[MAX_SIZE];
+ mp_limb_t t[MAX_SIZE];
+ mpz_t mz;
+ mpz_t tz;
+ mpz_t ref;
+
+ mpz_init (ref);
+ mpz_add (ref, az, bz);
+ mpz_mod (ref, ref, mpz_roinit_n (mz, m->m, m->size));
+
+ mpz_limbs_copy (a, az, m->size);
+ mpz_limbs_copy (b, bz, m->size);
+ ecc_mod_add (m, t, a, b);
+
+ if (!mpz_congruent_p (ref, mpz_roinit_n (tz, t, m->size), mz))
+ {
+ fprintf (stderr, "ecc_mod_add %s failed: bit_size = %u\n",
+ name, m->bit_size);
+
+ fprintf (stderr, "a = ");
+ mpn_out_str (stderr, 16, a, m->size);
+ fprintf (stderr, "\nb = ");
+ mpn_out_str (stderr, 16, b, m->size);
+ fprintf (stderr, "\nt = ");
+ mpn_out_str (stderr, 16, t, m->size);
+ fprintf (stderr, " (bad)\nref = ");
+ mpz_out_str (stderr, 16, ref);
+ fprintf (stderr, "\n");
+ abort ();
+ }
+ mpz_clear (ref);
+}
+
+static void
+test_sub(const char *name,
+ const struct ecc_modulo *m,
+ /* If range is non-null, check that 0 <= r < range. */
+ const mp_limb_t *range,
+ const mpz_t az, const mpz_t bz)
+{
+ mp_limb_t a[MAX_SIZE];
+ mp_limb_t b[MAX_SIZE];
+ mp_limb_t t[MAX_SIZE];
+ mpz_t mz;
+ mpz_t tz;
+ mpz_t ref;
+
+ mpz_init (ref);
+ mpz_sub (ref, az, bz);
+ mpz_mod (ref, ref, mpz_roinit_n (mz, m->m, m->size));
+
+ mpz_limbs_copy (a, az, m->size);
+ mpz_limbs_copy (b, bz, m->size);
+ ecc_mod_sub (m, t, a, b);
+
+ if (!mpz_congruent_p (ref, mpz_roinit_n (tz, t, m->size), mz))
+ {
+ fprintf (stderr, "ecc_mod_sub %s failed: bit_size = %u\n",
+ name, m->bit_size);
+
+ fprintf (stderr, "a = ");
+ mpn_out_str (stderr, 16, a, m->size);
+ fprintf (stderr, "\nb = ");
+ mpn_out_str (stderr, 16, b, m->size);
+ fprintf (stderr, "\nt = ");
+ mpn_out_str (stderr, 16, t, m->size);
+ fprintf (stderr, " (bad)\nref = ");
+ mpz_out_str (stderr, 16, ref);
+ fprintf (stderr, "\n");
+ abort ();
+ }
+
+ if (range && mpn_cmp (t, range, m->size) >= 0)
+ {
+ fprintf (stderr, "ecc_mod_sub %s out of range: bit_size = %u\n",
+ name, m->bit_size);
+
+ fprintf (stderr, "a = ");
+ mpn_out_str (stderr, 16, a, m->size);
+ fprintf (stderr, "\nb = ");
+ mpn_out_str (stderr, 16, b, m->size);
+ fprintf (stderr, "\nt = ");
+ mpn_out_str (stderr, 16, t, m->size);
+ fprintf (stderr, " \nrange = ");
+ mpn_out_str (stderr, 16, range, m->size);
+ fprintf (stderr, "\n");
+ abort ();
+ }
+ mpz_clear (ref);
+}
+
+static void
+test_modulo (gmp_randstate_t rands, const char *name,
+ const struct ecc_modulo *m, unsigned count)
+{
+ mpz_t a, b;
+ unsigned j;
+
+ mpz_init (a);
+ mpz_init (b);
+
+ for (j = 0; j < count; j++)
+ {
+ if (j & 1)
+ {
+ mpz_rrandomb (a, rands, m->size * GMP_NUMB_BITS);
+ mpz_rrandomb (b, rands, m->size * GMP_NUMB_BITS);
+ }
+ else
+ {
+ mpz_urandomb (a, rands, m->size * GMP_NUMB_BITS);
+ mpz_urandomb (b, rands, m->size * GMP_NUMB_BITS);
+ }
+ test_add (name, m, a, b);
+ test_sub (name, m, NULL, a, b);
+ }
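+  /* When the modulus leaves unused bits in the top limb, 2p fits in
+     m->size limbs and is used as the range bound for ecc_mod_sub.  */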
+ if (m->bit_size < m->size * GMP_NUMB_BITS)
+ {
+ mp_limb_t two_p[MAX_SIZE];
+ mpn_lshift (two_p, m->m, m->size, 1);
+ mpz_t range;
+ mpz_roinit_n (range, two_p, m->size);
+ mpz_urandomm (a, rands, range);
+ mpz_urandomm (b, rands, range);
+ test_sub (name, m, two_p, a, b);
+ }
+ mpz_clear (a);
+ mpz_clear (b);
+}
+
+void
+test_main (void)
+{
+ gmp_randstate_t rands;
+ unsigned count = COUNT;
+ unsigned i;
+
+ gmp_randinit_default (rands);
+
+ if (test_randomize(rands))
+ count *= 20;
+
+ for (i = 0; ecc_curves[i]; i++)
+ {
+ test_modulo (rands, "p", &ecc_curves[i]->p, count);
+ test_modulo (rands, "q", &ecc_curves[i]->q, count);
+ }
+ gmp_randclear (rands);
+}
diff --git a/testsuite/ecdsa-sign-test.c b/testsuite/ecdsa-sign-test.c
index c79493ae..b8a100b6 100644
--- a/testsuite/ecdsa-sign-test.c
+++ b/testsuite/ecdsa-sign-test.c
@@ -77,6 +77,18 @@ test_main (void)
"3a41e1423b1853e8aa89747b1f987364"
"44705d6d6d8371ea1f578f2e"); /* s */
+  /* Produce a signature for which the verify operation triggers point duplication. */
+ test_ecdsa (&_nettle_secp_256r1,
+ "1", /* Private key */
+ "01010101010101010101010101010101"
+ "01010101010101010101010101010101", /* nonce */
+ SHEX("6ff03b949241ce1dadd43519e6960e0a"
+ "85b41a69a05c328103aa2bce1594ca16"), /* hash */
+ "6ff03b949241ce1dadd43519e6960e0a"
+ "85b41a69a05c328103aa2bce1594ca16", /* r */
+ "53f097727a0e0dc284a0daa0da0ab77d"
+ "5792ae67ed075d1f8d5bda0f853fa093"); /* s */
+
/* Test cases for the smaller groups, verified with a
proof-of-concept implementation done for Yubico AB. */
test_ecdsa (&_nettle_secp_192r1,
diff --git a/testsuite/ecdsa-verify-test.c b/testsuite/ecdsa-verify-test.c
index 8110c64d..8d527000 100644
--- a/testsuite/ecdsa-verify-test.c
+++ b/testsuite/ecdsa-verify-test.c
@@ -109,6 +109,21 @@ test_main (void)
"952800792ed19341fdeeec047f2514f3b0f150d6066151fb", /* r */
"ec5971222014878b50d7a19d8954bc871e7e65b00b860ffb"); /* s */
+ /* Test case provided by Guido Vranken, from oss-fuzz. Triggers
+ point duplication in the verify operation by using private key =
+ 1 (public key = generator) and hash = r. */
+ test_ecdsa (&_nettle_secp_256r1,
+ "6B17D1F2E12C4247F8BCE6E563A440F2"
+ "77037D812DEB33A0F4A13945D898C296", /* x */
+ "4FE342E2FE1A7F9B8EE7EB4A7C0F9E16"
+ "2BCE33576B315ECECBB6406837BF51F5", /* y */
+ SHEX("6ff03b949241ce1dadd43519e6960e0a"
+ "85b41a69a05c328103aa2bce1594ca16"), /* hash */
+ "6ff03b949241ce1dadd43519e6960e0a"
+ "85b41a69a05c328103aa2bce1594ca16", /* r */
+ "53f097727a0e0dc284a0daa0da0ab77d"
+ "5792ae67ed075d1f8d5bda0f853fa093"); /* s */
+
/* From RFC 4754 */
test_ecdsa (&_nettle_secp_256r1,
"2442A5CC 0ECD015F A3CA31DC 8E2BBC70"
diff --git a/testsuite/gcm-test.c b/testsuite/gcm-test.c
index 8955e9b8..d70cdd1e 100644
--- a/testsuite/gcm-test.c
+++ b/testsuite/gcm-test.c
@@ -577,6 +577,24 @@ test_main(void)
"16aedbf5a0de6a57 a637b39b"), /* iv */
SHEX("5791883f822013f8bd136fc36fb9946b")); /* tag */
+ /*
+ * GCM-SM4 Test Vectors from
+ * https://datatracker.ietf.org/doc/html/rfc8998
+ */
+ test_aead(&nettle_gcm_sm4, NULL,
+ SHEX("0123456789ABCDEFFEDCBA9876543210"),
+ SHEX("FEEDFACEDEADBEEFFEEDFACEDEADBEEFABADDAD2"),
+ SHEX("AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBB"
+ "CCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDD"
+ "EEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFF"
+ "EEEEEEEEEEEEEEEEAAAAAAAAAAAAAAAA"),
+ SHEX("17F399F08C67D5EE19D0DC9969C4BB7D"
+ "5FD46FD3756489069157B282BB200735"
+ "D82710CA5C22F0CCFA7CBF93D496AC15"
+ "A56834CBCF98C397B4024A2691233B8D"),
+ SHEX("00001234567800000000ABCD"),
+ SHEX("83DE3541E4C2B58177E065A9BF7B62EC"));
+
/* Test gcm_hash, with varying message size, keys and iv all zero.
Not compared to any other implementation. */
test_gcm_hash (SDATA("a"),
diff --git a/testsuite/meta-aead-test.c b/testsuite/meta-aead-test.c
index 1fcede40..ceeca227 100644
--- a/testsuite/meta-aead-test.c
+++ b/testsuite/meta-aead-test.c
@@ -8,6 +8,7 @@ const char* aeads[] = {
"gcm_aes256",
"gcm_camellia128",
"gcm_camellia256",
+ "gcm_sm4",
"eax_aes128",
"chacha_poly1305",
};
diff --git a/testsuite/meta-cipher-test.c b/testsuite/meta-cipher-test.c
index f949fd76..912fac5a 100644
--- a/testsuite/meta-cipher-test.c
+++ b/testsuite/meta-cipher-test.c
@@ -1,5 +1,6 @@
#include "testutils.h"
#include "nettle-meta.h"
+#include "nettle-internal.h"
const char* ciphers[] = {
"aes128",
@@ -18,7 +19,8 @@ const char* ciphers[] = {
"serpent256",
"twofish128",
"twofish192",
- "twofish256"
+ "twofish256",
+ "sm4"
};
void
@@ -34,8 +36,11 @@ test_main(void)
ASSERT(NULL != nettle_ciphers[j]); /* make sure we found a matching cipher */
}
j = 0;
- while (NULL != nettle_ciphers[j])
- j++;
+ for (j = 0; NULL != nettle_ciphers[j]; j++)
+ {
+ ASSERT(nettle_ciphers[j]->block_size <= NETTLE_MAX_CIPHER_BLOCK_SIZE);
+ ASSERT(nettle_ciphers[j]->key_size <= NETTLE_MAX_CIPHER_KEY_SIZE);
+ }
ASSERT(j == count); /* we are not missing testing any ciphers */
}
diff --git a/testsuite/meta-hash-test.c b/testsuite/meta-hash-test.c
index 3aed43fc..6a15e7db 100644
--- a/testsuite/meta-hash-test.c
+++ b/testsuite/meta-hash-test.c
@@ -36,6 +36,7 @@ test_main(void)
}
for (i = 0; NULL != nettle_hashes[i]; i++) {
+ ASSERT(nettle_hashes[i]->block_size <= NETTLE_MAX_HASH_BLOCK_SIZE);
ASSERT(nettle_hashes[i]->digest_size <= NETTLE_MAX_HASH_DIGEST_SIZE);
ASSERT(nettle_hashes[i]->context_size <= NETTLE_MAX_HASH_CONTEXT_SIZE);
}
diff --git a/testsuite/siv-gcm-test.c b/testsuite/siv-gcm-test.c
new file mode 100644
index 00000000..eba03f23
--- /dev/null
+++ b/testsuite/siv-gcm-test.c
@@ -0,0 +1,731 @@
+/* siv-gcm-test.c
+
+ Self-test and vectors for AES-GCM-SIV mode ciphers
+
+ Copyright (C) 2022 Red Hat, Inc.
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+/* The test vectors have been collected from the following standards:
+ * RFC8452
+ */
+
+#include "testutils.h"
+#include "ghash-internal.h"
+#include "block-internal.h"
+#include "aes.h"
+#include "siv-gcm.h"
+
+/* AEAD ciphers */
+typedef void
+nettle_encrypt_message_func(void *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t clength, uint8_t *dst, const uint8_t *src);
+
+typedef int
+nettle_decrypt_message_func(void *ctx,
+ size_t nlength, const uint8_t *nonce,
+ size_t alength, const uint8_t *adata,
+ size_t mlength, uint8_t *dst, const uint8_t *src);
+
+static void
+test_compare_results (const char *name,
+ const struct tstring *adata,
+ /* Expected results. */
+ const struct tstring *e_clear,
+ const struct tstring *e_cipher,
+ /* Actual results. */
+ const void *clear,
+ const void *cipher)
+{
+ if (!MEMEQ(e_cipher->length, e_cipher->data, cipher))
+ {
+ fprintf (stderr, "%s: encryption failed\nAdata: ", name);
+ tstring_print_hex (adata);
+ fprintf (stderr, "\nInput: ");
+ tstring_print_hex (e_clear);
+ fprintf (stderr, "\nOutput: ");
+ print_hex (e_cipher->length, cipher);
+ fprintf (stderr, "\nExpected:");
+ tstring_print_hex (e_cipher);
+ fprintf (stderr, "\n");
+ FAIL();
+ }
+ if (!MEMEQ(e_clear->length, e_clear->data, clear))
+ {
+      fprintf (stderr, "%s: decryption failed\nAdata: ", name);
+ tstring_print_hex (adata);
+ fprintf (stderr, "\nInput: ");
+ tstring_print_hex (e_cipher);
+ fprintf (stderr, "\nOutput: ");
+ print_hex (e_clear->length, clear);
+ fprintf (stderr, "\nExpected:");
+ tstring_print_hex (e_clear);
+ fprintf (stderr, "\n");
+ FAIL();
+ }
+} /* test_compare_results */
+
+static void
+test_cipher_siv_gcm (const char *name,
+ nettle_set_key_func *siv_gcm_set_key,
+ nettle_encrypt_message_func *siv_gcm_encrypt,
+ nettle_decrypt_message_func *siv_gcm_decrypt,
+ size_t context_size, size_t key_size,
+ const struct tstring *key,
+ const struct tstring *nonce,
+ const struct tstring *authdata,
+ const struct tstring *cleartext,
+ const struct tstring *ciphertext)
+{
+ void *ctx = xalloc (context_size);
+ uint8_t *en_data;
+ uint8_t *de_data;
+ int ret;
+
+ ASSERT (key->length == key_size);
+ ASSERT (cleartext->length + SIV_GCM_DIGEST_SIZE == ciphertext->length);
+
+ de_data = xalloc (cleartext->length);
+ en_data = xalloc (ciphertext->length);
+
+  /* Check the all-in-one message API against the test vectors. */
+ memset (de_data, 0, cleartext->length);
+ memset (en_data, 0, ciphertext->length);
+
+ siv_gcm_set_key (ctx, key->data);
+ siv_gcm_encrypt (ctx, nonce->length, nonce->data,
+ authdata->length, authdata->data,
+ ciphertext->length, en_data, cleartext->data);
+ ret = siv_gcm_decrypt (ctx, nonce->length, nonce->data,
+ authdata->length, authdata->data,
+ cleartext->length, de_data, ciphertext->data);
+
+ if (ret != 1)
+ {
+ fprintf (stderr, "siv_gcm_decrypt_message failed to validate message\n");
+ FAIL();
+ }
+ test_compare_results (name, authdata,
+ cleartext, ciphertext, de_data, en_data);
+
+ /* Ensure that we can detect corrupted message or tag data. */
+ en_data[0] ^= 1;
+ ret = siv_gcm_decrypt (ctx, nonce->length, nonce->data,
+ authdata->length, authdata->data,
+ cleartext->length, de_data, en_data);
+ if (ret != 0)
+ {
+ fprintf (stderr, "siv_gcm_decrypt_message failed to detect corrupted message\n");
+ FAIL();
+ }
+
+ /* Ensure we can detect corrupted adata. */
+ if (authdata->length)
+ {
+ en_data[0] ^= 1;
+ ret = siv_gcm_decrypt (ctx, nonce->length, nonce->data,
+ authdata->length-1, authdata->data,
+ cleartext->length, de_data, en_data);
+ if (ret != 0)
+ {
+          fprintf (stderr, "siv_gcm_decrypt_message failed to detect corrupted adata\n");
+ FAIL();
+ }
+ }
+
+ free (ctx);
+ free (en_data);
+ free (de_data);
+}
+
+#define test_siv_gcm_aes128(name, key, nonce, authdata, cleartext, ciphertext) \
+ test_cipher_siv_gcm(name, (nettle_set_key_func*)aes128_set_encrypt_key, \
+ (nettle_encrypt_message_func*)siv_gcm_aes128_encrypt_message, \
+ (nettle_decrypt_message_func*)siv_gcm_aes128_decrypt_message, \
+ sizeof(struct aes128_ctx), AES128_KEY_SIZE, \
+ key, nonce, authdata, cleartext, ciphertext)
+
+#define test_siv_gcm_aes256(name, key, nonce, authdata, cleartext, ciphertext) \
+ test_cipher_siv_gcm(name, (nettle_set_key_func*)aes256_set_encrypt_key, \
+ (nettle_encrypt_message_func*)siv_gcm_aes256_encrypt_message, \
+ (nettle_decrypt_message_func*)siv_gcm_aes256_decrypt_message, \
+ sizeof(struct aes256_ctx), AES256_KEY_SIZE, \
+ key, nonce, authdata, cleartext, ciphertext)
+
+static void
+test_polyval_internal (const struct tstring *key,
+ const struct tstring *message,
+ const struct tstring *digest)
+{
+ ASSERT (key->length == GCM_BLOCK_SIZE);
+ ASSERT (message->length % GCM_BLOCK_SIZE == 0);
+ ASSERT (digest->length == GCM_BLOCK_SIZE);
+ struct gcm_key gcm_key;
+ union nettle_block16 state;
+
+ memcpy (state.b, key->data, GCM_BLOCK_SIZE);
+ _siv_ghash_set_key (&gcm_key, &state);
+
+ block16_zero (&state);
+ _siv_ghash_update (&gcm_key, &state, message->length / GCM_BLOCK_SIZE, message->data);
+ block16_bswap (&state, &state);
+
+ if (!MEMEQ(GCM_BLOCK_SIZE, state.b, digest->data))
+ {
+ fprintf (stderr, "POLYVAL failed\n");
+ fprintf (stderr, "Key: ");
+ tstring_print_hex (key);
+ fprintf (stderr, "\nMessage: ");
+ tstring_print_hex (message);
+ fprintf (stderr, "\nOutput: ");
+ print_hex (GCM_BLOCK_SIZE, state.b);
+ fprintf (stderr, "\nExpected:");
+ tstring_print_hex (digest);
+ fprintf (stderr, "\n");
+ FAIL();
+ }
+}
+
+void
+test_main(void)
+{
+ /* RFC8452, Appendix A. */
+ test_polyval_internal (SHEX("25629347589242761d31f826ba4b757b"),
+ SHEX("4f4f95668c83dfb6401762bb2d01a262"
+ "d1a24ddd2721d006bbe45f20d3c9f362"),
+ SHEX("f7a3b47b846119fae5b7866cf5e5b77e"));
+
+ /* RFC8452, Appendix C.1. */
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX(""),
+ SHEX("dc20e2d83f25705bb49e439eca56de25"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV 1",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("0100000000000000"),
+ SHEX("b5d839330ac7b786578782fff6013b81"
+ "5b287c22493a364c"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("010000000000000000000000"),
+ SHEX("7323ea61d05932260047d942a4978db3"
+ "57391a0bc4fdec8b0d106639"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("01000000000000000000000000000000"),
+ SHEX("743f7c8077ab25f8624e2e948579cf77"
+ "303aaf90f6fe21199c6068577437a0c4"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("01000000000000000000000000000000"
+ "02000000000000000000000000000000"),
+ SHEX("84e07e62ba83a6585417245d7ec413a9"
+ "fe427d6315c09b57ce45f2e3936a9445"
+ "1a8e45dcd4578c667cd86847bf6155ff"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("01000000000000000000000000000000"
+ "02000000000000000000000000000000"
+ "03000000000000000000000000000000"),
+ SHEX("3fd24ce1f5a67b75bf2351f181a475c7"
+ "b800a5b4d3dcf70106b1eea82fa1d64d"
+ "f42bf7226122fa92e17a40eeaac1201b"
+ "5e6e311dbf395d35b0fe39c2714388f8"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("01000000000000000000000000000000"
+ "02000000000000000000000000000000"
+ "03000000000000000000000000000000"
+ "04000000000000000000000000000000"),
+ SHEX("2433668f1058190f6d43e360f4f35cd8"
+ "e475127cfca7028ea8ab5c20f7ab2af0"
+ "2516a2bdcbc08d521be37ff28c152bba"
+ "36697f25b4cd169c6590d1dd39566d3f"
+ "8a263dd317aa88d56bdf3936dba75bb8"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("0200000000000000"),
+ SHEX("1e6daba35669f4273b0a1a2560969cdf"
+ "790d99759abd1508"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("020000000000000000000000"),
+ SHEX("296c7889fd99f41917f4462008299c51"
+ "02745aaa3a0c469fad9e075a"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("02000000000000000000000000000000"),
+ SHEX("e2b0c5da79a901c1745f700525cb335b"
+ "8f8936ec039e4e4bb97ebd8c4457441f"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("02000000000000000000000000000000"
+ "03000000000000000000000000000000"),
+ SHEX("620048ef3c1e73e57e02bb8562c416a3"
+ "19e73e4caac8e96a1ecb2933145a1d71"
+ "e6af6a7f87287da059a71684ed3498e1"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("02000000000000000000000000000000"
+ "03000000000000000000000000000000"
+ "04000000000000000000000000000000"),
+ SHEX("50c8303ea93925d64090d07bd109dfd9"
+ "515a5a33431019c17d93465999a8b005"
+ "3201d723120a8562b838cdff25bf9d1e"
+ "6a8cc3865f76897c2e4b245cf31c51f2"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("02000000000000000000000000000000"
+ "03000000000000000000000000000000"
+ "04000000000000000000000000000000"
+ "05000000000000000000000000000000"),
+ SHEX("2f5c64059db55ee0fb847ed513003746"
+ "aca4e61c711b5de2e7a77ffd02da42fe"
+ "ec601910d3467bb8b36ebbaebce5fba3"
+ "0d36c95f48a3e7980f0e7ac299332a80"
+ "cdc46ae475563de037001ef84ae21744"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("010000000000000000000000"),
+ SHEX("02000000"),
+ SHEX("a8fe3e8707eb1f84fb28f8cb73de8e99"
+ "e2f48a14"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01000000000000000000000000000000"
+ "0200"),
+ SHEX("03000000000000000000000000000000"
+ "04000000"),
+ SHEX("6bb0fecf5ded9b77f902c7d5da236a43"
+ "91dd029724afc9805e976f451e6d87f6"
+ "fe106514"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("01000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01000000000000000000000000000000"
+ "02000000"),
+ SHEX("03000000000000000000000000000000"
+ "0400"),
+ SHEX("44d0aaf6fb2f1f34add5e8064e83e12a"
+ "2adabff9b2ef00fb47920cc72a0c0f13"
+ "b9fd"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("e66021d5eb8e4f4066d4adb9c33560e4"),
+ SHEX("f46e44bb3da0015c94f70887"),
+ SHEX(""),
+ SHEX(""),
+ SHEX("a4194b79071b01a87d65f706e3949578"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("36864200e0eaf5284d884a0e77d31646"),
+ SHEX("bae8e37fc83441b16034566b"),
+ SHEX("46bb91c3c5"),
+ SHEX("7a806c"),
+ SHEX("af60eb711bd85bc1e4d3e0a462e074ee"
+ "a428a8"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("aedb64a6c590bc84d1a5e269e4b47801"),
+ SHEX("afc0577e34699b9e671fdd4f"),
+ SHEX("fc880c94a95198874296"),
+ SHEX("bdc66f146545"),
+ SHEX("bb93a3e34d3cd6a9c45545cfc11f03ad"
+ "743dba20f966"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("d5cc1fd161320b6920ce07787f86743b"),
+ SHEX("275d1ab32f6d1f0434d8848c"),
+ SHEX("046787f3ea22c127aaf195d1894728"),
+ SHEX("1177441f195495860f"),
+ SHEX("4f37281f7ad12949d01d02fd0cd174c8"
+ "4fc5dae2f60f52fd2b"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("b3fed1473c528b8426a582995929a149"),
+ SHEX("9e9ad8780c8d63d0ab4149c0"),
+ SHEX("c9882e5386fd9f92ec489c8fde2be2cf"
+ "97e74e93"),
+ SHEX("9f572c614b4745914474e7c7"),
+ SHEX("f54673c5ddf710c745641c8bc1dc2f87"
+ "1fb7561da1286e655e24b7b0"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("2d4ed87da44102952ef94b02b805249b"),
+ SHEX("ac80e6f61455bfac8308a2d4"),
+ SHEX("2950a70d5a1db2316fd568378da107b5"
+ "2b0da55210cc1c1b0a"),
+ SHEX("0d8c8451178082355c9e940fea2f58"),
+ SHEX("c9ff545e07b88a015f05b274540aa183"
+ "b3449b9f39552de99dc214a1190b0b"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("bde3b2f204d1e9f8b06bc47f9745b3d1"),
+ SHEX("ae06556fb6aa7890bebc18fe"),
+ SHEX("1860f762ebfbd08284e421702de0de18"
+ "baa9c9596291b08466f37de21c7f"),
+ SHEX("6b3db4da3d57aa94842b9803a96e07fb"
+ "6de7"),
+ SHEX("6298b296e24e8cc35dce0bed484b7f30"
+ "d5803e377094f04709f64d7b985310a4"
+ "db84"));
+
+ test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV",
+ SHEX("f901cfe8a69615a93fdf7a98cad48179"),
+ SHEX("6245709fb18853f68d833640"),
+ SHEX("7576f7028ec6eb5ea7e298342a94d4b2"
+ "02b370ef9768ec6561c4fe6b7e7296fa"
+ "859c21"),
+ SHEX("e42a3c02c25b64869e146d7b233987bd"
+ "dfc240871d"),
+ SHEX("391cc328d484a4f46406181bcd62efd9"
+ "b3ee197d052d15506c84a9edd65e13e9"
+ "d24a2a6e70"));
+
+ /* RFC8452, Appendix C.2. */
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX(""),
+ SHEX("07f5f4169bbf55a8400cd47ea6fd400f"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("0100000000000000"),
+ SHEX("c2ef328e5c71c83b843122130f7364b7"
+ "61e0b97427e3df28"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("010000000000000000000000"),
+ SHEX("9aab2aeb3faa0a34aea8e2b18ca50da9"
+ "ae6559e48fd10f6e5c9ca17e"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("01000000000000000000000000000000"),
+ SHEX("85a01b63025ba19b7fd3ddfc033b3e76"
+ "c9eac6fa700942702e90862383c6c366"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("01000000000000000000000000000000"
+ "02000000000000000000000000000000"),
+ SHEX("4a6a9db4c8c6549201b9edb53006cba8"
+ "21ec9cf850948a7c86c68ac7539d027f"
+ "e819e63abcd020b006a976397632eb5d"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("01000000000000000000000000000000"
+ "02000000000000000000000000000000"
+ "03000000000000000000000000000000"),
+ SHEX("c00d121893a9fa603f48ccc1ca3c57ce"
+ "7499245ea0046db16c53c7c66fe717e3"
+ "9cf6c748837b61f6ee3adcee17534ed5"
+ "790bc96880a99ba804bd12c0e6a22cc4"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX(""),
+ SHEX("01000000000000000000000000000000"
+ "02000000000000000000000000000000"
+ "03000000000000000000000000000000"
+ "04000000000000000000000000000000"),
+ SHEX("c2d5160a1f8683834910acdafc41fbb1"
+ "632d4a353e8b905ec9a5499ac34f96c7"
+ "e1049eb080883891a4db8caaa1f99dd0"
+ "04d80487540735234e3744512c6f90ce"
+ "112864c269fc0d9d88c61fa47e39aa08"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("0200000000000000"),
+ SHEX("1de22967237a813291213f267e3b452f"
+ "02d01ae33e4ec854"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("020000000000000000000000"),
+ SHEX("163d6f9cc1b346cd453a2e4cc1a4a19a"
+ "e800941ccdc57cc8413c277f"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("02000000000000000000000000000000"),
+ SHEX("c91545823cc24f17dbb0e9e807d5ec17"
+ "b292d28ff61189e8e49f3875ef91aff7"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("02000000000000000000000000000000"
+ "03000000000000000000000000000000"),
+ SHEX("07dad364bfc2b9da89116d7bef6daaaf"
+ "6f255510aa654f920ac81b94e8bad365"
+ "aea1bad12702e1965604374aab96dbbc"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("02000000000000000000000000000000"
+ "03000000000000000000000000000000"
+ "04000000000000000000000000000000"),
+ SHEX("c67a1f0f567a5198aa1fcc8e3f213143"
+ "36f7f51ca8b1af61feac35a86416fa47"
+ "fbca3b5f749cdf564527f2314f42fe25"
+ "03332742b228c647173616cfd44c54eb"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01"),
+ SHEX("02000000000000000000000000000000"
+ "03000000000000000000000000000000"
+ "04000000000000000000000000000000"
+ "05000000000000000000000000000000"),
+ SHEX("67fd45e126bfb9a79930c43aad2d3696"
+ "7d3f0e4d217c1e551f59727870beefc9"
+ "8cb933a8fce9de887b1e40799988db1f"
+ "c3f91880ed405b2dd298318858467c89"
+ "5bde0285037c5de81e5b570a049b62a0"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("010000000000000000000000"),
+ SHEX("02000000"),
+ SHEX("22b3f4cd1835e517741dfddccfa07fa4"
+ "661b74cf"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01000000000000000000000000000000"
+ "0200"),
+ SHEX("03000000000000000000000000000000"
+ "04000000"),
+ SHEX("43dd0163cdb48f9fe3212bf61b201976"
+ "067f342bb879ad976d8242acc188ab59"
+ "cabfe307"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("01000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("030000000000000000000000"),
+ SHEX("01000000000000000000000000000000"
+ "02000000"),
+ SHEX("03000000000000000000000000000000"
+ "0400"),
+ SHEX("462401724b5ce6588d5a54aae5375513"
+ "a075cfcdf5042112aa29685c912fc205"
+ "6543"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("e66021d5eb8e4f4066d4adb9c33560e4"
+ "f46e44bb3da0015c94f7088736864200"),
+ SHEX("e0eaf5284d884a0e77d31646"),
+ SHEX(""),
+ SHEX(""),
+ SHEX("169fbb2fbf389a995f6390af22228a62"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("bae8e37fc83441b16034566b7a806c46"
+ "bb91c3c5aedb64a6c590bc84d1a5e269"),
+ SHEX("e4b47801afc0577e34699b9e"),
+ SHEX("4fbdc66f14"),
+ SHEX("671fdd"),
+ SHEX("0eaccb93da9bb81333aee0c785b240d3"
+ "19719d"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("6545fc880c94a95198874296d5cc1fd1"
+ "61320b6920ce07787f86743b275d1ab3"),
+ SHEX("2f6d1f0434d8848c1177441f"),
+ SHEX("6787f3ea22c127aaf195"),
+ SHEX("195495860f04"),
+ SHEX("a254dad4f3f96b62b84dc40c84636a5e"
+ "c12020ec8c2c"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("d1894728b3fed1473c528b8426a58299"
+ "5929a1499e9ad8780c8d63d0ab4149c0"),
+ SHEX("9f572c614b4745914474e7c7"),
+ SHEX("489c8fde2be2cf97e74e932d4ed87d"),
+ SHEX("c9882e5386fd9f92ec"),
+ SHEX("0df9e308678244c44bc0fd3dc6628dfe"
+ "55ebb0b9fb2295c8c2"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("a44102952ef94b02b805249bac80e6f6"
+ "1455bfac8308a2d40d8c845117808235"),
+ SHEX("5c9e940fea2f582950a70d5a"),
+ SHEX("0da55210cc1c1b0abde3b2f204d1e9f8"
+ "b06bc47f"),
+ SHEX("1db2316fd568378da107b52b"),
+ SHEX("8dbeb9f7255bf5769dd56692404099c2"
+ "587f64979f21826706d497d5"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("9745b3d1ae06556fb6aa7890bebc18fe"
+ "6b3db4da3d57aa94842b9803a96e07fb"),
+ SHEX("6de71860f762ebfbd08284e4"),
+ SHEX("f37de21c7ff901cfe8a69615a93fdf7a"
+ "98cad481796245709f"),
+ SHEX("21702de0de18baa9c9596291b08466"),
+ SHEX("793576dfa5c0f88729a7ed3c2f1bffb3"
+ "080d28f6ebb5d3648ce97bd5ba67fd"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("b18853f68d833640e42a3c02c25b6486"
+ "9e146d7b233987bddfc240871d7576f7"),
+ SHEX("028ec6eb5ea7e298342a94d4"),
+ SHEX("9c2159058b1f0fe91433a5bdc20e214e"
+ "ab7fecef4454a10ef0657df21ac7"),
+ SHEX("b202b370ef9768ec6561c4fe6b7e7296"
+ "fa85"),
+ SHEX("857e16a64915a787637687db4a951963"
+ "5cdd454fc2a154fea91f8363a39fec7d"
+ "0a49"));
+
+ test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV",
+ SHEX("3c535de192eaed3822a2fbbe2ca9dfc8"
+ "8255e14a661b8aa82cc54236093bbc23"),
+ SHEX("688089e55540db1872504e1c"),
+ SHEX("734320ccc9d9bbbb19cb81b2af4ecbc3"
+ "e72834321f7aa0f70b7282b4f33df23f"
+ "167541"),
+ SHEX("ced532ce4159b035277d4dfbb7db6296"
+ "8b13cd4eec"),
+ SHEX("626660c26ea6612fb17ad91e8e767639"
+ "edd6c9faee9d6c7029675b89eaf4ba1d"
+ "ed1a286594"));
+
+ /* RFC8452, Appendix C.3. */
+ test_siv_gcm_aes256 ("Counter wrap",
+ SHEX("00000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("000000000000000000000000"),
+ SHEX(""),
+ SHEX("00000000000000000000000000000000"
+ "4db923dc793ee6497c76dcc03a98e108"),
+ SHEX("f3f80f2cf0cb2dd9c5984fcda908456c"
+ "c537703b5ba70324a6793a7bf218d3ea"
+ "ffffffff000000000000000000000000"));
+
+ test_siv_gcm_aes256 ("Counter wrap",
+ SHEX("00000000000000000000000000000000"
+ "00000000000000000000000000000000"),
+ SHEX("000000000000000000000000"),
+ SHEX(""),
+ SHEX("eb3640277c7ffd1303c7a542d02d3e4c"
+ "0000000000000000"),
+ SHEX("18ce4f0b8cb4d0cac65fea8f79257b20"
+ "888e53e72299e56dffffffff00000000"
+ "0000000000000000"));
+}
diff --git a/testsuite/sm4-test.c b/testsuite/sm4-test.c
new file mode 100644
index 00000000..97d9d58a
--- /dev/null
+++ b/testsuite/sm4-test.c
@@ -0,0 +1,19 @@
+#include "testutils.h"
+#include "sm4.h"
+
+void
+test_main(void)
+{
+ /* test vectors from:
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ */
+ test_cipher(&nettle_sm4,
+ SHEX("0123456789ABCDEF FEDCBA9876543210"),
+ SHEX("0123456789ABCDEF FEDCBA9876543210"),
+ SHEX("681EDF34D206965E 86B3E94F536E4246"));
+
+ test_cipher(&nettle_sm4,
+ SHEX("FEDCBA9876543210 0123456789ABCDEF"),
+ SHEX("0001020304050607 08090A0B0C0D0E0F"),
+ SHEX("F766678F13F01ADE AC1B3EA955ADB594"));
+}
diff --git a/testsuite/testutils.c b/testsuite/testutils.c
index 0d91d8ef..39c6bece 100644
--- a/testsuite/testutils.c
+++ b/testsuite/testutils.c
@@ -1109,6 +1109,13 @@ mpz_urandomb (mpz_t r, struct knuth_lfib_ctx *ctx, mp_bitcnt_t bits)
nettle_mpz_set_str_256_u (r, bytes, buf);
free (buf);
}
+void
+mpz_urandomm (mpz_t r, struct knuth_lfib_ctx *ctx, const mpz_t n)
+{
+ /* Add some extra bits, to make result almost unbiased. */
+ mpz_urandomb(r, ctx, mpz_sizeinbase(n, 2) + 30);
+ mpz_mod(r, r, n);
+}
#else /* !NETTLE_USE_MINI_GMP */
static void
get_random_seed(mpz_t seed)
diff --git a/testsuite/testutils.h b/testsuite/testutils.h
index 3e239787..00555b3a 100644
--- a/testsuite/testutils.h
+++ b/testsuite/testutils.h
@@ -164,8 +164,10 @@ typedef struct knuth_lfib_ctx gmp_randstate_t[1];
void gmp_randinit_default (struct knuth_lfib_ctx *ctx);
#define gmp_randclear(state)
void mpz_urandomb (mpz_t r, struct knuth_lfib_ctx *ctx, mp_bitcnt_t bits);
+void mpz_urandomm (mpz_t r, struct knuth_lfib_ctx *ctx, const mpz_t n);
/* This is cheating */
#define mpz_rrandomb mpz_urandomb
+#define mpz_rrandomm mpz_urandomm
static inline int
test_randomize (gmp_randstate_t rands UNUSED) { return 0; }
#else /* !NETTLE_USE_MINI_GMP */
diff --git a/x86_64/fat/sha256-compress-2.asm b/x86_64/fat/sha256-compress-n-2.asm
index 996cf8c5..60f7c8f6 100644
--- a/x86_64/fat/sha256-compress-2.asm
+++ b/x86_64/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C x86_64/fat/sha256-compress-2.asm
+C x86_64/fat/sha256-compress-n-2.asm
ifelse(`
Copyright (C) 2018 Niels Möller
@@ -31,4 +31,4 @@ ifelse(`
')
define(`fat_transform', `$1_sha_ni')
-include_src(`x86_64/sha_ni/sha256-compress.asm')
+include_src(`x86_64/sha_ni/sha256-compress-n.asm')
diff --git a/x86_64/fat/sha256-compress.asm b/x86_64/fat/sha256-compress-n.asm
index 2aaeb5e8..fc358858 100644
--- a/x86_64/fat/sha256-compress.asm
+++ b/x86_64/fat/sha256-compress-n.asm
@@ -1,4 +1,4 @@
-C x86_64/fat/sha256-compress.asm
+C x86_64/fat/sha256-compress-n.asm
ifelse(`
Copyright (C) 2018 Niels Möller
@@ -31,4 +31,4 @@ ifelse(`
')
define(`fat_transform', `$1_x86_64')
-include_src(`x86_64/sha256-compress.asm')
+include_src(`x86_64/sha256-compress-n.asm')
diff --git a/x86_64/poly1305-blocks.asm b/x86_64/poly1305-blocks.asm
new file mode 100644
index 00000000..63bfed3e
--- /dev/null
+++ b/x86_64/poly1305-blocks.asm
@@ -0,0 +1,128 @@
+C x86_64/poly1305-blocks.asm
+
+ifelse(`
+ Copyright (C) 2022 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+ .file "poly1305-blocks.asm"
+
+define(`CTX', `%rdi') C First argument to all functions
+define(`BLOCKS', `%rsi')
+define(`MP_PARAM', `%rdx') C Moved to MP, to avoid colliding with the mul instruction's use of %rdx.
+
+define(`MP', `%r8') C Safe to clobber under both the unix and windows calling conventions.
+define(`T0', `%rbx')
+define(`T1', `%rcx')
+define(`H0', `%rbp')
+define(`H1', `%r9')
+define(`H2', `%r10')
+define(`F0', `%r11')
+define(`F1', `%r12')
+
+C const uint8_t *
+C _nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m)
+
+PROLOGUE(_nettle_poly1305_blocks)
+ W64_ENTRY(3, 0)
+ mov MP_PARAM, MP
+ test BLOCKS, BLOCKS
+ jz .Lend
+
+ push %rbx
+ push %rbp
+ push %r12
+ mov P1305_H0 (CTX), H0
+ mov P1305_H1 (CTX), H1
+ mov P1305_H2 (CTX), H2
+ ALIGN(16)
+.Loop:
+ mov (MP), T0
+ mov 8(MP), T1
+ add $16, MP
+
+ add H0, T0
+ adc H1, T1
+ adc $1, H2
+
+ mov P1305_R1 (CTX), %rax
+ mul T0 C R1*T0
+ mov %rax, F0
+ mov %rdx, F1
+
+ mov T0, %rax C Last use of T0 input
+ mov P1305_R0 (CTX), T0
+ mul T0 C R0*T0
+ mov %rax, H0
+ mov %rdx, H1
+
+ mov T1, %rax
+ mul T0 C R0*T1
+ add %rax, F0
+ adc %rdx, F1
+
+ mov P1305_S1 (CTX), T0
+ mov T1, %rax C Last use of T1 input
+ mul T0 C S1*T1
+ add %rax, H0
+ adc %rdx, H1
+
+ mov H2, %rax
+ mul T0 C S1*H2
+ add %rax, F0
+ adc %rdx, F1
+
+ mov H2, T0
+ and $3, H2
+
+ shr $2, T0
+ mov P1305_S0 (CTX), %rax
+ mul T0 C S0*(H2 >> 2)
+ add %rax, H0
+ adc %rdx, H1
+
+ imul P1305_R0 (CTX), H2 C R0*(H2 & 3)
+ add F0, H1
+ adc F1, H2
+
+ dec BLOCKS
+ jnz .Loop
+
+ mov H0, P1305_H0 (CTX)
+ mov H1, P1305_H1 (CTX)
+ mov H2, P1305_H2 (CTX)
+
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+.Lend:
+ mov MP, %rax
+ W64_EXIT(3, 0)
+ ret
+EPILOGUE(_nettle_poly1305_blocks)
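
The loop above keeps the 130-bit accumulator in H0:H1:H2 and folds each 16-byte block in with the 2^128 bit set (the "adc $1, H2"). A portable sketch of the same contract, assuming the existing single-block helper _nettle_poly1305_block (ctx, m, high) from poly1305-internal.h; the wrapper name is hypothetical:

#include "poly1305-internal.h"

const uint8_t *
poly1305_blocks_sketch (struct poly1305_ctx *ctx, size_t blocks,
                        const uint8_t *m)
{
  /* Absorb `blocks` full 16-byte blocks, each with the high bit set,
     and return a pointer just past the data consumed, like the asm. */
  for (; blocks > 0; blocks--, m += 16)
    _nettle_poly1305_block (ctx, m, 1);
  return m;
}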
diff --git a/x86_64/poly1305-internal.asm b/x86_64/poly1305-internal.asm
index ef2f38e4..7ce415a4 100644
--- a/x86_64/poly1305-internal.asm
+++ b/x86_64/poly1305-internal.asm
@@ -106,7 +106,7 @@ PROLOGUE(_nettle_poly1305_block)
adc P1305_H2 (CTX), T2
mov P1305_R1 (CTX), %rax
- mul T0 C R1 T0
+ mul T0 C R1*T0
mov %rax, F0
mov %rdx, F1
diff --git a/x86_64/sha256-compress.asm b/x86_64/sha256-compress-n.asm
index 5ed669b1..e10d260c 100644
--- a/x86_64/sha256-compress.asm
+++ b/x86_64/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C x86_64/sha256-compress.asm
+C x86_64/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2013 Niels Möller
+ Copyright (C) 2013, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -30,21 +30,24 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
- .file "sha256-compress.asm"
+ .file "sha256-compress-n.asm"
define(`STATE', `%rdi')
-define(`INPUT', `%rsi')
-define(`K', `%rdx')
+define(`K', `%rsi')
+define(`BLOCKS', `%rdx')
+define(`INPUT', `%rcx')
+define(`STATE_SAVED', `64(%rsp)')
+
define(`SA', `%eax')
define(`SB', `%ebx')
-define(`SC', `%ecx')
+define(`SC', `%ebp')
define(`SD', `%r8d')
define(`SE', `%r9d')
define(`SF', `%r10d')
define(`SG', `%r11d')
define(`SH', `%r12d')
define(`T0', `%r13d')
-define(`T1', `%edi') C Overlap STATE
-define(`COUNT', `%r14')
+define(`T1', `%r14d')
+define(`COUNT', `%rdi') C Overlap STATE
define(`W', `%r15d')
define(`EXPN', `
@@ -123,18 +126,21 @@ define(`NOEXPN', `
movl W, OFFSET($1)(%rsp, COUNT, 4)
')
- C void
- C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+ C const uint8_t *
+ C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ C size_t blocks, const uint8_t *input)
.text
ALIGN(16)
-PROLOGUE(_nettle_sha256_compress)
+PROLOGUE(_nettle_sha256_compress_n)
W64_ENTRY(3, 0)
+ test BLOCKS, BLOCKS
+ jz .Lend
sub $120, %rsp
- mov %rbx, 64(%rsp)
- mov STATE, 72(%rsp) C Save state, to free a register
+ mov STATE, STATE_SAVED C Save state, to free a register
+ mov %rbx, 72(%rsp)
mov %rbp, 80(%rsp)
mov %r12, 88(%rsp)
mov %r13, 96(%rsp)
@@ -149,7 +155,9 @@ PROLOGUE(_nettle_sha256_compress)
movl 20(STATE), SF
movl 24(STATE), SG
movl 28(STATE), SH
- xor COUNT, COUNT
+
+.Loop_block:
+ xorl XREG(COUNT), XREG(COUNT)
ALIGN(16)
.Loop1:
@@ -161,8 +169,8 @@ PROLOGUE(_nettle_sha256_compress)
NOEXPN(5) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,5)
NOEXPN(6) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,6)
NOEXPN(7) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,7)
- add $8, COUNT
- cmp $16, COUNT
+ addl $8, XREG(COUNT)
+ cmpl $16, XREG(COUNT)
jne .Loop1
.Loop2:
@@ -182,22 +190,35 @@ PROLOGUE(_nettle_sha256_compress)
EXPN(13) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,13)
EXPN(14) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,14)
EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,15)
- add $16, COUNT
- cmp $64, COUNT
+ addl $16, XREG(COUNT)
+ cmpl $64, XREG(COUNT)
jne .Loop2
- mov 72(%rsp), STATE
-
- addl SA, (STATE)
- addl SB, 4(STATE)
- addl SC, 8(STATE)
- addl SD, 12(STATE)
- addl SE, 16(STATE)
- addl SF, 20(STATE)
- addl SG, 24(STATE)
- addl SH, 28(STATE)
-
- mov 64(%rsp), %rbx
+ mov STATE_SAVED, STATE
+
+ addl (STATE), SA
+ addl 4(STATE), SB
+ addl 8(STATE), SC
+ addl 12(STATE), SD
+ addl 16(STATE), SE
+ addl 20(STATE), SF
+ addl 24(STATE), SG
+ addl 28(STATE), SH
+
+ movl SA, (STATE)
+ movl SB, 4(STATE)
+ movl SC, 8(STATE)
+ movl SD, 12(STATE)
+ movl SE, 16(STATE)
+ movl SF, 20(STATE)
+ movl SG, 24(STATE)
+ movl SH, 28(STATE)
+
+ add $64, INPUT
+ dec BLOCKS
+ jnz .Loop_block
+
+ mov 72(%rsp), %rbx
mov 80(%rsp), %rbp
mov 88(%rsp), %r12
mov 96(%rsp), %r13
@@ -205,6 +226,8 @@ PROLOGUE(_nettle_sha256_compress)
mov 112(%rsp),%r15
add $120, %rsp
+.Lend:
+ mov INPUT, %rax
W64_EXIT(3, 0)
ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)
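
The new calling convention is spelled out in the prologue comment: the routine takes the state, the round-constant table, a block count, and the input pointer, and returns a pointer just past the last block consumed. A minimal C sketch of that contract, with compress_one a hypothetical stand-in for the round loop (not a Nettle symbol):

#include <stddef.h>
#include <stdint.h>

/* Hypothetical single-block compression step. */
extern void compress_one (uint32_t *state, const uint8_t *input,
                          const uint32_t *k);

const uint8_t *
sha256_compress_n_sketch (uint32_t *state, const uint32_t *k,
                          size_t blocks, const uint8_t *input)
{
  /* For blocks == 0, return the input pointer unchanged, matching the
     early jump to .Lend above. */
  for (; blocks > 0; blocks--, input += 64)
    compress_one (state, input, k);
  return input;
}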
diff --git a/x86_64/sha_ni/sha256-compress.asm b/x86_64/sha_ni/sha256-compress-n.asm
index 00bd3cd3..005909df 100644
--- a/x86_64/sha_ni/sha256-compress.asm
+++ b/x86_64/sha_ni/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C x86_64/sha_ni/sha256-compress.asm
+C x86_64/sha_ni/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2018 Niels Möller
+ Copyright (C) 2018, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -30,10 +30,11 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
- .file "sha256-compress.asm"
+ .file "sha256-compress-n.asm"
define(`STATE', `%rdi')
-define(`INPUT', `%rsi')
-define(`K', `%rdx')
+define(`K', `%rsi')
+define(`BLOCKS', `%rdx')
+define(`INPUT', `%rcx')
define(`MSGK',`%xmm0') C Implicit operand of sha256rnds2
define(`MSG0',`%xmm1')
@@ -45,7 +46,7 @@ define(`CDGH',`%xmm6')
define(`ABEF_ORIG',`%xmm7')
define(`CDGH_ORIG', `%xmm8')
define(`SWAP_MASK',`%xmm9')
-define(`TMP', `%xmm9') C Overlaps SWAP_MASK
+define(`TMP', `%xmm10')
C QROUND(M0, M1, M2, M3, R)
define(`QROUND', `
@@ -69,15 +70,19 @@ define(`TRANSPOSE', `
punpcklqdq $1, $3
')
- C void
- C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+ C const uint8_t *
+ C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ C size_t blocks, const uint8_t *input)
.text
ALIGN(16)
.Lswap_mask:
.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
-PROLOGUE(_nettle_sha256_compress)
- W64_ENTRY(3, 10)
+PROLOGUE(_nettle_sha256_compress_n)
+ W64_ENTRY(4, 11)
+ test BLOCKS, BLOCKS
+ jz .Lend
+
movups (STATE), TMP
movups 16(STATE), ABEF
@@ -88,12 +93,13 @@ PROLOGUE(_nettle_sha256_compress)
movdqa .Lswap_mask(%rip), SWAP_MASK
- movdqa ABEF, ABEF_ORIG
- movdqa CDGH, CDGH_ORIG
-
+.Loop:
movups (INPUT), MSG0
pshufb SWAP_MASK, MSG0
+ movdqa ABEF, ABEF_ORIG
+ movdqa CDGH, CDGH_ORIG
+
movdqa (K), MSGK
paddd MSG0, MSGK
sha256rnds2 ABEF, CDGH C Round 0-1
@@ -163,6 +169,10 @@ PROLOGUE(_nettle_sha256_compress)
paddd ABEF_ORIG, ABEF
paddd CDGH_ORIG, CDGH
+ add $64, INPUT
+ dec BLOCKS
+ jnz .Loop
+
TRANSPOSE(ABEF, CDGH, TMP)
pshufd $0x1b, CDGH, CDGH
@@ -170,6 +180,8 @@ PROLOGUE(_nettle_sha256_compress)
movups CDGH, 0(STATE)
movups TMP, 16(STATE)
- W64_EXIT(3, 10)
+.Lend:
+ mov INPUT, %rax
+ W64_EXIT(4, 11)
ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)