diff options
108 files changed, 5011 insertions, 606 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ed15456f..0f10d9fd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -144,8 +144,8 @@ Debian.cross.x86: - apt-get update -q # remove any previously installed nettle headers to avoid conflicts - for arch in armhf arm64 ppc64el;do apt-get remove -y nettle-dev:$arch;done - - if [ "$host" == "powerpc64-linux-gnu" ];then apt-get update && apt-get install -y gcc-$host g++-$host && export QEMU_LD_PREFIX=/usr/$host EXTRA_CONFIGURE_FLAGS='--enable-mini-gmp';fi - - if [ "$host" == "powerpc64le-linux-gnu" ];then apt-get update && apt-get install -y gcc-$host g++-$host libgmp-dev:ppc64el && export QEMU_LD_PREFIX=/usr/$host;fi + - if [ "$host" == "powerpc64-linux-gnu" ];then apt-get install -y software-properties-common && add-apt-repository "deb http://deb.debian.org/debian bullseye-backports main" && apt-get update && apt-get install -y -t bullseye-backports binfmt-support qemu-user && apt-get install -y gcc-$host g++-$host && export QEMU_LD_PREFIX=/usr/$host EXTRA_CONFIGURE_FLAGS='--enable-mini-gmp';fi + - if [ "$host" == "powerpc64le-linux-gnu" ];then apt-get install -y software-properties-common && add-apt-repository "deb http://deb.debian.org/debian bullseye-backports main" && apt-get update && apt-get install -y -t bullseye-backports binfmt-support qemu-user && apt-get install -y gcc-$host g++-$host libgmp-dev:ppc64el && export QEMU_LD_PREFIX=/usr/$host;fi - if [ "$host" == "s390x-linux-gnu" ];then apt-get update && apt-get install -y gcc-$host g++-$host libgmp-dev:s390x && export EXTRA_CONFIGURE_FLAGS='--disable-assembler';fi script: - build=$(dpkg-architecture -qDEB_HOST_GNU_TYPE) @@ -96,7 +96,7 @@ Amos Jeffries Implementation of base64url encoding. Daiki Ueno Implementation of RSA-PSS signatures, curve448, shake256, ed448-shake256 signatures, chacha functions for 32-bit nonce, struct - nettle_mac interface. + nettle_mac interface, siv-gcm. Dmitry Baryshkov CFB and CFB8 modes, CMAC64. 
gosthash94cp and Streebog hash functions, GOST DSA signatures @@ -121,10 +121,12 @@ Mamone Tarsha Kurdi Powerpc64 assembly and fat build setup, Nicolas Mora RFC 3394 keywrap. -Tianjia Zhang SM3 hash function. +Tianjia Zhang SM3 hash function, SM4 block cipher. Amitay Isaacs Powerpc64 assembly for secp192r1, secp224r1 and secp256r1. Martin Schwenke Powerpc64 assembly for secp384r1, secp521r1, curve25519 and curve448. + +Zoltan Fridrich Balloon password hashing. @@ -1,3 +1,190 @@ +2022-11-09 Niels Möller <nisse@lysator.liu.se> + + From Mamone Tarsha: + * powerpc64/p9/poly1305-blocks.asm: New file, multi-block radix + 2^44 implementation. Benchmarked to give a speedup of 3.2 times on + Power9. + * powerpc64/p9/poly1305.m4 (DEFINES_BLOCK_R64, BLOCK_R64): New + file, new macros. + * powerpc64/p9/poly1305-internal.asm: Use BLOCK_R64 macro. + * powerpc64/machine.m4 (INC_GPR, INC_VR): New macros. + * powerpc64/fat/poly1305-blocks.asm: New file. + * poly1305-update.c: Check HAVE_NATIVE_fat_poly1305_blocks, and + define _nettle_poly1305_blocks_c when needed. + * fat-ppc.c: Fat setup for _nettle_poly1305_blocks. + +2022-11-07 Niels Möller <nisse@lysator.liu.se> + + * configure.ac (ASM_FLAGS): New configure environment variable. + * aclocal.m4 (GMP_TRY_ASSEMBLE): Use $ASM_FLAGS. + * config.make.in (ASM_FLAGS): Add substitution. + * Makefile.in: Use $(ASM_FLAGS) when compiling .asm files. + +2022-10-31 Niels Möller <nisse@lysator.liu.se> + + * configure.ac: (asm_file_list): Add HAVE_NATIVE_poly1305_blocks. + (asm_nettle_optional_list): Add poly1305-blocks.asm. + * x86_64/poly1305-blocks.asm: New file. + + * md-internal.h (MD_FILL_OR_RETURN_INDEX): New macro. + * poly1305-update.c (_nettle_poly1305_update): New file and + function. + * poly1305-internal.h: Declare _nettle_poly1305_blocks and + _nettle_poly1305_update. + * chacha-poly1305.c (poly1305_update): Use _nettle_poly1305_update. + * poly1305-aes.c (poly1305_aes_update): Likewise. 
+ * Makefile.in (nettle_SOURCES): Add poly1305-update.c. + +2022-10-13 Niels Möller <nisse@lysator.liu.se> + + * gmp-glue.c (mpn_sec_tabselect) [NETTLE_USE_MINI_GMP]: Add back + here, to support mini-gmp builds. Updated signature to be + compatible with the gmp version. + * gmp-glue.h: Add declaration. + +2022-10-11 Niels Möller <nisse@lysator.liu.se> + + * sec-tabselect.c (sec_tabselect): Delete file and function. All + callers updated to use gmp's mpn_sec_tabselect instead, which is + implemented in assembly on many platforms. + +2022-10-02 Niels Möller <nisse@lysator.liu.se> + + * examples/ecc-benchmark.c (bench_curve): Add benchmarking of + modulo q inversion. + +2022-09-29 Niels Möller <nisse@lysator.liu.se> + + * ecc-ecdsa-verify.c (ecc_ecdsa_verify): Call ecc_mul_g and ecc_mul_a directly, not via + function pointers. + (ecc_ecdsa_verify_itch): Use ECC_MUL_A_ITCH + rather than ecc->mul_itch. + * ecc-gostdsa-verify.c (ecc_gostdsa_verify_itch) + (ecc_gostdsa_verify): Analogous changes. + + * ecc-ecdsa-sign.c (ecc_ecdsa_sign): Call ecc_mul_g and ecc_j_to_a + directly, not via function pointers. + (ecc_ecdsa_sign_itch): Use ECC_MUL_G_ITCH rather than + ecc->mul_g_itch. + * ecc-gostdsa-sign.c (ecc_gostdsa_sign_itch, ecc_gostdsa_sign): + Analogous changes. + +2022-09-28 Niels Möller <nisse@lysator.liu.se> + + * testsuite/meta-hash-test.c (test_main): Add check of + NETTLE_MAX_HASH_BLOCK_SIZE. + * nettle-internal.h (NETTLE_MAX_HASH_BLOCK_SIZE): Increase to 144, + to accommodate sha3_224. + * testsuite/meta-cipher-test.c (test_main): Check that cipher + metadata doesn't exceed NETTLE_MAX_CIPHER_BLOCK_SIZE or + NETTLE_MAX_CIPHER_KEY_SIZE. + + From Daiki Ueno: + * siv-gcm.c (siv_gcm_encrypt_message, siv_gcm_decrypt_message): + New file, implementation of SIV-GCM. + * siv-gcm.h (SIV_GCM_BLOCK_SIZE, SIV_GCM_DIGEST_SIZE) + (SIV_GCM_NONCE_SIZE): New header file, new constants and + declarations. 
+ * siv-gcm-aes128.c (siv_gcm_aes128_encrypt_message) + (siv_gcm_aes128_decrypt_message): New file and functions. + * siv-gcm-aes256.c (siv_gcm_aes256_encrypt_message) + (siv_gcm_aes256_decrypt_message): Likewise. + * siv-ghash-set-key.c (_siv_ghash_set_key): New file, new internal + function. + * siv-ghash-update.c (_siv_ghash_update): Likewise. + * block-internal.h (block16_bswap): New inline function. + * bswap-internal.h (bswap64_if_be): New macro. + * nettle-internal.h (NETTLE_MAX_CIPHER_KEY_SIZE): New constant. + * Makefile.in (nettle_SOURCES): Add new source files. + (HEADERS): Add siv-gcm.h. + * testsuite/siv-gcm-test.c: New tests. + * testsuite/Makefile.in (TS_NETTLE_SOURCES): Add siv-gcm-test.c. + * nettle.texinfo (SIV-GCM): Documentation. + + From Zoltan Fridrich: + * balloon.c (balloon, balloon_itch): Implementation of balloon + password hash. + * balloon.h: New header file. + * balloon-sha1.c (balloon_sha1): New file and function. + * balloon-sha256.c (balloon_sha256): Likewise. + * balloon-sha384.c (balloon_sha384): Likewise. + * balloon-sha512.c (balloon_sha512): Likewise. + * Makefile.in (nettle_SOURCES): Add balloon source files. + (HEADERS): Add balloon.h. + * testsuite/balloon-test.c: New tests. + * testsuite/Makefile.in (TS_NETTLE_SOURCES): Add balloon-test.c. + +2022-09-14 Niels Möller <nisse@lysator.liu.se> + + * ecc-nonsec-add-jjj.c (ecc_nonsec_add_jjj): New file and + function. + * ecc-internal.h: Declare it. + * Makefile.in (hogweed_SOURCES): Add ecc-nonsec-add-jjj.c. + * testsuite/ecc-add-test.c (test_main): Add tests for ecc_nonsec_add_jjj. + + * ecc-ecdsa-verify.c (ecc_ecdsa_verify): Use ecc_nonsec_add_jjj, + to produce correct result in a corner case where point addition + needs to use point duplication. Also use ecc_j_to_a rather than + ecc->h_to_a, since ecdsa supports only weierstrass curves. + * ecc-gostdsa-verify.c (ecc_gostdsa_verify): Analogous change. + + * testsuite/ecdsa-verify-test.c (test_main): Add corresponding test. 
+ * testsuite/ecdsa-sign-test.c (test_main): And a test producing + the problematic signature. + +2022-09-08 Niels Möller <nisse@lysator.liu.se> + + * eccdata.c (string_toupper): New utility function. + (output_modulo): Move more of the per-modulo output here. + (output_curve): Remove corresponding code. + +2022-08-31 Niels Möller <nisse@lysator.liu.se> + + * bswap-internal.h (nettle_bswap64, nettle_bswap32) + (bswap64_if_le): New header file, new inline functions/macros. + * gcm.c (gcm_hash_sizes): Use bswap64_if_le, and bswap-internal.h, + replacing local definition of bswap_if_le. + * nist-keywrap.c (nist_keywrap16): Likewise. + * blowfish-bcrypt.c (swap32): Renamed function, to... + (bswap32_if_le): ...new name, rewritten to use nettle_bswap32. + Update call sites. + * Makefile.in (DISTFILES): Add bswap-internal.h. + +2022-08-18 Niels Möller <nisse@lysator.liu.se> + + * Makefile.in (HEADERS): Add sm4.h. + + From Tianjia Zhang: SM4 block cipher. + * sm4.c: New file. + * sm4.h: New file. + * sm4-meta.c: New file. + * gcm-sm4.c: New file + * gcm-sm4-meta.c: New file. + * nettle.texinfo: Document SM4. + * testsuite/gcm-test.c (test_main): Add SM4 tests. + * testsuite/sm4-test.c: New file. + + * configure.ac (ABI): Change mips abi check to apply only to mips64. + +2022-08-17 Niels Möller <nisse@lysator.liu.se> + + * testsuite/testutils.c (mpz_urandomm) [NETTLE_USE_MINI_GMP]: New + fallback definition when building with mini-gmp. + +2022-08-16 Niels Möller <nisse@lysator.liu.se> + + * ecc-mod-arith.c (ecc_mod_sub): Ensure that if inputs are in the + range 0 <= a, b < 2m, then output is in the same range. + * eccdata.c (output_curve): New outputs ecc_Bm2p and ecc_Bm2q. + * ecc-internal.h (struct ecc_modulo): New member Bm2m (B^size - + 2m), needed by ecc_mod_sub. Update all curves. + * testsuite/ecc-mod-arith-test.c: New tests for ecc_mod_add and + ecc_mod_sub. + + * eccdata.c (output_modulo): Output the limb size, delete return + value. 
+ (output_curve): Update calls to output_modulo, other minor cleanup. + 2022-08-07 Niels Möller <nisse@lysator.liu.se> Delete all arcfour assembly code. @@ -8,6 +195,15 @@ * x86/arcfour-crypt.asm: Deleted. * asm.m4: Delete arcfour structure offsets. +2022-08-07 Niels Möller <nisse@lysator.liu.se> + + Based on patch from Corentin Labbe: + * nettle.texinfo: Document sha256_compress, sha512_compress, + md5_compress and sha1_compress. + + * configure.ac: Refer to nettle-types.h, rather than arcfour.c, + for AC_CONFIG_SRCDIR. + 2022-08-05 Niels Möller <nisse@lysator.liu.se> * nettle-internal.h: Include stdlib.h, fix alloca warnings on BSD. @@ -23,6 +219,48 @@ * aclocal.m4 (LSH_CCPIC): Use proper PIC flag for *BSD OS's. * blowfish-bcrypt.c (swap32): Eliminate conflict with OpenBSD's swap32 macro. +2022-07-29 Niels Möller <nisse@lysator.liu.se> + + * s390x/msa_x1/sha256-compress-n.asm: New file. replacing... + * s390x/msa_x1/sha256-compress.asm: ...deleted file. + * s390x/fat/sha256-compress-n-2.asm: New file. replacing... + * s390x/fat/sha256-compress-2.asm: ...deleted file. + * fat-s390x.c: Update fat setup. + +2022-07-26 Niels Möller <nisse@lysator.liu.se> + + * arm/v6/sha256-compress-n.asm: New file. replacing... + * arm/v6/sha256-compress.asm: ...deleted file. + * arm/fat/sha256-compress-n-2.asm: New file. replacing... + * arm/fat/sha256-compress-2.asm: ...deleted file. + * fat-arm.c: Update fat setup. + +2022-07-11 Niels Möller <nisse@lysator.liu.se> + + * arm64/crypto/sha256-compress-n.asm: New file. replacing... + * arm64/crypto/sha256-compress.asm: ...deleted file. + * arm64/fat/sha256-compress-n-2.asm: New file. replacing... + * arm64/fat/sha256-compress-2.asm: ...deleted file. + * fat-arm64.c: Update fat setup. + +2022-07-05 Niels Möller <nisse@lysator.liu.se> + + * md-internal.h (MD_FILL_OR_RETURN): New file, new macro. + * sha256-compress-n.c (_nettle_sha256_compress_n): New file and + function, replacing... 
+ * sha256-compress.c (_nettle_sha256_compress): ...deleted file and + function. + * sha2-internal.h (_nettle_sha256_compress_n): Declare new function.. + * sha256.c (sha256_compress): Update to use + _nettle_sha256_compress_n and MD_FILL_OR_RETURN. + * x86_64/sha256-compress-n.asm: New file. replacing... + * x86_64/sha256-compress.asm: ...deleted file. + * x86_64/sha_ni/sha256-compress-n.asm: New file. replacing... + * x86_64/sha_ni/sha256-compress.asm: ...deleted file. + * fat-setup.h (sha256_compress_n_func): New typedef, replacing... + (sha256_compress_func): ... deleted typedef. + * fat-x86_64.c: Update fat setup. + 2022-06-20 Niels Möller <nisse@lysator.liu.se> * testsuite/sha1-test.c (test_sha1_compress): New function. diff --git a/Makefile.in b/Makefile.in index 4b4672fa..cd4993e8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -83,6 +83,8 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \ nist-keywrap.c \ arcfour.c \ arctwo.c arctwo-meta.c blowfish.c blowfish-bcrypt.c \ + balloon.c balloon-sha1.c balloon-sha256.c \ + balloon-sha384.c balloon-sha512.c \ base16-encode.c base16-decode.c base16-meta.c \ base64-encode.c base64-decode.c base64-meta.c \ base64url-encode.c base64url-decode.c base64url-meta.c \ @@ -100,18 +102,22 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \ cbc.c cbc-aes128-encrypt.c cbc-aes192-encrypt.c cbc-aes256-encrypt.c \ ccm.c ccm-aes128.c ccm-aes192.c ccm-aes256.c cfb.c \ siv-cmac.c siv-cmac-aes128.c siv-cmac-aes256.c \ + siv-gcm.c siv-gcm-aes128.c siv-gcm-aes256.c \ cnd-memcpy.c \ chacha-crypt.c chacha-core-internal.c \ chacha-poly1305.c chacha-poly1305-meta.c \ chacha-set-key.c chacha-set-nonce.c \ ctr.c ctr16.c des.c des3.c \ eax.c eax-aes128.c eax-aes128-meta.c \ - ghash-set-key.c ghash-update.c gcm.c gcm-aes.c \ + ghash-set-key.c ghash-update.c \ + siv-ghash-set-key.c siv-ghash-update.c \ + gcm.c gcm-aes.c \ gcm-aes128.c gcm-aes128-meta.c \ gcm-aes192.c gcm-aes192-meta.c \ 
gcm-aes256.c gcm-aes256-meta.c \ gcm-camellia128.c gcm-camellia128-meta.c \ gcm-camellia256.c gcm-camellia256-meta.c \ + gcm-sm4.c gcm-sm4-meta.c \ cmac.c cmac64.c cmac-aes128.c cmac-aes256.c cmac-des3.c \ cmac-aes128-meta.c cmac-aes256-meta.c cmac-des3-meta.c \ gost28147.c gosthash94.c gosthash94-meta.c \ @@ -130,7 +136,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \ nettle-meta-ciphers.c nettle-meta-hashes.c nettle-meta-macs.c \ pbkdf2.c pbkdf2-hmac-gosthash94.c pbkdf2-hmac-sha1.c \ pbkdf2-hmac-sha256.c pbkdf2-hmac-sha384.c pbkdf2-hmac-sha512.c \ - poly1305-aes.c poly1305-internal.c \ + poly1305-aes.c poly1305-internal.c poly1305-update.c \ realloc.c \ ripemd160.c ripemd160-compress.c ripemd160-meta.c \ salsa20-core-internal.c salsa20-crypt-internal.c \ @@ -138,7 +144,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \ salsa20-set-nonce.c \ salsa20-128-set-key.c salsa20-256-set-key.c \ sha1.c sha1-compress.c sha1-meta.c \ - sha256.c sha256-compress.c sha224-meta.c sha256-meta.c \ + sha256.c sha256-compress-n.c sha224-meta.c sha256-meta.c \ sha512.c sha512-compress.c sha384-meta.c sha512-meta.c \ sha512-224-meta.c sha512-256-meta.c \ sha3.c sha3-permute.c \ @@ -150,6 +156,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \ serpent-meta.c \ streebog.c streebog-meta.c \ twofish.c twofish-meta.c \ + sm4.c sm4-meta.c \ umac-nh.c umac-nh-n.c umac-l2.c umac-l3.c \ umac-poly64.c umac-poly128.c umac-set-key.c \ umac32.c umac64.c umac96.c umac128.c \ @@ -187,7 +194,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \ dsa2sexp.c sexp2dsa.c \ pgp-encode.c rsa2openpgp.c \ der-iterator.c der2rsa.c der2dsa.c \ - sec-add-1.c sec-sub-1.c sec-tabselect.c \ + sec-add-1.c sec-sub-1.c \ gmp-glue.c cnd-copy.c \ ecc-mod.c ecc-mod-inv.c \ ecc-mod-arith.c ecc-pp1-redc.c ecc-pm1-redc.c \ @@ -196,7 +203,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \ ecc-secp192r1.c ecc-secp224r1.c ecc-secp256r1.c \ 
ecc-secp384r1.c ecc-secp521r1.c \ ecc-size.c ecc-j-to-a.c ecc-a-to-j.c \ - ecc-dup-jj.c ecc-add-jja.c ecc-add-jjj.c \ + ecc-dup-jj.c ecc-add-jja.c ecc-add-jjj.c ecc-nonsec-add-jjj.c \ ecc-eh-to-a.c \ ecc-dup-eh.c ecc-add-eh.c ecc-add-ehh.c \ ecc-dup-th.c ecc-add-th.c ecc-add-thh.c \ @@ -218,7 +225,7 @@ hogweed_SOURCES = sexp.c sexp-format.c \ OPT_SOURCES = fat-arm.c fat-arm64.c fat-ppc.c fat-s390x.c fat-x86_64.c mini-gmp.c -HEADERS = aes.h arcfour.h arctwo.h asn1.h blowfish.h \ +HEADERS = aes.h arcfour.h arctwo.h asn1.h blowfish.h balloon.h \ base16.h base64.h bignum.h buffer.h camellia.h cast128.h \ cbc.h ccm.h cfb.h chacha.h chacha-poly1305.h ctr.h \ curve25519.h curve448.h des.h dsa.h dsa-compat.h eax.h \ @@ -226,15 +233,15 @@ HEADERS = aes.h arcfour.h arctwo.h asn1.h blowfish.h \ gcm.h gostdsa.h gosthash94.h hmac.h \ knuth-lfib.h hkdf.h \ macros.h \ - cmac.h siv-cmac.h \ + cmac.h siv-cmac.h siv-gcm.h \ md2.h md4.h \ md5.h md5-compat.h \ memops.h memxor.h \ nettle-meta.h nettle-types.h \ pbkdf2.h \ pgp.h pkcs1.h pss.h pss-mgf1.h realloc.h ripemd160.h rsa.h \ - salsa20.h sexp.h \ - serpent.h sha.h sha1.h sha2.h sha3.h sm3.h streebog.h twofish.h \ + salsa20.h sexp.h serpent.h \ + sha.h sha1.h sha2.h sha3.h sm3.h sm4.h streebog.h twofish.h \ umac.h yarrow.h xts.h poly1305.h nist-keywrap.h INSTALL_HEADERS = $(HEADERS) version.h @IF_MINI_GMP@ mini-gmp.h @@ -257,10 +264,11 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \ INSTALL NEWS ChangeLog \ nettle.pc.in hogweed.pc.in \ desdata.stamp $(des_headers) descore.README \ - aes-internal.h block-internal.h blowfish-internal.h camellia-internal.h \ + aes-internal.h block-internal.h blowfish-internal.h bswap-internal.h \ + camellia-internal.h \ ghash-internal.h gost28147-internal.h poly1305-internal.h \ serpent-internal.h cast128_sboxes.h desinfo.h desCode.h \ - ripemd160-internal.h sha2-internal.h \ + ripemd160-internal.h md-internal.h sha2-internal.h \ memxor-internal.h nettle-internal.h nettle-write.h \ 
ctr-internal.h chacha-internal.h sha3-internal.h \ salsa20-internal.h umac-internal.h hogweed-internal.h \ @@ -290,7 +298,7 @@ libhogweed.a: $(hogweed_OBJS) %.$(OBJEXT): %.asm $(srcdir)/m4-utils.m4 $(srcdir)/asm.m4 config.m4 machine.m4 $(M4) $(srcdir)/m4-utils.m4 $(srcdir)/asm.m4 config.m4 machine.m4 $< >$*.s - $(COMPILE) -c $*.s + $(COMPILE) $(ASM_FLAGS) -c $*.s %.$(OBJEXT): %.c $(COMPILE) -c $< \ @@ -302,7 +302,7 @@ AC_DEFUN([GMP_TRY_ASSEMBLE], [cat >conftest.s <<EOF [$1] EOF -gmp_assemble="$CC $CFLAGS $CPPFLAGS -c conftest.s >conftest.out 2>&1" +gmp_assemble="$CC $CFLAGS $CPPFLAGS $ASM_FLAGS -c conftest.s >conftest.out 2>&1" if AC_TRY_EVAL(gmp_assemble); then cat conftest.out >&AC_FD_CC ifelse([$2],,:,[$2]) @@ -563,7 +563,7 @@ dnl Determine whether the assembler takes powerpc registers with an "r" as dnl in "r6", or as plain "6". The latter is standard, but NeXT, Rhapsody, dnl and MacOS-X require the "r" forms. dnl -dnl See also mpn/powerpc32/powerpc-defs.m4 which uses the result of this +dnl See also powerpc64/machine.m4 which uses the result of this dnl test. AC_DEFUN([GMP_ASM_POWERPC_R_REGISTERS], diff --git a/arm/fat/sha256-compress-2.asm b/arm/fat/sha256-compress-n-2.asm index 36d55e4b..8834d93d 100644 --- a/arm/fat/sha256-compress-2.asm +++ b/arm/fat/sha256-compress-n-2.asm @@ -1,4 +1,4 @@ -C arm/fat/sha256-compress-2.asm +C arm/fat/sha256-compress-n-2.asm ifelse(` @@ -31,7 +31,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. 
') -dnl PROLOGUE(_nettle_sha256_compress) picked up by configure +dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure define(`fat_transform', `$1_armv6') -include_src(`arm/v6/sha256-compress.asm') +include_src(`arm/v6/sha256-compress-n.asm') diff --git a/arm/v6/sha256-compress.asm b/arm/v6/sha256-compress-n.asm index 3c021284..bf225bd8 100644 --- a/arm/v6/sha256-compress.asm +++ b/arm/v6/sha256-compress-n.asm @@ -1,7 +1,7 @@ -C arm/v6/sha256-compress.asm +C arm/v6/sha256-compress-n.asm ifelse(` - Copyright (C) 2013 Niels Möller + Copyright (C) 2013, 2022 Niels Möller This file is part of GNU Nettle. @@ -30,13 +30,14 @@ ifelse(` not, see http://www.gnu.org/licenses/. ') - .file "sha256-compress.asm" + .file "sha256-compress-n.asm" .arch armv6 define(`STATE', `r0') -define(`INPUT', `r1') -define(`K', `r2') -define(`SA', `r3') +define(`K', `r1') +define(`BLOCKS', `r2') +define(`INPUT', `r3') +define(`SA', `r2') C Overlap BLOCKS define(`SB', `r4') define(`SC', `r5') define(`SD', `r6') @@ -45,12 +46,12 @@ define(`SF', `r8') define(`SG', `r10') define(`SH', `r11') define(`T0', `r12') -define(`T1', `r1') C Overlap INPUT +define(`T1', `r3') C Overlap INPUT define(`COUNT', `r0') C Overlap STATE define(`W', `r14') -C Used for data load -define(`I0', `r3') +C Used for data load. 
Must not clobber STATE (r0), K (r1) or INPUT (r3) +define(`I0', `r2') define(`I1', `r4') define(`I2', `r5') define(`I3', `r6') @@ -88,7 +89,7 @@ C S1(E) = E<<<26 ^ E<<<21 ^ E<<<7 C S0(A) = A<<<30 ^ A<<<19 ^ A<<<10 C Choice (E, F, G) = G^(E&(F^G)) C Majority (A,B,C) = (A&B) + (C&(A^B)) - + define(`ROUND', ` ror T0, $5, #6 eor T0, T0, $5, ror #11 @@ -117,16 +118,31 @@ define(`NOEXPN', ` ldr W, [sp, + $1] add $1, $1, #4 ') - C void - C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) - .text .align 2 -PROLOGUE(_nettle_sha256_compress) - push {r4,r5,r6,r7,r8,r10,r11,r14} - sub sp, sp, #68 - str STATE, [sp, #+64] +define(`SHIFT_OFFSET', 64) +define(`INPUT_OFFSET', 68) +define(`I0_OFFSET', 72) +define(`STATE_OFFSET', 76) +define(`K_OFFSET', 80) +define(`BLOCKS_OFFSET', 84) + + C const uint8_t * + C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, + C size_t blocks, const uint8_t *input) + +PROLOGUE(_nettle_sha256_compress_n) + cmp BLOCKS, #0 + bne .Lwork + + mov r0, INPUT + bx lr + +.Lwork: + C Also save STATE (r0), K (r1) and BLOCKS (r2) + push {r0,r1,r2,r4,r5,r6,r7,r8,r10,r11,r12,r14} + sub sp, sp, #STATE_OFFSET C Load data up front, since we don't have enough registers C to load and shift on-the-fly @@ -144,6 +160,9 @@ IF_BE(` lsr I1, T0, SHIFT') C because there is no rotate left IF_BE(` rsb SHIFT, SHIFT, #32') + str SHIFT, [sp, #SHIFT_OFFSET] + +.Loop_block: mov DST, sp mov ILEFT, #4 .Lcopy: @@ -164,7 +183,12 @@ IF_LE(` rev I3, I3') stm DST!, {I0,I1,I2,I3} mov I0, I4 bne .Lcopy - + + str INPUT, [sp, #INPUT_OFFSET] + str I0, [sp, #I0_OFFSET] + + C Process block, with input at sp, expanded on the fly + ldm STATE, {SA,SB,SC,SD,SE,SF,SG,SH} mov COUNT,#0 @@ -203,20 +227,40 @@ IF_LE(` rev I3, I3') EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA) bne .Loop2 - ldr STATE, [sp, #+64] + ldr STATE, [sp, #STATE_OFFSET] C No longer needed registers - ldm STATE, {r1,r2,r12,r14} - add SA, SA, r1 - add SB, SB, r2 - add SC, SC, r12 - add SD, SD, 
r14 + ldm STATE, {K, T1, T0, W} + add SA, SA, K + add SB, SB, T1 + add SC, SC, T0 + add SD, SD, W stm STATE!, {SA,SB,SC,SD} - ldm STATE, {r1,r2,r12,r14} - add SE, SE, r1 - add SF, SF, r2 - add SG, SG, r12 - add SH, SH, r14 - stm STATE!, {SE,SF,SG,SH} - add sp, sp, #68 - pop {r4,r5,r6,r7,r8,r10,r11,pc} -EPILOGUE(_nettle_sha256_compress) + ldm STATE, {K, T1, T0, W} + add SE, SE, K + add SF, SF, T1 + add SG, SG, T0 + add SH, SH, W + stm STATE, {SE,SF,SG,SH} + sub STATE, STATE, #16 + + ldr BLOCKS, [sp, #BLOCKS_OFFSET] + subs BLOCKS, BLOCKS, #1 + str BLOCKS, [sp, #BLOCKS_OFFSET] + + ldr SHIFT, [sp, #SHIFT_OFFSET] + ldr K, [sp, #K_OFFSET] + ldr INPUT, [sp, #INPUT_OFFSET] + ldr I0, [sp, #I0_OFFSET] + + bne .Loop_block + + C Restore input pointer adjustment +IF_BE(` rsbs SHIFT, SHIFT, #32') +IF_LE(` cmp SHIFT, #0') + subne INPUT, INPUT, #4 + orr r0, INPUT, SHIFT, lsr #3 + + C Discard saved STATE, K and BLOCKS. + add sp, sp, #STATE_OFFSET + 12 + pop {r4,r5,r6,r7,r8,r10,r11,r12,pc} +EPILOGUE(_nettle_sha256_compress_n) diff --git a/arm64/crypto/sha256-compress.asm b/arm64/crypto/sha256-compress-n.asm index 2bddea05..447dc590 100644 --- a/arm64/crypto/sha256-compress.asm +++ b/arm64/crypto/sha256-compress-n.asm @@ -1,4 +1,4 @@ -C arm64/crypto/sha256-compress.asm +C arm64/crypto/sha256-compress-n.asm ifelse(` Copyright (C) 2021 Mamone Tarsha @@ -37,7 +37,7 @@ C SHA256H2: SHA256 hash update (part 2) C SHA256SU0: SHA256 schedule update 0 C SHA256SU1: SHA256 schedule update 1 -.file "sha256-compress.asm" +.file "sha256-compress-n.asm" .arch armv8-a+crypto .text @@ -45,8 +45,9 @@ C SHA256SU1: SHA256 schedule update 1 C Register usage: define(`STATE', `x0') -define(`INPUT', `x1') -define(`K', `x2') +define(`K', `x1') +define(`BLOCKS', `x2') +define(`INPUT', `x3') define(`MSG0', `v0') define(`MSG1', `v1') @@ -59,19 +60,23 @@ define(`TMP', `v7') define(`STATE0_SAVED', `v16') define(`STATE1_SAVED', `v17') -C void -C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const 
uint32_t *k) +C const uint8_t * +C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, +C size_t blocks, const uint8_t *input) + +PROLOGUE(_nettle_sha256_compress_n) + cbz BLOCKS, .Lend -PROLOGUE(_nettle_sha256_compress) C Load state ld1 {STATE0.4s,STATE1.4s},[STATE] +.Loop: C Save state mov STATE0_SAVED.16b,STATE0.16b mov STATE1_SAVED.16b,STATE1.16b C Load message - ld1 {MSG0.16b,MSG1.16b,MSG2.16b,MSG3.16b},[INPUT] + ld1 {MSG0.16b,MSG1.16b,MSG2.16b,MSG3.16b},[INPUT],#64 C Reverse for little endian rev32 MSG0.16b,MSG0.16b @@ -217,9 +222,13 @@ PROLOGUE(_nettle_sha256_compress) C Combine state add STATE0.4s,STATE0.4s,STATE0_SAVED.4s add STATE1.4s,STATE1.4s,STATE1_SAVED.4s - + subs BLOCKS, BLOCKS, #1 + sub K, K, #240 + b.ne .Loop + C Store state st1 {STATE0.4s,STATE1.4s},[STATE] - +.Lend: + mov x0, INPUT ret -EPILOGUE(_nettle_sha256_compress) +EPILOGUE(_nettle_sha256_compress_n) diff --git a/arm64/fat/sha256-compress-2.asm b/arm64/fat/sha256-compress-n-2.asm index 67590794..2f70686e 100644 --- a/arm64/fat/sha256-compress-2.asm +++ b/arm64/fat/sha256-compress-n-2.asm @@ -1,4 +1,4 @@ -C arm64/fat/sha256-compress-2.asm +C arm64/fat/sha256-compress-n-2.asm ifelse(` @@ -31,7 +31,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. ') -dnl PROLOGUE(_nettle_sha256_compress) picked up by configure +dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure define(`fat_transform', `$1_arm64') -include_src(`arm64/crypto/sha256-compress.asm') +include_src(`arm64/crypto/sha256-compress-n.asm') diff --git a/balloon-sha1.c b/balloon-sha1.c new file mode 100644 index 00000000..71c86e1d --- /dev/null +++ b/balloon-sha1.c @@ -0,0 +1,55 @@ +/* balloon-sha1.c + + Balloon password-hashing algorithm. + + Copyright (C) 2022 Zoltan Fridrich + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "balloon.h" +#include "sha1.h" + +void +balloon_sha1(size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst) +{ + struct sha1_ctx ctx; + sha1_init(&ctx); + balloon(&ctx, + (nettle_hash_update_func*)sha1_update, + (nettle_hash_digest_func*)sha1_digest, + SHA1_DIGEST_SIZE, s_cost, t_cost, + passwd_length, passwd, salt_length, salt, scratch, dst); +} diff --git a/balloon-sha256.c b/balloon-sha256.c new file mode 100644 index 00000000..fe31a691 --- /dev/null +++ b/balloon-sha256.c @@ -0,0 +1,55 @@ +/* balloon-sha256.c + + Balloon password-hashing algorithm. + + Copyright (C) 2022 Zoltan Fridrich + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. 
+ + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "balloon.h" +#include "sha2.h" + +void +balloon_sha256(size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst) +{ + struct sha256_ctx ctx; + sha256_init(&ctx); + balloon(&ctx, + (nettle_hash_update_func*)sha256_update, + (nettle_hash_digest_func*)sha256_digest, + SHA256_DIGEST_SIZE, s_cost, t_cost, + passwd_length, passwd, salt_length, salt, scratch, dst); +} diff --git a/balloon-sha384.c b/balloon-sha384.c new file mode 100644 index 00000000..68294496 --- /dev/null +++ b/balloon-sha384.c @@ -0,0 +1,55 @@ +/* balloon-sha384.c + + Balloon password-hashing algorithm. + + Copyright (C) 2022 Zoltan Fridrich + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "balloon.h" +#include "sha2.h" + +void +balloon_sha384(size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst) +{ + struct sha384_ctx ctx; + sha384_init(&ctx); + balloon(&ctx, + (nettle_hash_update_func*)sha384_update, + (nettle_hash_digest_func*)sha384_digest, + SHA384_DIGEST_SIZE, s_cost, t_cost, + passwd_length, passwd, salt_length, salt, scratch, dst); +} diff --git a/balloon-sha512.c b/balloon-sha512.c new file mode 100644 index 00000000..f19f8aa0 --- /dev/null +++ b/balloon-sha512.c @@ -0,0 +1,55 @@ +/* balloon-sha512.c + + Balloon password-hashing algorithm. + + Copyright (C) 2022 Zoltan Fridrich + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "balloon.h" +#include "sha2.h" + +void +balloon_sha512(size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst) +{ + struct sha512_ctx ctx; + sha512_init(&ctx); + balloon(&ctx, + (nettle_hash_update_func*)sha512_update, + (nettle_hash_digest_func*)sha512_digest, + SHA512_DIGEST_SIZE, s_cost, t_cost, + passwd_length, passwd, salt_length, salt, scratch, dst); +} diff --git a/balloon.c b/balloon.c new file mode 100644 index 00000000..c744160a --- /dev/null +++ b/balloon.c @@ -0,0 +1,149 @@ +/* balloon.c + + Balloon password-hashing algorithm. + + Copyright (C) 2022 Zoltan Fridrich + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +/* For a description of the algorithm, see: + * Boneh, D., Corrigan-Gibbs, H., Schechter, S. 
(2017, May 12). Balloon Hashing: + * A Memory-Hard Function Providing Provable Protection Against Sequential Attacks. + * Retrieved Sep 1, 2022, from https://eprint.iacr.org/2016/027.pdf + */ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <string.h> + +#include "balloon.h" +#include "macros.h" + +#define DELTA 3 + +static void +hash(void *ctx, + nettle_hash_update_func *update, + nettle_hash_digest_func *digest, + size_t digest_size, + uint64_t cnt, + size_t a_len, const uint8_t *a, + size_t b_len, const uint8_t *b, + uint8_t *dst) +{ + uint8_t tmp[8]; + LE_WRITE_UINT64(tmp, cnt); + update(ctx, sizeof(tmp), tmp); + if (a && a_len) + update(ctx, a_len, a); + if (b && b_len) + update(ctx, b_len, b); + digest(ctx, digest_size, dst); +} + +static void +hash_ints(void *ctx, + nettle_hash_update_func *update, + nettle_hash_digest_func *digest, + size_t digest_size, + uint64_t i, uint64_t j, uint64_t k, + uint8_t *dst) +{ + uint8_t tmp[24]; + LE_WRITE_UINT64(tmp, i); + LE_WRITE_UINT64(tmp + 8, j); + LE_WRITE_UINT64(tmp + 16, k); + update(ctx, sizeof(tmp), tmp); + digest(ctx, digest_size, dst); +} + +/* Takes length bytes long big number stored + * in little endian format and computes modulus + */ +static size_t +block_to_int(size_t length, const uint8_t *block, size_t mod) +{ + size_t i = length, r = 0; + while (i--) + { + r = (r << 8) + block[i]; + r %= mod; + } + return r; +} + +void +balloon(void *hash_ctx, + nettle_hash_update_func *update, + nettle_hash_digest_func *digest, + size_t digest_size, size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst) +{ + const size_t BS = digest_size; + uint8_t *block = scratch; + uint8_t *buf = scratch + BS; + size_t i, j, k, cnt = 0; + + hash(hash_ctx, update, digest, digest_size, + cnt++, passwd_length, passwd, salt_length, salt, buf); + for (i = 1; i < s_cost; ++i) + hash(hash_ctx, update, digest, digest_size, + 
cnt++, BS, buf + (i - 1) * BS, 0, NULL, buf + i * BS); + + for (i = 0; i < t_cost; ++i) + { + for (j = 0; j < s_cost; ++j) + { + hash(hash_ctx, update, digest, digest_size, + cnt++, BS, buf + (j ? j - 1 : s_cost - 1) * BS, + BS, buf + j * BS, buf + j * BS); + for (k = 0; k < DELTA; ++k) + { + hash_ints(hash_ctx, update, digest, digest_size, i, j, k, block); + hash(hash_ctx, update, digest, digest_size, + cnt++, salt_length, salt, BS, block, block); + hash(hash_ctx, update, digest, digest_size, + cnt++, BS, buf + j * BS, + BS, buf + block_to_int(BS, block, s_cost) * BS, + buf + j * BS); + } + } + } + memcpy(dst, buf + (s_cost - 1) * BS, BS); +} + +size_t +balloon_itch(size_t digest_size, size_t s_cost) +{ + return (s_cost + 1) * digest_size; +} diff --git a/balloon.h b/balloon.h new file mode 100644 index 00000000..9c021925 --- /dev/null +++ b/balloon.h @@ -0,0 +1,98 @@ +/* balloon.h + + Balloon password-hashing algorithm. + + Copyright (C) 2022 Zoltan Fridrich + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+*/ + +/* For a description of the algorithm, see: + * Boneh, D., Corrigan-Gibbs, H., Schechter, S. (2017, May 12). Balloon Hashing: + * A Memory-Hard Function Providing Provable Protection Against Sequential Attacks. + * Retrieved Sep 1, 2022, from https://eprint.iacr.org/2016/027.pdf + */ + +#ifndef NETTLE_BALLOON_H_INCLUDED +#define NETTLE_BALLOON_H_INCLUDED + +#include "nettle-types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Name mangling */ +#define balloon nettle_balloon +#define balloon_itch nettle_balloon_itch +#define balloon_sha1 nettle_balloon_sha1 +#define balloon_sha256 nettle_balloon_sha256 +#define balloon_sha384 nettle_balloon_sha384 +#define balloon_sha512 nettle_balloon_sha512 + +void +balloon(void *hash_ctx, + nettle_hash_update_func *update, + nettle_hash_digest_func *digest, + size_t digest_size, size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst); + +size_t +balloon_itch(size_t digest_size, size_t s_cost); + +void +balloon_sha1(size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst); + +void +balloon_sha256(size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst); + +void +balloon_sha384(size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst); + +void +balloon_sha512(size_t s_cost, size_t t_cost, + size_t passwd_length, const uint8_t *passwd, + size_t salt_length, const uint8_t *salt, + uint8_t *scratch, uint8_t *dst); + +#ifdef __cplusplus +} +#endif + +#endif /* NETTLE_BALLOON_H_INCLUDED */ diff --git a/block-internal.h b/block-internal.h index d7b0c315..e9c26ff6 100644 --- a/block-internal.h +++ b/block-internal.h @@ -40,6 +40,7 @@ #include 
<assert.h> #include "nettle-types.h" +#include "bswap-internal.h" #include "memxor.h" static inline void @@ -197,4 +198,15 @@ block16_mulx_ghash (union nettle_block16 *r, } #endif /* ! WORDS_BIGENDIAN */ +/* Reverse bytes in X and store the result in R. This supports + in-place operation (R and X can overlap). */ +static inline void +block16_bswap (union nettle_block16 *r, + const union nettle_block16 *x) +{ + uint64_t t = nettle_bswap64 (x->u64[0]); + r->u64[0] = nettle_bswap64 (x->u64[1]); + r->u64[1] = t; +} + #endif /* NETTLE_BLOCK_INTERNAL_H_INCLUDED */ diff --git a/blowfish-bcrypt.c b/blowfish-bcrypt.c index 800d1468..08b1e32e 100644 --- a/blowfish-bcrypt.c +++ b/blowfish-bcrypt.c @@ -42,7 +42,7 @@ #include "blowfish.h" #include "blowfish-internal.h" #include "base64.h" - +#include "bswap-internal.h" #include "macros.h" #define CRYPTPLEN 7 @@ -149,19 +149,16 @@ static uint32_t magic_w[6] = { 0x64657253, 0x63727944, 0x6F756274 }; -/* conflicts with OpenBSD's swap32 macro */ -#undef swap32 - -static void swap32(uint32_t *x, int count) +#if WORDS_BIGENDIAN +#define bswap32_if_le(x, n) +#else +static void bswap32_if_le (uint32_t *x, unsigned n) { -#if !WORDS_BIGENDIAN - do { - uint32_t tmp = *x; - tmp = (tmp << 16) | (tmp >> 16); - *x++ = ((tmp & 0x00FF00FF) << 8) | ((tmp >> 8) & 0x00FF00FF); - } while (--count); -#endif + unsigned i; + for (i = 0; i < n; i++) + x[i] = nettle_bswap32 (x[i]); } +#endif static void set_xkey(size_t lenkey, const uint8_t *key, bf_key expanded, bf_key initial, @@ -343,7 +340,7 @@ static int ibcrypt(uint8_t *dst, else if (lenscheme < HASHOFFSET) return 0; memcpy(psalt, data.binary.salt, BLOWFISH_BCRYPT_BINSALT_SIZE); - swap32(data.binary.salt, 4); + bswap32_if_le (data.binary.salt, 4); if (log2rounds < minlog2rounds || log2rounds > 31) return 0; @@ -448,7 +445,7 @@ static int ibcrypt(uint8_t *dst, dst = (uint8_t*) encode_radix64((char*) dst, BLOWFISH_BCRYPT_BINSALT_SIZE, psalt) - 1; - swap32(data.binary.output, 6); + bswap32_if_le 
(data.binary.output, 6); /* This has to be bug-compatible with the original implementation, so only encode 23 of the 24 bytes. */ encode_radix64((char*) dst, 23, (uint8_t *) data.binary.output); diff --git a/bswap-internal.h b/bswap-internal.h new file mode 100644 index 00000000..b9923f99 --- /dev/null +++ b/bswap-internal.h @@ -0,0 +1,77 @@ +/* bswap-internal.h + + Copyright (C) 2022 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#ifndef NETTLE_BSWAP_INTERNAL_H_INCLUDED +#define NETTLE_BSWAP_INTERNAL_H_INCLUDED + +#include "nettle-types.h" + +/* Note that these definitions depend on config.h, which should be + included first. */ + +#if HAVE_BUILTIN_BSWAP64 +#define nettle_bswap64 __builtin_bswap64 +/* Assume bswap32 is also available.
*/ +#define nettle_bswap32 __builtin_bswap32 +#else +static inline uint64_t +nettle_bswap64 (uint64_t x) +{ + x = (x >> 32) | (x << 32); + x = ((x >> 16) & UINT64_C (0xffff0000ffff)) + | ((x & UINT64_C (0xffff0000ffff)) << 16); + x = ((x >> 8) & UINT64_C (0xff00ff00ff00ff)) + | ((x & UINT64_C (0xff00ff00ff00ff)) << 8); + return x; +} + +static inline uint32_t +nettle_bswap32 (uint32_t x) +{ + x = (x << 16) | (x >> 16); + x = ((x & 0x00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF); + return x; +} +#endif + +#if WORDS_BIGENDIAN +#define bswap64_if_le(x) (x) +#else +#define bswap64_if_le nettle_bswap64 +#endif + +#if WORDS_BIGENDIAN +#define bswap64_if_be nettle_bswap64 +#else +#define bswap64_if_be(x) (x) +#endif + +#endif /* NETTLE_BSWAP_INTERNAL_H_INCLUDED */ diff --git a/chacha-poly1305.c b/chacha-poly1305.c index 7a423e1e..ea8b2952 100644 --- a/chacha-poly1305.c +++ b/chacha-poly1305.c @@ -97,7 +97,8 @@ static void poly1305_update (struct chacha_poly1305_ctx *ctx, size_t length, const uint8_t *data) { - MD_UPDATE (ctx, length, data, COMPRESS, (void) 0); + ctx->index = _nettle_poly1305_update (&(ctx)->poly1305, + ctx->block, ctx->index, length, data); } static void diff --git a/config.make.in b/config.make.in index f8e1f74e..6aec7c73 100644 --- a/config.make.in +++ b/config.make.in @@ -8,6 +8,7 @@ CCPIC = @CCPIC@ CPPFLAGS = @CPPFLAGS@ DEFS = @DEFS@ LDFLAGS = @LDFLAGS@ +ASM_FLAGS = @ASM_FLAGS@ LIBS = @LIBS@ LIBOBJS = @LIBOBJS@ EMULATOR = @EMULATOR@ diff --git a/configure.ac b/configure.ac index 73ce5764..92536fb0 100644 --- a/configure.ac +++ b/configure.ac @@ -4,7 +4,7 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT([nettle], [3.8], [nettle-bugs@lists.lysator.liu.se]) AC_PREREQ(2.61) -AC_CONFIG_SRCDIR([arcfour.c]) +AC_CONFIG_SRCDIR([nettle-types.h]) # Needed to stop autoconf from looking for files in parent directories. 
AC_CONFIG_AUX_DIR([.]) @@ -121,6 +121,8 @@ AC_ARG_ENABLE(mini-gmp, AC_HELP_STRING([--enable-mini-gmp], [Enable mini-gmp, used instead of libgmp.]),, [enable_mini_gmp=no]) +AC_ARG_VAR(ASM_FLAGS, [Extra flags for processing assembly source files]) + if test "x$enable_mini_gmp" = xyes ; then NETTLE_USE_MINI_GMP=1 HOGWEED_EXTRA_SYMBOLS="mpz_*;gmp_*;mpn_*;mp_*;" @@ -345,7 +347,7 @@ case "$host_cpu" in ABI=64 ]) ;; - *mips*) + *mips64*) AC_TRY_COMPILE([ #if defined(__mips64) || defined(__mips64__) || (defined(__sgi) && defined(__LP64__)) #error 64-bit mips @@ -598,7 +600,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \ chacha-core-internal.asm \ salsa20-crypt.asm salsa20-core-internal.asm \ serpent-encrypt.asm serpent-decrypt.asm \ - sha1-compress.asm sha256-compress.asm sha512-compress.asm \ + sha1-compress.asm sha256-compress-n.asm sha512-compress.asm \ sha3-permute.asm umac-nh.asm umac-nh-n.asm machine.m4" # Assembler files which generate additional object files if they are used. 
@@ -612,10 +614,10 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \ aes256-encrypt-2.asm aes256-decrypt-2.asm \ cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \ chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \ - poly1305-internal-2.asm \ + poly1305-blocks.asm poly1305-internal-2.asm \ ghash-set-key-2.asm ghash-update-2.asm \ salsa20-2core.asm salsa20-core-internal-2.asm \ - sha1-compress-2.asm sha256-compress-2.asm \ + sha1-compress-2.asm sha256-compress-n-2.asm \ sha3-permute-2.asm sha512-compress-2.asm \ umac-nh-n-2.asm umac-nh-2.asm" @@ -762,13 +764,15 @@ AH_VERBATIM([HAVE_NATIVE], #undef HAVE_NATIVE_poly1305_set_key #undef HAVE_NATIVE_poly1305_block #undef HAVE_NATIVE_poly1305_digest +#undef HAVE_NATIVE_poly1305_blocks +#undef HAVE_NATIVE_fat_poly1305_blocks #undef HAVE_NATIVE_ghash_set_key #undef HAVE_NATIVE_ghash_update #undef HAVE_NATIVE_salsa20_core #undef HAVE_NATIVE_salsa20_2core #undef HAVE_NATIVE_fat_salsa20_2core #undef HAVE_NATIVE_sha1_compress -#undef HAVE_NATIVE_sha256_compress +#undef HAVE_NATIVE_sha256_compress_n #undef HAVE_NATIVE_sha512_compress #undef HAVE_NATIVE_sha3_permute #undef HAVE_NATIVE_umac_nh diff --git a/ecc-curve25519.c b/ecc-curve25519.c index 56abcf23..539bff22 100644 --- a/ecc-curve25519.c +++ b/ecc-curve25519.c @@ -266,6 +266,7 @@ const struct ecc_curve _nettle_curve25519 = ecc_p, ecc_Bmodp, ecc_Bmodp_shifted, + ecc_Bm2p, NULL, ecc_pp1h, @@ -287,6 +288,7 @@ const struct ecc_curve _nettle_curve25519 = ecc_q, ecc_Bmodq, ecc_mBmodq_shifted, /* Use q - 2^{252} instead. 
*/ + ecc_Bm2q, NULL, ecc_qp1h, diff --git a/ecc-curve448.c b/ecc-curve448.c index 1bd4e11f..daef56cc 100644 --- a/ecc-curve448.c +++ b/ecc-curve448.c @@ -220,6 +220,7 @@ const struct ecc_curve _nettle_curve448 = ecc_p, ecc_Bmodp, ecc_Bmodp_shifted, + ecc_Bm2p, NULL, ecc_pp1h, @@ -241,6 +242,7 @@ const struct ecc_curve _nettle_curve448 = ecc_q, ecc_Bmodq, ecc_Bmodq_shifted, + ecc_Bm2q, NULL, ecc_qp1h, diff --git a/ecc-ecdsa-sign.c b/ecc-ecdsa-sign.c index 4adee1d1..6a41c14c 100644 --- a/ecc-ecdsa-sign.c +++ b/ecc-ecdsa-sign.c @@ -46,9 +46,9 @@ mp_size_t ecc_ecdsa_sign_itch (const struct ecc_curve *ecc) { - /* Needs 3*ecc->p.size + scratch for ecc->mul_g. Currently same for - ecc_mul_g. */ - assert (ecc->p.size + ecc->p.invert_itch <= 3*ecc->p.size + ecc->mul_g_itch); + /* Needs 3*ecc->p.size + scratch for ecc_mul_g. */ + assert (ecc->p.size + ecc->p.invert_itch + <= 3*ecc->p.size + ECC_MUL_G_ITCH (ecc->p.size)); return ECC_ECDSA_SIGN_ITCH (ecc->p.size); } @@ -79,9 +79,9 @@ ecc_ecdsa_sign (const struct ecc_curve *ecc, 4. s2 <-- (h + z*s1)/k mod q. */ - ecc->mul_g (ecc, P, kp, P + 3*ecc->p.size); + ecc_mul_g (ecc, P, kp, P + 3*ecc->p.size); /* x coordinate only, modulo q */ - ecc->h_to_a (ecc, 2, rp, P, P + 3*ecc->p.size); + ecc_j_to_a (ecc, 2, rp, P, P + 3*ecc->p.size); /* Invert k, uses up to 7 * ecc->p.size including scratch (for secp384). */ ecc->q.invert (&ecc->q, kinv, kp, tp); diff --git a/ecc-ecdsa-verify.c b/ecc-ecdsa-verify.c index f3b112b0..9e324ea2 100644 --- a/ecc-ecdsa-verify.c +++ b/ecc-ecdsa-verify.c @@ -53,8 +53,8 @@ ecdsa_in_range (const struct ecc_curve *ecc, const mp_limb_t *xp) mp_size_t ecc_ecdsa_verify_itch (const struct ecc_curve *ecc) { - /* Largest storage need is for the ecc->mul call. */ - return 5*ecc->p.size + ecc->mul_itch; + /* Largest storage need is for the ecc_mul_a call. */ + return 5*ecc->p.size + ECC_MUL_A_ITCH (ecc->p.size); } /* FIXME: Use faster primitives, not requiring side-channel silence. 
*/ @@ -107,35 +107,23 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc, /* u2 = r / s, P2 = u2 * Y */ ecc_mod_mul_canonical (&ecc->q, u2, rp, sinv, u2); - /* Total storage: 5*ecc->p.size + ecc->mul_itch */ - ecc->mul (ecc, P2, u2, pp, u2 + ecc->p.size); + /* Total storage: 5*ecc->p.size + ECC_MUL_A_ITCH */ + ecc_mul_a (ecc, P2, u2, pp, u2 + ecc->p.size); /* u = 0 can happen only if h = 0 or h = q, which is extremely unlikely. */ if (!mpn_zero_p (u1, ecc->p.size)) { - /* Total storage: 7*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */ - ecc->mul_g (ecc, P1, u1, P1 + 3*ecc->p.size); - - /* NOTE: ecc_add_jjj and/or ecc_j_to_a will produce garbage in - case u1 G = +/- u2 V. However, anyone who gets his or her - hands on a signature where this happens during verification, - can also get the private key as z = +/- u1 / u_2 (mod q). And - then it doesn't matter very much if verification of - signatures with that key succeeds or fails. - - u1 G = - u2 V can never happen for a correctly generated - signature, since it implies k = 0. - - u1 G = u2 V is possible, if we are unlucky enough to get h / - s_1 = z. Hitting that is about as unlikely as finding the - private key by guessing. - */ - /* Total storage: 6*ecc->p.size + ecc->add_hhh_itch */ - ecc->add_hhh (ecc, P2, P2, P1, P1 + 3*ecc->p.size); + /* Total storage: 7*ecc->p.size + ECC_MUL_G_ITCH */ + ecc_mul_g (ecc, P1, u1, P1 + 3*ecc->p.size); + + /* Total storage: 6*ecc->p.size + ECC_ADD_JJJ_ITCH */ + if (!ecc_nonsec_add_jjj (ecc, P2, P2, P1, P1 + 3*ecc->p.size)) + /* Infinity point, not a valid signature. 
*/ + return 0; } /* x coordinate only, modulo q */ - ecc->h_to_a (ecc, 2, P1, P2, P1 + 3*ecc->p.size); + ecc_j_to_a (ecc, 2, P1, P2, P1 + 3*ecc->p.size); return (mpn_cmp (rp, P1, ecc->p.size) == 0); #undef P2 diff --git a/ecc-gost-gc256b.c b/ecc-gost-gc256b.c index 0cf753e4..df9cbb58 100644 --- a/ecc-gost-gc256b.c +++ b/ecc-gost-gc256b.c @@ -71,6 +71,7 @@ const struct ecc_curve _nettle_gost_gc256b = ecc_p, ecc_Bmodp, ecc_Bmodp_shifted, + ecc_Bm2p, ecc_redc_ppm1, ecc_pp1h, @@ -92,6 +93,7 @@ const struct ecc_curve _nettle_gost_gc256b = ecc_q, ecc_Bmodq, ecc_Bmodq_shifted, + ecc_Bm2q, NULL, ecc_qp1h, diff --git a/ecc-gost-gc512a.c b/ecc-gost-gc512a.c index 338ed001..3807b57e 100644 --- a/ecc-gost-gc512a.c +++ b/ecc-gost-gc512a.c @@ -71,6 +71,7 @@ const struct ecc_curve _nettle_gost_gc512a = ecc_p, ecc_Bmodp, ecc_Bmodp_shifted, + ecc_Bm2p, ecc_redc_ppm1, ecc_pp1h, @@ -92,6 +93,7 @@ const struct ecc_curve _nettle_gost_gc512a = ecc_q, ecc_Bmodq, ecc_Bmodq_shifted, + ecc_Bm2q, NULL, ecc_qp1h, diff --git a/ecc-gostdsa-sign.c b/ecc-gostdsa-sign.c index c924122c..491a2281 100644 --- a/ecc-gostdsa-sign.c +++ b/ecc-gostdsa-sign.c @@ -45,8 +45,7 @@ mp_size_t ecc_gostdsa_sign_itch (const struct ecc_curve *ecc) { - /* Needs 3*ecc->p.size + scratch for ecc->mul_g. Currently same for - ecc_mul_g. */ + /* Needs 3*ecc->p.size + scratch for ecc_mul_g. */ return ECC_GOSTDSA_SIGN_ITCH (ecc->p.size); } @@ -75,9 +74,9 @@ ecc_gostdsa_sign (const struct ecc_curve *ecc, 4. s <-- (r*z + k*h) mod q. 
*/ - ecc->mul_g (ecc, P, kp, P + 3*ecc->p.size); + ecc_mul_g (ecc, P, kp, P + 3*ecc->p.size); /* x coordinate only, modulo q */ - ecc->h_to_a (ecc, 2, rp, P, P + 3*ecc->p.size); + ecc_j_to_a (ecc, 2, rp, P, P + 3*ecc->p.size); /* Process hash digest */ gost_hash (&ecc->q, hp, length, digest); diff --git a/ecc-gostdsa-verify.c b/ecc-gostdsa-verify.c index fcdd4644..0570af7e 100644 --- a/ecc-gostdsa-verify.c +++ b/ecc-gostdsa-verify.c @@ -52,8 +52,8 @@ ecdsa_in_range (const struct ecc_curve *ecc, const mp_limb_t *xp) mp_size_t ecc_gostdsa_verify_itch (const struct ecc_curve *ecc) { - /* Largest storage need is for the ecc->mul call. */ - return 5*ecc->p.size + ecc->mul_itch; + /* Largest storage need is for the ecc_mul_a call. */ + return 5*ecc->p.size + ECC_MUL_A_ITCH (ecc->p.size); } /* FIXME: Use faster primitives, not requiring side-channel silence. */ @@ -108,17 +108,18 @@ ecc_gostdsa_verify (const struct ecc_curve *ecc, mpn_sub_n (hp, ecc->q.m, rp, ecc->p.size); ecc_mod_mul_canonical (&ecc->q, z2, hp, vp, z2); - /* Total storage: 5*ecc->p.size + ecc->mul_itch */ - ecc->mul (ecc, P2, z2, pp, z2 + ecc->p.size); + /* Total storage: 5*ecc->p.size + ECC_MUL_A_ITCH */ + ecc_mul_a (ecc, P2, z2, pp, z2 + ecc->p.size); - /* Total storage: 7*ecc->p.size + ecc->mul_g_itch (ecc->p.size) */ - ecc->mul_g (ecc, P1, z1, P1 + 3*ecc->p.size); + /* Total storage: 7*ecc->p.size + ECC_MUL_G_ITCH */ + ecc_mul_g (ecc, P1, z1, P1 + 3*ecc->p.size); - /* Total storage: 6*ecc->p.size + ecc->add_hhh_itch */ - ecc->add_hhh (ecc, P1, P1, P2, P1 + 3*ecc->p.size); + /* Total storage: 6*ecc->p.size + ECC_ADD_JJJ_ITCH */ + if (!ecc_nonsec_add_jjj (ecc, P1, P1, P2, P1 + 3*ecc->p.size)) + return 0; /* x coordinate only, modulo q */ - ecc->h_to_a (ecc, 2, P2, P1, P1 + 3*ecc->p.size); + ecc_j_to_a (ecc, 2, P2, P1, P1 + 3*ecc->p.size); return (mpn_cmp (rp, P2, ecc->p.size) == 0); #undef P2 diff --git a/ecc-internal.h b/ecc-internal.h index 2ea553b5..be02de5f 100644 --- a/ecc-internal.h +++ 
b/ecc-internal.h @@ -66,6 +66,7 @@ #define ecc_dup_jj _nettle_ecc_dup_jj #define ecc_add_jja _nettle_ecc_add_jja #define ecc_add_jjj _nettle_ecc_add_jjj +#define ecc_nonsec_add_jjj _nettle_ecc_nonsec_add_jjj #define ecc_dup_eh _nettle_ecc_dup_eh #define ecc_add_eh _nettle_ecc_add_eh #define ecc_add_ehh _nettle_ecc_add_ehh @@ -80,7 +81,6 @@ #define cnd_copy _nettle_cnd_copy #define sec_add_1 _nettle_sec_add_1 #define sec_sub_1 _nettle_sec_sub_1 -#define sec_tabselect _nettle_sec_tabselect #define sec_modinv _nettle_sec_modinv #define curve25519_eh_to_x _nettle_curve25519_eh_to_x #define curve448_eh_to_x _nettle_curve448_eh_to_x @@ -174,8 +174,14 @@ struct ecc_modulo /* B^size mod m. Expected to have at least 32 leading zeros (equality for secp_256r1). */ const mp_limb_t *B; - /* 2^{bit_size} - m, same value as above, but shifted. */ + /* 2^{bit_size} - m. When different from B above, for numbers of + interest, usually B has trailing zeros and this is B shifted + right. */ const mp_limb_t *B_shifted; + /* For ecc_mod_sub: B^size - 2m, if that doesn't underflow. + Otherwise, same as B */ + const mp_limb_t *Bm2m; + /* m +/- 1, for redc, excluding redc_size low limbs. */ const mp_limb_t *redc_mpm1; /* (m+1)/2 */ @@ -258,6 +264,8 @@ ecc_mod_equal_p (const struct ecc_modulo *m, const mp_limb_t *a, void ecc_mod_add (const struct ecc_modulo *m, mp_limb_t *rp, const mp_limb_t *ap, const mp_limb_t *bp); + +/* If inputs are in the range 0 <= a, b < 2m, then so is the output. */ void ecc_mod_sub (const struct ecc_modulo *m, mp_limb_t *rp, const mp_limb_t *ap, const mp_limb_t *bp); @@ -382,6 +390,14 @@ ecc_add_jjj (const struct ecc_curve *ecc, mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *q, mp_limb_t *scratch); +/* Variant that handles the checks for the special cases P = ±Q. + Returns 1 on success, 0 if result is infinite. Not side-channel + silent, so must not be used with secret inputs. 
*/ +int +ecc_nonsec_add_jjj (const struct ecc_curve *ecc, + mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *q, + mp_limb_t *scratch); + /* Point doubling on a twisted Edwards curve, with homogeneous cooordinates. */ void @@ -458,11 +474,6 @@ mp_limb_t sec_sub_1 (mp_limb_t *rp, mp_limb_t *ap, mp_size_t n, mp_limb_t b); void -sec_tabselect (mp_limb_t *rp, mp_size_t rn, - const mp_limb_t *table, unsigned tn, - unsigned k); - -void curve25519_eh_to_x (mp_limb_t *xp, const mp_limb_t *p, mp_limb_t *scratch); diff --git a/ecc-mod-arith.c b/ecc-mod-arith.c index 310cbb1d..d0137864 100644 --- a/ecc-mod-arith.c +++ b/ecc-mod-arith.c @@ -85,7 +85,20 @@ ecc_mod_sub (const struct ecc_modulo *m, mp_limb_t *rp, { mp_limb_t cy; cy = mpn_sub_n (rp, ap, bp, m->size); - cy = mpn_cnd_sub_n (cy, rp, rp, m->B, m->size); + /* The adjustments for this function work differently depending on + the value of the most significant bit of m. + + If m has a most significant bit of zero, then the first + adjustment step conditionally adds 2m. If in addition, inputs are + in the 0 <= a,b < 2m range, then the first adjustment guarantees + that result is in that same range. The second adjustment step is + needed only if b > 2m, it then ensures output is correct modulo + m, but nothing more. + + If m has a most significant bit of one, Bm2m and B are the same, + and this function works analogously to ecc_mod_add. 
+ */ + cy = mpn_cnd_sub_n (cy, rp, rp, m->Bm2m, m->size); cy = mpn_cnd_sub_n (cy, rp, rp, m->B, m->size); assert (cy == 0); } diff --git a/ecc-mul-a-eh.c b/ecc-mul-a-eh.c index 1eb3efcc..980fec3f 100644 --- a/ecc-mul-a-eh.c +++ b/ecc-mul-a-eh.c @@ -140,7 +140,7 @@ ecc_mul_a_eh (const struct ecc_curve *ecc, assert (bits < TABLE_SIZE); - sec_tabselect (r, 3*ecc->p.size, table, TABLE_SIZE, bits); + mpn_sec_tabselect (r, table, 3*ecc->p.size, TABLE_SIZE, bits); for (;;) { @@ -166,7 +166,7 @@ ecc_mul_a_eh (const struct ecc_curve *ecc, ecc->dup (ecc, r, r, scratch_out); bits &= TABLE_MASK; - sec_tabselect (tp, 3*ecc->p.size, table, TABLE_SIZE, bits); + mpn_sec_tabselect (tp, table, 3*ecc->p.size, TABLE_SIZE, bits); ecc->add_hhh (ecc, r, r, tp, scratch_out); } #undef table diff --git a/ecc-mul-a.c b/ecc-mul-a.c index cb9c7d41..8e1355eb 100644 --- a/ecc-mul-a.c +++ b/ecc-mul-a.c @@ -144,7 +144,7 @@ ecc_mul_a (const struct ecc_curve *ecc, assert (bits < TABLE_SIZE); - sec_tabselect (r, 3*ecc->p.size, table, TABLE_SIZE, bits); + mpn_sec_tabselect (r, table, 3*ecc->p.size, TABLE_SIZE, bits); is_zero = (bits == 0); for (;;) @@ -171,7 +171,7 @@ ecc_mul_a (const struct ecc_curve *ecc, ecc_dup_jj (ecc, r, r, scratch_out); bits &= TABLE_MASK; - sec_tabselect (tp, 3*ecc->p.size, table, TABLE_SIZE, bits); + mpn_sec_tabselect (tp, table, 3*ecc->p.size, TABLE_SIZE, bits); cnd_copy (is_zero, r, tp, 3*ecc->p.size); ecc_add_jjj (ecc, tp, tp, r, scratch_out); diff --git a/ecc-mul-g-eh.c b/ecc-mul-g-eh.c index 8b3ca8f8..57df1c6d 100644 --- a/ecc-mul-g-eh.c +++ b/ecc-mul-g-eh.c @@ -88,10 +88,10 @@ ecc_mul_g_eh (const struct ecc_curve *ecc, mp_limb_t *r, shift = bit_index % GMP_NUMB_BITS; bits = (bits << 1) | ((np[limb_index] >> shift) & 1); } - sec_tabselect (tp, 2*ecc->p.size, - (ecc->pippenger_table - + (2*ecc->p.size * (mp_size_t) j << c)), - 1<<c, bits); + mpn_sec_tabselect (tp, + (ecc->pippenger_table + + (2*ecc->p.size * (mp_size_t) j << c)), + 2*ecc->p.size, 1<<c, bits); ecc->add_hh 
(ecc, r, r, tp, scratch_out); } diff --git a/ecc-mul-g.c b/ecc-mul-g.c index dcc7c3ea..677a37e7 100644 --- a/ecc-mul-g.c +++ b/ecc-mul-g.c @@ -88,10 +88,10 @@ ecc_mul_g (const struct ecc_curve *ecc, mp_limb_t *r, shift = bit_index % GMP_NUMB_BITS; bits = (bits << 1) | ((np[limb_index] >> shift) & 1); } - sec_tabselect (tp, 2*ecc->p.size, - (ecc->pippenger_table - + (2*ecc->p.size * (mp_size_t) j << c)), - 1<<c, bits); + mpn_sec_tabselect (tp, + (ecc->pippenger_table + + (2*ecc->p.size * (mp_size_t) j << c)), + 2*ecc->p.size, 1<<c, bits); cnd_copy (is_zero, r, tp, 2*ecc->p.size); cnd_copy (is_zero, r + 2*ecc->p.size, ecc->unit, ecc->p.size); diff --git a/ecc-nonsec-add-jjj.c b/ecc-nonsec-add-jjj.c new file mode 100644 index 00000000..439c0a52 --- /dev/null +++ b/ecc-nonsec-add-jjj.c @@ -0,0 +1,162 @@ +/* ecc-nonsec-add-jjj.c + + Copyright (C) 2013, 2022 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +/* Development of Nettle's ECC support was funded by the .SE Internet Fund.
*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ecc.h" +#include "ecc-internal.h" + +/* Similar to ecc_add_jjj, but checks if x coordinates are equal (H = + 0) below, and if so, performs doubling if also y coordinates are + equal, or returns 0 (failure) indicating that the result is the + infinity point. */ +int +ecc_nonsec_add_jjj (const struct ecc_curve *ecc, + mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *q, + mp_limb_t *scratch) +{ +#define x1 p +#define y1 (p + ecc->p.size) +#define z1 (p + 2*ecc->p.size) + +#define x2 q +#define y2 (q + ecc->p.size) +#define z2 (q + 2*ecc->p.size) + +#define x3 r +#define y3 (r + ecc->p.size) +#define z3 (r + 2*ecc->p.size) + /* Formulas, from djb, + http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl: + + Computation Operation Live variables + + Z1Z1 = Z1^2 sqr Z1Z1 + Z2Z2 = Z2^2 sqr Z1Z1, Z2Z2 + U1 = X1*Z2Z2 mul Z1Z1, Z2Z2, U1 + U2 = X2*Z1Z1 mul Z1Z1, Z2Z2, U1, U2 + H = U2-U1 Z1Z1, Z2Z2, U1, H + Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H sqr, mul Z1Z1, Z2Z2, U1, H + S1 = Y1*Z2*Z2Z2 mul, mul Z1Z1, U1, H, S1 + S2 = Y2*Z1*Z1Z1 mul, mul U1, H, S1, S2 + W = 2*(S2-S1) (djb: r) U1, H, S1, W + I = (2*H)^2 sqr U1, H, S1, W, I + J = H*I mul U1, S1, W, J, V + V = U1*I mul S1, W, J, V + X3 = W^2-J-2*V sqr S1, W, J, V + Y3 = W*(V-X3)-2*S1*J mul, mul + */ + +#define h scratch +#define z1z1 (scratch + ecc->p.size) +#define z2z2 z1z1 +#define z1z2 (scratch + 2*ecc->p.size) + +#define w (scratch + ecc->p.size) +#define i (scratch + 2*ecc->p.size) +#define j h +#define v i + +#define tp (scratch + 3*ecc->p.size) + + ecc_mod_sqr (&ecc->p, z2z2, z2, tp); /* z2z2 */ + /* Store u1 at x3 */ + ecc_mod_mul (&ecc->p, x3, x1, z2z2, tp); /* z2z2 */ + + ecc_mod_add (&ecc->p, z1z2, z1, z2); /* z2z2, z1z2 */ + ecc_mod_sqr (&ecc->p, z1z2, z1z2, tp); + ecc_mod_sub (&ecc->p, z1z2, z1z2, z2z2); /* z2z2, z1z2 */ + + /* Do s1 early, store at y3 */ + ecc_mod_mul (&ecc->p, z2z2, z2z2, z2, tp); /* z2z2, z1z2 */ + 
ecc_mod_mul (&ecc->p, y3, z2z2, y1, tp); /* z1z2 */ + + ecc_mod_sqr (&ecc->p, z1z1, z1, tp); /* z1z1, z1z2 */ + ecc_mod_sub (&ecc->p, z1z2, z1z2, z1z1); + ecc_mod_mul (&ecc->p, h, x2, z1z1, tp); /* z1z1, z1z2, h */ + ecc_mod_sub (&ecc->p, h, h, x3); + + /* z1^3 */ + ecc_mod_mul (&ecc->p, z1z1, z1z1, z1, tp); + + /* z3 <-- h z1 z2 delayed until now, since that may clobber z1. */ + ecc_mod_mul (&ecc->p, z3, z1z2, h, tp); /* z1z1, h */ + /* w = 2 (s2 - s1) */ + ecc_mod_mul (&ecc->p, w, z1z1, y2, tp); /* h, w */ + ecc_mod_sub (&ecc->p, w, w, y3); + + /* Note that use of ecc_mod_zero_p depends 0 <= h,w < 2p. */ + if (ecc_mod_zero_p (&ecc->p, h)) + { + /* X1 == X2 */ + if (ecc_mod_zero_p (&ecc->p, w)) { + /* Y1 == Y2. Do point duplication. Note that q input is + unclobbered, and that scratch need is smaller. Implies some + unnecessary recomputation, but performance it not so + important for this very unlikely corner case. */ + ecc_dup_jj (ecc, r, q, scratch); + return 1; + } + + /* We must have Y1 == -Y2, and then the result is the infinity + point, */ + mpn_zero (r, 3*ecc->p.size); + return 0; + } + ecc_mod_add (&ecc->p, w, w, w); + + /* i = (2h)^2 */ + ecc_mod_add (&ecc->p, i, h, h); /* h, w, i */ + ecc_mod_sqr (&ecc->p, i, i, tp); + + /* j and h can overlap */ + ecc_mod_mul (&ecc->p, j, h, i, tp); /* j, w, i */ + + /* v and i can overlap */ + ecc_mod_mul (&ecc->p, v, x3, i, tp); /* j, w, v */ + + /* x3 <-- w^2 - j - 2v */ + ecc_mod_sqr (&ecc->p, x3, w, tp); + ecc_mod_sub (&ecc->p, x3, x3, j); + ecc_mod_submul_1 (&ecc->p, x3, v, 2); + + /* y3 <-- w (v - x3) - 2 s1 j */ + ecc_mod_mul (&ecc->p, j, j, y3, tp); + ecc_mod_sub (&ecc->p, v, v, x3); + ecc_mod_mul (&ecc->p, y3, v, w, tp); + ecc_mod_submul_1 (&ecc->p, y3, j, 2); + + return 1; +} diff --git a/ecc-secp192r1.c b/ecc-secp192r1.c index 391ba528..4a07bca3 100644 --- a/ecc-secp192r1.c +++ b/ecc-secp192r1.c @@ -247,7 +247,8 @@ const struct ecc_curve _nettle_secp_192r1 = ecc_p, ecc_Bmodp, - ecc_Bmodp_shifted, + 
ecc_Bmodp_shifted, + ecc_Bm2p, ecc_redc_ppm1, ecc_pp1h, @@ -269,6 +270,7 @@ const struct ecc_curve _nettle_secp_192r1 = ecc_q, ecc_Bmodq, ecc_Bmodq_shifted, + ecc_Bm2q, NULL, ecc_qp1h, diff --git a/ecc-secp224r1.c b/ecc-secp224r1.c index bb321298..b2a335ec 100644 --- a/ecc-secp224r1.c +++ b/ecc-secp224r1.c @@ -223,6 +223,7 @@ const struct ecc_curve _nettle_secp_224r1 = ecc_p, ecc_Bmodp, ecc_Bmodp_shifted, + ecc_Bm2p, ecc_redc_ppm1, ecc_pp1h, @@ -244,6 +245,7 @@ const struct ecc_curve _nettle_secp_224r1 = ecc_q, ecc_Bmodq, ecc_Bmodq_shifted, + ecc_Bm2q, NULL, ecc_qp1h, diff --git a/ecc-secp256r1.c b/ecc-secp256r1.c index e1a14b90..4848dfe3 100644 --- a/ecc-secp256r1.c +++ b/ecc-secp256r1.c @@ -343,6 +343,7 @@ const struct ecc_curve _nettle_secp_256r1 = ecc_p, ecc_Bmodp, ecc_Bmodp_shifted, + ecc_Bm2p, ecc_redc_ppm1, ecc_pp1h, @@ -364,6 +365,7 @@ const struct ecc_curve _nettle_secp_256r1 = ecc_q, ecc_Bmodq, ecc_Bmodq_shifted, + ecc_Bm2q, NULL, ecc_qp1h, diff --git a/ecc-secp384r1.c b/ecc-secp384r1.c index 39716dff..abac5e6d 100644 --- a/ecc-secp384r1.c +++ b/ecc-secp384r1.c @@ -314,6 +314,7 @@ const struct ecc_curve _nettle_secp_384r1 = ecc_p, ecc_Bmodp, ecc_Bmodp_shifted, + ecc_Bm2p, ecc_redc_ppm1, ecc_pp1h, @@ -335,6 +336,7 @@ const struct ecc_curve _nettle_secp_384r1 = ecc_q, ecc_Bmodq, ecc_Bmodq_shifted, + ecc_Bm2q, NULL, ecc_qp1h, diff --git a/ecc-secp521r1.c b/ecc-secp521r1.c index 24d0b53a..8ab7b4bf 100644 --- a/ecc-secp521r1.c +++ b/ecc-secp521r1.c @@ -169,6 +169,7 @@ const struct ecc_curve _nettle_secp_521r1 = ecc_p, ecc_Bmodp, ecc_Bmodp_shifted, + ecc_Bm2p, ecc_redc_ppm1, ecc_pp1h, @@ -190,6 +191,7 @@ const struct ecc_curve _nettle_secp_521r1 = ecc_q, ecc_Bmodq, ecc_Bmodq_shifted, + ecc_Bm2q, NULL, ecc_qp1h, @@ -71,6 +71,7 @@ struct ecc_curve /* Prime */ mpz_t p; + /* Curve constant */ mpz_t b; /* Curve order */ @@ -626,15 +627,15 @@ ecc_curve_init (struct ecc_curve *ecc, const char *curve) x^2 + y^2 = 1 + (121665/121666) x^2 y^2 (mod p). 
- -x^2 + y^2 = 1 - (121665/121666) x^2 y^2, with p = 2^{255} - 19. + But instead of using this curve, we use a twisted curve, following RFC 7748, + + -x^2 + y^2 = 1 - (121665/121666) x^2 y^2 (mod p) + + (this is possible because -1 is a square modulo p). The generator is x = 0x216936d3cd6e53fec0a4e231fdd6dc5c692cc7609525a7b2c9562d608f25d51a y = 0x6666666666666666666666666666666666666666666666666666666666666658 - - Also birationally equivalent to the curve25519 Montgomery curve, - - y^2 = x^3 + 486662 x^2 + x (mod p) */ ecc_curve_init_str (ecc, ECC_TYPE_TWISTED_EDWARDS, "7fffffffffffffffffffffffffffffff" @@ -1151,98 +1152,99 @@ output_point (const struct ecc_curve *ecc, mpz_clear (t); } -static unsigned -output_modulo (const char *name, const mpz_t x, - unsigned size, unsigned bits_per_limb) +static void +string_toupper (char *buf, size_t size, const char *s) { - mpz_t mod; - unsigned bits; - - mpz_init (mod); - - mpz_setbit (mod, bits_per_limb * size); - mpz_mod (mod, mod, x); - - bits = mpz_sizeinbase (mod, 2); - output_bignum (name, mod, size, bits_per_limb); - - mpz_clear (mod); - return bits; + size_t i; + for (i = 0; i < size; i++) + { + buf[i] = toupper ((int)s[i]); + if (!buf[i]) + return; + } + fprintf (stderr, "string '%s' too large for buffer of size %u.\n", + s, (unsigned) size); + abort(); } static void -output_curve (const struct ecc_curve *ecc, unsigned bits_per_limb) +output_modulo (const char *name, const mpz_t x, + unsigned size, unsigned bits_per_limb) { - unsigned limb_size = (ecc->bit_size + bits_per_limb - 1)/bits_per_limb; - unsigned i; - unsigned bits; - int redc_limbs; + unsigned bit_size; + int shift; + char buf[20]; mpz_t t; - mpz_t z; + + snprintf (buf, sizeof (buf), "ecc_%s", name); + output_bignum (buf, x, size, bits_per_limb); mpz_init (t); - mpz_init (z); - printf ("/* For NULL. 
*/\n#include <stddef.h>\n"); + mpz_setbit (t, bits_per_limb * size); + mpz_mod (t, t, x); - printf ("#define ECC_LIMB_SIZE %u\n", limb_size); - printf ("#define ECC_PIPPENGER_K %u\n", ecc->pippenger_k); - printf ("#define ECC_PIPPENGER_C %u\n", ecc->pippenger_c); - - output_bignum ("ecc_p", ecc->p, limb_size, bits_per_limb); - output_bignum ("ecc_b", ecc->b, limb_size, bits_per_limb); - output_bignum ("ecc_q", ecc->q, limb_size, bits_per_limb); + snprintf (buf, sizeof (buf), "ecc_Bmod%s", name); + output_bignum (buf, t, size, bits_per_limb); - bits = output_modulo ("ecc_Bmodp", ecc->p, limb_size, bits_per_limb); - printf ("#define ECC_BMODP_SIZE %u\n", - (bits + bits_per_limb - 1) / bits_per_limb); - bits = output_modulo ("ecc_Bmodq", ecc->q, limb_size, bits_per_limb); - printf ("#define ECC_BMODQ_SIZE %u\n", - (bits + bits_per_limb - 1) / bits_per_limb); - bits = mpz_sizeinbase (ecc->q, 2); - if (bits < ecc->bit_size) + string_toupper (buf, sizeof (buf), name); + printf ("#define ECC_BMOD%s_SIZE %u\n", buf, + (unsigned) ((mpz_sizeinbase (t, 2) + bits_per_limb - 1) + / bits_per_limb)); + + bit_size = mpz_sizeinbase (x, 2); + + shift = size * bits_per_limb - bit_size; + assert (shift >= 0); + if (shift > 0) { - /* for curve25519, with q = 2^k + q', with a much smaller q' */ - unsigned mbits; - unsigned shift; + mpz_set_ui (t, 0); + mpz_setbit (t, size * bits_per_limb); + mpz_submul_ui (t, x, 2); - /* Shift to align the one bit at B */ - shift = bits_per_limb * limb_size + 1 - bits; - - mpz_set (t, ecc->q); - mpz_clrbit (t, bits-1); - mbits = mpz_sizeinbase (t, 2); + snprintf (buf, sizeof (buf), "ecc_Bm2%s", name); + output_bignum (buf, t, size, bits_per_limb); - /* The shifted value must be a limb smaller than q. 
*/ - if (mbits + shift + bits_per_limb <= bits) + if (bit_size == 253) { + /* For curve25519, with q = 2^k + q', with a much smaller q' */ + unsigned mbits; + unsigned shift; + + /* Shift to align the one bit at B */ + shift = bits_per_limb * size + 1 - bit_size; + + mpz_set (t, x); + mpz_clrbit (t, bit_size-1); + mbits = mpz_sizeinbase (t, 2); + + /* The shifted value must be a limb smaller than q. */ + assert (mbits + shift + bits_per_limb <= bit_size); + /* q of the form 2^k + q', with q' a limb smaller */ mpz_mul_2exp (t, t, shift); - output_bignum ("ecc_mBmodq_shifted", t, limb_size, bits_per_limb); - } - } + snprintf (buf, sizeof (buf), "ecc_mBmod%s_shifted", name); - if (ecc->bit_size < limb_size * bits_per_limb) - { - int shift; + output_bignum (buf, t, size, bits_per_limb); + } + else + { + mpz_set_ui (t, 0); + mpz_setbit (t, bit_size); + mpz_sub (t, t, x); - mpz_set_ui (t, 0); - mpz_setbit (t, ecc->bit_size); - mpz_sub (t, t, ecc->p); - output_bignum ("ecc_Bmodp_shifted", t, limb_size, bits_per_limb); + snprintf (buf, sizeof (buf), "ecc_Bmod%s_shifted", name); + output_bignum (buf, t, size, bits_per_limb); - shift = limb_size * bits_per_limb - ecc->bit_size; - if (shift > 0) - { /* Check condition for reducing hi limbs. If s is the normalization shift and n is the bit size (so that s + n - = limb_size * bite_per_limb), then we need + = limb_size * bits_per_limb), then we need - (2^n - 1) + (2^s - 1) (2^n - p) < 2p + (2^n - 1) + (2^s - 1) (2^n - p) < 2p or equivalently, - 2^s (2^n - p) <= p + 2^s (2^n - p) <= p To a allow a carry limb to be added in at the same time, substitute s+1 for s. @@ -1250,26 +1252,45 @@ output_curve (const struct ecc_curve *ecc, unsigned bits_per_limb) /* FIXME: For ecdsa verify, we actually need the stricter inequality < 2 q. 
*/ mpz_mul_2exp (t, t, shift + 1); - if (mpz_cmp (t, ecc->p) > 0) + if (mpz_cmp (t, x) > 0) { - fprintf (stderr, "Reduction condition failed for %u-bit curve.\n", - ecc->bit_size); + fprintf (stderr, "Reduction condition failed for %u-bit %s.\n", + bit_size, name); exit (EXIT_FAILURE); } } } else - printf ("#define ecc_Bmodp_shifted ecc_Bmodp\n"); - - if (bits < limb_size * bits_per_limb) { - mpz_set_ui (t, 0); - mpz_setbit (t, bits); - mpz_sub (t, t, ecc->q); - output_bignum ("ecc_Bmodq_shifted", t, limb_size, bits_per_limb); + printf ("#define ecc_Bm2%s ecc_Bmod%s\n", name, name); + printf ("#define ecc_Bmod%s_shifted ecc_Bmod%s\n", name, name); } - else - printf ("#define ecc_Bmodq_shifted ecc_Bmodq\n"); + + mpz_clear (t); +} + +static void +output_curve (const struct ecc_curve *ecc, unsigned bits_per_limb) +{ + unsigned limb_size = (ecc->bit_size + bits_per_limb - 1)/bits_per_limb; + unsigned i; + int redc_limbs; + mpz_t t; + mpz_t z; + + mpz_init (t); + mpz_init (z); + + printf ("/* For NULL. 
*/\n#include <stddef.h>\n"); + + printf ("#define ECC_LIMB_SIZE %u\n", limb_size); + printf ("#define ECC_PIPPENGER_K %u\n", ecc->pippenger_k); + printf ("#define ECC_PIPPENGER_C %u\n", ecc->pippenger_c); + + output_modulo ("p", ecc->p, limb_size, bits_per_limb); + output_modulo ("q", ecc->q, limb_size, bits_per_limb); + + output_bignum ("ecc_b", ecc->b, limb_size, bits_per_limb); mpz_add_ui (t, ecc->p, 1); mpz_fdiv_q_2exp (t, t, 1); diff --git a/examples/ecc-benchmark.c b/examples/ecc-benchmark.c index 3ab269c7..7e857f80 100644 --- a/examples/ecc-benchmark.c +++ b/examples/ecc-benchmark.c @@ -159,11 +159,17 @@ bench_modq (void *p) } static void -bench_modinv (void *p) +bench_pinv (void *p) { struct ecc_ctx *ctx = (struct ecc_ctx *) p; ctx->ecc->p.invert (&ctx->ecc->p, ctx->rp, ctx->ap, ctx->tp); } +static void +bench_qinv (void *p) +{ + struct ecc_ctx *ctx = (struct ecc_ctx *) p; + ctx->ecc->q.invert (&ctx->ecc->p, ctx->rp, ctx->ap, ctx->tp); +} #if !NETTLE_USE_MINI_GMP static void @@ -239,7 +245,7 @@ static void bench_curve (const struct ecc_curve *ecc) { struct ecc_ctx ctx; - double modp, reduce, modq, modinv, modinv_gcd, modinv_powm, + double modp, reduce, modq, pinv, qinv, modinv_gcd, modinv_powm, dup_hh, add_hh, add_hhh, mul_g, mul_a; @@ -277,7 +283,8 @@ bench_curve (const struct ecc_curve *ecc) modq = time_function (bench_modq, &ctx); - modinv = time_function (bench_modinv, &ctx); + pinv = time_function (bench_pinv, &ctx); + qinv = time_function (bench_qinv, &ctx); #if !NETTLE_USE_MINI_GMP modinv_gcd = time_function (bench_modinv_gcd, &ctx); #else @@ -299,9 +306,9 @@ bench_curve (const struct ecc_curve *ecc) free (ctx.bp); free (ctx.tp); - printf ("%4d %6.4f %6.4f %6.4f %6.2f %6.3f %6.2f %6.3f %6.3f %6.3f %6.1f %6.1f\n", + printf ("%4d %6.4f %6.4f %6.4f %6.2f %6.2f %6.3f %6.2f %6.3f %6.3f %6.3f %6.1f %6.1f\n", ecc->p.bit_size, 1e6 * modp, 1e6 * reduce, 1e6 * modq, - 1e6 * modinv, 1e6 * modinv_gcd, 1e6 * modinv_powm, + 1e6 * pinv, 1e6 * qinv, 1e6 * 
modinv_gcd, 1e6 * modinv_powm, 1e6 * dup_hh, 1e6 * add_hh, 1e6 * add_hhh, 1e6 * mul_g, 1e6 * mul_a); } @@ -326,8 +333,8 @@ main (int argc UNUSED, char **argv UNUSED) unsigned i; time_init(); - printf ("%4s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s (us)\n", - "size", "modp", "reduce", "modq", "modinv", "mi_gcd", "mi_pow", + printf ("%4s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s (us)\n", + "size", "modp", "reduce", "modq", "pinv", "qinv", "mi_gcd", "mi_pow", "dup_hh", "add_hh", "ad_hhh", "mul_g", "mul_a"); for (i = 0; i < numberof (curves); i++) diff --git a/examples/nettle-benchmark.c b/examples/nettle-benchmark.c index ba5dd284..802a7234 100644 --- a/examples/nettle-benchmark.c +++ b/examples/nettle-benchmark.c @@ -63,6 +63,7 @@ #include "sha1.h" #include "sha2.h" #include "sha3.h" +#include "sm4.h" #include "twofish.h" #include "umac.h" #include "cmac.h" @@ -926,6 +927,7 @@ main(int argc, char **argv) &nettle_des3, &nettle_serpent256, &nettle_twofish128, &nettle_twofish192, &nettle_twofish256, + &nettle_sm4, NULL }; @@ -153,9 +153,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, armv6) -DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, armv6) +DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, armv6) DECLARE_FAT_FUNC(_nettle_sha512_compress, sha512_compress_func) DECLARE_FAT_FUNC_VAR(sha512_compress, sha512_compress_func, c) @@ -202,7 +202,7 @@ fat_init (void) _nettle_aes_encrypt_vec = _nettle_aes_encrypt_armv6; _nettle_aes_decrypt_vec = _nettle_aes_decrypt_armv6; nettle_sha1_compress_vec = _nettle_sha1_compress_armv6; - 
_nettle_sha256_compress_vec = _nettle_sha256_compress_armv6; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_armv6; } else { @@ -211,7 +211,7 @@ fat_init (void) _nettle_aes_encrypt_vec = _nettle_aes_encrypt_arm; _nettle_aes_decrypt_vec = _nettle_aes_decrypt_arm; nettle_sha1_compress_vec = _nettle_sha1_compress_c; - _nettle_sha256_compress_vec = _nettle_sha256_compress_c; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c; } if (features.have_neon) { @@ -263,9 +263,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void, (uint32_t *state, const uint8_t *input), (state, input)) -DEFINE_FAT_FUNC(_nettle_sha256_compress, void, - (uint32_t *state, const uint8_t *input, const uint32_t *k), - (state, input, k)) +DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *, + (uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input), + (state, k, blocks, input)) DEFINE_FAT_FUNC(_nettle_sha512_compress, void, (uint64_t *state, const uint8_t *input, const uint64_t *k), diff --git a/fat-arm64.c b/fat-arm64.c index f2b8493d..aec99f66 100644 --- a/fat-arm64.c +++ b/fat-arm64.c @@ -178,9 +178,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, arm64) -DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, arm64) +DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, arm64) static void CONSTRUCTOR fat_init (void) @@ -250,11 +250,11 @@ fat_init (void) { if (verbose) fprintf (stderr, "libnettle: enabling hardware-accelerated sha256 compress code.\n"); - _nettle_sha256_compress_vec = _nettle_sha256_compress_arm64; + _nettle_sha256_compress_n_vec = 
_nettle_sha256_compress_n_arm64; } else { - _nettle_sha256_compress_vec = _nettle_sha256_compress_c; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c; } } @@ -297,6 +297,7 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void, (uint32_t *state, const uint8_t *input), (state, input)) -DEFINE_FAT_FUNC(_nettle_sha256_compress, void, - (uint32_t *state, const uint8_t *input, const uint32_t *k), - (state, input, k)) +DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *, + (uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input), + (state, k, blocks, input)) @@ -195,6 +195,11 @@ DECLARE_FAT_FUNC(_nettle_poly1305_digest, poly1305_digest_func) DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, c) DECLARE_FAT_FUNC_VAR(poly1305_digest, poly1305_digest_func, ppc64) +DECLARE_FAT_FUNC(_nettle_poly1305_blocks, poly1305_blocks_func) +DECLARE_FAT_FUNC_VAR(poly1305_blocks, poly1305_blocks_func, c) +DECLARE_FAT_FUNC_VAR(poly1305_blocks, poly1305_blocks_func, ppc64) + + static void CONSTRUCTOR fat_init (void) { @@ -251,12 +256,14 @@ fat_init (void) _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_ppc64; _nettle_poly1305_block_vec = _nettle_poly1305_block_ppc64; _nettle_poly1305_digest_vec = _nettle_poly1305_digest_ppc64; + _nettle_poly1305_blocks_vec = _nettle_poly1305_blocks_ppc64; } else { _nettle_poly1305_set_key_vec = _nettle_poly1305_set_key_c; _nettle_poly1305_block_vec = _nettle_poly1305_block_c; _nettle_poly1305_digest_vec = _nettle_poly1305_digest_c; + _nettle_poly1305_blocks_vec = _nettle_poly1305_blocks_c; } } @@ -315,3 +322,9 @@ DEFINE_FAT_FUNC(_nettle_poly1305_digest, void, (struct poly1305_ctx *ctx, union nettle_block16 *s), (ctx, s)) + +DEFINE_FAT_FUNC(_nettle_poly1305_blocks, const uint8_t *, + (struct poly1305_ctx *ctx, + size_t blocks, + const uint8_t *m), + (ctx, blocks, m)) diff --git a/fat-s390x.c b/fat-s390x.c index fa026018..1bbd8e16 100644 --- a/fat-s390x.c +++ b/fat-s390x.c @@ -254,9 +254,9 @@ 
DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, s390x) -DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, s390x) +DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, s390x) DECLARE_FAT_FUNC(_nettle_sha512_compress, sha512_compress_func) DECLARE_FAT_FUNC_VAR(sha512_compress, sha512_compress_func, c) @@ -398,11 +398,11 @@ fat_init (void) { if (verbose) fprintf (stderr, "libnettle: enabling hardware accelerated SHA256 compress code.\n"); - _nettle_sha256_compress_vec = _nettle_sha256_compress_s390x; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_s390x; } else { - _nettle_sha256_compress_vec = _nettle_sha256_compress_c; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c; } /* SHA512 */ @@ -495,9 +495,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void, (state, input)) /* SHA256 */ -DEFINE_FAT_FUNC(_nettle_sha256_compress, void, - (uint32_t *state, const uint8_t *input, const uint32_t *k), - (state, input, k)) +DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *, + (uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input), + (state, k, blocks, input)) /* SHA512 */ DEFINE_FAT_FUNC(_nettle_sha512_compress, void, diff --git a/fat-setup.h b/fat-setup.h index ad3c10f0..6bf3e2fa 100644 --- a/fat-setup.h +++ b/fat-setup.h @@ -178,7 +178,9 @@ typedef void salsa20_crypt_func (struct salsa20_ctx *ctx, unsigned rounds, const uint8_t *src); typedef void sha1_compress_func(uint32_t *state, const uint8_t *input); -typedef void sha256_compress_func(uint32_t *state, const uint8_t *input, const uint32_t *k); +typedef const uint8_t * 
+sha256_compress_n_func(uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input); struct sha3_state; typedef void sha3_permute_func (struct sha3_state *state); @@ -201,6 +203,8 @@ typedef void poly1305_set_key_func(struct poly1305_ctx *ctx, const uint8_t *key) typedef void poly1305_digest_func(struct poly1305_ctx *ctx, union nettle_block16 *s); typedef void poly1305_block_func(struct poly1305_ctx *ctx, const uint8_t *m, unsigned high); +typedef const uint8_t * poly1305_blocks_func(struct poly1305_ctx *ctx, size_t blocks, + const uint8_t *m); struct aes128_ctx; typedef void aes128_set_key_func (struct aes128_ctx *ctx, const uint8_t *key); diff --git a/fat-x86_64.c b/fat-x86_64.c index 47cf78ae..0a2fedf4 100644 --- a/fat-x86_64.c +++ b/fat-x86_64.c @@ -155,9 +155,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, x86_64) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, sha_ni) -DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, x86_64) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, sha_ni) +DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, x86_64) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, sha_ni) DECLARE_FAT_FUNC(_nettle_ghash_set_key, ghash_set_key_func) DECLARE_FAT_FUNC_VAR(ghash_set_key, ghash_set_key_func, c) @@ -228,14 +228,14 @@ fat_init (void) if (verbose) fprintf (stderr, "libnettle: using sha_ni instructions.\n"); nettle_sha1_compress_vec = _nettle_sha1_compress_sha_ni; - _nettle_sha256_compress_vec = _nettle_sha256_compress_sha_ni; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_sha_ni; } else { if (verbose) fprintf (stderr, "libnettle: not using sha_ni instructions.\n"); nettle_sha1_compress_vec = _nettle_sha1_compress_x86_64; - _nettle_sha256_compress_vec = 
_nettle_sha256_compress_x86_64; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_x86_64; } if (features.have_pclmul) @@ -315,9 +315,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void, (uint32_t *state, const uint8_t *input), (state, input)) -DEFINE_FAT_FUNC(_nettle_sha256_compress, void, - (uint32_t *state, const uint8_t *input, const uint32_t *k), - (state, input, k)) +DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *, + (uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input), + (state, k, blocks, input)) DEFINE_FAT_FUNC(_nettle_ghash_set_key, void, (struct gcm_key *ctx, const union nettle_block16 *key), diff --git a/sec-tabselect.c b/gcm-sm4-meta.c index e6bf2282..090460d3 100644 --- a/sec-tabselect.c +++ b/gcm-sm4-meta.c @@ -1,6 +1,6 @@ -/* sec-tabselect.c +/* gcm-sm4-meta.c - Copyright (C) 2013 Niels Möller + Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> This file is part of GNU Nettle. @@ -29,34 +29,32 @@ not, see http://www.gnu.org/licenses/. */ -/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */ - #if HAVE_CONFIG_H # include "config.h" #endif #include <assert.h> -#include "ecc-internal.h" +#include "nettle-meta.h" + +#include "gcm.h" -/* Copy the k'th element of the table out tn elements, each of size - rn. Always read complete table. Similar to gmp's mpn_tabselect. */ -/* FIXME: Should we need to volatile declare anything? 
*/ -void -sec_tabselect (mp_limb_t *rp, mp_size_t rn, - const mp_limb_t *table, unsigned tn, - unsigned k) +static nettle_set_key_func gcm_sm4_set_nonce_wrapper; +static void +gcm_sm4_set_nonce_wrapper (void *ctx, const uint8_t *nonce) { - const mp_limb_t *end = table + tn * rn; - const mp_limb_t *p; - mp_size_t i; - - assert (k < tn); - mpn_zero (rp, rn); - for (p = table; p < end; p += rn, k--) - { - mp_limb_t mask = - (mp_limb_t) (k == 0); - for (i = 0; i < rn; i++) - rp[i] += mask & p[i]; - } + gcm_sm4_set_iv (ctx, GCM_IV_SIZE, nonce); } + +const struct nettle_aead nettle_gcm_sm4 = + { "gcm_sm4", sizeof(struct gcm_sm4_ctx), + GCM_BLOCK_SIZE, SM4_KEY_SIZE, + GCM_IV_SIZE, GCM_DIGEST_SIZE, + (nettle_set_key_func *) gcm_sm4_set_key, + (nettle_set_key_func *) gcm_sm4_set_key, + gcm_sm4_set_nonce_wrapper, + (nettle_hash_update_func *) gcm_sm4_update, + (nettle_crypt_func *) gcm_sm4_encrypt, + (nettle_crypt_func *) gcm_sm4_decrypt, + (nettle_hash_digest_func *) gcm_sm4_digest, + }; diff --git a/gcm-sm4.c b/gcm-sm4.c new file mode 100644 index 00000000..19d91ae9 --- /dev/null +++ b/gcm-sm4.c @@ -0,0 +1,81 @@ +/* gcm-sm4.c + + Galois counter mode using SM4 as the underlying cipher. + + Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <assert.h> + +#include "gcm.h" + +void +gcm_sm4_set_key(struct gcm_sm4_ctx *ctx, const uint8_t *key) +{ + GCM_SET_KEY(ctx, sm4_set_encrypt_key, sm4_crypt, key); +} + +void +gcm_sm4_set_iv(struct gcm_sm4_ctx *ctx, + size_t length, const uint8_t *iv) +{ + GCM_SET_IV (ctx, length, iv); +} + +void +gcm_sm4_update(struct gcm_sm4_ctx *ctx, + size_t length, const uint8_t *data) +{ + GCM_UPDATE (ctx, length, data); +} + +void +gcm_sm4_encrypt(struct gcm_sm4_ctx *ctx, + size_t length, uint8_t *dst, const uint8_t *src) +{ + GCM_ENCRYPT(ctx, sm4_crypt, length, dst, src); +} + +void +gcm_sm4_decrypt(struct gcm_sm4_ctx *ctx, + size_t length, uint8_t *dst, const uint8_t *src) +{ + GCM_DECRYPT(ctx, sm4_crypt, length, dst, src); +} + +void +gcm_sm4_digest(struct gcm_sm4_ctx *ctx, + size_t length, uint8_t *digest) +{ + GCM_DIGEST(ctx, sm4_crypt, length, digest); +} @@ -55,25 +55,7 @@ #include "macros.h" #include "ctr-internal.h" #include "block-internal.h" - -/* FIXME: Duplicated in nist-keywrap.c */ -#if WORDS_BIGENDIAN -#define bswap_if_le(x) (x) -#elif HAVE_BUILTIN_BSWAP64 -#define bswap_if_le(x) (__builtin_bswap64 (x)) -#else -static uint64_t -bswap_if_le (uint64_t x) -{ - x = ((x >> 32) & UINT64_C (0xffffffff)) - | ((x & UINT64_C (0xffffffff)) << 32); - x = ((x >> 16) & UINT64_C (0xffff0000ffff)) - | ((x & UINT64_C (0xffff0000ffff)) << 16); - x = ((x >> 8) & UINT64_C (0xff00ff00ff00ff)) - | ((x & UINT64_C (0xff00ff00ff00ff)) << 8); - return x; -} -#endif +#include "bswap-internal.h" /* Initialization of GCM. 
* @ctx: The context of GCM @@ -115,8 +97,8 @@ gcm_hash_sizes(const struct gcm_key *key, union nettle_block16 *x, data_size *= 8; auth_size *= 8; - buffer.u64[0] = bswap_if_le (auth_size); - buffer.u64[1] = bswap_if_le (data_size); + buffer.u64[0] = bswap64_if_le (auth_size); + buffer.u64[1] = bswap64_if_le (data_size); _ghash_update (key, x, 1, buffer.b); } @@ -40,6 +40,7 @@ #include "aes.h" #include "camellia.h" +#include "sm4.h" #ifdef __cplusplus extern "C" { @@ -95,6 +96,13 @@ extern "C" { #define gcm_camellia256_decrypt nettle_gcm_camellia256_decrypt #define gcm_camellia256_digest nettle_gcm_camellia256_digest +#define gcm_sm4_set_key nettle_gcm_sm4_set_key +#define gcm_sm4_set_iv nettle_gcm_sm4_set_iv +#define gcm_sm4_update nettle_gcm_sm4_update +#define gcm_sm4_encrypt nettle_gcm_sm4_encrypt +#define gcm_sm4_decrypt nettle_gcm_sm4_decrypt +#define gcm_sm4_digest nettle_gcm_sm4_digest + #define GCM_BLOCK_SIZE 16 #define GCM_IV_SIZE (GCM_BLOCK_SIZE - 4) #define GCM_DIGEST_SIZE 16 @@ -322,7 +330,22 @@ void gcm_camellia256_decrypt(struct gcm_camellia256_ctx *ctx, void gcm_camellia256_digest(struct gcm_camellia256_ctx *ctx, size_t length, uint8_t *digest); - + +struct gcm_sm4_ctx GCM_CTX(struct sm4_ctx); + +void gcm_sm4_set_key(struct gcm_sm4_ctx *ctx, const uint8_t *key); +void gcm_sm4_set_iv(struct gcm_sm4_ctx *ctx, + size_t length, const uint8_t *iv); +void gcm_sm4_update(struct gcm_sm4_ctx *ctx, + size_t length, const uint8_t *data); +void gcm_sm4_encrypt(struct gcm_sm4_ctx *ctx, + size_t length, uint8_t *dst, const uint8_t *src); +void gcm_sm4_decrypt(struct gcm_sm4_ctx *ctx, + size_t length, uint8_t *dst, const uint8_t *src); +void gcm_sm4_digest(struct gcm_sm4_ctx *ctx, + size_t length, uint8_t *digest); + + #ifdef __cplusplus } #endif diff --git a/ghash-internal.h b/ghash-internal.h index 97dff024..2504dc09 100644 --- a/ghash-internal.h +++ b/ghash-internal.h @@ -38,6 +38,8 @@ /* Name mangling */ #define _ghash_set_key _nettle_ghash_set_key #define 
_ghash_update _nettle_ghash_update +#define _siv_ghash_set_key _nettle_siv_ghash_set_key +#define _siv_ghash_update _nettle_siv_ghash_update #ifdef __cplusplus extern "C" { @@ -58,6 +60,17 @@ const uint8_t * _ghash_update (const struct gcm_key *ctx, union nettle_block16 *state, size_t blocks, const uint8_t *data); +/* Expands KEY as needed, for corresponding _siv_ghash_update */ +void +_siv_ghash_set_key (struct gcm_key *ctx, const union nettle_block16 *key); + +/* Updates STATE by hashing DATA, which must be an integral number of + blocks. For convenience, returns a pointer to the end of the + data. */ +const uint8_t * +_siv_ghash_update (const struct gcm_key *ctx, union nettle_block16 *state, + size_t blocks, const uint8_t *data); + #ifdef __cplusplus } #endif @@ -99,6 +99,26 @@ mpn_cnd_swap (mp_limb_t cnd, volatile mp_limb_t *ap, volatile mp_limb_t *bp, mp_ } } +/* Copy the k'th element of the table out tn elements, each of size + rn. Always read complete table. Similar to gmp's mpn_tabselect. */ +void +mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *table, + mp_size_t rn, unsigned tn, unsigned k) +{ + volatile const mp_limb_t *end = table + tn * rn; + volatile const mp_limb_t *p; + mp_size_t i; + + assert (k < tn); + for (p = table; p < end; p += rn, k--) + { + mp_limb_t mask = - (mp_limb_t) (k == 0); + for (i = 0; i < rn; i++) + rp[i] = (~mask & rp[i]) | (mask & p[i]); + } +} + + #endif /* NETTLE_USE_MINI_GMP */ int @@ -66,6 +66,10 @@ mpn_cnd_sub_n (mp_limb_t cnd, mp_limb_t *rp, void mpn_cnd_swap (mp_limb_t cnd, volatile mp_limb_t *ap, volatile mp_limb_t *bp, mp_size_t n); + +void +mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *table, + mp_size_t rn, unsigned tn, unsigned k); #endif /* Side-channel silent variant of mpn_zero_p. 
*/ diff --git a/md-internal.h b/md-internal.h new file mode 100644 index 00000000..a97b7b90 --- /dev/null +++ b/md-internal.h @@ -0,0 +1,70 @@ +/* md-internal.h + + Copyright (C) 2001, 2010, 2022 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#ifndef NETTLE_MD_INTERNAL_H_INCLUDED +#define NETTLE_MD_INTERNAL_H_INCLUDED + +#include <string.h> + +/* Internal helper macros for Merkle-Damgård hash functions. 
Assumes the context + structs include the following fields: + + uint8_t block[...]; // Buffer holding one block + unsigned int index; // Index into block +*/ + +#define MD_FILL_OR_RETURN(ctx, length, data) \ + do { \ + unsigned __md_left = sizeof((ctx)->block) - (ctx)->index; \ + if ((length) < __md_left) \ + { \ + memcpy((ctx)->block + (ctx)->index, (data), (length)); \ + (ctx)->index += (length); \ + return; \ + } \ + memcpy((ctx)->block + (ctx)->index, (data), __md_left); \ + (data) += __md_left; \ + (length) -= __md_left; \ + } while(0) + +#define MD_FILL_OR_RETURN_INDEX(block_size, block, index, length, data) \ + do { \ + unsigned __md_left = (block_size) - (index); \ + if ((length) < __md_left) \ + { \ + memcpy(block + (index), (data), (length)); \ + return (index) + (length); \ + } \ + memcpy((block) + (index), (data), __md_left); \ + (data) += __md_left; \ + (length) -= __md_left; \ + } while(0) +#endif /* NETTLE_MD_INTERNAL_H_INCLUDED */ diff --git a/nettle-internal.h b/nettle-internal.h index 92416400..bf906c88 100644 --- a/nettle-internal.h +++ b/nettle-internal.h @@ -74,12 +74,13 @@ do { assert((size_t)(size) <= (sizeof(name))); } while (0) #endif -/* Arbitrary limits which apply to systems that don't have alloca */ -#define NETTLE_MAX_HASH_BLOCK_SIZE 128 +/* Limits that apply to systems that don't have alloca */ +#define NETTLE_MAX_HASH_BLOCK_SIZE 144 /* For sha3_224*/ #define NETTLE_MAX_HASH_DIGEST_SIZE 64 #define NETTLE_MAX_HASH_CONTEXT_SIZE (sizeof(struct sha3_224_ctx)) #define NETTLE_MAX_SEXP_ASSOC 17 #define NETTLE_MAX_CIPHER_BLOCK_SIZE 32 +#define NETTLE_MAX_CIPHER_KEY_SIZE 32 /* Doesn't quite fit with the other algorithms, because of the weak * keys. 
Weak keys are not reported, the functions will simply crash diff --git a/nettle-meta-aeads.c b/nettle-meta-aeads.c index c99cc465..78f38a3c 100644 --- a/nettle-meta-aeads.c +++ b/nettle-meta-aeads.c @@ -43,6 +43,7 @@ const struct nettle_aead * const _nettle_aeads[] = { &nettle_gcm_aes256, &nettle_gcm_camellia128, &nettle_gcm_camellia256, + &nettle_gcm_sm4, &nettle_eax_aes128, &nettle_chacha_poly1305, NULL diff --git a/nettle-meta-ciphers.c b/nettle-meta-ciphers.c index 49cb47a7..f8d691cf 100644 --- a/nettle-meta-ciphers.c +++ b/nettle-meta-ciphers.c @@ -54,6 +54,7 @@ const struct nettle_cipher * const _nettle_ciphers[] = { &nettle_arctwo64, &nettle_arctwo128, &nettle_arctwo_gutmann128, + &nettle_sm4, NULL }; diff --git a/nettle-meta.h b/nettle-meta.h index d684947e..19dc96c5 100644 --- a/nettle-meta.h +++ b/nettle-meta.h @@ -89,6 +89,8 @@ extern const struct nettle_cipher nettle_arctwo64; extern const struct nettle_cipher nettle_arctwo128; extern const struct nettle_cipher nettle_arctwo_gutmann128; +extern const struct nettle_cipher nettle_sm4; + struct nettle_hash { const char *name; @@ -198,6 +200,7 @@ extern const struct nettle_aead nettle_gcm_aes192; extern const struct nettle_aead nettle_gcm_aes256; extern const struct nettle_aead nettle_gcm_camellia128; extern const struct nettle_aead nettle_gcm_camellia256; +extern const struct nettle_aead nettle_gcm_sm4; extern const struct nettle_aead nettle_eax_aes128; extern const struct nettle_aead nettle_chacha_poly1305; diff --git a/nettle.texinfo b/nettle.texinfo index 69f9bcaf..767ae718 100644 --- a/nettle.texinfo +++ b/nettle.texinfo @@ -105,6 +105,7 @@ Cipher functions * DES3:: * Salsa20:: * Serpent:: +* SM4:: * Twofish:: * nettle_cipher abstraction:: @@ -122,6 +123,7 @@ Authenticated encryption with associated data * CCM:: * ChaCha-Poly1305:: * SIV-CMAC:: +* SIV-GCM:: * nettle_aead abstraction:: Keyed Hash Functions @@ -442,6 +444,14 @@ This function also resets the context in the same way as @code{sha256_init}. 
@end deftypefun +@deftypefun void sha256_compress (const uint32_t *@var{state}, uint8_t *@var{input}) +Perform a raw SHA256 compress on SHA256_BLOCK_SIZE bytes from @var{input} +using @var{state} as IV (an array of 8 uint32_t). The output is stored in @var{state}. +This function provides access to the underlying compression function, +for the rare applications that need that (e.g., using different IV from +standard SHA256). +@end deftypefun + Earlier versions of nettle defined SHA256 in the header file @file{<nettle/sha.h>}, which is now deprecated, but kept for compatibility. @@ -522,6 +532,14 @@ This function also resets the context in the same way as @code{sha512_init}. @end deftypefun +@deftypefun void sha512_compress (const uint64_t *@var{state}, uint8_t *@var{input}) +Perform a raw SHA512 compress on SHA512_BLOCK_SIZE bytes from +@var{input} using @var{state} as IV (an array of 8 uint64_t). The output is stored in @var{state}. +This function provides access to the underlying compression function, +for the rare applications that need that (e.g., using different IV from +standard SHA512). +@end deftypefun + @subsubsection @acronym{SHA384 and other variants of SHA512} Several variants of SHA512 have been defined, with a different initial @@ -929,6 +947,14 @@ This function also resets the context in the same way as @code{md5_init}. @end deftypefun +@deftypefun void md5_compress (const uint32_t *@var{state}, uint8_t *@var{input}) +Perform a raw MD5 compress on MD5_BLOCK_SIZE bytes from @var{input} +using @var{state} as IV (an array of 4 uint32_t). The output is stored in @var{state}. +This function provides access to the underlying compression function, +for the rare applications that need that (e.g., using different IV from +standard MD5). +@end deftypefun + The normal way to use MD5 is to call the functions in order: First @code{md5_init}, then @code{md5_update} zero or more times, and finally @code{md5_digest}. 
After @code{md5_digest}, the context is reset to @@ -1083,6 +1109,13 @@ This function also resets the context in the same way as @code{sha1_init}. @end deftypefun +@deftypefun void sha1_compress (const uint32_t *@var{state}, uint8_t *@var{input}) +Perform a raw SHA1 compress on SHA1_BLOCK_SIZE bytes from @var{input} +using @var{state} as IV (an array of 5 uint32_t). The output is stored in @var{state}. +This function provides access to the underlying compression function, +for the rare applications that need that (e.g., using different IV from +standard SHA1). +@end deftypefun @subsubsection @acronym{GOSTHASH94 and GOSTHASH94CP} @cindex GOST hash @@ -1292,6 +1325,7 @@ decryption. * DES3:: * Salsa20:: * Serpent:: +* SM4:: * Twofish:: * nettle_cipher abstraction:: @end menu @@ -2114,6 +2148,42 @@ in any other way. Analogous to @code{serpent_encrypt} @end deftypefun +@node SM4 +@subsection SM4 +@cindex SM4 + +SM4 is a block cipher standard adopted by the government of the People's +Republic of China, and it was issued by the State Cryptography Administration +on March 21, 2012. The standard is GM/T 0002-2012 "SM4 block cipher algorithm". +Nettle defines it in @file{<nettle/sm4.h>}. + +@deftp {Context struct} {struct sm4_ctx} +@end deftp + +@defvr Constant SM4_BLOCK_SIZE +The SM4 block-size, 16. +@end defvr + +@defvr Constant SM4_KEY_SIZE +Default SM4 key size, 16. +@end defvr + +@deftypefun void sm4_set_encrypt_key (struct sm4_ctx *@var{ctx}, const uint8_t *@var{key}) +Initialize the cipher. The function is used for encryption. +@end deftypefun + +@deftypefun void sm4_set_decrypt_key (struct sm4_ctx *@var{ctx}, const uint8_t *@var{key}) +Initialize the cipher. The function is used for decryption. +@end deftypefun + +@deftypefun void sm4_crypt (const struct sm4_ctx *@var{ctx}, size_t @var{length}, uint8_t *@var{dst}, const uint8_t *@var{src}) +Cryption function. @var{length} must be an integral multiple of the +block size. 
If it is more than one block, the data is processed in ECB +mode. @code{src} and @code{dst} may be equal, but they must not overlap +in any other way. The same function is used for both encryption and +decryption. +@end deftypefun + @node Twofish @subsection Twofish @cindex Twofish @@ -2811,6 +2881,7 @@ more adventurous alternative, in particular if performance is important. * CCM:: * ChaCha-Poly1305:: * SIV-CMAC:: +* SIV-GCM:: * nettle_aead abstraction:: @end menu @@ -3227,6 +3298,44 @@ that @var{length} is @code{GCM_DIGEST_SIZE}, but if you provide a smaller value, only the first @var{length} octets of the digest are written. @end deftypefun +@subsubsection @acronym{GCM}-SM4 interface + +The following functions implement the case of @acronym{GCM} using +SM4 as the underlying cipher. + +@deftp {Context struct} {struct gcm_sm4_ctx} +Context structs, defined using @code{GCM_CTX}. +@end deftp + +@deftypefun void gcm_sm4_set_key (struct gcm_sm4_ctx *@var{ctx}, const uint8_t *@var{key}) +Initializes @var{ctx} using the given key. +@end deftypefun + +@deftypefun void gcm_sm4_set_iv (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, const uint8_t *@var{iv}) +Initializes the per-message state, using the given @acronym{IV}. +@end deftypefun + +@deftypefun void gcm_sm4_update (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, const uint8_t *@var{data}) +Provides associated data to be authenticated. If used, must be called +before @code{gcm_sm4_encrypt} or @code{gcm_sm4_decrypt}. All but the +last call for each message @emph{must} use a length that is a multiple +of the block size. +@end deftypefun + +@deftypefun void gcm_sm4_encrypt (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, uint8_t *@var{dst}, const uint8_t *@var{src}) +@deftypefunx void gcm_sm4_decrypt (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, uint8_t *@var{dst}, const uint8_t *@var{src}) +Encrypts or decrypts the data of a message. 
All but the last call for +each message @emph{must} use a length that is a multiple of the block +size. +@end deftypefun + +@deftypefun void gcm_sm4_digest (struct gcm_sm4_ctx *@var{ctx}, size_t @var{length}, uint8_t *@var{digest}) +Extracts the message digest (also known as the ``authentication tag''). This is +the final operation when processing a message. It's strongly recommended +that @var{length} is @code{GCM_DIGEST_SIZE}, but if you provide a smaller +value, only the first @var{length} octets of the digest are written. +@end deftypefun + @node CCM @subsection Counter with CBC-MAC mode @@ -3626,6 +3735,95 @@ are equal, this will return 1 indicating a valid and authenticated message. Otherwise, this function will return zero. @end deftypefun +@node SIV-GCM +@subsection SIV-GCM + +@acronym{SIV-GCM}, described in @cite{RFC 8452}, is an @acronym{AEAD} +construction similar to @acronym{AES-GCM}, but provides protection against +accidental nonce misuse like @acronym{SIV-CMAC} mode. + +It is constructed on top of a block cipher which must have a block size of 128 +bits and a nonce size of 12 bytes. Nettle's support for @acronym{SIV-GCM} +consists of a message encryption and authentication interface, for +@acronym{SIV-GCM} using AES as the underlying block cipher. These +interfaces are defined in @file{<nettle/siv-gcm.h>}. + +Unlike other @acronym{AEAD} modes, in @acronym{SIV-GCM} the tag is calculated +over the encoded additional authentication data and plaintext instead of the +ciphertext. + +@subsubsection General interface + +@defvr Constant SIV_GCM_BLOCK_SIZE +@acronym{SIV-GCM}'s block size, 16. +@end defvr + +@defvr Constant SIV_GCM_DIGEST_SIZE +Size of the @acronym{SIV-GCM} digest for tags, 16. +@end defvr + +@defvr Constant SIV_GCM_NONCE_SIZE +Size of the @acronym{SIV-GCM} nonce, 12. 
+@end defvr + +@deftypefun void siv_gcm_encrypt_message (const struct nettle_cipher *@var{nc}, const void *@var{ctx}, void *@var{ctr_ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{clength}, uint8_t *@var{dst}, const uint8_t *@var{src}) +Computes the message digest from the @var{adata} and @var{src} +parameters, encrypts the plaintext from @var{src}, appends the +authentication tag to the ciphertext and outputs it to @var{dst}. The +@var{clength} variable must be equal to the length of @var{src} plus +@code{SIV_GCM_DIGEST_SIZE}. +@end deftypefun + +@deftypefun int siv_gcm_decrypt_message (const struct nettle_cipher *@var{nc}, const void *@var{ctx}, void *@var{ctr_ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{mlength}, uint8_t *@var{dst}, const uint8_t *@var{src}) +Decrypts the ciphertext from @var{src}, outputs the plaintext to +@var{dst}, recalculates the initialization vector from @var{adata} and the +plaintext. If the values of the received and calculated initialization vector +are equal, this will return 1 indicating a valid and authenticated +message. Otherwise, this function will return zero. +@end deftypefun + +In the above interface, @var{nc} must point to a cipher that works +with 16-byte block size and the key sizes that are multiple of +8-bytes. The @var{ctx} context structure must be initialized for +encryption mode using a set-key function, before using any of the +functions in this interface. While the @var{ctr_ctx} context +structure must have the same size as @var{ctx}, it does not need to be +initialized before calling those functions as it is used as working +storage. These structures can point to the same area; in that case +the contents of *@var{ctx} is destroyed by the call. + +For convenience, Nettle provides wrapper functions that works with +@acronym{AES} described in the following section. 
+ +@subsubsection @acronym{SIV-GCM}-@acronym{AES} interface + +The @acronym{SIV-GCM} functions provide an API for using @acronym{SIV-GCM} +mode with the @acronym{AES} block ciphers. The parameters all have the same +meaning as the general and message interfaces, except that the @var{nc}, +@var{ctx}, and @var{ctr_ctx} parameters are replaced with an @acronym{AES} context +structure. The @acronym{AES} context structure must be initialized for +encryption mode using a set-key function, before using any of the functions in +this interface. + +@deftypefun void siv_gcm_aes128_encrypt_message (const struct aes128_ctx *@var{ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{clength}, uint8_t *@var{dst}, const uint8_t *@var{src}) +@deftypefunx void siv_gcm_aes256_encrypt_message (const struct aes256_ctx *@var{ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{clength}, uint8_t *@var{dst}, const uint8_t *@var{src}) +Computes the message digest from the @var{adata} and @var{src} +parameters, encrypts the plaintext from @var{src}, appends the +authentication tag to the ciphertext and outputs it to @var{dst}. +The @var{clength} variable must be equal to the length of @var{src} +plus @code{SIV_GCM_DIGEST_SIZE}. 
+ +@end deftypefun + +@deftypefun int siv_gcm_aes128_decrypt_message (const struct aes128_ctx *@var{ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{mlength}, uint8_t *@var{dst}, const uint8_t *@var{src}) +@deftypefunx int siv_gcm_aes256_decrypt_message (const struct aes256_ctx *@var{ctx}, size_t @var{nlength}, const uint8_t *@var{nonce}, size_t @var{alength}, const uint8_t *@var{adata}, size_t @var{mlength}, uint8_t *@var{dst}, const uint8_t *@var{src}) +Decrypts the ciphertext from @var{src}, outputs the plaintext to +@var{dst}, recalculates the initialization vector from @var{adata} and the +plaintext. If the values of the received and calculated initialization vector +are equal, this will return 1 indicating a valid and authenticated +message. Otherwise, this function will return zero. +@end deftypefun + @node nettle_aead abstraction @subsection The @code{struct nettle_aead} abstraction @cindex nettle_aead @@ -4348,6 +4546,81 @@ salt @var{salt} of length @var{salt_length}, with iteration counter room for at least @var{length} octets. @end deftypefun + +@subsection @acronym{BALLOON} +@cindex Balloon password-hashing algorithm +Balloon is a memory-hard password-hashing algorithm. An in-depth description +of the algorithm and its properties can be found in an online research paper: +Boneh, D., Corrigan-Gibbs, H., Schechter, S. (2017, May 12). Balloon Hashing: +A Memory-Hard Function Providing Provable Protection Against Sequential Attacks. +Retrieved Sep 1, 2022, from @url{https://eprint.iacr.org/2016/027.pdf} + +Nettle's definition of the @acronym{BALLOON} algorithm can be found in +@file{<nettle/balloon.h>}. There is a general @acronym{BALLOON} function where +the user can specify desired hash algorithm that will be used by the function. +There are also concrete, more user-friendly functions that use common hash algorithms +like SHA1, SHA256, SHA384 and SHA512. 
There is also a utility function which helps to +determine the size of the working buffer that must be provided as one of the inputs. + +Each @acronym{BALLOON} function takes as an input a password and a salt of arbitrary +lengths, a time and a space parameters, and a scratch buffer. The space parameter +@var{s_cost} determines how many blocks of working space the algorithm will require +during its computation. It is common to set @var{s_cost} to a high value in order +to increase the cost of hardware accelerators built by the adversary. The time +parameter @var{t_cost} determines the number of rounds of computation that the algorithm +will perform. This can be used to further increase the cost of computation without raising +the memory requirement. Scratch buffer @var{scratch} is a user allocated working space +required by the algorithm. To determine the required size of the scratch buffer use the +utility function @code{balloon_itch}. Output of @acronym{BALLOON} algorithm will be +written into the output buffer @var{dst} that has to be at least @var{digest_size} bytes +long. Note that it is safe to use the same buffer for both @var{scratch} and @var{dst}. +Next follows the description of the general @acronym{BALLOON} function. + +@deftypefun void balloon (void *@var{hash_ctx}, nettle_hash_update_func *@var{update}, nettle_hash_digest_func *@var{digest}, size_t @var{digest_size}, size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst}) +Compute hash of given password @var{passwd} of length @var{passwd_length} salted +with @var{salt} of length @var{salt_length} and write @var{digest_size} bytes into +the output buffer @var{dst}. Parameter @var{hash_ctx} is a context for the +underlying hash function, which must be initialized by the caller. 
@var{update} +and @var{digest} are the update and digest functions of the chosen hash algorithm. +@var{digest_size} is the digest size of the chosen hash algorithm and determines +the size of the output. +@end deftypefun + +@deftypefun size_t balloon_itch (size_t @var{digest_size}, size_t @var{s_cost}) +Compute the size of the scratch buffer @var{scratch}. @var{digest_size} is the +digest size of the chosen hash algorithm. @var{s_cost} is the space parameter +used by the @code{balloon} function. +@end deftypefun + +@subsection Concrete @acronym{BALLOON} functions +Here follows a list of the specialized @acronym{BALLOON} functions, which are +more user-friendly variants of the general function. + +@subsubsection @acronym{BALLOON-SHA1} + +@deftypefun void balloon_sha1 (size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst}) +@acronym{BALLOON} algorithm using SHA1 as the underlying hash function. +@end deftypefun + +@subsubsection @acronym{BALLOON-SHA256} + +@deftypefun void balloon_sha256 (size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst}) +@acronym{BALLOON} algorithm using SHA256 as the underlying hash function. +@end deftypefun + +@subsubsection @acronym{BALLOON-SHA384} + +@deftypefun void balloon_sha384 (size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst}) +@acronym{BALLOON} algorithm using SHA384 as the underlying hash function. 
+@end deftypefun + +@subsubsection @acronym{BALLOON-SHA512} + +@deftypefun void balloon_sha512 (size_t @var{s_cost}, size_t @var{t_cost}, size_t @var{passwd_length}, const uint8_t *@var{passwd}, size_t @var{salt_length}, const uint8_t *@var{salt}, uint8_t *@var{scratch}, uint8_t *@var{dst}) +@acronym{BALLOON} algorithm using SHA512 as the underlying hash function. +@end deftypefun + + @node Public-key algorithms @section Public-key algorithms diff --git a/nist-keywrap.c b/nist-keywrap.c index 8fdd9335..2aca8423 100644 --- a/nist-keywrap.c +++ b/nist-keywrap.c @@ -44,24 +44,7 @@ #include "nist-keywrap.h" #include "memops.h" #include "macros.h" - -#if WORDS_BIGENDIAN -#define bswap_if_le(x) (x) -#elif HAVE_BUILTIN_BSWAP64 -#define bswap_if_le(x) (__builtin_bswap64 (x)) -#else -static uint64_t -bswap_if_le (uint64_t x) -{ - x = ((x >> 32) & UINT64_C (0xffffffff)) - | ((x & UINT64_C (0xffffffff)) << 32); - x = ((x >> 16) & UINT64_C (0xffff0000ffff)) - | ((x & UINT64_C (0xffff0000ffff)) << 16); - x = ((x >> 8) & UINT64_C (0xff00ff00ff00ff)) - | ((x & UINT64_C (0xff00ff00ff00ff)) << 8); - return x; -} -#endif +#include "bswap-internal.h" void nist_keywrap16 (const void *ctx, nettle_cipher_func *encrypt, @@ -94,7 +77,7 @@ nist_keywrap16 (const void *ctx, nettle_cipher_func *encrypt, encrypt (ctx, 16, B.b, I.b); /* A = MSB(64, B) ^ t where t = (n*j)+i */ - A.u64 = B.u64[0] ^ bswap_if_le ((n * j) + (i + 1)); + A.u64 = B.u64[0] ^ bswap64_if_le ((n * j) + (i + 1)); /* R[i] = LSB(64, B) */ memcpy (R + (i * 8), B.b + 8, 8); @@ -129,7 +112,7 @@ nist_keyunwrap16 (const void *ctx, nettle_cipher_func *decrypt, for (i = n - 1; i >= 0; i--) { /* B = AES-1(K, (A ^ t) | R[i]) where t = n*j+i */ - I.u64[0] = A.u64 ^ bswap_if_le ((n * j) + (i + 1)); + I.u64[0] = A.u64 ^ bswap64_if_le ((n * j) + (i + 1)); memcpy (I.b + 8, R + (i * 8), 8); decrypt (ctx, 16, B.b, I.b); diff --git a/poly1305-aes.c b/poly1305-aes.c index a4050254..374d5a78 100644 --- a/poly1305-aes.c +++ b/poly1305-aes.c @@ 
-56,13 +56,12 @@ poly1305_aes_set_nonce (struct poly1305_aes_ctx *ctx, memcpy (ctx->nonce, nonce, POLY1305_AES_NONCE_SIZE); } -#define COMPRESS(ctx, data) _nettle_poly1305_block(&(ctx)->pctx, (data), 1) - void poly1305_aes_update (struct poly1305_aes_ctx *ctx, size_t length, const uint8_t *data) { - MD_UPDATE (ctx, length, data, COMPRESS, (void) 0); + ctx->index = _nettle_poly1305_update (&(ctx)->pctx, + ctx->block, ctx->index, length, data); } void diff --git a/poly1305-internal.h b/poly1305-internal.h index 9932d524..a6afd466 100644 --- a/poly1305-internal.h +++ b/poly1305-internal.h @@ -53,7 +53,15 @@ void _nettle_poly1305_digest (struct poly1305_ctx *ctx, union nettle_block16 *s) /* Process one block. */ void _nettle_poly1305_block (struct poly1305_ctx *ctx, const uint8_t *m, unsigned high); - +/* Updates CTX by hashing M, which must be an integral number of + blocks. For convenience, returns a pointer to the end of the + data. Implies 128 set on all input blocks. */ +const uint8_t * +_nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m); + +unsigned +_nettle_poly1305_update (struct poly1305_ctx *ctx, uint8_t *buffer, unsigned index, + size_t length, const uint8_t *m); #ifdef __cplusplus } #endif diff --git a/poly1305-update.c b/poly1305-update.c new file mode 100644 index 00000000..15ee3231 --- /dev/null +++ b/poly1305-update.c @@ -0,0 +1,78 @@ +/* poly1305-update.c + + Copyright (C) 2022 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +#include "config.h" +#endif + +#include "poly1305.h" +#include "poly1305-internal.h" +#include "md-internal.h" + +#if HAVE_NATIVE_fat_poly1305_blocks +const uint8_t * +_nettle_poly1305_blocks_c(struct poly1305_ctx *ctx, + size_t blocks, const uint8_t *m); + +const uint8_t * +_nettle_poly1305_blocks_c(struct poly1305_ctx *ctx, + size_t blocks, const uint8_t *m) +{ + for (; blocks; blocks--, m += POLY1305_BLOCK_SIZE) + _nettle_poly1305_block(ctx, m, 1); + return m; +} +#endif + +unsigned +_nettle_poly1305_update (struct poly1305_ctx *ctx, + uint8_t *block, unsigned index, + size_t length, const uint8_t *m) +{ + if (index > 0) + { + /* Try to fill partial block */ + MD_FILL_OR_RETURN_INDEX (POLY1305_BLOCK_SIZE, block, index, + length, m); + _nettle_poly1305_block(ctx, block, 1); + } +#if HAVE_NATIVE_poly1305_blocks + m = _nettle_poly1305_blocks (ctx, length >> 4, m); + length &= 15; +#else + for (; length >= POLY1305_BLOCK_SIZE; + length -= POLY1305_BLOCK_SIZE, m += POLY1305_BLOCK_SIZE) + _nettle_poly1305_block (ctx, m, 1); +#endif + + memcpy (block, m, length); + return length; +} diff --git a/powerpc64/fat/poly1305-blocks.asm b/powerpc64/fat/poly1305-blocks.asm new file mode 100644 index 00000000..9efef0a0 --- /dev/null +++ b/powerpc64/fat/poly1305-blocks.asm @@ -0,0 +1,38 @@ +C powerpc64/fat/poly1305-blocks.asm + +ifelse(` + Copyright (C) 2022 Mamone Tarsha + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl picked up by configure +dnl PROLOGUE(_nettle_poly1305_blocks) +dnl PROLOGUE(_nettle_fat_poly1305_blocks) + +define(`fat_transform', `$1_ppc64') +include_src(`powerpc64/p9/poly1305-blocks.asm') diff --git a/powerpc64/machine.m4 b/powerpc64/machine.m4 index b59f0863..8f28f295 100644 --- a/powerpc64/machine.m4 +++ b/powerpc64/machine.m4 @@ -51,3 +51,15 @@ forloop(i,0,63,`deflit(`vs'i,i)') forloop(i,0,31,`deflit(`f'i,i)') forloop(i,0,7, `deflit(`cr'i,i)') ') + +C Increase index of general-purpose register by specific value +C INC_GPR(GPR, INC) +define(`INC_GPR',`ifelse(substr($1,0,1),`r', +``r'eval($2+substr($1,1,len($1)))', +`eval($2+$1)')') + +C Increase index of vector register by specific value +C INC_VR(VR, INC) +define(`INC_VR',`ifelse(substr($1,0,1),`v', +``v'eval($2+substr($1,1,len($1)))', +`eval($2+$1)')') diff --git a/powerpc64/p7/chacha-2core.asm b/powerpc64/p7/chacha-2core.asm index d5935263..ec20b4a5 100644 --- a/powerpc64/p7/chacha-2core.asm +++ b/powerpc64/p7/chacha-2core.asm @@ -60,6 +60,9 @@ define(`S3p1', `v16') define(`T0', `v17') +define(`EW_MASK', `v18') 
+define(`OW_MASK', `v19') + .text C _chacha_2core(uint32_t *dst, const uint32_t *src, unsigned rounds) @@ -78,6 +81,9 @@ PROLOGUE(_nettle_chacha_2core) vor Y3, Y3, X1 .Lshared_entry: + DATA_LOAD_VEC(EW_MASK,.even_word_mask,r6) + DATA_LOAD_VEC(OW_MASK,.odd_word_mask,r6) + vadduwm Y3, Y3, X3 li r6, 0x10 C set up some... @@ -92,14 +98,14 @@ PROLOGUE(_nettle_chacha_2core) vor S3, X3, X3 vor S3p1, Y3, Y3 - vmrgow Y0, X0, X0 C 1 1 3 3 - vmrgew X0, X0, X0 C 0 0 2 2 - vmrgow Y1, X1, X1 C 5 5 7 7 - vmrgew X1, X1, X1 C 4 4 6 6 - vmrgow Y2, X2, X2 C 9 9 11 11 - vmrgew X2, X2, X2 C 8 8 10 10 - vmrgow Y3, X3, S3p1 C 13 13 15 15 - vmrgew X3, X3, S3p1 C 12 12 14 14 + vperm Y0, X0, X0, OW_MASK C 1 1 3 3 + vperm X0, X0, X0, EW_MASK C 0 0 2 2 + vperm Y1, X1, X1, OW_MASK C 5 5 7 7 + vperm X1, X1, X1, EW_MASK C 4 4 6 6 + vperm Y2, X2, X2, OW_MASK C 9 9 11 11 + vperm X2, X2, X2, EW_MASK C 8 8 10 10 + vperm Y3, X3, S3p1, OW_MASK C 13 13 15 15 + vperm X3, X3, S3p1, EW_MASK C 12 12 14 14 vspltisw ROT16, -16 C -16 instead of 16 actually works! 
vspltisw ROT12, 12 @@ -189,17 +195,17 @@ C Y3 A15 B15 A13 B13 X3 A12 B12 A14 B14 (Y3 swapped) bdnz .Loop - vmrgew T0, X0, Y0 - vmrgow Y0, X0, Y0 + vperm T0, X0, Y0, EW_MASK + vperm Y0, X0, Y0, OW_MASK - vmrgew X0, X1, Y1 - vmrgow Y1, X1, Y1 + vperm X0, X1, Y1, EW_MASK + vperm Y1, X1, Y1, OW_MASK - vmrgew X1, X2, Y2 - vmrgow Y2, X2, Y2 + vperm X1, X2, Y2, EW_MASK + vperm Y2, X2, Y2, OW_MASK - vmrgew X2, X3, Y3 - vmrgow Y3, X3, Y3 + vperm X2, X3, Y3, EW_MASK + vperm Y3, X3, Y3, OW_MASK vadduwm T0, T0, S0 vadduwm Y0, Y0, S0 @@ -251,6 +257,15 @@ PROLOGUE(_nettle_chacha_2core32) b .Lshared_entry EPILOGUE(_nettle_chacha_2core32) +.rodata +.align 4 +.even_word_mask: +IF_LE(`.byte 27,26,25,24,11,10,9,8,19,18,17,16,3,2,1,0') +IF_BE(`.byte 0,1,2,3,16,17,18,19,8,9,10,11,24,25,26,27') +.odd_word_mask: +IF_LE(`.byte 31,30,29,28,15,14,13,12,23,22,21,20,7,6,5,4') +IF_BE(`.byte 4,5,6,7,20,21,22,23,12,13,14,15,28,29,30,31') + divert(-1) define core2state p/x $vs32.v4_int32 diff --git a/powerpc64/p9/poly1305-blocks.asm b/powerpc64/p9/poly1305-blocks.asm new file mode 100644 index 00000000..90e3df7b --- /dev/null +++ b/powerpc64/p9/poly1305-blocks.asm @@ -0,0 +1,434 @@ +C powerpc64/p9/poly1305-blocks.asm + +ifelse(` + Copyright (C) 2013, 2022 Niels Möller + Copyright (C) 2022 Mamone Tarsha + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +include_src(`powerpc64/p9/poly1305.m4') + +C Register usage: + +define(`SP', `r1') +define(`TOCP', `r2') + +C Arguments +define(`CTX', `r3') +define(`BLOCKS', `r4') +define(`DATA', `r5') + +define(`PADBYTE', `r6') C Padding byte register + +define(`DEFINES_BLOCK_R44', ` + define(`R0', `v0') + define(`R1', `v1') + define(`R2', `v2') + define(`S1', `v3') + define(`S2', `v4') + define(`H0', `v5') + define(`H1', `v6') + define(`H2', `v7') + + define(`R3', `v8') + define(`R4', `v9') + define(`R5', `v10') + define(`S4', `v11') + define(`S5', `v12') + + define(`T0', `v13') + define(`T1', `v14') + define(`T2', `v15') + define(`T3', `v16') + define(`T4', `v17') + define(`T5', `v18') + define(`TMP', `v19') + define(`TMP2', `v20') + + define(`ZERO', `v21') + define(`MASK44', `v22') + define(`MASK42L', `v23') + define(`MASK44L', `v24') + define(`T4PAD', `v25') + define(`D40', `v26') + define(`D20', `v27') + define(`D24', `v28') + define(`D44', `v29') + define(`D2', `v30') + define(`D4', `v31') + ') + +C Compute S_1 = 20 * R_1 and S_2 = 20 * R_2 +C COMPUTE_S(S1, S2, R1, R2) +define(`COMPUTE_S', ` + vsld $1, $3, D2 + vsld $2, $4, D2 + vaddudm $1, $1, $3 + vaddudm $2, $2, $4 + vsld $1, $1, D2 + vsld $2, $2, D2 + ') + +C Convert two-part radix 2^64 to three-part radix 2^44 of four blocks +C R64_TO_R44_4B(VR0, VR1, VR2, VR3, VR4, VR5) +define(`R64_TO_R44_4B', ` + vsrd $3, $2, D24 + vsrd $6, $5, D24 + vsrd TMP, $1, D44 + vsrd TMP2, $4, D44 + vsld $2, $2, D20 + vsld $5, $5, D20 + vor $2, $2, TMP + vor $5, $5, TMP2 + vand $1, $1, MASK44 + vand $4, $4, MASK44 + vand $2, $2, MASK44 + vand $5, $5, MASK44 + ') + +C T_0 = R_0 H_0 + S_2 H_1 + S_1 H_2 +C T_1 = R_1 H_0 + R_0 H_1 + S_2 H_2 +C T_2 = R_2 H_0 + R_1 H_1 + R_0 H_2 +C MUL(T0, T1, T2, H0, H1, 
H2) +define(`MUL', ` + vmsumudm $1, $4, R0, ZERO + vmsumudm $2, $4, R1, ZERO + vmsumudm $3, $4, R2, ZERO + + vmsumudm $1, $5, S2, $1 + vmsumudm $2, $5, R0, $2 + vmsumudm $3, $5, R1, $3 + + vmsumudm $1, $6, S1, $1 + vmsumudm $2, $6, S2, $2 + vmsumudm $3, $6, R0, $3 + ') + +C Apply aforenamed equations on four-blocks +C Each two successive blocks are interleaved horizontally +C MUL_4B(T0, T1, T2, H0, H1, H2, H3, H4, H5) +define(`MUL_4B', ` + vmsumudm $1, $7, R0, ZERO + vmsumudm $2, $7, R1, ZERO + vmsumudm $3, $7, R2, ZERO + + vmsumudm $1, $8, S2, $1 + vmsumudm $2, $8, R0, $2 + vmsumudm $3, $8, R1, $3 + + vmsumudm $1, $9, S1, $1 + vmsumudm $2, $9, S2, $2 + vmsumudm $3, $9, R0, $3 + + vmsumudm $1, $4, R3, $1 + vmsumudm $2, $4, R4, $2 + vmsumudm $3, $4, R5, $3 + + vmsumudm $1, $5, S5, $1 + vmsumudm $2, $5, R3, $2 + vmsumudm $3, $5, R4, $3 + + vmsumudm $1, $6, S4, $1 + vmsumudm $2, $6, S5, $2 + vmsumudm $3, $6, R3, $3 + ') + +C Reduction phase of two interleaved chains +C RED(H0, H1, H2, T0, T1, T2) +define(`RED', ` + vand $1, $4, MASK44L + vsro $4, $4, D40 + vsrd $4, $4, D4 + vadduqm $5, $5, $4 + vand $2, $5, MASK44L + vsro $5, $5, D40 + vsrd $5, $5, D4 + vadduqm $6, $6, $5 + vand $3, $6, MASK42L + vsro $6, $6, D40 + vsrd $6, $6, D2 + vadduqm $1, $1, $6 + vsld $6, $6, D2 + vadduqm $1, $1, $6 + vsrd TMP, $1, D44 + vand $1, $1, MASK44L + vadduqm $2, $2, TMP + ') + +.text + +C void _nettle_poly1305_blocks(struct poly1305_ctx *ctx, +C size_t length, const uint8_t *data) +define(`FUNC_ALIGN', `5') +PROLOGUE(_nettle_poly1305_blocks) + C Save non-volatile vector registers + std r31,-8(SP) + stxv VSR(v31),-32(SP) + stxv VSR(v30),-48(SP) + stxv VSR(v29),-64(SP) + stxv VSR(v28),-80(SP) + stxv VSR(v27),-96(SP) + stxv VSR(v26),-112(SP) + stxv VSR(v25),-128(SP) + stxv VSR(v24),-144(SP) + stxv VSR(v23),-160(SP) + stxv VSR(v22),-176(SP) + stxv VSR(v21),-192(SP) + stxv VSR(v20),-208(SP) + + C Initialize padding byte register + li PADBYTE, 1 + +C Process data blocks of number of 
multiple 4 + DEFINES_BLOCK_R44() + cmpldi BLOCKS, POLY1305_BLOCK_THRESHOLD + blt Ldata_r64 + srdi r9, BLOCKS, 2 + andi. BLOCKS, BLOCKS, 3 + mtctr r9 + + C Initialize constants + + vxor ZERO, ZERO, ZERO + vspltisb D2, 2 + vspltisb D4, 4 + addis r9, TOCP, .mask44@got@ha + ld r9, .mask44@got@l(r9) + lxvd2x VSR(MASK44), 0, r9 + addi r9, r9, 16 + lxvd2x VSR(MASK42L), 0, r9 + addi r9, r9, 16 + lxvd2x VSR(D40), 0, r9 + addi r9, r9, 16 + lxvd2x VSR(D20), 0, r9 + addi r9, r9, 16 + lxvd2x VSR(D24), 0, r9 + addi r9, r9, 16 + lxvd2x VSR(D44), 0, r9 + xxmrghd VSR(MASK44L), VSR(ZERO), VSR(MASK44) + + sldi r10, PADBYTE, 40 + mtvsrdd VSR(T4PAD), r10, r10 + + C Load key of radix 2^44 + lxsd R0, 0(CTX) + lxsd R1, 8(CTX) + vsrd R2, R1, D24 + vsrd TMP, R0, D44 + vsld R1, R1, D20 + vor R1, R1, TMP + vand R0, R0, MASK44 + vand R1, R1, MASK44 + xxmrghd VSR(R0), VSR(R0), VSR(ZERO) + xxmrghd VSR(R1), VSR(R1), VSR(ZERO) + xxmrghd VSR(R2), VSR(R2), VSR(ZERO) + + COMPUTE_S(S1, S2, R1, R2) + + C Calculate R^2 = R R + + MUL(T0, T1, T2, R0, R1, R2) + RED(H0, H1, H2, T0, T1, T2) + xxpermdi VSR(R0), VSR(R0), VSR(H0), 0b01 + xxpermdi VSR(R1), VSR(R1), VSR(H1), 0b01 + xxpermdi VSR(R2), VSR(R2), VSR(H2), 0b01 + + COMPUTE_S(S1, S2, R1, R2) + + C Calculate R^3 = R^2 R + + xxmrghd VSR(R3), VSR(ZERO), VSR(R0) + xxmrghd VSR(R4), VSR(ZERO), VSR(R1) + xxmrghd VSR(R5), VSR(ZERO), VSR(R2) + + MUL(T0, T1, T2, R3, R4, R5) + RED(H0, H1, H2, T0, T1, T2) + + C Calculate R^4 = R^2 R^2 + + xxmrgld VSR(R3), VSR(ZERO), VSR(R0) + xxmrgld VSR(R4), VSR(ZERO), VSR(R1) + xxmrgld VSR(R5), VSR(ZERO), VSR(R2) + + MUL(T0, T1, T2, R3, R4, R5) + RED(R3, R4, R5, T0, T1, T2) + xxmrgld VSR(R3), VSR(H0), VSR(R3) + xxmrgld VSR(R4), VSR(H1), VSR(R4) + xxmrgld VSR(R5), VSR(H2), VSR(R5) + + COMPUTE_S(S4, S5, R4, R5) + + C Load state + ld r7, 32(CTX) + ld r8, 40(CTX) + ld r31, 48(CTX) + + C Fold high part of H2 + srdi r9, r31, 2 + sldi r10, r9, 2 + add r10, r10, r9 + andi. 
r31, r31, 3 + li r9, 0 + addc r7, r7, r10 + adde r8, r8, r9 + adde r31, r31, r9 + + mtvsrdd VSR(H0), 0, r7 + mtvsrdd VSR(H1), 0, r8 + mtvsrdd VSR(H2), 0, r31 + + C Convert state of radix 2^64 to 2^44 + vsrd TMP, H1, D24 + vsld H2, H2, D40 + vor H2, H2, TMP + vsrd TMP2, H0, D44 + vsld H1, H1, D20 + vor H1, H1, TMP2 + vand H0, H0, MASK44 + vand H1, H1, MASK44 + + li r8, 0x10 + li r9, 0x20 + li r10, 0x30 +L4B_loop: + C Load four blocks + lxvd2x VSR(T3), 0, DATA + lxvd2x VSR(T4), r8, DATA + lxvd2x VSR(T5), r9, DATA + lxvd2x VSR(TMP), r10, DATA +IF_BE(` + xxbrd VSR(T3), VSR(T3) + xxbrd VSR(T4), VSR(T4) + xxbrd VSR(T5), VSR(T5) + xxbrd VSR(TMP), VSR(TMP) +') + C Permute blocks in little-endian and line each two successive + C blocks horizontally + xxmrghd VSR(T0), VSR(T4), VSR(T3) + xxmrgld VSR(T1), VSR(T4), VSR(T3) + xxmrghd VSR(T3), VSR(TMP), VSR(T5) + xxmrgld VSR(T4), VSR(TMP), VSR(T5) + R64_TO_R44_4B(T0, T1, T2, T3, T4, T5) + vor T2, T2, T4PAD + vor T5, T5, T4PAD + + C Combine first block with previous state + vaddudm H0, H0, T0 + vaddudm H1, H1, T1 + vaddudm H2, H2, T2 + + MUL_4B(T0, T1, T2, H0, H1, H2, T3, T4, T5) + RED(H0, H1, H2, T0, T1, T2) + + addi DATA, DATA, 64 + bdnz L4B_loop + + C Moving carry + vsrd TMP, H1, D44 + vaddudm H2, H2, TMP + vsrd TMP2, H2, D40 + vsrd TMP2, TMP2, D2 + vsld TMP, TMP2, D2 + vand H1, H1, MASK44 + vaddudm TMP2, TMP2, TMP + vaddudm H0, H0, TMP2 + vsrd TMP, H0, D44 + vaddudm H1, H1, TMP + vand H2, H2, MASK42L + vand H0, H0, MASK44 + + C Convert state of radix 2^44 to 2^64 + vsld TMP, H1, D44 + vor H0, H0, TMP + vsrd H1, H1, D20 + vsld TMP2, H2, D24 + vor H1, H1, TMP2 + vsrd H2, H2, D40 + + xxswapd VSR(H0), VSR(H0) + xxswapd VSR(H1), VSR(H1) + xxswapd VSR(H2), VSR(H2) + + C Store state + stxsd H0, 32(CTX) + stxsd H1, 40(CTX) + stxsd H2, 48(CTX) + +Ldata_r64: + cmpldi BLOCKS, 0 + beq Ldone + mtctr BLOCKS + mr r4, PADBYTE + ld r6, P1305_H0 (CTX) + ld r7, P1305_H1 (CTX) + ld r8, P1305_H2 (CTX) +L1B_loop: + BLOCK_R64(CTX,DATA,r4,r6,v0) + 
mfvsrld r6, VSR(v0) + mfvsrld r7, VSR(v1) + mfvsrd r8, VSR(v1) + addi DATA, DATA, 16 + bdnz L1B_loop + std r6, P1305_H0 (CTX) + std r7, P1305_H1 (CTX) + std r8, P1305_H2 (CTX) + +Ldone: + C Restore non-volatile vector registers + ld r31, -8(SP) + lxv VSR(v31),-32(SP) + lxv VSR(v30),-48(SP) + lxv VSR(v29),-64(SP) + lxv VSR(v28),-80(SP) + lxv VSR(v27),-96(SP) + lxv VSR(v26),-112(SP) + lxv VSR(v25),-128(SP) + lxv VSR(v24),-144(SP) + lxv VSR(v23),-160(SP) + lxv VSR(v22),-176(SP) + lxv VSR(v21),-192(SP) + lxv VSR(v20),-208(SP) + + mr r3, DATA + + blr +EPILOGUE(_nettle_poly1305_blocks) + +.rodata +.align 4 +.mask44: +.quad 0x00000FFFFFFFFFFF,0x00000FFFFFFFFFFF +.mask42l: +.quad 0x0000000000000000,0x000003FFFFFFFFFF +.d40: +.quad 0x0000000000000028,0x0000000000000028 +.d20: +.quad 0x0000000000000014,0x0000000000000014 +.d24: +.quad 0x0000000000000018,0x0000000000000018 +.d44: +.quad 0x000000000000002C,0x000000000000002C diff --git a/powerpc64/p9/poly1305-internal.asm b/powerpc64/p9/poly1305-internal.asm index a082fed2..c23e16fd 100644 --- a/powerpc64/p9/poly1305-internal.asm +++ b/powerpc64/p9/poly1305-internal.asm @@ -30,6 +30,8 @@ ifelse(` not, see http://www.gnu.org/licenses/. 
') +include_src(`powerpc64/p9/poly1305.m4') + C Register usage: define(`SP', `r1') @@ -37,36 +39,8 @@ define(`TOCP', `r2') C Argments define(`CTX', `r3') -define(`M', `r4') -define(`M128', `r5') - -C Working state -define(`H0', `r6') -define(`H1', `r7') -define(`H2', `r8') -define(`T0', `r9') -define(`T1', `r10') -define(`T2', `r8') -define(`T2A', `r9') -define(`T2S', `r10') -define(`IDX', `r6') -define(`RZ', `r7') - -define(`ZERO', `v0') -define(`F0', `v1') -define(`F1', `v2') -define(`F0S', `v3') -define(`T', `v4') - -define(`R', `v5') -define(`S', `v6') - -define(`T00', `v7') -define(`T10', `v8') -define(`T11', `v9') -define(`MU0', `v10') -define(`MU1', `v11') -define(`TMP', `v12') +define(`DATA', `r4') +define(`PADBYTE', `r5') C Padding byte register .text @@ -114,59 +88,17 @@ EPILOGUE(_nettle_poly1305_set_key) C void _nettle_poly1305_block(struct poly1305_ctx *ctx, const uint8_t *m, unsigned m128) define(`FUNC_ALIGN', `5') PROLOGUE(_nettle_poly1305_block) - ld H0, P1305_H0 (CTX) - ld H1, P1305_H1 (CTX) - ld H2, P1305_H2 (CTX) -IF_LE(` - ld T0, 0(M) - ld T1, 8(M) -') -IF_BE(` - ldbrx T0, 0, M - addi M, M, 8 - ldbrx T0, 0, M -') - - addc T0, T0, H0 - adde T1, T1, H1 - adde T2, M128, H2 - - mtvsrdd VSR(T), T0, T1 - - li IDX, P1305_S0 - lxvd2x VSR(R), 0, CTX - lxvd2x VSR(S), IDX, CTX - - andi. 
T2A, T2, 3 - srdi T2S, T2, 2 - - li RZ, 0 - vxor ZERO, ZERO, ZERO - - xxpermdi VSR(MU0), VSR(R), VSR(S), 0b01 - xxswapd VSR(MU1), VSR(R) - - mtvsrdd VSR(T11), 0, T2A - mtvsrdd VSR(T00), T2S, RZ - mtvsrdd VSR(T10), 0, T2 - - vmsumudm F0, T, MU0, ZERO - vmsumudm F1, T, MU1, ZERO - vmsumudm TMP, T11, MU1, ZERO - - vmsumudm F0, T00, S, F0 - vmsumudm F1, T10, MU0, F1 + ld r6, P1305_H0 (CTX) + ld r7, P1305_H1 (CTX) + ld r8, P1305_H2 (CTX) - xxmrgld VSR(TMP), VSR(TMP), VSR(ZERO) - xxswapd VSR(F0S), VSR(F0) - vadduqm F1, F1, TMP - stxsd F0S, P1305_H0 (CTX) + BLOCK_R64(CTX,DATA,PADBYTE,r6,v0) - li IDX, P1305_H1 - xxmrghd VSR(F0), VSR(ZERO), VSR(F0) - vadduqm F1, F1, F0 - xxswapd VSR(F1), VSR(F1) - stxvd2x VSR(F1), IDX, CTX + li r10, P1305_H1 + xxswapd VSR(v0), VSR(v0) + xxswapd VSR(v1), VSR(v1) + stxsd v0, P1305_H0 (CTX) + stxvd2x VSR(v1), r10, CTX blr EPILOGUE(_nettle_poly1305_block) diff --git a/powerpc64/p9/poly1305.m4 b/powerpc64/p9/poly1305.m4 new file mode 100644 index 00000000..13a57e83 --- /dev/null +++ b/powerpc64/p9/poly1305.m4 @@ -0,0 +1,102 @@ +C Threshold of processing multiple blocks in parallel +C of a multiple of 4 +define(`POLY1305_BLOCK_THRESHOLD', `12') + +C DEFINES_BLOCK_R64(GPR0, VR0) +define(`DEFINES_BLOCK_R64', ` + define(`H0', `$1') + define(`H1', `INC_GPR($1,1)') + define(`H2', `INC_GPR($1,2)') + + define(`T0', `INC_GPR($1,3)') + define(`T1', `INC_GPR($1,4)') + define(`T2', `H2') + define(`T2A', `INC_GPR($1,3)') + define(`T2S', `INC_GPR($1,4)') + define(`RZ', `H0') + define(`IDX', `INC_GPR($1,4)') + + define(`F0', `$2') + define(`F1', `INC_VR($2,1)') + + define(`ZERO', `INC_VR($2,2)') + define(`F0S', `INC_VR($2,3)') + define(`F11', `INC_VR($2,4)') + define(`T', `INC_VR($2,5)') + + define(`R', `INC_VR($2,6)') + define(`S', `INC_VR($2,7)') + + define(`T00', `INC_VR($2,8)') + define(`T10', `INC_VR($2,9)') + define(`T11', `INC_VR($2,10)') + define(`MU0', `INC_VR($2,11)') + define(`MU1', `INC_VR($2,12)') + ') + +C CTX is the address of context where key 
and pre-computed values are stored +C DATA is the address of input block +C PADBYTE is padding byte for input block +C GPR0 is the starting register of sequential general-purpose registers +C used in the macro of following layout +C GPR0, GPR1, GPR2 are inputs representing the previous state radix 2^64 +C GPR3, GPR4 are temporary registers +C VR0 is the starting register of sequential vector registers used in +C the macro of following layout +C VR0, VR1 are outputs representing the result state radix 2^64 sorted as follows +C (low 64-bit of VR0) + (low 64-bit of VR1) + (high 64-bit of VR1) +C VR2..VR12 are temporary registers +C BLOCK_R64(CTX, DATA, PADBYTE, GPR0, VR0) +define(`BLOCK_R64', ` + DEFINES_BLOCK_R64($4,$5) + C Load 128-bit input block +IF_LE(` + ld T0, 0($2) + ld T1, 8($2) +') +IF_BE(` + li IDX, 8 + ldbrx T1, IDX, $2 + ldbrx T0, 0, $2 +') + C Combine state with input block, latter is padded to 17-bytes + C by low-order byte of PADBYTE register + addc T0, T0, H0 + adde T1, T1, H1 + adde T2, $3, H2 + + mtvsrdd VSR(T), T0, T1 + + C Load key and pre-computed values + li IDX, 16 + lxvd2x VSR(R), 0, $1 + lxvd2x VSR(S), IDX, $1 + + andi. 
T2A, T2, 3 + srdi T2S, T2, 2 + + li RZ, 0 + vxor ZERO, ZERO, ZERO + + xxpermdi VSR(MU0), VSR(R), VSR(S), 0b01 + xxswapd VSR(MU1), VSR(R) + + mtvsrdd VSR(T11), 0, T2A + mtvsrdd VSR(T00), T2S, RZ + mtvsrdd VSR(T10), 0, T2 + + C Multiply key by combined state and block + vmsumudm F0, T, MU0, ZERO + vmsumudm F1, T, MU1, ZERO + vmsumudm F11, T11, MU1, ZERO + + vmsumudm F0, T00, S, F0 + vmsumudm F1, T10, MU0, F1 + + C Product addition + xxmrgld VSR(F11), VSR(F11), VSR(ZERO) + vadduqm F1, F1, F11 + + xxmrghd VSR(F0S), VSR(ZERO), VSR(F0) + vadduqm F1, F1, F0S + ') diff --git a/s390x/fat/sha256-compress-2.asm b/s390x/fat/sha256-compress-n-2.asm index f4b16181..06fb1014 100644 --- a/s390x/fat/sha256-compress-2.asm +++ b/s390x/fat/sha256-compress-n-2.asm @@ -1,4 +1,4 @@ -C s390x/fat/sha256-compress-2.asm +C s390x/fat/sha256-compress-n-2.asm ifelse(` Copyright (C) 2021 Mamone Tarsha @@ -30,7 +30,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. ') -dnl PROLOGUE(_nettle_sha256_compress) picked up by configure +dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure define(`fat_transform', `$1_s390x') -include_src(`s390x/msa_x1/sha256-compress.asm') +include_src(`s390x/msa_x1/sha256-compress-n.asm') diff --git a/s390x/msa_x1/sha256-compress.asm b/s390x/msa_x1/sha256-compress-n.asm index 9a9511fb..51539927 100644 --- a/s390x/msa_x1/sha256-compress.asm +++ b/s390x/msa_x1/sha256-compress-n.asm @@ -1,7 +1,7 @@ -C s390x/msa_x1/sha256-compress.asm +C s390x/msa_x1/sha256-compress-n.asm ifelse(` - Copyright (C) 2021 Mamone Tarsha + Copyright (C) 2021, 2022 Mamone Tarsha, Niels Möller This file is part of GNU Nettle. 
GNU Nettle is free software: you can redistribute it and/or @@ -56,25 +56,23 @@ C |----------------------------------------------| C | H7 (4 bytes) | C *----------------------------------------------* -.file "sha256-compress.asm" +.file "sha256-compress-n.asm" .text C SHA function code define(`SHA256_FUNCTION_CODE', `2') -C Size of block -define(`SHA256_BLOCK_SIZE', `64') -C void -C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, -C const uint32_t *k) +C const uint8_t * +C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, +C size_t blocks, const uint8_t *input) -PROLOGUE(_nettle_sha256_compress) +PROLOGUE(_nettle_sha256_compress_n) lghi %r0,SHA256_FUNCTION_CODE C SHA-256 Function Code lgr %r1,%r2 - lgr %r4,%r3 - lghi %r5,SHA256_BLOCK_SIZE -1: .long 0xb93e0004 C kimd %r0,%r4. perform KIMD-SHA operation on data + lgr %r2, %r5 + sllg %r3, %r4, 6 C 64 * block size +1: .long 0xb93e0002 C kimd %r0,%r2. perform KIMD-SHA operation on data brc 1,1b br RA -EPILOGUE(_nettle_sha256_compress) +EPILOGUE(_nettle_sha256_compress_n) diff --git a/sha2-internal.h b/sha2-internal.h index 40f25a5f..93080bee 100644 --- a/sha2-internal.h +++ b/sha2-internal.h @@ -39,8 +39,9 @@ /* Internal compression function. STATE points to 8 uint32_t words, DATA points to 64 bytes of input data, possibly unaligned, and K points to the table of constants. */ -void -_nettle_sha256_compress(uint32_t *state, const uint8_t *data, const uint32_t *k); +const uint8_t * +_nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *data); /* Internal compression function. STATE points to 8 uint64_t words, DATA points to 128 bytes of input data, possibly unaligned, and K diff --git a/sha256-compress.c b/sha256-compress-n.c index cf17e3e1..d135d14f 100644 --- a/sha256-compress.c +++ b/sha256-compress-n.c @@ -1,8 +1,8 @@ -/* sha256-compress.c +/* sha256-compress-n.c The compression function of the sha256 hash function. 
- Copyright (C) 2001, 2010 Niels Möller + Copyright (C) 2001, 2010, 2022 Niels Möller This file is part of GNU Nettle. @@ -118,26 +118,19 @@ } while (0) /* For fat builds */ -#if HAVE_NATIVE_sha256_compress -void -_nettle_sha256_compress_c(uint32_t *state, const uint8_t *input, const uint32_t *k); -#define _nettle_sha256_compress _nettle_sha256_compress_c +#if HAVE_NATIVE_sha256_compress_n +const uint8_t * +_nettle_sha256_compress_n_c(uint32_t *state, const uint32_t *table, + size_t blocks, const uint8_t *input); +#define _nettle_sha256_compress_n _nettle_sha256_compress_n_c #endif -void -_nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) +const uint8_t * +_nettle_sha256_compress_n(uint32_t *state, const uint32_t *table, + size_t blocks, const uint8_t *input) { - uint32_t data[SHA256_DATA_LENGTH]; uint32_t A, B, C, D, E, F, G, H; /* Local vars */ - unsigned i; - uint32_t *d; - for (i = 0; i < SHA256_DATA_LENGTH; i++, input+= 4) - { - data[i] = READ_UINT32(input); - } - - /* Set up first buffer and local data buffer */ A = state[0]; B = state[1]; C = state[2]; @@ -146,55 +139,68 @@ _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k F = state[5]; G = state[6]; H = state[7]; - - /* Heavy mangling */ - /* First 16 subrounds that act on the original data */ - DEBUG(-1); - for (i = 0, d = data; i<16; i+=8, k += 8, d+= 8) + for (; blocks > 0; blocks--) { - ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); DEBUG(i); - ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); DEBUG(i+1); - ROUND(G, H, A, B, C, D, E, F, k[2], d[2]); - ROUND(F, G, H, A, B, C, D, E, k[3], d[3]); - ROUND(E, F, G, H, A, B, C, D, k[4], d[4]); - ROUND(D, E, F, G, H, A, B, C, k[5], d[5]); - ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); DEBUG(i+6); - ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); DEBUG(i+7); - } + uint32_t data[SHA256_DATA_LENGTH]; + unsigned i; + const uint32_t *k; + uint32_t *d; + for (i = 0; i < SHA256_DATA_LENGTH; i++, input+= 4) + { + data[i] = 
READ_UINT32(input); + } + + /* Heavy mangling */ + /* First 16 subrounds that act on the original data */ + + DEBUG(-1); + for (i = 0, d = data, k = table; i<16; i+=8, k += 8, d+= 8) + { + ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); DEBUG(i); + ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); DEBUG(i+1); + ROUND(G, H, A, B, C, D, E, F, k[2], d[2]); + ROUND(F, G, H, A, B, C, D, E, k[3], d[3]); + ROUND(E, F, G, H, A, B, C, D, k[4], d[4]); + ROUND(D, E, F, G, H, A, B, C, k[5], d[5]); + ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); DEBUG(i+6); + ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); DEBUG(i+7); + } - for (; i<64; i += 16, k+= 16) - { - ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 0)); DEBUG(i); - ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); DEBUG(i+1); - ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); DEBUG(i+2); - ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); DEBUG(i+3); - ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); DEBUG(i+4); - ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); DEBUG(i+5); - ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); DEBUG(i+6); - ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); DEBUG(i+7); - ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); DEBUG(i+8); - ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); DEBUG(i+9); - ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); DEBUG(i+10); - ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); DEBUG(i+11); - ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); DEBUG(i+12); - ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); DEBUG(i+13); - ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); DEBUG(i+14); - ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); DEBUG(i+15); - } - - /* Update state */ - state[0] += A; - state[1] += B; - state[2] += C; - state[3] += D; - state[4] += E; - state[5] += F; - state[6] += G; - state[7] += H; + for (; i<64; i += 16, k+= 16) + { + ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 
0)); DEBUG(i); + ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); DEBUG(i+1); + ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); DEBUG(i+2); + ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); DEBUG(i+3); + ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); DEBUG(i+4); + ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); DEBUG(i+5); + ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); DEBUG(i+6); + ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); DEBUG(i+7); + ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); DEBUG(i+8); + ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); DEBUG(i+9); + ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); DEBUG(i+10); + ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); DEBUG(i+11); + ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); DEBUG(i+12); + ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); DEBUG(i+13); + ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); DEBUG(i+14); + ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); DEBUG(i+15); + } + + /* Update state */ + state[0] = A = state[0] + A; + state[1] = B = state[1] + B; + state[2] = C = state[2] + C; + state[3] = D = state[3] + D; + state[4] = E = state[4] + E; + state[5] = F = state[5] + F; + state[6] = G = state[6] + G; + state[7] = H = state[7] + H; #if SHA256_DEBUG - fprintf(stderr, "99: %8x %8x %8x %8x %8x %8x %8x %8x\n", - state[0], state[1], state[2], state[3], - state[4], state[5], state[6], state[7]); + fprintf(stderr, "99: %8x %8x %8x %8x %8x %8x %8x %8x\n", + state[0], state[1], state[2], state[3], + state[4], state[5], state[6], state[7]); #endif + } + return input; } @@ -46,6 +46,7 @@ #include "sha2-internal.h" #include "macros.h" +#include "md-internal.h" #include "nettle-write.h" /* Generated by the shadata program. 
*/ @@ -70,6 +71,12 @@ K[64] = 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL, }; +void +sha256_compress(uint32_t *state, const uint8_t *input) +{ + _nettle_sha256_compress_n(state, K, 1, input); +} + #define COMPRESS(ctx, data) (sha256_compress((ctx)->state, (data))) /* Initialize the SHA values */ @@ -97,7 +104,22 @@ void sha256_update(struct sha256_ctx *ctx, size_t length, const uint8_t *data) { - MD_UPDATE (ctx, length, data, COMPRESS, ctx->count++); + size_t blocks; + if (ctx->index > 0) + { + /* Try to fill partial block */ + MD_FILL_OR_RETURN (ctx, length, data); + sha256_compress (ctx->state, ctx->block); + ctx->count++; + } + + blocks = length >> 6; + data = _nettle_sha256_compress_n (ctx->state, K, blocks, data); + ctx->count += blocks; + length &= 63; + + memcpy (ctx->block, data, length); + ctx->index = length; } static void @@ -161,9 +183,3 @@ sha224_digest(struct sha256_ctx *ctx, sha256_write_digest(ctx, length, digest); sha224_init(ctx); } - -void -sha256_compress(uint32_t *state, const uint8_t *input) -{ - _nettle_sha256_compress(state, input, K); -} diff --git a/siv-gcm-aes128.c b/siv-gcm-aes128.c new file mode 100644 index 00000000..4317d3d8 --- /dev/null +++ b/siv-gcm-aes128.c @@ -0,0 +1,65 @@ +/* siv-gcm-aes128.c + + AES-GCM-SIV, RFC8452 + + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "nettle-meta.h" +#include "siv-gcm.h" + +void +siv_gcm_aes128_encrypt_message (const struct aes128_ctx *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t clength, uint8_t *dst, const uint8_t *src) +{ + struct aes128_ctx ctr_ctx; + siv_gcm_encrypt_message (&nettle_aes128, ctx, &ctr_ctx, + nlength, nonce, + alength, adata, + clength, dst, src); +} + +int +siv_gcm_aes128_decrypt_message (const struct aes128_ctx *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t mlength, uint8_t *dst, const uint8_t *src) +{ + struct aes128_ctx ctr_ctx; + return siv_gcm_decrypt_message (&nettle_aes128, ctx, &ctr_ctx, + nlength, nonce, + alength, adata, + mlength, dst, src); +} diff --git a/siv-gcm-aes256.c b/siv-gcm-aes256.c new file mode 100644 index 00000000..70bf3f35 --- /dev/null +++ b/siv-gcm-aes256.c @@ -0,0 +1,65 @@ +/* siv-gcm-aes256.c + + AES-GCM-SIV, RFC8452 + + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "nettle-meta.h" +#include "siv-gcm.h" + +void +siv_gcm_aes256_encrypt_message (const struct aes256_ctx *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t clength, uint8_t *dst, const uint8_t *src) +{ + struct aes256_ctx ctr_ctx; + siv_gcm_encrypt_message (&nettle_aes256, ctx, &ctr_ctx, + nlength, nonce, + alength, adata, + clength, dst, src); +} + +int +siv_gcm_aes256_decrypt_message (const struct aes256_ctx *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t mlength, uint8_t *dst, const uint8_t *src) +{ + struct aes256_ctx ctr_ctx; + return siv_gcm_decrypt_message (&nettle_aes256, ctx, &ctr_ctx, + nlength, nonce, + alength, adata, + mlength, dst, src); +} diff --git a/siv-gcm.c b/siv-gcm.c new file mode 100644 index 00000000..332a7439 --- /dev/null +++ b/siv-gcm.c @@ -0,0 +1,229 @@ +/* siv-gcm.c + + AES-GCM-SIV, RFC8452 + + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "siv-gcm.h" +#include "ghash-internal.h" +#include "block-internal.h" +#include "nettle-internal.h" +#include "macros.h" +#include "memops.h" +#include "ctr-internal.h" +#include <string.h> + +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) + +static void +siv_gcm_derive_keys (const void *ctx, + nettle_cipher_func *f, + size_t key_size, + size_t nlength, const uint8_t *nonce, + union nettle_block16 *auth_key, + uint8_t *encryption_key) +{ + union nettle_block16 block; + union nettle_block16 out; + size_t i; + + block16_zero (&block); + memcpy (block.b + 4, nonce, MIN(nlength, SIV_GCM_NONCE_SIZE)); + + f (ctx, SIV_GCM_BLOCK_SIZE, out.b, block.b); + auth_key->u64[0] = out.u64[0]; + + block.b[0] = 1; + f (ctx, SIV_GCM_BLOCK_SIZE, out.b, block.b); + auth_key->u64[1] = out.u64[0]; + + assert (key_size % 8 == 0 && key_size / 8 + 2 <= UINT8_MAX); + + for (i = 0; i < key_size; i += 8) + { + block.b[0]++; + f (ctx, SIV_GCM_BLOCK_SIZE, out.b, block.b); + memcpy (encryption_key + i, out.b, 8); + } +} + +static nettle_fill16_func siv_gcm_fill; + +static void +siv_gcm_fill(uint8_t *ctr, size_t blocks, union nettle_block16 *buffer) +{ + uint32_t c; + + c = LE_READ_UINT32(ctr); + + for (; blocks-- > 0; buffer++, c++) + { + memcpy(buffer->b + 4, ctr + 4, SIV_GCM_BLOCK_SIZE - 4); + LE_WRITE_UINT32(buffer->b, c); + } + + LE_WRITE_UINT32(ctr, c); +} + +static void +siv_ghash_pad_update (struct gcm_key *ctx, + union nettle_block16 *state, + size_t length, const uint8_t *data) +{ + size_t blocks; + + blocks = 
length / SIV_GCM_BLOCK_SIZE; + if (blocks > 0) + { + data = _siv_ghash_update (ctx, state, blocks, data); + length &= 0xf; + } + if (length > 0) + { + uint8_t block[SIV_GCM_BLOCK_SIZE]; + + memset (block + length, 0, SIV_GCM_BLOCK_SIZE - length); + memcpy (block, data, length); + _siv_ghash_update (ctx, state, 1, block); + } +} + +static void +siv_gcm_authenticate (const void *ctx, + const struct nettle_cipher *nc, + const union nettle_block16 *authentication_key, + const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t mlength, const uint8_t *mdata, + uint8_t *tag) +{ + union nettle_block16 state; + struct gcm_key siv_ghash_key; + union nettle_block16 block; + + _siv_ghash_set_key (&siv_ghash_key, authentication_key); + + block16_zero (&state); + siv_ghash_pad_update (&siv_ghash_key, &state, alength, adata); + siv_ghash_pad_update (&siv_ghash_key, &state, mlength, mdata); + + block.u64[0] = bswap64_if_be (alength * 8); + block.u64[1] = bswap64_if_be (mlength * 8); + + _siv_ghash_update (&siv_ghash_key, &state, 1, block.b); + block16_bswap (&state, &state); + + memxor (state.b, nonce, SIV_GCM_NONCE_SIZE); + state.b[15] &= 0x7f; + nc->encrypt (ctx, SIV_GCM_BLOCK_SIZE, tag, state.b); +} + +void +siv_gcm_encrypt_message (const struct nettle_cipher *nc, + const void *ctx, + void *ctr_ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t clength, uint8_t *dst, const uint8_t *src) +{ + union nettle_block16 authentication_key; + TMP_DECL(encryption_key, uint8_t, NETTLE_MAX_CIPHER_KEY_SIZE); + uint8_t ctr[SIV_GCM_DIGEST_SIZE]; + uint8_t *tag = dst + clength - SIV_GCM_BLOCK_SIZE; + + assert (clength >= SIV_GCM_DIGEST_SIZE); + assert (nlength == SIV_GCM_NONCE_SIZE); + + TMP_ALLOC(encryption_key, nc->key_size); + siv_gcm_derive_keys (ctx, nc->encrypt, nc->key_size, nlength, nonce, + &authentication_key, encryption_key); + + /* Calculate authentication tag. 
*/ + nc->set_encrypt_key (ctr_ctx, encryption_key); + + siv_gcm_authenticate (ctr_ctx, nc, + &authentication_key, + nonce, alength, adata, + clength - SIV_GCM_BLOCK_SIZE, src, + tag); + + /* Encrypt the plaintext. */ + + /* The initial counter block is the tag with the most significant + bit of the last byte set to one. */ + memcpy (ctr, tag, SIV_GCM_DIGEST_SIZE); + ctr[15] |= 0x80; + _nettle_ctr_crypt16 (ctr_ctx, nc->encrypt, siv_gcm_fill, ctr, + clength - SIV_GCM_BLOCK_SIZE, dst, src); +} + +int +siv_gcm_decrypt_message (const struct nettle_cipher *nc, + const void *ctx, + void *ctr_ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t mlength, uint8_t *dst, const uint8_t *src) +{ + union nettle_block16 authentication_key; + TMP_DECL(encryption_key, uint8_t, NETTLE_MAX_CIPHER_KEY_SIZE); + union nettle_block16 state; + uint8_t tag[SIV_GCM_DIGEST_SIZE]; + + assert (nlength == SIV_GCM_NONCE_SIZE); + + TMP_ALLOC(encryption_key, nc->key_size); + siv_gcm_derive_keys (ctx, nc->encrypt, nc->key_size, nlength, nonce, + &authentication_key, encryption_key); + + memcpy (state.b, src + mlength, SIV_GCM_DIGEST_SIZE); + /* The initial counter block is the tag with the most significant + bit of the last byte set to one. */ + state.b[15] |= 0x80; + + /* Decrypt the ciphertext. */ + nc->set_encrypt_key (ctr_ctx, encryption_key); + + _nettle_ctr_crypt16 (ctr_ctx, nc->encrypt, siv_gcm_fill, state.b, + mlength, dst, src); + + /* Calculate authentication tag. */ + siv_gcm_authenticate (ctr_ctx, nc, + &authentication_key, + nonce, alength, adata, + mlength, dst, + tag); + + return memeql_sec (tag, src + mlength, SIV_GCM_DIGEST_SIZE); +} diff --git a/siv-gcm.h b/siv-gcm.h new file mode 100644 index 00000000..1a9e3084 --- /dev/null +++ b/siv-gcm.h @@ -0,0 +1,107 @@ +/* siv-gcm.h + + AES-GCM-SIV, RFC8452 + + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#ifndef NETTLE_SIV_GCM_H_INCLUDED +#define NETTLE_SIV_GCM_H_INCLUDED + +#include "nettle-types.h" +#include "nettle-meta.h" +#include "aes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Name mangling */ +#define siv_gcm_encrypt_message nettle_siv_gcm_encrypt_message +#define siv_gcm_decrypt_message nettle_siv_gcm_decrypt_message +#define siv_gcm_aes128_encrypt_message nettle_siv_gcm_aes128_encrypt_message +#define siv_gcm_aes128_decrypt_message nettle_siv_gcm_aes128_decrypt_message +#define siv_gcm_aes256_encrypt_message nettle_siv_gcm_aes256_encrypt_message +#define siv_gcm_aes256_decrypt_message nettle_siv_gcm_aes256_decrypt_message + +/* For AES-GCM-SIV, the block size of the underlying cipher shall be 128 bits. */ +#define SIV_GCM_BLOCK_SIZE 16 +#define SIV_GCM_DIGEST_SIZE 16 +#define SIV_GCM_NONCE_SIZE 12 + +/* Generic interface. NC must be a block cipher with 128-bit block + size, and keysize that is a multiple of 64 bits, such as AES-128 or + AES-256. 
*/ +void +siv_gcm_encrypt_message (const struct nettle_cipher *nc, + const void *ctx, + void *ctr_ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t clength, uint8_t *dst, const uint8_t *src); + /* For encryption, CLENGTH counts the output including the trailing SIV_GCM_DIGEST_SIZE-byte tag. For decryption, MLENGTH counts the plaintext only and the tag is read from SRC + MLENGTH. */ +int +siv_gcm_decrypt_message (const struct nettle_cipher *nc, + const void *ctx, + void *ctr_ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t mlength, uint8_t *dst, const uint8_t *src); + +/* AEAD_AES_128_GCM_SIV */ +void +siv_gcm_aes128_encrypt_message (const struct aes128_ctx *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t clength, uint8_t *dst, const uint8_t *src); + +int +siv_gcm_aes128_decrypt_message (const struct aes128_ctx *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t mlength, uint8_t *dst, const uint8_t *src); + +/* AEAD_AES_256_GCM_SIV */ +void +siv_gcm_aes256_encrypt_message (const struct aes256_ctx *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t clength, uint8_t *dst, const uint8_t *src); + +int +siv_gcm_aes256_decrypt_message (const struct aes256_ctx *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t mlength, uint8_t *dst, const uint8_t *src); + +#ifdef __cplusplus +} +#endif + +#endif /* NETTLE_SIV_GCM_H_INCLUDED */ diff --git a/siv-ghash-set-key.c b/siv-ghash-set-key.c new file mode 100644 index 00000000..b13d7495 --- /dev/null +++ b/siv-ghash-set-key.c @@ -0,0 +1,52 @@ +/* siv-ghash-set-key.c + + POLYVAL implementation for AES-GCM-SIV, based on GHASH + + Copyright (C) 2011 Katholieke Universiteit Leuven + Copyright (C) 2011, 2013, 2018, 2022 Niels Möller + Copyright (C) 2018, 2022 Red Hat, Inc. + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ghash-internal.h" +#include "block-internal.h" + /* Initializes CTX from the 16-byte KEY so the GHASH code can be reused for POLYVAL: KEY is passed through block16_bswap, then block16_mulx_ghash, and the converted value is installed with _ghash_set_key. KEY itself is not modified. */ +void +_siv_ghash_set_key (struct gcm_key *ctx, const union nettle_block16 *key) +{ + union nettle_block16 h; + + block16_bswap (&h, key); + block16_mulx_ghash (&h, &h); + + _ghash_set_key (ctx, &h); +} diff --git a/siv-ghash-update.c b/siv-ghash-update.c new file mode 100644 index 00000000..21ce5c6e --- /dev/null +++ b/siv-ghash-update.c @@ -0,0 +1,65 @@ +/* siv-ghash-update.c + + POLYVAL implementation for AES-GCM-SIV, based on GHASH + + Copyright (C) 2011 Katholieke Universiteit Leuven + Copyright (C) 2011, 2013, 2018, 2022 Niels Möller + Copyright (C) 2018, 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. 
+ + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "ghash-internal.h" +#include "block-internal.h" +#include "macros.h" + /* Absorbs BLOCKS blocks of GCM_BLOCK_SIZE bytes from DATA into STATE, calling _ghash_update once per block. Each block is first repacked: its first eight bytes are loaded into b.u64[1] and the next eight into b.u64[0], with the read macro chosen by WORDS_BIGENDIAN. Returns DATA advanced past the consumed blocks. */ +const uint8_t * +_siv_ghash_update (const struct gcm_key *ctx, union nettle_block16 *state, + size_t blocks, const uint8_t *data) +{ + for (; blocks-- > 0; data += GCM_BLOCK_SIZE) + { + union nettle_block16 b; + +#if WORDS_BIGENDIAN + b.u64[1] = LE_READ_UINT64(data); + b.u64[0] = LE_READ_UINT64(data + 8); +#else + b.u64[1] = READ_UINT64(data); + b.u64[0] = READ_UINT64(data + 8); +#endif + + _ghash_update (ctx, state, 1, b.b); + } + + return data; +} + diff --git a/sm4-meta.c b/sm4-meta.c new file mode 100644 index 00000000..d7234984 --- /dev/null +++ b/sm4-meta.c @@ -0,0 +1,49 @@ +/* sm4-meta.c + + Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include "nettle-meta.h" + +#include "sm4.h" + /* Generic cipher descriptor for SM4. Both cipher-function slots point at sm4_crypt; the direction is selected only by which set-key function (sm4_set_encrypt_key or sm4_set_decrypt_key) prepared the context. */ +const struct nettle_cipher nettle_sm4 = { + "sm4", + sizeof(struct sm4_ctx), + SM4_BLOCK_SIZE, + SM4_KEY_SIZE, + (nettle_set_key_func *) sm4_set_encrypt_key, + (nettle_set_key_func *) sm4_set_decrypt_key, + (nettle_cipher_func *) sm4_crypt, + (nettle_cipher_func *) sm4_crypt +}; @@ -0,0 +1,223 @@ +/* sm4.c + + Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+*/ + +#if HAVE_CONFIG_H +# include "config.h" +#endif + +#include <assert.h> +#include <string.h> + +#include "sm4.h" + +#include "macros.h" + + +static const uint32_t fk[4] = +{ + 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc +}; + +static const uint32_t ck[32] = +{ + 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, + 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, + 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, + 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, + 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, + 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, + 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, + 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 +}; + +static const uint8_t sbox[256] = +{ + 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, + 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, + 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, + 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, + 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, + 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, + 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, + 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, + 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, + 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, + 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, + 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, + 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, + 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, + 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, + 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, + 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, + 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, + 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, + 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, + 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, + 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, + 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, + 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, + 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, + 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 
0x5a, 0xd8, + 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, + 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, + 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, + 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, + 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, + 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48 +}; + +static uint32_t +sm4_t_non_lin_sub(uint32_t x) +{ + uint32_t out; + + out = (uint32_t)sbox[x & 0xff]; + out |= (uint32_t)sbox[(x >> 8) & 0xff] << 8; + out |= (uint32_t)sbox[(x >> 16) & 0xff] << 16; + out |= (uint32_t)sbox[(x >> 24) & 0xff] << 24; + + return out; +} + +static uint32_t +sm4_key_lin_sub(uint32_t x) +{ + return x ^ ROTL32(13, x) ^ ROTL32(23, x); +} + +static uint32_t +sm4_enc_lin_sub(uint32_t x) +{ + return x ^ ROTL32(2, x) ^ ROTL32(10, x) ^ ROTL32(18, x) ^ ROTL32(24, x); +} + +static uint32_t +sm4_key_sub(uint32_t x) +{ + return sm4_key_lin_sub(sm4_t_non_lin_sub(x)); +} + +static uint32_t +sm4_enc_sub(uint32_t x) +{ + return sm4_enc_lin_sub(sm4_t_non_lin_sub(x)); +} + +static uint32_t +sm4_round(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, uint32_t rk) +{ + return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk); +} + +static void +sm4_set_key(struct sm4_ctx *ctx, const uint8_t *key, int encrypt) +{ + uint32_t rk0, rk1, rk2, rk3; + unsigned i; + + rk0 = READ_UINT32(key + 0) ^ fk[0]; + rk1 = READ_UINT32(key + 4) ^ fk[1]; + rk2 = READ_UINT32(key + 8) ^ fk[2]; + rk3 = READ_UINT32(key + 12) ^ fk[3]; + + for (i = 0; i < 32; i += 4) + { + rk0 ^= sm4_key_sub(rk1 ^ rk2 ^ rk3 ^ ck[i + 0]); + rk1 ^= sm4_key_sub(rk2 ^ rk3 ^ rk0 ^ ck[i + 1]); + rk2 ^= sm4_key_sub(rk3 ^ rk0 ^ rk1 ^ ck[i + 2]); + rk3 ^= sm4_key_sub(rk0 ^ rk1 ^ rk2 ^ ck[i + 3]); + + if (encrypt) + { + ctx->rkey[i + 0] = rk0; + ctx->rkey[i + 1] = rk1; + ctx->rkey[i + 2] = rk2; + ctx->rkey[i + 3] = rk3; + } + else + { + ctx->rkey[31 - 0 - i] = rk0; + ctx->rkey[31 - 1 - i] = rk1; + ctx->rkey[31 - 2 - i] = rk2; + ctx->rkey[31 - 3 - i] = rk3; + } + } +} + +void +sm4_set_encrypt_key(struct sm4_ctx 
*ctx, const uint8_t *key) +{ + sm4_set_key(ctx, key, 1); +} + +void +sm4_set_decrypt_key(struct sm4_ctx *ctx, const uint8_t *key) +{ + sm4_set_key(ctx, key, 0); +} + +void +sm4_crypt(const struct sm4_ctx *context, + size_t length, + uint8_t *dst, + const uint8_t *src) +{ + const uint32_t *rk = context->rkey; + + assert( !(length % SM4_BLOCK_SIZE) ); + + for ( ; length; length -= SM4_BLOCK_SIZE) + { + uint32_t x0, x1, x2, x3; + unsigned i; + + x0 = READ_UINT32(src + 0 * 4); + x1 = READ_UINT32(src + 1 * 4); + x2 = READ_UINT32(src + 2 * 4); + x3 = READ_UINT32(src + 3 * 4); + + for (i = 0; i < 32; i += 4) + { + x0 = sm4_round(x0, x1, x2, x3, rk[i + 0]); + x1 = sm4_round(x1, x2, x3, x0, rk[i + 1]); + x2 = sm4_round(x2, x3, x0, x1, rk[i + 2]); + x3 = sm4_round(x3, x0, x1, x2, rk[i + 3]); + } + + WRITE_UINT32(dst + 0 * 4, x3); + WRITE_UINT32(dst + 1 * 4, x2); + WRITE_UINT32(dst + 2 * 4, x1); + WRITE_UINT32(dst + 3 * 4, x0); + + src += SM4_BLOCK_SIZE; + dst += SM4_BLOCK_SIZE; + } +} @@ -0,0 +1,69 @@ +/* sm4.h + + Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. 
If + not, see http://www.gnu.org/licenses/. +*/ + +#ifndef NETTLE_SM4_H_INCLUDED +#define NETTLE_SM4_H_INCLUDED + +#include "nettle-types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Name mangling */ +#define sm4_set_encrypt_key nettle_sm4_set_encrypt_key +#define sm4_set_decrypt_key nettle_sm4_set_decrypt_key +#define sm4_crypt nettle_sm4_crypt + +#define SM4_BLOCK_SIZE 16 +#define SM4_KEY_SIZE 16 + +struct sm4_ctx +{ + uint32_t rkey[32]; +}; + +void +sm4_set_encrypt_key(struct sm4_ctx *ctx, const uint8_t *key); + +void +sm4_set_decrypt_key(struct sm4_ctx *ctx, const uint8_t *key); + +void +sm4_crypt(const struct sm4_ctx *context, + size_t length, uint8_t *dst, + const uint8_t *src); + +#ifdef __cplusplus +} +#endif + +#endif /* NETTLE_SM4_H_INCLUDED */ diff --git a/testsuite/.gitignore b/testsuite/.gitignore index ca41472e..8c91d1af 100644 --- a/testsuite/.gitignore +++ b/testsuite/.gitignore @@ -4,6 +4,7 @@ /aes-keywrap-test /arcfour-test /arctwo-test +/balloon-test /base16-test /base64-test /bignum-test @@ -98,6 +99,7 @@ /sha512-256-test /sha512-test /sm3-test +/sm4-test /streebog-test /twofish-test /umac-test @@ -106,6 +108,7 @@ /xts-test /cmac-test /siv-test +/siv-gcm-test /bcrypt-test /ed448-test /shake256-test diff --git a/testsuite/Makefile.in b/testsuite/Makefile.in index 6734d3e6..025ab72d 100644 --- a/testsuite/Makefile.in +++ b/testsuite/Makefile.in @@ -11,7 +11,7 @@ PRE_CPPFLAGS = -I.. -I$(top_srcdir) PRE_LDFLAGS = -L.. 
TS_NETTLE_SOURCES = aes-test.c aes-keywrap-test.c arcfour-test.c arctwo-test.c \ - blowfish-test.c bcrypt-test.c cast128-test.c \ + balloon-test.c blowfish-test.c bcrypt-test.c cast128-test.c \ base16-test.c base64-test.c \ camellia-test.c chacha-test.c \ cnd-memcpy-test.c \ @@ -24,11 +24,11 @@ TS_NETTLE_SOURCES = aes-test.c aes-keywrap-test.c arcfour-test.c arctwo-test.c \ sha384-test.c sha512-test.c sha512-224-test.c sha512-256-test.c \ sha3-permute-test.c sha3-224-test.c sha3-256-test.c \ sha3-384-test.c sha3-512-test.c \ - shake256-test.c streebog-test.c sm3-test.c \ + shake256-test.c streebog-test.c sm3-test.c sm4-test.c \ serpent-test.c twofish-test.c version-test.c \ knuth-lfib-test.c \ cbc-test.c cfb-test.c ctr-test.c gcm-test.c eax-test.c ccm-test.c \ - cmac-test.c siv-test.c \ + cmac-test.c siv-test.c siv-gcm-test.c \ poly1305-test.c chacha-poly1305-test.c \ hmac-test.c umac-test.c \ meta-hash-test.c meta-cipher-test.c\ @@ -47,8 +47,8 @@ TS_HOGWEED_SOURCES = sexp-test.c sexp-format-test.c \ rsa-compute-root-test.c \ dsa-test.c dsa-keygen-test.c \ curve25519-dh-test.c curve448-dh-test.c \ - ecc-mod-test.c ecc-modinv-test.c ecc-redc-test.c \ - ecc-sqrt-test.c \ + ecc-mod-arith-test.c ecc-mod-test.c ecc-modinv-test.c \ + ecc-redc-test.c ecc-sqrt-test.c \ ecc-dup-test.c ecc-add-test.c \ ecc-mul-g-test.c ecc-mul-a-test.c \ ecdsa-sign-test.c ecdsa-verify-test.c \ diff --git a/testsuite/balloon-test.c b/testsuite/balloon-test.c new file mode 100644 index 00000000..ad63c7a0 --- /dev/null +++ b/testsuite/balloon-test.c @@ -0,0 +1,135 @@ +/* balloon-test.c + + Copyright (C) 2022 Zoltan Fridrich + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. 
+ + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#include "testutils.h" +#include "balloon.h" + +static void +test_balloon(const struct nettle_hash *alg, + size_t password_len, const char *password, + size_t salt_len, const char *salt, + unsigned s_cost, unsigned t_cost, + const struct tstring *expected) +{ + void *ctx = xalloc(alg->context_size); + uint8_t *buf = xalloc(balloon_itch(alg->digest_size, s_cost)); + + alg->init(ctx); + balloon(ctx, alg->update, alg->digest, alg->digest_size, + s_cost, t_cost, password_len, (const uint8_t *)password, + salt_len, (const uint8_t *)salt, buf, buf); + + if (!MEMEQ(alg->digest_size, buf, expected->data)) + { + fprintf(stderr, "test_balloon: result doesn't match the expectation:"); + fprintf(stderr, "\nOutput: "); + print_hex(alg->digest_size, buf); + fprintf(stderr, "\nExpected:"); + tstring_print_hex(expected); + fprintf(stderr, "\n"); + FAIL(); + } + + free(ctx); + free(buf); +} + +static void +test_balloon_sha(const struct nettle_hash *alg, + size_t password_len, const char *password, + size_t salt_len, const char *salt, + unsigned s_cost, unsigned t_cost, + const struct tstring *expected) +{ + uint8_t *buf = xalloc(balloon_itch(alg->digest_size, s_cost)); + + if (alg == &nettle_sha1) + balloon_sha1(s_cost, t_cost, password_len, (const uint8_t *)password, + salt_len, (const uint8_t *)salt, buf, buf); + else if (alg == &nettle_sha256) + 
balloon_sha256(s_cost, t_cost, password_len, (const uint8_t *)password, + salt_len, (const uint8_t *)salt, buf, buf); + else if (alg == &nettle_sha384) + balloon_sha384(s_cost, t_cost, password_len, (const uint8_t *)password, + salt_len, (const uint8_t *)salt, buf, buf); + else if (alg == &nettle_sha512) + balloon_sha512(s_cost, t_cost, password_len, (const uint8_t *)password, + salt_len, (const uint8_t *)salt, buf, buf); + else + { + fprintf(stderr, "test_balloon_sha: bad test\n"); + FAIL(); + } + + if (!MEMEQ(alg->digest_size, buf, expected->data)) + { + fprintf(stderr, "test_balloon_sha: result doesn't match the expectation:"); + fprintf(stderr, "\nOutput: "); + print_hex(alg->digest_size, buf); + fprintf(stderr, "\nExpected:"); + tstring_print_hex(expected); + fprintf(stderr, "\n"); + FAIL(); + } + + free(buf); +} + +/* Test vectors are taken from: + * <https://github.com/nachonavarro/balloon-hashing> + * <https://github.com/RustCrypto/password-hashes/tree/master/balloon-hash> + */ +void +test_main(void) +{ + test_balloon(&nettle_sha256, 8, "hunter42", 11, "examplesalt", 1024, 3, + SHEX("716043dff777b44aa7b88dcbab12c078abecfac9d289c5b5195967aa63440dfb")); + test_balloon(&nettle_sha256, 0, "", 4, "salt", 3, 3, + SHEX("5f02f8206f9cd212485c6bdf85527b698956701ad0852106f94b94ee94577378")); + test_balloon(&nettle_sha256, 8, "password", 0, "", 3, 3, + SHEX("20aa99d7fe3f4df4bd98c655c5480ec98b143107a331fd491deda885c4d6a6cc")); + test_balloon(&nettle_sha256, 1, "", 1, "", 3, 3, + SHEX("4fc7e302ffa29ae0eac31166cee7a552d1d71135f4e0da66486fb68a749b73a4")); + test_balloon(&nettle_sha256, 8, "password", 4, "salt", 1, 1, + SHEX("eefda4a8a75b461fa389c1dcfaf3e9dfacbc26f81f22e6f280d15cc18c417545")); + + test_balloon_sha(&nettle_sha1, 8, "password", 4, "salt", 3, 3, + SHEX("99393c091fdd3136f85864099ec49a439dcacc21")); + test_balloon_sha(&nettle_sha256, 8, "password", 4, "salt", 3, 3, + SHEX("a4df347f5a312e8b2b14c32164f61a81758c807f1bdcda44f4930e2b80ab2154")); + 
test_balloon_sha(&nettle_sha384, 8, "password", 4, "salt", 3, 3, + SHEX("78da235f7d0f84aba98b50a432fa6c8f7f3ecb7ea0858cfb316c7e5356aae6c8" + "d7e7b3924c54c4ed71a3d0d68cb0ad68")); + test_balloon_sha(&nettle_sha512, 8, "password", 4, "salt", 3, 3, + SHEX("9baf289dfa42990f4b189d96d4ede0f2610ba71fb644169427829d696f6866d8" + "7af41eb68f9e14fd4b1f1a7ce4832f1ed6117c16e8eae753f9e1d054a7c0a7eb")); +} diff --git a/testsuite/ecc-add-test.c b/testsuite/ecc-add-test.c index 6f58a3bb..4793a4bf 100644 --- a/testsuite/ecc-add-test.c +++ b/testsuite/ecc-add-test.c @@ -19,6 +19,24 @@ test_main (void) test_ecc_get_g (i, g); + ecc->dup (ecc, g2, g, scratch); + test_ecc_mul_h (i, 2, g2); + + ecc->add_hhh (ecc, g3, g, g2, scratch); + test_ecc_mul_h (i, 3, g3); + + ecc->add_hhh (ecc, g3, g2, g, scratch); + test_ecc_mul_h (i, 3, g3); + + ecc->add_hhh (ecc, p, g, g3, scratch); + test_ecc_mul_h (i, 4, p); + + ecc->add_hhh (ecc, p, g3, g, scratch); + test_ecc_mul_h (i, 4, p); + + ecc->dup (ecc, p, g2, scratch); + test_ecc_mul_h (i, 4, p); + if (ecc->p.bit_size == 255 || ecc->p.bit_size == 448) { mp_limb_t *z = xalloc_limbs (ecc_size_j (ecc)); @@ -49,24 +67,20 @@ test_main (void) free (z); } + else + { + ASSERT (ecc_nonsec_add_jjj (ecc, g2, g, g, scratch)); + test_ecc_mul_h (i, 2, g2); - ecc->dup (ecc, g2, g, scratch); - test_ecc_mul_h (i, 2, g2); - - ecc->add_hhh (ecc, g3, g, g2, scratch); - test_ecc_mul_h (i, 3, g3); - - ecc->add_hhh (ecc, g3, g2, g, scratch); - test_ecc_mul_h (i, 3, g3); - - ecc->add_hhh (ecc, p, g, g3, scratch); - test_ecc_mul_h (i, 4, p); + ASSERT (ecc_nonsec_add_jjj (ecc, g3, g2, g, scratch)); + test_ecc_mul_h (i, 3, g3); - ecc->add_hhh (ecc, p, g3, g, scratch); - test_ecc_mul_h (i, 4, p); + ASSERT (ecc_nonsec_add_jjj (ecc, p, g, g3, scratch)); + test_ecc_mul_h (i, 4, p); - ecc->dup (ecc, p, g2, scratch); - test_ecc_mul_h (i, 4, p); + ASSERT (ecc_nonsec_add_jjj (ecc, p, g2, g2, scratch)); + test_ecc_mul_h (i, 4, p); + } free (g); free (g2); diff --git 
a/testsuite/ecc-mod-arith-test.c b/testsuite/ecc-mod-arith-test.c new file mode 100644 index 00000000..14b3bd1c --- /dev/null +++ b/testsuite/ecc-mod-arith-test.c @@ -0,0 +1,160 @@ +#include "testutils.h" + +#define MAX_SIZE (1 + 521 / GMP_NUMB_BITS) +#define COUNT 50000 + +static void +test_add(const char *name, + const struct ecc_modulo *m, + const mpz_t az, const mpz_t bz) +{ + mp_limb_t a[MAX_SIZE]; + mp_limb_t b[MAX_SIZE]; + mp_limb_t t[MAX_SIZE]; + mpz_t mz; + mpz_t tz; + mpz_t ref; + + mpz_init (ref); + mpz_add (ref, az, bz); + mpz_mod (ref, ref, mpz_roinit_n (mz, m->m, m->size)); + + mpz_limbs_copy (a, az, m->size); + mpz_limbs_copy (b, bz, m->size); + ecc_mod_add (m, t, a, b); + + if (!mpz_congruent_p (ref, mpz_roinit_n (tz, t, m->size), mz)) + { + fprintf (stderr, "ecc_mod_add %s failed: bit_size = %u\n", + name, m->bit_size); + + fprintf (stderr, "a = "); + mpn_out_str (stderr, 16, a, m->size); + fprintf (stderr, "\nb = "); + mpn_out_str (stderr, 16, b, m->size); + fprintf (stderr, "\nt = "); + mpn_out_str (stderr, 16, t, m->size); + fprintf (stderr, " (bad)\nref = "); + mpz_out_str (stderr, 16, ref); + fprintf (stderr, "\n"); + abort (); + } + mpz_clear (ref); +} + +static void +test_sub(const char *name, + const struct ecc_modulo *m, + /* If range is non-null, check that 0 <= r < range. 
*/ + const mp_limb_t *range, + const mpz_t az, const mpz_t bz) +{ + mp_limb_t a[MAX_SIZE]; + mp_limb_t b[MAX_SIZE]; + mp_limb_t t[MAX_SIZE]; + mpz_t mz; + mpz_t tz; + mpz_t ref; + + mpz_init (ref); + mpz_sub (ref, az, bz); + mpz_mod (ref, ref, mpz_roinit_n (mz, m->m, m->size)); + + mpz_limbs_copy (a, az, m->size); + mpz_limbs_copy (b, bz, m->size); + ecc_mod_sub (m, t, a, b); + + if (!mpz_congruent_p (ref, mpz_roinit_n (tz, t, m->size), mz)) + { + fprintf (stderr, "ecc_mod_sub %s failed: bit_size = %u\n", + name, m->bit_size); + + fprintf (stderr, "a = "); + mpn_out_str (stderr, 16, a, m->size); + fprintf (stderr, "\nb = "); + mpn_out_str (stderr, 16, b, m->size); + fprintf (stderr, "\nt = "); + mpn_out_str (stderr, 16, t, m->size); + fprintf (stderr, " (bad)\nref = "); + mpz_out_str (stderr, 16, ref); + fprintf (stderr, "\n"); + abort (); + } + + if (range && mpn_cmp (t, range, m->size) >= 0) + { + fprintf (stderr, "ecc_mod_sub %s out of range: bit_size = %u\n", + name, m->bit_size); + + fprintf (stderr, "a = "); + mpn_out_str (stderr, 16, a, m->size); + fprintf (stderr, "\nb = "); + mpn_out_str (stderr, 16, b, m->size); + fprintf (stderr, "\nt = "); + mpn_out_str (stderr, 16, t, m->size); + fprintf (stderr, " \nrange = "); + mpn_out_str (stderr, 16, range, m->size); + fprintf (stderr, "\n"); + abort (); + } + mpz_clear (ref); +} + +static void +test_modulo (gmp_randstate_t rands, const char *name, + const struct ecc_modulo *m, unsigned count) +{ + mpz_t a, b; + unsigned j; + + mpz_init (a); + mpz_init (b); + + for (j = 0; j < count; j++) + { + if (j & 1) + { + mpz_rrandomb (a, rands, m->size * GMP_NUMB_BITS); + mpz_rrandomb (b, rands, m->size * GMP_NUMB_BITS); + } + else + { + mpz_urandomb (a, rands, m->size * GMP_NUMB_BITS); + mpz_urandomb (b, rands, m->size * GMP_NUMB_BITS); + } + test_add (name, m, a, b); + test_sub (name, m, NULL, a, b); + } + if (m->bit_size < m->size * GMP_NUMB_BITS) + { + mp_limb_t two_p[MAX_SIZE]; + mpn_lshift (two_p, m->m, m->size, 1); + 
mpz_t range; + mpz_roinit_n (range, two_p, m->size); + mpz_urandomm (a, rands, range); + mpz_urandomm (b, rands, range); + test_sub (name, m, two_p, a, b); + } + mpz_clear (a); + mpz_clear (b); +} + +void +test_main (void) +{ + gmp_randstate_t rands; + unsigned count = COUNT; + unsigned i; + + gmp_randinit_default (rands); + + if (test_randomize(rands)) + count *= 20; + + for (i = 0; ecc_curves[i]; i++) + { + test_modulo (rands, "p", &ecc_curves[i]->p, count); + test_modulo (rands, "q", &ecc_curves[i]->q, count); + } + gmp_randclear (rands); +} diff --git a/testsuite/ecdsa-sign-test.c b/testsuite/ecdsa-sign-test.c index c79493ae..b8a100b6 100644 --- a/testsuite/ecdsa-sign-test.c +++ b/testsuite/ecdsa-sign-test.c @@ -77,6 +77,18 @@ test_main (void) "3a41e1423b1853e8aa89747b1f987364" "44705d6d6d8371ea1f578f2e"); /* s */ + /* Produce a signature where verify operation results in a point duplication. */ + test_ecdsa (&_nettle_secp_256r1, + "1", /* Private key */ + "01010101010101010101010101010101" + "01010101010101010101010101010101", /* nonce */ + SHEX("6ff03b949241ce1dadd43519e6960e0a" + "85b41a69a05c328103aa2bce1594ca16"), /* hash */ + "6ff03b949241ce1dadd43519e6960e0a" + "85b41a69a05c328103aa2bce1594ca16", /* r */ + "53f097727a0e0dc284a0daa0da0ab77d" + "5792ae67ed075d1f8d5bda0f853fa093"); /* s */ + /* Test cases for the smaller groups, verified with a proof-of-concept implementation done for Yubico AB. */ test_ecdsa (&_nettle_secp_192r1, diff --git a/testsuite/ecdsa-verify-test.c b/testsuite/ecdsa-verify-test.c index 8110c64d..8d527000 100644 --- a/testsuite/ecdsa-verify-test.c +++ b/testsuite/ecdsa-verify-test.c @@ -109,6 +109,21 @@ test_main (void) "952800792ed19341fdeeec047f2514f3b0f150d6066151fb", /* r */ "ec5971222014878b50d7a19d8954bc871e7e65b00b860ffb"); /* s */ + /* Test case provided by Guido Vranken, from oss-fuzz. Triggers + point duplication in the verify operation by using private key = + 1 (public key = generator) and hash = r. 
*/ + test_ecdsa (&_nettle_secp_256r1, + "6B17D1F2E12C4247F8BCE6E563A440F2" + "77037D812DEB33A0F4A13945D898C296", /* x */ + "4FE342E2FE1A7F9B8EE7EB4A7C0F9E16" + "2BCE33576B315ECECBB6406837BF51F5", /* y */ + SHEX("6ff03b949241ce1dadd43519e6960e0a" + "85b41a69a05c328103aa2bce1594ca16"), /* hash */ + "6ff03b949241ce1dadd43519e6960e0a" + "85b41a69a05c328103aa2bce1594ca16", /* r */ + "53f097727a0e0dc284a0daa0da0ab77d" + "5792ae67ed075d1f8d5bda0f853fa093"); /* s */ + /* From RFC 4754 */ test_ecdsa (&_nettle_secp_256r1, "2442A5CC 0ECD015F A3CA31DC 8E2BBC70" diff --git a/testsuite/gcm-test.c b/testsuite/gcm-test.c index 8955e9b8..d70cdd1e 100644 --- a/testsuite/gcm-test.c +++ b/testsuite/gcm-test.c @@ -577,6 +577,24 @@ test_main(void) "16aedbf5a0de6a57 a637b39b"), /* iv */ SHEX("5791883f822013f8bd136fc36fb9946b")); /* tag */ + /* + * GCM-SM4 Test Vectors from + * https://datatracker.ietf.org/doc/html/rfc8998 + */ + test_aead(&nettle_gcm_sm4, NULL, + SHEX("0123456789ABCDEFFEDCBA9876543210"), + SHEX("FEEDFACEDEADBEEFFEEDFACEDEADBEEFABADDAD2"), + SHEX("AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFF" + "EEEEEEEEEEEEEEEEAAAAAAAAAAAAAAAA"), + SHEX("17F399F08C67D5EE19D0DC9969C4BB7D" + "5FD46FD3756489069157B282BB200735" + "D82710CA5C22F0CCFA7CBF93D496AC15" + "A56834CBCF98C397B4024A2691233B8D"), + SHEX("00001234567800000000ABCD"), + SHEX("83DE3541E4C2B58177E065A9BF7B62EC")); + /* Test gcm_hash, with varying message size, keys and iv all zero. Not compared to any other implementation. 
*/ test_gcm_hash (SDATA("a"), diff --git a/testsuite/meta-aead-test.c b/testsuite/meta-aead-test.c index 1fcede40..ceeca227 100644 --- a/testsuite/meta-aead-test.c +++ b/testsuite/meta-aead-test.c @@ -8,6 +8,7 @@ const char* aeads[] = { "gcm_aes256", "gcm_camellia128", "gcm_camellia256", + "gcm_sm4", "eax_aes128", "chacha_poly1305", }; diff --git a/testsuite/meta-cipher-test.c b/testsuite/meta-cipher-test.c index f949fd76..912fac5a 100644 --- a/testsuite/meta-cipher-test.c +++ b/testsuite/meta-cipher-test.c @@ -1,5 +1,6 @@ #include "testutils.h" #include "nettle-meta.h" +#include "nettle-internal.h" const char* ciphers[] = { "aes128", @@ -18,7 +19,8 @@ const char* ciphers[] = { "serpent256", "twofish128", "twofish192", - "twofish256" + "twofish256", + "sm4" }; void @@ -34,8 +36,11 @@ test_main(void) ASSERT(NULL != nettle_ciphers[j]); /* make sure we found a matching cipher */ } j = 0; - while (NULL != nettle_ciphers[j]) - j++; + for (j = 0; NULL != nettle_ciphers[j]; j++) + { + ASSERT(nettle_ciphers[j]->block_size <= NETTLE_MAX_CIPHER_BLOCK_SIZE); + ASSERT(nettle_ciphers[j]->key_size <= NETTLE_MAX_CIPHER_KEY_SIZE); + } ASSERT(j == count); /* we are not missing testing any ciphers */ } diff --git a/testsuite/meta-hash-test.c b/testsuite/meta-hash-test.c index 3aed43fc..6a15e7db 100644 --- a/testsuite/meta-hash-test.c +++ b/testsuite/meta-hash-test.c @@ -36,6 +36,7 @@ test_main(void) } for (i = 0; NULL != nettle_hashes[i]; i++) { + ASSERT(nettle_hashes[i]->block_size <= NETTLE_MAX_HASH_BLOCK_SIZE); ASSERT(nettle_hashes[i]->digest_size <= NETTLE_MAX_HASH_DIGEST_SIZE); ASSERT(nettle_hashes[i]->context_size <= NETTLE_MAX_HASH_CONTEXT_SIZE); } diff --git a/testsuite/siv-gcm-test.c b/testsuite/siv-gcm-test.c new file mode 100644 index 00000000..eba03f23 --- /dev/null +++ b/testsuite/siv-gcm-test.c @@ -0,0 +1,731 @@ +/* siv-gcm-test.c + + Self-test and vectors for AES-GCM-SIV mode ciphers + + Copyright (C) 2022 Red Hat, Inc. + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +/* The test vectors have been collected from the following standards: + * RFC8452 + */ + +#include "testutils.h" +#include "ghash-internal.h" +#include "block-internal.h" +#include "aes.h" +#include "siv-gcm.h" + +/* AEAD ciphers */ +typedef void +nettle_encrypt_message_func(void *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t clength, uint8_t *dst, const uint8_t *src); + +typedef int +nettle_decrypt_message_func(void *ctx, + size_t nlength, const uint8_t *nonce, + size_t alength, const uint8_t *adata, + size_t mlength, uint8_t *dst, const uint8_t *src); + +static void +test_compare_results (const char *name, + const struct tstring *adata, + /* Expected results. */ + const struct tstring *e_clear, + const struct tstring *e_cipher, + /* Actual results. 
*/ + const void *clear, + const void *cipher) +{ + if (!MEMEQ(e_cipher->length, e_cipher->data, cipher)) + { + fprintf (stderr, "%s: encryption failed\nAdata: ", name); + tstring_print_hex (adata); + fprintf (stderr, "\nInput: "); + tstring_print_hex (e_clear); + fprintf (stderr, "\nOutput: "); + print_hex (e_cipher->length, cipher); + fprintf (stderr, "\nExpected:"); + tstring_print_hex (e_cipher); + fprintf (stderr, "\n"); + FAIL(); + } + if (!MEMEQ(e_clear->length, e_clear->data, clear)) + { + fprintf (stderr, "%s decrypt failed:\nAdata:", name); + tstring_print_hex (adata); + fprintf (stderr, "\nInput: "); + tstring_print_hex (e_cipher); + fprintf (stderr, "\nOutput: "); + print_hex (e_clear->length, clear); + fprintf (stderr, "\nExpected:"); + tstring_print_hex (e_clear); + fprintf (stderr, "\n"); + FAIL(); + } +} /* test_compare_results */ + +static void +test_cipher_siv_gcm (const char *name, + nettle_set_key_func *siv_gcm_set_key, + nettle_encrypt_message_func *siv_gcm_encrypt, + nettle_decrypt_message_func *siv_gcm_decrypt, + size_t context_size, size_t key_size, + const struct tstring *key, + const struct tstring *nonce, + const struct tstring *authdata, + const struct tstring *cleartext, + const struct tstring *ciphertext) +{ + void *ctx = xalloc (context_size); + uint8_t *en_data; + uint8_t *de_data; + int ret; + + ASSERT (key->length == key_size); + ASSERT (cleartext->length + SIV_GCM_DIGEST_SIZE == ciphertext->length); + + de_data = xalloc (cleartext->length); + en_data = xalloc (ciphertext->length); + + /* Ensure we get the same answers using the all-in-one API. 
*/ + memset (de_data, 0, cleartext->length); + memset (en_data, 0, ciphertext->length); + + siv_gcm_set_key (ctx, key->data); + siv_gcm_encrypt (ctx, nonce->length, nonce->data, + authdata->length, authdata->data, + ciphertext->length, en_data, cleartext->data); + ret = siv_gcm_decrypt (ctx, nonce->length, nonce->data, + authdata->length, authdata->data, + cleartext->length, de_data, ciphertext->data); + + if (ret != 1) + { + fprintf (stderr, "siv_gcm_decrypt_message failed to validate message\n"); + FAIL(); + } + test_compare_results (name, authdata, + cleartext, ciphertext, de_data, en_data); + + /* Ensure that we can detect corrupted message or tag data. */ + en_data[0] ^= 1; + ret = siv_gcm_decrypt (ctx, nonce->length, nonce->data, + authdata->length, authdata->data, + cleartext->length, de_data, en_data); + if (ret != 0) + { + fprintf (stderr, "siv_gcm_decrypt_message failed to detect corrupted message\n"); + FAIL(); + } + + /* Ensure we can detect corrupted adata. */ + if (authdata->length) + { + en_data[0] ^= 1; + ret = siv_gcm_decrypt (ctx, nonce->length, nonce->data, + authdata->length-1, authdata->data, + cleartext->length, de_data, en_data); + if (ret != 0) + { + fprintf (stderr, "siv_gcm_decrypt_message failed to detect corrupted adata\n"); + FAIL(); + } + } + + free (ctx); + free (en_data); + free (de_data); +} + +#define test_siv_gcm_aes128(name, key, nonce, authdata, cleartext, ciphertext) \ + test_cipher_siv_gcm(name, (nettle_set_key_func*)aes128_set_encrypt_key, \ + (nettle_encrypt_message_func*)siv_gcm_aes128_encrypt_message, \ + (nettle_decrypt_message_func*)siv_gcm_aes128_decrypt_message, \ + sizeof(struct aes128_ctx), AES128_KEY_SIZE, \ + key, nonce, authdata, cleartext, ciphertext) + +#define test_siv_gcm_aes256(name, key, nonce, authdata, cleartext, ciphertext) \ + test_cipher_siv_gcm(name, (nettle_set_key_func*)aes256_set_encrypt_key, \ + (nettle_encrypt_message_func*)siv_gcm_aes256_encrypt_message, \ +
(nettle_decrypt_message_func*)siv_gcm_aes256_decrypt_message, \ + sizeof(struct aes256_ctx), AES256_KEY_SIZE, \ + key, nonce, authdata, cleartext, ciphertext) + +static void +test_polyval_internal (const struct tstring *key, + const struct tstring *message, + const struct tstring *digest) +{ + ASSERT (key->length == GCM_BLOCK_SIZE); + ASSERT (message->length % GCM_BLOCK_SIZE == 0); + ASSERT (digest->length == GCM_BLOCK_SIZE); + struct gcm_key gcm_key; + union nettle_block16 state; + + memcpy (state.b, key->data, GCM_BLOCK_SIZE); + _siv_ghash_set_key (&gcm_key, &state); + + block16_zero (&state); + _siv_ghash_update (&gcm_key, &state, message->length / GCM_BLOCK_SIZE, message->data); + block16_bswap (&state, &state); + + if (!MEMEQ(GCM_BLOCK_SIZE, state.b, digest->data)) + { + fprintf (stderr, "POLYVAL failed\n"); + fprintf (stderr, "Key: "); + tstring_print_hex (key); + fprintf (stderr, "\nMessage: "); + tstring_print_hex (message); + fprintf (stderr, "\nOutput: "); + print_hex (GCM_BLOCK_SIZE, state.b); + fprintf (stderr, "\nExpected:"); + tstring_print_hex (digest); + fprintf (stderr, "\n"); + FAIL(); + } +} + +void +test_main(void) +{ + /* RFC8452, Appendix A. */ + test_polyval_internal (SHEX("25629347589242761d31f826ba4b757b"), + SHEX("4f4f95668c83dfb6401762bb2d01a262" + "d1a24ddd2721d006bbe45f20d3c9f362"), + SHEX("f7a3b47b846119fae5b7866cf5e5b77e")); + + /* RFC8452, Appendix C.1. 
*/ + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX(""), + SHEX("dc20e2d83f25705bb49e439eca56de25")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV 1", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("0100000000000000"), + SHEX("b5d839330ac7b786578782fff6013b81" + "5b287c22493a364c")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("010000000000000000000000"), + SHEX("7323ea61d05932260047d942a4978db3" + "57391a0bc4fdec8b0d106639")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("01000000000000000000000000000000"), + SHEX("743f7c8077ab25f8624e2e948579cf77" + "303aaf90f6fe21199c6068577437a0c4")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("01000000000000000000000000000000" + "02000000000000000000000000000000"), + SHEX("84e07e62ba83a6585417245d7ec413a9" + "fe427d6315c09b57ce45f2e3936a9445" + "1a8e45dcd4578c667cd86847bf6155ff")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("01000000000000000000000000000000" + "02000000000000000000000000000000" + "03000000000000000000000000000000"), + SHEX("3fd24ce1f5a67b75bf2351f181a475c7" + "b800a5b4d3dcf70106b1eea82fa1d64d" + "f42bf7226122fa92e17a40eeaac1201b" + "5e6e311dbf395d35b0fe39c2714388f8")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("01000000000000000000000000000000" + "02000000000000000000000000000000" + "03000000000000000000000000000000" + 
"04000000000000000000000000000000"), + SHEX("2433668f1058190f6d43e360f4f35cd8" + "e475127cfca7028ea8ab5c20f7ab2af0" + "2516a2bdcbc08d521be37ff28c152bba" + "36697f25b4cd169c6590d1dd39566d3f" + "8a263dd317aa88d56bdf3936dba75bb8")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("0200000000000000"), + SHEX("1e6daba35669f4273b0a1a2560969cdf" + "790d99759abd1508")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("020000000000000000000000"), + SHEX("296c7889fd99f41917f4462008299c51" + "02745aaa3a0c469fad9e075a")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("02000000000000000000000000000000"), + SHEX("e2b0c5da79a901c1745f700525cb335b" + "8f8936ec039e4e4bb97ebd8c4457441f")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("02000000000000000000000000000000" + "03000000000000000000000000000000"), + SHEX("620048ef3c1e73e57e02bb8562c416a3" + "19e73e4caac8e96a1ecb2933145a1d71" + "e6af6a7f87287da059a71684ed3498e1")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("02000000000000000000000000000000" + "03000000000000000000000000000000" + "04000000000000000000000000000000"), + SHEX("50c8303ea93925d64090d07bd109dfd9" + "515a5a33431019c17d93465999a8b005" + "3201d723120a8562b838cdff25bf9d1e" + "6a8cc3865f76897c2e4b245cf31c51f2")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("02000000000000000000000000000000" + "03000000000000000000000000000000" + 
"04000000000000000000000000000000" + "05000000000000000000000000000000"), + SHEX("2f5c64059db55ee0fb847ed513003746" + "aca4e61c711b5de2e7a77ffd02da42fe" + "ec601910d3467bb8b36ebbaebce5fba3" + "0d36c95f48a3e7980f0e7ac299332a80" + "cdc46ae475563de037001ef84ae21744")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("010000000000000000000000"), + SHEX("02000000"), + SHEX("a8fe3e8707eb1f84fb28f8cb73de8e99" + "e2f48a14")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01000000000000000000000000000000" + "0200"), + SHEX("03000000000000000000000000000000" + "04000000"), + SHEX("6bb0fecf5ded9b77f902c7d5da236a43" + "91dd029724afc9805e976f451e6d87f6" + "fe106514")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("01000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01000000000000000000000000000000" + "02000000"), + SHEX("03000000000000000000000000000000" + "0400"), + SHEX("44d0aaf6fb2f1f34add5e8064e83e12a" + "2adabff9b2ef00fb47920cc72a0c0f13" + "b9fd")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("e66021d5eb8e4f4066d4adb9c33560e4"), + SHEX("f46e44bb3da0015c94f70887"), + SHEX(""), + SHEX(""), + SHEX("a4194b79071b01a87d65f706e3949578")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("36864200e0eaf5284d884a0e77d31646"), + SHEX("bae8e37fc83441b16034566b"), + SHEX("46bb91c3c5"), + SHEX("7a806c"), + SHEX("af60eb711bd85bc1e4d3e0a462e074ee" + "a428a8")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("aedb64a6c590bc84d1a5e269e4b47801"), + SHEX("afc0577e34699b9e671fdd4f"), + SHEX("fc880c94a95198874296"), + SHEX("bdc66f146545"), + SHEX("bb93a3e34d3cd6a9c45545cfc11f03ad" + "743dba20f966")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("d5cc1fd161320b6920ce07787f86743b"), + SHEX("275d1ab32f6d1f0434d8848c"), + 
SHEX("046787f3ea22c127aaf195d1894728"), + SHEX("1177441f195495860f"), + SHEX("4f37281f7ad12949d01d02fd0cd174c8" + "4fc5dae2f60f52fd2b")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("b3fed1473c528b8426a582995929a149"), + SHEX("9e9ad8780c8d63d0ab4149c0"), + SHEX("c9882e5386fd9f92ec489c8fde2be2cf" + "97e74e93"), + SHEX("9f572c614b4745914474e7c7"), + SHEX("f54673c5ddf710c745641c8bc1dc2f87" + "1fb7561da1286e655e24b7b0")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("2d4ed87da44102952ef94b02b805249b"), + SHEX("ac80e6f61455bfac8308a2d4"), + SHEX("2950a70d5a1db2316fd568378da107b5" + "2b0da55210cc1c1b0a"), + SHEX("0d8c8451178082355c9e940fea2f58"), + SHEX("c9ff545e07b88a015f05b274540aa183" + "b3449b9f39552de99dc214a1190b0b")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("bde3b2f204d1e9f8b06bc47f9745b3d1"), + SHEX("ae06556fb6aa7890bebc18fe"), + SHEX("1860f762ebfbd08284e421702de0de18" + "baa9c9596291b08466f37de21c7f"), + SHEX("6b3db4da3d57aa94842b9803a96e07fb" + "6de7"), + SHEX("6298b296e24e8cc35dce0bed484b7f30" + "d5803e377094f04709f64d7b985310a4" + "db84")); + + test_siv_gcm_aes128 ("AEAD_AES_128_GCM_SIV", + SHEX("f901cfe8a69615a93fdf7a98cad48179"), + SHEX("6245709fb18853f68d833640"), + SHEX("7576f7028ec6eb5ea7e298342a94d4b2" + "02b370ef9768ec6561c4fe6b7e7296fa" + "859c21"), + SHEX("e42a3c02c25b64869e146d7b233987bd" + "dfc240871d"), + SHEX("391cc328d484a4f46406181bcd62efd9" + "b3ee197d052d15506c84a9edd65e13e9" + "d24a2a6e70")); + + /* RFC8452, Appendix C.2. 
*/ + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX(""), + SHEX("07f5f4169bbf55a8400cd47ea6fd400f")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("0100000000000000"), + SHEX("c2ef328e5c71c83b843122130f7364b7" + "61e0b97427e3df28")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("010000000000000000000000"), + SHEX("9aab2aeb3faa0a34aea8e2b18ca50da9" + "ae6559e48fd10f6e5c9ca17e")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("01000000000000000000000000000000"), + SHEX("85a01b63025ba19b7fd3ddfc033b3e76" + "c9eac6fa700942702e90862383c6c366")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("01000000000000000000000000000000" + "02000000000000000000000000000000"), + SHEX("4a6a9db4c8c6549201b9edb53006cba8" + "21ec9cf850948a7c86c68ac7539d027f" + "e819e63abcd020b006a976397632eb5d")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("01000000000000000000000000000000" + "02000000000000000000000000000000" + "03000000000000000000000000000000"), + SHEX("c00d121893a9fa603f48ccc1ca3c57ce" + "7499245ea0046db16c53c7c66fe717e3" + "9cf6c748837b61f6ee3adcee17534ed5" + "790bc96880a99ba804bd12c0e6a22cc4")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + 
SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX(""), + SHEX("01000000000000000000000000000000" + "02000000000000000000000000000000" + "03000000000000000000000000000000" + "04000000000000000000000000000000"), + SHEX("c2d5160a1f8683834910acdafc41fbb1" + "632d4a353e8b905ec9a5499ac34f96c7" + "e1049eb080883891a4db8caaa1f99dd0" + "04d80487540735234e3744512c6f90ce" + "112864c269fc0d9d88c61fa47e39aa08")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("0200000000000000"), + SHEX("1de22967237a813291213f267e3b452f" + "02d01ae33e4ec854")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("020000000000000000000000"), + SHEX("163d6f9cc1b346cd453a2e4cc1a4a19a" + "e800941ccdc57cc8413c277f")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("02000000000000000000000000000000"), + SHEX("c91545823cc24f17dbb0e9e807d5ec17" + "b292d28ff61189e8e49f3875ef91aff7")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("02000000000000000000000000000000" + "03000000000000000000000000000000"), + SHEX("07dad364bfc2b9da89116d7bef6daaaf" + "6f255510aa654f920ac81b94e8bad365" + "aea1bad12702e1965604374aab96dbbc")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("02000000000000000000000000000000" + "03000000000000000000000000000000" + 
"04000000000000000000000000000000"), + SHEX("c67a1f0f567a5198aa1fcc8e3f213143" + "36f7f51ca8b1af61feac35a86416fa47" + "fbca3b5f749cdf564527f2314f42fe25" + "03332742b228c647173616cfd44c54eb")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01"), + SHEX("02000000000000000000000000000000" + "03000000000000000000000000000000" + "04000000000000000000000000000000" + "05000000000000000000000000000000"), + SHEX("67fd45e126bfb9a79930c43aad2d3696" + "7d3f0e4d217c1e551f59727870beefc9" + "8cb933a8fce9de887b1e40799988db1f" + "c3f91880ed405b2dd298318858467c89" + "5bde0285037c5de81e5b570a049b62a0")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("010000000000000000000000"), + SHEX("02000000"), + SHEX("22b3f4cd1835e517741dfddccfa07fa4" + "661b74cf")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01000000000000000000000000000000" + "0200"), + SHEX("03000000000000000000000000000000" + "04000000"), + SHEX("43dd0163cdb48f9fe3212bf61b201976" + "067f342bb879ad976d8242acc188ab59" + "cabfe307")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("01000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("030000000000000000000000"), + SHEX("01000000000000000000000000000000" + "02000000"), + SHEX("03000000000000000000000000000000" + "0400"), + SHEX("462401724b5ce6588d5a54aae5375513" + "a075cfcdf5042112aa29685c912fc205" + "6543")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("e66021d5eb8e4f4066d4adb9c33560e4" + "f46e44bb3da0015c94f7088736864200"), + SHEX("e0eaf5284d884a0e77d31646"), + SHEX(""), + SHEX(""), + SHEX("169fbb2fbf389a995f6390af22228a62")); + + test_siv_gcm_aes256 
("AEAD_AES_256_GCM_SIV", + SHEX("bae8e37fc83441b16034566b7a806c46" + "bb91c3c5aedb64a6c590bc84d1a5e269"), + SHEX("e4b47801afc0577e34699b9e"), + SHEX("4fbdc66f14"), + SHEX("671fdd"), + SHEX("0eaccb93da9bb81333aee0c785b240d3" + "19719d")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("6545fc880c94a95198874296d5cc1fd1" + "61320b6920ce07787f86743b275d1ab3"), + SHEX("2f6d1f0434d8848c1177441f"), + SHEX("6787f3ea22c127aaf195"), + SHEX("195495860f04"), + SHEX("a254dad4f3f96b62b84dc40c84636a5e" + "c12020ec8c2c")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("d1894728b3fed1473c528b8426a58299" + "5929a1499e9ad8780c8d63d0ab4149c0"), + SHEX("9f572c614b4745914474e7c7"), + SHEX("489c8fde2be2cf97e74e932d4ed87d"), + SHEX("c9882e5386fd9f92ec"), + SHEX("0df9e308678244c44bc0fd3dc6628dfe" + "55ebb0b9fb2295c8c2")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("a44102952ef94b02b805249bac80e6f6" + "1455bfac8308a2d40d8c845117808235"), + SHEX("5c9e940fea2f582950a70d5a"), + SHEX("0da55210cc1c1b0abde3b2f204d1e9f8" + "b06bc47f"), + SHEX("1db2316fd568378da107b52b"), + SHEX("8dbeb9f7255bf5769dd56692404099c2" + "587f64979f21826706d497d5")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("9745b3d1ae06556fb6aa7890bebc18fe" + "6b3db4da3d57aa94842b9803a96e07fb"), + SHEX("6de71860f762ebfbd08284e4"), + SHEX("f37de21c7ff901cfe8a69615a93fdf7a" + "98cad481796245709f"), + SHEX("21702de0de18baa9c9596291b08466"), + SHEX("793576dfa5c0f88729a7ed3c2f1bffb3" + "080d28f6ebb5d3648ce97bd5ba67fd")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("b18853f68d833640e42a3c02c25b6486" + "9e146d7b233987bddfc240871d7576f7"), + SHEX("028ec6eb5ea7e298342a94d4"), + SHEX("9c2159058b1f0fe91433a5bdc20e214e" + "ab7fecef4454a10ef0657df21ac7"), + SHEX("b202b370ef9768ec6561c4fe6b7e7296" + "fa85"), + SHEX("857e16a64915a787637687db4a951963" + "5cdd454fc2a154fea91f8363a39fec7d" + "0a49")); + + test_siv_gcm_aes256 ("AEAD_AES_256_GCM_SIV", + SHEX("3c535de192eaed3822a2fbbe2ca9dfc8" + 
"8255e14a661b8aa82cc54236093bbc23"), + SHEX("688089e55540db1872504e1c"), + SHEX("734320ccc9d9bbbb19cb81b2af4ecbc3" + "e72834321f7aa0f70b7282b4f33df23f" + "167541"), + SHEX("ced532ce4159b035277d4dfbb7db6296" + "8b13cd4eec"), + SHEX("626660c26ea6612fb17ad91e8e767639" + "edd6c9faee9d6c7029675b89eaf4ba1d" + "ed1a286594")); + + /* RFC8452, Appendix C.3. */ + test_siv_gcm_aes256 ("Counter wrap", + SHEX("00000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("000000000000000000000000"), + SHEX(""), + SHEX("00000000000000000000000000000000" + "4db923dc793ee6497c76dcc03a98e108"), + SHEX("f3f80f2cf0cb2dd9c5984fcda908456c" + "c537703b5ba70324a6793a7bf218d3ea" + "ffffffff000000000000000000000000")); + + test_siv_gcm_aes256 ("Counter wrap", + SHEX("00000000000000000000000000000000" + "00000000000000000000000000000000"), + SHEX("000000000000000000000000"), + SHEX(""), + SHEX("eb3640277c7ffd1303c7a542d02d3e4c" + "0000000000000000"), + SHEX("18ce4f0b8cb4d0cac65fea8f79257b20" + "888e53e72299e56dffffffff00000000" + "0000000000000000")); +} diff --git a/testsuite/sm4-test.c b/testsuite/sm4-test.c new file mode 100644 index 00000000..97d9d58a --- /dev/null +++ b/testsuite/sm4-test.c @@ -0,0 +1,19 @@ +#include "testutils.h" +#include "sm4.h" + +void +test_main(void) +{ + /* test vectors from: + * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html + */ + test_cipher(&nettle_sm4, + SHEX("0123456789ABCDEF FEDCBA9876543210"), + SHEX("0123456789ABCDEF FEDCBA9876543210"), + SHEX("681EDF34D206965E 86B3E94F536E4246")); + + test_cipher(&nettle_sm4, + SHEX("FEDCBA9876543210 0123456789ABCDEF"), + SHEX("0001020304050607 08090A0B0C0D0E0F"), + SHEX("F766678F13F01ADE AC1B3EA955ADB594")); +} diff --git a/testsuite/testutils.c b/testsuite/testutils.c index 0d91d8ef..39c6bece 100644 --- a/testsuite/testutils.c +++ b/testsuite/testutils.c @@ -1109,6 +1109,13 @@ mpz_urandomb (mpz_t r, struct knuth_lfib_ctx *ctx, mp_bitcnt_t bits) nettle_mpz_set_str_256_u (r, bytes, buf); free 
(buf); } +void +mpz_urandomm (mpz_t r, struct knuth_lfib_ctx *ctx, const mpz_t n) +{ + /* Add some extra bits, to make result almost unbiased. */ + mpz_urandomb(r, ctx, mpz_sizeinbase(n, 2) + 30); + mpz_mod(r, r, n); +} #else /* !NETTLE_USE_MINI_GMP */ static void get_random_seed(mpz_t seed) diff --git a/testsuite/testutils.h b/testsuite/testutils.h index 3e239787..00555b3a 100644 --- a/testsuite/testutils.h +++ b/testsuite/testutils.h @@ -164,8 +164,10 @@ typedef struct knuth_lfib_ctx gmp_randstate_t[1]; void gmp_randinit_default (struct knuth_lfib_ctx *ctx); #define gmp_randclear(state) void mpz_urandomb (mpz_t r, struct knuth_lfib_ctx *ctx, mp_bitcnt_t bits); +void mpz_urandomm (mpz_t r, struct knuth_lfib_ctx *ctx, const mpz_t n); /* This is cheating */ #define mpz_rrandomb mpz_urandomb +#define mpz_rrandomm mpz_urandomm static inline int test_randomize (gmp_randstate_t rands UNUSED) { return 0; } #else /* !NETTLE_USE_MINI_GMP */ diff --git a/x86_64/fat/sha256-compress-2.asm b/x86_64/fat/sha256-compress-n-2.asm index 996cf8c5..60f7c8f6 100644 --- a/x86_64/fat/sha256-compress-2.asm +++ b/x86_64/fat/sha256-compress-n-2.asm @@ -1,4 +1,4 @@ -C x86_64/fat/sha256-compress-2.asm +C x86_64/fat/sha256-compress-n-2.asm ifelse(` Copyright (C) 2018 Niels Möller @@ -31,4 +31,4 @@ ifelse(` ') define(`fat_transform', `$1_sha_ni') -include_src(`x86_64/sha_ni/sha256-compress.asm') +include_src(`x86_64/sha_ni/sha256-compress-n.asm') diff --git a/x86_64/fat/sha256-compress.asm b/x86_64/fat/sha256-compress-n.asm index 2aaeb5e8..fc358858 100644 --- a/x86_64/fat/sha256-compress.asm +++ b/x86_64/fat/sha256-compress-n.asm @@ -1,4 +1,4 @@ -C x86_64/fat/sha256-compress.asm +C x86_64/fat/sha256-compress-n.asm ifelse(` Copyright (C) 2018 Niels Möller @@ -31,4 +31,4 @@ ifelse(` ') define(`fat_transform', `$1_x86_64') -include_src(`x86_64/sha256-compress.asm') +include_src(`x86_64/sha256-compress-n.asm') diff --git a/x86_64/poly1305-blocks.asm b/x86_64/poly1305-blocks.asm new file mode 
100644 index 00000000..63bfed3e --- /dev/null +++ b/x86_64/poly1305-blocks.asm @@ -0,0 +1,128 @@ +C x86_64/poly1305-blocks.asm + +ifelse(` + Copyright (C) 2022 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + + .file "poly1305-blocks.asm" + +define(`CTX', `%rdi') C First argument to all functions +define(`BLOCKS', `%rsi') +define(`MP_PARAM', `%rdx') C Moved to MP, to not collide with mul instruction. + +define(`MP', `%r8') C May clobber, both with unix and windows conventions. 
+define(`T0', `%rbx') +define(`T1', `%rcx') +define(`H0', `%rbp') +define(`H1', `%r9') +define(`H2', `%r10') +define(`F0', `%r11') +define(`F1', `%r12') + +C const uint8_t * +C _nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m) + +PROLOGUE(_nettle_poly1305_blocks) + W64_ENTRY(3, 0) + mov MP_PARAM, MP + test BLOCKS, BLOCKS + jz .Lend + + push %rbx + push %rbp + push %r12 + mov P1305_H0 (CTX), H0 + mov P1305_H1 (CTX), H1 + mov P1305_H2 (CTX), H2 + ALIGN(16) +.Loop: + mov (MP), T0 + mov 8(MP), T1 + add $16, MP + + add H0, T0 + adc H1, T1 + adc $1, H2 + + mov P1305_R1 (CTX), %rax + mul T0 C R1*T0 + mov %rax, F0 + mov %rdx, F1 + + mov T0, %rax C Last use of T0 input + mov P1305_R0 (CTX), T0 + mul T0 C R0*T0 + mov %rax, H0 + mov %rdx, H1 + + mov T1, %rax + mul T0 C R0*T1 + add %rax, F0 + adc %rdx, F1 + + mov P1305_S1 (CTX), T0 + mov T1, %rax C Last use of T1 input + mul T0 C S1*T1 + add %rax, H0 + adc %rdx, H1 + + mov H2, %rax + mul T0 C S1*H2 + add %rax, F0 + adc %rdx, F1 + + mov H2, T0 + and $3, H2 + + shr $2, T0 + mov P1305_S0 (CTX), %rax + mul T0 C S0*(H2 >> 2) + add %rax, H0 + adc %rdx, H1 + + imul P1305_R0 (CTX), H2 C R0*(H2 & 3) + add F0, H1 + adc F1, H2 + + dec BLOCKS + jnz .Loop + + mov H0, P1305_H0 (CTX) + mov H1, P1305_H1 (CTX) + mov H2, P1305_H2 (CTX) + + pop %r12 + pop %rbp + pop %rbx + +.Lend: + mov MP, %rax + W64_EXIT(3, 0) + ret +EPILOGUE(_nettle_poly1305_blocks) diff --git a/x86_64/poly1305-internal.asm b/x86_64/poly1305-internal.asm index ef2f38e4..7ce415a4 100644 --- a/x86_64/poly1305-internal.asm +++ b/x86_64/poly1305-internal.asm @@ -106,7 +106,7 @@ PROLOGUE(_nettle_poly1305_block) adc P1305_H2 (CTX), T2 mov P1305_R1 (CTX), %rax - mul T0 C R1 T0 + mul T0 C R1*T0 mov %rax, F0 mov %rdx, F1 diff --git a/x86_64/sha256-compress.asm b/x86_64/sha256-compress-n.asm index 5ed669b1..e10d260c 100644 --- a/x86_64/sha256-compress.asm +++ b/x86_64/sha256-compress-n.asm @@ -1,7 +1,7 @@ -C x86_64/sha256-compress.asm +C 
x86_64/sha256-compress-n.asm ifelse(` - Copyright (C) 2013 Niels Möller + Copyright (C) 2013, 2022 Niels Möller This file is part of GNU Nettle. @@ -30,21 +30,24 @@ ifelse(` not, see http://www.gnu.org/licenses/. ') - .file "sha256-compress.asm" + .file "sha256-compress-n.asm" define(`STATE', `%rdi') -define(`INPUT', `%rsi') -define(`K', `%rdx') +define(`K', `%rsi') +define(`BLOCKS', `%rdx') +define(`INPUT', `%rcx') +define(`STATE_SAVED', `64(%rsp)') + define(`SA', `%eax') define(`SB', `%ebx') -define(`SC', `%ecx') +define(`SC', `%ebp') define(`SD', `%r8d') define(`SE', `%r9d') define(`SF', `%r10d') define(`SG', `%r11d') define(`SH', `%r12d') define(`T0', `%r13d') -define(`T1', `%edi') C Overlap STATE -define(`COUNT', `%r14') +define(`T1', `%r14d') +define(`COUNT', `%rdi') C Overlap STATE define(`W', `%r15d') define(`EXPN', ` @@ -123,18 +126,21 @@ define(`NOEXPN', ` movl W, OFFSET($1)(%rsp, COUNT, 4) ') - C void - C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) + C const uint8_t * + C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, + C size_t blocks, const uint8_t *input) .text ALIGN(16) -PROLOGUE(_nettle_sha256_compress) +PROLOGUE(_nettle_sha256_compress_n) - W64_ENTRY(3, 0) + W64_ENTRY(4, 0) C Function takes four arguments + test BLOCKS, BLOCKS + jz .Lend sub $120, %rsp - mov %rbx, 64(%rsp) - mov STATE, 72(%rsp) C Save state, to free a register + mov STATE, STATE_SAVED C Save state, to free a register + mov %rbx, 72(%rsp) mov %rbp, 80(%rsp) mov %r12, 88(%rsp) mov %r13, 96(%rsp) @@ -149,7 +155,9 @@ PROLOGUE(_nettle_sha256_compress) movl 20(STATE), SF movl 24(STATE), SG movl 28(STATE), SH - xor COUNT, COUNT + +.Loop_block: + xorl XREG(COUNT), XREG(COUNT) ALIGN(16) .Loop1: @@ -161,8 +169,8 @@ PROLOGUE(_nettle_sha256_compress) NOEXPN(5) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,5) NOEXPN(6) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,6) NOEXPN(7) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,7) - add $8, COUNT - cmp $16, COUNT + addl $8, XREG(COUNT) + cmpl $16, XREG(COUNT) jne .Loop1 .Loop2: @@ -182,22 +190,35
@@ PROLOGUE(_nettle_sha256_compress) EXPN(13) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,13) EXPN(14) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,14) EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,15) - add $16, COUNT - cmp $64, COUNT + addl $16, XREG(COUNT) + cmpl $64, XREG(COUNT) jne .Loop2 - mov 72(%rsp), STATE - - addl SA, (STATE) - addl SB, 4(STATE) - addl SC, 8(STATE) - addl SD, 12(STATE) - addl SE, 16(STATE) - addl SF, 20(STATE) - addl SG, 24(STATE) - addl SH, 28(STATE) - - mov 64(%rsp), %rbx + mov STATE_SAVED, STATE + + addl (STATE), SA + addl 4(STATE), SB + addl 8(STATE), SC + addl 12(STATE), SD + addl 16(STATE), SE + addl 20(STATE), SF + addl 24(STATE), SG + addl 28(STATE), SH + + movl SA, (STATE) + movl SB, 4(STATE) + movl SC, 8(STATE) + movl SD, 12(STATE) + movl SE, 16(STATE) + movl SF, 20(STATE) + movl SG, 24(STATE) + movl SH, 28(STATE) + + add $64, INPUT + dec BLOCKS + jnz .Loop_block + + mov 72(%rsp), %rbx mov 80(%rsp), %rbp mov 88(%rsp), %r12 mov 96(%rsp), %r13 @@ -205,6 +226,8 @@ PROLOGUE(_nettle_sha256_compress) mov 112(%rsp),%r15 add $120, %rsp +.Lend: + mov INPUT, %rax - W64_EXIT(3, 0) + W64_EXIT(4, 0) ret -EPILOGUE(_nettle_sha256_compress) +EPILOGUE(_nettle_sha256_compress_n) diff --git a/x86_64/sha_ni/sha256-compress.asm b/x86_64/sha_ni/sha256-compress-n.asm index 00bd3cd3..005909df 100644 --- a/x86_64/sha_ni/sha256-compress.asm +++ b/x86_64/sha_ni/sha256-compress-n.asm @@ -1,7 +1,7 @@ -C x86_64/sha_ni/sha256-compress.asm +C x86_64/sha_ni/sha256-compress-n.asm ifelse(` - Copyright (C) 2018 Niels Möller + Copyright (C) 2018, 2022 Niels Möller This file is part of GNU Nettle. @@ -30,10 +30,11 @@ ifelse(` not, see http://www.gnu.org/licenses/.
') - .file "sha256-compress.asm" + .file "sha256-compress-n.asm" define(`STATE', `%rdi') -define(`INPUT', `%rsi') -define(`K', `%rdx') +define(`K', `%rsi') +define(`BLOCKS', `%rdx') +define(`INPUT', `%rcx') define(`MSGK',`%xmm0') C Implicit operand of sha256rnds2 define(`MSG0',`%xmm1') @@ -45,7 +46,7 @@ define(`CDGH',`%xmm6') define(`ABEF_ORIG',`%xmm7') define(`CDGH_ORIG', `%xmm8') define(`SWAP_MASK',`%xmm9') -define(`TMP', `%xmm9') C Overlaps SWAP_MASK +define(`TMP', `%xmm10') C QROUND(M0, M1, M2, M3, R) define(`QROUND', ` @@ -69,15 +70,19 @@ define(`TRANSPOSE', ` punpcklqdq $1, $3 ') - C void - C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) + C const uint8_t * + C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, + C size_t blocks, const uint8_t *input) .text ALIGN(16) .Lswap_mask: .byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12 -PROLOGUE(_nettle_sha256_compress) - W64_ENTRY(3, 10) +PROLOGUE(_nettle_sha256_compress_n) + W64_ENTRY(4, 11) + test BLOCKS, BLOCKS + jz .Lend + movups (STATE), TMP movups 16(STATE), ABEF @@ -88,12 +93,13 @@ PROLOGUE(_nettle_sha256_compress) movdqa .Lswap_mask(%rip), SWAP_MASK - movdqa ABEF, ABEF_ORIG - movdqa CDGH, CDGH_ORIG - +.Loop: movups (INPUT), MSG0 pshufb SWAP_MASK, MSG0 + movdqa ABEF, ABEF_ORIG + movdqa CDGH, CDGH_ORIG + movdqa (K), MSGK paddd MSG0, MSGK sha256rnds2 ABEF, CDGH C Round 0-1 @@ -163,6 +169,10 @@ PROLOGUE(_nettle_sha256_compress) paddd ABEF_ORIG, ABEF paddd CDGH_ORIG, CDGH + add $64, INPUT + dec BLOCKS + jnz .Loop + TRANSPOSE(ABEF, CDGH, TMP) pshufd $0x1b, CDGH, CDGH @@ -170,6 +180,8 @@ PROLOGUE(_nettle_sha256_compress) movups CDGH, 0(STATE) movups TMP, 16(STATE) - W64_EXIT(3, 10) +.Lend: + mov INPUT, %rax + W64_EXIT(4, 11) ret -EPILOGUE(_nettle_sha256_compress) +EPILOGUE(_nettle_sha256_compress_n) |