 cipher/bulkhelp.h      | 103 +++++
 cipher/camellia-glue.c |  78 +---
 cipher/serpent.c       |  99 +---
 cipher/sm4.c           |  63 +--
 cipher/twofish.c       |  37 +-
 5 files changed, 132 insertions(+), 248 deletions(-)
diff --git a/cipher/bulkhelp.h b/cipher/bulkhelp.h new file mode 100644 index 00000000..72668d42 --- /dev/null +++ b/cipher/bulkhelp.h @@ -0,0 +1,103 @@ +/* bulkhelp.h - Some bulk processing helpers + * Copyright (C) 2022 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ +#ifndef GCRYPT_BULKHELP_H +#define GCRYPT_BULKHELP_H + + +#include "g10lib.h" +#include "cipher-internal.h" + + +#ifdef __x86_64__ +/* Use u64 to store pointers for x32 support (assembly function assumes + * 64-bit pointers). 
*/ +typedef u64 ocb_L_uintptr_t; +#else +typedef uintptr_t ocb_L_uintptr_t; +#endif + + +static inline ocb_L_uintptr_t * +bulk_ocb_prepare_L_pointers_array_blk32 (gcry_cipher_hd_t c, + ocb_L_uintptr_t Ls[32], u64 blkn) +{ + unsigned int n = 32 - (blkn % 32); + unsigned int i; + + for (i = 0; i < 32; i += 8) + { + Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4]; + Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + return &Ls[(31 + n) % 32]; +} + + +static inline ocb_L_uintptr_t * +bulk_ocb_prepare_L_pointers_array_blk16 (gcry_cipher_hd_t c, + ocb_L_uintptr_t Ls[16], u64 blkn) +{ + unsigned int n = 16 - (blkn % 16); + unsigned int i; + + for (i = 0; i < 16; i += 8) + { + Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + return &Ls[(15 + n) % 16]; +} + + +static inline ocb_L_uintptr_t * +bulk_ocb_prepare_L_pointers_array_blk8 (gcry_cipher_hd_t c, + ocb_L_uintptr_t Ls[8], u64 blkn) +{ + unsigned int n = 8 - (blkn % 8); + + Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(1 + 
n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + + return &Ls[(7 + n) % 8]; +} + + +#endif /*GCRYPT_BULKHELP_H*/ diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c index 7f009db4..7f6e92d2 100644 --- a/cipher/camellia-glue.c +++ b/cipher/camellia-glue.c @@ -65,6 +65,7 @@ #include "bufhelp.h" #include "cipher-internal.h" #include "cipher-selftest.h" +#include "bulkhelp.h" /* Helper macro to force alignment to 16 bytes. */ #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED @@ -788,9 +789,7 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, { int did_use_aesni_avx2 = 0; u64 Ls[32]; - unsigned int n = 32 - (blkn % 32); u64 *l; - int i; if (nblocks >= 32) { @@ -808,24 +807,7 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, bulk_ocb_fn = encrypt ? _gcry_camellia_gfni_avx2_ocb_enc : _gcry_camellia_gfni_avx2_ocb_dec; #endif - - for (i = 0; i < 32; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). 
*/ - Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4]; - Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(31 + n) % 32]; + l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn); /* Process data in 32 block chunks. */ while (nblocks >= 32) @@ -860,27 +842,11 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, { int did_use_aesni_avx = 0; u64 Ls[16]; - unsigned int n = 16 - (blkn % 16); u64 *l; - int i; if (nblocks >= 16) { - for (i = 0; i < 16; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(15 + n) % 16]; + l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. 
*/ while (nblocks >= 16) @@ -947,9 +913,7 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, { int did_use_aesni_avx2 = 0; u64 Ls[32]; - unsigned int n = 32 - (blkn % 32); u64 *l; - int i; if (nblocks >= 32) { @@ -965,23 +929,7 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, bulk_auth_fn = _gcry_camellia_gfni_avx2_ocb_auth; #endif - for (i = 0; i < 32; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4]; - Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(31 + n) % 32]; + l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn); /* Process data in 32 block chunks. */ while (nblocks >= 32) @@ -1016,27 +964,11 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, { int did_use_aesni_avx = 0; u64 Ls[16]; - unsigned int n = 16 - (blkn % 16); u64 *l; - int i; if (nblocks >= 16) { - for (i = 0; i < 16; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). 
*/ - Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(15 + n) % 16]; + l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. */ while (nblocks >= 16) diff --git a/cipher/serpent.c b/cipher/serpent.c index 159d889f..dfe5cc28 100644 --- a/cipher/serpent.c +++ b/cipher/serpent.c @@ -31,6 +31,7 @@ #include "bufhelp.h" #include "cipher-internal.h" #include "cipher-selftest.h" +#include "bulkhelp.h" /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */ @@ -1272,27 +1273,11 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, { int did_use_avx2 = 0; u64 Ls[16]; - unsigned int n = 16 - (blkn % 16); u64 *l; - int i; if (nblocks >= 16) { - for (i = 0; i < 16; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(15 + n) % 16]; + l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. 
*/ while (nblocks >= 16) @@ -1329,21 +1314,11 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, { int did_use_sse2 = 0; u64 Ls[8]; - unsigned int n = 8 - (blkn % 8); u64 *l; if (nblocks >= 8) { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - l = &Ls[(7 + n) % 8]; + l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn); /* Process data in 8 block chunks. */ while (nblocks >= 8) @@ -1380,33 +1355,25 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, if (ctx->use_neon) { int did_use_neon = 0; - const void *Ls[8]; - unsigned int n = 8 - (blkn % 8); - const void **l; + uintptr_t Ls[8]; + uintptr_t *l; if (nblocks >= 8) { - Ls[(0 + n) % 8] = c->u_mode.ocb.L[0]; - Ls[(1 + n) % 8] = c->u_mode.ocb.L[1]; - Ls[(2 + n) % 8] = c->u_mode.ocb.L[0]; - Ls[(3 + n) % 8] = c->u_mode.ocb.L[2]; - Ls[(4 + n) % 8] = c->u_mode.ocb.L[0]; - Ls[(5 + n) % 8] = c->u_mode.ocb.L[1]; - Ls[(6 + n) % 8] = c->u_mode.ocb.L[0]; - l = &Ls[(7 + n) % 8]; + l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn); /* Process data in 8 block chunks. 
*/ while (nblocks >= 8) { blkn += 8; - *l = ocb_get_l(c, blkn - blkn % 8); + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8); if (encrypt) _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, - c->u_ctr.ctr, Ls); + c->u_ctr.ctr, (void **)Ls); else _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, - c->u_ctr.ctr, Ls); + c->u_ctr.ctr, (void **)Ls); nblocks -= 8; outbuf += 8 * sizeof(serpent_block_t); @@ -1456,27 +1423,11 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, { int did_use_avx2 = 0; u64 Ls[16]; - unsigned int n = 16 - (blkn % 16); u64 *l; - int i; if (nblocks >= 16) { - for (i = 0; i < 16; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(15 + n) % 16]; + l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. */ while (nblocks >= 16) @@ -1508,21 +1459,11 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, { int did_use_sse2 = 0; u64 Ls[8]; - unsigned int n = 8 - (blkn % 8); u64 *l; if (nblocks >= 8) { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). 
*/ - Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - l = &Ls[(7 + n) % 8]; + l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn); /* Process data in 8 block chunks. */ while (nblocks >= 8) @@ -1554,29 +1495,21 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, if (ctx->use_neon) { int did_use_neon = 0; - const void *Ls[8]; - unsigned int n = 8 - (blkn % 8); - const void **l; + uintptr_t Ls[8]; + uintptr_t *l; if (nblocks >= 8) { - Ls[(0 + n) % 8] = c->u_mode.ocb.L[0]; - Ls[(1 + n) % 8] = c->u_mode.ocb.L[1]; - Ls[(2 + n) % 8] = c->u_mode.ocb.L[0]; - Ls[(3 + n) % 8] = c->u_mode.ocb.L[2]; - Ls[(4 + n) % 8] = c->u_mode.ocb.L[0]; - Ls[(5 + n) % 8] = c->u_mode.ocb.L[1]; - Ls[(6 + n) % 8] = c->u_mode.ocb.L[0]; - l = &Ls[(7 + n) % 8]; + l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn); /* Process data in 8 block chunks. */ while (nblocks >= 8) { blkn += 8; - *l = ocb_get_l(c, blkn - blkn % 8); + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8); _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, - c->u_mode.ocb.aad_sum, Ls); + c->u_mode.ocb.aad_sum, (void **)Ls); nblocks -= 8; abuf += 8 * sizeof(serpent_block_t); diff --git a/cipher/sm4.c b/cipher/sm4.c index d36d9ceb..0148365c 100644 --- a/cipher/sm4.c +++ b/cipher/sm4.c @@ -30,6 +30,7 @@ #include "bufhelp.h" #include "cipher-internal.h" #include "cipher-selftest.h" +#include "bulkhelp.h" /* Helper macro to force alignment to 64 bytes. 
*/ #ifdef HAVE_GCC_ATTRIBUTE_ALIGNED @@ -1030,27 +1031,11 @@ _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, if (ctx->use_aesni_avx2) { u64 Ls[16]; - unsigned int n = 16 - (blkn % 16); u64 *l; - int i; if (nblocks >= 16) { - for (i = 0; i < 16; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(15 + n) % 16]; + l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. */ while (nblocks >= 16) @@ -1077,22 +1062,11 @@ _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, if (ctx->use_aesni_avx) { u64 Ls[8]; - unsigned int n = 8 - (blkn % 8); u64 *l; if (nblocks >= 8) { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(7 + n) % 8]; + l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn); /* Process data in 8 block chunks. 
*/ while (nblocks >= 8) @@ -1184,27 +1158,11 @@ _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) if (ctx->use_aesni_avx2) { u64 Ls[16]; - unsigned int n = 16 - (blkn % 16); u64 *l; - int i; if (nblocks >= 16) { - for (i = 0; i < 16; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(15 + n) % 16]; + l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. */ while (nblocks >= 16) @@ -1227,22 +1185,11 @@ _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) if (ctx->use_aesni_avx) { u64 Ls[8]; - unsigned int n = 8 - (blkn % 8); u64 *l; if (nblocks >= 8) { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(7 + n) % 8]; + l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn); /* Process data in 8 block chunks. 
*/ while (nblocks >= 8) diff --git a/cipher/twofish.c b/cipher/twofish.c index d19e0790..4ae5d5a6 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -47,6 +47,7 @@ #include "bufhelp.h" #include "cipher-internal.h" #include "cipher-selftest.h" +#include "bulkhelp.h" #define TWOFISH_BLOCKSIZE 16 @@ -1358,27 +1359,11 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, { int did_use_avx2 = 0; u64 Ls[16]; - unsigned int n = 16 - (blkn % 16); u64 *l; - int i; if (nblocks >= 16) { - for (i = 0; i < 16; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). */ - Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(15 + n) % 16]; + l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. */ while (nblocks >= 16) @@ -1471,27 +1456,11 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, { int did_use_avx2 = 0; u64 Ls[16]; - unsigned int n = 16 - (blkn % 16); u64 *l; - int i; if (nblocks >= 16) { - for (i = 0; i < 16; i += 8) - { - /* Use u64 to store pointers for x32 support (assembly function - * assumes 64-bit pointers). 
*/ - Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; - Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; - Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; - } - - Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; - l = &Ls[(15 + n) % 16]; + l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn); /* Process data in 16 block chunks. */ while (nblocks >= 16) |