diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-07-27 23:15:24 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-07-31 18:03:36 +0300 |
commit | db5136c4d9331c001bbe91cfe6625ae7738f8575 (patch) | |
tree | 2b8279083095354a63343e46580d5bea93ea2b0a /cipher | |
parent | 2ac6c24aa53024eb415d49f52229e868f72f47f8 (diff) | |
download | libgcrypt-db5136c4d9331c001bbe91cfe6625ae7738f8575.tar.gz |
Simplify AES key schedule implementation
* cipher/rijndael-armv8-ce.c (_gcry_aes_armv8_ce_setkey): New key
schedule with simplified structure and less stack usage.
* cipher/rijndael-internal.h (RIJNDAEL_context_s): Add
'keyschedule32b'.
(keyschenc32b): New.
* cipher/rijndael-ppc-common.h (vec_u32): New.
* cipher/rijndael-ppc.c (vec_bswap32_const): Remove.
(_gcry_aes_sbox4_ppc8): Optimize for less instructions emitted.
(keysched_idx): New.
(_gcry_aes_ppc8_setkey): New key schedule with simplified structure.
* cipher/rijndael-tables.h (rcon): Remove.
* cipher/rijndael.c (sbox4): New.
(do_setkey): New key schedule with simplified structure and less
stack usage.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher')
-rw-r--r-- | cipher/rijndael-armv8-ce.c | 104 | ||||
-rw-r--r-- | cipher/rijndael-internal.h | 12 | ||||
-rw-r--r-- | cipher/rijndael-ppc-common.h | 1 | ||||
-rw-r--r-- | cipher/rijndael-ppc.c | 160 | ||||
-rw-r--r-- | cipher/rijndael-tables.h | 7 | ||||
-rw-r--r-- | cipher/rijndael.c | 118 |
6 files changed, 119 insertions, 283 deletions
diff --git a/cipher/rijndael-armv8-ce.c b/cipher/rijndael-armv8-ce.c index e53c940e..c9c37654 100644 --- a/cipher/rijndael-armv8-ce.c +++ b/cipher/rijndael-armv8-ce.c @@ -1,5 +1,5 @@ /* ARMv8 Crypto Extension AES for Libgcrypt - * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (C) 2016, 2022 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * This file is part of Libgcrypt. * @@ -128,103 +128,47 @@ typedef void (*xts_crypt_fn_t) (const void *keysched, unsigned char *outbuf, unsigned char *tweak, size_t nblocks, unsigned int nrounds); + void _gcry_aes_armv8_ce_setkey (RIJNDAEL_context *ctx, const byte *key) { - union - { - PROPERLY_ALIGNED_TYPE dummy; - byte data[MAXKC][4]; - u32 data32[MAXKC]; - } tkk[2]; unsigned int rounds = ctx->rounds; - int KC = rounds - 6; - unsigned int keylen = KC * 4; - unsigned int i, r, t; + unsigned int KC = rounds - 6; + u32 *W_u32 = ctx->keyschenc32b; + unsigned int i, j; + u32 W_prev; byte rcon = 1; - int j; -#define k tkk[0].data -#define k_u32 tkk[0].data32 -#define tk tkk[1].data -#define tk_u32 tkk[1].data32 -#define W (ctx->keyschenc) -#define W_u32 (ctx->keyschenc32) - - for (i = 0; i < keylen; i++) - { - k[i >> 2][i & 3] = key[i]; - } - for (j = KC-1; j >= 0; j--) + for (i = 0; i < KC; i += 2) { - tk_u32[j] = k_u32[j]; - } - r = 0; - t = 0; - /* Copy values into round key array. */ - for (j = 0; (j < KC) && (r < rounds + 1); ) - { - for (; (j < KC) && (t < 4); j++, t++) - { - W_u32[r][t] = le_bswap32(tk_u32[j]); - } - if (t == 4) - { - r++; - t = 0; - } + W_u32[i + 0] = buf_get_le32(key + i * 4 + 0); + W_u32[i + 1] = buf_get_le32(key + i * 4 + 4); } - while (r < rounds + 1) + for (i = KC, j = KC, W_prev = W_u32[KC - 1]; + i < 4 * (rounds + 1); + i += 2, j += 2) { - tk_u32[0] ^= _gcry_aes_sbox4_armv8_ce(rol(tk_u32[KC - 1], 24)) ^ rcon; + u32 temp0 = W_prev; + u32 temp1; - if (KC != 8) + if (j == KC) { - for (j = 1; j < KC; j++) - { - tk_u32[j] ^= tk_u32[j-1]; - } + j = 0; + temp0 = _gcry_aes_sbox4_armv8_ce(rol(temp0, 24)) ^ rcon; + rcon = ((rcon << 1) ^ (-(rcon >> 7) & 0x1b)) & 0xff; } - else + else if (KC == 8 && j == 4) { - for (j = 1; j < KC/2; j++) - { - tk_u32[j] ^= tk_u32[j-1]; - } - - tk_u32[KC/2] ^= _gcry_aes_sbox4_armv8_ce(tk_u32[KC/2 - 1]); - - for (j = KC/2 + 1; j < KC; j++) - { - tk_u32[j] ^= tk_u32[j-1]; - } + temp0 = _gcry_aes_sbox4_armv8_ce(temp0); } - /* Copy values into round key array. */ - for (j = 0; (j < KC) && (r < rounds + 1); ) - { - for (; (j < KC) && (t < 4); j++, t++) - { - W_u32[r][t] = le_bswap32(tk_u32[j]); - } - if (t == 4) - { - r++; - t = 0; - } - } + temp1 = W_u32[i - KC + 0]; - rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b); + W_u32[i + 0] = temp0 ^ temp1; + W_u32[i + 1] = W_u32[i - KC + 1] ^ temp0 ^ temp1; + W_prev = W_u32[i + 1]; } - -#undef W -#undef tk -#undef k -#undef W_u32 -#undef tk_u32 -#undef k_u32 - wipememory(&tkk, sizeof(tkk)); } /* Make a decryption key from an encryption key. */ diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h index 30604088..52c892fd 100644 --- a/cipher/rijndael-internal.h +++ b/cipher/rijndael-internal.h @@ -160,6 +160,7 @@ typedef struct RIJNDAEL_context_s PROPERLY_ALIGNED_TYPE dummy; byte keyschedule[MAXROUNDS+1][4][4]; u32 keyschedule32[MAXROUNDS+1][4]; + u32 keyschedule32b[(MAXROUNDS+1)*4]; #ifdef USE_PADLOCK /* The key as passed to the padlock engine. It is only used if the padlock engine is used (USE_PADLOCK, below). */ @@ -195,10 +196,11 @@ typedef struct RIJNDAEL_context_s } RIJNDAEL_context ATTR_ALIGNED_16; /* Macros defining alias for the keyschedules. */ -#define keyschenc u1.keyschedule -#define keyschenc32 u1.keyschedule32 -#define keyschdec u2.keyschedule -#define keyschdec32 u2.keyschedule32 -#define padlockkey u1.padlock_key +#define keyschenc u1.keyschedule +#define keyschenc32 u1.keyschedule32 +#define keyschenc32b u1.keyschedule32b +#define keyschdec u2.keyschedule +#define keyschdec32 u2.keyschedule32 +#define padlockkey u1.padlock_key #endif /* G10_RIJNDAEL_INTERNAL_H */ diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h index 3fa9a0b9..e4a90934 100644 --- a/cipher/rijndael-ppc-common.h +++ b/cipher/rijndael-ppc-common.h @@ -30,6 +30,7 @@ typedef vector unsigned char block; +typedef vector unsigned int vec_u32; typedef union { diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index f5c32361..9db1c811 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -1,6 +1,6 @@ /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation * Copyright (C) 2019 Shawn Landden <shawn@git.icu> - * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (C) 2019-2020, 2022 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * This file is part of Libgcrypt. * @@ -34,10 +34,7 @@ #include "rijndael-ppc-common.h" -#ifdef WORDS_BIGENDIAN -static const block vec_bswap32_const = - { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; -#else +#ifndef WORDS_BIGENDIAN static const block vec_bswap32_const_neg = { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 }; #endif @@ -107,134 +104,81 @@ asm_store_be_noswap(block vec, unsigned long offset, void *ptr) static ASM_FUNC_ATTR_INLINE u32 _gcry_aes_sbox4_ppc8(u32 fourbytes) { - union - { - PROPERLY_ALIGNED_TYPE dummy; - block data_vec; - u32 data32[4]; - } u; + vec_u32 vec_fourbyte = { fourbytes, fourbytes, fourbytes, fourbytes }; +#ifdef WORDS_BIGENDIAN + return ((vec_u32)vec_sbox_be((block)vec_fourbyte))[1]; +#else + return ((vec_u32)vec_sbox_be((block)vec_fourbyte))[2]; +#endif +} + - u.data32[0] = fourbytes; - u.data_vec = vec_sbox_be(u.data_vec); - return u.data32[0]; +static ASM_FUNC_ATTR_INLINE unsigned int +keysched_idx(unsigned int in) +{ +#ifdef WORDS_BIGENDIAN + return in; +#else + return (in & ~3U) | (3U - (in & 3U)); +#endif } + void _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key) { - const block bige_const = asm_load_be_const(); - union - { - PROPERLY_ALIGNED_TYPE dummy; - byte data[MAXKC][4]; - u32 data32[MAXKC]; - } tkk[2]; + u32 tk_u32[MAXKC]; unsigned int rounds = ctx->rounds; - int KC = rounds - 6; - unsigned int keylen = KC * 4; - u128_t *ekey = (u128_t *)(void *)ctx->keyschenc; - unsigned int i, r, t; + unsigned int KC = rounds - 6; + u32 *W_u32 = ctx->keyschenc32b; + unsigned int i, j; + u32 tk_prev; byte rcon = 1; - int j; -#define k tkk[0].data -#define k_u32 tkk[0].data32 -#define tk tkk[1].data -#define tk_u32 tkk[1].data32 -#define W (ctx->keyschenc) -#define W_u32 (ctx->keyschenc32) - for (i = 0; i < keylen; i++) + for (i = 0; i < KC; i += 2) { - k[i >> 2][i & 3] = key[i]; + unsigned int idx0 = keysched_idx(i + 0); + unsigned int idx1 = keysched_idx(i + 1); + tk_u32[i + 0] = buf_get_le32(key + i * 4 + 0); + tk_u32[i + 1] = buf_get_le32(key + i * 4 + 4); + W_u32[idx0] = _gcry_bswap32(tk_u32[i + 0]); + W_u32[idx1] = _gcry_bswap32(tk_u32[i + 1]); } - for (j = KC-1; j >= 0; j--) - { - tk_u32[j] = k_u32[j]; - } - r = 0; - t = 0; - /* Copy values into round key array. */ - for (j = 0; (j < KC) && (r < rounds + 1); ) + for (i = KC, j = KC, tk_prev = tk_u32[KC - 1]; + i < 4 * (rounds + 1); + i += 2, j += 2) { - for (; (j < KC) && (t < 4); j++, t++) - { - W_u32[r][t] = le_bswap32(tk_u32[j]); - } - if (t == 4) - { - r++; - t = 0; - } - } - while (r < rounds + 1) - { - tk_u32[0] ^= - le_bswap32( - _gcry_aes_sbox4_ppc8(rol(le_bswap32(tk_u32[KC - 1]), 24)) ^ rcon); + unsigned int idx0 = keysched_idx(i + 0); + unsigned int idx1 = keysched_idx(i + 1); + u32 temp0 = tk_prev; + u32 temp1; - if (KC != 8) + if (j == KC) { - for (j = 1; j < KC; j++) - { - tk_u32[j] ^= tk_u32[j-1]; - } + j = 0; + temp0 = _gcry_aes_sbox4_ppc8(rol(temp0, 24)) ^ rcon; + rcon = ((rcon << 1) ^ (-(rcon >> 7) & 0x1b)) & 0xff; } - else + else if (KC == 8 && j == 4) { - for (j = 1; j < KC/2; j++) - { - tk_u32[j] ^= tk_u32[j-1]; - } - - tk_u32[KC/2] ^= - le_bswap32(_gcry_aes_sbox4_ppc8(le_bswap32(tk_u32[KC/2 - 1]))); - - for (j = KC/2 + 1; j < KC; j++) - { - tk_u32[j] ^= tk_u32[j-1]; - } + temp0 = _gcry_aes_sbox4_ppc8(temp0); } - /* Copy values into round key array. */ - for (j = 0; (j < KC) && (r < rounds + 1); ) - { - for (; (j < KC) && (t < 4); j++, t++) - { - W_u32[r][t] = le_bswap32(tk_u32[j]); - } - if (t == 4) - { - r++; - t = 0; - } - } + temp1 = tk_u32[j + 0]; - rcon = (rcon << 1) ^ (-(rcon >> 7) & 0x1b); - } + tk_u32[j + 0] = temp0 ^ temp1; + tk_u32[j + 1] ^= temp0 ^ temp1; + tk_prev = tk_u32[j + 1]; - /* Store in big-endian order. */ - for (r = 0; r <= rounds; r++) - { -#ifndef WORDS_BIGENDIAN - VEC_STORE_BE(ekey, r, ALIGNED_LOAD (ekey, r), bige_const); -#else - block rvec = ALIGNED_LOAD (ekey, r); - ALIGNED_STORE (ekey, r, - vec_perm(rvec, rvec, vec_bswap32_const)); - (void)bige_const; -#endif + W_u32[idx0] = _gcry_bswap32(tk_u32[j + 0]); + W_u32[idx1] = _gcry_bswap32(tk_u32[j + 1]); } -#undef W -#undef tk -#undef k -#undef W_u32 -#undef tk_u32 -#undef k_u32 - wipememory(&tkk, sizeof(tkk)); + wipememory(tk_u32, sizeof(tk_u32)); } + void _gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) { diff --git a/cipher/rijndael-tables.h b/cipher/rijndael-tables.h index b54d9593..e46ce08c 100644 --- a/cipher/rijndael-tables.h +++ b/cipher/rijndael-tables.h @@ -218,10 +218,3 @@ static struct #define decT dec_tables.T #define inv_sbox dec_tables.inv_sbox - -static const u32 rcon[30] = - { - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, - 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, - 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91 - }; diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 7e75ddd2..f3060ea5 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -422,6 +422,17 @@ static void prefetch_dec(void) +static inline u32 +sbox4(u32 inb4) +{ + u32 out; + out = (encT[(inb4 >> 0) & 0xffU] & 0xff00U) >> 8; + out |= (encT[(inb4 >> 8) & 0xffU] & 0xff00U) >> 0; + out |= (encT[(inb4 >> 16) & 0xffU] & 0xff0000U) << 0; + out |= (encT[(inb4 >> 24) & 0xffU] & 0xff0000U) << 8; + return out; +} + /* Perform the key setup. */ static gcry_err_code_t do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, @@ -431,8 +442,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, static const char *selftest_failed = 0; void (*hw_setkey)(RIJNDAEL_context *ctx, const byte *key) = NULL; int rounds; - int i,j, r, t, rconpointer = 0; - int KC; + unsigned int KC; unsigned int hwfeatures; /* The on-the-fly self tests are only run in non-fips mode. In fips @@ -662,101 +672,43 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, } else { - const byte *sbox = ((const byte *)encT) + 1; - union - { - PROPERLY_ALIGNED_TYPE dummy; - byte data[MAXKC][4]; - u32 data32[MAXKC]; - } tkk[2]; -#define k tkk[0].data -#define k_u32 tkk[0].data32 -#define tk tkk[1].data -#define tk_u32 tkk[1].data32 -#define W (ctx->keyschenc) -#define W_u32 (ctx->keyschenc32) + u32 W_prev; + u32 *W_u32 = ctx->keyschenc32b; + byte rcon = 1; + unsigned int i, j; prefetch_enc(); - for (i = 0; i < keylen; i++) + for (i = 0; i < KC; i += 2) { - k[i >> 2][i & 3] = key[i]; + W_u32[i + 0] = buf_get_le32(key + i * 4 + 0); + W_u32[i + 1] = buf_get_le32(key + i * 4 + 4); } - for (j = KC-1; j >= 0; j--) + for (i = KC, j = KC, W_prev = W_u32[KC - 1]; + i < 4 * (rounds + 1); + i += 2, j += 2) { - tk_u32[j] = k_u32[j]; - } - r = 0; - t = 0; - /* Copy values into round key array. */ - for (j = 0; (j < KC) && (r < rounds + 1); ) - { - for (; (j < KC) && (t < 4); j++, t++) - { - W_u32[r][t] = le_bswap32(tk_u32[j]); - } - if (t == 4) - { - r++; - t = 0; - } - } + u32 temp0 = W_prev; + u32 temp1; - while (r < rounds + 1) - { - /* While not enough round key material calculated calculate - new values. */ - tk[0][0] ^= sbox[tk[KC-1][1] * 4]; - tk[0][1] ^= sbox[tk[KC-1][2] * 4]; - tk[0][2] ^= sbox[tk[KC-1][3] * 4]; - tk[0][3] ^= sbox[tk[KC-1][0] * 4]; - tk[0][0] ^= rcon[rconpointer++]; - - if (KC != 8) + if (j == KC) { - for (j = 1; j < KC; j++) - { - tk_u32[j] ^= tk_u32[j-1]; - } + j = 0; + temp0 = sbox4(rol(temp0, 24)) ^ rcon; + rcon = ((rcon << 1) ^ (-(rcon >> 7) & 0x1b)) & 0xff; } - else + else if (KC == 8 && j == 4) { - for (j = 1; j < KC/2; j++) - { - tk_u32[j] ^= tk_u32[j-1]; - } - tk[KC/2][0] ^= sbox[tk[KC/2 - 1][0] * 4]; - tk[KC/2][1] ^= sbox[tk[KC/2 - 1][1] * 4]; - tk[KC/2][2] ^= sbox[tk[KC/2 - 1][2] * 4]; - tk[KC/2][3] ^= sbox[tk[KC/2 - 1][3] * 4]; - for (j = KC/2 + 1; j < KC; j++) - { - tk_u32[j] ^= tk_u32[j-1]; - } + temp0 = sbox4(temp0); } - /* Copy values into round key array. */ - for (j = 0; (j < KC) && (r < rounds + 1); ) - { - for (; (j < KC) && (t < 4); j++, t++) - { - W_u32[r][t] = le_bswap32(tk_u32[j]); - } - if (t == 4) - { - r++; - t = 0; - } - } + temp1 = W_u32[i - KC + 0]; + + W_u32[i + 0] = temp0 ^ temp1; + W_u32[i + 1] = W_u32[i - KC + 1] ^ temp0 ^ temp1; + W_prev = W_u32[i + 1]; } -#undef W -#undef tk -#undef k -#undef W_u32 -#undef tk_u32 -#undef k_u32 - wipememory(&tkk, sizeof(tkk)); } return 0; |