From 05a4cecae0c02d2b4ee1cadd9c08115beae3a94a Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 14 Jul 2016 17:55:28 +0300 Subject: Add ARMv8/AArch32 Crypto Extension implementation of AES * cipher/Makefile.am: Add 'rijndael-armv8-ce.c' and 'rijndael-armv-aarch32-ce.S'. * cipher/rijndael-armv8-aarch32-ce.S: New. * cipher/rijndael-armv8-ce.c: New. * cipher/rijndael-internal.h (USE_ARM_CE): New. (RIJNDAEL_context_s): Add 'use_arm_ce'. * cipher/rijndael.c [USE_ARM_CE] (_gcry_aes_armv8_ce_setkey) (_gcry_aes_armv8_ce_prepare_decryption) (_gcry_aes_armv8_ce_encrypt, _gcry_aes_armv8_ce_decrypt) (_gcry_aes_armv8_ce_cfb_enc, _gcry_aes_armv8_ce_cbc_enc) (_gcry_aes_armv8_ce_ctr_enc, _gcry_aes_armv8_ce_cfb_dec) (_gcry_aes_armv8_ce_cbc_dec, _gcry_aes_armv8_ce_ocb_crypt) (_gcry_aes_armv8_ce_ocb_auth): New. (do_setkey) [USE_ARM_CE]: Add ARM CE/AES HW feature check and key setup for ARM CE. (prepare_decryption, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc) (_gcry_aes_ctr_enc, _gcry_aes_cfb_dec, _gcry_aes_cbc_dec) (_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth) [USE_ARM_CE]: Add ARM CE support. * configure.ac: Add 'rijndael-armv8-ce.lo' and 'rijndael-armv8-aarch32-ce.lo'. -- Improvement vs ARM assembly on Cortex-A53: AES-128 AES-192 AES-256 CBC enc: 14.8x 12.8x 11.4x CBC dec: 21.4x 20.5x 19.4x CFB enc: 16.2x 13.6x 11.6x CFB dec: 21.6x 20.5x 19.4x CTR: 19.1x 18.6x 17.8x OCB enc: 16.0x 16.2x 16.1x OCB dec: 15.6x 15.9x 15.8x OCB auth: 18.3x 18.4x 18.0x Benchmark on Cortex-A53 (1152 Mhz): Before: AES | nanosecs/byte mebibytes/sec cycles/byte ECB enc | 24.42 ns/B 39.06 MiB/s 28.13 c/B ECB dec | 25.07 ns/B 38.05 MiB/s 28.88 c/B CBC enc | 21.05 ns/B 45.30 MiB/s 24.25 c/B CBC dec | 21.16 ns/B 45.07 MiB/s 24.38 c/B CFB enc | 21.05 ns/B 45.31 MiB/s 24.25 c/B CFB dec | 21.38 ns/B 44.61 MiB/s 24.62 c/B OFB enc | 26.15 ns/B 36.47 MiB/s 30.13 c/B OFB dec | 26.15 ns/B 36.47 MiB/s 30.13 c/B CTR enc | 21.17 ns/B 45.06 MiB/s 24.38 c/B CTR dec | 21.16 ns/B 45.06 MiB/s 24.38 c/B CCM enc | 42.32 ns/B 22.53 MiB/s 48.75 c/B CCM dec | 42.32 ns/B 22.53 MiB/s 48.75 c/B CCM auth | 21.17 ns/B 45.06 MiB/s 24.38 c/B GCM enc | 22.08 ns/B 43.19 MiB/s 25.44 c/B GCM dec | 22.08 ns/B 43.18 MiB/s 25.44 c/B GCM auth | 0.923 ns/B 1032.8 MiB/s 1.06 c/B OCB enc | 26.20 ns/B 36.40 MiB/s 30.18 c/B OCB dec | 25.97 ns/B 36.73 MiB/s 29.91 c/B OCB auth | 24.52 ns/B 38.90 MiB/s 28.24 c/B = AES192 | nanosecs/byte mebibytes/sec cycles/byte ECB enc | 27.83 ns/B 34.26 MiB/s 32.06 c/B ECB dec | 28.54 ns/B 33.42 MiB/s 32.88 c/B CBC enc | 24.47 ns/B 38.97 MiB/s 28.19 c/B CBC dec | 25.27 ns/B 37.74 MiB/s 29.11 c/B CFB enc | 25.08 ns/B 38.02 MiB/s 28.89 c/B CFB dec | 25.31 ns/B 37.68 MiB/s 29.16 c/B OFB enc | 29.57 ns/B 32.25 MiB/s 34.06 c/B OFB dec | 29.57 ns/B 32.25 MiB/s 34.06 c/B CTR enc | 25.24 ns/B 37.78 MiB/s 29.08 c/B CTR dec | 25.24 ns/B 37.79 MiB/s 29.08 c/B CCM enc | 49.81 ns/B 19.15 MiB/s 57.38 c/B CCM dec | 49.80 ns/B 19.15 MiB/s 57.37 c/B CCM auth | 24.58 ns/B 38.80 MiB/s 28.32 c/B GCM enc | 26.15 ns/B 36.47 MiB/s 30.13 c/B GCM dec | 26.11 ns/B 36.52 MiB/s 30.08 c/B GCM auth | 0.923 ns/B 1033.0 MiB/s 1.06 c/B OCB enc | 29.59 ns/B 32.23 MiB/s 34.09 c/B OCB dec | 29.42 ns/B 32.42 MiB/s 33.89 c/B OCB auth | 27.92 ns/B 34.16 MiB/s 32.16 c/B = AES256 | nanosecs/byte mebibytes/sec cycles/byte ECB enc | 31.20 ns/B 30.57 MiB/s 35.94 c/B ECB dec | 31.80 ns/B 29.99 MiB/s 36.63 c/B CBC enc | 27.83 ns/B 34.27 MiB/s 32.06 c/B CBC dec | 27.87 ns/B 34.21 MiB/s 32.11 c/B CFB enc | 27.88 ns/B 34.20 MiB/s 32.12 c/B CFB dec | 28.16 ns/B 33.87 MiB/s 32.44 c/B OFB enc | 32.93 ns/B 28.96 MiB/s 37.94 c/B OFB dec | 32.93 ns/B 28.96 MiB/s 37.94 c/B CTR enc | 27.95 ns/B 34.13 MiB/s 32.19 c/B CTR dec | 27.95 ns/B 34.12 MiB/s 32.20 c/B CCM enc | 55.88 ns/B 17.07 MiB/s 64.38 c/B CCM dec | 55.88 ns/B 17.07 MiB/s 64.38 c/B CCM auth | 27.95 ns/B 34.12 MiB/s 32.20 c/B GCM enc | 28.86 ns/B 33.05 MiB/s 33.25 c/B GCM dec | 28.87 ns/B 33.04 MiB/s 33.25 c/B GCM auth | 0.923 ns/B 1033.0 MiB/s 1.06 c/B OCB enc | 32.96 ns/B 28.94 MiB/s 37.97 c/B OCB dec | 32.73 ns/B 29.14 MiB/s 37.70 c/B OCB auth | 31.29 ns/B 30.48 MiB/s 36.04 c/B After: AES | nanosecs/byte mebibytes/sec cycles/byte ECB enc | 5.10 ns/B 187.0 MiB/s 5.88 c/B ECB dec | 5.27 ns/B 181.0 MiB/s 6.07 c/B CBC enc | 1.41 ns/B 675.8 MiB/s 1.63 c/B CBC dec | 0.992 ns/B 961.7 MiB/s 1.14 c/B CFB enc | 1.30 ns/B 732.4 MiB/s 1.50 c/B CFB dec | 0.991 ns/B 962.7 MiB/s 1.14 c/B OFB enc | 7.05 ns/B 135.2 MiB/s 8.13 c/B OFB dec | 7.05 ns/B 135.2 MiB/s 8.13 c/B CTR enc | 1.11 ns/B 856.9 MiB/s 1.28 c/B CTR dec | 1.11 ns/B 857.0 MiB/s 1.28 c/B CCM enc | 2.58 ns/B 369.8 MiB/s 2.97 c/B CCM dec | 2.58 ns/B 369.5 MiB/s 2.97 c/B CCM auth | 1.58 ns/B 605.2 MiB/s 1.82 c/B GCM enc | 2.04 ns/B 467.9 MiB/s 2.35 c/B GCM dec | 2.04 ns/B 466.6 MiB/s 2.35 c/B GCM auth | 0.923 ns/B 1033.0 MiB/s 1.06 c/B OCB enc | 1.64 ns/B 579.8 MiB/s 1.89 c/B OCB dec | 1.66 ns/B 574.5 MiB/s 1.91 c/B OCB auth | 1.33 ns/B 715.5 MiB/s 1.54 c/B = AES192 | nanosecs/byte mebibytes/sec cycles/byte ECB enc | 5.64 ns/B 169.0 MiB/s 6.50 c/B ECB dec | 5.81 ns/B 164.3 MiB/s 6.69 c/B CBC enc | 1.90 ns/B 502.1 MiB/s 2.19 c/B CBC dec | 1.24 ns/B 771.7 MiB/s 1.42 c/B CFB enc | 1.84 ns/B 517.1 MiB/s 2.12 c/B CFB dec | 1.23 ns/B 772.5 MiB/s 1.42 c/B OFB enc | 7.60 ns/B 125.5 MiB/s 8.75 c/B OFB dec | 7.60 ns/B 125.6 MiB/s 8.75 c/B CTR enc | 1.36 ns/B 702.7 MiB/s 1.56 c/B CTR dec | 1.36 ns/B 702.5 MiB/s 1.56 c/B CCM enc | 3.31 ns/B 287.8 MiB/s 3.82 c/B CCM dec | 3.31 ns/B 288.0 MiB/s 3.81 c/B CCM auth | 2.06 ns/B 462.1 MiB/s 2.38 c/B GCM enc | 2.28 ns/B 418.4 MiB/s 2.63 c/B GCM dec | 2.28 ns/B 418.0 MiB/s 2.63 c/B GCM auth | 0.923 ns/B 1032.8 MiB/s 1.06 c/B OCB enc | 1.83 ns/B 520.1 MiB/s 2.11 c/B OCB dec | 1.84 ns/B 517.8 MiB/s 2.12 c/B OCB auth | 1.52 ns/B 626.1 MiB/s 1.75 c/B = AES256 | nanosecs/byte mebibytes/sec cycles/byte ECB enc | 5.86 ns/B 162.7 MiB/s 6.75 c/B ECB dec | 6.02 ns/B 158.3 MiB/s 6.94 c/B CBC enc | 2.44 ns/B 390.5 MiB/s 2.81 c/B CBC dec | 1.45 ns/B 656.4 MiB/s 1.67 c/B CFB enc | 2.39 ns/B 399.5 MiB/s 2.75 c/B CFB dec | 1.45 ns/B 656.8 MiB/s 1.67 c/B OFB enc | 7.81 ns/B 122.1 MiB/s 9.00 c/B OFB dec | 7.81 ns/B 122.1 MiB/s 9.00 c/B CTR enc | 1.57 ns/B 605.8 MiB/s 1.81 c/B CTR dec | 1.57 ns/B 605.9 MiB/s 1.81 c/B CCM enc | 4.07 ns/B 234.3 MiB/s 4.69 c/B CCM dec | 4.07 ns/B 234.1 MiB/s 4.69 c/B CCM auth | 2.61 ns/B 365.7 MiB/s 3.00 c/B GCM enc | 2.50 ns/B 381.9 MiB/s 2.88 c/B GCM dec | 2.49 ns/B 382.3 MiB/s 2.87 c/B GCM auth | 0.926 ns/B 1029.7 MiB/s 1.07 c/B OCB enc | 2.05 ns/B 465.6 MiB/s 2.36 c/B OCB dec | 2.06 ns/B 462.0 MiB/s 2.38 c/B OCB auth | 1.74 ns/B 548.4 MiB/s 2.00 c/B Signed-off-by: Jussi Kivilinna --- cipher/rijndael-armv8-ce.c | 469 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 469 insertions(+) create mode 100644 cipher/rijndael-armv8-ce.c (limited to 'cipher/rijndael-armv8-ce.c') diff --git a/cipher/rijndael-armv8-ce.c b/cipher/rijndael-armv8-ce.c new file mode 100644 index 00000000..bed40665 --- /dev/null +++ b/cipher/rijndael-armv8-ce.c @@ -0,0 +1,469 @@ +/* ARMv8 Crypto Extension AES for Libgcrypt + * Copyright (C) 2016 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + * + */ + +#include +#include +#include +#include /* for memcmp() */ + +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "cipher-selftest.h" +#include "rijndael-internal.h" +#include "./cipher-internal.h" + + +#ifdef USE_ARM_CE + + +typedef struct u128_s { u32 a, b, c, d; } u128_t; + +extern u32 _gcry_aes_sbox4_armv8_ce(u32 in4b); +extern void _gcry_aes_invmixcol_armv8_ce(u128_t *dst, const u128_t *src); + +extern unsigned int _gcry_aes_enc_armv8_ce(const void *keysched, byte *dst, + const byte *src, + unsigned int nrounds); +extern unsigned int _gcry_aes_dec_armv8_ce(const void *keysched, byte *dst, + const byte *src, + unsigned int nrounds); + +extern void _gcry_aes_cbc_enc_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + int cbc_mac, unsigned int nrounds); +extern void _gcry_aes_cbc_dec_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + unsigned int nrounds); + +extern void _gcry_aes_cfb_enc_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + unsigned int nrounds); +extern void _gcry_aes_cfb_dec_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + unsigned int nrounds); + +extern void _gcry_aes_ctr_enc_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + unsigned int nrounds); + +extern void _gcry_aes_ocb_enc_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *offset, + unsigned char *checksum, + void **Ls, + size_t nblocks, + unsigned int nrounds); +extern void _gcry_aes_ocb_dec_armv8_ce (const void *keysched, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *offset, + unsigned char *checksum, + void **Ls, + size_t nblocks, + unsigned int nrounds); +extern void _gcry_aes_ocb_auth_armv8_ce (const void *keysched, + const unsigned char *abuf, + unsigned char *offset, + unsigned char *checksum, + void **Ls, + size_t nblocks, + unsigned int nrounds); + +typedef void (*ocb_crypt_fn_t) (const void *keysched, unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *offset, unsigned char *checksum, + void **Ls, size_t nblocks, + unsigned int nrounds); + +void +_gcry_aes_armv8_ce_setkey (RIJNDAEL_context *ctx, const byte *key) +{ + union + { + PROPERLY_ALIGNED_TYPE dummy; + byte data[MAXKC][4]; + u32 data32[MAXKC]; + } tkk[2]; + unsigned int rounds = ctx->rounds; + int KC = rounds - 6; + unsigned int keylen = KC * 4; + unsigned int i, r, t; + byte rcon = 1; + int j; +#define k tkk[0].data +#define k_u32 tkk[0].data32 +#define tk tkk[1].data +#define tk_u32 tkk[1].data32 +#define W (ctx->keyschenc) +#define W_u32 (ctx->keyschenc32) + + for (i = 0; i < keylen; i++) + { + k[i >> 2][i & 3] = key[i]; + } + + for (j = KC-1; j >= 0; j--) + { + tk_u32[j] = k_u32[j]; + } + r = 0; + t = 0; + /* Copy values into round key array. */ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + W_u32[r][t] = le_bswap32(tk_u32[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + + while (r < rounds + 1) + { + tk_u32[0] ^= _gcry_aes_sbox4_armv8_ce(rol(tk_u32[KC - 1], 24)) ^ rcon; + + if (KC != 8) + { + for (j = 1; j < KC; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + } + else + { + for (j = 1; j < KC/2; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + + tk_u32[KC/2] ^= _gcry_aes_sbox4_armv8_ce(tk_u32[KC/2 - 1]); + + for (j = KC/2 + 1; j < KC; j++) + { + tk_u32[j] ^= tk_u32[j-1]; + } + } + + /* Copy values into round key array. */ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + W_u32[r][t] = le_bswap32(tk_u32[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + + rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b); + } + +#undef W +#undef tk +#undef k +#undef W_u32 +#undef tk_u32 +#undef k_u32 + wipememory(&tkk, sizeof(tkk)); +} + +/* Make a decryption key from an encryption key. */ +void +_gcry_aes_armv8_ce_prepare_decryption (RIJNDAEL_context *ctx) +{ + u128_t *ekey = (u128_t *)(void *)ctx->keyschenc; + u128_t *dkey = (u128_t *)(void *)ctx->keyschdec; + int rounds = ctx->rounds; + int rr; + int r; + +#define DO_AESIMC() _gcry_aes_invmixcol_armv8_ce(&dkey[r], &ekey[rr]) + + dkey[0] = ekey[rounds]; + r = 1; + rr = rounds-1; + DO_AESIMC(); r++; rr--; /* round 1 */ + DO_AESIMC(); r++; rr--; /* round 2 */ + DO_AESIMC(); r++; rr--; /* round 3 */ + DO_AESIMC(); r++; rr--; /* round 4 */ + DO_AESIMC(); r++; rr--; /* round 5 */ + DO_AESIMC(); r++; rr--; /* round 6 */ + DO_AESIMC(); r++; rr--; /* round 7 */ + DO_AESIMC(); r++; rr--; /* round 8 */ + DO_AESIMC(); r++; rr--; /* round 9 */ + if (rounds >= 12) + { + if (rounds > 12) + { + DO_AESIMC(); r++; rr--; /* round 10 */ + DO_AESIMC(); r++; rr--; /* round 11 */ + } + + DO_AESIMC(); r++; rr--; /* round 12 / 10 */ + DO_AESIMC(); r++; rr--; /* round 13 / 11 */ + } + + dkey[r] = ekey[0]; + +#undef DO_AESIMC +} + +unsigned int +_gcry_aes_armv8_ce_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst, + const unsigned char *src) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + return _gcry_aes_enc_armv8_ce(keysched, dst, src, nrounds); +} + +unsigned int +_gcry_aes_armv8_ce_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst, + const unsigned char *src) +{ + const void *keysched = ctx->keyschdec32; + unsigned int nrounds = ctx->rounds; + + return _gcry_aes_dec_armv8_ce(keysched, dst, src, nrounds); +} + +void +_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks, int cbc_mac) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_cbc_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, cbc_mac, + nrounds); +} + +void +_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + const void *keysched = ctx->keyschdec32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_cbc_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); +} + +void +_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_cfb_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); +} + +void +_gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_cfb_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); +} + +void +_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, + const unsigned char *inbuf, unsigned char *iv, + size_t nblocks) +{ + const void *keysched = ctx->keyschenc32; + unsigned int nrounds = ctx->rounds; + + _gcry_aes_ctr_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds); +} + +void +_gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32; + ocb_crypt_fn_t crypt_fn = encrypt ? _gcry_aes_ocb_enc_armv8_ce + : _gcry_aes_ocb_dec_armv8_ce; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int nrounds = ctx->rounds; + u64 blkn = c->u_mode.ocb.data_nblocks; + u64 blkn_offs = blkn - blkn % 32; + unsigned int n = 32 - blkn % 32; + unsigned char l_tmp[16]; + void *Ls[32]; + void **l; + size_t i; + + c->u_mode.ocb.data_nblocks = blkn + nblocks; + + if (nblocks >= 32) + { + for (i = 0; i < 32; i += 8) + { + Ls[(i + 0 + n) % 32] = (void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 32] = (void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 32] = (void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 32] = (void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 32] = (void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 32] = (void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 32] = (void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 32] = (void *)c->u_mode.ocb.L[3]; + Ls[(15 + n) % 32] = (void *)c->u_mode.ocb.L[4]; + Ls[(23 + n) % 32] = (void *)c->u_mode.ocb.L[3]; + l = &Ls[(31 + n) % 32]; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + /* l_tmp will be used only every 65536-th block. */ + blkn_offs += 32; + *l = (void *)ocb_get_l(c, l_tmp, blkn_offs); + + crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, 32, + nrounds); + + nblocks -= 32; + outbuf += 32 * 16; + inbuf += 32 * 16; + } + + if (nblocks && l < &Ls[nblocks]) + { + *l = (void *)ocb_get_l(c, l_tmp, 32 + blkn_offs); + } + } + else + { + for (i = 0; i < nblocks; i++) + Ls[i] = (void *)ocb_get_l(c, l_tmp, ++blkn); + } + + if (nblocks) + { + crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, Ls, nblocks, + nrounds); + } + + wipememory(&l_tmp, sizeof(l_tmp)); +} + +void +_gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg, + size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + const void *keysched = ctx->keyschenc32; + const unsigned char *abuf = abuf_arg; + unsigned int nrounds = ctx->rounds; + u64 blkn = c->u_mode.ocb.aad_nblocks; + u64 blkn_offs = blkn - blkn % 32; + unsigned int n = 32 - blkn % 32; + unsigned char l_tmp[16]; + void *Ls[32]; + void **l; + size_t i; + + c->u_mode.ocb.aad_nblocks = blkn + nblocks; + + if (nblocks >= 32) + { + for (i = 0; i < 32; i += 8) + { + Ls[(i + 0 + n) % 32] = (void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 32] = (void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 32] = (void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 32] = (void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 32] = (void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 32] = (void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 32] = (void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 32] = (void *)c->u_mode.ocb.L[3]; + Ls[(15 + n) % 32] = (void *)c->u_mode.ocb.L[4]; + Ls[(23 + n) % 32] = (void *)c->u_mode.ocb.L[3]; + l = &Ls[(31 + n) % 32]; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + /* l_tmp will be used only every 65536-th block. */ + blkn_offs += 32; + *l = (void *)ocb_get_l(c, l_tmp, blkn_offs); + + _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls, 32, nrounds); + + nblocks -= 32; + abuf += 32 * 16; + } + + if (nblocks && l < &Ls[nblocks]) + { + *l = (void *)ocb_get_l(c, l_tmp, 32 + blkn_offs); + } + } + else + { + for (i = 0; i < nblocks; i++) + Ls[i] = (void *)ocb_get_l(c, l_tmp, ++blkn); + } + + if (nblocks) + { + _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls, nblocks, nrounds); + } + + wipememory(&l_tmp, sizeof(l_tmp)); +} + +#endif /* USE_ARM_CE */ -- cgit v1.2.1