author    Nicolas Boichat <drinkcat@chromium.org>  2018-07-05 16:46:19 +0800
committer chrome-bot <chrome-bot@chromium.org>     2018-09-20 21:29:57 -0700
commit    f0ea1b379d4661c4334a50d1cc68c573b6cf84cd (patch)
tree      d1773c8d2fa2695fdf45a5dc7970c75d71e1961a
parent    e4db954045988241642e0d7d8817a43c79c2163f (diff)
download  chrome-ec-f0ea1b379d4661c4334a50d1cc68c573b6cf84cd.tar.gz
aes-gcm: Baseline implementation from BoringSSL
Copied over from upstream BoringSSL at commit
859679518d3433cdd0dd6cf534bd7bdb2a32dd60.

cp boringssl/crypto/fipsmodule/modes/gcm.c \
    third_party/boringssl/common/gcm.c
cp crypto/fipsmodule/modes/internal.h \
    third_party/boringssl/include/aes-gcm.h
 => Remove non-GCM definitions
perl boringssl/crypto/fipsmodule/modes/asm/ghash-armv4.pl \
    > third_party/boringssl/core/cortex-m/ghash.S

BRANCH=none
BUG=b:111160949
TEST=none

Change-Id: I34702ff315c8c44e6f4868243058700aaf026099
Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/1141445
Reviewed-by: Adam Langley <agl@chromium.org>
l---------  common/aes-gcm.c                                 1
l---------  core/cortex-m/ghash.S                            1
l---------  include/aes-gcm.h                                1
-rw-r--r--  third_party/boringssl/common/aes-gcm.c        1063
-rw-r--r--  third_party/boringssl/core/cortex-m/ghash.S    580
-rw-r--r--  third_party/boringssl/include/aes-gcm.h        217
6 files changed, 1863 insertions, 0 deletions
diff --git a/common/aes-gcm.c b/common/aes-gcm.c
new file mode 120000
index 0000000000..3176d85ff8
--- /dev/null
+++ b/common/aes-gcm.c
@@ -0,0 +1 @@
+../third_party/boringssl/common/aes-gcm.c
\ No newline at end of file
diff --git a/core/cortex-m/ghash.S b/core/cortex-m/ghash.S
new file mode 120000
index 0000000000..e9acbf4b25
--- /dev/null
+++ b/core/cortex-m/ghash.S
@@ -0,0 +1 @@
+../../third_party/boringssl/core/cortex-m/ghash.S
\ No newline at end of file
diff --git a/include/aes-gcm.h b/include/aes-gcm.h
new file mode 120000
index 0000000000..ba62939792
--- /dev/null
+++ b/include/aes-gcm.h
@@ -0,0 +1 @@
+../third_party/boringssl/include/aes-gcm.h
\ No newline at end of file
diff --git a/third_party/boringssl/common/aes-gcm.c b/third_party/boringssl/common/aes-gcm.c
new file mode 100644
index 0000000000..99d0e15e83
--- /dev/null
+++ b/third_party/boringssl/common/aes-gcm.c
@@ -0,0 +1,1063 @@
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ==================================================================== */
+
+#include <openssl/base.h>
+
+#include <assert.h>
+#include <string.h>
+
+#include <openssl/mem.h>
+#include <openssl/cpu.h>
+
+#include "internal.h"
+#include "../../internal.h"
+
+#if !defined(OPENSSL_NO_ASM) && \
+ (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
+ defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
+ defined(OPENSSL_PPC64LE))
+#define GHASH_ASM
+#endif
+
+#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
+#define REDUCE1BIT(V) \
+ do { \
+ if (sizeof(size_t) == 8) { \
+ uint64_t T = UINT64_C(0xe100000000000000) & (0 - ((V).lo & 1)); \
+ (V).lo = ((V).hi << 63) | ((V).lo >> 1); \
+ (V).hi = ((V).hi >> 1) ^ T; \
+ } else { \
+ uint32_t T = 0xe1000000U & (0 - (uint32_t)((V).lo & 1)); \
+ (V).lo = ((V).hi << 63) | ((V).lo >> 1); \
+ (V).hi = ((V).hi >> 1) ^ ((uint64_t)T << 32); \
+ } \
+ } while (0)
+
+// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
+// bits of a |size_t|.
+static const size_t kSizeTWithoutLower4Bits = (size_t) -16;
+
+static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
+ u128 V;
+
+ Htable[0].hi = 0;
+ Htable[0].lo = 0;
+ V.hi = H[0];
+ V.lo = H[1];
+
+ Htable[8] = V;
+ REDUCE1BIT(V);
+ Htable[4] = V;
+ REDUCE1BIT(V);
+ Htable[2] = V;
+ REDUCE1BIT(V);
+ Htable[1] = V;
+ Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
+ V = Htable[4];
+ Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
+ Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
+ Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
+ V = Htable[8];
+ Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
+ Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
+ Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
+ Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
+ Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
+ Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
+ Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
+
+#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
+ for (int j = 0; j < 16; ++j) {
+ V = Htable[j];
+ Htable[j].hi = V.lo;
+ Htable[j].lo = V.hi;
+ }
+#endif
+}
+
+#if !defined(GHASH_ASM) || defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
+static const size_t rem_4bit[16] = {
+ PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
+ PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
+ PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
+ PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};
+
+static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
+ u128 Z;
+ int cnt = 15;
+ size_t rem, nlo, nhi;
+
+ nlo = ((const uint8_t *)Xi)[15];
+ nhi = nlo >> 4;
+ nlo &= 0xf;
+
+ Z.hi = Htable[nlo].hi;
+ Z.lo = Htable[nlo].lo;
+
+ while (1) {
+ rem = (size_t)Z.lo & 0xf;
+ Z.lo = (Z.hi << 60) | (Z.lo >> 4);
+ Z.hi = (Z.hi >> 4);
+ if (sizeof(size_t) == 8) {
+ Z.hi ^= rem_4bit[rem];
+ } else {
+ Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
+ }
+
+ Z.hi ^= Htable[nhi].hi;
+ Z.lo ^= Htable[nhi].lo;
+
+ if (--cnt < 0) {
+ break;
+ }
+
+ nlo = ((const uint8_t *)Xi)[cnt];
+ nhi = nlo >> 4;
+ nlo &= 0xf;
+
+ rem = (size_t)Z.lo & 0xf;
+ Z.lo = (Z.hi << 60) | (Z.lo >> 4);
+ Z.hi = (Z.hi >> 4);
+ if (sizeof(size_t) == 8) {
+ Z.hi ^= rem_4bit[rem];
+ } else {
+ Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
+ }
+
+ Z.hi ^= Htable[nlo].hi;
+ Z.lo ^= Htable[nlo].lo;
+ }
+
+ Xi[0] = CRYPTO_bswap8(Z.hi);
+ Xi[1] = CRYPTO_bswap8(Z.lo);
+}
+
+// Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
+// details... Compiler-generated code doesn't seem to give any
+// performance improvement, at least not on x86[_64]. It's here
+// mostly as reference and a placeholder for possible future
+// non-trivial optimization[s]...
+static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
+ const uint8_t *inp, size_t len) {
+ u128 Z;
+ int cnt;
+ size_t rem, nlo, nhi;
+
+ do {
+ cnt = 15;
+ nlo = ((const uint8_t *)Xi)[15];
+ nlo ^= inp[15];
+ nhi = nlo >> 4;
+ nlo &= 0xf;
+
+ Z.hi = Htable[nlo].hi;
+ Z.lo = Htable[nlo].lo;
+
+ while (1) {
+ rem = (size_t)Z.lo & 0xf;
+ Z.lo = (Z.hi << 60) | (Z.lo >> 4);
+ Z.hi = (Z.hi >> 4);
+ if (sizeof(size_t) == 8) {
+ Z.hi ^= rem_4bit[rem];
+ } else {
+ Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
+ }
+
+ Z.hi ^= Htable[nhi].hi;
+ Z.lo ^= Htable[nhi].lo;
+
+ if (--cnt < 0) {
+ break;
+ }
+
+ nlo = ((const uint8_t *)Xi)[cnt];
+ nlo ^= inp[cnt];
+ nhi = nlo >> 4;
+ nlo &= 0xf;
+
+ rem = (size_t)Z.lo & 0xf;
+ Z.lo = (Z.hi << 60) | (Z.lo >> 4);
+ Z.hi = (Z.hi >> 4);
+ if (sizeof(size_t) == 8) {
+ Z.hi ^= rem_4bit[rem];
+ } else {
+ Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
+ }
+
+ Z.hi ^= Htable[nlo].hi;
+ Z.lo ^= Htable[nlo].lo;
+ }
+
+ Xi[0] = CRYPTO_bswap8(Z.hi);
+ Xi[1] = CRYPTO_bswap8(Z.lo);
+ } while (inp += 16, len -= 16);
+}
+#else // GHASH_ASM
+void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len);
+#endif
+
+#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->Htable)
+#if defined(GHASH_ASM)
+#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
+// GHASH_CHUNK is a "stride parameter" meant to mitigate cache-thrashing:
+// the idea is to hash data while it is still in the L1 cache after the
+// encryption pass...
+#define GHASH_CHUNK (3 * 1024)
+#endif
+
+
+#if defined(GHASH_ASM)
+
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+#define GCM_FUNCREF_4BIT
+void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len);
+
+#if defined(OPENSSL_X86_64)
+#define GHASH_ASM_X86_64
+void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
+ size_t len);
+#define AESNI_GCM
+size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
+ const void *key, uint8_t ivec[16], uint64_t *Xi);
+size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
+ const void *key, uint8_t ivec[16], uint64_t *Xi);
+#endif
+
+#if defined(OPENSSL_X86)
+#define GHASH_ASM_X86
+void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len);
+#endif
+
+#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
+#include <openssl/arm_arch.h>
+#if __ARM_ARCH__ >= 7
+#define GHASH_ASM_ARM
+#define GCM_FUNCREF_4BIT
+
+static int pmull_capable(void) {
+ return CRYPTO_is_ARMv8_PMULL_capable();
+}
+
+void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len);
+
+#if defined(OPENSSL_ARM)
+// 32-bit ARM also has support for doing GCM with NEON instructions.
+static int neon_capable(void) {
+ return CRYPTO_is_NEON_capable();
+}
+
+void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len);
+#else
+// AArch64 only has the ARMv8 versions of functions.
+static int neon_capable(void) {
+ return 0;
+}
+static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
+ abort();
+}
+static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
+ abort();
+}
+static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
+ const uint8_t *inp, size_t len) {
+ abort();
+}
+#endif
+
+#endif
+#elif defined(OPENSSL_PPC64LE)
+#define GHASH_ASM_PPC64LE
+#define GCM_FUNCREF_4BIT
+void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len);
+#endif
+#endif
+
+#ifdef GCM_FUNCREF_4BIT
+#undef GCM_MUL
+#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->Htable)
+#ifdef GHASH
+#undef GHASH
+#define GHASH(ctx, in, len) (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->Htable, in, len)
+#endif
+#endif
+
+void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
+ u128 *out_key, u128 out_table[16],
+ int *out_is_avx,
+ const uint8_t *gcm_key) {
+ *out_is_avx = 0;
+
+ union {
+ uint64_t u[2];
+ uint8_t c[16];
+ } H;
+
+ OPENSSL_memcpy(H.c, gcm_key, 16);
+
+ // H is stored in host byte order
+ H.u[0] = CRYPTO_bswap8(H.u[0]);
+ H.u[1] = CRYPTO_bswap8(H.u[1]);
+
+ OPENSSL_memcpy(out_key, H.c, 16);
+
+#if defined(GHASH_ASM_X86_64)
+ if (crypto_gcm_clmul_enabled()) {
+ if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { // AVX+MOVBE
+ gcm_init_avx(out_table, H.u);
+ *out_mult = gcm_gmult_avx;
+ *out_hash = gcm_ghash_avx;
+ *out_is_avx = 1;
+ return;
+ }
+ gcm_init_clmul(out_table, H.u);
+ *out_mult = gcm_gmult_clmul;
+ *out_hash = gcm_ghash_clmul;
+ return;
+ }
+#elif defined(GHASH_ASM_X86)
+ if (crypto_gcm_clmul_enabled()) {
+ gcm_init_clmul(out_table, H.u);
+ *out_mult = gcm_gmult_clmul;
+ *out_hash = gcm_ghash_clmul;
+ return;
+ }
+#elif defined(GHASH_ASM_ARM)
+ if (pmull_capable()) {
+ gcm_init_v8(out_table, H.u);
+ *out_mult = gcm_gmult_v8;
+ *out_hash = gcm_ghash_v8;
+ return;
+ }
+
+ if (neon_capable()) {
+ gcm_init_neon(out_table, H.u);
+ *out_mult = gcm_gmult_neon;
+ *out_hash = gcm_ghash_neon;
+ return;
+ }
+#elif defined(GHASH_ASM_PPC64LE)
+ if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
+ gcm_init_p8(out_table, H.u);
+ *out_mult = gcm_gmult_p8;
+ *out_hash = gcm_ghash_p8;
+ return;
+ }
+#endif
+
+ gcm_init_4bit(out_table, H.u);
+#if defined(GHASH_ASM_X86)
+ *out_mult = gcm_gmult_4bit_mmx;
+ *out_hash = gcm_ghash_4bit_mmx;
+#else
+ *out_mult = gcm_gmult_4bit;
+ *out_hash = gcm_ghash_4bit;
+#endif
+}
+
+void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *aes_key,
+ block128_f block, int block_is_hwaes) {
+ OPENSSL_memset(ctx, 0, sizeof(*ctx));
+ ctx->block = block;
+
+ uint8_t gcm_key[16];
+ OPENSSL_memset(gcm_key, 0, sizeof(gcm_key));
+ (*block)(gcm_key, gcm_key, aes_key);
+
+ int is_avx;
+ CRYPTO_ghash_init(&ctx->gmult, &ctx->ghash, &ctx->H, ctx->Htable, &is_avx,
+ gcm_key);
+
+ ctx->use_aesni_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
+}
+
+void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
+ const uint8_t *iv, size_t len) {
+ unsigned int ctr;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+#endif
+
+ ctx->Yi.u[0] = 0;
+ ctx->Yi.u[1] = 0;
+ ctx->Xi.u[0] = 0;
+ ctx->Xi.u[1] = 0;
+ ctx->len.u[0] = 0; // AAD length
+ ctx->len.u[1] = 0; // message length
+ ctx->ares = 0;
+ ctx->mres = 0;
+
+ if (len == 12) {
+ OPENSSL_memcpy(ctx->Yi.c, iv, 12);
+ ctx->Yi.c[15] = 1;
+ ctr = 1;
+ } else {
+ uint64_t len0 = len;
+
+ while (len >= 16) {
+ for (size_t i = 0; i < 16; ++i) {
+ ctx->Yi.c[i] ^= iv[i];
+ }
+ GCM_MUL(ctx, Yi);
+ iv += 16;
+ len -= 16;
+ }
+ if (len) {
+ for (size_t i = 0; i < len; ++i) {
+ ctx->Yi.c[i] ^= iv[i];
+ }
+ GCM_MUL(ctx, Yi);
+ }
+ len0 <<= 3;
+ ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);
+
+ GCM_MUL(ctx, Yi);
+ ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
+ }
+
+ (*ctx->block)(ctx->Yi.c, ctx->EK0.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+}
+
+int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
+ unsigned int n;
+ uint64_t alen = ctx->len.u[0];
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+#ifdef GHASH
+ void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len) = ctx->ghash;
+#endif
+#endif
+
+ if (ctx->len.u[1]) {
+ return 0;
+ }
+
+ alen += len;
+ if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
+ return 0;
+ }
+ ctx->len.u[0] = alen;
+
+ n = ctx->ares;
+ if (n) {
+ while (n && len) {
+ ctx->Xi.c[n] ^= *(aad++);
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GCM_MUL(ctx, Xi);
+ } else {
+ ctx->ares = n;
+ return 1;
+ }
+ }
+
+ // Process a whole number of blocks.
+#ifdef GHASH
+ size_t len_blocks = len & kSizeTWithoutLower4Bits;
+ if (len_blocks != 0) {
+ GHASH(ctx, aad, len_blocks);
+ aad += len_blocks;
+ len -= len_blocks;
+ }
+#else
+ while (len >= 16) {
+ for (size_t i = 0; i < 16; ++i) {
+ ctx->Xi.c[i] ^= aad[i];
+ }
+ GCM_MUL(ctx, Xi);
+ aad += 16;
+ len -= 16;
+ }
+#endif
+
+ // Process the remainder.
+ if (len != 0) {
+ n = (unsigned int)len;
+ for (size_t i = 0; i < len; ++i) {
+ ctx->Xi.c[i] ^= aad[i];
+ }
+ }
+
+ ctx->ares = n;
+ return 1;
+}
+
+int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
+ const uint8_t *in, uint8_t *out, size_t len) {
+ unsigned int n, ctr;
+ uint64_t mlen = ctx->len.u[1];
+ block128_f block = ctx->block;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+#ifdef GHASH
+ void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len) = ctx->ghash;
+#endif
+#endif
+
+ mlen += len;
+ if (mlen > ((UINT64_C(1) << 36) - 32) ||
+ (sizeof(len) == 8 && mlen < len)) {
+ return 0;
+ }
+ ctx->len.u[1] = mlen;
+
+ if (ctx->ares) {
+ // First call to encrypt finalizes GHASH(AAD)
+ GCM_MUL(ctx, Xi);
+ ctx->ares = 0;
+ }
+
+ ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
+
+ n = ctx->mres;
+ if (n) {
+ while (n && len) {
+ ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GCM_MUL(ctx, Xi);
+ } else {
+ ctx->mres = n;
+ return 1;
+ }
+ }
+ if (STRICT_ALIGNMENT &&
+ ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
+ for (size_t i = 0; i < len; ++i) {
+ if (n == 0) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ }
+ ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+ n = (n + 1) % 16;
+ if (n == 0) {
+ GCM_MUL(ctx, Xi);
+ }
+ }
+
+ ctx->mres = n;
+ return 1;
+ }
+#if defined(GHASH) && defined(GHASH_CHUNK)
+ while (len >= GHASH_CHUNK) {
+ size_t j = GHASH_CHUNK;
+
+ while (j) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ for (size_t i = 0; i < 16; i += sizeof(size_t)) {
+ store_word_le(out + i,
+ load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
+ }
+ out += 16;
+ in += 16;
+ j -= 16;
+ }
+ GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
+ len -= GHASH_CHUNK;
+ }
+ size_t len_blocks = len & kSizeTWithoutLower4Bits;
+ if (len_blocks != 0) {
+ while (len >= 16) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ for (size_t i = 0; i < 16; i += sizeof(size_t)) {
+ store_word_le(out + i,
+ load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
+ }
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+ GHASH(ctx, out - len_blocks, len_blocks);
+ }
+#else
+ while (len >= 16) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ for (size_t i = 0; i < 16; i += sizeof(size_t)) {
+ size_t tmp = load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)];
+ store_word_le(out + i, tmp);
+ ctx->Xi.t[i / sizeof(size_t)] ^= tmp;
+ }
+ GCM_MUL(ctx, Xi);
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+#endif
+ if (len) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ while (len--) {
+ ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+ ++n;
+ }
+ }
+
+ ctx->mres = n;
+ return 1;
+}
+
+int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
+ const unsigned char *in, unsigned char *out,
+ size_t len) {
+ unsigned int n, ctr;
+ uint64_t mlen = ctx->len.u[1];
+ block128_f block = ctx->block;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+#ifdef GHASH
+ void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len) = ctx->ghash;
+#endif
+#endif
+
+ mlen += len;
+ if (mlen > ((UINT64_C(1) << 36) - 32) ||
+ (sizeof(len) == 8 && mlen < len)) {
+ return 0;
+ }
+ ctx->len.u[1] = mlen;
+
+ if (ctx->ares) {
+ // First call to decrypt finalizes GHASH(AAD)
+ GCM_MUL(ctx, Xi);
+ ctx->ares = 0;
+ }
+
+ ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
+
+ n = ctx->mres;
+ if (n) {
+ while (n && len) {
+ uint8_t c = *(in++);
+ *(out++) = c ^ ctx->EKi.c[n];
+ ctx->Xi.c[n] ^= c;
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GCM_MUL(ctx, Xi);
+ } else {
+ ctx->mres = n;
+ return 1;
+ }
+ }
+ if (STRICT_ALIGNMENT &&
+ ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
+ for (size_t i = 0; i < len; ++i) {
+ uint8_t c;
+ if (n == 0) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ }
+ c = in[i];
+ out[i] = c ^ ctx->EKi.c[n];
+ ctx->Xi.c[n] ^= c;
+ n = (n + 1) % 16;
+ if (n == 0) {
+ GCM_MUL(ctx, Xi);
+ }
+ }
+
+ ctx->mres = n;
+ return 1;
+ }
+#if defined(GHASH) && defined(GHASH_CHUNK)
+ while (len >= GHASH_CHUNK) {
+ size_t j = GHASH_CHUNK;
+
+ GHASH(ctx, in, GHASH_CHUNK);
+ while (j) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ for (size_t i = 0; i < 16; i += sizeof(size_t)) {
+ store_word_le(out + i,
+ load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
+ }
+ out += 16;
+ in += 16;
+ j -= 16;
+ }
+ len -= GHASH_CHUNK;
+ }
+ size_t len_blocks = len & kSizeTWithoutLower4Bits;
+ if (len_blocks != 0) {
+ GHASH(ctx, in, len_blocks);
+ while (len >= 16) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ for (size_t i = 0; i < 16; i += sizeof(size_t)) {
+ store_word_le(out + i,
+ load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
+ }
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+ }
+#else
+ while (len >= 16) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ for (size_t i = 0; i < 16; i += sizeof(size_t)) {
+ size_t c = load_word_le(in + i);
+ store_word_le(out + i, c ^ ctx->EKi.t[i / sizeof(size_t)]);
+ ctx->Xi.t[i / sizeof(size_t)] ^= c;
+ }
+ GCM_MUL(ctx, Xi);
+ out += 16;
+ in += 16;
+ len -= 16;
+ }
+#endif
+ if (len) {
+ (*block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ while (len--) {
+ uint8_t c = in[n];
+ ctx->Xi.c[n] ^= c;
+ out[n] = c ^ ctx->EKi.c[n];
+ ++n;
+ }
+ }
+
+ ctx->mres = n;
+ return 1;
+}
+
+int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
+ const uint8_t *in, uint8_t *out, size_t len,
+ ctr128_f stream) {
+ unsigned int n, ctr;
+ uint64_t mlen = ctx->len.u[1];
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+#ifdef GHASH
+ void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len) = ctx->ghash;
+#endif
+#endif
+
+ mlen += len;
+ if (mlen > ((UINT64_C(1) << 36) - 32) ||
+ (sizeof(len) == 8 && mlen < len)) {
+ return 0;
+ }
+ ctx->len.u[1] = mlen;
+
+ if (ctx->ares) {
+ // First call to encrypt finalizes GHASH(AAD)
+ GCM_MUL(ctx, Xi);
+ ctx->ares = 0;
+ }
+
+ n = ctx->mres;
+ if (n) {
+ while (n && len) {
+ ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GCM_MUL(ctx, Xi);
+ } else {
+ ctx->mres = n;
+ return 1;
+ }
+ }
+
+#if defined(AESNI_GCM)
+ if (ctx->use_aesni_gcm_crypt) {
+ // |aesni_gcm_encrypt| may not process all the input given to it. It may
+ // not process *any* of its input if it is deemed too small.
+ size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
+ in += bulk;
+ out += bulk;
+ len -= bulk;
+ }
+#endif
+
+ ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
+
+#if defined(GHASH)
+ while (len >= GHASH_CHUNK) {
+ (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
+ ctr += GHASH_CHUNK / 16;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ GHASH(ctx, out, GHASH_CHUNK);
+ out += GHASH_CHUNK;
+ in += GHASH_CHUNK;
+ len -= GHASH_CHUNK;
+ }
+#endif
+ size_t i = len & kSizeTWithoutLower4Bits;
+ if (i != 0) {
+ size_t j = i / 16;
+
+ (*stream)(in, out, j, key, ctx->Yi.c);
+ ctr += (unsigned int)j;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ in += i;
+ len -= i;
+#if defined(GHASH)
+ GHASH(ctx, out, i);
+ out += i;
+#else
+ while (j--) {
+ for (i = 0; i < 16; ++i) {
+ ctx->Xi.c[i] ^= out[i];
+ }
+ GCM_MUL(ctx, Xi);
+ out += 16;
+ }
+#endif
+ }
+ if (len) {
+ (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ while (len--) {
+ ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+ ++n;
+ }
+ }
+
+ ctx->mres = n;
+ return 1;
+}
+
+int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const void *key,
+ const uint8_t *in, uint8_t *out, size_t len,
+ ctr128_f stream) {
+ unsigned int n, ctr;
+ uint64_t mlen = ctx->len.u[1];
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+#ifdef GHASH
+ void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len) = ctx->ghash;
+#endif
+#endif
+
+ mlen += len;
+ if (mlen > ((UINT64_C(1) << 36) - 32) ||
+ (sizeof(len) == 8 && mlen < len)) {
+ return 0;
+ }
+ ctx->len.u[1] = mlen;
+
+ if (ctx->ares) {
+ // First call to decrypt finalizes GHASH(AAD)
+ GCM_MUL(ctx, Xi);
+ ctx->ares = 0;
+ }
+
+ n = ctx->mres;
+ if (n) {
+ while (n && len) {
+ uint8_t c = *(in++);
+ *(out++) = c ^ ctx->EKi.c[n];
+ ctx->Xi.c[n] ^= c;
+ --len;
+ n = (n + 1) % 16;
+ }
+ if (n == 0) {
+ GCM_MUL(ctx, Xi);
+ } else {
+ ctx->mres = n;
+ return 1;
+ }
+ }
+
+#if defined(AESNI_GCM)
+ if (ctx->use_aesni_gcm_crypt) {
+ // |aesni_gcm_decrypt| may not process all the input given to it. It may
+ // not process *any* of its input if it is deemed too small.
+ size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
+ in += bulk;
+ out += bulk;
+ len -= bulk;
+ }
+#endif
+
+ ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
+
+#if defined(GHASH)
+ while (len >= GHASH_CHUNK) {
+ GHASH(ctx, in, GHASH_CHUNK);
+ (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
+ ctr += GHASH_CHUNK / 16;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ out += GHASH_CHUNK;
+ in += GHASH_CHUNK;
+ len -= GHASH_CHUNK;
+ }
+#endif
+ size_t i = len & kSizeTWithoutLower4Bits;
+ if (i != 0) {
+ size_t j = i / 16;
+
+#if defined(GHASH)
+ GHASH(ctx, in, i);
+#else
+ while (j--) {
+ size_t k;
+ for (k = 0; k < 16; ++k) {
+ ctx->Xi.c[k] ^= in[k];
+ }
+ GCM_MUL(ctx, Xi);
+ in += 16;
+ }
+ j = i / 16;
+ in -= i;
+#endif
+ (*stream)(in, out, j, key, ctx->Yi.c);
+ ctr += (unsigned int)j;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ out += i;
+ in += i;
+ len -= i;
+ }
+ if (len) {
+ (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
+ ++ctr;
+ ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
+ while (len--) {
+ uint8_t c = in[n];
+ ctx->Xi.c[n] ^= c;
+ out[n] = c ^ ctx->EKi.c[n];
+ ++n;
+ }
+ }
+
+ ctx->mres = n;
+ return 1;
+}
+
+int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
+ uint64_t alen = ctx->len.u[0] << 3;
+ uint64_t clen = ctx->len.u[1] << 3;
+#ifdef GCM_FUNCREF_4BIT
+ void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
+#endif
+
+ if (ctx->mres || ctx->ares) {
+ GCM_MUL(ctx, Xi);
+ }
+
+ alen = CRYPTO_bswap8(alen);
+ clen = CRYPTO_bswap8(clen);
+
+ ctx->Xi.u[0] ^= alen;
+ ctx->Xi.u[1] ^= clen;
+ GCM_MUL(ctx, Xi);
+
+ ctx->Xi.u[0] ^= ctx->EK0.u[0];
+ ctx->Xi.u[1] ^= ctx->EK0.u[1];
+
+ if (tag && len <= sizeof(ctx->Xi)) {
+ return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
+ } else {
+ return 0;
+ }
+}
+
+void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
+ CRYPTO_gcm128_finish(ctx, NULL, 0);
+ OPENSSL_memcpy(tag, ctx->Xi.c,
+ len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
+}
+
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+int crypto_gcm_clmul_enabled(void) {
+#ifdef GHASH_ASM
+ const uint32_t *ia32cap = OPENSSL_ia32cap_get();
+ return (ia32cap[0] & (1 << 24)) && // check FXSR bit
+ (ia32cap[1] & (1 << 1)); // check PCLMULQDQ bit
+#else
+ return 0;
+#endif
+}
+#endif
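An aside on the length checks that recur in the encrypt and decrypt paths above: GCM (NIST SP 800-38D) caps the plaintext of a single message at 2^39 - 256 bits, which is (2^39 - 256) / 8 = 2^36 - 32 bytes — exactly the (UINT64_C(1) << 36) - 32 bound that |mlen| is compared against in each of CRYPTO_gcm128_encrypt, _decrypt, and the _ctr32 variants.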
diff --git a/third_party/boringssl/core/cortex-m/ghash.S b/third_party/boringssl/core/cortex-m/ghash.S
new file mode 100644
index 0000000000..fafcb5c23b
--- /dev/null
+++ b/third_party/boringssl/core/cortex-m/ghash.S
@@ -0,0 +1,580 @@
+@ Generated by crypto/fipsmodule/modes/asm/ghash-armv4.pl, which carries
+@ this license:
+@
+@ Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
+@
+@ Licensed under the OpenSSL license (the "License"). You may not use
+@ this file except in compliance with the License. You can obtain a copy
+@ in the file LICENSE in the source distribution or at
+@ https://www.openssl.org/source/license.html
+
+#include <openssl/arm_arch.h>
+
+@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
+@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
+@ instructions are in aesv8-armx.pl.)
+.arch armv7-a
+
+.text
+#if defined(__thumb2__) || defined(__clang__)
+.syntax unified
+#endif
+#if defined(__thumb2__)
+.thumb
+#else
+.code 32
+#endif
+
+#ifdef __clang__
+#define ldrplb ldrbpl
+#define ldrneb ldrbne
+#endif
+
+.type rem_4bit,%object
+.align 5
+rem_4bit:
+.short 0x0000,0x1C20,0x3840,0x2460
+.short 0x7080,0x6CA0,0x48C0,0x54E0
+.short 0xE100,0xFD20,0xD940,0xC560
+.short 0x9180,0x8DA0,0xA9C0,0xB5E0
+.size rem_4bit,.-rem_4bit
+
+.type rem_4bit_get,%function
+rem_4bit_get:
+#if defined(__thumb2__)
+ adr r2,rem_4bit
+#else
+ sub r2,pc,#8+32 @ &rem_4bit
+#endif
+ b .Lrem_4bit_got
+ nop
+ nop
+.size rem_4bit_get,.-rem_4bit_get
+
+.global gcm_ghash_4bit
+.type gcm_ghash_4bit,%function
+.align 4
+gcm_ghash_4bit:
+#if defined(__thumb2__)
+ adr r12,rem_4bit
+#else
+ sub r12,pc,#8+48 @ &rem_4bit
+#endif
+ add r3,r2,r3 @ r3 to point at the end
+ stmdb sp!,{r3-r11,lr} @ save r3/end too
+
+ ldmia r12,{r4-r11} @ copy rem_4bit ...
+ stmdb sp!,{r4-r11} @ ... to stack
+
+ ldrb r12,[r2,#15]
+ ldrb r14,[r0,#15]
+.Louter:
+ eor r12,r12,r14
+ and r14,r12,#0xf0
+ and r12,r12,#0x0f
+ mov r3,#14
+
+ add r7,r1,r12,lsl#4
+ ldmia r7,{r4-r7} @ load Htbl[nlo]
+ add r11,r1,r14
+ ldrb r12,[r2,#14]
+
+ and r14,r4,#0xf @ rem
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ add r14,r14,r14
+ eor r4,r8,r4,lsr#4
+ ldrh r8,[sp,r14] @ rem_4bit[rem]
+ eor r4,r4,r5,lsl#28
+ ldrb r14,[r0,#14]
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ eor r12,r12,r14
+ and r14,r12,#0xf0
+ and r12,r12,#0x0f
+ eor r7,r7,r8,lsl#16
+
+.Linner:
+ add r11,r1,r12,lsl#4
+ and r12,r4,#0xf @ rem
+ subs r3,r3,#1
+ add r12,r12,r12
+ ldmia r11,{r8-r11} @ load Htbl[nlo]
+ eor r4,r8,r4,lsr#4
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ ldrh r8,[sp,r12] @ rem_4bit[rem]
+ eor r6,r10,r6,lsr#4
+#ifdef __thumb2__
+ it pl
+#endif
+ ldrplb r12,[r2,r3]
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+
+ add r11,r1,r14
+ and r14,r4,#0xf @ rem
+ eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
+ add r14,r14,r14
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ eor r4,r8,r4,lsr#4
+#ifdef __thumb2__
+ it pl
+#endif
+ ldrplb r8,[r0,r3]
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ ldrh r9,[sp,r14]
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+#ifdef __thumb2__
+ it pl
+#endif
+ eorpl r12,r12,r8
+ eor r7,r11,r7,lsr#4
+#ifdef __thumb2__
+ itt pl
+#endif
+ andpl r14,r12,#0xf0
+ andpl r12,r12,#0x0f
+ eor r7,r7,r9,lsl#16 @ ^= rem_4bit[rem]
+ bpl .Linner
+
+ ldr r3,[sp,#32] @ re-load r3/end
+ add r2,r2,#16
+ mov r14,r4
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r4,r4
+ str r4,[r0,#12]
+#elif defined(__ARMEB__)
+ str r4,[r0,#12]
+#else
+ mov r9,r4,lsr#8
+ strb r4,[r0,#12+3]
+ mov r10,r4,lsr#16
+ strb r9,[r0,#12+2]
+ mov r11,r4,lsr#24
+ strb r10,[r0,#12+1]
+ strb r11,[r0,#12]
+#endif
+ cmp r2,r3
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r5,r5
+ str r5,[r0,#8]
+#elif defined(__ARMEB__)
+ str r5,[r0,#8]
+#else
+ mov r9,r5,lsr#8
+ strb r5,[r0,#8+3]
+ mov r10,r5,lsr#16
+ strb r9,[r0,#8+2]
+ mov r11,r5,lsr#24
+ strb r10,[r0,#8+1]
+ strb r11,[r0,#8]
+#endif
+
+#ifdef __thumb2__
+ it ne
+#endif
+ ldrneb r12,[r2,#15]
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r6,r6
+ str r6,[r0,#4]
+#elif defined(__ARMEB__)
+ str r6,[r0,#4]
+#else
+ mov r9,r6,lsr#8
+ strb r6,[r0,#4+3]
+ mov r10,r6,lsr#16
+ strb r9,[r0,#4+2]
+ mov r11,r6,lsr#24
+ strb r10,[r0,#4+1]
+ strb r11,[r0,#4]
+#endif
+
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r7,r7
+ str r7,[r0,#0]
+#elif defined(__ARMEB__)
+ str r7,[r0,#0]
+#else
+ mov r9,r7,lsr#8
+ strb r7,[r0,#0+3]
+ mov r10,r7,lsr#16
+ strb r9,[r0,#0+2]
+ mov r11,r7,lsr#24
+ strb r10,[r0,#0+1]
+ strb r11,[r0,#0]
+#endif
+
+ bne .Louter
+
+ add sp,sp,#36
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size gcm_ghash_4bit,.-gcm_ghash_4bit
+
+.global gcm_gmult_4bit
+.type gcm_gmult_4bit,%function
+gcm_gmult_4bit:
+ stmdb sp!,{r4-r11,lr}
+ ldrb r12,[r0,#15]
+ b rem_4bit_get
+.Lrem_4bit_got:
+ and r14,r12,#0xf0
+ and r12,r12,#0x0f
+ mov r3,#14
+
+ add r7,r1,r12,lsl#4
+ ldmia r7,{r4-r7} @ load Htbl[nlo]
+ ldrb r12,[r0,#14]
+
+ add r11,r1,r14
+ and r14,r4,#0xf @ rem
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ add r14,r14,r14
+ eor r4,r8,r4,lsr#4
+ ldrh r8,[r2,r14] @ rem_4bit[rem]
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+ and r14,r12,#0xf0
+ eor r7,r7,r8,lsl#16
+ and r12,r12,#0x0f
+
+.Loop:
+ add r11,r1,r12,lsl#4
+ and r12,r4,#0xf @ rem
+ subs r3,r3,#1
+ add r12,r12,r12
+ ldmia r11,{r8-r11} @ load Htbl[nlo]
+ eor r4,r8,r4,lsr#4
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ eor r5,r5,r6,lsl#28
+ ldrh r8,[r2,r12] @ rem_4bit[rem]
+ eor r6,r10,r6,lsr#4
+#ifdef __thumb2__
+ it pl
+#endif
+ ldrplb r12,[r0,r3]
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+
+ add r11,r1,r14
+ and r14,r4,#0xf @ rem
+ eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
+ add r14,r14,r14
+ ldmia r11,{r8-r11} @ load Htbl[nhi]
+ eor r4,r8,r4,lsr#4
+ eor r4,r4,r5,lsl#28
+ eor r5,r9,r5,lsr#4
+ ldrh r8,[r2,r14] @ rem_4bit[rem]
+ eor r5,r5,r6,lsl#28
+ eor r6,r10,r6,lsr#4
+ eor r6,r6,r7,lsl#28
+ eor r7,r11,r7,lsr#4
+#ifdef __thumb2__
+ itt pl
+#endif
+ andpl r14,r12,#0xf0
+ andpl r12,r12,#0x0f
+ eor r7,r7,r8,lsl#16 @ ^= rem_4bit[rem]
+ bpl .Loop
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r4,r4
+ str r4,[r0,#12]
+#elif defined(__ARMEB__)
+ str r4,[r0,#12]
+#else
+ mov r9,r4,lsr#8
+ strb r4,[r0,#12+3]
+ mov r10,r4,lsr#16
+ strb r9,[r0,#12+2]
+ mov r11,r4,lsr#24
+ strb r10,[r0,#12+1]
+ strb r11,[r0,#12]
+#endif
+
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r5,r5
+ str r5,[r0,#8]
+#elif defined(__ARMEB__)
+ str r5,[r0,#8]
+#else
+ mov r9,r5,lsr#8
+ strb r5,[r0,#8+3]
+ mov r10,r5,lsr#16
+ strb r9,[r0,#8+2]
+ mov r11,r5,lsr#24
+ strb r10,[r0,#8+1]
+ strb r11,[r0,#8]
+#endif
+
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r6,r6
+ str r6,[r0,#4]
+#elif defined(__ARMEB__)
+ str r6,[r0,#4]
+#else
+ mov r9,r6,lsr#8
+ strb r6,[r0,#4+3]
+ mov r10,r6,lsr#16
+ strb r9,[r0,#4+2]
+ mov r11,r6,lsr#24
+ strb r10,[r0,#4+1]
+ strb r11,[r0,#4]
+#endif
+
+#if __ARM_ARCH__>=7 && defined(__ARMEL__)
+ rev r7,r7
+ str r7,[r0,#0]
+#elif defined(__ARMEB__)
+ str r7,[r0,#0]
+#else
+ mov r9,r7,lsr#8
+ strb r7,[r0,#0+3]
+ mov r10,r7,lsr#16
+ strb r9,[r0,#0+2]
+ mov r11,r7,lsr#24
+ strb r10,[r0,#0+1]
+ strb r11,[r0,#0]
+#endif
+
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size gcm_gmult_4bit,.-gcm_gmult_4bit
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.global gcm_init_neon
+.type gcm_init_neon,%function
+.align 4
+gcm_init_neon:
+ vld1.64 d7,[r1]! @ load H
+ vmov.i8 q8,#0xe1
+ vld1.64 d6,[r1]
+ vshl.i64 d17,#57
+ vshr.u64 d16,#63 @ t0=0xc2....01
+ vdup.8 q9,d7[7]
+ vshr.u64 d26,d6,#63
+ vshr.s8 q9,#7 @ broadcast carry bit
+ vshl.i64 q3,q3,#1
+ vand q8,q8,q9
+ vorr d7,d26 @ H<<<=1
+ veor q3,q3,q8 @ twisted H
+ vstmia r0,{q3}
+
+ bx lr @ bx lr
+.size gcm_init_neon,.-gcm_init_neon
+
+.global gcm_gmult_neon
+.type gcm_gmult_neon,%function
+.align 4
+gcm_gmult_neon:
+ vld1.64 d7,[r0]! @ load Xi
+ vld1.64 d6,[r0]!
+ vmov.i64 d29,#0x0000ffffffffffff
+ vldmia r1,{d26-d27} @ load twisted H
+ vmov.i64 d30,#0x00000000ffffffff
+#ifdef __ARMEL__
+ vrev64.8 q3,q3
+#endif
+ vmov.i64 d31,#0x000000000000ffff
+ veor d28,d26,d27 @ Karatsuba pre-processing
+ mov r3,#16
+ b .Lgmult_neon
+.size gcm_gmult_neon,.-gcm_gmult_neon
+
+.global gcm_ghash_neon
+.type gcm_ghash_neon,%function
+.align 4
+gcm_ghash_neon:
+ vld1.64 d1,[r0]! @ load Xi
+ vld1.64 d0,[r0]!
+ vmov.i64 d29,#0x0000ffffffffffff
+ vldmia r1,{d26-d27} @ load twisted H
+ vmov.i64 d30,#0x00000000ffffffff
+#ifdef __ARMEL__
+ vrev64.8 q0,q0
+#endif
+ vmov.i64 d31,#0x000000000000ffff
+ veor d28,d26,d27 @ Karatsuba pre-processing
+
+.Loop_neon:
+ vld1.64 d7,[r2]! @ load inp
+ vld1.64 d6,[r2]!
+#ifdef __ARMEL__
+ vrev64.8 q3,q3
+#endif
+ veor q3,q0 @ inp^=Xi
+.Lgmult_neon:
+ vext.8 d16, d26, d26, #1 @ A1
+ vmull.p8 q8, d16, d6 @ F = A1*B
+ vext.8 d0, d6, d6, #1 @ B1
+ vmull.p8 q0, d26, d0 @ E = A*B1
+ vext.8 d18, d26, d26, #2 @ A2
+ vmull.p8 q9, d18, d6 @ H = A2*B
+ vext.8 d22, d6, d6, #2 @ B2
+ vmull.p8 q11, d26, d22 @ G = A*B2
+ vext.8 d20, d26, d26, #3 @ A3
+ veor q8, q8, q0 @ L = E + F
+ vmull.p8 q10, d20, d6 @ J = A3*B
+ vext.8 d0, d6, d6, #3 @ B3
+ veor q9, q9, q11 @ M = G + H
+ vmull.p8 q0, d26, d0 @ I = A*B3
+ veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
+ vand d17, d17, d29
+ vext.8 d22, d6, d6, #4 @ B4
+ veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
+ vand d19, d19, d30
+ vmull.p8 q11, d26, d22 @ K = A*B4
+ veor q10, q10, q0 @ N = I + J
+ veor d16, d16, d17
+ veor d18, d18, d19
+ veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
+ vand d21, d21, d31
+ vext.8 q8, q8, q8, #15
+ veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 d23, #0
+ vext.8 q9, q9, q9, #14
+ veor d20, d20, d21
+ vmull.p8 q0, d26, d6 @ D = A*B
+ vext.8 q11, q11, q11, #12
+ vext.8 q10, q10, q10, #13
+ veor q8, q8, q9
+ veor q10, q10, q11
+ veor q0, q0, q8
+ veor q0, q0, q10
+ veor d6,d6,d7 @ Karatsuba pre-processing
+ vext.8 d16, d28, d28, #1 @ A1
+ vmull.p8 q8, d16, d6 @ F = A1*B
+ vext.8 d2, d6, d6, #1 @ B1
+ vmull.p8 q1, d28, d2 @ E = A*B1
+ vext.8 d18, d28, d28, #2 @ A2
+ vmull.p8 q9, d18, d6 @ H = A2*B
+ vext.8 d22, d6, d6, #2 @ B2
+ vmull.p8 q11, d28, d22 @ G = A*B2
+ vext.8 d20, d28, d28, #3 @ A3
+ veor q8, q8, q1 @ L = E + F
+ vmull.p8 q10, d20, d6 @ J = A3*B
+ vext.8 d2, d6, d6, #3 @ B3
+ veor q9, q9, q11 @ M = G + H
+ vmull.p8 q1, d28, d2 @ I = A*B3
+ veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
+ vand d17, d17, d29
+ vext.8 d22, d6, d6, #4 @ B4
+ veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
+ vand d19, d19, d30
+ vmull.p8 q11, d28, d22 @ K = A*B4
+ veor q10, q10, q1 @ N = I + J
+ veor d16, d16, d17
+ veor d18, d18, d19
+ veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
+ vand d21, d21, d31
+ vext.8 q8, q8, q8, #15
+ veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 d23, #0
+ vext.8 q9, q9, q9, #14
+ veor d20, d20, d21
+ vmull.p8 q1, d28, d6 @ D = A*B
+ vext.8 q11, q11, q11, #12
+ vext.8 q10, q10, q10, #13
+ veor q8, q8, q9
+ veor q10, q10, q11
+ veor q1, q1, q8
+ veor q1, q1, q10
+ vext.8 d16, d27, d27, #1 @ A1
+ vmull.p8 q8, d16, d7 @ F = A1*B
+ vext.8 d4, d7, d7, #1 @ B1
+ vmull.p8 q2, d27, d4 @ E = A*B1
+ vext.8 d18, d27, d27, #2 @ A2
+ vmull.p8 q9, d18, d7 @ H = A2*B
+ vext.8 d22, d7, d7, #2 @ B2
+ vmull.p8 q11, d27, d22 @ G = A*B2
+ vext.8 d20, d27, d27, #3 @ A3
+ veor q8, q8, q2 @ L = E + F
+ vmull.p8 q10, d20, d7 @ J = A3*B
+ vext.8 d4, d7, d7, #3 @ B3
+ veor q9, q9, q11 @ M = G + H
+ vmull.p8 q2, d27, d4 @ I = A*B3
+ veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
+ vand d17, d17, d29
+ vext.8 d22, d7, d7, #4 @ B4
+ veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
+ vand d19, d19, d30
+ vmull.p8 q11, d27, d22 @ K = A*B4
+ veor q10, q10, q2 @ N = I + J
+ veor d16, d16, d17
+ veor d18, d18, d19
+ veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
+ vand d21, d21, d31
+ vext.8 q8, q8, q8, #15
+ veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
+ vmov.i64 d23, #0
+ vext.8 q9, q9, q9, #14
+ veor d20, d20, d21
+ vmull.p8 q2, d27, d7 @ D = A*B
+ vext.8 q11, q11, q11, #12
+ vext.8 q10, q10, q10, #13
+ veor q8, q8, q9
+ veor q10, q10, q11
+ veor q2, q2, q8
+ veor q2, q2, q10
+ veor q1,q1,q0 @ Karatsuba post-processing
+ veor q1,q1,q2
+ veor d1,d1,d2
+ veor d4,d4,d3 @ Xh|Xl - 256-bit result
+
+ @ equivalent of reduction_avx from ghash-x86_64.pl
+ vshl.i64 q9,q0,#57 @ 1st phase
+ vshl.i64 q10,q0,#62
+ veor q10,q10,q9 @
+ vshl.i64 q9,q0,#63
+ veor q10, q10, q9 @
+ veor d1,d1,d20 @
+ veor d4,d4,d21
+
+ vshr.u64 q10,q0,#1 @ 2nd phase
+ veor q2,q2,q0
+ veor q0,q0,q10 @
+ vshr.u64 q10,q10,#6
+ vshr.u64 q0,q0,#1 @
+ veor q0,q0,q2 @
+ veor q0,q0,q10 @
+
+ subs r3,#16
+ bne .Loop_neon
+
+#ifdef __ARMEL__
+ vrev64.8 q0,q0
+#endif
+ sub r0,#16
+ vst1.64 d1,[r0]! @ write out Xi
+ vst1.64 d0,[r0]
+
+ bx lr @ bx lr
+.size gcm_ghash_neon,.-gcm_ghash_neon
+#endif
+.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
diff --git a/third_party/boringssl/include/aes-gcm.h b/third_party/boringssl/include/aes-gcm.h
new file mode 100644
index 0000000000..b2941fb317
--- /dev/null
+++ b/third_party/boringssl/include/aes-gcm.h
@@ -0,0 +1,217 @@
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ==================================================================== */
+
+#ifndef OPENSSL_HEADER_MODES_INTERNAL_H
+#define OPENSSL_HEADER_MODES_INTERNAL_H
+
+#include <openssl/base.h>
+
+#include <string.h>
+
+#include "../../internal.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+#define STRICT_ALIGNMENT 1
+#if defined(OPENSSL_X86_64) || defined(OPENSSL_X86) || defined(OPENSSL_AARCH64)
+#undef STRICT_ALIGNMENT
+#define STRICT_ALIGNMENT 0
+#endif
+
+static inline uint32_t GETU32(const void *in) {
+ uint32_t v;
+ OPENSSL_memcpy(&v, in, sizeof(v));
+ return CRYPTO_bswap4(v);
+}
+
+static inline void PUTU32(void *out, uint32_t v) {
+ v = CRYPTO_bswap4(v);
+ OPENSSL_memcpy(out, &v, sizeof(v));
+}
+
+static inline size_t load_word_le(const void *in) {
+ size_t v;
+ OPENSSL_memcpy(&v, in, sizeof(v));
+ return v;
+}
+
+static inline void store_word_le(void *out, size_t v) {
+ OPENSSL_memcpy(out, &v, sizeof(v));
+}
+
+// block128_f is the type of a 128-bit block cipher.
+typedef void (*block128_f)(const uint8_t in[16], uint8_t out[16],
+ const void *key);
+
+// GCM definitions
+typedef struct { uint64_t hi,lo; } u128;
+
+// gmult_func multiplies |Xi| by the GCM key and writes the result back to
+// |Xi|.
+typedef void (*gmult_func)(uint64_t Xi[2], const u128 Htable[16]);
+
+// ghash_func repeatedly multiplies |Xi| by the GCM key and adds in blocks from
+// |inp|. The result is written back to |Xi| and the |len| argument must be a
+// multiple of 16.
+typedef void (*ghash_func)(uint64_t Xi[2], const u128 Htable[16],
+ const uint8_t *inp, size_t len);
+
+// This differs from upstream's |gcm128_context| in that it does not have the
+// |key| pointer, in order to make it |memcpy|-friendly. Rather the key is
+// passed into each call that needs it.
+struct gcm128_context {
+ // Following 6 names follow names in GCM specification
+ union {
+ uint64_t u[2];
+ uint32_t d[4];
+ uint8_t c[16];
+ size_t t[16 / sizeof(size_t)];
+ } Yi, EKi, EK0, len, Xi;
+
+ // Note that the order of |Xi|, |H| and |Htable| is fixed by the MOVBE-based,
+ // x86-64, GHASH assembly.
+ u128 H;
+ u128 Htable[16];
+ gmult_func gmult;
+ ghash_func ghash;
+
+ unsigned int mres, ares;
+ block128_f block;
+
+ // use_aesni_gcm_crypt is true if this context should use the assembly
+ // functions |aesni_gcm_encrypt| and |aesni_gcm_decrypt| to process data.
+ unsigned use_aesni_gcm_crypt:1;
+};
+
+
+// GCM.
+//
+// This API differs from the upstream API slightly. The |GCM128_CONTEXT| does
+// not have a |key| pointer that points to the key as upstream's version does.
+// Instead, every function takes a |key| parameter. This way |GCM128_CONTEXT|
+// can be safely copied.
+
+typedef struct gcm128_context GCM128_CONTEXT;
+
+// CRYPTO_ghash_init writes a precomputed table of powers of |gcm_key| to
+// |out_table| and sets |*out_mult| and |*out_hash| to (potentially hardware
+// accelerated) functions for performing operations in the GHASH field. If the
+// AVX implementation was used |*out_is_avx| will be true.
+void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
+ u128 *out_key, u128 out_table[16], int *out_is_avx,
+ const uint8_t *gcm_key);
+
+// CRYPTO_gcm128_init initialises |ctx| to use |block| (typically AES) with
+// the given key. |block_is_hwaes| is one if |block| is |aes_hw_encrypt|.
+OPENSSL_EXPORT void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, const void *key,
+ block128_f block, int block_is_hwaes);
+
+// CRYPTO_gcm128_setiv sets the IV (nonce) for |ctx|. The |key| must be the
+// same key that was passed to |CRYPTO_gcm128_init|.
+OPENSSL_EXPORT void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const void *key,
+ const uint8_t *iv, size_t iv_len);
+
+// CRYPTO_gcm128_aad sets the authenticated data for an instance of GCM.
+// This must be called before any data is encrypted. It returns one on success
+// and zero otherwise.
+OPENSSL_EXPORT int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad,
+ size_t len);
+
+// CRYPTO_gcm128_encrypt encrypts |len| bytes from |in| to |out|. The |key|
+// must be the same key that was passed to |CRYPTO_gcm128_init|. It returns one
+// on success and zero otherwise.
+OPENSSL_EXPORT int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const void *key,
+ const uint8_t *in, uint8_t *out,
+ size_t len);
+
+// CRYPTO_gcm128_decrypt decrypts |len| bytes from |in| to |out|. The |key|
+// must be the same key that was passed to |CRYPTO_gcm128_init|. It returns one
+// on success and zero otherwise.
+OPENSSL_EXPORT int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const void *key,
+ const uint8_t *in, uint8_t *out,
+ size_t len);
+
+// CRYPTO_gcm128_encrypt_ctr32 encrypts |len| bytes from |in| to |out| using
+// a CTR function that only handles the bottom 32 bits of the nonce, like
+// |CRYPTO_ctr128_encrypt_ctr32|. The |key| must be the same key that was
+// passed to |CRYPTO_gcm128_init|. It returns one on success and zero
+// otherwise.
+OPENSSL_EXPORT int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
+ const void *key,
+ const uint8_t *in, uint8_t *out,
+ size_t len, ctr128_f stream);
+
+// CRYPTO_gcm128_decrypt_ctr32 decrypts |len| bytes from |in| to |out| using
+// a CTR function that only handles the bottom 32 bits of the nonce, like
+// |CRYPTO_ctr128_encrypt_ctr32|. The |key| must be the same key that was
+// passed to |CRYPTO_gcm128_init|. It returns one on success and zero
+// otherwise.
+OPENSSL_EXPORT int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
+ const void *key,
+ const uint8_t *in, uint8_t *out,
+ size_t len, ctr128_f stream);
+
+// CRYPTO_gcm128_finish calculates the authenticator and compares it against
+// |len| bytes of |tag|. It returns one on success and zero otherwise.
+OPENSSL_EXPORT int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag,
+ size_t len);
+
+// CRYPTO_gcm128_tag calculates the authenticator and copies it into |tag|.
+// The minimum of |len| and 16 bytes are copied into |tag|.
+OPENSSL_EXPORT void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, uint8_t *tag,
+ size_t len);
+
+
+#if defined(__cplusplus)
+} // extern C
+#endif
+
+#endif // OPENSSL_HEADER_MODES_INTERNAL_H
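
The header above only declares the streaming GCM entry points. As a quick orientation, here is a minimal sketch (not part of the commit) of how the calls are meant to be sequenced for a single seal operation, following the comments in aes-gcm.h. The aes_encrypt_block prototype and the aes_key parameter are placeholders for whatever block128_f implementation and key schedule the caller supplies; they are assumptions for illustration, not names from this commit.

#include <stddef.h>
#include <stdint.h>

#include "aes-gcm.h"

/* Placeholder block128_f: stands in for the caller's AES block function. */
void aes_encrypt_block(const uint8_t in[16], uint8_t out[16], const void *key);

int gcm_seal_sketch(const void *aes_key, const uint8_t iv[12],
                    const uint8_t *aad, size_t aad_len,
                    const uint8_t *plaintext, uint8_t *ciphertext,
                    size_t len, uint8_t tag[16])
{
	GCM128_CONTEXT ctx;

	/* Derives the hash key H = AES_K(0^128) and picks a GHASH backend. */
	CRYPTO_gcm128_init(&ctx, aes_key, aes_encrypt_block, 0 /* not hwaes */);
	/* The same |aes_key| is passed to every call that touches the cipher. */
	CRYPTO_gcm128_setiv(&ctx, aes_key, iv, 12);
	/* AAD must be fed in before any plaintext. */
	if (!CRYPTO_gcm128_aad(&ctx, aad, aad_len) ||
	    !CRYPTO_gcm128_encrypt(&ctx, aes_key, plaintext, ciphertext, len))
		return 0;
	/* Emit the 16-byte tag; CRYPTO_gcm128_finish() would verify one instead. */
	CRYPTO_gcm128_tag(&ctx, tag, 16);
	return 1;
}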