summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormamonet <maamoun.tk@gmail.com>2021-05-05 09:34:06 +0000
committermamonet <maamoun.tk@gmail.com>2021-05-05 09:34:06 +0000
commit116219d2251b6b11b11ac1f16b64539e783aa047 (patch)
tree6123fcd247cb0b36cac6eac6c035263305240506
parent088eea7e1a4971e114d093eb61742b17a1a3a060 (diff)
downloadnss-hg-116219d2251b6b11b11ac1f16b64539e783aa047.tar.gz
Bug 1566124 - Optimize AES-GCM for ppc64le Differential Revision: https://phabricator.services.mozilla.com/D108221 r=bbeurdouche
Differential Revision: https://phabricator.services.mozilla.com/D112302
-rw-r--r--lib/freebl/Makefile5
-rw-r--r--lib/freebl/freebl.gyp62
-rw-r--r--lib/freebl/ppc-gcm-wrap.c458
-rw-r--r--lib/freebl/ppc-gcm.h76
-rw-r--r--lib/freebl/ppc-gcm.s1051
-rw-r--r--lib/freebl/rijndael.c17
6 files changed, 1663 insertions, 6 deletions
diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile
index 3d5357297..d13a5e930 100644
--- a/lib/freebl/Makefile
+++ b/lib/freebl/Makefile
@@ -299,8 +299,9 @@ ifdef USE_64
ifeq ($(PPC_ABI),2)
ASFILES += sha512-p8.s
ifeq ($(OS_TEST),ppc64le)
- EXTRA_SRCS += chacha20poly1305-ppc.c
- ASFILES += chacha20-ppc64le.s
+ DEFINES += -DPPC_GCM
+ EXTRA_SRCS += chacha20poly1305-ppc.c ppc-gcm-wrap.c
+ ASFILES += chacha20-ppc64le.s ppc-gcm.s
endif # ppc64le
endif
endif # USE_64
diff --git a/lib/freebl/freebl.gyp b/lib/freebl/freebl.gyp
index 865f89232..a4e496399 100644
--- a/lib/freebl/freebl.gyp
+++ b/lib/freebl/freebl.gyp
@@ -285,6 +285,41 @@
]
},
{
+ 'target_name': 'gcm-aes-ppc_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'ppc-gcm.s',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ },
+ {
+ 'target_name': 'ppc-gcm-wrap-nodepend_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'ppc-gcm-wrap.c',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports',
+ 'gcm-aes-ppc_lib',
+ ],
+ },
+ {
+ 'target_name': 'ppc-gcm-wrap_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'ppc-gcm-wrap.c',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports',
+ 'gcm-aes-ppc_lib',
+ ],
+ 'defines!': [
+ 'FREEBL_NO_DEPEND',
+ ],
+ },
+ {
'target_name': 'gcm-sha512-nodepend-ppc_c_lib',
'type': 'static_library',
'sources': [
@@ -466,6 +501,7 @@
'gcm-aes-ppc_c_lib',
'gcm-sha512-ppc_c_lib',
'chacha20-ppc_lib',
+ 'ppc-gcm-wrap_c_lib',
],
}],
[ 'disable_altivec==1 and (target_arch=="ppc64" or target_arch=="ppc64le")', {
@@ -484,6 +520,7 @@
'FREEBL_LOWHASH',
'USE_HW_AES',
'INTEL_GCM',
+ 'PPC_GCM',
],
'conditions': [
[ 'target_arch=="x64"', {
@@ -535,10 +572,20 @@
'gcm-aes-aarch64_c_lib',
],
}],
- [ 'disable_altivec==0 and (target_arch=="ppc64" or target_arch=="ppc64le")', {
- 'dependencies': [
- 'gcm-aes-ppc_c_lib',
- 'gcm-sha512-nodepend-ppc_c_lib',
+ [ 'disable_altivec==0', {
+ 'conditions': [
+ [ 'target_arch=="ppc64"', {
+ 'dependencies': [
+ 'gcm-aes-ppc_c_lib',
+ 'gcm-sha512-nodepend-ppc_c_lib',
+ ],
+ }, 'target_arch=="ppc64le"', {
+ 'dependencies': [
+ 'gcm-aes-ppc_c_lib',
+ 'gcm-sha512-nodepend-ppc_c_lib',
+ 'ppc-gcm-wrap-nodepend_c_lib',
+ ],
+ }],
],
}],
[ 'disable_altivec==1 and (target_arch=="ppc64" or target_arch=="ppc64le")', {
@@ -743,6 +790,13 @@
'FREEBL_LOWHASH',
'FREEBL_NO_DEPEND',
],
+ 'conditions': [
+ [ 'disable_altivec==0 and target_arch=="ppc64le"', {
+ 'defines': [
+ 'PPC_GCM',
+ ],
+ }],
+ ],
}],
[ 'OS=="linux" or OS=="android"', {
'conditions': [
diff --git a/lib/freebl/ppc-gcm-wrap.c b/lib/freebl/ppc-gcm-wrap.c
new file mode 100644
index 000000000..97020e88e
--- /dev/null
+++ b/lib/freebl/ppc-gcm-wrap.c
@@ -0,0 +1,458 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* Copyright(c) 2013, Intel Corp. */
+
+/* Wrapper functions for PowerPC optimized implementation of AES-GCM */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapii.h"
+#include "blapit.h"
+#include "gcm.h"
+#include "ctr.h"
+#include "secerr.h"
+#include "prtypes.h"
+#include "pkcs11t.h"
+
+#include <limits.h>
+#include <stdio.h>
+
+#include "ppc-gcm.h"
+#include "rijndael.h"
+
struct ppc_AES_GCMContextStr {
    /* Precomputed table of hash-subkey powers (split halves), laid out
     * as [H1L,H1M,H2L,H2M,H3L,H3M,H4L,H4M] by ppc_aes_gcmINIT. */
    unsigned char Htbl[8 * AES_BLOCK_SIZE];
    /* Encryption of the initial counter block; XORed into the final
     * GHASH value by ppc_aes_gcmTAG to produce the tag. */
    unsigned char X0[AES_BLOCK_SIZE];
    /* Running GHASH digest. */
    unsigned char T[AES_BLOCK_SIZE];
    /* Current counter block; bytes 12-15 hold the big-endian 32-bit
     * block counter. */
    unsigned char CTR[AES_BLOCK_SIZE];
    AESContext *aes_context;  /* underlying, already-keyed AES context */
    unsigned long tagBits;    /* requested tag length in bits */
    unsigned long Alen;       /* total AAD bytes hashed so far */
    unsigned long Mlen;       /* total message bytes processed so far */
    freeblCipherFunc cipher;  /* used to encrypt the initial counter */
    PRBool ctr_context_init;  /* PR_TRUE once the IV/AAD were absorbed */
    gcmIVContext gcm_iv;      /* IV generator state (MESSAGE-style ops) */
};
+
+SECStatus ppc_aes_gcmInitCounter(ppc_AES_GCMContext *gcm,
+ const unsigned char *iv,
+ unsigned long ivLen, unsigned long tagBits,
+ const unsigned char *aad, unsigned long aadLen);
+
+ppc_AES_GCMContext *
+ppc_AES_GCM_CreateContext(void *context,
+ freeblCipherFunc cipher,
+ const unsigned char *params)
+{
+ ppc_AES_GCMContext *gcm = NULL;
+ AESContext *aes = (AESContext *)context;
+ const CK_NSS_GCM_PARAMS *gcmParams = (const CK_NSS_GCM_PARAMS *)params;
+ SECStatus rv;
+
+ gcm = PORT_ZNew(ppc_AES_GCMContext);
+ if (gcm == NULL) {
+ return NULL;
+ }
+
+ /* initialize context fields */
+ gcm->aes_context = aes;
+ gcm->cipher = cipher;
+ gcm->Alen = 0;
+ gcm->Mlen = 0;
+ gcm->ctr_context_init = PR_FALSE;
+
+ /* first prepare H and its derivatives for ghash */
+ ppc_aes_gcmINIT(gcm->Htbl, aes->k.expandedKey, aes->Nr);
+
+ gcm_InitIVContext(&gcm->gcm_iv);
+
+ /* if gcmParams is NULL, then we are creating an PKCS #11 MESSAGE
+ * style context, in which we initialize the key once, then do separate
+ * iv/aad's for each message. If we are doing that kind of operation,
+ * we've finished with init here. We'll init the Counter in each AEAD
+ * call */
+ if (gcmParams == NULL) {
+ return gcm;
+ }
+
+ rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv,
+ gcmParams->ulIvLen, gcmParams->ulTagBits,
+ gcmParams->pAAD, gcmParams->ulAADLen);
+ if (rv != SECSuccess) {
+ PORT_Free(gcm);
+ return NULL;
+ }
+ gcm->ctr_context_init = PR_TRUE;
+
+ return gcm;
+}
+
+SECStatus
+ppc_aes_gcmInitCounter(ppc_AES_GCMContext *gcm,
+ const unsigned char *iv, unsigned long ivLen,
+ unsigned long tagBits,
+ const unsigned char *aad, unsigned long aadLen)
+{
+ unsigned int j;
+ SECStatus rv;
+
+ if (ivLen == 0) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+
+ if (tagBits != 128 && tagBits != 120 && tagBits != 112 &&
+ tagBits != 104 && tagBits != 96 && tagBits != 64 &&
+ tagBits != 32) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ gcm->tagBits = tagBits;
+
+ /* reset the aad and message length counters */
+ gcm->Alen = 0;
+ gcm->Mlen = 0;
+
+ /* Initial TAG value is zero */
+ PORT_Memset(gcm->T, 0, AES_BLOCK_SIZE);
+ PORT_Memset(gcm->X0, 0, AES_BLOCK_SIZE);
+
+ /* Init the counter */
+ if (ivLen == 12) {
+ PORT_Memcpy(gcm->CTR, iv, AES_BLOCK_SIZE - 4);
+ gcm->CTR[12] = 0;
+ gcm->CTR[13] = 0;
+ gcm->CTR[14] = 0;
+ gcm->CTR[15] = 1;
+ } else {
+ /* If IV size is not 96 bits, then the initial counter value is GHASH
+ * of the IV */
+ ppc_aes_gcmHASH(gcm->Htbl, iv, ivLen, gcm->T);
+
+ ppc_aes_gcmTAG(
+ gcm->Htbl,
+ gcm->T,
+ ivLen,
+ 0,
+ gcm->X0,
+ gcm->CTR);
+
+ /* TAG should be zero again */
+ PORT_Memset(gcm->T, 0, AES_BLOCK_SIZE);
+ }
+
+ /* Encrypt the initial counter, will be used to encrypt the GHASH value,
+ * in the end */
+ rv = (*gcm->cipher)(gcm->aes_context, gcm->X0, &j, AES_BLOCK_SIZE, gcm->CTR,
+ AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+ if (rv != SECSuccess) {
+ return SECFailure;
+ }
+
+ /* Promote the counter by 1 */
+ gcm->CTR[14] += !(++gcm->CTR[15]);
+ gcm->CTR[13] += !(gcm->CTR[15]) && !(gcm->CTR[14]);
+ gcm->CTR[12] += !(gcm->CTR[15]) && !(gcm->CTR[13]) && !(gcm->CTR[12]);
+
+ /* Now hash AAD - it would actually make sense to seperate the context
+ * creation from the AAD, because that would allow to reuse the H, which
+ * only changes when the AES key changes, and not every package, like the
+ * IV and AAD */
+ ppc_aes_gcmHASH(gcm->Htbl, aad, aadLen, gcm->T);
+ gcm->Alen += aadLen;
+ return SECSuccess;
+}
+
+void
+ppc_AES_GCM_DestroyContext(ppc_AES_GCMContext *gcm, PRBool freeit)
+{
+ PORT_Memset(gcm, 0, sizeof(ppc_AES_GCMContext));
+ if (freeit) {
+ PORT_Free(gcm);
+ }
+}
+
+SECStatus
+ppc_AES_GCM_EncryptUpdate(ppc_AES_GCMContext *gcm,
+ unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize)
+{
+ unsigned int tagBytes;
+ unsigned char T[AES_BLOCK_SIZE];
+ unsigned int j;
+
+ // GCM has a 16 octet block, with a 32-bit block counter
+ // Limit in accordance with SP800-38D
+ if (sizeof(inlen) > 4 &&
+ inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ return SECFailure;
+ }
+
+ if (!gcm->ctr_context_init) {
+ PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
+ return SECFailure;
+ }
+
+ tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+ if (UINT_MAX - inlen < tagBytes) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ return SECFailure;
+ }
+ if (maxout < inlen + tagBytes) {
+ *outlen = inlen + tagBytes;
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ return SECFailure;
+ }
+
+ ppc_aes_gcmCRYPT(
+ inbuf,
+ outbuf,
+ inlen,
+ gcm->CTR,
+ gcm->aes_context->k.expandedKey,
+ gcm->aes_context->Nr);
+ ppc_aes_gcmHASH(
+ gcm->Htbl,
+ outbuf,
+ inlen,
+ gcm->T);
+
+ gcm->Mlen += inlen;
+
+ ppc_aes_gcmTAG(
+ gcm->Htbl,
+ gcm->T,
+ gcm->Mlen,
+ gcm->Alen,
+ gcm->X0,
+ T);
+
+ *outlen = inlen + tagBytes;
+
+ for (j = 0; j < tagBytes; j++) {
+ outbuf[inlen + j] = T[j];
+ }
+ return SECSuccess;
+}
+
+SECStatus
+ppc_AES_GCM_DecryptUpdate(ppc_AES_GCMContext *gcm,
+ unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize)
+{
+ unsigned int tagBytes;
+ unsigned char T[AES_BLOCK_SIZE];
+ const unsigned char *intag;
+
+ if (!gcm->ctr_context_init) {
+ PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
+ return SECFailure;
+ }
+
+ tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+ /* get the authentication block */
+ if (inlen < tagBytes) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ return SECFailure;
+ }
+
+ inlen -= tagBytes;
+ intag = inbuf + inlen;
+
+ // GCM has a 16 octet block, with a 32-bit block counter
+ // Limit in accordance with SP800-38D
+ if (sizeof(inlen) > 4 &&
+ inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ return SECFailure;
+ }
+
+ if (maxout < inlen) {
+ *outlen = inlen;
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ return SECFailure;
+ }
+
+ ppc_aes_gcmHASH(
+ gcm->Htbl,
+ inbuf,
+ inlen,
+ gcm->T);
+ ppc_aes_gcmCRYPT(
+ inbuf,
+ outbuf,
+ inlen,
+ gcm->CTR,
+ gcm->aes_context->k.expandedKey,
+ gcm->aes_context->Nr);
+
+ gcm->Mlen += inlen;
+ ppc_aes_gcmTAG(
+ gcm->Htbl,
+ gcm->T,
+ gcm->Mlen,
+ gcm->Alen,
+ gcm->X0,
+ T);
+
+ if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) {
+ memset(outbuf, 0, inlen);
+ *outlen = 0;
+ /* force a CKR_ENCRYPTED_DATA_INVALID error at in softoken */
+ PORT_SetError(SEC_ERROR_BAD_DATA);
+ return SECFailure;
+ }
+ *outlen = inlen;
+
+ return SECSuccess;
+}
+
+SECStatus
+ppc_AES_GCM_EncryptAEAD(ppc_AES_GCMContext *gcm,
+ unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ void *params, unsigned int paramLen,
+ const unsigned char *aad, unsigned int aadLen,
+ unsigned int blocksize)
+{
+ unsigned int tagBytes;
+ unsigned char T[AES_BLOCK_SIZE];
+ const CK_GCM_MESSAGE_PARAMS *gcmParams =
+ (const CK_GCM_MESSAGE_PARAMS *)params;
+ SECStatus rv;
+
+ // GCM has a 16 octet block, with a 32-bit block counter
+ // Limit in accordance with SP800-38D
+ if (sizeof(inlen) > 4 &&
+ inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ return SECFailure;
+ }
+ /* paramLen comes all the way from the application layer, make sure
+ * it's correct */
+ if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+
+ /* if we were initialized with the C_EncryptInit, we shouldn't be in this
+ * function */
+ if (gcm->ctr_context_init) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+
+ if (maxout < inlen) {
+ *outlen = inlen;
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ return SECFailure;
+ }
+
+ rv = gcm_GenerateIV(&gcm->gcm_iv, gcmParams->pIv, gcmParams->ulIvLen,
+ gcmParams->ulIvFixedBits, gcmParams->ivGenerator);
+ if (rv != SECSuccess) {
+ return SECFailure;
+ }
+
+ rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen,
+ gcmParams->ulTagBits, aad, aadLen);
+ if (rv != SECSuccess) {
+ return SECFailure;
+ }
+
+ tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+ ppc_aes_gcmCRYPT(inbuf, outbuf, inlen, gcm->CTR, gcm->aes_context->k.expandedKey,
+ gcm->aes_context->Nr);
+ ppc_aes_gcmHASH(gcm->Htbl, outbuf, inlen, gcm->T);
+
+ gcm->Mlen += inlen;
+
+ ppc_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, T);
+
+ *outlen = inlen;
+ PORT_Memcpy(gcmParams->pTag, T, tagBytes);
+ return SECSuccess;
+}
+
+SECStatus
+ppc_AES_GCM_DecryptAEAD(ppc_AES_GCMContext *gcm,
+ unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ void *params, unsigned int paramLen,
+ const unsigned char *aad, unsigned int aadLen,
+ unsigned int blocksize)
+{
+ unsigned int tagBytes;
+ unsigned char T[AES_BLOCK_SIZE];
+ const unsigned char *intag;
+ const CK_GCM_MESSAGE_PARAMS *gcmParams =
+ (const CK_GCM_MESSAGE_PARAMS *)params;
+ SECStatus rv;
+
+ /* paramLen comes all the way from the application layer, make sure
+ * it's correct */
+ if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ /* if we were initialized with the C_DecryptInit, we shouldn't be in this
+ * function */
+ if (gcm->ctr_context_init) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+
+ // GCM has a 16 octet block, with a 32-bit block counter
+ // Limit in accordance with SP800-38D
+ if (sizeof(inlen) > 4 &&
+ inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ return SECFailure;
+ }
+
+ if (maxout < inlen) {
+ *outlen = inlen;
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ return SECFailure;
+ }
+
+ rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen,
+ gcmParams->ulTagBits, aad, aadLen);
+ if (rv != SECSuccess) {
+ return SECFailure;
+ }
+
+ tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+ intag = gcmParams->pTag;
+ PORT_Assert(tagBytes != 0);
+
+ ppc_aes_gcmHASH(gcm->Htbl, inbuf, inlen, gcm->T);
+ ppc_aes_gcmCRYPT(inbuf, outbuf, inlen, gcm->CTR, gcm->aes_context->k.expandedKey,
+ gcm->aes_context->Nr);
+
+ gcm->Mlen += inlen;
+ ppc_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, T);
+
+ if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) {
+ memset(outbuf, 0, inlen);
+ *outlen = 0;
+ /* force a CKR_ENCRYPTED_DATA_INVALID error at in softoken */
+ PORT_SetError(SEC_ERROR_BAD_DATA);
+ return SECFailure;
+ }
+ *outlen = inlen;
+
+ return SECSuccess;
+}
diff --git a/lib/freebl/ppc-gcm.h b/lib/freebl/ppc-gcm.h
new file mode 100644
index 000000000..169cad0a5
--- /dev/null
+++ b/lib/freebl/ppc-gcm.h
@@ -0,0 +1,76 @@
/******************************************************************************/
/* LICENSE:                                                                   */
/* This submission to NSS is to be made available under the terms of the      */
/* Mozilla Public License, v. 2.0. You can obtain one at http:                */
/* //mozilla.org/MPL/2.0/.                                                    */
/******************************************************************************/

/* Interface to the ppc64le AES-GCM implementation: C wrapper functions
 * (ppc-gcm-wrap.c) and the assembler kernels (ppc-gcm.s). */

#ifndef PPC_GCM_H
#define PPC_GCM_H 1

#include "blapii.h"

/* Opaque GCM state; defined in ppc-gcm-wrap.c. */
typedef struct ppc_AES_GCMContextStr ppc_AES_GCMContext;

/* Create a GCM context over an existing keyed AES context.  params is a
 * CK_NSS_GCM_PARAMS, or NULL for PKCS #11 MESSAGE-style usage.  Returns
 * NULL on failure. */
ppc_AES_GCMContext *ppc_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher,
                                              const unsigned char *params);

/* Zeroize the context; free it as well when freeit is PR_TRUE. */
void ppc_AES_GCM_DestroyContext(ppc_AES_GCMContext *gcm, PRBool freeit);

/* One-shot encrypt; the tag is appended to the ciphertext in outbuf. */
SECStatus ppc_AES_GCM_EncryptUpdate(ppc_AES_GCMContext *gcm, unsigned char *outbuf,
                                    unsigned int *outlen, unsigned int maxout,
                                    const unsigned char *inbuf, unsigned int inlen,
                                    unsigned int blocksize);

/* One-shot decrypt; the tag is expected at the end of inbuf. */
SECStatus ppc_AES_GCM_DecryptUpdate(ppc_AES_GCMContext *gcm, unsigned char *outbuf,
                                    unsigned int *outlen, unsigned int maxout,
                                    const unsigned char *inbuf, unsigned int inlen,
                                    unsigned int blocksize);
/* MESSAGE-style encrypt; params is a CK_GCM_MESSAGE_PARAMS and the tag is
 * written to params->pTag. */
SECStatus ppc_AES_GCM_EncryptAEAD(ppc_AES_GCMContext *gcm,
                                  unsigned char *outbuf,
                                  unsigned int *outlen, unsigned int maxout,
                                  const unsigned char *inbuf, unsigned int inlen,
                                  void *params, unsigned int paramLen,
                                  const unsigned char *aad, unsigned int aadLen,
                                  unsigned int blocksize);
/* MESSAGE-style decrypt; the tag is read from params->pTag. */
SECStatus ppc_AES_GCM_DecryptAEAD(ppc_AES_GCMContext *gcm,
                                  unsigned char *outbuf,
                                  unsigned int *outlen, unsigned int maxout,
                                  const unsigned char *inbuf, unsigned int inlen,
                                  void *params, unsigned int paramLen,
                                  const unsigned char *aad, unsigned int aadLen,
                                  unsigned int blocksize);

/* Prototypes of the functions defined in the assembler file. */

/* Prepares the constants used in the aggregated reduction method */
void ppc_aes_gcmINIT(unsigned char Htbl[8 * 16],
                     PRUint32 *KS,
                     int NR);

/* Produces the final GHASH value */
void ppc_aes_gcmTAG(unsigned char Htbl[8 * 16],
                    unsigned char *Tp,
                    unsigned long Mlen,
                    unsigned long Alen,
                    unsigned char *X0,
                    unsigned char *TAG);

/* Hashes the Additional Authenticated Data, should be used before enc/dec.
   Operates on any length of data. Partial block is padded internally. */
void ppc_aes_gcmHASH(unsigned char Htbl[8 * 16],
                     const unsigned char *AAD,
                     unsigned long Alen,
                     unsigned char *Tp);

/* Crypt only, used in combination with ppc_aes_gcmAAD().
   Operates on any length of data, however partial block should only be encrypted
   at the last call, otherwise the result will be incorrect. */
void ppc_aes_gcmCRYPT(const unsigned char *PT,
                      unsigned char *CT,
                      unsigned long len,
                      unsigned char *CTRP,
                      PRUint32 *KS,
                      int NR);

#endif
diff --git a/lib/freebl/ppc-gcm.s b/lib/freebl/ppc-gcm.s
new file mode 100644
index 000000000..4d5fff437
--- /dev/null
+++ b/lib/freebl/ppc-gcm.s
@@ -0,0 +1,1051 @@
+# This submission to NSS is to be made available under the terms of the
+# Mozilla Public License, v. 2.0. You can obtain one at //mozilla.org/MPL/2.0/
+# Copyright(c) 2021, Niels Möller and Mamone Tarsha
+
+# Registers:
+
# r1 is the stack pointer, r2 the TOC pointer (ELFv2 ABI).
.set SP, 1
.set TOCP, 2

# Load the 16-byte constant DATA (addressed via the GOT/TOC) into vector
# register VR; GPR is a scratch register for the address.
.macro VEC_LOAD_DATA VR, DATA, GPR
    addis \GPR, 2, \DATA@got@ha
    ld \GPR, \DATA@got@l(\GPR)
    lvx \VR, 0, \GPR
.endm

# Load 16 bytes from GPR+IDX into VR and byte-swap with SWAP_MASK.
.macro VEC_LOAD VR, GPR, IDX
    lxvd2x \VR+32, \IDX, \GPR
    vperm \VR, \VR, \VR, SWAP_MASK
.endm

# Same as VEC_LOAD, but also advances IDX by 16 bytes.
.macro VEC_LOAD_INC VR, GPR, IDX
    lxvd2x \VR+32, \IDX, \GPR
    addi \IDX,\IDX,16
    vperm \VR, \VR, \VR, SWAP_MASK
.endm

# Byte-swap VR with SWAP_MASK (clobbering VR) and store it at GPR+IDX.
.macro VEC_STORE VR, GPR, IDX
    vperm \VR, \VR, \VR, SWAP_MASK
    stxvd2x \VR+32, \IDX, \GPR
.endm

# 0 < LEN < 16, pad the remaining bytes with zeros
# Loads LEN bytes from DATA, byte-reversed, into the GPR pair
# VAL1 (first 8 bytes) / VAL0 (remaining bytes); TMP0-TMP2 are scratch.
.macro LOAD_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2
    li \TMP0, 0
    li \VAL1, 0
    li \VAL0, 0
    andi. \TMP1, \LEN, 8
    beq 1f
    # a full 8-byte word is available
    ldbrx \VAL1, 0, \DATA
    li \TMP0, 8
1:
    andi. \TMP1, \LEN, 7
    beq 3f
    li \TMP1, 56
    # gather the trailing 1..7 bytes one at a time, most significant first
2:
    lbzx \TMP2, \TMP0, \DATA
    sld \TMP2, \TMP2, \TMP1
    subi \TMP1, \TMP1, 8
    or \VAL0, \VAL0, \TMP2
    addi \TMP0, \TMP0, 1
    cmpld \TMP0, \LEN
    bne 2b
    andi. \TMP1, \LEN, 8
    bne 3f
    # fewer than 8 bytes total: the partial word belongs in VAL1
    mr \VAL1, \VAL0
    li \VAL0, 0
3:
.endm

# 0 < LEN < 16
# Stores LEN bytes from the GPR pair VAL1/VAL0 (byte-reversed) to DATA;
# TMP0-TMP2 are scratch.  Mirror image of LOAD_LEN.
.macro STORE_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2
    andi. \TMP1, \LEN, 8
    beq 1f
    stdbrx \VAL1, 0, \DATA
    li \TMP0, 8
    b 2f
1:
    li \TMP0, 0
    mr \VAL0, \VAL1
2:
    andi. \TMP1, \LEN, 7
    beq 4f
    li \TMP1, 56
    # emit the trailing 1..7 bytes one at a time, most significant first
3:
    srd \TMP2, \VAL0, \TMP1
    subi \TMP1, \TMP1, 8
    stbx \TMP2, \TMP0, \DATA
    addi \TMP0, \TMP0, 1
    cmpld \TMP0, \LEN
    bne 3b
4:
.endm
+
+.text
+
+################################################################################
+# Generates the H table
+# void ppc_aes_gcmINIT(uint8_t Htbl[16*8], uint32_t *KS, int NR);
.globl ppc_aes_gcmINIT
.type ppc_aes_gcmINIT,@function
.align 5
ppc_aes_gcmINIT:
addis TOCP,12,(.TOC.-ppc_aes_gcmINIT)@ha
addi TOCP,TOCP,(.TOC.-ppc_aes_gcmINIT)@l
.localentry ppc_aes_gcmINIT, .-ppc_aes_gcmINIT

# Incoming arguments (GPRs)
.set Htbl, 3
.set KS, 4
.set NR, 5

# Vector register names (some numbers are reused; lifetimes do not overlap)
.set ZERO, 19
.set MSB, 18
.set ONE, 17
.set SWAP_MASK, 0
.set POLY, 1
.set K, 2
.set H, 3
.set H2, 4
.set H3, 5
.set H4, 6
.set HP, 7
.set HS, 8
.set R, 9
.set F, 10
.set T, 11
.set H1M, 12
.set H1L, 13
.set H2M, 14
.set H2L, 15
.set H3M, 16
.set H3L, 17
.set H4M, 18
.set H4L, 19

    VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 6
    VEC_LOAD_DATA POLY, .Lpoly, 6

    # Compute H = AES-Encrypt(key, 0^128): the all-zero block XORed with
    # round key 0 is just round key 0, then 9 middle rounds (AES-128),
    # plus 2 more per larger key size, and the final round below.
    li 6, 0
    VEC_LOAD_INC H, KS, 6
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    cmpwi NR, 10
    beq .LH_done
    # two extra rounds for AES-192
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    cmpwi NR, 12
    beq .LH_done
    # two extra rounds for AES-256
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K
    VEC_LOAD_INC K, KS, 6
    vcipher H, H, K

.LH_done:
    VEC_LOAD K, KS, 6
    vcipherlast H, H, K

    # Shift H left by one bit; when the top bit was set, fold in the
    # field polynomial (MSB is the sign byte of H replicated, masked
    # with POLY).
    vupkhsb MSB, H
    vspltisb ONE, 1
    vspltb MSB, MSB, 0
    vsl H, H, ONE
    vand MSB, MSB, POLY
    vxor ZERO, ZERO, ZERO
    vxor H, H, MSB
    vsldoi POLY, ZERO, POLY, 8

    # Split H into the two-half form (H1M/H1L) consumed by the vpmsumd
    # multiply-and-reduce sequences below and in gcmHASH.
    vpmsumd HP, H, POLY
    vsldoi HS, H, H, 8
    vxor HP, HP, HS
    vsldoi H1L, HP, HS, 8
    vsldoi H1M, HS, HP, 8
    vsldoi H1L, H1L, H1L, 8

    # calculate H^2

    vpmsumd F, H, H1L
    vpmsumd R, H, H1M

    vpmsumd T, F, POLY
    vsldoi H2, F, F, 8
    vxor R, R, T
    vxor H2, H2, R

    vpmsumd HP, H2, POLY
    vsldoi HS, H2, H2, 8
    vxor HP, HP, HS
    vsldoi H2L, HP, HS, 8
    vsldoi H2M, HS, HP, 8
    vsldoi H2L, H2L, H2L, 8

    # calculate H^3

    vpmsumd F, H2, H1L
    vpmsumd R, H2, H1M

    vpmsumd T, F, POLY
    vsldoi H3, F, F, 8
    vxor R, R, T
    vxor H3, H3, R

    vpmsumd HP, H3, POLY
    vsldoi HS, H3, H3, 8
    vxor HP, HP, HS
    vsldoi H3L, HP, HS, 8
    vsldoi H3M, HS, HP, 8
    vsldoi H3L, H3L, H3L, 8

    # calculate H^4

    vpmsumd F, H2, H2L
    vpmsumd R, H2, H2M

    vpmsumd T, F, POLY
    vsldoi H4, F, F, 8
    vxor R, R, T
    vxor H4, H4, R

    vpmsumd HP, H4, POLY
    vsldoi HS, H4, H4, 8
    vxor HP, HP, HS
    vsldoi H4L, HP, HS, 8
    vsldoi H4M, HS, HP, 8
    vsldoi H4L, H4L, H4L, 8

    # Store the table as [H1L,H1M,H2L,H2M,H3L,H3M,H4L,H4M], 16 bytes each.
    li 8, 16*1
    li 9, 16*2
    li 10, 16*3
    stxvd2x H1L+32, 0, Htbl
    stxvd2x H1M+32, 8, Htbl
    stxvd2x H2L+32, 9, Htbl
    stxvd2x H2M+32, 10, Htbl
    li 7, 16*4
    li 8, 16*5
    li 9, 16*6
    li 10, 16*7
    stxvd2x H3L+32, 7, Htbl
    stxvd2x H3M+32, 8, Htbl
    stxvd2x H4L+32, 9, Htbl
    stxvd2x H4M+32, 10, Htbl

    blr
.size ppc_aes_gcmINIT, . - ppc_aes_gcmINIT
+
+################################################################################
+# Authenticate only
+# void ppc_aes_gcmHASH(uint8_t Htbl[16*8], uint8_t *AAD, uint64_t Alen, uint8_t *Tp);
.globl ppc_aes_gcmHASH
.type ppc_aes_gcmHASH,@function
.align 5
ppc_aes_gcmHASH:
addis TOCP,12,(.TOC.-ppc_aes_gcmHASH)@ha
addi TOCP,TOCP,(.TOC.-ppc_aes_gcmHASH)@l
.localentry ppc_aes_gcmHASH, .-ppc_aes_gcmHASH

# Incoming arguments (GPRs)
.set Htbl, 3
.set AAD, 4
.set Alen, 5
.set Tp, 6

# Vector register names
.set SWAP_MASK, 0
.set POLY, 1
.set D, 2
.set C0, 3
.set C1, 4
.set C2, 5
.set C3, 6
.set T, 7
.set R, 8
.set F, 9
.set R2, 10
.set F2, 11
.set R3, 12
.set F3, 13
.set R4, 14
.set F4, 15
.set H1M, 16
.set H1L, 17
.set H2M, 18
.set H2L, 19
.set H3M, 28
.set H3L, 29
.set H4M, 30
.set H4L, 31

    # store non-volatile vector registers (v28-v31) in the red zone
    addi 7, SP, -16
    stvx 31, 0, 7
    addi 7, SP, -32
    stvx 30, 0, 7
    addi 7, SP, -48
    stvx 29, 0, 7
    addi 7, SP, -64
    stvx 28, 0, 7

    VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 7
    VEC_LOAD_DATA POLY, .Lpoly_r, 7

    # D holds the running digest, loaded from and stored back to Tp
    VEC_LOAD D, Tp, 0

    # --- process 4 blocks ---

    srdi. 7, Alen, 6 # 4-blocks loop count
    beq .L2x

    mtctr 7 # set counter register

    # load table elements
    li 8, 1*16
    li 9, 2*16
    li 10, 3*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 8, Htbl
    lxvd2x H2L+32, 9, Htbl
    lxvd2x H2M+32, 10, Htbl
    li 7, 4*16
    li 8, 5*16
    li 9, 6*16
    li 10, 7*16
    lxvd2x H3L+32, 7, Htbl
    lxvd2x H3M+32, 8, Htbl
    lxvd2x H4L+32, 9, Htbl
    lxvd2x H4M+32, 10, Htbl

    li 8, 0x10
    li 9, 0x20
    li 10, 0x30
.align 5
.L4x_loop:
    # load input
    lxvd2x C0+32, 0, AAD
    lxvd2x C1+32, 8, AAD
    lxvd2x C2+32, 9, AAD
    lxvd2x C3+32, 10, AAD

    vperm C0, C0, C0, SWAP_MASK
    vperm C1, C1, C1, SWAP_MASK
    vperm C2, C2, C2, SWAP_MASK
    vperm C3, C3, C3, SWAP_MASK

    # digest combining
    vxor C0, C0, D

    # polynomial multiplication: block i is multiplied by H^(4-i)
    vpmsumd F2, H3L, C1
    vpmsumd R2, H3M, C1
    vpmsumd F3, H2L, C2
    vpmsumd R3, H2M, C2
    vpmsumd F4, H1L, C3
    vpmsumd R4, H1M, C3
    vpmsumd F, H4L, C0
    vpmsumd R, H4M, C0

    # deferred recombination of partial products
    vxor F3, F3, F4
    vxor R3, R3, R4
    vxor F, F, F2
    vxor R, R, R2
    vxor F, F, F3
    vxor R, R, R3

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D

    addi AAD, AAD, 0x40
    bdnz .L4x_loop

    # keep only the remaining length (Alen mod 64)
    clrldi Alen, Alen, 58
.L2x:
    # --- process 2 blocks ---

    srdi. 7, Alen, 5
    beq .L1x

    # load table elements
    li 8, 1*16
    li 9, 2*16
    li 10, 3*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 8, Htbl
    lxvd2x H2L+32, 9, Htbl
    lxvd2x H2M+32, 10, Htbl

    # load input
    li 10, 0x10
    lxvd2x C0+32, 0, AAD
    lxvd2x C1+32, 10, AAD

    vperm C0, C0, C0, SWAP_MASK
    vperm C1, C1, C1, SWAP_MASK

    # previous digest combining
    vxor C0, C0, D

    # polynomial multiplication
    vpmsumd F2, H1L, C1
    vpmsumd R2, H1M, C1
    vpmsumd F, H2L, C0
    vpmsumd R, H2M, C0

    # deferred recombination of partial products
    vxor F, F, F2
    vxor R, R, R2

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D

    addi AAD, AAD, 0x20
    # keep only the remaining length (Alen mod 32)
    clrldi Alen, Alen, 59
.L1x:
    # --- process 1 block ---

    srdi. 7, Alen, 4
    beq .Ltail

    # load table elements
    li 8, 1*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 8, Htbl

    # load input
    lxvd2x C0+32, 0, AAD

    vperm C0, C0, C0, SWAP_MASK

    # previous digest combining
    vxor C0, C0, D

    # polynomial multiplication
    vpmsumd F, H1L, C0
    vpmsumd R, H1M, C0

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D

    addi AAD, AAD, 0x10
    # keep only the remaining length (Alen mod 16)
    clrldi Alen, Alen, 60

.Ltail:
    cmpldi Alen, 0
    beq .Lh_done
    # --- process the final partial block (zero-padded by LOAD_LEN) ---

    # load table elements
    li 8, 1*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 8, Htbl

    LOAD_LEN AAD, Alen, 10, 9, 3, 7, 8
    # pack the two 64-bit halves into one vector register
    mtvrd C0, 10
    mtvrd C1, 9
    xxmrghd C0+32, C0+32, C1+32

    # previous digest combining
    vxor C0, C0, D

    # polynomial multiplication
    vpmsumd F, H1L, C0
    vpmsumd R, H1M, C0

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D
.Lh_done:
    VEC_STORE D, Tp, 0

    # restore non-volatile vector registers
    addi 7, SP, -16
    lvx 31, 0, 7
    addi 7, SP, -32
    lvx 30, 0, 7
    addi 7, SP, -48
    lvx 29, 0, 7
    addi 7, SP, -64
    lvx 28, 0, 7
    blr
.size ppc_aes_gcmHASH, . - ppc_aes_gcmHASH
+
+################################################################################
+# Generates the final GCM tag
+# void ppc_aes_gcmTAG(uint8_t Htbl[16*8], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG);
.globl ppc_aes_gcmTAG
.type ppc_aes_gcmTAG,@function
.align 5
ppc_aes_gcmTAG:
addis TOCP,12,(.TOC.-ppc_aes_gcmTAG)@ha
addi TOCP,TOCP,(.TOC.-ppc_aes_gcmTAG)@l
.localentry ppc_aes_gcmTAG, .-ppc_aes_gcmTAG

# Incoming arguments (GPRs)
.set Htbl, 3
.set Tp, 4
.set Mlen, 5
.set Alen, 6
.set X0, 7
.set TAG, 8

# Vector register names
.set SWAP_MASK, 0
.set POLY, 1
.set D, 2
.set C0, 3
.set C1, 4
.set T, 5
.set R, 6
.set F, 7
.set H1M, 8
.set H1L, 9
.set X, 10

    VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 9
    VEC_LOAD_DATA POLY, .Lpoly_r, 9

    # running digest
    VEC_LOAD D, Tp, 0

    # load table elements
    li 9, 1*16
    lxvd2x H1L+32, 0, Htbl
    lxvd2x H1M+32, 9, Htbl

    # final GHASH block is len(AAD) || len(message), both in bits
    sldi Alen, Alen, 3
    sldi Mlen, Mlen, 3
    mtvrd C0, Alen
    mtvrd C1, Mlen
    xxmrghd C0+32, C0+32, C1+32

    # previous digest combining
    vxor C0, C0, D

    # polynomial multiplication
    vpmsumd F, H1L, C0
    vpmsumd R, H1M, C0

    # reduction
    vpmsumd T, F, POLY
    vsldoi D, F, F, 8
    vxor R, R, T
    vxor D, R, D

    # TAG = GHASH XOR X0 (the caller's encrypted initial counter)
    lxvd2x X+32, 0, X0
    vperm D, D, D, SWAP_MASK
    vxor X, X, D
    stxvd2x X+32, 0, TAG

    blr
.size ppc_aes_gcmTAG, . - ppc_aes_gcmTAG
+
+################################################################################
+# Crypt only
+# void ppc_aes_gcmCRYPT(const uint8_t* PT, uint8_t* CT, uint64_t LEN, uint8_t *CTRP, uint32_t *KS, int NR);
+.globl ppc_aes_gcmCRYPT
+.type ppc_aes_gcmCRYPT,@function
+.align 5
+ppc_aes_gcmCRYPT:
+addis TOCP,12,(.TOC.-ppc_aes_gcmCRYPT)@ha
+addi TOCP,TOCP,(.TOC.-ppc_aes_gcmCRYPT)@l
+.localentry ppc_aes_gcmCRYPT, .-ppc_aes_gcmCRYPT
+
+.set PT, 3
+.set CT, 4
+.set LEN, 5
+.set CTRP, 6
+.set KS, 7
+.set NR, 8
+
+.set SWAP_MASK, 0
+.set K, 1
+.set CTR, 2
+.set CTR0, 3
+.set CTR1, 4
+.set CTR2, 5
+.set CTR3, 6
+.set CTR4, 7
+.set CTR5, 8
+.set CTR6, 9
+.set CTR7, 10
+.set ZERO, 11
+.set I1, 12
+.set I2, 13
+.set I3, 14
+.set I4, 15
+.set I5, 16
+.set I6, 17
+.set I7, 18
+.set I8, 19
+.set IN0, 24
+.set IN1, 25
+.set IN2, 26
+.set IN3, 27
+.set IN4, 28
+.set IN5, 29
+.set IN6, 30
+.set IN7, 31
+
+.macro ROUND_8
+ VEC_LOAD_INC K, KS, 10
+ vcipher CTR0, CTR0, K
+ vcipher CTR1, CTR1, K
+ vcipher CTR2, CTR2, K
+ vcipher CTR3, CTR3, K
+ vcipher CTR4, CTR4, K
+ vcipher CTR5, CTR5, K
+ vcipher CTR6, CTR6, K
+ vcipher CTR7, CTR7, K
+.endm
+
+.macro ROUND_4
+ VEC_LOAD_INC K, KS, 10
+ vcipher CTR0, CTR0, K
+ vcipher CTR1, CTR1, K
+ vcipher CTR2, CTR2, K
+ vcipher CTR3, CTR3, K
+.endm
+
+.macro ROUND_2
+ VEC_LOAD_INC K, KS, 10
+ vcipher CTR0, CTR0, K
+ vcipher CTR1, CTR1, K
+.endm
+
+.macro ROUND_1
+ VEC_LOAD_INC K, KS, 10
+ vcipher CTR0, CTR0, K
+.endm
+
+ # store non-volatile general registers
+ std 31,-8(SP);
+ std 30,-16(SP);
+ std 29,-24(SP);
+ std 28,-32(SP);
+ std 27,-40(SP);
+ std 26,-48(SP);
+ std 25,-56(SP);
+
+ # store non-volatile vector registers
+ addi 9, SP, -80
+ stvx 31, 0, 9
+ addi 9, SP, -96
+ stvx 30, 0, 9
+ addi 9, SP, -112
+ stvx 29, 0, 9
+ addi 9, SP, -128
+ stvx 28, 0, 9
+ addi 9, SP, -144
+ stvx 27, 0, 9
+ addi 9, SP, -160
+ stvx 26, 0, 9
+ addi 9, SP, -176
+ stvx 25, 0, 9
+ addi 9, SP, -192
+ stvx 24, 0, 9
+
+ VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 9
+
+ vxor ZERO, ZERO, ZERO
+ vspltisb I1, 1
+ vspltisb I2, 2
+ vspltisb I3, 3
+ vspltisb I4, 4
+ vspltisb I5, 5
+ vspltisb I6, 6
+ vspltisb I7, 7
+ vspltisb I8, 8
+ vsldoi I1, ZERO, I1, 1
+ vsldoi I2, ZERO, I2, 1
+ vsldoi I3, ZERO, I3, 1
+ vsldoi I4, ZERO, I4, 1
+ vsldoi I5, ZERO, I5, 1
+ vsldoi I6, ZERO, I6, 1
+ vsldoi I7, ZERO, I7, 1
+ vsldoi I8, ZERO, I8, 1
+
+ VEC_LOAD CTR, CTRP, 0
+
+ srdi. 9, LEN, 7
+ beq .Lctr_4x
+
+ mtctr 9
+
+ li 10, 0
+ li 25, 0x10
+ li 26, 0x20
+ li 27, 0x30
+ li 28, 0x40
+ li 29, 0x50
+ li 30, 0x60
+ li 31, 0x70
+
+.align 5
+.L8x_loop:
+ VEC_LOAD_INC K, KS, 10
+
+ vadduwm CTR1, CTR, I1
+ vadduwm CTR2, CTR, I2
+ vadduwm CTR3, CTR, I3
+ vadduwm CTR4, CTR, I4
+ vadduwm CTR5, CTR, I5
+ vadduwm CTR6, CTR, I6
+ vadduwm CTR7, CTR, I7
+
+ vxor CTR0, CTR, K
+ vxor CTR1, CTR1, K
+ vxor CTR2, CTR2, K
+ vxor CTR3, CTR3, K
+ vxor CTR4, CTR4, K
+ vxor CTR5, CTR5, K
+ vxor CTR6, CTR6, K
+ vxor CTR7, CTR7, K
+
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ cmpwi NR, 10
+ beq .Llast_8
+ ROUND_8
+ ROUND_8
+ cmpwi NR, 12
+ beq .Llast_8
+ ROUND_8
+ ROUND_8
+
+.Llast_8:
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+ vcipherlast CTR1, CTR1, K
+ vcipherlast CTR2, CTR2, K
+ vcipherlast CTR3, CTR3, K
+ vcipherlast CTR4, CTR4, K
+ vcipherlast CTR5, CTR5, K
+ vcipherlast CTR6, CTR6, K
+ vcipherlast CTR7, CTR7, K
+
+ lxvd2x IN0+32, 0, PT
+ lxvd2x IN1+32, 25, PT
+ lxvd2x IN2+32, 26, PT
+ lxvd2x IN3+32, 27, PT
+ lxvd2x IN4+32, 28, PT
+ lxvd2x IN5+32, 29, PT
+ lxvd2x IN6+32, 30, PT
+ lxvd2x IN7+32, 31, PT
+
+ vperm CTR0, CTR0, CTR0, SWAP_MASK
+ vperm CTR1, CTR1, CTR1, SWAP_MASK
+ vperm CTR2, CTR2, CTR2, SWAP_MASK
+ vperm CTR3, CTR3, CTR3, SWAP_MASK
+ vperm CTR4, CTR4, CTR4, SWAP_MASK
+ vperm CTR5, CTR5, CTR5, SWAP_MASK
+ vperm CTR6, CTR6, CTR6, SWAP_MASK
+ vperm CTR7, CTR7, CTR7, SWAP_MASK
+
+ vxor IN0, IN0, CTR0
+ vxor IN1, IN1, CTR1
+ vxor IN2, IN2, CTR2
+ vxor IN3, IN3, CTR3
+ vxor IN4, IN4, CTR4
+ vxor IN5, IN5, CTR5
+ vxor IN6, IN6, CTR6
+ vxor IN7, IN7, CTR7
+
+ stxvd2x IN0+32, 0, CT
+ stxvd2x IN1+32, 25, CT
+ stxvd2x IN2+32, 26, CT
+ stxvd2x IN3+32, 27, CT
+ stxvd2x IN4+32, 28, CT
+ stxvd2x IN5+32, 29, CT
+ stxvd2x IN6+32, 30, CT
+ stxvd2x IN7+32, 31, CT
+
+ vadduwm CTR, CTR, I8
+ addi PT, PT, 0x80
+ addi CT, CT, 0x80
+ bdnz .L8x_loop
+
+ clrldi LEN, LEN, 57
+
+.Lctr_4x:
+ srdi. 9, LEN, 6
+ beq .Lctr_2x
+
+ li 10, 0
+ li 29, 0x10
+ li 30, 0x20
+ li 31, 0x30
+
+ VEC_LOAD_INC K, KS, 10
+
+ vadduwm CTR1, CTR, I1
+ vadduwm CTR2, CTR, I2
+ vadduwm CTR3, CTR, I3
+
+ vxor CTR0, CTR, K
+ vxor CTR1, CTR1, K
+ vxor CTR2, CTR2, K
+ vxor CTR3, CTR3, K
+
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ cmpwi NR, 10
+ beq .Llast_4
+ ROUND_4
+ ROUND_4
+ cmpwi NR, 12
+ beq .Llast_4
+ ROUND_4
+ ROUND_4
+
+.Llast_4:
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+ vcipherlast CTR1, CTR1, K
+ vcipherlast CTR2, CTR2, K
+ vcipherlast CTR3, CTR3, K
+
+ lxvd2x IN0+32, 0, PT
+ lxvd2x IN1+32, 29, PT
+ lxvd2x IN2+32, 30, PT
+ lxvd2x IN3+32, 31, PT
+
+ vperm CTR0, CTR0, CTR0, SWAP_MASK
+ vperm CTR1, CTR1, CTR1, SWAP_MASK
+ vperm CTR2, CTR2, CTR2, SWAP_MASK
+ vperm CTR3, CTR3, CTR3, SWAP_MASK
+
+ vxor IN0, IN0, CTR0
+ vxor IN1, IN1, CTR1
+ vxor IN2, IN2, CTR2
+ vxor IN3, IN3, CTR3
+
+ stxvd2x IN0+32, 0, CT
+ stxvd2x IN1+32, 29, CT
+ stxvd2x IN2+32, 30, CT
+ stxvd2x IN3+32, 31, CT
+
+ vadduwm CTR, CTR, I4
+ addi PT, PT, 0x40
+ addi CT, CT, 0x40
+
+ clrldi LEN, LEN, 58
+
+.Lctr_2x:
+ srdi. 9, LEN, 5
+ beq .Lctr_1x
+
+ li 10, 0
+ li 31, 0x10
+
+ VEC_LOAD_INC K, KS, 10
+
+ vadduwm CTR1, CTR, I1
+
+ vxor CTR0, CTR, K
+ vxor CTR1, CTR1, K
+
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ cmpwi NR, 10
+ beq .Llast_2
+ ROUND_2
+ ROUND_2
+ cmpwi NR, 12
+ beq .Llast_2
+ ROUND_2
+ ROUND_2
+
+.Llast_2:
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+ vcipherlast CTR1, CTR1, K
+
+ lxvd2x IN0+32, 0, PT
+ lxvd2x IN1+32, 31, PT
+
+ vperm CTR0, CTR0, CTR0, SWAP_MASK
+ vperm CTR1, CTR1, CTR1, SWAP_MASK
+
+ vxor IN0, IN0, CTR0
+ vxor IN1, IN1, CTR1
+
+ stxvd2x IN0+32, 0, CT
+ stxvd2x IN1+32, 31, CT
+
+ vadduwm CTR, CTR, I2
+ addi PT, PT, 0x20
+ addi CT, CT, 0x20
+
+ clrldi LEN, LEN, 59
+
+.Lctr_1x:
+ srdi. 9, LEN, 4
+ beq .Lctr_tail
+
+ li 10, 0
+
+ VEC_LOAD_INC K, KS, 10
+ vxor CTR0, CTR, K
+
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ cmpwi NR, 10
+ beq .Llast_1
+ ROUND_1
+ ROUND_1
+ cmpwi NR, 12
+ beq .Llast_1
+ ROUND_1
+ ROUND_1
+
+.Llast_1:
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+
+ lxvd2x IN0+32, 0, PT
+
+ vperm CTR0, CTR0, CTR0, SWAP_MASK
+
+ vxor IN0, IN0, CTR0
+
+ stxvd2x IN0+32, 0, CT
+
+ vadduwm CTR, CTR, I1
+ addi PT, PT, 0x10
+ addi CT, CT, 0x10
+
+ clrldi LEN, LEN, 60
+
+.Lctr_tail:
+ cmpldi LEN, 0
+ beq .Lc_done
+
+ li 10, 0
+
+ VEC_LOAD_INC K, KS, 10
+ vxor CTR0, CTR, K
+
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ cmpwi NR, 10
+ beq .Llast_tail
+ ROUND_1
+ ROUND_1
+ cmpwi NR, 12
+ beq .Llast_tail
+ ROUND_1
+ ROUND_1
+
+.Llast_tail:
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+
+ LOAD_LEN PT, LEN, 10, 9, 29, 30, 31
+
+ vsldoi CTR1, CTR0, CTR0, 8
+ mfvrd 31, CTR0
+ mfvrd 30, CTR1
+
+ xor 10, 10, 31
+ xor 9, 9, 30
+
+ STORE_LEN CT, LEN, 10, 9, 29, 30, 31
+
+ vadduwm CTR, CTR, I1
+
+.Lc_done:
+ VEC_STORE CTR, CTRP, 0
+
+ # restore non-volatile vector registers
+ addi 9, SP, -80
+ lvx 31, 0, 9
+ addi 9, SP, -96
+ lvx 30, 0, 9
+ addi 9, SP, -112
+ lvx 29, 0, 9
+ addi 9, SP, -128
+ lvx 28, 0, 9
+ addi 9, SP, -144
+ lvx 27, 0, 9
+ addi 9, SP, -160
+ lvx 26, 0, 9
+ addi 9, SP, -176
+ lvx 25, 0, 9
+ addi 9, SP, -192
+ lvx 24, 0, 9
+
+ # restore non-volatile general registers
+ ld 31,-8(SP);
+ ld 30,-16(SP);
+ ld 29,-24(SP);
+ ld 28,-32(SP);
+ ld 27,-40(SP);
+ ld 26,-48(SP);
+ ld 25,-56(SP);
+ blr
+.size ppc_aes_gcmCRYPT, . - ppc_aes_gcmCRYPT
+
+.data
+.align 4
+.Lpoly: # GHASH reduction constant; 0xc2 in the top byte — presumably the bit-reflected GCM polynomial x^128+x^7+x^2+x+1, TODO confirm against the reduction code
+ .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+.Lpoly_r: # same 0xc2 constant positioned for the other half of the folding step — NOTE(review): verify pairing with .Lpoly in the karatsuba/reduce path
+ .byte 0,0,0,0,0,0,0,0xc2,0,0,0,0,0,0,0,0
+.Ldb_bswap_mask: # vperm mask that swaps the two 64-bit doublewords of a vector (bytes 8..15 then 0..7)
+ .byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7
diff --git a/lib/freebl/rijndael.c b/lib/freebl/rijndael.c
index 546731f9d..82b1f419d 100644
--- a/lib/freebl/rijndael.c
+++ b/lib/freebl/rijndael.c
@@ -25,6 +25,10 @@
#undef USE_HW_AES
#endif
+#ifdef __powerpc64__
+#include "ppc-crypto.h"
+#endif
+
#ifdef USE_HW_AES
#ifdef NSS_X86_OR_X64
#include "intel-aes.h"
@@ -35,6 +39,9 @@
#ifdef INTEL_GCM
#include "intel-gcm.h"
#endif /* INTEL_GCM */
+#if defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
+#include "ppc-gcm.h"
+#endif
/* Forward declarations */
void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
@@ -1020,6 +1027,16 @@ AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext;
cx->isBlock = PR_FALSE;
} else
+#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
+ if (ppc_crypto_support() && (keysize % 8) == 0) {
+ cx->worker_cx = ppc_AES_GCM_CreateContext(cx, cx->worker, iv);
+ cx->worker = (freeblCipherFunc)(encrypt ? ppc_AES_GCM_EncryptUpdate
+ : ppc_AES_GCM_DecryptUpdate);
+ cx->worker_aead = (freeblAeadFunc)(encrypt ? ppc_AES_GCM_EncryptAEAD
+ : ppc_AES_GCM_DecryptAEAD);
+ cx->destroy = (freeblDestroyFunc)ppc_AES_GCM_DestroyContext;
+ cx->isBlock = PR_FALSE;
+ } else
#endif
{
cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv);