diff options
author | mamonet <maamoun.tk@gmail.com> | 2021-05-05 09:34:06 +0000 |
---|---|---|
committer | mamonet <maamoun.tk@gmail.com> | 2021-05-05 09:34:06 +0000 |
commit | 116219d2251b6b11b11ac1f16b64539e783aa047 (patch) | |
tree | 6123fcd247cb0b36cac6eac6c035263305240506 | |
parent | 088eea7e1a4971e114d093eb61742b17a1a3a060 (diff) | |
download | nss-hg-116219d2251b6b11b11ac1f16b64539e783aa047.tar.gz |
Bug 1566124 - Optimize AES-GCM for ppc64le. Differential Revision: https://phabricator.services.mozilla.com/D108221 r=bbeurdouche
Differential Revision: https://phabricator.services.mozilla.com/D112302
-rw-r--r-- | lib/freebl/Makefile | 5 | ||||
-rw-r--r-- | lib/freebl/freebl.gyp | 62 | ||||
-rw-r--r-- | lib/freebl/ppc-gcm-wrap.c | 458 | ||||
-rw-r--r-- | lib/freebl/ppc-gcm.h | 76 | ||||
-rw-r--r-- | lib/freebl/ppc-gcm.s | 1051 | ||||
-rw-r--r-- | lib/freebl/rijndael.c | 17 |
6 files changed, 1663 insertions, 6 deletions
diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile index 3d5357297..d13a5e930 100644 --- a/lib/freebl/Makefile +++ b/lib/freebl/Makefile @@ -299,8 +299,9 @@ ifdef USE_64 ifeq ($(PPC_ABI),2) ASFILES += sha512-p8.s ifeq ($(OS_TEST),ppc64le) - EXTRA_SRCS += chacha20poly1305-ppc.c - ASFILES += chacha20-ppc64le.s + DEFINES += -DPPC_GCM + EXTRA_SRCS += chacha20poly1305-ppc.c ppc-gcm-wrap.c + ASFILES += chacha20-ppc64le.s ppc-gcm.s endif # ppc64le endif endif # USE_64 diff --git a/lib/freebl/freebl.gyp b/lib/freebl/freebl.gyp index 865f89232..a4e496399 100644 --- a/lib/freebl/freebl.gyp +++ b/lib/freebl/freebl.gyp @@ -285,6 +285,41 @@ ] }, { + 'target_name': 'gcm-aes-ppc_lib', + 'type': 'static_library', + 'sources': [ + 'ppc-gcm.s', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + }, + { + 'target_name': 'ppc-gcm-wrap-nodepend_c_lib', + 'type': 'static_library', + 'sources': [ + 'ppc-gcm-wrap.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports', + 'gcm-aes-ppc_lib', + ], + }, + { + 'target_name': 'ppc-gcm-wrap_c_lib', + 'type': 'static_library', + 'sources': [ + 'ppc-gcm-wrap.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports', + 'gcm-aes-ppc_lib', + ], + 'defines!': [ + 'FREEBL_NO_DEPEND', + ], + }, + { 'target_name': 'gcm-sha512-nodepend-ppc_c_lib', 'type': 'static_library', 'sources': [ @@ -466,6 +501,7 @@ 'gcm-aes-ppc_c_lib', 'gcm-sha512-ppc_c_lib', 'chacha20-ppc_lib', + 'ppc-gcm-wrap_c_lib', ], }], [ 'disable_altivec==1 and (target_arch=="ppc64" or target_arch=="ppc64le")', { @@ -484,6 +520,7 @@ 'FREEBL_LOWHASH', 'USE_HW_AES', 'INTEL_GCM', + 'PPC_GCM', ], 'conditions': [ [ 'target_arch=="x64"', { @@ -535,10 +572,20 @@ 'gcm-aes-aarch64_c_lib', ], }], - [ 'disable_altivec==0 and (target_arch=="ppc64" or target_arch=="ppc64le")', { - 'dependencies': [ - 'gcm-aes-ppc_c_lib', - 'gcm-sha512-nodepend-ppc_c_lib', + [ 'disable_altivec==0', { + 'conditions': [ + [ 'target_arch=="ppc64"', { + 'dependencies': [ + 
'gcm-aes-ppc_c_lib', + 'gcm-sha512-nodepend-ppc_c_lib', + ], + }, 'target_arch=="ppc64le"', { + 'dependencies': [ + 'gcm-aes-ppc_c_lib', + 'gcm-sha512-nodepend-ppc_c_lib', + 'ppc-gcm-wrap-nodepend_c_lib', + ], + }], ], }], [ 'disable_altivec==1 and (target_arch=="ppc64" or target_arch=="ppc64le")', { @@ -743,6 +790,13 @@ 'FREEBL_LOWHASH', 'FREEBL_NO_DEPEND', ], + 'conditions': [ + [ 'disable_altivec==0 and target_arch=="ppc64le"', { + 'defines': [ + 'PPC_GCM', + ], + }], + ], }], [ 'OS=="linux" or OS=="android"', { 'conditions': [ diff --git a/lib/freebl/ppc-gcm-wrap.c b/lib/freebl/ppc-gcm-wrap.c new file mode 100644 index 000000000..97020e88e --- /dev/null +++ b/lib/freebl/ppc-gcm-wrap.c @@ -0,0 +1,458 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* Copyright(c) 2013, Intel Corp. */ + +/* Wrapper functions for PowerPC optimized implementation of AES-GCM */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapii.h" +#include "blapit.h" +#include "gcm.h" +#include "ctr.h" +#include "secerr.h" +#include "prtypes.h" +#include "pkcs11t.h" + +#include <limits.h> +#include <stdio.h> + +#include "ppc-gcm.h" +#include "rijndael.h" + +struct ppc_AES_GCMContextStr { + unsigned char Htbl[8 * AES_BLOCK_SIZE]; + unsigned char X0[AES_BLOCK_SIZE]; + unsigned char T[AES_BLOCK_SIZE]; + unsigned char CTR[AES_BLOCK_SIZE]; + AESContext *aes_context; + unsigned long tagBits; + unsigned long Alen; + unsigned long Mlen; + freeblCipherFunc cipher; + PRBool ctr_context_init; + gcmIVContext gcm_iv; +}; + +SECStatus ppc_aes_gcmInitCounter(ppc_AES_GCMContext *gcm, + const unsigned char *iv, + unsigned long ivLen, unsigned long tagBits, + const unsigned char *aad, unsigned long aadLen); + +ppc_AES_GCMContext * +ppc_AES_GCM_CreateContext(void *context, + freeblCipherFunc cipher, + const unsigned char *params) +{ 
+ ppc_AES_GCMContext *gcm = NULL; + AESContext *aes = (AESContext *)context; + const CK_NSS_GCM_PARAMS *gcmParams = (const CK_NSS_GCM_PARAMS *)params; + SECStatus rv; + + gcm = PORT_ZNew(ppc_AES_GCMContext); + if (gcm == NULL) { + return NULL; + } + + /* initialize context fields */ + gcm->aes_context = aes; + gcm->cipher = cipher; + gcm->Alen = 0; + gcm->Mlen = 0; + gcm->ctr_context_init = PR_FALSE; + + /* first prepare H and its derivatives for ghash */ + ppc_aes_gcmINIT(gcm->Htbl, aes->k.expandedKey, aes->Nr); + + gcm_InitIVContext(&gcm->gcm_iv); + + /* if gcmParams is NULL, then we are creating an PKCS #11 MESSAGE + * style context, in which we initialize the key once, then do separate + * iv/aad's for each message. If we are doing that kind of operation, + * we've finished with init here. We'll init the Counter in each AEAD + * call */ + if (gcmParams == NULL) { + return gcm; + } + + rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv, + gcmParams->ulIvLen, gcmParams->ulTagBits, + gcmParams->pAAD, gcmParams->ulAADLen); + if (rv != SECSuccess) { + PORT_Free(gcm); + return NULL; + } + gcm->ctr_context_init = PR_TRUE; + + return gcm; +} + +SECStatus +ppc_aes_gcmInitCounter(ppc_AES_GCMContext *gcm, + const unsigned char *iv, unsigned long ivLen, + unsigned long tagBits, + const unsigned char *aad, unsigned long aadLen) +{ + unsigned int j; + SECStatus rv; + + if (ivLen == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (tagBits != 128 && tagBits != 120 && tagBits != 112 && + tagBits != 104 && tagBits != 96 && tagBits != 64 && + tagBits != 32) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + gcm->tagBits = tagBits; + + /* reset the aad and message length counters */ + gcm->Alen = 0; + gcm->Mlen = 0; + + /* Initial TAG value is zero */ + PORT_Memset(gcm->T, 0, AES_BLOCK_SIZE); + PORT_Memset(gcm->X0, 0, AES_BLOCK_SIZE); + + /* Init the counter */ + if (ivLen == 12) { + PORT_Memcpy(gcm->CTR, iv, AES_BLOCK_SIZE - 4); + 
gcm->CTR[12] = 0; + gcm->CTR[13] = 0; + gcm->CTR[14] = 0; + gcm->CTR[15] = 1; + } else { + /* If IV size is not 96 bits, then the initial counter value is GHASH + * of the IV */ + ppc_aes_gcmHASH(gcm->Htbl, iv, ivLen, gcm->T); + + ppc_aes_gcmTAG( + gcm->Htbl, + gcm->T, + ivLen, + 0, + gcm->X0, + gcm->CTR); + + /* TAG should be zero again */ + PORT_Memset(gcm->T, 0, AES_BLOCK_SIZE); + } + + /* Encrypt the initial counter, will be used to encrypt the GHASH value, + * in the end */ + rv = (*gcm->cipher)(gcm->aes_context, gcm->X0, &j, AES_BLOCK_SIZE, gcm->CTR, + AES_BLOCK_SIZE, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + return SECFailure; + } + + /* Promote the counter by 1 */ + gcm->CTR[14] += !(++gcm->CTR[15]); + gcm->CTR[13] += !(gcm->CTR[15]) && !(gcm->CTR[14]); + gcm->CTR[12] += !(gcm->CTR[15]) && !(gcm->CTR[13]) && !(gcm->CTR[12]); + + /* Now hash AAD - it would actually make sense to seperate the context + * creation from the AAD, because that would allow to reuse the H, which + * only changes when the AES key changes, and not every package, like the + * IV and AAD */ + ppc_aes_gcmHASH(gcm->Htbl, aad, aadLen, gcm->T); + gcm->Alen += aadLen; + return SECSuccess; +} + +void +ppc_AES_GCM_DestroyContext(ppc_AES_GCMContext *gcm, PRBool freeit) +{ + PORT_Memset(gcm, 0, sizeof(ppc_AES_GCMContext)); + if (freeit) { + PORT_Free(gcm); + } +} + +SECStatus +ppc_AES_GCM_EncryptUpdate(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + unsigned int j; + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4 && + inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if (!gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_NOT_INITIALIZED); + return SECFailure; + } + + 
tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + if (UINT_MAX - inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxout < inlen + tagBytes) { + *outlen = inlen + tagBytes; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + ppc_aes_gcmCRYPT( + inbuf, + outbuf, + inlen, + gcm->CTR, + gcm->aes_context->k.expandedKey, + gcm->aes_context->Nr); + ppc_aes_gcmHASH( + gcm->Htbl, + outbuf, + inlen, + gcm->T); + + gcm->Mlen += inlen; + + ppc_aes_gcmTAG( + gcm->Htbl, + gcm->T, + gcm->Mlen, + gcm->Alen, + gcm->X0, + T); + + *outlen = inlen + tagBytes; + + for (j = 0; j < tagBytes; j++) { + outbuf[inlen + j] = T[j]; + } + return SECSuccess; +} + +SECStatus +ppc_AES_GCM_DecryptUpdate(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const unsigned char *intag; + + if (!gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_NOT_INITIALIZED); + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + + /* get the authentication block */ + if (inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + inlen -= tagBytes; + intag = inbuf + inlen; + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4 && + inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + ppc_aes_gcmHASH( + gcm->Htbl, + inbuf, + inlen, + gcm->T); + ppc_aes_gcmCRYPT( + inbuf, + outbuf, + inlen, + gcm->CTR, + gcm->aes_context->k.expandedKey, + gcm->aes_context->Nr); + + gcm->Mlen += inlen; + ppc_aes_gcmTAG( + gcm->Htbl, + gcm->T, + gcm->Mlen, + 
gcm->Alen, + gcm->X0, + T); + + if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) { + memset(outbuf, 0, inlen); + *outlen = 0; + /* force a CKR_ENCRYPTED_DATA_INVALID error at in softoken */ + PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + } + *outlen = inlen; + + return SECSuccess; +} + +SECStatus +ppc_AES_GCM_EncryptAEAD(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const CK_GCM_MESSAGE_PARAMS *gcmParams = + (const CK_GCM_MESSAGE_PARAMS *)params; + SECStatus rv; + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4 && + inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + /* paramLen comes all the way from the application layer, make sure + * it's correct */ + if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* if we were initialized with the C_EncryptInit, we shouldn't be in this + * function */ + if (gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = gcm_GenerateIV(&gcm->gcm_iv, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulIvFixedBits, gcmParams->ivGenerator); + if (rv != SECSuccess) { + return SECFailure; + } + + rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulTagBits, aad, aadLen); + if (rv != SECSuccess) { + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + + ppc_aes_gcmCRYPT(inbuf, outbuf, inlen, gcm->CTR, 
gcm->aes_context->k.expandedKey, + gcm->aes_context->Nr); + ppc_aes_gcmHASH(gcm->Htbl, outbuf, inlen, gcm->T); + + gcm->Mlen += inlen; + + ppc_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, T); + + *outlen = inlen; + PORT_Memcpy(gcmParams->pTag, T, tagBytes); + return SECSuccess; +} + +SECStatus +ppc_AES_GCM_DecryptAEAD(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const unsigned char *intag; + const CK_GCM_MESSAGE_PARAMS *gcmParams = + (const CK_GCM_MESSAGE_PARAMS *)params; + SECStatus rv; + + /* paramLen comes all the way from the application layer, make sure + * it's correct */ + if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* if we were initialized with the C_DecryptInit, we shouldn't be in this + * function */ + if (gcm->ctr_context_init) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + + // GCM has a 16 octet block, with a 32-bit block counter + // Limit in accordance with SP800-38D + if (sizeof(inlen) > 4 && + inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen, + gcmParams->ulTagBits, aad, aadLen); + if (rv != SECSuccess) { + return SECFailure; + } + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + intag = gcmParams->pTag; + PORT_Assert(tagBytes != 0); + + ppc_aes_gcmHASH(gcm->Htbl, inbuf, inlen, gcm->T); + ppc_aes_gcmCRYPT(inbuf, outbuf, inlen, gcm->CTR, gcm->aes_context->k.expandedKey, + gcm->aes_context->Nr); + + 
gcm->Mlen += inlen; + ppc_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, T); + + if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) { + memset(outbuf, 0, inlen); + *outlen = 0; + /* force a CKR_ENCRYPTED_DATA_INVALID error at in softoken */ + PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + } + *outlen = inlen; + + return SECSuccess; +} diff --git a/lib/freebl/ppc-gcm.h b/lib/freebl/ppc-gcm.h new file mode 100644 index 000000000..169cad0a5 --- /dev/null +++ b/lib/freebl/ppc-gcm.h @@ -0,0 +1,76 @@ +/******************************************************************************/ +/* LICENSE: */ +/* This submission to NSS is to be made available under the terms of the */ +/* Mozilla Public License, v. 2.0. You can obtain one at http: */ +/* //mozilla.org/MPL/2.0/. */ +/******************************************************************************/ + +#ifndef PPC_GCM_H +#define PPC_GCM_H 1 + +#include "blapii.h" + +typedef struct ppc_AES_GCMContextStr ppc_AES_GCMContext; + +ppc_AES_GCMContext *ppc_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *params); + +void ppc_AES_GCM_DestroyContext(ppc_AES_GCMContext *gcm, PRBool freeit); + +SECStatus ppc_AES_GCM_EncryptUpdate(ppc_AES_GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); + +SECStatus ppc_AES_GCM_DecryptUpdate(ppc_AES_GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); +SECStatus ppc_AES_GCM_EncryptAEAD(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize); +SECStatus ppc_AES_GCM_DecryptAEAD(ppc_AES_GCMContext *gcm, + unsigned char *outbuf, + 
unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + void *params, unsigned int paramLen, + const unsigned char *aad, unsigned int aadLen, + unsigned int blocksize); + +/* Prototypes of the functions defined in the assembler file. */ + +/* Prepares the constants used in the aggregated reduction method */ +void ppc_aes_gcmINIT(unsigned char Htbl[8 * 16], + PRUint32 *KS, + int NR); + +/* Produces the final GHASH value */ +void ppc_aes_gcmTAG(unsigned char Htbl[8 * 16], + unsigned char *Tp, + unsigned long Mlen, + unsigned long Alen, + unsigned char *X0, + unsigned char *TAG); + +/* Hashes the Additional Authenticated Data, should be used before enc/dec. + Operates on any length of data. Partial block is padded internally. */ +void ppc_aes_gcmHASH(unsigned char Htbl[8 * 16], + const unsigned char *AAD, + unsigned long Alen, + unsigned char *Tp); + +/* Crypt only, used in combination with ppc_aes_gcmAAD(). + Operates on any length of data, however partial block should only be encrypted + at the last call, otherwise the result will be incorrect. */ +void ppc_aes_gcmCRYPT(const unsigned char *PT, + unsigned char *CT, + unsigned long len, + unsigned char *CTRP, + PRUint32 *KS, + int NR); + +#endif diff --git a/lib/freebl/ppc-gcm.s b/lib/freebl/ppc-gcm.s new file mode 100644 index 000000000..4d5fff437 --- /dev/null +++ b/lib/freebl/ppc-gcm.s @@ -0,0 +1,1051 @@ +# This submission to NSS is to be made available under the terms of the +# Mozilla Public License, v. 2.0. 
You can obtain one at //mozilla.org/MPL/2.0/ +# Copyright(c) 2021, Niels Möller and Mamone Tarsha + +# Registers: + +.set SP, 1 +.set TOCP, 2 + +.macro VEC_LOAD_DATA VR, DATA, GPR + addis \GPR, 2, \DATA@got@ha + ld \GPR, \DATA@got@l(\GPR) + lvx \VR, 0, \GPR +.endm + +.macro VEC_LOAD VR, GPR, IDX + lxvd2x \VR+32, \IDX, \GPR + vperm \VR, \VR, \VR, SWAP_MASK +.endm + +.macro VEC_LOAD_INC VR, GPR, IDX + lxvd2x \VR+32, \IDX, \GPR + addi \IDX,\IDX,16 + vperm \VR, \VR, \VR, SWAP_MASK +.endm + +.macro VEC_STORE VR, GPR, IDX + vperm \VR, \VR, \VR, SWAP_MASK + stxvd2x \VR+32, \IDX, \GPR +.endm + +# 0 < LEN < 16, pad the remaining bytes with zeros +.macro LOAD_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2 + li \TMP0, 0 + li \VAL1, 0 + li \VAL0, 0 + andi. \TMP1, \LEN, 8 + beq 1f + ldbrx \VAL1, 0, \DATA + li \TMP0, 8 +1: + andi. \TMP1, \LEN, 7 + beq 3f + li \TMP1, 56 +2: + lbzx \TMP2, \TMP0, \DATA + sld \TMP2, \TMP2, \TMP1 + subi \TMP1, \TMP1, 8 + or \VAL0, \VAL0, \TMP2 + addi \TMP0, \TMP0, 1 + cmpld \TMP0, \LEN + bne 2b + andi. \TMP1, \LEN, 8 + bne 3f + mr \VAL1, \VAL0 + li \VAL0, 0 +3: +.endm + +# 0 < LEN < 16 +.macro STORE_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2 + andi. \TMP1, \LEN, 8 + beq 1f + stdbrx \VAL1, 0, \DATA + li \TMP0, 8 + b 2f +1: + li \TMP0, 0 + mr \VAL0, \VAL1 +2: + andi. 
\TMP1, \LEN, 7 + beq 4f + li \TMP1, 56 +3: + srd \TMP2, \VAL0, \TMP1 + subi \TMP1, \TMP1, 8 + stbx \TMP2, \TMP0, \DATA + addi \TMP0, \TMP0, 1 + cmpld \TMP0, \LEN + bne 3b +4: +.endm + +.text + +################################################################################ +# Generates the H table +# void ppc_aes_gcmINIT(uint8_t Htbl[16*8], uint32_t *KS, int NR); +.globl ppc_aes_gcmINIT +.type ppc_aes_gcmINIT,@function +.align 5 +ppc_aes_gcmINIT: +addis TOCP,12,(.TOC.-ppc_aes_gcmINIT)@ha +addi TOCP,TOCP,(.TOC.-ppc_aes_gcmINIT)@l +.localentry ppc_aes_gcmINIT, .-ppc_aes_gcmINIT + +.set Htbl, 3 +.set KS, 4 +.set NR, 5 + +.set ZERO, 19 +.set MSB, 18 +.set ONE, 17 +.set SWAP_MASK, 0 +.set POLY, 1 +.set K, 2 +.set H, 3 +.set H2, 4 +.set H3, 5 +.set H4, 6 +.set HP, 7 +.set HS, 8 +.set R, 9 +.set F, 10 +.set T, 11 +.set H1M, 12 +.set H1L, 13 +.set H2M, 14 +.set H2L, 15 +.set H3M, 16 +.set H3L, 17 +.set H4M, 18 +.set H4L, 19 + + VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 6 + VEC_LOAD_DATA POLY, .Lpoly, 6 + + li 6, 0 + VEC_LOAD_INC H, KS, 6 + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + cmpwi NR, 10 + beq .LH_done + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + cmpwi NR, 12 + beq .LH_done + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + VEC_LOAD_INC K, KS, 6 + vcipher H, H, K + +.LH_done: + VEC_LOAD K, KS, 6 + vcipherlast H, H, K + + vupkhsb MSB, H + vspltisb ONE, 1 + vspltb MSB, MSB, 0 + vsl H, H, ONE + vand MSB, MSB, POLY + vxor ZERO, ZERO, ZERO + vxor H, H, MSB + vsldoi POLY, ZERO, POLY, 8 + + vpmsumd HP, H, POLY + vsldoi HS, H, H, 8 + vxor HP, HP, HS + vsldoi H1L, HP, HS, 8 + vsldoi H1M, HS, HP, 8 + vsldoi H1L, 
H1L, H1L, 8 + + # calculate H^2 + + vpmsumd F, H, H1L + vpmsumd R, H, H1M + + vpmsumd T, F, POLY + vsldoi H2, F, F, 8 + vxor R, R, T + vxor H2, H2, R + + vpmsumd HP, H2, POLY + vsldoi HS, H2, H2, 8 + vxor HP, HP, HS + vsldoi H2L, HP, HS, 8 + vsldoi H2M, HS, HP, 8 + vsldoi H2L, H2L, H2L, 8 + + # calculate H^3 + + vpmsumd F, H2, H1L + vpmsumd R, H2, H1M + + vpmsumd T, F, POLY + vsldoi H3, F, F, 8 + vxor R, R, T + vxor H3, H3, R + + vpmsumd HP, H3, POLY + vsldoi HS, H3, H3, 8 + vxor HP, HP, HS + vsldoi H3L, HP, HS, 8 + vsldoi H3M, HS, HP, 8 + vsldoi H3L, H3L, H3L, 8 + + # calculate H^4 + + vpmsumd F, H2, H2L + vpmsumd R, H2, H2M + + vpmsumd T, F, POLY + vsldoi H4, F, F, 8 + vxor R, R, T + vxor H4, H4, R + + vpmsumd HP, H4, POLY + vsldoi HS, H4, H4, 8 + vxor HP, HP, HS + vsldoi H4L, HP, HS, 8 + vsldoi H4M, HS, HP, 8 + vsldoi H4L, H4L, H4L, 8 + + li 8, 16*1 + li 9, 16*2 + li 10, 16*3 + stxvd2x H1L+32, 0, Htbl + stxvd2x H1M+32, 8, Htbl + stxvd2x H2L+32, 9, Htbl + stxvd2x H2M+32, 10, Htbl + li 7, 16*4 + li 8, 16*5 + li 9, 16*6 + li 10, 16*7 + stxvd2x H3L+32, 7, Htbl + stxvd2x H3M+32, 8, Htbl + stxvd2x H4L+32, 9, Htbl + stxvd2x H4M+32, 10, Htbl + + blr +.size ppc_aes_gcmINIT, . 
- ppc_aes_gcmINIT + +################################################################################ +# Authenticate only +# void ppc_aes_gcmHASH(uint8_t Htbl[16*8], uint8_t *AAD, uint64_t Alen, uint8_t *Tp); +.globl ppc_aes_gcmHASH +.type ppc_aes_gcmHASH,@function +.align 5 +ppc_aes_gcmHASH: +addis TOCP,12,(.TOC.-ppc_aes_gcmHASH)@ha +addi TOCP,TOCP,(.TOC.-ppc_aes_gcmHASH)@l +.localentry ppc_aes_gcmHASH, .-ppc_aes_gcmHASH + +.set Htbl, 3 +.set AAD, 4 +.set Alen, 5 +.set Tp, 6 + +.set SWAP_MASK, 0 +.set POLY, 1 +.set D, 2 +.set C0, 3 +.set C1, 4 +.set C2, 5 +.set C3, 6 +.set T, 7 +.set R, 8 +.set F, 9 +.set R2, 10 +.set F2, 11 +.set R3, 12 +.set F3, 13 +.set R4, 14 +.set F4, 15 +.set H1M, 16 +.set H1L, 17 +.set H2M, 18 +.set H2L, 19 +.set H3M, 28 +.set H3L, 29 +.set H4M, 30 +.set H4L, 31 + + # store non-volatile vector registers + addi 7, SP, -16 + stvx 31, 0, 7 + addi 7, SP, -32 + stvx 30, 0, 7 + addi 7, SP, -48 + stvx 29, 0, 7 + addi 7, SP, -64 + stvx 28, 0, 7 + + VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 7 + VEC_LOAD_DATA POLY, .Lpoly_r, 7 + + VEC_LOAD D, Tp, 0 + + # --- process 4 blocks --- + + srdi. 
7, Alen, 6 # 4-blocks loop count + beq .L2x + + mtctr 7 # set counter register + + # load table elements + li 8, 1*16 + li 9, 2*16 + li 10, 3*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 8, Htbl + lxvd2x H2L+32, 9, Htbl + lxvd2x H2M+32, 10, Htbl + li 7, 4*16 + li 8, 5*16 + li 9, 6*16 + li 10, 7*16 + lxvd2x H3L+32, 7, Htbl + lxvd2x H3M+32, 8, Htbl + lxvd2x H4L+32, 9, Htbl + lxvd2x H4M+32, 10, Htbl + + li 8, 0x10 + li 9, 0x20 + li 10, 0x30 +.align 5 +.L4x_loop: + # load input + lxvd2x C0+32, 0, AAD + lxvd2x C1+32, 8, AAD + lxvd2x C2+32, 9, AAD + lxvd2x C3+32, 10, AAD + + vperm C0, C0, C0, SWAP_MASK + vperm C1, C1, C1, SWAP_MASK + vperm C2, C2, C2, SWAP_MASK + vperm C3, C3, C3, SWAP_MASK + + # digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F2, H3L, C1 + vpmsumd R2, H3M, C1 + vpmsumd F3, H2L, C2 + vpmsumd R3, H2M, C2 + vpmsumd F4, H1L, C3 + vpmsumd R4, H1M, C3 + vpmsumd F, H4L, C0 + vpmsumd R, H4M, C0 + + # deferred recombination of partial products + vxor F3, F3, F4 + vxor R3, R3, R4 + vxor F, F, F2 + vxor R, R, R2 + vxor F, F, F3 + vxor R, R, R3 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D + + addi AAD, AAD, 0x40 + bdnz .L4x_loop + + clrldi Alen, Alen, 58 +.L2x: + # --- process 2 blocks --- + + srdi. 
7, Alen, 5 + beq .L1x + + # load table elements + li 8, 1*16 + li 9, 2*16 + li 10, 3*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 8, Htbl + lxvd2x H2L+32, 9, Htbl + lxvd2x H2M+32, 10, Htbl + + # load input + li 10, 0x10 + lxvd2x C0+32, 0, AAD + lxvd2x C1+32, 10, AAD + + vperm C0, C0, C0, SWAP_MASK + vperm C1, C1, C1, SWAP_MASK + + # previous digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F2, H1L, C1 + vpmsumd R2, H1M, C1 + vpmsumd F, H2L, C0 + vpmsumd R, H2M, C0 + + # deferred recombination of partial products + vxor F, F, F2 + vxor R, R, R2 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D + + addi AAD, AAD, 0x20 + clrldi Alen, Alen, 59 +.L1x: + # --- process 1 block --- + + srdi. 7, Alen, 4 + beq .Ltail + + # load table elements + li 8, 1*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 8, Htbl + + # load input + lxvd2x C0+32, 0, AAD + + vperm C0, C0, C0, SWAP_MASK + + # previous digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F, H1L, C0 + vpmsumd R, H1M, C0 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D + + addi AAD, AAD, 0x10 + clrldi Alen, Alen, 60 + +.Ltail: + cmpldi Alen, 0 + beq .Lh_done + # --- process the final partial block --- + + # load table elements + li 8, 1*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 8, Htbl + + LOAD_LEN AAD, Alen, 10, 9, 3, 7, 8 + mtvrd C0, 10 + mtvrd C1, 9 + xxmrghd C0+32, C0+32, C1+32 + + # previous digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F, H1L, C0 + vpmsumd R, H1M, C0 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D +.Lh_done: + VEC_STORE D, Tp, 0 + + # restore non-volatile vector registers + addi 7, SP, -16 + lvx 31, 0, 7 + addi 7, SP, -32 + lvx 30, 0, 7 + addi 7, SP, -48 + lvx 29, 0, 7 + addi 7, SP, -64 + lvx 28, 0, 7 + blr +.size ppc_aes_gcmHASH, . 
- ppc_aes_gcmHASH + +################################################################################ +# Generates the final GCM tag +# void ppc_aes_gcmTAG(uint8_t Htbl[16*8], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG); +.globl ppc_aes_gcmTAG +.type ppc_aes_gcmTAG,@function +.align 5 +ppc_aes_gcmTAG: +addis TOCP,12,(.TOC.-ppc_aes_gcmTAG)@ha +addi TOCP,TOCP,(.TOC.-ppc_aes_gcmTAG)@l +.localentry ppc_aes_gcmTAG, .-ppc_aes_gcmTAG + +.set Htbl, 3 +.set Tp, 4 +.set Mlen, 5 +.set Alen, 6 +.set X0, 7 +.set TAG, 8 + +.set SWAP_MASK, 0 +.set POLY, 1 +.set D, 2 +.set C0, 3 +.set C1, 4 +.set T, 5 +.set R, 6 +.set F, 7 +.set H1M, 8 +.set H1L, 9 +.set X, 10 + + VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 9 + VEC_LOAD_DATA POLY, .Lpoly_r, 9 + + VEC_LOAD D, Tp, 0 + + # load table elements + li 9, 1*16 + lxvd2x H1L+32, 0, Htbl + lxvd2x H1M+32, 9, Htbl + + sldi Alen, Alen, 3 + sldi Mlen, Mlen, 3 + mtvrd C0, Alen + mtvrd C1, Mlen + xxmrghd C0+32, C0+32, C1+32 + + # previous digest combining + vxor C0, C0, D + + # polynomial multiplication + vpmsumd F, H1L, C0 + vpmsumd R, H1M, C0 + + # reduction + vpmsumd T, F, POLY + vsldoi D, F, F, 8 + vxor R, R, T + vxor D, R, D + + lxvd2x X+32, 0, X0 + vperm D, D, D, SWAP_MASK + vxor X, X, D + stxvd2x X+32, 0, TAG + + blr +.size ppc_aes_gcmTAG, . 
- ppc_aes_gcmTAG + +################################################################################ +# Crypt only +# void ppc_aes_gcmCRYPT(const uint8_t* PT, uint8_t* CT, uint64_t LEN, uint8_t *CTRP, uint32_t *KS, int NR); +.globl ppc_aes_gcmCRYPT +.type ppc_aes_gcmCRYPT,@function +.align 5 +ppc_aes_gcmCRYPT: +addis TOCP,12,(.TOC.-ppc_aes_gcmCRYPT)@ha +addi TOCP,TOCP,(.TOC.-ppc_aes_gcmCRYPT)@l +.localentry ppc_aes_gcmCRYPT, .-ppc_aes_gcmCRYPT + +.set PT, 3 +.set CT, 4 +.set LEN, 5 +.set CTRP, 6 +.set KS, 7 +.set NR, 8 + +.set SWAP_MASK, 0 +.set K, 1 +.set CTR, 2 +.set CTR0, 3 +.set CTR1, 4 +.set CTR2, 5 +.set CTR3, 6 +.set CTR4, 7 +.set CTR5, 8 +.set CTR6, 9 +.set CTR7, 10 +.set ZERO, 11 +.set I1, 12 +.set I2, 13 +.set I3, 14 +.set I4, 15 +.set I5, 16 +.set I6, 17 +.set I7, 18 +.set I8, 19 +.set IN0, 24 +.set IN1, 25 +.set IN2, 26 +.set IN3, 27 +.set IN4, 28 +.set IN5, 29 +.set IN6, 30 +.set IN7, 31 + +.macro ROUND_8 + VEC_LOAD_INC K, KS, 10 + vcipher CTR0, CTR0, K + vcipher CTR1, CTR1, K + vcipher CTR2, CTR2, K + vcipher CTR3, CTR3, K + vcipher CTR4, CTR4, K + vcipher CTR5, CTR5, K + vcipher CTR6, CTR6, K + vcipher CTR7, CTR7, K +.endm + +.macro ROUND_4 + VEC_LOAD_INC K, KS, 10 + vcipher CTR0, CTR0, K + vcipher CTR1, CTR1, K + vcipher CTR2, CTR2, K + vcipher CTR3, CTR3, K +.endm + +.macro ROUND_2 + VEC_LOAD_INC K, KS, 10 + vcipher CTR0, CTR0, K + vcipher CTR1, CTR1, K +.endm + +.macro ROUND_1 + VEC_LOAD_INC K, KS, 10 + vcipher CTR0, CTR0, K +.endm + + # store non-volatile general registers + std 31,-8(SP); + std 30,-16(SP); + std 29,-24(SP); + std 28,-32(SP); + std 27,-40(SP); + std 26,-48(SP); + std 25,-56(SP); + + # store non-volatile vector registers + addi 9, SP, -80 + stvx 31, 0, 9 + addi 9, SP, -96 + stvx 30, 0, 9 + addi 9, SP, -112 + stvx 29, 0, 9 + addi 9, SP, -128 + stvx 28, 0, 9 + addi 9, SP, -144 + stvx 27, 0, 9 + addi 9, SP, -160 + stvx 26, 0, 9 + addi 9, SP, -176 + stvx 25, 0, 9 + addi 9, SP, -192 + stvx 24, 0, 9 + + VEC_LOAD_DATA SWAP_MASK, 
.Ldb_bswap_mask, 9 + + vxor ZERO, ZERO, ZERO + vspltisb I1, 1 + vspltisb I2, 2 + vspltisb I3, 3 + vspltisb I4, 4 + vspltisb I5, 5 + vspltisb I6, 6 + vspltisb I7, 7 + vspltisb I8, 8 + vsldoi I1, ZERO, I1, 1 + vsldoi I2, ZERO, I2, 1 + vsldoi I3, ZERO, I3, 1 + vsldoi I4, ZERO, I4, 1 + vsldoi I5, ZERO, I5, 1 + vsldoi I6, ZERO, I6, 1 + vsldoi I7, ZERO, I7, 1 + vsldoi I8, ZERO, I8, 1 + + VEC_LOAD CTR, CTRP, 0 + + srdi. 9, LEN, 7 + beq .Lctr_4x + + mtctr 9 + + li 10, 0 + li 25, 0x10 + li 26, 0x20 + li 27, 0x30 + li 28, 0x40 + li 29, 0x50 + li 30, 0x60 + li 31, 0x70 + +.align 5 +.L8x_loop: + VEC_LOAD_INC K, KS, 10 + + vadduwm CTR1, CTR, I1 + vadduwm CTR2, CTR, I2 + vadduwm CTR3, CTR, I3 + vadduwm CTR4, CTR, I4 + vadduwm CTR5, CTR, I5 + vadduwm CTR6, CTR, I6 + vadduwm CTR7, CTR, I7 + + vxor CTR0, CTR, K + vxor CTR1, CTR1, K + vxor CTR2, CTR2, K + vxor CTR3, CTR3, K + vxor CTR4, CTR4, K + vxor CTR5, CTR5, K + vxor CTR6, CTR6, K + vxor CTR7, CTR7, K + + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + ROUND_8 + cmpwi NR, 10 + beq .Llast_8 + ROUND_8 + ROUND_8 + cmpwi NR, 12 + beq .Llast_8 + ROUND_8 + ROUND_8 + +.Llast_8: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + vcipherlast CTR1, CTR1, K + vcipherlast CTR2, CTR2, K + vcipherlast CTR3, CTR3, K + vcipherlast CTR4, CTR4, K + vcipherlast CTR5, CTR5, K + vcipherlast CTR6, CTR6, K + vcipherlast CTR7, CTR7, K + + lxvd2x IN0+32, 0, PT + lxvd2x IN1+32, 25, PT + lxvd2x IN2+32, 26, PT + lxvd2x IN3+32, 27, PT + lxvd2x IN4+32, 28, PT + lxvd2x IN5+32, 29, PT + lxvd2x IN6+32, 30, PT + lxvd2x IN7+32, 31, PT + + vperm CTR0, CTR0, CTR0, SWAP_MASK + vperm CTR1, CTR1, CTR1, SWAP_MASK + vperm CTR2, CTR2, CTR2, SWAP_MASK + vperm CTR3, CTR3, CTR3, SWAP_MASK + vperm CTR4, CTR4, CTR4, SWAP_MASK + vperm CTR5, CTR5, CTR5, SWAP_MASK + vperm CTR6, CTR6, CTR6, SWAP_MASK + vperm CTR7, CTR7, CTR7, SWAP_MASK + + vxor IN0, IN0, CTR0 + vxor IN1, IN1, CTR1 + vxor IN2, IN2, CTR2 + vxor IN3, IN3, CTR3 + vxor IN4, IN4, CTR4 + 
vxor IN5, IN5, CTR5 + vxor IN6, IN6, CTR6 + vxor IN7, IN7, CTR7 + + stxvd2x IN0+32, 0, CT + stxvd2x IN1+32, 25, CT + stxvd2x IN2+32, 26, CT + stxvd2x IN3+32, 27, CT + stxvd2x IN4+32, 28, CT + stxvd2x IN5+32, 29, CT + stxvd2x IN6+32, 30, CT + stxvd2x IN7+32, 31, CT + + vadduwm CTR, CTR, I8 + addi PT, PT, 0x80 + addi CT, CT, 0x80 + bdnz .L8x_loop + + clrldi LEN, LEN, 57 + +.Lctr_4x: + srdi. 9, LEN, 6 + beq .Lctr_2x + + li 10, 0 + li 29, 0x10 + li 30, 0x20 + li 31, 0x30 + + VEC_LOAD_INC K, KS, 10 + + vadduwm CTR1, CTR, I1 + vadduwm CTR2, CTR, I2 + vadduwm CTR3, CTR, I3 + + vxor CTR0, CTR, K + vxor CTR1, CTR1, K + vxor CTR2, CTR2, K + vxor CTR3, CTR3, K + + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + ROUND_4 + cmpwi NR, 10 + beq .Llast_4 + ROUND_4 + ROUND_4 + cmpwi NR, 12 + beq .Llast_4 + ROUND_4 + ROUND_4 + +.Llast_4: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + vcipherlast CTR1, CTR1, K + vcipherlast CTR2, CTR2, K + vcipherlast CTR3, CTR3, K + + lxvd2x IN0+32, 0, PT + lxvd2x IN1+32, 29, PT + lxvd2x IN2+32, 30, PT + lxvd2x IN3+32, 31, PT + + vperm CTR0, CTR0, CTR0, SWAP_MASK + vperm CTR1, CTR1, CTR1, SWAP_MASK + vperm CTR2, CTR2, CTR2, SWAP_MASK + vperm CTR3, CTR3, CTR3, SWAP_MASK + + vxor IN0, IN0, CTR0 + vxor IN1, IN1, CTR1 + vxor IN2, IN2, CTR2 + vxor IN3, IN3, CTR3 + + stxvd2x IN0+32, 0, CT + stxvd2x IN1+32, 29, CT + stxvd2x IN2+32, 30, CT + stxvd2x IN3+32, 31, CT + + vadduwm CTR, CTR, I4 + addi PT, PT, 0x40 + addi CT, CT, 0x40 + + clrldi LEN, LEN, 58 + +.Lctr_2x: + srdi. 
9, LEN, 5 + beq .Lctr_1x + + li 10, 0 + li 31, 0x10 + + VEC_LOAD_INC K, KS, 10 + + vadduwm CTR1, CTR, I1 + + vxor CTR0, CTR, K + vxor CTR1, CTR1, K + + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + ROUND_2 + cmpwi NR, 10 + beq .Llast_2 + ROUND_2 + ROUND_2 + cmpwi NR, 12 + beq .Llast_2 + ROUND_2 + ROUND_2 + +.Llast_2: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + vcipherlast CTR1, CTR1, K + + lxvd2x IN0+32, 0, PT + lxvd2x IN1+32, 31, PT + + vperm CTR0, CTR0, CTR0, SWAP_MASK + vperm CTR1, CTR1, CTR1, SWAP_MASK + + vxor IN0, IN0, CTR0 + vxor IN1, IN1, CTR1 + + stxvd2x IN0+32, 0, CT + stxvd2x IN1+32, 31, CT + + vadduwm CTR, CTR, I2 + addi PT, PT, 0x20 + addi CT, CT, 0x20 + + clrldi LEN, LEN, 59 + +.Lctr_1x: + srdi. 9, LEN, 4 + beq .Lctr_tail + + li 10, 0 + + VEC_LOAD_INC K, KS, 10 + vxor CTR0, CTR, K + + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + cmpwi NR, 10 + beq .Llast_1 + ROUND_1 + ROUND_1 + cmpwi NR, 12 + beq .Llast_1 + ROUND_1 + ROUND_1 + +.Llast_1: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + + lxvd2x IN0+32, 0, PT + + vperm CTR0, CTR0, CTR0, SWAP_MASK + + vxor IN0, IN0, CTR0 + + stxvd2x IN0+32, 0, CT + + vadduwm CTR, CTR, I1 + addi PT, PT, 0x10 + addi CT, CT, 0x10 + + clrldi LEN, LEN, 60 + +.Lctr_tail: + cmpldi LEN, 0 + beq .Lc_done + + li 10, 0 + + VEC_LOAD_INC K, KS, 10 + vxor CTR0, CTR, K + + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + ROUND_1 + cmpwi NR, 10 + beq .Llast_tail + ROUND_1 + ROUND_1 + cmpwi NR, 12 + beq .Llast_tail + ROUND_1 + ROUND_1 + +.Llast_tail: + VEC_LOAD K, KS, 10 + vcipherlast CTR0, CTR0, K + + LOAD_LEN PT, LEN, 10, 9, 29, 30, 31 + + vsldoi CTR1, CTR0, CTR0, 8 + mfvrd 31, CTR0 + mfvrd 30, CTR1 + + xor 10, 10, 31 + xor 9, 9, 30 + + STORE_LEN CT, LEN, 10, 9, 29, 30, 31 + + vadduwm CTR, CTR, I1 + +.Lc_done: + VEC_STORE CTR, CTRP, 0 + + # restore non-volatile vector registers + addi 9, SP, -80 + lvx 31, 0, 9 + addi 
9, SP, -96 + lvx 30, 0, 9 + addi 9, SP, -112 + lvx 29, 0, 9 + addi 9, SP, -128 + lvx 28, 0, 9 + addi 9, SP, -144 + lvx 27, 0, 9 + addi 9, SP, -160 + lvx 26, 0, 9 + addi 9, SP, -176 + lvx 25, 0, 9 + addi 9, SP, -192 + lvx 24, 0, 9 + + # restore non-volatile general registers + ld 31,-8(SP); + ld 30,-16(SP); + ld 29,-24(SP); + ld 28,-32(SP); + ld 27,-40(SP); + ld 26,-48(SP); + ld 25,-56(SP); + blr +.size ppc_aes_gcmCRYPT, . - ppc_aes_gcmCRYPT + +.data +.align 4 +.Lpoly: + .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +.Lpoly_r: + .byte 0,0,0,0,0,0,0,0xc2,0,0,0,0,0,0,0,0 +.Ldb_bswap_mask: + .byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7 diff --git a/lib/freebl/rijndael.c b/lib/freebl/rijndael.c index 546731f9d..82b1f419d 100644 --- a/lib/freebl/rijndael.c +++ b/lib/freebl/rijndael.c @@ -25,6 +25,10 @@ #undef USE_HW_AES #endif +#ifdef __powerpc64__ +#include "ppc-crypto.h" +#endif + #ifdef USE_HW_AES #ifdef NSS_X86_OR_X64 #include "intel-aes.h" @@ -35,6 +39,9 @@ #ifdef INTEL_GCM #include "intel-gcm.h" #endif /* INTEL_GCM */ +#if defined(USE_PPC_CRYPTO) && defined(PPC_GCM) +#include "ppc-gcm.h" +#endif /* Forward declarations */ void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, @@ -1020,6 +1027,16 @@ AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext; cx->isBlock = PR_FALSE; } else +#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM) + if (ppc_crypto_support() && (keysize % 8) == 0) { + cx->worker_cx = ppc_AES_GCM_CreateContext(cx, cx->worker, iv); + cx->worker = (freeblCipherFunc)(encrypt ? ppc_AES_GCM_EncryptUpdate + : ppc_AES_GCM_DecryptUpdate); + cx->worker_aead = (freeblAeadFunc)(encrypt ? ppc_AES_GCM_EncryptAEAD + : ppc_AES_GCM_DecryptAEAD); + cx->destroy = (freeblDestroyFunc)ppc_AES_GCM_DestroyContext; + cx->isBlock = PR_FALSE; + } else #endif { cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv); |