summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kevin Jacobs <kjacobs@mozilla.com> 2020-01-22 16:21:45 +0000
committer Kevin Jacobs <kjacobs@mozilla.com> 2020-01-22 16:21:45 +0000
commit b5884338a1a111a365b4d4bdd6a0c8d1d37bceec (patch)
tree 01c74eefc02a22b809be7b42d4b18412a87ea216
parent 1d9fba9af57f097e333fcda14b0f207030972609 (diff)
download nss-hg-b5884338a1a111a365b4d4bdd6a0c8d1d37bceec.tar.gz
Bug 1608493 - Use AES-NI intrinsics for CBC and ECB decrypt when no assembly implementation is available. r=mt
AES-NI is currently not used for //CBC// or //ECB decrypt// when an assembly implementation (`intel-aes.s` or `intel-aes-x86/64-masm.asm`) is not available. Concretely, this is the case on MacOS, Linux32, and other non-Linux OSes such as BSD. This patch adds the plumbing to use AES-NI intrinsics when available.

Before:
```
mode       in    symmkey  opreps  cxreps  context  op       time(sec)  thrgput
aes_ecb_d  78Mb  256      10T     0       0.000    395.000  0.395      197Mb
aes_cbc_e  78Mb  256      10T     0       0.000    392.000  0.393      198Mb
aes_cbc_d  78Mb  256      10T     0       0.000    425.000  0.425      183Mb
```

After:
```
mode       in    symmkey  opreps  cxreps  context  op       time(sec)  thrgput
aes_ecb_d  78Mb  256      10T     0       0.000    39.000   0.039      1Gb
aes_cbc_e  78Mb  256      10T     0       0.000    94.000   0.094      831Mb
aes_cbc_d  78Mb  256      10T     0       0.000    74.000   0.075      1Gb
```

Differential Revision: https://phabricator.services.mozilla.com/D60195
-rw-r--r-- lib/freebl/aes-x86.c 27
-rw-r--r-- lib/freebl/rijndael.c 99
-rw-r--r-- lib/freebl/rijndael.h 4
3 files changed, 96 insertions, 34 deletions
diff --git a/lib/freebl/aes-x86.c b/lib/freebl/aes-x86.c
index edd0b5125..508fcc650 100644
--- a/lib/freebl/aes-x86.c
+++ b/lib/freebl/aes-x86.c
@@ -155,3 +155,30 @@ rijndael_native_encryptBlock(AESContext *cx,
m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]);
_mm_storeu_si128((__m128i *)output, m);
}
+
+void
+rijndael_native_decryptBlock(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ int i;
+ pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input);
+ m = _mm_xor_si128(m, cx->k.keySchedule[cx->Nr]);
+ for (i = cx->Nr - 1; i > 0; --i) {
+ m = _mm_aesdec_si128(m, cx->k.keySchedule[i]);
+ }
+ m = _mm_aesdeclast_si128(m, cx->k.keySchedule[0]);
+ _mm_storeu_si128((__m128i *)output, m);
+}
+
+// out = a ^ b
+void
+native_xorBlock(unsigned char *out,
+ const unsigned char *a,
+ const unsigned char *b)
+{
+ pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a));
+ pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b));
+ in1 = _mm_xor_si128(in1, in2);
+ _mm_storeu_si128((__m128i *)(out), in1);
+}
diff --git a/lib/freebl/rijndael.c b/lib/freebl/rijndael.c
index 2e8bab87f..247a9419b 100644
--- a/lib/freebl/rijndael.c
+++ b/lib/freebl/rijndael.c
@@ -42,6 +42,12 @@ void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
void rijndael_native_encryptBlock(AESContext *cx,
unsigned char *output,
const unsigned char *input);
+void rijndael_native_decryptBlock(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input);
+void native_xorBlock(unsigned char *out,
+ const unsigned char *a,
+ const unsigned char *b);
/* Stub definitions for the above rijndael_native_* functions, which
* shouldn't be used unless NSS_X86_OR_X64 is defined */
@@ -62,6 +68,23 @@ rijndael_native_encryptBlock(AESContext *cx,
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
PORT_Assert(0);
}
+
+void
+rijndael_native_decryptBlock(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+
+void
+native_xorBlock(unsigned char *out, const unsigned char *a,
+ const unsigned char *b)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
#endif /* NSS_X86_OR_X64 */
/*
@@ -509,6 +532,15 @@ typedef union {
#define STATE_BYTE(i) state.b[i]
+// out = a ^ b
+inline static void
+xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b)
+{
+ for (unsigned int j = 0; j < AES_BLOCK_SIZE; ++j) {
+ (out)[j] = (a)[j] ^ (b)[j];
+ }
+}
+
static void NO_SANITIZE_ALIGNMENT
rijndael_encryptBlock128(AESContext *cx,
unsigned char *output,
@@ -604,7 +636,7 @@ rijndael_encryptBlock128(AESContext *cx,
#endif
}
-static SECStatus NO_SANITIZE_ALIGNMENT
+static void NO_SANITIZE_ALIGNMENT
rijndael_decryptBlock128(AESContext *cx,
unsigned char *output,
const unsigned char *input)
@@ -693,7 +725,6 @@ rijndael_decryptBlock128(AESContext *cx,
memcpy(output, outBuf, sizeof outBuf);
}
#endif
- return SECSuccess;
}
/**************************************************************************
@@ -707,16 +738,13 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
- AESBlockFunc *encryptor;
-
- if (aesni_support()) {
- /* Use hardware acceleration for normal AES parameters. */
- encryptor = &rijndael_native_encryptBlock;
- } else {
- encryptor = &rijndael_encryptBlock128;
- }
+ PRBool aesni = aesni_support();
while (inputLen > 0) {
- (*encryptor)(cx, output, input);
+ if (aesni) {
+ rijndael_native_encryptBlock(cx, output, input);
+ } else {
+ rijndael_encryptBlock128(cx, output, input);
+ }
output += AES_BLOCK_SIZE;
input += AES_BLOCK_SIZE;
inputLen -= AES_BLOCK_SIZE;
@@ -729,20 +757,23 @@ rijndael_encryptCBC(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
- unsigned int j;
- unsigned char *lastblock;
+ unsigned char *lastblock = cx->iv;
unsigned char inblock[AES_BLOCK_SIZE * 8];
+ PRBool aesni = aesni_support();
if (!inputLen)
return SECSuccess;
- lastblock = cx->iv;
while (inputLen > 0) {
- /* XOR with the last block (IV if first block) */
- for (j = 0; j < AES_BLOCK_SIZE; ++j) {
- inblock[j] = input[j] ^ lastblock[j];
+ if (aesni) {
+ /* XOR with the last block (IV if first block) */
+ native_xorBlock(inblock, input, lastblock);
+ /* encrypt */
+ rijndael_native_encryptBlock(cx, output, inblock);
+ } else {
+ xorBlock(inblock, input, lastblock);
+ rijndael_encryptBlock128(cx, output, inblock);
}
- /* encrypt */
- rijndael_encryptBlock128(cx, output, inblock);
+
/* move to the next block */
lastblock = output;
output += AES_BLOCK_SIZE;
@@ -758,9 +789,12 @@ rijndael_decryptECB(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
+ PRBool aesni = aesni_support();
while (inputLen > 0) {
- if (rijndael_decryptBlock128(cx, output, input) != SECSuccess) {
- return SECFailure;
+ if (aesni) {
+ rijndael_native_decryptBlock(cx, output, input);
+ } else {
+ rijndael_decryptBlock128(cx, output, input);
}
output += AES_BLOCK_SIZE;
input += AES_BLOCK_SIZE;
@@ -776,8 +810,8 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output,
{
const unsigned char *in;
unsigned char *out;
- unsigned int j;
unsigned char newIV[AES_BLOCK_SIZE];
+ PRBool aesni = aesni_support();
if (!inputLen)
return SECSuccess;
@@ -786,21 +820,26 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output,
memcpy(newIV, in, AES_BLOCK_SIZE);
out = output + (inputLen - AES_BLOCK_SIZE);
while (inputLen > AES_BLOCK_SIZE) {
- if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) {
- return SECFailure;
+ if (aesni) {
+ // Use hardware acceleration for normal AES parameters.
+ rijndael_native_decryptBlock(cx, out, in);
+ native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
+ } else {
+ rijndael_decryptBlock128(cx, out, in);
+ xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
}
- for (j = 0; j < AES_BLOCK_SIZE; ++j)
- out[j] ^= in[(int)(j - AES_BLOCK_SIZE)];
out -= AES_BLOCK_SIZE;
in -= AES_BLOCK_SIZE;
inputLen -= AES_BLOCK_SIZE;
}
if (in == input) {
- if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) {
- return SECFailure;
+ if (aesni) {
+ rijndael_native_decryptBlock(cx, out, in);
+ native_xorBlock(out, out, cx->iv);
+ } else {
+ rijndael_decryptBlock128(cx, out, in);
+ xorBlock(out, out, cx->iv);
}
- for (j = 0; j < AES_BLOCK_SIZE; ++j)
- out[j] ^= cx->iv[j];
}
memcpy(cx->iv, newIV, AES_BLOCK_SIZE);
return SECSuccess;
diff --git a/lib/freebl/rijndael.h b/lib/freebl/rijndael.h
index 61060e016..16357c8c6 100644
--- a/lib/freebl/rijndael.h
+++ b/lib/freebl/rijndael.h
@@ -26,10 +26,6 @@
#endif /* NSS_DISABLE_SSE2 */
#endif
-typedef void AESBlockFunc(AESContext *cx,
- unsigned char *output,
- const unsigned char *input);
-
/* RIJNDAEL_NUM_ROUNDS
*
* Number of rounds per execution