summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2016-06-04 19:24:57 +0200
committerAndy Polyakov <appro@openssl.org>2016-06-14 23:19:45 +0200
commit627c95337611151b4293944e1b706fea7e1ba5dc (patch)
tree6694622ff3a157b7d00535535546efdfc13fc359
parent74daca72a59d7200929b23096f2ff15e93a2ee25 (diff)
downloadopenssl-new-627c95337611151b4293944e1b706fea7e1ba5dc.tar.gz
aes/asm/aesp8-ppc.pl: implement "tweak chaining".
This is useful in Linux kernel context, in cases data happens to be fragmented and processing can take multiple calls. Reviewed-by: Rich Salz <rsalz@openssl.org>
-rwxr-xr-xcrypto/aes/asm/aesp8-ppc.pl180
1 files changed, 126 insertions, 54 deletions
diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
index 459ef88a29..3fdf1ecda0 100755
--- a/crypto/aes/asm/aesp8-ppc.pl
+++ b/crypto/aes/asm/aesp8-ppc.pl
@@ -1909,6 +1909,15 @@ ___
#########################################################################
{{{ # XTS procedures #
+# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
+# const AES_KEY *key1, const AES_KEY *key2, #
+# [const] unsigned char iv[16]); #
+# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
+# input tweak value is assumed to be encrypted already, and last tweak #
+# value, one suitable for consecutive call on same chunk of data, is #
+# written back to original buffer. In addition, in "tweak chaining" #
+# mode only complete input blocks are processed. #
+
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
@@ -1943,18 +1952,21 @@ $code.=<<___;
le?vxor $inpperm,$inpperm,$tmp
vperm $tweak,$tweak,$inptail,$inpperm
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
neg r11,$inp
lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inout,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_enc_no_key2
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
lvx $rndkey0,0,$key2
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
@@ -1978,10 +1990,18 @@ Ltweak_xts_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
- li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $tweak,$tweak,$rndkey0
+ li $ivp,0 # don't chain the tweak
+ b Lxts_enc
+
+Lxts_enc_no_key2:
+ li $idx,-16
+ and $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_enc:
lvx $inptail,0,$inp
addi $inp,$inp,16
@@ -2097,6 +2117,19 @@ Loop_xts_enc_steal:
b Loop_xts_enc # one more time...
Lxts_enc_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_enc_ret
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_enc_ret:
mtspr 256,r12 # restore vrsave
li r3,0
blr
@@ -2135,18 +2168,21 @@ Lxts_enc_done:
le?vxor $inpperm,$inpperm,$tmp
vperm $tweak,$tweak,$inptail,$inpperm
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
neg r11,$inp
lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inout,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_dec_no_key2
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
lvx $rndkey0,0,$key2
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
@@ -2170,10 +2206,19 @@ Ltweak_xts_dec:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
- li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $tweak,$tweak,$rndkey0
+ li $ivp,0 # don't chain the tweak
+ b Lxts_dec
+
+Lxts_dec_no_key2:
+ neg $idx,$len
+ andi. $idx,$idx,15
+ add $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_dec:
lvx $inptail,0,$inp
addi $inp,$inp,16
@@ -2328,6 +2373,19 @@ Loop_xts_dec_steal:
b Loop_xts_dec # one more time...
Lxts_dec_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_dec_ret
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_dec_ret:
mtspr 256,r12 # restore vrsave
li r3,0
blr
@@ -2338,8 +2396,8 @@ Lxts_dec_done:
___
#########################################################################
{{ # Optimized XTS procedures #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+my $key_=$key2;
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
@@ -2353,33 +2411,32 @@ $code.=<<___;
.align 5
_aesp8_xts_encrypt6x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r0
+ mflr r11
li r7,`$FRAME+8*16+15`
- li r8,`$FRAME+8*16+31`
- $PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+ li r3,`$FRAME+8*16+31`
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
stvx v20,r7,$sp # ABI says so
addi r7,r7,32
- stvx v21,r8,$sp
- addi r8,r8,32
+ stvx v21,r3,$sp
+ addi r3,r3,32
stvx v22,r7,$sp
addi r7,r7,32
- stvx v23,r8,$sp
- addi r8,r8,32
+ stvx v23,r3,$sp
+ addi r3,r3,32
stvx v24,r7,$sp
addi r7,r7,32
- stvx v25,r8,$sp
- addi r8,r8,32
+ stvx v25,r3,$sp
+ addi r3,r3,32
stvx v26,r7,$sp
addi r7,r7,32
- stvx v27,r8,$sp
- addi r8,r8,32
+ stvx v27,r3,$sp
+ addi r3,r3,32
stvx v28,r7,$sp
addi r7,r7,32
- stvx v29,r8,$sp
- addi r8,r8,32
+ stvx v29,r3,$sp
+ addi r3,r3,32
stvx v30,r7,$sp
- stvx v31,r8,$sp
- mr r7,r0
+ stvx v31,r3,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
@@ -2842,12 +2899,12 @@ Lxts_enc6x_steal:
vperm $out0,$out0,$out1,$inpperm
vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
- subi r3,$out,17
+ subi r30,$out,17
subi $out,$out,16
mtctr $taillen
Loop_xts_enc6x_steal:
- lbzu r0,1(r3)
- stb r0,16(r3)
+ lbzu r0,1(r30)
+ stb r0,16(r30)
bdnz Loop_xts_enc6x_steal
li $taillen,0
@@ -2856,7 +2913,15 @@ Loop_xts_enc6x_steal:
.align 4
Lxts_enc6x_done:
- mtlr r7
+ ${UCMP}i $ivp,0
+ beq Lxts_enc6x_ret
+
+ vxor $tweak,$twk0,$rndkey0
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_enc6x_ret:
+ mtlr r11
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $seven,r10,$sp # wipe copies of round keys
@@ -2998,33 +3063,32 @@ _aesp8_xts_enc5x:
.align 5
_aesp8_xts_decrypt6x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r0
+ mflr r11
li r7,`$FRAME+8*16+15`
- li r8,`$FRAME+8*16+31`
- $PUSH r0,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+ li r3,`$FRAME+8*16+31`
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
stvx v20,r7,$sp # ABI says so
addi r7,r7,32
- stvx v21,r8,$sp
- addi r8,r8,32
+ stvx v21,r3,$sp
+ addi r3,r3,32
stvx v22,r7,$sp
addi r7,r7,32
- stvx v23,r8,$sp
- addi r8,r8,32
+ stvx v23,r3,$sp
+ addi r3,r3,32
stvx v24,r7,$sp
addi r7,r7,32
- stvx v25,r8,$sp
- addi r8,r8,32
+ stvx v25,r3,$sp
+ addi r3,r3,32
stvx v26,r7,$sp
addi r7,r7,32
- stvx v27,r8,$sp
- addi r8,r8,32
+ stvx v27,r3,$sp
+ addi r3,r3,32
stvx v28,r7,$sp
addi r7,r7,32
- stvx v29,r8,$sp
- addi r8,r8,32
+ stvx v29,r3,$sp
+ addi r3,r3,32
stvx v30,r7,$sp
- stvx v31,r8,$sp
- mr r7,r0
+ stvx v31,r3,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
@@ -3524,11 +3588,11 @@ Lxts_dec6x_steal:
vsel $out0,$in0,$tmp,$out0
vxor $out0,$out0,$twk0
- subi r3,$out,1
+ subi r30,$out,1
mtctr $taillen
Loop_xts_dec6x_steal:
- lbzu r0,1(r3)
- stb r0,16(r3)
+ lbzu r0,1(r30)
+ stb r0,16(r30)
bdnz Loop_xts_dec6x_steal
li $taillen,0
@@ -3537,7 +3601,15 @@ Loop_xts_dec6x_steal:
.align 4
Lxts_dec6x_done:
- mtlr r7
+ ${UCMP}i $ivp,0
+ beq Lxts_dec6x_ret
+
+ vxor $tweak,$twk0,$rndkey0
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_dec6x_ret:
+ mtlr r11
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $seven,r10,$sp # wipe copies of round keys