author    Jussi Kivilinna <jussi.kivilinna@iki.fi>    2022-12-14 19:37:37 +0200
committer Jussi Kivilinna <jussi.kivilinna@iki.fi>    2022-12-14 19:37:49 +0200
commit    02d5d1d97b3f281cf9c854d7143e346ab76fa384 (patch)
tree      4314612c565c5af2d9d2a6f976579bb9ee95b729 /cipher
parent    3d20308cc529b53d49954e9f0b8d10fa14422303 (diff)
download  libgcrypt-02d5d1d97b3f281cf9c854d7143e346ab76fa384.tar.gz
Add clang support for ARM 32-bit assembly
* configure.ac (gcry_cv_gcc_arm_platform_as_ok)
(gcry_cv_gcc_inline_asm_neon): Remove % prefix from register names.
* cipher/cipher-gcm-armv7-neon.S (vmull_p64): Prefix constant values
with # character instead of $.
* cipher/blowfish-arm.S: Remove % prefix from all register names.
* cipher/camellia-arm.S: Likewise.
* cipher/cast5-arm.S: Likewise.
* cipher/rijndael-arm.S: Likewise.
* cipher/rijndael-armv8-aarch32-ce.S: Likewise.
* cipher/sha512-arm.S: Likewise.
* cipher/sha512-armv7-neon.S: Likewise.
* cipher/twofish-arm.S: Likewise.
* mpi/arm/mpih-add1.S: Likewise.
* mpi/arm/mpih-mul1.S: Likewise.
* mpi/arm/mpih-mul2.S: Likewise.
* mpi/arm/mpih-mul3.S: Likewise.
* mpi/arm/mpih-sub1.S: Likewise.
--
Reported-by: Dmytro Kovalov <dmytro.a.kovalov@globallogic.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
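Background: binutils' GNU as treats a leading '%' on ARM register names as optional decoration, and likewise accepts '$' as an immediate prefix in some instructions, but clang's integrated assembler only understands the bare register names and '#'-prefixed constants common to both tools. A minimal stand-alone sketch of the portable spelling (a hypothetical file, not part of this commit; the add_one function and main driver are made up for illustration):

    /* GNU-as-only spellings taken from the pre-patch sources:
     *   #define CTX %r0              ('%' register prefix)
     *   vext.8 t0l, ad, ad, $1       ('$' immediate prefix)
     * clang's integrated assembler rejects these; the bare forms below
     * are accepted by GNU as and clang alike.
     */
    __asm__ (
        ".arm\n\t"
        ".text\n\t"
        ".globl add_one\n\t"
        "add_one:\n\t"
        "add r0, r0, #1\n\t"    /* pre-patch style: add %r0, %r0, #1 */
        "bx lr\n\t"
    );

    extern int add_one(int x);

    int main(void)
    {
        return add_one(41) == 42 ? 0 : 1;
    }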
Diffstat (limited to 'cipher')
 -rw-r--r--  cipher/blowfish-arm.S                216
 -rw-r--r--  cipher/camellia-arm.S                 68
 -rw-r--r--  cipher/cast5-arm.S                   204
 -rw-r--r--  cipher/cipher-gcm-armv7-neon.S        24
 -rw-r--r--  cipher/rijndael-arm.S                106
 -rw-r--r--  cipher/rijndael-armv8-aarch32-ce.S    66
 -rw-r--r--  cipher/sha512-arm.S                  204
 -rw-r--r--  cipher/sha512-armv7-neon.S            78
 -rw-r--r--  cipher/twofish-arm.S                  62
9 files changed, 514 insertions, 514 deletions
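The configure.ac probes named above gate the ARM assembly implementations: when the probe fails to assemble, the corresponding HAVE_* macro stays undefined and the assembly code paths go unused, which is what happened under clang's integrated assembler while the probes still used '%'-prefixed registers. A rough sketch of such a probe after the change (the exact test program in configure.ac may differ; asmfunc and the main wrapper are illustrative):

    /* Probe in the spirit of gcry_cv_gcc_arm_platform_as_ok: if the
     * assembler accepts this program, the ARM implementations are
     * enabled.  With the '%' prefixes removed it passes under both
     * GNU as and clang's integrated assembler.
     */
    __asm__ (
        ".syntax unified\n\t"
        ".arm\n\t"
        ".text\n\t"
        "asmfunc:\n\t"
        "add r0, r0, r4, ror #12;\n\t"  /* was: add %r0, %r0, %r4, ror #12; */
    );

    int main(void) { return 0; }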
diff --git a/cipher/blowfish-arm.S b/cipher/blowfish-arm.S index b30aa31f..a5101b5c 100644 --- a/cipher/blowfish-arm.S +++ b/cipher/blowfish-arm.S @@ -36,24 +36,24 @@ #define p (s3 + (1 * 256) * 4) /* register macros */ -#define CTXs0 %r0 -#define CTXs1 %r9 -#define CTXs2 %r8 -#define CTXs3 %r10 -#define RMASK %lr -#define RKEYL %r2 -#define RKEYR %ip +#define CTXs0 r0 +#define CTXs1 r9 +#define CTXs2 r8 +#define CTXs3 r10 +#define RMASK lr +#define RKEYL r2 +#define RKEYR ip -#define RL0 %r3 -#define RR0 %r4 +#define RL0 r3 +#define RR0 r4 -#define RL1 %r9 -#define RR1 %r10 +#define RL1 r9 +#define RR1 r10 -#define RT0 %r11 -#define RT1 %r7 -#define RT2 %r5 -#define RT3 %r6 +#define RT0 r11 +#define RT1 r7 +#define RT2 r5 +#define RT3 r6 /* helper macros */ #define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ @@ -250,7 +250,7 @@ __blowfish_enc_blk1: * output: * [RR0, RL0]: dst */ - push {%lr}; + push {lr}; add CTXs1, CTXs0, #(s1 - s0); add CTXs2, CTXs0, #(s2 - s0); @@ -268,7 +268,7 @@ __blowfish_enc_blk1: round_enc(16); add_roundkey_enc(); - pop {%pc}; + pop {pc}; .size __blowfish_enc_blk1,.-__blowfish_enc_blk1; .align 8 @@ -277,22 +277,22 @@ __blowfish_enc_blk1: _gcry_blowfish_arm_do_encrypt: /* input: - * %r0: ctx, CTX - * %r1: u32 *ret_xl - * %r2: u32 *ret_xr + * r0: ctx, CTX + * r1: u32 *ret_xl + * r2: u32 *ret_xr */ - push {%r2, %r4-%r11, %ip, %lr}; + push {r2, r4-r11, ip, lr}; - ldr RL0, [%r1]; - ldr RR0, [%r2]; + ldr RL0, [r1]; + ldr RR0, [r2]; bl __blowfish_enc_blk1; - pop {%r2}; - str RR0, [%r1]; - str RL0, [%r2]; + pop {r2}; + str RR0, [r1]; + str RL0, [r2]; - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt; .align 3 @@ -301,19 +301,19 @@ _gcry_blowfish_arm_do_encrypt: _gcry_blowfish_arm_encrypt_block: /* input: - * %r0: ctx, CTX - * %r1: dst - * %r2: src + * r0: ctx, CTX + * r1: dst + * r2: src */ - push {%r4-%r11, %ip, %lr}; + push {r4-r11, ip, lr}; - read_block(%r2, 0, RL0, RR0, RT0); + read_block(r2, 0, RL0, RR0, RT0); bl __blowfish_enc_blk1; - write_block(%r1, 0, RR0, RL0, RT0, RT1); + write_block(r1, 0, RR0, RL0, RT0, RT1); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block; .align 3 @@ -322,18 +322,18 @@ _gcry_blowfish_arm_encrypt_block: _gcry_blowfish_arm_decrypt_block: /* input: - * %r0: ctx, CTX - * %r1: dst - * %r2: src + * r0: ctx, CTX + * r1: dst + * r2: src */ - push {%r4-%r11, %ip, %lr}; + push {r4-r11, ip, lr}; add CTXs1, CTXs0, #(s1 - s0); add CTXs2, CTXs0, #(s2 - s0); mov RMASK, #(0xff << 2); /* byte mask */ add CTXs3, CTXs1, #(s3 - s1); - read_block(%r2, 0, RL0, RR0, RT0); + read_block(r2, 0, RL0, RR0, RT0); load_roundkey_dec(17); round_dec(15); @@ -346,9 +346,9 @@ _gcry_blowfish_arm_decrypt_block: round_dec(1); add_roundkey_dec(); - write_block(%r1, 0, RR0, RL0, RT0, RT1); + write_block(r1, 0, RR0, RL0, RT0, RT1); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block; /*********************************************************************** @@ -548,7 +548,7 @@ _gcry_blowfish_arm_enc_blk2: * output: * [RR0, RL0], [RR1, RL1]: dst */ - push {RT0,%lr}; + push {RT0,lr}; add CTXs2, CTXs0, #(s2 - s0); mov RMASK, #(0xff << 2); /* byte mask */ @@ -568,7 +568,7 @@ _gcry_blowfish_arm_enc_blk2: host_to_be(RR1, RT0); host_to_be(RL1, RT0); - pop {RT0,%pc}; + pop {RT0,pc}; .size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2; .align 3 @@ -577,40 +577,40 
@@ _gcry_blowfish_arm_enc_blk2: _gcry_blowfish_arm_cfb_dec: /* input: - * %r0: CTX - * %r1: dst (2 blocks) - * %r2: src (2 blocks) - * %r3: iv (64bit) + * r0: CTX + * r1: dst (2 blocks) + * r2: src (2 blocks) + * r3: iv (64bit) */ - push {%r2, %r4-%r11, %ip, %lr}; + push {r2, r4-r11, ip, lr}; - mov %lr, %r3; + mov lr, r3; - /* Load input (iv/%r3 is aligned, src/%r2 might not be) */ - ldm %r3, {RL0, RR0}; + /* Load input (iv/r3 is aligned, src/r2 might not be) */ + ldm r3, {RL0, RR0}; host_to_be(RL0, RT0); host_to_be(RR0, RT0); - read_block(%r2, 0, RL1, RR1, RT0); + read_block(r2, 0, RL1, RR1, RT0); /* Update IV, load src[1] and save to iv[0] */ - read_block_host(%r2, 8, %r5, %r6, RT0); - stm %lr, {%r5, %r6}; + read_block_host(r2, 8, r5, r6, RT0); + stm lr, {r5, r6}; bl _gcry_blowfish_arm_enc_blk2; - /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + /* result in RR0:RL0, RR1:RL1 = r4:r3, r10:r9 */ - /* %r1: dst, %r0: %src */ - pop {%r0}; + /* r1: dst, r0: src */ + pop {r0}; /* dst = src ^ result */ - read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr); - eor %r5, %r4; - eor %r6, %r3; - eor %r7, %r10; - eor %r8, %r9; - write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10); - - pop {%r4-%r11, %ip, %pc}; + read_block2_host(r0, r5, r6, r7, r8, lr); + eor r5, r4; + eor r6, r3; + eor r7, r10; + eor r8, r9; + write_block2_host(r1, r5, r6, r7, r8, r9, r10); + + pop {r4-r11, ip, pc}; .ltorg .size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec; @@ -620,42 +620,42 @@ _gcry_blowfish_arm_cfb_dec: _gcry_blowfish_arm_ctr_enc: /* input: - * %r0: CTX - * %r1: dst (2 blocks) - * %r2: src (2 blocks) - * %r3: iv (64bit, big-endian) + * r0: CTX + * r1: dst (2 blocks) + * r2: src (2 blocks) + * r3: iv (64bit, big-endian) */ - push {%r2, %r4-%r11, %ip, %lr}; + push {r2, r4-r11, ip, lr}; - mov %lr, %r3; + mov lr, r3; /* Load IV (big => host endian) */ - read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT0); + read_block_aligned(lr, 0, RL0, RR0, be_to_host, RT0); /* Construct IVs */ adds RR1, RR0, #1; /* +1 */ adc RL1, RL0, #0; - adds %r6, RR1, #1; /* +2 */ - adc %r5, RL1, #0; + adds r6, RR1, #1; /* +2 */ + adc r5, RL1, #0; /* Store new IV (host => big-endian) */ - write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT0); + write_block_aligned(lr, 0, r5, r6, host_to_be, RT0); bl _gcry_blowfish_arm_enc_blk2; - /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + /* result in RR0:RL0, RR1:RL1 = r4:r3, r10:r9 */ - /* %r1: dst, %r0: %src */ - pop {%r0}; + /* r1: dst, r0: src */ + pop {r0}; /* XOR key-stream with plaintext */ - read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr); - eor %r5, %r4; - eor %r6, %r3; - eor %r7, %r10; - eor %r8, %r9; - write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10); - - pop {%r4-%r11, %ip, %pc}; + read_block2_host(r0, r5, r6, r7, r8, lr); + eor r5, r4; + eor r6, r3; + eor r7, r10; + eor r8, r9; + write_block2_host(r1, r5, r6, r7, r8, r9, r10); + + pop {r4-r11, ip, pc}; .ltorg .size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc; @@ -697,45 +697,45 @@ _gcry_blowfish_arm_dec_blk2: _gcry_blowfish_arm_cbc_dec: /* input: - * %r0: CTX - * %r1: dst (2 blocks) - * %r2: src (2 blocks) - * %r3: iv (64bit) + * r0: CTX + * r1: dst (2 blocks) + * r2: src (2 blocks) + * r3: iv (64bit) */ - push {%r2-%r11, %ip, %lr}; + push {r2-r11, ip, lr}; - read_block2(%r2, RL0, RR0, RL1, RR1, RT0); + read_block2(r2, RL0, RR0, RL1, RR1, RT0); /* dec_blk2 is only used by cbc_dec, jump directly in/out instead * of function call. 
*/ b _gcry_blowfish_arm_dec_blk2; .Ldec_cbc_tail: - /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + /* result in RR0:RL0, RR1:RL1 = r4:r3, r10:r9 */ - /* %r0: %src, %r1: dst, %r2: iv */ - pop {%r0, %r2}; + /* r0: src, r1: dst, r2: iv */ + pop {r0, r2}; - /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */ - read_block_host(%r0, 0, %r7, %r8, %r5); - /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */ - ldm %r2, {%r5, %r6}; + /* load IV+1 (src[0]) to r7:r8. Might be unaligned. */ + read_block_host(r0, 0, r7, r8, r5); + /* load IV (iv[0]) to r5:r6. 'iv' is aligned. */ + ldm r2, {r5, r6}; /* out[1] ^= IV+1 */ - eor %r10, %r7; - eor %r9, %r8; + eor r10, r7; + eor r9, r8; /* out[0] ^= IV */ - eor %r4, %r5; - eor %r3, %r6; + eor r4, r5; + eor r3, r6; - /* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */ - read_block_host(%r0, 8, %r7, %r8, %r5); + /* load IV+2 (src[1]) to r7:r8. Might be unaligned. */ + read_block_host(r0, 8, r7, r8, r5); /* store IV+2 to iv[0] (aligned). */ - stm %r2, {%r7, %r8}; + stm r2, {r7, r8}; /* store result to dst[0-3]. Might be unaligned. */ - write_block2_host(%r1, %r4, %r3, %r10, %r9, %r5, %r6); + write_block2_host(r1, r4, r3, r10, r9, r5, r6); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec; diff --git a/cipher/camellia-arm.S b/cipher/camellia-arm.S index a3d87d11..decd40c2 100644 --- a/cipher/camellia-arm.S +++ b/cipher/camellia-arm.S @@ -45,23 +45,23 @@ #define key_table 0 /* register macros */ -#define CTX %r0 -#define RTAB1 %ip -#define RTAB3 %r1 -#define RMASK %lr +#define CTX r0 +#define RTAB1 ip +#define RTAB3 r1 +#define RMASK lr -#define IL %r2 -#define IR %r3 +#define IL r2 +#define IR r3 -#define XL %r4 -#define XR %r5 -#define YL %r6 -#define YR %r7 +#define XL r4 +#define XR r5 +#define YL r6 +#define YR r7 -#define RT0 %r8 -#define RT1 %r9 -#define RT2 %r10 -#define RT3 %r11 +#define RT0 r8 +#define RT1 r9 +#define RT2 r10 +#define RT3 r11 /* helper macros */ #define ldr_unaligned_be(rout, rsrc, offs, rtmp) \ @@ -248,7 +248,7 @@ (n) * 2 + 0, (n) * 2 + 1); #define inpack(n) \ - ldr_input_be(%r2, XL, XR, YL, YR, RT0); \ + ldr_input_be(r2, XL, XR, YL, YR, RT0); \ ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ eor XL, RT0; \ @@ -259,7 +259,7 @@ ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ eor YL, RT0; \ eor YR, RT1; \ - str_output_be(%r1, YL, YR, XL, XR, RT0, RT1); + str_output_be(r1, YL, YR, XL, XR, RT0, RT1); .align 3 .globl _gcry_camellia_arm_encrypt_block @@ -267,17 +267,17 @@ _gcry_camellia_arm_encrypt_block: /* input: - * %r0: keytable - * %r1: dst - * %r2: src - * %r3: keybitlen + * r0: keytable + * r1: dst + * r2: src + * r3: keybitlen */ - push {%r1, %r4-%r11, %ip, %lr}; + push {r1, r4-r11, ip, lr}; GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3); mov RMASK, #0xff; add RTAB3, RTAB1, #(2 * 4); - push {%r3}; + push {r3}; mov RMASK, RMASK, lsl#4 /* byte mask */ inpack(0); @@ -292,20 +292,20 @@ _gcry_camellia_arm_encrypt_block: cmp RT0, #(16 * 8); bne .Lenc_256; - pop {%r1}; + pop {r1}; outunpack(24); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .Lenc_256: enc_fls(24); enc_rounds(24); - pop {%r1}; + pop {r1}; outunpack(32); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; @@ -315,19 +315,19 @@ _gcry_camellia_arm_encrypt_block: _gcry_camellia_arm_decrypt_block: /* input: - * %r0: keytable - * %r1: dst - * %r2: src - 
* %r3: keybitlen + * r0: keytable + * r1: dst + * r2: src + * r3: keybitlen */ - push {%r1, %r4-%r11, %ip, %lr}; + push {r1, r4-r11, ip, lr}; GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3); mov RMASK, #0xff; add RTAB3, RTAB1, #(2 * 4); mov RMASK, RMASK, lsl#4 /* byte mask */ - cmp %r3, #(16 * 8); + cmp r3, #(16 * 8); bne .Ldec_256; inpack(24); @@ -339,10 +339,10 @@ _gcry_camellia_arm_decrypt_block: dec_fls(8); dec_rounds(0); - pop {%r1}; + pop {r1}; outunpack(0); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .Ldec_256: diff --git a/cipher/cast5-arm.S b/cipher/cast5-arm.S index 76ddd2e3..ae53e6b4 100644 --- a/cipher/cast5-arm.S +++ b/cipher/cast5-arm.S @@ -50,25 +50,25 @@ #define Kr_arm_dec (Kr_arm_enc + (16)) /* register macros */ -#define CTX %r0 -#define Rs1 %r7 -#define Rs2 %r8 -#define Rs3 %r9 -#define Rs4 %r10 -#define RMASK %r11 -#define RKM %r1 -#define RKR %r2 - -#define RL0 %r3 -#define RR0 %r4 - -#define RL1 %r9 -#define RR1 %r10 - -#define RT0 %lr -#define RT1 %ip -#define RT2 %r5 -#define RT3 %r6 +#define CTX r0 +#define Rs1 r7 +#define Rs2 r8 +#define Rs3 r9 +#define Rs4 r10 +#define RMASK r11 +#define RKM r1 +#define RKR r2 + +#define RL0 r3 +#define RR0 r4 + +#define RL1 r9 +#define RR1 r10 + +#define RT0 lr +#define RT1 ip +#define RT2 r5 +#define RT3 r6 /* helper macros */ #define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ @@ -267,11 +267,11 @@ _gcry_cast5_arm_encrypt_block: /* input: - * %r0: CTX - * %r1: dst - * %r2: src + * r0: CTX + * r1: dst + * r2: src */ - push {%r1, %r4-%r11, %ip, %lr}; + push {r1, r4-r11, ip, lr}; GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); mov RMASK, #(0xff << 2); @@ -279,7 +279,7 @@ _gcry_cast5_arm_encrypt_block: add Rs3, Rs1, #(0x100*4*2); add Rs4, Rs1, #(0x100*4*3); - read_block(%r2, 0, RL0, RR0, RT0); + read_block(r2, 0, RL0, RR0, RT0); load_km(0); load_kr(0); @@ -300,10 +300,10 @@ _gcry_cast5_arm_encrypt_block: enc_round(14, F3, RL0, RR0, load_km, shift_kr, dummy); enc_round(15, F1, RR0, RL0, dummy, dummy, dummy); - ldr %r1, [%sp], #4; - write_block(%r1, 0, RR0, RL0, RT0, RT1); + ldr r1, [sp], #4; + write_block(r1, 0, RR0, RL0, RT0, RT1); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .size _gcry_cast5_arm_encrypt_block,.-_gcry_cast5_arm_encrypt_block; @@ -313,11 +313,11 @@ _gcry_cast5_arm_encrypt_block: _gcry_cast5_arm_decrypt_block: /* input: - * %r0: CTX - * %r1: dst - * %r2: src + * r0: CTX + * r1: dst + * r2: src */ - push {%r1, %r4-%r11, %ip, %lr}; + push {r1, r4-r11, ip, lr}; GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); mov RMASK, #(0xff << 2); @@ -325,7 +325,7 @@ _gcry_cast5_arm_decrypt_block: add Rs3, Rs1, #(0x100 * 4 * 2); add Rs4, Rs1, #(0x100 * 4 * 3); - read_block(%r2, 0, RL0, RR0, RT0); + read_block(r2, 0, RL0, RR0, RT0); load_km(15); load_dec_kr(15); @@ -346,10 +346,10 @@ _gcry_cast5_arm_decrypt_block: dec_round(1, F2, RL0, RR0, load_km, shift_kr, dummy); dec_round(0, F1, RR0, RL0, dummy, dummy, dummy); - ldr %r1, [%sp], #4; - write_block(%r1, 0, RR0, RL0, RT0, RT1); + ldr r1, [sp], #4; + write_block(r1, 0, RR0, RL0, RT0, RT1); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .size _gcry_cast5_arm_decrypt_block,.-_gcry_cast5_arm_decrypt_block; @@ -511,7 +511,7 @@ _gcry_cast5_arm_enc_blk2: * output: * [RR0, RL0], [RR1, RL1]: dst */ - push {%lr}; + push {lr}; GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); mov RMASK, #(0xff << 2); @@ -541,7 +541,7 @@ _gcry_cast5_arm_enc_blk2: host_to_be(RR1, RT0); host_to_be(RL1, RT0); - pop {%pc}; + pop {pc}; .ltorg .size 
_gcry_cast5_arm_enc_blk2,.-_gcry_cast5_arm_enc_blk2; @@ -551,40 +551,40 @@ _gcry_cast5_arm_enc_blk2: _gcry_cast5_arm_cfb_dec: /* input: - * %r0: CTX - * %r1: dst (2 blocks) - * %r2: src (2 blocks) - * %r3: iv (64bit) + * r0: CTX + * r1: dst (2 blocks) + * r2: src (2 blocks) + * r3: iv (64bit) */ - push {%r1, %r2, %r4-%r11, %ip, %lr}; + push {r1, r2, r4-r11, ip, lr}; - mov %lr, %r3; + mov lr, r3; - /* Load input (iv/%r3 is aligned, src/%r2 might not be) */ - ldm %r3, {RL0, RR0}; + /* Load input (iv/r3 is aligned, src/r2 might not be) */ + ldm r3, {RL0, RR0}; host_to_be(RL0, RT1); host_to_be(RR0, RT1); - read_block(%r2, 0, RL1, RR1, %ip); + read_block(r2, 0, RL1, RR1, ip); /* Update IV, load src[1] and save to iv[0] */ - read_block_host(%r2, 8, %r5, %r6, %r7); - stm %lr, {%r5, %r6}; + read_block_host(r2, 8, r5, r6, r7); + stm lr, {r5, r6}; bl _gcry_cast5_arm_enc_blk2; - /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + /* result in RR0:RL0, RR1:RL1 = r4:r3, r10:r9 */ - /* %r0: dst, %r1: %src */ - pop {%r0, %r1}; + /* r0: dst, r1: src */ + pop {r0, r1}; /* dst = src ^ result */ - read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr); - eor %r5, %r4; - eor %r6, %r3; - eor %r7, %r10; - eor %r8, %r9; - write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2); - - pop {%r4-%r11, %ip, %pc}; + read_block2_host(r1, r5, r6, r7, r8, lr); + eor r5, r4; + eor r6, r3; + eor r7, r10; + eor r8, r9; + write_block2_host(r0, r5, r6, r7, r8, r1, r2); + + pop {r4-r11, ip, pc}; .ltorg .size _gcry_cast5_arm_cfb_dec,.-_gcry_cast5_arm_cfb_dec; @@ -594,42 +594,42 @@ _gcry_cast5_arm_cfb_dec: _gcry_cast5_arm_ctr_enc: /* input: - * %r0: CTX - * %r1: dst (2 blocks) - * %r2: src (2 blocks) - * %r3: iv (64bit, big-endian) + * r0: CTX + * r1: dst (2 blocks) + * r2: src (2 blocks) + * r3: iv (64bit, big-endian) */ - push {%r1, %r2, %r4-%r11, %ip, %lr}; + push {r1, r2, r4-r11, ip, lr}; - mov %lr, %r3; + mov lr, r3; /* Load IV (big => host endian) */ - read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT1); + read_block_aligned(lr, 0, RL0, RR0, be_to_host, RT1); /* Construct IVs */ adds RR1, RR0, #1; /* +1 */ adc RL1, RL0, #0; - adds %r6, RR1, #1; /* +2 */ - adc %r5, RL1, #0; + adds r6, RR1, #1; /* +2 */ + adc r5, RL1, #0; /* Store new IV (host => big-endian) */ - write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT1); + write_block_aligned(lr, 0, r5, r6, host_to_be, RT1); bl _gcry_cast5_arm_enc_blk2; - /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + /* result in RR0:RL0, RR1:RL1 = r4:r3, r10:r9 */ - /* %r0: dst, %r1: %src */ - pop {%r0, %r1}; + /* r0: dst, r1: src */ + pop {r0, r1}; /* XOR key-stream with plaintext */ - read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr); - eor %r5, %r4; - eor %r6, %r3; - eor %r7, %r10; - eor %r8, %r9; - write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2); - - pop {%r4-%r11, %ip, %pc}; + read_block2_host(r1, r5, r6, r7, r8, lr); + eor r5, r4; + eor r6, r3; + eor r7, r10; + eor r8, r9; + write_block2_host(r0, r5, r6, r7, r8, r1, r2); + + pop {r4-r11, ip, pc}; .ltorg .size _gcry_cast5_arm_ctr_enc,.-_gcry_cast5_arm_ctr_enc; @@ -682,45 +682,45 @@ _gcry_cast5_arm_dec_blk2: _gcry_cast5_arm_cbc_dec: /* input: - * %r0: CTX - * %r1: dst (2 blocks) - * %r2: src (2 blocks) - * %r3: iv (64bit) + * r0: CTX + * r1: dst (2 blocks) + * r2: src (2 blocks) + * r3: iv (64bit) */ - push {%r1-%r11, %ip, %lr}; + push {r1-r11, ip, lr}; - read_block2(%r2, RL0, RR0, RL1, RR1, RT0); + read_block2(r2, RL0, RR0, RL1, RR1, RT0); /* dec_blk2 is only used by cbc_dec, jump directly in/out instead * of function call. 
*/ b _gcry_cast5_arm_dec_blk2; .Ldec_cbc_tail: - /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + /* result in RR0:RL0, RR1:RL1 = r4:r3, r10:r9 */ - /* %r0: dst, %r1: %src, %r2: iv */ - pop {%r0-%r2}; + /* r0: dst, r1: src, r2: iv */ + pop {r0-r2}; - /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */ - read_block_host(%r1, 0, %r7, %r8, %r5); - /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */ - ldm %r2, {%r5, %r6}; + /* load IV+1 (src[0]) to r7:r8. Might be unaligned. */ + read_block_host(r1, 0, r7, r8, r5); + /* load IV (iv[0]) to r5:r6. 'iv' is aligned. */ + ldm r2, {r5, r6}; /* out[1] ^= IV+1 */ - eor %r10, %r7; - eor %r9, %r8; + eor r10, r7; + eor r9, r8; /* out[0] ^= IV */ - eor %r4, %r5; - eor %r3, %r6; + eor r4, r5; + eor r3, r6; - /* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */ - read_block_host(%r1, 8, %r7, %r8, %r5); + /* load IV+2 (src[1]) to r7:r8. Might be unaligned. */ + read_block_host(r1, 8, r7, r8, r5); /* store IV+2 to iv[0] (aligned). */ - stm %r2, {%r7, %r8}; + stm r2, {r7, r8}; /* store result to dst[0-3]. Might be unaligned. */ - write_block2_host(%r0, %r4, %r3, %r10, %r9, %r5, %r6); + write_block2_host(r0, r4, r3, r10, r9, r5, r6); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .size _gcry_cast5_arm_cbc_dec,.-_gcry_cast5_arm_cbc_dec; diff --git a/cipher/cipher-gcm-armv7-neon.S b/cipher/cipher-gcm-armv7-neon.S index 16502b4a..c7027af3 100644 --- a/cipher/cipher-gcm-armv7-neon.S +++ b/cipher/cipher-gcm-armv7-neon.S @@ -121,21 +121,21 @@ gcry_gcm_reduction_constant: * Engineering — MoCrySEn, 2013". */ #define vmull_p64(rq, rl, rh, ad, bd) \ - vext.8 t0l, ad, ad, $1; \ + vext.8 t0l, ad, ad, #1; \ vmull.p8 t0q, t0l, bd; \ - vext.8 rl, bd, bd, $1; \ + vext.8 rl, bd, bd, #1; \ vmull.p8 rq, ad, rl; \ - vext.8 t1l, ad, ad, $2; \ + vext.8 t1l, ad, ad, #2; \ vmull.p8 t1q, t1l, bd; \ - vext.8 t3l, bd, bd, $2; \ + vext.8 t3l, bd, bd, #2; \ vmull.p8 t3q, ad, t3l; \ - vext.8 t2l, ad, ad, $3; \ + vext.8 t2l, ad, ad, #3; \ vmull.p8 t2q, t2l, bd; \ veor t0q, t0q, rq; \ - vext.8 rl, bd, bd, $3; \ + vext.8 rl, bd, bd, #3; \ vmull.p8 rq, ad, rl; \ veor t1q, t1q, t3q; \ - vext.8 t3l, bd, bd, $4; \ + vext.8 t3l, bd, bd, #4; \ vmull.p8 t3q, ad, t3l; \ veor t0l, t0l, t0h; \ vand t0h, t0h, k48; \ @@ -147,13 +147,13 @@ gcry_gcm_reduction_constant: veor t2l, t2l, t2h; \ vand t2h, t2h, k16; \ veor t3l, t3l, t3h; \ - vmov.i64 t3h, $0; \ - vext.8 t0q, t0q, t0q, $15; \ + vmov.i64 t3h, #0; \ + vext.8 t0q, t0q, t0q, #15; \ veor t2l, t2l, t2h; \ - vext.8 t1q, t1q, t1q, $14; \ + vext.8 t1q, t1q, t1q, #14; \ vmull.p8 rq, ad, bd; \ - vext.8 t2q, t2q, t2q, $13; \ - vext.8 t3q, t3q, t3q, $12; \ + vext.8 t2q, t2q, t2q, #13; \ + vext.8 t3q, t3q, t3q, #12; \ veor t0q, t0q, t1q; \ veor t2q, t2q, t3q; \ veor rq, rq, t0q; \ diff --git a/cipher/rijndael-arm.S b/cipher/rijndael-arm.S index e680c817..632daac2 100644 --- a/cipher/rijndael-arm.S +++ b/cipher/rijndael-arm.S @@ -29,23 +29,23 @@ .arm /* register macros */ -#define CTX %r0 -#define RTAB %lr -#define RMASK %ip +#define CTX r0 +#define RTAB lr +#define RMASK ip -#define RA %r4 -#define RB %r5 -#define RC %r6 -#define RD %r7 +#define RA r4 +#define RB r5 +#define RC r6 +#define RD r7 -#define RNA %r8 -#define RNB %r9 -#define RNC %r10 -#define RND %r11 +#define RNA r8 +#define RNB r9 +#define RNC r10 +#define RND r11 -#define RT0 %r1 -#define RT1 %r2 -#define RT2 %r3 +#define RT0 r1 +#define RT1 r2 +#define RT2 r3 /* helper macros */ #define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ @@ -216,30 +216,30 @@ 
_gcry_aes_arm_encrypt_block: /* input: - * %r0: keysched, CTX - * %r1: dst - * %r2: src - * %r3: number of rounds.. 10, 12 or 14 - * %st+0: encryption table + * r0: keysched, CTX + * r1: dst + * r2: src + * r3: number of rounds.. 10, 12 or 14 + * st+0: encryption table */ - push {%r4-%r11, %ip, %lr}; + push {r4-r11, ip, lr}; /* read input block */ /* test if src is unaligned */ - tst %r2, #3; + tst r2, #3; beq 1f; /* unaligned load */ - ldr_unaligned_le(RA, %r2, 0, RNA); - ldr_unaligned_le(RB, %r2, 4, RNB); - ldr_unaligned_le(RC, %r2, 8, RNA); - ldr_unaligned_le(RD, %r2, 12, RNB); + ldr_unaligned_le(RA, r2, 0, RNA); + ldr_unaligned_le(RB, r2, 4, RNB); + ldr_unaligned_le(RC, r2, 8, RNA); + ldr_unaligned_le(RD, r2, 12, RNB); b 2f; .ltorg 1: /* aligned load */ - ldm %r2, {RA, RB, RC, RD}; + ldm r2, {RA, RB, RC, RD}; #ifndef __ARMEL__ rev RA, RA; rev RB, RB; @@ -247,12 +247,12 @@ _gcry_aes_arm_encrypt_block: rev RD, RD; #endif 2: - ldr RTAB, [%sp, #40]; - sub %sp, #16; + ldr RTAB, [sp, #40]; + sub sp, #16; - str %r1, [%sp, #4]; /* dst */ + str r1, [sp, #4]; /* dst */ mov RMASK, #0xff; - str %r3, [%sp, #8]; /* nrounds */ + str r3, [sp, #8]; /* nrounds */ mov RMASK, RMASK, lsl#2; /* byte mask */ firstencround(0, RA, RB, RC, RD, RNA, RNB, RNC, RND); @@ -264,7 +264,7 @@ _gcry_aes_arm_encrypt_block: encround(6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); encround(7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); - ldr RT0, [%sp, #8]; /* nrounds */ + ldr RT0, [sp, #8]; /* nrounds */ cmp RT0, #12; bge .Lenc_not_128; @@ -272,8 +272,8 @@ _gcry_aes_arm_encrypt_block: lastencround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD); .Lenc_done: - ldr RT0, [%sp, #4]; /* dst */ - add %sp, #16; + ldr RT0, [sp, #4]; /* dst */ + add sp, #16; /* store output block */ @@ -301,7 +301,7 @@ _gcry_aes_arm_encrypt_block: 2: mov r0, #(10 * 4); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .Lenc_not_128: @@ -473,30 +473,30 @@ _gcry_aes_arm_encrypt_block: _gcry_aes_arm_decrypt_block: /* input: - * %r0: keysched, CTX - * %r1: dst - * %r2: src - * %r3: number of rounds.. 10, 12 or 14 - * %st+0: decryption table + * r0: keysched, CTX + * r1: dst + * r2: src + * r3: number of rounds.. 
10, 12 or 14 + * st+0: decryption table */ - push {%r4-%r11, %ip, %lr}; + push {r4-r11, ip, lr}; /* read input block */ /* test if src is unaligned */ - tst %r2, #3; + tst r2, #3; beq 1f; /* unaligned load */ - ldr_unaligned_le(RA, %r2, 0, RNA); - ldr_unaligned_le(RB, %r2, 4, RNB); - ldr_unaligned_le(RC, %r2, 8, RNA); - ldr_unaligned_le(RD, %r2, 12, RNB); + ldr_unaligned_le(RA, r2, 0, RNA); + ldr_unaligned_le(RB, r2, 4, RNB); + ldr_unaligned_le(RC, r2, 8, RNA); + ldr_unaligned_le(RD, r2, 12, RNB); b 2f; .ltorg 1: /* aligned load */ - ldm %r2, {RA, RB, RC, RD}; + ldm r2, {RA, RB, RC, RD}; #ifndef __ARMEL__ rev RA, RA; rev RB, RB; @@ -504,14 +504,14 @@ _gcry_aes_arm_decrypt_block: rev RD, RD; #endif 2: - ldr RTAB, [%sp, #40]; - sub %sp, #16; + ldr RTAB, [sp, #40]; + sub sp, #16; mov RMASK, #0xff; - str %r1, [%sp, #4]; /* dst */ + str r1, [sp, #4]; /* dst */ mov RMASK, RMASK, lsl#2; /* byte mask */ - cmp %r3, #12; + cmp r3, #12; bge .Ldec_256; firstdecround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND); @@ -526,8 +526,8 @@ _gcry_aes_arm_decrypt_block: decround(1, RA, RB, RC, RD, RNA, RNB, RNC, RND, set_last_round_rmask); lastdecround(0, RNA, RNB, RNC, RND, RA, RB, RC, RD); - ldr RT0, [%sp, #4]; /* dst */ - add %sp, #16; + ldr RT0, [sp, #4]; /* dst */ + add sp, #16; /* store output block */ @@ -554,7 +554,7 @@ _gcry_aes_arm_decrypt_block: stm RT0, {RA, RB, RC, RD}; 2: mov r0, #(10 * 4); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .Ldec_256: diff --git a/cipher/rijndael-armv8-aarch32-ce.S b/cipher/rijndael-armv8-aarch32-ce.S index 6208652b..3c4149b3 100644 --- a/cipher/rijndael-armv8-aarch32-ce.S +++ b/cipher/rijndael-armv8-aarch32-ce.S @@ -483,9 +483,9 @@ _gcry_aes_cbc_enc_armv8_ce: * r1: outbuf * r2: inbuf * r3: iv - * %st+0: nblocks => r4 - * %st+4: cbc_mac => r5 - * %st+8: nrounds => r6 + * st+0: nblocks => r4 + * st+4: cbc_mac => r5 + * st+8: nrounds => r6 */ push {r4-r6,lr} /* 4*4 = 16b */ @@ -563,8 +563,8 @@ _gcry_aes_cbc_dec_armv8_ce: * r1: outbuf * r2: inbuf * r3: iv - * %st+0: nblocks => r4 - * %st+4: nrounds => r5 + * st+0: nblocks => r4 + * st+4: nrounds => r5 */ push {r4-r6,lr} /* 4*4 = 16b */ @@ -670,7 +670,7 @@ _gcry_aes_ecb_enc_armv8_ce: * r1: outbuf * r2: inbuf * r3: nblocks - * %st+0: nrounds => r4 + * st+0: nrounds => r4 */ push {r4-r6,lr} /* 4*4 = 16b */ @@ -755,7 +755,7 @@ _gcry_aes_ecb_dec_armv8_ce: * r1: outbuf * r2: inbuf * r3: nblocks - * %st+0: nrounds => r4 + * st+0: nrounds => r4 */ push {r4-r6,lr} /* 4*4 = 16b */ @@ -812,8 +812,8 @@ _gcry_aes_cfb_enc_armv8_ce: * r1: outbuf * r2: inbuf * r3: iv - * %st+0: nblocks => r4 - * %st+4: nrounds => r5 + * st+0: nblocks => r4 + * st+4: nrounds => r5 */ push {r4-r6,lr} /* 4*4 = 16b */ @@ -888,8 +888,8 @@ _gcry_aes_cfb_dec_armv8_ce: * r1: outbuf * r2: inbuf * r3: iv - * %st+0: nblocks => r4 - * %st+4: nrounds => r5 + * st+0: nblocks => r4 + * st+4: nrounds => r5 */ push {r4-r6,lr} /* 4*4 = 16b */ @@ -996,8 +996,8 @@ _gcry_aes_ctr_enc_armv8_ce: * r1: outbuf * r2: inbuf * r3: iv - * %st+0: nblocks => r4 - * %st+4: nrounds => r5 + * st+0: nblocks => r4 + * st+4: nrounds => r5 */ vpush {q4-q7} @@ -1176,8 +1176,8 @@ _gcry_aes_ctr32le_enc_armv8_ce: * r1: outbuf * r2: inbuf * r3: iv - * %st+0: nblocks => r4 - * %st+4: nrounds => r5 + * st+0: nblocks => r4 + * st+4: nrounds => r5 */ vpush {q4-q7} @@ -1301,11 +1301,11 @@ _gcry_aes_ocb_enc_armv8_ce: * r1: outbuf * r2: inbuf * r3: offset - * %st+0: checksum => r4 - * %st+4: Ls => r5 - * %st+8: nblocks => r6 (0 < nblocks <= 32) - * %st+12: nrounds => r7 - * %st+16: blkn => lr + * 
st+0: checksum => r4 + * st+4: Ls => r5 + * st+8: nblocks => r6 (0 < nblocks <= 32) + * st+12: nrounds => r7 + * st+16: blkn => lr */ vpush {q4-q7} @@ -1476,11 +1476,11 @@ _gcry_aes_ocb_dec_armv8_ce: * r1: outbuf * r2: inbuf * r3: offset - * %st+0: checksum => r4 - * %st+4: Ls => r5 - * %st+8: nblocks => r6 (0 < nblocks <= 32) - * %st+12: nrounds => r7 - * %st+16: blkn => lr + * st+0: checksum => r4 + * st+4: Ls => r5 + * st+8: nblocks => r6 (0 < nblocks <= 32) + * st+12: nrounds => r7 + * st+16: blkn => lr */ vpush {q4-q7} @@ -1650,10 +1650,10 @@ _gcry_aes_ocb_auth_armv8_ce: * r1: abuf * r2: offset * r3: checksum - * %st+0: Ls => r5 - * %st+4: nblocks => r6 (0 < nblocks <= 32) - * %st+8: nrounds => r7 - * %st+12: blkn => lr + * st+0: Ls => r5 + * st+4: nblocks => r6 (0 < nblocks <= 32) + * st+8: nrounds => r7 + * st+12: blkn => lr */ vpush {q4-q7} @@ -1801,8 +1801,8 @@ _gcry_aes_xts_enc_armv8_ce: * r1: outbuf * r2: inbuf * r3: iv - * %st+0: nblocks => r4 - * %st+4: nrounds => r5 + * st+0: nblocks => r4 + * st+4: nrounds => r5 */ vpush {q4-q7} @@ -1956,8 +1956,8 @@ _gcry_aes_xts_dec_armv8_ce: * r1: outbuf * r2: inbuf * r3: iv - * %st+0: nblocks => r4 - * %st+4: nrounds => r5 + * st+0: nblocks => r4 + * st+4: nrounds => r5 */ vpush {q4-q7} diff --git a/cipher/sha512-arm.S b/cipher/sha512-arm.S index 94ec0141..1e1d296f 100644 --- a/cipher/sha512-arm.S +++ b/cipher/sha512-arm.S @@ -38,23 +38,23 @@ #define hd_h ((hd_g) + 8) /* register macros */ -#define RK %r2 +#define RK r2 -#define RElo %r0 -#define REhi %r1 +#define RElo r0 +#define REhi r1 -#define RT1lo %r3 -#define RT1hi %r4 -#define RT2lo %r5 -#define RT2hi %r6 -#define RWlo %r7 -#define RWhi %r8 -#define RT3lo %r9 -#define RT3hi %r10 -#define RT4lo %r11 -#define RT4hi %ip +#define RT1lo r3 +#define RT1hi r4 +#define RT2lo r5 +#define RT2hi r6 +#define RWlo r7 +#define RWhi r8 +#define RT3lo r9 +#define RT3hi r10 +#define RT4lo r11 +#define RT4hi ip -#define RRND %lr +#define RRND lr /* variable offsets in stack */ #define ctx (0) @@ -150,13 +150,13 @@ mov RWhi, REhi, lsr#14; \ eor RWlo, RWlo, RElo, lsr#18; \ eor RWhi, RWhi, REhi, lsr#18; \ - ldr RT3lo, [%sp, #(_f)]; \ + ldr RT3lo, [sp, #(_f)]; \ adds RT1lo, RT2lo; /* t1 += K */ \ - ldr RT3hi, [%sp, #(_f) + 4]; \ + ldr RT3hi, [sp, #(_f) + 4]; \ adc RT1hi, RT2hi; \ - ldr RT4lo, [%sp, #(_g)]; \ + ldr RT4lo, [sp, #(_g)]; \ eor RWlo, RWlo, RElo, lsl#23; \ - ldr RT4hi, [%sp, #(_g) + 4]; \ + ldr RT4hi, [sp, #(_g) + 4]; \ eor RWhi, RWhi, REhi, lsl#23; \ eor RWlo, RWlo, REhi, lsl#18; \ eor RWhi, RWhi, RElo, lsl#18; \ @@ -177,29 +177,29 @@ \ /* Load D */ \ /* t1 += Cho(_e,_f,_g) */ \ - ldr RElo, [%sp, #(_d)]; \ + ldr RElo, [sp, #(_d)]; \ adds RT1lo, RT3lo; \ - ldr REhi, [%sp, #(_d) + 4]; \ + ldr REhi, [sp, #(_d) + 4]; \ adc RT1hi, RT3hi; \ \ /* Load A */ \ - ldr RT3lo, [%sp, #(_a)]; \ + ldr RT3lo, [sp, #(_a)]; \ \ /* _d += t1 */ \ adds RElo, RT1lo; \ - ldr RT3hi, [%sp, #(_a) + 4]; \ + ldr RT3hi, [sp, #(_a) + 4]; \ adc REhi, RT1hi; \ \ /* Store D */ \ - str RElo, [%sp, #(_d)]; \ + str RElo, [sp, #(_d)]; \ \ /* t2 = Sum0(_a) */ \ mov RT2lo, RT3lo, lsr#28; \ - str REhi, [%sp, #(_d) + 4]; \ + str REhi, [sp, #(_d) + 4]; \ mov RT2hi, RT3hi, lsr#28; \ - ldr RWlo, [%sp, #(_b)]; \ + ldr RWlo, [sp, #(_b)]; \ eor RT2lo, RT2lo, RT3lo, lsl#30; \ - ldr RWhi, [%sp, #(_b) + 4]; \ + ldr RWhi, [sp, #(_b) + 4]; \ eor RT2hi, RT2hi, RT3hi, lsl#30; \ eor RT2lo, RT2lo, RT3lo, lsl#25; \ eor RT2hi, RT2hi, RT3hi, lsl#25; \ @@ -212,11 +212,11 @@ \ /* t2 += t1 */ \ adds RT2lo, RT1lo; \ - ldr RT1lo, [%sp, #(_c)]; \ + ldr 
RT1lo, [sp, #(_c)]; \ adc RT2hi, RT1hi; \ \ /* Maj(_a,_b,_c) => ((_a & _b) ^ (_c & (_a ^ _b))) */ \ - ldr RT1hi, [%sp, #(_c) + 4]; \ + ldr RT1hi, [sp, #(_c) + 4]; \ and RT4lo, RWlo, RT3lo; \ and RT4hi, RWhi, RT3hi; \ eor RWlo, RWlo, RT3lo; \ @@ -229,36 +229,36 @@ /* Message expansion */ #define W_0_63(_a,_h,i) \ - ldr RT3lo, [%sp, #(w(i-2))]; \ + ldr RT3lo, [sp, #(w(i-2))]; \ adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \ - ldr RT3hi, [%sp, #(w(i-2)) + 4]; \ + ldr RT3hi, [sp, #(w(i-2)) + 4]; \ adc RT2hi, RWhi; \ /* nw = S1(w[i-2]) */ \ - ldr RT1lo, [%sp, #(_h)]; /* Load H */ \ + ldr RT1lo, [sp, #(_h)]; /* Load H */ \ mov RWlo, RT3lo, lsr#19; \ - str RT2lo, [%sp, #(_a)]; \ + str RT2lo, [sp, #(_a)]; \ eor RWlo, RWlo, RT3lo, lsl#3; \ - ldr RT1hi, [%sp, #(_h) + 4]; \ + ldr RT1hi, [sp, #(_h) + 4]; \ mov RWhi, RT3hi, lsr#19; \ - ldr RT2lo, [%sp, #(w(i-7))]; \ + ldr RT2lo, [sp, #(w(i-7))]; \ eor RWhi, RWhi, RT3hi, lsl#3; \ - str RT2hi, [%sp, #(_a) + 4]; \ + str RT2hi, [sp, #(_a) + 4]; \ eor RWlo, RWlo, RT3lo, lsr#6; \ - ldr RT2hi, [%sp, #(w(i-7)) + 4]; \ + ldr RT2hi, [sp, #(w(i-7)) + 4]; \ eor RWhi, RWhi, RT3hi, lsr#6; \ eor RWlo, RWlo, RT3hi, lsl#13; \ eor RWhi, RWhi, RT3lo, lsl#13; \ eor RWlo, RWlo, RT3hi, lsr#29; \ eor RWhi, RWhi, RT3lo, lsr#29; \ - ldr RT3lo, [%sp, #(w(i-15))]; \ + ldr RT3lo, [sp, #(w(i-15))]; \ eor RWlo, RWlo, RT3hi, lsl#26; \ - ldr RT3hi, [%sp, #(w(i-15)) + 4]; \ + ldr RT3hi, [sp, #(w(i-15)) + 4]; \ \ adds RT2lo, RWlo; /* nw += w[i-7] */ \ - ldr RWlo, [%sp, #(w(i-16))]; \ + ldr RWlo, [sp, #(w(i-16))]; \ adc RT2hi, RWhi; \ mov RT4lo, RT3lo, lsr#1; /* S0(w[i-15]) */ \ - ldr RWhi, [%sp, #(w(i-16)) + 4]; \ + ldr RWhi, [sp, #(w(i-16)) + 4]; \ mov RT4hi, RT3hi, lsr#1; \ adds RT2lo, RWlo; /* nw += w[i-16] */ \ eor RT4lo, RT4lo, RT3lo, lsr#8; \ @@ -277,20 +277,20 @@ adc RT2hi, RT4hi; \ \ /* w[0] = nw */ \ - str RT2lo, [%sp, #(w(i))]; \ + str RT2lo, [sp, #(w(i))]; \ adds RT1lo, RWlo; \ - str RT2hi, [%sp, #(w(i)) + 4]; \ + str RT2hi, [sp, #(w(i)) + 4]; \ adc RT1hi, RWhi; #define W_64_79(_a,_h,i) \ adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \ - ldr RWlo, [%sp, #(w(i-16))]; \ + ldr RWlo, [sp, #(w(i-16))]; \ adc RT2hi, RWhi; \ - ldr RWhi, [%sp, #(w(i-16)) + 4]; \ - ldr RT1lo, [%sp, #(_h)]; /* Load H */ \ - ldr RT1hi, [%sp, #(_h) + 4]; \ - str RT2lo, [%sp, #(_a)]; \ - str RT2hi, [%sp, #(_a) + 4]; \ + ldr RWhi, [sp, #(w(i-16)) + 4]; \ + ldr RT1lo, [sp, #(_h)]; /* Load H */ \ + ldr RT1hi, [sp, #(_h) + 4]; \ + str RT2lo, [sp, #(_a)]; \ + str RT2hi, [sp, #(_a) + 4]; \ adds RT1lo, RWlo; \ adc RT1hi, RWhi; @@ -300,72 +300,72 @@ _gcry_sha512_transform_arm: /* Input: - * %r0: SHA512_CONTEXT - * %r1: data - * %r2: u64 k[] constants - * %r3: nblks + * r0: SHA512_CONTEXT + * r1: data + * r2: u64 k[] constants + * r3: nblks */ - push {%r4-%r11, %ip, %lr}; - sub %sp, %sp, #STACK_MAX; - movs RWlo, %r3; - str %r0, [%sp, #(ctx)]; + push {r4-r11, ip, lr}; + sub sp, sp, #STACK_MAX; + movs RWlo, r3; + str r0, [sp, #(ctx)]; beq .Ldone; .Loop_blocks: - str RWlo, [%sp, #nblks]; + str RWlo, [sp, #nblks]; /* Load context to stack */ - add RWhi, %sp, #(_a); - ldm %r0!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + add RWhi, sp, #(_a); + ldm r0!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - ldm %r0, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + ldm r0, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} /* Load input to w[16] */ /* test if data is unaligned */ - tst 
%r1, #3; + tst r1, #3; beq 1f; /* unaligned load */ - add RWhi, %sp, #(w(0)); - read_be64_unaligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + add RWhi, sp, #(w(0)); + read_be64_unaligned_4(r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_unaligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_unaligned_4(r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_unaligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_unaligned_4(r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_unaligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_unaligned_4(r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); b 2f; 1: /* aligned load */ - add RWhi, %sp, #(w(0)); - read_be64_aligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + add RWhi, sp, #(w(0)); + read_be64_aligned_4(r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_aligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_aligned_4(r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_aligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_aligned_4(r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_aligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_aligned_4(r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); 2: - add %r1, #(16 * 8); + add r1, #(16 * 8); stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - str %r1, [%sp, #(data)]; + str r1, [sp, #(data)]; /* preload E & A */ - ldr RElo, [%sp, #(_e)]; - ldr REhi, [%sp, #(_e) + 4]; + ldr RElo, [sp, #(_e)]; + ldr REhi, [sp, #(_e) + 4]; mov RWlo, #0; - ldr RT2lo, [%sp, #(_a)]; + ldr RT2lo, [sp, #(_a)]; mov RRND, #(80-16); - ldr RT2hi, [%sp, #(_a) + 4]; + ldr RT2hi, [sp, #(_a) + 4]; mov RWhi, #0; .Loop_rounds: @@ -406,58 +406,58 @@ _gcry_sha512_transform_arm: R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 30); R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 31); - ldr %r0, [%sp, #(ctx)]; + ldr r0, [sp, #(ctx)]; adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ - ldr %r1, [%sp, #(data)]; + ldr r1, [sp, #(data)]; adc RT2hi, RWhi; - ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + ldm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} adds RT1lo, RT2lo; - ldr RT2lo, [%sp, #(_b + 0)]; + ldr RT2lo, [sp, #(_b + 0)]; adc RT1hi, RT2hi; - ldr RT2hi, [%sp, #(_b + 4)]; + ldr RT2hi, [sp, #(_b + 4)]; adds RWlo, RT2lo; - ldr RT2lo, [%sp, #(_c + 0)]; + ldr RT2lo, [sp, #(_c + 0)]; adc RWhi, RT2hi; - ldr RT2hi, [%sp, #(_c + 4)]; + ldr RT2hi, [sp, #(_c + 4)]; adds RT3lo, RT2lo; - ldr RT2lo, [%sp, #(_d + 0)]; + ldr RT2lo, [sp, #(_d + 0)]; adc RT3hi, RT2hi; - ldr RT2hi, [%sp, #(_d + 4)]; + ldr RT2hi, [sp, #(_d + 4)]; adds RT4lo, RT2lo; - ldr RT2lo, [%sp, #(_e + 0)]; + ldr RT2lo, [sp, #(_e 
+ 0)]; adc RT4hi, RT2hi; - stm %r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + stm r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} - ldr RT2hi, [%sp, #(_e + 4)]; - ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + ldr RT2hi, [sp, #(_e + 4)]; + ldm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} adds RT1lo, RT2lo; - ldr RT2lo, [%sp, #(_f + 0)]; + ldr RT2lo, [sp, #(_f + 0)]; adc RT1hi, RT2hi; - ldr RT2hi, [%sp, #(_f + 4)]; + ldr RT2hi, [sp, #(_f + 4)]; adds RWlo, RT2lo; - ldr RT2lo, [%sp, #(_g + 0)]; + ldr RT2lo, [sp, #(_g + 0)]; adc RWhi, RT2hi; - ldr RT2hi, [%sp, #(_g + 4)]; + ldr RT2hi, [sp, #(_g + 4)]; adds RT3lo, RT2lo; - ldr RT2lo, [%sp, #(_h + 0)]; + ldr RT2lo, [sp, #(_h + 0)]; adc RT3hi, RT2hi; - ldr RT2hi, [%sp, #(_h + 4)]; + ldr RT2hi, [sp, #(_h + 4)]; adds RT4lo, RT2lo; adc RT4hi, RT2hi; - stm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} - sub %r0, %r0, #(4 * 8); - ldr RWlo, [%sp, #nblks]; + stm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + sub r0, r0, #(4 * 8); + ldr RWlo, [sp, #nblks]; sub RK, #(80 * 8); subs RWlo, #1; bne .Loop_blocks; .Ldone: - mov %r0, #STACK_MAX; + mov r0, #STACK_MAX; __out: - add %sp, %sp, #STACK_MAX; - pop {%r4-%r11, %ip, %pc}; + add sp, sp, #STACK_MAX; + pop {r4-r11, ip, pc}; .size _gcry_sha512_transform_arm,.-_gcry_sha512_transform_arm; #endif diff --git a/cipher/sha512-armv7-neon.S b/cipher/sha512-armv7-neon.S index 2b186b47..a1df73b8 100644 --- a/cipher/sha512-armv7-neon.S +++ b/cipher/sha512-armv7-neon.S @@ -40,7 +40,7 @@ #define hd_g ((hd_f) + 8) /* register macros */ -#define RK %r2 +#define RK r2 #define RA d0 #define RB d1 @@ -287,26 +287,26 @@ _gcry_sha512_transform_armv7_neon: /* Input: - * %r0: SHA512_CONTEXT - * %r1: data - * %r2: u64 k[] constants - * %r3: nblks + * r0: SHA512_CONTEXT + * r1: data + * r2: u64 k[] constants + * r3: nblks */ - push {%lr}; + push {lr}; - mov %lr, #0; + mov lr, #0; /* Load context to d0-d7 */ - vld1.64 {RA-RD}, [%r0]!; - vld1.64 {RE-RH}, [%r0]; - sub %r0, #(4*8); + vld1.64 {RA-RD}, [r0]!; + vld1.64 {RE-RH}, [r0]; + sub r0, #(4*8); /* Load input to w[16], d16-d31 */ /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. 
*/ - vld1.64 {RW0-RW3}, [%r1]!; - vld1.64 {RW4-RW7}, [%r1]!; - vld1.64 {RW8-RW11}, [%r1]!; - vld1.64 {RW12-RW15}, [%r1]!; + vld1.64 {RW0-RW3}, [r1]!; + vld1.64 {RW4-RW7}, [r1]!; + vld1.64 {RW8-RW11}, [r1]!; + vld1.64 {RW12-RW15}, [r1]!; #ifdef __ARMEL__ /* byteswap */ vrev64.8 RW01q, RW01q; @@ -334,46 +334,46 @@ _gcry_sha512_transform_armv7_neon: rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q); rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q); rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q); - add %lr, #16; + add lr, #16; rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q); - cmp %lr, #64; + cmp lr, #64; rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q); bne .Loop_rounds; - subs %r3, #1; + subs r3, #1; rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, vadd_RT01q, RW1415q, dummy, _); rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, vadd_rg_RT0, RG, vadd_rg_RT1, RG); beq .Lhandle_tail; - vld1.64 {RW0-RW3}, [%r1]!; + vld1.64 {RW0-RW3}, [r1]!; rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, RE, vadd_rg_RT1, RE); rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, RC, vadd_rg_RT1, RC); #ifdef __ARMEL__ vrev64.8 RW01q, RW01q; vrev64.8 RW23q, RW23q; #endif - vld1.64 {RW4-RW7}, [%r1]!; + vld1.64 {RW4-RW7}, [r1]!; rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, RA, vadd_rg_RT1, RA); rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, RG, vadd_rg_RT1, RG); #ifdef __ARMEL__ vrev64.8 RW45q, RW45q; vrev64.8 RW67q, RW67q; #endif - vld1.64 {RW8-RW11}, [%r1]!; + vld1.64 {RW8-RW11}, [r1]!; rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, RE, vadd_rg_RT1, RE); rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC); #ifdef __ARMEL__ vrev64.8 RW89q, RW89q; vrev64.8 RW1011q, RW1011q; #endif - vld1.64 {RW12-RW15}, [%r1]!; + vld1.64 {RW12-RW15}, [r1]!; vadd_rg_RT0(RA); vadd_rg_RT1(RA); /* Load context */ - vld1.64 {RT0-RT3}, [%r0]!; - vld1.64 {RT4-RT7}, [%r0]; - sub %r0, #(4*8); + vld1.64 {RT0-RT3}, [r0]!; + vld1.64 {RT4-RT7}, [r0]; + sub r0, #(4*8); #ifdef __ARMEL__ vrev64.8 RW1213q, RW1213q; @@ -390,11 +390,11 @@ _gcry_sha512_transform_armv7_neon: vadd.u64 RH, RT7; /* Store the first half of context */ - vst1.64 {RA-RD}, [%r0]!; + vst1.64 {RA-RD}, [r0]!; sub RK, $(8*80); - vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ - mov %lr, #0; - sub %r0, #(4*8); + vst1.64 {RE-RH}, [r0]; /* Store the last half of context */ + mov lr, #0; + sub r0, #(4*8); b .Loop; .ltorg @@ -408,11 +408,11 @@ _gcry_sha512_transform_armv7_neon: rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, RC, vadd_rg_RT1, RC); /* Load context to d16-d23 */ - vld1.64 {RW0-RW3}, [%r0]!; + vld1.64 {RW0-RW3}, [r0]!; vadd_rg_RT0(RA); - vld1.64 {RW4-RW7}, [%r0]; + vld1.64 {RW4-RW7}, [r0]; vadd_rg_RT1(RA); - sub %r0, #(4*8); + sub r0, #(4*8); vadd.u64 RA, RW0; vadd.u64 RB, RW1; @@ -424,7 +424,7 @@ _gcry_sha512_transform_armv7_neon: vadd.u64 RH, RW7; /* Store the first half of context */ - vst1.64 {RA-RD}, [%r0]!; + vst1.64 {RA-RD}, [r0]!; /* Clear used registers */ /* d16-d31 */ @@ -432,7 +432,7 @@ _gcry_sha512_transform_armv7_neon: 
CLEAR_REG(RW23q); CLEAR_REG(RW45q); CLEAR_REG(RW67q); - vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ + vst1.64 {RE-RH}, [r0]; /* Store the last half of context */ CLEAR_REG(RW89q); CLEAR_REG(RW1011q); CLEAR_REG(RW1213q); @@ -440,13 +440,13 @@ _gcry_sha512_transform_armv7_neon: /* d8-d15 */ vpop {RT0-RT7}; /* d0-d7 (q0-q3) */ - CLEAR_REG(%q0); - CLEAR_REG(%q1); - CLEAR_REG(%q2); - CLEAR_REG(%q3); + CLEAR_REG(q0); + CLEAR_REG(q1); + CLEAR_REG(q2); + CLEAR_REG(q3); - eor %r0, %r0; - pop {%pc}; + eor r0, r0; + pop {pc}; .size _gcry_sha512_transform_armv7_neon,.-_gcry_sha512_transform_armv7_neon; #endif diff --git a/cipher/twofish-arm.S b/cipher/twofish-arm.S index 2e1da6cd..b381e546 100644 --- a/cipher/twofish-arm.S +++ b/cipher/twofish-arm.S @@ -37,25 +37,25 @@ #define k ((w) + 4 * 8) /* register macros */ -#define CTX %r0 -#define CTXs0 %r0 -#define CTXs1 %r1 -#define CTXs3 %r7 +#define CTX r0 +#define CTXs0 r0 +#define CTXs1 r1 +#define CTXs3 r7 -#define RA %r3 -#define RB %r4 -#define RC %r5 -#define RD %r6 +#define RA r3 +#define RB r4 +#define RC r5 +#define RD r6 -#define RX %r2 -#define RY %ip +#define RX r2 +#define RY ip -#define RMASK %lr +#define RMASK lr -#define RT0 %r8 -#define RT1 %r9 -#define RT2 %r10 -#define RT3 %r11 +#define RT0 r8 +#define RT1 r9 +#define RT2 r10 +#define RT3 r11 /* helper macros */ #define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ @@ -262,15 +262,15 @@ _gcry_twofish_arm_encrypt_block: /* input: - * %r0: ctx - * %r1: dst - * %r2: src + * r0: ctx + * r1: dst + * r2: src */ - push {%r1, %r4-%r11, %ip, %lr}; + push {r1, r4-r11, ip, lr}; add RY, CTXs0, #w; - ldr_input_le(%r2, RA, RB, RC, RD, RT0); + ldr_input_le(r2, RA, RB, RC, RD, RT0); /* Input whitening */ ldm RY, {RT0, RT1, RT2, RT3}; @@ -292,7 +292,7 @@ _gcry_twofish_arm_encrypt_block: last_encrypt_cycle(7); add RY, CTXs3, #(w + 4*4 - s3); - pop {%r1}; /* dst */ + pop {r1}; /* dst */ /* Output whitening */ ldm RY, {RT0, RT1, RT2, RT3}; @@ -301,9 +301,9 @@ _gcry_twofish_arm_encrypt_block: eor RA, RA, RT2; eor RB, RB, RT3; - str_output_le(%r1, RC, RD, RA, RB, RT0, RT1); + str_output_le(r1, RC, RD, RA, RB, RT0, RT1); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .ltorg .size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; @@ -313,15 +313,15 @@ _gcry_twofish_arm_encrypt_block: _gcry_twofish_arm_decrypt_block: /* input: - * %r0: ctx - * %r1: dst - * %r2: src + * r0: ctx + * r1: dst + * r2: src */ - push {%r1, %r4-%r11, %ip, %lr}; + push {r1, r4-r11, ip, lr}; add CTXs3, CTXs0, #(s3 - s0); - ldr_input_le(%r2, RC, RD, RA, RB, RT0); + ldr_input_le(r2, RC, RD, RA, RB, RT0); add RY, CTXs3, #(w + 4*4 - s3); add CTXs3, CTXs0, #(s3 - s0); @@ -345,7 +345,7 @@ _gcry_twofish_arm_decrypt_block: last_decrypt_cycle(0); add RY, CTXs0, #w; - pop {%r1}; /* dst */ + pop {r1}; /* dst */ /* Output whitening */ ldm RY, {RT0, RT1, RT2, RT3}; @@ -354,9 +354,9 @@ _gcry_twofish_arm_decrypt_block: eor RC, RC, RT2; eor RD, RD, RT3; - str_output_le(%r1, RA, RB, RC, RD, RT0, RT1); + str_output_le(r1, RA, RB, RC, RD, RT0, RT1); - pop {%r4-%r11, %ip, %pc}; + pop {r4-r11, ip, pc}; .size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block; #endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ |