diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-12-14 19:37:37 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-12-14 19:37:49 +0200 |
commit | 02d5d1d97b3f281cf9c854d7143e346ab76fa384 (patch) | |
tree | 4314612c565c5af2d9d2a6f976579bb9ee95b729 /cipher/sha512-arm.S | |
parent | 3d20308cc529b53d49954e9f0b8d10fa14422303 (diff) | |
download | libgcrypt-02d5d1d97b3f281cf9c854d7143e346ab76fa384.tar.gz |
Add clang support for ARM 32-bit assembly
* configure.ac (gcry_cv_gcc_arm_platform_as_ok)
(gcry_cv_gcc_inline_asm_neon): Remove % prefix from register names.
* cipher/cipher-gcm-armv7-neon.S (vmull_p64): Prefix constant values
with # character instead of $.
* cipher/blowfish-arm.S: Remove % prefix from all register names.
* cipher/camellia-arm.S: Likewise.
* cipher/cast5-arm.S: Likewise.
* cipher/rijndael-arm.S: Likewise.
* cipher/rijndael-armv8-aarch32-ce.S: Likewise.
* cipher/sha512-arm.S: Likewise.
* cipher/sha512-armv7-neon.S: Likewise.
* cipher/twofish-arm.S: Likewise.
* mpi/arm/mpih-add1.S: Likewise.
* mpi/arm/mpih-mul1.S: Likewise.
* mpi/arm/mpih-mul2.S: Likewise.
* mpi/arm/mpih-mul3.S: Likewise.
* mpi/arm/mpih-sub1.S: Likewise.
--
Reported-by: Dmytro Kovalov <dmytro.a.kovalov@globallogic.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha512-arm.S')
-rw-r--r-- | cipher/sha512-arm.S | 204 |
1 file changed, 102 insertions, 102 deletions
diff --git a/cipher/sha512-arm.S b/cipher/sha512-arm.S index 94ec0141..1e1d296f 100644 --- a/cipher/sha512-arm.S +++ b/cipher/sha512-arm.S @@ -38,23 +38,23 @@ #define hd_h ((hd_g) + 8) /* register macros */ -#define RK %r2 +#define RK r2 -#define RElo %r0 -#define REhi %r1 +#define RElo r0 +#define REhi r1 -#define RT1lo %r3 -#define RT1hi %r4 -#define RT2lo %r5 -#define RT2hi %r6 -#define RWlo %r7 -#define RWhi %r8 -#define RT3lo %r9 -#define RT3hi %r10 -#define RT4lo %r11 -#define RT4hi %ip +#define RT1lo r3 +#define RT1hi r4 +#define RT2lo r5 +#define RT2hi r6 +#define RWlo r7 +#define RWhi r8 +#define RT3lo r9 +#define RT3hi r10 +#define RT4lo r11 +#define RT4hi ip -#define RRND %lr +#define RRND lr /* variable offsets in stack */ #define ctx (0) @@ -150,13 +150,13 @@ mov RWhi, REhi, lsr#14; \ eor RWlo, RWlo, RElo, lsr#18; \ eor RWhi, RWhi, REhi, lsr#18; \ - ldr RT3lo, [%sp, #(_f)]; \ + ldr RT3lo, [sp, #(_f)]; \ adds RT1lo, RT2lo; /* t1 += K */ \ - ldr RT3hi, [%sp, #(_f) + 4]; \ + ldr RT3hi, [sp, #(_f) + 4]; \ adc RT1hi, RT2hi; \ - ldr RT4lo, [%sp, #(_g)]; \ + ldr RT4lo, [sp, #(_g)]; \ eor RWlo, RWlo, RElo, lsl#23; \ - ldr RT4hi, [%sp, #(_g) + 4]; \ + ldr RT4hi, [sp, #(_g) + 4]; \ eor RWhi, RWhi, REhi, lsl#23; \ eor RWlo, RWlo, REhi, lsl#18; \ eor RWhi, RWhi, RElo, lsl#18; \ @@ -177,29 +177,29 @@ \ /* Load D */ \ /* t1 += Cho(_e,_f,_g) */ \ - ldr RElo, [%sp, #(_d)]; \ + ldr RElo, [sp, #(_d)]; \ adds RT1lo, RT3lo; \ - ldr REhi, [%sp, #(_d) + 4]; \ + ldr REhi, [sp, #(_d) + 4]; \ adc RT1hi, RT3hi; \ \ /* Load A */ \ - ldr RT3lo, [%sp, #(_a)]; \ + ldr RT3lo, [sp, #(_a)]; \ \ /* _d += t1 */ \ adds RElo, RT1lo; \ - ldr RT3hi, [%sp, #(_a) + 4]; \ + ldr RT3hi, [sp, #(_a) + 4]; \ adc REhi, RT1hi; \ \ /* Store D */ \ - str RElo, [%sp, #(_d)]; \ + str RElo, [sp, #(_d)]; \ \ /* t2 = Sum0(_a) */ \ mov RT2lo, RT3lo, lsr#28; \ - str REhi, [%sp, #(_d) + 4]; \ + str REhi, [sp, #(_d) + 4]; \ mov RT2hi, RT3hi, lsr#28; \ - ldr RWlo, [%sp, #(_b)]; \ + ldr RWlo, [sp, #(_b)]; \ eor 
RT2lo, RT2lo, RT3lo, lsl#30; \ - ldr RWhi, [%sp, #(_b) + 4]; \ + ldr RWhi, [sp, #(_b) + 4]; \ eor RT2hi, RT2hi, RT3hi, lsl#30; \ eor RT2lo, RT2lo, RT3lo, lsl#25; \ eor RT2hi, RT2hi, RT3hi, lsl#25; \ @@ -212,11 +212,11 @@ \ /* t2 += t1 */ \ adds RT2lo, RT1lo; \ - ldr RT1lo, [%sp, #(_c)]; \ + ldr RT1lo, [sp, #(_c)]; \ adc RT2hi, RT1hi; \ \ /* Maj(_a,_b,_c) => ((_a & _b) ^ (_c & (_a ^ _b))) */ \ - ldr RT1hi, [%sp, #(_c) + 4]; \ + ldr RT1hi, [sp, #(_c) + 4]; \ and RT4lo, RWlo, RT3lo; \ and RT4hi, RWhi, RT3hi; \ eor RWlo, RWlo, RT3lo; \ @@ -229,36 +229,36 @@ /* Message expansion */ #define W_0_63(_a,_h,i) \ - ldr RT3lo, [%sp, #(w(i-2))]; \ + ldr RT3lo, [sp, #(w(i-2))]; \ adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \ - ldr RT3hi, [%sp, #(w(i-2)) + 4]; \ + ldr RT3hi, [sp, #(w(i-2)) + 4]; \ adc RT2hi, RWhi; \ /* nw = S1(w[i-2]) */ \ - ldr RT1lo, [%sp, #(_h)]; /* Load H */ \ + ldr RT1lo, [sp, #(_h)]; /* Load H */ \ mov RWlo, RT3lo, lsr#19; \ - str RT2lo, [%sp, #(_a)]; \ + str RT2lo, [sp, #(_a)]; \ eor RWlo, RWlo, RT3lo, lsl#3; \ - ldr RT1hi, [%sp, #(_h) + 4]; \ + ldr RT1hi, [sp, #(_h) + 4]; \ mov RWhi, RT3hi, lsr#19; \ - ldr RT2lo, [%sp, #(w(i-7))]; \ + ldr RT2lo, [sp, #(w(i-7))]; \ eor RWhi, RWhi, RT3hi, lsl#3; \ - str RT2hi, [%sp, #(_a) + 4]; \ + str RT2hi, [sp, #(_a) + 4]; \ eor RWlo, RWlo, RT3lo, lsr#6; \ - ldr RT2hi, [%sp, #(w(i-7)) + 4]; \ + ldr RT2hi, [sp, #(w(i-7)) + 4]; \ eor RWhi, RWhi, RT3hi, lsr#6; \ eor RWlo, RWlo, RT3hi, lsl#13; \ eor RWhi, RWhi, RT3lo, lsl#13; \ eor RWlo, RWlo, RT3hi, lsr#29; \ eor RWhi, RWhi, RT3lo, lsr#29; \ - ldr RT3lo, [%sp, #(w(i-15))]; \ + ldr RT3lo, [sp, #(w(i-15))]; \ eor RWlo, RWlo, RT3hi, lsl#26; \ - ldr RT3hi, [%sp, #(w(i-15)) + 4]; \ + ldr RT3hi, [sp, #(w(i-15)) + 4]; \ \ adds RT2lo, RWlo; /* nw += w[i-7] */ \ - ldr RWlo, [%sp, #(w(i-16))]; \ + ldr RWlo, [sp, #(w(i-16))]; \ adc RT2hi, RWhi; \ mov RT4lo, RT3lo, lsr#1; /* S0(w[i-15]) */ \ - ldr RWhi, [%sp, #(w(i-16)) + 4]; \ + ldr RWhi, [sp, #(w(i-16)) + 4]; \ mov RT4hi, RT3hi, 
lsr#1; \ adds RT2lo, RWlo; /* nw += w[i-16] */ \ eor RT4lo, RT4lo, RT3lo, lsr#8; \ @@ -277,20 +277,20 @@ adc RT2hi, RT4hi; \ \ /* w[0] = nw */ \ - str RT2lo, [%sp, #(w(i))]; \ + str RT2lo, [sp, #(w(i))]; \ adds RT1lo, RWlo; \ - str RT2hi, [%sp, #(w(i)) + 4]; \ + str RT2hi, [sp, #(w(i)) + 4]; \ adc RT1hi, RWhi; #define W_64_79(_a,_h,i) \ adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \ - ldr RWlo, [%sp, #(w(i-16))]; \ + ldr RWlo, [sp, #(w(i-16))]; \ adc RT2hi, RWhi; \ - ldr RWhi, [%sp, #(w(i-16)) + 4]; \ - ldr RT1lo, [%sp, #(_h)]; /* Load H */ \ - ldr RT1hi, [%sp, #(_h) + 4]; \ - str RT2lo, [%sp, #(_a)]; \ - str RT2hi, [%sp, #(_a) + 4]; \ + ldr RWhi, [sp, #(w(i-16)) + 4]; \ + ldr RT1lo, [sp, #(_h)]; /* Load H */ \ + ldr RT1hi, [sp, #(_h) + 4]; \ + str RT2lo, [sp, #(_a)]; \ + str RT2hi, [sp, #(_a) + 4]; \ adds RT1lo, RWlo; \ adc RT1hi, RWhi; @@ -300,72 +300,72 @@ _gcry_sha512_transform_arm: /* Input: - * %r0: SHA512_CONTEXT - * %r1: data - * %r2: u64 k[] constants - * %r3: nblks + * r0: SHA512_CONTEXT + * r1: data + * r2: u64 k[] constants + * r3: nblks */ - push {%r4-%r11, %ip, %lr}; - sub %sp, %sp, #STACK_MAX; - movs RWlo, %r3; - str %r0, [%sp, #(ctx)]; + push {r4-r11, ip, lr}; + sub sp, sp, #STACK_MAX; + movs RWlo, r3; + str r0, [sp, #(ctx)]; beq .Ldone; .Loop_blocks: - str RWlo, [%sp, #nblks]; + str RWlo, [sp, #nblks]; /* Load context to stack */ - add RWhi, %sp, #(_a); - ldm %r0!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + add RWhi, sp, #(_a); + ldm r0!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - ldm %r0, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} + ldm r0, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} /* Load input to w[16] */ /* test if data is unaligned */ - tst %r1, #3; + tst r1, #3; beq 1f; /* unaligned load */ - add RWhi, %sp, #(w(0)); - read_be64_unaligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, 
RT3hi, RT4lo, RT4hi, RWlo); + add RWhi, sp, #(w(0)); + read_be64_unaligned_4(r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_unaligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_unaligned_4(r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_unaligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_unaligned_4(r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_unaligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_unaligned_4(r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); b 2f; 1: /* aligned load */ - add RWhi, %sp, #(w(0)); - read_be64_aligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + add RWhi, sp, #(w(0)); + read_be64_aligned_4(r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_aligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_aligned_4(r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_aligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_aligned_4(r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - read_be64_aligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); + read_be64_aligned_4(r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo); 2: - add %r1, #(16 * 8); + add r1, 
#(16 * 8); stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi} - str %r1, [%sp, #(data)]; + str r1, [sp, #(data)]; /* preload E & A */ - ldr RElo, [%sp, #(_e)]; - ldr REhi, [%sp, #(_e) + 4]; + ldr RElo, [sp, #(_e)]; + ldr REhi, [sp, #(_e) + 4]; mov RWlo, #0; - ldr RT2lo, [%sp, #(_a)]; + ldr RT2lo, [sp, #(_a)]; mov RRND, #(80-16); - ldr RT2hi, [%sp, #(_a) + 4]; + ldr RT2hi, [sp, #(_a) + 4]; mov RWhi, #0; .Loop_rounds: @@ -406,58 +406,58 @@ _gcry_sha512_transform_arm: R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 30); R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 31); - ldr %r0, [%sp, #(ctx)]; + ldr r0, [sp, #(ctx)]; adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ - ldr %r1, [%sp, #(data)]; + ldr r1, [sp, #(data)]; adc RT2hi, RWhi; - ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + ldm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} adds RT1lo, RT2lo; - ldr RT2lo, [%sp, #(_b + 0)]; + ldr RT2lo, [sp, #(_b + 0)]; adc RT1hi, RT2hi; - ldr RT2hi, [%sp, #(_b + 4)]; + ldr RT2hi, [sp, #(_b + 4)]; adds RWlo, RT2lo; - ldr RT2lo, [%sp, #(_c + 0)]; + ldr RT2lo, [sp, #(_c + 0)]; adc RWhi, RT2hi; - ldr RT2hi, [%sp, #(_c + 4)]; + ldr RT2hi, [sp, #(_c + 4)]; adds RT3lo, RT2lo; - ldr RT2lo, [%sp, #(_d + 0)]; + ldr RT2lo, [sp, #(_d + 0)]; adc RT3hi, RT2hi; - ldr RT2hi, [%sp, #(_d + 4)]; + ldr RT2hi, [sp, #(_d + 4)]; adds RT4lo, RT2lo; - ldr RT2lo, [%sp, #(_e + 0)]; + ldr RT2lo, [sp, #(_e + 0)]; adc RT4hi, RT2hi; - stm %r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + stm r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} - ldr RT2hi, [%sp, #(_e + 4)]; - ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + ldr RT2hi, [sp, #(_e + 4)]; + ldm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} adds RT1lo, RT2lo; - ldr RT2lo, [%sp, #(_f + 0)]; + ldr RT2lo, [sp, #(_f + 0)]; adc RT1hi, RT2hi; - ldr RT2hi, [%sp, #(_f + 4)]; + ldr RT2hi, [sp, #(_f + 4)]; adds RWlo, RT2lo; - ldr RT2lo, [%sp, #(_g + 0)]; + ldr RT2lo, [sp, #(_g + 0)]; adc RWhi, RT2hi; - ldr RT2hi, 
[%sp, #(_g + 4)]; + ldr RT2hi, [sp, #(_g + 4)]; adds RT3lo, RT2lo; - ldr RT2lo, [%sp, #(_h + 0)]; + ldr RT2lo, [sp, #(_h + 0)]; adc RT3hi, RT2hi; - ldr RT2hi, [%sp, #(_h + 4)]; + ldr RT2hi, [sp, #(_h + 4)]; adds RT4lo, RT2lo; adc RT4hi, RT2hi; - stm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} - sub %r0, %r0, #(4 * 8); - ldr RWlo, [%sp, #nblks]; + stm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi} + sub r0, r0, #(4 * 8); + ldr RWlo, [sp, #nblks]; sub RK, #(80 * 8); subs RWlo, #1; bne .Loop_blocks; .Ldone: - mov %r0, #STACK_MAX; + mov r0, #STACK_MAX; __out: - add %sp, %sp, #STACK_MAX; - pop {%r4-%r11, %ip, %pc}; + add sp, sp, #STACK_MAX; + pop {r4-r11, ip, pc}; .size _gcry_sha512_transform_arm,.-_gcry_sha512_transform_arm; #endif |