author     Jussi Kivilinna <jussi.kivilinna@iki.fi>    2022-12-14 19:37:37 +0200
committer  Jussi Kivilinna <jussi.kivilinna@iki.fi>    2022-12-14 19:37:49 +0200
commit     02d5d1d97b3f281cf9c854d7143e346ab76fa384
tree       4314612c565c5af2d9d2a6f976579bb9ee95b729 /cipher/sha512-arm.S
parent     3d20308cc529b53d49954e9f0b8d10fa14422303
Add clang support for ARM 32-bit assembly
* configure.ac (gcry_cv_gcc_arm_platform_as_ok)
(gcry_cv_gcc_inline_asm_neon): Remove % prefix from register names.
* cipher/cipher-gcm-armv7-neon.S (vmull_p64): Prefix constant values
with # character instead of $.
* cipher/blowfish-arm.S: Remove % prefix from all register names.
* cipher/camellia-arm.S: Likewise.
* cipher/cast5-arm.S: Likewise.
* cipher/rijndael-arm.S: Likewise.
* cipher/rijndael-armv8-aarch32-ce.S: Likewise.
* cipher/sha512-arm.S: Likewise.
* cipher/sha512-armv7-neon.S: Likewise.
* cipher/twofish-arm.S: Likewise.
* mpi/arm/mpih-add1.S: Likewise.
* mpi/arm/mpih-mul1.S: Likewise.
* mpi/arm/mpih-mul2.S: Likewise.
* mpi/arm/mpih-mul3.S: Likewise.
* mpi/arm/mpih-sub1.S: Likewise.
--
Reported-by: Dmytro Kovalov <dmytro.a.kovalov@globallogic.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
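
A note for context (not part of the commit message): gas, the GNU assembler, optionally accepts a % prefix on ARM register names and a $ prefix on immediate constants, but clang's integrated assembler rejects both and requires bare register names with #-prefixed immediates. A minimal sketch of the syntax difference this patch addresses:

    /* Accepted by gas only; rejected by clang's integrated assembler: */
    mov %r0, $0
    /* Accepted by both gas and clang: */
    mov r0, #0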
Diffstat (limited to 'cipher/sha512-arm.S')
-rw-r--r--  cipher/sha512-arm.S  204
1 file changed, 102 insertions, 102 deletions
diff --git a/cipher/sha512-arm.S b/cipher/sha512-arm.S
index 94ec0141..1e1d296f 100644
--- a/cipher/sha512-arm.S
+++ b/cipher/sha512-arm.S
@@ -38,23 +38,23 @@
#define hd_h ((hd_g) + 8)
/* register macros */
-#define RK %r2
+#define RK r2
-#define RElo %r0
-#define REhi %r1
+#define RElo r0
+#define REhi r1
-#define RT1lo %r3
-#define RT1hi %r4
-#define RT2lo %r5
-#define RT2hi %r6
-#define RWlo %r7
-#define RWhi %r8
-#define RT3lo %r9
-#define RT3hi %r10
-#define RT4lo %r11
-#define RT4hi %ip
+#define RT1lo r3
+#define RT1hi r4
+#define RT2lo r5
+#define RT2hi r6
+#define RWlo r7
+#define RWhi r8
+#define RT3lo r9
+#define RT3hi r10
+#define RT4lo r11
+#define RT4hi ip
-#define RRND %lr
+#define RRND lr
/* variable offsets in stack */
#define ctx (0)
@@ -150,13 +150,13 @@
mov RWhi, REhi, lsr#14; \
eor RWlo, RWlo, RElo, lsr#18; \
eor RWhi, RWhi, REhi, lsr#18; \
- ldr RT3lo, [%sp, #(_f)]; \
+ ldr RT3lo, [sp, #(_f)]; \
adds RT1lo, RT2lo; /* t1 += K */ \
- ldr RT3hi, [%sp, #(_f) + 4]; \
+ ldr RT3hi, [sp, #(_f) + 4]; \
adc RT1hi, RT2hi; \
- ldr RT4lo, [%sp, #(_g)]; \
+ ldr RT4lo, [sp, #(_g)]; \
eor RWlo, RWlo, RElo, lsl#23; \
- ldr RT4hi, [%sp, #(_g) + 4]; \
+ ldr RT4hi, [sp, #(_g) + 4]; \
eor RWhi, RWhi, REhi, lsl#23; \
eor RWlo, RWlo, REhi, lsl#18; \
eor RWhi, RWhi, RElo, lsl#18; \
@@ -177,29 +177,29 @@
\
/* Load D */ \
/* t1 += Cho(_e,_f,_g) */ \
- ldr RElo, [%sp, #(_d)]; \
+ ldr RElo, [sp, #(_d)]; \
adds RT1lo, RT3lo; \
- ldr REhi, [%sp, #(_d) + 4]; \
+ ldr REhi, [sp, #(_d) + 4]; \
adc RT1hi, RT3hi; \
\
/* Load A */ \
- ldr RT3lo, [%sp, #(_a)]; \
+ ldr RT3lo, [sp, #(_a)]; \
\
/* _d += t1 */ \
adds RElo, RT1lo; \
- ldr RT3hi, [%sp, #(_a) + 4]; \
+ ldr RT3hi, [sp, #(_a) + 4]; \
adc REhi, RT1hi; \
\
/* Store D */ \
- str RElo, [%sp, #(_d)]; \
+ str RElo, [sp, #(_d)]; \
\
/* t2 = Sum0(_a) */ \
mov RT2lo, RT3lo, lsr#28; \
- str REhi, [%sp, #(_d) + 4]; \
+ str REhi, [sp, #(_d) + 4]; \
mov RT2hi, RT3hi, lsr#28; \
- ldr RWlo, [%sp, #(_b)]; \
+ ldr RWlo, [sp, #(_b)]; \
eor RT2lo, RT2lo, RT3lo, lsl#30; \
- ldr RWhi, [%sp, #(_b) + 4]; \
+ ldr RWhi, [sp, #(_b) + 4]; \
eor RT2hi, RT2hi, RT3hi, lsl#30; \
eor RT2lo, RT2lo, RT3lo, lsl#25; \
eor RT2hi, RT2hi, RT3hi, lsl#25; \
@@ -212,11 +212,11 @@
\
/* t2 += t1 */ \
adds RT2lo, RT1lo; \
- ldr RT1lo, [%sp, #(_c)]; \
+ ldr RT1lo, [sp, #(_c)]; \
adc RT2hi, RT1hi; \
\
/* Maj(_a,_b,_c) => ((_a & _b) ^ (_c & (_a ^ _b))) */ \
- ldr RT1hi, [%sp, #(_c) + 4]; \
+ ldr RT1hi, [sp, #(_c) + 4]; \
and RT4lo, RWlo, RT3lo; \
and RT4hi, RWhi, RT3hi; \
eor RWlo, RWlo, RT3lo; \
@@ -229,36 +229,36 @@
/* Message expansion */
#define W_0_63(_a,_h,i) \
- ldr RT3lo, [%sp, #(w(i-2))]; \
+ ldr RT3lo, [sp, #(w(i-2))]; \
adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \
- ldr RT3hi, [%sp, #(w(i-2)) + 4]; \
+ ldr RT3hi, [sp, #(w(i-2)) + 4]; \
adc RT2hi, RWhi; \
/* nw = S1(w[i-2]) */ \
- ldr RT1lo, [%sp, #(_h)]; /* Load H */ \
+ ldr RT1lo, [sp, #(_h)]; /* Load H */ \
mov RWlo, RT3lo, lsr#19; \
- str RT2lo, [%sp, #(_a)]; \
+ str RT2lo, [sp, #(_a)]; \
eor RWlo, RWlo, RT3lo, lsl#3; \
- ldr RT1hi, [%sp, #(_h) + 4]; \
+ ldr RT1hi, [sp, #(_h) + 4]; \
mov RWhi, RT3hi, lsr#19; \
- ldr RT2lo, [%sp, #(w(i-7))]; \
+ ldr RT2lo, [sp, #(w(i-7))]; \
eor RWhi, RWhi, RT3hi, lsl#3; \
- str RT2hi, [%sp, #(_a) + 4]; \
+ str RT2hi, [sp, #(_a) + 4]; \
eor RWlo, RWlo, RT3lo, lsr#6; \
- ldr RT2hi, [%sp, #(w(i-7)) + 4]; \
+ ldr RT2hi, [sp, #(w(i-7)) + 4]; \
eor RWhi, RWhi, RT3hi, lsr#6; \
eor RWlo, RWlo, RT3hi, lsl#13; \
eor RWhi, RWhi, RT3lo, lsl#13; \
eor RWlo, RWlo, RT3hi, lsr#29; \
eor RWhi, RWhi, RT3lo, lsr#29; \
- ldr RT3lo, [%sp, #(w(i-15))]; \
+ ldr RT3lo, [sp, #(w(i-15))]; \
eor RWlo, RWlo, RT3hi, lsl#26; \
- ldr RT3hi, [%sp, #(w(i-15)) + 4]; \
+ ldr RT3hi, [sp, #(w(i-15)) + 4]; \
\
adds RT2lo, RWlo; /* nw += w[i-7] */ \
- ldr RWlo, [%sp, #(w(i-16))]; \
+ ldr RWlo, [sp, #(w(i-16))]; \
adc RT2hi, RWhi; \
mov RT4lo, RT3lo, lsr#1; /* S0(w[i-15]) */ \
- ldr RWhi, [%sp, #(w(i-16)) + 4]; \
+ ldr RWhi, [sp, #(w(i-16)) + 4]; \
mov RT4hi, RT3hi, lsr#1; \
adds RT2lo, RWlo; /* nw += w[i-16] */ \
eor RT4lo, RT4lo, RT3lo, lsr#8; \
@@ -277,20 +277,20 @@
adc RT2hi, RT4hi; \
\
/* w[0] = nw */ \
- str RT2lo, [%sp, #(w(i))]; \
+ str RT2lo, [sp, #(w(i))]; \
adds RT1lo, RWlo; \
- str RT2hi, [%sp, #(w(i)) + 4]; \
+ str RT2hi, [sp, #(w(i)) + 4]; \
adc RT1hi, RWhi;
#define W_64_79(_a,_h,i) \
adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \
- ldr RWlo, [%sp, #(w(i-16))]; \
+ ldr RWlo, [sp, #(w(i-16))]; \
adc RT2hi, RWhi; \
- ldr RWhi, [%sp, #(w(i-16)) + 4]; \
- ldr RT1lo, [%sp, #(_h)]; /* Load H */ \
- ldr RT1hi, [%sp, #(_h) + 4]; \
- str RT2lo, [%sp, #(_a)]; \
- str RT2hi, [%sp, #(_a) + 4]; \
+ ldr RWhi, [sp, #(w(i-16)) + 4]; \
+ ldr RT1lo, [sp, #(_h)]; /* Load H */ \
+ ldr RT1hi, [sp, #(_h) + 4]; \
+ str RT2lo, [sp, #(_a)]; \
+ str RT2hi, [sp, #(_a) + 4]; \
adds RT1lo, RWlo; \
adc RT1hi, RWhi;
@@ -300,72 +300,72 @@
_gcry_sha512_transform_arm:
/* Input:
- * %r0: SHA512_CONTEXT
- * %r1: data
- * %r2: u64 k[] constants
- * %r3: nblks
+ * r0: SHA512_CONTEXT
+ * r1: data
+ * r2: u64 k[] constants
+ * r3: nblks
*/
- push {%r4-%r11, %ip, %lr};
- sub %sp, %sp, #STACK_MAX;
- movs RWlo, %r3;
- str %r0, [%sp, #(ctx)];
+ push {r4-r11, ip, lr};
+ sub sp, sp, #STACK_MAX;
+ movs RWlo, r3;
+ str r0, [sp, #(ctx)];
beq .Ldone;
.Loop_blocks:
- str RWlo, [%sp, #nblks];
+ str RWlo, [sp, #nblks];
/* Load context to stack */
- add RWhi, %sp, #(_a);
- ldm %r0!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+ add RWhi, sp, #(_a);
+ ldm r0!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
- ldm %r0, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+ ldm r0, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
/* Load input to w[16] */
/* test if data is unaligned */
- tst %r1, #3;
+ tst r1, #3;
beq 1f;
/* unaligned load */
- add RWhi, %sp, #(w(0));
- read_be64_unaligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ add RWhi, sp, #(w(0));
+ read_be64_unaligned_4(r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
- read_be64_unaligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ read_be64_unaligned_4(r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
- read_be64_unaligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ read_be64_unaligned_4(r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
- read_be64_unaligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ read_be64_unaligned_4(r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
b 2f;
1:
/* aligned load */
- add RWhi, %sp, #(w(0));
- read_be64_aligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ add RWhi, sp, #(w(0));
+ read_be64_aligned_4(r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
- read_be64_aligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ read_be64_aligned_4(r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
- read_be64_aligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ read_be64_aligned_4(r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
- read_be64_aligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
+ read_be64_aligned_4(r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, RT3hi, RT4lo, RT4hi, RWlo);
2:
- add %r1, #(16 * 8);
+ add r1, #(16 * 8);
stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
- str %r1, [%sp, #(data)];
+ str r1, [sp, #(data)];
/* preload E & A */
- ldr RElo, [%sp, #(_e)];
- ldr REhi, [%sp, #(_e) + 4];
+ ldr RElo, [sp, #(_e)];
+ ldr REhi, [sp, #(_e) + 4];
mov RWlo, #0;
- ldr RT2lo, [%sp, #(_a)];
+ ldr RT2lo, [sp, #(_a)];
mov RRND, #(80-16);
- ldr RT2hi, [%sp, #(_a) + 4];
+ ldr RT2hi, [sp, #(_a) + 4];
mov RWhi, #0;
.Loop_rounds:
@@ -406,58 +406,58 @@ _gcry_sha512_transform_arm:
R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 30);
R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 31);
- ldr %r0, [%sp, #(ctx)];
+ ldr r0, [sp, #(ctx)];
adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */
- ldr %r1, [%sp, #(data)];
+ ldr r1, [sp, #(data)];
adc RT2hi, RWhi;
- ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+ ldm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
adds RT1lo, RT2lo;
- ldr RT2lo, [%sp, #(_b + 0)];
+ ldr RT2lo, [sp, #(_b + 0)];
adc RT1hi, RT2hi;
- ldr RT2hi, [%sp, #(_b + 4)];
+ ldr RT2hi, [sp, #(_b + 4)];
adds RWlo, RT2lo;
- ldr RT2lo, [%sp, #(_c + 0)];
+ ldr RT2lo, [sp, #(_c + 0)];
adc RWhi, RT2hi;
- ldr RT2hi, [%sp, #(_c + 4)];
+ ldr RT2hi, [sp, #(_c + 4)];
adds RT3lo, RT2lo;
- ldr RT2lo, [%sp, #(_d + 0)];
+ ldr RT2lo, [sp, #(_d + 0)];
adc RT3hi, RT2hi;
- ldr RT2hi, [%sp, #(_d + 4)];
+ ldr RT2hi, [sp, #(_d + 4)];
adds RT4lo, RT2lo;
- ldr RT2lo, [%sp, #(_e + 0)];
+ ldr RT2lo, [sp, #(_e + 0)];
adc RT4hi, RT2hi;
- stm %r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+ stm r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
- ldr RT2hi, [%sp, #(_e + 4)];
- ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+ ldr RT2hi, [sp, #(_e + 4)];
+ ldm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
adds RT1lo, RT2lo;
- ldr RT2lo, [%sp, #(_f + 0)];
+ ldr RT2lo, [sp, #(_f + 0)];
adc RT1hi, RT2hi;
- ldr RT2hi, [%sp, #(_f + 4)];
+ ldr RT2hi, [sp, #(_f + 4)];
adds RWlo, RT2lo;
- ldr RT2lo, [%sp, #(_g + 0)];
+ ldr RT2lo, [sp, #(_g + 0)];
adc RWhi, RT2hi;
- ldr RT2hi, [%sp, #(_g + 4)];
+ ldr RT2hi, [sp, #(_g + 4)];
adds RT3lo, RT2lo;
- ldr RT2lo, [%sp, #(_h + 0)];
+ ldr RT2lo, [sp, #(_h + 0)];
adc RT3hi, RT2hi;
- ldr RT2hi, [%sp, #(_h + 4)];
+ ldr RT2hi, [sp, #(_h + 4)];
adds RT4lo, RT2lo;
adc RT4hi, RT2hi;
- stm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
- sub %r0, %r0, #(4 * 8);
- ldr RWlo, [%sp, #nblks];
+ stm r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+ sub r0, r0, #(4 * 8);
+ ldr RWlo, [sp, #nblks];
sub RK, #(80 * 8);
subs RWlo, #1;
bne .Loop_blocks;
.Ldone:
- mov %r0, #STACK_MAX;
+ mov r0, #STACK_MAX;
__out:
- add %sp, %sp, #STACK_MAX;
- pop {%r4-%r11, %ip, %pc};
+ add sp, sp, #STACK_MAX;
+ pop {r4-r11, ip, pc};
.size _gcry_sha512_transform_arm,.-_gcry_sha512_transform_arm;
#endif
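
An illustrative sketch (not from the patch): per the function's header comment, the transform takes its arguments in r0-r3 following the AAPCS, and the mov r0, #STACK_MAX at .Ldone suggests it returns the stack burn depth to the caller. A hypothetical call site, with ctx, data, and k as stand-in symbols:

    ldr r0, =ctx    /* pointer to the SHA512_CONTEXT state */
    ldr r1, =data   /* pointer to the 128-byte input block(s) */
    ldr r2, =k      /* pointer to the u64 k[80] round constants */
    mov r3, #1      /* nblks: number of blocks to process */
    bl  _gcry_sha512_transform_arm
    /* on return, r0 holds the number of stack bytes to burn */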