summaryrefslogtreecommitdiff
path: root/crypto/bn/asm/alpha.s.works
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/asm/alpha.s.works')
-rw-r--r--crypto/bn/asm/alpha.s.works533
1 files changed, 533 insertions, 0 deletions
diff --git a/crypto/bn/asm/alpha.s.works b/crypto/bn/asm/alpha.s.works
new file mode 100644
index 0000000000..ee6c587809
--- /dev/null
+++ b/crypto/bn/asm/alpha.s.works
@@ -0,0 +1,533 @@
+
+ # DEC Alpha assember
+ # The bn_div64 is actually gcc output but the other parts are hand done.
+ # Thanks to tzeruch@ceddec.com for sending me the gcc output for
+ # bn_div64.
+ # I've gone back and re-done most of routines.
+ # The key thing to remeber for the 164 CPU is that while a
+ # multiply operation takes 8 cycles, another one can only be issued
+ # after 4 cycles have elapsed. I've done modification to help
+ # improve this. Also, normally, a ld instruction will not be available
+ # for about 3 cycles.
+ .file 1 "bn_asm.c"
+ .set noat
+gcc2_compiled.:
+__gnu_compiled_c:
+ .text
+ .align 3
+ .globl bn_mul_add_words
+ .ent bn_mul_add_words
+bn_mul_add_words:
+bn_mul_add_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ .align 5
+ subq $18,4,$18
+ bis $31,$31,$0
+ blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ ldq $1,0($16) # 1 1
+ .align 3
+$42:
+ mulq $20,$19,$5 # 1 2 1 ######
+ ldq $21,8($17) # 2 1
+ ldq $2,8($16) # 2 1
+ umulh $20,$19,$20 # 1 2 ######
+ ldq $27,16($17) # 3 1
+ ldq $3,16($16) # 3 1
+ mulq $21,$19,$6 # 2 2 1 ######
+ ldq $28,24($17) # 4 1
+ addq $1,$5,$1 # 1 2 2
+ ldq $4,24($16) # 4 1
+ umulh $21,$19,$21 # 2 2 ######
+ cmpult $1,$5,$22 # 1 2 3 1
+ addq $20,$22,$20 # 1 3 1
+ addq $1,$0,$1 # 1 2 3 1
+ mulq $27,$19,$7 # 3 2 1 ######
+ cmpult $1,$0,$0 # 1 2 3 2
+ addq $2,$6,$2 # 2 2 2
+ addq $20,$0,$0 # 1 3 2
+ cmpult $2,$6,$23 # 2 2 3 1
+ addq $21,$23,$21 # 2 3 1
+ umulh $27,$19,$27 # 3 2 ######
+ addq $2,$0,$2 # 2 2 3 1
+ cmpult $2,$0,$0 # 2 2 3 2
+ subq $18,4,$18
+ mulq $28,$19,$8 # 4 2 1 ######
+ addq $21,$0,$0 # 2 3 2
+ addq $3,$7,$3 # 3 2 2
+ addq $16,32,$16
+ cmpult $3,$7,$24 # 3 2 3 1
+ stq $1,-32($16) # 1 2 4
+ umulh $28,$19,$28 # 4 2 ######
+ addq $27,$24,$27 # 3 3 1
+ addq $3,$0,$3 # 3 2 3 1
+ stq $2,-24($16) # 2 2 4
+ cmpult $3,$0,$0 # 3 2 3 2
+ stq $3,-16($16) # 3 2 4
+ addq $4,$8,$4 # 4 2 2
+ addq $27,$0,$0 # 3 3 2
+ cmpult $4,$8,$25 # 4 2 3 1
+ addq $17,32,$17
+ addq $28,$25,$28 # 4 3 1
+ addq $4,$0,$4 # 4 2 3 1
+ cmpult $4,$0,$0 # 4 2 3 2
+ stq $4,-8($16) # 4 2 4
+ addq $28,$0,$0 # 4 3 2
+ blt $18,$43
+
+ ldq $20,0($17) # 1 1
+ ldq $1,0($16) # 1 1
+
+ br $42
+
+ .align 4
+$45:
+ ldq $20,0($17) # 4 1
+ ldq $1,0($16) # 4 1
+ mulq $20,$19,$5 # 4 2 1
+ subq $18,1,$18
+ addq $16,8,$16
+ addq $17,8,$17
+ umulh $20,$19,$20 # 4 2
+ addq $1,$5,$1 # 4 2 2
+ cmpult $1,$5,$22 # 4 2 3 1
+ addq $20,$22,$20 # 4 3 1
+ addq $1,$0,$1 # 4 2 3 1
+ cmpult $1,$0,$0 # 4 2 3 2
+ addq $20,$0,$0 # 4 3 2
+ stq $1,-8($16) # 4 2 4
+ bgt $18,$45
+ ret $31,($26),1 # else exit
+
+ .align 4
+$43:
+ addq $18,4,$18
+ bgt $18,$45 # goto tail code
+ ret $31,($26),1 # else exit
+
+ .end bn_mul_add_words
+ .align 3
+ .globl bn_mul_words
+ .ent bn_mul_words
+bn_mul_words:
+bn_mul_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ .align 5
+ subq $18,4,$18
+ bis $31,$31,$0
+ blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ .align 3
+$142:
+
+ mulq $20,$19,$5 # 1 2 1 #####
+ ldq $21,8($17) # 2 1
+ ldq $27,16($17) # 3 1
+ umulh $20,$19,$20 # 1 2 #####
+ ldq $28,24($17) # 4 1
+ mulq $21,$19,$6 # 2 2 1 #####
+ addq $5,$0,$5 # 1 2 3 1
+ subq $18,4,$18
+ cmpult $5,$0,$0 # 1 2 3 2
+ umulh $21,$19,$21 # 2 2 #####
+ addq $20,$0,$0 # 1 3 2
+ addq $17,32,$17
+ addq $6,$0,$6 # 2 2 3 1
+ mulq $27,$19,$7 # 3 2 1 #####
+ cmpult $6,$0,$0 # 2 2 3 2
+ addq $21,$0,$0 # 2 3 2
+ addq $16,32,$16
+ umulh $27,$19,$27 # 3 2 #####
+ stq $5,-32($16) # 1 2 4
+ mulq $28,$19,$8 # 4 2 1 #####
+ addq $7,$0,$7 # 3 2 3 1
+ stq $6,-24($16) # 2 2 4
+ cmpult $7,$0,$0 # 3 2 3 2
+ umulh $28,$19,$28 # 4 2 #####
+ addq $27,$0,$0 # 3 3 2
+ stq $7,-16($16) # 3 2 4
+ addq $8,$0,$8 # 4 2 3 1
+ cmpult $8,$0,$0 # 4 2 3 2
+
+ addq $28,$0,$0 # 4 3 2
+
+ stq $8,-8($16) # 4 2 4
+
+ blt $18,$143
+
+ ldq $20,0($17) # 1 1
+
+ br $142
+
+ .align 4
+$145:
+ ldq $20,0($17) # 4 1
+ mulq $20,$19,$5 # 4 2 1
+ subq $18,1,$18
+ umulh $20,$19,$20 # 4 2
+ addq $5,$0,$5 # 4 2 3 1
+ addq $16,8,$16
+ cmpult $5,$0,$0 # 4 2 3 2
+ addq $17,8,$17
+ addq $20,$0,$0 # 4 3 2
+ stq $5,-8($16) # 4 2 4
+
+ bgt $18,$145
+ ret $31,($26),1 # else exit
+
+ .align 4
+$143:
+ addq $18,4,$18
+ bgt $18,$145 # goto tail code
+ ret $31,($26),1 # else exit
+
+ .end bn_mul_words
+ .align 3
+ .globl bn_sqr_words
+ .ent bn_sqr_words
+bn_sqr_words:
+bn_sqr_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $18,4,$18
+ blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ .align 3
+$542:
+ mulq $20,$20,$5 ######
+ ldq $21,8($17) # 1 1
+ subq $18,4
+ umulh $20,$20,$1 ######
+ ldq $27,16($17) # 1 1
+ mulq $21,$21,$6 ######
+ ldq $28,24($17) # 1 1
+ stq $5,0($16) # r[0]
+ umulh $21,$21,$2 ######
+ stq $1,8($16) # r[1]
+ mulq $27,$27,$7 ######
+ stq $6,16($16) # r[0]
+ umulh $27,$27,$3 ######
+ stq $2,24($16) # r[1]
+ mulq $28,$28,$8 ######
+ stq $7,32($16) # r[0]
+ umulh $28,$28,$4 ######
+ stq $3,40($16) # r[1]
+
+ addq $16,64,$16
+ addq $17,32,$17
+ stq $8,-16($16) # r[0]
+ stq $4,-8($16) # r[1]
+
+ blt $18,$543
+ ldq $20,0($17) # 1 1
+ br $542
+
+$442:
+ ldq $20,0($17) # a[0]
+ mulq $20,$20,$5 # a[0]*w low part r2
+ addq $16,16,$16
+ addq $17,8,$17
+ subq $18,1,$18
+ umulh $20,$20,$1 # a[0]*w high part r3
+ stq $5,-16($16) # r[0]
+ stq $1,-8($16) # r[1]
+
+ bgt $18,$442
+ ret $31,($26),1 # else exit
+
+ .align 4
+$543:
+ addq $18,4,$18
+ bgt $18,$442 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_sqr_words
+
+ .align 3
+ .globl bn_add_words
+ .ent bn_add_words
+bn_add_words:
+bn_add_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $19,4,$19
+ bis $31,$31,$0 # carry = 0
+ blt $19,$900
+ ldq $5,0($17) # a[0]
+ ldq $1,0($18) # b[1]
+ .align 3
+$901:
+ addq $1,$5,$1 # r=a+b;
+ ldq $6,8($17) # a[1]
+ cmpult $1,$5,$22 # did we overflow?
+ ldq $2,8($18) # b[1]
+ addq $1,$0,$1 # c+= overflow
+ ldq $7,16($17) # a[2]
+ cmpult $1,$0,$0 # overflow?
+ ldq $3,16($18) # b[2]
+ addq $0,$22,$0
+ ldq $8,24($17) # a[3]
+ addq $2,$6,$2 # r=a+b;
+ ldq $4,24($18) # b[3]
+ cmpult $2,$6,$23 # did we overflow?
+ addq $3,$7,$3 # r=a+b;
+ addq $2,$0,$2 # c+= overflow
+ cmpult $3,$7,$24 # did we overflow?
+ cmpult $2,$0,$0 # overflow?
+ addq $4,$8,$4 # r=a+b;
+ addq $0,$23,$0
+ cmpult $4,$8,$25 # did we overflow?
+ addq $3,$0,$3 # c+= overflow
+ stq $1,0($16) # r[0]=c
+ cmpult $3,$0,$0 # overflow?
+ stq $2,8($16) # r[1]=c
+ addq $0,$24,$0
+ stq $3,16($16) # r[2]=c
+ addq $4,$0,$4 # c+= overflow
+ subq $19,4,$19 # loop--
+ cmpult $4,$0,$0 # overflow?
+ addq $17,32,$17 # a++
+ addq $0,$25,$0
+ stq $4,24($16) # r[3]=c
+ addq $18,32,$18 # b++
+ addq $16,32,$16 # r++
+
+ blt $19,$900
+ ldq $5,0($17) # a[0]
+ ldq $1,0($18) # b[1]
+ br $901
+ .align 4
+$945:
+ ldq $5,0($17) # a[0]
+ ldq $1,0($18) # b[1]
+ addq $1,$5,$1 # r=a+b;
+ subq $19,1,$19 # loop--
+ addq $1,$0,$1 # c+= overflow
+ addq $17,8,$17 # a++
+ cmpult $1,$5,$22 # did we overflow?
+ cmpult $1,$0,$0 # overflow?
+ addq $18,8,$18 # b++
+ stq $1,0($16) # r[0]=c
+ addq $0,$22,$0
+ addq $16,8,$16 # r++
+
+ bgt $19,$945
+ ret $31,($26),1 # else exit
+
+$900:
+ addq $19,4,$19
+ bgt $19,$945 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_add_words
+
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the lables.
+ #
+.text
+ .align 3
+ .globl bn_div64
+ .ent bn_div64
+bn_div64:
+ ldgp $29,0($27)
+bn_div64..ng:
+ lda $30,-48($30)
+ .frame $30,48,$26,0
+ stq $26,0($30)
+ stq $9,8($30)
+ stq $10,16($30)
+ stq $11,24($30)
+ stq $12,32($30)
+ stq $13,40($30)
+ .mask 0x4003e00,-48
+ .prologue 1
+ bis $16,$16,$9
+ bis $17,$17,$10
+ bis $18,$18,$11
+ bis $31,$31,$13
+ bis $31,2,$12
+ bne $11,$119
+ lda $0,-1
+ br $31,$136
+ .align 4
+$119:
+ bis $11,$11,$16
+ jsr $26,BN_num_bits_word
+ ldgp $29,0($26)
+ subq $0,64,$1
+ beq $1,$120
+ bis $31,1,$1
+ sll $1,$0,$1
+ cmpule $9,$1,$1
+ bne $1,$120
+ # lda $16,_IO_stderr_
+ # lda $17,$C32
+ # bis $0,$0,$18
+ # jsr $26,fprintf
+ # ldgp $29,0($26)
+ jsr $26,abort
+ ldgp $29,0($26)
+ .align 4
+$120:
+ bis $31,64,$3
+ cmpult $9,$11,$2
+ subq $3,$0,$1
+ addl $1,$31,$0
+ subq $9,$11,$1
+ cmoveq $2,$1,$9
+ beq $0,$122
+ zapnot $0,15,$2
+ subq $3,$0,$1
+ sll $11,$2,$11
+ sll $9,$2,$3
+ srl $10,$1,$1
+ sll $10,$2,$10
+ bis $3,$1,$9
+$122:
+ srl $11,32,$5
+ zapnot $11,15,$6
+ lda $7,-1
+ .align 5
+$123:
+ srl $9,32,$1
+ subq $1,$5,$1
+ bne $1,$126
+ zapnot $7,15,$27
+ br $31,$127
+ .align 4
+$126:
+ bis $9,$9,$24
+ bis $5,$5,$25
+ divqu $24,$25,$27
+$127:
+ srl $10,32,$4
+ .align 5
+$128:
+ mulq $27,$5,$1
+ subq $9,$1,$3
+ zapnot $3,240,$1
+ bne $1,$129
+ mulq $6,$27,$2
+ sll $3,32,$1
+ addq $1,$4,$1
+ cmpule $2,$1,$2
+ bne $2,$129
+ subq $27,1,$27
+ br $31,$128
+ .align 4
+$129:
+ mulq $27,$6,$1
+ mulq $27,$5,$4
+ srl $1,32,$3
+ sll $1,32,$1
+ addq $4,$3,$4
+ cmpult $10,$1,$2
+ subq $10,$1,$10
+ addq $2,$4,$2
+ cmpult $9,$2,$1
+ bis $2,$2,$4
+ beq $1,$134
+ addq $9,$11,$9
+ subq $27,1,$27
+$134:
+ subl $12,1,$12
+ subq $9,$4,$9
+ beq $12,$124
+ sll $27,32,$13
+ sll $9,32,$2
+ srl $10,32,$1
+ sll $10,32,$10
+ bis $2,$1,$9
+ br $31,$123
+ .align 4
+$124:
+ bis $13,$27,$0
+$136:
+ ldq $26,0($30)
+ ldq $9,8($30)
+ ldq $10,16($30)
+ ldq $11,24($30)
+ ldq $12,32($30)
+ ldq $13,40($30)
+ addq $30,48,$30
+ ret $31,($26),1
+ .end bn_div64
+
+ .set noat
+ .text
+ .align 3
+ .globl bn_sub_words
+ .ent bn_sub_words
+bn_sub_words:
+bn_sub_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $19, 4, $19
+ bis $31, $31, $0
+ blt $19, $100
+ ldq $1, 0($17)
+ ldq $2, 0($18)
+$101:
+ ldq $3, 8($17)
+ cmpult $1, $2, $4
+ ldq $5, 8($18)
+ subq $1, $2, $1
+ ldq $6, 16($17)
+ cmpult $1, $0, $2
+ ldq $7, 16($18)
+ subq $1, $0, $23
+ ldq $8, 24($17)
+ addq $2, $4, $0
+ cmpult $3, $5, $24
+ subq $3, $5, $3
+ ldq $22, 24($18)
+ cmpult $3, $0, $5
+ subq $3, $0, $25
+ addq $5, $24, $0
+ cmpult $6, $7, $27
+ subq $6, $7, $6
+ stq $23, 0($16)
+ cmpult $6, $0, $7
+ subq $6, $0, $28
+ addq $7, $27, $0
+ cmpult $8, $22, $21
+ subq $8, $22, $8
+ stq $25, 8($16)
+ cmpult $8, $0, $22
+ subq $8, $0, $20
+ addq $22, $21, $0
+ stq $28, 16($16)
+ subq $19, 4, $19
+ stq $20, 24($16)
+ addq $17, 32, $17
+ addq $18, 32, $18
+ addq $16, 32, $16
+ blt $19, $100
+ ldq $1, 0($17)
+ ldq $2, 0($18)
+ br $101
+$102:
+ ldq $1, 0($17)
+ ldq $2, 0($18)
+ cmpult $1, $2, $27
+ subq $1, $2, $1
+ cmpult $1, $0, $2
+ subq $1, $0, $1
+ stq $1, 0($16)
+ addq $2, $27, $0
+ addq $17, 8, $17
+ addq $18, 8, $18
+ addq $16, 8, $16
+ subq $19, 1, $19
+ bgt $19, $102
+ ret $31,($26),1
+$100:
+ addq $19, 4, $19
+ bgt $19, $102
+$103:
+ ret $31,($26),1
+ .end bn_sub_words