summaryrefslogtreecommitdiff
path: root/crypto/bn/asm/alpha.s
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/asm/alpha.s')
-rw-r--r--crypto/bn/asm/alpha.s310
1 files changed, 310 insertions, 0 deletions
diff --git a/crypto/bn/asm/alpha.s b/crypto/bn/asm/alpha.s
new file mode 100644
index 0000000000..d56f715ecd
--- /dev/null
+++ b/crypto/bn/asm/alpha.s
@@ -0,0 +1,310 @@
+ # DEC Alpha assembler
+ # The bn_div64 is actually gcc output but the other parts are hand done.
+ # Thanks to tzeruch@ceddec.com for sending me the gcc output for
+ # bn_div64.
+ .file 1 "bn_mulw.c" # NOTE(review): source-file name, presumably left over from the compiler output
+ .version "01.01"
+ .set noat # NOTE(review): presumably stops the assembler using $at ($28) as an implicit temporary - confirm
+gcc2_compiled.:
+__gnu_compiled_c:
+ .text # everything up to the next section directive is code
+ .align 3 # NOTE(review): presumably 2^3 = 8-byte alignment for the first entry point - confirm
+ #
+ # bn_mul_add_word: r[i] = r[i] + a[i]*w + carry, for i in [0,num).
+ #   In:  $16 = r (read and written), $17 = a, $18 = num (word count),
+ #        $19 = w
+ #   Out: $0  = final carry word (0 when num <= 0)
+ #   Leaf routine, no stack frame.  Scratch: $1-$6, $22, $23, $25.
+ #   The main loop ($142) handles two words per pass; $42 handles the
+ #   single leftover word when num is odd (or num == 1).
+ #
+ .globl bn_mul_add_word
+ .ent bn_mul_add_word
+bn_mul_add_word:
+bn_mul_add_word..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ bis $31,$31,$0 # c = 0 (also the result when num <= 0)
+ ble $18,$43 # num <= 0: nothing to do, never touch r[]/a[]
+ subq $18,2,$25 # $25 = num-2
+ blt $25,$42 # num == 1: only the single-word tail
+ .align 5
+$142:
+ subq $18,2,$18 # num-=2 (words left after this pass)
+ subq $25,2,$25 # $25 stays equal to num-2
+
+ ldq $1,0($17) # a[0]
+ ldq $2,8($17) # a[1]
+
+ mulq $19,$1,$3 # a[0]*w low part r3
+ umulh $19,$1,$1 # a[0]*w high part r1
+ mulq $19,$2,$4 # a[1]*w low part r4
+ umulh $19,$2,$2 # a[1]*w high part r2
+
+ ldq $22,0($16) # r[0] r22
+ ldq $23,8($16) # r[1] r23
+
+ addq $3,$22,$3 # a0 low part + r[0]
+ addq $4,$23,$4 # a1 low part + r[1]
+ cmpult $3,$22,$5 # overflow?
+ cmpult $4,$23,$6 # overflow?
+ addq $5,$1,$1 # high part + overflow
+ addq $6,$2,$2 # high part + overflow
+
+ addq $3,$0,$3 # add c
+ cmpult $3,$0,$5 # overflow?
+ stq $3,0($16)
+ addq $5,$1,$0 # c=high part + overflow
+
+ addq $4,$0,$4 # add c
+ cmpult $4,$0,$5 # overflow?
+ stq $4,8($16)
+ addq $5,$2,$0 # c=high part + overflow
+
+ ble $18,$43 # no words left: return c
+
+ addq $16,16,$16 # r += 2 words
+ addq $17,16,$17 # a += 2 words
+ blt $25,$42 # exactly one word left: finish in the tail
+
+ br $31,$142
+$42:
+ ldq $1,0($17) # a[0]
+ umulh $19,$1,$3 # a[0]*w high part
+ mulq $19,$1,$1 # a[0]*w low part
+ ldq $2,0($16) # r[0]
+ addq $1,$2,$1 # low part + r[0]
+ cmpult $1,$2,$4 # overflow?
+ addq $4,$3,$3 # high part + overflow
+ addq $1,$0,$1 # add c
+ cmpult $1,$0,$4 # overflow?
+ addq $4,$3,$0 # c=high part + overflow
+ stq $1,0($16)
+
+ .align 4
+$43:
+ ret $31,($26),1 # return c in $0
+ .end bn_mul_add_word
+ .align 3
+ #
+ # bn_mul_word: r[i] = low word of (a[i]*w + carry), for i in [0,num).
+ #   In:  $16 = r (written), $17 = a, $18 = num (word count), $19 = w
+ #   Out: $0  = final carry word (0 when num <= 0)
+ #   Leaf routine, no stack frame.  Scratch: $1-$5, $25.
+ #   The main loop ($342) handles two words per pass; $242 handles the
+ #   single leftover word when num is odd (or num == 1).
+ #
+ .globl bn_mul_word
+ .ent bn_mul_word
+bn_mul_word:
+bn_mul_word..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ bis $31,$31,$0 # c = 0 (also the result when num <= 0)
+ ble $18,$243 # num <= 0: nothing to do, never touch r[]/a[]
+ subq $18,2,$25 # $25 = num-2
+ blt $25,$242 # num == 1: only the single-word tail
+ .align 5
+$342:
+ subq $18,2,$18 # num-=2 (words left after this pass)
+ subq $25,2,$25 # $25 stays equal to num-2
+
+ ldq $1,0($17) # a[0]
+ ldq $2,8($17) # a[1]
+
+ mulq $19,$1,$3 # a[0]*w low part r3
+ umulh $19,$1,$1 # a[0]*w high part r1
+ mulq $19,$2,$4 # a[1]*w low part r4
+ umulh $19,$2,$2 # a[1]*w high part r2
+
+ addq $3,$0,$3 # add c
+ cmpult $3,$0,$5 # overflow?
+ stq $3,0($16)
+ addq $5,$1,$0 # c=high part + overflow
+
+ addq $4,$0,$4 # add c
+ cmpult $4,$0,$5 # overflow?
+ stq $4,8($16)
+ addq $5,$2,$0 # c=high part + overflow
+
+ ble $18,$243 # no words left: return c
+
+ addq $16,16,$16 # r += 2 words
+ addq $17,16,$17 # a += 2 words
+ blt $25,$242 # exactly one word left: finish in the tail
+
+ br $31,$342
+$242:
+ ldq $1,0($17) # a[0]
+ umulh $19,$1,$3 # a[0]*w high part
+ mulq $19,$1,$1 # a[0]*w low part
+ addq $1,$0,$1 # add c
+ cmpult $1,$0,$4 # overflow?
+ addq $4,$3,$0 # c=high part + overflow
+ stq $1,0($16)
+$243:
+ ret $31,($26),1 # return c in $0
+ .end bn_mul_word
+ .align 3
+ #
+ # bn_sqr_words: for i in [0,num): r[2i] = low word of a[i]*a[i],
+ #                                 r[2i+1] = high word of a[i]*a[i].
+ #   In:  $16 = r (written, 2*num words), $17 = a, $18 = num
+ #   Out: nothing ($0 is not written)
+ #   Leaf routine, no stack frame.  Scratch: $1-$6, $25.
+ #   The main loop ($542) squares two words per pass; $442 handles the
+ #   single leftover word when num is odd (or num == 1).
+ #
+ .globl bn_sqr_words
+ .ent bn_sqr_words
+bn_sqr_words:
+bn_sqr_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ ble $18,$443 # num <= 0: nothing to do, never touch r[]/a[]
+ subq $18,2,$25 # $25 = num-2
+ blt $25,$442 # num == 1: only the single-word tail
+ .align 5
+$542:
+ subq $18,2,$18 # num-=2 (words left after this pass)
+ subq $25,2,$25 # $25 stays equal to num-2
+
+ ldq $1,0($17) # a[0]
+ ldq $4,8($17) # a[1]
+
+ mulq $1,$1,$2 # a[0]*a[0] low part r2
+ umulh $1,$1,$3 # a[0]*a[0] high part r3
+ mulq $4,$4,$5 # a[1]*a[1] low part r5
+ umulh $4,$4,$6 # a[1]*a[1] high part r6
+
+ stq $2,0($16) # r[0]
+ stq $3,8($16) # r[1]
+ stq $5,16($16) # r[2]
+ stq $6,24($16) # r[3]
+
+ ble $18,$443 # no words left: done
+
+ addq $16,32,$16 # r += 4 words
+ addq $17,16,$17 # a += 2 words
+ blt $25,$442 # exactly one word left: finish in the tail
+ br $31,$542
+
+$442:
+ ldq $1,0($17) # a[0]
+ mulq $1,$1,$2 # a[0]*a[0] low part r2
+ umulh $1,$1,$3 # a[0]*a[0] high part r3
+ stq $2,0($16) # r[0]
+ stq $3,8($16) # r[1]
+
+ .align 4
+$443:
+ ret $31,($26),1
+ .end bn_sqr_words
+
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the labels.  bn_div64 divides the two-word value $16:$17
+ # (high:low) by $18 and returns the quotient in $0, built as two 32-bit halves.
+.text
+ .align 3
+ .globl bn_div64
+ .ent bn_div64
+bn_div64:
+ ldgp $29,0($27) # load gp ($29) relative to the address in $27
+bn_div64..ng:
+ lda $30,-48($30) # allocate a 48-byte stack frame
+ .frame $30,48,$26,0
+ stq $26,0($30) # save the return address (this routine makes calls)
+ stq $9,8($30) # save $9..$13; they hold the locals below
+ stq $10,16($30)
+ stq $11,24($30)
+ stq $12,32($30)
+ stq $13,40($30)
+ .mask 0x4003e00,-48
+ .prologue 1
+ bis $16,$16,$9 # $9 = h, first argument (high word of the dividend)
+ bis $17,$17,$10 # $10 = l, second argument (low word of the dividend)
+ bis $18,$18,$11 # $11 = d, third argument (divisor)
+ bis $31,$31,$13 # $13 = 0: will hold the upper half of the result
+ bis $31,2,$12 # $12 = 2: quotient halves still to produce
+ bne $11,$119
+ lda $0,-1 # d == 0: return all ones
+ br $31,$136
+ .align 4
+$119:
+ bis $11,$11,$16
+ jsr $26,BN_num_bits_word # $0 = BN_num_bits_word(d)
+ ldgp $29,0($26) # reload gp after the call
+ subq $0,64,$1
+ beq $1,$120 # result == 64: skip the range check
+ bis $31,1,$1
+ sll $1,$0,$1 # $1 = 1 << result
+ cmpule $9,$1,$1
+ bne $1,$120 # h <= (1 << result): continue
+ # lda $16,_IO_stderr_
+ # lda $17,$C32
+ # bis $0,$0,$18
+ # jsr $26,fprintf
+ # ldgp $29,0($26)
+ jsr $26,abort # otherwise call abort()
+ ldgp $29,0($26)
+ .align 4
+$120:
+ bis $31,64,$3
+ cmpult $9,$11,$2 # $2 = (h < d)
+ subq $3,$0,$1 # $1 = 64 - result of BN_num_bits_word
+ addl $1,$31,$0 # $0 = i = that shift count, as a sign-extended 32-bit value
+ subq $9,$11,$1
+ cmoveq $2,$1,$9 # if (h >= d) h -= d
+ beq $0,$122 # i == 0: no shift needed
+ zapnot $0,15,$2 # $2 = low 32 bits of i
+ subq $3,$0,$1 # $1 = 64 - i
+ sll $11,$2,$11 # d <<= i
+ sll $9,$2,$3
+ srl $10,$1,$1
+ sll $10,$2,$10 # l <<= i
+ bis $3,$1,$9 # h = (h << i) | (l >> (64 - i))
+$122:
+ srl $11,32,$5 # $5 = dh, upper 32 bits of d
+ zapnot $11,15,$6 # $6 = dl, lower 32 bits of d
+ lda $7,-1 # $7 = all ones
+ .align 5
+$123:
+ srl $9,32,$1
+ subq $1,$5,$1
+ bne $1,$126
+ zapnot $7,15,$27 # (h >> 32) == dh: q = 0xffffffff
+ br $31,$127
+ .align 4
+$126:
+ bis $9,$9,$24
+ bis $5,$5,$25
+ divqu $24,$25,$27 # q = h / dh (unsigned)
+$127:
+ srl $10,32,$4 # $4 = upper 32 bits of l
+ .align 5
+$128:
+ mulq $27,$5,$1
+ subq $9,$1,$3 # $3 = t = h - q*dh
+ zapnot $3,240,$1 # keep only the upper 32 bits of t
+ bne $1,$129 # t does not fit in 32 bits: accept q
+ mulq $6,$27,$2
+ sll $3,32,$1
+ addq $1,$4,$1
+ cmpule $2,$1,$2
+ bne $2,$129 # dl*q <= (t << 32) + (l >> 32): accept q
+ subq $27,1,$27 # estimate too large: q--
+ br $31,$128
+ .align 4
+$129:
+ mulq $27,$6,$1 # $1 = tl = q*dl
+ mulq $27,$5,$4 # $4 = th = q*dh
+ srl $1,32,$3
+ sll $1,32,$1 # tl <<= 32
+ addq $4,$3,$4 # th += (tl >> 32), using tl from before the shift
+ cmpult $10,$1,$2 # $2 = borrow out of l - tl
+ subq $10,$1,$10 # l -= tl
+ addq $2,$4,$2 # th += borrow
+ cmpult $9,$2,$1 # $1 = (h < th)
+ bis $2,$2,$4
+ beq $1,$134
+ addq $9,$11,$9 # h < th: add d back to h ...
+ subq $27,1,$27 # ... and take one off q
+$134:
+ subl $12,1,$12 # one quotient half done
+ subq $9,$4,$9 # h -= th
+ beq $12,$124 # both halves done
+ sll $27,32,$13 # $13 = q << 32, the upper half of the result
+ sll $9,32,$2
+ srl $10,32,$1
+ sll $10,32,$10 # l <<= 32
+ bis $2,$1,$9 # h = (h << 32) | (l >> 32)
+ br $31,$123
+ .align 4
+$124:
+ bis $13,$27,$0 # return (upper half) | q
+$136:
+ ldq $26,0($30) # restore the return address and $9..$13
+ ldq $9,8($30)
+ ldq $10,16($30)
+ ldq $11,24($30)
+ ldq $12,32($30)
+ ldq $13,40($30)
+ addq $30,48,$30 # release the stack frame
+ ret $31,($26),1
+ .end bn_div64
+ .ident "GCC: (GNU) 2.7.2.1"
+
+