summaryrefslogtreecommitdiff
path: root/mpn/mips64
diff options
context:
space:
mode:
authortege <tege@gmplib.org>2003-06-02 13:51:42 +0200
committertege <tege@gmplib.org>2003-06-02 13:51:42 +0200
commit9abf8f28e1a21a9d56c8e285e30497fade98f891 (patch)
tree9edc9439d89d7c0d90989a8d6c6f9fe8c9fde9b2 /mpn/mips64
parent11b0ac7622994c5d75779fc721f6a0044cc507ee (diff)
downloadgmp-9abf8f28e1a21a9d56c8e285e30497fade98f891.tar.gz
New file, simply hand-edited gcc output.
(Workaround for IRIX compiler bug.)
Diffstat (limited to 'mpn/mips64')
-rw-r--r--mpn/mips64/divrem_1.asm457
1 file changed, 457 insertions, 0 deletions
diff --git a/mpn/mips64/divrem_1.asm b/mpn/mips64/divrem_1.asm
new file mode 100644
index 000000000..f0b57387b
--- /dev/null
+++ b/mpn/mips64/divrem_1.asm
@@ -0,0 +1,457 @@
+dnl MIPS64 mpn_divrem_1 -- Divide an mpn number by an unnormalized limb.
+
+dnl Copyright 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+dnl MA 02111-1307, USA.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+C qp = r4
+C qxn = r5
+C up = r6
+C n = r7
+C vl = r8
+
+C cycles/limb
+C R4000: ??
+C R1x000: 35
+
+C This was generated by gcc, then the code was manually edited. Lots of things
+C could be streamlined. It would probably be a good idea to merge the loops
+C for normalized and unnormalized divisor, since the shifting stuff is done for
+C free in parallel with other operations.
+
+ASM_START()
+PROLOGUE(mpn_divrem_1)
+C Entry: qp=$4, qxn=$5, up=$6, n=$7, vl=$8 (see header above).
+C Prologue: 32-byte frame, save $gp ($28), then rebuild $gp from $25
+C (standard MIPS PIC convention: $25 holds the function's own address).
+	dsubu $sp,$sp,32
+	sd $28,16($sp)
+	lui $1,%hi(%neg(%gp_rel(__gmpn_divrem_1)))
+	addiu $1,$1,%lo(%neg(%gp_rel(__gmpn_divrem_1)))
+	daddu $gp,$1,$25
+C Register roles from here on: $14 = qp (store pointer, walks downward),
+C $15 = qxn, $13 = running remainder, $2 = eventual return value.
+	move $14,$4
+	move $15,$5
+C $4 = n + qxn = total number of quotient limbs; if zero, return 0.
+	addu $4,$7,$15
+	bne $4,$0,.L176
+	move $13,$0                     C delay slot: remainder = 0
+	b .L490
+	move $2,$0                      C delay slot: return value 0
+.L491:
+C Divisor is 2^63 (its double is 0): use inverse limb of all-ones.
+	b .L229
+	dli $5,-1                       C delay slot: $5 = 0xFFF...F
+.L176:
+C Point $14 at the most significant quotient limb: qp + 8*(n+qxn-1).
+	sll $2,$4,3
+	addu $2,$2,-8
+C Test the top bit of the divisor; clear => unnormalized path at .L177.
+	dli $3,0x8000000000000000
+	and $3,$8,$3
+	beq $3,$0,.L177
+	addu $14,$14,$2                 C delay slot: executes on both paths
+C Normalized divisor.  If n != 0, peel off the most significant dividend
+C limb: the top quotient limb is 1 iff up[n-1] >= d.
+	beq $7,$0,.L494
+	dsll $2,$8,1                    C delay slot: $2 = 2*d (tested at .L494)
+	sll $2,$7,3
+	addu $2,$2,$6
+	ld $13,-8($2)                   C $13 = up[n-1]
+	addu $7,$7,-1
+	sltu $2,$13,$8
+	xori $2,$2,0x1                  C $2 = (up[n-1] >= d)
+C Note: the sign-extend/zero-extend shift pairs below look redundant for a
+C 0/1 value; they are left over from the gcc output this was edited from.
+	dsll $2,$2,32
+	dsra $2,$2,32
+	dsll $2,$2,32
+	dsrl $2,$2,32
+	sd $2,0($14)                    C store top quotient limb (0 or 1)
+	addu $14,$14,-8
+C If that quotient bit was 1, subtract d from the top limb: mask = -bit.
+	dsubu $2,$0,$2
+	and $2,$8,$2
+	dsubu $13,$13,$2
+	dsll $2,$8,1                    C recompute 2*d for the test below
+.L494:
+C If 2*d == 0 the divisor is exactly 2^63; special-case via .L491.
+	beq $2,$0,.L491
+	dsrl $9,$8,32                   C delay slot: $9 = high 32 bits of d
+C ---------------------------------------------------------------------
+C Compute the inverse limb $5, presumably floor((B^2-1)/d) - B with
+C B = 2^64 (the usual divrem inverse) -- TODO confirm.  It is built by
+C two 32-bit schoolbook division steps: each step divides by the high
+C half $9 via ddivu, then corrects using the low half $12.
+C ---------------------------------------------------------------------
+	dsubu $4,$0,$8                  C $4 = -d = B - d (mod B)
+	ddivu $0,$4,$9
+	dli $2,0xffffffff
+	and $12,$8,$2                   C $12 = low 32 bits of d
+	mflo $5
+	move $11,$5                     C $11 = high quotient half estimate
+	dmult $11,$9
+	mflo $3
+	nop                             C nops: gcc-scheduled mult latency gap
+	nop
+	dmult $11,$12
+	mflo $10                        C $10 = q_hi * d_lo
+	dli $2,0x100000000
+	dsubu $4,$4,$3                  C partial remainder
+	dmult $4,$2
+	mflo $4                         C shift partial remainder left 32 bits
+C Correction: NB branch-likely (beql/bnel) execute their delay-slot
+C instruction ONLY when the branch is taken.
+	sltu $3,$4,$10
+	beql $3,$0,.L495
+	dsubu $4,$4,$10                 C annulled unless branch taken
+	daddu $4,$4,$8
+	sltu $2,$4,$8
+	bne $2,$0,.L248
+	dsubu $11,$11,1                 C delay slot: q_hi -= 1 (both paths)
+	sltu $2,$4,$10
+	beql $2,$0,.L495
+	dsubu $4,$4,$10                 C annulled unless branch taken
+	dsubu $11,$11,1                 C second correction step
+	daddu $4,$4,$8
+.L248:
+	dsubu $4,$4,$10
+.L495:
+C Second 32-bit division step: low quotient half into $5.
+	ddivu $0,$4,$9
+	mflo $2
+	move $5,$2
+	dmult $5,$9
+	mflo $3
+	nop
+	nop
+	dmult $5,$12
+	mflo $10
+	dli $2,0x100000000
+	dsubu $4,$4,$3
+	dmult $4,$2
+	mflo $4
+	sltu $3,$4,$10
+	beq $3,$0,.L504
+	daddu $4,$4,$8                  C delay slot
+	sltu $2,$4,$8
+	bne $2,$0,.L251
+	dsubu $5,$5,1                   C delay slot: q_lo -= 1 (both paths)
+	sltu $2,$4,$10
+	bnel $2,$0,.L251
+	dsubu $5,$5,1                   C annulled unless branch taken
+.L251:
+	dli $2,0x100000000
+.L504:
+C Combine halves: inverse $5 = (q_hi << 32) | q_lo.
+	dmult $11,$2
+	mflo $2
+	or $5,$2,$5
+.L229:
+C Main loop setup for the normalized case.  $25 = qxn-1 is saved for the
+C fraction loop (Loop2); $6 points at up[n-1] and walks downward.
+	addu $7,$7,-1
+	bltz $7,.L257
+	addu $25,$15,-1                 C delay slot: executes on both paths
+	sll $2,$7,3
+	addu $6,$2,$6
+.Loop1:
+C One division step per limb: estimate quotient limb $10 from the high
+C product of (remainder $13) * (inverse $5), multiply back by d ($8),
+C subtract from the 128-bit value ($13:up[i]), then apply up to two
+C conditional corrections so the new remainder $4 ends below d.
+	ld $11,0($6)
+	dmultu $13,$5
+	mfhi $10
+	daddu $10,$10,$13               C quotient estimate
+	nop
+	dmultu $10,$8
+	mflo $2
+	mfhi $9
+	dsubu $2,$11,$2                 C low part of 128-bit subtract
+	dsubu $3,$13,$9
+	sltu $9,$11,$2                  C borrow out of low part
+	dsubu $9,$3,$9                  C high part of 128-bit subtract
+	beq $9,$0,.L271
+	move $4,$2                      C delay slot: candidate remainder
+	dsubu $2,$4,$8
+	sltu $3,$4,$2
+	move $4,$2
+	beq $9,$3,.L271
+	daddu $10,$10,1                 C delay slot: first correction
+	dsubu $4,$4,$8
+	daddu $10,$10,1                 C second correction
+.L271:	sltu $2,$4,$8
+	bne $2,$0,.L496
+	move $13,$4                     C delay slot: commit remainder
+	dsubu $4,$4,$8                  C final adjust if remainder >= d
+	daddu $10,$10,1
+	move $13,$4
+.L496:	sd $10,0($14)                   C store quotient limb
+	addu $14,$14,-8
+	addu $7,$7,-1
+	bgez $7,.Loop1
+	addu $6,$6,-8                   C delay slot: next (lower) up limb
+.L257:
+C Fraction part: generate qxn further quotient limbs with zero dividend
+C limbs (the Loop2 body is Loop1 with the loaded limb replaced by $0).
+	move $7,$25
+	bltz $7,.L490
+	move $2,$13                     C delay slot: return value = remainder
+.Loop2:
+	dmultu $13,$5
+	mfhi $9
+	daddu $9,$9,$13
+	nop
+	dmultu $9,$8
+	mflo $2
+	mfhi $6
+	dsubu $2,$0,$2                  C dividend limb is implicitly 0 here
+	dsubu $3,$13,$6
+	sltu $6,$0,$2
+	dsubu $6,$3,$6
+	beq $6,$0,.L295
+	move $4,$2                      C delay slot
+	dsubu $2,$4,$8
+	sltu $3,$4,$2
+	move $4,$2
+	beq $6,$3,.L295
+	daddu $9,$9,1                   C delay slot: first correction
+	dsubu $4,$4,$8
+	daddu $9,$9,1
+.L295:	sltu $2,$4,$8
+	bne $2,$0,.L497
+	move $13,$4                     C delay slot: commit remainder
+	dsubu $4,$4,$8
+	daddu $9,$9,1
+	move $13,$4
+.L497:	sd $9,0($14)
+	addu $7,$7,-1
+	bgez $7,.Loop2
+	addu $14,$14,-8                 C delay slot
+	b .L490
+	move $2,$13                     C delay slot: return remainder
+.L177:
+C ---------------------------------------------------------------------
+C Unnormalized divisor (top bit of d clear).  First, if n != 0 and the
+C top dividend limb is already < d, that limb becomes the initial
+C remainder and the top quotient limb is 0.
+C ---------------------------------------------------------------------
+	beq $7,$0,.L308
+	sll $2,$7,3                     C delay slot
+	addu $2,$2,$6
+	ld $12,-8($2)                   C $12 = up[n-1]
+	sltu $3,$12,$8
+	beq $3,$0,.L308
+	addu $4,$4,-1                   C delay slot: one quotient limb fewer
+	move $13,$12
+	sd $0,0($14)                    C top quotient limb = 0
+	bne $4,$0,.L307
+	addu $14,$14,-8                 C delay slot
+	b .L490
+	move $2,$13                     C delay slot: return that limb
+.L492:
+C Shifted divisor is 2^63: use all-ones inverse, as in .L491.
+	b .L395
+	dli $5,-1                       C delay slot
+.L307:
+	addu $7,$7,-1
+.L308:
+C Count leading zeros of d using __gmpn_clz_tab: scan 8-bit groups from
+C bit 56 downward ($5 = shift of the topmost nonzero byte), then look up
+C that byte; $24 becomes the normalization shift count.
+	dli $5,0x38
+	dsrl $2,$8,56
+	andi $2,$2,0xff
+	la $3,__gmpn_clz_tab
+	bne $2,$0,.L321
+	addu $25,$15,-1                 C delay slot: save qxn-1 for Loop4
+	dsubu $5,$5,8
+.L499:
+	beql $5,$0,.L498
+	daddu $5,$5,1                   C annulled unless branch taken
+	dsll $2,$5,32
+	dsra $2,$2,32                   C sign-extend shift count to 32 bits
+	dsrl $2,$8,$2
+	andi $2,$2,0xff
+	beql $2,$0,.L499
+	dsubu $5,$5,8                   C annulled unless branch taken
+.L321:
+	daddu $5,$5,1
+.L498:
+	dsll $2,$5,32
+	dsra $2,$2,32
+	dsrl $2,$8,$2
+	dsll $2,$2,32
+	dsra $2,$2,32
+	addu $2,$2,$3
+	lbu $4,0($2)                    C table lookup for the top byte
+	dli $3,0x41                     C 0x41 = 65
+	dsubu $3,$3,$4
+	dsubu $3,$3,$5
+	dsll $24,$3,32
+	dsra $24,$24,32                 C $24 = leading-zero (shift) count
+C Normalize: shift divisor and initial remainder left by $24.
+	dsll $8,$8,$24
+	dsll $2,$8,1
+	beq $2,$0,.L492
+	dsll $13,$13,$24                C delay slot: executes on both paths
+C Inverse computation -- identical to the normalized-path code at .L494.
+	dsrl $9,$8,32
+	dsubu $4,$0,$8
+	ddivu $0,$4,$9
+	dli $2,0xffffffff
+	and $12,$8,$2
+	mflo $5
+	move $11,$5
+	dmult $11,$9
+	mflo $3
+	nop
+	nop
+	dmult $11,$12
+	mflo $10
+	dli $2,0x100000000
+	dsubu $4,$4,$3
+	dmult $4,$2
+	mflo $4
+	sltu $3,$4,$10
+	beql $3,$0,.L500
+	dsubu $4,$4,$10                 C annulled unless branch taken
+	daddu $4,$4,$8
+	sltu $2,$4,$8
+	bne $2,$0,.L414
+	dsubu $11,$11,1                 C delay slot (both paths)
+	sltu $2,$4,$10
+	beql $2,$0,.L500
+	dsubu $4,$4,$10                 C annulled unless branch taken
+	dsubu $11,$11,1
+	daddu $4,$4,$8
+.L414:
+	dsubu $4,$4,$10
+.L500:
+	ddivu $0,$4,$9
+	mflo $2
+	move $5,$2
+	dmult $5,$9
+	mflo $3
+	nop
+	nop
+	dmult $5,$12
+	mflo $10
+	dli $2,0x100000000
+	dsubu $4,$4,$3
+	dmult $4,$2
+	mflo $4
+	sltu $3,$4,$10
+	beq $3,$0,.L505
+	daddu $4,$4,$8                  C delay slot
+	sltu $2,$4,$8
+	bne $2,$0,.L417
+	dsubu $5,$5,1                   C delay slot (both paths)
+	sltu $2,$4,$10
+	bnel $2,$0,.L417
+	dsubu $5,$5,1                   C annulled unless branch taken
+.L417:
+	dli $2,0x100000000
+.L505:
+	dmult $11,$2
+	mflo $2
+	or $5,$2,$5                     C inverse = (q_hi << 32) | q_lo
+.L395:
+C Main loop setup, unnormalized case.  Dividend limbs are consumed in
+C pairs shifted together by $24 bits ($15 = 64 - $24 is the right-shift
+C count); $12 always holds the previously loaded (higher) limb.
+	beq $7,$0,.L422
+	sll $2,$7,3                     C delay slot
+	addu $2,$2,$6
+	ld $12,-8($2)                   C $12 = up[n-1]
+	addu $7,$7,-2
+	li $2,64
+	subu $2,$2,$24
+	dsrl $3,$12,$2
+	bltz $7,.L424
+	or $13,$13,$3                   C delay slot: fold top bits into rem
+	move $15,$2                     C $15 = 64 - shift count
+	sll $2,$7,3
+	addu $6,$2,$6
+.Loop3:
+C As Loop1, but the effective dividend limb $3 is assembled on the fly
+C from two adjacent source limbs: ($12 << $24) | ($11 >> (64-$24)).
+	ld $11,0($6)
+	dmultu $13,$5
+	mfhi $9
+	daddu $9,$9,$13
+	nop
+	dmultu $9,$8
+	mflo $4
+	mfhi $10
+	dsll $3,$12,$24
+	dsrl $2,$11,$15
+	or $3,$3,$2                     C shifted-in dividend limb
+	dsubu $4,$3,$4
+	dsubu $2,$13,$10
+	sltu $10,$3,$4
+	dsubu $10,$2,$10
+	beq $10,$0,.L438
+	dsubu $2,$4,$8                  C delay slot
+	sltu $3,$4,$2
+	move $4,$2
+	beq $10,$3,.L438
+	daddu $9,$9,1                   C delay slot: first correction
+	dsubu $4,$4,$8
+	daddu $9,$9,1
+.L438:	sltu $2,$4,$8
+	bne $2,$0,.L501
+	move $13,$4                     C delay slot: commit remainder
+	dsubu $4,$4,$8
+	daddu $9,$9,1
+	move $13,$4
+.L501:	sd $9,0($14)
+	addu $14,$14,-8
+	move $12,$11                    C current limb becomes the high limb
+	addu $7,$7,-1
+	bgez $7,.Loop3
+	addu $6,$6,-8                   C delay slot
+.L424:
+C Final (lowest) division step of the integer part: the dividend limb is
+C just $12 << $24, with zeros shifted in from below.
+	dmultu $13,$5
+	mfhi $7
+	daddu $7,$7,$13
+	nop
+	dmultu $7,$8
+	mflo $2
+	mfhi $6
+	dsll $3,$12,$24
+	dsubu $2,$3,$2
+	dsubu $4,$13,$6
+	sltu $6,$3,$2
+	dsubu $6,$4,$6
+	beq $6,$0,.L458
+	move $4,$2                      C delay slot
+	dsubu $2,$4,$8
+	sltu $3,$4,$2
+	move $4,$2
+	beq $6,$3,.L458
+	daddu $7,$7,1                   C delay slot: first correction
+	dsubu $4,$4,$8
+	daddu $7,$7,1
+.L458:
+	sltu $2,$4,$8
+	bne $2,$0,.L502
+	move $13,$4                     C delay slot: commit remainder
+	dsubu $4,$4,$8
+	daddu $7,$7,1
+	move $13,$4
+.L502:
+	sd $7,0($14)
+	addu $14,$14,-8
+.L422:
+C Fraction loop, unnormalized case: qxn limbs with zero dividend limbs.
+C Return value is the remainder shifted back right by $24.
+	move $7,$25
+	bltz $7,.L490
+	dsrl $2,$13,$24                 C delay slot: unshift remainder
+.Loop4:
+	dmultu $13,$5
+	mfhi $9
+	daddu $9,$9,$13
+	nop
+	dmultu $9,$8
+	mflo $2
+	mfhi $6
+	dsubu $2,$0,$2                  C dividend limb is implicitly 0
+	dsubu $3,$13,$6
+	sltu $6,$0,$2
+	dsubu $6,$3,$6
+	beq $6,$0,.L481
+	move $4,$2                      C delay slot
+	dsubu $2,$4,$8
+	sltu $3,$4,$2
+	move $4,$2
+	beq $6,$3,.L481
+	daddu $9,$9,1                   C delay slot: first correction
+	dsubu $4,$4,$8
+	daddu $9,$9,1
+.L481:	sltu $2,$4,$8
+	bne $2,$0,.L503
+	move $13,$4                     C delay slot: commit remainder
+	dsubu $4,$4,$8
+	daddu $9,$9,1
+	move $13,$4
+.L503:	sd $9,0($14)
+	addu $7,$7,-1
+	bgez $7,.Loop4
+	addu $14,$14,-8                 C delay slot
+	dsrl $2,$13,$24                 C unshift remainder for return
+.L490:
+C Epilogue: restore $gp, release the frame (in the jr delay slot), and
+C return with the remainder in $2.
+	ld $28,16($sp)
+	j $31
+	daddu $sp,$sp,32                C delay slot: pop frame
+EPILOGUE(mpn_divrem_1)