diff options
author | tege <tege@gmplib.org> | 2003-06-02 13:51:42 +0200 |
---|---|---|
committer | tege <tege@gmplib.org> | 2003-06-02 13:51:42 +0200 |
commit | 9abf8f28e1a21a9d56c8e285e30497fade98f891 (patch) | |
tree | 9edc9439d89d7c0d90989a8d6c6f9fe8c9fde9b2 /mpn/mips64 | |
parent | 11b0ac7622994c5d75779fc721f6a0044cc507ee (diff) | |
download | gmp-9abf8f28e1a21a9d56c8e285e30497fade98f891.tar.gz |
New file, simply hand-edited gcc output.
(Workaround for IRIX compiler bug.)
Diffstat (limited to 'mpn/mips64')
-rw-r--r-- | mpn/mips64/divrem_1.asm | 457 |
1 file changed, 457 insertions, 0 deletions
diff --git a/mpn/mips64/divrem_1.asm b/mpn/mips64/divrem_1.asm new file mode 100644 index 000000000..f0b57387b --- /dev/null +++ b/mpn/mips64/divrem_1.asm @@ -0,0 +1,457 @@ +dnl MIPS64 mpn_divrem_1 -- Divide an mpn number by an unnormalized limb. + +dnl Copyright 2003 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 2.1 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to +dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +dnl MA 02111-1307, USA. + +include(`../config.m4') + +C INPUT PARAMETERS +C qp = r4 +C qxn = r5 +C up = r6 +C n = r7 +C vl = r8 + +C cycles/limb +C R4000: ?? +C R1x000: 35 + +C This was generated by gcc, then the code was manually edited. Lots of things +C could be streamlined. It would probably be a good idea to merge the loops +C for normalized and unnormalized divisor, since the shifting stuff is done for +C free in parallel with other operations. 
+ +ASM_START() +PROLOGUE(mpn_divrem_1) + dsubu $sp,$sp,32 + sd $28,16($sp) + lui $1,%hi(%neg(%gp_rel(__gmpn_divrem_1))) + addiu $1,$1,%lo(%neg(%gp_rel(__gmpn_divrem_1))) + daddu $gp,$1,$25 + move $14,$4 + move $15,$5 + addu $4,$7,$15 + bne $4,$0,.L176 + move $13,$0 + b .L490 + move $2,$0 +.L491: + b .L229 + dli $5,-1 +.L176: + sll $2,$4,3 + addu $2,$2,-8 + dli $3,0x8000000000000000 + and $3,$8,$3 + beq $3,$0,.L177 + addu $14,$14,$2 + beq $7,$0,.L494 + dsll $2,$8,1 + sll $2,$7,3 + addu $2,$2,$6 + ld $13,-8($2) + addu $7,$7,-1 + sltu $2,$13,$8 + xori $2,$2,0x1 + dsll $2,$2,32 + dsra $2,$2,32 + dsll $2,$2,32 + dsrl $2,$2,32 + sd $2,0($14) + addu $14,$14,-8 + dsubu $2,$0,$2 + and $2,$8,$2 + dsubu $13,$13,$2 + dsll $2,$8,1 +.L494: + beq $2,$0,.L491 + dsrl $9,$8,32 + dsubu $4,$0,$8 + ddivu $0,$4,$9 + dli $2,0xffffffff + and $12,$8,$2 + mflo $5 + move $11,$5 + dmult $11,$9 + mflo $3 + nop + nop + dmult $11,$12 + mflo $10 + dli $2,0x100000000 + dsubu $4,$4,$3 + dmult $4,$2 + mflo $4 + sltu $3,$4,$10 + beql $3,$0,.L495 + dsubu $4,$4,$10 + daddu $4,$4,$8 + sltu $2,$4,$8 + bne $2,$0,.L248 + dsubu $11,$11,1 + sltu $2,$4,$10 + beql $2,$0,.L495 + dsubu $4,$4,$10 + dsubu $11,$11,1 + daddu $4,$4,$8 +.L248: + dsubu $4,$4,$10 +.L495: + ddivu $0,$4,$9 + mflo $2 + move $5,$2 + dmult $5,$9 + mflo $3 + nop + nop + dmult $5,$12 + mflo $10 + dli $2,0x100000000 + dsubu $4,$4,$3 + dmult $4,$2 + mflo $4 + sltu $3,$4,$10 + beq $3,$0,.L504 + daddu $4,$4,$8 + sltu $2,$4,$8 + bne $2,$0,.L251 + dsubu $5,$5,1 + sltu $2,$4,$10 + bnel $2,$0,.L251 + dsubu $5,$5,1 +.L251: + dli $2,0x100000000 +.L504: + dmult $11,$2 + mflo $2 + or $5,$2,$5 +.L229: + addu $7,$7,-1 + bltz $7,.L257 + addu $25,$15,-1 + sll $2,$7,3 + addu $6,$2,$6 +.Loop1: + ld $11,0($6) + dmultu $13,$5 + mfhi $10 + daddu $10,$10,$13 + nop + dmultu $10,$8 + mflo $2 + mfhi $9 + dsubu $2,$11,$2 + dsubu $3,$13,$9 + sltu $9,$11,$2 + dsubu $9,$3,$9 + beq $9,$0,.L271 + move $4,$2 + dsubu $2,$4,$8 + sltu $3,$4,$2 + move $4,$2 + beq $9,$3,.L271 + 
daddu $10,$10,1 + dsubu $4,$4,$8 + daddu $10,$10,1 +.L271: sltu $2,$4,$8 + bne $2,$0,.L496 + move $13,$4 + dsubu $4,$4,$8 + daddu $10,$10,1 + move $13,$4 +.L496: sd $10,0($14) + addu $14,$14,-8 + addu $7,$7,-1 + bgez $7,.Loop1 + addu $6,$6,-8 +.L257: + move $7,$25 + bltz $7,.L490 + move $2,$13 +.Loop2: + dmultu $13,$5 + mfhi $9 + daddu $9,$9,$13 + nop + dmultu $9,$8 + mflo $2 + mfhi $6 + dsubu $2,$0,$2 + dsubu $3,$13,$6 + sltu $6,$0,$2 + dsubu $6,$3,$6 + beq $6,$0,.L295 + move $4,$2 + dsubu $2,$4,$8 + sltu $3,$4,$2 + move $4,$2 + beq $6,$3,.L295 + daddu $9,$9,1 + dsubu $4,$4,$8 + daddu $9,$9,1 +.L295: sltu $2,$4,$8 + bne $2,$0,.L497 + move $13,$4 + dsubu $4,$4,$8 + daddu $9,$9,1 + move $13,$4 +.L497: sd $9,0($14) + addu $7,$7,-1 + bgez $7,.Loop2 + addu $14,$14,-8 + b .L490 + move $2,$13 +.L177: + beq $7,$0,.L308 + sll $2,$7,3 + addu $2,$2,$6 + ld $12,-8($2) + sltu $3,$12,$8 + beq $3,$0,.L308 + addu $4,$4,-1 + move $13,$12 + sd $0,0($14) + bne $4,$0,.L307 + addu $14,$14,-8 + b .L490 + move $2,$13 +.L492: + b .L395 + dli $5,-1 +.L307: + addu $7,$7,-1 +.L308: + dli $5,0x38 + dsrl $2,$8,56 + andi $2,$2,0xff + la $3,__gmpn_clz_tab + bne $2,$0,.L321 + addu $25,$15,-1 + dsubu $5,$5,8 +.L499: + beql $5,$0,.L498 + daddu $5,$5,1 + dsll $2,$5,32 + dsra $2,$2,32 + dsrl $2,$8,$2 + andi $2,$2,0xff + beql $2,$0,.L499 + dsubu $5,$5,8 +.L321: + daddu $5,$5,1 +.L498: + dsll $2,$5,32 + dsra $2,$2,32 + dsrl $2,$8,$2 + dsll $2,$2,32 + dsra $2,$2,32 + addu $2,$2,$3 + lbu $4,0($2) + dli $3,0x41 + dsubu $3,$3,$4 + dsubu $3,$3,$5 + dsll $24,$3,32 + dsra $24,$24,32 + dsll $8,$8,$24 + dsll $2,$8,1 + beq $2,$0,.L492 + dsll $13,$13,$24 + dsrl $9,$8,32 + dsubu $4,$0,$8 + ddivu $0,$4,$9 + dli $2,0xffffffff + and $12,$8,$2 + mflo $5 + move $11,$5 + dmult $11,$9 + mflo $3 + nop + nop + dmult $11,$12 + mflo $10 + dli $2,0x100000000 + dsubu $4,$4,$3 + dmult $4,$2 + mflo $4 + sltu $3,$4,$10 + beql $3,$0,.L500 + dsubu $4,$4,$10 + daddu $4,$4,$8 + sltu $2,$4,$8 + bne $2,$0,.L414 + dsubu $11,$11,1 + 
sltu $2,$4,$10 + beql $2,$0,.L500 + dsubu $4,$4,$10 + dsubu $11,$11,1 + daddu $4,$4,$8 +.L414: + dsubu $4,$4,$10 +.L500: + ddivu $0,$4,$9 + mflo $2 + move $5,$2 + dmult $5,$9 + mflo $3 + nop + nop + dmult $5,$12 + mflo $10 + dli $2,0x100000000 + dsubu $4,$4,$3 + dmult $4,$2 + mflo $4 + sltu $3,$4,$10 + beq $3,$0,.L505 + daddu $4,$4,$8 + sltu $2,$4,$8 + bne $2,$0,.L417 + dsubu $5,$5,1 + sltu $2,$4,$10 + bnel $2,$0,.L417 + dsubu $5,$5,1 +.L417: + dli $2,0x100000000 +.L505: + dmult $11,$2 + mflo $2 + or $5,$2,$5 +.L395: + beq $7,$0,.L422 + sll $2,$7,3 + addu $2,$2,$6 + ld $12,-8($2) + addu $7,$7,-2 + li $2,64 + subu $2,$2,$24 + dsrl $3,$12,$2 + bltz $7,.L424 + or $13,$13,$3 + move $15,$2 + sll $2,$7,3 + addu $6,$2,$6 +.Loop3: + ld $11,0($6) + dmultu $13,$5 + mfhi $9 + daddu $9,$9,$13 + nop + dmultu $9,$8 + mflo $4 + mfhi $10 + dsll $3,$12,$24 + dsrl $2,$11,$15 + or $3,$3,$2 + dsubu $4,$3,$4 + dsubu $2,$13,$10 + sltu $10,$3,$4 + dsubu $10,$2,$10 + beq $10,$0,.L438 + dsubu $2,$4,$8 + sltu $3,$4,$2 + move $4,$2 + beq $10,$3,.L438 + daddu $9,$9,1 + dsubu $4,$4,$8 + daddu $9,$9,1 +.L438: sltu $2,$4,$8 + bne $2,$0,.L501 + move $13,$4 + dsubu $4,$4,$8 + daddu $9,$9,1 + move $13,$4 +.L501: sd $9,0($14) + addu $14,$14,-8 + move $12,$11 + addu $7,$7,-1 + bgez $7,.Loop3 + addu $6,$6,-8 +.L424: + dmultu $13,$5 + mfhi $7 + daddu $7,$7,$13 + nop + dmultu $7,$8 + mflo $2 + mfhi $6 + dsll $3,$12,$24 + dsubu $2,$3,$2 + dsubu $4,$13,$6 + sltu $6,$3,$2 + dsubu $6,$4,$6 + beq $6,$0,.L458 + move $4,$2 + dsubu $2,$4,$8 + sltu $3,$4,$2 + move $4,$2 + beq $6,$3,.L458 + daddu $7,$7,1 + dsubu $4,$4,$8 + daddu $7,$7,1 +.L458: + sltu $2,$4,$8 + bne $2,$0,.L502 + move $13,$4 + dsubu $4,$4,$8 + daddu $7,$7,1 + move $13,$4 +.L502: + sd $7,0($14) + addu $14,$14,-8 +.L422: + move $7,$25 + bltz $7,.L490 + dsrl $2,$13,$24 +.Loop4: + dmultu $13,$5 + mfhi $9 + daddu $9,$9,$13 + nop + dmultu $9,$8 + mflo $2 + mfhi $6 + dsubu $2,$0,$2 + dsubu $3,$13,$6 + sltu $6,$0,$2 + dsubu $6,$3,$6 + beq $6,$0,.L481 + 
move $4,$2 + dsubu $2,$4,$8 + sltu $3,$4,$2 + move $4,$2 + beq $6,$3,.L481 + daddu $9,$9,1 + dsubu $4,$4,$8 + daddu $9,$9,1 +.L481: sltu $2,$4,$8 + bne $2,$0,.L503 + move $13,$4 + dsubu $4,$4,$8 + daddu $9,$9,1 + move $13,$4 +.L503: sd $9,0($14) + addu $7,$7,-1 + bgez $7,.Loop4 + addu $14,$14,-8 + dsrl $2,$13,$24 +.L490: + ld $28,16($sp) + j $31 + daddu $sp,$sp,32 +EPILOGUE(mpn_divrem_1) |