diff options
author | florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2017-02-19 19:15:14 +0000 |
---|---|---|
committer | florian <florian@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2017-02-19 19:15:14 +0000 |
commit | d19e5ead18c11b76dbb220427b8cec7fe34721e7 (patch) | |
tree | 736c65e1d3bb47b4d6664ab81d39e54a29b7f276 /rtl/powerpc | |
parent | eaa1dbbf4e3d67f13f2fe93ef565ba03bc452840 (diff) | |
download | fpc-d19e5ead18c11b76dbb220427b8cec7fe34721e7.tar.gz |
* split fpc_mul_<64 bit> into separate procedures with and without overflow checking
git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@35454 3ad0048d-3df7-0310-abae-a5850022a9f2
Diffstat (limited to 'rtl/powerpc')
-rw-r--r-- | rtl/powerpc/int64p.inc | 57 |
1 files changed, 51 insertions, 6 deletions
diff --git a/rtl/powerpc/int64p.inc b/rtl/powerpc/int64p.inc index 02f809bdb4..1dba7374b4 100644 --- a/rtl/powerpc/int64p.inc +++ b/rtl/powerpc/int64p.inc @@ -140,11 +140,9 @@ mr R4,R6 end; +{$ifndef VER3_0} {$define FPC_SYSTEM_HAS_MUL_QWORD} - { multiplies two qwords - the longbool for checkoverflow avoids a misaligned stack - } - function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc; + function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc; assembler; nostackframe; asm // (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7 @@ -185,6 +183,54 @@ add r9,r9,r12 cmplwi cr7,r9,64 // is the sum now >= 64? cmplwi cr1,r9,62 // or <= 62? + + .LDone: + mullw r4,r4,r6 // lsw of product of lsw's + mr r3,r8 // get msw of product in correct register + end; + + + function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc; + assembler; nostackframe; + asm + // (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7 + // res f1 f2 + + or. r10,r3,r5 // are both msw's 0? + mulhwu r8,r4,r6 // msw of product of lsw's + xor r0,r0,r0 // r0 := 0 for overflow checking + beq .LDone // if both msw's are zero, skip cross products + mullw r9,r4,r5 // lsw of first cross-product + cntlzw r11,r3 // count leading zeroes of msw1 + cntlzw r12,r5 // count leading zeroes of msw2 + mullw r7,r3,r6 // lsw of second cross-product + add r12,r11,r12 // sum of leading zeroes + mr r10,r8 + or r0,r12,r0 // maximise sum if no overflow checking, otherwise it remains + add r8,r8,r9 // add + cmplwi cr1,r0,64 // >= 64 leading zero bits in total? 
If so, no overflow + add r8,r8,r7 // add + bge+ cr1,.LDone // if the sum of leading zeroes >= 64 (or checkoverflow was 0) + // there's no overflow, otherwise more thorough check + add r7,r7,r9 + mulhwu r3,r6,r3 + addc r7,r7,r10 // add the msw of the product of the lsw's, record carry + cntlzw r9,r5 + cntlzw r10,r4 // get leading zeroes count of lsw f1 + mulhwu r5,r4,r5 + addze r3,r3 + subfic r0,r11,31 // if msw f1 = 0, then r0 := -1, else r0 >= 0 + cntlzw r7,r6 + subfic r11,r9,31 // same for f2 + srawi r0,r0,31 // if msw f1 = 0, then r0 := -1 (all-ones mask), else r0 := 0 + srawi r11,r11,31 + and r10,r10,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0 + and r9,r7,r11 // same for f2 + or. r5,r5,r3 + add r9,r9,r10 // add leading zero counts of lsw's to sum if appropriate + add r9,r9,r12 + cmplwi cr7,r9,64 // is the sum now >= 64? + cmplwi cr1,r9,62 // or <= 62? bge+ cr7,.LDone // >= 64 leading zeroes -> no overflow ble+ cr1,.LOverflow // <= 62 leading zeroes -> overflow // for 63 zeroes, we need additional checks @@ -198,5 +244,4 @@ mullw r4,r4,r6 // lsw of product of lsw's mr r3,r8 // get msw of product in correct register end; - - +{$endif VER3_0} |