diff options
author | jonas <jonas@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2017-02-19 20:27:39 +0000 |
---|---|---|
committer | jonas <jonas@3ad0048d-3df7-0310-abae-a5850022a9f2> | 2017-02-19 20:27:39 +0000 |
commit | 55967ab138203c3c374828252653d8dcbc49a54e (patch) | |
tree | 5fb62e0e2bfffe93d8b0b127c1bbc978f7cd5e63 /rtl/powerpc | |
parent | 967bbebd9da3828850961b985cca3b769c05b8af (diff) | |
download | fpc-55967ab138203c3c374828252653d8dcbc49a54e.tar.gz |
* optimized PowerPC version of changes in r35454
git-svn-id: http://svn.freepascal.org/svn/fpc/trunk@35458 3ad0048d-3df7-0310-abae-a5850022a9f2
Diffstat (limited to 'rtl/powerpc')
-rw-r--r-- | rtl/powerpc/int64p.inc | 37 |
1 files changed, 3 insertions, 34 deletions
```diff
diff --git a/rtl/powerpc/int64p.inc b/rtl/powerpc/int64p.inc
index 1dba7374b4..96c9e8f9a8 100644
--- a/rtl/powerpc/int64p.inc
+++ b/rtl/powerpc/int64p.inc
@@ -145,45 +145,16 @@
  function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
  assembler; nostackframe;
  asm
- // (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7
+ // (r3:r4) = (r3:r4) * (r5:r6)
  // res f1 f2
  or. r10,r3,r5 // are both msw's 0?
  mulhwu r8,r4,r6 // msw of product of lsw's
- not r0,r7 // if no overflowcheck, r0 := $ffffffff, else r0 := 0;
  beq .LDone // if both msw's are zero, skip cross products
  mullw r9,r4,r5 // lsw of first cross-product
- cntlzw r11,r3 // count leading zeroes of msw1
- cntlzw r12,r5 // count leading zeroes of msw2
  mullw r7,r3,r6 // lsw of second cross-product
- add r12,r11,r12 // sum of leading zeroes
- mr r10,r8
- or r0,r12,r0 // maximise sum if no overflow checking, otherwise it remains
  add r8,r8,r9 // add
- cmplwi cr1,r0,64 // >= 64 leading zero bits in total? If so, no overflow
  add r8,r8,r7 // add
- bge+ cr1,.LDone // if the sum of leading zero's >= 64 (or checkoverflow was 0)
- // there's no overflow, otherwise more thorough check
- add r7,r7,r9
- mulhwu r3,r6,r3
- addc r7,r7,r10 // add the msw of the product of the lsw's, record carry
- cntlzw r9,r5
- cntlzw r10,r4 // get leading zeroes count of lsw f1
- mulhwu r5,r4,r5
- addze r3,r3
- subfic r0,r11,31 // if msw f1 = 0, then r0 := -1, else r0 >= 0
- cntlzw r7,r6
- subfic r11,r9,31 // same for f2
- srawi r0,r0,31 // if msw f1 = 0, then r0 := 1, else r0 := 0
- srawi r11,r11,31
- and r10,r10,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0
- and r9,r7,r11 // same for f2
- or. r5,r5,r3
- add r9,r9,r10 // add leading zero counts of lsw's to sum if appropriate
- add r9,r9,r12
- cmplwi cr7,r9,64 // is the sum now >= 64?
- cmplwi cr1,r9,62 // or <= 62?
-
  .LDone:
  mullw r4,r4,r6 // lsw of product of lsw's
  mr r3,r8 // get msw of product in correct register
@@ -193,12 +164,11 @@
  function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
  assembler; nostackframe;
  asm
- // (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7
+ // (r3:r4) = (r3:r4) * (r5:r6)
  // res f1 f2
  or. r10,r3,r5 // are both msw's 0?
  mulhwu r8,r4,r6 // msw of product of lsw's
- xor r0,r0,r0 // r0 := 0 for overflow checking
  beq .LDone // if both msw's are zero, skip cross products
  mullw r9,r4,r5 // lsw of first cross-product
  cntlzw r11,r3 // count leading zeroes of msw1
@@ -206,9 +176,8 @@
  mullw r7,r3,r6 // lsw of second cross-product
  add r12,r11,r12 // sum of leading zeroes
  mr r10,r8
- or r0,r12,r0 // maximise sum if no overflow checking, otherwise it remains
  add r8,r8,r9 // add
- cmplwi cr1,r0,64 // >= 64 leading zero bits in total? If so, no overflow
+ cmplwi cr1,r12,64 // >= 64 leading zero bits in total? If so, no overflow
  add r8,r8,r7 // add
  bge+ cr1,.LDone // if the sum of leading zero's >= 64 (or checkoverflow was 0)
  // there's no overflow, otherwise more thorough check
```