summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRobert Griesemer <gri@golang.org>2010-05-19 09:36:50 -0700
committerRobert Griesemer <gri@golang.org>2010-05-19 09:36:50 -0700
commitd8e59e5787e92630f22c2bbb6fa1ab24d8d02672 (patch)
treeb00132eeb81a9e139a158c15a4280b935f15d199 /src
parent8e64ca784787dd7259d4701b54de5431fe81e8a2 (diff)
downloadgo-d8e59e5787e92630f22c2bbb6fa1ab24d8d02672.tar.gz
big: potential bug fix, cleanups
- implemented setWord, use it where setUint64 is wrong - divLarge: use fast mulWW, divWW; implemented mulWW, divWW - better assembly code for addMulVVW R=rsc CC=golang-dev http://codereview.appspot.com/1258042
Diffstat (limited to 'src')
-rw-r--r--src/pkg/big/arith.go2
-rw-r--r--src/pkg/big/arith_386.s24
-rw-r--r--src/pkg/big/arith_amd64.s24
-rwxr-xr-xsrc/pkg/big/nat.go30
4 files changed, 61 insertions, 19 deletions
diff --git a/src/pkg/big/arith.go b/src/pkg/big/arith.go
index 52bb3e165..a5e0dec68 100644
--- a/src/pkg/big/arith.go
+++ b/src/pkg/big/arith.go
@@ -56,6 +56,7 @@ func subWW_g(x, y, c Word) (z1, z0 Word) {
// z1<<_W + z0 = x*y
+func mulWW(x, y Word) (z1, z0 Word)
func mulWW_g(x, y Word) (z1, z0 Word) {
// Split x and y into 2 halfWords each, multiply
// the halfWords separately while avoiding overflow,
@@ -242,6 +243,7 @@ func leadingZeros(x Word) uint {
// q = (x1<<_W + x0 - r)/y
+func divWW(x1, x0, y Word) (q, r Word)
func divWW_g(x1, x0, y Word) (q, r Word) {
if x1 == 0 {
q, r = x0/y, x0%y
diff --git a/src/pkg/big/arith_386.s b/src/pkg/big/arith_386.s
index 08eb5d4d5..21521635b 100644
--- a/src/pkg/big/arith_386.s
+++ b/src/pkg/big/arith_386.s
@@ -5,6 +5,25 @@
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
+// func mulWW(x, y Word) (z1, z0 Word)
+TEXT ·mulWW(SB),7,$0
+ MOVL x+0(FP), AX
+ MULL y+4(FP)
+ MOVL DX, z1+8(FP)
+ MOVL AX, z0+12(FP)
+ RET
+
+
+// func divWW(x1, x0, y Word) (q, r Word)
+TEXT ·divWW(SB),7,$0
+ MOVL x1+0(FP), DX
+ MOVL x0+4(FP), AX
+ DIVL y+8(FP)
+ MOVL AX, q+12(FP)
+ MOVL DX, r+16(FP)
+ RET
+
+
// func addVV(z, x, y []Word) (c Word)
TEXT ·addVV(SB),7,$0
MOVL z+0(FP), DI
@@ -212,11 +231,10 @@ TEXT ·addMulVVW(SB),7,$0
L6: MOVL (SI)(BX*4), AX
MULL BP
- ADDL (DI)(BX*4), AX
- ADCL $0, DX
ADDL CX, AX
ADCL $0, DX
- MOVL AX, (DI)(BX*4)
+ ADDL AX, (DI)(BX*4)
+ ADCL $0, DX
MOVL DX, CX
ADDL $1, BX // i++
diff --git a/src/pkg/big/arith_amd64.s b/src/pkg/big/arith_amd64.s
index 1dd95ec53..c740565a7 100644
--- a/src/pkg/big/arith_amd64.s
+++ b/src/pkg/big/arith_amd64.s
@@ -7,6 +7,25 @@
// TODO(gri) - experiment with unrolled loops for faster execution
+// func mulWW(x, y Word) (z1, z0 Word)
+TEXT ·mulWW(SB),7,$0
+ MOVQ x+0(FP), AX
+ MULQ y+8(FP)
+ MOVQ DX, z1+16(FP)
+ MOVQ AX, z0+24(FP)
+ RET
+
+
+// func divWW(x1, x0, y Word) (q, r Word)
+TEXT ·divWW(SB),7,$0
+ MOVQ x1+0(FP), DX
+ MOVQ x0+8(FP), AX
+ DIVQ y+16(FP)
+ MOVQ AX, q+24(FP)
+ MOVQ DX, r+32(FP)
+ RET
+
+
// func addVV(z, x, y []Word) (c Word)
TEXT ·addVV(SB),7,$0
MOVQ z+0(FP), R10
@@ -210,11 +229,10 @@ TEXT ·addMulVVW(SB),7,$0
L6: MOVQ (R8)(BX*8), AX
MULQ R9
- ADDQ (R10)(BX*8), AX
- ADCQ $0, DX
ADDQ CX, AX
ADCQ $0, DX
- MOVQ AX, (R10)(BX*8)
+ ADDQ AX, (R10)(BX*8)
+ ADCQ $0, DX
MOVQ DX, CX
ADDL $1, BX // i++
diff --git a/src/pkg/big/nat.go b/src/pkg/big/nat.go
index 668a62689..b09893730 100755
--- a/src/pkg/big/nat.go
+++ b/src/pkg/big/nat.go
@@ -69,16 +69,20 @@ func (z nat) make(n int) nat {
}
-func (z nat) setUint64(x uint64) nat {
+func (z nat) setWord(x Word) nat {
if x == 0 {
return z.make(0)
}
+ z = z.make(1)
+ z[0] = x
+ return z
+}
+
+func (z nat) setUint64(x uint64) nat {
// single-digit values
- if x == uint64(Word(x)) {
- z = z.make(1)
- z[0] = Word(x)
- return z
+ if w := Word(x); uint64(w) == x {
+ return z.setWord(w)
}
// compute number of words n required to represent x
@@ -194,7 +198,7 @@ func (x nat) cmp(y nat) (r int) {
func (z nat) mulAddWW(x nat, y, r Word) nat {
m := len(x)
if m == 0 || y == 0 {
- return z.setUint64(uint64(r)) // result is r
+ return z.setWord(r) // result is r
}
// m > 0
@@ -529,6 +533,8 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
m := len(uIn) - n
// determine if z can be reused
+ // TODO(gri) should find a better solution - this if statement
+ // is very costly (see e.g. time pidigits -s -n 10000)
if alias(z, uIn) || alias(z, v) {
z = nil // z is an alias for uIn or v - cannot reuse
}
@@ -549,15 +555,13 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
// D2.
for j := m; j >= 0; j-- {
// D3.
- var qhat Word
- if u[j+n] == v[n-1] {
- qhat = _B - 1
- } else {
+ qhat := Word(_M)
+ if u[j+n] != v[n-1] {
var rhat Word
- qhat, rhat = divWW_g(u[j+n], u[j+n-1], v[n-1])
+ qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
// x1 | x2 = q̂v_{n-2}
- x1, x2 := mulWW_g(qhat, v[n-2])
+ x1, x2 := mulWW(qhat, v[n-2])
// test if q̂v_{n-2} > br̂ + u_{j+n-2}
for greaterThan(x1, x2, rhat, u[j+n-2]) {
qhat--
@@ -567,7 +571,7 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
if rhat < prevRhat {
break
}
- x1, x2 = mulWW_g(qhat, v[n-2])
+ x1, x2 = mulWW(qhat, v[n-2])
}
}