diff options
author | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2013-03-03 17:10:55 +0000 |
---|---|---|
committer | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2013-03-03 17:10:55 +0000 |
commit | d15f124ff59606604c0243ee19cd67bc99ecd09f (patch) | |
tree | f0b18e431b15b797d5f5dc980928cd1a26b8f74a /libc/sysdeps/ieee754/dbl-64/mpa.c | |
parent | c1078e9067234e88d5c1ca8af18ae67b64141d66 (diff) | |
download | eglibc2-d15f124ff59606604c0243ee19cd67bc99ecd09f.tar.gz |
Merge changes between r22241 and r22552 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@22553 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps/ieee754/dbl-64/mpa.c')
-rw-r--r-- | libc/sysdeps/ieee754/dbl-64/mpa.c | 389 |
1 files changed, 299 insertions, 90 deletions
diff --git a/libc/sysdeps/ieee754/dbl-64/mpa.c b/libc/sysdeps/ieee754/dbl-64/mpa.c index ede8ed198..8fc2626f7 100644 --- a/libc/sysdeps/ieee754/dbl-64/mpa.c +++ b/libc/sysdeps/ieee754/dbl-64/mpa.c @@ -43,6 +43,7 @@ #include "endian.h" #include "mpa.h" #include <sys/param.h> +#include <alloca.h> #ifndef SECTION # define SECTION @@ -59,8 +60,9 @@ const mp_no mptwo = {1, {1.0, 2.0}}; static int mcr (const mp_no *x, const mp_no *y, int p) { - int i; - for (i = 1; i <= p; i++) + long i; + long p2 = p; + for (i = 1; i <= p2; i++) { if (X[i] == Y[i]) continue; @@ -76,7 +78,7 @@ mcr (const mp_no *x, const mp_no *y, int p) int __acr (const mp_no *x, const mp_no *y, int p) { - int i; + long i; if (X[0] == ZERO) { @@ -107,8 +109,10 @@ __acr (const mp_no *x, const mp_no *y, int p) void __cpy (const mp_no *x, mp_no *y, int p) { + long i; + EY = EX; - for (int i = 0; i <= p; i++) + for (i = 0; i <= p; i++) Y[i] = X[i]; } #endif @@ -119,8 +123,8 @@ __cpy (const mp_no *x, mp_no *y, int p) static void norm (const mp_no *x, double *y, int p) { -#define R RADIXI - int i; +#define R RADIXI + long i; double a, c, u, v, z[5]; if (p < 5) { @@ -194,17 +198,18 @@ norm (const mp_no *x, double *y, int p) static void denorm (const mp_no *x, double *y, int p) { - int i, k; + long i, k; + long p2 = p; double c, u, z[5]; -#define R RADIXI +#define R RADIXI if (EX < -44 || (EX == -44 && X[1] < TWO5)) { *y = ZERO; return; } - if (p == 1) + if (p2 == 1) { if (EX == -42) { @@ -228,7 +233,7 @@ denorm (const mp_no *x, double *y, int p) k = 1; } } - else if (p == 2) + else if (p2 == 2) { if (EX == -42) { @@ -281,7 +286,7 @@ denorm (const mp_no *x, double *y, int p) if (u == z[3]) { - for (i = k + 1; i <= p; i++) + for (i = k + 1; i <= p2; i++) { if (X[i] == ZERO) continue; @@ -323,7 +328,8 @@ void SECTION __dbl_mp (double x, mp_no *y, int p) { - int i, n; + long i, n; + long p2 = p; double u; /* Sign. */ @@ -347,7 +353,7 @@ __dbl_mp (double x, mp_no *y, int p) x *= RADIX; /* Digits. */ - n = MIN (p, 4); + n = MIN (p2, 4); for (i = 1; i <= n; i++) { u = (x + TWO52) - TWO52; @@ -357,7 +363,7 @@ __dbl_mp (double x, mp_no *y, int p) x -= u; x *= RADIX; } - for (; i <= p; i++) + for (; i <= p2; i++) Y[i] = ZERO; } @@ -369,53 +375,64 @@ static void SECTION add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) { - int i, j, k; + long i, j, k; + long p2 = p; + double zk; EZ = EX; - i = p; - j = p + EY - EX; - k = p + 1; + i = p2; + j = p2 + EY - EX; + k = p2 + 1; - if (j < 1) + if (__glibc_unlikely (j < 1)) { __cpy (x, z, p); return; } - else - Z[k] = ZERO; + + zk = ZERO; for (; j > 0; i--, j--) { - Z[k] += X[i] + Y[j]; - if (Z[k] >= RADIX) + zk += X[i] + Y[j]; + if (zk >= RADIX) { - Z[k] -= RADIX; - Z[--k] = ONE; + Z[k--] = zk - RADIX; + zk = ONE; } else - Z[--k] = ZERO; + { + Z[k--] = zk; + zk = ZERO; + } } for (; i > 0; i--) { - Z[k] += X[i]; - if (Z[k] >= RADIX) + zk += X[i]; + if (zk >= RADIX) { - Z[k] -= RADIX; - Z[--k] = ONE; + Z[k--] = zk - RADIX; + zk = ONE; } else - Z[--k] = ZERO; + { + Z[k--] = zk; + zk = ZERO; + } } - if (Z[1] == ZERO) + if (zk == ZERO) { - for (i = 1; i <= p; i++) + for (i = 1; i <= p2; i++) Z[i] = Z[i + 1]; } else - EZ += ONE; + { + Z[1] = zk; + EZ += ONE; + } } /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0. @@ -426,71 +443,70 @@ static void SECTION sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) { - int i, j, k; + long i, j, k; + long p2 = p; + double zk; EZ = EX; + i = p2; + j = p2 + EY - EX; + k = p2; - if (EX == EY) + /* Y is too small compared to X, copy X over to the result. */ + if (__glibc_unlikely (j < 1)) { - i = j = k = p; - Z[k] = Z[k + 1] = ZERO; + __cpy (x, z, p); + return; } - else + + /* The relevant least significant digit in Y is non-zero, so we factor it in + to enhance accuracy. */ + if (j < p2 && Y[j + 1] > ZERO) { - j = EX - EY; - if (j > p) - { - __cpy (x, z, p); - return; - } - else - { - i = p; - j = p + 1 - j; - k = p; - if (Y[j] > ZERO) - { - Z[k + 1] = RADIX - Y[j--]; - Z[k] = MONE; - } - else - { - Z[k + 1] = ZERO; - Z[k] = ZERO; - j--; - } - } + Z[k + 1] = RADIX - Y[j + 1]; + zk = MONE; } + else + zk = Z[k + 1] = ZERO; + /* Subtract and borrow. */ for (; j > 0; i--, j--) { - Z[k] += (X[i] - Y[j]); - if (Z[k] < ZERO) + zk += (X[i] - Y[j]); + if (zk < ZERO) { - Z[k] += RADIX; - Z[--k] = MONE; + Z[k--] = zk + RADIX; + zk = MONE; } else - Z[--k] = ZERO; + { + Z[k--] = zk; + zk = ZERO; + } } + /* We're done with digits from Y, so it's just digits in X. */ for (; i > 0; i--) { - Z[k] += X[i]; - if (Z[k] < ZERO) + zk += X[i]; + if (zk < ZERO) { - Z[k] += RADIX; - Z[--k] = MONE; + Z[k--] = zk + RADIX; + zk = MONE; } else - Z[--k] = ZERO; + { + Z[k--] = zk; + zk = ZERO; + } } + /* Normalize. */ for (i = 1; Z[i] == ZERO; i++); EZ = EZ - i + 1; - for (k = 1; i <= p + 1;) + for (k = 1; i <= p2 + 1;) Z[k++] = Z[i++]; - for (; k <= p;) + for (; k <= p2;) Z[k++] = ZERO; } @@ -602,8 +618,11 @@ void SECTION __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) { - int i, j, k, k2; + long i, j, k, ip, ip2; + long p2 = p; double u, zk; + const mp_no *a; + double *diag; /* Is z=0? */ if (__glibc_unlikely (X[0] * Y[0] == ZERO)) @@ -612,48 +631,238 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) return; } - /* Multiply, add and carry. */ - k2 = (__glibc_unlikely (p < 3)) ? p + p : p + 3; - zk = Z[k2] = ZERO; + /* We need not iterate through all X's and Y's since it's pointless to + multiply zeroes. Here, both are zero... */ + for (ip2 = p2; ip2 > 0; ip2--) + if (X[ip2] != ZERO || Y[ip2] != ZERO) + break; + + a = X[ip2] != ZERO ? y : x; + + /* ... and here, at least one of them is still zero. */ + for (ip = ip2; ip > 0; ip--) + if (a->d[ip] != ZERO) + break; + + /* The product looks like this for p = 3 (as an example): + + + a1 a2 a3 + x b1 b2 b3 + ----------------------------- + a1*b3 a2*b3 a3*b3 + a1*b2 a2*b2 a3*b2 + a1*b1 a2*b1 a3*b1 + + So our K needs to ideally be P*2, but we're limiting ourselves to P + 3 + for P >= 3. We compute the above digits in two parts; the last P-1 + digits and then the first P digits. The last P-1 digits are a sum of + products of the input digits from P to P-k where K is 0 for the least + significant digit and increases as we go towards the left. The product + term is of the form X[k]*X[P-k] as can be seen in the above example. + + The first P digits are also a sum of products with the same product term, + except that the sum is from 1 to k. This is also evident from the above + example. + + Another thing that becomes evident is that only the most significant + ip+ip2 digits of the result are non-zero, where ip and ip2 are the + 'internal precision' of the input numbers, i.e. digits after ip and ip2 + are all ZERO. */ + + k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3; + + while (k > ip + ip2 + 1) + Z[k--] = ZERO; - for (k = k2; k > p; k--) + zk = ZERO; + + /* Precompute sums of diagonal elements so that we can directly use them + later. See the next comment to know we why need them. */ + diag = alloca (k * sizeof (double)); + double d = ZERO; + for (i = 1; i <= ip; i++) + { + d += X[i] * Y[i]; + diag[i] = d; + } + while (i < k) + diag[i++] = d; + + while (k > p2) { - for (i = k - p, j = p; i < p + 1; i++, j--) - zk += X[i] * Y[j]; + long lim = k / 2; + + if (k % 2 == 0) + /* We want to add this only once, but since we subtract it in the sum + of products above, we add twice. */ + zk += 2 * X[lim] * Y[lim]; + + for (i = k - p2, j = p2; i < j; i++, j--) + zk += (X[i] + X[j]) * (Y[i] + Y[j]); + + zk -= diag[k - 1]; u = (zk + CUTTER) - CUTTER; if (u > zk) u -= RADIX; - Z[k] = zk - u; + Z[k--] = zk - u; zk = u * RADIXI; } + /* The real deal. Mantissa digit Z[k] is the sum of all X[i] * Y[j] where i + goes from 1 -> k - 1 and j goes the same range in reverse. To reduce the + number of multiplications, we halve the range and if k is an even number, + add the diagonal element X[k/2]Y[k/2]. Through the half range, we compute + X[i] * Y[j] as (X[i] + X[j]) * (Y[i] + Y[j]) - X[i] * Y[i] - X[j] * Y[j]. + + This reduction tells us that we're summing two things, the first term + through the half range and the negative of the sum of the product of all + terms of X and Y in the full range. i.e. + + SUM(X[i] * Y[i]) for k terms. This is precalculated above for each k in + a single loop so that it completes in O(n) time and can hence be directly + used in the loop below. */ while (k > 1) { - for (i = 1, j = k - 1; i < k; i++, j--) - zk += X[i] * Y[j]; + long lim = k / 2; + + if (k % 2 == 0) + /* We want to add this only once, but since we subtract it in the sum + of products above, we add twice. */ + zk += 2 * X[lim] * Y[lim]; + + for (i = 1, j = k - 1; i < j; i++, j--) + zk += (X[i] + X[j]) * (Y[i] + Y[j]); + + zk -= diag[k - 1]; u = (zk + CUTTER) - CUTTER; if (u > zk) u -= RADIX; - Z[k] = zk - u; + Z[k--] = zk - u; zk = u * RADIXI; - k--; } Z[k] = zk; - EZ = EX + EY; + /* Get the exponent sum into an intermediate variable. This is a subtle + optimization, where given enough registers, all operations on the exponent + happen in registers and the result is written out only once into EZ. */ + int e = EX + EY; + /* Is there a carry beyond the most significant digit? */ if (__glibc_unlikely (Z[1] == ZERO)) { - for (i = 1; i <= p; i++) + for (i = 1; i <= p2; i++) Z[i] = Z[i + 1]; - EZ--; + e--; } + EZ = e; Z[0] = X[0] * Y[0]; } +/* Square *X and store result in *Y. X and Y may not overlap. For P in + [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the + error is bounded by 1.001 ULP. This is a faster special case of + multiplication. */ +void +SECTION +__sqr (const mp_no *x, mp_no *y, int p) +{ + long i, j, k, ip; + double u, yk; + + /* Is z=0? */ + if (__glibc_unlikely (X[0] == ZERO)) + { + Y[0] = ZERO; + return; + } + + /* We need not iterate through all X's since it's pointless to + multiply zeroes. */ + for (ip = p; ip > 0; ip--) + if (X[ip] != ZERO) + break; + + k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; + + while (k > 2 * ip + 1) + Y[k--] = ZERO; + + yk = ZERO; + + while (k > p) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + yk += X[lim] * X[lim]; + + /* In __mul, this loop (and the one within the next while loop) run + between a range to calculate the mantissa as follows: + + Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] + + X[n] * Y[k] + + For X == Y, we can get away with summing halfway and doubling the + result. For cases where the range size is even, the mid-point needs + to be added separately (above). */ + for (i = k - p, j = p; i < j; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + + while (k > 1) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + yk += X[lim] * X[lim]; + + /* Likewise for this loop. */ + for (i = 1, j = k - 1; i < j; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + Y[k] = yk; + + /* Squares are always positive. */ + Y[0] = 1.0; + + /* Get the exponent sum into an intermediate variable. This is a subtle + optimization, where given enough registers, all operations on the exponent + happen in registers and the result is written out only once into EZ. */ + int e = EX * 2; + + /* Is there a carry beyond the most significant digit? */ + if (__glibc_unlikely (Y[1] == ZERO)) + { + for (i = 1; i <= p; i++) + Y[i] = Y[i + 1]; + e--; + } + + EY = e; +} + /* Invert *X and store in *Y. Relative error bound: - For P = 2: 1.001 * R ^ (1 - P) - For P = 3: 1.063 * R ^ (1 - P) @@ -664,7 +873,7 @@ static void SECTION __inv (const mp_no *x, mp_no *y, int p) { - int i; + long i; double t; mp_no z, w; static const int np1[] = |