diff options
-rw-r--r-- | doc/README.dev | 3 | ||||
-rw-r--r-- | doc/sum.txt | 643 | ||||
-rw-r--r-- | src/mpfr-impl.h | 27 | ||||
-rw-r--r-- | src/sum.c | 1373 | ||||
-rw-r--r-- | tests/tsum.c | 1021 |
5 files changed, 2580 insertions, 487 deletions
diff --git a/doc/README.dev b/doc/README.dev index 80d502626..067423f99 100644 --- a/doc/README.dev +++ b/doc/README.dev @@ -440,6 +440,9 @@ List of macros used for checking MPFR: for no timeout by default, and can be overridden at "make check" time with the MPFR_TESTS_TIMEOUT environment variable. ++ MPFR_COV_CHECK: Define to enable value coverage checking (must not + be used in production). This macro is for the MPFR + developers, in order to improve the testsuite. =========================================================================== diff --git a/doc/sum.txt b/doc/sum.txt new file mode 100644 index 000000000..b51b95456 --- /dev/null +++ b/doc/sum.txt @@ -0,0 +1,643 @@ +Implementation of mpfr_sum: sum of n mpfr_t values +══════════════════════════════════════════════════ + +About the time/memory complexity: The original mpfr_sum algorithm was +inefficient on some inputs, and in some simple cases with inputs of +different orders of magnitude, it could be very slow and take all the +memory, making the whole program (and possibly other programs on the +system too, due to the lack of memory at some critical point) crash. +The time/memory complexity must no longer depend on the value of the +exponents of the inputs, i.e. their order of magnitude. Moreover, be +careful with the carry propagation. + +Preliminary note: The previous mpfr_sum algorithm was a high-level one +and could take too much memory and too much time in cases with numbers +of different magnitudes and cancellation. Here we will use functions +from the mpn layer of GMP only (no mpfr_add), as the algorithm is based +on additions by blocks, involving virtual splitting of MPFR numbers. + +Concerning the sign of a zero result, the specification is: + * if n = 0, then the result is +0 (this choice is consistent with + IEEE 754-2008's sum reduction operation of vector length 0, and + with mpfr_set_ui and mpfr_set_si on the integer 0); otherwise + * if all the inputs have the same sign (i.e. all +0 or all -0), + then the result has the same sign as the inputs (for n = 1 or 2, + this choice is consistent with mpfr_set and mpfr_add respectively); + * otherwise, either because all inputs are zeros with at least a +0 + and a -0, or because some inputs are non-zero (but they globally + cancel), the result is +0, except for the MPFR_RNDD rounding mode, + where it is -0 (this choice is consistent with mpfr_add for n = 2, + and more generally with a sequence of mpfr_add's). +Additional details on: + https://sympa.inria.fr/sympa/arc/mpfr/2014-03/msg00000.html + +Let sq be the output precision. + +*** TODO *** +The following is a bit obsolete. In particular the new helper function +sum_raw (obtained from code refactoring for steps 3 to 6, and 8) should +be introduced and described. Pre-rounding in (7) and rounding in (8) +have been merged, and this [previously pre-rounding in (7)] is still +done at the same time as sign handling with a single mpn operation. +The table concerning the correction term and the ternary value in case +of TMD should be changed to: + + rnd tmd R sst correction ternary + ─────────────────────────────────────────────── + N 1 0 - 0 + + N 1 0 0 0 0 + N 1 0 + 0 - + N 1 1 - +1 + + N 1 1 0 +1 0 + N 1 1 + +1 - + ─────────────────────────────────────────────── + N 2 0 - 0 - + N 2 0 0 ? ? (halfway case) + N 2 0 + +1 + + N 2 1 - 0 - + N 2 1 0 ? ? (halfway case) + N 2 1 + +1 + + ─────────────────────────────────────────────── + D 1 0 - -1 - + D 1 0 0 0 0 + D 1 0 + 0 - + D 1 1 - 0 - + D 1 1 0 +1 0 + D 1 1 + +1 - + ─────────────────────────────────────────────── + U 1 0 - 0 + + U 1 0 0 0 0 + U 1 0 + +1 + + U 1 1 - +1 + + U 1 1 0 +1 0 + U 1 1 + +2 + + ─────────────────────────────────────────────── + +The general ideas of the algorithm: + +0. Handle the case n <= 1 separately (+0 for n = 0, mpfr_set for n = 1). + Now assume that n >= 2. + +1. Look at the exponent field of each mpfr_t input (this is fast); also + track the signs of Inf's and zeros. This step allows one to: + * detect the singular cases, i.e. when there is either at least + a NaN or an Inf in the inputs (in case of a NaN or two Inf's of + different signs, we can immediately return NaN), or all inputs + are zeros; + * determine the maximum exponent maxexp of the regular inputs and + the number n' of regular inputs. + +2. A truncated sum will be computed in two's complement, stored in a + fixed memory area, called "accumulator", whose characteristics are + determined here. + + Two's complement is preferred to the sign + magnitude representation + because the signs of the temporary (and final) results are not known + in advanced, and the computations (additions and subtractions) in + two's complement are more natural in this context. There will be a + conversion to sign + magnitude (representation used by MPFR numbers) + at the end, but this should not take much time compared to the other + calculations. + + Only a fixed part of the inputs will be taken into account for the + truncated sum: the bits whose exponent is in some window (interval) + denoted [minexp,maxexp[. + + We choose not to include maxexp in the interval in order to match the + floating-point representation chosen in MPFR, where the significand + is in [1/2,1[; this also means that the current minexp will be maxexp + at the next iteration, unless there is a "hole" between the inputs, + as explained below. + + Let us define logn = ceil(log2(n')). + + Due to the accumulation of values and the choice of two's complement + (a way to represent the sign), we will need some extra bits to avoid + overflows. The absolute value of the sum is less than n' * 2^maxexp, + taken up to logn extra bits, and one needs one more bit to be able + to determine the sign, so that cq = logn + 1 extra bits will be + considered. + + For the other side, we define minexp = maxexp - sq - dq, where dq + will be chosen around logn to take into account the accumulation + of errors, i.e. from everything less significant than minexp. The + final choice for dq should be done after testing: in practice, one + may need a little more precision to handle partial cancellation at + this iteration, but important cancellation will always lead to other + iterations. For instance, we will choose for dq the smallest value + such that dq ≥ max(logn,4) (the bound 4 will be explained later), + and the accumulator bitsize wq, which is maxexp + cq - minexp, i.e. + cq + sq + dq, is a multiple of GMP_NUMB_BITS. + + Accumulator: [--------]-----------------------------------] + cq └─ maxexp sq + dq minexp ─┘ + +3. Compute the truncated sum in two's complement by taking into account + the part of the inputs in the window [minexp,maxexp[. + + In the same loop over the inputs, determine the maximum exponent + maxexp2 of the numbers formed starting with the most significant + represented bit that has been ignored: one will get either minexp + (if an input has been truncated at this iteration) or the maximum + exponent of the numbers that have been completely ignored. If no + bits have been ignored any longer, then maxexp2 has the special + value MPFR_EXP_MIN, which is the minimum value of the mpfr_exp_t + type and not a valid exponent; and the truncated sum is exact. + + The accumulator during this iteration: + + [--------]-----------------------------------] + cq └─ maxexp minexp ─┘ + + If there is a additional iteration with maxexp2 = minexp - 4 and + a shift of 26 bits, here's the accumulator after the shift: + + <------- 26 zeros -------> + [-------------------0000000000000000000000000] + This iteration: minexp ─┘ ├─ maxexp2 │ + Next iteration: └─ maxexp minexp ─┘ + +4. Determine the number of cancelled bits, more precisely, define the + variable cancel = number of identical bits on the most significant + part of the accumulator. + +5. If the truncated sum (i.e. the value of the accumulator) is 0, i.e. + if all the words are 0, then: + * if maxexp2 == MPFR_EXP_MIN, then return with the exact value ±0 + (its sign is determined from the rounding mode, as specified); + * else reiterate at (3) with maxexp = maxexp2 and minexp determined + from the initial value of cq (like in the first iteration). + +6. Let e = maxexp + cq - cancel, u = e - sq, and err = maxexp2 + logn. + Then the absolute value of the truncated sum is in [2^(e-1),2^e] + (binade closed on both ends due to two's complement), u is the + exponent of the ulp (a.k.a., quantum exponent) in the destination + (except in the exact case -2^e), and the absolute value of the error + is strictly less than the bound 2^err; if maxexp2 == MPFR_EXP_MIN, + then the error is 0, i.e. the sum in the accumulator is exact. + + Here's a representation of the accumulator and the cancel bits, + with the two cases depending on the sign of the truncated sum, + where the x's correspond to the sq-1 represented bits following + the initial value bit 1 (0 if negative), r is the rounding bit, + and the bits f are the following bits: + + ][------------------- accumulator -------------------] + ][---- cancel ----]----------------------------------] + ]0000000000000000001xxxxxxxxxxxxxxxxxxxxxrffffffffffff + ]1111111111111111110xxxxxxxxxxxxxxxxxxxxxrffffffffffff + └─ maxexp + cq └─ e u ─┘ minexp ─┘ + + If the cancellation is important, then the bit r and some of the + least significant bits x may fall outside of the accumulator, in + which case they are considered to be 0. The most degenerate case + is the following: + + * If the cancel bits are 0's, then due to (5), e = minexp + 1, + i.e. a single bit (the leading 1) is represented. + + * If the cancel bits are 1's, all the bits of the accumulator are + 1's. Since the non-represented bits are considered to be 0, the + following bit is a 0; this implies that the computed value of + cancel is the right one, i.e. as if the following bits were + represented in the accumulator, and proves the above assertion + "the absolute value of the truncated sum is in [2^(e-1),2^e]". + Here we have e = minexp, and none of the bits are represented. + + In the implementation, one must make sure that one does not try to + read non-represented bits. + + The exponent and related values may change later due to the error, + but this doesn't really matter here. + + If the error bound is too large due to the cancellation, e.g. if + err > u - 3 (the proofs below are done with this choice), then + one reiterates at (3) after shifting the truncated sum to the + left boundary (most significant part) of the accumulator, where: + * the shift count is determined in such a way to avoid overflows + at the next iteration, i.e. to be able to retrieve the sum with + its sign; + * the new value of cq is implied by this shift count and maxexp2 + (the value of maxexp at the next iteration). + + Concerning the choice of the shift count shiftq, letting only one + identical bit in the most significant part may not be sufficient; + for instance, if + a. maxexp2 = minexp; + b. the accumulator contains: + + 0000000011111111111111111111111111110101 + This iteration: minexp ─┘ + + c. the accumulator is shifted to: + + 0111111111111111111111111111101010000000 + Next iteration: └─ maxexp + + d. there are at least 12 numbers to add at the next iteration; + then one could end up with something like: + + 1000000000000000000000000000000000010001 + └─ maxexp + + i.e. an overflow in two's complement. But leaving at least + 2 + max(0, err - e) itentical bits in the most significant part, + such as + + 0011111111111111111111111111110101000000 + └─ maxexp + + is sufficient. The second term of the max covers cases like: + + ┌─ err = maxexp2 + logn + 0000000000000000000000000000000000000111 + e ─┘ + + (2^err being a bound on the error term, thus a bound on the value + that will be added to the accumulator at the next iteration), for + which the accumulator can be shifted to: + + ┌─ err + 0000000111000000000000000000000000000000 + e ─┘ + + without triggering an overflow at the next iteration, but + + ┌─ err + 0000001110000000000000000000000000000000 + e ─┘ + + is incorrect as a 1 could appear in the MSB (making the accumulator + value negative) just with additions of positive numbers. + + Said otherwise, leaving at least i identical bits allows one to + represent numbers in [-2^(e+i-1),2^(e+i-1)[. The current sum is in + [-2^e,2^e[, and taking into account the error terms, one wants to + be able to represent arbitrary numbers in ]-2^e-2^err,2^e+2^err[. + So, i must be chosen in such a way that 2^e + 2^err ≤ 2^(e+i-1), + i.e. 2^0 + 2^(err-e) ≤ 2^(i-1). The smallest power of two larger + than or equal to 2^0 + 2^(err-e) has exponent 1 + max(0, err - e). + Hence the choice i ≥ 2 + max(0, err - e). + + So, we have: shiftq = cancel - 2 - max(0, err - e). And at the next + iteration, + + cq' = wq - shiftq + (minexp - maxexp2) + + Now one needs to prove that this makes sense, i.e. that shiftq > 0 + and that cq' < wq; otherwise all the inputs will be ignored, making + a new iteration useless, with a possible infinite loop. So, shiftq + must be chosen so that + + shiftq > minexp - maxexp2 + + Note that since maxexp2 ≤ minexp, this will also yield shiftq > 0. + The condition err > u - 3 gives: + + maxexp2 + logn > maxexp + cq - cancel - sq - 3 + + Therefore, since maxexp - minexp = sq + dq, + + cancel - 2 > minexp - maxexp2 - logn + cq + dq - 5 + + And since cq = logn + 1 and dq ≥ 4, + + cancel - 2 > minexp - maxexp2 [A] + + We also have: + + cancel - 2 - logn + e = maxexp - 2 - logn + cq > minexp + + Therefore + + cancel - 2 - (err - e) > minexp - maxexp2 [B] + + [A] and [B] give: shiftq > minexp - maxexp2. QED. + + Note: It is expected that in general, the cancellation is not very + large, so that the new additions in (3) will occur only in a small + part of the accumulator, except in case of long carry propagation + (see below). + +7. Now, the accumulator contains the significand of a good approximation + to the exact sum, since the absolute value of the error is strictly + less than ulp(computed value) * 2^(-3). The corresponding exponent is + e and the sign is determined from the cancel bits. One will copy the + rounded significand to the destination, together with the exponent + and the sign. One has different cases: + + * If all the bits of the inputs have been taken into account, then + the significand is exact (maxexp2 == MPFR_EXP_MIN), so that one + can round it correctly. Note: The TMD does not occur here. + + * If the error bound is non-zero (because not all the bits of the + inputs have been taken into account) but the TMD does not occur, + then one can also round the significand correctly. + + * Otherwise, one will round the significand, assuming that this is + the correct truncation up to the rounding bit and that the sticky + bit is 1: this yields the most probable correctly rounded result + if u - minexp > 2 (the most general case), based on a symmetrical + distribution of the error centered on 0; the destination will be + corrected later if need be. + + For the last two cases, and before the rounding is done, one needs + to determine whether the TMD occurs. Let d = u - err, which is ≥ 3 + and can be very large; however d is representable in a mpfr_exp_t + since u ≤ emax, err ≥ emin, and this type can hold the sum of two + valid exponents. The TMD occurs when the sum is close enough to a + breakpoint, which is either a machine number (i.e. a number whose + significand fits on sq bits) or a midpoint between two consecutive + machine numbers, depending on the rounding mode: + ┌───────────────┬────────────────┐ + │ Rounding mode │ Breakpoint │ + ├───────────────┼────────────────┤ + │ to nearest │ midpoint │ + │ to nearest │ machine number │ + │ directed │ machine number │ + └───────────────┴────────────────┘ + (in the second case, the correctly rounded sum can be determined, + but not the ternary value). More precisely, the TMD occurs when: + * the d bits following the ulp one are identical in directed + rounding modes; + * the d-1 bits following the rounding bit are identical, in the + rounding-to-nearest mode. + + At the same time, one determines whether the correctly rounded sum + is exact. If the TMD occurs, this value is currently assumed to be + inexact, as said above. + + Let us recall that in case of an important cancellation, some bits + may not be represented in the accumulator. Such a case is possible + even in this step, when maxexp2 is much smaller than minexp. + + A change of representation is needed since the sum has been computed + using two's complement representation while regular MPFR numbers are + represented in sign + absolute value (normalized). The sign bit is + directly obtained from the cancel bits. For the absolute value: + + * If the cancel bits are 0's, the value is positive, so that the + absolute value is represented in the same way. + + * If the cancel bits are 1's, the value is negative, so that the + absolute value is formally obtained by inverting all the bits + (this infinite binary expansion is not the canonical one, but + this does not matter as this is just for the discussion below). + Since there is no carry propagation at this point, the exponent + is not changed. + + With the mpn layer of GMP, up to 3 operations are involved for the + handling of the significand: + * a shift (for the alignment), if the shift count is non-zero + (this is the most probable case); + * a negation if the value is negative; + * the initial rounding. + The shift should be done before the initial rounding, so that all + the bits are represented for the rounding. The copy itself should + be done together with the shift or the negation, because this is + where most of the limbs are changed in general. It will be done + with the shift, and see below for the other two operations. + + The rounding and change of representation (as a whole) can be seen + in the following way: after scaling, a number written r = i + f in + two's complement, where i is the integer part and f the infinite + fractional part, has to be rounded to one of the enclosing integers + i and i + 1. Thus the rounding operation consists in replacing f + by an integer c = 0 or 1 (like a carry): the rounded value is the + integer i + c. When r is negative (i.e. the MSB of i is 1), the + following forms are equivalent: + + |i + c| = - (i + c) [1] + = (- i) - c [2] + = rnd(-r) = rnd(~r) = ~i + ~c [3] + + This can lead to various possible implementations. In each of these + 3 cases, one has two potentially multiple-precision operations. + [1] addition of a carry, followed by a negation; + [2] negation, followed by a subtraction of a carry; + [3] bit inversion, followed by an addition of a carry. + In average, the operation with a carry is in constant time; it is an + operation on a single word in most cases. But in the worst case, due + to possible long carry propagation, it may need a read/write on the + whole significand. If GMP had a negation-with-borrow operation, one + could merge these two multiple-operations in a single one; but this + is not the case. However one can notice that either c or ~c is 0, so + that one can select the operation depending on the value of c. As a + summary, consider the following cases: + * r positive: |i + c| = i + c with mpn_add_1; + * r negative, c = 0: |i + c| = -i with mpn_neg ([1]/[2]); + * r negative, c = 1: |i + c| = ~i with mpn_com ([3]). + The first two cases can yield an increment of the exponent e. + + One goal of this early copy/rounding is to free the accumulator in + case there is work to do in (8), though the needed memory for that + is limited. An alternative solution would be to reserve a bit more + memory for (8), and determine the way of rounding before the copy; + this could be slightly more efficient in supposedly rare cases + (when the correction yields a long carry propagation). + + As said above, in rounding to nearest, both kinds of breakpoints + (machine numbers and midpoints) are taken into account for the TMD. + Since the final rounding and ternary value will depend on the kind + of breakpoint, one needs to remember this information in a variable + (as the accumulator will be overwritten). Thus a variable tmd will + contain: + * 0: the TMD does not occur; + * 1: the TMD occurs on a machine number; + * 2: the TMD occurs on a midpoint. + + If the TMD does not occur (tmd = 0): + * Determine the ternary value as follows: if the sum is exact + (already known), the ternary value is 0; otherwise it is -1 + if c = 0, and 1 if c = 1. + * Check the range and return as in (9). + +8. Reiterate like in (3) one or more times using a specific window as + if sq were 0, i.e. around maxexp + logn to maxexp - logn (wq - sq + may be a good choice); in short, determine the sign of a secondary + term, which is the difference between the exact sum and the closest + breakpoint. + + During the iterations, when there are several identical bits on the + left (most significant part) of the accumulator, only one bit needs + to be represented during the computations. But to avoid overflows, + one will keep 2 or more at the start of an iteration, like what is + done in (6). + + Let us see what needs to be done to prepare the first iteration. + Since d ≥ 3 and the TMD occurs, the secondary term is, in absolute + value, less than half the weight of the rounding bit, so that it can + be seen as the fractional part after the rounding bit; the leading + d-1 bits of the truncated secondary term are identical and give its + sign (in the two's complement meaning). Now, if err < minexp, then + at least one of these identical bits is not represented, meaning that + it is 0 and all these bits are 0's; thus, under this condition, the + accumulator is zeroed and minexp is determined from maxexp2 and the + initial value of cq, like in (5) and the first iteration. Otherwise + the identical bits are all represented, and similarly to (6), one + shifts the trailing bits, keeping the last 2 over the d-1 identical + bits, i.e. one shifts the bits from err+1 to minexp, and zeros the + remaining bits of the accumulator; the new minexp is obtained by + subtracting the shift count from it. + + Once the sign of the secondary term is known, depending on: + * the rounding mode rnd (equivalent to N, D, U), + * the value of tmd (1 or 2 in rounding to nearest), + * the rounding bit R (0 or 1), + * the sign sst of the secondary term (negative, zero, positive), + correct the sum if need be (± 1 ulp) and determine the corresponding + ternary value, as in the table below; a question mark means that the + exact value is the middle of two consecutive machine numbers, and a + special rule is used to determine the rounding, equivalent to one of + the enclosing lines (the ones where sst is - or +): + * For nearest-even: + - last significand bit = 0: choose line where correction = 0; + - last significand bit = 1: choose line where correction ≠ 0. + * For nearest-away: choose line where error = +. + + rnd tmd R sst correction ternary + ─────────────────────────────────────────────── + N 1 0 - 0 + + N 1 0 0 0 0 + N 1 0 + 0 - + N 1 1 - 0 + + N 1 1 0 0 0 + N 1 1 + 0 - + ─────────────────────────────────────────────── + N 2 0 - 0 - + N 2 0 0 ? ? (halfway case) + N 2 0 + + + + N 2 1 - - - + N 2 1 0 ? ? (halfway case) + N 2 1 + 0 + + ─────────────────────────────────────────────── + D 1 0 - - - + D 1 0 0 0 0 + D 1 0 + 0 - + D 1 1 - 0 - + D 1 1 0 + 0 + D 1 1 + + - + ─────────────────────────────────────────────── + U 1 0 - - + + U 1 0 0 - 0 + U 1 0 + 0 + + U 1 1 - 0 + + U 1 1 0 0 0 + U 1 1 + + + + ─────────────────────────────────────────────── + +9. Check the range (as usual), and return. + +An example: + + [1] [2] A [3] + u0 = ***** | | . | + u1 = ******|** | . | + u2 = ********|***** | . | + u3 = | *****| . | + u4 = | | ****|** + u5 = | | ***|* + +At iteration 1, minexp is determined to be [1]; thus u0, a part of u1, +and a part of u2 are taken into account for the truncated sum. Then it +appears that an important cancellation occurred, and another step (3) +is needed. Since u1 was truncated, the new maxexp will be minexp, i.e. +[1]. At iteration 2, minexp is determined to be [2]; thus a part of u1, +a part of u2, and u3 are taken into account for the truncated sum. Now +assume that on this example, the error is small enough, but its sign is +unknown. Thus another step (3) is needed, with the conditions of (7). +Since no numbers were truncated at the previous iteration, maxexp is +the maximum exponent of the remaining numbers, here the one of u4, and +minexp is determined to be [3]. Assume that the sign of the error can +be determined now, so that we can return the rounded result with the +ternary value. + +As a bonus, this will also solve overflow, underflow and normalization +issues, since everything is done in fixed point and the output exponent +will be considered only at the end (early overflow detection could also +be done). + +A limb L = *zp in memory will generally contain a part of a significand. +One can define its exponent ze, such that the actual value of this limb +is L * 2^(ze-GMP_NUMB_BITS), i.e. ze is its exponent where the limb is +regarded as a number in [1/2,1[. If an array of limbs zp[] is regarded +as a significand in [1/2,1[, then the exponent of its actual value is +also ze. + +Variables used in the implementation: + tp: pointer to a temporary area that will contain a shifted value. + wp: pointer to the accumulator. + ts: size of the temporary area, in limbs; then wp = tp + ts. + ws: size of the accumulator, in limbs. + wq: size of the accumulator, in bits. + rn: number n' of inputs that are regular numbers (regular inputs). + logn: ceil(log2(rn)). + cq: logn + 1. + sq: output precision (precision of the sum). + maxexp: the maximum exponent of the bit-window of the inputs that is + taken into account (for the current iteration), excluded; + after the sum, it gets the value of maxexp2 until the next + iteration. + minexp: the minimum exponent of the bit-window of the inputs that is + taken into account (for the current iteration), included. + +Note 1: Data locality can be improved after the first iteration if the +shifted values are stored at the end of the temporary area instead of +the beginning. The reason is that only the least significant part of +the accumulator will be used once a good approximate value of the sum +is known, and the accumulator lies just after the temporary area. But +the gain would probably not be noticeable in practice. + +Note 2: At step (4), it was considered to determine the number of +cancelled limbs instead of the number of cancelled bits in order to +avoid a non-trivial shift at step (6), making this step a bit faster +(but particularly in small precisions, with some possible drawbacks). +Moreover, on platforms with a MMU, thus most platforms where GNU MPFR +would be used, we could arrange to reserve a specific address space +for the accumulator and choose the shift count to be a multiple of a +memory block size (or actually not to do any shift/copy at all, just +free the most significant part and allocate a new part when need be), +so that the shift (copy) of the accumulator contents could just be +MMU related changes, without any physical memory copy. However, in +practice, only large precisions with specific inputs would benefit +from such a choice, which would require non portable code, and the +relative speedup would remain small since the sum itself is expected +to take most of the time. + +Note 3: Compared with Ziv's strategy, the issue here is not really +exact values that are very close to a breakpoint, but cancellation. +Moreover, we do not need to recompute everything at each iteration. +The issue with the value very close to a breakpoint actually occurs +at step (8); however, contrary to the usual case, we do not want to +reiterate with more precision as this could take too much time and +memory (a much wider accumulator could be needed). Indeed, due to a +possible "hole" between the inputs, the distance between the exact +value and a breakpoint can be extremely small. + +*** To be considered in future versions *** +It seems that carry propagation (mpn_add_1 & mpn_sub_1 in the code) is +most often limited. But consider the following cases, where all inputs +have the minimal precision 2, and the output precision is p: + u0 = 1 + u_i = (-1)^i * 2^(-p) for i > 0 +Here long carry propagation will occur for each addition of the initial +iteration, so that the complexity will be O(n*p) instead of O(n+p) if +we choose to delay carry propagation (however such a choice may slower +the average case and take more memory, such as around 3*p instead of +2*p). +When a new iteration is needed due to cancellation, a second accumulator +was considered in some early version of the algorithm: the temporary +results of the computations during the new iteration would be stored in +this second accumulator, which would generally be small, thus limiting +carry propagation; this method is actually equivalent to delaying carry +propagation. It could help in some cases, such as: + u0 = 2^q with some q > 0 + u1 = 1 + u2 = -2^q + u_i = (-1)^i * 2^(-p) for i > 2 +but such examples are very specific cases, and as seen with the first +example, a better way must be chosen if avoiding long carry propagation +is regarded as important (in all cases). Moreover, while the use of two +accumulators does not take much more memory (since both accumulators can +occupy the same area, with a flexible limit between them), it probably +makes the code a bit more complex, and noticeably slower if n is small. diff --git a/src/mpfr-impl.h b/src/mpfr-impl.h index 8c3af99d4..0caec6f18 100644 --- a/src/mpfr-impl.h +++ b/src/mpfr-impl.h @@ -2079,4 +2079,31 @@ __MPFR_DECLSPEC void mpfr_mpz_clear _MPFR_PROTO((mpz_ptr)); #endif +/****************************************************** + ************* Value Coverage Checking ************** + ******************************************************/ + +#ifdef MPFR_COV_CHECK + +/* Variable names should start with the __gmpfr_cov_ prefix. */ + +#define MPFR_COV_SET(X) (__gmpfr_cov_ ## X = 1) + +#if defined (__cplusplus) +extern "C" { +#endif + +__MPFR_DECLSPEC extern int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2]; + +#if defined (__cplusplus) +} +#endif + +#else + +#define MPFR_COV_SET(X) ((void) 0) + +#endif + + #endif @@ -1,6 +1,6 @@ /* Sum -- efficiently sum a list of floating-point numbers -Copyright 2004-2015 Free Software Foundation, Inc. +Copyright 2014-2015 Free Software Foundation, Inc. Contributed by the AriC and Caramel projects, INRIA. This file is part of the GNU MPFR Library. @@ -20,327 +20,1152 @@ along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ -/* Reference: James Demmel and Yozo Hida, Fast and accurate floating-point - summation with application to computational geometry, Numerical Algorithms, - volume 37, number 1-4, pages 101--112, 2004. */ - #define MPFR_NEED_LONGLONG_H #include "mpfr-impl.h" -/* FIXME[VL]: mpfr_sum can take too much memory and too much time. - Rewrite using mpn, with additions by blocks (most significant first)? - The algorithm could be as follows: - 1. Search the maximum exponent max_exp of the inputs. - 2. Compute the truncated sum in a window around max_exp + log2(N) to - max_exp - output_prec - 128. - 3. In case of cancellation, this determines a new maximum exponent; - so, reiterate at (2), or (1) if the truncated sum is zero (so that - "holes" will count for nothing). - 4. Take care of the TMD (something like above, since there can still - be cancellations). - As a bonus, this will also solve overflow and underflow issues, since - everything is done in fixed point and the output exponent will be - considered only at the end (early overflow detection can also be done). +/* See the doc/sum.txt file for the algorithm and a part of its proof +(this will later go into algorithms.tex). Note: see the following paper and its references: http://www.eecs.berkeley.edu/~hdnguyen/public/papers/ARITH21_Fast_Sum.pdf +VL: This is very different: + In MPFR In the paper & references + arbitrary precision fixed precision + correct rounding just reproducible rounding + integer operations floating-point operations + sequencial parallel (& sequential) */ -/* I would really like to use "mpfr_srcptr const []" but the norm is buggy: - it doesn't automatically cast a "mpfr_ptr []" to "mpfr_srcptr const []" - if necessary. So the choice are: - mpfr_s ** : OK - mpfr_s *const* : OK - mpfr_s **const : OK - mpfr_s *const*const : OK - const mpfr_s *const* : no - const mpfr_s **const : no - const mpfr_s *const*const: no - VL: this is not a bug, but a feature. See the reason here: - http://c-faq.com/ansi/constmismatch.html -*/ -static void heap_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *); -static void count_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *, - mpfr_exp_t, mpfr_uexp_t); - -/* Either sort the tab in perm and returns 0 - Or returns 1 for +INF, -1 for -INF and 2 for NAN. - Also set *maxprec to the maximal precision of tab[0..n-1] and of the - initial value of *maxprec. -*/ -int -mpfr_sum_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm, - mpfr_prec_t *maxprec) +#ifdef MPFR_COV_CHECK +int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2] = { 0 }; +#endif + +/* Update minexp after detecting a potential integer overflow in extreme + cases (only 32-bit machines may be concerned in practice). */ +#define UPDATE_MINEXP(E,SH) \ + do \ + { \ + mpfr_prec_t sh = (SH); \ + MPFR_ASSERTN ((E) >= MPFR_EXP_MIN + sh); \ + minexp = (E) - sh; \ + } \ + while (0) + +/* Accumulate a new [minexp,maxexp[ block into (wp,ws). If e and err denote + * the exponents of the computed result and of the error bound respectively, + * while e - err is less than some given bound (due to cancellation), shift + * the accumulator and reiterate. + * wp: pointer to the accumulator (least significant limb first). + * ws: size of the accumulator. + * wq: precision of the accumulator (ws * GMP_NUMB_BITS). + * x: array of the input numbers. + * n: size of this array (number of inputs). + * minexp: exponent of the least significant bit of the block. + * maxexp: exponent of the block (maximum exponent + 1). + * tp: pointer to a temporary area. + * ts: size of this temporary area. + * logn: ceil(log2(rn)), where rn is the number of regular inputs. + * prec: minimal value of e - err (see below). + * ep: pointer to mpfr_exp_t (see below), or a null pointer. + * minexpp: pointer to mpfr_exp_t (see below). + * maxexpp: pointer to mpfr_exp_t (see below). + * This function returns the number of cancelled bits (>= 1), or 0 + * if the accumulator is 0 (then the exact sum is necessarily 0). + * In the former case, the function also returns: + * - in ep: the exponent e of the computed result; + * - in minexpp: the last value of minexp. + * - in maxexpp: the new value of maxexp (for the next iteration). + * Notes: + * - minexp is also the exponent of the least significant bit of the + * accumulator; + * - the temporary area must be large enough to hold a shifted input + * block, and the value of ts is used only when the full assertions + * are checked (i.e. with the --enable-assert configure option), to + * check that a buffer overflow doesn't occur; + * - contrary to the returned value of minexp (the value in the last + * iteration), the returned value of maxexp is the one for the next + * iteration (= maxexp2 of the last iteration). + */ +static mpfr_prec_t +sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, mpfr_ptr *const x, + unsigned long n, mpfr_exp_t minexp, mpfr_exp_t maxexp, + mp_limb_t *tp, mp_size_t ts, int logn, mpfr_prec_t prec, + mpfr_exp_t *ep, mpfr_exp_t *minexpp, mpfr_exp_t *maxexpp) { - mpfr_exp_t min, max; - mpfr_uexp_t exp_num; - unsigned long i; - int sign_inf; - - sign_inf = 0; - min = MPFR_EMIN_MAX; - max = MPFR_EMAX_MIN; - for (i = 0; i < n; i++) + MPFR_LOG_FUNC + (("ws=%Pd ts=%Pd prec=%Pd", (mpfr_prec_t) ws, (mpfr_prec_t) ts, prec), + ("", 0)); + + MPFR_ASSERTD (prec >= 0); + + /* Consistency check. */ + MPFR_ASSERTD (wq == (mpfr_prec_t) ws * GMP_NUMB_BITS); + + while (1) { - if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (tab[i]))) - { - if (MPFR_IS_NAN (tab[i])) - return 2; /* Return NAN code */ - else if (MPFR_IS_INF (tab[i])) + mpfr_exp_t maxexp2 = MPFR_EXP_MIN; + unsigned long i; + + MPFR_LOG_MSG (("sum_raw loop: " + "maxexp=%" MPFR_EXP_FSPEC "d " + "minexp=%" MPFR_EXP_FSPEC "d\n", + (mpfr_eexp_t) maxexp, (mpfr_eexp_t) minexp)); + + MPFR_ASSERTD (maxexp > minexp); + + for (i = 0; i < n; i++) + if (! MPFR_IS_SINGULAR (x[i])) + { + mp_limb_t *dp, *vp; + mp_size_t ds, vs, vds; + mpfr_exp_t xe, vd; + mpfr_prec_t xq; + int tr; + + xe = MPFR_GET_EXP (x[i]); + xq = MPFR_GET_PREC (x[i]); + + vp = MPFR_MANT (x[i]); + vs = MPFR_PREC2LIMBS (xq); + vd = xe - vs * GMP_NUMB_BITS - minexp; + /* vd is the exponent of the least significant represented bit of + x[i] (including the trailing bits, whose value is 0) minus the + exponent of the least significant bit of the accumulator. To + make the code simpler, we won't try to filter out the trailing + bits of x[i]. */ + + if (vd < 0) + { + /* This covers the following cases: + * [-+- accumulator ---] + * [---|----- x[i] ------|--] + * | [----- x[i] --|--] + * | |[----- x[i] -----] + * | | [----- x[i] -----] + * maxexp minexp + */ + + if (xe <= minexp) + { + /* x[i] is entirely after the LSB of the accumulator, + so that it will be ignored at this iteration. */ + if (xe > maxexp2) + maxexp2 = xe; + continue; + } + + /* If some significant bits of x[i] are after the LSB of the + accumulator, then maxexp2 will necessarily be minexp. */ + if (MPFR_LIKELY (xe - xq < minexp)) + maxexp2 = minexp; + + /* We need to ignore the least |vd| significant bits of x[i]. + First, let's ignore the least vds = |vd| / GMP_NUMB_BITS + limbs. */ + vd = - vd; + vds = vd / GMP_NUMB_BITS; + vs -= vds; + MPFR_ASSERTD (vs > 0); /* see xe <= minexp test above */ + vp += vds; + vd -= vds * GMP_NUMB_BITS; + MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS); + + if (xe > maxexp) + { + vs -= (xe - maxexp) / GMP_NUMB_BITS; + MPFR_ASSERTD (vs > 0); + tr = (xe - maxexp) % GMP_NUMB_BITS; + } + else + tr = 0; + + if (vd != 0) + { + MPFR_ASSERTD (vs <= ts); + mpn_rshift (tp, vp, vs, vd); + vp = tp; + tr += vd; + if (tr >= GMP_NUMB_BITS) + { + vs--; + tr -= GMP_NUMB_BITS; + } + MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS); + if (tr != 0) + { + tp[vs-1] &= MPFR_LIMB_MASK (GMP_NUMB_BITS - tr); + tr = 0; + } + /* Truncation has now been taken into account. */ + MPFR_ASSERTD (tr == 0); + } + + dp = wp; + ds = ws; + } + else /* vd >= 0 */ + { + /* This covers the following cases: + * [-+- accumulator ---] + * [- x[i] -] | + * [---|-- x[i] ------] | + * [------|-- x[i] ---------] + * | [- x[i] -] | + * maxexp minexp + */ + + /* We need to ignore the least vd significant bits + of the accumulator. First, let's ignore the least + vds = vd / GMP_NUMB_BITS limbs. -> (dp,ds) */ + vds = vd / GMP_NUMB_BITS; + ds = ws - vds; + if (ds <= 0) + continue; + dp = wp + vds; + vd -= vds * GMP_NUMB_BITS; + MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS); + + /* The low part of x[i] (to be determined) will have to be + shifted vd bits to the left if vd != 0. */ + + if (xe > maxexp) + { + vs -= (xe - maxexp) / GMP_NUMB_BITS; + if (vs <= 0) + continue; + tr = (xe - maxexp) % GMP_NUMB_BITS; + } + else + tr = 0; + + MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS && vs > 0); + + /* We need to consider the least significant vs limbs of x[i] + except the most significant tr bits. */ + + if (vd != 0) + { + mp_limb_t carry; + + MPFR_ASSERTD (vs <= ts); + carry = mpn_lshift (tp, vp, vs, vd); + tr -= vd; + if (tr < 0) + { + tr += GMP_NUMB_BITS; + MPFR_ASSERTD (vs + 1 <= ts); + tp[vs++] = carry; + } + MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS); + vp = tp; + } + } + + MPFR_ASSERTD (vs > 0 && vs <= ds); + + /* We can't truncate the most significant limb of the input + (in case it hasn't been shifted to the temporary area). + So, let's ignore it now. It will be taken into account + via carry propagation after the addition. */ + if (tr != 0) + vs--; + + if (MPFR_IS_POS (x[i])) + { + mp_limb_t carry; + + carry = vs > 0 ? mpn_add_n (dp, dp, vp, vs) : 0; + MPFR_ASSERTD (carry <= 1); + if (tr != 0) + carry += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr); + mpn_add_1 (dp + vs, dp + vs, ds - vs, carry); + } + else + { + mp_limb_t borrow; + + borrow = vs > 0 ? mpn_sub_n (dp, dp, vp, vs) : 0; + MPFR_ASSERTD (borrow <= 1); + if (tr != 0) + borrow += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr); + mpn_sub_1 (dp + vs, dp + vs, ds - vs, borrow); + } + } + + { + mpfr_prec_t cancel; /* number of cancelled bits */ + mp_size_t wi; /* index in the accumulator */ + mp_limb_t a, b; + int cnt; + + cancel = 0; + wi = ws - 1; + MPFR_ASSERTD (wi >= 0); + a = wp[wi] >> (GMP_NUMB_BITS - 1) ? MPFR_LIMB_MAX : MPFR_LIMB_ZERO; + + while (wi >= 0) + if ((b = wp[wi]) == a) { - if (sign_inf == 0) /* No previous INF */ - sign_inf = MPFR_SIGN (tab[i]); - else if (sign_inf != MPFR_SIGN (tab[i])) - return 2; /* Return NAN */ + cancel += GMP_NUMB_BITS; + wi--; + } + else + { + b ^= a; + MPFR_ASSERTD (b != 0); + count_leading_zeros (cnt, b); + cancel += cnt; + break; } - } - else - { - MPFR_ASSERTD (MPFR_IS_PURE_FP (tab[i])); - if (MPFR_GET_EXP (tab[i]) < min) - min = MPFR_GET_EXP(tab[i]); - if (MPFR_GET_EXP (tab[i]) > max) - max = MPFR_GET_EXP(tab[i]); - } - if (MPFR_PREC (tab[i]) > *maxprec) - *maxprec = MPFR_PREC (tab[i]); - } - if (MPFR_UNLIKELY (sign_inf != 0)) - return sign_inf; - exp_num = max - min + 1; - /* FIXME : better test */ - if (exp_num > n * MPFR_INT_CEIL_LOG2 (n)) - heap_sort (tab, n, perm); - else - count_sort (tab, n, perm, min, exp_num); - return 0; + if (wi >= 0 || a != MPFR_LIMB_ZERO) /* accumulator != 0 */ + { + mpfr_exp_t e; /* exponent of the computed result */ + mpfr_exp_t err; /* exponent of the error bound */ + + MPFR_LOG_MSG (("accumulator %s 0, cancel=%Pd\n", + a != MPFR_LIMB_ZERO ? "<" : ">", cancel)); + + MPFR_ASSERTD (cancel > 0); + e = minexp + wq - cancel; + MPFR_ASSERTD (e >= minexp); + err = maxexp2 + logn; /* OK even if maxexp2 == MPFR_EXP_MIN */ + + /* The absolute value of the truncated sum is in the binade + [2^(e-1),2^e] (closed on both ends due to two's complement). + The error is strictly less than 2^err (and is 0 if + maxexp2 == MPFR_EXP_MIN). */ + + /* This basically tests whether err <= e - prec without + potential integer overflow (since prec >= 0)... */ + if (err <= e && SAFE_DIFF (mpfr_uexp_t, e, err) >= prec) + { + MPFR_LOG_MSG (("(err=%" MPFR_EXP_FSPEC "d) <= (e=%" + MPFR_EXP_FSPEC "d) - (prec=%Pd)\n", + (mpfr_eexp_t) err, (mpfr_eexp_t) e, prec)); + if (ep != NULL) + *ep = e; + *minexpp = minexp; + *maxexpp = maxexp2; + MPFR_LOG_MSG (("return with minexp=%" MPFR_EXP_FSPEC + "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n", + (mpfr_eexp_t) minexp, (mpfr_eexp_t) maxexp2, + maxexp2 == MPFR_EXP_MIN ? + " (MPFR_EXP_MIN)" : "")); + return cancel; + } + else + { + mpfr_exp_t diffexp; + mpfr_prec_t shiftq; + mpfr_size_t shifts; + int shiftc; + + MPFR_LOG_MSG (("e=%" MPFR_EXP_FSPEC "d err=%" MPFR_EXP_FSPEC + "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n", + (mpfr_eexp_t) e, (mpfr_eexp_t) err, + (mpfr_eexp_t) maxexp2, + maxexp2 == MPFR_EXP_MIN ? + " (MPFR_EXP_MIN)" : "")); + + diffexp = err - e; + if (diffexp < 0) + diffexp = 0; + /* diffexp = max(0, err - e) */ + + MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d\n", + (mpfr_eexp_t) diffexp)); + + MPFR_ASSERTD (diffexp < cancel - 2); + shiftq = cancel - 2 - (mpfr_prec_t) diffexp; + MPFR_ASSERTD (shiftq > 0); + shifts = shiftq / GMP_NUMB_BITS; + shiftc = shiftq % GMP_NUMB_BITS; + MPFR_LOG_MSG (("shiftq = %Pd = %Pd * GMP_NUMB_BITS + %d\n", + shiftq, (mpfr_prec_t) shifts, shiftc)); + if (MPFR_LIKELY (shiftc != 0)) + mpn_lshift (wp + shifts, wp, ws - shifts, shiftc); + else + MPN_COPY_DECR (wp + shifts, wp, ws - shifts); + MPN_ZERO (wp, shifts); + minexp -= shiftq; + } + } + else if (maxexp2 == MPFR_EXP_MIN) + { + MPFR_LOG_MSG (("accumulator = 0, maxexp2 = MPFR_EXP_MIN\n", 0)); + return 0; + } + else + { + MPFR_LOG_MSG (("accumulator = 0, reiterate\n", 0)); + UPDATE_MINEXP (maxexp2, wq - (logn + 1)); + /* Note: the logn + 1 corresponds to cq in the main code. */ + } + } + + maxexp = maxexp2; + } } -#define GET_EXP1(x) (MPFR_IS_ZERO (x) ? min : MPFR_GET_EXP (x)) -/* Performs a count sort of the entries */ -static void -count_sort (mpfr_srcptr *const tab, unsigned long n, - mpfr_srcptr *perm, mpfr_exp_t min, mpfr_uexp_t exp_num) +/**********************************************************************/ + +/* Generic case: all the inputs are finite numbers, + with at least 3 regular numbers. */ +static int +sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, + mpfr_exp_t maxexp, unsigned long rn) { - unsigned long *account; - unsigned long target_rank, i; - MPFR_TMP_DECL(marker); + mp_limb_t *sump; + mp_limb_t *tp; /* pointer to a temporary area */ + mp_limb_t *wp; /* pointer to the accumulator */ + mp_size_t ts; /* size of the temporary area, in limbs */ + mp_size_t ws; /* size of the accumulator, in limbs */ + mpfr_prec_t wq; /* size of the accumulator, in bits */ + int logn; /* ceil(log2(rn)) */ + int cq; + mpfr_prec_t sq; + int inex; + MPFR_TMP_DECL (marker); + + MPFR_LOG_FUNC + (("n=%lu rnd=%d maxexp=%" MPFR_EXP_FSPEC "d rn=%lu", + n, rnd, (mpfr_eexp_t) maxexp, rn), + ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum)); + + MPFR_ASSERTD (rn >= 3 && rn <= n); + + /* In practice, no integer overflow on the exponent. */ + MPFR_STAT_STATIC_ASSERT (MPFR_EXP_MAX - MPFR_EMAX_MAX >= + sizeof (unsigned long) * CHAR_BIT); + + /* Set up some variables and the accumulator. */ - /* Reserve a place for potential 0 (with EXP min-1) - If there is no zero, we only lose one unused entry */ - min--; - exp_num++; + sump = MPFR_MANT (sum); + + /* rn is the number of regular inputs (the singular ones will be + ignored). Compute logn = ceil(log2(rn)). */ + logn = MPFR_INT_CEIL_LOG2 (rn); + MPFR_ASSERTD (logn >= 2); + + MPFR_LOG_MSG (("logn=%d maxexp=%" MPFR_EXP_FSPEC "d\n", + logn, (mpfr_eexp_t) maxexp)); + + sq = MPFR_GET_PREC (sum); + cq = logn + 1; + + /* First determine the size of the accumulator. */ + ws = MPFR_PREC2LIMBS (cq + sq + logn + 2); + wq = (mpfr_prec_t) ws * GMP_NUMB_BITS; + MPFR_ASSERTD (wq - cq - sq >= 4); + + MPFR_LOG_MSG (("cq=%d sq=%Pd logn=%d wq=%Pd\n", cq, sq, logn, wq)); + + /* An input block will have up to wq - cq bits, and its shifted value + (to be correctly aligned) may take GMP_NUMB_BITS - 1 additional bits. */ + ts = MPFR_PREC2LIMBS (wq - cq + GMP_NUMB_BITS - 1); - /* Performs a counting sort of the entries */ MPFR_TMP_MARK (marker); - account = (unsigned long *) MPFR_TMP_ALLOC (exp_num * sizeof *account); - for (i = 0; i < exp_num; i++) - account[i] = 0; - for (i = 0; i < n; i++) - account[GET_EXP1 (tab[i]) - min]++; - for (i = exp_num - 1; i >= 1; i--) - account[i - 1] += account[i]; - for (i = 0; i < n; i++) - { - target_rank = --account[GET_EXP1 (tab[i]) - min]; - perm[target_rank] = tab[i]; - } + + tp = MPFR_TMP_LIMBS_ALLOC (ts + ws); + wp = tp + ts; + + MPN_ZERO (wp, ws); /* zero the accumulator */ + + { + mpfr_exp_t minexp; /* exponent of the LSB of the block */ + mpfr_prec_t cancel; /* number of cancelled bits */ + mpfr_exp_t e; /* temporary exponent of the result */ + mpfr_exp_t u; /* temporary exponent of the ulp (quantum) */ + mp_limb_t rbit; /* rounding bit (corrected in halfway case) */ + int corr; /* correction term (from -1 to 2) */ + int sd, sh; /* shift counts */ + mp_size_t sn; /* size of the output number */ + int tmd; /* 0: the TMD does not occur + 1: the TMD occurs on a machine number + 2: the TMD occurs on a midpoint */ + int pos; /* 0 if negative sum, 1 if positive */ + + MPFR_LOG_MSG (("Compute an approximation with sum_raw...\n", 0)); + + UPDATE_MINEXP (maxexp, wq - cq); + cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts, + logn, sq + 3, &e, &minexp, &maxexp); + + if (MPFR_UNLIKELY (cancel == 0)) + { + /* The exact sum is zero. Since not all inputs are 0, the sum + * is +0 except in MPFR_RNDD, as specified according to the + * IEEE 754 rules for the addition of two numbers. + */ + MPFR_SET_SIGN (sum, (rnd != MPFR_RNDD ? + MPFR_SIGN_POS : MPFR_SIGN_NEG)); + MPFR_SET_ZERO (sum); + MPFR_TMP_FREE (marker); + MPFR_RET (0); + } + + /* The absolute value of the truncated sum is in the binade + [2^(e-1),2^e] (closed on both ends due to two's complement). + The error is strictly less than 2^(maxexp + logn) (and is 0 + if maxexp == MPFR_EXP_MIN). */ + + u = e - sq; /* e being the exponent, u is the ulp of the target */ + + MPFR_LOG_MSG (("cancel=%Pd" + " e=%" MPFR_EXP_FSPEC "d" + " u=%" MPFR_EXP_FSPEC "d" + " maxexp=%" MPFR_EXP_FSPEC "d%s\n", + cancel, (mpfr_eexp_t) e, (mpfr_eexp_t) u, + (mpfr_eexp_t) maxexp, + maxexp == MPFR_EXP_MIN ? " (MPFR_EXP_MIN)" : "")); + + /* Let's copy/shift the bits [max(u,minexp),e) to the + most significant part of the destination, and zero + the least significant part (there can be one only if + u < minexp). The trailing bits of the destination may + contain garbage at this point. Then, at the same time, + take the absolute value and do an initial rounding, + zeroing the trailing bits at this point. + TODO: This may be improved by merging some operations + is particular case. The average speed-up may not be + significant, though. To be tested... */ + + sn = MPFR_PREC2LIMBS (sq); + sd = (mpfr_prec_t) sn * GMP_NUMB_BITS - sq; + sh = cancel % GMP_NUMB_BITS; + + MPFR_ASSERTD (sd >= 0 && sd < GMP_NUMB_BITS); + + if (MPFR_LIKELY (u > minexp)) + { + mpfr_prec_t tq; + mp_size_t ei, fi, wi; + int td; + + tq = u - minexp; + MPFR_ASSERTD (tq > 0); /* number of trailing bits */ + MPFR_LOG_MSG (("tq=%Pd\n", tq)); + + wi = tq / GMP_NUMB_BITS; + + if (MPFR_LIKELY (sh != 0)) + { + ei = (e - minexp) / GMP_NUMB_BITS; + fi = ei - (sn - 1); + MPFR_ASSERTD (fi == wi || fi == wi + 1); + mpn_lshift (sump, wp + fi, sn, sh); + if (fi != wi) + sump[0] |= wp[wi] >> (GMP_NUMB_BITS - sh); + } + else + { + MPFR_ASSERTD ((mpfr_prec_t) (ws - (wi + sn)) * GMP_NUMB_BITS + == cancel); + MPN_COPY (sump, wp + wi, sn); + } + + /* Determine the rounding bit, which is represented. */ + td = tq % GMP_NUMB_BITS; + rbit = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) : + (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1)); + MPFR_ASSERTD (rbit == 0 || rbit == 1); + MPFR_LOG_MSG (("rbit=%d\n", (int) rbit)); + + if (maxexp == MPFR_EXP_MIN) + { + /* The sum in the accumulator is exact. Determine inex: + inex = 0 if the final sum is exact, else 1, i.e. + inex = rounding bit || sticky bit. In round to nearest, + also determine the rounding direction: obtained from + the rounding bit possibly except in halfway cases. */ + if (MPFR_LIKELY (rbit == 0 || + (rnd == MPFR_RNDN && ((wp[wi] >> td) & 1) == 0))) + { + /* We need to determine the sticky bit, either to set inex + (if the rounding bit is 0) or to possibly "correct" rbit + (round to nearest, halfway case rounded downward) from + which the rounding direction will be determined. */ + MPFR_LOG_MSG (("Determine the sticky bit...\n", 0)); + + inex = td >= 2 ? (wp[wi] & MPFR_LIMB_MASK (td - 1)) != 0 + : td == 0 ? + (MPFR_ASSERTD (wi >= 1), + (wp[--wi] & MPFR_LIMB_MASK (GMP_NUMB_BITS - 1)) != 0) + : 0; + + if (!inex) + { + mp_size_t wj = wi; + + while (!inex && wj > 0) + inex = wp[--wj] != 0; + if (!inex && rbit != 0) + { + /* sticky bit = 0, rounding bit = 1, + i.e. halfway case, which will be + rounded downward (see earlier if). */ + MPFR_ASSERTD (rnd == MPFR_RNDN); + inex = 1; + rbit = 0; /* even rounding downward */ + MPFR_LOG_MSG (("Halfway case rounded downward;" + " set inex=1 rbit=0\n", 0)); + } + } + } + else + inex = 1; + tmd = 0; /* We can round correctly -> no TMD. */ + } + else /* maxexp > MPFR_EXP_MIN */ + { + mpfr_exp_t d; + mp_limb_t limb, mask; + int nbits; + + inex = 1; /* We do not know whether the sum is exact. */ + + MPFR_ASSERTD (u <= MPFR_EMAX_MAX); + d = u - (maxexp + logn); /* representable */ + MPFR_ASSERTD (d >= 3); + + /* Let's see whether the TMD occurs by looking at the d bits + following the ulp bit, or the d-1 bits after the rounding + bit. */ + + /* First chunk after the rounding bit... It starts at: + (wi,td-2) if td >= 2, + (wi-1,td-2+GMP_NUMB_BITS) if td < 2. */ + if (td == 0) + { + MPFR_ASSERTD (wi >= 1); + limb = wp[--wi]; + mask = MPFR_LIMB_MASK (GMP_NUMB_BITS - 1); + nbits = GMP_NUMB_BITS; + } + else if (td == 1) + { + limb = wi >= 1 ? wp[--wi] : MPFR_LIMB_ZERO; + mask = MPFR_LIMB_MAX; + nbits = GMP_NUMB_BITS + 1; + } + else /* td >= 2 */ + { + MPFR_ASSERTD (td >= 2); + limb = wp[wi]; + mask = MPFR_LIMB_MASK (td - 1); + nbits = td; + } + + /* nbits: number of bits of the first chunk + 1 + (the +1 is for the rounding bit). */ + + if (nbits > d) + { + /* Some low significant bits must be ignored. */ + limb >>= nbits - d; + mask >>= nbits - d; + d = 0; + } + else + { + d -= nbits; + MPFR_ASSERTD (d >= 0); + } + + limb &= mask; + tmd = + limb == MPFR_LIMB_ZERO ? + (rbit == 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) : + limb == mask ? + (limb = MPFR_LIMB_MAX, + rbit != 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) : 0; + + while (tmd != 0 && d != 0) + { + mp_limb_t limb2; + + MPFR_ASSERTD (d > 0); + if (wi == 0) + { + /* The non-represented bits are 0's. */ + if (limb != MPFR_LIMB_ZERO) + tmd = 0; + break; + } + MPFR_ASSERTD (wi > 0); + limb2 = wp[--wi]; + if (d < GMP_NUMB_BITS) + { + int c = GMP_NUMB_BITS - d; + MPFR_ASSERTD (c > 0 && c < GMP_NUMB_BITS); + if ((limb2 >> c) != (limb >> c)) + tmd = 0; + break; + } + if (limb2 != limb) + tmd = 0; + d -= GMP_NUMB_BITS; + } + } + } + else /* u <= minexp */ + { + mp_size_t en; + + en = (e - minexp + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS; + if (MPFR_LIKELY (sh != 0)) + mpn_lshift (sump + sn - en, wp, en, sh); + else if (MPFR_UNLIKELY (en > 0)) + MPN_COPY (sump + sn - en, wp, en); + if (sn > en) + MPN_ZERO (sump, sn - en); + + /* The exact value of the accumulator has been copied. + * The TMD occurs if and only if there are bits still + * not taken into account, and if it occurs, this is + * necessarily on a machine number (-> tmd = 1). + */ + rbit = 0; + inex = tmd = maxexp != MPFR_EXP_MIN; + } + + /* Leading bit: 1 if positive, 0 if negative. */ + pos = sump[sn-1] >> (GMP_NUMB_BITS - 1); + + MPFR_ASSERTD (rbit == 0 || rbit == 1); + + MPFR_LOG_MSG (("tmd=%d rbit=%d inex=%d pos=%d\n", + tmd, (int) rbit, inex, pos)); + + /* Here, if the final sum is known to be exact, inex = 0, otherwise + * inex = 1. We have a truncated significand, a trailing term t such + * that 0 <= t < 1 ulp, and an error on the trailing term bounded by + * t' in absolute value. Thus the error e on the truncated significand + * satisfies -t' <= e < 1 ulp + t'. Thus one has 4 correction cases + * denoted by a corr value between -1 and 2 depending on e, pos, rbit, + * and the rounding mode: + * -1: equivalent to nextbelow; + * 0: the truncated significand is not corrected; + * 1: add 1 ulp; + * 2: add 1 ulp, then nextabove. + * The nextbelow and nextabove are used here since there may be a + * change of the binade. + */ + + if (tmd == 0) /* no TMD */ + { + switch (rnd) + { + case MPFR_RNDD: + corr = 0; + break; + case MPFR_RNDU: + corr = inex; + break; + case MPFR_RNDZ: + corr = inex && !pos; + break; + case MPFR_RNDA: + corr = inex && pos; + break; + default: + MPFR_ASSERTN (rnd == MPFR_RNDN); + /* Note: for halfway cases (maxexp == MPFR_EXP_MIN) that are + rounded downward, rbit has been changed to 0 so that corr + is set correctly. */ + corr = rbit; + } + MPFR_ASSERTD (corr == 0 || corr == 1); + if (inex && corr == 0) /* two's complement significand decreased */ + inex = -1; + } + else + { + mpfr_exp_t err; /* exponent of the error bound */ + mp_size_t zs; + int sst; /* sign of the secondary term */ + + MPFR_ASSERTD (maxexp > MPFR_EXP_MIN); + MPFR_ASSERTD (tmd == 1 || tmd == 2); + + /* New accumulator size */ + ws = MPFR_PREC2LIMBS (wq - sq); + wq = (mpfr_prec_t) ws * GMP_NUMB_BITS; + + err = maxexp + logn; + + MPFR_LOG_MSG (("TMD with" + " maxexp=%" MPFR_EXP_FSPEC "d" + " err=%" MPFR_EXP_FSPEC "d" + " ws=%Pd" + " wq=%Pd\n", + (mpfr_eexp_t) maxexp, (mpfr_eexp_t) err, + (mpfr_prec_t) ws, wq)); + + /* The d-1 bits from u-2 to u-d (= err) are identical. */ + + if (err >= minexp) + { + mpfr_prec_t tq; + mp_size_t wi; + int td; + + /* Let's keep the last 2 over the d-1 identical bits and the + following bits, i.e. the bits from err+1 to minexp. */ + tq = err - minexp + 2; /* tq = number of such bits */ + MPFR_LOG_MSG (("[TMD] tq=%Pd\n", tq)); + MPFR_ASSERTD (tq >= 2); + + wi = tq / GMP_NUMB_BITS; + td = tq % GMP_NUMB_BITS; + + if (td != 0) + { + wi++; /* number of words with represented bits */ + td = GMP_NUMB_BITS - td; + zs = ws - wi; + MPFR_ASSERTD (zs >= 0 && zs < ws); + mpn_lshift (wp + zs, wp, wi, td); + } + else + { + MPFR_ASSERTD (wi > 0); + zs = ws - wi; + MPFR_ASSERTD (zs >= 0 && zs < ws); + if (zs > 0) + MPN_COPY_INCR (wp + zs, wp, wi); + } + + UPDATE_MINEXP (minexp, zs * GMP_NUMB_BITS + td); + MPFR_ASSERTD (minexp == err + 2 - wq); + } + else /* err < minexp */ + { + /* At least one of the identical bits is not represented, + meaning that it is 0 and all these bits are 0's. Thus + the accumulator will be 0. The new minexp is determined + from maxexp, with cq bits reserved to avoid an overflow + (as in the early steps). */ + MPFR_LOG_MSG (("[TMD] err < minexp\n", 0)); + zs = ws; + + /* minexp = maxexp + cq - wq */ + UPDATE_MINEXP (maxexp, wq - cq); + MPFR_ASSERTD (minexp == err + 1 - wq); + } + + MPN_ZERO (wp, zs); + + cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts, + logn, 0, NULL, &minexp, &maxexp); + + if (cancel != 0) + sst = MPFR_LIMB_MSB (wp[ws-1]) == 0 ? 1 : -1; + else if (tmd == 1) + sst = 0; + else + { + /* For halfway cases, let's virtually eliminate them + by setting a sst equivalent to a non-halfway case, + which depends on the last bit of the pre-rounded + result. */ + MPFR_ASSERTD (rnd == MPFR_RNDN && tmd == 2); + sst = (sump[0] & (MPFR_LIMB_ONE << sd)) ? 1 : -1; + } + + MPFR_LOG_MSG (("[TMD] tmd=%d rbit=%d sst=%d\n", + tmd, (int) rbit, sst)); + + /* Do not consider the corrected sst for MPFR_COV_SET */ + MPFR_COV_SET (sum_tmd[(int) rnd][tmd-1][rbit] + [cancel == 0 ? 1 : sst+1][pos]); + + inex = + MPFR_IS_LIKE_RNDD (rnd, pos ? 1 : -1) ? (sst ? -1 : 0) : + MPFR_IS_LIKE_RNDU (rnd, pos ? 1 : -1) ? (sst ? 1 : 0) : + (MPFR_ASSERTD (rnd == MPFR_RNDN), + tmd == 1 ? - sst : sst); + + if (tmd == 2 && sst == (rbit ? -1 : 1)) + corr = 1 - (int) rbit; + else if (MPFR_IS_LIKE_RNDD (rnd, pos ? 1 : -1) && sst == -1) + corr = (int) rbit - 1; + else if (MPFR_IS_LIKE_RNDU (rnd, pos ? 1 : -1) && sst == +1) + corr = (int) rbit + 1; + else + corr = (int) rbit; + } + + MPFR_LOG_MSG (("pos=%d corr=%d inex=%d\n", pos, corr, inex)); + + /* Sign handling (-> absolute value and sign), together with + rounding. The most common cases are corr = 0 and corr = 1 + as this is necessarily the case when the TMD did not occur. */ + + MPFR_ASSERTD (corr >= -1 && corr <= 2); + + if (pos) /* positive result */ + { + MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0); + MPFR_SET_POS (sum); + sump[0] &= ~ MPFR_LIMB_MASK (sd); + + if (corr > 0) + { + mp_limb_t corr2, carry_out; + + corr2 = (mp_limb_t) corr << sd; + /* If corr == 2 && sd == GMP_NUMB_BITS - 1, this overflows + to corr2 = 0. This case is taken into account below. */ + + carry_out = corr2 != 0 ? mpn_add_1 (sump, sump, sn, corr2) : + (MPFR_ASSERTD (sn > 1), + mpn_add_1 (sump + 1, sump + 1, sn - 1, MPFR_LIMB_ONE)); + + MPFR_ASSERTD (sump[sn-1] >> (GMP_NUMB_BITS - 1) == !carry_out); + + if (MPFR_UNLIKELY (carry_out)) + { + /* Note: The | is important when sump[sn-1] is not 0 + (this can occur with sn = 1 and corr = 2). TODO: + Add something to make sure that this is tested. */ + sump[sn-1] |= MPFR_LIMB_HIGHBIT; + e++; + } + } + + if (corr < 0) + { + mpn_sub_1 (sump, sump, sn, MPFR_LIMB_ONE << sd); + + if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0)) + { + sump[sn-1] |= MPFR_LIMB_HIGHBIT; + e--; + } + } + } + else /* negative result */ + { + MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) == 0); + MPFR_SET_NEG (sum); + + /* abs(x + corr) = - (x + corr) = com(x) + (1 - corr) */ + + /* We want to avoid separate mpn_com (or mpn_neg) and mpn_add_1 + (or mpn_sub_1) operations, as they could yield two loops in + some particular cases involving a long sequence of 0's in + the low significant bits (except the least significant bit, + which doesn't matter). */ + + if (corr <= 1) + { + mp_limb_t corr2; + + /* Here we can just do the correction operation on the + least significant limb, then do either a mpn_com or + a mpn_neg on the remaining limbs, depending on the + carry (BTW, mpn_neg is just a mpn_com with an initial + carry propagation: after some point, mpn_neg does a + complement). */ + + corr2 = (mp_limb_t) (1 - corr) << sd; + /* Note: If corr = -1, this can overflow to corr2 = 0. + This case is taken into account below. */ + + sump[0] = (~ (sump[0] | MPFR_LIMB_MASK (sd))) + corr2; + + if (sump[0] < corr2 || (corr2 == 0 && corr < 0)) + { + if (sn == 1 || ! mpn_neg (sump + 1, sump + 1, sn - 1)) + { + /* Note: The | is important when sump[sn-1] is not 0 + (this can occur with sn = 1 and corr = -1). TODO: + Add something to make sure that this is tested. */ + sump[sn-1] |= MPFR_LIMB_HIGHBIT; + e++; + } + } + else if (sn > 1) + mpn_com (sump + 1, sump + 1, sn - 1); + } + else /* corr == 2 */ + { + mp_limb_t corr2, c; + mp_size_t i = 1; + + /* We want to compute com(x) - 1, but GMP doesn't have an + operation for that. The fact is that a sequence of low + significant bits 1 is invariant. Starting at the first + low significant bit 0, we can do the complement with + mpn_com. */ + + corr2 = MPFR_LIMB_ONE << sd; + c = ~ (sump[0] | MPFR_LIMB_MASK (sd)); + sump[0] = c - corr2; + + if (c == 0) + { + while (MPFR_ASSERTD (i < sn), sump[i] == MPFR_LIMB_MAX) + i++; + sump[i] = (~ sump[i]) - 1; + i++; + } + + if (i < sn) + mpn_com (sump + i, sump + i, sn - i); + else if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0)) + { + /* Happens on 01111...111, whose complement is + 10000...000, and com(x) - 1 is 01111...111. */ + sump[sn-1] |= MPFR_LIMB_HIGHBIT; + e--; + } + } + } + + MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0); + MPFR_LOG_MSG (("Set exponent e=%" MPFR_EXP_FSPEC "d\n", (mpfr_eexp_t) e)); + /* e may be outside the current exponent range, but this will be checked + with mpfr_check_range below. */ + MPFR_EXP (sum) = e; + } /* main block */ + MPFR_TMP_FREE (marker); + return mpfr_check_range (sum, inex, rnd); } +/**********************************************************************/ -#define GET_EXP2(x) (MPFR_IS_ZERO (x) ? MPFR_EMIN_MIN : MPFR_GET_EXP (x)) - -/* Performs a heap sort of the entries */ -static void -heap_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm) +int +mpfr_sum (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd) { - unsigned long dernier_traite; - unsigned long i, pere; - mpfr_srcptr tmp; - unsigned long fils_gauche, fils_droit, fils_indigne; - /* Reminder of a heap structure : - node(i) has for left son node(2i +1) and right son node(2i) - and father(node(i)) = node((i - 1) / 2) - */ - - /* initialize the permutation to identity */ - for (i = 0; i < n; i++) - perm[i] = tab[i]; - - /* insertion phase */ - for (dernier_traite = 1; dernier_traite < n; dernier_traite++) + MPFR_LOG_FUNC + (("n=%lu rnd=%d", n, rnd), + ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum)); + + if (MPFR_UNLIKELY (n <= 2)) { - i = dernier_traite; - while (i > 0) + if (n == 0) { - pere = (i - 1) / 2; - if (GET_EXP2 (perm[pere]) > GET_EXP2 (perm[i])) - { - tmp = perm[pere]; - perm[pere] = perm[i]; - perm[i] = tmp; - i = pere; - } - else - break; + MPFR_SET_ZERO (sum); + MPFR_SET_POS (sum); + MPFR_RET (0); } + else if (n == 1) + return mpfr_set (sum, x[0], rnd); + else + return mpfr_add (sum, x[0], x[1], rnd); } - - /* extraction phase */ - for (dernier_traite = n - 1; dernier_traite > 0; dernier_traite--) + else { - tmp = perm[0]; - perm[0] = perm[dernier_traite]; - perm[dernier_traite] = tmp; + mpfr_exp_t maxexp = MPFR_EXP_MIN; /* max(Empty) */ + unsigned long i; + unsigned long rn = 0; /* will be the number of regular inputs */ + /* sign of infinities and zeros (0: currently unknown) */ + int sign_inf = 0, sign_zero = 0; + + MPFR_LOG_MSG (("Check for special inputs (n = %lu >= 3)\n", n)); - i = 0; - while (1) + for (i = 0; i < n; i++) { - fils_gauche = 2 * i + 1; - fils_droit = fils_gauche + 1; - if (fils_gauche < dernier_traite) + if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x[i]))) { - if (fils_droit < dernier_traite) + if (MPFR_IS_NAN (x[i])) { - if (GET_EXP2(perm[fils_droit]) < GET_EXP2(perm[fils_gauche])) - fils_indigne = fils_droit; - else - fils_indigne = fils_gauche; - - if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_indigne])) - { - tmp = perm[i]; - perm[i] = perm[fils_indigne]; - perm[fils_indigne] = tmp; - i = fils_indigne; - } - else - break; + /* The current value x[i] is NaN. Then the sum is NaN. */ + nan: + MPFR_SET_NAN (sum); + MPFR_RET_NAN; } - else /* on a un fils gauche, pas de fils droit */ + else if (MPFR_IS_INF (x[i])) { - if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_gauche])) - { - tmp = perm[i]; - perm[i] = perm[fils_gauche]; - perm[fils_gauche] = tmp; - } - break; + /* The current value x[i] is an infinity. + There are two cases: + 1. This is the first infinity value (sign_inf == 0). + Then set sign_inf to its sign, and go on. + 2. All the infinities found until now have the same + sign sign_inf. If this new infinity has a different + sign, then return NaN immediately, else go on. */ + if (sign_inf == 0) + sign_inf = MPFR_SIGN (x[i]); + else if (MPFR_SIGN (x[i]) != sign_inf) + goto nan; + } + else if (MPFR_UNLIKELY (rn == 0)) + { + /* The current value x[i] is a zero. The code below matters + only when all values found until now are zeros, otherwise + it is harmless (the test rn == 0 above is just a minor + optimization). + Here we track the sign of the zero result when all inputs + are zeros: if all zeros have the same sign, the result + will have this sign, otherwise (i.e. if there is at least + a zero of each sign), the sign of the zero result depends + only on the rounding mode (note that this choice is + sticky when new zeros are considered). */ + MPFR_ASSERTD (MPFR_IS_ZERO (x[i])); + if (sign_zero == 0) + sign_zero = MPFR_SIGN (x[i]); + else if (MPFR_SIGN (x[i]) != sign_zero) + sign_zero = rnd == MPFR_RNDD ? -1 : 1; } } - else /* on n'a pas de fils */ - break; + else + { + /* The current value x[i] is a regular number. */ + mpfr_exp_t e = MPFR_GET_EXP (x[i]); + if (e > maxexp) + maxexp = e; /* maximum exponent found until now */ + rn++; /* current number of regular inputs */ + } } - } -} - -/* Sum a list of float with order given by permutation perm, - * intermediate size set to F. Return non-zero if at least one of - * the operations is inexact (thus 0 implies that the sum is exact). - * Internal use function. - */ -static int -sum_once (mpfr_ptr ret, mpfr_srcptr *const tab, unsigned long n, mpfr_prec_t F) -{ - mpfr_t sum; - unsigned long i; - int error_trap; + MPFR_LOG_MSG (("rn=%lu sign_inf=%d sign_zero=%d\n", + rn, sign_inf, sign_zero)); - MPFR_ASSERTD (n >= 2); + /* At this point the result cannot be NaN (this case has already + been filtered out). */ - mpfr_init2 (sum, F); - error_trap = mpfr_set (sum, tab[0], MPFR_RNDN); - for (i = 1; i < n - 1; i++) - { - MPFR_ASSERTD (!MPFR_IS_NAN (sum) && !MPFR_IS_INF (sum)); - if (mpfr_add (sum, sum, tab[i], MPFR_RNDN)) - error_trap = 1; - } - if (mpfr_add (ret, sum, tab[n - 1], MPFR_RNDN)) - error_trap = 1; - mpfr_clear (sum); - return error_trap; -} + if (MPFR_UNLIKELY (sign_inf != 0)) + { + /* At least one infinity, and all of them have the same sign + sign_inf. The sum is the infinity of this sign. */ + MPFR_SET_INF (sum); + MPFR_SET_SIGN (sum, sign_inf); + MPFR_RET (0); + } -/* Sum a list of floating-point numbers. - * If the return value is 0, then the sum is exact. - * Otherwise the return value gives no information. - */ -int -mpfr_sum (mpfr_ptr ret, mpfr_ptr *const tab_p, unsigned long n, mpfr_rnd_t rnd) -{ - mpfr_t cur_sum; - mpfr_prec_t prec; - mpfr_srcptr *perm, *const tab = (mpfr_srcptr *) tab_p; - int k, error_trap; - MPFR_ZIV_DECL (loop); - MPFR_SAVE_EXPO_DECL (expo); - MPFR_TMP_DECL (marker); + /* At this point, all the inputs are finite numbers. */ - if (MPFR_UNLIKELY (n <= 1)) - { - if (n < 1) + if (MPFR_UNLIKELY (rn == 0)) { - MPFR_SET_ZERO (ret); - MPFR_SET_POS (ret); - return 0; + /* All the numbers were zeros (and there is at least one). + The sum is zero with sign sign_zero. */ + MPFR_ASSERTD (sign_zero != 0); + MPFR_SET_ZERO (sum); + MPFR_SET_SIGN (sum, sign_zero); + MPFR_RET (0); } - else - return mpfr_set (ret, tab[0], rnd); - } - /* Sort and treat special cases */ - MPFR_TMP_MARK (marker); - perm = (mpfr_srcptr *) MPFR_TMP_ALLOC (n * sizeof *perm); - prec = MPFR_PREC (ret); - error_trap = mpfr_sum_sort (tab, n, perm, &prec); - /* Check if there was a NAN or a INF */ - if (MPFR_UNLIKELY (error_trap != 0)) - { - MPFR_TMP_FREE (marker); - if (error_trap == 2) + /* Optimize the case where there are only two regular numbers. */ + if (MPFR_UNLIKELY (rn <= 2)) { - MPFR_SET_NAN (ret); - MPFR_RET_NAN; - } - MPFR_SET_INF (ret); - MPFR_SET_SIGN (ret, error_trap); - MPFR_RET (0); - } + unsigned long h = ULONG_MAX; - /* Initial precision is max(prec(ret),prec(tab[0]),...,prec(tab[n-1])) */ - k = MPFR_INT_CEIL_LOG2 (n) + 1; - prec += k + 2; - mpfr_init2 (cur_sum, prec); + for (i = 0; i < n; i++) + if (! MPFR_IS_SINGULAR (x[i])) + { + if (rn == 1) + return mpfr_set (sum, x[i], rnd); + if (h != ULONG_MAX) + return mpfr_add (sum, x[h], x[i], rnd); + h = i; + } + MPFR_RET_NEVER_GO_HERE(); + } - /* Ziv Loop */ - MPFR_SAVE_EXPO_MARK (expo); - MPFR_ZIV_INIT (loop, prec); - for (;;) - { - error_trap = sum_once (cur_sum, perm, n, prec + k); - if (MPFR_LIKELY (error_trap == 0 || - (!MPFR_IS_ZERO (cur_sum) && - mpfr_can_round (cur_sum, prec - 2, - MPFR_RNDN, rnd, MPFR_PREC (ret))))) - break; - MPFR_ZIV_NEXT (loop, prec); - mpfr_set_prec (cur_sum, prec); + return sum_aux (sum, x, n, rnd, maxexp, rn); } - MPFR_ZIV_FREE (loop); - MPFR_TMP_FREE (marker); - - if (mpfr_set (ret, cur_sum, rnd)) - error_trap = 1; - mpfr_clear (cur_sum); - - MPFR_SAVE_EXPO_FREE (expo); - if (mpfr_check_range (ret, 0, rnd)) - error_trap = 1; - return error_trap; /* It doesn't return the ternary value */ } - -/* __END__ */ diff --git a/tests/tsum.c b/tests/tsum.c index ead15a0dc..2eee92755 100644 --- a/tests/tsum.c +++ b/tests/tsum.c @@ -20,219 +20,119 @@ along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ -/* TODO: add some generic random test with cancellations. Something like: - 1. Generate random numbers with random precisions. - 2. Compute the sum s at some random precision and some rounding mode. - 3. While s != 0: - 4. Include -s in the array. - 5. Reorder the terms randomly. - 6. Recompute a new sum s' at some random precision and some rounding mode. - 7. Check that |s'| < ulp(s), with a factor 1/2 for MPFR_RNDN. - 8. Reiterate at (3) with s = s'. - Also add tests with intermediate overflows and tests with underflows - (this matters here as we don't have subnormals). - Note: partly done in the cancel() test (r8857); the result is not tested - yet, but this test already shows an efficiency problem. -*/ - #include "mpfr-test.h" #ifndef MPFR_NCANCEL #define MPFR_NCANCEL 10 #endif -static int -check_is_sorted (unsigned long n, mpfr_srcptr *perm) -{ - unsigned long i; - - for (i = 0; i < n - 1; i++) - if (MPFR_GET_EXP(perm[i]) < MPFR_GET_EXP(perm[i+1])) - return 0; - return 1; -} - -static int -sum_tab (mpfr_ptr ret, mpfr_t *tab, unsigned long n, mpfr_rnd_t rnd) +static mpfr_prec_t +get_prec_max (mpfr_t *t, int n) { - mpfr_ptr *tabtmp; - unsigned long i; - int inexact; - MPFR_TMP_DECL(marker); + mpfr_exp_t e, min, max; + int i; - MPFR_TMP_MARK(marker); - tabtmp = (mpfr_ptr *) MPFR_TMP_ALLOC(n * sizeof(mpfr_srcptr)); + min = MPFR_EMAX_MAX; + max = MPFR_EMIN_MIN; for (i = 0; i < n; i++) - tabtmp[i] = tab[i]; + if (MPFR_IS_PURE_FP (t[i])) + { + e = MPFR_GET_EXP (t[i]); + if (e > max) + max = e; + e -= MPFR_GET_PREC (t[i]); + if (e < min) + min = e; + } - inexact = mpfr_sum (ret, tabtmp, n, rnd); - MPFR_TMP_FREE(marker); - return inexact; + return min > max ? MPFR_PREC_MIN /* no pure FP values */ + : max - min + __gmpfr_ceil_log2 (n); } - -static mpfr_prec_t -get_prec_max (mpfr_t *tab, unsigned long n, mpfr_prec_t f) -{ - mpfr_prec_t res; - mpfr_exp_t min, max; - unsigned long i; - - i = 0; - while (MPFR_IS_ZERO (tab[i])) - { - i++; - if (i == n) - return MPFR_PREC_MIN; /* all values are 0 */ - } - - if (! mpfr_check (tab[i])) - { - printf ("tab[%lu] is not valid.\n", i); - exit (1); - } - MPFR_ASSERTN (MPFR_IS_FP (tab[i])); - min = max = MPFR_GET_EXP(tab[i]); - for (i++; i < n; i++) - { - if (! mpfr_check (tab[i])) - { - printf ("tab[%lu] is not valid.\n", i); - exit (1); - } - MPFR_ASSERTN (MPFR_IS_FP (tab[i])); - if (! MPFR_IS_ZERO (tab[i])) - { - if (MPFR_GET_EXP(tab[i]) > max) - max = MPFR_GET_EXP(tab[i]); - if (MPFR_GET_EXP(tab[i]) < min) - min = MPFR_GET_EXP(tab[i]); - } - } - res = max - min; - res += f; - res += __gmpfr_ceil_log2 (n) + 1; - return res; -} - - static void -algo_exact (mpfr_t somme, mpfr_t *tab, unsigned long n, mpfr_prec_t f) +get_exact_sum (mpfr_t sum, mpfr_t *tab, int n) { - unsigned long i; - mpfr_prec_t prec_max; + int i; - prec_max = get_prec_max(tab, n, f); - mpfr_set_prec (somme, prec_max); - mpfr_set_ui (somme, 0, MPFR_RNDN); + mpfr_set_prec (sum, get_prec_max (tab, n)); + mpfr_set_ui (sum, 0, MPFR_RNDN); for (i = 0; i < n; i++) - { - if (mpfr_add(somme, somme, tab[i], MPFR_RNDN)) - { - printf ("FIXME: algo_exact is buggy.\n"); - exit (1); - } - } + if (mpfr_add (sum, sum, tab[i], MPFR_RNDN)) + { + printf ("FIXME: get_exact_sum is buggy.\n"); + exit (1); + } } -/* Test the sorting function */ static void -test_sort (mpfr_prec_t f, unsigned long n) +generic_tests (void) { - mpfr_t *tab; - mpfr_ptr *tabtmp; - mpfr_srcptr *perm; - unsigned long i; - mpfr_prec_t prec = MPFR_PREC_MIN; - - /* Init stuff */ - tab = (mpfr_t *) (*__gmp_allocate_func) (n * sizeof (mpfr_t)); - for (i = 0; i < n; i++) - mpfr_init2 (tab[i], f); - tabtmp = (mpfr_ptr *) (*__gmp_allocate_func) (n * sizeof(mpfr_ptr)); - perm = (mpfr_srcptr *) (*__gmp_allocate_func) (n * sizeof(mpfr_srcptr)); + mpfr_t exact_sum, sum1, sum2; + mpfr_t *t; + mpfr_ptr *p; + mpfr_prec_t precmax = 444; + int i, m, nmax = 500; + int rnd_mode; - for (i = 0; i < n; i++) + t = (mpfr_t *) (*__gmp_allocate_func) (nmax * sizeof(mpfr_t)); + p = (mpfr_ptr *) (*__gmp_allocate_func) (nmax * sizeof(mpfr_ptr)); + for (i = 0; i < nmax; i++) { - mpfr_urandomb (tab[i], RANDS); - tabtmp[i] = tab[i]; + mpfr_init2 (t[i], precmax); + p[i] = t[i]; } + mpfr_inits2 (precmax, exact_sum, sum1, sum2, (mpfr_ptr) 0); - mpfr_sum_sort ((mpfr_srcptr *)tabtmp, n, perm, &prec); - - if (check_is_sorted (n, perm) == 0) + for (m = 0; m < 4000; m++) { - printf ("mpfr_sum_sort incorrect.\n"); - for (i = 0; i < n; i++) - mpfr_dump (perm[i]); - exit (1); - } + int non_uniform, n; + mpfr_prec_t prec; - /* Clear stuff */ - for (i = 0; i < n; i++) - mpfr_clear (tab[i]); - (*__gmp_free_func) (tab, n * sizeof (mpfr_t)); - (*__gmp_free_func) (tabtmp, n * sizeof(mpfr_ptr)); - (*__gmp_free_func) (perm, n * sizeof(mpfr_srcptr)); -} - -static void -test_sum (mpfr_prec_t f, unsigned long n) -{ - mpfr_t sum, real_sum, real_non_rounded; - mpfr_t *tab; - unsigned long i; - int rnd_mode; - - /* Init */ - tab = (mpfr_t *) (*__gmp_allocate_func) (n * sizeof(mpfr_t)); - for (i = 0; i < n; i++) - mpfr_init2 (tab[i], f); - mpfr_inits2 (f, sum, real_sum, real_non_rounded, (mpfr_ptr) 0); + non_uniform = randlimb () % 10; + n = (randlimb () % nmax) + 1; + prec = MPFR_PREC_MIN + (randlimb () % (precmax - MPFR_PREC_MIN + 1)); + mpfr_set_prec (sum1, prec); + mpfr_set_prec (sum2, prec); - /* First Uniform */ - for (i = 0; i < n; i++) - mpfr_urandomb (tab[i], RANDS); - algo_exact (real_non_rounded, tab, n, f); - for (rnd_mode = 0; rnd_mode < MPFR_RND_MAX; rnd_mode++) - { - sum_tab (sum, tab, n, (mpfr_rnd_t) rnd_mode); - mpfr_set (real_sum, real_non_rounded, (mpfr_rnd_t) rnd_mode); - if (mpfr_cmp (real_sum, sum) != 0) + for (i = 0; i < n; i++) { - printf ("mpfr_sum incorrect.\n"); - mpfr_dump (real_sum); - mpfr_dump (sum); - exit (1); + mpfr_set_prec (t[i], MPFR_PREC_MIN + + (randlimb () % (precmax - MPFR_PREC_MIN + 1))); + mpfr_urandomb (t[i], RANDS); + if (m % 8 != 0 && (m % 8 == 1 || (randlimb () & 1))) + mpfr_neg (t[i], t[i], MPFR_RNDN); + if (non_uniform && MPFR_NOTZERO (t[i])) + mpfr_set_exp (t[i], randlimb () % 1000); + /* putchar ("-0+"[SIGN (mpfr_sgn (t[i])) + 1]); */ } - } - - /* Then non uniform */ - for (i = 0; i < n; i++) - { - mpfr_urandomb (tab[i], RANDS); - if (! mpfr_zero_p (tab[i])) - mpfr_set_exp (tab[i], randlimb () % 1000); - } - algo_exact (real_non_rounded, tab, n, f); - for (rnd_mode = 0; rnd_mode < MPFR_RND_MAX; rnd_mode++) - { - sum_tab (sum, tab, n, (mpfr_rnd_t) rnd_mode); - mpfr_set (real_sum, real_non_rounded, (mpfr_rnd_t) rnd_mode); - if (mpfr_cmp (real_sum, sum) != 0) + /* putchar ('\n'); */ + get_exact_sum (exact_sum, t, n); + RND_LOOP (rnd_mode) { - printf ("mpfr_sum incorrect.\n"); - mpfr_dump (real_sum); - mpfr_dump (sum); - exit (1); + int inex1, inex2; + + inex1 = mpfr_set (sum1, exact_sum, (mpfr_rnd_t) rnd_mode); + inex2 = mpfr_sum (sum2, p, n, (mpfr_rnd_t) rnd_mode); + if (!(mpfr_equal_p (sum1, sum2) && SAME_SIGN (inex1, inex2))) + { + printf ("generic_tests failed on m = %d, %s\n", m, + mpfr_print_rnd_mode ((mpfr_rnd_t) rnd_mode)); + printf ("Expected "); + mpfr_dump (sum1); + printf ("with inex = %d\n", inex1); + printf ("Got "); + mpfr_dump (sum2); + printf ("with inex = %d\n", inex2); + exit (1); + } } } - /* Clear stuff */ - for (i = 0; i < n; i++) - mpfr_clear (tab[i]); - mpfr_clears (sum, real_sum, real_non_rounded, (mpfr_ptr) 0); - (*__gmp_free_func) (tab, n * sizeof(mpfr_t)); + for (i = 0; i < nmax; i++) + mpfr_clear (t[i]); + mpfr_clears (exact_sum, sum1, sum2, (mpfr_ptr) 0); + (*__gmp_free_func) (t, nmax * sizeof(mpfr_t)); + (*__gmp_free_func) (p, nmax * sizeof(mpfr_ptr)); } static @@ -318,12 +218,422 @@ void check_special (void) mpfr_clears (tab[0], tab[1], tab[2], r, (mpfr_ptr) 0); } +#define NC 7 +#define NS 6 + +static void +check_more_special (void) +{ + char *str[NC] = { "NaN", "+Inf", "-Inf", "+0", "-0", "+1", "-1" }; + int i, r, k[NS]; + mpfr_t c[NC], s[NS], sum; + mpfr_ptr p[NS]; + int inex; + + for (i = 0; i < NC; i++) + { + int ret; + mpfr_init2 (c[i], 8); + ret = mpfr_set_str (c[i], str[i], 0, MPFR_RNDN); + MPFR_ASSERTN (ret == 0); + } + for (i = 0; i < NS; i++) + mpfr_init2 (s[i], 8); + mpfr_init2 (sum, 8); + + RND_LOOP(r) + { + i = 0; + while (1) + { + while (i < NS) + { + p[i] = c[0]; + mpfr_set_nan (s[i]); + k[i++] = 0; + } + inex = mpfr_sum (sum, p, NS, (mpfr_rnd_t) r); + if (! ((MPFR_IS_NAN (sum) && MPFR_IS_NAN (s[NS-1])) || + (mpfr_equal_p (sum, s[NS-1]) && + MPFR_SIGN (sum) == MPFR_SIGN (s[NS-1]))) || inex != 0) + { + printf ("Error in check_more_special on %s", + mpfr_print_rnd_mode ((mpfr_rnd_t) r)); + for (i = 0; i < NS; i++) + printf (" %d", k[i]); + printf (" with\n"); + for (i = 0; i < NS; i++) + { + printf (" p[%d] = %s = ", i, str[k[i]]); + mpfr_dump (p[i]); + } + printf ("Expected "); + mpfr_dump (s[NS-1]); + printf ("with inex = 0\n"); + printf ("Got "); + mpfr_dump (sum); + printf ("with inex = %d\n", inex); + exit (1); + } + while (k[--i] == NC-1) + if (i == 0) + goto next_rnd; + p[i] = c[++k[i]]; + if (i == 0) + mpfr_set (s[i], p[i], (mpfr_rnd_t) r); + else + mpfr_add (s[i], s[i-1], p[i], (mpfr_rnd_t) r); + i++; + } + next_rnd: ; + } + + for (i = 0; i < NC; i++) + mpfr_clear (c[i]); + for (i = 0; i < NS; i++) + mpfr_clear (s[i]); + mpfr_clear (sum); +} + +/* i * 2^46 + j * 2^45 + k * 2^44 + f * 2^(-2), + with -1 <= i, j, k <= 1, i != 0, -3 <= f <= 3, + ulp(exact sum) = 2^0 and ulp(exact sum) = 2^44. */ +static void +check1 (void) +{ + mpfr_t sum1, sum2, s1, s2, s3, t[4]; + mpfr_ptr p[4]; + int i, j, k, f, prec, r, inex1, inex2; + + mpfr_init2 (sum1, 47); + mpfr_init2 (sum2, 47); + mpfr_init2 (s1, 3); + mpfr_init2 (s2, 3); + mpfr_init2 (s3, 49); + for (i = 0; i < 4; i++) + { + mpfr_init2 (t[i], 2); + p[i] = t[i]; + } + + for (i = -1; i <= 1; i += 2) + { + mpfr_set_si_2exp (t[0], i, 46, MPFR_RNDN); + for (j = -1; j <= 1; j++) + { + mpfr_set_si_2exp (t[1], j, 45, MPFR_RNDN); + inex1 = mpfr_add (s1, t[0], t[1], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (k = -1; k <= 1; k++) + { + mpfr_set_si_2exp (t[2], k, 44, MPFR_RNDN); + inex1 = mpfr_add (s2, s1, t[2], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (f = -3; f <= 3; f++) + { + mpfr_set_si_2exp (t[3], f, -2, MPFR_RNDN); + inex1 = mpfr_add (s3, s2, t[3], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (prec = mpfr_get_exp (s3); + prec >= MPFR_PREC_MIN; + prec -= 44) + { + mpfr_set_prec (sum1, prec); + mpfr_set_prec (sum2, prec); + RND_LOOP (r) + { + inex1 = mpfr_set (sum1, s3, (mpfr_rnd_t) r); + inex2 = mpfr_sum (sum2, p, 4, (mpfr_rnd_t) r); + MPFR_ASSERTN (mpfr_check (sum1)); + MPFR_ASSERTN (mpfr_check (sum2)); + if (!(mpfr_equal_p (sum1, sum2) && + SAME_SIGN (inex1, inex2))) + { + printf ("Error in check1 on %s, prec = %d, " + "i = %d, j = %d, k = %d, f = %d\n", + mpfr_print_rnd_mode ((mpfr_rnd_t) r), + prec, i, j, k, f); + printf ("Expected "); + mpfr_dump (sum1); + printf ("with inex = %d\n", inex1); + printf ("Got "); + mpfr_dump (sum2); + printf ("with inex = %d\n", inex2); + exit (1); + } + } + } + } + } + } + } + + for (i = 0; i < 4; i++) + mpfr_clear (t[i]); + mpfr_clears (sum1, sum2, s1, s2, s3, (mpfr_ptr) 0); +} + +/* With N = 2 * GMP_NUMB_BITS: + i * 2^N + j + k * 2^(-1) + f1 * 2^(-N) + f2 * 2^(-N), + with i = -1 or 1, j = 0 or i, -1 <= k <= 1, -1 <= f1 <= 1, -1 <= f2 <= 1 + ulp(exact sum) = 2^0. */ +static void +check2 (void) +{ + mpfr_t sum1, sum2, s1, s2, s3, s4, t[5]; + mpfr_ptr p[5]; + int i, j, k, f1, f2, prec, r, inex1, inex2; + +#define N (2 * GMP_NUMB_BITS) + + mpfr_init2 (sum1, N+1); + mpfr_init2 (sum2, N+1); + mpfr_init2 (s1, N+1); + mpfr_init2 (s2, N+2); + mpfr_init2 (s3, 2*N+1); + mpfr_init2 (s4, 2*N+1); + for (i = 0; i < 5; i++) + { + mpfr_init2 (t[i], 2); + p[i] = t[i]; + } + + for (i = -1; i <= 1; i += 2) + { + mpfr_set_si_2exp (t[0], i, N, MPFR_RNDN); + for (j = 0; j != 2*i; j += i) + { + mpfr_set_si (t[1], j, MPFR_RNDN); + inex1 = mpfr_add (s1, t[0], t[1], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (k = -1; k <= 1; k++) + { + mpfr_set_si_2exp (t[2], k, -1, MPFR_RNDN); + inex1 = mpfr_add (s2, s1, t[2], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (f1 = -1; f1 <= 1; f1++) + { + mpfr_set_si_2exp (t[3], f1, -N, MPFR_RNDN); + inex1 = mpfr_add (s3, s2, t[3], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (f2 = -1; f2 <= 1; f2++) + { + mpfr_set_si_2exp (t[4], f2, -N, MPFR_RNDN); + inex1 = mpfr_add (s4, s3, t[4], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + prec = mpfr_get_exp (s4); + mpfr_set_prec (sum1, prec); + mpfr_set_prec (sum2, prec); + RND_LOOP (r) + { + inex1 = mpfr_set (sum1, s4, (mpfr_rnd_t) r); + inex2 = mpfr_sum (sum2, p, 5, (mpfr_rnd_t) r); + MPFR_ASSERTN (mpfr_check (sum1)); + MPFR_ASSERTN (mpfr_check (sum2)); + if (!(mpfr_equal_p (sum1, sum2) && + SAME_SIGN (inex1, inex2))) + { + printf ("Error in check2 on %s, prec = %d, " + "i = %d, j = %d, k = %d, f1 = %d, " + "f2 = %d\n", + mpfr_print_rnd_mode ((mpfr_rnd_t) r), + prec, i, j, k, f1, f2); + printf ("Expected "); + mpfr_dump (sum1); + printf ("with inex = %d\n", inex1); + printf ("Got "); + mpfr_dump (sum2); + printf ("with inex = %d\n", inex2); + exit (1); + } + } + } + } + } + } + } + + for (i = 0; i < 5; i++) + mpfr_clear (t[i]); + mpfr_clears (sum1, sum2, s1, s2, s3, s4, (mpfr_ptr) 0); +} + +/* t[i] = (2^17 - 1) * 2^(17*(i-8)) for 0 <= i <= 16. + * t[17] = 2^(17*9+1) * j for -4 <= j <= 4. + * t[18] = 2^(-1) * k for -1 <= k <= 1. + * t[19] = 2^(-17*8) * m for -3 <= m <= 3. + * prec = 17*9+4 + */ +static void +check3 (void) +{ + mpfr_t sum1, sum2, s1, s2, s3, s4, t[20]; + mpfr_ptr p[20]; + int i, s, j, k, m, r, inex1, inex2; + int prec = 17*9+4; + + mpfr_init2 (sum1, prec); + mpfr_init2 (sum2, prec); + mpfr_init2 (s1, 17*17); + mpfr_init2 (s2, 17*17+4); + mpfr_init2 (s3, 17*17+4); + mpfr_init2 (s4, 17*17+5); + mpfr_set_ui (s1, 0, MPFR_RNDN); + for (i = 0; i < 20; i++) + { + mpfr_init2 (t[i], 20); + p[i] = t[i]; + if (i < 17) + { + mpfr_set_ui_2exp (t[i], 0x1ffff, 17*(i-8), MPFR_RNDN); + inex1 = mpfr_add (s1, s1, t[i], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + } + } + + for (s = 1; s >= -1; s -= 2) + { + for (j = -4; j <= 4; j++) + { + mpfr_set_si_2exp (t[17], j, 17*9+1, MPFR_RNDN); + inex1 = mpfr_add (s2, s1, t[17], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (k = -1; k <= 1; k++) + { + mpfr_set_si_2exp (t[18], k, -1, MPFR_RNDN); + inex1 = mpfr_add (s3, s2, t[18], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (m = -3; m <= 3; m++) + { + mpfr_set_si_2exp (t[19], m, -17*8, MPFR_RNDN); + inex1 = mpfr_add (s4, s3, t[19], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + RND_LOOP (r) + { + inex1 = mpfr_set (sum1, s4, (mpfr_rnd_t) r); + inex2 = mpfr_sum (sum2, p, 20, (mpfr_rnd_t) r); + MPFR_ASSERTN (mpfr_check (sum1)); + MPFR_ASSERTN (mpfr_check (sum2)); + if (!(mpfr_equal_p (sum1, sum2) && + SAME_SIGN (inex1, inex2))) + { + printf ("Error in check3 on %s, " + "s = %d, j = %d, k = %d, m = %d\n", + mpfr_print_rnd_mode ((mpfr_rnd_t) r), + s, j, k, m); + printf ("Expected "); + mpfr_dump (sum1); + printf ("with inex = %d\n", inex1); + printf ("Got "); + mpfr_dump (sum2); + printf ("with inex = %d\n", inex2); + exit (1); + } + } + } + } + } + for (i = 0; i < 17; i++) + mpfr_neg (t[i], t[i], MPFR_RNDN); + mpfr_neg (s1, s1, MPFR_RNDN); + } + + for (i = 0; i < 20; i++) + mpfr_clear (t[i]); + mpfr_clears (sum1, sum2, s1, s2, s3, s4, (mpfr_ptr) 0); +} + +/* Test of s * (q * 2^(n-1) - 2^k) + h + i * 2^(-2) + j * 2^(-2) + * with h = -1 or 1, -1 <= i odd <= j <= 3, 2 <= q <= 3, s = -1 or 1, + * prec n-k. + * On a 64-bit machine: + * MPFR_RNDN, tmd=2, rbit=0, sst=0, negative is checked with the inputs + * -3*2^58, 2^5, -1, 2^(-2), 3*2^(-2) + * MPFR_RNDN, tmd=2, rbit=0, sst=1, negative is checked with the inputs + * -3*2^58, 2^5, -1, 3*2^(-2), 3*2^(-2) + */ +static void +check4 (void) +{ + mpfr_t sum1, sum2, s1, s2, s3, s4, t[5]; + mpfr_ptr p[5]; + int h, i, j, k, n, q, r, s, prec, inex1, inex2; + + mpfr_inits2 (257, sum1, sum2, s1, s2, s3, s4, (mpfr_ptr) 0); + for (i = 0; i < 5; i++) + { + mpfr_init2 (t[i], 2); + p[i] = t[i]; + } + + /* No GNU style for the many nested loops... */ + for (k = 1; k <= 64; k++) { + mpfr_set_si_2exp (t[0], -1, k, MPFR_RNDN); + for (n = k + 2; n <= k + 65; n++) { + prec = n - k; + mpfr_set_prec (sum1, prec); + mpfr_set_prec (sum2, prec); + for (q = 2; q <= 3; q++) { + mpfr_set_si_2exp (t[1], q, n - 1, MPFR_RNDN); + inex1 = mpfr_add (s1, t[0], t[1], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (s = -1; s <= 1; s += 2) { + mpfr_neg (t[0], t[0], MPFR_RNDN); + mpfr_neg (t[1], t[1], MPFR_RNDN); + mpfr_neg (s1, s1, MPFR_RNDN); + for (h = -1; h <= 1; h += 2) { + mpfr_set_si (t[2], h, MPFR_RNDN); + inex1 = mpfr_add (s2, s1, t[2], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (i = -1; i <= 3; i += 2) { + mpfr_set_si_2exp (t[3], i, -2, MPFR_RNDN); + inex1 = mpfr_add (s3, s2, t[3], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + for (j = i; j <= 3; j++) { + mpfr_set_si_2exp (t[4], j, -2, MPFR_RNDN); + inex1 = mpfr_add (s4, s3, t[4], MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + RND_LOOP (r) { + inex1 = mpfr_set (sum1, s4, (mpfr_rnd_t) r); + inex2 = mpfr_sum (sum2, p, 5, (mpfr_rnd_t) r); + MPFR_ASSERTN (mpfr_check (sum1)); + MPFR_ASSERTN (mpfr_check (sum2)); + if (!(mpfr_equal_p (sum1, sum2) && + SAME_SIGN (inex1, inex2))) + { + printf ("Error in check4 on %s, " + "k = %d, n = %d (prec %d), " + "q = %d, s = %d, h = %d, i = %d, j = %d\n", + mpfr_print_rnd_mode ((mpfr_rnd_t) r), + k, n, prec, q, s, h, i, j); + printf ("Expected "); + mpfr_dump (sum1); + printf ("with inex = %d\n", inex1); + printf ("Got "); + mpfr_dump (sum2); + printf ("with inex = %d\n", inex2); + exit (1); + } + } + } + } + } + } + } + } + } + + for (i = 0; i < 5; i++) + mpfr_clear (t[i]); + mpfr_clears (sum1, sum2, s1, s2, s3, s4, (mpfr_ptr) 0); +} + /* bug reported by Joseph S. Myers on 2013-10-27 https://sympa.inria.fr/sympa/arc/mpfr/2013-10/msg00015.html */ static void bug20131027 (void) { - mpfr_t r, t[4]; + mpfr_t sum, t[4]; mpfr_ptr p[4]; char *s[4] = { "0x1p1000", @@ -331,35 +641,99 @@ bug20131027 (void) "-0x1p947", "0x1p880" }; - int i; + int i, r; + + mpfr_init2 (sum, 53); - mpfr_init2 (r, 53); for (i = 0; i < 4; i++) { mpfr_init2 (t[i], i == 0 ? 53 : 1000); mpfr_set_str (t[i], s[i], 0, MPFR_RNDN); p[i] = t[i]; } - mpfr_sum (r, p, 4, MPFR_RNDN); - if (MPFR_NOTZERO (r)) + RND_LOOP(r) { - printf ("mpfr_sum incorrect in bug20131027: expected 0, got\n"); - mpfr_dump (r); - exit (1); + int expected_sign = (mpfr_rnd_t) r == MPFR_RNDD ? -1 : 1; + int inex; + + inex = mpfr_sum (sum, p, 4, (mpfr_rnd_t) r); + + if (MPFR_NOTZERO (sum) || MPFR_SIGN (sum) != expected_sign || inex != 0) + { + printf ("mpfr_sum incorrect in bug20131027 for %s:\n" + "expected %c0 with inex = 0, got ", + mpfr_print_rnd_mode ((mpfr_rnd_t) r), + expected_sign > 0 ? '+' : '-'); + mpfr_dump (sum); + printf ("with inex = %d\n", inex); + exit (1); + } } for (i = 0; i < 4; i++) mpfr_clear (t[i]); - mpfr_clear (r); + mpfr_clear (sum); } +/* Occurs in branches/new-sum/src/sum.c@9344 on a 64-bit machine. */ +static void +bug20150327 (void) +{ + mpfr_t sum1, sum2, t[3]; + mpfr_ptr p[3]; + char *s[3] = { "0.10000111110101000010101011100001", "1E-100", "0.1E95" }; + int i, r; + + mpfr_inits2 (58, sum1, sum2, (mpfr_ptr) 0); + + for (i = 0; i < 3; i++) + { + mpfr_init2 (t[i], 64); + mpfr_set_str (t[i], s[i], 2, MPFR_RNDN); + p[i] = t[i]; + } + + RND_LOOP(r) + { + int inex1, inex2; + + mpfr_set (sum1, t[2], MPFR_RNDN); + inex1 = -1; + if (MPFR_IS_LIKE_RNDU ((mpfr_rnd_t) r, 1)) + { + mpfr_nextabove (sum1); + inex1 = 1; + } + + inex2 = mpfr_sum (sum2, p, 3, (mpfr_rnd_t) r); + + if (!(mpfr_equal_p (sum1, sum2) && SAME_SIGN (inex1, inex2))) + { + printf ("mpfr_sum incorrect in bug20150327 for %s:\n", + mpfr_print_rnd_mode ((mpfr_rnd_t) r)); + printf ("Expected "); + mpfr_dump (sum1); + printf ("with inex = %d\n", inex1); + printf ("Got "); + mpfr_dump (sum2); + printf ("with inex = %d\n", inex2); + exit (1); + } + } + + for (i = 0; i < 3; i++) + mpfr_clear (t[i]); + mpfr_clears (sum1, sum2, (mpfr_ptr) 0); +} + +/* TODO: A test with more inputs (but can't be compared to mpfr_add). */ static void check_extreme (void) { mpfr_t u, v, w, x, y; mpfr_ptr t[2]; - int i, inex, r; + int i, inex1, inex2, r; t[0] = u; t[1] = v; @@ -371,19 +745,21 @@ check_extreme (void) RND_LOOP (r) for (i = 0; i < 2; i++) { - mpfr_sum (x, t, 2, (mpfr_rnd_t) r); - mpfr_set_prec (y, 64); - inex = mpfr_add (y, u, w, MPFR_RNDN); - MPFR_ASSERTN (inex == 0); - mpfr_prec_round (y, 32, (mpfr_rnd_t) r); - if (! mpfr_equal_p (x, y)) + mpfr_set_prec (x, 64); + inex1 = mpfr_add (x, u, w, MPFR_RNDN); + MPFR_ASSERTN (inex1 == 0); + inex1 = mpfr_prec_round (x, 32, (mpfr_rnd_t) r); + inex2 = mpfr_sum (y, t, 2, (mpfr_rnd_t) r); + if (!(mpfr_equal_p (x, y) && SAME_SIGN (inex1, inex2))) { printf ("Error in check_extreme (%s, i = %d)\n", mpfr_print_rnd_mode ((mpfr_rnd_t) r), i); printf ("Expected "); - mpfr_dump (y); - printf ("Got "); mpfr_dump (x); + printf ("with inex = %d\n", inex1); + printf ("Got "); + mpfr_dump (y); + printf ("with inex = %d\n", inex2); exit (1); } mpfr_neg (v, v, MPFR_RNDN); @@ -392,19 +768,19 @@ check_extreme (void) mpfr_clears (u, v, w, x, y, (mpfr_ptr) 0); } +/* Generic random tests with cancellations */ static void cancel (void) { - mpfr_t x[2 * MPFR_NCANCEL]; + mpfr_t x[2 * MPFR_NCANCEL], bound; mpfr_ptr px[2 * MPFR_NCANCEL]; int i, j, n; - /* FIXME: re-enable and improve once mpfr_sum has been fixed. - check_extreme() is currently sufficient to show problems. */ - return; + mpfr_init2 (bound, 2); for (i = 0; i < 8; i++) { + mpfr_set_inf (bound, 1); for (n = 0; n < numberof (x); n++) { mpfr_prec_t p; @@ -445,35 +821,254 @@ cancel (void) mpfr_print_rnd_mode (rnd), mpfr_get_prec (x[n])); #endif mpfr_sum (x[n], px, n, rnd); + if (mpfr_zero_p (x[n])) + { + n++; + break; + } mpfr_neg (x[n], x[n], MPFR_RNDN); + if (mpfr_greater_p (x[n], bound)) + { + printf ("Error in cancel on i = %d, n = %d\n", i, n); + exit (1); + } + mpfr_set_ui_2exp (bound, 1, + mpfr_get_exp (x[n]) - p - (rnd == MPFR_RNDN), + MPFR_RNDN); + /* The next sum will be <= bound in absolute value + (the equality can be obtained in all rounding modes + since the sum will be rounded). */ } } while (--n >= 0) mpfr_clear (x[n]); } + + mpfr_clear (bound); +} + +#ifndef NOVFL +# define NOVFL 30 +#endif + +static void +check_overflow (void) +{ + mpfr_t sum1, sum2, x, y; + mpfr_ptr t[2 * NOVFL]; + mpfr_exp_t emin, emax; + int i, r; + + emin = mpfr_get_emin (); + emax = mpfr_get_emax (); + set_emin (MPFR_EMIN_MIN); + set_emax (MPFR_EMAX_MAX); + + mpfr_inits2 (32, sum1, sum2, x, y, (mpfr_ptr) 0); + mpfr_setmax (x, mpfr_get_emax ()); + mpfr_neg (y, x, MPFR_RNDN); + + for (i = 0; i < 2 * NOVFL; i++) + t[i] = i < NOVFL ? x : y; + + /* Two kinds of test: + * i = 1: overflow. + * i = 2: intermediate overflow (exact sum is 0). + */ + for (i = 1; i <= 2; i++) + RND_LOOP(r) + { + int inex1, inex2; + + inex1 = mpfr_add (sum1, x, i == 1 ? x : y, (mpfr_rnd_t) r); + inex2 = mpfr_sum (sum2, t, i * NOVFL, (mpfr_rnd_t) r); + MPFR_ASSERTN (mpfr_check (sum1)); + MPFR_ASSERTN (mpfr_check (sum2)); + if (!(mpfr_equal_p (sum1, sum2) && SAME_SIGN (inex1, inex2))) + { + printf ("Error in check_overflow on %s, i = %d\n", + mpfr_print_rnd_mode ((mpfr_rnd_t) r), i); + printf ("Expected "); + mpfr_dump (sum1); + printf ("with inex = %d\n", inex1); + printf ("Got "); + mpfr_dump (sum2); + printf ("with inex = %d\n", inex2); + exit (1); + } + } + + mpfr_clears (sum1, sum2, x, y, (mpfr_ptr) 0); + + set_emin (emin); + set_emax (emax); +} + +#ifndef NUNFL +# define NUNFL 9 +#endif + +/* t[0] = 2^(-k) - sum(t[i],i=1..n) + */ +static void +check_underflow (void) +{ + mpfr_t sum1, sum2, t[NUNFL]; + mpfr_ptr p[NUNFL]; + mpfr_prec_t precmax = 444; + mpfr_exp_t emin, emax; + unsigned int ex_flags, flags; + int c, i; + + emin = mpfr_get_emin (); + emax = mpfr_get_emax (); + set_emin (MPFR_EMIN_MIN); + set_emax (MPFR_EMAX_MAX); + + ex_flags = MPFR_FLAGS_UNDERFLOW | MPFR_FLAGS_INEXACT; + + mpfr_init2 (sum1, MPFR_PREC_MIN); + mpfr_init2 (sum2, precmax); + + for (i = 0; i < NUNFL; i++) + { + mpfr_init2 (t[i], precmax); + p[i] = t[i]; + } + + for (c = 0; c < 8; c++) + { + mpfr_prec_t fprec; + int n, neg, r; + + fprec = MPFR_PREC_MIN + (randlimb () % (precmax - MPFR_PREC_MIN + 1)); + n = 3 + (randlimb () % (NUNFL - 2)); + MPFR_ASSERTN (n <= NUNFL); + + mpfr_set_prec (sum2, (randlimb () & 1) ? MPFR_PREC_MIN : precmax); + mpfr_set_prec (t[0], fprec + 64); + mpfr_set_zero (t[0], 1); + + for (i = 1; i < n; i++) + { + int inex; + + mpfr_set_prec (t[i], MPFR_PREC_MIN + + (randlimb () % (fprec - MPFR_PREC_MIN + 1))); + do + mpfr_urandomb (t[i], RANDS); + while (MPFR_IS_ZERO (t[i])); + mpfr_set_exp (t[i], MPFR_EMIN_MIN); + inex = mpfr_sub (t[0], t[0], t[i], MPFR_RNDN); + MPFR_ASSERTN (inex == 0); + } + + neg = randlimb () & 1; + if (neg) + mpfr_nextbelow (t[0]); + else + mpfr_nextabove (t[0]); + + RND_LOOP(r) + { + int inex1, inex2; + + mpfr_set_zero (sum1, 1); + if (neg) + mpfr_nextbelow (sum1); + else + mpfr_nextabove (sum1); + inex1 = mpfr_div_2ui (sum1, sum1, 2, (mpfr_rnd_t) r); + + mpfr_clear_flags (); + inex2 = mpfr_sum (sum2, p, n, (mpfr_rnd_t) r); + flags = __gmpfr_flags; + + MPFR_ASSERTN (mpfr_check (sum1)); + MPFR_ASSERTN (mpfr_check (sum2)); + + if (flags != ex_flags) + { + printf ("Bad flags in check_underflow on %s, c = %d\n", + mpfr_print_rnd_mode ((mpfr_rnd_t) r), c); + printf ("Expected flags:"); + flags_out (ex_flags); + printf ("Got flags: "); + flags_out (flags); + printf ("sum = "); + mpfr_dump (sum2); + exit (1); + } + + if (!(mpfr_equal_p (sum1, sum2) && SAME_SIGN (inex1, inex2))) + { + printf ("Error in check_underflow on %s, c = %d\n", + mpfr_print_rnd_mode ((mpfr_rnd_t) r), c); + printf ("Expected "); + mpfr_dump (sum1); + printf ("with inex = %d\n", inex1); + printf ("Got "); + mpfr_dump (sum2); + printf ("with inex = %d\n", inex2); + exit (1); + } + } + } + + for (i = 0; i < NUNFL; i++) + mpfr_clear (t[i]); + mpfr_clears (sum1, sum2, (mpfr_ptr) 0); + + set_emin (emin); + set_emax (emax); +} + +static void +check_coverage (void) +{ +#ifdef MPFR_COV_CHECK + int r, i, j, k, p; + int err = 0; + + for (r = 0; r < MPFR_RND_MAX; r++) + for (i = 0; i < 1 + ((mpfr_rnd_t) r == MPFR_RNDN); i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 3; k++) + for (p = 0; p < 2; p++) + if (!__gmpfr_cov_sum_tmd[r][i][j][k][p]) + { + printf ("TMD not tested on %s, tmd=%d, rbit=%d, sst=%d, %s\n", + mpfr_print_rnd_mode ((mpfr_rnd_t) r), i+1, j, k-1, + p ? "positive" : "negative"); + err = 1; + } + + if (err) + exit (1); +#endif } int main (void) { - mpfr_prec_t p; - unsigned long n; - tests_start_mpfr (); check_special (); + check_more_special (); + check1 (); + check2 (); + check3 (); + check4 (); bug20131027 (); - test_sort (1764, 1026); - for (p = 2 ; p < 444 ; p += 17) - for (n = 2 ; n < 1026 ; n += 42 + p) - test_sum (p, n); - /* FIXME: remove the MPFR_SKIP_TSUM test once the new mpfr_sum version - is out. */ - if (getenv ("MPFR_SKIP_EXTREME") == NULL) - check_extreme (); + bug20150327 (); + generic_tests (); + check_extreme (); cancel (); + check_overflow (); + check_underflow (); + check_coverage (); tests_end_mpfr (); return 0; } |