summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/README.dev3
-rw-r--r--doc/sum.txt643
-rw-r--r--src/mpfr-impl.h27
-rw-r--r--src/sum.c1373
-rw-r--r--tests/tsum.c1021
5 files changed, 2580 insertions, 487 deletions
diff --git a/doc/README.dev b/doc/README.dev
index 80d502626..067423f99 100644
--- a/doc/README.dev
+++ b/doc/README.dev
@@ -440,6 +440,9 @@ List of macros used for checking MPFR:
for no timeout by default, and can be overridden
at "make check" time with the MPFR_TESTS_TIMEOUT
environment variable.
++ MPFR_COV_CHECK: Define to enable value coverage checking (must not
+ be used in production). This macro is for the MPFR
+ developers, in order to improve the testsuite.
===========================================================================
diff --git a/doc/sum.txt b/doc/sum.txt
new file mode 100644
index 000000000..b51b95456
--- /dev/null
+++ b/doc/sum.txt
@@ -0,0 +1,643 @@
+Implementation of mpfr_sum: sum of n mpfr_t values
+══════════════════════════════════════════════════
+
+About the time/memory complexity: The original mpfr_sum algorithm was
+inefficient on some inputs, and in some simple cases with inputs of
+different orders of magnitude, it could be very slow and take all the
+memory, making the whole program (and possibly other programs on the
+system too, due to the lack of memory at some critical point) crash.
+The time/memory complexity must no longer depend on the value of the
+exponents of the inputs, i.e. their order of magnitude. Moreover, be
+careful with the carry propagation.
+
+Preliminary note: The previous mpfr_sum algorithm was a high-level one
+and could take too much memory and too much time in cases with numbers
+of different magnitudes and cancellation. Here we will use functions
+from the mpn layer of GMP only (no mpfr_add), as the algorithm is based
+on additions by blocks, involving virtual splitting of MPFR numbers.
+
+Concerning the sign of a zero result, the specification is:
+ * if n = 0, then the result is +0 (this choice is consistent with
+ IEEE 754-2008's sum reduction operation of vector length 0, and
+ with mpfr_set_ui and mpfr_set_si on the integer 0); otherwise
+ * if all the inputs have the same sign (i.e. all +0 or all -0),
+ then the result has the same sign as the inputs (for n = 1 or 2,
+ this choice is consistent with mpfr_set and mpfr_add respectively);
+ * otherwise, either because all inputs are zeros with at least a +0
+ and a -0, or because some inputs are non-zero (but they globally
+ cancel), the result is +0, except for the MPFR_RNDD rounding mode,
+ where it is -0 (this choice is consistent with mpfr_add for n = 2,
+ and more generally with a sequence of mpfr_add's).
+Additional details on:
+ https://sympa.inria.fr/sympa/arc/mpfr/2014-03/msg00000.html
+
+Let sq be the output precision.
+
+*** TODO ***
+The following is a bit obsolete. In particular the new helper function
+sum_raw (obtained from code refactoring for steps 3 to 6, and 8) should
+be introduced and described. Pre-rounding in (7) and rounding in (8)
+have been merged, and this [previously pre-rounding in (7)] is still
+done at the same time as sign handling with a single mpn operation.
+The table concerning the correction term and the ternary value in case
+of TMD should be changed to:
+
+ rnd tmd R sst correction ternary
+ ───────────────────────────────────────────────
+ N 1 0 - 0 +
+ N 1 0 0 0 0
+ N 1 0 + 0 -
+ N 1 1 - +1 +
+ N 1 1 0 +1 0
+ N 1 1 + +1 -
+ ───────────────────────────────────────────────
+ N 2 0 - 0 -
+ N 2 0 0 ? ? (halfway case)
+ N 2 0 + +1 +
+ N 2 1 - 0 -
+ N 2 1 0 ? ? (halfway case)
+ N 2 1 + +1 +
+ ───────────────────────────────────────────────
+ D 1 0 - -1 -
+ D 1 0 0 0 0
+ D 1 0 + 0 -
+ D 1 1 - 0 -
+ D 1 1 0 +1 0
+ D 1 1 + +1 -
+ ───────────────────────────────────────────────
+ U 1 0 - 0 +
+ U 1 0 0 0 0
+ U 1 0 + +1 +
+ U 1 1 - +1 +
+ U 1 1 0 +1 0
+ U 1 1 + +2 +
+ ───────────────────────────────────────────────
+
+The general ideas of the algorithm:
+
+0. Handle the case n <= 1 separately (+0 for n = 0, mpfr_set for n = 1).
+ Now assume that n >= 2.
+
+1. Look at the exponent field of each mpfr_t input (this is fast); also
+ track the signs of Inf's and zeros. This step allows one to:
+ * detect the singular cases, i.e. when there is either at least
+ a NaN or an Inf in the inputs (in case of a NaN or two Inf's of
+ different signs, we can immediately return NaN), or all inputs
+ are zeros;
+ * determine the maximum exponent maxexp of the regular inputs and
+ the number n' of regular inputs.
+
+2. A truncated sum will be computed in two's complement, stored in a
+ fixed memory area, called "accumulator", whose characteristics are
+ determined here.
+
+ Two's complement is preferred to the sign + magnitude representation
+ because the signs of the temporary (and final) results are not known
+ in advanced, and the computations (additions and subtractions) in
+ two's complement are more natural in this context. There will be a
+ conversion to sign + magnitude (representation used by MPFR numbers)
+ at the end, but this should not take much time compared to the other
+ calculations.
+
+ Only a fixed part of the inputs will be taken into account for the
+ truncated sum: the bits whose exponent is in some window (interval)
+ denoted [minexp,maxexp[.
+
+ We choose not to include maxexp in the interval in order to match the
+ floating-point representation chosen in MPFR, where the significand
+ is in [1/2,1[; this also means that the current minexp will be maxexp
+ at the next iteration, unless there is a "hole" between the inputs,
+ as explained below.
+
+ Let us define logn = ceil(log2(n')).
+
+ Due to the accumulation of values and the choice of two's complement
+ (a way to represent the sign), we will need some extra bits to avoid
+ overflows. The absolute value of the sum is less than n' * 2^maxexp,
+ taken up to logn extra bits, and one needs one more bit to be able
+ to determine the sign, so that cq = logn + 1 extra bits will be
+ considered.
+
+ For the other side, we define minexp = maxexp - sq - dq, where dq
+ will be chosen around logn to take into account the accumulation
+ of errors, i.e. from everything less significant than minexp. The
+ final choice for dq should be done after testing: in practice, one
+ may need a little more precision to handle partial cancellation at
+ this iteration, but important cancellation will always lead to other
+ iterations. For instance, we will choose for dq the smallest value
+ such that dq ≥ max(logn,4) (the bound 4 will be explained later),
+ and the accumulator bitsize wq, which is maxexp + cq - minexp, i.e.
+ cq + sq + dq, is a multiple of GMP_NUMB_BITS.
+
+ Accumulator: [--------]-----------------------------------]
+ cq └─ maxexp sq + dq minexp ─┘
+
+3. Compute the truncated sum in two's complement by taking into account
+ the part of the inputs in the window [minexp,maxexp[.
+
+ In the same loop over the inputs, determine the maximum exponent
+ maxexp2 of the numbers formed starting with the most significant
+ represented bit that has been ignored: one will get either minexp
+ (if an input has been truncated at this iteration) or the maximum
+ exponent of the numbers that have been completely ignored. If no
+ bits have been ignored any longer, then maxexp2 has the special
+ value MPFR_EXP_MIN, which is the minimum value of the mpfr_exp_t
+ type and not a valid exponent; and the truncated sum is exact.
+
+ The accumulator during this iteration:
+
+ [--------]-----------------------------------]
+ cq └─ maxexp minexp ─┘
+
+ If there is a additional iteration with maxexp2 = minexp - 4 and
+ a shift of 26 bits, here's the accumulator after the shift:
+
+ <------- 26 zeros ------->
+ [-------------------0000000000000000000000000]
+ This iteration: minexp ─┘ ├─ maxexp2 │
+ Next iteration: └─ maxexp minexp ─┘
+
+4. Determine the number of cancelled bits, more precisely, define the
+ variable cancel = number of identical bits on the most significant
+ part of the accumulator.
+
+5. If the truncated sum (i.e. the value of the accumulator) is 0, i.e.
+ if all the words are 0, then:
+ * if maxexp2 == MPFR_EXP_MIN, then return with the exact value ±0
+ (its sign is determined from the rounding mode, as specified);
+ * else reiterate at (3) with maxexp = maxexp2 and minexp determined
+ from the initial value of cq (like in the first iteration).
+
+6. Let e = maxexp + cq - cancel, u = e - sq, and err = maxexp2 + logn.
+ Then the absolute value of the truncated sum is in [2^(e-1),2^e]
+ (binade closed on both ends due to two's complement), u is the
+ exponent of the ulp (a.k.a., quantum exponent) in the destination
+ (except in the exact case -2^e), and the absolute value of the error
+ is strictly less than the bound 2^err; if maxexp2 == MPFR_EXP_MIN,
+ then the error is 0, i.e. the sum in the accumulator is exact.
+
+ Here's a representation of the accumulator and the cancel bits,
+ with the two cases depending on the sign of the truncated sum,
+ where the x's correspond to the sq-1 represented bits following
+ the initial value bit 1 (0 if negative), r is the rounding bit,
+ and the bits f are the following bits:
+
+ ][------------------- accumulator -------------------]
+ ][---- cancel ----]----------------------------------]
+ ]0000000000000000001xxxxxxxxxxxxxxxxxxxxxrffffffffffff
+ ]1111111111111111110xxxxxxxxxxxxxxxxxxxxxrffffffffffff
+ └─ maxexp + cq └─ e u ─┘ minexp ─┘
+
+ If the cancellation is important, then the bit r and some of the
+ least significant bits x may fall outside of the accumulator, in
+ which case they are considered to be 0. The most degenerate case
+ is the following:
+
+ * If the cancel bits are 0's, then due to (5), e = minexp + 1,
+ i.e. a single bit (the leading 1) is represented.
+
+ * If the cancel bits are 1's, all the bits of the accumulator are
+ 1's. Since the non-represented bits are considered to be 0, the
+ following bit is a 0; this implies that the computed value of
+ cancel is the right one, i.e. as if the following bits were
+ represented in the accumulator, and proves the above assertion
+ "the absolute value of the truncated sum is in [2^(e-1),2^e]".
+ Here we have e = minexp, and none of the bits are represented.
+
+ In the implementation, one must make sure that one does not try to
+ read non-represented bits.
+
+ The exponent and related values may change later due to the error,
+ but this doesn't really matter here.
+
+ If the error bound is too large due to the cancellation, e.g. if
+ err > u - 3 (the proofs below are done with this choice), then
+ one reiterates at (3) after shifting the truncated sum to the
+ left boundary (most significant part) of the accumulator, where:
+ * the shift count is determined in such a way to avoid overflows
+ at the next iteration, i.e. to be able to retrieve the sum with
+ its sign;
+ * the new value of cq is implied by this shift count and maxexp2
+ (the value of maxexp at the next iteration).
+
+ Concerning the choice of the shift count shiftq, letting only one
+ identical bit in the most significant part may not be sufficient;
+ for instance, if
+ a. maxexp2 = minexp;
+ b. the accumulator contains:
+
+ 0000000011111111111111111111111111110101
+ This iteration: minexp ─┘
+
+ c. the accumulator is shifted to:
+
+ 0111111111111111111111111111101010000000
+ Next iteration: └─ maxexp
+
+ d. there are at least 12 numbers to add at the next iteration;
+ then one could end up with something like:
+
+ 1000000000000000000000000000000000010001
+ └─ maxexp
+
+ i.e. an overflow in two's complement. But leaving at least
+ 2 + max(0, err - e) itentical bits in the most significant part,
+ such as
+
+ 0011111111111111111111111111110101000000
+ └─ maxexp
+
+ is sufficient. The second term of the max covers cases like:
+
+ ┌─ err = maxexp2 + logn
+ 0000000000000000000000000000000000000111
+ e ─┘
+
+ (2^err being a bound on the error term, thus a bound on the value
+ that will be added to the accumulator at the next iteration), for
+ which the accumulator can be shifted to:
+
+ ┌─ err
+ 0000000111000000000000000000000000000000
+ e ─┘
+
+ without triggering an overflow at the next iteration, but
+
+ ┌─ err
+ 0000001110000000000000000000000000000000
+ e ─┘
+
+ is incorrect as a 1 could appear in the MSB (making the accumulator
+ value negative) just with additions of positive numbers.
+
+ Said otherwise, leaving at least i identical bits allows one to
+ represent numbers in [-2^(e+i-1),2^(e+i-1)[. The current sum is in
+ [-2^e,2^e[, and taking into account the error terms, one wants to
+ be able to represent arbitrary numbers in ]-2^e-2^err,2^e+2^err[.
+ So, i must be chosen in such a way that 2^e + 2^err ≤ 2^(e+i-1),
+ i.e. 2^0 + 2^(err-e) ≤ 2^(i-1). The smallest power of two larger
+ than or equal to 2^0 + 2^(err-e) has exponent 1 + max(0, err - e).
+ Hence the choice i ≥ 2 + max(0, err - e).
+
+ So, we have: shiftq = cancel - 2 - max(0, err - e). And at the next
+ iteration,
+
+ cq' = wq - shiftq + (minexp - maxexp2)
+
+ Now one needs to prove that this makes sense, i.e. that shiftq > 0
+ and that cq' < wq; otherwise all the inputs will be ignored, making
+ a new iteration useless, with a possible infinite loop. So, shiftq
+ must be chosen so that
+
+ shiftq > minexp - maxexp2
+
+ Note that since maxexp2 ≤ minexp, this will also yield shiftq > 0.
+ The condition err > u - 3 gives:
+
+ maxexp2 + logn > maxexp + cq - cancel - sq - 3
+
+ Therefore, since maxexp - minexp = sq + dq,
+
+ cancel - 2 > minexp - maxexp2 - logn + cq + dq - 5
+
+ And since cq = logn + 1 and dq ≥ 4,
+
+ cancel - 2 > minexp - maxexp2 [A]
+
+ We also have:
+
+ cancel - 2 - logn + e = maxexp - 2 - logn + cq > minexp
+
+ Therefore
+
+ cancel - 2 - (err - e) > minexp - maxexp2 [B]
+
+ [A] and [B] give: shiftq > minexp - maxexp2. QED.
+
+ Note: It is expected that in general, the cancellation is not very
+ large, so that the new additions in (3) will occur only in a small
+ part of the accumulator, except in case of long carry propagation
+ (see below).
+
+7. Now, the accumulator contains the significand of a good approximation
+ to the exact sum, since the absolute value of the error is strictly
+ less than ulp(computed value) * 2^(-3). The corresponding exponent is
+ e and the sign is determined from the cancel bits. One will copy the
+ rounded significand to the destination, together with the exponent
+ and the sign. One has different cases:
+
+ * If all the bits of the inputs have been taken into account, then
+ the significand is exact (maxexp2 == MPFR_EXP_MIN), so that one
+ can round it correctly. Note: The TMD does not occur here.
+
+ * If the error bound is non-zero (because not all the bits of the
+ inputs have been taken into account) but the TMD does not occur,
+ then one can also round the significand correctly.
+
+ * Otherwise, one will round the significand, assuming that this is
+ the correct truncation up to the rounding bit and that the sticky
+ bit is 1: this yields the most probable correctly rounded result
+ if u - minexp > 2 (the most general case), based on a symmetrical
+ distribution of the error centered on 0; the destination will be
+ corrected later if need be.
+
+ For the last two cases, and before the rounding is done, one needs
+ to determine whether the TMD occurs. Let d = u - err, which is ≥ 3
+ and can be very large; however d is representable in a mpfr_exp_t
+ since u ≤ emax, err ≥ emin, and this type can hold the sum of two
+ valid exponents. The TMD occurs when the sum is close enough to a
+ breakpoint, which is either a machine number (i.e. a number whose
+ significand fits on sq bits) or a midpoint between two consecutive
+ machine numbers, depending on the rounding mode:
+ ┌───────────────┬────────────────┐
+ │ Rounding mode │ Breakpoint │
+ ├───────────────┼────────────────┤
+ │ to nearest │ midpoint │
+ │ to nearest │ machine number │
+ │ directed │ machine number │
+ └───────────────┴────────────────┘
+ (in the second case, the correctly rounded sum can be determined,
+ but not the ternary value). More precisely, the TMD occurs when:
+ * the d bits following the ulp one are identical in directed
+ rounding modes;
+ * the d-1 bits following the rounding bit are identical, in the
+ rounding-to-nearest mode.
+
+ At the same time, one determines whether the correctly rounded sum
+ is exact. If the TMD occurs, this value is currently assumed to be
+ inexact, as said above.
+
+ Let us recall that in case of an important cancellation, some bits
+ may not be represented in the accumulator. Such a case is possible
+ even in this step, when maxexp2 is much smaller than minexp.
+
+ A change of representation is needed since the sum has been computed
+ using two's complement representation while regular MPFR numbers are
+ represented in sign + absolute value (normalized). The sign bit is
+ directly obtained from the cancel bits. For the absolute value:
+
+ * If the cancel bits are 0's, the value is positive, so that the
+ absolute value is represented in the same way.
+
+ * If the cancel bits are 1's, the value is negative, so that the
+ absolute value is formally obtained by inverting all the bits
+ (this infinite binary expansion is not the canonical one, but
+ this does not matter as this is just for the discussion below).
+ Since there is no carry propagation at this point, the exponent
+ is not changed.
+
+ With the mpn layer of GMP, up to 3 operations are involved for the
+ handling of the significand:
+ * a shift (for the alignment), if the shift count is non-zero
+ (this is the most probable case);
+ * a negation if the value is negative;
+ * the initial rounding.
+ The shift should be done before the initial rounding, so that all
+ the bits are represented for the rounding. The copy itself should
+ be done together with the shift or the negation, because this is
+ where most of the limbs are changed in general. It will be done
+ with the shift, and see below for the other two operations.
+
+ The rounding and change of representation (as a whole) can be seen
+ in the following way: after scaling, a number written r = i + f in
+ two's complement, where i is the integer part and f the infinite
+ fractional part, has to be rounded to one of the enclosing integers
+ i and i + 1. Thus the rounding operation consists in replacing f
+ by an integer c = 0 or 1 (like a carry): the rounded value is the
+ integer i + c. When r is negative (i.e. the MSB of i is 1), the
+ following forms are equivalent:
+
+ |i + c| = - (i + c) [1]
+ = (- i) - c [2]
+ = rnd(-r) = rnd(~r) = ~i + ~c [3]
+
+ This can lead to various possible implementations. In each of these
+ 3 cases, one has two potentially multiple-precision operations.
+ [1] addition of a carry, followed by a negation;
+ [2] negation, followed by a subtraction of a carry;
+ [3] bit inversion, followed by an addition of a carry.
+ In average, the operation with a carry is in constant time; it is an
+ operation on a single word in most cases. But in the worst case, due
+ to possible long carry propagation, it may need a read/write on the
+ whole significand. If GMP had a negation-with-borrow operation, one
+ could merge these two multiple-operations in a single one; but this
+ is not the case. However one can notice that either c or ~c is 0, so
+ that one can select the operation depending on the value of c. As a
+ summary, consider the following cases:
+ * r positive: |i + c| = i + c with mpn_add_1;
+ * r negative, c = 0: |i + c| = -i with mpn_neg ([1]/[2]);
+ * r negative, c = 1: |i + c| = ~i with mpn_com ([3]).
+ The first two cases can yield an increment of the exponent e.
+
+ One goal of this early copy/rounding is to free the accumulator in
+ case there is work to do in (8), though the needed memory for that
+ is limited. An alternative solution would be to reserve a bit more
+ memory for (8), and determine the way of rounding before the copy;
+ this could be slightly more efficient in supposedly rare cases
+ (when the correction yields a long carry propagation).
+
+ As said above, in rounding to nearest, both kinds of breakpoints
+ (machine numbers and midpoints) are taken into account for the TMD.
+ Since the final rounding and ternary value will depend on the kind
+ of breakpoint, one needs to remember this information in a variable
+ (as the accumulator will be overwritten). Thus a variable tmd will
+ contain:
+ * 0: the TMD does not occur;
+ * 1: the TMD occurs on a machine number;
+ * 2: the TMD occurs on a midpoint.
+
+ If the TMD does not occur (tmd = 0):
+ * Determine the ternary value as follows: if the sum is exact
+ (already known), the ternary value is 0; otherwise it is -1
+ if c = 0, and 1 if c = 1.
+ * Check the range and return as in (9).
+
+8. Reiterate like in (3) one or more times using a specific window as
+ if sq were 0, i.e. around maxexp + logn to maxexp - logn (wq - sq
+ may be a good choice); in short, determine the sign of a secondary
+ term, which is the difference between the exact sum and the closest
+ breakpoint.
+
+ During the iterations, when there are several identical bits on the
+ left (most significant part) of the accumulator, only one bit needs
+ to be represented during the computations. But to avoid overflows,
+ one will keep 2 or more at the start of an iteration, like what is
+ done in (6).
+
+ Let us see what needs to be done to prepare the first iteration.
+ Since d ≥ 3 and the TMD occurs, the secondary term is, in absolute
+ value, less than half the weight of the rounding bit, so that it can
+ be seen as the fractional part after the rounding bit; the leading
+ d-1 bits of the truncated secondary term are identical and give its
+ sign (in the two's complement meaning). Now, if err < minexp, then
+ at least one of these identical bits is not represented, meaning that
+ it is 0 and all these bits are 0's; thus, under this condition, the
+ accumulator is zeroed and minexp is determined from maxexp2 and the
+ initial value of cq, like in (5) and the first iteration. Otherwise
+ the identical bits are all represented, and similarly to (6), one
+ shifts the trailing bits, keeping the last 2 over the d-1 identical
+ bits, i.e. one shifts the bits from err+1 to minexp, and zeros the
+ remaining bits of the accumulator; the new minexp is obtained by
+ subtracting the shift count from it.
+
+ Once the sign of the secondary term is known, depending on:
+ * the rounding mode rnd (equivalent to N, D, U),
+ * the value of tmd (1 or 2 in rounding to nearest),
+ * the rounding bit R (0 or 1),
+ * the sign sst of the secondary term (negative, zero, positive),
+ correct the sum if need be (± 1 ulp) and determine the corresponding
+ ternary value, as in the table below; a question mark means that the
+ exact value is the middle of two consecutive machine numbers, and a
+ special rule is used to determine the rounding, equivalent to one of
+ the enclosing lines (the ones where sst is - or +):
+ * For nearest-even:
+ - last significand bit = 0: choose line where correction = 0;
+ - last significand bit = 1: choose line where correction ≠ 0.
+ * For nearest-away: choose line where error = +.
+
+ rnd tmd R sst correction ternary
+ ───────────────────────────────────────────────
+ N 1 0 - 0 +
+ N 1 0 0 0 0
+ N 1 0 + 0 -
+ N 1 1 - 0 +
+ N 1 1 0 0 0
+ N 1 1 + 0 -
+ ───────────────────────────────────────────────
+ N 2 0 - 0 -
+ N 2 0 0 ? ? (halfway case)
+ N 2 0 + + +
+ N 2 1 - - -
+ N 2 1 0 ? ? (halfway case)
+ N 2 1 + 0 +
+ ───────────────────────────────────────────────
+ D 1 0 - - -
+ D 1 0 0 0 0
+ D 1 0 + 0 -
+ D 1 1 - 0 -
+ D 1 1 0 + 0
+ D 1 1 + + -
+ ───────────────────────────────────────────────
+ U 1 0 - - +
+ U 1 0 0 - 0
+ U 1 0 + 0 +
+ U 1 1 - 0 +
+ U 1 1 0 0 0
+ U 1 1 + + +
+ ───────────────────────────────────────────────
+
+9. Check the range (as usual), and return.
+
+An example:
+
+ [1] [2] A [3]
+ u0 = ***** | | . |
+ u1 = ******|** | . |
+ u2 = ********|***** | . |
+ u3 = | *****| . |
+ u4 = | | ****|**
+ u5 = | | ***|*
+
+At iteration 1, minexp is determined to be [1]; thus u0, a part of u1,
+and a part of u2 are taken into account for the truncated sum. Then it
+appears that an important cancellation occurred, and another step (3)
+is needed. Since u1 was truncated, the new maxexp will be minexp, i.e.
+[1]. At iteration 2, minexp is determined to be [2]; thus a part of u1,
+a part of u2, and u3 are taken into account for the truncated sum. Now
+assume that on this example, the error is small enough, but its sign is
+unknown. Thus another step (3) is needed, with the conditions of (7).
+Since no numbers were truncated at the previous iteration, maxexp is
+the maximum exponent of the remaining numbers, here the one of u4, and
+minexp is determined to be [3]. Assume that the sign of the error can
+be determined now, so that we can return the rounded result with the
+ternary value.
+
+As a bonus, this will also solve overflow, underflow and normalization
+issues, since everything is done in fixed point and the output exponent
+will be considered only at the end (early overflow detection could also
+be done).
+
+A limb L = *zp in memory will generally contain a part of a significand.
+One can define its exponent ze, such that the actual value of this limb
+is L * 2^(ze-GMP_NUMB_BITS), i.e. ze is its exponent where the limb is
+regarded as a number in [1/2,1[. If an array of limbs zp[] is regarded
+as a significand in [1/2,1[, then the exponent of its actual value is
+also ze.
+
+Variables used in the implementation:
+ tp: pointer to a temporary area that will contain a shifted value.
+ wp: pointer to the accumulator.
+ ts: size of the temporary area, in limbs; then wp = tp + ts.
+ ws: size of the accumulator, in limbs.
+ wq: size of the accumulator, in bits.
+ rn: number n' of inputs that are regular numbers (regular inputs).
+ logn: ceil(log2(rn)).
+ cq: logn + 1.
+ sq: output precision (precision of the sum).
+ maxexp: the maximum exponent of the bit-window of the inputs that is
+ taken into account (for the current iteration), excluded;
+ after the sum, it gets the value of maxexp2 until the next
+ iteration.
+ minexp: the minimum exponent of the bit-window of the inputs that is
+ taken into account (for the current iteration), included.
+
+Note 1: Data locality can be improved after the first iteration if the
+shifted values are stored at the end of the temporary area instead of
+the beginning. The reason is that only the least significant part of
+the accumulator will be used once a good approximate value of the sum
+is known, and the accumulator lies just after the temporary area. But
+the gain would probably not be noticeable in practice.
+
+Note 2: At step (4), it was considered to determine the number of
+cancelled limbs instead of the number of cancelled bits in order to
+avoid a non-trivial shift at step (6), making this step a bit faster
+(but particularly in small precisions, with some possible drawbacks).
+Moreover, on platforms with a MMU, thus most platforms where GNU MPFR
+would be used, we could arrange to reserve a specific address space
+for the accumulator and choose the shift count to be a multiple of a
+memory block size (or actually not to do any shift/copy at all, just
+free the most significant part and allocate a new part when need be),
+so that the shift (copy) of the accumulator contents could just be
+MMU related changes, without any physical memory copy. However, in
+practice, only large precisions with specific inputs would benefit
+from such a choice, which would require non portable code, and the
+relative speedup would remain small since the sum itself is expected
+to take most of the time.
+
+Note 3: Compared with Ziv's strategy, the issue here is not really
+exact values that are very close to a breakpoint, but cancellation.
+Moreover, we do not need to recompute everything at each iteration.
+The issue with the value very close to a breakpoint actually occurs
+at step (8); however, contrary to the usual case, we do not want to
+reiterate with more precision as this could take too much time and
+memory (a much wider accumulator could be needed). Indeed, due to a
+possible "hole" between the inputs, the distance between the exact
+value and a breakpoint can be extremely small.
+
+*** To be considered in future versions ***
+It seems that carry propagation (mpn_add_1 & mpn_sub_1 in the code) is
+most often limited. But consider the following cases, where all inputs
+have the minimal precision 2, and the output precision is p:
+ u0 = 1
+ u_i = (-1)^i * 2^(-p) for i > 0
+Here long carry propagation will occur for each addition of the initial
+iteration, so that the complexity will be O(n*p) instead of O(n+p) if
+we choose to delay carry propagation (however such a choice may slower
+the average case and take more memory, such as around 3*p instead of
+2*p).
+When a new iteration is needed due to cancellation, a second accumulator
+was considered in some early version of the algorithm: the temporary
+results of the computations during the new iteration would be stored in
+this second accumulator, which would generally be small, thus limiting
+carry propagation; this method is actually equivalent to delaying carry
+propagation. It could help in some cases, such as:
+ u0 = 2^q with some q > 0
+ u1 = 1
+ u2 = -2^q
+ u_i = (-1)^i * 2^(-p) for i > 2
+but such examples are very specific cases, and as seen with the first
+example, a better way must be chosen if avoiding long carry propagation
+is regarded as important (in all cases). Moreover, while the use of two
+accumulators does not take much more memory (since both accumulators can
+occupy the same area, with a flexible limit between them), it probably
+makes the code a bit more complex, and noticeably slower if n is small.
diff --git a/src/mpfr-impl.h b/src/mpfr-impl.h
index 8c3af99d4..0caec6f18 100644
--- a/src/mpfr-impl.h
+++ b/src/mpfr-impl.h
@@ -2079,4 +2079,31 @@ __MPFR_DECLSPEC void mpfr_mpz_clear _MPFR_PROTO((mpz_ptr));
#endif
+/******************************************************
+ ************* Value Coverage Checking **************
+ ******************************************************/
+
+#ifdef MPFR_COV_CHECK
+
+/* Variable names should start with the __gmpfr_cov_ prefix. */
+
+#define MPFR_COV_SET(X) (__gmpfr_cov_ ## X = 1)
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+__MPFR_DECLSPEC extern int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2];
+
+#if defined (__cplusplus)
+}
+#endif
+
+#else
+
+#define MPFR_COV_SET(X) ((void) 0)
+
+#endif
+
+
#endif
diff --git a/src/sum.c b/src/sum.c
index 1d30d9201..2c22ad7fb 100644
--- a/src/sum.c
+++ b/src/sum.c
@@ -1,6 +1,6 @@
/* Sum -- efficiently sum a list of floating-point numbers
-Copyright 2004-2015 Free Software Foundation, Inc.
+Copyright 2014-2015 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.
This file is part of the GNU MPFR Library.
@@ -20,327 +20,1152 @@ along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
-/* Reference: James Demmel and Yozo Hida, Fast and accurate floating-point
- summation with application to computational geometry, Numerical Algorithms,
- volume 37, number 1-4, pages 101--112, 2004. */
-
#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"
-/* FIXME[VL]: mpfr_sum can take too much memory and too much time.
- Rewrite using mpn, with additions by blocks (most significant first)?
- The algorithm could be as follows:
- 1. Search the maximum exponent max_exp of the inputs.
- 2. Compute the truncated sum in a window around max_exp + log2(N) to
- max_exp - output_prec - 128.
- 3. In case of cancellation, this determines a new maximum exponent;
- so, reiterate at (2), or (1) if the truncated sum is zero (so that
- "holes" will count for nothing).
- 4. Take care of the TMD (something like above, since there can still
- be cancellations).
- As a bonus, this will also solve overflow and underflow issues, since
- everything is done in fixed point and the output exponent will be
- considered only at the end (early overflow detection can also be done).
+/* See the doc/sum.txt file for the algorithm and a part of its proof
+(this will later go into algorithms.tex).
Note: see the following paper and its references:
http://www.eecs.berkeley.edu/~hdnguyen/public/papers/ARITH21_Fast_Sum.pdf
+VL: This is very different:
+ In MPFR In the paper & references
+ arbitrary precision fixed precision
+ correct rounding just reproducible rounding
+ integer operations floating-point operations
+ sequencial parallel (& sequential)
*/
-/* I would really like to use "mpfr_srcptr const []" but the norm is buggy:
- it doesn't automatically cast a "mpfr_ptr []" to "mpfr_srcptr const []"
- if necessary. So the choice are:
- mpfr_s ** : OK
- mpfr_s *const* : OK
- mpfr_s **const : OK
- mpfr_s *const*const : OK
- const mpfr_s *const* : no
- const mpfr_s **const : no
- const mpfr_s *const*const: no
- VL: this is not a bug, but a feature. See the reason here:
- http://c-faq.com/ansi/constmismatch.html
-*/
-static void heap_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *);
-static void count_sort (mpfr_srcptr *const, unsigned long, mpfr_srcptr *,
- mpfr_exp_t, mpfr_uexp_t);
-
-/* Either sort the tab in perm and returns 0
- Or returns 1 for +INF, -1 for -INF and 2 for NAN.
- Also set *maxprec to the maximal precision of tab[0..n-1] and of the
- initial value of *maxprec.
-*/
-int
-mpfr_sum_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm,
- mpfr_prec_t *maxprec)
+#ifdef MPFR_COV_CHECK
+int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2] = { 0 };
+#endif
+
+/* Update minexp after detecting a potential integer overflow in extreme
+ cases (only 32-bit machines may be concerned in practice). */
+#define UPDATE_MINEXP(E,SH) \
+ do \
+ { \
+ mpfr_prec_t sh = (SH); \
+ MPFR_ASSERTN ((E) >= MPFR_EXP_MIN + sh); \
+ minexp = (E) - sh; \
+ } \
+ while (0)
+
+/* Accumulate a new [minexp,maxexp[ block into (wp,ws). If e and err denote
+ * the exponents of the computed result and of the error bound respectively,
+ * while e - err is less than some given bound (due to cancellation), shift
+ * the accumulator and reiterate.
+ * wp: pointer to the accumulator (least significant limb first).
+ * ws: size of the accumulator.
+ * wq: precision of the accumulator (ws * GMP_NUMB_BITS).
+ * x: array of the input numbers.
+ * n: size of this array (number of inputs).
+ * minexp: exponent of the least significant bit of the block.
+ * maxexp: exponent of the block (maximum exponent + 1).
+ * tp: pointer to a temporary area.
+ * ts: size of this temporary area.
+ * logn: ceil(log2(rn)), where rn is the number of regular inputs.
+ * prec: minimal value of e - err (see below).
+ * ep: pointer to mpfr_exp_t (see below), or a null pointer.
+ * minexpp: pointer to mpfr_exp_t (see below).
+ * maxexpp: pointer to mpfr_exp_t (see below).
+ * This function returns the number of cancelled bits (>= 1), or 0
+ * if the accumulator is 0 (then the exact sum is necessarily 0).
+ * In the former case, the function also returns:
+ * - in ep: the exponent e of the computed result;
+ * - in minexpp: the last value of minexp.
+ * - in maxexpp: the new value of maxexp (for the next iteration).
+ * Notes:
+ * - minexp is also the exponent of the least significant bit of the
+ * accumulator;
+ * - the temporary area must be large enough to hold a shifted input
+ * block, and the value of ts is used only when the full assertions
+ * are checked (i.e. with the --enable-assert configure option), to
+ * check that a buffer overflow doesn't occur;
+ * - contrary to the returned value of minexp (the value in the last
+ * iteration), the returned value of maxexp is the one for the next
+ * iteration (= maxexp2 of the last iteration).
+ */
+static mpfr_prec_t
+sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, mpfr_ptr *const x,
+ unsigned long n, mpfr_exp_t minexp, mpfr_exp_t maxexp,
+ mp_limb_t *tp, mp_size_t ts, int logn, mpfr_prec_t prec,
+ mpfr_exp_t *ep, mpfr_exp_t *minexpp, mpfr_exp_t *maxexpp)
{
- mpfr_exp_t min, max;
- mpfr_uexp_t exp_num;
- unsigned long i;
- int sign_inf;
-
- sign_inf = 0;
- min = MPFR_EMIN_MAX;
- max = MPFR_EMAX_MIN;
- for (i = 0; i < n; i++)
+ MPFR_LOG_FUNC
+ (("ws=%Pd ts=%Pd prec=%Pd", (mpfr_prec_t) ws, (mpfr_prec_t) ts, prec),
+ ("", 0));
+
+ MPFR_ASSERTD (prec >= 0);
+
+ /* Consistency check. */
+ MPFR_ASSERTD (wq == (mpfr_prec_t) ws * GMP_NUMB_BITS);
+
+ while (1)
{
- if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (tab[i])))
- {
- if (MPFR_IS_NAN (tab[i]))
- return 2; /* Return NAN code */
- else if (MPFR_IS_INF (tab[i]))
+ mpfr_exp_t maxexp2 = MPFR_EXP_MIN;
+ unsigned long i;
+
+ MPFR_LOG_MSG (("sum_raw loop: "
+ "maxexp=%" MPFR_EXP_FSPEC "d "
+ "minexp=%" MPFR_EXP_FSPEC "d\n",
+ (mpfr_eexp_t) maxexp, (mpfr_eexp_t) minexp));
+
+ MPFR_ASSERTD (maxexp > minexp);
+
+ for (i = 0; i < n; i++)
+ if (! MPFR_IS_SINGULAR (x[i]))
+ {
+ mp_limb_t *dp, *vp;
+ mp_size_t ds, vs, vds;
+ mpfr_exp_t xe, vd;
+ mpfr_prec_t xq;
+ int tr;
+
+ xe = MPFR_GET_EXP (x[i]);
+ xq = MPFR_GET_PREC (x[i]);
+
+ vp = MPFR_MANT (x[i]);
+ vs = MPFR_PREC2LIMBS (xq);
+ vd = xe - vs * GMP_NUMB_BITS - minexp;
+ /* vd is the exponent of the least significant represented bit of
+ x[i] (including the trailing bits, whose value is 0) minus the
+ exponent of the least significant bit of the accumulator. To
+ make the code simpler, we won't try to filter out the trailing
+ bits of x[i]. */
+
+ if (vd < 0)
+ {
+ /* This covers the following cases:
+ * [-+- accumulator ---]
+ * [---|----- x[i] ------|--]
+ * | [----- x[i] --|--]
+ * | |[----- x[i] -----]
+ * | | [----- x[i] -----]
+ * maxexp minexp
+ */
+
+ if (xe <= minexp)
+ {
+ /* x[i] is entirely after the LSB of the accumulator,
+ so that it will be ignored at this iteration. */
+ if (xe > maxexp2)
+ maxexp2 = xe;
+ continue;
+ }
+
+ /* If some significant bits of x[i] are after the LSB of the
+ accumulator, then maxexp2 will necessarily be minexp. */
+ if (MPFR_LIKELY (xe - xq < minexp))
+ maxexp2 = minexp;
+
+ /* We need to ignore the least |vd| significant bits of x[i].
+ First, let's ignore the least vds = |vd| / GMP_NUMB_BITS
+ limbs. */
+ vd = - vd;
+ vds = vd / GMP_NUMB_BITS;
+ vs -= vds;
+ MPFR_ASSERTD (vs > 0); /* see xe <= minexp test above */
+ vp += vds;
+ vd -= vds * GMP_NUMB_BITS;
+ MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
+
+ if (xe > maxexp)
+ {
+ vs -= (xe - maxexp) / GMP_NUMB_BITS;
+ MPFR_ASSERTD (vs > 0);
+ tr = (xe - maxexp) % GMP_NUMB_BITS;
+ }
+ else
+ tr = 0;
+
+ if (vd != 0)
+ {
+ MPFR_ASSERTD (vs <= ts);
+ mpn_rshift (tp, vp, vs, vd);
+ vp = tp;
+ tr += vd;
+ if (tr >= GMP_NUMB_BITS)
+ {
+ vs--;
+ tr -= GMP_NUMB_BITS;
+ }
+ MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
+ if (tr != 0)
+ {
+ tp[vs-1] &= MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
+ tr = 0;
+ }
+ /* Truncation has now been taken into account. */
+ MPFR_ASSERTD (tr == 0);
+ }
+
+ dp = wp;
+ ds = ws;
+ }
+ else /* vd >= 0 */
+ {
+ /* This covers the following cases:
+ * [-+- accumulator ---]
+ * [- x[i] -] |
+ * [---|-- x[i] ------] |
+ * [------|-- x[i] ---------]
+ * | [- x[i] -] |
+ * maxexp minexp
+ */
+
+ /* We need to ignore the least vd significant bits
+ of the accumulator. First, let's ignore the least
+ vds = vd / GMP_NUMB_BITS limbs. -> (dp,ds) */
+ vds = vd / GMP_NUMB_BITS;
+ ds = ws - vds;
+ if (ds <= 0)
+ continue;
+ dp = wp + vds;
+ vd -= vds * GMP_NUMB_BITS;
+ MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
+
+ /* The low part of x[i] (to be determined) will have to be
+ shifted vd bits to the left if vd != 0. */
+
+ if (xe > maxexp)
+ {
+ vs -= (xe - maxexp) / GMP_NUMB_BITS;
+ if (vs <= 0)
+ continue;
+ tr = (xe - maxexp) % GMP_NUMB_BITS;
+ }
+ else
+ tr = 0;
+
+ MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS && vs > 0);
+
+ /* We need to consider the least significant vs limbs of x[i]
+ except the most significant tr bits. */
+
+ if (vd != 0)
+ {
+ mp_limb_t carry;
+
+ MPFR_ASSERTD (vs <= ts);
+ carry = mpn_lshift (tp, vp, vs, vd);
+ tr -= vd;
+ if (tr < 0)
+ {
+ tr += GMP_NUMB_BITS;
+ MPFR_ASSERTD (vs + 1 <= ts);
+ tp[vs++] = carry;
+ }
+ MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
+ vp = tp;
+ }
+ }
+
+ MPFR_ASSERTD (vs > 0 && vs <= ds);
+
+ /* We can't truncate the most significant limb of the input
+ (in case it hasn't been shifted to the temporary area).
+ So, let's ignore it now. It will be taken into account
+ via carry propagation after the addition. */
+ if (tr != 0)
+ vs--;
+
+ if (MPFR_IS_POS (x[i]))
+ {
+ mp_limb_t carry;
+
+ carry = vs > 0 ? mpn_add_n (dp, dp, vp, vs) : 0;
+ MPFR_ASSERTD (carry <= 1);
+ if (tr != 0)
+ carry += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
+ mpn_add_1 (dp + vs, dp + vs, ds - vs, carry);
+ }
+ else
+ {
+ mp_limb_t borrow;
+
+ borrow = vs > 0 ? mpn_sub_n (dp, dp, vp, vs) : 0;
+ MPFR_ASSERTD (borrow <= 1);
+ if (tr != 0)
+ borrow += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
+ mpn_sub_1 (dp + vs, dp + vs, ds - vs, borrow);
+ }
+ }
+
+ {
+ mpfr_prec_t cancel; /* number of cancelled bits */
+ mp_size_t wi; /* index in the accumulator */
+ mp_limb_t a, b;
+ int cnt;
+
+ cancel = 0;
+ wi = ws - 1;
+ MPFR_ASSERTD (wi >= 0);
+ a = wp[wi] >> (GMP_NUMB_BITS - 1) ? MPFR_LIMB_MAX : MPFR_LIMB_ZERO;
+
+ while (wi >= 0)
+ if ((b = wp[wi]) == a)
{
- if (sign_inf == 0) /* No previous INF */
- sign_inf = MPFR_SIGN (tab[i]);
- else if (sign_inf != MPFR_SIGN (tab[i]))
- return 2; /* Return NAN */
+ cancel += GMP_NUMB_BITS;
+ wi--;
+ }
+ else
+ {
+ b ^= a;
+ MPFR_ASSERTD (b != 0);
+ count_leading_zeros (cnt, b);
+ cancel += cnt;
+ break;
}
- }
- else
- {
- MPFR_ASSERTD (MPFR_IS_PURE_FP (tab[i]));
- if (MPFR_GET_EXP (tab[i]) < min)
- min = MPFR_GET_EXP(tab[i]);
- if (MPFR_GET_EXP (tab[i]) > max)
- max = MPFR_GET_EXP(tab[i]);
- }
- if (MPFR_PREC (tab[i]) > *maxprec)
- *maxprec = MPFR_PREC (tab[i]);
- }
- if (MPFR_UNLIKELY (sign_inf != 0))
- return sign_inf;
- exp_num = max - min + 1;
- /* FIXME : better test */
- if (exp_num > n * MPFR_INT_CEIL_LOG2 (n))
- heap_sort (tab, n, perm);
- else
- count_sort (tab, n, perm, min, exp_num);
- return 0;
+ if (wi >= 0 || a != MPFR_LIMB_ZERO) /* accumulator != 0 */
+ {
+ mpfr_exp_t e; /* exponent of the computed result */
+ mpfr_exp_t err; /* exponent of the error bound */
+
+ MPFR_LOG_MSG (("accumulator %s 0, cancel=%Pd\n",
+ a != MPFR_LIMB_ZERO ? "<" : ">", cancel));
+
+ MPFR_ASSERTD (cancel > 0);
+ e = minexp + wq - cancel;
+ MPFR_ASSERTD (e >= minexp);
+ err = maxexp2 + logn; /* OK even if maxexp2 == MPFR_EXP_MIN */
+
+ /* The absolute value of the truncated sum is in the binade
+ [2^(e-1),2^e] (closed on both ends due to two's complement).
+ The error is strictly less than 2^err (and is 0 if
+ maxexp2 == MPFR_EXP_MIN). */
+
+ /* This basically tests whether err <= e - prec without
+ potential integer overflow (since prec >= 0)... */
+ if (err <= e && SAFE_DIFF (mpfr_uexp_t, e, err) >= prec)
+ {
+ MPFR_LOG_MSG (("(err=%" MPFR_EXP_FSPEC "d) <= (e=%"
+ MPFR_EXP_FSPEC "d) - (prec=%Pd)\n",
+ (mpfr_eexp_t) err, (mpfr_eexp_t) e, prec));
+ if (ep != NULL)
+ *ep = e;
+ *minexpp = minexp;
+ *maxexpp = maxexp2;
+ MPFR_LOG_MSG (("return with minexp=%" MPFR_EXP_FSPEC
+ "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
+ (mpfr_eexp_t) minexp, (mpfr_eexp_t) maxexp2,
+ maxexp2 == MPFR_EXP_MIN ?
+ " (MPFR_EXP_MIN)" : ""));
+ return cancel;
+ }
+ else
+ {
+ mpfr_exp_t diffexp;
+ mpfr_prec_t shiftq;
+ mpfr_size_t shifts;
+ int shiftc;
+
+ MPFR_LOG_MSG (("e=%" MPFR_EXP_FSPEC "d err=%" MPFR_EXP_FSPEC
+ "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
+ (mpfr_eexp_t) e, (mpfr_eexp_t) err,
+ (mpfr_eexp_t) maxexp2,
+ maxexp2 == MPFR_EXP_MIN ?
+ " (MPFR_EXP_MIN)" : ""));
+
+ diffexp = err - e;
+ if (diffexp < 0)
+ diffexp = 0;
+ /* diffexp = max(0, err - e) */
+
+ MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d\n",
+ (mpfr_eexp_t) diffexp));
+
+ MPFR_ASSERTD (diffexp < cancel - 2);
+ shiftq = cancel - 2 - (mpfr_prec_t) diffexp;
+ MPFR_ASSERTD (shiftq > 0);
+ shifts = shiftq / GMP_NUMB_BITS;
+ shiftc = shiftq % GMP_NUMB_BITS;
+ MPFR_LOG_MSG (("shiftq = %Pd = %Pd * GMP_NUMB_BITS + %d\n",
+ shiftq, (mpfr_prec_t) shifts, shiftc));
+ if (MPFR_LIKELY (shiftc != 0))
+ mpn_lshift (wp + shifts, wp, ws - shifts, shiftc);
+ else
+ MPN_COPY_DECR (wp + shifts, wp, ws - shifts);
+ MPN_ZERO (wp, shifts);
+ minexp -= shiftq;
+ }
+ }
+ else if (maxexp2 == MPFR_EXP_MIN)
+ {
+ MPFR_LOG_MSG (("accumulator = 0, maxexp2 = MPFR_EXP_MIN\n", 0));
+ return 0;
+ }
+ else
+ {
+ MPFR_LOG_MSG (("accumulator = 0, reiterate\n", 0));
+ UPDATE_MINEXP (maxexp2, wq - (logn + 1));
+ /* Note: the logn + 1 corresponds to cq in the main code. */
+ }
+ }
+
+ maxexp = maxexp2;
+ }
}
-#define GET_EXP1(x) (MPFR_IS_ZERO (x) ? min : MPFR_GET_EXP (x))
-/* Performs a count sort of the entries */
-static void
-count_sort (mpfr_srcptr *const tab, unsigned long n,
- mpfr_srcptr *perm, mpfr_exp_t min, mpfr_uexp_t exp_num)
+/**********************************************************************/
+
+/* Generic case: all the inputs are finite numbers,
+ with at least 3 regular numbers. */
+static int
+sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd,
+ mpfr_exp_t maxexp, unsigned long rn)
{
- unsigned long *account;
- unsigned long target_rank, i;
- MPFR_TMP_DECL(marker);
+ mp_limb_t *sump;
+ mp_limb_t *tp; /* pointer to a temporary area */
+ mp_limb_t *wp; /* pointer to the accumulator */
+ mp_size_t ts; /* size of the temporary area, in limbs */
+ mp_size_t ws; /* size of the accumulator, in limbs */
+ mpfr_prec_t wq; /* size of the accumulator, in bits */
+ int logn; /* ceil(log2(rn)) */
+ int cq;
+ mpfr_prec_t sq;
+ int inex;
+ MPFR_TMP_DECL (marker);
+
+ MPFR_LOG_FUNC
+ (("n=%lu rnd=%d maxexp=%" MPFR_EXP_FSPEC "d rn=%lu",
+ n, rnd, (mpfr_eexp_t) maxexp, rn),
+ ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
+
+ MPFR_ASSERTD (rn >= 3 && rn <= n);
+
+ /* In practice, no integer overflow on the exponent. */
+ MPFR_STAT_STATIC_ASSERT (MPFR_EXP_MAX - MPFR_EMAX_MAX >=
+ sizeof (unsigned long) * CHAR_BIT);
+
+ /* Set up some variables and the accumulator. */
- /* Reserve a place for potential 0 (with EXP min-1)
- If there is no zero, we only lose one unused entry */
- min--;
- exp_num++;
+ sump = MPFR_MANT (sum);
+
+ /* rn is the number of regular inputs (the singular ones will be
+ ignored). Compute logn = ceil(log2(rn)). */
+ logn = MPFR_INT_CEIL_LOG2 (rn);
+ MPFR_ASSERTD (logn >= 2);
+
+ MPFR_LOG_MSG (("logn=%d maxexp=%" MPFR_EXP_FSPEC "d\n",
+ logn, (mpfr_eexp_t) maxexp));
+
+ sq = MPFR_GET_PREC (sum);
+ cq = logn + 1;
+
+ /* First determine the size of the accumulator. */
+ ws = MPFR_PREC2LIMBS (cq + sq + logn + 2);
+ wq = (mpfr_prec_t) ws * GMP_NUMB_BITS;
+ MPFR_ASSERTD (wq - cq - sq >= 4);
+
+ MPFR_LOG_MSG (("cq=%d sq=%Pd logn=%d wq=%Pd\n", cq, sq, logn, wq));
+
+ /* An input block will have up to wq - cq bits, and its shifted value
+ (to be correctly aligned) may take GMP_NUMB_BITS - 1 additional bits. */
+ ts = MPFR_PREC2LIMBS (wq - cq + GMP_NUMB_BITS - 1);
- /* Performs a counting sort of the entries */
MPFR_TMP_MARK (marker);
- account = (unsigned long *) MPFR_TMP_ALLOC (exp_num * sizeof *account);
- for (i = 0; i < exp_num; i++)
- account[i] = 0;
- for (i = 0; i < n; i++)
- account[GET_EXP1 (tab[i]) - min]++;
- for (i = exp_num - 1; i >= 1; i--)
- account[i - 1] += account[i];
- for (i = 0; i < n; i++)
- {
- target_rank = --account[GET_EXP1 (tab[i]) - min];
- perm[target_rank] = tab[i];
- }
+
+ tp = MPFR_TMP_LIMBS_ALLOC (ts + ws);
+ wp = tp + ts;
+
+ MPN_ZERO (wp, ws); /* zero the accumulator */
+
+ {
+ mpfr_exp_t minexp; /* exponent of the LSB of the block */
+ mpfr_prec_t cancel; /* number of cancelled bits */
+ mpfr_exp_t e; /* temporary exponent of the result */
+ mpfr_exp_t u; /* temporary exponent of the ulp (quantum) */
+ mp_limb_t rbit; /* rounding bit (corrected in halfway case) */
+ int corr; /* correction term (from -1 to 2) */
+ int sd, sh; /* shift counts */
+ mp_size_t sn; /* size of the output number */
+ int tmd; /* 0: the TMD does not occur
+ 1: the TMD occurs on a machine number
+ 2: the TMD occurs on a midpoint */
+ int pos; /* 0 if negative sum, 1 if positive */
+
+ MPFR_LOG_MSG (("Compute an approximation with sum_raw...\n", 0));
+
+ UPDATE_MINEXP (maxexp, wq - cq);
+ cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts,
+ logn, sq + 3, &e, &minexp, &maxexp);
+
+ if (MPFR_UNLIKELY (cancel == 0))
+ {
+ /* The exact sum is zero. Since not all inputs are 0, the sum
+ * is +0 except in MPFR_RNDD, as specified according to the
+ * IEEE 754 rules for the addition of two numbers.
+ */
+ MPFR_SET_SIGN (sum, (rnd != MPFR_RNDD ?
+ MPFR_SIGN_POS : MPFR_SIGN_NEG));
+ MPFR_SET_ZERO (sum);
+ MPFR_TMP_FREE (marker);
+ MPFR_RET (0);
+ }
+
+ /* The absolute value of the truncated sum is in the binade
+ [2^(e-1),2^e] (closed on both ends due to two's complement).
+ The error is strictly less than 2^(maxexp + logn) (and is 0
+ if maxexp == MPFR_EXP_MIN). */
+
+ u = e - sq; /* e being the exponent, u is the ulp of the target */
+
+ MPFR_LOG_MSG (("cancel=%Pd"
+ " e=%" MPFR_EXP_FSPEC "d"
+ " u=%" MPFR_EXP_FSPEC "d"
+ " maxexp=%" MPFR_EXP_FSPEC "d%s\n",
+ cancel, (mpfr_eexp_t) e, (mpfr_eexp_t) u,
+ (mpfr_eexp_t) maxexp,
+ maxexp == MPFR_EXP_MIN ? " (MPFR_EXP_MIN)" : ""));
+
+ /* Let's copy/shift the bits [max(u,minexp),e) to the
+ most significant part of the destination, and zero
+ the least significant part (there can be one only if
+ u < minexp). The trailing bits of the destination may
+ contain garbage at this point. Then, at the same time,
+ take the absolute value and do an initial rounding,
+ zeroing the trailing bits at this point.
+ TODO: This may be improved by merging some operations
+ is particular case. The average speed-up may not be
+ significant, though. To be tested... */
+
+ sn = MPFR_PREC2LIMBS (sq);
+ sd = (mpfr_prec_t) sn * GMP_NUMB_BITS - sq;
+ sh = cancel % GMP_NUMB_BITS;
+
+ MPFR_ASSERTD (sd >= 0 && sd < GMP_NUMB_BITS);
+
+ if (MPFR_LIKELY (u > minexp))
+ {
+ mpfr_prec_t tq;
+ mp_size_t ei, fi, wi;
+ int td;
+
+ tq = u - minexp;
+ MPFR_ASSERTD (tq > 0); /* number of trailing bits */
+ MPFR_LOG_MSG (("tq=%Pd\n", tq));
+
+ wi = tq / GMP_NUMB_BITS;
+
+ if (MPFR_LIKELY (sh != 0))
+ {
+ ei = (e - minexp) / GMP_NUMB_BITS;
+ fi = ei - (sn - 1);
+ MPFR_ASSERTD (fi == wi || fi == wi + 1);
+ mpn_lshift (sump, wp + fi, sn, sh);
+ if (fi != wi)
+ sump[0] |= wp[wi] >> (GMP_NUMB_BITS - sh);
+ }
+ else
+ {
+ MPFR_ASSERTD ((mpfr_prec_t) (ws - (wi + sn)) * GMP_NUMB_BITS
+ == cancel);
+ MPN_COPY (sump, wp + wi, sn);
+ }
+
+ /* Determine the rounding bit, which is represented. */
+ td = tq % GMP_NUMB_BITS;
+ rbit = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) :
+ (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1));
+ MPFR_ASSERTD (rbit == 0 || rbit == 1);
+ MPFR_LOG_MSG (("rbit=%d\n", (int) rbit));
+
+ if (maxexp == MPFR_EXP_MIN)
+ {
+ /* The sum in the accumulator is exact. Determine inex:
+ inex = 0 if the final sum is exact, else 1, i.e.
+ inex = rounding bit || sticky bit. In round to nearest,
+ also determine the rounding direction: obtained from
+ the rounding bit possibly except in halfway cases. */
+ if (MPFR_LIKELY (rbit == 0 ||
+ (rnd == MPFR_RNDN && ((wp[wi] >> td) & 1) == 0)))
+ {
+ /* We need to determine the sticky bit, either to set inex
+ (if the rounding bit is 0) or to possibly "correct" rbit
+ (round to nearest, halfway case rounded downward) from
+ which the rounding direction will be determined. */
+ MPFR_LOG_MSG (("Determine the sticky bit...\n", 0));
+
+ inex = td >= 2 ? (wp[wi] & MPFR_LIMB_MASK (td - 1)) != 0
+ : td == 0 ?
+ (MPFR_ASSERTD (wi >= 1),
+ (wp[--wi] & MPFR_LIMB_MASK (GMP_NUMB_BITS - 1)) != 0)
+ : 0;
+
+ if (!inex)
+ {
+ mp_size_t wj = wi;
+
+ while (!inex && wj > 0)
+ inex = wp[--wj] != 0;
+ if (!inex && rbit != 0)
+ {
+ /* sticky bit = 0, rounding bit = 1,
+ i.e. halfway case, which will be
+ rounded downward (see earlier if). */
+ MPFR_ASSERTD (rnd == MPFR_RNDN);
+ inex = 1;
+ rbit = 0; /* even rounding downward */
+ MPFR_LOG_MSG (("Halfway case rounded downward;"
+ " set inex=1 rbit=0\n", 0));
+ }
+ }
+ }
+ else
+ inex = 1;
+ tmd = 0; /* We can round correctly -> no TMD. */
+ }
+ else /* maxexp > MPFR_EXP_MIN */
+ {
+ mpfr_exp_t d;
+ mp_limb_t limb, mask;
+ int nbits;
+
+ inex = 1; /* We do not know whether the sum is exact. */
+
+ MPFR_ASSERTD (u <= MPFR_EMAX_MAX);
+ d = u - (maxexp + logn); /* representable */
+ MPFR_ASSERTD (d >= 3);
+
+ /* Let's see whether the TMD occurs by looking at the d bits
+ following the ulp bit, or the d-1 bits after the rounding
+ bit. */
+
+ /* First chunk after the rounding bit... It starts at:
+ (wi,td-2) if td >= 2,
+ (wi-1,td-2+GMP_NUMB_BITS) if td < 2. */
+ if (td == 0)
+ {
+ MPFR_ASSERTD (wi >= 1);
+ limb = wp[--wi];
+ mask = MPFR_LIMB_MASK (GMP_NUMB_BITS - 1);
+ nbits = GMP_NUMB_BITS;
+ }
+ else if (td == 1)
+ {
+ limb = wi >= 1 ? wp[--wi] : MPFR_LIMB_ZERO;
+ mask = MPFR_LIMB_MAX;
+ nbits = GMP_NUMB_BITS + 1;
+ }
+ else /* td >= 2 */
+ {
+ MPFR_ASSERTD (td >= 2);
+ limb = wp[wi];
+ mask = MPFR_LIMB_MASK (td - 1);
+ nbits = td;
+ }
+
+ /* nbits: number of bits of the first chunk + 1
+ (the +1 is for the rounding bit). */
+
+ if (nbits > d)
+ {
+ /* Some low significant bits must be ignored. */
+ limb >>= nbits - d;
+ mask >>= nbits - d;
+ d = 0;
+ }
+ else
+ {
+ d -= nbits;
+ MPFR_ASSERTD (d >= 0);
+ }
+
+ limb &= mask;
+ tmd =
+ limb == MPFR_LIMB_ZERO ?
+ (rbit == 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) :
+ limb == mask ?
+ (limb = MPFR_LIMB_MAX,
+ rbit != 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) : 0;
+
+ while (tmd != 0 && d != 0)
+ {
+ mp_limb_t limb2;
+
+ MPFR_ASSERTD (d > 0);
+ if (wi == 0)
+ {
+ /* The non-represented bits are 0's. */
+ if (limb != MPFR_LIMB_ZERO)
+ tmd = 0;
+ break;
+ }
+ MPFR_ASSERTD (wi > 0);
+ limb2 = wp[--wi];
+ if (d < GMP_NUMB_BITS)
+ {
+ int c = GMP_NUMB_BITS - d;
+ MPFR_ASSERTD (c > 0 && c < GMP_NUMB_BITS);
+ if ((limb2 >> c) != (limb >> c))
+ tmd = 0;
+ break;
+ }
+ if (limb2 != limb)
+ tmd = 0;
+ d -= GMP_NUMB_BITS;
+ }
+ }
+ }
+ else /* u <= minexp */
+ {
+ mp_size_t en;
+
+ en = (e - minexp + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS;
+ if (MPFR_LIKELY (sh != 0))
+ mpn_lshift (sump + sn - en, wp, en, sh);
+ else if (MPFR_UNLIKELY (en > 0))
+ MPN_COPY (sump + sn - en, wp, en);
+ if (sn > en)
+ MPN_ZERO (sump, sn - en);
+
+ /* The exact value of the accumulator has been copied.
+ * The TMD occurs if and only if there are bits still
+ * not taken into account, and if it occurs, this is
+ * necessarily on a machine number (-> tmd = 1).
+ */
+ rbit = 0;
+ inex = tmd = maxexp != MPFR_EXP_MIN;
+ }
+
+ /* Leading bit: 1 if positive, 0 if negative. */
+ pos = sump[sn-1] >> (GMP_NUMB_BITS - 1);
+
+ MPFR_ASSERTD (rbit == 0 || rbit == 1);
+
+ MPFR_LOG_MSG (("tmd=%d rbit=%d inex=%d pos=%d\n",
+ tmd, (int) rbit, inex, pos));
+
+ /* Here, if the final sum is known to be exact, inex = 0, otherwise
+ * inex = 1. We have a truncated significand, a trailing term t such
+ * that 0 <= t < 1 ulp, and an error on the trailing term bounded by
+ * t' in absolute value. Thus the error e on the truncated significand
+ * satisfies -t' <= e < 1 ulp + t'. Thus one has 4 correction cases
+ * denoted by a corr value between -1 and 2 depending on e, pos, rbit,
+ * and the rounding mode:
+ * -1: equivalent to nextbelow;
+ * 0: the truncated significand is not corrected;
+ * 1: add 1 ulp;
+ * 2: add 1 ulp, then nextabove.
+ * The nextbelow and nextabove are used here since there may be a
+ * change of the binade.
+ */
+
+ if (tmd == 0) /* no TMD */
+ {
+ switch (rnd)
+ {
+ case MPFR_RNDD:
+ corr = 0;
+ break;
+ case MPFR_RNDU:
+ corr = inex;
+ break;
+ case MPFR_RNDZ:
+ corr = inex && !pos;
+ break;
+ case MPFR_RNDA:
+ corr = inex && pos;
+ break;
+ default:
+ MPFR_ASSERTN (rnd == MPFR_RNDN);
+ /* Note: for halfway cases (maxexp == MPFR_EXP_MIN) that are
+ rounded downward, rbit has been changed to 0 so that corr
+ is set correctly. */
+ corr = rbit;
+ }
+ MPFR_ASSERTD (corr == 0 || corr == 1);
+ if (inex && corr == 0) /* two's complement significand decreased */
+ inex = -1;
+ }
+ else
+ {
+ mpfr_exp_t err; /* exponent of the error bound */
+ mp_size_t zs;
+ int sst; /* sign of the secondary term */
+
+ MPFR_ASSERTD (maxexp > MPFR_EXP_MIN);
+ MPFR_ASSERTD (tmd == 1 || tmd == 2);
+
+ /* New accumulator size */
+ ws = MPFR_PREC2LIMBS (wq - sq);
+ wq = (mpfr_prec_t) ws * GMP_NUMB_BITS;
+
+ err = maxexp + logn;
+
+ MPFR_LOG_MSG (("TMD with"
+ " maxexp=%" MPFR_EXP_FSPEC "d"
+ " err=%" MPFR_EXP_FSPEC "d"
+ " ws=%Pd"
+ " wq=%Pd\n",
+ (mpfr_eexp_t) maxexp, (mpfr_eexp_t) err,
+ (mpfr_prec_t) ws, wq));
+
+ /* The d-1 bits from u-2 to u-d (= err) are identical. */
+
+ if (err >= minexp)
+ {
+ mpfr_prec_t tq;
+ mp_size_t wi;
+ int td;
+
+ /* Let's keep the last 2 over the d-1 identical bits and the
+ following bits, i.e. the bits from err+1 to minexp. */
+ tq = err - minexp + 2; /* tq = number of such bits */
+ MPFR_LOG_MSG (("[TMD] tq=%Pd\n", tq));
+ MPFR_ASSERTD (tq >= 2);
+
+ wi = tq / GMP_NUMB_BITS;
+ td = tq % GMP_NUMB_BITS;
+
+ if (td != 0)
+ {
+ wi++; /* number of words with represented bits */
+ td = GMP_NUMB_BITS - td;
+ zs = ws - wi;
+ MPFR_ASSERTD (zs >= 0 && zs < ws);
+ mpn_lshift (wp + zs, wp, wi, td);
+ }
+ else
+ {
+ MPFR_ASSERTD (wi > 0);
+ zs = ws - wi;
+ MPFR_ASSERTD (zs >= 0 && zs < ws);
+ if (zs > 0)
+ MPN_COPY_INCR (wp + zs, wp, wi);
+ }
+
+ UPDATE_MINEXP (minexp, zs * GMP_NUMB_BITS + td);
+ MPFR_ASSERTD (minexp == err + 2 - wq);
+ }
+ else /* err < minexp */
+ {
+ /* At least one of the identical bits is not represented,
+ meaning that it is 0 and all these bits are 0's. Thus
+ the accumulator will be 0. The new minexp is determined
+ from maxexp, with cq bits reserved to avoid an overflow
+ (as in the early steps). */
+ MPFR_LOG_MSG (("[TMD] err < minexp\n", 0));
+ zs = ws;
+
+ /* minexp = maxexp + cq - wq */
+ UPDATE_MINEXP (maxexp, wq - cq);
+ MPFR_ASSERTD (minexp == err + 1 - wq);
+ }
+
+ MPN_ZERO (wp, zs);
+
+ cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts,
+ logn, 0, NULL, &minexp, &maxexp);
+
+ if (cancel != 0)
+ sst = MPFR_LIMB_MSB (wp[ws-1]) == 0 ? 1 : -1;
+ else if (tmd == 1)
+ sst = 0;
+ else
+ {
+ /* For halfway cases, let's virtually eliminate them
+ by setting a sst equivalent to a non-halfway case,
+ which depends on the last bit of the pre-rounded
+ result. */
+ MPFR_ASSERTD (rnd == MPFR_RNDN && tmd == 2);
+ sst = (sump[0] & (MPFR_LIMB_ONE << sd)) ? 1 : -1;
+ }
+
+ MPFR_LOG_MSG (("[TMD] tmd=%d rbit=%d sst=%d\n",
+ tmd, (int) rbit, sst));
+
+ /* Do not consider the corrected sst for MPFR_COV_SET */
+ MPFR_COV_SET (sum_tmd[(int) rnd][tmd-1][rbit]
+ [cancel == 0 ? 1 : sst+1][pos]);
+
+ inex =
+ MPFR_IS_LIKE_RNDD (rnd, pos ? 1 : -1) ? (sst ? -1 : 0) :
+ MPFR_IS_LIKE_RNDU (rnd, pos ? 1 : -1) ? (sst ? 1 : 0) :
+ (MPFR_ASSERTD (rnd == MPFR_RNDN),
+ tmd == 1 ? - sst : sst);
+
+ if (tmd == 2 && sst == (rbit ? -1 : 1))
+ corr = 1 - (int) rbit;
+ else if (MPFR_IS_LIKE_RNDD (rnd, pos ? 1 : -1) && sst == -1)
+ corr = (int) rbit - 1;
+ else if (MPFR_IS_LIKE_RNDU (rnd, pos ? 1 : -1) && sst == +1)
+ corr = (int) rbit + 1;
+ else
+ corr = (int) rbit;
+ }
+
+ MPFR_LOG_MSG (("pos=%d corr=%d inex=%d\n", pos, corr, inex));
+
+ /* Sign handling (-> absolute value and sign), together with
+ rounding. The most common cases are corr = 0 and corr = 1
+ as this is necessarily the case when the TMD did not occur. */
+
+ MPFR_ASSERTD (corr >= -1 && corr <= 2);
+
+ if (pos) /* positive result */
+ {
+ MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
+ MPFR_SET_POS (sum);
+ sump[0] &= ~ MPFR_LIMB_MASK (sd);
+
+ if (corr > 0)
+ {
+ mp_limb_t corr2, carry_out;
+
+ corr2 = (mp_limb_t) corr << sd;
+ /* If corr == 2 && sd == GMP_NUMB_BITS - 1, this overflows
+ to corr2 = 0. This case is taken into account below. */
+
+ carry_out = corr2 != 0 ? mpn_add_1 (sump, sump, sn, corr2) :
+ (MPFR_ASSERTD (sn > 1),
+ mpn_add_1 (sump + 1, sump + 1, sn - 1, MPFR_LIMB_ONE));
+
+ MPFR_ASSERTD (sump[sn-1] >> (GMP_NUMB_BITS - 1) == !carry_out);
+
+ if (MPFR_UNLIKELY (carry_out))
+ {
+ /* Note: The | is important when sump[sn-1] is not 0
+ (this can occur with sn = 1 and corr = 2). TODO:
+ Add something to make sure that this is tested. */
+ sump[sn-1] |= MPFR_LIMB_HIGHBIT;
+ e++;
+ }
+ }
+
+ if (corr < 0)
+ {
+ mpn_sub_1 (sump, sump, sn, MPFR_LIMB_ONE << sd);
+
+ if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
+ {
+ sump[sn-1] |= MPFR_LIMB_HIGHBIT;
+ e--;
+ }
+ }
+ }
+ else /* negative result */
+ {
+ MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) == 0);
+ MPFR_SET_NEG (sum);
+
+ /* abs(x + corr) = - (x + corr) = com(x) + (1 - corr) */
+
+ /* We want to avoid separate mpn_com (or mpn_neg) and mpn_add_1
+ (or mpn_sub_1) operations, as they could yield two loops in
+ some particular cases involving a long sequence of 0's in
+ the low significant bits (except the least significant bit,
+ which doesn't matter). */
+
+ if (corr <= 1)
+ {
+ mp_limb_t corr2;
+
+ /* Here we can just do the correction operation on the
+ least significant limb, then do either a mpn_com or
+ a mpn_neg on the remaining limbs, depending on the
+ carry (BTW, mpn_neg is just a mpn_com with an initial
+ carry propagation: after some point, mpn_neg does a
+ complement). */
+
+ corr2 = (mp_limb_t) (1 - corr) << sd;
+ /* Note: If corr = -1, this can overflow to corr2 = 0.
+ This case is taken into account below. */
+
+ sump[0] = (~ (sump[0] | MPFR_LIMB_MASK (sd))) + corr2;
+
+ if (sump[0] < corr2 || (corr2 == 0 && corr < 0))
+ {
+ if (sn == 1 || ! mpn_neg (sump + 1, sump + 1, sn - 1))
+ {
+ /* Note: The | is important when sump[sn-1] is not 0
+ (this can occur with sn = 1 and corr = -1). TODO:
+ Add something to make sure that this is tested. */
+ sump[sn-1] |= MPFR_LIMB_HIGHBIT;
+ e++;
+ }
+ }
+ else if (sn > 1)
+ mpn_com (sump + 1, sump + 1, sn - 1);
+ }
+ else /* corr == 2 */
+ {
+ mp_limb_t corr2, c;
+ mp_size_t i = 1;
+
+ /* We want to compute com(x) - 1, but GMP doesn't have an
+ operation for that. The fact is that a sequence of low
+ significant bits 1 is invariant. Starting at the first
+ low significant bit 0, we can do the complement with
+ mpn_com. */
+
+ corr2 = MPFR_LIMB_ONE << sd;
+ c = ~ (sump[0] | MPFR_LIMB_MASK (sd));
+ sump[0] = c - corr2;
+
+ if (c == 0)
+ {
+ while (MPFR_ASSERTD (i < sn), sump[i] == MPFR_LIMB_MAX)
+ i++;
+ sump[i] = (~ sump[i]) - 1;
+ i++;
+ }
+
+ if (i < sn)
+ mpn_com (sump + i, sump + i, sn - i);
+ else if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
+ {
+ /* Happens on 01111...111, whose complement is
+ 10000...000, and com(x) - 1 is 01111...111. */
+ sump[sn-1] |= MPFR_LIMB_HIGHBIT;
+ e--;
+ }
+ }
+ }
+
+ MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
+ MPFR_LOG_MSG (("Set exponent e=%" MPFR_EXP_FSPEC "d\n", (mpfr_eexp_t) e));
+ /* e may be outside the current exponent range, but this will be checked
+ with mpfr_check_range below. */
+ MPFR_EXP (sum) = e;
+ } /* main block */
+
MPFR_TMP_FREE (marker);
+ return mpfr_check_range (sum, inex, rnd);
}
+/**********************************************************************/
-#define GET_EXP2(x) (MPFR_IS_ZERO (x) ? MPFR_EMIN_MIN : MPFR_GET_EXP (x))
-
-/* Performs a heap sort of the entries */
-static void
-heap_sort (mpfr_srcptr *const tab, unsigned long n, mpfr_srcptr *perm)
+int
+mpfr_sum (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd)
{
- unsigned long dernier_traite;
- unsigned long i, pere;
- mpfr_srcptr tmp;
- unsigned long fils_gauche, fils_droit, fils_indigne;
- /* Reminder of a heap structure :
- node(i) has for left son node(2i +1) and right son node(2i)
- and father(node(i)) = node((i - 1) / 2)
- */
-
- /* initialize the permutation to identity */
- for (i = 0; i < n; i++)
- perm[i] = tab[i];
-
- /* insertion phase */
- for (dernier_traite = 1; dernier_traite < n; dernier_traite++)
+ MPFR_LOG_FUNC
+ (("n=%lu rnd=%d", n, rnd),
+ ("sum[%Pu]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
+
+ if (MPFR_UNLIKELY (n <= 2))
{
- i = dernier_traite;
- while (i > 0)
+ if (n == 0)
{
- pere = (i - 1) / 2;
- if (GET_EXP2 (perm[pere]) > GET_EXP2 (perm[i]))
- {
- tmp = perm[pere];
- perm[pere] = perm[i];
- perm[i] = tmp;
- i = pere;
- }
- else
- break;
+ MPFR_SET_ZERO (sum);
+ MPFR_SET_POS (sum);
+ MPFR_RET (0);
}
+ else if (n == 1)
+ return mpfr_set (sum, x[0], rnd);
+ else
+ return mpfr_add (sum, x[0], x[1], rnd);
}
-
- /* extraction phase */
- for (dernier_traite = n - 1; dernier_traite > 0; dernier_traite--)
+ else
{
- tmp = perm[0];
- perm[0] = perm[dernier_traite];
- perm[dernier_traite] = tmp;
+ mpfr_exp_t maxexp = MPFR_EXP_MIN; /* max(Empty) */
+ unsigned long i;
+ unsigned long rn = 0; /* will be the number of regular inputs */
+ /* sign of infinities and zeros (0: currently unknown) */
+ int sign_inf = 0, sign_zero = 0;
+
+ MPFR_LOG_MSG (("Check for special inputs (n = %lu >= 3)\n", n));
- i = 0;
- while (1)
+ for (i = 0; i < n; i++)
{
- fils_gauche = 2 * i + 1;
- fils_droit = fils_gauche + 1;
- if (fils_gauche < dernier_traite)
+ if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x[i])))
{
- if (fils_droit < dernier_traite)
+ if (MPFR_IS_NAN (x[i]))
{
- if (GET_EXP2(perm[fils_droit]) < GET_EXP2(perm[fils_gauche]))
- fils_indigne = fils_droit;
- else
- fils_indigne = fils_gauche;
-
- if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_indigne]))
- {
- tmp = perm[i];
- perm[i] = perm[fils_indigne];
- perm[fils_indigne] = tmp;
- i = fils_indigne;
- }
- else
- break;
+ /* The current value x[i] is NaN. Then the sum is NaN. */
+ nan:
+ MPFR_SET_NAN (sum);
+ MPFR_RET_NAN;
}
- else /* on a un fils gauche, pas de fils droit */
+ else if (MPFR_IS_INF (x[i]))
{
- if (GET_EXP2 (perm[i]) > GET_EXP2 (perm[fils_gauche]))
- {
- tmp = perm[i];
- perm[i] = perm[fils_gauche];
- perm[fils_gauche] = tmp;
- }
- break;
+ /* The current value x[i] is an infinity.
+ There are two cases:
+ 1. This is the first infinity value (sign_inf == 0).
+ Then set sign_inf to its sign, and go on.
+ 2. All the infinities found until now have the same
+ sign sign_inf. If this new infinity has a different
+ sign, then return NaN immediately, else go on. */
+ if (sign_inf == 0)
+ sign_inf = MPFR_SIGN (x[i]);
+ else if (MPFR_SIGN (x[i]) != sign_inf)
+ goto nan;
+ }
+ else if (MPFR_UNLIKELY (rn == 0))
+ {
+ /* The current value x[i] is a zero. The code below matters
+ only when all values found until now are zeros, otherwise
+ it is harmless (the test rn == 0 above is just a minor
+ optimization).
+ Here we track the sign of the zero result when all inputs
+ are zeros: if all zeros have the same sign, the result
+ will have this sign, otherwise (i.e. if there is at least
+ a zero of each sign), the sign of the zero result depends
+ only on the rounding mode (note that this choice is
+ sticky when new zeros are considered). */
+ MPFR_ASSERTD (MPFR_IS_ZERO (x[i]));
+ if (sign_zero == 0)
+ sign_zero = MPFR_SIGN (x[i]);
+ else if (MPFR_SIGN (x[i]) != sign_zero)
+ sign_zero = rnd == MPFR_RNDD ? -1 : 1;
}
}
- else /* on n'a pas de fils */
- break;
+ else
+ {
+ /* The current value x[i] is a regular number. */
+ mpfr_exp_t e = MPFR_GET_EXP (x[i]);
+ if (e > maxexp)
+ maxexp = e; /* maximum exponent found until now */
+ rn++; /* current number of regular inputs */
+ }
}
- }
-}
-
-/* Sum a list of float with order given by permutation perm,
- * intermediate size set to F. Return non-zero if at least one of
- * the operations is inexact (thus 0 implies that the sum is exact).
- * Internal use function.
- */
-static int
-sum_once (mpfr_ptr ret, mpfr_srcptr *const tab, unsigned long n, mpfr_prec_t F)
-{
- mpfr_t sum;
- unsigned long i;
- int error_trap;
+ MPFR_LOG_MSG (("rn=%lu sign_inf=%d sign_zero=%d\n",
+ rn, sign_inf, sign_zero));
- MPFR_ASSERTD (n >= 2);
+ /* At this point the result cannot be NaN (this case has already
+ been filtered out). */
- mpfr_init2 (sum, F);
- error_trap = mpfr_set (sum, tab[0], MPFR_RNDN);
- for (i = 1; i < n - 1; i++)
- {
- MPFR_ASSERTD (!MPFR_IS_NAN (sum) && !MPFR_IS_INF (sum));
- if (mpfr_add (sum, sum, tab[i], MPFR_RNDN))
- error_trap = 1;
- }
- if (mpfr_add (ret, sum, tab[n - 1], MPFR_RNDN))
- error_trap = 1;
- mpfr_clear (sum);
- return error_trap;
-}
+ if (MPFR_UNLIKELY (sign_inf != 0))
+ {
+ /* At least one infinity, and all of them have the same sign
+ sign_inf. The sum is the infinity of this sign. */
+ MPFR_SET_INF (sum);
+ MPFR_SET_SIGN (sum, sign_inf);
+ MPFR_RET (0);
+ }
-/* Sum a list of floating-point numbers.
- * If the return value is 0, then the sum is exact.
- * Otherwise the return value gives no information.
- */
-int
-mpfr_sum (mpfr_ptr ret, mpfr_ptr *const tab_p, unsigned long n, mpfr_rnd_t rnd)
-{
- mpfr_t cur_sum;
- mpfr_prec_t prec;
- mpfr_srcptr *perm, *const tab = (mpfr_srcptr *) tab_p;
- int k, error_trap;
- MPFR_ZIV_DECL (loop);
- MPFR_SAVE_EXPO_DECL (expo);
- MPFR_TMP_DECL (marker);
+ /* At this point, all the inputs are finite numbers. */
- if (MPFR_UNLIKELY (n <= 1))
- {
- if (n < 1)
+ if (MPFR_UNLIKELY (rn == 0))
{
- MPFR_SET_ZERO (ret);
- MPFR_SET_POS (ret);
- return 0;
+ /* All the numbers were zeros (and there is at least one).
+ The sum is zero with sign sign_zero. */
+ MPFR_ASSERTD (sign_zero != 0);
+ MPFR_SET_ZERO (sum);
+ MPFR_SET_SIGN (sum, sign_zero);
+ MPFR_RET (0);
}
- else
- return mpfr_set (ret, tab[0], rnd);
- }
- /* Sort and treat special cases */
- MPFR_TMP_MARK (marker);
- perm = (mpfr_srcptr *) MPFR_TMP_ALLOC (n * sizeof *perm);
- prec = MPFR_PREC (ret);
- error_trap = mpfr_sum_sort (tab, n, perm, &prec);
- /* Check if there was a NAN or a INF */
- if (MPFR_UNLIKELY (error_trap != 0))
- {
- MPFR_TMP_FREE (marker);
- if (error_trap == 2)
+ /* Optimize the case where there are only two regular numbers. */
+ if (MPFR_UNLIKELY (rn <= 2))
{
- MPFR_SET_NAN (ret);
- MPFR_RET_NAN;
- }
- MPFR_SET_INF (ret);
- MPFR_SET_SIGN (ret, error_trap);
- MPFR_RET (0);
- }
+ unsigned long h = ULONG_MAX;
- /* Initial precision is max(prec(ret),prec(tab[0]),...,prec(tab[n-1])) */
- k = MPFR_INT_CEIL_LOG2 (n) + 1;
- prec += k + 2;
- mpfr_init2 (cur_sum, prec);
+ for (i = 0; i < n; i++)
+ if (! MPFR_IS_SINGULAR (x[i]))
+ {
+ if (rn == 1)
+ return mpfr_set (sum, x[i], rnd);
+ if (h != ULONG_MAX)
+ return mpfr_add (sum, x[h], x[i], rnd);
+ h = i;
+ }
+ MPFR_RET_NEVER_GO_HERE();
+ }
- /* Ziv Loop */
- MPFR_SAVE_EXPO_MARK (expo);
- MPFR_ZIV_INIT (loop, prec);
- for (;;)
- {
- error_trap = sum_once (cur_sum, perm, n, prec + k);
- if (MPFR_LIKELY (error_trap == 0 ||
- (!MPFR_IS_ZERO (cur_sum) &&
- mpfr_can_round (cur_sum, prec - 2,
- MPFR_RNDN, rnd, MPFR_PREC (ret)))))
- break;
- MPFR_ZIV_NEXT (loop, prec);
- mpfr_set_prec (cur_sum, prec);
+ return sum_aux (sum, x, n, rnd, maxexp, rn);
}
- MPFR_ZIV_FREE (loop);
- MPFR_TMP_FREE (marker);
-
- if (mpfr_set (ret, cur_sum, rnd))
- error_trap = 1;
- mpfr_clear (cur_sum);
-
- MPFR_SAVE_EXPO_FREE (expo);
- if (mpfr_check_range (ret, 0, rnd))
- error_trap = 1;
- return error_trap; /* It doesn't return the ternary value */
}
-
-/* __END__ */
diff --git a/tests/tsum.c b/tests/tsum.c
index ead15a0dc..2eee92755 100644
--- a/tests/tsum.c
+++ b/tests/tsum.c
@@ -20,219 +20,119 @@ along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
-/* TODO: add some generic random test with cancellations. Something like:
- 1. Generate random numbers with random precisions.
- 2. Compute the sum s at some random precision and some rounding mode.
- 3. While s != 0:
- 4. Include -s in the array.
- 5. Reorder the terms randomly.
- 6. Recompute a new sum s' at some random precision and some rounding mode.
- 7. Check that |s'| < ulp(s), with a factor 1/2 for MPFR_RNDN.
- 8. Reiterate at (3) with s = s'.
- Also add tests with intermediate overflows and tests with underflows
- (this matters here as we don't have subnormals).
- Note: partly done in the cancel() test (r8857); the result is not tested
- yet, but this test already shows an efficiency problem.
-*/
-
#include "mpfr-test.h"
#ifndef MPFR_NCANCEL
#define MPFR_NCANCEL 10
#endif
-static int
-check_is_sorted (unsigned long n, mpfr_srcptr *perm)
-{
- unsigned long i;
-
- for (i = 0; i < n - 1; i++)
- if (MPFR_GET_EXP(perm[i]) < MPFR_GET_EXP(perm[i+1]))
- return 0;
- return 1;
-}
-
-static int
-sum_tab (mpfr_ptr ret, mpfr_t *tab, unsigned long n, mpfr_rnd_t rnd)
+static mpfr_prec_t
+get_prec_max (mpfr_t *t, int n)
{
- mpfr_ptr *tabtmp;
- unsigned long i;
- int inexact;
- MPFR_TMP_DECL(marker);
+ mpfr_exp_t e, min, max;
+ int i;
- MPFR_TMP_MARK(marker);
- tabtmp = (mpfr_ptr *) MPFR_TMP_ALLOC(n * sizeof(mpfr_srcptr));
+ min = MPFR_EMAX_MAX;
+ max = MPFR_EMIN_MIN;
for (i = 0; i < n; i++)
- tabtmp[i] = tab[i];
+ if (MPFR_IS_PURE_FP (t[i]))
+ {
+ e = MPFR_GET_EXP (t[i]);
+ if (e > max)
+ max = e;
+ e -= MPFR_GET_PREC (t[i]);
+ if (e < min)
+ min = e;
+ }
- inexact = mpfr_sum (ret, tabtmp, n, rnd);
- MPFR_TMP_FREE(marker);
- return inexact;
+ return min > max ? MPFR_PREC_MIN /* no pure FP values */
+ : max - min + __gmpfr_ceil_log2 (n);
}
-
-static mpfr_prec_t
-get_prec_max (mpfr_t *tab, unsigned long n, mpfr_prec_t f)
-{
- mpfr_prec_t res;
- mpfr_exp_t min, max;
- unsigned long i;
-
- i = 0;
- while (MPFR_IS_ZERO (tab[i]))
- {
- i++;
- if (i == n)
- return MPFR_PREC_MIN; /* all values are 0 */
- }
-
- if (! mpfr_check (tab[i]))
- {
- printf ("tab[%lu] is not valid.\n", i);
- exit (1);
- }
- MPFR_ASSERTN (MPFR_IS_FP (tab[i]));
- min = max = MPFR_GET_EXP(tab[i]);
- for (i++; i < n; i++)
- {
- if (! mpfr_check (tab[i]))
- {
- printf ("tab[%lu] is not valid.\n", i);
- exit (1);
- }
- MPFR_ASSERTN (MPFR_IS_FP (tab[i]));
- if (! MPFR_IS_ZERO (tab[i]))
- {
- if (MPFR_GET_EXP(tab[i]) > max)
- max = MPFR_GET_EXP(tab[i]);
- if (MPFR_GET_EXP(tab[i]) < min)
- min = MPFR_GET_EXP(tab[i]);
- }
- }
- res = max - min;
- res += f;
- res += __gmpfr_ceil_log2 (n) + 1;
- return res;
-}
-
-
static void
-algo_exact (mpfr_t somme, mpfr_t *tab, unsigned long n, mpfr_prec_t f)
+get_exact_sum (mpfr_t sum, mpfr_t *tab, int n)
{
- unsigned long i;
- mpfr_prec_t prec_max;
+ int i;
- prec_max = get_prec_max(tab, n, f);
- mpfr_set_prec (somme, prec_max);
- mpfr_set_ui (somme, 0, MPFR_RNDN);
+ mpfr_set_prec (sum, get_prec_max (tab, n));
+ mpfr_set_ui (sum, 0, MPFR_RNDN);
for (i = 0; i < n; i++)
- {
- if (mpfr_add(somme, somme, tab[i], MPFR_RNDN))
- {
- printf ("FIXME: algo_exact is buggy.\n");
- exit (1);
- }
- }
+ if (mpfr_add (sum, sum, tab[i], MPFR_RNDN))
+ {
+ printf ("FIXME: get_exact_sum is buggy.\n");
+ exit (1);
+ }
}
-/* Test the sorting function */
static void
-test_sort (mpfr_prec_t f, unsigned long n)
+generic_tests (void)
{
- mpfr_t *tab;
- mpfr_ptr *tabtmp;
- mpfr_srcptr *perm;
- unsigned long i;
- mpfr_prec_t prec = MPFR_PREC_MIN;
-
- /* Init stuff */
- tab = (mpfr_t *) (*__gmp_allocate_func) (n * sizeof (mpfr_t));
- for (i = 0; i < n; i++)
- mpfr_init2 (tab[i], f);
- tabtmp = (mpfr_ptr *) (*__gmp_allocate_func) (n * sizeof(mpfr_ptr));
- perm = (mpfr_srcptr *) (*__gmp_allocate_func) (n * sizeof(mpfr_srcptr));
+ mpfr_t exact_sum, sum1, sum2;
+ mpfr_t *t;
+ mpfr_ptr *p;
+ mpfr_prec_t precmax = 444;
+ int i, m, nmax = 500;
+ int rnd_mode;
- for (i = 0; i < n; i++)
+ t = (mpfr_t *) (*__gmp_allocate_func) (nmax * sizeof(mpfr_t));
+ p = (mpfr_ptr *) (*__gmp_allocate_func) (nmax * sizeof(mpfr_ptr));
+ for (i = 0; i < nmax; i++)
{
- mpfr_urandomb (tab[i], RANDS);
- tabtmp[i] = tab[i];
+ mpfr_init2 (t[i], precmax);
+ p[i] = t[i];
}
+ mpfr_inits2 (precmax, exact_sum, sum1, sum2, (mpfr_ptr) 0);
- mpfr_sum_sort ((mpfr_srcptr *)tabtmp, n, perm, &prec);
-
- if (check_is_sorted (n, perm) == 0)
+ for (m = 0; m < 4000; m++)
{
- printf ("mpfr_sum_sort incorrect.\n");
- for (i = 0; i < n; i++)
- mpfr_dump (perm[i]);
- exit (1);
- }
+ int non_uniform, n;
+ mpfr_prec_t prec;
- /* Clear stuff */
- for (i = 0; i < n; i++)
- mpfr_clear (tab[i]);
- (*__gmp_free_func) (tab, n * sizeof (mpfr_t));
- (*__gmp_free_func) (tabtmp, n * sizeof(mpfr_ptr));
- (*__gmp_free_func) (perm, n * sizeof(mpfr_srcptr));
-}
-
-static void
-test_sum (mpfr_prec_t f, unsigned long n)
-{
- mpfr_t sum, real_sum, real_non_rounded;
- mpfr_t *tab;
- unsigned long i;
- int rnd_mode;
-
- /* Init */
- tab = (mpfr_t *) (*__gmp_allocate_func) (n * sizeof(mpfr_t));
- for (i = 0; i < n; i++)
- mpfr_init2 (tab[i], f);
- mpfr_inits2 (f, sum, real_sum, real_non_rounded, (mpfr_ptr) 0);
+ non_uniform = randlimb () % 10;
+ n = (randlimb () % nmax) + 1;
+ prec = MPFR_PREC_MIN + (randlimb () % (precmax - MPFR_PREC_MIN + 1));
+ mpfr_set_prec (sum1, prec);
+ mpfr_set_prec (sum2, prec);
- /* First Uniform */
- for (i = 0; i < n; i++)
- mpfr_urandomb (tab[i], RANDS);
- algo_exact (real_non_rounded, tab, n, f);
- for (rnd_mode = 0; rnd_mode < MPFR_RND_MAX; rnd_mode++)
- {
- sum_tab (sum, tab, n, (mpfr_rnd_t) rnd_mode);
- mpfr_set (real_sum, real_non_rounded, (mpfr_rnd_t) rnd_mode);
- if (mpfr_cmp (real_sum, sum) != 0)
+ for (i = 0; i < n; i++)
{
- printf ("mpfr_sum incorrect.\n");
- mpfr_dump (real_sum);
- mpfr_dump (sum);
- exit (1);
+ mpfr_set_prec (t[i], MPFR_PREC_MIN +
+ (randlimb () % (precmax - MPFR_PREC_MIN + 1)));
+ mpfr_urandomb (t[i], RANDS);
+ if (m % 8 != 0 && (m % 8 == 1 || (randlimb () & 1)))
+ mpfr_neg (t[i], t[i], MPFR_RNDN);
+ if (non_uniform && MPFR_NOTZERO (t[i]))
+ mpfr_set_exp (t[i], randlimb () % 1000);
+ /* putchar ("-0+"[SIGN (mpfr_sgn (t[i])) + 1]); */
}
- }
-
- /* Then non uniform */
- for (i = 0; i < n; i++)
- {
- mpfr_urandomb (tab[i], RANDS);
- if (! mpfr_zero_p (tab[i]))
- mpfr_set_exp (tab[i], randlimb () % 1000);
- }
- algo_exact (real_non_rounded, tab, n, f);
- for (rnd_mode = 0; rnd_mode < MPFR_RND_MAX; rnd_mode++)
- {
- sum_tab (sum, tab, n, (mpfr_rnd_t) rnd_mode);
- mpfr_set (real_sum, real_non_rounded, (mpfr_rnd_t) rnd_mode);
- if (mpfr_cmp (real_sum, sum) != 0)
+ /* putchar ('\n'); */
+ get_exact_sum (exact_sum, t, n);
+ RND_LOOP (rnd_mode)
{
- printf ("mpfr_sum incorrect.\n");
- mpfr_dump (real_sum);
- mpfr_dump (sum);
- exit (1);
+ int inex1, inex2;
+
+ inex1 = mpfr_set (sum1, exact_sum, (mpfr_rnd_t) rnd_mode);
+ inex2 = mpfr_sum (sum2, p, n, (mpfr_rnd_t) rnd_mode);
+ if (!(mpfr_equal_p (sum1, sum2) && SAME_SIGN (inex1, inex2)))
+ {
+ printf ("generic_tests failed on m = %d, %s\n", m,
+ mpfr_print_rnd_mode ((mpfr_rnd_t) rnd_mode));
+ printf ("Expected ");
+ mpfr_dump (sum1);
+ printf ("with inex = %d\n", inex1);
+ printf ("Got ");
+ mpfr_dump (sum2);
+ printf ("with inex = %d\n", inex2);
+ exit (1);
+ }
}
}
- /* Clear stuff */
- for (i = 0; i < n; i++)
- mpfr_clear (tab[i]);
- mpfr_clears (sum, real_sum, real_non_rounded, (mpfr_ptr) 0);
- (*__gmp_free_func) (tab, n * sizeof(mpfr_t));
+ for (i = 0; i < nmax; i++)
+ mpfr_clear (t[i]);
+ mpfr_clears (exact_sum, sum1, sum2, (mpfr_ptr) 0);
+ (*__gmp_free_func) (t, nmax * sizeof(mpfr_t));
+ (*__gmp_free_func) (p, nmax * sizeof(mpfr_ptr));
}
static
@@ -318,12 +218,422 @@ void check_special (void)
mpfr_clears (tab[0], tab[1], tab[2], r, (mpfr_ptr) 0);
}
+#define NC 7
+#define NS 6
+
+static void
+check_more_special (void)
+{
+ char *str[NC] = { "NaN", "+Inf", "-Inf", "+0", "-0", "+1", "-1" };
+ int i, r, k[NS];
+ mpfr_t c[NC], s[NS], sum;
+ mpfr_ptr p[NS];
+ int inex;
+
+ for (i = 0; i < NC; i++)
+ {
+ int ret;
+ mpfr_init2 (c[i], 8);
+ ret = mpfr_set_str (c[i], str[i], 0, MPFR_RNDN);
+ MPFR_ASSERTN (ret == 0);
+ }
+ for (i = 0; i < NS; i++)
+ mpfr_init2 (s[i], 8);
+ mpfr_init2 (sum, 8);
+
+ RND_LOOP(r)
+ {
+ i = 0;
+ while (1)
+ {
+ while (i < NS)
+ {
+ p[i] = c[0];
+ mpfr_set_nan (s[i]);
+ k[i++] = 0;
+ }
+ inex = mpfr_sum (sum, p, NS, (mpfr_rnd_t) r);
+ if (! ((MPFR_IS_NAN (sum) && MPFR_IS_NAN (s[NS-1])) ||
+ (mpfr_equal_p (sum, s[NS-1]) &&
+ MPFR_SIGN (sum) == MPFR_SIGN (s[NS-1]))) || inex != 0)
+ {
+ printf ("Error in check_more_special on %s",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r));
+ for (i = 0; i < NS; i++)
+ printf (" %d", k[i]);
+ printf (" with\n");
+ for (i = 0; i < NS; i++)
+ {
+ printf (" p[%d] = %s = ", i, str[k[i]]);
+ mpfr_dump (p[i]);
+ }
+ printf ("Expected ");
+ mpfr_dump (s[NS-1]);
+ printf ("with inex = 0\n");
+ printf ("Got ");
+ mpfr_dump (sum);
+ printf ("with inex = %d\n", inex);
+ exit (1);
+ }
+ while (k[--i] == NC-1)
+ if (i == 0)
+ goto next_rnd;
+ p[i] = c[++k[i]];
+ if (i == 0)
+ mpfr_set (s[i], p[i], (mpfr_rnd_t) r);
+ else
+ mpfr_add (s[i], s[i-1], p[i], (mpfr_rnd_t) r);
+ i++;
+ }
+ next_rnd: ;
+ }
+
+ for (i = 0; i < NC; i++)
+ mpfr_clear (c[i]);
+ for (i = 0; i < NS; i++)
+ mpfr_clear (s[i]);
+ mpfr_clear (sum);
+}
+
+/* i * 2^46 + j * 2^45 + k * 2^44 + f * 2^(-2),
+ with -1 <= i, j, k <= 1, i != 0, -3 <= f <= 3,
+ ulp(exact sum) = 2^0 and ulp(exact sum) = 2^44. */
+static void
+check1 (void)
+{
+ mpfr_t sum1, sum2, s1, s2, s3, t[4];
+ mpfr_ptr p[4];
+ int i, j, k, f, prec, r, inex1, inex2;
+
+ mpfr_init2 (sum1, 47);
+ mpfr_init2 (sum2, 47);
+ mpfr_init2 (s1, 3);
+ mpfr_init2 (s2, 3);
+ mpfr_init2 (s3, 49);
+ for (i = 0; i < 4; i++)
+ {
+ mpfr_init2 (t[i], 2);
+ p[i] = t[i];
+ }
+
+ for (i = -1; i <= 1; i += 2)
+ {
+ mpfr_set_si_2exp (t[0], i, 46, MPFR_RNDN);
+ for (j = -1; j <= 1; j++)
+ {
+ mpfr_set_si_2exp (t[1], j, 45, MPFR_RNDN);
+ inex1 = mpfr_add (s1, t[0], t[1], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (k = -1; k <= 1; k++)
+ {
+ mpfr_set_si_2exp (t[2], k, 44, MPFR_RNDN);
+ inex1 = mpfr_add (s2, s1, t[2], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (f = -3; f <= 3; f++)
+ {
+ mpfr_set_si_2exp (t[3], f, -2, MPFR_RNDN);
+ inex1 = mpfr_add (s3, s2, t[3], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (prec = mpfr_get_exp (s3);
+ prec >= MPFR_PREC_MIN;
+ prec -= 44)
+ {
+ mpfr_set_prec (sum1, prec);
+ mpfr_set_prec (sum2, prec);
+ RND_LOOP (r)
+ {
+ inex1 = mpfr_set (sum1, s3, (mpfr_rnd_t) r);
+ inex2 = mpfr_sum (sum2, p, 4, (mpfr_rnd_t) r);
+ MPFR_ASSERTN (mpfr_check (sum1));
+ MPFR_ASSERTN (mpfr_check (sum2));
+ if (!(mpfr_equal_p (sum1, sum2) &&
+ SAME_SIGN (inex1, inex2)))
+ {
+ printf ("Error in check1 on %s, prec = %d, "
+ "i = %d, j = %d, k = %d, f = %d\n",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r),
+ prec, i, j, k, f);
+ printf ("Expected ");
+ mpfr_dump (sum1);
+ printf ("with inex = %d\n", inex1);
+ printf ("Got ");
+ mpfr_dump (sum2);
+ printf ("with inex = %d\n", inex2);
+ exit (1);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < 4; i++)
+ mpfr_clear (t[i]);
+ mpfr_clears (sum1, sum2, s1, s2, s3, (mpfr_ptr) 0);
+}
+
+/* With N = 2 * GMP_NUMB_BITS:
+ i * 2^N + j + k * 2^(-1) + f1 * 2^(-N) + f2 * 2^(-N),
+ with i = -1 or 1, j = 0 or i, -1 <= k <= 1, -1 <= f1 <= 1, -1 <= f2 <= 1
+ ulp(exact sum) = 2^0. */
+static void
+check2 (void)
+{
+ mpfr_t sum1, sum2, s1, s2, s3, s4, t[5];
+ mpfr_ptr p[5];
+ int i, j, k, f1, f2, prec, r, inex1, inex2;
+
+#define N (2 * GMP_NUMB_BITS)
+
+ mpfr_init2 (sum1, N+1);
+ mpfr_init2 (sum2, N+1);
+ mpfr_init2 (s1, N+1);
+ mpfr_init2 (s2, N+2);
+ mpfr_init2 (s3, 2*N+1);
+ mpfr_init2 (s4, 2*N+1);
+ for (i = 0; i < 5; i++)
+ {
+ mpfr_init2 (t[i], 2);
+ p[i] = t[i];
+ }
+
+ for (i = -1; i <= 1; i += 2)
+ {
+ mpfr_set_si_2exp (t[0], i, N, MPFR_RNDN);
+ for (j = 0; j != 2*i; j += i)
+ {
+ mpfr_set_si (t[1], j, MPFR_RNDN);
+ inex1 = mpfr_add (s1, t[0], t[1], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (k = -1; k <= 1; k++)
+ {
+ mpfr_set_si_2exp (t[2], k, -1, MPFR_RNDN);
+ inex1 = mpfr_add (s2, s1, t[2], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (f1 = -1; f1 <= 1; f1++)
+ {
+ mpfr_set_si_2exp (t[3], f1, -N, MPFR_RNDN);
+ inex1 = mpfr_add (s3, s2, t[3], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (f2 = -1; f2 <= 1; f2++)
+ {
+ mpfr_set_si_2exp (t[4], f2, -N, MPFR_RNDN);
+ inex1 = mpfr_add (s4, s3, t[4], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ prec = mpfr_get_exp (s4);
+ mpfr_set_prec (sum1, prec);
+ mpfr_set_prec (sum2, prec);
+ RND_LOOP (r)
+ {
+ inex1 = mpfr_set (sum1, s4, (mpfr_rnd_t) r);
+ inex2 = mpfr_sum (sum2, p, 5, (mpfr_rnd_t) r);
+ MPFR_ASSERTN (mpfr_check (sum1));
+ MPFR_ASSERTN (mpfr_check (sum2));
+ if (!(mpfr_equal_p (sum1, sum2) &&
+ SAME_SIGN (inex1, inex2)))
+ {
+ printf ("Error in check2 on %s, prec = %d, "
+ "i = %d, j = %d, k = %d, f1 = %d, "
+ "f2 = %d\n",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r),
+ prec, i, j, k, f1, f2);
+ printf ("Expected ");
+ mpfr_dump (sum1);
+ printf ("with inex = %d\n", inex1);
+ printf ("Got ");
+ mpfr_dump (sum2);
+ printf ("with inex = %d\n", inex2);
+ exit (1);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < 5; i++)
+ mpfr_clear (t[i]);
+ mpfr_clears (sum1, sum2, s1, s2, s3, s4, (mpfr_ptr) 0);
+}
+
+/* t[i] = (2^17 - 1) * 2^(17*(i-8)) for 0 <= i <= 16.
+ * t[17] = 2^(17*9+1) * j for -4 <= j <= 4.
+ * t[18] = 2^(-1) * k for -1 <= k <= 1.
+ * t[19] = 2^(-17*8) * m for -3 <= m <= 3.
+ * prec = 17*9+4
+ */
+static void
+check3 (void)
+{
+ mpfr_t sum1, sum2, s1, s2, s3, s4, t[20];
+ mpfr_ptr p[20];
+ int i, s, j, k, m, r, inex1, inex2;
+ int prec = 17*9+4;
+
+ mpfr_init2 (sum1, prec);
+ mpfr_init2 (sum2, prec);
+ mpfr_init2 (s1, 17*17);
+ mpfr_init2 (s2, 17*17+4);
+ mpfr_init2 (s3, 17*17+4);
+ mpfr_init2 (s4, 17*17+5);
+ mpfr_set_ui (s1, 0, MPFR_RNDN);
+ for (i = 0; i < 20; i++)
+ {
+ mpfr_init2 (t[i], 20);
+ p[i] = t[i];
+ if (i < 17)
+ {
+ mpfr_set_ui_2exp (t[i], 0x1ffff, 17*(i-8), MPFR_RNDN);
+ inex1 = mpfr_add (s1, s1, t[i], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ }
+ }
+
+ for (s = 1; s >= -1; s -= 2)
+ {
+ for (j = -4; j <= 4; j++)
+ {
+ mpfr_set_si_2exp (t[17], j, 17*9+1, MPFR_RNDN);
+ inex1 = mpfr_add (s2, s1, t[17], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (k = -1; k <= 1; k++)
+ {
+ mpfr_set_si_2exp (t[18], k, -1, MPFR_RNDN);
+ inex1 = mpfr_add (s3, s2, t[18], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (m = -3; m <= 3; m++)
+ {
+ mpfr_set_si_2exp (t[19], m, -17*8, MPFR_RNDN);
+ inex1 = mpfr_add (s4, s3, t[19], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ RND_LOOP (r)
+ {
+ inex1 = mpfr_set (sum1, s4, (mpfr_rnd_t) r);
+ inex2 = mpfr_sum (sum2, p, 20, (mpfr_rnd_t) r);
+ MPFR_ASSERTN (mpfr_check (sum1));
+ MPFR_ASSERTN (mpfr_check (sum2));
+ if (!(mpfr_equal_p (sum1, sum2) &&
+ SAME_SIGN (inex1, inex2)))
+ {
+ printf ("Error in check3 on %s, "
+ "s = %d, j = %d, k = %d, m = %d\n",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r),
+ s, j, k, m);
+ printf ("Expected ");
+ mpfr_dump (sum1);
+ printf ("with inex = %d\n", inex1);
+ printf ("Got ");
+ mpfr_dump (sum2);
+ printf ("with inex = %d\n", inex2);
+ exit (1);
+ }
+ }
+ }
+ }
+ }
+ for (i = 0; i < 17; i++)
+ mpfr_neg (t[i], t[i], MPFR_RNDN);
+ mpfr_neg (s1, s1, MPFR_RNDN);
+ }
+
+ for (i = 0; i < 20; i++)
+ mpfr_clear (t[i]);
+ mpfr_clears (sum1, sum2, s1, s2, s3, s4, (mpfr_ptr) 0);
+}
+
+/* Test of s * (q * 2^(n-1) - 2^k) + h + i * 2^(-2) + j * 2^(-2)
+ * with h = -1 or 1, -1 <= i odd <= j <= 3, 2 <= q <= 3, s = -1 or 1,
+ * prec n-k.
+ * On a 64-bit machine:
+ * MPFR_RNDN, tmd=2, rbit=0, sst=0, negative is checked with the inputs
+ * -3*2^58, 2^5, -1, 2^(-2), 3*2^(-2)
+ * MPFR_RNDN, tmd=2, rbit=0, sst=1, negative is checked with the inputs
+ * -3*2^58, 2^5, -1, 3*2^(-2), 3*2^(-2)
+ */
+static void
+check4 (void)
+{
+ mpfr_t sum1, sum2, s1, s2, s3, s4, t[5];
+ mpfr_ptr p[5];
+ int h, i, j, k, n, q, r, s, prec, inex1, inex2;
+
+ mpfr_inits2 (257, sum1, sum2, s1, s2, s3, s4, (mpfr_ptr) 0);
+ for (i = 0; i < 5; i++)
+ {
+ mpfr_init2 (t[i], 2);
+ p[i] = t[i];
+ }
+
+ /* No GNU style for the many nested loops... */
+ for (k = 1; k <= 64; k++) {
+ mpfr_set_si_2exp (t[0], -1, k, MPFR_RNDN);
+ for (n = k + 2; n <= k + 65; n++) {
+ prec = n - k;
+ mpfr_set_prec (sum1, prec);
+ mpfr_set_prec (sum2, prec);
+ for (q = 2; q <= 3; q++) {
+ mpfr_set_si_2exp (t[1], q, n - 1, MPFR_RNDN);
+ inex1 = mpfr_add (s1, t[0], t[1], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (s = -1; s <= 1; s += 2) {
+ mpfr_neg (t[0], t[0], MPFR_RNDN);
+ mpfr_neg (t[1], t[1], MPFR_RNDN);
+ mpfr_neg (s1, s1, MPFR_RNDN);
+ for (h = -1; h <= 1; h += 2) {
+ mpfr_set_si (t[2], h, MPFR_RNDN);
+ inex1 = mpfr_add (s2, s1, t[2], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (i = -1; i <= 3; i += 2) {
+ mpfr_set_si_2exp (t[3], i, -2, MPFR_RNDN);
+ inex1 = mpfr_add (s3, s2, t[3], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ for (j = i; j <= 3; j++) {
+ mpfr_set_si_2exp (t[4], j, -2, MPFR_RNDN);
+ inex1 = mpfr_add (s4, s3, t[4], MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ RND_LOOP (r) {
+ inex1 = mpfr_set (sum1, s4, (mpfr_rnd_t) r);
+ inex2 = mpfr_sum (sum2, p, 5, (mpfr_rnd_t) r);
+ MPFR_ASSERTN (mpfr_check (sum1));
+ MPFR_ASSERTN (mpfr_check (sum2));
+ if (!(mpfr_equal_p (sum1, sum2) &&
+ SAME_SIGN (inex1, inex2)))
+ {
+ printf ("Error in check4 on %s, "
+ "k = %d, n = %d (prec %d), "
+ "q = %d, s = %d, h = %d, i = %d, j = %d\n",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r),
+ k, n, prec, q, s, h, i, j);
+ printf ("Expected ");
+ mpfr_dump (sum1);
+ printf ("with inex = %d\n", inex1);
+ printf ("Got ");
+ mpfr_dump (sum2);
+ printf ("with inex = %d\n", inex2);
+ exit (1);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < 5; i++)
+ mpfr_clear (t[i]);
+ mpfr_clears (sum1, sum2, s1, s2, s3, s4, (mpfr_ptr) 0);
+}
+
/* bug reported by Joseph S. Myers on 2013-10-27
https://sympa.inria.fr/sympa/arc/mpfr/2013-10/msg00015.html */
static void
bug20131027 (void)
{
- mpfr_t r, t[4];
+ mpfr_t sum, t[4];
mpfr_ptr p[4];
char *s[4] = {
"0x1p1000",
@@ -331,35 +641,99 @@ bug20131027 (void)
"-0x1p947",
"0x1p880"
};
- int i;
+ int i, r;
+
+ mpfr_init2 (sum, 53);
- mpfr_init2 (r, 53);
for (i = 0; i < 4; i++)
{
mpfr_init2 (t[i], i == 0 ? 53 : 1000);
mpfr_set_str (t[i], s[i], 0, MPFR_RNDN);
p[i] = t[i];
}
- mpfr_sum (r, p, 4, MPFR_RNDN);
- if (MPFR_NOTZERO (r))
+ RND_LOOP(r)
{
- printf ("mpfr_sum incorrect in bug20131027: expected 0, got\n");
- mpfr_dump (r);
- exit (1);
+ int expected_sign = (mpfr_rnd_t) r == MPFR_RNDD ? -1 : 1;
+ int inex;
+
+ inex = mpfr_sum (sum, p, 4, (mpfr_rnd_t) r);
+
+ if (MPFR_NOTZERO (sum) || MPFR_SIGN (sum) != expected_sign || inex != 0)
+ {
+ printf ("mpfr_sum incorrect in bug20131027 for %s:\n"
+ "expected %c0 with inex = 0, got ",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r),
+ expected_sign > 0 ? '+' : '-');
+ mpfr_dump (sum);
+ printf ("with inex = %d\n", inex);
+ exit (1);
+ }
}
for (i = 0; i < 4; i++)
mpfr_clear (t[i]);
- mpfr_clear (r);
+ mpfr_clear (sum);
}
+/* Occurs in branches/new-sum/src/sum.c@9344 on a 64-bit machine. */
+static void
+bug20150327 (void)
+{
+ mpfr_t sum1, sum2, t[3];
+ mpfr_ptr p[3];
+ char *s[3] = { "0.10000111110101000010101011100001", "1E-100", "0.1E95" };
+ int i, r;
+
+ mpfr_inits2 (58, sum1, sum2, (mpfr_ptr) 0);
+
+ for (i = 0; i < 3; i++)
+ {
+ mpfr_init2 (t[i], 64);
+ mpfr_set_str (t[i], s[i], 2, MPFR_RNDN);
+ p[i] = t[i];
+ }
+
+ RND_LOOP(r)
+ {
+ int inex1, inex2;
+
+ mpfr_set (sum1, t[2], MPFR_RNDN);
+ inex1 = -1;
+ if (MPFR_IS_LIKE_RNDU ((mpfr_rnd_t) r, 1))
+ {
+ mpfr_nextabove (sum1);
+ inex1 = 1;
+ }
+
+ inex2 = mpfr_sum (sum2, p, 3, (mpfr_rnd_t) r);
+
+ if (!(mpfr_equal_p (sum1, sum2) && SAME_SIGN (inex1, inex2)))
+ {
+ printf ("mpfr_sum incorrect in bug20150327 for %s:\n",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r));
+ printf ("Expected ");
+ mpfr_dump (sum1);
+ printf ("with inex = %d\n", inex1);
+ printf ("Got ");
+ mpfr_dump (sum2);
+ printf ("with inex = %d\n", inex2);
+ exit (1);
+ }
+ }
+
+ for (i = 0; i < 3; i++)
+ mpfr_clear (t[i]);
+ mpfr_clears (sum1, sum2, (mpfr_ptr) 0);
+}
+
+/* TODO: A test with more inputs (but can't be compared to mpfr_add). */
static void
check_extreme (void)
{
mpfr_t u, v, w, x, y;
mpfr_ptr t[2];
- int i, inex, r;
+ int i, inex1, inex2, r;
t[0] = u;
t[1] = v;
@@ -371,19 +745,21 @@ check_extreme (void)
RND_LOOP (r)
for (i = 0; i < 2; i++)
{
- mpfr_sum (x, t, 2, (mpfr_rnd_t) r);
- mpfr_set_prec (y, 64);
- inex = mpfr_add (y, u, w, MPFR_RNDN);
- MPFR_ASSERTN (inex == 0);
- mpfr_prec_round (y, 32, (mpfr_rnd_t) r);
- if (! mpfr_equal_p (x, y))
+ mpfr_set_prec (x, 64);
+ inex1 = mpfr_add (x, u, w, MPFR_RNDN);
+ MPFR_ASSERTN (inex1 == 0);
+ inex1 = mpfr_prec_round (x, 32, (mpfr_rnd_t) r);
+ inex2 = mpfr_sum (y, t, 2, (mpfr_rnd_t) r);
+ if (!(mpfr_equal_p (x, y) && SAME_SIGN (inex1, inex2)))
{
printf ("Error in check_extreme (%s, i = %d)\n",
mpfr_print_rnd_mode ((mpfr_rnd_t) r), i);
printf ("Expected ");
- mpfr_dump (y);
- printf ("Got ");
mpfr_dump (x);
+ printf ("with inex = %d\n", inex1);
+ printf ("Got ");
+ mpfr_dump (y);
+ printf ("with inex = %d\n", inex2);
exit (1);
}
mpfr_neg (v, v, MPFR_RNDN);
@@ -392,19 +768,19 @@ check_extreme (void)
mpfr_clears (u, v, w, x, y, (mpfr_ptr) 0);
}
+/* Generic random tests with cancellations */
static void
cancel (void)
{
- mpfr_t x[2 * MPFR_NCANCEL];
+ mpfr_t x[2 * MPFR_NCANCEL], bound;
mpfr_ptr px[2 * MPFR_NCANCEL];
int i, j, n;
- /* FIXME: re-enable and improve once mpfr_sum has been fixed.
- check_extreme() is currently sufficient to show problems. */
- return;
+ mpfr_init2 (bound, 2);
for (i = 0; i < 8; i++)
{
+ mpfr_set_inf (bound, 1);
for (n = 0; n < numberof (x); n++)
{
mpfr_prec_t p;
@@ -445,35 +821,254 @@ cancel (void)
mpfr_print_rnd_mode (rnd), mpfr_get_prec (x[n]));
#endif
mpfr_sum (x[n], px, n, rnd);
+ if (mpfr_zero_p (x[n]))
+ {
+ n++;
+ break;
+ }
mpfr_neg (x[n], x[n], MPFR_RNDN);
+ if (mpfr_greater_p (x[n], bound))
+ {
+ printf ("Error in cancel on i = %d, n = %d\n", i, n);
+ exit (1);
+ }
+ mpfr_set_ui_2exp (bound, 1,
+ mpfr_get_exp (x[n]) - p - (rnd == MPFR_RNDN),
+ MPFR_RNDN);
+ /* The next sum will be <= bound in absolute value
+ (the equality can be obtained in all rounding modes
+ since the sum will be rounded). */
}
}
while (--n >= 0)
mpfr_clear (x[n]);
}
+
+ mpfr_clear (bound);
+}
+
+#ifndef NOVFL
+# define NOVFL 30
+#endif
+
+static void
+check_overflow (void)
+{
+ mpfr_t sum1, sum2, x, y;
+ mpfr_ptr t[2 * NOVFL];
+ mpfr_exp_t emin, emax;
+ int i, r;
+
+ emin = mpfr_get_emin ();
+ emax = mpfr_get_emax ();
+ set_emin (MPFR_EMIN_MIN);
+ set_emax (MPFR_EMAX_MAX);
+
+ mpfr_inits2 (32, sum1, sum2, x, y, (mpfr_ptr) 0);
+ mpfr_setmax (x, mpfr_get_emax ());
+ mpfr_neg (y, x, MPFR_RNDN);
+
+ for (i = 0; i < 2 * NOVFL; i++)
+ t[i] = i < NOVFL ? x : y;
+
+ /* Two kinds of test:
+ * i = 1: overflow.
+ * i = 2: intermediate overflow (exact sum is 0).
+ */
+ for (i = 1; i <= 2; i++)
+ RND_LOOP(r)
+ {
+ int inex1, inex2;
+
+ inex1 = mpfr_add (sum1, x, i == 1 ? x : y, (mpfr_rnd_t) r);
+ inex2 = mpfr_sum (sum2, t, i * NOVFL, (mpfr_rnd_t) r);
+ MPFR_ASSERTN (mpfr_check (sum1));
+ MPFR_ASSERTN (mpfr_check (sum2));
+ if (!(mpfr_equal_p (sum1, sum2) && SAME_SIGN (inex1, inex2)))
+ {
+ printf ("Error in check_overflow on %s, i = %d\n",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r), i);
+ printf ("Expected ");
+ mpfr_dump (sum1);
+ printf ("with inex = %d\n", inex1);
+ printf ("Got ");
+ mpfr_dump (sum2);
+ printf ("with inex = %d\n", inex2);
+ exit (1);
+ }
+ }
+
+ mpfr_clears (sum1, sum2, x, y, (mpfr_ptr) 0);
+
+ set_emin (emin);
+ set_emax (emax);
+}
+
+#ifndef NUNFL
+# define NUNFL 9
+#endif
+
+/* t[0] = 2^(-k) - sum(t[i],i=1..n)
+ */
+static void
+check_underflow (void)
+{
+ mpfr_t sum1, sum2, t[NUNFL];
+ mpfr_ptr p[NUNFL];
+ mpfr_prec_t precmax = 444;
+ mpfr_exp_t emin, emax;
+ unsigned int ex_flags, flags;
+ int c, i;
+
+ emin = mpfr_get_emin ();
+ emax = mpfr_get_emax ();
+ set_emin (MPFR_EMIN_MIN);
+ set_emax (MPFR_EMAX_MAX);
+
+ ex_flags = MPFR_FLAGS_UNDERFLOW | MPFR_FLAGS_INEXACT;
+
+ mpfr_init2 (sum1, MPFR_PREC_MIN);
+ mpfr_init2 (sum2, precmax);
+
+ for (i = 0; i < NUNFL; i++)
+ {
+ mpfr_init2 (t[i], precmax);
+ p[i] = t[i];
+ }
+
+ for (c = 0; c < 8; c++)
+ {
+ mpfr_prec_t fprec;
+ int n, neg, r;
+
+ fprec = MPFR_PREC_MIN + (randlimb () % (precmax - MPFR_PREC_MIN + 1));
+ n = 3 + (randlimb () % (NUNFL - 2));
+ MPFR_ASSERTN (n <= NUNFL);
+
+ mpfr_set_prec (sum2, (randlimb () & 1) ? MPFR_PREC_MIN : precmax);
+ mpfr_set_prec (t[0], fprec + 64);
+ mpfr_set_zero (t[0], 1);
+
+ for (i = 1; i < n; i++)
+ {
+ int inex;
+
+ mpfr_set_prec (t[i], MPFR_PREC_MIN +
+ (randlimb () % (fprec - MPFR_PREC_MIN + 1)));
+ do
+ mpfr_urandomb (t[i], RANDS);
+ while (MPFR_IS_ZERO (t[i]));
+ mpfr_set_exp (t[i], MPFR_EMIN_MIN);
+ inex = mpfr_sub (t[0], t[0], t[i], MPFR_RNDN);
+ MPFR_ASSERTN (inex == 0);
+ }
+
+ neg = randlimb () & 1;
+ if (neg)
+ mpfr_nextbelow (t[0]);
+ else
+ mpfr_nextabove (t[0]);
+
+ RND_LOOP(r)
+ {
+ int inex1, inex2;
+
+ mpfr_set_zero (sum1, 1);
+ if (neg)
+ mpfr_nextbelow (sum1);
+ else
+ mpfr_nextabove (sum1);
+ inex1 = mpfr_div_2ui (sum1, sum1, 2, (mpfr_rnd_t) r);
+
+ mpfr_clear_flags ();
+ inex2 = mpfr_sum (sum2, p, n, (mpfr_rnd_t) r);
+ flags = __gmpfr_flags;
+
+ MPFR_ASSERTN (mpfr_check (sum1));
+ MPFR_ASSERTN (mpfr_check (sum2));
+
+ if (flags != ex_flags)
+ {
+ printf ("Bad flags in check_underflow on %s, c = %d\n",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r), c);
+ printf ("Expected flags:");
+ flags_out (ex_flags);
+ printf ("Got flags: ");
+ flags_out (flags);
+ printf ("sum = ");
+ mpfr_dump (sum2);
+ exit (1);
+ }
+
+ if (!(mpfr_equal_p (sum1, sum2) && SAME_SIGN (inex1, inex2)))
+ {
+ printf ("Error in check_underflow on %s, c = %d\n",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r), c);
+ printf ("Expected ");
+ mpfr_dump (sum1);
+ printf ("with inex = %d\n", inex1);
+ printf ("Got ");
+ mpfr_dump (sum2);
+ printf ("with inex = %d\n", inex2);
+ exit (1);
+ }
+ }
+ }
+
+ for (i = 0; i < NUNFL; i++)
+ mpfr_clear (t[i]);
+ mpfr_clears (sum1, sum2, (mpfr_ptr) 0);
+
+ set_emin (emin);
+ set_emax (emax);
+}
+
+static void
+check_coverage (void)
+{
+#ifdef MPFR_COV_CHECK
+ int r, i, j, k, p;
+ int err = 0;
+
+ for (r = 0; r < MPFR_RND_MAX; r++)
+ for (i = 0; i < 1 + ((mpfr_rnd_t) r == MPFR_RNDN); i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 3; k++)
+ for (p = 0; p < 2; p++)
+ if (!__gmpfr_cov_sum_tmd[r][i][j][k][p])
+ {
+ printf ("TMD not tested on %s, tmd=%d, rbit=%d, sst=%d, %s\n",
+ mpfr_print_rnd_mode ((mpfr_rnd_t) r), i+1, j, k-1,
+ p ? "positive" : "negative");
+ err = 1;
+ }
+
+ if (err)
+ exit (1);
+#endif
}
int
main (void)
{
- mpfr_prec_t p;
- unsigned long n;
-
tests_start_mpfr ();
check_special ();
+ check_more_special ();
+ check1 ();
+ check2 ();
+ check3 ();
+ check4 ();
bug20131027 ();
- test_sort (1764, 1026);
- for (p = 2 ; p < 444 ; p += 17)
- for (n = 2 ; n < 1026 ; n += 42 + p)
- test_sum (p, n);
- /* FIXME: remove the MPFR_SKIP_TSUM test once the new mpfr_sum version
- is out. */
- if (getenv ("MPFR_SKIP_EXTREME") == NULL)
- check_extreme ();
+ bug20150327 ();
+ generic_tests ();
+ check_extreme ();
cancel ();
+ check_overflow ();
+ check_underflow ();
+ check_coverage ();
tests_end_mpfr ();
return 0;
}