summaryrefslogtreecommitdiff
path: root/src/div.c
diff options
context:
space:
mode:
authorzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2017-01-30 16:43:19 +0000
committerzimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>2017-01-30 16:43:19 +0000
commit3ee9a48e4fcae769e6843c86acb6af08ae8926a2 (patch)
tree0dfed6c2d0a858a939213fd3f53e7a24cb9d3aec /src/div.c
parent8139b6e17f225af76f95ce0aabda3d3ef18a883b (diff)
downloadmpfr-3ee9a48e4fcae769e6843c86acb6af08ae8926a2.tar.gz
[src/div.c] improved slow branch of mpfr_div_2
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11247 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/div.c')
-rw-r--r--src/div.c50
1 files changed, 25 insertions, 25 deletions
diff --git a/src/div.c b/src/div.c
index 5080f2136..d2bf8f427 100644
--- a/src/div.c
+++ b/src/div.c
@@ -276,36 +276,36 @@ mpfr_div_2 (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mpfr_rnd_t rnd_mode)
else
{
/* we know q1:q0 is a good-enough approximation, use it! */
- mp_limb_t qq[2], uu[4];
-
- qq[0] = q0;
- qq[1] = q1;
- /* FIXME: instead of using mpn_mul_n, we can use 3 umul_ppmm calls,
- since we know the difference should at most 16*(v1:v0) after the
- subtraction below, thus at most 16*2^128. */
- mpn_mul_n (uu, qq, MPFR_MANT(v), 2);
- /* we now should have uu[3]:uu[2] <= r3:r2 */
- MPFR_ASSERTD(uu[3] < r3 || (uu[3] == r3 && uu[2] <= r2));
- /* subtract {uu,4} from r3:r2:0:0, with result in {uu,4} */
- uu[2] = r2 - uu[2];
- uu[3] = r3 - uu[3] - (uu[2] > r2);
- /* now negate uu[1]:uu[0] */
- uu[0] = -uu[0];
- uu[1] = -uu[1] - (uu[0] != 0);
- /* there is a borrow in uu[2] when uu[0] and uu[1] are not both zero */
- uu[3] -= (uu[1] != 0 || uu[0] != 0) && (uu[2] == 0);
- uu[2] -= (uu[1] != 0 || uu[0] != 0);
- MPFR_ASSERTD(uu[3] == MPFR_LIMB_ZERO);
- while (uu[2] > 0 || (uu[1] > v1) || (uu[1] == v1 && uu[0] >= v0))
+ mp_limb_t s0, s1, s2, h, l;
+
+ /* Since we know the difference should be at most 21*(v1:v0) after the
+ subtraction below, thus at most 21*2^128, it suffices to compute the
+ lower 3 limbs of (q1:q0) * (v1:v0). */
+ umul_ppmm (s1, s0, q0, v0);
+ umul_ppmm (s2, l, q0, v1);
+ s1 += l;
+ s2 += (s1 < l);
+ umul_ppmm (h, l, q1, v0);
+ s1 += l;
+ s2 += h + (s1 < l);
+ s2 += q1 * v1;
+ /* Subtract s2:s1:s0 from r2:0:0, with result in s2:s1:s0. */
+ s2 = r2 - s2;
+ /* now negate s1:s0 */
+ s0 = -s0;
+ s1 = -s1 - (s0 != 0);
+ /* there is a borrow in s2 when s0 and s1 are not both zero */
+ s2 -= (s1 != 0 || s0 != 0);
+ while (s2 > 0 || (s1 > v1) || (s1 == v1 && s0 >= v0))
{
/* add 1 to q1:q0 */
q0 ++;
q1 += (q0 == 0);
- /* subtract v1:v0 to u2:u1:u0 */
- uu[2] -= (uu[1] < v1) || (uu[1] == v1 && uu[0] < v0);
- sub_ddmmss (uu[1], uu[0], uu[1], uu[0], v1, v0);
+ /* subtract v1:v0 to s2:s1:s0 */
+ s2 -= (s1 < v1) || (s1 == v1 && s0 < v0);
+ sub_ddmmss (s1, s0, s1, s0, v1, v0);
}
- sb = uu[1] | uu[0];
+ sb = s1 | s0;
}
goto round_div2;
#endif