summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authortege <tege@gmplib.org>2002-05-18 12:42:06 +0200
committertege <tege@gmplib.org>2002-05-18 12:42:06 +0200
commit5f62c764832df3f130751c7d987b0c80103a88bf (patch)
treebc58d0222ebf8f60c77120c85e935c61ac41962d
parent798e94686adddf155e05c37c58ede688ed20e6dc (diff)
downloadgmp-5f62c764832df3f130751c7d987b0c80103a88bf.tar.gz
Use mpn_addmul_2, mpn_addmul_3, and mpn_addmul_4, as available.
-rw-r--r--mpn/generic/mul_basecase.c88
1 file changed, 59 insertions(+), 29 deletions(-)
diff --git a/mpn/generic/mul_basecase.c b/mpn/generic/mul_basecase.c
index d13303519..832ca8333 100644
--- a/mpn/generic/mul_basecase.c
+++ b/mpn/generic/mul_basecase.c
@@ -44,49 +44,79 @@ MA 02111-1307, USA. */
algorithm. */
void
-mpn_mul_basecase (mp_ptr prodp,
- mp_srcptr up, mp_size_t usize,
- mp_srcptr vp, mp_size_t vsize)
+mpn_mul_basecase (mp_ptr rp,
+ mp_srcptr up, mp_size_t un,
+ mp_srcptr vp, mp_size_t vn)
{
- ASSERT (usize >= vsize);
- ASSERT (vsize >= 1);
- ASSERT (! MPN_OVERLAP_P (prodp, usize+vsize, up, usize));
- ASSERT (! MPN_OVERLAP_P (prodp, usize+vsize, vp, vsize));
-
- /* We first multiply by the low order one or two limbs, as the result can
- be stored, not added, to PROD. We also avoid a loop for zeroing this
- way. */
+ ASSERT (un >= vn);
+ ASSERT (vn >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, un+vn, up, un));
+ ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn));
+
+ /* We first multiply by the low order limb (or depending on optional function
+ availability, limbs). This result can be stored, not added, to rp. We
+ also avoid a loop for zeroing this way. */
+
#if HAVE_NATIVE_mpn_mul_2
- if (vsize >= 2)
+ if (vn >= 2)
{
- prodp[usize + 1] = mpn_mul_2 (prodp, up, usize, vp);
- prodp += 2, vp += 2, vsize -= 2;
+ rp[un + 1] = mpn_mul_2 (rp, up, un, vp);
+ rp += 2, vp += 2, vn -= 2;
}
else
{
- prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
+ rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
return;
}
#else
- prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
- prodp += 1, vp += 1, vsize -= 1;
+ rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+ rp += 1, vp += 1, vn -= 1;
#endif
-#if HAVE_NATIVE_mpn_addmul_2
- while (vsize >= 2)
+ /* Now accumulate the product of up[] and the next low-order limb (or
+ depending on optional function availability, limbs) from vp[0]. */
+
+#define MAX_LEFT MP_SIZE_T_MAX
+
+#if HAVE_NATIVE_mpn_addmul_4
+ while (vn >= 4)
{
- prodp[usize + 1] = mpn_addmul_2 (prodp, up, usize, vp[0], vp[1]);
- prodp += 2, vp += 2, vsize -= 2;
+ rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp);
+ rp += 4, vp += 4, vn -= 4;
}
- if (vsize != 0)
- prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
-#else
- /* For each iteration in the loop, multiply U with one limb from V, and
- add the result to PROD. */
- while (vsize != 0)
+#undef MAX_LEFT
+#define MAX_LEFT 3
+#endif
+
+#if HAVE_NATIVE_mpn_addmul_3
+ while (vn >= 3)
{
- prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
- prodp += 1, vp += 1, vsize -= 1;
+ rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp);
+ if (MAX_LEFT - 3 <= 3)
+ break;
+ rp += 3, vp += 3, vn -= 3;
}
+#undef MAX_LEFT
+#define MAX_LEFT 2
#endif
+
+#if HAVE_NATIVE_mpn_addmul_2
+ while (vn >= 2)
+ {
+ rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp);
+ if (MAX_LEFT - 2 <= 2)
+ break;
+ rp += 2, vp += 2, vn -= 2;
+ }
+#undef MAX_LEFT
+#define MAX_LEFT 1
+#endif
+
+ while (vn >= 0)
+ {
+ rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
+ if (MAX_LEFT - 1 <= 1)
+ break;
+ rp += 1, vp += 1, vn -= 1;
+ }
}