diff options
author | Marco Bodrato <bodrato@mail.dm.unipi.it> | 2020-12-19 05:33:15 +0100 |
---|---|---|
committer | Marco Bodrato <bodrato@mail.dm.unipi.it> | 2020-12-19 05:33:15 +0100 |
commit | 2cd42813414f3193285aacb391c62b962e9d941a (patch) | |
tree | fcc3fe3f184a2a0b17fa184985068c633e309c6a | |
parent | c71f88558bd850c56c571ba8374199adfcfdcf4e (diff) | |
download | gmp-2cd42813414f3193285aacb391c62b962e9d941a.tar.gz |
many mpn/generic/toom: Better handling short recursion
-rw-r--r-- | mpn/generic/toom22_mul.c | 7 | ||||
-rw-r--r-- | mpn/generic/toom2_sqr.c | 6 | ||||
-rw-r--r-- | mpn/generic/toom32_mul.c | 18 | ||||
-rw-r--r-- | mpn/generic/toom42_mul.c | 12 | ||||
-rw-r--r-- | mpn/generic/toom43_mul.c | 9 |
5 files changed, 29 insertions, 23 deletions
diff --git a/mpn/generic/toom22_mul.c b/mpn/generic/toom22_mul.c index 64f024aac..472183f9b 100644 --- a/mpn/generic/toom22_mul.c +++ b/mpn/generic/toom22_mul.c @@ -7,7 +7,7 @@ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. -Copyright 2006-2010, 2012, 2014, 2018 Free Software Foundation, Inc. +Copyright 2006-2010, 2012, 2014, 2018, 2020 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -108,7 +108,7 @@ mpn_toom22_mul (mp_ptr pp, ASSERT (an >= bn); - ASSERT (0 < s && s <= n && s >= n - 1); + ASSERT (0 < s && s <= n && (n - s) == (an & 1)); ASSERT (0 < t && t <= s); asm1 = pp; @@ -117,7 +117,7 @@ mpn_toom22_mul (mp_ptr pp, vm1_neg = 0; /* Compute asm1. */ - if (s == n) + if ((an & 1) == 0) /* s == n */ { if (mpn_cmp (a0, a1, n) < 0) { @@ -207,6 +207,7 @@ mpn_toom22_mul (mp_ptr pp, #else /* we simply fill the area with zeros. */ MPN_FILL (pp + 2 * n, n, 0); + /* ASSERT (s + t == n || mpn_zero_p (pp + 3 * n, s + t - n)); */ #endif return; } diff --git a/mpn/generic/toom2_sqr.c b/mpn/generic/toom2_sqr.c index 4eaa1417a..db7a8464b 100644 --- a/mpn/generic/toom2_sqr.c +++ b/mpn/generic/toom2_sqr.c @@ -6,7 +6,7 @@ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. -Copyright 2006-2010, 2012, 2014, 2018 Free Software Foundation, Inc. +Copyright 2006-2010, 2012, 2014, 2018, 2020 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -80,12 +80,12 @@ mpn_toom2_sqr (mp_ptr pp, s = an >> 1; n = an - s; - ASSERT (0 < s && s <= n && s >= n - 1); + ASSERT (0 < s && s <= n && (n - s) == (an & 1)); asm1 = pp; /* Compute asm1. */ - if (s == n) + if ((an & 1) == 0) /* s == n */ { if (mpn_cmp (a0, a1, n) < 0) { diff --git a/mpn/generic/toom32_mul.c b/mpn/generic/toom32_mul.c index f03ba568f..5ce4d95fc 100644 --- a/mpn/generic/toom32_mul.c +++ b/mpn/generic/toom32_mul.c @@ -4,14 +4,14 @@ Contributed to the GNU project by Torbjorn Granlund. Improvements by Marco Bodrato and Niels Möller. - The idea of applying toom to unbalanced multiplication is due to Marco + The idea of applying Toom to unbalanced multiplication is due to Marco Bodrato and Alberto Zanoni. THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. -Copyright 2006-2010 Free Software Foundation, Inc. +Copyright 2006-2010, 2020 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -82,7 +82,7 @@ mpn_toom32_mul (mp_ptr pp, /* Required, to ensure that s + t >= n. */ ASSERT (bn + 2 <= an && an + 6 <= 3*bn); - n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1); + n = 2 * an >= 3 * bn ? (an + 2) / (size_t) 3 : (bn + 1) >> 1; s = an - 2 * n; t = bn - n; @@ -183,20 +183,20 @@ mpn_toom32_mul (mp_ptr pp, TOOM32_MUL_N_REC (v1, ap1, bp1, n, scratch_out); if (ap1_hi == 1) { - cy = bp1_hi + mpn_add_n (v1 + n, v1 + n, bp1, n); + cy = mpn_add_n (v1 + n, v1 + n, bp1, n); } - else if (ap1_hi == 2) + else if (ap1_hi > 1) /* ap1_hi == 2 */ { -#if HAVE_NATIVE_mpn_addlsh1_n - cy = 2 * bp1_hi + mpn_addlsh1_n (v1 + n, v1 + n, bp1, n); +#if HAVE_NATIVE_mpn_addlsh1_n_ip1 + cy = mpn_addlsh1_n_ip1 (v1 + n, bp1, n); #else - cy = 2 * bp1_hi + mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2)); + cy = mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2)); #endif } else cy = 0; if (bp1_hi != 0) - cy += mpn_add_n (v1 + n, v1 + n, ap1, n); + cy += ap1_hi + mpn_add_n (v1 + n, v1 + n, ap1, n); v1[2 * n] = cy; TOOM32_MUL_N_REC (vm1, am1, bm1, n, scratch_out); diff --git a/mpn/generic/toom42_mul.c b/mpn/generic/toom42_mul.c index 2dfba9b2f..e84ce6593 100644 --- a/mpn/generic/toom42_mul.c +++ b/mpn/generic/toom42_mul.c @@ -206,24 +206,24 @@ mpn_toom42_mul (mp_ptr pp, TOOM42_MUL_N_REC (v1, as1, bs1, n, scratch_out); if (as1[n] == 1) { - cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n); + cy = mpn_add_n (v1 + n, v1 + n, bs1, n); } else if (as1[n] == 2) { -#if HAVE_NATIVE_mpn_addlsh1_n - cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n); +#if HAVE_NATIVE_mpn_addlsh1_n_ip1 + cy = mpn_addlsh1_n_ip1 (v1 + n, bs1, n); #else - cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2)); + cy = mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2)); #endif } else if (as1[n] == 3) { - cy = 3 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(3)); + cy = mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(3)); } else cy = 0; if (bs1[n] != 0) - cy += mpn_add_n (v1 + n, v1 + n, as1, n); + cy += as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n); v1[2 * n] = cy; TOOM42_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */ diff --git a/mpn/generic/toom43_mul.c b/mpn/generic/toom43_mul.c index 065013819..34acd25f1 100644 --- a/mpn/generic/toom43_mul.c +++ b/mpn/generic/toom43_mul.c @@ -10,7 +10,7 @@ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. -Copyright 2009 Free Software Foundation, Inc. +Copyright 2009, 2020 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -115,8 +115,12 @@ mpn_toom43_mul (mp_ptr pp, /* Compute bs2 and bsm2. */ b1d[n] = mpn_lshift (b1d, b1, n, 1); /* 2b1 */ +#if HAVE_NATIVE_mpn_addlsh2_n + cy = mpn_addlsh2_n (b0b2, b0, b2, t); /* 4b2 + b0 */ +#else cy = mpn_lshift (b0b2, b2, t, 2); /* 4b2 */ cy += mpn_add_n (b0b2, b0b2, b0, t); /* 4b2 + b0 */ +#endif if (t != n) cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy); b0b2[n] = cy; @@ -185,7 +189,8 @@ mpn_toom43_mul (mp_ptr pp, ASSERT (bsm2[n] <= 4); /* vm1, 2n+1 limbs */ - mpn_mul_n (vm1, asm1, bsm1, n+1); /* W4 */ + vm1[2*n] = 0; + mpn_mul_n (vm1, asm1, bsm1, n + (asm1[n] | bsm1[n])); /* W4 */ /* vm2, 2n+1 limbs */ mpn_mul_n (vm2, asm2, bsm2, n+1); /* W2 */ |