summary | refs | log | tree | commit | diff
path: root/mpn
diff options
context:
space:
mode:
author Marco Bodrato <bodrato@mail.dm.unipi.it> 2021-12-08 08:27:37 +0100
committer Marco Bodrato <bodrato@mail.dm.unipi.it> 2021-12-08 08:27:37 +0100
commit 5ea34d9f6fd6ebe37653ea64dac820e05cb160d5 (patch)
tree 3c6e0cef688366d7c12b7ac59e9b6ac0bf5a4740 /mpn
parent 1ffdd9e1de13a0695d5532a6efe4ecb360d18239 (diff)
download gmp-5ea34d9f6fd6ebe37653ea64dac820e05cb160d5.tar.gz
mpn/generic/toom[34]*: Use a shorter mul when it's simple to handle
Diffstat (limited to 'mpn')
-rw-r--r-- mpn/generic/toom22_mul.c 4
-rw-r--r-- mpn/generic/toom33_mul.c 5
-rw-r--r-- mpn/generic/toom3_sqr.c 44
-rw-r--r-- mpn/generic/toom44_mul.c 8
-rw-r--r-- mpn/generic/toom4_sqr.c 9
5 files changed, 36 insertions, 34 deletions
diff --git a/mpn/generic/toom22_mul.c b/mpn/generic/toom22_mul.c
index 472183f9b..da56014d6 100644
--- a/mpn/generic/toom22_mul.c
+++ b/mpn/generic/toom22_mul.c
@@ -187,10 +187,10 @@ mpn_toom22_mul (mp_ptr pp,
/* H(v0) + L(vinf) */
cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
- /* L(v0) + H(v0) */
+ /* L(v0) + (H(v0) + L(vinf)) */
cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
- /* L(vinf) + H(vinf) */
+ /* (H(v0) + L(vinf)) + H(vinf) */
cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);
if (vm1_neg)
diff --git a/mpn/generic/toom33_mul.c b/mpn/generic/toom33_mul.c
index 8f49f427c..54f055f64 100644
--- a/mpn/generic/toom33_mul.c
+++ b/mpn/generic/toom33_mul.c
@@ -8,7 +8,7 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2008, 2010, 2012, 2015 Free Software Foundation, Inc.
+Copyright 2006-2008, 2010, 2012, 2015, 2021 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -262,7 +262,8 @@ mpn_toom33_mul (mp_ptr pp,
cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
vm1[2 * n] = cy;
#else
- TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out);
+ vm1[2 * n] = 0;
+ TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + (bsm1[n] | asm1[n]), scratch_out);
#endif
TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
diff --git a/mpn/generic/toom3_sqr.c b/mpn/generic/toom3_sqr.c
index 7be15bff3..297a27f5c 100644
--- a/mpn/generic/toom3_sqr.c
+++ b/mpn/generic/toom3_sqr.c
@@ -7,7 +7,7 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2010, 2012, 2015 Free Software Foundation, Inc.
+Copyright 2006-2010, 2012, 2015, 2021 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -167,14 +167,19 @@ mpn_toom3_sqr (mp_ptr pp,
/* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
- cy = 0;
- if (asm1[n] != 0)
- cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
- if (asm1[n] != 0)
- cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
+ cy = asm1[n];
+ if (cy != 0)
+ {
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ cy += mpn_addlsh1_n_ip1 (vm1 + n, asm1, n);
+#else
+ cy += mpn_addmul_1 (vm1 + n, asm1, n, CNST_LIMB(2));
+#endif
+ }
vm1[2 * n] = cy;
#else
- TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
+ vm1[2 * n] = 0;
+ TOOM3_SQR_REC (vm1, asm1, n + asm1[n], scratch_out);
#endif
TOOM3_SQR_REC (v2, as2, n + 1, scratch_out); /* v2, 2n+1 limbs */
@@ -186,30 +191,21 @@ mpn_toom3_sqr (mp_ptr pp,
#ifdef SMALLER_RECURSION
/* v1, 2n+1 limbs */
TOOM3_SQR_REC (v1, as1, n, scratch_out);
- if (as1[n] == 1)
- {
- cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
- }
- else if (as1[n] != 0)
+ cy = as1[n];
+ if (cy == 1)
{
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
- cy = 2 * as1[n] + mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+ cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
#else
- cy = 2 * as1[n] + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+ cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
}
- else
- cy = 0;
- if (as1[n] == 1)
- {
- cy += mpn_add_n (v1 + n, v1 + n, as1, n);
- }
- else if (as1[n] != 0)
+ else if (cy != 0)
{
-#if HAVE_NATIVE_mpn_addlsh1_n_ip1
- cy += mpn_addlsh1_n_ip1 (v1 + n, as1, n);
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+ cy = 4 + mpn_addlsh2_n_ip1 (v1 + n, as1, n);
#else
- cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
+ cy = 4 + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(4));
#endif
}
v1[2 * n] = cy;
diff --git a/mpn/generic/toom44_mul.c b/mpn/generic/toom44_mul.c
index 77d5083cf..a3618994b 100644
--- a/mpn/generic/toom44_mul.c
+++ b/mpn/generic/toom44_mul.c
@@ -50,7 +50,7 @@ see https://www.gnu.org/licenses/. */
v1 = ( a0+ a1+ a2+ a3)*( b0+ b1+ b2+ b3) # A(1)*B(1) ah <= 3 bh <= 3
vm1 = ( a0- a1+ a2- a3)*( b0- b1+ b2- b3) # A(-1)*B(-1) |ah| <= 1 |bh| <= 1
v2 = ( a0+2a1+4a2+8a3)*( b0+2b1+4b2+8b3) # A(2)*B(2) ah <= 14 bh <= 14
- vm2 = ( a0-2a1+4a2-8a3)*( b0-2b1+4b2-8b3) # A(2)*B(2) ah <= 9 |bh| <= 9
+ vm2 = ( a0-2a1+4a2-8a3)*( b0-2b1+4b2-8b3) # A(-2)*B(-2) |ah| <= 9 |bh| <= 9
vh = (8a0+4a1+2a2+ a3)*(8b0+4b1+2b2+ b3) # A(1/2)*B(1/2) ah <= 14 bh <= 14
vinf= a3 * b3 # A(inf)*B(inf)
*/
@@ -221,7 +221,11 @@ mpn_toom44_mul (mp_ptr pp,
/* Compute bpx = b0 + b1 + b2 + b3 and bmx = b0 - b1 + b2 - b3. */
flags = (enum toom7_flags) (flags ^ (toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp)));
- TOOM44_MUL_N_REC (vm1, amx, bmx, n + 1, tp); /* vm1, 2n+1 limbs */
+ ASSERT (amx[n] <= 1);
+ ASSERT (bmx[n] <= 1);
+
+ vm1 [2 * n] = 0;
+ TOOM44_MUL_N_REC (vm1, amx, bmx, n + (bmx[n] | amx[n]), tp); /* vm1, 2n+1 limbs */
/* Clobbers amx, bmx. */
TOOM44_MUL_N_REC (v1, apx, bpx, n + 1, tp); /* v1, 2n+1 limbs */
diff --git a/mpn/generic/toom4_sqr.c b/mpn/generic/toom4_sqr.c
index aec84c183..fd59d1cbd 100644
--- a/mpn/generic/toom4_sqr.c
+++ b/mpn/generic/toom4_sqr.c
@@ -6,7 +6,7 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2010, 2013 Free Software Foundation, Inc.
+Copyright 2006-2010, 2013, 2021 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
@@ -37,7 +37,7 @@ see https://www.gnu.org/licenses/. */
#include "gmp-impl.h"
-/* Evaluate in: -1, -1/2, 0, +1/2, +1, +2, +inf
+/* Evaluate in: -2, -1, 0, +1/2, +1, +2, +inf
<-s--><--n--><--n--><--n-->
____ ______ ______ ______
@@ -47,8 +47,8 @@ see https://www.gnu.org/licenses/. */
v1 = ( a0+ a1+ a2+ a3)^2 # A(1)^2 ah <= 3
vm1 = ( a0- a1+ a2- a3)^2 # A(-1)^2 |ah| <= 1
v2 = ( a0+2a1+4a2+8a3)^2 # A(2)^2 ah <= 14
+ vm2 = ( a0-2a1+4a2-8a3)^2 # A(-2)^2 -9<=ah<=4
vh = (8a0+4a1+2a2+ a3)^2 # A(1/2)^2 ah <= 14
- vmh = (8a0-4a1+2a2- a3)^2 # A(-1/2)^2 -4<=ah<=9
vinf= a3 ^2 # A(inf)^2
*/
@@ -154,7 +154,8 @@ mpn_toom4_sqr (mp_ptr pp,
mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
TOOM4_SQR_REC (v1, apx, n + 1, tp); /* v1, 2n+1 limbs */
- TOOM4_SQR_REC (vm1, amx, n + 1, tp); /* vm1, 2n+1 limbs */
+ vm1 [2 * n] = 0;
+ TOOM4_SQR_REC (vm1, amx, n + amx[n], tp); /* vm1, 2n+1 limbs */
TOOM4_SQR_REC (v0, a0, n, tp);
TOOM4_SQR_REC (vinf, a3, s, tp); /* vinf, 2s limbs */