diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-07-07 11:02:47 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-07-07 11:02:47 +0000 |
commit | 42d6020059f91c06b96bf7729342a71751d4e801 (patch) | |
tree | a081057ceb24da7bc758c0474b5b209944dd48cc /bignum.c | |
parent | 8dbf566094dc5150ad55d2c8ffefdc895854d59c (diff) | |
download | ruby-42d6020059f91c06b96bf7729342a71751d4e801.tar.gz |
* bignum.c: Reorder functions to decrease forward reference.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@41822 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'bignum.c')
-rw-r--r-- | bignum.c | 2401 |
1 files changed, 1199 insertions, 1202 deletions
@@ -26,6 +26,7 @@ #include <assert.h> VALUE rb_cBignum; +const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; static VALUE big_three = Qnil; @@ -93,51 +94,23 @@ static VALUE big_three = Qnil; #define RBIGNUM_SET_NEGATIVE_SIGN(b) RBIGNUM_SET_SIGN(b, 0) #define RBIGNUM_SET_POSITIVE_SIGN(b) RBIGNUM_SET_SIGN(b, 1) +#define bignew(len,sign) bignew_1(rb_cBignum,(len),(sign)) + #define KARATSUBA_MUL_DIGITS 70 #define TOOM3_MUL_DIGITS 150 static BDIGIT bary_small_lshift(BDIGIT *zds, BDIGIT *xds, long n, int shift); static void bary_small_rshift(BDIGIT *zds, BDIGIT *xds, long n, int shift, int sign_bit); -static void bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwords, size_t wordsize, size_t nails, int flags); -static void bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl); static void bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl); -static int bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn); -static int bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow); static void bary_divmod(BDIGIT *qds, size_t nq, BDIGIT *rds, size_t nr, BDIGIT *xds, size_t nx, BDIGIT *yds, size_t ny); -static int bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn); -static int bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry); -static int bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags); -static int bary_2comp(BDIGIT *ds, size_t n); -static void bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn); -static inline int bary_sparse_p(BDIGIT *ds, size_t n); static VALUE bigmul0(VALUE x, VALUE y); static VALUE bigmul1_toom3(VALUE x, VALUE y); +static VALUE bignew_1(VALUE klass, long len, int sign); +static inline VALUE bigtrunc(VALUE x); -#define BIGNUM_DEBUG 0 -#if BIGNUM_DEBUG -#define ON_DEBUG(x) do { x; } while (0) -static void -dump_bignum(VALUE x) -{ - long i; - printf("%c0x0", RBIGNUM_SIGN(x) ? '+' : '-'); - for (i = RBIGNUM_LEN(x); i--; ) { - printf("_%0*"PRIxBDIGIT, SIZEOF_BDIGITS*2, BDIGITS(x)[i]); - } - printf(", len=%lu", RBIGNUM_LEN(x)); - puts(""); -} - -static VALUE -rb_big_dump(VALUE x) -{ - dump_bignum(x); - return x; -} -#else -#define ON_DEBUG(x) -#endif +static VALUE bigsqr(VALUE x); +static void bigdivmod(VALUE x, VALUE y, volatile VALUE *divp, volatile VALUE *modp); static int nlz16(uint16_t x) @@ -479,137 +452,6 @@ bary_zero_p(BDIGIT *xds, size_t nx) return 1; } -static int -bigzero_p(VALUE x) -{ - return bary_zero_p(BDIGITS(x), RBIGNUM_LEN(x)); -} - -int -rb_bigzero_p(VALUE x) -{ - return BIGZEROP(x); -} - -int -rb_cmpint(VALUE val, VALUE a, VALUE b) -{ - if (NIL_P(val)) { - rb_cmperr(a, b); - } - if (FIXNUM_P(val)) { - long l = FIX2LONG(val); - if (l > 0) return 1; - if (l < 0) return -1; - return 0; - } - if (RB_TYPE_P(val, T_BIGNUM)) { - if (BIGZEROP(val)) return 0; - if (RBIGNUM_SIGN(val)) return 1; - return -1; - } - if (RTEST(rb_funcall(val, '>', 1, INT2FIX(0)))) return 1; - if (RTEST(rb_funcall(val, '<', 1, INT2FIX(0)))) return -1; - return 0; -} - -#define RBIGNUM_SET_LEN(b,l) \ - ((RBASIC(b)->flags & RBIGNUM_EMBED_FLAG) ? \ - (void)(RBASIC(b)->flags = \ - (RBASIC(b)->flags & ~RBIGNUM_EMBED_LEN_MASK) | \ - ((l) << RBIGNUM_EMBED_LEN_SHIFT)) : \ - (void)(RBIGNUM(b)->as.heap.len = (l))) - -static void -rb_big_realloc(VALUE big, long len) -{ - BDIGIT *ds; - if (RBASIC(big)->flags & RBIGNUM_EMBED_FLAG) { - if (RBIGNUM_EMBED_LEN_MAX < len) { - ds = ALLOC_N(BDIGIT, len); - MEMCPY(ds, RBIGNUM(big)->as.ary, BDIGIT, RBIGNUM_EMBED_LEN_MAX); - RBIGNUM(big)->as.heap.len = RBIGNUM_LEN(big); - RBIGNUM(big)->as.heap.digits = ds; - RBASIC(big)->flags &= ~RBIGNUM_EMBED_FLAG; - } - } - else { - if (len <= RBIGNUM_EMBED_LEN_MAX) { - ds = RBIGNUM(big)->as.heap.digits; - RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG; - RBIGNUM_SET_LEN(big, len); - if (ds) { - MEMCPY(RBIGNUM(big)->as.ary, ds, BDIGIT, len); - xfree(ds); - } - } - else { - if (RBIGNUM_LEN(big) == 0) { - RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len); - } - else { - REALLOC_N(RBIGNUM(big)->as.heap.digits, BDIGIT, len); - } - } - } -} - -void -rb_big_resize(VALUE big, long len) -{ - rb_big_realloc(big, len); - RBIGNUM_SET_LEN(big, len); -} - -static VALUE -bignew_1(VALUE klass, long len, int sign) -{ - NEWOBJ_OF(big, struct RBignum, klass, T_BIGNUM | (RGENGC_WB_PROTECTED_BIGNUM ? FL_WB_PROTECTED : 0)); - RBIGNUM_SET_SIGN(big, sign?1:0); - if (len <= RBIGNUM_EMBED_LEN_MAX) { - RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG; - RBIGNUM_SET_LEN(big, len); - } - else { - RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len); - RBIGNUM(big)->as.heap.len = len; - } - OBJ_FREEZE(big); - return (VALUE)big; -} - -#define bignew(len,sign) bignew_1(rb_cBignum,(len),(sign)) - -VALUE -rb_big_new(long len, int sign) -{ - return bignew(len, sign != 0); -} - -VALUE -rb_big_clone(VALUE x) -{ - long len = RBIGNUM_LEN(x); - VALUE z = bignew_1(CLASS_OF(x), len, RBIGNUM_SIGN(x)); - - MEMCPY(BDIGITS(z), BDIGITS(x), BDIGIT, len); - return z; -} - -static int -bytes_2comp(unsigned char *buf, size_t len) -{ - size_t i; - for (i = 0; i < len; i++) - buf[i] = ~buf[i]; - for (i = 0; i < len; i++) { - buf[i]++; - if (buf[i] != 0) - return 0; - } - return 1; -} - static void bary_neg(BDIGIT *ds, size_t n) { @@ -638,420 +480,6 @@ bary_2comp(BDIGIT *ds, size_t n) } static void -big_extend_carry(VALUE x) -{ - rb_big_resize(x, RBIGNUM_LEN(x)+1); - BDIGITS(x)[RBIGNUM_LEN(x)-1] = 1; -} - -/* modify a bignum by 2's complement */ -static void -get2comp(VALUE x) -{ - long i = RBIGNUM_LEN(x); - BDIGIT *ds = BDIGITS(x); - - if (bary_2comp(ds, i)) { - big_extend_carry(x); - } -} - -void -rb_big_2comp(VALUE x) /* get 2's complement */ -{ - get2comp(x); -} - -static BDIGIT -abs2twocomp(VALUE *xp, long *n_ret) -{ - VALUE x = *xp; - long n = RBIGNUM_LEN(x); - BDIGIT *ds = BDIGITS(x); - BDIGIT hibits = 0; - - while (0 < n && ds[n-1] == 0) - n--; - - if (n != 0 && RBIGNUM_NEGATIVE_P(x)) { - VALUE z = bignew_1(CLASS_OF(x), n, 0); - MEMCPY(BDIGITS(z), ds, BDIGIT, n); - bary_2comp(BDIGITS(z), n); - hibits = BDIGMAX; - *xp = z; - } - *n_ret = n; - return hibits; -} - -static void -twocomp2abs_bang(VALUE x, int hibits) -{ - RBIGNUM_SET_SIGN(x, !hibits); - if (hibits) { - get2comp(x); - } -} - -static inline VALUE -bigtrunc(VALUE x) -{ - long len = RBIGNUM_LEN(x); - BDIGIT *ds = BDIGITS(x); - - if (len == 0) return x; - while (--len && !ds[len]); - if (RBIGNUM_LEN(x) > len+1) { - rb_big_resize(x, len+1); - } - return x; -} - -static inline VALUE -bigfixize(VALUE x) -{ - long len = RBIGNUM_LEN(x); - BDIGIT *ds = BDIGITS(x); - - if (len == 0) return INT2FIX(0); - if (BIGSIZE(x) <= sizeof(long)) { - long num = 0; -#if SIZEOF_BDIGITS >= SIZEOF_LONG - num = (long)ds[0]; -#else - while (len--) { - num = (long)(BIGUP(num) + ds[len]); - } -#endif - if (num >= 0) { - if (RBIGNUM_SIGN(x)) { - if (POSFIXABLE(num)) return LONG2FIX(num); - } - else { - if (NEGFIXABLE(-num)) return LONG2FIX(-num); - } - } - } - return x; -} - -static VALUE -bignorm(VALUE x) -{ - if (RB_TYPE_P(x, T_BIGNUM)) { - x = bigfixize(x); - if (!FIXNUM_P(x)) - bigtrunc(x); - } - return x; -} - -VALUE -rb_big_norm(VALUE x) -{ - return bignorm(x); -} - -VALUE -rb_uint2big(VALUE n) -{ - long i; - VALUE big = bignew(bdigit_roomof(SIZEOF_VALUE), 1); - BDIGIT *digits = BDIGITS(big); - -#if SIZEOF_BDIGITS >= SIZEOF_VALUE - digits[0] = n; -#else - for (i = 0; i < bdigit_roomof(SIZEOF_VALUE); i++) { - digits[i] = BIGLO(n); - n = BIGDN(n); - } -#endif - - i = bdigit_roomof(SIZEOF_VALUE); - while (--i && !digits[i]) ; - RBIGNUM_SET_LEN(big, i+1); - return big; -} - -VALUE -rb_int2big(SIGNED_VALUE n) -{ - long neg = 0; - VALUE u; - VALUE big; - - if (n < 0) { - u = 1 + (VALUE)(-(n + 1)); /* u = -n avoiding overflow */ - neg = 1; - } - else { - u = n; - } - big = rb_uint2big(u); - if (neg) { - RBIGNUM_SET_SIGN(big, 0); - } - return big; -} - -VALUE -rb_uint2inum(VALUE n) -{ - if (POSFIXABLE(n)) return LONG2FIX(n); - return rb_uint2big(n); -} - -VALUE -rb_int2inum(SIGNED_VALUE n) -{ - if (FIXABLE(n)) return LONG2FIX(n); - return rb_int2big(n); -} - -void -rb_big_pack(VALUE val, unsigned long *buf, long num_longs) -{ - rb_integer_pack(val, buf, num_longs, sizeof(long), 0, - INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER| - INTEGER_PACK_2COMP); -} - -VALUE -rb_big_unpack(unsigned long *buf, long num_longs) -{ - return rb_integer_unpack(buf, num_longs, sizeof(long), 0, - INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER| - INTEGER_PACK_2COMP); -} - -/* - * Calculate the number of bytes to be required to represent - * the absolute value of the integer given as _val_. - * - * [val] an integer. - * [nlz_bits_ret] number of leading zero bits in the most significant byte is returned if not NULL. - * - * This function returns ((val_numbits * CHAR_BIT + CHAR_BIT - 1) / CHAR_BIT) - * where val_numbits is the number of bits of abs(val). - * This function should not overflow. - * - * If nlz_bits_ret is not NULL, - * (return_value * CHAR_BIT - val_numbits) is stored in *nlz_bits_ret. - * In this case, 0 <= *nlz_bits_ret < CHAR_BIT. - * - */ -size_t -rb_absint_size(VALUE val, int *nlz_bits_ret) -{ - BDIGIT *dp; - BDIGIT *de; - BDIGIT fixbuf[bdigit_roomof(sizeof(long))]; - - int num_leading_zeros; - - val = rb_to_int(val); - - if (FIXNUM_P(val)) { - long v = FIX2LONG(val); - if (v < 0) { - v = -v; - } -#if SIZEOF_BDIGITS >= SIZEOF_LONG - fixbuf[0] = v; -#else - { - int i; - for (i = 0; i < numberof(fixbuf); i++) { - fixbuf[i] = BIGLO(v); - v = BIGDN(v); - } - } -#endif - dp = fixbuf; - de = fixbuf + numberof(fixbuf); - } - else { - dp = BDIGITS(val); - de = dp + RBIGNUM_LEN(val); - } - while (dp < de && de[-1] == 0) - de--; - if (dp == de) { - if (nlz_bits_ret) - *nlz_bits_ret = 0; - return 0; - } - num_leading_zeros = nlz(de[-1]); - if (nlz_bits_ret) - *nlz_bits_ret = num_leading_zeros % CHAR_BIT; - return (de - dp) * SIZEOF_BDIGITS - num_leading_zeros / CHAR_BIT; -} - -static size_t -absint_numwords_small(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret) -{ - size_t val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte; - size_t div = val_numbits / word_numbits; - size_t mod = val_numbits % word_numbits; - size_t numwords; - size_t nlz_bits; - numwords = mod == 0 ? div : div + 1; - nlz_bits = mod == 0 ? 0 : word_numbits - mod; - *nlz_bits_ret = nlz_bits; - return numwords; -} - -static size_t -absint_numwords_generic(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret) -{ - BDIGIT numbytes_bary[bdigit_roomof(sizeof(numbytes))]; - BDIGIT char_bit[1] = { CHAR_BIT }; - BDIGIT val_numbits_bary[bdigit_roomof(sizeof(numbytes) + 1)]; - BDIGIT nlz_bits_in_msbyte_bary[1] = { nlz_bits_in_msbyte }; - BDIGIT word_numbits_bary[bdigit_roomof(sizeof(word_numbits))]; - BDIGIT div_bary[numberof(val_numbits_bary) + BIGDIVREM_EXTRA_WORDS]; - BDIGIT mod_bary[numberof(word_numbits_bary)]; - BDIGIT one[1] = { 1 }; - size_t nlz_bits; - size_t mod; - int sign; - size_t numwords; - - /* - * val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte - * div, mod = val_numbits.divmod(word_numbits) - * numwords = mod == 0 ? div : div + 1 - * nlz_bits = mod == 0 ? 0 : word_numbits - mod - */ - - bary_unpack(BARY_ARGS(numbytes_bary), &numbytes, 1, sizeof(numbytes), 0, - INTEGER_PACK_NATIVE_BYTE_ORDER); - BARY_MUL1(val_numbits_bary, numbytes_bary, char_bit); - if (nlz_bits_in_msbyte) - BARY_SUB(val_numbits_bary, val_numbits_bary, nlz_bits_in_msbyte_bary); - bary_unpack(BARY_ARGS(word_numbits_bary), &word_numbits, 1, sizeof(word_numbits), 0, - INTEGER_PACK_NATIVE_BYTE_ORDER); - BARY_DIVMOD(div_bary, mod_bary, val_numbits_bary, word_numbits_bary); - if (BARY_ZERO_P(mod_bary)) { - nlz_bits = 0; - } - else { - BARY_ADD(div_bary, div_bary, one); - bary_pack(+1, BARY_ARGS(mod_bary), &mod, 1, sizeof(mod), 0, - INTEGER_PACK_NATIVE_BYTE_ORDER); - nlz_bits = word_numbits - mod; - } - sign = bary_pack(+1, BARY_ARGS(div_bary), &numwords, 1, sizeof(numwords), 0, - INTEGER_PACK_NATIVE_BYTE_ORDER); - - if (sign == 2) - return (size_t)-1; - *nlz_bits_ret = nlz_bits; - return numwords; -} - -/* - * Calculate the number of words to be required to represent - * the absolute value of the integer given as _val_. - * - * [val] an integer. - * [word_numbits] number of bits in a word. - * [nlz_bits_ret] number of leading zero bits in the most significant word is returned if not NULL. - * - * This function returns ((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits) - * where val_numbits is the number of bits of abs(val). - * - * This function can overflow. - * When overflow occur, (size_t)-1 is returned. - * - * If nlz_bits_ret is not NULL and overflow is not occur, - * (return_value * word_numbits - val_numbits) is stored in *nlz_bits_ret. - * In this case, 0 <= *nlz_bits_ret < word_numbits. - * - */ -size_t -rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret) -{ - size_t numbytes; - int nlz_bits_in_msbyte; - size_t numwords; - size_t nlz_bits; - - if (word_numbits == 0) - return (size_t)-1; - - numbytes = rb_absint_size(val, &nlz_bits_in_msbyte); - - if (numbytes <= SIZE_MAX / CHAR_BIT) { - numwords = absint_numwords_small(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits); -#ifdef DEBUG_INTEGER_PACK - { - size_t numwords0, nlz_bits0; - numwords0 = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits0); - assert(numwords0 == numwords); - assert(nlz_bits0 == nlz_bits); - } -#endif - } - else { - numwords = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits); - } - if (numwords == (size_t)-1) - return numwords; - - if (nlz_bits_ret) - *nlz_bits_ret = nlz_bits; - - return numwords; -} - -int -rb_absint_singlebit_p(VALUE val) -{ - BDIGIT *dp; - BDIGIT *de; - BDIGIT fixbuf[bdigit_roomof(sizeof(long))]; - BDIGIT d; - - val = rb_to_int(val); - - if (FIXNUM_P(val)) { - long v = FIX2LONG(val); - if (v < 0) { - v = -v; - } -#if SIZEOF_BDIGITS >= SIZEOF_LONG - fixbuf[0] = v; -#else - { - int i; - for (i = 0; i < numberof(fixbuf); i++) { - fixbuf[i] = BIGLO(v); - v = BIGDN(v); - } - } -#endif - dp = fixbuf; - de = fixbuf + numberof(fixbuf); - } - else { - dp = BDIGITS(val); - de = dp + RBIGNUM_LEN(val); - } - while (dp < de && de[-1] == 0) - de--; - while (dp < de && dp[0] == 0) - dp++; - if (dp == de) /* no bit set. */ - return 0; - if (dp != de-1) /* two non-zero words. two bits set, at least. */ - return 0; - d = *dp; - return POW2_P(d); -} - -static void bary_swap(BDIGIT *ds, size_t num_bdigits) { BDIGIT *p1 = ds; @@ -1193,6 +621,20 @@ integer_pack_take_lowbits(int n, BDIGIT_DBL *ddp, int *numbits_in_dd_p) } static int +bytes_2comp(unsigned char *buf, size_t len) +{ + size_t i; + for (i = 0; i < len; i++) + buf[i] = ~buf[i]; + for (i = 0; i < len; i++) { + buf[i]++; + if (buf[i] != 0) + return 0; + } + return 1; +} + +static int bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords, size_t wordsize, size_t nails, int flags) { BDIGIT *dp, *de; @@ -1523,103 +965,6 @@ bary_pack(int sign, BDIGIT *ds, size_t num_bdigits, void *words, size_t numwords #undef TAKE_LOWBITS } -/* - * Export an integer into a buffer. - * - * This function fills the buffer specified by _words_ and _numwords_ as - * val in the format specified by _wordsize_, _nails_ and _flags_. - * - * [val] Fixnum, Bignum or another integer like object which has to_int method. - * [words] buffer to export abs(val). - * [numwords] the size of given buffer as number of words. - * [wordsize] the size of word as number of bytes. - * [nails] number of padding bits in a word. - * Most significant nails bits of each word are filled by zero. - * [flags] bitwise or of constants which name starts "INTEGER_PACK_". - * - * flags: - * [INTEGER_PACK_MSWORD_FIRST] Store the most significant word as the first word. - * [INTEGER_PACK_LSWORD_FIRST] Store the least significant word as the first word. - * [INTEGER_PACK_MSBYTE_FIRST] Store the most significant byte in a word as the first byte in the word. - * [INTEGER_PACK_LSBYTE_FIRST] Store the least significant byte in a word as the first byte in the word. - * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian. - * [INTEGER_PACK_2COMP] Use 2's complement representation. - * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST - * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST - * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug). - * - * This function fills the buffer specified by _words_ - * as abs(val) if INTEGER_PACK_2COMP is not specified in _flags_. - * If INTEGER_PACK_2COMP is specified, 2's complement representation of val is - * filled in the buffer. - * - * This function returns the signedness and overflow condition. - * The overflow condition depends on INTEGER_PACK_2COMP. - * - * INTEGER_PACK_2COMP is not specified: - * -2 : negative overflow. val <= -2**(numwords*(wordsize*CHAR_BIT-nails)) - * -1 : negative without overflow. -2**(numwords*(wordsize*CHAR_BIT-nails)) < val < 0 - * 0 : zero. val == 0 - * 1 : positive without overflow. 0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails)) - * 2 : positive overflow. 2**(numwords*(wordsize*CHAR_BIT-nails)) <= val - * - * INTEGER_PACK_2COMP is specified: - * -2 : negative overflow. val < -2**(numwords*(wordsize*CHAR_BIT-nails)) - * -1 : negative without overflow. -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val < 0 - * 0 : zero. val == 0 - * 1 : positive without overflow. 0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails)) - * 2 : positive overflow. 2**(numwords*(wordsize*CHAR_BIT-nails)) <= val - * - * The value, -2**(numwords*(wordsize*CHAR_BIT-nails)), is representable - * in 2's complement representation but not representable in absolute value. - * So -1 is returned for the value if INTEGER_PACK_2COMP is specified - * but returns -2 if INTEGER_PACK_2COMP is not specified. - * - * The least significant words are filled in the buffer when overflow occur. - */ - -int -rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags) -{ - int sign; - BDIGIT *ds; - size_t num_bdigits; - BDIGIT fixbuf[bdigit_roomof(sizeof(long))]; - - RB_GC_GUARD(val) = rb_to_int(val); - - if (FIXNUM_P(val)) { - long v = FIX2LONG(val); - if (v < 0) { - sign = -1; - v = -v; - } - else { - sign = 1; - } -#if SIZEOF_BDIGITS >= SIZEOF_LONG - fixbuf[0] = v; -#else - { - int i; - for (i = 0; i < numberof(fixbuf); i++) { - fixbuf[i] = BIGLO(v); - v = BIGDN(v); - } - } -#endif - ds = fixbuf; - num_bdigits = numberof(fixbuf); - } - else { - sign = RBIGNUM_POSITIVE_P(val) ? 1 : -1; - ds = BDIGITS(val); - num_bdigits = RBIGNUM_LEN(val); - } - - return bary_pack(sign, ds, num_bdigits, words, numwords, wordsize, nails, flags); -} - static size_t integer_unpack_num_bdigits_small(size_t numwords, size_t wordsize, size_t nails, int *nlp_bits_ret) { @@ -1975,6 +1320,1184 @@ bary_unpack(BDIGIT *bdigits, size_t num_bdigits, const void *words, size_t numwo } } +static int +bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow) +{ + BDIGIT_DBL_SIGNED num; + size_t i; + + assert(yn <= xn); + assert(xn <= zn); + + num = borrow ? -1 : 0; + for (i = 0; i < yn; i++) { + num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i]; + zds[i] = BIGLO(num); + num = BIGDN(num); + } + for (; i < xn; i++) { + if (num == 0) goto num_is_zero; + num += xds[i]; + zds[i] = BIGLO(num); + num = BIGDN(num); + } + if (num == 0) goto num_is_zero; + for (; i < zn; i++) { + zds[i] = BDIGMAX; + } + return 1; + + num_is_zero: + if (xds == zds && xn == zn) + return 0; + for (; i < xn; i++) { + zds[i] = xds[i]; + } + for (; i < zn; i++) { + zds[i] = 0; + } + return 0; +} + +static int +bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn) +{ + return bary_subb(zds, zn, xds, xn, yds, yn, 0); +} + +static int +bary_sub_one(BDIGIT *zds, size_t zn) +{ + return bary_subb(zds, zn, zds, zn, NULL, 0, 1); +} + +static int +bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry) +{ + BDIGIT_DBL num; + size_t i; + + assert(xn <= zn); + assert(yn <= zn); + + if (xn > yn) { + BDIGIT *tds; + tds = xds; xds = yds; yds = tds; + i = xn; xn = yn; yn = i; + } + + num = carry ? 1 : 0; + for (i = 0; i < xn; i++) { + num += (BDIGIT_DBL)xds[i] + yds[i]; + zds[i] = BIGLO(num); + num = BIGDN(num); + } + for (; i < yn; i++) { + if (num == 0) goto num_is_zero; + num += yds[i]; + zds[i] = BIGLO(num); + num = BIGDN(num); + } + for (; i < zn; i++) { + if (num == 0) goto num_is_zero; + zds[i] = BIGLO(num); + num = BIGDN(num); + } + return num != 0; + + num_is_zero: + if (yds == zds && yn == zn) + return 0; + for (; i < yn; i++) { + zds[i] = yds[i]; + } + for (; i < zn; i++) { + zds[i] = 0; + } + return 0; +} + +static int +bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn) +{ + return bary_addc(zds, zn, xds, xn, yds, yn, 0); +} + +static int +bary_add_one(BDIGIT *zds, size_t zn) +{ + return bary_addc(zds, zn, NULL, 0, zds, zn, 1); +} + +static void +bary_mul_single(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT y) +{ + BDIGIT_DBL n; + + assert(2 <= zl); + + n = (BDIGIT_DBL)x * y; + zds[0] = BIGLO(n); + zds[1] = (BDIGIT)BIGDN(n); +} + +static int +bary_muladd_1xN(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT *yds, size_t yl) +{ + BDIGIT_DBL n; + BDIGIT_DBL dd; + size_t j; + + assert(zl > yl); + + if (x == 0) + return 0; + dd = x; + n = 0; + for (j = 0; j < yl; j++) { + BDIGIT_DBL ee = n + dd * yds[j]; + if (ee) { + n = zds[j] + ee; + zds[j] = BIGLO(n); + n = BIGDN(n); + } + else { + n = 0; + } + + } + for (; j < zl; j++) { + if (n == 0) + break; + n += zds[j]; + zds[j] = BIGLO(n); + n = BIGDN(n); + } + return n != 0; +} + +static void +bary_mul_normal(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) +{ + size_t i; + + assert(xl + yl <= zl); + + MEMZERO(zds, BDIGIT, zl); + for (i = 0; i < xl; i++) { + bary_muladd_1xN(zds+i, zl-i, xds[i], yds, yl); + } +} + +/* efficient squaring (2 times faster than normal multiplication) + * ref: Handbook of Applied Cryptography, Algorithm 14.16 + * http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf + */ +static void +bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn) +{ + size_t i, j; + BDIGIT_DBL c, v, w; + + assert(xn * 2 <= zn); + + MEMZERO(zds, BDIGIT, zn); + for (i = 0; i < xn; i++) { + v = (BDIGIT_DBL)xds[i]; + if (!v) continue; + c = (BDIGIT_DBL)zds[i + i] + v * v; + zds[i + i] = BIGLO(c); + c = BIGDN(c); + v *= 2; + for (j = i + 1; j < xn; j++) { + w = (BDIGIT_DBL)xds[j]; + c += (BDIGIT_DBL)zds[i + j] + BIGLO(v) * w; + zds[i + j] = BIGLO(c); + c = BIGDN(c); + if (BIGDN(v)) c += w; + } + if (c) { + c += (BDIGIT_DBL)zds[i + xn]; + zds[i + xn] = BIGLO(c); + c = BIGDN(c); + assert(c == 0 || i != xn-1); + if (c && i != xn-1) zds[i + xn + 1] += (BDIGIT)c; + } + } +} + +/* balancing multiplication by slicing larger argument */ +static void +bary_mul_balance(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) +{ + VALUE work = 0; + size_t r, n; + BDIGIT *wds; + size_t wl; + + assert(xl + yl <= zl); + assert(2 * xl <= yl || 3 * xl <= 2*(yl+2)); + + wl = xl * 2; + wds = ALLOCV_N(BDIGIT, work, wl); + + MEMZERO(zds, BDIGIT, zl); + + n = 0; + while (yl > 0) { + r = xl > yl ? yl : xl; + bary_mul(wds, xl + r, xds, xl, yds + n, r); + bary_add(zds + n, zl - n, + zds + n, zl - n, + wds, xl + r); + yl -= r; + n += r; + } + + if (work) + ALLOCV_END(work); +} + +/* multiplication by karatsuba method */ +static void +bary_mul_karatsuba(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) +{ + VALUE work = 0; + BDIGIT *wds; + size_t wl; + + size_t n; + int sub_p, borrow, carry1, carry2, carry3; + + int odd_x = 0; + int odd_y = 0; + + BDIGIT *xds0, *xds1, *yds0, *yds1, *zds0, *zds1, *zds2, *zds3; + + assert(xl + yl <= zl); + assert(xl <= yl); + assert(yl < 2 * xl); + + if (yl & 1) { + odd_y = 1; + yl--; + if (yl < xl) { + odd_x = 1; + xl--; + } + } + + n = yl / 2; + + assert(n < xl); + + wl = n; + wds = ALLOCV_N(BDIGIT, work, wl); + + /* Karatsuba algorithm: + * + * x = x0 + r*x1 + * y = y0 + r*y1 + * z = x*y + * = (x0 + r*x1) * (y0 + r*y1) + * = x0*y0 + r*(x1*y0 + x0*y1) + r*r*x1*y1 + * = x0*y0 + r*(x0*y0 + x1*y1 - (x1-x0)*(y1-y0)) + r*r*x1*y1 + * = x0*y0 + r*(x0*y0 + x1*y1 - (x0-x1)*(y0-y1)) + r*r*x1*y1 + */ + + xds0 = xds; + xds1 = xds + n; + yds0 = yds; + yds1 = yds + n; + zds0 = zds; + zds1 = zds + n; + zds2 = zds + 2*n; + zds3 = zds + 3*n; + + sub_p = 1; + + /* zds0:? zds1:? zds2:? zds3:? wds:? */ + + if (bary_sub(zds0, n, xds, n, xds+n, xl-n)) { + bary_2comp(zds0, n); + sub_p = !sub_p; + } + + /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:? */ + + if (bary_sub(wds, n, yds, n, yds+n, n)) { + bary_2comp(wds, n); + sub_p = !sub_p; + } + + /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:|y1-y0| */ + + bary_mul(zds1, 2*n, zds0, n, wds, n); + + /* zds0:|x1-x0| zds1,zds2:|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */ + + borrow = 0; + if (sub_p) { + borrow = !bary_2comp(zds1, 2*n); + } + /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */ + + MEMCPY(wds, zds1, BDIGIT, n); + + /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */ + + bary_mul(zds0, 2*n, xds0, n, yds0, n); + + /* zds0,zds1:x0*y0 zds2:hi(-?|x1-x0|*|y1-y0|) zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */ + + carry1 = bary_add(wds, n, wds, n, zds0, n); + carry1 = bary_addc(zds2, n, zds2, n, zds1, n, carry1); + + /* zds0,zds1:x0*y0 zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */ + + carry2 = bary_add(zds1, n, zds1, n, wds, n); + + /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */ + + MEMCPY(wds, zds2, BDIGIT, n); + + /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:_ zds3:? wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */ + + bary_mul(zds2, zl-2*n, xds1, xl-n, yds1, n); + + /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */ + + carry3 = bary_add(zds1, n, zds1, n, zds2, n); + + /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */ + + carry3 = bary_addc(zds2, n, zds2, n, zds3, (4*n < zl ? n : zl-3*n), carry3); + + /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1) wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */ + + bary_add(zds2, zl-2*n, zds2, zl-2*n, wds, n); + + /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1)+hi(x0*y0-?|x1-x0|*|y1-y0|) wds:_ */ + + if (carry2) + bary_add_one(zds2, zl-2*n); + + if (borrow && carry1) + borrow = carry1 = 0; + if (borrow && carry3) + borrow = carry3 = 0; + + if (borrow) + bary_sub_one(zds3, zl-3*n); + else if (carry1 || carry3) { + BDIGIT c = carry1 + carry3; + bary_add(zds3, zl-3*n, zds3, zl-3*n, &c, 1); + } + + /* + if (SIZEOF_BDIGITS * zl <= 16) { + uint128_t z, x, y; + ssize_t i; + for (x = 0, i = xl-1; 0 <= i; i--) { x <<= SIZEOF_BDIGITS*CHAR_BIT; x |= xds[i]; } + for (y = 0, i = yl-1; 0 <= i; i--) { y <<= SIZEOF_BDIGITS*CHAR_BIT; y |= yds[i]; } + for (z = 0, i = zl-1; 0 <= i; i--) { z <<= SIZEOF_BDIGITS*CHAR_BIT; z |= zds[i]; } + assert(z == x * y); + } + */ + + if (odd_x && odd_y) { + bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl); + bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl+1); + } + else if (odd_x) { + bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl); + } + else if (odd_y) { + bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl); + } + + if (work) + ALLOCV_END(work); +} + +static void +bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) +{ + size_t l; + + assert(xl + yl <= zl); + + if (xl == 1 && yl == 1) { + l = 2; + bary_mul_single(zds, zl, xds[0], yds[0]); + } + else { + l = xl + yl; + bary_mul_normal(zds, zl, xds, xl, yds, yl); + rb_thread_check_ints(); + } + MEMZERO(zds + l, BDIGIT, zl - l); +} + +/* determine whether a bignum is sparse or not by random sampling */ +static inline int +bary_sparse_p(BDIGIT *ds, size_t n) +{ + long c = 0; + + if ( ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++; + if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++; + if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++; + + return (c <= 1) ? 1 : 0; +} + +static void +bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) +{ + size_t nlsz; /* number of least significant zero BDIGITs */ + + assert(xl + yl <= zl); + + while (0 < xl && xds[xl-1] == 0) + xl--; + while (0 < yl && yds[yl-1] == 0) + yl--; + + nlsz = 0; + while (0 < xl && xds[0] == 0) { + xds++; + xl--; + nlsz++; + } + while (0 < yl && yds[0] == 0) { + yds++; + yl--; + nlsz++; + } + if (nlsz) { + MEMZERO(zds, BDIGIT, nlsz); + zds += nlsz; + zl -= nlsz; + } + + /* make sure that y is longer than x */ + if (xl > yl) { + BDIGIT *tds; + size_t tl; + tds = xds; xds = yds; yds = tds; + tl = xl; xl = yl; yl = tl; + } + assert(xl <= yl); + + if (xl == 0) { + MEMZERO(zds, BDIGIT, zl); + return; + } + + /* normal multiplication when x is small */ + if (xl < KARATSUBA_MUL_DIGITS) { + normal: + if (xds == yds && xl == yl) + bary_sq_fast(zds, zl, xds, xl); + else + bary_mul1(zds, zl, xds, xl, yds, yl); + return; + } + + /* normal multiplication when x or y is a sparse bignum */ + if (bary_sparse_p(xds, xl)) goto normal; + if (bary_sparse_p(yds, yl)) { + bary_mul1(zds, zl, yds, yl, xds, xl); + return; + } + + /* balance multiplication by slicing y when x is much smaller than y */ + if (2 * xl <= yl) { + bary_mul_balance(zds, zl, xds, xl, yds, yl); + return; + } + + if (xl < TOOM3_MUL_DIGITS) { + /* multiplication by karatsuba method */ + bary_mul_karatsuba(zds, zl, xds, xl, yds, yl); + return; + } + + if (3*xl <= 2*(yl + 2)) { + bary_mul_balance(zds, zl, xds, xl, yds, yl); + return; + } + + { + VALUE x, y, z; + x = bignew(xl, 1); + MEMCPY(BDIGITS(x), xds, BDIGIT, xl); + y = bignew(yl, 1); + MEMCPY(BDIGITS(y), yds, BDIGIT, yl); + z = bigtrunc(bigmul1_toom3(x, y)); + MEMCPY(zds, BDIGITS(z), BDIGIT, RBIGNUM_LEN(z)); + MEMZERO(zds + RBIGNUM_LEN(z), BDIGIT, zl - RBIGNUM_LEN(z)); + } +} + + +/* +xxx +*/ + +#define BIGNUM_DEBUG 0 +#if BIGNUM_DEBUG +#define ON_DEBUG(x) do { x; } while (0) +static void +dump_bignum(VALUE x) +{ + long i; + printf("%c0x0", RBIGNUM_SIGN(x) ? '+' : '-'); + for (i = RBIGNUM_LEN(x); i--; ) { + printf("_%0*"PRIxBDIGIT, SIZEOF_BDIGITS*2, BDIGITS(x)[i]); + } + printf(", len=%lu", RBIGNUM_LEN(x)); + puts(""); +} + +static VALUE +rb_big_dump(VALUE x) +{ + dump_bignum(x); + return x; +} +#else +#define ON_DEBUG(x) +#endif + +static int +bigzero_p(VALUE x) +{ + return bary_zero_p(BDIGITS(x), RBIGNUM_LEN(x)); +} + +int +rb_bigzero_p(VALUE x) +{ + return BIGZEROP(x); +} + +int +rb_cmpint(VALUE val, VALUE a, VALUE b) +{ + if (NIL_P(val)) { + rb_cmperr(a, b); + } + if (FIXNUM_P(val)) { + long l = FIX2LONG(val); + if (l > 0) return 1; + if (l < 0) return -1; + return 0; + } + if (RB_TYPE_P(val, T_BIGNUM)) { + if (BIGZEROP(val)) return 0; + if (RBIGNUM_SIGN(val)) return 1; + return -1; + } + if (RTEST(rb_funcall(val, '>', 1, INT2FIX(0)))) return 1; + if (RTEST(rb_funcall(val, '<', 1, INT2FIX(0)))) return -1; + return 0; +} + +#define RBIGNUM_SET_LEN(b,l) \ + ((RBASIC(b)->flags & RBIGNUM_EMBED_FLAG) ? \ + (void)(RBASIC(b)->flags = \ + (RBASIC(b)->flags & ~RBIGNUM_EMBED_LEN_MASK) | \ + ((l) << RBIGNUM_EMBED_LEN_SHIFT)) : \ + (void)(RBIGNUM(b)->as.heap.len = (l))) + +static void +rb_big_realloc(VALUE big, long len) +{ + BDIGIT *ds; + if (RBASIC(big)->flags & RBIGNUM_EMBED_FLAG) { + if (RBIGNUM_EMBED_LEN_MAX < len) { + ds = ALLOC_N(BDIGIT, len); + MEMCPY(ds, RBIGNUM(big)->as.ary, BDIGIT, RBIGNUM_EMBED_LEN_MAX); + RBIGNUM(big)->as.heap.len = RBIGNUM_LEN(big); + RBIGNUM(big)->as.heap.digits = ds; + RBASIC(big)->flags &= ~RBIGNUM_EMBED_FLAG; + } + } + else { + if (len <= RBIGNUM_EMBED_LEN_MAX) { + ds = RBIGNUM(big)->as.heap.digits; + RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG; + RBIGNUM_SET_LEN(big, len); + if (ds) { + MEMCPY(RBIGNUM(big)->as.ary, ds, BDIGIT, len); + xfree(ds); + } + } + else { + if (RBIGNUM_LEN(big) == 0) { + RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len); + } + else { + REALLOC_N(RBIGNUM(big)->as.heap.digits, BDIGIT, len); + } + } + } +} + +void +rb_big_resize(VALUE big, long len) +{ + rb_big_realloc(big, len); + RBIGNUM_SET_LEN(big, len); +} + +static VALUE +bignew_1(VALUE klass, long len, int sign) +{ + NEWOBJ_OF(big, struct RBignum, klass, T_BIGNUM | (RGENGC_WB_PROTECTED_BIGNUM ? FL_WB_PROTECTED : 0)); + RBIGNUM_SET_SIGN(big, sign?1:0); + if (len <= RBIGNUM_EMBED_LEN_MAX) { + RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG; + RBIGNUM_SET_LEN(big, len); + } + else { + RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len); + RBIGNUM(big)->as.heap.len = len; + } + OBJ_FREEZE(big); + return (VALUE)big; +} + +VALUE +rb_big_new(long len, int sign) +{ + return bignew(len, sign != 0); +} + +VALUE +rb_big_clone(VALUE x) +{ + long len = RBIGNUM_LEN(x); + VALUE z = bignew_1(CLASS_OF(x), len, RBIGNUM_SIGN(x)); + + MEMCPY(BDIGITS(z), BDIGITS(x), BDIGIT, len); + return z; +} + +static void +big_extend_carry(VALUE x) +{ + rb_big_resize(x, RBIGNUM_LEN(x)+1); + BDIGITS(x)[RBIGNUM_LEN(x)-1] = 1; +} + +/* modify a bignum by 2's complement */ +static void +get2comp(VALUE x) +{ + long i = RBIGNUM_LEN(x); + BDIGIT *ds = BDIGITS(x); + + if (bary_2comp(ds, i)) { + big_extend_carry(x); + } +} + +void +rb_big_2comp(VALUE x) /* get 2's complement */ +{ + get2comp(x); +} + +static BDIGIT +abs2twocomp(VALUE *xp, long *n_ret) +{ + VALUE x = *xp; + long n = RBIGNUM_LEN(x); + BDIGIT *ds = BDIGITS(x); + BDIGIT hibits = 0; + + while (0 < n && ds[n-1] == 0) + n--; + + if (n != 0 && RBIGNUM_NEGATIVE_P(x)) { + VALUE z = bignew_1(CLASS_OF(x), n, 0); + MEMCPY(BDIGITS(z), ds, BDIGIT, n); + bary_2comp(BDIGITS(z), n); + hibits = BDIGMAX; + *xp = z; + } + *n_ret = n; + return hibits; +} + +static void +twocomp2abs_bang(VALUE x, int hibits) +{ + RBIGNUM_SET_SIGN(x, !hibits); + if (hibits) { + get2comp(x); + } +} + +static inline VALUE +bigtrunc(VALUE x) +{ + long len = RBIGNUM_LEN(x); + BDIGIT *ds = BDIGITS(x); + + if (len == 0) return x; + while (--len && !ds[len]); + if (RBIGNUM_LEN(x) > len+1) { + rb_big_resize(x, len+1); + } + return x; +} + +static inline VALUE +bigfixize(VALUE x) +{ + long len = RBIGNUM_LEN(x); + BDIGIT *ds = BDIGITS(x); + + if (len == 0) return INT2FIX(0); + if (BIGSIZE(x) <= sizeof(long)) { + long num = 0; +#if SIZEOF_BDIGITS >= SIZEOF_LONG + num = (long)ds[0]; +#else + while (len--) { + num = (long)(BIGUP(num) + ds[len]); + } +#endif + if (num >= 0) { + if (RBIGNUM_SIGN(x)) { + if (POSFIXABLE(num)) return LONG2FIX(num); + } + else { + if (NEGFIXABLE(-num)) return LONG2FIX(-num); + } + } + } + return x; +} + +static VALUE +bignorm(VALUE x) +{ + if (RB_TYPE_P(x, T_BIGNUM)) { + x = bigfixize(x); + if (!FIXNUM_P(x)) + bigtrunc(x); + } + return x; +} + +VALUE +rb_big_norm(VALUE x) +{ + return bignorm(x); +} + +VALUE +rb_uint2big(VALUE n) +{ + long i; + VALUE big = bignew(bdigit_roomof(SIZEOF_VALUE), 1); + BDIGIT *digits = BDIGITS(big); + +#if SIZEOF_BDIGITS >= SIZEOF_VALUE + digits[0] = n; +#else + for (i = 0; i < bdigit_roomof(SIZEOF_VALUE); i++) { + digits[i] = BIGLO(n); + n = BIGDN(n); + } +#endif + + i = bdigit_roomof(SIZEOF_VALUE); + while (--i && !digits[i]) ; + RBIGNUM_SET_LEN(big, i+1); + return big; +} + +VALUE +rb_int2big(SIGNED_VALUE n) +{ + long neg = 0; + VALUE u; + VALUE big; + + if (n < 0) { + u = 1 + (VALUE)(-(n + 1)); /* u = -n avoiding overflow */ + neg = 1; + } + else { + u = n; + } + big = rb_uint2big(u); + if (neg) { + RBIGNUM_SET_SIGN(big, 0); + } + return big; +} + +VALUE +rb_uint2inum(VALUE n) +{ + if (POSFIXABLE(n)) return LONG2FIX(n); + return rb_uint2big(n); +} + +VALUE +rb_int2inum(SIGNED_VALUE n) +{ + if (FIXABLE(n)) return LONG2FIX(n); + return rb_int2big(n); +} + +void +rb_big_pack(VALUE val, unsigned long *buf, long num_longs) +{ + rb_integer_pack(val, buf, num_longs, sizeof(long), 0, + INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER| + INTEGER_PACK_2COMP); +} + +VALUE +rb_big_unpack(unsigned long *buf, long num_longs) +{ + return rb_integer_unpack(buf, num_longs, sizeof(long), 0, + INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER| + INTEGER_PACK_2COMP); +} + +/* + * Calculate the number of bytes to be required to represent + * the absolute value of the integer given as _val_. + * + * [val] an integer. + * [nlz_bits_ret] number of leading zero bits in the most significant byte is returned if not NULL. + * + * This function returns ((val_numbits * CHAR_BIT + CHAR_BIT - 1) / CHAR_BIT) + * where val_numbits is the number of bits of abs(val). + * This function should not overflow. + * + * If nlz_bits_ret is not NULL, + * (return_value * CHAR_BIT - val_numbits) is stored in *nlz_bits_ret. + * In this case, 0 <= *nlz_bits_ret < CHAR_BIT. + * + */ +size_t +rb_absint_size(VALUE val, int *nlz_bits_ret) +{ + BDIGIT *dp; + BDIGIT *de; + BDIGIT fixbuf[bdigit_roomof(sizeof(long))]; + + int num_leading_zeros; + + val = rb_to_int(val); + + if (FIXNUM_P(val)) { + long v = FIX2LONG(val); + if (v < 0) { + v = -v; + } +#if SIZEOF_BDIGITS >= SIZEOF_LONG + fixbuf[0] = v; +#else + { + int i; + for (i = 0; i < numberof(fixbuf); i++) { + fixbuf[i] = BIGLO(v); + v = BIGDN(v); + } + } +#endif + dp = fixbuf; + de = fixbuf + numberof(fixbuf); + } + else { + dp = BDIGITS(val); + de = dp + RBIGNUM_LEN(val); + } + while (dp < de && de[-1] == 0) + de--; + if (dp == de) { + if (nlz_bits_ret) + *nlz_bits_ret = 0; + return 0; + } + num_leading_zeros = nlz(de[-1]); + if (nlz_bits_ret) + *nlz_bits_ret = num_leading_zeros % CHAR_BIT; + return (de - dp) * SIZEOF_BDIGITS - num_leading_zeros / CHAR_BIT; +} + +static size_t +absint_numwords_small(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret) +{ + size_t val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte; + size_t div = val_numbits / word_numbits; + size_t mod = val_numbits % word_numbits; + size_t numwords; + size_t nlz_bits; + numwords = mod == 0 ? div : div + 1; + nlz_bits = mod == 0 ? 0 : word_numbits - mod; + *nlz_bits_ret = nlz_bits; + return numwords; +} + +static size_t +absint_numwords_generic(size_t numbytes, int nlz_bits_in_msbyte, size_t word_numbits, size_t *nlz_bits_ret) +{ + BDIGIT numbytes_bary[bdigit_roomof(sizeof(numbytes))]; + BDIGIT char_bit[1] = { CHAR_BIT }; + BDIGIT val_numbits_bary[bdigit_roomof(sizeof(numbytes) + 1)]; + BDIGIT nlz_bits_in_msbyte_bary[1] = { nlz_bits_in_msbyte }; + BDIGIT word_numbits_bary[bdigit_roomof(sizeof(word_numbits))]; + BDIGIT div_bary[numberof(val_numbits_bary) + BIGDIVREM_EXTRA_WORDS]; + BDIGIT mod_bary[numberof(word_numbits_bary)]; + BDIGIT one[1] = { 1 }; + size_t nlz_bits; + size_t mod; + int sign; + size_t numwords; + + /* + * val_numbits = numbytes * CHAR_BIT - nlz_bits_in_msbyte + * div, mod = val_numbits.divmod(word_numbits) + * numwords = mod == 0 ? div : div + 1 + * nlz_bits = mod == 0 ? 0 : word_numbits - mod + */ + + bary_unpack(BARY_ARGS(numbytes_bary), &numbytes, 1, sizeof(numbytes), 0, + INTEGER_PACK_NATIVE_BYTE_ORDER); + BARY_MUL1(val_numbits_bary, numbytes_bary, char_bit); + if (nlz_bits_in_msbyte) + BARY_SUB(val_numbits_bary, val_numbits_bary, nlz_bits_in_msbyte_bary); + bary_unpack(BARY_ARGS(word_numbits_bary), &word_numbits, 1, sizeof(word_numbits), 0, + INTEGER_PACK_NATIVE_BYTE_ORDER); + BARY_DIVMOD(div_bary, mod_bary, val_numbits_bary, word_numbits_bary); + if (BARY_ZERO_P(mod_bary)) { + nlz_bits = 0; + } + else { + BARY_ADD(div_bary, div_bary, one); + bary_pack(+1, BARY_ARGS(mod_bary), &mod, 1, sizeof(mod), 0, + INTEGER_PACK_NATIVE_BYTE_ORDER); + nlz_bits = word_numbits - mod; + } + sign = bary_pack(+1, BARY_ARGS(div_bary), &numwords, 1, sizeof(numwords), 0, + INTEGER_PACK_NATIVE_BYTE_ORDER); + + if (sign == 2) + return (size_t)-1; + *nlz_bits_ret = nlz_bits; + return numwords; +} + +/* + * Calculate the number of words to be required to represent + * the absolute value of the integer given as _val_. + * + * [val] an integer. + * [word_numbits] number of bits in a word. + * [nlz_bits_ret] number of leading zero bits in the most significant word is returned if not NULL. + * + * This function returns ((val_numbits * CHAR_BIT + word_numbits - 1) / word_numbits) + * where val_numbits is the number of bits of abs(val). + * + * This function can overflow. + * When overflow occur, (size_t)-1 is returned. + * + * If nlz_bits_ret is not NULL and overflow is not occur, + * (return_value * word_numbits - val_numbits) is stored in *nlz_bits_ret. + * In this case, 0 <= *nlz_bits_ret < word_numbits. + * + */ +size_t +rb_absint_numwords(VALUE val, size_t word_numbits, size_t *nlz_bits_ret) +{ + size_t numbytes; + int nlz_bits_in_msbyte; + size_t numwords; + size_t nlz_bits; + + if (word_numbits == 0) + return (size_t)-1; + + numbytes = rb_absint_size(val, &nlz_bits_in_msbyte); + + if (numbytes <= SIZE_MAX / CHAR_BIT) { + numwords = absint_numwords_small(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits); +#ifdef DEBUG_INTEGER_PACK + { + size_t numwords0, nlz_bits0; + numwords0 = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits0); + assert(numwords0 == numwords); + assert(nlz_bits0 == nlz_bits); + } +#endif + } + else { + numwords = absint_numwords_generic(numbytes, nlz_bits_in_msbyte, word_numbits, &nlz_bits); + } + if (numwords == (size_t)-1) + return numwords; + + if (nlz_bits_ret) + *nlz_bits_ret = nlz_bits; + + return numwords; +} + +int +rb_absint_singlebit_p(VALUE val) +{ + BDIGIT *dp; + BDIGIT *de; + BDIGIT fixbuf[bdigit_roomof(sizeof(long))]; + BDIGIT d; + + val = rb_to_int(val); + + if (FIXNUM_P(val)) { + long v = FIX2LONG(val); + if (v < 0) { + v = -v; + } +#if SIZEOF_BDIGITS >= SIZEOF_LONG + fixbuf[0] = v; +#else + { + int i; + for (i = 0; i < numberof(fixbuf); i++) { + fixbuf[i] = BIGLO(v); + v = BIGDN(v); + } + } +#endif + dp = fixbuf; + de = fixbuf + numberof(fixbuf); + } + else { + dp = BDIGITS(val); + de = dp + RBIGNUM_LEN(val); + } + while (dp < de && de[-1] == 0) + de--; + while (dp < de && dp[0] == 0) + dp++; + if (dp == de) /* no bit set. */ + return 0; + if (dp != de-1) /* two non-zero words. two bits set, at least. */ + return 0; + d = *dp; + return POW2_P(d); +} + + +/* + * Export an integer into a buffer. + * + * This function fills the buffer specified by _words_ and _numwords_ as + * val in the format specified by _wordsize_, _nails_ and _flags_. + * + * [val] Fixnum, Bignum or another integer like object which has to_int method. + * [words] buffer to export abs(val). + * [numwords] the size of given buffer as number of words. + * [wordsize] the size of word as number of bytes. + * [nails] number of padding bits in a word. + * Most significant nails bits of each word are filled by zero. + * [flags] bitwise or of constants which name starts "INTEGER_PACK_". + * + * flags: + * [INTEGER_PACK_MSWORD_FIRST] Store the most significant word as the first word. + * [INTEGER_PACK_LSWORD_FIRST] Store the least significant word as the first word. + * [INTEGER_PACK_MSBYTE_FIRST] Store the most significant byte in a word as the first byte in the word. + * [INTEGER_PACK_LSBYTE_FIRST] Store the least significant byte in a word as the first byte in the word. + * [INTEGER_PACK_NATIVE_BYTE_ORDER] INTEGER_PACK_MSBYTE_FIRST or INTEGER_PACK_LSBYTE_FIRST corresponding to the host's endian. + * [INTEGER_PACK_2COMP] Use 2's complement representation. + * [INTEGER_PACK_LITTLE_ENDIAN] Same as INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_LSBYTE_FIRST + * [INTEGER_PACK_BIG_ENDIAN] Same as INTEGER_PACK_MSWORD_FIRST|INTEGER_PACK_MSBYTE_FIRST + * [INTEGER_PACK_FORCE_GENERIC_IMPLEMENTATION] Use generic implementation (for test and debug). + * + * This function fills the buffer specified by _words_ + * as abs(val) if INTEGER_PACK_2COMP is not specified in _flags_. + * If INTEGER_PACK_2COMP is specified, 2's complement representation of val is + * filled in the buffer. + * + * This function returns the signedness and overflow condition. + * The overflow condition depends on INTEGER_PACK_2COMP. + * + * INTEGER_PACK_2COMP is not specified: + * -2 : negative overflow. val <= -2**(numwords*(wordsize*CHAR_BIT-nails)) + * -1 : negative without overflow. -2**(numwords*(wordsize*CHAR_BIT-nails)) < val < 0 + * 0 : zero. val == 0 + * 1 : positive without overflow. 0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails)) + * 2 : positive overflow. 2**(numwords*(wordsize*CHAR_BIT-nails)) <= val + * + * INTEGER_PACK_2COMP is specified: + * -2 : negative overflow. val < -2**(numwords*(wordsize*CHAR_BIT-nails)) + * -1 : negative without overflow. -2**(numwords*(wordsize*CHAR_BIT-nails)) <= val < 0 + * 0 : zero. val == 0 + * 1 : positive without overflow. 0 < val < 2**(numwords*(wordsize*CHAR_BIT-nails)) + * 2 : positive overflow. 2**(numwords*(wordsize*CHAR_BIT-nails)) <= val + * + * The value, -2**(numwords*(wordsize*CHAR_BIT-nails)), is representable + * in 2's complement representation but not representable in absolute value. + * So -1 is returned for the value if INTEGER_PACK_2COMP is specified + * but returns -2 if INTEGER_PACK_2COMP is not specified. + * + * The least significant words are filled in the buffer when overflow occur. + */ + +int +rb_integer_pack(VALUE val, void *words, size_t numwords, size_t wordsize, size_t nails, int flags) +{ + int sign; + BDIGIT *ds; + size_t num_bdigits; + BDIGIT fixbuf[bdigit_roomof(sizeof(long))]; + + RB_GC_GUARD(val) = rb_to_int(val); + + if (FIXNUM_P(val)) { + long v = FIX2LONG(val); + if (v < 0) { + sign = -1; + v = -v; + } + else { + sign = 1; + } +#if SIZEOF_BDIGITS >= SIZEOF_LONG + fixbuf[0] = v; +#else + { + int i; + for (i = 0; i < numberof(fixbuf); i++) { + fixbuf[i] = BIGLO(v); + v = BIGDN(v); + } + } +#endif + ds = fixbuf; + num_bdigits = numberof(fixbuf); + } + else { + sign = RBIGNUM_POSITIVE_P(val) ? 1 : -1; + ds = BDIGITS(val); + num_bdigits = RBIGNUM_LEN(val); + } + + return bary_pack(sign, ds, num_bdigits, words, numwords, wordsize, nails, flags); +} + /* * Import an integer into a buffer. * @@ -2510,11 +3033,6 @@ rb_str2inum(VALUE str, int base) return rb_str_to_inum(str, base, base==0); } -const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; - -static VALUE bigsqr(VALUE x); -static void bigdivmod(VALUE x, VALUE y, volatile VALUE *divp, volatile VALUE *modp); - static inline int ones(register unsigned long x) { @@ -3466,57 +3984,6 @@ bigsub_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn) bary_sub(zds, zn, xds, xn, yds, yn); } -static int -bary_subb(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int borrow) -{ - BDIGIT_DBL_SIGNED num; - size_t i; - - assert(yn <= xn); - assert(xn <= zn); - - num = borrow ? -1 : 0; - for (i = 0; i < yn; i++) { - num += (BDIGIT_DBL_SIGNED)xds[i] - yds[i]; - zds[i] = BIGLO(num); - num = BIGDN(num); - } - for (; i < xn; i++) { - if (num == 0) goto num_is_zero; - num += xds[i]; - zds[i] = BIGLO(num); - num = BIGDN(num); - } - if (num == 0) goto num_is_zero; - for (; i < zn; i++) { - zds[i] = BDIGMAX; - } - return 1; - - num_is_zero: - if (xds == zds && xn == zn) - return 0; - for (; i < xn; i++) { - zds[i] = xds[i]; - } - for (; i < zn; i++) { - zds[i] = 0; - } - return 0; -} - -static int -bary_sub(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn) -{ - return bary_subb(zds, zn, xds, xn, yds, yn, 0); -} - -static int -bary_sub_one(BDIGIT *zds, size_t zn) -{ - return bary_subb(zds, zn, zds, zn, NULL, 0, 1); -} - static VALUE bigsub(VALUE x, VALUE y) { @@ -3739,64 +4206,6 @@ bigadd_core(BDIGIT *xds, long xn, BDIGIT *yds, long yn, BDIGIT *zds, long zn) bary_add(zds, zn, xds, xn, yds, yn); } -static int -bary_addc(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn, int carry) -{ - BDIGIT_DBL num; - size_t i; - - assert(xn <= zn); - assert(yn <= zn); - - if (xn > yn) { - BDIGIT *tds; - tds = xds; xds = yds; yds = tds; - i = xn; xn = yn; yn = i; - } - - num = carry ? 1 : 0; - for (i = 0; i < xn; i++) { - num += (BDIGIT_DBL)xds[i] + yds[i]; - zds[i] = BIGLO(num); - num = BIGDN(num); - } - for (; i < yn; i++) { - if (num == 0) goto num_is_zero; - num += yds[i]; - zds[i] = BIGLO(num); - num = BIGDN(num); - } - for (; i < zn; i++) { - if (num == 0) goto num_is_zero; - zds[i] = BIGLO(num); - num = BIGDN(num); - } - return num != 0; - - num_is_zero: - if (yds == zds && yn == zn) - return 0; - for (; i < yn; i++) { - zds[i] = yds[i]; - } - for (; i < zn; i++) { - zds[i] = 0; - } - return 0; -} - -static int -bary_add(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn, BDIGIT *yds, size_t yn) -{ - return bary_addc(zds, zn, xds, xn, yds, yn, 0); -} - -static int -bary_add_one(BDIGIT *zds, size_t zn) -{ - return bary_addc(zds, zn, NULL, 0, zds, zn, 1); -} - static VALUE bigadd(VALUE x, VALUE y, int sign) { @@ -3907,368 +4316,6 @@ big_real_len(VALUE x) return i + 1; } -static void -bary_mul_single(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT y) -{ - BDIGIT_DBL n; - - assert(2 <= zl); - - n = (BDIGIT_DBL)x * y; - zds[0] = BIGLO(n); - zds[1] = (BDIGIT)BIGDN(n); -} - -static int -bary_muladd_1xN(BDIGIT *zds, size_t zl, BDIGIT x, BDIGIT *yds, size_t yl) -{ - BDIGIT_DBL n; - BDIGIT_DBL dd; - size_t j; - - assert(zl > yl); - - if (x == 0) - return 0; - dd = x; - n = 0; - for (j = 0; j < yl; j++) { - BDIGIT_DBL ee = n + dd * yds[j]; - if (ee) { - n = zds[j] + ee; - zds[j] = BIGLO(n); - n = BIGDN(n); - } - else { - n = 0; - } - - } - for (; j < zl; j++) { - if (n == 0) - break; - n += zds[j]; - zds[j] = BIGLO(n); - n = BIGDN(n); - } - return n != 0; -} - -static void -bary_mul_normal(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) -{ - size_t i; - - assert(xl + yl <= zl); - - MEMZERO(zds, BDIGIT, zl); - for (i = 0; i < xl; i++) { - bary_muladd_1xN(zds+i, zl-i, xds[i], yds, yl); - } -} - -/* balancing multiplication by slicing larger argument */ -static void -bary_mul_balance(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) -{ - VALUE work = 0; - size_t r, n; - BDIGIT *wds; - size_t wl; - - assert(xl + yl <= zl); - assert(2 * xl <= yl || 3 * xl <= 2*(yl+2)); - - wl = xl * 2; - wds = ALLOCV_N(BDIGIT, work, wl); - - MEMZERO(zds, BDIGIT, zl); - - n = 0; - while (yl > 0) { - r = xl > yl ? yl : xl; - bary_mul(wds, xl + r, xds, xl, yds + n, r); - bary_add(zds + n, zl - n, - zds + n, zl - n, - wds, xl + r); - yl -= r; - n += r; - } - - if (work) - ALLOCV_END(work); -} - -/* multiplication by karatsuba method */ -static void -bary_mul_karatsuba(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) -{ - VALUE work = 0; - BDIGIT *wds; - size_t wl; - - size_t n; - int sub_p, borrow, carry1, carry2, carry3; - - int odd_x = 0; - int odd_y = 0; - - BDIGIT *xds0, *xds1, *yds0, *yds1, *zds0, *zds1, *zds2, *zds3; - - assert(xl + yl <= zl); - assert(xl <= yl); - assert(yl < 2 * xl); - - if (yl & 1) { - odd_y = 1; - yl--; - if (yl < xl) { - odd_x = 1; - xl--; - } - } - - n = yl / 2; - - assert(n < xl); - - wl = n; - wds = ALLOCV_N(BDIGIT, work, wl); - - /* Karatsuba algorithm: - * - * x = x0 + r*x1 - * y = y0 + r*y1 - * z = x*y - * = (x0 + r*x1) * (y0 + r*y1) - * = x0*y0 + r*(x1*y0 + x0*y1) + r*r*x1*y1 - * = x0*y0 + r*(x0*y0 + x1*y1 - (x1-x0)*(y1-y0)) + r*r*x1*y1 - * = x0*y0 + r*(x0*y0 + x1*y1 - (x0-x1)*(y0-y1)) + r*r*x1*y1 - */ - - xds0 = xds; - xds1 = xds + n; - yds0 = yds; - yds1 = yds + n; - zds0 = zds; - zds1 = zds + n; - zds2 = zds + 2*n; - zds3 = zds + 3*n; - - sub_p = 1; - - /* zds0:? zds1:? zds2:? zds3:? wds:? */ - - if (bary_sub(zds0, n, xds, n, xds+n, xl-n)) { - bary_2comp(zds0, n); - sub_p = !sub_p; - } - - /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:? */ - - if (bary_sub(wds, n, yds, n, yds+n, n)) { - bary_2comp(wds, n); - sub_p = !sub_p; - } - - /* zds0:|x1-x0| zds1:? zds2:? zds3:? wds:|y1-y0| */ - - bary_mul(zds1, 2*n, zds0, n, wds, n); - - /* zds0:|x1-x0| zds1,zds2:|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */ - - borrow = 0; - if (sub_p) { - borrow = !bary_2comp(zds1, 2*n); - } - /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:|y1-y0| */ - - MEMCPY(wds, zds1, BDIGIT, n); - - /* zds0:|x1-x0| zds1,zds2:-?|x1-x0|*|y1-y0| zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */ - - bary_mul(zds0, 2*n, xds0, n, yds0, n); - - /* zds0,zds1:x0*y0 zds2:hi(-?|x1-x0|*|y1-y0|) zds3:? wds:lo(-?|x1-x0|*|y1-y0|) */ - - carry1 = bary_add(wds, n, wds, n, zds0, n); - carry1 = bary_addc(zds2, n, zds2, n, zds1, n, carry1); - - /* zds0,zds1:x0*y0 zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */ - - carry2 = bary_add(zds1, n, zds1, n, wds, n); - - /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:hi(x0*y0-?|x1-x0|*|y1-y0|) zds3:? wds:lo(x0*y0-?|x1-x0|*|y1-y0|) */ - - MEMCPY(wds, zds2, BDIGIT, n); - - /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2:_ zds3:? wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */ - - bary_mul(zds2, zl-2*n, xds1, xl-n, yds1, n); - - /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */ - - carry3 = bary_add(zds1, n, zds1, n, zds2, n); - - /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1 wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */ - - carry3 = bary_addc(zds2, n, zds2, n, zds3, (4*n < zl ? n : zl-3*n), carry3); - - /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1) wds:hi(x0*y0-?|x1-x0|*|y1-y0|) */ - - bary_add(zds2, zl-2*n, zds2, zl-2*n, wds, n); - - /* zds0:lo(x0*y0) zds1:hi(x0*y0)+lo(x0*y0-?|x1-x0|*|y1-y0|)+lo(x1*y1) zds2,zds3:x1*y1+hi(x1*y1)+hi(x0*y0-?|x1-x0|*|y1-y0|) wds:_ */ - - if (carry2) - bary_add_one(zds2, zl-2*n); - - if (borrow && carry1) - borrow = carry1 = 0; - if (borrow && carry3) - borrow = carry3 = 0; - - if (borrow) - bary_sub_one(zds3, zl-3*n); - else if (carry1 || carry3) { - BDIGIT c = carry1 + carry3; - bary_add(zds3, zl-3*n, zds3, zl-3*n, &c, 1); - } - - /* - if (SIZEOF_BDIGITS * zl <= 16) { - uint128_t z, x, y; - ssize_t i; - for (x = 0, i = xl-1; 0 <= i; i--) { x <<= SIZEOF_BDIGITS*CHAR_BIT; x |= xds[i]; } - for (y = 0, i = yl-1; 0 <= i; i--) { y <<= SIZEOF_BDIGITS*CHAR_BIT; y |= yds[i]; } - for (z = 0, i = zl-1; 0 <= i; i--) { z <<= SIZEOF_BDIGITS*CHAR_BIT; z |= zds[i]; } - assert(z == x * y); - } - */ - - if (odd_x && odd_y) { - bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl); - bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl+1); - } - else if (odd_x) { - bary_muladd_1xN(zds+xl, zl-xl, xds[xl], yds, yl); - } - else if (odd_y) { - bary_muladd_1xN(zds+yl, zl-yl, yds[yl], xds, xl); - } - - if (work) - ALLOCV_END(work); -} - -static void -bary_mul1(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) -{ - size_t l; - - assert(xl + yl <= zl); - - if (xl == 1 && yl == 1) { - l = 2; - bary_mul_single(zds, zl, xds[0], yds[0]); - } - else { - l = xl + yl; - bary_mul_normal(zds, zl, xds, xl, yds, yl); - rb_thread_check_ints(); - } - MEMZERO(zds + l, BDIGIT, zl - l); -} - -static void -bary_mul(BDIGIT *zds, size_t zl, BDIGIT *xds, size_t xl, BDIGIT *yds, size_t yl) -{ - size_t nlsz; /* number of least significant zero BDIGITs */ - - assert(xl + yl <= zl); - - while (0 < xl && xds[xl-1] == 0) - xl--; - while (0 < yl && yds[yl-1] == 0) - yl--; - - nlsz = 0; - while (0 < xl && xds[0] == 0) { - xds++; - xl--; - nlsz++; - } - while (0 < yl && yds[0] == 0) { - yds++; - yl--; - nlsz++; - } - if (nlsz) { - MEMZERO(zds, BDIGIT, nlsz); - zds += nlsz; - zl -= nlsz; - } - - /* make sure that y is longer than x */ - if (xl > yl) { - BDIGIT *tds; - size_t tl; - tds = xds; xds = yds; yds = tds; - tl = xl; xl = yl; yl = tl; - } - assert(xl <= yl); - - if (xl == 0) { - MEMZERO(zds, BDIGIT, zl); - return; - } - - /* normal multiplication when x is small */ - if (xl < KARATSUBA_MUL_DIGITS) { - normal: - if (xds == yds && xl == yl) - bary_sq_fast(zds, zl, xds, xl); - else - bary_mul1(zds, zl, xds, xl, yds, yl); - return; - } - - /* normal multiplication when x or y is a sparse bignum */ - if (bary_sparse_p(xds, xl)) goto normal; - if (bary_sparse_p(yds, yl)) { - bary_mul1(zds, zl, yds, yl, xds, xl); - return; - } - - /* balance multiplication by slicing y when x is much smaller than y */ - if (2 * xl <= yl) { - bary_mul_balance(zds, zl, xds, xl, yds, yl); - return; - } - - if (xl < TOOM3_MUL_DIGITS) { - /* multiplication by karatsuba method */ - bary_mul_karatsuba(zds, zl, xds, xl, yds, yl); - return; - } - - if (3*xl <= 2*(yl + 2)) { - bary_mul_balance(zds, zl, xds, xl, yds, yl); - return; - } - - { - VALUE x, y, z; - x = bignew(xl, 1); - MEMCPY(BDIGITS(x), xds, BDIGIT, xl); - y = bignew(yl, 1); - MEMCPY(BDIGITS(y), yds, BDIGIT, yl); - z = bigtrunc(bigmul1_toom3(x, y)); - MEMCPY(zds, BDIGITS(z), BDIGIT, RBIGNUM_LEN(z)); - MEMZERO(zds + RBIGNUM_LEN(z), BDIGIT, zl - RBIGNUM_LEN(z)); - } -} - /* split a bignum into high and low bignums */ static void @@ -4536,56 +4583,6 @@ bigmul1_toom3(VALUE x, VALUE y) return bignorm(z); } -/* efficient squaring (2 times faster than normal multiplication) - * ref: Handbook of Applied Cryptography, Algorithm 14.16 - * http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf - */ -static void -bary_sq_fast(BDIGIT *zds, size_t zn, BDIGIT *xds, size_t xn) -{ - size_t i, j; - BDIGIT_DBL c, v, w; - - assert(xn * 2 <= zn); - - MEMZERO(zds, BDIGIT, zn); - for (i = 0; i < xn; i++) { - v = (BDIGIT_DBL)xds[i]; - if (!v) continue; - c = (BDIGIT_DBL)zds[i + i] + v * v; - zds[i + i] = BIGLO(c); - c = BIGDN(c); - v *= 2; - for (j = i + 1; j < xn; j++) { - w = (BDIGIT_DBL)xds[j]; - c += (BDIGIT_DBL)zds[i + j] + BIGLO(v) * w; - zds[i + j] = BIGLO(c); - c = BIGDN(c); - if (BIGDN(v)) c += w; - } - if (c) { - c += (BDIGIT_DBL)zds[i + xn]; - zds[i + xn] = BIGLO(c); - c = BIGDN(c); - assert(c == 0 || i != xn-1); - if (c && i != xn-1) zds[i + xn + 1] += (BDIGIT)c; - } - } -} - -/* determine whether a bignum is sparse or not by random sampling */ -static inline int -bary_sparse_p(BDIGIT *ds, size_t n) -{ - long c = 0; - - if ( ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++; - if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++; - if (c <= 1 && ds[rb_genrand_ulong_limited(n / 2) + n / 4]) c++; - - return (c <= 1) ? 1 : 0; -} - static VALUE bigmul0(VALUE x, VALUE y) { |