summaryrefslogtreecommitdiff
path: root/libc/src/__support/UInt.h
diff options
context:
space:
mode:
Diffstat (limited to 'libc/src/__support/UInt.h')
-rw-r--r--libc/src/__support/UInt.h120
1 files changed, 120 insertions, 0 deletions
diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
index 8b273e368964..a702aaad827b 100644
--- a/libc/src/__support/UInt.h
+++ b/libc/src/__support/UInt.h
@@ -342,6 +342,126 @@ template <size_t Bits> struct UInt {
return remainder;
}
+ // Efficiently perform UInt / (x * 2^e), where x is a 32-bit unsigned integer,
+ // and return the remainder.
+ // The main idea is as follow:
+ // Let q = y / (x * 2^e) be the quotient, and
+ // r = y % (x * 2^e) be the remainder.
+ // First, notice that:
+ // r % (2^e) = y % (2^e),
+ // so we just need to focus on all the bits of y that is >= 2^e.
+ // To speed up the shift-and-add steps, we only use x as the divisor, and
+ // performing 32-bit shiftings instead of bit-by-bit shiftings.
+ // Since the remainder of each division step < x < 2^32, the computation of
+ // each step is now properly contained within uint64_t.
+ // And finally we perform some extra alignment steps for the remaining bits.
+ constexpr optional<UInt<Bits>> div_uint32_times_pow_2(uint32_t x, size_t e) {
+ UInt<Bits> remainder(0);
+
+ if (x == 0) {
+ return nullopt;
+ }
+ if (e >= Bits) {
+ remainder = *this;
+ *this = UInt<Bits>(0);
+ return remainder;
+ }
+
+ UInt<Bits> quotient(0);
+ uint64_t x64 = static_cast<uint64_t>(x);
+ // lower64 = smallest multiple of 64 that is >= e.
+ size_t lower64 = ((e >> 6) + ((e & 63) != 0)) << 6;
+ // lower_pos is the index of the closest 64-bit chunk >= 2^e.
+ size_t lower_pos = lower64 / 64;
+ // Keep track of current remainder mod x * 2^(32*i)
+ uint64_t rem = 0;
+ // pos is the index of the current 64-bit chunk that we are processing.
+ size_t pos = WORDCOUNT;
+
+ for (size_t q_pos = WORDCOUNT - lower_pos; q_pos > 0; --q_pos) {
+ // q_pos is 1 + the index of the current 64-bit chunk of the quotient
+ // being processed.
+ // Performing the division / modulus with divisor:
+ // x * 2^(64*q_pos - 32),
+ // i.e. using the upper 32-bit of the current 64-bit chunk.
+ rem <<= 32;
+ rem += val[--pos] >> 32;
+ uint64_t q_tmp = rem / x64;
+ rem %= x64;
+
+ // Performing the division / modulus with divisor:
+ // x * 2^(64*(q_pos - 1)),
+ // i.e. using the lower 32-bit of the current 64-bit chunk.
+ rem <<= 32;
+ rem += val[pos] & MASK32;
+ quotient.val[q_pos - 1] = (q_tmp << 32) + rem / x64;
+ rem %= x64;
+ }
+
+ // So far, what we have is:
+ // quotient = y / (x * 2^lower64), and
+ // rem = (y % (x * 2^lower64)) / 2^lower64.
+ // If (lower64 > e), we will need to perform an extra adjustment of the
+ // quotient and remainder, namely:
+ // y / (x * 2^e) = [ y / (x * 2^lower64) ] * 2^(lower64 - e) +
+ // + (rem * 2^(lower64 - e)) / x
+ // (y % (x * 2^e)) / 2^e = (rem * 2^(lower64 - e)) % x
+ size_t last_shift = lower64 - e;
+
+ if (last_shift > 0) {
+ // quotient * 2^(lower64 - e)
+ quotient <<= last_shift;
+ uint64_t q_tmp = 0;
+ uint64_t d = val[--pos];
+ if (last_shift >= 32) {
+ // The shifting (rem * 2^(lower64 - e)) might overflow uint64_t, so we
+ // perform a 32-bit shift first.
+ rem <<= 32;
+ rem += d >> 32;
+ d &= MASK32;
+ q_tmp = rem / x64;
+ rem %= x64;
+ last_shift -= 32;
+ } else {
+ // Only use the upper 32-bit of the current 64-bit chunk.
+ d >>= 32;
+ }
+
+ if (last_shift > 0) {
+ rem <<= 32;
+ rem += d;
+ q_tmp <<= last_shift;
+ x64 <<= 32 - last_shift;
+ q_tmp += rem / x64;
+ rem %= x64;
+ }
+
+ quotient.val[0] += q_tmp;
+
+ if (lower64 - e <= 32) {
+ // The remainder rem * 2^(lower64 - e) might overflow to the higher
+ // 64-bit chunk.
+ if (pos < WORDCOUNT - 1) {
+ remainder[pos + 1] = rem >> 32;
+ }
+ remainder[pos] = (rem << 32) + (val[pos] & MASK32);
+ } else {
+ remainder[pos] = rem;
+ }
+
+ } else {
+ remainder[pos] = rem;
+ }
+
+ // Set the remaining lower bits of the remainder.
+ for (; pos > 0; --pos) {
+ remainder[pos - 1] = val[pos - 1];
+ }
+
+ *this = quotient;
+ return remainder;
+ }
+
constexpr UInt<Bits> operator/(const UInt<Bits> &other) const {
UInt<Bits> result(*this);
result.div(other);