diff options
author | Geert Bosch <geert@mongodb.com> | 2016-06-01 19:57:57 -0400 |
---|---|---|
committer | Geert Bosch <geert@mongodb.com> | 2016-06-06 13:22:34 -0400 |
commit | 666ce720ebf90229e8e4f92f8719284bedb4f20f (patch) | |
tree | bf6e3c06de35ee97803d6080f07bc7ce9e3ce156 | |
parent | 3ffb9e1ca369e3a5dee5ad4f420d9d4e9866e7af (diff) | |
download | mongo-666ce720ebf90229e8e4f92f8719284bedb4f20f.tar.gz |
SERVER-19735: Add accurate summation class using pairs of doubles
This allows summing long series of 64-bits integers without loss of
precision, and avoids catastrophic cancellation in summing series of
doubles or mixed types.
-rw-r--r-- | src/mongo/util/SConscript | 23 | ||||
-rw-r--r-- | src/mongo/util/summation.cpp | 89 | ||||
-rw-r--r-- | src/mongo/util/summation.h | 154 | ||||
-rw-r--r-- | src/mongo/util/summation_test.cpp | 201 |
4 files changed, 467 insertions, 0 deletions
diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript index 6f7ce51979f..e38ba85ef7d 100644 --- a/src/mongo/util/SConscript +++ b/src/mongo/util/SConscript @@ -69,6 +69,29 @@ env.CppUnitTest( ) env.Library( + target='summation', + source=[ + 'summation.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/util/foundation', + ], +) + +env.CppUnitTest( + target='summation_test', + source=[ + 'summation_test.cpp', + ], + LIBDEPS=[ + 'summation', + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/third_party/shim_boost', + ], +) + +env.Library( target='progress_meter', source=[ 'progress_meter.cpp', diff --git a/src/mongo/util/summation.cpp b/src/mongo/util/summation.cpp new file mode 100644 index 00000000000..d960b73f95e --- /dev/null +++ b/src/mongo/util/summation.cpp @@ -0,0 +1,89 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "summation.h" + +#include <cmath> + +#include "mongo/util/assert_util.h" + +namespace mongo { +void DoubleDoubleSummation::addLong(long long x) { + // Split 64-bit integers into two doubles, so the sum remains exact. + int64_t high = x / (1ll << 32) * (1ll << 32); + int64_t low = x - high; + dassert(high + low == x && 1.0 * high == high && 1.0 * low == low); + addDouble(low); + addDouble(high); +} + +/** + * Returns whether the sum is in range of the 64-bit signed integer long long type. + */ +bool DoubleDoubleSummation::fitsLong() const { + using limits = std::numeric_limits<long long>; + // Fast path: if the rounded _sum is strictly between the minimum and maximum long long value, + // it must be valid. This is the common case. Note that this is correct for NaNs/infinities. + if (_sum > limits::min() && _sum < limits::max()) + return true; + + // Now check the cases where the _sum equals one of the boundaries, and check the compensation + // amount to determine to what integer the value would round. + + // If _sum is equal to limits::max() + 1, _addend must cause us to round down to a lower integer + // and thus be strictly less than -0.5. limits.max() rounds up to limits.max() + 1, as double + // precision does not have enough precision. + if (_sum == limits::max()) + return _addend < -0.5; + + // If _sum is equal to limits::min(), _addend must not cause us to round down and thus be + // greater than or equal to -0.5. + if (_sum == limits::min()) + return _addend >= -0.5; + + // The sum is out of range, an infinity or a NaN. + return false; +} + +/** + * Returns result of sum rounded to nearest integer, rounding half-way cases away from zero. + */ +long long DoubleDoubleSummation::getLong() const { + uassert(ErrorCodes::Overflow, "sum out of range of a 64-bit signed integer", fitsLong()); + if (_sum == std::numeric_limits<long long>::max()) { + // Can't directly convert, because _sum would overflow a signed 64-bit number. + dassert(_addend < -0.5 && -_sum == std::numeric_limits<long long>::min()); + return llround(_addend) - std::numeric_limits<long long>::min(); + } + long long sum = llround(_sum); + sum += llround((_sum - sum) + _addend); + return sum; +} +} // namespace mongo diff --git a/src/mongo/util/summation.h b/src/mongo/util/summation.h new file mode 100644 index 00000000000..e340d13d708 --- /dev/null +++ b/src/mongo/util/summation.h @@ -0,0 +1,154 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +#include <cmath> +#include <tuple> +#include <utility> + +#include "mongo/platform/decimal128.h" +#include "mongo/util/assert_util.h" + +namespace mongo { + +using DoubleDouble = std::pair<double, double>; + +/** + * Class to accurately sum series of numbers using a 2Sum and Fast2Sum formulas to maintain an + * unevaluated sum of two numbers: a rounded-to-nearest _sum and an _addend. + * See Sylvie Boldo, Stef Graillat, Jean-Michel Muller. On the robustness of the 2Sum and Fast2Sum + * algorithms. 2016. https://hal-ens-lyon.archives-ouvertes.fr/ensl-01310023 + */ +class DoubleDoubleSummation { +public: + /** + * Adds x to the sum, keeping track of a compensation amount to be subtracted later. + */ + void addDouble(double x) { + _special += x; // Keep a simple sum to use in case of NaN + std::tie(x, _addend) = _fast2Sum(x, _addend); // Compensated add: _addend tinier than _sum + std::tie(_sum, x) = _2Sum(_sum, x); // Compensated add: x maybe larger than _sum + _addend += x; // Store away lowest part of sum + } + + /** + * Adds x to internal sum. Extra precision guarantees that sum is exact, unless intermediate + * sums exceed a magnitude of 2**106. + */ + void addLong(long long x); + + /** + * Adds x to internal sum. Adds as double as that is more efficient. + */ + void addInt(int x) { + addDouble(x); + } + + /** + * Returns the double nearest to the accumulated sum. + */ + double getDouble() const { + return std::isnan(_sum) ? _special : _sum; + } + + /** + * Return a pair of double representing the sum, with first being the nearest double and second + * the amount to add for full precision. + */ + DoubleDouble getDoubleDouble() const { + return std::isnan(_sum) ? DoubleDouble{_special, 0.0} : DoubleDouble{_sum, _addend}; + } + + /** + * The result will generally have about 107 bits of precision, or about 32 decimal digits. + * Summations of even extremely long series of 32-bit and 64-bit integers should be exact. + */ + Decimal128 getDecimal() const { + return std::isnan(_sum) ? Decimal128(_special, Decimal128::kRoundTo34Digits) + : Decimal128(_sum, Decimal128::kRoundTo34Digits) + .add(Decimal128(_addend, Decimal128::kRoundTo34Digits)); + } + + /** + * Returns whether the sum is in range of the 64-bit signed integer long long type. + */ + bool fitsLong() const; + + /** + * Returns whether the accumulated sum has a fractional part. + */ + bool isInteger() const { + return std::trunc(_sum) == _sum && std::trunc(_addend) == _addend; + } + + /** + * Returns result of sum rounded to nearest integer, rounding half-way cases away from zero. + */ + long long getLong() const; + +private: + /** + * Assuming |b| <= |a|, returns exact unevaluated sum of a and b, where the first member is the + * double nearest the sum (ties to even) and the second member is the remainder. + * + * T. J. Dekker. A floating-point technique for extending the available precision. Numerische + * Mathematik, 18(3):224–242, 1971. + */ + DoubleDouble _fast2Sum(double a, double b) { + double s = a + b; + double z = s - a; + double t = b - z; + return {s, t}; + } + + /** + * returns exact unevaluated sum of a and b, where the first member is the double nearest the + * sum (ties to even) and the second member is the remainder. + * + * O. Møller. Quasi double-precision in floating-point addition. BIT, 5:37–50, 1965. + * D. Knuth. The Art of Computer Programming, vol 2. Addison-Wesley, Reading, MA, 3rd ed, 1998. + */ + DoubleDouble _2Sum(double a, double b) { + double s = a + b; + double aPrime = s - b; + double bPrime = s - aPrime; + double deltaA = a - aPrime; + double deltaB = b - bPrime; + double t = deltaA + deltaB; + return {s, t}; + } + + double _sum = 0.0; + double _addend = 0.0; + + // Simple sum to be returned if _sum is NaN. This addresses infinities turning into NaNs when + // using compensated addition. + double _special = 0.0; +}; +} // namespace mongo diff --git a/src/mongo/util/summation_test.cpp b/src/mongo/util/summation_test.cpp new file mode 100644 index 00000000000..6e86690e171 --- /dev/null +++ b/src/mongo/util/summation_test.cpp @@ -0,0 +1,201 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include <cmath> +#include <limits> +#include <vector> + +#include "mongo/unittest/unittest.h" + +#include "mongo/util/summation.h" + +namespace mongo { + +namespace { +using limits = std::numeric_limits<long long>; +std::vector<long long> longValues = { + limits::min(), + limits::min() + 1, + limits::min() / 2, + -(1LL << 53), + -(1LL << 52), + -(1LL << 32), + -0x100, + -0xff, + -0xaa, + -0x55, + -1, + 0, + 1, + 2, + 0x55, + 0x80, + 0xaa, + 0x100, + 512, + 1024, + 2048, + 1LL << 31, + 1LL << 32, + 1LL << 52, + 1LL << 53, + limits::max() / 2, + static_cast<long long>(1ULL << 63) - (1ULL << (63 - 53 - 1)), // Halfway between two doubles + limits::max() - 1, + limits::max()}; + +std::vector<double> doubleValues = { + 1.4831356930199802e-05, -3.121724665346865, 3041897608700.073, 1001318343149.7166, + -1714.6229586696593, 1731390114894580.8, 6.256645803154374e-08, -107144114533844.25, + -0.08839485091750919, -265119153.02185738, -0.02450615965231944, 0.0002684331017079073, + 32079040427.68358, -0.04733295911845742, 0.061381859083076085, -25329.59126796951, + -0.0009567520620034965, -1553879364344.9932, -2.1101077525869814e-08, -298421079729.5547, + 0.03182394834273594, 22.201944843278916, -33.35667991109125, 11496013.960449915, + -40652595.33210472, 3.8496066090328163, 2.5074042398147304e-08, -0.02208724071782122, + -134211.37290639878, 0.17640433666616578, 4.463787499171126, 9.959669945399718, + 129265976.35224283, 1.5865526187526546e-07, -4746011.710555799, -712048598925.0789, + 582214206210.4034, 0.025236204812875362, 530078170.91147506, -14.865307666195053, + 1.6727994895185032e-05, -113386276.03121366, -6.135827207137054, 10644945799901.145, + -100848907797.1582, 2.2404406961625282e-08, 1.315662618424494e-09, -0.832190208349044, + -9.779323414999364, -546522170658.2997}; + +const double doubleValuesSum = 1636336982012512.5; // simple summation will yield wrong result + +std::vector<double> specialValues = {-std::numeric_limits<double>::infinity(), + std::numeric_limits<double>::infinity(), + std::numeric_limits<double>::quiet_NaN()}; + +} // namespace + +TEST(Summation, AddLongs) { + int iter = 0; + for (auto x : longValues) { + for (auto y : longValues) { + for (auto z : longValues) { + iter++; + DoubleDoubleSummation sum; + + // This checks for correct results mod 2**64, which helps with checking correctness + // around the 2**53 transition between both doubles of the DoubleDouble result in + // int64 additions, as well off-by-one errors. + uint64_t checkUint64 = + static_cast<uint64_t>(x) + static_cast<uint64_t>(y) + static_cast<uint64_t>(z); + + sum.addLong(x); + sum.addLong(y); + sum.addLong(z); + ASSERT(sum.isInteger()); + + if (!sum.fitsLong()) { + ASSERT(std::abs(sum.getDouble()) >= limits::max()); + // Reduce sum to fit in a 64-bit integer. + while (!sum.fitsLong()) { + sum.addDouble(sum.getDouble() < 0 ? std::ldexp(1, 64) : -std::ldexp(1, 64)); + } + } + ASSERT_EQUALS(static_cast<uint64_t>(sum.getLong()), checkUint64); + } + } + } +} + +TEST(Summation, AddSpecial) { + for (auto x : specialValues) { + DoubleDoubleSummation sum; + + // Check that a special number will result in that special number. + sum.addLong(-42); + sum.addLong(100); + sum.addDouble(x); + ASSERT(!sum.fitsLong()); + ASSERT(!sum.isInteger()); + if (std::isnan(x)) { + ASSERT(std::isnan(sum.getDouble())); + } else { + ASSERT_EQUALS(sum.getDouble(), x); + } + + // Check that adding more numbers doesn't reset the special value. + sum.addDouble(-1E22); + sum.addLong(limits::min()); + ASSERT(!sum.fitsLong()); + if (std::isnan(x)) { + ASSERT(std::isnan(sum.getDouble())); + } else { + ASSERT_EQUALS(sum.getDouble(), x); + } + } +} + +TEST(Summation, AddInvalid) { + DoubleDoubleSummation sum; + sum.addDouble(std::numeric_limits<double>::infinity()); + sum.addDouble(-std::numeric_limits<double>::infinity()); + + ASSERT(std::isnan(sum.getDouble())); + ASSERT(!sum.fitsLong()); + ASSERT(!sum.isInteger()); +} + +TEST(Summation, LongOverflow) { + DoubleDoubleSummation positive; + + // Overflow should result in number no longer fitting in a long. + positive.addLong(limits::max()); + positive.addLong(limits::max()); + ASSERT(!positive.fitsLong()); + + // However, actual stored overflow should not overflow or lose precision. + positive.addLong(-limits::max()); + ASSERT_EQUALS(positive.getLong(), limits::max()); + + DoubleDoubleSummation negative; + + // Similarly for negative numbers. + negative.addLong(limits::min()); + negative.addLong(-1); + ASSERT(!negative.fitsLong()); + negative.addDouble(-1.0 * limits::min()); + ASSERT_EQUALS(negative.getLong(), -1); +} + + +TEST(Summation, AddDoubles) { + DoubleDoubleSummation sum; + double straightSum = 0.0; + + for (auto x : doubleValues) { + sum.addDouble(x); + straightSum += x; + } + ASSERT_EQUALS(sum.getDouble(), doubleValuesSum); + ASSERT(straightSum != sum.getDouble()); +} +} // namespace mongo |