SERVER-19735: Add accurate summation class using pairs of doubles

This allows summing long series of 64-bits integers without loss of precision, and avoids catastrophic cancellation in summing series of doubles or mixed types.
author: Geert Bosch <geert@mongodb.com> 2016-06-01 19:57:57 -0400
committer: Geert Bosch <geert@mongodb.com> 2016-06-06 13:22:34 -0400
commit: 666ce720ebf90229e8e4f92f8719284bedb4f20f (patch)
tree: bf6e3c06de35ee97803d6080f07bc7ce9e3ce156
parent: 3ffb9e1ca369e3a5dee5ad4f420d9d4e9866e7af (diff)
download: mongo-666ce720ebf90229e8e4f92f8719284bedb4f20f.tar.gz
4 files changed, 467 insertions, 0 deletions
diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript
index 6f7ce51979f..e38ba85ef7d 100644
--- a/src/mongo/util/SConscript
+++ b/src/mongo/util/SConscript
@@ -69,6 +69,29 @@ env.CppUnitTest(
 )
 
 env.Library(
+    target='summation',
+    source=[
+        'summation.cpp',
+    ],
+    LIBDEPS=[
+        '$BUILD_DIR/mongo/base',
+        '$BUILD_DIR/mongo/util/foundation',
+    ],
+)
+
+env.CppUnitTest(
+    target='summation_test',
+    source=[
+        'summation_test.cpp',
+    ],
+    LIBDEPS=[
+        'summation',
+        '$BUILD_DIR/mongo/base',
+        '$BUILD_DIR/third_party/shim_boost',
+    ],
+)
+
+env.Library(
     target='progress_meter',
     source=[
         'progress_meter.cpp',
diff --git a/src/mongo/util/summation.cpp b/src/mongo/util/summation.cpp
new file mode 100644
index 00000000000..d960b73f95e
--- /dev/null
+++ b/src/mongo/util/summation.cpp
@@ -0,0 +1,89 @@
+/**
+ *    Copyright (C) 2016 MongoDB Inc.
+ *
+ *    This program is free software: you can redistribute it and/or  modify
+ *    it under the terms of the GNU Affero General Public License, version 3,
+ *    as published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU Affero General Public License for more details.
+ *
+ *    You should have received a copy of the GNU Affero General Public License
+ *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the GNU Affero General Public License in all respects
+ *    for all of the code used other than as permitted herein. If you modify
+ *    file(s) with this exception, you may extend this exception to your
+ *    version of the file(s), but you are not obligated to do so. If you do not
+ *    wish to do so, delete this exception statement from your version. If you
+ *    delete this exception statement from all source files in the program,
+ *    then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "summation.h"
+
+#include <cmath>
+
+#include "mongo/util/assert_util.h"
+
+namespace mongo {
+void DoubleDoubleSummation::addLong(long long x) {
+    // Split 64-bit integers into two doubles, so the sum remains exact.
+    int64_t high = x / (1ll << 32) * (1ll << 32);
+    int64_t low = x - high;
+    dassert(high + low == x && 1.0 * high == high && 1.0 * low == low);
+    addDouble(low);
+    addDouble(high);
+}
+
+/**
+ * Returns whether the sum is in range of the 64-bit signed integer long long type.
+ */
+bool DoubleDoubleSummation::fitsLong() const {
+    using limits = std::numeric_limits<long long>;
+    // Fast path: if the rounded _sum is strictly between the minimum and maximum long long value,
+    // it must be valid. This is the common case. Note that this is correct for NaNs/infinities.
+    if (_sum > limits::min() && _sum < limits::max())
+        return true;
+
+    // Now check the cases where the _sum equals one of the boundaries, and check the compensation
+    // amount to determine to what integer the value would round.
+
+    // If _sum is equal to limits::max() + 1, _addend must cause us to round down to a lower integer
+    // and thus be strictly less than -0.5. limits.max() rounds up to limits.max() + 1, as double
+    // precision does not have enough precision.
+    if (_sum == limits::max())
+        return _addend < -0.5;
+
+    // If _sum is equal to limits::min(), _addend must not cause us to round down and thus be
+    // greater than or equal to -0.5.
+    if (_sum == limits::min())
+        return _addend >= -0.5;
+
+    // The sum is out of range, an infinity or a NaN.
+    return false;
+}
+
+/**
+ * Returns result of sum rounded to nearest integer, rounding half-way cases away from zero.
+ */
+long long DoubleDoubleSummation::getLong() const {
+    uassert(ErrorCodes::Overflow, "sum out of range of a 64-bit signed integer", fitsLong());
+    if (_sum == std::numeric_limits<long long>::max()) {
+        // Can't directly convert, because _sum would overflow a signed 64-bit number.
+        dassert(_addend < -0.5 && -_sum == std::numeric_limits<long long>::min());
+        return llround(_addend) - std::numeric_limits<long long>::min();
+    }
+    long long sum = llround(_sum);
+    sum += llround((_sum - sum) + _addend);
+    return sum;
+}
+}  // namespace mongo
diff --git a/src/mongo/util/summation.h b/src/mongo/util/summation.h
new file mode 100644
index 00000000000..e340d13d708
--- /dev/null
+++ b/src/mongo/util/summation.h
@@ -0,0 +1,154 @@
+/**
+ *    Copyright (C) 2016 MongoDB Inc.
+ *
+ *    This program is free software: you can redistribute it and/or  modify
+ *    it under the terms of the GNU Affero General Public License, version 3,
+ *    as published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU Affero General Public License for more details.
+ *
+ *    You should have received a copy of the GNU Affero General Public License
+ *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the GNU Affero General Public License in all respects
+ *    for all of the code used other than as permitted herein. If you modify
+ *    file(s) with this exception, you may extend this exception to your
+ *    version of the file(s), but you are not obligated to do so. If you do not
+ *    wish to do so, delete this exception statement from your version. If you
+ *    delete this exception statement from all source files in the program,
+ *    then also delete it in the license file.
+ */
+
+#pragma once
+
+#include <cmath>
+#include <tuple>
+#include <utility>
+
+#include "mongo/platform/decimal128.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo {
+
+using DoubleDouble = std::pair<double, double>;
+
+/**
+ * Class to accurately sum series of numbers using a 2Sum and Fast2Sum formulas to maintain an
+ * unevaluated sum of two numbers: a rounded-to-nearest _sum and an _addend.
+ * See Sylvie Boldo, Stef Graillat, Jean-Michel Muller. On the robustness of the 2Sum and Fast2Sum
+ * algorithms. 2016. https://hal-ens-lyon.archives-ouvertes.fr/ensl-01310023
+ */
+class DoubleDoubleSummation {
+public:
+    /**
+     * Adds x to the sum, keeping track of a compensation amount to be subtracted later.
+     */
+    void addDouble(double x) {
+        _special += x;                                 // Keep a simple sum to use in case of NaN
+        std::tie(x, _addend) = _fast2Sum(x, _addend);  // Compensated add: _addend tinier than _sum
+        std::tie(_sum, x) = _2Sum(_sum, x);            // Compensated add: x maybe larger than _sum
+        _addend += x;                                  // Store away lowest part of sum
+    }
+
+    /**
+     * Adds x to internal sum. Extra precision guarantees that sum is exact, unless intermediate
+     * sums exceed a magnitude of 2**106.
+     */
+    void addLong(long long x);
+
+    /**
+     * Adds x to internal sum. Adds as double as that is more efficient.
+     */
+    void addInt(int x) {
+        addDouble(x);
+    }
+
+    /**
+     * Returns the double nearest to the accumulated sum.
+     */
+    double getDouble() const {
+        return std::isnan(_sum) ? _special : _sum;
+    }
+
+    /**
+     * Return a pair of double representing the sum, with first being the nearest double and second
+     * the amount to add for full precision.
+     */
+    DoubleDouble getDoubleDouble() const {
+        return std::isnan(_sum) ? DoubleDouble{_special, 0.0} : DoubleDouble{_sum, _addend};
+    }
+
+    /**
+     * The result will generally have about 107 bits of precision, or about 32 decimal digits.
+     * Summations of even extremely long series of 32-bit and 64-bit integers should be exact.
+     */
+    Decimal128 getDecimal() const {
+        return std::isnan(_sum) ? Decimal128(_special, Decimal128::kRoundTo34Digits)
+                                : Decimal128(_sum, Decimal128::kRoundTo34Digits)
+                                      .add(Decimal128(_addend, Decimal128::kRoundTo34Digits));
+    }
+
+    /**
+     * Returns whether the sum is in range of the 64-bit signed integer long long type.
+     */
+    bool fitsLong() const;
+
+    /**
+     * Returns whether the accumulated sum has a fractional part.
+     */
+    bool isInteger() const {
+        return std::trunc(_sum) == _sum && std::trunc(_addend) == _addend;
+    }
+
+    /**
+     * Returns result of sum rounded to nearest integer, rounding half-way cases away from zero.
+     */
+    long long getLong() const;
+
+private:
+    /**
+     * Assuming |b| <= |a|, returns exact unevaluated sum of a and b, where the first member is the
+     * double nearest the sum (ties to even) and the second member is the remainder.
+     *
+     * T. J. Dekker. A floating-point technique for extending the available precision. Numerische
+     * Mathematik, 18(3):224–242, 1971.
+     */
+    DoubleDouble _fast2Sum(double a, double b) {
+        double s = a + b;
+        double z = s - a;
+        double t = b - z;
+        return {s, t};
+    }
+
+    /**
+     * returns exact unevaluated sum of a and b, where the first member is the double nearest the
+     * sum (ties to even) and the second member is the remainder.
+     *
+     * O. Møller. Quasi double-precision in floating-point addition. BIT, 5:37–50, 1965.
+     * D. Knuth. The Art of Computer Programming, vol 2. Addison-Wesley, Reading, MA, 3rd ed, 1998.
+     */
+    DoubleDouble _2Sum(double a, double b) {
+        double s = a + b;
+        double aPrime = s - b;
+        double bPrime = s - aPrime;
+        double deltaA = a - aPrime;
+        double deltaB = b - bPrime;
+        double t = deltaA + deltaB;
+        return {s, t};
+    }
+
+    double _sum = 0.0;
+    double _addend = 0.0;
+
+    // Simple sum to be returned if _sum is NaN. This addresses infinities turning into NaNs when
+    // using compensated addition.
+    double _special = 0.0;
+};
+}  // namespace mongo
diff --git a/src/mongo/util/summation_test.cpp b/src/mongo/util/summation_test.cpp
new file mode 100644
index 00000000000..6e86690e171
--- /dev/null
+++ b/src/mongo/util/summation_test.cpp
@@ -0,0 +1,201 @@
+/**
+ *    Copyright (C) 2016 MongoDB Inc.
+ *
+ *    This program is free software: you can redistribute it and/or  modify
+ *    it under the terms of the GNU Affero General Public License, version 3,
+ *    as published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU Affero General Public License for more details.
+ *
+ *    You should have received a copy of the GNU Affero General Public License
+ *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the GNU Affero General Public License in all respects
+ *    for all of the code used other than as permitted herein. If you modify
+ *    file(s) with this exception, you may extend this exception to your
+ *    version of the file(s), but you are not obligated to do so. If you do not
+ *    wish to do so, delete this exception statement from your version. If you
+ *    delete this exception statement from all source files in the program,
+ *    then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include <cmath>
+#include <limits>
+#include <vector>
+
+#include "mongo/unittest/unittest.h"
+
+#include "mongo/util/summation.h"
+
+namespace mongo {
+
+namespace {
+using limits = std::numeric_limits<long long>;
+std::vector<long long> longValues = {
+    limits::min(),
+    limits::min() + 1,
+    limits::min() / 2,
+    -(1LL << 53),
+    -(1LL << 52),
+    -(1LL << 32),
+    -0x100,
+    -0xff,
+    -0xaa,
+    -0x55,
+    -1,
+    0,
+    1,
+    2,
+    0x55,
+    0x80,
+    0xaa,
+    0x100,
+    512,
+    1024,
+    2048,
+    1LL << 31,
+    1LL << 32,
+    1LL << 52,
+    1LL << 53,
+    limits::max() / 2,
+    static_cast<long long>(1ULL << 63) - (1ULL << (63 - 53 - 1)),  // Halfway between two doubles
+    limits::max() - 1,
+    limits::max()};
+
+std::vector<double> doubleValues = {
+    1.4831356930199802e-05, -3.121724665346865,     3041897608700.073,       1001318343149.7166,
+    -1714.6229586696593,    1731390114894580.8,     6.256645803154374e-08,   -107144114533844.25,
+    -0.08839485091750919,   -265119153.02185738,    -0.02450615965231944,    0.0002684331017079073,
+    32079040427.68358,      -0.04733295911845742,   0.061381859083076085,    -25329.59126796951,
+    -0.0009567520620034965, -1553879364344.9932,    -2.1101077525869814e-08, -298421079729.5547,
+    0.03182394834273594,    22.201944843278916,     -33.35667991109125,      11496013.960449915,
+    -40652595.33210472,     3.8496066090328163,     2.5074042398147304e-08,  -0.02208724071782122,
+    -134211.37290639878,    0.17640433666616578,    4.463787499171126,       9.959669945399718,
+    129265976.35224283,     1.5865526187526546e-07, -4746011.710555799,      -712048598925.0789,
+    582214206210.4034,      0.025236204812875362,   530078170.91147506,      -14.865307666195053,
+    1.6727994895185032e-05, -113386276.03121366,    -6.135827207137054,      10644945799901.145,
+    -100848907797.1582,     2.2404406961625282e-08, 1.315662618424494e-09,   -0.832190208349044,
+    -9.779323414999364,     -546522170658.2997};
+
+const double doubleValuesSum = 1636336982012512.5;  // simple summation will yield wrong result
+
+std::vector<double> specialValues = {-std::numeric_limits<double>::infinity(),
+                                     std::numeric_limits<double>::infinity(),
+                                     std::numeric_limits<double>::quiet_NaN()};
+
+}  // namespace
+
+TEST(Summation, AddLongs) {
+    int iter = 0;
+    for (auto x : longValues) {
+        for (auto y : longValues) {
+            for (auto z : longValues) {
+                iter++;
+                DoubleDoubleSummation sum;
+
+                // This checks for correct results mod 2**64, which helps with checking correctness
+                // around the 2**53 transition between both doubles of the DoubleDouble result in
+                // int64 additions, as well off-by-one errors.
+                uint64_t checkUint64 =
+                    static_cast<uint64_t>(x) + static_cast<uint64_t>(y) + static_cast<uint64_t>(z);
+
+                sum.addLong(x);
+                sum.addLong(y);
+                sum.addLong(z);
+                ASSERT(sum.isInteger());
+
+                if (!sum.fitsLong()) {
+                    ASSERT(std::abs(sum.getDouble()) >= limits::max());
+                    // Reduce sum to fit in a 64-bit integer.
+                    while (!sum.fitsLong()) {
+                        sum.addDouble(sum.getDouble() < 0 ? std::ldexp(1, 64) : -std::ldexp(1, 64));
+                    }
+                }
+                ASSERT_EQUALS(static_cast<uint64_t>(sum.getLong()), checkUint64);
+            }
+        }
+    }
+}
+
+TEST(Summation, AddSpecial) {
+    for (auto x : specialValues) {
+        DoubleDoubleSummation sum;
+
+        // Check that a special number will result in that special number.
+        sum.addLong(-42);
+        sum.addLong(100);
+        sum.addDouble(x);
+        ASSERT(!sum.fitsLong());
+        ASSERT(!sum.isInteger());
+        if (std::isnan(x)) {
+            ASSERT(std::isnan(sum.getDouble()));
+        } else {
+            ASSERT_EQUALS(sum.getDouble(), x);
+        }
+
+        // Check that adding more numbers doesn't reset the special value.
+        sum.addDouble(-1E22);
+        sum.addLong(limits::min());
+        ASSERT(!sum.fitsLong());
+        if (std::isnan(x)) {
+            ASSERT(std::isnan(sum.getDouble()));
+        } else {
+            ASSERT_EQUALS(sum.getDouble(), x);
+        }
+    }
+}
+
+TEST(Summation, AddInvalid) {
+    DoubleDoubleSummation sum;
+    sum.addDouble(std::numeric_limits<double>::infinity());
+    sum.addDouble(-std::numeric_limits<double>::infinity());
+
+    ASSERT(std::isnan(sum.getDouble()));
+    ASSERT(!sum.fitsLong());
+    ASSERT(!sum.isInteger());
+}
+
+TEST(Summation, LongOverflow) {
+    DoubleDoubleSummation positive;
+
+    // Overflow should result in number no longer fitting in a long.
+    positive.addLong(limits::max());
+    positive.addLong(limits::max());
+    ASSERT(!positive.fitsLong());
+
+    // However, actual stored overflow should not overflow or lose precision.
+    positive.addLong(-limits::max());
+    ASSERT_EQUALS(positive.getLong(), limits::max());
+
+    DoubleDoubleSummation negative;
+
+    // Similarly for negative numbers.
+    negative.addLong(limits::min());
+    negative.addLong(-1);
+    ASSERT(!negative.fitsLong());
+    negative.addDouble(-1.0 * limits::min());
+    ASSERT_EQUALS(negative.getLong(), -1);
+}
+
+
+TEST(Summation, AddDoubles) {
+    DoubleDoubleSummation sum;
+    double straightSum = 0.0;
+
+    for (auto x : doubleValues) {
+        sum.addDouble(x);
+        straightSum += x;
+    }
+    ASSERT_EQUALS(sum.getDouble(), doubleValuesSum);
+    ASSERT(straightSum != sum.getDouble());
+}
+}  // namespace mongo
author	Geert Bosch <geert@mongodb.com>	2016-06-01 19:57:57 -0400
committer	Geert Bosch <geert@mongodb.com>	2016-06-06 13:22:34 -0400
commit	666ce720ebf90229e8e4f92f8719284bedb4f20f (patch)
tree	bf6e3c06de35ee97803d6080f07bc7ce9e3ce156
parent	3ffb9e1ca369e3a5dee5ad4f420d9d4e9866e7af (diff)
download	mongo-666ce720ebf90229e8e4f92f8719284bedb4f20f.tar.gz