Make BSONObj::woCompare a total ordering

Changes: * Date and Timestamp are now distinct with all Dates before all Timestamps. * Numeric comparisons now correctly handle Doubles and Long > 2**53. * CodeWScope doesn't strcmp BSONObjs or strings which may contain NULs. There should be no changes in any of the cases where woCompare defined a correct total ordering before. The new behavior matches the ordering defined by KeyString. Related tickets: * SERVER-3304 Error comparing Date and Timestamp * SERVER-3719 Total ordering over Longs and Doubles * SERVER-7804 CodeWScope comparisons are broken * SERVER-16632 Change WiredTiger index key format Changes to the comparison function for aggregation will be handled separately as SERVER-16708.
author: Mathias Stearn <mathias@10gen.com> 2015-01-02 17:26:58 -0500
committer: Mathias Stearn <mathias@10gen.com> 2015-01-07 11:56:33 -0500
commit: f58fa5f78a91ce36d8e31d6730ebfdaa6cc1d5ab (patch)
tree: bd143554b3ba2f46edaa9a00a12f1e4840c7c142 /src/mongo
parent: de54755e568481d1bdef37339d899403e3b04d86 (diff)
download: mongo-f58fa5f78a91ce36d8e31d6730ebfdaa6cc1d5ab.tar.gz
7 files changed, 243 insertions, 61 deletions
diff --git a/src/mongo/bson/bson_obj_test.cpp b/src/mongo/bson/bson_obj_test.cpp
index 88d866f531a..1ee6dc743f9 100644
--- a/src/mongo/bson/bson_obj_test.cpp
+++ b/src/mongo/bson/bson_obj_test.cpp
@@ -32,11 +32,148 @@
 
 namespace {
 
-    TEST(ToString, EmptyArray) {
+    TEST(BSONObjToString, EmptyArray) {
         const char text[] = "{ x: [] }";
         mongo::BSONObj o1 = mongo::fromjson(text);
         const std::string o1_str = o1.toString();
         ASSERT_EQUALS(text, o1_str);
     }
 
+    TEST(BSONObjCompare, NumberDouble) {
+        ASSERT_LT(BSON("" << 0.0), BSON("" << 1.0));
+        ASSERT_LT(BSON("" << -1.0), BSON("" << 0.0));
+        ASSERT_LT(BSON("" << -1.0), BSON("" << 1.0));
+
+        ASSERT_LT(BSON("" << 0.0), BSON("" << 0.1));
+        ASSERT_LT(BSON("" << 0.1), BSON("" << 1.0));
+        ASSERT_LT(BSON("" << -1.0), BSON("" << -0.1));
+        ASSERT_LT(BSON("" << -0.1), BSON("" << 0.0));
+        ASSERT_LT(BSON("" << -0.1), BSON("" << 0.1));
+
+        ASSERT_LT(BSON("" << 0.0), BSON("" << std::numeric_limits<double>::denorm_min()));
+        ASSERT_GT(BSON("" << 0.0), BSON("" << -std::numeric_limits<double>::denorm_min()));
+
+        ASSERT_EQ(BSON("" << 0.0), BSON("" << -0.0));
+
+        ASSERT_GT(BSON("" << std::numeric_limits<double>::infinity()), BSON("" << 0.0));
+        ASSERT_GT(BSON("" << std::numeric_limits<double>::infinity()),
+                  BSON("" << std::numeric_limits<double>::max())); // max is finite
+        ASSERT_GT(BSON("" << std::numeric_limits<double>::infinity()),
+                  BSON("" << -std::numeric_limits<double>::infinity()));
+
+        ASSERT_LT(BSON("" << -std::numeric_limits<double>::infinity()), BSON("" << 0.0));
+        ASSERT_LT(BSON("" << -std::numeric_limits<double>::infinity()),
+                  BSON("" << std::numeric_limits<double>::min())); // min is finite
+        ASSERT_LT(BSON("" << -std::numeric_limits<double>::infinity()),
+                  BSON("" << std::numeric_limits<double>::infinity()));
+
+        ASSERT_LT(BSON("" << std::numeric_limits<double>::quiet_NaN()), BSON("" << 0.0));
+        ASSERT_LT(BSON("" << std::numeric_limits<double>::quiet_NaN()),
+                  BSON("" << std::numeric_limits<double>::min()));
+        ASSERT_LT(BSON("" << std::numeric_limits<double>::quiet_NaN()),
+                  BSON("" << std::numeric_limits<double>::infinity()));
+        ASSERT_LT(BSON("" << std::numeric_limits<double>::quiet_NaN()),
+                  BSON("" << -std::numeric_limits<double>::infinity()));
+
+        // TODO in C++11 use hex floating point to test distinct NaN representations
+        ASSERT_EQ(BSON("" << std::numeric_limits<double>::quiet_NaN()),
+                  BSON("" << std::numeric_limits<double>::signaling_NaN()));
+    }
+
+    TEST(BSONObjCompare, NumberLong_Double) {
+        ASSERT_EQ(BSON("" << 0ll), BSON("" << 0.0));
+        ASSERT_EQ(BSON("" << 0ll), BSON("" << -0.0));
+
+        ASSERT_EQ(BSON("" << 1ll), BSON("" << 1.0));
+        ASSERT_EQ(BSON("" << -1ll), BSON("" << -1.0));
+
+        ASSERT_LT(BSON("" << 0ll), BSON("" << 1.0));
+        ASSERT_LT(BSON("" << -1ll), BSON("" << 0.0));
+        ASSERT_LT(BSON("" << -1ll), BSON("" << 1.0));
+
+        ASSERT_LT(BSON("" << 0ll), BSON("" << 0.1));
+        ASSERT_LT(BSON("" << 0.1), BSON("" << 1ll));
+        ASSERT_LT(BSON("" << -1ll), BSON("" << -0.1));
+        ASSERT_LT(BSON("" << -0.1), BSON("" << 0ll));
+
+        ASSERT_LT(BSON("" << 0ll), BSON("" << std::numeric_limits<double>::denorm_min()));
+        ASSERT_GT(BSON("" << 0ll), BSON("" << -std::numeric_limits<double>::denorm_min()));
+
+        ASSERT_GT(BSON("" << std::numeric_limits<double>::infinity()), BSON("" << 0ll));
+        ASSERT_GT(BSON("" << std::numeric_limits<double>::infinity()),
+                  BSON("" << std::numeric_limits<long long>::max()));
+        ASSERT_GT(BSON("" << std::numeric_limits<double>::infinity()),
+                  BSON("" << std::numeric_limits<long long>::min()));
+
+        ASSERT_LT(BSON("" << -std::numeric_limits<double>::infinity()), BSON("" << 0ll));
+        ASSERT_LT(BSON("" << -std::numeric_limits<double>::infinity()),
+                  BSON("" << std::numeric_limits<long long>::max()));
+        ASSERT_LT(BSON("" << -std::numeric_limits<double>::infinity()),
+                  BSON("" << std::numeric_limits<long long>::min()));
+
+        ASSERT_LT(BSON("" << std::numeric_limits<double>::quiet_NaN()), BSON("" << 0ll));
+        ASSERT_LT(BSON("" << std::numeric_limits<double>::quiet_NaN()),
+                  BSON("" << std::numeric_limits<long long>::min()));
+
+        for (int powerOfTwo = 0; powerOfTwo < 63; powerOfTwo++) {
+            const long long lNum = 1ll << powerOfTwo;
+            const double dNum = double(lNum);
+
+            // All powers of two in this range can be represented exactly as doubles.
+            invariant(lNum == static_cast<long long>(dNum));
+
+            ASSERT_EQ(BSON("" << lNum), BSON("" << dNum));
+            ASSERT_EQ(BSON("" << -lNum), BSON("" << -dNum));
+
+            ASSERT_GT(BSON("" << (lNum + 1)), BSON("" << dNum));
+            ASSERT_LT(BSON("" << (lNum - 1)), BSON("" << dNum));
+            ASSERT_GT(BSON("" << (-lNum + 1)), BSON("" << -dNum));
+            ASSERT_LT(BSON("" << (-lNum - 1)), BSON("" << -dNum));
+
+            if (powerOfTwo <= 52) { // is dNum - 0.5 representable?
+                ASSERT_GT(BSON("" << lNum), BSON("" << (dNum - 0.5)));
+                ASSERT_LT(BSON("" << -lNum), BSON("" << -(dNum - 0.5)));
+            }
+
+            if (powerOfTwo <= 51) { // is dNum + 0.5 representable?
+                ASSERT_LT(BSON("" << lNum), BSON("" << (dNum + 0.5)));
+                ASSERT_GT(BSON("" << -lNum), BSON("" << -(dNum + 0.5)));
+            }
+        }
+
+        {
+            // Numbers around +/- numeric_limits<long long>::max() which can't be represented
+            // precisely as a double.
+            const long long maxLL = std::numeric_limits<long long>::max();
+            const double closestAbove = 9223372036854775808.0; // 2**63
+            const double closestBelow = 9223372036854774784.0; // 2**63 - epsilon
+
+            ASSERT_GT(BSON("" << maxLL), BSON("" << (maxLL - 1)));
+            ASSERT_LT(BSON("" << maxLL), BSON("" << closestAbove));
+            ASSERT_GT(BSON("" << maxLL), BSON("" << closestBelow));
+
+            ASSERT_LT(BSON("" << -maxLL), BSON("" << -(maxLL - 1)));
+            ASSERT_GT(BSON("" << -maxLL), BSON("" << -closestAbove));
+            ASSERT_LT(BSON("" << -maxLL), BSON("" << -closestBelow));
+        }
+
+        {
+            // Numbers around numeric_limits<long long>::min() which can be represented precisely as
+            // a double, but not as a positive long long.
+            const long long minLL = std::numeric_limits<long long>::min();
+            const double closestBelow = -9223372036854777856.0; // -2**63 - epsilon
+            const double equal = -9223372036854775808.0; // 2**63
+            const double closestAbove = 9223372036854774784.0; // -2**63 + epsilon
+
+            invariant(static_cast<double>(minLL) == equal);
+            invariant(static_cast<long long>(equal) == minLL);
+
+            ASSERT_LT(BSON("" << minLL), BSON("" << (minLL + 1)));
+
+            ASSERT_EQ(BSON("" << minLL), BSON("" << equal));
+            ASSERT_LT(BSON("" << minLL), BSON("" << closestAbove));
+            ASSERT_GT(BSON("" << minLL), BSON("" << closestBelow));
+        }
+    }
+
 } // unnamed namespace
diff --git a/src/mongo/bson/bsonelement.cpp b/src/mongo/bson/bsonelement.cpp
index 7bc8f3a032e..4b26d85264e 100644
--- a/src/mongo/bson/bsonelement.cpp
+++ b/src/mongo/bson/bsonelement.cpp
@@ -813,9 +813,57 @@ namespace mongo {
         return ret.str();
     }
 
-    /* must be same type when called, unless both sides are #s 
-       this large function is in header to facilitate inline-only use of bson
-    */
+namespace {
+    int compareInts(int lhs, int rhs) { return lhs == rhs ? 0 : lhs < rhs ? -1 : 1; }
+    int compareLongs(long long lhs, long long rhs) { return lhs == rhs ? 0 : lhs < rhs ? -1 : 1; }
+    int compareDoubles(double lhs, double rhs) {
+        if (lhs == rhs) return 0;
+        if (lhs < rhs) return -1;
+        if (lhs > rhs) return 1;
+
+        // If none of the above cases returned, lhs or rhs must be NaN.
+        if (isNaN(lhs)) return isNaN(rhs) ? 0 : -1;
+        dassert(isNaN(rhs));
+        return 1;
+    }
+
+    // This is the tricky one. Needs to support the following cases:
+    // * Doubles with a fractional component.
+    // * Longs that can't be precisely represented as a double.
+    // * Doubles outside of the range of Longs (including +/- Inf).
+    // * NaN (defined by us as less than all Longs)
+    // * Return value is always -1, 0, or 1 to ensure it is safe to negate.
+    int compareLongToDouble(long long lhs, double rhs) {
+        // All Longs are > NaN
+        if (isNaN(rhs)) return 1;
+
+        // Ints with magnitude <= 2**53 can be precisely represented as doubles.
+        // Additionally, doubles outside of this range can't have a fractional component.
+        static const long long kEndOfPreciseDoubles = 1ll << 53;
+        if (lhs <= kEndOfPreciseDoubles && lhs >= -kEndOfPreciseDoubles) {
+            return compareDoubles(lhs, rhs);
+        }
+
+        // Large magnitude doubles (including +/- Inf) are strictly > or < all Longs.
+        static const double kBoundOfLongRange = -static_cast<double>(LLONG_MIN); // positive 2**63
+        if (rhs >= kBoundOfLongRange) return -1; // Can't be represented in a Long.
+        if (rhs < -kBoundOfLongRange) return 1; // Can be represented in a Long.
+
+        // Remaining Doubles can have their integer component precisely represented as long longs.
+        // If they have a fractional component, they must be strictly > or < lhs even after
+        // truncation of the fractional component since low-magnitude lhs were handled above.
+        return compareLongs(lhs, rhs);
+    }
+
+    int compareDoubleToLong(double lhs, long long rhs) {
+        // Only implement the real logic once.
+        return -compareLongToDouble(rhs, lhs);
+    }
+} // namespace
+
+    /**
+     * l and r must be same canonicalType when called.
+     */
     int compareElementValues(const BSONElement& l, const BSONElement& r) {
         int f;
 
@@ -836,6 +884,7 @@ namespace mongo {
                 return -1;
             return l.date() == r.date() ? 0 : 1;
         case Date:
+            // Signed comparisons for Dates.
             {
                 long long a = (long long) l.Date().millis;
                 long long b = (long long) r.Date().millis;
@@ -843,36 +892,36 @@ namespace mongo {
                     return -1;
                 return a == b ? 0 : 1;
             }
-        case NumberLong:
-            if( r.type() == NumberLong ) {
-                long long L = l._numberLong();
-                long long R = r._numberLong();
-                if( L < R ) return -1;
-                if( L == R ) return 0;
-                return 1;
-            }
-            goto dodouble;
-        case NumberInt:
-            if( r.type() == NumberInt ) {
-                int L = l._numberInt();
-                int R = r._numberInt();
-                if( L < R ) return -1;
-                return L == R ? 0 : 1;
-            }
-            // else fall through
-        case NumberDouble: 
-dodouble:
-            {
-                double left = l.number();
-                double right = r.number();
-                if( left < right ) 
-                    return -1;
-                if( left == right )
-                    return 0;
-                if( isNaN(left) )
-                    return isNaN(right) ? 0 : -1;
-                return 1;
+
+        case NumberInt: {
+            // All types can precisely represent all NumberInts, so it is safe to simply convert to
+            // whatever rhs's type is.
+            switch (r.type()) {
+            case NumberInt: return compareInts(l._numberInt(), r._numberInt());
+            case NumberLong: return compareLongs(l._numberInt(), r._numberLong());
+            case NumberDouble: return compareDoubles(l._numberInt(), r._numberDouble());
+            default: invariant(false);
+            }
+        }
+
+        case NumberLong: {
+            switch (r.type()) {
+            case NumberLong: return compareLongs(l._numberLong(), r._numberLong());
+            case NumberInt: return compareLongs(l._numberLong(), r._numberInt());
+            case NumberDouble: return compareLongToDouble(l._numberLong(), r._numberDouble());
+            default: invariant(false);
+            }
+        }
+
+        case NumberDouble: {
+            switch (r.type()) {
+            case NumberDouble: return compareDoubles(l._numberDouble(), r._numberDouble());
+            case NumberInt: return compareDoubles(l._numberDouble(), r._numberInt());
+            case NumberLong: return compareDoubleToLong(l._numberDouble(), r._numberLong());
+            default: invariant(false);
             }
+        }
+
         case jstOID:
             return memcmp(l.value(), r.value(), OID::kOIDSize);
         case Code:
@@ -912,16 +961,11 @@ dodouble:
             return strcmp(l.regexFlags(), r.regexFlags());
         }
         case CodeWScope : {
-            f = l.canonicalType() - r.canonicalType();
-            if ( f )
-                return f;
-            f = strcmp( l.codeWScopeCode() , r.codeWScopeCode() );
-            if ( f )
-                return f;
-            f = strcmp( l.codeWScopeScopeDataUnsafe() , r.codeWScopeScopeDataUnsafe() );
-            if ( f )
-                return f;
-            return 0;
+            int cmp = StringData(l.codeWScopeCode(), l.codeWScopeCodeLen() - 1).compare(
+                      StringData(r.codeWScopeCode(), r.codeWScopeCodeLen() - 1));
+            if (cmp) return cmp;
+
+            return l.codeWScopeObject().woCompare(r.codeWScopeObject());
         }
         default:
             verify( false);
diff --git a/src/mongo/bson/bsonobjbuilder.cpp b/src/mongo/bson/bsonobjbuilder.cpp
index c914f682e5c..34788b4a962 100644
--- a/src/mongo/bson/bsonobjbuilder.cpp
+++ b/src/mongo/bson/bsonobjbuilder.cpp
@@ -53,7 +53,7 @@ namespace mongo {
             appendBool(fieldName, true);
             //appendDate( fieldName , numeric_limits<long long>::min() ); 
             return;
-        case Timestamp: // TODO integrate with Date SERVER-3304
+        case Timestamp:
             appendTimestamp( fieldName , 0 ); return;
         case Undefined: // shared with EOO
             appendUndefined( fieldName ); return;
@@ -107,7 +107,7 @@ namespace mongo {
             appendMinForType( fieldName, Object ); return;
         case Date:
             appendDate( fieldName , std::numeric_limits<long long>::max() ); return;
-        case Timestamp: // TODO integrate with Date SERVER-3304
+        case Timestamp:
             append( fieldName , OpTime::max() ); return;
         case Undefined: // shared with EOO
             appendUndefined( fieldName ); return;
diff --git a/src/mongo/bson/bsontypes.h b/src/mongo/bson/bsontypes.h
index d23ba94c773..449c617f13c 100644
--- a/src/mongo/bson/bsontypes.h
+++ b/src/mongo/bson/bsontypes.h
@@ -149,8 +149,9 @@ namespace mongo {
         case mongo::Bool:
             return 40;
         case mongo::Date:
-        case Timestamp:
             return 45;
+        case Timestamp:
+            return 47;
         case RegEx:
             return 50;
         case DBRef:
diff --git a/src/mongo/db/hasher_test.cpp b/src/mongo/db/hasher_test.cpp
index a11113d13a9..808370922ef 100644
--- a/src/mongo/db/hasher_test.cpp
+++ b/src/mongo/db/hasher_test.cpp
@@ -346,12 +346,12 @@ namespace {
         BSONObj o = BSON( "check" << Date_t( 0x5566778811223344LL ) );
         ASSERT_EQUALS( hashIt( o ), 4476222765095560467LL );
         o = builder1.appendTimestamp( "check", 0x55667788LL * 1000LL, 0x11223344LL ).obj();
-        ASSERT_EQUALS( hashIt( o ), 4476222765095560467LL );
+        ASSERT_EQUALS( hashIt( o ), 4873046866288452390LL );
 
         o = BSON( "check" << Date_t( 0 ) );
         ASSERT_EQUALS( hashIt( o ), -1178696894582842035LL );
         o = builder2.appendTimestamp( "check", 0 ).obj();
-        ASSERT_EQUALS( hashIt( o ), -1178696894582842035LL );
+        ASSERT_EQUALS( hashIt( o ), -7867208682377458672LL );
     }
 
     TEST( BSONElementHasher, HashRegEx ) {
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index ddc138b9ec0..34c070e0aa5 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -128,17 +128,19 @@ namespace repl {
                 break;
 
             case mongo::Timestamp:
+                boost::hash_combine(hash, elem._opTime().asDate());
+                break;
+
             case mongo::Date:
-                // Need to treat these the same until SERVER-3304 is resolved.
                 boost::hash_combine(hash, elem.date().asInt64());
                 break;
 
             case mongo::NumberDouble:
             case mongo::NumberLong:
             case mongo::NumberInt: {
-                // This converts all numbers to doubles for compatibility with woCompare.
-                // This ignores // the low-order bits of NumberLongs > 2**53, but that is required
-                // until SERVER-3719 is resolved.
+                // This converts all numbers to doubles, which ignores the low-order bits of
+                // NumberLongs > 2**53, but that is ok since the hash will still be the same for
+                // equal numbers and is still likely to be different for different numbers.
                 const double dbl = elem.numberDouble();
                 if (isNaN(dbl)) {
                     boost::hash_combine(hash, numeric_limits<double>::quiet_NaN());
@@ -177,12 +179,10 @@ namespace repl {
                 break;
 
             case mongo::CodeWScope: {
-                // SERVER-7804
-                // Intentionally not using codeWScopeCodeLen for compatibility with
-                // compareElementValues. Using codeWScopeScopeDataUnsafe (as a string!) for the same
-                // reason.
-                boost::hash_combine(hash, StringData::Hasher()(elem.codeWScopeCode()));
-                boost::hash_combine(hash, StringData::Hasher()(elem.codeWScopeScopeDataUnsafe()));
+                boost::hash_combine(hash, StringData::Hasher()(
+                                            StringData(elem.codeWScopeCode(),
+                                                       elem.codeWScopeCodeLen())));
+                boost::hash_combine(hash, hashBSONObj(elem.codeWScopeObject()));
                 break;
             }
             }
diff --git a/src/mongo/dbtests/documenttests.cpp b/src/mongo/dbtests/documenttests.cpp
index aa21bbeb4e9..f29b75fd7b2 100644
--- a/src/mongo/dbtests/documenttests.cpp
+++ b/src/mongo/dbtests/documenttests.cpp
@@ -1296,9 +1296,9 @@ namespace DocumentTests {
                 assertComparison(-1, Value(vector<Value>()), Value(BSONBinData("", 0, MD5Type)));
                 assertComparison(-1, Value(BSONBinData("", 0, MD5Type)), Value(mongo::OID()));
                 assertComparison(-1, Value(mongo::OID()), Value(false));
-                assertComparison(-1, Value(false), Value(OpTime()));
-                assertComparison(0,  Value(OpTime()), Value(Date_t(0)));
-                assertComparison(-1, Value(Date_t(0)), Value(BSONRegEx("")));
+                assertComparison(-1, Value(false), Value(Date_t(0)));
+                assertComparison(-1, Value(Date_t(0)), Value(OpTime()));
+                assertComparison(-1, Value(OpTime()), Value(BSONRegEx("")));
                 assertComparison(-1, Value(BSONRegEx("")), Value(BSONDBRef("", mongo::OID())));
                 assertComparison(-1, Value(BSONDBRef("", mongo::OID())), Value(BSONCode("")));
                 assertComparison(-1, Value(BSONCode("")), Value(BSONCodeWScope("", BSONObj())));
author	Mathias Stearn <mathias@10gen.com>	2015-01-02 17:26:58 -0500
committer	Mathias Stearn <mathias@10gen.com>	2015-01-07 11:56:33 -0500
commit	f58fa5f78a91ce36d8e31d6730ebfdaa6cc1d5ab (patch)
tree	bd143554b3ba2f46edaa9a00a12f1e4840c7c142 /src/mongo
parent	de54755e568481d1bdef37339d899403e3b04d86 (diff)
download	mongo-f58fa5f78a91ce36d8e31d6730ebfdaa6cc1d5ab.tar.gz