diff options
author | Mathias Stearn <mathias@10gen.com> | 2015-01-14 14:38:01 -0500 |
---|---|---|
committer | Mathias Stearn <mathias@10gen.com> | 2015-01-15 18:08:36 -0500 |
commit | d1910c70e2e3eb6ce057a474dece00b737645631 (patch) | |
tree | c36c3e0bb77a8cf8af526743ce74f15c48734c60 /src | |
parent | aa3247ba48f78f4423874f2a64f00b12bf0b27d6 (diff) | |
download | mongo-d1910c70e2e3eb6ce057a474dece00b737645631.tar.gz |
SERVER-16708 Make Value::compare match BSON woCompare behavior
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/base/compare_numbers.h | 93 | ||||
-rw-r--r-- | src/mongo/bson/bsonelement.cpp | 49 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value.cpp | 89 | ||||
-rw-r--r-- | src/mongo/dbtests/documenttests.cpp | 19 |
5 files changed, 147 insertions, 110 deletions
diff --git a/src/mongo/base/compare_numbers.h b/src/mongo/base/compare_numbers.h new file mode 100644 index 00000000000..c086f87fb7a --- /dev/null +++ b/src/mongo/base/compare_numbers.h @@ -0,0 +1,93 @@ +/* Copyright 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#pragma once + +#include "mongo/platform/float_utils.h" +#include "mongo/util/assert_util.h" + +namespace mongo { + + /** + * These functions compare numbers using the same rules as BSON. Care is taken to always give + * numerically correct results when comparing different types. Returns are always -1, 0, or 1 to + * ensure it is safe to negate the result to invert the direction of the comparison. + */ + + inline int compareInts(int lhs, int rhs) { + return lhs == rhs ? 0 : lhs < rhs ? -1 : 1; + } + + inline int compareLongs(long long lhs, long long rhs) { + return lhs == rhs ? 0 : lhs < rhs ? -1 : 1; + } + + inline int compareDoubles(double lhs, double rhs) { + if (lhs == rhs) return 0; + if (lhs < rhs) return -1; + if (lhs > rhs) return 1; + + // If none of the above cases returned, lhs or rhs must be NaN. + if (isNaN(lhs)) return isNaN(rhs) ? 0 : -1; + dassert(isNaN(rhs)); + return 1; + } + + // This is the tricky one. Needs to support the following cases: + // * Doubles with a fractional component. + // * Longs that can't be precisely represented as a double. + // * Doubles outside of the range of Longs (including +/- Inf). + // * NaN (defined by us as less than all Longs) + // * Return value is always -1, 0, or 1 to ensure it is safe to negate. + inline int compareLongToDouble(long long lhs, double rhs) { + // All Longs are > NaN + if (isNaN(rhs)) return 1; + + // Ints with magnitude <= 2**53 can be precisely represented as doubles. + // Additionally, doubles outside of this range can't have a fractional component. + static const long long kEndOfPreciseDoubles = 1ll << 53; + if (lhs <= kEndOfPreciseDoubles && lhs >= -kEndOfPreciseDoubles) { + return compareDoubles(lhs, rhs); + } + + // Large magnitude doubles (including +/- Inf) are strictly > or < all Longs. + static const double kBoundOfLongRange = -static_cast<double>(LLONG_MIN); // positive 2**63 + if (rhs >= kBoundOfLongRange) return -1; // Can't be represented in a Long. + if (rhs < -kBoundOfLongRange) return 1; // Can be represented in a Long. + + // Remaining Doubles can have their integer component precisely represented as long longs. + // If they have a fractional component, they must be strictly > or < lhs even after + // truncation of the fractional component since low-magnitude lhs were handled above. + return compareLongs(lhs, rhs); + } + + inline int compareDoubleToLong(double lhs, long long rhs) { + // Only implement the real logic once. + return -compareLongToDouble(rhs, lhs); + } + +} // namespace mongo diff --git a/src/mongo/bson/bsonelement.cpp b/src/mongo/bson/bsonelement.cpp index 243ad2395b8..0502038e668 100644 --- a/src/mongo/bson/bsonelement.cpp +++ b/src/mongo/bson/bsonelement.cpp @@ -33,6 +33,7 @@ #include <boost/functional/hash.hpp> +#include "mongo/base/compare_numbers.h" #include "mongo/base/data_cursor.h" #include "mongo/db/jsobj.h" #include "mongo/util/base64.h" @@ -819,54 +820,6 @@ namespace mongo { return ret.str(); } -namespace { - int compareInts(int lhs, int rhs) { return lhs == rhs ? 0 : lhs < rhs ? -1 : 1; } - int compareLongs(long long lhs, long long rhs) { return lhs == rhs ? 0 : lhs < rhs ? -1 : 1; } - int compareDoubles(double lhs, double rhs) { - if (lhs == rhs) return 0; - if (lhs < rhs) return -1; - if (lhs > rhs) return 1; - - // If none of the above cases returned, lhs or rhs must be NaN. - if (isNaN(lhs)) return isNaN(rhs) ? 0 : -1; - dassert(isNaN(rhs)); - return 1; - } - - // This is the tricky one. Needs to support the following cases: - // * Doubles with a fractional component. - // * Longs that can't be precisely represented as a double. - // * Doubles outside of the range of Longs (including +/- Inf). - // * NaN (defined by us as less than all Longs) - // * Return value is always -1, 0, or 1 to ensure it is safe to negate. - int compareLongToDouble(long long lhs, double rhs) { - // All Longs are > NaN - if (isNaN(rhs)) return 1; - - // Ints with magnitude <= 2**53 can be precisely represented as doubles. - // Additionally, doubles outside of this range can't have a fractional component. - static const long long kEndOfPreciseDoubles = 1ll << 53; - if (lhs <= kEndOfPreciseDoubles && lhs >= -kEndOfPreciseDoubles) { - return compareDoubles(lhs, rhs); - } - - // Large magnitude doubles (including +/- Inf) are strictly > or < all Longs. - static const double kBoundOfLongRange = -static_cast<double>(LLONG_MIN); // positive 2**63 - if (rhs >= kBoundOfLongRange) return -1; // Can't be represented in a Long. - if (rhs < -kBoundOfLongRange) return 1; // Can be represented in a Long. - - // Remaining Doubles can have their integer component precisely represented as long longs. - // If they have a fractional component, they must be strictly > or < lhs even after - // truncation of the fractional component since low-magnitude lhs were handled above. - return compareLongs(lhs, rhs); - } - - int compareDoubleToLong(double lhs, long long rhs) { - // Only implement the real logic once. - return -compareLongToDouble(rhs, lhs); - } -} // namespace - /** * l and r must be same canonicalType when called. */ diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp index a6b9c43c997..4a54a5e614d 100644 --- a/src/mongo/db/pipeline/document.cpp +++ b/src/mongo/db/pipeline/document.cpp @@ -391,6 +391,13 @@ namespace mongo { const ValueElement& rField = rIt.get(); const ValueElement& lField = lIt.get(); + // For compatibility with BSONObj::woCompare() consider the canonical type of values + // before considerting their names. + const int rCType = canonicalizeBSONType(rField.val.getType()); + const int lCType = canonicalizeBSONType(lField.val.getType()); + if (lCType != rCType) + return lCType < rCType ? -1 : 1; + const int nameCmp = lField.nameSD().compare(rField.nameSD()); if (nameCmp) return nameCmp; // field names are unequal diff --git a/src/mongo/db/pipeline/value.cpp b/src/mongo/db/pipeline/value.cpp index 16d8cf5608d..b37bc1cb6d3 100644 --- a/src/mongo/db/pipeline/value.cpp +++ b/src/mongo/db/pipeline/value.cpp @@ -33,6 +33,7 @@ #include <boost/functional/hash.hpp> #include <boost/scoped_array.hpp> +#include "mongo/base/compare_numbers.h" #include "mongo/db/jsobj.h" #include "mongo/db/pipeline/document.h" #include "mongo/util/hex.h" @@ -549,19 +550,6 @@ namespace mongo { } } - // Special case for double since it needs special NaN handling - inline static int cmp(double left, double right) { - // The following is lifted directly from compareElementValues - // to ensure identical handling of NaN - if (left < right) - return -1; - if (left == right) - return 0; - if (isNaN(left)) - return isNaN(right) ? 0 : -1; - return 1; - } - int Value::compare(const Value& rL, const Value& rR) { // Note, this function needs to behave identically to BSON's compareElementValues(). // Additionally, any changes here must be replicated in hash_combine(). @@ -590,23 +578,45 @@ namespace mongo { case Bool: return rL.getBool() - rR.getBool(); - // WARNING: Timestamp and Date have same canonical type, but compare differently. - // Maintaining behavior from normal BSON. case Timestamp: // unsigned return cmp(rL._storage.timestampValue, rR._storage.timestampValue); + case Date: // signed return cmp(rL._storage.dateValue, rR._storage.dateValue); // Numbers should compare by equivalence even if different types - case NumberLong: - case NumberInt: - case NumberDouble: - switch (getWidestNumeric(lType, rType)) { - case NumberDouble: return cmp(rL.getDouble(), rR.getDouble()); - case NumberLong: return cmp(rL.getLong(), rR.getLong()); - case NumberInt: return cmp(rL.getInt(), rR.getInt()); - default: verify(false); + case NumberInt: { + // All types can precisely represent all NumberInts, so it is safe to simply convert to + // whatever rhs's type is. + switch (rType) { + case NumberInt: return compareInts(rL._storage.intValue, rR._storage.intValue); + case NumberLong: return compareLongs(rL._storage.intValue, rR._storage.longValue); + case NumberDouble: return compareDoubles(rL._storage.intValue, rR._storage.doubleValue); + default: invariant(false); } + } + + case NumberLong: { + switch (rType) { + case NumberLong: return compareLongs(rL._storage.longValue, rR._storage.longValue); + case NumberInt: return compareLongs(rL._storage.longValue, rR._storage.intValue); + case NumberDouble: return compareLongToDouble(rL._storage.longValue, + rR._storage.doubleValue); + default: invariant(false); + } + } + + case NumberDouble: { + switch (rType) { + case NumberDouble: return compareDoubles(rL._storage.doubleValue, + rR._storage.doubleValue); + case NumberInt: return compareDoubles(rL._storage.doubleValue, + rR._storage.intValue); + case NumberLong: return compareDoubleToLong(rL._storage.doubleValue, + rR._storage.longValue); + default: invariant(false); + } + } case jstOID: return memcmp(rL._storage.oid, rR._storage.oid, OID::kOIDSize); @@ -662,23 +672,14 @@ namespace mongo { return rL.getStringData().compare(rR.getStringData()); case CodeWScope: { - // This case crazy, but identical to how they are compared in BSON (SERVER-7804) - intrusive_ptr<const RCCodeWScope> l = rL._storage.getCodeWScope(); intrusive_ptr<const RCCodeWScope> r = rR._storage.getCodeWScope(); - // This triggers two bugs in codeWScope. - // Since this is a very rare case I'm not handling it here. - uassert(16557, "can't compare CodeWScope values containing a NUL byte in the code.", - strlen(l->code.c_str()) == l->code.size() - && strlen(r->code.c_str()) == r->code.size()); - ret = l->code.compare(r->code); if (ret) return ret; - // SERVER-7804 - return strcmp(l->scope.objdata(), r->scope.objdata()); + return l->scope.woCompare(r->scope); } } verify(false); @@ -710,16 +711,10 @@ namespace mongo { boost::hash_combine(seed, _storage.dateValue); break; - /* - Numbers whose values are equal need to hash to the same thing - as well. Note that Value::compare() promotes numeric values to - their largest common form in order for comparisons to work. - We must hash all numeric values as if they are doubles so that - things like grouping work. We don't know what values will come - down the pipe later, but if we start out with int representations - of a value, and later see double representations of it, they need - to end up in the same buckets. - */ + // This converts all numbers to doubles, which ignores the low-order bits of + // NumberLongs > 2**53, but that is ok since the hash will still be the same for + // equal numbers and is still likely to be different for different numbers. + // SERVER-16851 case NumberDouble: case NumberLong: case NumberInt: { @@ -776,11 +771,9 @@ namespace mongo { } case CodeWScope: { - // SERVER-7804 - const char * code = _storage.getCodeWScope()->code.c_str(); - boost::hash_range(seed, code, (code + strlen(code))); - // Not going to bother hashing scope. Too many edge cases. Will fall back to - // Value::compare when code is same, so this is ok. + intrusive_ptr<const RCCodeWScope> cws = _storage.getCodeWScope(); + boost::hash_combine(seed, StringData::Hasher()(cws->code)); + boost::hash_combine(seed, BSONObj::Hasher()(cws->scope)); break; } } diff --git a/src/mongo/dbtests/documenttests.cpp b/src/mongo/dbtests/documenttests.cpp index acd5de74446..28e63419242 100644 --- a/src/mongo/dbtests/documenttests.cpp +++ b/src/mongo/dbtests/documenttests.cpp @@ -224,6 +224,10 @@ namespace DocumentTests { assertComparison( -1, BSON( "a" << 1 << "b" << 1 ), BSON( "a" << 1 << "b" << 2 ) ); // numbers sort before strings assertComparison( -1, BSON( "a" << 1 ), BSON( "a" << "foo" ) ); + // numbers sort before strings, even if keys compare otherwise + assertComparison( -1, BSON( "b" << 1 ), BSON( "a" << "foo" ) ); + // null before number, even if keys compare otherwise + assertComparison( -1, BSON( "z" << BSONNULL ), BSON( "a" << 1 ) ); } public: int cmp( const BSONObj& a, const BSONObj& b ) { @@ -247,19 +251,6 @@ namespace DocumentTests { } }; - /** Comparison based on a null field's name. Differs from BSONObj comparison behavior. */ - class CompareNamedNull { - public: - void run() { - BSONObj obj1 = BSON( "z" << BSONNULL ); - BSONObj obj2 = BSON( "a" << 1 ); - // Comparsion with type precedence. - ASSERT( obj1.woCompare( obj2 ) < 0 ); - // Comparison with field name precedence. - ASSERT( Document::compare( fromBson( obj1 ), fromBson( obj2 ) ) > 0 ); - } - }; - /** Shallow copy clone of a single field Document. */ class Clone { public: @@ -1249,6 +1240,7 @@ namespace DocumentTests { assertComparison( 0, fromjson( "{'':{}}" ), fromjson( "{'':{}}" ) ); assertComparison( 0, fromjson( "{'':{x:1}}" ), fromjson( "{'':{x:1}}" ) ); assertComparison( -1, fromjson( "{'':{}}" ), fromjson( "{'':{x:1}}" ) ); + assertComparison( -1, fromjson( "{'':{'z': 1}}" ), fromjson( "{'':{'a': 'a'}}") ); // Array. assertComparison( 0, fromjson( "{'':[]}" ), fromjson( "{'':[]}" ) ); @@ -1427,7 +1419,6 @@ namespace DocumentTests { add<Document::GetValue>(); add<Document::SetField>(); add<Document::Compare>(); - add<Document::CompareNamedNull>(); add<Document::Clone>(); add<Document::CloneMultipleFields>(); add<Document::FieldIteratorEmpty>(); |