diff options
author | Justin Seyster <justin.seyster@mongodb.com> | 2019-04-15 16:35:48 -0400 |
---|---|---|
committer | Justin Seyster <justin.seyster@mongodb.com> | 2019-04-15 16:49:42 -0400 |
commit | ec5473699c86897dc98fca6aac63eb92dcbcc1c4 (patch) | |
tree | bd3319fdfdffad15cd64600a6d7e1a289bcf9931 | |
parent | 195271e1c8254f26636331b9e07067e2eab16374 (diff) | |
download | mongo-ec5473699c86897dc98fca6aac63eb92dcbcc1c4.tar.gz |
SERVER-40513 Convert seed to little endian before hashing
-rw-r--r-- | src/mongo/db/hasher.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/hasher.h | 4 | ||||
-rw-r--r-- | src/mongo/db/hasher_test.cpp | 20 |
3 files changed, 42 insertions, 10 deletions
diff --git a/src/mongo/db/hasher.cpp b/src/mongo/db/hasher.cpp index 731500afa40..677d34d9b7f 100644 --- a/src/mongo/db/hasher.cpp +++ b/src/mongo/db/hasher.cpp @@ -57,24 +57,45 @@ public: // pointer to next part of input key, length in bytes to read void addData(const void* keyData, size_t numBytes); + void addSeed(int32_t number) { + addIntegerData(number); + } + + // All numerical values should be converted to an int64_t before being added to the hash input. + void addNumber(int64_t number) { + addIntegerData(number); + } + // finish computing the hash, put the result in the digest // only call this once per Hasher void finish(HashDigest out); private: + // Convert 'number' to little endian and then append it to the digest input. The number of bytes + // appended is determined by the input type, so ensure that type T has a well defined size that + // is the same on all platforms. + template <typename T> + void addIntegerData(T number); + md5_state_t _md5State; HashSeed _seed; }; Hasher::Hasher(HashSeed seed) : _seed(seed) { md5_init(&_md5State); - md5_append(&_md5State, reinterpret_cast<const md5_byte_t*>(&_seed), sizeof(_seed)); + addSeed(seed); } void Hasher::addData(const void* keyData, size_t numBytes) { md5_append(&_md5State, static_cast<const md5_byte_t*>(keyData), numBytes); } +template <typename T> +void Hasher::addIntegerData(T number) { + const auto data = endian::nativeToLittle(number); + addData(&data, sizeof(data)); +} + void Hasher::finish(HashDigest out) { md5_finish(&_md5State, out); } @@ -91,9 +112,8 @@ void recursiveHash(Hasher* h, const BSONElement& e, bool includeFieldName) { // if there are no embedded objects (subobjects or arrays), // compute the hash, squashing numeric types to 64-bit ints if (e.isNumber()) { - // Use safeNumberLongForHash, it is well-defined for troublesome doubles. - const auto i = endian::nativeToLittle(e.safeNumberLongForHash()); - h->addData(&i, sizeof(i)); + // Use safeNumberLongForHash, because it is well-defined for troublesome doubles. + h->addNumber(static_cast<int64_t>(e.safeNumberLongForHash())); } else { h->addData(e.value(), e.valuesize()); } diff --git a/src/mongo/db/hasher.h b/src/mongo/db/hasher.h index c35eac19d56..20519e6a58f 100644 --- a/src/mongo/db/hasher.h +++ b/src/mongo/db/hasher.h @@ -38,7 +38,7 @@ namespace mongo { -typedef int HashSeed; +typedef int32_t HashSeed; class BSONElementHasher { BSONElementHasher(const BSONElementHasher&) = delete; @@ -52,7 +52,7 @@ public: * WARNING: do not change the hash see value. Hash-based sharding clusters will * expect that value to be zero. */ - static const int DEFAULT_HASH_SEED = 0; + static constexpr HashSeed const DEFAULT_HASH_SEED = 0; /* This computes a 64-bit hash of the value part of BSONElement "e", * preceded by the seed "seed". Squashes element (and any sub-elements) diff --git a/src/mongo/db/hasher_test.cpp b/src/mongo/db/hasher_test.cpp index 9d8a201b1be..63ec64417af 100644 --- a/src/mongo/db/hasher_test.cpp +++ b/src/mongo/db/hasher_test.cpp @@ -42,17 +42,17 @@ namespace mongo { namespace { // Helper methods -long long hashIt(const BSONObj& object, int seed) { +long long hashIt(const BSONObj& object, HashSeed seed) { return BSONElementHasher::hash64(object.firstElement(), seed); } long long hashIt(const BSONObj& object) { - int seed = 0; + HashSeed seed = 0; return hashIt(object, seed); } // Test different oids hash to different things TEST(BSONElementHasher, DifferentOidsAreDifferentHashes) { - int seed = 0; + HashSeed seed = 0; long long int oidHash = BSONElementHasher::hash64(BSONObjBuilder().genOID().obj().firstElement(), seed); @@ -124,7 +124,7 @@ TEST(BSONElementHasher, SubDocumentGroupingHashesDiffer) { // Testing codeWscope scope squashing TEST(BSONElementHasher, CodeWithScopeSquashesScopeIntsAndDoubles) { - int seed = 0; + HashSeed seed = 0; BSONObjBuilder b1; b1.appendCodeWScope("a", "print('this is some stupid code')", BSON("a" << 3)); @@ -360,5 +360,17 @@ TEST(BSONElementHasher, HashCodeWScope) { ASSERT_EQUALS(hashIt(o), 501342939894575968LL); } +TEST(BSONElementHasher, HashWithNonZeroSeed) { + HashSeed seed = 40513; + + BSONObj o = BSON("check" << 42); + ASSERT_EQUALS(hashIt(o, seed), 4302929669663179197LL); + + o = BSON("check" << BSON_ARRAY("sunflower" + << "sesame" + << "mustard")); + ASSERT_EQUALS(hashIt(o, seed), -9222615859251096151LL); +} + } // namespace } // namespace mongo |