summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Justin Seyster <justin.seyster@mongodb.com> 2019-04-15 16:35:48 -0400
committer: Justin Seyster <justin.seyster@mongodb.com> 2019-04-15 16:49:42 -0400
commit: ec5473699c86897dc98fca6aac63eb92dcbcc1c4 (patch)
tree: bd3319fdfdffad15cd64600a6d7e1a289bcf9931
parent: 195271e1c8254f26636331b9e07067e2eab16374 (diff)
download: mongo-ec5473699c86897dc98fca6aac63eb92dcbcc1c4.tar.gz
SERVER-40513 Convert seed to little endian before hashing
-rw-r--r-- src/mongo/db/hasher.cpp 28
-rw-r--r-- src/mongo/db/hasher.h 4
-rw-r--r-- src/mongo/db/hasher_test.cpp 20
3 files changed, 42 insertions, 10 deletions
diff --git a/src/mongo/db/hasher.cpp b/src/mongo/db/hasher.cpp
index 731500afa40..677d34d9b7f 100644
--- a/src/mongo/db/hasher.cpp
+++ b/src/mongo/db/hasher.cpp
@@ -57,24 +57,45 @@ public:
// pointer to next part of input key, length in bytes to read
void addData(const void* keyData, size_t numBytes);
+ void addSeed(int32_t number) {
+ addIntegerData(number);
+ }
+
+ // All numerical values should be converted to an int64_t before being added to the hash input.
+ void addNumber(int64_t number) {
+ addIntegerData(number);
+ }
+
// finish computing the hash, put the result in the digest
// only call this once per Hasher
void finish(HashDigest out);
private:
+ // Convert 'number' to little endian and then append it to the digest input. The number of bytes
+ // appended is determined by the input type, so ensure that type T has a well defined size that
+ // is the same on all platforms.
+ template <typename T>
+ void addIntegerData(T number);
+
md5_state_t _md5State;
HashSeed _seed;
};
Hasher::Hasher(HashSeed seed) : _seed(seed) {
md5_init(&_md5State);
- md5_append(&_md5State, reinterpret_cast<const md5_byte_t*>(&_seed), sizeof(_seed));
+ addSeed(seed);
}
void Hasher::addData(const void* keyData, size_t numBytes) {
md5_append(&_md5State, static_cast<const md5_byte_t*>(keyData), numBytes);
}
+template <typename T>
+void Hasher::addIntegerData(T number) {
+ const auto data = endian::nativeToLittle(number);
+ addData(&data, sizeof(data));
+}
+
void Hasher::finish(HashDigest out) {
md5_finish(&_md5State, out);
}
@@ -91,9 +112,8 @@ void recursiveHash(Hasher* h, const BSONElement& e, bool includeFieldName) {
// if there are no embedded objects (subobjects or arrays),
// compute the hash, squashing numeric types to 64-bit ints
if (e.isNumber()) {
- // Use safeNumberLongForHash, it is well-defined for troublesome doubles.
- const auto i = endian::nativeToLittle(e.safeNumberLongForHash());
- h->addData(&i, sizeof(i));
+ // Use safeNumberLongForHash, because it is well-defined for troublesome doubles.
+ h->addNumber(static_cast<int64_t>(e.safeNumberLongForHash()));
} else {
h->addData(e.value(), e.valuesize());
}
diff --git a/src/mongo/db/hasher.h b/src/mongo/db/hasher.h
index c35eac19d56..20519e6a58f 100644
--- a/src/mongo/db/hasher.h
+++ b/src/mongo/db/hasher.h
@@ -38,7 +38,7 @@
namespace mongo {
-typedef int HashSeed;
+typedef int32_t HashSeed;
class BSONElementHasher {
BSONElementHasher(const BSONElementHasher&) = delete;
@@ -52,7 +52,7 @@ public:
* WARNING: do not change the hash seed value. Hash-based sharding clusters will
* expect that value to be zero.
*/
- static const int DEFAULT_HASH_SEED = 0;
+ static constexpr HashSeed const DEFAULT_HASH_SEED = 0;
/* This computes a 64-bit hash of the value part of BSONElement "e",
* preceded by the seed "seed". Squashes element (and any sub-elements)
diff --git a/src/mongo/db/hasher_test.cpp b/src/mongo/db/hasher_test.cpp
index 9d8a201b1be..63ec64417af 100644
--- a/src/mongo/db/hasher_test.cpp
+++ b/src/mongo/db/hasher_test.cpp
@@ -42,17 +42,17 @@ namespace mongo {
namespace {
// Helper methods
-long long hashIt(const BSONObj& object, int seed) {
+long long hashIt(const BSONObj& object, HashSeed seed) {
return BSONElementHasher::hash64(object.firstElement(), seed);
}
long long hashIt(const BSONObj& object) {
- int seed = 0;
+ HashSeed seed = 0;
return hashIt(object, seed);
}
// Test different oids hash to different things
TEST(BSONElementHasher, DifferentOidsAreDifferentHashes) {
- int seed = 0;
+ HashSeed seed = 0;
long long int oidHash =
BSONElementHasher::hash64(BSONObjBuilder().genOID().obj().firstElement(), seed);
@@ -124,7 +124,7 @@ TEST(BSONElementHasher, SubDocumentGroupingHashesDiffer) {
// Testing codeWscope scope squashing
TEST(BSONElementHasher, CodeWithScopeSquashesScopeIntsAndDoubles) {
- int seed = 0;
+ HashSeed seed = 0;
BSONObjBuilder b1;
b1.appendCodeWScope("a", "print('this is some stupid code')", BSON("a" << 3));
@@ -360,5 +360,17 @@ TEST(BSONElementHasher, HashCodeWScope) {
ASSERT_EQUALS(hashIt(o), 501342939894575968LL);
}
+TEST(BSONElementHasher, HashWithNonZeroSeed) {
+ HashSeed seed = 40513;
+
+ BSONObj o = BSON("check" << 42);
+ ASSERT_EQUALS(hashIt(o, seed), 4302929669663179197LL);
+
+ o = BSON("check" << BSON_ARRAY("sunflower"
+ << "sesame"
+ << "mustard"));
+ ASSERT_EQUALS(hashIt(o, seed), -9222615859251096151LL);
+}
+
} // namespace
} // namespace mongo