diff options
author | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2015-11-23 16:50:29 -0500 |
---|---|---|
committer | Jason Carey <jcarey@argv.me> | 2016-03-02 16:41:53 -0500 |
commit | cdd95deb1b5ebb8dbf9ec76a96ebcf511ba3a14e (patch) | |
tree | c071c528787e090fe8095d417272eea71a276e39 /src | |
parent | 9405fa9e1e0d5f24cb0bc210969cb35de84ae8fc (diff) | |
download | mongo-cdd95deb1b5ebb8dbf9ec76a96ebcf511ba3a14e.tar.gz |
SERVER-21828 Murmurhash for bigendian
Make the murmurhash output on big-endian architectures identical to the output on
little-endian architectures.
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/base/string_data.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/fts/fts_index_format_test.cpp | 7 | ||||
-rw-r--r-- | src/third_party/murmurhash3/MurmurHash3.cpp | 29 | ||||
-rw-r--r-- | src/third_party/murmurhash3/SConscript | 4 |
4 files changed, 37 insertions, 14 deletions
diff --git a/src/mongo/base/string_data.cpp b/src/mongo/base/string_data.cpp index f00b185fc29..df7374fcf93 100644 --- a/src/mongo/base/string_data.cpp +++ b/src/mongo/base/string_data.cpp @@ -30,6 +30,9 @@ #include <ostream> #include <third_party/murmurhash3/MurmurHash3.h> +#include "mongo/base/data_type_endian.h" +#include "mongo/base/data_view.h" + namespace mongo { namespace { @@ -39,16 +42,16 @@ size_t murmur3(StringData str); template <> size_t murmur3<4>(StringData str) { - uint32_t hash; + char hash[4]; MurmurHash3_x86_32(str.rawData(), str.size(), 0, &hash); - return hash; + return ConstDataView(hash).read<LittleEndian<std::uint32_t>>(); } template <> size_t murmur3<8>(StringData str) { - uint64_t hash[2]; + char hash[16]; MurmurHash3_x64_128(str.rawData(), str.size(), 0, hash); - return static_cast<size_t>(hash[0]); + return static_cast<size_t>(ConstDataView(hash).read<LittleEndian<std::uint64_t>>()); } } // namespace diff --git a/src/mongo/db/fts/fts_index_format_test.cpp b/src/mongo/db/fts/fts_index_format_test.cpp index 59ea9345fd4..8d19e975cab 100644 --- a/src/mongo/db/fts/fts_index_format_test.cpp +++ b/src/mongo/db/fts/fts_index_format_test.cpp @@ -209,7 +209,10 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) { string longWordCat = longPrefix + "cat"; // "aaa...aaasat" string longWordSat = longPrefix + "sat"; - string text = mongoutils::str::stream() << longWordCat << " " << longWordSat; + // "aaa...aaamongodbfts" + string longWordMongoDBFts = longPrefix + "mongodbfts"; + string text = mongoutils::str::stream() << longWordCat << " " << longWordSat << " " + << longWordMongoDBFts; FTSIndexFormat::getKeys(spec, BSON("data" << text), &keys); // Hard-coded expected computed keys for future-proofing. 
@@ -218,6 +221,8 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) { expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab8e78455d827ebb87cbe87f392bf45f6"); // sat expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaf2d6f58bb3b81b97e611ae7ccac6dea7"); + // mongodbfts + expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaae1d6b34f5d9c92acecd8cce32f747b27"); assertEqualsIndexKeys(expectedKeys, keys); } diff --git a/src/third_party/murmurhash3/MurmurHash3.cpp b/src/third_party/murmurhash3/MurmurHash3.cpp index 421697842b6..82084e0d38d 100644 --- a/src/third_party/murmurhash3/MurmurHash3.cpp +++ b/src/third_party/murmurhash3/MurmurHash3.cpp @@ -9,6 +9,10 @@ #include "MurmurHash3.h"
+#include "mongo/base/data_type_endian.h"
+#include "mongo/base/data_view.h"
+#include "mongo/platform/endian.h"
+
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@@ -51,15 +55,22 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r ) //-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
+//
+// NOTE, MongoDB code: JC -
+// ConstDataView handles the byte swapping and avoids unaligned reads. Note
+// that we need reversed versions because we actually want little endian
+// encoded blocks out of getblock, and our input data is in the native format.
FORCE_INLINE inline uint32_t getblock ( const uint32_t * p, int i )
{
- return p[i];
+ return mongo::ConstDataView(reinterpret_cast<const char*>(p))
+ .read<mongo::ReverseLittleEndian<uint32_t>>(i * sizeof(uint32_t));
}
FORCE_INLINE inline uint64_t getblock ( const uint64_t * p, int i )
{
- return p[i];
+ return mongo::ConstDataView(reinterpret_cast<const char*>(p))
+ .read<mongo::ReverseLittleEndian<uint64_t>>(i * sizeof(uint64_t));
}
//-----------------------------------------------------------------------------
@@ -142,7 +153,7 @@ void MurmurHash3_x86_32 ( const void * key, int len, h1 = fmix(h1);
- *(uint32_t*)out = h1;
+ *(uint32_t*)out = mongo::endian::nativeToLittle(h1);
}
//-----------------------------------------------------------------------------
@@ -244,10 +255,10 @@ void MurmurHash3_x86_128 ( const void * key, const int len, h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
- ((uint32_t*)out)[0] = h1;
- ((uint32_t*)out)[1] = h2;
- ((uint32_t*)out)[2] = h3;
- ((uint32_t*)out)[3] = h4;
+ ((uint32_t*)out)[0] = mongo::endian::nativeToLittle(h1);
+ ((uint32_t*)out)[1] = mongo::endian::nativeToLittle(h2);
+ ((uint32_t*)out)[2] = mongo::endian::nativeToLittle(h3);
+ ((uint32_t*)out)[3] = mongo::endian::nativeToLittle(h4);
}
//-----------------------------------------------------------------------------
@@ -327,8 +338,8 @@ void MurmurHash3_x64_128 ( const void * key, const int len, h1 += h2;
h2 += h1;
- ((uint64_t*)out)[0] = h1;
- ((uint64_t*)out)[1] = h2;
+ ((uint64_t*)out)[0] = mongo::endian::nativeToLittle(h1);
+ ((uint64_t*)out)[1] = mongo::endian::nativeToLittle(h2);
}
//-----------------------------------------------------------------------------
diff --git a/src/third_party/murmurhash3/SConscript b/src/third_party/murmurhash3/SConscript index daab692df41..058701e9da8 100644 --- a/src/third_party/murmurhash3/SConscript +++ b/src/third_party/murmurhash3/SConscript @@ -1,2 +1,6 @@ Import("env") + +env.InjectThirdPartyIncludePaths(libraries=['boost']) + +env.Append(CPPPATH=['#src', '$BUILD_DIR']) env.Library("murmurhash3", ["MurmurHash3.cpp"]) |