summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Mark Benvenuto <mark.benvenuto@mongodb.com> 2015-11-23 16:50:29 -0500
committer: Jason Carey <jcarey@argv.me> 2016-03-02 16:41:53 -0500
commit: cdd95deb1b5ebb8dbf9ec76a96ebcf511ba3a14e (patch)
tree: c071c528787e090fe8095d417272eea71a276e39 /src
parent: 9405fa9e1e0d5f24cb0bc210969cb35de84ae8fc (diff)
download: mongo-cdd95deb1b5ebb8dbf9ec76a96ebcf511ba3a14e.tar.gz
SERVER-21828 Murmurhash for bigendian
Make MurmurHash produce the same output on big-endian architectures as on little-endian ones.
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/base/string_data.cpp | 11
-rw-r--r-- src/mongo/db/fts/fts_index_format_test.cpp | 7
-rw-r--r-- src/third_party/murmurhash3/MurmurHash3.cpp | 29
-rw-r--r-- src/third_party/murmurhash3/SConscript | 4
4 files changed, 37 insertions, 14 deletions
diff --git a/src/mongo/base/string_data.cpp b/src/mongo/base/string_data.cpp
index f00b185fc29..df7374fcf93 100644
--- a/src/mongo/base/string_data.cpp
+++ b/src/mongo/base/string_data.cpp
@@ -30,6 +30,9 @@
#include <ostream>
#include <third_party/murmurhash3/MurmurHash3.h>
+#include "mongo/base/data_type_endian.h"
+#include "mongo/base/data_view.h"
+
namespace mongo {
namespace {
@@ -39,16 +42,16 @@ size_t murmur3(StringData str);
template <>
size_t murmur3<4>(StringData str) {
- uint32_t hash;
+ char hash[4];
MurmurHash3_x86_32(str.rawData(), str.size(), 0, &hash);
- return hash;
+ return ConstDataView(hash).read<LittleEndian<std::uint32_t>>();
}
template <>
size_t murmur3<8>(StringData str) {
- uint64_t hash[2];
+ char hash[16];
MurmurHash3_x64_128(str.rawData(), str.size(), 0, hash);
- return static_cast<size_t>(hash[0]);
+ return static_cast<size_t>(ConstDataView(hash).read<LittleEndian<std::uint64_t>>());
}
} // namespace
diff --git a/src/mongo/db/fts/fts_index_format_test.cpp b/src/mongo/db/fts/fts_index_format_test.cpp
index 59ea9345fd4..8d19e975cab 100644
--- a/src/mongo/db/fts/fts_index_format_test.cpp
+++ b/src/mongo/db/fts/fts_index_format_test.cpp
@@ -209,7 +209,10 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) {
string longWordCat = longPrefix + "cat";
// "aaa...aaasat"
string longWordSat = longPrefix + "sat";
- string text = mongoutils::str::stream() << longWordCat << " " << longWordSat;
+ // "aaa...aaamongodbfts"
+ string longWordMongoDBFts = longPrefix + "mongodbfts";
+ string text = mongoutils::str::stream() << longWordCat << " " << longWordSat << " "
+ << longWordMongoDBFts;
FTSIndexFormat::getKeys(spec, BSON("data" << text), &keys);
// Hard-coded expected computed keys for future-proofing.
@@ -218,6 +221,8 @@ TEST(FTSIndexFormat, LongWordTextIndexVersion2) {
expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab8e78455d827ebb87cbe87f392bf45f6");
// sat
expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaf2d6f58bb3b81b97e611ae7ccac6dea7");
+ // mongodbfts
+ expectedKeys.insert("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaae1d6b34f5d9c92acecd8cce32f747b27");
assertEqualsIndexKeys(expectedKeys, keys);
}
diff --git a/src/third_party/murmurhash3/MurmurHash3.cpp b/src/third_party/murmurhash3/MurmurHash3.cpp
index 421697842b6..82084e0d38d 100644
--- a/src/third_party/murmurhash3/MurmurHash3.cpp
+++ b/src/third_party/murmurhash3/MurmurHash3.cpp
@@ -9,6 +9,10 @@
#include "MurmurHash3.h"
+#include "mongo/base/data_type_endian.h"
+#include "mongo/base/data_view.h"
+#include "mongo/platform/endian.h"
+
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
@@ -51,15 +55,22 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r )
//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
+//
+// NOTE, MongoDB code: JC -
+// ConstDataView handles the byte swapping and avoids unaligned reads. Note
+// that we need reversed versions because we actually want little endian
+// encoded blocks out of getblock, and our input data is in the native format.
FORCE_INLINE inline uint32_t getblock ( const uint32_t * p, int i )
{
- return p[i];
+ return mongo::ConstDataView(reinterpret_cast<const char*>(p))
+ .read<mongo::ReverseLittleEndian<uint32_t>>(i * sizeof(uint32_t));
}
FORCE_INLINE inline uint64_t getblock ( const uint64_t * p, int i )
{
- return p[i];
+ return mongo::ConstDataView(reinterpret_cast<const char*>(p))
+ .read<mongo::ReverseLittleEndian<uint64_t>>(i * sizeof(uint64_t));
}
//-----------------------------------------------------------------------------
@@ -142,7 +153,7 @@ void MurmurHash3_x86_32 ( const void * key, int len,
h1 = fmix(h1);
- *(uint32_t*)out = h1;
+ *(uint32_t*)out = mongo::endian::nativeToLittle(h1);
}
//-----------------------------------------------------------------------------
@@ -244,10 +255,10 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
- ((uint32_t*)out)[0] = h1;
- ((uint32_t*)out)[1] = h2;
- ((uint32_t*)out)[2] = h3;
- ((uint32_t*)out)[3] = h4;
+ ((uint32_t*)out)[0] = mongo::endian::nativeToLittle(h1);
+ ((uint32_t*)out)[1] = mongo::endian::nativeToLittle(h2);
+ ((uint32_t*)out)[2] = mongo::endian::nativeToLittle(h3);
+ ((uint32_t*)out)[3] = mongo::endian::nativeToLittle(h4);
}
//-----------------------------------------------------------------------------
@@ -327,8 +338,8 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
h1 += h2;
h2 += h1;
- ((uint64_t*)out)[0] = h1;
- ((uint64_t*)out)[1] = h2;
+ ((uint64_t*)out)[0] = mongo::endian::nativeToLittle(h1);
+ ((uint64_t*)out)[1] = mongo::endian::nativeToLittle(h2);
}
//-----------------------------------------------------------------------------
diff --git a/src/third_party/murmurhash3/SConscript b/src/third_party/murmurhash3/SConscript
index daab692df41..058701e9da8 100644
--- a/src/third_party/murmurhash3/SConscript
+++ b/src/third_party/murmurhash3/SConscript
@@ -1,2 +1,6 @@
Import("env")
+
+env.InjectThirdPartyIncludePaths(libraries=['boost'])
+
+env.Append(CPPPATH=['#src', '$BUILD_DIR'])
env.Library("murmurhash3", ["MurmurHash3.cpp"])