summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJason Carey <jcarey@argv.me>2018-10-22 16:24:53 -0400
committerJason Carey <jcarey@argv.me>2018-10-23 11:36:02 -0400
commitbb91ac5a2c0b647b80d65d4aaf5a448351b9cc33 (patch)
treefb8b12deb3bc131648e0990cb8b524c0439b3869 /src
parent1d05a7b3852c6898cab4aae738c9920861f073bd (diff)
downloadmongo-bb91ac5a2c0b647b80d65d4aaf5a448351b9cc33.tar.gz
SERVER-37258 Fix UB in MurmurHash3
When we did the data view integration into murmurhash3 (to make it big endian safe) we missed that murmurhash3 uses negative offsets of pointers. Because DataView took size_t for offsetting parameters, this involved wrapping pointers around (adding very large numbers to pointers to produce negative offsets). That's UB, and newer ubsan caught it. This change fixes that by making the DataView offsetting logic ptrdiff_t based. It also introduces test vectors for murmurhash3 that I've used to verify that we haven't changed its output.
Diffstat (limited to 'src')
-rw-r--r--src/mongo/base/SConscript1
-rw-r--r--src/mongo/base/data_cursor.h16
-rw-r--r--src/mongo/base/data_range.h4
-rw-r--r--src/mongo/base/data_range_cursor_test.cpp4
-rw-r--r--src/mongo/base/data_range_test.cpp4
-rw-r--r--src/mongo/base/data_view.h12
-rw-r--r--src/mongo/base/murmurhash3_test.cpp143
-rw-r--r--src/mongo/util/bufreader.h2
8 files changed, 165 insertions, 21 deletions
diff --git a/src/mongo/base/SConscript b/src/mongo/base/SConscript
index 27c0bbf756c..56617fcdd66 100644
--- a/src/mongo/base/SConscript
+++ b/src/mongo/base/SConscript
@@ -42,6 +42,7 @@ env.CppUnitTest('base_test',
'encoded_value_storage_test.cpp',
'initializer_dependency_graph_test.cpp',
'initializer_test.cpp',
+ 'murmurhash3_test.cpp',
'owned_pointer_map_test.cpp',
'owned_pointer_vector_test.cpp',
'parse_number_test.cpp',
diff --git a/src/mongo/base/data_cursor.h b/src/mongo/base/data_cursor.h
index efee59596fd..1c9fcb35e06 100644
--- a/src/mongo/base/data_cursor.h
+++ b/src/mongo/base/data_cursor.h
@@ -45,20 +45,20 @@ public:
ConstDataCursor(ConstDataView::bytes_type bytes) : ConstDataView(bytes) {}
- ConstDataCursor operator+(std::size_t s) const {
+ ConstDataCursor operator+(std::ptrdiff_t s) const {
return view() + s;
}
- ConstDataCursor& operator+=(std::size_t s) {
+ ConstDataCursor& operator+=(std::ptrdiff_t s) {
*this = view() + s;
return *this;
}
- ConstDataCursor operator-(std::size_t s) const {
+ ConstDataCursor operator-(std::ptrdiff_t s) const {
return view() - s;
}
- ConstDataCursor& operator-=(std::size_t s) {
+ ConstDataCursor& operator-=(std::ptrdiff_t s) {
*this = view() - s;
return *this;
}
@@ -121,20 +121,20 @@ public:
return view();
}
- DataCursor operator+(std::size_t s) const {
+ DataCursor operator+(std::ptrdiff_t s) const {
return view() + s;
}
- DataCursor& operator+=(std::size_t s) {
+ DataCursor& operator+=(std::ptrdiff_t s) {
*this = view() + s;
return *this;
}
- DataCursor operator-(std::size_t s) const {
+ DataCursor operator-(std::ptrdiff_t s) const {
return view() - s;
}
- DataCursor& operator-=(std::size_t s) {
+ DataCursor& operator-=(std::ptrdiff_t s) {
*this = view() - s;
return *this;
}
diff --git a/src/mongo/base/data_range.h b/src/mongo/base/data_range.h
index c09ab139cb2..0067cb3b49d 100644
--- a/src/mongo/base/data_range.h
+++ b/src/mongo/base/data_range.h
@@ -73,7 +73,7 @@ public:
}
template <typename T>
- Status read(T* t, size_t offset = 0) const {
+ Status readInto(T* t, size_t offset = 0) const {
if (offset > length()) {
return makeOffsetStatus(offset);
}
@@ -85,7 +85,7 @@ public:
template <typename T>
StatusWith<T> read(std::size_t offset = 0) const {
T t(DataType::defaultConstruct<T>());
- Status s = read(&t, offset);
+ Status s = readInto(&t, offset);
if (s.isOK()) {
return StatusWith<T>(std::move(t));
diff --git a/src/mongo/base/data_range_cursor_test.cpp b/src/mongo/base/data_range_cursor_test.cpp
index d4170aaae44..41cb4c820f2 100644
--- a/src/mongo/base/data_range_cursor_test.cpp
+++ b/src/mongo/base/data_range_cursor_test.cpp
@@ -67,7 +67,7 @@ TEST(DataRangeCursor, ConstDataRangeCursorType) {
ConstDataRangeCursor out(nullptr, nullptr);
- auto inner = cdrc.read(&out);
+ auto inner = cdrc.readInto(&out);
ASSERT_OK(inner);
ASSERT_EQUALS(buf, out.data());
@@ -100,7 +100,7 @@ TEST(DataRangeCursor, DataRangeCursorType) {
DataRangeCursor out(nullptr, nullptr);
- Status status = drc.read(&out);
+ Status status = drc.readInto(&out);
ASSERT_OK(status);
ASSERT_EQUALS(buf, out.data());
diff --git a/src/mongo/base/data_range_test.cpp b/src/mongo/base/data_range_test.cpp
index e1399bbf7c4..9a5f5726714 100644
--- a/src/mongo/base/data_range_test.cpp
+++ b/src/mongo/base/data_range_test.cpp
@@ -66,7 +66,7 @@ TEST(DataRange, ConstDataRangeType) {
ConstDataRange out(nullptr, nullptr);
- auto inner = cdr.read(&out);
+ auto inner = cdr.readInto(&out);
ASSERT_OK(inner);
ASSERT_EQUALS(buf, out.data());
@@ -101,7 +101,7 @@ TEST(DataRange, DataRangeType) {
DataRange out(nullptr, nullptr);
- Status status = dr.read(&out);
+ Status status = dr.readInto(&out);
ASSERT_OK(status);
ASSERT_EQUALS(buf, out.data());
diff --git a/src/mongo/base/data_view.h b/src/mongo/base/data_view.h
index c3d0bbc8634..0247a04db3f 100644
--- a/src/mongo/base/data_view.h
+++ b/src/mongo/base/data_view.h
@@ -45,22 +45,22 @@ public:
ConstDataView(bytes_type bytes) : _bytes(bytes) {}
- bytes_type view(std::size_t offset = 0) const {
+ bytes_type view(std::ptrdiff_t offset = 0) const {
return _bytes + offset;
}
template <typename T>
- const ConstDataView& read(T* t, size_t offset = 0) const {
+ const ConstDataView& readInto(T* t, std::ptrdiff_t offset = 0) const {
DataType::unsafeLoad(t, view(offset), nullptr);
return *this;
}
template <typename T>
- T read(std::size_t offset = 0) const {
+ T read(std::ptrdiff_t offset = 0) const {
T t(DataType::defaultConstruct<T>());
- read(&t, offset);
+ readInto(&t, offset);
return t;
}
@@ -75,14 +75,14 @@ public:
DataView(bytes_type bytes) : ConstDataView(bytes) {}
- bytes_type view(std::size_t offset = 0) const {
+ bytes_type view(std::ptrdiff_t offset = 0) const {
// It is safe to cast away const here since the pointer stored in our base class was
// originally non-const by way of our constructor.
return const_cast<bytes_type>(ConstDataView::view(offset));
}
template <typename T>
- DataView& write(const T& value, std::size_t offset = 0) {
+ DataView& write(const T& value, std::ptrdiff_t offset = 0) {
DataType::unsafeStore(value, view(offset), nullptr);
return *this;
diff --git a/src/mongo/base/murmurhash3_test.cpp b/src/mongo/base/murmurhash3_test.cpp
new file mode 100644
index 00000000000..a2f4219b856
--- /dev/null
+++ b/src/mongo/base/murmurhash3_test.cpp
@@ -0,0 +1,143 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <array>
+#include <string>
+
+#include <third_party/murmurhash3/MurmurHash3.h>
+
+#include "mongo/unittest/unittest.h"
+
+#include "mongo/base/data_type_endian.h"
+#include "mongo/base/data_view.h"
+
+#define TEST_STRING32(str, seed, expected) ASSERT_EQUALS(compute32(str, seed), expected)
+#define TEST_STRING64(str, seed, a, b) \
+ do { \
+ auto pair = compute128(str, seed); \
+ ASSERT_EQUALS(pair.first, a); \
+ ASSERT_EQUALS(pair.second, b); \
+ } while (0)
+
+namespace mongo {
+namespace {
+
+uint32_t compute32(StringData input, uint32_t seed) {
+ char hash[4];
+ MurmurHash3_x86_32(input.rawData(), input.size(), seed, &hash);
+ return ConstDataView(hash).read<LittleEndian<uint32_t>>();
+}
+
+std::pair<uint64_t, uint64_t> compute128(StringData input, uint32_t seed) {
+ char hash[16];
+ MurmurHash3_x64_128(input.rawData(), input.size(), seed, &hash);
+ return {ConstDataView(hash).read<LittleEndian<uint64_t>>(),
+ ConstDataView(hash).read<LittleEndian<uint64_t>>(8)};
+}
+
+TEST(MurmurHash3, TestVectors32) {
+ TEST_STRING32("", 0, 0ULL);
+
+ TEST_STRING32("", 1ULL, 0x514E28B7ULL);
+ TEST_STRING32("", 0xffffffffULL, 0x81F16F39ULL); // make sure seed value is handled unsigned
+ TEST_STRING32("\0\0\0\0"_sd, 0ULL, 0x2362F9DEULL); // make sure we handle embedded nulls
+
+
+ TEST_STRING32("aaaa", 0x9747b28cULL, 0x5A97808AULL); // one full chunk
+ TEST_STRING32("aaa", 0x9747b28cULL, 0x283E0130ULL); // three characters
+ TEST_STRING32("aa", 0x9747b28cULL, 0x5D211726ULL); // two characters
+ TEST_STRING32("a", 0x9747b28cULL, 0x7FA09EA6ULL); // one character
+
+ // Endian order within the chunks
+ TEST_STRING32("abcd", 0x9747b28cULL, 0xF0478627ULL); // one full chunk
+ TEST_STRING32("abc", 0x9747b28cULL, 0xC84A62DDULL);
+ TEST_STRING32("ab", 0x9747b28cULL, 0x74875592ULL);
+ TEST_STRING32("a", 0x9747b28cULL, 0x7FA09EA6ULL);
+
+ TEST_STRING32("Hello, world!", 0x9747b28cULL, 0x24884CBAULL);
+
+ // Make sure you handle UTF-8 high characters. A bcrypt implementation messed this up
+ TEST_STRING32("ππππππππ", 0x9747b28cULL, 0xD58063C1ULL); // U+03C0: Greek Small Letter Pi
+
+ // String of 256 characters.
+ // Make sure you don't store string lengths in a char, and overflow at 255 bytes (as OpenBSD's
+ // canonical BCrypt implementation did)
+ TEST_STRING32(std::string(256, 'a'), 0x9747b28cULL, 0x37405BDCULL);
+}
+
+
+TEST(MurmurHash3, TestVectors64) {
+ TEST_STRING64("", 0, 0ULL, 0ULL);
+
+ TEST_STRING64("", 1ULL, 5048724184180415669ULL, 5864299874987029891ULL);
+ TEST_STRING64("",
+ 0xffffffffULL,
+ 7706185961851046380ULL,
+ 9616347466054386795ULL); // make sure seed value is handled unsigned
+ TEST_STRING64("\0\0\0\0"_sd,
+ 0ULL,
+ 14961230494313510588ULL,
+ 6383328099726337777ULL); // make sure we handle embedded nulls
+
+
+ TEST_STRING64(
+ "aaaa", 0x9747b28cULL, 13033599803469372400ULL, 11949150323828610719ULL); // one full chunk
+ TEST_STRING64("aaa",
+ 0x9747b28cULL,
+ 10278871841506805355ULL,
+ 17952965428487426844ULL); // three characters
+ TEST_STRING64(
+ "aa", 0x9747b28cULL, 1343929393636293407ULL, 16804672932933964801ULL); // two characters
+ TEST_STRING64(
+ "a", 0x9747b28cULL, 6694838689256856093ULL, 11415968713816993796ULL); // one character
+
+ // Endian order within the chunks
+ TEST_STRING64(
+ "abcd", 0x9747b28cULL, 5310993687375067025ULL, 9979528070057666491ULL); // one full chunk
+ TEST_STRING64("abc", 0x9747b28cULL, 3982135406228655836ULL, 14835035517329147071ULL);
+ TEST_STRING64("ab", 0x9747b28cULL, 9526501539032868875ULL, 9131386788375312171ULL);
+ TEST_STRING64("a", 0x9747b28cULL, 6694838689256856093ULL, 11415968713816993796ULL);
+
+ TEST_STRING64("Hello, world!", 0x9747b28cULL, 17132966038248896814ULL, 17896881015324243642ULL);
+
+ // Make sure you handle UTF-8 high characters. A bcrypt implementation messed this up
+ TEST_STRING64("ππππππππ",
+ 0x9747b28cULL,
+ 10874605236735318559ULL,
+ 17921841414653337979ULL); // U+03C0: Greek Small Letter Pi
+
+ // String of 256 characters.
+ // Make sure you don't store string lengths in a char, and overflow at 255 bytes (as OpenBSD's
+ // canonical BCrypt implementation did)
+ TEST_STRING64(
+ std::string(256, 'a'), 0x9747b28cULL, 557766291455132100ULL, 14184293241195392597ULL);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/util/bufreader.h b/src/mongo/util/bufreader.h
index 74a8b5a319c..c43f3253aeb 100644
--- a/src/mongo/util/bufreader.h
+++ b/src/mongo/util/bufreader.h
@@ -78,7 +78,7 @@ public:
/** read in the object specified, but do not advance buffer pointer */
template <typename T>
void peek(T& t) const {
- uassertStatusOK(ConstDataRange(_pos, _end).read(&t));
+ uassertStatusOK(ConstDataRange(_pos, _end).readInto(&t));
}
/** read in and return an object of the specified type, but do not advance buffer pointer */