summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2016-07-15 18:15:12 -0400
committerDavid Storch <david.storch@10gen.com>2016-07-19 22:35:21 -0400
commit7e986cc77f121e3af9a5f1217e89913745fc07f9 (patch)
treee29c4ec325199937b2723a500a462e6c125616ea /src/mongo/db
parent0b32158c9cb44b31078ca923ef5c8fff755c952a (diff)
downloadmongo-7e986cc77f121e3af9a5f1217e89913745fc07f9.tar.gz
SERVER-23990 add ValueComparator::Hasher for collation-aware Value hashing
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/pipeline/document.cpp5
-rw-r--r--src/mongo/db/pipeline/document.h3
-rw-r--r--src/mongo/db/pipeline/document_source_group.cpp4
-rw-r--r--src/mongo/db/pipeline/document_value_test.cpp6
-rw-r--r--src/mongo/db/pipeline/lookup_set_cache.h2
-rw-r--r--src/mongo/db/pipeline/value.cpp20
-rw-r--r--src/mongo/db/pipeline/value.h23
-rw-r--r--src/mongo/db/pipeline/value_comparator.h49
-rw-r--r--src/mongo/db/pipeline/value_comparator_test.cpp50
-rw-r--r--src/mongo/db/query/collation/collator_interface.cpp4
-rw-r--r--src/mongo/db/query/collation/collator_interface.h2
11 files changed, 125 insertions, 43 deletions
diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp
index df2eb8facdf..62ac9c1ac32 100644
--- a/src/mongo/db/pipeline/document.cpp
+++ b/src/mongo/db/pipeline/document.cpp
@@ -373,11 +373,12 @@ size_t Document::getApproximateSize() const {
return size;
}
-void Document::hash_combine(size_t& seed) const {
+void Document::hash_combine(size_t& seed,
+ const StringData::ComparatorInterface* stringComparator) const {
for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
StringData name = it->nameSD();
boost::hash_range(seed, name.rawData(), name.rawData() + name.size());
- it->val.hash_combine(seed);
+ it->val.hash_combine(seed, stringComparator);
}
}
diff --git a/src/mongo/db/pipeline/document.h b/src/mongo/db/pipeline/document.h
index 0a0220964bd..288036a1ba4 100644
--- a/src/mongo/db/pipeline/document.h
+++ b/src/mongo/db/pipeline/document.h
@@ -34,6 +34,7 @@
#include <boost/intrusive_ptr.hpp>
#include "mongo/base/string_data.h"
+#include "mongo/base/string_data_comparator_interface.h"
#include "mongo/bson/util/builder.h"
namespace mongo {
@@ -188,7 +189,7 @@ public:
* Meant to be used to create composite hashes suitable for
* hashed container classes such as unordered_map.
*/
- void hash_combine(size_t& seed) const;
+ void hash_combine(size_t& seed, const StringData::ComparatorInterface* stringComparator) const;
/**
* Add this document to the BSONObj under construction with the given BSONObjBuilder.
diff --git a/src/mongo/db/pipeline/document_source_group.cpp b/src/mongo/db/pipeline/document_source_group.cpp
index ed7ab95e575..5b97e76d370 100644
--- a/src/mongo/db/pipeline/document_source_group.cpp
+++ b/src/mongo/db/pipeline/document_source_group.cpp
@@ -156,7 +156,7 @@ boost::optional<Document> DocumentSourceGroup::getNextStreaming() {
void DocumentSourceGroup::dispose() {
// Free our resources.
- GroupsMap().swap(*_groups);
+ _groups = pExpCtx->getValueComparator().makeUnorderedValueMap<Accumulators>();
_sorterIterator.reset();
// Make us look done.
@@ -550,7 +550,7 @@ void DocumentSourceGroup::initialize() {
}
// We won't be using groups again so free its memory.
- GroupsMap().swap(*_groups);
+ _groups = pExpCtx->getValueComparator().makeUnorderedValueMap<Accumulators>();
_sorterIterator.reset(
Sorter<Value, Value>::Iterator::merge(sortedFiles, SortOptions(), SorterComparator()));
diff --git a/src/mongo/db/pipeline/document_value_test.cpp b/src/mongo/db/pipeline/document_value_test.cpp
index b9e942f5659..369f988eb6d 100644
--- a/src/mongo/db/pipeline/document_value_test.cpp
+++ b/src/mongo/db/pipeline/document_value_test.cpp
@@ -257,7 +257,8 @@ public:
}
size_t hash(const BSONObj& obj) {
size_t seed = 0x106e1e1;
- Document(obj).hash_combine(seed);
+ const StringData::ComparatorInterface* stringComparator = nullptr;
+ Document(obj).hash_combine(seed, stringComparator);
return seed;
}
};
@@ -1613,7 +1614,8 @@ private:
}
size_t hash(const Value& v) {
size_t seed = 0xf00ba6;
- v.hash_combine(seed);
+ const StringData::ComparatorInterface* stringComparator = nullptr;
+ v.hash_combine(seed, stringComparator);
return seed;
}
};
diff --git a/src/mongo/db/pipeline/lookup_set_cache.h b/src/mongo/db/pipeline/lookup_set_cache.h
index 7ca74f90964..2c4bf5869f1 100644
--- a/src/mongo/db/pipeline/lookup_set_cache.h
+++ b/src/mongo/db/pipeline/lookup_set_cache.h
@@ -175,7 +175,7 @@ private:
multi_index_container<Cached,
indexed_by<sequenced<>,
hashed_unique<member<Cached, Value, &Cached::first>,
- Value::Hash,
+ ValueComparator::Hasher,
ValueComparator::EqualTo>>>;
IndexedContainer _container;
diff --git a/src/mongo/db/pipeline/value.cpp b/src/mongo/db/pipeline/value.cpp
index d5e6db63d6b..860c4269ba7 100644
--- a/src/mongo/db/pipeline/value.cpp
+++ b/src/mongo/db/pipeline/value.cpp
@@ -813,7 +813,8 @@ int Value::compare(const Value& rL,
verify(false);
}
-void Value::hash_combine(size_t& seed) const {
+void Value::hash_combine(size_t& seed,
+ const StringData::ComparatorInterface* stringComparator) const {
BSONType type = getType();
boost::hash_combine(seed, canonicalizeBSONType(type));
@@ -881,21 +882,30 @@ void Value::hash_combine(size_t& seed) const {
break;
case Code:
- case Symbol:
- case String: {
+ case Symbol: {
StringData sd = getStringData();
MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
break;
}
+ case String: {
+ StringData sd = getStringData();
+ if (stringComparator) {
+ stringComparator->hash_combine(seed, sd);
+ } else {
+ MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed);
+ }
+ break;
+ }
+
case Object:
- getDocument().hash_combine(seed);
+ getDocument().hash_combine(seed, stringComparator);
break;
case Array: {
const vector<Value>& vec = getArray();
for (size_t i = 0; i < vec.size(); i++)
- vec[i].hash_combine(seed);
+ vec[i].hash_combine(seed, stringComparator);
break;
}
diff --git a/src/mongo/db/pipeline/value.h b/src/mongo/db/pipeline/value.h
index e151e58c813..6914ff10014 100644
--- a/src/mongo/db/pipeline/value.h
+++ b/src/mongo/db/pipeline/value.h
@@ -290,17 +290,16 @@ public:
/// Get the approximate memory size of the value, in bytes. Includes sizeof(Value)
size_t getApproximateSize() const;
- /** Calculate a hash value.
+ /**
+ * Calculate a hash value.
+ *
+ * Meant to be used to create composite hashes suitable for hashed container classes such as
+ * unordered_map<>.
*
- * Meant to be used to create composite hashes suitable for
- * hashed container classes such as unordered_map<>.
+ * Most callers should prefer the utilities in ValueComparator for hashing and creating function
+ * objects for computing the hash. See value_comparator.h.
*/
- void hash_combine(size_t& seed) const;
-
- /// struct Hash is defined to enable the use of Values as keys in unordered_map.
- struct Hash : std::unary_function<const Value&, size_t> {
- size_t operator()(const Value& rV) const;
- };
+ void hash_combine(size_t& seed, const StringData::ComparatorInterface* stringComparator) const;
/// Call this after memcpying to update ref counts if needed
void memcpyed() const {
@@ -361,12 +360,6 @@ inline size_t Value::getArrayLength() const {
return getArray().size();
}
-inline size_t Value::Hash::operator()(const Value& v) const {
- size_t seed = 0xf0afbeef;
- v.hash_combine(seed);
- return seed;
-}
-
inline StringData Value::getStringData() const {
return _storage.getString();
}
diff --git a/src/mongo/db/pipeline/value_comparator.h b/src/mongo/db/pipeline/value_comparator.h
index cbf423ccf92..5479ef149eb 100644
--- a/src/mongo/db/pipeline/value_comparator.h
+++ b/src/mongo/db/pipeline/value_comparator.h
@@ -76,6 +76,25 @@ public:
};
/**
+ * Functor for computing the hash of a Value, compatible for use with unordered STL containers.
+ *
+ * TODO SERVER-23349: Remove the no-arguments constructor.
+ */
+ class Hasher {
+ public:
+ Hasher() = default;
+
+ explicit Hasher(const ValueComparator* comparator) : _comparator(comparator) {}
+
+ size_t operator()(const Value& val) const {
+ return _comparator ? _comparator->hash(val) : ValueComparator().hash(val);
+ }
+
+ private:
+ const ValueComparator* _comparator = nullptr;
+ };
+
+ /**
* Constructs a value comparator with simple comparison semantics.
*/
ValueComparator() = default;
@@ -95,6 +114,16 @@ public:
}
/**
+ * Computes a hash of 'val' since that Values which compare equal under this comparator also
+ * have equal hashes.
+ */
+ size_t hash(const Value& val) const {
+ size_t seed = 0xf0afbeef;
+ val.hash_combine(seed, _stringComparator);
+ return seed;
+ }
+
+ /**
* Evaluates a deferred comparison object that was generated by invoking one of the comparison
* operators on the Value class.
*/
@@ -127,12 +156,9 @@ public:
/**
* Construct an empty unordered set of Value whose equivalence classes are given by this
* comparator. This comparator must outlive the returned set.
- *
- * TODO SERVER-23990: Make Value::Hash use the collation. The returned set won't be correctly
- * collation-aware until this work is done.
*/
- std::unordered_set<Value, Value::Hash, EqualTo> makeUnorderedValueSet() const {
- return std::unordered_set<Value, Value::Hash, EqualTo>(0, Value::Hash(), EqualTo(this));
+ std::unordered_set<Value, Hasher, EqualTo> makeUnorderedValueSet() const {
+ return std::unordered_set<Value, Hasher, EqualTo>(0, Hasher(this), EqualTo(this));
}
/**
@@ -147,13 +173,10 @@ public:
/**
* Construct an empty unordered map from Value to type T whose equivalence classes are given by
* this comparator. This comparator must outlive the returned set.
- *
- * TODO SERVER-23990: Make Value::Hash use the collation. The returned map won't be correctly
- * collation-aware until this work is done.
*/
template <typename T>
- std::unordered_map<Value, T, Value::Hash, EqualTo> makeUnorderedValueMap() const {
- return std::unordered_map<Value, T, Value::Hash, EqualTo>(0, Value::Hash(), EqualTo(this));
+ std::unordered_map<Value, T, Hasher, EqualTo> makeUnorderedValueMap() const {
+ return std::unordered_map<Value, T, Hasher, EqualTo>(0, Hasher(this), EqualTo(this));
}
private:
@@ -166,12 +189,14 @@ private:
using ValueSet = std::set<Value, ValueComparator::LessThan>;
-using ValueUnorderedSet = std::unordered_set<Value, Value::Hash, ValueComparator::EqualTo>;
+using ValueUnorderedSet =
+ std::unordered_set<Value, ValueComparator::Hasher, ValueComparator::EqualTo>;
template <typename T>
using ValueMap = std::map<Value, T, ValueComparator::LessThan>;
template <typename T>
-using ValueUnorderedMap = std::unordered_map<Value, T, Value::Hash, ValueComparator::EqualTo>;
+using ValueUnorderedMap =
+ std::unordered_map<Value, T, ValueComparator::Hasher, ValueComparator::EqualTo>;
} // namespace mongo
diff --git a/src/mongo/db/pipeline/value_comparator_test.cpp b/src/mongo/db/pipeline/value_comparator_test.cpp
index 05386061680..afa364b11a8 100644
--- a/src/mongo/db/pipeline/value_comparator_test.cpp
+++ b/src/mongo/db/pipeline/value_comparator_test.cpp
@@ -219,5 +219,55 @@ TEST(ValueComparatorTest, NestedArrayEqualityRespectsCollator) {
ASSERT_FALSE(ValueComparator(&collator).evaluate(val3 == val1));
}
+TEST(ValueComparatorTest, ValueHasherRespectsCollator) {
+ CollatorInterfaceMock toLowerCollator(CollatorInterfaceMock::MockType::kToLowerString);
+ ValueComparator valueCmp(&toLowerCollator);
+ ASSERT_EQ(valueCmp.hash(Value("foo")), valueCmp.hash(Value("FOO")));
+ ASSERT_NE(valueCmp.hash(Value("foo")), valueCmp.hash(Value("FOOz")));
+}
+
+TEST(ValueComparatorTest, ValueHasherRespectsCollatorWithNestedObjects) {
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kAlwaysEqual);
+ ValueComparator valueCmp(&collator);
+ Value val1(Document{{"foo", "abc"}});
+ Value val2(Document{{"foo", "def"}});
+ ASSERT_EQ(valueCmp.hash(val1), valueCmp.hash(val2));
+}
+
+TEST(ValueComparatorTest, ValueHasherRespectsCollatorWithNestedArrays) {
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kAlwaysEqual);
+ ValueComparator valueCmp(&collator);
+ Value val1(std::vector<Value>{Value("a"), Value("b")});
+ Value val2(std::vector<Value>{Value("c"), Value("d")});
+ Value val3(std::vector<Value>{Value("c"), Value("d"), Value("e")});
+ ASSERT_EQ(valueCmp.hash(val1), valueCmp.hash(val2));
+ ASSERT_NE(valueCmp.hash(val1), valueCmp.hash(val3));
+ ASSERT_NE(valueCmp.hash(val2), valueCmp.hash(val3));
+}
+
+TEST(ValueComparatorTest, UnorderedSetOfValueRespectsCollation) {
+ CollatorInterfaceMock toLowerCollator(CollatorInterfaceMock::MockType::kToLowerString);
+ ValueComparator valueCmp(&toLowerCollator);
+ auto set = valueCmp.makeUnorderedValueSet();
+ ASSERT_TRUE(set.insert(Value("foo")).second);
+ ASSERT_FALSE(set.insert(Value("FOO")).second);
+ ASSERT_TRUE(set.insert(Value("FOOz")).second);
+ ASSERT_EQ(set.size(), 2U);
+ ASSERT_EQ(set.count(Value("FoO")), 1U);
+ ASSERT_EQ(set.count(Value("fooZ")), 1U);
+}
+
+TEST(ValueComparatorTest, UnorderedMapOfValueRespectsCollation) {
+ CollatorInterfaceMock toLowerCollator(CollatorInterfaceMock::MockType::kToLowerString);
+ ValueComparator valueCmp(&toLowerCollator);
+ auto map = valueCmp.makeUnorderedValueMap<int>();
+ map[Value("foo")] = 1;
+ map[Value("FOO")] = 2;
+ map[Value("FOOz")] = 3;
+ ASSERT_EQ(map.size(), 2U);
+ ASSERT_EQ(map[Value("FoO")], 2);
+ ASSERT_EQ(map[Value("fooZ")], 3);
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/query/collation/collator_interface.cpp b/src/mongo/db/query/collation/collator_interface.cpp
index 57ea9f55c75..76692e7af51 100644
--- a/src/mongo/db/query/collation/collator_interface.cpp
+++ b/src/mongo/db/query/collation/collator_interface.cpp
@@ -34,9 +34,9 @@
namespace mongo {
-size_t CollatorInterface::hash(StringData stringToHash) const {
+void CollatorInterface::hash_combine(size_t& seed, StringData stringToHash) const {
auto comparisonKey = getComparisonKey(stringToHash);
- return SimpleStringDataComparator::kInstance.hash(comparisonKey.getKeyData());
+ SimpleStringDataComparator::kInstance.hash_combine(seed, comparisonKey.getKeyData());
}
} // namespace mongo
diff --git a/src/mongo/db/query/collation/collator_interface.h b/src/mongo/db/query/collation/collator_interface.h
index 05e4570f97b..76f236c7413 100644
--- a/src/mongo/db/query/collation/collator_interface.h
+++ b/src/mongo/db/query/collation/collator_interface.h
@@ -104,7 +104,7 @@ public:
* Hashes the string such that strings which are equal under this collation also have equal
* hashes.
*/
- size_t hash(StringData stringToHash) const final;
+ void hash_combine(size_t& seed, StringData stringToHash) const final;
/**
* Returns the comparison key for 'stringData', according to this collation. See ComparisonKey's