diff options
author | David Storch <david.storch@10gen.com> | 2016-07-15 18:15:12 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2016-07-19 22:35:21 -0400 |
commit | 7e986cc77f121e3af9a5f1217e89913745fc07f9 (patch) | |
tree | e29c4ec325199937b2723a500a462e6c125616ea /src/mongo/db | |
parent | 0b32158c9cb44b31078ca923ef5c8fff755c952a (diff) | |
download | mongo-7e986cc77f121e3af9a5f1217e89913745fc07f9.tar.gz |
SERVER-23990 add ValueComparator::Hasher for collation-aware Value hashing
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/pipeline/document.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document.h | 3 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_group.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_value_test.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/pipeline/lookup_set_cache.h | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value.cpp | 20 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value.h | 23 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value_comparator.h | 49 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value_comparator_test.cpp | 50 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/query/collation/collator_interface.h | 2 |
11 files changed, 125 insertions, 43 deletions
diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp index df2eb8facdf..62ac9c1ac32 100644 --- a/src/mongo/db/pipeline/document.cpp +++ b/src/mongo/db/pipeline/document.cpp @@ -373,11 +373,12 @@ size_t Document::getApproximateSize() const { return size; } -void Document::hash_combine(size_t& seed) const { +void Document::hash_combine(size_t& seed, + const StringData::ComparatorInterface* stringComparator) const { for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { StringData name = it->nameSD(); boost::hash_range(seed, name.rawData(), name.rawData() + name.size()); - it->val.hash_combine(seed); + it->val.hash_combine(seed, stringComparator); } } diff --git a/src/mongo/db/pipeline/document.h b/src/mongo/db/pipeline/document.h index 0a0220964bd..288036a1ba4 100644 --- a/src/mongo/db/pipeline/document.h +++ b/src/mongo/db/pipeline/document.h @@ -34,6 +34,7 @@ #include <boost/intrusive_ptr.hpp> #include "mongo/base/string_data.h" +#include "mongo/base/string_data_comparator_interface.h" #include "mongo/bson/util/builder.h" namespace mongo { @@ -188,7 +189,7 @@ public: * Meant to be used to create composite hashes suitable for * hashed container classes such as unordered_map. */ - void hash_combine(size_t& seed) const; + void hash_combine(size_t& seed, const StringData::ComparatorInterface* stringComparator) const; /** * Add this document to the BSONObj under construction with the given BSONObjBuilder. diff --git a/src/mongo/db/pipeline/document_source_group.cpp b/src/mongo/db/pipeline/document_source_group.cpp index ed7ab95e575..5b97e76d370 100644 --- a/src/mongo/db/pipeline/document_source_group.cpp +++ b/src/mongo/db/pipeline/document_source_group.cpp @@ -156,7 +156,7 @@ boost::optional<Document> DocumentSourceGroup::getNextStreaming() { void DocumentSourceGroup::dispose() { // Free our resources. - GroupsMap().swap(*_groups); + _groups = pExpCtx->getValueComparator().makeUnorderedValueMap<Accumulators>(); _sorterIterator.reset(); // Make us look done. @@ -550,7 +550,7 @@ void DocumentSourceGroup::initialize() { } // We won't be using groups again so free its memory. - GroupsMap().swap(*_groups); + _groups = pExpCtx->getValueComparator().makeUnorderedValueMap<Accumulators>(); _sorterIterator.reset( Sorter<Value, Value>::Iterator::merge(sortedFiles, SortOptions(), SorterComparator())); diff --git a/src/mongo/db/pipeline/document_value_test.cpp b/src/mongo/db/pipeline/document_value_test.cpp index b9e942f5659..369f988eb6d 100644 --- a/src/mongo/db/pipeline/document_value_test.cpp +++ b/src/mongo/db/pipeline/document_value_test.cpp @@ -257,7 +257,8 @@ public: } size_t hash(const BSONObj& obj) { size_t seed = 0x106e1e1; - Document(obj).hash_combine(seed); + const StringData::ComparatorInterface* stringComparator = nullptr; + Document(obj).hash_combine(seed, stringComparator); return seed; } }; @@ -1613,7 +1614,8 @@ private: } size_t hash(const Value& v) { size_t seed = 0xf00ba6; - v.hash_combine(seed); + const StringData::ComparatorInterface* stringComparator = nullptr; + v.hash_combine(seed, stringComparator); return seed; } }; diff --git a/src/mongo/db/pipeline/lookup_set_cache.h b/src/mongo/db/pipeline/lookup_set_cache.h index 7ca74f90964..2c4bf5869f1 100644 --- a/src/mongo/db/pipeline/lookup_set_cache.h +++ b/src/mongo/db/pipeline/lookup_set_cache.h @@ -175,7 +175,7 @@ private: multi_index_container<Cached, indexed_by<sequenced<>, hashed_unique<member<Cached, Value, &Cached::first>, - Value::Hash, + ValueComparator::Hasher, ValueComparator::EqualTo>>>; IndexedContainer _container; diff --git a/src/mongo/db/pipeline/value.cpp b/src/mongo/db/pipeline/value.cpp index d5e6db63d6b..860c4269ba7 100644 --- a/src/mongo/db/pipeline/value.cpp +++ b/src/mongo/db/pipeline/value.cpp @@ -813,7 +813,8 @@ int Value::compare(const Value& rL, verify(false); } -void Value::hash_combine(size_t& seed) const { +void Value::hash_combine(size_t& seed, + const StringData::ComparatorInterface* stringComparator) const { BSONType type = getType(); boost::hash_combine(seed, canonicalizeBSONType(type)); @@ -881,21 +882,30 @@ void Value::hash_combine(size_t& seed) const { break; case Code: - case Symbol: - case String: { + case Symbol: { StringData sd = getStringData(); MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed); break; } + case String: { + StringData sd = getStringData(); + if (stringComparator) { + stringComparator->hash_combine(seed, sd); + } else { + MurmurHash3_x86_32(sd.rawData(), sd.size(), seed, &seed); + } + break; + } + case Object: - getDocument().hash_combine(seed); + getDocument().hash_combine(seed, stringComparator); break; case Array: { const vector<Value>& vec = getArray(); for (size_t i = 0; i < vec.size(); i++) - vec[i].hash_combine(seed); + vec[i].hash_combine(seed, stringComparator); break; } diff --git a/src/mongo/db/pipeline/value.h b/src/mongo/db/pipeline/value.h index e151e58c813..6914ff10014 100644 --- a/src/mongo/db/pipeline/value.h +++ b/src/mongo/db/pipeline/value.h @@ -290,17 +290,16 @@ public: /// Get the approximate memory size of the value, in bytes. Includes sizeof(Value) size_t getApproximateSize() const; - /** Calculate a hash value. + /** + * Calculate a hash value. + * + * Meant to be used to create composite hashes suitable for hashed container classes such as + * unordered_map<>. * - * Meant to be used to create composite hashes suitable for - * hashed container classes such as unordered_map<>. + * Most callers should prefer the utilities in ValueComparator for hashing and creating function + * objects for computing the hash. See value_comparator.h. */ - void hash_combine(size_t& seed) const; - - /// struct Hash is defined to enable the use of Values as keys in unordered_map. - struct Hash : std::unary_function<const Value&, size_t> { - size_t operator()(const Value& rV) const; - }; + void hash_combine(size_t& seed, const StringData::ComparatorInterface* stringComparator) const; /// Call this after memcpying to update ref counts if needed void memcpyed() const { @@ -361,12 +360,6 @@ inline size_t Value::getArrayLength() const { return getArray().size(); } -inline size_t Value::Hash::operator()(const Value& v) const { - size_t seed = 0xf0afbeef; - v.hash_combine(seed); - return seed; -} - inline StringData Value::getStringData() const { return _storage.getString(); } diff --git a/src/mongo/db/pipeline/value_comparator.h b/src/mongo/db/pipeline/value_comparator.h index cbf423ccf92..5479ef149eb 100644 --- a/src/mongo/db/pipeline/value_comparator.h +++ b/src/mongo/db/pipeline/value_comparator.h @@ -76,6 +76,25 @@ public: }; /** + * Functor for computing the hash of a Value, compatible for use with unordered STL containers. + * + * TODO SERVER-23349: Remove the no-arguments constructor. + */ + class Hasher { + public: + Hasher() = default; + + explicit Hasher(const ValueComparator* comparator) : _comparator(comparator) {} + + size_t operator()(const Value& val) const { + return _comparator ? _comparator->hash(val) : ValueComparator().hash(val); + } + + private: + const ValueComparator* _comparator = nullptr; + }; + + /** * Constructs a value comparator with simple comparison semantics. */ ValueComparator() = default; @@ -95,6 +114,16 @@ public: } /** + * Computes a hash of 'val' since that Values which compare equal under this comparator also + * have equal hashes. + */ + size_t hash(const Value& val) const { + size_t seed = 0xf0afbeef; + val.hash_combine(seed, _stringComparator); + return seed; + } + + /** * Evaluates a deferred comparison object that was generated by invoking one of the comparison * operators on the Value class. */ @@ -127,12 +156,9 @@ public: /** * Construct an empty unordered set of Value whose equivalence classes are given by this * comparator. This comparator must outlive the returned set. - * - * TODO SERVER-23990: Make Value::Hash use the collation. The returned set won't be correctly - * collation-aware until this work is done. */ - std::unordered_set<Value, Value::Hash, EqualTo> makeUnorderedValueSet() const { - return std::unordered_set<Value, Value::Hash, EqualTo>(0, Value::Hash(), EqualTo(this)); + std::unordered_set<Value, Hasher, EqualTo> makeUnorderedValueSet() const { + return std::unordered_set<Value, Hasher, EqualTo>(0, Hasher(this), EqualTo(this)); } /** @@ -147,13 +173,10 @@ public: /** * Construct an empty unordered map from Value to type T whose equivalence classes are given by * this comparator. This comparator must outlive the returned set. - * - * TODO SERVER-23990: Make Value::Hash use the collation. The returned map won't be correctly - * collation-aware until this work is done. */ template <typename T> - std::unordered_map<Value, T, Value::Hash, EqualTo> makeUnorderedValueMap() const { - return std::unordered_map<Value, T, Value::Hash, EqualTo>(0, Value::Hash(), EqualTo(this)); + std::unordered_map<Value, T, Hasher, EqualTo> makeUnorderedValueMap() const { + return std::unordered_map<Value, T, Hasher, EqualTo>(0, Hasher(this), EqualTo(this)); } private: @@ -166,12 +189,14 @@ private: using ValueSet = std::set<Value, ValueComparator::LessThan>; -using ValueUnorderedSet = std::unordered_set<Value, Value::Hash, ValueComparator::EqualTo>; +using ValueUnorderedSet = + std::unordered_set<Value, ValueComparator::Hasher, ValueComparator::EqualTo>; template <typename T> using ValueMap = std::map<Value, T, ValueComparator::LessThan>; template <typename T> -using ValueUnorderedMap = std::unordered_map<Value, T, Value::Hash, ValueComparator::EqualTo>; +using ValueUnorderedMap = + std::unordered_map<Value, T, ValueComparator::Hasher, ValueComparator::EqualTo>; } // namespace mongo diff --git a/src/mongo/db/pipeline/value_comparator_test.cpp b/src/mongo/db/pipeline/value_comparator_test.cpp index 05386061680..afa364b11a8 100644 --- a/src/mongo/db/pipeline/value_comparator_test.cpp +++ b/src/mongo/db/pipeline/value_comparator_test.cpp @@ -219,5 +219,55 @@ TEST(ValueComparatorTest, NestedArrayEqualityRespectsCollator) { ASSERT_FALSE(ValueComparator(&collator).evaluate(val3 == val1)); } +TEST(ValueComparatorTest, ValueHasherRespectsCollator) { + CollatorInterfaceMock toLowerCollator(CollatorInterfaceMock::MockType::kToLowerString); + ValueComparator valueCmp(&toLowerCollator); + ASSERT_EQ(valueCmp.hash(Value("foo")), valueCmp.hash(Value("FOO"))); + ASSERT_NE(valueCmp.hash(Value("foo")), valueCmp.hash(Value("FOOz"))); +} + +TEST(ValueComparatorTest, ValueHasherRespectsCollatorWithNestedObjects) { + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kAlwaysEqual); + ValueComparator valueCmp(&collator); + Value val1(Document{{"foo", "abc"}}); + Value val2(Document{{"foo", "def"}}); + ASSERT_EQ(valueCmp.hash(val1), valueCmp.hash(val2)); +} + +TEST(ValueComparatorTest, ValueHasherRespectsCollatorWithNestedArrays) { + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kAlwaysEqual); + ValueComparator valueCmp(&collator); + Value val1(std::vector<Value>{Value("a"), Value("b")}); + Value val2(std::vector<Value>{Value("c"), Value("d")}); + Value val3(std::vector<Value>{Value("c"), Value("d"), Value("e")}); + ASSERT_EQ(valueCmp.hash(val1), valueCmp.hash(val2)); + ASSERT_NE(valueCmp.hash(val1), valueCmp.hash(val3)); + ASSERT_NE(valueCmp.hash(val2), valueCmp.hash(val3)); +} + +TEST(ValueComparatorTest, UnorderedSetOfValueRespectsCollation) { + CollatorInterfaceMock toLowerCollator(CollatorInterfaceMock::MockType::kToLowerString); + ValueComparator valueCmp(&toLowerCollator); + auto set = valueCmp.makeUnorderedValueSet(); + ASSERT_TRUE(set.insert(Value("foo")).second); + ASSERT_FALSE(set.insert(Value("FOO")).second); + ASSERT_TRUE(set.insert(Value("FOOz")).second); + ASSERT_EQ(set.size(), 2U); + ASSERT_EQ(set.count(Value("FoO")), 1U); + ASSERT_EQ(set.count(Value("fooZ")), 1U); +} + +TEST(ValueComparatorTest, UnorderedMapOfValueRespectsCollation) { + CollatorInterfaceMock toLowerCollator(CollatorInterfaceMock::MockType::kToLowerString); + ValueComparator valueCmp(&toLowerCollator); + auto map = valueCmp.makeUnorderedValueMap<int>(); + map[Value("foo")] = 1; + map[Value("FOO")] = 2; + map[Value("FOOz")] = 3; + ASSERT_EQ(map.size(), 2U); + ASSERT_EQ(map[Value("FoO")], 2); + ASSERT_EQ(map[Value("fooZ")], 3); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/query/collation/collator_interface.cpp b/src/mongo/db/query/collation/collator_interface.cpp index 57ea9f55c75..76692e7af51 100644 --- a/src/mongo/db/query/collation/collator_interface.cpp +++ b/src/mongo/db/query/collation/collator_interface.cpp @@ -34,9 +34,9 @@ namespace mongo { -size_t CollatorInterface::hash(StringData stringToHash) const { +void CollatorInterface::hash_combine(size_t& seed, StringData stringToHash) const { auto comparisonKey = getComparisonKey(stringToHash); - return SimpleStringDataComparator::kInstance.hash(comparisonKey.getKeyData()); + SimpleStringDataComparator::kInstance.hash_combine(seed, comparisonKey.getKeyData()); } } // namespace mongo diff --git a/src/mongo/db/query/collation/collator_interface.h b/src/mongo/db/query/collation/collator_interface.h index 05e4570f97b..76f236c7413 100644 --- a/src/mongo/db/query/collation/collator_interface.h +++ b/src/mongo/db/query/collation/collator_interface.h @@ -104,7 +104,7 @@ public: * Hashes the string such that strings which are equal under this collation also have equal * hashes. */ - size_t hash(StringData stringToHash) const final; + void hash_combine(size_t& seed, StringData stringToHash) const final; /** * Returns the comparison key for 'stringData', according to this collation. See ComparisonKey's |