diff options
Diffstat (limited to 'src/mongo/db/pipeline')
-rw-r--r-- | src/mongo/db/pipeline/SConscript | 3 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source.h | 8 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_graph_lookup.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/pipeline/lookup_set_cache.h | 77 | ||||
-rw-r--r-- | src/mongo/db/pipeline/lookup_set_cache_test.cpp | 89 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value_comparator.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/pipeline/value_comparator.h | 29 |
7 files changed, 165 insertions, 71 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index e5ba863fbc8..a6e03e82e58 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -351,8 +351,9 @@ env.CppUnitTest( 'lookup_set_cache_test.cpp', ], LIBDEPS=[ - 'document_value', '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/query/collation/collator_interface_mock', + 'document_value', ] ) diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index 12ec1f6aab4..32528244d87 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -1758,7 +1758,6 @@ private: boost::optional<Document> _nextValue; }; -// TODO SERVER-25139: Make $graphLookup respect the collation. class DocumentSourceGraphLookUp final : public DocumentSourceNeedsMongod { public: boost::optional<Document> getNext() final; @@ -1887,10 +1886,9 @@ private: boost::optional<ValueUnorderedSet> _frontier; // Tracks nodes that have been discovered for a given input. Keys are the '_id' value of the - // document from the foreign collection, value is the document itself. We use boost::optional - // to defer initialization until the ExpressionContext containing the correct comparator is - // injected. - boost::optional<ValueUnorderedMap<BSONObj>> _visited; + // document from the foreign collection, value is the document itself. The keys are compared + // using the simple collation. + ValueUnorderedMap<BSONObj> _visited; // Caches query results to avoid repeating any work. This structure is maintained across calls // to getNext(). diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp index 9bba9e68317..5e75716e082 100644 --- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp @@ -73,11 +73,11 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNext() { performSearch(); std::vector<Value> results; - while (!_visited->empty()) { + while (!_visited.empty()) { // Remove elements one at a time to avoid consuming more memory. - auto it = _visited->begin(); + auto it = _visited.begin(); results.push_back(Value(it->second)); - _visited->erase(it); + _visited.erase(it); } MutableDocument output(*_input); @@ -85,7 +85,7 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNext() { _visitedUsageBytes = 0; - invariant(_visited->empty()); + invariant(_visited.empty()); return output.freeze(); } @@ -96,7 +96,7 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNextUnwound() { // If the unwind is not preserving empty arrays, we might have to process multiple inputs before // we get one that will produce an output. while (true) { - if (_visited->empty()) { + if (_visited.empty()) { // No results are left for the current input, so we should move on to the next one and // perform a new search. if (!(_input = pSource->getNext())) { @@ -110,7 +110,7 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNextUnwound() { } MutableDocument unwound(*_input); - if (_visited->empty()) { + if (_visited.empty()) { if ((*_unwind)->preserveNullAndEmptyArrays()) { // Since "preserveNullAndEmptyArrays" was specified, output a document even though // we had no result. @@ -124,13 +124,13 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNextUnwound() { continue; } } else { - auto it = _visited->begin(); + auto it = _visited.begin(); unwound.setNestedField(_as, Value(it->second)); if (indexPath) { unwound.setNestedField(*indexPath, Value(_outputIndex)); ++_outputIndex; } - _visited->erase(it); + _visited.erase(it); } return unwound.freeze(); @@ -140,7 +140,7 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNextUnwound() { void DocumentSourceGraphLookUp::dispose() { _cache.clear(); _frontier->clear(); - _visited->clear(); + _visited.clear(); pSource->dispose(); } @@ -203,7 +203,7 @@ BSONObj addDepthFieldToObject(const std::string& field, long long depth, BSONObj bool DocumentSourceGraphLookUp::addToVisitedAndFrontier(BSONObj result, long long depth) { Value _id = Value(result.getField("_id")); - if (_visited->find(_id) != _visited->end()) { + if (_visited.find(_id) != _visited.end()) { // We've already seen this object, don't repeat any work. return false; } @@ -214,7 +214,7 @@ bool DocumentSourceGraphLookUp::addToVisitedAndFrontier(BSONObj result, long lon _depthField ? addDepthFieldToObject(_depthField->fullPath(), depth, result) : result; // Add the object to our '_visited' list. - (*_visited)[_id] = fullObject; + _visited[_id] = fullObject; // Update the size of '_visited' appropriately. _visitedUsageBytes += _id.getApproximateSize(); @@ -418,7 +418,7 @@ void DocumentSourceGraphLookUp::serializeToArray(std::vector<Value>& array, bool void DocumentSourceGraphLookUp::doInjectExpressionContext() { _fromExpCtx = pExpCtx->copyWith(_from); _frontier = pExpCtx->getValueComparator().makeUnorderedValueSet(); - _visited = pExpCtx->getValueComparator().makeUnorderedValueMap<BSONObj>(); + _cache.setValueComparator(pExpCtx->getValueComparator()); } void DocumentSourceGraphLookUp::doDetachFromOperationContext() { @@ -447,7 +447,9 @@ DocumentSourceGraphLookUp::DocumentSourceGraphLookUp( _startWith(std::move(startWith)), _additionalFilter(additionalFilter), _depthField(depthField), - _maxDepth(maxDepth) {} + _maxDepth(maxDepth), + _visited(ValueComparator::kInstance.makeUnorderedValueMap<BSONObj>()), + _cache(expCtx->getValueComparator()) {} intrusive_ptr<DocumentSource> DocumentSourceGraphLookUp::createFromBson( BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx) { diff --git a/src/mongo/db/pipeline/lookup_set_cache.h b/src/mongo/db/pipeline/lookup_set_cache.h index 5ef9cea62dc..d482b267ee8 100644 --- a/src/mongo/db/pipeline/lookup_set_cache.h +++ b/src/mongo/db/pipeline/lookup_set_cache.h @@ -36,13 +36,15 @@ #include <boost/multi_index_container.hpp> #include <boost/optional.hpp> #include <iostream> -#include <unordered_map> -#include <unordered_set> +#include <vector> +#include "mongo/base/string_data_comparator_interface.h" +#include "mongo/bson/bsonmisc.h" #include "mongo/bson/bsonobj.h" #include "mongo/db/pipeline/value.h" #include "mongo/db/pipeline/value_comparator.h" #include "mongo/stdx/functional.h" +#include "mongo/util/assert_util.h" namespace mongo { @@ -53,16 +55,36 @@ using boost::multi_index::member; using boost::multi_index::indexed_by; /** - * A least-recently-used cache from key to a set of values. It does not implement any default size - * limit, but includes the ability to evict down to both a specific number of elements, and down to - * a specific amount of memory. Memory usage includes only the size of the elements in the cache at - * the time of insertion, not the overhead incurred by the data structures in use. - * - * TODO SERVER-25139: This class must make all comparisons of user data using the aggregation - * operation's collation. + * A least-recently-used cache from key to a vector of values. It does not implement any default + * size limit, but includes the ability to evict down to both a specific number of elements, and + * down to a specific amount of memory. Memory usage includes only the size of the elements in the + * cache at the time of insertion, not the overhead incurred by the data structures in use. */ class LookupSetCache { public: + using Cached = std::pair<Value, std::vector<BSONObj>>; + + // boost::multi_index_container provides a system for implementing a cache. Here, we create + // a container of std::pair<Value, std::vector<BSONObj>>BSONObjSet, that is both sequenced, and + // has a unique + // index on the Value. From this, we are able to evict the least-recently-used member, and + // maintain key uniqueness. + using IndexedContainer = + multi_index_container<Cached, + indexed_by<sequenced<>, + hashed_unique<member<Cached, Value, &Cached::first>, + ValueComparator::Hasher, + ValueComparator::EqualTo>>>; + + /** + * Constructs the underlying cache data structure in such a way that respects the + * ValueComparator. This requires instantiating the multi_index_container with comparison and + * hasher functions obtained from the comparator. + */ + LookupSetCache(ValueComparator valueComparator) + : _valueComparator(std::move(valueComparator)), + _container(makeIndexedContainer(_valueComparator)) {} + /** * Insert "value" into the set with key "key". If "key" is already present in the cache, move it * to the middle of the cache. Otherwise, insert a new key in the middle of the cache. @@ -86,7 +108,7 @@ public: // We did not insert due to a duplicate key. auto cached = *result.first; // Update the cached value, moving it to the middle of the cache. - cached.second.insert(value); + cached.second.push_back(value); _container.replace(result.first, cached); _container.relocate(it, result.first); } else { @@ -151,9 +173,9 @@ public: } /** - * Retrieve the set of values with key "key". If not found, returns boost::none. + * Retrieve the vector of values with key "key". If not found, returns boost::none. */ - boost::optional<std::unordered_set<BSONObj, BSONObj::Hasher>> operator[](Value key) { + boost::optional<std::vector<BSONObj>> operator[](Value key) { auto it = boost::multi_index::get<1>(_container).find(key); if (it != boost::multi_index::get<1>(_container).end()) { boost::multi_index::get<0>(_container) @@ -164,19 +186,28 @@ public: return boost::none; } + /** + * Binds the cache to a new comparator that should be used to make all subsequent Value + * comparisons. + * + * TODO SERVER-25535: Remove this method. + */ + void setValueComparator(ValueComparator valueComparator) { + _valueComparator = std::move(valueComparator); + _container = makeIndexedContainer(_valueComparator); + } + private: - using Cached = std::pair<Value, std::unordered_set<BSONObj, BSONObj::Hasher>>; + IndexedContainer makeIndexedContainer(const ValueComparator& valueComparator) const { + return IndexedContainer( + boost::make_tuple(IndexedContainer::nth_index<0>::type::ctor_args(), + boost::make_tuple(0, + member<Cached, Value, &Cached::first>(), + valueComparator.getHasher(), + valueComparator.getEqualTo()))); + } - // boost::multi_index_container provides a system for implementing a cache. Here, we create - // a container of std::pair<Value, BSONObjSet>, that is both sequenced, and has a unique - // index on the Value. From this, we are able to evict the least-recently-used member, and - // maintain key uniqueness. - using IndexedContainer = - multi_index_container<Cached, - indexed_by<sequenced<>, - hashed_unique<member<Cached, Value, &Cached::first>, - ValueComparator::Hasher, - ValueComparator::EqualTo>>>; + ValueComparator _valueComparator; IndexedContainer _container; diff --git a/src/mongo/db/pipeline/lookup_set_cache_test.cpp b/src/mongo/db/pipeline/lookup_set_cache_test.cpp index 2903a3632f5..13f445e67c3 100644 --- a/src/mongo/db/pipeline/lookup_set_cache_test.cpp +++ b/src/mongo/db/pipeline/lookup_set_cache_test.cpp @@ -28,18 +28,31 @@ #include "mongo/platform/basic.h" +#include <algorithm> +#include <boost/optional.hpp> +#include <vector> + #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/pipeline/lookup_set_cache.h" +#include "mongo/db/pipeline/value_comparator.h" +#include "mongo/db/query/collation/collator_interface_mock.h" #include "mongo/unittest/unittest.h" namespace mongo { +bool vectorContains(const boost::optional<std::vector<BSONObj>>& vector, + const BSONObj& expectedObj) { + ASSERT_TRUE(vector); + return std::find(vector->begin(), vector->end(), expectedObj) != vector->end(); +} + BSONObj intToObj(int value) { return BSON("n" << value); } TEST(LookupSetCacheTest, InsertAndRetrieveWorksCorrectly) { - LookupSetCache cache; + const StringData::ComparatorInterface* stringComparator = nullptr; + LookupSetCache cache(stringComparator); cache.insert(Value(0), intToObj(1)); cache.insert(Value(0), intToObj(2)); cache.insert(Value(0), intToObj(3)); @@ -47,15 +60,16 @@ TEST(LookupSetCacheTest, InsertAndRetrieveWorksCorrectly) { cache.insert(Value(1), intToObj(5)); ASSERT(cache[Value(0)]); - ASSERT_TRUE(cache[Value(0)]->count(intToObj(1))); - ASSERT_TRUE(cache[Value(0)]->count(intToObj(2))); - ASSERT_TRUE(cache[Value(0)]->count(intToObj(3))); - ASSERT_FALSE(cache[Value(0)]->count(intToObj(4))); - ASSERT_FALSE(cache[Value(0)]->count(intToObj(5))); + ASSERT_TRUE(vectorContains(cache[Value(0)], intToObj(1))); + ASSERT_TRUE(vectorContains(cache[Value(0)], intToObj(2))); + ASSERT_TRUE(vectorContains(cache[Value(0)], intToObj(3))); + ASSERT_FALSE(vectorContains(cache[Value(0)], intToObj(4))); + ASSERT_FALSE(vectorContains(cache[Value(0)], intToObj(5))); } TEST(LookupSetCacheTest, CacheDoesEvictInExpectedOrder) { - LookupSetCache cache; + const StringData::ComparatorInterface* stringComparator = nullptr; + LookupSetCache cache(stringComparator); cache.insert(Value(0), intToObj(0)); cache.insert(Value(1), intToObj(0)); @@ -74,7 +88,8 @@ TEST(LookupSetCacheTest, CacheDoesEvictInExpectedOrder) { } TEST(LookupSetCacheTest, ReadDoesMoveKeyToFrontOfCache) { - LookupSetCache cache; + const StringData::ComparatorInterface* stringComparator = nullptr; + LookupSetCache cache(stringComparator); cache.insert(Value(0), intToObj(0)); cache.insert(Value(1), intToObj(0)); @@ -89,7 +104,8 @@ TEST(LookupSetCacheTest, ReadDoesMoveKeyToFrontOfCache) { } TEST(LookupSetCacheTest, InsertDoesPutKeyInMiddle) { - LookupSetCache cache; + const StringData::ComparatorInterface* stringComparator = nullptr; + LookupSetCache cache(stringComparator); cache.insert(Value(0), intToObj(0)); cache.insert(Value(1), intToObj(0)); @@ -102,7 +118,8 @@ TEST(LookupSetCacheTest, InsertDoesPutKeyInMiddle) { } TEST(LookupSetCacheTest, EvictDoesRespectMemoryUsage) { - LookupSetCache cache; + const StringData::ComparatorInterface* stringComparator = nullptr; + LookupSetCache cache(stringComparator); cache.insert(Value(0), intToObj(0)); cache.insert(Value(1), intToObj(0)); @@ -115,7 +132,8 @@ TEST(LookupSetCacheTest, EvictDoesRespectMemoryUsage) { } TEST(LookupSetCacheTest, ComplexAccessPatternDoesBehaveCorrectly) { - LookupSetCache cache; + const StringData::ComparatorInterface* stringComparator = nullptr; + LookupSetCache cache(stringComparator); for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) { @@ -128,11 +146,11 @@ TEST(LookupSetCacheTest, ComplexAccessPatternDoesBehaveCorrectly) { ASSERT_FALSE(cache[Value(0)]); ASSERT_TRUE(cache[Value(1)]); - ASSERT_TRUE(cache[Value(1)]->count(intToObj(0))); - ASSERT_TRUE(cache[Value(1)]->count(intToObj(1))); - ASSERT_TRUE(cache[Value(1)]->count(intToObj(2))); - ASSERT_TRUE(cache[Value(1)]->count(intToObj(3))); - ASSERT_TRUE(cache[Value(1)]->count(intToObj(4))); + ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(0))); + ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(1))); + ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(2))); + ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(3))); + ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(4))); cache.evictUntilSize(2); // Cache ordering is now {1: ..., 3: ...} @@ -151,4 +169,43 @@ TEST(LookupSetCacheTest, ComplexAccessPatternDoesBehaveCorrectly) { ASSERT_TRUE(cache[Value(5)]); } +TEST(LookupSetCacheTest, CacheKeysRespectCollation) { + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kToLowerString); + LookupSetCache cache(&collator); + + cache.insert(Value("foo"), intToObj(1)); + cache.insert(Value("FOO"), intToObj(2)); + cache.insert(Value("FOOz"), intToObj(3)); + + { + auto fooResult = cache[Value("FoO")]; + ASSERT_TRUE(fooResult); + ASSERT_EQ(2U, fooResult->size()); + } + + { + auto foozResult = cache[Value("fooZ")]; + ASSERT_TRUE(foozResult); + ASSERT_EQ(1U, foozResult->size()); + } +} + +// Cache values shouldn't respect collation, since they are distinct documents from the +// foreign collection. +TEST(LookupSetCacheTest, CachedValuesDontRespectCollation) { + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kToLowerString); + LookupSetCache cache(&collator); + + cache.insert(Value("foo"), + BSON("foo" + << "bar")); + cache.insert(Value("foo"), + BSON("foo" + << "BAR")); + + auto fooResult = cache[Value("foo")]; + ASSERT_TRUE(fooResult); + ASSERT_EQ(2U, fooResult->size()); +} + } // namespace mongo diff --git a/src/mongo/db/pipeline/value_comparator.cpp b/src/mongo/db/pipeline/value_comparator.cpp index a20a9460b6d..87eb92b73a6 100644 --- a/src/mongo/db/pipeline/value_comparator.cpp +++ b/src/mongo/db/pipeline/value_comparator.cpp @@ -34,6 +34,8 @@ namespace mongo { +const ValueComparator ValueComparator::kInstance{}; + bool ValueComparator::evaluate(Value::DeferredComparison deferredComparison) const { int cmp = Value::compare(deferredComparison.lhs, deferredComparison.rhs, _stringComparator); switch (deferredComparison.type) { diff --git a/src/mongo/db/pipeline/value_comparator.h b/src/mongo/db/pipeline/value_comparator.h index e6c55ed35cf..1e46da52ce9 100644 --- a/src/mongo/db/pipeline/value_comparator.h +++ b/src/mongo/db/pipeline/value_comparator.h @@ -42,22 +42,17 @@ class ValueComparator { public: /** * Functor compatible for use with unordered STL containers. - * - * TODO SERVER-25139: Remove the no-arguments constructor. */ class EqualTo { public: - EqualTo() = default; - explicit EqualTo(const ValueComparator* comparator) : _comparator(comparator) {} bool operator()(const Value& lhs, const Value& rhs) const { - return _comparator ? _comparator->compare(lhs, rhs) == 0 - : ValueComparator().compare(lhs, rhs) == 0; + return _comparator->compare(lhs, rhs) == 0; } private: - const ValueComparator* _comparator = nullptr; + const ValueComparator* _comparator; }; /** @@ -77,23 +72,23 @@ public: /** * Functor for computing the hash of a Value, compatible for use with unordered STL containers. - * - * TODO SERVER-25139: Remove the no-arguments constructor. */ class Hasher { public: - Hasher() = default; - explicit Hasher(const ValueComparator* comparator) : _comparator(comparator) {} size_t operator()(const Value& val) const { - return _comparator ? _comparator->hash(val) : ValueComparator().hash(val); + return _comparator->hash(val); } private: - const ValueComparator* _comparator = nullptr; + const ValueComparator* _comparator; }; + // Global comparator for performing simple Value comparisons. Value comparisons that require + // special database logic, such as collations, must instantiate their own comparator. + static const ValueComparator kInstance; + /** * Constructs a value comparator with simple comparison semantics. */ @@ -146,6 +141,14 @@ public: } /** + * Returns a function object which computes the hash of a Value such that equal Values under + * this comparator have equal hashes. + */ + Hasher getHasher() const { + return Hasher(this); + } + + /** * Construct an empty ordered set of Value whose ordering and equivalence classes are given by * this comparator. This comparator must outlive the returned set. */ |