summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2016-08-09 14:59:00 -0400
committerDavid Storch <david.storch@10gen.com>2016-08-10 16:53:20 -0400
commitd9bb24e5f5af34862182424b890762cb2c3e9bc8 (patch)
tree0f7244684d3a9615f2a23af14f3725dd60e1af7e
parent08b8ccece70c72a88162604734f895f0d8e35d10 (diff)
downloadmongo-d9bb24e5f5af34862182424b890762cb2c3e9bc8.tar.gz
SERVER-25139 make $graphLookup's cache collation-aware
Ensures that comparisons made during graph traversal respect the collation. Deduplication using _id, however, does not use the collation, in order to ensure that node identity is preserved.
-rw-r--r--jstests/aggregation/sources/graphLookup/collation_graphlookup.js136
-rw-r--r--src/mongo/db/pipeline/SConscript3
-rw-r--r--src/mongo/db/pipeline/document_source.h8
-rw-r--r--src/mongo/db/pipeline/document_source_graph_lookup.cpp28
-rw-r--r--src/mongo/db/pipeline/lookup_set_cache.h77
-rw-r--r--src/mongo/db/pipeline/lookup_set_cache_test.cpp89
-rw-r--r--src/mongo/db/pipeline/value_comparator.cpp2
-rw-r--r--src/mongo/db/pipeline/value_comparator.h29
8 files changed, 301 insertions, 71 deletions
diff --git a/jstests/aggregation/sources/graphLookup/collation_graphlookup.js b/jstests/aggregation/sources/graphLookup/collation_graphlookup.js
new file mode 100644
index 00000000000..833b2f2d771
--- /dev/null
+++ b/jstests/aggregation/sources/graphLookup/collation_graphlookup.js
@@ -0,0 +1,136 @@
+/**
+ * Tests that the $graphLookup stage respects the collation when matching between the
+ * 'connectFromField' and the 'connectToField'. $graphLookup should use the collation
+ * set on the aggregation, or the default collation of the collection.
+ */
+(function() {
+ "use strict";
+
+ var res;
+ const caseInsensitiveUS = {collation: {locale: "en_US", strength: 2}};
+ const caseSensitiveUS = {collation: {locale: "en_US", strength: 3}};
+
+ var coll = db.collation_graphlookup;
+ var foreignColl = db.collation_graphlookup_foreign;
+
+ // Test that $graphLookup respects the collation set on the aggregation pipeline. Case
+ // insensitivity should mean that we find both "jeremy" and "jimmy" as friends.
+ coll.drop();
+ assert.writeOK(coll.insert({username: "erica", friends: ["jeremy", "jimmy"]}));
+ assert.writeOK(coll.insert([{username: "JEREMY"}, {username: "JIMMY"}]));
+
+ res = coll.aggregate(
+ [
+ {$match: {username: "erica"}},
+ {
+ $graphLookup: {
+ from: coll.getName(),
+ startWith: "$friends",
+ connectFromField: "friends",
+ connectToField: "username",
+ as: "friendUsers"
+ }
+ }
+ ],
+ caseInsensitiveUS)
+ .toArray();
+ assert.eq(1, res.length);
+ assert.eq("erica", res[0].username);
+ assert.eq(2, res[0].friendUsers.length);
+
+ // Negative test: ensure that we don't find any friends when the collation is simple.
+ res = coll.aggregate([
+ {$match: {username: "erica"}},
+ {
+ $graphLookup: {
+ from: coll.getName(),
+ startWith: "$friends",
+ connectFromField: "friends",
+ connectToField: "username",
+ as: "friendUsers"
+ }
+ }
+ ])
+ .toArray();
+ assert.eq(1, res.length);
+ assert.eq("erica", res[0].username);
+ assert.eq(0, res[0].friendUsers.length);
+
+ coll.drop();
+ assert.commandWorked(db.createCollection(coll.getName(), caseInsensitiveUS));
+ assert.writeOK(coll.insert({username: "erica", friends: ["jeremy", "jimmy"]}));
+ foreignColl.drop();
+ assert.commandWorked(db.createCollection(foreignColl.getName(), caseSensitiveUS));
+ assert.writeOK(foreignColl.insert([{username: "JEREMY"}, {username: "JIMMY"}]));
+
+ // Test that $graphLookup inherits the default collation of the collection on which it is run,
+ // and that this collation is used instead of the default collation of the foreign collection.
+ res = coll.aggregate([
+ {$match: {username: "erica"}},
+ {
+ $graphLookup: {
+ from: foreignColl.getName(),
+ startWith: "$friends",
+ connectFromField: "friends",
+ connectToField: "username",
+ as: "friendUsers"
+ }
+ }
+ ])
+ .toArray();
+ assert.eq(1, res.length);
+ assert.eq("erica", res[0].username);
+ assert.eq(2, res[0].friendUsers.length);
+
+ // Test that we don't use the collation to dedup string _id values. This would cause us to miss
+ // nodes in the graph that have distinct _id values which compare equal under the collation.
+ coll.drop();
+ assert.writeOK(coll.insert({username: "erica", friends: ["jeremy"]}));
+ assert.writeOK(coll.insert({_id: "foo", username: "JEREMY", friends: ["jimmy"]}));
+ assert.writeOK(coll.insert({_id: "FOO", username: "jimmy", friends: []}));
+
+ res = coll.aggregate(
+ [
+ {$match: {username: "erica"}},
+ {
+ $graphLookup: {
+ from: coll.getName(),
+ startWith: "$friends",
+ connectFromField: "friends",
+ connectToField: "username",
+ as: "friendUsers"
+ }
+ }
+ ],
+ caseInsensitiveUS)
+ .toArray();
+ assert.eq(1, res.length);
+ assert.eq("erica", res[0].username);
+ assert.eq(2, res[0].friendUsers.length);
+
+ // Test that the result set is not deduplicated under the collation. If two documents are
+ // entirely equal under the collation, they should still both get returned in the "as" field.
+ coll.drop();
+ assert.writeOK(coll.insert({username: "erica", friends: ["jeremy"]}));
+ assert.writeOK(coll.insert({_id: "foo", username: "jeremy"}));
+ assert.writeOK(coll.insert({_id: "FOO", username: "JEREMY"}));
+
+ res = coll.aggregate(
+ [
+ {$match: {username: "erica"}},
+ {
+ $graphLookup: {
+ from: coll.getName(),
+ startWith: "$friends",
+ connectFromField: "friends",
+ connectToField: "username",
+ as: "friendUsers"
+ }
+ }
+ ],
+ caseInsensitiveUS)
+ .toArray();
+ assert.eq(1, res.length);
+ assert.eq("erica", res[0].username);
+ assert.eq(2, res[0].friendUsers.length);
+})();
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index e5ba863fbc8..a6e03e82e58 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -351,8 +351,9 @@ env.CppUnitTest(
'lookup_set_cache_test.cpp',
],
LIBDEPS=[
- 'document_value',
'$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/query/collation/collator_interface_mock',
+ 'document_value',
]
)
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index 12ec1f6aab4..32528244d87 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -1758,7 +1758,6 @@ private:
boost::optional<Document> _nextValue;
};
-// TODO SERVER-25139: Make $graphLookup respect the collation.
class DocumentSourceGraphLookUp final : public DocumentSourceNeedsMongod {
public:
boost::optional<Document> getNext() final;
@@ -1887,10 +1886,9 @@ private:
boost::optional<ValueUnorderedSet> _frontier;
// Tracks nodes that have been discovered for a given input. Keys are the '_id' value of the
- // document from the foreign collection, value is the document itself. We use boost::optional
- // to defer initialization until the ExpressionContext containing the correct comparator is
- // injected.
- boost::optional<ValueUnorderedMap<BSONObj>> _visited;
+ // document from the foreign collection, value is the document itself. The keys are compared
+ // using the simple collation.
+ ValueUnorderedMap<BSONObj> _visited;
// Caches query results to avoid repeating any work. This structure is maintained across calls
// to getNext().
diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp
index 9bba9e68317..5e75716e082 100644
--- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp
@@ -73,11 +73,11 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNext() {
performSearch();
std::vector<Value> results;
- while (!_visited->empty()) {
+ while (!_visited.empty()) {
// Remove elements one at a time to avoid consuming more memory.
- auto it = _visited->begin();
+ auto it = _visited.begin();
results.push_back(Value(it->second));
- _visited->erase(it);
+ _visited.erase(it);
}
MutableDocument output(*_input);
@@ -85,7 +85,7 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNext() {
_visitedUsageBytes = 0;
- invariant(_visited->empty());
+ invariant(_visited.empty());
return output.freeze();
}
@@ -96,7 +96,7 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNextUnwound() {
// If the unwind is not preserving empty arrays, we might have to process multiple inputs before
// we get one that will produce an output.
while (true) {
- if (_visited->empty()) {
+ if (_visited.empty()) {
// No results are left for the current input, so we should move on to the next one and
// perform a new search.
if (!(_input = pSource->getNext())) {
@@ -110,7 +110,7 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNextUnwound() {
}
MutableDocument unwound(*_input);
- if (_visited->empty()) {
+ if (_visited.empty()) {
if ((*_unwind)->preserveNullAndEmptyArrays()) {
// Since "preserveNullAndEmptyArrays" was specified, output a document even though
// we had no result.
@@ -124,13 +124,13 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNextUnwound() {
continue;
}
} else {
- auto it = _visited->begin();
+ auto it = _visited.begin();
unwound.setNestedField(_as, Value(it->second));
if (indexPath) {
unwound.setNestedField(*indexPath, Value(_outputIndex));
++_outputIndex;
}
- _visited->erase(it);
+ _visited.erase(it);
}
return unwound.freeze();
@@ -140,7 +140,7 @@ boost::optional<Document> DocumentSourceGraphLookUp::getNextUnwound() {
void DocumentSourceGraphLookUp::dispose() {
_cache.clear();
_frontier->clear();
- _visited->clear();
+ _visited.clear();
pSource->dispose();
}
@@ -203,7 +203,7 @@ BSONObj addDepthFieldToObject(const std::string& field, long long depth, BSONObj
bool DocumentSourceGraphLookUp::addToVisitedAndFrontier(BSONObj result, long long depth) {
Value _id = Value(result.getField("_id"));
- if (_visited->find(_id) != _visited->end()) {
+ if (_visited.find(_id) != _visited.end()) {
// We've already seen this object, don't repeat any work.
return false;
}
@@ -214,7 +214,7 @@ bool DocumentSourceGraphLookUp::addToVisitedAndFrontier(BSONObj result, long lon
_depthField ? addDepthFieldToObject(_depthField->fullPath(), depth, result) : result;
// Add the object to our '_visited' list.
- (*_visited)[_id] = fullObject;
+ _visited[_id] = fullObject;
// Update the size of '_visited' appropriately.
_visitedUsageBytes += _id.getApproximateSize();
@@ -418,7 +418,7 @@ void DocumentSourceGraphLookUp::serializeToArray(std::vector<Value>& array, bool
void DocumentSourceGraphLookUp::doInjectExpressionContext() {
_fromExpCtx = pExpCtx->copyWith(_from);
_frontier = pExpCtx->getValueComparator().makeUnorderedValueSet();
- _visited = pExpCtx->getValueComparator().makeUnorderedValueMap<BSONObj>();
+ _cache.setValueComparator(pExpCtx->getValueComparator());
}
void DocumentSourceGraphLookUp::doDetachFromOperationContext() {
@@ -447,7 +447,9 @@ DocumentSourceGraphLookUp::DocumentSourceGraphLookUp(
_startWith(std::move(startWith)),
_additionalFilter(additionalFilter),
_depthField(depthField),
- _maxDepth(maxDepth) {}
+ _maxDepth(maxDepth),
+ _visited(ValueComparator::kInstance.makeUnorderedValueMap<BSONObj>()),
+ _cache(expCtx->getValueComparator()) {}
intrusive_ptr<DocumentSource> DocumentSourceGraphLookUp::createFromBson(
BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx) {
diff --git a/src/mongo/db/pipeline/lookup_set_cache.h b/src/mongo/db/pipeline/lookup_set_cache.h
index 5ef9cea62dc..d482b267ee8 100644
--- a/src/mongo/db/pipeline/lookup_set_cache.h
+++ b/src/mongo/db/pipeline/lookup_set_cache.h
@@ -36,13 +36,15 @@
#include <boost/multi_index_container.hpp>
#include <boost/optional.hpp>
#include <iostream>
-#include <unordered_map>
-#include <unordered_set>
+#include <vector>
+#include "mongo/base/string_data_comparator_interface.h"
+#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/db/pipeline/value.h"
#include "mongo/db/pipeline/value_comparator.h"
#include "mongo/stdx/functional.h"
+#include "mongo/util/assert_util.h"
namespace mongo {
@@ -53,16 +55,36 @@ using boost::multi_index::member;
using boost::multi_index::indexed_by;
/**
- * A least-recently-used cache from key to a set of values. It does not implement any default size
- * limit, but includes the ability to evict down to both a specific number of elements, and down to
- * a specific amount of memory. Memory usage includes only the size of the elements in the cache at
- * the time of insertion, not the overhead incurred by the data structures in use.
- *
- * TODO SERVER-25139: This class must make all comparisons of user data using the aggregation
- * operation's collation.
+ * A least-recently-used cache from key to a vector of values. It does not implement any default
+ * size limit, but includes the ability to evict down to both a specific number of elements, and
+ * down to a specific amount of memory. Memory usage includes only the size of the elements in the
+ * cache at the time of insertion, not the overhead incurred by the data structures in use.
*/
class LookupSetCache {
public:
+ using Cached = std::pair<Value, std::vector<BSONObj>>;
+
+ // boost::multi_index_container provides a system for implementing a cache. Here, we create
+ // a container of std::pair<Value, std::vector<BSONObj>>BSONObjSet, that is both sequenced, and
+ // has a unique
+ // index on the Value. From this, we are able to evict the least-recently-used member, and
+ // maintain key uniqueness.
+ using IndexedContainer =
+ multi_index_container<Cached,
+ indexed_by<sequenced<>,
+ hashed_unique<member<Cached, Value, &Cached::first>,
+ ValueComparator::Hasher,
+ ValueComparator::EqualTo>>>;
+
+ /**
+ * Constructs the underlying cache data structure in such a way that respects the
+ * ValueComparator. This requires instantiating the multi_index_container with comparison and
+ * hasher functions obtained from the comparator.
+ */
+ LookupSetCache(ValueComparator valueComparator)
+ : _valueComparator(std::move(valueComparator)),
+ _container(makeIndexedContainer(_valueComparator)) {}
+
/**
* Insert "value" into the set with key "key". If "key" is already present in the cache, move it
* to the middle of the cache. Otherwise, insert a new key in the middle of the cache.
@@ -86,7 +108,7 @@ public:
// We did not insert due to a duplicate key.
auto cached = *result.first;
// Update the cached value, moving it to the middle of the cache.
- cached.second.insert(value);
+ cached.second.push_back(value);
_container.replace(result.first, cached);
_container.relocate(it, result.first);
} else {
@@ -151,9 +173,9 @@ public:
}
/**
- * Retrieve the set of values with key "key". If not found, returns boost::none.
+ * Retrieve the vector of values with key "key". If not found, returns boost::none.
*/
- boost::optional<std::unordered_set<BSONObj, BSONObj::Hasher>> operator[](Value key) {
+ boost::optional<std::vector<BSONObj>> operator[](Value key) {
auto it = boost::multi_index::get<1>(_container).find(key);
if (it != boost::multi_index::get<1>(_container).end()) {
boost::multi_index::get<0>(_container)
@@ -164,19 +186,28 @@ public:
return boost::none;
}
+ /**
+ * Binds the cache to a new comparator that should be used to make all subsequent Value
+ * comparisons.
+ *
+ * TODO SERVER-25535: Remove this method.
+ */
+ void setValueComparator(ValueComparator valueComparator) {
+ _valueComparator = std::move(valueComparator);
+ _container = makeIndexedContainer(_valueComparator);
+ }
+
private:
- using Cached = std::pair<Value, std::unordered_set<BSONObj, BSONObj::Hasher>>;
+ IndexedContainer makeIndexedContainer(const ValueComparator& valueComparator) const {
+ return IndexedContainer(
+ boost::make_tuple(IndexedContainer::nth_index<0>::type::ctor_args(),
+ boost::make_tuple(0,
+ member<Cached, Value, &Cached::first>(),
+ valueComparator.getHasher(),
+ valueComparator.getEqualTo())));
+ }
- // boost::multi_index_container provides a system for implementing a cache. Here, we create
- // a container of std::pair<Value, BSONObjSet>, that is both sequenced, and has a unique
- // index on the Value. From this, we are able to evict the least-recently-used member, and
- // maintain key uniqueness.
- using IndexedContainer =
- multi_index_container<Cached,
- indexed_by<sequenced<>,
- hashed_unique<member<Cached, Value, &Cached::first>,
- ValueComparator::Hasher,
- ValueComparator::EqualTo>>>;
+ ValueComparator _valueComparator;
IndexedContainer _container;
diff --git a/src/mongo/db/pipeline/lookup_set_cache_test.cpp b/src/mongo/db/pipeline/lookup_set_cache_test.cpp
index 2903a3632f5..13f445e67c3 100644
--- a/src/mongo/db/pipeline/lookup_set_cache_test.cpp
+++ b/src/mongo/db/pipeline/lookup_set_cache_test.cpp
@@ -28,18 +28,31 @@
#include "mongo/platform/basic.h"
+#include <algorithm>
+#include <boost/optional.hpp>
+#include <vector>
+
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/pipeline/lookup_set_cache.h"
+#include "mongo/db/pipeline/value_comparator.h"
+#include "mongo/db/query/collation/collator_interface_mock.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
+bool vectorContains(const boost::optional<std::vector<BSONObj>>& vector,
+ const BSONObj& expectedObj) {
+ ASSERT_TRUE(vector);
+ return std::find(vector->begin(), vector->end(), expectedObj) != vector->end();
+}
+
BSONObj intToObj(int value) {
return BSON("n" << value);
}
TEST(LookupSetCacheTest, InsertAndRetrieveWorksCorrectly) {
- LookupSetCache cache;
+ const StringData::ComparatorInterface* stringComparator = nullptr;
+ LookupSetCache cache(stringComparator);
cache.insert(Value(0), intToObj(1));
cache.insert(Value(0), intToObj(2));
cache.insert(Value(0), intToObj(3));
@@ -47,15 +60,16 @@ TEST(LookupSetCacheTest, InsertAndRetrieveWorksCorrectly) {
cache.insert(Value(1), intToObj(5));
ASSERT(cache[Value(0)]);
- ASSERT_TRUE(cache[Value(0)]->count(intToObj(1)));
- ASSERT_TRUE(cache[Value(0)]->count(intToObj(2)));
- ASSERT_TRUE(cache[Value(0)]->count(intToObj(3)));
- ASSERT_FALSE(cache[Value(0)]->count(intToObj(4)));
- ASSERT_FALSE(cache[Value(0)]->count(intToObj(5)));
+ ASSERT_TRUE(vectorContains(cache[Value(0)], intToObj(1)));
+ ASSERT_TRUE(vectorContains(cache[Value(0)], intToObj(2)));
+ ASSERT_TRUE(vectorContains(cache[Value(0)], intToObj(3)));
+ ASSERT_FALSE(vectorContains(cache[Value(0)], intToObj(4)));
+ ASSERT_FALSE(vectorContains(cache[Value(0)], intToObj(5)));
}
TEST(LookupSetCacheTest, CacheDoesEvictInExpectedOrder) {
- LookupSetCache cache;
+ const StringData::ComparatorInterface* stringComparator = nullptr;
+ LookupSetCache cache(stringComparator);
cache.insert(Value(0), intToObj(0));
cache.insert(Value(1), intToObj(0));
@@ -74,7 +88,8 @@ TEST(LookupSetCacheTest, CacheDoesEvictInExpectedOrder) {
}
TEST(LookupSetCacheTest, ReadDoesMoveKeyToFrontOfCache) {
- LookupSetCache cache;
+ const StringData::ComparatorInterface* stringComparator = nullptr;
+ LookupSetCache cache(stringComparator);
cache.insert(Value(0), intToObj(0));
cache.insert(Value(1), intToObj(0));
@@ -89,7 +104,8 @@ TEST(LookupSetCacheTest, ReadDoesMoveKeyToFrontOfCache) {
}
TEST(LookupSetCacheTest, InsertDoesPutKeyInMiddle) {
- LookupSetCache cache;
+ const StringData::ComparatorInterface* stringComparator = nullptr;
+ LookupSetCache cache(stringComparator);
cache.insert(Value(0), intToObj(0));
cache.insert(Value(1), intToObj(0));
@@ -102,7 +118,8 @@ TEST(LookupSetCacheTest, InsertDoesPutKeyInMiddle) {
}
TEST(LookupSetCacheTest, EvictDoesRespectMemoryUsage) {
- LookupSetCache cache;
+ const StringData::ComparatorInterface* stringComparator = nullptr;
+ LookupSetCache cache(stringComparator);
cache.insert(Value(0), intToObj(0));
cache.insert(Value(1), intToObj(0));
@@ -115,7 +132,8 @@ TEST(LookupSetCacheTest, EvictDoesRespectMemoryUsage) {
}
TEST(LookupSetCacheTest, ComplexAccessPatternDoesBehaveCorrectly) {
- LookupSetCache cache;
+ const StringData::ComparatorInterface* stringComparator = nullptr;
+ LookupSetCache cache(stringComparator);
for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++) {
@@ -128,11 +146,11 @@ TEST(LookupSetCacheTest, ComplexAccessPatternDoesBehaveCorrectly) {
ASSERT_FALSE(cache[Value(0)]);
ASSERT_TRUE(cache[Value(1)]);
- ASSERT_TRUE(cache[Value(1)]->count(intToObj(0)));
- ASSERT_TRUE(cache[Value(1)]->count(intToObj(1)));
- ASSERT_TRUE(cache[Value(1)]->count(intToObj(2)));
- ASSERT_TRUE(cache[Value(1)]->count(intToObj(3)));
- ASSERT_TRUE(cache[Value(1)]->count(intToObj(4)));
+ ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(0)));
+ ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(1)));
+ ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(2)));
+ ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(3)));
+ ASSERT_TRUE(vectorContains(cache[Value(1)], intToObj(4)));
cache.evictUntilSize(2);
// Cache ordering is now {1: ..., 3: ...}
@@ -151,4 +169,43 @@ TEST(LookupSetCacheTest, ComplexAccessPatternDoesBehaveCorrectly) {
ASSERT_TRUE(cache[Value(5)]);
}
+TEST(LookupSetCacheTest, CacheKeysRespectCollation) {
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kToLowerString);
+ LookupSetCache cache(&collator);
+
+ cache.insert(Value("foo"), intToObj(1));
+ cache.insert(Value("FOO"), intToObj(2));
+ cache.insert(Value("FOOz"), intToObj(3));
+
+ {
+ auto fooResult = cache[Value("FoO")];
+ ASSERT_TRUE(fooResult);
+ ASSERT_EQ(2U, fooResult->size());
+ }
+
+ {
+ auto foozResult = cache[Value("fooZ")];
+ ASSERT_TRUE(foozResult);
+ ASSERT_EQ(1U, foozResult->size());
+ }
+}
+
+// Cache values shouldn't respect collation, since they are distinct documents from the
+// foreign collection.
+TEST(LookupSetCacheTest, CachedValuesDontRespectCollation) {
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kToLowerString);
+ LookupSetCache cache(&collator);
+
+ cache.insert(Value("foo"),
+ BSON("foo"
+ << "bar"));
+ cache.insert(Value("foo"),
+ BSON("foo"
+ << "BAR"));
+
+ auto fooResult = cache[Value("foo")];
+ ASSERT_TRUE(fooResult);
+ ASSERT_EQ(2U, fooResult->size());
+}
+
} // namespace mongo
diff --git a/src/mongo/db/pipeline/value_comparator.cpp b/src/mongo/db/pipeline/value_comparator.cpp
index a20a9460b6d..87eb92b73a6 100644
--- a/src/mongo/db/pipeline/value_comparator.cpp
+++ b/src/mongo/db/pipeline/value_comparator.cpp
@@ -34,6 +34,8 @@
namespace mongo {
+const ValueComparator ValueComparator::kInstance{};
+
bool ValueComparator::evaluate(Value::DeferredComparison deferredComparison) const {
int cmp = Value::compare(deferredComparison.lhs, deferredComparison.rhs, _stringComparator);
switch (deferredComparison.type) {
diff --git a/src/mongo/db/pipeline/value_comparator.h b/src/mongo/db/pipeline/value_comparator.h
index e6c55ed35cf..1e46da52ce9 100644
--- a/src/mongo/db/pipeline/value_comparator.h
+++ b/src/mongo/db/pipeline/value_comparator.h
@@ -42,22 +42,17 @@ class ValueComparator {
public:
/**
* Functor compatible for use with unordered STL containers.
- *
- * TODO SERVER-25139: Remove the no-arguments constructor.
*/
class EqualTo {
public:
- EqualTo() = default;
-
explicit EqualTo(const ValueComparator* comparator) : _comparator(comparator) {}
bool operator()(const Value& lhs, const Value& rhs) const {
- return _comparator ? _comparator->compare(lhs, rhs) == 0
- : ValueComparator().compare(lhs, rhs) == 0;
+ return _comparator->compare(lhs, rhs) == 0;
}
private:
- const ValueComparator* _comparator = nullptr;
+ const ValueComparator* _comparator;
};
/**
@@ -77,23 +72,23 @@ public:
/**
* Functor for computing the hash of a Value, compatible for use with unordered STL containers.
- *
- * TODO SERVER-25139: Remove the no-arguments constructor.
*/
class Hasher {
public:
- Hasher() = default;
-
explicit Hasher(const ValueComparator* comparator) : _comparator(comparator) {}
size_t operator()(const Value& val) const {
- return _comparator ? _comparator->hash(val) : ValueComparator().hash(val);
+ return _comparator->hash(val);
}
private:
- const ValueComparator* _comparator = nullptr;
+ const ValueComparator* _comparator;
};
+ // Global comparator for performing simple Value comparisons. Value comparisons that require
+ // special database logic, such as collations, must instantiate their own comparator.
+ static const ValueComparator kInstance;
+
/**
* Constructs a value comparator with simple comparison semantics.
*/
@@ -146,6 +141,14 @@ public:
}
/**
+ * Returns a function object which computes the hash of a Value such that equal Values under
+ * this comparator have equal hashes.
+ */
+ Hasher getHasher() const {
+ return Hasher(this);
+ }
+
+ /**
* Construct an empty ordered set of Value whose ordering and equivalence classes are given by
* this comparator. This comparator must outlive the returned set.
*/