diff options
author | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2016-06-03 13:26:35 -0400 |
---|---|---|
committer | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2016-06-03 13:26:35 -0400 |
commit | 2a2ca32dd534c801851cadc446906152b4dbeffe (patch) | |
tree | 55ceb701ec448d8c7d5a3a325758f55e73038373 | |
parent | cecbe424d32cbb475d9b0384d29b98a9fba9c89f (diff) | |
download | mongo-2a2ca32dd534c801851cadc446906152b4dbeffe.tar.gz |
SERVER-23114 Compute multikey paths in 2dsphere index key generation.
Propagates information about the prefixes of the indexed fields that
cause the index to be multikey as a result of inserting the generated
keys.
-rw-r--r-- | src/mongo/db/bson/dotted_path_support.cpp | 55 | ||||
-rw-r--r-- | src/mongo/db/bson/dotted_path_support.h | 18 | ||||
-rw-r--r-- | src/mongo/db/bson/dotted_path_support_test.cpp | 61 | ||||
-rw-r--r-- | src/mongo/db/index/expression_keys_private.cpp | 104 | ||||
-rw-r--r-- | src/mongo/db/index/expression_keys_private.h | 4 | ||||
-rw-r--r-- | src/mongo/db/index/external_key_generator.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/index/s2_access_method.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/index/s2_access_method.h | 6 | ||||
-rw-r--r-- | src/mongo/db/index/s2_key_generator_test.cpp | 208 |
9 files changed, 376 insertions, 88 deletions
diff --git a/src/mongo/db/bson/dotted_path_support.cpp b/src/mongo/db/bson/dotted_path_support.cpp index cfb10c5a39f..1a9fda88501 100644 --- a/src/mongo/db/bson/dotted_path_support.cpp +++ b/src/mongo/db/bson/dotted_path_support.cpp @@ -50,7 +50,9 @@ template <typename BSONElementColl> void _extractAllElementsAlongPath(const BSONObj& obj, StringData path, BSONElementColl& elements, - bool expandArrayOnTrailingField) { + bool expandArrayOnTrailingField, + size_t depth, + std::set<size_t>* arrayComponents) { BSONElement e = obj.getField(path); if (e.eoo()) { @@ -62,8 +64,12 @@ void _extractAllElementsAlongPath(const BSONObj& obj, BSONElement e = obj.getField(left); if (e.type() == Object) { - _extractAllElementsAlongPath( - e.embeddedObject(), next, elements, expandArrayOnTrailingField); + _extractAllElementsAlongPath(e.embeddedObject(), + next, + elements, + expandArrayOnTrailingField, + depth + 1, + arrayComponents); } else if (e.type() == Array) { bool allDigits = false; if (next.size() > 0 && std::isdigit(next[0])) { @@ -73,15 +79,28 @@ void _extractAllElementsAlongPath(const BSONObj& obj, allDigits = temp == next.size() || next[temp] == '.'; } if (allDigits) { - _extractAllElementsAlongPath( - e.embeddedObject(), next, elements, expandArrayOnTrailingField); + _extractAllElementsAlongPath(e.embeddedObject(), + next, + elements, + expandArrayOnTrailingField, + depth + 1, + arrayComponents); } else { + size_t nArrElems = 0; BSONObjIterator i(e.embeddedObject()); while (i.more()) { BSONElement e2 = i.next(); if (e2.type() == Object || e2.type() == Array) - _extractAllElementsAlongPath( - e2.embeddedObject(), next, elements, expandArrayOnTrailingField); + _extractAllElementsAlongPath(e2.embeddedObject(), + next, + elements, + expandArrayOnTrailingField, + depth + 1, + arrayComponents); + ++nArrElems; + } + if (arrayComponents && nArrElems > 1) { + arrayComponents->insert(depth); } } } else { @@ -90,9 +109,15 @@ void _extractAllElementsAlongPath(const BSONObj& obj, } } else { if (e.type() == Array && expandArrayOnTrailingField) { + size_t nArrElems = 0; BSONObjIterator i(e.embeddedObject()); - while (i.more()) + while (i.more()) { elements.insert(i.next()); + ++nArrElems; + } + if (arrayComponents && nArrElems > 1) { + arrayComponents->insert(depth); + } } else { elements.insert(e); } @@ -142,15 +167,21 @@ BSONElement extractElementAtPathOrArrayAlongPath(const BSONObj& obj, const char* void extractAllElementsAlongPath(const BSONObj& obj, StringData path, BSONElementSet& elements, - bool expandArrayOnTrailingField) { - _extractAllElementsAlongPath(obj, path, elements, expandArrayOnTrailingField); + bool expandArrayOnTrailingField, + std::set<size_t>* arrayComponents) { + const size_t initialDepth = 0; + _extractAllElementsAlongPath( + obj, path, elements, expandArrayOnTrailingField, initialDepth, arrayComponents); } void extractAllElementsAlongPath(const BSONObj& obj, StringData path, BSONElementMSet& elements, - bool expandArrayOnTrailingField) { - _extractAllElementsAlongPath(obj, path, elements, expandArrayOnTrailingField); + bool expandArrayOnTrailingField, + std::set<size_t>* arrayComponents) { + const size_t initialDepth = 0; + _extractAllElementsAlongPath( + obj, path, elements, expandArrayOnTrailingField, initialDepth, arrayComponents); } BSONObj extractElementsBasedOnTemplate(const BSONObj& obj, diff --git a/src/mongo/db/bson/dotted_path_support.h b/src/mongo/db/bson/dotted_path_support.h index 9e2a3c9e9d0..c9c6676c75a 100644 --- a/src/mongo/db/bson/dotted_path_support.h +++ b/src/mongo/db/bson/dotted_path_support.h @@ -82,25 +82,31 @@ BSONElement extractElementAtPathOrArrayAlongPath(const BSONObj& obj, const char* * The 'path' can be specified using a dotted notation in order to traverse through embedded objects * and array elements. * + * This function fills 'arrayComponents' with the positions (starting at 0) of 'path' corresponding + * to array values with multiple elements. + * * Some examples: * * Consider the document {a: [{b: 1}, {b: 2}]} and the path "a.b". The elements {b: 1} and {b: 2} - * would be added to the set. + * would be added to the set. 'arrayComponents' would be set as std::set<size_t>{0U}. * * Consider the document {a: [{b: [1, 2]}, {b: [2, 3]}]} and the path "a.b". The elements {b: 1}, - * {b: 2}, and {b: 3} would be added to the set if 'expandArrayOnTrailingField' is true. The - * elements {b: [1, 2]} and {b: [2, 3]} would be added to the set if 'expandArrayOnTrailingField' - * is false. + * {b: 2}, and {b: 3} would be added to the set and 'arrayComponents' would be set as + * std::set<size_t>{0U, 1U} if 'expandArrayOnTrailingField' is true. The elements {b: [1, 2]} and + * {b: [2, 3]} would be added to the set and 'arrayComponents' would be set as + * std::set<size_t>{0U} if 'expandArrayOnTrailingField' is false. */ void extractAllElementsAlongPath(const BSONObj& obj, StringData path, BSONElementSet& elements, - bool expandArrayOnTrailingField = true); + bool expandArrayOnTrailingField = true, + std::set<std::size_t>* arrayComponents = nullptr); void extractAllElementsAlongPath(const BSONObj& obj, StringData path, BSONElementMSet& elements, - bool expandArrayOnTrailingField = true); + bool expandArrayOnTrailingField = true, + std::set<std::size_t>* arrayComponents = nullptr); /** * Returns an owned BSONObj with elements in the same order as they appear in the 'pattern' object diff --git a/src/mongo/db/bson/dotted_path_support_test.cpp b/src/mongo/db/bson/dotted_path_support_test.cpp index 78d3253d8b5..4473721c3d6 100644 --- a/src/mongo/db/bson/dotted_path_support_test.cpp +++ b/src/mongo/db/bson/dotted_path_support_test.cpp @@ -28,6 +28,7 @@ #include "mongo/platform/basic.h" +#include <set> #include <vector> #include "mongo/bson/bsonelement.h" @@ -139,14 +140,42 @@ void assertBSONElementSetsAreEqual(const std::vector<BSONObj>& expectedObjs, } } +void dumpArrayComponents(const std::set<size_t>& arrayComponents, StringBuilder* sb) { + *sb << "[ "; + bool firstIteration = true; + for (const auto pos : arrayComponents) { + if (!firstIteration) { + *sb << ", "; + } + *sb << pos; + firstIteration = false; + } + *sb << " ]"; +} + +void assertArrayComponentsAreEqual(const std::set<size_t>& expectedArrayComponents, + const std::set<size_t>& actualArrayComponents) { + if (expectedArrayComponents != actualArrayComponents) { + StringBuilder sb; + sb << "Expected: "; + dumpArrayComponents(expectedArrayComponents, &sb); + sb << ", Actual: "; + dumpArrayComponents(actualArrayComponents, &sb); + FAIL(sb.str()); + } +} + TEST(ExtractAllElementsAlongPath, NestedObjectWithScalarValue) { BSONObj obj = BSON("a" << BSON("b" << 1)); BSONElementSet actualElements; const bool expandArrayOnTrailingField = true; - dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField); + std::set<size_t> actualArrayComponents; + dps::extractAllElementsAlongPath( + obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents); assertBSONElementSetsAreEqual({BSON("" << 1)}, actualElements); + assertArrayComponentsAreEqual(std::set<size_t>{}, actualArrayComponents); } TEST(ExtractAllElementsAlongPath, NestedObjectWithEmptyArrayValue) { @@ -154,9 +183,12 @@ TEST(ExtractAllElementsAlongPath, NestedObjectWithEmptyArrayValue) { BSONElementSet actualElements; const bool expandArrayOnTrailingField = true; - dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField); + std::set<size_t> actualArrayComponents; + dps::extractAllElementsAlongPath( + obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents); assertBSONElementSetsAreEqual(std::vector<BSONObj>{}, actualElements); + assertArrayComponentsAreEqual(std::set<size_t>{}, actualArrayComponents); } TEST(ExtractAllElementsAlongPath, NestedObjectWithSingletonArrayValue) { @@ -164,9 +196,12 @@ TEST(ExtractAllElementsAlongPath, NestedObjectWithSingletonArrayValue) { BSONElementSet actualElements; const bool expandArrayOnTrailingField = true; - dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField); + std::set<size_t> actualArrayComponents; + dps::extractAllElementsAlongPath( + obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents); assertBSONElementSetsAreEqual({BSON("" << 1)}, actualElements); + assertArrayComponentsAreEqual(std::set<size_t>{}, actualArrayComponents); } TEST(ExtractAllElementsAlongPath, NestedObjectWithArrayValue) { @@ -174,9 +209,12 @@ TEST(ExtractAllElementsAlongPath, NestedObjectWithArrayValue) { BSONElementSet actualElements; const bool expandArrayOnTrailingField = true; - dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField); + std::set<size_t> actualArrayComponents; + dps::extractAllElementsAlongPath( + obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents); assertBSONElementSetsAreEqual({BSON("" << 1), BSON("" << 2), BSON("" << 3)}, actualElements); + assertArrayComponentsAreEqual({1U}, actualArrayComponents); } TEST(ExtractAllElementsAlongPath, ObjectWithArrayOfSubobjectsWithScalarValue) { @@ -184,9 +222,12 @@ TEST(ExtractAllElementsAlongPath, ObjectWithArrayOfSubobjectsWithScalarValue) { BSONElementSet actualElements; const bool expandArrayOnTrailingField = true; - dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField); + std::set<size_t> actualArrayComponents; + dps::extractAllElementsAlongPath( + obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents); assertBSONElementSetsAreEqual({BSON("" << 1), BSON("" << 2), BSON("" << 3)}, actualElements); + assertArrayComponentsAreEqual({0U}, actualArrayComponents); } TEST(ExtractAllElementsAlongPath, ObjectWithArrayOfSubobjectsWithArrayValues) { @@ -196,9 +237,12 @@ TEST(ExtractAllElementsAlongPath, ObjectWithArrayOfSubobjectsWithArrayValues) { BSONElementSet actualElements; const bool expandArrayOnTrailingField = true; - dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField); + std::set<size_t> actualArrayComponents; + dps::extractAllElementsAlongPath( + obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents); assertBSONElementSetsAreEqual({BSON("" << 1), BSON("" << 2), BSON("" << 3)}, actualElements); + assertArrayComponentsAreEqual({0U, 1U}, actualArrayComponents); } TEST(ExtractAllElementsAlongPath, @@ -208,11 +252,14 @@ TEST(ExtractAllElementsAlongPath, BSONElementSet actualElements; const bool expandArrayOnTrailingField = false; - dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField); + std::set<size_t> actualArrayComponents; + dps::extractAllElementsAlongPath( + obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents); assertBSONElementSetsAreEqual( {BSON("" << BSON_ARRAY(1)), BSON("" << BSON_ARRAY(2)), BSON("" << BSON_ARRAY(3))}, actualElements); + assertArrayComponentsAreEqual({0U}, actualArrayComponents); } } // namespace diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp index e44c950758a..89f8614f0b2 100644 --- a/src/mongo/db/index/expression_keys_private.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -33,6 +33,7 @@ #include <utility> #include "mongo/db/bson/dotted_path_support.h" +#include "mongo/db/field_ref.h" #include "mongo/db/fts/fts_index_format.h" #include "mongo/db/geo/geoconstants.h" #include "mongo/db/geo/geometry_container.h" @@ -118,13 +119,16 @@ Status S2GetKeysForElement(const BSONElement& element, /** - * Get the index keys for elements that are GeoJSON. - * Used by getS2Keys. + * Fills 'out' with the S2 keys that should be generated for 'elements' in a 2dsphere index. + * + * Returns true if an indexed element of the document uses multiple cells for its covering, and + * returns false otherwise. */ -void getS2GeoKeys(const BSONObj& document, +bool getS2GeoKeys(const BSONObj& document, const BSONElementSet& elements, const S2IndexingParams& params, BSONObjSet* out) { + bool everGeneratedMultipleCells = false; for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { vector<S2CellId> cells; Status status = S2GetKeysForElement(*i, params, &cells); @@ -139,6 +143,8 @@ void getS2GeoKeys(const BSONObj& document, for (vector<S2CellId>::const_iterator it = cells.begin(); it != cells.end(); ++it) { out->insert(S2CellIdToIndexKey(*it, params.indexVersion)); } + + everGeneratedMultipleCells = everGeneratedMultipleCells || cells.size() > 1; } if (0 == out->size()) { @@ -146,13 +152,16 @@ void getS2GeoKeys(const BSONObj& document, b.appendNull(""); out->insert(b.obj()); } + return everGeneratedMultipleCells; } /** - * Expands array and appends items to 'out'. - * Used by getOneLiteralKey. + * Fills 'out' with the keys that should be generated for an array value 'obj' in a 2dsphere index. + * A key is generated for each element of the array value 'obj'. + * + * Returns true if 'obj' contains more than one element, and returns false otherwise. */ -void getS2LiteralKeysArray(const BSONObj& obj, const CollatorInterface* collator, BSONObjSet* out) { +bool getS2LiteralKeysArray(const BSONObj& obj, const CollatorInterface* collator, BSONObjSet* out) { BSONObjIterator objIt(obj); if (!objIt.more()) { // Empty arrays are indexed as undefined. @@ -161,39 +170,54 @@ void getS2LiteralKeysArray(const BSONObj& obj, const CollatorInterface* collator out->insert(b.obj()); } else { // Non-empty arrays are exploded. + size_t nArrElems = 0; while (objIt.more()) { BSONObjBuilder b; CollationIndexKey::collationAwareIndexKeyAppend(objIt.next(), collator, &b); out->insert(b.obj()); + ++nArrElems; + } + + if (nArrElems > 1) { + return true; } } + return false; } /** - * If 'elt' is an array, expands elt and adds items to 'out'. - * Otherwise, adds 'elt' as a single element. - * Used by getLiteralKeys. + * Fills 'out' with the keys that should be generated for a value 'elt' in a 2dsphere index. If + * 'elt' is an array value, then a key is generated for each element of the array value 'obj'. + * + * Returns true if 'elt' is an array value that contains more than one element, and returns false + * otherwise. */ -void getS2OneLiteralKey(const BSONElement& elt, +bool getS2OneLiteralKey(const BSONElement& elt, const CollatorInterface* collator, BSONObjSet* out) { if (Array == elt.type()) { - getS2LiteralKeysArray(elt.Obj(), collator, out); + return getS2LiteralKeysArray(elt.Obj(), collator, out); } else { // One thing, not an array, index as-is. BSONObjBuilder b; CollationIndexKey::collationAwareIndexKeyAppend(elt, collator, &b); out->insert(b.obj()); } + return false; } /** - * elements is a non-geo field. Add the values literally, expanding arrays. - * Used by getS2Keys. + * Fills 'out' with the non-geo keys that should be generated for 'elements' in a 2dsphere index. If + * any element in 'elements' is an array value, then a key is generated for each element of that + * array value. + * + * Returns true if any element of 'elements' is an array value that contains more than one element, + * and returns false otherwise. */ -void getS2LiteralKeys(const BSONElementSet& elements, +bool getS2LiteralKeys(const BSONElementSet& elements, const CollatorInterface* collator, BSONObjSet* out) { + bool indexedArrayValueWithMultipleElements = false; if (0 == elements.size()) { // Missing fields are indexed as null. BSONObjBuilder b; @@ -201,9 +225,12 @@ void getS2LiteralKeys(const BSONElementSet& elements, out->insert(b.obj()); } else { for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { - getS2OneLiteralKey(*i, collator, out); + const bool thisElemIsArrayWithMultipleElements = getS2OneLiteralKey(*i, collator, out); + indexedArrayValueWithMultipleElements = + indexedArrayValueWithMultipleElements || thisElemIsArrayWithMultipleElements; } } + return indexedArrayValueWithMultipleElements; } } // namespace @@ -431,25 +458,40 @@ std::string ExpressionKeysPrivate::makeHaystackString(int hashedX, int hashedY) void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj, const BSONObj& keyPattern, const S2IndexingParams& params, - BSONObjSet* keys) { + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) { BSONObjSet keysToAdd; // Does one of our documents have a geo field? bool haveGeoField = false; - // We output keys in the same order as the fields we index. - BSONObjIterator i(keyPattern); - while (i.more()) { - BSONElement e = i.next(); + if (multikeyPaths) { + invariant(multikeyPaths->empty()); + multikeyPaths->resize(keyPattern.nFields()); + } + size_t posInIdx = 0; + + // We output keys in the same order as the fields we index. + for (const auto keyElem : keyPattern) { // First, we get the keys that this field adds. Either they're added literally from // the value of the field, or they're transformed if the field is geo. BSONElementSet fieldElements; - // false means Don't expand the last array, duh. - dps::extractAllElementsAlongPath(obj, e.fieldName(), fieldElements, false); - + const bool expandArrayOnTrailingField = false; + std::set<size_t>* arrayComponents = multikeyPaths ? &(*multikeyPaths)[posInIdx] : nullptr; + dps::extractAllElementsAlongPath( + obj, keyElem.fieldName(), fieldElements, expandArrayOnTrailingField, arrayComponents); + + // Trailing array values aren't being expanded, so we still need to determine whether the + // last component of the indexed path 'keyElem.fieldName()' causes the index to be multikey. + // We say that it does if + // (a) the last component of the indexed path ever refers to an array value containing + // multiple elements, or if + // (b) the last component of the indexed path ever refers to GeoJSON data that requires + // multiple cells for its covering. + bool lastPathComponentCausesIndexToBeMultikey; BSONObjSet keysForThisField; - if (IndexNames::GEO_2DSPHERE == e.valuestr()) { + if (IndexNames::GEO_2DSPHERE == keyElem.valuestr()) { if (params.indexVersion >= S2_INDEX_VERSION_2) { // For >= V2, // geo: null, @@ -478,20 +520,29 @@ void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj, } } - getS2GeoKeys(obj, fieldElements, params, &keysForThisField); + lastPathComponentCausesIndexToBeMultikey = + getS2GeoKeys(obj, fieldElements, params, &keysForThisField); } else { - getS2LiteralKeys(fieldElements, params.collator, &keysForThisField); + lastPathComponentCausesIndexToBeMultikey = + getS2LiteralKeys(fieldElements, params.collator, &keysForThisField); } // We expect there to be the missing field element present in the keys if data is // missing. So, this should be non-empty. verify(!keysForThisField.empty()); + if (multikeyPaths && lastPathComponentCausesIndexToBeMultikey) { + const size_t pathLengthOfThisField = FieldRef{keyElem.fieldNameStringData()}.numParts(); + invariant(pathLengthOfThisField > 0); + (*multikeyPaths)[posInIdx].insert(pathLengthOfThisField - 1); + } + // We take the Cartesian product of all of the keys. This requires that we have // some keys to take the Cartesian product with. If keysToAdd.empty(), we // initialize it. if (keysToAdd.empty()) { keysToAdd = keysForThisField; + ++posInIdx; continue; } @@ -507,6 +558,7 @@ void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj, } } keysToAdd = updatedKeysToAdd; + ++posInIdx; } // Make sure that if we're >= V2 there's at least one geo field present in the doc. diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h index 5206f4a6768..a9b032ea97d 100644 --- a/src/mongo/db/index/expression_keys_private.h +++ b/src/mongo/db/index/expression_keys_private.h @@ -33,6 +33,7 @@ #include "mongo/bson/bsonmisc.h" #include "mongo/bson/bsonobj.h" #include "mongo/db/hasher.h" +#include "mongo/db/index/multikey_paths.h" namespace mongo { @@ -125,7 +126,8 @@ public: static void getS2Keys(const BSONObj& obj, const BSONObj& keyPattern, const S2IndexingParams& params, - BSONObjSet* keys); + BSONObjSet* keys, + MultikeyPaths* multikeyPaths); }; } // namespace mongo diff --git a/src/mongo/db/index/external_key_generator.cpp b/src/mongo/db/index/external_key_generator.cpp index 1ab9c1ad9ae..192354dc7eb 100644 --- a/src/mongo/db/index/external_key_generator.cpp +++ b/src/mongo/db/index/external_key_generator.cpp @@ -64,7 +64,11 @@ void getKeysForUpgradeChecking(const BSONObj& infoObj, const BSONObj& doc, BSONO // generated will be wrong. CollatorInterface* collator = nullptr; ExpressionParams::initialize2dsphereParams(infoObj, collator, ¶ms); - ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, keys); + + // There's no need to compute the prefixes of the indexed fields that cause the index to be + // multikey when checking if any index key is too large. + MultikeyPaths* multikeyPaths = nullptr; + ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, keys, multikeyPaths); } else if (IndexNames::TEXT == type) { fts::FTSSpec spec(infoObj); ExpressionKeysPrivate::getFTSKeys(doc, spec, keys); diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp index eee92fa8037..32b94a15ca0 100644 --- a/src/mongo/db/index/s2_access_method.cpp +++ b/src/mongo/db/index/s2_access_method.cpp @@ -143,7 +143,7 @@ StatusWith<BSONObj> S2AccessMethod::fixSpec(const BSONObj& specObj) { void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { - ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys); + ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys, multikeyPaths); } } // namespace mongo diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h index 3d39e10fcc9..c0104fe144d 100644 --- a/src/mongo/db/index/s2_access_method.h +++ b/src/mongo/db/index/s2_access_method.h @@ -58,8 +58,10 @@ private: * This function ignores the 'multikeyPaths' pointer because text indexes don't support tracking * path-level multikey information. * - * TODO SERVER-23114: Return prefixes of the indexed fields that cause the index to be multikey - * as a result of inserting 'keys'. + * If the 'multikeyPaths' pointer is non-null, then it must point to an empty vector. This + * function resizes 'multikeyPaths' to have the same number of elements as the index key pattern + * and fills each element with the prefixes of the indexed field that would cause this index to + * be multikey as a result of inserting 'keys'. */ void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; diff --git a/src/mongo/db/index/s2_key_generator_test.cpp b/src/mongo/db/index/s2_key_generator_test.cpp index 1002ea1ecde..e7399eed0b3 100644 --- a/src/mongo/db/index/s2_key_generator_test.cpp +++ b/src/mongo/db/index/s2_key_generator_test.cpp @@ -26,8 +26,6 @@ * it in the license file. */ -#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex - #include "mongo/platform/basic.h" #include "mongo/db/index/expression_keys_private.h" @@ -38,7 +36,7 @@ #include "mongo/db/json.h" #include "mongo/db/query/collation/collator_interface_mock.h" #include "mongo/unittest/unittest.h" -#include "mongo/util/log.h" +#include "mongo/util/mongoutils/str.h" using namespace mongo; @@ -55,31 +53,142 @@ std::string dumpKeyset(const BSONObjSet& objs) { return ss.str(); } -bool assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) { +std::string dumpMultikeyPaths(const MultikeyPaths& multikeyPaths) { + std::stringstream ss; + + ss << "[ "; + for (const auto multikeyComponents : multikeyPaths) { + ss << "[ "; + for (const auto multikeyComponent : multikeyComponents) { + ss << multikeyComponent << " "; + } + ss << "] "; + } + ss << "]"; + + return ss.str(); +} + +void assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) { if (expectedKeys != actualKeys) { - log() << "Expected: " << dumpKeyset(expectedKeys) << ", " - << "Actual: " << dumpKeyset(actualKeys); - return false; + FAIL(str::stream() << "Expected: " << dumpKeyset(expectedKeys) << ", Actual: " + << dumpKeyset(actualKeys)); + } +} + +void assertMultikeyPathsEqual(const MultikeyPaths& expectedMultikeyPaths, + const MultikeyPaths& actualMultikeyPaths) { + if (expectedMultikeyPaths != actualMultikeyPaths) { + FAIL(str::stream() << "Expected: " << dumpMultikeyPaths(expectedMultikeyPaths) + << ", Actual: " + << dumpMultikeyPaths(actualMultikeyPaths)); } - return true; } -long long getCellID(int x, int y) { - BSONObj obj = BSON("a" << BSON("type" - << "Point" - << "coordinates" - << BSON_ARRAY(x << y))); +long long getCellID(int x, int y, bool multiPoint = false) { + BSONObj obj; + if (multiPoint) { + obj = BSON("a" << BSON("type" + << "MultiPoint" + << "coordinates" + << BSON_ARRAY(BSON_ARRAY(x << y)))); + } else { + obj = BSON("a" << BSON("type" + << "Point" + << "coordinates" + << BSON_ARRAY(x << y))); + } BSONObj keyPattern = fromjson("{a: '2dsphere'}"); BSONObj infoObj = fromjson("{key: {a: '2dsphere'}, '2dsphereIndexVersion': 3}"); S2IndexingParams params; const CollatorInterface* collator = nullptr; ExpressionParams::initialize2dsphereParams(infoObj, collator, ¶ms); + BSONObjSet keys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &keys); + // There's no need to compute the prefixes of the indexed fields that cause the index to be + // multikey when computing the cell id of the geo field. + MultikeyPaths* multikeyPaths = nullptr; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &keys, multikeyPaths); + ASSERT_EQUALS(1U, keys.size()); return (*keys.begin()).firstElement().Long(); } +TEST(S2KeyGeneratorTest, GetS2KeysFromSubobjectWithArrayOfGeoAndNonGeoSubobjects) { + BSONObj keyPattern = fromjson("{'a.b.nongeo': 1, 'a.b.geo': '2dsphere'}"); + BSONObj genKeysFrom = fromjson( + "{a: {b: [{nongeo: 1, geo: {type: 'Point', coordinates: [0, 0]}}, " + "{nongeo: 2, geo: {type: 'Point', coordinates: [3, 3]}}]}}"); + BSONObj infoObj = + fromjson("{key: {'a.b.nongeo': 1, 'a.b.geo': '2dsphere'}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock* collator = nullptr; + ExpressionParams::initialize2dsphereParams(infoObj, collator, ¶ms); + + BSONObjSet actualKeys; + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys( + genKeysFrom, keyPattern, params, &actualKeys, &actualMultikeyPaths); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << 1 << "" << getCellID(0, 0))); + expectedKeys.insert(BSON("" << 1 << "" << getCellID(3, 3))); + expectedKeys.insert(BSON("" << 2 << "" << getCellID(0, 0))); + expectedKeys.insert(BSON("" << 2 << "" << getCellID(3, 3))); + + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{{1U}, {1U}}, actualMultikeyPaths); +} + +TEST(S2KeyGeneratorTest, GetS2KeysFromArrayOfNonGeoSubobjectsWithArrayValues) { + BSONObj keyPattern = fromjson("{'a.nongeo': 1, geo: '2dsphere'}"); + BSONObj genKeysFrom = fromjson( + "{a: [{nongeo: [1, 2]}, {nongeo: [2, 3]}], " + "geo: {type: 'Point', coordinates: [0, 0]}}"); + BSONObj infoObj = + fromjson("{key: {'a.nongeo': 1, geo: '2dsphere'}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock* collator = nullptr; + ExpressionParams::initialize2dsphereParams(infoObj, collator, ¶ms); + + BSONObjSet actualKeys; + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys( + genKeysFrom, keyPattern, params, &actualKeys, &actualMultikeyPaths); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << 1 << "" << getCellID(0, 0))); + expectedKeys.insert(BSON("" << 2 << "" << getCellID(0, 0))); + expectedKeys.insert(BSON("" << 3 << "" << getCellID(0, 0))); + + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{{0U, 1U}, std::set<size_t>{}}, actualMultikeyPaths); +} + +TEST(S2KeyGeneratorTest, GetS2KeysFromMultiPointInGeoField) { + BSONObj keyPattern = fromjson("{nongeo: 1, geo: '2dsphere'}"); + BSONObj genKeysFrom = + fromjson("{nongeo: 1, geo: {type: 'MultiPoint', coordinates: [[0, 0], [1, 0], [1, 1]]}}"); + BSONObj infoObj = fromjson("{key: {nongeo: 1, geo: '2dsphere'}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock* collator = nullptr; + ExpressionParams::initialize2dsphereParams(infoObj, collator, ¶ms); + + BSONObjSet actualKeys; + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys( + genKeysFrom, keyPattern, params, &actualKeys, &actualMultikeyPaths); + + const bool multiPoint = true; + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << 1 << "" << getCellID(0, 0, multiPoint))); + expectedKeys.insert(BSON("" << 1 << "" << getCellID(1, 0, multiPoint))); + expectedKeys.insert(BSON("" << 1 << "" << getCellID(1, 1, multiPoint))); + + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, {0U}}, actualMultikeyPaths); +} + TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldAfterGeoField) { BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: 'string'}"); BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}"); @@ -87,14 +196,18 @@ TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldAfterGeoField) { S2IndexingParams params; CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionParams::initialize2dsphereParams(infoObj, &collator, ¶ms); + BSONObjSet actualKeys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths); BSONObjSet expectedKeys; expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << "gnirts")); - ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}}, + actualMultikeyPaths); } TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldBeforeGeoField) { @@ -104,8 +217,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldBeforeGeoField) { S2IndexingParams params; CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionParams::initialize2dsphereParams(infoObj, &collator, ¶ms); + BSONObjSet actualKeys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths); BSONObjSet expectedKeys; expectedKeys.insert(BSON("" @@ -113,7 +228,9 @@ TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldBeforeGeoField) { << "" << getCellID(0, 0))); - ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}}, + actualMultikeyPaths); } TEST(S2KeyGeneratorTest, CollationAppliedToAllNonGeoStringFields) { @@ -123,8 +240,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToAllNonGeoStringFields) { S2IndexingParams params; CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionParams::initialize2dsphereParams(infoObj, &collator, ¶ms); + BSONObjSet actualKeys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths); BSONObjSet expectedKeys; expectedKeys.insert(BSON("" @@ -134,7 +253,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToAllNonGeoStringFields) { << "" << "2gnirts")); - ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual( + MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}, std::set<size_t>{}}, + actualMultikeyPaths); } TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldWithMultiplePathComponents) { @@ -144,14 +266,18 @@ TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldWithMultiplePathComp S2IndexingParams params; CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionParams::initialize2dsphereParams(infoObj, &collator, ¶ms); + BSONObjSet actualKeys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths); BSONObjSet expectedKeys; expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << "gnirts")); - ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}}, + actualMultikeyPaths); } TEST(S2KeyGeneratorTest, CollationAppliedToStringsInArray) { @@ -161,8 +287,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToStringsInArray) { S2IndexingParams params; CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionParams::initialize2dsphereParams(infoObj, &collator, ¶ms); + BSONObjSet actualKeys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths); BSONObjSet expectedKeys; expectedKeys.insert(BSON("" << getCellID(0, 0) << "" @@ -170,7 +298,8 @@ TEST(S2KeyGeneratorTest, CollationAppliedToStringsInArray) { expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << "2gnirts")); - ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, {0U}}, actualMultikeyPaths); } TEST(S2KeyGeneratorTest, CollationAppliedToStringsInAllArrays) { @@ -181,8 +310,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToStringsInAllArrays) { S2IndexingParams params; CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionParams::initialize2dsphereParams(infoObj, &collator, ¶ms); + BSONObjSet actualKeys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths); BSONObjSet expectedKeys; expectedKeys.insert(BSON("" << getCellID(0, 0) << "" @@ -202,7 +333,8 @@ TEST(S2KeyGeneratorTest, CollationAppliedToStringsInAllArrays) { << "" << "fed")); - ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, {0U}, {0U}}, actualMultikeyPaths); } TEST(S2KeyGeneratorTest, CollationDoesNotAffectNonStringFields) { @@ -212,13 +344,17 @@ TEST(S2KeyGeneratorTest, CollationDoesNotAffectNonStringFields) { S2IndexingParams params; CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionParams::initialize2dsphereParams(infoObj, &collator, ¶ms); + BSONObjSet actualKeys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths); BSONObjSet expectedKeys; expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << 5)); - ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}}, + actualMultikeyPaths); } // TODO SERVER-23172: remove test. @@ -229,14 +365,18 @@ TEST(S2KeyGeneratorTest, CollationDoesNotAffectStringsInEmbeddedDocuments) { S2IndexingParams params; CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionParams::initialize2dsphereParams(infoObj, &collator, ¶ms); + BSONObjSet actualKeys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths); BSONObjSet expectedKeys; expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << BSON("c" << "string"))); - ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}}, + actualMultikeyPaths); } TEST(S2KeyGeneratorTest, NoCollation) { @@ -246,14 +386,18 @@ TEST(S2KeyGeneratorTest, NoCollation) { S2IndexingParams params; const CollatorInterface* collator = nullptr; ExpressionParams::initialize2dsphereParams(infoObj, collator, ¶ms); + BSONObjSet actualKeys; - ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + MultikeyPaths actualMultikeyPaths; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths); BSONObjSet expectedKeys; expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << "string")); - ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); + assertKeysetsEqual(expectedKeys, actualKeys); + assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}}, + actualMultikeyPaths); } } // namespace |