summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax Hirschhorn <max.hirschhorn@mongodb.com>2016-06-03 13:26:35 -0400
committerMax Hirschhorn <max.hirschhorn@mongodb.com>2016-06-03 13:26:35 -0400
commit2a2ca32dd534c801851cadc446906152b4dbeffe (patch)
tree55ceb701ec448d8c7d5a3a325758f55e73038373
parentcecbe424d32cbb475d9b0384d29b98a9fba9c89f (diff)
downloadmongo-2a2ca32dd534c801851cadc446906152b4dbeffe.tar.gz
SERVER-23114 Compute multikey paths in 2dsphere index key generation.
Propagates information about the prefixes of the indexed fields that cause the index to be multikey as a result of inserting the generated keys.
-rw-r--r--src/mongo/db/bson/dotted_path_support.cpp55
-rw-r--r--src/mongo/db/bson/dotted_path_support.h18
-rw-r--r--src/mongo/db/bson/dotted_path_support_test.cpp61
-rw-r--r--src/mongo/db/index/expression_keys_private.cpp104
-rw-r--r--src/mongo/db/index/expression_keys_private.h4
-rw-r--r--src/mongo/db/index/external_key_generator.cpp6
-rw-r--r--src/mongo/db/index/s2_access_method.cpp2
-rw-r--r--src/mongo/db/index/s2_access_method.h6
-rw-r--r--src/mongo/db/index/s2_key_generator_test.cpp208
9 files changed, 376 insertions, 88 deletions
diff --git a/src/mongo/db/bson/dotted_path_support.cpp b/src/mongo/db/bson/dotted_path_support.cpp
index cfb10c5a39f..1a9fda88501 100644
--- a/src/mongo/db/bson/dotted_path_support.cpp
+++ b/src/mongo/db/bson/dotted_path_support.cpp
@@ -50,7 +50,9 @@ template <typename BSONElementColl>
void _extractAllElementsAlongPath(const BSONObj& obj,
StringData path,
BSONElementColl& elements,
- bool expandArrayOnTrailingField) {
+ bool expandArrayOnTrailingField,
+ size_t depth,
+ std::set<size_t>* arrayComponents) {
BSONElement e = obj.getField(path);
if (e.eoo()) {
@@ -62,8 +64,12 @@ void _extractAllElementsAlongPath(const BSONObj& obj,
BSONElement e = obj.getField(left);
if (e.type() == Object) {
- _extractAllElementsAlongPath(
- e.embeddedObject(), next, elements, expandArrayOnTrailingField);
+ _extractAllElementsAlongPath(e.embeddedObject(),
+ next,
+ elements,
+ expandArrayOnTrailingField,
+ depth + 1,
+ arrayComponents);
} else if (e.type() == Array) {
bool allDigits = false;
if (next.size() > 0 && std::isdigit(next[0])) {
@@ -73,15 +79,28 @@ void _extractAllElementsAlongPath(const BSONObj& obj,
allDigits = temp == next.size() || next[temp] == '.';
}
if (allDigits) {
- _extractAllElementsAlongPath(
- e.embeddedObject(), next, elements, expandArrayOnTrailingField);
+ _extractAllElementsAlongPath(e.embeddedObject(),
+ next,
+ elements,
+ expandArrayOnTrailingField,
+ depth + 1,
+ arrayComponents);
} else {
+ size_t nArrElems = 0;
BSONObjIterator i(e.embeddedObject());
while (i.more()) {
BSONElement e2 = i.next();
if (e2.type() == Object || e2.type() == Array)
- _extractAllElementsAlongPath(
- e2.embeddedObject(), next, elements, expandArrayOnTrailingField);
+ _extractAllElementsAlongPath(e2.embeddedObject(),
+ next,
+ elements,
+ expandArrayOnTrailingField,
+ depth + 1,
+ arrayComponents);
+ ++nArrElems;
+ }
+ if (arrayComponents && nArrElems > 1) {
+ arrayComponents->insert(depth);
}
}
} else {
@@ -90,9 +109,15 @@ void _extractAllElementsAlongPath(const BSONObj& obj,
}
} else {
if (e.type() == Array && expandArrayOnTrailingField) {
+ size_t nArrElems = 0;
BSONObjIterator i(e.embeddedObject());
- while (i.more())
+ while (i.more()) {
elements.insert(i.next());
+ ++nArrElems;
+ }
+ if (arrayComponents && nArrElems > 1) {
+ arrayComponents->insert(depth);
+ }
} else {
elements.insert(e);
}
@@ -142,15 +167,21 @@ BSONElement extractElementAtPathOrArrayAlongPath(const BSONObj& obj, const char*
void extractAllElementsAlongPath(const BSONObj& obj,
StringData path,
BSONElementSet& elements,
- bool expandArrayOnTrailingField) {
- _extractAllElementsAlongPath(obj, path, elements, expandArrayOnTrailingField);
+ bool expandArrayOnTrailingField,
+ std::set<size_t>* arrayComponents) {
+ const size_t initialDepth = 0;
+ _extractAllElementsAlongPath(
+ obj, path, elements, expandArrayOnTrailingField, initialDepth, arrayComponents);
}
void extractAllElementsAlongPath(const BSONObj& obj,
StringData path,
BSONElementMSet& elements,
- bool expandArrayOnTrailingField) {
- _extractAllElementsAlongPath(obj, path, elements, expandArrayOnTrailingField);
+ bool expandArrayOnTrailingField,
+ std::set<size_t>* arrayComponents) {
+ const size_t initialDepth = 0;
+ _extractAllElementsAlongPath(
+ obj, path, elements, expandArrayOnTrailingField, initialDepth, arrayComponents);
}
BSONObj extractElementsBasedOnTemplate(const BSONObj& obj,
diff --git a/src/mongo/db/bson/dotted_path_support.h b/src/mongo/db/bson/dotted_path_support.h
index 9e2a3c9e9d0..c9c6676c75a 100644
--- a/src/mongo/db/bson/dotted_path_support.h
+++ b/src/mongo/db/bson/dotted_path_support.h
@@ -82,25 +82,31 @@ BSONElement extractElementAtPathOrArrayAlongPath(const BSONObj& obj, const char*
* The 'path' can be specified using a dotted notation in order to traverse through embedded objects
* and array elements.
*
+ * This function fills 'arrayComponents' with the positions (starting at 0) of 'path' corresponding
+ * to array values with multiple elements.
+ *
* Some examples:
*
* Consider the document {a: [{b: 1}, {b: 2}]} and the path "a.b". The elements {b: 1} and {b: 2}
- * would be added to the set.
+ * would be added to the set. 'arrayComponents' would be set as std::set<size_t>{0U}.
*
* Consider the document {a: [{b: [1, 2]}, {b: [2, 3]}]} and the path "a.b". The elements {b: 1},
- * {b: 2}, and {b: 3} would be added to the set if 'expandArrayOnTrailingField' is true. The
- * elements {b: [1, 2]} and {b: [2, 3]} would be added to the set if 'expandArrayOnTrailingField'
- * is false.
+ * {b: 2}, and {b: 3} would be added to the set and 'arrayComponents' would be set as
+ * std::set<size_t>{0U, 1U} if 'expandArrayOnTrailingField' is true. The elements {b: [1, 2]} and
+ * {b: [2, 3]} would be added to the set and 'arrayComponents' would be set as
+ * std::set<size_t>{0U} if 'expandArrayOnTrailingField' is false.
*/
void extractAllElementsAlongPath(const BSONObj& obj,
StringData path,
BSONElementSet& elements,
- bool expandArrayOnTrailingField = true);
+ bool expandArrayOnTrailingField = true,
+ std::set<std::size_t>* arrayComponents = nullptr);
void extractAllElementsAlongPath(const BSONObj& obj,
StringData path,
BSONElementMSet& elements,
- bool expandArrayOnTrailingField = true);
+ bool expandArrayOnTrailingField = true,
+ std::set<std::size_t>* arrayComponents = nullptr);
/**
* Returns an owned BSONObj with elements in the same order as they appear in the 'pattern' object
diff --git a/src/mongo/db/bson/dotted_path_support_test.cpp b/src/mongo/db/bson/dotted_path_support_test.cpp
index 78d3253d8b5..4473721c3d6 100644
--- a/src/mongo/db/bson/dotted_path_support_test.cpp
+++ b/src/mongo/db/bson/dotted_path_support_test.cpp
@@ -28,6 +28,7 @@
#include "mongo/platform/basic.h"
+#include <set>
#include <vector>
#include "mongo/bson/bsonelement.h"
@@ -139,14 +140,42 @@ void assertBSONElementSetsAreEqual(const std::vector<BSONObj>& expectedObjs,
}
}
+void dumpArrayComponents(const std::set<size_t>& arrayComponents, StringBuilder* sb) {
+ *sb << "[ ";
+ bool firstIteration = true;
+ for (const auto pos : arrayComponents) {
+ if (!firstIteration) {
+ *sb << ", ";
+ }
+ *sb << pos;
+ firstIteration = false;
+ }
+ *sb << " ]";
+}
+
+void assertArrayComponentsAreEqual(const std::set<size_t>& expectedArrayComponents,
+ const std::set<size_t>& actualArrayComponents) {
+ if (expectedArrayComponents != actualArrayComponents) {
+ StringBuilder sb;
+ sb << "Expected: ";
+ dumpArrayComponents(expectedArrayComponents, &sb);
+ sb << ", Actual: ";
+ dumpArrayComponents(actualArrayComponents, &sb);
+ FAIL(sb.str());
+ }
+}
+
TEST(ExtractAllElementsAlongPath, NestedObjectWithScalarValue) {
BSONObj obj = BSON("a" << BSON("b" << 1));
BSONElementSet actualElements;
const bool expandArrayOnTrailingField = true;
- dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField);
+ std::set<size_t> actualArrayComponents;
+ dps::extractAllElementsAlongPath(
+ obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents);
assertBSONElementSetsAreEqual({BSON("" << 1)}, actualElements);
+ assertArrayComponentsAreEqual(std::set<size_t>{}, actualArrayComponents);
}
TEST(ExtractAllElementsAlongPath, NestedObjectWithEmptyArrayValue) {
@@ -154,9 +183,12 @@ TEST(ExtractAllElementsAlongPath, NestedObjectWithEmptyArrayValue) {
BSONElementSet actualElements;
const bool expandArrayOnTrailingField = true;
- dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField);
+ std::set<size_t> actualArrayComponents;
+ dps::extractAllElementsAlongPath(
+ obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents);
assertBSONElementSetsAreEqual(std::vector<BSONObj>{}, actualElements);
+ assertArrayComponentsAreEqual(std::set<size_t>{}, actualArrayComponents);
}
TEST(ExtractAllElementsAlongPath, NestedObjectWithSingletonArrayValue) {
@@ -164,9 +196,12 @@ TEST(ExtractAllElementsAlongPath, NestedObjectWithSingletonArrayValue) {
BSONElementSet actualElements;
const bool expandArrayOnTrailingField = true;
- dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField);
+ std::set<size_t> actualArrayComponents;
+ dps::extractAllElementsAlongPath(
+ obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents);
assertBSONElementSetsAreEqual({BSON("" << 1)}, actualElements);
+ assertArrayComponentsAreEqual(std::set<size_t>{}, actualArrayComponents);
}
TEST(ExtractAllElementsAlongPath, NestedObjectWithArrayValue) {
@@ -174,9 +209,12 @@ TEST(ExtractAllElementsAlongPath, NestedObjectWithArrayValue) {
BSONElementSet actualElements;
const bool expandArrayOnTrailingField = true;
- dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField);
+ std::set<size_t> actualArrayComponents;
+ dps::extractAllElementsAlongPath(
+ obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents);
assertBSONElementSetsAreEqual({BSON("" << 1), BSON("" << 2), BSON("" << 3)}, actualElements);
+ assertArrayComponentsAreEqual({1U}, actualArrayComponents);
}
TEST(ExtractAllElementsAlongPath, ObjectWithArrayOfSubobjectsWithScalarValue) {
@@ -184,9 +222,12 @@ TEST(ExtractAllElementsAlongPath, ObjectWithArrayOfSubobjectsWithScalarValue) {
BSONElementSet actualElements;
const bool expandArrayOnTrailingField = true;
- dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField);
+ std::set<size_t> actualArrayComponents;
+ dps::extractAllElementsAlongPath(
+ obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents);
assertBSONElementSetsAreEqual({BSON("" << 1), BSON("" << 2), BSON("" << 3)}, actualElements);
+ assertArrayComponentsAreEqual({0U}, actualArrayComponents);
}
TEST(ExtractAllElementsAlongPath, ObjectWithArrayOfSubobjectsWithArrayValues) {
@@ -196,9 +237,12 @@ TEST(ExtractAllElementsAlongPath, ObjectWithArrayOfSubobjectsWithArrayValues) {
BSONElementSet actualElements;
const bool expandArrayOnTrailingField = true;
- dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField);
+ std::set<size_t> actualArrayComponents;
+ dps::extractAllElementsAlongPath(
+ obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents);
assertBSONElementSetsAreEqual({BSON("" << 1), BSON("" << 2), BSON("" << 3)}, actualElements);
+ assertArrayComponentsAreEqual({0U, 1U}, actualArrayComponents);
}
TEST(ExtractAllElementsAlongPath,
@@ -208,11 +252,14 @@ TEST(ExtractAllElementsAlongPath,
BSONElementSet actualElements;
const bool expandArrayOnTrailingField = false;
- dps::extractAllElementsAlongPath(obj, "a.b", actualElements, expandArrayOnTrailingField);
+ std::set<size_t> actualArrayComponents;
+ dps::extractAllElementsAlongPath(
+ obj, "a.b", actualElements, expandArrayOnTrailingField, &actualArrayComponents);
assertBSONElementSetsAreEqual(
{BSON("" << BSON_ARRAY(1)), BSON("" << BSON_ARRAY(2)), BSON("" << BSON_ARRAY(3))},
actualElements);
+ assertArrayComponentsAreEqual({0U}, actualArrayComponents);
}
} // namespace
diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp
index e44c950758a..89f8614f0b2 100644
--- a/src/mongo/db/index/expression_keys_private.cpp
+++ b/src/mongo/db/index/expression_keys_private.cpp
@@ -33,6 +33,7 @@
#include <utility>
#include "mongo/db/bson/dotted_path_support.h"
+#include "mongo/db/field_ref.h"
#include "mongo/db/fts/fts_index_format.h"
#include "mongo/db/geo/geoconstants.h"
#include "mongo/db/geo/geometry_container.h"
@@ -118,13 +119,16 @@ Status S2GetKeysForElement(const BSONElement& element,
/**
- * Get the index keys for elements that are GeoJSON.
- * Used by getS2Keys.
+ * Fills 'out' with the S2 keys that should be generated for 'elements' in a 2dsphere index.
+ *
+ * Returns true if an indexed element of the document uses multiple cells for its covering, and
+ * returns false otherwise.
*/
-void getS2GeoKeys(const BSONObj& document,
+bool getS2GeoKeys(const BSONObj& document,
const BSONElementSet& elements,
const S2IndexingParams& params,
BSONObjSet* out) {
+ bool everGeneratedMultipleCells = false;
for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) {
vector<S2CellId> cells;
Status status = S2GetKeysForElement(*i, params, &cells);
@@ -139,6 +143,8 @@ void getS2GeoKeys(const BSONObj& document,
for (vector<S2CellId>::const_iterator it = cells.begin(); it != cells.end(); ++it) {
out->insert(S2CellIdToIndexKey(*it, params.indexVersion));
}
+
+ everGeneratedMultipleCells = everGeneratedMultipleCells || cells.size() > 1;
}
if (0 == out->size()) {
@@ -146,13 +152,16 @@ void getS2GeoKeys(const BSONObj& document,
b.appendNull("");
out->insert(b.obj());
}
+ return everGeneratedMultipleCells;
}
/**
- * Expands array and appends items to 'out'.
- * Used by getOneLiteralKey.
+ * Fills 'out' with the keys that should be generated for an array value 'obj' in a 2dsphere index.
+ * A key is generated for each element of the array value 'obj'.
+ *
+ * Returns true if 'obj' contains more than one element, and returns false otherwise.
*/
-void getS2LiteralKeysArray(const BSONObj& obj, const CollatorInterface* collator, BSONObjSet* out) {
+bool getS2LiteralKeysArray(const BSONObj& obj, const CollatorInterface* collator, BSONObjSet* out) {
BSONObjIterator objIt(obj);
if (!objIt.more()) {
// Empty arrays are indexed as undefined.
@@ -161,39 +170,54 @@ void getS2LiteralKeysArray(const BSONObj& obj, const CollatorInterface* collator
out->insert(b.obj());
} else {
// Non-empty arrays are exploded.
+ size_t nArrElems = 0;
while (objIt.more()) {
BSONObjBuilder b;
CollationIndexKey::collationAwareIndexKeyAppend(objIt.next(), collator, &b);
out->insert(b.obj());
+ ++nArrElems;
+ }
+
+ if (nArrElems > 1) {
+ return true;
}
}
+ return false;
}
/**
- * If 'elt' is an array, expands elt and adds items to 'out'.
- * Otherwise, adds 'elt' as a single element.
- * Used by getLiteralKeys.
+ * Fills 'out' with the keys that should be generated for a value 'elt' in a 2dsphere index. If
+ * 'elt' is an array value, then a key is generated for each element of the array value 'obj'.
+ *
+ * Returns true if 'elt' is an array value that contains more than one element, and returns false
+ * otherwise.
*/
-void getS2OneLiteralKey(const BSONElement& elt,
+bool getS2OneLiteralKey(const BSONElement& elt,
const CollatorInterface* collator,
BSONObjSet* out) {
if (Array == elt.type()) {
- getS2LiteralKeysArray(elt.Obj(), collator, out);
+ return getS2LiteralKeysArray(elt.Obj(), collator, out);
} else {
// One thing, not an array, index as-is.
BSONObjBuilder b;
CollationIndexKey::collationAwareIndexKeyAppend(elt, collator, &b);
out->insert(b.obj());
}
+ return false;
}
/**
- * elements is a non-geo field. Add the values literally, expanding arrays.
- * Used by getS2Keys.
+ * Fills 'out' with the non-geo keys that should be generated for 'elements' in a 2dsphere index. If
+ * any element in 'elements' is an array value, then a key is generated for each element of that
+ * array value.
+ *
+ * Returns true if any element of 'elements' is an array value that contains more than one element,
+ * and returns false otherwise.
*/
-void getS2LiteralKeys(const BSONElementSet& elements,
+bool getS2LiteralKeys(const BSONElementSet& elements,
const CollatorInterface* collator,
BSONObjSet* out) {
+ bool indexedArrayValueWithMultipleElements = false;
if (0 == elements.size()) {
// Missing fields are indexed as null.
BSONObjBuilder b;
@@ -201,9 +225,12 @@ void getS2LiteralKeys(const BSONElementSet& elements,
out->insert(b.obj());
} else {
for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) {
- getS2OneLiteralKey(*i, collator, out);
+ const bool thisElemIsArrayWithMultipleElements = getS2OneLiteralKey(*i, collator, out);
+ indexedArrayValueWithMultipleElements =
+ indexedArrayValueWithMultipleElements || thisElemIsArrayWithMultipleElements;
}
}
+ return indexedArrayValueWithMultipleElements;
}
} // namespace
@@ -431,25 +458,40 @@ std::string ExpressionKeysPrivate::makeHaystackString(int hashedX, int hashedY)
void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
const BSONObj& keyPattern,
const S2IndexingParams& params,
- BSONObjSet* keys) {
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) {
BSONObjSet keysToAdd;
// Does one of our documents have a geo field?
bool haveGeoField = false;
- // We output keys in the same order as the fields we index.
- BSONObjIterator i(keyPattern);
- while (i.more()) {
- BSONElement e = i.next();
+ if (multikeyPaths) {
+ invariant(multikeyPaths->empty());
+ multikeyPaths->resize(keyPattern.nFields());
+ }
+ size_t posInIdx = 0;
+
+ // We output keys in the same order as the fields we index.
+ for (const auto keyElem : keyPattern) {
// First, we get the keys that this field adds. Either they're added literally from
// the value of the field, or they're transformed if the field is geo.
BSONElementSet fieldElements;
- // false means Don't expand the last array, duh.
- dps::extractAllElementsAlongPath(obj, e.fieldName(), fieldElements, false);
-
+ const bool expandArrayOnTrailingField = false;
+ std::set<size_t>* arrayComponents = multikeyPaths ? &(*multikeyPaths)[posInIdx] : nullptr;
+ dps::extractAllElementsAlongPath(
+ obj, keyElem.fieldName(), fieldElements, expandArrayOnTrailingField, arrayComponents);
+
+ // Trailing array values aren't being expanded, so we still need to determine whether the
+ // last component of the indexed path 'keyElem.fieldName()' causes the index to be multikey.
+ // We say that it does if
+ // (a) the last component of the indexed path ever refers to an array value containing
+ // multiple elements, or if
+ // (b) the last component of the indexed path ever refers to GeoJSON data that requires
+ // multiple cells for its covering.
+ bool lastPathComponentCausesIndexToBeMultikey;
BSONObjSet keysForThisField;
- if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
+ if (IndexNames::GEO_2DSPHERE == keyElem.valuestr()) {
if (params.indexVersion >= S2_INDEX_VERSION_2) {
// For >= V2,
// geo: null,
@@ -478,20 +520,29 @@ void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
}
}
- getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
+ lastPathComponentCausesIndexToBeMultikey =
+ getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
} else {
- getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
+ lastPathComponentCausesIndexToBeMultikey =
+ getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
}
// We expect there to be the missing field element present in the keys if data is
// missing. So, this should be non-empty.
verify(!keysForThisField.empty());
+ if (multikeyPaths && lastPathComponentCausesIndexToBeMultikey) {
+ const size_t pathLengthOfThisField = FieldRef{keyElem.fieldNameStringData()}.numParts();
+ invariant(pathLengthOfThisField > 0);
+ (*multikeyPaths)[posInIdx].insert(pathLengthOfThisField - 1);
+ }
+
// We take the Cartesian product of all of the keys. This requires that we have
// some keys to take the Cartesian product with. If keysToAdd.empty(), we
// initialize it.
if (keysToAdd.empty()) {
keysToAdd = keysForThisField;
+ ++posInIdx;
continue;
}
@@ -507,6 +558,7 @@ void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
}
}
keysToAdd = updatedKeysToAdd;
+ ++posInIdx;
}
// Make sure that if we're >= V2 there's at least one geo field present in the doc.
diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h
index 5206f4a6768..a9b032ea97d 100644
--- a/src/mongo/db/index/expression_keys_private.h
+++ b/src/mongo/db/index/expression_keys_private.h
@@ -33,6 +33,7 @@
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/db/hasher.h"
+#include "mongo/db/index/multikey_paths.h"
namespace mongo {
@@ -125,7 +126,8 @@ public:
static void getS2Keys(const BSONObj& obj,
const BSONObj& keyPattern,
const S2IndexingParams& params,
- BSONObjSet* keys);
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths);
};
} // namespace mongo
diff --git a/src/mongo/db/index/external_key_generator.cpp b/src/mongo/db/index/external_key_generator.cpp
index 1ab9c1ad9ae..192354dc7eb 100644
--- a/src/mongo/db/index/external_key_generator.cpp
+++ b/src/mongo/db/index/external_key_generator.cpp
@@ -64,7 +64,11 @@ void getKeysForUpgradeChecking(const BSONObj& infoObj, const BSONObj& doc, BSONO
// generated will be wrong.
CollatorInterface* collator = nullptr;
ExpressionParams::initialize2dsphereParams(infoObj, collator, &params);
- ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, keys);
+
+ // There's no need to compute the prefixes of the indexed fields that cause the index to be
+ // multikey when checking if any index key is too large.
+ MultikeyPaths* multikeyPaths = nullptr;
+ ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, keys, multikeyPaths);
} else if (IndexNames::TEXT == type) {
fts::FTSSpec spec(infoObj);
ExpressionKeysPrivate::getFTSKeys(doc, spec, keys);
diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp
index eee92fa8037..32b94a15ca0 100644
--- a/src/mongo/db/index/s2_access_method.cpp
+++ b/src/mongo/db/index/s2_access_method.cpp
@@ -143,7 +143,7 @@ StatusWith<BSONObj> S2AccessMethod::fixSpec(const BSONObj& specObj) {
void S2AccessMethod::getKeys(const BSONObj& obj,
BSONObjSet* keys,
MultikeyPaths* multikeyPaths) const {
- ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys);
+ ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys, multikeyPaths);
}
} // namespace mongo
diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h
index 3d39e10fcc9..c0104fe144d 100644
--- a/src/mongo/db/index/s2_access_method.h
+++ b/src/mongo/db/index/s2_access_method.h
@@ -58,8 +58,10 @@ private:
* This function ignores the 'multikeyPaths' pointer because text indexes don't support tracking
* path-level multikey information.
*
- * TODO SERVER-23114: Return prefixes of the indexed fields that cause the index to be multikey
- * as a result of inserting 'keys'.
+ * If the 'multikeyPaths' pointer is non-null, then it must point to an empty vector. This
+ * function resizes 'multikeyPaths' to have the same number of elements as the index key pattern
+ * and fills each element with the prefixes of the indexed field that would cause this index to
+ * be multikey as a result of inserting 'keys'.
*/
void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
diff --git a/src/mongo/db/index/s2_key_generator_test.cpp b/src/mongo/db/index/s2_key_generator_test.cpp
index 1002ea1ecde..e7399eed0b3 100644
--- a/src/mongo/db/index/s2_key_generator_test.cpp
+++ b/src/mongo/db/index/s2_key_generator_test.cpp
@@ -26,8 +26,6 @@
* it in the license file.
*/
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex
-
#include "mongo/platform/basic.h"
#include "mongo/db/index/expression_keys_private.h"
@@ -38,7 +36,7 @@
#include "mongo/db/json.h"
#include "mongo/db/query/collation/collator_interface_mock.h"
#include "mongo/unittest/unittest.h"
-#include "mongo/util/log.h"
+#include "mongo/util/mongoutils/str.h"
using namespace mongo;
@@ -55,31 +53,142 @@ std::string dumpKeyset(const BSONObjSet& objs) {
return ss.str();
}
-bool assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) {
+std::string dumpMultikeyPaths(const MultikeyPaths& multikeyPaths) {
+ std::stringstream ss;
+
+ ss << "[ ";
+ for (const auto multikeyComponents : multikeyPaths) {
+ ss << "[ ";
+ for (const auto multikeyComponent : multikeyComponents) {
+ ss << multikeyComponent << " ";
+ }
+ ss << "] ";
+ }
+ ss << "]";
+
+ return ss.str();
+}
+
+void assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) {
if (expectedKeys != actualKeys) {
- log() << "Expected: " << dumpKeyset(expectedKeys) << ", "
- << "Actual: " << dumpKeyset(actualKeys);
- return false;
+ FAIL(str::stream() << "Expected: " << dumpKeyset(expectedKeys) << ", Actual: "
+ << dumpKeyset(actualKeys));
+ }
+}
+
+void assertMultikeyPathsEqual(const MultikeyPaths& expectedMultikeyPaths,
+ const MultikeyPaths& actualMultikeyPaths) {
+ if (expectedMultikeyPaths != actualMultikeyPaths) {
+ FAIL(str::stream() << "Expected: " << dumpMultikeyPaths(expectedMultikeyPaths)
+ << ", Actual: "
+ << dumpMultikeyPaths(actualMultikeyPaths));
}
- return true;
}
-long long getCellID(int x, int y) {
- BSONObj obj = BSON("a" << BSON("type"
- << "Point"
- << "coordinates"
- << BSON_ARRAY(x << y)));
+long long getCellID(int x, int y, bool multiPoint = false) {
+ BSONObj obj;
+ if (multiPoint) {
+ obj = BSON("a" << BSON("type"
+ << "MultiPoint"
+ << "coordinates"
+ << BSON_ARRAY(BSON_ARRAY(x << y))));
+ } else {
+ obj = BSON("a" << BSON("type"
+ << "Point"
+ << "coordinates"
+ << BSON_ARRAY(x << y)));
+ }
BSONObj keyPattern = fromjson("{a: '2dsphere'}");
BSONObj infoObj = fromjson("{key: {a: '2dsphere'}, '2dsphereIndexVersion': 3}");
S2IndexingParams params;
const CollatorInterface* collator = nullptr;
ExpressionParams::initialize2dsphereParams(infoObj, collator, &params);
+
BSONObjSet keys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &keys);
+ // There's no need to compute the prefixes of the indexed fields that cause the index to be
+ // multikey when computing the cell id of the geo field.
+ MultikeyPaths* multikeyPaths = nullptr;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &keys, multikeyPaths);
+
ASSERT_EQUALS(1U, keys.size());
return (*keys.begin()).firstElement().Long();
}
+TEST(S2KeyGeneratorTest, GetS2KeysFromSubobjectWithArrayOfGeoAndNonGeoSubobjects) {
+ BSONObj keyPattern = fromjson("{'a.b.nongeo': 1, 'a.b.geo': '2dsphere'}");
+ BSONObj genKeysFrom = fromjson(
+ "{a: {b: [{nongeo: 1, geo: {type: 'Point', coordinates: [0, 0]}}, "
+ "{nongeo: 2, geo: {type: 'Point', coordinates: [3, 3]}}]}}");
+ BSONObj infoObj =
+ fromjson("{key: {'a.b.nongeo': 1, 'a.b.geo': '2dsphere'}, '2dsphereIndexVersion': 3}");
+ S2IndexingParams params;
+ CollatorInterfaceMock* collator = nullptr;
+ ExpressionParams::initialize2dsphereParams(infoObj, collator, &params);
+
+ BSONObjSet actualKeys;
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(
+ genKeysFrom, keyPattern, params, &actualKeys, &actualMultikeyPaths);
+
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(BSON("" << 1 << "" << getCellID(0, 0)));
+ expectedKeys.insert(BSON("" << 1 << "" << getCellID(3, 3)));
+ expectedKeys.insert(BSON("" << 2 << "" << getCellID(0, 0)));
+ expectedKeys.insert(BSON("" << 2 << "" << getCellID(3, 3)));
+
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{{1U}, {1U}}, actualMultikeyPaths);
+}
+
+TEST(S2KeyGeneratorTest, GetS2KeysFromArrayOfNonGeoSubobjectsWithArrayValues) {
+ BSONObj keyPattern = fromjson("{'a.nongeo': 1, geo: '2dsphere'}");
+ BSONObj genKeysFrom = fromjson(
+ "{a: [{nongeo: [1, 2]}, {nongeo: [2, 3]}], "
+ "geo: {type: 'Point', coordinates: [0, 0]}}");
+ BSONObj infoObj =
+ fromjson("{key: {'a.nongeo': 1, geo: '2dsphere'}, '2dsphereIndexVersion': 3}");
+ S2IndexingParams params;
+ CollatorInterfaceMock* collator = nullptr;
+ ExpressionParams::initialize2dsphereParams(infoObj, collator, &params);
+
+ BSONObjSet actualKeys;
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(
+ genKeysFrom, keyPattern, params, &actualKeys, &actualMultikeyPaths);
+
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(BSON("" << 1 << "" << getCellID(0, 0)));
+ expectedKeys.insert(BSON("" << 2 << "" << getCellID(0, 0)));
+ expectedKeys.insert(BSON("" << 3 << "" << getCellID(0, 0)));
+
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{{0U, 1U}, std::set<size_t>{}}, actualMultikeyPaths);
+}
+
+TEST(S2KeyGeneratorTest, GetS2KeysFromMultiPointInGeoField) {
+ BSONObj keyPattern = fromjson("{nongeo: 1, geo: '2dsphere'}");
+ BSONObj genKeysFrom =
+ fromjson("{nongeo: 1, geo: {type: 'MultiPoint', coordinates: [[0, 0], [1, 0], [1, 1]]}}");
+ BSONObj infoObj = fromjson("{key: {nongeo: 1, geo: '2dsphere'}, '2dsphereIndexVersion': 3}");
+ S2IndexingParams params;
+ CollatorInterfaceMock* collator = nullptr;
+ ExpressionParams::initialize2dsphereParams(infoObj, collator, &params);
+
+ BSONObjSet actualKeys;
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(
+ genKeysFrom, keyPattern, params, &actualKeys, &actualMultikeyPaths);
+
+ const bool multiPoint = true;
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(BSON("" << 1 << "" << getCellID(0, 0, multiPoint)));
+ expectedKeys.insert(BSON("" << 1 << "" << getCellID(1, 0, multiPoint)));
+ expectedKeys.insert(BSON("" << 1 << "" << getCellID(1, 1, multiPoint)));
+
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, {0U}}, actualMultikeyPaths);
+}
+
TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldAfterGeoField) {
BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: 'string'}");
BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}");
@@ -87,14 +196,18 @@ TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldAfterGeoField) {
S2IndexingParams params;
CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+
BSONObjSet actualKeys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths);
BSONObjSet expectedKeys;
expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
<< "gnirts"));
- ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}},
+ actualMultikeyPaths);
}
TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldBeforeGeoField) {
@@ -104,8 +217,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldBeforeGeoField) {
S2IndexingParams params;
CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+
BSONObjSet actualKeys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths);
BSONObjSet expectedKeys;
expectedKeys.insert(BSON(""
@@ -113,7 +228,9 @@ TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldBeforeGeoField) {
<< ""
<< getCellID(0, 0)));
- ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}},
+ actualMultikeyPaths);
}
TEST(S2KeyGeneratorTest, CollationAppliedToAllNonGeoStringFields) {
@@ -123,8 +240,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToAllNonGeoStringFields) {
S2IndexingParams params;
CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+
BSONObjSet actualKeys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths);
BSONObjSet expectedKeys;
expectedKeys.insert(BSON(""
@@ -134,7 +253,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToAllNonGeoStringFields) {
<< ""
<< "2gnirts"));
- ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(
+ MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}, std::set<size_t>{}},
+ actualMultikeyPaths);
}
TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldWithMultiplePathComponents) {
@@ -144,14 +266,18 @@ TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldWithMultiplePathComp
S2IndexingParams params;
CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+
BSONObjSet actualKeys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths);
BSONObjSet expectedKeys;
expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
<< "gnirts"));
- ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}},
+ actualMultikeyPaths);
}
TEST(S2KeyGeneratorTest, CollationAppliedToStringsInArray) {
@@ -161,8 +287,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToStringsInArray) {
S2IndexingParams params;
CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+
BSONObjSet actualKeys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths);
BSONObjSet expectedKeys;
expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
@@ -170,7 +298,8 @@ TEST(S2KeyGeneratorTest, CollationAppliedToStringsInArray) {
expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
<< "2gnirts"));
- ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, {0U}}, actualMultikeyPaths);
}
TEST(S2KeyGeneratorTest, CollationAppliedToStringsInAllArrays) {
@@ -181,8 +310,10 @@ TEST(S2KeyGeneratorTest, CollationAppliedToStringsInAllArrays) {
S2IndexingParams params;
CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+
BSONObjSet actualKeys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths);
BSONObjSet expectedKeys;
expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
@@ -202,7 +333,8 @@ TEST(S2KeyGeneratorTest, CollationAppliedToStringsInAllArrays) {
<< ""
<< "fed"));
- ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, {0U}, {0U}}, actualMultikeyPaths);
}
TEST(S2KeyGeneratorTest, CollationDoesNotAffectNonStringFields) {
@@ -212,13 +344,17 @@ TEST(S2KeyGeneratorTest, CollationDoesNotAffectNonStringFields) {
S2IndexingParams params;
CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+
BSONObjSet actualKeys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths);
BSONObjSet expectedKeys;
expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << 5));
- ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}},
+ actualMultikeyPaths);
}
// TODO SERVER-23172: remove test.
@@ -229,14 +365,18 @@ TEST(S2KeyGeneratorTest, CollationDoesNotAffectStringsInEmbeddedDocuments) {
S2IndexingParams params;
CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+
BSONObjSet actualKeys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths);
BSONObjSet expectedKeys;
expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << BSON("c"
<< "string")));
- ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}},
+ actualMultikeyPaths);
}
TEST(S2KeyGeneratorTest, NoCollation) {
@@ -246,14 +386,18 @@ TEST(S2KeyGeneratorTest, NoCollation) {
S2IndexingParams params;
const CollatorInterface* collator = nullptr;
ExpressionParams::initialize2dsphereParams(infoObj, collator, &params);
+
BSONObjSet actualKeys;
- ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ MultikeyPaths actualMultikeyPaths;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys, &actualMultikeyPaths);
BSONObjSet expectedKeys;
expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
<< "string"));
- ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+ assertKeysetsEqual(expectedKeys, actualKeys);
+ assertMultikeyPathsEqual(MultikeyPaths{std::set<size_t>{}, std::set<size_t>{}},
+ actualMultikeyPaths);
}
} // namespace