summaryrefslogtreecommitdiff
path: root/src/mongo/db/index/btree_key_generator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/index/btree_key_generator.cpp')
-rw-r--r--src/mongo/db/index/btree_key_generator.cpp158
1 files changed, 128 insertions, 30 deletions
diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp
index e323272b044..ccf4df2de96 100644
--- a/src/mongo/db/index/btree_key_generator.cpp
+++ b/src/mongo/db/index/btree_key_generator.cpp
@@ -26,8 +26,12 @@
* it in the license file.
*/
-#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/index/btree_key_generator.h"
+
+#include <boost/optional.hpp>
+
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/field_ref.h"
#include "mongo/util/mongoutils/str.h"
namespace mongo {
@@ -58,7 +62,9 @@ BtreeKeyGenerator::BtreeKeyGenerator(std::vector<const char*> fieldNames,
_isIdIndex = fieldNames.size() == 1 && std::string("_id") == fieldNames[0];
}
-void BtreeKeyGenerator::getKeys(const BSONObj& obj, BSONObjSet* keys) const {
+void BtreeKeyGenerator::getKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const {
if (_isIdIndex) {
// we special case for speed
BSONElement e = obj["_id"];
@@ -76,7 +82,7 @@ void BtreeKeyGenerator::getKeys(const BSONObj& obj, BSONObjSet* keys) const {
// '_fieldNames' and '_fixed' are passed by value so that they can be mutated as part of the
// getKeys call. :|
- getKeysImpl(_fieldNames, _fixed, obj, keys);
+ getKeysImpl(_fieldNames, _fixed, obj, keys, multikeyPaths);
if (keys->empty() && !_isSparse) {
keys->insert(_nullKey);
}
@@ -96,7 +102,8 @@ BtreeKeyGeneratorV0::BtreeKeyGeneratorV0(std::vector<const char*> fieldNames,
void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames,
std::vector<BSONElement> fixed,
const BSONObj& obj,
- BSONObjSet* keys) const {
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const {
BSONElement arrElt;
unsigned arrIdx = ~0;
unsigned numNotFound = 0;
@@ -181,7 +188,7 @@ void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames,
while (i.more()) {
BSONElement e = i.next();
if (e.type() == Object) {
- getKeysImpl(fieldNames, fixed, e.embeddedObject(), keys);
+ getKeysImpl(fieldNames, fixed, e.embeddedObject(), keys, multikeyPaths);
}
}
} else {
@@ -210,7 +217,13 @@ void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames,
BtreeKeyGeneratorV1::BtreeKeyGeneratorV1(std::vector<const char*> fieldNames,
std::vector<BSONElement> fixed,
bool isSparse)
- : BtreeKeyGenerator(fieldNames, fixed, isSparse), _emptyPositionalInfo(fieldNames.size()) {}
+ : BtreeKeyGenerator(fieldNames, fixed, isSparse), _emptyPositionalInfo(fieldNames.size()) {
+ for (const char* fieldName : fieldNames) {
+ size_t pathLength = FieldRef{fieldName}.numParts();
+ invariant(pathLength > 0);
+ _pathLengths.push_back(pathLength);
+ }
+}
BSONElement BtreeKeyGeneratorV1::extractNextElement(const BSONObj& obj,
const PositionalPathInfo& positionalInfo,
@@ -242,19 +255,18 @@ BSONElement BtreeKeyGeneratorV1::extractNextElement(const BSONObj& obj,
return BSONElement();
}
-void BtreeKeyGeneratorV1::_getKeysArrEltFixed(
- std::vector<const char*>* fieldNames,
- std::vector<BSONElement>* fixed,
- const BSONElement& arrEntry,
- BSONObjSet* keys,
- unsigned numNotFound,
- const BSONElement& arrObjElt,
- const std::set<unsigned>& arrIdxs,
- bool mayExpandArrayUnembedded,
- const std::vector<PositionalPathInfo>& positionalInfo) const {
+void BtreeKeyGeneratorV1::_getKeysArrEltFixed(std::vector<const char*>* fieldNames,
+ std::vector<BSONElement>* fixed,
+ const BSONElement& arrEntry,
+ BSONObjSet* keys,
+ unsigned numNotFound,
+ const BSONElement& arrObjElt,
+ const std::set<size_t>& arrIdxs,
+ bool mayExpandArrayUnembedded,
+ const std::vector<PositionalPathInfo>& positionalInfo,
+ MultikeyPaths* multikeyPaths) const {
// Set up any terminal array values.
- for (std::set<unsigned>::const_iterator j = arrIdxs.begin(); j != arrIdxs.end(); ++j) {
- unsigned idx = *j;
+ for (const auto idx : arrIdxs) {
if (*(*fieldNames)[idx] == '\0') {
(*fixed)[idx] = mayExpandArrayUnembedded ? arrEntry : arrObjElt;
}
@@ -266,14 +278,19 @@ void BtreeKeyGeneratorV1::_getKeysArrEltFixed(
arrEntry.type() == Object ? arrEntry.embeddedObject() : BSONObj(),
keys,
numNotFound,
- positionalInfo);
+ positionalInfo,
+ multikeyPaths);
}
void BtreeKeyGeneratorV1::getKeysImpl(std::vector<const char*> fieldNames,
std::vector<BSONElement> fixed,
const BSONObj& obj,
- BSONObjSet* keys) const {
- getKeysImplWithArray(fieldNames, fixed, obj, keys, 0, _emptyPositionalInfo);
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const {
+ if (multikeyPaths) {
+ multikeyPaths->resize(fieldNames.size());
+ }
+ getKeysImplWithArray(fieldNames, fixed, obj, keys, 0, _emptyPositionalInfo, multikeyPaths);
}
void BtreeKeyGeneratorV1::getKeysImplWithArray(
@@ -282,11 +299,38 @@ void BtreeKeyGeneratorV1::getKeysImplWithArray(
const BSONObj& obj,
BSONObjSet* keys,
unsigned numNotFound,
- const std::vector<PositionalPathInfo>& positionalInfo) const {
+ const std::vector<PositionalPathInfo>& positionalInfo,
+ MultikeyPaths* multikeyPaths) const {
BSONElement arrElt;
- std::set<unsigned> arrIdxs;
+
+ // A set containing the position of any indexed fields in the key pattern that traverse through
+ // the 'arrElt' array value.
+ std::set<size_t> arrIdxs;
+
+ // A vector with size equal to the number of elements in the index key pattern. Each element in
+ // the vector, if initialized, refers to the component within the indexed field that traverses
+ // through the 'arrElt' array value. We say that this component within the indexed field
+ // corresponds to a path that causes the index to be multikey if the 'arrElt' array value
+ // contains multiple elements.
+ //
+ // For example, consider the index {'a.b': 1, 'a.c'} and the document
+ // {a: [{b: 1, c: 'x'}, {b: 2, c: 'y'}]}. The path "a" causes the index to be multikey, so we'd
+ // have a std::vector<boost::optional<size_t>>{{0U}, {0U}}.
+ //
+ // Furthermore, due to how positional key patterns are specified, it's possible for an indexed
+ // field to cause the index to be multikey at a different component than another indexed field
+ // that also traverses through the 'arrElt' array value. It's then also possible for an indexed
+ // field not to cause the index to be multikey, even if it traverses through the 'arrElt' array
+ // value, because only a particular element would be indexed.
+ //
+ // For example, consider the index {'a.b': 1, 'a.b.0'} and the document {a: {b: [1, 2]}}. The
+ // path "a.b" causes the index to be multikey, but the key pattern "a.b.0" only indexes the
+ // first element of the array, so we'd have a
+ // std::vector<boost::optional<size_t>>{{1U}, boost::none}.
+ std::vector<boost::optional<size_t>> arrComponents(fieldNames.size());
+
bool mayExpandArrayUnembedded = true;
- for (unsigned i = 0; i < fieldNames.size(); ++i) {
+ for (size_t i = 0; i < fieldNames.size(); ++i) {
if (*fieldNames[i] == '\0') {
continue;
}
@@ -340,7 +384,8 @@ void BtreeKeyGeneratorV1::getKeysImplWithArray(
arrElt,
arrIdxs,
true,
- _emptyPositionalInfo);
+ _emptyPositionalInfo,
+ multikeyPaths);
} else {
BSONObj arrObj = arrElt.embeddedObject();
@@ -350,21 +395,61 @@ void BtreeKeyGeneratorV1::getKeysImplWithArray(
// array element).
std::vector<PositionalPathInfo> subPositionalInfo(fixed.size());
for (size_t i = 0; i < fieldNames.size(); ++i) {
+ const bool fieldIsArray = arrIdxs.find(i) != arrIdxs.end();
+
if (*fieldNames[i] == '\0') {
// We've reached the end of the path.
+ if (multikeyPaths && fieldIsArray && mayExpandArrayUnembedded) {
+ // The 'arrElt' array value isn't expanded into multiple elements when the last
+ // component of the indexed field is positional and 'arrElt' contains nested
+ // array values. In all other cases, the 'arrElt' array value may be expanded
+ // into multiple element and can therefore cause the index to be multikey.
+ arrComponents[i] = _pathLengths[i] - 1;
+ }
continue;
}
+ // The earlier call to BSONObj::getFieldDottedOrArray(fieldNames[i]) modified
+ // fieldNames[i] to refer to the suffix of the path immediately following the 'arrElt'
+ // array value. If we haven't reached the end of this indexed field yet, then we must
+ // have traversed through 'arrElt'.
+ invariant(fieldIsArray);
+
StringData part = fieldNames[i];
part = part.substr(0, part.find('.'));
subPositionalInfo[i].positionallyIndexedElt = arrObj[part];
if (subPositionalInfo[i].positionallyIndexedElt.eoo()) {
- // Not indexing an array by position.
+ // We aren't indexing a particular element of the 'arrElt' array value, so it may be
+ // expanded into multiple elements. It can therefore cause the index to be multikey.
+ if (multikeyPaths) {
+ // We need to determine which component of the indexed field causes the index to
+ // be multikey as a result of the 'arrElt' array value. Since
+ //
+ // NumComponents("<pathPrefix>") + NumComponents("<pathSuffix>")
+ // = NumComponents("<pathPrefix>.<pathSuffix>"),
+ //
+ // we can compute the number of components in a prefix of the indexed field by
+ // subtracting the number of components in the suffix 'fieldNames[i]' from the
+ // number of components in the indexed field '_fieldNames[i]'.
+ //
+ // For example, consider the indexed field "a.b.c" and the suffix "c". The path
+ // "a.b.c" has 3 components and the suffix "c" has 1 component. Subtracting the
+ // latter from the former yields the number of components in the prefix "a.b",
+ // i.e. 2.
+ size_t fullPathLength = _pathLengths[i];
+ size_t suffixPathLength = FieldRef{fieldNames[i]}.numParts();
+ invariant(suffixPathLength < fullPathLength);
+ arrComponents[i] = fullPathLength - suffixPathLength - 1;
+ }
continue;
}
// We're indexing an array element by its position. Traverse the remainder of the
// field path now.
+ //
+ // Indexing an array element by its position selects a particular element of the
+ // 'arrElt' array value when generating keys. It therefore cannot cause the index to be
+ // multikey.
subPositionalInfo[i].arrayObj = arrObj;
subPositionalInfo[i].remainingPath = fieldNames[i];
subPositionalInfo[i].dottedElt =
@@ -372,18 +457,31 @@ void BtreeKeyGeneratorV1::getKeysImplWithArray(
}
// Generate a key for each element of the indexed array.
- BSONObjIterator i(arrObj);
- while (i.more()) {
+ size_t nArrObjFields = 0;
+ for (const auto arrObjElem : arrObj) {
_getKeysArrEltFixed(&fieldNames,
&fixed,
- i.next(),
+ arrObjElem,
keys,
numNotFound,
arrElt,
arrIdxs,
mayExpandArrayUnembedded,
- subPositionalInfo);
+ subPositionalInfo,
+ multikeyPaths);
+ ++nArrObjFields;
+ }
+
+ if (multikeyPaths && nArrObjFields > 1) {
+ // The 'arrElt' array value contains multiple elements, so we say that it causes the
+ // index to be multikey.
+ for (size_t i = 0; i < arrComponents.size(); ++i) {
+ if (auto arrComponent = arrComponents[i]) {
+ (*multikeyPaths)[i].insert(*arrComponent);
+ }
+ }
}
}
}
+
} // namespace mongo