author    Bernard Gorman <bernard.gorman@gmail.com>    2018-07-10 15:48:30 +0100
committer Bernard Gorman <bernard.gorman@gmail.com>    2018-08-09 23:02:51 +0100
commit    fa8f737443dbcd604071baee6e7daa148d92ce68 (patch)
tree      364914a4fac03b464bc3cca272c7b6914553892e /src/mongo
parent    55ff5175dfde9be093f69e792bac8408639c4653 (diff)
download  mongo-fa8f737443dbcd604071baee6e7daa148d92ce68.tar.gz
SERVER-35860 Maintain multikey metadata keys for allPaths indexes
Diffstat (limited to 'src/mongo')
-rw-r--r--  src/mongo/db/catalog/collection_info_cache_impl.cpp                            38
-rw-r--r--  src/mongo/db/catalog/index_consistency.cpp                                      2
-rw-r--r--  src/mongo/db/catalog/index_create_impl.cpp                                      2
-rw-r--r--  src/mongo/db/catalog/private/record_store_validate_adaptor.cpp                 19
-rw-r--r--  src/mongo/db/exec/projection_exec_agg.cpp                                       9
-rw-r--r--  src/mongo/db/exec/projection_exec_agg.h                                        21
-rw-r--r--  src/mongo/db/exec/projection_exec_agg_test.cpp                                138
-rw-r--r--  src/mongo/db/exec/working_set_common.cpp                                        3
-rw-r--r--  src/mongo/db/index/2d_access_method.cpp                                         1
-rw-r--r--  src/mongo/db/index/2d_access_method.h                                           9
-rw-r--r--  src/mongo/db/index/all_paths_access_method.cpp                                 12
-rw-r--r--  src/mongo/db/index/all_paths_access_method.h                                   16
-rw-r--r--  src/mongo/db/index/all_paths_key_generator.cpp                                 22
-rw-r--r--  src/mongo/db/index/all_paths_key_generator.h                                   24
-rw-r--r--  src/mongo/db/index/btree_access_method.cpp                                      1
-rw-r--r--  src/mongo/db/index/btree_access_method.h                                        6
-rw-r--r--  src/mongo/db/index/fts_access_method.cpp                                        1
-rw-r--r--  src/mongo/db/index/fts_access_method.h                                          9
-rw-r--r--  src/mongo/db/index/hash_access_method.cpp                                       1
-rw-r--r--  src/mongo/db/index/hash_access_method.h                                         9
-rw-r--r--  src/mongo/db/index/haystack_access_method.cpp                                   1
-rw-r--r--  src/mongo/db/index/haystack_access_method.h                                     9
-rw-r--r--  src/mongo/db/index/index_access_method.cpp                                    191
-rw-r--r--  src/mongo/db/index/index_access_method.h                                       63
-rw-r--r--  src/mongo/db/index/s2_access_method.cpp                                         1
-rw-r--r--  src/mongo/db/index/s2_access_method.h                                           5
-rw-r--r--  src/mongo/db/record_id.h                                                       46
-rw-r--r--  src/mongo/db/repl/storage_interface_impl.cpp                                    7
-rw-r--r--  src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp      6
-rw-r--r--  src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp    2
-rw-r--r--  src/mongo/db/storage/key_string_test.cpp                                        2
-rw-r--r--  src/mongo/db/storage/mobile/mobile_index.cpp                                    6
-rw-r--r--  src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp                            4
-rw-r--r--  src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp                     8
-rw-r--r--  src/mongo/db/views/durable_view_catalog.cpp                                     4
-rw-r--r--  src/mongo/dbtests/SConscript                                                    1
-rw-r--r--  src/mongo/dbtests/all_paths_multikey_persistence_test.cpp                     706
37 files changed, 1207 insertions, 198 deletions
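
In brief, the approach this patch takes: a $** (allPaths) index records which of its paths are multikey by writing dedicated "multikey metadata" keys into the index itself, each pointed at a RecordId drawn from a newly reserved range rather than at any document. A minimal sketch of the format, using names introduced below (illustrative, not literal index contents):

    // One metadata entry per multikey (i.e. array) path:
    //   key:      { "": 1, "": "path.to.array" }
    //   recordId: RecordId(RecordId::ReservedId::kAllPathsMultikeyMetadataId)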
diff --git a/src/mongo/db/catalog/collection_info_cache_impl.cpp b/src/mongo/db/catalog/collection_info_cache_impl.cpp
index d23da8c5a81..9eacc4887fc 100644
--- a/src/mongo/db/catalog/collection_info_cache_impl.cpp
+++ b/src/mongo/db/catalog/collection_info_cache_impl.cpp
@@ -37,6 +37,7 @@
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/fts/fts_spec.h"
+#include "mongo/db/index/all_paths_key_generator.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index_legacy.h"
#include "mongo/db/query/plan_cache.h"
@@ -88,18 +89,22 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) {
while (i.more()) {
IndexDescriptor* descriptor = i.next();
- if (descriptor->getAccessMethodName() != IndexNames::TEXT) {
- BSONObj key = descriptor->keyPattern();
- const BSONObj& infoObj = descriptor->infoObj();
- if (infoObj.hasField("expireAfterSeconds")) {
- _hasTTLIndex = true;
- }
- BSONObjIterator j(key);
- while (j.more()) {
- BSONElement e = j.next();
- _indexedPaths.addPath(e.fieldName());
+ if (descriptor->getAccessMethodName() == IndexNames::ALLPATHS) {
+ // Obtain the projection used by the $** index's key generator.
+ auto pathProj = AllPathsKeyGenerator::createProjectionExec(
+ descriptor->keyPattern(), descriptor->pathProjection());
+ // If the projection is an exclusion, then we must check the new document's keys on all
+ // updates, since we do not exhaustively know the set of paths to be indexed.
+ if (pathProj->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection) {
+ _indexedPaths.allPathsIndexed();
+ } else {
+ // If a subtree was specified in the keyPattern, or if an inclusion projection is
+ // present, then we need only index the path(s) preserved by the projection.
+ for (const auto& path : pathProj->getExhaustivePaths()) {
+ _indexedPaths.addPath(path);
+ }
}
- } else {
+ } else if (descriptor->getAccessMethodName() == IndexNames::TEXT) {
fts::FTSSpec ftsSpec(descriptor->infoObj());
if (ftsSpec.wildcard()) {
@@ -120,6 +125,17 @@ void CollectionInfoCacheImpl::computeIndexKeys(OperationContext* opCtx) {
// language of a subdocument. Add the override field as a path component.
_indexedPaths.addPathComponent(ftsSpec.languageOverrideField());
}
+ } else {
+ BSONObj key = descriptor->keyPattern();
+ const BSONObj& infoObj = descriptor->infoObj();
+ if (infoObj.hasField("expireAfterSeconds")) {
+ _hasTTLIndex = true;
+ }
+ BSONObjIterator j(key);
+ while (j.more()) {
+ BSONElement e = j.next();
+ _indexedPaths.addPath(e.fieldName());
+ }
}
// handle partial indexes
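
A worked example of the new branch above (illustrative, not part of the patch): for a $** index with an exclusion projection such as {a: 0}, the set of indexed paths cannot be enumerated, so allPathsIndexed() marks every path as potentially indexed and each update must be re-checked. For an inclusion projection such as {a: 1, "b.c": 1}, getExhaustivePaths() enumerates exactly the preserved paths, and only those (here "a" and "b.c", plus "_id" if the generator's id policy includes it) are registered via addPath().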
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index 7caf6e1ed86..594b0265a43 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -298,7 +298,7 @@ ValidationStage IndexConsistency::getStage() const {
void IndexConsistency::setLastProcessedRecordId(RecordId recordId) {
stdx::lock_guard<stdx::mutex> lock(_classMutex);
- if (!recordId.isNormal()) {
+ if (!recordId.isValid()) {
_lastProcessedRecordId = boost::none;
} else {
_lastProcessedRecordId = recordId;
diff --git a/src/mongo/db/catalog/index_create_impl.cpp b/src/mongo/db/catalog/index_create_impl.cpp
index 24f123e6952..d65eed34084 100644
--- a/src/mongo/db/catalog/index_create_impl.cpp
+++ b/src/mongo/db/catalog/index_create_impl.cpp
@@ -486,7 +486,7 @@ Status MultiIndexBlockImpl::insert(const BSONObj& doc, const RecordId& loc) {
int64_t unused;
Status idxStatus(ErrorCodes::InternalError, "");
if (_indexes[i].bulk) {
- idxStatus = _indexes[i].bulk->insert(_opCtx, doc, loc, _indexes[i].options, &unused);
+ idxStatus = _indexes[i].bulk->insert(_opCtx, doc, loc, _indexes[i].options);
} else {
idxStatus = _indexes[i].real->insert(_opCtx, doc, loc, _indexes[i].options, &unused);
}
diff --git a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
index d4619c81592..cf26dc31084 100644
--- a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
+++ b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
@@ -82,18 +82,19 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId,
}
BSONObjSet documentKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
- // There's no need to compute the prefixes of the indexed fields that cause the
- // index to be multikey when validating the index keys.
- MultikeyPaths* multikeyPaths = nullptr;
+ BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
+ MultikeyPaths multikeyPaths;
iam->getKeys(recordBson,
IndexAccessMethod::GetKeysMode::kEnforceConstraints,
&documentKeySet,
- multikeyPaths);
+ &multikeyMetadataKeys,
+ &multikeyPaths);
- if (!descriptor->isMultikey(_opCtx) && documentKeySet.size() > 1) {
+ if (!descriptor->isMultikey(_opCtx) &&
+ iam->shouldMarkIndexAsMultikey(documentKeySet, multikeyMetadataKeys, multikeyPaths)) {
std::string msg = str::stream() << "Index " << descriptor->indexName()
- << " is not multi-key but has more than one"
- << " key in document " << recordId;
+ << " is not multi-key, but a multikey path "
+ << " is present in document " << recordId;
curRecordResults.errors.push_back(msg);
curRecordResults.valid = false;
}
@@ -185,7 +186,7 @@ void RecordStoreValidateAdaptor::traverseRecordStore(RecordStore* recordStore,
Status status = validate(record->id, record->data, &validatedSize);
// Checks to ensure isInRecordIdOrder() is being used properly.
- if (prevRecordId.isNormal()) {
+ if (prevRecordId.isValid()) {
invariant(prevRecordId < record->id);
}
@@ -269,4 +270,4 @@ void RecordStoreValidateAdaptor::validateIndexKeyCount(IndexDescriptor* idx,
results.warnings.push_back(warning);
}
}
-} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/exec/projection_exec_agg.cpp b/src/mongo/db/exec/projection_exec_agg.cpp
index 65c780fa3b4..d841d5d93f0 100644
--- a/src/mongo/db/exec/projection_exec_agg.cpp
+++ b/src/mongo/db/exec/projection_exec_agg.cpp
@@ -73,6 +73,12 @@ public:
expCtx, projSpec, idPolicy, recursionPolicy, ProjectionParseMode::kBanComputedFields);
}
+ std::set<std::string> getExhaustivePaths() const {
+ DepsTracker depsTracker;
+ _projection->addDependencies(&depsTracker);
+ return depsTracker.fields;
+ }
+
ProjectionType getType() const {
return (_projection->getType() == TransformerType::kInclusionProjection
? ProjectionType::kInclusionProjection
@@ -136,4 +142,7 @@ stdx::unordered_set<std::string> ProjectionExecAgg::applyProjectionToFields(
return _exec->applyProjectionToFields(fields);
}
+std::set<std::string> ProjectionExecAgg::getExhaustivePaths() const {
+ return _exec->getExhaustivePaths();
+}
} // namespace mongo
diff --git a/src/mongo/db/exec/projection_exec_agg.h b/src/mongo/db/exec/projection_exec_agg.h
index f796a250d37..6989478b812 100644
--- a/src/mongo/db/exec/projection_exec_agg.h
+++ b/src/mongo/db/exec/projection_exec_agg.h
@@ -62,21 +62,24 @@ public:
~ProjectionExecAgg();
+ BSONObj applyProjection(BSONObj inputDoc) const;
+
+ stdx::unordered_set<std::string> applyProjectionToFields(
+ const stdx::unordered_set<std::string>& fields) const;
+
+ /**
+ * Returns the exhaustive set of all paths that will be preserved by this projection, or an
+ * empty set if the exhaustive set cannot be determined. An inclusion will always produce an
+ * exhaustive set; an exclusion will always produce an empty set.
+ */
+ std::set<std::string> getExhaustivePaths() const;
+
ProjectionType getType() const;
BSONObj getProjectionSpec() const {
return _projSpec;
}
- const BSONObj& getSpec() const {
- return _projSpec;
- }
-
- BSONObj applyProjection(BSONObj inputDoc) const;
-
- stdx::unordered_set<std::string> applyProjectionToFields(
- const stdx::unordered_set<std::string>& fields) const;
-
private:
/**
* ProjectionExecAgg::ProjectionExecutor wraps all agg-specific calls, and is forward-declared
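
A short usage sketch of the new accessor (mirrors the unit tests below; assumes both policy enums are nested in ProjectionExecAgg as declared earlier in this header):

    auto incl = ProjectionExecAgg::create(
        fromjson("{a: {b: 1}, d: 1}"),
        ProjectionExecAgg::DefaultIdPolicy::kIncludeId,
        ProjectionExecAgg::ArrayRecursionPolicy::kRecurseNestedArrays);
    invariant(incl->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection);
    auto paths = incl->getExhaustivePaths();  // {"_id", "a.b", "d"}

    auto excl = ProjectionExecAgg::create(
        fromjson("{a: 0}"),
        ProjectionExecAgg::DefaultIdPolicy::kIncludeId,
        ProjectionExecAgg::ArrayRecursionPolicy::kRecurseNestedArrays);
    invariant(excl->getExhaustivePaths().empty());  // exclusions yield an empty set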
diff --git a/src/mongo/db/exec/projection_exec_agg_test.cpp b/src/mongo/db/exec/projection_exec_agg_test.cpp
index 8503e7a50f8..5105677f3f0 100644
--- a/src/mongo/db/exec/projection_exec_agg_test.cpp
+++ b/src/mongo/db/exec/projection_exec_agg_test.cpp
@@ -32,6 +32,7 @@
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/bson/json.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/assert_util.h"
@@ -46,14 +47,20 @@ BSONObj wrapInLiteral(const T& arg) {
return BSON("$literal" << arg);
}
-// Helper to simplify the creation of a ProjectionExecAgg which includes _id and recurses nested
-// arrays by default.
+// Helper to simplify the creation of a ProjectionExecAgg which includes _id and recurses arrays.
std::unique_ptr<ProjectionExecAgg> makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion(
BSONObj projSpec) {
return ProjectionExecAgg::create(
projSpec, DefaultIdPolicy::kIncludeId, ArrayRecursionPolicy::kRecurseNestedArrays);
}
+// Helper to simplify the creation of a ProjectionExecAgg which excludes _id and recurses arrays.
+std::unique_ptr<ProjectionExecAgg> makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion(
+ BSONObj projSpec) {
+ return ProjectionExecAgg::create(
+ projSpec, DefaultIdPolicy::kExcludeId, ArrayRecursionPolicy::kRecurseNestedArrays);
+}
+
//
// Error cases.
//
@@ -174,5 +181,132 @@ TEST(ProjectionExecAggType, ShouldAcceptExclusionProjection) {
ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection);
}
+// Misc tests.
+
+TEST(ProjectionExecAggTests, InclusionFieldPathsWithImplicitIdInclusion) {
+ auto parsedProject = makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion(
+ fromjson("{a: {b: {c: 1}}, d: 1}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+ std::set<std::string> expectedPaths{"_id", "a.b.c", "d"};
+
+ // Verify that the exhaustive set of paths is as expected.
+ ASSERT(exhaustivePaths == expectedPaths);
+}
+
+TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdInclusion) {
+ auto parsedProject = makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion(
+ fromjson("{_id: 1, a: {b: {c: 1}}, d: 1}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+ std::set<std::string> expectedPaths{"_id", "a.b.c", "d"};
+
+ // Verify that the exhaustive set of paths is as expected.
+ ASSERT(exhaustivePaths == expectedPaths);
+}
+
+TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdInclusionIdOnly) {
+ auto parsedProject =
+ makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion(fromjson("{_id: 1}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+ std::set<std::string> expectedPaths{"_id"};
+
+ // Verify that the exhaustive set of paths is as expected.
+ ASSERT(exhaustivePaths == expectedPaths);
+}
+
+TEST(ProjectionExecAggTests, InclusionFieldPathsWithImplicitIdExclusion) {
+ auto parsedProject = makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion(
+ fromjson("{a: {b: {c: 1}}, d: 1}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+ std::set<std::string> expectedPaths{"a.b.c", "d"};
+
+ // Verify that the exhaustive set of paths is as expected.
+ ASSERT(exhaustivePaths == expectedPaths);
+}
+
+TEST(ProjectionExecAggTests, InclusionFieldPathsWithExplicitIdExclusion) {
+ auto parsedProject = makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion(
+ fromjson("{_id: 0, a: {b: {c: 1}}, d: 1}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kInclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+ std::set<std::string> expectedPaths{"a.b.c", "d"};
+
+ // Verify that the exhaustive set of paths is as expected.
+ ASSERT(exhaustivePaths == expectedPaths);
+}
+
+TEST(ProjectionExecAggTests, ExclusionFieldPathsWithImplicitIdInclusion) {
+ auto parsedProject = makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion(
+ fromjson("{a: {b: {c: 0}}, d: 0}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+
+ // Verify that the exhaustive set is empty, despite the implicit inclusion of _id.
+ ASSERT(exhaustivePaths.empty());
+}
+
+TEST(ProjectionExecAggTests, ExclusionFieldPathsWithExplicitIdInclusion) {
+ auto parsedProject = makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion(
+ fromjson("{_id: 1, a: {b: {c: 0}}, d: 0}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+
+ // Verify that the exhaustive set is empty, despite the explicit inclusion of _id.
+ ASSERT(exhaustivePaths.empty());
+}
+
+TEST(ProjectionExecAggTests, ExclusionFieldPathsWithImplicitIdExclusion) {
+ auto parsedProject = makeProjectionWithDefaultIdExclusionAndNestedArrayRecursion(
+ fromjson("{a: {b: {c: 0}}, d: 0}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+
+ // Verify that the exhaustive set is empty.
+ ASSERT(exhaustivePaths.empty());
+}
+
+TEST(ProjectionExecAggTests, ExclusionFieldPathsWithExplicitIdExclusion) {
+ auto parsedProject = makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion(
+ fromjson("{_id: 1, a: {b: {c: 0}}, d: 0}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+
+ // Verify that the exhaustive set is empty.
+ ASSERT(exhaustivePaths.empty());
+}
+
+TEST(ProjectionExecAggTests, ExclusionFieldPathsWithExplicitIdExclusionIdOnly) {
+ auto parsedProject =
+ makeProjectionWithDefaultIdInclusionAndNestedArrayRecursion(fromjson("{_id: 0}"));
+ ASSERT(parsedProject->getType() == ProjectionExecAgg::ProjectionType::kExclusionProjection);
+
+ // Extract the exhaustive set of paths that will be preserved by the projection.
+ auto exhaustivePaths = parsedProject->getExhaustivePaths();
+
+ // Verify that the exhaustive set is empty.
+ ASSERT(exhaustivePaths.empty());
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp
index 12a12fb81f5..cdd4b2227eb 100644
--- a/src/mongo/db/exec/working_set_common.cpp
+++ b/src/mongo/db/exec/working_set_common.cpp
@@ -36,7 +36,6 @@
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/service_context.h"
-#include "mongo/db/service_context.h"
namespace mongo {
@@ -117,10 +116,12 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx,
BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
// There's no need to compute the prefixes of the indexed fields that cause the index to
// be multikey when ensuring the keyData is still valid.
+ BSONObjSet* multikeyMetadataKeys = nullptr;
MultikeyPaths* multikeyPaths = nullptr;
member->keyData[i].index->getKeys(member->obj.value(),
IndexAccessMethod::GetKeysMode::kEnforceConstraints,
&keys,
+ multikeyMetadataKeys,
multikeyPaths);
if (!keys.count(member->keyData[i].keyData)) {
// document would no longer be at this position in the index.
diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp
index 5920b46113a..f1db4b83969 100644
--- a/src/mongo/db/index/2d_access_method.cpp
+++ b/src/mongo/db/index/2d_access_method.cpp
@@ -50,6 +50,7 @@ TwoDAccessMethod::TwoDAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte
/** Finds the key objects to put in an index */
void TwoDAccessMethod::doGetKeys(const BSONObj& obj,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::get2DKeys(obj, _params, keys);
}
diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h
index 3ebea0bcf42..1a5451266b1 100644
--- a/src/mongo/db/index/2d_access_method.h
+++ b/src/mongo/db/index/2d_access_method.h
@@ -54,10 +54,13 @@ private:
/**
* Fills 'keys' with the keys that should be generated for 'obj' on this index.
*
- * This function ignores the 'multikeyPaths' pointer because 2d indexes don't support tracking
- * path-level multikey information.
+ * This function ignores the 'multikeyPaths' and 'multikeyMetadataKeys' pointers because 2d
+ * indexes don't support tracking path-level multikey information.
*/
- void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
+ void doGetKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
+ MultikeyPaths* multikeyPaths) const final;
TwoDIndexingParams _params;
};
diff --git a/src/mongo/db/index/all_paths_access_method.cpp b/src/mongo/db/index/all_paths_access_method.cpp
index 46a3d432e4e..a6e95ce7d89 100644
--- a/src/mongo/db/index/all_paths_access_method.cpp
+++ b/src/mongo/db/index/all_paths_access_method.cpp
@@ -40,12 +40,16 @@ AllPathsAccessMethod::AllPathsAccessMethod(IndexCatalogEntry* allPathsState,
_keyGen(
_descriptor->keyPattern(), _descriptor->pathProjection(), _btreeState->getCollator()) {}
+bool AllPathsAccessMethod::shouldMarkIndexAsMultikey(const BSONObjSet& keys,
+ const BSONObjSet& multikeyMetadataKeys,
+ const MultikeyPaths& multikeyPaths) const {
+ return !multikeyMetadataKeys.empty();
+}
+
void AllPathsAccessMethod::doGetKeys(const BSONObj& obj,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const {
- // TODO SERVER-35748: Until MultikeyPaths has been updated to facilitate 'allPaths' indexes, we
- // use AllPathsKeyGenerator::MultikeyPathsMock to separate multikey paths from RecordId keys.
- auto multikeyPathsMock = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
- _keyGen.generateKeys(obj, keys, &multikeyPathsMock);
+ _keyGen.generateKeys(obj, keys, multikeyMetadataKeys);
}
} // namespace mongo
diff --git a/src/mongo/db/index/all_paths_access_method.h b/src/mongo/db/index/all_paths_access_method.h
index 926d8ed1166..53637bf7ee5 100644
--- a/src/mongo/db/index/all_paths_access_method.h
+++ b/src/mongo/db/index/all_paths_access_method.h
@@ -42,8 +42,22 @@ class AllPathsAccessMethod : public IndexAccessMethod {
public:
AllPathsAccessMethod(IndexCatalogEntry* allPathsState, SortedDataInterface* btree);
+ /**
+ * Returns 'true' if the index should become multikey on the basis of the passed arguments.
+ * Because it is possible for a $** index to generate multiple keys per document without any of
+ * them lying along a multikey (i.e. array) path, this method will only return 'true' if one or
+ * more multikey metadata keys have been generated; that is, if the 'multikeyMetadataKeys'
+ * BSONObjSet is non-empty.
+ */
+ bool shouldMarkIndexAsMultikey(const BSONObjSet& keys,
+ const BSONObjSet& multikeyMetadataKeys,
+ const MultikeyPaths& multikeyPaths) const final;
+
private:
- void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
+ void doGetKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
+ MultikeyPaths* multikeyPaths) const final;
const AllPathsKeyGenerator _keyGen;
};
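
A worked example of the rule above (illustrative): the document {a: [1, 2]} makes a $** index generate two data keys, {"": "a", "": 1} and {"": "a", "": 2}, plus the metadata key {"": 1, "": "a"}, so shouldMarkIndexAsMultikey() returns true. The document {a: 1, b: 1} also generates two data keys but no metadata keys, so it returns false. This is exactly why the base class's "more than one key per document" heuristic cannot be reused for $** indexes.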
diff --git a/src/mongo/db/index/all_paths_key_generator.cpp b/src/mongo/db/index/all_paths_key_generator.cpp
index c21c01e9fa2..5d894ac40a8 100644
--- a/src/mongo/db/index/all_paths_key_generator.cpp
+++ b/src/mongo/db/index/all_paths_key_generator.cpp
@@ -62,7 +62,7 @@ void popPathComponent(BSONElement elem, bool enclosingObjIsArray, FieldRef* path
constexpr StringData AllPathsKeyGenerator::kSubtreeSuffix;
std::unique_ptr<ProjectionExecAgg> AllPathsKeyGenerator::createProjectionExec(
- const BSONObj& keyPattern, const BSONObj& pathProjection) {
+ BSONObj keyPattern, BSONObj pathProjection) {
// We should never have a key pattern that contains more than a single element.
invariant(keyPattern.nFields() == 1);
@@ -99,17 +99,16 @@ AllPathsKeyGenerator::AllPathsKeyGenerator(BSONObj keyPattern,
void AllPathsKeyGenerator::generateKeys(BSONObj inputDoc,
BSONObjSet* keys,
- MultikeyPathsMock* multikeyPaths) const {
- FieldRef workingPath;
- _traverseAllPaths(
- _projExec->applyProjection(inputDoc), false, &workingPath, keys, multikeyPaths);
+ BSONObjSet* multikeyPaths) const {
+ FieldRef rootPath;
+ _traverseAllPaths(_projExec->applyProjection(inputDoc), false, &rootPath, keys, multikeyPaths);
}
void AllPathsKeyGenerator::_traverseAllPaths(BSONObj obj,
bool objIsArray,
FieldRef* path,
BSONObjSet* keys,
- MultikeyPathsMock* multikeyPaths) const {
+ BSONObjSet* multikeyPaths) const {
for (const auto elem : obj) {
// If the element's fieldName contains a ".", fast-path skip it because it's not queryable.
if (elem.fieldNameStringData().find('.', 0) != std::string::npos)
@@ -167,10 +166,13 @@ void AllPathsKeyGenerator::_addKey(BSONElement elem,
keys->insert(bob.obj());
}
-void AllPathsKeyGenerator::_addMultiKey(const FieldRef& fullPath,
- MultikeyPathsMock* multikeyPaths) const {
- // Multikey paths are denoted by an entry of the form { "": 1, "": "path.to.array" }.
- multikeyPaths->insert(BSON("" << 1 << "" << fullPath.dottedField()));
+void AllPathsKeyGenerator::_addMultiKey(const FieldRef& fullPath, BSONObjSet* multikeyPaths) const {
+ // Multikey paths are denoted by a key of the form { "": 1, "": "path.to.array" }. The argument
+ // 'multikeyPaths' may be nullptr if the access method is being used in an operation which does
+ // not require multikey path generation.
+ if (multikeyPaths) {
+ multikeyPaths->insert(BSON("" << 1 << "" << fullPath.dottedField()));
+ }
}
} // namespace mongo
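
A minimal sketch of the nullptr case handled by _addMultiKey() above (illustrative; a caller such as IndexAccessMethod::remove() has no need for multikey tracking):

    BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
    keyGen.generateKeys(fromjson("{a: [1, 2]}"), &keys, nullptr /* no metadata keys */);
    // keys now holds {"": "a", "": 1} and {"": "a", "": 2}; the multikey
    // path "a" is simply not recorded anywhere.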
diff --git a/src/mongo/db/index/all_paths_key_generator.h b/src/mongo/db/index/all_paths_key_generator.h
index 9c5b7850fca..1eb0297ba8d 100644
--- a/src/mongo/db/index/all_paths_key_generator.h
+++ b/src/mongo/db/index/all_paths_key_generator.h
@@ -44,20 +44,12 @@ public:
static constexpr StringData kSubtreeSuffix = ".$**"_sd;
/**
- * Returns an owned ProjectionExecAgg as defined by the 'keyPattern' and 'pathProjection' and
- * created with the parameter necessary for allPaths key generation.
- */
- static std::unique_ptr<ProjectionExecAgg> createProjectionExec(const BSONObj& keyPattern,
- const BSONObj& pathProjection);
-
- /**
- * TODO SERVER-35748: Currently, the MultikeyPaths structure used by IndexAccessMethod is not
- * suitable for tracking multikey paths in AllPaths indexes. In order to keep multikey paths
- * separate from RecordId keys, and to ensure that both this key generator and the
- * AllPathsIndexAccessMethod can be trivially switched over to using the new MultikeyPaths
- * tracker once it is implemented, we use a mock MultikeyPaths here.
+ * Returns an owned ProjectionExecAgg identical to the one that AllPathsKeyGenerator will use
+ * internally when generating the keys for the $** index, as defined by the 'keyPattern' and
+ * 'pathProjection' arguments.
*/
- using MultikeyPathsMock = BSONObjSet;
+ static std::unique_ptr<ProjectionExecAgg> createProjectionExec(BSONObj keyPattern,
+ BSONObj pathProjection);
AllPathsKeyGenerator(BSONObj keyPattern,
BSONObj pathProjection,
@@ -71,7 +63,7 @@ public:
* document, in the following format:
* { '': 1, '': 'path.to.array' }
*/
- void generateKeys(BSONObj inputDoc, BSONObjSet* keys, MultikeyPathsMock* multikeyPaths) const;
+ void generateKeys(BSONObj inputDoc, BSONObjSet* keys, BSONObjSet* multikeyPaths) const;
private:
// Traverses every path of the post-projection document, adding keys to the set as it goes.
@@ -79,10 +71,10 @@ private:
bool objIsArray,
FieldRef* path,
BSONObjSet* keys,
- MultikeyPathsMock* multikeyPaths) const;
+ BSONObjSet* multikeyPaths) const;
// Helper functions to format the entry appropriately before adding it to the key/path tracker.
- void _addMultiKey(const FieldRef& fullPath, MultikeyPathsMock* multikeyPaths) const;
+ void _addMultiKey(const FieldRef& fullPath, BSONObjSet* multikeyPaths) const;
void _addKey(BSONElement elem, const FieldRef& fullPath, BSONObjSet* keys) const;
// Helper to check whether the element is a nested array, and conditionally add it to 'keys'.
diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp
index ca6c7fb115c..ee599f5d23e 100644
--- a/src/mongo/db/index/btree_access_method.cpp
+++ b/src/mongo/db/index/btree_access_method.cpp
@@ -64,6 +64,7 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn
void BtreeAccessMethod::doGetKeys(const BSONObj& obj,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const {
_keyGenerator->getKeys(obj, keys, multikeyPaths);
}
diff --git a/src/mongo/db/index/btree_access_method.h b/src/mongo/db/index/btree_access_method.h
index caed0eccab5..11b811806a3 100644
--- a/src/mongo/db/index/btree_access_method.h
+++ b/src/mongo/db/index/btree_access_method.h
@@ -32,7 +32,6 @@
#include "mongo/base/status.h"
#include "mongo/db/index/btree_key_generator.h"
#include "mongo/db/index/index_access_method.h"
-#include "mongo/db/index/index_access_method.h"
#include "mongo/db/jsobj.h"
namespace mongo {
@@ -48,7 +47,10 @@ public:
BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree);
private:
- void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
+ void doGetKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
+ MultikeyPaths* multikeyPaths) const final;
// Our keys differ for V0 and V1.
std::unique_ptr<BtreeKeyGenerator> _keyGenerator;
diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp
index 9bb650f2eb6..203e555d52e 100644
--- a/src/mongo/db/index/fts_access_method.cpp
+++ b/src/mongo/db/index/fts_access_method.cpp
@@ -38,6 +38,7 @@ FTSAccessMethod::FTSAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterf
void FTSAccessMethod::doGetKeys(const BSONObj& obj,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::getFTSKeys(obj, _ftsSpec, keys);
}
diff --git a/src/mongo/db/index/fts_access_method.h b/src/mongo/db/index/fts_access_method.h
index 8f843e32bc6..d3148ae3d96 100644
--- a/src/mongo/db/index/fts_access_method.h
+++ b/src/mongo/db/index/fts_access_method.h
@@ -48,10 +48,13 @@ private:
/**
* Fills 'keys' with the keys that should be generated for 'obj' on this index.
*
- * This function ignores the 'multikeyPaths' pointer because text indexes don't support tracking
- * path-level multikey information.
+ * This function ignores the 'multikeyPaths' and 'multikeyMetadataKeys' pointers because text
+ * indexes don't support tracking path-level multikey information.
*/
- void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
+ void doGetKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
+ MultikeyPaths* multikeyPaths) const final;
fts::FTSSpec _ftsSpec;
};
diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp
index 734b3812ebf..a2e25d71e7d 100644
--- a/src/mongo/db/index/hash_access_method.cpp
+++ b/src/mongo/db/index/hash_access_method.cpp
@@ -55,6 +55,7 @@ HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte
void HashAccessMethod::doGetKeys(const BSONObj& obj,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::getHashKeys(
obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), _collator, keys);
diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h
index fbd35c812b2..99a82454a82 100644
--- a/src/mongo/db/index/hash_access_method.h
+++ b/src/mongo/db/index/hash_access_method.h
@@ -51,10 +51,13 @@ private:
/**
* Fills 'keys' with the keys that should be generated for 'obj' on this index.
*
- * This function ignores the 'multikeyPaths' pointer because hashed indexes don't support
- * tracking path-level multikey information.
+ * This function ignores the 'multikeyPaths' and 'multikeyMetadataKeys' pointers because hashed
+ * indexes don't support tracking path-level multikey information.
*/
- void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
+ void doGetKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
+ MultikeyPaths* multikeyPaths) const final;
// Only one of our fields is hashed. This is the field name for it.
std::string _hashedField;
diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp
index 843e59d09bb..28b7728c1c2 100644
--- a/src/mongo/db/index/haystack_access_method.cpp
+++ b/src/mongo/db/index/haystack_access_method.cpp
@@ -65,6 +65,7 @@ HaystackAccessMethod::HaystackAccessMethod(IndexCatalogEntry* btreeState,
void HaystackAccessMethod::doGetKeys(const BSONObj& obj,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys);
}
diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h
index 4bf8fc41839..a6aef7a2df4 100644
--- a/src/mongo/db/index/haystack_access_method.h
+++ b/src/mongo/db/index/haystack_access_method.h
@@ -72,10 +72,13 @@ private:
/**
* Fills 'keys' with the keys that should be generated for 'obj' on this index.
*
- * This function ignores the 'multikeyPaths' pointer because geoHaystack indexes don't support
- * tracking path-level multikey information.
+ * This function ignores the 'multikeyPaths' and 'multikeyMetadataKeys' pointers because
+ * geoHaystack indexes don't support tracking path-level multikey information.
*/
- void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
+ void doGetKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
+ MultikeyPaths* multikeyPaths) const final;
std::string _geoField;
std::vector<std::string> _otherFields;
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index 4dab655cfb7..0dbe51ffbc9 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -66,6 +66,10 @@ using IndexVersion = IndexDescriptor::IndexVersion;
namespace {
+// Reserved RecordId against which multikey metadata keys are indexed.
+static const RecordId kMultikeyMetadataKeyId =
+ RecordId{RecordId::ReservedId::kAllPathsMultikeyMetadataId};
+
/**
* Returns true if at least one prefix of any of the indexed fields causes the index to be
* multikey, and returns false otherwise. This function returns false if the 'multikeyPaths'
@@ -77,6 +81,10 @@ bool isMultikeyFromPaths(const MultikeyPaths& multikeyPaths) {
[](const std::set<std::size_t>& components) { return !components.empty(); });
}
+std::vector<BSONObj> asVector(const BSONObjSet& objSet) {
+ return {objSet.begin(), objSet.end()};
+}
+
} // namespace
// TODO SERVER-36386: Remove the server parameter
@@ -125,7 +133,23 @@ bool IndexAccessMethod::ignoreKeyTooLong(OperationContext* opCtx) {
return shouldRelaxConstraints || !failIndexKeyTooLongParam();
}
-// Find the keys for obj, put them in the tree pointing to loc
+bool IndexAccessMethod::isFatalError(OperationContext* opCtx, Status status, BSONObj key) {
+ // If the status is Status::OK(), or if it is ErrorCodes::KeyTooLong and the user has chosen to
+ // ignore this error, return false immediately.
+ if (status.isOK() || (status == ErrorCodes::KeyTooLong && ignoreKeyTooLong(opCtx))) {
+ return false;
+ }
+
+ // A document might be indexed multiple times during a background index build if it moves ahead
+ // of the cursor (e.g. via an update). We check for this scenario and swallow the error
+ // accordingly.
+ if (status == ErrorCodes::DuplicateKeyValue && !_btreeState->isReady(opCtx)) {
+ LOG(3) << "key " << key << " already in index during background indexing (ok)";
+ return false;
+ }
+ return true;
+}
+
+// Find the keys for obj, put them in the tree pointing to loc.
Status IndexAccessMethod::insert(OperationContext* opCtx,
const BSONObj& obj,
const RecordId& loc,
@@ -133,52 +157,32 @@ Status IndexAccessMethod::insert(OperationContext* opCtx,
int64_t* numInserted) {
invariant(numInserted);
*numInserted = 0;
+ BSONObjSet multikeyMetadataKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
MultikeyPaths multikeyPaths;
// Delegate to the subclass.
- getKeys(obj, options.getKeysMode, &keys, &multikeyPaths);
-
- Status ret = Status::OK();
- for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
- Status status = _newInterface->insert(opCtx, *i, loc, options.dupsAllowed);
-
- // Everything's OK, carry on.
- if (status.isOK()) {
- ++*numInserted;
- IndexKeyEntry indexEntry = IndexKeyEntry(*i, loc);
- continue;
- }
-
- // Error cases.
-
- if (status.code() == ErrorCodes::KeyTooLong && ignoreKeyTooLong(opCtx)) {
- IndexKeyEntry indexEntry = IndexKeyEntry(*i, loc);
- continue;
- }
-
- if (status.code() == ErrorCodes::DuplicateKeyValue) {
- // A document might be indexed multiple times during a background index build
- // if it moves ahead of the collection scan cursor (e.g. via an update).
- if (!_btreeState->isReady(opCtx)) {
- LOG(3) << "key " << *i << " already in index during background indexing (ok)";
- continue;
+ getKeys(obj, options.getKeysMode, &keys, &multikeyMetadataKeys, &multikeyPaths);
+
+ // Add all new data keys, and all new multikey metadata keys, into the index. When iterating
+ // over the data keys, each of them should point to the doc's RecordId. When iterating over
+ // the multikey metadata keys, they should point to the reserved 'kMultikeyMetadataKeyId'.
+ for (const auto keySet : {&keys, &multikeyMetadataKeys}) {
+ const auto& recordId = (keySet == &keys ? loc : kMultikeyMetadataKeyId);
+ for (const auto& key : *keySet) {
+ Status status = _newInterface->insert(opCtx, key, recordId, options.dupsAllowed);
+ if (isFatalError(opCtx, status, key)) {
+ return status;
}
}
-
- // Clean up after ourselves.
- for (BSONObjSet::const_iterator j = keys.begin(); j != i; ++j) {
- removeOneKey(opCtx, *j, loc, options.dupsAllowed);
- *numInserted = 0;
- }
-
- return status;
}
- if (*numInserted > 1 || isMultikeyFromPaths(multikeyPaths)) {
+ *numInserted = keys.size() + multikeyMetadataKeys.size();
+
+ if (shouldMarkIndexAsMultikey(keys, multikeyMetadataKeys, multikeyPaths)) {
_btreeState->setMultikey(opCtx, multikeyPaths);
}
- return ret;
+ return Status::OK();
}
void IndexAccessMethod::removeOneKey(OperationContext* opCtx,
@@ -188,7 +192,6 @@ void IndexAccessMethod::removeOneKey(OperationContext* opCtx,
try {
_newInterface->unindex(opCtx, key, loc, dupsAllowed);
- IndexKeyEntry indexEntry = IndexKeyEntry(key, loc);
} catch (AssertionException& e) {
log() << "Assertion failure: _unindex failed " << _descriptor->indexNamespace();
log() << "Assertion failure: _unindex failed: " << redact(e) << " key:" << key.toString()
@@ -214,17 +217,20 @@ Status IndexAccessMethod::remove(OperationContext* opCtx,
// There's no need to compute the prefixes of the indexed fields that cause the index to be
// multikey when removing a document since the index metadata isn't updated when keys are
// deleted.
+ BSONObjSet* multikeyMetadataKeys = nullptr;
MultikeyPaths* multikeyPaths = nullptr;
// Relax key constraints on removal when deleting documents with invalid formats, but only
// those that don't apply to the partialIndex filter.
- getKeys(obj, GetKeysMode::kRelaxConstraintsUnfiltered, &keys, multikeyPaths);
+ getKeys(
+ obj, GetKeysMode::kRelaxConstraintsUnfiltered, &keys, multikeyMetadataKeys, multikeyPaths);
- for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
- removeOneKey(opCtx, *i, loc, options.dupsAllowed);
- ++*numDeleted;
+ for (const auto& key : keys) {
+ removeOneKey(opCtx, key, loc, options.dupsAllowed);
}
+ *numDeleted = keys.size();
+
return Status::OK();
}
@@ -236,12 +242,13 @@ Status IndexAccessMethod::touch(OperationContext* opCtx, const BSONObj& obj) {
BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
// There's no need to compute the prefixes of the indexed fields that cause the index to be
// multikey when paging a document's index entries into memory.
+ BSONObjSet* multikeyMetadataKeys = nullptr;
MultikeyPaths* multikeyPaths = nullptr;
- getKeys(obj, GetKeysMode::kEnforceConstraints, &keys, multikeyPaths);
+ getKeys(obj, GetKeysMode::kEnforceConstraints, &keys, multikeyMetadataKeys, multikeyPaths);
std::unique_ptr<SortedDataInterface::Cursor> cursor(_newInterface->newCursor(opCtx));
- for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
- cursor->seekExact(*i);
+ for (const auto& key : keys) {
+ cursor->seekExact(key);
}
return Status::OK();
@@ -258,8 +265,13 @@ RecordId IndexAccessMethod::findSingle(OperationContext* opCtx, const BSONObj& r
if (_btreeState->getCollator()) {
// For performance, call get keys only if there is a non-simple collation.
BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
+ BSONObjSet* multikeyMetadataKeys = nullptr;
MultikeyPaths* multikeyPaths = nullptr;
- getKeys(requestedKey, GetKeysMode::kEnforceConstraints, &keys, multikeyPaths);
+ getKeys(requestedKey,
+ GetKeysMode::kEnforceConstraints,
+ &keys,
+ multikeyMetadataKeys,
+ multikeyPaths);
invariant(keys.size() == 1);
actualKey = *keys.begin();
} else {
@@ -346,12 +358,17 @@ Status IndexAccessMethod::validateUpdate(OperationContext* opCtx,
// There's no need to compute the prefixes of the indexed fields that possibly caused the
// index to be multikey when the old version of the document was written since the index
// metadata isn't updated when keys are deleted.
+ BSONObjSet* multikeyMetadataKeys = nullptr;
MultikeyPaths* multikeyPaths = nullptr;
- getKeys(from, options.getKeysMode, &ticket->oldKeys, multikeyPaths);
+ getKeys(from, options.getKeysMode, &ticket->oldKeys, multikeyMetadataKeys, multikeyPaths);
}
if (!indexFilter || indexFilter->matchesBSON(to)) {
- getKeys(to, options.getKeysMode, &ticket->newKeys, &ticket->newMultikeyPaths);
+ getKeys(to,
+ options.getKeysMode,
+ &ticket->newKeys,
+ &ticket->newMultikeyMetadataKeys,
+ &ticket->newMultikeyPaths);
}
ticket->loc = record;
@@ -368,6 +385,8 @@ Status IndexAccessMethod::update(OperationContext* opCtx,
const UpdateTicket& ticket,
int64_t* numInserted,
int64_t* numDeleted) {
+ invariant(ticket.newKeys.size() ==
+ ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size());
invariant(numInserted);
invariant(numDeleted);
@@ -378,34 +397,31 @@ Status IndexAccessMethod::update(OperationContext* opCtx,
return Status(ErrorCodes::InternalError, "Invalid UpdateTicket in update");
}
- if (ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size() > 1 ||
- isMultikeyFromPaths(ticket.newMultikeyPaths)) {
- _btreeState->setMultikey(opCtx, ticket.newMultikeyPaths);
- }
-
- for (size_t i = 0; i < ticket.removed.size(); ++i) {
- _newInterface->unindex(opCtx, ticket.removed[i], ticket.loc, ticket.dupsAllowed);
- IndexKeyEntry indexEntry = IndexKeyEntry(ticket.removed[i], ticket.loc);
+ for (const auto& remKey : ticket.removed) {
+ _newInterface->unindex(opCtx, remKey, ticket.loc, ticket.dupsAllowed);
}
- for (size_t i = 0; i < ticket.added.size(); ++i) {
- Status status =
- _newInterface->insert(opCtx, ticket.added[i], ticket.loc, ticket.dupsAllowed);
- if (!status.isOK()) {
- if (status.code() == ErrorCodes::KeyTooLong && ignoreKeyTooLong(opCtx)) {
- // Ignore.
- IndexKeyEntry indexEntry = IndexKeyEntry(ticket.added[i], ticket.loc);
- continue;
+ // Add all new data keys, and all new multikey metadata keys, into the index. When iterating
+ // over the data keys, each of them should point to the doc's RecordId. When iterating over
+ // the multikey metadata keys, they should point to the reserved 'kMultikeyMetadataKeyId'.
+ const auto newMultikeyMetadataKeys = asVector(ticket.newMultikeyMetadataKeys);
+ for (const auto keySet : {&ticket.added, &newMultikeyMetadataKeys}) {
+ const auto& recordId = (keySet == &ticket.added ? ticket.loc : kMultikeyMetadataKeyId);
+ for (const auto& key : *keySet) {
+ Status status = _newInterface->insert(opCtx, key, recordId, ticket.dupsAllowed);
+ if (isFatalError(opCtx, status, key)) {
+ return status;
}
-
- return status;
}
+ }
- IndexKeyEntry indexEntry = IndexKeyEntry(ticket.added[i], ticket.loc);
+ if (shouldMarkIndexAsMultikey(
+ ticket.newKeys, ticket.newMultikeyMetadataKeys, ticket.newMultikeyPaths)) {
+ _btreeState->setMultikey(opCtx, ticket.newMultikeyPaths);
}
- *numInserted = ticket.added.size();
*numDeleted = ticket.removed.size();
+ *numInserted = ticket.added.size();
return Status::OK();
}
@@ -433,14 +449,11 @@ IndexAccessMethod::BulkBuilder::BulkBuilder(const IndexAccessMethod* index,
Status IndexAccessMethod::BulkBuilder::insert(OperationContext* opCtx,
const BSONObj& obj,
const RecordId& loc,
- const InsertDeleteOptions& options,
- int64_t* numInserted) {
+ const InsertDeleteOptions& options) {
BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
MultikeyPaths multikeyPaths;
- _real->getKeys(obj, options.getKeysMode, &keys, &multikeyPaths);
-
- _everGeneratedMultipleKeys = _everGeneratedMultipleKeys || (keys.size() > 1);
+ _real->getKeys(obj, options.getKeysMode, &keys, &_multikeyMetadataKeys, &multikeyPaths);
if (!multikeyPaths.empty()) {
if (_indexMultikeyPaths.empty()) {
@@ -453,18 +466,24 @@ Status IndexAccessMethod::BulkBuilder::insert(OperationContext* opCtx,
}
}
- for (BSONObjSet::iterator it = keys.begin(); it != keys.end(); ++it) {
- _sorter->add(*it, loc);
- _keysInserted++;
+ for (const auto& key : keys) {
+ _sorter->add(key, loc);
+ ++_keysInserted;
}
- if (NULL != numInserted) {
- *numInserted += keys.size();
- }
+ _isMultiKey =
+ _isMultiKey || _real->shouldMarkIndexAsMultikey(keys, _multikeyMetadataKeys, multikeyPaths);
return Status::OK();
}
+IndexAccessMethod::BulkBuilder::Sorter::Iterator* IndexAccessMethod::BulkBuilder::done() {
+ for (const auto& key : _multikeyMetadataKeys) {
+ _sorter->add(key, kMultikeyMetadataKeyId);
+ ++_keysInserted;
+ }
+ return _sorter->done();
+}
Status IndexAccessMethod::commitBulk(OperationContext* opCtx,
BulkBuilder* bulk,
@@ -473,7 +492,7 @@ Status IndexAccessMethod::commitBulk(OperationContext* opCtx,
set<RecordId>* dupsToDrop) {
Timer timer;
- std::unique_ptr<BulkBuilder::Sorter::Iterator> it(bulk->_sorter->done());
+ std::unique_ptr<BulkBuilder::Sorter::Iterator> it(bulk->done());
stdx::unique_lock<Client> lk(*opCtx->getClient());
ProgressMeterHolder pm(
@@ -516,8 +535,7 @@ Status IndexAccessMethod::commitBulk(OperationContext* opCtx,
return status;
}
- // If we're here either it's a dup and we're cool with it or the addKey went just
- // fine.
+ // If we're here either it's a dup and we're cool with it or the addKey went just fine.
pm.hit();
wunit.commit();
}
@@ -543,6 +561,7 @@ void IndexAccessMethod::setIndexIsMultikey(OperationContext* opCtx, MultikeyPath
void IndexAccessMethod::getKeys(const BSONObj& obj,
GetKeysMode mode,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const {
static stdx::unordered_set<int> whiteList{ErrorCodes::CannotBuildIndexKeys,
// Btree
@@ -569,7 +588,7 @@ void IndexAccessMethod::getKeys(const BSONObj& obj,
13026,
13027};
try {
- doGetKeys(obj, keys, multikeyPaths);
+ doGetKeys(obj, keys, multikeyMetadataKeys, multikeyPaths);
} catch (const AssertionException& ex) {
// Suppress all indexing errors when mode is kRelaxConstraints.
if (mode == GetKeysMode::kEnforceConstraints) {
@@ -598,8 +617,10 @@ void IndexAccessMethod::getKeys(const BSONObj& obj,
}
}
-bool IndexAccessMethod::BulkBuilder::isMultikey() const {
- return _everGeneratedMultipleKeys || isMultikeyFromPaths(_indexMultikeyPaths);
+bool IndexAccessMethod::shouldMarkIndexAsMultikey(const BSONObjSet& keys,
+ const BSONObjSet& multikeyMetadataKeys,
+ const MultikeyPaths& multikeyPaths) const {
+ return (keys.size() > 1 || isMultikeyFromPaths(multikeyPaths));
}
} // namespace mongo
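
Tracing the revised insert() path end-to-end (illustrative): for a $** index, the document {a: [1, 2]} at RecordId(42) produces keys = { {"": "a", "": 1}, {"": "a", "": 2} } and multikeyMetadataKeys = { {"": 1, "": "a"} }. The two-set loop then performs three inserts against the SortedDataInterface:

    _newInterface->insert(opCtx, /* {"": "a", "": 1} */ key1, RecordId(42), dupsAllowed);
    _newInterface->insert(opCtx, /* {"": "a", "": 2} */ key2, RecordId(42), dupsAllowed);
    _newInterface->insert(opCtx, /* {"": 1, "": "a"} */ meta, kMultikeyMetadataKeyId, dupsAllowed);

after which *numInserted is 3, and shouldMarkIndexAsMultikey() marks the index multikey because the metadata key set is non-empty.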
diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h
index 9e1e3185247..07d8fa28c75 100644
--- a/src/mongo/db/index/index_access_method.h
+++ b/src/mongo/db/index/index_access_method.h
@@ -197,26 +197,33 @@ public:
class BulkBuilder {
public:
+ using Sorter = mongo::Sorter<BSONObj, RecordId>;
+
/**
* Insert into the BulkBuilder as-if inserting into an IndexAccessMethod.
*/
Status insert(OperationContext* opCtx,
const BSONObj& obj,
const RecordId& loc,
- const InsertDeleteOptions& options,
- int64_t* numInserted);
+ const InsertDeleteOptions& options);
const MultikeyPaths& getMultikeyPaths() const {
return _indexMultikeyPaths;
}
- bool isMultikey() const;
+ bool isMultikey() const {
+ return _isMultiKey;
+ }
+
+ /**
+ * Inserts all multikey metadata keys cached during the BulkBuilder's lifetime into the
+ * underlying Sorter, finalizes it, and returns an iterator over the sorted dataset.
+ */
+ Sorter::Iterator* done();
private:
friend class IndexAccessMethod;
- using Sorter = mongo::Sorter<BSONObj, RecordId>;
-
BulkBuilder(const IndexAccessMethod* index,
const IndexDescriptor* descriptor,
size_t maxMemoryUsageBytes);
@@ -225,13 +232,17 @@ public:
const IndexAccessMethod* _real;
int64_t _keysInserted = 0;
- // Set to true if at least one document causes IndexAccessMethod::getKeys() to return a
- // BSONObjSet with size strictly greater than one.
- bool _everGeneratedMultipleKeys = false;
+ // Set to true if any document added to the BulkBuilder causes the index to become multikey.
+ bool _isMultiKey = false;
// Holds the path components that cause this index to be multikey. The '_indexMultikeyPaths'
// vector remains empty if this index doesn't support path-level multikey tracking.
MultikeyPaths _indexMultikeyPaths;
+
+ // Caches the set of all multikey metadata keys generated during the bulk build process.
+ // These are inserted into the sorter after all normal data keys have been added, just
+ // before the bulk build is committed.
+ BSONObjSet _multikeyMetadataKeys{SimpleBSONObjComparator::kInstance.makeBSONObjSet()};
};
/**
@@ -285,13 +296,27 @@ public:
* 'multikeyPaths' to have the same number of elements as the index key pattern and fills each
* element with the prefixes of the indexed field that would cause this index to be multikey as
* a result of inserting 'keys'.
+ *
+ * If the 'multikeyMetadataKeys' pointer is non-null, then the function will populate the
+ * BSONObjSet with any multikey metadata keys generated while processing the document. These
+ * keys are not associated with the document itself, but instead represent multikey path
+ * information that must be stored in a reserved keyspace within the index.
*/
void getKeys(const BSONObj& obj,
GetKeysMode mode,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const;
/**
+ * Given the set of keys, multikeyMetadataKeys and multikeyPaths generated by a particular
+ * document, return 'true' if the index should be marked as multikey and 'false' otherwise.
+ */
+ virtual bool shouldMarkIndexAsMultikey(const BSONObjSet& keys,
+ const BSONObjSet& multikeyMetadataKeys,
+ const MultikeyPaths& multikeyPaths) const;
+
+ /**
* Splits the sets 'left' and 'right' into two vectors, the first containing the elements that
* only appeared in 'left', and the second containing only elements that appeared in 'right'.
*
@@ -311,9 +336,15 @@ protected:
* 'multikeyPaths' to have the same number of elements as the index key pattern and fills each
* element with the prefixes of the indexed field that would cause this index to be multikey as
* a result of inserting 'keys'.
+ *
+ * If the 'multikeyMetadataKeys' pointer is non-null, then the function will populate the
+ * BSONObjSet with any multikey metadata keys generated while processing the document. These
+ * keys are not associated with the document itself, but instead represent multikey path
+ * information that must be stored in a reserved keyspace within the index.
*/
virtual void doGetKeys(const BSONObj& obj,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const = 0;
/**
@@ -321,6 +352,14 @@ protected:
*/
bool ignoreKeyTooLong(OperationContext* opCtx);
+ /**
+ * Determine whether the given Status represents an exception that should cause the indexing
+ * process to abort. The 'key' argument is passed in to allow the offending entry to be logged
+ * in the event that a non-fatal 'ErrorCodes::DuplicateKeyValue' is encountered during a
+ * background index build.
+ */
+ bool isFatalError(OperationContext* opCtx, Status status, BSONObj key);
+
IndexCatalogEntry* _btreeState; // owned by IndexCatalogEntry
const IndexDescriptor* _descriptor;
@@ -340,16 +379,20 @@ private:
class UpdateTicket {
public:
UpdateTicket()
- : oldKeys(SimpleBSONObjComparator::kInstance.makeBSONObjSet()), newKeys(oldKeys) {}
+ : oldKeys(SimpleBSONObjComparator::kInstance.makeBSONObjSet()),
+ newKeys(oldKeys),
+ newMultikeyMetadataKeys(newKeys) {}
private:
friend class IndexAccessMethod;
- bool _isValid;
+ bool _isValid{false};
BSONObjSet oldKeys;
BSONObjSet newKeys;
+ BSONObjSet newMultikeyMetadataKeys;
+
std::vector<BSONObj> removed;
std::vector<BSONObj> added;
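
A sketch of how the revised BulkBuilder pieces fit together (illustrative; setup and option plumbing elided, and initiateBulk() is assumed to be the existing BulkBuilder factory):

    auto bulk = accessMethod->initiateBulk(maxMemoryUsageBytes);
    // For each (doc, loc) yielded by the collection scan; note that insert()
    // no longer takes a numInserted out-parameter:
    uassertStatusOK(bulk->insert(opCtx, doc, loc, options));
    // commitBulk() now obtains its iterator via bulk->done(), which first
    // appends every cached multikey metadata key to the sorter, pointed at
    // the reserved kMultikeyMetadataKeyId.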
diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp
index 54f9c94f3a7..6e7c17569f9 100644
--- a/src/mongo/db/index/s2_access_method.cpp
+++ b/src/mongo/db/index/s2_access_method.cpp
@@ -142,6 +142,7 @@ StatusWith<BSONObj> S2AccessMethod::fixSpec(const BSONObj& specObj) {
void S2AccessMethod::doGetKeys(const BSONObj& obj,
BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys, multikeyPaths);
}
diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h
index ad0044dc128..bc4456cabe8 100644
--- a/src/mongo/db/index/s2_access_method.h
+++ b/src/mongo/db/index/s2_access_method.h
@@ -63,7 +63,10 @@ private:
* and fills each element with the prefixes of the indexed field that would cause this index to
* be multikey as a result of inserting 'keys'.
*/
- void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
+ void doGetKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ BSONObjSet* multikeyMetadataKeys,
+ MultikeyPaths* multikeyPaths) const final;
S2IndexingParams _params;
diff --git a/src/mongo/db/record_id.h b/src/mongo/db/record_id.h
index e0dbf038573..1a903d1d39e 100644
--- a/src/mongo/db/record_id.h
+++ b/src/mongo/db/record_id.h
@@ -45,6 +45,17 @@ namespace mongo {
*/
class RecordId {
public:
+ // This set of constants define the boundaries of the 'normal' and 'reserved' id ranges.
+ static constexpr int64_t kNullRepr = 0;
+ static constexpr int64_t kMinRepr = LLONG_MIN;
+ static constexpr int64_t kMaxRepr = LLONG_MAX;
+ static constexpr int64_t kMinReservedRepr = kMaxRepr - (1024 * 1024);
+
+ /**
+ * Enumerates all ids in the reserved range that have been allocated for a specific purpose.
+ */
+ enum class ReservedId : int64_t { kAllPathsMultikeyMetadataId = kMinReservedRepr };
+
/**
* Constructs a Null RecordId.
*/
@@ -52,6 +63,8 @@ public:
explicit RecordId(int64_t repr) : _repr(repr) {}
+ explicit RecordId(ReservedId repr) : RecordId(static_cast<int64_t>(repr)) {}
+
/**
* Construct a RecordId from two halves.
* TODO consider removing.
@@ -72,6 +85,13 @@ public:
return RecordId(kMaxRepr);
}
+ /**
+ * Returns the first record in the reserved id range at the top of the RecordId space.
+ */
+ static RecordId minReserved() {
+ return RecordId(kMinReservedRepr);
+ }
+
bool isNull() const {
return _repr == 0;
}
@@ -81,11 +101,27 @@ public:
}
/**
- * Normal RecordIds are the only ones valid for representing Records. All RecordIds outside
- * of this range are sentinel values.
+ * Valid RecordIds are the only ones which may be used to represent Records. The range of valid
+ * RecordIds includes both "normal" ids that refer to user data, and "reserved" ids that are
+ * used internally. All RecordIds outside of the valid range are sentinel values.
+ */
+ bool isValid() const {
+ return isNormal() || isReserved();
+ }
+
+ /**
+ * Normal RecordIds are those which fall within the range used to represent normal user data,
+ * excluding the reserved range at the top of the RecordId space.
*/
bool isNormal() const {
- return _repr > 0 && _repr < kMaxRepr;
+ return _repr > 0 && _repr < kMinReservedRepr;
+ }
+
+ /**
+ * Returns true if this RecordId falls within the reserved range at the top of the record space.
+ */
+ bool isReserved() const {
+ return _repr >= kMinReservedRepr && _repr < kMaxRepr;
}
int compare(RecordId rhs) const {
@@ -121,10 +157,6 @@ public:
}
private:
- static const int64_t kMaxRepr = LLONG_MAX;
- static const int64_t kNullRepr = 0;
- static const int64_t kMinRepr = LLONG_MIN;
-
int64_t _repr;
};
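Note: the reserved range carves the topmost 2^20 ids out of the former 'normal' space. A
self-contained sketch of the resulting ranges, mirroring the constants above (free functions
for illustration only; the real predicates are RecordId members):

    #include <climits>
    #include <cstdint>

    constexpr int64_t kMaxRepr = LLONG_MAX;
    constexpr int64_t kMinReservedRepr = kMaxRepr - (1024 * 1024);

    // Normal ids represent user data and stop just below the reserved range.
    bool isNormal(int64_t repr) {
        return repr > 0 && repr < kMinReservedRepr;
    }

    // Reserved ids are allocated for internal purposes; for example, the allPaths
    // multikey metadata id sits at the very bottom of this range.
    bool isReserved(int64_t repr) {
        return repr >= kMinReservedRepr && repr < kMaxRepr;
    }

    // Either kind of id may legitimately back a Record.
    bool isValid(int64_t repr) {
        return isNormal(repr) || isReserved(repr);
    }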
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index 15598202a70..e0775d5c0aa 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -66,6 +66,7 @@
#include "mongo/db/operation_context.h"
#include "mongo/db/ops/delete_request.h"
#include "mongo/db/ops/parsed_update.h"
+#include "mongo/db/ops/update_lifecycle_impl.h"
#include "mongo/db/ops/update_request.h"
#include "mongo/db/query/get_executor.h"
#include "mongo/db/query/internal_plans.h"
@@ -879,8 +880,10 @@ Status StorageInterfaceImpl::upsertById(OperationContext* opCtx,
// We can create an UpdateRequest now that the collection's namespace has been resolved, in
// the event it was specified as a UUID.
UpdateRequest request(collection->ns());
+ UpdateLifecycleImpl lifeCycle(collection->ns());
request.setQuery(query);
request.setUpdates(update);
+ request.setLifecycle(&lifeCycle);
request.setUpsert(true);
invariant(!request.isMulti()); // This follows from using an exact _id query.
invariant(!request.shouldReturnAnyDocs());
@@ -919,8 +922,10 @@ Status StorageInterfaceImpl::putSingleton(OperationContext* opCtx,
const NamespaceString& nss,
const TimestampedBSONObj& update) {
UpdateRequest request(nss);
+ UpdateLifecycleImpl lifeCycle(nss);
request.setQuery({});
request.setUpdates(update.obj);
+ request.setLifecycle(&lifeCycle);
request.setUpsert(true);
return _updateWithQuery(opCtx, request, update.timestamp);
}
@@ -930,8 +935,10 @@ Status StorageInterfaceImpl::updateSingleton(OperationContext* opCtx,
const BSONObj& query,
const TimestampedBSONObj& update) {
UpdateRequest request(nss);
+ UpdateLifecycleImpl lifeCycle(nss);
request.setQuery(query);
request.setUpdates(update.obj);
+ request.setLifecycle(&lifeCycle);
invariant(!request.isUpsert());
return _updateWithQuery(opCtx, request, update.timestamp);
}
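Note: all three call sites above follow the same pattern, with an UpdateLifecycleImpl
constructed on the stack beside the UpdateRequest and attached before the update runs.
Condensed for reference ('nss', 'query' and 'update' stand in for each call site's values):

    UpdateRequest request(nss);
    UpdateLifecycleImpl lifeCycle(nss);
    request.setQuery(query);
    request.setUpdates(update);
    // The request stores a raw pointer, so the lifecycle must outlive the request.
    request.setLifecycle(&lifeCycle);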
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp
index 3d212b09900..75c30bc0f68 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_btree_impl.cpp
@@ -106,7 +106,7 @@ public:
return Status(ErrorCodes::KeyTooLong, "key too big");
}
- invariant(loc.isNormal());
+ invariant(loc.isValid());
invariant(!hasFieldNames(key));
if (!_data->empty()) {
@@ -150,7 +150,7 @@ public:
const BSONObj& key,
const RecordId& loc,
bool dupsAllowed) {
- invariant(loc.isNormal());
+ invariant(loc.isValid());
invariant(!hasFieldNames(key));
if (key.objsize() >= TempKeyMaxSize) {
@@ -176,7 +176,7 @@ public:
const BSONObj& key,
const RecordId& loc,
bool dupsAllowed) {
- invariant(loc.isNormal());
+ invariant(loc.isValid());
invariant(!hasFieldNames(key));
IndexKeyEntry entry(key.getOwned(), loc);
diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp
index 1cdca41e6ed..f033ae3e576 100644
--- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp
+++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_record_store.cpp
@@ -631,7 +631,7 @@ int64_t EphemeralForTestRecordStore::storageSize(OperationContext* opCtx,
RecordId EphemeralForTestRecordStore::allocateLoc() {
RecordId out = RecordId(_data->nextId++);
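+    // Freshly allocated record ids must stay in the normal range; the reserved
+    // range at the top of the RecordId space is for internal use only.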
- invariant(out < RecordId::max());
+ invariant(out.isNormal());
return out;
}
diff --git a/src/mongo/db/storage/key_string_test.cpp b/src/mongo/db/storage/key_string_test.cpp
index 0d87d0dfea4..9e500879ba5 100644
--- a/src/mongo/db/storage/key_string_test.cpp
+++ b/src/mongo/db/storage/key_string_test.cpp
@@ -1123,7 +1123,7 @@ TEST_F(KeyStringTest, RecordIds) {
ASSERT(reader.atEof());
}
- if (rid.isNormal()) {
+ if (rid.isValid()) {
ASSERT_GT(ks, KeyString(version, RecordId()));
ASSERT_GT(ks, KeyString(version, RecordId::min()));
ASSERT_LT(ks, KeyString(version, RecordId::max()));
diff --git a/src/mongo/db/storage/mobile/mobile_index.cpp b/src/mongo/db/storage/mobile/mobile_index.cpp
index bfd190cb4fd..bf61b8fe5cd 100644
--- a/src/mongo/db/storage/mobile/mobile_index.cpp
+++ b/src/mongo/db/storage/mobile/mobile_index.cpp
@@ -80,7 +80,7 @@ Status MobileIndex::insert(OperationContext* opCtx,
const BSONObj& key,
const RecordId& recId,
bool dupsAllowed) {
- invariant(recId.isNormal());
+ invariant(recId.isValid());
invariant(!hasFieldNames(key));
Status status = _checkKeySize(key);
@@ -133,7 +133,7 @@ void MobileIndex::unindex(OperationContext* opCtx,
const BSONObj& key,
const RecordId& recId,
bool dupsAllowed) {
- invariant(recId.isNormal());
+ invariant(recId.isValid());
invariant(!hasFieldNames(key));
return _unindex(opCtx, key, recId, dupsAllowed);
@@ -299,7 +299,7 @@ public:
virtual ~BulkBuilderBase() {}
Status addKey(const BSONObj& key, const RecordId& recId) override {
- invariant(recId.isNormal());
+ invariant(recId.isValid());
invariant(!hasFieldNames(key));
Status status = _checkKeySize(key);
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index a2d215087bc..ab871b59db3 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -318,7 +318,7 @@ Status WiredTigerIndex::insert(OperationContext* opCtx,
const RecordId& id,
bool dupsAllowed) {
dassert(opCtx->lockState()->isWriteLocked());
- invariant(id.isNormal());
+ invariant(id.isValid());
dassert(!hasFieldNames(key));
Status s = checkKeySize(key);
@@ -337,7 +337,7 @@ void WiredTigerIndex::unindex(OperationContext* opCtx,
const RecordId& id,
bool dupsAllowed) {
dassert(opCtx->lockState()->isWriteLocked());
- invariant(id.isNormal());
+ invariant(id.isValid());
dassert(!hasFieldNames(key));
WiredTigerCursor curwrap(_uri, _tableId, false, opCtx);
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 72070d947c8..e5b1e785527 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -119,7 +119,7 @@ public:
void commit(boost::optional<Timestamp>) final {
invariant(_bytesInserted >= 0);
- invariant(_highestInserted.isNormal());
+ invariant(_highestInserted.isValid());
_oplogStones->_currentRecords.addAndFetch(_countInserted);
int64_t newCurrentBytes = _oplogStones->_currentBytes.addAndFetch(_bytesInserted);
@@ -225,7 +225,7 @@ void WiredTigerRecordStore::OplogStones::awaitHasExcessStonesOrDead() {
: Timestamp::min();
auto stone = _stones.front();
- invariant(stone.lastRecord.isNormal());
+ invariant(stone.lastRecord.isValid());
if (static_cast<std::uint64_t>(stone.lastRecord.repr()) <
lastStableRecoveryTimestamp.asULL()) {
break;
@@ -1210,7 +1210,7 @@ void WiredTigerRecordStore::reclaimOplog(OperationContext* opCtx) {
void WiredTigerRecordStore::reclaimOplog(OperationContext* opCtx, Timestamp recoveryTimestamp) {
Timer timer;
while (auto stone = _oplogStones->peekOldestStoneIfNeeded()) {
- invariant(stone->lastRecord.isNormal());
+ invariant(stone->lastRecord.isValid());
if (static_cast<std::uint64_t>(stone->lastRecord.repr()) >= recoveryTimestamp.asULL()) {
// Do not truncate oplogs needed for replication recovery.
@@ -1934,7 +1934,7 @@ boost::optional<Record> WiredTigerRecordStoreCursorBase::next() {
}
_skipNextAdvance = false;
- if (!id.isNormal()) {
+ if (!id.isValid()) {
id = getKey(c);
}
diff --git a/src/mongo/db/views/durable_view_catalog.cpp b/src/mongo/db/views/durable_view_catalog.cpp
index b8691d5f15c..c02b091e88c 100644
--- a/src/mongo/db/views/durable_view_catalog.cpp
+++ b/src/mongo/db/views/durable_view_catalog.cpp
@@ -144,7 +144,7 @@ void DurableViewCatalogImpl::upsert(OperationContext* opCtx,
RecordId id = Helpers::findOne(opCtx, systemViews, BSON("_id" << name.ns()), requireIndex);
Snapshotted<BSONObj> oldView;
- if (!id.isNormal() || !systemViews->findDoc(opCtx, id, &oldView)) {
+ if (!id.isValid() || !systemViews->findDoc(opCtx, id, &oldView)) {
LOG(2) << "insert view " << view << " into " << _db->getSystemViewsName();
uassertStatusOK(
systemViews->insertDocument(opCtx, InsertStatement(view), &CurOp::get(opCtx)->debug()));
@@ -168,7 +168,7 @@ void DurableViewCatalogImpl::remove(OperationContext* opCtx, const NamespaceStri
return;
const bool requireIndex = false;
RecordId id = Helpers::findOne(opCtx, systemViews, BSON("_id" << name.ns()), requireIndex);
- if (!id.isNormal())
+ if (!id.isValid())
return;
LOG(2) << "remove view " << name << " from " << _db->getSystemViewsName();
diff --git a/src/mongo/dbtests/SConscript b/src/mongo/dbtests/SConscript
index 9a3b1d505ee..1b66fc11627 100644
--- a/src/mongo/dbtests/SConscript
+++ b/src/mongo/dbtests/SConscript
@@ -52,6 +52,7 @@ env.Library(
dbtest = env.Program(
target="dbtest",
source=[
+ 'all_paths_multikey_persistence_test.cpp',
'basictests.cpp',
'clienttests.cpp',
'commandtests.cpp',
diff --git a/src/mongo/dbtests/all_paths_multikey_persistence_test.cpp b/src/mongo/dbtests/all_paths_multikey_persistence_test.cpp
new file mode 100644
index 00000000000..7f848045d2e
--- /dev/null
+++ b/src/mongo/dbtests/all_paths_multikey_persistence_test.cpp
@@ -0,0 +1,706 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex
+
+#include <memory>
+
+#include "mongo/db/db_raii.h"
+#include "mongo/db/repl/storage_interface_impl.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+namespace {
+
+using namespace unittest;
+
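+// All multikey path metadata keys generated by a $** index point at this single
+// reserved RecordId.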
+static const RecordId kMetadataId = RecordId::minReserved();
+
+static const int kIndexVersion = static_cast<int>(IndexDescriptor::kLatestIndexVersion);
+static const NamespaceString kDefaultNSS{"all_paths_multikey_persistence.test"};
+static const std::string kDefaultIndexName{"all_paths_multikey"};
+static const BSONObj kDefaultIndexKey = fromjson("{'$**': 1}");
+static const BSONObj kDefaultPathProjection;
+
+static constexpr auto kIdField = "_id";
+
+std::vector<InsertStatement> toInserts(std::vector<BSONObj> docs) {
+ std::vector<InsertStatement> inserts(docs.size());
+ std::transform(docs.cbegin(), docs.cend(), inserts.begin(), [](const BSONObj& doc) {
+ return InsertStatement(doc);
+ });
+ return inserts;
+}
+
+class AllPathsMultikeyPersistenceTestFixture : public unittest::Test {
+public:
+ AllPathsMultikeyPersistenceTestFixture() {
+ _origAllPathsKnob = internalQueryAllowAllPathsIndexes.load();
+ internalQueryAllowAllPathsIndexes.store(true);
+ _opCtx = cc().makeOperationContext();
+ }
+
+ virtual ~AllPathsMultikeyPersistenceTestFixture() {
+ internalQueryAllowAllPathsIndexes.store(_origAllPathsKnob);
+ _opCtx.reset();
+ }
+
+ OperationContext* opCtx() {
+ return _opCtx.get();
+ }
+
+protected:
+ void assertSetupEnvironment(bool background,
+ std::vector<BSONObj> initialDocs = {},
+ BSONObj indexKey = kDefaultIndexKey,
+ BSONObj pathProjection = kDefaultPathProjection,
+ const std::string& indexName = kDefaultIndexName,
+ const NamespaceString& nss = kDefaultNSS) {
+ assertRecreateCollection(nss);
+ assertInsertDocuments(initialDocs, nss);
+ assertCreateIndexForColl(nss, indexName, indexKey, pathProjection, background);
+ }
+
+ void assertIndexContentsEquals(std::vector<IndexKeyEntry> expectedKeys,
+ bool expectIndexIsMultikey = true,
+ const NamespaceString& nss = kDefaultNSS,
+ const std::string& indexName = kDefaultIndexName) {
+ // Subsequent operations must take place under a collection lock.
+ AutoGetCollectionForRead autoColl(opCtx(), nss);
+ auto collection = autoColl.getCollection();
+
+ // Verify whether or not the index has been marked as multikey.
+ ASSERT_EQ(expectIndexIsMultikey, getIndexDesc(collection, indexName)->isMultikey(opCtx()));
+
+ // Obtain a cursor over the index, and confirm that the keys are present in order.
+ auto indexCursor = getIndexCursor(collection, indexName);
+ auto indexKey = indexCursor->seek(kMinBSONKey, true);
+ try {
+ for (const auto& expectedKey : expectedKeys) {
+ ASSERT(indexKey);
+ ASSERT_BSONOBJ_EQ(expectedKey.key, indexKey->key);
+ ASSERT_EQ(expectedKey.loc, indexKey->loc);
+ indexKey = indexCursor->next();
+ }
+ // Confirm that there are no further keys in the index.
+ ASSERT(!indexCursor->next());
+ } catch (const TestAssertionFailureException& ex) {
+ log() << "Writing remaining index keys to debug log:";
+ while (indexKey) {
+ log() << "{ key: " << indexKey->key << ", loc: " << indexKey->loc << " }";
+ indexKey = indexCursor->next();
+ }
+            throw;  // Rethrow the original assertion failure after logging.
+ }
+ }
+
+ void assertRecreateCollection(const NamespaceString& nss) {
+ ASSERT_OK(_storage.dropCollection(opCtx(), nss));
+ ASSERT_OK(_storage.createCollection(opCtx(), nss, collOptions()));
+ }
+
+ void assertInsertDocuments(std::vector<BSONObj> docs,
+ const NamespaceString& nss = kDefaultNSS) {
+ ASSERT_OK(_storage.insertDocuments(opCtx(), nss, toInserts(docs)));
+ }
+
+ void assertUpdateDocuments(std::vector<std::pair<BSONObj, BSONObj>> updates,
+ const NamespaceString& nss = kDefaultNSS) {
+ for (const auto& update : updates) {
+ ASSERT_OK(_storage.updateSingleton(
+ opCtx(), nss, update.first, {update.second, Timestamp(0)}));
+ }
+ }
+
+ void assertUpsertDocuments(std::vector<BSONObj> upserts,
+ const NamespaceString& nss = kDefaultNSS) {
+ for (const auto& upsert : upserts) {
+ ASSERT_OK(_storage.upsertById(opCtx(), nss, upsert[kIdField], upsert));
+ }
+ }
+
+ void assertRemoveDocuments(std::vector<BSONObj> docs,
+ const NamespaceString& nss = kDefaultNSS) {
+ for (const auto& doc : docs) {
+ ASSERT_OK(_storage.deleteByFilter(opCtx(), nss, doc));
+ }
+ }
+
+ void assertCreateIndexForColl(const NamespaceString& nss,
+ const std::string& name,
+ BSONObj key,
+ BSONObj pathProjection,
+ bool background) {
+ BSONObjBuilder bob =
+ std::move(BSONObjBuilder() << "ns" << nss.ns() << "name" << name << "key" << key);
+
+ if (!pathProjection.isEmpty())
+ bob << IndexDescriptor::kPathProjectionFieldName << pathProjection;
+
+ auto indexSpec = (bob << "v" << kIndexVersion << "background" << background).obj();
+
+ Lock::DBLock dbLock(opCtx(), nss.db(), MODE_X);
+ AutoGetCollection autoColl(opCtx(), nss, MODE_X);
+ auto coll = autoColl.getCollection();
+
+ MultiIndexBlock indexer(opCtx(), coll);
+ indexer.allowBackgroundBuilding();
+ indexer.allowInterruption();
+
+ // Initialize the index builder and add all documents currently in the collection.
+ ASSERT_OK(indexer.init(indexSpec).getStatus());
+ ASSERT_OK(indexer.insertAllDocumentsInCollection());
+
+ WriteUnitOfWork wunit(opCtx());
+ indexer.commit();
+ wunit.commit();
+ }
+
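+    // Converts each JSON string to BSON and appends a monotonically increasing
+    // _id (starting at 1), so later updates and deletes can target docs by _id.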
+ std::vector<BSONObj> makeDocs(const std::vector<std::string>& jsonObjs) {
+ std::vector<BSONObj> docs(jsonObjs.size());
+ std::transform(
+ jsonObjs.cbegin(), jsonObjs.cend(), docs.begin(), [this](const std::string& json) {
+ return fromjson(json).addField(BSON(kIdField << (_id++))[kIdField]);
+ });
+ return docs;
+ }
+
+ const IndexDescriptor* getIndexDesc(const Collection* collection, const StringData indexName) {
+ return collection->getIndexCatalog()->findIndexByName(opCtx(), indexName);
+ }
+
+ const IndexAccessMethod* getIndex(const Collection* collection, const StringData indexName) {
+ return collection->getIndexCatalog()->getIndex(getIndexDesc(collection, indexName));
+ }
+
+ std::unique_ptr<SortedDataInterface::Cursor> getIndexCursor(const Collection* collection,
+ const StringData indexName) {
+ return getIndex(collection, indexName)->newCursor(opCtx());
+ }
+
+ CollectionOptions collOptions() {
+ CollectionOptions collOpts;
+ collOpts.uuid = UUID::gen();
+ return collOpts;
+ }
+
+private:
+ ServiceContext::UniqueOperationContext _opCtx;
+ repl::StorageInterfaceImpl _storage;
+ bool _origAllPathsKnob{false};
+ int _id{1};
+};
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, RecordMultikeyPathsInBulkIndexBuild) {
+ // Create the test collection, add some initial documents, and build a foreground $** index.
+ assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"}));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, RecordMultikeyPathsInBackgroundIndexBuild) {
+ // Create the test collection, add some initial documents, and build a background $** index.
+ assertSetupEnvironment(true, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"}));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, DedupMultikeyPathsInBulkIndexBuild) {
+ // Create the test collection, add some initial documents, and build a foreground $** index.
+ const auto initialDocs =
+ makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}"});
+ assertSetupEnvironment(false, initialDocs);
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, DedupMultikeyPathsInBackgroundIndexBuild) {
+ // Create the test collection, add some initial documents, and build a background $** index.
+ const auto initialDocs =
+ makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}", "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}"});
+ assertSetupEnvironment(true, initialDocs);
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, AddAndDedupNewMultikeyPathsOnPostBuildInsertion) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"}));
+
+ // Insert some more documents with a mix of new and duplicate multikey paths.
+ assertInsertDocuments(makeDocs({"{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", "{d: {e: {f: [5]}}}"}));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, AddAndDedupNewMultikeyPathsOnUpsert) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"}));
+
+ // Upsert some new documents to add new multikey paths.
+ assertUpsertDocuments(makeDocs({"{a: 2, b: [{c: 3}, {d: {e: [4]}}]}", "{d: {e: {f: [5]}}}"}));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, AddNewMultikeyPathsOnUpdate) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"}));
+
+ // Update the initial document to add a new multikey path.
+ assertUpdateDocuments(
+ {{fromjson("{_id: 1}"), fromjson("{$push: {b: {$each: [{d: {f: [4]}}, {g: [5]}]}}}")}});
+
+ {
+        // Verify that the updated document appears as expected.
+ AutoGetCollectionForRead autoColl(opCtx(), kDefaultNSS);
+ Snapshotted<BSONObj> result;
+ ASSERT(autoColl.getCollection()->findDoc(opCtx(), RecordId(1), &result));
+ ASSERT_BSONOBJ_EQ(result.value(),
+ fromjson("{_id:1, a:1, b:[{c:2}, {d:{e:[3]}}, {d:{f:[4]}}, {g:[5]}]}"));
+ }
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.f'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.g'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.f', '': 4}"), RecordId(1)},
+ {fromjson("{'': 'b.g', '': 5}"), RecordId(1)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, AddNewMultikeyPathsOnReplacement) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ assertSetupEnvironment(false, makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}"}));
+
+ // Update the initial document to modify all existing data keys and add a new multikey path.
+ assertUpdateDocuments(
+ {{fromjson("{_id: 1}"), fromjson("{a: 2, b: [{c: 3}, {d: {e: [4], f: [5]}}]}")}});
+
+ {
+        // Verify that the updated document appears as expected.
+ AutoGetCollectionForRead autoColl(opCtx(), kDefaultNSS);
+ Snapshotted<BSONObj> result;
+ ASSERT(autoColl.getCollection()->findDoc(opCtx(), RecordId(1), &result));
+ ASSERT_BSONOBJ_EQ(result.value(),
+ fromjson("{_id: 1, a: 2, b: [{c: 3}, {d: {e: [4], f: [5]}}]}"));
+ }
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.f'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(1)},
+ {fromjson("{'': 'b.d.f', '': 5}"), RecordId(1)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotRemoveMultikeyPathsOnDocDeletion) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(false, docs);
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys);
+
+ // Now remove all documents in the collection, and verify that only the multikey paths remain.
+ assertRemoveDocuments(docs);
+
+ expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexKeyPatternSubTreeInBulkBuild) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(false, docs, fromjson("{'b.d.$**': 1}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexKeyPatternSubTreeInBackgroundBuild) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(true, docs, fromjson("{'b.d.$**': 1}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexIncludedPathsInBulkBuild) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(
+ false, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 1}}, 'd.e': 1}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexIncludedPathsInBackgroundBuild) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(
+ true, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 1}}, 'd.e': 1}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, OnlyIndexIncludedPathsOnUpdate) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(
+ false, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 1}}, 'd.e': 1}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys);
+
+ // Now update RecordId(3), adding one new field 'd.e.g' within the included 'd.e' subpath and
+ // one new field 'd.h' which lies outside all included subtrees.
+ assertUpdateDocuments({{fromjson("{_id: 3}"), fromjson("{$set: {'d.e.g': 6, 'd.h': 7}}")}});
+
+ {
+        // Verify that the updated document appears as expected.
+ AutoGetCollectionForRead autoColl(opCtx(), kDefaultNSS);
+ Snapshotted<BSONObj> result;
+ ASSERT(autoColl.getCollection()->findDoc(opCtx(), RecordId(3), &result));
+ ASSERT_BSONOBJ_EQ(result.value(), fromjson("{_id: 3, d: {e: {f: [5], g: 6}, h: 7}}"));
+ }
+
+ // Verify that only the key {'d.e.g': 6} has been added to the index.
+ expectedKeys.push_back({fromjson("{'': 'd.e.g', '': 6}"), RecordId(3)});
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotIndexExcludedPathsInBulkBuild) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(
+ false, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 0}}, 'd.e': 0}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotIndexExcludedPathsInBackgroundBuild) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(
+ true, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 0}}, 'd.e': 0}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotIndexExcludedPathsOnUpdate) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(
+ false, docs, fromjson("{'$**': 1}"), fromjson("{b: {d: {e: 0}}, 'd.e': 0}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}};
+
+ assertIndexContentsEquals(expectedKeys);
+
+ // Now update RecordId(3), adding one new field 'd.e.g' within the excluded 'd.e' subpath and
+ // one new field 'd.h' which lies outside all excluded subtrees.
+ assertUpdateDocuments({{fromjson("{_id: 3}"), fromjson("{$set: {'d.e.g': 6, 'd.h': 7}}")}});
+
+ {
+        // Verify that the updated document appears as expected.
+ AutoGetCollectionForRead autoColl(opCtx(), kDefaultNSS);
+ Snapshotted<BSONObj> result;
+ ASSERT(autoColl.getCollection()->findDoc(opCtx(), RecordId(3), &result));
+ ASSERT_BSONOBJ_EQ(result.value(), fromjson("{_id: 3, d: {e: {f: [5], g: 6}, h: 7}}"));
+ }
+
+ // Verify that only the key {'d.h': 7} has been added to the index.
+ expectedKeys.push_back({fromjson("{'': 'd.h', '': 7}"), RecordId(3)});
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, IndexIdFieldIfSpecifiedInInclusionProjection) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(
+ false, docs, fromjson("{'$**': 1}"), fromjson("{_id: 1, 'b.d.e': 1, 'd.e': 1}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'b.d.e'}"), kMetadataId},
+ {fromjson("{'': 1, '': 'd.e.f'}"), kMetadataId},
+ {fromjson("{'': '_id', '': 1}"), RecordId(1)},
+ {fromjson("{'': '_id', '': 2}"), RecordId(2)},
+ {fromjson("{'': '_id', '': 3}"), RecordId(3)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, IndexIdFieldIfSpecifiedInExclusionProjection) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs({"{a: 1, b: [{c: 2}, {d: {e: [3]}}]}",
+ "{a: 2, b: [{c: 3}, {d: {e: [4]}}]}",
+ "{d: {e: {f: [5]}}}"});
+ assertSetupEnvironment(
+ false, docs, fromjson("{'$**': 1}"), fromjson("{_id: 1, 'b.d.e': 0, 'd.e': 0}"));
+
+ // Verify that the data and multikey path keys are present in the expected order.
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 1, '': 'b'}"), kMetadataId},
+ {fromjson("{'': '_id', '': 1}"), RecordId(1)},
+ {fromjson("{'': '_id', '': 2}"), RecordId(2)},
+ {fromjson("{'': '_id', '': 3}"), RecordId(3)},
+ {fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)}};
+
+ assertIndexContentsEquals(expectedKeys);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotMarkAsMultikeyIfNoArraysInBulkBuild) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs(
+ {"{a: 1, b: {c: 2, d: {e: 3}}}", "{a: 2, b: {c: 3, d: {e: 4}}}", "{d: {e: {f: 5}}}"});
+ assertSetupEnvironment(false, docs, fromjson("{'$**': 1}"));
+
+ // Verify that the data keys are present in the expected order, and the index is NOT multikey.
+ const bool expectIndexIsMultikey = false;
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys, expectIndexIsMultikey);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, DoNotMarkAsMultikeyIfNoArraysInBackgroundBuild) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs(
+ {"{a: 1, b: {c: 2, d: {e: 3}}}", "{a: 2, b: {c: 3, d: {e: 4}}}", "{d: {e: {f: 5}}}"});
+ assertSetupEnvironment(true, docs, fromjson("{'$**': 1}"));
+
+ // Verify that the data keys are present in the expected order, and the index is NOT multikey.
+ const bool expectIndexIsMultikey = false;
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys, expectIndexIsMultikey);
+}
+
+TEST_F(AllPathsMultikeyPersistenceTestFixture, IndexShouldBecomeMultikeyIfArrayIsCreatedByUpdate) {
+ // Create the test collection, add some initial documents, and build a $** index.
+ const auto docs = makeDocs(
+ {"{a: 1, b: {c: 2, d: {e: 3}}}", "{a: 2, b: {c: 3, d: {e: 4}}}", "{d: {e: {f: 5}}}"});
+ assertSetupEnvironment(false, docs, fromjson("{'$**': 1}"));
+
+ // Verify that the data keys are present in the expected order, and the index is NOT multikey.
+ bool expectIndexIsMultikey = false;
+ std::vector<IndexKeyEntry> expectedKeys = {{fromjson("{'': 'a', '': 1}"), RecordId(1)},
+ {fromjson("{'': 'a', '': 2}"), RecordId(2)},
+ {fromjson("{'': 'b.c', '': 2}"), RecordId(1)},
+ {fromjson("{'': 'b.c', '': 3}"), RecordId(2)},
+ {fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)},
+ {fromjson("{'': 'b.d.e', '': 4}"), RecordId(2)},
+ {fromjson("{'': 'd.e.f', '': 5}"), RecordId(3)}};
+
+ assertIndexContentsEquals(expectedKeys, expectIndexIsMultikey);
+
+ // Now perform an update that introduces an array into one of the documents...
+ assertUpdateDocuments({{fromjson("{_id: 1}"), fromjson("{$set: {g: {h: []}}}")}});
+
+ // ... and confirm that this has caused the index to become multikey.
+ expectIndexIsMultikey = true;
+ expectedKeys.insert(expectedKeys.begin(), {fromjson("{'': 1, '': 'g.h'}"), kMetadataId});
+
+ assertIndexContentsEquals(expectedKeys, expectIndexIsMultikey);
+}
+
+} // namespace
+} // namespace mongo
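Note: every expected-key list in the test file above follows the same two shapes. A minimal
sketch using the test's own helpers (the path and values are illustrative):

    // Multikey path metadata key: the first field is the constant 1, the second is
    // the dotted multikey path, and the entry points at the reserved metadata id.
    IndexKeyEntry metadataKey{fromjson("{'': 1, '': 'b.d.e'}"),
                              RecordId(RecordId::ReservedId::kAllPathsMultikeyMetadataId)};

    // Ordinary data key: the first field is the document path, the second is the
    // indexed value, and the entry points at the owning document's RecordId.
    IndexKeyEntry dataKey{fromjson("{'': 'b.d.e', '': 3}"), RecordId(1)};

Because numbers precede strings in BSON ordering, metadata keys (whose first field is the
number 1) always sort before data keys (whose first field is a path string), which is why
they appear first in every expected ordering.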