summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gregory Wlodarek <gregory.wlodarek@mongodb.com> 2021-10-30 01:05:07 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-10-30 01:38:21 +0000
commit 8b405630fc1506abf620aaf4b08363d015327c9b (patch)
tree d8f81d4f33499857d6c76fb2694cf9c5d4cccaa6
parent 2455e1c112c89a3bdde41d718cadf7f9cc9b5bf0 (diff)
download mongo-8b405630fc1506abf620aaf4b08363d015327c9b.tar.gz
SERVER-60577 Add logic to check time-series buckets for mixed-schema data at index time
-rw-r--r-- jstests/multiVersion/timeseries_collection_mixed_schema_index_build_stepdown.js 117
-rw-r--r-- jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_continuous.js 55
-rw-r--r-- jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js 55
-rw-r--r-- src/mongo/db/catalog/multi_index_block.cpp 85
-rw-r--r-- src/mongo/db/catalog/multi_index_block.h 7
-rw-r--r-- src/mongo/db/index_builds_coordinator.cpp 3
6 files changed, 298 insertions, 24 deletions
diff --git a/jstests/multiVersion/timeseries_collection_mixed_schema_index_build_stepdown.js b/jstests/multiVersion/timeseries_collection_mixed_schema_index_build_stepdown.js
new file mode 100644
index 00000000000..7575c514c20
--- /dev/null
+++ b/jstests/multiVersion/timeseries_collection_mixed_schema_index_build_stepdown.js
@@ -0,0 +1,117 @@
+/**
+ * Only the primary node enforces the mixed-schema data constraint during an index build. This is
+ * because index builds may not fail on secondaries. They can only be aborted via the
+ * abortIndexBuild oplog entry. Secondaries will still record any mixed-schema data they detect
+ * during an index build but take no action. This tests that a secondary stepping up will cause an
+ * index build to fail due to the earlier detection of mixed-schema data.
+ */
+(function() {
+"use strict";
+
+load("jstests/core/timeseries/libs/timeseries.js");
+load("jstests/libs/fail_point_util.js");
+load("jstests/multiVersion/libs/multi_rs.js");
+load('jstests/noPassthrough/libs/index_build.js');
+
+const oldVersion = "last-lts";
+const nodes = {
+ n1: {binVersion: oldVersion},
+ n2: {binVersion: oldVersion}
+};
+
+const rst = new ReplSetTest({nodes: nodes});
+rst.startSet();
+rst.initiate();
+
+const dbName = "test";
+const collName = jsTestName();
+
+let primary = rst.getPrimary();
+let db = primary.getDB(dbName);
+let coll = db.getCollection(collName);
+
+// Create a time-series collection while using older binaries.
+const timeField = "time";
+const metaField = "meta";
+assert.commandWorked(
+ db.createCollection(collName, {timeseries: {timeField: timeField, metaField: metaField}}));
+
+// Create a bucket with mixed-schema data.
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 1, x: 1}));
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 1, x: "abc"}));
+
+// Create buckets without mixed-schema data.
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 2, x: 1}));
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 3, x: 1}));
+
+jsTest.log("Upgrading replica set from last-lts to latest");
+rst.upgradeSet(
+ {binVersion: "latest", setParameter: {logComponentVerbosity: tojson({storage: 1, index: 1})}});
+
+primary = rst.getPrimary();
+db = primary.getDB(dbName);
+coll = db.getCollection(collName);
+
+if (!TimeseriesTest.timeseriesMetricIndexesEnabled(primary)) {
+ jsTest.log("Skipping test as the featureFlagTimeseriesMetricIndexes feature flag is disabled");
+ rst.stopSet();
+ return;
+}
+
+// Building indexes on time-series measurements is only supported in FCV >= 5.2.
+jsTest.log("Setting FCV to 'latestFCV'");
+assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: latestFCV}));
+
+const bucketCollName = dbName + ".system.buckets." + collName;
+
+// The FCV upgrade process adds the catalog entry flag to time-series collections.
+const secondary = rst.getSecondary();
+assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: true}, /*expectedCount=*/1));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: true}, /*expectedCount=*/1));
+
+// Hang the index build on the primary after replicating the startIndexBuild oplog entry.
+const primaryIndexBuild = configureFailPoint(primary, "hangAfterSettingUpIndexBuildUnlocked");
+
+// Hang the index build on the secondary after the collection scan phase is complete.
+const secondaryIndexBuild = configureFailPoint(secondary, "hangAfterStartingIndexBuildUnlocked");
+
+const awaitIndexBuild = IndexBuildTest.startIndexBuild(
+ primary, bucketCollName, {x: 1}, {name: "x_1"}, [ErrorCodes.InterruptedDueToReplStateChange]);
+
+primaryIndexBuild.wait();
+secondaryIndexBuild.wait();
+
+jsTestLog("Stepping up new primary");
+assert.commandWorked(secondary.adminCommand({replSetStepUp: 1}));
+
+primaryIndexBuild.off();
+secondaryIndexBuild.off();
+
+awaitIndexBuild();
+
+// Aborting index build commit due to the earlier detection of mixed-schema data (now primary).
+checkLog.containsJson(secondary, 6057701);
+
+// Index build: failed (now primary).
+checkLog.containsJson(secondary, 20649);
+
+// Aborting index build from oplog entry (now secondary).
+checkLog.containsJson(primary, 3856206);
+
+// Check that the catalog entry flag doesn't get set to false.
+assert(
+ checkLog.checkContainsWithCountJson(primary, 6057601, {setting: false}, /*expectedCount=*/0));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: false}, /*expectedCount=*/0));
+
+// The FCV downgrade process removes the catalog entry flag from time-series collections.
+jsTest.log("Setting FCV to 'lastLTSFCV'");
+assert.commandWorked(secondary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}));
+
+assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: null}, /*expectedCount=*/1));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: null}, /*expectedCount=*/1));
+
+rst.stopSet();
+}()); \ No newline at end of file
diff --git a/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_continuous.js b/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_continuous.js
index cfab82522e5..b6d5af7ce7e 100644
--- a/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_continuous.js
+++ b/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_continuous.js
@@ -3,9 +3,6 @@
* potentially mixed-schema data when building secondary indexes on time-series measurements on the
* latest binary. Additionally, tests that downgrading FCV from 5.2 removes the
* 'timeseriesBucketsMayHaveMixedSchemaData' catalog entry flag from time-series collections.
- *
- * TODO SERVER-60577: expand testing by checking that index builds will fail with mixed-schema data
- * and succeed when there is no mixed-schema data in time-series collections.
*/
(function() {
"use strict";
@@ -16,8 +13,7 @@ load("jstests/multiVersion/libs/multi_rs.js");
const oldVersion = "last-continuous";
const nodes = {
n1: {binVersion: oldVersion},
- n2: {binVersion: oldVersion},
- n3: {binVersion: oldVersion}
+ n2: {binVersion: oldVersion}
};
const rst = new ReplSetTest({nodes: nodes});
@@ -29,16 +25,24 @@ const collName = jsTestName();
let primary = rst.getPrimary();
let db = primary.getDB(dbName);
+let coll = db.getCollection(collName);
// Create a time-series collection while using older binaries.
const timeField = "time";
-assert.commandWorked(db.createCollection(collName, {timeseries: {timeField: timeField}}));
+const metaField = "meta";
+assert.commandWorked(
+ db.createCollection(collName, {timeseries: {timeField: timeField, metaField: metaField}}));
+
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 1, x: 1}));
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 2, x: {y: "z"}}));
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 3, x: "abc"}));
jsTest.log("Upgrading replica set from last-continuous to latest");
rst.upgradeSet({binVersion: "latest", setParameter: {logComponentVerbosity: tojson({storage: 1})}});
primary = rst.getPrimary();
db = primary.getDB(dbName);
+coll = db.getCollection(collName);
if (!TimeseriesTest.timeseriesMetricIndexesEnabled(primary)) {
jsTest.log("Skipping test as the featureFlagTimeseriesMetricIndexes feature flag is disabled");
@@ -53,20 +57,53 @@ assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: lates
const bucketCollName = dbName + ".system.buckets." + collName;
// The FCV upgrade process adds the catalog entry flag to time-series collections.
+const secondary = rst.getSecondary();
assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: true}, /*expectedCount=*/1));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: true}, /*expectedCount=*/1));
-assert.commandWorked(db.getCollection(collName).createIndex({[timeField]: 1}, {name: "time_1"}));
+assert.commandWorked(coll.createIndex({[timeField]: 1}, {name: "time_1"}));
+assert(checkLog.checkContainsWithCountJson(
+ primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0));
+
+assert.commandWorked(coll.createIndex({[metaField]: 1}, {name: "meta_1"}));
assert(checkLog.checkContainsWithCountJson(
primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0));
assert.commandWorked(db.getCollection(collName).createIndex({x: 1}, {name: "x_1"}));
+
+// May have mixed-schema data.
assert(checkLog.checkContainsWithCountJson(
primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/1));
+// No mixed-schema data detected.
+assert(checkLog.checkContainsWithCountJson(
+ primary, 6057700, {namespace: bucketCollName}, /*expectedCount=*/0));
+
+// Catalog entry flag gets set to false.
+assert(
+ checkLog.checkContainsWithCountJson(primary, 6057601, {setting: false}, /*expectedCount=*/1));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: false}, /*expectedCount=*/1));
+
+// After successfully building an index on a time-series measurement, subsequent index builds on
+// time-series measurements will skip checking for mixed-schema data.
assert.commandWorked(
db.getCollection(collName).createIndex({[timeField]: 1, x: 1}, {name: "time_1_x_1"}));
+
+// Check that the log message warning about potential mixed-schema data does not get logged again.
assert(checkLog.checkContainsWithCountJson(
- primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/2));
+ primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/1));
+
+// No mixed-schema data detected.
+assert(checkLog.checkContainsWithCountJson(
+ primary, 6057700, {namespace: bucketCollName}, /*expectedCount=*/0));
+
+// Catalog entry flag should still be false, and must not have been set to false a second time.
+assert(
+ checkLog.checkContainsWithCountJson(primary, 6057601, {setting: false}, /*expectedCount=*/1));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: false}, /*expectedCount=*/1));
// Cannot downgrade when there are indexes on time-series measurements present.
assert.commandFailedWithCode(
@@ -79,6 +116,8 @@ assert.commandWorked(db.getCollection(collName).dropIndex("time_1_x_1"));
jsTest.log("Setting FCV to 'lastContinuousFCV'");
assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: lastContinuousFCV}));
assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: null}, /*expectedCount=*/1));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: null}, /*expectedCount=*/1));
rst.stopSet();
}()); \ No newline at end of file
diff --git a/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js b/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js
index 4d3e020b437..affcfe4e3bd 100644
--- a/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js
+++ b/jstests/multiVersion/upgrade_downgrade_timeseries_collection_from_last_lts.js
@@ -3,9 +3,6 @@
* potentially mixed-schema data when building secondary indexes on time-series measurements on the
* latest binary. Additionally, tests that downgrading FCV from 5.2 removes the
* 'timeseriesBucketsMayHaveMixedSchemaData' catalog entry flag from time-series collections.
- *
- * TODO SERVER-60577: expand testing by checking that index builds will fail with mixed-schema data
- * and succeed when there is no mixed-schema data in time-series collections.
*/
(function() {
"use strict";
@@ -16,8 +13,7 @@ load("jstests/multiVersion/libs/multi_rs.js");
const oldVersion = "last-lts";
const nodes = {
n1: {binVersion: oldVersion},
- n2: {binVersion: oldVersion},
- n3: {binVersion: oldVersion}
+ n2: {binVersion: oldVersion}
};
const rst = new ReplSetTest({nodes: nodes});
@@ -29,16 +25,24 @@ const collName = jsTestName();
let primary = rst.getPrimary();
let db = primary.getDB(dbName);
+let coll = db.getCollection(collName);
// Create a time-series collection while using older binaries.
const timeField = "time";
-assert.commandWorked(db.createCollection(collName, {timeseries: {timeField: timeField}}));
+const metaField = "meta";
+assert.commandWorked(
+ db.createCollection(collName, {timeseries: {timeField: timeField, metaField: metaField}}));
+
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 1, x: 1}));
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 1, x: {y: "z"}}));
+assert.commandWorked(coll.insert({[timeField]: ISODate(), [metaField]: 1, x: "abc"}));
jsTest.log("Upgrading replica set from last-lts to latest");
rst.upgradeSet({binVersion: "latest", setParameter: {logComponentVerbosity: tojson({storage: 1})}});
primary = rst.getPrimary();
db = primary.getDB(dbName);
+coll = db.getCollection(collName);
if (!TimeseriesTest.timeseriesMetricIndexesEnabled(primary)) {
jsTest.log("Skipping test as the featureFlagTimeseriesMetricIndexes feature flag is disabled");
@@ -53,31 +57,52 @@ assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: lates
const bucketCollName = dbName + ".system.buckets." + collName;
// The FCV upgrade process adds the catalog entry flag to time-series collections.
+const secondary = rst.getSecondary();
assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: true}, /*expectedCount=*/1));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: true}, /*expectedCount=*/1));
+
+assert.commandWorked(coll.createIndex({[timeField]: 1}, {name: "time_1"}));
+assert(checkLog.checkContainsWithCountJson(
+ primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0));
-assert.commandWorked(db.getCollection(collName).createIndex({[timeField]: 1}, {name: "time_1"}));
+assert.commandWorked(coll.createIndex({[metaField]: 1}, {name: "meta_1"}));
assert(checkLog.checkContainsWithCountJson(
primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/0));
-assert.commandWorked(db.getCollection(collName).createIndex({x: 1}, {name: "x_1"}));
+assert.commandFailedWithCode(coll.createIndex({x: 1}, {name: "x_1"}), ErrorCodes.CannotCreateIndex);
+
+// May have mixed-schema data.
assert(checkLog.checkContainsWithCountJson(
primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/1));
-assert.commandWorked(
- db.getCollection(collName).createIndex({[timeField]: 1, x: 1}, {name: "time_1_x_1"}));
+// Mixed-schema data detected.
+assert(checkLog.checkContainsWithCountJson(
+ primary, 6057700, {namespace: bucketCollName}, /*expectedCount=*/1));
+
+assert.commandFailedWithCode(coll.createIndex({[timeField]: 1, x: 1}, {name: "time_1_x_1"}),
+ ErrorCodes.CannotCreateIndex);
+
+// May have mixed-schema data.
assert(checkLog.checkContainsWithCountJson(
primary, 6057502, {namespace: bucketCollName}, /*expectedCount=*/2));
-// Cannot downgrade when there are indexes on time-series measurements present.
-assert.commandFailedWithCode(primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}),
- ErrorCodes.CannotDowngrade);
-assert.commandWorked(db.getCollection(collName).dropIndex("x_1"));
-assert.commandWorked(db.getCollection(collName).dropIndex("time_1_x_1"));
+// Mixed-schema data detected.
+assert(checkLog.checkContainsWithCountJson(
+ primary, 6057700, {namespace: bucketCollName}, /*expectedCount=*/2));
+
+// Check that the catalog entry flag doesn't get set to false.
+assert(
+ checkLog.checkContainsWithCountJson(primary, 6057601, {setting: false}, /*expectedCount=*/0));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: false}, /*expectedCount=*/0));
// The FCV downgrade process removes the catalog entry flag from time-series collections.
jsTest.log("Setting FCV to 'lastLTSFCV'");
assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}));
assert(checkLog.checkContainsWithCountJson(primary, 6057601, {setting: null}, /*expectedCount=*/1));
+assert(
+ checkLog.checkContainsWithCountJson(secondary, 6057601, {setting: null}, /*expectedCount=*/1));
rst.stopSet();
}()); \ No newline at end of file
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 79ad8d98d47..2d4bf748a29 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -53,6 +53,8 @@
#include "mongo/db/repl/tenant_migration_conflict_info.h"
#include "mongo/db/storage/storage_options.h"
#include "mongo/db/storage/write_unit_of_work.h"
+#include "mongo/db/timeseries/timeseries_constants.h"
+#include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
#include "mongo/logv2/log.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/fail_point.h"
@@ -83,6 +85,20 @@ size_t getEachIndexBuildMaxMemoryUsageBytes(size_t numIndexSpecs) {
numIndexSpecs;
}
+Status timeseriesMixedSchemaDataFailure(const Collection* collection) {
+ // Builds the CannotCreateIndex status reported when a bucket holds mixed-schema data.
+ // TODO SERVER-61070: re-word the message below if needed and add a workarounds URL.
+ return Status(
+ ErrorCodes::CannotCreateIndex,
+ str::stream() << "Index build on collection '" << collection->ns() << "' ("
+ << collection->uuid()
+ << ") failed due to the detection of mixed-schema data in the "
+ << "time-series buckets collection. Starting as of v5.2, time-series "
+ << "measurement bucketing has been modified to ensure that newly created "
+ << "time-series buckets do not contain mixed-schema data. For workarounds, "
+ << "see: <url>");
+}
+
} // namespace
MultiIndexBlock::~MultiIndexBlock() {
@@ -255,6 +271,15 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(
info = statusWithInfo.getValue();
indexInfoObjs.push_back(info);
+ // TODO SERVER-54592: Remove FCV check once feature flag is enabled for v5.2.
+ boost::optional<TimeseriesOptions> options = collection->getTimeseriesOptions();
+ if (options &&
+ serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest() &&
+ timeseries::doesBucketsIndexIncludeKeyOnMeasurement(*options, info)) {
+ invariant(collection->getTimeseriesBucketsMayHaveMixedSchemaData());
+ _containsIndexBuildOnTimeseriesMeasurement = true;
+ }
+
boost::optional<IndexStateInfo> stateInfo;
auto& index = _indexes.emplace_back();
index.block =
@@ -651,6 +676,37 @@ Status MultiIndexBlock::_insert(OperationContext* opCtx,
const std::function<void()>& saveCursorBeforeWrite,
const std::function<void()>& restoreCursorAfterWrite) {
invariant(!_buildIsCleanedUp);
+
+ // The detection of mixed-schema data needs to be done before applying the partial filter
+ // expression below. Only check for mixed-schema data if it's possible for the time-series
+ // collection to have it.
+ if (_containsIndexBuildOnTimeseriesMeasurement &&
+ *collection->getTimeseriesBucketsMayHaveMixedSchemaData()) {
+ bool docHasMixedSchemaData =
+ collection->doesTimeseriesBucketsDocContainMixedSchemaData(doc);
+
+ if (docHasMixedSchemaData) {
+ LOGV2(6057700,
+ "Detected mixed-schema data in time-series bucket collection",
+ logAttrs(collection->ns()),
+ logAttrs(collection->uuid()),
+ "recordId"_attr = loc,
+ "control"_attr = redact(doc.getObjectField(timeseries::kBucketControlFieldName)));
+
+ _timeseriesBucketContainsMixedSchemaData = true;
+ }
+
+ // Only enforce the mixed-schema data constraint on the primary. Index builds must not fail
+ // on the secondaries. The primary will replicate an abortIndexBuild oplog entry.
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ const bool replSetAndNotPrimary = replCoord->getSettings().usingReplSets() &&
+ !replCoord->canAcceptWritesFor(opCtx, collection->ns());
+
+ if (docHasMixedSchemaData && !replSetAndNotPrimary) {
+ return timeseriesMixedSchemaDataFailure(collection.get());
+ }
+ }
+
for (size_t i = 0; i < _indexes.size(); i++) {
if (_indexes[i].filterExpression && !_indexes[i].filterExpression->matchesBSON(doc)) {
continue;
@@ -857,6 +913,23 @@ Status MultiIndexBlock::commit(OperationContext* opCtx,
invariant(_collectionUUID.get() == collection->uuid());
}
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ const bool replSetAndNotPrimary = replCoord->getSettings().usingReplSets() &&
+ !replCoord->canAcceptWritesFor(opCtx, collection->ns());
+
+ // During the collection scan phase, only the primary will enforce the mixed-schema data
+ // constraint. Secondaries only record whether mixed-schema data was detected and take no
+ // action. If the primary steps down during the index build, a secondary node will take over.
+ // This can happen after the collection scan phase, which is why we need this check here.
+ if (_timeseriesBucketContainsMixedSchemaData && !replSetAndNotPrimary) {
+ LOGV2_DEBUG(6057701,
+ 1,
+ "Aborting index build commit due to the earlier detection of mixed-schema data",
+ logAttrs(collection->ns()),
+ logAttrs(collection->uuid()));
+ return timeseriesMixedSchemaDataFailure(collection);
+ }
+
// Do not interfere with writing multikey information when committing index builds.
ScopeGuard restartTracker(
[this, opCtx] { MultikeyPathTracker::get(opCtx).startTrackingMultikeyPathInfo(); });
@@ -894,6 +967,18 @@ Status MultiIndexBlock::commit(OperationContext* opCtx,
onCommit();
+ // Update the 'timeseriesBucketsMayHaveMixedSchemaData' catalog entry flag to false in order to
+ // allow subsequent index builds to skip checking bucket documents for mixed-schema data.
+ if (_containsIndexBuildOnTimeseriesMeasurement && !_timeseriesBucketContainsMixedSchemaData) {
+ boost::optional<bool> mayContainMixedSchemaData =
+ collection->getTimeseriesBucketsMayHaveMixedSchemaData();
+ invariant(mayContainMixedSchemaData);
+
+ if (*mayContainMixedSchemaData) {
+ collection->setTimeseriesBucketsMayHaveMixedSchemaData(opCtx, false);
+ }
+ }
+
CollectionQueryInfo::get(collection).clearQueryCache(opCtx, collection);
opCtx->recoveryUnit()->onCommit(
[this](boost::optional<Timestamp> commitTime) { _buildIsCleanedUp = true; });
diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h
index 2dd45df2a7b..fa747afd8a3 100644
--- a/src/mongo/db/catalog/multi_index_block.h
+++ b/src/mongo/db/catalog/multi_index_block.h
@@ -354,6 +354,13 @@ private:
bool _ignoreUnique = false;
+ // True if one or more indexes being built are on time-series measurements.
+ bool _containsIndexBuildOnTimeseriesMeasurement = false;
+
+ // True if at least one bucket document was found to contain mixed-schema data. Only set when
+ // '_containsIndexBuildOnTimeseriesMeasurement' is true.
+ bool _timeseriesBucketContainsMixedSchemaData = false;
+
// Set to true when no work remains to be done, the object can safely destruct without leaving
// incorrect state set anywhere.
bool _buildIsCleanedUp = true;
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 09badb3b528..8b5e6287ecd 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -2183,7 +2183,8 @@ void IndexBuildsCoordinator::_runIndexBuildInner(
fassertFailedNoTrace(5642402);
}
invariant(status.isA<ErrorCategory::Interruption>() ||
- status.isA<ErrorCategory::ShutdownError>(),
+ status.isA<ErrorCategory::ShutdownError>() ||
+ status.code() == ErrorCodes::CannotCreateIndex,
str::stream() << "Unexpected error code during index build cleanup: " << status);
if (IndexBuildProtocol::kSinglePhase == replState->protocol) {
_cleanUpSinglePhaseAfterFailure(opCtx, collection, replState, indexBuildOptions, status);