summaryrefslogtreecommitdiff
path: root/src/mongo/db/catalog
diff options
context:
space:
mode:
authorGregory Wlodarek <gregory.wlodarek@mongodb.com>2021-10-30 01:05:07 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-10-30 01:38:21 +0000
commit8b405630fc1506abf620aaf4b08363d015327c9b (patch)
treed8f81d4f33499857d6c76fb2694cf9c5d4cccaa6 /src/mongo/db/catalog
parent2455e1c112c89a3bdde41d718cadf7f9cc9b5bf0 (diff)
downloadmongo-8b405630fc1506abf620aaf4b08363d015327c9b.tar.gz
SERVER-60577 Add logic to check time-series buckets for mixed-schema data at index time
Diffstat (limited to 'src/mongo/db/catalog')
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp85
-rw-r--r--src/mongo/db/catalog/multi_index_block.h7
2 files changed, 92 insertions, 0 deletions
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 79ad8d98d47..2d4bf748a29 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -53,6 +53,8 @@
#include "mongo/db/repl/tenant_migration_conflict_info.h"
#include "mongo/db/storage/storage_options.h"
#include "mongo/db/storage/write_unit_of_work.h"
+#include "mongo/db/timeseries/timeseries_constants.h"
+#include "mongo/db/timeseries/timeseries_index_schema_conversion_functions.h"
#include "mongo/logv2/log.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/fail_point.h"
@@ -83,6 +85,20 @@ size_t getEachIndexBuildMaxMemoryUsageBytes(size_t numIndexSpecs) {
numIndexSpecs;
}
+Status timeseriesMixedSchemaDataFailure(const Collection* collection) {
+ // TODO SERVER-61070: Re-word the error message below if necessary and add a URL for
+ // workarounds.
+ return Status(
+ ErrorCodes::CannotCreateIndex,
+ str::stream() << "Index build on collection '" << collection->ns() << "' ("
+ << collection->uuid()
+ << ") failed due to the detection of mixed-schema data in the "
+ << "time-series buckets collection. Starting as of v5.2, time-series "
+ << "measurement bucketing has been modified to ensure that newly created "
+ << "time-series buckets do not contain mixed-schema data. For workarounds, "
+ << "see: <url>");
+}
+
} // namespace
MultiIndexBlock::~MultiIndexBlock() {
@@ -255,6 +271,15 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(
info = statusWithInfo.getValue();
indexInfoObjs.push_back(info);
+ // TODO SERVER-54592: Remove FCV check once feature flag is enabled for v5.2.
+ boost::optional<TimeseriesOptions> options = collection->getTimeseriesOptions();
+ if (options &&
+ serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest() &&
+ timeseries::doesBucketsIndexIncludeKeyOnMeasurement(*options, info)) {
+ invariant(collection->getTimeseriesBucketsMayHaveMixedSchemaData());
+ _containsIndexBuildOnTimeseriesMeasurement = true;
+ }
+
boost::optional<IndexStateInfo> stateInfo;
auto& index = _indexes.emplace_back();
index.block =
@@ -651,6 +676,37 @@ Status MultiIndexBlock::_insert(OperationContext* opCtx,
const std::function<void()>& saveCursorBeforeWrite,
const std::function<void()>& restoreCursorAfterWrite) {
invariant(!_buildIsCleanedUp);
+
+ // The detection of mixed-schema data needs to be done before applying the partial filter
+ // expression below. Only check for mixed-schema data if it's possible for the time-series
+ // collection to have it.
+ if (_containsIndexBuildOnTimeseriesMeasurement &&
+ *collection->getTimeseriesBucketsMayHaveMixedSchemaData()) {
+ bool docHasMixedSchemaData =
+ collection->doesTimeseriesBucketsDocContainMixedSchemaData(doc);
+
+ if (docHasMixedSchemaData) {
+ LOGV2(6057700,
+ "Detected mixed-schema data in time-series bucket collection",
+ logAttrs(collection->ns()),
+ logAttrs(collection->uuid()),
+ "recordId"_attr = loc,
+ "control"_attr = redact(doc.getObjectField(timeseries::kBucketControlFieldName)));
+
+ _timeseriesBucketContainsMixedSchemaData = true;
+ }
+
+ // Only enforce the mixed-schema data constraint on the primary. Index builds may not fail
+ // on the secondaries. The primary will replicate an abortIndexBuild oplog entry.
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ const bool replSetAndNotPrimary = replCoord->getSettings().usingReplSets() &&
+ !replCoord->canAcceptWritesFor(opCtx, collection->ns());
+
+ if (docHasMixedSchemaData && !replSetAndNotPrimary) {
+ return timeseriesMixedSchemaDataFailure(collection.get());
+ }
+ }
+
for (size_t i = 0; i < _indexes.size(); i++) {
if (_indexes[i].filterExpression && !_indexes[i].filterExpression->matchesBSON(doc)) {
continue;
@@ -857,6 +913,23 @@ Status MultiIndexBlock::commit(OperationContext* opCtx,
invariant(_collectionUUID.get() == collection->uuid());
}
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ const bool replSetAndNotPrimary = replCoord->getSettings().usingReplSets() &&
+ !replCoord->canAcceptWritesFor(opCtx, collection->ns());
+
+ // During the collection scan phase, only the primary will enforce the mixed-schema data
+ // constraint. Secondaries will only keep track of and take no action if mixed-schema data is
+ // detected. If the primary steps down during the index build, a secondary node will takeover.
+ // This can happen after the collection scan phase, which is why we need this check here.
+ if (_timeseriesBucketContainsMixedSchemaData && !replSetAndNotPrimary) {
+ LOGV2_DEBUG(6057701,
+ 1,
+ "Aborting index build commit due to the earlier detection of mixed-schema data",
+ logAttrs(collection->ns()),
+ logAttrs(collection->uuid()));
+ return timeseriesMixedSchemaDataFailure(collection);
+ }
+
// Do not interfere with writing multikey information when committing index builds.
ScopeGuard restartTracker(
[this, opCtx] { MultikeyPathTracker::get(opCtx).startTrackingMultikeyPathInfo(); });
@@ -894,6 +967,18 @@ Status MultiIndexBlock::commit(OperationContext* opCtx,
onCommit();
+ // Update the 'timeseriesBucketsMayHaveMixedSchemaData' catalog entry flag to false in order to
+ // allow subsequent index builds to skip checking bucket documents for mixed-schema data.
+ if (_containsIndexBuildOnTimeseriesMeasurement && !_timeseriesBucketContainsMixedSchemaData) {
+ boost::optional<bool> mayContainMixedSchemaData =
+ collection->getTimeseriesBucketsMayHaveMixedSchemaData();
+ invariant(mayContainMixedSchemaData);
+
+ if (*mayContainMixedSchemaData) {
+ collection->setTimeseriesBucketsMayHaveMixedSchemaData(opCtx, false);
+ }
+ }
+
CollectionQueryInfo::get(collection).clearQueryCache(opCtx, collection);
opCtx->recoveryUnit()->onCommit(
[this](boost::optional<Timestamp> commitTime) { _buildIsCleanedUp = true; });
diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h
index 2dd45df2a7b..fa747afd8a3 100644
--- a/src/mongo/db/catalog/multi_index_block.h
+++ b/src/mongo/db/catalog/multi_index_block.h
@@ -354,6 +354,13 @@ private:
bool _ignoreUnique = false;
+ // True if one or more indexes being built are on time-series measurements.
+ bool _containsIndexBuildOnTimeseriesMeasurement = false;
+
+ // True if at least one bucket document contains mixed-schema data and
+ // '_containsIndexBuildOnTimeseriesMeasurement=true'.
+ bool _timeseriesBucketContainsMixedSchemaData = false;
+
// Set to true when no work remains to be done, the object can safely destruct without leaving
// incorrect state set anywhere.
bool _buildIsCleanedUp = true;