summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCheahuychou Mao <cheahuychou.mao@mongodb.com>2020-01-28 17:20:44 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-02-06 19:46:51 +0000
commitea696eb7a27f18c21223a3ff94d9124f06698af5 (patch)
tree7d34ba1f876b4230cb48ff842262d141bb17988f
parentd6a8ae5bcb0edefc2312dcfa2f6196b74711aa89 (diff)
downloadmongo-ea696eb7a27f18c21223a3ff94d9124f06698af5.tar.gz
SERVER-45389 Add metrics tracking how often shards have inconsistent indexes
create mode 100644 jstests/noPassthrough/sharded_index_consistency_metrics.js create mode 100644 src/mongo/db/commands/sharded_index_consistency_server_status.cpp create mode 100644 src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp create mode 100644 src/mongo/db/s/periodic_sharded_index_consistency_checker.h
-rw-r--r--jstests/noPassthrough/sharded_index_consistency_metrics.js139
-rw-r--r--src/mongo/db/commands/SConscript3
-rw-r--r--src/mongo/db/commands/sharded_index_consistency_server_status.cpp68
-rw-r--r--src/mongo/db/db.cpp6
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_impl.cpp3
-rw-r--r--src/mongo/db/s/SConscript2
-rw-r--r--src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp200
-rw-r--r--src/mongo/db/s/periodic_sharded_index_consistency_checker.h95
-rw-r--r--src/mongo/db/s/sharding_runtime_d_params.idl19
9 files changed, 533 insertions, 2 deletions
diff --git a/jstests/noPassthrough/sharded_index_consistency_metrics.js b/jstests/noPassthrough/sharded_index_consistency_metrics.js
new file mode 100644
index 00000000000..5dc53e0a378
--- /dev/null
+++ b/jstests/noPassthrough/sharded_index_consistency_metrics.js
@@ -0,0 +1,139 @@
+/*
+ * Tests index consistency metrics in the serverStatus output.
+ * @tags: [requires_fcv_44, requires_sharding]
+ */
+(function() {
+"use strict";
+
+// This test creates inconsistent indexes.
+TestData.skipCheckingIndexesConsistentAcrossCluster = true;
+
+/*
+ * Asserts that the serverStatus output does not contain the index consistency metrics
+ * both by default and when 'shardedIndexConsistency' is explicitly included.
+ */
+function assertServerStatusNotContainIndexMetrics(conn) {
+ let res = assert.commandWorked(conn.adminCommand({serverStatus: 1}));
+ assert.eq(undefined, res.shardedIndexConsistency, tojson(res.shardedIndexConsistency));
+
+ res = assert.commandWorked(conn.adminCommand({serverStatus: 1, shardedIndexConsistency: 1}));
+ assert.eq(undefined, res.shardedIndexConsistency, tojson(res.shardedIndexConsistency));
+}
+
+/*
+ * Asserts that eventually the number of sharded collections with inconsistent indexes in the
+ * serverStatus output is equal to the expected count.
+ */
+function checkServerStatusNumCollsWithInconsistentIndexes(conn, expectedCount) {
+ assert.soon(
+ () => {
+ const res = assert.commandWorked(conn.adminCommand({serverStatus: 1}));
+ assert.hasFields(res, ["shardedIndexConsistency"]);
+ assert.hasFields(res.shardedIndexConsistency,
+ ["numShardedCollectionsWithInconsistentIndexes"]);
+ return expectedCount ==
+ res.shardedIndexConsistency.numShardedCollectionsWithInconsistentIndexes;
+ },
+ `expect the count of sharded collections with inconsistent indexes to eventually be equal to ${
+ expectedCount}`,
+ undefined /* timeout */,
+ 1000 /* interval */);
+}
+
+/*
+ * For each mongod in 'connsWithIndexConsistencyMetrics', asserts that its serverStatus
+ * output has the expected number of collections with inconsistent indexes. For each mongod
+ * in 'connsWithoutIndexConsistencyMetrics', asserts that its serverStatus output does
+ * not contain the index consistency metrics.
+ */
+function checkServerStatus(connsWithIndexConsistencyMetrics,
+ connsWithoutIndexConsistencyMetrics,
+ expectedNumCollsWithInconsistentIndexes) {
+ for (const conn of connsWithIndexConsistencyMetrics) {
+ checkServerStatusNumCollsWithInconsistentIndexes(conn,
+ expectedNumCollsWithInconsistentIndexes);
+ }
+ for (const conn of connsWithoutIndexConsistencyMetrics) {
+ assertServerStatusNotContainIndexMetrics(conn);
+ }
+}
+
+const intervalMS = 3000;
+const st = new ShardingTest({
+ shards: 2,
+ config: 2,
+ configOptions: {setParameter: {"shardedIndexConsistencyCheckIntervalMS": intervalMS}}
+});
+const dbName = "testDb";
+const ns1 = dbName + ".testColl1";
+const ns2 = dbName + ".testColl2";
+const ns3 = dbName + ".testColl3";
+const expiration = 1000000;
+const filterExpr = {
+ x: {$gt: 50}
+};
+
+assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
+st.ensurePrimaryShard(dbName, st.shard0.shardName);
+assert.commandWorked(st.s.adminCommand({shardCollection: ns1, key: {_id: "hashed"}}));
+assert.commandWorked(st.s.adminCommand({shardCollection: ns2, key: {_id: "hashed"}}));
+assert.commandWorked(st.s.adminCommand({shardCollection: ns3, key: {_id: "hashed"}}));
+
+st.config1.getDB("admin").runCommand({setParameter: 1, enableShardedIndexConsistencyCheck: false});
+const connsWithIndexConsistencyMetrics = [st.config0];
+const connsWithoutIndexConsistencyMetrics = [st.config1, st.shard0, st.shard1, st.s];
+
+checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 0);
+
+// Create an inconsistent index for ns1.
+assert.commandWorked(st.shard0.getCollection(ns1).createIndex({x: 1}));
+checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 1);
+
+// Create another inconsistent index for ns1.
+assert.commandWorked(st.shard1.getCollection(ns1).createIndexes([{y: 1}]));
+checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 1);
+
+// Create an inconsistent index for ns2.
+assert.commandWorked(st.shard0.getCollection(ns2).createIndex({x: 1}));
+checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 2);
+
+// Resolve the index inconsistency for ns2.
+assert.commandWorked(st.shard1.getCollection(ns2).createIndex({x: 1}));
+checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 1);
+
+// Create indexes for n3 with the same options but in different orders on each shard, and verify
+// that it is not considered as inconsistent.
+assert.commandWorked(st.shard0.getCollection(ns3).createIndex({x: 1}, {
+ name: "indexWithOptionsOrderedDifferently",
+ partialFilterExpression: filterExpr,
+ expireAfterSeconds: expiration
+}));
+assert.commandWorked(st.shard1.getCollection(ns3).createIndex({x: 1}, {
+ name: "indexWithOptionsOrderedDifferently",
+ expireAfterSeconds: expiration,
+ partialFilterExpression: filterExpr
+}));
+checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 1);
+
+// Create indexes for n3 with the same key but different options on each shard, and verify that
+// it is considered as inconsistent.
+assert.commandWorked(st.shard0.getCollection(ns3).createIndex(
+ {y: 1}, {name: "indexWithDifferentOptions", expireAfterSeconds: expiration}));
+assert.commandWorked(
+ st.shard1.getCollection(ns3).createIndex({y: 1}, {name: "indexWithDifferentOptions"}));
+checkServerStatus(connsWithIndexConsistencyMetrics, connsWithoutIndexConsistencyMetrics, 2);
+
+st.stop();
+
+// Verify that the serverStatus output for standalones and non-sharded repilca set servers does
+// not contain the index consistency metrics.
+const standaloneMongod = MongoRunner.runMongod();
+assertServerStatusNotContainIndexMetrics(standaloneMongod);
+MongoRunner.stopMongod(standaloneMongod);
+
+const rst = ReplSetTest({nodes: 1});
+rst.startSet();
+rst.initiate();
+assertServerStatusNotContainIndexMetrics(rst.getPrimary());
+rst.stopSet();
+}());
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript
index 7e90c43ec11..568e23c6272 100644
--- a/src/mongo/db/commands/SConscript
+++ b/src/mongo/db/commands/SConscript
@@ -202,7 +202,7 @@ env.Library(
"fsync.cpp",
],
LIBDEPS_PRIVATE=[
- '$BUILD_DIR/mongo/db/auth/authprivilege',
+ '$BUILD_DIR/mongo/db/auth/authprivilege',
'$BUILD_DIR/mongo/db/commands',
'$BUILD_DIR/mongo/db/concurrency/write_conflict_exception',
'$BUILD_DIR/mongo/db/curop',
@@ -374,6 +374,7 @@ env.Library(
'rwc_defaults_commands.cpp',
"set_feature_compatibility_version_command.cpp",
"set_index_commit_quorum_command.cpp",
+ "sharded_index_consistency_server_status.cpp",
"shutdown_d.cpp",
"snapshot_management.cpp",
"top_command.cpp",
diff --git a/src/mongo/db/commands/sharded_index_consistency_server_status.cpp b/src/mongo/db/commands/sharded_index_consistency_server_status.cpp
new file mode 100644
index 00000000000..876427b1525
--- /dev/null
+++ b/src/mongo/db/commands/sharded_index_consistency_server_status.cpp
@@ -0,0 +1,68 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/commands/server_status.h"
+#include "mongo/db/s/periodic_sharded_index_consistency_checker.h"
+#include "mongo/db/s/sharding_runtime_d_params_gen.h"
+
+namespace mongo {
+namespace {
+
+// Returns true only when this node's cluster role is ConfigServer and the
+// 'enableShardedIndexConsistencyCheck' server parameter is currently true. The parameter is only
+// read when the role check passes.
+bool isConfigServerWithShardedIndexConsistencyCheckEnabled() {
+    if (!(serverGlobalParams.clusterRole == ClusterRole::ConfigServer)) {
+        return false;
+    }
+    return enableShardedIndexConsistencyCheck.load();
+}
+
+/**
+ * The "shardedIndexConsistency" serverStatus section. On a config server with the sharded index
+ * consistency check enabled, it reports the latest count of sharded collections with
+ * inconsistent indexes; on any other node it produces an empty object.
+ */
+class ShardedIndexConsistencyServerStatus final : public ServerStatusSection {
+public:
+    ShardedIndexConsistencyServerStatus() : ServerStatusSection("shardedIndexConsistency") {}
+
+    // The section is included by default exactly when it has something to report.
+    bool includeByDefault() const override {
+        return isConfigServerWithShardedIndexConsistencyCheckEnabled();
+    }
+
+    BSONObj generateSection(OperationContext* opCtx,
+                            const BSONElement& configElement) const override {
+        // Return an empty object even when the section is requested explicitly, unless this is
+        // a config server with the check enabled.
+        if (!isConfigServerWithShardedIndexConsistencyCheckEnabled()) {
+            return {};
+        }
+
+        const auto& checker =
+            PeriodicShardedIndexConsistencyChecker::get(opCtx->getServiceContext());
+        const long long numInconsistent = checker.getNumShardedCollsWithInconsistentIndexes();
+
+        BSONObjBuilder section;
+        section.append("numShardedCollectionsWithInconsistentIndexes", numInconsistent);
+        return section.obj();
+    }
+
+} indexConsistencyServerStatus;
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index 14bed1ae035..3fb1a95eabe 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -121,6 +121,7 @@
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/config_server_op_observer.h"
#include "mongo/db/s/op_observer_sharding_impl.h"
+#include "mongo/db/s/periodic_sharded_index_consistency_checker.h"
#include "mongo/db/s/shard_server_op_observer.h"
#include "mongo/db/s/sharding_initialization_mongod.h"
#include "mongo/db/s/sharding_state_recovery.h"
@@ -965,6 +966,11 @@ void shutdownTask(const ShutdownTaskArgs& shutdownArgs) {
lsc->joinOnShutDown();
}
+ // Terminate the index consistency check.
+ if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
+ PeriodicShardedIndexConsistencyChecker::get(serviceContext).onShutDown();
+ }
+
// Shutdown the TransportLayer so that new connections aren't accepted
if (auto tl = serviceContext->getTransportLayer()) {
log(LogComponent::kNetwork) << "shutdown: going to close listening sockets...";
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 38ffb397076..aa22b6b818e 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -81,6 +81,7 @@
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/migration_util.h"
#include "mongo/db/s/periodic_balancer_config_refresher.h"
+#include "mongo/db/s/periodic_sharded_index_consistency_checker.h"
#include "mongo/db/s/sharding_initialization_mongod.h"
#include "mongo/db/s/sharding_state_recovery.h"
#include "mongo/db/s/transaction_coordinator_service.h"
@@ -708,6 +709,7 @@ void ReplicationCoordinatorExternalStateImpl::closeConnections() {
void ReplicationCoordinatorExternalStateImpl::shardingOnStepDownHook() {
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
Balancer::get(_service)->interruptBalancer();
+ PeriodicShardedIndexConsistencyChecker::get(_service).onStepDown();
TransactionCoordinatorService::get(_service)->onStepDown();
} else if (ShardingState::get(_service)->enabled()) {
ChunkSplitter::get(_service).onStepDown();
@@ -795,6 +797,7 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook
validator->enableKeyGenerator(opCtx, true);
}
+ PeriodicShardedIndexConsistencyChecker::get(_service).onStepUp(_service);
TransactionCoordinatorService::get(_service)->onStepUp(opCtx);
} else if (ShardingState::get(opCtx)->enabled()) {
Status status = ShardingStateRecovery::recover(opCtx);
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 029f80d5451..22d1ca9e677 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -55,6 +55,7 @@ env.Library(
'move_timing_helper.cpp',
'namespace_metadata_change_notifications.cpp',
'periodic_balancer_config_refresher.cpp',
+ 'periodic_sharded_index_consistency_checker.cpp',
'range_deletion_util.cpp',
'read_only_catalog_cache_loader.cpp',
'scoped_operation_completion_sharding_actions.cpp',
@@ -91,6 +92,7 @@ env.Library(
'$BUILD_DIR/mongo/db/storage/remove_saver',
'$BUILD_DIR/mongo/db/transaction',
'$BUILD_DIR/mongo/s/client/shard_local',
+ '$BUILD_DIR/mongo/s/query/cluster_aggregate',
'$BUILD_DIR/mongo/s/sharding_initialization',
'chunk_splitter',
'sharding_api_d',
diff --git a/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp b/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp
new file mode 100644
index 00000000000..387d2c72652
--- /dev/null
+++ b/src/mongo/db/s/periodic_sharded_index_consistency_checker.cpp
@@ -0,0 +1,200 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/s/periodic_sharded_index_consistency_checker.h"
+
+#include "mongo/db/auth/privilege.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/s/sharding_runtime_d_params_gen.h"
+#include "mongo/db/service_context.h"
+#include "mongo/s/grid.h"
+#include "mongo/s/query/cluster_aggregate.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+
+namespace {
+
+const auto getPeriodicShardedIndexConsistencyChecker =
+ ServiceContext::declareDecoration<PeriodicShardedIndexConsistencyChecker>();
+
+} // namespace
+
+// Convenience overload: looks the checker up through the ServiceContext that owns 'opCtx'.
+PeriodicShardedIndexConsistencyChecker& PeriodicShardedIndexConsistencyChecker::get(
+    OperationContext* opCtx) {
+    auto* const serviceContext = opCtx->getServiceContext();
+    return get(serviceContext);
+}
+
+// Returns the checker instance stored as a decoration on 'serviceContext'.
+PeriodicShardedIndexConsistencyChecker& PeriodicShardedIndexConsistencyChecker::get(
+    ServiceContext* serviceContext) {
+    auto& checker = getPeriodicShardedIndexConsistencyChecker(serviceContext);
+    return checker;
+}
+
+// Returns the count most recently stored by the periodic job (or by the reset on step down).
+long long PeriodicShardedIndexConsistencyChecker::getNumShardedCollsWithInconsistentIndexes()
+    const {
+    const long long latestCount = _numShardedCollsWithInconsistentIndexes.load();
+    return latestCount;
+}
+
+/**
+ * Creates the periodic job that counts sharded collections with inconsistent indexes, stores
+ * its anchor in '_shardedIndexConsistencyChecker' and starts it. The job runs every
+ * 'shardedIndexConsistencyCheckIntervalMS' milliseconds.
+ *
+ * Each run is a no-op while 'enableShardedIndexConsistencyCheck' is false. Otherwise it runs an
+ * aggregation against every sharded collection outside the config database and publishes the
+ * number of collections for which the aggregation returned a result. If any step throws a
+ * DBException the run is abandoned and the previously published count is left untouched.
+ */
+void PeriodicShardedIndexConsistencyChecker::_launchShardedIndexConsistencyChecker(
+    ServiceContext* serviceContext) {
+    auto periodicRunner = serviceContext->getPeriodicRunner();
+    invariant(periodicRunner);
+
+    PeriodicRunner::PeriodicJob job(
+        "PeriodicShardedIndexConsistencyChecker",
+        [this](Client* client) {
+            if (!enableShardedIndexConsistencyCheck.load()) {
+                return;
+            }
+
+            log() << "Checking consistency of sharded collection indexes across the cluster";
+
+            // Pipeline overview:
+            //  1. $indexStats emits one document per index per shard.
+            //  2. The first $group collects every document and the set of all shards seen.
+            //  3. $unwind + the second $group regroup by index name, gathering the shards that
+            //     have the index and the set of distinct specs. Each spec is rebuilt through
+            //     $objectToArray/$setUnion/$arrayToObject before being added to the set.
+            //  4. $addFields/$match keep only indexes that are missing from some shard or that
+            //     have more than one distinct spec.
+            //  5. $limit 1: the caller only needs to know whether any inconsistency exists.
+            const auto aggRequestBSON = fromjson(
+                "{pipeline: [{$indexStats: {}},"
+                "{$group: {_id: null, indexDoc: {$push: \"$$ROOT\"}, allShards: {$addToSet: "
+                "\"$shard\"}}}, "
+                "{$unwind: \"$indexDoc\"}, "
+                "{$group: {\"_id\": \"$indexDoc.name\", \"shards\": {$push: "
+                "\"$indexDoc.shard\"}, "
+                "\"specs\": {$addToSet: {$arrayToObject: {$setUnion: {$objectToArray: "
+                "\"$indexDoc.spec\"}}}}, "
+                "\"allShards\": {$first: \"$allShards\"}}},"
+                "{$addFields: {\"missingFromShards\": {$setDifference: [\"$allShards\", "
+                "\"$shards\"]}}},"
+                "{$match: {$expr: {$or: [{$gt: [{$size: \"$missingFromShards\"}, 0]}, {$gt: "
+                "[{$size: \"$specs\"}, 1]}]}}},"
+                "{$project: {_id: 0, indexName: \"$$ROOT._id\", specs: 1, missingFromShards: "
+                "1}}, {$limit: 1}], cursor: {}}");
+
+            auto uniqueOpCtx = client->makeOperationContext();
+            auto opCtx = uniqueOpCtx.get();
+
+            try {
+                long long numShardedCollsWithInconsistentIndexes = 0;
+                auto collections =
+                    uassertStatusOK(Grid::get(opCtx)->catalogClient()->getCollections(
+                        opCtx, nullptr, nullptr, repl::ReadConcernLevel::kLocalReadConcern));
+
+                for (const auto& coll : collections) {
+                    // Bind by reference; the namespace is only read within this iteration.
+                    const auto& nss = coll.getNs();
+
+                    // The only sharded collection in the config database with indexes is
+                    // config.system.sessions. Unfortunately, the code path to run aggregation
+                    // below would currently invariant if one of the targeted shards was the config
+                    // server itself.
+                    if (nss.isConfigDB()) {
+                        continue;
+                    }
+
+                    auto request =
+                        uassertStatusOK(AggregationRequest::parseFromBSON(nss, aggRequestBSON));
+
+                    // Retry on stale shard version errors, up to kMaxNumStaleVersionRetries
+                    // attempts in total; the last failure is rethrown.
+                    for (int tries = 0;; ++tries) {
+                        const bool canRetry = tries < kMaxNumStaleVersionRetries - 1;
+
+                        try {
+                            BSONObjBuilder responseBuilder;
+                            auto status = ClusterAggregate::runAggregate(
+                                opCtx,
+                                ClusterAggregate::Namespaces{nss, nss},
+                                request,
+                                LiteParsedPipeline{request},
+                                PrivilegeVector(),
+                                &responseBuilder);
+
+                            // Stop counting if the agg command failed for one of the collections
+                            // to avoid recording a false count.
+                            uassertStatusOKWithContext(status, str::stream() << "nss " << nss);
+
+                            // A non-empty first batch means at least one inconsistent index.
+                            if (!responseBuilder.obj()["cursor"]["firstBatch"].Array().empty()) {
+                                numShardedCollsWithInconsistentIndexes++;
+                            }
+                            break;
+                        } catch (const ExceptionForCat<ErrorCategory::StaleShardVersionError>& ex) {
+                            log() << "Attempt " << tries << " to check index consistency for "
+                                  << nss << " received StaleShardVersion error" << causedBy(ex);
+                            if (canRetry) {
+                                continue;
+                            }
+                            throw;
+                        }
+                    }
+                }
+
+                log() << "Found " << numShardedCollsWithInconsistentIndexes
+                      << " collections with inconsistent indexes";
+
+                // Update the count.
+                _numShardedCollsWithInconsistentIndexes.store(
+                    numShardedCollsWithInconsistentIndexes);
+            } catch (const DBException& ex) {
+                // Log and swallow: the count is only published after a fully successful pass.
+                log() << "Failed to check index consistency " << causedBy(ex.toStatus());
+            }
+        },
+        Milliseconds(shardedIndexConsistencyCheckIntervalMS));
+    _shardedIndexConsistencyChecker = periodicRunner->makeJob(std::move(job));
+    _shardedIndexConsistencyChecker.start();
+}
+
+// Marks this node as primary and makes sure the periodic job is running. Does nothing if the
+// node is already marked as primary.
+void PeriodicShardedIndexConsistencyChecker::onStepUp(ServiceContext* serviceContext) {
+    if (_isPrimary) {
+        return;
+    }
+    _isPrimary = true;
+
+    if (_shardedIndexConsistencyChecker.isValid()) {
+        // If we're stepping up again after having stepped down, just resume the existing task.
+        _shardedIndexConsistencyChecker.resume();
+        return;
+    }
+
+    // If this is the first time we're stepping up, start a thread to periodically check index
+    // consistency.
+    _launchShardedIndexConsistencyChecker(serviceContext);
+}
+
+// Pauses the periodic job and resets the published count. Does nothing if the node is not
+// currently marked as primary, so repeated calls are harmless.
+void PeriodicShardedIndexConsistencyChecker::onStepDown() {
+    if (!_isPrimary) {
+        return;
+    }
+    _isPrimary = false;
+
+    // Being marked primary implies the job was launched by an earlier step up.
+    invariant(_shardedIndexConsistencyChecker.isValid());
+
+    // We don't need to be checking index consistency unless we're primary.
+    _shardedIndexConsistencyChecker.pause();
+
+    // Clear the counter to prevent a secondary from reporting an out-of-date count.
+    _numShardedCollsWithInconsistentIndexes.store(0);
+}
+
+// Stops the periodic job if it was ever launched; otherwise there is nothing to stop.
+void PeriodicShardedIndexConsistencyChecker::onShutDown() {
+    if (!_shardedIndexConsistencyChecker.isValid()) {
+        return;
+    }
+    _shardedIndexConsistencyChecker.stop();
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/s/periodic_sharded_index_consistency_checker.h b/src/mongo/db/s/periodic_sharded_index_consistency_checker.h
new file mode 100644
index 00000000000..0be604649fd
--- /dev/null
+++ b/src/mongo/db/s/periodic_sharded_index_consistency_checker.h
@@ -0,0 +1,95 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/util/periodic_runner.h"
+
+namespace mongo {
+
+class OperationContext;
+class ServiceContext;
+
+/**
+ * Owns the periodic job that counts the sharded collections whose indexes are inconsistent
+ * across shards, and exposes the latest count. The job is launched on the first step up,
+ * paused on step down, resumed on later step ups and stopped on shutdown. The instance is
+ * neither copyable nor movable.
+ */
+class PeriodicShardedIndexConsistencyChecker final {
+ PeriodicShardedIndexConsistencyChecker(const PeriodicShardedIndexConsistencyChecker&) = delete;
+ PeriodicShardedIndexConsistencyChecker& operator=(
+ const PeriodicShardedIndexConsistencyChecker&) = delete;
+
+public:
+ PeriodicShardedIndexConsistencyChecker() = default;
+ ~PeriodicShardedIndexConsistencyChecker() = default;
+
+ PeriodicShardedIndexConsistencyChecker(PeriodicShardedIndexConsistencyChecker&& source) =
+ delete;
+ PeriodicShardedIndexConsistencyChecker& operator=(
+ PeriodicShardedIndexConsistencyChecker&& other) = delete;
+
+ /**
+ * Obtains the service-wide PeriodicShardedIndexConsistencyChecker instance.
+ */
+ static PeriodicShardedIndexConsistencyChecker& get(OperationContext* opCtx);
+ static PeriodicShardedIndexConsistencyChecker& get(ServiceContext* serviceContext);
+
+ /**
+ * Returns the count stored by the most recent successful run of the periodic job. The count
+ * is 0 before the first successful run and is reset to 0 on step down.
+ */
+ long long getNumShardedCollsWithInconsistentIndexes() const;
+
+ /**
+ * Invoked when the config server primary enters the 'PRIMARY' state to
+ * trigger the start of the periodic sharded index consistency check.
+ *
+ * The first call launches the periodic job; a call after a step down resumes it.
+ */
+ void onStepUp(ServiceContext* serviceContext);
+
+ /**
+ * Invoked when this node which is currently serving as a 'PRIMARY' steps down.
+ *
+ * Pauses the periodic job until subsequent step up. This method might be called
+ * multiple times in succession, which is what happens as a result of incomplete
+ * transition to primary so it is resilient to that.
+ */
+ void onStepDown();
+
+ /**
+ * Invoked when this node is shutting down. Stops the periodic job.
+ */
+ void onShutDown();
+
+private:
+ /**
+ * Initializes and starts the periodic job.
+ */
+ void _launchShardedIndexConsistencyChecker(ServiceContext* serviceContext);
+
+ // True between a call to onStepUp() and the next call to onStepDown().
+ bool _isPrimary{false};
+
+ // Periodic job for counting inconsistent indexes in the cluster.
+ PeriodicJobAnchor _shardedIndexConsistencyChecker;
+
+ // The latest count of sharded collections with inconsistent indexes.
+ AtomicWord<long long> _numShardedCollsWithInconsistentIndexes{0};
+};
+} // namespace mongo
diff --git a/src/mongo/db/s/sharding_runtime_d_params.idl b/src/mongo/db/s/sharding_runtime_d_params.idl
index ddb8230386c..a430c9d654a 100644
--- a/src/mongo/db/s/sharding_runtime_d_params.idl
+++ b/src/mongo/db/s/sharding_runtime_d_params.idl
@@ -94,6 +94,23 @@ server_parameters:
disableResumableRangeDeleter:
description: 'Disable the resumable range deleter and revert to prior behavior.'
set_at: [startup, runtime]
- cpp_vartype: AtomicWord<bool>
+ cpp_vartype: AtomicWord<bool>
cpp_varname : disableResumableRangeDeleter
default: false
+
+ enableShardedIndexConsistencyCheck:
+ description: >-
+ Enable the periodic sharded index consistency check on the config server's primary.
+ The count of sharded collections with inconsistent indexes is exposed via the
+ 'shardedIndexConsistency' section in the serverStatus output.
+ set_at: [startup, runtime]
+ cpp_vartype: AtomicWord<bool>
+ cpp_varname: enableShardedIndexConsistencyCheck
+ default: true
+
+ shardedIndexConsistencyCheckIntervalMS:
+ description: 'Time interval in milliseconds between subsequent index checks.'
+ set_at: [startup]
+ cpp_vartype: int
+ cpp_varname: shardedIndexConsistencyCheckIntervalMS
+ default: 600000