summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kaloian Manassiev <kaloian.manassiev@mongodb.com> 2022-01-02 12:02:18 +0100
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-01-06 12:36:13 +0000
commit bc2a34c6cb046127b6811ae8ae89abeb05a50b90 (patch)
tree f7e8ee70d79fdc8f0ef081c1b458f9790716a8e2
parent 26a9a04d33ae06fd9ffc5822b685ccea84c3530e (diff)
download mongo-bc2a34c6cb046127b6811ae8ae89abeb05a50b90.tar.gz
SERVER-62065 Introduce the 'repairShardedCollectionChunksHistory' command
(cherry picked from commit 3b56acfe78e91b607eafc737ebf88d237db1460a)
-rw-r--r--buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml2
-rw-r--r--jstests/core/views/views_all_commands.js8
-rw-r--r--jstests/replsets/db_reads_while_recovering_all_commands.js2
-rw-r--r--jstests/sharding/database_and_shard_versioning_all_commands.js1
-rw-r--r--jstests/sharding/repair_sharded_collection_history.js59
-rw-r--r--jstests/sharding/safe_secondary_reads_drop_recreate.js1
-rw-r--r--jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js1
-rw-r--r--jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js1
-rw-r--r--src/mongo/db/s/SConscript1
-rw-r--r--src/mongo/db/s/config/configsvr_repair_sharded_collection_chunks_history_command.cpp153
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager.h12
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp90
-rw-r--r--src/mongo/db/s/flush_routing_table_cache_updates_command.cpp9
-rw-r--r--src/mongo/s/catalog/type_chunk.cpp1
-rw-r--r--src/mongo/s/catalog/type_chunk.h1
-rw-r--r--src/mongo/s/client/shard.h4
-rw-r--r--src/mongo/s/commands/SConscript1
-rw-r--r--src/mongo/s/commands/cluster_repair_sharded_collection_chunks_history_cmd.cpp122
-rw-r--r--src/mongo/s/request_types/flush_routing_table_cache_updates.idl3
19 files changed, 464 insertions, 8 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index 9aac8d16b57..fab6f7fda0c 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -184,6 +184,8 @@ selector:
- jstests/sharding/mongos_precache_routing_info.js
- jstests/sharding/warm_up_connection_pool.js
- jstests/sharding/collation_shard_targeting_hashed_shard_key.js
+ # Enable when SERVER-62065 is released in 4.0
+ - jstests/sharding/repair_sharded_collection_history.js
exclude_with_any_tags:
- multiversion_incompatible
diff --git a/jstests/core/views/views_all_commands.js b/jstests/core/views/views_all_commands.js
index f8d1be30b7b..345c408e321 100644
--- a/jstests/core/views/views_all_commands.js
+++ b/jstests/core/views/views_all_commands.js
@@ -91,6 +91,7 @@ let viewsCommandTests = {
_configsvrMovePrimary: {skip: isAnInternalCommand},
_configsvrRemoveShard: {skip: isAnInternalCommand},
_configsvrRemoveShardFromZone: {skip: isAnInternalCommand},
+ _configsvrRepairShardedCollectionChunksHistory: {skip: isAnInternalCommand},
_configsvrShardCollection: {skip: isAnInternalCommand},
_configsvrUpdateZoneKeyRange: {skip: isAnInternalCommand},
_cpuProfilerStart: {skip: isAnInternalCommand},
@@ -428,6 +429,13 @@ let viewsCommandTests = {
],
repairCursor: {command: {repairCursor: "view"}, expectFailure: true},
repairDatabase: {skip: isUnrelated},
+ repairShardedCollectionChunksHistory: {
+ command: {repairShardedCollectionChunksHistory: "test.view"},
+ skipStandalone: true,
+ isAdminCommand: true,
+ expectFailure: true,
+ expectedErrorCode: ErrorCodes.NamespaceNotFound,
+ },
replSetAbortPrimaryCatchUp: {skip: isUnrelated},
replSetFreeze: {skip: isUnrelated},
replSetGetConfig: {skip: isUnrelated},
diff --git a/jstests/replsets/db_reads_while_recovering_all_commands.js b/jstests/replsets/db_reads_while_recovering_all_commands.js
index 05dade63ca3..37ed95f1015 100644
--- a/jstests/replsets/db_reads_while_recovering_all_commands.js
+++ b/jstests/replsets/db_reads_while_recovering_all_commands.js
@@ -49,6 +49,7 @@ const allCommands = {
_configsvrRefineCollectionShardKey: {skip: isPrimaryOnly},
_configsvrRemoveShard: {skip: isPrimaryOnly},
_configsvrRemoveShardFromZone: {skip: isPrimaryOnly},
+ _configsvrRepairShardedCollectionChunksHistory: {skip: isAnInternalCommand},
_configsvrShardCollection: {skip: isPrimaryOnly},
_configsvrUpdateZoneKeyRange: {skip: isPrimaryOnly},
_flushDatabaseCacheUpdates: {skip: isPrimaryOnly},
@@ -251,6 +252,7 @@ const allCommands = {
reIndex: {skip: isNotAUserDataRead},
renameCollection: {skip: isPrimaryOnly},
repairDatabase: {skip: isNotAUserDataRead},
+ repairShardedCollectionChunksHistory: {skip: isPrimaryOnly},
repairCursor: {skip: isNotAUserDataRead},
replSetAbortPrimaryCatchUp: {skip: isNotAUserDataRead},
replSetFreeze: {skip: isNotAUserDataRead},
diff --git a/jstests/sharding/database_and_shard_versioning_all_commands.js b/jstests/sharding/database_and_shard_versioning_all_commands.js
index 52a5bd35fe7..ca9d56c8a80 100644
--- a/jstests/sharding/database_and_shard_versioning_all_commands.js
+++ b/jstests/sharding/database_and_shard_versioning_all_commands.js
@@ -376,6 +376,7 @@ let testCases = {
assert(mongosConn.getDB(dbName).getCollection(collName + "_renamed").drop());
}
},
+ repairShardedCollectionChunksHistory: {skip: "always targets the config server"},
replSetGetStatus: {skip: "not supported in mongos"},
resetError: {skip: "not on a user database"},
restartCatalog: {skip: "not on a user database"},
diff --git a/jstests/sharding/repair_sharded_collection_history.js b/jstests/sharding/repair_sharded_collection_history.js
new file mode 100644
index 00000000000..e069873d1cd
--- /dev/null
+++ b/jstests/sharding/repair_sharded_collection_history.js
@@ -0,0 +1,59 @@
+(function() {
+"use strict";
+
+load("jstests/libs/feature_compatibility_version.js");
+
+var st = new ShardingTest({
+ shards: 1,
+});
+
+let configPrimary = st.configRS.getPrimary();
+let configPrimaryAdminDB = configPrimary.getDB("admin");
+let shardPrimary = st.rs0.getPrimary();
+let shardPrimaryAdminDB = shardPrimary.getDB("admin");
+let shardPrimaryConfigDB = shardPrimary.getDB("config");
+
+let testDB = st.s.getDB("test1");
+
+// Create a sharded collection with primary shard 0.
+assert.commandWorked(st.s.adminCommand({enableSharding: testDB.getName()}));
+st.ensurePrimaryShard(testDB.getName(), st.shard0.shardName);
+assert.commandWorked(st.s.adminCommand({shardCollection: testDB.foo.getFullName(), key: {a: 1}}));
+assert.commandWorked(st.s.adminCommand({split: testDB.foo.getFullName(), middle: {a: 0}}));
+assert.commandWorked(st.s.adminCommand({split: testDB.foo.getFullName(), middle: {a: -1000}}));
+assert.commandWorked(st.s.adminCommand({split: testDB.foo.getFullName(), middle: {a: +1000}}));
+
+assert.writeOK(st.s.getDB("test1").foo.insert({_id: "id1", a: 1}));
+assert.neq(null, st.s.getDB("test1").foo.findOne({_id: "id1", a: 1}));
+
+assert.writeOK(st.s.getDB("test1").foo.insert({_id: "id2", a: -1}));
+assert.neq(null, st.s.getDB("test1").foo.findOne({_id: "id2", a: -1}));
+
+// Manually clear the 'historyIsAt40' field from the config server and the history entries from
+// the shards' cache collections in order to simulate a wrong upgrade due to SERVER-62065
+assert.writeOK(st.s.getDB("config").chunks.update(
+ {ns: 'test1.foo'}, {'$unset': {historyIsAt40: ''}}, {multi: true}));
+assert.writeOK(shardPrimaryConfigDB.getCollection("cache.chunks.test1.foo")
+ .update({}, {'$unset': {history: ''}}, {multi: true}));
+
+assert.commandWorked(st.s.adminCommand({repairShardedCollectionChunksHistory: 'test1.foo'}));
+
+// Make sure chunks for test1.foo were given history after repair.
+var chunks = st.s.getDB("config").getCollection("chunks").find({ns: "test1.foo"}).toArray();
+assert.eq(chunks.length, 4);
+chunks.forEach((chunk) => {
+ assert.neq(null, chunk);
+ assert(chunk.hasOwnProperty("history"), "test1.foo does not have a history after repair");
+ assert(chunk.hasOwnProperty("historyIsAt40"),
+ "test1.foo does not have a historyIsAt40 after repair");
+});
+chunks = shardPrimaryConfigDB.getCollection("cache.chunks.test1.foo").find().toArray();
+assert.eq(chunks.length, 4);
+chunks.forEach((chunk) => {
+ assert.neq(null, chunk);
+ assert(chunk.hasOwnProperty("history"),
+ "test1.foo does not have a history on the shard after repair");
+});
+
+st.stop();
+})();
diff --git a/jstests/sharding/safe_secondary_reads_drop_recreate.js b/jstests/sharding/safe_secondary_reads_drop_recreate.js
index 575746f83c1..425eeffc634 100644
--- a/jstests/sharding/safe_secondary_reads_drop_recreate.js
+++ b/jstests/sharding/safe_secondary_reads_drop_recreate.js
@@ -253,6 +253,7 @@ let testCases = {
removeShardFromZone: {skip: "primary only"},
renameCollection: {skip: "primary only"},
repairCursor: {skip: "does not return user data"},
+ repairShardedCollectionChunksHistory: {skip: "does not return user data"},
replSetAbortPrimaryCatchUp: {skip: "does not return user data"},
replSetFreeze: {skip: "does not return user data"},
replSetGetConfig: {skip: "does not return user data"},
diff --git a/jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js b/jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js
index 88318a11d31..540855cc851 100644
--- a/jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js
+++ b/jstests/sharding/safe_secondary_reads_single_migration_suspend_range_deletion.js
@@ -288,6 +288,7 @@ let testCases = {
removeShardFromZone: {skip: "primary only"},
renameCollection: {skip: "primary only"},
repairCursor: {skip: "does not return user data"},
+ repairShardedCollectionChunksHistory: {skip: "does not return user data"},
replSetAbortPrimaryCatchUp: {skip: "does not return user data"},
replSetFreeze: {skip: "does not return user data"},
replSetGetConfig: {skip: "does not return user data"},
diff --git a/jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js b/jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js
index 9469bb43581..cda4797802f 100644
--- a/jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js
+++ b/jstests/sharding/safe_secondary_reads_single_migration_waitForDelete.js
@@ -258,6 +258,7 @@ let testCases = {
removeShardFromZone: {skip: "primary only"},
renameCollection: {skip: "primary only"},
repairCursor: {skip: "does not return user data"},
+ repairShardedCollectionChunksHistory: {skip: "does not return user data"},
replSetAbortPrimaryCatchUp: {skip: "does not return user data"},
replSetFreeze: {skip: "does not return user data"},
replSetGetConfig: {skip: "does not return user data"},
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index e4418131d92..cc37c48d1d7 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -331,6 +331,7 @@ env.Library(
'config/configsvr_move_primary_command.cpp',
'config/configsvr_remove_shard_command.cpp',
'config/configsvr_remove_shard_from_zone_command.cpp',
+ 'config/configsvr_repair_sharded_collection_chunks_history_command.cpp',
'config/configsvr_shard_collection_command.cpp',
'config/configsvr_split_chunk_command.cpp',
'config/configsvr_update_zone_key_range_command.cpp',
diff --git a/src/mongo/db/s/config/configsvr_repair_sharded_collection_chunks_history_command.cpp b/src/mongo/db/s/config/configsvr_repair_sharded_collection_chunks_history_command.cpp
new file mode 100644
index 00000000000..77352ff3611
--- /dev/null
+++ b/src/mongo/db/s/config/configsvr_repair_sharded_collection_chunks_history_command.cpp
@@ -0,0 +1,153 @@
+
+/**
+ * Copyright (C) 2021 MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/auth/action_type.h"
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/auth/privilege.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/logical_clock.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/s/grid.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+namespace {
+
+/**
+ * Config server side of the chunks history repair. Validates the invocation, optionally clears
+ * the 'historyIsAt40' marker from all chunks of the collection ('force: true') and then
+ * delegates to ShardingCatalogManager::upgradeChunksHistory.
+ */
+class ConfigSvrRepairShardedCollectionChunksHistoryCommand : public BasicCommand {
+public:
+    ConfigSvrRepairShardedCollectionChunksHistoryCommand()
+        : BasicCommand("_configsvrRepairShardedCollectionChunksHistory") {}
+
+    bool adminOnly() const override {
+        return true;
+    }
+
+    bool supportsWriteConcern(const BSONObj& cmd) const override {
+        return true;
+    }
+
+    AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
+        return AllowedOnSecondary::kNever;
+    }
+
+    std::string help() const override {
+        return "Internal command, which is exported by the sharding config server. Do not call "
+               "directly.";
+    }
+
+    // The command value itself is the fully qualified namespace of the collection to repair.
+    std::string parseNs(const std::string& unusedDbName, const BSONObj& cmdObj) const override {
+        return CommandHelpers::parseNsFullyQualified(cmdObj);
+    }
+
+    // Only callers holding the 'internal' action on the cluster resource may run this command.
+    Status checkAuthForCommand(Client* client,
+                               const std::string& dbname,
+                               const BSONObj& cmdObj) const override {
+        auto* const authSession = AuthorizationSession::get(client);
+        if (authSession->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(),
+                                                          ActionType::internal)) {
+            return Status::OK();
+        }
+        return Status(ErrorCodes::Unauthorized, "Unauthorized");
+    }
+
+    bool run(OperationContext* opCtx,
+             const std::string& unusedDbName,
+             const BSONObj& cmdObj,
+             BSONObjBuilder& result) override {
+        uassert(ErrorCodes::IllegalOperation,
+                "_configsvrRepairShardedCollectionChunksHistory can only be run on config servers",
+                serverGlobalParams.clusterRole == ClusterRole::ConfigServer);
+
+        // Reads into the config database performed under this operation use local read concern.
+        repl::ReadConcernArgs::get(opCtx) =
+            repl::ReadConcernArgs(repl::ReadConcernLevel::kLocalReadConcern);
+
+        const bool calledWithMajority =
+            opCtx->getWriteConcern().wMode == WriteConcernOptions::kMajority;
+        uassert(ErrorCodes::InvalidOptions,
+                str::stream() << "_configsvrRepairShardedCollectionChunksHistory must be called "
+                                 "with majority writeConcern, got "
+                              << cmdObj,
+                calledWithMajority);
+
+        const NamespaceString nss{parseNs(unusedDbName, cmdObj)};
+
+        // Fails the command if the collection entry cannot be fetched; its epoch is needed below.
+        auto* const grid = Grid::get(opCtx);
+        auto collectionEntry =
+            uassertStatusOK(grid->catalogClient()->getCollection(
+                                opCtx, nss, repl::ReadConcernLevel::kLocalReadConcern))
+                .value;
+
+        if (cmdObj["force"].booleanSafe()) {
+            LOG(0) << "Resetting the 'historyIsAt40' field for all chunks in collection "
+                   << nss.ns() << " in order to force all chunks' history to get recreated";
+
+            // Multi-update which strips the marker from every chunk document of the collection.
+            write_ops::UpdateOpEntry unsetMarkerEntry;
+            unsetMarkerEntry.setQ(BSON("ns" << nss.ns()));
+            unsetMarkerEntry.setU(BSON("$unset" << BSON(ChunkType::historyIsAt40() << "")));
+            unsetMarkerEntry.setUpsert(false);
+            unsetMarkerEntry.setMulti(true);
+
+            write_ops::Update unsetMarkerOp(ChunkType::ConfigNS);
+            unsetMarkerOp.setUpdates({unsetMarkerEntry});
+
+            BatchedCommandRequest unsetMarkerRequest(std::move(unsetMarkerOp));
+            unsetMarkerRequest.setWriteConcern(ShardingCatalogClient::kLocalWriteConcern.toBSON());
+
+            const auto configShard = grid->shardRegistry()->getConfigShard();
+            auto unsetMarkerResponse =
+                configShard->runBatchWriteCommand(opCtx,
+                                                  Shard::kDefaultConfigCommandTimeout,
+                                                  unsetMarkerRequest,
+                                                  Shard::RetryPolicy::kIdempotent);
+            uassertStatusOK(unsetMarkerResponse.toStatus());
+
+            uassert(ErrorCodes::Error(5760502),
+                    str::stream() << "No chunks found for collection " << nss.ns(),
+                    unsetMarkerResponse.getN() > 0);
+        }
+
+        // The current cluster time becomes the 'validAfter' of the recreated history entries.
+        const auto validAfter = LogicalClock::get(opCtx)->getClusterTime().asTimestamp();
+
+        ShardingCatalogManager::get(opCtx)->upgradeChunksHistory(
+            opCtx, nss, collectionEntry.getEpoch(), validAfter);
+
+        grid->catalogCache()->invalidateShardedCollection(nss);
+
+        return true;
+    }
+
+} configSvrRepairShardedCollectionChunksHistoryCommand;
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager.h b/src/mongo/db/s/config/sharding_catalog_manager.h
index 11131fdfdaf..c0fa607182b 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager.h
+++ b/src/mongo/db/s/config/sharding_catalog_manager.h
@@ -430,6 +430,18 @@ public:
Lock::ExclusiveLock lockZoneMutex(OperationContext* opCtx);
+ //
+ // Upgrade/downgrade
+ //
+
+ /**
+ * Upgrade the chunk metadata to include the history field.
+ *
+ * For every chunk of 'nss' which is not yet marked with 'historyIsAt40', rewrites the chunk
+ * document on the config server with a bumped chunk version, a single fresh history entry
+ * (valid from 'validAfter' on the chunk's current shard) and the 'historyIsAt40' marker.
+ * Chunks which already carry the marker are left untouched. Once the writes are majority
+ * committed, each shard owning a rewritten chunk is asked to flush its routing table cache
+ * updates for 'nss'.
+ *
+ * 'collectionEpoch' is the epoch used to look up the current collection version.
+ *
+ * Throws if the collection has no chunks, if a chunk carries the marker but has an empty
+ * history, or if any of the config server or shard operations fails.
+ */
+ void upgradeChunksHistory(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const OID& collectionEpoch,
+ const Timestamp validAfter);
+
private:
/**
* Performs the necessary checks for version compatibility and creates a new config.version
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
index d821e141506..1856f3dec14 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
@@ -42,8 +42,10 @@
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/operation_context.h"
+#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/s/sharding_logging.h"
#include "mongo/db/s/sharding_runtime_d_params_gen.h"
+#include "mongo/db/write_concern.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/s/catalog/sharding_catalog_client.h"
#include "mongo/s/catalog/type_chunk.h"
@@ -1111,6 +1113,94 @@ StatusWith<ChunkType> ShardingCatalogManager::_findChunkOnConfig(OperationContex
return ChunkType::fromConfigBSON(origChunks.front());
}
+/**
+ * Recreates the history of every chunk of 'nss' which is not marked with 'historyIsAt40', bumps
+ * the collection's major version while doing so and finally makes the affected shards flush
+ * their routing table cache updates for the collection.
+ */
+void ShardingCatalogManager::upgradeChunksHistory(OperationContext* opCtx,
+                                                  const NamespaceString& nss,
+                                                  const OID& collectionEpoch,
+                                                  const Timestamp validAfter) {
+    const auto shardRegistry = Grid::get(opCtx)->shardRegistry();
+    auto const catalogClient = Grid::get(opCtx)->catalogClient();
+
+    // Holding _kChunkOpLock exclusively keeps chunk splits, merges, and migrations from running
+    // concurrently with the rewrite below.
+    Lock::ExclusiveLock chunkOpLock(opCtx->lockState(), _kChunkOpLock);
+
+    // Load all chunk documents of the collection straight from the config server primary.
+    auto allChunksResponse = uassertStatusOK(shardRegistry->getConfigShard()->exhaustiveFindOnConfig(
+        opCtx,
+        ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+        repl::ReadConcernLevel::kLocalReadConcern,
+        ChunkType::ConfigNS,
+        BSON("ns" << nss.ns()),
+        BSONObj(),
+        boost::none));
+    uassert(ErrorCodes::Error(5760503),
+            str::stream() << "No chunks found for collection " << nss.ns(),
+            !allChunksResponse.docs.empty());
+    const auto& chunkDocs = allChunksResponse.docs;
+
+    const auto currentCollectionVersion =
+        uassertStatusOK(_findCollectionVersion(opCtx, nss, collectionEpoch));
+
+    // Starting from the next major version guarantees that every shard is forced to refresh.
+    ChunkVersion nextChunkVersion(
+        currentCollectionVersion.majorVersion() + 1, 0, currentCollectionVersion.epoch());
+
+    std::set<ShardId> shardsToRefresh;
+    for (const auto& chunkDoc : chunkDocs) {
+        auto chunk = uassertStatusOK(ChunkType::fromConfigBSON(chunkDoc));
+
+        // Chunks which already carry the marker are skipped, but they must have a history.
+        if (chunkDoc[ChunkType::historyIsAt40()].booleanSafe()) {
+            uassert(
+                ErrorCodes::Error(5760504),
+                str::stream() << "Chunk " << chunk.getName() << " in collection " << nss.ns()
+                              << " indicates that it has been upgraded to version 4.0, but is "
+                                 "missing the history field. This indicates a corrupted routing "
+                                 "table and requires a manual intervention to be fixed.",
+                !chunk.getHistory().empty());
+            continue;
+        }
+
+        // Each rewritten chunk gets its own version: same bumped major, increasing minor.
+        chunk.setVersion(nextChunkVersion);
+        nextChunkVersion.incMinor();
+        shardsToRefresh.emplace(chunk.getShard());
+
+        // The new history consists of one entry placing the chunk on its current shard.
+        chunk.setHistory({ChunkHistory{validAfter, chunk.getShard()}});
+
+        // Adding the 'historyIsAt40' marker makes a re-run of the command skip this chunk.
+        BSONObjBuilder repairedChunkBuilder(chunk.toConfigBSON());
+        repairedChunkBuilder.appendBool(ChunkType::historyIsAt40(), true);
+
+        // Replace the chunk document on the config server (no upsert).
+        uassertStatusOK(
+            catalogClient->updateConfigDocument(opCtx,
+                                                ChunkType::ConfigNS,
+                                                BSON(ChunkType::name(chunk.getName())),
+                                                repairedChunkBuilder.obj(),
+                                                false,
+                                                ShardingCatalogClient::kLocalWriteConcern));
+    }
+
+    // The chunk updates above were issued with local write concern; wait for them to become
+    // majority committed so that the shard refreshes triggered below can see them.
+    const auto lastOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
+    WriteConcernResult ignoredWCResult;
+    uassertStatusOK(waitForWriteConcern(
+        opCtx, lastOpTime, ShardingCatalogClient::kMajorityWriteConcern, &ignoredWCResult));
+
+    // Ask the primary of every shard which owns a rewritten chunk to flush its routing table.
+    for (const auto& shardId : shardsToRefresh) {
+        const auto shard = uassertStatusOK(shardRegistry->getShard(opCtx, shardId));
+        auto flushResponse = shard->runCommandWithFixedRetryAttempts(
+            opCtx,
+            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+            "admin",
+            BSON("_flushRoutingTableCacheUpdates" << nss.ns()),
+            Shard::RetryPolicy::kIdempotent);
+        uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(flushResponse));
+    }
+}
+
StatusWith<ChunkVersion> ShardingCatalogManager::_findCollectionVersion(
OperationContext* opCtx, const NamespaceString& nss, const OID& collectionEpoch) {
auto const configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
diff --git a/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp b/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
index 413f6cdf2ea..a5e44e89073 100644
--- a/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
+++ b/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
@@ -55,9 +55,9 @@ namespace {
class FlushRoutingTableCacheUpdatesCmd final
: public TypedCommand<FlushRoutingTableCacheUpdatesCmd> {
public:
- using Request = _flushRoutingTableCacheUpdates;
+ using Request = FlushRoutingTableCacheUpdates;
- // Support deprecated name 'forceRoutingTableRefresh' for backwards compatibility with 3.6.0.
+ // Support deprecated name 'forceRoutingTableRefresh' for backwards compatibility with 4.0
FlushRoutingTableCacheUpdatesCmd()
: TypedCommand<FlushRoutingTableCacheUpdatesCmd>(Request::kCommandName,
"forceRoutingTableRefresh") {}
@@ -104,11 +104,12 @@ public:
uassertStatusOK(shardingState->canAcceptShardedCommands());
uassert(ErrorCodes::IllegalOperation,
- "Can't issue _flushRoutingTableCacheUpdates from 'eval'",
+ str::stream() << "Can't issue " << Request::kCommandName << " from 'eval'",
!opCtx->getClient()->isInDirectClient());
uassert(ErrorCodes::IllegalOperation,
- "Can't call _flushRoutingTableCacheUpdates if in read-only mode",
+ str::stream() << "Can't call " << Request::kCommandName
+ << " if in read-only mode",
!storageGlobalParams.readOnly);
auto& oss = OperationShardingState::get(opCtx);
diff --git a/src/mongo/s/catalog/type_chunk.cpp b/src/mongo/s/catalog/type_chunk.cpp
index a70a2088f51..547d985c387 100644
--- a/src/mongo/s/catalog/type_chunk.cpp
+++ b/src/mongo/s/catalog/type_chunk.cpp
@@ -56,6 +56,7 @@ const BSONField<bool> ChunkType::jumbo("jumbo");
const BSONField<Date_t> ChunkType::lastmod("lastmod");
const BSONField<OID> ChunkType::epoch("lastmodEpoch");
const BSONField<BSONObj> ChunkType::history("history");
+const BSONField<bool> ChunkType::historyIsAt40("historyIsAt40");
namespace {
diff --git a/src/mongo/s/catalog/type_chunk.h b/src/mongo/s/catalog/type_chunk.h
index 72450bad707..9cb03eea92a 100644
--- a/src/mongo/s/catalog/type_chunk.h
+++ b/src/mongo/s/catalog/type_chunk.h
@@ -186,6 +186,7 @@ public:
static const BSONField<Date_t> lastmod;
static const BSONField<OID> epoch;
static const BSONField<BSONObj> history;
+ static const BSONField<bool> historyIsAt40;
ChunkType();
ChunkType(NamespaceString nss, ChunkRange range, ChunkVersion version, ShardId shardId);
diff --git a/src/mongo/s/client/shard.h b/src/mongo/s/client/shard.h
index 44a2c48c43b..faee4157968 100644
--- a/src/mongo/s/client/shard.h
+++ b/src/mongo/s/client/shard.h
@@ -40,11 +40,11 @@
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/executor/remote_command_response.h"
#include "mongo/s/shard_id.h"
+#include "mongo/s/write_ops/batched_command_request.h"
+#include "mongo/s/write_ops/batched_command_response.h"
namespace mongo {
-class BatchedCommandRequest;
-class BatchedCommandResponse;
class OperationContext;
class RemoteCommandTargeter;
diff --git a/src/mongo/s/commands/SConscript b/src/mongo/s/commands/SConscript
index f363155acee..8407084e202 100644
--- a/src/mongo/s/commands/SConscript
+++ b/src/mongo/s/commands/SConscript
@@ -72,6 +72,7 @@ env.Library(
'cluster_profile_cmd.cpp',
'cluster_remove_shard_cmd.cpp',
'cluster_remove_shard_from_zone_cmd.cpp',
+ 'cluster_repair_sharded_collection_chunks_history_cmd.cpp',
'cluster_repl_set_get_status_cmd.cpp',
'cluster_reset_error_cmd.cpp',
'cluster_restart_catalog_command.cpp',
diff --git a/src/mongo/s/commands/cluster_repair_sharded_collection_chunks_history_cmd.cpp b/src/mongo/s/commands/cluster_repair_sharded_collection_chunks_history_cmd.cpp
new file mode 100644
index 00000000000..22f9ccf7525
--- /dev/null
+++ b/src/mongo/s/commands/cluster_repair_sharded_collection_chunks_history_cmd.cpp
@@ -0,0 +1,122 @@
+
+/**
+ * Copyright (C) 2021 MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/audit.h"
+#include "mongo/db/auth/action_set.h"
+#include "mongo/db/auth/action_type.h"
+#include "mongo/db/auth/authorization_manager.h"
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/client.h"
+#include "mongo/db/commands.h"
+#include "mongo/s/grid.h"
+
+namespace mongo {
+namespace {
+
+/**
+ * Mongos entry point of the user-facing 'repairShardedCollectionChunksHistory' command. It does
+ * no repair work itself: the request is forwarded to the config server primary as
+ * '_configsvrRepairShardedCollectionChunksHistory' (together with the optional 'force' flag) and
+ * the config server's reply is passed back to the caller.
+ */
+class RepairShardedCollectionChunksHistoryCommand : public BasicCommand {
+public:
+    RepairShardedCollectionChunksHistoryCommand()
+        : BasicCommand("repairShardedCollectionChunksHistory") {}
+
+    AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
+        return AllowedOnSecondary::kAlways;
+    }
+
+    bool adminOnly() const override {
+        return true;
+    }
+
+    bool supportsWriteConcern(const BSONObj& cmd) const override {
+        return false;
+    }
+
+    std::string help() const override {
+        return "Administrative command to repair the effects of SERVER-62065. If the collection "
+               "has been upgraded through a cluster comprised of binaries which do not contain "
+               "this command, the chunks cache collections on the shards will miss history "
+               "entries. This command will correct that and will mark such collections as "
+               "correctly repaired, so that a subsequent invocation will not cause any changes to "
+               "the routing information. In rare cases where the history entries are missing due "
+               "to corrupted restore, the 'force:true' parameter can be passed which will force "
+               "all history entries to be re-added.";
+    }
+
+    // The command intentionally uses the permission control of split/mergeChunks since it only
+    // modifies the contents of chunk entries and increments the collection/shard versions without
+    // causing any data placement changes
+    Status checkAuthForCommand(Client* client,
+                               const std::string& dbname,
+                               const BSONObj& cmdObj) const override {
+        if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+                ResourcePattern::forExactNamespace(NamespaceString(parseNs(dbname, cmdObj))),
+                ActionType::splitChunk)) {
+            return Status(ErrorCodes::Unauthorized, "Unauthorized");
+        }
+        return Status::OK();
+    }
+
+    // The command value itself is the fully qualified namespace of the collection to repair.
+    std::string parseNs(const std::string& unusedDbName, const BSONObj& cmdObj) const override {
+        return CommandHelpers::parseNsFullyQualified(cmdObj);
+    }
+
+    /**
+     * Forwards the repair request for the namespace named in 'cmdObj' to the config server
+     * primary and copies the config server's reply into 'result'. Throws if the request cannot
+     * be delivered or if the config server reports a command failure.
+     */
+    bool run(OperationContext* opCtx,
+             const std::string& unusedDbName,
+             const BSONObj& cmdObj,
+             BSONObjBuilder& result) override {
+        const NamespaceString nss{parseNs(unusedDbName, cmdObj)};
+
+        BSONObjBuilder cmdBuilder(
+            BSON("_configsvrRepairShardedCollectionChunksHistory" << nss.ns()));
+        if (cmdObj["force"].booleanSafe())
+            cmdBuilder.appendBool("force", true);
+
+        // The config server command refuses to run unless it is invoked with majority
+        // writeConcern, so that writeConcern must be attached to the forwarded request.
+        auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+        auto cmdResponse = uassertStatusOK(configShard->runCommandWithFixedRetryAttempts(
+            opCtx,
+            ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+            "admin",
+            CommandHelpers::appendMajorityWriteConcern(cmdBuilder.obj()),
+            Shard::RetryPolicy::kIdempotent));
+        uassertStatusOK(cmdResponse.commandStatus);
+
+        // Append any return value from the response, which the config server returned
+        CommandHelpers::filterCommandReplyForPassthrough(cmdResponse.response, &result);
+
+        return true;
+    }
+
+} repairShardedCollectionChunksHistoryCommand;
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/s/request_types/flush_routing_table_cache_updates.idl b/src/mongo/s/request_types/flush_routing_table_cache_updates.idl
index 07981295bac..e5a27a58126 100644
--- a/src/mongo/s/request_types/flush_routing_table_cache_updates.idl
+++ b/src/mongo/s/request_types/flush_routing_table_cache_updates.idl
@@ -26,8 +26,6 @@
# it in the license file.
#
-# _flushRoutingTableCacheUpdates IDL File
-
global:
cpp_namespace: "mongo"
@@ -36,6 +34,7 @@ imports:
commands:
_flushRoutingTableCacheUpdates:
+ cpp_name: FlushRoutingTableCacheUpdates
description: "An internal command to wait for the last routing table cache refresh for a particular namespace to be persisted to disk"
strict: true
namespace: type