From c2c6ed338f617e89600f4a221abc19045431c46e Mon Sep 17 00:00:00 2001 From: Kevin Pulo Date: Thu, 16 May 2019 02:01:11 +0000 Subject: SERVER-36159 Log whenever the gossiped config server opTime term changes --- src/mongo/db/s/sharding_config_optime_gossip.cpp | 10 +++++----- src/mongo/db/s/sharding_config_optime_gossip.h | 6 ++++-- src/mongo/db/s/sharding_state_recovery.cpp | 8 +++++++- src/mongo/db/service_entry_point_common.cpp | 2 +- src/mongo/db/service_entry_point_common.h | 2 +- src/mongo/db/service_entry_point_mongod.cpp | 4 ++-- src/mongo/embedded/service_entry_point_embedded.cpp | 2 +- src/mongo/s/grid.cpp | 21 ++++++++++++++++++++- src/mongo/s/grid.h | 12 +++++++++++- src/mongo/s/sharding_egress_metadata_hook.cpp | 16 ++++++++++------ src/mongo/s/sharding_egress_metadata_hook.h | 4 +++- 11 files changed, 65 insertions(+), 22 deletions(-) (limited to 'src/mongo') diff --git a/src/mongo/db/s/sharding_config_optime_gossip.cpp b/src/mongo/db/s/sharding_config_optime_gossip.cpp index 3fc4fbb57f0..1a9a4980316 100644 --- a/src/mongo/db/s/sharding_config_optime_gossip.cpp +++ b/src/mongo/db/s/sharding_config_optime_gossip.cpp @@ -57,16 +57,16 @@ repl::OpTime ShardingEgressMetadataHookForMongod::_getConfigServerOpTime() { return Grid::get(_serviceContext)->configOpTime(); } -Status ShardingEgressMetadataHookForMongod::_advanceConfigOptimeFromShard( - ShardId shardId, const BSONObj& metadataObj) { +Status ShardingEgressMetadataHookForMongod::_advanceConfigOpTimeFromShard( + OperationContext* opCtx, const ShardId& shardId, const BSONObj& metadataObj) { if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) { return Status::OK(); } - return ShardingEgressMetadataHook::_advanceConfigOptimeFromShard(shardId, metadataObj); + return ShardingEgressMetadataHook::_advanceConfigOpTimeFromShard(opCtx, shardId, metadataObj); } -void advanceConfigOptimeFromRequestMetadata(OperationContext* opCtx) { +void advanceConfigOpTimeFromRequestMetadata(OperationContext* opCtx) { auto const shardingState = ShardingState::get(opCtx); if (!shardingState->enabled()) { @@ -84,7 +84,7 @@ void advanceConfigOptimeFromRequestMetadata(OperationContext* opCtx) { ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), ActionType::internal)); - Grid::get(opCtx)->advanceConfigOpTime(*opTime); + Grid::get(opCtx)->advanceConfigOpTime(opCtx, *opTime, "request from"); } } // namespace rpc diff --git a/src/mongo/db/s/sharding_config_optime_gossip.h b/src/mongo/db/s/sharding_config_optime_gossip.h index 73fdfc57b2e..67a4c3e5169 100644 --- a/src/mongo/db/s/sharding_config_optime_gossip.h +++ b/src/mongo/db/s/sharding_config_optime_gossip.h @@ -43,14 +43,16 @@ private: repl::OpTime _getConfigServerOpTime() override; - Status _advanceConfigOptimeFromShard(ShardId shardId, const BSONObj& metadataObj) override; + Status _advanceConfigOpTimeFromShard(OperationContext* opCtx, + const ShardId& shardId, + const BSONObj& metadataObj) override; }; /** * Updates the ShardRegistry's stored notion of the config server optime based on the * ConfigServerMetadata decoration attached to the OperationContext. */ -void advanceConfigOptimeFromRequestMetadata(OperationContext* opCtx); +void advanceConfigOpTimeFromRequestMetadata(OperationContext* opCtx); } // namespace rpc } // namespace mongo diff --git a/src/mongo/db/s/sharding_state_recovery.cpp b/src/mongo/db/s/sharding_state_recovery.cpp index 96fb297896c..6da0799ea1c 100644 --- a/src/mongo/db/s/sharding_state_recovery.cpp +++ b/src/mongo/db/s/sharding_state_recovery.cpp @@ -242,7 +242,13 @@ Status ShardingStateRecovery::recover(OperationContext* opCtx) { if (!recoveryDoc.getMinOpTimeUpdaters()) { // Treat the minOpTime as up-to-date - grid->advanceConfigOpTime(recoveryDoc.getMinOpTime()); + const auto prevOpTime = grid->advanceConfigOpTime( + opCtx, recoveryDoc.getMinOpTime(), "sharding state recovery document"); + if (prevOpTime) { + log() + << "No in flight metadata change operations, so config server optime updated from " + << *prevOpTime << " to " << recoveryDoc.getMinOpTime(); + } return Status::OK(); } diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp index d6e3b71e3b1..498b6e8c2c6 100644 --- a/src/mongo/db/service_entry_point_common.cpp +++ b/src/mongo/db/service_entry_point_common.cpp @@ -778,7 +778,7 @@ void execCommandDatabase(OperationContext* opCtx, uassertStatusOK(shardingState->canAcceptShardedCommands()); } - behaviors.advanceConfigOptimeFromRequestMetadata(opCtx); + behaviors.advanceConfigOpTimeFromRequestMetadata(opCtx); } oss.setAllowImplicitCollectionCreation(allowImplicitCollectionCreationField); diff --git a/src/mongo/db/service_entry_point_common.h b/src/mongo/db/service_entry_point_common.h index 0821944aa27..60fb5f9e2cb 100644 --- a/src/mongo/db/service_entry_point_common.h +++ b/src/mongo/db/service_entry_point_common.h @@ -83,7 +83,7 @@ struct ServiceEntryPointCommon { virtual void handleException(const DBException& e, OperationContext* opCtx) const = 0; - virtual void advanceConfigOptimeFromRequestMetadata(OperationContext* opCtx) const = 0; + virtual void advanceConfigOpTimeFromRequestMetadata(OperationContext* opCtx) const = 0; MONGO_WARN_UNUSED_RESULT_FUNCTION virtual std::unique_ptr scopedOperationCompletionShardingActions(OperationContext* opCtx) const = 0; diff --git a/src/mongo/db/service_entry_point_mongod.cpp b/src/mongo/db/service_entry_point_mongod.cpp index 0269a633e68..ab04138036f 100644 --- a/src/mongo/db/service_entry_point_mongod.cpp +++ b/src/mongo/db/service_entry_point_mongod.cpp @@ -240,9 +240,9 @@ public: } } - void advanceConfigOptimeFromRequestMetadata(OperationContext* opCtx) const override { + void advanceConfigOpTimeFromRequestMetadata(OperationContext* opCtx) const override { // Handle config optime information that may have been sent along with the command. - rpc::advanceConfigOptimeFromRequestMetadata(opCtx); + rpc::advanceConfigOpTimeFromRequestMetadata(opCtx); } std::unique_ptr scopedOperationCompletionShardingActions( diff --git a/src/mongo/embedded/service_entry_point_embedded.cpp b/src/mongo/embedded/service_entry_point_embedded.cpp index 8c19c925ea5..dbf8d03ef4a 100644 --- a/src/mongo/embedded/service_entry_point_embedded.cpp +++ b/src/mongo/embedded/service_entry_point_embedded.cpp @@ -94,7 +94,7 @@ public: void handleException(const DBException& e, OperationContext* opCtx) const override {} - void advanceConfigOptimeFromRequestMetadata(OperationContext* opCtx) const override {} + void advanceConfigOpTimeFromRequestMetadata(OperationContext* opCtx) const override {} std::unique_ptr scopedOperationCompletionShardingActions( OperationContext* opCtx) const override { diff --git a/src/mongo/s/grid.cpp b/src/mongo/s/grid.cpp index 4b99d5552b5..bc9d9abfd23 100644 --- a/src/mongo/s/grid.cpp +++ b/src/mongo/s/grid.cpp @@ -121,13 +121,32 @@ repl::OpTime Grid::configOpTime() const { return _configOpTime; } -void Grid::advanceConfigOpTime(repl::OpTime opTime) { +boost::optional Grid::advanceConfigOpTime(OperationContext* opCtx, + repl::OpTime opTime, + StringData what) { + const auto prevOpTime = _advanceConfigOpTime(opTime); + if (prevOpTime && prevOpTime->getTerm() != opTime.getTerm()) { + std::string clientAddr = "(unknown)"; + if (opCtx && opCtx->getClient()) { + clientAddr = opCtx->getClient()->clientAddress(true); + } + log() << "Received " << what << " " << clientAddr << " indicating config server optime " + "term has increased, previous optime " + << prevOpTime << ", now " << opTime; + } + return prevOpTime; +} + +boost::optional Grid::_advanceConfigOpTime(const repl::OpTime& opTime) { invariant(serverGlobalParams.clusterRole != ClusterRole::ConfigServer); stdx::lock_guard lk(_mutex); if (_configOpTime < opTime) { + repl::OpTime prev = _configOpTime; _configOpTime = opTime; + return prev; } + return boost::none; } void Grid::clearForUnitTests() { diff --git a/src/mongo/s/grid.h b/src/mongo/s/grid.h index a211e4bfcc6..794a93f262f 100644 --- a/src/mongo/s/grid.h +++ b/src/mongo/s/grid.h @@ -153,9 +153,12 @@ public: /** * Called whenever a mongos or shard gets a response from a config server or shard and updates * what we've seen as the last config server optime. + * If the config optime was updated, returns the previous value. * NOTE: This is not valid to call on a config server instance. */ - void advanceConfigOpTime(repl::OpTime opTime); + boost::optional advanceConfigOpTime(OperationContext* opCtx, + repl::OpTime opTime, + StringData what); /** * Clears the grid object so that it can be reused between test executions. This will not @@ -194,6 +197,13 @@ private: // This value is updated any time a shard or mongos talks to a config server or a shard. repl::OpTime _configOpTime; + /** + * Called to update what we've seen as the last config server optime. + * If the config optime was updated, returns the previous value. + * NOTE: This is not valid to call on a config server instance. + */ + boost::optional _advanceConfigOpTime(const repl::OpTime& opTime); + // Deprecated. This is only used on mongos, and once addShard is solely handled by the configs, // it can be deleted. // Can 'localhost' be used in shard addresses? diff --git a/src/mongo/s/sharding_egress_metadata_hook.cpp b/src/mongo/s/sharding_egress_metadata_hook.cpp index 0bafa375784..468fe77bae1 100644 --- a/src/mongo/s/sharding_egress_metadata_hook.cpp +++ b/src/mongo/s/sharding_egress_metadata_hook.cpp @@ -70,13 +70,14 @@ Status ShardingEgressMetadataHook::readReplyMetadata(OperationContext* opCtx, const BSONObj& metadataObj) { try { _saveGLEStats(metadataObj, replySource); - return _advanceConfigOptimeFromShard(replySource.toString(), metadataObj); + return _advanceConfigOpTimeFromShard(opCtx, replySource.toString(), metadataObj); } catch (...) { return exceptionToStatus(); } } -Status ShardingEgressMetadataHook::_advanceConfigOptimeFromShard(ShardId shardId, +Status ShardingEgressMetadataHook::_advanceConfigOpTimeFromShard(OperationContext* opCtx, + const ShardId& shardId, const BSONObj& metadataObj) { auto const grid = Grid::get(_serviceContext); @@ -104,8 +105,8 @@ Status ShardingEgressMetadataHook::_advanceConfigOptimeFromShard(ShardId shardId // due to rollback as explained in SERVER-24630 and the last committed optime // is safe to use. const auto& replMetadata = parseStatus.getValue(); - auto opTime = replMetadata.getLastOpCommitted(); - grid->advanceConfigOpTime(opTime.opTime); + const auto opTime = replMetadata.getLastOpCommitted(); + grid->advanceConfigOpTime(opCtx, opTime.opTime, "reply from config server node"); } } else { // Regular shards return the config opTime as part of ConfigServerMetadata. @@ -115,9 +116,12 @@ Status ShardingEgressMetadataHook::_advanceConfigOptimeFromShard(ShardId shardId } const auto& configMetadata = parseStatus.getValue(); - auto opTime = configMetadata.getOpTime(); + const auto opTime = configMetadata.getOpTime(); if (opTime.is_initialized()) { - grid->advanceConfigOpTime(opTime.get()); + grid->advanceConfigOpTime(opCtx, + opTime.get(), + str::stream() << "reply from shard " << shardId + << " node"); } } return Status::OK(); diff --git a/src/mongo/s/sharding_egress_metadata_hook.h b/src/mongo/s/sharding_egress_metadata_hook.h index f8eb9fe0769..8bdf85adc66 100644 --- a/src/mongo/s/sharding_egress_metadata_hook.h +++ b/src/mongo/s/sharding_egress_metadata_hook.h @@ -71,7 +71,9 @@ protected: * On shards and mongoses this advances the Grid's stored config server optime based on the * metadata in the response object from running a command. */ - virtual Status _advanceConfigOptimeFromShard(ShardId shardId, const BSONObj& metadataObj); + virtual Status _advanceConfigOpTimeFromShard(OperationContext* opCtx, + const ShardId& shardId, + const BSONObj& metadataObj); ServiceContext* const _serviceContext; }; -- cgit v1.2.1