summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Randolph Tan <randolph@mongodb.com> 2019-09-13 16:05:48 +0000
committer evergreen <evergreen@mongodb.com> 2019-09-13 16:05:48 +0000
commit 87d8a588a8f43c580f26f3c3031e507dea6c7e1e (patch)
tree 913f715a57f1e488aa919592a0810cf57e0155c6 /src
parent 121aa792f301b5edbfbda44048f5bcbd6de830b3 (diff)
download mongo-87d8a588a8f43c580f26f3c3031e507dea6c7e1e.tar.gz
SERVER-40258 Relax locking requirements for sharding metadata refresh on shards
(cherry picked from commit 0d07bf5e7a72a5bce3f7d7d681a71d7ecfe7eb8c)
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/commands/find_cmd.cpp | 6
-rw-r--r-- src/mongo/db/commands/mr.cpp | 2
-rw-r--r-- src/mongo/db/db_raii.cpp | 5
-rw-r--r-- src/mongo/db/pipeline/pipeline_d.cpp | 11
-rw-r--r-- src/mongo/db/pipeline/process_interface_shardsvr.cpp | 5
-rw-r--r-- src/mongo/db/pipeline/process_interface_standalone.h | 5
-rw-r--r-- src/mongo/db/query/find.cpp | 5
-rw-r--r-- src/mongo/db/query/stage_builder.cpp | 8
-rw-r--r-- src/mongo/db/s/collection_sharding_runtime.cpp | 3
-rw-r--r-- src/mongo/db/s/collection_sharding_state.cpp | 28
-rw-r--r-- src/mongo/db/s/collection_sharding_state.h | 12
-rw-r--r-- src/mongo/db/s/op_observer_sharding_impl.cpp | 7
-rw-r--r-- src/mongo/db/s/shard_filtering_metadata_refresh.cpp | 9
13 files changed, 55 insertions, 51 deletions
diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp
index abee0523f06..88705768bcc 100644
--- a/src/mongo/db/commands/find_cmd.cpp
+++ b/src/mongo/db/commands/find_cmd.cpp
@@ -49,7 +49,6 @@
#include "mongo/db/query/find_common.h"
#include "mongo/db/query/get_executor.h"
#include "mongo/db/repl/replication_coordinator.h"
-#include "mongo/db/s/collection_sharding_state.h"
#include "mongo/db/service_context.h"
#include "mongo/db/stats/counters.h"
#include "mongo/db/stats/server_read_concern_metrics.h"
@@ -488,11 +487,6 @@ public:
uassertStatusOK(status.withContext("Executor error during find command"));
}
- // Before saving the cursor, ensure that whatever plan we established happened with the
- // expected collection version
- auto css = CollectionShardingState::get(opCtx, nss);
- css->checkShardVersionOrThrow(opCtx);
-
// Set up the cursor for getMore.
CursorId cursorId = 0;
if (shouldSaveCursor(opCtx, collection, state, exec.get())) {
diff --git a/src/mongo/db/commands/mr.cpp b/src/mongo/db/commands/mr.cpp
index 1d7400a634b..9367b4180d6 100644
--- a/src/mongo/db/commands/mr.cpp
+++ b/src/mongo/db/commands/mr.cpp
@@ -1469,7 +1469,7 @@ public:
const auto metadata = [&] {
AutoGetCollectionForReadCommand autoColl(opCtx, config.nss);
- return CollectionShardingState::get(opCtx, config.nss)->getCurrentMetadata();
+ return CollectionShardingState::get(opCtx, config.nss)->getOrphansFilter(opCtx);
}();
bool shouldHaveData = false;
diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp
index 079a1f32f3f..c851fcfcc9c 100644
--- a/src/mongo/db/db_raii.cpp
+++ b/src/mongo/db/db_raii.cpp
@@ -317,8 +317,9 @@ AutoGetCollectionForReadCommand::AutoGetCollectionForReadCommand(
: kDoNotChangeProfilingLevel,
deadline) {
if (!_autoCollForRead.getView()) {
- // We have both the DB and collection locked, which is the prerequisite to do a stable shard
- // version check, but we'd like to do the check after we have a satisfactory snapshot.
+ // Perform the check early so that the query planner is able to extract the correct
+ // shard key. Also make sure that the version is compatible if the query planner
+ // decides to use an empty plan.
auto css = CollectionShardingState::get(opCtx, _autoCollForRead.getNss());
css->checkShardVersionOrThrow(opCtx);
}
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index 96047148104..4a4f4b7ea14 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -126,16 +126,13 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> createRandomCursorEx
// If the incoming operation is sharded, use the CSS to infer the filtering metadata for the
// collection, otherwise treat it as unsharded
- boost::optional<ScopedCollectionMetadata> shardMetadata =
- (OperationShardingState::isOperationVersioned(opCtx)
- ? CollectionShardingState::get(opCtx, coll->ns())->getOrphansFilter(opCtx)
- : boost::optional<ScopedCollectionMetadata>{});
+ auto shardMetadata = CollectionShardingState::get(opCtx, coll->ns())->getOrphansFilter(opCtx);
// Because 'numRecords' includes orphan documents, our initial decision to optimize the $sample
// cursor may have been mistaken. For sharded collections, build a TRIAL plan that will switch
// to a collection scan if the ratio of orphaned to owned documents encountered over the first
// 100 works() is such that we would have chosen not to optimize.
- if (shardMetadata && (*shardMetadata)->isSharded()) {
+ if (shardMetadata->isSharded()) {
// The ratio of owned to orphaned documents must be at least equal to the ratio between the
// requested sampleSize and the maximum permitted sampleSize for the original constraints to
// be satisfied. For instance, if there are 200 documents and the sampleSize is 5, then at
@@ -146,12 +143,12 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> createRandomCursorEx
sampleSize / (numRecords * kMaxSampleRatioForRandCursor), kMaxSampleRatioForRandCursor);
// The trial plan is SHARDING_FILTER-MULTI_ITERATOR.
auto randomCursorPlan =
- std::make_unique<ShardFilterStage>(opCtx, *shardMetadata, ws.get(), root.release());
+ std::make_unique<ShardFilterStage>(opCtx, shardMetadata, ws.get(), root.release());
// The backup plan is SHARDING_FILTER-COLLSCAN.
std::unique_ptr<PlanStage> collScanPlan = std::make_unique<CollectionScan>(
opCtx, coll, CollectionScanParams{}, ws.get(), nullptr);
collScanPlan = std::make_unique<ShardFilterStage>(
- opCtx, *shardMetadata, ws.get(), collScanPlan.release());
+ opCtx, shardMetadata, ws.get(), collScanPlan.release());
// Place a TRIAL stage at the root of the plan tree, and pass it the trial and backup plans.
root = std::make_unique<TrialStage>(opCtx,
ws.get(),
diff --git a/src/mongo/db/pipeline/process_interface_shardsvr.cpp b/src/mongo/db/pipeline/process_interface_shardsvr.cpp
index c2672dcf2a0..74508c4a10e 100644
--- a/src/mongo/db/pipeline/process_interface_shardsvr.cpp
+++ b/src/mongo/db/pipeline/process_interface_shardsvr.cpp
@@ -206,8 +206,9 @@ unique_ptr<Pipeline, PipelineDeleter> MongoInterfaceShardServer::attachCursorSou
std::unique_ptr<ShardFilterer> MongoInterfaceShardServer::getShardFilterer(
const boost::intrusive_ptr<ExpressionContext>& expCtx) const {
- return std::make_unique<ShardFiltererImpl>(
- CollectionShardingState::get(expCtx->opCtx, expCtx->ns)->getOrphansFilter(expCtx->opCtx));
+ auto shardingMetadata =
+ CollectionShardingState::get(expCtx->opCtx, expCtx->ns)->getOrphansFilter(expCtx->opCtx);
+ return std::make_unique<ShardFiltererImpl>(std::move(shardingMetadata));
}
} // namespace mongo
diff --git a/src/mongo/db/pipeline/process_interface_standalone.h b/src/mongo/db/pipeline/process_interface_standalone.h
index 8b9df872966..ecdb5a2c0d1 100644
--- a/src/mongo/db/pipeline/process_interface_standalone.h
+++ b/src/mongo/db/pipeline/process_interface_standalone.h
@@ -57,6 +57,11 @@ public:
DBClientBase* directClient() final;
std::unique_ptr<TransactionHistoryIteratorBase> createTransactionHistoryIterator(
repl::OpTime time) const final;
+
+ /**
+ * Note: The information returned can be stale. Callers should always attach a shardVersion
+ * when sending a request against nss based on this information.
+ */
bool isSharded(OperationContext* opCtx, const NamespaceString& nss) final;
Status insert(const boost::intrusive_ptr<ExpressionContext>& expCtx,
const NamespaceString& ns,
diff --git a/src/mongo/db/query/find.cpp b/src/mongo/db/query/find.cpp
index 9fc2b14cd22..d01fd699726 100644
--- a/src/mongo/db/query/find.cpp
+++ b/src/mongo/db/query/find.cpp
@@ -709,11 +709,6 @@ std::string runQuery(OperationContext* opCtx,
MONGO_UNREACHABLE;
}
- // Before saving the cursor, ensure that whatever plan we established happened with the expected
- // collection version
- auto css = CollectionShardingState::get(opCtx, nss);
- css->checkShardVersionOrThrow(opCtx);
-
// Fill out CurOp based on query results. If we have a cursorid, we will fill out CurOp with
// this cursorid later.
long long ccId = 0;
diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp
index 02056023010..0d7d5099b53 100644
--- a/src/mongo/db/query/stage_builder.cpp
+++ b/src/mongo/db/query/stage_builder.cpp
@@ -306,11 +306,9 @@ PlanStage* buildStages(OperationContext* opCtx,
if (nullptr == childStage) {
return nullptr;
}
- return new ShardFilterStage(
- opCtx,
- CollectionShardingState::get(opCtx, collection->ns())->getOrphansFilter(opCtx),
- ws,
- childStage);
+
+ auto css = CollectionShardingState::get(opCtx, collection->ns());
+ return new ShardFilterStage(opCtx, css->getOrphansFilter(opCtx), ws, childStage);
}
case STAGE_DISTINCT_SCAN: {
const DistinctNode* dn = static_cast<const DistinctNode*>(root);
diff --git a/src/mongo/db/s/collection_sharding_runtime.cpp b/src/mongo/db/s/collection_sharding_runtime.cpp
index 1fc98f41876..dcd52ea2ebc 100644
--- a/src/mongo/db/s/collection_sharding_runtime.cpp
+++ b/src/mongo/db/s/collection_sharding_runtime.cpp
@@ -96,7 +96,8 @@ void CollectionShardingRuntime::setFilteringMetadata(OperationContext* opCtx,
CollectionMetadata newMetadata) {
invariant(!newMetadata.isSharded() || !isNamespaceAlwaysUnsharded(_nss),
str::stream() << "Namespace " << _nss.ns() << " must never be sharded.");
- invariant(opCtx->lockState()->isCollectionLockedForMode(_nss, MODE_X));
+
+ auto csrLock = CollectionShardingState::CSRLock::lockExclusive(opCtx, this);
_metadataManager->setFilteringMetadata(std::move(newMetadata));
}
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index e0dd987600d..feb519090e3 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -160,12 +160,8 @@ void CollectionShardingState::report(OperationContext* opCtx, BSONObjBuilder* bu
}
ScopedCollectionMetadata CollectionShardingState::getOrphansFilter(OperationContext* opCtx) {
- const auto receivedShardVersion = getOperationReceivedVersion(opCtx, _nss);
- if (!receivedShardVersion)
- return {kUnshardedCollection};
-
const auto atClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime();
- auto optMetadata = _getMetadata(atClusterTime);
+ auto optMetadata = _getMetadataWithVersionCheckAt(opCtx, atClusterTime);
if (!optMetadata)
return {kUnshardedCollection};
@@ -199,26 +195,34 @@ boost::optional<ChunkVersion> CollectionShardingState::getCurrentShardVersionIfK
}
void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) {
+ (void)_getMetadataWithVersionCheckAt(opCtx, boost::none);
+}
+
+boost::optional<ScopedCollectionMetadata> CollectionShardingState::_getMetadataWithVersionCheckAt(
+ OperationContext* opCtx, const boost::optional<mongo::LogicalTime>& atClusterTime) {
const auto optReceivedShardVersion = getOperationReceivedVersion(opCtx, _nss);
if (!optReceivedShardVersion)
- return;
+ return ScopedCollectionMetadata(kUnshardedCollection);
const auto& receivedShardVersion = *optReceivedShardVersion;
if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) {
- return;
+ return boost::none;
}
// An operation with read concern 'available' should never have shardVersion set.
invariant(repl::ReadConcernArgs::get(opCtx).getLevel() !=
repl::ReadConcernLevel::kAvailableReadConcern);
- const auto metadata = getCurrentMetadata();
- const auto wantedShardVersion =
- metadata->isSharded() ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();
+ auto csrLock = CSRLock::lockShared(opCtx, this);
+
+ auto metadata = _getMetadata(atClusterTime);
+ auto wantedShardVersion = ChunkVersion::UNSHARDED();
+ if (metadata && (*metadata)->isSharded()) {
+ wantedShardVersion = (*metadata)->getShardVersion();
+ }
auto criticalSectionSignal = [&] {
- auto csrLock = CSRLock::lockShared(opCtx, this);
return _critSec.getSignal(opCtx->lockState()->isWriteLocked()
? ShardingMigrationCriticalSection::kWrite
: ShardingMigrationCriticalSection::kRead);
@@ -235,7 +239,7 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx)
}
if (receivedShardVersion.isWriteCompatibleWith(wantedShardVersion)) {
- return;
+ return metadata;
}
//
diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h
index d62b010bec6..906c366b8fb 100644
--- a/src/mongo/db/s/collection_sharding_state.h
+++ b/src/mongo/db/s/collection_sharding_state.h
@@ -91,8 +91,9 @@ public:
* metadata object.
*
* The intended users of this method are callers which need to perform orphan filtering. Use
- * 'getCurrentMetadata' for all other cases, where just sharding-related properties of the
- * collection are necessary (e.g., isSharded or the shard key).
+ * 'getCurrentMetadata' for other cases, such as obtaining sharding-related properties of the
+ * collection that won't change under a collection IX/IS lock
+ * (e.g., isSharded or the shard key).
*
* The returned object is safe to access even after the collection lock has been dropped.
*/
@@ -155,6 +156,13 @@ protected:
private:
friend CSRLock;
+ /**
+ * Performs the shard version check for the operation and returns the latest version of the
+ * collection metadata, with filtering configured for atClusterTime if specified.
+ */
+ boost::optional<ScopedCollectionMetadata> _getMetadataWithVersionCheckAt(
+ OperationContext* opCtx, const boost::optional<mongo::LogicalTime>& atClusterTime);
+
// Object-wide ResourceMutex to protect changes to the CollectionShardingRuntime or objects
// held within. Use only the CollectionShardingRuntimeLock to lock this mutex.
Lock::ResourceMutex _stateChangeMutex;
diff --git a/src/mongo/db/s/op_observer_sharding_impl.cpp b/src/mongo/db/s/op_observer_sharding_impl.cpp
index 1ac9e743879..ec0fcb370e9 100644
--- a/src/mongo/db/s/op_observer_sharding_impl.cpp
+++ b/src/mongo/db/s/op_observer_sharding_impl.cpp
@@ -42,9 +42,10 @@ namespace {
const auto getIsMigrating = OperationContext::declareDecoration<bool>();
/**
- * Write operations do shard version checking, but do not perform orphan document filtering. Because
- * of this, if an update operation runs as part of a 'readConcern:snapshot' transaction, it might
- * get routed to a shard which no longer owns the chunk being written to. In such cases, throw a
+ * Write operations do shard version checking, but if an update operation runs as part of a
+ * 'readConcern:snapshot' transaction, the router could have used the metadata at the snapshot
+ * time and yet set the latest shard version on the request. This is why the write can get routed
+ * to a shard which no longer owns the chunk being written to. In such cases, throw a
* MigrationConflict exception to indicate that the transaction needs to be rolled-back and
* restarted.
*/
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
index bd55c8d6b9e..2d8b2359378 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
@@ -157,7 +157,7 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
// No chunk manager, so unsharded. Avoid using AutoGetCollection() as it returns the
// InvalidViewDefinition error code if an invalid view is in the 'system.views' collection.
AutoGetDb autoDb(opCtx, nss.db(), MODE_IX);
- Lock::CollectionLock collLock(opCtx, nss, MODE_X);
+ Lock::CollectionLock collLock(opCtx, nss, MODE_IX);
CollectionShardingRuntime::get(opCtx, nss)
->setFilteringMetadata(opCtx, CollectionMetadata());
@@ -186,11 +186,10 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx,
}
}
- // Exclusive collection lock needed since we're now changing the metadata. Avoid using
- // AutoGetCollection() as it returns the InvalidViewDefinition error code if an invalid view is
- // in the 'system.views' collection.
+ // Avoid using AutoGetCollection() as it returns the InvalidViewDefinition error code if an
+ // invalid view is in the 'system.views' collection.
AutoGetDb autoDb(opCtx, nss.db(), MODE_IX);
- Lock::CollectionLock collLock(opCtx, nss, MODE_X);
+ Lock::CollectionLock collLock(opCtx, nss, MODE_IX);
auto* const css = CollectionShardingRuntime::get(opCtx, nss);
{