From 5a1dbe698bf88ef3f0ce2348c705d6a042c32011 Mon Sep 17 00:00:00 2001 From: Josef Ahmad Date: Thu, 13 Jan 2022 11:21:43 +0000 Subject: SERVER-61709 Introduce implicitly replicated namespaces Implicitly replicated namespaces are internal namespaces that do not replicate writes, with the exception of deletions, user-initiated direct writes and some maintenance operations. This patch lists config.system.preimages, config.images_collection, config.transactions and config.changes.* as implicitly replicated namespaces, and unifies and validates their semantics. It also special-cases some of the config.transactions replication behaviour that is too specific to be generalised. --- .../set_feature_compatibility_version_command.cpp | 3 ++- src/mongo/db/exec/update_stage.cpp | 12 ++++++++++++ src/mongo/db/exec/upsert_stage.cpp | 9 +++++++++ src/mongo/db/namespace_string.cpp | 16 ++++++++++++++++ src/mongo/db/namespace_string.h | 17 +++++++++++++++++ src/mongo/db/op_observer_impl.cpp | 1 - src/mongo/db/op_observer_impl_test.cpp | 1 + .../db/pipeline/change_stream_pre_image_helpers.cpp | 1 - src/mongo/db/repl/dbcheck.cpp | 3 +-- src/mongo/db/repl/oplog.cpp | 1 - src/mongo/db/session_catalog.h | 2 ++ 11 files changed, 60 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp index d7d6d9f6d8e..a1aa4f78199 100644 --- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp +++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp @@ -711,7 +711,8 @@ private: // Due to the possibility that the shell or drivers have implicit sessions enabled, we // cannot write to the config.transactions collection while we're in a session. So we // construct a temporary client to as a work around. 
- auto newClient = opCtx->getServiceContext()->makeClient("InternalSessionsCleanup"); + auto newClient = opCtx->getServiceContext()->makeClient( + SessionCatalog::kInternalSessionsCleanupClient.toString()); { stdx::lock_guard lk(*newClient.get()); diff --git a/src/mongo/db/exec/update_stage.cpp b/src/mongo/db/exec/update_stage.cpp index bda0a81078b..d6e53a345ef 100644 --- a/src/mongo/db/exec/update_stage.cpp +++ b/src/mongo/db/exec/update_stage.cpp @@ -52,6 +52,7 @@ #include "mongo/db/s/sharding_state.h" #include "mongo/db/s/sharding_write_router.h" #include "mongo/db/service_context.h" +#include "mongo/db/session_catalog.h" #include "mongo/db/storage/duplicate_key_error_info.h" #include "mongo/db/update/path_support.h" #include "mongo/db/update/storage_validation.h" @@ -372,6 +373,17 @@ PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) { return PlanStage::IS_EOF; } + boost::optional<repl::UnreplicatedWritesBlock> unReplBlock; + const auto isSessionCleanupClient = + opCtx()->getClient()->desc() == SessionCatalog::kInternalSessionsCleanupClient; + if (collection()->ns().isImplicitlyReplicated() && !_isUserInitiatedWrite && + !isSessionCleanupClient) { + // Implicitly replicated collections do not replicate updates. + // However, user-initiated writes and some background maintenance tasks are allowed + // to replicate as they cannot be derived from the oplog. + unReplBlock.emplace(opCtx()); + } + // It is possible that after an update was applied, a WriteConflictException // occurred and prevented us from returning ADVANCED with the requested version // of the document. 
diff --git a/src/mongo/db/exec/upsert_stage.cpp b/src/mongo/db/exec/upsert_stage.cpp index a6f6bf73d04..f1b684a544d 100644 --- a/src/mongo/db/exec/upsert_stage.cpp +++ b/src/mongo/db/exec/upsert_stage.cpp @@ -35,6 +35,7 @@ #include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/query/query_feature_flags_gen.h" #include "mongo/db/s/operation_sharding_state.h" +#include "mongo/db/session_catalog.h" #include "mongo/db/update/storage_validation.h" #include "mongo/s/would_change_owning_shard_exception.h" @@ -72,6 +73,14 @@ PlanStage::StageState UpsertStage::doWork(WorkingSetID* out) { return StageState::IS_EOF; } + boost::optional<repl::UnreplicatedWritesBlock> unReplBlock; + const auto isSessionCleanupClient = + opCtx()->getClient()->desc() == SessionCatalog::kInternalSessionsCleanupClient; + if (collection()->ns().isImplicitlyReplicated() && !isSessionCleanupClient) { + // Implicitly replicated collections do not replicate updates. + unReplBlock.emplace(opCtx()); + } + // First, attempt to perform the update on a matching document. 
auto updateState = UpdateStage::doWork(out); diff --git a/src/mongo/db/namespace_string.cpp b/src/mongo/db/namespace_string.cpp index 631177c4d63..f1e258ba2ce 100644 --- a/src/mongo/db/namespace_string.cpp +++ b/src/mongo/db/namespace_string.cpp @@ -347,6 +347,10 @@ bool NamespaceString::isConfigImagesCollection() const { return ns() == kConfigImagesNamespace.ns(); } +bool NamespaceString::isConfigTransactionsCollection() const { + return ns() == kSessionTransactionsTableNamespace.ns(); +} + NamespaceString NamespaceString::makeTimeseriesBucketsNamespace() const { return {db(), kTimeseriesBucketsCollectionPrefix.toString() + coll()}; } @@ -356,6 +360,18 @@ NamespaceString NamespaceString::getTimeseriesViewNamespace() const { return {db(), coll().substr(kTimeseriesBucketsCollectionPrefix.size())}; } +bool NamespaceString::isImplicitlyReplicated() const { + if (isChangeStreamPreImagesCollection() || isConfigImagesCollection() || + isConfigTransactionsCollection() || isChangeCollection()) { + // Implicitly replicated namespaces are replicated, although they only replicate a subset of + // writes. + invariant(isReplicated()); + return true; + } + + return false; +} + bool NamespaceString::isReplicated() const { if (isLocal()) { return false; diff --git a/src/mongo/db/namespace_string.h b/src/mongo/db/namespace_string.h index 17ea8855bc7..339c62ce23f 100644 --- a/src/mongo/db/namespace_string.h +++ b/src/mongo/db/namespace_string.h @@ -307,6 +307,9 @@ public: bool isSystemDotProfile() const { return coll() == "system.profile"; } + bool isChangeCollection() const { + return (db() == kConfigDb) && coll().startsWith("changes."); + } bool isSystemDotViews() const { return coll() == kSystemDotViewsCollectionName; } @@ -383,6 +386,11 @@ public: */ bool isConfigImagesCollection() const; + /** + * Returns whether the specified namespace is config.transactions. + */ + bool isConfigTransactionsCollection() const; + /** * Returns the time-series buckets namespace for this view. 
*/ @@ -393,6 +401,15 @@ public: */ NamespaceString getTimeseriesViewNamespace() const; + /** + * Returns whether the namespace is implicitly replicated, based only on its string value. + * + * An implicitly replicated namespace is an internal namespace which does not replicate writes + * via the oplog, with the exception of deletions. Writes are not replicated as an optimization + * because their content can be reliably derived from entries in the oplog. + */ + bool isImplicitlyReplicated() const; + /** * Returns whether a namespace is replicated, based only on its string value. One notable * omission is that map reduce `tmp.mr` collections may or may not be replicated. Callers must diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp index 398c577df06..51bf478673f 100644 --- a/src/mongo/db/op_observer_impl.cpp +++ b/src/mongo/db/op_observer_impl.cpp @@ -297,7 +297,6 @@ void writeToImageCollection(OperationContext* opCtx, imageEntry.setImageKind(imageKind); imageEntry.setImage(dataImage); - repl::UnreplicatedWritesBlock unreplicated(opCtx); DisableDocumentValidation documentValidationDisabler( opCtx, DocumentValidationSettings::kDisableInternalValidation); diff --git a/src/mongo/db/op_observer_impl_test.cpp b/src/mongo/db/op_observer_impl_test.cpp index 373d979feab..d0273e162ea 100644 --- a/src/mongo/db/op_observer_impl_test.cpp +++ b/src/mongo/db/op_observer_impl_test.cpp @@ -187,6 +187,7 @@ public: reset(opCtx, NamespaceString::kRsOplogNamespace); reset(opCtx, NamespaceString::kSessionTransactionsTableNamespace); reset(opCtx, NamespaceString::kConfigImagesNamespace); + reset(opCtx, NamespaceString::kChangeStreamPreImagesNamespace); } protected: diff --git a/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp b/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp index 01335139a8d..68e2e6949f8 100644 --- a/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp +++ 
b/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp @@ -48,7 +48,6 @@ void writeToChangeStreamPreImagesCollection(OperationContext* opCtx, // This lock acquisition can block on a stronger lock held by another operation modifying the // pre-images collection. There are no known cases where an operation holding an exclusive lock // on the pre-images collection also waits for oplog visibility. - repl::UnreplicatedWritesBlock unreplicated(opCtx); AllowLockAcquisitionOnTimestampedUnitOfWork allowLockAcquisition(opCtx->lockState()); AutoGetCollection preimagesCollectionRaii(opCtx, collectionNamespace, LockMode::MODE_IX); UpdateResult res = Helpers::upsert(opCtx, collectionNamespace.toString(), preImage.toBSON()); diff --git a/src/mongo/db/repl/dbcheck.cpp b/src/mongo/db/repl/dbcheck.cpp index 79a98205972..cb909bbd08e 100644 --- a/src/mongo/db/repl/dbcheck.cpp +++ b/src/mongo/db/repl/dbcheck.cpp @@ -206,8 +206,7 @@ std::unique_ptr dbCheckBatchEntry( } // Implcitily replicated collections and capped collections not replicating truncation are // not designed to be consistent, so inconsistency is not necessarily pathological. - if (nss.isChangeStreamPreImagesCollection() || nss.isConfigImagesCollection() || - (options && options->capped)) { + if (nss.isImplicitlyReplicated() || (options && options->capped)) { return SeverityEnum::Warning; } diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index ea9b83dc41b..ec5933b830d 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -297,7 +297,6 @@ void writeToImageCollection(OperationContext* opCtx, request.setFromOplogApplication(true); try { // This code path can also be hit by things such as `applyOps` and tenant migrations. - repl::UnreplicatedWritesBlock dontReplicate(opCtx); ::mongo::update(opCtx, autoColl.getDb(), request); } catch (const ExceptionFor&) { // We can get a duplicate key when two upserts race on inserting a document. 
diff --git a/src/mongo/db/session_catalog.h b/src/mongo/db/session_catalog.h index 7bed6ae91a3..bcf696042b7 100644 --- a/src/mongo/db/session_catalog.h +++ b/src/mongo/db/session_catalog.h @@ -59,6 +59,8 @@ class SessionCatalog { friend class OperationContextSession; public: + static constexpr StringData kInternalSessionsCleanupClient = "InternalSessionsCleanup"_sd; + class ScopedCheckedOutSession; class SessionToKill; -- cgit v1.2.1