From 43c2b5b172cf6783319944c0d6931478db01eefa Mon Sep 17 00:00:00 2001 From: Jonathan Reams Date: Mon, 10 Feb 2020 10:11:25 +0100 Subject: SERVER-45805 Add recordPreImages flag to collMod and create commands create mode 100644 jstests/noPassthrough/record_preimage_startup_validation.js --- src/mongo/db/catalog/coll_mod.cpp | 12 ++++ src/mongo/db/catalog/collection.h | 4 ++ src/mongo/db/catalog/collection_impl.cpp | 68 ++++++++++++++++++++-- src/mongo/db/catalog/collection_impl.h | 5 ++ src/mongo/db/catalog/collection_mock.h | 8 +++ src/mongo/db/catalog/collection_options.cpp | 10 ++++ src/mongo/db/catalog/collection_options.h | 1 + src/mongo/db/commands/create.idl | 5 ++ src/mongo/db/repair_database_and_check_version.cpp | 7 +++ src/mongo/db/storage/durable_catalog.h | 5 ++ src/mongo/db/storage/durable_catalog_impl.cpp | 6 ++ src/mongo/db/storage/durable_catalog_impl.h | 2 + src/mongo/db/transaction_participant.cpp | 12 +++- 13 files changed, 138 insertions(+), 7 deletions(-) (limited to 'src/mongo/db') diff --git a/src/mongo/db/catalog/coll_mod.cpp b/src/mongo/db/catalog/coll_mod.cpp index 49a666c9c67..5ad39981d44 100644 --- a/src/mongo/db/catalog/coll_mod.cpp +++ b/src/mongo/db/catalog/coll_mod.cpp @@ -74,6 +74,7 @@ struct CollModRequest { BSONElement collValidator = {}; std::string collValidationAction = {}; std::string collValidationLevel = {}; + bool recordPreImages = false; }; StatusWith parseCollModRequest(OperationContext* opCtx, @@ -222,6 +223,13 @@ StatusWith parseCollModRequest(OperationContext* opCtx, return Status(ErrorCodes::InvalidOptions, "'viewOn' option must be a string"); } cmr.viewOn = e.str(); + } else if (fieldName == "recordPreImages") { + if (isView) { + return {ErrorCodes::InvalidOptions, + str::stream() << "option not supported on a view: " << fieldName}; + } + + cmr.recordPreImages = e.trueValue(); } else { if (isView) { return Status(ErrorCodes::InvalidOptions, @@ -396,6 +404,10 @@ Status _collModInternal(OperationContext* opCtx, if (!cmrNew.collValidationLevel.empty()) invariant(coll->setValidationLevel(opCtx, cmrNew.collValidationLevel)); + if (cmrNew.recordPreImages != oldCollOptions.recordPreImages) { + coll->setRecordPreImages(opCtx, cmrNew.recordPreImages); + } + // Only observe non-view collMods, as view operations are observed as operations on the // system.views collection. getGlobalServiceContext()->getOpObserver()->onCollMod( diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h index 5cac53c94b3..e11a790382c 100644 --- a/src/mongo/db/catalog/collection.h +++ b/src/mongo/db/catalog/collection.h @@ -93,6 +93,7 @@ struct CollectionUpdateArgs { bool fromMigrate = false; StoreDocOption storeDocOption = StoreDocOption::None; + bool preImageRecordingEnabledForCollection = false; }; /** @@ -383,6 +384,9 @@ public: StringData newLevel, StringData newAction) = 0; + virtual bool getRecordPreImages() const = 0; + virtual void setRecordPreImages(OperationContext* opCtx, bool val) = 0; + /** * Returns true if this is a temporary collection. * diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp index 553af36eaf5..9d845cb28da 100644 --- a/src/mongo/db/catalog/collection_impl.cpp +++ b/src/mongo/db/catalog/collection_impl.cpp @@ -211,6 +211,35 @@ Status checkValidatorCanBeUsedOnNs(const BSONObj& validator, return Status::OK(); } +Status validatePreImageRecording(OperationContext* opCtx, const NamespaceString& ns) { + if (ns.db() == NamespaceString::kAdminDb || ns.db() == NamespaceString::kLocalDb) { + return {ErrorCodes::InvalidOptions, + str::stream() << "recordPreImages collection option is not supported on the " + << ns.db() << " database"}; + } + + if (!serverGlobalParams.featureCompatibility.isVersionInitialized() || + serverGlobalParams.featureCompatibility.getVersion() != + ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo44) { + return {ErrorCodes::InvalidOptions, + "recordPreImages collection option is only supported when the feature " + "compatibility version is set to 4.4 or above"}; + } + + if (serverGlobalParams.clusterRole != ClusterRole::None) { + return {ErrorCodes::InvalidOptions, + "recordPreImages collection option is not supported on shards or config servers"}; + } + + auto replCoord = repl::ReplicationCoordinator::get(opCtx); + if (!replCoord->isReplEnabled()) { + return {ErrorCodes::InvalidOptions, + "recordPreImages collection option depends on being in a replica set"}; + } + + return Status::OK(); +} + } // namespace CollectionImpl::CollectionImpl(OperationContext* opCtx, @@ -273,6 +302,11 @@ void CollectionImpl::init(OperationContext* opCtx) { } _validationAction = uassertStatusOK(_parseValidationAction(collectionOptions.validationAction)); _validationLevel = uassertStatusOK(_parseValidationLevel(collectionOptions.validationLevel)); + if (collectionOptions.recordPreImages) { + uassertStatusOK(validatePreImageRecording(opCtx, _ns)); + _recordPreImages = true; + } + getIndexCatalog()->init(opCtx).transitional_ignore(); _initialized = true; } @@ -643,7 +677,8 @@ void CollectionImpl::deleteDocument(OperationContext* opCtx, getGlobalServiceContext()->getOpObserver()->aboutToDelete(opCtx, ns(), doc.value()); boost::optional deletedDoc; - if (storeDeletedDoc == Collection::StoreDeletedDoc::On) { + if ((storeDeletedDoc == Collection::StoreDeletedDoc::On && opCtx->getTxnNumber()) || + getRecordPreImages()) { deletedDoc.emplace(doc.value().getOwned()); } @@ -716,7 +751,13 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx, str::stream() << "Cannot change the size of a document in a capped collection: " << oldSize << " != " << newDoc.objsize()); - args->preImageDoc = oldDoc.value().getOwned(); + // The preImageDoc may not be boost::none if this update was a retryable findAndModify or if + // the update may have changed the shard key. For non-in-place updates we always set the + // preImageDoc here to an owned copy of the pre-image. + if (!args->preImageDoc) { + args->preImageDoc = oldDoc.value().getOwned(); + } + args->preImageRecordingEnabledForCollection = getRecordPreImages(); uassertStatusOK( _recordStore->updateRecord(opCtx, oldLocation, newDoc.objdata(), newDoc.objsize())); @@ -725,7 +766,7 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx, int64_t keysInserted, keysDeleted; uassertStatusOK(_indexCatalog->updateRecord( - opCtx, args->preImageDoc.get(), newDoc, oldLocation, &keysInserted, &keysDeleted)); + opCtx, *args->preImageDoc, newDoc, oldLocation, &keysInserted, &keysDeleted)); if (opDebug) { opDebug->additiveMetrics.incrementKeysInserted(keysInserted); @@ -760,12 +801,19 @@ StatusWith CollectionImpl::updateDocumentWithDamages( invariant(oldRec.snapshotId() == opCtx->recoveryUnit()->getSnapshotId()); invariant(updateWithDamagesSupported()); + // For in-place updates we need to grab an owned copy of the pre-image doc if pre-image + // recording is enabled and we haven't already set the pre-image due to this update being + // a retryable findAndModify or a possible update to the shard key. + if (!args->preImageDoc && getRecordPreImages()) { + args->preImageDoc = oldRec.value().toBson().getOwned(); + } + auto newRecStatus = _recordStore->updateWithDamages(opCtx, loc, oldRec.value(), damageSource, damages); if (newRecStatus.isOK()) { args->updatedDoc = newRecStatus.getValue().toBson(); - + args->preImageRecordingEnabledForCollection = getRecordPreImages(); OplogUpdateEntryArgs entryArgs(*args, ns(), _uuid); getGlobalServiceContext()->getOpObserver()->onUpdate(opCtx, entryArgs); } @@ -776,6 +824,18 @@ bool CollectionImpl::isTemporary(OperationContext* opCtx) const { return DurableCatalog::get(opCtx)->getCollectionOptions(opCtx, getCatalogId()).temp; } +bool CollectionImpl::getRecordPreImages() const { + return _recordPreImages; +} + +void CollectionImpl::setRecordPreImages(OperationContext* opCtx, bool val) { + if (val) { + uassertStatusOK(validatePreImageRecording(opCtx, _ns)); + } + DurableCatalog::get(opCtx)->setRecordPreImages(opCtx, getCatalogId(), val); + _recordPreImages = val; +} + bool CollectionImpl::isCapped() const { return _cappedNotifier.get(); } diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h index 42178e704b8..864bb6b9ea7 100644 --- a/src/mongo/db/catalog/collection_impl.h +++ b/src/mongo/db/catalog/collection_impl.h @@ -266,6 +266,9 @@ public: StringData newLevel, StringData newAction) final; + bool getRecordPreImages() const final; + void setRecordPreImages(OperationContext* opCtx, bool val) final; + bool isTemporary(OperationContext* opCtx) const final; // @@ -403,6 +406,8 @@ private: ValidationAction _validationAction; ValidationLevel _validationLevel; + bool _recordPreImages = false; + // Notifier object for awaitData. Threads polling a capped collection for new data can wait // on this object until notified of the arrival of new data. // diff --git a/src/mongo/db/catalog/collection_mock.h b/src/mongo/db/catalog/collection_mock.h index 7a9579fe67b..001bee4db5d 100644 --- a/src/mongo/db/catalog/collection_mock.h +++ b/src/mongo/db/catalog/collection_mock.h @@ -208,6 +208,14 @@ public: std::abort(); } + bool getRecordPreImages() const { + std::abort(); + } + + void setRecordPreImages(OperationContext* opCtx, bool val) { + std::abort(); + } + bool isCapped() const { std::abort(); } diff --git a/src/mongo/db/catalog/collection_options.cpp b/src/mongo/db/catalog/collection_options.cpp index 9c1561e8192..f086b90547c 100644 --- a/src/mongo/db/catalog/collection_options.cpp +++ b/src/mongo/db/catalog/collection_options.cpp @@ -168,6 +168,8 @@ StatusWith CollectionOptions::parse(const BSONObj& options, P continue; } else if (fieldName == "temp") { collectionOptions.temp = e.trueValue(); + } else if (fieldName == "recordPreImages") { + collectionOptions.recordPreImages = e.trueValue(); } else if (fieldName == "storageEngine") { Status status = checkStorageEngineOptions(e); if (!status.isOK()) { @@ -286,6 +288,10 @@ void CollectionOptions::appendBSON(BSONObjBuilder* builder) const { if (temp) builder->appendBool("temp", true); + if (recordPreImages) { + builder->appendBool("recordPreImages", true); + } + if (!storageEngine.isEmpty()) { builder->append("storageEngine", storageEngine); } @@ -341,6 +347,10 @@ bool CollectionOptions::matchesStorageOptions(const CollectionOptions& other, return false; } + if (recordPreImages != other.recordPreImages) { + return false; + } + if (temp != other.temp) { return false; } diff --git a/src/mongo/db/catalog/collection_options.h b/src/mongo/db/catalog/collection_options.h index f1644c69dd8..d5de2b5cb11 100644 --- a/src/mongo/db/catalog/collection_options.h +++ b/src/mongo/db/catalog/collection_options.h @@ -119,6 +119,7 @@ struct CollectionOptions { } autoIndexId = DEFAULT; bool temp = false; + bool recordPreImages = false; // Storage engine collection options. Always owned or empty. BSONObj storageEngine; diff --git a/src/mongo/db/commands/create.idl b/src/mongo/db/commands/create.idl index 09f426e5dc7..a413db7a150 100644 --- a/src/mongo/db/commands/create.idl +++ b/src/mongo/db/commands/create.idl @@ -107,6 +107,11 @@ commands: description: "A document that expresses the write concern for the operation." type: object optional: true + recordPreImages: + description: "Sets whether updates/deletes should store the pre-image of the + document in the oplog" + type: safeBool + optional: true temp: description: "DEPRECATED" type: safeBool diff --git a/src/mongo/db/repair_database_and_check_version.cpp b/src/mongo/db/repair_database_and_check_version.cpp index abbccedf1dd..f152e9c378c 100644 --- a/src/mongo/db/repair_database_and_check_version.cpp +++ b/src/mongo/db/repair_database_and_check_version.cpp @@ -493,6 +493,13 @@ bool repairDatabasesAndCheckVersion(OperationContext* opCtx) { // Refresh list of database names to include newly-created admin, if it exists. dbNames = storageEngine->listDatabases(); + + // We want to recover the admin database first so we can load the FCV early since + // some collection validation may depend on the FCV being set. + if (auto it = std::find(dbNames.begin(), dbNames.end(), "admin"); it != dbNames.end()) { + std::swap(*it, dbNames.front()); + } + for (const auto& dbName : dbNames) { if (dbName != "local") { nonLocalDatabases = true; diff --git a/src/mongo/db/storage/durable_catalog.h b/src/mongo/db/storage/durable_catalog.h index 815e0e3a262..29e1fa394cb 100644 --- a/src/mongo/db/storage/durable_catalog.h +++ b/src/mongo/db/storage/durable_catalog.h @@ -163,6 +163,11 @@ public: */ virtual void setIsTemp(OperationContext* opCtx, RecordId catalogId, bool isTemp) = 0; + /** + * Updates whether updates/deletes should store their pre-images in the opLog. + */ + virtual void setRecordPreImages(OperationContext* opCtx, RecordId catalogId, bool val) = 0; + /** * Updates the validator for this collection. * diff --git a/src/mongo/db/storage/durable_catalog_impl.cpp b/src/mongo/db/storage/durable_catalog_impl.cpp index b374ebf26f7..c6ed7ee93a5 100644 --- a/src/mongo/db/storage/durable_catalog_impl.cpp +++ b/src/mongo/db/storage/durable_catalog_impl.cpp @@ -919,6 +919,12 @@ void DurableCatalogImpl::setIsTemp(OperationContext* opCtx, RecordId catalogId, putMetaData(opCtx, catalogId, md); } +void DurableCatalogImpl::setRecordPreImages(OperationContext* opCtx, RecordId catalogId, bool val) { + BSONCollectionCatalogEntry::MetaData md = getMetaData(opCtx, catalogId); + md.options.recordPreImages = val; + putMetaData(opCtx, catalogId, md); +} + void DurableCatalogImpl::updateValidator(OperationContext* opCtx, RecordId catalogId, const BSONObj& validator, diff --git a/src/mongo/db/storage/durable_catalog_impl.h b/src/mongo/db/storage/durable_catalog_impl.h index 7e86bb4a308..f11e5b445f9 100644 --- a/src/mongo/db/storage/durable_catalog_impl.h +++ b/src/mongo/db/storage/durable_catalog_impl.h @@ -130,6 +130,8 @@ public: void setIsTemp(OperationContext* opCtx, RecordId catalogId, bool isTemp); + void setRecordPreImages(OperationContext* opCtx, RecordId catalogId, bool val) override; + void updateValidator(OperationContext* opCtx, RecordId catalogId, const BSONObj& validator, diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index 7687b9a5d86..031d4ae6568 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -1117,7 +1117,8 @@ Timestamp TransactionParticipant::Participant::prepareTransaction( } else { // Even if the prepared transaction contained no statements, we always reserve at least // 1 oplog slot for the prepare oplog entry. - const auto numSlotsToReserve = retrieveCompletedTransactionOperations(opCtx).size(); + auto numSlotsToReserve = retrieveCompletedTransactionOperations(opCtx).size(); + numSlotsToReserve += p().numberOfPreImagesToWrite; oplogSlotReserver.emplace(opCtx, std::max(1, static_cast(numSlotsToReserve))); invariant(oplogSlotReserver->getSlots().size() >= 1); prepareOplogSlot = oplogSlotReserver->getLastSlot(); @@ -1143,7 +1144,7 @@ Timestamp TransactionParticipant::Participant::prepareTransaction( opCtx->getWriteUnitOfWork()->prepare(); p().needToWriteAbortEntry = true; opCtx->getServiceContext()->getOpObserver()->onTransactionPrepare( - opCtx, reservedSlots, completedTransactionOperations); + opCtx, reservedSlots, &completedTransactionOperations, p().numberOfPreImagesToWrite); abortGuard.dismiss(); @@ -1195,6 +1196,10 @@ void TransactionParticipant::Participant::addTransactionOperation( invariant(opCtx->lockState()->inAWriteUnitOfWork()); p().transactionOperations.push_back(operation); p().transactionOperationBytes += repl::OplogEntry::getDurableReplOperationSize(operation); + if (!operation.getPreImage().isEmpty()) { + p().transactionOperationBytes += operation.getPreImage().objsize(); + ++p().numberOfPreImagesToWrite; + } auto transactionSizeLimitBytes = gTransactionSizeLimitBytes.load(); uassert(ErrorCodes::TransactionTooLarge, @@ -1230,6 +1235,7 @@ void TransactionParticipant::Participant::clearOperationsInMemory(OperationConte invariant(p().autoCommit); p().transactionOperationBytes = 0; p().transactionOperations.clear(); + p().numberOfPreImagesToWrite = 0; } void TransactionParticipant::Participant::commitUnpreparedTransaction(OperationContext* opCtx) { @@ -1241,7 +1247,7 @@ void TransactionParticipant::Participant::commitUnpreparedTransaction(OperationC auto opObserver = opCtx->getServiceContext()->getOpObserver(); invariant(opObserver); - opObserver->onUnpreparedTransactionCommit(opCtx, txnOps); + opObserver->onUnpreparedTransactionCommit(opCtx, &txnOps, p().numberOfPreImagesToWrite); // Read-only transactions with all read concerns must wait for any data they read to be majority // committed. For local read concern this is to match majority read concern. For both local and -- cgit v1.2.1