summary | refs | log | tree | commit | diff
path: root/src/mongo/db
diff options
context:
space:
mode:
author: Jonathan Reams <jbreams@mongodb.com> 2020-02-10 10:11:25 +0100
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-02-10 13:08:24 +0000
commit: 43c2b5b172cf6783319944c0d6931478db01eefa (patch)
tree: 16da88e0a58c73bdfd3f721c8376223925e0e07f /src/mongo/db
parent: 4ae67479e73174a75f2d7141360edb8a8ef90be8 (diff)
download: mongo-43c2b5b172cf6783319944c0d6931478db01eefa.tar.gz
SERVER-45805 Add recordPreImages flag to collMod and create commands
create mode 100644 jstests/noPassthrough/record_preimage_startup_validation.js
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- src/mongo/db/catalog/coll_mod.cpp 12
-rw-r--r-- src/mongo/db/catalog/collection.h 4
-rw-r--r-- src/mongo/db/catalog/collection_impl.cpp 68
-rw-r--r-- src/mongo/db/catalog/collection_impl.h 5
-rw-r--r-- src/mongo/db/catalog/collection_mock.h 8
-rw-r--r-- src/mongo/db/catalog/collection_options.cpp 10
-rw-r--r-- src/mongo/db/catalog/collection_options.h 1
-rw-r--r-- src/mongo/db/commands/create.idl 5
-rw-r--r-- src/mongo/db/repair_database_and_check_version.cpp 7
-rw-r--r-- src/mongo/db/storage/durable_catalog.h 5
-rw-r--r-- src/mongo/db/storage/durable_catalog_impl.cpp 6
-rw-r--r-- src/mongo/db/storage/durable_catalog_impl.h 2
-rw-r--r-- src/mongo/db/transaction_participant.cpp 12
13 files changed, 138 insertions, 7 deletions
diff --git a/src/mongo/db/catalog/coll_mod.cpp b/src/mongo/db/catalog/coll_mod.cpp
index 49a666c9c67..5ad39981d44 100644
--- a/src/mongo/db/catalog/coll_mod.cpp
+++ b/src/mongo/db/catalog/coll_mod.cpp
@@ -74,6 +74,7 @@ struct CollModRequest {
BSONElement collValidator = {};
std::string collValidationAction = {};
std::string collValidationLevel = {};
+ bool recordPreImages = false;
};
StatusWith<CollModRequest> parseCollModRequest(OperationContext* opCtx,
@@ -222,6 +223,13 @@ StatusWith<CollModRequest> parseCollModRequest(OperationContext* opCtx,
return Status(ErrorCodes::InvalidOptions, "'viewOn' option must be a string");
}
cmr.viewOn = e.str();
+ } else if (fieldName == "recordPreImages") {
+ if (isView) {
+ return {ErrorCodes::InvalidOptions,
+ str::stream() << "option not supported on a view: " << fieldName};
+ }
+
+ cmr.recordPreImages = e.trueValue();
} else {
if (isView) {
return Status(ErrorCodes::InvalidOptions,
@@ -396,6 +404,10 @@ Status _collModInternal(OperationContext* opCtx,
if (!cmrNew.collValidationLevel.empty())
invariant(coll->setValidationLevel(opCtx, cmrNew.collValidationLevel));
+ if (cmrNew.recordPreImages != oldCollOptions.recordPreImages) {
+ coll->setRecordPreImages(opCtx, cmrNew.recordPreImages);
+ }
+
// Only observe non-view collMods, as view operations are observed as operations on the
// system.views collection.
getGlobalServiceContext()->getOpObserver()->onCollMod(
diff --git a/src/mongo/db/catalog/collection.h b/src/mongo/db/catalog/collection.h
index 5cac53c94b3..e11a790382c 100644
--- a/src/mongo/db/catalog/collection.h
+++ b/src/mongo/db/catalog/collection.h
@@ -93,6 +93,7 @@ struct CollectionUpdateArgs {
bool fromMigrate = false;
StoreDocOption storeDocOption = StoreDocOption::None;
+ bool preImageRecordingEnabledForCollection = false;
};
/**
@@ -383,6 +384,9 @@ public:
StringData newLevel,
StringData newAction) = 0;
+ virtual bool getRecordPreImages() const = 0;
+ virtual void setRecordPreImages(OperationContext* opCtx, bool val) = 0;
+
/**
* Returns true if this is a temporary collection.
*
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 553af36eaf5..9d845cb28da 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -211,6 +211,35 @@ Status checkValidatorCanBeUsedOnNs(const BSONObj& validator,
return Status::OK();
}
+Status validatePreImageRecording(OperationContext* opCtx, const NamespaceString& ns) {  // Checks whether recordPreImages may be enabled for `ns`; returns OK, or InvalidOptions naming the first failed check.
+ if (ns.db() == NamespaceString::kAdminDb || ns.db() == NamespaceString::kLocalDb) {  // Check 1: collections in the admin and local databases are rejected.
+ return {ErrorCodes::InvalidOptions,
+ str::stream() << "recordPreImages collection option is not supported on the "
+ << ns.db() << " database"};
+ }
+
+ if (!serverGlobalParams.featureCompatibility.isVersionInitialized() ||  // Check 2: an uninitialized FCV is rejected just like a mismatching one.
+ serverGlobalParams.featureCompatibility.getVersion() !=
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo44) {  // NOTE(review): exact match on kFullyUpgradedTo44, although the message below says "4.4 or above".
+ return {ErrorCodes::InvalidOptions,
+ "recordPreImages collection option is only supported when the feature "
+ "compatibility version is set to 4.4 or above"};
+ }
+
+ if (serverGlobalParams.clusterRole != ClusterRole::None) {  // Check 3: only nodes whose cluster role is None pass.
+ return {ErrorCodes::InvalidOptions,
+ "recordPreImages collection option is not supported on shards or config servers"};
+ }
+
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ if (!replCoord->isReplEnabled()) {  // Check 4: the replication coordinator must report replication as enabled.
+ return {ErrorCodes::InvalidOptions,
+ "recordPreImages collection option depends on being in a replica set"};
+ }
+
+ return Status::OK();  // All four checks passed.
+}
+
} // namespace
CollectionImpl::CollectionImpl(OperationContext* opCtx,
@@ -273,6 +302,11 @@ void CollectionImpl::init(OperationContext* opCtx) {
}
_validationAction = uassertStatusOK(_parseValidationAction(collectionOptions.validationAction));
_validationLevel = uassertStatusOK(_parseValidationLevel(collectionOptions.validationLevel));
+ if (collectionOptions.recordPreImages) {
+ uassertStatusOK(validatePreImageRecording(opCtx, _ns));
+ _recordPreImages = true;
+ }
+
getIndexCatalog()->init(opCtx).transitional_ignore();
_initialized = true;
}
@@ -643,7 +677,8 @@ void CollectionImpl::deleteDocument(OperationContext* opCtx,
getGlobalServiceContext()->getOpObserver()->aboutToDelete(opCtx, ns(), doc.value());
boost::optional<BSONObj> deletedDoc;
- if (storeDeletedDoc == Collection::StoreDeletedDoc::On) {
+ if ((storeDeletedDoc == Collection::StoreDeletedDoc::On && opCtx->getTxnNumber()) ||
+ getRecordPreImages()) {
deletedDoc.emplace(doc.value().getOwned());
}
@@ -716,7 +751,13 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx,
str::stream() << "Cannot change the size of a document in a capped collection: "
<< oldSize << " != " << newDoc.objsize());
- args->preImageDoc = oldDoc.value().getOwned();
+ // The preImageDoc may not be boost::none if this update was a retryable findAndModify or if
+ // the update may have changed the shard key. For non-in-place updates we always set the
+ // preImageDoc here to an owned copy of the pre-image.
+ if (!args->preImageDoc) {
+ args->preImageDoc = oldDoc.value().getOwned();
+ }
+ args->preImageRecordingEnabledForCollection = getRecordPreImages();
uassertStatusOK(
_recordStore->updateRecord(opCtx, oldLocation, newDoc.objdata(), newDoc.objsize()));
@@ -725,7 +766,7 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx,
int64_t keysInserted, keysDeleted;
uassertStatusOK(_indexCatalog->updateRecord(
- opCtx, args->preImageDoc.get(), newDoc, oldLocation, &keysInserted, &keysDeleted));
+ opCtx, *args->preImageDoc, newDoc, oldLocation, &keysInserted, &keysDeleted));
if (opDebug) {
opDebug->additiveMetrics.incrementKeysInserted(keysInserted);
@@ -760,12 +801,19 @@ StatusWith<RecordData> CollectionImpl::updateDocumentWithDamages(
invariant(oldRec.snapshotId() == opCtx->recoveryUnit()->getSnapshotId());
invariant(updateWithDamagesSupported());
+ // For in-place updates we need to grab an owned copy of the pre-image doc if pre-image
+ // recording is enabled and we haven't already set the pre-image due to this update being
+ // a retryable findAndModify or a possible update to the shard key.
+ if (!args->preImageDoc && getRecordPreImages()) {
+ args->preImageDoc = oldRec.value().toBson().getOwned();
+ }
+
auto newRecStatus =
_recordStore->updateWithDamages(opCtx, loc, oldRec.value(), damageSource, damages);
if (newRecStatus.isOK()) {
args->updatedDoc = newRecStatus.getValue().toBson();
-
+ args->preImageRecordingEnabledForCollection = getRecordPreImages();
OplogUpdateEntryArgs entryArgs(*args, ns(), _uuid);
getGlobalServiceContext()->getOpObserver()->onUpdate(opCtx, entryArgs);
}
@@ -776,6 +824,18 @@ bool CollectionImpl::isTemporary(OperationContext* opCtx) const {
return DurableCatalog::get(opCtx)->getCollectionOptions(opCtx, getCatalogId()).temp;
}
+bool CollectionImpl::getRecordPreImages() const {  // Reports whether pre-image recording is currently enabled on this collection object.
+ return _recordPreImages;  // In-memory flag; written by setRecordPreImages() in this file.
+}
+
+void CollectionImpl::setRecordPreImages(OperationContext* opCtx, bool val) {  // Enables or disables pre-image recording, updating both the durable catalog and the in-memory flag.
+ if (val) {  // Validation runs only when enabling; disabling is always allowed.
+ uassertStatusOK(validatePreImageRecording(opCtx, _ns));  // Presumably throws on a non-OK status, leaving catalog and flag untouched -- confirm uassertStatusOK semantics.
+ }
+ DurableCatalog::get(opCtx)->setRecordPreImages(opCtx, getCatalogId(), val);  // Write the new value to the durable catalog first.
+ _recordPreImages = val;  // NOTE(review): no rollback handler is visible here; confirm the flag is restored if the enclosing write unit of work aborts.
+}
+
bool CollectionImpl::isCapped() const {
return _cappedNotifier.get();
}
diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h
index 42178e704b8..864bb6b9ea7 100644
--- a/src/mongo/db/catalog/collection_impl.h
+++ b/src/mongo/db/catalog/collection_impl.h
@@ -266,6 +266,9 @@ public:
StringData newLevel,
StringData newAction) final;
+ bool getRecordPreImages() const final;
+ void setRecordPreImages(OperationContext* opCtx, bool val) final;
+
bool isTemporary(OperationContext* opCtx) const final;
//
@@ -403,6 +406,8 @@ private:
ValidationAction _validationAction;
ValidationLevel _validationLevel;
+ bool _recordPreImages = false;
+
// Notifier object for awaitData. Threads polling a capped collection for new data can wait
// on this object until notified of the arrival of new data.
//
diff --git a/src/mongo/db/catalog/collection_mock.h b/src/mongo/db/catalog/collection_mock.h
index 7a9579fe67b..001bee4db5d 100644
--- a/src/mongo/db/catalog/collection_mock.h
+++ b/src/mongo/db/catalog/collection_mock.h
@@ -208,6 +208,14 @@ public:
std::abort();
}
+ bool getRecordPreImages() const {  // Not implemented by this mock: calling it aborts the process.
+ std::abort();
+ }
+
+ void setRecordPreImages(OperationContext* opCtx, bool val) {  // Not implemented by this mock: calling it aborts the process; both arguments are ignored.
+ std::abort();
+ }
+
bool isCapped() const {
std::abort();
}
diff --git a/src/mongo/db/catalog/collection_options.cpp b/src/mongo/db/catalog/collection_options.cpp
index 9c1561e8192..f086b90547c 100644
--- a/src/mongo/db/catalog/collection_options.cpp
+++ b/src/mongo/db/catalog/collection_options.cpp
@@ -168,6 +168,8 @@ StatusWith<CollectionOptions> CollectionOptions::parse(const BSONObj& options, P
continue;
} else if (fieldName == "temp") {
collectionOptions.temp = e.trueValue();
+ } else if (fieldName == "recordPreImages") {
+ collectionOptions.recordPreImages = e.trueValue();
} else if (fieldName == "storageEngine") {
Status status = checkStorageEngineOptions(e);
if (!status.isOK()) {
@@ -286,6 +288,10 @@ void CollectionOptions::appendBSON(BSONObjBuilder* builder) const {
if (temp)
builder->appendBool("temp", true);
+ if (recordPreImages) {
+ builder->appendBool("recordPreImages", true);
+ }
+
if (!storageEngine.isEmpty()) {
builder->append("storageEngine", storageEngine);
}
@@ -341,6 +347,10 @@ bool CollectionOptions::matchesStorageOptions(const CollectionOptions& other,
return false;
}
+ if (recordPreImages != other.recordPreImages) {
+ return false;
+ }
+
if (temp != other.temp) {
return false;
}
diff --git a/src/mongo/db/catalog/collection_options.h b/src/mongo/db/catalog/collection_options.h
index f1644c69dd8..d5de2b5cb11 100644
--- a/src/mongo/db/catalog/collection_options.h
+++ b/src/mongo/db/catalog/collection_options.h
@@ -119,6 +119,7 @@ struct CollectionOptions {
} autoIndexId = DEFAULT;
bool temp = false;
+ bool recordPreImages = false;
// Storage engine collection options. Always owned or empty.
BSONObj storageEngine;
diff --git a/src/mongo/db/commands/create.idl b/src/mongo/db/commands/create.idl
index 09f426e5dc7..a413db7a150 100644
--- a/src/mongo/db/commands/create.idl
+++ b/src/mongo/db/commands/create.idl
@@ -107,6 +107,11 @@ commands:
description: "A document that expresses the write concern for the operation."
type: object
optional: true
+ recordPreImages:
+ description: "Sets whether updates/deletes should store the pre-image of the
+ document in the oplog"
+ type: safeBool
+ optional: true
temp:
description: "DEPRECATED"
type: safeBool
diff --git a/src/mongo/db/repair_database_and_check_version.cpp b/src/mongo/db/repair_database_and_check_version.cpp
index abbccedf1dd..f152e9c378c 100644
--- a/src/mongo/db/repair_database_and_check_version.cpp
+++ b/src/mongo/db/repair_database_and_check_version.cpp
@@ -493,6 +493,13 @@ bool repairDatabasesAndCheckVersion(OperationContext* opCtx) {
// Refresh list of database names to include newly-created admin, if it exists.
dbNames = storageEngine->listDatabases();
+
+ // We want to recover the admin database first so we can load the FCV early since
+ // some collection validation may depend on the FCV being set.
+ if (auto it = std::find(dbNames.begin(), dbNames.end(), "admin"); it != dbNames.end()) {
+ std::swap(*it, dbNames.front());
+ }
+
for (const auto& dbName : dbNames) {
if (dbName != "local") {
nonLocalDatabases = true;
diff --git a/src/mongo/db/storage/durable_catalog.h b/src/mongo/db/storage/durable_catalog.h
index 815e0e3a262..29e1fa394cb 100644
--- a/src/mongo/db/storage/durable_catalog.h
+++ b/src/mongo/db/storage/durable_catalog.h
@@ -164,6 +164,11 @@ public:
virtual void setIsTemp(OperationContext* opCtx, RecordId catalogId, bool isTemp) = 0;
/**
+ * Updates whether updates/deletes should store their pre-images in the opLog.
+ */
+ virtual void setRecordPreImages(OperationContext* opCtx, RecordId catalogId, bool val) = 0;
+
+ /**
* Updates the validator for this collection.
*
* An empty validator removes all validation.
diff --git a/src/mongo/db/storage/durable_catalog_impl.cpp b/src/mongo/db/storage/durable_catalog_impl.cpp
index b374ebf26f7..c6ed7ee93a5 100644
--- a/src/mongo/db/storage/durable_catalog_impl.cpp
+++ b/src/mongo/db/storage/durable_catalog_impl.cpp
@@ -919,6 +919,12 @@ void DurableCatalogImpl::setIsTemp(OperationContext* opCtx, RecordId catalogId,
putMetaData(opCtx, catalogId, md);
}
+void DurableCatalogImpl::setRecordPreImages(OperationContext* opCtx, RecordId catalogId, bool val) {  // Stores `val` as the recordPreImages option in the catalog metadata for `catalogId`.
+ BSONCollectionCatalogEntry::MetaData md = getMetaData(opCtx, catalogId);  // Read-modify-write: fetch a copy of the current metadata...
+ md.options.recordPreImages = val;  // ...change only the recordPreImages option...
+ putMetaData(opCtx, catalogId, md);  // ...and write the whole metadata object back.
+}
+
void DurableCatalogImpl::updateValidator(OperationContext* opCtx,
RecordId catalogId,
const BSONObj& validator,
diff --git a/src/mongo/db/storage/durable_catalog_impl.h b/src/mongo/db/storage/durable_catalog_impl.h
index 7e86bb4a308..f11e5b445f9 100644
--- a/src/mongo/db/storage/durable_catalog_impl.h
+++ b/src/mongo/db/storage/durable_catalog_impl.h
@@ -130,6 +130,8 @@ public:
void setIsTemp(OperationContext* opCtx, RecordId catalogId, bool isTemp);
+ void setRecordPreImages(OperationContext* opCtx, RecordId catalogId, bool val) override;
+
void updateValidator(OperationContext* opCtx,
RecordId catalogId,
const BSONObj& validator,
diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp
index 7687b9a5d86..031d4ae6568 100644
--- a/src/mongo/db/transaction_participant.cpp
+++ b/src/mongo/db/transaction_participant.cpp
@@ -1117,7 +1117,8 @@ Timestamp TransactionParticipant::Participant::prepareTransaction(
} else {
// Even if the prepared transaction contained no statements, we always reserve at least
// 1 oplog slot for the prepare oplog entry.
- const auto numSlotsToReserve = retrieveCompletedTransactionOperations(opCtx).size();
+ auto numSlotsToReserve = retrieveCompletedTransactionOperations(opCtx).size();
+ numSlotsToReserve += p().numberOfPreImagesToWrite;
oplogSlotReserver.emplace(opCtx, std::max(1, static_cast<int>(numSlotsToReserve)));
invariant(oplogSlotReserver->getSlots().size() >= 1);
prepareOplogSlot = oplogSlotReserver->getLastSlot();
@@ -1143,7 +1144,7 @@ Timestamp TransactionParticipant::Participant::prepareTransaction(
opCtx->getWriteUnitOfWork()->prepare();
p().needToWriteAbortEntry = true;
opCtx->getServiceContext()->getOpObserver()->onTransactionPrepare(
- opCtx, reservedSlots, completedTransactionOperations);
+ opCtx, reservedSlots, &completedTransactionOperations, p().numberOfPreImagesToWrite);
abortGuard.dismiss();
@@ -1195,6 +1196,10 @@ void TransactionParticipant::Participant::addTransactionOperation(
invariant(opCtx->lockState()->inAWriteUnitOfWork());
p().transactionOperations.push_back(operation);
p().transactionOperationBytes += repl::OplogEntry::getDurableReplOperationSize(operation);
+ if (!operation.getPreImage().isEmpty()) {
+ p().transactionOperationBytes += operation.getPreImage().objsize();
+ ++p().numberOfPreImagesToWrite;
+ }
auto transactionSizeLimitBytes = gTransactionSizeLimitBytes.load();
uassert(ErrorCodes::TransactionTooLarge,
@@ -1230,6 +1235,7 @@ void TransactionParticipant::Participant::clearOperationsInMemory(OperationConte
invariant(p().autoCommit);
p().transactionOperationBytes = 0;
p().transactionOperations.clear();
+ p().numberOfPreImagesToWrite = 0;
}
void TransactionParticipant::Participant::commitUnpreparedTransaction(OperationContext* opCtx) {
@@ -1241,7 +1247,7 @@ void TransactionParticipant::Participant::commitUnpreparedTransaction(OperationC
auto opObserver = opCtx->getServiceContext()->getOpObserver();
invariant(opObserver);
- opObserver->onUnpreparedTransactionCommit(opCtx, txnOps);
+ opObserver->onUnpreparedTransactionCommit(opCtx, &txnOps, p().numberOfPreImagesToWrite);
// Read-only transactions with all read concerns must wait for any data they read to be majority
// committed. For local read concern this is to match majority read concern. For both local and