summary | refs | log | tree | commit | diff
path: root/src/mongo/db/repl
diff options
context:
space:
mode:
author: Matthew Russotto <matthew.russotto@10gen.com> 2019-04-08 20:00:32 -0400
committer: Matthew Russotto <matthew.russotto@10gen.com> 2019-04-08 20:00:39 -0400
commit: b0d0de6a13a05c64e9e26392448e68303ccd69d5 (patch)
tree: 4dbe8249b06a907e9ea60235fcb867e978d244c9 /src/mongo/db/repl
parent: 07bcfd825c6ad2c347329af1a1b7634029048871 (diff)
download: mongo-b0d0de6a13a05c64e9e26392448e68303ccd69d5.tar.gz
SERVER-39790 Reconstruct prepared transactions from new oplog format on startup
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- src/mongo/db/repl/SConscript 1
-rw-r--r-- src/mongo/db/repl/apply_ops.cpp 7
-rw-r--r-- src/mongo/db/repl/apply_ops.h 4
-rw-r--r-- src/mongo/db/repl/replication_recovery.cpp 2
-rw-r--r-- src/mongo/db/repl/transaction_oplog_application.cpp 111
-rw-r--r-- src/mongo/db/repl/transaction_oplog_application.h 6
6 files changed, 84 insertions, 47 deletions
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 211a8bcb354..d71c9bfb33c 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -350,6 +350,7 @@ env.Library(
LIBDEPS=[
],
LIBDEPS_PRIVATE=[
+ 'oplog',
'oplog_application',
'$BUILD_DIR/mongo/base',
],
diff --git a/src/mongo/db/repl/apply_ops.cpp b/src/mongo/db/repl/apply_ops.cpp
index b3732a99a52..c10e5165f74 100644
--- a/src/mongo/db/repl/apply_ops.cpp
+++ b/src/mongo/db/repl/apply_ops.cpp
@@ -478,12 +478,9 @@ Status applyApplyOpsOplogEntry(OperationContext* opCtx,
&resultWeDontCareAbout);
}
-Status applyRecoveredPrepareTransaction(OperationContext* opCtx, const OplogEntry& entry) {
- // Snapshot transactions never conflict with the PBWM lock.
- invariant(!opCtx->lockState()->shouldConflictWithSecondaryBatchApplication());
- // we might replay a prepared transaction behind oldest timestamp.
+Status applyRecoveredPrepareApplyOpsOplogEntry(OperationContext* opCtx, const OplogEntry& entry) {
+ // We might replay a prepared transaction behind oldest timestamp.
opCtx->recoveryUnit()->setRoundUpPreparedTimestamps(true);
- UnreplicatedWritesBlock uwb(opCtx);
return _applyPrepareTransaction(opCtx, entry, OplogApplication::Mode::kRecovering);
}
diff --git a/src/mongo/db/repl/apply_ops.h b/src/mongo/db/repl/apply_ops.h
index 4c98ceb3de9..d00e186dfa1 100644
--- a/src/mongo/db/repl/apply_ops.h
+++ b/src/mongo/db/repl/apply_ops.h
@@ -104,8 +104,8 @@ Status applyApplyOpsOplogEntry(OperationContext* opCtx,
repl::OplogApplication::Mode oplogApplicationMode);
/**
- * Called from recovery to apply a prepare transaction oplog entry.
+ * Called from recovery to apply an 'applyOps' oplog entry that prepares a transaction.
*/
-Status applyRecoveredPrepareTransaction(OperationContext* opCtx, const OplogEntry& entry);
+Status applyRecoveredPrepareApplyOpsOplogEntry(OperationContext* opCtx, const OplogEntry& entry);
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index 2f797d7be53..239b12bb6ae 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -44,6 +44,7 @@
#include "mongo/db/repl/oplog_buffer.h"
#include "mongo/db/repl/replication_consistency_markers_impl.h"
#include "mongo/db/repl/storage_interface.h"
+#include "mongo/db/repl/transaction_oplog_application.h"
#include "mongo/db/server_recovery.h"
#include "mongo/db/session.h"
#include "mongo/db/transaction_history_iterator.h"
@@ -306,6 +307,7 @@ void ReplicationRecoveryImpl::_reconstructPreparedTransactions(OperationContext*
opCtx->getServiceContext()->makeClient("reconstruct-prepared-transactions");
AlternativeClientRegion acr(newClient);
const auto newOpCtx = cc().makeOperationContext();
+ repl::UnreplicatedWritesBlock uwb(newOpCtx.get());
// Snapshot transaction can never conflict with the PBWM lock.
newOpCtx->lockState()->setShouldConflictWithSecondaryBatchApplication(false);
diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp
index 99fab239b08..71905e5e4b1 100644
--- a/src/mongo/db/repl/transaction_oplog_application.cpp
+++ b/src/mongo/db/repl/transaction_oplog_application.cpp
@@ -37,6 +37,7 @@
#include "mongo/db/catalog_raii.h"
#include "mongo/db/commands/txn_cmds_gen.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/index_builds_coordinator.h"
#include "mongo/db/repl/apply_ops.h"
#include "mongo/db/repl/timestamp_block.h"
#include "mongo/db/session_catalog_mongod.h"
@@ -108,10 +109,6 @@ Status _applyTransactionFromOplogChain(OperationContext* opCtx,
}
}
- if (prepareCmd.isEmpty()) {
- return _applyOperationsForTransaction(opCtx, ops, mode);
- }
-
const auto dbName = entry.getNss().db().toString();
Status status = Status::OK();
@@ -122,7 +119,11 @@ Status _applyTransactionFromOplogChain(OperationContext* opCtx,
opCtx->recoveryUnit()->setRoundUpPreparedTimestamps(true);
BSONObjBuilder resultWeDontCareAbout;
- status = applyOps(opCtx, dbName, prepareCmd, mode, &resultWeDontCareAbout);
+ if (prepareCmd.isEmpty()) {
+ status = _applyOperationsForTransaction(opCtx, ops, mode);
+ } else {
+ status = applyOps(opCtx, dbName, prepareCmd, mode, &resultWeDontCareAbout);
+ }
if (status.isOK()) {
opCtx->recoveryUnit()->setPrepareTimestamp(commitTimestamp);
wunit.prepare();
@@ -269,43 +270,24 @@ repl::MultiApplier::Operations readTransactionOperationsFromOplogChain(
return ops;
}
+namespace {
/**
- * Make sure that if we are in replication recovery or initial sync, we don't apply the prepare
- * transaction oplog entry until we either see a commit transaction oplog entry or are at the very
- * end of recovery/initial sync. Otherwise, only apply the prepare transaction oplog entry if we are
- * a secondary.
+ * This is the part of applyPrepareTransaction which is common to steady state and recovery
+ * oplog application.
*/
-Status applyPrepareTransaction(OperationContext* opCtx,
- const OplogEntry& entry,
- repl::OplogApplication::Mode oplogApplicationMode) {
- // Don't apply the operations from the prepared transaction until either we see a commit
- // transaction oplog entry during recovery or are at the end of recovery.
- if (oplogApplicationMode == repl::OplogApplication::Mode::kRecovering) {
- if (!serverGlobalParams.enableMajorityReadConcern) {
- error() << "Cannot replay a prepared transaction when 'enableMajorityReadConcern' is "
- "set to false. Restart the server with --enableMajorityReadConcern=true "
- "to complete recovery.";
- }
- fassert(51146, serverGlobalParams.enableMajorityReadConcern);
- return Status::OK();
- }
+Status _applyPrepareTransaction(OperationContext* opCtx,
+ const OplogEntry& entry,
+ repl::OplogApplication::Mode oplogApplicationMode) {
+ auto ops = readTransactionOperationsFromOplogChain(opCtx, entry, {});
- // Don't apply the operations from the prepared transaction until either we see a commit
- // transaction oplog entry during the oplog application phase of initial sync or are at the end
- // of initial sync.
- if (oplogApplicationMode == repl::OplogApplication::Mode::kInitialSync) {
- return Status::OK();
+ if (oplogApplicationMode == repl::OplogApplication::Mode::kRecovering) {
+ // We might replay a prepared transaction behind oldest timestamp. Note that since this is
+ // scoped to the storage transaction, and readTransactionOperationsFromOplogChain implicitly
+ // abandons the storage transaction when it releases the global lock, this must be done
+ // after readTransactionOperationsFromOplogChain.
+ opCtx->recoveryUnit()->setRoundUpPreparedTimestamps(true);
}
- // Return error if run via applyOps command.
- uassert(51145,
- "prepareTransaction oplog entry is only used internally by secondaries.",
- oplogApplicationMode != repl::OplogApplication::Mode::kApplyOpsCmd);
-
- invariant(oplogApplicationMode == repl::OplogApplication::Mode::kSecondary);
-
- auto ops = readTransactionOperationsFromOplogChain(opCtx, entry, {});
-
// Block application of prepare oplog entry on secondaries when a concurrent background index
// build is running.
// This will prevent hybrid index builds from corrupting an index on secondary nodes if a
@@ -313,9 +295,9 @@ Status applyPrepareTransaction(OperationContext* opCtx,
// commits.
for (const auto& op : ops) {
auto ns = op.getNss();
- if (BackgroundOperation::inProgForNs(ns)) {
- BackgroundOperation::awaitNoBgOpInProgForNs(ns);
- }
+ auto uuid = *op.getUuid();
+ BackgroundOperation::awaitNoBgOpInProgForNs(ns);
+ IndexBuildsCoordinator::get(opCtx)->awaitNoIndexBuildInProgressForCollection(uuid);
}
// Transaction operations are in their own batch, so we can modify their opCtx.
@@ -346,5 +328,54 @@ Status applyPrepareTransaction(OperationContext* opCtx,
return Status::OK();
}
+} // namespace
+
+/**
+ * Make sure that if we are in replication recovery or initial sync, we don't apply the prepare
+ * transaction oplog entry until we either see a commit transaction oplog entry or are at the very
+ * end of recovery/initial sync. Otherwise, only apply the prepare transaction oplog entry if we are
+ * a secondary.
+ */
+Status applyPrepareTransaction(OperationContext* opCtx,
+ const OplogEntry& entry,
+ repl::OplogApplication::Mode oplogApplicationMode) {
+ // Don't apply the operations from the prepared transaction until either we see a commit
+ // transaction oplog entry during recovery or are at the end of recovery.
+ if (oplogApplicationMode == repl::OplogApplication::Mode::kRecovering) {
+ if (!serverGlobalParams.enableMajorityReadConcern) {
+ error() << "Cannot replay a prepared transaction when 'enableMajorityReadConcern' is "
+ "set to false. Restart the server with --enableMajorityReadConcern=true "
+ "to complete recovery.";
+ }
+ fassert(51146, serverGlobalParams.enableMajorityReadConcern);
+ return Status::OK();
+ }
+
+ // Don't apply the operations from the prepared transaction until either we see a commit
+ // transaction oplog entry during the oplog application phase of initial sync or are at the end
+ // of initial sync.
+ if (oplogApplicationMode == repl::OplogApplication::Mode::kInitialSync) {
+ return Status::OK();
+ }
+
+ // Return error if run via applyOps command.
+ uassert(51145,
+ "prepareTransaction oplog entry is only used internally by secondaries.",
+ oplogApplicationMode != repl::OplogApplication::Mode::kApplyOpsCmd);
+
+ invariant(oplogApplicationMode == repl::OplogApplication::Mode::kSecondary);
+ return _applyPrepareTransaction(opCtx, entry, oplogApplicationMode);
+}
+
+Status applyRecoveredPrepareTransaction(OperationContext* opCtx, const OplogEntry& entry) {
+ // Snapshot transactions never conflict with the PBWM lock.
+ invariant(!opCtx->lockState()->shouldConflictWithSecondaryBatchApplication());
+ if (entry.getCommandType() == OplogEntry::CommandType::kPrepareTransaction) {
+ return _applyPrepareTransaction(opCtx, entry, repl::OplogApplication::Mode::kRecovering);
+ } else {
+ // This is an applyOps with prepare.
+ return applyRecoveredPrepareApplyOpsOplogEntry(opCtx, entry);
+ }
+}
} // namespace mongo
diff --git a/src/mongo/db/repl/transaction_oplog_application.h b/src/mongo/db/repl/transaction_oplog_application.h
index 257bf602178..bc960303801 100644
--- a/src/mongo/db/repl/transaction_oplog_application.h
+++ b/src/mongo/db/repl/transaction_oplog_application.h
@@ -66,4 +66,10 @@ Status applyPrepareTransaction(OperationContext* opCtx,
const repl::OplogEntry& entry,
repl::OplogApplication::Mode mode);
+/**
+ * Apply a prepared transaction during recovery. The OplogEntry must be an 'applyOps' with
+ * 'prepare' set or a prepareTransaction command.
+ */
+Status applyRecoveredPrepareTransaction(OperationContext* opCtx, const repl::OplogEntry& entry);
+
} // namespace mongo