summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorLingzhi Deng <lingzhi.deng@mongodb.com>2019-12-26 10:56:06 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-01-15 16:18:43 +0000
commit756a87f2e86d8f67259b5995d8f1cf7dcc27f7a6 (patch)
tree952bfbcb8959da6d6ca02a44cf74da5d086d2064 /src/mongo
parent1156b8a0008afc68d86e41400baf0f7f4e683210 (diff)
downloadmongo-756a87f2e86d8f67259b5995d8f1cf7dcc27f7a6.tar.gz
SERVER-39364: Get the last oplog entry from the storage engine for setLastOpToSystemLastOpTime
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/base/error_codes.yml3
-rw-r--r--src/mongo/db/ops/write_ops_exec.cpp2
-rw-r--r--src/mongo/db/repl/repl_client_info.cpp41
-rw-r--r--src/mongo/db/repl/repl_client_info.h15
-rw-r--r--src/mongo/db/repl/replication_coordinator.h12
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp15
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h2
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.cpp4
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.h2
-rw-r--r--src/mongo/db/repl/replication_coordinator_noop.cpp4
-rw-r--r--src/mongo/db/repl/replication_coordinator_noop.h2
-rw-r--r--src/mongo/db/s/collection_range_deleter.cpp8
-rw-r--r--src/mongo/db/s/txn_two_phase_commit_cmds.cpp2
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp6
-rw-r--r--src/mongo/embedded/replication_coordinator_embedded.cpp4
-rw-r--r--src/mongo/embedded/replication_coordinator_embedded.h2
16 files changed, 113 insertions, 11 deletions
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml
index fc806464ca7..9f1a4c5f8e5 100644
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -307,7 +307,8 @@ error_codes:
- {code: 280,name: ChangeStreamFatalError,categories: [NonResumableChangeStreamError]}
# The two codes below are for internal use only and must never be returned in a network response
- - {code: 281,name: TransactionCoordinatorSteppingDown} # Gets converted to InterruptedDueToReplStateChange
+ # TransactionCoordinatorSteppingDown gets converted to InterruptedDueToReplStateChange
+ - {code: 281,name: TransactionCoordinatorSteppingDown,categories: [Interruption]}
- {code: 282,name: TransactionCoordinatorReachedAbortDecision}
- {code: 283,name: WouldChangeOwningShard,extra: WouldChangeOwningShardInfo}
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index 7c93bc3303a..196fb827e86 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -167,7 +167,7 @@ public:
// here. No-op updates will not generate a new lastOp, so we still need the
// guard to fire in that case. Operations on the local DB aren't replicated, so they
// don't need to bump the lastOp.
- replClientInfo().setLastOpToSystemLastOpTime(_opCtx);
+ replClientInfo().setLastOpToSystemLastOpTimeIgnoringInterrupt(_opCtx);
LOG(5) << "Set last op to system time: " << replClientInfo().getLastOp().getTimestamp();
}
}
diff --git a/src/mongo/db/repl/repl_client_info.cpp b/src/mongo/db/repl/repl_client_info.cpp
index dc4c04342c7..0690e7b483e 100644
--- a/src/mongo/db/repl/repl_client_info.cpp
+++ b/src/mongo/db/repl/repl_client_info.cpp
@@ -69,7 +69,33 @@ void ReplClientInfo::setLastOp(OperationContext* opCtx, const OpTime& ot) {
void ReplClientInfo::setLastOpToSystemLastOpTime(OperationContext* opCtx) {
auto replCoord = repl::ReplicationCoordinator::get(opCtx->getServiceContext());
if (replCoord->isReplEnabled() && opCtx->writesAreReplicated()) {
- auto systemOpTime = replCoord->getMyLastAppliedOpTime();
+ OpTime systemOpTime;
+ auto status = [&] {
+ try {
+ // Get the latest OpTime from oplog.
+ systemOpTime = replCoord->getLatestWriteOpTime(opCtx);
+ return Status::OK();
+ } catch (const DBException& e) {
+ // Fall back to use my lastAppliedOpTime if we failed to get the latest OpTime from
+ // storage. In most cases, it is safe to ignore errors because if
+ // getLatestWriteOpTime throws, we cannot use the same opCtx to wait for
+ // writeConcern anyways. But getLastError from the same client could use a different
+ // opCtx to wait for the lastOp. So this is a best effort attempt to set the lastOp
+ // to the in-memory lastAppliedOpTime (which could be lagged). And this is a known
+ // bug in getLastError.
+ systemOpTime = replCoord->getMyLastAppliedOpTime();
+ if (e.toStatus() == ErrorCodes::OplogOperationUnsupported ||
+ e.toStatus() == ErrorCodes::NamespaceNotFound ||
+ e.toStatus() == ErrorCodes::CollectionIsEmpty ||
+ ErrorCodes::isNotMasterError(e.toStatus())) {
+ // It is ok if the storage engine does not support getLatestOplogTimestamp() or
+ // if the oplog is empty. If the node stepped down in between, it is correct to
+ // use lastAppliedOpTime as last OpTime.
+ return Status::OK();
+ }
+ return e.toStatus();
+ }
+ }();
// If the system optime has gone backwards, that must mean that there was a rollback.
// This is safe, but the last op for a Client should never go backwards, so just leave
@@ -84,6 +110,19 @@ void ReplClientInfo::setLastOpToSystemLastOpTime(OperationContext* opCtx) {
}
lastOpInfo(opCtx).lastOpSetExplicitly = true;
+
+ // Throw if getLatestWriteOpTime failed.
+ uassertStatusOK(status);
+ }
+}
+
+void ReplClientInfo::setLastOpToSystemLastOpTimeIgnoringInterrupt(OperationContext* opCtx) {
+ try {
+ repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
+ } catch (const ExceptionForCat<ErrorCategory::Interruption>& e) {
+ // In most cases, it is safe to ignore interruption errors because we cannot use the same
+ // OperationContext to wait for writeConcern anyways.
+ LOG(2) << "Ignoring set last op interruption error: " << e.toStatus();
}
}
diff --git a/src/mongo/db/repl/repl_client_info.h b/src/mongo/db/repl/repl_client_info.h
index 9f45ec49a24..232c95460fe 100644
--- a/src/mongo/db/repl/repl_client_info.h
+++ b/src/mongo/db/repl/repl_client_info.h
@@ -69,12 +69,21 @@ public:
}
/**
- * Use this to set the LastOp to the latest known OpTime in the oplog.
- * This is necessary when doing no-op writes, as we need to set the client's lastOp to a proper
- * value for write concern wait to work.
+ * Use this to set the LastOp to the latest known OpTime in the oplog. On primary, The OpTime
+ * used consists of the timestamp of the latest oplog entry on disk and the current term. On
+ * secondaries, lastAppliedOpTime is used. Using lastAppliedOpTime on secondaries is the desired
+ * behavior, since secondaries do timestamped reads at the lastApplied.
+ *
+ * Setting the lastOp to the latest OpTime is necessary when doing no-op writes, as we need to
+ * set the client's lastOp to a proper value for write concern wait to work.
*/
void setLastOpToSystemLastOpTime(OperationContext* opCtx);
+ /**
+ * Same as setLastOpToSystemLastOpTime but ignores interruption errors.
+ */
+ void setLastOpToSystemLastOpTimeIgnoringInterrupt(OperationContext* opCtx);
+
private:
static const long long kUninitializedTerm = -1;
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index d51920f0c95..b46b7ed6804 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -953,6 +953,18 @@ public:
RaftMongoSpecActionEnum action,
boost::optional<Timestamp> oplogReadTimestamp = boost::none) const {}
+ /**
+ * Returns the OpTime that consists of the timestamp of the latest oplog entry and the current
+ * term.
+ * This function throws if:
+ * 1. It is called on secondaries.
+ * 2. OperationContext times out or is interrupted.
+ * 3. Oplog collection does not exist.
+ * 4. Oplog collection is empty.
+ * 5. Getting latest oplog timestamp is not supported by the storage engine.
+ */
+ virtual OpTime getLatestWriteOpTime(OperationContext* opCtx) const = 0;
+
protected:
ReplicationCoordinator();
};
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 28578e2f2d0..9eb3a3848af 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -64,6 +64,7 @@
#include "mongo/db/repl/data_replicator_external_state_initial_sync.h"
#include "mongo/db/repl/is_master_response.h"
#include "mongo/db/repl/last_vote.h"
+#include "mongo/db/repl/local_oplog_info.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/repl_set_config_checks.h"
@@ -1974,6 +1975,20 @@ std::shared_ptr<const IsMasterResponse> ReplicationCoordinatorImpl::awaitIsMaste
return statusWithIsMaster.getValue();
}
+OpTime ReplicationCoordinatorImpl::getLatestWriteOpTime(OperationContext* opCtx) const {
+ ShouldNotConflictWithSecondaryBatchApplicationBlock noPBWMBlock(opCtx->lockState());
+ Lock::GlobalLock globalLock(opCtx, MODE_IS);
+ // Check if the node is primary after acquiring global IS lock.
+ uassert(ErrorCodes::NotMaster,
+ "Not primary so can't get latest write optime",
+ canAcceptNonLocalWrites());
+ auto oplog = LocalOplogInfo::get(opCtx)->getCollection();
+ uassert(ErrorCodes::NamespaceNotFound, "oplog collection does not exist.", oplog);
+ auto latestOplogTimestamp =
+ uassertStatusOK(oplog->getRecordStore()->getLatestOplogTimestamp(opCtx));
+ return OpTime(latestOplogTimestamp, getTerm());
+}
+
void ReplicationCoordinatorImpl::_killConflictingOpsOnStepUpAndStepDown(
AutoGetRstlForStepUpStepDown* arsc, ErrorCodes::Error reason) {
const OperationContext* rstlOpCtx = arsc->getOpCtx();
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index dfc1a414ddc..5fd6f46dd94 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -348,6 +348,8 @@ public:
RaftMongoSpecActionEnum action,
boost::optional<Timestamp> oplogReadTimestamp = boost::none) const override;
+ virtual OpTime getLatestWriteOpTime(OperationContext* opCtx) const override;
+
// ================== Test support API ===================
/**
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index 57d9b02d7e5..835029ac9a9 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -566,5 +566,9 @@ std::shared_ptr<const IsMasterResponse> ReplicationCoordinatorMock::awaitIsMaste
return response;
}
+OpTime ReplicationCoordinatorMock::getLatestWriteOpTime(OperationContext* opCtx) const {
+ return getMyLastAppliedOpTime();
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index be9e97dc932..2893920d1e5 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -323,6 +323,8 @@ public:
boost::optional<TopologyVersion> clientTopologyVersion,
boost::optional<Date_t> deadline) const override;
+ virtual OpTime getLatestWriteOpTime(OperationContext* opCtx) const override;
+
private:
ServiceContext* const _service;
ReplSettings _settings;
diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp
index a3a6b93775b..6f502574645 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.cpp
+++ b/src/mongo/db/repl/replication_coordinator_noop.cpp
@@ -470,5 +470,9 @@ std::shared_ptr<const IsMasterResponse> ReplicationCoordinatorNoOp::awaitIsMaste
MONGO_UNREACHABLE;
}
+OpTime ReplicationCoordinatorNoOp::getLatestWriteOpTime(OperationContext* opCtx) const {
+ return getMyLastAppliedOpTime();
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h
index 5828943fa97..ea825567454 100644
--- a/src/mongo/db/repl/replication_coordinator_noop.h
+++ b/src/mongo/db/repl/replication_coordinator_noop.h
@@ -260,6 +260,8 @@ public:
boost::optional<TopologyVersion> clientTopologyVersion,
boost::optional<Date_t> deadline) const final;
+ OpTime getLatestWriteOpTime(OperationContext* opCtx) const override;
+
private:
ServiceContext* const _service;
};
diff --git a/src/mongo/db/s/collection_range_deleter.cpp b/src/mongo/db/s/collection_range_deleter.cpp
index 15a37946f98..ef93a23c06f 100644
--- a/src/mongo/db/s/collection_range_deleter.cpp
+++ b/src/mongo/db/s/collection_range_deleter.cpp
@@ -340,12 +340,14 @@ boost::optional<Date_t> CollectionRangeDeleter::cleanUpNextRange(
LOG(0) << "Waiting for majority replication of local deletions in " << nss.ns() << " range "
<< redact(range->toString());
- repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
- const auto clientOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
-
// Wait for replication outside the lock
const auto replicationStatus = [&] {
try {
+ repl::ReplClientInfo::forClient(opCtx->getClient())
+ .setLastOpToSystemLastOpTime(opCtx);
+ const auto clientOpTime =
+ repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
+
WriteConcernResult unusedWCResult;
return waitForWriteConcern(
opCtx, clientOpTime, kMajorityWriteConcern, &unusedWCResult);
diff --git a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
index 21293128b58..30e1d90ae9e 100644
--- a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
+++ b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
@@ -244,7 +244,7 @@ public:
// (in all cases except the one where this command aborts the local participant), so
// ensure waiting for the client's writeConcern of the decision.
repl::ReplClientInfo::forClient(opCtx->getClient())
- .setLastOpToSystemLastOpTime(opCtx);
+ .setLastOpToSystemLastOpTimeIgnoringInterrupt(opCtx);
});
if (coordinatorDecisionFuture) {
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 6388ef65ead..ab58970826a 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -1406,7 +1406,11 @@ StatusWith<Timestamp> WiredTigerRecordStore::getLatestOplogTimestamp(
WT_SESSION* sess = sessRaii->getSession();
WT_CURSOR* cursor;
invariantWTOK(sess->open_cursor(sess, _uri.c_str(), nullptr, nullptr, &cursor));
- invariantWTOK(cursor->prev(cursor));
+ int ret = cursor->prev(cursor);
+ if (ret == WT_NOTFOUND) {
+ return Status(ErrorCodes::CollectionIsEmpty, "oplog is empty");
+ }
+ invariantWTOK(ret);
RecordId recordId = getKey(cursor);
invariantWTOK(sess->reset(sess));
diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp
index 9d3ce294c02..f8010210e61 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.cpp
+++ b/src/mongo/embedded/replication_coordinator_embedded.cpp
@@ -496,5 +496,9 @@ std::shared_ptr<const repl::IsMasterResponse> ReplicationCoordinatorEmbedded::aw
UASSERT_NOT_IMPLEMENTED;
};
+OpTime ReplicationCoordinatorEmbedded::getLatestWriteOpTime(OperationContext* opCtx) const {
+ return getMyLastAppliedOpTime();
+}
+
} // namespace embedded
} // namespace mongo
diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h
index 732379e8423..eac2c5e97ae 100644
--- a/src/mongo/embedded/replication_coordinator_embedded.h
+++ b/src/mongo/embedded/replication_coordinator_embedded.h
@@ -267,6 +267,8 @@ public:
boost::optional<TopologyVersion> previous,
boost::optional<Date_t> deadline) const override;
+ repl::OpTime getLatestWriteOpTime(OperationContext* opCtx) const override;
+
private:
// Back pointer to the ServiceContext that has started the instance.
ServiceContext* const _service;