summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaloian Manassiev <kaloian.manassiev@mongodb.com>2017-10-20 17:03:18 -0400
committerKaloian Manassiev <kaloian.manassiev@mongodb.com>2017-10-23 17:54:51 -0400
commitaa95387d1a8fb475c267a3ca0e84a1b4a18a7ace (patch)
treef258c7c961e7577d35b49080cfe1abe660889ab9
parentdbe347e2ec54858a39a2b4c59d5812214b4b94cd (diff)
downloadmongo-aa95387d1a8fb475c267a3ca0e84a1b4a18a7ace.tar.gz
SERVER-31328 Don't fetch oplog entries for retries of executed inserts or deletes
-rw-r--r--src/mongo/db/ops/write_ops_exec.cpp18
-rw-r--r--src/mongo/db/ops/write_ops_retryability.cpp42
-rw-r--r--src/mongo/db/ops/write_ops_retryability.h10
-rw-r--r--src/mongo/db/ops/write_ops_retryability_test.cpp92
-rw-r--r--src/mongo/db/repl/sync_tail.cpp18
-rw-r--r--src/mongo/db/session.cpp5
-rw-r--r--src/mongo/db/session.h4
-rw-r--r--src/mongo/db/session_txn_record.h58
-rw-r--r--src/mongo/db/transaction_reaper.cpp11
9 files changed, 43 insertions, 215 deletions
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index d3a9813039b..4cf25eafbad 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -424,6 +424,13 @@ StmtId getStmtIdForWriteOp(OperationContext* opCtx, const T& wholeOp, size_t opI
: kUninitializedStmtId;
}
+SingleWriteResult makeWriteResultForInsertOrDeleteRetry() {
+ SingleWriteResult res;
+ res.setN(1);
+ res.setNModified(0);
+ return res;
+}
+
} // namespace
WriteResult performInserts(OperationContext* opCtx, const write_ops::Insert& wholeOp) {
@@ -482,9 +489,9 @@ WriteResult performInserts(OperationContext* opCtx, const write_ops::Insert& who
const auto stmtId = getStmtIdForWriteOp(opCtx, wholeOp, stmtIdIndex++);
if (opCtx->getTxnNumber()) {
auto session = OperationContextSession::get(opCtx);
- if (auto entry =
- session->checkStatementExecuted(opCtx, *opCtx->getTxnNumber(), stmtId)) {
- out.results.emplace_back(parseOplogEntryForInsert(*entry));
+ if (session->checkStatementExecutedNoOplogEntryFetch(*opCtx->getTxnNumber(),
+ stmtId)) {
+ out.results.emplace_back(makeWriteResultForInsertOrDeleteRetry());
continue;
}
}
@@ -758,9 +765,8 @@ WriteResult performDeletes(OperationContext* opCtx, const write_ops::Delete& who
const auto stmtId = getStmtIdForWriteOp(opCtx, wholeOp, stmtIdIndex++);
if (opCtx->getTxnNumber()) {
auto session = OperationContextSession::get(opCtx);
- if (auto entry =
- session->checkStatementExecuted(opCtx, *opCtx->getTxnNumber(), stmtId)) {
- out.results.emplace_back(parseOplogEntryForDelete(*entry));
+ if (session->checkStatementExecutedNoOplogEntryFetch(*opCtx->getTxnNumber(), stmtId)) {
+ out.results.emplace_back(makeWriteResultForInsertOrDeleteRetry());
continue;
}
}
diff --git a/src/mongo/db/ops/write_ops_retryability.cpp b/src/mongo/db/ops/write_ops_retryability.cpp
index 28ce5bed8be..10e5c8701a2 100644
--- a/src/mongo/db/ops/write_ops_retryability.cpp
+++ b/src/mongo/db/ops/write_ops_retryability.cpp
@@ -173,27 +173,6 @@ repl::OplogEntry getInnerNestedOplogEntry(const repl::OplogEntry& entry) {
} // namespace
-SingleWriteResult parseOplogEntryForInsert(const repl::OplogEntry& entry) {
- if (entry.getOpType() == repl::OpTypeEnum::kNoop) {
- return parseOplogEntryForInsert(getInnerNestedOplogEntry(entry));
- }
-
- uassert(40636,
- str::stream() << "insert retry request is not compatible with previous write in the "
- "transaction of type: "
- << OpType_serializer(entry.getOpType())
- << ", oplogTs: "
- << entry.getTimestamp().toString()
- << ", oplog: "
- << redact(entry.toBSON()),
- entry.getOpType() == repl::OpTypeEnum::kInsert);
-
- SingleWriteResult res;
- res.setN(1);
- res.setNModified(0);
- return res;
-}
-
SingleWriteResult parseOplogEntryForUpdate(const repl::OplogEntry& entry) {
SingleWriteResult res;
// Upserts are stored as inserts.
@@ -223,27 +202,6 @@ SingleWriteResult parseOplogEntryForUpdate(const repl::OplogEntry& entry) {
return res;
}
-SingleWriteResult parseOplogEntryForDelete(const repl::OplogEntry& entry) {
- if (entry.getOpType() == repl::OpTypeEnum::kNoop) {
- return parseOplogEntryForDelete(getInnerNestedOplogEntry(entry));
- }
-
- uassert(40637,
- str::stream() << "delete retry request is not compatible with previous write in the "
- "transaction of type: "
- << OpType_serializer(entry.getOpType())
- << ", oplogTs: "
- << entry.getTimestamp().toString()
- << ", oplog: "
- << redact(entry.toBSON()),
- entry.getOpType() == repl::OpTypeEnum::kDelete);
-
- SingleWriteResult res;
- res.setN(1);
- res.setNModified(0);
- return res;
-}
-
void parseOplogEntryForFindAndModify(OperationContext* opCtx,
const FindAndModifyRequest& request,
const repl::OplogEntry& oplogEntry,
diff --git a/src/mongo/db/ops/write_ops_retryability.h b/src/mongo/db/ops/write_ops_retryability.h
index fa73a4d34ed..55bd53bb804 100644
--- a/src/mongo/db/ops/write_ops_retryability.h
+++ b/src/mongo/db/ops/write_ops_retryability.h
@@ -39,14 +39,12 @@ class FindAndModifyRequest;
class OperationContext;
/**
- * Returns the single write result corresponding to the given oplog entry for insert, update, and
- * delete commands, i.e. the single write result that would have been returned by the statement that
- * would have resulted in the given oplog entry. The oplog entries are assumed to be properly
- * formed and have the correct op type.
+ * Returns the single write result corresponding to the given oplog entry for document update. I.e.,
+ * the single write result that would have been returned by the statement that would have resulted
+ * in the given oplog entry. The oplog entries are assumed to be properly formed and have the
+ * correct op type.
*/
-SingleWriteResult parseOplogEntryForInsert(const repl::OplogEntry& entry);
SingleWriteResult parseOplogEntryForUpdate(const repl::OplogEntry& entry);
-SingleWriteResult parseOplogEntryForDelete(const repl::OplogEntry& entry);
/**
* Populates the passed-in builder with the result of a findAndModify based on the oplog entries
diff --git a/src/mongo/db/ops/write_ops_retryability_test.cpp b/src/mongo/db/ops/write_ops_retryability_test.cpp
index 4c6268c8c7f..7bf0ff72673 100644
--- a/src/mongo/db/ops/write_ops_retryability_test.cpp
+++ b/src/mongo/db/ops/write_ops_retryability_test.cpp
@@ -48,42 +48,6 @@ const BSONObj kNestedOplog(BSON("$sessionMigrateInfo" << 1));
using WriteOpsRetryability = ServiceContextMongoDTest;
-TEST_F(WriteOpsRetryability, ParseOplogEntryForInsert) {
- const auto entry = assertGet(
- repl::OplogEntry::parse(BSON("ts" << Timestamp(50, 10) << "t" << 1LL << "h" << 0LL << "op"
- << "i"
- << "ns"
- << "a.b"
- << "o"
- << BSON("_id" << 1 << "x" << 5))));
-
- auto res = parseOplogEntryForInsert(entry);
-
- ASSERT_EQ(res.getN(), 1);
- ASSERT_EQ(res.getNModified(), 0);
- ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSONObj());
-}
-
-TEST_F(WriteOpsRetryability, ParseOplogEntryForNestedInsert) {
- repl::OplogEntry innerOplog(repl::OpTime(Timestamp(50, 10), 1),
- 0,
- repl::OpTypeEnum::kInsert,
- NamespaceString("a.b"),
- BSON("_id" << 2));
- repl::OplogEntry insertOplog(repl::OpTime(Timestamp(60, 10), 1),
- 0,
- repl::OpTypeEnum::kNoop,
- NamespaceString("a.b"),
- kNestedOplog,
- innerOplog.toBSON());
-
- auto res = parseOplogEntryForInsert(insertOplog);
-
- ASSERT_EQ(res.getN(), 1);
- ASSERT_EQ(res.getNModified(), 0);
- ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSONObj());
-}
-
TEST_F(WriteOpsRetryability, ParseOplogEntryForUpdate) {
const auto entry = assertGet(
repl::OplogEntry::parse(BSON("ts" << Timestamp(50, 10) << "t" << 1LL << "h" << 0LL << "op"
@@ -159,52 +123,6 @@ TEST_F(WriteOpsRetryability, ParseOplogEntryForNestedUpsert) {
ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSON("_id" << 2));
}
-TEST_F(WriteOpsRetryability, ParseOplogEntryForDelete) {
- const auto entry = assertGet(
- repl::OplogEntry::parse(BSON("ts" << Timestamp(50, 10) << "t" << 1LL << "h" << 0LL << "op"
- << "d"
- << "ns"
- << "a.b"
- << "o"
- << BSON("_id" << 1 << "x" << 5))));
-
- auto res = parseOplogEntryForDelete(entry);
-
- ASSERT_EQ(res.getN(), 1);
- ASSERT_EQ(res.getNModified(), 0);
- ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSONObj());
-}
-
-TEST_F(WriteOpsRetryability, ParseOplogEntryForNestedDelete) {
- repl::OplogEntry innerOplog(repl::OpTime(Timestamp(50, 10), 1),
- 0,
- repl::OpTypeEnum::kDelete,
- NamespaceString("a.b"),
- BSON("_id" << 2));
- repl::OplogEntry deleteOplog(repl::OpTime(Timestamp(60, 10), 1),
- 0,
- repl::OpTypeEnum::kNoop,
- NamespaceString("a.b"),
- kNestedOplog,
- innerOplog.toBSON());
-
- auto res = parseOplogEntryForDelete(deleteOplog);
-
- ASSERT_EQ(res.getN(), 1);
- ASSERT_EQ(res.getNModified(), 0);
- ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSONObj());
-}
-
-TEST_F(WriteOpsRetryability, ShouldFailIfParsingDeleteOplogForInsert) {
- repl::OplogEntry deleteOplog(repl::OpTime(Timestamp(50, 10), 1),
- 0,
- repl::OpTypeEnum::kDelete,
- NamespaceString("a.b"),
- BSON("_id" << 2));
-
- ASSERT_THROWS(parseOplogEntryForInsert(deleteOplog), AssertionException);
-}
-
TEST_F(WriteOpsRetryability, ShouldFailIfParsingDeleteOplogForUpdate) {
repl::OplogEntry deleteOplog(repl::OpTime(Timestamp(50, 10), 1),
0,
@@ -215,16 +133,6 @@ TEST_F(WriteOpsRetryability, ShouldFailIfParsingDeleteOplogForUpdate) {
ASSERT_THROWS(parseOplogEntryForUpdate(deleteOplog), AssertionException);
}
-TEST_F(WriteOpsRetryability, ShouldFailIfParsingInsertOplogForDelete) {
- repl::OplogEntry insertOplog(repl::OpTime(Timestamp(50, 10), 1),
- 0,
- repl::OpTypeEnum::kInsert,
- NamespaceString("a.b"),
- BSON("_id" << 2));
-
- ASSERT_THROWS(parseOplogEntryForDelete(insertOplog), AssertionException);
-}
-
class FindAndModifyRetryability : public MockReplCoordServerFixture {
public:
FindAndModifyRetryability() = default;
diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp
index 89f4ee11448..25295eb133d 100644
--- a/src/mongo/db/repl/sync_tail.cpp
+++ b/src/mongo/db/repl/sync_tail.cpp
@@ -69,7 +69,7 @@
#include "mongo/db/server_parameters.h"
#include "mongo/db/service_context.h"
#include "mongo/db/session.h"
-#include "mongo/db/session_txn_record.h"
+#include "mongo/db/session_txn_record_gen.h"
#include "mongo/db/stats/timer_stats.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/exit.h"
@@ -552,6 +552,19 @@ void scheduleTxnTableUpdates(OperationContext* opCtx,
}
/**
+ * A session txn record is greater (i.e. later) than another if its transaction number is greater,
+ * or if its transaction number is the same, and its last write optime is greater.
+ *
+ * Records can only be compared meaningfully if they are for the same session id.
+ */
+bool isSessionTxnRecordLaterThan(const SessionTxnRecord& lhs, const SessionTxnRecord& rhs) {
+ invariant(lhs.getSessionId() == rhs.getSessionId());
+
+ return (lhs.getTxnNum() > rhs.getTxnNum()) ||
+ (lhs.getTxnNum() == rhs.getTxnNum() && lhs.getLastWriteOpTime() > rhs.getLastWriteOpTime());
+}
+
+/**
* Caches per-collection properties which are relevant for oplog application, so that they don't
* have to be retrieved repeatedly for each op.
*/
@@ -612,6 +625,7 @@ void fillWriterVectorsAndLastestSessionRecords(
SessionRecordMap* latestSessionRecords) {
const auto serviceContext = opCtx->getServiceContext();
const auto storageEngine = serviceContext->getGlobalStorageEngine();
+
const bool supportsDocLocking = storageEngine->supportsDocLocking();
const uint32_t numWriters = writerVectors->size();
@@ -656,7 +670,7 @@ void fillWriterVectorsAndLastestSessionRecords(
auto it = latestSessionRecords->find(lsid);
if (it == latestSessionRecords->end()) {
latestSessionRecords->emplace(lsid, std::move(record));
- } else if (record > it->second) {
+ } else if (isSessionTxnRecordLaterThan(record, it->second)) {
(*latestSessionRecords)[lsid] = std::move(record);
}
}
diff --git a/src/mongo/db/session.cpp b/src/mongo/db/session.cpp
index c2ac4731b87..7c66713b13c 100644
--- a/src/mongo/db/session.cpp
+++ b/src/mongo/db/session.cpp
@@ -267,6 +267,11 @@ boost::optional<repl::OplogEntry> Session::checkStatementExecuted(OperationConte
MONGO_UNREACHABLE;
}
+bool Session::checkStatementExecutedNoOplogEntryFetch(TxnNumber txnNumber, StmtId stmtId) const {
+ stdx::lock_guard<stdx::mutex> lg(_mutex);
+ return bool(_checkStatementExecuted(lg, txnNumber, stmtId));
+}
+
void Session::_beginTxn(WithLock wl, TxnNumber txnNumber) {
_checkValid(wl);
diff --git a/src/mongo/db/session.h b/src/mongo/db/session.h
index 21d8d618a9a..ffc0b7b59f5 100644
--- a/src/mongo/db/session.h
+++ b/src/mongo/db/session.h
@@ -34,7 +34,7 @@
#include "mongo/bson/timestamp.h"
#include "mongo/db/logical_session_id.h"
#include "mongo/db/repl/oplog_entry.h"
-#include "mongo/db/session_txn_record.h"
+#include "mongo/db/session_txn_record_gen.h"
#include "mongo/stdx/unordered_map.h"
#include "mongo/util/concurrency/with_lock.h"
@@ -135,6 +135,8 @@ public:
TxnNumber txnNumber,
StmtId stmtId) const;
+ bool checkStatementExecutedNoOplogEntryFetch(TxnNumber txnNumber, StmtId stmtId) const;
+
private:
void _beginTxn(WithLock, TxnNumber txnNumber);
diff --git a/src/mongo/db/session_txn_record.h b/src/mongo/db/session_txn_record.h
deleted file mode 100644
index 332f95c5ef5..00000000000
--- a/src/mongo/db/session_txn_record.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Copyright (C) 2017 MongoDB Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License, version 3,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the GNU Affero General Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/logical_session_id.h"
-#include "mongo/db/repl/optime.h"
-#include "mongo/db/session_txn_record_gen.h"
-
-namespace mongo {
-
-inline bool operator==(const SessionTxnRecord& lhs, const SessionTxnRecord& rhs) {
- return (lhs.getSessionId() == rhs.getSessionId()) && (lhs.getTxnNum() == rhs.getTxnNum()) &&
- (lhs.getLastWriteOpTime() == rhs.getLastWriteOpTime());
-}
-
-inline bool operator!=(const SessionTxnRecord& lhs, const SessionTxnRecord& rhs) {
- return !(lhs == rhs);
-}
-
-/**
- * A record is greater (i.e. later) than another if its transaction number is greater, or if its
- * transaction number is the same, and its last write optime is greater. Records can only be
- * compared meaningfully for the same session id.
- */
-inline bool operator>(const SessionTxnRecord& lhs, const SessionTxnRecord& rhs) {
- invariant(lhs.getSessionId() == rhs.getSessionId());
-
- return (lhs.getTxnNum() > rhs.getTxnNum()) ||
- (lhs.getTxnNum() == rhs.getTxnNum() && lhs.getLastWriteOpTime() > rhs.getLastWriteOpTime());
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/transaction_reaper.cpp b/src/mongo/db/transaction_reaper.cpp
index efdbb4bc3d2..d8203bc8e3f 100644
--- a/src/mongo/db/transaction_reaper.cpp
+++ b/src/mongo/db/transaction_reaper.cpp
@@ -38,7 +38,7 @@
#include "mongo/db/ops/write_ops.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/server_parameters.h"
-#include "mongo/db/session_txn_record.h"
+#include "mongo/db/session_txn_record_gen.h"
#include "mongo/db/sessions_collection.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/client/shard.h"
@@ -85,16 +85,11 @@ const auto kLastWriteTimestampFieldName =
* to pull records likely to be on the same chunks (because they sort near each other).
*/
Query makeQuery(Date_t now) {
- Timestamp possiblyExpired(
+ const Timestamp possiblyExpired(
duration_cast<Seconds>(
(now - Minutes(TransactionRecordMinimumLifetimeMinutes)).toDurationSinceEpoch()),
0);
- BSONObjBuilder bob;
- {
- BSONObjBuilder subbob(bob.subobjStart(kLastWriteTimestampFieldName));
- subbob.append("$lt", possiblyExpired);
- }
- Query query(bob.obj());
+ Query query(BSON(kLastWriteTimestampFieldName << LT << possiblyExpired));
query.sort(kSortById);
return query;
}