diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2017-10-20 17:03:18 -0400 |
---|---|---|
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2017-10-23 17:54:51 -0400 |
commit | aa95387d1a8fb475c267a3ca0e84a1b4a18a7ace (patch) | |
tree | f258c7c961e7577d35b49080cfe1abe660889ab9 | |
parent | dbe347e2ec54858a39a2b4c59d5812214b4b94cd (diff) | |
download | mongo-aa95387d1a8fb475c267a3ca0e84a1b4a18a7ace.tar.gz |
SERVER-31328 Don't fetch oplog entries for retries of executed inserts or deletes
-rw-r--r-- | src/mongo/db/ops/write_ops_exec.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/ops/write_ops_retryability.cpp | 42 | ||||
-rw-r--r-- | src/mongo/db/ops/write_ops_retryability.h | 10 | ||||
-rw-r--r-- | src/mongo/db/ops/write_ops_retryability_test.cpp | 92 | ||||
-rw-r--r-- | src/mongo/db/repl/sync_tail.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/session.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/session.h | 4 | ||||
-rw-r--r-- | src/mongo/db/session_txn_record.h | 58 | ||||
-rw-r--r-- | src/mongo/db/transaction_reaper.cpp | 11 |
9 files changed, 43 insertions, 215 deletions
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp index d3a9813039b..4cf25eafbad 100644 --- a/src/mongo/db/ops/write_ops_exec.cpp +++ b/src/mongo/db/ops/write_ops_exec.cpp @@ -424,6 +424,13 @@ StmtId getStmtIdForWriteOp(OperationContext* opCtx, const T& wholeOp, size_t opI : kUninitializedStmtId; } +SingleWriteResult makeWriteResultForInsertOrDeleteRetry() { + SingleWriteResult res; + res.setN(1); + res.setNModified(0); + return res; +} + } // namespace WriteResult performInserts(OperationContext* opCtx, const write_ops::Insert& wholeOp) { @@ -482,9 +489,9 @@ WriteResult performInserts(OperationContext* opCtx, const write_ops::Insert& who const auto stmtId = getStmtIdForWriteOp(opCtx, wholeOp, stmtIdIndex++); if (opCtx->getTxnNumber()) { auto session = OperationContextSession::get(opCtx); - if (auto entry = - session->checkStatementExecuted(opCtx, *opCtx->getTxnNumber(), stmtId)) { - out.results.emplace_back(parseOplogEntryForInsert(*entry)); + if (session->checkStatementExecutedNoOplogEntryFetch(*opCtx->getTxnNumber(), + stmtId)) { + out.results.emplace_back(makeWriteResultForInsertOrDeleteRetry()); continue; } } @@ -758,9 +765,8 @@ WriteResult performDeletes(OperationContext* opCtx, const write_ops::Delete& who const auto stmtId = getStmtIdForWriteOp(opCtx, wholeOp, stmtIdIndex++); if (opCtx->getTxnNumber()) { auto session = OperationContextSession::get(opCtx); - if (auto entry = - session->checkStatementExecuted(opCtx, *opCtx->getTxnNumber(), stmtId)) { - out.results.emplace_back(parseOplogEntryForDelete(*entry)); + if (session->checkStatementExecutedNoOplogEntryFetch(*opCtx->getTxnNumber(), stmtId)) { + out.results.emplace_back(makeWriteResultForInsertOrDeleteRetry()); continue; } } diff --git a/src/mongo/db/ops/write_ops_retryability.cpp b/src/mongo/db/ops/write_ops_retryability.cpp index 28ce5bed8be..10e5c8701a2 100644 --- a/src/mongo/db/ops/write_ops_retryability.cpp +++ b/src/mongo/db/ops/write_ops_retryability.cpp @@ -173,27 +173,6 @@ repl::OplogEntry getInnerNestedOplogEntry(const repl::OplogEntry& entry) { } // namespace -SingleWriteResult parseOplogEntryForInsert(const repl::OplogEntry& entry) { - if (entry.getOpType() == repl::OpTypeEnum::kNoop) { - return parseOplogEntryForInsert(getInnerNestedOplogEntry(entry)); - } - - uassert(40636, - str::stream() << "insert retry request is not compatible with previous write in the " - "transaction of type: " - << OpType_serializer(entry.getOpType()) - << ", oplogTs: " - << entry.getTimestamp().toString() - << ", oplog: " - << redact(entry.toBSON()), - entry.getOpType() == repl::OpTypeEnum::kInsert); - - SingleWriteResult res; - res.setN(1); - res.setNModified(0); - return res; -} - SingleWriteResult parseOplogEntryForUpdate(const repl::OplogEntry& entry) { SingleWriteResult res; // Upserts are stored as inserts. @@ -223,27 +202,6 @@ SingleWriteResult parseOplogEntryForUpdate(const repl::OplogEntry& entry) { return res; } -SingleWriteResult parseOplogEntryForDelete(const repl::OplogEntry& entry) { - if (entry.getOpType() == repl::OpTypeEnum::kNoop) { - return parseOplogEntryForDelete(getInnerNestedOplogEntry(entry)); - } - - uassert(40637, - str::stream() << "delete retry request is not compatible with previous write in the " - "transaction of type: " - << OpType_serializer(entry.getOpType()) - << ", oplogTs: " - << entry.getTimestamp().toString() - << ", oplog: " - << redact(entry.toBSON()), - entry.getOpType() == repl::OpTypeEnum::kDelete); - - SingleWriteResult res; - res.setN(1); - res.setNModified(0); - return res; -} - void parseOplogEntryForFindAndModify(OperationContext* opCtx, const FindAndModifyRequest& request, const repl::OplogEntry& oplogEntry, diff --git a/src/mongo/db/ops/write_ops_retryability.h b/src/mongo/db/ops/write_ops_retryability.h index fa73a4d34ed..55bd53bb804 100644 --- a/src/mongo/db/ops/write_ops_retryability.h +++ b/src/mongo/db/ops/write_ops_retryability.h @@ -39,14 +39,12 @@ class FindAndModifyRequest; class OperationContext; /** - * Returns the single write result corresponding to the given oplog entry for insert, update, and - * delete commands, i.e. the single write result that would have been returned by the statement that - * would have resulted in the given oplog entry. The oplog entries are assumed to be properly - * formed and have the correct op type. + * Returns the single write result corresponding to the given oplog entry for document update. I.e., + * the single write result that would have been returned by the statement that would have resulted + * in the given oplog entry. The oplog entries are assumed to be properly formed and have the + * correct op type. */ -SingleWriteResult parseOplogEntryForInsert(const repl::OplogEntry& entry); SingleWriteResult parseOplogEntryForUpdate(const repl::OplogEntry& entry); -SingleWriteResult parseOplogEntryForDelete(const repl::OplogEntry& entry); /** * Populates the passed-in builder with the result of a findAndModify based on the oplog entries diff --git a/src/mongo/db/ops/write_ops_retryability_test.cpp b/src/mongo/db/ops/write_ops_retryability_test.cpp index 4c6268c8c7f..7bf0ff72673 100644 --- a/src/mongo/db/ops/write_ops_retryability_test.cpp +++ b/src/mongo/db/ops/write_ops_retryability_test.cpp @@ -48,42 +48,6 @@ const BSONObj kNestedOplog(BSON("$sessionMigrateInfo" << 1)); using WriteOpsRetryability = ServiceContextMongoDTest; -TEST_F(WriteOpsRetryability, ParseOplogEntryForInsert) { - const auto entry = assertGet( - repl::OplogEntry::parse(BSON("ts" << Timestamp(50, 10) << "t" << 1LL << "h" << 0LL << "op" - << "i" - << "ns" - << "a.b" - << "o" - << BSON("_id" << 1 << "x" << 5)))); - - auto res = parseOplogEntryForInsert(entry); - - ASSERT_EQ(res.getN(), 1); - ASSERT_EQ(res.getNModified(), 0); - ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSONObj()); -} - -TEST_F(WriteOpsRetryability, ParseOplogEntryForNestedInsert) { - repl::OplogEntry innerOplog(repl::OpTime(Timestamp(50, 10), 1), - 0, - repl::OpTypeEnum::kInsert, - NamespaceString("a.b"), - BSON("_id" << 2)); - repl::OplogEntry insertOplog(repl::OpTime(Timestamp(60, 10), 1), - 0, - repl::OpTypeEnum::kNoop, - NamespaceString("a.b"), - kNestedOplog, - innerOplog.toBSON()); - - auto res = parseOplogEntryForInsert(insertOplog); - - ASSERT_EQ(res.getN(), 1); - ASSERT_EQ(res.getNModified(), 0); - ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSONObj()); -} - TEST_F(WriteOpsRetryability, ParseOplogEntryForUpdate) { const auto entry = assertGet( repl::OplogEntry::parse(BSON("ts" << Timestamp(50, 10) << "t" << 1LL << "h" << 0LL << "op" @@ -159,52 +123,6 @@ TEST_F(WriteOpsRetryability, ParseOplogEntryForNestedUpsert) { ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSON("_id" << 2)); } -TEST_F(WriteOpsRetryability, ParseOplogEntryForDelete) { - const auto entry = assertGet( - repl::OplogEntry::parse(BSON("ts" << Timestamp(50, 10) << "t" << 1LL << "h" << 0LL << "op" - << "d" - << "ns" - << "a.b" - << "o" - << BSON("_id" << 1 << "x" << 5)))); - - auto res = parseOplogEntryForDelete(entry); - - ASSERT_EQ(res.getN(), 1); - ASSERT_EQ(res.getNModified(), 0); - ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSONObj()); -} - -TEST_F(WriteOpsRetryability, ParseOplogEntryForNestedDelete) { - repl::OplogEntry innerOplog(repl::OpTime(Timestamp(50, 10), 1), - 0, - repl::OpTypeEnum::kDelete, - NamespaceString("a.b"), - BSON("_id" << 2)); - repl::OplogEntry deleteOplog(repl::OpTime(Timestamp(60, 10), 1), - 0, - repl::OpTypeEnum::kNoop, - NamespaceString("a.b"), - kNestedOplog, - innerOplog.toBSON()); - - auto res = parseOplogEntryForDelete(deleteOplog); - - ASSERT_EQ(res.getN(), 1); - ASSERT_EQ(res.getNModified(), 0); - ASSERT_BSONOBJ_EQ(res.getUpsertedId(), BSONObj()); -} - -TEST_F(WriteOpsRetryability, ShouldFailIfParsingDeleteOplogForInsert) { - repl::OplogEntry deleteOplog(repl::OpTime(Timestamp(50, 10), 1), - 0, - repl::OpTypeEnum::kDelete, - NamespaceString("a.b"), - BSON("_id" << 2)); - - ASSERT_THROWS(parseOplogEntryForInsert(deleteOplog), AssertionException); -} - TEST_F(WriteOpsRetryability, ShouldFailIfParsingDeleteOplogForUpdate) { repl::OplogEntry deleteOplog(repl::OpTime(Timestamp(50, 10), 1), 0, @@ -215,16 +133,6 @@ TEST_F(WriteOpsRetryability, ShouldFailIfParsingDeleteOplogForUpdate) { ASSERT_THROWS(parseOplogEntryForUpdate(deleteOplog), AssertionException); } -TEST_F(WriteOpsRetryability, ShouldFailIfParsingInsertOplogForDelete) { - repl::OplogEntry insertOplog(repl::OpTime(Timestamp(50, 10), 1), - 0, - repl::OpTypeEnum::kInsert, - NamespaceString("a.b"), - BSON("_id" << 2)); - - ASSERT_THROWS(parseOplogEntryForDelete(insertOplog), AssertionException); -} - class FindAndModifyRetryability : public MockReplCoordServerFixture { public: FindAndModifyRetryability() = default; diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp index 89f4ee11448..25295eb133d 100644 --- a/src/mongo/db/repl/sync_tail.cpp +++ b/src/mongo/db/repl/sync_tail.cpp @@ -69,7 +69,7 @@ #include "mongo/db/server_parameters.h" #include "mongo/db/service_context.h" #include "mongo/db/session.h" -#include "mongo/db/session_txn_record.h" +#include "mongo/db/session_txn_record_gen.h" #include "mongo/db/stats/timer_stats.h" #include "mongo/stdx/memory.h" #include "mongo/util/exit.h" @@ -552,6 +552,19 @@ void scheduleTxnTableUpdates(OperationContext* opCtx, } /** + * A session txn record is greater (i.e. later) than another if its transaction number is greater, + * or if its transaction number is the same, and its last write optime is greater. + * + * Records can only be compared meaningfully if they are for the same session id. + */ +bool isSessionTxnRecordLaterThan(const SessionTxnRecord& lhs, const SessionTxnRecord& rhs) { + invariant(lhs.getSessionId() == rhs.getSessionId()); + + return (lhs.getTxnNum() > rhs.getTxnNum()) || + (lhs.getTxnNum() == rhs.getTxnNum() && lhs.getLastWriteOpTime() > rhs.getLastWriteOpTime()); +} + +/** * Caches per-collection properties which are relevant for oplog application, so that they don't * have to be retrieved repeatedly for each op. */ @@ -612,6 +625,7 @@ void fillWriterVectorsAndLastestSessionRecords( SessionRecordMap* latestSessionRecords) { const auto serviceContext = opCtx->getServiceContext(); const auto storageEngine = serviceContext->getGlobalStorageEngine(); + const bool supportsDocLocking = storageEngine->supportsDocLocking(); const uint32_t numWriters = writerVectors->size(); @@ -656,7 +670,7 @@ void fillWriterVectorsAndLastestSessionRecords( auto it = latestSessionRecords->find(lsid); if (it == latestSessionRecords->end()) { latestSessionRecords->emplace(lsid, std::move(record)); - } else if (record > it->second) { + } else if (isSessionTxnRecordLaterThan(record, it->second)) { (*latestSessionRecords)[lsid] = std::move(record); } } diff --git a/src/mongo/db/session.cpp b/src/mongo/db/session.cpp index c2ac4731b87..7c66713b13c 100644 --- a/src/mongo/db/session.cpp +++ b/src/mongo/db/session.cpp @@ -267,6 +267,11 @@ boost::optional<repl::OplogEntry> Session::checkStatementExecuted(OperationConte MONGO_UNREACHABLE; } +bool Session::checkStatementExecutedNoOplogEntryFetch(TxnNumber txnNumber, StmtId stmtId) const { + stdx::lock_guard<stdx::mutex> lg(_mutex); + return bool(_checkStatementExecuted(lg, txnNumber, stmtId)); +} + void Session::_beginTxn(WithLock wl, TxnNumber txnNumber) { _checkValid(wl); diff --git a/src/mongo/db/session.h b/src/mongo/db/session.h index 21d8d618a9a..ffc0b7b59f5 100644 --- a/src/mongo/db/session.h +++ b/src/mongo/db/session.h @@ -34,7 +34,7 @@ #include "mongo/bson/timestamp.h" #include "mongo/db/logical_session_id.h" #include "mongo/db/repl/oplog_entry.h" -#include "mongo/db/session_txn_record.h" +#include "mongo/db/session_txn_record_gen.h" #include "mongo/stdx/unordered_map.h" #include "mongo/util/concurrency/with_lock.h" @@ -135,6 +135,8 @@ public: TxnNumber txnNumber, StmtId stmtId) const; + bool checkStatementExecutedNoOplogEntryFetch(TxnNumber txnNumber, StmtId stmtId) const; + private: void _beginTxn(WithLock, TxnNumber txnNumber); diff --git a/src/mongo/db/session_txn_record.h b/src/mongo/db/session_txn_record.h deleted file mode 100644 index 332f95c5ef5..00000000000 --- a/src/mongo/db/session_txn_record.h +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright (C) 2017 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/logical_session_id.h" -#include "mongo/db/repl/optime.h" -#include "mongo/db/session_txn_record_gen.h" - -namespace mongo { - -inline bool operator==(const SessionTxnRecord& lhs, const SessionTxnRecord& rhs) { - return (lhs.getSessionId() == rhs.getSessionId()) && (lhs.getTxnNum() == rhs.getTxnNum()) && - (lhs.getLastWriteOpTime() == rhs.getLastWriteOpTime()); -} - -inline bool operator!=(const SessionTxnRecord& lhs, const SessionTxnRecord& rhs) { - return !(lhs == rhs); -} - -/** - * A record is greater (i.e. later) than another if its transaction number is greater, or if its - * transaction number is the same, and its last write optime is greater. Records can only be - * compared meaningfully for the same session id. - */ -inline bool operator>(const SessionTxnRecord& lhs, const SessionTxnRecord& rhs) { - invariant(lhs.getSessionId() == rhs.getSessionId()); - - return (lhs.getTxnNum() > rhs.getTxnNum()) || - (lhs.getTxnNum() == rhs.getTxnNum() && lhs.getLastWriteOpTime() > rhs.getLastWriteOpTime()); -} - -} // namespace mongo diff --git a/src/mongo/db/transaction_reaper.cpp b/src/mongo/db/transaction_reaper.cpp index efdbb4bc3d2..d8203bc8e3f 100644 --- a/src/mongo/db/transaction_reaper.cpp +++ b/src/mongo/db/transaction_reaper.cpp @@ -38,7 +38,7 @@ #include "mongo/db/ops/write_ops.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/server_parameters.h" -#include "mongo/db/session_txn_record.h" +#include "mongo/db/session_txn_record_gen.h" #include "mongo/db/sessions_collection.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/client/shard.h" @@ -85,16 +85,11 @@ const auto kLastWriteTimestampFieldName = * to pull records likely to be on the same chunks (because they sort near each other). */ Query makeQuery(Date_t now) { - Timestamp possiblyExpired( + const Timestamp possiblyExpired( duration_cast<Seconds>( (now - Minutes(TransactionRecordMinimumLifetimeMinutes)).toDurationSinceEpoch()), 0); - BSONObjBuilder bob; - { - BSONObjBuilder subbob(bob.subobjStart(kLastWriteTimestampFieldName)); - subbob.append("$lt", possiblyExpired); - } - Query query(bob.obj()); + Query query(BSON(kLastWriteTimestampFieldName << LT << possiblyExpired)); query.sort(kSortById); return query; } |