diff options
author | Daniel Gottlieb <daniel.gottlieb@10gen.com> | 2017-03-29 10:37:01 -0400 |
---|---|---|
committer | Daniel Gottlieb <daniel.gottlieb@10gen.com> | 2017-03-29 11:58:39 -0400 |
commit | bc1a7d9aa34f3c7e61f89afa481e677e4981437d (patch) | |
tree | 24b74d999d802565db6376491a588d4d6018567e | |
parent | 83007991e9eaba391988c4161fe8f68352c57939 (diff) | |
download | mongo-bc1a7d9aa34f3c7e61f89afa481e677e4981437d.tar.gz |
SERVER-26452: WCE retry on renameCollection. Address possible dassert in dropIndex.
Add a write conflict retry loop around the fast-path for renameCollection,
when the source and target collection are in the same database.
Additionally, when indexes are being dropped, remove them from the
collection info cache before removing them from the catalog
(`_deleteFromDisk`). Removing an index from a catalog can result in
a WriteConflictException that triggers the rollback on the
IndexRemoveChange. The rollback adds the index back into the
collection info cache, which dasserts the index does not exist.
Lastly, this patch separates out the WTWriteConflictException failpoint
into one for modifications (WT_CURSOR::insert/remove etc...) and one
for reads (WT_CURSOR::search/next etc...). The new failpoint is named
`WTWriteConflictExceptionForReads`. Commands that only roll a die for
the read failpoint include `listCollections` and `validate`. This
distinction is useful for resmoke/js tests that are not resilient to
those commands failing with a write conflict.
-rw-r--r-- | src/mongo/db/catalog/index_catalog.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/catalog/rename_collection.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h | 10 |
5 files changed, 35 insertions, 23 deletions
diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp index faf7ad134e1..382c060bdbd 100644 --- a/src/mongo/db/catalog/index_catalog.cpp +++ b/src/mongo/db/catalog/index_catalog.cpp @@ -973,12 +973,12 @@ Status IndexCatalog::_dropIndex(OperationContext* opCtx, IndexCatalogEntry* entr invariant(_entries.release(entry->descriptor()) == entry); opCtx->recoveryUnit()->registerChange( new IndexRemoveChange(opCtx, _collection, &_entries, entry)); + _collection->infoCache()->droppedIndex(opCtx, indexName); entry = NULL; _deleteIndexFromDisk(opCtx, indexName, indexNamespace); _checkMagic(); - _collection->infoCache()->droppedIndex(opCtx, indexName); return Status::OK(); } diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp index b0c5e6b3aa0..d3b93f3ec2b 100644 --- a/src/mongo/db/catalog/rename_collection.cpp +++ b/src/mongo/db/catalog/rename_collection.cpp @@ -41,6 +41,8 @@ #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/catalog/index_create.h" #include "mongo/db/client.h" +#include "mongo/db/concurrency/write_conflict_exception.h" +#include "mongo/db/curop.h" #include "mongo/db/db_raii.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index_builder.h" @@ -124,7 +126,7 @@ Status renameCollection(OperationContext* opCtx, Database* const targetDB = dbHolder().openDb(opCtx, target.db()); - { + MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { WriteUnitOfWork wunit(opCtx); // Check if the target namespace exists and if dropTarget is true. @@ -165,6 +167,7 @@ Status renameCollection(OperationContext* opCtx, wunit.commit(); } + MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, "renameCollection", target.ns()); // If we get here, we are renaming across databases, so we must copy all the data and // indexes, then remove the source collection. 
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp index eb91754ab9f..05bafb50c96 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp @@ -374,7 +374,7 @@ bool WiredTigerIndex::isEmpty(OperationContext* opCtx) { WT_CURSOR* c = curwrap.get(); if (!c) return true; - int ret = WT_OP_CHECK(c->next(c)); + int ret = WT_READ_CHECK(c->next(c)); if (ret == WT_NOTFOUND) return true; invariantWTOK(ret); @@ -435,7 +435,7 @@ bool WiredTigerIndex::isDup(WT_CURSOR* c, const BSONObj& key, const RecordId& id KeyString data(keyStringVersion(), key, _ordering); WiredTigerItem item(data.getBuffer(), data.getSize()); c->set_key(c, item.Get()); - int ret = WT_OP_CHECK(c->search(c)); + int ret = WT_READ_CHECK(c->search(c)); if (ret == WT_NOTFOUND) { return false; } @@ -820,7 +820,7 @@ protected: void advanceWTCursor() { WT_CURSOR* c = _cursor->get(); - int ret = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c)); + int ret = WT_READ_CHECK(_forward ? c->next(c) : c->prev(c)); if (ret == WT_NOTFOUND) { _cursorAtEof = true; return; @@ -837,7 +837,7 @@ protected: const WiredTigerItem keyItem(query.getBuffer(), query.getSize()); c->set_key(c, keyItem.Get()); - int ret = WT_OP_CHECK(c->search_near(c, &cmp)); + int ret = WT_READ_CHECK(c->search_near(c, &cmp)); if (ret == WT_NOTFOUND) { _cursorAtEof = true; TRACE_CURSOR << "\t not found"; @@ -989,7 +989,7 @@ public: c->set_key(c, keyItem.Get()); // Using search rather than search_near. - int ret = WT_OP_CHECK(c->search(c)); + int ret = WT_READ_CHECK(c->search(c)); if (ret != WT_NOTFOUND) invariantWTOK(ret); _cursorAtEof = ret == WT_NOTFOUND; @@ -1040,7 +1040,7 @@ Status WiredTigerIndexUnique::_insert(WT_CURSOR* c, // we put them all in the "list" // Note that we can't omit AllZeros when there are multiple ids for a value. When we remove // down to a single value, it will be cleaned up. 
- ret = WT_OP_CHECK(c->search(c)); + ret = WT_READ_CHECK(c->search(c)); invariantWTOK(ret); WT_ITEM old; @@ -1100,7 +1100,7 @@ void WiredTigerIndexUnique::_unindex(WT_CURSOR* c, // dups are allowed, so we have to deal with a vector of RecordIds. - int ret = WT_OP_CHECK(c->search(c)); + int ret = WT_READ_CHECK(c->search(c)); if (ret == WT_NOTFOUND) { // WT_NOTFOUND is only expected during a background index build. Insert a dummy value and // delete it again to trigger a write conflict in case this is being concurrently indexed by diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp index 46dadad3467..593fd2d8bca 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp @@ -91,6 +91,7 @@ bool shouldUseOplogHack(OperationContext* opCtx, const std::string& uri) { } // namespace MONGO_FP_DECLARE(WTWriteConflictException); +MONGO_FP_DECLARE(WTWriteConflictExceptionForReads); MONGO_FP_DECLARE(WTPausePrimaryOplogDurabilityLoop); const std::string kWiredTigerEngineName = "wiredTiger"; @@ -453,7 +454,7 @@ public: // Nothing after the next line can throw WCEs. // Note that an unpositioned (or eof) WT_CURSOR returns the first/last entry in the // table when you call next/prev. - int advanceRet = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c)); + int advanceRet = WT_READ_CHECK(_forward ? c->next(c) : c->prev(c)); if (advanceRet == WT_NOTFOUND) { _eof = true; return {}; @@ -492,7 +493,7 @@ public: WT_CURSOR* c = _cursor->get(); c->set_key(c, _makeKey(id)); // Nothing after the next line can throw WCEs. 
- int seekRet = WT_OP_CHECK(c->search(c)); + int seekRet = WT_READ_CHECK(c->search(c)); if (seekRet == WT_NOTFOUND) { _eof = true; return {}; @@ -541,7 +542,7 @@ public: c->set_key(c, _makeKey(_lastReturnedId)); int cmp; - int ret = WT_OP_CHECK(c->search_near(c, &cmp)); + int ret = WT_READ_CHECK(c->search_near(c, &cmp)); if (ret == WT_NOTFOUND) { _eof = true; return !_rs._isCapped; @@ -645,7 +646,7 @@ public: } boost::optional<Record> next() final { - int advanceRet = WT_OP_CHECK(_cursor->next(_cursor)); + int advanceRet = WT_READ_CHECK(_cursor->next(_cursor)); if (advanceRet == WT_NOTFOUND) return {}; invariantWTOK(advanceRet); @@ -945,7 +946,7 @@ RecordData WiredTigerRecordStore::dataFor(OperationContext* opCtx, const RecordI WT_CURSOR* c = curwrap.get(); invariant(c); c->set_key(c, _makeKey(id)); - int ret = WT_OP_CHECK(c->search(c)); + int ret = WT_READ_CHECK(c->search(c)); massert(28556, "Didn't find RecordId in WiredTigerRecordStore", ret != WT_NOTFOUND); invariantWTOK(ret); return _getData(curwrap); @@ -958,7 +959,7 @@ bool WiredTigerRecordStore::findRecord(OperationContext* opCtx, WT_CURSOR* c = curwrap.get(); invariant(c); c->set_key(c, _makeKey(id)); - int ret = WT_OP_CHECK(c->search(c)); + int ret = WT_READ_CHECK(c->search(c)); if (ret == WT_NOTFOUND) { return false; } @@ -976,7 +977,7 @@ void WiredTigerRecordStore::deleteRecord(OperationContext* opCtx, const RecordId cursor.assertInActiveTxn(); WT_CURSOR* c = cursor.get(); c->set_key(c, _makeKey(id)); - int ret = WT_OP_CHECK(c->search(c)); + int ret = WT_READ_CHECK(c->search(c)); invariantWTOK(ret); WT_ITEM old_value; @@ -1083,7 +1084,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* opC if (_cappedFirstRecord != RecordId()) { int64_t key = _makeKey(_cappedFirstRecord); truncateEnd->set_key(truncateEnd, key); - ret = WT_OP_CHECK(truncateEnd->search(truncateEnd)); + ret = WT_READ_CHECK(truncateEnd->search(truncateEnd)); if (ret == 0) { positioned = true; savedFirstKey = 
key; @@ -1092,7 +1093,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* opC // Advance the cursor truncateEnd until we find a suitable end point for our truncate while ((sizeSaved < sizeOverCap || docsRemoved < docsOverCap) && (docsRemoved < 20000) && - (positioned || (ret = WT_OP_CHECK(truncateEnd->next(truncateEnd))) == 0)) { + (positioned || (ret = WT_READ_CHECK(truncateEnd->next(truncateEnd))) == 0)) { positioned = false; int64_t key; invariantWTOK(truncateEnd->get_key(truncateEnd, &key)); @@ -1127,7 +1128,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* opC if (docsRemoved > 0) { // if we scanned to the end of the collection or past our insert, go back one if (ret == WT_NOTFOUND || newestIdToDelete >= justInserted) { - ret = WT_OP_CHECK(truncateEnd->prev(truncateEnd)); + ret = WT_READ_CHECK(truncateEnd->prev(truncateEnd)); } invariantWTOK(ret); @@ -1418,7 +1419,7 @@ Status WiredTigerRecordStore::updateRecord(OperationContext* opCtx, WT_CURSOR* c = curwrap.get(); invariant(c); c->set_key(c, _makeKey(id)); - int ret = WT_OP_CHECK(c->search(c)); + int ret = WT_READ_CHECK(c->search(c)); invariantWTOK(ret); WT_ITEM old_value; @@ -1504,7 +1505,7 @@ std::vector<std::unique_ptr<RecordCursor>> WiredTigerRecordStore::getManyCursors Status WiredTigerRecordStore::truncate(OperationContext* opCtx) { WiredTigerCursor startWrap(_uri, _tableId, true, opCtx); WT_CURSOR* start = startWrap.get(); - int ret = WT_OP_CHECK(start->next(start)); + int ret = WT_READ_CHECK(start->next(start)); // Empty collections don't have anything to truncate. 
if (ret == WT_NOTFOUND) { return Status::OK(); @@ -1784,7 +1785,7 @@ boost::optional<RecordId> WiredTigerRecordStore::oplogStartHack( int cmp; c->set_key(c, _makeKey(startingPosition)); - int ret = WT_OP_CHECK(c->search_near(c, &cmp)); + int ret = WT_READ_CHECK(c->search_near(c, &cmp)); if (ret == 0 && cmp > 0) ret = c->prev(c); // landed one higher than startingPosition if (ret == WT_NOTFOUND) diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h index 131797e5fb5..5e11b67e90a 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h @@ -46,10 +46,17 @@ /** * Either executes the specified operation and returns it's value or randomly throws a write - * conflict exception if the WTWriteConflictException failpoint is enabled. + * conflict exception if the WTWriteConflictException failpoint is enabled. This is only checked + * on cursor methods that make modifications. */ #define WT_OP_CHECK(x) (((MONGO_FAIL_POINT(WTWriteConflictException))) ? (WT_ROLLBACK) : (x)) +/** + * Identical to WT_OP_CHECK except this is checked on cursor seeks/advancement. + */ +#define WT_READ_CHECK(x) \ + (((MONGO_FAIL_POINT(WTWriteConflictExceptionForReads))) ? (WT_ROLLBACK) : (x)) + namespace mongo { class RecoveryUnit; @@ -327,6 +334,7 @@ private: // WT failpoint to throw write conflict exceptions randomly MONGO_FP_FORWARD_DECLARE(WTWriteConflictException); +MONGO_FP_FORWARD_DECLARE(WTWriteConflictExceptionForReads); // Prevents oplog writes from being considered durable on the primary. Once activated, new writes // will not be considered durable until deactivated. It is unspecified whether writes that commit |