summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Gottlieb <daniel.gottlieb@10gen.com>2017-03-29 10:37:01 -0400
committerDaniel Gottlieb <daniel.gottlieb@mongodb.com>2017-06-27 16:39:36 -0400
commitfff65159be47d38e0333c0e75c778c62484a776b (patch)
tree4da1f0d0854d090467a9628b651e2cef27dafd60
parentd915259708b79fd7394aa8de9779520710d2bbe7 (diff)
downloadmongo-fff65159be47d38e0333c0e75c778c62484a776b.tar.gz
SERVER-26452: WCE retry on renameCollection. Address possible dassert in dropIndex.
Add a write conflict retry loop around the fast-path for renameCollection, when the source and target collection are in the same database. Additionally, when indexes are being dropped, remove them from the collection info cache before removing them from the catalog (`_deleteFromDisk`). Removing an index from a catalog can result in a WriteConflictException that triggers the rollback on the IndexRemoveChange. The rollback adds the index back into the collection info cache, which dasserts the index does not exist. Lastly, this patch separates out the WTWriteConflictException failpoint into one for modifications (WT_CURSOR::insert/remove, etc.) and one for reads (WT_CURSOR::search/next, etc.). The new failpoint is named `WTWriteConflictExceptionForReads`. Commands that only roll a die for the read failpoint include `listCollections` and `validate`. This distinction is useful for resmoke/js tests that are not resilient to those commands failing with a write conflict. (cherry picked from commit bc1a7d9aa34f3c7e61f89afa481e677e4981437d)
-rw-r--r--src/mongo/db/catalog/index_catalog.cpp4
-rw-r--r--src/mongo/db/catalog/rename_collection.cpp5
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp14
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp27
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h10
5 files changed, 36 insertions, 24 deletions
diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp
index c6fc1fbcf0f..61dd4d9fd75 100644
--- a/src/mongo/db/catalog/index_catalog.cpp
+++ b/src/mongo/db/catalog/index_catalog.cpp
@@ -965,13 +965,13 @@ Status IndexCatalog::_dropIndex(OperationContext* txn, IndexCatalogEntry* entry)
invariant(_entries.release(entry->descriptor()) == entry);
txn->recoveryUnit()->registerChange(new IndexRemoveChange(txn, _collection, &_entries, entry));
+ _collection->infoCache()->droppedIndex(txn, indexName);
entry = NULL;
+
_deleteIndexFromDisk(txn, indexName, indexNamespace);
_checkMagic();
- _collection->infoCache()->droppedIndex(txn, indexName);
-
return Status::OK();
}
diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp
index 6ec4e9348e4..d9d25a951dc 100644
--- a/src/mongo/db/catalog/rename_collection.cpp
+++ b/src/mongo/db/catalog/rename_collection.cpp
@@ -41,6 +41,8 @@
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/catalog/index_create.h"
#include "mongo/db/client.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/curop.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index_builder.h"
@@ -125,7 +127,7 @@ Status renameCollection(OperationContext* txn,
Database* const targetDB = dbHolder().openDb(txn, target.db());
- {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
WriteUnitOfWork wunit(txn);
// Check if the target namespace exists and if dropTarget is true.
@@ -166,6 +168,7 @@ Status renameCollection(OperationContext* txn,
wunit.commit();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "renameCollection", target.ns());
// If we get here, we are renaming across databases, so we must copy all the data and
// indexes, then remove the source collection.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index d819776ae38..6331594a57c 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -368,7 +368,7 @@ bool WiredTigerIndex::isEmpty(OperationContext* txn) {
WT_CURSOR* c = curwrap.get();
if (!c)
return true;
- int ret = WT_OP_CHECK(c->next(c));
+ int ret = WT_READ_CHECK(c->next(c));
if (ret == WT_NOTFOUND)
return true;
invariantWTOK(ret);
@@ -429,7 +429,7 @@ bool WiredTigerIndex::isDup(WT_CURSOR* c, const BSONObj& key, const RecordId& id
KeyString data(keyStringVersion(), key, _ordering);
WiredTigerItem item(data.getBuffer(), data.getSize());
c->set_key(c, item.Get());
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret == WT_NOTFOUND) {
return false;
}
@@ -814,7 +814,7 @@ protected:
void advanceWTCursor() {
WT_CURSOR* c = _cursor->get();
- int ret = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
+ int ret = WT_READ_CHECK(_forward ? c->next(c) : c->prev(c));
if (ret == WT_NOTFOUND) {
_cursorAtEof = true;
return;
@@ -831,7 +831,7 @@ protected:
const WiredTigerItem keyItem(query.getBuffer(), query.getSize());
c->set_key(c, keyItem.Get());
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ int ret = WT_READ_CHECK(c->search_near(c, &cmp));
if (ret == WT_NOTFOUND) {
_cursorAtEof = true;
TRACE_CURSOR << "\t not found";
@@ -983,7 +983,7 @@ public:
c->set_key(c, keyItem.Get());
// Using search rather than search_near.
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret != WT_NOTFOUND)
invariantWTOK(ret);
_cursorAtEof = ret == WT_NOTFOUND;
@@ -1034,7 +1034,7 @@ Status WiredTigerIndexUnique::_insert(WT_CURSOR* c,
// we put them all in the "list"
// Note that we can't omit AllZeros when there are multiple ids for a value. When we remove
// down to a single value, it will be cleaned up.
- ret = WT_OP_CHECK(c->search(c));
+ ret = WT_READ_CHECK(c->search(c));
invariantWTOK(ret);
WT_ITEM old;
@@ -1123,7 +1123,7 @@ void WiredTigerIndexUnique::_unindex(WT_CURSOR* c,
// dups are allowed, so we have to deal with a vector of RecordIds.
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret == WT_NOTFOUND) {
triggerWriteConflictAtPoint(c);
return;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index b181fcdb10d..0d3cf93a307 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -91,6 +91,7 @@ bool shouldUseOplogHack(OperationContext* opCtx, const std::string& uri) {
} // namespace
MONGO_FP_DECLARE(WTWriteConflictException);
+MONGO_FP_DECLARE(WTWriteConflictExceptionForReads);
MONGO_FP_DECLARE(WTPausePrimaryOplogDurabilityLoop);
const std::string kWiredTigerEngineName = "wiredTiger";
@@ -450,7 +451,7 @@ public:
// Nothing after the next line can throw WCEs.
// Note that an unpositioned (or eof) WT_CURSOR returns the first/last entry in the
// table when you call next/prev.
- int advanceRet = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
+ int advanceRet = WT_READ_CHECK(_forward ? c->next(c) : c->prev(c));
if (advanceRet == WT_NOTFOUND) {
_eof = true;
return {};
@@ -489,7 +490,7 @@ public:
WT_CURSOR* c = _cursor->get();
c->set_key(c, _makeKey(id));
// Nothing after the next line can throw WCEs.
- int seekRet = WT_OP_CHECK(c->search(c));
+ int seekRet = WT_READ_CHECK(c->search(c));
if (seekRet == WT_NOTFOUND) {
_eof = true;
return {};
@@ -538,7 +539,7 @@ public:
c->set_key(c, _makeKey(_lastReturnedId));
int cmp;
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ int ret = WT_READ_CHECK(c->search_near(c, &cmp));
if (ret == WT_NOTFOUND) {
_eof = true;
return !_rs._isCapped;
@@ -642,7 +643,7 @@ public:
}
boost::optional<Record> next() final {
- int advanceRet = WT_OP_CHECK(_cursor->next(_cursor));
+ int advanceRet = WT_READ_CHECK(_cursor->next(_cursor));
if (advanceRet == WT_NOTFOUND)
return {};
invariantWTOK(advanceRet);
@@ -942,7 +943,7 @@ RecordData WiredTigerRecordStore::dataFor(OperationContext* txn, const RecordId&
WT_CURSOR* c = curwrap.get();
invariant(c);
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
massert(28556, "Didn't find RecordId in WiredTigerRecordStore", ret != WT_NOTFOUND);
invariantWTOK(ret);
return _getData(curwrap);
@@ -955,7 +956,7 @@ bool WiredTigerRecordStore::findRecord(OperationContext* txn,
WT_CURSOR* c = curwrap.get();
invariant(c);
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret == WT_NOTFOUND) {
return false;
}
@@ -973,7 +974,7 @@ void WiredTigerRecordStore::deleteRecord(OperationContext* txn, const RecordId&
cursor.assertInActiveTxn();
WT_CURSOR* c = cursor.get();
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
invariantWTOK(ret);
WT_ITEM old_value;
@@ -1080,7 +1081,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* txn
if (_cappedFirstRecord != RecordId()) {
int64_t key = _makeKey(_cappedFirstRecord);
truncateEnd->set_key(truncateEnd, key);
- ret = WT_OP_CHECK(truncateEnd->search(truncateEnd));
+ ret = WT_READ_CHECK(truncateEnd->search(truncateEnd));
if (ret == 0) {
positioned = true;
savedFirstKey = key;
@@ -1089,7 +1090,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* txn
// Advance the cursor truncateEnd until we find a suitable end point for our truncate
while ((sizeSaved < sizeOverCap || docsRemoved < docsOverCap) && (docsRemoved < 20000) &&
- (positioned || (ret = WT_OP_CHECK(truncateEnd->next(truncateEnd))) == 0)) {
+ (positioned || (ret = WT_READ_CHECK(truncateEnd->next(truncateEnd))) == 0)) {
positioned = false;
int64_t key;
invariantWTOK(truncateEnd->get_key(truncateEnd, &key));
@@ -1124,7 +1125,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* txn
if (docsRemoved > 0) {
// if we scanned to the end of the collection or past our insert, go back one
if (ret == WT_NOTFOUND || newestIdToDelete >= justInserted) {
- ret = WT_OP_CHECK(truncateEnd->prev(truncateEnd));
+ ret = WT_READ_CHECK(truncateEnd->prev(truncateEnd));
}
invariantWTOK(ret);
@@ -1414,7 +1415,7 @@ Status WiredTigerRecordStore::updateRecord(OperationContext* txn,
WT_CURSOR* c = curwrap.get();
invariant(c);
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
invariantWTOK(ret);
WT_ITEM old_value;
@@ -1499,7 +1500,7 @@ std::vector<std::unique_ptr<RecordCursor>> WiredTigerRecordStore::getManyCursors
Status WiredTigerRecordStore::truncate(OperationContext* txn) {
WiredTigerCursor startWrap(_uri, _tableId, true, txn);
WT_CURSOR* start = startWrap.get();
- int ret = WT_OP_CHECK(start->next(start));
+ int ret = WT_READ_CHECK(start->next(start));
// Empty collections don't have anything to truncate.
if (ret == WT_NOTFOUND) {
return Status::OK();
@@ -1774,7 +1775,7 @@ boost::optional<RecordId> WiredTigerRecordStore::oplogStartHack(
int cmp;
c->set_key(c, _makeKey(startingPosition));
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ int ret = WT_READ_CHECK(c->search_near(c, &cmp));
if (ret == 0 && cmp > 0)
ret = c->prev(c); // landed one higher than startingPosition
if (ret == WT_NOTFOUND)
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
index 9eac5fec85a..e680b046686 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
@@ -46,10 +46,17 @@
/**
* Either executes the specified operation and returns it's value or randomly throws a write
- * conflict exception if the WTWriteConflictException failpoint is enabled.
+ * conflict exception if the WTWriteConflictException failpoint is enabled. This is only checked
+ * on cursor methods that make modifications.
*/
#define WT_OP_CHECK(x) (((MONGO_FAIL_POINT(WTWriteConflictException))) ? (WT_ROLLBACK) : (x))
+/**
+ * Identical to WT_OP_CHECK except this is checked on cursor seeks/advancement.
+ */
+#define WT_READ_CHECK(x) \
+ (((MONGO_FAIL_POINT(WTWriteConflictExceptionForReads))) ? (WT_ROLLBACK) : (x))
+
namespace mongo {
class RecoveryUnit;
@@ -327,6 +334,7 @@ private:
// WT failpoint to throw write conflict exceptions randomly
MONGO_FP_FORWARD_DECLARE(WTWriteConflictException);
+MONGO_FP_FORWARD_DECLARE(WTWriteConflictExceptionForReads);
// Prevents oplog writes from being considered durable on the primary. Once activated, new writes
// will not be considered durable until deactivated. It is unspecified whether writes that commit