summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Gottlieb <daniel.gottlieb@10gen.com>2017-05-25 09:47:08 -0400
committerDaniel Gottlieb <daniel.gottlieb@10gen.com>2017-05-25 12:04:04 -0400
commita6e45b6f5c568dc280afe373536baa4e60a0b8de (patch)
tree31ebe66021e32827ec350c346fea77bcd509008d
parent24353098bf91d2914ade05b305f3edc0cbe30c7c (diff)
downloadmongo-a6e45b6f5c568dc280afe373536baa4e60a0b8de.tar.gz
SERVER-26452: WCE retry on renameCollection. Address possible dassert in dropIndex.
Add a write conflict retry loop around the fast-path for renameCollection, when the source and target collection are in the same database. Additionally, when indexes are being dropped, remove them from the collection info cache before removing them from the catalog (`_deleteFromDisk`). Removing an index from a catalog can result in a WriteConflictException that triggers the rollback on the IndexRemoveChange. The rollback adds the index back into the collection info cache, which dasserts the index does not exist. Lastly, this patch separates out the WTWriteConflictException failpoint into one for modifications (WT_CURSOR::insert/remove etc...) and one for reads (WT_CURSOR::search/next etc...). The new failpoint is named `WTWriteConflictExceptionForReads`. Commands that only roll a die for the read failpoint include `listCollections` and `validate`. This distinction is useful for resmoke/js tests that are not resilient to those commands failing with a write conflict. (cherry picked from commit bc1a7d9aa34f3c7e61f89afa481e677e4981437d)
-rw-r--r--src/mongo/db/catalog/index_catalog.cpp3
-rw-r--r--src/mongo/db/catalog/rename_collection.cpp5
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp14
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp27
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h10
5 files changed, 35 insertions, 24 deletions
diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp
index 7ae43d645ea..be51e69ef83 100644
--- a/src/mongo/db/catalog/index_catalog.cpp
+++ b/src/mongo/db/catalog/index_catalog.cpp
@@ -857,13 +857,12 @@ Status IndexCatalog::_dropIndex(OperationContext* txn, IndexCatalogEntry* entry)
invariant(_entries.release(entry->descriptor()) == entry);
txn->recoveryUnit()->registerChange(new IndexRemoveChange(txn, _collection, &_entries, entry));
+ _collection->infoCache()->droppedIndex(txn, indexName);
entry = NULL;
_deleteIndexFromDisk(txn, indexName, indexNamespace);
_checkMagic();
- _collection->infoCache()->droppedIndex(txn, indexName);
-
return Status::OK();
}
diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp
index 6607f49e98f..d8e8cfe0c85 100644
--- a/src/mongo/db/catalog/rename_collection.cpp
+++ b/src/mongo/db/catalog/rename_collection.cpp
@@ -41,6 +41,8 @@
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/catalog/index_create.h"
#include "mongo/db/client.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/curop.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index_builder.h"
@@ -115,7 +117,7 @@ Status renameCollection(OperationContext* txn,
Database* const targetDB = dbHolder().openDb(txn, target.db());
- {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
WriteUnitOfWork wunit(txn);
// Check if the target namespace exists and if dropTarget is true.
@@ -149,6 +151,7 @@ Status renameCollection(OperationContext* txn,
wunit.commit();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(txn, "renameCollection", target.ns());
// If we get here, we are renaming across databases, so we must copy all the data and
// indexes, then remove the source collection.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index 4394317934a..971460763de 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -381,7 +381,7 @@ bool WiredTigerIndex::isEmpty(OperationContext* txn) {
WT_CURSOR* c = curwrap.get();
if (!c)
return true;
- int ret = WT_OP_CHECK(c->next(c));
+ int ret = WT_READ_CHECK(c->next(c));
if (ret == WT_NOTFOUND)
return true;
invariantWTOK(ret);
@@ -442,7 +442,7 @@ bool WiredTigerIndex::isDup(WT_CURSOR* c, const BSONObj& key, const RecordId& id
KeyString data(key, _ordering);
WiredTigerItem item(data.getBuffer(), data.getSize());
c->set_key(c, item.Get());
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret == WT_NOTFOUND) {
return false;
}
@@ -809,7 +809,7 @@ protected:
void advanceWTCursor() {
WT_CURSOR* c = _cursor->get();
- int ret = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
+ int ret = WT_READ_CHECK(_forward ? c->next(c) : c->prev(c));
if (ret == WT_NOTFOUND) {
_cursorAtEof = true;
return;
@@ -826,7 +826,7 @@ protected:
const WiredTigerItem keyItem(query.getBuffer(), query.getSize());
c->set_key(c, keyItem.Get());
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ int ret = WT_READ_CHECK(c->search_near(c, &cmp));
if (ret == WT_NOTFOUND) {
_cursorAtEof = true;
TRACE_CURSOR << "\t not found";
@@ -978,7 +978,7 @@ public:
c->set_key(c, keyItem.Get());
// Using search rather than search_near.
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret != WT_NOTFOUND)
invariantWTOK(ret);
_cursorAtEof = ret == WT_NOTFOUND;
@@ -1029,7 +1029,7 @@ Status WiredTigerIndexUnique::_insert(WT_CURSOR* c,
// we put them all in the "list"
// Note that we can't omit AllZeros when there are multiple ids for a value. When we remove
// down to a single value, it will be cleaned up.
- ret = WT_OP_CHECK(c->search(c));
+ ret = WT_READ_CHECK(c->search(c));
invariantWTOK(ret);
WT_ITEM old;
@@ -1118,7 +1118,7 @@ void WiredTigerIndexUnique::_unindex(WT_CURSOR* c,
// dups are allowed, so we have to deal with a vector of RecordIds.
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret == WT_NOTFOUND) {
triggerWriteConflictAtPoint(c);
return;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 20e3e5bf793..3ed0b96f6df 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -92,6 +92,7 @@ bool shouldUseOplogHack(OperationContext* opCtx, const std::string& uri) {
} // namespace
MONGO_FP_DECLARE(WTWriteConflictException);
+MONGO_FP_DECLARE(WTWriteConflictExceptionForReads);
MONGO_FP_DECLARE(WTPausePrimaryOplogDurabilityLoop);
const std::string kWiredTigerEngineName = "wiredTiger";
@@ -450,7 +451,7 @@ public:
// Nothing after the next line can throw WCEs.
// Note that an unpositioned (or eof) WT_CURSOR returns the first/last entry in the
// table when you call next/prev.
- int advanceRet = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
+ int advanceRet = WT_READ_CHECK(_forward ? c->next(c) : c->prev(c));
if (advanceRet == WT_NOTFOUND) {
_eof = true;
return {};
@@ -489,7 +490,7 @@ public:
WT_CURSOR* c = _cursor->get();
c->set_key(c, _makeKey(id));
// Nothing after the next line can throw WCEs.
- int seekRet = WT_OP_CHECK(c->search(c));
+ int seekRet = WT_READ_CHECK(c->search(c));
if (seekRet == WT_NOTFOUND) {
_eof = true;
return {};
@@ -538,7 +539,7 @@ public:
c->set_key(c, _makeKey(_lastReturnedId));
int cmp;
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ int ret = WT_READ_CHECK(c->search_near(c, &cmp));
if (ret == WT_NOTFOUND) {
_eof = true;
return !_rs._isCapped;
@@ -641,7 +642,7 @@ public:
}
boost::optional<Record> next() final {
- int advanceRet = WT_OP_CHECK(_cursor->next(_cursor));
+ int advanceRet = WT_READ_CHECK(_cursor->next(_cursor));
if (advanceRet == WT_NOTFOUND)
return {};
invariantWTOK(advanceRet);
@@ -940,7 +941,7 @@ RecordData WiredTigerRecordStore::dataFor(OperationContext* txn, const RecordId&
WT_CURSOR* c = curwrap.get();
invariant(c);
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
massert(28556, "Didn't find RecordId in WiredTigerRecordStore", ret != WT_NOTFOUND);
invariantWTOK(ret);
return _getData(curwrap);
@@ -953,7 +954,7 @@ bool WiredTigerRecordStore::findRecord(OperationContext* txn,
WT_CURSOR* c = curwrap.get();
invariant(c);
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret == WT_NOTFOUND) {
return false;
}
@@ -971,7 +972,7 @@ void WiredTigerRecordStore::deleteRecord(OperationContext* txn, const RecordId&
cursor.assertInActiveTxn();
WT_CURSOR* c = cursor.get();
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
invariantWTOK(ret);
WT_ITEM old_value;
@@ -1078,7 +1079,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* txn
if (_cappedFirstRecord != RecordId()) {
int64_t key = _makeKey(_cappedFirstRecord);
truncateEnd->set_key(truncateEnd, key);
- ret = WT_OP_CHECK(truncateEnd->search(truncateEnd));
+ ret = WT_READ_CHECK(truncateEnd->search(truncateEnd));
if (ret == 0) {
positioned = true;
savedFirstKey = key;
@@ -1087,7 +1088,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* txn
// Advance the cursor truncateEnd until we find a suitable end point for our truncate
while ((sizeSaved < sizeOverCap || docsRemoved < docsOverCap) && (docsRemoved < 20000) &&
- (positioned || (ret = WT_OP_CHECK(truncateEnd->next(truncateEnd))) == 0)) {
+ (positioned || (ret = WT_READ_CHECK(truncateEnd->next(truncateEnd))) == 0)) {
positioned = false;
int64_t key;
invariantWTOK(truncateEnd->get_key(truncateEnd, &key));
@@ -1122,7 +1123,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* txn
if (docsRemoved > 0) {
// if we scanned to the end of the collection or past our insert, go back one
if (ret == WT_NOTFOUND || newestIdToDelete >= justInserted) {
- ret = WT_OP_CHECK(truncateEnd->prev(truncateEnd));
+ ret = WT_READ_CHECK(truncateEnd->prev(truncateEnd));
}
invariantWTOK(ret);
@@ -1379,7 +1380,7 @@ StatusWith<RecordId> WiredTigerRecordStore::updateRecord(OperationContext* txn,
WT_CURSOR* c = curwrap.get();
invariant(c);
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
invariantWTOK(ret);
WT_ITEM old_value;
@@ -1464,7 +1465,7 @@ std::vector<std::unique_ptr<RecordCursor>> WiredTigerRecordStore::getManyCursors
Status WiredTigerRecordStore::truncate(OperationContext* txn) {
WiredTigerCursor startWrap(_uri, _tableId, true, txn);
WT_CURSOR* start = startWrap.get();
- int ret = WT_OP_CHECK(start->next(start));
+ int ret = WT_READ_CHECK(start->next(start));
// Empty collections don't have anything to truncate.
if (ret == WT_NOTFOUND) {
return Status::OK();
@@ -1726,7 +1727,7 @@ boost::optional<RecordId> WiredTigerRecordStore::oplogStartHack(
int cmp;
c->set_key(c, _makeKey(startingPosition));
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ int ret = WT_READ_CHECK(c->search_near(c, &cmp));
if (ret == 0 && cmp > 0)
ret = c->prev(c); // landed one higher than startingPosition
if (ret == WT_NOTFOUND)
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
index 44eaf3047a9..6a439793ee0 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
@@ -47,10 +47,17 @@
/**
* Either executes the specified operation and returns it's value or randomly throws a write
- * conflict exception if the WTWriteConflictException failpoint is enabled.
+ * conflict exception if the WTWriteConflictException failpoint is enabled. This is only checked
+ * on cursor methods that make modifications.
*/
#define WT_OP_CHECK(x) (((MONGO_FAIL_POINT(WTWriteConflictException))) ? (WT_ROLLBACK) : (x))
+/**
+ * Identical to WT_OP_CHECK except this is checked on cursor seeks/advancement.
+ */
+#define WT_READ_CHECK(x) \
+ (((MONGO_FAIL_POINT(WTWriteConflictExceptionForReads))) ? (WT_ROLLBACK) : (x))
+
namespace mongo {
class RecoveryUnit;
@@ -326,6 +333,7 @@ private:
// WT failpoint to throw write conflict exceptions randomly
MONGO_FP_FORWARD_DECLARE(WTWriteConflictException);
+MONGO_FP_FORWARD_DECLARE(WTWriteConflictExceptionForReads);
// Prevents oplog writes from being considered durable on the primary. Once activated, new writes
// will not be considered durable until deactivated. It is unspecified whether writes that commit