summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Gottlieb <daniel.gottlieb@10gen.com>2017-03-29 10:37:01 -0400
committerDaniel Gottlieb <daniel.gottlieb@10gen.com>2017-03-29 11:58:39 -0400
commitbc1a7d9aa34f3c7e61f89afa481e677e4981437d (patch)
tree24b74d999d802565db6376491a588d4d6018567e
parent83007991e9eaba391988c4161fe8f68352c57939 (diff)
downloadmongo-bc1a7d9aa34f3c7e61f89afa481e677e4981437d.tar.gz
SERVER-26452: WCE retry on renameCollection. Address possible dassert in dropIndex.
Add a write conflict retry loop around the fast-path for renameCollection, when the source and target collection are in the same database. Additionally, when indexes are being dropped, remove them from the collection info cache before removing them from the catalog (`_deleteFromDisk`). Removing an index from a catalog can result in a WriteConflictException that triggers the rollback on the IndexRemoveChange. The rollback adds the index back into the collection info cache, which dasserts that the index does not already exist there; if the index had not yet been removed from the cache, that dassert would fire. Lastly, this patch separates out the WTWriteConflictException failpoint into one for modifications (WT_CURSOR::insert/remove, etc.) and one for reads (WT_CURSOR::search/next, etc.). The new failpoint is named `WTWriteConflictExceptionForReads`. Commands that only roll a die for the read failpoint include `listCollections` and `validate`. This distinction is useful for resmoke/js tests that are not resilient to those commands failing with a write conflict.
-rw-r--r--src/mongo/db/catalog/index_catalog.cpp2
-rw-r--r--src/mongo/db/catalog/rename_collection.cpp5
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp14
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp27
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h10
5 files changed, 35 insertions, 23 deletions
diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp
index faf7ad134e1..382c060bdbd 100644
--- a/src/mongo/db/catalog/index_catalog.cpp
+++ b/src/mongo/db/catalog/index_catalog.cpp
@@ -973,12 +973,12 @@ Status IndexCatalog::_dropIndex(OperationContext* opCtx, IndexCatalogEntry* entr
invariant(_entries.release(entry->descriptor()) == entry);
opCtx->recoveryUnit()->registerChange(
new IndexRemoveChange(opCtx, _collection, &_entries, entry));
+ _collection->infoCache()->droppedIndex(opCtx, indexName);
entry = NULL;
_deleteIndexFromDisk(opCtx, indexName, indexNamespace);
_checkMagic();
- _collection->infoCache()->droppedIndex(opCtx, indexName);
return Status::OK();
}
diff --git a/src/mongo/db/catalog/rename_collection.cpp b/src/mongo/db/catalog/rename_collection.cpp
index b0c5e6b3aa0..d3b93f3ec2b 100644
--- a/src/mongo/db/catalog/rename_collection.cpp
+++ b/src/mongo/db/catalog/rename_collection.cpp
@@ -41,6 +41,8 @@
#include "mongo/db/catalog/index_catalog.h"
#include "mongo/db/catalog/index_create.h"
#include "mongo/db/client.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/curop.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index_builder.h"
@@ -124,7 +126,7 @@ Status renameCollection(OperationContext* opCtx,
Database* const targetDB = dbHolder().openDb(opCtx, target.db());
- {
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
WriteUnitOfWork wunit(opCtx);
// Check if the target namespace exists and if dropTarget is true.
@@ -165,6 +167,7 @@ Status renameCollection(OperationContext* opCtx,
wunit.commit();
}
+ MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, "renameCollection", target.ns());
// If we get here, we are renaming across databases, so we must copy all the data and
// indexes, then remove the source collection.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index eb91754ab9f..05bafb50c96 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -374,7 +374,7 @@ bool WiredTigerIndex::isEmpty(OperationContext* opCtx) {
WT_CURSOR* c = curwrap.get();
if (!c)
return true;
- int ret = WT_OP_CHECK(c->next(c));
+ int ret = WT_READ_CHECK(c->next(c));
if (ret == WT_NOTFOUND)
return true;
invariantWTOK(ret);
@@ -435,7 +435,7 @@ bool WiredTigerIndex::isDup(WT_CURSOR* c, const BSONObj& key, const RecordId& id
KeyString data(keyStringVersion(), key, _ordering);
WiredTigerItem item(data.getBuffer(), data.getSize());
c->set_key(c, item.Get());
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret == WT_NOTFOUND) {
return false;
}
@@ -820,7 +820,7 @@ protected:
void advanceWTCursor() {
WT_CURSOR* c = _cursor->get();
- int ret = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
+ int ret = WT_READ_CHECK(_forward ? c->next(c) : c->prev(c));
if (ret == WT_NOTFOUND) {
_cursorAtEof = true;
return;
@@ -837,7 +837,7 @@ protected:
const WiredTigerItem keyItem(query.getBuffer(), query.getSize());
c->set_key(c, keyItem.Get());
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ int ret = WT_READ_CHECK(c->search_near(c, &cmp));
if (ret == WT_NOTFOUND) {
_cursorAtEof = true;
TRACE_CURSOR << "\t not found";
@@ -989,7 +989,7 @@ public:
c->set_key(c, keyItem.Get());
// Using search rather than search_near.
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret != WT_NOTFOUND)
invariantWTOK(ret);
_cursorAtEof = ret == WT_NOTFOUND;
@@ -1040,7 +1040,7 @@ Status WiredTigerIndexUnique::_insert(WT_CURSOR* c,
// we put them all in the "list"
// Note that we can't omit AllZeros when there are multiple ids for a value. When we remove
// down to a single value, it will be cleaned up.
- ret = WT_OP_CHECK(c->search(c));
+ ret = WT_READ_CHECK(c->search(c));
invariantWTOK(ret);
WT_ITEM old;
@@ -1100,7 +1100,7 @@ void WiredTigerIndexUnique::_unindex(WT_CURSOR* c,
// dups are allowed, so we have to deal with a vector of RecordIds.
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret == WT_NOTFOUND) {
// WT_NOTFOUND is only expected during a background index build. Insert a dummy value and
// delete it again to trigger a write conflict in case this is being concurrently indexed by
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 46dadad3467..593fd2d8bca 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -91,6 +91,7 @@ bool shouldUseOplogHack(OperationContext* opCtx, const std::string& uri) {
} // namespace
MONGO_FP_DECLARE(WTWriteConflictException);
+MONGO_FP_DECLARE(WTWriteConflictExceptionForReads);
MONGO_FP_DECLARE(WTPausePrimaryOplogDurabilityLoop);
const std::string kWiredTigerEngineName = "wiredTiger";
@@ -453,7 +454,7 @@ public:
// Nothing after the next line can throw WCEs.
// Note that an unpositioned (or eof) WT_CURSOR returns the first/last entry in the
// table when you call next/prev.
- int advanceRet = WT_OP_CHECK(_forward ? c->next(c) : c->prev(c));
+ int advanceRet = WT_READ_CHECK(_forward ? c->next(c) : c->prev(c));
if (advanceRet == WT_NOTFOUND) {
_eof = true;
return {};
@@ -492,7 +493,7 @@ public:
WT_CURSOR* c = _cursor->get();
c->set_key(c, _makeKey(id));
// Nothing after the next line can throw WCEs.
- int seekRet = WT_OP_CHECK(c->search(c));
+ int seekRet = WT_READ_CHECK(c->search(c));
if (seekRet == WT_NOTFOUND) {
_eof = true;
return {};
@@ -541,7 +542,7 @@ public:
c->set_key(c, _makeKey(_lastReturnedId));
int cmp;
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ int ret = WT_READ_CHECK(c->search_near(c, &cmp));
if (ret == WT_NOTFOUND) {
_eof = true;
return !_rs._isCapped;
@@ -645,7 +646,7 @@ public:
}
boost::optional<Record> next() final {
- int advanceRet = WT_OP_CHECK(_cursor->next(_cursor));
+ int advanceRet = WT_READ_CHECK(_cursor->next(_cursor));
if (advanceRet == WT_NOTFOUND)
return {};
invariantWTOK(advanceRet);
@@ -945,7 +946,7 @@ RecordData WiredTigerRecordStore::dataFor(OperationContext* opCtx, const RecordI
WT_CURSOR* c = curwrap.get();
invariant(c);
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
massert(28556, "Didn't find RecordId in WiredTigerRecordStore", ret != WT_NOTFOUND);
invariantWTOK(ret);
return _getData(curwrap);
@@ -958,7 +959,7 @@ bool WiredTigerRecordStore::findRecord(OperationContext* opCtx,
WT_CURSOR* c = curwrap.get();
invariant(c);
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
if (ret == WT_NOTFOUND) {
return false;
}
@@ -976,7 +977,7 @@ void WiredTigerRecordStore::deleteRecord(OperationContext* opCtx, const RecordId
cursor.assertInActiveTxn();
WT_CURSOR* c = cursor.get();
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
invariantWTOK(ret);
WT_ITEM old_value;
@@ -1083,7 +1084,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* opC
if (_cappedFirstRecord != RecordId()) {
int64_t key = _makeKey(_cappedFirstRecord);
truncateEnd->set_key(truncateEnd, key);
- ret = WT_OP_CHECK(truncateEnd->search(truncateEnd));
+ ret = WT_READ_CHECK(truncateEnd->search(truncateEnd));
if (ret == 0) {
positioned = true;
savedFirstKey = key;
@@ -1092,7 +1093,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* opC
// Advance the cursor truncateEnd until we find a suitable end point for our truncate
while ((sizeSaved < sizeOverCap || docsRemoved < docsOverCap) && (docsRemoved < 20000) &&
- (positioned || (ret = WT_OP_CHECK(truncateEnd->next(truncateEnd))) == 0)) {
+ (positioned || (ret = WT_READ_CHECK(truncateEnd->next(truncateEnd))) == 0)) {
positioned = false;
int64_t key;
invariantWTOK(truncateEnd->get_key(truncateEnd, &key));
@@ -1127,7 +1128,7 @@ int64_t WiredTigerRecordStore::cappedDeleteAsNeeded_inlock(OperationContext* opC
if (docsRemoved > 0) {
// if we scanned to the end of the collection or past our insert, go back one
if (ret == WT_NOTFOUND || newestIdToDelete >= justInserted) {
- ret = WT_OP_CHECK(truncateEnd->prev(truncateEnd));
+ ret = WT_READ_CHECK(truncateEnd->prev(truncateEnd));
}
invariantWTOK(ret);
@@ -1418,7 +1419,7 @@ Status WiredTigerRecordStore::updateRecord(OperationContext* opCtx,
WT_CURSOR* c = curwrap.get();
invariant(c);
c->set_key(c, _makeKey(id));
- int ret = WT_OP_CHECK(c->search(c));
+ int ret = WT_READ_CHECK(c->search(c));
invariantWTOK(ret);
WT_ITEM old_value;
@@ -1504,7 +1505,7 @@ std::vector<std::unique_ptr<RecordCursor>> WiredTigerRecordStore::getManyCursors
Status WiredTigerRecordStore::truncate(OperationContext* opCtx) {
WiredTigerCursor startWrap(_uri, _tableId, true, opCtx);
WT_CURSOR* start = startWrap.get();
- int ret = WT_OP_CHECK(start->next(start));
+ int ret = WT_READ_CHECK(start->next(start));
// Empty collections don't have anything to truncate.
if (ret == WT_NOTFOUND) {
return Status::OK();
@@ -1784,7 +1785,7 @@ boost::optional<RecordId> WiredTigerRecordStore::oplogStartHack(
int cmp;
c->set_key(c, _makeKey(startingPosition));
- int ret = WT_OP_CHECK(c->search_near(c, &cmp));
+ int ret = WT_READ_CHECK(c->search_near(c, &cmp));
if (ret == 0 && cmp > 0)
ret = c->prev(c); // landed one higher than startingPosition
if (ret == WT_NOTFOUND)
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
index 131797e5fb5..5e11b67e90a 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.h
@@ -46,10 +46,17 @@
/**
* Either executes the specified operation and returns it's value or randomly throws a write
- * conflict exception if the WTWriteConflictException failpoint is enabled.
+ * conflict exception if the WTWriteConflictException failpoint is enabled. This is only checked
+ * on cursor methods that make modifications.
*/
#define WT_OP_CHECK(x) (((MONGO_FAIL_POINT(WTWriteConflictException))) ? (WT_ROLLBACK) : (x))
+/**
+ * Identical to WT_OP_CHECK except this is checked on cursor seeks/advancement.
+ */
+#define WT_READ_CHECK(x) \
+ (((MONGO_FAIL_POINT(WTWriteConflictExceptionForReads))) ? (WT_ROLLBACK) : (x))
+
namespace mongo {
class RecoveryUnit;
@@ -327,6 +334,7 @@ private:
// WT failpoint to throw write conflict exceptions randomly
MONGO_FP_FORWARD_DECLARE(WTWriteConflictException);
+MONGO_FP_FORWARD_DECLARE(WTWriteConflictExceptionForReads);
// Prevents oplog writes from being considered durable on the primary. Once activated, new writes
// will not be considered durable until deactivated. It is unspecified whether writes that commit