summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGregory Wlodarek <gregory.wlodarek@mongodb.com>2021-05-10 18:11:58 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-06-28 19:08:48 +0000
commit7f79e08ee7a6184d6367730f93345c14c4cff1a3 (patch)
tree9865df35ea1db755e1bd11fb3914cc7f0f97a11d
parent3601e41f25233022c1ced66b091d66a4310bfe71 (diff)
downloadmongo-7f79e08ee7a6184d6367730f93345c14c4cff1a3.tar.gz
SERVER-56062 CappedPositionLost during an index build's collection scan phase restarts
(cherry picked from commit d7d1234dc646de99413d956b8815897270811b63)
-rw-r--r--jstests/noPassthrough/index_build_capped_position_lost.js49
-rw-r--r--src/mongo/db/catalog/SConscript1
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp156
-rw-r--r--src/mongo/db/catalog/multi_index_block.h9
4 files changed, 159 insertions, 56 deletions
diff --git a/jstests/noPassthrough/index_build_capped_position_lost.js b/jstests/noPassthrough/index_build_capped_position_lost.js
new file mode 100644
index 00000000000..95b69b832ac
--- /dev/null
+++ b/jstests/noPassthrough/index_build_capped_position_lost.js
@@ -0,0 +1,49 @@
+/**
+ * Capped cursors return CappedPositionLost when the document they were positioned on gets deleted.
+ * When this occurs during the collection scan phase of an index build, it will get restarted.
+ */
+(function() {
+"use strict";
+
+load("jstests/noPassthrough/libs/index_build.js");
+
+const conn = MongoRunner.runMongod({});
+
+const dbName = "test";
+const collName = "index_build_capped_position_lost";
+
+const db = conn.getDB(dbName);
+assert.commandWorked(
+ db.createCollection(collName, {capped: true, size: 1024 * 1024 * 1024, max: 5}));
+
+const coll = db.getCollection(collName);
+
+for (let i = 0; i < 5; i++) {
+ assert.commandWorked(coll.insert({i: i}));
+}
+
+// Hang the collection scan phase of the index build when it's halfway finished.
+assert.commandWorked(conn.adminCommand(
+ {configureFailPoint: "hangAfterIndexBuildOf", mode: "alwaysOn", data: {i: 2}}));
+
+const awaitCreateIndex = IndexBuildTest.startIndexBuild(conn, coll.getFullName(), {i: 1});
+checkLog.contains(conn, "Hanging after index build of i=2");
+
+// Rollover the capped collection.
+for (let i = 5; i < 10; i++) {
+ assert.commandWorked(coll.insert({i: i}));
+}
+
+assert.commandWorked(conn.adminCommand({configureFailPoint: "hangAfterIndexBuildOf", mode: "off"}));
+
+checkLog.contains(
+ conn,
+ "index build: collection scan restarting due to 'CollectionScan died due to position in capped collection being deleted.");
+checkLog.contains(conn, "index build: collection scan done. scanned 5 total records");
+
+awaitCreateIndex();
+
+IndexBuildTest.assertIndexes(coll, 2, ["_id_", "i_1"]);
+
+MongoRunner.stopMongod(conn);
+}()); \ No newline at end of file
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index 75c9a173c79..30141f97b05 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -302,6 +302,7 @@ env.Library(
'$BUILD_DIR/mongo/db/index/index_build_interceptor',
'$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/idl/server_parameter',
+ '$BUILD_DIR/mongo/util/log_and_backoff',
]
)
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 36eb9c08eb2..d01b1e92fb2 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -56,6 +56,7 @@
#include "mongo/util/assert_util.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/log.h"
+#include "mongo/util/log_and_backoff.h"
#include "mongo/util/progress_meter.h"
#include "mongo/util/quick_exit.h"
#include "mongo/util/scopeguard.h"
@@ -98,6 +99,19 @@ MONGO_FAIL_POINT_DEFINE(hangAfterIndexBuildOf);
MONGO_FAIL_POINT_DEFINE(hangAndThenFailIndexBuild);
MONGO_FAIL_POINT_DEFINE(leaveIndexBuildUnfinishedForShutdown);
+namespace {
+
+size_t getEachIndexBuildMaxMemoryUsageBytes(size_t numIndexSpecs) {
+ if (numIndexSpecs == 0) {
+ return 0;
+ }
+
+ return static_cast<std::size_t>(maxIndexBuildMemoryUsageMegabytes.load()) * 1024 * 1024 /
+ numIndexSpecs;
+}
+
+} // namespace
+
MultiIndexBlock::~MultiIndexBlock() {
invariant(_buildIsCleanedUp);
}
@@ -340,12 +354,8 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(OperationContext* opCtx,
std::vector<BSONObj> indexInfoObjs;
indexInfoObjs.reserve(indexSpecs.size());
- std::size_t eachIndexBuildMaxMemoryUsageBytes = 0;
- if (!indexSpecs.empty()) {
- eachIndexBuildMaxMemoryUsageBytes =
- static_cast<std::size_t>(maxIndexBuildMemoryUsageMegabytes.load()) * 1024 * 1024 /
- indexSpecs.size();
- }
+ std::size_t eachIndexBuildMaxMemoryUsageBytes =
+ getEachIndexBuildMaxMemoryUsageBytes(indexSpecs.size());
for (size_t i = 0; i < indexSpecs.size(); i++) {
BSONObj info = indexSpecs[i];
@@ -498,10 +508,91 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx,
"Failed index build because of failpoint 'hangAndThenFailIndexBuild'"};
}
- Timer t;
+ // Hint to the storage engine that this collection scan should not keep data in the cache.
+ // Do not use read-once cursors for background builds because saveState/restoreState is called
+ // with every insert into the index, which resets the collection scan cursor between every call
+ // to getNextSnapshotted(). With read-once cursors enabled, this can evict data we may need to
+ // read again, incurring a significant performance penalty.
+ // Note: This does not apply to hybrid builds because they write keys to the external sorter.
+ bool readOnce =
+ _method != IndexBuildMethod::kBackground && useReadOnceCursorsForIndexBuilds.load();
+ opCtx->recoveryUnit()->setReadOnce(readOnce);
+
+ size_t numScanRestarts = 0;
+ bool restartCollectionScan = false;
+ do {
+ restartCollectionScan = false;
+ progress->reset(collection->numRecords(opCtx));
+ Timer timer;
+
+ Status status = _doCollectionScan(opCtx, collection, &progress);
+ if (status.isOK()) {
+ log() << "index build: collection scan done. scanned " << progress->hits()
+ << " total records in " << timer.seconds() << " seconds";
+ break;
+ }
+
+ if (status.code() == ErrorCodes::CappedPositionLost) {
+ // Capped cursors are invalidated when the document they were positioned on gets
+ // deleted. The collection scan will restart in this case.
+ restartCollectionScan = true;
+ logAndBackoff(::mongo::logger::LogComponent::kIndex,
+ ::mongo::logger::LogSeverity::Info(),
+ ++numScanRestarts,
+ str::stream() << "index build: collection scan restarting due to '"
+ << status.reason() << "'. Scanned " << progress->hits()
+ << " total records in " << timer.seconds() << " seconds");
+
+ for (auto& index : _indexes) {
+ index.bulk =
+ index.real->initiateBulk(getEachIndexBuildMaxMemoryUsageBytes(_indexes.size()));
+ }
+ } else {
+ return status;
+ }
+ } while (restartCollectionScan);
+
+ if (MONGO_FAIL_POINT(leaveIndexBuildUnfinishedForShutdown)) {
+ log() << "Index build interrupted due to 'leaveIndexBuildUnfinishedForShutdown' failpoint. "
+ "Mimicing shutdown error code.";
+ return Status(
+ ErrorCodes::InterruptedAtShutdown,
+ "background index build interrupted due to failpoint. returning a shutdown error.");
+ }
+
+ if (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) {
+ // Unlock before hanging so replication recognizes we've completed.
+ Locker::LockSnapshot lockInfo;
+ invariant(opCtx->lockState()->saveLockStateAndUnlock(&lockInfo));
- unsigned long long n = 0;
+ log() << "Hanging index build with no locks due to "
+ "'hangAfterStartingIndexBuildUnlocked' failpoint";
+ MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangAfterStartingIndexBuildUnlocked);
+ if (isBackgroundBuilding()) {
+ opCtx->lockState()->restoreLockState(opCtx, lockInfo);
+ opCtx->recoveryUnit()->abandonSnapshot();
+ return Status(ErrorCodes::OperationFailed,
+ "background index build aborted due to failpoint");
+ } else {
+ invariant(false,
+ "the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off for "
+ "foreground index builds");
+ }
+ }
+
+ progress.finished();
+
+ Status ret = dumpInsertsFromBulk(opCtx);
+ if (!ret.isOK())
+ return ret;
+
+ return Status::OK();
+}
+
+Status MultiIndexBlock::_doCollectionScan(OperationContext* opCtx,
+ Collection* collection,
+ ProgressMeterHolder* progress) {
PlanExecutor::YieldPolicy yieldPolicy;
if (isBackgroundBuilding()) {
yieldPolicy = PlanExecutor::YIELD_AUTO;
@@ -511,16 +602,6 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx,
auto exec =
collection->makePlanExecutor(opCtx, yieldPolicy, Collection::ScanDirection::kForward);
- // Hint to the storage engine that this collection scan should not keep data in the cache.
- // Do not use read-once cursors for background builds because saveState/restoreState is called
- // with every insert into the index, which resets the collection scan cursor between every call
- // to getNextSnapshotted(). With read-once cursors enabled, this can evict data we may need to
- // read again, incurring a significant performance penalty.
- // Note: This does not apply to hybrid builds because they write keys to the external sorter.
- bool readOnce =
- _method != IndexBuildMethod::kBackground && useReadOnceCursorsForIndexBuilds.load();
- opCtx->recoveryUnit()->setReadOnce(readOnce);
-
Snapshotted<BSONObj> objToIndex;
RecordId loc;
PlanExecutor::ExecState state;
@@ -546,7 +627,7 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx,
}
// Done before insert so we can retry document if it WCEs.
- progress->setTotalWhileRunning(collection->numRecords(opCtx));
+ progress->get()->setTotalWhileRunning(collection->numRecords(opCtx));
failPointHangDuringBuild(&hangBeforeIndexBuildOf, "before", objToIndex.value());
@@ -577,7 +658,6 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx,
// Go to the next document
progress->hit();
- n++;
retries = 0;
} catch (const WriteConflictException&) {
// Only background builds write inside transactions, and therefore should only ever
@@ -604,42 +684,6 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(OperationContext* opCtx,
return exec->getMemberObjectStatus(objToIndex.value());
}
- if (MONGO_FAIL_POINT(leaveIndexBuildUnfinishedForShutdown)) {
- log() << "Index build interrupted due to 'leaveIndexBuildUnfinishedForShutdown' failpoint. "
- "Mimicing shutdown error code.";
- return Status(
- ErrorCodes::InterruptedAtShutdown,
- "background index build interrupted due to failpoint. returning a shutdown error.");
- }
-
- if (MONGO_FAIL_POINT(hangAfterStartingIndexBuildUnlocked)) {
- // Unlock before hanging so replication recognizes we've completed.
- Locker::LockSnapshot lockInfo;
- invariant(opCtx->lockState()->saveLockStateAndUnlock(&lockInfo));
-
- log() << "Hanging index build with no locks due to "
- "'hangAfterStartingIndexBuildUnlocked' failpoint";
- MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangAfterStartingIndexBuildUnlocked);
-
- if (isBackgroundBuilding()) {
- opCtx->lockState()->restoreLockState(opCtx, lockInfo);
- opCtx->recoveryUnit()->abandonSnapshot();
- return Status(ErrorCodes::OperationFailed,
- "background index build aborted due to failpoint");
- } else {
- invariant(!"the hangAfterStartingIndexBuildUnlocked failpoint can't be turned off for foreground index builds");
- }
- }
-
- progress->finished();
-
- log() << "index build: collection scan done. scanned " << n << " total records in "
- << t.seconds() << " seconds";
-
- Status ret = dumpInsertsFromBulk(opCtx);
- if (!ret.isOK())
- return ret;
-
return Status::OK();
}
diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h
index f456162f2e9..aadf821c2f0 100644
--- a/src/mongo/db/catalog/multi_index_block.h
+++ b/src/mongo/db/catalog/multi_index_block.h
@@ -56,6 +56,7 @@ class Collection;
class MatchExpression;
class NamespaceString;
class OperationContext;
+class ProgressMeterHolder;
/**
* Builds one or more indexes.
@@ -329,6 +330,14 @@ private:
*/
void _setStateToAbortedIfNotCommitted(StringData reason);
+ /**
+ * Performs a collection scan on the given collection and inserts the relevant index keys into
+ * the external sorter.
+ */
+ Status _doCollectionScan(OperationContext* opCtx,
+ Collection* collection,
+ ProgressMeterHolder* progress);
+
// Is set during init() and ensures subsequent function calls act on the same Collection.
boost::optional<UUID> _collectionUUID;