diff options
author | Louis Williams <louis.williams@mongodb.com> | 2020-04-10 10:02:34 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-04-10 14:16:22 +0000 |
commit | 3d929ed533a72446353b18b5d60770aed33b58f1 (patch) | |
tree | 09bd3562511aef88eb6aa1ef3b6c45b8d73c16e9 /src/mongo/db/index_builds_coordinator.h | |
parent | 76d4548a751a56c8faf1887114685b540203a650 (diff) | |
download | mongo-3d929ed533a72446353b18b5d60770aed33b58f1.tar.gz |
SERVER-46560 Make abort index build deterministic
This redesigns user index build abort to have the following behavior:
- Take a collection X lock to stop the index build from making progress
- If we are no longer primary, return an error
- Check whether we can abort the index build (i.e. it is not already committing
or aborting)
- Delete the index catalog entry and write the abortIndexBuild oplog entry in a WUOW
- Interrupt the index builder thread
- Wait for the thread to exit
- Release locks
Diffstat (limited to 'src/mongo/db/index_builds_coordinator.h')
-rw-r--r-- | src/mongo/db/index_builds_coordinator.h | 163 |
1 files changed, 62 insertions, 101 deletions
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index c7524cec2c5..b12a1f0041d 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -203,74 +203,54 @@ public: * must continue to operate on the collection by UUID to protect against rename collection. The * provided 'reason' will be used in the error message that the index builders return to their * callers. + * + * Does not stop new index builds from starting. Caller must make that guarantee. + * + * Does not require holding locks. + * + * Returns the UUIDs of the index builds that were aborted or are already in the process of + * being aborted by another caller. */ - void abortCollectionIndexBuilds(OperationContext* opCtx, - const UUID& collectionUUID, - const std::string& reason); - - /** - * Signals all of the index builds on the specified collection to abort and returns the build - * UUIDs of the index builds that will be aborted. Must identify the collection with a UUID. The - * provided 'reason' will be used in the error message that the index builders return to their - * callers. - */ - std::vector<UUID> abortCollectionIndexBuildsNoWait(OperationContext* opCtx, - const UUID& collectionUUID, - const std::string& reason); + std::vector<UUID> abortCollectionIndexBuilds(OperationContext* opCx, + const NamespaceString collectionNss, + const UUID collectionUUID, + const std::string& reason); /** * Signals all of the index builds on the specified 'db' to abort and then waits until the index * builds are no longer running. The provided 'reason' will be used in the error message that * the index builders return to their callers. + * + * Does not require holding locks. + * + * Does not stop new index builds from starting. Caller must make that guarantee. 
*/ void abortDatabaseIndexBuilds(OperationContext* opCtx, StringData db, const std::string& reason); /** - * Signals all of the index builds on the specified database to abort. The provided 'reason' - * will be used in the error message that the index builders return to their callers. - */ - void abortDatabaseIndexBuildsNoWait(OperationContext* opCtx, - StringData db, - const std::string& reason); - - /** - * Aborts an index build by index build UUID. This gets called when the index build on primary - * failed due to interruption or replica set state change. - * It's a wrapper function to abortIndexBuildByBuildUUIDNoWait(). - */ - void abortIndexBuildOnError(OperationContext* opCtx, const UUID& buildUUID, Status abortStatus); - - /** * Aborts an index build by index build UUID. Returns when the index build thread exits. + * + * Returns true if the index build was aborted or the index build is already in the process of + * being aborted. + * Returns false if the index build does not exist or the index build is already in the process + * of committing and cannot be aborted. */ - void abortIndexBuildByBuildUUID(OperationContext* opCtx, + bool abortIndexBuildByBuildUUID(OperationContext* opCtx, const UUID& buildUUID, IndexBuildAction signalAction, - boost::optional<Timestamp> abortTimestamp = boost::none, - boost::optional<std::string> reason = boost::none); - - /** - * Aborts an index build by index build UUID. Does not wait for the index build thread to - * exit. Returns true if an index build was aborted. - */ - bool abortIndexBuildByBuildUUIDNoWait(OperationContext* opCtx, - const UUID& buildUUID, - IndexBuildAction signalAction, - boost::optional<Timestamp> abortTimestamp = boost::none, - boost::optional<std::string> reason = boost::none); + std::string reason); /** * Aborts an index build by its index name(s). This will only abort in-progress index builds if * all of the indexes are specified that a single builder is building together. 
When an * appropriate builder exists, this returns the build UUID of the index builder that will be * aborted. */ - boost::optional<UUID> abortIndexBuildByIndexNamesNoWait( - OperationContext* opCtx, - const UUID& collectionUUID, - const std::vector<std::string>& indexNames, - boost::optional<std::string> reason = boost::none); + boost::optional<UUID> abortIndexBuildByIndexNames(OperationContext* opCtx, + const UUID& collectionUUID, + const std::vector<std::string>& indexNames, + std::string reason); /** * Returns true if there is an index builder building the given index names on a collection. @@ -568,40 +548,39 @@ protected: const Status& status); /** - * Modularizes the _indexBuildsManager calls part of _runIndexBuildInner. Throws on error. + * Attempt to abort an index build. Returns a flag indicating how the caller should proceed. */ - void _buildIndex(OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - const IndexBuildOptions& indexBuildOptions, - boost::optional<Lock::CollectionLock>* collLock); - + enum class TryAbortResult { kRetry, kAlreadyAborted, kNotAborted, kContinueAbort }; + TryAbortResult _tryAbort(OperationContext* opCtx, + std::shared_ptr<ReplIndexBuildState> replState, + IndexBuildAction signalAction, + std::string reason); /** - * Builds the indexes single-phased. - * This method matches pre-4.4 behavior for a background index build driven by a single - * createIndexes oplog entry. + * Performs last steps of aborting an index build. 
*/ - void _buildIndexSinglePhase(OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - const IndexBuildOptions& indexBuildOptions, - boost::optional<Lock::CollectionLock>* collLock); + void _completeAbort(OperationContext* opCtx, + std::shared_ptr<ReplIndexBuildState> replState, + IndexBuildAction signalAction, + Status reason); + void _completeSelfAbort(OperationContext* opCtx, + std::shared_ptr<ReplIndexBuildState> replState, + Status reason); + void _completeAbortForShutdown(OperationContext* opCtx, + std::shared_ptr<ReplIndexBuildState> replState, + Collection* collection); /** - * Builds the indexes two-phased. - * The beginning and completion of a index build is driven by the startIndexBuild and - * commitIndexBuild oplog entries, respectively. + * Modularizes the _indexBuildsManager calls part of _runIndexBuildInner. Throws on error. */ - void _buildIndexTwoPhase(OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - const IndexBuildOptions& indexBuildOptions, - boost::optional<Lock::CollectionLock>* collLock); + void _buildIndex(OperationContext* opCtx, + std::shared_ptr<ReplIndexBuildState> replState, + const IndexBuildOptions& indexBuildOptions); /** * First phase is the collection scan and insertion of the keys into the sorter. */ - void _scanCollectionAndInsertKeysIntoSorter( - OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - boost::optional<Lock::CollectionLock>* exclusiveCollectionLock); + void _scanCollectionAndInsertKeysIntoSorter(OperationContext* opCtx, + std::shared_ptr<ReplIndexBuildState> replState); /** * Second phase is extracting the sorted keys and writing them into the new index table. 
@@ -609,7 +588,8 @@ protected: void _insertKeysFromSideTablesWithoutBlockingWrites( OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState); void _insertKeysFromSideTablesBlockingWrites(OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState); + std::shared_ptr<ReplIndexBuildState> replState, + const IndexBuildOptions& indexBuildOptions); /** * Reads the commit ready members list for index build UUID in 'replState' from @@ -622,6 +602,13 @@ protected: OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) = 0; /** + * Attempt to signal the index build to commit and advance the index build to the kPrepareCommit + * state. + * Returns true if successful and false if the attempt was unsuccessful and the caller should + * retry. + */ + bool _tryCommit(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState); + /** * Skips the voting process and directly signal primary to commit index build if * commit quorum is not enabled. */ @@ -673,12 +660,10 @@ protected: * index, which sets the ready flag to true, to the catalog; it is not used for the catch-up * writes during the final drain phase. */ - void _insertKeysFromSideTablesAndCommit( - OperationContext* opCtx, - std::shared_ptr<ReplIndexBuildState> replState, - const IndexBuildOptions& indexBuildOptions, - boost::optional<Lock::CollectionLock>* exclusiveCollectionLock, - const Timestamp& commitIndexBuildTimestamp); + void _insertKeysFromSideTablesAndCommit(OperationContext* opCtx, + std::shared_ptr<ReplIndexBuildState> replState, + const IndexBuildOptions& indexBuildOptions, + const Timestamp& commitIndexBuildTimestamp); /** * Runs the index build. @@ -712,33 +697,9 @@ protected: std::vector<std::shared_ptr<ReplIndexBuildState>> _filterIndexBuilds_inlock( WithLock lk, IndexBuildFilterFn indexBuildFilter) const; - /** - * Helper for 'abortCollectionIndexBuilds' and 'abortCollectionIndexBuildsNoWait'. 
Returns the - * UUIDs of the aborted index builders - */ - std::vector<UUID> _abortCollectionIndexBuilds(stdx::unique_lock<Latch>& lk, - OperationContext* opCtx, - const UUID& collectionUUID, - const std::string& reason, - bool shouldWait); - - void _awaitNoIndexBuildInProgressForCollection(stdx::unique_lock<Latch>& lk, - OperationContext* opCtx, - const UUID& collectionUUID); - - /** - * Helper for 'abortDatabaseIndexBuilds' and 'abortDatabaseIndexBuildsNoWait'. - */ - void _abortDatabaseIndexBuilds(stdx::unique_lock<Latch>& lk, - OperationContext* opCtx, - const StringData& db, - const std::string& reason, - bool shouldWait); - void _awaitNoBgOpInProgForDb(stdx::unique_lock<Latch>& lk, OperationContext* opCtx, StringData db); - // Protects the below state. mutable Mutex _mutex = MONGO_MAKE_LATCH("IndexBuildsCoordinator::_mutex"); |