From 3d929ed533a72446353b18b5d60770aed33b58f1 Mon Sep 17 00:00:00 2001 From: Louis Williams Date: Fri, 10 Apr 2020 10:02:34 -0400 Subject: SERVER-46560 Make abort index build deterministic This redesigns user index build abort to have the following behavior: - Take a collection X lock to stop the index build from making progress - If we are no longer primary, return an error - Check whether we can abort the index build (i.e. it is not already committing or aborting) - Delete the index catalog entry and write the abortIndexBuild oplog entry in a WUOW - Interrupt the index builder thread - Wait for the thread to exit - Release locks --- src/mongo/db/index_builds_coordinator.h | 163 ++++++++++++-------------------- 1 file changed, 62 insertions(+), 101 deletions(-) (limited to 'src/mongo/db/index_builds_coordinator.h') diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h index c7524cec2c5..b12a1f0041d 100644 --- a/src/mongo/db/index_builds_coordinator.h +++ b/src/mongo/db/index_builds_coordinator.h @@ -203,74 +203,54 @@ public: * must continue to operate on the collection by UUID to protect against rename collection. The * provided 'reason' will be used in the error message that the index builders return to their * callers. + * + * Does not stop new index builds from starting. Caller must make that guarantee. + * + * Does not require holding locks. + * + * Returns the UUIDs of the index builds that were aborted or are already in the process of + * being aborted by another caller. */ - void abortCollectionIndexBuilds(OperationContext* opCtx, - const UUID& collectionUUID, - const std::string& reason); - - /** - * Signals all of the index builds on the specified collection to abort and returns the build - * UUIDs of the index builds that will be aborted. Must identify the collection with a UUID. The - * provided 'reason' will be used in the error message that the index builders return to their - * callers. 
- */ - std::vector abortCollectionIndexBuildsNoWait(OperationContext* opCtx, - const UUID& collectionUUID, - const std::string& reason); + std::vector abortCollectionIndexBuilds(OperationContext* opCx, + const NamespaceString collectionNss, + const UUID collectionUUID, + const std::string& reason); /** * Signals all of the index builds on the specified 'db' to abort and then waits until the index * builds are no longer running. The provided 'reason' will be used in the error message that * the index builders return to their callers. + * + * Does not require holding locks. + * + * Does not stop new index builds from starting. Caller must make that guarantee. */ void abortDatabaseIndexBuilds(OperationContext* opCtx, StringData db, const std::string& reason); - /** - * Signals all of the index builds on the specified database to abort. The provided 'reason' - * will be used in the error message that the index builders return to their callers. - */ - void abortDatabaseIndexBuildsNoWait(OperationContext* opCtx, - StringData db, - const std::string& reason); - - /** - * Aborts an index build by index build UUID. This gets called when the index build on primary - * failed due to interruption or replica set state change. - * It's a wrapper function to abortIndexBuildByBuildUUIDNoWait(). - */ - void abortIndexBuildOnError(OperationContext* opCtx, const UUID& buildUUID, Status abortStatus); - /** * Aborts an index build by index build UUID. Returns when the index build thread exits. + * + * Returns true if the index build was aborted or the index build is already in the process of + * being aborted. + * Returns false if the index build does not exist or the index build is already in the process + * of committing and cannot be aborted. 
*/ - void abortIndexBuildByBuildUUID(OperationContext* opCtx, + bool abortIndexBuildByBuildUUID(OperationContext* opCtx, const UUID& buildUUID, IndexBuildAction signalAction, - boost::optional abortTimestamp = boost::none, - boost::optional reason = boost::none); - - /** - * Aborts an index build by index build UUID. Does not wait for the index build thread to - * exit. Returns true if an index build was aborted. - */ - bool abortIndexBuildByBuildUUIDNoWait(OperationContext* opCtx, - const UUID& buildUUID, - IndexBuildAction signalAction, - boost::optional abortTimestamp = boost::none, - boost::optional reason = boost::none); + std::string reason); /** * Aborts an index build by its index name(s). This will only abort in-progress index builds if * all of the indexes are specified that a single builder is building together. When an * appropriate builder exists, this returns the build UUID of the index builder that will be * aborted. */ - boost::optional abortIndexBuildByIndexNamesNoWait( - OperationContext* opCtx, - const UUID& collectionUUID, - const std::vector& indexNames, - boost::optional reason = boost::none); + boost::optional abortIndexBuildByIndexNames(OperationContext* opCtx, + const UUID& collectionUUID, + const std::vector& indexNames, + std::string reason); /** * Returns true if there is an index builder building the given index names on a collection. @@ -568,40 +548,39 @@ protected: const Status& status); /** - * Modularizes the _indexBuildsManager calls part of _runIndexBuildInner. Throws on error. + * Attempt to abort an index build. Returns a flag indicating how the caller should proceed. 
*/ - void _buildIndex(OperationContext* opCtx, - std::shared_ptr replState, - const IndexBuildOptions& indexBuildOptions, - boost::optional* collLock); - + enum class TryAbortResult { kRetry, kAlreadyAborted, kNotAborted, kContinueAbort }; + TryAbortResult _tryAbort(OperationContext* opCtx, + std::shared_ptr replState, + IndexBuildAction signalAction, + std::string reason); /** - * Builds the indexes single-phased. - * This method matches pre-4.4 behavior for a background index build driven by a single - * createIndexes oplog entry. + * Performs last steps of aborting an index build. */ - void _buildIndexSinglePhase(OperationContext* opCtx, - std::shared_ptr replState, - const IndexBuildOptions& indexBuildOptions, - boost::optional* collLock); + void _completeAbort(OperationContext* opCtx, + std::shared_ptr replState, + IndexBuildAction signalAction, + Status reason); + void _completeSelfAbort(OperationContext* opCtx, + std::shared_ptr replState, + Status reason); + void _completeAbortForShutdown(OperationContext* opCtx, + std::shared_ptr replState, + Collection* collection); /** - * Builds the indexes two-phased. - * The beginning and completion of a index build is driven by the startIndexBuild and - * commitIndexBuild oplog entries, respectively. + * Modularizes the _indexBuildsManager calls part of _runIndexBuildInner. Throws on error. */ - void _buildIndexTwoPhase(OperationContext* opCtx, - std::shared_ptr replState, - const IndexBuildOptions& indexBuildOptions, - boost::optional* collLock); + void _buildIndex(OperationContext* opCtx, + std::shared_ptr replState, + const IndexBuildOptions& indexBuildOptions); /** * First phase is the collection scan and insertion of the keys into the sorter. 
*/ - void _scanCollectionAndInsertKeysIntoSorter( - OperationContext* opCtx, - std::shared_ptr replState, - boost::optional* exclusiveCollectionLock); + void _scanCollectionAndInsertKeysIntoSorter(OperationContext* opCtx, + std::shared_ptr replState); /** * Second phase is extracting the sorted keys and writing them into the new index table. @@ -609,7 +588,8 @@ protected: void _insertKeysFromSideTablesWithoutBlockingWrites( OperationContext* opCtx, std::shared_ptr replState); void _insertKeysFromSideTablesBlockingWrites(OperationContext* opCtx, - std::shared_ptr replState); + std::shared_ptr replState, + const IndexBuildOptions& indexBuildOptions); /** * Reads the commit ready members list for index build UUID in 'replState' from @@ -621,6 +601,13 @@ protected: virtual void _signalIfCommitQuorumIsSatisfied( OperationContext* opCtx, std::shared_ptr replState) = 0; + /** + * Attempt to signal the index build to commit and advance the index build to the kPrepareCommit + * state. + * Returns true if successful and false if the attempt was unsuccessful and the caller should + * retry. + */ + bool _tryCommit(OperationContext* opCtx, std::shared_ptr replState); /** * Skips the voting process and directly signal primary to commit index build if * commit quorum is not enabled. @@ -673,12 +660,10 @@ protected: * index, which sets the ready flag to true, to the catalog; it is not used for the catch-up * writes during the final drain phase. */ - void _insertKeysFromSideTablesAndCommit( - OperationContext* opCtx, - std::shared_ptr replState, - const IndexBuildOptions& indexBuildOptions, - boost::optional* exclusiveCollectionLock, - const Timestamp& commitIndexBuildTimestamp); + void _insertKeysFromSideTablesAndCommit(OperationContext* opCtx, + std::shared_ptr replState, + const IndexBuildOptions& indexBuildOptions, + const Timestamp& commitIndexBuildTimestamp); /** * Runs the index build. 
@@ -712,33 +697,9 @@ protected: std::vector> _filterIndexBuilds_inlock( WithLock lk, IndexBuildFilterFn indexBuildFilter) const; - /** - * Helper for 'abortCollectionIndexBuilds' and 'abortCollectionIndexBuildsNoWait'. Returns the - * UUIDs of the aborted index builders - */ - std::vector _abortCollectionIndexBuilds(stdx::unique_lock& lk, - OperationContext* opCtx, - const UUID& collectionUUID, - const std::string& reason, - bool shouldWait); - - void _awaitNoIndexBuildInProgressForCollection(stdx::unique_lock& lk, - OperationContext* opCtx, - const UUID& collectionUUID); - - /** - * Helper for 'abortDatabaseIndexBuilds' and 'abortDatabaseIndexBuildsNoWait'. - */ - void _abortDatabaseIndexBuilds(stdx::unique_lock& lk, - OperationContext* opCtx, - const StringData& db, - const std::string& reason, - bool shouldWait); - void _awaitNoBgOpInProgForDb(stdx::unique_lock& lk, OperationContext* opCtx, StringData db); - // Protects the below state. mutable Mutex _mutex = MONGO_MAKE_LATCH("IndexBuildsCoordinator::_mutex"); -- cgit v1.2.1