diff options
author | Nathan Myers <nathan.myers@10gen.com> | 2017-04-18 17:46:55 -0400 |
---|---|---|
committer | Nathan Myers <nathan.myers@10gen.com> | 2017-04-20 01:31:00 -0400 |
commit | c192a1b9b1e223f8075ab5ce72dde372467f9650 (patch) | |
tree | df2fad3682c5fce44a7f91d9a216cd912017e38b /src/mongo/db/s/metadata_manager.h | |
parent | 53907c0094e26e39b813ae369be52a4e51fc3a08 (diff) | |
download | mongo-c192a1b9b1e223f8075ab5ce72dde372467f9650.tar.gz |
SERVER-27921 New Range Deleter
Diffstat (limited to 'src/mongo/db/s/metadata_manager.h')
-rw-r--r-- | src/mongo/db/s/metadata_manager.h | 268 |
1 files changed, 145 insertions, 123 deletions
diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h index 5c2e7f2e64a..a76766e5e75 100644 --- a/src/mongo/db/s/metadata_manager.h +++ b/src/mongo/db/s/metadata_manager.h @@ -29,17 +29,18 @@ #pragma once #include <list> -#include <memory> #include "mongo/base/disallow_copying.h" #include "mongo/bson/simple_bsonobj_comparator.h" #include "mongo/db/namespace_string.h" +#include "mongo/db/range_arithmetic.h" #include "mongo/db/s/collection_metadata.h" +#include "mongo/db/s/collection_range_deleter.h" #include "mongo/db/service_context.h" +#include "mongo/executor/task_executor.h" #include "mongo/s/catalog/type_chunk.h" -#include "mongo/util/concurrency/notification.h" - #include "mongo/stdx/memory.h" +#include "mongo/util/concurrency/notification.h" namespace mongo { @@ -49,7 +50,7 @@ class MetadataManager { MONGO_DISALLOW_COPYING(MetadataManager); public: - MetadataManager(ServiceContext* sc, NamespaceString nss); + MetadataManager(ServiceContext*, NamespaceString nss, executor::TaskExecutor* rangeDeleter); ~MetadataManager(); /** @@ -62,167 +63,186 @@ public: ScopedCollectionMetadata getActiveMetadata(); /** + * Returns the number of CollectionMetadata objects being maintained on behalf of running + * queries. The actual number may vary after it returns, so this is really only useful for unit + * tests. + */ + size_t numberOfMetadataSnapshots(); + + /** * Uses the contents of the specified metadata as a way to purge any pending chunks. */ void refreshActiveMetadata(std::unique_ptr<CollectionMetadata> newMetadata); + void toBSONPending(BSONArrayBuilder& bb) const; + /** - * Puts the specified range on the list of chunks, which are being received so that the range - * deleter process will not clean the partially migrated data. + * Appends information on all the chunk ranges in rangesToClean to builder. 
*/ - void beginReceive(const ChunkRange& range); + void append(BSONObjBuilder* builder); /** - * Removes a range from the list of chunks, which are being received. Used externally to - * indicate that a chunk migration failed. + * Returns a map to the set of chunks being migrated in. */ - void forgetReceive(const ChunkRange& range); + RangeMap const& getReceiveMap() const { + return _receivingChunks; + } /** - * Gets copy of the set of chunk ranges which are being received for this collection. This - * method is intended for testing purposes only and should not be used in any production code. + * If no running queries can depend on documents in the range, schedules any such documents for + * immediate cleanup. Otherwise, returns false. */ - RangeMap getCopyOfReceivingChunks(); + bool beginReceive(ChunkRange const& range); /** - * Adds a new range to be cleaned up. - * The newly introduced range must not overlap with the existing ranges. - */ - std::shared_ptr<Notification<Status>> addRangeToClean(const ChunkRange& range); + * Removes the range from the pending list, and schedules any documents in the range for + * immediate cleanup. Assumes no active queries can see any local documents in the range. + */ + void forgetReceive(const ChunkRange& range); /** - * Calls removeRangeToClean with Status::OK. + * Initiates cleanup of the orphaned documents as if a chunk has been migrated out. If any + * documents in the range might still be in use by running queries, queues cleanup to begin + * after they have all terminated. Otherwise, schedules documents for immediate cleanup. + * Fails if the range overlaps any current local shard chunk. + * + * Must be called with the collection locked for writing. To monitor completion, use + * trackOrphanedDataCleanup or CollectionShardingState::waitForClean. 
*/ - void removeRangeToClean(const ChunkRange& range) { - removeRangeToClean(range, Status::OK()); - } + Status cleanUpRange(ChunkRange const& range); /** - * Removes the specified range from the ranges to be cleaned up. - * The specified deletionStatus will be returned to callers waiting - * on whether the deletion succeeded or failed. + * Returns the number of ranges scheduled to be cleaned, exclusive of such ranges that might + * still be in use by running queries. Outside of test drivers, the actual number may vary + * after it returns, so this is really only useful for unit tests. */ - void removeRangeToClean(const ChunkRange& range, Status deletionStatus); + size_t numberOfRangesToClean(); /** - * Gets copy of the set of chunk ranges which are scheduled for cleanup. - * Converts RangeToCleanMap to RangeMap. + * Returns the number of ranges scheduled to be cleaned once all queries that could depend on + * them have terminated. The actual number may vary after it returns, so this is really only + * useful for unit tests. */ - RangeMap getCopyOfRangesToClean(); + size_t numberOfRangesToCleanStillInUse(); + using CleanupNotification = CollectionRangeDeleter::DeleteNotification; /** - * Appends information on all the chunk ranges in rangesToClean to builder. + * Reports whether the argument range is still scheduled for deletion. If not, returns nullptr. + * Otherwise, returns a notification n such that n->get(opCtx) will wake when deletion of a + * range (possibly the one of interest) is completed. */ - void append(BSONObjBuilder* builder); + CleanupNotification trackOrphanedDataCleanup(ChunkRange const& orphans); + + boost::optional<KeyRange> getNextOrphanRange(BSONObj const& from); + +private: + struct Tracker; /** - * Returns true if _rangesToClean is not empty. + * Retires any metadata that has fallen out of use, and pushes any orphan ranges found in them + * to the list of ranges actively being cleaned up. 
*/ - bool hasRangesToClean(); + void _retireExpiredMetadata(); /** - * Returns true if the exact range is in _rangesToClean. + * Pushes current set of chunks, if any, to _metadataInUse, replaces it with newMetadata. */ - bool isInRangesToClean(const ChunkRange& range); + void _setActiveMetadata_inlock(std::unique_ptr<CollectionMetadata> newMetadata); /** - * Gets and returns, but does not remove, a single ChunkRange from _rangesToClean. - * Should not be called if _rangesToClean is empty: it will hit an invariant. + * Returns true if the specified range overlaps any chunk that might be currently in use by a + * running query. + * + * Must be called locked. */ - ChunkRange getNextRangeToClean(); -private: - friend class ScopedCollectionMetadata; + bool _overlapsInUseChunk(ChunkRange const& range); - struct CollectionMetadataTracker { - public: - /** - * Creates a new CollectionMetadataTracker, with the usageCounter initialized to zero. - */ - CollectionMetadataTracker(std::unique_ptr<CollectionMetadata> m); - - std::unique_ptr<CollectionMetadata> metadata; - - uint32_t usageCounter{0}; - }; - - // Class for the value of the _rangesToClean map. Used because callers of addRangeToClean - // sometimes need to wait until a range is deleted. Thus, complete(Status) is called - // when the range is deleted from _rangesToClean in removeRangeToClean(), letting callers - // of addRangeToClean know if the deletion succeeded or failed. - class RangeToCleanDescriptor { - public: - /** - * Initializes a RangeToCleanDescriptor with an empty notification. - */ - RangeToCleanDescriptor(BSONObj max) - : _max(max.getOwned()), _notification(std::make_shared<Notification<Status>>()) {} - - /** - * Gets the maximum value of the range to be deleted. - */ - const BSONObj& getMax() const { - return _max; - } - - // See comment on _notification. - std::shared_ptr<Notification<Status>> getNotification() { - return _notification; - } - - /** - * Sets the status on _notification.
This will tell threads - * waiting on the value of status that the deletion succeeded or failed. - */ - void complete(Status status) { - _notification->set(status); - } - - private: - // The maximum value of the range to be deleted. - BSONObj _max; - - // This _notification will be set with a value indicating whether the deletion - // succeeded or failed. - std::shared_ptr<Notification<Status>> _notification; - }; + /** + * Returns true if any range (possibly) still in use, but scheduled for cleanup, overlaps + * the argument range. + * + * Must be called locked. + */ + bool _overlapsInUseCleanups(ChunkRange const& range); /** - * Removes the CollectionMetadata stored in the tracker from the _metadataInUse - * list (if it's there). + * Deletes ranges, in background, until done, normally using a task executor attached to the + * ShardingState. + * + * Each time it completes cleaning up a range, it wakes up clients waiting on completion of + * that range, which may then verify their range has no more deletions scheduled, and proceed. */ - void _removeMetadata_inlock(CollectionMetadataTracker* metadataTracker); + static void _scheduleCleanup(executor::TaskExecutor*, NamespaceString nss); - std::shared_ptr<Notification<Status>> _addRangeToClean_inlock(const ChunkRange& range); + /** + * Adds the range to the list of ranges scheduled for immediate deletion, and schedules a + * background task to perform the work. + * + * Must be called locked. + */ + void _pushRangeToClean(ChunkRange const& range); - void _removeRangeToClean_inlock(const ChunkRange& range, Status deletionStatus); + /** + * Adds a range to the receiving map, so getNextOrphanRange will skip ranges migrating in. + */ + void _addToReceiving(ChunkRange const& range); - RangeMap _getCopyOfRangesToClean_inlock(); + /** + * Removes a range from the receiving map after a migration failure. range.minKey() must + * exactly match an element of _receivingChunks.
+ */ + void _removeFromReceiving(ChunkRange const& range); - void _setActiveMetadata_inlock(std::unique_ptr<CollectionMetadata> newMetadata); + /** + * Wakes up any clients waiting on a range to leave _metadataInUse. + * + * Must be called locked. + */ + void _notifyInUse(); + + // data members const NamespaceString _nss; // ServiceContext from which to obtain instances of global support objects. - ServiceContext* _serviceContext; + ServiceContext* const _serviceContext; // Mutex to protect the state below stdx::mutex _managerLock; - // Holds the collection metadata, which is currently active - std::unique_ptr<CollectionMetadataTracker> _activeMetadataTracker; + bool _shuttingDown{false}; - // Holds collection metadata instances, which have previously been active, but are still in use - // by still active server operations or cursors - std::list<std::unique_ptr<CollectionMetadataTracker>> _metadataInUse; + // The collection metadata reflecting chunks accessible to new queries + std::shared_ptr<Tracker> _activeMetadataTracker; - // Chunk ranges which are currently assumed to be transferred to the shard. Indexed by the min - // key of the range. + // Previously active collection metadata instances still in use by active server operations or + // cursors + std::list<std::shared_ptr<Tracker>> _metadataInUse; + + // Chunk ranges being migrated into the shard. Indexed by the min key of the range. RangeMap _receivingChunks; - // Set of ranges to be deleted. Indexed by the min key of the range. - typedef BSONObjIndexedMap<RangeToCleanDescriptor> RangeToCleanMap; - RangeToCleanMap _rangesToClean; + // Clients can sleep on copies of _notification while waiting for their orphan ranges to fall + // out of use. + std::shared_ptr<Notification<Status>> _notification; + + // The background task that deletes documents from orphaned chunk ranges.
+ executor::TaskExecutor* const _executor; + + // Ranges being deleted, or scheduled to be deleted, by a background task + CollectionRangeDeleter _rangesToClean; + + // friends + + // for access to _decrementTrackerUsage(), and to Tracker. + friend class ScopedCollectionMetadata; + + // for access to _rangesToClean and _managerLock under task callback + friend bool CollectionRangeDeleter::cleanUpNextRange(OperationContext*, + NamespaceString const&, + int maxToDelete, + CollectionRangeDeleter*); }; class ScopedCollectionMetadata { @@ -233,41 +253,43 @@ public: * Creates an empty ScopedCollectionMetadata. Using the default constructor means that no * metadata is available. */ - ScopedCollectionMetadata(); - + ScopedCollectionMetadata() = default; ~ScopedCollectionMetadata(); + /** + * Binds *this to the same tracker as other, if any. + */ ScopedCollectionMetadata(ScopedCollectionMetadata&& other); ScopedCollectionMetadata& operator=(ScopedCollectionMetadata&& other); /** - * Dereferencing the ScopedCollectionMetadata will dereference the internal CollectionMetadata. + * Dereferencing the ScopedCollectionMetadata dereferences the private CollectionMetadata. */ CollectionMetadata* operator->() const; CollectionMetadata* getMetadata() const; /** - * True if the ScopedCollectionMetadata stores a metadata (is not empty) + * True if the ScopedCollectionMetadata stores a metadata (is not empty) and the collection is + * sharded. */ operator bool() const; private: - friend ScopedCollectionMetadata MetadataManager::getActiveMetadata(); - /** - * Increments the counter in the CollectionMetadataTracker. + * If tracker is non-null, increments the refcount in the specified tracker. + * + * Must be called with tracker->manager locked. 
*/ - ScopedCollectionMetadata(MetadataManager* manager, - MetadataManager::CollectionMetadataTracker* tracker); + ScopedCollectionMetadata(std::shared_ptr<MetadataManager::Tracker> tracker); /** - * Decrements the usageCounter and conditionally makes a call to _removeMetadata on - * the tracker if the count has reached zero. + * Disconnect from the tracker, possibly triggering GC of unused CollectionMetadata. */ - void _decrementUsageCounter(); + void _clear(); + + std::shared_ptr<MetadataManager::Tracker> _tracker{nullptr}; - MetadataManager* _manager{nullptr}; - MetadataManager::CollectionMetadataTracker* _tracker{nullptr}; + friend ScopedCollectionMetadata MetadataManager::getActiveMetadata(); // uses our private ctor }; } // namespace mongo |