diff options
Diffstat (limited to 'src/mongo/db/s/metadata_manager.cpp')
-rw-r--r-- | src/mongo/db/s/metadata_manager.cpp | 157 |
1 files changed, 59 insertions, 98 deletions
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp index de6cfcabd96..70cc7de5ee5 100644 --- a/src/mongo/db/s/metadata_manager.cpp +++ b/src/mongo/db/s/metadata_manager.cpp @@ -44,67 +44,60 @@ #include "mongo/util/assert_util.h" #include "mongo/util/log.h" -// MetadataManager maintains pointers to CollectionMetadata objects in a member list named -// _metadata. Each CollectionMetadata contains an immutable _chunksMap of chunks assigned to this -// shard, along with details related to its own lifecycle in a member _tracker. +// MetadataManager maintains std::shared_ptr<CollectionMetadataManager> pointers in a list +// _metadata. It also contains a CollectionRangeDeleter that queues orphan ranges to delete in +// a background thread, and a record of the ranges being migrated in, to avoid deleting them. // -// The current chunk mapping, used by queries starting up, is at _metadata.back(). Each query, -// when it starts up, requests and holds a ScopedCollectionMetadata object, and destroys it on -// termination. Each ScopedCollectionMetadata keeps a shared_ptr to its CollectionMetadata chunk -// mapping, and to the MetadataManager itself. CollectionMetadata mappings also keep a record of -// chunk ranges that may be deleted when it is determined that the range can no longer be in use. +// Free-floating CollectionMetadata objects are maintained by these pointers, and also by clients +// via shared pointers in ScopedCollectionMetadata objects. // -// ScopedCollectionMetadata's destructor decrements the CollectionMetadata's usageCounter. -// Whenever a usageCounter drops to zero, we check whether any now-unused CollectionMetadata -// elements can be popped off the front of _metadata. We need to keep the unused elements in the -// middle (as seen below) because they may schedule deletions of chunks depended on by older -// mappings. +// The _tracker member of CollectionMetadata keeps: +// a count of the ScopedCollectionMetadata objects that have pointers to the CollectionMetadata +// a list of key ranges [min,max) of orphaned documents that may be deleted when the count goes +// to zero +// ____________________________ +// (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata | +// _________________________ +----(s) manager metadata (s)-----------------+ +// | CollectionShardingState | | |____________________________| | | +// | _metadataManager (s) | +-------(s) manager metadata (s)-------------+ | +// |____________________|____| | |____________________________| | | | +// ____________________v_______ +----------(s) manager metadata (s) | | | +// | MetadataManager | | |________________________|___| | | +// | |<---+ | | | +// | | ________________________ | | | +// | /----------->| CollectionMetadata |<----+ (1 use) | | +// | [(s),----/ | | ______________________|_ | | +// | (s),------------------->| CollectionMetadata | (0 uses) | | +// | _metadata: (s)]----\ | | | ______________________|_ | | +// | \--------------->| CollectionMetadata | | | +// | | | | | | | | +// | _rangesToClean: | | | | _tracker: |<------------+ | +// | ________________________ | | | | ____________________ |<--------------+ +// | | CollectionRangeDeleter | | | | | | Tracker | | (2 uses) +// | | | | | | | | | | +// | | _orphans [[min,max), | | | | | | usageCounter | | +// | | [min,max), | | | | | | orphans [min,max), | | +// | | ... ] | | | | | | ... ] | | +// | |________________________| | |_| | |____________________| | +// |____________________________| | | _chunksMap | +// |_| _chunkVersion | +// | ... | +// |________________________| // -// New chunk mappings are pushed onto the back of _metadata. Subsequently started queries use the -// new mapping while still-running queries continue using the older "snapshot" mappings. We treat -// _metadata.back()'s usage count differently from the snapshots because it can't reliably be -// compared to zero; a new query may increment it at any time. +// A ScopedCollectionMetadata object is created and held during a query, and destroyed when the +// query no longer needs access to the collection. Its destructor decrements the CollectionMetadata +// _tracker member's usageCounter. Note that the collection may become unsharded, and even get +// sharded again, between construction and destruction of a ScopedCollectionMetadata. // -// (Note that the collection may be dropped or become unsharded, and even get made and sharded -// again, between construction and destruction of a ScopedCollectionMetadata). +// When a new chunk mapping replaces the active mapping, it is pushed onto the back of _metadata. // -// MetadataManager also contains a CollectionRangeDeleter _rangesToClean that queues orphan ranges -// being deleted in a background thread, and a mapping _receivingChunks of the ranges being migrated -// in, to avoid deleting them. Each range deletion is paired with a notification object triggered -// when the deletion is completed or abandoned. -// -// ____________________________ -// (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata | -// _________________________ +----(s) manager metadata (s)------------------+ -// | CollectionShardingState | | |____________________________| | | -// | _metadataManager (s) | +-------(s) manager metadata (s)--------------+ | -// |____________________|____| | |____________________________| | | | -// ____________________v________ +------------(s) manager metadata (s)-----+ | | -// | MetadataManager | | |____________________________| | | | -// | |<--+ | | | -// | | ___________________________ (1 use) | | | -// | getActiveMetadata(): /---------->| CollectionMetadata |<---------+ | | -// | back(): [(s),------/ | | _________________________|_ | | -// | (s),-------------------->| CollectionMetadata | (0 uses) | | -// | _metadata: (s)]------\ | | | _________________________|_ | | -// | \-------------->| CollectionMetadata | | | -// | _receivingChunks | | | | | (2 uses) | | -// | _rangesToClean: | | | | _tracker: |<---------+ | -// | _________________________ | | | | _______________________ |<-----------+ -// | | CollectionRangeDeleter | | | | | | Tracker | | -// | | | | | | | | | | -// | | _orphans [range,notif, | | | | | | usageCounter | | -// | | range,notif, | | | | | | orphans [range,notif, | | -// | | ... ] | | | | | | range,notif, | | -// | | | | | | | | ... ] | | -// | |_________________________| | |_| | |_______________________| | -// |_____________________________| | | _chunksMap | -// |_| _chunkVersion | -// | ... | -// |___________________________| +// A CollectionMetadata object pointed to from _metadata is maintained at least as long as any +// query holds a ScopedCollectionMetadata object referring to it, or to any older one. In the +// diagram above, the middle CollectionMetadata is kept until the one below it is disposed of. // // Note that _metadata as shown here has its front() at the bottom, back() at the top. As usual, -// new entries are pushed onto the back, popped off the front. +// new entries are pushed onto the back, popped off the front. The "active" metadata used by new +// queries (when there is one), is _metadata.back(). namespace mongo { @@ -125,17 +118,13 @@ MetadataManager::~MetadataManager() { } void MetadataManager::_clearAllCleanups() { - _clearAllCleanups( - {ErrorCodes::InterruptedDueToReplStateChange, - str::stream() << "Range deletions in " << _nss.ns() - << " abandoned because collection was dropped or became unsharded"}); -} - -void MetadataManager::_clearAllCleanups(Status status) { for (auto& metadata : _metadata) { _pushListToClean(std::move(metadata->_tracker.orphans)); } - _rangesToClean.clear(status); + _rangesToClean.clear({ErrorCodes::InterruptedDueToReplStateChange, + str::stream() << "Range deletions in " << _nss.ns() + << " abandoned because collection was" + " dropped or became unsharded"}); } ScopedCollectionMetadata MetadataManager::getActiveMetadata(std::shared_ptr<MetadataManager> self) { @@ -252,12 +241,10 @@ void MetadataManager::_retireExpiredMetadata() { if (!_metadata.front()->_tracker.orphans.empty()) { log() << "Queries possibly dependent on " << _nss.ns() << " range(s) finished; scheduling for deletion"; - // It is safe to push orphan ranges from _metadata.back(), even though new queries might - // start any time, because any request to delete a range it maps is rejected. _pushListToClean(std::move(_metadata.front()->_tracker.orphans)); } if (&_metadata.front() == &_metadata.back()) - break; // do not pop the active chunk mapping! + break; // do not retire current chunk metadata. } } @@ -267,8 +254,6 @@ void MetadataManager::_retireExpiredMetadata() { ScopedCollectionMetadata::ScopedCollectionMetadata(std::shared_ptr<MetadataManager> manager, std::shared_ptr<CollectionMetadata> metadata) : _metadata(std::move(metadata)), _manager(std::move(manager)) { - invariant(_metadata); - invariant(_manager); ++_metadata->_tracker.usageCounter; } @@ -357,17 +342,15 @@ void MetadataManager::append(BSONObjBuilder* builder) { amrArr.done(); } -void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor, - NamespaceString nss, - CollectionRangeDeleter::Action action) { - executor->scheduleWork([executor, nss, action](auto&) { +void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor, NamespaceString nss) { + executor->scheduleWork([executor, nss](auto&) { const int maxToDelete = std::max(int(internalQueryExecYieldIterations.load()), 1); Client::initThreadIfNotAlready("Collection Range Deleter"); auto UniqueOpCtx = Client::getCurrent()->makeOperationContext(); auto opCtx = UniqueOpCtx.get(); - auto next = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, action, maxToDelete); - if (next != CollectionRangeDeleter::Action::kFinished) { - _scheduleCleanup(executor, nss, next); + bool again = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, maxToDelete); + if (again) { + _scheduleCleanup(executor, nss); } }); } @@ -382,9 +365,9 @@ auto MetadataManager::_pushRangeToClean(ChunkRange const& range) -> CleanupNotif void MetadataManager::_pushListToClean(std::list<Deletion> ranges) { if (_rangesToClean.add(std::move(ranges))) { - _scheduleCleanup(_executor, _nss, CollectionRangeDeleter::Action::kWriteOpLog); + _scheduleCleanup(_executor, _nss); } - invariant(ranges.empty()); + dassert(ranges.empty()); } void MetadataManager::_addToReceiving(ChunkRange const& range) { @@ -459,28 +442,6 @@ auto MetadataManager::cleanUpRange(ChunkRange const& range) -> CleanupNotificati return activeMetadata->_tracker.orphans.back().notification; } -auto MetadataManager::overlappingMetadata(std::shared_ptr<MetadataManager> const& self, - ChunkRange const& range) - -> std::vector<ScopedCollectionMetadata> { - invariant(!_metadata.empty()); - stdx::lock_guard<stdx::mutex> scopedLock(_managerLock); - std::vector<ScopedCollectionMetadata> result; - result.reserve(_metadata.size()); - auto it = _metadata.crbegin(); // start with the current active chunk mapping - if ((*it)->rangeOverlapsChunk(range)) { - // We ignore the refcount of the active mapping; effectively, we assume it is in use. - result.push_back(ScopedCollectionMetadata(self, *it)); - } - ++it; // step to snapshots - for (auto end = _metadata.crend(); it != end; ++it) { - // We want all the overlapping snapshot mappings still possibly in use by a query. - if ((*it)->_tracker.usageCounter > 0 && (*it)->rangeOverlapsChunk(range)) { - result.push_back(ScopedCollectionMetadata(self, *it)); - } - } - return result; -} - size_t MetadataManager::numberOfRangesToCleanStillInUse() { stdx::lock_guard<stdx::mutex> scopedLock(_managerLock); size_t count = 0; |