author     Nathan Myers <nathan.myers@10gen.com>  2017-06-02 13:16:12 -0400
committer  Nathan Myers <nathan.myers@10gen.com>  2017-06-02 13:39:13 -0400
commit     20e9320f7716515d68c27324dc20080d585fd70f (patch)
tree       6c2012039b6ee5f69034f0b2b1850ae02ad59b84 /src/mongo/db/s/metadata_manager.cpp
parent     d4eeb742d85e31b95376715dec3454870ae83064 (diff)
download   mongo-20e9320f7716515d68c27324dc20080d585fd70f.tar.gz
SERVER-29342 CollectionShardState/RangeDeleter support for safe secondary reads
Diffstat (limited to 'src/mongo/db/s/metadata_manager.cpp')
-rw-r--r--  src/mongo/db/s/metadata_manager.cpp  157
1 file changed, 98 insertions, 59 deletions
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp
index 70cc7de5ee5..de6cfcabd96 100644
--- a/src/mongo/db/s/metadata_manager.cpp
+++ b/src/mongo/db/s/metadata_manager.cpp
@@ -44,60 +44,67 @@
 #include "mongo/util/assert_util.h"
 #include "mongo/util/log.h"
 
-// MetadataManager maintains std::shared_ptr<CollectionMetadataManager> pointers in a list
-// _metadata. It also contains a CollectionRangeDeleter that queues orphan ranges to delete in
-// a background thread, and a record of the ranges being migrated in, to avoid deleting them.
+// MetadataManager maintains pointers to CollectionMetadata objects in a member list named
+// _metadata. Each CollectionMetadata contains an immutable _chunksMap of chunks assigned to this
+// shard, along with details related to its own lifecycle in a member _tracker.
 //
-// Free-floating CollectionMetadata objects are maintained by these pointers, and also by clients
-// via shared pointers in ScopedCollectionMetadata objects.
+// The current chunk mapping, used by queries starting up, is at _metadata.back(). Each query,
+// when it starts up, requests and holds a ScopedCollectionMetadata object, and destroys it on
+// termination. Each ScopedCollectionMetadata keeps a shared_ptr to its CollectionMetadata chunk
+// mapping, and to the MetadataManager itself. CollectionMetadata mappings also keep a record of
+// chunk ranges that may be deleted when it is determined that the range can no longer be in use.
 //
-// The _tracker member of CollectionMetadata keeps:
-// a count of the ScopedCollectionMetadata objects that have pointers to the CollectionMetadata
-// a list of key ranges [min,max) of orphaned documents that may be deleted when the count goes
-// to zero
-// ____________________________
-// (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata |
-// _________________________ +----(s) manager metadata (s)-----------------+
-// | CollectionShardingState | | |____________________________| | |
-// | _metadataManager (s) | +-------(s) manager metadata (s)-------------+ |
-// |____________________|____| | |____________________________| | | |
-// ____________________v_______ +----------(s) manager metadata (s) | | |
-// | MetadataManager | | |________________________|___| | |
-// | |<---+ | | |
-// | | ________________________ | | |
-// | /----------->| CollectionMetadata |<----+ (1 use) | |
-// | [(s),----/ | | ______________________|_ | |
-// | (s),------------------->| CollectionMetadata | (0 uses) | |
-// | _metadata: (s)]----\ | | | ______________________|_ | |
-// | \--------------->| CollectionMetadata | | |
-// | | | | | | | |
-// | _rangesToClean: | | | | _tracker: |<------------+ |
-// | ________________________ | | | | ____________________ |<--------------+
-// | | CollectionRangeDeleter | | | | | | Tracker | | (2 uses)
-// | | | | | | | | | |
-// | | _orphans [[min,max), | | | | | | usageCounter | |
-// | | [min,max), | | | | | | orphans [min,max), | |
-// | | ... ] | | | | | | ... ] | |
-// | |________________________| | |_| | |____________________| |
-// |____________________________| | | _chunksMap |
-// |_| _chunkVersion |
-// | ... |
-// |________________________|
+// ScopedCollectionMetadata's destructor decrements the CollectionMetadata's usageCounter.
+// Whenever a usageCounter drops to zero, we check whether any now-unused CollectionMetadata
+// elements can be popped off the front of _metadata. We need to keep the unused elements in the
+// middle (as seen below) because they may schedule deletions of chunks depended on by older
+// mappings.
 //
-// A ScopedCollectionMetadata object is created and held during a query, and destroyed when the
-// query no longer needs access to the collection. Its destructor decrements the CollectionMetadata
-// _tracker member's usageCounter. Note that the collection may become unsharded, and even get
-// sharded again, between construction and destruction of a ScopedCollectionMetadata.
+// New chunk mappings are pushed onto the back of _metadata. Subsequently started queries use the
+// new mapping while still-running queries continue using the older "snapshot" mappings. We treat
+// _metadata.back()'s usage count differently from the snapshots because it can't reliably be
+// compared to zero; a new query may increment it at any time.
 //
-// When a new chunk mapping replaces the active mapping, it is pushed onto the back of _metadata.
+// (Note that the collection may be dropped or become unsharded, and even get made and sharded
+// again, between construction and destruction of a ScopedCollectionMetadata).
 //
-// A CollectionMetadata object pointed to from _metadata is maintained at least as long as any
-// query holds a ScopedCollectionMetadata object referring to it, or to any older one. In the
-// diagram above, the middle CollectionMetadata is kept until the one below it is disposed of.
+// MetadataManager also contains a CollectionRangeDeleter _rangesToClean that queues orphan ranges
+// being deleted in a background thread, and a mapping _receivingChunks of the ranges being migrated
+// in, to avoid deleting them. Each range deletion is paired with a notification object triggered
+// when the deletion is completed or abandoned.
+//
+// ____________________________
+// (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata |
+// _________________________ +----(s) manager metadata (s)------------------+
+// | CollectionShardingState | | |____________________________| | |
+// | _metadataManager (s) | +-------(s) manager metadata (s)--------------+ |
+// |____________________|____| | |____________________________| | | |
+// ____________________v________ +------------(s) manager metadata (s)-----+ | |
+// | MetadataManager | | |____________________________| | | |
+// | |<--+ | | |
+// | | ___________________________ (1 use) | | |
+// | getActiveMetadata(): /---------->| CollectionMetadata |<---------+ | |
+// | back(): [(s),------/ | | _________________________|_ | |
+// | (s),-------------------->| CollectionMetadata | (0 uses) | |
+// | _metadata: (s)]------\ | | | _________________________|_ | |
+// | \-------------->| CollectionMetadata | | |
+// | _receivingChunks | | | | | (2 uses) | |
+// | _rangesToClean: | | | | _tracker: |<---------+ |
+// | _________________________ | | | | _______________________ |<-----------+
+// | | CollectionRangeDeleter | | | | | | Tracker | |
+// | | | | | | | | | |
+// | | _orphans [range,notif, | | | | | | usageCounter | |
+// | | range,notif, | | | | | | orphans [range,notif, | |
+// | | ... ] | | | | | | range,notif, | |
+// | | | | | | | | ... ] | |
+// | |_________________________| | |_| | |_______________________| |
+// |_____________________________| | | _chunksMap |
+// |_| _chunkVersion |
+// | ... |
+// |___________________________|
 //
 // Note that _metadata as shown here has its front() at the bottom, back() at the top. As usual,
-// new entries are pushed onto the back, popped off the front. The "active" metadata used by new
-// queries (when there is one), is _metadata.back().
+// new entries are pushed onto the back, popped off the front.
 
 namespace mongo {
 
@@ -118,13 +125,17 @@ MetadataManager::~MetadataManager() {
 }
 
 void MetadataManager::_clearAllCleanups() {
+    _clearAllCleanups(
+        {ErrorCodes::InterruptedDueToReplStateChange,
+         str::stream() << "Range deletions in " << _nss.ns()
+                       << " abandoned because collection was dropped or became unsharded"});
+}
+
+void MetadataManager::_clearAllCleanups(Status status) {
     for (auto& metadata : _metadata) {
        _pushListToClean(std::move(metadata->_tracker.orphans));
    }
-    _rangesToClean.clear({ErrorCodes::InterruptedDueToReplStateChange,
-                          str::stream() << "Range deletions in " << _nss.ns()
-                                        << " abandoned because collection was"
-                                           " dropped or became unsharded"});
+    _rangesToClean.clear(status);
 }
 
 ScopedCollectionMetadata MetadataManager::getActiveMetadata(std::shared_ptr<MetadataManager> self) {
@@ -241,10 +252,12 @@ void MetadataManager::_retireExpiredMetadata() {
         if (!_metadata.front()->_tracker.orphans.empty()) {
             log() << "Queries possibly dependent on " << _nss.ns()
                   << " range(s) finished; scheduling for deletion";
+            // It is safe to push orphan ranges from _metadata.back(), even though new queries might
+            // start any time, because any request to delete a range it maps is rejected.
             _pushListToClean(std::move(_metadata.front()->_tracker.orphans));
         }
         if (&_metadata.front() == &_metadata.back())
-            break;  // do not retire current chunk metadata.
+            break;  // do not pop the active chunk mapping!
     }
 }
 
@@ -254,6 +267,8 @@
 ScopedCollectionMetadata::ScopedCollectionMetadata(std::shared_ptr<MetadataManager> manager,
                                                    std::shared_ptr<CollectionMetadata> metadata)
     : _metadata(std::move(metadata)), _manager(std::move(manager)) {
+    invariant(_metadata);
+    invariant(_manager);
     ++_metadata->_tracker.usageCounter;
 }
 
@@ -342,15 +357,17 @@ void MetadataManager::append(BSONObjBuilder* builder) {
     amrArr.done();
 }
 
-void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor, NamespaceString nss) {
-    executor->scheduleWork([executor, nss](auto&) {
+void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor,
+                                       NamespaceString nss,
+                                       CollectionRangeDeleter::Action action) {
+    executor->scheduleWork([executor, nss, action](auto&) {
         const int maxToDelete = std::max(int(internalQueryExecYieldIterations.load()), 1);
         Client::initThreadIfNotAlready("Collection Range Deleter");
         auto UniqueOpCtx = Client::getCurrent()->makeOperationContext();
         auto opCtx = UniqueOpCtx.get();
-        bool again = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, maxToDelete);
-        if (again) {
-            _scheduleCleanup(executor, nss);
+        auto next = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, action, maxToDelete);
+        if (next != CollectionRangeDeleter::Action::kFinished) {
+            _scheduleCleanup(executor, nss, next);
         }
     });
 }
@@ -365,9 +382,9 @@ auto MetadataManager::_pushRangeToClean(ChunkRange const& range) -> CleanupNotif
 
 void MetadataManager::_pushListToClean(std::list<Deletion> ranges) {
     if (_rangesToClean.add(std::move(ranges))) {
-        _scheduleCleanup(_executor, _nss);
+        _scheduleCleanup(_executor, _nss, CollectionRangeDeleter::Action::kWriteOpLog);
     }
-    dassert(ranges.empty());
+    invariant(ranges.empty());
 }
 
 void MetadataManager::_addToReceiving(ChunkRange const& range) {
@@ -442,6 +459,28 @@ auto MetadataManager::cleanUpRange(ChunkRange const& range) -> CleanupNotificati
     return activeMetadata->_tracker.orphans.back().notification;
 }
 
+auto MetadataManager::overlappingMetadata(std::shared_ptr<MetadataManager> const& self,
+                                          ChunkRange const& range)
+    -> std::vector<ScopedCollectionMetadata> {
+    invariant(!_metadata.empty());
+    stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
+    std::vector<ScopedCollectionMetadata> result;
+    result.reserve(_metadata.size());
+    auto it = _metadata.crbegin();  // start with the current active chunk mapping
+    if ((*it)->rangeOverlapsChunk(range)) {
+        // We ignore the refcount of the active mapping; effectively, we assume it is in use.
+        result.push_back(ScopedCollectionMetadata(self, *it));
+    }
+    ++it;  // step to snapshots
+    for (auto end = _metadata.crend(); it != end; ++it) {
+        // We want all the overlapping snapshot mappings still possibly in use by a query.
+        if ((*it)->_tracker.usageCounter > 0 && (*it)->rangeOverlapsChunk(range)) {
+            result.push_back(ScopedCollectionMetadata(self, *it));
+        }
+    }
+    return result;
+}
+
 size_t MetadataManager::numberOfRangesToCleanStillInUse() {
     stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
     size_t count = 0;
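The lifecycle described by the rewritten header comment is easiest to see in isolation. Below is a minimal, self-contained sketch of that pattern, not MongoDB's actual classes: Mapping, Manager, and ScopedMapping are invented stand-ins for CollectionMetadata, MetadataManager, and ScopedCollectionMetadata, and locking and orphan-range scheduling are omitted.

#include <cassert>
#include <iostream>
#include <list>
#include <memory>

// Stand-in for CollectionMetadata: an immutable mapping plus a usage count.
struct Mapping {
    explicit Mapping(int v) : version(v) {}
    const int version;
    int usageCounter = 0;  // live guards referring to this mapping
};

class Manager {
public:
    void installNewMapping(int version) {
        _metadata.push_back(std::make_shared<Mapping>(version));  // new active mapping
    }

    // Pop fully unused mappings off the front; the active back() entry is
    // never popped, because a new query may pin it at any moment.
    void retireExpired() {
        while (_metadata.size() > 1 && _metadata.front()->usageCounter == 0) {
            std::cout << "retiring mapping v" << _metadata.front()->version << "\n";
            _metadata.pop_front();
        }
    }

    std::list<std::shared_ptr<Mapping>> _metadata;  // front() oldest, back() active
};

// RAII guard modeled on ScopedCollectionMetadata: bumps the counter on
// construction and, when its decrement hits zero, asks the manager to retire.
class ScopedMapping {
public:
    ScopedMapping(std::shared_ptr<Manager> mgr, std::shared_ptr<Mapping> mapping)
        : _mgr(std::move(mgr)), _mapping(std::move(mapping)) {
        ++_mapping->usageCounter;
    }
    ~ScopedMapping() {
        if (--_mapping->usageCounter == 0) {
            _mgr->retireExpired();
        }
    }
    ScopedMapping(const ScopedMapping&) = delete;
    ScopedMapping& operator=(const ScopedMapping&) = delete;

private:
    std::shared_ptr<Manager> _mgr;
    std::shared_ptr<Mapping> _mapping;
};

int main() {
    auto mgr = std::make_shared<Manager>();
    mgr->installNewMapping(1);
    {
        ScopedMapping query(mgr, mgr->_metadata.back());  // a query pins v1
        mgr->installNewMapping(2);                        // refresh: v2 is now active
        mgr->retireExpired();                             // v1 still pinned; nothing retired
        assert(mgr->_metadata.size() == 2);
    }  // the query ends; v1's counter drops to zero and v1 is popped
    assert(mgr->_metadata.size() == 1 && mgr->_metadata.back()->version == 2);
}

The key property mirrors the comment in the diff: unused mappings are only ever popped off the front, and back() is exempt from the zero-count test because its counter can become nonzero at any time.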
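_scheduleCleanup now threads a CollectionRangeDeleter::Action through each rescheduled task instead of a bool. The following toy model shows that self-rescheduling shape under heavy simplification: Executor, Action, cleanUpNextRange, and docsRemaining are all invented here, whereas the real code runs on an executor::TaskExecutor with a fresh OperationContext per task.

#include <algorithm>
#include <atomic>
#include <deque>
#include <functional>
#include <iostream>
#include <mutex>

// Toy in-process executor standing in for executor::TaskExecutor.
class Executor {
public:
    void scheduleWork(std::function<void()> task) {
        std::lock_guard<std::mutex> lk(_mutex);
        _tasks.push_back(std::move(task));
    }
    void drain() {  // test harness: run queued tasks until none remain
        for (;;) {
            std::function<void()> task;
            {
                std::lock_guard<std::mutex> lk(_mutex);
                if (_tasks.empty())
                    return;
                task = std::move(_tasks.front());
                _tasks.pop_front();
            }
            task();
        }
    }

private:
    std::mutex _mutex;
    std::deque<std::function<void()>> _tasks;
};

enum class Action { kWriteOpLog, kMore, kFinished };

std::atomic<int> docsRemaining{10};  // pretend orphaned documents

// Stand-in for CollectionRangeDeleter::cleanUpNextRange: delete one bounded
// batch and report whether (and how) to continue.
Action cleanUpNextRange(Action action, int maxToDelete) {
    (void)action;  // the real deleter treats the first batch of a range specially
    int batch = std::min(maxToDelete, docsRemaining.load());
    docsRemaining -= batch;
    std::cout << "deleted " << batch << ", " << docsRemaining << " remain\n";
    return docsRemaining > 0 ? Action::kMore : Action::kFinished;
}

// The pattern from the diff: each task does one bounded batch, then schedules
// a successor carrying the deleter's returned action, until kFinished. No
// single task ever holds the thread longer than one batch.
void scheduleCleanup(Executor* executor, Action action) {
    executor->scheduleWork([executor, action] {
        const int maxToDelete = 3;  // stands in for internalQueryExecYieldIterations
        Action next = cleanUpNextRange(action, maxToDelete);
        if (next != Action::kFinished) {
            scheduleCleanup(executor, next);
        }
    });
}

int main() {
    Executor executor;
    scheduleCleanup(&executor, Action::kWriteOpLog);
    executor.drain();  // four chained batches: 3 + 3 + 3 + 1
}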
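The new overlappingMetadata() helper gathers every mapping that could still anchor a query over a given range, which is what lets a secondary decide whether a range deletion must wait. Here is a self-contained sketch of the same selection rule; Range, Mapping, and the interval-overlap test are invented simplifications, and the real method additionally takes the manager's lock and hands back ScopedCollectionMetadata guards rather than bare pointers.

#include <cassert>
#include <iostream>
#include <list>
#include <memory>
#include <vector>

// Simplified stand-ins: a Range is a half-open [min, max) interval of ints.
struct Range {
    int min, max;
};

struct Mapping {
    int usageCounter = 0;       // pinned by this many running queries
    std::vector<Range> chunks;  // chunks owned under this mapping

    bool rangeOverlapsChunk(const Range& r) const {
        for (const auto& c : chunks)
            if (r.min < c.max && c.min < r.max)
                return true;
        return false;
    }
};

// Mirrors the selection rule in overlappingMetadata(): the active back()
// mapping counts as in use regardless of its refcount; older snapshots
// matter only while some query still pins them.
std::vector<std::shared_ptr<Mapping>> overlappingMetadata(
    const std::list<std::shared_ptr<Mapping>>& metadata, const Range& range) {
    assert(!metadata.empty());
    std::vector<std::shared_ptr<Mapping>> result;
    auto it = metadata.crbegin();  // start at the active mapping
    if ((*it)->rangeOverlapsChunk(range)) {
        result.push_back(*it);  // refcount ignored: assume the active mapping is in use
    }
    for (++it; it != metadata.crend(); ++it) {  // older snapshot mappings
        if ((*it)->usageCounter > 0 && (*it)->rangeOverlapsChunk(range)) {
            result.push_back(*it);
        }
    }
    return result;
}

int main() {
    auto old = std::make_shared<Mapping>();
    old->chunks = {{0, 10}};  // this shard used to own [0, 10)
    old->usageCounter = 1;    // ...and a long-running query still uses that view

    auto active = std::make_shared<Mapping>();
    active->chunks = {{10, 20}};  // after migration, [0, 10) is an orphan range

    std::list<std::shared_ptr<Mapping>> metadata{old, active};

    // Deleting [0, 10) must wait: the pinned snapshot still maps it.
    std::cout << overlappingMetadata(metadata, Range{0, 10}).size()
              << " mapping(s) still cover the range\n";
}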