author    Nathan Myers <nathan.myers@10gen.com>    2017-06-02 13:16:12 -0400
committer Nathan Myers <nathan.myers@10gen.com>    2017-06-02 13:39:13 -0400
commit    20e9320f7716515d68c27324dc20080d585fd70f (patch)
tree      6c2012039b6ee5f69034f0b2b1850ae02ad59b84 /src/mongo/db/s/metadata_manager.cpp
parent    d4eeb742d85e31b95376715dec3454870ae83064 (diff)
SERVER-29342 CollectionShardState/RangeDeleter support for safe secondary reads
Diffstat (limited to 'src/mongo/db/s/metadata_manager.cpp')
-rw-r--r--  src/mongo/db/s/metadata_manager.cpp | 157
1 file changed, 98 insertions(+), 59 deletions(-)
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp
index 70cc7de5ee5..de6cfcabd96 100644
--- a/src/mongo/db/s/metadata_manager.cpp
+++ b/src/mongo/db/s/metadata_manager.cpp
@@ -44,60 +44,67 @@
#include "mongo/util/assert_util.h"
#include "mongo/util/log.h"
-// MetadataManager maintains std::shared_ptr<CollectionMetadataManager> pointers in a list
-// _metadata. It also contains a CollectionRangeDeleter that queues orphan ranges to delete in
-// a background thread, and a record of the ranges being migrated in, to avoid deleting them.
+// MetadataManager maintains pointers to CollectionMetadata objects in a member list named
+// _metadata. Each CollectionMetadata contains an immutable _chunksMap of chunks assigned to this
+// shard, along with details related to its own lifecycle in a member _tracker.
//
-// Free-floating CollectionMetadata objects are maintained by these pointers, and also by clients
-// via shared pointers in ScopedCollectionMetadata objects.
+// The current chunk mapping, used by newly started queries, is at _metadata.back(). Each query,
+// on startup, requests and holds a ScopedCollectionMetadata object, and destroys it on
+// termination. Each ScopedCollectionMetadata keeps a shared_ptr to its CollectionMetadata chunk
+// mapping, and to the MetadataManager itself. CollectionMetadata mappings also keep a record of
+// chunk ranges that may be deleted once it is determined that no query can still be using them.
//
-// The _tracker member of CollectionMetadata keeps:
-// a count of the ScopedCollectionMetadata objects that have pointers to the CollectionMetadata
-// a list of key ranges [min,max) of orphaned documents that may be deleted when the count goes
-// to zero
-// ____________________________
-// (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata |
-// _________________________ +----(s) manager metadata (s)-----------------+
-// | CollectionShardingState | | |____________________________| | |
-// | _metadataManager (s) | +-------(s) manager metadata (s)-------------+ |
-// |____________________|____| | |____________________________| | | |
-// ____________________v_______ +----------(s) manager metadata (s) | | |
-// | MetadataManager | | |________________________|___| | |
-// | |<---+ | | |
-// | | ________________________ | | |
-// | /----------->| CollectionMetadata |<----+ (1 use) | |
-// | [(s),----/ | | ______________________|_ | |
-// | (s),------------------->| CollectionMetadata | (0 uses) | |
-// | _metadata: (s)]----\ | | | ______________________|_ | |
-// | \--------------->| CollectionMetadata | | |
-// | | | | | | | |
-// | _rangesToClean: | | | | _tracker: |<------------+ |
-// | ________________________ | | | | ____________________ |<--------------+
-// | | CollectionRangeDeleter | | | | | | Tracker | | (2 uses)
-// | | | | | | | | | |
-// | | _orphans [[min,max), | | | | | | usageCounter | |
-// | | [min,max), | | | | | | orphans [min,max), | |
-// | | ... ] | | | | | | ... ] | |
-// | |________________________| | |_| | |____________________| |
-// |____________________________| | | _chunksMap |
-// |_| _chunkVersion |
-// | ... |
-// |________________________|
+// ScopedCollectionMetadata's destructor decrements the CollectionMetadata's usageCounter.
+// Whenever a usageCounter drops to zero, we check whether any now-unused CollectionMetadata
+// elements can be popped off the front of _metadata. We need to keep the unused elements in the
+// middle (as seen below) because they may schedule deletions of chunks depended on by older
+// mappings.
//
-// A ScopedCollectionMetadata object is created and held during a query, and destroyed when the
-// query no longer needs access to the collection. Its destructor decrements the CollectionMetadata
-// _tracker member's usageCounter. Note that the collection may become unsharded, and even get
-// sharded again, between construction and destruction of a ScopedCollectionMetadata.
+// New chunk mappings are pushed onto the back of _metadata. Subsequently started queries use the
+// new mapping while still-running queries continue using the older "snapshot" mappings. We treat
+// _metadata.back()'s usage count differently from the snapshots because it can't reliably be
+// compared to zero; a new query may increment it at any time.
//
-// When a new chunk mapping replaces the active mapping, it is pushed onto the back of _metadata.
+// (Note that the collection may be dropped or become unsharded, and even be recreated and
+// sharded again, between construction and destruction of a ScopedCollectionMetadata.)
//
-// A CollectionMetadata object pointed to from _metadata is maintained at least as long as any
-// query holds a ScopedCollectionMetadata object referring to it, or to any older one. In the
-// diagram above, the middle CollectionMetadata is kept until the one below it is disposed of.
+// MetadataManager also contains a CollectionRangeDeleter _rangesToClean that queues orphan ranges
+// for deletion by a background thread, and a mapping _receivingChunks of the ranges being migrated
+// in, to avoid deleting them. Each range deletion is paired with a notification object triggered
+// when the deletion is completed or abandoned.
+//
+// ____________________________
+// (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata |
+// _________________________ +----(s) manager metadata (s)------------------+
+// | CollectionShardingState | | |____________________________| | |
+// | _metadataManager (s) | +-------(s) manager metadata (s)--------------+ |
+// |____________________|____| | |____________________________| | | |
+// ____________________v________ +------------(s) manager metadata (s)-----+ | |
+// | MetadataManager | | |____________________________| | | |
+// | |<--+ | | |
+// | | ___________________________ (1 use) | | |
+// | getActiveMetadata(): /---------->| CollectionMetadata |<---------+ | |
+// | back(): [(s),------/ | | _________________________|_ | |
+// | (s),-------------------->| CollectionMetadata | (0 uses) | |
+// | _metadata: (s)]------\ | | | _________________________|_ | |
+// | \-------------->| CollectionMetadata | | |
+// | _receivingChunks | | | | | (2 uses) | |
+// | _rangesToClean: | | | | _tracker: |<---------+ |
+// | _________________________ | | | | _______________________ |<-----------+
+// | | CollectionRangeDeleter | | | | | | Tracker | |
+// | | | | | | | | | |
+// | | _orphans [range,notif, | | | | | | usageCounter | |
+// | | range,notif, | | | | | | orphans [range,notif, | |
+// | | ... ] | | | | | | range,notif, | |
+// | | | | | | | | ... ] | |
+// | |_________________________| | |_| | |_______________________| |
+// |_____________________________| | | _chunksMap |
+// |_| _chunkVersion |
+// | ... |
+// |___________________________|
//
// Note that _metadata as shown here has its front() at the bottom, back() at the top. As usual,
-// new entries are pushed onto the back, popped off the front. The "active" metadata used by new
-// queries (when there is one), is _metadata.back().
+// new entries are pushed onto the back, popped off the front.
namespace mongo {
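
For orientation, here is a minimal sketch of the ownership layout the header comment above
describes. The member names follow the diagram, but the type details are illustrative stand-ins,
not the actual declarations from the headers (ChunkMap and RangeMap in particular are assumed
names):

    // Illustrative only: simplified stand-ins for the real classes.
    struct Tracker {
        uint32_t usageCounter{0};     // count of live ScopedCollectionMetadata references
        std::list<Deletion> orphans;  // ranges deletable once this mapping is unused
    };

    class CollectionMetadata {
        ChunkMap _chunksMap;          // immutable mapping of chunks owned by this shard
        ChunkVersion _chunkVersion;
        Tracker _tracker;             // lifecycle bookkeeping for this mapping
    };

    class MetadataManager {
        std::list<std::shared_ptr<CollectionMetadata>> _metadata;  // front() oldest, back() active
        RangeMap _receivingChunks;    // inbound migrations, protected from deletion
        CollectionRangeDeleter _rangesToClean;
    };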
@@ -118,13 +125,17 @@ MetadataManager::~MetadataManager() {
}
void MetadataManager::_clearAllCleanups() {
+ _clearAllCleanups(
+ {ErrorCodes::InterruptedDueToReplStateChange,
+ str::stream() << "Range deletions in " << _nss.ns()
+ << " abandoned because collection was dropped or became unsharded"});
+}
+
+void MetadataManager::_clearAllCleanups(Status status) {
for (auto& metadata : _metadata) {
_pushListToClean(std::move(metadata->_tracker.orphans));
}
- _rangesToClean.clear({ErrorCodes::InterruptedDueToReplStateChange,
- str::stream() << "Range deletions in " << _nss.ns()
- << " abandoned because collection was"
- " dropped or became unsharded"});
+ _rangesToClean.clear(status);
}
ScopedCollectionMetadata MetadataManager::getActiveMetadata(std::shared_ptr<MetadataManager> self) {
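
Splitting out the Status-taking overload of _clearAllCleanups (above) lets other teardown paths
report their own reason for abandoning deletions. A hypothetical caller, not part of this diff,
might look like:

    // Hypothetical illustration: abandon all pending cleanups with a caller-specific reason.
    void MetadataManager::_abandonOnDrop() {
        stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
        _clearAllCleanups({ErrorCodes::InterruptedDueToReplStateChange,
                           str::stream() << "Range deletions in " << _nss.ns()
                                         << " abandoned because the collection was dropped"});
    }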
@@ -241,10 +252,12 @@ void MetadataManager::_retireExpiredMetadata() {
if (!_metadata.front()->_tracker.orphans.empty()) {
log() << "Queries possibly dependent on " << _nss.ns()
<< " range(s) finished; scheduling for deletion";
+ // It is safe to push orphan ranges from _metadata.back(), even though new queries might
+ // start any time, because any request to delete a range it maps is rejected.
_pushListToClean(std::move(_metadata.front()->_tracker.orphans));
}
if (&_metadata.front() == &_metadata.back())
- break; // do not retire current chunk metadata.
+ break; // do not pop the active chunk mapping!
}
}
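
The loop surrounding this fragment is elided by the hunk; its assumed overall shape, reconstructed
from the fragment for context rather than copied from the source, is roughly:

    // Assumed shape, illustrative only: retire front() snapshots whose queries have all
    // finished, scheduling their orphans for deletion, but never pop the active mapping.
    while (!_metadata.empty() && _metadata.front()->_tracker.usageCounter == 0) {
        if (!_metadata.front()->_tracker.orphans.empty()) {
            _pushListToClean(std::move(_metadata.front()->_tracker.orphans));
        }
        if (&_metadata.front() == &_metadata.back()) {
            break;  // do not pop the active chunk mapping!
        }
        _metadata.pop_front();
    }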
@@ -254,6 +267,8 @@ void MetadataManager::_retireExpiredMetadata() {
ScopedCollectionMetadata::ScopedCollectionMetadata(std::shared_ptr<MetadataManager> manager,
std::shared_ptr<CollectionMetadata> metadata)
: _metadata(std::move(metadata)), _manager(std::move(manager)) {
+ invariant(_metadata);
+ invariant(_manager);
++_metadata->_tracker.usageCounter;
}
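
The matching teardown, per the header comment, decrements the counter and gives the manager a
chance to retire newly unused mappings. A sketch of that pairing (the actual destructor logic
lives elsewhere in this file and is not shown in this diff):

    // Sketch only: dropping the last reference to a snapshot triggers retirement.
    ScopedCollectionMetadata::~ScopedCollectionMetadata() {
        if (!_metadata) {
            return;  // moved-from instance owns nothing
        }
        stdx::lock_guard<stdx::mutex> scopedLock(_manager->_managerLock);
        if (--_metadata->_tracker.usageCounter == 0) {
            _manager->_retireExpiredMetadata();
        }
    }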
@@ -342,15 +357,17 @@ void MetadataManager::append(BSONObjBuilder* builder) {
amrArr.done();
}
-void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor, NamespaceString nss) {
- executor->scheduleWork([executor, nss](auto&) {
+void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor,
+ NamespaceString nss,
+ CollectionRangeDeleter::Action action) {
+ executor->scheduleWork([executor, nss, action](auto&) {
const int maxToDelete = std::max(int(internalQueryExecYieldIterations.load()), 1);
Client::initThreadIfNotAlready("Collection Range Deleter");
auto UniqueOpCtx = Client::getCurrent()->makeOperationContext();
auto opCtx = UniqueOpCtx.get();
- bool again = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, maxToDelete);
- if (again) {
- _scheduleCleanup(executor, nss);
+ auto next = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, action, maxToDelete);
+ if (next != CollectionRangeDeleter::Action::kFinished) {
+ _scheduleCleanup(executor, nss, next);
}
});
}
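
cleanUpNextRange now returns the Action for the next pass, so state threads through the chain of
scheduled tasks until kFinished breaks it. A standalone, runnable illustration of the same
self-rescheduling pattern, with a plain queue standing in for the task executor (no mongo types;
every name here is a stand-in):

    #include <functional>
    #include <iostream>
    #include <queue>

    enum class Action { kFinished, kWriteOpLog, kMore };

    std::queue<std::function<void()>> gExecutor;  // stand-in for the TaskExecutor

    // Pretend to delete one batch and report what the next pass should do.
    Action cleanUpNextRange(Action action, int& batchesLeft) {
        std::cout << "deleted a batch (action " << static_cast<int>(action) << ")\n";
        return --batchesLeft > 0 ? Action::kMore : Action::kFinished;
    }

    void scheduleCleanup(Action action, int& batchesLeft) {
        gExecutor.push([action, &batchesLeft] {
            Action next = cleanUpNextRange(action, batchesLeft);
            if (next != Action::kFinished) {
                scheduleCleanup(next, batchesLeft);  // reschedule ourselves for the next batch
            }
        });
    }

    int main() {
        int batchesLeft = 3;
        scheduleCleanup(Action::kWriteOpLog, batchesLeft);
        while (!gExecutor.empty()) {  // drain the fake executor
            auto task = std::move(gExecutor.front());
            gExecutor.pop();
            task();
        }
    }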
@@ -365,9 +382,9 @@ auto MetadataManager::_pushRangeToClean(ChunkRange const& range) -> CleanupNotif
void MetadataManager::_pushListToClean(std::list<Deletion> ranges) {
if (_rangesToClean.add(std::move(ranges))) {
- _scheduleCleanup(_executor, _nss);
+ _scheduleCleanup(_executor, _nss, CollectionRangeDeleter::Action::kWriteOpLog);
}
- dassert(ranges.empty());
+ invariant(ranges.empty());
}
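
The strengthened invariant holds because add() is assumed to splice the whole list into the
deleter's queue, leaving the source list empty. A standalone illustration of that splice property:

    #include <cassert>
    #include <list>

    int main() {
        std::list<int> destination{1, 2};
        std::list<int> source{3, 4, 5};
        // splice moves every node out of 'source' in O(1), copying nothing
        destination.splice(destination.end(), source);
        assert(source.empty());           // guaranteed after a full splice
        assert(destination.size() == 5);
    }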
void MetadataManager::_addToReceiving(ChunkRange const& range) {
@@ -442,6 +459,28 @@ auto MetadataManager::cleanUpRange(ChunkRange const& range) -> CleanupNotificati
return activeMetadata->_tracker.orphans.back().notification;
}
+auto MetadataManager::overlappingMetadata(std::shared_ptr<MetadataManager> const& self,
+ ChunkRange const& range)
+ -> std::vector<ScopedCollectionMetadata> {
+ stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
+ invariant(!_metadata.empty());
+ std::vector<ScopedCollectionMetadata> result;
+ result.reserve(_metadata.size());
+ auto it = _metadata.crbegin(); // start with the current active chunk mapping
+ if ((*it)->rangeOverlapsChunk(range)) {
+ // We ignore the refcount of the active mapping; effectively, we assume it is in use.
+ result.push_back(ScopedCollectionMetadata(self, *it));
+ }
+ ++it; // step to snapshots
+ for (auto end = _metadata.crend(); it != end; ++it) {
+ // We want all the overlapping snapshot mappings still possibly in use by a query.
+ if ((*it)->_tracker.usageCounter > 0 && (*it)->rangeOverlapsChunk(range)) {
+ result.push_back(ScopedCollectionMetadata(self, *it));
+ }
+ }
+ return result;
+}
+
size_t MetadataManager::numberOfRangesToCleanStillInUse() {
stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
size_t count = 0;
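
A hypothetical caller of overlappingMetadata (not part of this diff) might use the returned
snapshots to keep every mapping that still covers a range alive until its deletion is safe:

    // Hypothetical illustration: each ScopedCollectionMetadata in 'snapshots' pins its
    // CollectionMetadata (and chunk map) for as long as the vector is held.
    auto snapshots = manager->overlappingMetadata(manager, range);
    if (!snapshots.empty()) {
        // Defer the delete; some still-running query may read documents in 'range'.
    }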