author    Nathan Myers <nathan.myers@10gen.com>  2017-06-01 17:52:56 -0400
committer Nathan Myers <nathan.myers@10gen.com>  2017-06-01 17:52:56 -0400
commit    3c47c2f2992fb881622195ef5c045f08061e278b (patch)
tree      f3374723c26e8464f7697000baf4c1b2ae152426
parent    6bf062db95d69f06b02da708b54f5e3efbb9dfa5 (diff)
download  mongo-3c47c2f2992fb881622195ef5c045f08061e278b.tar.gz

Revert "SERVER-29342 CollectionShardState/RangeDeleter support for safe secondary reads"
This reverts commit 6bf062db95d69f06b02da708b54f5e3efbb9dfa5.
-rw-r--r--  jstests/core/set1.js                                 2
-rw-r--r--  jstests/core/update_replace.js                       8
-rw-r--r--  jstests/core/upsert_fields.js                        2
-rw-r--r--  src/mongo/db/exec/update.cpp                         6
-rw-r--r--  src/mongo/db/s/collection_range_deleter.cpp         57
-rw-r--r--  src/mongo/db/s/collection_range_deleter.h           37
-rw-r--r--  src/mongo/db/s/collection_range_deleter_test.cpp   131
-rw-r--r--  src/mongo/db/s/collection_sharding_state.cpp         6
-rw-r--r--  src/mongo/db/s/collection_sharding_state.h          14
-rw-r--r--  src/mongo/db/s/metadata_manager.cpp                157
-rw-r--r--  src/mongo/db/s/metadata_manager.h                   42
-rw-r--r--  src/mongo/db/s/metadata_manager_test.cpp            33
12 files changed, 152 insertions, 343 deletions
diff --git a/jstests/core/set1.js b/jstests/core/set1.js
index bae41fc5803..33840e3f431 100644
--- a/jstests/core/set1.js
+++ b/jstests/core/set1.js
@@ -4,4 +4,4 @@ t.drop();
t.insert({_id: 1, emb: {}});
t.update({_id: 1}, {$set: {emb: {'a.dot': 'data'}}});
-assert.eq({_id: 1, emb: {"a.dot": 'data'}}, t.findOne(), "A");
+assert.eq({_id: 1, emb: {}}, t.findOne(), "A");
diff --git a/jstests/core/update_replace.js b/jstests/core/update_replace.js
index e62d03eb09f..44099851ef4 100644
--- a/jstests/core/update_replace.js
+++ b/jstests/core/update_replace.js
@@ -13,13 +13,13 @@ var res;
// Bypass validation in shell so we can test the server.
conn._skipValidation = true;
-// Allow "." in field names
+// Should not allow "." in field names
res = t.save({_id: 1, "a.a": 1});
-assert(!res.hasWriteError(), "a.a");
+assert(res.hasWriteError(), "a.a");
-// Allow "." in field names, embedded
+// Should not allow "." in field names, embedded
res = t.save({_id: 1, a: {"a.a": 1}});
-assert(!res.hasWriteError(), "a: a.a");
+assert(res.hasWriteError(), "a: a.a");
// Should not allow "$"-prefixed field names, caught before "." check
res = t.save({_id: 1, $a: {"a.a": 1}});
diff --git a/jstests/core/upsert_fields.js b/jstests/core/upsert_fields.js
index 490625c23db..311d6984ce9 100644
--- a/jstests/core/upsert_fields.js
+++ b/jstests/core/upsert_fields.js
@@ -154,7 +154,6 @@ for (var i = 0; i < 3; i++) {
assert.eq(value, upsertedXVal({$or: [{x: {$eq: 1}}]}, expr));
// Special types extracted
assert.eq(isReplStyle ? undefined : [1, 2], upsertedXVal({x: [1, 2]}, expr));
- assert.eq({'x.x': 1}, upsertedXVal({x: {'x.x': 1}}, expr));
// field not extracted
assert.eq(undefined, upsertedXVal({x: {$gt: 1}}, expr));
@@ -173,6 +172,7 @@ for (var i = 0; i < 3; i++) {
assert.writeError(upsertedResult({x: undefined}, expr));
if (!isReplStyle) {
+ assert.writeError(upsertedResult({x: {'x.x': 1}}, expr));
assert.writeError(upsertedResult({x: {$all: [1, 2]}}, expr));
assert.writeError(upsertedResult({$and: [{x: 1}, {x: 1}]}, expr));
assert.writeError(upsertedResult({$and: [{x: {$eq: 1}}, {x: 2}]}, expr));
diff --git a/src/mongo/db/exec/update.cpp b/src/mongo/db/exec/update.cpp
index 90cd46ab423..9bcc5460290 100644
--- a/src/mongo/db/exec/update.cpp
+++ b/src/mongo/db/exec/update.cpp
@@ -254,6 +254,12 @@ StatusWith<std::uint32_t> storageValid(const mb::ConstElement& elem,
Status status = validateDollarPrefixElement(elem, deep);
if (!status.isOK())
return status;
+ } else if (fieldName.find(".") != string::npos) {
+ // Field name cannot have a "." in it.
+ return Status(ErrorCodes::DottedFieldName,
+ str::stream() << "The dotted field '" << elem.getFieldName() << "' in '"
+ << mb::getFullName(elem)
+ << "' is not valid for storage.");
}
}
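
For illustration only (not part of this patch): a minimal standalone sketch of the dotted-field-name check that the update.cpp hunk above restores, in plain C++ rather than MongoDB's mutablebson types; isValidFieldNameForStorage is a name invented here.

#include <iostream>
#include <string>

// Hypothetical helper mirroring the restored check: a stored field name may not
// contain '.'; '$'-prefixed names are handled by a separate, earlier check.
bool isValidFieldNameForStorage(const std::string& fieldName) {
    if (!fieldName.empty() && fieldName[0] == '$') {
        return false;  // "$"-prefixed names are caught before the "." check
    }
    return fieldName.find('.') == std::string::npos;
}

int main() {
    std::cout << isValidFieldNameForStorage("a") << '\n';    // 1: valid for storage
    std::cout << isValidFieldNameForStorage("a.a") << '\n';  // 0: dotted name rejected
    std::cout << isValidFieldNameForStorage("$a") << '\n';   // 0: "$"-prefixed name rejected
}
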
diff --git a/src/mongo/db/s/collection_range_deleter.cpp b/src/mongo/db/s/collection_range_deleter.cpp
index 2392305fe5d..df690affe3d 100644
--- a/src/mongo/db/s/collection_range_deleter.cpp
+++ b/src/mongo/db/s/collection_range_deleter.cpp
@@ -76,16 +76,13 @@ const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority,
CollectionRangeDeleter::~CollectionRangeDeleter() {
// notify anybody still sleeping on orphan ranges
clear(Status{ErrorCodes::InterruptedDueToReplStateChange,
- "Collection sharding metadata discarded"});
+ "Collection sharding metadata destroyed"});
}
-auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
+bool CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
NamespaceString const& nss,
- Action action,
int maxToDelete,
- CollectionRangeDeleter* forTestOnly) -> Action {
-
- invariant(action != Action::kFinished);
+ CollectionRangeDeleter* rangeDeleterForTestOnly) {
StatusWith<int> wrote = 0;
auto range = boost::optional<ChunkRange>(boost::none);
auto notification = DeleteNotification();
@@ -95,55 +92,28 @@ auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
auto* css = CollectionShardingState::get(opCtx, nss);
{
auto scopedCollectionMetadata = css->getMetadata();
- if ((!collection || !scopedCollectionMetadata) && !forTestOnly) {
- log() << "Abandoning range deletions left over from previously sharded collection"
- << nss.ns();
+ if ((!collection || !scopedCollectionMetadata) && !rangeDeleterForTestOnly) {
+ log() << "Abandoning range deletions in collection " << nss.ns()
+ << " left over from sharded state";
stdx::lock_guard<stdx::mutex> lk(css->_metadataManager->_managerLock);
css->_metadataManager->_clearAllCleanups();
- return Action::kFinished;
+ return false; // collection was unsharded
}
// We don't actually know if this is the same collection that we were originally
// scheduled to do deletions on, or another one with the same name. But it doesn't
- // matter: if it has a record of deletions scheduled, now is as good a time as any
- // to do them.
-
- auto self = forTestOnly ? forTestOnly : &css->_metadataManager->_rangesToClean;
+ // matter: if it has deletions scheduled, now is as good a time as any to do them.
+ auto self = rangeDeleterForTestOnly ? rangeDeleterForTestOnly
+ : &css->_metadataManager->_rangesToClean;
{
stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
if (self->isEmpty())
- return Action::kFinished;
+ return false;
const auto& frontRange = self->_orphans.front().range;
range.emplace(frontRange.getMin().getOwned(), frontRange.getMax().getOwned());
notification = self->_orphans.front().notification;
}
- invariant(range);
-
- if (action == Action::kWriteOpLog) {
- // clang-format off
- // Secondaries will watch for this update, and kill any queries that may depend on
- // documents in the range -- excepting any queries with a read-concern option
- // 'ignoreChunkMigration'
- try {
- auto& adminSystemVersion = NamespaceString::kConfigCollectionNamespace;
- auto epoch = scopedCollectionMetadata->getCollVersion().epoch();
- AutoGetCollection autoAdmin(opCtx, adminSystemVersion, MODE_IX);
-
- Helpers::upsert(opCtx, adminSystemVersion.ns(),
- BSON("_id" << "startRangeDeletion" << "ns" << nss.ns() << "epoch" << epoch
- << "min" << range->getMin() << "max" << range->getMax()));
-
- } catch (DBException const& e) {
- stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
- css->_metadataManager->_clearAllCleanups(
- {ErrorCodes::fromInt(e.getCode()),
- str::stream() << "cannot push startRangeDeletion record to Op Log,"
- " abandoning scheduled range deletions: " << e.what()});
- return Action::kFinished;
- }
- // clang-format on
- }
try {
auto keyPattern = scopedCollectionMetadata->getKeyPattern();
@@ -153,6 +123,7 @@ auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
wrote = e.toStatus();
warning() << e.what();
}
+
if (!wrote.isOK() || wrote.getValue() == 0) {
if (wrote.isOK()) {
log() << "No documents remain to delete in " << nss << " range "
@@ -160,7 +131,7 @@ auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
}
stdx::lock_guard<stdx::mutex> scopedLock(css->_metadataManager->_managerLock);
self->_pop(wrote.getStatus());
- return Action::kWriteOpLog;
+ return true;
}
} // drop scopedCollectionMetadata
} // drop autoColl
@@ -203,7 +174,7 @@ auto CollectionRangeDeleter::cleanUpNextRange(OperationContext* opCtx,
}
notification.abandon();
- return Action::kMore;
+ return true;
}
StatusWith<int> CollectionRangeDeleter::_doDeletion(OperationContext* opCtx,
diff --git a/src/mongo/db/s/collection_range_deleter.h b/src/mongo/db/s/collection_range_deleter.h
index 52d3ec44256..a0a6625ba02 100644
--- a/src/mongo/db/s/collection_range_deleter.h
+++ b/src/mongo/db/s/collection_range_deleter.h
@@ -46,8 +46,7 @@ public:
/**
* This is an object n that asynchronously changes state when a scheduled range deletion
* completes or fails. Call n.ready() to discover if the event has already occurred. Call
- * n.waitStatus(opCtx) to sleep waiting for the event, and get its result. If the wait is
- * interrupted, waitStatus throws.
+ * n.waitStatus(opCtx) to sleep waiting for the event, and get its result.
*
* It is an error to destroy a returned CleanupNotification object n unless either n.ready()
* is true or n.abandon() has been called. After n.abandon(), n is in a moved-from state.
@@ -86,9 +85,6 @@ public:
bool operator==(DeleteNotification const& other) const {
return notification == other.notification;
}
- bool operator!=(DeleteNotification const& other) const {
- return notification != other.notification;
- }
private:
std::shared_ptr<Notification<Status>> notification;
@@ -100,8 +96,6 @@ public:
DeleteNotification notification{};
};
- enum class Action { kFinished, kMore, kWriteOpLog };
-
CollectionRangeDeleter() = default;
~CollectionRangeDeleter();
@@ -134,8 +128,8 @@ public:
bool isEmpty() const;
/*
- * Notifies with the specified status anything waiting on ranges scheduled, and then discards
- * the ranges and notifications. Is called in the destructor.
+ * Notify with the specified status anything waiting on ranges scheduled, before discarding the
+ * ranges and notifications.
*/
void clear(Status);
@@ -145,22 +139,21 @@ public:
void append(BSONObjBuilder* builder) const;
/**
- * If any range deletions are scheduled, deletes up to maxToDelete documents, notifying
- * watchers of ranges as they are done being deleted. It performs its own collection locking, so
- * it must be called without locks.
+ * If any ranges are scheduled to clean, deletes up to maxToDelete documents, notifying watchers
+ * of ranges as they are done being deleted. It performs its own collection locking so it must
+ * be called without locks.
*
- * Returns kMore or kWriteOpLog if it should be scheduled to run again because there might be
- * more documents to delete, or kFinished otherwise. When calling again, pass the value
- * returned.
+ * The 'rangeDeleterForTestOnly' is used as a utility for unit-tests that directly test the
+ * CollectionRangeDeleter class so they do not need to set up CollectionShardingState and
+ * MetadataManager objects.
*
- * Argument 'forTestOnly' is used in unit tests that exercise the CollectionRangeDeleter class,
- * so that they do not need to set up CollectionShardingState and MetadataManager objects.
+ * Returns true if it should be scheduled to run again because there might be more documents to
+ * delete, or false otherwise.
*/
- static Action cleanUpNextRange(OperationContext*,
- NamespaceString const& nss,
- Action,
- int maxToDelete,
- CollectionRangeDeleter* forTestOnly = nullptr);
+ static bool cleanUpNextRange(OperationContext*,
+ NamespaceString const& nss,
+ int maxToDelete,
+ CollectionRangeDeleter* rangeDeleterForTestOnly = nullptr);
private:
/**
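
The header comment above gives the reverted contract for cleanUpNextRange: it returns true while there may be more documents or ranges to delete, and false once nothing remains (or the collection is no longer sharded). A hedged, self-contained sketch of how a caller drives that contract; the stub below merely stands in for the real function, and in the server the loop is actually a rescheduled executor task in MetadataManager::_scheduleCleanup (shown later in this diff):

#include <algorithm>
#include <iostream>
#include <string>

// Stand-ins for the real types so the sketch compiles on its own.
struct OperationContext {};
using NamespaceString = std::string;

// Hypothetical stub with the same shape as the reverted cleanUpNextRange:
// returns true while work may remain, false once everything is drained.
bool cleanUpNextRange(OperationContext*, const NamespaceString&, int maxToDelete) {
    static int pendingDocuments = 250;  // pretend 250 orphaned documents are queued
    if (pendingDocuments == 0)
        return false;                   // no ranges scheduled, or collection unsharded
    pendingDocuments -= std::min(pendingDocuments, maxToDelete);
    return true;                        // call again: more documents may remain
}

int main() {
    OperationContext opCtx;
    const NamespaceString nss = "test.coll";
    while (cleanUpNextRange(&opCtx, nss, 100)) {
        // keep going until the deleter reports it is finished
    }
    std::cout << "range deletions drained\n";
}
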
diff --git a/src/mongo/db/s/collection_range_deleter_test.cpp b/src/mongo/db/s/collection_range_deleter_test.cpp
index adaf90e7d0f..6442734d927 100644
--- a/src/mongo/db/s/collection_range_deleter_test.cpp
+++ b/src/mongo/db/s/collection_range_deleter_test.cpp
@@ -60,25 +60,18 @@ const std::string kPattern = "_id";
const BSONObj kKeyPattern = BSON(kPattern << 1);
const std::string kShardName{"a"};
const HostAndPort dummyHost("dummy", 123);
-const NamespaceString kAdminSystemVersion = NamespaceString("admin", "system.version");
class CollectionRangeDeleterTest : public ShardingMongodTestFixture {
-public:
- using Deletion = CollectionRangeDeleter::Deletion;
- using Action = CollectionRangeDeleter::Action;
-
protected:
- auto next(CollectionRangeDeleter& rangeDeleter, Action action, int maxToDelete)
- -> CollectionRangeDeleter::Action {
+ bool next(CollectionRangeDeleter& rangeDeleter, int maxToDelete) {
return CollectionRangeDeleter::cleanUpNextRange(
- operationContext(), kNss, action, maxToDelete, &rangeDeleter);
+ operationContext(), kNss, maxToDelete, &rangeDeleter);
}
std::shared_ptr<RemoteCommandTargeterMock> configTargeter() {
return RemoteCommandTargeterMock::get(shardRegistry()->getConfigShard()->getTargeter());
}
- OID const& epoch() {
- return _epoch;
- }
+
+ using Deletion = CollectionRangeDeleter::Deletion;
private:
void setUp() override;
@@ -98,19 +91,9 @@ private:
std::unique_ptr<DistLockManager> distLockManager) override {
return stdx::make_unique<ShardingCatalogClientMock>(std::move(distLockManager));
}
-
- OID _epoch;
};
-bool operator==(CollectionRangeDeleter::Action a, CollectionRangeDeleter::Action b) {
- return (int)a == (int)b;
-}
-std::ostream& operator<<(std::ostream& os, CollectionRangeDeleter::Action a) {
- return os << (int)a;
-}
-
void CollectionRangeDeleterTest::setUp() {
- _epoch = OID::gen();
serverGlobalParams.clusterRole = ClusterRole::ShardServer;
ShardingMongodTestFixture::setUp();
replicationCoordinator()->alwaysAllowWrites(true);
@@ -125,12 +108,13 @@ void CollectionRangeDeleterTest::setUp() {
{
AutoGetCollection autoColl(operationContext(), kNss, MODE_IX);
auto collectionShardingState = CollectionShardingState::get(operationContext(), kNss);
+ const OID epoch = OID::gen();
collectionShardingState->refreshMetadata(
operationContext(),
stdx::make_unique<CollectionMetadata>(
kKeyPattern,
- ChunkVersion(1, 0, epoch()),
- ChunkVersion(0, 0, epoch()),
+ ChunkVersion(1, 0, epoch),
+ ChunkVersion(0, 0, epoch),
SimpleBSONObjComparator::kInstance.makeBSONObjIndexedMap<CachedChunkInfo>()));
}
}
@@ -149,7 +133,7 @@ namespace {
// Tests the case that there is nothing in the database.
TEST_F(CollectionRangeDeleterTest, EmptyDatabase) {
CollectionRangeDeleter rangeDeleter;
- ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+ ASSERT_FALSE(next(rangeDeleter, 1));
}
// Tests the case that there is data, but it is not in a range to clean.
@@ -161,14 +145,14 @@ TEST_F(CollectionRangeDeleterTest, NoDataInGivenRangeToClean) {
ASSERT_BSONOBJ_EQ(insertedDoc, dbclient.findOne(kNss.toString(), QUERY(kPattern << 25)));
std::list<Deletion> ranges;
ranges.emplace_back(Deletion(ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10))));
- ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+ rangeDeleter.add(std::move(ranges));
ASSERT_EQ(1u, rangeDeleter.size());
- ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kWriteOpLog, 1));
+ ASSERT_TRUE(next(rangeDeleter, 1));
ASSERT_EQ(0u, rangeDeleter.size());
ASSERT_BSONOBJ_EQ(insertedDoc, dbclient.findOne(kNss.toString(), QUERY(kPattern << 25)));
- ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+ ASSERT_FALSE(next(rangeDeleter, 1));
}
// Tests the case that there is a single document within a range to clean.
@@ -182,24 +166,23 @@ TEST_F(CollectionRangeDeleterTest, OneDocumentInOneRangeToClean) {
std::list<Deletion> ranges;
Deletion deletion{ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10))};
ranges.emplace_back(std::move(deletion));
- ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+ rangeDeleter.add(std::move(ranges));
ASSERT_TRUE(ranges.empty()); // spliced elements out of it
auto optNotifn = rangeDeleter.overlaps(ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10)));
ASSERT(optNotifn);
auto notifn = *optNotifn;
ASSERT(!notifn.ready());
- ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 1)); // actually delete one
+ ASSERT_TRUE(next(rangeDeleter, 1)); // actually delete one
ASSERT(!notifn.ready());
ASSERT_EQ(rangeDeleter.size(), 1u);
- // range empty, pop range, notify
- ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 1));
+ ASSERT_TRUE(next(rangeDeleter, 1)); // range empty, pop range, notify
ASSERT_TRUE(rangeDeleter.isEmpty());
ASSERT(notifn.ready() && notifn.waitStatus(operationContext()).isOK());
ASSERT_TRUE(dbclient.findOne(kNss.toString(), QUERY(kPattern << 5)).isEmpty());
- ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+ ASSERT_FALSE(next(rangeDeleter, 1));
}
// Tests the case that there are multiple documents within a range to clean.
@@ -214,12 +197,12 @@ TEST_F(CollectionRangeDeleterTest, MultipleDocumentsInOneRangeToClean) {
std::list<Deletion> ranges;
Deletion deletion{ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10))};
ranges.emplace_back(std::move(deletion));
- ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+ rangeDeleter.add(std::move(ranges));
- ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 100));
- ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 100));
+ ASSERT_TRUE(next(rangeDeleter, 100));
+ ASSERT_TRUE(next(rangeDeleter, 100));
ASSERT_EQUALS(0ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 5)));
- ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 100));
+ ASSERT_FALSE(next(rangeDeleter, 100));
}
// Tests the case that there are multiple documents within a range to clean, and the range deleter
@@ -235,20 +218,21 @@ TEST_F(CollectionRangeDeleterTest, MultipleCleanupNextRangeCalls) {
std::list<Deletion> ranges;
Deletion deletion{ChunkRange(BSON(kPattern << 0), BSON(kPattern << 10))};
ranges.emplace_back(std::move(deletion));
- ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+ rangeDeleter.add(std::move(ranges));
- ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 1));
+ ASSERT_TRUE(next(rangeDeleter, 1));
ASSERT_EQUALS(2ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 5)));
- ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kMore, 1));
+ ASSERT_TRUE(next(rangeDeleter, 1));
ASSERT_EQUALS(1ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 5)));
- ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kMore, 1));
- ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 1));
+ ASSERT_TRUE(next(rangeDeleter, 1));
+ ASSERT_TRUE(next(rangeDeleter, 1));
ASSERT_EQUALS(0ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 5)));
- ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+ ASSERT_FALSE(next(rangeDeleter, 1));
}
+
// Tests the case that there are two ranges to clean, each containing multiple documents.
TEST_F(CollectionRangeDeleterTest, MultipleDocumentsInMultipleRangesToClean) {
CollectionRangeDeleter rangeDeleter;
@@ -263,10 +247,10 @@ TEST_F(CollectionRangeDeleterTest, MultipleDocumentsInMultipleRangesToClean) {
std::list<Deletion> ranges;
ranges.emplace_back(Deletion{ChunkRange{BSON(kPattern << 0), BSON(kPattern << 4)}});
- ASSERT_TRUE(rangeDeleter.add(std::move(ranges)));
+ rangeDeleter.add(std::move(ranges));
ASSERT_TRUE(ranges.empty());
ranges.emplace_back(Deletion{ChunkRange{BSON(kPattern << 4), BSON(kPattern << 7)}});
- ASSERT_FALSE(rangeDeleter.add(std::move(ranges)));
+ rangeDeleter.add(std::move(ranges));
auto optNotifn1 = rangeDeleter.overlaps(ChunkRange{BSON(kPattern << 0), BSON(kPattern << 4)});
ASSERT_TRUE(optNotifn1);
@@ -277,68 +261,29 @@ TEST_F(CollectionRangeDeleterTest, MultipleDocumentsInMultipleRangesToClean) {
auto& notifn2 = *optNotifn2;
ASSERT_FALSE(notifn2.ready());
- // test op== on notifications
- ASSERT_TRUE(notifn1 == *optNotifn1);
- ASSERT_FALSE(notifn1 == *optNotifn2);
- ASSERT_TRUE(notifn1 != *optNotifn2);
- ASSERT_FALSE(notifn1 != *optNotifn1);
-
- ASSERT_EQUALS(0ULL,
- dbclient.count(kAdminSystemVersion.ns(), BSON(kPattern << "startRangeDeletion")));
-
- ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 100));
+ ASSERT_TRUE(next(rangeDeleter, 100));
ASSERT_FALSE(notifn1.ready()); // no trigger yet
ASSERT_FALSE(notifn2.ready()); // no trigger yet
- ASSERT_EQUALS(1ULL,
- dbclient.count(kAdminSystemVersion.ns(), BSON(kPattern << "startRangeDeletion")));
- // clang-format off
- ASSERT_BSONOBJ_EQ(
- BSON("_id" << "startRangeDeletion" << "ns" << kNss.ns()
- << "epoch" << epoch() << "min" << BSON("_id" << 0) << "max" << BSON("_id" << 4)),
- dbclient.findOne(kAdminSystemVersion.ns(), QUERY("_id" << "startRangeDeletion")));
- // clang-format on
-
- ASSERT_EQUALS(0ULL, dbclient.count(kNss.ns(), BSON(kPattern << LT << 4)));
- ASSERT_EQUALS(3ULL, dbclient.count(kNss.ns(), BSON(kPattern << LT << 10)));
-
- // discover there are no more < 4, pop range 1
- ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 100));
-
- ASSERT_EQUALS(1ULL,
- dbclient.count(kAdminSystemVersion.ns(), BSON(kPattern << "startRangeDeletion")));
- // clang-format off
- ASSERT_BSONOBJ_EQ(
- BSON("_id" << "startRangeDeletion" << "ns" << kNss.ns()
- << "epoch" << epoch() << "min" << BSON("_id" << 0) << "max" << BSON("_id" << 4)),
- dbclient.findOne(kAdminSystemVersion.ns(), QUERY("_id" << "startRangeDeletion")));
- // clang-format on
+ ASSERT_EQUALS(0ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 4)));
+ ASSERT_EQUALS(3ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 10)));
+
+ ASSERT_TRUE(next(rangeDeleter, 100)); // discover there are no more < 4, pop range 1
ASSERT_TRUE(notifn1.ready() && notifn1.waitStatus(operationContext()).isOK());
ASSERT_FALSE(notifn2.ready());
- ASSERT_EQUALS(3ULL, dbclient.count(kNss.ns(), BSON(kPattern << LT << 10)));
+ ASSERT_EQUALS(3ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 10)));
- // delete the remaining documents
- ASSERT_EQ(Action::kMore, next(rangeDeleter, Action::kWriteOpLog, 100));
+ ASSERT_TRUE(next(rangeDeleter, 100)); // delete the remaining documents
ASSERT_FALSE(notifn2.ready());
- // clang-format off
- ASSERT_BSONOBJ_EQ(
- BSON("_id" << "startRangeDeletion" << "ns" << kNss.ns()
- << "epoch" << epoch() << "min" << BSON("_id" << 4) << "max" << BSON("_id" << 7)),
- dbclient.findOne(kAdminSystemVersion.ns(), QUERY("_id" << "startRangeDeletion")));
- // clang-format on
-
- ASSERT_EQUALS(0ULL, dbclient.count(kNss.ns(), BSON(kPattern << LT << 10)));
-
- // discover there are no more, pop range 2
- ASSERT_EQ(Action::kWriteOpLog, next(rangeDeleter, Action::kMore, 1));
+ ASSERT_EQUALS(0ULL, dbclient.count(kNss.toString(), BSON(kPattern << LT << 10)));
+ ASSERT_TRUE(next(rangeDeleter, 1)); // discover there are no more, pop range 2
ASSERT_TRUE(notifn2.ready() && notifn2.waitStatus(operationContext()).isOK());
- // discover there are no more ranges
- ASSERT_EQ(Action::kFinished, next(rangeDeleter, Action::kWriteOpLog, 1));
+ ASSERT_FALSE(next(rangeDeleter, 1)); // discover there are no more ranges
}
} // unnamed namespace
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index e4a04ed2aaf..6440415d511 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -134,12 +134,6 @@ auto CollectionShardingState::cleanUpRange(ChunkRange const& range) -> CleanupNo
return _metadataManager->cleanUpRange(range);
}
-auto CollectionShardingState::overlappingMetadata(ChunkRange const& range) const
- -> std::vector<ScopedCollectionMetadata> {
- return _metadataManager->overlappingMetadata(_metadataManager, range);
-}
-
-
MigrationSourceManager* CollectionShardingState::getMigrationSourceManager() {
return _sourceMgr;
}
diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h
index 28ad2ad12d1..ff0489f4d9a 100644
--- a/src/mongo/db/s/collection_sharding_state.h
+++ b/src/mongo/db/s/collection_sharding_state.h
@@ -141,14 +141,6 @@ public:
auto cleanUpRange(ChunkRange const& range) -> CleanupNotification;
/**
- * Returns a vector of ScopedCollectionMetadata objects representing metadata instances in use
- * by running queries that overlap the argument range, suitable for identifying and invalidating
- * those queries.
- */
- auto overlappingMetadata(ChunkRange const& range) const
- -> std::vector<ScopedCollectionMetadata>;
-
- /**
* Returns the active migration source manager, if one is available.
*/
MigrationSourceManager* getMigrationSourceManager();
@@ -251,12 +243,10 @@ private:
MigrationSourceManager* _sourceMgr{nullptr};
// for access to _metadataManager
- friend auto CollectionRangeDeleter::cleanUpNextRange(OperationContext*,
+ friend bool CollectionRangeDeleter::cleanUpNextRange(OperationContext*,
NamespaceString const&,
- CollectionRangeDeleter::Action,
int maxToDelete,
- CollectionRangeDeleter*)
- -> CollectionRangeDeleter::Action;
+ CollectionRangeDeleter*);
};
} // namespace mongo
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp
index de6cfcabd96..70cc7de5ee5 100644
--- a/src/mongo/db/s/metadata_manager.cpp
+++ b/src/mongo/db/s/metadata_manager.cpp
@@ -44,67 +44,60 @@
#include "mongo/util/assert_util.h"
#include "mongo/util/log.h"
-// MetadataManager maintains pointers to CollectionMetadata objects in a member list named
-// _metadata. Each CollectionMetadata contains an immutable _chunksMap of chunks assigned to this
-// shard, along with details related to its own lifecycle in a member _tracker.
+// MetadataManager maintains std::shared_ptr<CollectionMetadataManager> pointers in a list
+// _metadata. It also contains a CollectionRangeDeleter that queues orphan ranges to delete in
+// a background thread, and a record of the ranges being migrated in, to avoid deleting them.
//
-// The current chunk mapping, used by queries starting up, is at _metadata.back(). Each query,
-// when it starts up, requests and holds a ScopedCollectionMetadata object, and destroys it on
-// termination. Each ScopedCollectionMetadata keeps a shared_ptr to its CollectionMetadata chunk
-// mapping, and to the MetadataManager itself. CollectionMetadata mappings also keep a record of
-// chunk ranges that may be deleted when it is determined that the range can no longer be in use.
+// Free-floating CollectionMetadata objects are maintained by these pointers, and also by clients
+// via shared pointers in ScopedCollectionMetadata objects.
//
-// ScopedCollectionMetadata's destructor decrements the CollectionMetadata's usageCounter.
-// Whenever a usageCounter drops to zero, we check whether any now-unused CollectionMetadata
-// elements can be popped off the front of _metadata. We need to keep the unused elements in the
-// middle (as seen below) because they may schedule deletions of chunks depended on by older
-// mappings.
+// The _tracker member of CollectionMetadata keeps:
+// a count of the ScopedCollectionMetadata objects that have pointers to the CollectionMetadata
+// a list of key ranges [min,max) of orphaned documents that may be deleted when the count goes
+// to zero
+// ____________________________
+// (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata |
+// _________________________ +----(s) manager metadata (s)-----------------+
+// | CollectionShardingState | | |____________________________| | |
+// | _metadataManager (s) | +-------(s) manager metadata (s)-------------+ |
+// |____________________|____| | |____________________________| | | |
+// ____________________v_______ +----------(s) manager metadata (s) | | |
+// | MetadataManager | | |________________________|___| | |
+// | |<---+ | | |
+// | | ________________________ | | |
+// | /----------->| CollectionMetadata |<----+ (1 use) | |
+// | [(s),----/ | | ______________________|_ | |
+// | (s),------------------->| CollectionMetadata | (0 uses) | |
+// | _metadata: (s)]----\ | | | ______________________|_ | |
+// | \--------------->| CollectionMetadata | | |
+// | | | | | | | |
+// | _rangesToClean: | | | | _tracker: |<------------+ |
+// | ________________________ | | | | ____________________ |<--------------+
+// | | CollectionRangeDeleter | | | | | | Tracker | | (2 uses)
+// | | | | | | | | | |
+// | | _orphans [[min,max), | | | | | | usageCounter | |
+// | | [min,max), | | | | | | orphans [min,max), | |
+// | | ... ] | | | | | | ... ] | |
+// | |________________________| | |_| | |____________________| |
+// |____________________________| | | _chunksMap |
+// |_| _chunkVersion |
+// | ... |
+// |________________________|
//
-// New chunk mappings are pushed onto the back of _metadata. Subsequently started queries use the
-// new mapping while still-running queries continue using the older "snapshot" mappings. We treat
-// _metadata.back()'s usage count differently from the snapshots because it can't reliably be
-// compared to zero; a new query may increment it at any time.
+// A ScopedCollectionMetadata object is created and held during a query, and destroyed when the
+// query no longer needs access to the collection. Its destructor decrements the CollectionMetadata
+// _tracker member's usageCounter. Note that the collection may become unsharded, and even get
+// sharded again, between construction and destruction of a ScopedCollectionMetadata.
//
-// (Note that the collection may be dropped or become unsharded, and even get made and sharded
-// again, between construction and destruction of a ScopedCollectionMetadata).
+// When a new chunk mapping replaces the active mapping, it is pushed onto the back of _metadata.
//
-// MetadataManager also contains a CollectionRangeDeleter _rangesToClean that queues orphan ranges
-// being deleted in a background thread, and a mapping _receivingChunks of the ranges being migrated
-// in, to avoid deleting them. Each range deletion is paired with a notification object triggered
-// when the deletion is completed or abandoned.
-//
-// ____________________________
-// (s): std::shared_ptr<> Clients:| ScopedCollectionMetadata |
-// _________________________ +----(s) manager metadata (s)------------------+
-// | CollectionShardingState | | |____________________________| | |
-// | _metadataManager (s) | +-------(s) manager metadata (s)--------------+ |
-// |____________________|____| | |____________________________| | | |
-// ____________________v________ +------------(s) manager metadata (s)-----+ | |
-// | MetadataManager | | |____________________________| | | |
-// | |<--+ | | |
-// | | ___________________________ (1 use) | | |
-// | getActiveMetadata(): /---------->| CollectionMetadata |<---------+ | |
-// | back(): [(s),------/ | | _________________________|_ | |
-// | (s),-------------------->| CollectionMetadata | (0 uses) | |
-// | _metadata: (s)]------\ | | | _________________________|_ | |
-// | \-------------->| CollectionMetadata | | |
-// | _receivingChunks | | | | | (2 uses) | |
-// | _rangesToClean: | | | | _tracker: |<---------+ |
-// | _________________________ | | | | _______________________ |<-----------+
-// | | CollectionRangeDeleter | | | | | | Tracker | |
-// | | | | | | | | | |
-// | | _orphans [range,notif, | | | | | | usageCounter | |
-// | | range,notif, | | | | | | orphans [range,notif, | |
-// | | ... ] | | | | | | range,notif, | |
-// | | | | | | | | ... ] | |
-// | |_________________________| | |_| | |_______________________| |
-// |_____________________________| | | _chunksMap |
-// |_| _chunkVersion |
-// | ... |
-// |___________________________|
+// A CollectionMetadata object pointed to from _metadata is maintained at least as long as any
+// query holds a ScopedCollectionMetadata object referring to it, or to any older one. In the
+// diagram above, the middle CollectionMetadata is kept until the one below it is disposed of.
//
// Note that _metadata as shown here has its front() at the bottom, back() at the top. As usual,
-// new entries are pushed onto the back, popped off the front.
+// new entries are pushed onto the back, popped off the front. The "active" metadata used by new
+// queries (when there is one), is _metadata.back().
namespace mongo {
@@ -125,17 +118,13 @@ MetadataManager::~MetadataManager() {
}
void MetadataManager::_clearAllCleanups() {
- _clearAllCleanups(
- {ErrorCodes::InterruptedDueToReplStateChange,
- str::stream() << "Range deletions in " << _nss.ns()
- << " abandoned because collection was dropped or became unsharded"});
-}
-
-void MetadataManager::_clearAllCleanups(Status status) {
for (auto& metadata : _metadata) {
_pushListToClean(std::move(metadata->_tracker.orphans));
}
- _rangesToClean.clear(status);
+ _rangesToClean.clear({ErrorCodes::InterruptedDueToReplStateChange,
+ str::stream() << "Range deletions in " << _nss.ns()
+ << " abandoned because collection was"
+ " dropped or became unsharded"});
}
ScopedCollectionMetadata MetadataManager::getActiveMetadata(std::shared_ptr<MetadataManager> self) {
@@ -252,12 +241,10 @@ void MetadataManager::_retireExpiredMetadata() {
if (!_metadata.front()->_tracker.orphans.empty()) {
log() << "Queries possibly dependent on " << _nss.ns()
<< " range(s) finished; scheduling for deletion";
- // It is safe to push orphan ranges from _metadata.back(), even though new queries might
- // start any time, because any request to delete a range it maps is rejected.
_pushListToClean(std::move(_metadata.front()->_tracker.orphans));
}
if (&_metadata.front() == &_metadata.back())
- break; // do not pop the active chunk mapping!
+ break; // do not retire current chunk metadata.
}
}
@@ -267,8 +254,6 @@ void MetadataManager::_retireExpiredMetadata() {
ScopedCollectionMetadata::ScopedCollectionMetadata(std::shared_ptr<MetadataManager> manager,
std::shared_ptr<CollectionMetadata> metadata)
: _metadata(std::move(metadata)), _manager(std::move(manager)) {
- invariant(_metadata);
- invariant(_manager);
++_metadata->_tracker.usageCounter;
}
@@ -357,17 +342,15 @@ void MetadataManager::append(BSONObjBuilder* builder) {
amrArr.done();
}
-void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor,
- NamespaceString nss,
- CollectionRangeDeleter::Action action) {
- executor->scheduleWork([executor, nss, action](auto&) {
+void MetadataManager::_scheduleCleanup(executor::TaskExecutor* executor, NamespaceString nss) {
+ executor->scheduleWork([executor, nss](auto&) {
const int maxToDelete = std::max(int(internalQueryExecYieldIterations.load()), 1);
Client::initThreadIfNotAlready("Collection Range Deleter");
auto UniqueOpCtx = Client::getCurrent()->makeOperationContext();
auto opCtx = UniqueOpCtx.get();
- auto next = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, action, maxToDelete);
- if (next != CollectionRangeDeleter::Action::kFinished) {
- _scheduleCleanup(executor, nss, next);
+ bool again = CollectionRangeDeleter::cleanUpNextRange(opCtx, nss, maxToDelete);
+ if (again) {
+ _scheduleCleanup(executor, nss);
}
});
}
@@ -382,9 +365,9 @@ auto MetadataManager::_pushRangeToClean(ChunkRange const& range) -> CleanupNotif
void MetadataManager::_pushListToClean(std::list<Deletion> ranges) {
if (_rangesToClean.add(std::move(ranges))) {
- _scheduleCleanup(_executor, _nss, CollectionRangeDeleter::Action::kWriteOpLog);
+ _scheduleCleanup(_executor, _nss);
}
- invariant(ranges.empty());
+ dassert(ranges.empty());
}
void MetadataManager::_addToReceiving(ChunkRange const& range) {
@@ -459,28 +442,6 @@ auto MetadataManager::cleanUpRange(ChunkRange const& range) -> CleanupNotificati
return activeMetadata->_tracker.orphans.back().notification;
}
-auto MetadataManager::overlappingMetadata(std::shared_ptr<MetadataManager> const& self,
- ChunkRange const& range)
- -> std::vector<ScopedCollectionMetadata> {
- invariant(!_metadata.empty());
- stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
- std::vector<ScopedCollectionMetadata> result;
- result.reserve(_metadata.size());
- auto it = _metadata.crbegin(); // start with the current active chunk mapping
- if ((*it)->rangeOverlapsChunk(range)) {
- // We ignore the refcount of the active mapping; effectively, we assume it is in use.
- result.push_back(ScopedCollectionMetadata(self, *it));
- }
- ++it; // step to snapshots
- for (auto end = _metadata.crend(); it != end; ++it) {
- // We want all the overlapping snapshot mappings still possibly in use by a query.
- if ((*it)->_tracker.usageCounter > 0 && (*it)->rangeOverlapsChunk(range)) {
- result.push_back(ScopedCollectionMetadata(self, *it));
- }
- }
- return result;
-}
-
size_t MetadataManager::numberOfRangesToCleanStillInUse() {
stdx::lock_guard<stdx::mutex> scopedLock(_managerLock);
size_t count = 0;
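
The comment block rewritten at the top of metadata_manager.cpp above describes the refcounting that keeps metadata snapshots alive while queries use them: each ScopedCollectionMetadata bumps its snapshot's usageCounter on construction and drops it on destruction, and a snapshot (with any orphan ranges it tracks) can be retired only once its count reaches zero. A toy, self-contained sketch of that lifecycle, using invented names rather than the real MetadataManager classes:

#include <cassert>
#include <memory>

struct MetadataSnapshot {
    int usageCounter = 0;  // stands in for CollectionMetadata::_tracker.usageCounter
};

// RAII handle playing the role of ScopedCollectionMetadata in this sketch.
class ScopedMetadata {
public:
    explicit ScopedMetadata(std::shared_ptr<MetadataSnapshot> m) : _m(std::move(m)) {
        ++_m->usageCounter;  // a query starts using this snapshot
    }
    ~ScopedMetadata() {
        --_m->usageCounter;  // once this reaches zero the snapshot can be retired
    }

private:
    std::shared_ptr<MetadataSnapshot> _m;
};

int main() {
    auto snapshot = std::make_shared<MetadataSnapshot>();
    {
        ScopedMetadata query(snapshot);  // query obtains the active metadata
        assert(snapshot->usageCounter == 1);
    }  // query finishes, handle destroyed
    assert(snapshot->usageCounter == 0);  // snapshot now eligible for retirement
}
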
diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h
index ff0770f450e..46da84db9e2 100644
--- a/src/mongo/db/s/metadata_manager.h
+++ b/src/mongo/db/s/metadata_manager.h
@@ -114,14 +114,6 @@ public:
CleanupNotification cleanUpRange(ChunkRange const& range);
/**
- * Returns a vector of ScopedCollectionMetadata objects representing metadata instances in use
- * by running queries that overlap the argument range, suitable for identifying and invalidating
- * those queries.
- */
- auto overlappingMetadata(std::shared_ptr<MetadataManager> const& itself,
- ChunkRange const& range) -> std::vector<ScopedCollectionMetadata>;
-
- /**
* Returns the number of ranges scheduled to be cleaned, exclusive of such ranges that might
* still be in use by running queries. Outside of test drivers, the actual number may vary
* after it returns, so this is really only useful for unit tests.
@@ -154,19 +146,12 @@ private:
* Each time it completes cleaning up a range, it wakes up clients waiting on completion of
* that range, which may then verify their range has no more deletions scheduled, and proceed.
*/
- static void _scheduleCleanup(executor::TaskExecutor*,
- NamespaceString nss,
- CollectionRangeDeleter::Action);
+ static void _scheduleCleanup(executor::TaskExecutor*, NamespaceString nss);
// All of the following functions must be called while holding _managerLock.
/**
- * Cancels all scheduled deletions of orphan ranges, notifying listeners with specified status.
- */
- void _clearAllCleanups(Status);
-
- /**
- * Cancels all scheduled deletions of orphan ranges, notifying listeners with status
+ * Cancel all scheduled deletions of orphan ranges, notifying listeners with status
* InterruptedDueToReplStateChange.
*/
void _clearAllCleanups();
@@ -244,12 +229,10 @@ private:
// friends
// for access to _rangesToClean and _managerLock under task callback
- friend auto CollectionRangeDeleter::cleanUpNextRange(OperationContext*,
+ friend bool CollectionRangeDeleter::cleanUpNextRange(OperationContext*,
NamespaceString const&,
- CollectionRangeDeleter::Action,
int maxToDelete,
- CollectionRangeDeleter*)
- -> CollectionRangeDeleter::Action;
+ CollectionRangeDeleter*);
friend class ScopedCollectionMetadata;
};
@@ -282,17 +265,6 @@ public:
*/
operator bool() const;
- /**
- * Checks whether both objects refer to the identically the same metadata.
- */
- bool operator==(ScopedCollectionMetadata const& other) const {
- return _metadata == other._metadata;
- }
- bool operator!=(ScopedCollectionMetadata const& other) const {
- return _metadata != other._metadata;
- }
-
-
private:
/**
* Increments the usageCounter in the specified CollectionMetadata.
@@ -313,12 +285,8 @@ private:
std::shared_ptr<MetadataManager> _manager{nullptr};
- // These use our private ctor
friend ScopedCollectionMetadata MetadataManager::getActiveMetadata(
- std::shared_ptr<MetadataManager>);
- friend auto MetadataManager::overlappingMetadata(std::shared_ptr<MetadataManager> const& itself,
- ChunkRange const& range)
- -> std::vector<ScopedCollectionMetadata>;
+ std::shared_ptr<MetadataManager>); // uses our private ctor
};
} // namespace mongo
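
The doc comments above cover the waiter's side of range cleanup: cleanUpRange queues an orphan range and returns a notification, and waitStatus(opCtx) blocks until the background deleter completes or abandons that range (for example with InterruptedDueToReplStateChange if the collection is dropped or becomes unsharded). A hedged sketch of that wait pattern, modelling the notification with standard-library primitives rather than MongoDB's Notification<Status>:

#include <future>
#include <iostream>
#include <string>
#include <thread>

// Minimal stand-in for MongoDB's Status, so the sketch is self-contained.
struct Status {
    bool ok;
    std::string reason;
};

// Plays the role of the background range deleter: does its work, then
// notifies whoever is waiting on this range.
void backgroundRangeDeleter(std::promise<Status> done) {
    // ... delete the documents in the scheduled range ...
    done.set_value({true, ""});  // or {false, "InterruptedDueToReplStateChange"}
}

int main() {
    std::promise<Status> done;
    auto notification = done.get_future();  // plays the role of CleanupNotification
    std::thread deleter(backgroundRangeDeleter, std::move(done));

    Status result = notification.get();     // plays the role of waitStatus(opCtx)
    if (result.ok)
        std::cout << "range deleted; waiter may proceed\n";
    else
        std::cout << "cleanup abandoned: " << result.reason << '\n';

    deleter.join();
}
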
diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp
index 5ba653ac731..a3375634d9f 100644
--- a/src/mongo/db/s/metadata_manager_test.cpp
+++ b/src/mongo/db/s/metadata_manager_test.cpp
@@ -222,36 +222,17 @@ TEST_F(MetadataManagerTest, NotificationBlocksUntilDeletion) {
ASSERT_EQ(manager->numberOfMetadataSnapshots(), 0UL);
ASSERT_EQ(manager->numberOfRangesToClean(), 0UL);
- auto scm1 = manager->getActiveMetadata(manager); // and increment refcount
- ASSERT_TRUE(bool(scm1));
- ASSERT_EQ(0ULL, scm1->getChunks().size());
-
- addChunk(manager); // push new metadata
- auto scm2 = manager->getActiveMetadata(manager); // and increment refcount
- ASSERT_EQ(1ULL, scm2->getChunks().size());
-
- // this is here solely to pacify an invariant in addChunk
- manager->refreshActiveMetadata(makeEmptyMetadata());
-
- addChunk(manager); // push new metadata
- auto scm3 = manager->getActiveMetadata(manager); // and increment refcount
- ASSERT_EQ(1ULL, scm3->getChunks().size());
-
- auto overlaps =
- manager->overlappingMetadata(manager, ChunkRange(BSON("key" << 0), BSON("key" << 10)));
- ASSERT_EQ(2ULL, overlaps.size());
- std::vector<ScopedCollectionMetadata> ref;
- ref.push_back(std::move(scm3));
- ref.push_back(std::move(scm2));
- ASSERT(ref == overlaps);
-
- ASSERT_EQ(manager->numberOfMetadataSnapshots(), 3UL);
+ auto scm = manager->getActiveMetadata(manager); // and increment scm's refcount
+ ASSERT(bool(scm));
+ addChunk(manager); // push new metadata
+
+ ASSERT_EQ(manager->numberOfMetadataSnapshots(), 1UL);
ASSERT_EQ(manager->numberOfRangesToClean(), 0UL); // not yet...
optNotif = manager->cleanUpRange(cr1);
- ASSERT_EQ(manager->numberOfMetadataSnapshots(), 3UL);
+ ASSERT_EQ(manager->numberOfMetadataSnapshots(), 1UL);
ASSERT_EQ(manager->numberOfRangesToClean(), 1UL);
- } // scm1,2,3 destroyed, refcount of each metadata goes to zero
+ } // scm destroyed, refcount of metadata goes to zero
ASSERT_EQ(manager->numberOfMetadataSnapshots(), 0UL);
ASSERT_EQ(manager->numberOfRangesToClean(), 1UL);
ASSERT_FALSE(optNotif->ready());