author     Daniel Gottlieb <daniel.gottlieb@mongodb.com>    2020-08-27 11:09:08 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-08-27 18:26:55 +0000
commit     9ced0d1888b2ab1e738b87b01a5454686e97811e (patch)
tree       56601177259b68e85cbdab7e492a5c0f4e45a89a
parent     18f397e322dbc7cc1d6272bfb6df5279a0ab6337 (diff)
download   mongo-9ced0d1888b2ab1e738b87b01a5454686e97811e.tar.gz
SERVER-49890: Create resharding oplog view on startup.
-rw-r--r--  jstests/libs/check_unique_indexes.js                              4
-rw-r--r--  jstests/libs/check_uuids.js                                       4
-rw-r--r--  jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js   4
-rw-r--r--  jstests/noPassthrough/slim_oplog_test.js                         90
-rw-r--r--  src/mongo/db/catalog/database_impl.cpp                            5
-rw-r--r--  src/mongo/db/repl/SConscript                                      1
-rw-r--r--  src/mongo/db/repl/oplog.cpp                                       3
-rw-r--r--  src/mongo/db/s/SConscript                                        33
-rw-r--r--  src/mongo/db/s/resharding_util.cpp                               49
-rw-r--r--  src/mongo/db/s/resharding_util.h                                  9
-rw-r--r--  src/mongo/db/views/view_catalog.cpp                               5
11 files changed, 193 insertions(+), 14 deletions(-)
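In short, this patch defines a "slim" view over local.oplog.rs, named local.system.resharding.slimOplogForGraphLookup, and creates it whenever the oplog itself is created at startup (see the createOplog() change in src/mongo/db/repl/oplog.cpp below). The view keeps only the fields resharding needs to walk oplog history with $graphLookup. As a minimal sketch of the intended usage, mirroring the aggregation exercised in the new jstests/noPassthrough/slim_oplog_test.js; 'someCommitTs' is a placeholder for the timestamp of the entry being chased, not something defined by the patch:

    const local = db.getSiblingDB("local");
    // Walk from one oplog entry back through its prevOpTime chain via the slim view.
    const docs = local["oplog.rs"].aggregate([
        {$match: {ts: someCommitTs}},  // 'someCommitTs' is a placeholder timestamp.
        {$graphLookup: {
            from: "system.resharding.slimOplogForGraphLookup",
            startWith: "$ts",
            connectFromField: "prevOpTime.ts",
            connectToField: "ts",
            depthField: "depthForResharding",
            as: "history",
        }},
    ]).toArray();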
diff --git a/jstests/libs/check_unique_indexes.js b/jstests/libs/check_unique_indexes.js
index 0dbaedf2d53..9decdbded64 100644
--- a/jstests/libs/check_unique_indexes.js
+++ b/jstests/libs/check_unique_indexes.js
@@ -23,6 +23,10 @@ function checkUniqueIndexFormatVersion(adminDB) {
currentDatabase.runCommand("listCollections", {includePendingDrops: true})
.cursor.firstBatch;
collectionsWithPending.forEach(function(c) {
+ if (c.type == "view") {
+ return;
+ }
+
let currentCollection = currentDatabase.getCollection(c.name);
currentCollection.getIndexes().forEach(function(index) {
if (index.unique) {
diff --git a/jstests/libs/check_uuids.js b/jstests/libs/check_uuids.js
index 54cf9bd2f90..69c4adfc06a 100644
--- a/jstests/libs/check_uuids.js
+++ b/jstests/libs/check_uuids.js
@@ -13,6 +13,10 @@ function checkCollectionUUIDs(adminDB) {
let currentDatabase = adminDB.getSiblingDB(database.name);
let collectionInfos = currentDatabase.getCollectionInfos();
for (let i = 0; i < collectionInfos.length; i++) {
+ if (collectionInfos[i].type == "view") {
+ continue;
+ }
+
assert(collectionInfos[i].info.uuid,
"Expect uuid for collection: " + tojson(collectionInfos[i]));
}
diff --git a/jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js b/jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js
index 686932593c8..e5888c67565 100644
--- a/jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js
+++ b/jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js
@@ -62,6 +62,10 @@ let recreateUniqueIndexes = function(db, secondary) {
let mdb = db.getSiblingDB(d.name);
mdb.getCollectionInfos().forEach(function(c) {
let currentCollection = mdb.getCollection(c.name);
+ if (c.type == "view") {
+ return;
+ }
+
currentCollection.getIndexes().forEach(function(spec) {
if (!spec.unique) {
return;
diff --git a/jstests/noPassthrough/slim_oplog_test.js b/jstests/noPassthrough/slim_oplog_test.js
new file mode 100644
index 00000000000..f049e9d2a55
--- /dev/null
+++ b/jstests/noPassthrough/slim_oplog_test.js
@@ -0,0 +1,90 @@
+/**
+ * The slim oplog is a view on the oplog to support efficient lookup queries for resharding.
+ *
+ * @tags: [
+ * requires_replication,
+ * requires_majority_read_concern,
+ * uses_transactions,
+ * uses_prepare_transaction
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/retryable_writes_util.js");
+
+if (!RetryableWritesUtil.storageEngineSupportsRetryableWrites(jsTest.options().storageEngine)) {
+ jsTestLog("Retryable writes are not supported, skipping test");
+ return;
+}
+
+load("jstests/core/txns/libs/prepare_helpers.js");
+
+const rst = new ReplSetTest({nodes: 2});
+rst.startSet();
+rst.initiate();
+
+const rsConn = new Mongo(rst.getURL());
+const oplog = rst.getPrimary().getDB("local")["oplog.rs"];
+const slimOplog = rst.getPrimary().getDB("local")["system.resharding.slimOplogForGraphLookup"];
+const session = rsConn.startSession({retryWrites: true});
+const collection = session.getDatabase("test")["collection"];
+
+{
+ // Assert an oplog entry representing a retryable write projects a `prevOpTime.ts` of the null
+ // timestamp.
+ assert.commandWorked(collection.insert({_id: "slim"}));
+
+ const oplogEntry = oplog.find({"o._id": "slim"}).next();
+ jsTestLog({"slim entry": oplogEntry, "slim": slimOplog.exists()});
+ assert(oplogEntry.hasOwnProperty("txnNumber"));
+ assert(oplogEntry.hasOwnProperty("stmtId"));
+ assert.eq(Timestamp(0, 0), slimOplog.find({ts: oplogEntry["ts"]}).next()["prevOpTime"]["ts"]);
+}
+
+{
+ // Assert an oplog entry that commits a prepared transaction projects a `prevOpTime.ts` equal to
+ // the timestamp of the preparing oplog entry.
+ session.startTransaction();
+ assert.commandWorked(collection.insert({_id: "slim_1"}));
+ assert.commandWorked(collection.insert({_id: "slim_2"}));
+ let prepTs = PrepareHelpers.prepareTransaction(session);
+ assert.commandWorked(PrepareHelpers.commitTransaction(session, prepTs));
+
+ const commitEntry = oplog.find({"prevOpTime.ts": prepTs}).next();
+ assert.eq(prepTs, commitEntry["prevOpTime"]["ts"]);
+ jsTestLog({
+ PrepEntry: oplog.find({ts: prepTs}).next(),
+ CommitEntry: commitEntry,
+ SlimEntry: slimOplog.find({ts: commitEntry["ts"]}).next()
+ });
+
+ // Perform a $graphLookup connecting non-trivial slim oplog entries to the associated applyOps
+ // operations. Assert the history is as expected.
+ let oplogDocsWithHistory = oplog.aggregate([
+ {$match: {ts: commitEntry["ts"]}},
+ {$graphLookup: {from: "system.resharding.slimOplogForGraphLookup",
+ startWith: "$ts",
+ connectFromField: "prevOpTime.ts",
+ connectToField: "ts",
+ depthField: "depthForResharding",
+ as: "history"}},
+ // For the purposes of this test, unwind the history to give a predictable order of items
+ // that graphLookup accumulates.
+ {$unwind: "$history"},
+ {$sort: {"history.ts": 1}}
+ ]).toArray();
+
+ jsTestLog({"Unwound history": oplogDocsWithHistory});
+ assert.eq(2, oplogDocsWithHistory.length);
+ assert.eq(1, oplogDocsWithHistory[0]["history"]["depthForResharding"]);
+ assert.eq(Timestamp(0, 0), oplogDocsWithHistory[0]["history"]["prevOpTime"]["ts"]);
+ assert.eq(2, oplogDocsWithHistory[0]["history"]["o"]["applyOps"].length);
+
+ assert.eq(0, oplogDocsWithHistory[1]["history"].depthForResharding);
+ assert.eq(prepTs, oplogDocsWithHistory[1]["history"]["prevOpTime"]["ts"]);
+ assert.eq({}, oplogDocsWithHistory[1]["history"]["o"]);
+}
+
+rst.stopSet();
+})();
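The assertions above pin down the shape of the slim documents: each oplog entry projects to _id (a copy of its ts), op, o restricted to applyOps.ui and applyOps.reshardDest, ts, and prevOpTime.ts, with prevOpTime.ts rewritten to Timestamp(0, 0) for retryable-write entries (those carrying a stmtId). Purely illustrative shapes of what the view returns; the timestamp values are invented:

    // Retryable-write insert: prevOpTime.ts is zeroed so a $graphLookup stops here.
    const retryableWriteSlimDoc = {
        _id: Timestamp(1598540000, 2), op: "i", o: {},
        ts: Timestamp(1598540000, 2), prevOpTime: {ts: Timestamp(0, 0)}
    };
    // commitTransaction entry: prevOpTime.ts still points at the prepare (applyOps) entry.
    const commitSlimDoc = {
        _id: Timestamp(1598540001, 5), op: "c", o: {},
        ts: Timestamp(1598540001, 5), prevOpTime: {ts: Timestamp(1598540001, 4)}
    };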
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index 231bc593115..3315c56e9d2 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -929,6 +929,11 @@ Status DatabaseImpl::userCreateNS(OperationContext* opCtx,
}
if (collectionOptions.isView()) {
+ if (nss.isSystem())
+ return Status(
+ ErrorCodes::InvalidNamespace,
+ "View name cannot start with 'system.', which is reserved for system namespaces");
+
uassertStatusOK(createView(opCtx, nss, collectionOptions));
} else {
invariant(createCollection(opCtx, nss, collectionOptions, createDefaultIndexes, idIndex),
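This hunk relocates the "no views under system.*" restriction from ViewCatalog::createView (removed at the bottom of this patch) up into DatabaseImpl::userCreateNS, so that internal callers such as createSlimOplogView() can create the system view while user-issued commands are still rejected. A sketch of the user-facing behavior that is preserved; "system.myView" and "someColl" are hypothetical names:

    // A user-issued create of a view under 'system.' should still fail.
    assert.commandFailedWithCode(
        db.runCommand({create: "system.myView", viewOn: "someColl", pipeline: []}),
        ErrorCodes.InvalidNamespace);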
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index a5f1502fcab..d3412d5569e 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -50,6 +50,7 @@ env.Library(
'$BUILD_DIR/mongo/db/dbdirectclient',
'$BUILD_DIR/mongo/db/dbhelpers',
'$BUILD_DIR/mongo/db/op_observer',
+ '$BUILD_DIR/mongo/db/s/resharding_util',
'$BUILD_DIR/mongo/db/stats/counters',
'$BUILD_DIR/mongo/db/stats/server_read_concern_write_concern_metrics',
'$BUILD_DIR/mongo/db/transaction',
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 7a0a79476e0..1ed66938941 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -83,6 +83,7 @@
#include "mongo/db/repl/tenant_migration_donor_util.h"
#include "mongo/db/repl/timestamp_block.h"
#include "mongo/db/repl/transaction_oplog_application.h"
+#include "mongo/db/s/resharding_util.h"
#include "mongo/db/service_context.h"
#include "mongo/db/stats/counters.h"
#include "mongo/db/stats/server_write_concern_metrics.h"
@@ -569,6 +570,8 @@ void createOplog(OperationContext* opCtx,
uow.commit();
});
+ createSlimOplogView(opCtx, ctx.db());
+
/* sync here so we don't get any surprising lag later when we try to sync */
service->getStorageEngine()->flushAllFiles(opCtx, /*callerHoldsReadLock*/ false);
}
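With createSlimOplogView() called from createOplog(), the view exists on a node as soon as its oplog does. The new test verifies this with slimOplog.exists(); one way to confirm the same thing from the shell, as a sketch assuming a running replica-set node:

    const infos = db.getSiblingDB("local").getCollectionInfos(
        {name: "system.resharding.slimOplogForGraphLookup"});
    assert.eq(1, infos.length);
    assert.eq("view", infos[0].type);                // listed as a view, not a collection
    assert.eq("oplog.rs", infos[0].options.viewOn);  // defined over the oplog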
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 3dd77b842fd..e8b56544b60 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -52,8 +52,8 @@ env.Library(
'collection_sharding_state_factory_standalone.cpp',
'config_server_op_observer.cpp',
'metadata_manager.cpp',
- 'migration_chunk_cloner_source_legacy.cpp',
'migration_chunk_cloner_source.cpp',
+ 'migration_chunk_cloner_source_legacy.cpp',
'migration_coordinator.cpp',
'migration_destination_manager.cpp',
'migration_session_id.cpp',
@@ -67,7 +67,6 @@ env.Library(
'periodic_sharded_index_consistency_checker.cpp',
'range_deletion_util.cpp',
'read_only_catalog_cache_loader.cpp',
- 'resharding_util.cpp',
'resharding/resharding_coordinator_observer.cpp',
'resharding/resharding_coordinator_service.cpp',
'scoped_operation_completion_sharding_actions.cpp',
@@ -76,6 +75,7 @@ env.Library(
'shard_filtering_metadata_refresh.cpp',
'shard_identity_rollback_notifier.cpp',
'shard_key_util.cpp',
+ 'shard_local.cpp',
'shard_metadata_util.cpp',
'shard_server_catalog_cache_loader.cpp',
'shard_server_op_observer.cpp',
@@ -84,14 +84,12 @@ env.Library(
'sharding_state_recovery.cpp',
'sharding_statistics.cpp',
'split_chunk.cpp',
- 'shard_local.cpp',
'split_vector.cpp',
'start_chunk_clone_request.cpp',
env.Idlc('migration_coordinator_document.idl')[0],
env.Idlc('range_deletion_task.idl')[0],
env.Idlc('resharding/coordinator_document.idl')[0],
env.Idlc('resharding/donor_document.idl')[0],
- env.Idlc('resharding/donor_oplog_id.idl')[0],
env.Idlc('resharding/recipient_document.idl')[0],
],
LIBDEPS=[
@@ -102,8 +100,6 @@ env.Library(
'$BUILD_DIR/mongo/db/dbhelpers',
'$BUILD_DIR/mongo/db/op_observer_impl',
'$BUILD_DIR/mongo/db/ops/write_ops_exec',
- '$BUILD_DIR/mongo/db/pipeline/expression_context',
- '$BUILD_DIR/mongo/db/pipeline/pipeline',
'$BUILD_DIR/mongo/db/repl/oplog',
'$BUILD_DIR/mongo/db/repl/wait_for_majority_service',
'$BUILD_DIR/mongo/db/rw_concern_d',
@@ -125,6 +121,27 @@ env.Library(
'$BUILD_DIR/mongo/db/rs_local_client',
'$BUILD_DIR/mongo/db/session_catalog',
'$BUILD_DIR/mongo/idl/server_parameter',
+ 'resharding_util',
+ ],
+)
+
+env.Library(
+ target='resharding_util',
+ source=[
+ 'resharding_util.cpp',
+ env.Idlc('resharding/donor_oplog_id.idl')[0],
+ ],
+ LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/db/common',
+ '$BUILD_DIR/mongo/db/concurrency/write_conflict_exception',
+ '$BUILD_DIR/mongo/db/curop',
+ '$BUILD_DIR/mongo/db/namespace_string',
+ '$BUILD_DIR/mongo/db/pipeline/expression_context',
+ '$BUILD_DIR/mongo/db/pipeline/pipeline',
+ '$BUILD_DIR/mongo/db/storage/write_unit_of_work',
+ '$BUILD_DIR/mongo/s/common_s',
+ '$BUILD_DIR/mongo/s/grid',
],
)
@@ -326,6 +343,7 @@ env.Library(
'$BUILD_DIR/mongo/s/sharding_initialization',
'$BUILD_DIR/mongo/s/sharding_router_api',
'balancer',
+ 'resharding_util',
'sharding_runtime_d',
],
)
@@ -516,5 +534,6 @@ env.CppUnitTest(
'$BUILD_DIR/mongo/util/version_impl',
'balancer',
'config_server_test_fixture',
- ]
+ 'resharding_util',
+ ],
)
diff --git a/src/mongo/db/s/resharding_util.cpp b/src/mongo/db/s/resharding_util.cpp
index 5d81a724ce1..5939c83f97b 100644
--- a/src/mongo/db/s/resharding_util.cpp
+++ b/src/mongo/db/s/resharding_util.cpp
@@ -30,11 +30,14 @@
#include "mongo/platform/basic.h"
#include "mongo/bson/bsonobj.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/namespace_string.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/pipeline/document_source_lookup.h"
#include "mongo/db/pipeline/document_source_match.h"
#include "mongo/db/pipeline/document_source_sort.h"
#include "mongo/db/s/resharding_util.h"
+#include "mongo/db/storage/write_unit_of_work.h"
#include "mongo/s/grid.h"
namespace mongo {
@@ -152,4 +155,48 @@ std::unique_ptr<Pipeline, PipelineDeleter> createAggForReshardingOplogBuffer(
return Pipeline::create(std::move(stages), expCtx);
}
-} // namespace mongo
\ No newline at end of file
+void createSlimOplogView(OperationContext* opCtx, Database* db) {
+ writeConflictRetry(
+ opCtx, "createReshardingOplog", "local.system.resharding.slimOplogForGraphLookup", [&] {
+ {
+ // Create 'system.views' in a separate WUOW if it does not exist.
+ WriteUnitOfWork wuow(opCtx);
+ Collection* coll = CollectionCatalog::get(opCtx).lookupCollectionByNamespace(
+ opCtx, NamespaceString(db->getSystemViewsName()));
+ if (!coll) {
+ coll = db->createCollection(opCtx, NamespaceString(db->getSystemViewsName()));
+ }
+ invariant(coll);
+ wuow.commit();
+ }
+
+ // Resharding uses the `prevOpTime` to link oplog related entries via a
+ $graphLookup. Large transactions and prepared transactions use prevOpTime to identify
+ earlier oplog entries from the same transaction. Retryable writes (identified via the
+ presence of `stmtId`) use prevOpTime to identify statements run earlier in the same
+ // retryable write. This view will unlink oplog entries from the same retryable write
+ // by zeroing out their `prevOpTime`.
+ CollectionOptions options;
+ options.viewOn = NamespaceString::kRsOplogNamespace.coll().toString();
+ options.pipeline = BSON_ARRAY(BSON(
+ "$project" << BSON(
+ "_id"
+ << "$ts"
+ << "op" << 1 << "o" << BSON("applyOps" << BSON("ui" << 1 << "reshardDest" << 1))
+ << "ts" << 1 << "prevOpTime.ts"
+ << BSON("$cond" << BSON("if" << BSON("$eq" << BSON_ARRAY(BSON("$type"
+ << "$stmtId")
+ << "missing"))
+ << "then"
+ << "$prevOpTime.ts"
+ << "else" << Timestamp::min())))));
+ WriteUnitOfWork wuow(opCtx);
+ uassertStatusOK(
+ db->createView(opCtx,
+ NamespaceString("local.system.resharding.slimOplogForGraphLookup"),
+ options));
+ wuow.commit();
+ });
+}
+
+} // namespace mongo
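The BSON builders above are dense; the view definition they produce corresponds roughly to the following shell command. This is shown only to make the pipeline readable: the server creates the view through Database::createView() inside a WriteUnitOfWork, and, per the database_impl.cpp change above, a user-issued create on a system.* namespace would be rejected.

    // Approximate shell equivalent of the pipeline built in createSlimOplogView().
    db.getSiblingDB("local").createView(
        "system.resharding.slimOplogForGraphLookup",
        "oplog.rs",
        [{$project: {
            _id: "$ts",
            op: 1,
            o: {applyOps: {ui: 1, reshardDest: 1}},
            ts: 1,
            "prevOpTime.ts": {$cond: {
                // Retryable-write entries carry a stmtId; zero out their prevOpTime.ts.
                if: {$eq: [{$type: "$stmtId"}, "missing"]},
                then: "$prevOpTime.ts",
                else: Timestamp(0, 0)
            }}
        }}]);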
diff --git a/src/mongo/db/s/resharding_util.h b/src/mongo/db/s/resharding_util.h
index b6441cd16fe..db10d68c4fd 100644
--- a/src/mongo/db/s/resharding_util.h
+++ b/src/mongo/db/s/resharding_util.h
@@ -31,6 +31,7 @@
#include <vector>
#include "mongo/bson/bsonobj.h"
+#include "mongo/db/catalog/database.h"
#include "mongo/db/keypattern.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/pipeline/pipeline.h"
@@ -79,4 +80,10 @@ std::unique_ptr<Pipeline, PipelineDeleter> createAggForReshardingOplogBuffer(
const boost::intrusive_ptr<ExpressionContext>& expCtx,
const boost::optional<ReshardingDonorOplogId>& resumeToken);
-} // namespace mongo
\ No newline at end of file
+/**
+ * Creates a view on the oplog that facilitates the specialized oplog tailing a resharding recipient
+ * performs on a donor.
+ */
+void createSlimOplogView(OperationContext* opCtx, Database* db);
+
+} // namespace mongo
diff --git a/src/mongo/db/views/view_catalog.cpp b/src/mongo/db/views/view_catalog.cpp
index e5e67c7a73e..d816f520460 100644
--- a/src/mongo/db/views/view_catalog.cpp
+++ b/src/mongo/db/views/view_catalog.cpp
@@ -417,11 +417,6 @@ Status ViewCatalog::createView(OperationContext* opCtx,
return Status(ErrorCodes::InvalidNamespace,
str::stream() << "invalid name for 'viewOn': " << viewOn.coll());
- if (viewName.isSystem())
- return Status(
- ErrorCodes::InvalidNamespace,
- "View name cannot start with 'system.', which is reserved for system namespaces");
-
auto collator = parseCollator(opCtx, collation);
if (!collator.isOK())
return collator.getStatus();