author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2020-08-27 11:09:08 -0400
---|---|---
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-08-27 18:26:55 +0000
commit | 9ced0d1888b2ab1e738b87b01a5454686e97811e (patch) |
tree | 56601177259b68e85cbdab7e492a5c0f4e45a89a |
parent | 18f397e322dbc7cc1d6272bfb6df5279a0ab6337 (diff) |
download | mongo-9ced0d1888b2ab1e738b87b01a5454686e97811e.tar.gz |
SERVER-49890: Create resharding oplog view on startup.
-rw-r--r-- | jstests/libs/check_unique_indexes.js | 4
-rw-r--r-- | jstests/libs/check_uuids.js | 4
-rw-r--r-- | jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js | 4
-rw-r--r-- | jstests/noPassthrough/slim_oplog_test.js | 90
-rw-r--r-- | src/mongo/db/catalog/database_impl.cpp | 5
-rw-r--r-- | src/mongo/db/repl/SConscript | 1
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 3
-rw-r--r-- | src/mongo/db/s/SConscript | 33
-rw-r--r-- | src/mongo/db/s/resharding_util.cpp | 49
-rw-r--r-- | src/mongo/db/s/resharding_util.h | 9
-rw-r--r-- | src/mongo/db/views/view_catalog.cpp | 5
11 files changed, 193 insertions, 14 deletions
diff --git a/jstests/libs/check_unique_indexes.js b/jstests/libs/check_unique_indexes.js
index 0dbaedf2d53..9decdbded64 100644
--- a/jstests/libs/check_unique_indexes.js
+++ b/jstests/libs/check_unique_indexes.js
@@ -23,6 +23,10 @@ function checkUniqueIndexFormatVersion(adminDB) {
             currentDatabase.runCommand("listCollections", {includePendingDrops: true})
                 .cursor.firstBatch;
         collectionsWithPending.forEach(function(c) {
+            if (c.type == "view") {
+                return;
+            }
+
             let currentCollection = currentDatabase.getCollection(c.name);
             currentCollection.getIndexes().forEach(function(index) {
                 if (index.unique) {
diff --git a/jstests/libs/check_uuids.js b/jstests/libs/check_uuids.js
index 54cf9bd2f90..69c4adfc06a 100644
--- a/jstests/libs/check_uuids.js
+++ b/jstests/libs/check_uuids.js
@@ -13,6 +13,10 @@ function checkCollectionUUIDs(adminDB) {
         let currentDatabase = adminDB.getSiblingDB(database.name);
         let collectionInfos = currentDatabase.getCollectionInfos();
         for (let i = 0; i < collectionInfos.length; i++) {
+            if (collectionInfos[i].type == "view") {
+                continue;
+            }
+
             assert(collectionInfos[i].info.uuid,
                    "Expect uuid for collection: " + tojson(collectionInfos[i]));
         }
diff --git a/jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js b/jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js
index 686932593c8..e5888c67565 100644
--- a/jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js
+++ b/jstests/multiVersion/genericSetFCVUsage/do_upgrade_downgrade.js
@@ -62,6 +62,10 @@ let recreateUniqueIndexes = function(db, secondary) {
         let mdb = db.getSiblingDB(d.name);
         mdb.getCollectionInfos().forEach(function(c) {
             let currentCollection = mdb.getCollection(c.name);
+            if (c.type == "view") {
+                return;
+            }
+
             currentCollection.getIndexes().forEach(function(spec) {
                 if (!spec.unique) {
                     return;
diff --git a/jstests/noPassthrough/slim_oplog_test.js b/jstests/noPassthrough/slim_oplog_test.js
new file mode 100644
index 00000000000..f049e9d2a55
--- /dev/null
+++ b/jstests/noPassthrough/slim_oplog_test.js
@@ -0,0 +1,90 @@
+/**
+ * The slim oplog is a view on the oplog to support efficient lookup queries for resharding.
+ *
+ * @tags: [
+ *   requires_replication,
+ *   requires_majority_read_concern,
+ *   uses_transactions,
+ *   uses_prepare_transaction
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/retryable_writes_util.js");
+
+if (!RetryableWritesUtil.storageEngineSupportsRetryableWrites(jsTest.options().storageEngine)) {
+    jsTestLog("Retryable writes are not supported, skipping test");
+    return;
+}
+
+load("jstests/core/txns/libs/prepare_helpers.js");
+
+const rst = new ReplSetTest({nodes: 2});
+rst.startSet();
+rst.initiate();
+
+const rsConn = new Mongo(rst.getURL());
+const oplog = rst.getPrimary().getDB("local")["oplog.rs"];
+const slimOplog = rst.getPrimary().getDB("local")["system.resharding.slimOplogForGraphLookup"];
+const session = rsConn.startSession({retryWrites: true});
+const collection = session.getDatabase("test")["collection"];
+
+{
+    // Assert an oplog entry representing a retryable write projects a `prevOpTime.ts` of the null
+    // timestamp.
+    assert.commandWorked(collection.insert({_id: "slim"}));
+
+    const oplogEntry = oplog.find({"o._id": "slim"}).next();
+    jsTestLog({"slim entry": oplogEntry, "slim": slimOplog.exists()});
+    assert(oplogEntry.hasOwnProperty("txnNumber"));
+    assert(oplogEntry.hasOwnProperty("stmtId"));
+    assert.eq(Timestamp(0, 0), slimOplog.find({ts: oplogEntry["ts"]}).next()["prevOpTime"]["ts"]);
+}
+
+{
+    // Assert an oplog entry that commits a prepared transaction projects a `prevOpTime.ts` equal to
+    // the timestamp of the preparing oplog entry.
+    session.startTransaction();
+    assert.commandWorked(collection.insert({_id: "slim_1"}));
+    assert.commandWorked(collection.insert({_id: "slim_2"}));
+    let prepTs = PrepareHelpers.prepareTransaction(session);
+    assert.commandWorked(PrepareHelpers.commitTransaction(session, prepTs));
+
+    const commitEntry = oplog.find({"prevOpTime.ts": prepTs}).next();
+    assert.eq(prepTs, commitEntry["prevOpTime"]["ts"]);
+    jsTestLog({
+        PrepEntry: oplog.find({ts: prepTs}).next(),
+        CommitEntry: commitEntry,
+        SlimEntry: slimOplog.find({ts: commitEntry["ts"]}).next()
+    });
+
+    // Perform a $graphLookup connecting non-trivial slim oplog entries to the associated applyOps
+    // operations. Assert the history is as expected.
+    let oplogDocsWithHistory = oplog.aggregate([
+        {$match: {ts: commitEntry["ts"]}},
+        {$graphLookup: {from: "system.resharding.slimOplogForGraphLookup",
+                        startWith: "$ts",
+                        connectFromField: "prevOpTime.ts",
+                        connectToField: "ts",
+                        depthField: "depthForResharding",
+                        as: "history"}},
+        // For the purposes of this test, unwind the history to give a predictable order of items
+        // that graphLookup accumulates.
+        {$unwind: "$history"},
+        {$sort: {"history.ts": 1}}
+    ]).toArray();
+
+    jsTestLog({"Unwound history": oplogDocsWithHistory});
+    assert.eq(2, oplogDocsWithHistory.length);
+    assert.eq(1, oplogDocsWithHistory[0]["history"]["depthForResharding"]);
+    assert.eq(Timestamp(0, 0), oplogDocsWithHistory[0]["history"]["prevOpTime"]["ts"]);
+    assert.eq(2, oplogDocsWithHistory[0]["history"]["o"]["applyOps"].length);
+
+    assert.eq(0, oplogDocsWithHistory[1]["history"].depthForResharding);
+    assert.eq(prepTs, oplogDocsWithHistory[1]["history"]["prevOpTime"]["ts"]);
+    assert.eq({}, oplogDocsWithHistory[1]["history"]["o"]);
+}
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index 231bc593115..3315c56e9d2 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -929,6 +929,11 @@ Status DatabaseImpl::userCreateNS(OperationContext* opCtx,
     }
 
     if (collectionOptions.isView()) {
+        if (nss.isSystem())
+            return Status(
+                ErrorCodes::InvalidNamespace,
+                "View name cannot start with 'system.', which is reserved for system namespaces");
+
         uassertStatusOK(createView(opCtx, nss, collectionOptions));
     } else {
         invariant(createCollection(opCtx, nss, collectionOptions, createDefaultIndexes, idIndex),
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index a5f1502fcab..d3412d5569e 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -50,6 +50,7 @@ env.Library(
         '$BUILD_DIR/mongo/db/dbdirectclient',
         '$BUILD_DIR/mongo/db/dbhelpers',
         '$BUILD_DIR/mongo/db/op_observer',
+        '$BUILD_DIR/mongo/db/s/resharding_util',
         '$BUILD_DIR/mongo/db/stats/counters',
         '$BUILD_DIR/mongo/db/stats/server_read_concern_write_concern_metrics',
         '$BUILD_DIR/mongo/db/transaction',
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 7a0a79476e0..1ed66938941 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -83,6 +83,7 @@
 #include "mongo/db/repl/tenant_migration_donor_util.h"
 #include "mongo/db/repl/timestamp_block.h"
 #include "mongo/db/repl/transaction_oplog_application.h"
+#include "mongo/db/s/resharding_util.h"
 #include "mongo/db/service_context.h"
 #include "mongo/db/stats/counters.h"
 #include "mongo/db/stats/server_write_concern_metrics.h"
@@ -569,6 +570,8 @@ void createOplog(OperationContext* opCtx,
         uow.commit();
     });
 
+    createSlimOplogView(opCtx, ctx.db());
+
     /* sync here so we don't get any surprising lag later when we try to sync */
     service->getStorageEngine()->flushAllFiles(opCtx, /*callerHoldsReadLock*/ false);
 }
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 3dd77b842fd..e8b56544b60 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -52,8 +52,8 @@ env.Library(
         'collection_sharding_state_factory_standalone.cpp',
         'config_server_op_observer.cpp',
         'metadata_manager.cpp',
-        'migration_chunk_cloner_source_legacy.cpp',
         'migration_chunk_cloner_source.cpp',
+        'migration_chunk_cloner_source_legacy.cpp',
         'migration_coordinator.cpp',
         'migration_destination_manager.cpp',
         'migration_session_id.cpp',
@@ -67,7 +67,6 @@ env.Library(
         'periodic_sharded_index_consistency_checker.cpp',
         'range_deletion_util.cpp',
         'read_only_catalog_cache_loader.cpp',
-        'resharding_util.cpp',
         'resharding/resharding_coordinator_observer.cpp',
         'resharding/resharding_coordinator_service.cpp',
         'scoped_operation_completion_sharding_actions.cpp',
@@ -76,6 +75,7 @@ env.Library(
         'shard_filtering_metadata_refresh.cpp',
         'shard_identity_rollback_notifier.cpp',
         'shard_key_util.cpp',
+        'shard_local.cpp',
         'shard_metadata_util.cpp',
         'shard_server_catalog_cache_loader.cpp',
         'shard_server_op_observer.cpp',
@@ -84,14 +84,12 @@ env.Library(
         'sharding_state_recovery.cpp',
         'sharding_statistics.cpp',
         'split_chunk.cpp',
-        'shard_local.cpp',
         'split_vector.cpp',
         'start_chunk_clone_request.cpp',
         env.Idlc('migration_coordinator_document.idl')[0],
         env.Idlc('range_deletion_task.idl')[0],
         env.Idlc('resharding/coordinator_document.idl')[0],
         env.Idlc('resharding/donor_document.idl')[0],
-        env.Idlc('resharding/donor_oplog_id.idl')[0],
         env.Idlc('resharding/recipient_document.idl')[0],
     ],
     LIBDEPS=[
@@ -102,8 +100,6 @@ env.Library(
        '$BUILD_DIR/mongo/db/dbhelpers',
        '$BUILD_DIR/mongo/db/op_observer_impl',
        '$BUILD_DIR/mongo/db/ops/write_ops_exec',
-       '$BUILD_DIR/mongo/db/pipeline/expression_context',
-       '$BUILD_DIR/mongo/db/pipeline/pipeline',
        '$BUILD_DIR/mongo/db/repl/oplog',
        '$BUILD_DIR/mongo/db/repl/wait_for_majority_service',
        '$BUILD_DIR/mongo/db/rw_concern_d',
@@ -125,6 +121,27 @@ env.Library(
        '$BUILD_DIR/mongo/db/rs_local_client',
        '$BUILD_DIR/mongo/db/session_catalog',
        '$BUILD_DIR/mongo/idl/server_parameter',
+        'resharding_util',
+    ],
+)
+
+env.Library(
+    target='resharding_util',
+    source=[
+        'resharding_util.cpp',
+        env.Idlc('resharding/donor_oplog_id.idl')[0],
+    ],
+    LIBDEPS_PRIVATE=[
+        '$BUILD_DIR/mongo/base',
+        '$BUILD_DIR/mongo/db/common',
+        '$BUILD_DIR/mongo/db/concurrency/write_conflict_exception',
+        '$BUILD_DIR/mongo/db/curop',
+        '$BUILD_DIR/mongo/db/namespace_string',
+        '$BUILD_DIR/mongo/db/pipeline/expression_context',
+        '$BUILD_DIR/mongo/db/pipeline/pipeline',
+        '$BUILD_DIR/mongo/db/storage/write_unit_of_work',
+        '$BUILD_DIR/mongo/s/common_s',
+        '$BUILD_DIR/mongo/s/grid',
     ],
 )
 
@@ -326,6 +343,7 @@ env.Library(
        '$BUILD_DIR/mongo/s/sharding_initialization',
        '$BUILD_DIR/mongo/s/sharding_router_api',
        'balancer',
+        'resharding_util',
        'sharding_runtime_d',
     ],
 )
@@ -516,5 +534,6 @@ env.CppUnitTest(
        '$BUILD_DIR/mongo/util/version_impl',
        'balancer',
        'config_server_test_fixture',
-    ]
+        'resharding_util',
+    ],
 )
diff --git a/src/mongo/db/s/resharding_util.cpp b/src/mongo/db/s/resharding_util.cpp
index 5d81a724ce1..5939c83f97b 100644
--- a/src/mongo/db/s/resharding_util.cpp
+++ b/src/mongo/db/s/resharding_util.cpp
@@ -30,11 +30,14 @@
 #include "mongo/platform/basic.h"
 
 #include "mongo/bson/bsonobj.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/namespace_string.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/pipeline/document_source_lookup.h"
 #include "mongo/db/pipeline/document_source_match.h"
 #include "mongo/db/pipeline/document_source_sort.h"
 #include "mongo/db/s/resharding_util.h"
+#include "mongo/db/storage/write_unit_of_work.h"
 #include "mongo/s/grid.h"
 
 namespace mongo {
@@ -152,4 +155,48 @@ std::unique_ptr<Pipeline, PipelineDeleter> createAggForReshardingOplogBuffer(
 
     return Pipeline::create(std::move(stages), expCtx);
 }
-}  // namespace mongo
\ No newline at end of file
+void createSlimOplogView(OperationContext* opCtx, Database* db) {
+    writeConflictRetry(
+        opCtx, "createReshardingOplog", "local.system.resharding.slimOplogForGraphLookup", [&] {
+            {
+                // Create 'system.views' in a separate WUOW if it does not exist.
+                WriteUnitOfWork wuow(opCtx);
+                Collection* coll = CollectionCatalog::get(opCtx).lookupCollectionByNamespace(
+                    opCtx, NamespaceString(db->getSystemViewsName()));
+                if (!coll) {
+                    coll = db->createCollection(opCtx, NamespaceString(db->getSystemViewsName()));
+                }
+                invariant(coll);
+                wuow.commit();
+            }
+
+            // Resharding uses the `prevOpTime` to link oplog related entries via a
+            // $graphLookup. Large transactions and prepared transaction use prevOpTime to identify
+            // earlier oplog entries from the same transaction. Retryable writes (identified via the
+            // presence of `stmtId`) use prevOpTime to identify earlier run statements from the same
+            // retryable write. This view will unlink oplog entries from the same retryable write
+            // by zeroing out their `prevOpTime`.
+            CollectionOptions options;
+            options.viewOn = NamespaceString::kRsOplogNamespace.coll().toString();
+            options.pipeline = BSON_ARRAY(BSON(
+                "$project" << BSON(
+                    "_id"
+                    << "$ts"
+                    << "op" << 1 << "o" << BSON("applyOps" << BSON("ui" << 1 << "reshardDest" << 1))
+                    << "ts" << 1 << "prevOpTime.ts"
+                    << BSON("$cond" << BSON("if" << BSON("$eq" << BSON_ARRAY(BSON("$type"
+                                                                                  << "$stmtId")
+                                                                             << "missing"))
+                                                 << "then"
+                                                 << "$prevOpTime.ts"
+                                                 << "else" << Timestamp::min())))));
+            WriteUnitOfWork wuow(opCtx);
+            uassertStatusOK(
+                db->createView(opCtx,
+                               NamespaceString("local.system.resharding.slimOplogForGraphLookup"),
+                               options));
+            wuow.commit();
+        });
+}
+
+}  // namespace mongo
diff --git a/src/mongo/db/s/resharding_util.h b/src/mongo/db/s/resharding_util.h
index b6441cd16fe..db10d68c4fd 100644
--- a/src/mongo/db/s/resharding_util.h
+++ b/src/mongo/db/s/resharding_util.h
@@ -31,6 +31,7 @@
 #include <vector>
 
 #include "mongo/bson/bsonobj.h"
+#include "mongo/db/catalog/database.h"
 #include "mongo/db/keypattern.h"
 #include "mongo/db/operation_context.h"
 #include "mongo/db/pipeline/pipeline.h"
@@ -79,4 +80,10 @@ std::unique_ptr<Pipeline, PipelineDeleter> createAggForReshardingOplogBuffer(
     const boost::intrusive_ptr<ExpressionContext>& expCtx,
     const boost::optional<ReshardingDonorOplogId>& resumeToken);
 
-}  // namespace mongo
\ No newline at end of file
+/**
+ * Creates a view on the oplog that facilitates the specialized oplog tailing a resharding recipient
+ * performs on a donor.
+ */
+void createSlimOplogView(OperationContext* opCtx, Database* db);
+
+}  // namespace mongo
diff --git a/src/mongo/db/views/view_catalog.cpp b/src/mongo/db/views/view_catalog.cpp
index e5e67c7a73e..d816f520460 100644
--- a/src/mongo/db/views/view_catalog.cpp
+++ b/src/mongo/db/views/view_catalog.cpp
@@ -417,11 +417,6 @@ Status ViewCatalog::createView(OperationContext* opCtx,
         return Status(ErrorCodes::InvalidNamespace,
                       str::stream() << "invalid name for 'viewOn': " << viewOn.coll());
 
-    if (viewName.isSystem())
-        return Status(
-            ErrorCodes::InvalidNamespace,
-            "View name cannot start with 'system.', which is reserved for system namespaces");
-
     auto collator = parseCollator(opCtx, collation);
     if (!collator.isOK())
         return collator.getStatus();
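
For readers who find the BSON builder syntax in resharding_util.cpp above hard to follow, the following is a rough mongo-shell rendering of the $project pipeline that createSlimOplogView() assembles. It is an illustrative sketch, not part of the commit; the field order is approximate, and Timestamp(0, 0) stands in for Timestamp::min(), the null timestamp the new test asserts for retryable writes.

// Sketch only: approximates the slim oplog view definition built in resharding_util.cpp above.
const slimOplogPipeline = [{
    $project: {
        _id: "$ts",
        op: 1,
        ts: 1,
        o: {applyOps: {ui: 1, reshardDest: 1}},
        // Retryable writes carry a stmtId; their prevOpTime.ts is zeroed so a $graphLookup over
        // the view does not chain back through earlier statements of the same retryable write.
        // Transaction-related entries keep prevOpTime.ts, so prepare/commit/applyOps stay linked.
        "prevOpTime.ts": {
            $cond: {
                if: {$eq: [{$type: "$stmtId"}, "missing"]},
                then: "$prevOpTime.ts",
                else: Timestamp(0, 0)
            }
        }
    }
}];

// Roughly what createSlimOplogView() does on the "local" database at startup:
// db.getSiblingDB("local").createView(
//     "system.resharding.slimOplogForGraphLookup", "oplog.rs", slimOplogPipeline);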