From 03e2efc547344112c1129ee4eb34908e8b67569f Mon Sep 17 00:00:00 2001
From: Wenbin Zhu
Date: Tue, 5 Oct 2021 19:37:10 +0000
Subject: SERVER-59710 Create dummy replicator in resmoke that copies data between clusters.

---
 .../hooks/dummy_cluster_to_cluster_replicator.js |  73 ++++++++++++
 jstests/libs/cluster_to_cluster_util.js          | 126 +++++++++++++++++++++
 jstests/libs/namespace_utils.js                  |   7 ++
 3 files changed, 206 insertions(+)
 create mode 100644 jstests/hooks/dummy_cluster_to_cluster_replicator.js
 create mode 100644 jstests/libs/cluster_to_cluster_util.js

diff --git a/jstests/hooks/dummy_cluster_to_cluster_replicator.js b/jstests/hooks/dummy_cluster_to_cluster_replicator.js
new file mode 100644
index 00000000000..3ecd313eb40
--- /dev/null
+++ b/jstests/hooks/dummy_cluster_to_cluster_replicator.js
@@ -0,0 +1,73 @@
+// A dummy replicator that copies collections from one cluster to another,
+// with the ability to filter specific collections to clone.
+
+(function() {
+'use strict';
+
+load("jstests/libs/cluster_to_cluster_util.js");
+load('jstests/libs/discover_topology.js');
+
+// Copy the collection from one cluster to another.
+function copyCollection(c0Conn, c1Conn, c0Topology, c1Topology, dbName, collInfo) {
+    const collName = collInfo.name;
+    jsTestLog(`Copying collection: ${dbName}.${collName}, ${tojson(collInfo)}`);
+
+    // Create the collection or view with the same options as in the source cluster.
+    assert.commandWorked(c1Conn.getDB(dbName).createCollection(
+        collName, Object.extend(collInfo.options, {writeConcern: {w: "majority"}})));
+
+    // Skip the remaining operations if this is not of type collection (e.g. a view).
+    if (collInfo.type !== "collection") {
+        return;
+    }
+
+    // Create indexes on the destination collection, except for the _id index.
+    const c0Coll = c0Conn.getDB(dbName).getCollection(collName);
+    const c1Coll = c1Conn.getDB(dbName).getCollection(collName);
+    for (const index of c0Coll.getIndexes()) {
+        if (!index.key._id) {
+            let options = Object.assign({}, index);
+            delete options.v;
+            delete options.key;
+            assert.commandWorked(c1Coll.createIndex(index.key, options));
+        }
+    }
+
+    // Retrieve shard key information from the source cluster and shard the
+    // collection on the destination cluster.
+    if (c0Topology.type === Topology.kShardedCluster &&
+        c1Topology.type === Topology.kShardedCluster) {
+        const shardKeyInfo = ClusterToClusterUtil.getShardKeyInfo(c0Conn, dbName, collName);
+        // Skip if the collection is not sharded.
+        if (shardKeyInfo) {
+            assert.commandWorked(c1Conn.adminCommand({enableSharding: dbName}));
+            assert.commandWorked(c1Conn.adminCommand({
+                shardCollection: `${dbName}.${collName}`,
+                key: shardKeyInfo.key,
+                unique: shardKeyInfo.unique
+            }));
+        }
+    }
+
+    // Read and copy all collection data.
+    const findRes = c0Coll.find({}).sort({_id: 1}).toArray();
+    assert.commandWorked(c1Coll.insert(findRes));
+}
+
+// Create connections to both clusters. The connection string can represent a replica set
+// primary in the case of a replica set fixture, or a mongos in the case of a sharded cluster.
+const c0Conn = new Mongo(TestData.cluster0ConnectionString);
+const c1Conn = new Mongo(TestData.cluster1ConnectionString);
+const c0Topology = DiscoverTopology.findConnectedNodes(c0Conn);
+const c1Topology = DiscoverTopology.findConnectedNodes(c1Conn);
+
+// Get the filtered collections and copy them.
+const collInfoMap = ClusterToClusterUtil.getCollectionsToCopy(
+    c0Conn, TestData.includeNamespaces, TestData.excludeNamespaces);
+for (const [dbName, collInfos] of Object.entries(collInfoMap)) {
+    jsTestLog(`Copying database: ${dbName}`);
+    for (const collInfo of collInfos) {
+        copyCollection(c0Conn, c1Conn, c0Topology, c1Topology, dbName, collInfo);
+    }
+}
+})();
diff --git a/jstests/libs/cluster_to_cluster_util.js b/jstests/libs/cluster_to_cluster_util.js
new file mode 100644
index 00000000000..4c43bf67f16
--- /dev/null
+++ b/jstests/libs/cluster_to_cluster_util.js
@@ -0,0 +1,126 @@
+/**
+ * Utilities for testing the cluster to cluster replicator.
+ */
+let ClusterToClusterUtil = (function() {
+    load("jstests/libs/namespace_utils.js");
+
+    // System databases and collections that are excluded from copying.
+    const excludedSystemDatabases = ["admin", "config", "local"];
+    const excludedSystemCollections =
+        ["system.views", "system.profile", "system.resharding.", "system.buckets.", "system.drop."];
+
+    /**
+     * Perform a sanity check on the namespaces to filter.
+     */
+    function checkFilteredNamespacesInput(namespaces) {
+        if (namespaces) {
+            for (const ns of namespaces) {
+                const [db, coll] = getDBNameAndCollNameFromFullNamespace(ns);
+                assert(db && coll, `Incorrect namespace format: ${ns}`);
+                assert(!excludedSystemDatabases.includes(db),
+                       "Filtered namespaces cannot contain excluded system databases");
+                assert(!coll.startsWith("system."));
+            }
+        }
+    }
+
+    /**
+     * Return the databases to copy from the source cluster.
+     */
+    function getDatabasesToCopy(conn) {
+        const listDBRes = assert.commandWorked(conn.adminCommand(
+            {listDatabases: 1, filter: {name: {$nin: excludedSystemDatabases}}, nameOnly: true}));
+        return listDBRes.databases.map(entry => entry.name);
+    }
+
+    /**
+     * Return all the collections to copy from the source cluster, grouped by database and
+     * filtered by includeNamespaces and excludeNamespaces. When includeNamespaces is not
+     * provided, the collection infos returned will include views, so callers may need to
+     * explicitly check the collection type to differentiate between collections and views.
+     */
+    function getCollectionsToCopy(conn, includeNamespaces, excludeNamespaces) {
+        const collInfoMap = {};
+
+        if (includeNamespaces && includeNamespaces.length > 0) {
+            assert(!excludeNamespaces || excludeNamespaces.length == 0,
+                   "Cannot have inputs for both includeNamespaces and excludeNamespaces");
+            checkFilteredNamespacesInput(includeNamespaces);
+
+            for (const ns of includeNamespaces) {
+                const [dbName, collName] = getDBNameAndCollNameFromFullNamespace(ns);
+                const collInfo = getCollectionInfo(conn, dbName, collName);
+                if (!collInfo) {
+                    print(`Namespace to include for copy does not exist: ${dbName}.${collName}`);
+                    continue;
+                }
+                if (!collInfoMap.hasOwnProperty(dbName)) {
+                    collInfoMap[dbName] = [];
+                }
+                collInfoMap[dbName].push(collInfo);
+            }
+
+            return collInfoMap;
+        }
+
+        checkFilteredNamespacesInput(excludeNamespaces);
+        const databases = getDatabasesToCopy(conn);
+        databases.forEach(dbName => {
+            const collInfos = getCollectionsFromDatabase(conn, dbName, excludeNamespaces);
+            if (collInfos.length > 0) {
+                collInfoMap[dbName] = collInfos;
+            }
+        });
+
+        return collInfoMap;
+    }
+
+    /**
+     * Return the collection infos of the given database, excluding those in the excludeNamespaces.
+     */
+    function getCollectionsFromDatabase(conn, dbName, excludeNamespaces = []) {
+        let excludedCollections = excludeNamespaces.reduce((list, ns) => {
+            const [db, coll] = getDBNameAndCollNameFromFullNamespace(ns);
+            if (db === dbName) {
+                list.push(coll);
+            }
+            return list;
+        }, [...excludedSystemCollections]);
+
+        excludedCollections = excludedCollections.map(coll => {
+            // If the collection name ends with '.', match it as a prefix.
+            return coll.endsWith('.') ? new RegExp(`^${coll}`) : coll;
+        });
+
+        const res = assert.commandWorked(conn.getDB(dbName).runCommand(
+            {listCollections: 1, filter: {name: {$nin: excludedCollections}}}));
+        return new DBCommandCursor(conn.getDB(dbName), res).toArray().sort(compareOn("name"));
+    }
+
+    /**
+     * Return the collection info of the given collection name, or null if no such collection.
+     */
+    function getCollectionInfo(conn, dbName, collName) {
+        const res = assert.commandWorked(
+            conn.getDB(dbName).runCommand({listCollections: 1, filter: {name: collName}}));
+        const firstBatch = res.cursor.firstBatch;
+        return firstBatch.length > 0 ? firstBatch[0] : null;
+    }
+
+    /**
+     * Return the shard key information of the given collection, or null if the collection
+     * is not sharded.
+     */
+    function getShardKeyInfo(conn, dbName, collName) {
+        return conn.getDB("config").collections.findOne({_id: `${dbName}.${collName}`});
+    }
+
+    return {
+        checkFilteredNamespacesInput,
+        getDatabasesToCopy,
+        getCollectionsToCopy,
+        getCollectionsFromDatabase,
+        getCollectionInfo,
+        getShardKeyInfo,
+    };
+})();
diff --git a/jstests/libs/namespace_utils.js b/jstests/libs/namespace_utils.js
index 16e6e97be4b..fa2493c486e 100644
--- a/jstests/libs/namespace_utils.js
+++ b/jstests/libs/namespace_utils.js
@@ -4,3 +4,10 @@
 function getCollectionNameFromFullNamespace(ns) {
     return ns.split(/\.(.+)/)[1];
 }
+
+/**
+ * Returns the database and collection name extracted from a namespace string.
+ */
+function getDBNameAndCollNameFromFullNamespace(ns) {
+    return ns.split(/\.(.+)/);
+}

-- 
cgit v1.2.1
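A minimal usage sketch, not part of the diff above: resmoke is expected to supply TestData and load
this hook between tests, but the same flow can be exercised by hand from a mongo shell against two
already-running clusters. The ports and the "test.foo" namespace below are placeholder assumptions
chosen for this example; nothing in the change defines them.

    // Hypothetical manual run of the dummy replicator hook from a mongo shell
    // started at the repository root. All values here are illustrative only.
    TestData = {
        cluster0ConnectionString: "localhost:20000",  // assumed source cluster address
        cluster1ConnectionString: "localhost:20020",  // assumed destination cluster address
        includeNamespaces: ["test.foo"],              // copy only the test.foo collection
        excludeNamespaces: [],                        // must stay empty when includeNamespaces is set
    };
    load("jstests/hooks/dummy_cluster_to_cluster_replicator.js");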