diff options
author | Josef Ahmad <josef.ahmad@mongodb.com> | 2022-08-30 07:49:48 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-30 09:03:31 +0000 |
commit | 3724e774d67d6ffb3bf1c2f52f7451eac38b5fd9 (patch) | |
tree | 3ef25a75f1aed9274bee74103468e3b4008ae017 | |
parent | 3f101399b8b165cb1e3a70bd56d9be7978124f51 (diff) | |
download | mongo-3724e774d67d6ffb3bf1c2f52f7451eac38b5fd9.tar.gz |
SERVER-67892 Add _shardsvrCreateGlobalIndex command
Co-authored-by: Yu Jin Kang Park <yujin.kang@mongodb.com>
38 files changed, 668 insertions, 2 deletions
diff --git a/jstests/auth/lib/commands_lib.js b/jstests/auth/lib/commands_lib.js index 4913b33f3b8..7f229c2f614 100644 --- a/jstests/auth/lib/commands_lib.js +++ b/jstests/auth/lib/commands_lib.js @@ -209,6 +209,21 @@ var authCommandsLib = { tests: [ { + testname: "_shardsvrCreateGlobalIndex", + command: {_shardsvrCreateGlobalIndex: UUID()}, + skipSharded: true, + testcases: [ + { + runOnDb: adminDbName, + roles: {__system: 1}, + privileges: [{resource: {cluster: true}, actions: ["internal"]}], + expectFail: true + }, + {runOnDb: firstDbName, roles: {}}, + {runOnDb: secondDbName, roles: {}} + ] + }, + { testname: "abortReshardCollection", command: {abortReshardCollection: "test.x"}, skipUnlessSharded: true, @@ -3083,7 +3098,7 @@ var authCommandsLib = { t: new Timestamp(1655722668, 22), v: new Timestamp(1, 0) }, - min: {_id: MinKey}, + min: {_id: MinKey}, max: {_id: -4611686018427387902} }, fromShardCollectionVersion: { @@ -5781,7 +5796,7 @@ var authCommandsLib = { } ] }, - { + { testname: "setClusterParameter", command: {setClusterParameter: {testIntClusterParameter: {intData: 17}}}, skipTest: (conn) => { diff --git a/jstests/core/views/views_all_commands.js b/jstests/core/views/views_all_commands.js index 0722c9d5174..15a0faf7b69 100644 --- a/jstests/core/views/views_all_commands.js +++ b/jstests/core/views/views_all_commands.js @@ -163,6 +163,7 @@ let viewsCommandTests = { _shardsvrCommitReshardCollection: {skip: isAnInternalCommand}, _shardsvrCreateCollection: {skip: isAnInternalCommand}, _shardsvrCreateCollectionParticipant: {skip: isAnInternalCommand}, + _shardsvrCreateGlobalIndex: {skip: isAnInternalCommand}, _shardsvrDropDatabase: {skip: isAnInternalCommand}, _shardsvrDropDatabaseParticipant: {skip: isAnInternalCommand}, _shardsvrGetStatsForBalancing: {skip: isAnInternalCommand}, diff --git a/jstests/noPassthrough/shardsvr_create_global_index.js b/jstests/noPassthrough/shardsvr_create_global_index.js new file mode 100644 index 00000000000..2b676068e29 --- /dev/null +++ b/jstests/noPassthrough/shardsvr_create_global_index.js @@ -0,0 +1,160 @@ +/** + * Tests the _shardsvrCreateGlobalIndex command, which creates a global index container. + * + * @tags: [ + * featureFlagGlobalIndexes, + * requires_fcv_61, + * requires_replication, + * ] + */ +(function() { +"use strict"; + +load("jstests/libs/collection_drop_recreate.js"); // For assertDropCollection. + +function uuidToString(uuid) { + const [_, uuidString] = uuid.toString().match(/"((?:\\.|[^"\\])*)"/); + return uuidString; +} + +function verifyCollectionExists(node, globalIndexUUID, namespace) { + // Check the global index container has been replicated. + const systemDB = node.getDB("system"); + const res = systemDB.runCommand({listCollections: 1, filter: {name: namespace}}); + assert.eq(res.cursor.firstBatch.length, 1); + assert.eq(res.cursor.firstBatch[0].info.uuid, globalIndexUUID); +} + +function verifyIndexSpecs(node, namespace) { + const primaryIndexSpecs = + {"v": 2, "key": {"_id": 1}, "name": "_id_", "unique": true, "clustered": true}; + + const secondaryIndexSpecs = + {"v": 2, "key": {"indexKey": 1}, "name": "indexKey_1", "unique": true}; + + const referenceIndexSpecList = [primaryIndexSpecs, secondaryIndexSpecs]; + + const systemDB = node.getDB("system"); + let listIndexes = systemDB.runCommand({listIndexes: namespace}); + assert.commandWorked(listIndexes); + + const indexSpecList = listIndexes["cursor"]["firstBatch"]; + assert.sameMembers(indexSpecList, + referenceIndexSpecList, + "Global index collection has unexpected index specs."); +} + +function verifyMultiDocumentTransactionDisallowed(node) { + let session = node.startSession(); + // Verify the command is not allowed in multi-document transactions. + let sessionDB = session.getDatabase("admin"); + session.startTransaction(); + + assert.commandFailedWithCode(sessionDB.runCommand({_shardsvrCreateGlobalIndex: UUID()}), + ErrorCodes.OperationNotSupportedInTransaction); + session.endSession(); +} + +function verifyOplogEntry(node, globalIndexUUID, namespace, lsid, txnNumber) { + // Sample oplog entry. + // { + // "op" : "c", + // "ns" : "system.$cmd", + // "ui" : UUID("abe869a0-932f-418c-9baa-2f826fbf23e9"), + // "o" : { + // "createGlobalIndex" : "globalIndexes.abe869a0-932f-418c-9baa-2f826fbf23e9" + // }, + // "ts" : Timestamp(1659625616, 4), + // "t" : NumberLong(1), + // "v" : NumberLong(2), + // "wall" : ISODate("2022-08-04T15:06:56.647Z") + // } + const oplogEntry = node.getDB("local").oplog.rs.find({}).sort({$natural: -1}).limit(1).next(); + assert.eq(oplogEntry.op, "c"); + assert.eq(oplogEntry.ui, globalIndexUUID); + assert.docEq(oplogEntry.o, {"createGlobalIndex": namespace}); + + // lsid and txnNumber are either both present (retryable writes) or absent. + assert((lsid && txnNumber) || (!lsid && !txnNumber)); + if (lsid) { + assert.eq(oplogEntry.lsid.id, lsid.id); + assert.eq(oplogEntry.txnNumber, txnNumber); + assert.eq(oplogEntry.stmtId, 0); + } +} + +function verifyCommandIsRetryableWrite(node) { + const session = node.startSession({retryWrites: true}); + const adminDB = session.getDatabase("admin"); + const lsid = session.getSessionId(); + const txnNumber = NumberLong(10); + const indexUUID = UUID(); + const globalIndexCollName = "globalIndexes." + uuidToString(indexUUID); + + const commandInvocations = 5; + const ssBefore = assert.commandWorked(node.getDB("test").runCommand({serverStatus: 1})); + + const retriedCommandsCountBefore = ssBefore["transactions"]["retriedCommandsCount"]; + const retriedStatementsCountBefore = ssBefore["transactions"]["retriedStatementsCount"]; + const transactionsCollectionWriteCountBefore = + ssBefore["transactions"]["transactionsCollectionWriteCount"]; + const commandCountBefore = + ssBefore["metrics"]["commands"]["_shardsvrCreateGlobalIndex"]["total"]; + + const doRetry = () => { + assert.commandWorked(adminDB.runCommand( + {_shardsvrCreateGlobalIndex: indexUUID, lsid: lsid, txnNumber: txnNumber})); + }; + + // Run the same retryable _shardsvrCreateGlobalIndex invocation multiple times. + for (var i = 0; i < commandInvocations; i++) { + doRetry(); + } + + const ssAfter = assert.commandWorked(node.getDB("test").runCommand({serverStatus: 1})); + const retriedCommandsCountAfter = ssAfter["transactions"]["retriedCommandsCount"]; + const retriedStatementsCountAfter = ssAfter["transactions"]["retriedStatementsCount"]; + const transactionsCollectionWriteCountAfter = + ssAfter["transactions"]["transactionsCollectionWriteCount"]; + const commandCountAfter = ssAfter["metrics"]["commands"]["_shardsvrCreateGlobalIndex"]["total"]; + + // The increase of retried commands and statements are globally >= the number of command + // invocations. + assert.gte(retriedCommandsCountAfter, retriedCommandsCountBefore + commandInvocations - 1); + assert.gte(retriedStatementsCountAfter, retriedStatementsCountBefore + commandInvocations - 1); + // The command executed exactly once (config.transactions are globally >= 1) while the command + // was invoked more than once. + assert.gte(transactionsCollectionWriteCountAfter, transactionsCollectionWriteCountBefore + 1); + assert.gte(commandCountAfter, commandCountBefore + commandInvocations); + + verifyOplogEntry(node, indexUUID, globalIndexCollName, lsid, txnNumber); +} + +const rst = new ReplSetTest({nodes: 2}); +rst.startSet(); +rst.initiate(); +const primary = rst.getPrimary(); +const adminDB = primary.getDB("admin"); +const globalIndexUUID = UUID(); +const globalIndexCollName = "globalIndexes." + uuidToString(globalIndexUUID); + +verifyMultiDocumentTransactionDisallowed(primary); +verifyCommandIsRetryableWrite(primary); + +// Create a global index container. +assert.commandWorked(adminDB.runCommand({_shardsvrCreateGlobalIndex: globalIndexUUID})); +// The command is idempotent: it returns OK if the container already exists. +assert.commandWorked(adminDB.runCommand({_shardsvrCreateGlobalIndex: globalIndexUUID})); +verifyOplogEntry(primary, globalIndexUUID, globalIndexCollName); + +rst.awaitReplication(); +rst.nodes.forEach((node) => { + // Verify collection exists in all nodes, command does what is expected in primary and is + // replicated to secondaries. + verifyCollectionExists(node, globalIndexUUID, globalIndexCollName); + // Verify index spec in all nodes. + verifyIndexSpecs(node, globalIndexCollName); +}); + +rst.stopSet(); +})(); diff --git a/jstests/replsets/db_reads_while_recovering_all_commands.js b/jstests/replsets/db_reads_while_recovering_all_commands.js index 5f31ed42c01..f1762b7a8c4 100644 --- a/jstests/replsets/db_reads_while_recovering_all_commands.js +++ b/jstests/replsets/db_reads_while_recovering_all_commands.js @@ -85,6 +85,7 @@ const allCommands = { _shardsvrRegisterIndex: {skip: isPrimaryOnly}, _shardsvrCommitIndexParticipant: {skip: isPrimaryOnly}, _shardsvrCommitReshardCollection: {skip: isPrimaryOnly}, + _shardsvrCreateGlobalIndex: {skip: isAnInternalCommand}, _shardsvrDropCollection: {skip: isPrimaryOnly}, _shardsvrCreateCollection: {skip: isPrimaryOnly}, _shardsvrDropCollectionIfUUIDNotMatching: {skip: isNotAUserDataRead}, diff --git a/jstests/replsets/shardsvr_create_global_index_rollback.js b/jstests/replsets/shardsvr_create_global_index_rollback.js new file mode 100644 index 00000000000..8427fff64c9 --- /dev/null +++ b/jstests/replsets/shardsvr_create_global_index_rollback.js @@ -0,0 +1,42 @@ +/** + * Tests that global index container creation is properly rolled back. + * + * @tags: [ + * featureFlagGlobalIndexes, + * requires_fcv_61, + * requires_replication, + * ] + */ +(function() { +'use strict'; + +load('jstests/replsets/libs/rollback_test.js'); + +const rollbackTest = new RollbackTest(jsTestName()); + +const primary = rollbackTest.getPrimary(); +const adminDB = primary.getDB("admin"); +const globalIndexUUID = UUID(); + +rollbackTest.transitionToRollbackOperations(); + +// Create a global index container to be rolled back. +assert.commandWorked(adminDB.runCommand({_shardsvrCreateGlobalIndex: globalIndexUUID})); + +// Perform the rollback. +rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); +rollbackTest.transitionToSyncSourceOperationsDuringRollback(); +rollbackTest.transitionToSteadyStateOperations(); + +const [_, uuidString] = globalIndexUUID.toString().match(/"((?:\\.|[^"\\])*)"/); +const namespace = "globalIndexes." + uuidString; + +// Check the global index container collection does not exist. +rollbackTest.getTestFixture().nodes.forEach(function(node) { + const nodeDB = node.getDB("system"); + const res = nodeDB.runCommand({listCollections: 1, filter: {name: namespace}}); + assert.eq(res.cursor.firstBatch.length, 0); +}); + +rollbackTest.stop(); +})(); diff --git a/jstests/sharding/read_write_concern_defaults_application.js b/jstests/sharding/read_write_concern_defaults_application.js index e701b536279..7803d524e59 100644 --- a/jstests/sharding/read_write_concern_defaults_application.js +++ b/jstests/sharding/read_write_concern_defaults_application.js @@ -154,6 +154,7 @@ let testCases = { _shardsvrCompactStructuredEncryptionData: {skip: "internal command"}, _shardsvrCreateCollection: {skip: "internal command"}, _shardsvrCreateCollectionParticipant: {skip: "internal command"}, + _shardsvrCreateGlobalIndex: {skip: "internal command"}, _shardsvrDropCollection: {skip: "internal command"}, _shardsvrDropCollectionIfUUIDNotMatching: {skip: "internal command"}, _shardsvrDropCollectionParticipant: {skip: "internal command"}, diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 8f4058340ea..93c4345d51e 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -104,6 +104,23 @@ env.Library( ) env.Library( + target='global_index', + source=[ + 'global_index.cpp', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/catalog/clustered_collection_options', + '$BUILD_DIR/mongo/db/catalog_raii', + '$BUILD_DIR/mongo/db/concurrency/exception_util', + '$BUILD_DIR/mongo/db/curop', + '$BUILD_DIR/mongo/db/session/logical_session_id_helpers', + '$BUILD_DIR/mongo/db/transaction/transaction', + "index_builds_coordinator_interface", + ], +) + +env.Library( target='multitenancy', source=[ 'multitenancy.cpp', diff --git a/src/mongo/db/auth/auth_op_observer.h b/src/mongo/db/auth/auth_op_observer.h index 1d92efbd197..da46492f146 100644 --- a/src/mongo/db/auth/auth_op_observer.h +++ b/src/mongo/db/auth/auth_op_observer.h @@ -45,6 +45,10 @@ public: AuthOpObserver(); ~AuthOpObserver(); + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/free_mon/free_mon_op_observer.h b/src/mongo/db/free_mon/free_mon_op_observer.h index 2b34f06783c..46c1aa1469b 100644 --- a/src/mongo/db/free_mon/free_mon_op_observer.h +++ b/src/mongo/db/free_mon/free_mon_op_observer.h @@ -45,6 +45,10 @@ public: FreeMonOpObserver(); ~FreeMonOpObserver(); + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/global_index.cpp b/src/mongo/db/global_index.cpp new file mode 100644 index 00000000000..e858cbf9315 --- /dev/null +++ b/src/mongo/db/global_index.cpp @@ -0,0 +1,125 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/global_index.h" + +#include "mongo/bson/bsonobj.h" +#include "mongo/db/catalog/clustered_collection_util.h" +#include "mongo/db/catalog/index_catalog.h" +#include "mongo/db/catalog_raii.h" +#include "mongo/db/concurrency/exception_util.h" +#include "mongo/db/index_builds_coordinator.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/op_observer/op_observer.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/transaction/retryable_writes_stats.h" +#include "mongo/db/transaction/transaction_participant.h" +#include "mongo/logv2/log.h" + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kIndex + +namespace mongo::global_index { + + +void createContainer(OperationContext* opCtx, const UUID& indexUUID) { + // StmtId will always be 0, as the command only replicates a createGlobalIndex oplog entry. + constexpr StmtId stmtId = 0; + if (opCtx->isRetryableWrite()) { + const auto txnParticipant = TransactionParticipant::get(opCtx); + if (txnParticipant.checkStatementExecuted(opCtx, stmtId)) { + RetryableWritesStats::get(opCtx)->incrementRetriedCommandsCount(); + RetryableWritesStats::get(opCtx)->incrementRetriedStatementsCount(); + LOGV2_DEBUG(6789203, + 1, + "_shardsvrCreateGlobalIndex retried statement already executed", + "indexUUID"_attr = indexUUID); + return; + } + } + + const auto nss = NamespaceString::makeGlobalIndexNSS(indexUUID); + LOGV2(6789200, "Create global index container", "indexUUID"_attr = indexUUID); + + // Create the container. + return writeConflictRetry(opCtx, "createGlobalIndexContainer", nss.ns(), [&]() { + const auto indexKeySpec = BSON("v" << 2 << "name" + << "indexKey_1" + << "key" << BSON("indexKey" << 1) << "unique" << true); + + WriteUnitOfWork wuow(opCtx); + + // createIndexesOnEmptyCollection requires the MODE_X collection lock. + AutoGetCollection autoColl(opCtx, nss, MODE_X); + if (!CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss)) { + { + repl::UnreplicatedWritesBlock unreplicatedWrites(opCtx); + + auto db = autoColl.ensureDbExists(opCtx); + + CollectionOptions collOptions; + collOptions.clusteredIndex = clustered_util::makeDefaultClusteredIdIndex(); + collOptions.uuid = indexUUID; + db->createCollection(opCtx, nss, collOptions); + + CollectionWriter writer(opCtx, nss); + IndexBuildsCoordinator::get(opCtx)->createIndexesOnEmptyCollection( + opCtx, writer, {indexKeySpec}, false); + } + auto opObserver = opCtx->getServiceContext()->getOpObserver(); + opObserver->onCreateGlobalIndex(opCtx, nss, indexUUID); + + wuow.commit(); + } else { + // Container already exists, this can happen when attempting to create a global index + // that already exists. Sanity check its storage format. + tassert(6789204, + str::stream() << "Collection with UUID " << indexUUID + << " already exists but it's not clustered.", + autoColl->getCollectionOptions().clusteredIndex); + tassert(6789205, + str::stream() << "Collection with UUID " << indexUUID + << " already exists but it's missing a unique index on " + "'indexKey'.", + autoColl->getIndexCatalog()->findIndexByKeyPatternAndOptions( + opCtx, + BSON("indexKey" << 1), + indexKeySpec, + IndexCatalog::InclusionPolicy::kReady)); + tassert(6789206, + str::stream() << "Collection with namespace " << nss.ns() + << " already exists but it has inconsistent UUID " + << autoColl->uuid().toString() << ".", + autoColl->uuid() == indexUUID); + LOGV2(6789201, "Global index container already exists", "indexUUID"_attr = indexUUID); + } + return; + }); +} + +} // namespace mongo::global_index diff --git a/src/mongo/db/global_index.h b/src/mongo/db/global_index.h new file mode 100644 index 00000000000..605a2e24c50 --- /dev/null +++ b/src/mongo/db/global_index.h @@ -0,0 +1,47 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + + +#pragma once + +#include "mongo/db/operation_context.h" +#include "mongo/util/uuid.h" + +namespace mongo::global_index { + +/** + * Creates the internal collection implements the global index container with the given UUID on the + * shard. Replicates as a 'createGlobalIndex' command. This container-backing collection: + * - is clustered by _id. The _id field stores the cluster-unique document key of the index entry; + * - has a secondary unique index on the indexKey field which stores a binary-comparable + * representation of the index key value. + */ +void createContainer(OperationContext* opCtx, const UUID& indexUUID); + +} // namespace mongo::global_index diff --git a/src/mongo/db/namespace_string.cpp b/src/mongo/db/namespace_string.cpp index 6478ea29580..337cdcf5d54 100644 --- a/src/mongo/db/namespace_string.cpp +++ b/src/mongo/db/namespace_string.cpp @@ -336,6 +336,12 @@ NamespaceString NamespaceString::makeChangeCollectionNSS( return NamespaceString{NamespaceString::kConfigDb, NamespaceString::kChangeCollectionName}; } +NamespaceString NamespaceString::makeGlobalIndexNSS(const UUID& id) { + return NamespaceString( + kSystemDb, + fmt::format("{}{}", NamespaceString::kGlobalIndexCollectionPrefix, id.toString())); +} + NamespaceString NamespaceString::makePreImageCollectionNSS( const boost::optional<TenantId>& tenantId) { return tenantId ? NamespaceString(tenantId, kConfigDb, "system.preimages") diff --git a/src/mongo/db/namespace_string.h b/src/mongo/db/namespace_string.h index 166bdf6dd1b..4d7cbfa6dab 100644 --- a/src/mongo/db/namespace_string.h +++ b/src/mongo/db/namespace_string.h @@ -67,6 +67,9 @@ public: // Namespace for the local database static constexpr StringData kLocalDb = "local"_sd; + // Namespace for the system database + static constexpr StringData kSystemDb = "system"_sd; + // Namespace for the sharding config database static constexpr StringData kConfigDb = "config"_sd; @@ -103,6 +106,10 @@ public: // Prefix for time-series buckets collection. static constexpr StringData kTimeseriesBucketsCollectionPrefix = "system.buckets."_sd; + // Prefix for global index container collections. These collections belong to the system + // database. + static constexpr StringData kGlobalIndexCollectionPrefix = "globalIndexes."_sd; + // Namespace for storing configuration data, which needs to be replicated if the server is // running as a replica set. Documents in this collection should represent some configuration // state of the server, which needs to be recovered/consulted at startup. Each document in this @@ -351,6 +358,11 @@ public: static NamespaceString makeListCollectionsNSS(const DatabaseName& dbName); /** + * Constructs a NamespaceString for the specified global index. + */ + static NamespaceString makeGlobalIndexNSS(const UUID& uuid); + + /** * Constructs the cluster parameters NamespaceString for the specified tenant. The format for * this namespace is "(<tenantId>_)config.clusterParameters". */ diff --git a/src/mongo/db/op_observer/fcv_op_observer.h b/src/mongo/db/op_observer/fcv_op_observer.h index 22b2b429b52..d77f7d468a2 100644 --- a/src/mongo/db/op_observer/fcv_op_observer.h +++ b/src/mongo/db/op_observer/fcv_op_observer.h @@ -64,6 +64,9 @@ public: const OplogDeleteEntryArgs& args) final; // Noop overrides. + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, diff --git a/src/mongo/db/op_observer/op_observer.h b/src/mongo/db/op_observer/op_observer.h index 79d8a00e335..08e76cd21aa 100644 --- a/src/mongo/db/op_observer/op_observer.h +++ b/src/mongo/db/op_observer/op_observer.h @@ -123,6 +123,10 @@ public: virtual ~OpObserver() = default; + virtual void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) = 0; + virtual void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/op_observer/op_observer_impl.cpp b/src/mongo/db/op_observer/op_observer_impl.cpp index 76e2dfdac9c..dfbd6d77361 100644 --- a/src/mongo/db/op_observer/op_observer_impl.cpp +++ b/src/mongo/db/op_observer/op_observer_impl.cpp @@ -369,6 +369,40 @@ bool shouldTimestampIndexBuildSinglePhase(OperationContext* opCtx, const Namespa OpObserverImpl::OpObserverImpl(std::unique_ptr<OplogWriter> oplogWriter) : _oplogWriter(std::move(oplogWriter)) {} +void OpObserverImpl::onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) { + invariant(!opCtx->inMultiDocumentTransaction()); + + BSONObjBuilder builder; + // The rollback implementation requires the collection name to list affected namespaces. + builder.append("createGlobalIndex", globalIndexNss.coll()); + + MutableOplogEntry oplogEntry; + oplogEntry.setOpType(repl::OpTypeEnum::kCommand); + oplogEntry.setObject(builder.done()); + oplogEntry.setNss(globalIndexNss.getCommandNS()); + oplogEntry.setUuid(globalIndexUUID); + + constexpr StmtId stmtId = 0; + if (TransactionParticipant::get(opCtx)) { + // This is a retryable write: populate the lsid, txnNumber and stmtId fields. + // The oplog link to previous statement is empty and the stmtId is zero because this is a + // single-statement command replicating as a single createGlobalIndex oplog entry. + repl::OplogLink oplogLink; + _oplogWriter->appendOplogEntryChainInfo(opCtx, &oplogEntry, &oplogLink, {stmtId}); + } + + auto writeOpTime = + logOperation(opCtx, &oplogEntry, true /*assignWallClockTime*/, _oplogWriter.get()); + + // Register the retryable write to in-memory transactions table. + SessionTxnRecord sessionTxnRecord; + sessionTxnRecord.setLastWriteOpTime(writeOpTime); + sessionTxnRecord.setLastWriteDate(oplogEntry.getWallClockTime()); + onWriteOpCompleted(opCtx, {stmtId}, sessionTxnRecord); +} + void OpObserverImpl::onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/op_observer/op_observer_impl.h b/src/mongo/db/op_observer/op_observer_impl.h index c0346e63ccc..95da2e60daf 100644 --- a/src/mongo/db/op_observer/op_observer_impl.h +++ b/src/mongo/db/op_observer/op_observer_impl.h @@ -53,6 +53,10 @@ public: OpObserverImpl(std::unique_ptr<OplogWriter> oplogWriter); virtual ~OpObserverImpl() = default; + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/op_observer/op_observer_noop.h b/src/mongo/db/op_observer/op_observer_noop.h index 320a7180bd4..64a884fba04 100644 --- a/src/mongo/db/op_observer/op_observer_noop.h +++ b/src/mongo/db/op_observer/op_observer_noop.h @@ -35,6 +35,10 @@ namespace mongo { class OpObserverNoop : public OpObserver { public: + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/op_observer/op_observer_registry.h b/src/mongo/db/op_observer/op_observer_registry.h index 3e387e2ea29..999defbefc0 100644 --- a/src/mongo/db/op_observer/op_observer_registry.h +++ b/src/mongo/db/op_observer/op_observer_registry.h @@ -58,6 +58,14 @@ public: _observers.push_back(std::move(observer)); } + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final { + ReservedTimes times{opCtx}; + for (auto& o : _observers) + o->onCreateGlobalIndex(opCtx, globalIndexNss, globalIndexUUID); + }; + void onCreateIndex(OperationContext* const opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/op_observer/user_write_block_mode_op_observer.h b/src/mongo/db/op_observer/user_write_block_mode_op_observer.h index 7f6b929b688..25dc0ec0040 100644 --- a/src/mongo/db/op_observer/user_write_block_mode_op_observer.h +++ b/src/mongo/db/op_observer/user_write_block_mode_op_observer.h @@ -148,6 +148,11 @@ public: // Noop operations (don't perform any check). + // Unchecked because global indexes are created from internal commands. + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + // Index builds committing can be left unchecked since we kill any active index builds before // enabling write blocking. This means any index build which gets to the commit phase while // write blocking is active was started and hit the onStartIndexBuild hook with write blocking diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 2eb121a4e49..950d66d2d79 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -82,6 +82,7 @@ env.Library( '$BUILD_DIR/mongo/db/db_raii', '$BUILD_DIR/mongo/db/dbdirectclient', '$BUILD_DIR/mongo/db/dbhelpers', + '$BUILD_DIR/mongo/db/global_index', '$BUILD_DIR/mongo/db/index_builds_coordinator_interface', '$BUILD_DIR/mongo/db/multitenancy', '$BUILD_DIR/mongo/db/op_observer/op_observer', diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index ea896fc3114..ca128697197 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -65,6 +65,7 @@ #include "mongo/db/dbdirectclient.h" #include "mongo/db/dbhelpers.h" #include "mongo/db/exec/write_stage_common.h" +#include "mongo/db/global_index.h" #include "mongo/db/index/index_access_method.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index_builds_coordinator.h" @@ -1103,6 +1104,12 @@ const StringMap<ApplyOpMetadata> kOpsMap = { {[](OperationContext* opCtx, const OplogEntry& entry, OplogApplication::Mode mode) -> Status { return applyAbortTransaction(opCtx, entry, mode); }}}, + {"createGlobalIndex", + {[](OperationContext* opCtx, const OplogEntry& entry, OplogApplication::Mode mode) -> Status { + const auto& globalIndexUUID = entry.getUuid().get(); + global_index::createContainer(opCtx, globalIndexUUID); + return Status::OK(); + }}}, }; // Writes a change stream pre-image 'preImage' associated with oplog entry 'oplogEntry' and a write diff --git a/src/mongo/db/repl/oplog_entry.cpp b/src/mongo/db/repl/oplog_entry.cpp index 8f2a10e50f1..28c62e4470b 100644 --- a/src/mongo/db/repl/oplog_entry.cpp +++ b/src/mongo/db/repl/oplog_entry.cpp @@ -163,6 +163,8 @@ DurableOplogEntry::CommandType parseCommandType(const BSONObj& objectField) { return DurableOplogEntry::CommandType::kAbortTransaction; } else if (commandString == "importCollection") { return DurableOplogEntry::CommandType::kImportCollection; + } else if (commandString == "createGlobalIndex") { + return DurableOplogEntry::CommandType::kCreateGlobalIndex; } else { uasserted(ErrorCodes::BadValue, str::stream() << "Unknown oplog entry command type: " << commandString diff --git a/src/mongo/db/repl/oplog_entry.h b/src/mongo/db/repl/oplog_entry.h index 72fb37fd478..fce171b36a3 100644 --- a/src/mongo/db/repl/oplog_entry.h +++ b/src/mongo/db/repl/oplog_entry.h @@ -458,6 +458,7 @@ public: kCommitTransaction, kAbortTransaction, kImportCollection, + kCreateGlobalIndex, }; // Get the in-memory size in bytes of a ReplOperation. diff --git a/src/mongo/db/repl/primary_only_service_op_observer.h b/src/mongo/db/repl/primary_only_service_op_observer.h index 40307a88b0b..ae65fb9f0b5 100644 --- a/src/mongo/db/repl/primary_only_service_op_observer.h +++ b/src/mongo/db/repl/primary_only_service_op_observer.h @@ -47,6 +47,10 @@ public: explicit PrimaryOnlyServiceOpObserver(ServiceContext* serviceContext); ~PrimaryOnlyServiceOpObserver(); + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp index f08e4c6ef66..78f742a3039 100644 --- a/src/mongo/db/repl/rollback_impl.cpp +++ b/src/mongo/db/repl/rollback_impl.cpp @@ -431,6 +431,7 @@ StatusWith<std::set<NamespaceString>> RollbackImpl::_namespacesForOp(const Oplog << "' during rollback."; return Status(ErrorCodes::UnrecoverableRollbackError, message); } + case OplogEntry::CommandType::kCreateGlobalIndex: case OplogEntry::CommandType::kCreate: case OplogEntry::CommandType::kDrop: case OplogEntry::CommandType::kImportCollection: diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp index 5d4ffa07b13..81f4725a695 100644 --- a/src/mongo/db/repl/rs_rollback.cpp +++ b/src/mongo/db/repl/rs_rollback.cpp @@ -336,6 +336,8 @@ Status rollback_internal::updateFixUpInfoFromLocalOplogEntry(OperationContext* o BSONElement first = obj.firstElement(); switch (oplogEntry.getCommandType()) { + case OplogEntry::CommandType::kCreateGlobalIndex: + // Drop the collection created for global index. case OplogEntry::CommandType::kCreate: { // Example create collection oplog entry // { diff --git a/src/mongo/db/repl/tenant_migration_donor_op_observer.h b/src/mongo/db/repl/tenant_migration_donor_op_observer.h index 38f20c88966..0d2ea44b20d 100644 --- a/src/mongo/db/repl/tenant_migration_donor_op_observer.h +++ b/src/mongo/db/repl/tenant_migration_donor_op_observer.h @@ -45,6 +45,10 @@ public: TenantMigrationDonorOpObserver() = default; ~TenantMigrationDonorOpObserver() = default; + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/repl/tenant_migration_recipient_op_observer.h b/src/mongo/db/repl/tenant_migration_recipient_op_observer.h index 8c62da9a371..8e60a4eb681 100644 --- a/src/mongo/db/repl/tenant_migration_recipient_op_observer.h +++ b/src/mongo/db/repl/tenant_migration_recipient_op_observer.h @@ -46,6 +46,10 @@ public: TenantMigrationRecipientOpObserver() = default; ~TenantMigrationRecipientOpObserver() = default; + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript index 4d1acba7af6..2d0419ea5da 100644 --- a/src/mongo/db/s/SConscript +++ b/src/mongo/db/s/SConscript @@ -458,6 +458,7 @@ env.Library( 'shardsvr_compact_structured_encryption_data_command.cpp', 'shardsvr_create_collection_command.cpp', 'shardsvr_create_collection_participant_command.cpp', + 'shardsvr_create_global_index_command.cpp', 'shardsvr_drop_collection_command.cpp', 'shardsvr_drop_collection_if_uuid_not_matching_command.cpp', 'shardsvr_drop_collection_participant_command.cpp', @@ -498,6 +499,7 @@ env.Library( '$BUILD_DIR/mongo/db/commands/test_commands_enabled', '$BUILD_DIR/mongo/db/commands/txn_cmd_request', '$BUILD_DIR/mongo/db/fle_crud', + '$BUILD_DIR/mongo/db/global_index', '$BUILD_DIR/mongo/db/index_builds_coordinator_interface', '$BUILD_DIR/mongo/db/internal_transactions_feature_flag', '$BUILD_DIR/mongo/db/multitenancy', diff --git a/src/mongo/db/s/config_server_op_observer.h b/src/mongo/db/s/config_server_op_observer.h index 464f704dcee..49fab4662e8 100644 --- a/src/mongo/db/s/config_server_op_observer.h +++ b/src/mongo/db/s/config_server_op_observer.h @@ -46,6 +46,10 @@ public: ConfigServerOpObserver(); ~ConfigServerOpObserver(); + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/s/range_deleter_service_op_observer.h b/src/mongo/db/s/range_deleter_service_op_observer.h index 299fda66880..2b94c3a94bb 100644 --- a/src/mongo/db/s/range_deleter_service_op_observer.h +++ b/src/mongo/db/s/range_deleter_service_op_observer.h @@ -65,6 +65,10 @@ public: const OplogDeleteEntryArgs& args) override; private: + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/s/resharding/resharding_op_observer.h b/src/mongo/db/s/resharding/resharding_op_observer.h index 4b7e8131c36..5cc42d84d0d 100644 --- a/src/mongo/db/s/resharding/resharding_op_observer.h +++ b/src/mongo/db/s/resharding/resharding_op_observer.h @@ -61,6 +61,10 @@ public: ReshardingOpObserver(); ~ReshardingOpObserver() override; + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/s/shard_server_op_observer.h b/src/mongo/db/s/shard_server_op_observer.h index 1e4733832f4..13a755bbde2 100644 --- a/src/mongo/db/s/shard_server_op_observer.h +++ b/src/mongo/db/s/shard_server_op_observer.h @@ -45,6 +45,10 @@ public: ShardServerOpObserver(); ~ShardServerOpObserver(); + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/db/s/shardsvr_create_global_index_command.cpp b/src/mongo/db/s/shardsvr_create_global_index_command.cpp new file mode 100644 index 00000000000..c955676a169 --- /dev/null +++ b/src/mongo/db/s/shardsvr_create_global_index_command.cpp @@ -0,0 +1,108 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include <string> + +#include "mongo/db/auth/authorization_session.h" +#include "mongo/db/commands.h" +#include "mongo/db/global_index.h" +#include "mongo/db/server_feature_flags_gen.h" +#include "mongo/s/request_types/sharded_ddl_commands_gen.h" + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand + + +namespace mongo { +namespace { + +class ShardsvrCreateGlobalIndexCommand final + : public TypedCommand<ShardsvrCreateGlobalIndexCommand> { +public: + using Request = CreateGlobalIndex; + + AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { + return AllowedOnSecondary::kNever; + } + + std::string help() const override { + return "Internal command that creates the shard-local container for a global index."; + } + + ReadWriteType getReadWriteType() const override { + return ReadWriteType::kWrite; + } + + bool skipApiVersionCheck() const override { + // Internal command (server to server). + return true; + } + + bool adminOnly() const override { + return true; + } + + bool supportsRetryableWrite() const final { + return true; + } + + class Invocation final : public InvocationBase { + public: + using InvocationBase::InvocationBase; + + bool supportsWriteConcern() const final { + return true; + } + + NamespaceString ns() const final { + return NamespaceString(request().getDbName(), ""); + } + + void typedRun(OperationContext* opCtx) { + + uassert(ErrorCodes::CommandNotSupported, + "_shardsvrCreateGlobalIndex command not enabled", + gFeatureFlagGlobalIndexes.isEnabledAndIgnoreFCV()); + + const auto indexUUID = request().getCommandParameter(); + global_index::createContainer(opCtx, indexUUID); + } + + private: + void doCheckAuthorization(OperationContext* opCtx) const override { + uassert(ErrorCodes::Unauthorized, + "Unauthorized", + AuthorizationSession::get(opCtx->getClient()) + ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(), + ActionType::internal)); + } + }; +} shardsvrCreateGlobalIndexCommand; + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer.h b/src/mongo/db/serverless/shard_split_donor_op_observer.h index b2bcd54da7f..171d615430f 100644 --- a/src/mongo/db/serverless/shard_split_donor_op_observer.h +++ b/src/mongo/db/serverless/shard_split_donor_op_observer.h @@ -44,6 +44,10 @@ public: ShardSplitDonorOpObserver() = default; ~ShardSplitDonorOpObserver() = default; + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; + void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, const UUID& uuid, diff --git a/src/mongo/idl/cluster_server_parameter_op_observer.h b/src/mongo/idl/cluster_server_parameter_op_observer.h index 62928b52d53..01e0fc7c9d6 100644 --- a/src/mongo/idl/cluster_server_parameter_op_observer.h +++ b/src/mongo/idl/cluster_server_parameter_op_observer.h @@ -88,6 +88,9 @@ public: public: // Remainder of operations are ignorable. + void onCreateGlobalIndex(OperationContext* opCtx, + const NamespaceString& globalIndexNss, + const UUID& globalIndexUUID) final{}; void onCreateIndex(OperationContext* opCtx, const NamespaceString& nss, diff --git a/src/mongo/s/request_types/sharded_ddl_commands.idl b/src/mongo/s/request_types/sharded_ddl_commands.idl index d01e312697c..fe4030d7884 100644 --- a/src/mongo/s/request_types/sharded_ddl_commands.idl +++ b/src/mongo/s/request_types/sharded_ddl_commands.idl @@ -280,6 +280,16 @@ commands: type: object description: "Id index." + _shardsvrCreateGlobalIndex: + description: "Internal command sent to participant shards to create a global index container with a given UUID." + command_name: _shardsvrCreateGlobalIndex + cpp_name: CreateGlobalIndex + strict: true + type: uuid + namespace: type + api_version: "" + reply_type: OkReply + _shardsvrDropDatabase: description: "Internal command sent to the primary shard of a database to drop it." command_name: _shardsvrDropDatabase |