summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/replsets/buildindexes_false_with_system_indexes.js86
-rw-r--r--src/mongo/db/catalog/database_impl.cpp41
-rw-r--r--src/mongo/db/catalog/rename_collection_test.cpp8
-rw-r--r--src/mongo/db/catalog/uuid_catalog.cpp3
-rw-r--r--src/mongo/db/catalog/uuid_catalog.h3
-rw-r--r--src/mongo/db/commands/dbcheck.cpp3
-rw-r--r--src/mongo/db/commands/mr_test.cpp6
-rw-r--r--src/mongo/db/db.cpp3
-rw-r--r--src/mongo/db/free_mon/free_mon_op_observer.h3
-rw-r--r--src/mongo/db/op_observer.h4
-rw-r--r--src/mongo/db/op_observer_impl.cpp54
-rw-r--r--src/mongo/db/op_observer_impl.h3
-rw-r--r--src/mongo/db/op_observer_noop.h3
-rw-r--r--src/mongo/db/op_observer_registry.h5
-rw-r--r--src/mongo/db/repl/SConscript1
-rw-r--r--src/mongo/db/repl/oplog.cpp9
-rw-r--r--src/mongo/db/repl/oplog.h4
-rw-r--r--src/mongo/db/repl/oplog_test.cpp6
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_impl.cpp11
-rw-r--r--src/mongo/db/repl/sync_tail_test_fixture.cpp3
-rw-r--r--src/mongo/db/repl/sync_tail_test_fixture.h3
-rw-r--r--src/mongo/db/s/config_server_op_observer.h3
-rw-r--r--src/mongo/db/s/session_catalog_migration_destination.cpp3
-rw-r--r--src/mongo/db/s/shard_server_op_observer.h3
-rw-r--r--src/mongo/db/session_test.cpp9
-rw-r--r--src/mongo/db/system_index.cpp30
-rw-r--r--src/mongo/dbtests/storage_timestamp_tests.cpp60
-rw-r--r--src/mongo/shell/replsettest.js8
28 files changed, 312 insertions, 66 deletions
diff --git a/jstests/replsets/buildindexes_false_with_system_indexes.js b/jstests/replsets/buildindexes_false_with_system_indexes.js
new file mode 100644
index 00000000000..96fcc732764
--- /dev/null
+++ b/jstests/replsets/buildindexes_false_with_system_indexes.js
@@ -0,0 +1,86 @@
+/*
+ * Tests that hidden nodes with buildIndexes: false behave correctly when system tables with
+ * default indexes are created.
+ *
+ */
+(function() {
+ 'use strict';
+
+ load("jstests/replsets/rslib.js");
+
+ const testName = "buildindexes_false_with_system_indexes";
+
+ let rst = new ReplSetTest({
+ name: testName,
+ nodes: [
+ {},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {priority: 0, hidden: true, buildIndexes: false}},
+ ],
+ });
+ const nodes = rst.startSet();
+ rst.initiate();
+
+ let primary = rst.getPrimary();
+ assert.eq(primary, nodes[0]);
+ let secondary = nodes[1];
+ const hidden = nodes[2];
+
+ rst.awaitReplication();
+ jsTestLog("Creating a role in the admin database");
+ let adminDb = primary.getDB("admin");
+ adminDb.createRole(
+ {role: 'test_role', roles: [{role: 'readWrite', db: 'test'}], privileges: []});
+ rst.awaitReplication();
+
+ jsTestLog("Creating a user in the admin database");
+ adminDb.createUser({user: 'test_user', pwd: 'test', roles: [{role: 'test_role', db: 'admin'}]});
+ rst.awaitReplication();
+
+ // Make sure the indexes we expect are present on all nodes. The buildIndexes: false node
+ // should have only the _id_ index.
+ let secondaryAdminDb = secondary.getDB("admin");
+ const hiddenAdminDb = hidden.getDB("admin");
+
+ assert.eq(["_id_", "user_1_db_1"], adminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "role_1_db_1"], adminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "user_1_db_1"],
+ secondaryAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "role_1_db_1"],
+ secondaryAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+
+ // Drop the indexes and restart the secondary. The indexes should not be re-created.
+ jsTestLog("Dropping system indexes and restarting secondary.");
+ adminDb.system.users.dropIndex("user_1_db_1");
+ adminDb.system.roles.dropIndex("role_1_db_1");
+ rst.awaitReplication();
+ assert.eq(["_id_"], adminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], adminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], secondaryAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], secondaryAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+
+ secondary = rst.restart(secondary, {}, true /* wait for node to become healthy */);
+ secondaryAdminDb = secondary.getDB("admin");
+ assert.eq(["_id_"], secondaryAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], secondaryAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+
+ jsTestLog("Now restarting primary; indexes should be created.");
+ rst.restart(primary);
+ primary = rst.getPrimary();
+ rst.awaitReplication();
+ adminDb = primary.getDB("admin");
+ assert.eq(["_id_", "user_1_db_1"], adminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "role_1_db_1"], adminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "user_1_db_1"],
+ secondaryAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "role_1_db_1"],
+ secondaryAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+
+ rst.stopSet();
+}());
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index 5d4ed394e7c..1ed51dcfe9a 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -774,26 +774,38 @@ Collection* DatabaseImpl::createCollection(OperationContext* opCtx,
"request doesn't allow collection to be created implicitly",
OperationShardingState::get(opCtx).allowImplicitCollectionCreation());
+ auto coordinator = repl::ReplicationCoordinator::get(opCtx);
+ bool canAcceptWrites =
+ (coordinator->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) ||
+ coordinator->canAcceptWritesForDatabase(opCtx, nss.db()) || nss.isSystemDotProfile();
+
+
CollectionOptions optionsWithUUID = options;
bool generatedUUID = false;
if (!optionsWithUUID.uuid) {
- auto coordinator = repl::ReplicationCoordinator::get(opCtx);
- bool canGenerateUUID =
- (coordinator->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) ||
- coordinator->canAcceptWritesForDatabase(opCtx, nss.db()) || nss.isSystemDotProfile();
-
- if (!canGenerateUUID) {
+ if (!canAcceptWrites) {
std::string msg = str::stream() << "Attempted to create a new collection " << nss.ns()
<< " without a UUID";
severe() << msg;
uasserted(ErrorCodes::InvalidOptions, msg);
}
- if (canGenerateUUID) {
+ if (canAcceptWrites) {
optionsWithUUID.uuid.emplace(CollectionUUID::gen());
generatedUUID = true;
}
}
+ // Because writing the oplog entry depends on having the full spec for the _id index, which is
+ // not available until the collection is actually created, we can't write the oplog entry until
+ // after we have created the collection. In order to make the storage timestamp for the
+ // collection create always correct even when other operations are present in the same storage
+ // transaction, we reserve an opTime before the collection creation, then pass it to the
+ // opObserver. Reserving the optime automatically sets the storage timestamp.
+ OplogSlot createOplogSlot;
+ if (canAcceptWrites && supportsDocLocking() && !coordinator->isOplogDisabledFor(opCtx, nss)) {
+ createOplogSlot = repl::getNextOpTime(opCtx);
+ }
+
_checkCanCreateCollection(opCtx, nss, optionsWithUUID);
audit::logCreateCollection(&cc(), ns);
@@ -833,16 +845,21 @@ Collection* DatabaseImpl::createCollection(OperationContext* opCtx,
!nss.isReplicated());
}
}
-
- if (nss.isSystem()) {
- createSystemIndexes(opCtx, collection);
- }
}
MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLoggingCreateCollection);
opCtx->getServiceContext()->getOpObserver()->onCreateCollection(
- opCtx, collection, nss, optionsWithUUID, fullIdIndexSpec);
+ opCtx, collection, nss, optionsWithUUID, fullIdIndexSpec, createOplogSlot);
+
+ // It is necessary to create the system index *after* running the onCreateCollection so that
+ // the storage timestamp for the index creation is after the storage timestamp for the
+ // collection creation, and the opTimes for the corresponding oplog entries are the same as the
+ // storage timestamps. This way both primary and any secondaries will see the index created
+ // after the collection is created.
+ if (canAcceptWrites && createIdIndex && nss.isSystem()) {
+ createSystemIndexes(opCtx, collection);
+ }
return collection;
}
diff --git a/src/mongo/db/catalog/rename_collection_test.cpp b/src/mongo/db/catalog/rename_collection_test.cpp
index 300bc94192f..092b1328406 100644
--- a/src/mongo/db/catalog/rename_collection_test.cpp
+++ b/src/mongo/db/catalog/rename_collection_test.cpp
@@ -88,7 +88,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override;
repl::OpTime onDropCollection(OperationContext* opCtx,
const NamespaceString& collectionName,
@@ -165,9 +166,10 @@ void OpObserverMock::onCreateCollection(OperationContext* opCtx,
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) {
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) {
_logOp(opCtx, collectionName, "create");
- OpObserverNoop::onCreateCollection(opCtx, coll, collectionName, options, idIndex);
+ OpObserverNoop::onCreateCollection(opCtx, coll, collectionName, options, idIndex, createOpTime);
}
repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx,
diff --git a/src/mongo/db/catalog/uuid_catalog.cpp b/src/mongo/db/catalog/uuid_catalog.cpp
index 5ea95f14d2a..781c0748fb2 100644
--- a/src/mongo/db/catalog/uuid_catalog.cpp
+++ b/src/mongo/db/catalog/uuid_catalog.cpp
@@ -48,7 +48,8 @@ void UUIDCatalogObserver::onCreateCollection(OperationContext* opCtx,
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) {
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) {
if (!options.uuid)
return;
UUIDCatalog& catalog = UUIDCatalog::get(opCtx);
diff --git a/src/mongo/db/catalog/uuid_catalog.h b/src/mongo/db/catalog/uuid_catalog.h
index cc093b31c06..7b1ace95701 100644
--- a/src/mongo/db/catalog/uuid_catalog.h
+++ b/src/mongo/db/catalog/uuid_catalog.h
@@ -73,7 +73,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override;
void onCollMod(OperationContext* opCtx,
const NamespaceString& nss,
OptionalCollectionUUID uuid,
diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp
index 3380a7c3198..9c4a4f376d5 100644
--- a/src/mongo/db/commands/dbcheck.cpp
+++ b/src/mongo/db/commands/dbcheck.cpp
@@ -478,7 +478,8 @@ private:
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
uow.commit();
return result;
});
diff --git a/src/mongo/db/commands/mr_test.cpp b/src/mongo/db/commands/mr_test.cpp
index 9a640137ece..f8c81c473a5 100644
--- a/src/mongo/db/commands/mr_test.cpp
+++ b/src/mongo/db/commands/mr_test.cpp
@@ -292,7 +292,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override;
// Hook for onInserts. Defaults to a no-op function but may be overridden to inject exceptions
// while mapReduce inserts its results into the temporary output collection.
@@ -315,7 +316,8 @@ void MapReduceOpObserver::onCreateCollection(OperationContext*,
Collection*,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj&) {
+ const BSONObj&,
+ const OplogSlot&) {
if (!options.temp) {
return;
}
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index 996a0123864..d2282433f7e 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -475,6 +475,9 @@ ExitCode _initAndListen(int listenPort) {
log() << redact(status);
if (status == ErrorCodes::AuthSchemaIncompatible) {
exitCleanly(EXIT_NEED_UPGRADE);
+ } else if (status == ErrorCodes::NotMaster) {
+ // Try creating the indexes if we become master. If we do not become master,
+ // the master will create the indexes and we will replicate them.
} else {
quickExit(EXIT_FAILURE);
}
diff --git a/src/mongo/db/free_mon/free_mon_op_observer.h b/src/mongo/db/free_mon/free_mon_op_observer.h
index 05d16a21e4a..02578a32a1f 100644
--- a/src/mongo/db/free_mon/free_mon_op_observer.h
+++ b/src/mongo/db/free_mon/free_mon_op_observer.h
@@ -80,7 +80,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) final {}
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) final {}
void onCollMod(OperationContext* opCtx,
const NamespaceString& nss,
diff --git a/src/mongo/db/op_observer.h b/src/mongo/db/op_observer.h
index 38027587930..e042cc3dfc5 100644
--- a/src/mongo/db/op_observer.h
+++ b/src/mongo/db/op_observer.h
@@ -42,6 +42,7 @@ namespace mongo {
struct InsertStatement;
class OperationContext;
+struct OplogSlot;
namespace repl {
class OpTime;
@@ -148,7 +149,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) = 0;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) = 0;
/**
* This function logs an oplog entry when a 'collMod' command on a collection is executed.
* Since 'collMod' commands can take a variety of different formats, the 'o' field of the
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp
index 7305ff49267..06bf33a1fe6 100644
--- a/src/mongo/db/op_observer_impl.cpp
+++ b/src/mongo/db/op_observer_impl.cpp
@@ -75,7 +75,8 @@ repl::OpTime logOperation(OperationContext* opCtx,
const OperationSessionInfo& sessionInfo,
StmtId stmtId,
const repl::OplogLink& oplogLink,
- bool prepare) {
+ bool prepare,
+ const OplogSlot& oplogSlot) {
auto& times = OpObserver::Times::get(opCtx).reservedOpTimes;
auto opTime = repl::logOp(opCtx,
opstr,
@@ -88,7 +89,8 @@ repl::OpTime logOperation(OperationContext* opCtx,
sessionInfo,
stmtId,
oplogLink,
- prepare);
+ prepare,
+ oplogSlot);
times.push_back(opTime);
return opTime;
@@ -198,7 +200,8 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
sessionInfo,
args.stmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
opTimes.prePostImageOpTime = noteUpdateOpTime;
@@ -220,7 +223,8 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
sessionInfo,
args.stmtId,
oplogLink,
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
return opTimes;
}
@@ -259,7 +263,8 @@ OpTimeBundle replLogDelete(OperationContext* opCtx,
sessionInfo,
stmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
opTimes.prePostImageOpTime = noteOplog;
oplogLink.preImageOpTime = noteOplog;
}
@@ -276,7 +281,8 @@ OpTimeBundle replLogDelete(OperationContext* opCtx,
sessionInfo,
stmtId,
oplogLink,
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
return opTimes;
}
@@ -303,7 +309,8 @@ OpTimeBundle replLogApplyOps(OperationContext* opCtx,
sessionInfo,
stmtId,
oplogLink,
- prepare);
+ prepare,
+ OplogSlot());
return times;
}
@@ -336,7 +343,8 @@ void OpObserverImpl::onCreateIndex(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
} else {
logOperation(opCtx,
"i",
@@ -349,7 +357,8 @@ void OpObserverImpl::onCreateIndex(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
}
AuthorizationManager::get(opCtx->getServiceContext())
@@ -574,14 +583,16 @@ void OpObserverImpl::onInternalOpMessage(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
}
void OpObserverImpl::onCreateCollection(OperationContext* opCtx,
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) {
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) {
const auto cmdNss = collectionName.getCommandNS();
BSONObjBuilder b;
@@ -618,7 +629,8 @@ void OpObserverImpl::onCreateCollection(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ createOpTime);
}
AuthorizationManager::get(opCtx->getServiceContext())
@@ -665,7 +677,8 @@ void OpObserverImpl::onCollMod(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
}
AuthorizationManager::get(opCtx->getServiceContext())
@@ -702,7 +715,8 @@ void OpObserverImpl::onDropDatabase(OperationContext* opCtx, const std::string&
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
uassert(
50714, "dropping the admin database is not allowed.", dbName != NamespaceString::kAdminDb);
@@ -736,7 +750,8 @@ repl::OpTime OpObserverImpl::onDropCollection(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
}
uassert(50715,
@@ -777,7 +792,8 @@ void OpObserverImpl::onDropIndex(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
AuthorizationManager::get(opCtx->getServiceContext())
->logOp(opCtx, "c", cmdNss, cmdObj, &indexInfo);
@@ -813,7 +829,8 @@ repl::OpTime OpObserverImpl::preRenameCollection(OperationContext* const opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
return {};
}
@@ -894,7 +911,8 @@ void OpObserverImpl::onEmptyCapped(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
}
AuthorizationManager::get(opCtx->getServiceContext())
diff --git a/src/mongo/db/op_observer_impl.h b/src/mongo/db/op_observer_impl.h
index fb67efbe104..67a645332a4 100644
--- a/src/mongo/db/op_observer_impl.h
+++ b/src/mongo/db/op_observer_impl.h
@@ -69,7 +69,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override;
void onCollMod(OperationContext* opCtx,
const NamespaceString& nss,
OptionalCollectionUUID uuid,
diff --git a/src/mongo/db/op_observer_noop.h b/src/mongo/db/op_observer_noop.h
index 5d74fa914af..9e1a496660a 100644
--- a/src/mongo/db/op_observer_noop.h
+++ b/src/mongo/db/op_observer_noop.h
@@ -64,7 +64,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override {}
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override {}
void onCollMod(OperationContext* opCtx,
const NamespaceString& nss,
OptionalCollectionUUID uuid,
diff --git a/src/mongo/db/op_observer_registry.h b/src/mongo/db/op_observer_registry.h
index 2ab2b971219..2d322329c99 100644
--- a/src/mongo/db/op_observer_registry.h
+++ b/src/mongo/db/op_observer_registry.h
@@ -116,10 +116,11 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override {
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override {
ReservedTimes times{opCtx};
for (auto& o : _observers)
- o->onCreateCollection(opCtx, coll, collectionName, options, idIndex);
+ o->onCreateCollection(opCtx, coll, collectionName, options, idIndex, createOpTime);
}
void onCollMod(OperationContext* const opCtx,
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index a1b502e935a..e9a4998db59 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -1576,6 +1576,7 @@ env.Library(
'$BUILD_DIR/mongo/db/s/sharding_runtime_d',
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/db/stats/counters',
+ '$BUILD_DIR/mongo/db/system_index',
'$BUILD_DIR/mongo/rpc/client_metadata',
'$BUILD_DIR/mongo/util/fail_point',
'bgsync',
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index a0d13182694..42f471bedd4 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -434,7 +434,8 @@ OpTime logOp(OperationContext* opCtx,
const OperationSessionInfo& sessionInfo,
StmtId statementId,
const OplogLink& oplogLink,
- bool prepare) {
+ bool prepare,
+ const OplogSlot& oplogSlot) {
auto replCoord = ReplicationCoordinator::get(opCtx);
// For commands, the test below is on the command ns and therefore does not check for
// specific namespaces such as system.profile. This is the caller's responsibility.
@@ -453,7 +454,11 @@ OpTime logOp(OperationContext* opCtx,
OplogSlot slot;
WriteUnitOfWork wuow(opCtx);
- _getNextOpTimes(opCtx, oplog, 1, &slot);
+ if (oplogSlot.opTime.isNull()) {
+ _getNextOpTimes(opCtx, oplog, 1, &slot);
+ } else {
+ slot = oplogSlot;
+ }
auto writer = _logOpWriter(opCtx,
opstr,
diff --git a/src/mongo/db/repl/oplog.h b/src/mongo/db/repl/oplog.h
index b4d34ca0ef6..ef96a304626 100644
--- a/src/mongo/db/repl/oplog.h
+++ b/src/mongo/db/repl/oplog.h
@@ -127,6 +127,7 @@ std::vector<OpTime> logInsertOps(OperationContext* opCtx,
* linked via prevTs, and the timestamps of the oplog entry that contains the document
* before/after update was applied. The timestamps are ignored if isNull() is true.
* prepare this specifies if the oplog entry should be put into a 'prepare' state.
+ * oplogSlot If non-null, use this reserved oplog slot instead of a new one.
*
* Returns the optime of the oplog entry written to the oplog.
* Returns a null optime if oplog was not modified.
@@ -142,7 +143,8 @@ OpTime logOp(OperationContext* opCtx,
const OperationSessionInfo& sessionInfo,
StmtId stmtId,
const OplogLink& oplogLink,
- bool prepare);
+ bool prepare,
+ const OplogSlot& oplogSlot);
// Flush out the cached pointer to the oplog.
// Used by the closeDatabase command to ensure we don't cache closed things.
diff --git a/src/mongo/db/repl/oplog_test.cpp b/src/mongo/db/repl/oplog_test.cpp
index 93738bb47ed..4f0de89c5f8 100644
--- a/src/mongo/db/repl/oplog_test.cpp
+++ b/src/mongo/db/repl/oplog_test.cpp
@@ -111,7 +111,8 @@ TEST_F(OplogTest, LogOpReturnsOpTimeOnSuccessfulInsertIntoOplogCollection) {
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
ASSERT_FALSE(opTime.isNull());
wunit.commit();
}
@@ -234,7 +235,8 @@ OpTime _logOpNoopWithMsg(OperationContext* opCtx,
{},
kUninitializedStmtId,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
ASSERT_FALSE(opTime.isNull());
ASSERT(opTimeNssMap->find(opTime) == opTimeNssMap->end())
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 139d23183ab..dfd54fb93f3 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -79,6 +79,7 @@
#include "mongo/db/service_context.h"
#include "mongo/db/session_catalog.h"
#include "mongo/db/storage/storage_engine.h"
+#include "mongo/db/system_index.h"
#include "mongo/executor/network_connection_hook.h"
#include "mongo/executor/network_interface.h"
#include "mongo/executor/network_interface_factory.h"
@@ -519,6 +520,16 @@ OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationC
_shardingOnTransitionToPrimaryHook(opCtx);
_dropAllTempCollections(opCtx);
+ // It is only necessary to check the system indexes on the first transition to master.
+ // On subsequent transitions to master the indexes will have already been created.
+ static std::once_flag verifySystemIndexesOnce;
+ std::call_once(verifySystemIndexesOnce, [opCtx] {
+ const auto globalAuthzManager = AuthorizationManager::get(opCtx->getServiceContext());
+ if (globalAuthzManager->shouldValidateAuthSchemaOnStartup()) {
+ fassert(65536, verifySystemIndexes(opCtx));
+ }
+ });
+
serverGlobalParams.validateFeaturesAsMaster.store(true);
return opTimeToReturn;
diff --git a/src/mongo/db/repl/sync_tail_test_fixture.cpp b/src/mongo/db/repl/sync_tail_test_fixture.cpp
index dc1ed3dec95..3929aa5239d 100644
--- a/src/mongo/db/repl/sync_tail_test_fixture.cpp
+++ b/src/mongo/db/repl/sync_tail_test_fixture.cpp
@@ -78,7 +78,8 @@ void SyncTailOpObserver::onCreateCollection(OperationContext* opCtx,
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) {
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) {
if (!onCreateCollectionFn) {
return;
}
diff --git a/src/mongo/db/repl/sync_tail_test_fixture.h b/src/mongo/db/repl/sync_tail_test_fixture.h
index cbbe56b94b5..4f483d26eb6 100644
--- a/src/mongo/db/repl/sync_tail_test_fixture.h
+++ b/src/mongo/db/repl/sync_tail_test_fixture.h
@@ -74,7 +74,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override;
// Hooks for OpObserver functions. Defaults to a no-op function but may be overridden to check
// actual documents mutated.
diff --git a/src/mongo/db/s/config_server_op_observer.h b/src/mongo/db/s/config_server_op_observer.h
index 91870bf4732..234d6b2ad60 100644
--- a/src/mongo/db/s/config_server_op_observer.h
+++ b/src/mongo/db/s/config_server_op_observer.h
@@ -80,7 +80,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override {}
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override {}
void onCollMod(OperationContext* opCtx,
const NamespaceString& nss,
diff --git a/src/mongo/db/s/session_catalog_migration_destination.cpp b/src/mongo/db/s/session_catalog_migration_destination.cpp
index 0c318ba7605..68c62abbce2 100644
--- a/src/mongo/db/s/session_catalog_migration_destination.cpp
+++ b/src/mongo/db/s/session_catalog_migration_destination.cpp
@@ -282,7 +282,8 @@ ProcessOplogResult processSessionOplog(OperationContext* opCtx,
sessionInfo,
stmtId,
oplogLink,
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
auto oplogOpTime = result.oplogTime;
uassert(40633,
diff --git a/src/mongo/db/s/shard_server_op_observer.h b/src/mongo/db/s/shard_server_op_observer.h
index 413373446bd..05b87bdb238 100644
--- a/src/mongo/db/s/shard_server_op_observer.h
+++ b/src/mongo/db/s/shard_server_op_observer.h
@@ -81,7 +81,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override {}
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override {}
void onCollMod(OperationContext* opCtx,
const NamespaceString& nss,
diff --git a/src/mongo/db/session_test.cpp b/src/mongo/db/session_test.cpp
index 248da45d091..5052e0150bd 100644
--- a/src/mongo/db/session_test.cpp
+++ b/src/mongo/db/session_test.cpp
@@ -159,7 +159,8 @@ protected:
osi,
stmtId,
link,
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
}
void bumpTxnNumberFromDifferentOpCtx(Session* session, TxnNumber newTxnNum) {
@@ -554,7 +555,8 @@ TEST_F(SessionTest, ErrorOnlyWhenStmtIdBeingCheckedIsNotInCache) {
osi,
1,
{},
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
session.onWriteOpCompletedOnPrimary(opCtx(), txnNum, {1}, opTime, wallClockTime);
wuow.commit();
@@ -581,7 +583,8 @@ TEST_F(SessionTest, ErrorOnlyWhenStmtIdBeingCheckedIsNotInCache) {
osi,
kIncompleteHistoryStmtId,
link,
- false /* prepare */);
+ false /* prepare */,
+ OplogSlot());
session.onWriteOpCompletedOnPrimary(
opCtx(), txnNum, {kIncompleteHistoryStmtId}, opTime, wallClockTime);
diff --git a/src/mongo/db/system_index.cpp b/src/mongo/db/system_index.cpp
index f1e2992ec70..d662a718e10 100644
--- a/src/mongo/db/system_index.cpp
+++ b/src/mongo/db/system_index.cpp
@@ -105,6 +105,15 @@ void generateSystemIndexForExistingCollection(OperationContext* opCtx,
return;
}
+ // Do not try to generate any system indexes on a secondary.
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ uassert(ErrorCodes::NotMaster,
+ "Not primary while creating authorization index",
+ replCoord->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet ||
+ replCoord->canAcceptWritesForDatabase(opCtx, ns.db()));
+
+ invariant(!opCtx->lockState()->inAWriteUnitOfWork());
+
try {
auto indexSpecStatus = index_key_validate::validateIndexSpec(
opCtx, spec.toBSON(), ns, serverGlobalParams.featureCompatibility);
@@ -124,7 +133,10 @@ void generateSystemIndexForExistingCollection(OperationContext* opCtx,
writeConflictRetry(opCtx, "authorization index regeneration", ns.ns(), [&] {
WriteUnitOfWork wunit(opCtx);
- indexer.commit();
+ indexer.commit([opCtx, &ns, collection](const BSONObj& spec) {
+ opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
+ opCtx, ns, collection->uuid(), spec, false /* fromMigrate */);
+ });
wunit.commit();
});
@@ -203,25 +215,29 @@ Status verifySystemIndexes(OperationContext* opCtx) {
void createSystemIndexes(OperationContext* opCtx, Collection* collection) {
invariant(collection);
const NamespaceString& ns = collection->ns();
+ BSONObj indexSpec;
if (ns == AuthorizationManager::usersCollectionNamespace) {
- auto indexSpec =
+ indexSpec =
fassert(40455,
index_key_validate::validateIndexSpec(opCtx,
v3SystemUsersIndexSpec.toBSON(),
ns,
serverGlobalParams.featureCompatibility));
- fassert(40456,
- collection->getIndexCatalog()->createIndexOnEmptyCollection(opCtx, indexSpec));
} else if (ns == AuthorizationManager::rolesCollectionNamespace) {
- auto indexSpec =
+ indexSpec =
fassert(40457,
index_key_validate::validateIndexSpec(opCtx,
v3SystemRolesIndexSpec.toBSON(),
ns,
serverGlobalParams.featureCompatibility));
-
- fassert(40458,
+ }
+ if (!indexSpec.isEmpty()) {
+ opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
+ opCtx, ns, collection->uuid(), indexSpec, false /* fromMigrate */);
+ // Note that the opObserver is called prior to creating the index. This ensures the index
+ // write gets the same storage timestamp as the oplog entry.
+ fassert(40456,
collection->getIndexCatalog()->createIndexOnEmptyCollection(opCtx, indexSpec));
}
}
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
index 7d558c814a9..0a26e2d3002 100644
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ b/src/mongo/dbtests/storage_timestamp_tests.cpp
@@ -2260,6 +2260,65 @@ public:
}
};
+class CreateCollectionWithSystemIndex : public StorageTimestampTest {
+public:
+ void run() {
+ // Only run on 'wiredTiger'. No other storage engines to-date support timestamp writes.
+ if (mongo::storageGlobalParams.engine != "wiredTiger") {
+ return;
+ }
+
+ const LogicalTime indexCreateLt = futureLt.addTicks(1);
+ const Timestamp indexCreateTs = indexCreateLt.asTimestamp();
+
+ NamespaceString nss("admin.system.users");
+
+ { ASSERT_FALSE(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
+
+ ASSERT_OK(createCollection(_opCtx, nss.db().toString(), BSON("create" << nss.coll())));
+
+ { ASSERT(AutoGetCollectionForReadCommand(_opCtx, nss).getCollection()); }
+
+ BSONObj result = queryOplog(BSON("op"
+ << "c"
+ << "ns"
+ << nss.getCommandNS().ns()
+ << "o.create"
+ << nss.coll()));
+ repl::OplogEntry op(result);
+ // The logOp() call for createCollection should have timestamp 'futureTs', which will also
+ // be the timestamp at which we do the write which creates the collection. Thus we expect
+ // the collection to appear at 'futureTs' and not before.
+ ASSERT_EQ(op.getTimestamp(), futureTs) << op.toBSON();
+
+ assertNamespaceInIdents(nss, pastTs, false);
+ assertNamespaceInIdents(nss, presentTs, false);
+ assertNamespaceInIdents(nss, futureTs, true);
+ assertNamespaceInIdents(nss, indexCreateTs, true);
+ assertNamespaceInIdents(nss, nullTs, true);
+
+ result = queryOplog(BSON("op"
+ << "c"
+ << "ns"
+ << nss.getCommandNS().ns()
+ << "o.createIndexes"
+ << nss.coll()));
+ repl::OplogEntry indexOp(result);
+ ASSERT_EQ(indexOp.getObject()["name"].str(), "user_1_db_1");
+ ASSERT_GT(indexOp.getTimestamp(), futureTs) << op.toBSON();
+ AutoGetCollection autoColl(_opCtx, nss, LockMode::MODE_IS, LockMode::MODE_IS);
+ auto kvStorageEngine =
+ dynamic_cast<KVStorageEngine*>(_opCtx->getServiceContext()->getStorageEngine());
+ KVCatalog* kvCatalog = kvStorageEngine->getCatalog();
+ auto indexIdent = kvCatalog->getIndexIdent(_opCtx, nss.ns(), "user_1_db_1");
+ assertIdentsMissingAtTimestamp(kvCatalog, "", indexIdent, pastTs);
+ assertIdentsMissingAtTimestamp(kvCatalog, "", indexIdent, presentTs);
+ assertIdentsMissingAtTimestamp(kvCatalog, "", indexIdent, futureTs);
+ assertIdentsExistAtTimestamp(kvCatalog, "", indexIdent, indexCreateTs);
+ assertIdentsExistAtTimestamp(kvCatalog, "", indexIdent, nullTs);
+ }
+};
+
class AllStorageTimestampTests : public unittest::Suite {
public:
AllStorageTimestampTests() : unittest::Suite("StorageTimestampTests") {}
@@ -2304,6 +2363,7 @@ public:
add<TimestampIndexBuilderOnPrimary<true>>();
add<SecondaryReadsDuringBatchApplicationAreAllowed>();
add<ViewCreationSeparateTransaction>();
+ add<CreateCollectionWithSystemIndex>();
}
};
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index 74c677671bc..94e5e8b18b3 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -1816,6 +1816,7 @@ var ReplSetTest = function(opts) {
// '_master' must have been populated.
var primary = rst._master;
var combinedDBs = new Set(primary.getDBNames());
+ const replSetConfig = rst.getReplSetConfigFromNode();
slaves.forEach(secondary => {
secondary.getDBNames().forEach(dbName => combinedDBs.add(dbName));
@@ -1915,8 +1916,10 @@ var ReplSetTest = function(opts) {
// Check that the following collection stats are the same across replica set
// members:
// capped
- // nindexes
+ // nindexes, except on nodes with buildIndexes: false
// ns
+ const hasSecondaryIndexes =
+ replSetConfig.members[rst.getNodeId(secondary)].buildIndexes !== false;
primaryCollections.forEach(collName => {
var primaryCollStats =
primary.getDB(dbName).runCommand({collStats: collName});
@@ -1928,7 +1931,8 @@ var ReplSetTest = function(opts) {
printCollectionInfo('secondary', secondary, dbName, collName);
success = false;
} else if (primaryCollStats.capped !== secondaryCollStats.capped ||
- primaryCollStats.nindexes !== secondaryCollStats.nindexes ||
+ (hasSecondaryIndexes &&
+ primaryCollStats.nindexes !== secondaryCollStats.nindexes) ||
primaryCollStats.ns !== secondaryCollStats.ns) {
print(msgPrefix +
', the primary and secondary have different stats for the ' +