summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthew Russotto <matthew.russotto@10gen.com> 2018-07-03 17:23:47 -0400
committer: Matthew Russotto <matthew.russotto@10gen.com> 2018-07-23 15:24:13 -0400
commit: deb1161a6c3feb0497c9af4c7b72b01c6d4bdb97 (patch)
tree: 9ab730a53b8cab924cd173d476eea37c10c2965f
parent: 5e1b2d0af5b73d49fd51a1b11cc5e970a1abe773 (diff)
download: mongo-deb1161a6c3feb0497c9af4c7b72b01c6d4bdb97.tar.gz
SERVER-34414 Create system indexes using the normal index creation and replication process.
Do not create them directly on secondaries. Do create oplog entries for index creation. (cherry picked from commit cfa75c933e125b41596c80b9a1297bb12d318ac3)
-rw-r--r-- jstests/replsets/buildindexes_false_with_system_indexes.js | 86
-rw-r--r-- jstests/replsets/oplog_note_cmd.js | 5
-rw-r--r-- src/mongo/db/catalog/database_impl.cpp | 41
-rw-r--r-- src/mongo/db/catalog/rename_collection_test.cpp | 8
-rw-r--r-- src/mongo/db/commands/dbcheck.cpp | 3
-rw-r--r-- src/mongo/db/db.cpp | 3
-rw-r--r-- src/mongo/db/op_observer.h | 4
-rw-r--r-- src/mongo/db/op_observer_impl.cpp | 48
-rw-r--r-- src/mongo/db/op_observer_impl.h | 3
-rw-r--r-- src/mongo/db/op_observer_noop.cpp | 3
-rw-r--r-- src/mongo/db/op_observer_noop.h | 3
-rw-r--r-- src/mongo/db/repl/SConscript | 1
-rw-r--r-- src/mongo/db/repl/oplog.cpp | 9
-rw-r--r-- src/mongo/db/repl/oplog.h | 4
-rw-r--r-- src/mongo/db/repl/oplog_test.cpp | 17
-rw-r--r-- src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 13
-rw-r--r-- src/mongo/db/s/session_catalog_migration_destination.cpp | 3
-rw-r--r-- src/mongo/db/session_test.cpp | 9
-rw-r--r-- src/mongo/db/system_index.cpp | 27
-rw-r--r-- src/mongo/shell/replsettest.js | 10
20 files changed, 244 insertions, 56 deletions
diff --git a/jstests/replsets/buildindexes_false_with_system_indexes.js b/jstests/replsets/buildindexes_false_with_system_indexes.js
new file mode 100644
index 00000000000..96fcc732764
--- /dev/null
+++ b/jstests/replsets/buildindexes_false_with_system_indexes.js
@@ -0,0 +1,86 @@
+/*
+ * Tests that hidden nodes with buildIndexes: false behave correctly when system tables with
+ * default indexes are created.
+ *
+ */
+(function() {
+ 'use strict';
+
+ load("jstests/replsets/rslib.js");
+
+ const testName = "buildindexes_false_with_system_indexes";
+
+ let rst = new ReplSetTest({
+ name: testName,
+ nodes: [
+ {},
+ {rsConfig: {priority: 0}},
+ {rsConfig: {priority: 0, hidden: true, buildIndexes: false}},
+ ],
+ });
+ const nodes = rst.startSet();
+ rst.initiate();
+
+ let primary = rst.getPrimary();
+ assert.eq(primary, nodes[0]);
+ let secondary = nodes[1];
+ const hidden = nodes[2];
+
+ rst.awaitReplication();
+ jsTestLog("Creating a role in the admin database");
+ let adminDb = primary.getDB("admin");
+ adminDb.createRole(
+ {role: 'test_role', roles: [{role: 'readWrite', db: 'test'}], privileges: []});
+ rst.awaitReplication();
+
+ jsTestLog("Creating a user in the admin database");
+ adminDb.createUser({user: 'test_user', pwd: 'test', roles: [{role: 'test_role', db: 'admin'}]});
+ rst.awaitReplication();
+
+ // Make sure the indexes we expect are present on all nodes. The buildIndexes: false node
+ // should have only the _id_ index.
+ let secondaryAdminDb = secondary.getDB("admin");
+ const hiddenAdminDb = hidden.getDB("admin");
+
+ assert.eq(["_id_", "user_1_db_1"], adminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "role_1_db_1"], adminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "user_1_db_1"],
+ secondaryAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "role_1_db_1"],
+ secondaryAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+
+ // Drop the indexes and restart the secondary. The indexes should not be re-created.
+ jsTestLog("Dropping system indexes and restarting secondary.");
+ adminDb.system.users.dropIndex("user_1_db_1");
+ adminDb.system.roles.dropIndex("role_1_db_1");
+ rst.awaitReplication();
+ assert.eq(["_id_"], adminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], adminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], secondaryAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], secondaryAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+
+ secondary = rst.restart(secondary, {}, true /* wait for node to become healthy */);
+ secondaryAdminDb = secondary.getDB("admin");
+ assert.eq(["_id_"], secondaryAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], secondaryAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+
+ jsTestLog("Now restarting primary; indexes should be created.");
+ rst.restart(primary);
+ primary = rst.getPrimary();
+ rst.awaitReplication();
+ adminDb = primary.getDB("admin");
+ assert.eq(["_id_", "user_1_db_1"], adminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "role_1_db_1"], adminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "user_1_db_1"],
+ secondaryAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_", "role_1_db_1"],
+ secondaryAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.users.getIndexes().map(x => x.name).sort());
+ assert.eq(["_id_"], hiddenAdminDb.system.roles.getIndexes().map(x => x.name).sort());
+
+ rst.stopSet();
+}());
diff --git a/jstests/replsets/oplog_note_cmd.js b/jstests/replsets/oplog_note_cmd.js
index 0c92609535a..77757b457c2 100644
--- a/jstests/replsets/oplog_note_cmd.js
+++ b/jstests/replsets/oplog_note_cmd.js
@@ -13,9 +13,10 @@ var statusBefore = db.runCommand({replSetGetStatus: 1});
assert.commandWorked(db.runCommand({appendOplogNote: 1, data: {a: 1}}));
var statusAfter = db.runCommand({replSetGetStatus: 1});
if (rs.getReplSetConfigFromNode().protocolVersion != 1) {
- assert.lt(statusBefore.members[0].optime, statusAfter.members[0].optime);
+ assert.lt(bsonWoCompare(statusBefore.members[0].optime, statusAfter.members[0].optime), 0);
} else {
- assert.lt(statusBefore.members[0].optime.ts, statusAfter.members[0].optime.ts);
+ assert.lt(bsonWoCompare(statusBefore.members[0].optime.ts, statusAfter.members[0].optime.ts),
+ 0);
}
// Make sure note written successfully
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index d9d123fa167..0d73e2e50fe 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -776,29 +776,41 @@ Collection* DatabaseImpl::createCollection(OperationContext* opCtx,
"request doesn't allow collection to be created implicitly",
OperationShardingState::get(opCtx).allowImplicitCollectionCreation());
+ auto coordinator = repl::ReplicationCoordinator::get(opCtx);
+ bool canAcceptWrites =
+ (coordinator->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) ||
+ coordinator->canAcceptWritesForDatabase(opCtx, nss.db()) || nss.isSystemDotProfile();
+
+
CollectionOptions optionsWithUUID = options;
bool generatedUUID = false;
if (enableCollectionUUIDs && !optionsWithUUID.uuid &&
serverGlobalParams.featureCompatibility.isSchemaVersion36()) {
- auto coordinator = repl::ReplicationCoordinator::get(opCtx);
bool fullyUpgraded = serverGlobalParams.featureCompatibility.getVersion() ==
ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36;
- bool canGenerateUUID =
- (coordinator->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet) ||
- coordinator->canAcceptWritesForDatabase(opCtx, nss.db()) || nss.isSystemDotProfile();
-
- if (fullyUpgraded && !canGenerateUUID) {
+ if (fullyUpgraded && !canAcceptWrites) {
std::string msg = str::stream() << "Attempted to create a new collection " << nss.ns()
<< " without a UUID";
severe() << msg;
uasserted(ErrorCodes::InvalidOptions, msg);
}
- if (canGenerateUUID) {
+ if (canAcceptWrites) {
optionsWithUUID.uuid.emplace(CollectionUUID::gen());
generatedUUID = true;
}
}
+ // Because writing the oplog entry depends on having the full spec for the _id index, which is
+ // not available until the collection is actually created, we can't write the oplog entry until
+ // after we have created the collection. In order to make the storage timestamp for the
+ // collection create always correct even when other operations are present in the same storage
+ // transaction, we reserve an opTime before the collection creation, then pass it to the
+ // opObserver. Reserving the optime automatically sets the storage timestamp.
+ OplogSlot createOplogSlot;
+ if (canAcceptWrites && supportsDocLocking() && !coordinator->isOplogDisabledFor(opCtx, nss)) {
+ createOplogSlot = repl::getNextOpTime(opCtx);
+ }
+
_checkCanCreateCollection(opCtx, nss, optionsWithUUID);
audit::logCreateCollection(&cc(), ns);
@@ -833,16 +845,21 @@ Collection* DatabaseImpl::createCollection(OperationContext* opCtx,
: ic->getDefaultIdIndexSpec(featureCompatibilityVersion)));
}
}
-
- if (nss.isSystem()) {
- createSystemIndexes(opCtx, collection);
- }
}
MONGO_FAIL_POINT_PAUSE_WHILE_SET(hangBeforeLoggingCreateCollection);
opCtx->getServiceContext()->getOpObserver()->onCreateCollection(
- opCtx, collection, nss, optionsWithUUID, fullIdIndexSpec);
+ opCtx, collection, nss, optionsWithUUID, fullIdIndexSpec, createOplogSlot);
+
+ // It is necessary to create the system index *after* running the onCreateCollection so that
+ // the storage timestamp for the index creation is after the storage timestamp for the
+ // collection creation, and the opTimes for the corresponding oplog entries are the same as the
+ // storage timestamps. This way both primary and any secondaries will see the index created
+ // after the collection is created.
+ if (canAcceptWrites && createIdIndex && nss.isSystem()) {
+ createSystemIndexes(opCtx, collection);
+ }
return collection;
}
diff --git a/src/mongo/db/catalog/rename_collection_test.cpp b/src/mongo/db/catalog/rename_collection_test.cpp
index d516045d1fa..276ec9355e1 100644
--- a/src/mongo/db/catalog/rename_collection_test.cpp
+++ b/src/mongo/db/catalog/rename_collection_test.cpp
@@ -87,7 +87,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override;
repl::OpTime onDropCollection(OperationContext* opCtx,
const NamespaceString& collectionName,
@@ -153,9 +154,10 @@ void OpObserverMock::onCreateCollection(OperationContext* opCtx,
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) {
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) {
_logOp(opCtx, collectionName, "create");
- OpObserverNoop::onCreateCollection(opCtx, coll, collectionName, options, idIndex);
+ OpObserverNoop::onCreateCollection(opCtx, coll, collectionName, options, idIndex, createOpTime);
}
repl::OpTime OpObserverMock::onDropCollection(OperationContext* opCtx,
diff --git a/src/mongo/db/commands/dbcheck.cpp b/src/mongo/db/commands/dbcheck.cpp
index 9adf39f6b5e..a89815a5583 100644
--- a/src/mongo/db/commands/dbcheck.cpp
+++ b/src/mongo/db/commands/dbcheck.cpp
@@ -475,7 +475,8 @@ private:
wallClockTime,
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
uow.commit();
return result;
});
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index ab70e7ee56b..8e9e21573e2 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -879,6 +879,9 @@ ExitCode _initAndListen(int listenPort) {
log() << redact(status);
if (status == ErrorCodes::AuthSchemaIncompatible) {
exitCleanly(EXIT_NEED_UPGRADE);
+ } else if (status == ErrorCodes::NotMaster) {
+ // Try creating the indexes if we become master. If we do not become master,
+ // the master will create the indexes and we will replicate them.
} else {
quickExit(EXIT_FAILURE);
}
diff --git a/src/mongo/db/op_observer.h b/src/mongo/db/op_observer.h
index 53c2a79f0e5..6fb5738e6a0 100644
--- a/src/mongo/db/op_observer.h
+++ b/src/mongo/db/op_observer.h
@@ -41,6 +41,7 @@ struct CollectionOptions;
struct InsertStatement;
class NamespaceString;
class OperationContext;
+struct OplogSlot;
namespace repl {
class OpTime;
@@ -144,7 +145,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) = 0;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) = 0;
/**
* This function logs an oplog entry when a 'collMod' command on a collection is executed.
* Since 'collMod' commands can take a variety of different formats, the 'o' field of the
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp
index 66445bb839a..84e59cffec8 100644
--- a/src/mongo/db/op_observer_impl.cpp
+++ b/src/mongo/db/op_observer_impl.cpp
@@ -169,7 +169,8 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
opTimes.wallClockTime,
sessionInfo,
args.stmtId,
- {});
+ {},
+ OplogSlot());
opTimes.prePostImageOpTime = noteUpdateOpTime;
@@ -190,7 +191,8 @@ OpTimeBundle replLogUpdate(OperationContext* opCtx,
opTimes.wallClockTime,
sessionInfo,
args.stmtId,
- oplogLink);
+ oplogLink,
+ OplogSlot());
return opTimes;
}
@@ -229,7 +231,8 @@ OpTimeBundle replLogDelete(OperationContext* opCtx,
opTimes.wallClockTime,
sessionInfo,
stmtId,
- {});
+ {},
+ OplogSlot());
opTimes.prePostImageOpTime = noteOplog;
oplogLink.preImageOpTime = noteOplog;
}
@@ -244,7 +247,8 @@ OpTimeBundle replLogDelete(OperationContext* opCtx,
opTimes.wallClockTime,
sessionInfo,
stmtId,
- oplogLink);
+ oplogLink,
+ OplogSlot());
return opTimes;
}
@@ -276,7 +280,8 @@ void OpObserverImpl::onCreateIndex(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
} else {
repl::logOp(opCtx,
"i",
@@ -288,7 +293,8 @@ void OpObserverImpl::onCreateIndex(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
}
AuthorizationManager::get(opCtx->getServiceContext())
@@ -473,14 +479,16 @@ void OpObserverImpl::onInternalOpMessage(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
}
void OpObserverImpl::onCreateCollection(OperationContext* opCtx,
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) {
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) {
const auto cmdNss = collectionName.getCommandNS();
BSONObjBuilder b;
@@ -516,7 +524,8 @@ void OpObserverImpl::onCreateCollection(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
}
AuthorizationManager::get(opCtx->getServiceContext())
@@ -564,7 +573,8 @@ void OpObserverImpl::onCollMod(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
}
AuthorizationManager::get(opCtx->getServiceContext())
@@ -604,7 +614,8 @@ void OpObserverImpl::onDropDatabase(OperationContext* opCtx, const std::string&
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
if (dbName == FeatureCompatibilityVersion::kDatabase) {
FeatureCompatibilityVersion::onDropCollection(opCtx);
@@ -637,7 +648,8 @@ repl::OpTime OpObserverImpl::onDropCollection(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
}
if (collectionName.coll() == DurableViewCatalog::viewsCollectionName()) {
@@ -684,7 +696,8 @@ void OpObserverImpl::onDropIndex(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
AuthorizationManager::get(opCtx->getServiceContext())
->logOp(opCtx, "c", cmdNss, cmdObj, &indexInfo);
@@ -721,7 +734,8 @@ repl::OpTime OpObserverImpl::onRenameCollection(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
if (fromCollection.isSystemDotViews())
DurableViewCatalog::onExternalChange(opCtx, fromCollection);
@@ -767,7 +781,8 @@ void OpObserverImpl::onApplyOps(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
AuthorizationManager::get(opCtx->getServiceContext())
->logOp(opCtx, "c", cmdNss, applyOpCmd, nullptr);
@@ -791,7 +806,8 @@ void OpObserverImpl::onEmptyCapped(OperationContext* opCtx,
getWallClockTimeForOpLog(opCtx),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
}
AuthorizationManager::get(opCtx->getServiceContext())
diff --git a/src/mongo/db/op_observer_impl.h b/src/mongo/db/op_observer_impl.h
index e5395abdd22..1252443d633 100644
--- a/src/mongo/db/op_observer_impl.h
+++ b/src/mongo/db/op_observer_impl.h
@@ -70,7 +70,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override;
void onCollMod(OperationContext* opCtx,
const NamespaceString& nss,
OptionalCollectionUUID uuid,
diff --git a/src/mongo/db/op_observer_noop.cpp b/src/mongo/db/op_observer_noop.cpp
index 66c3eae95bd..1d05a90b954 100644
--- a/src/mongo/db/op_observer_noop.cpp
+++ b/src/mongo/db/op_observer_noop.cpp
@@ -73,7 +73,8 @@ void OpObserverNoop::onCreateCollection(OperationContext* opCtx,
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) {
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) {
if (options.uuid) {
UUIDCatalog& catalog = UUIDCatalog::get(opCtx);
catalog.onCreateCollection(opCtx, coll, options.uuid.get());
diff --git a/src/mongo/db/op_observer_noop.h b/src/mongo/db/op_observer_noop.h
index 66f64fe69e3..841255ca26a 100644
--- a/src/mongo/db/op_observer_noop.h
+++ b/src/mongo/db/op_observer_noop.h
@@ -70,7 +70,8 @@ public:
Collection* coll,
const NamespaceString& collectionName,
const CollectionOptions& options,
- const BSONObj& idIndex) override;
+ const BSONObj& idIndex,
+ const OplogSlot& createOpTime) override;
void onCollMod(OperationContext* opCtx,
const NamespaceString& nss,
OptionalCollectionUUID uuid,
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 89d025eef41..10cb6c30e0a 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -1553,6 +1553,7 @@ env.Library(
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/db/write_ops',
'$BUILD_DIR/mongo/db/stats/counters',
+ '$BUILD_DIR/mongo/db/system_index',
'$BUILD_DIR/mongo/rpc/client_metadata',
'$BUILD_DIR/mongo/s/catalog/sharding_catalog_manager',
'bgsync',
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 28c91c9f244..25c70cdd697 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -435,7 +435,8 @@ OpTime logOp(OperationContext* opCtx,
Date_t wallClockTime,
const OperationSessionInfo& sessionInfo,
StmtId statementId,
- const OplogLink& oplogLink) {
+ const OplogLink& oplogLink,
+ const OplogSlot& oplogSlot) {
auto replCoord = ReplicationCoordinator::get(opCtx);
if (replCoord->isOplogDisabledFor(opCtx, nss)) {
uassert(ErrorCodes::IllegalOperation,
@@ -451,7 +452,11 @@ OpTime logOp(OperationContext* opCtx,
OplogSlot slot;
WriteUnitOfWork wuow(opCtx);
- _getNextOpTimes(opCtx, oplog, 1, &slot);
+ if (oplogSlot.opTime.isNull()) {
+ _getNextOpTimes(opCtx, oplog, 1, &slot);
+ } else {
+ slot = oplogSlot;
+ }
auto writer = _logOpWriter(opCtx,
opstr,
diff --git a/src/mongo/db/repl/oplog.h b/src/mongo/db/repl/oplog.h
index 6f773694945..37d93d1dcc0 100644
--- a/src/mongo/db/repl/oplog.h
+++ b/src/mongo/db/repl/oplog.h
@@ -130,6 +130,7 @@ std::vector<OpTime> logInsertOps(OperationContext* opCtx,
* oplogLink this contains the timestamp that points to the previous write that will be
* linked via prevTs, and the timestamps of the oplog entry that contains the document
* before/after update was applied. The timestamps are ignored if isNull() is true.
+ * oplogSlot If non-null, use this reserved oplog slot instead of a new one.
*
* Returns the optime of the oplog entry written to the oplog.
* Returns a null optime if oplog was not modified.
@@ -144,7 +145,8 @@ OpTime logOp(OperationContext* opCtx,
Date_t wallClockTime,
const OperationSessionInfo& sessionInfo,
StmtId stmtId,
- const OplogLink& oplogLink);
+ const OplogLink& oplogLink,
+ const OplogSlot& oplogSlot);
// Flush out the cached pointer to the oplog.
// Used by the closeDatabase command to ensure we don't cache closed things.
diff --git a/src/mongo/db/repl/oplog_test.cpp b/src/mongo/db/repl/oplog_test.cpp
index 7666e026955..4bcc5639dc8 100644
--- a/src/mongo/db/repl/oplog_test.cpp
+++ b/src/mongo/db/repl/oplog_test.cpp
@@ -110,7 +110,8 @@ TEST_F(OplogTest, LogOpReturnsOpTimeOnSuccessfulInsertIntoOplogCollection) {
Date_t::now(),
{},
kUninitializedStmtId,
- {});
+ {},
+ OplogSlot());
ASSERT_FALSE(opTime.isNull());
wunit.commit();
}
@@ -222,8 +223,18 @@ OpTime _logOpNoopWithMsg(OperationContext* opCtx,
// logOp() must be called while holding lock because ephemeralForTest storage engine does not
// support concurrent updates to its internal state.
const auto msgObj = BSON("msg" << nss.ns());
- auto opTime = logOp(
- opCtx, "n", nss, {}, msgObj, nullptr, false, Date_t::now(), {}, kUninitializedStmtId, {});
+ auto opTime = logOp(opCtx,
+ "n",
+ nss,
+ {},
+ msgObj,
+ nullptr,
+ false,
+ Date_t::now(),
+ {},
+ kUninitializedStmtId,
+ {},
+ OplogSlot());
ASSERT_FALSE(opTime.isNull());
ASSERT(opTimeNssMap->find(opTime) == opTimeNssMap->end())
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 3edc51bb1b3..ba0d511c85d 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -38,6 +38,8 @@
#include "mongo/base/status_with.h"
#include "mongo/bson/oid.h"
#include "mongo/bson/util/bson_extract.h"
+#include "mongo/db/auth/authorization_manager.h"
+#include "mongo/db/auth/authorization_manager_global.h"
#include "mongo/db/catalog/coll_mod.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/catalog/database_holder.h"
@@ -77,6 +79,7 @@
#include "mongo/db/service_context.h"
#include "mongo/db/session_catalog.h"
#include "mongo/db/storage/storage_engine.h"
+#include "mongo/db/system_index.h"
#include "mongo/executor/network_connection_hook.h"
#include "mongo/executor/network_interface.h"
#include "mongo/executor/network_interface_factory.h"
@@ -463,6 +466,16 @@ OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationC
_shardingOnTransitionToPrimaryHook(opCtx);
_dropAllTempCollections(opCtx);
+ // It is only necessary to check the system indexes on the first transition to master.
+ // On subsequent transitions to master the indexes will have already been created.
+ static std::once_flag verifySystemIndexesOnce;
+ std::call_once(verifySystemIndexesOnce, [opCtx] {
+ const auto globalAuthzManager = AuthorizationManager::get(opCtx->getServiceContext());
+ if (globalAuthzManager->shouldValidateAuthSchemaOnStartup()) {
+ fassert(65536, verifySystemIndexes(opCtx));
+ }
+ });
+
serverGlobalParams.validateFeaturesAsMaster.store(true);
return opTimeToReturn;
diff --git a/src/mongo/db/s/session_catalog_migration_destination.cpp b/src/mongo/db/s/session_catalog_migration_destination.cpp
index e6750114746..8fe9d02095d 100644
--- a/src/mongo/db/s/session_catalog_migration_destination.cpp
+++ b/src/mongo/db/s/session_catalog_migration_destination.cpp
@@ -292,7 +292,8 @@ ProcessOplogResult processSessionOplog(OperationContext* opCtx,
*oplogEntry.getWallClockTime(),
sessionInfo,
stmtId,
- oplogLink);
+ oplogLink,
+ OplogSlot());
auto oplogOpTime = result.oplogTime;
uassert(40633,
diff --git a/src/mongo/db/session_test.cpp b/src/mongo/db/session_test.cpp
index c015d5399b0..4c62c251815 100644
--- a/src/mongo/db/session_test.cpp
+++ b/src/mongo/db/session_test.cpp
@@ -123,7 +123,8 @@ protected:
Date_t::now(),
osi,
stmtId,
- link);
+ link,
+ OplogSlot());
}
};
@@ -481,7 +482,8 @@ TEST_F(SessionTest, ErrorOnlyWhenStmtIdBeingCheckedIsNotInCache) {
wallClockTime,
osi,
1,
- {});
+ {},
+ OplogSlot());
session.onWriteOpCompletedOnPrimary(opCtx(), txnNum, {1}, opTime, wallClockTime);
wuow.commit();
@@ -507,7 +509,8 @@ TEST_F(SessionTest, ErrorOnlyWhenStmtIdBeingCheckedIsNotInCache) {
wallClockTime,
osi,
kIncompleteHistoryStmtId,
- link);
+ link,
+ OplogSlot());
session.onWriteOpCompletedOnPrimary(
opCtx(), txnNum, {kIncompleteHistoryStmtId}, opTime, wallClockTime);
diff --git a/src/mongo/db/system_index.cpp b/src/mongo/db/system_index.cpp
index 594cf4c4162..0cdcb670dd3 100644
--- a/src/mongo/db/system_index.cpp
+++ b/src/mongo/db/system_index.cpp
@@ -105,6 +105,15 @@ void generateSystemIndexForExistingCollection(OperationContext* opCtx,
return;
}
+ // Do not try to generate any system indexes on a secondary.
+ auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ uassert(ErrorCodes::NotMaster,
+ "Not primary while creating authorization index",
+ replCoord->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet ||
+ replCoord->canAcceptWritesForDatabase(opCtx, ns.db()));
+
+ invariant(!opCtx->lockState()->inAWriteUnitOfWork());
+
try {
auto indexSpecStatus = index_key_validate::validateIndexSpec(
opCtx, spec.toBSON(), ns, serverGlobalParams.featureCompatibility);
@@ -125,6 +134,8 @@ void generateSystemIndexForExistingCollection(OperationContext* opCtx,
WriteUnitOfWork wunit(opCtx);
indexer.commit();
+ opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
+ opCtx, ns, collection->uuid(), indexSpec, false /* fromMigrate */);
wunit.commit();
});
@@ -203,26 +214,30 @@ Status verifySystemIndexes(OperationContext* opCtx) {
void createSystemIndexes(OperationContext* opCtx, Collection* collection) {
invariant(collection);
const NamespaceString& ns = collection->ns();
+ BSONObj indexSpec;
if (ns == AuthorizationManager::usersCollectionNamespace) {
- auto indexSpec = fassertStatusOK(
+ indexSpec = fassertStatusOK(
40455,
index_key_validate::validateIndexSpec(opCtx,
v3SystemUsersIndexSpec.toBSON(),
ns,
serverGlobalParams.featureCompatibility));
- fassertStatusOK(
- 40456, collection->getIndexCatalog()->createIndexOnEmptyCollection(opCtx, indexSpec));
} else if (ns == AuthorizationManager::rolesCollectionNamespace) {
- auto indexSpec = fassertStatusOK(
+ indexSpec = fassertStatusOK(
40457,
index_key_validate::validateIndexSpec(opCtx,
v3SystemRolesIndexSpec.toBSON(),
ns,
serverGlobalParams.featureCompatibility));
-
+ }
+ if (!indexSpec.isEmpty()) {
+ opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
+ opCtx, ns, collection->uuid(), indexSpec, false /* fromMigrate */);
+ // Note that the opObserver is called prior to creating the index. This ensures the index
+ // write gets the same storage timestamp as the oplog entry.
fassertStatusOK(
- 40458, collection->getIndexCatalog()->createIndexOnEmptyCollection(opCtx, indexSpec));
+ 40456, collection->getIndexCatalog()->createIndexOnEmptyCollection(opCtx, indexSpec));
}
}
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index ba671912252..4ffdf4451fe 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -1447,6 +1447,9 @@ var ReplSetTest = function(opts) {
// liveNodes must have been populated.
var primary = rst.liveNodes.master;
var combinedDBs = new Set(primary.getDBNames());
+ // replSetConfig will be undefined for master/slave passthrough.
+ const replSetConfig =
+ rst.getReplSetConfigFromNode ? rst.getReplSetConfigFromNode() : undefined;
rst.liveNodes.slaves.forEach(secondary => {
secondary.getDBNames().forEach(dbName => combinedDBs.add(dbName));
@@ -1547,8 +1550,10 @@ var ReplSetTest = function(opts) {
// Check that the following collection stats are the same across replica set
// members:
// capped
- // nindexes
+ // nindexes, except on nodes with buildIndexes: false
// ns
+ const hasSecondaryIndexes = !replSetConfig ||
+ replSetConfig.members[rst.getNodeId(secondary)].buildIndexes !== false;
primaryCollections.forEach(collName => {
var primaryCollStats =
primary.getDB(dbName).runCommand({collStats: collName});
@@ -1558,7 +1563,8 @@ var ReplSetTest = function(opts) {
assert.commandWorked(secondaryCollStats);
if (primaryCollStats.capped !== secondaryCollStats.capped ||
- primaryCollStats.nindexes !== secondaryCollStats.nindexes ||
+ (hasSecondaryIndexes &&
+ primaryCollStats.nindexes !== secondaryCollStats.nindexes) ||
primaryCollStats.ns !== secondaryCollStats.ns) {
print(msgPrefix +
', the primary and secondary have different stats for the ' +