|  |  |  |
|---|---|---|
| author | William Schultz <william.schultz@mongodb.com> | 2018-01-22 16:40:27 -0500 |
| committer | William Schultz <william.schultz@mongodb.com> | 2018-01-29 14:31:45 -0500 |
| commit | 3e4d4d6c8839eea43df01d1eb25de2979f88606c (patch) | |
| tree | e30e0753b693cb7669868ee5658401a66b657e87 | |
| parent | b0308845c11c937bccc593feae84d9dd368bc9c9 (diff) | |
SERVER-32466 Remove FCV replication behaviors only relevant to the 3.4-3.6 transition
19 files changed, 17 insertions, 3772 deletions
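For context before the diff: the tests and server code removed below were built around driving a replica set between featureCompatibilityVersion 3.4 and 3.6. The sketch that follows is not part of this commit; it is a minimal illustration of the set-and-verify FCV pattern those deleted jstests relied on, using the `setFeatureCompatibilityVersion` admin command and the `checkFCV()` helper from `jstests/libs/feature_compatibility_version.js`. The two-node `ReplSetTest` setup is an assumed scaffold for illustration, not code from the commit.

```javascript
// Minimal sketch (not part of this commit) of the FCV set-and-verify pattern used by
// the deleted tests. Assumes it runs under the repo's jstest harness.
(function() {
    "use strict";

    load("jstests/libs/feature_compatibility_version.js");  // Provides checkFCV().

    // Assumed two-node replica set; setting FCV is a majority write, so
    // secondaries must be up and replicating.
    const rst = new ReplSetTest({nodes: 2});
    rst.startSet();
    rst.initiate();

    const primary = rst.getPrimary();

    // Upgrade path: set FCV to 3.6 and verify it on the admin database.
    assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: "3.6"}));
    checkFCV(primary.getDB("admin"), "3.6");

    // Downgrade path exercised by the removed shutdown/rollback tests.
    assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: "3.4"}));
    checkFCV(primary.getDB("admin"), "3.4");

    rst.stopSet();
}());
```

The deleted tests below layered additional behavior on top of this pattern (two-phase drop handling at clean shutdown, and rollback-via-refetch with and without UUID support), all of which is only meaningful during the 3.4-3.6 transition and is therefore removed.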
diff --git a/jstests/noPassthrough/drop_collections_two_phase_feature_compatibility_version.js b/jstests/noPassthrough/drop_collections_two_phase_feature_compatibility_version.js deleted file mode 100644 index 713e12c1866..00000000000 --- a/jstests/noPassthrough/drop_collections_two_phase_feature_compatibility_version.js +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Test to ensure that drop pending collections are dropped upon clean shutdown under FCV 3.4 but - * retained under FCV 3.6. - * - * This test does not work with non-persistent storage engines because it checks for the presence of - * drop-pending collections across server restarts. - * @tags: [requires_persistence, requires_replication] - */ - -(function() { - "use strict"; - - load("jstests/libs/feature_compatibility_version.js"); - load("jstests/replsets/libs/two_phase_drops.js"); // For TwoPhaseDropCollectionTest. - - // Set feature compatibility version on the given node. Note that setting FCV requires a - // majority write to work so replication to secondaries must be enabled. - function setFCV(node, featureCompatibilityVersion) { - assert.commandWorked( - node.adminCommand({setFeatureCompatibilityVersion: featureCompatibilityVersion})); - checkFCV(node.getDB("admin"), featureCompatibilityVersion); - } - - // Restart the primary of the given ReplSetTest. - function restartPrimary(replTest) { - let primaryId = replTest.getNodeId(replTest.getPrimary()); - replTest.restart(primaryId); - } - - // Set up a two phase drop test. - let testName = "drop_collection_two_phase"; - let dbName = testName; - let twoPhaseDropTest = new TwoPhaseDropCollectionTest(testName, dbName); - - // Initialize replica set. - let replTest = twoPhaseDropTest.initReplSet(); - - // - // [FCV 3.4] - // Create a collection, put it into drop pending state, and then restart primary node under FCV - // 3.4. Drop-pending collection should NOT be present after node comes back up. - // - let collToDrop34 = "collectionToDrop34"; - twoPhaseDropTest.createCollection(collToDrop34); - - // Make sure checkpointTimestamp collection is not empty at shutdown. - let checkpointColl = replTest.getPrimary().getCollection("local.replset.checkpointTimestamp"); - assert.writeOK(checkpointColl.insert({a: 1})); - assert.gt(checkpointColl.find().itcount(), 0); - - jsTestLog("Setting FCV=3.4 on the primary before collection drop."); - setFCV(replTest.getPrimary(), "3.4"); - twoPhaseDropTest.prepareDropCollection(collToDrop34); - - jsTestLog("Restarting the primary."); - restartPrimary(replTest); - - assert(!twoPhaseDropTest.collectionIsPendingDrop(collToDrop34), - "Collection was not removed on clean shutdown when FCV is 3.4."); - - // Test that the checkpointTimestamp collection is also dropped on 3.4 fCV shutdown. - assert(!twoPhaseDropTest.collectionExists("local.replset.checkpointTimestamp")); - - // Resume oplog application so that we can set FCV again. - twoPhaseDropTest.resumeOplogApplication(replTest.getSecondary()); - - // - // [FCV 3.6] - // Create a collection, put it into drop pending state, and then restart primary node under FCV - // 3.6. Drop-pending collection should be present after node comes back up. 
- // - let collToDrop36 = "collectionToDrop36"; - - jsTestLog("Creating collection " + collToDrop36 + " on primary."); - twoPhaseDropTest.createCollection(collToDrop36); - - jsTestLog("Setting FCV=3.6 on the primary before collection drop."); - setFCV(replTest.getPrimary(), "3.6"); - twoPhaseDropTest.prepareDropCollection(collToDrop36); - - jsTestLog("Restarting the primary."); - restartPrimary(replTest); - - assert(twoPhaseDropTest.collectionIsPendingDrop(collToDrop36), - "Collection was removed on clean shutdown when FCV is 3.6."); - - // Let the secondary apply the collection drop operation, so that the replica set commit point - // will advance, and the 'Commit' phase of the collection drop will complete on the primary. - twoPhaseDropTest.commitDropCollection(collToDrop36); - - twoPhaseDropTest.stop(); -}()); diff --git a/jstests/replsets/rollback_during_downgrade.js b/jstests/replsets/rollback_during_downgrade.js deleted file mode 100644 index ccacb2b73f0..00000000000 --- a/jstests/replsets/rollback_during_downgrade.js +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Test that rollback via refetch with no UUID support succeeds during downgrade. This runs various - * ddl commands that are expected to cause different UUID conflicts among the two nodes. - */ - -(function() { - 'use strict'; - - load('jstests/replsets/libs/rollback_test.js'); - load("jstests/libs/check_log.js"); - - const testName = 'rollback_during_downgrade'; - const dbName = testName; - - // Set up Rollback Test. - let rollbackTest = new RollbackTest(testName); - - let rollbackNode = rollbackTest.getPrimary(); - let syncSourceNode = rollbackTest.getSecondary(); - - // Create some collections. - assert.writeOK(rollbackNode.getDB(dbName)['tToRemoveUUID'].insert({t: 1})); - assert.writeOK(rollbackNode.getDB(dbName)['tToDrop'].insert({t: 2})); - assert.writeOK(rollbackNode.getDB(dbName)['tToRename'].insert({t: 3})); - assert.writeOK(rollbackNode.getDB(dbName)['tToDropIndexes'].insert({t: 4})); - assert.commandWorked(rollbackNode.getDB(dbName)['tToDropIndexes'].createIndex({t: 1})); - assert.writeOK(rollbackNode.getDB(dbName)['tToNotRemoveUUID'].insert({t: 5})); - - // Stop downgrade in the middle of the collMod section. - assert.commandWorked(rollbackNode.adminCommand({ - configureFailPoint: 'hangBeforeDatabaseUpgrade', - data: {database: dbName}, - mode: 'alwaysOn' - })); - - // Downgrade the cluster in the background. - let downgradeFn = function() { - jsTestLog('Downgrading replica set'); - let adminDB = db.getSiblingDB('admin'); - try { - adminDB.runCommand({setFeatureCompatibilityVersion: '3.4'}); - } catch (e) { - if (!isNetworkError(e)) { - throw e; - } - print("Downgrading set threw expected network error: " + tojson(e)); - } - }; - let waitForDowngradeToFinish = startParallelShell(downgradeFn, rollbackNode.port); - - checkLog.contains(rollbackNode, 'collMod - hangBeforeDatabaseUpgrade fail point enabled'); - - // ----------------- Begins running operations only on the rollback node ------------------ - rollbackTest.transitionToRollbackOperations(); - - // Allow the downgrade to complete the collMod section. - assert.commandWorked( - rollbackNode.adminCommand({configureFailPoint: 'hangBeforeDatabaseUpgrade', mode: 'off'})); - checkLog.contains(rollbackNode, 'Finished updating UUID schema version for downgrade'); - - // Tests that when we resync a two-phase dropped collection with a UUID on the sync source, - // the UUID is assigned correctly. - // Also tests that two-phase dropped collections are dropped eventually. 
- assert(rollbackNode.getDB(dbName)['tToDrop'].drop()); - - // Tests that when we resync a collection in a rename, the collection's UUID is - // assigned properly. - assert.commandWorked(rollbackNode.getDB(dbName)['tToRename'].renameCollection('tRenamed')); - - // Tests that collections resynced due to dropIndexes commands are assigned UUIDs correctly. - assert.commandWorked(rollbackNode.getDB(dbName)['tToDropIndexes'].dropIndexes()); - - // ----------------- Begins running operations only on the sync source node --------------- - rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); - - // Fake removing the UUID on the sync source, to test when neither node has a UUID for - // completeness. - assert.commandWorked(syncSourceNode.getDB(dbName).runCommand({ - applyOps: [ - {op: 'c', ns: dbName + '.$cmd', o: {collMod: 'tToRemoveUUID'}}, - ] - })); - - // ----------------- Allows rollback to occur and checks for consistency ------------------ - rollbackTest.transitionToSyncSourceOperationsDuringRollback(); - rollbackTest.transitionToSteadyStateOperations(); - - waitForDowngradeToFinish(); - - rollbackTest.stop(); -})(); diff --git a/jstests/replsets/rollback_during_upgrade.js b/jstests/replsets/rollback_during_upgrade.js deleted file mode 100644 index 739908a6c73..00000000000 --- a/jstests/replsets/rollback_during_upgrade.js +++ /dev/null @@ -1,111 +0,0 @@ -/** - * Test that rollback via refetch with no UUID support succeeds during upgrade. This runs various - * ddl commands that are expected to cause different UUID conflicts among the two nodes. - */ - -(function() { - 'use strict'; - - load('jstests/replsets/libs/rollback_test.js'); - load("jstests/libs/check_log.js"); - - const testName = 'rollback_during_upgrade'; - const dbName = testName; - - // Set up Rollback Test. - let rollbackTest = new RollbackTest(testName); - - let rollbackNode = rollbackTest.getPrimary(); - let syncSourceNode = rollbackTest.getSecondary(); - - // Begin in fCV 3.4. - assert.commandWorked(rollbackNode.adminCommand({setFeatureCompatibilityVersion: '3.4'})); - - // Create some collections. - assert.writeOK(rollbackNode.getDB(dbName)['tToAssignUUID'].insert({t: 1})); - assert.writeOK(rollbackNode.getDB(dbName)['tToDropAndNotAssignUUID'].insert({t: 2})); - assert.writeOK(rollbackNode.getDB(dbName)['tToDrop'].insert({t: 3})); - assert.writeOK(rollbackNode.getDB(dbName)['tToRename'].insert({t: 4})); - assert.writeOK(rollbackNode.getDB(dbName)['tToDropIndexes'].insert({t: 5})); - assert.commandWorked(rollbackNode.getDB(dbName)['tToDropIndexes'].createIndex({t: 1})); - assert.writeOK(rollbackNode.getDB(dbName)['tToRemoveUUIDLocally'].insert({t: 6})); - assert.writeOK(rollbackNode.getDB(dbName)['tToNotAssignUUID'].insert({t: 7})); - - // Stop upgrade in the middle of the collMod section. - assert.commandWorked(rollbackNode.adminCommand({ - configureFailPoint: 'hangBeforeDatabaseUpgrade', - data: {database: dbName}, - mode: 'alwaysOn' - })); - - // Upgrade the cluster in the background. 
- let upgradeFn = function() { - jsTestLog('Upgrading replica set'); - let adminDB = db.getSiblingDB('admin'); - try { - adminDB.runCommand({setFeatureCompatibilityVersion: '3.6'}); - } catch (e) { - if (!isNetworkError(e)) { - throw e; - } - print("Upgrading set threw expected network error: " + tojson(e)); - } - }; - let waitForUpgradeToFinish = startParallelShell(upgradeFn, rollbackNode.port); - - checkLog.contains(rollbackNode, 'collMod - hangBeforeDatabaseUpgrade fail point enabled'); - - // ----------------- Begins running operations only on the rollback node ------------------ - rollbackTest.transitionToRollbackOperations(); - - // Allow the upgrade to complete the collMod section. - assert.commandWorked( - rollbackNode.adminCommand({configureFailPoint: 'hangBeforeDatabaseUpgrade', mode: 'off'})); - checkLog.contains(rollbackNode, 'Finished updating UUID schema version for upgrade'); - - // Tests that we resolve UUID conflicts correctly when we resync a two-phase dropped collection - // with a different UUID on both nodes. - // Also tests that two-phase dropped collections are dropped eventually. - assert(rollbackNode.getDB(dbName)['tToDrop'].drop()); - - // Tests that when we resync a two-phase dropped collection where only the rolling back node - // has a UUID, that the node removes its UUID. - assert(rollbackNode.getDB(dbName)['tToDropAndNotAssignUUID'].drop()); - - // Tests that we correctly assign the sync source's UUID when we resync collections in a rename. - assert.commandWorked(rollbackNode.getDB(dbName)['tToRename'].renameCollection('tRenamed')); - - // Tests that we correctly assign the sync source's UUID when we resync collections during - // rollback of dropIndexes commands. - assert.commandWorked(rollbackNode.getDB(dbName)['tToDropIndexes'].dropIndexes()); - - // Tests that collections get assigned the correct UUID when the sync source has a - // UUID but there is no UUID locally. - assert.commandWorked(rollbackNode.getDB(dbName).runCommand({ - applyOps: [ - {op: 'c', ns: dbName + '.$cmd', o: {collMod: 'tToRemoveUUIDLocally'}}, - ] - })); - - // ----------------- Begins running operations only on the sync source node --------------- - rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); - - // Fake giving the collections UUIDs to simulate an upgrade on the sync source. - // The rollback test fixture only has two data bearing nodes so we cannot run an upgrade using - // the `setFeatureCompatibilityVersion` command. 
- assert.commandWorked(syncSourceNode.getDB(dbName).runCommand({ - applyOps: [ - {op: 'c', ns: dbName + '.$cmd', ui: UUID(), o: {collMod: 'tToAssignUUID'}}, - {op: 'c', ns: dbName + '.$cmd', ui: UUID(), o: {collMod: 'tToDrop'}}, - {op: 'c', ns: dbName + '.$cmd', ui: UUID(), o: {collMod: 'tToRename'}}, - {op: 'c', ns: dbName + '.$cmd', ui: UUID(), o: {collMod: 'tToDropIndexes'}}, - ] - })); - - // ----------------- Allows rollback to occur and checks for consistency ------------------ - rollbackTest.transitionToSyncSourceOperationsDuringRollback(); - rollbackTest.transitionToSteadyStateOperations(); - waitForUpgradeToFinish(); - - rollbackTest.stop(); -})(); diff --git a/src/mongo/db/commands/feature_compatibility_version.cpp b/src/mongo/db/commands/feature_compatibility_version.cpp index c7a877e6f31..59b21bd46e2 100644 --- a/src/mongo/db/commands/feature_compatibility_version.cpp +++ b/src/mongo/db/commands/feature_compatibility_version.cpp @@ -311,71 +311,6 @@ bool FeatureCompatibilityVersion::isCleanStartUp() { return true; } -namespace { - -// If a node finishes rollback while in fcv=3.6, but while in RECOVERING before reaching -// minValid, sees that its sync source started a downgrade to fcv=3.4, it may be be unable to -// become consistent. This is because an fcv=3.6 node will have used the UUID algorithm for -// rollback, which treats a missing UUID as a collection drop. We fassert before becoming -// SECONDARY in an inconsistent state. -// -// We only care about the start of downgrade, because if we see an oplog entry marking the end -// of downgrade, then either we also saw a downgrade start op, or we did our rollback in fcv=3.4 -// with the no-UUID algorithm. Similarly, we do not care about upgrade, because either -// it will also have a downgrade op, or we did our rollback in fcv=3.4. The no-UUID rollback -// algorithm will be as safe as it was in 3.4 regardless of if the sync source has UUIDs or not. -void uassertDuringRollbackOnDowngradeOp( - OperationContext* opCtx, - ServerGlobalParams::FeatureCompatibility::Version newVersion, - std::string msg) { - auto currentVersion = serverGlobalParams.featureCompatibility.getVersion(); - - if ((newVersion != ServerGlobalParams::FeatureCompatibility::Version::kDowngradingTo34) || - (currentVersion != ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36)) { - return; - } - - invariant(newVersion == ServerGlobalParams::FeatureCompatibility::Version::kDowngradingTo34); - invariant(currentVersion == - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36); - - auto replCoord = repl::ReplicationCoordinator::get(opCtx); - uassert(ErrorCodes::OplogOperationUnsupported, - msg, - replCoord->getReplicationMode() != repl::ReplicationCoordinator::modeReplSet || - replCoord->isInPrimaryOrSecondaryState()); -} - -// If we are finishing an upgrade, all collections should have UUIDs. We check to make sure -// that the feature compatibility version collection has a UUID. -void uassertDuringInvalidUpgradeOp(OperationContext* opCtx, - ServerGlobalParams::FeatureCompatibility::Version version) { - auto replCoord = repl::ReplicationCoordinator::get(opCtx); - if (replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeMasterSlave) { - // TODO (SERVER-31593) master-slave should uassert here, but cannot due to a bug. - return; - } - - if (version < ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36) { - // Only check this assertion if we're completing an upgrade to 3.6. 
- return; - } - - auto fcvUUID = repl::StorageInterface::get(opCtx)->getCollectionUUID( - opCtx, NamespaceString(FeatureCompatibilityVersion::kCollection)); - uassert(ErrorCodes::IllegalOperation, - str::stream() << "Error checking for UUID in feature compatibility version collection: " - << fcvUUID.getStatus().toString(), - fcvUUID.isOK()); - uassert(ErrorCodes::IllegalOperation, - str::stream() << "Tried to complete upgrade, but " - << FeatureCompatibilityVersion::kCollection - << " did not have a UUID.", - fcvUUID.getValue()); -} - -} // namespace - void FeatureCompatibilityVersion::onInsertOrUpdate(OperationContext* opCtx, const BSONObj& doc) { auto idElement = doc["_id"]; if (idElement.type() != BSONType::String || @@ -384,14 +319,6 @@ void FeatureCompatibilityVersion::onInsertOrUpdate(OperationContext* opCtx, cons } auto newVersion = uassertStatusOK(FeatureCompatibilityVersion::parse(doc)); - uassertDuringRollbackOnDowngradeOp(opCtx, - newVersion, - str::stream() - << "Must be in primary or secondary state to " - "downgrade feature compatibility version document: " - << redact(doc)); - uassertDuringInvalidUpgradeOp(opCtx, newVersion); - // To avoid extra log messages when the targetVersion is set/unset, only log when the version // changes. auto oldVersion = serverGlobalParams.featureCompatibility.getVersion(); @@ -423,11 +350,6 @@ void FeatureCompatibilityVersion::onDelete(OperationContext* opCtx, const BSONOb return; } - uassertDuringRollbackOnDowngradeOp( - opCtx, - ServerGlobalParams::FeatureCompatibility::Version::kDowngradingTo34, - "Must be in primary or secondary state to delete feature compatibility version document"); - log() << "setting featureCompatibilityVersion to " << FeatureCompatibilityVersionCommandParser::kVersion34; opCtx->recoveryUnit()->onCommit([]() { @@ -438,14 +360,6 @@ void FeatureCompatibilityVersion::onDelete(OperationContext* opCtx, const BSONOb } void FeatureCompatibilityVersion::onDropCollection(OperationContext* opCtx) { - - uassertDuringRollbackOnDowngradeOp( - opCtx, - ServerGlobalParams::FeatureCompatibility::Version::kDowngradingTo34, - str::stream() << "Must be in primary or secondary state to drop " - << FeatureCompatibilityVersion::kCollection - << " collection"); - log() << "setting featureCompatibilityVersion to " << FeatureCompatibilityVersionCommandParser::kVersion34; opCtx->recoveryUnit()->onCommit([]() { diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp index 472d346f0fa..87409298366 100644 --- a/src/mongo/db/db.cpp +++ b/src/mongo/db/db.cpp @@ -1268,13 +1268,11 @@ void shutdownTask() { opCtx = uniqueOpCtx.get(); } - if (serverGlobalParams.featureCompatibility.getVersion() < + // TODO: Upgrade this check so that this block only runs when (FCV != kFullyUpgradedTo38). + // See SERVER-32589. + if (serverGlobalParams.featureCompatibility.getVersion() != ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36) { - log(LogComponent::kReplication) << "shutdown: removing all drop-pending collections..."; - repl::DropPendingCollectionReaper::get(serviceContext) - ->dropCollectionsOlderThan(opCtx, repl::OpTime::max()); - - // If we are in fCV 3.4, drop the 'checkpointTimestamp' collection so if we downgrade + // If we are in fCV 3.6, drop the 'checkpointTimestamp' collection so if we downgrade // and then upgrade again, we do not trust a stale 'checkpointTimestamp'. 
log(LogComponent::kReplication) << "shutdown: removing checkpointTimestamp collection..."; diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 4cc74eabb31..ed586a34868 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -128,7 +128,6 @@ env.Library( 'rollback_impl', 'rollback_source_impl', 'rs_rollback', - 'rs_rollback_no_uuid', 'storage_interface', 'sync_source_resolver', '$BUILD_DIR/mongo/client/connection_pool', @@ -493,24 +492,6 @@ env.Library( ) env.Library( - target='rs_rollback_no_uuid', - source=[ - 'rs_rollback_no_uuid.cpp', - ], - LIBDEPS=[ - 'oplog', - 'replication_process', - 'roll_back_local_operations', - 'rslog', - '$BUILD_DIR/mongo/db/catalog/collection_options', - '$BUILD_DIR/mongo/db/catalog/database_holder', - '$BUILD_DIR/mongo/db/s/sharding', - '$BUILD_DIR/mongo/util/fail_point', - '$BUILD_DIR/mongo/db/dbhelpers', - ], -) - -env.Library( target='rs_rollback', source=[ 'rs_rollback.cpp', @@ -566,7 +547,6 @@ env.Library( 'replication_process', 'replmocks', 'rs_rollback', - 'rs_rollback_no_uuid', 'storage_interface_impl', '$BUILD_DIR/mongo/db/query_exec', '$BUILD_DIR/mongo/db/service_context', @@ -586,18 +566,6 @@ env.CppUnitTest( ], ) -env.CppUnitTest( - target='rs_rollback_no_uuid_test', - source=[ - 'rs_rollback_no_uuid_test.cpp', - ], - LIBDEPS=[ - 'oplog_interface_local', - 'rollback_test_fixture', - 'rs_rollback_no_uuid', - ], -) - env.Library( target='rollback_impl', source=[ diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index 4ef648d0b12..22b4788808d 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -50,7 +50,6 @@ #include "mongo/db/repl/replication_process.h" #include "mongo/db/repl/rollback_source_impl.h" #include "mongo/db/repl/rs_rollback.h" -#include "mongo/db/repl/rs_rollback_no_uuid.h" #include "mongo/db/repl/rs_sync.h" #include "mongo/db/repl/storage_interface.h" #include "mongo/db/s/shard_identity_rollback_notifier.h" @@ -646,24 +645,8 @@ void BackgroundSync::_runRollback(OperationContext* opCtx, return connection->get(); }; - // Run a rollback algorithm that either uses UUIDs or does not use UUIDs depending on - // the FCV. Since collection UUIDs were only added in 3.6, the 3.4 rollback algorithm - // remains in place to maintain backwards compatibility. - if (serverGlobalParams.featureCompatibility.getVersion() >= - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36) { - // If the user is fully upgraded to FCV 3.6, use the "rollbackViaRefetch" method. - log() << "Rollback using the 'rollbackViaRefetch' method because UUID support " - "is feature compatible with featureCompatibilityVersion 3.6."; - _fallBackOnRollbackViaRefetch( - opCtx, source, requiredRBID, &localOplog, true, getConnection); - } else { - // If the user is either fully downgraded to FCV 3.4, downgrading to FCV 3.4, - // or upgrading to FCV 3.6, use the "rollbackViaRefetchNoUUID" method. - log() << "Rollback using the 'rollbackViaRefetchNoUUID' method because UUID " - "support is not feature compatible with featureCompatibilityVersion 3.4"; - _fallBackOnRollbackViaRefetch( - opCtx, source, requiredRBID, &localOplog, false, getConnection); - } + log() << "Rollback using the 'rollbackViaRefetch' method."; + _fallBackOnRollbackViaRefetch(opCtx, source, requiredRBID, &localOplog, getConnection); // Reset the producer to clear the sync source and the last optime fetched. 
stop(true); @@ -705,18 +688,12 @@ void BackgroundSync::_fallBackOnRollbackViaRefetch( const HostAndPort& source, int requiredRBID, OplogInterface* localOplog, - bool useUUID, OplogInterfaceRemote::GetConnectionFn getConnection) { RollbackSourceImpl rollbackSource( getConnection, source, NamespaceString::kRsOplogNamespace.ns()); - if (useUUID) { - rollback(opCtx, *localOplog, rollbackSource, requiredRBID, _replCoord, _replicationProcess); - } else { - rollbackNoUUID( - opCtx, *localOplog, rollbackSource, requiredRBID, _replCoord, _replicationProcess); - } + rollback(opCtx, *localOplog, rollbackSource, requiredRBID, _replCoord, _replicationProcess); } HostAndPort BackgroundSync::getSyncTarget() const { diff --git a/src/mongo/db/repl/bgsync.h b/src/mongo/db/repl/bgsync.h index cf1f064c4f5..99e5584d0d2 100644 --- a/src/mongo/db/repl/bgsync.h +++ b/src/mongo/db/repl/bgsync.h @@ -187,12 +187,10 @@ private: OplogInterfaceRemote::GetConnectionFn getConnection); /** - * Executes a rollback via refetch in either rs_rollback.cpp or rs_rollback_no_uuid.cpp + * Executes a rollback via refetch in rs_rollback.cpp. * - * We fall back on the rollback via refetch algorithm when: - * 1) the server parameter "rollbackMethod" is set to "rollbackViaRefetch" or - * "rollbackViaRefetchNoUUID"; or - * 2) the storage engine does not support "rollback to a checkpoint." + * We fall back on the rollback via refetch algorithm when the storage engine does not support + * "rollback to a checkpoint." * * Must be called from _runRollback() which ensures that all the conditions for entering * rollback have been met. @@ -201,7 +199,6 @@ private: const HostAndPort& source, int requiredRBID, OplogInterface* localOplog, - bool useUUID, OplogInterfaceRemote::GetConnectionFn getConnection); // restart syncing diff --git a/src/mongo/db/repl/idempotency_test.cpp b/src/mongo/db/repl/idempotency_test.cpp index d001f704508..56866630784 100644 --- a/src/mongo/db/repl/idempotency_test.cpp +++ b/src/mongo/db/repl/idempotency_test.cpp @@ -192,12 +192,6 @@ void RandomizedIdempotencyTest::runIdempotencyTestCase() { } } -TEST_F(RandomizedIdempotencyTest, CheckUpdateSequencesAreIdempotentWhenFCV34) { - serverGlobalParams.featureCompatibility.setVersion( - ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo34); - runIdempotencyTestCase(); -} - TEST_F(RandomizedIdempotencyTest, CheckUpdateSequencesAreIdempotentWhenFCV36) { serverGlobalParams.featureCompatibility.setVersion( ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36); diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp index 1e110ebc971..94e0ac6d63f 100644 --- a/src/mongo/db/repl/oplog_fetcher.cpp +++ b/src/mongo/db/repl/oplog_fetcher.cpp @@ -369,11 +369,7 @@ BSONObj OplogFetcher::_makeFindCommandObject(const NamespaceString& nss, // TODO(SERVER-30977): Remove the term comparison when this ticket is fixed. if (term == lastOpTimeFetched.getTerm()) { - cmdBob.append("readConcern", - (serverGlobalParams.featureCompatibility.getVersion() < - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36) - ? 
BSON("afterOpTime" << lastOpTimeFetched) - : BSON("afterClusterTime" << lastOpTimeFetched.getTimestamp())); + cmdBob.append("readConcern", BSON("afterClusterTime" << lastOpTimeFetched.getTimestamp())); } return cmdBob.obj(); diff --git a/src/mongo/db/repl/oplog_fetcher_test.cpp b/src/mongo/db/repl/oplog_fetcher_test.cpp index 598d8faa245..352298b195a 100644 --- a/src/mongo/db/repl/oplog_fetcher_test.cpp +++ b/src/mongo/db/repl/oplog_fetcher_test.cpp @@ -280,33 +280,6 @@ TEST_F(OplogFetcherTest, FindQueryHasNoReadconcernIfTermUninitialized) { ASSERT_FALSE(cmdObj.hasField("readConcern")); } -TEST_F(OplogFetcherTest, FindQueryHasAfterOpTimeWithFeatureCompatibilityVersion34) { - EnsureFCV ensureFCV(EnsureFCV::Version::kFullyDowngradedTo34); - ASSERT(serverGlobalParams.featureCompatibility.getVersion() != - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36); - auto cmdObj = makeOplogFetcher(_createConfig(true))->getFindQuery_forTest(); - auto readConcernElem = cmdObj["readConcern"]; - ASSERT_EQUALS(mongo::BSONType::Object, readConcernElem.type()); - ASSERT_FALSE(readConcernElem.Obj().hasField("afterClusterTime")); - ASSERT_BSONOBJ_EQ(readConcernElem.Obj(), BSON("afterOpTime" << lastFetched.opTime)); - - _checkDefaultCommandObjectFields(cmdObj); -} - -TEST_F(OplogFetcherTest, FindQueryHasAfterOpTimeWithFeatureCompatibilityVersion36) { - EnsureFCV ensureFCV(EnsureFCV::Version::kFullyUpgradedTo36); - ASSERT(serverGlobalParams.featureCompatibility.getVersion() == - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36); - auto cmdObj = makeOplogFetcher(_createConfig(true))->getFindQuery_forTest(); - auto readConcernElem = cmdObj["readConcern"]; - ASSERT_EQUALS(mongo::BSONType::Object, readConcernElem.type()); - ASSERT_FALSE(readConcernElem.Obj().hasField("afterOpTime")); - ASSERT_BSONOBJ_EQ(readConcernElem.Obj(), - BSON("afterClusterTime" << lastFetched.opTime.getTimestamp())); - - _checkDefaultCommandObjectFields(cmdObj); -} - TEST_F(OplogFetcherTest, InvalidReplSetMetadataInResponseStopsTheOplogFetcher) { auto shutdownState = processSingleBatch( {makeCursorResponse(0, {makeNoopOplogEntry(lastFetched)}), diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.cpp b/src/mongo/db/repl/replication_consistency_markers_impl.cpp index d50c5f14e34..c51f00e15f3 100644 --- a/src/mongo/db/repl/replication_consistency_markers_impl.cpp +++ b/src/mongo/db/repl/replication_consistency_markers_impl.cpp @@ -359,12 +359,6 @@ Timestamp ReplicationConsistencyMarkersImpl::getOplogTruncateAfterPoint( OperationContext* opCtx) const { auto doc = _getOplogTruncateAfterPointDocument(opCtx); if (!doc) { - if (serverGlobalParams.featureCompatibility.getVersion() < - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36) { - LOG(3) << "Falling back on old oplog delete from point because there is no oplog " - "truncate after point and we are in FCV 3.4."; - return _getOldOplogDeleteFromPoint(opCtx); - } LOG(3) << "Returning empty oplog truncate after point since document did not exist"; return {}; } diff --git a/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp b/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp index 9abfb52c525..729dad6becf 100644 --- a/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp +++ b/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp @@ -336,76 +336,4 @@ TEST_F(ReplicationConsistencyMarkersTest, SetMinValidOnPVChange) { OpTime upgradeOpTime({Seconds(70), 0}, 0LL); 
advanceAndCheckMinValidOpTime(upgradeOpTime, upgradeOpTime); } - -TEST_F(ReplicationConsistencyMarkersTest, OplogTruncateAfterPointUpgrade) { - auto minValidNss = makeNamespace(_agent, "minValid"); - auto oplogTruncateAfterPointNss = makeNamespace(_agent, "oplogTruncateAfterPoint"); - auto checkpointTimestampNss = makeNamespace(_agent, "checkpointTimestamp"); - - ReplicationConsistencyMarkersImpl consistencyMarkers( - getStorageInterface(), minValidNss, oplogTruncateAfterPointNss, checkpointTimestampNss); - auto opCtx = getOperationContext(); - Timestamp time1(Seconds(123), 0); - Timestamp time2(Seconds(456), 0); - OpTime minValidTime(Timestamp(789), 2); - - // Insert the old oplogDeleteFromPoint and make sure getOplogTruncateAfterPoint() returns it. - ASSERT_OK(getStorageInterface()->createCollection(opCtx, minValidNss, {})); - ASSERT_OK(getStorageInterface()->insertDocument( - opCtx, - minValidNss, - TimestampedBSONObj{BSON("_id" << OID::gen() << MinValidDocument::kMinValidTimestampFieldName - << minValidTime.getTimestamp() - << MinValidDocument::kMinValidTermFieldName - << minValidTime.getTerm() - << MinValidDocument::kOldOplogDeleteFromPointFieldName - << time1), - Timestamp(0)}, - OpTime::kUninitializedTerm)); - consistencyMarkers.initializeMinValidDocument(opCtx); - - // Set the feature compatibility version to 3.6. - serverGlobalParams.featureCompatibility.setVersion( - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36); - - // Check that we see no oplog truncate after point in FCV 3.6. - ASSERT(consistencyMarkers.getOplogTruncateAfterPoint(opCtx).isNull()); - ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), minValidTime); - - // Set the feature compatibility version to 3.4. - serverGlobalParams.featureCompatibility.setVersion( - ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo34); - - // Check that we see the old oplog delete from point in FCV 3.4. - ASSERT_EQ(consistencyMarkers.getOplogTruncateAfterPoint(opCtx), time1); - ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), minValidTime); - - // Check that the minValid document has the oplog delete from point. - auto minValidDocument = getMinValidDocument(opCtx, minValidNss); - ASSERT_TRUE(minValidDocument.hasField(MinValidDocument::kOldOplogDeleteFromPointFieldName)); - - consistencyMarkers.removeOldOplogDeleteFromPointField(opCtx); - - // Check that the minValid document does not have the oplog delete from point. - minValidDocument = getMinValidDocument(opCtx, minValidNss); - ASSERT_FALSE(minValidDocument.hasField(MinValidDocument::kOldOplogDeleteFromPointFieldName)); - - // Check that after removing the old oplog delete from point, that we do not see the oplog - // truncate after point in FCV 3.4. - ASSERT(consistencyMarkers.getOplogTruncateAfterPoint(opCtx).isNull()); - ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), minValidTime); - - // Set the feature compatibility version to 3.6. - serverGlobalParams.featureCompatibility.setVersion( - ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36); - - // Check that after removing the old oplog delete from point, that we do not see the oplog - // truncate after point in FCV 3.6. - ASSERT(consistencyMarkers.getOplogTruncateAfterPoint(opCtx).isNull()); - ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), minValidTime); - - // Check that we can set the oplog truncate after point. 
- consistencyMarkers.setOplogTruncateAfterPoint(opCtx, time2); - ASSERT_EQ(consistencyMarkers.getOplogTruncateAfterPoint(opCtx), time2); -} } // namespace diff --git a/src/mongo/db/repl/rollback_test_fixture.cpp b/src/mongo/db/repl/rollback_test_fixture.cpp index ed38f2453b4..00f90b00b6b 100644 --- a/src/mongo/db/repl/rollback_test_fixture.cpp +++ b/src/mongo/db/repl/rollback_test_fixture.cpp @@ -43,7 +43,6 @@ #include "mongo/db/repl/replication_process.h" #include "mongo/db/repl/replication_recovery_mock.h" #include "mongo/db/repl/rs_rollback.h" -#include "mongo/db/repl/rs_rollback_no_uuid.h" #include "mongo/db/session_catalog.h" #include "mongo/logger/log_component.h" #include "mongo/logger/logger.h" @@ -222,18 +221,9 @@ RollbackResyncsCollectionOptionsTest::RollbackSourceWithCollectionOptions:: BSONObj collOptionsObj) : RollbackSourceMock(std::move(oplog)), collOptionsObj(collOptionsObj) {} - -StatusWith<BSONObj> -RollbackResyncsCollectionOptionsTest::RollbackSourceWithCollectionOptions::getCollectionInfo( - const NamespaceString& nss) const { - calledNoUUID = true; - return BSON("options" << collOptionsObj); -} - StatusWith<BSONObj> RollbackResyncsCollectionOptionsTest::RollbackSourceWithCollectionOptions::getCollectionInfoByUUID( const std::string& db, const UUID& uuid) const { - calledWithUUID = true; return BSON("options" << collOptionsObj << "info" << BSON("uuid" << uuid)); } @@ -268,29 +258,12 @@ void RollbackResyncsCollectionOptionsTest::resyncCollectionOptionsTest( std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation})), remoteCollOptionsObj); - if (coll->uuid()) { - ASSERT_OK( - syncRollback(_opCtx.get(), - OplogInterfaceMock({collectionModificationOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - - ASSERT_TRUE(rollbackSource.calledWithUUID); - ASSERT_FALSE(rollbackSource.calledNoUUID); - } else { - ASSERT_OK(syncRollbackNoUUID( - _opCtx.get(), - OplogInterfaceMock({collectionModificationOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - - ASSERT_TRUE(rollbackSource.calledNoUUID); - ASSERT_FALSE(rollbackSource.calledWithUUID); - } + ASSERT_OK(syncRollback(_opCtx.get(), + OplogInterfaceMock({collectionModificationOperation, commonOperation}), + rollbackSource, + {}, + _coordinator, + _replicationProcess.get())); // Make sure the collection options are correct. AutoGetCollectionForReadCommand autoColl(_opCtx.get(), NamespaceString(nss.toString())); diff --git a/src/mongo/db/repl/rollback_test_fixture.h b/src/mongo/db/repl/rollback_test_fixture.h index 29a301b8e1c..c90acf64068 100644 --- a/src/mongo/db/repl/rollback_test_fixture.h +++ b/src/mongo/db/repl/rollback_test_fixture.h @@ -184,12 +184,9 @@ class RollbackResyncsCollectionOptionsTest : public RollbackTest { RollbackSourceWithCollectionOptions(std::unique_ptr<OplogInterface> oplog, BSONObj collOptionsObj); - StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const override; StatusWith<BSONObj> getCollectionInfoByUUID(const std::string& db, const UUID& uuid) const override; - mutable bool calledNoUUID = false; - mutable bool calledWithUUID = false; BSONObj collOptionsObj; }; diff --git a/src/mongo/db/repl/rs_rollback_no_uuid.cpp b/src/mongo/db/repl/rs_rollback_no_uuid.cpp deleted file mode 100644 index 7b87b8dbc1c..00000000000 --- a/src/mongo/db/repl/rs_rollback_no_uuid.cpp +++ /dev/null @@ -1,1143 +0,0 @@ -/** -* @file rs_rollback_no_uuid.cpp -* -* Copyright (C) 2008-2017 MongoDB Inc. 
-* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - -#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplicationRollback - -#include "mongo/platform/basic.h" - -#include "mongo/db/repl/rs_rollback_no_uuid.h" - -#include <algorithm> -#include <memory> - -#include "mongo/bson/bsonelement_comparator.h" -#include "mongo/bson/util/bson_extract.h" -#include "mongo/db/auth/authorization_manager.h" -#include "mongo/db/auth/authorization_manager_global.h" -#include "mongo/db/catalog/collection.h" -#include "mongo/db/catalog/collection_catalog_entry.h" -#include "mongo/db/catalog/document_validation.h" -#include "mongo/db/catalog/index_catalog.h" -#include "mongo/db/client.h" -#include "mongo/db/commands.h" -#include "mongo/db/concurrency/write_conflict_exception.h" -#include "mongo/db/db_raii.h" -#include "mongo/db/dbhelpers.h" -#include "mongo/db/exec/working_set_common.h" -#include "mongo/db/ops/delete.h" -#include "mongo/db/ops/update.h" -#include "mongo/db/ops/update_lifecycle_impl.h" -#include "mongo/db/ops/update_request.h" -#include "mongo/db/query/internal_plans.h" -#include "mongo/db/repl/bgsync.h" -#include "mongo/db/repl/oplog.h" -#include "mongo/db/repl/oplog_interface.h" -#include "mongo/db/repl/replication_coordinator.h" -#include "mongo/db/repl/replication_coordinator_impl.h" -#include "mongo/db/repl/replication_process.h" -#include "mongo/db/repl/roll_back_local_operations.h" -#include "mongo/db/repl/rollback_source.h" -#include "mongo/db/repl/rslog.h" -#include "mongo/db/s/shard_identity_rollback_notifier.h" -#include "mongo/util/exit.h" -#include "mongo/util/fail_point_service.h" -#include "mongo/util/log.h" -#include "mongo/util/scopeguard.h" - -namespace mongo { - -using std::shared_ptr; -using std::unique_ptr; -using std::endl; -using std::list; -using std::map; -using std::multimap; -using std::set; -using std::string; -using std::pair; - -namespace repl { - -using namespace rollback_internal_no_uuid; - -bool DocID::operator<(const DocID& other) const { - int comp = strcmp(ns, other.ns); - if (comp < 0) - return true; - if (comp > 0) - return false; - - const StringData::ComparatorInterface* stringComparator = nullptr; - 
BSONElementComparator eltCmp(BSONElementComparator::FieldNamesMode::kIgnore, stringComparator); - return eltCmp.evaluate(_id < other._id); -} - -bool DocID::operator==(const DocID& other) const { - // Since this is only used for tests, going with the simple impl that reuses operator< which is - // used in the real code. - return !(*this < other || other < *this); -} - -void FixUpInfo::removeAllDocsToRefetchFor(const std::string& collection) { - docsToRefetch.erase(docsToRefetch.lower_bound(DocID::minFor(collection.c_str())), - docsToRefetch.upper_bound(DocID::maxFor(collection.c_str()))); -} - -void FixUpInfo::removeRedundantOperations() { - // These loops and their bodies can be done in any order. The final result of the FixUpInfo - // members will be the same. - for (const auto& collection : collectionsToDrop) { - removeAllDocsToRefetchFor(collection); - indexesToDrop.erase(collection); - collectionsToResyncMetadata.erase(collection); - } - - for (const auto& collection : collectionsToResyncData) { - removeAllDocsToRefetchFor(collection); - indexesToDrop.erase(collection); - collectionsToResyncMetadata.erase(collection); - collectionsToDrop.erase(collection); - } -} - -void FixUpInfo::addIndexToDrop(const NamespaceString& nss, const DocID& doc) { - BSONObj obj = doc.ownedObj.getObjectField("o"); - if (!nss.isValid()) { - severe() << "Invalid collection namespace in createIndexes operation, document: " - << redact(doc.ownedObj); - throw RSFatalException( - str::stream() << "Invalid collection namespace in createIndexes operation, namespace: " - << doc.ns); - } - string indexName; - Status status = bsonExtractStringField(obj, "name", &indexName); - if (!status.isOK()) { - severe() << "Missing index name in createIndexes operation, document: " - << redact(doc.ownedObj); - throw RSFatalException("Missing index name in createIndexes operation."); - } - using ValueType = multimap<string, string>::value_type; - ValueType pairToInsert = std::make_pair(nss.ns(), indexName); - auto lowerBound = indexesToDrop.lower_bound(nss.ns()); - auto upperBound = indexesToDrop.upper_bound(nss.ns()); - auto matcher = [pairToInsert](const ValueType& pair) { return pair == pairToInsert; }; - if (!std::count_if(lowerBound, upperBound, matcher)) { - indexesToDrop.insert(pairToInsert); - } -} - -namespace { - -bool canRollBackCollMod(BSONObj obj) { - - // If there is only one field, then the collMod is a UUID upgrade/downgrade collMod. - if (obj.nFields() == 1) { - return true; - } - for (auto field : obj) { - // Example collMod obj - // o:{ - // collMod : "x", - // validationLevel : "off", - // index: { - // name: "indexName_1", - // expireAfterSeconds: 600 - // } - // } - - const auto modification = field.fieldNameStringData(); - if (modification == "collMod") { - continue; // Skips the command name. The first field in the obj will be the - // command name. - } - - if (modification == "validator" || modification == "validationAction" || - modification == "validationLevel" || modification == "usePowerOf2Sizes" || - modification == "noPadding") { - continue; - } - - // Some collMod fields cannot be rolled back, such as the index field. - return false; - } - return true; -} - -} // namespace - -Status rollback_internal_no_uuid::updateFixUpInfoFromLocalOplogEntry(FixUpInfo& fixUpInfo, - const BSONObj& ourObj) { - - const char* op = ourObj.getStringField("op"); - - if (*op == 'n') - return Status::OK(); - - // Checks that the oplog entry is smaller than 512 MB. 
We do not roll back if the - // oplog entry is larger than 512 MB. - if (ourObj.objsize() > 512 * 1024 * 1024) - throw RSFatalException(str::stream() << "Rollback too large, oplog size: " - << ourObj.objsize()); - - DocID doc; - doc.ownedObj = ourObj.getOwned(); - doc.ns = doc.ownedObj.getStringField("ns"); - - if (*doc.ns == '\0') { - throw RSFatalException(str::stream() << "Local op on rollback has no ns: " - << redact(doc.ownedObj)); - } - - BSONObj obj = doc.ownedObj.getObjectField(*op == 'u' ? "o2" : "o"); - - if (obj.isEmpty()) { - throw RSFatalException(str::stream() << "Local op on rollback has no object field: " - << redact(doc.ownedObj)); - } - - if (*op == 'c') { - - // The first element of the object is the name of the command - // and the collection it is acting on, e.x. {renameCollection: "test.x"}. - BSONElement first = obj.firstElement(); - - // Retrieves the namespace string. - NamespaceString nss(doc.ns); // foo.$cmd - - // Retrieves the command name, so out of {renameCollection: "test.x"} it returns - // "renameCollection". - string cmdname = first.fieldName(); - Command* cmd = CommandHelpers::findCommand(cmdname.c_str()); - if (cmd == NULL) { - severe() << "Rollback no such command " << first.fieldName(); - return Status(ErrorCodes::UnrecoverableRollbackError, - str::stream() << "Rollback no such command " << first.fieldName()); - } - if (cmdname == "create") { - // Create collection operation - // { - // ts: ..., - // h: ..., - // op: "c", - // ns: "foo.$cmd", - // o: { - // create: "abc", ... - // } - // ... - // } - - string ns = nss.db().toString() + '.' + first.valuestr(); // -> foo.abc - fixUpInfo.collectionsToDrop.insert(ns); - return Status::OK(); - } else if (cmdname == "createIndexes") { - // Create indexes operation - // { - // ts: ..., - // h: ..., - // op: "c", - // ns: "foo.$cmd", - // o: { - // createIndexes: "abc", - // v : 2, - // key : { - // z : 1 - // }, - // name : "z_1" - // } - // ... - // } - fixUpInfo.addIndexToDrop(NamespaceString(nss.db(), first.valuestr()), doc); - return Status::OK(); - } else if (cmdname == "drop") { - // Drop collection operation - // { - // ts: ..., - // h: ..., - // op: "c", - // ns: "foo.$cmd", - // o: { - // drop: "abc" - // } - // ... - // } - string ns = nss.db().toString() + '.' + first.valuestr(); // -> foo.abc - fixUpInfo.collectionsToResyncData.insert(ns); - return Status::OK(); - } else if (cmdname == "dropIndexes" || cmdname == "deleteIndexes") { - // TODO: This is bad. We simply full resync the collection here, - // which could be very slow. - warning() << "Rollback of dropIndexes is slow in this version of " - << "mongod."; - string ns = nss.db().toString() + '.' + first.valuestr(); - fixUpInfo.collectionsToResyncData.insert(ns); - return Status::OK(); - } else if (cmdname == "renameCollection") { - // TODO: Slow. - warning() << "Rollback of renameCollection is slow in this version of " - << "mongod."; - string from = first.valuestr(); - string to = obj["to"].String(); - fixUpInfo.collectionsToResyncData.insert(from); - fixUpInfo.collectionsToResyncData.insert(to); - return Status::OK(); - } else if (cmdname == "dropDatabase") { - string message = "Can't roll back drop database. 
Full resync will be required."; - severe() << message << redact(obj); - throw RSFatalException(message); - } else if (cmdname == "collMod") { - if (canRollBackCollMod(obj)) { - const auto ns = NamespaceString(cmd->parseNs(nss.db().toString(), obj)); - fixUpInfo.collectionsToResyncMetadata.insert(ns.ns()); - return Status::OK(); - } - string message = "Cannot roll back a collMod command: "; - severe() << message << redact(obj); - throw RSFatalException(message); - } else if (cmdname == "applyOps") { - - if (first.type() != Array) { - std::string message = str::stream() - << "Expected applyOps argument to be an array; found " << redact(first); - severe() << message; - return Status(ErrorCodes::UnrecoverableRollbackError, message); - } - for (const auto& subopElement : first.Array()) { - if (subopElement.type() != Object) { - std::string message = str::stream() - << "Expected applyOps operations to be of Object type, but found " - << redact(subopElement); - severe() << message; - return Status(ErrorCodes::UnrecoverableRollbackError, message); - } - // In applyOps, the object contains an array of different oplog entries, we call - // updateFixUpInfoFromLocalOplogEntry here in order to record the information - // needed for rollback that is contained within the applyOps, creating a nested - // call. - auto subStatus = updateFixUpInfoFromLocalOplogEntry(fixUpInfo, subopElement.Obj()); - if (!subStatus.isOK()) { - return subStatus; - } - } - return Status::OK(); - } else { - std::string message = str::stream() << "Can't roll back this command yet: " - << " cmdname = " << cmdname; - severe() << message << " document: " << redact(obj); - throw RSFatalException(message); - } - } - - NamespaceString nss(doc.ns); - if (nss.isSystemDotIndexes()) { - if (*op != 'i') { - std::string message = str::stream() << "Unexpected operation type '" << *op - << "' on system.indexes operation, " - << "document: "; - severe() << message << redact(doc.ownedObj); - throw RSFatalException(message); - } - string objNs; - auto status = bsonExtractStringField(obj, "ns", &objNs); - if (!status.isOK()) { - severe() << "Missing collection namespace in system.indexes operation, document: " - << redact(doc.ownedObj); - throw RSFatalException("Missing collection namespace in system.indexes operation."); - } - NamespaceString objNss(objNs); - fixUpInfo.addIndexToDrop(objNss, doc); - return Status::OK(); - } - - doc._id = obj["_id"]; - if (doc._id.eoo()) { - std::string message = str::stream() << "Cannot roll back op with no _id. ns: " << doc.ns; - severe() << message << ", document: " << redact(doc.ownedObj); - throw RSFatalException(message); - } - - fixUpInfo.docsToRefetch.insert(doc); - return Status::OK(); -} - -namespace { - -/** - * This must be called before making any changes to our local data and after fetching any - * information from the upstream node. If any information is fetched from the upstream node after we - * have written locally, the function must be called again. - */ -void checkRbidAndUpdateMinValid(OperationContext* opCtx, - const int rbid, - const RollbackSource& rollbackSource, - ReplicationProcess* replicationProcess) { - // It is important that the steps are performed in order to avoid racing with upstream - // rollbacks. - // 1. Gets the last doc in their oplog. - // 2. Gets their RBID and fail if it has changed. - // 3. Sets our minValid to the previously fetched OpTime of the top of their oplog. 
- const auto newMinValidDoc = rollbackSource.getLastOperation(); - if (newMinValidDoc.isEmpty()) { - uasserted(40361, "rollback error newest oplog entry on source is missing or empty"); - } - if (rbid != rollbackSource.getRollbackId()) { - // Our source rolled back so the data we received is not necessarily consistent. - uasserted(40365, "rollback rbid on source changed during rollback, canceling this attempt"); - } - - // We have items we are writing that aren't from a point-in-time. Thus, it is best not to come - // online until we get to that point in freshness. In other words, we do not transition from - // RECOVERING state to SECONDARY state until we have reached the minValid oplog entry. - - OpTime minValid = fassertStatusOK(28774, OpTime::parseFromOplogEntry(newMinValidDoc)); - log() << "Setting minvalid to " << minValid; - replicationProcess->getConsistencyMarkers()->clearAppliedThrough(opCtx, {}); - replicationProcess->getConsistencyMarkers()->setMinValid(opCtx, minValid); - - if (MONGO_FAIL_POINT(rollbackHangThenFailAfterWritingMinValid)) { - - // This log output is used in jstests so please leave it. - log() << "rollback - rollbackHangThenFailAfterWritingMinValid fail point " - "enabled. Blocking until fail point is disabled."; - while (MONGO_FAIL_POINT(rollbackHangThenFailAfterWritingMinValid)) { - invariant(!globalInShutdownDeprecated()); // It is an error to shutdown while enabled. - mongo::sleepsecs(1); - } - uasserted(40378, - "failing rollback due to rollbackHangThenFailAfterWritingMinValid fail point"); - } -} - -void syncFixUp(OperationContext* opCtx, - const FixUpInfo& fixUpInfo, - const RollbackSource& rollbackSource, - ReplicationCoordinator* replCoord, - ReplicationProcess* replicationProcess) { - unsigned long long totalSize = 0; - - // namespace -> doc id -> doc - map<string, map<DocID, BSONObj>> goodVersions; - - // Fetches all the goodVersions of each document from the current sync source. - unsigned long long numFetched = 0; - - log() << "Starting refetching documents"; - - for (auto&& doc : fixUpInfo.docsToRefetch) { - invariant(!doc._id.eoo()); // This is checked when we insert to the set. - - try { - LOG(2) << "Refetching document, namespace: " << doc.ns << ", _id: " << redact(doc._id); - // TODO : Slow. Lots of round trips. - numFetched++; - BSONObj good = rollbackSource.findOne(NamespaceString(doc.ns), doc._id.wrap()); - totalSize += good.objsize(); - - // Checks that the total amount of data that needs to be refetched is at most - // 300 MB. We do not roll back more than 300 MB of documents in order to - // prevent out of memory errors from too much data being stored. See SERVER-23392. - if (totalSize >= 300 * 1024 * 1024) { - throw RSFatalException("replSet too much data to roll back."); - } - - // Note good might be empty, indicating we should delete it. - goodVersions[doc.ns][doc] = good; - } catch (const DBException& ex) { - // If the collection turned into a view, we might get an error trying to - // refetch documents, but these errors should be ignored, as we'll be creating - // the view during oplog replay. - if (ex.code() == ErrorCodes::CommandNotSupportedOnView) - continue; - - log() << "Rollback couldn't re-get from ns: " << doc.ns << " _id: " << redact(doc._id) - << ' ' << numFetched << '/' << fixUpInfo.docsToRefetch.size() << ": " - << redact(ex); - throw; - } - } - - log() << "Finished refetching documents. 
Total size of documents refetched: " - << goodVersions.size(); - - log() << "Checking the RollbackID and updating the MinValid if necessary"; - - checkRbidAndUpdateMinValid(opCtx, fixUpInfo.rbid, rollbackSource, replicationProcess); - - invariant(!fixUpInfo.commonPointOurDiskloc.isNull()); - - // Full collection data and metadata resync. - if (!fixUpInfo.collectionsToResyncData.empty() || - !fixUpInfo.collectionsToResyncMetadata.empty()) { - - // Reloads the collection data from the sync source in order - // to roll back a drop/dropIndexes/renameCollection operation. - for (const string& ns : fixUpInfo.collectionsToResyncData) { - log() << "Resyncing collection, namespace: " << ns; - - invariant(!fixUpInfo.indexesToDrop.count(ns)); - invariant(!fixUpInfo.collectionsToResyncMetadata.count(ns)); - - const NamespaceString nss(ns); - - { - Lock::DBLock dbLock(opCtx, nss.db(), MODE_X); - Database* db = dbHolder().openDb(opCtx, nss.db().toString()); - invariant(db); - WriteUnitOfWork wunit(opCtx); - fassertStatusOK(40359, db->dropCollectionEvenIfSystem(opCtx, nss)); - wunit.commit(); - } - - rollbackSource.copyCollectionFromRemote(opCtx, nss); - } - - // Retrieves collections from the sync source in order to obtain - // the collection flags needed to roll back collMod operations. - for (const string& ns : fixUpInfo.collectionsToResyncMetadata) { - log() << "Resyncing collection metadata, namespace: " << ns; - - const NamespaceString nss(ns); - Lock::DBLock dbLock(opCtx, nss.db(), MODE_X); - auto db = dbHolder().openDb(opCtx, nss.db().toString()); - invariant(db); - auto collection = db->getCollection(opCtx, nss); - invariant(collection); - auto cce = collection->getCatalogEntry(); - - auto infoResult = rollbackSource.getCollectionInfo(nss); - - if (!infoResult.isOK()) { - // The collection was dropped by the sync source so we can't correctly change it - // here. If we get to the roll-forward phase, we will drop it then. If the drop - // is rolled back upstream and we restart, we expect to still have the - // collection. - - log() << ns << " not found on remote host, so we do not roll back collmod " - "operation. Instead, we will drop the collection soon."; - continue; - } - - auto info = infoResult.getValue(); - CollectionOptions options; - - // Updates the collection flags. - if (auto optionsField = info["options"]) { - if (optionsField.type() != Object) { - throw RSFatalException(str::stream() << "Failed to parse options " - << redact(info) - << ": expected 'options' to be an " - << "Object, got " - << typeName(optionsField.type())); - } - - auto status = options.parse(optionsField.Obj(), CollectionOptions::parseForCommand); - if (!status.isOK()) { - throw RSFatalException(str::stream() << "Failed to parse options " << info - << ": " - << status.toString()); - } - } else { - // Use default options. - } - - WriteUnitOfWork wuow(opCtx); - - // Set collection to whatever temp status is on the sync source. - cce->setIsTemp(opCtx, options.temp); - - // Resets collection user flags such as noPadding and usePowerOf2Sizes. - if (options.flagsSet || cce->getCollectionOptions(opCtx).flagsSet) { - cce->updateFlags(opCtx, options.flags); - } - - // Set any document validation options. We update the validator fields without - // parsing/validation, since we fetched the options object directly from the sync - // source, and we should set our validation options to match it exactly. 
- auto validatorStatus = collection->updateValidator( - opCtx, options.validator, options.validationLevel, options.validationAction); - if (!validatorStatus.isOK()) { - throw RSFatalException( - str::stream() << "Failed to update validator for " << nss.toString() << " with " - << redact(info) - << ". Got: " - << validatorStatus.toString()); - } - - OptionalCollectionUUID originalLocalUUID = collection->uuid(); - OptionalCollectionUUID remoteUUID = boost::none; - if (auto infoField = info["info"]) { - if (infoField.type() != Object) { - throw RSFatalException(str::stream() << "Failed to parse collection info " - << redact(info) - << ": expected 'info' to be an " - << "Object, got " - << typeName(infoField.type())); - } - auto infoFieldObj = infoField.Obj(); - if (infoFieldObj.hasField("uuid")) { - remoteUUID = boost::make_optional(UUID::parse(infoFieldObj)); - } - } - - // If the local collection has a UUID, it must match the remote UUID. If the sync source - // has no UUID or they do not match, we remove the local UUID and allow the 'collMod' - // operation on the sync source to add the UUID back. - if (originalLocalUUID) { - if (!remoteUUID) { - log() << "Removing UUID " << originalLocalUUID.get() << " from " << nss.ns() - << " because sync source had no UUID for namespace."; - cce->removeUUID(opCtx); - collection->refreshUUID(opCtx); - } else if (originalLocalUUID.get() != remoteUUID.get()) { - log() << "Removing UUID " << originalLocalUUID.get() << " from " << nss.ns() - << " because sync source had different UUID (" << remoteUUID.get() - << ") for collection."; - cce->removeUUID(opCtx); - collection->refreshUUID(opCtx); - } - } else if (remoteUUID && - (serverGlobalParams.featureCompatibility.getVersion() == - ServerGlobalParams::FeatureCompatibility::Version::kDowngradingTo34)) { - // If we are in the process of downgrading, and we have no UUID but the sync source - // does, then it is possible we will not see a 'collMod' during RECOVERING to add - // back in the UUID from the sync source. In that case, we add the sync source's - // UUID here. - invariant(!originalLocalUUID); - log() << "Assigning UUID " << remoteUUID.get() << " to " << nss.ns() - << " because we had no UUID, the sync source had a UUID, and we were in the " - "middle of downgrade."; - cce->addUUID(opCtx, remoteUUID.get(), collection); - collection->refreshUUID(opCtx); - } - - wuow.commit(); - - auto originalLocalUUIDString = - (originalLocalUUID) ? originalLocalUUID.get().toString() : "no UUID"; - auto remoteUuidString = (remoteUUID) ? remoteUUID.get().toString() : "no UUID"; - LOG(1) << "Resynced collection metadata for collection: " << nss - << ", original local UUID: " << originalLocalUUIDString - << ", remote UUID: " << remoteUuidString << ", with: " << redact(info) - << ", to: " << redact(cce->getCollectionOptions(opCtx).toBSON()); - } - - // Since we read from the sync source to retrieve the metadata of the - // collection, we must check if the sync source rolled back as well as update - // minValid if necessary. - log() << "Rechecking the Rollback ID and minValid"; - checkRbidAndUpdateMinValid(opCtx, fixUpInfo.rbid, rollbackSource, replicationProcess); - } - - log() << "Dropping collections to roll back create operations"; - - // Drops collections before updating individual documents. 
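Each collection dropped in the next step is first scanned and its documents written to an on-disk rollback archive, so nothing is discarded without a recoverable copy. A self-contained sketch of that archive-then-drop pattern under assumed names (archiveAndDrop and the file naming are illustrative, not the actual RemoveSaver format):

// Illustrative archive-then-drop sketch; container-based, not the storage engine API.
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using Collection = std::vector<std::string>;  // documents as opaque blobs

// Write every document to an archive file, then drop the collection.
// Returns false (and drops nothing) if the archive cannot be written.
bool archiveAndDrop(const std::string& ns, Collection& coll) {
    std::ofstream archive("rollback_" + ns + ".archive", std::ios::binary);
    for (const auto& doc : coll) {
        if (!(archive << doc << '\n')) {
            std::cerr << "failed to archive a document for " << ns << "\n";
            return false;  // mirrors the fatal path: do not drop if archiving fails
        }
    }
    coll.clear();  // the "drop"
    return true;
}

int main() {
    std::map<std::string, Collection> db{{"test.t", {"{_id: 1}", "{_id: 2}"}}};
    for (auto& [ns, coll] : db) {
        std::cout << ns << " dropped=" << archiveAndDrop(ns, coll) << "\n";
    }
}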
- for (set<string>::iterator it = fixUpInfo.collectionsToDrop.begin(); - it != fixUpInfo.collectionsToDrop.end(); - it++) { - log() << "Dropping collection: " << *it; - - invariant(!fixUpInfo.indexesToDrop.count(*it)); - - const NamespaceString nss(*it); - Lock::DBLock dbLock(opCtx, nss.db(), MODE_X); - Database* db = dbHolder().get(opCtx, nsToDatabaseSubstring(*it)); - if (db) { - Helpers::RemoveSaver removeSaver("rollback", "", *it); - - // Performs a collection scan and writes all documents in the collection to disk - // in order to keep an archive of items that were rolled back. - auto exec = InternalPlanner::collectionScan( - opCtx, *it, db->getCollection(opCtx, *it), PlanExecutor::YIELD_AUTO); - BSONObj curObj; - PlanExecutor::ExecState execState; - while (PlanExecutor::ADVANCED == (execState = exec->getNext(&curObj, NULL))) { - auto status = removeSaver.goingToDelete(curObj); - if (!status.isOK()) { - severe() << "Rolling back createCollection on " << *it - << " failed to write document to remove saver file: " - << redact(status); - throw RSFatalException( - "Rolling back createCollection. Failed to write document to remove saver " - "file."); - } - } - - // If we exited the above for loop with any other execState than IS_EOF, this means that - // a FAILURE or DEAD state was returned. If a DEAD state occurred, the collection or - // database that we are attempting to save may no longer be valid. If a FAILURE state - // was returned, either an unrecoverable error was thrown by exec, or we attempted to - // retrieve data that could not be provided by the PlanExecutor. In both of these cases - // it is necessary for a full resync of the server. - - if (execState != PlanExecutor::IS_EOF) { - if (execState == PlanExecutor::FAILURE && - WorkingSetCommon::isValidStatusMemberObject(curObj)) { - Status errorStatus = WorkingSetCommon::getMemberObjectStatus(curObj); - severe() << "Rolling back createCollection on " << *it << " failed with " - << redact(errorStatus) << ". A full resync is necessary."; - throw RSFatalException( - "Rolling back createCollection failed. A full resync is necessary."); - } else { - severe() << "Rolling back createCollection on " << *it - << " failed. A full resync is necessary."; - throw RSFatalException( - "Rolling back createCollection failed. A full resync is necessary."); - } - } - - WriteUnitOfWork wunit(opCtx); - fassertStatusOK(40360, db->dropCollectionEvenIfSystem(opCtx, nss)); - wunit.commit(); - } - } - - // Drops indexes. - for (auto it = fixUpInfo.indexesToDrop.begin(); it != fixUpInfo.indexesToDrop.end(); it++) { - const NamespaceString nss(it->first); - const string& indexName = it->second; - log() << "Dropping index: collection = " << nss.toString() << ". index = " << indexName; - - Lock::DBLock dbLock(opCtx, nss.db(), MODE_X); - auto db = dbHolder().get(opCtx, nss.db()); - - // If the db is null, we skip over dropping the index. - if (!db) { - continue; - } - auto collection = db->getCollection(opCtx, nss); - - // If the collection is null, we skip over dropping the index. 
- if (!collection) { - continue; - } - auto indexCatalog = collection->getIndexCatalog(); - if (!indexCatalog) { - continue; - } - bool includeUnfinishedIndexes = false; - auto indexDescriptor = - indexCatalog->findIndexByName(opCtx, indexName, includeUnfinishedIndexes); - if (!indexDescriptor) { - warning() << "Rollback failed to drop index " << indexName << " in " << nss.toString() - << ": index not found."; - continue; - } - WriteUnitOfWork wunit(opCtx); - auto status = indexCatalog->dropIndex(opCtx, indexDescriptor); - if (!status.isOK()) { - severe() << "Rollback failed to drop index " << indexName << " in " << nss.toString() - << ": " << redact(status); - throw RSFatalException(str::stream() << "Rollback failed to drop index " << indexName - << " in " - << nss.toString()); - } - wunit.commit(); - } - - log() << "Deleting and updating documents to roll back insert, update and remove " - "operations"; - unsigned deletes = 0, updates = 0; - time_t lastProgressUpdate = time(0); - time_t progressUpdateGap = 10; - - for (const auto& nsAndGoodVersionsByDocID : goodVersions) { - - // Keeps an archive of items rolled back if the collection has not been dropped - // while rolling back createCollection operations. - const auto& ns = nsAndGoodVersionsByDocID.first; - unique_ptr<Helpers::RemoveSaver> removeSaver; - invariant(!fixUpInfo.collectionsToDrop.count(ns)); - removeSaver.reset(new Helpers::RemoveSaver("rollback", "", ns)); - - const auto& goodVersionsByDocID = nsAndGoodVersionsByDocID.second; - for (const auto& idAndDoc : goodVersionsByDocID) { - time_t now = time(0); - if (now - lastProgressUpdate > progressUpdateGap) { - log() << deletes << " delete and " << updates - << " update operations processed out of " << goodVersions.size() - << " total operations."; - lastProgressUpdate = now; - } - const DocID& doc = idAndDoc.first; - BSONObj pattern = doc._id.wrap(); // { _id : ... } - try { - verify(doc.ns && *doc.ns); - invariant(!fixUpInfo.collectionsToResyncData.count(doc.ns)); - - // TODO: Lots of overhead in context. This can be faster. - const NamespaceString docNss(doc.ns); - Lock::DBLock docDbLock(opCtx, docNss.db(), MODE_X); - OldClientContext ctx(opCtx, doc.ns); - - Collection* collection = ctx.db()->getCollection(opCtx, docNss); - - // Adds the doc to our rollback file if the collection was not dropped while - // rolling back createCollection operations. Does not log an error when - // undoing an insert on a no longer existing collection. It is likely that - // the collection was dropped as part of rolling back a createCollection - // command and the document no longer exists. - - if (collection && removeSaver) { - BSONObj obj; - bool found = Helpers::findOne(opCtx, collection, pattern, obj, false); - if (found) { - auto status = removeSaver->goingToDelete(obj); - if (!status.isOK()) { - severe() << "Rollback cannot write document in namespace " << doc.ns - << " to archive file: " << redact(status); - throw RSFatalException(str::stream() - << "Rollback cannot write document in namespace " - << doc.ns - << " to archive file."); - } - } else { - error() << "Rollback cannot find object: " << pattern << " in namespace " - << doc.ns; - } - } - - if (idAndDoc.second.isEmpty()) { - // If the document could not be found on the primary, deletes the document. - // TODO 1.6 : can't delete from a capped collection. Need to handle that - // here. - deletes++; - - if (collection) { - if (collection->isCapped()) { - // Can't delete from a capped collection - so we truncate instead. 
- // if this item must go, so must all successors. - - try { - // TODO: IIRC cappedTruncateAfter does not handle completely - // empty. This will be slow if there is no _id index in - // the collection. - - const auto clock = opCtx->getServiceContext()->getFastClockSource(); - const auto findOneStart = clock->now(); - RecordId loc = Helpers::findOne(opCtx, collection, pattern, false); - if (clock->now() - findOneStart > Milliseconds(200)) - warning() << "Roll back slow no _id index for " << doc.ns - << " perhaps?"; - // Would be faster but requires index: - // RecordId loc = Helpers::findById(nsd, pattern); - if (!loc.isNull()) { - try { - collection->cappedTruncateAfter(opCtx, loc, true); - } catch (const DBException& e) { - if (e.code() == 13415) { - // hack: need to just make cappedTruncate do this... - writeConflictRetry( - opCtx, "truncate", collection->ns().ns(), [&] { - WriteUnitOfWork wunit(opCtx); - uassertStatusOK(collection->truncate(opCtx)); - wunit.commit(); - }); - } else { - throw; - } - } - } - } catch (const DBException& e) { - // Replicated capped collections have many ways to become - // inconsistent. We rely on age-out to make these problems go away - // eventually. - - warning() << "Ignoring failure to roll back change to capped " - << "collection " << doc.ns << " with _id " - << redact(idAndDoc.first._id.toString( - /*includeFieldName*/ false)) - << ": " << redact(e); - } - } else { - deleteObjects(opCtx, - collection, - docNss, - pattern, - true, // justOne - true); // god - } - } - } else { - // TODO faster... - updates++; - - UpdateRequest request(docNss); - - request.setQuery(pattern); - request.setUpdates(idAndDoc.second); - request.setGod(); - request.setUpsert(); - UpdateLifecycleImpl updateLifecycle(docNss); - request.setLifecycle(&updateLifecycle); - - update(opCtx, ctx.db(), request); - } - } catch (const DBException& e) { - log() << "Exception in rollback ns:" << doc.ns << ' ' << pattern.toString() << ' ' - << redact(e) << " ndeletes:" << deletes; - throw; - } - } - } - - log() << "Rollback deleted " << deletes << " documents and updated " << updates - << " documents."; - - log() << "Truncating the oplog at " << fixUpInfo.commonPoint.toString(); - - // Cleans up the oplog. - { - const NamespaceString oplogNss(NamespaceString::kRsOplogNamespace); - Lock::DBLock oplogDbLock(opCtx, oplogNss.db(), MODE_IX); - Lock::CollectionLock oplogCollectionLoc(opCtx->lockState(), oplogNss.ns(), MODE_X); - OldClientContext ctx(opCtx, oplogNss.ns()); - Collection* oplogCollection = ctx.db()->getCollection(opCtx, oplogNss); - if (!oplogCollection) { - fassertFailedWithStatusNoTrace( - 13423, - Status(ErrorCodes::UnrecoverableRollbackError, - str::stream() << "Can't find " << NamespaceString::kRsOplogNamespace.ns())); - } - // TODO: fatal error if this throws? - oplogCollection->cappedTruncateAfter(opCtx, fixUpInfo.commonPointOurDiskloc, false); - } - - Status status = getGlobalAuthorizationManager()->initialize(opCtx); - if (!status.isOK()) { - severe() << "Failed to reinitialize auth data after rollback: " << redact(status); - fassertFailedNoTrace(40366); - } - - // Reload the lastAppliedOpTime and lastDurableOpTime value in the replcoord and the - // lastAppliedHash value in bgsync to reflect our new last op. The rollback common point does - // not necessarily represent a consistent database state. For example, on a secondary, we may - // have rolled back to an optime that fell in the middle of an oplog application batch. 
We make - // the database consistent again after rollback by applying ops forward until we reach - // 'minValid'. - replCoord->resetLastOpTimesFromOplog(opCtx, - ReplicationCoordinator::DataConsistency::Inconsistent); -} - -Status _syncRollback(OperationContext* opCtx, - const OplogInterface& localOplog, - const RollbackSource& rollbackSource, - int requiredRBID, - ReplicationCoordinator* replCoord, - ReplicationProcess* replicationProcess) { - invariant(!opCtx->lockState()->isLocked()); - - FixUpInfo how; - log() << "Starting rollback. Sync source: " << rollbackSource.getSource() << rsLog; - how.rbid = rollbackSource.getRollbackId(); - uassert( - 40362, "Upstream node rolled back. Need to retry our rollback.", how.rbid == requiredRBID); - - log() << "Finding the Common Point"; - try { - - auto processOperationForFixUp = [&how](const BSONObj& operation) { - return updateFixUpInfoFromLocalOplogEntry(how, operation); - }; - - // Calls syncRollBackLocalOperations to run updateFixUpInfoFromLocalOplogEntry - // on each oplog entry up until the common point. - auto res = syncRollBackLocalOperations( - localOplog, rollbackSource.getOplog(), processOperationForFixUp); - if (!res.isOK()) { - const auto status = res.getStatus(); - switch (status.code()) { - case ErrorCodes::OplogStartMissing: - case ErrorCodes::UnrecoverableRollbackError: - return status; - default: - throw RSFatalException(status.toString()); - } - } - - how.commonPoint = res.getValue().first; // OpTime - how.commonPointOurDiskloc = res.getValue().second; // RecordID - how.removeRedundantOperations(); - } catch (const RSFatalException& e) { - return Status(ErrorCodes::UnrecoverableRollbackError, - str::stream() - << "need to rollback, but unable to determine common point between" - " local and remote oplog: " - << e.what()); - } - - OpTime commonPoint = how.commonPoint; - OpTime lastCommittedOpTime = replCoord->getLastCommittedOpTime(); - OpTime committedSnapshot = replCoord->getCurrentCommittedSnapshotOpTime(); - - log() << "Rollback common point is " << how.commonPoint; - - // Rollback common point should be >= the replication commit point. - invariant(!replCoord->isV1ElectionProtocol() || - commonPoint.getTimestamp() >= lastCommittedOpTime.getTimestamp()); - invariant(!replCoord->isV1ElectionProtocol() || commonPoint >= lastCommittedOpTime); - - // Rollback common point should be >= the committed snapshot optime. - invariant(commonPoint.getTimestamp() >= committedSnapshot.getTimestamp()); - invariant(commonPoint >= committedSnapshot); - - try { - ON_BLOCK_EXIT([&] { - auto status = replicationProcess->incrementRollbackID(opCtx); - fassertStatusOK(40425, status); - }); - syncFixUp(opCtx, how, rollbackSource, replCoord, replicationProcess); - } catch (const RSFatalException& e) { - return Status(ErrorCodes::UnrecoverableRollbackError, e.what()); - } - - if (MONGO_FAIL_POINT(rollbackHangBeforeFinish)) { - // This log output is used in js tests so please leave it. - log() << "rollback - rollbackHangBeforeFinish fail point " - "enabled. Blocking until fail point is disabled."; - while (MONGO_FAIL_POINT(rollbackHangBeforeFinish)) { - invariant(!globalInShutdownDeprecated()); // It is an error to shutdown while enabled. 
- mongo::sleepsecs(1); - } - } - - return Status::OK(); -} - -} // namespace - -Status syncRollbackNoUUID(OperationContext* opCtx, - const OplogInterface& localOplog, - const RollbackSource& rollbackSource, - int requiredRBID, - ReplicationCoordinator* replCoord, - ReplicationProcess* replicationProcess) { - invariant(opCtx); - invariant(replCoord); - - DisableDocumentValidation validationDisabler(opCtx); - UnreplicatedWritesBlock replicationDisabler(opCtx); - Status status = _syncRollback( - opCtx, localOplog, rollbackSource, requiredRBID, replCoord, replicationProcess); - - log() << "Rollback finished. The final minValid is: " - << replicationProcess->getConsistencyMarkers()->getMinValid(opCtx) << rsLog; - - return status; -} - -void rollbackNoUUID(OperationContext* opCtx, - const OplogInterface& localOplog, - const RollbackSource& rollbackSource, - int requiredRBID, - ReplicationCoordinator* replCoord, - ReplicationProcess* replicationProcess, - stdx::function<void(int)> sleepSecsFn) { - // Set state to ROLLBACK while we are in this function. This prevents serving reads, even from - // the oplog. This can fail if we are elected PRIMARY, in which case we better not do any - // rolling back. If we successfully enter ROLLBACK we will only exit this function fatally or - // after transitioning to RECOVERING. We always transition to RECOVERING regardless of success - // or (recoverable) failure since we may be in an inconsistent state. If rollback failed before - // writing anything, SyncTail will quickly take us to SECONDARY since we are still at our - // original MinValid, which is fine because we may choose a sync source that doesn't require - // rollback. If it failed after we wrote to MinValid, then we will pick a sync source that will - // cause us to roll back to the same common point, which is fine. If we succeeded, we will be - // consistent as soon as we apply up to/through MinValid and SyncTail will make us SECONDARY - // then. - - { - Lock::GlobalWrite globalWrite(opCtx); - auto status = replCoord->setFollowerMode(MemberState::RS_ROLLBACK); - if (!status.isOK()) { - log() << "Cannot transition from " << replCoord->getMemberState().toString() << " to " - << MemberState(MemberState::RS_ROLLBACK).toString() << causedBy(status); - return; - } - } - - try { - auto status = syncRollbackNoUUID( - opCtx, localOplog, rollbackSource, requiredRBID, replCoord, replicationProcess); - - // Aborts only when syncRollback detects we are in an unrecoverable state. - // WARNING: these statuses sometimes have location codes which are lost with uassertStatusOK - // so we need to check here first. - if (ErrorCodes::UnrecoverableRollbackError == status.code()) { - severe() << "Unable to complete rollback. A full resync may be needed: " - << redact(status); - fassertFailedNoTrace(28723); - } - - // In other cases, we log the message contained in the error status and retry later. - uassertStatusOK(status); - } catch (const DBException& ex) { - // UnrecoverableRollbackError should only come from a returned status which is handled - // above. - invariant(ex.code() != ErrorCodes::UnrecoverableRollbackError); - - warning() << "Rollback cannot complete at this time (retrying later): " << redact(ex) - << " appliedThrough= " << replCoord->getMyLastAppliedOpTime() << " minvalid= " - << replicationProcess->getConsistencyMarkers()->getMinValid(opCtx); - - // Sleep a bit to allow upstream node to coalesce, if that was the cause of the failure. 
If - // we failed in a way that will keep failing, but wasn't flagged as a fatal failure, this - // will also prevent us from hot-looping and putting too much load on upstream nodes. - sleepSecsFn(5); // 5 seconds was chosen as a completely arbitrary amount of time. - } catch (...) { - std::terminate(); - } - - // At this point we are about to leave rollback. Before we do, wait for any writes done - // as part of rollback to be durable, and then do any necessary checks that we didn't - // wind up rolling back something illegal. We must wait for the rollback to be durable - // so that if we wind up shutting down uncleanly in response to something we rolled back - // we know that we won't wind up right back in the same situation when we start back up - // because the rollback wasn't durable. - opCtx->recoveryUnit()->waitUntilDurable(); - - // If we detected that we rolled back the shardIdentity document as part of this rollback - // then we must shut down to clear the in-memory ShardingState associated with the - // shardIdentity document. - if (ShardIdentityRollbackNotifier::get(opCtx)->didRollbackHappen()) { - severe() << "shardIdentity document rollback detected. Shutting down to clear " - "in-memory sharding state. Restarting this process should safely return it " - "to a healthy state"; - fassertFailedNoTrace(40276); - } - - auto status = replCoord->setFollowerMode(MemberState::RS_RECOVERING); - if (!status.isOK()) { - severe() << "Failed to transition into " << MemberState(MemberState::RS_RECOVERING) - << "; expected to be in state " << MemberState(MemberState::RS_ROLLBACK) - << "; found self in " << replCoord->getMemberState() << causedBy(status); - fassertFailedNoTrace(40364); - } -} - -} // namespace repl -} // namespace mongo diff --git a/src/mongo/db/repl/rs_rollback_no_uuid.h b/src/mongo/db/repl/rs_rollback_no_uuid.h deleted file mode 100644 index c830286dc11..00000000000 --- a/src/mongo/db/repl/rs_rollback_no_uuid.h +++ /dev/null @@ -1,303 +0,0 @@ -/** - * Copyright (C) 2017 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#pragma once - -#include "mongo/base/status.h" -#include "mongo/db/jsobj.h" -#include "mongo/db/record_id.h" -#include "mongo/db/repl/optime.h" -#include "mongo/stdx/functional.h" -#include "mongo/util/time_support.h" - -/** - * This rollback algorithm does not make use of the UUID oplog field that was implemented in 3.6. - * It is only used to maintain 3.4 compatibility and should not be updated in the future. - * After MongoDB 3.8 is released, this file along with rs_rollback_no_uuid.cpp and - * rs_rollback_no_uuid_test.cpp can be removed as we will no longer support functionality for - * oplog entries that do not include UUID's in 3.8. See SERVER-29766. - * - * Rollback Overview: - * - * Rollback occurs when a node's oplog diverges from its sync source's oplog and needs to regain - * consistency with the sync source's oplog. - * - * R and S are defined below to represent two nodes involved in rollback. - * - * R = The node whose oplog has diverged from its sync source and is rolling back. - * S = The sync source of node R. - * - * The rollback algorithm is designed to keep S's data and to make node R consistent with node S. - * One could argue here that keeping R's data has some merits, however, in most - * cases S will have significantly more data. Also note that S may have a proper subset of R's - * stream if there were no subsequent writes. Our goal is to get R back in sync with S. - * - * A visualization of what happens in the oplogs of the sync source and node that is rolling back - * is shown below. On the left side of each example are the oplog entries of the nodes before - * rollback occurs and on the right are the oplog entries of the node after rollback occurs. - * During rollback only the oplog entries of R change. - * - * #1: Status of R after operations e, f, and g are rolled back to the common point [d]. - * Since there were no other writes to node S after [d], we do not need to apply - * more writes to node R after rolling back. - * - * R : a b c d e f g -> a b c d - * S : a b c d - * - * #2: In this case, we first roll back to [d], and since S has written q to z oplog entries, - * we need to replay these oplog operations onto R after it has rolled back to the common - * point. - * - * R : a b c d e f g -> a b c d q r s t u v w x z - * S : a b c d q r s t u v w x z - * - * Rollback Algorithm: - * - * We will continue to use the notation of R as the node whose oplog is inconsistent with - * its sync source and S as the sync source of R. We will also represent the common point - * as point C. - * - * 1. Increment rollback ID of node R. - * 2. Find the most recent common oplog entry, which we will say is point C. In the above - * example, the common point was oplog entry 'd'. - * 3. Undo all the oplog entries that occurred after point C on the node R. - * a. Consider how to revert the oplog entries (i.e. for a create collection, drops the - * collection) and place that information into a FixUpInfo struct. - * b. Cancel out unnecessary operations (i.e. If dropping a collection, there is no need - * to do dropIndex if the index is within the collection that will eventually be - * dropped). - * c. Undo all operations done on node R until the point. We attempt to revert all data - * and metadata until point C. However, if we need to refetch data from the sync - * source, the data on node R will not be completely consistent with what it was - * previously at point C, as some of the data may have been modified by the sync - * source after the common point. - * i. 
Refetch any documents from node S that are needed for the - * rollback. - * ii. Find minValid, which is the last OpTime of node S. - * i.e. the last oplog entry of node S at the time that rollback occurs. - * iii. Resync collection data and metadata. - * iv. Update minValid if necessary, as more fetching may have occurred from - * the sync source requiring that minValid is updated to an even later - * point. - * v. Drop all collections that were created after point C. - * vi. Drop all indexes that were created after point C. - * vii. Delete, update and insert necessary documents that were modified after - * point C. - * viii. Truncate the oplog to point C. - * 4. After rolling back to point C, node R transitions from ROLLBACK to RECOVERING mode. - * - * Steps 5 and 6 occur in ordinary replication code and are not done in this file. - * - * 5. Retrieve the oplog entries from node S until reaching the minValid oplog entry. - * a. Fetch the oplog entries from node S. - * b. Apply the oplog entries of node S to node R starting from point C up until - * the minValid - * 6. Transition node R from RECOVERING to SECONDARY state. - */ - -namespace mongo { - -class DBClientConnection; -class NamespaceString; -class OperationContext; - -namespace repl { - -class OplogInterface; -class ReplicationCoordinator; -class ReplicationProcess; -class RollbackSource; - -/** - * Entry point to rollback process. - * Set state to ROLLBACK while we are in this function. This prevents serving reads, even from - * the oplog. This can fail if we are elected PRIMARY, in which case we better not do any - * rolling back. If we successfully enter ROLLBACK, we will only exit this function fatally or - * after transition to RECOVERING. - * - * 'sleepSecsFn' is an optional testing-only argument for overriding mongo::sleepsecs(). - */ - -void rollbackNoUUID(OperationContext* opCtx, - const OplogInterface& localOplog, - const RollbackSource& rollbackSource, - int requiredRBID, - ReplicationCoordinator* replCoord, - ReplicationProcess* replicationProcess, - stdx::function<void(int)> sleepSecsFn = [](int secs) { sleepsecs(secs); }); - -/** - * Initiates the rollback process after transition to ROLLBACK. - * This function assumes the preconditions for undertaking rollback have already been met; - * we have ops in our oplog that our sync source does not have, and we are not currently - * PRIMARY. - * - * This function can throw exceptions on failures. - * This function runs a command on the sync source to detect if the sync source rolls back - * while our rollback is in progress. - * - * @param opCtx: Used to read and write from this node's databases. - * @param localOplog: reads the oplog on this server. - * @param rollbackSource: Interface for sync source. Provides the oplog and - * supports fetching documents and copying collections. - * @param requiredRBID: Rollback ID we are required to have throughout rollback. - * @param replCoord: Used to track the rollback ID and to change the follower state. - * @param replicationProcess: Used to update minValid. - * - * If requiredRBID is supplied, we error if the upstream node has a different RBID (i.e. it rolled - * back) after fetching any information from it. - * - * Failures: If a Status with code UnrecoverableRollbackError is returned, the caller must exit - * fatally. All other errors should be considered recoverable regardless of whether reported as a - * status or exception. 
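That failure contract (exactly one error code that must be treated as fatal, with every other error retried after a short delay) can be sketched in isolation; RollbackStatus and runRollbackAttempt below are hypothetical stand-ins, not part of this interface:

// Hedged sketch of the caller-side contract: fatal on one code, retry otherwise.
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <thread>

enum class RollbackStatus { kOK, kUnrecoverable, kTransientError };

// Stand-in for a single rollback attempt against the sync source.
RollbackStatus runRollbackAttempt(int attempt) {
    return attempt < 2 ? RollbackStatus::kTransientError : RollbackStatus::kOK;
}

int main() {
    for (int attempt = 0;; ++attempt) {
        switch (runRollbackAttempt(attempt)) {
            case RollbackStatus::kOK:
                std::cout << "rollback finished after " << attempt + 1 << " attempt(s)\n";
                return 0;
            case RollbackStatus::kUnrecoverable:
                std::cerr << "unrecoverable rollback error; full resync required\n";
                std::abort();  // the caller must exit fatally
            case RollbackStatus::kTransientError:
                // Recoverable: back off briefly, then let replication retry.
                std::this_thread::sleep_for(std::chrono::milliseconds(10));
                break;
        }
    }
}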
- */ -Status syncRollbackNoUUID(OperationContext* opCtx, - const OplogInterface& localOplog, - const RollbackSource& rollbackSource, - int requiredRBID, - ReplicationCoordinator* replCoord, - ReplicationProcess* replicationProcess); - - -/* -Rollback function flowchart: - -1. rollbackNoUUID() called. - a. syncRollbackNoUUID() called by rollbackNoUUID(). - i. _syncRollback() called by syncRollbackNoUUID(). - I. syncRollbackLocalOperations() called by _syncRollback(). - A. processOperationFixUp called by syncRollbackLocalOperations(). - 1. updateFixUpInfoFromLocalOplogEntry called by - processOperationFixUp(). - II. removeRedundantOperations() called by _syncRollback(). - III. syncFixUp() called by _syncRollback(). - 1. Retrieves documents to refetch. - 2. Checks the rollback ID and updates minValid. - 3. Resyncs collection data and metadata. - 4. Checks the rollbackID and updates minValid. - 5. Drops collections. - 6. Drops indexes. - 7. Deletes, updates and inserts individual oplogs. - 8. Truncates the oplog. - IV. Returns back to syncRollbackNoUUID(). - ii. Returns back to rollbackNoUUID(). - b. Rollback ends. -*/ - - -/** - * This namespace contains internal details of the rollback system. It is only exposed in a header - * for unit testing. Nothing here should be used outside of rs_rollback.cpp or its unit test. - */ -namespace rollback_internal_no_uuid { - -struct DocID { - BSONObj ownedObj; - const char* ns; - BSONElement _id; - bool operator<(const DocID& other) const; - bool operator==(const DocID& other) const; - - static DocID minFor(const char* ns) { - auto obj = BSON("" << MINKEY); - return {obj, ns, obj.firstElement()}; - } - - static DocID maxFor(const char* ns) { - auto obj = BSON("" << MAXKEY); - return {obj, ns, obj.firstElement()}; - } -}; - -struct FixUpInfo { - // Note this is a set -- if there are many $inc's on a single document we need to roll back, - // we only need to refetch it once. - std::set<DocID> docsToRefetch; - - // Key is collection namespace. Value is name of index to drop. - std::multimap<std::string, std::string> indexesToDrop; - - // Namespaces of collections that need to be dropped or resynced from the sync source. - std::set<std::string> collectionsToDrop; - std::set<std::string> collectionsToResyncData; - std::set<std::string> collectionsToResyncMetadata; - - OpTime commonPoint; - RecordId commonPointOurDiskloc; - - /** - * Remote server's current rollback id. Keeping track of this - * allows us to determine if the sync source has rolled back, in which case - * we can terminate the rollback of the local node, as we cannot - * roll back against a sync source that is also rolled back. - */ - int rbid; - - /** - * Removes all documents in the docsToRefetch set that are in - * the collection passed into the function. - */ - void removeAllDocsToRefetchFor(const std::string& collection); - - /** - * Removes any redundant operations that may have happened during - * the period of time that the rolling back node was out of sync - * with its sync source. For example, if a collection is dropped, there is - * no need to also drop the indexes that are part of the collection. This - * function removes any operations that were recorded that are unnecessary - * because the collection that the operation is part of is either going - * to be dropped, or fully resynced. - */ - void removeRedundantOperations(); - - void addIndexToDrop(const NamespaceString& nss, const DocID& doc); -}; - -// Indicates that rollback cannot complete and the server must abort. 
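The docsToRefetch member of FixUpInfo above is a set precisely so that a document touched by many rolled-back operations is refetched only once. A small standalone illustration of that (namespace, _id) keying, with DocKey as a hypothetical stand-in for DocID:

// Illustration of refetch deduplication keyed on (namespace, _id); not the real DocID type.
#include <iostream>
#include <set>
#include <string>
#include <tuple>

struct DocKey {
    std::string ns;
    std::string id;  // stand-in for the BSON _id element
    bool operator<(const DocKey& other) const {
        return std::tie(ns, id) < std::tie(other.ns, other.id);
    }
};

int main() {
    std::set<DocKey> docsToRefetch;
    // Three rolled-back operations touching the same document yield only one refetch entry.
    docsToRefetch.insert({"test.t", "1"});
    docsToRefetch.insert({"test.t", "1"});
    docsToRefetch.insert({"test.t", "2"});
    std::cout << "documents to refetch: " << docsToRefetch.size() << "\n";  // prints 2
}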
-class RSFatalException : public std::exception { -public: - RSFatalException(std::string m = "replica set fatal exception") : msg(m) {} - virtual const char* what() const throw() { - return msg.c_str(); - } - -private: - std::string msg; -}; - -/** - * This function goes through a single oplog document of the node and records the necessary - * information in order to undo the given oplog entry. The data is placed into a FixUpInfo - * struct that holds all the necessary information to undo all of the oplog entries of the - * rolling back node from after the common point. "ourObj" is the oplog document that needs - * to be reverted. - */ -Status updateFixUpInfoFromLocalOplogEntry(FixUpInfo& fixUpInfo, const BSONObj& ourObj); -} // namespace rollback_internal -} // namespace repl -} // namespace mongo diff --git a/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp b/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp deleted file mode 100644 index 07b767548eb..00000000000 --- a/src/mongo/db/repl/rs_rollback_no_uuid_test.cpp +++ /dev/null @@ -1,1576 +0,0 @@ -/** - * Copyright 2015 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. 
- */ - -#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault - -#include "mongo/platform/basic.h" - -#include <initializer_list> -#include <utility> - -#include "mongo/db/catalog/collection.h" -#include "mongo/db/catalog/collection_catalog_entry.h" -#include "mongo/db/catalog/drop_indexes.h" -#include "mongo/db/catalog/index_catalog.h" -#include "mongo/db/catalog/index_create.h" -#include "mongo/db/client.h" -#include "mongo/db/concurrency/d_concurrency.h" -#include "mongo/db/db_raii.h" -#include "mongo/db/dbhelpers.h" -#include "mongo/db/index/index_descriptor.h" -#include "mongo/db/jsobj.h" -#include "mongo/db/namespace_string.h" -#include "mongo/db/repl/oplog.h" -#include "mongo/db/repl/oplog_interface.h" -#include "mongo/db/repl/oplog_interface_mock.h" -#include "mongo/db/repl/rollback_source.h" -#include "mongo/db/repl/rollback_test_fixture.h" -#include "mongo/db/repl/rs_rollback_no_uuid.h" -#include "mongo/db/s/shard_identity_rollback_notifier.h" -#include "mongo/stdx/memory.h" -#include "mongo/unittest/death_test.h" -#include "mongo/unittest/unittest.h" -#include "mongo/util/net/hostandport.h" - -namespace { - -using namespace mongo; -using namespace mongo::repl; -using namespace mongo::repl::rollback_internal_no_uuid; - -const auto kIndexVersion = IndexDescriptor::IndexVersion::kV2; - -class RSRollbackTest : public RollbackTest { -private: - void setUp() override; - void tearDown() override; -}; - -void RSRollbackTest::setUp() { - RollbackTest::setUp(); -} - -void RSRollbackTest::tearDown() { - RollbackTest::tearDown(); -} - -OplogInterfaceMock::Operation makeNoopOplogEntryAndRecordId(Seconds seconds) { - OpTime ts(Timestamp(seconds, 0), 0); - return std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId(1)); -} - -TEST_F(RSRollbackTest, InconsistentMinValid) { - _replicationProcess->getConsistencyMarkers()->setAppliedThrough( - _opCtx.get(), OpTime(Timestamp(Seconds(0), 0), 0)); - _replicationProcess->getConsistencyMarkers()->setMinValid(_opCtx.get(), - OpTime(Timestamp(Seconds(1), 0), 0)); - auto status = syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock(), - RollbackSourceMock(stdx::make_unique<OplogInterfaceMock>()), - {}, - _coordinator, - _replicationProcess.get()); - ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code()); - ASSERT_STRING_CONTAINS(status.reason(), "unable to determine common point"); -} - -TEST_F(RSRollbackTest, OplogStartMissing) { - OpTime ts(Timestamp(Seconds(1), 0), 0); - auto operation = - std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId()); - OplogInterfaceMock::Operations remoteOperations({operation}); - auto remoteOplog = stdx::make_unique<OplogInterfaceMock>(remoteOperations); - ASSERT_EQUALS(ErrorCodes::OplogStartMissing, - syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock(), - RollbackSourceMock(std::move(remoteOplog)), - {}, - _coordinator, - _replicationProcess.get()) - .code()); -} - -TEST_F(RSRollbackTest, NoRemoteOpLog) { - OpTime ts(Timestamp(Seconds(1), 0), 0); - auto operation = - std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId()); - auto status = syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({operation}), - RollbackSourceMock(stdx::make_unique<OplogInterfaceMock>()), - {}, - _coordinator, - _replicationProcess.get()); - ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code()); - ASSERT_STRING_CONTAINS(status.reason(), "unable to determine common point"); -} - -TEST_F(RSRollbackTest, 
RemoteGetRollbackIdThrows) { - OpTime ts(Timestamp(Seconds(1), 0), 0); - auto operation = - std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId()); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)) {} - int getRollbackId() const override { - uassert(ErrorCodes::UnknownError, "getRollbackId() failed", false); - } - }; - ASSERT_THROWS_CODE( - syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({operation}), - RollbackSourceLocal(stdx::make_unique<OplogInterfaceMock>()), - {}, - _coordinator, - _replicationProcess.get()) - .transitional_ignore(), - AssertionException, - ErrorCodes::UnknownError); -} - -TEST_F(RSRollbackTest, RemoteGetRollbackIdDiffersFromRequiredRBID) { - OpTime ts(Timestamp(Seconds(1), 0), 0); - auto operation = - std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId()); - - class RollbackSourceLocal : public RollbackSourceMock { - public: - using RollbackSourceMock::RollbackSourceMock; - int getRollbackId() const override { - return 2; - } - }; - - ASSERT_THROWS_CODE( - syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({operation}), - RollbackSourceLocal(stdx::make_unique<OplogInterfaceMock>()), - 1, - _coordinator, - _replicationProcess.get()) - .transitional_ignore(), - AssertionException, - ErrorCodes::duplicateCodeForTest(40362)); -} - -TEST_F(RSRollbackTest, BothOplogsAtCommonPoint) { - createOplog(_opCtx.get()); - OpTime ts(Timestamp(Seconds(1), 0), 1); - auto operation = - std::make_pair(BSON("ts" << ts.getTimestamp() << "h" << ts.getTerm()), RecordId(1)); - ASSERT_OK(syncRollbackNoUUID( - _opCtx.get(), - OplogInterfaceMock({operation}), - RollbackSourceMock(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - operation, - }))), - {}, - _coordinator, - _replicationProcess.get())); -} - -/** - * Test function to roll back a delete operation. - * Returns number of records in collection after rolling back delete operation. - * If collection does not exist after rolling back, returns -1. 
- */ -int _testRollbackDelete(OperationContext* opCtx, - ReplicationCoordinator* coordinator, - ReplicationProcess* replicationProcess, - const BSONObj& documentAtSource) { - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto deleteOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "d" - << "ns" - << "test.t" - << "o" - << BSON("_id" << 0)), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(const BSONObj& documentAtSource, std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), - called(false), - _documentAtSource(documentAtSource) {} - BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const { - called = true; - return _documentAtSource; - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(documentAtSource, - std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - ASSERT_OK(syncRollbackNoUUID(opCtx, - OplogInterfaceMock({deleteOperation, commonOperation}), - rollbackSource, - {}, - coordinator, - replicationProcess)); - ASSERT_TRUE(rollbackSource.called); - - Lock::DBLock dbLock(opCtx, "test", MODE_S); - Lock::CollectionLock collLock(opCtx->lockState(), "test.t", MODE_S); - auto db = dbHolder().get(opCtx, "test"); - ASSERT_TRUE(db); - auto collection = db->getCollection(opCtx, "test.t"); - if (!collection) { - return -1; - } - return collection->getRecordStore()->numRecords(opCtx); -} - -TEST_F(RSRollbackTest, RollbackDeleteNoDocumentAtSourceCollectionDoesNotExist) { - createOplog(_opCtx.get()); - ASSERT_EQUALS( - -1, _testRollbackDelete(_opCtx.get(), _coordinator, _replicationProcess.get(), BSONObj())); -} - -TEST_F(RSRollbackTest, RollbackDeleteNoDocumentAtSourceCollectionExistsNonCapped) { - createOplog(_opCtx.get()); - _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - _testRollbackDelete(_opCtx.get(), _coordinator, _replicationProcess.get(), BSONObj()); - ASSERT_EQUALS( - 0, _testRollbackDelete(_opCtx.get(), _coordinator, _replicationProcess.get(), BSONObj())); -} - -TEST_F(RSRollbackTest, RollbackDeleteNoDocumentAtSourceCollectionExistsCapped) { - createOplog(_opCtx.get()); - CollectionOptions options; - options.capped = true; - _createCollection(_opCtx.get(), "test.t", options); - ASSERT_EQUALS( - 0, _testRollbackDelete(_opCtx.get(), _coordinator, _replicationProcess.get(), BSONObj())); -} - -TEST_F(RSRollbackTest, RollbackDeleteRestoreDocument) { - createOplog(_opCtx.get()); - _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - BSONObj doc = BSON("_id" << 0 << "a" << 1); - _testRollbackDelete(_opCtx.get(), _coordinator, _replicationProcess.get(), doc); - ASSERT_EQUALS(1, - _testRollbackDelete(_opCtx.get(), _coordinator, _replicationProcess.get(), doc)); -} - -TEST_F(RSRollbackTest, RollbackInsertDocumentWithNoId) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto insertDocumentOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "i" - << "ns" - << "test.t" - << "o" - << BSON("a" << 1)), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - BSONObj findOne(const NamespaceString& 
nss, const BSONObj& filter) const { - called = true; - return BSONObj(); - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - startCapturingLogMessages(); - auto status = syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({insertDocumentOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get()); - stopCapturingLogMessages(); - ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code()); - ASSERT_STRING_CONTAINS(status.reason(), "unable to determine common point"); - ASSERT_EQUALS(1, countLogLinesContaining("Cannot roll back op with no _id. ns: test.t,")); - ASSERT_FALSE(rollbackSource.called); -} - -TEST_F(RSRollbackTest, RollbackCreateIndexCommand) { - createOplog(_opCtx.get()); - auto collection = _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - auto indexSpec = BSON("ns" - << "test.t" - << "key" - << BSON("a" << 1) - << "name" - << "a_1" - << "v" - << static_cast<int>(kIndexVersion)); - { - Lock::DBLock dbLock(_opCtx.get(), "test", MODE_X); - MultiIndexBlock indexer(_opCtx.get(), collection); - ASSERT_OK(indexer.init(indexSpec).getStatus()); - WriteUnitOfWork wunit(_opCtx.get()); - indexer.commit(); - wunit.commit(); - auto indexCatalog = collection->getIndexCatalog(); - ASSERT(indexCatalog); - ASSERT_EQUALS(2, indexCatalog->numIndexesReady(_opCtx.get())); - } - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto insertDocumentOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "i" - << "ns" - << "test.system.indexes" - << "o" - << indexSpec), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - called = true; - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - // Repeat index creation operation and confirm that rollback attempts to drop index just once. - // This can happen when an index is re-created with different options. - startCapturingLogMessages(); - ASSERT_OK(syncRollbackNoUUID( - _opCtx.get(), - OplogInterfaceMock({insertDocumentOperation, insertDocumentOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - stopCapturingLogMessages(); - ASSERT_EQUALS(1, countLogLinesContaining("Dropping index: collection = test.t. index = a_1")); - ASSERT_FALSE(rollbackSource.called); - { - Lock::DBLock dbLock(_opCtx.get(), "test", MODE_S); - auto indexCatalog = collection->getIndexCatalog(); - ASSERT(indexCatalog); - ASSERT_EQUALS(1, indexCatalog->numIndexesReady(_opCtx.get())); - } -} - -TEST_F(RSRollbackTest, RollbackCreateIndexCommandIndexNotInCatalog) { - createOplog(_opCtx.get()); - auto collection = _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - auto indexSpec = BSON("ns" - << "test.t" - << "key" - << BSON("a" << 1) - << "name" - << "a_1"); - // Skip index creation to trigger warning during rollback. 
- { - Lock::DBLock dbLock(_opCtx.get(), "test", MODE_S); - auto indexCatalog = collection->getIndexCatalog(); - ASSERT(indexCatalog); - ASSERT_EQUALS(1, indexCatalog->numIndexesReady(_opCtx.get())); - } - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto insertDocumentOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "i" - << "ns" - << "test.system.indexes" - << "o" - << indexSpec), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - called = true; - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - startCapturingLogMessages(); - ASSERT_OK(syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({insertDocumentOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - stopCapturingLogMessages(); - ASSERT_EQUALS(1, countLogLinesContaining("Dropping index: collection = test.t. index = a_1")); - ASSERT_EQUALS(1, countLogLinesContaining("Rollback failed to drop index a_1 in test.t")); - ASSERT_FALSE(rollbackSource.called); - { - Lock::DBLock dbLock(_opCtx.get(), "test", MODE_S); - auto indexCatalog = collection->getIndexCatalog(); - ASSERT(indexCatalog); - ASSERT_EQUALS(1, indexCatalog->numIndexesReady(_opCtx.get())); - } -} - -TEST_F(RSRollbackTest, RollbackInsertSystemIndexesMissingNamespace) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto insertDocumentOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "i" - << "ns" - << "test.system.indexes" - << "o" - << BSON("key" << BSON("a" << 1) << "name" - << "a_1")), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - called = true; - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - startCapturingLogMessages(); - auto status = syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({insertDocumentOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get()); - stopCapturingLogMessages(); - ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code()); - ASSERT_STRING_CONTAINS(status.reason(), "unable to determine common point"); - ASSERT_EQUALS( - 1, countLogLinesContaining("Missing collection namespace in system.indexes operation,")); - ASSERT_FALSE(rollbackSource.called); -} - -TEST_F(RSRollbackTest, RollbackCreateIndexCommandInvalidNamespace) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto createIndexesOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "c" - << "ns" - << "test.$cmd" - 
<< "o" - << BSON("createIndexes" - << "" - << "key" - << BSON("a" << 1) - << "name" - << "a_1")), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - called = true; - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - startCapturingLogMessages(); - auto status = syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({createIndexesOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get()); - stopCapturingLogMessages(); - ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code()); - ASSERT_STRING_CONTAINS(status.reason(), "unable to determine common point"); - ASSERT_EQUALS( - 1, countLogLinesContaining("Invalid collection namespace in createIndexes operation,")); - ASSERT_FALSE(rollbackSource.called); -} - - -TEST_F(RSRollbackTest, RollbackDropIndexCommandWithOneIndex) { - createOplog(_opCtx.get()); - auto collection = _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - { - Lock::DBLock dbLock(_opCtx.get(), "test", MODE_S); - auto indexCatalog = collection->getIndexCatalog(); - ASSERT(indexCatalog); - ASSERT_EQUALS(1, indexCatalog->numIndexesReady(_opCtx.get())); - } - - auto indexSpec = BSON("ns" - << "test.t" - << "key" - << BSON("a" << 1) - << "name" - << "a_1" - << "v" - << static_cast<int>(kIndexVersion)); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto dropIndexOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "c" - << "ns" - << "test.$cmd" - << "o" - << BSON("dropIndexes" - << "t" - << "index" - << "a_1") - << "o2" - << indexSpec), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - called = true; - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - ASSERT_OK(syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({dropIndexOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - ASSERT(rollbackSource.called); -} - -TEST_F(RSRollbackTest, RollbackDropIndexCommandWithMultipleIndexes) { - createOplog(_opCtx.get()); - auto collection = _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - { - Lock::DBLock dbLock(_opCtx.get(), "test", MODE_S); - auto indexCatalog = collection->getIndexCatalog(); - ASSERT(indexCatalog); - ASSERT_EQUALS(1, indexCatalog->numIndexesReady(_opCtx.get())); - } - - auto indexSpec1 = BSON("ns" - << "test.t" - << "key" - << BSON("a" << 1) - << "name" - << "a_1" - << "v" - << static_cast<int>(kIndexVersion)); - auto indexSpec2 = BSON("ns" - << "test.t" - << "key" - << BSON("b" << 1) - << "name" - << "b_1" - << "v" - << static_cast<int>(kIndexVersion)); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), 
RecordId(1)); - auto dropIndexOperation1 = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "c" - << "ns" - << "test.$cmd" - << "o" - << BSON("dropIndexes" - << "t" - << "index" - << "a_1") - << "o2" - << indexSpec1), - RecordId(2)); - auto dropIndexOperation2 = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "c" - << "ns" - << "test.$cmd" - << "o" - << BSON("dropIndexes" - << "t" - << "index" - << "b_1") - << "o2" - << indexSpec2), - RecordId(3)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - called = true; - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - ASSERT_OK(syncRollbackNoUUID( - _opCtx.get(), - OplogInterfaceMock({dropIndexOperation2, dropIndexOperation1, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - ASSERT(rollbackSource.called); -} - -TEST_F(RSRollbackTest, RollbackInsertSystemIndexesCommandInvalidNamespace) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto insertDocumentOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "i" - << "ns" - << "test.system.indexes" - << "o" - << BSON("ns" - << "test." - << "key" - << BSON("a" << 1) - << "name" - << "a_1")), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - called = true; - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - startCapturingLogMessages(); - auto status = syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({insertDocumentOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get()); - stopCapturingLogMessages(); - ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code()); - ASSERT_STRING_CONTAINS(status.reason(), "unable to determine common point"); - ASSERT_EQUALS( - 1, countLogLinesContaining("Invalid collection namespace in createIndexes operation,")); - ASSERT_FALSE(rollbackSource.called); -} - -TEST_F(RSRollbackTest, RollbackCreateIndexCommandMissingIndexName) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto insertDocumentOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "i" - << "ns" - << "test.system.indexes" - << "o" - << BSON("ns" - << "test.t" - << "key" - << BSON("a" << 1))), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - called 
= true; - } - mutable bool called; - - private: - BSONObj _documentAtSource; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - startCapturingLogMessages(); - auto status = syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({insertDocumentOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get()); - stopCapturingLogMessages(); - ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code()); - ASSERT_STRING_CONTAINS(status.reason(), "unable to determine common point"); - ASSERT_EQUALS(1, countLogLinesContaining("Missing index name in createIndexes operation,")); - ASSERT_FALSE(rollbackSource.called); -} - -TEST_F(RSRollbackTest, RollbackUnknownCommand) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto unknownCommandOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "c" - << "ns" - << "test.t" - << "o" - << BSON("unknown_command" - << "t")), - RecordId(2)); - { - Lock::DBLock dbLock(_opCtx.get(), "test", MODE_X); - mongo::WriteUnitOfWork wuow(_opCtx.get()); - auto db = dbHolder().openDb(_opCtx.get(), "test"); - ASSERT_TRUE(db); - ASSERT_TRUE(db->getOrCreateCollection(_opCtx.get(), NamespaceString("test.t"))); - wuow.commit(); - } - auto status = syncRollbackNoUUID( - _opCtx.get(), - OplogInterfaceMock({unknownCommandOperation, commonOperation}), - RollbackSourceMock(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))), - {}, - _coordinator, - _replicationProcess.get()); - ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code()); - ASSERT_STRING_CONTAINS(status.reason(), "Rollback no such command"); -} - -TEST_F(RSRollbackTest, RollbackDropCollectionCommand) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto dropCollectionOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "c" - << "ns" - << "test.t" - << "o" - << BSON("drop" - << "t")), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - called = true; - } - mutable bool called; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - ASSERT_OK(syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({dropCollectionOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - ASSERT_TRUE(rollbackSource.called); -} - -TEST_F(RSRollbackTest, RollbackDropCollectionCommandFailsIfRBIDChangesWhileSyncingCollection) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto dropCollectionOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "c" - << "ns" - << "test.t" - << "o" - << BSON("drop" - << "t")), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - using RollbackSourceMock::RollbackSourceMock; - int getRollbackId() 
const override { - return copyCollectionCalled ? 1 : 0; - } - void copyCollectionFromRemote(OperationContext* opCtx, - const NamespaceString& nss) const override { - copyCollectionCalled = true; - } - mutable bool copyCollectionCalled = false; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - - _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - ASSERT_THROWS_CODE( - syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({dropCollectionOperation, commonOperation}), - rollbackSource, - 0, - _coordinator, - _replicationProcess.get()) - .transitional_ignore(), - DBException, - 40365); - ASSERT(rollbackSource.copyCollectionCalled); -} - -BSONObj makeApplyOpsOplogEntry(Timestamp ts, std::initializer_list<BSONObj> ops) { - BSONObjBuilder entry; - entry << "ts" << ts << "h" << 1LL << "op" - << "c" - << "ns" - << "admin"; - { - BSONObjBuilder cmd(entry.subobjStart("o")); - BSONArrayBuilder subops(entry.subarrayStart("applyOps")); - for (const auto& op : ops) { - subops << op; - } - } - return entry.obj(); -} - -OpTime getOpTimeFromOplogEntry(const BSONObj& entry) { - const BSONElement tsElement = entry["ts"]; - const BSONElement termElement = entry["t"]; - const BSONElement hashElement = entry["h"]; - ASSERT_EQUALS(bsonTimestamp, tsElement.type()) << entry; - ASSERT_TRUE(hashElement.isNumber()) << entry; - ASSERT_TRUE(termElement.eoo() || termElement.isNumber()) << entry; - long long term = hashElement.numberLong(); - if (!termElement.eoo()) { - term = termElement.numberLong(); - } - return OpTime(tsElement.timestamp(), term); -} - -TEST_F(RSRollbackTest, RollbackApplyOpsCommand) { - createOplog(_opCtx.get()); - - { - AutoGetOrCreateDb autoDb(_opCtx.get(), "test", MODE_X); - mongo::WriteUnitOfWork wuow(_opCtx.get()); - auto coll = autoDb.getDb()->getCollection(_opCtx.get(), "test.t"); - if (!coll) { - coll = autoDb.getDb()->createCollection(_opCtx.get(), "test.t"); - } - ASSERT(coll); - OpDebug* const nullOpDebug = nullptr; - ASSERT_OK(coll->insertDocument( - _opCtx.get(), InsertStatement(BSON("_id" << 1 << "v" << 2)), nullOpDebug, false)); - ASSERT_OK(coll->insertDocument( - _opCtx.get(), InsertStatement(BSON("_id" << 2 << "v" << 4)), nullOpDebug, false)); - ASSERT_OK(coll->insertDocument( - _opCtx.get(), InsertStatement(BSON("_id" << 4)), nullOpDebug, false)); - wuow.commit(); - } - const auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - const auto applyOpsOperation = - std::make_pair(makeApplyOpsOplogEntry(Timestamp(Seconds(2), 0), - {BSON("op" - << "u" - << "ns" - << "test.t" - << "o2" - << BSON("_id" << 1) - << "o" - << BSON("_id" << 1 << "v" << 2)), - BSON("op" - << "u" - << "ns" - << "test.t" - << "o2" - << BSON("_id" << 2) - << "o" - << BSON("_id" << 2 << "v" << 4)), - BSON("op" - << "d" - << "ns" - << "test.t" - << "o" - << BSON("_id" << 3)), - BSON("op" - << "i" - << "ns" - << "test.t" - << "o" - << BSON("_id" << 4))}), - RecordId(2)); - - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)) {} - - BSONObj findOne(const NamespaceString& nss, const BSONObj& filter) const override { - int numFields = 0; - for (const auto element : filter) { - ++numFields; - ASSERT_EQUALS("_id", element.fieldNameStringData()) << filter; - } - ASSERT_EQUALS(1, numFields) << filter; - searchedIds.insert(filter.firstElement().numberInt()); - switch 
(filter.firstElement().numberInt()) { - case 1: - return BSON("_id" << 1 << "v" << 1); - case 2: - return BSON("_id" << 2 << "v" << 3); - case 3: - return BSON("_id" << 3 << "v" << 5); - case 4: - return {}; - } - FAIL("Unexpected findOne request") << filter; - return {}; // Unreachable; why doesn't compiler know? - } - - mutable std::multiset<int> searchedIds; - } rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation}))); - - _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - ASSERT_OK(syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({applyOpsOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - ASSERT_EQUALS(4U, rollbackSource.searchedIds.size()); - ASSERT_EQUALS(1U, rollbackSource.searchedIds.count(1)); - ASSERT_EQUALS(1U, rollbackSource.searchedIds.count(2)); - ASSERT_EQUALS(1U, rollbackSource.searchedIds.count(3)); - ASSERT_EQUALS(1U, rollbackSource.searchedIds.count(4)); - - AutoGetCollectionForReadCommand acr(_opCtx.get(), NamespaceString("test.t")); - BSONObj result; - ASSERT(Helpers::findOne(_opCtx.get(), acr.getCollection(), BSON("_id" << 1), result)); - ASSERT_EQUALS(1, result["v"].numberInt()) << result; - ASSERT(Helpers::findOne(_opCtx.get(), acr.getCollection(), BSON("_id" << 2), result)); - ASSERT_EQUALS(3, result["v"].numberInt()) << result; - ASSERT(Helpers::findOne(_opCtx.get(), acr.getCollection(), BSON("_id" << 3), result)); - ASSERT_EQUALS(5, result["v"].numberInt()) << result; - ASSERT_FALSE(Helpers::findOne(_opCtx.get(), acr.getCollection(), BSON("_id" << 4), result)) - << result; -} - -TEST_F(RSRollbackTest, RollbackCreateCollectionCommand) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto createCollectionOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "c" - << "ns" - << "test.t" - << "o" - << BSON("create" - << "t")), - RecordId(2)); - RollbackSourceMock rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - ASSERT_OK(syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({createCollectionOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - { - Lock::DBLock dbLock(_opCtx.get(), "test", MODE_S); - auto db = dbHolder().get(_opCtx.get(), "test"); - ASSERT_TRUE(db); - ASSERT_FALSE(db->getCollection(_opCtx.get(), "test.t")); - } -} - -TEST_F(RSRollbackTest, RollbackCollectionModificationCommand) { - createOplog(_opCtx.get()); - _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - BSONObj collModCmd = BSON("collMod" - << "t" - << "noPadding" - << false); - auto collectionModificationOperation = - makeCommandOp(Timestamp(Seconds(2), 0), boost::none, "test.t", collModCmd, 2); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)), called(false) {} - - // Remote collection options are empty. 
- StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const { - called = true; - return BSON("options" << BSONObj()); - } - mutable bool called; - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - startCapturingLogMessages(); - ASSERT_OK( - syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({collectionModificationOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get())); - stopCapturingLogMessages(); - ASSERT_TRUE(rollbackSource.called); - for (const auto& message : getCapturedLogMessages()) { - ASSERT_TRUE(message.find("ignoring op with no _id during rollback. ns: test.t") == - std::string::npos); - } - - // Make sure the collection options are correct. - AutoGetCollectionForReadCommand autoColl(_opCtx.get(), NamespaceString("test.t")); - auto collAfterRollbackOptions = - autoColl.getCollection()->getCatalogEntry()->getCollectionOptions(_opCtx.get()); - ASSERT(collAfterRollbackOptions.toBSON().isEmpty()); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, - FullRemoteCollectionValidationOptionsAndEmptyLocalValidationOptions) { - // Empty local collection options. - CollectionOptions localCollOptions; - - // Full remote collection validation options. - BSONObj remoteCollOptionsObj = - BSON("validator" << BSON("x" << BSON("$exists" << 1)) << "validationLevel" - << "moderate" - << "validationAction" - << "warn"); - - resyncCollectionOptionsTest(localCollOptions, remoteCollOptionsObj); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, - PartialRemoteCollectionValidationOptionsAndEmptyLocalValidationOptions) { - CollectionOptions localCollOptions; - - BSONObj remoteCollOptionsObj = BSON("validationLevel" - << "moderate" - << "validationAction" - << "warn"); - - resyncCollectionOptionsTest(localCollOptions, remoteCollOptionsObj); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, - PartialRemoteCollectionValidationOptionsAndFullLocalValidationOptions) { - CollectionOptions localCollOptions; - localCollOptions.validator = BSON("x" << BSON("$exists" << 1)); - localCollOptions.validationLevel = "moderate"; - localCollOptions.validationAction = "warn"; - - BSONObj remoteCollOptionsObj = BSON("validationLevel" - << "strict" - << "validationAction" - << "error"); - - - resyncCollectionOptionsTest(localCollOptions, remoteCollOptionsObj); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, - EmptyRemoteCollectionValidationOptionsAndEmptyLocalValidationOptions) { - CollectionOptions localCollOptions; - - BSONObj remoteCollOptionsObj = BSONObj(); - - resyncCollectionOptionsTest(localCollOptions, remoteCollOptionsObj); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, - EmptyRemoteCollectionValidationOptionsAndFullLocalValidationOptions) { - CollectionOptions localCollOptions; - localCollOptions.validator = BSON("x" << BSON("$exists" << 1)); - localCollOptions.validationLevel = "moderate"; - localCollOptions.validationAction = "warn"; - - BSONObj remoteCollOptionsObj = BSONObj(); - - resyncCollectionOptionsTest(localCollOptions, remoteCollOptionsObj); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, LocalTempCollectionRemotePermanentCollection) { - CollectionOptions localCollOptions; - localCollOptions.temp = true; - - BSONObj remoteCollOptionsObj = BSONObj(); - - resyncCollectionOptionsTest(localCollOptions, remoteCollOptionsObj); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, 
LocalPermanentCollectionRemoteTempCollection) { - CollectionOptions localCollOptions; - - BSONObj remoteCollOptionsObj = BSON("temp" << true); - - resyncCollectionOptionsTest(localCollOptions, remoteCollOptionsObj); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, BothCollectionsTemp) { - CollectionOptions localCollOptions; - localCollOptions.temp = true; - - BSONObj remoteCollOptionsObj = BSON("temp" << true); - - resyncCollectionOptionsTest(localCollOptions, remoteCollOptionsObj); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, ChangingTempStatusAlsoChangesOtherCollectionOptions) { - CollectionOptions localCollOptions; - localCollOptions.temp = true; - - BSONObj remoteCollOptionsObj = BSON("validationLevel" - << "strict" - << "validationAction" - << "error"); - - resyncCollectionOptionsTest(localCollOptions, remoteCollOptionsObj); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, EmptyCollModResyncsCollectionMetadata) { - CollectionOptions localCollOptions; - localCollOptions.validator = BSON("x" << BSON("$exists" << 1)); - localCollOptions.validationLevel = "moderate"; - localCollOptions.validationAction = "warn"; - - BSONObj remoteCollOptionsObj = BSONObj(); - - resyncCollectionOptionsTest(localCollOptions, - remoteCollOptionsObj, - BSON("collMod" - << "coll"), - "coll"); -} - -void resyncInconsistentUUIDsTest(OperationContext* opCtx, - ReplicationCoordinator* coordinator, - ReplicationProcess* replicationProcess, - OptionalCollectionUUID localUUID, - OptionalCollectionUUID remoteUUID, - OptionalCollectionUUID expectedUUIDAfterRollback) { - createOplog(opCtx); - CollectionOptions localOptions; - localOptions.uuid = localUUID; - RollbackTest::_createCollection(opCtx, "test.t", localOptions); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - BSONObj collModCmd = BSON("collMod" - << "t" - << "noPadding" - << false); - auto collectionModificationOperation = - RollbackTest::makeCommandOp(Timestamp(Seconds(2), 0), boost::none, "test.t", collModCmd, 2); - - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog, OptionalCollectionUUID uuid) - : RollbackSourceMock(std::move(oplog)), called(false), _uuid(uuid) {} - - // Remote collection options are empty. - StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const { - called = true; - if (_uuid) { - return BSON("options" << BSONObj() << "info" << BSON("uuid" << _uuid.get())); - } else { - return BSON("options" << BSONObj()); - } - } - mutable bool called; - - private: - OptionalCollectionUUID _uuid; - }; - - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - })), - remoteUUID); - - ASSERT_OK( - syncRollbackNoUUID(opCtx, - OplogInterfaceMock({collectionModificationOperation, commonOperation}), - rollbackSource, - {}, - coordinator, - replicationProcess)); - ASSERT_TRUE(rollbackSource.called); - - // Make sure the collection options are correct. 
- AutoGetCollectionForReadCommand autoColl(opCtx, NamespaceString("test.t")); - auto collAfterRollbackOptions = - autoColl.getCollection()->getCatalogEntry()->getCollectionOptions(opCtx); - BSONObjBuilder expectedOptionsBob; - if (expectedUUIDAfterRollback) { - expectedUUIDAfterRollback.get().appendToBuilder(&expectedOptionsBob, "uuid"); - } - ASSERT_BSONOBJ_EQ(expectedOptionsBob.obj(), collAfterRollbackOptions.toBSON()); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, LocalUUIDGetsRemovedOnConflict) { - resyncInconsistentUUIDsTest(_opCtx.get(), - _coordinator, - _replicationProcess.get(), - UUID::gen(), - UUID::gen(), - boost::none); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, LocalUUIDWithNoRemoteUUIDGetsRemoved) { - resyncInconsistentUUIDsTest(_opCtx.get(), - _coordinator, - _replicationProcess.get(), - UUID::gen(), - boost::none, - boost::none); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, RemoteUUIDWithNoLocalUUIDGetsAddedWhileDowngrading) { - serverGlobalParams.featureCompatibility.setVersion( - ServerGlobalParams::FeatureCompatibility::Version::kDowngradingTo34); - auto remoteUuid = UUID::gen(); - resyncInconsistentUUIDsTest( - _opCtx.get(), _coordinator, _replicationProcess.get(), boost::none, remoteUuid, remoteUuid); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, - RemoteUUIDWithNoLocalUUIDDoesNotGetAddedWhileUpgrading) { - serverGlobalParams.featureCompatibility.setVersion( - ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo36); - resyncInconsistentUUIDsTest(_opCtx.get(), - _coordinator, - _replicationProcess.get(), - boost::none, - UUID::gen(), - boost::none); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, - RemoteUUIDWithNoLocalUUIDDoesNotGetAddedWhileDowngraded) { - serverGlobalParams.featureCompatibility.setVersion( - ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo34); - resyncInconsistentUUIDsTest(_opCtx.get(), - _coordinator, - _replicationProcess.get(), - boost::none, - UUID::gen(), - boost::none); -} - -TEST_F(RollbackResyncsCollectionOptionsTest, SameUUIDIsNotChanged) { - UUID uuid = UUID::gen(); - resyncInconsistentUUIDsTest( - _opCtx.get(), _coordinator, _replicationProcess.get(), uuid, uuid, uuid); -} - -TEST_F(RSRollbackTest, RollbackCollectionModificationCommandInvalidCollectionOptions) { - createOplog(_opCtx.get()); - auto commonOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(1), 0) << "h" << 1LL), RecordId(1)); - auto collectionModificationOperation = - std::make_pair(BSON("ts" << Timestamp(Seconds(2), 0) << "h" << 1LL << "op" - << "c" - << "ns" - << "test.t" - << "o" - << BSON("collMod" - << "t" - << "noPadding" - << false)), - RecordId(2)); - class RollbackSourceLocal : public RollbackSourceMock { - public: - RollbackSourceLocal(std::unique_ptr<OplogInterface> oplog) - : RollbackSourceMock(std::move(oplog)) {} - StatusWith<BSONObj> getCollectionInfo(const NamespaceString& nss) const { - return BSON("name" << nss.ns() << "options" << 12345); - } - }; - RollbackSourceLocal rollbackSource(std::unique_ptr<OplogInterface>(new OplogInterfaceMock({ - commonOperation, - }))); - _createCollection(_opCtx.get(), "test.t", CollectionOptions()); - auto status = - syncRollbackNoUUID(_opCtx.get(), - OplogInterfaceMock({collectionModificationOperation, commonOperation}), - rollbackSource, - {}, - _coordinator, - _replicationProcess.get()); - ASSERT_EQUALS(ErrorCodes::UnrecoverableRollbackError, status.code()); - ASSERT_STRING_CONTAINS(status.reason(), "Failed to parse options"); -} - -TEST(RSRollbackTest, 
LocalEntryWithoutNsIsFatal) { - const auto validOplogEntry = fromjson("{op: 'i', ns: 'test.t', o: {_id:1, a: 1}}"); - FixUpInfo fui; - ASSERT_OK(updateFixUpInfoFromLocalOplogEntry(fui, validOplogEntry)); - ASSERT_THROWS(updateFixUpInfoFromLocalOplogEntry(fui, validOplogEntry.removeField("ns")) - .transitional_ignore(), - RSFatalException); -} - -TEST(RSRollbackTest, LocalEntryWithoutOIsFatal) { - const auto validOplogEntry = fromjson("{op: 'i', ns: 'test.t', o: {_id:1, a: 1}}"); - FixUpInfo fui; - ASSERT_OK(updateFixUpInfoFromLocalOplogEntry(fui, validOplogEntry)); - ASSERT_THROWS(updateFixUpInfoFromLocalOplogEntry(fui, validOplogEntry.removeField("o")) - .transitional_ignore(), - RSFatalException); -} - -TEST(RSRollbackTest, LocalEntryWithoutO2IsFatal) { - const auto validOplogEntry = - fromjson("{op: 'u', ns: 'test.t', o2: {_id: 1}, o: {_id:1, a: 1}}"); - FixUpInfo fui; - ASSERT_OK(updateFixUpInfoFromLocalOplogEntry(fui, validOplogEntry)); - ASSERT_THROWS(updateFixUpInfoFromLocalOplogEntry(fui, validOplogEntry.removeField("o2")) - .transitional_ignore(), - RSFatalException); -} - -TEST_F(RSRollbackTest, RollbackReturnsImmediatelyOnFailureToTransitionToRollback) { - // On failing to transition to ROLLBACK, rollback() should return immediately and not call - // syncRollback(). We provide an empty oplog so that if syncRollback() is called erroneously, - // we would go fatal. - OplogInterfaceMock localOplogWithSingleOplogEntry({makeNoopOplogEntryAndRecordId(Seconds(1))}); - RollbackSourceMock rollbackSourceWithInvalidOplog(stdx::make_unique<OplogInterfaceMock>()); - - // Inject ReplicationCoordinator::setFollowerMode() error. We set the current member state - // because it will be logged by rollback() on failing to transition to ROLLBACK. - ASSERT_OK(_coordinator->setFollowerMode(MemberState::RS_SECONDARY)); - _coordinator->failSettingFollowerMode(MemberState::RS_ROLLBACK, ErrorCodes::NotSecondary); - - startCapturingLogMessages(); - rollbackNoUUID(_opCtx.get(), - localOplogWithSingleOplogEntry, - rollbackSourceWithInvalidOplog, - {}, - _coordinator, - _replicationProcess.get()); - stopCapturingLogMessages(); - - ASSERT_EQUALS(1, countLogLinesContaining("Cannot transition from SECONDARY to ROLLBACK")); - ASSERT_EQUALS(MemberState(MemberState::RS_SECONDARY), _coordinator->getMemberState()); -} - -DEATH_TEST_F( - RSRollbackTest, - RollbackUnrecoverableRollbackErrorTriggersFatalAssertion, - "Unable to complete rollback. A full resync may be needed: " - "UnrecoverableRollbackError: need to rollback, but unable to determine common point " - "between local and remote oplog: InvalidSyncSource: remote oplog empty or unreadable") { - - // rollback() should abort on getting UnrecoverableRollbackError from syncRollback(). An empty - // local oplog will make syncRollback() return the intended error. - OplogInterfaceMock localOplogWithSingleOplogEntry({makeNoopOplogEntryAndRecordId(Seconds(1))}); - RollbackSourceMock rollbackSourceWithInvalidOplog(stdx::make_unique<OplogInterfaceMock>()); - - rollbackNoUUID(_opCtx.get(), - localOplogWithSingleOplogEntry, - rollbackSourceWithInvalidOplog, - {}, - _coordinator, - _replicationProcess.get()); -} - -TEST_F(RSRollbackTest, RollbackLogsRetryMessageAndReturnsOnNonUnrecoverableRollbackError) { - // If local oplog is empty, syncRollback() returns OplogStartMissing (instead of - // UnrecoverableRollbackError when the remote oplog is missing). rollback() should log a message - // about retrying rollback later before returning. 
- OplogInterfaceMock localOplogWithNoEntries; - OplogInterfaceMock::Operations remoteOperations({makeNoopOplogEntryAndRecordId(Seconds(1))}); - auto remoteOplog = stdx::make_unique<OplogInterfaceMock>(remoteOperations); - RollbackSourceMock rollbackSourceWithValidOplog(std::move(remoteOplog)); - auto noopSleepSecsFn = [](int) {}; - - startCapturingLogMessages(); - rollbackNoUUID(_opCtx.get(), - localOplogWithNoEntries, - rollbackSourceWithValidOplog, - {}, - _coordinator, - _replicationProcess.get(), - noopSleepSecsFn); - stopCapturingLogMessages(); - - ASSERT_EQUALS( - 1, countLogLinesContaining("Rollback cannot complete at this time (retrying later)")); - ASSERT_EQUALS(MemberState(MemberState::RS_RECOVERING), _coordinator->getMemberState()); -} - -DEATH_TEST_F(RSRollbackTest, - RollbackTriggersFatalAssertionOnDetectingShardIdentityDocumentRollback, - "shardIdentity document rollback detected. Shutting down to clear in-memory sharding " - "state. Restarting this process should safely return it to a healthy state") { - auto commonOperation = makeNoopOplogEntryAndRecordId(Seconds(1)); - OplogInterfaceMock localOplog({commonOperation}); - RollbackSourceMock rollbackSource( - std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation}))); - - ASSERT_FALSE(ShardIdentityRollbackNotifier::get(_opCtx.get())->didRollbackHappen()); - ShardIdentityRollbackNotifier::get(_opCtx.get())->recordThatRollbackHappened(); - ASSERT_TRUE(ShardIdentityRollbackNotifier::get(_opCtx.get())->didRollbackHappen()); - - createOplog(_opCtx.get()); - rollbackNoUUID( - _opCtx.get(), localOplog, rollbackSource, {}, _coordinator, _replicationProcess.get()); -} - -DEATH_TEST_F( - RSRollbackTest, - RollbackTriggersFatalAssertionOnFailingToTransitionToRecoveringAfterSyncRollbackReturns, - "Failed to transition into RECOVERING; expected to be in state ROLLBACK; found self in " - "ROLLBACK") { - auto commonOperation = makeNoopOplogEntryAndRecordId(Seconds(1)); - OplogInterfaceMock localOplog({commonOperation}); - RollbackSourceMock rollbackSource( - std::unique_ptr<OplogInterface>(new OplogInterfaceMock({commonOperation}))); - - _coordinator->failSettingFollowerMode(MemberState::RS_RECOVERING, ErrorCodes::IllegalOperation); - - createOplog(_opCtx.get()); - rollbackNoUUID( - _opCtx.get(), localOplog, rollbackSource, {}, _coordinator, _replicationProcess.get()); -} - -// The testcases used here are trying to detect off-by-one errors in -// FixUpInfo::removeAllDocsToRefectchFor. -TEST(FixUpInfoTest, RemoveAllDocsToRefetchForWorks) { - const auto normalHolder = BSON("" << OID::gen()); - const auto normalKey = normalHolder.firstElement(); - - // Can't use ASSERT_EQ with this since it isn't ostream-able. Failures will at least give you - // the size. If that isn't enough, use GDB. - using DocSet = std::set<DocID>; - - FixUpInfo fui; - fui.docsToRefetch = { - DocID::minFor("a"), - DocID{{}, "a", normalKey}, - DocID::maxFor("a"), - - DocID::minFor("b"), - DocID{{}, "b", normalKey}, - DocID::maxFor("b"), - - DocID::minFor("c"), - DocID{{}, "c", normalKey}, - DocID::maxFor("c"), - }; - - // Remove from the middle. - fui.removeAllDocsToRefetchFor("b"); - ASSERT((fui.docsToRefetch == - DocSet{ - DocID::minFor("a"), - DocID{{}, "a", normalKey}, - DocID::maxFor("a"), - - DocID::minFor("c"), - DocID{{}, "c", normalKey}, - DocID::maxFor("c"), - })) - << "remaining docs: " << fui.docsToRefetch.size(); - - // Remove from the end. 
- fui.removeAllDocsToRefetchFor("c"); - ASSERT((fui.docsToRefetch == - DocSet{ - DocID::minFor("a"), // This comment helps clang-format. - DocID{{}, "a", normalKey}, - DocID::maxFor("a"), - })) - << "remaining docs: " << fui.docsToRefetch.size(); - - // Everything else. - fui.removeAllDocsToRefetchFor("a"); - ASSERT((fui.docsToRefetch == DocSet{})) << "remaining docs: " << fui.docsToRefetch.size(); -} - -} // namespace diff --git a/src/mongo/db/repl/sync_tail_test.cpp b/src/mongo/db/repl/sync_tail_test.cpp index aa565b57da1..b5447bf4087 100644 --- a/src/mongo/db/repl/sync_tail_test.cpp +++ b/src/mongo/db/repl/sync_tail_test.cpp @@ -1655,154 +1655,6 @@ TEST_F(IdempotencyTest, DropDatabaseSucceeds) { ASSERT_OK(runOpInitialSync(op)); } -TEST_F(SyncTailTest, FailOnDropFCVCollectionInRecovering) { - auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); - ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); - ASSERT_OK( - ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); - - auto cmd = BSON("drop" << fcvNS.coll()); - auto op = makeCommandOplogEntry(nextOpTime(), fcvNS, cmd); - ASSERT_EQUALS(runOpSteadyState(op), ErrorCodes::OplogOperationUnsupported); -} - -TEST_F(SyncTailTest, SuccessOnUpdateFCV34TargetVersionUnsetDocumentInRecovering) { - auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); - ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); - ASSERT_OK( - ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); - - auto op = makeUpdateDocumentOplogEntry( - nextOpTime(), - fcvNS, - BSON("_id" << FeatureCompatibilityVersion::kParameterName), - BSON("$set" << BSON(FeatureCompatibilityVersion::kVersionField - << FeatureCompatibilityVersionCommandParser::kVersion34) - << "$unset" - << BSON(FeatureCompatibilityVersion::kTargetVersionField << 1))); - ASSERT_OK(runOpSteadyState(op)); -} - -TEST_F(SyncTailTest, FailOnUpdateFCV34TargetVersion34DocumentInRecovering) { - auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); - ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); - ASSERT_OK( - ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); - - auto op = makeUpdateDocumentOplogEntry( - nextOpTime(), - fcvNS, - BSON("_id" << FeatureCompatibilityVersion::kParameterName), - BSON("$set" << BSON(FeatureCompatibilityVersion::kVersionField - << FeatureCompatibilityVersionCommandParser::kVersion34 - << FeatureCompatibilityVersion::kTargetVersionField - << FeatureCompatibilityVersionCommandParser::kVersion34))); - ASSERT_EQUALS(runOpSteadyState(op), ErrorCodes::OplogOperationUnsupported); -} - -TEST_F(SyncTailTest, SuccessOnDropFCVCollectionInSecondary) { - auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); - ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); - ASSERT_OK( - ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_SECONDARY)); - - auto cmd = BSON("drop" << fcvNS.coll()); - auto op = makeCommandOplogEntry(nextOpTime(), fcvNS, cmd); - ASSERT_OK(runOpSteadyState(op)); -} - -TEST_F(SyncTailTest, SuccessOnUpdateFCV34TargetVersion34DocumentInSecondary) { - auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); - ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); - ASSERT_OK( - ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_SECONDARY)); - - 
auto op = makeUpdateDocumentOplogEntry( - nextOpTime(), - fcvNS, - BSON("_id" << FeatureCompatibilityVersion::kParameterName), - BSON("$set" << BSON(FeatureCompatibilityVersion::kVersionField - << FeatureCompatibilityVersionCommandParser::kVersion34 - << FeatureCompatibilityVersion::kTargetVersionField - << FeatureCompatibilityVersionCommandParser::kVersion34))); - ASSERT_OK(runOpSteadyState(op)); -} - -TEST_F(SyncTailTest, SuccessOnUpdateFCV36TargetVersionUnsetDocumentInRecovering) { - auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); - CollectionOptions options; - options.uuid = UUID::gen(); - ::mongo::repl::createCollection(_opCtx.get(), fcvNS, options); - ASSERT_OK( - ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); - - auto op = makeUpdateDocumentOplogEntry( - nextOpTime(), - fcvNS, - BSON("_id" << FeatureCompatibilityVersion::kParameterName), - BSON("$set" << BSON(FeatureCompatibilityVersion::kVersionField - << FeatureCompatibilityVersionCommandParser::kVersion36) - << "$unset" - << BSON(FeatureCompatibilityVersion::kTargetVersionField << 1))); - ASSERT_OK(runOpSteadyState(op)); -} - -TEST_F(SyncTailTest, SuccessOnUpdateFCV34TargetVersion36DocumentInRecovering) { - auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); - ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); - ASSERT_OK( - ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); - - auto op = makeUpdateDocumentOplogEntry( - nextOpTime(), - fcvNS, - BSON("_id" << FeatureCompatibilityVersion::kParameterName), - BSON("$set" << BSON(FeatureCompatibilityVersion::kVersionField - << FeatureCompatibilityVersionCommandParser::kVersion34 - << FeatureCompatibilityVersion::kTargetVersionField - << FeatureCompatibilityVersionCommandParser::kVersion36))); - ASSERT_OK(runOpSteadyState(op)); -} - -TEST_F(SyncTailTest, UpdateToFCVCollectionBesidesFCVDocumentSucceedsInRecovering) { - auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); - ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); - ASSERT_OK( - ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_RECOVERING)); - - auto op = makeUpdateDocumentOplogEntry( - nextOpTime(), - fcvNS, - BSON("_id" - << "other"), - BSON("$set" << BSON(FeatureCompatibilityVersion::kVersionField - << FeatureCompatibilityVersionCommandParser::kVersion34 - << FeatureCompatibilityVersion::kTargetVersionField - << FeatureCompatibilityVersionCommandParser::kVersion34))); - ASSERT_OK(runOpSteadyState(op)); -} - -TEST_F(SyncTailTest, UpgradeWithNoUUIDFailsInSecondary) { - // Set fCV to 3.4 so the node does not create a UUID for the collection. 
- serverGlobalParams.featureCompatibility.setVersion( - ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo34); - - auto fcvNS = NamespaceString(FeatureCompatibilityVersion::kCollection); - ::mongo::repl::createCollection(_opCtx.get(), fcvNS, CollectionOptions()); - ASSERT_OK( - ReplicationCoordinator::get(_opCtx.get())->setFollowerMode(MemberState::RS_SECONDARY)); - - auto op = makeUpdateDocumentOplogEntry( - nextOpTime(), - fcvNS, - BSON("_id" << FeatureCompatibilityVersion::kParameterName), - BSON("$set" << BSON(FeatureCompatibilityVersion::kVersionField - << FeatureCompatibilityVersionCommandParser::kVersion36) - << "$unset" - << BSON(FeatureCompatibilityVersion::kTargetVersionField << 1))); - ASSERT_EQUALS(runOpSteadyState(op), ErrorCodes::IllegalOperation); -} - TEST_F(SyncTailTest, DropDatabaseSucceedsInRecovering) { // Choose `system.profile` so the storage engine doesn't expect the drop to be timestamped. auto ns = NamespaceString("foo.system.profile");