diff options
author | Gregory Noma <gregory.noma@gmail.com> | 2021-08-20 23:57:49 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-08-21 04:20:06 +0000 |
commit | 9862b5bd81d901e1a36f98df73069d1bb754a921 (patch) | |
tree | b26d05e0362b05c2227bf2c4b33ec439de17d7c3 | |
parent | 71058411b9d7ba713d206918d9f1c6c00677a9fa (diff) | |
download | mongo-9862b5bd81d901e1a36f98df73069d1bb754a921.tar.gz |
Revert "SERVER-59058 Enforce fast count validation in ValidateCollections Python hook"
This reverts commit 038958ef6ef7351f669d0ddf893546095ad9ab3c.
21 files changed, 116 insertions, 47 deletions
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml index 2c7f3423e9c..2f19da95d58 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml @@ -203,10 +203,6 @@ executor: kill: true - class: CheckReplDBHash - class: ValidateCollections - shell_options: - global_vars: - TestData: - skipEnforceFastCountOnValidate: true - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml index 37441b625e6..7bd2fb85758 100644 --- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml +++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml @@ -250,10 +250,6 @@ executor: wait_for_mongos_retarget: true - class: CheckReplDBHash - class: ValidateCollections - shell_options: - global_vars: - TestData: - skipEnforceFastCountOnValidate: true - class: CheckOrphansDeleted - class: CleanupConcurrencyWorkloads fixture: diff --git a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml index d9c4581dd16..beedb9dbdd8 100644 --- a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_kill_primary_jscore_passthrough.yml @@ -397,7 +397,7 @@ executor: shell_options: global_vars: TestData: - skipEnforceFastCountOnValidate: true + checkCollectionCounts: true - class: CleanEveryN n: 20 fixture: diff --git a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml index ee88802e137..16a1ea2dd33 100644 --- a/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/multi_shard_multi_stmt_txn_stepdown_primary_jscore_passthrough.yml @@ -392,6 +392,10 @@ executor: - class: CheckReplOplogs - class: CheckReplDBHash - class: ValidateCollections + shell_options: + global_vars: + TestData: + checkCollectionCounts: true - class: CleanEveryN n: 20 fixture: diff --git a/buildscripts/resmokeconfig/suites/replica_sets_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_kill_primary_jscore_passthrough.yml index 70509fc7c5b..4d7ac98666e 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_kill_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_kill_primary_jscore_passthrough.yml @@ -156,10 +156,6 @@ executor: - class: CheckReplOplogs - class: CheckReplDBHash - class: ValidateCollections - shell_options: - global_vars: - TestData: - skipEnforceFastCountOnValidate: true - class: CleanEveryN n: 20 fixture: diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml index 470ae885650..433d8a7ed07 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_kill_primary_jscore_passthrough.yml @@ -346,10 +346,6 @@ executor: - class: CheckReplOplogs - class: CheckReplDBHash - class: ValidateCollections - shell_options: - global_vars: - TestData: - skipEnforceFastCountOnValidate: true - class: CleanEveryN n: 20 fixture: diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml index 0a7300a1a8b..81d5855d266 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml @@ -335,6 +335,10 @@ executor: - class: CheckReplOplogs - class: CheckReplDBHash - class: ValidateCollections + shell_options: + global_vars: + TestData: + checkCollectionCounts: true - class: CleanEveryN n: 20 fixture: diff --git a/buildscripts/resmokeconfig/suites/replica_sets_reconfig_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_reconfig_kill_primary_jscore_passthrough.yml index afb8a8e2373..7462c89afb2 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_reconfig_kill_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_reconfig_kill_primary_jscore_passthrough.yml @@ -180,10 +180,6 @@ executor: - class: CheckReplOplogs - class: CheckReplDBHash - class: ValidateCollections - shell_options: - global_vars: - TestData: - skipEnforceFastCountOnValidate: true - class: CleanEveryN n: 20 fixture: diff --git a/buildscripts/resmokeconfig/suites/replica_sets_terminate_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_terminate_primary_jscore_passthrough.yml index 57472342f04..22e53534b16 100644 --- a/buildscripts/resmokeconfig/suites/replica_sets_terminate_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/replica_sets_terminate_primary_jscore_passthrough.yml @@ -139,6 +139,10 @@ executor: - class: CheckReplOplogs - class: CheckReplDBHash - class: ValidateCollections + shell_options: + global_vars: + TestData: + checkCollectionCounts: true - class: CleanEveryN n: 20 fixture: diff --git a/buildscripts/resmokeconfig/suites/tenant_migration_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/tenant_migration_kill_primary_jscore_passthrough.yml index f283b073351..c5d6a5c74a3 100644 --- a/buildscripts/resmokeconfig/suites/tenant_migration_kill_primary_jscore_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/tenant_migration_kill_primary_jscore_passthrough.yml @@ -289,10 +289,6 @@ executor: global_vars: TestData: *TestData - class: ValidateCollections - shell_options: - global_vars: - TestData: - skipEnforceFastCountOnValidate: true - class: CleanEveryN n: 1 fixture: diff --git a/buildscripts/resmokelib/powercycle/powercycle.py b/buildscripts/resmokelib/powercycle/powercycle.py index 067a706d9d3..56f17b02950 100755 --- a/buildscripts/resmokelib/powercycle/powercycle.py +++ b/buildscripts/resmokelib/powercycle/powercycle.py @@ -1560,9 +1560,7 @@ def main(parser_actions, options): # pylint: disable=too-many-branches,too-many host_port = f"localhost:{secret_port}" new_config_file = NamedTempFile.create(suffix=".yml", directory="tmp") temp_client_files.append(new_config_file) - validation_test_data = { - "skipValidationOnNamespaceNotFound": True, "allowUncleanShutdowns": True - } + validation_test_data = {"skipValidationOnNamespaceNotFound": True} new_resmoke_config(with_external_server, new_config_file, validation_test_data) ret, output = resmoke_client(mongo_repo_root_dir, mongo_path, host_port, "jstests/hooks/run_validate_collections.js", new_config_file) diff --git a/buildscripts/resmokelib/testing/hooks/periodic_kill_secondaries.py b/buildscripts/resmokelib/testing/hooks/periodic_kill_secondaries.py index 8e70da8a09f..33b6a405cd0 100644 --- a/buildscripts/resmokelib/testing/hooks/periodic_kill_secondaries.py +++ b/buildscripts/resmokelib/testing/hooks/periodic_kill_secondaries.py @@ -218,9 +218,7 @@ class PeriodicKillSecondariesTestCase(interface.DynamicTestCase): node.preserve_dbpath = preserve_dbpaths[i] def _validate_collections(self, test_report): - validate_test_case = validate.ValidateCollections( - self._hook.logger, self.fixture, - {'global_vars': {'TestData': {'skipEnforceFastCountOnValidate': True}}}) + validate_test_case = validate.ValidateCollections(self._hook.logger, self.fixture) validate_test_case.before_suite(test_report) validate_test_case.before_test(self, test_report) validate_test_case.after_test(self, test_report) diff --git a/jstests/hooks/run_check_repl_dbhash.js b/jstests/hooks/run_check_repl_dbhash.js index ec5daf9894b..b2432af5b76 100644 --- a/jstests/hooks/run_check_repl_dbhash.js +++ b/jstests/hooks/run_check_repl_dbhash.js @@ -13,6 +13,9 @@ function checkReplicatedDataHashesThread(hosts) { const excludedDBs = jsTest.options().excludedDBsFromDBHash; const rst = new ReplSetTest(hosts[0]); rst.checkReplicatedDataHashes(undefined, excludedDBs); + if (TestData.checkCollectionCounts) { + rst.checkCollectionCounts(); + } return {ok: 1}; } catch (e) { return {ok: 0, hosts: hosts, error: e.toString(), stack: e.stack}; diff --git a/jstests/hooks/validate_collections.js b/jstests/hooks/validate_collections.js index 1ef1a9a761a..4b8e02bdcae 100644 --- a/jstests/hooks/validate_collections.js +++ b/jstests/hooks/validate_collections.js @@ -111,13 +111,7 @@ function CollectionValidator() { const dbNames = conn.getDBNames(); for (let dbName of dbNames) { - const validateRes = validatorFunc(conn.getDB(dbName), { - full: true, - // TODO (SERVER-24266): Always enforce fast counts, once they are always - // accurate. - enforceFastCount: - !TestData.skipEnforceFastCountOnValidate && !TestData.allowUncleanShutdowns, - }); + const validateRes = validatorFunc(conn.getDB(dbName), {full: true}); if (validateRes.ok !== 1) { return {ok: 0, host: host, validateRes: validateRes}; } diff --git a/jstests/replsets/libs/initial_sync_test.js b/jstests/replsets/libs/initial_sync_test.js index 1b7c711788b..235fa279b16 100644 --- a/jstests/replsets/libs/initial_sync_test.js +++ b/jstests/replsets/libs/initial_sync_test.js @@ -126,13 +126,17 @@ function InitialSyncTest( return isNodeInState(secondary, ReplSetTest.State.SECONDARY); } - /** - * Asserts that there are no open transactions. - */ - function assertNoOpenTxns() { - const status = assert.commandWorked(primary.adminCommand('serverStatus')); + function checkDataConsistency() { + const name = replSet.name; + + // Make sure there are no open transactions. + let status = assert.commandWorked(primary.adminCommand('serverStatus')); assert(typeof status.transactions === "object", status); assert.eq(0, status.transactions.currentOpen, status.transactions); + + // We must check counts before validate is called during stopSet since validate fixes + // counts. + replSet.checkCollectionCounts(name); } /** @@ -283,7 +287,7 @@ function InitialSyncTest( */ this.stop = function() { transitionIfAllowed(State.kStopped); - assertNoOpenTxns(); + checkDataConsistency(); return replSet.stopSet(); }; } diff --git a/jstests/replsets/libs/rollback_test.js b/jstests/replsets/libs/rollback_test.js index 5e70cd96614..04eea18066c 100644 --- a/jstests/replsets/libs/rollback_test.js +++ b/jstests/replsets/libs/rollback_test.js @@ -250,6 +250,11 @@ function RollbackTest(name = "RollbackTest", replSet) { rst.nodes.forEach(TwoPhaseDropCollectionTest.waitForAllCollectionDropsToComplete); const name = rst.name; + // We must check counts before we validate since validate fixes counts. We cannot check + // counts if unclean shutdowns occur. + if (!TestData.allowUncleanShutdowns || !TestData.rollbackShutdowns) { + rst.checkCollectionCounts(name); + } rst.checkOplogs(name); rst.checkReplicatedDataHashes(name); collectionValidator.validateNodes(rst.nodeList()); diff --git a/jstests/replsets/libs/rollback_test_deluxe.js b/jstests/replsets/libs/rollback_test_deluxe.js index 2c9382e9908..29f1749b473 100644 --- a/jstests/replsets/libs/rollback_test_deluxe.js +++ b/jstests/replsets/libs/rollback_test_deluxe.js @@ -225,6 +225,12 @@ function RollbackTestDeluxe(name = "FiveNodeDoubleRollbackTest", replSet) { }); const name = rst.name; + // Check collection counts except when unclean shutdowns are allowed, as such a shutdown is + // not guaranteed to preserve accurate collection counts. This count check must occur before + // collection validation as the validate command will fix incorrect counts. + if (!TestData.allowUncleanShutdowns || !TestData.rollbackShutdowns) { + rst.checkCollectionCounts(name); + } rst.checkOplogs(name); rst.checkReplicatedDataHashes(name); collectionValidator.validateNodes(rst.nodeList()); diff --git a/jstests/replsets/recovery_after_clean_shutdown_but_not_all_writes_in_snapshot.js b/jstests/replsets/recovery_after_clean_shutdown_but_not_all_writes_in_snapshot.js index ab194ae5168..3e73080e098 100644 --- a/jstests/replsets/recovery_after_clean_shutdown_but_not_all_writes_in_snapshot.js +++ b/jstests/replsets/recovery_after_clean_shutdown_but_not_all_writes_in_snapshot.js @@ -65,6 +65,9 @@ assert.commandWorked(primaryDB[collNoStableCreation].runCommand("create", w1)); primaryDB[coll].insert({_id: "insertedAfterSnapshottingDisabled"}, w1))); rst.awaitReplication(); +jsTestLog("Checking collection counts after snapshotting has been disabled"); +rst.checkCollectionCounts(); + // Perform a clean shutdown and restart. Note that the 'disableSnapshotting' failpoint will be // unset on each node following the restart. nodes.forEach(node => rst.restart(node)); @@ -77,5 +80,8 @@ assert.commandWorked( primaryDB[collCreatedAfterRestart].insert({_id: "insertedAfterRestart", wMajority})); // Fast metadata count should be correct after restart in the face of a clean shutdown. +jsTestLog("Checking collection counts after clean restart of all nodes"); +rst.checkCollectionCounts(); + rst.stopSet(); }()); diff --git a/jstests/replsets/replsettest_checks_wait_for_secondaries.js b/jstests/replsets/replsettest_checks_wait_for_secondaries.js index 321a6670f9a..861f038b8f1 100644 --- a/jstests/replsets/replsettest_checks_wait_for_secondaries.js +++ b/jstests/replsets/replsettest_checks_wait_for_secondaries.js @@ -1,6 +1,7 @@ /** - * Tests that ReplSetTest consistency checks, namely checkDBHashesForReplSet, wait for secondaries - * to have fully transitioned to SECONDARY state before attempting data reads. + * Tests that ReplSetTest consistency checks, namely checkDBHashesForReplSet and + * checkCollectionCounts, wait for secondaries to have fully transitioned to SECONDARY state before + * attempting data reads. */ (function() { "use strict"; @@ -35,9 +36,15 @@ assert.commandWorked(secondary.adminCommand({ maxTimeMS: kDefaultWaitForFailPointTimeout })); -// Turn off the failpoint and immediately proceeed with checking db hashes. +// Turn off the failpoint and immediately proceeed with collection counts checks. +jsTestLog("Trying checkCollectionCounts"); assert.commandWorked(secondary.adminCommand( {configureFailPoint: "initialSyncHangBeforeCopyingDatabases", mode: "off"})); +rst.checkCollectionCounts(); + +// Restart the node so we can try checkReplicatedDBHashes. +rst.stop(1); +rst.start(1, {startClean: true}); // stopSet() will call checkReplicatedDBHashes rst.stopSet(); diff --git a/jstests/replsets/rollback_transaction_table.js b/jstests/replsets/rollback_transaction_table.js index 3e399517abf..9dfa29ad4f8 100644 --- a/jstests/replsets/rollback_transaction_table.js +++ b/jstests/replsets/rollback_transaction_table.js @@ -230,6 +230,7 @@ assert.eq(upstream.getDB("config").transactions.find().itcount(), 2); // Confirm the nodes are consistent. replTest.checkOplogs(); replTest.checkReplicatedDataHashes(testName); +replTest.checkCollectionCounts(); replTest.stopSet(); }()); diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index 9557422a43e..72e1352884d 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -2670,6 +2670,65 @@ var ReplSetTest = function(opts) { } /** + * Checks that 'fastCount' matches an iterative count for all collections. + */ + this.checkCollectionCounts = function(msgPrefix = 'checkCollectionCounts') { + let success = true; + const errPrefix = `${msgPrefix}, counts did not match for collection`; + + function checkCollectionCount(coll) { + const itCount = coll.find().itcount(); + const fastCount = coll.count(); + if (itCount !== fastCount) { + print(`${errPrefix} ${coll.getFullName()} on ${coll.getMongo().host}.` + + ` itcount: ${itCount}, fast count: ${fastCount}`); + print("Collection info: " + + tojson(coll.getDB().getCollectionInfos({name: coll.getName()}))); + print("Collection stats: " + tojson(coll.stats())); + print("First 10 documents in collection: " + + tojson(coll.find().limit(10).toArray())); + + if (coll.getFullName() == "config.transactions") { + print(`Ignoring fastcount error for ${coll.getFullName()} on ` + + `${coll.getMongo().host}. itcount: ${itCount}, fast count: ${fastCount}`); + return; + } + success = false; + } + } + + function checkCollectionCountsForDB(_db) { + const res = assert.commandWorked( + _db.runCommand({listCollections: 1, includePendingDrops: true})); + const collNames = new DBCommandCursor(_db, res).toArray(); + collNames.forEach(c => checkCollectionCount(_db.getCollection(c.name))); + } + + function checkCollectionCountsForNode(node) { + const dbNames = node.getDBNames(); + dbNames.forEach(dbName => checkCollectionCountsForDB(node.getDB(dbName))); + } + + function checkCollectionCountsForReplSet(rst) { + print("checkCollectionCountsForReplSet waiting for secondaries to be ready: " + + tojson(rst.nodes)); + this.awaitSecondaryNodes(); + + rst.nodes.forEach(node => { + // Arbiters have no replicated collections. + if (isNodeArbiter(node)) { + print("checkCollectionCounts skipping counts for arbiter: " + node.host); + return; + } + checkCollectionCountsForNode(node); + }); + assert(success, `Collection counts did not match. search for '${errPrefix}' in logs.`); + } + + this.checkReplicaSet(checkCollectionCountsForReplSet, _determineLiveSecondaries(), this); + }; + + /** * Waits for an initial connection to a given node. Should only be called after the node's * process has already been started. Updates the corresponding entry in 'this.nodes' with the * newly established connection object. |