author     Pavi Vetriselvan <pvselvan@umich.edu>             2020-03-16 11:13:36 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-05-14 12:42:24 +0000
commit     f4528563033d933ca920b3e4b2a5e3344e198a5c (patch)
tree       8c20856b344e02483dceb1e13f35533e41db3ecd
parent     cd9fbb56900343e7b1193922a2c4b197895e7f56 (diff)
SERVER-45094 add disabled replica set reconfig passthroughs
SERVER-45094 add retryable read logic to network_error_and_txn_override.js (cherry picked from commit f59f63db6c37c0d4657b57d559c95d830b0e34c2)
SERVER-45094 add replica_sets_reconfig_jscore_passthrough suite (cherry picked from commit 4d91fac171cbe3f2af53d9258965399e648a1947)
SERVER-45094 use w:1 writes and remove causal consistency in reconfig passthrough (cherry picked from commit a43cb23defc6182d08a7814e4731ef98f2d30b6a)
SERVER-45094 add replica_sets_reconfig_jscore_stepdown_passthrough (cherry picked from commit 81e0ad27c280c02a49beb65ff4473d5dce62b089)
SERVER-45094 add replica_sets_reconfig_kill_primary_jscore_passthrough (cherry picked from commit 2debab7987b24bf902f9a128654ce928441c29a2)
SERVER-47678 stepdown and kill primary reconfig passthroughs should ignore ReplicaSetMonitorErrors (cherry picked from commit 91672e58f1169c7edd684b911f20f62b8a71f8d1)
SERVER-47544 always increase election timeout to 24 hours in passthrough suites (cherry picked from commit 81d53a715f49827a9f2538d4572f9b01f2b12887)
-rw-r--r--  buildscripts/resmokeconfig/suites/replica_sets_reconfig_jscore_passthrough.yml              |  84
-rw-r--r--  buildscripts/resmokeconfig/suites/replica_sets_reconfig_jscore_stepdown_passthrough.yml     | 198
-rw-r--r--  buildscripts/resmokeconfig/suites/replica_sets_reconfig_kill_primary_jscore_passthrough.yml | 214
-rw-r--r--  buildscripts/resmokelib/testing/fixtures/replicaset.py                                      |  20
-rw-r--r--  buildscripts/resmokelib/testing/hooks/reconfig_background.py                                |  71
-rw-r--r--  buildscripts/resmokelib/testing/hooks/stepdown.py                                            |  26
-rw-r--r--  etc/evergreen.yml                                                                            |  30
-rw-r--r--  jstests/hooks/run_reconfig_background.js                                                     | 147
-rw-r--r--  jstests/libs/override_methods/network_error_and_txn_override.js                             |  52
-rw-r--r--  jstests/replsets/txn_override_unittests.js                                                   | 210
10 files changed, 1043 insertions, 9 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_reconfig_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_reconfig_jscore_passthrough.yml
new file mode 100644
index 00000000000..8285339493b
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/replica_sets_reconfig_jscore_passthrough.yml
@@ -0,0 +1,84 @@
+test_kind: js_test
+# This suite starts a 5-node replica set and uses the DoReconfigInBackground hook to periodically run
+# safe reconfigs against the primary. These reconfigs change the number of voting nodes in the
+# replica set, which changes the voting majority used to satisfy the config commitment check and
+# oplog commitment check.
+
+selector:
+ roots:
+ - jstests/core/**/*.js
+ exclude_files:
+ # Transactions do not support retryability of individual operations.
+ # TODO: Remove this once it is supported (SERVER-33952).
+ - jstests/core/txns/**/*.js
+ # These tests are not expected to pass with replica-sets:
+ - jstests/core/dbadmin.js
+ - jstests/core/opcounters_write_cmd.js
+ - jstests/core/read_after_optime.js
+ - jstests/core/capped_update.js
+ # The set_param1.js test attempts to compare the response from running the {getParameter: "*"}
+ # command multiple times, which may observe the change to the "transactionLifetimeLimitSeconds"
+ # server parameter.
+ - jstests/core/set_param1.js
+
+ # These tests run commands using legacy queries, which are not supported on sessions.
+ - jstests/core/comment_field.js
+ - jstests/core/exhaust.js
+ - jstests/core/invalidated_legacy_cursors.js
+ - jstests/core/validate_cmd_ns.js
+
+ # Unacknowledged writes prohibited in an explicit session.
+ - jstests/core/batch_write_command_w0.js
+ - jstests/core/crud_api.js
+
+ - jstests/core/connection_string_validation.js # Does not expect a replica set connection string.
+
+ exclude_with_any_tags:
+ - assumes_read_preference_unchanged
+ - requires_sharding
+
+executor:
+ archive:
+ hooks:
+ - CheckReplDBHash
+ - CheckReplOplogs
+ - ValidateCollections
+ config:
+ shell_options:
+ eval: >-
+ testingReplication = true;
+ load('jstests/libs/override_methods/network_error_and_txn_override.js');
+ load("jstests/libs/override_methods/enable_sessions.js");
+ global_vars:
+ TestData:
+ networkErrorAndTxnOverrideConfig:
+ backgroundReconfigs: true
+ sessionOptions:
+ # Force DBClientRS to find the primary for non-write commands to make sure reads still
+ # work as expected during reconfigs.
+ readPreference:
+ mode: "primary"
+ readMode: commands
+ hooks:
+ # The CheckReplDBHash hook waits until all operations have replicated to and have been applied
+ # on the secondaries, so we run the ValidateCollections hook after it to ensure we're
+ # validating the entire contents of the collection.
+ - class: DoReconfigInBackground
+ shell_options:
+ nodb: ""
+ - class: CheckReplOplogs
+ - class: CheckReplDBHash
+ - class: ValidateCollections
+ - class: CleanEveryN
+ n: 20
+ fixture:
+ class: ReplicaSetFixture
+ mongod_options:
+ enableMajorityReadConcern: ''
+ set_parameters:
+ enableTestCommands: 1
+ logComponentVerbosity:
+ replication:
+ heartbeats: 2
+ all_nodes_electable: true
+ num_nodes: 5
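
The "voting majority" mentioned in the suite comment above is a strict majority of the members with votes: 1, so the reconfigs run by DoReconfigInBackground move the threshold that both the config commitment check and the oplog commitment check must satisfy. A minimal Python sketch of that arithmetic (illustration only; not part of this patch):

    def commitment_majority(num_voting_members):
        # Strict majority of the voting members: floor(n / 2) + 1.
        return num_voting_members // 2 + 1

    # With 5 nodes, the hook toggles one non-primary member's votes between 0 and 1,
    # so the set oscillates between these thresholds.
    for voters in (3, 4, 5):
        print(voters, "voting members -> majority of", commitment_majority(voters))
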
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_reconfig_jscore_stepdown_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_reconfig_jscore_stepdown_passthrough.yml
new file mode 100644
index 00000000000..d221ccba640
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/replica_sets_reconfig_jscore_stepdown_passthrough.yml
@@ -0,0 +1,198 @@
+test_kind: js_test
+# This suite starts a 5-node replica set and runs both the DoReconfigInBackground and
+# ContinuousStepdown hooks to periodically run safe reconfigs and stepdowns against the
+# primary. This tests that the concurrency between stepdowns and reconfigs is still ultimately
+# safe.
+
+selector:
+ roots:
+ - jstests/core/**/*.js
+ exclude_files:
+ # Transactions do not support retryability of individual operations.
+ # TODO: Remove this once it is supported (SERVER-33952).
+ - jstests/core/txns/**/*.js
+ # These tests are not expected to pass with replica-sets:
+ - jstests/core/dbadmin.js
+ - jstests/core/opcounters_write_cmd.js
+ - jstests/core/read_after_optime.js
+ - jstests/core/capped_update.js
+ # The set_param1.js test attempts to compare the response from running the {getParameter: "*"}
+ # command multiple times, which may observe the change to the "transactionLifetimeLimitSeconds"
+ # server parameter.
+ - jstests/core/set_param1.js
+
+ # No-op retries are not ignored by top, the profiler, or opcount.
+ - jstests/core/operation_latency_histogram.js
+ - jstests/core/profile2.js
+ - jstests/core/profile3.js
+ - jstests/core/profile_findandmodify.js
+ - jstests/core/top.js
+ - jstests/core/views/views_stats.js
+
+ # TODO SERVER-31249: getLastError should not be affected by no-op retries.
+ - jstests/core/bulk_legacy_enforce_gle.js
+
+ # TODO SERVER-31242: findAndModify no-op retry should respect the fields option.
+ - jstests/core/crud_api.js
+ - jstests/core/find_and_modify.js
+ - jstests/core/find_and_modify2.js
+ - jstests/core/find_and_modify_pipeline_update.js
+ - jstests/core/find_and_modify_server6865.js
+
+ # These tests run commands using legacy queries, which are not supported on sessions.
+ - jstests/core/comment_field.js
+ - jstests/core/exhaust.js
+ - jstests/core/validate_cmd_ns.js
+
+ # Stepdown commands during fsync lock will fail.
+ - jstests/core/currentop.js
+ - jstests/core/fsync.js
+ - jstests/core/killop_drop_collection.js
+
+ # Assert on the ismaster field of an isMaster response. If a primary steps down after accepting
+ # an isMaster command and returns before its connection is closed, the response can contain
+ # ismaster: false.
+ - jstests/core/dbadmin.js
+ - jstests/core/ismaster.js
+
+ # Spawns new mongo shells, which don't retry connecting on stepdown errors.
+ - jstests/core/shell_connection_strings.js
+
+ # Expect drops/creates to fail or have a certain response:
+ - jstests/core/drop.js
+ - jstests/core/dropdb.js
+ - jstests/core/explain_upsert.js
+ - jstests/core/indexes_multiple_commands.js
+
+ # Expect certain responses, but retries of successfully completed commands may return
+ # different values:
+ - jstests/core/create_indexes.js
+ - jstests/core/objid5.js
+
+ # Unacknowledged writes prohibited in an explicit session.
+ - jstests/core/batch_write_command_w0.js
+
+ - jstests/core/bench_test*.js # benchRun() used for writes
+ - jstests/core/benchrun_pipeline_updates.js # benchRun() used for writes
+ - jstests/core/connection_string_validation.js # Does not expect a replica set connection string.
+ - jstests/core/list_collections_filter.js # Temporary collections are dropped on failover.
+ - jstests/core/top.js # Tests read commands (including getMore) against the secondary
+ - jstests/core/drop3.js # getMore is not causally consistent if collection is dropped
+ - jstests/core/validate_cmd_ns.js # Calls _exec() directly, not retryable.
+ - jstests/core/list_collections_filter.js # Temporary collections are dropped on failover.
+ - jstests/core/explain_large_bounds.js # Stepdown can timeout waiting for global lock.
+
+ # Tests that fail for Causal Consistency as they have statements that do not support
+ # non-local read concern.
+ - jstests/core/collation.js
+ # Parallel shell is not causally consistent
+ - jstests/core/benchrun_pipeline_updates.js
+ - jstests/core/find_and_modify_concurrent_update.js
+ - jstests/core/shellstartparallel.js
+
+ exclude_with_any_tags:
+ ##
+ # The next four tags correspond to the special errors thrown by the auto_retry_on_network_error.js
+ # override when it refuses to run a certain command. Above each tag are the message(s) that cause
+ # the tag to be warranted.
+ ##
+ # "Refusing to run a test that issues a getMore command since if a network error occurs during
+ # it then we won't know whether the cursor was advanced or not"
+ - requires_getmore
+ # "Refusing to run a test that issues non-retryable write operations since the test likely makes
+ # assertions on the write results and can lead to spurious failures if a network error occurs"
+ - requires_non_retryable_writes
+ # "Refusing to run a test that issues commands that are not blindly retryable"
+ # "Refusing to run a test that issues an aggregation command with $out because it is not
+ # retryable"
+ - requires_non_retryable_commands
+ # "Refusing to run a test that issues commands that may return different values after a failover"
+ # "Refusing to run a test that issues an aggregation command with explain because it may return
+ # incomplete results"
+ # "Refusing to run a test that issues an aggregation command with
+ # $listLocalSessions because it relies on in-memory state that may not survive failovers"
+ # "Refusing to run a test that issues a mapReduce command, because it calls std::terminate() if
+ # interrupted by a stepdown"
+ - does_not_support_stepdowns
+ ##
+ # The next two tags correspond to the special errors thrown by the
+ # set_read_and_write_concerns.js override when it refuses to replace the readConcern or
+ # writeConcern of a particular command. Above each tag are the message(s) that cause the tag to be
+ # warranted.
+ ##
+ # "Cowardly refusing to override read concern of command: ..."
+ - assumes_read_concern_unchanged
+ # "Cowardly refusing to override write concern of command: ..."
+ - assumes_write_concern_unchanged
+ ## The next tag corresponds to long-running operations, as they may exhaust their number
+ # of retries and result in a network error being thrown.
+ - operations_longer_than_stepdown_interval
+ - does_not_support_causal_consistency
+ - uses_transactions
+ # collStats is not causally consistent
+ - requires_collstats
+ - requires_dbstats
+ - requires_datasize
+ - requires_sharding
+
+executor:
+ archive:
+ hooks:
+ - CheckReplDBHash
+ - CheckReplOplogs
+ - ValidateCollections
+ config:
+ shell_options:
+ eval: >-
+ testingReplication = true;
+ load('jstests/libs/override_methods/network_error_and_txn_override.js');
+ db = connect(TestData.connectionString);
+ load("jstests/libs/override_methods/enable_sessions.js");
+ load("jstests/libs/override_methods/set_read_and_write_concerns.js");
+ global_vars:
+ TestData:
+ runningWithCausalConsistency: true
+ alwaysInjectTransactionNumber: true
+ defaultReadConcernLevel: "majority"
+ logRetryAttempts: true
+ networkErrorAndTxnOverrideConfig:
+ retryOnNetworkErrors: true
+ backgroundReconfigs: true
+ overrideRetryAttempts: 3
+ sessionOptions:
+ writeConcern:
+ w: "majority"
+ readConcern:
+ level: "majority"
+ # Force DBClientRS to find the primary for non-write commands.
+ readPreference:
+ mode: "primary"
+ retryWrites: true
+ # We specify nodb so the shell used by each test will attempt to connect after loading the
+ # retry logic in auto_retry_on_network_error.js.
+ nodb: ""
+ readMode: commands
+ hooks:
+ - class: DoReconfigInBackground
+ shell_options:
+ nodb: ""
+ - class: ContinuousStepdown
+ # The CheckReplDBHash hook waits until all operations have replicated to and have been applied
+ # on the secondaries, so we run the ValidateCollections hook after it to ensure we're
+ # validating the entire contents of the collection.
+ - class: CheckReplOplogs
+ - class: CheckReplDBHash
+ - class: ValidateCollections
+ - class: CleanEveryN
+ n: 20
+ fixture:
+ class: ReplicaSetFixture
+ mongod_options:
+ enableMajorityReadConcern: ''
+ set_parameters:
+ enableTestCommands: 1
+ logComponentVerbosity:
+ replication:
+ heartbeats: 2
+ all_nodes_electable: true
+ num_nodes: 5
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_reconfig_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_reconfig_kill_primary_jscore_passthrough.yml
new file mode 100644
index 00000000000..6c973e3dd06
--- /dev/null
+++ b/buildscripts/resmokeconfig/suites/replica_sets_reconfig_kill_primary_jscore_passthrough.yml
@@ -0,0 +1,214 @@
+test_kind: js_test
+# This suite starts a 5-node replica set and runs both the DoReconfigInBackground and
+# ContinuousStepdown hooks with kill: true and background_reconfig: true to periodically run
+# safe reconfigs and send kill signals to the primary.
+# This tests that the concurrency between killing the primary and reconfigs is still
+# ultimately safe.
+
+selector:
+ roots:
+ - jstests/core/**/*.js
+ exclude_files:
+ # Transactions do not support retryability of individual operations.
+ # TODO: Remove this once it is supported (SERVER-33952).
+ - jstests/core/txns/**/*.js
+ # These tests are not expected to pass with replica-sets:
+ - jstests/core/dbadmin.js
+ - jstests/core/opcounters_write_cmd.js
+ - jstests/core/read_after_optime.js
+ - jstests/core/capped_update.js
+ # The set_param1.js test attempts to compare the response from running the {getParameter: "*"}
+ # command multiple times, which may observe the change to the "transactionLifetimeLimitSeconds"
+ # server parameter.
+ - jstests/core/set_param1.js
+
+ # No-op retries are not ignored by top, the profiler, or opcount.
+ - jstests/core/operation_latency_histogram.js
+ - jstests/core/profile2.js
+ - jstests/core/profile3.js
+ - jstests/core/profile_findandmodify.js
+ - jstests/core/top.js
+ - jstests/core/views/views_stats.js
+
+ # TODO SERVER-31249: getLastError should not be affected by no-op retries.
+ - jstests/core/bulk_legacy_enforce_gle.js
+
+ # TODO SERVER-31242: findAndModify no-op retry should respect the fields option.
+ - jstests/core/crud_api.js
+ - jstests/core/find_and_modify.js
+ - jstests/core/find_and_modify2.js
+ - jstests/core/find_and_modify_pipeline_update.js
+ - jstests/core/find_and_modify_server6865.js
+
+ # These tests run commands using legacy queries, which are not supported on sessions.
+ - jstests/core/comment_field.js
+ - jstests/core/exhaust.js
+ - jstests/core/validate_cmd_ns.js
+
+ # Stepdown commands during fsync lock will fail.
+ - jstests/core/currentop.js
+ - jstests/core/fsync.js
+ - jstests/core/killop_drop_collection.js
+
+ # Assert on the ismaster field of an isMaster response. If a primary steps down after accepting
+ # an isMaster command and returns before its connection is closed, the response can contain
+ # ismaster: false.
+ - jstests/core/dbadmin.js
+ - jstests/core/ismaster.js
+
+ # Spawns new mongo shells, which don't retry connecting on stepdown errors.
+ - jstests/core/shell_connection_strings.js
+
+ # Expect drops/creates to fail or have a certain response:
+ - jstests/core/drop.js
+ - jstests/core/dropdb.js
+ - jstests/core/explain_upsert.js
+ - jstests/core/indexes_multiple_commands.js
+
+ # Expect certain responses, but retries of successfully completed commands may return
+ # different values:
+ - jstests/core/create_indexes.js
+ - jstests/core/objid5.js
+
+ # Unacknowledged writes prohibited in an explicit session.
+ - jstests/core/batch_write_command_w0.js
+
+ - jstests/core/bench_test*.js # benchRun() used for writes
+ - jstests/core/benchrun_pipeline_updates.js # benchRun() used for writes
+ - jstests/core/connection_string_validation.js # Does not expect a replica set connection string.
+ - jstests/core/list_collections_filter.js # Temporary collections are dropped on failover.
+ - jstests/core/top.js # Tests read commands (including getMore) against the secondary
+ - jstests/core/drop3.js # getMore is not causally consistent if collection is dropped
+ - jstests/core/validate_cmd_ns.js # Calls _exec() directly, not retryable.
+ - jstests/core/list_collections_filter.js # Temporary collections are dropped on failover.
+ - jstests/core/explain_large_bounds.js # Stepdown can timeout waiting for global lock.
+
+ # Tests that fail for Causal Consistency as they have statements that do not support
+ # non-local read concern.
+ - jstests/core/collation.js
+ # Starts a parallel shell but won't restart it after unclean shutdown.
+ # TODO SERVER-33229: Remove these exclusions
+ - jstests/core/compact_keeps_indexes.js
+ - jstests/core/benchrun_pipeline_updates.js
+ - jstests/core/find_and_modify_concurrent_update.js
+ - jstests/core/shellstartparallel.js
+
+ # Inserts enough data that recovery takes more than 8 seconds, so we never get a working primary.
+ - jstests/core/geo_s2ordering.js
+
+ exclude_with_any_tags:
+ ##
+ # The next four tags correspond to the special errors thrown by the auto_retry_on_network_error.js
+ # override when it refuses to run a certain command. Above each tag are the message(s) that cause
+ # the tag to be warranted.
+ ##
+ # "Refusing to run a test that issues a getMore command since if a network error occurs during
+ # it then we won't know whether the cursor was advanced or not"
+ - requires_getmore
+ # "Refusing to run a test that issues non-retryable write operations since the test likely makes
+ # assertions on the write results and can lead to spurious failures if a network error occurs"
+ - requires_non_retryable_writes
+ # "Refusing to run a test that issues commands that are not blindly retryable"
+ # "Refusing to run a test that issues an aggregation command with $out because it is not
+ # retryable"
+ - requires_non_retryable_commands
+ # "Refusing to run a test that issues commands that may return different values after a failover"
+ # "Refusing to run a test that issues an aggregation command with explain because it may return
+ # incomplete results"
+ # "Refusing to run a test that issues an aggregation command with
+ # $listLocalSessions because it relies on in-memory state that may not survive failovers"
+ # "Refusing to run a test that issues a mapReduce command, because it calls std::terminate() if
+ # interrupted by a stepdown"
+ - does_not_support_stepdowns
+ ##
+ # The next two tags correspond to the special errors thrown by the
+ # set_read_and_write_concerns.js override when it refuses to replace the readConcern or
+ # writeConcern of a particular command. Above each tag are the message(s) that cause the tag to be
+ # warranted.
+ ##
+ # "Cowardly refusing to override read concern of command: ..."
+ - assumes_read_concern_unchanged
+ # "Cowardly refusing to override write concern of command: ..."
+ - assumes_write_concern_unchanged
+ ##
+ # The next three tags correspond to the special errors thrown by the
+ # fail_unclean_shutdown_incompatible_commands.js override when it refuses to run commands that are
+ # inaccurate after an unclean shutdown. Above each tag is the message that causes the tag to be
+ # warranted.
+ ##
+ # "Cowardly fail if fastcount is run with a mongod that had an unclean shutdown: ..."
+ - requires_fastcount
+ # "Cowardly fail if dbStats is run with a mongod that had an unclean shutdown: ..."
+ - requires_dbstats
+ # "Cowardly fail if collStats is run with a mongod that had an unclean shutdown: ..."
+ - requires_collstats
+ # "Cowardly fail if unbounded dataSize is run with a mongod that had an unclean shutdown: ..."
+ - requires_datasize
+ ## The next tag corresponds to long-running operations, as they may exhaust their number
+ # of retries and result in a network error being thrown.
+ - operations_longer_than_stepdown_interval
+
+executor:
+ archive:
+ hooks:
+ - CheckReplDBHash
+ - CheckReplOplogs
+ - ValidateCollections
+ config:
+ shell_options:
+ eval: >-
+ testingReplication = true;
+ load('jstests/libs/override_methods/network_error_and_txn_override.js');
+ db = connect(TestData.connectionString);
+ load("jstests/libs/override_methods/enable_sessions.js");
+ load("jstests/libs/override_methods/set_read_and_write_concerns.js");
+ load("jstests/libs/override_methods/fail_unclean_shutdown_incompatible_commands.js");
+ load("jstests/libs/override_methods/fail_unclean_shutdown_start_parallel_shell.js");
+ global_vars:
+ TestData:
+ alwaysInjectTransactionNumber: true
+ defaultReadConcernLevel: "majority"
+ logRetryAttempts: true
+ networkErrorAndTxnOverrideConfig:
+ retryOnNetworkErrors: true
+ backgroundReconfigs: true
+ overrideRetryAttempts: 3
+ sessionOptions:
+ writeConcern:
+ w: "majority"
+ readConcern:
+ level: "majority"
+ # Force DBClientRS to find the primary for non-write commands.
+ readPreference:
+ mode: "primary"
+ retryWrites: true
+ # We specify nodb so the shell used by each test will attempt to connect after loading the
+ # retry logic in auto_retry_on_network_error.js.
+ nodb: ""
+ readMode: commands
+ hooks:
+ - class: DoReconfigInBackground
+ shell_options:
+ nodb: ""
+ - class: ContinuousStepdown
+ kill: true
+ background_reconfig: true
+ # The CheckReplDBHash hook waits until all operations have replicated to and have been applied
+ # on the secondaries, so we run the ValidateCollections hook after it to ensure we're
+ # validating the entire contents of the collection.
+ - class: CheckReplOplogs
+ - class: CheckReplDBHash
+ - class: ValidateCollections
+ - class: CleanEveryN
+ n: 20
+ fixture:
+ class: ReplicaSetFixture
+ mongod_options:
+ enableMajorityReadConcern: ''
+ set_parameters:
+ enableTestCommands: 1
+ logComponentVerbosity:
+ replication:
+ heartbeats: 2
+ all_nodes_electable: true
+ num_nodes: 5
diff --git a/buildscripts/resmokelib/testing/fixtures/replicaset.py b/buildscripts/resmokelib/testing/fixtures/replicaset.py
index fb8e90b5296..5de9996de1c 100644
--- a/buildscripts/resmokelib/testing/fixtures/replicaset.py
+++ b/buildscripts/resmokelib/testing/fixtures/replicaset.py
@@ -187,12 +187,10 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
replset_settings = self.replset_config_options["settings"]
repl_config["settings"] = replset_settings
- # If not all nodes are electable and no election timeout was specified, then we increase
- # the election timeout to 24 hours to prevent spurious elections.
- if not self.all_nodes_electable:
- repl_config.setdefault("settings", {})
- if "electionTimeoutMillis" not in repl_config["settings"]:
- repl_config["settings"]["electionTimeoutMillis"] = 24 * 60 * 60 * 1000
+ # Increase the election timeout to 24 hours to prevent spurious elections.
+ repl_config.setdefault("settings", {})
+ if "electionTimeoutMillis" not in repl_config["settings"]:
+ repl_config["settings"]["electionTimeoutMillis"] = 24 * 60 * 60 * 1000
# Start up a single node replica set then reconfigure to the correct size (if the config
# contains more than 1 node), so the primary is elected more quickly.
@@ -500,6 +498,16 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
primary = self.get_primary()
return [node for node in self.nodes if node.port != primary.port]
+ def get_voting_members(self):
+ """Return the number of voting nodes in the replica set."""
+ primary = self.get_primary()
+ client = primary.mongo_client()
+
+ members = client.admin.command({"replSetGetConfig": 1})['config']['members']
+ voting_members = [member['host'] for member in members if member['votes'] == 1]
+
+ return voting_members
+
def get_initial_sync_node(self):
"""Return initial sync node from the replica set."""
return self.initial_sync_node
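
The new get_voting_members() helper returns the host strings of all members whose votes field is 1; the stepdown hook change below uses its length to decide whether it is safe to kill the primary. A rough pymongo equivalent, for reference (the connection address is a placeholder, not taken from the patch):

    from pymongo import MongoClient

    # Placeholder address; in the fixture this is the current primary's port.
    client = MongoClient("localhost:20000")
    members = client.admin.command("replSetGetConfig")["config"]["members"]
    voting_members = [m["host"] for m in members if m["votes"] == 1]
    print(voting_members)  # e.g. ["localhost:20000", "localhost:20001", ...]
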
diff --git a/buildscripts/resmokelib/testing/hooks/reconfig_background.py b/buildscripts/resmokelib/testing/hooks/reconfig_background.py
new file mode 100644
index 00000000000..dbf9b33a242
--- /dev/null
+++ b/buildscripts/resmokelib/testing/hooks/reconfig_background.py
@@ -0,0 +1,71 @@
+"""Test hook for running safe reconfigs against the primary of a replica set.
+
+This hook runs continuously in a background thread while the test is running.
+"""
+
+import os.path
+
+from buildscripts.resmokelib import errors
+from buildscripts.resmokelib.testing.hooks import jsfile
+from buildscripts.resmokelib.testing.testcases import interface as testcase
+from buildscripts.resmokelib.testing.hooks.background_job import _BackgroundJob, _ContinuousDynamicJSTestCase
+
+
+class DoReconfigInBackground(jsfile.JSHook):
+ """A hook for running a safe reconfig against a replica set while a test is running."""
+
+ def __init__(self, hook_logger, fixture, shell_options=None):
+ """Initialize DoReconfigInBackground."""
+ description = "Run reconfigs against the primary while the test is running."
+ js_filename = os.path.join("jstests", "hooks", "run_reconfig_background.js")
+ jsfile.JSHook.__init__(self, hook_logger, fixture, js_filename, description,
+ shell_options=shell_options)
+
+ self._background_job = None
+
+ def before_suite(self, test_report):
+ """Start the background thread."""
+ self._background_job = _BackgroundJob("ReconfigInBackground")
+ self.logger.info("Starting the background reconfig thread.")
+ self._background_job.start()
+
+ def after_suite(self, test_report):
+ """Signal the background thread to exit, and wait until it does."""
+ if self._background_job is None:
+ return
+
+ self.logger.info("Stopping the background reconfig thread.")
+ self._background_job.stop()
+
+ def before_test(self, test, test_report):
+ """Instruct the background thread to run reconfigs while 'test' is also running."""
+ if self._background_job is None:
+ return
+
+ hook_test_case = _ContinuousDynamicJSTestCase.create_before_test(
+ self.logger.test_case_logger, test, self, self._js_filename, self._shell_options)
+ hook_test_case.configure(self.fixture)
+
+ self.logger.info("Resuming the background reconfig thread.")
+ self._background_job.resume(hook_test_case, test_report)
+
+ def after_test(self, test, test_report): # noqa: D205,D400
+ """Instruct the background thread to stop running reconfigs now that 'test' has
+ finished running.
+ """
+ if self._background_job is None:
+ return
+
+ self.logger.info("Pausing the background reconfig thread.")
+ self._background_job.pause()
+
+ if self._background_job.exc_info is not None:
+ if isinstance(self._background_job.exc_info[1], errors.TestFailure):
+ # If the mongo shell process running the JavaScript file exited with a non-zero
+ # return code, then we raise an errors.ServerFailure exception to cause resmoke.py's
+ # test execution to stop.
+ raise errors.ServerFailure(self._background_job.exc_info[1].args[0])
+ else:
+ self.logger.error("Encountered an error inside the background reconfig thread.",
+ exc_info=self._background_job.exc_info)
+ raise self._background_job.exc_info[1]
diff --git a/buildscripts/resmokelib/testing/hooks/stepdown.py b/buildscripts/resmokelib/testing/hooks/stepdown.py
index de2c89e9a38..4cfd09fd52d 100644
--- a/buildscripts/resmokelib/testing/hooks/stepdown.py
+++ b/buildscripts/resmokelib/testing/hooks/stepdown.py
@@ -27,7 +27,7 @@ class ContinuousStepdown(interface.Hook): # pylint: disable=too-many-instance-a
self, hook_logger, fixture, config_stepdown=True, shard_stepdown=True,
stepdown_interval_ms=8000, terminate=False, kill=False,
use_stepdown_permitted_file=False, wait_for_mongos_retarget=False,
- stepdown_via_heartbeats=True):
+ stepdown_via_heartbeats=True, background_reconfig=False):
"""Initialize the ContinuousStepdown.
Args:
@@ -64,6 +64,8 @@ class ContinuousStepdown(interface.Hook): # pylint: disable=too-many-instance-a
self._terminate = terminate or kill
self._kill = kill
+ self._background_reconfig = background_reconfig
+
# The stepdown file names need to match the same construction as found in
# jstests/concurrency/fsm_libs/resmoke_runner.js.
dbpath_prefix = fixture.get_dbpath_prefix()
@@ -87,7 +89,7 @@ class ContinuousStepdown(interface.Hook): # pylint: disable=too-many-instance-a
self._stepdown_thread = _StepdownThread(
self.logger, self._mongos_fixtures, self._rs_fixtures, self._stepdown_interval_secs,
self._terminate, self._kill, lifecycle, self._wait_for_mongos_retarget,
- self._stepdown_via_heartbeats)
+ self._stepdown_via_heartbeats, self._background_reconfig)
self.logger.info("Starting the stepdown thread.")
self._stepdown_thread.start()
@@ -348,7 +350,8 @@ class FileBasedStepdownLifecycle(object):
class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-attributes
def __init__( # pylint: disable=too-many-arguments
self, logger, mongos_fixtures, rs_fixtures, stepdown_interval_secs, terminate, kill,
- stepdown_lifecycle, wait_for_mongos_retarget, stepdown_via_heartbeats):
+ stepdown_lifecycle, wait_for_mongos_retarget, stepdown_via_heartbeats,
+ background_reconfig):
"""Initialize _StepdownThread."""
threading.Thread.__init__(self, name="StepdownThread")
self.daemon = True
@@ -365,6 +368,7 @@ class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-at
self.__lifecycle = stepdown_lifecycle
self._should_wait_for_mongos_retarget = wait_for_mongos_retarget
self._stepdown_via_heartbeats = stepdown_via_heartbeats
+ self._background_reconfig = background_reconfig
self._last_exec = time.time()
# Event set when the thread has been stopped using the 'stop()' method.
@@ -474,6 +478,22 @@ class _StepdownThread(threading.Thread): # pylint: disable=too-many-instance-at
rs_fixture.replset_name))
if self._terminate:
+ # If we're running with background reconfigs, it's possible to be in a scenario
+ # where we would kill a necessary voting node (e.g. in a 5-node replica set where
+ # only 2 members are voting). In this scenario, we want to avoid killing the
+ # primary because no secondary can step up.
+ if self._background_reconfig:
+ # stagger the kill thread so that it runs a little after the reconfig thread
+ time.sleep(1)
+ voting_members = rs_fixture.get_voting_members()
+
+ self.logger.info("Current voting members: %s", voting_members)
+
+ if len(voting_members) <= 3:
+ # Do not kill or terminate the primary if we don't have enough voting nodes to
+ # elect a new primary.
+ return
+
should_kill = self._kill and random.choice([True, False])
action = "Killing" if should_kill else "Terminating"
self.logger.info("%s the primary on port %d of replica set '%s'.", action, primary.port,
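
Put together, the guard added above makes the kill/terminate path conditional on the voting membership that the background reconfig thread may have just changed. A simplified Python sketch of the decision flow (names mirror the hook, but this is an illustration, not the hook itself):

    import random
    import time

    def choose_primary_action(rs_fixture, kill, background_reconfig, logger):
        """Return 'kill', 'terminate', or None (skip) for the current primary."""
        if background_reconfig:
            time.sleep(1)  # let the reconfig thread act first, as in the patch
            voting_members = rs_fixture.get_voting_members()
            logger.info("Current voting members: %s", voting_members)
            if len(voting_members) <= 3:
                # Too few voters would remain to elect a new primary; skip this round.
                return None
        should_kill = kill and random.choice([True, False])
        return "kill" if should_kill else "terminate"
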
diff --git a/etc/evergreen.yml b/etc/evergreen.yml
index 48f75418a26..fc22975e4b9 100644
--- a/etc/evergreen.yml
+++ b/etc/evergreen.yml
@@ -6088,6 +6088,36 @@ tasks:
vars:
resmoke_args: --suites=replica_sets_jscore_passthrough --storageEngine=wiredTiger
+- <<: *task_template
+ name: replica_sets_reconfig_jscore_passthrough
+ depends_on:
+ - name: jsCore
+ commands:
+ - func: "do setup"
+ - func: "run tests"
+ vars:
+ resmoke_args: --suites=replica_sets_reconfig_jscore_passthrough --storageEngine=wiredTiger
+
+- <<: *task_template
+ name: replica_sets_reconfig_jscore_stepdown_passthrough
+ depends_on:
+ - name: jsCore
+ commands:
+ - func: "do setup"
+ - func: "run tests"
+ vars:
+ resmoke_args: --suites=replica_sets_reconfig_jscore_stepdown_passthrough --storageEngine=wiredTiger
+
+- <<: *task_template
+ name: replica_sets_reconfig_kill_primary_jscore_passthrough
+ depends_on:
+ - name: jsCore
+ commands:
+ - func: "do setup"
+ - func: "run tests"
+ vars:
+ resmoke_args: --suites=replica_sets_reconfig_kill_primary_jscore_passthrough --storageEngine=wiredTiger
+
- name: replica_sets_jscore_passthrough_gen
depends_on:
- name: jsCore
diff --git a/jstests/hooks/run_reconfig_background.js b/jstests/hooks/run_reconfig_background.js
new file mode 100644
index 00000000000..4717a8d4710
--- /dev/null
+++ b/jstests/hooks/run_reconfig_background.js
@@ -0,0 +1,147 @@
+/**
+ * This hook runs the reconfig command against the primary of a replica set:
+ * The reconfig command first chooses a random node (not the primary) and will change
+ * its votes and priority to 0 or 1 depending on the current value.
+ *
+ * This hook will run concurrently with tests.
+ */
+
+'use strict';
+
+(function() {
+load('jstests/libs/discover_topology.js'); // For Topology and DiscoverTopology.
+load('jstests/libs/parallelTester.js'); // For Thread.
+
+/**
+ * Returns true if the error code is transient.
+ */
+function isIgnorableError(codeName) {
+ if (codeName === "ConfigurationInProgress" || codeName === "NotMaster" ||
+ codeName === "InterruptedDueToReplStateChange" || codeName === "PrimarySteppedDown" ||
+ codeName === "NodeNotFound" || codeName === "ShutdownInProgress") {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Runs the reconfig command against the primary of a replica set.
+ *
+ * The reconfig command randomly chooses a node to change its votes and priority to 0 or 1
+ * based on what the node's current votes and priority fields are. We always check that
+ * at least two voting nodes exist in the set, which ensures that we can always have a
+ * primary in the case of stepdowns.
+ * We also want to avoid changing the votes and priority of the current primary to 0, since this
+ * will result in an error.
+ *
+ * The number of voting nodes in the replica set determines what the config majority is for both
+ * reconfig config commitment and reconfig oplog commitment.
+ *
+ * This function should not throw if everything is working properly.
+ */
+function reconfigBackground(primary, numNodes) {
+ Random.setRandomSeed();
+ // Calls 'func' with the print() function overridden to be a no-op.
+ const quietly = (func) => {
+ const printOriginal = print;
+ try {
+ print = Function.prototype;
+ func();
+ } finally {
+ print = printOriginal;
+ }
+ };
+
+ // The stepdown and kill primary hooks run concurrently with this reconfig hook. It is
+ // possible that the topology will not be properly updated in time, meaning that the
+ // current primary can be undefined if a secondary has not stepped up soon enough.
+ if (primary === undefined) {
+ jsTestLog("Skipping reconfig because we do not have a primary yet.");
+ return {ok: 1};
+ }
+
+ jsTestLog("primary is " + primary);
+
+ // Suppress the log messages generated establishing new mongo connections. The
+ // run_reconfig_background.js hook is executed frequently by resmoke.py and
+ // could lead to generating an overwhelming amount of log messages.
+ let conn;
+ quietly(() => {
+ conn = new Mongo(primary);
+ });
+ assert.neq(
+ null, conn, "Failed to connect to primary '" + primary + "' for background reconfigs");
+
+ var config = assert.commandWorked(conn.getDB("admin").runCommand({replSetGetConfig: 1})).config;
+
+ // Find the correct host in the member config
+ const primaryHostIndex = (cfg, pHost) => cfg.members.findIndex(m => m.host === pHost);
+ const primaryIndex = primaryHostIndex(config, primary);
+ jsTestLog("primaryIndex is " + primaryIndex);
+
+ // Calculate the total number of voting nodes in this set so that we make sure we
+ // always have at least two voting nodes. This is so that the primary can always
+ // safely step down because there is at least one other electable secondary.
+ const numVotingNodes = config.members.filter(member => member.votes === 1).length;
+
+ // Randomly change the vote of a node to 1 or 0 depending on its current value. Do not
+ // change the primary's votes.
+ var indexToChange = primaryIndex;
+ while (indexToChange === primaryIndex) {
+ // randInt is exclusive of the upper bound.
+ indexToChange = Random.randInt(numNodes);
+ }
+
+ jsTestLog("Running reconfig to change votes of node at index" + indexToChange);
+
+ // Change the priority to correspond to the votes. If the member's current votes field
+ // is 1, only change it to 0 if there are more than 3 voting members in this set.
+ // We want to ensure that there are at least 3 voting nodes so that killing the primary
+ // will not affect a majority.
+ config.version++;
+ config.members[indexToChange].votes =
+ (config.members[indexToChange].votes === 1 && numVotingNodes > 3) ? 0 : 1;
+ config.members[indexToChange].priority = config.members[indexToChange].votes;
+
+ let votingRes = conn.getDB("admin").runCommand({replSetReconfig: config});
+ if (!votingRes.ok && !isIgnorableError(votingRes.codeName)) {
+ jsTestLog("Reconfig to change votes FAILED.");
+ return votingRes;
+ }
+
+ return {ok: 1};
+}
+
+// It is possible that the primary will be killed before actually running the reconfig
+// command. If we fail with a network error, ignore it.
+let res;
+try {
+ const conn = connect(TestData.connectionString);
+ const topology = DiscoverTopology.findConnectedNodes(conn.getMongo());
+
+ if (topology.type !== Topology.kReplicaSet) {
+ throw new Error('Unsupported topology configuration: ' + tojson(topology));
+ }
+
+ const numNodes = topology.nodes.length;
+ res = reconfigBackground(topology.primary, numNodes);
+} catch (e) {
+ // If the ReplicaSetMonitor cannot find a primary because it has stepped down or
+ // been killed, it may take longer than 15 seconds for a new primary to step up.
+ // Ignore this error until we find a new primary.
+ const kReplicaSetMonitorError =
+ /^Could not find host matching read preference.*mode: "primary"/;
+
+ if (isNetworkError(e)) {
+ jsTestLog("Ignoring network error" + tojson(e));
+ } else if (e.message.match(kReplicaSetMonitorError)) {
+ jsTestLog("Ignoring read preference primary error" + tojson(e));
+ } else {
+ throw e;
+ }
+
+ res = {ok: 1};
+}
+
+assert.commandWorked(res, "reconfig hook failed: " + tojson(res));
+})();
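
The core of the hook is the vote-flip rule: a member's votes (and matching priority) is dropped to 0 only while more than three members are voting, so at least three voters remain and a majority can survive a concurrent stepdown or primary kill. The same rule, restated as a small Python check (illustration only; the hook itself is the JavaScript above):

    def next_votes(current_votes, num_voting_nodes):
        # Demote a voter only while more than 3 members vote; otherwise (re)grant the vote.
        if current_votes == 1 and num_voting_nodes > 3:
            return 0
        return 1

    assert next_votes(1, 5) == 0   # safe to demote: four voters remain
    assert next_votes(1, 3) == 1   # keep the vote: only three voters in the set
    assert next_votes(0, 3) == 1   # promote a non-voter back to voting
    # The hook then sets members[i].priority = members[i].votes and bumps config.version.
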
diff --git a/jstests/libs/override_methods/network_error_and_txn_override.js b/jstests/libs/override_methods/network_error_and_txn_override.js
index 798d9bf7fda..b08b6d0b233 100644
--- a/jstests/libs/override_methods/network_error_and_txn_override.js
+++ b/jstests/libs/override_methods/network_error_and_txn_override.js
@@ -64,6 +64,11 @@ function configuredForTxnOverride() {
return TestData.networkErrorAndTxnOverrideConfig.wrapCRUDinTransactions;
}
+function configuredForBackgroundReconfigs() {
+ assert(TestData.networkErrorAndTxnOverrideConfig, TestData);
+ return TestData.networkErrorAndTxnOverrideConfig.backgroundReconfigs;
+}
+
// Commands assumed to not be blindly retryable.
const kNonRetryableCommands = new Set([
// Commands that take write concern and do not support txnNumbers.
@@ -128,6 +133,15 @@ const kAcceptableNonRetryableCommands = new Set([
"moveChunk",
]);
+// The following read operations defined in the CRUD specification are retryable.
+// Note that estimatedDocumentCount() and countDocuments() use the count command.
+const kRetryableReadCommands = new Set(["find", "aggregate", "distinct", "count"]);
+
+// Returns true if the command name is that of a retryable read command.
+function isRetryableReadCmdName(cmdName) {
+ return kRetryableReadCommands.has(cmdName);
+}
+
// Returns if the given failed response is a safe response to ignore when retrying the
// given command type.
function isAcceptableRetryFailedResponse(cmdName, res) {
@@ -186,6 +200,20 @@ function canRetryNetworkErrorForCommand(cmdName, cmdObj) {
return true;
}
+// Returns if the given command should retry a read error when reconfigs are present.
+function canRetryReadErrorDuringBackgroundReconfig(cmdName) {
+ if (!configuredForBackgroundReconfigs()) {
+ return false;
+ }
+ return isRetryableReadCmdName(cmdName);
+}
+
+// When running the reconfig command on a node, it will drop its snapshot. Read commands issued
+// to this node before it updates its snapshot will fail with ReadConcernMajorityNotAvailableYet.
+function isRetryableReadCode(code) {
+ return code === ErrorCodes.ReadConcernMajorityNotAvailableYet;
+}
+
// Several commands that use the plan executor swallow the actual error code from a failed plan
// into their error message and instead return OperationFailed.
//
@@ -905,6 +933,19 @@ function shouldRetryWithNetworkErrorOverride(
return res;
}
+function shouldRetryForBackgroundReconfigOverride(res, cmdName, logError) {
+ assert(configuredForBackgroundReconfigs());
+ // Background reconfigs can interfere with read commands if they are using readConcern: majority
+ // and readPreference: primary. If we're running a read command and it fails with
+ // ReadConcernMajorityNotAvailableYet, retry because it should eventually succeed.
+ if (isRetryableReadCmdName(cmdName) && isRetryableReadCode(res.code)) {
+ logError("Retrying read command after 100ms because of background reconfigs");
+ sleep(100);
+ return kContinue;
+ }
+ return res;
+}
+
// Processes exceptions if configured for txn override. Retries the entire transaction on
// transient transaction errors or network errors if configured for network errors as well.
// If a retry fails, returns the response, or returns null for further exception processing.
@@ -990,6 +1031,7 @@ function runCommandOverrideBody(conn, dbName, cmdName, cmdObj, lsid, clientFunct
}
const canRetryNetworkError = canRetryNetworkErrorForCommand(cmdName, cmdObj);
+ const canRetryReadError = canRetryReadErrorDuringBackgroundReconfig(cmdName);
let numNetworkErrorRetries = canRetryNetworkError ? kMaxNumRetries : 0;
do {
try {
@@ -1020,6 +1062,16 @@ function runCommandOverrideBody(conn, dbName, cmdName, cmdObj, lsid, clientFunct
}
}
+ if (canRetryReadError) {
+ const readRetryRes =
+ shouldRetryForBackgroundReconfigOverride(res, cmdName, logError);
+ if (readRetryRes === kContinue) {
+ continue;
+ } else {
+ res = readRetryRes;
+ }
+ }
+
return res;
} catch (e) {
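
Conceptually, the override now treats ReadConcernMajorityNotAvailableYet on find/aggregate/distinct/count the same way it treats a transient failure: back off briefly and reissue the command. A small Python sketch of that loop (the retry cap and helper names are illustrative, not taken from the override):

    import time

    RETRYABLE_READ_COMMANDS = {"find", "aggregate", "distinct", "count"}

    def run_with_reconfig_read_retries(run_command, cmd_name, max_attempts=5, backoff_secs=0.1):
        """Reissue a retryable read that fails with ReadConcernMajorityNotAvailableYet."""
        for attempt in range(max_attempts):
            res = run_command()
            retryable = (cmd_name in RETRYABLE_READ_COMMANDS and res.get("ok") == 0 and
                         res.get("codeName") == "ReadConcernMajorityNotAvailableYet")
            if retryable and attempt + 1 < max_attempts:
                time.sleep(backoff_secs)  # the JS override sleeps 100ms before retrying
                continue
            return res
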
diff --git a/jstests/replsets/txn_override_unittests.js b/jstests/replsets/txn_override_unittests.js
index 8fe114b7789..8bab3f5b167 100644
--- a/jstests/replsets/txn_override_unittests.js
+++ b/jstests/replsets/txn_override_unittests.js
@@ -1896,6 +1896,168 @@ const txnOverridePlusRetryOnNetworkErrorTestsFcv42 = [
}
];
+const retryOnReadErrorsFromBackgroundReconfigTest = [
+ {
+ name: "find retries on ReadConcernMajorityNotAvailableYet",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({_id: 1}));
+ failCommandWithFailPoint(["find"],
+ {errorCode: ErrorCodes.ReadConcernMajorityNotAvailableYet});
+ assert.eq(coll1.findOne({_id: 1}), {_id: 1});
+ }
+ },
+ {
+ name: "aggregate retries on ReadConcernMajorityNotAvailableYet",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 2}));
+ failCommandWithFailPoint(["aggregate"],
+ {errorCode: ErrorCodes.ReadConcernMajorityNotAvailableYet});
+ const cursor = coll1.aggregate([{$match: {a: 1}}]);
+ assert.eq(cursor.toArray().length, 2);
+ }
+ },
+ {
+ name: "distinct retries on ReadConcernMajorityNotAvailableYet",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 2}));
+ failCommandWithFailPoint(["distinct"],
+ {errorCode: ErrorCodes.ReadConcernMajorityNotAvailableYet});
+ assert.eq(coll1.distinct("a").sort(), [1, 2]);
+ }
+ },
+ {
+ name: "count retries on ReadConcernMajorityNotAvailableYet",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 2}));
+ failCommandWithFailPoint(["count"],
+ {errorCode: ErrorCodes.ReadConcernMajorityNotAvailableYet});
+ assert.eq(coll1.count({a: 1}), 2);
+ }
+ },
+];
+
+const retryReadsOnNetworkErrorsWithNetworkRetryAndBackgroundReconfigTest = [
+ {
+ name: "find retries on network errors",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({_id: 1}));
+ failCommandWithFailPoint(["find"], {closeConnection: true});
+ assert.eq(coll1.findOne({_id: 1}), {_id: 1});
+ }
+ },
+ {
+ name: "aggregate retries on network errors",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 2}));
+ failCommandWithFailPoint(["aggregate"], {closeConnection: true});
+ const cursor = coll1.aggregate([{$match: {a: 1}}]);
+ assert.eq(cursor.toArray().length, 2);
+ }
+ },
+ {
+ name: "distinct retries on network errors",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 2}));
+ failCommandWithFailPoint(["distinct"], {closeConnection: true});
+ assert.eq(coll1.distinct("a").sort(), [1, 2]);
+ }
+ },
+ {
+ name: "count retries on network errors",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 2}));
+ failCommandWithFailPoint(["count"], {closeConnection: true});
+ assert.eq(coll1.count({a: 1}), 2);
+ }
+ },
+];
+
+const doNotRetryReadErrorWithOutBackgroundReconfigTest = [
+ {
+ name: "find fails on ReadConcernMajorityNotAvailableYet",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({_id: 1}));
+ failCommandWithFailPoint(["find"],
+ {errorCode: ErrorCodes.ReadConcernMajorityNotAvailableYet});
+ assert.commandFailedWithCode(
+ assert.throws(function() {
+ coll1.findOne({_id: 1});
+ }),
+ ErrorCodes.ReadConcernMajorityNotAvailableYet);
+ }
+ },
+ {
+ name: "aggregate fails on ReadConcernMajorityNotAvailableYet",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 2}));
+ failCommandWithFailPoint(["aggregate"],
+ {errorCode: ErrorCodes.ReadConcernMajorityNotAvailableYet});
+ assert.commandFailedWithCode(
+ assert.throws(function() {
+ const cursor = coll1.aggregate([{$match: {a: 1}}]);
+ assert.eq(cursor.toArray().length, 2);
+ }),
+ ErrorCodes.ReadConcernMajorityNotAvailableYet);
+ }
+ },
+ {
+ name: "distinct fails on ReadConcernMajorityNotAvailableYet",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 2}));
+ failCommandWithFailPoint(["distinct"],
+ {errorCode: ErrorCodes.ReadConcernMajorityNotAvailableYet});
+ assert.commandFailedWithCode(
+ assert.throws(function() {
+ coll1.distinct("a");
+ }),
+ ErrorCodes.ReadConcernMajorityNotAvailableYet);
+ }
+ },
+ {
+ name: "count fails on ReadConcernMajorityNotAvailableYet",
+ test: function() {
+ assert.commandWorked(testDB.createCollection(collName1));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 1}));
+ assert.commandWorked(coll1.insert({a: 2}));
+ failCommandWithFailPoint(["count"],
+ {errorCode: ErrorCodes.ReadConcernMajorityNotAvailableYet});
+ assert.commandFailedWithCode(
+ assert.throws(function() {
+ coll1.count({a: 1});
+ }),
+ ErrorCodes.ReadConcernMajorityNotAvailableYet);
+ }
+ },
+];
+
TestData.networkErrorAndTxnOverrideConfig = {};
TestData.sessionOptions = new SessionOptions();
TestData.overrideRetryAttempts = 3;
@@ -1912,6 +2074,7 @@ jsTestLog("=-=-=-=-=-= Testing with 'retry on network error' by itself. =-=-=-=-
TestData.sessionOptions = new SessionOptions({retryWrites: true});
TestData.networkErrorAndTxnOverrideConfig.retryOnNetworkErrors = true;
TestData.networkErrorAndTxnOverrideConfig.wrapCRUDinTransactions = false;
+TestData.networkErrorAndTxnOverrideConfig.backgroundReconfigs = false;
session = conn.startSession(TestData.sessionOptions);
testDB = session.getDatabase(dbName);
@@ -1924,6 +2087,7 @@ jsTestLog("=-=-=-=-=-= Testing with 'txn override' by itself. =-=-=-=-=-=");
TestData.sessionOptions = new SessionOptions({retryWrites: false});
TestData.networkErrorAndTxnOverrideConfig.retryOnNetworkErrors = false;
TestData.networkErrorAndTxnOverrideConfig.wrapCRUDinTransactions = true;
+TestData.networkErrorAndTxnOverrideConfig.backgroundReconfigs = false;
session = conn.startSession(TestData.sessionOptions);
testDB = session.getDatabase(dbName);
@@ -1939,6 +2103,7 @@ jsTestLog("=-=-=-=-=-= Testing 'both txn override and retry on network error'. =
TestData.sessionOptions = new SessionOptions({retryWrites: true});
TestData.networkErrorAndTxnOverrideConfig.retryOnNetworkErrors = true;
TestData.networkErrorAndTxnOverrideConfig.wrapCRUDinTransactions = true;
+TestData.networkErrorAndTxnOverrideConfig.backgroundReconfigs = false;
session = conn.startSession(TestData.sessionOptions);
testDB = session.getDatabase(dbName);
@@ -1952,5 +2117,50 @@ if (usingFcv42) {
(testCase) => runTest("txnOverridePlusRetryOnNetworkErrorTestsFcv42", testCase));
}
+jsTestLog("=-=-=-=-=-= Testing 'retry on read errors from background reconfigs'. =-=-=-=-=-=");
+TestData.sessionOptions = new SessionOptions({retryWrites: false});
+TestData.networkErrorAndTxnOverrideConfig.retryOnNetworkErrors = false;
+TestData.networkErrorAndTxnOverrideConfig.backgroundReconfigs = true;
+TestData.networkErrorAndTxnOverrideConfig.wrapCRUDinTransactions = false;
+
+session = conn.startSession(TestData.sessionOptions);
+testDB = session.getDatabase(dbName);
+coll1 = testDB[collName1];
+coll2 = testDB[collName2];
+
+retryOnReadErrorsFromBackgroundReconfigTest.forEach(
+ (testCase) => runTest("retryOnReadErrorsFromBackgroundReconfigTest", testCase));
+
+jsTestLog(
+ "=-=-=-=-=-= Testing 'retry on network errors during network error retry and background reconfigs'. =-=-=-=-=-=");
+TestData.sessionOptions = new SessionOptions({retryWrites: true});
+TestData.networkErrorAndTxnOverrideConfig.retryOnNetworkErrors = true;
+TestData.networkErrorAndTxnOverrideConfig.backgroundReconfigs = true;
+TestData.networkErrorAndTxnOverrideConfig.wrapCRUDinTransactions = false;
+
+session = conn.startSession(TestData.sessionOptions);
+testDB = session.getDatabase(dbName);
+coll1 = testDB[collName1];
+coll2 = testDB[collName2];
+
+retryReadsOnNetworkErrorsWithNetworkRetryAndBackgroundReconfigTest.forEach(
+ (testCase) =>
+ runTest("retryReadsOnNetworkErrorsWithNetworkRetryAndBackgroundReconfigTest", testCase));
+
+jsTestLog(
+ "=-=-=-=-=-= Testing 'don't retry on network errors during background reconfigs'. =-=-=-=-=-=");
+TestData.sessionOptions = new SessionOptions({retryWrites: true});
+TestData.networkErrorAndTxnOverrideConfig.retryOnNetworkErrors = true;
+TestData.networkErrorAndTxnOverrideConfig.backgroundReconfigs = false;
+TestData.networkErrorAndTxnOverrideConfig.wrapCRUDinTransactions = false;
+
+session = conn.startSession(TestData.sessionOptions);
+testDB = session.getDatabase(dbName);
+coll1 = testDB[collName1];
+coll2 = testDB[collName2];
+
+doNotRetryReadErrorWithOutBackgroundReconfigTest.forEach(
+ (testCase) => runTest("doNotRetryReadErrorWithOutBackgroundReconfigTest", testCase));
+
rst.stopSet();
})();