buildscripts/resmokeconfig/suites/shard_split_kill_primary_jscore_passthrough.yml


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233

# This suite starts a 3-node replica set and uses the ContinuousShardSplit hook and the
# ContinuousStepdown hook to run background shard splits while periodically killing the primary of
# the current shard split donor.

# Shared anchors, aliased further down so the key file and credentials are defined only once.
config_variables:
# Key file path; aliased by TestData.keyFile and by the fixture's mongod `keyFile` option.
- &keyFile jstests/libs/authTestsKey
# Aliased as the password in authOptions and as TestData.keyFileData.
# NOTE(review): presumably the contents of the key file above -- confirm.
- &keyFileData Thiskeyisonlyforrunningthesuitewithauthenticationdontuseitinanytestsdirectly
# Credentials aliased by the ContinuousStepdown hook (auth_options), the ContinuousShardSplit
# hook (TestData.authOptions), and the fixture (auth_options).
- &authOptions
  authenticationDatabase: admin
  authenticationMechanism: SCRAM-SHA-256
  password: *keyFileData
  username: __system

# Kind of test this suite runs; the selector below matches JavaScript files (*.js).
test_kind: js_test

# Selects every jscore test, minus the files and tags excluded below.
selector:
  roots:
  - jstests/core/**/*.js
  exclude_files:
  - jstests/core/txns/**/*.js
  # Skip any tests that run with auth explicitly.
  - jstests/core/administrative/*[aA]uth*.js

  # This test runs createIndexes commands without asserting they succeed and depends on a particular
  # index to exist.
  - jstests/core/**/index_many2.js

  #
  # From retryable writes jscore stepdown passthrough
  #

  # No-op retries are not ignored by top, the profiler, or opcount.
  - jstests/core/**/operation_latency_histogram.js
  - jstests/core/**/profile2.js
  - jstests/core/**/profile3.js
  - jstests/core/**/profile_findandmodify.js
  - jstests/core/**/top.js
  - jstests/core/views/views_stats.js

  # TODO SERVER-31242: findAndModify no-op retry should respect the fields option.
  - jstests/core/**/crud_api.js
  - jstests/core/**/find_and_modify.js
  - jstests/core/**/find_and_modify2.js
  - jstests/core/**/find_and_modify_server6865.js
  - jstests/core/**/fts_find_and_modify.js
  - jstests/core/**/project_with_collation.js

  # Stepdown commands during fsync lock will fail.
  - jstests/core/**/currentop.js
  - jstests/core/**/fsync.js
  - jstests/core/**/killop_drop_collection.js

  # Expect drops/creates to fail or have a certain response:
  - jstests/core/**/explain_upsert.js
  - jstests/core/**/indexes_multiple_commands.js

  # Expect certain responses, but retries of successfully completed commands may return
  # different values:
  - jstests/core/**/create_indexes.js
  - jstests/core/**/objid5.js

  # Expect results to return in a certain order, secondaries may apply ops out of order.
  - jstests/core/**/coveredIndex1.js
  - jstests/core/**/sortc.js

  # Spawns new mongo shells, which don't retry connecting on stepdown errors.
  - jstests/core/**/shell_connection_strings.js

  - jstests/core/**/bench_test*.js # benchRun() used for writes
  - jstests/core/**/benchrun_pipeline_updates.js # benchRun() used for writes
  - jstests/core/**/connection_string_validation.js # Does not expect a replica set connection string.
  - jstests/core/**/explain_large_bounds.js # Stepdown can timeout waiting for global lock.
  - jstests/core/**/list_collections_filter.js # Temporary collections are dropped on failover.
  - jstests/core/**/startup_log.js # Checks pid, which is different on each server.

  #
  # Denylists specific to this suite
  #
  # (profile_findandmodify.js is already excluded in the retryable-writes section above, so it is
  # not repeated here.)

  - jstests/core/**/api_version_parameters.js
  - jstests/core/**/command_let_variables.js
  - jstests/core/**/crud_ops_do_not_throw_locktimeout.js
  - jstests/core/**/explain_includes_command.js
  - jstests/core/**/explain_multi_plan.js
  - jstests/core/**/explain_uuid.js
  - jstests/core/**/field_name_validation.js
  - jstests/core/**/noop_writes.js
  - jstests/core/**/sortl.js
  exclude_with_any_tags:
  - assumes_standalone_mongod
  # Cursor ids will not survive rerouting between replica sets during a shard split.
  - requires_getmore
  # fastcount can produce inaccurate counts after unclean shutdowns.
  - requires_fastcount
  # "Cowardly fail if dbStats is run with a mongod that had an unclean shutdown: ..."
  - requires_dbstats
  # "Cowardly fail if collStats is run with a mongod that had an unclean shutdown: ..."
  - requires_collstats
  # Due to background shard splits, operations in the main test shell are not guaranteed to
  # be causally consistent with operations in a parallel shell. The reason is that the
  # TenantMigrationCommitted error is only thrown when the client does a write or an
  # atClusterTime/afterClusterTime or linearizable read. Therefore, one of the shells may not be
  # aware that the split has occurred and would not forward the read/write command to the right
  # replica set.
  - uses_parallel_shell
  # Profile settings are stored in-memory only so are not transferred to the recipient.
  - requires_profiling
  # Capped collections are banned in Serverless.
  - requires_capped
  # emptycapped command is blocked during shard split.
  - requires_emptycapped
  # Multi-updates that conflict with shard split are not retried by inject_tenant_prefix.js.
  - requires_multi_updates
  - tenant_migration_incompatible
  - operations_longer_than_stepdown_interval
  - requires_non_retryable_writes
  - requires_non_retryable_commands
  - does_not_support_stepdowns
  - assumes_read_concern_unchanged
  - assumes_write_concern_unchanged
  # Full validation can cause ongoing queries to fail. This can affect the shard split process.
  - uses_full_validation
  - multiple_tenants_incompatible
  - requires_timeseries # Tenant migration not supported

# Shell configuration, hooks, and fixture used to run the selected tests.
executor:
  archive:
    tests: true
    hooks:
    - CheckReplDBHash
    - CheckReplOplogs
    - ValidateCollections
  config:
    shell_options:
      # The override scripts are loaded before and after the explicit connect() call; the order
      # of these statements is kept exactly as written.
      eval: >-
        testingReplication = true;
        load('jstests/libs/override_methods/network_error_and_txn_override.js');
        db = connect(TestData.connectionString);
        load('jstests/libs/override_methods/inject_tenant_prefix.js');
        load("jstests/libs/override_methods/enable_sessions.js");
        load("jstests/libs/override_methods/set_read_and_write_concerns.js");
        load("jstests/libs/override_methods/fail_unclean_shutdown_incompatible_commands.js");
        load("jstests/libs/override_methods/fail_unclean_shutdown_start_parallel_shell.js");
        jsTest.authenticate(db.getMongo());
      global_vars:
        # Anchored so the hooks below can alias or merge the same TestData.
        TestData: &TestData
          # Informs behavior in the inject_tenant_prefix override.
          splitPassthrough: true
          tenantIds: ["636d957b2646ddfaf9b5e13f", "123d957b2646ddfaf9b5e91d"]
          # Temp collections are removed when the recipient steps up. We expect a mismatch, this
          # ensures we don't compare them.
          skipTempCollections: true
          auth: true
          authMechanism: SCRAM-SHA-256
          keyFile: *keyFile
          keyFileData: *keyFileData
          roleGraphInvalidationIsFatal: true
          alwaysInjectTransactionNumber: true
          defaultReadConcernLevel: "majority"
          logRetryAttempts: true
          networkErrorAndTxnOverrideConfig:
            retryOnNetworkErrors: true
          overrideRetryAttempts: 3
          sessionOptions:
            readConcern:
              level: "majority"
            # Force DBClientRS to find the primary for non-write commands.
            readPreference:
              mode: "primary"
            retryWrites: true
      # We specify nodb so the shell used by each test connects only after the eval above has
      # loaded the retry logic in network_error_and_txn_override.js (see its explicit connect()).
      nodb: ""
  hooks:
  - class: ContinuousStepdown
    kill: true
    auth_options: *authOptions
  - class: ContinuousShardSplit
    shell_options:
      eval: >-
        testingReplication = true;
        load('jstests/libs/override_methods/network_error_and_txn_override.js');
        load("jstests/libs/override_methods/set_read_and_write_concerns.js");
      global_vars:
        # Same TestData as the test shell, plus the credentials under authOptions.
        TestData:
          <<: *TestData
          authOptions: *authOptions
      # nodb is specified here as it is for the test shell above. This eval does not call
      # connect() itself.
      # NOTE(review): presumably the hook establishes its own connections -- confirm.
      nodb: ""
  - class: CheckReplOplogs
    shell_options:
      global_vars:
        TestData: *TestData
  # The CheckReplDBHash hook waits until all operations have replicated to and have been applied
  # on the secondaries, so we run the ValidateCollections hook after it to ensure we're
  # validating the entire contents of the collection.
  - class: CheckReplDBHash
    shell_options:
      global_vars:
        TestData: *TestData
  - class: ValidateCollections
    shell_options:
      global_vars:
        # Same TestData as the test shell, with skipEnforceFastCountOnValidate added.
        TestData:
          <<: *TestData
          skipEnforceFastCountOnValidate: true
  - class: CleanEveryN
    n: 1
  fixture:
    class: ShardSplitFixture
    common_mongod_options:
      set_parameters:
        enableTestCommands: 1
        enableElectionHandoff: 0
        # NOTE(review): going by the failpoint name, this presumably aborts about half of the
        # shard splits before they leave the blocking state -- confirm against the failpoint.
        failpoint.abortShardSplitBeforeLeavingBlockingState:
          mode:
            activationProbability: 0.5
        # NOTE(review): presumably holds each split in the blocking state for blockTimeMS --
        # confirm against the failpoint.
        failpoint.pauseShardSplitBeforeLeavingBlockingState:
          mode: alwaysOn
          data:
            blockTimeMS: 250
        shardSplitGarbageCollectionDelayMS: 1
        ttlMonitorSleepSecs: 1
        # Unclean shutdown may cause blockTimestamp catch up to be very long
        shardSplitTimeoutMS: 60000
      auth: ''
      keyFile: *keyFile
    num_nodes_per_replica_set: 3
    replset_config_options:
      settings:
        catchUpTimeoutMillis: 0
    auth_options: *authOptions