diff options
author | Judah Schvimer <judah.schvimer@10gen.com> | 2019-10-04 18:33:43 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-10-04 18:33:43 +0000 |
commit | d9e67706b802791f73eb485f35d359047a25ce9f (patch) | |
tree | 4aba377b118213702923ae27d3c0d08129174df6 | |
parent | 6169d10404669fb8a05f4cfd895e837592846e3b (diff) | |
download | mongo-d9e67706b802791f73eb485f35d359047a25ce9f.tar.gz |
SERVER-42951 Test that CRUD operations outside transactions don't throw LockTimeout
11 files changed, 233 insertions, 1 deletions
diff --git a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough.yml
index ef36cb66ea0..a4baf25e924 100644
--- a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough.yml
@@ -72,6 +72,7 @@ selector:
   - jstests/core/ord.js
   # Parallel shell is not causally consistent
   - jstests/core/benchrun_pipeline_updates.js
+  - jstests/core/crud_ops_do_not_throw_locktimeout.js
   - jstests/core/cursora.js
   - jstests/core/find_and_modify_concurrent_update.js
   - jstests/core/removec.js
diff --git a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml
index 3e9fd938cff..cfba4046343 100644
--- a/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml
+++ b/buildscripts/resmokeconfig/suites/causally_consistent_jscore_passthrough_auth.yml
@@ -84,6 +84,7 @@ selector:
   - jstests/core/ord.js
   # Parallel shell is not causally consistent
   - jstests/core/benchrun_pipeline_updates.js
+  - jstests/core/crud_ops_do_not_throw_locktimeout.js
   - jstests/core/cursora.js
   - jstests/core/find_and_modify_concurrent_update.js
   - jstests/core/removec.js
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_kill_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_kill_primary_jscore_passthrough.yml
index 8c207bb37b5..a44a1593231 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_kill_primary_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_kill_primary_jscore_passthrough.yml
@@ -73,6 +73,7 @@ selector:
   # Starts a parallel shell but won't restart it after unclean shutdown.
   # TODO SERVER-33229: Remove these exclusions
   - jstests/core/compact_keeps_indexes.js
+  - jstests/core/crud_ops_do_not_throw_locktimeout.js
   - jstests/core/find_and_modify_concurrent_update.js
   - jstests/core/shellstartparallel.js
 
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml
index fcb206a0645..de2c4f806fc 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_jscore_passthrough.yml
@@ -236,6 +236,7 @@ selector:
   - jstests/core/count10.js
   - jstests/core/count_plan_summary.js
   - jstests/core/coveredIndex3.js
+  - jstests/core/crud_ops_do_not_throw_locktimeout.js
   - jstests/core/currentop.js
   - jstests/core/distinct3.js
   - jstests/core/find_and_modify_concurrent_update.js
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml
index bdc68477138..6e7f9697b01 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_stepdown_jscore_passthrough.yml
@@ -232,6 +232,7 @@ selector:
   - jstests/core/count10.js
   - jstests/core/count_plan_summary.js
   - jstests/core/coveredIndex3.js
+  - jstests/core/crud_ops_do_not_throw_locktimeout.js
   - jstests/core/currentop.js
   - jstests/core/distinct3.js
   - jstests/core/find_and_modify_concurrent_update.js
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml
index 43f3668ea49..7955ab0d6d6 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_multi_stmt_txn_terminate_primary_jscore_passthrough.yml
@@ -229,6 +229,7 @@ selector:
   - jstests/core/count10.js
   - jstests/core/count_plan_summary.js
   - jstests/core/coveredIndex3.js
+  - jstests/core/crud_ops_do_not_throw_locktimeout.js
   - jstests/core/currentop.js
   - jstests/core/distinct3.js
   - jstests/core/find_and_modify_concurrent_update.js
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_terminate_primary_jscore_passthrough.yml b/buildscripts/resmokeconfig/suites/replica_sets_terminate_primary_jscore_passthrough.yml
index 68fbd77c0f4..24bd76896e5 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_terminate_primary_jscore_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_terminate_primary_jscore_passthrough.yml
@@ -73,6 +73,7 @@ selector:
   # Starts a parallel shell but won't restart it after shutdown.
   # TODO SERVER-33229: Remove these exclusions
   - jstests/core/compact_keeps_indexes.js
+  - jstests/core/crud_ops_do_not_throw_locktimeout.js
   - jstests/core/find_and_modify_concurrent_update.js
   - jstests/core/shellstartparallel.js
 
diff --git a/buildscripts/resmokeconfig/suites/secondary_reads_passthrough.yml b/buildscripts/resmokeconfig/suites/secondary_reads_passthrough.yml
index 320ae4d22a2..01b5f941fb8 100644
--- a/buildscripts/resmokeconfig/suites/secondary_reads_passthrough.yml
+++ b/buildscripts/resmokeconfig/suites/secondary_reads_passthrough.yml
@@ -35,6 +35,7 @@ selector:
   - jstests/core/ord.js
   # Parallel shell is not causally consistent
   - jstests/core/benchrun_pipeline_updates.js
+  - jstests/core/crud_ops_do_not_throw_locktimeout.js
   - jstests/core/cursora.js
   - jstests/core/find_and_modify_concurrent_update.js
   - jstests/core/shellstartparallel.js
diff --git a/jstests/core/crud_ops_do_not_throw_locktimeout.js b/jstests/core/crud_ops_do_not_throw_locktimeout.js
new file mode 100644
index 00000000000..7ecd8a778b7
--- /dev/null
+++ b/jstests/core/crud_ops_do_not_throw_locktimeout.js
@@ -0,0 +1,81 @@
+/**
+ * Tests that CRUD operations do not throw lock timeouts outside of transactions.
+ *
+ * @tags: [assumes_against_mongod_not_mongos,
+ *         assumes_read_concern_unchanged,
+ *         assumes_write_concern_unchanged]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/curop_helpers.js");
+load('jstests/libs/parallel_shell_helpers.js');
+
+const coll = db[jsTestName()];
+coll.drop();
+
+const doc = {
+    _id: 1
+};
+assert.commandWorked(coll.insert(doc));
+
+const failpoint = 'hangAfterDatabaseLock';
+assert.commandWorked(db.adminCommand({configureFailPoint: failpoint, mode: "alwaysOn"}));
+
+jsTestLog("Starting collMod that will block");
+
+const awaitBlockingDDL =
+    startParallelShell(funWithArgs(function(collName) {
+                           assert.commandWorked(db.runCommand({collMod: collName}));
+                       }, coll.getName()), db.getMongo().port);
+
+jsTestLog("Waiting for collMod to acquire a database lock");
+waitForCurOpByFailPointNoNS(db, failpoint);
+
+// Each of the following operations should time out trying to acquire the collection lock, which the
+// collMod is holding in mode X.
+jsTestLog("Testing CRUD op timeouts");
+
+const failureTimeoutMS = 1 * 1000;
+
+assert.commandFailedWithCode(
+    db.runCommand({insert: coll.getName(), documents: [{_id: 2}], maxTimeMS: failureTimeoutMS}),
+    ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(db.runCommand({find: coll.getName(), maxTimeMS: failureTimeoutMS}),
+                             ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(db.runCommand({
+    update: coll.getName(),
+    updates: [{q: doc, u: {$set: {b: 1}}}],
+    maxTimeMS: failureTimeoutMS
+}),
+                             ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(
+    db.runCommand(
+        {delete: coll.getName(), deletes: [{q: doc, limit: 1}], maxTimeMS: failureTimeoutMS}),
+    ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(db.runCommand({
+    findAndModify: coll.getName(),
+    query: doc,
+    update: {$set: {b: 2}},
+    maxTimeMS: failureTimeoutMS
+}),
+                             ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(db.runCommand({
+    findAndModify: coll.getName(),
+    query: doc,
+    remove: true,
+    maxTimeMS: failureTimeoutMS
+}),
+                             ErrorCodes.MaxTimeMSExpired);
+
+jsTestLog("Waiting for threads to join");
+assert.commandWorked(db.adminCommand({configureFailPoint: failpoint, mode: "off"}));
+awaitBlockingDDL();
+
+assert.sameMembers(coll.find().toArray(), [doc]);
+})();
diff --git a/jstests/replsets/crud_ops_do_not_throw_locktimeout_on_ticket_exhaustion.js b/jstests/replsets/crud_ops_do_not_throw_locktimeout_on_ticket_exhaustion.js
new file mode 100644
index 00000000000..c99f35b3593
--- /dev/null
+++ b/jstests/replsets/crud_ops_do_not_throw_locktimeout_on_ticket_exhaustion.js
@@ -0,0 +1,141 @@
+/**
+ * Test ensures that CRUD operations that time out because they cannot acquire a ticket do not
+ * return a LockTimeout.
+ *
+ * @tags: [uses_transactions, uses_prepare_transaction]
+ */
+(function() {
+"use strict";
+
+load("jstests/core/txns/libs/prepare_helpers.js");
+load('jstests/libs/parallel_shell_helpers.js');
+
+// We set the number of tickets to be a small value in order to avoid needing to spawn a large
+// number of threads to exhaust all of the available ones.
+const kNumWriteTickets = 5;
+const kNumReadTickets = 5;
+
+const rst = new ReplSetTest({
+    nodes: 1,
+    nodeOptions: {
+        setParameter: {
+            wiredTigerConcurrentWriteTransactions: kNumWriteTickets,
+            wiredTigerConcurrentReadTransactions: kNumReadTickets,
+        }
+    }
+});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const dbName = "test";
+const db = primary.getDB(dbName);
+const coll = db[jsTestName()];
+const otherCollName = jsTestName() + "_other";
+
+const doc = {
+    _id: 1
+};
+assert.commandWorked(coll.insert(doc));
+assert.commandWorked(db[otherCollName].insert(doc));
+
+jsTestLog("Starting transaction");
+const session = primary.startSession();
+const sessionDb = session.getDatabase(dbName);
+
+session.startTransaction();
+assert.commandWorked(sessionDb[otherCollName].update(doc, {$set: {a: 1}}));
+
+jsTestLog("Preparing transaction so readers and writers block holding a ticket");
+PrepareHelpers.prepareTransaction(session);
+
+const threads = [];
+jsTestLog(`Starting ${kNumReadTickets} readers`);
+for (let i = 0; i < kNumReadTickets; ++i) {
+    const thread =
+        startParallelShell(funWithArgs(function(dbName, collName) {
+                               assert.commandWorked(db.getSiblingDB(dbName).runCommand(
+                                   {"find": collName, readConcern: {level: "linearizable"}}));
+                           }, db.getName(), otherCollName), primary.port);
+
+    threads.push(thread);
+}
+
+jsTestLog(`Starting ${kNumWriteTickets} writers`);
+for (let i = 0; i < kNumWriteTickets; ++i) {
+    const thread = startParallelShell(
+        funWithArgs(function(dbName, collName, doc) {
+            assert.commandWorked(db.getSiblingDB(dbName)[collName].update(doc, {$set: {a: 2}}));
+        }, db.getName(), otherCollName, doc), primary.port);
+
+    threads.push(thread);
+}
+
+jsTestLog("Waiting for reads and writes to block on prepare conflicts");
+
+assert.soon(
+    () => {
+        const ops = db.currentOp({
+            "$and": [
+                {"$or": [{"op": "query"}, {"op": "update"}]},
+                {"ns": dbName + "." + otherCollName},
+                {"prepareReadConflicts": {"$gt": 0}}
+            ]
+        });
+        return ops.inprog.length === (kNumReadTickets + kNumWriteTickets);
+    },
+    () => {
+        return "Didn't find enough commands running: " + tojson(db.currentOp());
+    });
+
+const failureTimeoutMS = 1 * 1000;
+
+// Each of the following operations should time out trying to acquire a read or write ticket. The
+// tickets are all held by the readers and writers started above.
+jsTestLog("Testing CRUD op timeouts");
+
+assert.commandFailedWithCode(
+    db.runCommand({insert: coll.getName(), documents: [{_id: 2}], maxTimeMS: failureTimeoutMS}),
+    ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(db.runCommand({find: coll.getName(), maxTimeMS: failureTimeoutMS}),
+                             ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(db.runCommand({
+    update: coll.getName(),
+    updates: [{q: doc, u: {$set: {b: 1}}}],
+    maxTimeMS: failureTimeoutMS
+}),
+                             ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(
+    db.runCommand(
+        {delete: coll.getName(), deletes: [{q: doc, limit: 1}], maxTimeMS: failureTimeoutMS}),
+    ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(db.runCommand({
+    findAndModify: coll.getName(),
+    query: doc,
+    update: {$set: {b: 2}},
+    maxTimeMS: failureTimeoutMS
+}),
+                             ErrorCodes.MaxTimeMSExpired);
+
+assert.commandFailedWithCode(db.runCommand({
+    findAndModify: coll.getName(),
+    query: doc,
+    remove: true,
+    maxTimeMS: failureTimeoutMS
+}),
+                             ErrorCodes.MaxTimeMSExpired);
+
+jsTestLog("Aborting transaction");
+assert.commandWorked(session.abortTransaction_forTesting());
+
+jsTestLog("Waiting for threads to join");
+for (let joinThread of threads) {
+    joinThread();
+}
+
+rst.stopSet();
+})();
diff --git a/src/mongo/db/catalog/coll_mod.cpp b/src/mongo/db/catalog/coll_mod.cpp
index fdcb001a229..8ee9582bd1c 100644
--- a/src/mongo/db/catalog/coll_mod.cpp
+++ b/src/mongo/db/catalog/coll_mod.cpp
@@ -43,6 +43,7 @@
 #include "mongo/db/client.h"
 #include "mongo/db/command_generic_argument.h"
 #include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/curop_failpoint_helpers.h"
 #include "mongo/db/db_raii.h"
 #include "mongo/db/index/index_descriptor.h"
 #include "mongo/db/index_builds_coordinator.h"
@@ -268,7 +269,8 @@ Status _collModInternal(OperationContext* opCtx,
     Collection* coll =
         db ? CollectionCatalog::get(opCtx).lookupCollectionByNamespace(nss) : nullptr;
 
-    hangAfterDatabaseLock.pauseWhileSet();
+    CurOpFailpointHelpers::waitWhileFailPointEnabled(
+        &hangAfterDatabaseLock, opCtx, "hangAfterDatabaseLock", []() {}, false, nss);
 
     // May also modify a view instead of a collection.
     boost::optional<ViewDefinition> view;