summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCharlie Swanson <charlie.swanson@mongodb.com>2018-03-12 17:32:59 -0400
committerCharlie Swanson <charlie.swanson@mongodb.com>2018-03-14 14:02:00 -0400
commitb5b4975daf0a327cc2ee2756b6771ca4c9fabfed (patch)
tree058ac35a0058e99c2d0f58e689ae4bd1b6c6027f /src
parentda9c4e4ef0e9b5539fee9265b89e1d28f844c547 (diff)
downloadmongo-b5b4975daf0a327cc2ee2756b6771ca4c9fabfed.tar.gz
SERVER-33785 Prevent interrupt in clientcursormon thread
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/cursor_manager.cpp25
-rw-r--r--src/mongo/db/operation_context.cpp21
2 files changed, 26 insertions, 20 deletions
diff --git a/src/mongo/db/cursor_manager.cpp b/src/mongo/db/cursor_manager.cpp
index 5617369698b..08653cfda22 100644
--- a/src/mongo/db/cursor_manager.cpp
+++ b/src/mongo/db/cursor_manager.cpp
@@ -285,18 +285,25 @@ std::size_t GlobalCursorIdCache::timeoutCursors(OperationContext* opCtx, Date_t
// For each collection, time out its cursors under the collection lock (to prevent the
// collection from going away during the erase).
for (const auto& nsTodo : todo) {
- // Note that we specify 'kViewsPermitted' here, even though we don't expect 'nsTodo' to be a
- // view. Because we are not holding the mutex anymore, it is possible that the collection we
- // are trying to access has since been destroyed and a view of the same name has been
- // created in its place. Without 'kViewsPermitted' here, that would result in a uassert that
- // would crash the cursor cleaner background thread.
- AutoGetCollectionForReadCommand ctx(
- opCtx, nsTodo, AutoGetCollection::ViewMode::kViewsPermitted);
- if (!ctx.getDb()) {
+ // We need to be careful to not use an AutoGet* helper, since we only need the lock to
+ // protect potential access to the Collection's CursorManager, and those helpers may
+ // do things we don't want here, like check the shard version or throw an exception if this
+ // namespace has since turned into a view. Using Database::getCollection() will simply
+ // return nullptr if the collection has since turned into a view. In this case, the cursors
+ // will already have been cleaned up when the collection was dropped, so there will be none
+ // left to time out.
+ //
+ // Additionally, we need to use the UninterruptibleLockGuard to ensure the lock acquisition
+ // will not throw due to an interrupt. This method can be called from a background thread so
+ // we do not want to throw any exceptions.
+ UninterruptibleLockGuard noInterrupt(opCtx->lockState());
+ AutoGetDb dbLock(opCtx, nsTodo.db(), MODE_IS);
+ Lock::CollectionLock collLock(opCtx->lockState(), nsTodo.ns(), MODE_IS);
+ if (!dbLock.getDb()) {
continue;
}
- Collection* const collection = ctx.getCollection();
+ Collection* const collection = dbLock.getDb()->getCollection(opCtx, nsTodo);
if (!collection) {
// The 'nsTodo' collection has been dropped since we held _mutex. We can safely skip it.
continue;
diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp
index e503c2e071d..0ce0b29a459 100644
--- a/src/mongo/db/operation_context.cpp
+++ b/src/mongo/db/operation_context.cpp
@@ -66,11 +66,12 @@ MONGO_FP_DECLARE(maxTimeNeverTimeOut);
//
// {configureFailPoint: "checkForInterruptFail",
// mode: "alwaysOn",
-// data: {conn: 17, chance: .01}}
+// data: {threadName: "threadName", chance: .01}}
//
-// Both data fields must be specified. In the above example, all interrupt points on connection 17
-// will generate a kill on the current operation with probability p(.01), including interrupt points
-// of nested operations. "chance" must be a double between 0 and 1, inclusive.
+// Both data fields must be specified. In the above example, all interrupt points on the thread with
+// name 'threadName' will generate a kill on the current operation with probability p(.01),
+// including interrupt points of nested operations. "chance" must be a double between 0 and 1,
+// inclusive.
MONGO_FP_DECLARE(checkForInterruptFail);
} // namespace
@@ -168,17 +169,15 @@ namespace {
// Helper function for checkForInterrupt fail point. Decides whether the operation currently
// being run by the given Client meet the (probabilistic) conditions for interruption as
// specified in the fail point info.
-bool opShouldFail(const OperationContext* opCtx, const BSONObj& failPointInfo) {
+bool opShouldFail(Client* client, const BSONObj& failPointInfo) {
// Only target the client with the specified connection number.
- if (opCtx->getClient()->getConnectionId() != failPointInfo["conn"].safeNumberLong()) {
+ if (client->desc() != failPointInfo["threadName"].valuestrsafe()) {
return false;
}
// Return true with (approx) probability p = "chance". Recall: 0 <= chance <= 1.
- double next = static_cast<double>(std::abs(opCtx->getClient()->getPrng().nextInt64()));
- double upperBound =
- std::numeric_limits<int64_t>::max() * failPointInfo["chance"].numberDouble();
- if (next > upperBound) {
+ double next = client->getPrng().nextCanonicalDouble();
+ if (next > failPointInfo["chance"].numberDouble()) {
return false;
}
return true;
@@ -200,7 +199,7 @@ Status OperationContext::checkForInterruptNoAssert() {
}
MONGO_FAIL_POINT_BLOCK(checkForInterruptFail, scopedFailPoint) {
- if (opShouldFail(this, scopedFailPoint.getData())) {
+ if (opShouldFail(getClient(), scopedFailPoint.getData())) {
log() << "set pending kill on op " << getOpID() << ", for checkForInterruptFail";
markKilled();
}