summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author William Schultz <william.schultz@mongodb.com> 2019-04-23 14:28:50 -0400
committer William Schultz <william.schultz@mongodb.com> 2019-04-23 14:28:50 -0400
commit 75aac812bfce0419d68c65d48233f94c06aaeafd (patch)
tree 6cc5220614e4374f04a1fab8fdbf5667e73f3a7e
parent 8d8f1d7b95a7d8afd5bf1d2536b861e467e7ca81 (diff)
download mongo-75aac812bfce0419d68c65d48233f94c06aaeafd.tar.gz
SERVER-40706 AutoGetCollectionForRead invariant should permit kNoOverlap read source when there are conflicting catalog changes
-rw-r--r-- jstests/replsets/change_stream_speculative_majority_conflicting_catalog_changes.js 53
-rw-r--r-- src/mongo/db/db_raii.cpp 20
2 files changed, 72 insertions, 1 deletions
diff --git a/jstests/replsets/change_stream_speculative_majority_conflicting_catalog_changes.js b/jstests/replsets/change_stream_speculative_majority_conflicting_catalog_changes.js
new file mode 100644
index 00000000000..b9a55d1c3fa
--- /dev/null
+++ b/jstests/replsets/change_stream_speculative_majority_conflicting_catalog_changes.js
@@ -0,0 +1,53 @@
+/**
+ * Make sure that a speculative majority change stream read on a secondary does not trigger an
+ * invariant when there are conflicting catalog changes on the collection.
+ *
+ * Regression test for SERVER-40706.
+ *
+ * @tags: [uses_speculative_majority]
+ */
+(function() {
+    "use strict";
+
+    // The second node has priority 0, so it remains a secondary for the reads below.
+    const replTest = new ReplSetTest({
+        name: "replset",
+        nodes: [{}, {rsConfig: {priority: 0}}],
+        nodeOptions: {enableMajorityReadConcern: 'false'}
+    });
+    replTest.startSet();
+    replTest.initiate();
+
+    const dbName = "test";
+    const collName = "coll";
+
+    const primary = replTest.getPrimary();
+    const secondary = replTest.getSecondary();
+    const primaryDB = primary.getDB(dbName);
+    const primaryColl = primaryDB[collName];
+    const secondaryDB = secondary.getDB(dbName);
+
+    // Insert some documents on the primary that we can index.
+    const bulk = primaryColl.initializeUnorderedBulkOp();
+    for (let i = 0; i < 1000; i++) {
+        bulk.insert({a: i, b: i, c: i, d: i, e: i});
+    }
+    assert.commandWorked(bulk.execute());
+
+    // Build several indexes on the primary, checking that each succeeds. This makes it likely that
+    // index builds are in progress on the secondary while doing the reads below.
+    assert.commandWorked(primaryColl.createIndex({a: 1}));
+    assert.commandWorked(primaryColl.createIndex({b: 1}));
+    assert.commandWorked(primaryColl.createIndex({c: 1}));
+    assert.commandWorked(primaryColl.createIndex({d: 1}));
+    assert.commandWorked(primaryColl.createIndex({e: 1}));
+
+    // Do a bunch of change stream reads against the secondary. We are not worried about the
+    // responses, since we are only verifying that the server doesn't crash.
+    for (let i = 0; i < 20; i++) {
+        assert.commandWorked(secondaryDB.runCommand(
+            {aggregate: collName, pipeline: [{$changeStream: {}}], cursor: {}}));
+    }
+
+    replTest.stopSet();
+})(); \ No newline at end of file
diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp
index d4d876e4214..3d111f8a195 100644
--- a/src/mongo/db/db_raii.cpp
+++ b/src/mongo/db/db_raii.cpp
@@ -162,7 +162,10 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
}
invariant(lastAppliedTimestamp ||
- readSource == RecoveryUnit::ReadSource::kMajorityCommitted);
+ // The kMajorityCommitted and kNoOverlap read sources already read from timestamps
+ // that are safe with respect to concurrent secondary batch application.
+ readSource == RecoveryUnit::ReadSource::kMajorityCommitted ||
+ readSource == RecoveryUnit::ReadSource::kNoOverlap);
invariant(readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern);
// Yield locks in order to do the blocking call below.
@@ -192,6 +195,21 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
}
+ // If there are pending catalog changes when using a no-overlap read source, we choose to
+ // take the PBWM lock to conflict with any in-progress batches. This prevents us from idly
+ // spinning in this loop trying to get a new read timestamp ahead of the minimum visible
+ // snapshot. This helps us guarantee liveness (i.e. we can eventually get a suitable read
+ // timestamp) but should not be necessary for correctness.
+ if (readSource == RecoveryUnit::ReadSource::kNoOverlap) {
+ invariant(!lastAppliedTimestamp); // no-overlap read source selects its own timestamp.
+ _shouldNotConflictWithSecondaryBatchApplicationBlock = boost::none;
+ invariant(opCtx->lockState()->shouldConflictWithSecondaryBatchApplication());
+
+ // Abandon our snapshot but don't change our read source, so that we can select a new
+ // read timestamp on the next loop iteration.
+ opCtx->recoveryUnit()->abandonSnapshot();
+ }
+
if (readSource == RecoveryUnit::ReadSource::kMajorityCommitted) {
replCoord->waitUntilSnapshotCommitted(opCtx, *minSnapshot);
uassertStatusOK(opCtx->recoveryUnit()->obtainMajorityCommittedSnapshot());