diff options
author | Xin Hao Zhang <xinhao.zhang@mongodb.com> | 2019-07-23 11:35:07 -0400 |
---|---|---|
committer | Xin Hao Zhang <xinhao.zhang@mongodb.com> | 2019-07-24 08:09:43 -0400 |
commit | 5c9f7fb5d38df176bda950bdea6aaadbcb73e8e1 (patch) | |
tree | efbde299324ff4b5f1e132c64b5ea00918cf8ecd /jstests/noPassthroughWithMongod/query_oplogreplay.js | |
parent | 093b5bf0d87d3415e32af22ce8a7f19a47f64929 (diff) | |
download | mongo-5c9f7fb5d38df176bda950bdea6aaadbcb73e8e1.tar.gz |
SERVER-36186 Default to the the oplogreplay optimization path for oplog collections
Diffstat (limited to 'jstests/noPassthroughWithMongod/query_oplogreplay.js')
-rw-r--r-- | jstests/noPassthroughWithMongod/query_oplogreplay.js | 457 |
1 files changed, 238 insertions, 219 deletions
diff --git a/jstests/noPassthroughWithMongod/query_oplogreplay.js b/jstests/noPassthroughWithMongod/query_oplogreplay.js index f56a67d15b1..cf7c255ce4b 100644 --- a/jstests/noPassthroughWithMongod/query_oplogreplay.js +++ b/jstests/noPassthroughWithMongod/query_oplogreplay.js @@ -1,4 +1,4 @@ -// Test queries that set the OplogReplay flag. +// Test oplog queries that can be optimized with oplogReplay. // @tags: [requires_replication, requires_capped] (function() { @@ -7,241 +7,260 @@ load("jstests/libs/analyze_plan.js"); load("jstests/libs/storage_engine_utils.js"); - function test(t) { - const isOplog = t.getName().startsWith("oplog."); + const t = db.getSiblingDB("local").oplog.jstests_query_oplogreplay; - if (storageEngineIsWiredTigerOrInMemory() && isOplog) { + function dropOplogAndCreateNew(oplog, newCollectionSpec) { + if (storageEngineIsWiredTigerOrInMemory()) { // We forbid dropping the oplog when using the WiredTiger or in-memory storage engines // and so we can't drop the oplog here. Because Evergreen reuses nodes for testing, // the oplog may already exist on the test node; in this case, trying to create the // oplog once again would fail. // To ensure we are working with a clean oplog (an oplog without entries), we resort // to truncating the oplog instead. - if (!t.getDB().getCollectionNames().includes(t.getName())) { - t.getDB().createCollection(t.getName(), {capped: true, size: 16 * 1024}); + if (!oplog.getDB().getCollectionNames().includes(oplog.getName())) { + oplog.getDB().createCollection(oplog.getName(), newCollectionSpec); } - t.runCommand('emptycapped'); - t.getDB().adminCommand({replSetResizeOplog: 1, size: 16 * 1024}); + oplog.runCommand('emptycapped'); + oplog.getDB().adminCommand({replSetResizeOplog: 1, size: 16 * 1024}); } else { - t.drop(); + oplog.drop(); assert.commandWorked( - t.getDB().createCollection(t.getName(), {capped: true, size: 16 * 1024})); + oplog.getDB().createCollection(oplog.getName(), newCollectionSpec)); } + } - /** - * Helper function for making timestamps with the property that if i < j, then makeTS(i) < - * makeTS(j). - */ - function makeTS(i) { - return Timestamp(1000, i); - } + dropOplogAndCreateNew(t, {capped: true, size: 16 * 1024}); - for (let i = 1; i <= 100; i++) { - assert.writeOK(t.insert({_id: i, ts: makeTS(i)})); - } + /** + * Helper function for making timestamps with the property that if i < j, then makeTS(i) < + * makeTS(j). + */ + function makeTS(i) { + return Timestamp(1000, i); + } + + for (let i = 1; i <= 100; i++) { + assert.writeOK(t.insert({_id: i, ts: makeTS(i)})); + } + + // A $gt query on just the 'ts' field should return the next document after the timestamp. + var cursor = t.find({ts: {$gt: makeTS(20)}}); + assert.eq(21, cursor.next()["_id"]); + assert.eq(22, cursor.next()["_id"]); + + // A $gte query on the 'ts' field should include the timestamp. + cursor = t.find({ts: {$gte: makeTS(20)}}); + assert.eq(20, cursor.next()["_id"]); + assert.eq(21, cursor.next()["_id"]); + + // An $eq query on the 'ts' field should return the single record with the timestamp. + cursor = t.find({ts: {$eq: makeTS(20)}}); + assert.eq(20, cursor.next()["_id"]); + assert(!cursor.hasNext()); + + // An AND with both a $gt and $lt query on the 'ts' field will correctly return results in + // the proper bounds. + cursor = t.find({$and: [{ts: {$lt: makeTS(5)}}, {ts: {$gt: makeTS(1)}}]}); + assert.eq(2, cursor.next()["_id"]); + assert.eq(3, cursor.next()["_id"]); + assert.eq(4, cursor.next()["_id"]); + assert(!cursor.hasNext()); + + // An AND with multiple predicates on the 'ts' field correctly returns results on the + // tightest range. + cursor = t.find({ + $and: [ + {ts: {$gte: makeTS(2)}}, + {ts: {$gt: makeTS(3)}}, + {ts: {$lte: makeTS(7)}}, + {ts: {$lt: makeTS(7)}} + ] + }); + assert.eq(4, cursor.next()["_id"]); + assert.eq(5, cursor.next()["_id"]); + assert.eq(6, cursor.next()["_id"]); + assert(!cursor.hasNext()); + + // An AND with an $eq predicate in conjunction with other bounds correctly returns one + // result. + cursor = t.find({ + $and: [ + {ts: {$gte: makeTS(1)}}, + {ts: {$gt: makeTS(2)}}, + {ts: {$eq: makeTS(5)}}, + {ts: {$lte: makeTS(8)}}, + {ts: {$lt: makeTS(8)}} + ] + }); + assert.eq(5, cursor.next()["_id"]); + assert(!cursor.hasNext()); + + // An $eq query stops scanning after passing the max timestamp. + let res = t.find({ts: {$eq: makeTS(10)}}).explain("executionStats"); + assert.commandWorked(res); + // We expect to be able to seek directly to the entry with a 'ts' of 10. + assert.lte(res.executionStats.totalDocsExamined, 2, tojson(res)); + let collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res)); + assert.eq(makeTS(10), collScanStage.maxTs, tojson(res)); - // Missing 'ts' field. - assert.throws(function() { - t.find().addOption(DBQuery.Option.oplogReplay).next(); - }); - assert.throws(function() { - t.find({_id: 3}).addOption(DBQuery.Option.oplogReplay).next(); - }); - - // 'ts' field is not top-level. - assert.throws(function() { - t.find({$or: [{ts: {$gt: makeTS(3)}}, {foo: 3}]}) - .addOption(DBQuery.Option.oplogReplay) - .next(); - }); - assert.throws(function() { - t.find({$nor: [{ts: {$gt: makeTS(4)}}, {foo: 4}]}) - .addOption(DBQuery.Option.oplogReplay) - .next(); - }); - - // There is no $eq, $gt or $gte predicate on 'ts'. - assert.throws(function() { - t.find({ts: {$lt: makeTS(4)}}).addOption(DBQuery.Option.oplogReplay).next(); - }); - assert.throws(function() { - t.find({ts: {$lt: makeTS(4)}, _id: 3}).addOption(DBQuery.Option.oplogReplay).next(); - }); - - // A $gt query on just the 'ts' field should return the next document after the timestamp. - var cursor = t.find({ts: {$gt: makeTS(20)}}).addOption(DBQuery.Option.oplogReplay); - assert.eq(21, cursor.next()["_id"]); - assert.eq(22, cursor.next()["_id"]); - - // A $gte query on the 'ts' field should include the timestamp. - cursor = t.find({ts: {$gte: makeTS(20)}}).addOption(DBQuery.Option.oplogReplay); - assert.eq(20, cursor.next()["_id"]); - assert.eq(21, cursor.next()["_id"]); - - // An $eq query on the 'ts' field should return the single record with the timestamp. - cursor = t.find({ts: {$eq: makeTS(20)}}).addOption(DBQuery.Option.oplogReplay); - assert.eq(20, cursor.next()["_id"]); - assert(!cursor.hasNext()); - - // An AND with both a $gt and $lt query on the 'ts' field will correctly return results in - // the proper bounds. - cursor = t.find({ - $and: [{ts: {$lt: makeTS(5)}}, {ts: {$gt: makeTS(1)}}] - }).addOption(DBQuery.Option.oplogReplay); - assert.eq(2, cursor.next()["_id"]); - assert.eq(3, cursor.next()["_id"]); - assert.eq(4, cursor.next()["_id"]); - assert(!cursor.hasNext()); - - // An AND with multiple predicates on the 'ts' field correctly returns results on the - // tightest range. - cursor = t.find({ - $and: [ - {ts: {$gte: makeTS(2)}}, - {ts: {$gt: makeTS(3)}}, - {ts: {$lte: makeTS(7)}}, - {ts: {$lt: makeTS(7)}} - ] - }).addOption(DBQuery.Option.oplogReplay); - assert.eq(4, cursor.next()["_id"]); - assert.eq(5, cursor.next()["_id"]); - assert.eq(6, cursor.next()["_id"]); - assert(!cursor.hasNext()); - - // An AND with an $eq predicate in conjunction with other bounds correctly returns one - // result. - cursor = t.find({ - $and: [ - {ts: {$gte: makeTS(1)}}, - {ts: {$gt: makeTS(2)}}, - {ts: {$eq: makeTS(5)}}, - {ts: {$lte: makeTS(8)}}, - {ts: {$lt: makeTS(8)}} - ] - }).addOption(DBQuery.Option.oplogReplay); - assert.eq(5, cursor.next()["_id"]); - assert(!cursor.hasNext()); - - // An $eq query stops scanning after passing the max timestamp. - let res = t.find({ts: {$eq: makeTS(10)}}) - .addOption(DBQuery.Option.oplogReplay) - .explain("executionStats"); - assert.commandWorked(res); - // If this is the oplog, we expect to be able to seek directly to the entry with a 'ts' of - // 10. Otherwise, we have to scan from the beginning of the oplog. - let expectedDocsExamined = isOplog ? 2 : 11; - assert.lte(res.executionStats.totalDocsExamined, expectedDocsExamined, tojson(res)); - let collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); - assert.neq( - null, collScanStage, "no collection scan found in explain output: " + tojson(res)); - assert.eq(makeTS(10), collScanStage.maxTs, tojson(res)); - - // An AND with an $lt predicate stops scanning after passing the max timestamp. - res = t.find({$and: [{ts: {$gte: makeTS(1)}}, {ts: {$lt: makeTS(10)}}]}) - .addOption(DBQuery.Option.oplogReplay) - .explain("executionStats"); - assert.commandWorked(res); - assert.lte(res.executionStats.totalDocsExamined, 11, tojson(res)); - collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); - assert.neq( - null, collScanStage, "no collection scan found in explain output: " + tojson(res)); - assert.eq(makeTS(10), collScanStage.maxTs, tojson(res)); - - // An AND with an $lte predicate stops scanning after passing the max timestamp. - res = t.find({$and: [{ts: {$gte: makeTS(1)}}, {ts: {$lte: makeTS(10)}}]}) - .addOption(DBQuery.Option.oplogReplay) - .explain("executionStats"); - assert.commandWorked(res); - assert.lte(res.executionStats.totalDocsExamined, 12, tojson(res)); - collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); - assert.neq( - null, collScanStage, "no collection scan found in explain output: " + tojson(res)); - assert.eq(makeTS(10), collScanStage.maxTs, tojson(res)); - - // The max timestamp is respected even when the min timestamp is smaller than the lowest - // timestamp in the collection. - res = t.find({$and: [{ts: {$gte: makeTS(0)}}, {ts: {$lte: makeTS(10)}}]}) - .addOption(DBQuery.Option.oplogReplay) - .explain("executionStats"); - assert.commandWorked(res); - assert.lte(res.executionStats.totalDocsExamined, 12, tojson(res)); - collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); - assert.neq( - null, collScanStage, "no collection scan found in explain output: " + tojson(res)); - assert.eq(makeTS(10), collScanStage.maxTs, tojson(res)); - - // An AND with redundant $eq/$lt/$lte predicates stops scanning after passing the max - // timestamp. - res = t.find({ - $and: [ - {ts: {$gte: makeTS(0)}}, - {ts: {$lte: makeTS(10)}}, - {ts: {$eq: makeTS(5)}}, - {ts: {$lt: makeTS(20)}} - ] - }) - .addOption(DBQuery.Option.oplogReplay) - .explain("executionStats"); - assert.commandWorked(res); - // If this is the oplog, we expect to be able to seek directly to the entry with a 'ts' of - // 5. Otherwise, we have to scan from the beginning of the oplog. - expectedDocsExamined = isOplog ? 2 : 11; - assert.lte(res.executionStats.totalDocsExamined, expectedDocsExamined, tojson(res)); - collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); - assert.neq( - null, collScanStage, "no collection scan found in explain output: " + tojson(res)); - assert.eq(makeTS(5), collScanStage.maxTs, tojson(res)); - - // An $eq query for a non-existent timestamp scans a single oplog document. - res = t.find({ts: {$eq: makeTS(200)}}) - .addOption(DBQuery.Option.oplogReplay) - .explain("executionStats"); - assert.commandWorked(res); - // If this is the oplog, we expect to be able to seek directly to the end of the oplog. - // Otherwise, we have to scan the entire oplog before determining that 'ts' 100 does not - // exist. - expectedDocsExamined = isOplog ? 1 : 100; - assert.lte(res.executionStats.totalDocsExamined, expectedDocsExamined, tojson(res)); - collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); - assert.neq( - null, collScanStage, "no collection scan found in explain output: " + tojson(res)); - assert.eq(makeTS(200), collScanStage.maxTs, tojson(res)); - - // When the filter matches the last document within the timestamp range, the collection scan - // examines at most one more document. - res = t.find({$and: [{ts: {$gte: makeTS(4)}}, {ts: {$lte: makeTS(8)}}, {_id: 8}]}) - .addOption(DBQuery.Option.oplogReplay) - .explain("executionStats"); - assert.commandWorked(res); - // If this is the oplog, we expect to be able to seek directly to the start of the 'ts' - // range. Otherwise, we have to scan the capped collection from the beginning. - expectedDocsExamined = isOplog ? 6 : 9; - assert.lte(res.executionStats.totalDocsExamined, expectedDocsExamined, tojson(res)); - collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); - assert.neq( - null, collScanStage, "no collection scan found in explain output: " + tojson(res)); - assert.eq(makeTS(8), collScanStage.maxTs, tojson(res)); - - // A query over both 'ts' and '_id' should only pay attention to the 'ts' field for finding - // the oplog start (SERVER-13566). - cursor = t.find({ts: {$gte: makeTS(20)}, _id: 25}).addOption(DBQuery.Option.oplogReplay); - assert.eq(25, cursor.next()["_id"]); - assert(!cursor.hasNext()); + // An AND with an $lt predicate stops scanning after passing the max timestamp. + res = t.find({ + $and: [{ts: {$gte: makeTS(1)}}, {ts: {$lt: makeTS(10)}}] + }).explain("executionStats"); + assert.commandWorked(res); + assert.lte(res.executionStats.totalDocsExamined, 11, tojson(res)); + collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res)); + assert.eq(makeTS(10), collScanStage.maxTs, tojson(res)); + + // An AND with an $lte predicate stops scanning after passing the max timestamp. + res = t.find({ + $and: [{ts: {$gte: makeTS(1)}}, {ts: {$lte: makeTS(10)}}] + }).explain("executionStats"); + assert.commandWorked(res); + assert.lte(res.executionStats.totalDocsExamined, 12, tojson(res)); + collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res)); + assert.eq(makeTS(10), collScanStage.maxTs, tojson(res)); + + // The max timestamp is respected even when the min timestamp is smaller than the lowest + // timestamp in the collection. + res = t.find({ + $and: [{ts: {$gte: makeTS(0)}}, {ts: {$lte: makeTS(10)}}] + }).explain("executionStats"); + assert.commandWorked(res); + assert.lte(res.executionStats.totalDocsExamined, 12, tojson(res)); + collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res)); + assert.eq(makeTS(10), collScanStage.maxTs, tojson(res)); + + // An AND with redundant $eq/$lt/$lte predicates stops scanning after passing the max + // timestamp. + res = t.find({ + $and: [ + {ts: {$gte: makeTS(0)}}, + {ts: {$lte: makeTS(10)}}, + {ts: {$eq: makeTS(5)}}, + {ts: {$lt: makeTS(20)}} + ] + }).explain("executionStats"); + assert.commandWorked(res); + // We expect to be able to seek directly to the entry with a 'ts' of 5. + assert.lte(res.executionStats.totalDocsExamined, 2, tojson(res)); + collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res)); + assert.eq(makeTS(5), collScanStage.maxTs, tojson(res)); + assert.eq(makeTS(5), collScanStage.minTs, tojson(res)); + + // An $eq query for a non-existent timestamp scans a single oplog document. + res = t.find({ts: {$eq: makeTS(200)}}).explain("executionStats"); + assert.commandWorked(res); + // We expect to be able to seek directly to the end of the oplog. + assert.lte(res.executionStats.totalDocsExamined, 1, tojson(res)); + collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res)); + assert.eq(makeTS(200), collScanStage.maxTs, tojson(res)); + + // When the filter matches the last document within the timestamp range, the collection scan + // examines at most one more document. + res = t.find({ + $and: [{ts: {$gte: makeTS(4)}}, {ts: {$lte: makeTS(8)}}, {_id: 8}] + }).explain("executionStats"); + assert.commandWorked(res); + // We expect to be able to seek directly to the start of the 'ts' range. + assert.lte(res.executionStats.totalDocsExamined, 6, tojson(res)); + collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res)); + assert.eq(makeTS(8), collScanStage.maxTs, tojson(res)); + + // A filter with only an upper bound predicate on 'ts' stops scanning after + // passing the max timestamp. + res = t.find({ts: {$lt: makeTS(4)}}).explain("executionStats"); + assert.commandWorked(res); + assert.lte(res.executionStats.totalDocsExamined, 4, tojson(res)); + collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res)); + assert.eq(makeTS(4), collScanStage.maxTs, tojson(res)); + + // Oplog replay optimization should work with projection. + res = t.find({ts: {$lte: makeTS(4)}}).projection({'_id': 0}); + while (res.hasNext()) { + const next = res.next(); + assert(!next.hasOwnProperty('_id')); + assert(next.hasOwnProperty('ts')); } + res = res.explain("executionStats"); + assert.commandWorked(res); + assert.lte(res.executionStats.totalDocsExamined, 5); - jsTestLog("Non-oplog."); - // Test that oplog replay on a non-oplog collection succeeds. - test(db.jstests_query_oplogreplay); + res = t.find({ts: {$gte: makeTS(90)}}).projection({'_id': 0}); + while (res.hasNext()) { + const next = res.next(); + assert(!next.hasOwnProperty('_id')); + assert(next.hasOwnProperty('ts')); + } + res = res.explain("executionStats"); + assert.commandWorked(res); + assert.lte(res.executionStats.totalDocsExamined, 11); - jsTestLog("Oplog."); - // Test that oplog replay on the actual oplog succeeds. - test(db.getSiblingDB("local").oplog.jstests_query_oplogreplay); + // Oplog replay optimization should work with limit. + res = t.find({$and: [{ts: {$gte: makeTS(4)}}, {ts: {$lte: makeTS(8)}}]}) + .limit(2) + .explain("executionStats"); + assert.commandWorked(res); + assert.eq(2, res.executionStats.totalDocsExamined); + collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.eq(2, collScanStage.nReturned); - // Test that oplog replay on a non-capped collection fails. - const coll = db.jstests_query_oplogreplay; - coll.drop(); - assert.commandWorked(coll.getDB().createCollection(coll.getName())); - assert.throws(function() { - coll.find({ts: {$gt: "abcd"}}).addOption(DBQuery.Option.oplogReplay).next(); + // A query over both 'ts' and '_id' should only pay attention to the 'ts' field for finding + // the oplog start (SERVER-13566). + cursor = t.find({ts: {$gte: makeTS(20)}, _id: 25}); + assert.eq(25, cursor.next()["_id"]); + assert(!cursor.hasNext()); + + // 'oplogreplay' flag is allowed but ignored on the oplog collection. + assert.commandWorked(t.runCommand({find: t.getName(), oplogReplay: true})); + + // 'oplogreplay' flag is allowed but ignored on capped collections. + const cappedColl = db.cappedColl_jstests_query_oplogreplay; + cappedColl.drop(); + assert.commandWorked( + db.createCollection(cappedColl.getName(), {capped: true, size: 16 * 1024})); + for (let i = 1; i <= 100; i++) { + assert.commandWorked(cappedColl.insert({_id: i, ts: makeTS(i)})); + } + res = cappedColl.runCommand({ + explain: + {find: cappedColl.getName(), filter: {ts: {$eq: makeTS(200)}}, oplogReplay: true} }); + assert.commandWorked(res); + assert.eq(res.executionStats.totalDocsExamined, 100); + + // Ensure oplog replay hack does not work for backward scans. + res = t.find({ts: {$lt: makeTS(4)}}).sort({$natural: -1}).explain("executionStats"); + assert.commandWorked(res); + assert.eq(res.executionStats.totalDocsExamined, 100, tojson(res)); + collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN"); + assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res)); + + // We expect correct results when no collation specified and collection has a default collation. + const t_collation = db.getSiblingDB("local").oplog.jstests_query_oplogreplay_collation; + dropOplogAndCreateNew( + t_collation, {collation: {locale: "en_US", strength: 2}, capped: true, size: 16 * 1024}); + + assert.writeOK(t_collation.insert({str: "FOO", ts: Timestamp(1000, 0)})); + assert.writeOK(t_collation.insert({str: "FOO", ts: Timestamp(1000, 1)})); + assert.writeOK(t_collation.insert({str: "FOO", ts: Timestamp(1000, 2)})); + assert.eq(2, t_collation.find({str: "foo", ts: {$gte: Timestamp(1000, 1)}}).itcount()); + + // We expect correct results when "simple" collation specified and collection has a default + // collation. + assert.writeOK(t_collation.insert({str: "FOO", ts: Timestamp(1000, 0)})); + assert.writeOK(t_collation.insert({str: "FOO", ts: Timestamp(1000, 1)})); + assert.writeOK(t_collation.insert({str: "FOO", ts: Timestamp(1000, 2)})); + assert.eq(0, + t_collation.find({str: "foo", ts: {$gte: Timestamp(1000, 1)}}) + .collation({locale: "simple"}) + .itcount()); }()); |