diff options
author | Kyle Suarez <kyle.suarez@mongodb.com> | 2022-09-19 14:53:31 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-19 23:55:45 +0000 |
commit | e8ed64ed0440f576fb0c62d0be61f2f5f0749765 (patch) | |
tree | e997ff3b7b15257f8aecaf281f6c32280c98d04d | |
parent | 6424b1f1f167117b53b81e8b34361d64cac43222 (diff) | |
download | mongo-e8ed64ed0440f576fb0c62d0be61f2f5f0749765.tar.gz |
SERVER-67715 escape $changeStream regex
(cherry picked from commit c9fe899fff347770c0e30fa0272f6157be6676a8)
4 files changed, 82 insertions, 4 deletions
diff --git a/jstests/change_streams/oplog_rewrite/change_stream_match_pushdown_namespace_rewrite.js b/jstests/change_streams/oplog_rewrite/change_stream_match_pushdown_namespace_rewrite.js index ded0e67f3f7..526eff8b252 100644 --- a/jstests/change_streams/oplog_rewrite/change_stream_match_pushdown_namespace_rewrite.js +++ b/jstests/change_streams/oplog_rewrite/change_stream_match_pushdown_namespace_rewrite.js @@ -1084,5 +1084,29 @@ verifyOnWholeCluster( {change_stream_match_pushdown_and_rewrite_and_rewrite: {dropDatabase: [dbName, dbName]}}, 1 /* expectedOplogRetDocsForEachShard */); +// Create two sharded collections in the main test database, then start a new change stream to get a +// fresh resume token. +const collWithDot = + createShardedCollection(st, "_id" /* shardKey */, dbName, "foo.bar", 2 /*splitAt */); +const collWithUnderscore = + createShardedCollection(st, "_id" /* shardKey */, dbName, "foo_bar", 2 /*splitAt */); +const thirdResumeAfterToken = + db.getSiblingDB("admin").watch([], {allChangesForCluster: true}).getResumeToken(); + +// Insert one document per collection, per shard. The test cases below verify the behavior of regex +// matches with escaped characters on collections with special names (e.g. containing dots). This +// exercises the fix for SERVER-67715. +assert.commandWorked(collWithDot.insert({_id: 1})); +assert.commandWorked(collWithDot.insert({_id: 3})); +assert.commandWorked(collWithUnderscore.insert({_id: 1})); +assert.commandWorked(collWithUnderscore.insert({_id: 3})); + +// Ensure that a regex match properly respects escaped characters (here, testing that the escaped +// "." character is treated as a literal dot). +verifyOnWholeCluster(thirdResumeAfterToken, + {$match: {"ns.coll": {$nin: [/^foo\./]}}}, + {"foo_bar": {insert: [1, 3]}}, + 1 /* expectedOplogRetDocsForEachShard */); + st.stop(); })(); diff --git a/jstests/change_streams/oplog_rewrite/match_pushdown_namespace_rewrite_with_expanded_events.js b/jstests/change_streams/oplog_rewrite/match_pushdown_namespace_rewrite_with_expanded_events.js index 789a7663604..b49ce4d721d 100644 --- a/jstests/change_streams/oplog_rewrite/match_pushdown_namespace_rewrite_with_expanded_events.js +++ b/jstests/change_streams/oplog_rewrite/match_pushdown_namespace_rewrite_with_expanded_events.js @@ -816,5 +816,48 @@ verifyOnWholeCluster( }, [9, 3] /* expectedOplogRetDocsForEachShard */); +// Create a new change stream and resume token for replaying the stream after this point. +const thirdResumeAfterToken = + db.getSiblingDB("admin").watch([], {allChangesForCluster: true}).getResumeToken(); + +// The test cases below verify the behavior of regex matches with escaped characters on collections +// with special names (e.g. containing dots). This exercises the fix for SERVER-67715. +const collWithDot = + createShardedCollection(st, "_id" /* shardKey */, dbName, "foo.bar", 2 /*splitAt */); +assert.commandWorked(collWithDot.createIndex({x: 1})); +assert.commandWorked(collWithDot.insert({_id: 1})); +assert.commandWorked(collWithDot.insert({_id: 3})); +assert.commandWorked( + collWithDot.runCommand({collMod: "foo.bar", index: {keyPattern: {x: 1}, hidden: true}})); +assert.commandWorked(collWithDot.runCommand({dropIndexes: "foo.bar", index: {x: 1}})); + +const collWithUnderscore = + createShardedCollection(st, "_id" /* shardKey */, dbName, "foo_bar", 2 /*splitAt */); +assert.commandWorked(collWithUnderscore.createIndex({x: 1})); +assert.commandWorked(collWithUnderscore.insert({_id: 1})); +assert.commandWorked(collWithUnderscore.insert({_id: 3})); +assert.commandWorked( + collWithUnderscore.runCommand({collMod: "foo_bar", index: {keyPattern: {x: 1}, hidden: true}})); +assert.commandWorked(collWithUnderscore.runCommand({dropIndexes: "foo_bar", index: {x: 1}})); + +// Ensure that a regex match properly respects escaped characters (here, testing that the escaped +// "." character is treated as a literal dot). Note that we expect 5 extra oplog entries on shard0: +// - 1 from the "create" event (which only appears on shard0) +// - 4 no-op entries from sharding the two collections that are not affected by the filter pushdown +// (2 "shardCollection" + 2 "migrateChunkToNewShard") +verifyOnWholeCluster(thirdResumeAfterToken, + {$match: {"ns.coll": {$nin: [/^foo\./]}}}, + { + "foo_bar": { + create: ["foo_bar"], + shardCollection: ["foo_bar"], + createIndexes: ["foo_bar", "foo_bar"], + insert: [1, 3], + modify: ["foo_bar", "foo_bar"], + dropIndexes: ["foo_bar", "foo_bar"] + } + }, + [9, 4] /* expectedOplogRetDocsForEachShard */); + st.stop(); })(); diff --git a/jstests/libs/change_stream_rewrite_util.js b/jstests/libs/change_stream_rewrite_util.js index ebbb0937222..918873c17bc 100644 --- a/jstests/libs/change_stream_rewrite_util.js +++ b/jstests/libs/change_stream_rewrite_util.js @@ -138,7 +138,10 @@ function assertNumMatchingOplogEventsForShard(stats, shardName, expectedTotalRet assert(stats.shards.hasOwnProperty(shardName), stats); assert.eq(Object.keys(stats.shards[shardName].stages[0])[0], "$cursor", stats); const executionStats = stats.shards[shardName].stages[0].$cursor.executionStats; - assert.eq(executionStats.nReturned, expectedTotalReturned, executionStats); + assert.eq(executionStats.nReturned, + expectedTotalReturned, + () => `Expected ${expectedTotalReturned} events on shard ${shardName} but got ` + + `${executionStats.nReturned}. Execution stats:\n${tojson(executionStats)}`); } // Returns a newly created sharded collection sharded by caller provided shard key. @@ -180,7 +183,10 @@ function verifyChangeStreamOnWholeCluster( eventIdentifierList.forEach(eventIdentifier => { assert.soon(() => cursor.hasNext(), {op: op, eventIdentifier: eventIdentifier}); const event = cursor.next(); - assert.eq(event.operationType, op, event); + assert.eq(event.operationType, + op, + () => `Expected "${op}" but got "${event.operationType}". Full event: ` + + `${tojson(event)}`); if (op == "dropDatabase") { assert.eq(event.ns.db, eventIdentifier, event); diff --git a/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp b/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp index bee55d5c709..cb385656a63 100644 --- a/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp +++ b/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp @@ -29,6 +29,8 @@ #include "mongo/db/pipeline/change_stream_rewrite_helpers.h" +#include <boost/algorithm/string/replace.hpp> + #include "mongo/db/matcher/expression_always_boolean.h" #include "mongo/db/matcher/expression_expr.h" #include "mongo/db/pipeline/document_source_change_stream.h" @@ -888,9 +890,12 @@ std::unique_ptr<MatchExpression> matchRewriteGenericNamespace( }(); // Convert the MatchExpression $regex into a $regexMatch on the corresponding field. + // Backslashes must be escaped to ensure they retain their special behavior. + const auto regex = + boost::replace_all_copy(std::string(nsElem.regex()), R"(\)", R"(\\)"); const std::string exprRegexMatch = str::stream() - << "{$regexMatch: {input: " << exprDbOrCollName << ", regex: '" - << nsElem.regex() << "', options: '" << nsElem.regexFlags() << "'}}"; + << "{$regexMatch: {input: " << exprDbOrCollName << ", regex: '" << regex + << "', options: '" << nsElem.regexFlags() << "'}}"; // Finally, wrap the regex in a $let which defines the '$$oplogField' variable. const std::string exprRewrittenPredicate = str::stream() |