summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kyle Suarez <kyle.suarez@mongodb.com> 2022-09-19 14:53:31 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-09-19 23:55:45 +0000
commit: e8ed64ed0440f576fb0c62d0be61f2f5f0749765 (patch)
tree: e997ff3b7b15257f8aecaf281f6c32280c98d04d
parent: 6424b1f1f167117b53b81e8b34361d64cac43222 (diff)
download: mongo-e8ed64ed0440f576fb0c62d0be61f2f5f0749765.tar.gz
SERVER-67715 escape $changeStream regex
(cherry picked from commit c9fe899fff347770c0e30fa0272f6157be6676a8)
-rw-r--r-- jstests/change_streams/oplog_rewrite/change_stream_match_pushdown_namespace_rewrite.js 24
-rw-r--r-- jstests/change_streams/oplog_rewrite/match_pushdown_namespace_rewrite_with_expanded_events.js 43
-rw-r--r-- jstests/libs/change_stream_rewrite_util.js 10
-rw-r--r-- src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp 9
4 files changed, 82 insertions, 4 deletions
diff --git a/jstests/change_streams/oplog_rewrite/change_stream_match_pushdown_namespace_rewrite.js b/jstests/change_streams/oplog_rewrite/change_stream_match_pushdown_namespace_rewrite.js
index ded0e67f3f7..526eff8b252 100644
--- a/jstests/change_streams/oplog_rewrite/change_stream_match_pushdown_namespace_rewrite.js
+++ b/jstests/change_streams/oplog_rewrite/change_stream_match_pushdown_namespace_rewrite.js
@@ -1084,5 +1084,29 @@ verifyOnWholeCluster(
{change_stream_match_pushdown_and_rewrite_and_rewrite: {dropDatabase: [dbName, dbName]}},
1 /* expectedOplogRetDocsForEachShard */);
+// Create two sharded collections in the main test database, then start a new change stream to get a
+// fresh resume token.
+const collWithDot =
+ createShardedCollection(st, "_id" /* shardKey */, dbName, "foo.bar", 2 /*splitAt */);
+const collWithUnderscore =
+ createShardedCollection(st, "_id" /* shardKey */, dbName, "foo_bar", 2 /*splitAt */);
+const thirdResumeAfterToken =
+ db.getSiblingDB("admin").watch([], {allChangesForCluster: true}).getResumeToken();
+
+// Insert one document per collection, per shard. The test cases below verify the behavior of regex
+// matches with escaped characters on collections with special names (e.g. containing dots). This
+// exercises the fix for SERVER-67715.
+assert.commandWorked(collWithDot.insert({_id: 1}));
+assert.commandWorked(collWithDot.insert({_id: 3}));
+assert.commandWorked(collWithUnderscore.insert({_id: 1}));
+assert.commandWorked(collWithUnderscore.insert({_id: 3}));
+
+// Ensure that a regex match properly respects escaped characters (here, testing that the escaped
+// "." character is treated as a literal dot).
+verifyOnWholeCluster(thirdResumeAfterToken,
+ {$match: {"ns.coll": {$nin: [/^foo\./]}}},
+ {"foo_bar": {insert: [1, 3]}},
+ 1 /* expectedOplogRetDocsForEachShard */);
+
st.stop();
})();
diff --git a/jstests/change_streams/oplog_rewrite/match_pushdown_namespace_rewrite_with_expanded_events.js b/jstests/change_streams/oplog_rewrite/match_pushdown_namespace_rewrite_with_expanded_events.js
index 789a7663604..b49ce4d721d 100644
--- a/jstests/change_streams/oplog_rewrite/match_pushdown_namespace_rewrite_with_expanded_events.js
+++ b/jstests/change_streams/oplog_rewrite/match_pushdown_namespace_rewrite_with_expanded_events.js
@@ -816,5 +816,48 @@ verifyOnWholeCluster(
},
[9, 3] /* expectedOplogRetDocsForEachShard */);
+// Create a new change stream and resume token for replaying the stream after this point.
+const thirdResumeAfterToken =
+ db.getSiblingDB("admin").watch([], {allChangesForCluster: true}).getResumeToken();
+
+// The test cases below verify the behavior of regex matches with escaped characters on collections
+// with special names (e.g. containing dots). This exercises the fix for SERVER-67715.
+const collWithDot =
+ createShardedCollection(st, "_id" /* shardKey */, dbName, "foo.bar", 2 /*splitAt */);
+assert.commandWorked(collWithDot.createIndex({x: 1}));
+assert.commandWorked(collWithDot.insert({_id: 1}));
+assert.commandWorked(collWithDot.insert({_id: 3}));
+assert.commandWorked(
+ collWithDot.runCommand({collMod: "foo.bar", index: {keyPattern: {x: 1}, hidden: true}}));
+assert.commandWorked(collWithDot.runCommand({dropIndexes: "foo.bar", index: {x: 1}}));
+
+const collWithUnderscore =
+ createShardedCollection(st, "_id" /* shardKey */, dbName, "foo_bar", 2 /*splitAt */);
+assert.commandWorked(collWithUnderscore.createIndex({x: 1}));
+assert.commandWorked(collWithUnderscore.insert({_id: 1}));
+assert.commandWorked(collWithUnderscore.insert({_id: 3}));
+assert.commandWorked(
+ collWithUnderscore.runCommand({collMod: "foo_bar", index: {keyPattern: {x: 1}, hidden: true}}));
+assert.commandWorked(collWithUnderscore.runCommand({dropIndexes: "foo_bar", index: {x: 1}}));
+
+// Ensure that a regex match properly respects escaped characters (here, testing that the escaped
+// "." character is treated as a literal dot). Note that we expect 5 extra oplog entries on shard0:
+// - 1 from the "create" event (which only appears on shard0)
+// - 4 no-op entries from sharding the two collections that are not affected by the filter pushdown
+// (2 "shardCollection" + 2 "migrateChunkToNewShard")
+verifyOnWholeCluster(thirdResumeAfterToken,
+ {$match: {"ns.coll": {$nin: [/^foo\./]}}},
+ {
+ "foo_bar": {
+ create: ["foo_bar"],
+ shardCollection: ["foo_bar"],
+ createIndexes: ["foo_bar", "foo_bar"],
+ insert: [1, 3],
+ modify: ["foo_bar", "foo_bar"],
+ dropIndexes: ["foo_bar", "foo_bar"]
+ }
+ },
+ [9, 4] /* expectedOplogRetDocsForEachShard */);
+
st.stop();
})();
diff --git a/jstests/libs/change_stream_rewrite_util.js b/jstests/libs/change_stream_rewrite_util.js
index ebbb0937222..918873c17bc 100644
--- a/jstests/libs/change_stream_rewrite_util.js
+++ b/jstests/libs/change_stream_rewrite_util.js
@@ -138,7 +138,10 @@ function assertNumMatchingOplogEventsForShard(stats, shardName, expectedTotalRet
assert(stats.shards.hasOwnProperty(shardName), stats);
assert.eq(Object.keys(stats.shards[shardName].stages[0])[0], "$cursor", stats);
const executionStats = stats.shards[shardName].stages[0].$cursor.executionStats;
- assert.eq(executionStats.nReturned, expectedTotalReturned, executionStats);
+ assert.eq(executionStats.nReturned,
+ expectedTotalReturned,
+ () => `Expected ${expectedTotalReturned} events on shard ${shardName} but got ` +
+ `${executionStats.nReturned}. Execution stats:\n${tojson(executionStats)}`);
}
// Returns a newly created sharded collection sharded by caller provided shard key.
@@ -180,7 +183,10 @@ function verifyChangeStreamOnWholeCluster(
eventIdentifierList.forEach(eventIdentifier => {
assert.soon(() => cursor.hasNext(), {op: op, eventIdentifier: eventIdentifier});
const event = cursor.next();
- assert.eq(event.operationType, op, event);
+ assert.eq(event.operationType,
+ op,
+ () => `Expected "${op}" but got "${event.operationType}". Full event: ` +
+ `${tojson(event)}`);
if (op == "dropDatabase") {
assert.eq(event.ns.db, eventIdentifier, event);
diff --git a/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp b/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp
index bee55d5c709..cb385656a63 100644
--- a/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp
+++ b/src/mongo/db/pipeline/change_stream_rewrite_helpers.cpp
@@ -29,6 +29,8 @@
#include "mongo/db/pipeline/change_stream_rewrite_helpers.h"
+#include <boost/algorithm/string/replace.hpp>
+
#include "mongo/db/matcher/expression_always_boolean.h"
#include "mongo/db/matcher/expression_expr.h"
#include "mongo/db/pipeline/document_source_change_stream.h"
@@ -888,9 +890,12 @@ std::unique_ptr<MatchExpression> matchRewriteGenericNamespace(
}();
// Convert the MatchExpression $regex into a $regexMatch on the corresponding field.
+ // Backslashes must be escaped to ensure they retain their special behavior.
+ const auto regex =
+ boost::replace_all_copy(std::string(nsElem.regex()), R"(\)", R"(\\)");
const std::string exprRegexMatch = str::stream()
- << "{$regexMatch: {input: " << exprDbOrCollName << ", regex: '"
- << nsElem.regex() << "', options: '" << nsElem.regexFlags() << "'}}";
+ << "{$regexMatch: {input: " << exprDbOrCollName << ", regex: '" << regex
+ << "', options: '" << nsElem.regexFlags() << "'}}";
// Finally, wrap the regex in a $let which defines the '$$oplogField' variable.
const std::string exprRewrittenPredicate = str::stream()