summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/core/index_bounds_pipe.js113
-rw-r--r--src/mongo/db/query/index_bounds_builder.cpp50
-rw-r--r--src/mongo/db/query/index_bounds_builder_test.cpp49
3 files changed, 198 insertions, 14 deletions
diff --git a/jstests/core/index_bounds_pipe.js b/jstests/core/index_bounds_pipe.js
new file mode 100644
index 00000000000..e284d5c78f3
--- /dev/null
+++ b/jstests/core/index_bounds_pipe.js
@@ -0,0 +1,113 @@
+/**
+ * Tests the tightness of index bounds when attempting to match a regex that contains escaped and
+ * non-escaped pipe '|' characters.
+ */
+(function() {
+ 'use strict';
+
+ load('jstests/libs/analyze_plan.js');
+
+ const collName = 'index_bounds_pipe';
+ const coll = db.getCollection(collName);
+ coll.drop();
+
+ assert.writeOK(coll.insert({_id: ''}));
+ assert.writeOK(coll.insert({_id: '\\|'}));
+ assert.writeOK(coll.insert({_id: 'a'}));
+ assert.writeOK(coll.insert({_id: 'a|b'}));
+ assert.writeOK(coll.insert({_id: 'b'}));
+ assert.writeOK(coll.insert({_id: '|'}));
+
+ /**
+ * Asserts that a query on a field using 'params.regex' uses index bounds 'params.bounds' and
+ * returns results identical to 'params.results'.
+ *
+ * Also tests that a query using 'params.regex' will return documents with a field of type regex
+ * with an identical regular expression value.
+ */
+ function assertIndexBoundsAndResult(params) {
+ const query = {_id: params.regex};
+ const command = {find: collName, filter: query, projection: {_id: 1}, sort: {_id: 1}};
+ const explain = db.runCommand({explain: command});
+ assert.commandWorked(explain);
+
+ // Check that the query uses correct index bounds.
+ const ixscan = getPlanStage(explain.queryPlanner.winningPlan, 'IXSCAN');
+ assert.neq(ixscan, null, 'Plan unexpectedly missing IXSCAN stage: ' + tojson(explain));
+ assert.eq(ixscan.indexBounds._id,
+ params.bounds,
+ 'Expected bounds of ' + tojson(params.bounds) + ' but got ' +
+ tojson(ixscan.indexBounds._id));
+
+ // Check that the query regex matches expected strings.
+ const results = db.runCommand(command);
+ assert.commandWorked(results);
+ assert.eq(results.cursor.firstBatch,
+ params.results,
+ 'Regex query ' + tojson(query) + ' returned incorrect results');
+
+ // Check that the query regex will exactly match identical regular expression objects.
+ const collRegexValue = db.getCollection(collName + params.regex);
+ collRegexValue.drop();
+ assert.commandWorked(collRegexValue.createIndex({x: 1}));
+
+ const doc = {_id: 0, x: params.regex};
+ assert.writeOK(collRegexValue.insert(doc));
+
+ const regexQuery = {x: params.regex};
+ assert.eq(collRegexValue.findOne(regexQuery),
+ doc,
+ 'Regex query ' + tojson(regexQuery) +
+ ' did not match document with identical regex value');
+ }
+
+ // An anchored regex that uses no special operators can use tight index bounds.
+ assertIndexBoundsAndResult(
+ {regex: /^a/, bounds: ['["a", "b")', '[/^a/, /^a/]'], results: [{_id: 'a'}, {_id: 'a|b'}]});
+ assertIndexBoundsAndResult(
+ {regex: /^\\/, bounds: ['["\\", "]")', '[/^\\\\/, /^\\\\/]'], results: [{_id: '\\|'}]});
+
+ // An anchored regex using the alternation operator cannot use tight index bounds.
+ assertIndexBoundsAndResult({
+ regex: /^a|b/,
+ bounds: ['["", {})', '[/^a|b/, /^a|b/]'],
+ results: [{_id: 'a'}, {_id: 'a|b'}, {_id: 'b'}]
+ });
+
+ // An anchored regex that uses an escaped pipe character can use tight index bounds.
+ assertIndexBoundsAndResult(
+ {regex: /^a\|/, bounds: ['["a|", "a}")', '[/^a\\|/, /^a\\|/]'], results: [{_id: 'a|b'}]});
+ assertIndexBoundsAndResult(
+ {regex: /^\|/, bounds: ['["|", "}")', '[/^\\|/, /^\\|/]'], results: [{_id: '|'}]});
+
+ // A pipe character that is preceded by an escaped backslash is correctly interpreted as the
+ // alternation operator and cannot use tight index bounds.
+ assertIndexBoundsAndResult({
+ regex: /^\\|b/,
+ bounds: ['["", {})', '[/^\\\\|b/, /^\\\\|b/]'],
+ results: [{_id: '\\|'}, {_id: 'a|b'}, {_id: 'b'}]
+ });
+ assertIndexBoundsAndResult({
+ regex: /^\\|^b/,
+ bounds: ['["", {})', '[/^\\\\|^b/, /^\\\\|^b/]'],
+ results: [{_id: '\\|'}, {_id: 'b'}]
+ });
+
+ // An escaped backslash immediately followed by an escaped pipe does not use tight index bounds.
+ assertIndexBoundsAndResult({
+ regex: /^\\\|/,
+ bounds: ['["", {})', '[/^\\\\\\|/, /^\\\\\\|/]'],
+ results: [{_id: '\\|'}]
+ });
+
+ // A pipe escaped with the \Q...\E escape sequence does not use tight index bounds.
+ assertIndexBoundsAndResult(
+ {regex: /^\Q|\E/, bounds: ['["", {})', '[/^\\Q|\\E/, /^\\Q|\\E/]'], results: [{_id: '|'}]});
+
+ // An escaped pipe within \Q...\E can use tight index bounds.
+ assertIndexBoundsAndResult({
+ regex: /^\Q\|\E/,
+ bounds: ['["\\|", "\\}")', '[/^\\Q\\|\\E/, /^\\Q\\|\\E/]'],
+ results: [{_id: '\\|'}]
+ });
+}());
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index cf94e16822c..344cb119877 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -61,6 +61,34 @@ IndexBoundsBuilder::BoundsTightness getInequalityPredicateTightness(const BSONEl
: IndexBoundsBuilder::INEXACT_FETCH;
}
+/**
+ * Returns true if 'str' contains a non-escaped pipe character '|' on a best-effort basis. This
+ * function reports no false negatives, but will return false positives. For example, a pipe
+ * character inside of a character class or the \Q...\E escape sequence has no special meaning but
+ * may still be reported by this function as being non-escaped.
+ */
+bool stringMayHaveUnescapedPipe(StringData str) {
+ if (str.size() > 0 && str[0] == '|') {
+ return true;
+ }
+ if (str.size() > 1 && str[1] == '|' && str[0] != '\\') {
+ return true;
+ }
+
+ for (size_t i = 2U; i < str.size(); ++i) {
+ auto probe = str[i];
+ auto prev = str[i - 1];
+ auto tail = str[i - 2];
+
+ // We consider the pipe to have a special meaning if it is not preceded by a backslash, or
+ // preceded by a backslash that is itself escaped.
+ if (probe == '|' && (prev != '\\' || (prev == '\\' && tail == '\\'))) {
+ return true;
+ }
+ }
+ return false;
+}
+
} // namespace
string IndexBoundsBuilder::simpleRegex(const char* regex,
@@ -77,7 +105,6 @@ string IndexBoundsBuilder::simpleRegex(const char* regex,
return "";
}
- string r = "";
*tightnessOut = IndexBoundsBuilder::INEXACT_COVERED;
bool multilineOK;
@@ -88,42 +115,43 @@ string IndexBoundsBuilder::simpleRegex(const char* regex,
multilineOK = false;
regex += 1;
} else {
- return r;
+ return "";
}
- // A regex with the "|" character is never considered a simple regular expression.
- if (StringData(regex).find('|') != std::string::npos) {
+ // A regex with an unescaped pipe character is not considered a simple regex.
+ if (stringMayHaveUnescapedPipe(StringData(regex))) {
return "";
}
bool extended = false;
while (*flags) {
switch (*(flags++)) {
- case 'm': // multiline
+ case 'm':
+ // Multiline mode.
if (multilineOK)
continue;
else
- return r;
+ return "";
case 's':
// Single-line mode specified. This just changes the behavior of the '.'
// character to match every character instead of every character except '\n'.
continue;
- case 'x': // extended
+ case 'x':
+ // Extended free-spacing mode.
extended = true;
break;
default:
- return r; // cant use index
+ // Cannot use the index.
+ return "";
}
}
mongoutils::str::stream ss;
+ string r = "";
while (*regex) {
char c = *(regex++);
- // We should have bailed out early above if '|' is in the regex.
- invariant(c != '|');
-
if (c == '*' || c == '?') {
// These are the only two symbols that make the last char optional
r = ss;
diff --git a/src/mongo/db/query/index_bounds_builder_test.cpp b/src/mongo/db/query/index_bounds_builder_test.cpp
index 264972d017f..8305f91be4b 100644
--- a/src/mongo/db/query/index_bounds_builder_test.cpp
+++ b/src/mongo/db/query/index_bounds_builder_test.cpp
@@ -1046,8 +1046,9 @@ TEST(SimpleRegexTest, RootedLiteralNestedEscapeEnd) {
ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT);
}
-// A regular expression with the "|" character is not considered simple. See SERVER-15235.
-TEST(SimpleRegexTest, PipeCharacterDisallowed) {
+// An anchored regular expression that uses the "|" operator is not considered "simple" and has
+// non-tight index bounds.
+TEST(SimpleRegexTest, PipeCharacterUsesInexactBounds) {
IndexEntry testIndex = IndexEntry(BSONObj());
IndexBoundsBuilder::BoundsTightness tightness;
string prefix = IndexBoundsBuilder::simpleRegex("^(a(a|$)|b", "", testIndex, &tightness);
@@ -1055,7 +1056,7 @@ TEST(SimpleRegexTest, PipeCharacterDisallowed) {
ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
}
-TEST(SimpleRegexTest, PipeCharacterDisallowed2) {
+TEST(SimpleRegexTest, PipeCharacterUsesInexactBoundsWithTwoPrefixes) {
IndexEntry testIndex = IndexEntry(BSONObj());
IndexBoundsBuilder::BoundsTightness tightness;
string prefix = IndexBoundsBuilder::simpleRegex("^(a(a|$)|^b", "", testIndex, &tightness);
@@ -1063,6 +1064,48 @@ TEST(SimpleRegexTest, PipeCharacterDisallowed2) {
ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
}
+TEST(SimpleRegexTest, PipeCharacterPrecededByEscapedBackslashUsesInexactBounds) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ IndexBoundsBuilder::BoundsTightness tightness;
+ string prefix = IndexBoundsBuilder::simpleRegex(R"(^a\\|b)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
+
+ prefix = IndexBoundsBuilder::simpleRegex(R"(^(foo\\|bar)\\|baz)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
+}
+
+// However, a regular expression with an escaped pipe (that is, using no special meaning) can use
+// exact index bounds.
+TEST(SimpleRegexTest, PipeCharacterEscapedWithBackslashUsesExactBounds) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ IndexBoundsBuilder::BoundsTightness tightness;
+ string prefix = IndexBoundsBuilder::simpleRegex(R"(^a\|b)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "a|b");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT);
+
+ prefix = IndexBoundsBuilder::simpleRegex(R"(^\|1\|2\|\|)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "|1|2||");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT);
+}
+
+TEST(SimpleRegexTest, FalsePositiveOnPipeInQEEscapeSequenceUsesInexactBounds) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ IndexBoundsBuilder::BoundsTightness tightness;
+ string prefix = IndexBoundsBuilder::simpleRegex(R"(^\Q|\E)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
+}
+
+TEST(SimpleRegexTest, FalsePositiveOnPipeInCharacterClassUsesInexactBounds) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ IndexBoundsBuilder::BoundsTightness tightness;
+ string prefix = IndexBoundsBuilder::simpleRegex(R"(^[|])", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
+}
+
// SERVER-9035
TEST(SimpleRegexTest, RootedSingleLineMode) {
IndexEntry testIndex = IndexEntry(BSONObj());