summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorKyle Suarez <kyle.suarez@mongodb.com>2017-10-26 18:08:11 -0400
committerKyle Suarez <kyle.suarez@mongodb.com>2017-10-26 18:08:34 -0400
commit260fd0c76599520d9d733874753a94a2db763538 (patch)
tree6d2e5c0c91ed2e279d3fa5cdef89dd687cf8514f /src/mongo/db
parent722136a8bff8208787bb97186d7b339bb5116ba3 (diff)
downloadmongo-260fd0c76599520d9d733874753a94a2db763538.tar.gz
SERVER-20432 allow some escaped | chars in regexes to use tight index bounds
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/query/index_bounds_builder.cpp50
-rw-r--r--src/mongo/db/query/index_bounds_builder_test.cpp49
2 files changed, 85 insertions, 14 deletions
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index cf94e16822c..344cb119877 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -61,6 +61,34 @@ IndexBoundsBuilder::BoundsTightness getInequalityPredicateTightness(const BSONEl
: IndexBoundsBuilder::INEXACT_FETCH;
}
+/**
+ * Returns true if 'str' contains a non-escaped pipe character '|' on a best-effort basis. This
+ * function reports no false negatives, but will return false positives. For example, a pipe
+ * character inside of a character class or the \Q...\E escape sequence has no special meaning but
+ * may still be reported by this function as being non-escaped.
+ */
+bool stringMayHaveUnescapedPipe(StringData str) {
+ if (str.size() > 0 && str[0] == '|') {
+ return true;
+ }
+ if (str.size() > 1 && str[1] == '|' && str[0] != '\\') {
+ return true;
+ }
+
+ for (size_t i = 2U; i < str.size(); ++i) {
+ auto probe = str[i];
+ auto prev = str[i - 1];
+ auto tail = str[i - 2];
+
+ // We consider the pipe to have a special meaning if it is not preceded by a backslash, or
+ // preceded by a backslash that is itself escaped.
+ if (probe == '|' && (prev != '\\' || (prev == '\\' && tail == '\\'))) {
+ return true;
+ }
+ }
+ return false;
+}
+
} // namespace
string IndexBoundsBuilder::simpleRegex(const char* regex,
@@ -77,7 +105,6 @@ string IndexBoundsBuilder::simpleRegex(const char* regex,
return "";
}
- string r = "";
*tightnessOut = IndexBoundsBuilder::INEXACT_COVERED;
bool multilineOK;
@@ -88,42 +115,43 @@ string IndexBoundsBuilder::simpleRegex(const char* regex,
multilineOK = false;
regex += 1;
} else {
- return r;
+ return "";
}
- // A regex with the "|" character is never considered a simple regular expression.
- if (StringData(regex).find('|') != std::string::npos) {
+ // A regex with an unescaped pipe character is not considered a simple regex.
+ if (stringMayHaveUnescapedPipe(StringData(regex))) {
return "";
}
bool extended = false;
while (*flags) {
switch (*(flags++)) {
- case 'm': // multiline
+ case 'm':
+ // Multiline mode.
if (multilineOK)
continue;
else
- return r;
+ return "";
case 's':
// Single-line mode specified. This just changes the behavior of the '.'
// character to match every character instead of every character except '\n'.
continue;
- case 'x': // extended
+ case 'x':
+ // Extended free-spacing mode.
extended = true;
break;
default:
- return r; // cant use index
+ // Cannot use the index.
+ return "";
}
}
mongoutils::str::stream ss;
+ string r = "";
while (*regex) {
char c = *(regex++);
- // We should have bailed out early above if '|' is in the regex.
- invariant(c != '|');
-
if (c == '*' || c == '?') {
// These are the only two symbols that make the last char optional
r = ss;
diff --git a/src/mongo/db/query/index_bounds_builder_test.cpp b/src/mongo/db/query/index_bounds_builder_test.cpp
index 264972d017f..8305f91be4b 100644
--- a/src/mongo/db/query/index_bounds_builder_test.cpp
+++ b/src/mongo/db/query/index_bounds_builder_test.cpp
@@ -1046,8 +1046,9 @@ TEST(SimpleRegexTest, RootedLiteralNestedEscapeEnd) {
ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT);
}
-// A regular expression with the "|" character is not considered simple. See SERVER-15235.
-TEST(SimpleRegexTest, PipeCharacterDisallowed) {
+// An anchored regular expression that uses the "|" operator is not considered "simple" and has
+// non-tight index bounds.
+TEST(SimpleRegexTest, PipeCharacterUsesInexactBounds) {
IndexEntry testIndex = IndexEntry(BSONObj());
IndexBoundsBuilder::BoundsTightness tightness;
string prefix = IndexBoundsBuilder::simpleRegex("^(a(a|$)|b", "", testIndex, &tightness);
@@ -1055,7 +1056,7 @@ TEST(SimpleRegexTest, PipeCharacterDisallowed) {
ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
}
-TEST(SimpleRegexTest, PipeCharacterDisallowed2) {
+TEST(SimpleRegexTest, PipeCharacterUsesInexactBoundsWithTwoPrefixes) {
IndexEntry testIndex = IndexEntry(BSONObj());
IndexBoundsBuilder::BoundsTightness tightness;
string prefix = IndexBoundsBuilder::simpleRegex("^(a(a|$)|^b", "", testIndex, &tightness);
@@ -1063,6 +1064,48 @@ TEST(SimpleRegexTest, PipeCharacterDisallowed2) {
ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
}
+TEST(SimpleRegexTest, PipeCharacterPrecededByEscapedBackslashUsesInexactBounds) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ IndexBoundsBuilder::BoundsTightness tightness;
+ string prefix = IndexBoundsBuilder::simpleRegex(R"(^a\\|b)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
+
+ prefix = IndexBoundsBuilder::simpleRegex(R"(^(foo\\|bar)\\|baz)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
+}
+
+// However, a regular expression with an escaped pipe (that is, using no special meaning) can use
+// exact index bounds.
+TEST(SimpleRegexTest, PipeCharacterEscapedWithBackslashUsesExactBounds) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ IndexBoundsBuilder::BoundsTightness tightness;
+ string prefix = IndexBoundsBuilder::simpleRegex(R"(^a\|b)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "a|b");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT);
+
+ prefix = IndexBoundsBuilder::simpleRegex(R"(^\|1\|2\|\|)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "|1|2||");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::EXACT);
+}
+
+TEST(SimpleRegexTest, FalsePositiveOnPipeInQEEscapeSequenceUsesInexactBounds) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ IndexBoundsBuilder::BoundsTightness tightness;
+ string prefix = IndexBoundsBuilder::simpleRegex(R"(^\Q|\E)", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
+}
+
+TEST(SimpleRegexTest, FalsePositiveOnPipeInCharacterClassUsesInexactBounds) {
+ IndexEntry testIndex = IndexEntry(BSONObj());
+ IndexBoundsBuilder::BoundsTightness tightness;
+ string prefix = IndexBoundsBuilder::simpleRegex(R"(^[|])", "", testIndex, &tightness);
+ ASSERT_EQUALS(prefix, "");
+ ASSERT_EQUALS(tightness, IndexBoundsBuilder::INEXACT_COVERED);
+}
+
// SERVER-9035
TEST(SimpleRegexTest, RootedSingleLineMode) {
IndexEntry testIndex = IndexEntry(BSONObj());