summary | refs | log | tree | commit | diff
path: root/src/mongo/db/pipeline/expression.h
diff options
context:
space:
mode:
author: Arun Banala <arun.banala@mongodb.com> 2019-03-26 19:58:24 +0000
committer: Arun Banala <arun.banala@mongodb.com> 2019-04-23 11:20:42 +0100
commit: a60f6a53734fa3a022e9ba39bbdab95608ba9108 (patch)
tree: f7cf4e0573681b06cb2e01e1dfa8c616d5952992 /src/mongo/db/pipeline/expression.h
parent: d5f8a6a510d1f3db0ad7f737488979a751e9df95 (diff)
download: mongo-a60f6a53734fa3a022e9ba39bbdab95608ba9108.tar.gz
SERVER-40083 Don't recompile each time $regex is evaluated when regex argument is a constant
Diffstat (limited to 'src/mongo/db/pipeline/expression.h')
-rw-r--r-- src/mongo/db/pipeline/expression.h | 99
1 files changed, 99 insertions, 0 deletions
diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h
index 4969e2ea231..42c525c0e52 100644
--- a/src/mongo/db/pipeline/expression.h
+++ b/src/mongo/db/pipeline/expression.h
@@ -34,6 +34,7 @@
#include <algorithm>
#include <boost/intrusive_ptr.hpp>
#include <map>
+#include <pcre.h>
#include <string>
#include <vector>
@@ -2433,17 +2434,101 @@ private:
boost::intrusive_ptr<Expression> _onNull;
};
+/**
+ * Helper holding the regex state and matching entry points shared by the '$regex*' expression
+ * classes in this header ('ExpressionRegexFind', 'ExpressionRegexFindAll' and
+ * 'ExpressionRegexMatch'), each of which keeps one instance as its '_handler' member.
+ *
+ * When the 'regex' and 'options' arguments are constant, the compiled regex is cached in
+ * '_initialExecStateForConstantRegex' so that it does not have to be recompiled on every
+ * evaluation.
+ */
+class RegexMatchHandler {
+public:
+    /**
+     * Object to hold data that is required by 'RegexMatchHandler' for calling 'execute()' or
+     * 'nextMatch()'.
+     */
+    struct RegexExecutionState {
+        // Regex pattern; unset when the pattern argument is nullish (see 'nullish()').
+        boost::optional<std::string> pattern;
+
+        // Regex option flags.
+        std::string options;
+
+        // NOTE(review): presumably the number of capture groups in the compiled pattern and the
+        // output buffer handed to PCRE when matching -- confirm against the implementation.
+        int numCaptures = 0;
+        std::vector<int> capturesBuffer;
+
+        /**
+         * If there is a constant regex, the underlying object of 'pcre' will be owned by
+         * 'RegexMatchHandler', as part of '_initialExecStateForConstantRegex'. If not, it will be
+         * owned by 'RegexExecutionState'.
+         */
+        std::shared_ptr<pcre> pcre;
+
+        // Text to match against; unset when the input argument is nullish (see 'nullish()').
+        boost::optional<std::string> input;
+
+        // Position from which the next match attempt starts, tracked both as a byte offset and as
+        // a code point index. 'nextMatch()' advances both after a successful match.
+        int startBytePos = 0;
+        int startCodePointPos = 0;
+
+        /**
+         * If either the text input or regex pattern is nullish, then we consider the operation as a
+         * whole nullish.
+         *
+         * Declared 'const' because it only inspects the state, so it can also be called through a
+         * const reference or pointer.
+         */
+        bool nullish() const {
+            return !input || !pattern;
+        }
+    };
+
+    /**
+     * Checks if there is a match for the given input and pattern that are part of 'executionState'.
+     * The method will return a positive number if there is a match and '-1' if there is no match.
+     * Throws 'uassert()' for any errors.
+     */
+    int execute(RegexExecutionState* executionState) const;
+
+    /**
+     * Finds the next possible match for the given input and pattern that are part of
+     * 'executionState'. If there is a match, the function will return a 'Value' object
+     * encapsulating the matched string, the code point index of the matched string and a vector
+     * representing all the captured substrings. The function will also update the parameters
+     * 'startBytePos' and 'startCodePointPos' to the corresponding new indices. If there is no
+     * match, the function will return null 'Value' object.
+     */
+    Value nextMatch(RegexExecutionState* executionState) const;
+
+    /**
+     * Optimizes '$regex*' expressions. If the expression has a constant 'regex' and 'options'
+     * fields, then it can be optimized. Stores the optimized regex as part of
+     * '_initialExecStateForConstantRegex' so that it can be reused during expression evaluation.
+     */
+    void optimize(boost::intrusive_ptr<Expression> expression);
+
+    /**
+     * Validates the structure of input passed in 'inputExpr'. If valid, generates an initial
+     * execution state. This returned object can later be used for calling execute() or nextMatch().
+     */
+    RegexExecutionState buildInitialState(const Value& inputExpr) const;
+
+    /**
+     * Returns true when an initial execution state for a constant regex has been stored, i.e.
+     * when '_initialExecStateForConstantRegex' is set.
+     */
+    bool hasConstantRegex() const {
+        return _initialExecStateForConstantRegex.has_value();
+    }
+
+private:
+    // NOTE(review): the three helpers below are defined outside this header. Judging by their
+    // signatures they fill in, respectively, the 'input' field, the 'pattern'/'options' fields and
+    // the compiled 'pcre' object of 'executionState' -- confirm against the implementation.
+    void _extractInputField(RegexExecutionState* executionState, const Value& textInput) const;
+    void _extractRegexAndOptions(RegexExecutionState* executionState,
+                                 const Value& regexPattern,
+                                 const Value& regexOptions) const;
+    void _compile(RegexExecutionState* executionState) const;
+
+    /**
+     * This variable will be set when the $regex* expressions have constant values for their 'regex'
+     * and 'options' fields, allowing us to pre-compile the regex and re-use it across the
+     * Expression's lifetime.
+     */
+    boost::optional<RegexExecutionState> _initialExecStateForConstantRegex;
+};
+
+
class ExpressionRegexFind final : public ExpressionFixedArity<ExpressionRegexFind, 1> {
public:
explicit ExpressionRegexFind(const boost::intrusive_ptr<ExpressionContext>& expCtx)
: ExpressionFixedArity<ExpressionRegexFind, 1>(expCtx) {}
Value evaluate(const Document& root) const final;
+ boost::intrusive_ptr<Expression> optimize() final;  // NOTE(review): defined outside this header; presumably lets '_handler' pre-compile a constant regex -- confirm.
const char* getOpName() const final;
void acceptVisitor(ExpressionVisitor* visitor) final {
return visitor->visit(this);
}
+ bool hasConstantRegex() const {  // Reports whether '_handler' has stored state for a constant regex.
+ return _handler.hasConstantRegex();
+ }
+
+private:
+ RegexMatchHandler _handler;  // Regex helper owned by this expression; queried by hasConstantRegex().
};
class ExpressionRegexFindAll final : public ExpressionFixedArity<ExpressionRegexFindAll, 1> {
@@ -2452,10 +2537,17 @@ public:
: ExpressionFixedArity<ExpressionRegexFindAll, 1>(expCtx) {}
Value evaluate(const Document& root) const final;
+ boost::intrusive_ptr<Expression> optimize() final;  // NOTE(review): defined outside this header; presumably lets '_handler' pre-compile a constant regex -- confirm.
const char* getOpName() const final;
void acceptVisitor(ExpressionVisitor* visitor) final {
return visitor->visit(this);
}
+ bool hasConstantRegex() const {  // Reports whether '_handler' has stored state for a constant regex.
+ return _handler.hasConstantRegex();
+ }
+
+private:
+ RegexMatchHandler _handler;  // Regex helper owned by this expression; queried by hasConstantRegex().
};
class ExpressionRegexMatch final : public ExpressionFixedArity<ExpressionRegexMatch, 1> {
@@ -2464,10 +2556,17 @@ public:
: ExpressionFixedArity<ExpressionRegexMatch, 1>(expCtx) {}
Value evaluate(const Document& root) const final;
+ boost::intrusive_ptr<Expression> optimize() final;  // NOTE(review): defined outside this header; presumably lets '_handler' pre-compile a constant regex -- confirm.
const char* getOpName() const final;
void acceptVisitor(ExpressionVisitor* visitor) final {
return visitor->visit(this);
}
+ bool hasConstantRegex() const {  // Reports whether '_handler' has stored state for a constant regex.
+ return _handler.hasConstantRegex();
+ }
+
+private:
+ RegexMatchHandler _handler;  // Regex helper owned by this expression; queried by hasConstantRegex().
};
}