summary refs log tree commit diff
path: root/src/mongo/db/pipeline/expression.cpp
diff options
context:
space:
mode:
author Ian Boros <ian.boros@mongodb.com> 2020-01-29 21:48:39 +0000
committer evergreen <evergreen@mongodb.com> 2020-01-29 21:48:39 +0000
commit b91fd88b61e72b1711c2bac48337a884d81f3dad (patch)
tree 97a3f909f29d1a5388d4e41bfbcb05d88411122c /src/mongo/db/pipeline/expression.cpp
parent 695146e648e032e04d97bb0b4de873272c242f04 (diff)
download mongo-b91fd88b61e72b1711c2bac48337a884d81f3dad.tar.gz
SERVER-45846 Improve performance of $split
Diffstat (limited to 'src/mongo/db/pipeline/expression.cpp')
-rw-r--r-- src/mongo/db/pipeline/expression.cpp 34
1 files changed, 14 insertions, 20 deletions
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp
index d82c541b204..c33900f7549 100644
--- a/src/mongo/db/pipeline/expression.cpp
+++ b/src/mongo/db/pipeline/expression.cpp
@@ -4441,34 +4441,28 @@ Value ExpressionSplit::evaluate(const Document& root, Variables* variables) cons
<< typeName(separatorArg.getType()),
separatorArg.getType() == BSONType::String);
- std::string input = inputArg.getString();
- std::string separator = separatorArg.getString();
+ StringData input = inputArg.getStringData();
+ StringData separator = separatorArg.getStringData();
uassert(40087, "$split requires a non-empty separator", !separator.empty());
std::vector<Value> output;
- // Keep track of the index at which the current output string began.
- size_t splitStartIndex = 0;
-
- // Iterate through 'input' and check to see if 'separator' matches at any point.
- for (size_t i = 0; i < input.size();) {
- if (stringHasTokenAtIndex(i, input, separator)) {
- // We matched; add the current string to our output and jump ahead.
- StringData splitString(input.c_str() + splitStartIndex, i - splitStartIndex);
- output.push_back(Value(splitString));
- i += separator.size();
- splitStartIndex = i;
- } else {
- // We did not match, continue to the next character.
- ++i;
- }
+ const char* needle = separator.rawData();
+ const char* const needleEnd = needle + separator.size();
+ const char* remainingHaystack = input.rawData();
+ const char* const haystackEnd = remainingHaystack + input.size();
+
+ const char* it = remainingHaystack;
+ while ((it = std::search(remainingHaystack, haystackEnd, needle, needleEnd)) != haystackEnd) {
+ StringData sd(remainingHaystack, it - remainingHaystack);
+ output.push_back(Value(sd));
+ remainingHaystack = it + separator.size();
}
- StringData splitString(input.c_str() + splitStartIndex, input.size() - splitStartIndex);
+ StringData splitString(remainingHaystack, input.size() - (remainingHaystack - input.rawData()));
output.push_back(Value(splitString));
-
- return Value(output);
+ return Value(std::move(output));
}
REGISTER_EXPRESSION(split, ExpressionSplit::parse);