6 files changed, 620 insertions, 53 deletions
diff --git a/jstests/aggregation/expressions/indexof_array.js b/jstests/aggregation/expressions/indexof_array.js
new file mode 100644
index 00000000000..bfc9ef71a15
--- /dev/null
+++ b/jstests/aggregation/expressions/indexof_array.js
@@ -0,0 +1,68 @@
+// In SERVER-8951, $indexOfArray was introduced. In this file, we test the correctness and error
+// cases of the expression.
+load("jstests/aggregation/extras/utils.js");  // For assertErrorCode and testExpression.
+
+(function() {
+    "use strict";
+
+    var coll = db.indexofarray;
+    coll.drop();
+
+    // Insert a dummy document to ensure something flows through the pipeline.
+    assert.writeOK(coll.insert({}));
+    // Basic lookups: the index of the first match is returned, or -1 when nothing matches.
+    testExpression(coll, {$indexOfArray: [[1, 2, 3], 2]}, 1);
+
+    testExpression(coll, {$indexOfArray: [[1, 2, 3], 4]}, -1);
+    // A start index skips any earlier match.
+    testExpression(coll, {$indexOfArray: [[1, 2, 3, 2, 1], 2, 2]}, 3);
+    // The end index is exclusive: the 4 at index 3 is outside the range [0, 3).
+    testExpression(coll, {$indexOfArray: [[1, 2, 3, 4, 5], 4, 0, 3]}, -1);
+    // The start index is inclusive.
+    testExpression(coll, {$indexOfArray: [[1, 2, 3], 2, 1]}, 1);
+    // An end index beyond the end of the array is allowed.
+    testExpression(coll, {$indexOfArray: [[1, 2, 3], 2, 0, 10]}, 1);
+    // Start and end index together.
+    testExpression(coll, {$indexOfArray: [[1, 2, 3, 2, 1, 2, 3], 2, 2, 4]}, 3);
+    // A null array produces null.
+    testExpression(coll, {$indexOfArray: [null, 2]}, null);
+    // A start index at the end of the array finds nothing, whatever the end index is.
+    testExpression(coll, {$indexOfArray: [[1, 2, 3], 2, 3]}, -1);
+
+    testExpression(coll, {$indexOfArray: [[1, 2, 3], 2, 3, 1]}, -1);
+
+    testExpression(coll, {$indexOfArray: [[1, 2, 3], 2, 3, 3]}, -1);
+
+    testExpression(coll, {$indexOfArray: [[1, 2, 3], 2, 3, 5]}, -1);
+    // An empty array never contains the item.
+    testExpression(coll, {$indexOfArray: [[], 1]}, -1);
+    // Error cases. A non-array first argument fails with code 40090.
+    var pipeline = {
+        $project: {
+            output: {
+                $indexOfArray: ["string", "s"],
+            }
+        }
+    };
+    assertErrorCode(coll, pipeline, 40090);
+    // A non-integral start or end index fails with code 40096.
+    pipeline = {
+        $project: {output: {$indexOfArray: [[1, 2, 3], 2, "bad"]}}
+    };
+    assertErrorCode(coll, pipeline, 40096);
+
+    pipeline = {
+        $project: {output: {$indexOfArray: [[1, 2, 3], 2, 0, "bad"]}}
+    };
+    assertErrorCode(coll, pipeline, 40096);
+    // A negative start or end index fails with code 40097.
+    pipeline = {
+        $project: {output: {$indexOfArray: [[1, 2, 3], 2, -1]}}
+    };
+    assertErrorCode(coll, pipeline, 40097);
+
+    pipeline = {
+        $project: {output: {$indexOfArray: [[1, 2, 3], 2, 1, -1]}}
+    };
+    assertErrorCode(coll, pipeline, 40097);
+}());
diff --git a/jstests/aggregation/expressions/indexof_bytes.js b/jstests/aggregation/expressions/indexof_bytes.js
new file mode 100644
index 00000000000..ac3cefda790
--- /dev/null
+++ b/jstests/aggregation/expressions/indexof_bytes.js
@@ -0,0 +1,149 @@
+// In SERVER-8951, $indexOfBytes was introduced. In this file, we test the correctness and error
+// cases of the expression.
+load("jstests/aggregation/extras/utils.js");  // For assertErrorCode and testExpression.
+
+(function() {
+    "use strict";
+    // Asserts that 'expression' evaluates to 'result', optionally cross-checking via $substrBytes.
+    function testExpressionBytes(coll, expression, result, shouldTestEquivalence = true) {
+        testExpression(coll, expression, result);
+
+        if (shouldTestEquivalence) {
+            // If we are specifying a starting or ending index for the search, we should be able to
+            // achieve equivalent behavior using $substrBytes.
+            var indexOfSpec = expression["$indexOfBytes"];
+            var input = indexOfSpec[0];
+            var token = indexOfSpec[1];
+            var start = indexOfSpec.length > 2 ? indexOfSpec[2] : 0;
+            // Use $strLenBytes because JavaScript's length property is based off of UTF-16, not the
+            // actual number of bytes.
+            var end = indexOfSpec.length > 3 ? indexOfSpec[3] : {
+                $strLenBytes: input
+            };
+            // Search for the token in only the [start, end) slice of the input.
+            var substrExpr = {
+                $indexOfBytes: [{$substrBytes: [input, start, {$subtract: [end, start]}]}, token]
+            };
+
+            // Since the new expression takes the index with respect to a shortened string, the
+            // output index will differ from the index with respect to the full length string,
+            // unless the output is -1.
+            var substrResult = (result === -1) ? -1 : result - start;
+
+            testExpression(coll, substrExpr, substrResult);
+        }
+    }
+
+    var coll = db.indexofbytes;
+    coll.drop();
+
+    // Insert a dummy document so something flows through the pipeline.
+    assert.writeOK(coll.insert({}));
+    // Basic lookups: the byte index of the first match, or -1 when the token is absent.
+    testExpressionBytes(coll, {$indexOfBytes: ["abc", "b"]}, 1);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abcba", "b"]}, 1);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc", "d"]}, -1);
+    // Searches restricted by an inclusive start index and an exclusive end index.
+    testExpressionBytes(coll, {$indexOfBytes: ["abcba", "b", 2]}, 3);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abcde", "d", 0, 2]}, -1);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc", "b", 1]}, 1);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc", "b", 0, 10]}, 1);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abcbabc", "b", 2, 4]}, 3);
+
+    // $strLenBytes does not accept null as an input.
+    testExpressionBytes(coll, {$indexOfBytes: [null, "b"]}, null, false);
+    // A start index at the end of the string finds nothing for a non-empty token.
+    testExpressionBytes(coll, {$indexOfBytes: ["abc", "b", 3]}, -1);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc", "b", 3, 1]}, -1);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc", "b", 3, 5]}, -1);
+    // Empty input and empty tokens. An empty token is found at the start index.
+    testExpressionBytes(coll, {$indexOfBytes: ["", " "]}, -1);
+
+    testExpressionBytes(coll, {$indexOfBytes: [" ", ""]}, 0);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["", ""]}, 0);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc", "", 3]}, 3);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc", "", 1]}, 1);
+
+    // Test with multi-byte tokens.
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abcde", "de"]}, 3);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abcde", "def"]}, -1);
+
+    // Test with non-ASCII characters. Some tests do not test equivalence using $substrBytes because
+    // $substrBytes disallows taking a substring that begins or ends in the middle of a UTF-8
+    // encoding of a character.
+    testExpressionBytes(coll, {$indexOfBytes: ["a∫∫b", "b"]}, 7);
+
+    // $substrBytes would attempt to take the substring from the middle of a UTF-8
+    // encoding of a character.
+    testExpressionBytes(coll, {$indexOfBytes: ["a∫∫b", "b", 6]}, 7, false);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc∫ba", "∫"]}, 3);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["∫∫∫", "a"]}, -1);
+
+    // $substrBytes would attempt to take the substring from the middle of a UTF-8
+    // encoding of a character.
+    testExpressionBytes(coll, {$indexOfBytes: ["ab∫c", "c", 0, 3]}, -1, false);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc∫b∫", "b∫"]}, 6);
+
+    // Test with embedded null bytes.
+    testExpressionBytes(coll, {$indexOfBytes: ["abc\0d", "d"]}, 4);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc\0", "\0"]}, 3);
+
+    testExpressionBytes(coll, {$indexOfBytes: ["abc\0d\0", "d", 5, 6]}, -1);
+
+    // Error cases.
+    // A non-string first or second argument fails with code 40091 or 40092 respectively.
+    var pipeline = {
+        $project: {
+            output: {
+                $indexOfBytes: [3, "s"],
+            }
+        }
+    };
+    assertErrorCode(coll, pipeline, 40091);
+
+    pipeline = {
+        $project: {
+            output: {
+                $indexOfBytes: ["s", 3],
+            }
+        }
+    };
+    assertErrorCode(coll, pipeline, 40092);
+    // A non-integral start or end index fails with code 40096.
+    pipeline = {
+        $project: {output: {$indexOfBytes: ["abc", "b", "bad"]}}
+    };
+    assertErrorCode(coll, pipeline, 40096);
+
+    pipeline = {
+        $project: {output: {$indexOfBytes: ["abc", "b", 0, "bad"]}}
+    };
+    assertErrorCode(coll, pipeline, 40096);
+    // A negative start or end index fails with code 40097.
+    pipeline = {
+        $project: {output: {$indexOfBytes: ["abc", "b", -1]}}
+    };
+    assertErrorCode(coll, pipeline, 40097);
+
+    pipeline = {
+        $project: {output: {$indexOfBytes: ["abc", "b", 1, -1]}}
+    };
+    assertErrorCode(coll, pipeline, 40097);
+}());
diff --git a/jstests/aggregation/expressions/indexof_codepoints.js b/jstests/aggregation/expressions/indexof_codepoints.js
new file mode 100644
index 00000000000..20b9534b050
--- /dev/null
+++ b/jstests/aggregation/expressions/indexof_codepoints.js
@@ -0,0 +1,129 @@
+// In SERVER-8951, $indexOfCP was introduced. In this file, we test the correctness and error
+// cases of the expression.
+load("jstests/aggregation/extras/utils.js");  // For assertErrorCode and testExpression.
+
+(function() {
+    "use strict";
+    // Asserts that 'expression' evaluates to 'result', optionally cross-checking via $substrCP.
+    function testExpressionCodePoints(coll, expression, result, shouldTestEquivalence = true) {
+        testExpression(coll, expression, result);
+
+        var indexOfSpec = expression["$indexOfCP"];
+        if (shouldTestEquivalence) {
+            // If we are specifying a starting or ending index for the search, we should be able to
+            // achieve equivalent behavior using $substrCP.
+            var input = indexOfSpec[0];
+            var token = indexOfSpec[1];
+            var start = indexOfSpec.length > 2 ? indexOfSpec[2] : 0;
+            var end = indexOfSpec.length > 3 ? indexOfSpec[3] : {
+                $strLenCP: input
+            };
+            // Search for the token in only the [start, end) slice of the input.
+            var substrExpr = {
+                $indexOfCP: [{$substrCP: [input, start, {$subtract: [end, start]}]}, token]
+            };
+
+            // Since the new expression takes the index with respect to a shortened string, the
+            // output index will differ from the index with respect to the full length string,
+            // unless the output is -1.
+            var substrResult = (result === -1) ? -1 : result - start;
+
+            testExpression(coll, substrExpr, substrResult);
+        }
+    }
+
+    var coll = db.indexofcp;
+    coll.drop();
+
+    // Insert a dummy document so something flows through the pipeline.
+    assert.writeOK(coll.insert({}));
+    // Basic lookups: results are code point indexes, or -1 when the token is absent.
+    testExpressionCodePoints(coll, {$indexOfCP: ["∫aƒ", "ƒ"]}, 2);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["a∫c", "d"]}, -1);
+    // Searches restricted by an inclusive start index and an exclusive end index.
+    testExpressionCodePoints(coll, {$indexOfCP: ["∫b∫ba", "b", 2]}, 3);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["ab∫de", "d", 0, 3]}, -1);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["ab∫de", "d", 0, 4]}, 3);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["øøc", "ø", 1]}, 1);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["øƒc", "ƒ", 0, 10]}, 1);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["abcbabc", "b", 2, 4]}, 3);
+
+    // $strLenCP does not accept null as an input.
+    testExpressionCodePoints(coll, {$indexOfCP: [null, "√"]}, null, false);
+    // A start index at the end of the string finds nothing for a non-empty token.
+    testExpressionCodePoints(coll, {$indexOfCP: ["abc", "b", 3]}, -1);
+
+    // We are intentionally testing specifying an end index before the start index, which is why we
+    // cannot use $substrCP in checking for equivalence.
+    testExpressionCodePoints(coll, {$indexOfCP: ["a√cb", "b", 3, 1]}, -1, false);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["a∫b", "b", 3, 5]}, -1);
+    // Empty input and empty tokens. An empty token is found at the start index.
+    testExpressionCodePoints(coll, {$indexOfCP: ["", "∫"]}, -1);
+
+    testExpressionCodePoints(coll, {$indexOfCP: [" ", ""]}, 0);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["", ""]}, 0);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["abc", "", 1]}, 1);
+
+    // Test with multi-byte tokens.
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["abcƒe", "ƒe"]}, 3);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["∫aeøø", "øøø"]}, -1);
+
+    // Test with embedded null bytes.
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["ab∫\0d", "d"]}, 4);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["øbc\0", "\0"]}, 3);
+
+    testExpressionCodePoints(coll, {$indexOfCP: ["πbƒ\0d\0", "d", 5, 6]}, -1);
+
+    // Error cases.
+    // A non-string first or second argument fails with code 40093 or 40094 respectively.
+    var pipeline = {
+        $project: {
+            output: {
+                $indexOfCP: [3, "s"],
+            }
+        }
+    };
+    assertErrorCode(coll, pipeline, 40093);
+
+    pipeline = {
+        $project: {
+            output: {
+                $indexOfCP: ["s", 3],
+            }
+        }
+    };
+    assertErrorCode(coll, pipeline, 40094);
+    // A non-integral start or end index fails with code 40096.
+    pipeline = {
+        $project: {output: {$indexOfCP: ["abc", "b", "bad"]}}
+    };
+    assertErrorCode(coll, pipeline, 40096);
+
+    pipeline = {
+        $project: {output: {$indexOfCP: ["abc", "b", 0, "bad"]}}
+    };
+    assertErrorCode(coll, pipeline, 40096);
+    // A negative start or end index fails with code 40097.
+    pipeline = {
+        $project: {output: {$indexOfCP: ["abc", "b", -1]}}
+    };
+    assertErrorCode(coll, pipeline, 40097);
+
+    pipeline = {
+        $project: {output: {$indexOfCP: ["abc", "b", 1, -1]}}
+    };
+    assertErrorCode(coll, pipeline, 40097);
+}());
diff --git a/jstests/aggregation/extras/utils.js b/jstests/aggregation/extras/utils.js
index c1a8fc5645d..d8203f56b94 100644
--- a/jstests/aggregation/extras/utils.js
+++ b/jstests/aggregation/extras/utils.js
@@ -8,8 +8,8 @@ function testExpression(coll, expression, result) {
 
     var res = coll.aggregate({$project: {output: expression}}).toArray();
 
-    assert.eq(res.length, 1);
-    assert.eq(res[0].output, result);
+    assert.eq(res.length, 1, tojson(res));
+    assert.eq(res[0].output, result, tojson(res));
 }
 
 /*
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp
index f462052d856..8024ec39cf2 100644
--- a/src/mongo/db/pipeline/expression.cpp
+++ b/src/mongo/db/pipeline/expression.cpp
@@ -359,6 +359,44 @@ intrusive_ptr<Expression> Expression::parseOperand(BSONElement exprElement,
     }
 }
 
+namespace {
+/**
+ * UTF-8 multi-byte code points consist of one leading byte of the form 11xxxxxx, and potentially
+ * many continuation bytes of the form 10xxxxxx. This method checks whether 'charByte' is a
+ * continuation byte.
+ */
+bool isContinuationByte(char charByte) {
+    return (charByte & 0xc0) == 0x80;
+}
+
+/**
+ * UTF-8 multi-byte code points consist of one leading byte of the form 11xxxxxx, and potentially
+ * many continuation bytes of the form 10xxxxxx. This method checks whether 'charByte' is a leading
+ * byte.
+ */
+bool isLeadingByte(char charByte) {
+    return (charByte & 0xc0) == 0xc0;
+}
+
+/**
+ * UTF-8 single-byte code points are of the form 0xxxxxxx. This method checks whether 'charByte' is
+ * a single-byte code point.
+ */
+bool isSingleByte(char charByte) {
+    return (charByte & 0x80) == 0x0;
+}
+// Returns how many bytes the code point starting with 'charByte' occupies.
+size_t getCodePointLength(char charByte) {
+    if (isSingleByte(charByte)) {
+        return 1;
+    }
+    // Anything else must be a leading byte; a continuation byte trips the invariant below.
+    invariant(isLeadingByte(charByte));
+    // Shift the byte to the top 8 bits and invert it so its leading ones become leading zeros.
+    // In UTF-8, the number of leading ones is the number of bytes the code point takes up.
+    return countLeadingZeros64(~(uint64_t(charByte) << (64 - 8)));
+}
+}  // namespace
 
 /* ----------------------- ExpressionAbs ---------------------------- */
 
@@ -2118,6 +2156,216 @@ const char* ExpressionIn::getOpName() const {
     return "$in";
 }
 
+/* ----------------------- ExpressionIndexOfArray ------------------ */
+
+namespace {
+// uasserts with 40096 if 'val' is not integral, or with 40097 if 'val' is negative.
+void uassertIfNotIntegralAndNonNegative(Value val,
+                                        StringData expressionName,
+                                        StringData argumentName) {
+    uassert(40096,
+            str::stream() << expressionName << " requires an integral " << argumentName
+                          << ", found a value of type: " << typeName(val.getType())
+                          << ", with value: " << val.toString(),
+            val.integral());
+    uassert(40097,
+            str::stream() << expressionName << " requires a nonnegative " << argumentName
+                          << ", found: " << val.toString(),
+            val.coerceToInt() >= 0);
+}
+
+}  // namespace
+// Returns the index of the first array element equal to the search item, or -1 if none matches.
+Value ExpressionIndexOfArray::evaluateInternal(Variables* vars) const {
+    Value arrayArg = vpOperand[0]->evaluateInternal(vars);
+    // A nullish first argument short-circuits to null instead of raising error 40090.
+    if (arrayArg.nullish()) {
+        return Value(BSONNULL);
+    }
+
+    uassert(40090,
+            str::stream() << "$indexOfArray requires an array as a first argument, found: "
+                          << typeName(arrayArg.getType()),
+            arrayArg.isArray());
+    // Bind by reference: 'arrayArg' outlives every use of 'array', so no copy is needed.
+    const std::vector<Value>& array = arrayArg.getArray();
+
+    Value searchItem = vpOperand[1]->evaluateInternal(vars);
+
+    size_t startIndex = 0;
+    if (vpOperand.size() > 2) {
+        Value startIndexArg = vpOperand[2]->evaluateInternal(vars);
+        uassertIfNotIntegralAndNonNegative(startIndexArg, getOpName(), "starting index");
+        startIndex = static_cast<size_t>(startIndexArg.coerceToInt());
+    }
+
+    size_t endIndex = array.size();
+    if (vpOperand.size() > 3) {
+        Value endIndexArg = vpOperand[3]->evaluateInternal(vars);
+        uassertIfNotIntegralAndNonNegative(endIndexArg, getOpName(), "ending index");
+        // Don't let 'endIndex' exceed the length of the array.
+        endIndex = std::min(array.size(), static_cast<size_t>(endIndexArg.coerceToInt()));
+    }
+
+    for (size_t i = startIndex; i < endIndex; i++) {
+        if (array[i] == searchItem) {
+            return Value(static_cast<int>(i));
+        }
+    }
+
+    return Value(-1);
+}
+
+REGISTER_EXPRESSION(indexOfArray, ExpressionIndexOfArray::parse);
+const char* ExpressionIndexOfArray::getOpName() const {
+    return "$indexOfArray";
+}
+
+/* ----------------------- ExpressionIndexOfBytes ------------------ */
+
+namespace {
+// Returns true if 'token' occurs in 'input' starting exactly at byte offset 'index'.
+bool stringHasTokenAtIndex(size_t index, const std::string& input, const std::string& token) {
+    if (token.size() + index > input.size()) {
+        return false;
+    }
+    return input.compare(index, token.size(), token) == 0;
+}
+
+}  // namespace
+// Returns the byte index of the first occurrence of the token in the input string, or -1.
+Value ExpressionIndexOfBytes::evaluateInternal(Variables* vars) const {
+    Value stringArg = vpOperand[0]->evaluateInternal(vars);
+    // A nullish input string short-circuits to null instead of raising error 40091.
+    if (stringArg.nullish()) {
+        return Value(BSONNULL);
+    }
+
+    uassert(40091,
+            str::stream() << "$indexOfBytes requires a string as the first argument, found: "
+                          << typeName(stringArg.getType()),
+            stringArg.getType() == String);
+    const std::string& input = stringArg.getString();
+
+    Value tokenArg = vpOperand[1]->evaluateInternal(vars);
+    uassert(40092,
+            str::stream() << "$indexOfBytes requires a string as the second argument, found: "
+                          << typeName(tokenArg.getType()),
+            tokenArg.getType() == String);
+    const std::string& token = tokenArg.getString();
+
+    size_t startIndex = 0;
+    if (vpOperand.size() > 2) {
+        Value startIndexArg = vpOperand[2]->evaluateInternal(vars);
+        uassertIfNotIntegralAndNonNegative(startIndexArg, getOpName(), "starting index");
+        startIndex = static_cast<size_t>(startIndexArg.coerceToInt());
+    }
+
+    size_t endIndex = input.size();
+    if (vpOperand.size() > 3) {
+        Value endIndexArg = vpOperand[3]->evaluateInternal(vars);
+        uassertIfNotIntegralAndNonNegative(endIndexArg, getOpName(), "ending index");
+        // Don't let 'endIndex' exceed the length of the string.
+        endIndex = std::min(input.size(), static_cast<size_t>(endIndexArg.coerceToInt()));
+    }
+    // A start past the end of the string, or an end before the start, can never match.
+    if (startIndex > input.length() || endIndex < startIndex) {
+        return Value(-1);
+    }
+    // Search only the first 'endIndex' bytes so that a match cannot extend past the end index.
+    size_t position = input.substr(0, endIndex).find(token, startIndex);
+    if (position == std::string::npos) {
+        return Value(-1);
+    }
+
+    return Value(static_cast<int>(position));
+}
+
+REGISTER_EXPRESSION(indexOfBytes, ExpressionIndexOfBytes::parse);
+const char* ExpressionIndexOfBytes::getOpName() const {
+    return "$indexOfBytes";
+}
+
+/* ----------------------- ExpressionIndexOfCP --------------------- */
+// Returns the code point index of the first occurrence of the token in the input string, or -1.
+Value ExpressionIndexOfCP::evaluateInternal(Variables* vars) const {
+    Value stringArg = vpOperand[0]->evaluateInternal(vars);
+    // A nullish input string short-circuits to null instead of raising error 40093.
+    if (stringArg.nullish()) {
+        return Value(BSONNULL);
+    }
+
+    uassert(40093,
+            str::stream() << "$indexOfCP requires a string as the first argument, found: "
+                          << typeName(stringArg.getType()),
+            stringArg.getType() == String);
+    const std::string& input = stringArg.getString();
+
+    Value tokenArg = vpOperand[1]->evaluateInternal(vars);
+    uassert(40094,
+            str::stream() << "$indexOfCP requires a string as the second argument, found: "
+                          << typeName(tokenArg.getType()),
+            tokenArg.getType() == String);
+    const std::string& token = tokenArg.getString();
+
+    size_t startCodePointIndex = 0;
+    if (vpOperand.size() > 2) {
+        Value startIndexArg = vpOperand[2]->evaluateInternal(vars);
+        uassertIfNotIntegralAndNonNegative(startIndexArg, getOpName(), "starting index");
+        startCodePointIndex = static_cast<size_t>(startIndexArg.coerceToInt());
+    }
+
+    // Compute the length (in code points) of the input, and convert 'startCodePointIndex' to a byte
+    // index.
+    size_t codePointLength = 0;
+    size_t startByteIndex = 0;
+    for (size_t byteIx = 0; byteIx < input.size(); ++codePointLength) {
+        if (codePointLength == startCodePointIndex) {
+            // We have determined the byte at which our search will start.
+            startByteIndex = byteIx;
+        }
+        // Every iteration must start on a code point boundary, not on a continuation byte.
+        uassert(
+            40095, "$indexOfCP found bad UTF-8 in the input", !isContinuationByte(input[byteIx]));
+        byteIx += getCodePointLength(input[byteIx]);
+    }
+
+    size_t endCodePointIndex = codePointLength;
+    if (vpOperand.size() > 3) {
+        Value endIndexArg = vpOperand[3]->evaluateInternal(vars);
+        uassertIfNotIntegralAndNonNegative(endIndexArg, getOpName(), "ending index");
+
+        // Don't let 'endCodePointIndex' exceed the number of code points in the string.
+        endCodePointIndex =
+            std::min(codePointLength, static_cast<size_t>(endIndexArg.coerceToInt()));
+    }
+    // NOTE(review): this ignores a start index > 0 when both strings are empty; confirm intent.
+    if (startByteIndex == 0 && input.empty() && token.empty()) {
+        // If we are finding the index of "" in the string "", the below loop will not loop, so we
+        // need a special case for this.
+        return Value(0);
+    }
+
+    // We must keep track of which byte, and which code point, we are examining, being careful not
+    // to overflow either the length of the string or the ending code point.
+    // If the start index is past the last code point, the loop below does not run.
+    size_t currentCodePointIndex = startCodePointIndex;
+    for (size_t byteIx = startByteIndex; currentCodePointIndex < endCodePointIndex;
+         ++currentCodePointIndex) {
+        if (stringHasTokenAtIndex(byteIx, input, token)) {
+            return Value(static_cast<int>(currentCodePointIndex));
+        }
+        byteIx += getCodePointLength(input[byteIx]);
+    }
+
+    return Value(-1);
+}
+
+REGISTER_EXPRESSION(indexOfCP, ExpressionIndexOfCP::parse);
+const char* ExpressionIndexOfCP::getOpName() const {
+    return "$indexOfCP";
+}
+
 /* ----------------------- ExpressionLn ---------------------------- */
 
 Value ExpressionLn::evaluateNumericArg(const Value& numericArg) const {
@@ -3071,18 +3319,6 @@ const char* ExpressionSize::getOpName() const {
 
 /* ----------------------- ExpressionSplit --------------------------- */
 
-namespace {
-
-bool stringHasTokenAtIndex(size_t index, const std::string& input, const std::string& token) {
-    if (token.size() + index > input.size()) {
-        return false;
-    }
-
-    return input.compare(index, token.size(), token) == 0;
-}
-
-}  // namespace
-
 Value ExpressionSplit::evaluateInternal(Variables* vars) const {
     Value inputArg = vpOperand[0]->evaluateInternal(vars);
     Value separatorArg = vpOperand[1]->evaluateInternal(vars);
@@ -3174,45 +3410,6 @@ const char* ExpressionStrcasecmp::getOpName() const {
     return "$strcasecmp";
 }
 
-namespace {
-/**
- * UTF-8 multi-byte code points consist of one leading byte of the form 11xxxxxx, and potentially
- * many continuation bytes of the form 10xxxxxx. This method checks whether 'charByte' is a
- * continuation byte.
- */
-bool isContinuationByte(char charByte) {
-    return (charByte & 0xc0) == 0x80;
-}
-
-/**
- * UTF-8 multi-byte code points consist of one leading byte of the form 11xxxxxx, and potentially
- * many continuation bytes of the form 10xxxxxx. This method checks whether 'charByte' is a leading
- * byte.
- */
-bool isLeadingByte(char charByte) {
-    return (charByte & 0xc0) == 0xc0;
-}
-
-/**
- * UTF-8 single-byte code points are of the form 0xxxxxxx. This method checks whether 'charByte' is
- * a single-byte code point.
- */
-bool isSingleByte(char charByte) {
-    return (charByte & 0x80) == 0x0;
-}
-
-size_t getCodePointLength(char charByte) {
-    if (isSingleByte(charByte)) {
-        return 1;
-    }
-
-    invariant(isLeadingByte(charByte));
-
-    // In UTF-8, the number of leading ones is the number of bytes the code point takes up.
-    return countLeadingZeros64(~(uint64_t(charByte) << (64 - 8)));
-}
-}  // namespace
-
 /* ----------------------- ExpressionSubstrBytes ---------------------------- */
 
 Value ExpressionSubstrBytes::evaluateInternal(Variables* vars) const {
diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h
index 777fbd4dc8c..87a9536e689 100644
--- a/src/mongo/db/pipeline/expression.h
+++ b/src/mongo/db/pipeline/expression.h
@@ -859,6 +859,30 @@ public:
 };
 
 
+/** Implements $indexOfArray: the index of the first array element equal to a value, or -1. */
+class ExpressionIndexOfArray final : public ExpressionRangedArity<ExpressionIndexOfArray, 2, 4> {
+public:
+    Value evaluateInternal(Variables* vars) const final;
+    const char* getOpName() const final;
+};
+
+
+/** Implements $indexOfBytes: the byte index of the first occurrence of a substring, or -1. */
+class ExpressionIndexOfBytes final : public ExpressionRangedArity<ExpressionIndexOfBytes, 2, 4> {
+public:
+    Value evaluateInternal(Variables* vars) const final;
+    const char* getOpName() const final;
+};
+
+
+/** Implements $indexOfCP: indexOf behavior for UTF-8 strings, indexed by code point. */
+class ExpressionIndexOfCP final : public ExpressionRangedArity<ExpressionIndexOfCP, 2, 4> {
+public:
+    Value evaluateInternal(Variables* vars) const final;
+    const char* getOpName() const final;
+};
+
+
 class ExpressionLet final : public Expression {
 public:
     boost::intrusive_ptr<Expression> optimize() final;