summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTed Tuckman <ted.tuckman@mongodb.com>2020-07-13 08:08:15 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-07-16 12:23:47 +0000
commit4bfdc5ddfc4ad5569cf995e734c4b2efe77f769a (patch)
tree5efe8a3195d1e3d622ec73f24079bc2ceb649032
parent1fadc68643210125bd855925f0d99ebbd9c48478 (diff)
downloadmongo-4bfdc5ddfc4ad5569cf995e734c4b2efe77f769a.tar.gz
SERVER-48789 Use mid rule actions to sort tokens in lexer
-rw-r--r--src/mongo/db/cst/SConscript1
-rw-r--r--src/mongo/db/cst/bson_lexer.cpp142
-rw-r--r--src/mongo/db/cst/bson_lexer.h13
-rw-r--r--src/mongo/db/cst/bson_lexer_test.cpp223
-rw-r--r--src/mongo/db/cst/cst_test.cpp32
-rw-r--r--src/mongo/db/cst/key_fieldname.h13
-rw-r--r--src/mongo/db/cst/pipeline_grammar.yy47
-rw-r--r--src/mongo/db/cst/pipeline_parser_gen.cpp196
-rw-r--r--src/mongo/db/cst/pipeline_parser_gen.hpp187
9 files changed, 636 insertions, 218 deletions
diff --git a/src/mongo/db/cst/SConscript b/src/mongo/db/cst/SConscript
index 281631992f4..b981e5a4748 100644
--- a/src/mongo/db/cst/SConscript
+++ b/src/mongo/db/cst/SConscript
@@ -22,6 +22,7 @@ env.Library(
env.CppUnitTest(
target='cst_test',
source=[
+ 'bson_lexer_test.cpp',
'cst_test.cpp',
],
LIBDEPS=[
diff --git a/src/mongo/db/cst/bson_lexer.cpp b/src/mongo/db/cst/bson_lexer.cpp
index cb4c8b10109..0f15a7eccef 100644
--- a/src/mongo/db/cst/bson_lexer.cpp
+++ b/src/mongo/db/cst/bson_lexer.cpp
@@ -37,66 +37,108 @@ namespace mongo {
namespace {
-// Structure to annotate reserved tokens, used to hint the lexer into behavior once the token is
-// seen. There are no semantic values associated with these tokens.
-struct KeywordToken {
- boost::optional<PipelineParserGen::token_type> type;
- bool traverseChild;
- bool orderedArguments;
-};
-
-// Mapping of reserved keywords to BSON token. Any key which is not included in this map is treated
-// as a terminal.
-const StringMap<KeywordToken> reservedKeyLookup = {
- {"$_internalInhibitOptimization",
- {
- PipelineParserGen::token::STAGE_INHIBIT_OPTIMIZATION,
- false /* traverseChild */,
- false /* orderedArguments */
- }},
+// Mapping of reserved keywords to BSON token. Any key which is not included in this map is assumed
+// to be a user field name and is treated as a terminal by the parser.
+const StringMap<PipelineParserGen::token_type> reservedKeyLookup = {
+ {"$_internalInhibitOptimization", PipelineParserGen::token::STAGE_INHIBIT_OPTIMIZATION},
+ {"$unionWith", PipelineParserGen::token::STAGE_UNION_WITH},
+ {"coll", PipelineParserGen::token::COLL_ARG},
+ {"pipeline", PipelineParserGen::token::PIPELINE_ARG},
};
+bool isCompound(PipelineParserGen::symbol_type token) {
+ return token.type_get() == static_cast<int>(PipelineParserGen::token::START_OBJECT) ||
+ token.type_get() == static_cast<int>(PipelineParserGen::token::START_ARRAY);
+}
} // namespace
-void BSONLexer::tokenize(BSONElement elem, bool includeFieldName) {
- // The rules for tokenizing element field names fall into two buckets:
- // 1. It matches a reserved keyword or operator. In this case, the traversal rules and
- // sensitivity to ordered nested arguments is dictated by the annotation in the lookup
- // table.
- // 2. It is not a reserved keyword and thus is treated as a STRING literal token. The
- // traversal rules for such elements is always to traverse but ignore argument order if the
- // value is an object.
- KeywordToken fieldNameToken = [&] {
- if (includeFieldName) {
- if (auto it = reservedKeyLookup.find(elem.fieldNameStringData());
- it != reservedKeyLookup.end()) {
- _tokens.emplace_back(*it->second.type, getNextLoc());
- return it->second;
- } else {
- _tokens.emplace_back(
- PipelineParserGen::make_STRING(elem.fieldName(), getNextLoc()));
- return KeywordToken{PipelineParserGen::token::STRING, true, false};
+void BSONLexer::sortObjTokens() {
+ // A TokenElement is similar to a BSONElement, with the payload being a vector of Bison symbols
+ // if the type is compound (object or array).
+ using TokenElement =
+ std::pair<PipelineParserGen::symbol_type, std::vector<PipelineParserGen::symbol_type>>;
+ struct TokenElementCompare {
+ bool operator()(const TokenElement& elem1, const TokenElement& elem2) const {
+ return elem1.first.type_get() < elem2.first.type_get();
+ }
+ };
+
+ auto currentPosition = _position;
+ if (_tokens[currentPosition].type_get() !=
+ static_cast<int>(PipelineParserGen::token::START_OBJECT)) {
+ return;
+ }
+
+ std::list<TokenElement> sortedTokenPairs;
+ // Increment to get to the first token after the START_OBJECT. We will sort tokens until the
+ // matching END_OBJECT is found.
+ currentPosition++;
+ while (_tokens[currentPosition].type_get() !=
+ static_cast<int>(PipelineParserGen::token::END_OBJECT)) {
+ invariant(size_t(currentPosition) < _tokens.size());
+
+ auto keyToken = _tokens[currentPosition++];
+
+ std::vector<PipelineParserGen::symbol_type> rhsTokens;
+ rhsTokens.push_back(_tokens[currentPosition]);
+ if (isCompound(_tokens[currentPosition])) {
+ auto braceCount = 1;
+ currentPosition++;
+ // Only sort the top level tokens. If we encounter a compound type, then jump to its
+ // matching bracket or brace.
+ while (braceCount > 0) {
+ if (isCompound(_tokens[currentPosition]))
+ braceCount++;
+ if (_tokens[currentPosition].type_get() ==
+ static_cast<int>(PipelineParserGen::token::END_OBJECT) ||
+ _tokens[currentPosition].type_get() ==
+ static_cast<int>(PipelineParserGen::token::END_ARRAY))
+ braceCount--;
+
+ rhsTokens.push_back(_tokens[currentPosition++]);
}
+ } else {
+ // Scalar, already added above.
+ currentPosition++;
+ }
+ sortedTokenPairs.push_back(std::make_pair(keyToken, rhsTokens));
+ }
+ sortedTokenPairs.sort(TokenElementCompare());
+
+ // _position is at the initial START_OBJECT, and currentPosition is at its matching
+ // END_OBJECT. We need to flatten the sorted list of KV pairs to get the correct order of
+ // tokens.
+ auto replacePosition = _position + 1;
+ for (auto&& [key, rhsTokens] : sortedTokenPairs) {
+ _tokens[replacePosition].clear();
+ _tokens[replacePosition++].move(key);
+ for (auto&& token : rhsTokens) {
+ _tokens[replacePosition].clear();
+ _tokens[replacePosition++].move(token);
+ }
+ }
+}
+
+void BSONLexer::tokenize(BSONElement elem, bool includeFieldName) {
+ // Skipped when we are tokenizing arrays.
+ if (includeFieldName) {
+ if (auto it = reservedKeyLookup.find(elem.fieldNameStringData());
+ it != reservedKeyLookup.end()) {
+ // Place the token expected by the parser if this is a reserved keyword.
+ _tokens.emplace_back(it->second, getNextLoc());
+ } else {
+ // If we don't care about the keyword, the fieldname is treated as a normal string.
+ _tokens.emplace_back(PipelineParserGen::make_STRING(elem.fieldName(), getNextLoc()));
}
- return KeywordToken{boost::none, true, false};
- }();
+ }
switch (elem.type()) {
case BSONType::Object:
- if (fieldNameToken.orderedArguments) {
- _tokens.emplace_back(PipelineParserGen::token::START_ORDERED_OBJECT, getNextLoc());
- BSONObjIteratorSorted sortedIt(elem.embeddedObject());
- while (sortedIt.more()) {
- tokenize(sortedIt.next(), true);
- }
- _tokens.emplace_back(PipelineParserGen::token::END_ORDERED_OBJECT, getNextLoc());
- } else {
- _tokens.emplace_back(PipelineParserGen::token::START_OBJECT, getNextLoc());
- for (auto&& nestedElem : elem.embeddedObject()) {
- tokenize(nestedElem, true);
- }
- _tokens.emplace_back(PipelineParserGen::token::END_OBJECT, getNextLoc());
+ _tokens.emplace_back(PipelineParserGen::token::START_OBJECT, getNextLoc());
+ for (auto&& nestedElem : elem.embeddedObject()) {
+ tokenize(nestedElem, true);
}
+ _tokens.emplace_back(PipelineParserGen::token::END_OBJECT, getNextLoc());
break;
case BSONType::Array:
_tokens.emplace_back(PipelineParserGen::token::START_ARRAY, getNextLoc());
diff --git a/src/mongo/db/cst/bson_lexer.h b/src/mongo/db/cst/bson_lexer.h
index cbd735aae5b..e9a602c2cd8 100644
--- a/src/mongo/db/cst/bson_lexer.h
+++ b/src/mongo/db/cst/bson_lexer.h
@@ -49,6 +49,19 @@ public:
return _tokens[_position++];
}
+ /**
+ * Sorts the object that starts at the current position, based on the enum for each of the field
+ * name tokens.
+ */
+ void sortObjTokens();
+
+ /**
+ * Convenience for retrieving the token at the given offset.
+ */
+ auto& operator[](int offset) {
+ return _tokens[offset];
+ }
+
private:
// Tokenizes the given BSONElement, traversing its children if necessary. If the field name
// should not be considered, set 'includeFieldName' to false.
diff --git a/src/mongo/db/cst/bson_lexer_test.cpp b/src/mongo/db/cst/bson_lexer_test.cpp
new file mode 100644
index 00000000000..726fdb0125c
--- /dev/null
+++ b/src/mongo/db/cst/bson_lexer_test.cpp
@@ -0,0 +1,223 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include <string>
+
+#include "mongo/bson/json.h"
+#include "mongo/db/cst/bson_lexer.h"
+#include "mongo/db/cst/pipeline_parser_gen.hpp"
+
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace {
+
+void assertTokensMatch(BSONLexer& lexer,
+ std::vector<PipelineParserGen::token::yytokentype> tokens) {
+ for (auto&& token : tokens) {
+ ASSERT_EQ(lexer.getNext().type_get(), token);
+ }
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::END_OF_FILE);
+}
+
+TEST(BSONLexerTest, TokenizesOpaqueUserObjects) {
+ auto input = fromjson("{pipeline: [{a: 1, b: '1'}]}");
+ BSONLexer lexer(input["pipeline"].Array());
+ assertTokensMatch(lexer,
+ {PipelineParserGen::token::START_ARRAY,
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::STRING,
+ PipelineParserGen::token::NUMBER_INT,
+ PipelineParserGen::token::STRING,
+ PipelineParserGen::token::STRING,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY});
+}
+
+TEST(BSONLexerTest, TokenizesReservedKeywords) {
+ auto input = fromjson("{pipeline: [{$_internalInhibitOptimization: {}}]}");
+ BSONLexer lexer(input["pipeline"].Array());
+ assertTokensMatch(lexer,
+ {PipelineParserGen::token::START_ARRAY,
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::STAGE_INHIBIT_OPTIMIZATION,
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY});
+}
+
+TEST(BSONLexerTest, TokenizesReservedKeywordsAtAnyDepth) {
+ auto input = fromjson("{pipeline: [{a: {$_internalInhibitOptimization: {}}}]}");
+ BSONLexer lexer(input["pipeline"].Array());
+ assertTokensMatch(lexer,
+ {PipelineParserGen::token::START_ARRAY,
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::STRING,
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::STAGE_INHIBIT_OPTIMIZATION,
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY});
+}
+
+TEST(BSONLexerTest, MidRuleActionToSortNestedObject) {
+ auto input = fromjson("{pipeline: [{pipeline: 1, coll: 'test'}]}");
+ BSONLexer lexer(input["pipeline"].Array());
+ // Iterate until the first object.
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::START_ARRAY);
+ // Kick the lexer to sort the object, which should move element 'coll' in front of 'pipeline'.
+    // Note that this only works because these are reserved keywords recognized by the lexer;
+    // arbitrary string field names will *not* get sorted.
+ lexer.sortObjTokens();
+ auto expected = {PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::COLL_ARG,
+ PipelineParserGen::token::STRING,
+ PipelineParserGen::token::PIPELINE_ARG,
+ PipelineParserGen::token::NUMBER_INT,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY};
+ assertTokensMatch(lexer, expected);
+}
+
+
+TEST(BSONLexerTest, MidRuleActionToSortDoesNotSortNestedObjects) {
+ auto input = fromjson(
+ "{pipeline: [{$unionWith: {pipeline: [{$unionWith: 'inner', a: 1}], coll: 'outer'}}]}");
+ BSONLexer lexer(input["pipeline"].Array());
+ // Iterate until we reach the $unionWith object.
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::START_ARRAY);
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::START_OBJECT);
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::STAGE_UNION_WITH);
+ lexer.sortObjTokens();
+ auto expected = {
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::COLL_ARG,
+ PipelineParserGen::token::STRING, // coll: 'outer'
+ PipelineParserGen::token::PIPELINE_ARG, // inner pipeline
+ PipelineParserGen::token::START_ARRAY,
+ PipelineParserGen::token::START_OBJECT,
+ // The nested pipeline does *not* get sorted, meaning '$unionWith' stays before 'a'.
+ PipelineParserGen::token::STAGE_UNION_WITH,
+ PipelineParserGen::token::STRING, // $unionWith: 'inner'
+ PipelineParserGen::token::STRING,
+ PipelineParserGen::token::NUMBER_INT, // a: 1
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY,
+ };
+ assertTokensMatch(lexer, expected);
+}
+
+TEST(BSONLexerTest, MultipleNestedObjectsAreReorderedCorrectly) {
+ auto input = fromjson(
+ "{pipeline: [{$unionWith: {pipeline: [{$unionWith: 'inner', a: 1}], coll: [{$unionWith: "
+ "'innerB', a: 2}]}}]}");
+ BSONLexer lexer(input["pipeline"].Array());
+ // Iterate until we reach the $unionWith object.
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::START_ARRAY);
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::START_OBJECT);
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::STAGE_UNION_WITH);
+ lexer.sortObjTokens();
+ auto expected = {
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::COLL_ARG,
+ PipelineParserGen::token::START_ARRAY,
+ PipelineParserGen::token::START_OBJECT,
+ // The nested pipeline does *not* get sorted, meaning '$unionWith' stays before 'a'.
+ PipelineParserGen::token::STAGE_UNION_WITH,
+        PipelineParserGen::token::STRING,  // innerB
+ PipelineParserGen::token::STRING, // a
+ PipelineParserGen::token::NUMBER_INT, // a: 2
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY,
+ // Coll nested object ends here.
+ PipelineParserGen::token::PIPELINE_ARG, // inner pipeline
+ PipelineParserGen::token::START_ARRAY,
+ PipelineParserGen::token::START_OBJECT,
+ // The nested pipeline does *not* get sorted, meaning '$unionWith' stays before 'a'.
+ PipelineParserGen::token::STAGE_UNION_WITH,
+ PipelineParserGen::token::STRING, // $unionWith: 'inner'
+ PipelineParserGen::token::STRING, // a
+ PipelineParserGen::token::NUMBER_INT, // a: 1
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY,
+ };
+ assertTokensMatch(lexer, expected);
+}
+TEST(BSONLexerTest, MultiLevelBSONDoesntSortChildren) {
+ auto input = fromjson(
+ "{pipeline: [{$unionWith: {pipeline: [{$unionWith: {'nested': 1, 'apple': 1}, a: 1}], "
+ "coll: 'outer'}}]}");
+ BSONLexer lexer(input["pipeline"].Array());
+ // Iterate until we reach the $unionWith object.
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::START_ARRAY);
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::START_OBJECT);
+ ASSERT_EQ(lexer.getNext().type_get(), PipelineParserGen::token::STAGE_UNION_WITH);
+ lexer.sortObjTokens();
+ auto expected = {
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::COLL_ARG,
+ PipelineParserGen::token::STRING, // coll: 'outer'
+ PipelineParserGen::token::PIPELINE_ARG, // inner pipeline
+ // First nested object
+ PipelineParserGen::token::START_ARRAY,
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::STAGE_UNION_WITH,
+ // Second nested object
+ PipelineParserGen::token::START_OBJECT,
+ PipelineParserGen::token::STRING, // nested: 1
+ PipelineParserGen::token::NUMBER_INT,
+ PipelineParserGen::token::STRING, // apple: 1
+ PipelineParserGen::token::NUMBER_INT,
+ PipelineParserGen::token::END_OBJECT,
+ // End second nested object
+ PipelineParserGen::token::STRING,
+ PipelineParserGen::token::NUMBER_INT, // a: 1
+ PipelineParserGen::token::END_OBJECT,
+ // End first nested object
+ PipelineParserGen::token::END_ARRAY,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_OBJECT,
+ PipelineParserGen::token::END_ARRAY,
+ };
+ assertTokensMatch(lexer, expected);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/cst/cst_test.cpp b/src/mongo/db/cst/cst_test.cpp
index 0d4181c55aa..02111f28ab4 100644
--- a/src/mongo/db/cst/cst_test.cpp
+++ b/src/mongo/db/cst/cst_test.cpp
@@ -79,14 +79,14 @@ TEST(CstGrammarTest, InvalidPipelineSpec) {
auto input = fromjson("{pipeline: [{}]}");
BSONLexer lexer(input["pipeline"].Array());
auto parseTree = PipelineParserGen(lexer, &output);
- ASSERT_EQ(1, parseTree.parse());
+ ASSERT_THROWS_CODE(parseTree.parse(), AssertionException, ErrorCodes::FailedToParse);
}
{
CNode output;
auto input = fromjson("{pipeline: [{$unknownStage: {}}]}");
BSONLexer lexer(input["pipeline"].Array());
auto parseTree = PipelineParserGen(lexer, &output);
- ASSERT_EQ(1, parseTree.parse());
+ ASSERT_THROWS_CODE(parseTree.parse(), AssertionException, ErrorCodes::FailedToParse);
}
{
ASSERT_THROWS_CODE(
@@ -116,7 +116,33 @@ TEST(CstGrammarTest, ParsesInternalInhibitOptimization) {
auto input = fromjson("{pipeline: [{$_internalInhibitOptimization: 'invalid'}]}");
BSONLexer lexer(input["pipeline"].Array());
auto parseTree = PipelineParserGen(lexer, &output);
- ASSERT_EQ(1, parseTree.parse());
+ ASSERT_THROWS_CODE(parseTree.parse(), AssertionException, ErrorCodes::FailedToParse);
+ }
+}
+
+TEST(CstGrammarTest, ParsesUnionWith) {
+ {
+ CNode output;
+ auto input = fromjson("{pipeline: [{$unionWith: {coll: 'hey', pipeline: 1.0}}]}");
+ BSONLexer lexer(input["pipeline"].Array());
+ auto parseTree = PipelineParserGen(lexer, &output);
+ ASSERT_EQ(0, parseTree.parse());
+ auto stages = stdx::get<CNode::ArrayChildren>(output.payload);
+ ASSERT_EQ(1, stages.size());
+ ASSERT(KeyFieldname::unionWith == stages[0].firstKeyFieldname());
+ }
+ {
+ CNode output;
+ auto input = fromjson("{pipeline: [{$unionWith: {pipeline: 1.0, coll: 'hey'}}]}");
+ BSONLexer lexer(input["pipeline"].Array());
+ auto parseTree = PipelineParserGen(lexer, &output);
+ ASSERT_EQ(0, parseTree.parse());
+ auto stages = stdx::get<CNode::ArrayChildren>(output.payload);
+ ASSERT_EQ(1, stages.size());
+ ASSERT(KeyFieldname::unionWith == stages[0].firstKeyFieldname());
+ ASSERT_EQ(stages[0].toBson().toString(),
+ "{ unionWith: { collArg: \"<UserString hey>\", pipelineArg: \"<UserDouble "
+ "1.000000>\" } }");
}
}
diff --git a/src/mongo/db/cst/key_fieldname.h b/src/mongo/db/cst/key_fieldname.h
index edde3699941..37bcd6b7650 100644
--- a/src/mongo/db/cst/key_fieldname.h
+++ b/src/mongo/db/cst/key_fieldname.h
@@ -33,11 +33,14 @@
#include "mongo/util/printable_enum.h"
-#define KEYFIELDNAMES(ENUMIFY) \
- ENUMIFY(atan2) \
- ENUMIFY(id) \
- ENUMIFY(project) \
- ENUMIFY(inhibitOptimization)
+#define KEYFIELDNAMES(ENUMIFY) \
+ ENUMIFY(atan2) \
+ ENUMIFY(id) \
+ ENUMIFY(project) \
+ ENUMIFY(inhibitOptimization) \
+ ENUMIFY(unionWith) \
+ ENUMIFY(collArg) \
+ ENUMIFY(pipelineArg)
MAKE_PRINTABLE_ENUM(KeyFieldname, KEYFIELDNAMES);
MAKE_PRINTABLE_ENUM_STRING_ARRAY(key_fieldname, KeyFieldname, KEYFIELDNAMES);
diff --git a/src/mongo/db/cst/pipeline_grammar.yy b/src/mongo/db/cst/pipeline_grammar.yy
index 9616e113443..8ef7dc65d97 100644
--- a/src/mongo/db/cst/pipeline_grammar.yy
+++ b/src/mongo/db/cst/pipeline_grammar.yy
@@ -81,13 +81,17 @@
// Mandatory error function.
void PipelineParserGen::error (const PipelineParserGen::location_type& loc,
const std::string& msg) {
- std::cerr << msg << " at loc " << loc << std::endl;
+ uasserted(ErrorCodes::FailedToParse, str::stream() << msg <<
+ " at location " <<
+ loc.begin.line << ":" << loc.begin.column <<
+ " of input BSON. Lexer produced token of type " <<
+ lexer[loc.begin.column].type_get() << "." );
}
} // namespace mongo
}
// Parsing parameters, funneled through yyparse() to yylex().
-%param {BSONLexer& driver}
+%param {BSONLexer& lexer}
// yyparse() parameter only.
%parse-param {CNode* cst}
@@ -98,13 +102,16 @@
%token
START_OBJECT
END_OBJECT
- START_ORDERED_OBJECT
- END_ORDERED_OBJECT
START_ARRAY
END_ARRAY
// Reserve pipeline stage names.
STAGE_INHIBIT_OPTIMIZATION
+ STAGE_UNION_WITH
+
+ // $unionWith arguments.
+ COLL_ARG
+ PIPELINE_ARG
END_OF_FILE 0 "EOF"
;
@@ -118,7 +125,7 @@
//
// Semantic values (aka the C++ types produced by the actions).
//
-%nterm <CNode> stageList stage
+%nterm <CNode> stageList stage inhibitOptimization unionWith
//
// Grammar rules
@@ -133,16 +140,34 @@ pipeline: START_ARRAY stageList END_ARRAY {
stageList[result]:
%empty { }
| START_OBJECT stage END_OBJECT stageList[stagesArg] {
- $result = std::move($stagesArg);
- auto& children = stdx::get<CNode::ArrayChildren>($result.payload);
- children.emplace_back(std::move($stage));
+ $result = CNode{CNode::ArrayChildren{$stage}};
}
;
+// Special rule to hint to the lexer that the next set of tokens should be sorted. Note that the
+// sort order is not lexicographical, but rather based on the enum generated from the %token list
+// above.
+START_ORDERED_OBJECT: { lexer.sortObjTokens(); } START_OBJECT;
+
stage:
- STAGE_INHIBIT_OPTIMIZATION START_OBJECT END_OBJECT {
- $stage = CNode{CNode::ObjectChildren{std::pair{KeyFieldname::inhibitOptimization, CNode::noopLeaf()}}};
- }
+ inhibitOptimization | unionWith
;
+inhibitOptimization:
+ STAGE_INHIBIT_OPTIMIZATION START_OBJECT END_OBJECT {
+ $inhibitOptimization =
+CNode{CNode::ObjectChildren{std::pair{KeyFieldname::inhibitOptimization, CNode::noopLeaf()}}};
+ };
+
+unionWith:
+ STAGE_UNION_WITH START_ORDERED_OBJECT COLL_ARG STRING PIPELINE_ARG NUMBER_DOUBLE END_OBJECT {
+ auto coll = CNode{UserString($STRING)};
+ auto pipeline = CNode{UserDouble($NUMBER_DOUBLE)};
+ $unionWith = CNode{CNode::ObjectChildren{std::pair{KeyFieldname::unionWith,
+ CNode{CNode::ObjectChildren{
+ {KeyFieldname::collArg, std::move(coll)},
+ {KeyFieldname::pipelineArg, std::move(pipeline)}
+ }}}}};
+};
+
%%
diff --git a/src/mongo/db/cst/pipeline_parser_gen.cpp b/src/mongo/db/cst/pipeline_parser_gen.cpp
index 83f31a004e2..261d4510841 100644
--- a/src/mongo/db/cst/pipeline_parser_gen.cpp
+++ b/src/mongo/db/cst/pipeline_parser_gen.cpp
@@ -46,11 +46,14 @@
namespace mongo {
// Mandatory error function.
void PipelineParserGen::error(const PipelineParserGen::location_type& loc, const std::string& msg) {
- std::cerr << msg << " at loc " << loc << std::endl;
+ uasserted(ErrorCodes::FailedToParse,
+ str::stream() << msg << " at location " << loc.begin.line << ":" << loc.begin.column
+ << " of input BSON. Lexer produced token of type "
+ << lexer[loc.begin.column].type_get() << ".");
}
} // namespace mongo
-#line 58 "pipeline_parser_gen.cpp"
+#line 62 "pipeline_parser_gen.cpp"
#ifndef YY_
@@ -143,17 +146,17 @@ void PipelineParserGen::error(const PipelineParserGen::location_type& loc, const
#line 52 "pipeline_grammar.yy"
namespace mongo {
-#line 151 "pipeline_parser_gen.cpp"
+#line 155 "pipeline_parser_gen.cpp"
/// Build a parser object.
-PipelineParserGen::PipelineParserGen(BSONLexer& driver_yyarg, CNode* cst_yyarg)
+PipelineParserGen::PipelineParserGen(BSONLexer& lexer_yyarg, CNode* cst_yyarg)
#if YYDEBUG
: yydebug_(false),
yycdebug_(&std::cerr),
#else
:
#endif
- driver(driver_yyarg),
+ lexer(lexer_yyarg),
cst(cst_yyarg) {
}
@@ -194,28 +197,30 @@ PipelineParserGen::stack_symbol_type::stack_symbol_type() {}
PipelineParserGen::stack_symbol_type::stack_symbol_type(YY_RVREF(stack_symbol_type) that)
: super_type(YY_MOVE(that.state), YY_MOVE(that.location)) {
switch (that.kind()) {
- case 16: // stageList
- case 17: // stage
+ case 17: // stageList
+ case 18: // stage
+ case 19: // inhibitOptimization
+ case 20: // unionWith
value.YY_MOVE_OR_COPY<CNode>(YY_MOVE(that.value));
break;
- case 14: // BOOL
+ case 15: // BOOL
value.YY_MOVE_OR_COPY<bool>(YY_MOVE(that.value));
break;
- case 13: // NUMBER_DOUBLE
+ case 14: // NUMBER_DOUBLE
value.YY_MOVE_OR_COPY<double>(YY_MOVE(that.value));
break;
- case 11: // NUMBER_INT
+ case 12: // NUMBER_INT
value.YY_MOVE_OR_COPY<int>(YY_MOVE(that.value));
break;
- case 12: // NUMBER_LONG
+ case 13: // NUMBER_LONG
value.YY_MOVE_OR_COPY<long long>(YY_MOVE(that.value));
break;
- case 10: // STRING
+ case 11: // STRING
value.YY_MOVE_OR_COPY<std::string>(YY_MOVE(that.value));
break;
@@ -232,28 +237,30 @@ PipelineParserGen::stack_symbol_type::stack_symbol_type(YY_RVREF(stack_symbol_ty
PipelineParserGen::stack_symbol_type::stack_symbol_type(state_type s, YY_MOVE_REF(symbol_type) that)
: super_type(s, YY_MOVE(that.location)) {
switch (that.kind()) {
- case 16: // stageList
- case 17: // stage
+ case 17: // stageList
+ case 18: // stage
+ case 19: // inhibitOptimization
+ case 20: // unionWith
value.move<CNode>(YY_MOVE(that.value));
break;
- case 14: // BOOL
+ case 15: // BOOL
value.move<bool>(YY_MOVE(that.value));
break;
- case 13: // NUMBER_DOUBLE
+ case 14: // NUMBER_DOUBLE
value.move<double>(YY_MOVE(that.value));
break;
- case 11: // NUMBER_INT
+ case 12: // NUMBER_INT
value.move<int>(YY_MOVE(that.value));
break;
- case 12: // NUMBER_LONG
+ case 13: // NUMBER_LONG
value.move<long long>(YY_MOVE(that.value));
break;
- case 10: // STRING
+ case 11: // STRING
value.move<std::string>(YY_MOVE(that.value));
break;
@@ -270,28 +277,30 @@ PipelineParserGen::stack_symbol_type& PipelineParserGen::stack_symbol_type::oper
const stack_symbol_type& that) {
state = that.state;
switch (that.kind()) {
- case 16: // stageList
- case 17: // stage
+ case 17: // stageList
+ case 18: // stage
+ case 19: // inhibitOptimization
+ case 20: // unionWith
value.copy<CNode>(that.value);
break;
- case 14: // BOOL
+ case 15: // BOOL
value.copy<bool>(that.value);
break;
- case 13: // NUMBER_DOUBLE
+ case 14: // NUMBER_DOUBLE
value.copy<double>(that.value);
break;
- case 11: // NUMBER_INT
+ case 12: // NUMBER_INT
value.copy<int>(that.value);
break;
- case 12: // NUMBER_LONG
+ case 13: // NUMBER_LONG
value.copy<long long>(that.value);
break;
- case 10: // STRING
+ case 11: // STRING
value.copy<std::string>(that.value);
break;
@@ -307,28 +316,30 @@ PipelineParserGen::stack_symbol_type& PipelineParserGen::stack_symbol_type::oper
stack_symbol_type& that) {
state = that.state;
switch (that.kind()) {
- case 16: // stageList
- case 17: // stage
+ case 17: // stageList
+ case 18: // stage
+ case 19: // inhibitOptimization
+ case 20: // unionWith
value.move<CNode>(that.value);
break;
- case 14: // BOOL
+ case 15: // BOOL
value.move<bool>(that.value);
break;
- case 13: // NUMBER_DOUBLE
+ case 14: // NUMBER_DOUBLE
value.move<double>(that.value);
break;
- case 11: // NUMBER_INT
+ case 12: // NUMBER_INT
value.move<int>(that.value);
break;
- case 12: // NUMBER_LONG
+ case 13: // NUMBER_LONG
value.move<long long>(that.value);
break;
- case 10: // STRING
+ case 11: // STRING
value.move<std::string>(that.value);
break;
@@ -486,7 +497,7 @@ int PipelineParserGen::parse() {
try
#endif // YY_EXCEPTIONS
{
- symbol_type yylookahead(yylex(driver));
+ symbol_type yylookahead(yylex(lexer));
yyla.move(yylookahead);
}
#if YY_EXCEPTIONS
@@ -555,28 +566,30 @@ int PipelineParserGen::parse() {
correct type. The default '$$ = $1' action is NOT applied
when using variants. */
switch (yyr1_[yyn]) {
- case 16: // stageList
- case 17: // stage
+ case 17: // stageList
+ case 18: // stage
+ case 19: // inhibitOptimization
+ case 20: // unionWith
yylhs.value.emplace<CNode>();
break;
- case 14: // BOOL
+ case 15: // BOOL
yylhs.value.emplace<bool>();
break;
- case 13: // NUMBER_DOUBLE
+ case 14: // NUMBER_DOUBLE
yylhs.value.emplace<double>();
break;
- case 11: // NUMBER_INT
+ case 12: // NUMBER_INT
yylhs.value.emplace<int>();
break;
- case 12: // NUMBER_LONG
+ case 13: // NUMBER_LONG
yylhs.value.emplace<long long>();
break;
- case 10: // STRING
+ case 11: // STRING
yylhs.value.emplace<std::string>();
break;
@@ -600,42 +613,78 @@ int PipelineParserGen::parse() {
{
switch (yyn) {
case 2:
-#line 129 "pipeline_grammar.yy"
+#line 136 "pipeline_grammar.yy"
{
*cst = std::move(yystack_[1].value.as<CNode>());
}
-#line 673 "pipeline_parser_gen.cpp"
+#line 687 "pipeline_parser_gen.cpp"
break;
case 3:
-#line 134 "pipeline_grammar.yy"
+#line 141 "pipeline_grammar.yy"
{
}
-#line 679 "pipeline_parser_gen.cpp"
+#line 693 "pipeline_parser_gen.cpp"
break;
case 4:
-#line 135 "pipeline_grammar.yy"
+#line 142 "pipeline_grammar.yy"
{
- yylhs.value.as<CNode>() = std::move(yystack_[0].value.as<CNode>());
- auto& children =
- stdx::get<CNode::ArrayChildren>(yylhs.value.as<CNode>().payload);
- children.emplace_back(std::move(yystack_[2].value.as<CNode>()));
+ yylhs.value.as<CNode>() =
+ CNode{CNode::ArrayChildren{yystack_[2].value.as<CNode>()}};
}
-#line 689 "pipeline_parser_gen.cpp"
+#line 701 "pipeline_parser_gen.cpp"
break;
case 5:
-#line 143 "pipeline_grammar.yy"
+#line 150 "pipeline_grammar.yy"
+ {
+ lexer.sortObjTokens();
+ }
+#line 707 "pipeline_parser_gen.cpp"
+ break;
+
+ case 7:
+#line 153 "pipeline_grammar.yy"
+ {
+ yylhs.value.as<CNode>() = yystack_[0].value.as<CNode>();
+ }
+#line 713 "pipeline_parser_gen.cpp"
+ break;
+
+ case 8:
+#line 153 "pipeline_grammar.yy"
+ {
+ yylhs.value.as<CNode>() = yystack_[0].value.as<CNode>();
+ }
+#line 719 "pipeline_parser_gen.cpp"
+ break;
+
+ case 9:
+#line 157 "pipeline_grammar.yy"
{
yylhs.value.as<CNode>() = CNode{CNode::ObjectChildren{
std::pair{KeyFieldname::inhibitOptimization, CNode::noopLeaf()}}};
}
-#line 697 "pipeline_parser_gen.cpp"
+#line 728 "pipeline_parser_gen.cpp"
+ break;
+
+ case 10:
+#line 163 "pipeline_grammar.yy"
+ {
+ auto coll = CNode{UserString(yystack_[3].value.as<std::string>())};
+ auto pipeline = CNode{UserDouble(yystack_[1].value.as<double>())};
+ yylhs.value.as<CNode>() = CNode{CNode::ObjectChildren{
+ std::pair{KeyFieldname::unionWith,
+ CNode{CNode::ObjectChildren{
+ {KeyFieldname::collArg, std::move(coll)},
+ {KeyFieldname::pipelineArg, std::move(pipeline)}}}}}};
+ }
+#line 742 "pipeline_parser_gen.cpp"
break;
-#line 701 "pipeline_parser_gen.cpp"
+#line 746 "pipeline_parser_gen.cpp"
default:
break;
@@ -810,23 +859,28 @@ const signed char PipelineParserGen::yypact_ninf_ = -8;
const signed char PipelineParserGen::yytable_ninf_ = -1;
-const signed char PipelineParserGen::yypact_[] = {-7, -2, 2, -6, -4, -8, 3, 1, -8, 4, -2, -8, -8};
+const signed char PipelineParserGen::yypact_[] = {-3, 0, 4, -7, -1, -8, 3, -8, 5, -8, -8, -8,
+ 6, -2, 8, 0, -8, 1, -8, -8, 7, -6, 9, -8};
-const signed char PipelineParserGen::yydefact_[] = {0, 3, 0, 0, 0, 1, 0, 0, 2, 0, 3, 5, 4};
+const signed char PipelineParserGen::yydefact_[] = {0, 3, 0, 0, 0, 1, 0, 5, 0, 7, 8, 2,
+ 0, 0, 0, 3, 9, 0, 6, 4, 0, 0, 0, 10};
-const signed char PipelineParserGen::yypgoto_[] = {-8, -3, -8, -8};
+const signed char PipelineParserGen::yypgoto_[] = {-8, 10, -8, -8, -8, -8, -8, -8};
-const signed char PipelineParserGen::yydefgoto_[] = {-1, 4, 7, 2};
+const signed char PipelineParserGen::yydefgoto_[] = {-1, 4, 8, 9, 10, 2, 13, 14};
-const signed char PipelineParserGen::yytable_[] = {1, 3, 5, 6, 8, 10, 9, 12, 11};
+const signed char PipelineParserGen::yytable_[] = {6, 7, 1, 3, 5, 11, 12, 17, 22, 15, 16, 18, 20,
+ 23, 0, 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, 19};
-const signed char PipelineParserGen::yycheck_[] = {7, 3, 0, 9, 8, 4, 3, 10, 4};
+const signed char PipelineParserGen::yycheck_[] = {
+ 7, 8, 5, 3, 0, 6, 3, 9, 14, 4, 4, 3, 11, 4, -1, -1, -1, 10, -1, -1, -1, -1, -1, -1, -1, 15};
-const signed char PipelineParserGen::yystos_[] = {0, 7, 18, 3, 16, 0, 9, 17, 8, 3, 4, 4, 16};
+const signed char PipelineParserGen::yystos_[] = {0, 5, 21, 3, 17, 0, 7, 8, 18, 19, 20, 6,
+ 3, 22, 23, 4, 4, 9, 3, 17, 11, 10, 14, 4};
-const signed char PipelineParserGen::yyr1_[] = {0, 15, 18, 16, 16, 17};
+const signed char PipelineParserGen::yyr1_[] = {0, 16, 21, 17, 17, 23, 22, 18, 18, 19, 20};
-const signed char PipelineParserGen::yyr2_[] = {0, 2, 3, 0, 4, 3};
+const signed char PipelineParserGen::yyr2_[] = {0, 2, 3, 0, 4, 0, 2, 1, 1, 3, 7};
#if YYDEBUG
@@ -837,11 +891,12 @@ const char* const PipelineParserGen::yytname_[] = {"\"EOF\"",
"\"invalid token\"",
"START_OBJECT",
"END_OBJECT",
- "START_ORDERED_OBJECT",
- "END_ORDERED_OBJECT",
"START_ARRAY",
"END_ARRAY",
"STAGE_INHIBIT_OPTIMIZATION",
+ "STAGE_UNION_WITH",
+ "COLL_ARG",
+ "PIPELINE_ARG",
"STRING",
"NUMBER_INT",
"NUMBER_LONG",
@@ -850,13 +905,18 @@ const char* const PipelineParserGen::yytname_[] = {"\"EOF\"",
"$accept",
"stageList",
"stage",
+ "inhibitOptimization",
+ "unionWith",
"pipeline",
+ "START_ORDERED_OBJECT",
+ "$@1",
YY_NULLPTR};
#endif
#if YYDEBUG
-const unsigned char PipelineParserGen::yyrline_[] = {0, 129, 129, 134, 135, 143};
+const unsigned char PipelineParserGen::yyrline_[] = {
+ 0, 136, 136, 141, 142, 150, 150, 153, 153, 157, 163};
void PipelineParserGen::yy_stack_print_() const {
*yycdebug_ << "Stack now";
@@ -879,6 +939,6 @@ void PipelineParserGen::yy_reduce_print_(int yyrule) const {
#line 52 "pipeline_grammar.yy"
} // namespace mongo
-#line 1003 "pipeline_parser_gen.cpp"
+#line 1060 "pipeline_parser_gen.cpp"
-#line 148 "pipeline_grammar.yy"
+#line 173 "pipeline_grammar.yy"
diff --git a/src/mongo/db/cst/pipeline_parser_gen.hpp b/src/mongo/db/cst/pipeline_parser_gen.hpp
index c2f2e24a314..8e9787090f0 100644
--- a/src/mongo/db/cst/pipeline_parser_gen.hpp
+++ b/src/mongo/db/cst/pipeline_parser_gen.hpp
@@ -369,6 +369,8 @@ public:
union union_type {
// stageList
// stage
+ // inhibitOptimization
+ // unionWith
char dummy1[sizeof(CNode)];
// BOOL
@@ -429,16 +431,17 @@ public:
YYUNDEF = 2, // "invalid token"
START_OBJECT = 3, // START_OBJECT
END_OBJECT = 4, // END_OBJECT
- START_ORDERED_OBJECT = 5, // START_ORDERED_OBJECT
- END_ORDERED_OBJECT = 6, // END_ORDERED_OBJECT
- START_ARRAY = 7, // START_ARRAY
- END_ARRAY = 8, // END_ARRAY
- STAGE_INHIBIT_OPTIMIZATION = 9, // STAGE_INHIBIT_OPTIMIZATION
- STRING = 10, // STRING
- NUMBER_INT = 11, // NUMBER_INT
- NUMBER_LONG = 12, // NUMBER_LONG
- NUMBER_DOUBLE = 13, // NUMBER_DOUBLE
- BOOL = 14 // BOOL
+ START_ARRAY = 5, // START_ARRAY
+ END_ARRAY = 6, // END_ARRAY
+ STAGE_INHIBIT_OPTIMIZATION = 7, // STAGE_INHIBIT_OPTIMIZATION
+ STAGE_UNION_WITH = 8, // STAGE_UNION_WITH
+ COLL_ARG = 9, // COLL_ARG
+ PIPELINE_ARG = 10, // PIPELINE_ARG
+ STRING = 11, // STRING
+ NUMBER_INT = 12, // NUMBER_INT
+ NUMBER_LONG = 13, // NUMBER_LONG
+ NUMBER_DOUBLE = 14, // NUMBER_DOUBLE
+ BOOL = 15 // BOOL
};
/// Backward compatibility alias (Bison 3.6).
typedef token_kind_type yytokentype;
@@ -453,27 +456,32 @@ public:
/// Symbol kinds.
struct symbol_kind {
enum symbol_kind_type {
- YYNTOKENS = 15, ///< Number of tokens.
+ YYNTOKENS = 16, ///< Number of tokens.
S_YYEMPTY = -2,
S_YYEOF = 0, // "EOF"
S_YYerror = 1, // error
S_YYUNDEF = 2, // "invalid token"
S_START_OBJECT = 3, // START_OBJECT
S_END_OBJECT = 4, // END_OBJECT
- S_START_ORDERED_OBJECT = 5, // START_ORDERED_OBJECT
- S_END_ORDERED_OBJECT = 6, // END_ORDERED_OBJECT
- S_START_ARRAY = 7, // START_ARRAY
- S_END_ARRAY = 8, // END_ARRAY
- S_STAGE_INHIBIT_OPTIMIZATION = 9, // STAGE_INHIBIT_OPTIMIZATION
- S_STRING = 10, // STRING
- S_NUMBER_INT = 11, // NUMBER_INT
- S_NUMBER_LONG = 12, // NUMBER_LONG
- S_NUMBER_DOUBLE = 13, // NUMBER_DOUBLE
- S_BOOL = 14, // BOOL
- S_YYACCEPT = 15, // $accept
- S_stageList = 16, // stageList
- S_stage = 17, // stage
- S_pipeline = 18 // pipeline
+ S_START_ARRAY = 5, // START_ARRAY
+ S_END_ARRAY = 6, // END_ARRAY
+ S_STAGE_INHIBIT_OPTIMIZATION = 7, // STAGE_INHIBIT_OPTIMIZATION
+ S_STAGE_UNION_WITH = 8, // STAGE_UNION_WITH
+ S_COLL_ARG = 9, // COLL_ARG
+ S_PIPELINE_ARG = 10, // PIPELINE_ARG
+ S_STRING = 11, // STRING
+ S_NUMBER_INT = 12, // NUMBER_INT
+ S_NUMBER_LONG = 13, // NUMBER_LONG
+ S_NUMBER_DOUBLE = 14, // NUMBER_DOUBLE
+ S_BOOL = 15, // BOOL
+ S_YYACCEPT = 16, // $accept
+ S_stageList = 17, // stageList
+ S_stage = 18, // stage
+ S_inhibitOptimization = 19, // inhibitOptimization
+ S_unionWith = 20, // unionWith
+ S_pipeline = 21, // pipeline
+ S_START_ORDERED_OBJECT = 22, // START_ORDERED_OBJECT
+ S_23_1 = 23 // $@1
};
};
@@ -502,28 +510,30 @@ public:
basic_symbol(basic_symbol&& that)
: Base(std::move(that)), value(), location(std::move(that.location)) {
switch (this->kind()) {
- case 16: // stageList
- case 17: // stage
+ case 17: // stageList
+ case 18: // stage
+ case 19: // inhibitOptimization
+ case 20: // unionWith
value.move<CNode>(std::move(that.value));
break;
- case 14: // BOOL
+ case 15: // BOOL
value.move<bool>(std::move(that.value));
break;
- case 13: // NUMBER_DOUBLE
+ case 14: // NUMBER_DOUBLE
value.move<double>(std::move(that.value));
break;
- case 11: // NUMBER_INT
+ case 12: // NUMBER_INT
value.move<int>(std::move(that.value));
break;
- case 12: // NUMBER_LONG
+ case 13: // NUMBER_LONG
value.move<long long>(std::move(that.value));
break;
- case 10: // STRING
+ case 11: // STRING
value.move<std::string>(std::move(that.value));
break;
@@ -604,28 +614,30 @@ public:
// Value type destructor.
switch (yykind) {
- case 16: // stageList
- case 17: // stage
+ case 17: // stageList
+ case 18: // stage
+ case 19: // inhibitOptimization
+ case 20: // unionWith
value.template destroy<CNode>();
break;
- case 14: // BOOL
+ case 15: // BOOL
value.template destroy<bool>();
break;
- case 13: // NUMBER_DOUBLE
+ case 14: // NUMBER_DOUBLE
value.template destroy<double>();
break;
- case 11: // NUMBER_INT
+ case 12: // NUMBER_INT
value.template destroy<int>();
break;
- case 12: // NUMBER_LONG
+ case 13: // NUMBER_LONG
value.template destroy<long long>();
break;
- case 10: // STRING
+ case 11: // STRING
value.template destroy<std::string>();
break;
@@ -719,17 +731,17 @@ public:
symbol_type(int tok, location_type l) : super_type(token_type(tok), std::move(l)) {
YY_ASSERT(tok == token::END_OF_FILE || tok == token::YYerror || tok == token::YYUNDEF ||
tok == token::START_OBJECT || tok == token::END_OBJECT ||
- tok == token::START_ORDERED_OBJECT || tok == token::END_ORDERED_OBJECT ||
tok == token::START_ARRAY || tok == token::END_ARRAY ||
- tok == token::STAGE_INHIBIT_OPTIMIZATION);
+ tok == token::STAGE_INHIBIT_OPTIMIZATION || tok == token::STAGE_UNION_WITH ||
+ tok == token::COLL_ARG || tok == token::PIPELINE_ARG);
}
#else
symbol_type(int tok, const location_type& l) : super_type(token_type(tok), l) {
YY_ASSERT(tok == token::END_OF_FILE || tok == token::YYerror || tok == token::YYUNDEF ||
tok == token::START_OBJECT || tok == token::END_OBJECT ||
- tok == token::START_ORDERED_OBJECT || tok == token::END_ORDERED_OBJECT ||
tok == token::START_ARRAY || tok == token::END_ARRAY ||
- tok == token::STAGE_INHIBIT_OPTIMIZATION);
+ tok == token::STAGE_INHIBIT_OPTIMIZATION || tok == token::STAGE_UNION_WITH ||
+ tok == token::COLL_ARG || tok == token::PIPELINE_ARG);
}
#endif
#if 201103L <= YY_CPLUSPLUS
@@ -790,7 +802,7 @@ public:
};
/// Build a parser object.
- PipelineParserGen(BSONLexer& driver_yyarg, CNode* cst_yyarg);
+ PipelineParserGen(BSONLexer& lexer_yyarg, CNode* cst_yyarg);
virtual ~PipelineParserGen();
#if 201103L <= YY_CPLUSPLUS
@@ -884,24 +896,6 @@ public:
}
#endif
#if 201103L <= YY_CPLUSPLUS
- static symbol_type make_START_ORDERED_OBJECT(location_type l) {
- return symbol_type(token::START_ORDERED_OBJECT, std::move(l));
- }
-#else
- static symbol_type make_START_ORDERED_OBJECT(const location_type& l) {
- return symbol_type(token::START_ORDERED_OBJECT, l);
- }
-#endif
-#if 201103L <= YY_CPLUSPLUS
- static symbol_type make_END_ORDERED_OBJECT(location_type l) {
- return symbol_type(token::END_ORDERED_OBJECT, std::move(l));
- }
-#else
- static symbol_type make_END_ORDERED_OBJECT(const location_type& l) {
- return symbol_type(token::END_ORDERED_OBJECT, l);
- }
-#endif
-#if 201103L <= YY_CPLUSPLUS
static symbol_type make_START_ARRAY(location_type l) {
return symbol_type(token::START_ARRAY, std::move(l));
}
@@ -929,6 +923,33 @@ public:
}
#endif
#if 201103L <= YY_CPLUSPLUS
+ static symbol_type make_STAGE_UNION_WITH(location_type l) {
+ return symbol_type(token::STAGE_UNION_WITH, std::move(l));
+ }
+#else
+ static symbol_type make_STAGE_UNION_WITH(const location_type& l) {
+ return symbol_type(token::STAGE_UNION_WITH, l);
+ }
+#endif
+#if 201103L <= YY_CPLUSPLUS
+ static symbol_type make_COLL_ARG(location_type l) {
+ return symbol_type(token::COLL_ARG, std::move(l));
+ }
+#else
+ static symbol_type make_COLL_ARG(const location_type& l) {
+ return symbol_type(token::COLL_ARG, l);
+ }
+#endif
+#if 201103L <= YY_CPLUSPLUS
+ static symbol_type make_PIPELINE_ARG(location_type l) {
+ return symbol_type(token::PIPELINE_ARG, std::move(l));
+ }
+#else
+ static symbol_type make_PIPELINE_ARG(const location_type& l) {
+ return symbol_type(token::PIPELINE_ARG, l);
+ }
+#endif
+#if 201103L <= YY_CPLUSPLUS
static symbol_type make_STRING(std::string v, location_type l) {
return symbol_type(token::STRING, std::move(v), std::move(l));
}
@@ -1249,14 +1270,14 @@ private:
/// Constants.
enum {
- yylast_ = 8, ///< Last index in yytable_.
- yynnts_ = 4, ///< Number of nonterminal symbols.
- yyfinal_ = 5 ///< Termination state number.
+ yylast_ = 25, ///< Last index in yytable_.
+ yynnts_ = 8, ///< Number of nonterminal symbols.
+ yyfinal_ = 5 ///< Termination state number.
};
// User arguments.
- BSONLexer& driver;
+ BSONLexer& lexer;
CNode* cst;
};
@@ -1269,28 +1290,30 @@ template <typename Base>
PipelineParserGen::basic_symbol<Base>::basic_symbol(const basic_symbol& that)
: Base(that), value(), location(that.location) {
switch (this->kind()) {
- case 16: // stageList
- case 17: // stage
+ case 17: // stageList
+ case 18: // stage
+ case 19: // inhibitOptimization
+ case 20: // unionWith
value.copy<CNode>(YY_MOVE(that.value));
break;
- case 14: // BOOL
+ case 15: // BOOL
value.copy<bool>(YY_MOVE(that.value));
break;
- case 13: // NUMBER_DOUBLE
+ case 14: // NUMBER_DOUBLE
value.copy<double>(YY_MOVE(that.value));
break;
- case 11: // NUMBER_INT
+ case 12: // NUMBER_INT
value.copy<int>(YY_MOVE(that.value));
break;
- case 12: // NUMBER_LONG
+ case 13: // NUMBER_LONG
value.copy<long long>(YY_MOVE(that.value));
break;
- case 10: // STRING
+ case 11: // STRING
value.copy<std::string>(YY_MOVE(that.value));
break;
@@ -1315,28 +1338,30 @@ template <typename Base>
void PipelineParserGen::basic_symbol<Base>::move(basic_symbol& s) {
super_type::move(s);
switch (this->kind()) {
- case 16: // stageList
- case 17: // stage
+ case 17: // stageList
+ case 18: // stage
+ case 19: // inhibitOptimization
+ case 20: // unionWith
value.move<CNode>(YY_MOVE(s.value));
break;
- case 14: // BOOL
+ case 15: // BOOL
value.move<bool>(YY_MOVE(s.value));
break;
- case 13: // NUMBER_DOUBLE
+ case 14: // NUMBER_DOUBLE
value.move<double>(YY_MOVE(s.value));
break;
- case 11: // NUMBER_INT
+ case 12: // NUMBER_INT
value.move<int>(YY_MOVE(s.value));
break;
- case 12: // NUMBER_LONG
+ case 13: // NUMBER_LONG
value.move<long long>(YY_MOVE(s.value));
break;
- case 10: // STRING
+ case 11: // STRING
value.move<std::string>(YY_MOVE(s.value));
break;
@@ -1380,7 +1405,7 @@ inline PipelineParserGen::symbol_kind_type PipelineParserGen::by_kind::type_get(
#line 52 "pipeline_grammar.yy"
} // namespace mongo
-#line 1658 "pipeline_parser_gen.hpp"
+#line 1689 "pipeline_parser_gen.hpp"
#endif // !YY_YY_PIPELINE_PARSER_GEN_HPP_INCLUDED