summaryrefslogtreecommitdiff
path: root/src/mongo/db/pipeline/change_stream_document_diff_parser.cpp
blob: c847f32872ea9f24df79448e7732cd4f0c158ae5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
/**
 *    Copyright (C) 2020-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#include "mongo/db/pipeline/change_stream_document_diff_parser.h"

#include "mongo/db/field_ref.h"

namespace mongo {
using doc_diff::ArrayDiffReader;
using doc_diff::Diff;
using doc_diff::DocumentDiffReader;

namespace {
// Shorthand for the description type that this file's builder populates and parseDiff() returns.
using DeltaUpdateDescription = change_stream_document_diff_parser::DeltaUpdateDescription;
// A single path component: either a field name (StringData) or a position in an array (size_t).
using FieldNameOrArrayIndex = stdx::variant<StringData, size_t>;

/**
 * DeltaUpdateDescriptionBuilder is responsible both for tracking the current path as we traverse
 * the diff, and for populating a DeltaUpdateDescription reflecting the contents of that diff.
 */
struct DeltaUpdateDescriptionBuilder {
    // Adds the specified entry to the 'updateFields' document in the DeltaUpdateDescription.
    void addToUpdatedFields(FieldNameOrArrayIndex terminalField, Value updatedValue) {
        DeltaUpdateDescriptionBuilder::TempAppendToPath tmpAppend(*this, terminalField);
        _updatedFields.addField(_fieldRef.dottedField(), updatedValue);
        _addToDisambiguatedPathsIfRequired();
    }

    // Adds the specified entry to the 'removedFields' vector in the DeltaUpdateDescription.
    void addToRemovedFields(StringData terminalFieldName) {
        DeltaUpdateDescriptionBuilder::TempAppendToPath tmpAppend(*this, terminalFieldName);
        _updateDesc.removedFields.push_back(Value(_fieldRef.dottedField()));
        _addToDisambiguatedPathsIfRequired();
    }

    // Adds the current path to the 'truncatedArrays' vector in the DeltaUpdateDescription.
    void addToTruncatedArrays(int newSize) {
        _updateDesc.truncatedArrays.push_back(
            Value(Document{{"field", _fieldRef.dottedField()}, {"newSize", newSize}}));
        _addToDisambiguatedPathsIfRequired();
    }

    // Called once the diff traversal is complete. Freezes and returns the DeltaUpdateDescription.
    // It is an error to use the DeltaUpdateDescriptionBuilder again after this method is called.
    DeltaUpdateDescription&& freezeDeltaUpdateDescription() {
        _updateDesc.updatedFields = _updatedFields.freeze();
        _updateDesc.disambiguatedPaths = _disambiguatedPaths.freeze();
        return std::move(_updateDesc);
    }

    // Returns the last field in the current path.
    StringData lastPart() const {
        return _fieldRef.getPart(_fieldRef.numParts() - 1);
    }

    // Returns the number of fields in the current path.
    FieldIndex numParts() const {
        return _fieldRef.numParts();
    }

    // A structure used to add a scope-guarded field to the current path maintained by the builder.
    // When this object goes out of scope, it will automatically remove the field from the path.
    struct TempAppendToPath {
        TempAppendToPath(DeltaUpdateDescriptionBuilder& builder, FieldNameOrArrayIndex field)
            : _builder(builder) {
            // Append the specified field to the builder's path.
            _builder._appendFieldToPath(std::move(field));
        }

        ~TempAppendToPath() {
            // Remove the last field from the path when we go out of scope.
            _builder._removeLastFieldfromPath();
        }

    private:
        DeltaUpdateDescriptionBuilder& _builder;
    };

private:
    // A structure for tracking path ambiguity information. Maps 1:1 to fields in the FieldRef via
    // the _pathAmbiguity list. The 'pathIsAmbiguous' bool indicates whether the path as a whole is
    // ambiguous as of the corresponding field. Once a path is marked as ambiguous, all subsequent
    // entries must also be marked as ambiguous.
    struct AmbiguityInfo {
        bool pathIsAmbiguous = false;
        BSONType fieldType = BSONType::String;
    };

    // Append the given field to the path, and update the path ambiguity information accordingly.
    void _appendFieldToPath(FieldNameOrArrayIndex field) {
        // Resolve the FieldNameOrArrayIndex to one or the other, and append it to the path.
        const bool isArrayIndex = stdx::holds_alternative<size_t>(field);
        _fieldRef.appendPart(isArrayIndex ? std::to_string(stdx::get<size_t>(field))
                                          : stdx::get<StringData>(field));

        // Once a path has become ambiguous, it will remain so as new fields are added. If the final
        // path component is marked ambiguous, retain that value and add the type of the new field.
        const auto fieldType = (isArrayIndex ? BSONType::NumberInt : BSONType::String);
        if (!_pathAmbiguity.empty() && _pathAmbiguity.back().pathIsAmbiguous) {
            _pathAmbiguity.push_back({true /* pathIsAmbiguous */, fieldType});
            return;
        }
        // If the field is a numeric string or contains an embedded dot, it's ambiguous. We record
        // array indices so that we can reconstruct the path, but the presence of an array index is
        // not itself sufficient to make the path ambiguous. We don't include numeric fields at the
        // start of the path because those are unambiguous.
        const bool isNumeric = (!isArrayIndex && _fieldRef.numParts() > 1 &&
                                FieldRef::isNumericPathComponentStrict(lastPart()));
        const bool isDotted =
            (!isArrayIndex && !isNumeric && lastPart().find('.') != std::string::npos);

        // Add to the field list, marking the path as ambiguous if this field is dotted or numeric.
        _pathAmbiguity.push_back({(isNumeric || isDotted), fieldType});
    }

    // Remove the last field from the path, along with its entry in the ambiguity list.
    void _removeLastFieldfromPath() {
        _fieldRef.removeLastPart();
        _pathAmbiguity.pop_back();
    }

    // If this path is marked as ambiguous, add a new entry for it to 'disambiguatedPaths'.
    void _addToDisambiguatedPathsIfRequired() {
        // The final entry in _pathAmbiguity will always be marked as ambiguous if any field in the
        // path is ambiguous. If so, iterate over the list and create a vector of individual fields.
        if (!_pathAmbiguity.empty() && _pathAmbiguity.back().pathIsAmbiguous) {
            std::vector<Value> disambiguatedPath;
            FieldIndex fieldNum = 0;
            for (const auto& fieldInfo : _pathAmbiguity) {
                auto fieldVal = _fieldRef.getPart(fieldNum++);
                disambiguatedPath.push_back(fieldInfo.fieldType == BSONType::NumberInt
                                                ? Value(std::stoi(fieldVal.toString()))
                                                : Value(fieldVal));
            }
            // Add the vector of individual fields into the 'disambiguatedPaths' document. The name
            // of the field matches the entry in updatedFields, removedFields, or truncatedArrays.
            _disambiguatedPaths.addField(_fieldRef.dottedField(),
                                         Value(std::move(disambiguatedPath)));
        }
    }

    friend struct DeltaUpdateDescriptionBuilder::TempAppendToPath;

    // Each element in the _pathAmbiguity list annotates the field at the corresponding index in the
    // _fieldRef, indicating the type of that field and whether the path is ambiguous at that point.
    std::list<AmbiguityInfo> _pathAmbiguity;
    FieldRef _fieldRef;

    DeltaUpdateDescription _updateDesc;
    MutableDocument _updatedFields;
    MutableDocument _disambiguatedPaths;
};

// Recursively walks one level of a diff and records its contents in 'builder'. The 'reader' points
// at either a document-level or an array-level diff reader. If 'currentSubField' is set, it names
// the field or array index under which this level is nested, and is kept on the builder's path for
// the duration of the call.
void buildUpdateDescriptionWithDeltaOplog(
    stdx::variant<DocumentDiffReader*, ArrayDiffReader*> reader,
    DeltaUpdateDescriptionBuilder* builder,
    boost::optional<FieldNameOrArrayIndex> currentSubField) {

    // Push the name of this level onto the path, if there is one. The guard pops it again when this
    // function returns.
    boost::optional<DeltaUpdateDescriptionBuilder::TempAppendToPath> pathGuard;
    if (currentSubField) {
        pathGuard.emplace(*builder, std::move(*currentSubField));
    }

    stdx::visit(
        visit_helper::Overloaded{
            [&](DocumentDiffReader* docReader) {
                // Updates and inserts are both reported as updated fields. On every iteration we
                // ask for the next update first, and fall back to the next insert only when there
                // is no update to report.
                boost::optional<BSONElement> modified;
                for (;;) {
                    modified = docReader->nextUpdate();
                    if (!modified) {
                        modified = docReader->nextInsert();
                    }
                    if (!modified) {
                        break;
                    }
                    builder->addToUpdatedFields(modified->fieldNameStringData(), Value(*modified));
                }

                // Deleted fields are reported as removed fields.
                for (auto removed = docReader->nextDelete(); removed;
                     removed = docReader->nextDelete()) {
                    builder->addToRemovedFields(*removed);
                }

                // Descend into each nested diff, whichever kind of reader it holds, passing along
                // the name of the field under which it is nested.
                while (auto subDiff = docReader->nextSubDiff()) {
                    stdx::visit(
                        [&](auto& subReader) {
                            buildUpdateDescriptionWithDeltaOplog(
                                &subReader, builder, {{subDiff->first}});
                        },
                        subDiff->second);
                }
            },

            [&](ArrayDiffReader* arrayReader) {
                // An array diff cannot be the root of the diff object, so the path must already
                // contain at least one field by the time we get here.
                tassert(6697700, "Invalid diff or parsing error", builder->numParts() > 0);

                // The path already names this array, so a truncation is recorded against the
                // current path without appending anything further.
                const auto truncatedSize = arrayReader->newSize();
                if (truncatedSize) {
                    builder->addToTruncatedArrays(*truncatedSize);
                }

                // Each entry pairs an array index with either a replacement element or a nested
                // diff for the element at that index.
                while (auto entry = arrayReader->next()) {
                    stdx::visit(
                        visit_helper::Overloaded{
                            [&](BSONElement elem) {
                                builder->addToUpdatedFields(entry->first, Value(elem));
                            },

                            [&](auto& subReader) {
                                buildUpdateDescriptionWithDeltaOplog(
                                    &subReader, builder, {{entry->first}});
                            },
                        },
                        entry->second);
                }
            },
        },
        reader);
}

}  // namespace

namespace change_stream_document_diff_parser {

// Parses the given diff into a DeltaUpdateDescription. The traversal starts from a document-level
// reader over 'diff' with no enclosing field, so the path is empty at the top level.
DeltaUpdateDescription parseDiff(const Diff& diff) {
    DocumentDiffReader rootReader(diff);
    DeltaUpdateDescriptionBuilder descriptionBuilder;

    buildUpdateDescriptionWithDeltaOplog(&rootReader, &descriptionBuilder, boost::none);

    // The builder must not be used again once the description has been frozen.
    return descriptionBuilder.freezeDeltaUpdateDescription();
}

}  // namespace change_stream_document_diff_parser
}  // namespace mongo