From 851e346893f82a134e0bf8f3a4f393798340cd53 Mon Sep 17 00:00:00 2001
From: Mathias Stearn
Date: Tue, 12 Apr 2022 16:10:28 +0000
Subject: SERVER-65192 Implement ColumnShredder to extract paths and cells for a document

---
 src/mongo/db/index/column_key_generator.h | 111 ++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 src/mongo/db/index/column_key_generator.h

(limited to 'src/mongo/db/index/column_key_generator.h')

NOTE(review): this copy of the patch had lost its line breaks and every
angle-bracket span. The system include target, the license URL, and the
template arguments of std::vector and function_ref in the hunk below were
reconstructed from the surrounding comments; confirm them against the
upstream commit (blob d88e642ff0d) before applying.

diff --git a/src/mongo/db/index/column_key_generator.h b/src/mongo/db/index/column_key_generator.h
new file mode 100644
index 00000000000..d88e642ff0d
--- /dev/null
+++ b/src/mongo/db/index/column_key_generator.h
@@ -0,0 +1,111 @@
+/**
+ *    Copyright (C) 2022-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+#include <iosfwd>
+
+#include "mongo/db/storage/column_store.h"
+#include "mongo/util/functional.h"
+
+namespace mongo::column_keygen {
+/**
+ * This is a representation of the cell prior to flattening it out into a buffer which is passed to
+ * visitor callbacks.
+ *
+ * All memory within the UnencodedCellView should only be assumed valid within the callback. If you
+ * need it longer, you must copy it yourself. Non-test callers will generally immediately encode
+ * this to a flat buffer, so this shouldn't be a problem.
+ */
+struct UnencodedCellView {
+    const std::vector<BSONElement>& vals;
+    StringData arrayInfo;
+
+    // If true, this path has multiple values in a single (possibly nested) object with the same
+    // field name. In this case, arrayInfo will be empty and this cell must not be used to
+    // reconstruct an object. We should probably not attempt to encode vals in the index either, and
+    // just put a marker that causes us to either skip the row (because it breaks the rules) or go
+    // to the row store.
+    //
+    // Note that this detection is best-effort and will only detect cases that would result in
+    // corrupt array info. We have decided that query results do not need to be precise for objects
+    // with duplicate fields, so it is OK if we don't detect every case, as long as we don't crash
+    // or cause corruption on the undetected cases.
+    bool hasDuplicateFields;
+
+    // If true, this cell omits values that are stored in subpaths.
+    bool hasSubPaths;
+
+    // If true, when reconstructing an object, you will need to visit the parent path in order to
+    // match current semantics for projections and field-path expressions.
+    bool isSparse;
+
+    // If true, at least one of the values in vals is inside of a directly-double-nested array, or
+    // the field name was encountered while already inside of a directly-double-nested array, so
+    // arrayInfo must be consulted to know which values to skip when matching. If false, simple
+    // matches can ignore the array info and just match against each value independently.
+    bool hasDoubleNestedArrays;
+
+    // These are only used in tests and for debugging.
+    friend bool operator==(const UnencodedCellView&, const UnencodedCellView&);
+    friend std::ostream& operator<<(std::ostream&, const UnencodedCellView&);
+    friend std::ostream& operator<<(std::ostream&, const UnencodedCellView*);
+};
+
+/**
+ * Visits all paths within obj and provides their cell values.
+ * Path visit order is unspecified.
+ */
+void visitCellsForInsert(const BSONObj& obj,
+                         function_ref<void(PathView, const UnencodedCellView&)> cb);
+
+/**
+ * Visits all paths within obj. When deleting, you do not need to know about values.
+ * Path visit order is unspecified.
+ */
+void visitPathsForDelete(const BSONObj& obj, function_ref<void(PathView)> cb);
+
+/**
+ * See visitDiffForUpdate().
+ */
+enum DiffAction { kInsert, kUpdate, kDelete };
+
+/**
+ * Computes differences between oldObj and newObj, and invokes cb() with the required actions to
+ * take to update the columnar index.
+ *
+ * For kInsert and kUpdate, the UnencodedCellView will point to the cell data for newObj (you
+ * don't need to know the value for oldObj).
+ *
+ * For kDelete, the UnencodedCellView pointer will be null.
+ *
+ * Path visit order is unspecified.
+ */
+void visitDiffForUpdate(const BSONObj& oldObj,
+                        const BSONObj& newObj,
+                        function_ref<void(DiffAction, PathView, const UnencodedCellView*)> cb);
+
+}  // namespace mongo::column_keygen
-- 
cgit v1.2.1