diff options
author | Matt Boros <matt.boros@mongodb.com> | 2023-05-11 22:10:35 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-05-11 23:52:56 +0000 |
commit | f4b6a7cd809dba448f1c474f492556d0027e160d (patch) | |
tree | d2183a88aaaeb141f2244d0bd7cfde11ce707357 | |
parent | 3e097ab7b72c36b2acad55a1f93de710ffec4da7 (diff) | |
download | mongo-f4b6a7cd809dba448f1c474f492556d0027e160d.tar.gz |
SERVER-70405 Document Bonsai classes and functions
7 files changed, 63 insertions, 31 deletions
diff --git a/src/mongo/db/query/optimizer/metadata.cpp b/src/mongo/db/query/optimizer/metadata.cpp index 4a4bea82d51..e8dabcaff36 100644 --- a/src/mongo/db/query/optimizer/metadata.cpp +++ b/src/mongo/db/query/optimizer/metadata.cpp @@ -109,7 +109,7 @@ IndexDefinition::IndexDefinition(IndexCollationSpec collationSpec, DistributionAndPaths distributionAndPaths, PartialSchemaRequirements partialReqMap) : IndexDefinition(std::move(collationSpec), - 2 /*version*/, + 1 /*version*/, 0 /*orderingBits*/, isMultiKey, std::move(distributionAndPaths), diff --git a/src/mongo/db/query/optimizer/metadata.h b/src/mongo/db/query/optimizer/metadata.h index c698ca83352..50f7e1d4b20 100644 --- a/src/mongo/db/query/optimizer/metadata.h +++ b/src/mongo/db/query/optimizer/metadata.h @@ -65,6 +65,14 @@ struct DistributionAndPaths { }; +/** + * Structure to represent index field component and its associated collation. The _path field + * contains the path to the field component, restricted to Get, Traverse, and Id elements. + * For example, if we have an index on {a.b, c} that contains arrays, the _path for the first entry + * would be Get "a" Traverse Get "b" Traverse Id, and the _path for the second entry would be + * Get "c" Traverse Id. + * Implicitly contains multikey info through Traverse element or lack of Traverse element. + */ struct IndexCollationEntry { IndexCollationEntry(ABT path, CollationOp op); @@ -74,6 +82,7 @@ struct IndexCollationEntry { CollationOp _op; }; +// Full collation specification, using a list of component entries. using IndexCollationSpec = std::vector<IndexCollationEntry>; /** @@ -115,7 +124,9 @@ struct MultikeynessTrie { }; /** - * Defines an available system index. + * Metadata associated with an index. Holds the index specification (index fields and their + * collations), its version (0 or 1), the collations as a bit mask, multikeyness info, and + * distribution info. This is a convenient structure for the query planning process. */ class IndexDefinition { public: @@ -161,8 +172,12 @@ private: using IndexDefinitions = opt::unordered_map<std::string, IndexDefinition>; using ScanDefOptions = opt::unordered_map<std::string, std::string>; -// Used to specify parameters to scan node, such as collection name, or file where collection is -// read from. +/** + * Parameters to a scan node, including distribution information, associated index definitions, + * and multikeyness information. Also includes any ScanDefOptions we might have, such as which + * database the collection is associated with, the origin of the collection (mongod or a BSON file), + * or the UUID of the collection. + */ class ScanDefinition { public: ScanDefinition(); @@ -207,6 +222,14 @@ private: boost::optional<CEType> _ce; }; +/** + * Represents the optimizer’s view of the state of the rest of the system in terms of relevant + * resources. Currently we store the set of available collections in the system. In the future, + * when we support distributed planning, this is where we will put information related to the + * physical organization and topology of the machines. + * For each collection, we hold distribution information (fields it may be sharded on), multikeyness + * info, and data related to associated indexes in addition to other relevant metadata. + */ struct Metadata { Metadata(opt::unordered_map<std::string, ScanDefinition> scanDefs); Metadata(opt::unordered_map<std::string, ScanDefinition> scanDefs, size_t numberOfPartitions); diff --git a/src/mongo/db/query/optimizer/opt_phase_manager.h b/src/mongo/db/query/optimizer/opt_phase_manager.h index 5f22ebcbf18..593d779aa7a 100644 --- a/src/mongo/db/query/optimizer/opt_phase_manager.h +++ b/src/mongo/db/query/optimizer/opt_phase_manager.h @@ -42,16 +42,15 @@ namespace mongo::optimizer { using namespace cascades; -/** - * This class wraps together different optimization phases. - * First the transport rewrites are applied such as constant folding and redundant expression - * elimination. Second the logical and physical reordering rewrites are applied using the memo. - * Third the final transport rewritesd are applied. - */ - #define OPT_PHASE(F) \ /* ConstEval performs the following rewrites: constant folding, inlining, and dead code \ - * elimination. */ \ + * elimination. \ + * PathFusion implements path laws, for example shortcutting field assignment and reads, and \ + * other path optimizations. \ + * We switch between applying ConstEval and PathFusion for as long as they change the query, \ + * as they can enable new rewrites in each other. These are both done in-place rather than \ + * creating plan alternatives \ + */ \ F(ConstEvalPre) \ F(PathFuse) \ \ @@ -65,13 +64,23 @@ using namespace cascades; /* Implementation and enforcement rules. */ \ F(MemoImplementationPhase) \ \ + /* Lowers paths to expressions. Not to be confused with SBENodeLowering, which lowers ABT \ + * nodes and expressions to an SBE plan. */ \ F(PathLower) \ + /* Final round of constant folding, identical to the first ConstEval stage. */ \ F(ConstEvalPost) MAKE_PRINTABLE_ENUM(OptPhase, OPT_PHASE); MAKE_PRINTABLE_ENUM_STRING_ARRAY(OptPhaseEnum, OptPhase, OPT_PHASE); #undef OPT_PHASE +/** + * This class drives the optimization process, wrapping together different optimization phases. + * First the transport rewrites are applied such as constant folding and redundant expression + * elimination. Second the logical and physical reordering rewrites are applied using the memo. + * Third the final transport rewrites are applied. + * Phases may be skipped by specifying a subset of the phases to run in the phaseSet argument. + */ class OptPhaseManager { public: using PhaseSet = opt::unordered_set<OptPhase>; diff --git a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt index b5fa136f28e..2786522ff59 100644 --- a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt +++ b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt @@ -5,25 +5,25 @@ IndexScan [{'<rid>': rid}, scanDefName: collName, indexDefName: index0, interval: {(Const [23], Const [35]]}] -- OUTPUT: -[0] ixseek ks(2ll, 0, 23L, 2ll) ks(2ll, 0, 35L, 2ll) none s1 none none [] @"<collUUID>" @"<collUUID>" true +[0] ixseek ks(1ll, 0, 23L, 2ll) ks(1ll, 0, 35L, 2ll) none s1 none none [] @"<collUUID>" @"<collUUID>" true ==== VARIATION: Covering forward index scan with one field ==== -- INPUT: IndexScan [{'<indexKey> 0': proj0}, scanDefName: collName, indexDefName: index0, interval: {[Const [26], Const [35])}] -- OUTPUT: -[0] ixseek ks(2ll, 0, 26L, 1ll) ks(2ll, 0, 35L, 1ll) none none none none [s1 = 0] @"<collUUID>" @"<collUUID>" true +[0] ixseek ks(1ll, 0, 26L, 1ll) ks(1ll, 0, 35L, 1ll) none none none none [s1 = 0] @"<collUUID>" @"<collUUID>" true ==== VARIATION: Basic reverse index scan with RID ==== -- INPUT: IndexScan [{'<rid>': rid}, scanDefName: collName, indexDefName: index0, interval: {[Const [27], Const [135])}, reversed] -- OUTPUT: -[0] ixseek ks(2ll, 0, 135L, 1ll) ks(2ll, 0, 27L, 1ll) none s1 none none [] @"<collUUID>" @"<collUUID>" false +[0] ixseek ks(1ll, 0, 135L, 1ll) ks(1ll, 0, 27L, 1ll) none s1 none none [] @"<collUUID>" @"<collUUID>" false ==== VARIATION: Covering reverse index scan with one field ==== -- INPUT: IndexScan [{'<indexKey> 0': proj0}, scanDefName: collName, indexDefName: index0, interval: {[Const [29], Const [47]]}, reversed] -- OUTPUT: -[0] ixseek ks(2ll, 0, 47L, 2ll) ks(2ll, 0, 29L, 1ll) none none none none [s1 = 0] @"<collUUID>" @"<collUUID>" false +[0] ixseek ks(1ll, 0, 47L, 2ll) ks(1ll, 0, 29L, 1ll) none none none none [s1 = 0] @"<collUUID>" @"<collUUID>" false diff --git a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt index 66c58c879cb..0292de4fdba 100644 --- a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt +++ b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt @@ -11,7 +11,7 @@ IndexScan [{'<rid>': rid}, scanDefName: collName, indexDefName: index0, interval -- OUTPUT: [3] nlj inner [] [s1] {true} left - [0] ixseek ks(2ll, 0, 23L, 2ll) ks(2ll, 0, 35L, 2ll) none s1 none none [] @"<collUUID>" @"<collUUID>" true + [0] ixseek ks(1ll, 0, 23L, 2ll) ks(1ll, 0, 35L, 2ll) none s1 none none [] @"<collUUID>" @"<collUUID>" true right [2] limitskip 1 0 [1] seek s1 s2 none none none none none none none [] @"<collUUID>" true false diff --git a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt index aeac2c94272..303ac77e9d7 100644 --- a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt +++ b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt @@ -18,7 +18,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -65,7 +65,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -336,7 +336,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -379,7 +379,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -426,7 +426,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -478,7 +478,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -535,7 +535,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -600,7 +600,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -656,7 +656,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -701,7 +701,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -766,7 +766,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -810,7 +810,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: diff --git a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt index 5f3e634225e..e7f944ff1f3 100644 --- a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt +++ b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt @@ -18,7 +18,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -77,7 +77,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: |