From f4b6a7cd809dba448f1c474f492556d0027e160d Mon Sep 17 00:00:00 2001 From: Matt Boros Date: Thu, 11 May 2023 22:10:35 +0000 Subject: SERVER-70405 Document Bonsai classes and functions --- src/mongo/db/query/optimizer/metadata.cpp | 2 +- src/mongo/db/query/optimizer/metadata.h | 29 +++++++++++++++++++--- src/mongo/db/query/optimizer/opt_phase_manager.h | 25 +++++++++++++------ .../lower_index_scan_node.txt | 8 +++--- .../sbe/a_b_t_plan_generation/lower_seek_node.txt | 2 +- .../optimize_pipeline_tests.txt | 24 +++++++++--------- .../abt/a_b_t_optimization_test/partial_index.txt | 4 +-- 7 files changed, 63 insertions(+), 31 deletions(-) diff --git a/src/mongo/db/query/optimizer/metadata.cpp b/src/mongo/db/query/optimizer/metadata.cpp index 4a4bea82d51..e8dabcaff36 100644 --- a/src/mongo/db/query/optimizer/metadata.cpp +++ b/src/mongo/db/query/optimizer/metadata.cpp @@ -109,7 +109,7 @@ IndexDefinition::IndexDefinition(IndexCollationSpec collationSpec, DistributionAndPaths distributionAndPaths, PartialSchemaRequirements partialReqMap) : IndexDefinition(std::move(collationSpec), - 2 /*version*/, + 1 /*version*/, 0 /*orderingBits*/, isMultiKey, std::move(distributionAndPaths), diff --git a/src/mongo/db/query/optimizer/metadata.h b/src/mongo/db/query/optimizer/metadata.h index c698ca83352..50f7e1d4b20 100644 --- a/src/mongo/db/query/optimizer/metadata.h +++ b/src/mongo/db/query/optimizer/metadata.h @@ -65,6 +65,14 @@ struct DistributionAndPaths { }; +/** + * Structure to represent index field component and its associated collation. The _path field + * contains the path to the field component, restricted to Get, Traverse, and Id elements. + * For example, if we have an index on {a.b, c} that contains arrays, the _path for the first entry + * would be Get "a" Traverse Get "b" Traverse Id, and the _path for the second entry would be + * Get "c" Traverse Id. + * Implicitly contains multikey info through Traverse element or lack of Traverse element. + */ struct IndexCollationEntry { IndexCollationEntry(ABT path, CollationOp op); @@ -74,6 +82,7 @@ struct IndexCollationEntry { CollationOp _op; }; +// Full collation specification, using a list of component entries. using IndexCollationSpec = std::vector; /** @@ -115,7 +124,9 @@ struct MultikeynessTrie { }; /** - * Defines an available system index. + * Metadata associated with an index. Holds the index specification (index fields and their + * collations), its version (0 or 1), the collations as a bit mask, multikeyness info, and + * distribution info. This is a convenient structure for the query planning process. */ class IndexDefinition { public: @@ -161,8 +172,12 @@ private: using IndexDefinitions = opt::unordered_map; using ScanDefOptions = opt::unordered_map; -// Used to specify parameters to scan node, such as collection name, or file where collection is -// read from. +/** + * Parameters to a scan node, including distribution information, associated index definitions, + * and multikeyness information. Also includes any ScanDefOptions we might have, such as which + * database the collection is associated with, the origin of the collection (mongod or a BSON file), + * or the UUID of the collection. + */ class ScanDefinition { public: ScanDefinition(); @@ -207,6 +222,14 @@ private: boost::optional _ce; }; +/** + * Represents the optimizer’s view of the state of the rest of the system in terms of relevant + * resources. Currently we store the set of available collections in the system. In the future, + * when we support distributed planning, this is where we will put information related to the + * physical organization and topology of the machines. + * For each collection, we hold distribution information (fields it may be sharded on), multikeyness + * info, and data related to associated indexes in addition to other relevant metadata. + */ struct Metadata { Metadata(opt::unordered_map scanDefs); Metadata(opt::unordered_map scanDefs, size_t numberOfPartitions); diff --git a/src/mongo/db/query/optimizer/opt_phase_manager.h b/src/mongo/db/query/optimizer/opt_phase_manager.h index 5f22ebcbf18..593d779aa7a 100644 --- a/src/mongo/db/query/optimizer/opt_phase_manager.h +++ b/src/mongo/db/query/optimizer/opt_phase_manager.h @@ -42,16 +42,15 @@ namespace mongo::optimizer { using namespace cascades; -/** - * This class wraps together different optimization phases. - * First the transport rewrites are applied such as constant folding and redundant expression - * elimination. Second the logical and physical reordering rewrites are applied using the memo. - * Third the final transport rewritesd are applied. - */ - #define OPT_PHASE(F) \ /* ConstEval performs the following rewrites: constant folding, inlining, and dead code \ - * elimination. */ \ + * elimination. \ + * PathFusion implements path laws, for example shortcutting field assignment and reads, and \ + * other path optimizations. \ + * We switch between applying ConstEval and PathFusion for as long as they change the query, \ + * as they can enable new rewrites in each other. These are both done in-place rather than \ + * creating plan alternatives \ + */ \ F(ConstEvalPre) \ F(PathFuse) \ \ @@ -65,13 +64,23 @@ using namespace cascades; /* Implementation and enforcement rules. */ \ F(MemoImplementationPhase) \ \ + /* Lowers paths to expressions. Not to be confused with SBENodeLowering, which lowers ABT \ + * nodes and expressions to an SBE plan. */ \ F(PathLower) \ + /* Final round of constant folding, identical to the first ConstEval stage. */ \ F(ConstEvalPost) MAKE_PRINTABLE_ENUM(OptPhase, OPT_PHASE); MAKE_PRINTABLE_ENUM_STRING_ARRAY(OptPhaseEnum, OptPhase, OPT_PHASE); #undef OPT_PHASE +/** + * This class drives the optimization process, wrapping together different optimization phases. + * First the transport rewrites are applied such as constant folding and redundant expression + * elimination. Second the logical and physical reordering rewrites are applied using the memo. + * Third the final transport rewrites are applied. + * Phases may be skipped by specifying a subset of the phases to run in the phaseSet argument. + */ class OptPhaseManager { public: using PhaseSet = opt::unordered_set; diff --git a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt index b5fa136f28e..2786522ff59 100644 --- a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt +++ b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt @@ -5,25 +5,25 @@ IndexScan [{'': rid}, scanDefName: collName, indexDefName: index0, interval: {(Const [23], Const [35]]}] -- OUTPUT: -[0] ixseek ks(2ll, 0, 23L, 2ll) ks(2ll, 0, 35L, 2ll) none s1 none none [] @"" @"" true +[0] ixseek ks(1ll, 0, 23L, 2ll) ks(1ll, 0, 35L, 2ll) none s1 none none [] @"" @"" true ==== VARIATION: Covering forward index scan with one field ==== -- INPUT: IndexScan [{' 0': proj0}, scanDefName: collName, indexDefName: index0, interval: {[Const [26], Const [35])}] -- OUTPUT: -[0] ixseek ks(2ll, 0, 26L, 1ll) ks(2ll, 0, 35L, 1ll) none none none none [s1 = 0] @"" @"" true +[0] ixseek ks(1ll, 0, 26L, 1ll) ks(1ll, 0, 35L, 1ll) none none none none [s1 = 0] @"" @"" true ==== VARIATION: Basic reverse index scan with RID ==== -- INPUT: IndexScan [{'': rid}, scanDefName: collName, indexDefName: index0, interval: {[Const [27], Const [135])}, reversed] -- OUTPUT: -[0] ixseek ks(2ll, 0, 135L, 1ll) ks(2ll, 0, 27L, 1ll) none s1 none none [] @"" @"" false +[0] ixseek ks(1ll, 0, 135L, 1ll) ks(1ll, 0, 27L, 1ll) none s1 none none [] @"" @"" false ==== VARIATION: Covering reverse index scan with one field ==== -- INPUT: IndexScan [{' 0': proj0}, scanDefName: collName, indexDefName: index0, interval: {[Const [29], Const [47]]}, reversed] -- OUTPUT: -[0] ixseek ks(2ll, 0, 47L, 2ll) ks(2ll, 0, 29L, 1ll) none none none none [s1 = 0] @"" @"" false +[0] ixseek ks(1ll, 0, 47L, 2ll) ks(1ll, 0, 29L, 1ll) none none none none [s1 = 0] @"" @"" false diff --git a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt index 66c58c879cb..0292de4fdba 100644 --- a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt +++ b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt @@ -11,7 +11,7 @@ IndexScan [{'': rid}, scanDefName: collName, indexDefName: index0, interval -- OUTPUT: [3] nlj inner [] [s1] {true} left - [0] ixseek ks(2ll, 0, 23L, 2ll) ks(2ll, 0, 35L, 2ll) none s1 none none [] @"" @"" true + [0] ixseek ks(1ll, 0, 23L, 2ll) ks(1ll, 0, 35L, 2ll) none s1 none none [] @"" @"" true right [2] limitskip 1 0 [1] seek s1 s2 none none none none none none none [] @"" true false diff --git a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt index aeac2c94272..303ac77e9d7 100644 --- a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt +++ b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt @@ -18,7 +18,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -65,7 +65,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -336,7 +336,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -379,7 +379,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -426,7 +426,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -478,7 +478,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -535,7 +535,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -600,7 +600,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 0 distribution and paths: @@ -656,7 +656,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -701,7 +701,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -766,7 +766,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -810,7 +810,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: diff --git a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt index 5f3e634225e..e7f944ff1f3 100644 --- a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt +++ b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt @@ -18,7 +18,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: @@ -77,7 +77,7 @@ metadata: PathIdentity [] collation op: Ascending - version: 2 + version: 1 ordering bits: 0 is multi-key: 1 distribution and paths: -- cgit v1.2.1