summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt Boros <matt.boros@mongodb.com> 2023-05-11 22:10:35 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-05-11 23:52:56 +0000
commit f4b6a7cd809dba448f1c474f492556d0027e160d (patch)
tree d2183a88aaaeb141f2244d0bd7cfde11ce707357
parent 3e097ab7b72c36b2acad55a1f93de710ffec4da7 (diff)
download mongo-f4b6a7cd809dba448f1c474f492556d0027e160d.tar.gz
SERVER-70405 Document Bonsai classes and functions
-rw-r--r-- src/mongo/db/query/optimizer/metadata.cpp 2
-rw-r--r-- src/mongo/db/query/optimizer/metadata.h 29
-rw-r--r-- src/mongo/db/query/optimizer/opt_phase_manager.h 25
-rw-r--r-- src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt 8
-rw-r--r-- src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt 2
-rw-r--r-- src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt 24
-rw-r--r-- src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt 4
7 files changed, 63 insertions, 31 deletions
diff --git a/src/mongo/db/query/optimizer/metadata.cpp b/src/mongo/db/query/optimizer/metadata.cpp
index 4a4bea82d51..e8dabcaff36 100644
--- a/src/mongo/db/query/optimizer/metadata.cpp
+++ b/src/mongo/db/query/optimizer/metadata.cpp
@@ -109,7 +109,7 @@ IndexDefinition::IndexDefinition(IndexCollationSpec collationSpec,
DistributionAndPaths distributionAndPaths,
PartialSchemaRequirements partialReqMap)
: IndexDefinition(std::move(collationSpec),
- 2 /*version*/,
+ 1 /*version*/,
0 /*orderingBits*/,
isMultiKey,
std::move(distributionAndPaths),
diff --git a/src/mongo/db/query/optimizer/metadata.h b/src/mongo/db/query/optimizer/metadata.h
index c698ca83352..50f7e1d4b20 100644
--- a/src/mongo/db/query/optimizer/metadata.h
+++ b/src/mongo/db/query/optimizer/metadata.h
@@ -65,6 +65,14 @@ struct DistributionAndPaths {
};
+/**
+ * Structure to represent an index field component and its associated collation. The _path field
+ * contains the path to the field component, restricted to Get, Traverse, and Id elements.
+ * For example, if we have an index on {a.b, c} that contains arrays, the _path for the first entry
+ * would be Get "a" Traverse Get "b" Traverse Id, and the _path for the second entry would be
+ * Get "c" Traverse Id.
+ * Implicitly contains multikey info through the presence or absence of a Traverse element.
+ */
struct IndexCollationEntry {
IndexCollationEntry(ABT path, CollationOp op);
@@ -74,6 +82,7 @@ struct IndexCollationEntry {
CollationOp _op;
};
+// Full collation specification, using a list of component entries.
using IndexCollationSpec = std::vector<IndexCollationEntry>;
/**
@@ -115,7 +124,9 @@ struct MultikeynessTrie {
};
/**
- * Defines an available system index.
+ * Metadata associated with an index. Holds the index specification (index fields and their
+ * collations), its version (0 or 1), the collations as a bit mask, multikeyness info, and
+ * distribution info. This is a convenient structure for the query planning process.
*/
class IndexDefinition {
public:
@@ -161,8 +172,12 @@ private:
using IndexDefinitions = opt::unordered_map<std::string, IndexDefinition>;
using ScanDefOptions = opt::unordered_map<std::string, std::string>;
-// Used to specify parameters to scan node, such as collection name, or file where collection is
-// read from.
+/**
+ * Parameters to a scan node, including distribution information, associated index definitions,
+ * and multikeyness information. Also includes any ScanDefOptions we might have, such as which
+ * database the collection is associated with, the origin of the collection (mongod or a BSON file),
+ * or the UUID of the collection.
+ */
class ScanDefinition {
public:
ScanDefinition();
@@ -207,6 +222,14 @@ private:
boost::optional<CEType> _ce;
};
+/**
+ * Represents the optimizer's view of the state of the rest of the system in terms of relevant
+ * resources. Currently we store the set of available collections in the system. In the future,
+ * when we support distributed planning, this is where we will put information related to the
+ * physical organization and topology of the machines.
+ * For each collection, we hold distribution information (fields it may be sharded on), multikeyness
+ * info, and data related to associated indexes in addition to other relevant metadata.
+ */
struct Metadata {
Metadata(opt::unordered_map<std::string, ScanDefinition> scanDefs);
Metadata(opt::unordered_map<std::string, ScanDefinition> scanDefs, size_t numberOfPartitions);
diff --git a/src/mongo/db/query/optimizer/opt_phase_manager.h b/src/mongo/db/query/optimizer/opt_phase_manager.h
index 5f22ebcbf18..593d779aa7a 100644
--- a/src/mongo/db/query/optimizer/opt_phase_manager.h
+++ b/src/mongo/db/query/optimizer/opt_phase_manager.h
@@ -42,16 +42,15 @@ namespace mongo::optimizer {
using namespace cascades;
-/**
- * This class wraps together different optimization phases.
- * First the transport rewrites are applied such as constant folding and redundant expression
- * elimination. Second the logical and physical reordering rewrites are applied using the memo.
- * Third the final transport rewritesd are applied.
- */
-
#define OPT_PHASE(F) \
/* ConstEval performs the following rewrites: constant folding, inlining, and dead code \
- * elimination. */ \
+ * elimination. \
+ * PathFusion implements path laws, for example shortcutting field assignment and reads, and \
+ * other path optimizations. \
+ * We switch between applying ConstEval and PathFusion for as long as they change the query, \
+ * as they can enable new rewrites in each other. These are both done in-place rather than \
+ * creating plan alternatives. \
+ */ \
F(ConstEvalPre) \
F(PathFuse) \
\
@@ -65,13 +64,23 @@ using namespace cascades;
/* Implementation and enforcement rules. */ \
F(MemoImplementationPhase) \
\
+ /* Lowers paths to expressions. Not to be confused with SBENodeLowering, which lowers ABT \
+ * nodes and expressions to an SBE plan. */ \
F(PathLower) \
+ /* Final round of constant folding, identical to the first ConstEval stage. */ \
F(ConstEvalPost)
MAKE_PRINTABLE_ENUM(OptPhase, OPT_PHASE);
MAKE_PRINTABLE_ENUM_STRING_ARRAY(OptPhaseEnum, OptPhase, OPT_PHASE);
#undef OPT_PHASE
+/**
+ * This class drives the optimization process, wrapping together different optimization phases.
+ * First the transport rewrites are applied such as constant folding and redundant expression
+ * elimination. Second the logical and physical reordering rewrites are applied using the memo.
+ * Third the final transport rewrites are applied.
+ * Phases may be skipped by specifying a subset of the phases to run in the phaseSet argument.
+ */
class OptPhaseManager {
public:
using PhaseSet = opt::unordered_set<OptPhase>;
diff --git a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt
index b5fa136f28e..2786522ff59 100644
--- a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt
+++ b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_index_scan_node.txt
@@ -5,25 +5,25 @@
IndexScan [{'<rid>': rid}, scanDefName: collName, indexDefName: index0, interval: {(Const [23], Const [35]]}]
-- OUTPUT:
-[0] ixseek ks(2ll, 0, 23L, 2ll) ks(2ll, 0, 35L, 2ll) none s1 none none [] @"<collUUID>" @"<collUUID>" true
+[0] ixseek ks(1ll, 0, 23L, 2ll) ks(1ll, 0, 35L, 2ll) none s1 none none [] @"<collUUID>" @"<collUUID>" true
==== VARIATION: Covering forward index scan with one field ====
-- INPUT:
IndexScan [{'<indexKey> 0': proj0}, scanDefName: collName, indexDefName: index0, interval: {[Const [26], Const [35])}]
-- OUTPUT:
-[0] ixseek ks(2ll, 0, 26L, 1ll) ks(2ll, 0, 35L, 1ll) none none none none [s1 = 0] @"<collUUID>" @"<collUUID>" true
+[0] ixseek ks(1ll, 0, 26L, 1ll) ks(1ll, 0, 35L, 1ll) none none none none [s1 = 0] @"<collUUID>" @"<collUUID>" true
==== VARIATION: Basic reverse index scan with RID ====
-- INPUT:
IndexScan [{'<rid>': rid}, scanDefName: collName, indexDefName: index0, interval: {[Const [27], Const [135])}, reversed]
-- OUTPUT:
-[0] ixseek ks(2ll, 0, 135L, 1ll) ks(2ll, 0, 27L, 1ll) none s1 none none [] @"<collUUID>" @"<collUUID>" false
+[0] ixseek ks(1ll, 0, 135L, 1ll) ks(1ll, 0, 27L, 1ll) none s1 none none [] @"<collUUID>" @"<collUUID>" false
==== VARIATION: Covering reverse index scan with one field ====
-- INPUT:
IndexScan [{'<indexKey> 0': proj0}, scanDefName: collName, indexDefName: index0, interval: {[Const [29], Const [47]]}, reversed]
-- OUTPUT:
-[0] ixseek ks(2ll, 0, 47L, 2ll) ks(2ll, 0, 29L, 1ll) none none none none [s1 = 0] @"<collUUID>" @"<collUUID>" false
+[0] ixseek ks(1ll, 0, 47L, 2ll) ks(1ll, 0, 29L, 1ll) none none none none [s1 = 0] @"<collUUID>" @"<collUUID>" false
diff --git a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt
index 66c58c879cb..0292de4fdba 100644
--- a/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt
+++ b/src/mongo/db/test_output/exec/sbe/a_b_t_plan_generation/lower_seek_node.txt
@@ -11,7 +11,7 @@ IndexScan [{'<rid>': rid}, scanDefName: collName, indexDefName: index0, interval
-- OUTPUT:
[3] nlj inner [] [s1] {true}
left
- [0] ixseek ks(2ll, 0, 23L, 2ll) ks(2ll, 0, 35L, 2ll) none s1 none none [] @"<collUUID>" @"<collUUID>" true
+ [0] ixseek ks(1ll, 0, 23L, 2ll) ks(1ll, 0, 35L, 2ll) none s1 none none [] @"<collUUID>" @"<collUUID>" true
right
[2] limitskip 1 0
[1] seek s1 s2 none none none none none none none [] @"<collUUID>" true false
diff --git a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt
index aeac2c94272..303ac77e9d7 100644
--- a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt
+++ b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/optimize_pipeline_tests.txt
@@ -18,7 +18,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 1
distribution and paths:
@@ -65,7 +65,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 1
distribution and paths:
@@ -336,7 +336,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 1
distribution and paths:
@@ -379,7 +379,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 0
distribution and paths:
@@ -426,7 +426,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 0
distribution and paths:
@@ -478,7 +478,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 0
distribution and paths:
@@ -535,7 +535,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 0
distribution and paths:
@@ -600,7 +600,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 0
distribution and paths:
@@ -656,7 +656,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 1
distribution and paths:
@@ -701,7 +701,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 1
distribution and paths:
@@ -766,7 +766,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 1
distribution and paths:
@@ -810,7 +810,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 1
distribution and paths:
diff --git a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt
index 5f3e634225e..e7f944ff1f3 100644
--- a/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt
+++ b/src/mongo/db/test_output/pipeline/abt/a_b_t_optimization_test/partial_index.txt
@@ -18,7 +18,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 1
distribution and paths:
@@ -77,7 +77,7 @@ metadata:
PathIdentity []
collation op: Ascending
- version: 2
+ version: 1
ordering bits: 0
is multi-key: 1
distribution and paths: