diff options
author | Militsa Sotirova <militsa.sotirova@mongodb.com> | 2022-09-29 12:58:50 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-29 14:04:48 +0000 |
commit | 0b24ce9b359574b8f695fce8b4d9b80fdcaa4a10 (patch) | |
tree | 5d6ceb3b8f3776a02dabc4f027a410a19688c3d1 /src/mongo/db/query | |
parent | 44d7b27d4ba081d27e4ae4826ecda9ef05323cb5 (diff) | |
download | mongo-0b24ce9b359574b8f695fce8b4d9b80fdcaa4a10.tar.gz |
SERVER-66928 convert ABT pipeline tests to golden testing
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r-- | src/mongo/db/query/optimizer/utils/unit_test_utils.cpp | 173 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/unit_test_utils.h | 19 |
2 files changed, 192 insertions, 0 deletions
diff --git a/src/mongo/db/query/optimizer/utils/unit_test_utils.cpp b/src/mongo/db/query/optimizer/utils/unit_test_utils.cpp
index 1bd82ce80f5..c060535acf6 100644
--- a/src/mongo/db/query/optimizer/utils/unit_test_utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/unit_test_utils.cpp
@@ -156,4 +156,177 @@ ABT translatePipeline(const std::string& pipelineStr, std::string scanDefName) {
     return translatePipeline(metadata, pipelineStr, std::move(scanDefName), prefixId);
 }
 
+void serializeOptPhases(std::ostream& stream, opt::unordered_set<OptPhase> phaseSet) {
+    // The order of phases in the golden file must be the same every time the test is run.
+    std::set<std::string> orderedPhases;
+    for (const auto& phase : phaseSet) {
+        orderedPhases.insert(OptPhaseEnum::toString[static_cast<int>(phase)]);
+    }
+
+    stream << "optimization phases: " << std::endl;
+    for (const auto& phase : orderedPhases) {
+        stream << "\t" << phase << std::endl;
+    }
+}
+
+void explainPreserveIndentation(std::ostream& stream, std::string baseTabs, std::string explain) {
+    std::string currLine = "";
+    for (char ch : explain) {
+        if (ch == '\n') {
+            stream << baseTabs << currLine << std::endl;
+            currLine = "";
+        } else {
+            currLine += ch;
+        }
+    }
+    stream << std::endl;
+}
+
+void serializeDistributionAndPaths(std::ostream& stream,
+                                   DistributionAndPaths distributionAndPaths,
+                                   std::string baseTabs) {
+    stream << baseTabs << "distribution and paths: " << std::endl;
+    stream << baseTabs << "\tdistribution type: "
+           << DistributionTypeEnum::toString[static_cast<int>(distributionAndPaths._type)]
+           << std::endl;
+    stream << baseTabs << "\tdistribution paths: " << std::endl;
+    for (const ABT& abt : distributionAndPaths._paths) {
+        explainPreserveIndentation(stream, baseTabs + "\t\t", ExplainGenerator::explainV2(abt));
+    }
+}
+
+void serializeMetadata(std::ostream& stream, Metadata metadata) {
+    stream << "metadata: " << std::endl;
+
+    stream << "\tnumber of partitions: " << metadata._numberOfPartitions << std::endl;
+
+    // The ScanDefinitions are stored in an unordered map, and the order of the ScanDefinitions in
+    // the golden file must be the same every time the test is run.
+    std::map<std::string, ScanDefinition> orderedScanDefs;
+    for (auto element : metadata._scanDefs) {
+        orderedScanDefs.insert(element);
+    }
+
+    stream << "\tscan definitions: " << std::endl;
+    for (const auto& element : orderedScanDefs) {
+        stream << "\t\t" << element.first << ": " << std::endl;
+
+        ScanDefinition scanDef = element.second;
+
+        stream << "\t\t\toptions: " << std::endl;
+        for (const auto& optionElem : scanDef.getOptionsMap()) {
+            stream << "\t\t\t\t" << optionElem.first << ": " << optionElem.second << std::endl;
+        }
+
+        serializeDistributionAndPaths(stream, scanDef.getDistributionAndPaths(), "\t\t\t");
+
+        stream << "\t\t\tindexes: " << std::endl;
+        for (const auto& indexElem : scanDef.getIndexDefs()) {
+            stream << "\t\t\t\t" << indexElem.first << ": " << std::endl;
+
+            IndexDefinition indexDef = indexElem.second;
+
+            stream << "\t\t\t\t\tcollation spec: " << std::endl;
+            for (const auto& indexCollationEntry : indexDef.getCollationSpec()) {
+                stream << "\t\t\t\t\t\tABT path: " << std::endl;
+                explainPreserveIndentation(stream,
+                                           "\t\t\t\t\t\t\t",
+                                           ExplainGenerator::explainV2(indexCollationEntry._path));
+
+                stream << "\t\t\t\t\t\tcollation op: "
+                       << CollationOpEnum::toString[static_cast<int>(indexCollationEntry._op)]
+                       << std::endl;
+            }
+
+            stream << "\t\t\t\t\tversion: " << indexDef.getVersion() << std::endl;
+            stream << "\t\t\t\t\tordering bits: " << indexDef.getOrdering() << std::endl;
+            stream << "\t\t\t\t\tis multi-key: " << indexDef.isMultiKey() << std::endl;
+
+            serializeDistributionAndPaths(stream, indexDef.getDistributionAndPaths(), "\t\t\t\t\t");
+
+            std::string serializedReqMap =
+                ExplainGenerator::explainPartialSchemaReqMap(indexDef.getPartialReqMap());
+            explainPreserveIndentation(stream, "\t\t\t\t\t", serializedReqMap);
+        }
+
+        stream << "\t\t\tnon multi-key index paths: " << std::endl;
+        for (const auto& indexPath : scanDef.getNonMultiKeyPathSet()) {
+            explainPreserveIndentation(stream, "\t\t\t\t", ExplainGenerator::explainV2(indexPath));
+        }
+
+        stream << "\t\t\tcollection exists: " << scanDef.exists() << std::endl;
+        stream << "\t\t\tCE type: " << scanDef.getCE() << std::endl;
+    }
+}
+
+ABT translatetoABT(const std::string& pipelineStr,
+                   std::string scanDefName,
+                   Metadata metadata,
+                   const std::vector<ExpressionContext::ResolvedNamespace>& involvedNss) {
+    PrefixId prefixId;
+    return translatePipeline(
+        metadata, pipelineStr, prefixId.getNextId("scan"), scanDefName, prefixId, involvedNss);
+}
+
+ABT optimizeABT(ABT abt,
+                opt::unordered_set<OptPhase> phaseSet,
+                Metadata metadata,
+                PathToIntervalFn pathToInterval,
+                bool phaseManagerDisableScan) {
+    PrefixId prefixId;
+
+    OptPhaseManager phaseManager(phaseSet,
+                                 prefixId,
+                                 false,
+                                 metadata,
+                                 std::make_unique<HeuristicCE>(),
+                                 std::make_unique<DefaultCosting>(),
+                                 pathToInterval,
+                                 DebugInfo::kDefaultForTests);
+    if (phaseManagerDisableScan) {
+        phaseManager.getHints()._disableScan = true;
+    }
+
+    ABT optimized = abt;
+    phaseManager.optimize(optimized);
+    return optimized;
+}
+
+void testABTTranslationAndOptimization(
+    unittest::GoldenTestContext& gctx,
+    const std::string& variationName,
+    const std::string& pipelineStr,
+    std::string scanDefName,
+    opt::unordered_set<OptPhase> phaseSet,
+    Metadata metadata,
+    PathToIntervalFn pathToInterval,
+    bool phaseManagerDisableScan,
+    const std::vector<ExpressionContext::ResolvedNamespace>& involvedNss) {
+    auto& stream = gctx.outStream();
+    bool optimizePipeline = !phaseSet.empty();
+
+    stream << "==== VARIATION: " << variationName << " ====" << std::endl;
+    stream << "-- INPUTS:" << std::endl;
+    stream << "pipeline: " << pipelineStr << std::endl;
+
+    serializeMetadata(stream, metadata);
+    if (optimizePipeline) {
+        serializeOptPhases(stream, phaseSet);
+    }
+
+    stream << std::endl << "-- OUTPUT:" << std::endl;
+
+    ABT translated = translatetoABT(pipelineStr, scanDefName, metadata, involvedNss);
+
+    if (optimizePipeline) {
+        ABT optimized =
+            optimizeABT(translated, phaseSet, metadata, pathToInterval, phaseManagerDisableScan);
+        stream << ExplainGenerator::explainV2(optimized) << std::endl;
+    } else {
+        stream << ExplainGenerator::explainV2(translated) << std::endl;
+    }
+
+    stream << std::endl;
+}
+
 }  // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/utils/unit_test_utils.h b/src/mongo/db/query/optimizer/utils/unit_test_utils.h
index f25df1a11b0..1183c3e7859 100644
--- a/src/mongo/db/query/optimizer/utils/unit_test_utils.h
+++ b/src/mongo/db/query/optimizer/utils/unit_test_utils.h
@@ -33,9 +33,12 @@
 #include "mongo/db/operation_context_noop.h"
 #include "mongo/db/pipeline/expression_context_for_test.h"
 #include "mongo/db/pipeline/pipeline.h"
+#include "mongo/db/query/optimizer/cascades/cost_derivation.h"
 #include "mongo/db/query/optimizer/defs.h"
 #include "mongo/db/query/optimizer/metadata.h"
+#include "mongo/db/query/optimizer/opt_phase_manager.h"
 #include "mongo/db/query/optimizer/utils/utils.h"
+#include "mongo/unittest/golden_test.h"
 
 namespace mongo::optimizer {
 
@@ -107,4 +110,20 @@ ABT translatePipeline(Metadata& metadata,
 
 ABT translatePipeline(const std::string& pipelineStr, std::string scanDefName = "collection");
 
+/**
+ * This function translates the given pipeline string to an ABT and (if optimization phases are
+ * provided) optimizes the ABT using the parameters specified. It then writes the output to a file
+ * that will be compared to the golden testing file for the test file.
+ **/
+void testABTTranslationAndOptimization(
+    unittest::GoldenTestContext& gctx,
+    const std::string& variationName,
+    const std::string& pipelineStr,
+    std::string scanDefName = "collection",
+    opt::unordered_set<OptPhase> phaseSet = {},
+    Metadata metadata = {{{"collection", ScanDefinition{{}, {}}}}},
+    PathToIntervalFn pathToInterval = {},
+    bool phaseManagerDisableScan = false,
+    const std::vector<ExpressionContext::ResolvedNamespace>& involvedNss = {});
+
 }  // namespace mongo::optimizer