diff options
author | Eric Cox <eric.cox@mongodb.com> | 2022-04-12 22:14:51 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-04-12 22:39:07 +0000 |
commit | c0036901e114de3411f4b3d6a1da8f2c0196c6d8 (patch) | |
tree | 259b5a3ce00c5595928df60e2ca2408275caf3da | |
parent | f163e680437934975a00d25a14f9655c7d68e159 (diff) | |
download | mongo-c0036901e114de3411f4b3d6a1da8f2c0196c6d8.tar.gz |
SERVER-65285 Gracefully handle empty group-by key when spilling in HashAgg
-rw-r--r-- | src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp | 53 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/hash_agg.cpp | 8 |
2 files changed, 61 insertions, 0 deletions
diff --git a/src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp b/src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp index 20839e6624f..87fc8dbd1f2 100644 --- a/src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp +++ b/src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp @@ -610,6 +610,59 @@ TEST_F(HashAggStageTest, HashAggBasicCountSpillDouble) { stage->close(); } +TEST_F(HashAggStageTest, HashAggBasicCountNoSpillWithNoGroupByDouble) { + auto defaultInternalQuerySBEAggApproxMemoryUseInBytesBeforeSpill = + internalQuerySBEAggApproxMemoryUseInBytesBeforeSpill.load(); + internalQuerySBEAggApproxMemoryUseInBytesBeforeSpill.store(1); + ON_BLOCK_EXIT([&] { + internalQuerySBEAggApproxMemoryUseInBytesBeforeSpill.store( + defaultInternalQuerySBEAggApproxMemoryUseInBytesBeforeSpill); + }); + + auto ctx = makeCompileCtx(); + + auto [inputTag, inputVal] = + stage_builder::makeValue(BSON_ARRAY(1.0 << 2.0 << 3.0 << 4.0 << 5.0)); + auto [scanSlot, scanStage] = generateVirtualScan(inputTag, inputVal); + + // Build a HashAggStage, with an empty group by slot and compute a simple count. + auto countsSlot = generateSlotId(); + auto stage = makeS<HashAggStage>( + std::move(scanStage), + makeSV(), + makeEM(countsSlot, + stage_builder::makeFunction( + "sum", + makeE<EConstant>(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(1)))), + makeSV(), // Seek slot + true, + boost::none, + true /* allowDiskUse */, + kEmptyPlanNodeId); + + // Prepare the tree and get the 'SlotAccessor' for the output slot. + auto resultAccessor = prepareTree(ctx.get(), stage.get(), countsSlot); + + // Read in all of the results. + std::set<int64_t> results; + while (stage->getNext() == PlanState::ADVANCED) { + auto [resTag, resVal] = resultAccessor->getViewOfValue(); + ASSERT_EQ(value::TypeTags::NumberInt64, resTag); + ASSERT_TRUE(results.insert(value::bitcastFrom<int64_t>(resVal)).second); + } + + // Check that the results match the expected. + ASSERT_EQ(1, results.size()); + ASSERT_EQ(1, results.count(5)); + + // Check that the spilling behavior matches the expected. + auto stats = static_cast<const HashAggStats*>(stage->getSpecificStats()); + ASSERT_FALSE(stats->usedDisk); + ASSERT_EQ(0, stats->spilledRecords); + + stage->close(); +} + TEST_F(HashAggStageTest, HashAggMultipleAccSpill) { // We estimate the size of result row like {double, int64} at 59B. Set the memory threshold to // 128B so that two rows fit in memory. diff --git a/src/mongo/db/exec/sbe/stages/hash_agg.cpp b/src/mongo/db/exec/sbe/stages/hash_agg.cpp index 95568e8edf4..7465630432c 100644 --- a/src/mongo/db/exec/sbe/stages/hash_agg.cpp +++ b/src/mongo/db/exec/sbe/stages/hash_agg.cpp @@ -321,6 +321,14 @@ void HashAggStage::checkMemoryUsageAndSpillIfNecessary(MemoryCheckData& mcd) { return; } + // If the group-by key is empty we must avoid spilling it to the '_recordStore', because we + // cannot spill an empty RecordId per the storage contract. Never spilling for the the empty + // group-by key case is OK because we will only ever aggregate into a single row and spilling + // would mean moving this row back and forth from disk to main memory anyway. + if (_inKeyAccessors.size() == 0) { + return; + } + mcd.memoryCheckpointCounter++; if (mcd.memoryCheckpointCounter >= mcd.nextMemoryCheckpoint) { if (_htIt == _ht->end()) { |