summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Cox <eric.cox@mongodb.com>2022-04-12 22:14:51 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-04-12 22:39:07 +0000
commitc0036901e114de3411f4b3d6a1da8f2c0196c6d8 (patch)
tree259b5a3ce00c5595928df60e2ca2408275caf3da
parentf163e680437934975a00d25a14f9655c7d68e159 (diff)
downloadmongo-c0036901e114de3411f4b3d6a1da8f2c0196c6d8.tar.gz
SERVER-65285 Gracefully handle empty group-by key when spilling in HashAgg
-rw-r--r--src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp53
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_agg.cpp8
2 files changed, 61 insertions, 0 deletions
diff --git a/src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp b/src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp
index 20839e6624f..87fc8dbd1f2 100644
--- a/src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp
+++ b/src/mongo/db/exec/sbe/sbe_hash_agg_test.cpp
@@ -610,6 +610,59 @@ TEST_F(HashAggStageTest, HashAggBasicCountSpillDouble) {
stage->close();
}
+TEST_F(HashAggStageTest, HashAggBasicCountNoSpillWithNoGroupByDouble) {
+ auto defaultInternalQuerySBEAggApproxMemoryUseInBytesBeforeSpill =
+ internalQuerySBEAggApproxMemoryUseInBytesBeforeSpill.load();
+ internalQuerySBEAggApproxMemoryUseInBytesBeforeSpill.store(1);
+ ON_BLOCK_EXIT([&] {
+ internalQuerySBEAggApproxMemoryUseInBytesBeforeSpill.store(
+ defaultInternalQuerySBEAggApproxMemoryUseInBytesBeforeSpill);
+ });
+
+ auto ctx = makeCompileCtx();
+
+ auto [inputTag, inputVal] =
+ stage_builder::makeValue(BSON_ARRAY(1.0 << 2.0 << 3.0 << 4.0 << 5.0));
+ auto [scanSlot, scanStage] = generateVirtualScan(inputTag, inputVal);
+
+ // Build a HashAggStage, with an empty group by slot and compute a simple count.
+ auto countsSlot = generateSlotId();
+ auto stage = makeS<HashAggStage>(
+ std::move(scanStage),
+ makeSV(),
+ makeEM(countsSlot,
+ stage_builder::makeFunction(
+ "sum",
+ makeE<EConstant>(value::TypeTags::NumberInt64, value::bitcastFrom<int64_t>(1)))),
+ makeSV(), // Seek slot
+ true,
+ boost::none,
+ true /* allowDiskUse */,
+ kEmptyPlanNodeId);
+
+ // Prepare the tree and get the 'SlotAccessor' for the output slot.
+ auto resultAccessor = prepareTree(ctx.get(), stage.get(), countsSlot);
+
+ // Read in all of the results.
+ std::set<int64_t> results;
+ while (stage->getNext() == PlanState::ADVANCED) {
+ auto [resTag, resVal] = resultAccessor->getViewOfValue();
+ ASSERT_EQ(value::TypeTags::NumberInt64, resTag);
+ ASSERT_TRUE(results.insert(value::bitcastFrom<int64_t>(resVal)).second);
+ }
+
+ // Check that the results match the expected.
+ ASSERT_EQ(1, results.size());
+ ASSERT_EQ(1, results.count(5));
+
+ // Check that the spilling behavior matches the expected.
+ auto stats = static_cast<const HashAggStats*>(stage->getSpecificStats());
+ ASSERT_FALSE(stats->usedDisk);
+ ASSERT_EQ(0, stats->spilledRecords);
+
+ stage->close();
+}
+
TEST_F(HashAggStageTest, HashAggMultipleAccSpill) {
// We estimate the size of result row like {double, int64} at 59B. Set the memory threshold to
// 128B so that two rows fit in memory.
diff --git a/src/mongo/db/exec/sbe/stages/hash_agg.cpp b/src/mongo/db/exec/sbe/stages/hash_agg.cpp
index 95568e8edf4..7465630432c 100644
--- a/src/mongo/db/exec/sbe/stages/hash_agg.cpp
+++ b/src/mongo/db/exec/sbe/stages/hash_agg.cpp
@@ -321,6 +321,14 @@ void HashAggStage::checkMemoryUsageAndSpillIfNecessary(MemoryCheckData& mcd) {
return;
}
+ // If the group-by key is empty we must avoid spilling it to the '_recordStore', because we
+ // cannot spill an empty RecordId per the storage contract. Never spilling for the the empty
+ // group-by key case is OK because we will only ever aggregate into a single row and spilling
+ // would mean moving this row back and forth from disk to main memory anyway.
+ if (_inKeyAccessors.size() == 0) {
+ return;
+ }
+
mcd.memoryCheckpointCounter++;
if (mcd.memoryCheckpointCounter >= mcd.nextMemoryCheckpoint) {
if (_htIt == _ht->end()) {