/** * Copyright (C) 2020-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, * as published by MongoDB, Inc. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Server Side Public License for more details. * * You should have received a copy of the Server Side Public License * along with this program. If not, see * <http://www.mongodb.com/licensing/server-side-public-license>. * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the Server Side Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #pragma once #include #include "mongo/db/exec/sbe/stages/stages.h" namespace mongo::sbe { /** * This stage deduplicates by a given key. Unlike a HashAgg, this stage is not blocking and rows are * returned in the same order as they appear in the input stream. * * TODO: It is possible to optimize this stage in the case where the input is sorted by key X, we * are "uniquing" by key Y, and we are guaranteed that all identical values of Y are * associated with the same key X. In this case the hash table of seen elements can be cleared each * time a new key X is encountered. 
*
 * For example, this optimization is possible when the UniqueStage is uniquing by record ID and
 * below it there are non-multikey index scans merged via a SortMerge stage. Each duplicate record
 * ID will be associated with the same sort key.
 *
 * Debug string representation:
 *
 *   unique [] childStage
 *
 * NOTE(review): the bracketed list in the debug string looks like it lost a placeholder
 * (presumably the key slots) - confirm against debugPrint().
 *
 * NOTE(review): in this copy std::unique_ptr, std::vector and stdx::unordered_set are written
 * without template arguments. The argument lists appear to have been lost; restore them before
 * compiling.
 */
class UniqueStage final : public PlanStage {
public:
    /**
     * Builds a stage that deduplicates the rows of 'input' by 'keys'.
     *
     * 'input' is presumably the child stage named "childStage" in the debug string - TODO confirm.
     * 'keys' is presumably what ends up in '_keySlots' - confirm in the constructor definition.
     * 'planNodeId' and 'participateInTrialRunTracking' (true unless the caller says otherwise)
     * are presumably handed on to the PlanStage base - confirm.
     */
    UniqueStage(std::unique_ptr input,
                value::SlotVector keys,
                PlanNodeId planNodeId,
                bool participateInTrialRunTracking = true);

    /**
     * Returns a copy of this stage. Presumably a deep copy that also clones the child - confirm in
     * the definition.
     */
    std::unique_ptr clone() const final;

    // Execution interface. Every function below is declared 'final' without 'virtual', so each one
    // overrides a virtual function inherited from a base class and cannot be overridden further.
    // Their definitions are not part of this header.
    void prepare(CompileCtx& ctx) final;
    value::SlotAccessor* getAccessor(CompileCtx& ctx, value::SlotId slot) final;
    void open(bool reOpen) final;
    // Per the class comment, rows are produced in input order and the stage does not block.
    PlanState getNext() final;
    void close() final;

    // Statistics, debug output and size estimation; also 'final' overrides of inherited virtuals.
    std::unique_ptr getStats(bool includeDebugInfo) const final;
    const SpecificStats* getSpecificStats() const final;
    std::vector debugPrint() const final;
    size_t estimateCompileTimeSize() const final;

private:
    // Slots that make up the deduplication key. Const, so it can only be set at construction.
    const value::SlotVector _keySlots;

    // Presumably one accessor per entry of '_keySlots', used to read the incoming key values -
    // TODO confirm in prepare().
    std::vector _inKeyAccessors;

    // Table of keys that have been seen.
    stdx::unordered_set _seen;

    // Statistics specific to this stage; presumably what getSpecificStats() exposes - confirm.
    UniqueStats _specificStats;
};
}  // namespace mongo::sbe