/**
* Copyright (C) 2014 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include
#include
#include "mongo/base/string_data.h"
#include "mongo/base/status_with.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/exec/plan_stage.h"
#include "mongo/db/exec/plan_stats.h"
#include "mongo/db/exec/working_set.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/record_id.h"
#include "mongo/platform/unordered_map.h"
namespace mongo {
/**
* An abstract stage which uses a progressive sort to return results sorted by distance. This
* is useful when we do not have a full ordering computed over the distance metric and don't
* want to generate one.
*
* Child stages need to implement functionality which:
*
* - defines a distance metric
* - iterates through ordered distance intervals, nearest to furthest
* - provides a covering for each distance interval
*
* For example - given a distance search over documents with distances from [0 -> 10], the child
* stage might break up the search into intervals [0->5),[5,7),[7->10].
*
* Each interval requires a PlanStage which *covers* the interval (returns all results in the
* interval). Results in each interval are buffered fully before being returned to ensure that
* ordering is preserved.
*
* For efficient search, the child stage which covers the distance interval in question should
* not return too many results outside the interval, but correctness only depends on the child
* stage returning all results inside the interval. As an example, a PlanStage which covers the
* interval [0->5) might just be a full collection scan - this will always cover every interval,
* but is slow. If there is an index available, an IndexScan stage might also return all
* documents with distance [0->5) but would be much faster.
*
* Also for efficient search, the intervals should not be too large or too small - though again
* correctness does not depend on interval size.
*
* TODO: Right now the interface allows the nextCovering() to be adaptive, but doesn't allow
* aborting and shrinking a covered range being buffered if we guess wrong.
*/
class NearStage : public PlanStage {
public:
struct CoveredInterval;
virtual ~NearStage();
virtual bool isEOF();
virtual StageState work(WorkingSetID* out);
virtual void saveState();
virtual void restoreState(OperationContext* opCtx);
virtual void invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type);
virtual std::vector getChildren() const;
virtual StageType stageType() const;
virtual PlanStageStats* getStats();
virtual const CommonStats* getCommonStats();
virtual const SpecificStats* getSpecificStats();
protected:
/**
* Subclasses of NearStage must provide basics + a stats object which gets owned here.
* The stats object must have specific stats which are a subclass of NearStats, otherwise
* it's generated automatically.
*/
NearStage(OperationContext* txn,
WorkingSet* workingSet,
Collection* collection,
PlanStageStats* stats);
/**
* Exposes NearStats for adaptive search, allows additional specific stats in subclasses.
*/
NearStats* getNearStats();
//
// Methods implemented for specific search functionality
//
/**
* Constructs the next covering over the next interval to buffer results from, or NULL
* if the full range has been searched. Use the provided working set as the working
* set for the covering stage if required.
*
* Returns !OK on failure to create next stage.
*/
virtual StatusWith nextInterval(OperationContext* txn,
WorkingSet* workingSet,
Collection* collection) = 0;
/**
* Computes the distance value for the given member data, or -1 if the member should not be
* returned in the sorted results.
*
* Returns !OK on invalid member data.
*/
virtual StatusWith computeDistance(WorkingSetMember* member) = 0;
/*
* Initialize near stage before buffering the data.
* Return IS_EOF if subclass finishes the initialization.
* Return NEED_TIME if we need more time.
* Return errors if an error occurs.
* Can't return ADVANCED.
*/
virtual StageState initialize(OperationContext* txn,
WorkingSet* workingSet,
Collection* collection);
private:
//
// Generic methods for progressive search functionality
//
StageState initNext();
StageState bufferNext(WorkingSetID* toReturn, Status* error);
StageState advanceNext(WorkingSetID* toReturn);
//
// Generic state for progressive near search
//
// Not owned here
OperationContext* _txn;
// Not owned here
WorkingSet* const _workingSet;
// Not owned here, used for fetching buffered results before invalidation
Collection* const _collection;
// A progressive search works in stages of buffering and then advancing
enum SearchState {
SearchState_Initializing,
SearchState_Buffering,
SearchState_Advancing,
SearchState_Finished
} _searchState;
// May need to track disklocs from the child stage to do our own deduping, also to do
// invalidation of buffered results.
unordered_map _nextIntervalSeen;
// Stats for the stage covering this interval
boost::scoped_ptr _nextIntervalStats;
// Sorted buffered results to be returned - the current interval
struct SearchResult;
std::priority_queue _resultBuffer;
// Stats
boost::scoped_ptr _stats;
// The current stage from which this stage should buffer results
// Pointer to the last interval in _childrenIntervals. Owned by _childrenIntervals.
CoveredInterval* _nextInterval;
// All children CoveredIntervals and the sub-stages owned by them.
//
// All children intervals except the last active one are only used by getStats(),
// because they are all EOF.
OwnedPointerVector _childrenIntervals;
};
/**
* A covered interval over which a portion of a near search can be run.
*/
struct NearStage::CoveredInterval {
CoveredInterval(PlanStage* covering,
bool dedupCovering,
double minDistance,
double maxDistance,
bool inclusiveMax);
// Owned by NearStage
boost::scoped_ptr const covering;
const bool dedupCovering;
const double minDistance;
const double maxDistance;
const bool inclusiveMax;
};
} // namespace mongo