diff options
author | Brandon Zhang <brandon.zhang@mongodb.com> | 2015-07-30 16:15:47 -0400 |
---|---|---|
committer | Brandon Zhang <brandon.zhang@mongodb.com> | 2015-08-14 15:50:31 -0400 |
commit | 5987ace974b284fde633c6f6384c5ce6d4295c3c (patch) | |
tree | 64b667507df447bdc09414c386bc0f52fe861308 | |
parent | 1cee34ee3907b7652abdfc7f75ca27ef421a46c4 (diff) | |
download | mongo-5987ace974b284fde633c6f6384c5ce6d4295c3c.tar.gz |
SERVER-19480 Meaningful geoNear specific statistics after eliminating duplicated index scans
-rw-r--r-- | src/mongo/db/exec/geo_near.cpp | 8 |
-rw-r--r-- | src/mongo/db/exec/near.cpp | 20 |
-rw-r--r-- | src/mongo/db/exec/near.h | 38 |
-rw-r--r-- | src/mongo/db/exec/plan_stats.h | 43 |
-rw-r--r-- | src/mongo/db/query/explain.cpp | 2 |
5 files changed, 44 insertions, 67 deletions
diff --git a/src/mongo/db/exec/geo_near.cpp b/src/mongo/db/exec/geo_near.cpp index b683062e379..686c681f45d 100644 --- a/src/mongo/db/exec/geo_near.cpp +++ b/src/mongo/db/exec/geo_near.cpp @@ -562,9 +562,9 @@ StatusWith<NearStage::CoveredInterval*> // const IntervalStats& lastIntervalStats = _specificStats.intervalStats.back(); // TODO: Generally we want small numbers of results fast, then larger numbers later - if (lastIntervalStats.numResultsBuffered < 300) + if (lastIntervalStats.numResultsReturned < 300) _boundsIncrement *= 2; - else if (lastIntervalStats.numResultsBuffered > 600) + else if (lastIntervalStats.numResultsReturned > 600) _boundsIncrement /= 2; } @@ -949,9 +949,9 @@ StatusWith<NearStage::CoveredInterval*> // const IntervalStats& lastIntervalStats = _specificStats.intervalStats.back(); // TODO: Generally we want small numbers of results fast, then larger numbers later - if (lastIntervalStats.numResultsBuffered < 300) + if (lastIntervalStats.numResultsReturned < 300) _boundsIncrement *= 2; - else if (lastIntervalStats.numResultsBuffered > 600) + else if (lastIntervalStats.numResultsReturned > 600) _boundsIncrement /= 2; } diff --git a/src/mongo/db/exec/near.cpp b/src/mongo/db/exec/near.cpp index 1568339cbfe..760bf4f1247 100644 --- a/src/mongo/db/exec/near.cpp +++ b/src/mongo/db/exec/near.cpp @@ -202,7 +202,7 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro } } - ++_nextIntervalStats->numResultsFound; + ++_nextIntervalStats->numResultsBuffered; StatusWith<double> distanceStatus = computeDistance(nextMember); @@ -216,12 +216,6 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro // results. 
double memberDistance = distanceStatus.getValue(); - // Update found distance stats - if (_nextIntervalStats->minDistanceFound < 0 || - memberDistance < _nextIntervalStats->minDistanceFound) { - _nextIntervalStats->minDistanceFound = memberDistance; - } - _resultBuffer.push(SearchResult(nextMemberID, memberDistance)); // Store the member's RecordId, if available, for quick invalidation @@ -275,7 +269,7 @@ PlanStage::StageState NearStage::advanceNext(WorkingSetID* toReturn) { return PlanStage::NEED_TIME; } - // The next document in _resultBuffer is in the search interval, so we can return it + // The next document in _resultBuffer is in the search interval, so we can return it. _resultBuffer.pop(); // If we're returning something, take it out of our RecordId -> WSID map so that future @@ -286,14 +280,8 @@ PlanStage::StageState NearStage::advanceNext(WorkingSetID* toReturn) { _seenDocuments.erase(member->loc); } - // TODO: SERVER-19480 Find a more appropriate place to increment numResultsBuffered - ++_nextIntervalStats->numResultsBuffered; - - // Update buffered distance stats - if (_nextIntervalStats->minDistanceBuffered < 0 || - memberDistance < _nextIntervalStats->minDistanceBuffered) { - _nextIntervalStats->minDistanceBuffered = memberDistance; - } + // This value is used by nextInterval() to determine the size of the next interval. + ++_nextIntervalStats->numResultsReturned; return PlanStage::ADVANCED; } diff --git a/src/mongo/db/exec/near.h b/src/mongo/db/exec/near.h index aee013cadc6..abc8f0d35e9 100644 --- a/src/mongo/db/exec/near.h +++ b/src/mongo/db/exec/near.h @@ -47,26 +47,32 @@ namespace mongo { * is useful when we do not have a full ordering computed over the distance metric and don't * want to generate one. 
* - * Child stages need to implement functionality which: + * Some parts of the geoNear implementation depend on the type of index being used, so + * subclasses need to implement these three functions: * - * - defines a distance metric - * - iterates through ordered distance intervals, nearest to farthest - * - provides a covering for each distance interval + * - initialize() - Prepares the stage to begin the geoNear search. Must return IS_EOF iff the + * stage is prepared to begin buffering documents. + * - nextInterval() - Must return the bounds of the next interval with a PlanStage that will find + * all of the results in this interval that have not already been buffered in + * previous intervals. + * - computeDistance() - Must return the distance from a document to the centroid of search using + * the correct metric (spherical/flat, radians/meters). * * For example - given a distance search over documents with distances from [0 -> 10], the child * stage might break up the search into intervals [0->5),[5,7),[7->10]. * - * Each interval requires a PlanStage which *covers* the interval (returns all results in the - * interval). Results in each interval are buffered fully before being returned to ensure that - * ordering is preserved. Results that are in the cover, but not in the interval will remain - * buffered to be returned in subsequent search intervals. + * Each interval requires a PlanStage which returns all of the results in the interval that have + * not been buffered in a previous interval. Results in each interval are buffered fully before + * being returned to ensure that ordering is preserved. Results that are in the cover, but outside + * the outer bounds of the current interval will remain buffered to be returned in subsequent + * search intervals. 
* - * For efficient search, the child stage which covers the distance interval in question should - * not return too many results outside the interval, but correctness only depends on the child - * stage returning all results inside the interval. As an example, a PlanStage which covers the - * interval [0->5) might just be a full collection scan - this will always cover every interval, - * but is slow. If there is an index available, an IndexScan stage might also return all - * documents with distance [0->5) but would be much faster. + * For efficient search, the child stage should not return too many results outside the interval, + * but correctness only depends on all the results in the interval being buffered before any are + * returned. As an example, a PlanStage for the interval [0->5) might just be a full collection + * scan - this will always buffer every result in the interval, but is slow. If there is an index + * available, an IndexScan stage might also return all documents with distance [0->5) but + * would be much faster. * * Also for efficient search, the intervals should not be too large or too small - though again * correctness does not depend on interval size. @@ -78,7 +84,9 @@ namespace mongo { * TODO: If a document is indexed in multiple cells (Polygons, PolyLines, etc.), there is a * possibility that it will be returned more than once. Since doInvalidate() force fetches a * document and removes it from _seenDocuments, NearStage will not deduplicate if it encounters - * another instance of this document. + * another instance of this document. This will only occur if two cells for a document are in the + * same interval and the invalidation occurs between the scan of the first cell and the second, so + * NearStage no longer knows that it's seen this document before. * * TODO: Right now the interface allows the nextCovering() to be adaptive, but doesn't allow * aborting and shrinking a covered range being buffered if we guess wrong. 
diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h index f2ff04a0ce3..66403f131c5 100644 --- a/src/mongo/db/exec/plan_stats.h +++ b/src/mongo/db/exec/plan_stats.h @@ -524,28 +524,17 @@ struct SkipStats : public SpecificStats { }; struct IntervalStats { - IntervalStats() - : numResultsFound(0), - numResultsBuffered(0), - minDistanceAllowed(-1), - maxDistanceAllowed(-1), - inclusiveMaxDistanceAllowed(false), - minDistanceFound(-1), - maxDistanceFound(-1), - minDistanceBuffered(-1), - maxDistanceBuffered(-1) {} - - long long numResultsFound; - long long numResultsBuffered; - - double minDistanceAllowed; - double maxDistanceAllowed; - bool inclusiveMaxDistanceAllowed; - - double minDistanceFound; - double maxDistanceFound; - double minDistanceBuffered; - double maxDistanceBuffered; + // Number of results found in the covering of this interval. + long long numResultsBuffered = 0; + // Number of documents in this interval returned to the parent stage. + long long numResultsReturned = 0; + + // Min distance of this interval - always inclusive. + double minDistanceAllowed = -1; + // Max distance of this interval - inclusive iff inclusiveMaxDistanceAllowed. + double maxDistanceAllowed = -1; + // True only in the last interval. 
+ bool inclusiveMaxDistanceAllowed = false; }; class NearStats : public SpecificStats { @@ -556,16 +545,6 @@ public: return new NearStats(*this); } - long long totalResultsFound() { - long long totalResultsFound = 0; - for (std::vector<IntervalStats>::iterator it = intervalStats.begin(); - it != intervalStats.end(); - ++it) { - totalResultsFound += it->numResultsFound; - } - return totalResultsFound; - } - std::vector<IntervalStats> intervalStats; std::string indexName; BSONObj keyPattern; diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp index 7c19289fdc5..38b211b4ad2 100644 --- a/src/mongo/db/query/explain.cpp +++ b/src/mongo/db/query/explain.cpp @@ -321,6 +321,8 @@ void Explain::statsToBSON(const PlanStageStats& stats, intervalBob.append("minDistance", it->minDistanceAllowed); intervalBob.append("maxDistance", it->maxDistanceAllowed); intervalBob.append("maxInclusive", it->inclusiveMaxDistanceAllowed); + intervalBob.appendNumber("nBuffered", it->numResultsBuffered); + intervalBob.appendNumber("nReturned", it->numResultsReturned); } intervalsBob.doneFast(); } |