summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBrandon Zhang <brandon.zhang@mongodb.com>2015-07-15 15:14:11 -0400
committerBrandon Zhang <brandon.zhang@mongodb.com>2015-07-20 11:30:07 -0400
commit1de0a646d1c373e8a694d2a26c3f5ebb10137f14 (patch)
tree45a115eeaa15b66941855628c4bdb6e16e154bab /src
parent85f9c15c98f9490d81e2bdc6ba4a4c8162eaa0f4 (diff)
downloadmongo-1de0a646d1c373e8a694d2a26c3f5ebb10137f14.tar.gz
SERVER-19039 Buffer fetched documents in geoNear
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/exec/geo_near.cpp207
-rw-r--r--src/mongo/db/exec/geo_near.h9
-rw-r--r--src/mongo/db/exec/near.cpp104
-rw-r--r--src/mongo/db/exec/near.h19
-rw-r--r--src/mongo/db/query/expression_index.cpp37
-rw-r--r--src/mongo/db/query/expression_index.h12
-rw-r--r--src/mongo/dbtests/query_stage_near.cpp11
7 files changed, 172 insertions, 227 deletions
diff --git a/src/mongo/db/exec/geo_near.cpp b/src/mongo/db/exec/geo_near.cpp
index 400b35bf039..a91b7e49583 100644
--- a/src/mongo/db/exec/geo_near.cpp
+++ b/src/mongo/db/exec/geo_near.cpp
@@ -41,11 +41,11 @@
#include "mongo/db/geo/geoconstants.h"
#include "mongo/db/geo/geoparser.h"
#include "mongo/db/geo/hash.h"
+#include "mongo/db/index/expression_params.h"
+#include "mongo/db/index/s2_keys.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/query/expression_index.h"
#include "mongo/db/query/expression_index_knobs.h"
-#include "mongo/db/index/expression_params.h"
-#include "mongo/db/index/s2_keys.h"
#include "mongo/util/log.h"
#include <algorithm>
@@ -498,70 +498,6 @@ private:
R2Annulus _annulus;
};
-/**
- * Expression which checks whether a 2D key for a point (2D hash) intersects our search
- * region. The search region may have been formed by more granular hashes.
- */
-class TwoDKeyInRegionExpression : public LeafMatchExpression {
-public:
- TwoDKeyInRegionExpression(R2Region* region,
- const GeoHashConverter::Parameters& hashParams,
- StringData twoDKeyPath)
- : LeafMatchExpression(INTERNAL_2D_KEY_IN_REGION), _region(region), _unhasher(hashParams) {
- initPath(twoDKeyPath);
- }
-
- virtual ~TwoDKeyInRegionExpression() {}
-
- virtual void toBSON(BSONObjBuilder* out) const {
- out->append("TwoDKeyInRegionExpression", true);
- }
-
- virtual bool matchesSingleElement(const BSONElement& e) const {
- // Something has gone terribly wrong if this doesn't hold.
- invariant(BinData == e.type());
- return !_region->fastDisjoint(_unhasher.unhashToBoxCovering(_unhasher.hash(e)));
- }
-
- //
- // These won't be called.
- //
-
- virtual void debugString(StringBuilder& debug, int level = 0) const {
- invariant(false);
- }
-
- virtual bool equivalent(const MatchExpression* other) const {
- invariant(false);
- return true;
- }
-
- virtual unique_ptr<MatchExpression> shallowClone() const {
- invariant(false);
- return NULL;
- }
-
-private:
- const unique_ptr<R2Region> _region;
- const GeoHashConverter _unhasher;
-};
-
-// Helper class to maintain ownership of a match expression alongside an index scan
-class IndexScanWithMatch : public IndexScan {
-public:
- IndexScanWithMatch(OperationContext* txn,
- const IndexScanParams& params,
- WorkingSet* workingSet,
- MatchExpression* filter)
- : IndexScan(txn, params, workingSet, filter), _matcher(filter) {}
-
- virtual ~IndexScanWithMatch() {}
-
-private:
- // Owns matcher
- const unique_ptr<MatchExpression> _matcher;
-};
-
// Helper class to maintain ownership of a match expression alongside an index scan
class FetchStageWithMatch : public FetchStage {
public:
@@ -707,11 +643,8 @@ StatusWith<NearStage::CoveredInterval*> //
IndexScanParams scanParams;
scanParams.descriptor = _twoDIndex;
scanParams.direction = 1;
- // We use a filter on the key. The filter rejects keys that don't intersect with the
- // annulus. An object that is in the annulus might have a key that's not in it and a key
- // that's in it. As such we can't just look at one key per object.
- //
- // This does force us to do our own deduping of results, though.
+
+ // This does force us to do our own deduping of results.
scanParams.doNotDedup = true;
// Scan bounds on 2D indexes are only over the 2D field - other bounds aren't applicable.
@@ -722,13 +655,23 @@ StatusWith<NearStage::CoveredInterval*> //
const string twoDFieldName = _nearParams.nearQuery->field;
const int twoDFieldPosition = 0;
+ std::vector<GeoHash> unorderedCovering = ExpressionMapping::get2dCovering(
+ *coverRegion, _twoDIndex->infoObj(), internalGeoNearQuery2DMaxCoveringCells);
+
+ // Make sure the same index key isn't visited twice
+ R2CellUnion diffUnion;
+ diffUnion.init(unorderedCovering);
+ diffUnion.getDifference(_scannedCells);
+ // After taking the difference, there may be cells in the covering that don't intersect
+ // with the annulus.
+ diffUnion.detach(&unorderedCovering);
+
+ // Add the cells in this covering to the _scannedCells union
+ _scannedCells.add(unorderedCovering);
+
OrderedIntervalList coveredIntervals;
coveredIntervals.name = scanParams.bounds.fields[twoDFieldPosition].name;
-
- ExpressionMapping::cover2d(*coverRegion,
- _twoDIndex->infoObj(),
- internalGeoNearQuery2DMaxCoveringCells,
- &coveredIntervals);
+ ExpressionMapping::GeoHashsToIntervalsWithParents(unorderedCovering, &coveredIntervals);
// Intersect the $near bounds we just generated into the bounds we have for anything else
// in the scan (i.e. $within)
@@ -739,24 +682,13 @@ StatusWith<NearStage::CoveredInterval*> //
GeoHashConverter::Parameters hashParams;
GeoHashConverter::parseParameters(_twoDIndex->infoObj(), &hashParams);
- MatchExpression* keyMatcher =
- new TwoDKeyInRegionExpression(coverRegion.release(), hashParams, twoDFieldName);
-
// 2D indexes support covered search over additional fields they contain
- // TODO: Don't need to clone, can just attach to custom matcher above
- if (_nearParams.filter) {
- AndMatchExpression* andMatcher = new AndMatchExpression();
- andMatcher->add(keyMatcher);
- andMatcher->add(_nearParams.filter->shallowClone().release());
- keyMatcher = andMatcher;
- }
-
- // IndexScanWithMatch owns the matcher
- IndexScan* scan = new IndexScanWithMatch(txn, scanParams, workingSet, keyMatcher);
+ IndexScan* scan = new IndexScan(txn, scanParams, workingSet, _nearParams.filter);
- MatchExpression* docMatcher = NULL;
+ MatchExpression* docMatcher = nullptr;
// FLAT searches need to add an additional annulus $within matcher, see above
+ // TODO: Find out if this matcher is actually needed
if (FLAT == queryCRS) {
docMatcher = new TwoDPtInAnnulusExpression(_fullBounds, twoDFieldName);
}
@@ -855,68 +787,6 @@ S2Region* buildS2Region(const R2Annulus& sphereBounds) {
// Takes ownership of caps
return new S2RegionIntersection(&regions);
}
-
-/**
- * Expression which checks whether a 2DSphere key for a point (S2 hash) intersects our
- * search region. The search region may have been formed by more granular hashes.
- */
-class TwoDSphereKeyInRegionExpression : public LeafMatchExpression {
-public:
- TwoDSphereKeyInRegionExpression(const R2Annulus& bounds,
- S2IndexVersion indexVersion,
- StringData twoDSpherePath)
- : LeafMatchExpression(INTERNAL_2DSPHERE_KEY_IN_REGION),
- _region(buildS2Region(bounds)),
- _indexVersion(indexVersion) {
- initPath(twoDSpherePath);
- }
-
- virtual ~TwoDSphereKeyInRegionExpression() {}
-
- virtual void toBSON(BSONObjBuilder* out) const {
- out->append("TwoDSphereKeyInRegionExpression", true);
- }
-
- virtual bool matchesSingleElement(const BSONElement& e) const {
- S2Cell keyCell;
- if (_indexVersion < S2_INDEX_VERSION_3) {
- // Something has gone terribly wrong if this doesn't hold.
- invariant(String == e.type());
- keyCell = S2Cell(S2CellId::FromString(e.str()));
- } else {
- // Something has gone terribly wrong if this doesn't hold.
- invariant(NumberLong == e.type());
- keyCell = S2Cell(S2CellIdFromIndexKey(e.numberLong()));
- }
- return _region->MayIntersect(keyCell);
- }
-
- const S2Region& getRegion() {
- return *_region;
- }
-
- //
- // These won't be called.
- //
-
- virtual void debugString(StringBuilder& debug, int level = 0) const {
- invariant(false);
- }
-
- virtual bool equivalent(const MatchExpression* other) const {
- invariant(false);
- return true;
- }
-
- virtual unique_ptr<MatchExpression> shallowClone() const {
- invariant(false);
- return NULL;
- }
-
-private:
- const unique_ptr<S2Region> _region;
- const S2IndexVersion _indexVersion;
-};
}
// Estimate the density of data by search the nearest cells level by level around center.
@@ -1103,11 +973,8 @@ StatusWith<NearStage::CoveredInterval*> //
IndexScanParams scanParams;
scanParams.descriptor = _s2Index;
scanParams.direction = 1;
- // We use a filter on the key. The filter rejects keys that don't intersect with the
- // annulus. An object that is in the annulus might have a key that's not in it and a key
- // that's in it. As such we can't just look at one key per object.
- //
- // This does force us to do our own deduping of results, though.
+
+ // This does force us to do our own deduping of results.
scanParams.doNotDedup = true;
scanParams.bounds = _nearParams.baseBounds;
@@ -1116,15 +983,29 @@ StatusWith<NearStage::CoveredInterval*> //
const int s2FieldPosition = getFieldPosition(_s2Index, s2Field);
fassert(28678, s2FieldPosition >= 0);
scanParams.bounds.fields[s2FieldPosition].intervals.clear();
- OrderedIntervalList* coveredIntervals = &scanParams.bounds.fields[s2FieldPosition];
+ std::unique_ptr<S2Region> region(buildS2Region(_currBounds));
+
+ std::vector<S2CellId> cover = ExpressionMapping::get2dsphereCovering(*region);
+
+ // Generate a covering that does not intersect with any previous coverings
+ S2CellUnion coverUnion;
+ coverUnion.InitSwap(&cover);
+ invariant(cover.empty());
+ S2CellUnion diffUnion;
+ diffUnion.GetDifference(&coverUnion, &_scannedCells);
+ for (auto cellId : diffUnion.cell_ids()) {
+ if (region->MayIntersect(S2Cell(cellId))) {
+ cover.push_back(cellId);
+ }
+ }
- TwoDSphereKeyInRegionExpression* keyMatcher =
- new TwoDSphereKeyInRegionExpression(_currBounds, _indexParams.indexVersion, s2Field);
+ // Add the cells in this covering to the _scannedCells union
+ _scannedCells.Add(cover);
- ExpressionMapping::cover2dsphere(keyMatcher->getRegion(), _indexParams, coveredIntervals);
+ OrderedIntervalList* coveredIntervals = &scanParams.bounds.fields[s2FieldPosition];
+ S2CellIdsToIntervalsWithParents(cover, _indexParams, coveredIntervals);
- // IndexScan owns the hash matcher
- IndexScan* scan = new IndexScanWithMatch(txn, scanParams, workingSet, keyMatcher);
+ IndexScan* scan = new IndexScan(txn, scanParams, workingSet, nullptr);
// FetchStage owns index scan
_children.emplace_back(new FetchStage(txn, workingSet, scan, _nearParams.filter, collection));
diff --git a/src/mongo/db/exec/geo_near.h b/src/mongo/db/exec/geo_near.h
index b898ee51280..e429a742197 100644
--- a/src/mongo/db/exec/geo_near.h
+++ b/src/mongo/db/exec/geo_near.h
@@ -28,16 +28,17 @@
#pragma once
-
#include "mongo/db/exec/near.h"
#include "mongo/db/exec/working_set.h"
#include "mongo/db/exec/plan_stats.h"
#include "mongo/db/geo/geometry_container.h"
#include "mongo/db/index/index_descriptor.h"
#include "mongo/db/index/s2_indexing_params.h"
+#include "mongo/db/geo/r2_region_coverer.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/matcher/expression_geo.h"
#include "mongo/db/query/index_bounds.h"
+#include "third_party/s2/s2cellunion.h"
namespace mongo {
@@ -98,6 +99,9 @@ private:
// Amount to increment the next bounds by
double _boundsIncrement;
+ // Keeps track of the region that has already been scanned
+ R2CellUnion _scannedCells;
+
class DensityEstimator;
std::unique_ptr<DensityEstimator> _densityEstimator;
};
@@ -145,6 +149,9 @@ private:
// Amount to increment the next bounds by
double _boundsIncrement;
+ // Keeps track of the region that has already been scanned
+ S2CellUnion _scannedCells;
+
class DensityEstimator;
std::unique_ptr<DensityEstimator> _densityEstimator;
};
diff --git a/src/mongo/db/exec/near.cpp b/src/mongo/db/exec/near.cpp
index ceeda89c192..5cf3018c8a7 100644
--- a/src/mongo/db/exec/near.cpp
+++ b/src/mongo/db/exec/near.cpp
@@ -51,8 +51,9 @@ NearStage::NearStage(OperationContext* txn,
_workingSet(workingSet),
_collection(collection),
_searchState(SearchState_Initializing),
+ _nextIntervalStats(nullptr),
_stageType(type),
- _nextInterval(NULL) {}
+ _nextInterval(nullptr) {}
NearStage::~NearStage() {}
@@ -164,7 +165,8 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro
// CoveredInterval and its child stage are owned by _childrenIntervals
_childrenIntervals.push_back(intervalStatus.getValue());
_nextInterval = _childrenIntervals.back();
- _nextIntervalStats.reset(new IntervalStats());
+ _specificStats.intervalStats.emplace_back();
+ _nextIntervalStats = &_specificStats.intervalStats.back();
_nextIntervalStats->minDistanceAllowed = _nextInterval->minDistance;
_nextIntervalStats->maxDistanceAllowed = _nextInterval->maxDistance;
_nextIntervalStats->inclusiveMaxDistanceAllowed = _nextInterval->inclusiveMax;
@@ -174,9 +176,6 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro
PlanStage::StageState intervalState = _nextInterval->covering->work(&nextMemberID);
if (PlanStage::IS_EOF == intervalState) {
- _specificStats.intervalStats.push_back(*_nextIntervalStats);
- _nextIntervalStats.reset();
- _nextInterval = NULL;
_searchState = SearchState_Advancing;
return PlanStage::NEED_TIME;
} else if (PlanStage::FAILURE == intervalState) {
@@ -197,7 +196,7 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro
// The child stage may not dedup so we must dedup them ourselves.
if (_nextInterval->dedupCovering && nextMember->hasLoc()) {
- if (_nextIntervalSeen.end() != _nextIntervalSeen.find(nextMember->loc)) {
+ if (_seenDocuments.end() != _seenDocuments.find(nextMember->loc)) {
_workingSet->free(nextMemberID);
return PlanStage::NEED_TIME;
}
@@ -216,9 +215,6 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro
// If the member's distance is in the current distance interval, add it to our buffered
// results.
double memberDistance = distanceStatus.getValue();
- bool inInterval = memberDistance >= _nextInterval->minDistance &&
- (_nextInterval->inclusiveMax ? memberDistance <= _nextInterval->maxDistance
- : memberDistance < _nextInterval->maxDistance);
// Update found distance stats
if (_nextIntervalStats->minDistanceFound < 0 ||
@@ -226,55 +222,77 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro
_nextIntervalStats->minDistanceFound = memberDistance;
}
- if (_nextIntervalStats->maxDistanceFound < 0 ||
- memberDistance > _nextIntervalStats->maxDistanceFound) {
- _nextIntervalStats->maxDistanceFound = memberDistance;
- }
-
- if (inInterval) {
- _resultBuffer.push(SearchResult(nextMemberID, memberDistance));
+ _resultBuffer.push(SearchResult(nextMemberID, memberDistance));
- // Store the member's RecordId, if available, for quick invalidation
- if (nextMember->hasLoc()) {
- _nextIntervalSeen.insert(std::make_pair(nextMember->loc, nextMemberID));
- }
+ // Store the member's RecordId, if available, for quick invalidation
+ if (nextMember->hasLoc()) {
+ _seenDocuments.insert(std::make_pair(nextMember->loc, nextMemberID));
+ }
- ++_nextIntervalStats->numResultsBuffered;
+ return PlanStage::NEED_TIME;
+}
- // Update buffered distance stats
- if (_nextIntervalStats->minDistanceBuffered < 0 ||
- memberDistance < _nextIntervalStats->minDistanceBuffered) {
- _nextIntervalStats->minDistanceBuffered = memberDistance;
+PlanStage::StageState NearStage::advanceNext(WorkingSetID* toReturn) {
+ // Returns documents to the parent stage.
+ // If the document does not fall in the current interval, it will be buffered so that
+ // it might be returned in a following interval.
+
+ // Check if the next member is in the search interval and that the buffer isn't empty
+ WorkingSetID resultID = WorkingSet::INVALID_ID;
+ // memberDistance is initialized to produce an error if used before its value is changed
+ double memberDistance = std::numeric_limits<double>::lowest();
+ if (!_resultBuffer.empty()) {
+ SearchResult result = _resultBuffer.top();
+ memberDistance = result.distance;
+
+ // Throw out all documents with memberDistance < minDistance
+ if (memberDistance < _nextInterval->minDistance) {
+ WorkingSetMember* member = _workingSet->get(result.resultID);
+ if (member->hasLoc()) {
+ _seenDocuments.erase(member->loc);
+ }
+ _resultBuffer.pop();
+ _workingSet->free(result.resultID);
+ return PlanStage::NEED_TIME;
}
- if (_nextIntervalStats->maxDistanceBuffered < 0 ||
- memberDistance > _nextIntervalStats->maxDistanceBuffered) {
- _nextIntervalStats->maxDistanceBuffered = memberDistance;
+ bool inInterval = _nextInterval->inclusiveMax ? memberDistance <= _nextInterval->maxDistance
+ : memberDistance < _nextInterval->maxDistance;
+ if (inInterval) {
+ resultID = result.resultID;
}
} else {
- _workingSet->free(nextMemberID);
+ // A document should be in _seenDocuments if and only if it's in _resultBuffer
+ invariant(_seenDocuments.empty());
}
- return PlanStage::NEED_TIME;
-}
-
-PlanStage::StageState NearStage::advanceNext(WorkingSetID* toReturn) {
- if (_resultBuffer.empty()) {
- // We're done returning the documents buffered for this annulus, so we can
- // clear out our buffered RecordIds.
- _nextIntervalSeen.clear();
+ // memberDistance is not in the interval or _resultBuffer is empty,
+ // so we need to move to the next interval.
+ if (WorkingSet::INVALID_ID == resultID) {
+ _nextInterval = nullptr;
+ _nextIntervalStats = nullptr;
_searchState = SearchState_Buffering;
return PlanStage::NEED_TIME;
}
- *toReturn = _resultBuffer.top().resultID;
+ // The next document in _resultBuffer is in the search interval, so we can return it
_resultBuffer.pop();
// If we're returning something, take it out of our RecordId -> WSID map so that future
// calls to invalidate don't cause us to take action for a RecordId we're done with.
+ *toReturn = resultID;
WorkingSetMember* member = _workingSet->get(*toReturn);
if (member->hasLoc()) {
- _nextIntervalSeen.erase(member->loc);
+ _seenDocuments.erase(member->loc);
+ }
+
+ // TODO: SERVER-19480 Find a more appropriate place to increment numResultsBuffered
+ ++_nextIntervalStats->numResultsBuffered;
+
+ // Update buffered distance stats
+ if (_nextIntervalStats->minDistanceBuffered < 0 ||
+ memberDistance < _nextIntervalStats->minDistanceBuffered) {
+ _nextIntervalStats->minDistanceBuffered = memberDistance;
}
return PlanStage::ADVANCED;
@@ -289,19 +307,19 @@ void NearStage::doReattachToOperationContext(OperationContext* opCtx) {
}
void NearStage::doInvalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) {
- // If a result is in _resultBuffer and has a RecordId it will be in _nextIntervalSeen as
+ // If a result is in _resultBuffer and has a RecordId it will be in _seenDocuments as
// well. It's safe to return the result w/o the RecordId, so just fetch the result.
unordered_map<RecordId, WorkingSetID, RecordId::Hasher>::iterator seenIt =
- _nextIntervalSeen.find(dl);
+ _seenDocuments.find(dl);
- if (seenIt != _nextIntervalSeen.end()) {
+ if (seenIt != _seenDocuments.end()) {
WorkingSetMember* member = _workingSet->get(seenIt->second);
verify(member->hasLoc());
WorkingSetCommon::fetchAndInvalidateLoc(txn, member, _collection);
verify(!member->hasLoc());
// Don't keep it around in the seen map since there's no valid RecordId anymore
- _nextIntervalSeen.erase(seenIt);
+ _seenDocuments.erase(seenIt);
}
}
diff --git a/src/mongo/db/exec/near.h b/src/mongo/db/exec/near.h
index 57773e42b7c..20603cc5e50 100644
--- a/src/mongo/db/exec/near.h
+++ b/src/mongo/db/exec/near.h
@@ -50,7 +50,7 @@ namespace mongo {
* Child stages need to implement functionality which:
*
* - defines a distance metric
- * - iterates through ordered distance intervals, nearest to furthest
+ * - iterates through ordered distance intervals, nearest to farthest
* - provides a covering for each distance interval
*
* For example - given a distance search over documents with distances from [0 -> 10], the child
@@ -58,7 +58,8 @@ namespace mongo {
*
* Each interval requires a PlanStage which *covers* the interval (returns all results in the
* interval). Results in each interval are buffered fully before being returned to ensure that
- * ordering is preserved.
+ * ordering is preserved. Results that are in the cover, but not in the interval will remain
+ * buffered to be returned in subsequent search intervals.
*
* For efficient search, the child stage which covers the distance interval in question should
* not return too many results outside the interval, but correctness only depends on the child
@@ -70,6 +71,15 @@ namespace mongo {
* Also for efficient search, the intervals should not be too large or too small - though again
* correctness does not depend on interval size.
*
+ * The child stage may return duplicate documents, so it is the responsibility of NearStage to
+ * deduplicate. Every document in _resultBuffer is kept track of in _seenDocuments. When a
+ * document is returned or invalidated, it is removed from _seenDocuments.
+ *
+ * TODO: If a document is indexed in multiple cells (Polygons, PolyLines, etc.), there is a
+ * possibility that it will be returned more than once. Since doInvalidate() force fetches a
+ * document and removes it from _seenDocuments, NearStage will not deduplicate if it encounters
+ * another instance of this document.
+ *
* TODO: Right now the interface allows the nextCovering() to be adaptive, but doesn't allow
* aborting and shrinking a covered range being buffered if we guess wrong.
*/
@@ -167,10 +177,11 @@ private:
// May need to track disklocs from the child stage to do our own deduping, also to do
// invalidation of buffered results.
- unordered_map<RecordId, WorkingSetID, RecordId::Hasher> _nextIntervalSeen;
+ unordered_map<RecordId, WorkingSetID, RecordId::Hasher> _seenDocuments;
// Stats for the stage covering this interval
- std::unique_ptr<IntervalStats> _nextIntervalStats;
+ // This is owned by _specificStats
+ IntervalStats* _nextIntervalStats;
// Sorted buffered results to be returned - the current interval
struct SearchResult;
diff --git a/src/mongo/db/query/expression_index.cpp b/src/mongo/db/query/expression_index.cpp
index 17faed44490..9cbdb70ccba 100644
--- a/src/mongo/db/query/expression_index.cpp
+++ b/src/mongo/db/query/expression_index.cpp
@@ -33,7 +33,6 @@
#include "third_party/s2/s2regioncoverer.h"
#include "mongo/db/geo/geoconstants.h"
-#include "mongo/db/geo/hash.h"
#include "mongo/db/geo/r2_region_coverer.h"
#include "mongo/db/hasher.h"
#include "mongo/db/index/expression_params.h"
@@ -69,10 +68,9 @@ static std::string toCoveringString(const GeoHashConverter& hashConverter,
return result + "]";
}
-void ExpressionMapping::cover2d(const R2Region& region,
- const BSONObj& indexInfoObj,
- int maxCoveringCells,
- OrderedIntervalList* oil) {
+std::vector<GeoHash> ExpressionMapping::get2dCovering(const R2Region& region,
+ const BSONObj& indexInfoObj,
+ int maxCoveringCells) {
GeoHashConverter::Parameters hashParams;
Status paramStatus = GeoHashConverter::parseParameters(indexInfoObj, &hashParams);
verify(paramStatus.isOK()); // We validated the parameters when creating the index
@@ -83,23 +81,34 @@ void ExpressionMapping::cover2d(const R2Region& region,
coverer.setMaxCells(maxCoveringCells);
// TODO: Maybe slightly optimize by returning results in order
- vector<GeoHash> unorderedCovering;
+ std::vector<GeoHash> unorderedCovering;
coverer.getCovering(region, &unorderedCovering);
- set<GeoHash> covering(unorderedCovering.begin(), unorderedCovering.end());
+ return unorderedCovering;
+}
+void ExpressionMapping::GeoHashsToIntervalsWithParents(
+ const std::vector<GeoHash>& unorderedCovering, OrderedIntervalList* oilOut) {
+ set<GeoHash> covering(unorderedCovering.begin(), unorderedCovering.end());
for (set<GeoHash>::const_iterator it = covering.begin(); it != covering.end(); ++it) {
const GeoHash& geoHash = *it;
BSONObjBuilder builder;
geoHash.appendHashMin(&builder, "");
geoHash.appendHashMax(&builder, "");
- oil->intervals.push_back(IndexBoundsBuilder::makeRangeInterval(builder.obj(), true, true));
+ oilOut->intervals.push_back(
+ IndexBoundsBuilder::makeRangeInterval(builder.obj(), true, true));
}
}
-void ExpressionMapping::cover2dsphere(const S2Region& region,
- const S2IndexingParams& indexingParams,
- OrderedIntervalList* oilOut) {
+void ExpressionMapping::cover2d(const R2Region& region,
+ const BSONObj& indexInfoObj,
+ int maxCoveringCells,
+ OrderedIntervalList* oilOut) {
+ std::vector<GeoHash> unorderedCovering = get2dCovering(region, indexInfoObj, maxCoveringCells);
+ GeoHashsToIntervalsWithParents(unorderedCovering, oilOut);
+}
+
+std::vector<S2CellId> ExpressionMapping::get2dsphereCovering(const S2Region& region) {
uassert(28739,
"Geo coarsest level must be in range [0,30]",
0 <= internalQueryS2GeoCoarsestLevel && internalQueryS2GeoCoarsestLevel <= 30);
@@ -117,7 +126,13 @@ void ExpressionMapping::cover2dsphere(const S2Region& region,
std::vector<S2CellId> cover;
coverer.GetCovering(region, &cover);
+ return cover;
+}
+void ExpressionMapping::cover2dsphere(const S2Region& region,
+ const S2IndexingParams& indexingParams,
+ OrderedIntervalList* oilOut) {
+ std::vector<S2CellId> cover = get2dsphereCovering(region);
S2CellIdsToIntervalsWithParents(cover, indexingParams, oilOut);
}
diff --git a/src/mongo/db/query/expression_index.h b/src/mongo/db/query/expression_index.h
index c65d57c1322..84b2cd5d288 100644
--- a/src/mongo/db/query/expression_index.h
+++ b/src/mongo/db/query/expression_index.h
@@ -30,6 +30,7 @@
#include "third_party/s2/s2region.h"
+#include "mongo/db/geo/hash.h"
#include "mongo/db/geo/shapes.h"
#include "mongo/db/index/s2_indexing_params.h"
#include "mongo/db/jsobj.h"
@@ -46,10 +47,19 @@ class ExpressionMapping {
public:
static BSONObj hash(const BSONElement& value);
+ static std::vector<GeoHash> get2dCovering(const R2Region& region,
+ const BSONObj& indexInfoObj,
+ int maxCoveringCells);
+
+ static void GeoHashsToIntervalsWithParents(const std::vector<GeoHash>& unorderedCovering,
+ OrderedIntervalList* oilOut);
+
static void cover2d(const R2Region& region,
const BSONObj& indexInfoObj,
int maxCoveringCells,
- OrderedIntervalList* oil);
+ OrderedIntervalList* oilOut);
+
+ static std::vector<S2CellId> get2dsphereCovering(const S2Region& region);
static void cover2dsphere(const S2Region& region,
const S2IndexingParams& indexParams,
diff --git a/src/mongo/dbtests/query_stage_near.cpp b/src/mongo/dbtests/query_stage_near.cpp
index 0832dbf77cb..3349f0b872a 100644
--- a/src/mongo/dbtests/query_stage_near.cpp
+++ b/src/mongo/dbtests/query_stage_near.cpp
@@ -192,14 +192,16 @@ TEST(query_stage_near, Basic) {
// First set of results
mockData.clear();
mockData.push_back(BSON("distance" << 0.5));
- mockData.push_back(BSON("distance" << 2.0 << "$included" << false)); // Not included
+ // Not included in this interval, but will be buffered and included in the last interval
+ mockData.push_back(BSON("distance" << 2.0));
mockData.push_back(BSON("distance" << 0.0));
+ mockData.push_back(BSON("distance" << 3.5)); // Not included
nearStage.addInterval(mockData, 0.0, 1.0);
// Second set of results
mockData.clear();
mockData.push_back(BSON("distance" << 1.5));
- mockData.push_back(BSON("distance" << 2.0 << "$included" << false)); // Not included
+ mockData.push_back(BSON("distance" << 0.5)); // Not included
mockData.push_back(BSON("distance" << 1.0));
nearStage.addInterval(mockData, 1.0, 2.0);
@@ -208,10 +210,11 @@ TEST(query_stage_near, Basic) {
mockData.push_back(BSON("distance" << 2.5));
mockData.push_back(BSON("distance" << 3.0)); // Included
mockData.push_back(BSON("distance" << 2.0));
+ mockData.push_back(BSON("distance" << 3.5)); // Not included
nearStage.addInterval(mockData, 2.0, 3.0);
vector<BSONObj> results = advanceStage(&nearStage, &workingSet);
- ASSERT_EQUALS(results.size(), 7u);
+ ASSERT_EQUALS(results.size(), 8u);
assertAscendingAndValid(results);
}
@@ -225,7 +228,7 @@ TEST(query_stage_near, EmptyResults) {
mockData.clear();
nearStage.addInterval(mockData, 0.0, 1.0);
- // Non-empty sest of results
+ // Non-empty set of results
mockData.clear();
mockData.push_back(BSON("distance" << 1.5));
mockData.push_back(BSON("distance" << 2.0));