diff options
author | Brandon Zhang <brandon.zhang@mongodb.com> | 2015-07-15 15:14:11 -0400 |
---|---|---|
committer | Brandon Zhang <brandon.zhang@mongodb.com> | 2015-07-20 11:30:07 -0400 |
commit | 1de0a646d1c373e8a694d2a26c3f5ebb10137f14 (patch) | |
tree | 45a115eeaa15b66941855628c4bdb6e16e154bab /src | |
parent | 85f9c15c98f9490d81e2bdc6ba4a4c8162eaa0f4 (diff) | |
download | mongo-1de0a646d1c373e8a694d2a26c3f5ebb10137f14.tar.gz |
SERVER-19039 Buffer fetched documents in geoNear
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/exec/geo_near.cpp | 207 | ||||
-rw-r--r-- | src/mongo/db/exec/geo_near.h | 9 | ||||
-rw-r--r-- | src/mongo/db/exec/near.cpp | 104 | ||||
-rw-r--r-- | src/mongo/db/exec/near.h | 19 | ||||
-rw-r--r-- | src/mongo/db/query/expression_index.cpp | 37 | ||||
-rw-r--r-- | src/mongo/db/query/expression_index.h | 12 | ||||
-rw-r--r-- | src/mongo/dbtests/query_stage_near.cpp | 11 |
7 files changed, 172 insertions, 227 deletions
diff --git a/src/mongo/db/exec/geo_near.cpp b/src/mongo/db/exec/geo_near.cpp index 400b35bf039..a91b7e49583 100644 --- a/src/mongo/db/exec/geo_near.cpp +++ b/src/mongo/db/exec/geo_near.cpp @@ -41,11 +41,11 @@ #include "mongo/db/geo/geoconstants.h" #include "mongo/db/geo/geoparser.h" #include "mongo/db/geo/hash.h" +#include "mongo/db/index/expression_params.h" +#include "mongo/db/index/s2_keys.h" #include "mongo/db/matcher/expression.h" #include "mongo/db/query/expression_index.h" #include "mongo/db/query/expression_index_knobs.h" -#include "mongo/db/index/expression_params.h" -#include "mongo/db/index/s2_keys.h" #include "mongo/util/log.h" #include <algorithm> @@ -498,70 +498,6 @@ private: R2Annulus _annulus; }; -/** - * Expression which checks whether a 2D key for a point (2D hash) intersects our search - * region. The search region may have been formed by more granular hashes. - */ -class TwoDKeyInRegionExpression : public LeafMatchExpression { -public: - TwoDKeyInRegionExpression(R2Region* region, - const GeoHashConverter::Parameters& hashParams, - StringData twoDKeyPath) - : LeafMatchExpression(INTERNAL_2D_KEY_IN_REGION), _region(region), _unhasher(hashParams) { - initPath(twoDKeyPath); - } - - virtual ~TwoDKeyInRegionExpression() {} - - virtual void toBSON(BSONObjBuilder* out) const { - out->append("TwoDKeyInRegionExpression", true); - } - - virtual bool matchesSingleElement(const BSONElement& e) const { - // Something has gone terribly wrong if this doesn't hold. - invariant(BinData == e.type()); - return !_region->fastDisjoint(_unhasher.unhashToBoxCovering(_unhasher.hash(e))); - } - - // - // These won't be called. - // - - virtual void debugString(StringBuilder& debug, int level = 0) const { - invariant(false); - } - - virtual bool equivalent(const MatchExpression* other) const { - invariant(false); - return true; - } - - virtual unique_ptr<MatchExpression> shallowClone() const { - invariant(false); - return NULL; - } - -private: - const unique_ptr<R2Region> _region; - const GeoHashConverter _unhasher; -}; - -// Helper class to maintain ownership of a match expression alongside an index scan -class IndexScanWithMatch : public IndexScan { -public: - IndexScanWithMatch(OperationContext* txn, - const IndexScanParams& params, - WorkingSet* workingSet, - MatchExpression* filter) - : IndexScan(txn, params, workingSet, filter), _matcher(filter) {} - - virtual ~IndexScanWithMatch() {} - -private: - // Owns matcher - const unique_ptr<MatchExpression> _matcher; -}; - // Helper class to maintain ownership of a match expression alongside an index scan class FetchStageWithMatch : public FetchStage { public: @@ -707,11 +643,8 @@ StatusWith<NearStage::CoveredInterval*> // IndexScanParams scanParams; scanParams.descriptor = _twoDIndex; scanParams.direction = 1; - // We use a filter on the key. The filter rejects keys that don't intersect with the - // annulus. An object that is in the annulus might have a key that's not in it and a key - // that's in it. As such we can't just look at one key per object. - // - // This does force us to do our own deduping of results, though. + + // This does force us to do our own deduping of results. scanParams.doNotDedup = true; // Scan bounds on 2D indexes are only over the 2D field - other bounds aren't applicable. @@ -722,13 +655,23 @@ StatusWith<NearStage::CoveredInterval*> // const string twoDFieldName = _nearParams.nearQuery->field; const int twoDFieldPosition = 0; + std::vector<GeoHash> unorderedCovering = ExpressionMapping::get2dCovering( + *coverRegion, _twoDIndex->infoObj(), internalGeoNearQuery2DMaxCoveringCells); + + // Make sure the same index key isn't visited twice + R2CellUnion diffUnion; + diffUnion.init(unorderedCovering); + diffUnion.getDifference(_scannedCells); + // After taking the difference, there may be cells in the covering that don't intersect + // with the annulus. + diffUnion.detach(&unorderedCovering); + + // Add the cells in this covering to the _scannedCells union + _scannedCells.add(unorderedCovering); + OrderedIntervalList coveredIntervals; coveredIntervals.name = scanParams.bounds.fields[twoDFieldPosition].name; - - ExpressionMapping::cover2d(*coverRegion, - _twoDIndex->infoObj(), - internalGeoNearQuery2DMaxCoveringCells, - &coveredIntervals); + ExpressionMapping::GeoHashsToIntervalsWithParents(unorderedCovering, &coveredIntervals); // Intersect the $near bounds we just generated into the bounds we have for anything else // in the scan (i.e. $within) @@ -739,24 +682,13 @@ StatusWith<NearStage::CoveredInterval*> // GeoHashConverter::Parameters hashParams; GeoHashConverter::parseParameters(_twoDIndex->infoObj(), &hashParams); - MatchExpression* keyMatcher = - new TwoDKeyInRegionExpression(coverRegion.release(), hashParams, twoDFieldName); - // 2D indexes support covered search over additional fields they contain - // TODO: Don't need to clone, can just attach to custom matcher above - if (_nearParams.filter) { - AndMatchExpression* andMatcher = new AndMatchExpression(); - andMatcher->add(keyMatcher); - andMatcher->add(_nearParams.filter->shallowClone().release()); - keyMatcher = andMatcher; - } - - // IndexScanWithMatch owns the matcher - IndexScan* scan = new IndexScanWithMatch(txn, scanParams, workingSet, keyMatcher); + IndexScan* scan = new IndexScan(txn, scanParams, workingSet, _nearParams.filter); - MatchExpression* docMatcher = NULL; + MatchExpression* docMatcher = nullptr; // FLAT searches need to add an additional annulus $within matcher, see above + // TODO: Find out if this matcher is actually needed if (FLAT == queryCRS) { docMatcher = new TwoDPtInAnnulusExpression(_fullBounds, twoDFieldName); } @@ -855,68 +787,6 @@ S2Region* buildS2Region(const R2Annulus& sphereBounds) { // Takes ownership of caps return new S2RegionIntersection(®ions); } - -/** - * Expression which checks whether a 2DSphere key for a point (S2 hash) intersects our - * search region. The search region may have been formed by more granular hashes. - */ -class TwoDSphereKeyInRegionExpression : public LeafMatchExpression { -public: - TwoDSphereKeyInRegionExpression(const R2Annulus& bounds, - S2IndexVersion indexVersion, - StringData twoDSpherePath) - : LeafMatchExpression(INTERNAL_2DSPHERE_KEY_IN_REGION), - _region(buildS2Region(bounds)), - _indexVersion(indexVersion) { - initPath(twoDSpherePath); - } - - virtual ~TwoDSphereKeyInRegionExpression() {} - - virtual void toBSON(BSONObjBuilder* out) const { - out->append("TwoDSphereKeyInRegionExpression", true); - } - - virtual bool matchesSingleElement(const BSONElement& e) const { - S2Cell keyCell; - if (_indexVersion < S2_INDEX_VERSION_3) { - // Something has gone terribly wrong if this doesn't hold. - invariant(String == e.type()); - keyCell = S2Cell(S2CellId::FromString(e.str())); - } else { - // Something has gone terribly wrong if this doesn't hold. - invariant(NumberLong == e.type()); - keyCell = S2Cell(S2CellIdFromIndexKey(e.numberLong())); - } - return _region->MayIntersect(keyCell); - } - - const S2Region& getRegion() { - return *_region; - } - - // - // These won't be called. - // - - virtual void debugString(StringBuilder& debug, int level = 0) const { - invariant(false); - } - - virtual bool equivalent(const MatchExpression* other) const { - invariant(false); - return true; - } - - virtual unique_ptr<MatchExpression> shallowClone() const { - invariant(false); - return NULL; - } - -private: - const unique_ptr<S2Region> _region; - const S2IndexVersion _indexVersion; -}; } // Estimate the density of data by search the nearest cells level by level around center. @@ -1103,11 +973,8 @@ StatusWith<NearStage::CoveredInterval*> // IndexScanParams scanParams; scanParams.descriptor = _s2Index; scanParams.direction = 1; - // We use a filter on the key. The filter rejects keys that don't intersect with the - // annulus. An object that is in the annulus might have a key that's not in it and a key - // that's in it. As such we can't just look at one key per object. - // - // This does force us to do our own deduping of results, though. + + // This does force us to do our own deduping of results. scanParams.doNotDedup = true; scanParams.bounds = _nearParams.baseBounds; @@ -1116,15 +983,29 @@ StatusWith<NearStage::CoveredInterval*> // const int s2FieldPosition = getFieldPosition(_s2Index, s2Field); fassert(28678, s2FieldPosition >= 0); scanParams.bounds.fields[s2FieldPosition].intervals.clear(); - OrderedIntervalList* coveredIntervals = &scanParams.bounds.fields[s2FieldPosition]; + std::unique_ptr<S2Region> region(buildS2Region(_currBounds)); + + std::vector<S2CellId> cover = ExpressionMapping::get2dsphereCovering(*region); + + // Generate a covering that does not intersect with any previous coverings + S2CellUnion coverUnion; + coverUnion.InitSwap(&cover); + invariant(cover.empty()); + S2CellUnion diffUnion; + diffUnion.GetDifference(&coverUnion, &_scannedCells); + for (auto cellId : diffUnion.cell_ids()) { + if (region->MayIntersect(S2Cell(cellId))) { + cover.push_back(cellId); + } + } - TwoDSphereKeyInRegionExpression* keyMatcher = - new TwoDSphereKeyInRegionExpression(_currBounds, _indexParams.indexVersion, s2Field); + // Add the cells in this covering to the _scannedCells union + _scannedCells.Add(cover); - ExpressionMapping::cover2dsphere(keyMatcher->getRegion(), _indexParams, coveredIntervals); + OrderedIntervalList* coveredIntervals = &scanParams.bounds.fields[s2FieldPosition]; + S2CellIdsToIntervalsWithParents(cover, _indexParams, coveredIntervals); - // IndexScan owns the hash matcher - IndexScan* scan = new IndexScanWithMatch(txn, scanParams, workingSet, keyMatcher); + IndexScan* scan = new IndexScan(txn, scanParams, workingSet, nullptr); // FetchStage owns index scan _children.emplace_back(new FetchStage(txn, workingSet, scan, _nearParams.filter, collection)); diff --git a/src/mongo/db/exec/geo_near.h b/src/mongo/db/exec/geo_near.h index b898ee51280..e429a742197 100644 --- a/src/mongo/db/exec/geo_near.h +++ b/src/mongo/db/exec/geo_near.h @@ -28,16 +28,17 @@ #pragma once - #include "mongo/db/exec/near.h" #include "mongo/db/exec/working_set.h" #include "mongo/db/exec/plan_stats.h" #include "mongo/db/geo/geometry_container.h" #include "mongo/db/index/index_descriptor.h" #include "mongo/db/index/s2_indexing_params.h" +#include "mongo/db/geo/r2_region_coverer.h" #include "mongo/db/matcher/expression.h" #include "mongo/db/matcher/expression_geo.h" #include "mongo/db/query/index_bounds.h" +#include "third_party/s2/s2cellunion.h" namespace mongo { @@ -98,6 +99,9 @@ private: // Amount to increment the next bounds by double _boundsIncrement; + // Keeps track of the region that has already been scanned + R2CellUnion _scannedCells; + class DensityEstimator; std::unique_ptr<DensityEstimator> _densityEstimator; }; @@ -145,6 +149,9 @@ private: // Amount to increment the next bounds by double _boundsIncrement; + // Keeps track of the region that has already been scanned + S2CellUnion _scannedCells; + class DensityEstimator; std::unique_ptr<DensityEstimator> _densityEstimator; }; diff --git a/src/mongo/db/exec/near.cpp b/src/mongo/db/exec/near.cpp index ceeda89c192..5cf3018c8a7 100644 --- a/src/mongo/db/exec/near.cpp +++ b/src/mongo/db/exec/near.cpp @@ -51,8 +51,9 @@ NearStage::NearStage(OperationContext* txn, _workingSet(workingSet), _collection(collection), _searchState(SearchState_Initializing), + _nextIntervalStats(nullptr), _stageType(type), - _nextInterval(NULL) {} + _nextInterval(nullptr) {} NearStage::~NearStage() {} @@ -164,7 +165,8 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro // CoveredInterval and its child stage are owned by _childrenIntervals _childrenIntervals.push_back(intervalStatus.getValue()); _nextInterval = _childrenIntervals.back(); - _nextIntervalStats.reset(new IntervalStats()); + _specificStats.intervalStats.emplace_back(); + _nextIntervalStats = &_specificStats.intervalStats.back(); _nextIntervalStats->minDistanceAllowed = _nextInterval->minDistance; _nextIntervalStats->maxDistanceAllowed = _nextInterval->maxDistance; _nextIntervalStats->inclusiveMaxDistanceAllowed = _nextInterval->inclusiveMax; @@ -174,9 +176,6 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro PlanStage::StageState intervalState = _nextInterval->covering->work(&nextMemberID); if (PlanStage::IS_EOF == intervalState) { - _specificStats.intervalStats.push_back(*_nextIntervalStats); - _nextIntervalStats.reset(); - _nextInterval = NULL; _searchState = SearchState_Advancing; return PlanStage::NEED_TIME; } else if (PlanStage::FAILURE == intervalState) { @@ -197,7 +196,7 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro // The child stage may not dedup so we must dedup them ourselves. if (_nextInterval->dedupCovering && nextMember->hasLoc()) { - if (_nextIntervalSeen.end() != _nextIntervalSeen.find(nextMember->loc)) { + if (_seenDocuments.end() != _seenDocuments.find(nextMember->loc)) { _workingSet->free(nextMemberID); return PlanStage::NEED_TIME; } @@ -216,9 +215,6 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro // If the member's distance is in the current distance interval, add it to our buffered // results. double memberDistance = distanceStatus.getValue(); - bool inInterval = memberDistance >= _nextInterval->minDistance && - (_nextInterval->inclusiveMax ? memberDistance <= _nextInterval->maxDistance - : memberDistance < _nextInterval->maxDistance); // Update found distance stats if (_nextIntervalStats->minDistanceFound < 0 || @@ -226,55 +222,77 @@ PlanStage::StageState NearStage::bufferNext(WorkingSetID* toReturn, Status* erro _nextIntervalStats->minDistanceFound = memberDistance; } - if (_nextIntervalStats->maxDistanceFound < 0 || - memberDistance > _nextIntervalStats->maxDistanceFound) { - _nextIntervalStats->maxDistanceFound = memberDistance; - } - - if (inInterval) { - _resultBuffer.push(SearchResult(nextMemberID, memberDistance)); + _resultBuffer.push(SearchResult(nextMemberID, memberDistance)); - // Store the member's RecordId, if available, for quick invalidation - if (nextMember->hasLoc()) { - _nextIntervalSeen.insert(std::make_pair(nextMember->loc, nextMemberID)); - } + // Store the member's RecordId, if available, for quick invalidation + if (nextMember->hasLoc()) { + _seenDocuments.insert(std::make_pair(nextMember->loc, nextMemberID)); + } - ++_nextIntervalStats->numResultsBuffered; + return PlanStage::NEED_TIME; +} - // Update buffered distance stats - if (_nextIntervalStats->minDistanceBuffered < 0 || - memberDistance < _nextIntervalStats->minDistanceBuffered) { - _nextIntervalStats->minDistanceBuffered = memberDistance; +PlanStage::StageState NearStage::advanceNext(WorkingSetID* toReturn) { + // Returns documents to the parent stage. + // If the document does not fall in the current interval, it will be buffered so that + // it might be returned in a following interval. + + // Check if the next member is in the search interval and that the buffer isn't empty + WorkingSetID resultID = WorkingSet::INVALID_ID; + // memberDistance is initialized to produce an error if used before its value is changed + double memberDistance = std::numeric_limits<double>::lowest(); + if (!_resultBuffer.empty()) { + SearchResult result = _resultBuffer.top(); + memberDistance = result.distance; + + // Throw out all documents with memberDistance < minDistance + if (memberDistance < _nextInterval->minDistance) { + WorkingSetMember* member = _workingSet->get(result.resultID); + if (member->hasLoc()) { + _seenDocuments.erase(member->loc); + } + _resultBuffer.pop(); + _workingSet->free(result.resultID); + return PlanStage::NEED_TIME; } - if (_nextIntervalStats->maxDistanceBuffered < 0 || - memberDistance > _nextIntervalStats->maxDistanceBuffered) { - _nextIntervalStats->maxDistanceBuffered = memberDistance; + bool inInterval = _nextInterval->inclusiveMax ? memberDistance <= _nextInterval->maxDistance + : memberDistance < _nextInterval->maxDistance; + if (inInterval) { + resultID = result.resultID; } } else { - _workingSet->free(nextMemberID); + // A document should be in _seenDocuments if and only if it's in _resultBuffer + invariant(_seenDocuments.empty()); } - return PlanStage::NEED_TIME; -} - -PlanStage::StageState NearStage::advanceNext(WorkingSetID* toReturn) { - if (_resultBuffer.empty()) { - // We're done returning the documents buffered for this annulus, so we can - // clear out our buffered RecordIds. - _nextIntervalSeen.clear(); + // memberDistance is not in the interval or _resultBuffer is empty, + // so we need to move to the next interval. + if (WorkingSet::INVALID_ID == resultID) { + _nextInterval = nullptr; + _nextIntervalStats = nullptr; _searchState = SearchState_Buffering; return PlanStage::NEED_TIME; } - *toReturn = _resultBuffer.top().resultID; + // The next document in _resultBuffer is in the search interval, so we can return it _resultBuffer.pop(); // If we're returning something, take it out of our RecordId -> WSID map so that future // calls to invalidate don't cause us to take action for a RecordId we're done with. + *toReturn = resultID; WorkingSetMember* member = _workingSet->get(*toReturn); if (member->hasLoc()) { - _nextIntervalSeen.erase(member->loc); + _seenDocuments.erase(member->loc); + } + + // TODO: SERVER-19480 Find a more appropriate place to increment numResultsBuffered + ++_nextIntervalStats->numResultsBuffered; + + // Update buffered distance stats + if (_nextIntervalStats->minDistanceBuffered < 0 || + memberDistance < _nextIntervalStats->minDistanceBuffered) { + _nextIntervalStats->minDistanceBuffered = memberDistance; } return PlanStage::ADVANCED; @@ -289,19 +307,19 @@ void NearStage::doReattachToOperationContext(OperationContext* opCtx) { } void NearStage::doInvalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) { - // If a result is in _resultBuffer and has a RecordId it will be in _nextIntervalSeen as + // If a result is in _resultBuffer and has a RecordId it will be in _seenDocuments as // well. It's safe to return the result w/o the RecordId, so just fetch the result. unordered_map<RecordId, WorkingSetID, RecordId::Hasher>::iterator seenIt = - _nextIntervalSeen.find(dl); + _seenDocuments.find(dl); - if (seenIt != _nextIntervalSeen.end()) { + if (seenIt != _seenDocuments.end()) { WorkingSetMember* member = _workingSet->get(seenIt->second); verify(member->hasLoc()); WorkingSetCommon::fetchAndInvalidateLoc(txn, member, _collection); verify(!member->hasLoc()); // Don't keep it around in the seen map since there's no valid RecordId anymore - _nextIntervalSeen.erase(seenIt); + _seenDocuments.erase(seenIt); } } diff --git a/src/mongo/db/exec/near.h b/src/mongo/db/exec/near.h index 57773e42b7c..20603cc5e50 100644 --- a/src/mongo/db/exec/near.h +++ b/src/mongo/db/exec/near.h @@ -50,7 +50,7 @@ namespace mongo { * Child stages need to implement functionality which: * * - defines a distance metric - * - iterates through ordered distance intervals, nearest to furthest + * - iterates through ordered distance intervals, nearest to farthest * - provides a covering for each distance interval * * For example - given a distance search over documents with distances from [0 -> 10], the child @@ -58,7 +58,8 @@ namespace mongo { * * Each interval requires a PlanStage which *covers* the interval (returns all results in the * interval). Results in each interval are buffered fully before being returned to ensure that - * ordering is preserved. + * ordering is preserved. Results that are in the cover, but not in the interval will remain + * buffered to be returned in subsequent search intervals. * * For efficient search, the child stage which covers the distance interval in question should * not return too many results outside the interval, but correctness only depends on the child @@ -70,6 +71,15 @@ namespace mongo { * Also for efficient search, the intervals should not be too large or too small - though again * correctness does not depend on interval size. * + * The child stage may return duplicate documents, so it is the responsibility of NearStage to + * deduplicate. Every document in _resultBuffer is kept track of in _seenDocuments. When a + * document is returned or invalidated, it is removed from _seenDocuments. + * + * TODO: If a document is indexed in multiple cells (Polygons, PolyLines, etc.), there is a + * possibility that it will be returned more than once. Since doInvalidate() force fetches a + * document and removes it from _seenDocuments, NearStage will not deduplicate if it encounters + * another instance of this document. + * * TODO: Right now the interface allows the nextCovering() to be adaptive, but doesn't allow * aborting and shrinking a covered range being buffered if we guess wrong. */ @@ -167,10 +177,11 @@ private: // May need to track disklocs from the child stage to do our own deduping, also to do // invalidation of buffered results. - unordered_map<RecordId, WorkingSetID, RecordId::Hasher> _nextIntervalSeen; + unordered_map<RecordId, WorkingSetID, RecordId::Hasher> _seenDocuments; // Stats for the stage covering this interval - std::unique_ptr<IntervalStats> _nextIntervalStats; + // This is owned by _specificStats + IntervalStats* _nextIntervalStats; // Sorted buffered results to be returned - the current interval struct SearchResult; diff --git a/src/mongo/db/query/expression_index.cpp b/src/mongo/db/query/expression_index.cpp index 17faed44490..9cbdb70ccba 100644 --- a/src/mongo/db/query/expression_index.cpp +++ b/src/mongo/db/query/expression_index.cpp @@ -33,7 +33,6 @@ #include "third_party/s2/s2regioncoverer.h" #include "mongo/db/geo/geoconstants.h" -#include "mongo/db/geo/hash.h" #include "mongo/db/geo/r2_region_coverer.h" #include "mongo/db/hasher.h" #include "mongo/db/index/expression_params.h" @@ -69,10 +68,9 @@ static std::string toCoveringString(const GeoHashConverter& hashConverter, return result + "]"; } -void ExpressionMapping::cover2d(const R2Region& region, - const BSONObj& indexInfoObj, - int maxCoveringCells, - OrderedIntervalList* oil) { +std::vector<GeoHash> ExpressionMapping::get2dCovering(const R2Region& region, + const BSONObj& indexInfoObj, + int maxCoveringCells) { GeoHashConverter::Parameters hashParams; Status paramStatus = GeoHashConverter::parseParameters(indexInfoObj, &hashParams); verify(paramStatus.isOK()); // We validated the parameters when creating the index @@ -83,23 +81,34 @@ void ExpressionMapping::cover2d(const R2Region& region, coverer.setMaxCells(maxCoveringCells); // TODO: Maybe slightly optimize by returning results in order - vector<GeoHash> unorderedCovering; + std::vector<GeoHash> unorderedCovering; coverer.getCovering(region, &unorderedCovering); - set<GeoHash> covering(unorderedCovering.begin(), unorderedCovering.end()); + return unorderedCovering; +} +void ExpressionMapping::GeoHashsToIntervalsWithParents( + const std::vector<GeoHash>& unorderedCovering, OrderedIntervalList* oilOut) { + set<GeoHash> covering(unorderedCovering.begin(), unorderedCovering.end()); for (set<GeoHash>::const_iterator it = covering.begin(); it != covering.end(); ++it) { const GeoHash& geoHash = *it; BSONObjBuilder builder; geoHash.appendHashMin(&builder, ""); geoHash.appendHashMax(&builder, ""); - oil->intervals.push_back(IndexBoundsBuilder::makeRangeInterval(builder.obj(), true, true)); + oilOut->intervals.push_back( + IndexBoundsBuilder::makeRangeInterval(builder.obj(), true, true)); } } -void ExpressionMapping::cover2dsphere(const S2Region& region, - const S2IndexingParams& indexingParams, - OrderedIntervalList* oilOut) { +void ExpressionMapping::cover2d(const R2Region& region, + const BSONObj& indexInfoObj, + int maxCoveringCells, + OrderedIntervalList* oilOut) { + std::vector<GeoHash> unorderedCovering = get2dCovering(region, indexInfoObj, maxCoveringCells); + GeoHashsToIntervalsWithParents(unorderedCovering, oilOut); +} + +std::vector<S2CellId> ExpressionMapping::get2dsphereCovering(const S2Region& region) { uassert(28739, "Geo coarsest level must be in range [0,30]", 0 <= internalQueryS2GeoCoarsestLevel && internalQueryS2GeoCoarsestLevel <= 30); @@ -117,7 +126,13 @@ void ExpressionMapping::cover2dsphere(const S2Region& region, std::vector<S2CellId> cover; coverer.GetCovering(region, &cover); + return cover; +} +void ExpressionMapping::cover2dsphere(const S2Region& region, + const S2IndexingParams& indexingParams, + OrderedIntervalList* oilOut) { + std::vector<S2CellId> cover = get2dsphereCovering(region); S2CellIdsToIntervalsWithParents(cover, indexingParams, oilOut); } diff --git a/src/mongo/db/query/expression_index.h b/src/mongo/db/query/expression_index.h index c65d57c1322..84b2cd5d288 100644 --- a/src/mongo/db/query/expression_index.h +++ b/src/mongo/db/query/expression_index.h @@ -30,6 +30,7 @@ #include "third_party/s2/s2region.h" +#include "mongo/db/geo/hash.h" #include "mongo/db/geo/shapes.h" #include "mongo/db/index/s2_indexing_params.h" #include "mongo/db/jsobj.h" @@ -46,10 +47,19 @@ class ExpressionMapping { public: static BSONObj hash(const BSONElement& value); + static std::vector<GeoHash> get2dCovering(const R2Region& region, + const BSONObj& indexInfoObj, + int maxCoveringCells); + + static void GeoHashsToIntervalsWithParents(const std::vector<GeoHash>& unorderedCovering, + OrderedIntervalList* oilOut); + static void cover2d(const R2Region& region, const BSONObj& indexInfoObj, int maxCoveringCells, - OrderedIntervalList* oil); + OrderedIntervalList* oilOut); + + static std::vector<S2CellId> get2dsphereCovering(const S2Region& region); static void cover2dsphere(const S2Region& region, const S2IndexingParams& indexParams, diff --git a/src/mongo/dbtests/query_stage_near.cpp b/src/mongo/dbtests/query_stage_near.cpp index 0832dbf77cb..3349f0b872a 100644 --- a/src/mongo/dbtests/query_stage_near.cpp +++ b/src/mongo/dbtests/query_stage_near.cpp @@ -192,14 +192,16 @@ TEST(query_stage_near, Basic) { // First set of results mockData.clear(); mockData.push_back(BSON("distance" << 0.5)); - mockData.push_back(BSON("distance" << 2.0 << "$included" << false)); // Not included + // Not included in this interval, but will be buffered and included in the last interval + mockData.push_back(BSON("distance" << 2.0)); mockData.push_back(BSON("distance" << 0.0)); + mockData.push_back(BSON("distance" << 3.5)); // Not included nearStage.addInterval(mockData, 0.0, 1.0); // Second set of results mockData.clear(); mockData.push_back(BSON("distance" << 1.5)); - mockData.push_back(BSON("distance" << 2.0 << "$included" << false)); // Not included + mockData.push_back(BSON("distance" << 0.5)); // Not included mockData.push_back(BSON("distance" << 1.0)); nearStage.addInterval(mockData, 1.0, 2.0); @@ -208,10 +210,11 @@ TEST(query_stage_near, Basic) { mockData.push_back(BSON("distance" << 2.5)); mockData.push_back(BSON("distance" << 3.0)); // Included mockData.push_back(BSON("distance" << 2.0)); + mockData.push_back(BSON("distance" << 3.5)); // Not included nearStage.addInterval(mockData, 2.0, 3.0); vector<BSONObj> results = advanceStage(&nearStage, &workingSet); - ASSERT_EQUALS(results.size(), 7u); + ASSERT_EQUALS(results.size(), 8u); assertAscendingAndValid(results); } @@ -225,7 +228,7 @@ TEST(query_stage_near, EmptyResults) { mockData.clear(); nearStage.addInterval(mockData, 0.0, 1.0); - // Non-empty sest of results + // Non-empty set of results mockData.clear(); mockData.push_back(BSON("distance" << 1.5)); mockData.push_back(BSON("distance" << 2.0)); |