/** * Copyright (C) 2014 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the GNU Affero General Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery #include "mongo/db/exec/geo_near.h" // For s2 search #include "third_party/s2/s2regionintersection.h" #include "mongo/base/owned_pointer_vector.h" #include "mongo/db/exec/index_scan.h" #include "mongo/db/exec/fetch.h" #include "mongo/db/exec/working_set_computed_data.h" #include "mongo/db/geo/geoconstants.h" #include "mongo/db/geo/geoparser.h" #include "mongo/db/geo/hash.h" #include "mongo/db/matcher/expression.h" #include "mongo/db/query/expression_index.h" #include "mongo/db/query/expression_index_knobs.h" #include "mongo/db/index/expression_params.h" #include "mongo/util/log.h" #include namespace mongo { using std::abs; using std::unique_ptr; // // Shared GeoNear search functionality // static const double kCircOfEarthInMeters = 2 * M_PI * kRadiusOfEarthInMeters; static const double kMaxEarthDistanceInMeters = kCircOfEarthInMeters / 2; static const double kMetersPerDegreeAtEquator = kCircOfEarthInMeters / 360; namespace { /** * Structure that holds BSON addresses (BSONElements) and the corresponding geometry parsed * at those locations. * Used to separate the parsing of geometries from a BSONObj (which must stay in scope) from * the computation over those geometries. * TODO: Merge with 2D/2DSphere key extraction? */ struct StoredGeometry { static StoredGeometry* parseFrom(const BSONElement& element) { if (!element.isABSONObj()) return NULL; unique_ptr stored(new StoredGeometry); if (!stored->geometry.parseFromStorage(element).isOK()) return NULL; stored->element = element; return stored.release(); } BSONElement element; GeometryContainer geometry; }; } /** * Find and parse all geometry elements on the appropriate field path from the document. */ static void extractGeometries(const BSONObj& doc, const string& path, vector* geometries) { BSONElementSet geomElements; // NOTE: Annoyingly, we cannot just expand arrays b/c single 2d points are arrays, we need // to manually expand all results to check if they are geometries doc.getFieldsDotted(path, geomElements, false /* expand arrays */); for (BSONElementSet::iterator it = geomElements.begin(); it != geomElements.end(); ++it) { const BSONElement& el = *it; unique_ptr stored(StoredGeometry::parseFrom(el)); if (stored.get()) { // Valid geometry element geometries->push_back(stored.release()); } else if (el.type() == Array) { // Many geometries may be in an array BSONObjIterator arrIt(el.Obj()); while (arrIt.more()) { const BSONElement nextEl = arrIt.next(); stored.reset(StoredGeometry::parseFrom(nextEl)); if (stored.get()) { // Valid geometry element geometries->push_back(stored.release()); } else { warning() << "geoNear stage read non-geometry element " << nextEl.toString() << " in array " << el.toString(); } } } else { warning() << "geoNear stage read non-geometry element " << el.toString(); } } } static StatusWith computeGeoNearDistance(const GeoNearParams& nearParams, WorkingSetMember* member) { // // Generic GeoNear distance computation // Distances are computed by projecting the stored geometry into the query CRS, and // computing distance in that CRS. // // Must have an object in order to get geometry out of it. invariant(member->hasObj()); CRS queryCRS = nearParams.nearQuery->centroid->crs; // Extract all the geometries out of this document for the near query OwnedPointerVector geometriesOwned; vector& geometries = geometriesOwned.mutableVector(); extractGeometries(member->obj.value(), nearParams.nearQuery->field, &geometries); // Compute the minimum distance of all the geometries in the document double minDistance = -1; BSONObj minDistanceObj; for (vector::iterator it = geometries.begin(); it != geometries.end(); ++it) { StoredGeometry& stored = **it; // NOTE: A stored document with STRICT_SPHERE CRS is treated as a malformed document // and ignored. Since GeoNear requires an index, there's no stored STRICT_SPHERE shape. // So we don't check it here. // NOTE: For now, we're sure that if we get this far in the query we'll have an // appropriate index which validates the type of geometry we're pulling back here. // TODO: It may make sense to change our semantics and, by default, only return // shapes in the same CRS from $geoNear. if (!stored.geometry.supportsProject(queryCRS)) continue; stored.geometry.projectInto(queryCRS); double nextDistance = stored.geometry.minDistance(*nearParams.nearQuery->centroid); if (minDistance < 0 || nextDistance < minDistance) { minDistance = nextDistance; minDistanceObj = stored.element.Obj(); } } if (minDistance < 0) { // No distance to report return StatusWith(-1); } if (nearParams.addDistMeta) { if (nearParams.nearQuery->unitsAreRadians) { // Hack for nearSphere // TODO: Remove nearSphere? invariant(SPHERE == queryCRS); member->addComputed(new GeoDistanceComputedData(minDistance / kRadiusOfEarthInMeters)); } else { member->addComputed(new GeoDistanceComputedData(minDistance)); } } if (nearParams.addPointMeta) { member->addComputed(new GeoNearPointComputedData(minDistanceObj)); } return StatusWith(minDistance); } static R2Annulus geoNearDistanceBounds(const GeoNearExpression& query) { const CRS queryCRS = query.centroid->crs; if (FLAT == queryCRS) { return R2Annulus(query.centroid->oldPoint, query.minDistance, query.maxDistance); } invariant(SPHERE == queryCRS); // TODO: Tighten this up a bit by making a CRS for "sphere with radians" double minDistance = query.minDistance; double maxDistance = query.maxDistance; if (query.unitsAreRadians) { // Our input bounds are in radians, convert to meters since the query CRS is actually // SPHERE. We'll convert back to radians on outputting distances. minDistance *= kRadiusOfEarthInMeters; maxDistance *= kRadiusOfEarthInMeters; } // GOTCHA: oldPoint is a misnomer - it is the original point data and is in the correct // CRS. We must not try to derive the original point from the spherical S2Point generated // as an optimization - the mapping is not 1->1 - [-180, 0] and [180, 0] map to the same // place. // TODO: Wrapping behavior should not depend on the index, which would make $near code // insensitive to which direction we explore the index in. return R2Annulus(query.centroid->oldPoint, min(minDistance, kMaxEarthDistanceInMeters), min(maxDistance, kMaxEarthDistanceInMeters)); } // // GeoNear2DStage // static R2Annulus twoDDistanceBounds(const GeoNearParams& nearParams, const IndexDescriptor* twoDIndex) { R2Annulus fullBounds = geoNearDistanceBounds(*nearParams.nearQuery); const CRS queryCRS = nearParams.nearQuery->centroid->crs; if (FLAT == queryCRS) { // Reset the full bounds based on our index bounds GeoHashConverter::Parameters hashParams; Status status = GeoHashConverter::parseParameters(twoDIndex->infoObj(), &hashParams); invariant(status.isOK()); // The index status should always be valid // The biggest distance possible in this indexed collection is the diagonal of the // square indexed region. const double sqrt2Approx = 1.5; const double diagonalDist = sqrt2Approx * (hashParams.max - hashParams.min); fullBounds = R2Annulus( fullBounds.center(), fullBounds.getInner(), min(fullBounds.getOuter(), diagonalDist)); } else { // Spherical queries have upper bounds set by the earth - no-op // TODO: Wrapping errors would creep in here if nearSphere wasn't defined to not wrap invariant(SPHERE == queryCRS); invariant(!nearParams.nearQuery->isWrappingQuery); } return fullBounds; } class GeoNear2DStage::DensityEstimator { public: DensityEstimator(const IndexDescriptor* twoDindex, const GeoNearParams* nearParams) : _twoDIndex(twoDindex), _nearParams(nearParams), _currentLevel(0) { GeoHashConverter::Parameters hashParams; Status status = GeoHashConverter::parseParameters(_twoDIndex->infoObj(), &hashParams); // The index status should always be valid. invariant(status.isOK()); _converter.reset(new GeoHashConverter(hashParams)); _centroidCell = _converter->hash(_nearParams->nearQuery->centroid->oldPoint); // Since appendVertexNeighbors(level, output) requires level < hash.getBits(), // we have to start to find documents at most GeoHash::kMaxBits - 1. Thus the finest // search area is 16 * finest cell area at GeoHash::kMaxBits. _currentLevel = std::max(0u, hashParams.bits - 1u); } PlanStage::StageState work(OperationContext* txn, WorkingSet* workingSet, Collection* collection, WorkingSetID* out, double* estimatedDistance); void saveState(); void restoreState(OperationContext* txn); void invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type); private: void buildIndexScan(OperationContext* txn, WorkingSet* workingSet, Collection* collection); const IndexDescriptor* _twoDIndex; // Not owned here. const GeoNearParams* _nearParams; // Not owned here. unique_ptr _indexScan; unique_ptr _converter; GeoHash _centroidCell; unsigned _currentLevel; }; // Initialize the internal states void GeoNear2DStage::DensityEstimator::buildIndexScan(OperationContext* txn, WorkingSet* workingSet, Collection* collection) { IndexScanParams scanParams; scanParams.descriptor = _twoDIndex; scanParams.direction = 1; scanParams.doNotDedup = true; // Scan bounds on 2D indexes are only over the 2D field - other bounds aren't applicable. // This is handled in query planning. scanParams.bounds = _nearParams->baseBounds; // The "2d" field is always the first in the index const string twoDFieldName = _nearParams->nearQuery->field; const int twoDFieldPosition = 0; // Construct index intervals used by this stage OrderedIntervalList oil; oil.name = scanParams.bounds.fields[twoDFieldPosition].name; vector neighbors; // Return the neighbors of closest vertex to this cell at the given level. _centroidCell.appendVertexNeighbors(_currentLevel, &neighbors); std::sort(neighbors.begin(), neighbors.end()); for (vector::const_iterator it = neighbors.begin(); it != neighbors.end(); it++) { mongo::BSONObjBuilder builder; it->appendHashMin(&builder, ""); it->appendHashMax(&builder, ""); oil.intervals.push_back(IndexBoundsBuilder::makeRangeInterval(builder.obj(), true, true)); } invariant(oil.isValidFor(1)); // Intersect the $near bounds we just generated into the bounds we have for anything else // in the scan (i.e. $within) IndexBoundsBuilder::intersectize(oil, &scanParams.bounds.fields[twoDFieldPosition]); _indexScan.reset(new IndexScan(txn, scanParams, workingSet, NULL)); } // Return IS_EOF is we find a document in it's ancestor cells and set estimated distance // from the nearest document. PlanStage::StageState GeoNear2DStage::DensityEstimator::work(OperationContext* txn, WorkingSet* workingSet, Collection* collection, WorkingSetID* out, double* estimatedDistance) { if (!_indexScan) { // Setup index scan stage for current level. buildIndexScan(txn, workingSet, collection); } WorkingSetID workingSetID; PlanStage::StageState state = _indexScan->work(&workingSetID); if (state == PlanStage::IS_EOF) { // We ran through the neighbors but found nothing. if (_currentLevel > 0u) { // Advance to the next level and search again. _currentLevel--; // Reset index scan for the next level. _indexScan.reset(NULL); return PlanStage::NEED_TIME; } // We are already at the top level. *estimatedDistance = _converter->sizeEdge(_currentLevel); return PlanStage::IS_EOF; } else if (state == PlanStage::ADVANCED) { // Found a document at current level. *estimatedDistance = _converter->sizeEdge(_currentLevel); // Clean up working set. workingSet->free(workingSetID); return PlanStage::IS_EOF; } else if (state == PlanStage::NEED_YIELD) { *out = workingSetID; } // Propagate NEED_TIME or errors return state; } void GeoNear2DStage::DensityEstimator::saveState() { if (_indexScan) { _indexScan->saveState(); } } void GeoNear2DStage::DensityEstimator::restoreState(OperationContext* txn) { if (_indexScan) { _indexScan->restoreState(txn); } } void GeoNear2DStage::DensityEstimator::invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) { if (_indexScan) { _indexScan->invalidate(txn, dl, type); } } PlanStage::StageState GeoNear2DStage::initialize(OperationContext* txn, WorkingSet* workingSet, Collection* collection, WorkingSetID* out) { if (!_densityEstimator) { _densityEstimator.reset(new DensityEstimator(_twoDIndex, &_nearParams)); } double estimatedDistance; PlanStage::StageState state = _densityEstimator->work(txn, workingSet, collection, out, &estimatedDistance); if (state == PlanStage::IS_EOF) { // 2d index only works with legacy points as centroid. $nearSphere will project // the point into SPHERE CRS and calculate distance based on that. // STRICT_SPHERE is impossible here, as GeoJSON centroid is not allowed for 2d index. // Estimator finished its work, we need to finish initialization too. if (SPHERE == _nearParams.nearQuery->centroid->crs) { // Estimated distance is in degrees, convert it to meters. _boundsIncrement = deg2rad(estimatedDistance) * kRadiusOfEarthInMeters * 3; // Limit boundsIncrement to ~20KM, so that the first circle won't be too aggressive. _boundsIncrement = std::min(_boundsIncrement, kMaxEarthDistanceInMeters / 1000.0); } else { // We expand the radius by 3 times to give a reasonable starting search area. // Assume points are distributed evenly. X is the edge size of cells at whose // level we found a document in 4 neighbors. Thus the closest point is at least // X/2 far from the centroid. The distance between two points is at least X. // The area of Pi * (3X)^2 ~= 28 * X^2 will cover dozens of points at most. // We'll explore the space with exponentially increasing radius if this guess is // too small, so starting from a conservative initial radius doesn't hurt. _boundsIncrement = 3 * estimatedDistance; } invariant(_boundsIncrement > 0.0); // Clean up _densityEstimator.reset(NULL); } return state; } static const string kTwoDIndexNearStage("GEO_NEAR_2D"); GeoNear2DStage::GeoNear2DStage(const GeoNearParams& nearParams, OperationContext* txn, WorkingSet* workingSet, Collection* collection, IndexDescriptor* twoDIndex) : NearStage(txn, workingSet, collection, new PlanStageStats(CommonStats(kTwoDIndexNearStage.c_str()), STAGE_GEO_NEAR_2D)), _nearParams(nearParams), _twoDIndex(twoDIndex), _fullBounds(twoDDistanceBounds(nearParams, twoDIndex)), _currBounds(_fullBounds.center(), -1, _fullBounds.getInner()), _boundsIncrement(0.0) { getNearStats()->keyPattern = twoDIndex->keyPattern(); getNearStats()->indexName = twoDIndex->indexName(); } GeoNear2DStage::~GeoNear2DStage() {} void GeoNear2DStage::finishSaveState() { if (_densityEstimator) { _densityEstimator->saveState(); } } void GeoNear2DStage::finishRestoreState(OperationContext* txn) { if (_densityEstimator) { _densityEstimator->restoreState(txn); } } void GeoNear2DStage::finishInvalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) { if (_densityEstimator) { _densityEstimator->invalidate(txn, dl, type); } } namespace { /** * Expression which checks whether a legacy 2D index point is contained within our near * search annulus. See nextInterval() below for more discussion. * TODO: Make this a standard type of GEO match expression */ class TwoDPtInAnnulusExpression : public LeafMatchExpression { public: TwoDPtInAnnulusExpression(const R2Annulus& annulus, StringData twoDPath) : LeafMatchExpression(INTERNAL_2D_POINT_IN_ANNULUS), _annulus(annulus) { initPath(twoDPath); } virtual ~TwoDPtInAnnulusExpression() {} virtual void toBSON(BSONObjBuilder* out) const { out->append("TwoDPtInAnnulusExpression", true); } virtual bool matchesSingleElement(const BSONElement& e) const { if (!e.isABSONObj()) return false; PointWithCRS point; if (!GeoParser::parseStoredPoint(e, &point).isOK()) return false; return _annulus.contains(point.oldPoint); } // // These won't be called. // virtual void debugString(StringBuilder& debug, int level = 0) const { invariant(false); } virtual bool equivalent(const MatchExpression* other) const { invariant(false); return false; } virtual LeafMatchExpression* shallowClone() const { invariant(false); return NULL; } private: R2Annulus _annulus; }; /** * Expression which checks whether a 2D key for a point (2D hash) intersects our search * region. The search region may have been formed by more granular hashes. */ class TwoDKeyInRegionExpression : public LeafMatchExpression { public: TwoDKeyInRegionExpression(R2Region* region, const GeoHashConverter::Parameters& hashParams, StringData twoDKeyPath) : LeafMatchExpression(INTERNAL_2D_KEY_IN_REGION), _region(region), _unhasher(hashParams) { initPath(twoDKeyPath); } virtual ~TwoDKeyInRegionExpression() {} virtual void toBSON(BSONObjBuilder* out) const { out->append("TwoDKeyInRegionExpression", true); } virtual bool matchesSingleElement(const BSONElement& e) const { // Something has gone terribly wrong if this doesn't hold. invariant(BinData == e.type()); return !_region->fastDisjoint(_unhasher.unhashToBoxCovering(_unhasher.hash(e))); } // // These won't be called. // virtual void debugString(StringBuilder& debug, int level = 0) const { invariant(false); } virtual bool equivalent(const MatchExpression* other) const { invariant(false); return true; } virtual MatchExpression* shallowClone() const { invariant(false); return NULL; } private: const unique_ptr _region; const GeoHashConverter _unhasher; }; // Helper class to maintain ownership of a match expression alongside an index scan class IndexScanWithMatch : public IndexScan { public: IndexScanWithMatch(OperationContext* txn, const IndexScanParams& params, WorkingSet* workingSet, MatchExpression* filter) : IndexScan(txn, params, workingSet, filter), _matcher(filter) {} virtual ~IndexScanWithMatch() {} private: // Owns matcher const unique_ptr _matcher; }; // Helper class to maintain ownership of a match expression alongside an index scan class FetchStageWithMatch : public FetchStage { public: FetchStageWithMatch(OperationContext* txn, WorkingSet* ws, PlanStage* child, MatchExpression* filter, const Collection* collection) : FetchStage(txn, ws, child, filter, collection), _matcher(filter) {} virtual ~FetchStageWithMatch() {} private: // Owns matcher const unique_ptr _matcher; }; } static double min2DBoundsIncrement(const GeoNearExpression& query, IndexDescriptor* twoDIndex) { GeoHashConverter::Parameters hashParams; Status status = GeoHashConverter::parseParameters(twoDIndex->infoObj(), &hashParams); invariant(status.isOK()); // The index status should always be valid GeoHashConverter hasher(hashParams); // The hasher error is the diagonal of a 2D hash region - it's generally not helpful // to change region size such that a search radius is smaller than the 2D hash region // max radius. This is slightly conservative for now (box diagonal vs circle radius). double minBoundsIncrement = hasher.getError() / 2; const CRS queryCRS = query.centroid->crs; if (FLAT == queryCRS) return minBoundsIncrement; invariant(SPHERE == queryCRS); // If this is a spherical query, units are in meters - this is just a heuristic return minBoundsIncrement * kMetersPerDegreeAtEquator; } static R2Annulus projectBoundsToTwoDDegrees(R2Annulus sphereBounds) { const double outerDegrees = rad2deg(sphereBounds.getOuter() / kRadiusOfEarthInMeters); const double innerDegrees = rad2deg(sphereBounds.getInner() / kRadiusOfEarthInMeters); const double maxErrorDegrees = computeXScanDistance(sphereBounds.center().y, outerDegrees); return R2Annulus(sphereBounds.center(), max(0.0, innerDegrees - maxErrorDegrees), outerDegrees + maxErrorDegrees); } StatusWith // GeoNear2DStage::nextInterval(OperationContext* txn, WorkingSet* workingSet, Collection* collection) { // The search is finished if we searched at least once and all the way to the edge if (_currBounds.getInner() >= 0 && _currBounds.getOuter() == _fullBounds.getOuter()) { return StatusWith(NULL); } // // Setup the next interval // const NearStats* stats = getNearStats(); if (!stats->intervalStats.empty()) { const IntervalStats& lastIntervalStats = stats->intervalStats.back(); // TODO: Generally we want small numbers of results fast, then larger numbers later if (lastIntervalStats.numResultsBuffered < 300) _boundsIncrement *= 2; else if (lastIntervalStats.numResultsBuffered > 600) _boundsIncrement /= 2; } _boundsIncrement = max(_boundsIncrement, min2DBoundsIncrement(*_nearParams.nearQuery, _twoDIndex)); R2Annulus nextBounds(_currBounds.center(), _currBounds.getOuter(), min(_currBounds.getOuter() + _boundsIncrement, _fullBounds.getOuter())); const bool isLastInterval = (nextBounds.getOuter() == _fullBounds.getOuter()); _currBounds = nextBounds; // // Get a covering region for this interval // const CRS queryCRS = _nearParams.nearQuery->centroid->crs; unique_ptr coverRegion; if (FLAT == queryCRS) { // NOTE: Due to floating point math issues, FLAT searches of a 2D index need to treat // containment and distance separately. // Ex: (distance) 54.001 - 54 > 0.001, but (containment) 54 + 0.001 <= 54.001 // The idea is that a $near search with bounds is really a $within search, sorted by // distance. We attach a custom $within : annulus matcher to do the $within search, // and adjust max/min bounds slightly since, as above, containment does not mean the // distance calculation won't slightly overflow the boundary. // // The code below adjusts: // 1) Overall min/max bounds of the generated distance intervals to be more inclusive // 2) Bounds of the interval covering to be more inclusive // ... and later on we add the custom $within : annulus matcher. // // IMPORTANT: The *internal* interval distance bounds are *exact thresholds* - these // should not be adjusted. // TODO: Maybe integrate annuluses as a standard shape, and literally transform $near // internally into a $within query with $near just as sort. // Compute the maximum axis-aligned distance error const double epsilon = std::numeric_limits::epsilon() * (max(abs(_fullBounds.center().x), abs(_fullBounds.center().y)) + _fullBounds.getOuter()); if (nextBounds.getInner() > 0 && nextBounds.getInner() == _fullBounds.getInner()) { nextBounds = R2Annulus(nextBounds.center(), max(0.0, nextBounds.getInner() - epsilon), nextBounds.getOuter()); } if (nextBounds.getOuter() > 0 && nextBounds.getOuter() == _fullBounds.getOuter()) { // We're at the max bound of the search, adjust interval maximum nextBounds = R2Annulus( nextBounds.center(), nextBounds.getInner(), nextBounds.getOuter() + epsilon); } // *Always* adjust the covering bounds to be more inclusive coverRegion.reset(new R2Annulus(nextBounds.center(), max(0.0, nextBounds.getInner() - epsilon), nextBounds.getOuter() + epsilon)); } else { invariant(SPHERE == queryCRS); // TODO: As above, make this consistent with $within : $centerSphere // Our intervals aren't in the same CRS as our index, so we need to adjust them coverRegion.reset(new R2Annulus(projectBoundsToTwoDDegrees(nextBounds))); } // // Setup the stages for this interval // IndexScanParams scanParams; scanParams.descriptor = _twoDIndex; scanParams.direction = 1; // We use a filter on the key. The filter rejects keys that don't intersect with the // annulus. An object that is in the annulus might have a key that's not in it and a key // that's in it. As such we can't just look at one key per object. // // This does force us to do our own deduping of results, though. scanParams.doNotDedup = true; // Scan bounds on 2D indexes are only over the 2D field - other bounds aren't applicable. // This is handled in query planning. scanParams.bounds = _nearParams.baseBounds; // The "2d" field is always the first in the index const string twoDFieldName = _nearParams.nearQuery->field; const int twoDFieldPosition = 0; OrderedIntervalList coveredIntervals; coveredIntervals.name = scanParams.bounds.fields[twoDFieldPosition].name; ExpressionMapping::cover2d(*coverRegion, _twoDIndex->infoObj(), internalGeoNearQuery2DMaxCoveringCells, &coveredIntervals); // Intersect the $near bounds we just generated into the bounds we have for anything else // in the scan (i.e. $within) IndexBoundsBuilder::intersectize(coveredIntervals, &scanParams.bounds.fields[twoDFieldPosition]); // These parameters are stored by the index, and so must be ok GeoHashConverter::Parameters hashParams; GeoHashConverter::parseParameters(_twoDIndex->infoObj(), &hashParams); MatchExpression* keyMatcher = new TwoDKeyInRegionExpression(coverRegion.release(), hashParams, twoDFieldName); // 2D indexes support covered search over additional fields they contain // TODO: Don't need to clone, can just attach to custom matcher above if (_nearParams.filter) { AndMatchExpression* andMatcher = new AndMatchExpression(); andMatcher->add(keyMatcher); andMatcher->add(_nearParams.filter->shallowClone()); keyMatcher = andMatcher; } // IndexScanWithMatch owns the matcher IndexScan* scan = new IndexScanWithMatch(txn, scanParams, workingSet, keyMatcher); MatchExpression* docMatcher = NULL; // FLAT searches need to add an additional annulus $within matcher, see above if (FLAT == queryCRS) { docMatcher = new TwoDPtInAnnulusExpression(_fullBounds, twoDFieldName); } // FetchStage owns index scan FetchStage* fetcher(new FetchStageWithMatch(txn, workingSet, scan, docMatcher, collection)); return StatusWith(new CoveredInterval( fetcher, true, nextBounds.getInner(), nextBounds.getOuter(), isLastInterval)); } StatusWith GeoNear2DStage::computeDistance(WorkingSetMember* member) { return computeGeoNearDistance(_nearParams, member); } // // GeoNear2DSphereStage // static int getFieldPosition(const IndexDescriptor* index, const string& fieldName) { int fieldPosition = 0; BSONObjIterator specIt(index->keyPattern()); while (specIt.more()) { if (specIt.next().fieldName() == fieldName) { break; } ++fieldPosition; } if (fieldPosition == index->keyPattern().nFields()) return -1; return fieldPosition; } static const string kS2IndexNearStage("GEO_NEAR_2DSPHERE"); GeoNear2DSphereStage::GeoNear2DSphereStage(const GeoNearParams& nearParams, OperationContext* txn, WorkingSet* workingSet, Collection* collection, IndexDescriptor* s2Index) : NearStage( txn, workingSet, collection, new PlanStageStats(CommonStats(kS2IndexNearStage.c_str()), STAGE_GEO_NEAR_2DSPHERE)), _nearParams(nearParams), _s2Index(s2Index), _fullBounds(geoNearDistanceBounds(*nearParams.nearQuery)), _currBounds(_fullBounds.center(), -1, _fullBounds.getInner()), _boundsIncrement(0.0) { getNearStats()->keyPattern = s2Index->keyPattern(); getNearStats()->indexName = s2Index->indexName(); } GeoNear2DSphereStage::~GeoNear2DSphereStage() {} namespace { S2Region* buildS2Region(const R2Annulus& sphereBounds) { // Internal bounds come in SPHERE CRS units // i.e. center is lon/lat, inner/outer are in meters S2LatLng latLng = S2LatLng::FromDegrees(sphereBounds.center().y, sphereBounds.center().x); vector regions; S2Cap innerCap = S2Cap::FromAxisAngle( latLng.ToPoint(), S1Angle::Radians(sphereBounds.getInner() / kRadiusOfEarthInMeters)); innerCap = innerCap.Complement(); regions.push_back(new S2Cap(innerCap)); // We only need to max bound if this is not a full search of the Earth // Using the constant here is important since we use the min of kMaxEarthDistance // and the actual bounds passed in to set up the search area. if (sphereBounds.getOuter() < kMaxEarthDistanceInMeters) { S2Cap outerCap = S2Cap::FromAxisAngle( latLng.ToPoint(), S1Angle::Radians(sphereBounds.getOuter() / kRadiusOfEarthInMeters)); regions.push_back(new S2Cap(outerCap)); } // Takes ownership of caps return new S2RegionIntersection(®ions); } /** * Expression which checks whether a 2DSphere key for a point (S2 hash) intersects our * search region. The search region may have been formed by more granular hashes. */ class TwoDSphereKeyInRegionExpression : public LeafMatchExpression { public: TwoDSphereKeyInRegionExpression(const R2Annulus& bounds, StringData twoDSpherePath) : LeafMatchExpression(INTERNAL_2DSPHERE_KEY_IN_REGION), _region(buildS2Region(bounds)) { initPath(twoDSpherePath); } virtual ~TwoDSphereKeyInRegionExpression() {} virtual void toBSON(BSONObjBuilder* out) const { out->append("TwoDSphereKeyInRegionExpression", true); } virtual bool matchesSingleElement(const BSONElement& e) const { // Something has gone terribly wrong if this doesn't hold. invariant(String == e.type()); S2Cell keyCell = S2Cell(S2CellId::FromString(e.str())); return _region->MayIntersect(keyCell); } const S2Region& getRegion() { return *_region; } // // These won't be called. // virtual void debugString(StringBuilder& debug, int level = 0) const { invariant(false); } virtual bool equivalent(const MatchExpression* other) const { invariant(false); return true; } virtual MatchExpression* shallowClone() const { invariant(false); return NULL; } private: const unique_ptr _region; }; } // Estimate the density of data by search the nearest cells level by level around center. class GeoNear2DSphereStage::DensityEstimator { public: DensityEstimator(const IndexDescriptor* s2Index, const GeoNearParams* nearParams) : _s2Index(s2Index), _nearParams(nearParams), _currentLevel(0) { S2IndexingParams params; ExpressionParams::parse2dsphereParams(_s2Index->infoObj(), ¶ms); // Since cellId.AppendVertexNeighbors(level, output) requires level < cellId.level(), // we have to start to find documents at most S2::kMaxCellLevel - 1. Thus the finest // search area is 16 * finest cell area at S2::kMaxCellLevel, which is less than // (1.4 inch X 1.4 inch) on the earth. _currentLevel = std::max(0, params.finestIndexedLevel - 1); } // Search for a document in neighbors at current level. // Return IS_EOF is such document exists and set the estimated distance to the nearest doc. PlanStage::StageState work(OperationContext* txn, WorkingSet* workingSet, Collection* collection, WorkingSetID* out, double* estimatedDistance); void saveState(); void restoreState(OperationContext* txn); void invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type); private: void buildIndexScan(OperationContext* txn, WorkingSet* workingSet, Collection* collection); const IndexDescriptor* _s2Index; // Not owned here. const GeoNearParams* _nearParams; // Not owned here. int _currentLevel; unique_ptr _indexScan; }; // Setup the index scan stage for neighbors at this level. void GeoNear2DSphereStage::DensityEstimator::buildIndexScan(OperationContext* txn, WorkingSet* workingSet, Collection* collection) { IndexScanParams scanParams; scanParams.descriptor = _s2Index; scanParams.direction = 1; scanParams.doNotDedup = true; scanParams.bounds = _nearParams->baseBounds; // Because the planner doesn't yet set up 2D index bounds, do it ourselves here const string s2Field = _nearParams->nearQuery->field; const int s2FieldPosition = getFieldPosition(_s2Index, s2Field); fassert(28677, s2FieldPosition >= 0); OrderedIntervalList* coveredIntervals = &scanParams.bounds.fields[s2FieldPosition]; coveredIntervals->intervals.clear(); // Find 4 neighbors (3 neighbors at face vertex) at current level. const S2CellId& centerId = _nearParams->nearQuery->centroid->cell.id(); vector neighbors; // The search area expands 4X each time. // Return the neighbors of closest vertex to this cell at the given level. invariant(_currentLevel < centerId.level()); centerId.AppendVertexNeighbors(_currentLevel, &neighbors); // Convert S2CellId to string and sort vector neighborKeys; for (vector::const_iterator it = neighbors.begin(); it != neighbors.end(); it++) { neighborKeys.push_back(it->toString()); } std::sort(neighborKeys.begin(), neighborKeys.end()); for (vector::const_iterator it = neighborKeys.begin(); it != neighborKeys.end(); it++) { // construct interval [*it, end) for this cell. std::string end = *it; end[end.size() - 1]++; coveredIntervals->intervals.push_back( IndexBoundsBuilder::makeRangeInterval(*it, end, true, false)); } invariant(coveredIntervals->isValidFor(1)); // Index scan _indexScan.reset(new IndexScan(txn, scanParams, workingSet, NULL)); } PlanStage::StageState GeoNear2DSphereStage::DensityEstimator::work(OperationContext* txn, WorkingSet* workingSet, Collection* collection, WorkingSetID* out, double* estimatedDistance) { if (!_indexScan) { // Setup index scan stage for current level. buildIndexScan(txn, workingSet, collection); } WorkingSetID workingSetID; PlanStage::StageState state = _indexScan->work(&workingSetID); if (state == PlanStage::IS_EOF) { // We ran through the neighbors but found nothing. if (_currentLevel > 0) { // Advance to the next level and search again. _currentLevel--; // Reset index scan for the next level. _indexScan.reset(NULL); return PlanStage::NEED_TIME; } // We are already at the top level. *estimatedDistance = S2::kAvgEdge.GetValue(_currentLevel) * kRadiusOfEarthInMeters; return PlanStage::IS_EOF; } else if (state == PlanStage::ADVANCED) { // We found something! *estimatedDistance = S2::kAvgEdge.GetValue(_currentLevel) * kRadiusOfEarthInMeters; // Clean up working set. workingSet->free(workingSetID); return PlanStage::IS_EOF; } else if (state == PlanStage::NEED_YIELD) { *out = workingSetID; } // Propagate NEED_TIME or errors return state; } void GeoNear2DSphereStage::DensityEstimator::saveState() { if (_indexScan) { _indexScan->saveState(); } } void GeoNear2DSphereStage::DensityEstimator::restoreState(OperationContext* txn) { if (_indexScan) { _indexScan->restoreState(txn); } } void GeoNear2DSphereStage::DensityEstimator::invalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) { if (_indexScan) { _indexScan->invalidate(txn, dl, type); } } PlanStage::StageState GeoNear2DSphereStage::initialize(OperationContext* txn, WorkingSet* workingSet, Collection* collection, WorkingSetID* out) { if (!_densityEstimator) { _densityEstimator.reset(new DensityEstimator(_s2Index, &_nearParams)); } double estimatedDistance; PlanStage::StageState state = _densityEstimator->work(txn, workingSet, collection, out, &estimatedDistance); if (state == IS_EOF) { // We find a document in 4 neighbors at current level, but didn't at previous level. // // Assuming cell size at current level is d and data is even distributed, the distance // between two nearest points are at least d. The following circle with radius of 3 * d // covers PI * 9 * d^2, giving at most 30 documents. // // At the coarsest level, the search area is the whole earth. _boundsIncrement = 3 * estimatedDistance; invariant(_boundsIncrement > 0.0); // Clean up _densityEstimator.reset(NULL); } return state; } void GeoNear2DSphereStage::finishSaveState() { if (_densityEstimator) { _densityEstimator->saveState(); } } void GeoNear2DSphereStage::finishRestoreState(OperationContext* txn) { if (_densityEstimator) { _densityEstimator->restoreState(txn); } } void GeoNear2DSphereStage::finishInvalidate(OperationContext* txn, const RecordId& dl, InvalidationType type) { if (_densityEstimator) { _densityEstimator->invalidate(txn, dl, type); } } StatusWith // GeoNear2DSphereStage::nextInterval(OperationContext* txn, WorkingSet* workingSet, Collection* collection) { // The search is finished if we searched at least once and all the way to the edge if (_currBounds.getInner() >= 0 && _currBounds.getOuter() == _fullBounds.getOuter()) { return StatusWith(NULL); } // // Setup the next interval // const NearStats* stats = getNearStats(); if (!stats->intervalStats.empty()) { const IntervalStats& lastIntervalStats = stats->intervalStats.back(); // TODO: Generally we want small numbers of results fast, then larger numbers later if (lastIntervalStats.numResultsBuffered < 300) _boundsIncrement *= 2; else if (lastIntervalStats.numResultsBuffered > 600) _boundsIncrement /= 2; } invariant(_boundsIncrement > 0.0); R2Annulus nextBounds(_currBounds.center(), _currBounds.getOuter(), min(_currBounds.getOuter() + _boundsIncrement, _fullBounds.getOuter())); bool isLastInterval = (nextBounds.getOuter() == _fullBounds.getOuter()); _currBounds = nextBounds; // // Setup the covering region and stages for this interval // IndexScanParams scanParams; scanParams.descriptor = _s2Index; scanParams.direction = 1; // We use a filter on the key. The filter rejects keys that don't intersect with the // annulus. An object that is in the annulus might have a key that's not in it and a key // that's in it. As such we can't just look at one key per object. // // This does force us to do our own deduping of results, though. scanParams.doNotDedup = true; scanParams.bounds = _nearParams.baseBounds; // Because the planner doesn't yet set up 2D index bounds, do it ourselves here const string s2Field = _nearParams.nearQuery->field; const int s2FieldPosition = getFieldPosition(_s2Index, s2Field); fassert(28678, s2FieldPosition >= 0); scanParams.bounds.fields[s2FieldPosition].intervals.clear(); OrderedIntervalList* coveredIntervals = &scanParams.bounds.fields[s2FieldPosition]; TwoDSphereKeyInRegionExpression* keyMatcher = new TwoDSphereKeyInRegionExpression(_currBounds, s2Field); ExpressionMapping::cover2dsphere( keyMatcher->getRegion(), _s2Index->infoObj(), coveredIntervals); // IndexScan owns the hash matcher IndexScan* scan = new IndexScanWithMatch(txn, scanParams, workingSet, keyMatcher); // FetchStage owns index scan FetchStage* fetcher(new FetchStage(txn, workingSet, scan, _nearParams.filter, collection)); return StatusWith(new CoveredInterval( fetcher, true, nextBounds.getInner(), nextBounds.getOuter(), isLastInterval)); } StatusWith GeoNear2DSphereStage::computeDistance(WorkingSetMember* member) { return computeGeoNearDistance(_nearParams, member); } } // namespace mongo