/**
* Copyright (C) 2014 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the GNU Affero General Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
#include "mongo/db/exec/geo_near.h"
#include
#include
// For s2 search
#include "third_party/s2/s2regionintersection.h"
#include "mongo/base/owned_pointer_vector.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/exec/fetch.h"
#include "mongo/db/exec/index_scan.h"
#include "mongo/db/exec/working_set_computed_data.h"
#include "mongo/db/geo/geoconstants.h"
#include "mongo/db/geo/geoparser.h"
#include "mongo/db/geo/hash.h"
#include "mongo/db/index/expression_params.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/query/expression_index.h"
#include "mongo/db/query/expression_index_knobs.h"
#include "mongo/stdx/memory.h"
#include "mongo/util/log.h"
#include
namespace mongo {
using std::abs;
using std::unique_ptr;
namespace dps = ::mongo::dotted_path_support;
//
// Shared GeoNear search functionality
//
static const double kCircOfEarthInMeters = 2 * M_PI * kRadiusOfEarthInMeters;
static const double kMaxEarthDistanceInMeters = kCircOfEarthInMeters / 2;
static const double kMetersPerDegreeAtEquator = kCircOfEarthInMeters / 360;
namespace {
/**
* Structure that holds BSON addresses (BSONElements) and the corresponding geometry parsed
* at those locations.
* Used to separate the parsing of geometries from a BSONObj (which must stay in scope) from
* the computation over those geometries.
* TODO: Merge with 2D/2DSphere key extraction?
*/
struct StoredGeometry {
static StoredGeometry* parseFrom(const BSONElement& element) {
if (!element.isABSONObj())
return NULL;
unique_ptr stored(new StoredGeometry);
// GeoNear stage can only be run with an existing index
// Therefore, it is always safe to skip geometry validation
if (!stored->geometry.parseFromStorage(element, true).isOK())
return NULL;
stored->element = element;
return stored.release();
}
BSONElement element;
GeometryContainer geometry;
};
}
/**
* Find and parse all geometry elements on the appropriate field path from the document.
*/
static void extractGeometries(const BSONObj& doc,
const string& path,
std::vector>* geometries) {
BSONElementSet geomElements;
// NOTE: Annoyingly, we cannot just expand arrays b/c single 2d points are arrays, we need
// to manually expand all results to check if they are geometries
dps::extractAllElementsAlongPath(doc, path, geomElements, false /* expand arrays */);
for (BSONElementSet::iterator it = geomElements.begin(); it != geomElements.end(); ++it) {
const BSONElement& el = *it;
unique_ptr stored(StoredGeometry::parseFrom(el));
if (stored.get()) {
// Valid geometry element
geometries->push_back(std::move(stored));
} else if (el.type() == Array) {
// Many geometries may be in an array
BSONObjIterator arrIt(el.Obj());
while (arrIt.more()) {
const BSONElement nextEl = arrIt.next();
stored.reset(StoredGeometry::parseFrom(nextEl));
if (stored.get()) {
// Valid geometry element
geometries->push_back(std::move(stored));
} else {
warning() << "geoNear stage read non-geometry element " << redact(nextEl)
<< " in array " << redact(el);
}
}
} else {
warning() << "geoNear stage read non-geometry element " << redact(el);
}
}
}
static StatusWith computeGeoNearDistance(const GeoNearParams& nearParams,
WorkingSetMember* member) {
//
// Generic GeoNear distance computation
// Distances are computed by projecting the stored geometry into the query CRS, and
// computing distance in that CRS.
//
// Must have an object in order to get geometry out of it.
invariant(member->hasObj());
CRS queryCRS = nearParams.nearQuery->centroid->crs;
// Extract all the geometries out of this document for the near query
std::vector> geometries;
extractGeometries(member->obj.value(), nearParams.nearQuery->field, &geometries);
// Compute the minimum distance of all the geometries in the document
double minDistance = -1;
BSONObj minDistanceObj;
for (auto it = geometries.begin(); it != geometries.end(); ++it) {
StoredGeometry& stored = **it;
// NOTE: A stored document with STRICT_SPHERE CRS is treated as a malformed document
// and ignored. Since GeoNear requires an index, there's no stored STRICT_SPHERE shape.
// So we don't check it here.
// NOTE: For now, we're sure that if we get this far in the query we'll have an
// appropriate index which validates the type of geometry we're pulling back here.
// TODO: It may make sense to change our semantics and, by default, only return
// shapes in the same CRS from $geoNear.
if (!stored.geometry.supportsProject(queryCRS))
continue;
stored.geometry.projectInto(queryCRS);
double nextDistance = stored.geometry.minDistance(*nearParams.nearQuery->centroid);
if (minDistance < 0 || nextDistance < minDistance) {
minDistance = nextDistance;
minDistanceObj = stored.element.Obj();
}
}
if (minDistance < 0) {
// No distance to report
return StatusWith(-1);
}
if (nearParams.addDistMeta) {
if (nearParams.nearQuery->unitsAreRadians) {
// Hack for nearSphere
// TODO: Remove nearSphere?
invariant(SPHERE == queryCRS);
member->addComputed(new GeoDistanceComputedData(minDistance / kRadiusOfEarthInMeters));
} else {
member->addComputed(new GeoDistanceComputedData(minDistance));
}
}
if (nearParams.addPointMeta) {
member->addComputed(new GeoNearPointComputedData(minDistanceObj));
}
return StatusWith(minDistance);
}
static R2Annulus geoNearDistanceBounds(const GeoNearExpression& query) {
const CRS queryCRS = query.centroid->crs;
if (FLAT == queryCRS) {
return R2Annulus(query.centroid->oldPoint, query.minDistance, query.maxDistance);
}
invariant(SPHERE == queryCRS);
// TODO: Tighten this up a bit by making a CRS for "sphere with radians"
double minDistance = query.minDistance;
double maxDistance = query.maxDistance;
if (query.unitsAreRadians) {
// Our input bounds are in radians, convert to meters since the query CRS is actually
// SPHERE. We'll convert back to radians on outputting distances.
minDistance *= kRadiusOfEarthInMeters;
maxDistance *= kRadiusOfEarthInMeters;
}
// GOTCHA: oldPoint is a misnomer - it is the original point data and is in the correct
// CRS. We must not try to derive the original point from the spherical S2Point generated
// as an optimization - the mapping is not 1->1 - [-180, 0] and [180, 0] map to the same
// place.
// TODO: Wrapping behavior should not depend on the index, which would make $near code
// insensitive to which direction we explore the index in.
return R2Annulus(query.centroid->oldPoint,
min(minDistance, kMaxEarthDistanceInMeters),
min(maxDistance, kMaxEarthDistanceInMeters));
}
//
// GeoNear2DStage
//
static R2Annulus twoDDistanceBounds(const GeoNearParams& nearParams,
const IndexDescriptor* twoDIndex) {
R2Annulus fullBounds = geoNearDistanceBounds(*nearParams.nearQuery);
const CRS queryCRS = nearParams.nearQuery->centroid->crs;
if (FLAT == queryCRS) {
// Reset the full bounds based on our index bounds
GeoHashConverter::Parameters hashParams;
Status status = GeoHashConverter::parseParameters(twoDIndex->infoObj(), &hashParams);
invariant(status.isOK()); // The index status should always be valid
// The biggest distance possible in this indexed collection is the diagonal of the
// square indexed region.
const double sqrt2Approx = 1.5;
const double diagonalDist = sqrt2Approx * (hashParams.max - hashParams.min);
fullBounds = R2Annulus(
fullBounds.center(), fullBounds.getInner(), min(fullBounds.getOuter(), diagonalDist));
} else {
// Spherical queries have upper bounds set by the earth - no-op
// TODO: Wrapping errors would creep in here if nearSphere wasn't defined to not wrap
invariant(SPHERE == queryCRS);
invariant(!nearParams.nearQuery->isWrappingQuery);
}
return fullBounds;
}
class GeoNear2DStage::DensityEstimator {
public:
DensityEstimator(PlanStage::Children* children,
const IndexDescriptor* twoDindex,
const GeoNearParams* nearParams,
const R2Annulus& fullBounds)
: _children(children),
_twoDIndex(twoDindex),
_nearParams(nearParams),
_fullBounds(fullBounds),
_currentLevel(0) {
GeoHashConverter::Parameters hashParams;
Status status = GeoHashConverter::parseParameters(_twoDIndex->infoObj(), &hashParams);
// The index status should always be valid.
invariant(status.isOK());
_converter.reset(new GeoHashConverter(hashParams));
_centroidCell = _converter->hash(_nearParams->nearQuery->centroid->oldPoint);
// Since appendVertexNeighbors(level, output) requires level < hash.getBits(),
// we have to start to find documents at most GeoHash::kMaxBits - 1. Thus the finest
// search area is 16 * finest cell area at GeoHash::kMaxBits.
_currentLevel = std::max(0u, hashParams.bits - 1u);
}
PlanStage::StageState work(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection,
WorkingSetID* out,
double* estimatedDistance);
private:
void buildIndexScan(OperationContext* opCtx, WorkingSet* workingSet, Collection* collection);
PlanStage::Children* _children; // Points to PlanStage::_children in the NearStage.
const IndexDescriptor* _twoDIndex; // Not owned here.
const GeoNearParams* _nearParams; // Not owned here.
const R2Annulus& _fullBounds;
IndexScan* _indexScan = nullptr; // Owned in PlanStage::_children.
unique_ptr _converter;
GeoHash _centroidCell;
unsigned _currentLevel;
};
// Initialize the internal states
void GeoNear2DStage::DensityEstimator::buildIndexScan(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection) {
IndexScanParams scanParams;
scanParams.descriptor = _twoDIndex;
scanParams.direction = 1;
scanParams.doNotDedup = true;
// Scan bounds on 2D indexes are only over the 2D field - other bounds aren't applicable.
// This is handled in query planning.
scanParams.bounds = _nearParams->baseBounds;
// The "2d" field is always the first in the index
const string twoDFieldName = _nearParams->nearQuery->field;
const int twoDFieldPosition = 0;
// Construct index intervals used by this stage
OrderedIntervalList oil;
oil.name = scanParams.bounds.fields[twoDFieldPosition].name;
vector neighbors;
// Return the neighbors of closest vertex to this cell at the given level.
_centroidCell.appendVertexNeighbors(_currentLevel, &neighbors);
std::sort(neighbors.begin(), neighbors.end());
for (vector::const_iterator it = neighbors.begin(); it != neighbors.end(); it++) {
mongo::BSONObjBuilder builder;
it->appendHashMin(&builder, "");
it->appendHashMax(&builder, "");
oil.intervals.push_back(IndexBoundsBuilder::makeRangeInterval(
builder.obj(), BoundInclusion::kIncludeBothStartAndEndKeys));
}
invariant(oil.isValidFor(1));
// Intersect the $near bounds we just generated into the bounds we have for anything else
// in the scan (i.e. $within)
IndexBoundsBuilder::intersectize(oil, &scanParams.bounds.fields[twoDFieldPosition]);
invariant(!_indexScan);
_indexScan = new IndexScan(opCtx, scanParams, workingSet, NULL);
_children->emplace_back(_indexScan);
}
// Return IS_EOF is we find a document in it's ancestor cells and set estimated distance
// from the nearest document.
PlanStage::StageState GeoNear2DStage::DensityEstimator::work(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection,
WorkingSetID* out,
double* estimatedDistance) {
if (!_indexScan) {
// Setup index scan stage for current level.
buildIndexScan(opCtx, workingSet, collection);
}
WorkingSetID workingSetID;
PlanStage::StageState state = _indexScan->work(&workingSetID);
if (state == PlanStage::IS_EOF) {
// We ran through the neighbors but found nothing.
//
// Before going to the next-coarsest level, check whether our search area contains the
// entire search annulus, since we don't want to spend time doing density estimation over
// areas that are much larger than the requested $maxDistance.
//
// The search area consists of four cells with side length S. Within its cell, the closest
// vertex to the search point must be the vertex shared with the other three cells. If the
// search point lies in the upper left cell, this means that it must lie in the lower right
// quadrant of that cell. Furthermore, this lower-right quadrant has a side-length of S/2.
//
// +-----------+-----------+
// | | |
// | S/2 | |
// + +-----+ |
// | | o | |
// | | | |
// +-----+-----+-----------+
// | | |
// | | |
// | | |
// | | |
// | | |
// +-----------+-----------+
// S
//
// As long as the outer radius of the search annulus is less than S/2, it must be entirely
// contained within these four cells.
if (_fullBounds.getOuter() < (0.5 * _converter->sizeEdge(_currentLevel))) {
// We're covering the entire search annulus. Return EOF to indicate we're done.
*estimatedDistance = 0.5 * _converter->sizeEdge(_currentLevel);
return PlanStage::IS_EOF;
}
if (_currentLevel > 0u) {
// Advance to the next level and search again.
_currentLevel--;
// Reset index scan for the next level.
invariant(_children->back().get() == _indexScan);
_indexScan = nullptr;
_children->pop_back();
return PlanStage::NEED_TIME;
}
// We are already at the top level.
*estimatedDistance = _converter->sizeEdge(_currentLevel);
return PlanStage::IS_EOF;
} else if (state == PlanStage::ADVANCED) {
// Found a document at current level.
*estimatedDistance = _converter->sizeEdge(_currentLevel);
// Clean up working set.
workingSet->free(workingSetID);
return PlanStage::IS_EOF;
} else if (state == PlanStage::NEED_YIELD) {
*out = workingSetID;
}
// Propagate NEED_TIME or errors
return state;
}
PlanStage::StageState GeoNear2DStage::initialize(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection,
WorkingSetID* out) {
if (!_densityEstimator) {
_densityEstimator.reset(
new DensityEstimator(&_children, _twoDIndex, &_nearParams, _fullBounds));
}
double estimatedDistance;
PlanStage::StageState state =
_densityEstimator->work(opCtx, workingSet, collection, out, &estimatedDistance);
if (state == PlanStage::IS_EOF) {
// 2d index only works with legacy points as centroid. $nearSphere will project
// the point into SPHERE CRS and calculate distance based on that.
// STRICT_SPHERE is impossible here, as GeoJSON centroid is not allowed for 2d index.
// Estimator finished its work, we need to finish initialization too.
if (SPHERE == _nearParams.nearQuery->centroid->crs) {
// Estimated distance is in degrees, convert it to meters.
_boundsIncrement = deg2rad(estimatedDistance) * kRadiusOfEarthInMeters * 3;
// Limit boundsIncrement to ~20KM, so that the first circle won't be too aggressive.
_boundsIncrement = std::min(_boundsIncrement, kMaxEarthDistanceInMeters / 1000.0);
} else {
// We expand the radius by 3 times to give a reasonable starting search area.
// Assume points are distributed evenly. X is the edge size of cells at whose
// level we found a document in 4 neighbors. Thus the closest point is at least
// X/2 far from the centroid. The distance between two points is at least X.
// The area of Pi * (3X)^2 ~= 28 * X^2 will cover dozens of points at most.
// We'll explore the space with exponentially increasing radius if this guess is
// too small, so starting from a conservative initial radius doesn't hurt.
_boundsIncrement = 3 * estimatedDistance;
}
invariant(_boundsIncrement > 0.0);
// Clean up
_densityEstimator.reset(NULL);
}
return state;
}
static const string kTwoDIndexNearStage("GEO_NEAR_2D");
GeoNear2DStage::GeoNear2DStage(const GeoNearParams& nearParams,
OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection,
IndexDescriptor* twoDIndex)
: NearStage(opCtx, kTwoDIndexNearStage.c_str(), STAGE_GEO_NEAR_2D, workingSet, collection),
_nearParams(nearParams),
_twoDIndex(twoDIndex),
_fullBounds(twoDDistanceBounds(nearParams, twoDIndex)),
_currBounds(_fullBounds.center(), -1, _fullBounds.getInner()),
_boundsIncrement(0.0) {
_specificStats.keyPattern = twoDIndex->keyPattern();
_specificStats.indexName = twoDIndex->indexName();
_specificStats.indexVersion = static_cast(twoDIndex->version());
}
namespace {
/**
* Expression which checks whether a legacy 2D index point is contained within our near
* search annulus. See nextInterval() below for more discussion.
* TODO: Make this a standard type of GEO match expression
*/
class TwoDPtInAnnulusExpression : public LeafMatchExpression {
public:
TwoDPtInAnnulusExpression(const R2Annulus& annulus, StringData twoDPath)
: LeafMatchExpression(INTERNAL_2D_POINT_IN_ANNULUS), _annulus(annulus) {
setPath(twoDPath);
}
void serialize(BSONObjBuilder* out) const final {
out->append("TwoDPtInAnnulusExpression", true);
}
bool matchesSingleElement(const BSONElement& e) const final {
if (!e.isABSONObj())
return false;
PointWithCRS point;
if (!GeoParser::parseStoredPoint(e, &point).isOK())
return false;
return _annulus.contains(point.oldPoint);
}
//
// These won't be called.
//
void debugString(StringBuilder& debug, int level = 0) const final {
invariant(false);
}
bool equivalent(const MatchExpression* other) const final {
invariant(false);
return false;
}
unique_ptr shallowClone() const final {
invariant(false);
return NULL;
}
private:
R2Annulus _annulus;
};
// Helper class to maintain ownership of a match expression alongside an index scan
class FetchStageWithMatch final : public FetchStage {
public:
FetchStageWithMatch(OperationContext* opCtx,
WorkingSet* ws,
PlanStage* child,
MatchExpression* filter,
const Collection* collection)
: FetchStage(opCtx, ws, child, filter, collection), _matcher(filter) {}
private:
// Owns matcher
const unique_ptr _matcher;
};
}
static double min2DBoundsIncrement(const GeoNearExpression& query, IndexDescriptor* twoDIndex) {
GeoHashConverter::Parameters hashParams;
Status status = GeoHashConverter::parseParameters(twoDIndex->infoObj(), &hashParams);
invariant(status.isOK()); // The index status should always be valid
GeoHashConverter hasher(hashParams);
// The hasher error is the diagonal of a 2D hash region - it's generally not helpful
// to change region size such that a search radius is smaller than the 2D hash region
// max radius. This is slightly conservative for now (box diagonal vs circle radius).
double minBoundsIncrement = hasher.getError() / 2;
const CRS queryCRS = query.centroid->crs;
if (FLAT == queryCRS)
return minBoundsIncrement;
invariant(SPHERE == queryCRS);
// If this is a spherical query, units are in meters - this is just a heuristic
return minBoundsIncrement * kMetersPerDegreeAtEquator;
}
static R2Annulus projectBoundsToTwoDDegrees(R2Annulus sphereBounds) {
const double outerDegrees = rad2deg(sphereBounds.getOuter() / kRadiusOfEarthInMeters);
const double innerDegrees = rad2deg(sphereBounds.getInner() / kRadiusOfEarthInMeters);
const double maxErrorDegrees = computeXScanDistance(sphereBounds.center().y, outerDegrees);
return R2Annulus(sphereBounds.center(),
max(0.0, innerDegrees - maxErrorDegrees),
outerDegrees + maxErrorDegrees);
}
StatusWith //
GeoNear2DStage::nextInterval(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection) {
// The search is finished if we searched at least once and all the way to the edge
if (_currBounds.getInner() >= 0 && _currBounds.getOuter() == _fullBounds.getOuter()) {
return StatusWith(NULL);
}
//
// Setup the next interval
//
if (!_specificStats.intervalStats.empty()) {
const IntervalStats& lastIntervalStats = _specificStats.intervalStats.back();
// TODO: Generally we want small numbers of results fast, then larger numbers later
if (lastIntervalStats.numResultsReturned < 300)
_boundsIncrement *= 2;
else if (lastIntervalStats.numResultsReturned > 600)
_boundsIncrement /= 2;
}
_boundsIncrement =
max(_boundsIncrement, min2DBoundsIncrement(*_nearParams.nearQuery, _twoDIndex));
R2Annulus nextBounds(_currBounds.center(),
_currBounds.getOuter(),
min(_currBounds.getOuter() + _boundsIncrement, _fullBounds.getOuter()));
const bool isLastInterval = (nextBounds.getOuter() == _fullBounds.getOuter());
_currBounds = nextBounds;
//
// Get a covering region for this interval
//
const CRS queryCRS = _nearParams.nearQuery->centroid->crs;
unique_ptr coverRegion;
if (FLAT == queryCRS) {
// NOTE: Due to floating point math issues, FLAT searches of a 2D index need to treat
// containment and distance separately.
// Ex: (distance) 54.001 - 54 > 0.001, but (containment) 54 + 0.001 <= 54.001
// The idea is that a $near search with bounds is really a $within search, sorted by
// distance. We attach a custom $within : annulus matcher to do the $within search,
// and adjust max/min bounds slightly since, as above, containment does not mean the
// distance calculation won't slightly overflow the boundary.
//
// The code below adjusts:
// 1) Overall min/max bounds of the generated distance intervals to be more inclusive
// 2) Bounds of the interval covering to be more inclusive
// ... and later on we add the custom $within : annulus matcher.
//
// IMPORTANT: The *internal* interval distance bounds are *exact thresholds* - these
// should not be adjusted.
// TODO: Maybe integrate annuluses as a standard shape, and literally transform $near
// internally into a $within query with $near just as sort.
// Compute the maximum axis-aligned distance error
const double epsilon = std::numeric_limits::epsilon() *
(max(abs(_fullBounds.center().x), abs(_fullBounds.center().y)) +
_fullBounds.getOuter());
if (nextBounds.getInner() > 0 && nextBounds.getInner() == _fullBounds.getInner()) {
nextBounds = R2Annulus(nextBounds.center(),
max(0.0, nextBounds.getInner() - epsilon),
nextBounds.getOuter());
}
if (nextBounds.getOuter() > 0 && nextBounds.getOuter() == _fullBounds.getOuter()) {
// We're at the max bound of the search, adjust interval maximum
nextBounds = R2Annulus(
nextBounds.center(), nextBounds.getInner(), nextBounds.getOuter() + epsilon);
}
// *Always* adjust the covering bounds to be more inclusive
coverRegion.reset(new R2Annulus(nextBounds.center(),
max(0.0, nextBounds.getInner() - epsilon),
nextBounds.getOuter() + epsilon));
} else {
invariant(SPHERE == queryCRS);
// TODO: As above, make this consistent with $within : $centerSphere
// Our intervals aren't in the same CRS as our index, so we need to adjust them
coverRegion.reset(new R2Annulus(projectBoundsToTwoDDegrees(nextBounds)));
}
//
// Setup the stages for this interval
//
IndexScanParams scanParams;
scanParams.descriptor = _twoDIndex;
scanParams.direction = 1;
// This does force us to do our own deduping of results.
scanParams.doNotDedup = true;
// Scan bounds on 2D indexes are only over the 2D field - other bounds aren't applicable.
// This is handled in query planning.
scanParams.bounds = _nearParams.baseBounds;
// The "2d" field is always the first in the index
const string twoDFieldName = _nearParams.nearQuery->field;
const int twoDFieldPosition = 0;
std::vector unorderedCovering = ExpressionMapping::get2dCovering(
*coverRegion, _twoDIndex->infoObj(), internalGeoNearQuery2DMaxCoveringCells.load());
// Make sure the same index key isn't visited twice
R2CellUnion diffUnion;
diffUnion.init(unorderedCovering);
diffUnion.getDifference(_scannedCells);
// After taking the difference, there may be cells in the covering that don't intersect
// with the annulus.
diffUnion.detach(&unorderedCovering);
// Add the cells in this covering to the _scannedCells union
_scannedCells.add(unorderedCovering);
OrderedIntervalList coveredIntervals;
coveredIntervals.name = scanParams.bounds.fields[twoDFieldPosition].name;
ExpressionMapping::GeoHashsToIntervalsWithParents(unorderedCovering, &coveredIntervals);
// Intersect the $near bounds we just generated into the bounds we have for anything else
// in the scan (i.e. $within)
IndexBoundsBuilder::intersectize(coveredIntervals,
&scanParams.bounds.fields[twoDFieldPosition]);
// These parameters are stored by the index, and so must be ok
GeoHashConverter::Parameters hashParams;
GeoHashConverter::parseParameters(_twoDIndex->infoObj(), &hashParams);
// 2D indexes support covered search over additional fields they contain
IndexScan* scan = new IndexScan(opCtx, scanParams, workingSet, _nearParams.filter);
MatchExpression* docMatcher = nullptr;
// FLAT searches need to add an additional annulus $within matcher, see above
// TODO: Find out if this matcher is actually needed
if (FLAT == queryCRS) {
docMatcher = new TwoDPtInAnnulusExpression(_fullBounds, twoDFieldName);
}
// FetchStage owns index scan
_children.emplace_back(
new FetchStageWithMatch(opCtx, workingSet, scan, docMatcher, collection));
return StatusWith(new CoveredInterval(_children.back().get(),
true,
nextBounds.getInner(),
nextBounds.getOuter(),
isLastInterval));
}
StatusWith GeoNear2DStage::computeDistance(WorkingSetMember* member) {
return computeGeoNearDistance(_nearParams, member);
}
//
// GeoNear2DSphereStage
//
static int getFieldPosition(const IndexDescriptor* index, const string& fieldName) {
int fieldPosition = 0;
BSONObjIterator specIt(index->keyPattern());
while (specIt.more()) {
if (specIt.next().fieldName() == fieldName) {
break;
}
++fieldPosition;
}
if (fieldPosition == index->keyPattern().nFields())
return -1;
return fieldPosition;
}
static const string kS2IndexNearStage("GEO_NEAR_2DSPHERE");
GeoNear2DSphereStage::GeoNear2DSphereStage(const GeoNearParams& nearParams,
OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection,
IndexDescriptor* s2Index)
: NearStage(opCtx, kS2IndexNearStage.c_str(), STAGE_GEO_NEAR_2DSPHERE, workingSet, collection),
_nearParams(nearParams),
_s2Index(s2Index),
_fullBounds(geoNearDistanceBounds(*nearParams.nearQuery)),
_currBounds(_fullBounds.center(), -1, _fullBounds.getInner()),
_boundsIncrement(0.0) {
_specificStats.keyPattern = s2Index->keyPattern();
_specificStats.indexName = s2Index->indexName();
_specificStats.indexVersion = static_cast(s2Index->version());
// initialize2dsphereParams() does not require the collator during the GEO_NEAR_2DSPHERE stage.
// It only requires the collator for index key generation. For query execution,
// _nearParams.baseBounds should have collator-generated comparison keys in place of raw
// strings, and _nearParams.filter should have the collator.
const CollatorInterface* collator = nullptr;
ExpressionParams::initialize2dsphereParams(s2Index->infoObj(), collator, &_indexParams);
}
GeoNear2DSphereStage::~GeoNear2DSphereStage() {}
namespace {
S2Region* buildS2Region(const R2Annulus& sphereBounds) {
// Internal bounds come in SPHERE CRS units
// i.e. center is lon/lat, inner/outer are in meters
S2LatLng latLng = S2LatLng::FromDegrees(sphereBounds.center().y, sphereBounds.center().x);
vector regions;
const double inner = sphereBounds.getInner();
const double outer = sphereBounds.getOuter();
if (inner > 0) {
// TODO: Currently a workaround to fix occasional floating point errors
// in S2, where sometimes points near the axis will not be returned
// if inner == 0
S2Cap innerCap = S2Cap::FromAxisAngle(latLng.ToPoint(),
S1Angle::Radians(inner / kRadiusOfEarthInMeters));
innerCap = innerCap.Complement();
regions.push_back(new S2Cap(innerCap));
}
// We only need to max bound if this is not a full search of the Earth
// Using the constant here is important since we use the min of kMaxEarthDistance
// and the actual bounds passed in to set up the search area.
if (outer < kMaxEarthDistanceInMeters) {
S2Cap outerCap = S2Cap::FromAxisAngle(latLng.ToPoint(),
S1Angle::Radians(outer / kRadiusOfEarthInMeters));
regions.push_back(new S2Cap(outerCap));
}
// if annulus is entire world, return a full cap
if (regions.empty()) {
regions.push_back(new S2Cap(S2Cap::Full()));
}
// Takes ownership of caps
return new S2RegionIntersection(®ions);
}
}
// Estimate the density of data by search the nearest cells level by level around center.
class GeoNear2DSphereStage::DensityEstimator {
public:
DensityEstimator(PlanStage::Children* children,
const IndexDescriptor* s2Index,
const GeoNearParams* nearParams,
const S2IndexingParams& indexParams,
const R2Annulus& fullBounds)
: _children(children),
_s2Index(s2Index),
_nearParams(nearParams),
_indexParams(indexParams),
_fullBounds(fullBounds),
_currentLevel(0) {
// cellId.AppendVertexNeighbors(level, output) requires level < finest,
// so we use the minimum of max_level - 1 and the user specified finest
int level = std::min(S2::kMaxCellLevel - 1, internalQueryS2GeoFinestLevel.load());
_currentLevel = std::max(0, level);
}
// Search for a document in neighbors at current level.
// Return IS_EOF is such document exists and set the estimated distance to the nearest doc.
PlanStage::StageState work(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection,
WorkingSetID* out,
double* estimatedDistance);
private:
void buildIndexScan(OperationContext* opCtx, WorkingSet* workingSet, Collection* collection);
PlanStage::Children* _children; // Points to PlanStage::_children in the NearStage.
const IndexDescriptor* _s2Index; // Not owned here.
const GeoNearParams* _nearParams; // Not owned here.
const S2IndexingParams _indexParams;
const R2Annulus& _fullBounds;
int _currentLevel;
IndexScan* _indexScan = nullptr; // Owned in PlanStage::_children.
};
// Setup the index scan stage for neighbors at this level.
void GeoNear2DSphereStage::DensityEstimator::buildIndexScan(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection) {
IndexScanParams scanParams;
scanParams.descriptor = _s2Index;
scanParams.direction = 1;
scanParams.doNotDedup = true;
scanParams.bounds = _nearParams->baseBounds;
// Because the planner doesn't yet set up 2D index bounds, do it ourselves here
const string s2Field = _nearParams->nearQuery->field;
const int s2FieldPosition = getFieldPosition(_s2Index, s2Field);
fassert(28677, s2FieldPosition >= 0);
OrderedIntervalList* coveredIntervals = &scanParams.bounds.fields[s2FieldPosition];
coveredIntervals->intervals.clear();
// Find 4 neighbors (3 neighbors at face vertex) at current level.
const S2CellId& centerId = _nearParams->nearQuery->centroid->cell.id();
vector neighbors;
// The search area expands 4X each time.
// Return the neighbors of closest vertex to this cell at the given level.
invariant(_currentLevel < centerId.level());
centerId.AppendVertexNeighbors(_currentLevel, &neighbors);
ExpressionMapping::S2CellIdsToIntervals(neighbors, _indexParams.indexVersion, coveredIntervals);
// Index scan
invariant(!_indexScan);
_indexScan = new IndexScan(opCtx, scanParams, workingSet, NULL);
_children->emplace_back(_indexScan);
}
PlanStage::StageState GeoNear2DSphereStage::DensityEstimator::work(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection,
WorkingSetID* out,
double* estimatedDistance) {
if (!_indexScan) {
// Setup index scan stage for current level.
buildIndexScan(opCtx, workingSet, collection);
}
WorkingSetID workingSetID;
PlanStage::StageState state = _indexScan->work(&workingSetID);
if (state == PlanStage::IS_EOF) {
// We ran through the neighbors but found nothing.
//
// Before going to the next-coarsest level, check whether our search area contains the
// entire search annulus, since we don't want to spend time doing density estimation over
// areas that are much larger than the requested $maxDistance.
//
// The search area consists of four cells at level L. Within its cell, the closest vertex to
// the search point must be the vertex shared with the other three cells. If the search
// point lies in the upper left cell, this means that it must lie in the lower right
// sub-cell at level L+1.
//
// +-----------+-----------+
// | | |
// | S | |
// + +-----+ |
// | | o | |
// | | | |
// +-----+-----+-----------+
// | | |
// | | |
// | | |
// | | |
// | | |
// +-----------+-----------+
//
// In the diagram above, S is the width of the cell at level L+1. We can determine a lower
// bound for the width any cell at this level, i.e. S > minWidth(L+1). As long as the outer
// radius of the search annulus is less than minWidth(L+1), it must be entirely contained
// within these four level L cells.
if (_fullBounds.getOuter() <
(S2::kMinWidth.GetValue(_currentLevel + 1) * kRadiusOfEarthInMeters)) {
// We're covering the entire search annulus. Return EOF to indicate we're done.
*estimatedDistance = S2::kMinWidth.GetValue(_currentLevel + 1) * kRadiusOfEarthInMeters;
return PlanStage::IS_EOF;
}
if (_currentLevel > 0) {
// Advance to the next level and search again.
_currentLevel--;
// Reset index scan for the next level.
invariant(_children->back().get() == _indexScan);
_indexScan = nullptr;
_children->pop_back();
return PlanStage::NEED_TIME;
}
// We are already at the top level.
*estimatedDistance = S2::kAvgEdge.GetValue(_currentLevel) * kRadiusOfEarthInMeters;
return PlanStage::IS_EOF;
} else if (state == PlanStage::ADVANCED) {
// We found something!
*estimatedDistance = S2::kAvgEdge.GetValue(_currentLevel) * kRadiusOfEarthInMeters;
// Clean up working set.
workingSet->free(workingSetID);
return PlanStage::IS_EOF;
} else if (state == PlanStage::NEED_YIELD) {
*out = workingSetID;
}
// Propagate NEED_TIME or errors
return state;
}
PlanStage::StageState GeoNear2DSphereStage::initialize(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection,
WorkingSetID* out) {
if (!_densityEstimator) {
_densityEstimator.reset(
new DensityEstimator(&_children, _s2Index, &_nearParams, _indexParams, _fullBounds));
}
double estimatedDistance;
PlanStage::StageState state =
_densityEstimator->work(opCtx, workingSet, collection, out, &estimatedDistance);
if (state == IS_EOF) {
// We find a document in 4 neighbors at current level, but didn't at previous level.
//
// Assuming cell size at current level is d and data is even distributed, the distance
// between two nearest points are at least d. The following circle with radius of 3 * d
// covers PI * 9 * d^2, giving at most 30 documents.
//
// At the coarsest level, the search area is the whole earth.
_boundsIncrement = 3 * estimatedDistance;
invariant(_boundsIncrement > 0.0);
// Clean up
_densityEstimator.reset(NULL);
}
return state;
}
StatusWith //
GeoNear2DSphereStage::nextInterval(OperationContext* opCtx,
WorkingSet* workingSet,
Collection* collection) {
// The search is finished if we searched at least once and all the way to the edge
if (_currBounds.getInner() >= 0 && _currBounds.getOuter() == _fullBounds.getOuter()) {
return StatusWith(NULL);
}
//
// Setup the next interval
//
if (!_specificStats.intervalStats.empty()) {
const IntervalStats& lastIntervalStats = _specificStats.intervalStats.back();
// TODO: Generally we want small numbers of results fast, then larger numbers later
if (lastIntervalStats.numResultsReturned < 300)
_boundsIncrement *= 2;
else if (lastIntervalStats.numResultsReturned > 600)
_boundsIncrement /= 2;
}
invariant(_boundsIncrement > 0.0);
R2Annulus nextBounds(_currBounds.center(),
_currBounds.getOuter(),
min(_currBounds.getOuter() + _boundsIncrement, _fullBounds.getOuter()));
bool isLastInterval = (nextBounds.getOuter() == _fullBounds.getOuter());
_currBounds = nextBounds;
//
// Setup the covering region and stages for this interval
//
IndexScanParams scanParams;
scanParams.descriptor = _s2Index;
scanParams.direction = 1;
// This does force us to do our own deduping of results.
scanParams.doNotDedup = true;
scanParams.bounds = _nearParams.baseBounds;
// Because the planner doesn't yet set up 2D index bounds, do it ourselves here
const string s2Field = _nearParams.nearQuery->field;
const int s2FieldPosition = getFieldPosition(_s2Index, s2Field);
fassert(28678, s2FieldPosition >= 0);
scanParams.bounds.fields[s2FieldPosition].intervals.clear();
std::unique_ptr region(buildS2Region(_currBounds));
std::vector cover = ExpressionMapping::get2dsphereCovering(*region);
// Generate a covering that does not intersect with any previous coverings
S2CellUnion coverUnion;
coverUnion.InitSwap(&cover);
invariant(cover.empty());
S2CellUnion diffUnion;
diffUnion.GetDifference(&coverUnion, &_scannedCells);
for (auto cellId : diffUnion.cell_ids()) {
if (region->MayIntersect(S2Cell(cellId))) {
cover.push_back(cellId);
}
}
// Add the cells in this covering to the _scannedCells union
_scannedCells.Add(cover);
OrderedIntervalList* coveredIntervals = &scanParams.bounds.fields[s2FieldPosition];
ExpressionMapping::S2CellIdsToIntervalsWithParents(cover, _indexParams, coveredIntervals);
IndexScan* scan = new IndexScan(opCtx, scanParams, workingSet, nullptr);
// FetchStage owns index scan
_children.emplace_back(new FetchStage(opCtx, workingSet, scan, _nearParams.filter, collection));
return StatusWith(new CoveredInterval(_children.back().get(),
true,
nextBounds.getInner(),
nextBounds.getOuter(),
isLastInterval));
}
StatusWith GeoNear2DSphereStage::computeDistance(WorkingSetMember* member) {
return computeGeoNearDistance(_nearParams, member);
}
} // namespace mongo