diff options
51 files changed, 2532 insertions, 2884 deletions
diff --git a/jstests/core/geo_2d_explain.js b/jstests/core/geo_2d_explain.js index 8195642aabc..c9bfe624436 100644 --- a/jstests/core/geo_2d_explain.js +++ b/jstests/core/geo_2d_explain.js @@ -24,6 +24,5 @@ var explain = t.find({loc: {$near: [40, 40]}, _id: {$lt: 50}}).explain(); print('explain = ' + tojson(explain)); -assert.eq({}, explain.indexBounds); assert.eq(explain.n, explain.nscannedObjects); assert.lte(explain.n, explain.nscanned); diff --git a/jstests/core/geo_borders.js b/jstests/core/geo_borders.js index 48dd6316dee..6443a940c02 100644 --- a/jstests/core/geo_borders.js +++ b/jstests/core/geo_borders.js @@ -127,9 +127,12 @@ assert.eq( overallMax, t.find( { loc : { $near : offCenter } } ).next().loc.y ); assert.eq( overallMin, t.find( { loc : { $near : onBoundsNeg } } ).next().loc.y ); // Make sure we can't get all nearby points to point over boundary +// TODO: SERVER-9986 clean up wrapping rules for different CRS queries - not sure this is an error +/* assert.throws(function(){ t.findOne( { loc : { $near : offBounds } } ); }); +*/ // Make sure we can't get all nearby points to point on max boundary //Broken - see SERVER-13581 @@ -151,7 +154,10 @@ assert.eq( overallMax, db.runCommand( { geoNear : "borders", near : offCenter } assert.eq( overallMin, db.runCommand( { geoNear : "borders", near : onBoundsNeg } ).results[0].obj.loc.y ); // Make sure we can't get all nearby points to point over boundary +//TODO: SERVER-9986 clean up wrapping rules for different CRS queries - not sure this is an error +/* assert.commandFailed( db.runCommand( { geoNear : "borders", near : offBounds } )); +*/ // Make sure we can't get all nearby points to point on max boundary assert.commandWorked( db.runCommand( { geoNear : "borders", near : onBounds } )); diff --git a/jstests/core/geo_oob_sphere.js b/jstests/core/geo_oob_sphere.js index 59343e7d7ac..e03111bcf16 100644 --- a/jstests/core/geo_oob_sphere.js +++ b/jstests/core/geo_oob_sphere.js @@ -17,7 +17,8 @@ assert.commandWorked(t.ensureIndex({ loc : "2d" })) assert.throws( function() { t.find({ loc : { $nearSphere : [ 30, 91 ], $maxDistance : 0.25 } }).count() } ); -assert.throws( function() { t.find({ loc : { $nearSphere : [ 30, 89 ], $maxDistance : 0.25 } }).count() } ); +// TODO: SERVER-9986 - it's not clear that throwing is correct behavior here +// assert.throws( function() { t.find({ loc : { $nearSphere : [ 30, 89 ], $maxDistance : 0.25 } }).count() } ); assert.throws( function() { t.find({ loc : { $within : { $centerSphere : [[ -180, -91 ], 0.25] } } }).count() } ); @@ -26,6 +27,7 @@ res = db.runCommand({ geoNear : "geooobsphere", near : [179, -91], maxDistance : assert.commandFailed( res ) printjson( res ) -res = db.runCommand({ geoNear : "geooobsphere", near : [30, 89], maxDistance : 0.25, spherical : true }) -assert.commandFailed( res ) -printjson( res ) +// TODO: SERVER-9986 - it's not clear that throwing is correct behavior here +// res = db.runCommand({ geoNear : "geooobsphere", near : [30, 89], maxDistance : 0.25, spherical : true }) +// assert.commandFailed( res ) +// printjson( res ) diff --git a/src/mongo/SConscript b/src/mongo/SConscript index 278e6f63495..76c0037ea1e 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -643,7 +643,7 @@ serverOnlyFiles = [ "db/curop.cpp", "db/structure/catalog/index_details.cpp", "db/index_builder.cpp", "db/index_rebuilder.cpp", - "db/commands/geonear.cpp", + "db/commands/geo_near_cmd.cpp", "db/geo/haystack.cpp", "db/geo/s2common.cpp", "db/ops/delete.cpp", @@ -846,7 +846,9 @@ if has_option( 'use-cpu-profiler' ): env.Library("defaultversion", "s/default_version.cpp") # Geo -env.Library("geometry", [ "db/geo/hash.cpp", "db/geo/shapes.cpp", ], LIBDEPS = [ "bson" ]) +env.Library("geometry", [ "db/geo/hash.cpp", "db/geo/shapes.cpp", ], + LIBDEPS = [ "bson", + "$BUILD_DIR/third_party/s2/s2" ]) env.Library("geoparser", [ "db/geo/geoparser.cpp", ], LIBDEPS = [ "bson", "geometry", diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index 88ed8876be5..a5e3931da5d 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -96,7 +96,7 @@ error_code("InvalidReplicaSetConfig", 93) error_code("NotYetInitialized", 94) error_code("NotSecondary", 95) error_code("OperationFailed", 96) - +error_code("NoProjectionFound", 97) # Non-sequential error codes (for compatibility only) error_code("NotMaster", 10107) #this comes from assert_util.h diff --git a/src/mongo/db/commands/geonear.cpp b/src/mongo/db/commands/geo_near_cmd.cpp index f994004e2b2..f994004e2b2 100644 --- a/src/mongo/db/commands/geonear.cpp +++ b/src/mongo/db/commands/geo_near_cmd.cpp diff --git a/src/mongo/db/exec/2dcommon.cpp b/src/mongo/db/exec/2dcommon.cpp deleted file mode 100644 index f7c01ac3a25..00000000000 --- a/src/mongo/db/exec/2dcommon.cpp +++ /dev/null @@ -1,700 +0,0 @@ -/** - * Copyright (C) 2013 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/exec/2dcommon.h" - -#include "mongo/db/matcher/matchable.h" -#include "mongo/db/query/index_bounds_builder.h" - -namespace mongo { -namespace twod_exec { - - // - // A MatchableDocument that will load the doc if need be but records if it does. - // - - class GeoMatchableDocument : public MatchableDocument { - public: - GeoMatchableDocument(const BSONObj& keyPattern, - const BSONObj& key, - DiskLoc loc, - const Collection* collection, - bool* fetched) - : _collection(collection), - _keyPattern(keyPattern), - _key(key), - _loc(loc), - _fetched(fetched) { } - - BSONObj toBSON() const { - *_fetched = true; - return _collection->docFor(_loc); - } - - virtual ElementIterator* allocateIterator(const ElementPath* path) const { - BSONObjIterator keyPatternIt(_keyPattern); - BSONObjIterator keyDataIt(_key); - - // Skip the "2d"-indexed stuff. We might have a diff. predicate over that field - // and those can't be covered. - keyPatternIt.next(); - keyDataIt.next(); - - // Look in the key. - while (keyPatternIt.more()) { - BSONElement keyPatternElt = keyPatternIt.next(); - verify(keyDataIt.more()); - BSONElement keyDataElt = keyDataIt.next(); - - if (path->fieldRef().equalsDottedField(keyPatternElt.fieldName())) { - if (Array == keyDataElt.type()) { - return new SimpleArrayElementIterator(keyDataElt, true); - } - else { - return new SingleElementElementIterator(keyDataElt); - } - } - } - - // All else fails, fetch. - *_fetched = true; - return new BSONElementIterator(path, _collection->docFor(_loc)); - } - - virtual void releaseIterator( ElementIterator* iterator ) const { - delete iterator; - } - - private: - const Collection* _collection; - - BSONObj _keyPattern; - BSONObj _key; - DiskLoc _loc; - bool* _fetched; - }; - - - // - // GeoAccumulator - // - - GeoAccumulator::GeoAccumulator(Collection* collection, - TwoDAccessMethod* accessMethod, - MatchExpression* filter) - : _collection(collection), - _accessMethod(accessMethod), _converter(accessMethod->getParams().geoHashConverter), - _filter(filter), - _lookedAt(0), _matchesPerfd(0), _objectsLoaded(0), _pointsLoaded(0), _found(0) { } - - GeoAccumulator::~GeoAccumulator() { } - - void GeoAccumulator::add(const GeoIndexEntry& node) { - _lookedAt++; - - // Approximate distance check using key data - double keyD = 0; - Point keyP(_converter->unhashToPoint(node._key.firstElement())); - KeyResult keyOk = approxKeyCheck(keyP, keyD); - if (keyOk == BAD) { - return; - } - - // Check for match using other key (and potentially doc) criteria - // Remember match results for each object - map<DiskLoc, bool>::iterator match = _matched.find(node.recordLoc); - bool newDoc = match == _matched.end(); - - //cout << "newDoc: " << newDoc << endl; - if(newDoc) { - bool fetched = false; - - if (NULL != _filter) { - GeoMatchableDocument md(_accessMethod->getDescriptor()->keyPattern(), - node._key, - node.recordLoc, - _collection, - &fetched); - bool good = _filter->matches(&md); - - _matchesPerfd++; - - if (fetched) { - _objectsLoaded++; - } - - if (! good) { - _matched[ node.recordLoc ] = false; - return; - } - } - // Don't double-count. - if (!fetched) { - _objectsLoaded++; - } - } else if(!((*match).second)) { - return; - } - - // Exact check with particular data fields - // Can add multiple points - int diff = addSpecific(node, keyP, keyOk == BORDER, keyD, newDoc); - if(diff > 0) _found += diff; - else _found -= -diff; - } - - void GeoAccumulator::getPointsFor(const BSONObj& key, const BSONObj& obj, - vector<BSONObj> &locsForNode, bool allPoints) { - // Find all the location objects from the keys - vector<BSONObj> locs; - _accessMethod->getKeys(obj, allPoints ? locsForNode : locs); - ++_pointsLoaded; - - if (allPoints) return; - if (locs.size() == 1){ - locsForNode.push_back(locs[0]); - return; - } - - // Find the particular location we want - GeoHash keyHash(key.firstElement(), _converter->getBits()); - - for(vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i) { - // Ignore all locations not hashed to the key's hash, since we may see - // those later - if(_converter->hash(*i) != keyHash) continue; - locsForNode.push_back(*i); - } - } - - // - // BtreeLocation - // - - // static - bool BtreeLocation::hasPrefix(const BSONObj& key, const GeoHash& hash) { - BSONElement e = key.firstElement(); - if (e.eoo()) { return false; } - return GeoHash(e).hasPrefix(hash); - } - - void BtreeLocation::advance() { - WorkingSetID id = WorkingSet::INVALID_ID; - for (;;) { - PlanStage::StageState state = _scan->work(&id); - if (PlanStage::ADVANCED == state) { - break; - } - else if (PlanStage::NEED_TIME == state) { - continue; - } - else { - // Error or EOF. Either way, stop. - _eof = true; - return; - } - } - verify(WorkingSet::INVALID_ID != id); - WorkingSetMember* wsm = _ws->get(id); - verify(WorkingSetMember::LOC_AND_IDX == wsm->state); - _key = wsm->keyData[0].keyData; - _loc = wsm->loc; - _ws->free(id); - } - - // Returns the min and max keys which bound a particular location. - // The only time these may be equal is when we actually equal the location - // itself, otherwise our expanding algorithm will fail. - // static - bool BtreeLocation::initial(OperationContext* txn, - const IndexDescriptor* descriptor, - const TwoDIndexingParams& params, - BtreeLocation& min, - BtreeLocation& max, - GeoHash start) { - verify(descriptor); - - min._eof = false; - max._eof = false; - - // Add the range for the 2d indexed field to the keys used. - - // Two scans: one for min one for max. - IndexScanParams minParams; - minParams.direction = -1; - minParams.descriptor = descriptor; - minParams.bounds.fields.resize(descriptor->keyPattern().nFields()); - minParams.doNotDedup = true; - // First field of start key goes (MINKEY, start] (in reverse) - BSONObjBuilder firstBob; - firstBob.appendMinKey(""); - start.appendHashMin(&firstBob, ""); - minParams.bounds.fields[0].intervals.push_back(Interval(firstBob.obj(), false, true)); - - IndexScanParams maxParams; - maxParams.direction = 1; - maxParams.descriptor = descriptor; - maxParams.bounds.fields.resize(descriptor->keyPattern().nFields()); - // Don't have the ixscan dedup since we want dup DiskLocs because of multi-point docs. - maxParams.doNotDedup = true; - // First field of end key goes (start, MAXKEY) - BSONObjBuilder secondBob; - start.appendHashMin(&secondBob, ""); - secondBob.appendMaxKey(""); - maxParams.bounds.fields[0].intervals.push_back(Interval(secondBob.obj(), false, false)); - - BSONObjIterator it(descriptor->keyPattern()); - BSONElement kpElt = it.next(); - maxParams.bounds.fields[0].name = kpElt.fieldName(); - minParams.bounds.fields[0].name = kpElt.fieldName(); - // Fill out the non-2d indexed fields with the "all values" interval, aligned properly. - size_t idx = 1; - while (it.more()) { - kpElt = it.next(); - maxParams.bounds.fields[idx].intervals.push_back(IndexBoundsBuilder::allValues()); - minParams.bounds.fields[idx].intervals.push_back(IndexBoundsBuilder::allValues()); - maxParams.bounds.fields[idx].name = kpElt.fieldName(); - minParams.bounds.fields[idx].name = kpElt.fieldName(); - if (kpElt.number() == -1) { - IndexBoundsBuilder::reverseInterval(&minParams.bounds.fields[idx].intervals[0]); - IndexBoundsBuilder::reverseInterval(&maxParams.bounds.fields[idx].intervals[0]); - } - ++idx; - } - - for (size_t i = 0; i < minParams.bounds.fields.size(); ++i) { - IndexBoundsBuilder::reverseInterval(&minParams.bounds.fields[i].intervals[0]); - } - - //cout << "keyPattern " << descriptor->keyPattern().toString() << endl; - //cout << "minBounds " << minParams.bounds.toString() << endl; - //cout << "maxBounds " << maxParams.bounds.toString() << endl; - verify(minParams.bounds.isValidFor(descriptor->keyPattern(), -1)); - verify(maxParams.bounds.isValidFor(descriptor->keyPattern(), 1)); - - min._ws.reset(new WorkingSet()); - min._scan.reset(new IndexScan(txn, minParams, min._ws.get(), NULL)); - - max._ws.reset(new WorkingSet()); - max._scan.reset(new IndexScan(txn, maxParams, max._ws.get(), NULL)); - - min.advance(); - max.advance(); - - return !max._eof || !min._eof; - } - - // - // GeoBrowse - // - - GeoBrowse::GeoBrowse(Collection* collection, - TwoDAccessMethod* accessMethod, string type, MatchExpression* filter) - : GeoAccumulator(collection, accessMethod, filter), - _type(type), _firstCall(true), _nscanned(), - _centerPrefix(0, 0, 0), - _descriptor(accessMethod->getDescriptor()), - _converter(accessMethod->getParams().geoHashConverter), - _params(accessMethod->getParams()), - _collection(collection) { - - // Set up the initial expand state - _state = START; - _neighbor = -1; - _foundInExp = 0; - - } - - bool GeoBrowse::ok() { - /* - cout << "Checking cursor, in state " << (int) _state << ", first call " - << _firstCall << ", empty : " << _cur.isEmpty() - << ", stack : " << _stack.size() << endl; - */ - - bool first = _firstCall; - - if (_firstCall) { - fillStack(maxPointsHeuristic); - _firstCall = false; - } - - if (_stack.size()) { - if (first) { ++_nscanned; } - return true; - } - - while (moreToDo()) { - fillStack(maxPointsHeuristic); - if (! _cur.isEmpty()) { - if (first) { ++_nscanned; } - return true; - } - } - - return !_cur.isEmpty(); - } - - bool GeoBrowse::advance() { - _cur._o = BSONObj(); - - if (_stack.size()) { - _cur = _stack.front(); - _stack.pop_front(); - ++_nscanned; - return true; - } - - if (! moreToDo()) return false; - - while (_cur.isEmpty() && moreToDo()){ - fillStack(maxPointsHeuristic); - } - - return ! _cur.isEmpty() && ++_nscanned; - } - - void GeoBrowse::noteLocation() { - // Remember where our _max, _min are - _min.prepareToYield(); - _max.prepareToYield(); - } - - /* called before query getmore block is iterated */ - void GeoBrowse::checkLocation() { - // We can assume an error was thrown earlier if this database somehow disappears - // Recall our _max, _min - _min.recoverFromYield(); - _max.recoverFromYield(); - } - - BSONObj GeoBrowse::current() { verify(ok()); return _cur._o; } - DiskLoc GeoBrowse::currLoc() { verify(ok()); return _cur._loc; } - BSONObj GeoBrowse::currKey() const { return _cur._key; } - - // Are we finished getting points? - bool GeoBrowse::moreToDo() { return _state != DONE; } - - bool GeoBrowse::checkAndAdvance(BtreeLocation* bl, - const GeoHash& hash, - int& totalFound) { - if (bl->eof()) { return false; } - - //cout << "looking at " << bl->_loc.obj().toString() << " dl " << bl->_loc.toString() << endl; - - if (!BtreeLocation::hasPrefix(bl->_key, hash)) { return false; } - - totalFound++; - GeoIndexEntry n(bl->_loc, bl->_key); - add(n); - //cout << "adding\n"; - - bl->advance(); - - return true; - } - - - // Fills the stack, but only checks a maximum number of maxToCheck points at a time. - // Further calls to this function will continue the expand/check neighbors algorithm. - void GeoBrowse::fillStack(int maxToCheck, int maxToAdd, bool onlyExpand) { - if(maxToAdd < 0) maxToAdd = maxToCheck; - int maxFound = _foundInExp + maxToCheck; - verify(maxToCheck > 0); - verify(maxFound > 0); - verify(_found <= 0x7fffffff); // conversion to int - int maxAdded = static_cast<int>(_found) + maxToAdd; - verify(maxAdded >= 0); // overflow check - - bool isNeighbor = _centerPrefix.constrains(); - - // Starting a box expansion - if (_state == START) { - // Get the very first hash point, if required - if(! isNeighbor) - _prefix = expandStartHash(); - - if (!BtreeLocation::initial(&_txn, _descriptor, _params, _min, _max, _prefix)) { - _state = isNeighbor ? DONE_NEIGHBOR : DONE; - } else { - _state = DOING_EXPAND; - _lastPrefix.reset(); - } - } - - // Doing the actual box expansion - if (_state == DOING_EXPAND) { - while (true) { - // Record the prefix we're actively exploring... - _expPrefix.reset(new GeoHash(_prefix)); - - // Find points inside this prefix - while (checkAndAdvance(&_min, _prefix, _foundInExp) - && _foundInExp < maxFound && _found < maxAdded) {} - while (checkAndAdvance(&_max, _prefix, _foundInExp) - && _foundInExp < maxFound && _found < maxAdded) {} - - if(_foundInExp >= maxFound || _found >= maxAdded) return; - - // We've searched this prefix fully, remember - _lastPrefix.reset(new GeoHash(_prefix)); - - // If we've searched the entire space, we're finished. - if (! _prefix.constrains()) { - _state = DONE; - notePrefix(); - return; - } - - // If we won't fit in the box, and we're not doing a sub-scan, increase the size - if (! fitsInBox(_converter->sizeEdge(_prefix)) && _fringe.size() == 0) { - // If we're still not expanded bigger than the box size, expand again - _prefix = _prefix.up(); - continue; - } - - // We're done and our size is large enough - _state = DONE_NEIGHBOR; - - // Go to the next sub-box, if applicable - if(_fringe.size() > 0) _fringe.pop_back(); - // Go to the next neighbor if this was the last sub-search - if(_fringe.size() == 0) _neighbor++; - break; - } - notePrefix(); - } - - // If we doeighbors - if(onlyExpand) return; - - // If we're done expanding the current box... - if(_state == DONE_NEIGHBOR) { - // Iterate to the next neighbor - // Loop is useful for cases where we want to skip over boxes entirely, - // otherwise recursion increments the neighbors. - for (; _neighbor < 9; _neighbor++) { - // If we have no fringe for the neighbor, make sure we have the default fringe - if(_fringe.size() == 0) _fringe.push_back(""); - - if(! isNeighbor) { - _centerPrefix = _prefix; - _centerBox = _converter->unhashToBox(_centerPrefix); - isNeighbor = true; - } - - int i = (_neighbor / 3) - 1; - int j = (_neighbor % 3) - 1; - - if ((i == 0 && j == 0) || - (i < 0 && _centerPrefix.atMinX()) || - (i > 0 && _centerPrefix.atMaxX()) || - (j < 0 && _centerPrefix.atMinY()) || - (j > 0 && _centerPrefix.atMaxY())) { - - continue; // main box or wrapped edge - // TODO: We may want to enable wrapping in future, probably best as layer - // on top of this search. - } - - // Make sure we've got a reasonable center - verify(_centerPrefix.constrains()); - - GeoHash _neighborPrefix = _centerPrefix; - _neighborPrefix.move(i, j); - - while(_fringe.size() > 0) { - _prefix = _neighborPrefix + _fringe.back(); - Box cur(_converter->unhashToBox(_prefix)); - - double intAmt = intersectsBox(cur); - - // No intersection - if(intAmt <= 0) { - _fringe.pop_back(); - continue; - } else if(intAmt < 0.5 && _prefix.canRefine() - && _fringe.back().size() < 4 /* two bits */) { - // Small intersection, refine search - string lastSuffix = _fringe.back(); - _fringe.pop_back(); - _fringe.push_back(lastSuffix + "00"); - _fringe.push_back(lastSuffix + "01"); - _fringe.push_back(lastSuffix + "11"); - _fringe.push_back(lastSuffix + "10"); - continue; - } - - // Restart our search from a diff box. - _state = START; - verify(! onlyExpand); - verify(_found <= 0x7fffffff); - fillStack(maxFound - _foundInExp, maxAdded - static_cast<int>(_found)); - // When we return from the recursive fillStack call, we'll either have - // checked enough points or be entirely done. Max recurse depth is < 8 * - // 16. - // If we're maxed out on points, return - if(_foundInExp >= maxFound || _found >= maxAdded) { - // Make sure we'll come back to add more points - verify(_state == DOING_EXPAND); - return; - } - - // Otherwise we must be finished to return - verify(_state == DONE); - return; - } - } - // Finished with neighbors - _state = DONE; - } - } - - bool GeoBrowse::remembered(BSONObj o){ - BSONObj seenId = o["_id"].wrap("").getOwned(); - if(_seenIds.find(seenId) != _seenIds.end()){ - return true; - } else{ - _seenIds.insert(seenId); - return false; - } - } - - int GeoBrowse::addSpecific(const GeoIndexEntry& node, const Point& keyP, bool onBounds, - double keyD, bool potentiallyNewDoc) { - int found = 0; - // We need to handle every possible point in this method, even those not in the key - // value, to avoid us tracking which hashes we've already seen. - if(! potentiallyNewDoc){ return 0; } - - // Final check for new doc - // OK to touch, since we're probably returning this object now - const BSONObj obj = _collection->docFor(node.recordLoc); - - if (remembered(obj)) { - //cout << "remembered\n"; - return 0; - } - - if(! onBounds) { - //log() << "Added ind to " << _type << endl; - _stack.push_front(GeoPoint(node, obj)); - found++; - } else { - // We now handle every possible point in the document, even those not in the key - // value, since we're iterating through them anyway - prevents us from having to - // save the hashes we've seen per-doc - // If we're filtering by hash, get the original - - vector< BSONObj > locs; - getPointsFor(node._key, obj, locs, true); - for(vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i){ - double d = -1; - Point p(*i); - // We can avoid exact document checks by redoing approx checks, - // if the exact checks are more expensive. - bool needExact = true; - - if(! needExact || exactDocCheck(p, d)){ - //log() << "Added mult to " << _type << endl; - _stack.push_front(GeoPoint(node, obj)); - found++; - // IExit after first point is added - break; - } - } - } - - while(_cur.isEmpty() && _stack.size() > 0){ - _cur = _stack.front(); - _stack.pop_front(); - } - - return found; - } - - long long GeoBrowse::nscanned() { - if (_firstCall) { ok(); } - return _nscanned; - } - - void GeoBrowse::explainDetails(BSONObjBuilder& b){ - b << "lookedAt" << _lookedAt; - b << "matchesPerfd" << _matchesPerfd; - b << "objectsLoaded" << _objectsLoaded; - b << "pointsLoaded" << _pointsLoaded; - // b << "pointsSavedForYield" << _nDirtied; - // b << "pointsChangedOnYield" << _nChangedOnYield; - // b << "pointsRemovedOnYield" << _nRemovedOnYield; - } - - bool GeoBrowse::invalidate(const DiskLoc& dl) { - if (_firstCall) { return false; } - - // Are we tossing out a result that we (probably) would have returned? - bool found = false; - - if (_cur._loc == dl) { - advance(); - found = true; - } - - list<GeoPoint>::iterator it = _stack.begin(); - while (it != _stack.end()) { - if (it->_loc == dl) { - list<GeoPoint>::iterator old = it; - it++; - _stack.erase(old); - found = true; - } - else { - it++; - } - } - - if (!_min.eof() && _min._loc == dl) { - _min.recoverFromYield(); - while (_min._loc == dl && !_min.eof()) { - _min.advance(); - } - _min.prepareToYield(); - } - - if (!_max.eof() && _max._loc == dl) { - _max.recoverFromYield(); - while (_max._loc == dl && !_max.eof()) { - _max.advance(); - } - _max.prepareToYield(); - } - - return found; - } - -} // namespace twod_exec -} // namespace mongo diff --git a/src/mongo/db/exec/2dcommon.h b/src/mongo/db/exec/2dcommon.h deleted file mode 100644 index 1d27b610a70..00000000000 --- a/src/mongo/db/exec/2dcommon.h +++ /dev/null @@ -1,289 +0,0 @@ -/** - * Copyright (C) 2013-2014 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/exec/index_scan.h" -#include "mongo/db/geo/core.h" -#include "mongo/db/geo/hash.h" -#include "mongo/db/geo/shapes.h" - -#include "mongo/db/index/2d_access_method.h" -#include "mongo/db/operation_context_noop.h" - -#pragma once - -namespace mongo { - - class OperationContext; - -namespace twod_exec { - - // - // Data structures - // - - enum GeoDistType { - GEO_PLANE, - GEO_SPHERE - }; - - class GeoIndexEntry { - public: - GeoIndexEntry(DiskLoc r, BSONObj k) : recordLoc(r), _key(k) { } - const DiskLoc recordLoc; - const BSONObj _key; - private: - GeoIndexEntry(); - }; - - class GeoPoint { - public: - GeoPoint() : _distance(-1), _exact(false) { } - - //// Distance not used //// - - GeoPoint(const GeoIndexEntry& node, const BSONObj& obj) - : _key(node._key), _loc(node.recordLoc), _o(obj), - _distance(-1), _exact(false) { } - - //// Immediate initialization of distance //// - - GeoPoint(const GeoIndexEntry& node, const BSONObj& obj, double distance, bool exact) - : _key(node._key), _loc(node.recordLoc), _o(obj), - _distance(distance), _exact(exact) { } - - GeoPoint(const GeoPoint& pt, double distance, bool exact) - : _key(pt.key()), _loc(pt.loc()), _o(pt.obj()), _distance(distance), _exact(exact) { } - - bool operator<(const GeoPoint& other) const { - if (_distance != other._distance) return _distance < other._distance; - if (_exact != other._exact) return _exact < other._exact; - return _loc < other._loc; - } - - double distance() const { return _distance; } - bool isExact() const { return _exact; } - BSONObj key() const { return _key; } - bool hasLoc() const { return _loc.isNull(); } - BSONObj obj() const { return _o; } - BSONObj pt() const { return _pt; } - bool isEmpty() { return _o.isEmpty(); } - - DiskLoc loc() const { - return _loc; - } - - std::string toString() const { - return str::stream() << "Point from " << _key << " - " << _o - << " dist : " << _distance << (_exact ? " (ex)" : " (app)"); - } - - BSONObj _key; - DiskLoc _loc; - BSONObj _o; - BSONObj _pt; - - double _distance; - bool _exact; - - BSONObj _id; - }; - - struct BtreeLocation { - BtreeLocation() : _eof(false) { } - scoped_ptr<IndexScan> _scan; - scoped_ptr<WorkingSet> _ws; - DiskLoc _loc; - BSONObj _key; - bool _eof; - - bool eof() { return _eof; } - - static bool hasPrefix(const BSONObj& key, const GeoHash& hash); - - void advance(); - - void prepareToYield() { _scan->prepareToYield(); } - void recoverFromYield() { _scan->recoverFromYield(); } - - // Returns the min and max keys which bound a particular location. - // The only time these may be equal is when we actually equal the location - // itself, otherwise our expanding algorithm will fail. - static bool initial(OperationContext* txn, - const IndexDescriptor* descriptor, - const TwoDIndexingParams& params, - BtreeLocation& min, - BtreeLocation& max, - GeoHash start); - }; - - // - // Execution - // - - class GeoAccumulator { - public: - GeoAccumulator(Collection* collection, - TwoDAccessMethod* accessMethod, MatchExpression* filter); - - virtual ~GeoAccumulator(); - - enum KeyResult { BAD, BORDER, GOOD }; - - virtual void add(const GeoIndexEntry& node); - - long long found() const { return _found; } - - virtual void getPointsFor(const BSONObj& key, const BSONObj& obj, - std::vector<BSONObj> &locsForNode, bool allPoints = false); - - virtual int addSpecific(const GeoIndexEntry& node, const Point& p, bool inBounds, double d, - bool newDoc) = 0; - - virtual KeyResult approxKeyCheck(const Point& p, double& keyD) = 0; - - Collection* _collection; - TwoDAccessMethod* _accessMethod; - shared_ptr<GeoHashConverter> _converter; - std::map<DiskLoc, bool> _matched; - - MatchExpression* _filter; - - long long _lookedAt; - long long _matchesPerfd; - long long _objectsLoaded; - long long _pointsLoaded; - long long _found; - }; - - class GeoBrowse : public GeoAccumulator { - public: - // The max points which should be added to an expanding box at one time - static const int maxPointsHeuristic = 50; - - // Expand states - enum State { - START, - DOING_EXPAND, - DONE_NEIGHBOR, - DONE - } _state; - - GeoBrowse(Collection* collection, - TwoDAccessMethod* accessMethod, - std::string type, - MatchExpression* filter); - - virtual bool ok(); - virtual bool advance(); - virtual void noteLocation(); - - /* called before query getmore block is iterated */ - virtual void checkLocation(); - - virtual BSONObj current(); - virtual DiskLoc currLoc(); - virtual BSONObj currKey() const; - - // Are we finished getting points? - virtual bool moreToDo(); - - // Fills the stack, but only checks a maximum number of maxToCheck points at a time. - // Further calls to this function will continue the expand/check neighbors algorithm. - virtual void fillStack(int maxToCheck, - int maxToAdd = -1, - bool onlyExpand = false); - - bool checkAndAdvance(BtreeLocation* bl, const GeoHash& hash, int& totalFound); - - // The initial geo hash box for our first expansion - virtual GeoHash expandStartHash() = 0; - - // Whether the current box width is big enough for our search area - virtual bool fitsInBox(double width) = 0; - - // The amount the current box overlaps our search area - virtual double intersectsBox(Box& cur) = 0; - - virtual bool exactDocCheck(const Point& p, double& d) = 0; - - bool remembered(BSONObj o); - - virtual int addSpecific(const GeoIndexEntry& node, const Point& keyP, bool onBounds, - double keyD, bool potentiallyNewDoc); - - virtual long long nscanned(); - - virtual void explainDetails(BSONObjBuilder& b); - - void notePrefix() { _expPrefixes.push_back(_prefix); } - - /** - * Returns true if the result was actually invalidated, false otherwise. - */ - bool invalidate(const DiskLoc& dl); - - std::string _type; - std::list<GeoPoint> _stack; - std::set<BSONObj> _seenIds; - - GeoPoint _cur; - bool _firstCall; - - long long _nscanned; - - // The current box we're expanding (-1 is first/center box) - int _neighbor; - - // The points we've found so far - int _foundInExp; - - // The current hash prefix we're expanding and the center-box hash prefix - GeoHash _prefix; - shared_ptr<GeoHash> _lastPrefix; - GeoHash _centerPrefix; - std::list<std::string> _fringe; - int recurseDepth; - Box _centerBox; - - // Start and end of our search range in the current box - BtreeLocation _min; - BtreeLocation _max; - - shared_ptr<GeoHash> _expPrefix; - mutable std::vector<GeoHash> _expPrefixes; - const IndexDescriptor* _descriptor; - shared_ptr<GeoHashConverter> _converter; - TwoDIndexingParams _params; - - private: - const Collection* _collection; - OperationContextNoop _txn; - }; - -} // namespace twod_exec -} // namespace mongo diff --git a/src/mongo/db/exec/2dnear.cpp b/src/mongo/db/exec/2dnear.cpp deleted file mode 100644 index cfc669a52cc..00000000000 --- a/src/mongo/db/exec/2dnear.cpp +++ /dev/null @@ -1,556 +0,0 @@ -/** - * Copyright (C) 2013 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/exec/2dnear.h" - -#include "mongo/db/catalog/collection.h" -#include "mongo/db/catalog/database.h" -#include "mongo/db/client.h" -#include "mongo/db/exec/working_set_common.h" -#include "mongo/db/exec/working_set_computed_data.h" -#include "mongo/db/jsobj.h" - -namespace mongo { - - // static - const char* TwoDNear::kStageType = "GEO_NEAR_2D"; - - TwoDNear::TwoDNear(OperationContext* txn, const TwoDNearParams& params, WorkingSet* ws) - : _txn(txn), _commonStats(kStageType) { - _params = params; - _workingSet = ws; - _initted = false; - _specificStats.keyPattern = _params.indexKeyPattern; - } - - TwoDNear::~TwoDNear() { } - - bool TwoDNear::isEOF() { - return _initted && _results.empty(); - } - - PlanStage::StageState TwoDNear::work(WorkingSetID* out) { - ++_commonStats.works; - - // Adds the amount of time taken by work() to executionTimeMillis. - ScopedTimer timer(&_commonStats.executionTimeMillis); - - if (!_initted) { - _initted = true; - - if ( !_params.collection ) - return PlanStage::IS_EOF; - - IndexCatalog* indexCatalog = _params.collection->getIndexCatalog(); - - IndexDescriptor* desc = indexCatalog->findIndexByKeyPattern(_params.indexKeyPattern); - if ( desc == NULL ) - return PlanStage::IS_EOF; - TwoDAccessMethod* am = static_cast<TwoDAccessMethod*>( indexCatalog->getIndex( desc ) ); - - auto_ptr<twod_exec::GeoSearch> search; - search.reset(new twod_exec::GeoSearch(_params.collection, - am, - _params.nearQuery.centroid.oldPoint, - _params.numWanted, - _params.filter, - _params.nearQuery.maxDistance, - _params.nearQuery.isNearSphere ? twod_exec::GEO_SPHERE - : twod_exec::GEO_PLANE)); - - // This is where all the work is done. :( - search->exec(); - _specificStats.objectsLoaded = search->_objectsLoaded; - _specificStats.nscanned = search->_lookedAt; - - for (twod_exec::GeoHopper::Holder::iterator it = search->_points.begin(); - it != search->_points.end(); it++) { - - WorkingSetID id = _workingSet->allocate(); - WorkingSetMember* member = _workingSet->get(id); - member->loc = it->_loc; - member->obj = _params.collection->docFor(member->loc); - member->state = WorkingSetMember::LOC_AND_UNOWNED_OBJ; - if (_params.addDistMeta) { - member->addComputed(new GeoDistanceComputedData(it->_distance)); - } - if (_params.addPointMeta) { - member->addComputed(new GeoNearPointComputedData(it->_pt)); - } - _results.push(Result(id, it->_distance)); - _invalidationMap.insert(pair<DiskLoc, WorkingSetID>(it->_loc, id)); - } - } - - if (isEOF()) { return PlanStage::IS_EOF; } - - Result result = _results.top(); - _results.pop(); - *out = result.id; - - // Remove from invalidation map. - WorkingSetMember* member = _workingSet->get(*out); - - // The WSM may have been mutated or deleted so it may not have a loc. - if (member->hasLoc()) { - typedef multimap<DiskLoc, WorkingSetID>::iterator MMIT; - pair<MMIT, MMIT> range = _invalidationMap.equal_range(member->loc); - for (MMIT it = range.first; it != range.second; ++it) { - if (it->second == *out) { - _invalidationMap.erase(it); - break; - } - } - } - - ++_commonStats.advanced; - return PlanStage::ADVANCED; - } - - void TwoDNear::prepareToYield() { - // Nothing to do here. - } - - void TwoDNear::recoverFromYield() { - // Also nothing to do here. - } - - void TwoDNear::invalidate(const DiskLoc& dl, InvalidationType type) { - // We do the same thing for mutation or deletion: fetch the doc and forget about the - // DiskLoc. 2d's near search computes all its results in one go so we know that we'll still - // return valid data. - typedef multimap<DiskLoc, WorkingSetID>::iterator MMIT; - pair<MMIT, MMIT> range = _invalidationMap.equal_range(dl); - for (MMIT it = range.first; it != range.second; ++it) { - WorkingSetMember* member = _workingSet->get(it->second); - // If it's in the invalidation map it must have a DiskLoc. - verify(member->hasLoc()); - WorkingSetCommon::fetchAndInvalidateLoc(member, _params.collection); - verify(!member->hasLoc()); - } - _invalidationMap.erase(range.first, range.second); - } - - vector<PlanStage*> TwoDNear::getChildren() const { - vector<PlanStage*> empty; - return empty; - } - - PlanStageStats* TwoDNear::getStats() { - _commonStats.isEOF = isEOF(); - auto_ptr<PlanStageStats> ret(new PlanStageStats(_commonStats, STAGE_GEO_NEAR_2D)); - ret->specific.reset(new TwoDNearStats(_specificStats)); - return ret.release(); - } - - const CommonStats* TwoDNear::getCommonStats() { - return &_commonStats; - } - - const SpecificStats* TwoDNear::getSpecificStats() { - return &_specificStats; - } - -} // namespace mongo - -namespace mongo { -namespace twod_exec { - - // - // GeoHopper - // - - GeoHopper::GeoHopper(Collection* collection, - TwoDAccessMethod* accessMethod, - unsigned max, - const Point& n, - MatchExpression* filter, - double maxDistance, - GeoDistType type) - : GeoBrowse(collection, accessMethod, "search", filter), - _max(max), - _near(n), - _maxDistance(maxDistance), - _type(type), - _distError(type == GEO_PLANE - ? accessMethod->getParams().geoHashConverter->getError() - : accessMethod->getParams().geoHashConverter->getErrorSphere()), - _farthest(0), - _collection(collection) {} - - GeoAccumulator:: KeyResult GeoHopper::approxKeyCheck(const Point& p, double& d) { - // Always check approximate distance, since it lets us avoid doing - // checks of the rest of the object if it succeeds - switch (_type) { - case GEO_PLANE: - d = distance(_near, p); - break; - case GEO_SPHERE: - checkEarthBounds(p); - d = spheredist_deg(_near, p); - break; - default: verify(false); - } - verify(d >= 0); - - // If we need more points - double borderDist = (_points.size() < _max ? _maxDistance : farthest()); - - if (d >= borderDist - 2 * _distError && d <= borderDist + 2 * _distError) return BORDER; - else return d < borderDist ? GOOD : BAD; - } - - bool GeoHopper::exactDocCheck(const Point& p, double& d){ - bool within = false; - - // Get the appropriate distance for the type - switch (_type) { - case GEO_PLANE: - d = distance(_near, p); - within = distanceWithin(_near, p, _maxDistance); - break; - case GEO_SPHERE: - checkEarthBounds(p); - d = spheredist_deg(_near, p); - within = (d <= _maxDistance); - break; - default: verify(false); - } - - return within; - } - - - int GeoHopper::addSpecific(const GeoIndexEntry& node, const Point& keyP, bool onBounds, - double keyD, bool potentiallyNewDoc) { - // Unique documents - GeoPoint newPoint(node, _collection->docFor(node.recordLoc), keyD, false); - int prevSize = _points.size(); - - // STEP 1 : Remove old duplicate points from the set if needed - - // Lookup old point with same doc - map<DiskLoc, Holder::iterator>::iterator oldPointIt = _seenPts.find(newPoint.loc()); - - if(oldPointIt != _seenPts.end()){ - const GeoPoint& oldPoint = *(oldPointIt->second); - // We don't need to care if we've already seen this same approx pt or better, - // or we've already gone to disk once for the point - if(oldPoint < newPoint){ - return 0; - } - _points.erase(oldPointIt->second); - } - - //cout << "inserting point\n"; - Holder::iterator newIt = _points.insert(newPoint); - _seenPts[ newPoint.loc() ] = newIt; - - verify(_max > 0); - - Holder::iterator lastPtIt = _points.end(); - lastPtIt--; - _farthest = lastPtIt->distance() + 2 * _distError; - return _points.size() - prevSize; - } - - // Removes extra points from end of _points set. - // Check can be a bit costly if we have lots of exact points near borders, - // so we'll do this every once and awhile. - void GeoHopper::processExtraPoints(){ - if(_points.size() == 0) return; - int prevSize = _points.size(); - - // Erase all points from the set with a position >= _max *and* - // whose distance isn't close to the _max - 1 position distance - int numToErase = _points.size() - _max; - if(numToErase < 0) numToErase = 0; - - // Get the first point definitely in the _points array - Holder::iterator startErase = _points.end(); - for(int i = 0; i < numToErase + 1; i++) startErase--; - _farthest = startErase->distance() + 2 * _distError; - - startErase++; - while(numToErase > 0 && startErase->distance() <= _farthest){ - numToErase--; - startErase++; - verify(startErase != _points.end() || numToErase == 0); - } - - for(Holder::iterator i = startErase; i != _points.end(); ++i) { - _seenPts.erase(i->loc()); - } - - _points.erase(startErase, _points.end()); - - int diff = _points.size() - prevSize; - if(diff > 0) _found += diff; - else _found -= -diff; - } - - // - // GeoSearch - // - - GeoSearch::GeoSearch(Collection* collection, - TwoDAccessMethod* accessMethod, - const Point& startPt, - int numWanted, - MatchExpression* filter, - double maxDistance, - GeoDistType type) - : GeoHopper(collection, accessMethod, numWanted, startPt, filter, maxDistance, type), - _start(accessMethod->getParams().geoHashConverter->hash(startPt.x, startPt.y)), - _numWanted(numWanted), - _type(type), - _params(accessMethod->getParams()) { - - _nscanned = 0; - _found = 0; - - if(_maxDistance < 0){ - _scanDistance = numeric_limits<double>::max(); - } else if (type == GEO_PLANE) { - _scanDistance = maxDistance + _params.geoHashConverter->getError(); - } else if (type == GEO_SPHERE) { - checkEarthBounds(startPt); - // TODO: consider splitting into x and y scan distances - _scanDistance = computeXScanDistance(startPt.y, - rad2deg(_maxDistance) + _params.geoHashConverter->getError()); - } - - verify(_scanDistance > 0); - } - - void GeoSearch::exec() { - if(_numWanted == 0) return; - - /* - * Search algorithm - * 1) use geohash prefix to find X items - * 2) compute max distance from want to an item - * 3) find optimal set of boxes that complete circle - * 4) use regular btree cursors to scan those boxes - */ - - // Part 1 - { - do { - long long f = found(); - verify(f <= 0x7fffffff); - fillStack(maxPointsHeuristic, _numWanted - static_cast<int>(f), true); - processExtraPoints(); - } while(_state != DONE && _state != DONE_NEIGHBOR && - found() < _numWanted && - (!_prefix.constrains() || - _params.geoHashConverter->sizeEdge(_prefix) <= _scanDistance)); - - // If we couldn't scan or scanned everything, we're done - if(_state == DONE){ - expandEndPoints(); - return; - } - } - - // Part 2 - { - // Find farthest distance for completion scan - double farDist = farthest(); - if(found() < _numWanted) { - // Not enough found in Phase 1 - farDist = _scanDistance; - } - else if (_type == GEO_PLANE) { - // Enough found, but need to search neighbor boxes - farDist += _params.geoHashConverter->getError(); - } - else if (_type == GEO_SPHERE) { - // Enough found, but need to search neighbor boxes - farDist = std::min(_scanDistance, - computeXScanDistance(_near.y, - rad2deg(farDist)) - + 2 * _params.geoHashConverter->getError()); - } - verify(farDist >= 0); - - // Find the box that includes all the points we need to return - _want = Box(_near.x - farDist, _near.y - farDist, farDist * 2); - - // Remember the far distance for further scans - _scanDistance = farDist; - - // Reset the search, our distances have probably changed - if(_state == DONE_NEIGHBOR){ - _state = DOING_EXPAND; - _neighbor = -1; - } - - // Do regular search in the full region - do { - fillStack(maxPointsHeuristic); - processExtraPoints(); - } - while(_state != DONE); - } - - expandEndPoints(); - } - - void GeoSearch::addExactPoints(const GeoPoint& pt, Holder& points, bool force){ - int before, after; - addExactPoints(pt, points, before, after, force); - } - - void GeoSearch::addExactPoints(const GeoPoint& pt, Holder& points, int& before, int& after, - bool force){ - before = 0; - after = 0; - - if(pt.isExact()){ - if(force) points.insert(pt); - return; - } - - vector<BSONObj> locs; - // last argument is uniqueDocs - getPointsFor(pt.key(), pt.obj(), locs, true); - - GeoPoint nearestPt(pt, -1, true); - - for(vector<BSONObj>::iterator i = locs.begin(); i != locs.end(); i++){ - Point loc(*i); - double d; - if(! exactDocCheck(loc, d)) continue; - - if(nearestPt.distance() < 0 || d < nearestPt.distance()){ - nearestPt._distance = d; - nearestPt._pt = *i; - continue; - } - } - - if(nearestPt.distance() >= 0){ - points.insert(nearestPt); - if(nearestPt < pt) before++; - else after++; - } - } - - // TODO: Refactor this back into holder class, allow to run periodically when we are seeing - // a lot of pts - void GeoSearch::expandEndPoints(bool finish) { - processExtraPoints(); - // All points in array *could* be in maxDistance - - // Step 1 : Trim points to max size TODO: This check will do little for now, but is - // skeleton for future work in incremental $near - // searches - if(_max > 0){ - int numToErase = _points.size() - _max; - if(numToErase > 0){ - Holder tested; - // Work backward through all points we're not sure belong in the set - Holder::iterator maybePointIt = _points.end(); - maybePointIt--; - double approxMin = maybePointIt->distance() - 2 * _distError; - - // Insert all - int erased = 0; - while(_points.size() > 0 - && (maybePointIt->distance() >= approxMin || erased < numToErase)){ - - Holder::iterator current = maybePointIt; - if (current != _points.begin()) - --maybePointIt; - - addExactPoints(*current, tested, true); - _points.erase(current); - erased++; - - if(tested.size()) - approxMin = tested.begin()->distance() - 2 * _distError; - } - - int numToAddBack = erased - numToErase; - verify(numToAddBack >= 0); - - Holder::iterator testedIt = tested.begin(); - for(int i = 0; i < numToAddBack && testedIt != tested.end(); i++){ - _points.insert(*testedIt); - testedIt++; - } - } - } - - // We've now trimmed first set of unneeded points - - // Step 2: iterate through all points and add as needed - unsigned expandedPoints = 0; - Holder::iterator it = _points.begin(); - double expandWindowEnd = -1; - - while(it != _points.end()){ - const GeoPoint& currPt = *it; - // TODO: If one point is exact, maybe not 2 * _distError - - // See if we're in an expand window - bool inWindow = currPt.distance() <= expandWindowEnd; - // If we're not, and we're done with points, break - if(! inWindow && expandedPoints >= _max) break; - - bool expandApprox = !currPt.isExact() && (finish || inWindow); - - if (expandApprox) { - // Add new point(s). These will only be added in a radius of 2 * _distError - // around the current point, so should not affect previously valid points. - int before, after; - addExactPoints(currPt, _points, before, after, false); - expandedPoints += before; - - if(_max > 0 && expandedPoints < _max) - expandWindowEnd = currPt.distance() + 2 * _distError; - - // Iterate to the next point - Holder::iterator current = it++; - // Erase the current point - _points.erase(current); - } else{ - expandedPoints++; - it++; - } - } - - // Finish - // TODO: Don't really need to trim? - for(; expandedPoints > _max; expandedPoints--) it--; - _points.erase(it, _points.end()); - } - -} // namespace twod_exec -} // namespace mongo diff --git a/src/mongo/db/exec/2dnear.h b/src/mongo/db/exec/2dnear.h deleted file mode 100644 index 1a2b9a70f0f..00000000000 --- a/src/mongo/db/exec/2dnear.h +++ /dev/null @@ -1,203 +0,0 @@ -/** - * Copyright (C) 2013 10gen Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include <queue> - -#include "mongo/db/diskloc.h" -#include "mongo/db/exec/2dcommon.h" -#include "mongo/db/exec/plan_stage.h" -#include "mongo/db/geo/geoquery.h" -#include "mongo/db/jsobj.h" -#include "mongo/db/matcher/expression.h" - -namespace mongo { - - struct TwoDNearParams { - NearQuery nearQuery; - Collection* collection; // not owned - BSONObj indexKeyPattern; - MatchExpression* filter; - int numWanted; - bool addPointMeta; - bool addDistMeta; - }; - - struct Result { - Result(WorkingSetID wsid, double dist) : id(wsid), distance(dist) { } - - bool operator<(const Result& other) const { - // We want increasing distance, not decreasing, so we reverse the <. - return distance > other.distance; - } - - WorkingSetID id; - - double distance; - }; - - class TwoDNear : public PlanStage { - public: - TwoDNear(OperationContext* txn, const TwoDNearParams& params, WorkingSet* ws); - virtual ~TwoDNear(); - - virtual bool isEOF(); - virtual StageState work(WorkingSetID* out); - - virtual void prepareToYield(); - virtual void recoverFromYield(); - virtual void invalidate(const DiskLoc& dl, InvalidationType type); - - virtual std::vector<PlanStage*> getChildren() const; - - virtual StageType stageType() const { return STAGE_GEO_NEAR_2D; } - - virtual PlanStageStats* getStats(); - - virtual const CommonStats* getCommonStats(); - - virtual const SpecificStats* getSpecificStats(); - - static const char* kStageType; - - private: - - // transactional context for read locks. Not owned by us - OperationContext* _txn; - - WorkingSet* _workingSet; - - // Stats - CommonStats _commonStats; - TwoDNearStats _specificStats; - - // We compute an annulus of results and cache it here. - priority_queue<Result> _results; - - // For fast invalidation. Perhaps not worth it. - // - // Multi-location docs mean that this is not one diskloc -> one WSID but one DiskLoc -> many - // WSIDs. - multimap<DiskLoc, WorkingSetID> _invalidationMap; - - TwoDNearParams _params; - - bool _initted; - }; - -} // namespace mongo - -namespace mongo { -namespace twod_exec { - - class GeoHopper : public GeoBrowse { - public: - typedef multiset<GeoPoint> Holder; - - GeoHopper(Collection* collection, - TwoDAccessMethod* accessMethod, - unsigned max, - const Point& n, - MatchExpression* filter, - double maxDistance = numeric_limits<double>::max(), - GeoDistType type = GEO_PLANE); - - virtual KeyResult approxKeyCheck(const Point& p, double& d); - - virtual bool exactDocCheck(const Point& p, double& d); - - // Always in distance units, whether radians or normal - double farthest() const { return _farthest; } - - virtual int addSpecific(const GeoIndexEntry& node, const Point& keyP, bool onBounds, - double keyD, bool potentiallyNewDoc); - - // Removes extra points from end of _points set. - // Check can be a bit costly if we have lots of exact points near borders, - // so we'll do this every once and awhile. - void processExtraPoints(); - - unsigned _max; - Point _near; - Holder _points; - double _maxDistance; - GeoDistType _type; - double _distError; - double _farthest; - - // Safe to use currently since we don't yield in $near searches. If we do start to yield, - // we may need to replace dirtied disklocs in our holder / ensure our logic is correct. - std::map<DiskLoc, Holder::iterator> _seenPts; - - private: - const Collection* _collection; - }; - - class GeoSearch : public GeoHopper { - public: - GeoSearch(Collection* collection, - TwoDAccessMethod* accessMethod, - const Point& startPt, - int numWanted = 100, - MatchExpression* filter = NULL, - double maxDistance = numeric_limits<double>::max(), - GeoDistType type = GEO_PLANE); - - void exec(); - - void addExactPoints(const GeoPoint& pt, Holder& points, bool force); - - void addExactPoints(const GeoPoint& pt, Holder& points, int& before, int& after, - bool force); - - // TODO: Refactor this back into holder class, allow to run periodically when we are seeing - // a lot of pts - void expandEndPoints(bool finish = true); - - virtual GeoHash expandStartHash() { return _start; } - - // Whether the current box width is big enough for our search area - virtual bool fitsInBox(double width) { return width >= _scanDistance; } - - // Whether the current box overlaps our search area - virtual double intersectsBox(Box& cur) { return cur.legacyIntersectFraction(_want); } - - std::set< std::pair<DiskLoc,int> > _seen; - GeoHash _start; - int _numWanted; - double _scanDistance; - long long _nscanned; - int _found; - GeoDistType _type; - Box _want; - TwoDIndexingParams& _params; - }; - -} // namespace twod_exec -} // namespace mongo diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index 96054d3f97c..abfd4c0c458 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -35,8 +35,6 @@ env.Library( env.Library( target = 'exec', source = [ - "2dcommon.cpp", - "2dnear.cpp", "and_hash.cpp", "and_sorted.cpp", "cached_plan.cpp", @@ -45,17 +43,18 @@ env.Library( "distinct_scan.cpp", "eof.cpp", "fetch.cpp", + "geo_near.cpp", "idhack.cpp", "index_scan.cpp", "keep_mutations.cpp", "limit.cpp", "merge_sort.cpp", "multi_plan.cpp", + "near.cpp", "oplogstart.cpp", "or.cpp", "projection.cpp", "projection_exec.cpp", - "s2near.cpp", "shard_filter.cpp", "skip.cpp", "sort.cpp", diff --git a/src/mongo/db/exec/filter.h b/src/mongo/db/exec/filter.h index 8fdd3b8be80..964f5d7f6f7 100644 --- a/src/mongo/db/exec/filter.h +++ b/src/mongo/db/exec/filter.h @@ -103,8 +103,7 @@ namespace mongo { : _keyPattern(keyPattern), _key(key) { } BSONObj toBSON() const { - // Planning shouldn't let this happen. - invariant(0); + return _key; } virtual ElementIterator* allocateIterator(const ElementPath* path) const { diff --git a/src/mongo/db/exec/geo_near.cpp b/src/mongo/db/exec/geo_near.cpp new file mode 100644 index 00000000000..ed570ed0e4f --- /dev/null +++ b/src/mongo/db/exec/geo_near.cpp @@ -0,0 +1,898 @@ +/** + * Copyright (C) 2014 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/exec/geo_near.h" + +// For s2 search +#include "third_party/s2/s2regionintersection.h" + +#include "mongo/base/owned_pointer_vector.h" +#include "mongo/db/exec/index_scan.h" +#include "mongo/db/exec/fetch.h" +#include "mongo/db/exec/working_set_computed_data.h" +#include "mongo/db/geo/geoconstants.h" +#include "mongo/db/index/expression_index.h" +#include "mongo/db/matcher/expression.h" + +namespace mongo { + + // + // Shared GeoNear search functionality + // + + static const double kCircOfEarthInMeters = 2 * M_PI * kRadiusOfEarthInMeters; + static const double kMaxEarthDistanceInMeters = kCircOfEarthInMeters / 2; + static const double kMetersPerDegreeAtEquator = kCircOfEarthInMeters / 360; + + namespace { + + /** + * Structure that holds BSON addresses (BSONElements) and the corresponding geometry parsed + * at those locations. + * Used to separate the parsing of geometries from a BSONObj (which must stay in scope) from + * the computation over those geometries. + * TODO: Merge with 2D/2DSphere key extraction? + */ + struct StoredGeometry { + + static StoredGeometry* parseFrom(const BSONElement& element) { + if (!element.isABSONObj()) + return NULL; + + auto_ptr<StoredGeometry> stored(new StoredGeometry); + if (!stored->geometry.parseFrom(element.Obj())) + return NULL; + stored->element = element; + return stored.release(); + } + + BSONElement element; + GeometryContainer geometry; + }; + } + + /** + * Find and parse all geometry elements on the appropriate field path from the document. + */ + static void extractGeometries(const BSONObj& doc, + const string& path, + vector<StoredGeometry*>* geometries) { + + BSONElementSet geomElements; + // NOTE: Annoyingly, we cannot just expand arrays b/c single 2d points are arrays, we need + // to manually expand all results to check if they are geometries + doc.getFieldsDotted(path, geomElements, false /* expand arrays */); + + for (BSONElementSet::iterator it = geomElements.begin(); it != geomElements.end(); ++it) { + + const BSONElement& el = *it; + auto_ptr<StoredGeometry> stored(StoredGeometry::parseFrom(el)); + + if (stored.get()) { + // Valid geometry element + geometries->push_back(stored.release()); + } + else if (el.type() == Array) { + + // Many geometries may be in an array + BSONObjIterator arrIt(el.Obj()); + while (arrIt.more()) { + + const BSONElement nextEl = arrIt.next(); + stored.reset(StoredGeometry::parseFrom(nextEl)); + + if (stored.get()) { + // Valid geometry element + geometries->push_back(stored.release()); + } + else { + warning() << "geoNear stage read non-geometry element " << nextEl.toString() + << " in array " << el.toString(); + } + } + } + else { + warning() << "geoNear stage read non-geometry element " << el.toString(); + } + } + } + + static StatusWith<double> computeGeoNearDistance(const GeoNearParams& nearParams, + WorkingSetMember* member) { + + // + // Generic GeoNear distance computation + // Distances are computed by projecting the stored geometry into the query CRS, and + // computing distance in that CRS. + // + + // Must have an object in order to get geometry out of it. + invariant(member->hasObj()); + + CRS queryCRS = nearParams.nearQuery.getQueryCRS(); + + // Extract all the geometries out of this document for the near query + OwnedPointerVector<StoredGeometry> geometriesOwned; + vector<StoredGeometry*>& geometries = geometriesOwned.mutableVector(); + extractGeometries(member->obj, nearParams.nearQuery.field, &geometries); + + // Compute the minimum distance of all the geometries in the document + double minDistance = -1; + BSONObj minDistanceObj; + for (vector<StoredGeometry*>::iterator it = geometries.begin(); it != geometries.end(); + ++it) { + + StoredGeometry& stored = **it; + + // NOTE: For now, we're sure that if we get this far in the query we'll have an + // appropriate index which validates the type of geometry we're pulling back here. + // TODO: It may make sense to change our semantics and, by default, only return + // shapes in the same CRS from $geoNear. + if (!stored.geometry.supportsProject(queryCRS)) + continue; + stored.geometry.projectInto(queryCRS); + + double nextDistance = stored.geometry.minDistance(nearParams.nearQuery.centroid); + + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + minDistanceObj = stored.element.Obj(); + } + } + + if (minDistance < 0) { + // No distance to report + return StatusWith<double>(-1); + } + + if (nearParams.addDistMeta) { + if (nearParams.nearQuery.unitsAreRadians()) { + // Hack for nearSphere + // TODO: Remove nearSphere? + invariant(SPHERE == queryCRS); + member->addComputed(new GeoDistanceComputedData(minDistance + / kRadiusOfEarthInMeters)); + } + else { + member->addComputed(new GeoDistanceComputedData(minDistance)); + } + } + + if (nearParams.addPointMeta) { + member->addComputed(new GeoNearPointComputedData(minDistanceObj)); + } + + return StatusWith<double>(minDistance); + } + + static Point toPoint(const PointWithCRS& ptCRS) { + if (ptCRS.crs == FLAT) { + return ptCRS.oldPoint; + } + else { + S2LatLng latLng(ptCRS.point); + return Point(latLng.lng().degrees(), latLng.lat().degrees()); + } + } + + static R2Annulus geoNearDistanceBounds(const NearQuery& query) { + + if (FLAT == query.getQueryCRS()) { + return R2Annulus(toPoint(query.centroid), query.minDistance, query.maxDistance); + } + + invariant(SPHERE == query.getQueryCRS()); + + // TODO: Tighten this up a bit by making a CRS for "sphere with radians" + double minDistance = query.minDistance; + double maxDistance = query.maxDistance; + + if (query.unitsAreRadians()) { + // Our input bounds are in radians, convert to meters since the query CRS is actually + // SPHERE. We'll convert back to radians on outputting distances. + minDistance *= kRadiusOfEarthInMeters; + maxDistance *= kRadiusOfEarthInMeters; + } + + invariant(SPHERE == query.getQueryCRS()); + return R2Annulus(toPoint(query.centroid), + min(minDistance, kMaxEarthDistanceInMeters), + min(maxDistance, kMaxEarthDistanceInMeters)); + } + + // + // GeoNear2DStage + // + + static R2Annulus twoDDistanceBounds(const GeoNearParams& nearParams, + const IndexDescriptor* twoDIndex) { + + R2Annulus fullBounds = geoNearDistanceBounds(nearParams.nearQuery); + + if (FLAT == nearParams.nearQuery.getQueryCRS()) { + + // Reset the full bounds based on our index bounds + GeoHashConverter::Parameters hashParams; + Status status = GeoHashConverter::parseParameters(twoDIndex->infoObj(), &hashParams); + invariant(status.isOK()); // The index status should always be valid + + // The biggest distance possible in this indexed collection is the diagonal of the + // square indexed region. + const double sqrt2Approx = 1.5; + const double diagonalDist = sqrt2Approx * (hashParams.max - hashParams.min); + + fullBounds = R2Annulus(fullBounds.center(), + fullBounds.getInner(), + min(fullBounds.getOuter(), diagonalDist)); + } + else { + // Spherical queries have upper bounds set by the earth - no-op + // TODO: Wrapping errors would creep in here if nearSphere wasn't defined to not wrap + invariant(SPHERE == nearParams.nearQuery.getQueryCRS()); + invariant(!nearParams.nearQuery.isWrappingQuery()); + } + + return fullBounds; + } + + static double twoDBoundsIncrement(const GeoNearParams& nearParams) { + // TODO: Revisit and tune these + if (FLAT == nearParams.nearQuery.getQueryCRS()) { + return 10; + } + else { + return kMaxEarthDistanceInMeters / 1000.0; + } + } + + static const string kTwoDIndexNearStage("GEO_NEAR_2D"); + + GeoNear2DStage::GeoNear2DStage(const GeoNearParams& nearParams, + OperationContext* txn, + WorkingSet* workingSet, + Collection* collection, + IndexDescriptor* twoDIndex) + : NearStage(txn, + workingSet, + collection, + new PlanStageStats(CommonStats(kTwoDIndexNearStage.c_str()), + STAGE_GEO_NEAR_2D)), + _nearParams(nearParams), + _twoDIndex(twoDIndex), + _fullBounds(twoDDistanceBounds(nearParams, twoDIndex)), + _currBounds(_fullBounds.center(), -1, _fullBounds.getInner()), + _boundsIncrement(twoDBoundsIncrement(nearParams)) { + + getNearStats()->keyPattern = twoDIndex->keyPattern(); + } + + GeoNear2DStage::~GeoNear2DStage() { + } + + namespace { + + /** + * Expression which checks whether a legacy 2D index point is contained within our near + * search annulus. See nextInterval() below for more discussion. + * TODO: Make this a standard type of GEO match expression + */ + class TwoDPtInAnnulusExpression : public LeafMatchExpression { + public: + + TwoDPtInAnnulusExpression(const R2Annulus& annulus, const StringData& twoDPath) + : LeafMatchExpression(INTERNAL_2D_POINT_IN_ANNULUS), _annulus(annulus) { + + initPath(twoDPath); + } + + virtual ~TwoDPtInAnnulusExpression() { + } + + virtual void toBSON(BSONObjBuilder* out) const { + out->append("TwoDPtInAnnulusExpression", true); + } + + virtual bool matchesSingleElement(const BSONElement& e) const { + if (!e.isABSONObj()) + return false; + + if (!GeoParser::isIndexablePoint(e.Obj())) + return false; + + PointWithCRS point; + if (!GeoParser::parsePoint(e.Obj(), &point)) + return false; + + return _annulus.contains(point.oldPoint); + } + + // + // These won't be called. + // + + virtual void debugString(StringBuilder& debug, int level = 0) const { + invariant(false); + } + + virtual bool equivalent(const MatchExpression* other) const { + invariant(false); + return false; + } + + virtual LeafMatchExpression* shallowClone() const { + invariant(false); + return NULL; + } + + private: + + R2Annulus _annulus; + }; + + /** + * Expression which checks whether a 2D key for a point (2D hash) intersects our search + * region. The search region may have been formed by more granular hashes. + */ + class TwoDKeyInRegionExpression : public LeafMatchExpression { + public: + + TwoDKeyInRegionExpression(R2Region* region, + const GeoHashConverter::Parameters& hashParams, + const StringData& twoDKeyPath) + : LeafMatchExpression(INTERNAL_2D_KEY_IN_REGION), + _region(region), + _unhasher(hashParams) { + + initPath(twoDKeyPath); + } + + virtual ~TwoDKeyInRegionExpression() { + } + + virtual void toBSON(BSONObjBuilder* out) const { + out->append("TwoDKeyInRegionExpression", true); + } + + virtual bool matchesSingleElement(const BSONElement& e) const { + // Something has gone terribly wrong if this doesn't hold. + invariant(BinData == e.type()); + return !_region->fastDisjoint(_unhasher.unhashToBox(e)); + } + + // + // These won't be called. + // + + virtual void debugString(StringBuilder& debug, int level = 0) const { + invariant(false); + } + + virtual bool equivalent(const MatchExpression* other) const { + invariant(false); + return true; + } + + virtual MatchExpression* shallowClone() const { + invariant(false); + return NULL; + } + + private: + + const scoped_ptr<R2Region> _region; + const GeoHashConverter _unhasher; + }; + + // Helper class to maintain ownership of a match expression alongside an index scan + class IndexScanWithMatch : public IndexScan { + public: + + IndexScanWithMatch(OperationContext* txn, + const IndexScanParams& params, + WorkingSet* workingSet, + MatchExpression* filter) + : IndexScan(txn, params, workingSet, filter), _matcher(filter) { + } + + virtual ~IndexScanWithMatch() { + } + + private: + + // Owns matcher + const scoped_ptr<MatchExpression> _matcher; + }; + + // Helper class to maintain ownership of a match expression alongside an index scan + class FetchStageWithMatch : public FetchStage { + public: + + FetchStageWithMatch(WorkingSet* ws, + PlanStage* child, + MatchExpression* filter, + const Collection* collection) + : FetchStage(ws, child, filter, collection), _matcher(filter) { + } + + virtual ~FetchStageWithMatch() { + } + + private: + + // Owns matcher + const scoped_ptr<MatchExpression> _matcher; + }; + } + + static double min2DBoundsIncrement(NearQuery query, IndexDescriptor* twoDIndex) { + GeoHashConverter::Parameters hashParams; + Status status = GeoHashConverter::parseParameters(twoDIndex->infoObj(), &hashParams); + invariant(status.isOK()); // The index status should always be valid + GeoHashConverter hasher(hashParams); + + // The hasher error is the diagonal of a 2D hash region - it's generally not helpful + // to change region size such that a search radius is smaller than the 2D hash region + // max radius. This is slightly conservative for now (box diagonal vs circle radius). + double minBoundsIncrement = hasher.getError() / 2; + + if (FLAT == query.getQueryCRS()) + return minBoundsIncrement; + + invariant(SPHERE == query.getQueryCRS()); + + // If this is a spherical query, units are in meters - this is just a heuristic + return minBoundsIncrement * kMetersPerDegreeAtEquator; + } + + static R2Annulus projectBoundsToTwoDDegrees(R2Annulus sphereBounds) { + + const double outerDegrees = rad2deg(sphereBounds.getOuter() / kRadiusOfEarthInMeters); + const double innerDegrees = rad2deg(sphereBounds.getInner() / kRadiusOfEarthInMeters); + const double maxErrorDegrees = computeXScanDistance(sphereBounds.center().y, outerDegrees); + + return R2Annulus(sphereBounds.center(), + max(0.0, innerDegrees - maxErrorDegrees), + outerDegrees + maxErrorDegrees); + } + + StatusWith<NearStage::CoveredInterval*> // + GeoNear2DStage::nextInterval(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection) { + + // The search is finished if we searched at least once and all the way to the edge + if (_currBounds.getInner() >= 0 && _currBounds.getOuter() == _fullBounds.getOuter()) { + return StatusWith<CoveredInterval*>(NULL); + } + + // + // Setup the next interval + // + + const NearStats* stats = getNearStats(); + + if (!stats->intervalStats.empty()) { + + const IntervalStats& lastIntervalStats = stats->intervalStats.back(); + + // TODO: Generally we want small numbers of results fast, then larger numbers later + if (lastIntervalStats.numResultsBuffered < 300) + _boundsIncrement *= 2; + else if (lastIntervalStats.numResultsBuffered > 600) + _boundsIncrement /= 2; + } + + _boundsIncrement = max(_boundsIncrement, + min2DBoundsIncrement(_nearParams.nearQuery, _twoDIndex)); + + R2Annulus nextBounds(_currBounds.center(), + _currBounds.getOuter(), + min(_currBounds.getOuter() + _boundsIncrement, + _fullBounds.getOuter())); + + const bool isLastInterval = (nextBounds.getOuter() == _fullBounds.getOuter()); + _currBounds = nextBounds; + + // + // Get a covering region for this interval + // + + auto_ptr<R2Region> coverRegion; + if (_nearParams.nearQuery.getQueryCRS() == FLAT) { + + // NOTE: Due to floating point math issues, FLAT searches of a 2D index need to treat + // containment and distance separately. + // Ex: (distance) 54.001 - 54 > 0.001, but (containment) 54 + 0.001 <= 54.001 + // The idea is that a $near search with bounds is really a $within search, sorted by + // distance. We attach a custom $within : annulus matcher to do the $within search, + // and adjust max/min bounds slightly since, as above, containment does not mean the + // distance calculation won't slightly overflow the boundary. + // + // The code below adjusts: + // 1) Overall min/max bounds of the generated distance intervals to be more inclusive + // 2) Bounds of the interval covering to be more inclusive + // ... and later on we add the custom $within : annulus matcher. + // + // IMPORTANT: The *internal* interval distance bounds are *exact thresholds* - these + // should not be adjusted. + // TODO: Maybe integrate annuluses as a standard shape, and literally transform $near + // internally into a $within query with $near just as sort. + + // Compute the maximum axis-aligned distance error + const double epsilon = std::numeric_limits<double>::epsilon() + * (max(abs(_fullBounds.center().x), abs(_fullBounds.center().y)) + + _fullBounds.getOuter()); + + if (nextBounds.getInner() > 0 && nextBounds.getInner() == _fullBounds.getInner()) { + nextBounds = R2Annulus(nextBounds.center(), + max(0.0, nextBounds.getInner() - epsilon), + nextBounds.getOuter()); + } + + if (nextBounds.getOuter() > 0 && nextBounds.getOuter() == _fullBounds.getOuter()) { + // We're at the max bound of the search, adjust interval maximum + nextBounds = R2Annulus(nextBounds.center(), + nextBounds.getInner(), + nextBounds.getOuter() + epsilon); + } + + // *Always* adjust the covering bounds to be more inclusive + coverRegion.reset(new R2Annulus(nextBounds.center(), + max(0.0, nextBounds.getInner() - epsilon), + nextBounds.getOuter() + epsilon)); + } + else { + invariant(SPHERE == _nearParams.nearQuery.getQueryCRS()); + // TODO: As above, make this consistent with $within : $centerSphere + + // Our intervals aren't in the same CRS as our index, so we need to adjust them + coverRegion.reset(new R2Annulus(projectBoundsToTwoDDegrees(nextBounds))); + } + + // + // Setup the stages for this interval + // + + IndexScanParams scanParams; + scanParams.descriptor = _twoDIndex; + scanParams.direction = 1; + // We use a filter on the key. The filter rejects keys that don't intersect with the + // annulus. An object that is in the annulus might have a key that's not in it and a key + // that's in it. As such we can't just look at one key per object. + // + // This does force us to do our own deduping of results, though. + scanParams.doNotDedup = true; + + // Scan bounds on 2D indexes are only over the 2D field - other bounds aren't applicable. + // This is handled in query planning. + scanParams.bounds = _nearParams.baseBounds; + + // The "2d" field is always the first in the index + const string twoDFieldName = _nearParams.nearQuery.field; + const int twoDFieldPosition = 0; + + OrderedIntervalList coveredIntervals; + coveredIntervals.name = scanParams.bounds.fields[twoDFieldPosition].name; + ExpressionMapping::cover2d(*coverRegion, _twoDIndex->infoObj(), &coveredIntervals); + + // Intersect the $near bounds we just generated into the bounds we have for anything else + // in the scan (i.e. $within) + IndexBoundsBuilder::intersectize(coveredIntervals, + &scanParams.bounds.fields[twoDFieldPosition]); + + // These parameters are stored by the index, and so must be ok + GeoHashConverter::Parameters hashParams; + GeoHashConverter::parseParameters(_twoDIndex->infoObj(), &hashParams); + + MatchExpression* keyMatcher = + new TwoDKeyInRegionExpression(coverRegion.release(), + hashParams, + twoDFieldName); + + // 2D indexes support covered search over additional fields they contain + // TODO: Don't need to clone, can just attach to custom matcher above + if (_nearParams.filter) { + AndMatchExpression* andMatcher = new AndMatchExpression(); + andMatcher->add(keyMatcher); + andMatcher->add(_nearParams.filter->shallowClone()); + keyMatcher = andMatcher; + } + + // IndexScanWithMatch owns the matcher + IndexScan* scan = new IndexScanWithMatch(txn, scanParams, workingSet, keyMatcher); + + MatchExpression* docMatcher = + _nearParams.fullFilter ? _nearParams.fullFilter->shallowClone() : NULL; + + // FLAT searches need to add an additional annulus $within matcher, see above + if (FLAT == _nearParams.nearQuery.getQueryCRS()) { + + MatchExpression* withinMatcher = + new TwoDPtInAnnulusExpression(_fullBounds, twoDFieldName); + + if (docMatcher) { + AndMatchExpression* andMatcher = new AndMatchExpression(); + andMatcher->add(docMatcher); + andMatcher->add(withinMatcher); + docMatcher = andMatcher; + } + else { + docMatcher = withinMatcher; + } + } + + // FetchStage owns index scan + FetchStage* fetcher(new FetchStageWithMatch(workingSet, + scan, + docMatcher, + collection)); + + return StatusWith<CoveredInterval*>(new CoveredInterval(fetcher, + true, + nextBounds.getInner(), + nextBounds.getOuter(), + isLastInterval)); + } + + StatusWith<double> GeoNear2DStage::computeDistance(WorkingSetMember* member) { + return computeGeoNearDistance(_nearParams, member); + } + + // + // GeoNear2DSphereStage + // + + static double twoDSphereBoundsIncrement(const IndexDescriptor* s2Index) { + + // The user can override this so we honor it. We could ignore it though -- it's just used + // to set _radiusIncrement, not to do any covering. + // TODO: Make this parsed somewhere else + int finestIndexedLevel; + BSONElement finestLevelEl = s2Index->infoObj()["finestIndexedLevel"]; + if (finestLevelEl.isNumber()) { + finestIndexedLevel = finestLevelEl.numberInt(); + } + else { + finestIndexedLevel = S2::kAvgEdge.GetClosestLevel(500.0 / kRadiusOfEarthInMeters); + } + + // Start with a conservative bounds increment. When we're done searching a shell we + // increment the two radii by this. + return 5 * S2::kAvgEdge.GetValue(finestIndexedLevel) * kRadiusOfEarthInMeters; + } + + static const string kS2IndexNearStage("GEO_NEAR_2DSPHERE"); + + GeoNear2DSphereStage::GeoNear2DSphereStage(const GeoNearParams& nearParams, + OperationContext* txn, + WorkingSet* workingSet, + Collection* collection, + IndexDescriptor* s2Index) + : NearStage(txn, + workingSet, + collection, + new PlanStageStats(CommonStats(kS2IndexNearStage.c_str()), + STAGE_GEO_NEAR_2DSPHERE)), + _nearParams(nearParams), + _s2Index(s2Index), + _fullBounds(geoNearDistanceBounds(nearParams.nearQuery)), + _currBounds(_fullBounds.center(), -1, _fullBounds.getInner()), + _boundsIncrement(twoDSphereBoundsIncrement(s2Index)) { + + getNearStats()->keyPattern = s2Index->keyPattern(); + } + + GeoNear2DSphereStage::~GeoNear2DSphereStage() { + } + + namespace { + + S2Region* buildS2Region(const R2Annulus& sphereBounds) { + // Internal bounds come in SPHERE CRS units + // i.e. center is lon/lat, inner/outer are in meters + S2LatLng latLng = S2LatLng::FromDegrees(sphereBounds.center().y, + sphereBounds.center().x); + + vector<S2Region*> regions; + + S2Cap innerCap = S2Cap::FromAxisAngle(latLng.ToPoint(), + S1Angle::Radians(sphereBounds.getInner() + / kRadiusOfEarthInMeters)); + innerCap = innerCap.Complement(); + regions.push_back(new S2Cap(innerCap)); + + // We only need to max bound if this is not a full search of the Earth + // Using the constant here is important since we use the min of kMaxEarthDistance + // and the actual bounds passed in to set up the search area. + if (sphereBounds.getOuter() < kMaxEarthDistanceInMeters) { + S2Cap outerCap = S2Cap::FromAxisAngle(latLng.ToPoint(), + S1Angle::Radians(sphereBounds.getOuter() + / kRadiusOfEarthInMeters)); + regions.push_back(new S2Cap(outerCap)); + } + + // Takes ownership of caps + return new S2RegionIntersection(®ions); + } + + /** + * Expression which checks whether a 2DSphere key for a point (S2 hash) intersects our + * search region. The search region may have been formed by more granular hashes. + */ + class TwoDSphereKeyInRegionExpression : public LeafMatchExpression { + public: + + TwoDSphereKeyInRegionExpression(const R2Annulus& bounds, StringData twoDSpherePath) + : LeafMatchExpression(INTERNAL_2DSPHERE_KEY_IN_REGION), + _region(buildS2Region(bounds)) { + + initPath(twoDSpherePath); + } + + virtual ~TwoDSphereKeyInRegionExpression() { + } + + virtual void toBSON(BSONObjBuilder* out) const { + out->append("TwoDSphereKeyInRegionExpression", true); + } + + virtual bool matchesSingleElement(const BSONElement& e) const { + // Something has gone terribly wrong if this doesn't hold. + invariant(String == e.type()); + S2Cell keyCell = S2Cell(S2CellId::FromString(e.str())); + return _region->MayIntersect(keyCell); + } + + const S2Region& getRegion() { + return *_region; + } + + // + // These won't be called. + // + + virtual void debugString(StringBuilder& debug, int level = 0) const { + invariant(false); + } + + virtual bool equivalent(const MatchExpression* other) const { + invariant(false); + return true; + } + + virtual MatchExpression* shallowClone() const { + invariant(false); + return NULL; + } + + private: + + const scoped_ptr<S2Region> _region; + }; + } + + static int getFieldPosition(const IndexDescriptor* index, const string& fieldName) { + + int fieldPosition = 0; + + BSONObjIterator specIt(index->keyPattern()); + while (specIt.more()) { + if (specIt.next().fieldName() == fieldName) { + break; + } + ++fieldPosition; + } + + if (fieldPosition == index->keyPattern().nFields()) + return -1; + + return fieldPosition; + } + + StatusWith<NearStage::CoveredInterval*> // + GeoNear2DSphereStage::nextInterval(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection) { + + // The search is finished if we searched at least once and all the way to the edge + if (_currBounds.getInner() >= 0 && _currBounds.getOuter() == _fullBounds.getOuter()) { + return StatusWith<CoveredInterval*>(NULL); + } + + // + // Setup the next interval + // + + const NearStats* stats = getNearStats(); + + if (!stats->intervalStats.empty()) { + + const IntervalStats& lastIntervalStats = stats->intervalStats.back(); + + // TODO: Generally we want small numbers of results fast, then larger numbers later + if (lastIntervalStats.numResultsBuffered < 300) + _boundsIncrement *= 2; + else if (lastIntervalStats.numResultsBuffered > 600) + _boundsIncrement /= 2; + } + + R2Annulus nextBounds(_currBounds.center(), + _currBounds.getOuter(), + min(_currBounds.getOuter() + _boundsIncrement, + _fullBounds.getOuter())); + + bool isLastInterval = (nextBounds.getOuter() == _fullBounds.getOuter()); + _currBounds = nextBounds; + + // + // Setup the covering region and stages for this interval + // + + IndexScanParams scanParams; + scanParams.descriptor = _s2Index; + scanParams.direction = 1; + // We use a filter on the key. The filter rejects keys that don't intersect with the + // annulus. An object that is in the annulus might have a key that's not in it and a key + // that's in it. As such we can't just look at one key per object. + // + // This does force us to do our own deduping of results, though. + scanParams.doNotDedup = true; + scanParams.bounds = _nearParams.baseBounds; + + // Because the planner doesn't yet set up 2D index bounds, do it ourselves here + const string s2Field = _nearParams.nearQuery.field; + const int s2FieldPosition = getFieldPosition(_s2Index, s2Field); + scanParams.bounds.fields[s2FieldPosition].intervals.clear(); + OrderedIntervalList* coveredIntervals = &scanParams.bounds.fields[s2FieldPosition]; + + TwoDSphereKeyInRegionExpression* keyMatcher = + new TwoDSphereKeyInRegionExpression(_currBounds, s2Field); + + ExpressionMapping::cover2dsphere(keyMatcher->getRegion(), + _s2Index->infoObj(), + coveredIntervals); + + // IndexScan owns the hash matcher + IndexScan* scan = new IndexScanWithMatch(txn, scanParams, workingSet, keyMatcher); + + // FetchStage owns index scan + FetchStage* fetcher(new FetchStage(workingSet, scan, _nearParams.filter, collection)); + + return StatusWith<CoveredInterval*>(new CoveredInterval(fetcher, + true, + nextBounds.getInner(), + nextBounds.getOuter(), + isLastInterval)); + } + + StatusWith<double> GeoNear2DSphereStage::computeDistance(WorkingSetMember* member) { + return computeGeoNearDistance(_nearParams, member); + } + +} // namespace mongo + diff --git a/src/mongo/db/exec/geo_near.h b/src/mongo/db/exec/geo_near.h new file mode 100644 index 00000000000..bc3eccf6f00 --- /dev/null +++ b/src/mongo/db/exec/geo_near.h @@ -0,0 +1,143 @@ +/** + * Copyright (C) 2014 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/exec/near.h" +#include "mongo/db/exec/working_set.h" +#include "mongo/db/exec/plan_stats.h" +#include "mongo/db/index/index_descriptor.h" +#include "mongo/db/geo/geoquery.h" +#include "mongo/db/matcher/expression.h" +#include "mongo/db/query/index_bounds.h" + +namespace mongo { + + /** + * Generic parameters for a GeoNear search + */ + struct GeoNearParams { + + GeoNearParams() : + filter(NULL), addPointMeta(false), addDistMeta(false), fullFilter(NULL) { + } + + // MatchExpression to apply to the index keys and fetched documents + MatchExpression* filter; + // Index scan bounds, not including the geo bounds + IndexBounds baseBounds; + + NearQuery nearQuery; + bool addPointMeta; + bool addDistMeta; + + // More exclusive MatchExpression to apply to the fetched documents, if set + MatchExpression* fullFilter; + }; + + /** + * Implementation of GeoNear on top of a 2D index + */ + class GeoNear2DStage : public NearStage { + public: + + GeoNear2DStage(const GeoNearParams& nearParams, + OperationContext* txn, + WorkingSet* workingSet, + Collection* collection, + IndexDescriptor* twoDIndex); + + virtual ~GeoNear2DStage(); + + protected: + + virtual StatusWith<CoveredInterval*> nextInterval(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection); + + virtual StatusWith<double> computeDistance(WorkingSetMember* member); + + private: + + const GeoNearParams _nearParams; + + // The 2D index we're searching over + // Not owned here + IndexDescriptor* const _twoDIndex; + + // The total search annulus + const R2Annulus _fullBounds; + + // The current search annulus + R2Annulus _currBounds; + + // Amount to increment the next bounds by + double _boundsIncrement; + }; + + /** + * Implementation of GeoNear on top of a 2DSphere (S2) index + */ + class GeoNear2DSphereStage : public NearStage { + public: + + GeoNear2DSphereStage(const GeoNearParams& nearParams, + OperationContext* txn, + WorkingSet* workingSet, + Collection* collection, + IndexDescriptor* s2Index); + + virtual ~GeoNear2DSphereStage(); + + protected: + + virtual StatusWith<CoveredInterval*> nextInterval(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection); + + virtual StatusWith<double> computeDistance(WorkingSetMember* member); + + private: + + const GeoNearParams _nearParams; + + // The 2D index we're searching over + // Not owned here + IndexDescriptor* const _s2Index; + + // The total search annulus + const R2Annulus _fullBounds; + + // The current search annulus + R2Annulus _currBounds; + + // Amount to increment the next bounds by + double _boundsIncrement; + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/near.cpp b/src/mongo/db/exec/near.cpp new file mode 100644 index 00000000000..370f7a3d0d0 --- /dev/null +++ b/src/mongo/db/exec/near.cpp @@ -0,0 +1,352 @@ +/** + * Copyright (C) 2014 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/exec/near.h" + +#include "mongo/db/exec/working_set_common.h" +#include "mongo/util/assert_util.h" + +namespace mongo { + + NearStage::NearStage(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection, + PlanStageStats* stats) + : _txn(txn), + _workingSet(workingSet), + _collection(collection), + _searchState(SearchState_Buffering), + _limit(-1), + _totalReturned(0), + _stats(stats) { + + // Ensure we have specific distance search stats unless a child class specified their + // own distance stats subclass + if (!_stats->specific) { + _stats->specific.reset(new NearStats); + } + } + + NearStage::~NearStage() { + } + + NearStage::CoveredInterval::CoveredInterval(PlanStage* covering, + bool dedupCovering, + double minDistance, + double maxDistance, + bool inclusiveMax) : + covering(covering), + dedupCovering(dedupCovering), + minDistance(minDistance), + maxDistance(maxDistance), + inclusiveMax(inclusiveMax) { + } + + void NearStage::setLimit(int limit) { + _limit = limit; + } + + PlanStage::StageState NearStage::work(WorkingSetID* out) { + + ++_stats->common.works; + + WorkingSetID toReturn = WorkingSet::INVALID_ID; + Status error = Status::OK(); + PlanStage::StageState nextState = PlanStage::NEED_TIME; + + // + // Work the search + // + + if (SearchState_Buffering == _searchState) { + nextState = bufferNext(&error); + } + else if (SearchState_Advancing == _searchState) { + nextState = advanceNext(&toReturn); + } + else { + invariant(SearchState_Finished == _searchState); + nextState = PlanStage::IS_EOF; + } + + // + // Handle the results + // + + if (PlanStage::FAILURE == nextState) { + *out = WorkingSetCommon::allocateStatusMember(_workingSet, error); + } + else if (PlanStage::ADVANCED == nextState) { + *out = toReturn; + ++_stats->common.advanced; + } + else if (PlanStage::NEED_TIME == nextState) { + ++_stats->common.needTime; + } + else if (PlanStage::IS_EOF == nextState) { + _stats->common.isEOF = true; + } + + return nextState; + } + + /** + * Holds a generic search result with a distance computed in some fashion. + */ + struct NearStage::SearchResult { + + SearchResult(WorkingSetID resultID, double distance) : + resultID(resultID), distance(distance) { + } + + bool operator<(const SearchResult& other) const { + // We want increasing distance, not decreasing, so we reverse the < + return distance > other.distance; + } + + WorkingSetID resultID; + double distance; + }; + + PlanStage::StageState NearStage::bufferNext(Status* error) { + + // + // Try to retrieve the next covered member + // + + if (!_nextInterval) { + + StatusWith<CoveredInterval*> intervalStatus = nextInterval(_txn, + _workingSet, + _collection); + if (!intervalStatus.isOK()) { + _searchState = SearchState_Finished; + *error = intervalStatus.getStatus(); + return PlanStage::FAILURE; + } + + if (NULL == intervalStatus.getValue()) { + _searchState = SearchState_Finished; + return PlanStage::IS_EOF; + } + + _nextInterval.reset(intervalStatus.getValue()); + _nextIntervalStats.reset(new IntervalStats()); + } + + WorkingSetID nextMemberID; + PlanStage::StageState intervalState = _nextInterval->covering->work(&nextMemberID); + + if (PlanStage::IS_EOF == intervalState) { + + _stats->children.push_back(_nextInterval->covering->getStats()); + _nextInterval.reset(); + _nextIntervalSeen.clear(); + + _searchState = SearchState_Advancing; + return PlanStage::NEED_TIME; + } + else if (PlanStage::FAILURE == intervalState) { + *error = WorkingSetCommon::getMemberStatus(*_workingSet->get(nextMemberID)); + return intervalState; + } + else if (PlanStage::ADVANCED != intervalState) { + return intervalState; + } + + // + // Try to buffer the next covered member + // + + WorkingSetMember* nextMember = _workingSet->get(nextMemberID); + + // The child stage may not dedup so we must dedup them ourselves. + if (_nextInterval->dedupCovering && nextMember->hasLoc()) { + if (_nextIntervalSeen.end() != _nextIntervalSeen.find(nextMember->loc)) + return PlanStage::NEED_TIME; + } + + ++_nextIntervalStats->numResultsFound; + + StatusWith<double> distanceStatus = computeDistance(nextMember); + + // Store the member's DiskLoc, if available, for quick invalidation + if (nextMember->hasLoc()) { + _nextIntervalSeen.insert(make_pair(nextMember->loc, nextMemberID)); + } + + if (!distanceStatus.isOK()) { + _searchState = SearchState_Finished; + *error = distanceStatus.getStatus(); + return PlanStage::FAILURE; + } + + // If the member's distance is in the current distance interval, add it to our buffered + // results. + double memberDistance = distanceStatus.getValue(); + bool inInterval = memberDistance >= _nextInterval->minDistance + && (_nextInterval->inclusiveMax ? + memberDistance <= _nextInterval->maxDistance : + memberDistance < _nextInterval->maxDistance); + + // Update found distance stats + if (_nextIntervalStats->minDistanceFound < 0 + || memberDistance < _nextIntervalStats->minDistanceFound) { + _nextIntervalStats->minDistanceFound = memberDistance; + } + + if (_nextIntervalStats->maxDistanceFound < 0 + || memberDistance > _nextIntervalStats->maxDistanceFound) { + _nextIntervalStats->maxDistanceFound = memberDistance; + } + + if (inInterval) { + _resultBuffer.push(SearchResult(nextMemberID, memberDistance)); + + ++_nextIntervalStats->numResultsBuffered; + + // Update buffered distance stats + if (_nextIntervalStats->minDistanceBuffered < 0 + || memberDistance < _nextIntervalStats->minDistanceBuffered) { + _nextIntervalStats->minDistanceBuffered = memberDistance; + } + + if (_nextIntervalStats->maxDistanceBuffered < 0 + || memberDistance > _nextIntervalStats->maxDistanceBuffered) { + _nextIntervalStats->maxDistanceBuffered = memberDistance; + } + } + else { + // We won't pass this WSM up, so deallocate it + _workingSet->free(nextMemberID); + } + + return PlanStage::NEED_TIME; + } + + PlanStage::StageState NearStage::advanceNext(WorkingSetID* toReturn) { + + if (_limit >= 0 && _totalReturned >= _limit) { + + getNearStats()->intervalStats.push_back(*_nextIntervalStats); + _nextIntervalStats.reset(); + + _searchState = SearchState_Finished; + return PlanStage::IS_EOF; + } + + if (_resultBuffer.empty()) { + + getNearStats()->intervalStats.push_back(*_nextIntervalStats); + _nextIntervalStats.reset(); + + _searchState = SearchState_Buffering; + return PlanStage::NEED_TIME; + } + + *toReturn = _resultBuffer.top().resultID; + _resultBuffer.pop(); + ++_totalReturned; + return PlanStage::ADVANCED; + } + + bool NearStage::isEOF() { + return SearchState_Finished == _searchState; + } + + void NearStage::prepareToYield() { + ++_stats->common.yields; + if (_nextInterval) { + _nextInterval->covering->prepareToYield(); + } + } + + void NearStage::recoverFromYield() { + ++_stats->common.unyields; + if (_nextInterval) { + _nextInterval->covering->recoverFromYield(); + } + } + + void NearStage::invalidate(const DiskLoc& dl, InvalidationType type) { + ++_stats->common.invalidates; + if (_nextInterval) { + _nextInterval->covering->invalidate(dl, type); + } + + // If a result is in _resultBuffer and has a DiskLoc it will be in _nextIntervalSeen as + // well. It's safe to return the result w/o the DiskLoc, so just fetch the result. + unordered_map<DiskLoc, WorkingSetID, DiskLoc::Hasher>::iterator seenIt = _nextIntervalSeen + .find(dl); + + if (seenIt != _nextIntervalSeen.end()) { + + WorkingSetMember* member = _workingSet->get(seenIt->second); + verify(member->hasLoc()); + WorkingSetCommon::fetchAndInvalidateLoc(member, _collection); + verify(!member->hasLoc()); + + // Don't keep it around in the seen map since there's no valid DiskLoc anymore + _nextIntervalSeen.erase(seenIt); + } + } + + vector<PlanStage*> NearStage::getChildren() const { + // TODO: Keep around all our interval stages and report as children + return vector<PlanStage*>(); + } + + PlanStageStats* NearStage::getStats() { + PlanStageStats* statsClone = _stats->clone(); + + // If we have an active interval, add those child stats as well + if (_nextInterval) + statsClone->children.push_back(_nextInterval->covering->getStats()); + + return statsClone; + } + + StageType NearStage::stageType() const { + return _stats->stageType; + } + + const CommonStats* NearStage::getCommonStats() { + return &_stats->common; + } + + const SpecificStats* NearStage::getSpecificStats() { + return _stats->specific.get(); + } + + NearStats* NearStage::getNearStats() { + return static_cast<NearStats*>(_stats->specific.get()); + } + +} // namespace mongo diff --git a/src/mongo/db/exec/near.h b/src/mongo/db/exec/near.h new file mode 100644 index 00000000000..64cbfe2fd1c --- /dev/null +++ b/src/mongo/db/exec/near.h @@ -0,0 +1,211 @@ +/** + * Copyright (C) 2014 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <queue> + +#include "mongo/base/string_data.h" +#include "mongo/base/status_with.h" +#include "mongo/db/diskloc.h" +#include "mongo/db/catalog/collection.h" +#include "mongo/db/exec/plan_stage.h" +#include "mongo/db/exec/plan_stats.h" +#include "mongo/db/exec/working_set.h" +#include "mongo/db/jsobj.h" +#include "mongo/platform/unordered_map.h" + +namespace mongo { + + /** + * An abstract stage which uses a progressive sort to return results sorted by distance. This + * is useful when we do not have a full ordering computed over the distance metric and don't + * want to generate one. + * + * Child stages need to implement functionality which: + * + * - defines a distance metric + * - iterates through ordered distance intervals, nearest to furthest + * - provides a covering for each distance interval + * + * For example - given a distance search over documents with distances from [0 -> 10], the child + * stage might break up the search into intervals [0->5),[5,7),[7->10]. + * + * Each interval requires a PlanStage which *covers* the interval (returns all results in the + * interval). Results in each interval are buffered fully before being returned to ensure that + * ordering is preserved. + * + * For efficient search, the child stage which covers the distance interval in question should + * not return too many results outside the interval, but correctness only depends on the child + * stage returning all results inside the interval. As an example, a PlanStage which covers the + * interval [0->5) might just be a full collection scan - this will always cover every interval, + * but is slow. If there is an index available, an IndexScan stage might also return all + * documents with distance [0->5) but would be much faster. + * + * Also for efficient search, the intervals should not be too large or too small - though again + * correctness does not depend on interval size. + * + * TODO: Right now the interface allows the nextCovering() to be adaptive, but doesn't allow + * aborting and shrinking a covered range being buffered if we guess wrong. + */ + class NearStage : public PlanStage { + public: + + struct CoveredInterval; + + virtual ~NearStage(); + + /** + * Sets a limit on the total number of results this stage will return. + * Not required. + */ + void setLimit(int limit); + + virtual bool isEOF(); + virtual StageState work(WorkingSetID* out); + + virtual void prepareToYield(); + virtual void recoverFromYield(); + virtual void invalidate(const DiskLoc& dl, InvalidationType type); + + virtual vector<PlanStage*> getChildren() const; + + virtual StageType stageType() const; + virtual PlanStageStats* getStats(); + virtual const CommonStats* getCommonStats(); + virtual const SpecificStats* getSpecificStats(); + + protected: + + /** + * Subclasses of NearStage must provide basics + a stats object which gets owned here. + * The stats object must have specific stats which are a subclass of NearStats, otherwise + * it's generated automatically. + */ + NearStage(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection, + PlanStageStats* stats); + + /** + * Exposes NearStats for adaptive search, allows additional specific stats in subclasses. + */ + NearStats* getNearStats(); + + // + // Methods implemented for specific search functionality + // + + /** + * Constructs the next covering over the next interval to buffer results from, or NULL + * if the full range has been searched. Use the provided working set as the working + * set for the covering stage if required. + * + * Returns !OK on failure to create next stage. + */ + virtual StatusWith<CoveredInterval*> nextInterval(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection) = 0; + + /** + * Computes the distance value for the given member data, or -1 if the member should not be + * returned in the sorted results. + * + * Returns !OK on invalid member data. + */ + virtual StatusWith<double> computeDistance(WorkingSetMember* member) = 0; + + private: + + // + // Generic methods for progressive search functionality + // + + StageState bufferNext(Status* error); + StageState advanceNext(WorkingSetID* toReturn); + + // + // Generic state for progressive near search + // + + // Not owned here + OperationContext* _txn; + // Not owned here + WorkingSet* const _workingSet; + // Not owned here, used for fetching buffered results before invalidation + Collection* const _collection; + + // A progressive search works in stages of buffering and then advancing + enum SearchState { + SearchState_Buffering, + SearchState_Advancing, + SearchState_Finished + } _searchState; + + // The current stage from which this stage should buffer results + scoped_ptr<CoveredInterval> _nextInterval; + + // May need to track disklocs from the child stage to do our own deduping, also to do + // invalidation of buffered results. + unordered_map<DiskLoc, WorkingSetID, DiskLoc::Hasher> _nextIntervalSeen; + + // Stats for the stage covering this interval + scoped_ptr<IntervalStats> _nextIntervalStats; + + // Sorted buffered results to be returned - the current interval + struct SearchResult; + std::priority_queue<SearchResult> _resultBuffer; + + // Tracking for the number of results we should return + int _limit; + int _totalReturned; + + // Stats + scoped_ptr<PlanStageStats> _stats; + }; + + /** + * A covered interval over which a portion of a near search can be run. + */ + struct NearStage::CoveredInterval { + + CoveredInterval(PlanStage* covering, + bool dedupCovering, + double minDistance, + double maxDistance, + bool inclusiveMax); + + const scoped_ptr<PlanStage> covering; + const bool dedupCovering; + + const double minDistance; + const double maxDistance; + const bool inclusiveMax; + }; + +} // namespace mongo diff --git a/src/mongo/db/exec/plan_stats.h b/src/mongo/db/exec/plan_stats.h index d5732597a62..f22b9bef7c2 100644 --- a/src/mongo/db/exec/plan_stats.h +++ b/src/mongo/db/exec/plan_stats.h @@ -485,17 +485,6 @@ namespace mongo { BSONObj sortPattern; }; - struct S2NearStats : public SpecificStats { - - virtual SpecificStats* clone() const { - S2NearStats* specific = new S2NearStats(*this); - specific->keyPattern = keyPattern.getOwned(); - return specific; - } - - BSONObj keyPattern; - }; - struct ShardingFilterStats : public SpecificStats { ShardingFilterStats() : chunkSkips(0) { } @@ -518,46 +507,45 @@ namespace mongo { size_t skip; }; - struct TwoDStats : public SpecificStats { - TwoDStats() { } + struct IntervalStats { - virtual SpecificStats* clone() const { - TwoDStats* specific = new TwoDStats(*this); - return specific; + IntervalStats() : + numResultsFound(0), + numResultsBuffered(0), + minDistanceFound(-1), + maxDistanceFound(-1), + minDistanceBuffered(-1), + maxDistanceBuffered(-1) { } - // Type of GeoBrowse (box, circle, ...) - std::string type; + long long numResultsFound; + long long numResultsBuffered; - // Field name in 2d index. - std::string field; - - // Geo hash converter parameters. - // Used to construct a geo hash converter to generate - // explain-style index bounds from geo hashes. - GeoHashConverter::Parameters converterParams; - - // Geo hashes generated by GeoBrowse::fillStack. - // Raw data for explain index bounds. - std::vector<GeoHash> expPrefixes; - - BSONObj keyPattern; + double minDistanceFound; + double maxDistanceFound; + double minDistanceBuffered; + double maxDistanceBuffered; }; - struct TwoDNearStats : public SpecificStats { - TwoDNearStats() : objectsLoaded(0), nscanned(0) { } + class NearStats : public SpecificStats { + public: + + NearStats() {} virtual SpecificStats* clone() const { - TwoDNearStats* specific = new TwoDNearStats(*this); - return specific; + return new NearStats(*this); } - size_t objectsLoaded; - - // Since 2d's near does all its work in one go we can't divine the real number of - // keys scanned from anything else. - size_t nscanned; + long long totalResultsFound() { + long long totalResultsFound = 0; + for (vector<IntervalStats>::iterator it = intervalStats.begin(); + it != intervalStats.end(); ++it) { + totalResultsFound += it->numResultsFound; + } + return totalResultsFound; + } + vector<IntervalStats> intervalStats; BSONObj keyPattern; }; diff --git a/src/mongo/db/exec/s2near.cpp b/src/mongo/db/exec/s2near.cpp deleted file mode 100644 index ca4396e5648..00000000000 --- a/src/mongo/db/exec/s2near.cpp +++ /dev/null @@ -1,453 +0,0 @@ -/** - * Copyright (C) 2013-2014 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/exec/s2near.h" - -#include "mongo/db/client.h" -#include "mongo/db/catalog/database.h" -#include "mongo/db/exec/fetch.h" -#include "mongo/db/exec/index_scan.h" -#include "mongo/db/exec/working_set_common.h" -#include "mongo/db/exec/working_set_computed_data.h" -#include "mongo/db/geo/geoconstants.h" -#include "mongo/db/index/expression_index.h" -#include "third_party/s2/s2regionintersection.h" - -namespace mongo { - - // static - const char* S2NearStage::kStageType = "GEO_NEAR_2DSPHERE"; - - S2NearStage::S2NearStage(OperationContext* txn, const S2NearParams& params, WorkingSet* ws) - : _txn(txn), _commonStats(kStageType) { - _initted = false; - _params = params; - _ws = ws; - _worked = false; - _failed = false; - _specificStats.keyPattern = _params.indexKeyPattern; - } - - void S2NearStage::init() { - _initted = true; - - // The field we're near-ing from is the n-th field. Figure out what that 'n' is. We - // put the cover for the search annulus in this spot in the bounds. - _nearFieldIndex = 0; - BSONObjIterator specIt(_params.indexKeyPattern); - while (specIt.more()) { - if (specIt.next().fieldName() == _params.nearQuery.field) { - break; - } - ++_nearFieldIndex; - } - - verify(_nearFieldIndex < _params.indexKeyPattern.nFields()); - - // FLAT implies the input distances are in radians. Convert to meters. - if (FLAT == _params.nearQuery.centroid.crs) { - _params.nearQuery.minDistance *= kRadiusOfEarthInMeters; - _params.nearQuery.maxDistance *= kRadiusOfEarthInMeters; - } - - // Make sure distances are sane. Possibly redundant given the checking during parsing. - _minDistance = max(0.0, _params.nearQuery.minDistance); - _maxDistance = min(M_PI * kRadiusOfEarthInMeters, _params.nearQuery.maxDistance); - _minDistance = min(_minDistance, _maxDistance); - - // We grow _outerRadius in nextAnnulus() below. - _innerRadius = _outerRadius = _minDistance; - _outerRadiusInclusive = false; - - // Grab the IndexDescriptor. - if ( !_params.collection ) { - _failed = true; - return; - } - - _descriptor = - _params.collection->getIndexCatalog()->findIndexByKeyPattern(_params.indexKeyPattern); - if (NULL == _descriptor) { - _failed = true; - return; - } - - // The user can override this so we honor it. We could ignore it though -- it's just used - // to set _radiusIncrement, not to do any covering. - int finestIndexedLevel; - BSONElement fl = _descriptor->infoObj()["finestIndexedLevel"]; - if (fl.isNumber()) { - finestIndexedLevel = fl.numberInt(); - } - else { - finestIndexedLevel = S2::kAvgEdge.GetClosestLevel(500.0 / kRadiusOfEarthInMeters); - } - - // Start with a conservative _radiusIncrement. When we're done searching a shell we - // increment the two radii by this. - _radiusIncrement = 5 * S2::kAvgEdge.GetValue(finestIndexedLevel) * kRadiusOfEarthInMeters; - } - - S2NearStage::~S2NearStage() { - // _annulus temporarily takes ownership of some member variables. - // Release them to avoid double-deleting _innerCap and _outerCap. - _annulus.Release(NULL); - } - - PlanStage::StageState S2NearStage::work(WorkingSetID* out) { - // Adds the amount of time taken by work() to executionTimeMillis. - ScopedTimer timer(&_commonStats.executionTimeMillis); - - if (!_initted) { init(); } - - if (_failed) { - mongoutils::str::stream ss; - ss << "unable to load geo index " << _params.indexKeyPattern; - Status status(ErrorCodes::IndexNotFound, ss); - *out = WorkingSetCommon::allocateStatusMember( _ws, status); - return PlanStage::FAILURE; - } - if (isEOF()) { return PlanStage::IS_EOF; } - ++_commonStats.works; - - // If we haven't opened up our very first ixscan+fetch children, do it. This is kind of - // heavy so we don't want to do it in the ctor. - if (!_worked) { - nextAnnulus(); - _worked = true; - } - - // If we're still reading results from the child, do that. - if (NULL != _child.get()) { - return addResultToQueue(out); - } - - // Not reading results. Perhaps we're returning buffered results. - if (!_results.empty()) { - Result result = _results.top(); - _results.pop(); - *out = result.id; - - // Remove from invalidation map. - WorkingSetMember* member = _ws->get(*out); - if (member->hasLoc()) { - unordered_map<DiskLoc, WorkingSetID, DiskLoc::Hasher>::iterator it - = _invalidationMap.find(member->loc); - verify(_invalidationMap.end() != it); - _invalidationMap.erase(it); - } - - ++_commonStats.advanced; - return PlanStage::ADVANCED; - } - - // Not EOF, not reading results, not returning any buffered results. Look in the next shell - // for results. - nextAnnulus(); - return PlanStage::NEED_TIME; - } - - /** - * A MatchExpression for seeing if an S2Cell-in-a-key is within an annulus. - */ - class GeoS2KeyMatchExpression : public MatchExpression { - public: - /** - * 'annulus' must outlive 'this'. - */ - GeoS2KeyMatchExpression(S2RegionIntersection* annulus, - StringData nearFieldPath) - : MatchExpression(INTERNAL_GEO_S2_KEYCHECK), - _annulus(annulus) { - - _elementPath.init(nearFieldPath); - } - - virtual ~GeoS2KeyMatchExpression(){} - - virtual bool matches(const MatchableDocument* doc, MatchDetails* details = 0) const { - MatchableDocument::IteratorHolder cursor(doc, &_elementPath); - - while (cursor->more()) { - ElementIterator::Context e = cursor->next(); - if (matchesSingleElement(e.element())) { - return true; - } - } - - return false; - } - - virtual bool matchesSingleElement(const BSONElement& e) const { - // Something has gone terribly wrong if this doesn't hold. - invariant(String == e.type()); - S2Cell keyCell = S2Cell(S2CellId::FromString(e.str())); - return _annulus->MayIntersect(keyCell); - } - - // - // These won't be called. - // - - virtual void debugString( StringBuilder& debug, int level = 0 ) const { - } - - virtual void toBSON(BSONObjBuilder* out) const { - } - - virtual bool equivalent( const MatchExpression* other ) const { - return false; - } - - virtual MatchExpression* shallowClone() const { - return NULL; - } - - private: - // Not owned here. - S2RegionIntersection* _annulus; - - ElementPath _elementPath; - }; - - void S2NearStage::nextAnnulus() { - // Step 1: Grow the annulus. - _innerRadius = _outerRadius; - _outerRadius += _radiusIncrement; - if (_outerRadius >= _maxDistance) { - _outerRadius = _maxDistance; - _outerRadiusInclusive = true; - } - verify(_innerRadius <= _outerRadius); - - // We might have just grown our radius beyond anything reasonable. - if (isEOF()) { return; } - - // Step 2: Fill out bounds for the ixscan we use. - _innerCap = S2Cap::FromAxisAngle(_params.nearQuery.centroid.point, - S1Angle::Radians(_innerRadius / kRadiusOfEarthInMeters)); - _outerCap = S2Cap::FromAxisAngle(_params.nearQuery.centroid.point, - S1Angle::Radians(_outerRadius / kRadiusOfEarthInMeters)); - _innerCap = _innerCap.Complement(); - - vector<S2Region*> regions; - regions.push_back(&_innerCap); - regions.push_back(&_outerCap); - - _annulus.Release(NULL); - _annulus.Init(®ions); - - // Step 3: Actually create the ixscan. - - IndexScanParams params; - params.descriptor = _descriptor; - _params.baseBounds.fields[_nearFieldIndex].intervals.clear(); - ExpressionMapping::cover2dsphere(_annulus, - params.descriptor->infoObj(), - &_params.baseBounds.fields[_nearFieldIndex]); - - params.bounds = _params.baseBounds; - params.direction = 1; - // We use a filter on the key. The filter rejects keys that don't intersect with the - // annulus. An object that is in the annulus might have a key that's not in it and a key - // that's in it. As such we can't just look at one key per object. - // - // This does force us to do our own deduping of results, though. - params.doNotDedup = true; - - // Owns geo filter. - _keyGeoFilter.reset(new GeoS2KeyMatchExpression( - &_annulus, _params.baseBounds.fields[_nearFieldIndex].name)); - IndexScan* scan = new IndexScan(_txn, params, _ws, _keyGeoFilter.get()); - - // Owns 'scan'. - _child.reset(new FetchStage(_ws, scan, _params.filter, _params.collection)); - _seenInScan.clear(); - } - - PlanStage::StageState S2NearStage::addResultToQueue(WorkingSetID* out) { - PlanStage::StageState state = _child->work(out); - - // All done reading from _child. - if (PlanStage::IS_EOF == state) { - _child.reset(); - _keyGeoFilter.reset(); - - // Adjust the annulus size depending on how many results we got. - if (_results.empty()) { - _radiusIncrement *= 2; - } else if (_results.size() < 300) { - _radiusIncrement *= 2; - } else if (_results.size() > 600) { - _radiusIncrement /= 2; - } - - // Make a new ixscan next time. - return PlanStage::NEED_TIME; - } - - // Nothing to do unless we advance. - if (PlanStage::ADVANCED != state) { return state; } - - WorkingSetMember* member = _ws->get(*out); - // Must have an object in order to get geometry out of it. - verify(member->hasObj()); - - // The scans we use don't dedup so we must dedup them ourselves. We only put locs into here - // if we know for sure whether or not we'll return them in this annulus. - if (member->hasLoc()) { - if (_seenInScan.end() != _seenInScan.find(member->loc)) { - return PlanStage::NEED_TIME; - } - } - - // Get all the fields with that name from the document. - BSONElementSet geom; - member->obj.getFieldsDotted(_params.nearQuery.field, geom, false); - if (geom.empty()) { - return PlanStage::NEED_TIME; - } - - // Some value that any distance we can calculate will be less than. - double minDistance = numeric_limits<double>::max(); - BSONObj minDistanceObj; - for (BSONElementSet::iterator git = geom.begin(); git != geom.end(); ++git) { - if (!git->isABSONObj()) { - mongoutils::str::stream ss; - ss << "s2near stage read invalid geometry element " << *git << " from child"; - Status status(ErrorCodes::InternalError, ss); - *out = WorkingSetCommon::allocateStatusMember( _ws, status); - return PlanStage::FAILURE; - } - BSONObj obj = git->Obj(); - - double distToObj; - if (S2SearchUtil::distanceBetween(_params.nearQuery.centroid.point, obj, &distToObj)) { - if (distToObj < minDistance) { - minDistance = distToObj; - minDistanceObj = obj; - } - } - else { - warning() << "unknown geometry: " << obj.toString(); - } - } - - // If we're here we'll either include the doc in this annulus or reject it. It's safe to - // ignore it if it pops up again in this annulus. - if (member->hasLoc()) { - _seenInScan.insert(member->loc); - } - - // If the distance to the doc satisfies our distance criteria, add it to our buffered - // results. - if (minDistance >= _innerRadius && - (_outerRadiusInclusive ? minDistance <= _outerRadius : minDistance < _outerRadius)) { - _results.push(Result(*out, minDistance)); - if (_params.addDistMeta) { - // FLAT implies the output distances are in radians. Convert to meters. - if (FLAT == _params.nearQuery.centroid.crs) { - member->addComputed(new GeoDistanceComputedData(minDistance - / kRadiusOfEarthInMeters)); - } - else { - member->addComputed(new GeoDistanceComputedData(minDistance)); - } - } - if (_params.addPointMeta) { - member->addComputed(new GeoNearPointComputedData(minDistanceObj)); - } - if (member->hasLoc()) { - _invalidationMap[member->loc] = *out; - } - } - - return PlanStage::NEED_TIME; - } - - void S2NearStage::prepareToYield() { - if (NULL != _child.get()) { - _child->prepareToYield(); - } - } - - void S2NearStage::recoverFromYield() { - if (NULL != _child.get()) { - _child->recoverFromYield(); - } - } - - void S2NearStage::invalidate(const DiskLoc& dl, InvalidationType type) { - if (NULL != _child.get()) { - _child->invalidate(dl, type); - } - - // _results is a queue of results that we will return for the current shell we're on. - // If a result is in _results and has a DiskLoc it will be in _invalidationMap as well. - // It's safe to return the result w/o the DiskLoc. - unordered_map<DiskLoc, WorkingSetID, DiskLoc::Hasher>::iterator it - = _invalidationMap.find(dl); - - if (it != _invalidationMap.end()) { - WorkingSetMember* member = _ws->get(it->second); - verify(member->hasLoc()); - WorkingSetCommon::fetchAndInvalidateLoc(member, _params.collection); - verify(!member->hasLoc()); - // Don't keep it around in the invalidation map since there's no valid DiskLoc anymore. - _invalidationMap.erase(it); - } - } - - bool S2NearStage::isEOF() { - if (!_worked) { return false; } - if (_failed) { return true; } - // We're only done if we exhaust the search space. - return _innerRadius >= _maxDistance; - } - - vector<PlanStage*> S2NearStage::getChildren() const { - vector<PlanStage*> empty; - return empty; - } - - PlanStageStats* S2NearStage::getStats() { - // TODO: must agg stats across child ixscan/fetches. - // TODO: we can do better than this, need own common stats. - _commonStats.isEOF = isEOF(); - return new PlanStageStats(_commonStats, STAGE_GEO_NEAR_2DSPHERE); - } - - const CommonStats* S2NearStage::getCommonStats() { - return &_commonStats; - } - - const SpecificStats* S2NearStage::getSpecificStats() { - return &_specificStats; - } - -} // namespace mongo diff --git a/src/mongo/db/exec/s2near.h b/src/mongo/db/exec/s2near.h deleted file mode 100644 index 298f3d9a7b5..00000000000 --- a/src/mongo/db/exec/s2near.h +++ /dev/null @@ -1,175 +0,0 @@ -/** - * Copyright (C) 2013-2014 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include <queue> - -#include "mongo/db/exec/plan_stage.h" -#include "mongo/db/geo/geoquery.h" -#include "mongo/db/geo/s2common.h" -#include "mongo/db/index/index_descriptor.h" -#include "mongo/db/jsobj.h" -#include "mongo/db/matcher/expression.h" -#include "mongo/db/query/index_bounds.h" -#include "mongo/platform/unordered_set.h" -#include "third_party/s2/s2cap.h" -#include "third_party/s2/s2regionintersection.h" - -namespace mongo { - - class OperationContext; - - struct S2NearParams { - S2NearParams() : collection(NULL) { } - Collection* collection; - BSONObj indexKeyPattern; - NearQuery nearQuery; - IndexBounds baseBounds; - MatchExpression* filter; - bool addPointMeta; - bool addDistMeta; - }; - - /** - * Executes a geoNear search. Is a leaf node. Output type is LOC_AND_UNOWNED_OBJ. - */ - class S2NearStage : public PlanStage { - public: - /** - * Takes: index to scan over, MatchExpression with near point, other MatchExpressions for - * covered data, - */ - S2NearStage(OperationContext* txn, const S2NearParams& params, WorkingSet* ws); - - virtual ~S2NearStage(); - - StageState work(WorkingSetID* out); - bool isEOF(); - - void prepareToYield(); - void recoverFromYield(); - void invalidate(const DiskLoc& dl, InvalidationType type); - - virtual std::vector<PlanStage*> getChildren() const; - - virtual StageType stageType() const { return STAGE_GEO_NEAR_2DSPHERE; } - - PlanStageStats* getStats(); - - virtual const CommonStats* getCommonStats(); - - virtual const SpecificStats* getSpecificStats(); - - static const char* kStageType; - - private: - void init(); - StageState addResultToQueue(WorkingSetID* out); - void nextAnnulus(); - - // transactional context for read locks. Not owned by us - OperationContext* _txn; - - bool _worked; - - S2NearParams _params; - - WorkingSet* _ws; - - // This is the "array index" of the key field that is the near field. We use this to do - // cheap is-this-doc-in-the-annulus testing. We also need to know where to stuff the index - // bounds for the various annuluses/annuli. - int _nearFieldIndex; - - // Geo filter in index scan (which is owned by fetch stage in _child). - scoped_ptr<MatchExpression> _keyGeoFilter; - - scoped_ptr<PlanStage> _child; - - // The S2 machinery that represents the search annulus. We keep this around after bounds - // generation to check for intersection. - S2Cap _innerCap; - S2Cap _outerCap; - S2RegionIntersection _annulus; - - // We use this to hold on to the results in an annulus. Results are sorted to have - // decreasing distance. - struct Result { - Result(WorkingSetID wsid, double dist) : id(wsid), distance(dist) { } - - bool operator<(const Result& other) const { - // We want increasing distance, not decreasing, so we reverse the <. - return distance > other.distance; - } - - WorkingSetID id; - double distance; - }; - - // Our index scans aren't deduped so we might see the same doc twice in a given - // annulus. - unordered_set<DiskLoc, DiskLoc::Hasher> _seenInScan; - - // We compute an annulus of results and cache it here. - priority_queue<Result> _results; - - // For fast invalidation. Perhaps not worth it. - unordered_map<DiskLoc, WorkingSetID, DiskLoc::Hasher> _invalidationMap; - - // Geo-related variables. - // At what min distance (arc length) do we start looking for results? - double _minDistance; - // What's the max distance (arc length) we're willing to look for results? - double _maxDistance; - - // These radii define the annulus we're currently looking at. - double _innerRadius; - double _outerRadius; - - // True if we are looking at last annulus - bool _outerRadiusInclusive; - - // When we search the next annulus, what to adjust our radius by? Grows when we search an - // annulus and find no results. - double _radiusIncrement; - - // Did we encounter an unrecoverable error? - bool _failed; - - // Have we init()'d yet? - bool _initted; - - // What index are we searching over? - IndexDescriptor* _descriptor; - - CommonStats _commonStats; - S2NearStats _specificStats; - }; - -} // namespace mongo diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp index bd228f6e603..a3929d0b620 100644 --- a/src/mongo/db/exec/working_set_common.cpp +++ b/src/mongo/db/exec/working_set_common.cpp @@ -66,18 +66,23 @@ namespace mongo { } // static - WorkingSetID WorkingSetCommon::allocateStatusMember(WorkingSet* ws, const Status& status) { - invariant(ws); - + BSONObj WorkingSetCommon::buildMemberStatusObject(const Status& status) { BSONObjBuilder bob; bob.append("ok", status.isOK() ? 1.0 : 0.0); bob.append("code", status.code()); bob.append("errmsg", status.reason()); + return bob.obj(); + } + + // static + WorkingSetID WorkingSetCommon::allocateStatusMember(WorkingSet* ws, const Status& status) { + invariant(ws); + WorkingSetID wsid = ws->allocate(); WorkingSetMember* member = ws->get(wsid); member->state = WorkingSetMember::OWNED_OBJ; - member->obj = bob.obj(); + member->obj = buildMemberStatusObject(status); return wsid; } @@ -111,6 +116,19 @@ namespace mongo { } // static + Status WorkingSetCommon::getMemberObjectStatus(const BSONObj& memberObj) { + invariant(WorkingSetCommon::isValidStatusMemberObject(memberObj)); + return Status(static_cast<ErrorCodes::Error>(memberObj["code"].numberInt()), + memberObj["errMsg"]); + } + + // static + Status WorkingSetCommon::getMemberStatus(const WorkingSetMember& member) { + invariant(member.hasObj()); + return getMemberObjectStatus(member.obj); + } + + // static std::string WorkingSetCommon::toStatusString(const BSONObj& obj) { if (!isValidStatusMemberObject(obj)) { Status unknownStatus(ErrorCodes::UnknownError, "no details available"); diff --git a/src/mongo/db/exec/working_set_common.h b/src/mongo/db/exec/working_set_common.h index 2fe0ab97e6d..2bd177edf86 100644 --- a/src/mongo/db/exec/working_set_common.h +++ b/src/mongo/db/exec/working_set_common.h @@ -46,6 +46,10 @@ namespace mongo { */ static void initFrom(WorkingSetMember* dest, const WorkingSetMember& src); + /** + * Build a BSONObj which represents a Status to return in a WorkingSet. + */ + static BSONObj buildMemberStatusObject(const Status& status); /** * Allocate a new WSM and initialize it with @@ -74,6 +78,18 @@ namespace mongo { BSONObj* objOut); /** + * Returns status from working set member object. + * Assumes isValidStatusMemberObject(). + */ + static Status getMemberObjectStatus(const BSONObj& memberObj); + + /** + * Returns status from working set member created with allocateStatusMember(). + * Assumes isValidStatusMemberObject(). + */ + static Status getMemberStatus(const WorkingSetMember& member); + + /** * Formats working set member object created with allocateStatusMember(). */ static std::string toStatusString(const BSONObj& obj); diff --git a/src/mongo/db/geo/core.h b/src/mongo/db/geo/core.h deleted file mode 100644 index 9ca48d86dd0..00000000000 --- a/src/mongo/db/geo/core.h +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Copyright (C) 2013 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include <cmath> - -#ifndef M_PI -# define M_PI 3.14159265358979323846 -#endif - -namespace mongo { - - inline double deg2rad(const double deg) { return deg * (M_PI / 180.0); } - - inline double rad2deg(const double rad) { return rad * (180.0 / M_PI); } - - inline double computeXScanDistance(double y, double maxDistDegrees) { - // TODO: this overestimates for large maxDistDegrees far from the equator - return maxDistDegrees / std::min(cos(deg2rad(std::min(+89.0, y + maxDistDegrees))), - cos(deg2rad(std::max(-89.0, y - maxDistDegrees)))); - } - -} diff --git a/src/mongo/db/geo/geoparser.cpp b/src/mongo/db/geo/geoparser.cpp index bcc6ca177dc..a0e956d5315 100644 --- a/src/mongo/db/geo/geoparser.cpp +++ b/src/mongo/db/geo/geoparser.cpp @@ -77,7 +77,7 @@ namespace mongo { return isValidLngLat(lng, lat); } - static bool isLegacyPoint(const BSONObj &obj) { + static bool isLegacyPoint(const BSONObj &obj, bool allowAddlFields) { BSONObjIterator it(obj); if (!it.more()) { return false; } BSONElement x = it.next(); @@ -85,7 +85,7 @@ namespace mongo { if (!it.more()) { return false; } BSONElement y = it.next(); if (!y.isNumber()) { return false; } - if (it.more()) { return false; } + if (it.more() && !allowAddlFields) { return false; } return true; } @@ -269,7 +269,7 @@ namespace mongo { while (coordIt.more()) { BSONElement coord = coordIt.next(); if (!coord.isABSONObj()) { return false; } - if (!isLegacyPoint(coord.Obj())) { return false; } + if (!isLegacyPoint(coord.Obj(), false)) { return false; } ++vertices; } if (vertices < 3) { return false; } @@ -285,7 +285,7 @@ namespace mongo { BSONObjIterator objIt(type.embeddedObject()); BSONElement center = objIt.next(); if (!center.isABSONObj()) { return false; } - if (!isLegacyPoint(center.Obj())) { return false; } + if (!isLegacyPoint(center.Obj(), false)) { return false; } if (!objIt.more()) { return false; } BSONElement radius = objIt.next(); if (!radius.isNumber()) { return false; } @@ -301,7 +301,7 @@ namespace mongo { BSONObjIterator objIt(type.embeddedObject()); BSONElement center = objIt.next(); if (!center.isABSONObj()) { return false; } - if (!isLegacyPoint(center.Obj())) { return false; } + if (!isLegacyPoint(center.Obj(), false)) { return false; } // Check to make sure the points are valid lng/lat. BSONObjIterator coordIt(center.Obj()); BSONElement lng = coordIt.next(); @@ -316,7 +316,11 @@ namespace mongo { /** exported **/ bool GeoParser::isPoint(const BSONObj &obj) { - return isGeoJSONPoint(obj) || isLegacyPoint(obj); + return isGeoJSONPoint(obj) || isLegacyPoint(obj, false); + } + + bool GeoParser::isIndexablePoint(const BSONObj &obj) { + return isGeoJSONPoint(obj) || isLegacyPoint(obj, true); } bool GeoParser::parsePoint(const BSONObj &obj, PointWithCRS *out) { @@ -327,7 +331,7 @@ namespace mongo { out->oldPoint.x = coords[0].Number(); out->oldPoint.y = coords[1].Number(); out->crs = SPHERE; - } else if (isLegacyPoint(obj)) { + } else if (isLegacyPoint(obj, true)) { BSONObjIterator it(obj); BSONElement x = it.next(); BSONElement y = it.next(); @@ -379,11 +383,11 @@ namespace mongo { BSONObjIterator coordIt(type.embeddedObject()); BSONElement minE = coordIt.next(); if (!minE.isABSONObj()) { return false; } - if (!isLegacyPoint(minE.Obj())) { return false; } + if (!isLegacyPoint(minE.Obj(), false)) { return false; } if (!coordIt.more()) { return false; } BSONElement maxE = coordIt.next(); if (!maxE.isABSONObj()) { return false; } - if (!isLegacyPoint(maxE.Obj())) { return false; } + if (!isLegacyPoint(maxE.Obj(), false)) { return false; } // XXX: VERIFY AREA >= 0 return true; } diff --git a/src/mongo/db/geo/geoparser.h b/src/mongo/db/geo/geoparser.h index a133f8a3989..e8222bcce74 100644 --- a/src/mongo/db/geo/geoparser.h +++ b/src/mongo/db/geo/geoparser.h @@ -43,7 +43,10 @@ namespace mongo { // encounter invalid geometry and true if the geometry is parsed successfully. class GeoParser { public: + static bool isPoint(const BSONObj &obj); + // Legacy points can contain extra data as extra fields - these are valid to index + static bool isIndexablePoint(const BSONObj& obj); static bool parsePoint(const BSONObj &obj, PointWithCRS *out); static bool isLine(const BSONObj &obj); diff --git a/src/mongo/db/geo/geoquery.cpp b/src/mongo/db/geo/geoquery.cpp index 6385fa4f288..c393951964b 100644 --- a/src/mongo/db/geo/geoquery.cpp +++ b/src/mongo/db/geo/geoquery.cpp @@ -29,6 +29,7 @@ #include "mongo/db/geo/geoquery.h" #include "mongo/db/geo/geoconstants.h" +#include "mongo/db/geo/s2common.h" namespace mongo { @@ -40,6 +41,7 @@ namespace mongo { // First, try legacy near, e.g.: // t.find({ loc : { $nearSphere: [0,0], $minDistance: 1, $maxDistance: 3 }}) // t.find({ loc : { $nearSphere: [0,0] }}) + // t.find({ loc : { $near : [0, 0, 1] } }); // t.find({ loc : { $near: { someGeoJSONPoint}}) // t.find({ loc : { $geoNear: { someGeoJSONPoint}}) BSONObjIterator it(obj); @@ -342,13 +344,11 @@ namespace mongo { bool GeometryContainer::R2BoxRegion::fastContains(const Box& other) const { // TODO: Add more cases here to make coverings better - if (_geometry->_box && FLAT == _geometry->_box->crs) { const Box& box = _geometry->_box->box; - return box.contains(other); - } - - if ( _geometry->_cap && FLAT == _geometry->_cap->crs ) { + if (box.contains(other)) + return true; + } else if (_geometry->_cap && FLAT == _geometry->_cap->crs) { const Circle& circle = _geometry->_cap->circle; // Exact test return circleContainsBox(circle, other); @@ -369,18 +369,6 @@ namespace mongo { if (!_bounds.intersects(other)) return true; - if (_geometry->_cap && FLAT == _geometry->_cap->crs) { - const Circle& circle = _geometry->_cap->circle; - // Exact test - return !circleIntersectsWithBox(circle, other); - } - - if (_geometry->_polygon && FLAT == _geometry->_polygon->crs) { - const Polygon& polygon = _geometry->_polygon->oldPolygon; - // Exact test - return !polygonIntersectsWithBox(polygon, other); - } - // Not sure return false; } @@ -1025,7 +1013,7 @@ namespace mongo { // We can't really pass these things around willy-nilly except by ptr. _polygon.reset(new PolygonWithCRS()); if (!GeoParser::parsePolygon(obj, _polygon.get())) { return false; } - } else if (GeoParser::isPoint(obj)) { + } else if (GeoParser::isIndexablePoint(obj)) { _point.reset(new PointWithCRS()); if (!GeoParser::parsePoint(obj, _point.get())) { return false; } } else if (GeoParser::isLine(obj)) { @@ -1138,6 +1126,211 @@ namespace mongo { } } + bool GeometryContainer::supportsProject(CRS otherCRS) const { + + // TODO: Fix geometry collection reporting when/if we support more CRSes + + if (NULL != _point) { + if (_point->crs == otherCRS) return true; + // SPHERE can always go FLAT, but FLAT may not always go back to SPHERE + return _point->crs == SPHERE || _point->flatUpgradedToSphere; + } + else if (NULL != _line) { return _line->crs == otherCRS; } + else if (NULL != _box) { return _box->crs == otherCRS; } + else if (NULL != _polygon) { return _polygon->crs == otherCRS; } + else if (NULL != _cap ) { return _cap->crs == otherCRS; } + else if (NULL != _multiPoint) { return _multiPoint->crs == otherCRS; } + else if (NULL != _multiLine) { return _multiLine->crs == otherCRS; } + else if (NULL != _multiPolygon) { return _multiPolygon->crs == otherCRS; } + else if (NULL != _geometryCollection) { return SPHERE == otherCRS; } + else { + invariant(false); + return false; + } + } + + void GeometryContainer::projectInto(CRS otherCRS) { + + if (otherCRS == getNativeCRS()) + return; + + invariant(NULL != _point); + + if (FLAT == _point->crs) { + invariant(_point->flatUpgradedToSphere); + _point->crs = SPHERE; + } + else { + invariant(FLAT == otherCRS); + S2LatLng latLng(_point->point); + _point->oldPoint = Point(latLng.lng().degrees(), latLng.lat().degrees()); + _point->flatUpgradedToSphere = true; + _point->crs = FLAT; + } + } + + static double s2MinDistanceRad(const S2Point& s2Point, const MultiPointWithCRS& s2MultiPoint) { + + double minDistance = -1; + for (vector<S2Point>::const_iterator it = s2MultiPoint.points.begin(); + it != s2MultiPoint.points.end(); ++it) { + + double nextDistance = S2Distance::distanceRad(s2Point, *it); + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + return minDistance; + } + + static double s2MinDistanceRad(const S2Point& s2Point, const MultiLineWithCRS& s2MultiLine) { + + double minDistance = -1; + for (vector<S2Polyline*>::const_iterator it = s2MultiLine.lines.vector().begin(); + it != s2MultiLine.lines.vector().end(); ++it) { + + double nextDistance = S2Distance::minDistanceRad(s2Point, **it); + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + return minDistance; + } + + static double s2MinDistanceRad(const S2Point& s2Point, const MultiPolygonWithCRS& s2MultiPolygon) { + + double minDistance = -1; + for (vector<S2Polygon*>::const_iterator it = s2MultiPolygon.polygons.vector().begin(); + it != s2MultiPolygon.polygons.vector().end(); ++it) { + + double nextDistance = S2Distance::minDistanceRad(s2Point, **it); + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + return minDistance; + } + + static double s2MinDistanceRad(const S2Point& s2Point, + const GeometryCollection& geometryCollection) { + + double minDistance = -1; + for (vector<PointWithCRS>::const_iterator it = geometryCollection.points.begin(); + it != geometryCollection.points.end(); ++it) { + + invariant(SPHERE == it->crs); + double nextDistance = S2Distance::distanceRad(s2Point, it->point); + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + for (vector<LineWithCRS*>::const_iterator it = geometryCollection.lines.vector().begin(); + it != geometryCollection.lines.vector().end(); ++it) { + + invariant(SPHERE == (*it)->crs); + double nextDistance = S2Distance::minDistanceRad(s2Point, (*it)->line); + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + for (vector<PolygonWithCRS*>::const_iterator it = geometryCollection.polygons.vector().begin(); + it != geometryCollection.polygons.vector().end(); ++it) { + + invariant(SPHERE == (*it)->crs); + double nextDistance = S2Distance::minDistanceRad(s2Point, (*it)->polygon); + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + for (vector<MultiPointWithCRS*>::const_iterator it = geometryCollection.multiPoints.vector() + .begin(); it != geometryCollection.multiPoints.vector().end(); ++it) { + + double nextDistance = s2MinDistanceRad(s2Point, **it); + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + for (vector<MultiLineWithCRS*>::const_iterator it = geometryCollection.multiLines.vector() + .begin(); it != geometryCollection.multiLines.vector().end(); ++it) { + + double nextDistance = s2MinDistanceRad(s2Point, **it); + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + for (vector<MultiPolygonWithCRS*>::const_iterator it = geometryCollection.multiPolygons + .vector().begin(); it != geometryCollection.multiPolygons.vector().end(); ++it) { + + double nextDistance = s2MinDistanceRad(s2Point, **it); + if (minDistance < 0 || nextDistance < minDistance) { + minDistance = nextDistance; + } + } + + return minDistance; + } + + double GeometryContainer::minDistance(const PointWithCRS& otherPoint) const { + + const CRS crs = getNativeCRS(); + + if (FLAT == crs) { + + invariant(NULL != _point); + + if (FLAT == otherPoint.crs) { + return distance(_point->oldPoint, otherPoint.oldPoint); + } + else { + S2LatLng latLng(otherPoint.point); + return distance(_point->oldPoint, + Point(latLng.lng().degrees(), latLng.lat().degrees())); + } + } + else { + invariant(SPHERE == crs); + invariant(FLAT != otherPoint.crs || otherPoint.flatUpgradedToSphere); + + double minDistance = -1; + + if (NULL != _point) { + minDistance = S2Distance::distanceRad(otherPoint.point, _point->point); + } + else if (NULL != _line) { + minDistance = S2Distance::minDistanceRad(otherPoint.point, _line->line); + } + else if (NULL != _polygon) { + minDistance = S2Distance::minDistanceRad(otherPoint.point, _polygon->polygon); + } + else if (NULL != _cap) { + minDistance = S2Distance::minDistanceRad(otherPoint.point, _cap->cap); + } + else if (NULL != _multiPoint) { + minDistance = s2MinDistanceRad(otherPoint.point, *_multiPoint); + } + else if (NULL != _multiLine) { + minDistance = s2MinDistanceRad(otherPoint.point, *_multiLine); + } + else if (NULL != _multiPolygon) { + minDistance = s2MinDistanceRad(otherPoint.point, *_multiPolygon); + } + else if (NULL != _geometryCollection) { + minDistance = s2MinDistanceRad(otherPoint.point, *_geometryCollection); + } + + invariant(minDistance != -1); + return minDistance * kRadiusOfEarthInMeters; + } + } + const CapWithCRS* GeometryContainer::getCapGeometryHack() const { return _cap.get(); } diff --git a/src/mongo/db/geo/geoquery.h b/src/mongo/db/geo/geoquery.h index 16ed8da11b2..f8085b2db52 100644 --- a/src/mongo/db/geo/geoquery.h +++ b/src/mongo/db/geo/geoquery.h @@ -41,10 +41,13 @@ namespace mongo { public: /** - * Creates an empty geometry container which may then only be loaded via parseFrom(). + * Creates an empty geometry container which may then be loaded from BSON or directly. */ GeometryContainer(); + /** + * Loads an empty GeometryContainer from BSON + */ bool parseFrom(const BSONObj &obj); /** @@ -53,11 +56,32 @@ namespace mongo { bool isSimpleContainer() const; /** - * To check intersection, we iterate over the otherContainer's geometries, checking each - * geometry to see if we intersect it. If we intersect one geometry, we intersect the - * entire other container. + * Reports the CRS of the contained geometry. + * TODO: Rework once we have collections of multiple CRSes */ - bool intersects(const GeometryContainer& otherContainer) const; + CRS getNativeCRS() const; + + /** + * Whether or not this geometry can be projected into a particular CRS + */ + bool supportsProject(CRS crs) const; + + /** + * Projects the current geometry into the supplied crs. + * It is an error to call this function if canProjectInto(crs) is false. + */ + void projectInto(CRS crs); + + /** + * Minimum distance between this geometry and the supplied point. + * TODO: Rework and generalize to full GeometryContainer distance + */ + double minDistance(const PointWithCRS& point) const; + + /** + * Only polygons (and aggregate types thereof) support contains. + */ + bool supportsContains() const; /** * To check containment, we iterate over the otherContainer's geometries. If we don't @@ -68,9 +92,11 @@ namespace mongo { bool contains(const GeometryContainer& otherContainer) const; /** - * Only polygons (and aggregate types thereof) support contains. + * To check intersection, we iterate over the otherContainer's geometries, checking each + * geometry to see if we intersect it. If we intersect one geometry, we intersect the + * entire other container. */ - bool supportsContains() const; + bool intersects(const GeometryContainer& otherContainer) const; // Region which can be used to generate a covering of the query object in the S2 space. bool hasS2Region() const; @@ -80,9 +106,6 @@ namespace mongo { bool hasR2Region() const; const R2Region& getR2Region() const; - // Reports as best we can the CRS closest to that of the contained geometry - CRS getNativeCRS() const; - // Returns a string related to the type of the geometry (for debugging queries) std::string getDebugType() const; @@ -145,6 +168,18 @@ namespace mongo { Status parseFrom(const BSONObj &obj); + CRS getQueryCRS() const { + return isNearSphere ? SPHERE : centroid.crs; + } + + bool unitsAreRadians() const { + return isNearSphere && FLAT == centroid.crs; + } + + bool isWrappingQuery() const { + return SPHERE == centroid.crs && !isNearSphere; + } + // The name of the field that contains the geometry. std::string field; @@ -152,9 +187,7 @@ namespace mongo { PointWithCRS centroid; // Min and max distance from centroid that we're willing to search. - // Distance is in whatever units the centroid's CRS implies. - // If centroid.crs == FLAT these are radians. - // If centroid.crs == SPHERE these are meters. + // Distance is in units of the geometry's CRS, except SPHERE and isNearSphere => radians double minDistance; double maxDistance; diff --git a/src/mongo/db/geo/hash.cpp b/src/mongo/db/geo/hash.cpp index cb4b5b0381b..baee21d2f4b 100644 --- a/src/mongo/db/geo/hash.cpp +++ b/src/mongo/db/geo/hash.cpp @@ -28,7 +28,6 @@ #include "mongo/db/field_parser.h" #include "mongo/db/jsobj.h" -#include "mongo/db/geo/core.h" #include "mongo/db/geo/hash.h" #include "mongo/db/geo/shapes.h" #include "mongo/util/mongoutils/str.h" @@ -681,6 +680,10 @@ namespace mongo { return box; } + Box GeoHashConverter::unhashToBox(const BSONElement &e) const { + return unhashToBox(hash(e)); + } + double GeoHashConverter::sizeOfDiag(const GeoHash& a) const { GeoHash b = a; b.move(1, 1); diff --git a/src/mongo/db/geo/hash.h b/src/mongo/db/geo/hash.h index 95dbbf37d86..57d2083c430 100644 --- a/src/mongo/db/geo/hash.h +++ b/src/mongo/db/geo/hash.h @@ -221,6 +221,7 @@ namespace mongo { * geo hashes in plan stats. */ Box unhashToBox(const GeoHash &h) const; + Box unhashToBox(const BSONElement &e) const; double sizeOfDiag(const GeoHash& a) const; // XXX: understand/clean this. diff --git a/src/mongo/db/geo/s2common.cpp b/src/mongo/db/geo/s2common.cpp index 3f2ab4bbd10..2dd2082c96a 100644 --- a/src/mongo/db/geo/s2common.cpp +++ b/src/mongo/db/geo/s2common.cpp @@ -43,127 +43,6 @@ namespace mongo { return ss.str(); } - double dist(const S2Point& a, const S2Point& b) { - S1Angle angle(a, b); - return angle.radians(); - } - - double dist(const S2Point& a, const MultiPointWithCRS& b) { - double minDist = numeric_limits<double>::max(); - for (size_t i = 0; i < b.points.size(); ++i) { - minDist = min(minDist, dist(a, b.points[i])); - } - return minDist; - } - - double dist(const S2Point& a, const S2Polyline& b) { - int tmp; - S1Angle angle(a, b.Project(a, &tmp)); - return angle.radians(); - } - - double dist(const S2Point& a, const MultiLineWithCRS& b) { - double minDist = numeric_limits<double>::max(); - for (size_t i = 0; i < b.lines.vector().size(); ++i) { - minDist = min(minDist, dist(a, *b.lines.vector()[i])); - } - return minDist; - } - - double dist(const S2Point& a, const S2Polygon& b) { - S1Angle angle(a, b.Project(a)); - return angle.radians(); - } - - double dist(const S2Point& a, const MultiPolygonWithCRS& b) { - double minDist = numeric_limits<double>::max(); - for (size_t i = 0; i < b.polygons.vector().size(); ++i) { - minDist = min(minDist, dist(a, *b.polygons.vector()[i])); - } - return minDist; - } - - bool S2SearchUtil::distanceBetween(const S2Point& us, const BSONObj& them, double *out) { - if (GeoParser::isGeometryCollection(them)) { - GeometryCollection c; - if (!GeoParser::parseGeometryCollection(them, &c)) { return false; } - double minDist = numeric_limits<double>::max(); - - for (size_t i = 0; i < c.points.size(); ++i) { - minDist = min(minDist, dist(us, c.points[i].point)); - } - - const vector<LineWithCRS*>& lines = c.lines.vector(); - for (size_t i = 0; i < lines.size(); ++i) { - minDist = min(minDist, dist(us, lines[i]->line)); - } - - const vector<PolygonWithCRS*>& polys = c.polygons.vector(); - for (size_t i = 0; i < polys.size(); ++i) { - minDist = min(minDist, dist(us, polys[i]->polygon)); - } - - const vector<MultiPointWithCRS*>& multipoints = c.multiPoints.vector(); - for (size_t i = 0; i < multipoints.size(); ++i) { - MultiPointWithCRS* mp = multipoints[i]; - for (size_t j = 0; j < mp->points.size(); ++j) { - minDist = min(minDist, dist(us, mp->points[i])); - } - } - - const vector<MultiLineWithCRS*>& multilines = c.multiLines.vector(); - for (size_t i = 0; i < multilines.size(); ++i) { - const vector<S2Polyline*>& lines = multilines[i]->lines.vector(); - for (size_t j = 0; j < lines.size(); ++j) { - minDist = min(minDist, dist(us, *lines[j])); - } - } - - const vector<MultiPolygonWithCRS*>& multipolys = c.multiPolygons.vector(); - for (size_t i = 0; i < multipolys.size(); ++i) { - const vector<S2Polygon*>& polys = multipolys[i]->polygons.vector(); - for (size_t j = 0; j < polys.size(); ++j) { - minDist = min(minDist, dist(us, *polys[j])); - } - } - - *out = kRadiusOfEarthInMeters * minDist; - return true; - } else if (GeoParser::isMultiPoint(them)) { - MultiPointWithCRS multiPoint; - if (!GeoParser::parseMultiPoint(them, &multiPoint)) { return false; } - *out = dist(us, multiPoint) * kRadiusOfEarthInMeters; - return true; - } else if (GeoParser::isMultiLine(them)) { - MultiLineWithCRS multiLine; - if (!GeoParser::parseMultiLine(them, &multiLine)) { return false; } - *out = dist(us, multiLine) * kRadiusOfEarthInMeters; - return true; - } else if (GeoParser::isMultiPolygon(them)) { - MultiPolygonWithCRS multiPolygon; - if (!GeoParser::parseMultiPolygon(them, &multiPolygon)) { return false; } - *out = dist(us, multiPolygon) * kRadiusOfEarthInMeters; - return true; - } else if (GeoParser::isPolygon(them)) { - PolygonWithCRS poly; - if (!GeoParser::parsePolygon(them, &poly)) { return false; } - *out = dist(us, poly.polygon) * kRadiusOfEarthInMeters; - return true; - } else if (GeoParser::isLine(them)) { - LineWithCRS line; - if (!GeoParser::parseLine(them, &line)) { return false; } - *out = dist(us, line.line) * kRadiusOfEarthInMeters; - return true; - } else if (GeoParser::isPoint(them)) { - PointWithCRS point; - if (!GeoParser::parsePoint(them, &point)) { return false; } - *out = dist(us, point.point) * kRadiusOfEarthInMeters; - return true; - } else { - return false; - } - } - void S2SearchUtil::setCoverLimitsBasedOnArea(double area, S2RegionCoverer *coverer, int coarsestIndexedLevel) { area = sqrt(area); diff --git a/src/mongo/db/geo/s2common.h b/src/mongo/db/geo/s2common.h index acee143c1d0..6f8da991764 100644 --- a/src/mongo/db/geo/s2common.h +++ b/src/mongo/db/geo/s2common.h @@ -94,7 +94,6 @@ namespace mongo { static BSONObj coverAsBSON(const std::vector<S2CellId> &cover, const std::string& field, const int coarsestIndexedLevel); static void setCoverLimitsBasedOnArea(double area, S2RegionCoverer *coverer, int coarsestIndexedLevel); - static bool distanceBetween(const S2Point& us, const BSONObj& them, double *out); }; } // namespace mongo diff --git a/src/mongo/db/geo/shapes.cpp b/src/mongo/db/geo/shapes.cpp index 95796025590..9c1e5b80745 100644 --- a/src/mongo/db/geo/shapes.cpp +++ b/src/mongo/db/geo/shapes.cpp @@ -26,9 +26,7 @@ * it in the license file. */ -#include "mongo/pch.h" #include "mongo/db/jsobj.h" -#include "mongo/db/geo/core.h" #include "mongo/db/geo/shapes.h" #include "mongo/util/mongoutils/str.h" @@ -444,12 +442,37 @@ namespace mongo { bool R2Annulus::fastDisjoint(const Box& other) const { return !circleIntersectsWithBox(Circle(_outer, _center), other) - || circleInteriorContainsBox(Circle(_inner, _center), other); + || circleInteriorContainsBox(Circle(_inner, _center), other); } + string R2Annulus::toString() const { + return str::stream() << "center: " << _center.toString() << " inner: " << _inner + << " outer: " << _outer; + } /////// Other methods + double S2Distance::distanceRad(const S2Point& pointA, const S2Point& pointB) { + S1Angle angle(pointA, pointB); + return angle.radians(); + } + + double S2Distance::minDistanceRad(const S2Point& point, const S2Polyline& line) { + int tmp; + S1Angle angle(point, line.Project(point, &tmp)); + return angle.radians(); + } + + double S2Distance::minDistanceRad(const S2Point& point, const S2Polygon& polygon) { + S1Angle angle(point, polygon.Project(point)); + return angle.radians(); + } + + double S2Distance::minDistanceRad(const S2Point& point, const S2Cap& cap) { + S1Angle angleToCenter(point, cap.axis()); + return (angleToCenter - cap.angle()).radians(); + } + /** * Distance method that compares x or y coords when other direction is zero, * avoids numerical error when distances are very close to radius but axis-aligned. @@ -506,16 +529,6 @@ namespace mongo { return sqrt((a * a) + (b * b)) - radius; } - // Technically lat/long bounds, not really tied to earth radius. - void checkEarthBounds(const Point &p) { - uassert(14808, str::stream() << "point " << p.toString() - << " must be in earth-like bounds of long " - << ": [-180, 180], lat : [-90, 90] ", - p.x >= -180 && p.x <= 180 && p.y >= -90 && p.y <= 90); - } - - - // WARNING: x and y MUST be longitude and latitude in that order // note: multiply by earth radius for distance double spheredist_rad(const Point& p1, const Point& p2) { // this uses the n-vector formula: http://en.wikipedia.org/wiki/N-vector @@ -549,6 +562,14 @@ namespace mongo { Point(deg2rad(p2.x), deg2rad(p2.y))); } + // Technically lat/long bounds, not really tied to earth radius. + void checkEarthBounds(const Point &p) { + uassert(14808, str::stream() << "point " << p.toString() + << " must be in earth-like bounds of long " + << ": [-180, 180], lat : [-90, 90] ", + p.x >= -180 && p.x <= 180 && p.y >= -90 && p.y <= 90); + } + double distance(const Point& p1, const Point &p2) { double a = p1.x - p2.x; double b = p1.y - p2.y; @@ -600,6 +621,10 @@ namespace mongo { static bool circleContainsBoxInternal(const Circle& circle, const Box& box, bool includeCircleBoundary) { + + // NOTE: a circle of zero radius is a point, and there are NO points contained inside a + // zero-radius circle, not even the point itself. + const Point& a = box._min; const Point& b = box._max; double compareLL = distanceCompare( circle.center, a, circle.radius ); // Lower left @@ -628,6 +653,12 @@ namespace mongo { static bool circleIntersectsWithBoxInternal(const Circle& circle, const Box& box, bool includeCircleBoundary) { + + // NOTE: a circle of zero radius is a point, and there are NO points to intersect inside a + // zero-radius circle, not even the point itself. + if (circle.radius == 0.0 && !includeCircleBoundary) + return false; + /* Collapses the four quadrants down into one. * ________ * r|___B___ \ <- a quarter round corner here. Let's name it "D". diff --git a/src/mongo/db/geo/shapes.h b/src/mongo/db/geo/shapes.h index 5d6e97035a7..fd79c3680e4 100644 --- a/src/mongo/db/geo/shapes.h +++ b/src/mongo/db/geo/shapes.h @@ -28,6 +28,7 @@ #pragma once +#include <cmath> #include <string> #include <vector> @@ -40,6 +41,10 @@ #include "third_party/s2/s2polygon.h" #include "third_party/s2/s2polyline.h" +#ifndef M_PI +# define M_PI 3.14159265358979323846 +#endif + namespace mongo { struct Point; @@ -47,12 +52,17 @@ namespace mongo { class Box; class Polygon; - double distance(const Point& p1, const Point &p2); - double distanceCompare(const Point &p1, const Point &p2, double radius); - bool distanceWithin(const Point &p1, const Point &p2, double radius); + inline double deg2rad(const double deg) { return deg * (M_PI / 180.0); } + + inline double rad2deg(const double rad) { return rad * (180.0 / M_PI); } + + inline double computeXScanDistance(double y, double maxDistDegrees) { + // TODO: this overestimates for large maxDistDegrees far from the equator + return maxDistDegrees / std::min(cos(deg2rad(std::min(+89.0, y + maxDistDegrees))), + cos(deg2rad(std::max(-89.0, y - maxDistDegrees)))); + } + void checkEarthBounds(const Point &p); - double spheredist_rad(const Point& p1, const Point& p2); - double spheredist_deg(const Point& p1, const Point& p2); bool linesIntersect(const Point& pA, const Point& pB, const Point& pC, const Point& pD); bool circleContainsBox(const Circle& circle, const Box& box); bool circleInteriorContainsBox(const Circle& circle, const Box& box); @@ -62,6 +72,30 @@ namespace mongo { bool polygonContainsBox(const Polygon& polygon, const Box& box); bool polygonIntersectsWithBox(const Polygon& polygon, const Box& box); + /** + * Distance utilities for R2 geometries + */ + double distance(const Point& p1, const Point &p2); + bool distanceWithin(const Point &p1, const Point &p2, double radius); + double distanceCompare(const Point &p1, const Point &p2, double radius); + // Still needed for non-wrapping $nearSphere + double spheredist_rad(const Point& p1, const Point& p2); + double spheredist_deg(const Point& p1, const Point& p2); + + + + /** + * Distance utilities for S2 geometries + */ + struct S2Distance { + + static double distanceRad(const S2Point& pointA, const S2Point& pointB); + static double minDistanceRad(const S2Point& point, const S2Polyline& line); + static double minDistanceRad(const S2Point& point, const S2Polygon& polygon); + static double minDistanceRad(const S2Point& point, const S2Cap& cap); + + }; + struct Point { Point(); Point(double x, double y); @@ -203,6 +237,8 @@ namespace mongo { bool fastContains(const Box& other) const; bool fastDisjoint(const Box& other) const; + // For debugging + std::string toString() const; private: diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp index c1af261b3b1..05003290ad0 100644 --- a/src/mongo/db/index/2d_access_method.cpp +++ b/src/mongo/db/index/2d_access_method.cpp @@ -31,7 +31,6 @@ #include <string> #include <vector> -#include "mongo/db/geo/core.h" #include "mongo/db/index_names.h" #include "mongo/db/index/2d_common.h" #include "mongo/db/index/expression_keys_private.h" diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h index c45349b8ff8..42d0f37d110 100644 --- a/src/mongo/db/index/2d_access_method.h +++ b/src/mongo/db/index/2d_access_method.h @@ -40,30 +40,6 @@ namespace mongo { class IndexDescriptor; struct TwoDIndexingParams; - namespace twod_exec { - class GeoPoint; - class GeoAccumulator; - class GeoBrowse; - class GeoHopper; - class GeoSearch; - class GeoCircleBrowse; - class GeoBoxBrowse; - class GeoPolygonBrowse; - class TwoDGeoNearRunner; - } - - namespace twod_internal { - class GeoPoint; - class GeoAccumulator; - class GeoBrowse; - class GeoHopper; - class GeoSearch; - class GeoCircleBrowse; - class GeoBoxBrowse; - class GeoPolygonBrowse; - class TwoDGeoNearRunner; - } - class TwoDAccessMethod : public BtreeBasedAccessMethod { public: using BtreeBasedAccessMethod::_descriptor; @@ -73,25 +49,6 @@ namespace mongo { virtual ~TwoDAccessMethod() { } private: - friend class TwoDIndexCursor; - friend class twod_internal::GeoAccumulator; - friend class twod_internal::GeoBrowse; - friend class twod_internal::GeoHopper; - friend class twod_internal::GeoSearch; - friend class twod_internal::GeoCircleBrowse; - friend class twod_internal::GeoBoxBrowse; - friend class twod_internal::GeoPolygonBrowse; - - friend class twod_exec::GeoPoint; - friend class twod_exec::GeoAccumulator; - friend class twod_exec::GeoBrowse; - friend class twod_exec::GeoHopper; - friend class twod_exec::GeoSearch; - friend class twod_exec::GeoCircleBrowse; - friend class twod_exec::GeoBoxBrowse; - friend class twod_exec::GeoPolygonBrowse; - - friend class twod_internal::TwoDGeoNearRunner; const IndexDescriptor* getDescriptor() { return _descriptor; } TwoDIndexingParams& getParams() { return _params; } diff --git a/src/mongo/db/index/expression_index.h b/src/mongo/db/index/expression_index.h index 8dd5a7db569..f3877cb8d0e 100644 --- a/src/mongo/db/index/expression_index.h +++ b/src/mongo/db/index/expression_index.h @@ -28,6 +28,8 @@ #pragma once +#include "third_party/s2/s2regioncoverer.h" + #include "mongo/db/jsobj.h" #include "mongo/db/geo/hash.h" #include "mongo/db/geo/geoquery.h" @@ -50,6 +52,24 @@ namespace mongo { return bob.obj(); } + // For debugging only + static std::string toCoveringString(const GeoHashConverter& hashConverter, + const set<GeoHash>& covering) { + string result = "["; + for (set<GeoHash>::const_iterator it = covering.begin(); it != covering.end(); + ++it) { + + if (it != covering.begin()) result += ", "; + + const GeoHash& geoHash = *it; + + result += hashConverter.unhashToBox(geoHash).toString(); + result += " (" + geoHash.toStringHex1() + ")"; + } + + return result + "]"; + } + static void cover2d(const R2Region& region, const BSONObj& indexInfoObj, OrderedIntervalList* oil) { diff --git a/src/mongo/db/index/haystack_access_method_internal.h b/src/mongo/db/index/haystack_access_method_internal.h index b1e8c11ec42..67c97d25b90 100644 --- a/src/mongo/db/index/haystack_access_method_internal.h +++ b/src/mongo/db/index/haystack_access_method_internal.h @@ -31,7 +31,6 @@ #include <vector> #include "mongo/db/diskloc.h" -#include "mongo/db/geo/core.h" #include "mongo/db/geo/shapes.h" namespace mongo { diff --git a/src/mongo/db/matcher/expression.h b/src/mongo/db/matcher/expression.h index 1b9635221af..fb2850f1017 100644 --- a/src/mongo/db/matcher/expression.h +++ b/src/mongo/db/matcher/expression.h @@ -66,7 +66,7 @@ namespace mongo { GEO_NEAR, TEXT, // Expressions that are only created internally - INTERNAL_GEO_S2_KEYCHECK + INTERNAL_2DSPHERE_KEY_IN_REGION, INTERNAL_2D_KEY_IN_REGION, INTERNAL_2D_POINT_IN_ANNULUS }; MatchExpression( MatchType type ); diff --git a/src/mongo/db/matcher/expression_geo.cpp b/src/mongo/db/matcher/expression_geo.cpp index 1702c26740f..23d28940d01 100644 --- a/src/mongo/db/matcher/expression_geo.cpp +++ b/src/mongo/db/matcher/expression_geo.cpp @@ -104,9 +104,9 @@ namespace mongo { // Parse-only geo expressions: geoNear (formerly known as near). // - Status GeoNearMatchExpression::init( const StringData& path, const NearQuery& query, + Status GeoNearMatchExpression::init( const StringData& path, const NearQuery* query, const BSONObj& rawObj ) { - _query = query; + _query.reset(query); _rawObj = rawObj; return initPath( path ); } @@ -120,7 +120,7 @@ namespace mongo { void GeoNearMatchExpression::debugString( StringBuilder& debug, int level ) const { _debugAddSpace( debug, level ); - debug << "GEONEAR " << _query.toString(); + debug << "GEONEAR " << _query->toString(); MatchExpression::TagData* td = getTag(); if (NULL != td) { debug << " "; @@ -149,7 +149,8 @@ namespace mongo { LeafMatchExpression* GeoNearMatchExpression::shallowClone() const { GeoNearMatchExpression* next = new GeoNearMatchExpression(); - next->init( path(), _query, _rawObj ); + next->init( path(), NULL, _rawObj ); + next->_query = _query; if (getTag()) { next->setTag(getTag()->clone()); } diff --git a/src/mongo/db/matcher/expression_geo.h b/src/mongo/db/matcher/expression_geo.h index a227dea159b..a784dbdabda 100644 --- a/src/mongo/db/matcher/expression_geo.h +++ b/src/mongo/db/matcher/expression_geo.h @@ -71,7 +71,7 @@ namespace mongo { GeoNearMatchExpression() : LeafMatchExpression( GEO_NEAR ){} virtual ~GeoNearMatchExpression(){} - Status init( const StringData& path, const NearQuery& query, const BSONObj& rawObj ); + Status init( const StringData& path, const NearQuery* query, const BSONObj& rawObj ); // This shouldn't be called and as such will crash. GeoNear always requires an index. virtual bool matchesSingleElement( const BSONElement& e ) const; @@ -84,11 +84,12 @@ namespace mongo { virtual LeafMatchExpression* shallowClone() const; - const NearQuery& getData() const { return _query; } + const NearQuery& getData() const { return *_query; } const BSONObj getRawObj() const { return _rawObj; } private: - NearQuery _query; BSONObj _rawObj; + // Share ownership of our query with all of our clones + shared_ptr<const NearQuery> _query; }; } // namespace mongo diff --git a/src/mongo/db/matcher/expression_geo_test.cpp b/src/mongo/db/matcher/expression_geo_test.cpp index 7fe244d555a..7f2b5fb3742 100644 --- a/src/mongo/db/matcher/expression_geo_test.cpp +++ b/src/mongo/db/matcher/expression_geo_test.cpp @@ -60,11 +60,11 @@ namespace mongo { TEST(ExpressionGeoTest, GeoNear1) { BSONObj query = fromjson("{loc:{$near:{$maxDistance:100, " "$geometry:{type:\"Point\", coordinates:[0,0]}}}}"); - NearQuery nq; - ASSERT_OK(nq.parseFrom(query["loc"].Obj())); + auto_ptr<NearQuery> nq(new NearQuery); + ASSERT_OK(nq->parseFrom(query["loc"].Obj())); GeoNearMatchExpression gne; - ASSERT(gne.init("a", nq, query).isOK()); + ASSERT(gne.init("a", nq.release(), query).isOK()); // We can't match the data but we can make sure it was parsed OK. ASSERT_EQUALS(gne.getData().centroid.crs, SPHERE); diff --git a/src/mongo/db/matcher/expression_parser_geo.cpp b/src/mongo/db/matcher/expression_parser_geo.cpp index 8ed24e8224e..146f104ce9e 100644 --- a/src/mongo/db/matcher/expression_parser_geo.cpp +++ b/src/mongo/db/matcher/expression_parser_geo.cpp @@ -61,8 +61,8 @@ namespace mongo { } else { verify(BSONObj::opNEAR == type); - NearQuery nq(name); - Status s = nq.parseFrom( section ); + auto_ptr<NearQuery> nq(new NearQuery(name)); + Status s = nq->parseFrom( section ); if ( !s.isOK() ) { return StatusWithMatchExpression( s ); } @@ -72,7 +72,7 @@ namespace mongo { // layer. BSONObjBuilder bob; bob.append(name, section); - s = e->init( name, nq, bob.obj() ); + s = e->init( name, nq.release(), bob.obj() ); if ( !s.isOK() ) return StatusWithMatchExpression( s ); return StatusWithMatchExpression( e.release() ); diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp index 0424229ec53..7e4adc96754 100644 --- a/src/mongo/db/query/explain.cpp +++ b/src/mongo/db/query/explain.cpp @@ -102,10 +102,6 @@ namespace { const IndexScanStats* spec = static_cast<const IndexScanStats*>(specific); return spec->keysExamined; } - else if (STAGE_GEO_NEAR_2D == type) { - const TwoDNearStats* spec = static_cast<const TwoDNearStats*>(specific); - return spec->nscanned; - } else if (STAGE_IDHACK == type) { const IDHackStats* spec = static_cast<const IDHackStats*>(specific); return spec->keysExamined; @@ -132,11 +128,7 @@ namespace { * (in which case this gets called from Explain::getSummaryStats()). */ size_t getDocsExamined(StageType type, const SpecificStats* specific) { - if (STAGE_GEO_NEAR_2D == type) { - const TwoDNearStats* spec = static_cast<const TwoDNearStats*>(specific); - return spec->objectsLoaded; - } - else if (STAGE_IDHACK == type) { + if (STAGE_IDHACK == type) { const IDHackStats* spec = static_cast<const IDHackStats*>(specific); return spec->docsExamined; } @@ -175,11 +167,11 @@ namespace { ss << " " << spec->keyPattern; } else if (STAGE_GEO_NEAR_2D == stage->stageType()) { - const TwoDNearStats* spec = static_cast<const TwoDNearStats*>(specific); + const NearStats* spec = static_cast<const NearStats*>(specific); ss << " " << spec->keyPattern; } else if (STAGE_GEO_NEAR_2DSPHERE == stage->stageType()) { - const S2NearStats* spec = static_cast<const S2NearStats*>(specific); + const NearStats* spec = static_cast<const NearStats*>(specific); ss << " " << spec->keyPattern; } else if (STAGE_IXSCAN == stage->stageType()) { @@ -254,16 +246,6 @@ namespace mongo { bob->appendNumber("docsExamined", spec->docsExamined); } } - else if (STAGE_GEO_NEAR_2D == stats.stageType) { - TwoDNearStats* spec = static_cast<TwoDNearStats*>(stats.specific.get()); - - if (verbosity >= Explain::EXEC_STATS) { - bob->appendNumber("keysExamined", spec->nscanned); - bob->appendNumber("docsExamined", spec->objectsLoaded); - } - - bob->append("keyPattern", spec->keyPattern); - } else if (STAGE_IDHACK == stats.stageType) { IDHackStats* spec = static_cast<IDHackStats*>(stats.specific.get()); if (verbosity >= Explain::EXEC_STATS) { @@ -381,6 +363,7 @@ namespace mongo { size_t totalKeysExamined = 0; size_t totalDocsExamined = 0; for (size_t i = 0; i < statsNodes.size(); ++i) { + totalKeysExamined += getKeysExamined(statsNodes[i]->stageType, statsNodes[i]->specific.get()); totalDocsExamined += getDocsExamined(statsNodes[i]->stageType, diff --git a/src/mongo/db/query/explain_plan.cpp b/src/mongo/db/query/explain_plan.cpp index 1e15efd37f1..ccc662e0c7e 100644 --- a/src/mongo/db/query/explain_plan.cpp +++ b/src/mongo/db/query/explain_plan.cpp @@ -28,7 +28,6 @@ #include "mongo/db/query/explain_plan.h" -#include "mongo/db/exec/2dcommon.h" #include "mongo/db/query/stage_types.h" #include "mongo/db/query/type_explain.h" #include "mongo/util/mongoutils/str.h" @@ -43,6 +42,10 @@ namespace mongo { return stageType == STAGE_OR || stageType == STAGE_SORT_MERGE; } + bool isNearStage(StageType stageType) { + return stageType == STAGE_GEO_NEAR_2D || stageType == STAGE_GEO_NEAR_2DSPHERE; + } + bool isIntersectPlan(const PlanStageStats& stats) { if (stats.stageType == STAGE_AND_HASH || stats.stageType == STAGE_AND_SORTED) { return true; @@ -149,6 +152,9 @@ namespace mongo { bool sortPresent = false; size_t chunkSkips = 0; + + // XXX: TEMPORARY HACK - GEONEAR explains like OR queries (both have children) until the + // new explain framework makes this file go away. const PlanStageStats* orStage = NULL; const PlanStageStats* root = &stats; const PlanStageStats* leaf = root; @@ -156,10 +162,10 @@ namespace mongo { while (leaf->children.size() > 0) { // We shouldn't be here if there are any ANDs if (leaf->children.size() > 1) { - verify(isOrStage(leaf->stageType)); + verify(isOrStage(leaf->stageType) || isNearStage(leaf->stageType)); } - if (isOrStage(leaf->stageType)) { + if (isOrStage(leaf->stageType) || isNearStage(leaf->stageType)) { orStage = leaf; break; } @@ -223,7 +229,18 @@ namespace mongo { } } // We set the cursor name for backwards compatibility with 2.4. - res->setCursor("QueryOptimizerCursor"); + if (isOrStage(leaf->stageType)) { + res->setCursor("QueryOptimizerCursor"); + } + else { + if (leaf->stageType == STAGE_GEO_NEAR_2D) + res->setCursor("GeoSearchCursor"); + else + res->setCursor("S2NearCursor"); + + res->setIndexOnly(false); + res->setIsMultiKey(false); + } res->setNScanned(nScanned); res->setNScannedObjects(nScannedObjects); } @@ -235,30 +252,6 @@ namespace mongo { res->setIndexOnly(false); res->setIsMultiKey(false); } - else if (leaf->stageType == STAGE_GEO_NEAR_2DSPHERE) { - // TODO: This is kind of a lie for STAGE_GEO_NEAR_2DSPHERE. - res->setCursor("S2NearCursor"); - // The first work() is an init. Every subsequent work examines a document. - res->setNScanned(leaf->common.works); - res->setNScannedObjects(leaf->common.works); - // TODO: only adding empty index bounds for backwards compatibility. - res->setIndexBounds(BSONObj()); - // TODO: Could be multikey. - res->setIsMultiKey(false); - res->setIndexOnly(false); - } - else if (leaf->stageType == STAGE_GEO_NEAR_2D) { - TwoDNearStats* nStats = static_cast<TwoDNearStats*>(leaf->specific.get()); - res->setCursor("GeoSearchCursor"); - // The first work() is an init. Every subsequent work examines a document. - res->setNScanned(nStats->nscanned); - res->setNScannedObjects(nStats->objectsLoaded); - // TODO: only adding empty index bounds for backwards compatibility. - res->setIndexBounds(BSONObj()); - // TODO: Could be multikey. - res->setIsMultiKey(false); - res->setIndexOnly(false); - } else if (leaf->stageType == STAGE_TEXT) { TextStats* tStats = static_cast<TextStats*>(leaf->specific.get()); res->setCursor("TextCursor"); @@ -481,11 +474,6 @@ namespace mongo { bob->appendNumber("forcedFetches", spec->forcedFetches); bob->appendNumber("matchTested", spec->matchTested); } - else if (STAGE_GEO_NEAR_2D == stats.stageType) { - TwoDNearStats* spec = static_cast<TwoDNearStats*>(stats.specific.get()); - bob->appendNumber("objectsLoaded", spec->objectsLoaded); - bob->appendNumber("nscanned", spec->nscanned); - } else if (STAGE_IXSCAN == stats.stageType) { IndexScanStats* spec = static_cast<IndexScanStats*>(stats.specific.get()); // TODO: how much do we really want here? we should separate runtime stats vs. tree diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp index 5e59b6e34e0..c6ab1bc09c5 100644 --- a/src/mongo/db/query/planner_access.cpp +++ b/src/mongo/db/query/planner_access.cpp @@ -90,6 +90,44 @@ namespace mongo { return csn; } + // Helper needed for GeoNear until we remove the default 100 limit + static MatchExpression* cloneExcludingGeoNear(const MatchExpression* expr) { + + if (MatchExpression::GEO_NEAR == expr->matchType()) { + return NULL; + } + + auto_ptr<MatchExpression> clone(expr->shallowClone()); + vector<MatchExpression*> stack; + stack.push_back(clone.get()); + + while (!stack.empty()) { + + MatchExpression* next = stack.back(); + stack.pop_back(); + + vector<MatchExpression*>* childVector = next->getChildVector(); + if (!childVector) + continue; + + for (vector<MatchExpression*>::iterator it = childVector->begin(); + it != childVector->end();) { + + MatchExpression* child = *it; + if (MatchExpression::GEO_NEAR == child->matchType()) { + it = childVector->erase(it); + delete child; + } + else { + stack.push_back(child); + ++it; + } + } + } + + return clone.release(); + } + // static QuerySolutionNode* QueryPlannerAccess::makeLeafNode(const CanonicalQuery& query, const IndexEntry& index, @@ -111,21 +149,37 @@ namespace mongo { *tightnessOut = IndexBoundsBuilder::EXACT; GeoNearMatchExpression* nearExpr = static_cast<GeoNearMatchExpression*>(expr); - // 2d geoNear requires a hard limit and as such we take it out before it gets here. If - // this happens it's a bug. BSONElement elt = index.keyPattern.firstElement(); bool indexIs2D = (String == elt.type() && "2d" == elt.String()); - verify(!indexIs2D); - GeoNear2DSphereNode* ret = new GeoNear2DSphereNode(); - ret->indexKeyPattern = index.keyPattern; - ret->nq = nearExpr->getData(); - ret->baseBounds.fields.resize(index.keyPattern.nFields()); - if (NULL != query.getProj()) { - ret->addPointMeta = query.getProj()->wantGeoNearPoint(); - ret->addDistMeta = query.getProj()->wantGeoNearDistance(); + if (indexIs2D) { + GeoNear2DNode* ret = new GeoNear2DNode(); + ret->indexKeyPattern = index.keyPattern; + ret->nq = nearExpr->getData(); + ret->baseBounds.fields.resize(index.keyPattern.nFields()); + if (NULL != query.getProj()) { + ret->addPointMeta = query.getProj()->wantGeoNearPoint(); + ret->addDistMeta = query.getProj()->wantGeoNearDistance(); + } + + ret->numToReturn = query.getParsed().getNumToReturn(); + if (ret->numToReturn == 0) + ret->numToReturn = 100; + ret->fullFilterExcludingNear.reset(cloneExcludingGeoNear(query.root())); + + return ret; + } + else { + GeoNear2DSphereNode* ret = new GeoNear2DSphereNode(); + ret->indexKeyPattern = index.keyPattern; + ret->nq = nearExpr->getData(); + ret->baseBounds.fields.resize(index.keyPattern.nFields()); + if (NULL != query.getProj()) { + ret->addPointMeta = query.getProj()->wantGeoNearPoint(); + ret->addDistMeta = query.getProj()->wantGeoNearDistance(); + } + return ret; } - return ret; } else if (MatchExpression::TEXT == expr->matchType()) { // We must not keep the expression node around. @@ -184,8 +238,6 @@ namespace mongo { const MatchExpression::MatchType mergeType = scanState.root->matchType(); const StageType type = node->getType(); - verify(STAGE_GEO_NEAR_2D != type); - const MatchExpression::MatchType exprType = expr->matchType(); // @@ -204,7 +256,7 @@ namespace mongo { && MatchExpression::TEXT != exprType; } - if (STAGE_GEO_NEAR_2DSPHERE == type) { + if (STAGE_GEO_NEAR_2D == type || STAGE_GEO_NEAR_2DSPHERE == type) { // Currently only one GEO_NEAR is allowed, but to be safe, make sure that we // do not try to merge two GEO_NEAR predicates. return MatchExpression::AND == mergeType @@ -250,7 +302,6 @@ namespace mongo { const IndexEntry& index = scanState->indices[scanState->currentIndexNumber]; const StageType type = node->getType(); - verify(STAGE_GEO_NEAR_2D != type); // Text data is covered, but not exactly. Text covering is unlike any other covering // so we deal with it in addFilterToSolutionNode. @@ -261,7 +312,32 @@ namespace mongo { IndexBounds* boundsToFillOut = NULL; - if (STAGE_GEO_NEAR_2DSPHERE == type) { + if (STAGE_GEO_NEAR_2D == type) { + + invariant(INDEX_2D == index.type); + + // 2D indexes are weird - the "2d" field stores a normally-indexed BinData field, but + // additional array fields are *not* exploded into multi-keys - they are stored directly + // as arrays in the index. Also, no matter what the index expression, the "2d" field is + // always first. + // This means that we can only generically accumulate bounds for 2D indexes over the + // first "2d" field (pos == 0) - MatchExpressions over other fields in the 2D index may + // be covered (can be evaluated using only the 2D index key). The additional fields + // must not affect the index scan bounds, since they are not stored in an + // IndexScan-compatible format. + + if (pos > 0) { + // Marking this field as covered allows the planner to accumulate a MatchExpression + // over the returned 2D index keys instead of adding to the index bounds. + scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED; + return; + } + + // We may have other $geoPredicates on a near index - generate bounds for these + GeoNear2DNode* gn = static_cast<GeoNear2DNode*>(node); + boundsToFillOut = &gn->baseBounds; + } + else if (STAGE_GEO_NEAR_2DSPHERE == type) { GeoNear2DSphereNode* gn = static_cast<GeoNear2DSphereNode*>(node); boundsToFillOut = &gn->baseBounds; } @@ -269,8 +345,8 @@ namespace mongo { verify(type == STAGE_IXSCAN); IndexScanNode* scan = static_cast<IndexScanNode*>(node); - // 2D indexes can only support additional non-geometric criteria by filtering after the - // initial element - don't generate bounds if this is not the first field + // See STAGE_GEO_NEAR_2D above - 2D indexes can only accumulate scan bounds over the + // first "2d" field (pos == 0) if (INDEX_2D == index.type && pos > 0) { scanState->tightness = IndexBoundsBuilder::INEXACT_COVERED; return; @@ -463,7 +539,6 @@ namespace mongo { // static void QueryPlannerAccess::finishLeafNode(QuerySolutionNode* node, const IndexEntry& index) { const StageType type = node->getType(); - verify(STAGE_GEO_NEAR_2D != type); if (STAGE_TEXT == type) { finishTextNode(node, index); @@ -472,7 +547,11 @@ namespace mongo { IndexBounds* bounds = NULL; - if (STAGE_GEO_NEAR_2DSPHERE == type) { + if (STAGE_GEO_NEAR_2D == type) { + GeoNear2DNode* gnode = static_cast<GeoNear2DNode*>(node); + bounds = &gnode->baseBounds; + } + else if (STAGE_GEO_NEAR_2DSPHERE == type) { GeoNear2DSphereNode* gnode = static_cast<GeoNear2DSphereNode*>(node); bounds = &gnode->baseBounds; } diff --git a/src/mongo/db/query/planner_ixselect.cpp b/src/mongo/db/query/planner_ixselect.cpp index 464bffce401..8d4252edc6a 100644 --- a/src/mongo/db/query/planner_ixselect.cpp +++ b/src/mongo/db/query/planner_ixselect.cpp @@ -30,7 +30,6 @@ #include <vector> -#include "mongo/db/geo/core.h" #include "mongo/db/geo/hash.h" #include "mongo/db/index_names.h" #include "mongo/db/matcher/expression_array.h" diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index 9939d13e9ef..c5ca34d850d 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -624,76 +624,6 @@ namespace mongo { return Status(ErrorCodes::BadValue, "unable to find index for $geoNear query"); } - GeoNearMatchExpression* gnme = static_cast<GeoNearMatchExpression*>(gnNode); - - vector<size_t> newFirst; - - // 2d + GEO_NEAR is annoying. Because 2d's GEO_NEAR isn't streaming we have to embed - // the full query tree inside it as a matcher. - for (size_t i = 0; i < tag->first.size(); ++i) { - // GEO_NEAR has a non-2d index it can use. We can deal w/that in normal planning. - if (!is2DIndex(relevantIndices[tag->first[i]].keyPattern)) { - newFirst.push_back(tag->first[i]); - continue; - } - - // If we're here, GEO_NEAR has a 2d index. We create a 2dgeonear plan with the - // entire tree as a filter, if possible. - - GeoNear2DNode* solnRoot = new GeoNear2DNode(); - solnRoot->nq = gnme->getData(); - if (NULL != query.getProj()) { - solnRoot->addPointMeta = query.getProj()->wantGeoNearPoint(); - solnRoot->addDistMeta = query.getProj()->wantGeoNearDistance(); - } - - if (MatchExpression::GEO_NEAR != query.root()->matchType()) { - // root is an AND, clone and delete the GEO_NEAR child. - MatchExpression* filterTree = query.root()->shallowClone(); - verify(MatchExpression::AND == filterTree->matchType()); - - bool foundChild = false; - for (size_t i = 0; i < filterTree->numChildren(); ++i) { - if (MatchExpression::GEO_NEAR == filterTree->getChild(i)->matchType()) { - foundChild = true; - scoped_ptr<MatchExpression> holder(filterTree->getChild(i)); - filterTree->getChildVector()->erase(filterTree->getChildVector()->begin() + i); - break; - } - } - verify(foundChild); - solnRoot->filter.reset(filterTree); - } - - solnRoot->numWanted = query.getParsed().getNumToReturn(); - if (0 == solnRoot->numWanted) { - solnRoot->numWanted = 100; - } - solnRoot->indexKeyPattern = relevantIndices[tag->first[i]].keyPattern; - - // Remove the 2d index. 2d can only be the first field, and we know there is - // only one GEO_NEAR, so we don't care if anyone else was assigned it; it'll - // only be first for gnNode. - tag->first.erase(tag->first.begin() + i); - - QuerySolution* soln = QueryPlannerAnalysis::analyzeDataAccess(query, - params, - solnRoot); - - if (NULL != soln) { - out->push_back(soln); - } - } - - // Continue planning w/non-2d indices tagged for this pred. - tag->first.swap(newFirst); - - if (0 == tag->first.size() && 0 == tag->notFirst.size()) { - // Don't leave tags on query tree. - query.root()->resetTag(); - return Status::OK(); - } - QLOG() << "Rated tree after geonear processing:" << query.root()->toString(); } diff --git a/src/mongo/db/query/query_planner_test.cpp b/src/mongo/db/query/query_planner_test.cpp index eaec8ea0399..7ab31051316 100644 --- a/src/mongo/db/query/query_planner_test.cpp +++ b/src/mongo/db/query/query_planner_test.cpp @@ -1984,7 +1984,7 @@ namespace { // GEO_NEAR must use the index, and GEO predicate becomes a filter. assertNumSolutions(1U); - assertSolutionExists("{geoNear2d: {a: '2d'}}"); + assertSolutionExists("{fetch: { node : { geoNear2d: {a: '2d'} } } }"); } TEST_F(QueryPlannerTest, And2DSphereSameFieldNonNear) { @@ -2141,7 +2141,8 @@ namespace { runQuery(fromjson("{a: {$near: [0, 0]}, b: {$gte: 0}}")); assertNumSolutions(1U); - assertSolutionExists("{geoNear2d: {a: '2d', b: 1}}"); + assertSolutionExists("{fetch: { filter : {b:{$gte: 0}}, node: " + "{geoNear2d: {a: '2d', b: 1} } } }"); } // @@ -3220,7 +3221,7 @@ namespace { addIndex(BSON("a" << "2d")); runQuery(fromjson("{$and: [{a: {$near: [0, 0], $maxDistance: 0.3}}, {b: {$ne: 1}}]}")); assertNumSolutions(1U); - assertSolutionExists("{geoNear2d: {a: '2d'}}"); + assertSolutionExists("{fetch: {node: { geoNear2d: {a: '2d'} } } }"); } // diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index e3fef1dead7..bcdc7d00a12 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -708,7 +708,8 @@ namespace mongo { copy->_sorts = this->_sorts; copy->nq = this->nq; - copy->numWanted = this->numWanted; + copy->baseBounds = this->baseBounds; + copy->numToReturn = this->numToReturn; copy->indexKeyPattern = this->indexKeyPattern; copy->addPointMeta = this->addPointMeta; copy->addDistMeta = this->addDistMeta; diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 096b35473d8..20c0c265245 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -564,7 +564,7 @@ namespace mongo { // This is a standalone stage. struct GeoNear2DNode : public QuerySolutionNode { - GeoNear2DNode() : numWanted(100), addPointMeta(false), addDistMeta(false) { } + GeoNear2DNode() : numToReturn(0), addPointMeta(false), addDistMeta(false) { } virtual ~GeoNear2DNode() { } virtual StageType getType() const { return STAGE_GEO_NEAR_2D; } @@ -580,7 +580,15 @@ namespace mongo { BSONObjSet _sorts; NearQuery nq; - int numWanted; + IndexBounds baseBounds; + + // TODO: Remove both of these + int numToReturn; + // A full match expression for the query with geoNear removed is currently needed - + // since 2D geoNear has a default limit not set in the query, we must limit ourselves and + // so generate only valid results. + scoped_ptr<MatchExpression> fullFilterExcludingNear; + BSONObj indexKeyPattern; bool addPointMeta; bool addDistMeta; diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index 5e29d472dfb..b0441eb0a25 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -29,20 +29,19 @@ #include "mongo/db/query/stage_builder.h" #include "mongo/db/client.h" -#include "mongo/db/exec/2dnear.h" #include "mongo/db/exec/and_hash.h" #include "mongo/db/exec/and_sorted.h" #include "mongo/db/exec/collection_scan.h" #include "mongo/db/exec/count.h" #include "mongo/db/exec/distinct_scan.h" #include "mongo/db/exec/fetch.h" +#include "mongo/db/exec/geo_near.h" #include "mongo/db/exec/index_scan.h" #include "mongo/db/exec/keep_mutations.h" #include "mongo/db/exec/limit.h" #include "mongo/db/exec/merge_sort.h" #include "mongo/db/exec/or.h" #include "mongo/db/exec/projection.h" -#include "mongo/db/exec/s2near.h" #include "mongo/db/exec/shard_filter.h" #include "mongo/db/exec/sort.h" #include "mongo/db/exec/skip.h" @@ -191,27 +190,51 @@ namespace mongo { } else if (STAGE_GEO_NEAR_2D == root->getType()) { const GeoNear2DNode* node = static_cast<const GeoNear2DNode*>(root); - TwoDNearParams params; + + GeoNearParams params; params.nearQuery = node->nq; - params.collection = collection; - params.indexKeyPattern = node->indexKeyPattern; + params.baseBounds = node->baseBounds; params.filter = node->filter.get(); - params.numWanted = node->numWanted; params.addPointMeta = node->addPointMeta; params.addDistMeta = node->addDistMeta; - return new TwoDNear(txn, params, ws); + + IndexDescriptor* twoDIndex = collection->getIndexCatalog()->findIndexByKeyPattern(node + ->indexKeyPattern); + + if (twoDIndex == NULL) { + warning() << "Can't find 2D index " << node->indexKeyPattern.toString() + << "in namespace " << collection->ns() << endl; + return NULL; + } + + int numToReturn = node->numToReturn; + params.fullFilter = node->fullFilterExcludingNear.get(); + + GeoNear2DStage* nearStage = new GeoNear2DStage(params, txn, ws, collection, twoDIndex); + nearStage->setLimit(numToReturn); + + return nearStage; } else if (STAGE_GEO_NEAR_2DSPHERE == root->getType()) { const GeoNear2DSphereNode* node = static_cast<const GeoNear2DSphereNode*>(root); - S2NearParams params; - params.collection = collection; - params.indexKeyPattern = node->indexKeyPattern; + + GeoNearParams params; params.nearQuery = node->nq; params.baseBounds = node->baseBounds; params.filter = node->filter.get(); params.addPointMeta = node->addPointMeta; params.addDistMeta = node->addDistMeta; - return new S2NearStage(txn, params, ws); + + IndexDescriptor* s2Index = collection->getIndexCatalog()->findIndexByKeyPattern(node + ->indexKeyPattern); + + if (s2Index == NULL) { + warning() << "Can't find 2DSphere index " << node->indexKeyPattern.toString() + << "in namespace " << collection->ns() << endl; + return NULL; + } + + return new GeoNear2DSphereStage(params, txn, ws, collection, s2Index); } else if (STAGE_TEXT == root->getType()) { const TextNode* node = static_cast<const TextNode*>(root); diff --git a/src/mongo/dbtests/query_stage_near.cpp b/src/mongo/dbtests/query_stage_near.cpp new file mode 100644 index 00000000000..c4939d16e75 --- /dev/null +++ b/src/mongo/dbtests/query_stage_near.cpp @@ -0,0 +1,269 @@ +/** + * Copyright (C) 2014 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +/** + * This file tests near search functionality. + */ + +#include <boost/shared_ptr.hpp> + +#include "mongo/base/owned_pointer_vector.h" +#include "mongo/db/exec/near.h" +#include "mongo/db/exec/working_set_common.h" +#include "mongo/unittest/unittest.h" + +namespace { + + using namespace mongo; + using std::vector; + + /** + * Stage which takes in an array of BSONObjs and returns them. + * If the BSONObj is in the form of a Status, returns the Status as a FAILURE. + */ + class MockStage : public PlanStage { + public: + + MockStage(const vector<BSONObj>& data, WorkingSet* workingSet) : + _data(data), _pos(0), _workingSet(workingSet), _stats("MOCK_STAGE") { + } + + virtual ~MockStage() { + } + + virtual StageState work(WorkingSetID* out) { + ++_stats.works; + + if (isEOF()) + return PlanStage::IS_EOF; + + BSONObj next = _data[_pos++]; + + if (WorkingSetCommon::isValidStatusMemberObject(next)) { + Status status = WorkingSetCommon::getMemberObjectStatus(next); + *out = WorkingSetCommon::allocateStatusMember(_workingSet, status); + return PlanStage::FAILURE; + } + + *out = _workingSet->allocate(); + WorkingSetMember* member = _workingSet->get(*out); + member->state = WorkingSetMember::OWNED_OBJ; + member->obj = next; + + return PlanStage::ADVANCED; + } + + virtual bool isEOF() { + return _pos == static_cast<int>(_data.size()); + } + + virtual void prepareToYield() { + } + + virtual void recoverFromYield() { + } + + virtual void invalidate(const DiskLoc& dl, InvalidationType type) { + } + virtual vector<PlanStage*> getChildren() const { + return vector<PlanStage*>(); + } + + virtual StageType stageType() const { + return STAGE_UNKNOWN; + } + + virtual PlanStageStats* getStats() { + return new PlanStageStats(_stats, STAGE_UNKNOWN); + } + + virtual const CommonStats* getCommonStats() { + return &_stats; + } + + virtual const SpecificStats* getSpecificStats() { + return NULL; + } + + private: + + vector<BSONObj> _data; + int _pos; + + // Not owned here + WorkingSet* const _workingSet; + + CommonStats _stats; + }; + + /** + * Stage which implements a basic distance search, and interprets the "distance" field of + * fetched documents as the distance. + */ + class MockNearStage : public NearStage { + public: + + struct MockInterval { + + MockInterval(const vector<BSONObj>& data, double min, double max) : + data(data), min(min), max(max) { + } + + vector<BSONObj> data; + double min; + double max; + }; + + MockNearStage(WorkingSet* workingSet) : + NearStage(NULL, workingSet, NULL, + new PlanStageStats(CommonStats("MOCK_DISTANCE_SEARCH_STAGE"), STAGE_UNKNOWN)), + _pos(0) { + } + + virtual ~MockNearStage() { + } + + void addInterval(vector<BSONObj> data, double min, double max) { + _intervals.mutableVector().push_back(new MockInterval(data, min, max)); + } + + virtual StatusWith<CoveredInterval*> nextInterval(OperationContext* txn, + WorkingSet* workingSet, + Collection* collection) { + + if (_pos == static_cast<int>(_intervals.size())) + return StatusWith<CoveredInterval*>(NULL); + + const MockInterval& interval = *_intervals.vector()[_pos++]; + + bool lastInterval = _pos == static_cast<int>(_intervals.vector().size()); + return StatusWith<CoveredInterval*>(new CoveredInterval(new MockStage(interval.data, + workingSet), + true, + interval.min, + interval.max, + lastInterval)); + } + + virtual StatusWith<double> computeDistance(WorkingSetMember* member) { + ASSERT(member->hasObj()); + return StatusWith<double>(member->obj["distance"].numberDouble()); + } + + private: + + OwnedPointerVector<MockInterval> _intervals; + int _pos; + }; + + static vector<BSONObj> advanceStage(PlanStage* stage, WorkingSet* workingSet) { + + vector<BSONObj> results; + + WorkingSetID nextMemberID; + PlanStage::StageState state = PlanStage::NEED_TIME; + + while (PlanStage::NEED_TIME == state) { + while (PlanStage::ADVANCED == (state = stage->work(&nextMemberID))) { + results.push_back(workingSet->get(nextMemberID)->obj); + } + } + + return results; + } + + static void assertAscendingAndValid(const vector<BSONObj>& results) { + double lastDistance = -1.0; + for (vector<BSONObj>::const_iterator it = results.begin(); it != results.end(); ++it) { + double distance = (*it)["distance"].numberDouble(); + bool shouldInclude = (*it)["$included"].eoo() || (*it)["$included"].trueValue(); + ASSERT(shouldInclude); + ASSERT_GREATER_THAN_OR_EQUALS(distance, lastDistance); + lastDistance = distance; + } + } + + TEST(query_stage_near, Basic) { + + vector<BSONObj> mockData; + WorkingSet workingSet; + + MockNearStage nearStage(&workingSet); + + // First set of results + mockData.clear(); + mockData.push_back(BSON("distance" << 0.5)); + mockData.push_back(BSON("distance" << 2.0 << "$included" << false)); // Not included + mockData.push_back(BSON("distance" << 0.0)); + nearStage.addInterval(mockData, 0.0, 1.0); + + // Second set of results + mockData.clear(); + mockData.push_back(BSON("distance" << 1.5)); + mockData.push_back(BSON("distance" << 2.0 << "$included" << false)); // Not included + mockData.push_back(BSON("distance" << 1.0)); + nearStage.addInterval(mockData, 1.0, 2.0); + + // Last set of results + mockData.clear(); + mockData.push_back(BSON("distance" << 2.5)); + mockData.push_back(BSON("distance" << 3.0)); // Included + mockData.push_back(BSON("distance" << 2.0)); + nearStage.addInterval(mockData, 2.0, 3.0); + + vector<BSONObj> results = advanceStage(&nearStage, &workingSet); + ASSERT_EQUALS(results.size(), 7u); + assertAscendingAndValid(results); + } + + TEST(query_stage_near, EmptyResults) { + + vector<BSONObj> mockData; + WorkingSet workingSet; + + MockNearStage nearStage(&workingSet); + + // Empty set of results + mockData.clear(); + nearStage.addInterval(mockData, 0.0, 1.0); + + // Non-empty sest of results + mockData.clear(); + mockData.push_back(BSON("distance" << 1.5)); + mockData.push_back(BSON("distance" << 2.0)); + mockData.push_back(BSON("distance" << 1.0)); + nearStage.addInterval(mockData, 1.0, 2.0); + + vector<BSONObj> results = advanceStage(&nearStage, &workingSet); + ASSERT_EQUALS(results.size(), 3u); + assertAscendingAndValid(results); + } + + + +} |