diff options
author | Hari Khalsa <hkhalsa@10gen.com> | 2012-11-02 10:31:42 -0400 |
---|---|---|
committer | Hari Khalsa <hkhalsa@10gen.com> | 2012-11-05 15:31:27 -0500 |
commit | b500d17c347044b6d38c135c7edc142e3ac68658 (patch) | |
tree | 87f1d36019c5399e5c77e9085c67074c610f8253 /src/mongo/db | |
parent | dcc1f2404856344885a3f5b1ef25c40ea3a105fb (diff) | |
download | mongo-b500d17c347044b6d38c135c7edc142e3ac68658.tar.gz |
SERVER-2874 add s2 indexing and cursor
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/geo/geojsonparser.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/geo/geojsonparser.h | 2 | ||||
-rw-r--r-- | src/mongo/db/geo/s2common.h | 84 | ||||
-rw-r--r-- | src/mongo/db/geo/s2cursor.cpp | 288 | ||||
-rw-r--r-- | src/mongo/db/geo/s2cursor.h | 84 | ||||
-rw-r--r-- | src/mongo/db/geo/s2index.cpp | 311 | ||||
-rw-r--r-- | src/mongo/db/jsobj.cpp | 10 | ||||
-rw-r--r-- | src/mongo/db/matcher.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/queryutil.cpp | 3 |
9 files changed, 788 insertions, 3 deletions
diff --git a/src/mongo/db/geo/geojsonparser.cpp b/src/mongo/db/geo/geojsonparser.cpp index 138583fec9a..c5ceae69b88 100644 --- a/src/mongo/db/geo/geojsonparser.cpp +++ b/src/mongo/db/geo/geojsonparser.cpp @@ -19,6 +19,7 @@ #include "mongo/db/jsobj.h" #include "mongo/db/geo/geojsonparser.h" #include "third_party/s2/s2.h" +#include "third_party/s2/s2cell.h" #include "third_party/s2/s2latlng.h" #include "third_party/s2/s2loop.h" #include "third_party/s2/s2polygon.h" @@ -91,6 +92,13 @@ namespace mongo { return coordinates[0].isNumber() && coordinates[1].isNumber(); } + void GeoJSONParser::parsePoint(const BSONObj& obj, S2Cell* out) { + const vector<BSONElement>& coords = obj.getFieldDotted(GEOJSON_COORDINATES).Array(); + S2LatLng ll = S2LatLng::FromDegrees(coords[1].Number(), + coords[0].Number()).Normalized(); + *out = S2Cell(ll); + } + void GeoJSONParser::parsePoint(const BSONObj& obj, S2Point* out) { const vector<BSONElement>& coords = obj.getFieldDotted(GEOJSON_COORDINATES).Array(); *out = latLngToPoint(coords); diff --git a/src/mongo/db/geo/geojsonparser.h b/src/mongo/db/geo/geojsonparser.h index f7b81986d4b..429640962c4 100644 --- a/src/mongo/db/geo/geojsonparser.h +++ b/src/mongo/db/geo/geojsonparser.h @@ -20,6 +20,7 @@ #include <vector> #include "third_party/s2/s2.h" +class S2Cell; class S2Polyline; class S2Polygon; @@ -34,6 +35,7 @@ namespace mongo { public: static bool isPoint(const BSONObj &obj); static void parsePoint(const BSONObj &obj, S2Point *out); + static void parsePoint(const BSONObj &obj, S2Cell *out); static bool isLineString(const BSONObj &obj); static void parseLineString(const BSONObj &obj, S2Polyline *out); diff --git a/src/mongo/db/geo/s2common.h b/src/mongo/db/geo/s2common.h new file mode 100644 index 00000000000..43d19afdfdd --- /dev/null +++ b/src/mongo/db/geo/s2common.h @@ -0,0 +1,84 @@ +/** +* Copyright (C) 2012 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "mongo/db/geo/geojsonparser.h" +#include "third_party/s2/s2.h" +#include "third_party/s2/s2regioncoverer.h" +#include "third_party/s2/s2cell.h" +#include "third_party/s2/s2polyline.h" +#include "third_party/s2/s2polygon.h" +#include "third_party/s2/s2regioncoverer.h" + +#pragma once + +namespace mongo { + // Used for passing geo data from the newCursor entry point to the S2Cursor class. + struct GeoQueryField { + GeoQueryField(const string& f) : field(f), cell(NULL), line(NULL), polygon(NULL) { } + + // Name of the field in the query. + string field; + // Only one of these should be non-NULL. S2Region is a superclass but it only supports + // testing against S2Cells. We need the most specific class we can get. + // Owned by S2Cursor. + S2Cell *cell; + S2Polyline *line; + S2Polygon *polygon; + + // The functions below are defined in s2cursor.cpp. + + // Does this GeoQueryField intersect the provided data? Sadly there is no common good way + // to check this, so we do different things for all query/data pairs. + bool intersectsPoint(const S2Cell &otherPoint); + bool intersectsLine(const S2Polyline& otherLine); + bool intersectsPolygon(const S2Polygon& otherPolygon); + // One region is not NULL and this returns it. + const S2Region& getRegion() const; + // Delete the not NULL region. + void free(); + }; + + struct S2IndexingParams { + // Since we take the cartesian product when we generate keys for an insert, + // we need a cap. + size_t maxKeysPerInsert; + // This is really an advisory parameter that we pass to the cover generator. The + // finest/coarsest index level determine the required # of cells. + int maxCellsInCovering; + // What's the finest grained level that we'll index? When we query for a point + // we start at that -- we index nothing finer than this. + int finestIndexedLevel; + // And, what's the coarsest? When we search in larger coverings we know we + // can stop here -- we index nothing coarser than this. + int coarsestIndexedLevel; + + string toString() const { + stringstream ss; + ss << "maxKeysPerInsert: " << maxKeysPerInsert << endl; + ss << "maxCellsInCovering: " << maxCellsInCovering << endl; + ss << "finestIndexedLevel: " << finestIndexedLevel << endl; + ss << "coarsestIndexedLevel: " << coarsestIndexedLevel << endl; + return ss.str(); + } + + void configureCoverer(S2RegionCoverer *coverer) const { + coverer->set_min_level(coarsestIndexedLevel); + coverer->set_max_level(finestIndexedLevel); + // This is advisory; the two above are strict. + coverer->set_max_cells(maxCellsInCovering); + } + }; +} // namespace mongo diff --git a/src/mongo/db/geo/s2cursor.cpp b/src/mongo/db/geo/s2cursor.cpp new file mode 100644 index 00000000000..e5160c61cd9 --- /dev/null +++ b/src/mongo/db/geo/s2cursor.cpp @@ -0,0 +1,288 @@ +/** +* Copyright (C) 2012 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "mongo/db/btree.h" +#include "mongo/db/index.h" +#include "mongo/db/matcher.h" +#include "mongo/db/geo/s2common.h" +#include "mongo/db/geo/s2cursor.h" + +namespace mongo { + // Does this GeoQueryField intersect the provided data? + bool GeoQueryField::intersectsPoint(const S2Cell &otherPoint) { + if (NULL != cell) { + return cell->MayIntersect(otherPoint); + } else if (NULL != line) { + return line->MayIntersect(otherPoint); + } else { + return polygon->MayIntersect(otherPoint); + } + } + + bool GeoQueryField::intersectsLine(const S2Polyline& otherLine) { + if (NULL != cell) { + return otherLine.MayIntersect(*cell); + } else if (NULL != line) { + return otherLine.Intersects(line); + } else { + // TODO(hk): modify s2 library to just let us know if it intersected + // rather than returning all this. + vector<S2Polyline*> clipped; + polygon->IntersectWithPolyline(&otherLine, &clipped); + bool ret = clipped.size() > 0; + for (size_t i = 0; i < clipped.size(); ++i) delete clipped[i]; + return ret; + } + return false; + } + + bool GeoQueryField::intersectsPolygon(const S2Polygon& otherPolygon) { + if (NULL != cell) { + return otherPolygon.MayIntersect(*cell); + } else if (NULL != line) { + // TODO(hk): modify s2 library to just let us know if it intersected + // rather than returning all this. + vector<S2Polyline*> clipped; + otherPolygon.IntersectWithPolyline(line, &clipped); + bool ret = clipped.size() > 0; + for (size_t i = 0; i < clipped.size(); ++i) delete clipped[i]; + return ret; + } else { + return otherPolygon.Intersects(polygon); + } + } + + const S2Region& GeoQueryField::getRegion() const { + if (NULL != cell) { + return *cell; + } else if (NULL != line) { + return *line; + } else if (NULL != polygon) { + return *polygon; + } else { + // TODO: freak out here. + return S2Cell(); + } + } + + void GeoQueryField::free() { + if (NULL != cell) { + delete cell; + } else if (NULL != line) { + delete line; + } else if (NULL != polygon) { + delete polygon; + } + } + + S2Cursor::S2Cursor(const BSONObj &keyPattern, const IndexDetails *details, + const BSONObj &query, const vector<GeoQueryField> &fields, + const S2IndexingParams ¶ms, int numWanted) + : _details(details), _fields(fields), _params(params), + _keyPattern(keyPattern), _numToReturn(numWanted) { + BSONObjBuilder geoFieldsToNuke; + for (size_t i = 0; i < _fields.size(); ++i) { + geoFieldsToNuke.append(_fields[i].field, ""); + } + // false means we want to filter OUT geoFieldsToNuke, not filter to include only that. + _filteredQuery = query.filterFieldsUndotted(geoFieldsToNuke.obj(), false); + _matcher.reset(new CoveredIndexMatcher(_filteredQuery, keyPattern)); + } + + S2Cursor::~S2Cursor() { + // We own these pointers. + for (size_t i = 0; i < _fields.size(); ++i) { + _fields[i].free(); + } + } + + CoveredIndexMatcher* S2Cursor::matcher() const { return _matcher.get(); } + + bool S2Cursor::ok() { + if (NULL == _btreeCursor.get()) { + // FieldRangeVector needs an IndexSpec so we make it one. + BSONObjBuilder specBuilder; + BSONObjIterator i(_keyPattern); + while (i.more()) { + BSONElement e = i.next(); + specBuilder.append(e.fieldName(), 1); + } + BSONObj spec = specBuilder.obj(); + IndexSpec specForFRV(spec); + // All the magic is in makeUnifiedFRS. See below. + // A lot of these arguments are opaque. + FieldRangeSet frs(_details->parentNS().c_str(), makeUnifiedFRS(), false, false); + shared_ptr<FieldRangeVector> frv(new FieldRangeVector(frs, specForFRV, 1)); + _btreeCursor.reset(BtreeCursor::make(nsdetails(_details->parentNS().c_str()), + *_details, frv, 1)); + return advance(); + } + return _btreeCursor->ok(); + } + + // Make the FieldRangeSet of keys we look for. Here is an example: + // regularfield1: regularvalue1, $or [ { geo1 : {$in [parentcover1, ... ]}}, + // { geo1 : {regex: ^cover1 } }, + // { geo1 : {regex: ^cover2 } }, + // As for what we put into the geo field, see lengthy comments below. + BSONObj S2Cursor::makeUnifiedFRS() { + BSONObjBuilder frsObjBuilder; + frsObjBuilder.appendElements(_filteredQuery); + + S2RegionCoverer coverer; + _params.configureCoverer(&coverer); + + for (size_t i = 0; i < _fields.size(); ++i) { + // Get a set of coverings. We look for keys that have this covering as a prefix + // (meaning the key is contained within the covering). + vector<S2CellId> cover; + coverer.GetCovering(_fields[i].getRegion(), &cover); + + BSONArrayBuilder orBuilder; + + // Look at the cells we cover (and any cells they cover via prefix). Examine + // everything which has our cover as a strict prefix of its key. Anything with our + // cover as a strict prefix is contained within the cover and should be intersection + // tested. + for (size_t j = 0; j < cover.size(); ++j) { + string regex = "^" + cover[j].toString(); + orBuilder.append(BSON(_fields[i].field << BSON("$regex" << regex))); + } + + // Look at the cells that cover us. We want to look at every cell that contains the + // covering we would index on. We generate the would-index-with-this-covering and + // find all the cells strictly containing the cells in that set, until we hit the + // coarsest indexed cell. We use $in, not a prefix match. Why not prefix? Because + // we've already looked at everything finer or as fine as our initial covering. + // + // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't + // want to look at cells 21212[not-1] because we know they're not going to intersect + // with 212121, but entries inserted with cell value 21212 (no trailing digits) may. + // And we've already looked at points with the cell id 211111 from the regex search + // created above, so we only want things where the value of the last digit is not + // stored (and therefore could be 1). + set<S2CellId> parentCells; + for (size_t j = 0; j < cover.size(); ++j) { + for (S2CellId id = cover[j].parent(); + id.level() >= _params.coarsestIndexedLevel; id = id.parent()) { + parentCells.insert(id); + } + } + + // Create the actual $in statement. + BSONArrayBuilder inBuilder; + for (set<S2CellId>::const_iterator it = parentCells.begin(); + it != parentCells.end(); ++it) { + inBuilder.append(it->toString()); + } + orBuilder.append(BSON(_fields[i].field << BSON("$in" << inBuilder.arr()))); + + // Join the regexes with the in statement via an or. + // TODO(hk): see if this actually works with two geo fields or if they have + // to be joined with an and or what. + frsObjBuilder.append("$or", orBuilder.arr()); + } + return frsObjBuilder.obj(); + } + + Record* S2Cursor::_current() { return _btreeCursor->currLoc().rec(); } + BSONObj S2Cursor::current() { return _btreeCursor->currLoc().obj(); } + DiskLoc S2Cursor::currLoc() { return _btreeCursor->currLoc(); } + BSONObj S2Cursor::currKey() const { return _btreeCursor->currKey(); } + DiskLoc S2Cursor::refLoc() { return DiskLoc(); } + long long S2Cursor::nscanned() { return _nscanned; } + + // This is the actual search. + bool S2Cursor::advance() { + if (_numToReturn <= 0) { return false; } + for (; _btreeCursor->ok(); _btreeCursor->advance()) { + if (_seen.end() != _seen.find(_btreeCursor->currLoc())) { continue; } + _seen.insert(_btreeCursor->currLoc()); + ++_nscanned; + + MatchDetails details; + bool matched = _matcher->matchesCurrent(_btreeCursor.get(), &details); + if (!matched) { continue; } + + const BSONObj &indexedObj = _btreeCursor->currLoc().obj(); + + size_t geoFieldsMatched = 0; + // OK, cool, non-geo match satisfied. See if the object actually overlaps w/the geo + // query fields. + for (size_t i = 0; i < _fields.size(); ++i) { + BSONElementSet geoFieldElements; + indexedObj.getFieldsDotted(_fields[i].field, geoFieldElements); + if (geoFieldElements.empty()) { continue; } + + bool match = false; + + for (BSONElementSet::iterator oi = geoFieldElements.begin(); + oi != geoFieldElements.end(); ++oi) { + const BSONObj &geoObj = oi->Obj(); + if (GeoJSONParser::isPolygon(geoObj)) { + S2Polygon shape; + GeoJSONParser::parsePolygon(geoObj, &shape); + match = _fields[i].intersectsPolygon(shape); + } else if (GeoJSONParser::isLineString(geoObj)) { + S2Polyline shape; + GeoJSONParser::parseLineString(geoObj, &shape); + match = _fields[i].intersectsLine(shape); + } else if (GeoJSONParser::isPoint(geoObj)) { + S2Cell point; + GeoJSONParser::parsePoint(geoObj, &point); + match = _fields[i].intersectsPoint(point); + } + if (match) break; + } + + if (match) { ++geoFieldsMatched; } + } + + if (geoFieldsMatched == _fields.size()) { + // We have a winner! And we point at it. + --_numToReturn; + return true; + } + } + return false; + } + + // TODO: yielding is very un-tested. + // This is called when we're supposed to yield. + void S2Cursor::noteLocation() { + _btreeCursor->noteLocation(); + _seen.clear(); + } + + // Called when we're un-yielding. + void S2Cursor::checkLocation() { + _btreeCursor->checkLocation(); + // We are pointing at a valid btree location now, but it may not be a valid result. + // This ensures that we're pointing at a valid result that satisfies the query. + + // There is something subtle here: Say we point at something valid, and note the location + // (yield), then checkLocation (unyield), when we call advance, we don't go past the object + // that we were/are pointing at since we only do that if we've seen it before (that is, it's + // in _seen, which we clear when we yield). + + advance(); + } + + void S2Cursor::explainDetails(BSONObjBuilder& b) { + // TODO(hk): Dump more meaningful stats. + b << "nscanned " << _nscanned; + } +} // namespace mongo diff --git a/src/mongo/db/geo/s2cursor.h b/src/mongo/db/geo/s2cursor.h new file mode 100644 index 00000000000..e220f407062 --- /dev/null +++ b/src/mongo/db/geo/s2cursor.h @@ -0,0 +1,84 @@ +/** +* Copyright (C) 2012 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <vector> +#include "mongo/db/jsobj.h" +#include "mongo/db/commands.h" +#include "mongo/db/btree.h" +#include "mongo/db/cursor.h" +#include "mongo/db/diskloc.h" +#include "mongo/db/matcher.h" +#include "mongo/db/queryutil.h" +#include "mongo/db/geo/s2common.h" + +namespace mongo { + class S2Cursor : public Cursor { + public: + S2Cursor(const BSONObj &keyPattern, const IndexDetails* details, const BSONObj &query, + const vector<GeoQueryField> ®ions, const S2IndexingParams ¶ms, + int numWanted); + virtual ~S2Cursor(); + virtual CoveredIndexMatcher *matcher() const; + + virtual bool supportYields() { return true; } + virtual bool supportGetMore() { return true; } + virtual bool isMultiKey() const { return true; } + virtual bool autoDedup() const { return false; } + virtual bool modifiedKeys() const { return true; } + virtual bool getsetdup(DiskLoc loc) { return false; } + virtual string toString() { return "S2Cursor"; } + BSONObj indexKeyPattern() { return _keyPattern; } + virtual bool ok(); + virtual Record* _current(); + virtual BSONObj current(); + virtual DiskLoc currLoc(); + virtual bool advance(); + virtual BSONObj currKey() const; + virtual DiskLoc refLoc(); + virtual void noteLocation(); + virtual void checkLocation(); + virtual long long nscanned(); + virtual void explainDetails(BSONObjBuilder& b); + private: + // Make an object that describes the restrictions on all possible valid keys. + // It's kind of a monstrous object. Thanks, FieldRangeSet, for doing all the work + // for us. + BSONObj makeUnifiedFRS(); + + // Need this to make a FieldRangeSet. + const IndexDetails *_details; + // The query with the geo stuff taken out. We use this with a matcher. + BSONObj _filteredQuery; + // What geo regions are we looking for? + vector<GeoQueryField> _fields; + // We use this for matching non-GEO stuff. + shared_ptr<CoveredIndexMatcher> _matcher; + // How were the keys created? We need this to search for the right stuff. + S2IndexingParams _params; + // How many things did we scan/look at? Not sure exactly how this is defined. + long long _nscanned; + // We have to pass this to the FieldRangeVector ctor (in modified form). + BSONObj _keyPattern; + // How many docs do we want to return? Starts with the # the user requests + // and goes down. + int _numToReturn; + + // What have we checked so we don't repeat it and waste time? + set<DiskLoc> _seen; + // This really does all the work/points into the btree. + scoped_ptr<BtreeCursor> _btreeCursor; + }; +} // namespace mongo diff --git a/src/mongo/db/geo/s2index.cpp b/src/mongo/db/geo/s2index.cpp new file mode 100644 index 00000000000..c03d6dae906 --- /dev/null +++ b/src/mongo/db/geo/s2index.cpp @@ -0,0 +1,311 @@ +/** +* Copyright (C) 2012 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "mongo/db/namespace-inl.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/index.h" +#include "mongo/db/queryutil.h" +#include "mongo/db/geo/geojsonparser.h" +#include "mongo/db/geo/s2common.h" +#include "mongo/db/geo/s2cursor.h" +#include "third_party/s2/s2.h" +#include "third_party/s2/s2cell.h" +#include "third_party/s2/s2polygon.h" +#include "third_party/s2/s2polyline.h" +#include "third_party/s2/s2regioncoverer.h" + +namespace { + // Used in a handful of places in GeoSphere2DType below. + static void keysFromRegion(S2RegionCoverer *coverer, const S2Region ®ion, + vector<string> *out) { + vector<S2CellId> covering; + coverer->GetCovering(region, &covering); + for (size_t i = 0; i < covering.size(); ++i) { + out->push_back(covering[i].toString()); + } + } +} // namespace + +namespace mongo { + + static const string SPHERE_2D_NAME = "s2d"; + + class GeoSphere2DType : public IndexType { + public: + // We keep track of what fields we've indexed and if they're geo or not. + struct IndexedField { + enum Type { + GEO, + LITERAL + }; + + Type type; + string name; + IndexedField(Type t, const string& n) : type(t), name(n) { } + }; + + GeoSphere2DType(const IndexPlugin *plugin, const IndexSpec *spec, + const S2IndexingParams ¶ms) + : IndexType(plugin, spec), _params(params) { + int geoFields = 0; + // Categorize the fields we're indexing and make sure we have a geo field. + BSONObjIterator i(spec->keyPattern); + while (i.more()) { + BSONElement e = i.next(); + if (e.type() == String && SPHERE_2D_NAME == e.valuestr()) { + _fields.push_back(IndexedField(IndexedField::GEO, e.fieldName())); + ++geoFields; + } else { + _fields.push_back(IndexedField(IndexedField::LITERAL, e.fieldName())); + } + } + uassert(16450, "Expect at least one geo field, spec=" + spec->keyPattern.toString(), + geoFields >= 1); + } + + virtual ~GeoSphere2DType() { } + + void getKeys(const BSONObj& obj, BSONObjSet& keys) const { + verify(_fields.size() >= 1); + + BSONObjSet keysToAdd; + // We output keys in the same order as the fields we index. + for (size_t i = 0; i < _fields.size(); ++i) { + const IndexedField &field = _fields[i]; + + // First, we get the keys that this field adds. Either they're added literally from + // the value of the field, or they're transformed if the field is geo. + BSONElementSet fieldElements; + obj.getFieldsDotted(field.name, fieldElements); + + BSONObj keysForThisField; + if (IndexedField::GEO == field.type) { + keysForThisField = getGeoKeys(fieldElements); + } else if (IndexedField::LITERAL == field.type) { + keysForThisField = getLiteralKeys(fieldElements); + } else { + verify(0); + } + + // We expect there to be _spec->_missingField() present in the keys if data is + // missing. So, this should be non-empty. + verify(!keysForThisField.isEmpty()); + + // We take the Cartesian product of all of the keys. This requires that we have + // some keys to take the Cartesian product with. If keysToAdd.empty(), we + // initialize it. + if (keysToAdd.empty()) { + // This should only happen w/the first field. + verify(0 == i); + BSONObjIterator newIt(keysForThisField); + while (newIt.more()) { + BSONObjBuilder b; + b.append("", newIt.next().String()); + keysToAdd.insert(b.obj()); + } + continue; + } + + BSONObjSet updatedKeysToAdd; + for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); + ++it) { + + BSONObjIterator newIt(keysForThisField); + while (newIt.more()) { + BSONObjBuilder b; + b.appendElements(*it); + b.append("", newIt.next().String()); + updatedKeysToAdd.insert(b.obj()); + } + } + keysToAdd = updatedKeysToAdd; + } + + if (keysToAdd.size() > _params.maxKeysPerInsert) { + warning() << "insert of geo object generated lots of keys.\n" + << "consider creating larger buckets. obj=" + << obj; + } + + for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) { + keys.insert(*it); + } + } + + // Entry point for a search. + virtual shared_ptr<Cursor> newCursor(const BSONObj& query, const BSONObj& order, + int numWanted) const { + // I copied this from 2d.cpp. Guard against perversion. + if (numWanted < 0) numWanted *= -1; + if (0 == numWanted) numWanted = INT_MAX; + + vector<GeoQueryField> regions; + + // Go through the fields that we index, and for each geo one, make a GeoQueryField + // object for the S2Cursor class to do intersection testing/cover generating with. + for (size_t i = 0; i < _fields.size(); ++i) { + const IndexedField &field = _fields[i]; + if (IndexedField::GEO != field.type) { continue; } + + // Example of what we're trying to parse: + // pointA = { "type" : "Point", "coordinates": [ 40, 5 ] } + // t.find({ "geo" : { "$intersect" : { "$point" : pointA} } }) + // where field.name is "geo" + BSONElement e = query.getFieldDotted(field.name); + if (e.eoo()) { continue; } + + if (!e.isABSONObj()) { continue; } + e = e.embeddedObject().firstElement(); + + if (!e.isABSONObj()) { continue; } + + BSONObj::MatchType matchType = static_cast<BSONObj::MatchType>(e.getGtLtOp()); + if (BSONObj::opINTERSECT != matchType && BSONObj::opWITHIN != matchType) { + continue; + } + + e = e.embeddedObject().firstElement(); + if (!e.isABSONObj()) { continue; } + + BSONObj shapeObj = e.embeddedObject(); + if (2 != shapeObj.nFields()) { continue; } + + GeoQueryField geoQueryField(field.name); + + if (strcmp(e.fieldName(), "$geometry")) { continue; } + if (GeoJSONParser::isPolygon(shapeObj)) { + // We can't really pass these things around willy-nilly except by ptr. + // The cursor owns them. + geoQueryField.polygon = new S2Polygon(); + GeoJSONParser::parsePolygon(shapeObj, geoQueryField.polygon); + } else if (GeoJSONParser::isPoint(shapeObj)) { + geoQueryField.cell = new S2Cell(); + GeoJSONParser::parsePoint(shapeObj, geoQueryField.cell); + } else if (GeoJSONParser::isLineString(shapeObj)) { + geoQueryField.line = new S2Polyline(); + GeoJSONParser::parseLineString(shapeObj, geoQueryField.line); + } else { + // Maybe it's unknown geometry, maybe it's garbage. + // TODO: alert the user? + continue; + } + regions.push_back(geoQueryField); + } + + S2Cursor *cursor = new S2Cursor(keyPattern(), getDetails(), query, regions, _params, + numWanted); + return shared_ptr<Cursor>(cursor); + } + + virtual IndexSuitability suitability(const BSONObj& query, const BSONObj& order) const { + for (size_t i = 0; i < _fields.size(); ++i) { + const IndexedField &field = _fields[i]; + if (IndexedField::GEO != field.type) { continue; } + + BSONElement e = query.getFieldDotted(field.name); + if (Object != e.type()) { continue; } + // we only support opWITHIN and opINTERSECT + // getGtLtOp is horribly misnamed and really means get the operation. + switch (e.embeddedObject().firstElement().getGtLtOp()) { + case BSONObj::opWITHIN: + case BSONObj::opINTERSECT: + return OPTIMAL; + default: + return HELPFUL; + } + } + return USELESS; + } + + const IndexDetails* getDetails() const { return _spec->getDetails(); } + private: + // Get the index keys for elements that are GeoJSON. + BSONObj getGeoKeys(const BSONElementSet &elements) const { + BSONArrayBuilder aBuilder; + + S2RegionCoverer coverer; + _params.configureCoverer(&coverer); + + // See here for GeoJSON format: geojson.org/geojson-spec.html + for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { + const BSONObj &obj = i->Obj(); + + vector<string> cells; + if (GeoJSONParser::isPolygon(obj)) { + S2Polygon shape; + GeoJSONParser::parsePolygon(obj, &shape); + keysFromRegion(&coverer, shape, &cells); + } else if (GeoJSONParser::isLineString(obj)) { + S2Polyline shape; + GeoJSONParser::parseLineString(obj, &shape); + keysFromRegion(&coverer, shape, &cells); + } else if (GeoJSONParser::isPoint(obj)) { + S2Cell point; + GeoJSONParser::parsePoint(obj, &point); + keysFromRegion(&coverer, point, &cells); + } else { + // TODO(hk): report an error? + } + + for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) { + aBuilder.append(*it); + } + } + + if (0 == aBuilder.arrSize()) { + // TODO(hk): We use "" for empty. I should verify this actually works. + aBuilder.append(""); + } + + return aBuilder.arr(); + } + + // elements is a non-geo field. Add the values literally, expanding arrays. + BSONObj getLiteralKeys(const BSONElementSet &elements) const { + BSONArrayBuilder builder; + if (0 == elements.size()) { + builder.append(""); + } else { + for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { + builder.append(*i); + } + } + return builder.arr(); + } + + vector<IndexedField> _fields; + S2IndexingParams _params; + }; + + class GeoSphere2DIndexPlugin : public IndexPlugin { + public: + GeoSphere2DIndexPlugin() : IndexPlugin(SPHERE_2D_NAME) { } + + virtual IndexType* generate(const IndexSpec* spec) const { + S2IndexingParams params; + // TODO: parse params optionally from spec? + params.maxKeysPerInsert = 100; + // This is advisory. + params.maxCellsInCovering = 5; + const double radiusOfEarthInMeters = 6378.1 * 1000.0; + // These are not advisory. + params.finestIndexedLevel = S2::kAvgEdge.GetClosestLevel(10.0 / radiusOfEarthInMeters); + params.coarsestIndexedLevel = + S2::kAvgEdge.GetClosestLevel(1000000.0 / radiusOfEarthInMeters); + return new GeoSphere2DType(this, spec, params); + } + } geoSphere2DIndexPlugin; +} // namespace mongo diff --git a/src/mongo/db/jsobj.cpp b/src/mongo/db/jsobj.cpp index 2278097dd8f..19741de8b7b 100644 --- a/src/mongo/db/jsobj.cpp +++ b/src/mongo/db/jsobj.cpp @@ -291,9 +291,13 @@ namespace mongo { } else if ( fn[1] == 't' && fn[2] == 'y' && fn[3] == 'p' && fn[4] == 'e' && fn[5] == 0 ) return BSONObj::opTYPE; - else if ( fn[1] == 'i' && fn[2] == 'n' && fn[3] == 0 ) - return BSONObj::opIN; - else if ( fn[1] == 'n' && fn[2] == 'i' && fn[3] == 'n' && fn[4] == 0 ) + else if ( fn[1] == 'i' && fn[2] == 'n') { + if (0 == fn[3]) { + return BSONObj::opIN; + } else if (mongoutils::str::equals(fn + 3, "tersect")) { + return BSONObj::opINTERSECT; + } + } else if ( fn[1] == 'n' && fn[2] == 'i' && fn[3] == 'n' && fn[4] == 0 ) return BSONObj::NIN; else if ( fn[1] == 'a' && fn[2] == 'l' && fn[3] == 'l' && fn[4] == 0 ) return BSONObj::opALL; diff --git a/src/mongo/db/matcher.cpp b/src/mongo/db/matcher.cpp index 1d29b255b95..719dc2dfd2c 100644 --- a/src/mongo/db/matcher.cpp +++ b/src/mongo/db/matcher.cpp @@ -371,6 +371,7 @@ namespace mongo { } case BSONObj::opNEAR: case BSONObj::opWITHIN: + case BSONObj::opINTERSECT: case BSONObj::opMAX_DISTANCE: break; default: diff --git a/src/mongo/db/queryutil.cpp b/src/mongo/db/queryutil.cpp index 63c887b672f..73bb151cdda 100644 --- a/src/mongo/db/queryutil.cpp +++ b/src/mongo/db/queryutil.cpp @@ -449,6 +449,9 @@ namespace mongo { case BSONObj::opWITHIN: _special = "2d"; break; + case BSONObj::opINTERSECT: + _special = "s2d"; + break; case BSONObj::opEXISTS: { if ( !existsSpec ) { lower = upper = staticNull.firstElement(); |