summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorHari Khalsa <hkhalsa@10gen.com>2012-11-02 10:31:42 -0400
committerHari Khalsa <hkhalsa@10gen.com>2012-11-05 15:31:27 -0500
commitb500d17c347044b6d38c135c7edc142e3ac68658 (patch)
tree87f1d36019c5399e5c77e9085c67074c610f8253 /src
parentdcc1f2404856344885a3f5b1ef25c40ea3a105fb (diff)
downloadmongo-b500d17c347044b6d38c135c7edc142e3ac68658.tar.gz
SERVER-2874 add s2 indexing and cursor
Diffstat (limited to 'src')
-rw-r--r--src/mongo/SConscript4
-rw-r--r--src/mongo/bson/bsonobj.h3
-rw-r--r--src/mongo/db/geo/geojsonparser.cpp8
-rw-r--r--src/mongo/db/geo/geojsonparser.h2
-rw-r--r--src/mongo/db/geo/s2common.h84
-rw-r--r--src/mongo/db/geo/s2cursor.cpp288
-rw-r--r--src/mongo/db/geo/s2cursor.h84
-rw-r--r--src/mongo/db/geo/s2index.cpp311
-rw-r--r--src/mongo/db/jsobj.cpp10
-rw-r--r--src/mongo/db/matcher.cpp1
-rw-r--r--src/mongo/db/queryutil.cpp3
11 files changed, 793 insertions, 5 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript
index a78fb0488fc..9634923deef 100644
--- a/src/mongo/SConscript
+++ b/src/mongo/SConscript
@@ -329,6 +329,8 @@ serverOnlyFiles = [ "db/curop.cpp",
"db/explain.cpp",
"db/geo/2d.cpp",
"db/geo/haystack.cpp",
+ "db/geo/s2cursor.cpp",
+ "db/geo/s2index.cpp",
"db/hashindex.cpp",
"db/ops/count.cpp",
"db/ops/delete.cpp",
@@ -422,7 +424,7 @@ env.StaticLibrary("serveronly", serverOnlyFiles,
LIBDEPS=["coreshard",
"dbcmdline",
"defaultversion",
- # "geojson",
+ "geojson",
"geometry",
'$BUILD_DIR/third_party/shim_snappy'])
diff --git a/src/mongo/bson/bsonobj.h b/src/mongo/bson/bsonobj.h
index fb232ebf100..0fa73be3f3f 100644
--- a/src/mongo/bson/bsonobj.h
+++ b/src/mongo/bson/bsonobj.h
@@ -420,7 +420,8 @@ namespace mongo {
opELEM_MATCH = 0x12,
opNEAR = 0x13,
opWITHIN = 0x14,
- opMAX_DISTANCE=0x15
+ opMAX_DISTANCE = 0x15,
+ opINTERSECT = 0x16
};
/** add all elements of the object to the specified vector */
diff --git a/src/mongo/db/geo/geojsonparser.cpp b/src/mongo/db/geo/geojsonparser.cpp
index 138583fec9a..c5ceae69b88 100644
--- a/src/mongo/db/geo/geojsonparser.cpp
+++ b/src/mongo/db/geo/geojsonparser.cpp
@@ -19,6 +19,7 @@
#include "mongo/db/jsobj.h"
#include "mongo/db/geo/geojsonparser.h"
#include "third_party/s2/s2.h"
+#include "third_party/s2/s2cell.h"
#include "third_party/s2/s2latlng.h"
#include "third_party/s2/s2loop.h"
#include "third_party/s2/s2polygon.h"
@@ -91,6 +92,13 @@ namespace mongo {
return coordinates[0].isNumber() && coordinates[1].isNumber();
}
+ void GeoJSONParser::parsePoint(const BSONObj& obj, S2Cell* out) {
+ const vector<BSONElement>& coords = obj.getFieldDotted(GEOJSON_COORDINATES).Array();
+ S2LatLng ll = S2LatLng::FromDegrees(coords[1].Number(),
+ coords[0].Number()).Normalized();
+ *out = S2Cell(ll);
+ }
+
void GeoJSONParser::parsePoint(const BSONObj& obj, S2Point* out) {
const vector<BSONElement>& coords = obj.getFieldDotted(GEOJSON_COORDINATES).Array();
*out = latLngToPoint(coords);
diff --git a/src/mongo/db/geo/geojsonparser.h b/src/mongo/db/geo/geojsonparser.h
index f7b81986d4b..429640962c4 100644
--- a/src/mongo/db/geo/geojsonparser.h
+++ b/src/mongo/db/geo/geojsonparser.h
@@ -20,6 +20,7 @@
#include <vector>
#include "third_party/s2/s2.h"
+class S2Cell;
class S2Polyline;
class S2Polygon;
@@ -34,6 +35,7 @@ namespace mongo {
public:
static bool isPoint(const BSONObj &obj);
static void parsePoint(const BSONObj &obj, S2Point *out);
+ static void parsePoint(const BSONObj &obj, S2Cell *out);
static bool isLineString(const BSONObj &obj);
static void parseLineString(const BSONObj &obj, S2Polyline *out);
diff --git a/src/mongo/db/geo/s2common.h b/src/mongo/db/geo/s2common.h
new file mode 100644
index 00000000000..43d19afdfdd
--- /dev/null
+++ b/src/mongo/db/geo/s2common.h
@@ -0,0 +1,84 @@
+/**
+* Copyright (C) 2012 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "mongo/db/geo/geojsonparser.h"
+#include "third_party/s2/s2.h"
+#include "third_party/s2/s2regioncoverer.h"
+#include "third_party/s2/s2cell.h"
+#include "third_party/s2/s2polyline.h"
+#include "third_party/s2/s2polygon.h"
+#include "third_party/s2/s2regioncoverer.h"
+
+#pragma once
+
+namespace mongo {
+ // Used for passing geo data from the newCursor entry point to the S2Cursor class.
+ struct GeoQueryField {
+ GeoQueryField(const string& f) : field(f), cell(NULL), line(NULL), polygon(NULL) { }
+
+ // Name of the field in the query.
+ string field;
+ // Only one of these should be non-NULL. S2Region is a superclass but it only supports
+ // testing against S2Cells. We need the most specific class we can get.
+ // Owned by S2Cursor.
+ S2Cell *cell;
+ S2Polyline *line;
+ S2Polygon *polygon;
+
+ // The functions below are defined in s2cursor.cpp.
+
+ // Does this GeoQueryField intersect the provided data? Sadly there is no common good way
+ // to check this, so we do different things for all query/data pairs.
+ bool intersectsPoint(const S2Cell &otherPoint);
+ bool intersectsLine(const S2Polyline& otherLine);
+ bool intersectsPolygon(const S2Polygon& otherPolygon);
+ // One region is not NULL and this returns it.
+ const S2Region& getRegion() const;
+ // Delete the not NULL region.
+ void free();
+ };
+
+ struct S2IndexingParams {
+ // Since we take the cartesian product when we generate keys for an insert,
+ // we need a cap.
+ size_t maxKeysPerInsert;
+ // This is really an advisory parameter that we pass to the cover generator. The
+ // finest/coarsest index level determine the required # of cells.
+ int maxCellsInCovering;
+ // What's the finest grained level that we'll index? When we query for a point
+ // we start at that -- we index nothing finer than this.
+ int finestIndexedLevel;
+ // And, what's the coarsest? When we search in larger coverings we know we
+ // can stop here -- we index nothing coarser than this.
+ int coarsestIndexedLevel;
+
+ string toString() const {
+ stringstream ss;
+ ss << "maxKeysPerInsert: " << maxKeysPerInsert << endl;
+ ss << "maxCellsInCovering: " << maxCellsInCovering << endl;
+ ss << "finestIndexedLevel: " << finestIndexedLevel << endl;
+ ss << "coarsestIndexedLevel: " << coarsestIndexedLevel << endl;
+ return ss.str();
+ }
+
+ void configureCoverer(S2RegionCoverer *coverer) const {
+ coverer->set_min_level(coarsestIndexedLevel);
+ coverer->set_max_level(finestIndexedLevel);
+ // This is advisory; the two above are strict.
+ coverer->set_max_cells(maxCellsInCovering);
+ }
+ };
+} // namespace mongo
diff --git a/src/mongo/db/geo/s2cursor.cpp b/src/mongo/db/geo/s2cursor.cpp
new file mode 100644
index 00000000000..e5160c61cd9
--- /dev/null
+++ b/src/mongo/db/geo/s2cursor.cpp
@@ -0,0 +1,288 @@
+/**
+* Copyright (C) 2012 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "mongo/db/btree.h"
+#include "mongo/db/index.h"
+#include "mongo/db/matcher.h"
+#include "mongo/db/geo/s2common.h"
+#include "mongo/db/geo/s2cursor.h"
+
+namespace mongo {
+ // Does this GeoQueryField intersect the provided data?
+ bool GeoQueryField::intersectsPoint(const S2Cell &otherPoint) {
+ if (NULL != cell) {
+ return cell->MayIntersect(otherPoint);
+ } else if (NULL != line) {
+ return line->MayIntersect(otherPoint);
+ } else {
+ return polygon->MayIntersect(otherPoint);
+ }
+ }
+
+ bool GeoQueryField::intersectsLine(const S2Polyline& otherLine) {
+ if (NULL != cell) {
+ return otherLine.MayIntersect(*cell);
+ } else if (NULL != line) {
+ return otherLine.Intersects(line);
+ } else {
+ // TODO(hk): modify s2 library to just let us know if it intersected
+ // rather than returning all this.
+ vector<S2Polyline*> clipped;
+ polygon->IntersectWithPolyline(&otherLine, &clipped);
+ bool ret = clipped.size() > 0;
+ for (size_t i = 0; i < clipped.size(); ++i) delete clipped[i];
+ return ret;
+ }
+ return false;
+ }
+
+ bool GeoQueryField::intersectsPolygon(const S2Polygon& otherPolygon) {
+ if (NULL != cell) {
+ return otherPolygon.MayIntersect(*cell);
+ } else if (NULL != line) {
+ // TODO(hk): modify s2 library to just let us know if it intersected
+ // rather than returning all this.
+ vector<S2Polyline*> clipped;
+ otherPolygon.IntersectWithPolyline(line, &clipped);
+ bool ret = clipped.size() > 0;
+ for (size_t i = 0; i < clipped.size(); ++i) delete clipped[i];
+ return ret;
+ } else {
+ return otherPolygon.Intersects(polygon);
+ }
+ }
+
+ const S2Region& GeoQueryField::getRegion() const {
+ if (NULL != cell) {
+ return *cell;
+ } else if (NULL != line) {
+ return *line;
+ } else if (NULL != polygon) {
+ return *polygon;
+ } else {
+ // TODO: freak out here.
+ return S2Cell();
+ }
+ }
+
+ void GeoQueryField::free() {
+ if (NULL != cell) {
+ delete cell;
+ } else if (NULL != line) {
+ delete line;
+ } else if (NULL != polygon) {
+ delete polygon;
+ }
+ }
+
+ S2Cursor::S2Cursor(const BSONObj &keyPattern, const IndexDetails *details,
+ const BSONObj &query, const vector<GeoQueryField> &fields,
+ const S2IndexingParams &params, int numWanted)
+ : _details(details), _fields(fields), _params(params),
+ _keyPattern(keyPattern), _numToReturn(numWanted) {
+ BSONObjBuilder geoFieldsToNuke;
+ for (size_t i = 0; i < _fields.size(); ++i) {
+ geoFieldsToNuke.append(_fields[i].field, "");
+ }
+ // false means we want to filter OUT geoFieldsToNuke, not filter to include only that.
+ _filteredQuery = query.filterFieldsUndotted(geoFieldsToNuke.obj(), false);
+ _matcher.reset(new CoveredIndexMatcher(_filteredQuery, keyPattern));
+ }
+
+ S2Cursor::~S2Cursor() {
+ // We own these pointers.
+ for (size_t i = 0; i < _fields.size(); ++i) {
+ _fields[i].free();
+ }
+ }
+
+ CoveredIndexMatcher* S2Cursor::matcher() const { return _matcher.get(); }
+
+ bool S2Cursor::ok() {
+ if (NULL == _btreeCursor.get()) {
+ // FieldRangeVector needs an IndexSpec so we make it one.
+ BSONObjBuilder specBuilder;
+ BSONObjIterator i(_keyPattern);
+ while (i.more()) {
+ BSONElement e = i.next();
+ specBuilder.append(e.fieldName(), 1);
+ }
+ BSONObj spec = specBuilder.obj();
+ IndexSpec specForFRV(spec);
+ // All the magic is in makeUnifiedFRS. See below.
+ // A lot of these arguments are opaque.
+ FieldRangeSet frs(_details->parentNS().c_str(), makeUnifiedFRS(), false, false);
+ shared_ptr<FieldRangeVector> frv(new FieldRangeVector(frs, specForFRV, 1));
+ _btreeCursor.reset(BtreeCursor::make(nsdetails(_details->parentNS().c_str()),
+ *_details, frv, 1));
+ return advance();
+ }
+ return _btreeCursor->ok();
+ }
+
+ // Make the FieldRangeSet of keys we look for. Here is an example:
+ // regularfield1: regularvalue1, $or [ { geo1 : {$in [parentcover1, ... ]}},
+ // { geo1 : {regex: ^cover1 } },
+ // { geo1 : {regex: ^cover2 } },
+ // As for what we put into the geo field, see lengthy comments below.
+ BSONObj S2Cursor::makeUnifiedFRS() {
+ BSONObjBuilder frsObjBuilder;
+ frsObjBuilder.appendElements(_filteredQuery);
+
+ S2RegionCoverer coverer;
+ _params.configureCoverer(&coverer);
+
+ for (size_t i = 0; i < _fields.size(); ++i) {
+ // Get a set of coverings. We look for keys that have this covering as a prefix
+ // (meaning the key is contained within the covering).
+ vector<S2CellId> cover;
+ coverer.GetCovering(_fields[i].getRegion(), &cover);
+
+ BSONArrayBuilder orBuilder;
+
+ // Look at the cells we cover (and any cells they cover via prefix). Examine
+ // everything which has our cover as a strict prefix of its key. Anything with our
+ // cover as a strict prefix is contained within the cover and should be intersection
+ // tested.
+ for (size_t j = 0; j < cover.size(); ++j) {
+ string regex = "^" + cover[j].toString();
+ orBuilder.append(BSON(_fields[i].field << BSON("$regex" << regex)));
+ }
+
+ // Look at the cells that cover us. We want to look at every cell that contains the
+ // covering we would index on. We generate the would-index-with-this-covering and
+ // find all the cells strictly containing the cells in that set, until we hit the
+ // coarsest indexed cell. We use $in, not a prefix match. Why not prefix? Because
+ // we've already looked at everything finer or as fine as our initial covering.
+ //
+ // Say we have a fine point with cell id 212121, we go up one, get 21212, we don't
+ // want to look at cells 21212[not-1] because we know they're not going to intersect
+ // with 212121, but entries inserted with cell value 21212 (no trailing digits) may.
+ // And we've already looked at points with the cell id 211111 from the regex search
+ // created above, so we only want things where the value of the last digit is not
+ // stored (and therefore could be 1).
+ set<S2CellId> parentCells;
+ for (size_t j = 0; j < cover.size(); ++j) {
+ for (S2CellId id = cover[j].parent();
+ id.level() >= _params.coarsestIndexedLevel; id = id.parent()) {
+ parentCells.insert(id);
+ }
+ }
+
+ // Create the actual $in statement.
+ BSONArrayBuilder inBuilder;
+ for (set<S2CellId>::const_iterator it = parentCells.begin();
+ it != parentCells.end(); ++it) {
+ inBuilder.append(it->toString());
+ }
+ orBuilder.append(BSON(_fields[i].field << BSON("$in" << inBuilder.arr())));
+
+ // Join the regexes with the in statement via an or.
+ // TODO(hk): see if this actually works with two geo fields or if they have
+ // to be joined with an and or what.
+ frsObjBuilder.append("$or", orBuilder.arr());
+ }
+ return frsObjBuilder.obj();
+ }
+
+ Record* S2Cursor::_current() { return _btreeCursor->currLoc().rec(); }
+ BSONObj S2Cursor::current() { return _btreeCursor->currLoc().obj(); }
+ DiskLoc S2Cursor::currLoc() { return _btreeCursor->currLoc(); }
+ BSONObj S2Cursor::currKey() const { return _btreeCursor->currKey(); }
+ DiskLoc S2Cursor::refLoc() { return DiskLoc(); }
+ long long S2Cursor::nscanned() { return _nscanned; }
+
+ // This is the actual search.
+ bool S2Cursor::advance() {
+ if (_numToReturn <= 0) { return false; }
+ for (; _btreeCursor->ok(); _btreeCursor->advance()) {
+ if (_seen.end() != _seen.find(_btreeCursor->currLoc())) { continue; }
+ _seen.insert(_btreeCursor->currLoc());
+ ++_nscanned;
+
+ MatchDetails details;
+ bool matched = _matcher->matchesCurrent(_btreeCursor.get(), &details);
+ if (!matched) { continue; }
+
+ const BSONObj &indexedObj = _btreeCursor->currLoc().obj();
+
+ size_t geoFieldsMatched = 0;
+ // OK, cool, non-geo match satisfied. See if the object actually overlaps w/the geo
+ // query fields.
+ for (size_t i = 0; i < _fields.size(); ++i) {
+ BSONElementSet geoFieldElements;
+ indexedObj.getFieldsDotted(_fields[i].field, geoFieldElements);
+ if (geoFieldElements.empty()) { continue; }
+
+ bool match = false;
+
+ for (BSONElementSet::iterator oi = geoFieldElements.begin();
+ oi != geoFieldElements.end(); ++oi) {
+ const BSONObj &geoObj = oi->Obj();
+ if (GeoJSONParser::isPolygon(geoObj)) {
+ S2Polygon shape;
+ GeoJSONParser::parsePolygon(geoObj, &shape);
+ match = _fields[i].intersectsPolygon(shape);
+ } else if (GeoJSONParser::isLineString(geoObj)) {
+ S2Polyline shape;
+ GeoJSONParser::parseLineString(geoObj, &shape);
+ match = _fields[i].intersectsLine(shape);
+ } else if (GeoJSONParser::isPoint(geoObj)) {
+ S2Cell point;
+ GeoJSONParser::parsePoint(geoObj, &point);
+ match = _fields[i].intersectsPoint(point);
+ }
+ if (match) break;
+ }
+
+ if (match) { ++geoFieldsMatched; }
+ }
+
+ if (geoFieldsMatched == _fields.size()) {
+ // We have a winner! And we point at it.
+ --_numToReturn;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // TODO: yielding is very un-tested.
+ // This is called when we're supposed to yield.
+ void S2Cursor::noteLocation() {
+ _btreeCursor->noteLocation();
+ _seen.clear();
+ }
+
+ // Called when we're un-yielding.
+ void S2Cursor::checkLocation() {
+ _btreeCursor->checkLocation();
+ // We are pointing at a valid btree location now, but it may not be a valid result.
+ // This ensures that we're pointing at a valid result that satisfies the query.
+
+ // There is something subtle here: Say we point at something valid, and note the location
+ // (yield), then checkLocation (unyield), when we call advance, we don't go past the object
+ // that we were/are pointing at since we only do that if we've seen it before (that is, it's
+ // in _seen, which we clear when we yield).
+
+ advance();
+ }
+
+ void S2Cursor::explainDetails(BSONObjBuilder& b) {
+ // TODO(hk): Dump more meaningful stats.
+ b << "nscanned " << _nscanned;
+ }
+} // namespace mongo
diff --git a/src/mongo/db/geo/s2cursor.h b/src/mongo/db/geo/s2cursor.h
new file mode 100644
index 00000000000..e220f407062
--- /dev/null
+++ b/src/mongo/db/geo/s2cursor.h
@@ -0,0 +1,84 @@
+/**
+* Copyright (C) 2012 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <vector>
+#include "mongo/db/jsobj.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/btree.h"
+#include "mongo/db/cursor.h"
+#include "mongo/db/diskloc.h"
+#include "mongo/db/matcher.h"
+#include "mongo/db/queryutil.h"
+#include "mongo/db/geo/s2common.h"
+
+namespace mongo {
+ class S2Cursor : public Cursor {
+ public:
+ S2Cursor(const BSONObj &keyPattern, const IndexDetails* details, const BSONObj &query,
+ const vector<GeoQueryField> &regions, const S2IndexingParams &params,
+ int numWanted);
+ virtual ~S2Cursor();
+ virtual CoveredIndexMatcher *matcher() const;
+
+ virtual bool supportYields() { return true; }
+ virtual bool supportGetMore() { return true; }
+ virtual bool isMultiKey() const { return true; }
+ virtual bool autoDedup() const { return false; }
+ virtual bool modifiedKeys() const { return true; }
+ virtual bool getsetdup(DiskLoc loc) { return false; }
+ virtual string toString() { return "S2Cursor"; }
+ BSONObj indexKeyPattern() { return _keyPattern; }
+ virtual bool ok();
+ virtual Record* _current();
+ virtual BSONObj current();
+ virtual DiskLoc currLoc();
+ virtual bool advance();
+ virtual BSONObj currKey() const;
+ virtual DiskLoc refLoc();
+ virtual void noteLocation();
+ virtual void checkLocation();
+ virtual long long nscanned();
+ virtual void explainDetails(BSONObjBuilder& b);
+ private:
+ // Make an object that describes the restrictions on all possible valid keys.
+ // It's kind of a monstrous object. Thanks, FieldRangeSet, for doing all the work
+ // for us.
+ BSONObj makeUnifiedFRS();
+
+ // Need this to make a FieldRangeSet.
+ const IndexDetails *_details;
+ // The query with the geo stuff taken out. We use this with a matcher.
+ BSONObj _filteredQuery;
+ // What geo regions are we looking for?
+ vector<GeoQueryField> _fields;
+ // We use this for matching non-GEO stuff.
+ shared_ptr<CoveredIndexMatcher> _matcher;
+ // How were the keys created? We need this to search for the right stuff.
+ S2IndexingParams _params;
+ // How many things did we scan/look at? Not sure exactly how this is defined.
+ long long _nscanned;
+ // We have to pass this to the FieldRangeVector ctor (in modified form).
+ BSONObj _keyPattern;
+ // How many docs do we want to return? Starts with the # the user requests
+ // and goes down.
+ int _numToReturn;
+
+ // What have we checked so we don't repeat it and waste time?
+ set<DiskLoc> _seen;
+ // This really does all the work/points into the btree.
+ scoped_ptr<BtreeCursor> _btreeCursor;
+ };
+} // namespace mongo
diff --git a/src/mongo/db/geo/s2index.cpp b/src/mongo/db/geo/s2index.cpp
new file mode 100644
index 00000000000..c03d6dae906
--- /dev/null
+++ b/src/mongo/db/geo/s2index.cpp
@@ -0,0 +1,311 @@
+/**
+* Copyright (C) 2012 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "mongo/db/namespace-inl.h"
+#include "mongo/db/jsobj.h"
+#include "mongo/db/index.h"
+#include "mongo/db/queryutil.h"
+#include "mongo/db/geo/geojsonparser.h"
+#include "mongo/db/geo/s2common.h"
+#include "mongo/db/geo/s2cursor.h"
+#include "third_party/s2/s2.h"
+#include "third_party/s2/s2cell.h"
+#include "third_party/s2/s2polygon.h"
+#include "third_party/s2/s2polyline.h"
+#include "third_party/s2/s2regioncoverer.h"
+
+namespace {
+ // Used in a handful of places in GeoSphere2DType below.
+ static void keysFromRegion(S2RegionCoverer *coverer, const S2Region &region,
+ vector<string> *out) {
+ vector<S2CellId> covering;
+ coverer->GetCovering(region, &covering);
+ for (size_t i = 0; i < covering.size(); ++i) {
+ out->push_back(covering[i].toString());
+ }
+ }
+} // namespace
+
+namespace mongo {
+
+ static const string SPHERE_2D_NAME = "s2d";
+
+ class GeoSphere2DType : public IndexType {
+ public:
+ // We keep track of what fields we've indexed and if they're geo or not.
+ struct IndexedField {
+ enum Type {
+ GEO,
+ LITERAL
+ };
+
+ Type type;
+ string name;
+ IndexedField(Type t, const string& n) : type(t), name(n) { }
+ };
+
+ GeoSphere2DType(const IndexPlugin *plugin, const IndexSpec *spec,
+ const S2IndexingParams &params)
+ : IndexType(plugin, spec), _params(params) {
+ int geoFields = 0;
+ // Categorize the fields we're indexing and make sure we have a geo field.
+ BSONObjIterator i(spec->keyPattern);
+ while (i.more()) {
+ BSONElement e = i.next();
+ if (e.type() == String && SPHERE_2D_NAME == e.valuestr()) {
+ _fields.push_back(IndexedField(IndexedField::GEO, e.fieldName()));
+ ++geoFields;
+ } else {
+ _fields.push_back(IndexedField(IndexedField::LITERAL, e.fieldName()));
+ }
+ }
+ uassert(16450, "Expect at least one geo field, spec=" + spec->keyPattern.toString(),
+ geoFields >= 1);
+ }
+
+ virtual ~GeoSphere2DType() { }
+
+ void getKeys(const BSONObj& obj, BSONObjSet& keys) const {
+ verify(_fields.size() >= 1);
+
+ BSONObjSet keysToAdd;
+ // We output keys in the same order as the fields we index.
+ for (size_t i = 0; i < _fields.size(); ++i) {
+ const IndexedField &field = _fields[i];
+
+ // First, we get the keys that this field adds. Either they're added literally from
+ // the value of the field, or they're transformed if the field is geo.
+ BSONElementSet fieldElements;
+ obj.getFieldsDotted(field.name, fieldElements);
+
+ BSONObj keysForThisField;
+ if (IndexedField::GEO == field.type) {
+ keysForThisField = getGeoKeys(fieldElements);
+ } else if (IndexedField::LITERAL == field.type) {
+ keysForThisField = getLiteralKeys(fieldElements);
+ } else {
+ verify(0);
+ }
+
+ // We expect there to be _spec->_missingField() present in the keys if data is
+ // missing. So, this should be non-empty.
+ verify(!keysForThisField.isEmpty());
+
+ // We take the Cartesian product of all of the keys. This requires that we have
+ // some keys to take the Cartesian product with. If keysToAdd.empty(), we
+ // initialize it.
+ if (keysToAdd.empty()) {
+ // This should only happen w/the first field.
+ verify(0 == i);
+ BSONObjIterator newIt(keysForThisField);
+ while (newIt.more()) {
+ BSONObjBuilder b;
+ b.append("", newIt.next().String());
+ keysToAdd.insert(b.obj());
+ }
+ continue;
+ }
+
+ BSONObjSet updatedKeysToAdd;
+ for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end();
+ ++it) {
+
+ BSONObjIterator newIt(keysForThisField);
+ while (newIt.more()) {
+ BSONObjBuilder b;
+ b.appendElements(*it);
+ b.append("", newIt.next().String());
+ updatedKeysToAdd.insert(b.obj());
+ }
+ }
+ keysToAdd = updatedKeysToAdd;
+ }
+
+ if (keysToAdd.size() > _params.maxKeysPerInsert) {
+ warning() << "insert of geo object generated lots of keys.\n"
+ << "consider creating larger buckets. obj="
+ << obj;
+ }
+
+ for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); ++it) {
+ keys.insert(*it);
+ }
+ }
+
+ // Entry point for a search.
+ virtual shared_ptr<Cursor> newCursor(const BSONObj& query, const BSONObj& order,
+ int numWanted) const {
+ // I copied this from 2d.cpp. Guard against perversion.
+ if (numWanted < 0) numWanted *= -1;
+ if (0 == numWanted) numWanted = INT_MAX;
+
+ vector<GeoQueryField> regions;
+
+ // Go through the fields that we index, and for each geo one, make a GeoQueryField
+ // object for the S2Cursor class to do intersection testing/cover generating with.
+ for (size_t i = 0; i < _fields.size(); ++i) {
+ const IndexedField &field = _fields[i];
+ if (IndexedField::GEO != field.type) { continue; }
+
+ // Example of what we're trying to parse:
+ // pointA = { "type" : "Point", "coordinates": [ 40, 5 ] }
+ // t.find({ "geo" : { "$intersect" : { "$point" : pointA} } })
+ // where field.name is "geo"
+ BSONElement e = query.getFieldDotted(field.name);
+ if (e.eoo()) { continue; }
+
+ if (!e.isABSONObj()) { continue; }
+ e = e.embeddedObject().firstElement();
+
+ if (!e.isABSONObj()) { continue; }
+
+ BSONObj::MatchType matchType = static_cast<BSONObj::MatchType>(e.getGtLtOp());
+ if (BSONObj::opINTERSECT != matchType && BSONObj::opWITHIN != matchType) {
+ continue;
+ }
+
+ e = e.embeddedObject().firstElement();
+ if (!e.isABSONObj()) { continue; }
+
+ BSONObj shapeObj = e.embeddedObject();
+ if (2 != shapeObj.nFields()) { continue; }
+
+ GeoQueryField geoQueryField(field.name);
+
+ if (strcmp(e.fieldName(), "$geometry")) { continue; }
+ if (GeoJSONParser::isPolygon(shapeObj)) {
+ // We can't really pass these things around willy-nilly except by ptr.
+ // The cursor owns them.
+ geoQueryField.polygon = new S2Polygon();
+ GeoJSONParser::parsePolygon(shapeObj, geoQueryField.polygon);
+ } else if (GeoJSONParser::isPoint(shapeObj)) {
+ geoQueryField.cell = new S2Cell();
+ GeoJSONParser::parsePoint(shapeObj, geoQueryField.cell);
+ } else if (GeoJSONParser::isLineString(shapeObj)) {
+ geoQueryField.line = new S2Polyline();
+ GeoJSONParser::parseLineString(shapeObj, geoQueryField.line);
+ } else {
+ // Maybe it's unknown geometry, maybe it's garbage.
+ // TODO: alert the user?
+ continue;
+ }
+ regions.push_back(geoQueryField);
+ }
+
+ S2Cursor *cursor = new S2Cursor(keyPattern(), getDetails(), query, regions, _params,
+ numWanted);
+ return shared_ptr<Cursor>(cursor);
+ }
+
+ virtual IndexSuitability suitability(const BSONObj& query, const BSONObj& order) const {
+ for (size_t i = 0; i < _fields.size(); ++i) {
+ const IndexedField &field = _fields[i];
+ if (IndexedField::GEO != field.type) { continue; }
+
+ BSONElement e = query.getFieldDotted(field.name);
+ if (Object != e.type()) { continue; }
+ // we only support opWITHIN and opINTERSECT
+ // getGtLtOp is horribly misnamed and really means get the operation.
+ switch (e.embeddedObject().firstElement().getGtLtOp()) {
+ case BSONObj::opWITHIN:
+ case BSONObj::opINTERSECT:
+ return OPTIMAL;
+ default:
+ return HELPFUL;
+ }
+ }
+ return USELESS;
+ }
+
+ const IndexDetails* getDetails() const { return _spec->getDetails(); }
+ private:
+ // Get the index keys for elements that are GeoJSON.
+ BSONObj getGeoKeys(const BSONElementSet &elements) const {
+ BSONArrayBuilder aBuilder;
+
+ S2RegionCoverer coverer;
+ _params.configureCoverer(&coverer);
+
+ // See here for GeoJSON format: geojson.org/geojson-spec.html
+ for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) {
+ const BSONObj &obj = i->Obj();
+
+ vector<string> cells;
+ if (GeoJSONParser::isPolygon(obj)) {
+ S2Polygon shape;
+ GeoJSONParser::parsePolygon(obj, &shape);
+ keysFromRegion(&coverer, shape, &cells);
+ } else if (GeoJSONParser::isLineString(obj)) {
+ S2Polyline shape;
+ GeoJSONParser::parseLineString(obj, &shape);
+ keysFromRegion(&coverer, shape, &cells);
+ } else if (GeoJSONParser::isPoint(obj)) {
+ S2Cell point;
+ GeoJSONParser::parsePoint(obj, &point);
+ keysFromRegion(&coverer, point, &cells);
+ } else {
+ // TODO(hk): report an error?
+ }
+
+ for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) {
+ aBuilder.append(*it);
+ }
+ }
+
+ if (0 == aBuilder.arrSize()) {
+ // TODO(hk): We use "" for empty. I should verify this actually works.
+ aBuilder.append("");
+ }
+
+ return aBuilder.arr();
+ }
+
+ // elements is a non-geo field. Add the values literally, expanding arrays.
+ BSONObj getLiteralKeys(const BSONElementSet &elements) const {
+ BSONArrayBuilder builder;
+ if (0 == elements.size()) {
+ builder.append("");
+ } else {
+ for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) {
+ builder.append(*i);
+ }
+ }
+ return builder.arr();
+ }
+
+ vector<IndexedField> _fields;
+ S2IndexingParams _params;
+ };
+
+ class GeoSphere2DIndexPlugin : public IndexPlugin {
+ public:
+ GeoSphere2DIndexPlugin() : IndexPlugin(SPHERE_2D_NAME) { }
+
+ virtual IndexType* generate(const IndexSpec* spec) const {
+ S2IndexingParams params;
+ // TODO: parse params optionally from spec?
+ params.maxKeysPerInsert = 100;
+ // This is advisory.
+ params.maxCellsInCovering = 5;
+ const double radiusOfEarthInMeters = 6378.1 * 1000.0;
+ // These are not advisory.
+ params.finestIndexedLevel = S2::kAvgEdge.GetClosestLevel(10.0 / radiusOfEarthInMeters);
+ params.coarsestIndexedLevel =
+ S2::kAvgEdge.GetClosestLevel(1000000.0 / radiusOfEarthInMeters);
+ return new GeoSphere2DType(this, spec, params);
+ }
+ } geoSphere2DIndexPlugin;
+} // namespace mongo
diff --git a/src/mongo/db/jsobj.cpp b/src/mongo/db/jsobj.cpp
index 2278097dd8f..19741de8b7b 100644
--- a/src/mongo/db/jsobj.cpp
+++ b/src/mongo/db/jsobj.cpp
@@ -291,9 +291,13 @@ namespace mongo {
}
else if ( fn[1] == 't' && fn[2] == 'y' && fn[3] == 'p' && fn[4] == 'e' && fn[5] == 0 )
return BSONObj::opTYPE;
- else if ( fn[1] == 'i' && fn[2] == 'n' && fn[3] == 0 )
- return BSONObj::opIN;
- else if ( fn[1] == 'n' && fn[2] == 'i' && fn[3] == 'n' && fn[4] == 0 )
+ else if ( fn[1] == 'i' && fn[2] == 'n') {
+ if (0 == fn[3]) {
+ return BSONObj::opIN;
+ } else if (mongoutils::str::equals(fn + 3, "tersect")) {
+ return BSONObj::opINTERSECT;
+ }
+ } else if ( fn[1] == 'n' && fn[2] == 'i' && fn[3] == 'n' && fn[4] == 0 )
return BSONObj::NIN;
else if ( fn[1] == 'a' && fn[2] == 'l' && fn[3] == 'l' && fn[4] == 0 )
return BSONObj::opALL;
diff --git a/src/mongo/db/matcher.cpp b/src/mongo/db/matcher.cpp
index 1d29b255b95..719dc2dfd2c 100644
--- a/src/mongo/db/matcher.cpp
+++ b/src/mongo/db/matcher.cpp
@@ -371,6 +371,7 @@ namespace mongo {
}
case BSONObj::opNEAR:
case BSONObj::opWITHIN:
+ case BSONObj::opINTERSECT:
case BSONObj::opMAX_DISTANCE:
break;
default:
diff --git a/src/mongo/db/queryutil.cpp b/src/mongo/db/queryutil.cpp
index 63c887b672f..73bb151cdda 100644
--- a/src/mongo/db/queryutil.cpp
+++ b/src/mongo/db/queryutil.cpp
@@ -449,6 +449,9 @@ namespace mongo {
case BSONObj::opWITHIN:
_special = "2d";
break;
+ case BSONObj::opINTERSECT:
+ _special = "s2d";
+ break;
case BSONObj::opEXISTS: {
if ( !existsSpec ) {
lower = upper = staticNull.firstElement();