/** * Copyright (C) 2013 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the GNU Affero General Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #include "mongo/db/index/haystack_access_method.h" #include "mongo/base/status.h" #include "mongo/db/btreecursor.h" #include "mongo/db/geo/hash.h" #include "mongo/db/index/haystack_access_method_internal.h" #include "mongo/db/jsobj.h" #include "mongo/db/pdfile.h" namespace mongo { static const string GEOSEARCHNAME = "geoHaystack"; HaystackAccessMethod::HaystackAccessMethod(IndexDescriptor* descriptor) : BtreeBasedAccessMethod(descriptor) { BSONElement e = descriptor->getInfoElement("bucketSize"); uassert(16777, "need bucketSize", e.isNumber()); _bucketSize = e.numberDouble(); uassert(16769, "bucketSize cannot be zero", _bucketSize != 0.0); // Example: // db.foo.ensureIndex({ pos : "geoHaystack", type : 1 }, { bucketSize : 1 }) BSONObjIterator i(descriptor->keyPattern()); while (i.more()) { BSONElement e = i.next(); if (e.type() == String && GEOSEARCHNAME == e.valuestr()) { uassert(16770, "can't have more than one geo field", _geoField.size() == 0); uassert(16771, "the geo field has to be first in index", _otherFields.size() == 0); _geoField = e.fieldName(); } else { uassert(16772, "geoSearch can only have 1 non-geo field for now", _otherFields.size() == 0); _otherFields.push_back(e.fieldName()); } } uassert(16773, "no geo field specified", _geoField.size()); uassert(16774, "no non-geo fields specified", _otherFields.size()); } void HaystackAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { BSONElement loc = obj.getFieldDotted(_geoField); if (loc.eoo()) { return; } uassert(16775, "latlng not an array", loc.isABSONObj()); string root; { BSONObjIterator i(loc.Obj()); BSONElement x = i.next(); BSONElement y = i.next(); root = makeString(hash(x), hash(y)); } verify(_otherFields.size() == 1); BSONElementSet all; // This is getFieldsDotted (plural not singular) since the object we're indexing // may be an array. obj.getFieldsDotted(_otherFields[0], all); if (all.size() == 0) { // We're indexing a document that doesn't have the secondary non-geo field present. // XXX: do we want to add this even if all.size() > 0? result:empty search terms // match everything instead of only things w/empty search terms) addKey(root, BSONElement(), keys); } else { // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"}, // all.size()==1. We can query on the complete field. // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values // "A" and "B". The query looks for any of the fields in the array. for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) { addKey(root, *i, keys); } } } int HaystackAccessMethod::hash(const BSONElement& e) const { uassert(16776, "geo field is not a number", e.isNumber()); double d = e.numberDouble(); d += 180; d /= _bucketSize; return static_cast(d); } string HaystackAccessMethod::makeString(int hashedX, int hashedY) const { stringstream ss; ss << hashedX << "_" << hashedY; return ss.str(); } // Build a new BSONObj with root in it. If e is non-empty, append that to the key. Insert // the BSONObj into keys. void HaystackAccessMethod::addKey(const string& root, const BSONElement& e, BSONObjSet* keys) const { BSONObjBuilder buf; buf.append("", root); if (e.eoo()) buf.appendNull(""); else buf.appendAs(e, ""); keys->insert(buf.obj()); } Status HaystackAccessMethod::newCursor(IndexCursor** out) { return Status(ErrorCodes::IllegalOperation, "Unimplemented seek called on Haystack"); } void HaystackAccessMethod::searchCommand(const BSONObj& nearObj, double maxDistance, const BSONObj& search, BSONObjBuilder* result, unsigned limit) { Timer t; LOG(1) << "SEARCH near:" << nearObj << " maxDistance:" << maxDistance << " search: " << search << endl; int x, y; { BSONObjIterator i(nearObj); x = hash(i.next()); y = hash(i.next()); } int scale = static_cast(ceil(maxDistance / _bucketSize)); GeoHaystackSearchHopper hopper(nearObj, maxDistance, limit, _geoField); long long btreeMatches = 0; for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) { for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) { BSONObjBuilder bb; bb.append("", makeString(x + a, y + b)); for (unsigned i = 0; i < _otherFields.size(); i++) { // See if the non-geo field we're indexing on is in the provided search term. BSONElement e = search.getFieldDotted(_otherFields[i]); if (e.eoo()) bb.appendNull(""); else bb.appendAs(e, ""); } BSONObj key = bb.obj(); GEOQUADDEBUG("KEY: " << key); // TODO(hk): this keeps a set of all DiskLoc seen in this pass so that we don't // consider the element twice. Do we want to instead store a hash of the set? // Is this often big? unordered_set thisPass; scoped_ptr cursor(BtreeCursor::make(nsdetails(_descriptor->parentNS()), _descriptor->getOnDisk(), key, key, true, 1)); while (cursor->ok() && !hopper.limitReached()) { pair::iterator, bool> p = thisPass.insert(cursor->currLoc()); // If a new element was inserted (haven't seen the DiskLoc before), p.second // is true. if (p.second) { hopper.consider(cursor->currLoc()); GEOQUADDEBUG("\t" << cursor->current()); btreeMatches++; } cursor->advance(); } } } BSONArrayBuilder arr(result->subarrayStart("results")); int num = hopper.appendResultsTo(&arr); arr.done(); { BSONObjBuilder b(result->subobjStart("stats")); b.append("time", t.millis()); b.appendNumber("btreeMatches", btreeMatches); b.append("n", num); b.done(); } } } // namespace mongo