diff options
author | Hari Khalsa <hkhalsa@10gen.com> | 2013-04-11 16:22:55 -0400 |
---|---|---|
committer | Hari Khalsa <hkhalsa@10gen.com> | 2013-04-12 16:40:38 -0400 |
commit | ef3d06fb6c5402d56a90ad60fd61c761021b92a7 (patch) | |
tree | 4097ad3c2fa71297977faddae8bc6c8028c3997e /src/mongo/db/index/haystack_access_method.cpp | |
parent | 7797f459758a98bce90593e583f90c858a287a1c (diff) | |
download | mongo-ef3d06fb6c5402d56a90ad60fd61c761021b92a7.tar.gz |
migrate fts/haystack + add/del logic SERVER-8791 SERVER-9164 SERVER-9165
Diffstat (limited to 'src/mongo/db/index/haystack_access_method.cpp')
-rw-r--r-- | src/mongo/db/index/haystack_access_method.cpp | 207 |
1 files changed, 207 insertions, 0 deletions
diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp new file mode 100644 index 00000000000..ca44366378d --- /dev/null +++ b/src/mongo/db/index/haystack_access_method.cpp @@ -0,0 +1,207 @@ +/** + * Copyright (C) 2013 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mongo/db/index/haystack_access_method.h" + +#include "mongo/base/status.h" +#include "mongo/db/btreecursor.h" +#include "mongo/db/geo/hash.h" +#include "mongo/db/index/haystack_access_method_internal.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/pdfile.h" + +namespace mongo { + + static const string GEOSEARCHNAME = "geoHaystack"; + + HaystackAccessMethod::HaystackAccessMethod(IndexDescriptor* descriptor) + : BtreeBasedAccessMethod(descriptor) { + + BSONElement e = descriptor->getInfoElement("bucketSize"); + uassert(16777, "need bucketSize", e.isNumber()); + _bucketSize = e.numberDouble(); + uassert(16769, "bucketSize cannot be zero", _bucketSize != 0.0); + + // Example: + // db.foo.ensureIndex({ pos : "geoHaystack", type : 1 }, { bucketSize : 1 }) + BSONObjIterator i(descriptor->keyPattern()); + while (i.more()) { + BSONElement e = i.next(); + if (e.type() == String && GEOSEARCHNAME == e.valuestr()) { + uassert(16770, "can't have more than one geo field", _geoField.size() == 0); + uassert(16771, "the geo field has to be first in index", + _otherFields.size() == 0); + _geoField = e.fieldName(); + } else { + uassert(16772, "geoSearch can only have 1 non-geo field for now", + _otherFields.size() == 0); + _otherFields.push_back(e.fieldName()); + } + } + + uassert(16773, "no geo field specified", _geoField.size()); + uassert(16774, "no non-geo fields specified", _otherFields.size()); + } + + void HaystackAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { + BSONElement loc = obj.getFieldDotted(_geoField); + + if (loc.eoo()) { return; } + + uassert(16775, "latlng not an array", loc.isABSONObj()); + string root; + { + BSONObjIterator i(loc.Obj()); + BSONElement x = i.next(); + BSONElement y = i.next(); + root = makeString(hash(x), hash(y)); + } + + verify(_otherFields.size() == 1); + + BSONElementSet all; + + // This is getFieldsDotted (plural not singular) since the object we're indexing + // may be an array. + obj.getFieldsDotted(_otherFields[0], all); + + if (all.size() == 0) { + // We're indexing a document that doesn't have the secondary non-geo field present. + // XXX: do we want to add this even if all.size() > 0? result:empty search terms + // match everything instead of only things w/empty search terms) + addKey(root, BSONElement(), keys); + } else { + // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"}, + // all.size()==1. We can query on the complete field. + // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values + // "A" and "B". The query looks for any of the fields in the array. + for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) { + addKey(root, *i, keys); + } + } + } + + int HaystackAccessMethod::hash(const BSONElement& e) const { + uassert(16776, "geo field is not a number", e.isNumber()); + double d = e.numberDouble(); + d += 180; + d /= _bucketSize; + return static_cast<int>(d); + } + + string HaystackAccessMethod::makeString(int hashedX, int hashedY) const { + stringstream ss; + ss << hashedX << "_" << hashedY; + return ss.str(); + } + + // Build a new BSONObj with root in it. If e is non-empty, append that to the key. Insert + // the BSONObj into keys. + void HaystackAccessMethod::addKey(const string& root, const BSONElement& e, + BSONObjSet* keys) const { + BSONObjBuilder buf; + buf.append("", root); + + if (e.eoo()) + buf.appendNull(""); + else + buf.appendAs(e, ""); + + keys->insert(buf.obj()); + } + + Status HaystackAccessMethod::newCursor(IndexCursor** out) { + return Status(ErrorCodes::IllegalOperation, "Unimplemented seek called on Haystack"); + } + + void HaystackAccessMethod::searchCommand(const BSONObj& nearObj, double maxDistance, + const BSONObj& search, BSONObjBuilder* result, + unsigned limit) { + Timer t; + + LOG(1) << "SEARCH near:" << nearObj << " maxDistance:" << maxDistance + << " search: " << search << endl; + int x, y; + { + BSONObjIterator i(nearObj); + x = hash(i.next()); + y = hash(i.next()); + } + int scale = static_cast<int>(ceil(maxDistance / _bucketSize)); + + GeoHaystackSearchHopper hopper(nearObj, maxDistance, limit, _geoField); + + long long btreeMatches = 0; + + for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) { + for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) { + BSONObjBuilder bb; + bb.append("", makeString(x + a, y + b)); + + for (unsigned i = 0; i < _otherFields.size(); i++) { + // See if the non-geo field we're indexing on is in the provided search term. + BSONElement e = search.getFieldDotted(_otherFields[i]); + if (e.eoo()) + bb.appendNull(""); + else + bb.appendAs(e, ""); + } + + BSONObj key = bb.obj(); + + GEOQUADDEBUG("KEY: " << key); + + // TODO(hk): this keeps a set of all DiskLoc seen in this pass so that we don't + // consider the element twice. Do we want to instead store a hash of the set? + // Is this often big? + unordered_set<DiskLoc, DiskLoc::Hasher> thisPass; + + + scoped_ptr<BtreeCursor> cursor(BtreeCursor::make(nsdetails(_descriptor->parentNS()), + _descriptor->getOnDisk(), + key, + key, + true, + 1)); + while (cursor->ok() && !hopper.limitReached()) { + pair<unordered_set<DiskLoc, DiskLoc::Hasher>::iterator, bool> p + = thisPass.insert(cursor->currLoc()); + // If a new element was inserted (haven't seen the DiskLoc before), p.second + // is true. + if (p.second) { + hopper.consider(cursor->currLoc()); + GEOQUADDEBUG("\t" << cursor->current()); + btreeMatches++; + } + cursor->advance(); + } + } + } + + BSONArrayBuilder arr(result->subarrayStart("results")); + int num = hopper.appendResultsTo(&arr); + arr.done(); + + { + BSONObjBuilder b(result->subobjStart("stats")); + b.append("time", t.millis()); + b.appendNumber("btreeMatches", btreeMatches); + b.append("n", num); + b.done(); + } + } + +} // namespace mongo |