diff options
author | Benety Goh <benety@mongodb.com> | 2014-02-18 14:38:58 -0500 |
---|---|---|
committer | Benety Goh <benety@mongodb.com> | 2014-02-19 06:11:20 -0500 |
commit | 16612e609a83ebb06891e220200d4014331701cb (patch) | |
tree | c8637c0c12f07fde8748f9bc6d1ec91f771ce89d /src/mongo/db/index | |
parent | 85b43895e9f54277d1b9696951fc3e7550477e74 (diff) | |
download | mongo-16612e609a83ebb06891e220200d4014331701cb.tar.gz |
SERVER-12767 refactored key generation in db/index.
Diffstat (limited to 'src/mongo/db/index')
-rw-r--r-- | src/mongo/db/index/2d_access_method.cpp | 106 | ||||
-rw-r--r-- | src/mongo/db/index/2d_access_method.h | 5 | ||||
-rw-r--r-- | src/mongo/db/index/expression_key_generator.cpp | 436 | ||||
-rw-r--r-- | src/mongo/db/index/expression_key_generator.h | 128 | ||||
-rw-r--r-- | src/mongo/db/index/fts_access_method.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/index/hash_access_method.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/index/hash_access_method.h | 12 | ||||
-rw-r--r-- | src/mongo/db/index/haystack_access_method.cpp | 72 | ||||
-rw-r--r-- | src/mongo/db/index/haystack_access_method.h | 5 | ||||
-rw-r--r-- | src/mongo/db/index/s2_access_method.cpp | 128 | ||||
-rw-r--r-- | src/mongo/db/index/s2_access_method.h | 7 |
11 files changed, 579 insertions, 352 deletions
diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp index 4e21df67779..aebc92f1970 100644 --- a/src/mongo/db/index/2d_access_method.cpp +++ b/src/mongo/db/index/2d_access_method.cpp @@ -34,6 +34,7 @@ #include "mongo/db/geo/core.h" #include "mongo/db/index_names.h" #include "mongo/db/index/2d_common.h" +#include "mongo/db/index/expression_key_generator.h" #include "mongo/db/jsobj.h" #include "mongo/db/pdfile.h" @@ -78,117 +79,16 @@ namespace mongo { params.scaling = numBuckets / (params.max - params.min); _params.geoHashConverter.reset(new GeoHashConverter(params)); - - BSONObjBuilder b; - b.appendNull(""); - _nullObj = b.obj(); - _nullElt = _nullObj.firstElement(); } /** Finds the key objects to put in an index */ void TwoDAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - getKeys(obj, keys, NULL); + get2DKeys(obj, _params, keys, NULL); } /** Finds all locations in a geo-indexed object */ void TwoDAccessMethod::getKeys(const BSONObj& obj, vector<BSONObj>& locs) const { - getKeys(obj, NULL, &locs); - } - - /** Finds the key objects and/or locations for a geo-indexed object */ - void TwoDAccessMethod::getKeys(const BSONObj &obj, BSONObjSet* keys, - vector<BSONObj>* locs) const { - BSONElementMSet bSet; - - // Get all the nested location fields, but don't return individual elements from - // the last array, if it exists. - obj.getFieldsDotted(_params.geo.c_str(), bSet, false); - - if (bSet.empty()) - return; - - for (BSONElementMSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) { - BSONElement geo = *setI; - - if (geo.eoo() || !geo.isABSONObj()) - continue; - - // - // Grammar for location lookup: - // locs ::= [loc,loc,...,loc]|{<k>:loc,<k>:loc,...,<k>:loc}|loc - // loc ::= { <k1> : #, <k2> : # }|[#, #]|{} - // - // Empty locations are ignored, preserving single-location semantics - // - - BSONObj embed = geo.embeddedObject(); - if (embed.isEmpty()) - continue; - - // Differentiate between location arrays and locations - // by seeing if the first element value is a number - bool singleElement = embed.firstElement().isNumber(); - - BSONObjIterator oi(embed); - - while (oi.more()) { - BSONObj locObj; - - if (singleElement) { - locObj = embed; - } else { - BSONElement locElement = oi.next(); - - uassert(16804, str::stream() << "location object expected, location " - "array not in correct format", - locElement.isABSONObj()); - - locObj = locElement.embeddedObject(); - if(locObj.isEmpty()) - continue; - } - - BSONObjBuilder b(64); - - // Remember the actual location object if needed - if (locs) - locs->push_back(locObj); - - // Stop if we don't need to get anything but location objects - if (!keys) { - if (singleElement) break; - else continue; - } - - _params.geoHashConverter->hash(locObj, &obj).appendToBuilder(&b, ""); - - // Go through all the other index keys - for (vector<pair<string, int> >::const_iterator i = _params.other.begin(); - i != _params.other.end(); ++i) { - // Get *all* fields for the index key - BSONElementSet eSet; - obj.getFieldsDotted(i->first, eSet); - - if (eSet.size() == 0) - b.appendAs(_nullElt, ""); - else if (eSet.size() == 1) - b.appendAs(*(eSet.begin()), ""); - else { - // If we have more than one key, store as an array of the objects - BSONArrayBuilder aBuilder; - - for (BSONElementSet::iterator ei = eSet.begin(); ei != eSet.end(); - ++ei) { - aBuilder.append(*ei); - } - - b.append("", aBuilder.arr()); - } - } - keys->insert(b.obj()); - if(singleElement) break; - } - } + get2DKeys(obj, _params, NULL, &locs); } } // namespace mongo diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h index f387c121aad..594ae3ab27c 100644 --- a/src/mongo/db/index/2d_access_method.h +++ b/src/mongo/db/index/2d_access_method.h @@ -103,11 +103,6 @@ namespace mongo { virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); - // This is called by the two getKeys above. - void getKeys(const BSONObj &obj, BSONObjSet* keys, vector<BSONObj>* locs) const; - - BSONObj _nullObj; - BSONElement _nullElt; TwoDIndexingParams _params; }; diff --git a/src/mongo/db/index/expression_key_generator.cpp b/src/mongo/db/index/expression_key_generator.cpp new file mode 100644 index 00000000000..8b2061cb00a --- /dev/null +++ b/src/mongo/db/index/expression_key_generator.cpp @@ -0,0 +1,436 @@ +/** +* Copyright (C) 2014 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3 +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#include "mongo/db/index/expression_key_generator.h" + +#include <utility> +#include "mongo/db/fts/fts_index_format.h" +#include "mongo/db/geo/s2common.h" +#include "mongo/db/index_names.h" +#include "mongo/db/index/2d_common.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/mongoutils/str.h" + +namespace { + + using namespace mongo; + + // + // Helper functions for getHaystackKeys + // + + /** + * Build a new BSONObj with root in it. If e is non-empty, append that to the key. + * Insert the BSONObj into keys. + * Used by getHaystackKeys. + */ + void addKey(const string& root, const BSONElement& e, BSONObjSet* keys) { + BSONObjBuilder buf; + buf.append("", root); + + if (e.eoo()) + buf.appendNull(""); + else + buf.appendAs(e, ""); + + keys->insert(buf.obj()); + } + + + + // + // Helper functions for getS2Keys + // + + /** + * Get the index keys for elements that are GeoJSON. + * Used by getS2Keys. + */ + void getGeoKeys(const BSONObj& document, const BSONElementSet& elements, + const S2IndexingParams& params, + BSONObjSet* out) { + for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { + uassert(16754, "Can't parse geometry from element: " + i->toString(), + i->isABSONObj()); + const BSONObj &geoObj = i->Obj(); + + vector<string> cells; + bool succeeded = S2SearchUtil::getKeysForObject(geoObj, params, &cells); + uassert(16755, "Can't extract geo keys from object, malformed geometry?: " + + document.toString(), succeeded); + + uassert(16756, "Unable to generate keys for (likely malformed) geometry: " + + document.toString(), + cells.size() > 0); + + for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) { + BSONObjBuilder b; + b.append("", *it); + out->insert(b.obj()); + } + } + + if (0 == out->size()) { + BSONObjBuilder b; + b.appendNull(""); + out->insert(b.obj()); + } + } + + /** + * Expands array and appends items to 'out'. + * Used by getOneLiteralKey. + */ + void getLiteralKeysArray(const BSONObj& obj, BSONObjSet* out) { + BSONObjIterator objIt(obj); + if (!objIt.more()) { + // Empty arrays are indexed as undefined. + BSONObjBuilder b; + b.appendUndefined(""); + out->insert(b.obj()); + } else { + // Non-empty arrays are exploded. + while (objIt.more()) { + BSONObjBuilder b; + b.appendAs(objIt.next(), ""); + out->insert(b.obj()); + } + } + } + + /** + * If 'elt' is an array, expands elt and adds items to 'out'. + * Otherwise, adds 'elt' as a single element. + * Used by getLiteralKeys. + */ + void getOneLiteralKey(const BSONElement& elt, BSONObjSet* out) { + if (Array == elt.type()) { + getLiteralKeysArray(elt.Obj(), out); + } else { + // One thing, not an array, index as-is. + BSONObjBuilder b; + b.appendAs(elt, ""); + out->insert(b.obj()); + } + } + + /** + * elements is a non-geo field. Add the values literally, expanding arrays. + * Used by getS2Keys. + */ + void getLiteralKeys(const BSONElementSet& elements, BSONObjSet* out) { + if (0 == elements.size()) { + // Missing fields are indexed as null. + BSONObjBuilder b; + b.appendNull(""); + out->insert(b.obj()); + } else { + for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { + getOneLiteralKey(*i, out); + } + } + } + +} // namespace + +namespace mongo { + + using std::pair; + using std::string; + using std::vector; + + + + // + // 2D + // + + // static + void get2DKeys(const BSONObj &obj, const TwoDIndexingParams& params, + BSONObjSet* keys, std::vector<BSONObj>* locs) { + BSONElementMSet bSet; + + // Get all the nested location fields, but don't return individual elements from + // the last array, if it exists. + obj.getFieldsDotted(params.geo.c_str(), bSet, false); + + if (bSet.empty()) + return; + + for (BSONElementMSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) { + BSONElement geo = *setI; + + if (geo.eoo() || !geo.isABSONObj()) + continue; + + // + // Grammar for location lookup: + // locs ::= [loc,loc,...,loc]|{<k>:loc,<k>:loc,...,<k>:loc}|loc + // loc ::= { <k1> : #, <k2> : # }|[#, #]|{} + // + // Empty locations are ignored, preserving single-location semantics + // + + BSONObj embed = geo.embeddedObject(); + if (embed.isEmpty()) + continue; + + // Differentiate between location arrays and locations + // by seeing if the first element value is a number + bool singleElement = embed.firstElement().isNumber(); + + BSONObjIterator oi(embed); + + while (oi.more()) { + BSONObj locObj; + + if (singleElement) { + locObj = embed; + } else { + BSONElement locElement = oi.next(); + + uassert(16804, mongoutils::str::stream() << + "location object expected, location array not in correct format", + locElement.isABSONObj()); + + locObj = locElement.embeddedObject(); + if(locObj.isEmpty()) + continue; + } + + BSONObjBuilder b(64); + + // Remember the actual location object if needed + if (locs) + locs->push_back(locObj); + + // Stop if we don't need to get anything but location objects + if (!keys) { + if (singleElement) break; + else continue; + } + + params.geoHashConverter->hash(locObj, &obj).appendToBuilder(&b, ""); + + // Go through all the other index keys + for (vector<pair<string, int> >::const_iterator i = params.other.begin(); + i != params.other.end(); ++i) { + // Get *all* fields for the index key + BSONElementSet eSet; + obj.getFieldsDotted(i->first, eSet); + + if (eSet.size() == 0) + b.appendNull(""); + else if (eSet.size() == 1) + b.appendAs(*(eSet.begin()), ""); + else { + // If we have more than one key, store as an array of the objects + BSONArrayBuilder aBuilder; + + for (BSONElementSet::iterator ei = eSet.begin(); ei != eSet.end(); + ++ei) { + aBuilder.append(*ei); + } + + b.append("", aBuilder.arr()); + } + } + keys->insert(b.obj()); + if(singleElement) break; + } + } + } + + + + // + // FTS + // + + // static + void getFTSKeys(const BSONObj &obj, const fts::FTSSpec& ftsSpec, BSONObjSet* keys) { + fts::FTSIndexFormat::getKeys(ftsSpec, obj, keys); + } + + + + // + // Hash + // + + // static + void getHashKeys(const BSONObj& obj, const string& hashedField, HashSeed seed, + int hashVersion, bool isSparse, BSONObjSet* keys) { + const char* cstr = hashedField.c_str(); + BSONElement fieldVal = obj.getFieldDottedOrArray(cstr); + uassert(16766, "Error: hashed indexes do not currently support array values", + fieldVal.type() != Array ); + + if (!fieldVal.eoo()) { + BSONObj key = BSON( "" << makeSingleHashKey(fieldVal, seed, hashVersion)); + keys->insert(key); + } + else if (!isSparse) { + BSONObj nullObj = BSON("" << BSONNULL); + keys->insert(BSON("" << makeSingleHashKey(nullObj.firstElement(), seed, hashVersion))); + } + } + + // static + long long int makeSingleHashKey(const BSONElement& e, HashSeed seed, int v) { + massert(16767, "Only HashVersion 0 has been defined" , v == 0 ); + return BSONElementHasher::hash64(e, seed); + } + + + + // + // Haystack + // + + // static + void getHaystackKeys(const BSONObj& obj, const std::string& geoField, + const std::vector<std::string>& otherFields, + double bucketSize, BSONObjSet* keys) { + BSONElement loc = obj.getFieldDotted(geoField); + + if (loc.eoo()) { return; } + + uassert(16775, "latlng not an array", loc.isABSONObj()); + string root; + { + BSONObjIterator i(loc.Obj()); + BSONElement x = i.next(); + BSONElement y = i.next(); + root = makeHaystackString(hashHaystackElement(x, bucketSize), + hashHaystackElement(y, bucketSize)); + } + + verify(otherFields.size() == 1); + + BSONElementSet all; + + // This is getFieldsDotted (plural not singular) since the object we're indexing + // may be an array. + obj.getFieldsDotted(otherFields[0], all); + + if (all.size() == 0) { + // We're indexing a document that doesn't have the secondary non-geo field present. + // XXX: do we want to add this even if all.size() > 0? result:empty search terms + // match everything instead of only things w/empty search terms) + addKey(root, BSONElement(), keys); + } else { + // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"}, + // all.size()==1. We can query on the complete field. + // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values + // "A" and "B". The query looks for any of the fields in the array. + for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) { + addKey(root, *i, keys); + } + } + } + + // static + int hashHaystackElement(const BSONElement& e, double bucketSize) { + uassert(16776, "geo field is not a number", e.isNumber()); + double d = e.numberDouble(); + d += 180; + d /= bucketSize; + return static_cast<int>(d); + } + + // static + std::string makeHaystackString(int hashedX, int hashedY) { + mongoutils::str::stream ss; + ss << hashedX << "_" << hashedY; + return ss; + } + + + + // + // S2 + // + + void getS2Keys(const BSONObj& obj, const BSONObj& keyPattern, + const S2IndexingParams& params, BSONObjSet* keys) { + BSONObjSet keysToAdd; + // We output keys in the same order as the fields we index. + BSONObjIterator i(keyPattern); + while (i.more()) { + BSONElement e = i.next(); + + // First, we get the keys that this field adds. Either they're added literally from + // the value of the field, or they're transformed if the field is geo. + BSONElementSet fieldElements; + // false means Don't expand the last array, duh. + obj.getFieldsDotted(e.fieldName(), fieldElements, false); + + BSONObjSet keysForThisField; + if (IndexNames::GEO_2DSPHERE == e.valuestr()) { + getGeoKeys(obj, fieldElements, params, &keysForThisField); + } else { + getLiteralKeys(fieldElements, &keysForThisField); + } + + // We expect there to be the missing field element present in the keys if data is + // missing. So, this should be non-empty. + verify(!keysForThisField.empty()); + + // We take the Cartesian product of all of the keys. This requires that we have + // some keys to take the Cartesian product with. If keysToAdd.empty(), we + // initialize it. + if (keysToAdd.empty()) { + keysToAdd = keysForThisField; + continue; + } + + BSONObjSet updatedKeysToAdd; + for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); + ++it) { + for (BSONObjSet::const_iterator newIt = keysForThisField.begin(); + newIt!= keysForThisField.end(); ++newIt) { + BSONObjBuilder b; + b.appendElements(*it); + b.append(newIt->firstElement()); + updatedKeysToAdd.insert(b.obj()); + } + } + keysToAdd = updatedKeysToAdd; + } + + if (keysToAdd.size() > params.maxKeysPerInsert) { + warning() << "insert of geo object generated lots of keys (" << keysToAdd.size() + << ") consider creating larger buckets. obj=" + << obj; + } + + *keys = keysToAdd; + } + +} // namespace mongo diff --git a/src/mongo/db/index/expression_key_generator.h b/src/mongo/db/index/expression_key_generator.h new file mode 100644 index 00000000000..679c776a675 --- /dev/null +++ b/src/mongo/db/index/expression_key_generator.h @@ -0,0 +1,128 @@ +/** +* Copyright (C) 2014 MongoDB Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +* +* As a special exception, the copyright holders give permission to link the +* code of portions of this program with the OpenSSL library under certain +* conditions as described in each individual source file and distribute +* linked combinations including the program with the OpenSSL library. You +* must comply with the GNU Affero General Public License in all respects for +* all of the code used other than as permitted herein. If you modify file(s) +* with this exception, you may extend this exception to your version of the +* file(s), but you are not obligated to do so. If you do not wish to do so, +* delete this exception statement from your version. If you delete this +* exception statement from all source files in the program, then also delete +* it in the license file. +*/ + +#pragma once + +#include <vector> +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonmisc.h" +#include "mongo/db/hasher.h" + +namespace mongo { + + struct TwoDIndexingParams; + struct S2IndexingParams; + + namespace fts { + class FTSSpec; + } // namespace fts + + + + // + // 2D + // + + /** + * Generate keys for 2d access method. + * Finds the key objects and/or locations for a geo-indexed object. + */ + void get2DKeys(const BSONObj &obj, const TwoDIndexingParams& params, + BSONObjSet* keys, std::vector<BSONObj>* locs); + + + + // + // FTS + // + + /** + * Generates keys for FTS access method + */ + void getFTSKeys(const BSONObj &obj, const fts::FTSSpec& ftsSpec, BSONObjSet* keys); + + + + // + // Hash + // + + /** + * Generates keys for hash access method. + */ + void getHashKeys(const BSONObj& obj, const std::string& hashedField, HashSeed seed, + int hashVersion, bool isSparse, BSONObjSet* keys); + + /** + * Hashing function used by both getHashKeys and the cursors we create. + * Exposed for testing in dbtests/namespacetests.cpp and + * so mongo/db/index_legacy.cpp can use it. + */ + long long int makeSingleHashKey(const BSONElement& e, HashSeed seed, int v); + + + + // + // Haystack + // + + /** + * Generates keys for hay stack access method. + */ + void getHaystackKeys(const BSONObj& obj, const std::string& geoField, + const std::vector<std::string>& otherFields, + double bucketSize, + BSONObjSet* keys); + + /** + * Returns a hash of a BSON element. + * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. + */ + int hashHaystackElement(const BSONElement& e, double bucketSize); + + + + /** + * Joins two strings using underscore as separator. + * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. + */ + std::string makeHaystackString(int hashedX, int hashedY); + + + + // + // S2 + // + + /** + * Generates keys for S2 access method. + */ + void getS2Keys(const BSONObj& obj, const BSONObj& keyPattern, const S2IndexingParams& params, + BSONObjSet* keys); + +} // namespace mongo diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp index 47cb519f3bc..3eac70d8e71 100644 --- a/src/mongo/db/index/fts_access_method.cpp +++ b/src/mongo/db/index/fts_access_method.cpp @@ -27,7 +27,7 @@ */ #include "mongo/db/index/fts_access_method.h" -#include "mongo/db/fts/fts_index_format.h" +#include "mongo/db/index/expression_key_generator.h" namespace mongo { @@ -35,7 +35,7 @@ namespace mongo { : BtreeBasedAccessMethod(btreeState), _ftsSpec(btreeState->descriptor()->infoObj()) { } void FTSAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - fts::FTSIndexFormat::getKeys(_ftsSpec, obj, keys); + getFTSKeys(obj, _ftsSpec, keys); } } // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp index eb78f1934d8..a658e7078f4 100644 --- a/src/mongo/db/index/hash_access_method.cpp +++ b/src/mongo/db/index/hash_access_method.cpp @@ -28,15 +28,11 @@ #include "mongo/db/structure/btree/btree.h" #include "mongo/db/hasher.h" +#include "mongo/db/index/expression_key_generator.h" #include "mongo/db/index/hash_access_method.h" namespace mongo { - long long int HashAccessMethod::makeSingleKey(const BSONElement& e, HashSeed seed, int v) { - massert(16767, "Only HashVersion 0 has been defined" , v == 0 ); - return BSONElementHasher::hash64(e, seed); - } - HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState) : BtreeBasedAccessMethod(btreeState) { @@ -66,7 +62,7 @@ namespace mongo { //In case we have hashed indexes based on other hash functions in //the future, we store a hashVersion number. If hashVersion changes, - // "makeSingleKey" will need to change accordingly. + // "makeSingleHashKey" will need to change accordingly. //Defaults to 0 if "hashVersion" is not included in the index spec //or if the value of "hashversion" is not a number _hashVersion = descriptor->getInfoElement("hashVersion").numberInt(); @@ -79,25 +75,7 @@ namespace mongo { } void HashAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - getKeysImpl(obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), keys); - } - - // static - void HashAccessMethod::getKeysImpl(const BSONObj& obj, const string& hashedField, HashSeed seed, - int hashVersion, bool isSparse, BSONObjSet* keys) { - const char* cstr = hashedField.c_str(); - BSONElement fieldVal = obj.getFieldDottedOrArray(cstr); - uassert(16766, "Error: hashed indexes do not currently support array values", - fieldVal.type() != Array ); - - if (!fieldVal.eoo()) { - BSONObj key = BSON( "" << makeSingleKey(fieldVal, seed, hashVersion)); - keys->insert(key); - } - else if (!isSparse) { - BSONObj nullObj = BSON("" << BSONNULL); - keys->insert(BSON("" << makeSingleKey(nullObj.firstElement(), seed, hashVersion))); - } + getHashKeys(obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), keys); } } // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h index c2b9fa836b6..d0f46620334 100644 --- a/src/mongo/db/index/hash_access_method.h +++ b/src/mongo/db/index/hash_access_method.h @@ -53,18 +53,6 @@ namespace mongo { return Status::OK(); } - /** - * Hashing function used by both this class and the cursors we create. - * Exposed for testing and so mongo/db/index_legacy.cpp can use it. - */ - static long long int makeSingleKey(const BSONElement& e, HashSeed seed, int v); - - /** - * Exposed externally for testing purposes. - */ - static void getKeysImpl(const BSONObj& obj, const string& hashedField, HashSeed seed, - int hashVersion, bool isSparse, BSONObjSet* keys); - private: virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp index 0fdf9dfa78f..39d19070a38 100644 --- a/src/mongo/db/index/haystack_access_method.cpp +++ b/src/mongo/db/index/haystack_access_method.cpp @@ -30,6 +30,7 @@ #include "mongo/base/status.h" #include "mongo/db/geo/hash.h" +#include "mongo/db/index/expression_key_generator.h" #include "mongo/db/index/haystack_access_method_internal.h" #include "mongo/db/jsobj.h" #include "mongo/db/pdfile.h" @@ -71,70 +72,7 @@ namespace mongo { } void HaystackAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - BSONElement loc = obj.getFieldDotted(_geoField); - - if (loc.eoo()) { return; } - - uassert(16775, "latlng not an array", loc.isABSONObj()); - string root; - { - BSONObjIterator i(loc.Obj()); - BSONElement x = i.next(); - BSONElement y = i.next(); - root = makeString(hash(x), hash(y)); - } - - verify(_otherFields.size() == 1); - - BSONElementSet all; - - // This is getFieldsDotted (plural not singular) since the object we're indexing - // may be an array. - obj.getFieldsDotted(_otherFields[0], all); - - if (all.size() == 0) { - // We're indexing a document that doesn't have the secondary non-geo field present. - // XXX: do we want to add this even if all.size() > 0? result:empty search terms - // match everything instead of only things w/empty search terms) - addKey(root, BSONElement(), keys); - } else { - // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"}, - // all.size()==1. We can query on the complete field. - // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values - // "A" and "B". The query looks for any of the fields in the array. - for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) { - addKey(root, *i, keys); - } - } - } - - int HaystackAccessMethod::hash(const BSONElement& e) const { - uassert(16776, "geo field is not a number", e.isNumber()); - double d = e.numberDouble(); - d += 180; - d /= _bucketSize; - return static_cast<int>(d); - } - - string HaystackAccessMethod::makeString(int hashedX, int hashedY) const { - stringstream ss; - ss << hashedX << "_" << hashedY; - return ss.str(); - } - - // Build a new BSONObj with root in it. If e is non-empty, append that to the key. Insert - // the BSONObj into keys. - void HaystackAccessMethod::addKey(const string& root, const BSONElement& e, - BSONObjSet* keys) const { - BSONObjBuilder buf; - buf.append("", root); - - if (e.eoo()) - buf.appendNull(""); - else - buf.appendAs(e, ""); - - keys->insert(buf.obj()); + getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys); } void HaystackAccessMethod::searchCommand(const BSONObj& nearObj, double maxDistance, @@ -147,8 +85,8 @@ namespace mongo { int x, y; { BSONObjIterator i(nearObj); - x = hash(i.next()); - y = hash(i.next()); + x = hashHaystackElement(i.next(), _bucketSize); + y = hashHaystackElement(i.next(), _bucketSize); } int scale = static_cast<int>(ceil(maxDistance / _bucketSize)); @@ -159,7 +97,7 @@ namespace mongo { for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) { for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) { BSONObjBuilder bb; - bb.append("", makeString(x + a, y + b)); + bb.append("", makeHaystackString(x + a, y + b)); for (unsigned i = 0; i < _otherFields.size(); i++) { // See if the non-geo field we're indexing on is in the provided search term. diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h index 0e1dd3426c7..29b6485f7d2 100644 --- a/src/mongo/db/index/haystack_access_method.h +++ b/src/mongo/db/index/haystack_access_method.h @@ -66,11 +66,6 @@ namespace mongo { private: virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); - // Helper methods called by getKeys: - int hash(const BSONElement& e) const; - string makeString(int hashedX, int hashedY) const; - void addKey(const string& root, const BSONElement& e, BSONObjSet* keys) const; - string _geoField; vector<string> _otherFields; double _bucketSize; diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp index fff6fbba7f3..5548be3b429 100644 --- a/src/mongo/db/index/s2_access_method.cpp +++ b/src/mongo/db/index/s2_access_method.cpp @@ -35,6 +35,7 @@ #include "mongo/db/geo/geoconstants.h" #include "mongo/db/geo/s2common.h" #include "mongo/db/index_names.h" +#include "mongo/db/index/expression_key_generator.h" #include "mongo/db/jsobj.h" namespace mongo { @@ -122,132 +123,7 @@ namespace mongo { } void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - BSONObjSet keysToAdd; - // We output keys in the same order as the fields we index. - BSONObjIterator i(_descriptor->keyPattern()); - while (i.more()) { - BSONElement e = i.next(); - - // First, we get the keys that this field adds. Either they're added literally from - // the value of the field, or they're transformed if the field is geo. - BSONElementSet fieldElements; - // false means Don't expand the last array, duh. - obj.getFieldsDotted(e.fieldName(), fieldElements, false); - - BSONObjSet keysForThisField; - if (IndexNames::GEO_2DSPHERE == e.valuestr()) { - getGeoKeys(obj, fieldElements, &keysForThisField); - } else { - getLiteralKeys(fieldElements, &keysForThisField); - } - - // We expect there to be the missing field element present in the keys if data is - // missing. So, this should be non-empty. - verify(!keysForThisField.empty()); - - // We take the Cartesian product of all of the keys. This requires that we have - // some keys to take the Cartesian product with. If keysToAdd.empty(), we - // initialize it. - if (keysToAdd.empty()) { - keysToAdd = keysForThisField; - continue; - } - - BSONObjSet updatedKeysToAdd; - for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end(); - ++it) { - for (BSONObjSet::const_iterator newIt = keysForThisField.begin(); - newIt!= keysForThisField.end(); ++newIt) { - BSONObjBuilder b; - b.appendElements(*it); - b.append(newIt->firstElement()); - updatedKeysToAdd.insert(b.obj()); - } - } - keysToAdd = updatedKeysToAdd; - } - - if (keysToAdd.size() > _params.maxKeysPerInsert) { - warning() << "insert of geo object generated lots of keys (" << keysToAdd.size() - << ") consider creating larger buckets. obj=" - << obj; - } - - *keys = keysToAdd; - } - - // Get the index keys for elements that are GeoJSON. - void S2AccessMethod::getGeoKeys(const BSONObj& document, const BSONElementSet& elements, - BSONObjSet* out) const { - for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { - uassert(16754, "Can't parse geometry from element: " + i->toString(), - i->isABSONObj()); - const BSONObj &geoObj = i->Obj(); - - vector<string> cells; - bool succeeded = S2SearchUtil::getKeysForObject(geoObj, _params, &cells); - uassert(16755, "Can't extract geo keys from object, malformed geometry?: " - + document.toString(), succeeded); - - uassert(16756, "Unable to generate keys for (likely malformed) geometry: " - + document.toString(), - cells.size() > 0); - - for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) { - BSONObjBuilder b; - b.append("", *it); - out->insert(b.obj()); - } - } - - if (0 == out->size()) { - BSONObjBuilder b; - b.appendNull(""); - out->insert(b.obj()); - } - } - - void S2AccessMethod::getLiteralKeysArray(const BSONObj& obj, BSONObjSet* out) const { - BSONObjIterator objIt(obj); - if (!objIt.more()) { - // Empty arrays are indexed as undefined. - BSONObjBuilder b; - b.appendUndefined(""); - out->insert(b.obj()); - } else { - // Non-empty arrays are exploded. - while (objIt.more()) { - BSONObjBuilder b; - b.appendAs(objIt.next(), ""); - out->insert(b.obj()); - } - } - } - - void S2AccessMethod::getOneLiteralKey(const BSONElement& elt, BSONObjSet* out) const { - if (Array == elt.type()) { - getLiteralKeysArray(elt.Obj(), out); - } else { - // One thing, not an array, index as-is. - BSONObjBuilder b; - b.appendAs(elt, ""); - out->insert(b.obj()); - } - } - - // elements is a non-geo field. Add the values literally, expanding arrays. - void S2AccessMethod::getLiteralKeys(const BSONElementSet& elements, - BSONObjSet* out) const { - if (0 == elements.size()) { - // Missing fields are indexed as null. - BSONObjBuilder b; - b.appendNull(""); - out->insert(b.obj()); - } else { - for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { - getOneLiteralKey(*i, out); - } - } + getS2Keys(obj, _descriptor->keyPattern(), _params, keys); } } // namespace mongo diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h index 228d6b9da77..df12026395b 100644 --- a/src/mongo/db/index/s2_access_method.h +++ b/src/mongo/db/index/s2_access_method.h @@ -57,13 +57,6 @@ namespace mongo { private: virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); - // getKeys calls the helper methods below. - void getGeoKeys(const BSONObj& document, const BSONElementSet& elements, - BSONObjSet* out) const; - void getLiteralKeys(const BSONElementSet& elements, BSONObjSet* out) const; - void getLiteralKeysArray(const BSONObj& obj, BSONObjSet* out) const; - void getOneLiteralKey(const BSONElement& elt, BSONObjSet *out) const; - S2IndexingParams _params; }; |