summaryrefslogtreecommitdiff
path: root/src/mongo/db/index
diff options
context:
space:
mode:
authorBenety Goh <benety@mongodb.com>2014-02-18 14:38:58 -0500
committerBenety Goh <benety@mongodb.com>2014-02-19 06:11:20 -0500
commit16612e609a83ebb06891e220200d4014331701cb (patch)
treec8637c0c12f07fde8748f9bc6d1ec91f771ce89d /src/mongo/db/index
parent85b43895e9f54277d1b9696951fc3e7550477e74 (diff)
downloadmongo-16612e609a83ebb06891e220200d4014331701cb.tar.gz
SERVER-12767 refactored key generation in db/index.
Diffstat (limited to 'src/mongo/db/index')
-rw-r--r--src/mongo/db/index/2d_access_method.cpp106
-rw-r--r--src/mongo/db/index/2d_access_method.h5
-rw-r--r--src/mongo/db/index/expression_key_generator.cpp436
-rw-r--r--src/mongo/db/index/expression_key_generator.h128
-rw-r--r--src/mongo/db/index/fts_access_method.cpp4
-rw-r--r--src/mongo/db/index/hash_access_method.cpp28
-rw-r--r--src/mongo/db/index/hash_access_method.h12
-rw-r--r--src/mongo/db/index/haystack_access_method.cpp72
-rw-r--r--src/mongo/db/index/haystack_access_method.h5
-rw-r--r--src/mongo/db/index/s2_access_method.cpp128
-rw-r--r--src/mongo/db/index/s2_access_method.h7
11 files changed, 579 insertions, 352 deletions
diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp
index 4e21df67779..aebc92f1970 100644
--- a/src/mongo/db/index/2d_access_method.cpp
+++ b/src/mongo/db/index/2d_access_method.cpp
@@ -34,6 +34,7 @@
#include "mongo/db/geo/core.h"
#include "mongo/db/index_names.h"
#include "mongo/db/index/2d_common.h"
+#include "mongo/db/index/expression_key_generator.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/pdfile.h"
@@ -78,117 +79,16 @@ namespace mongo {
params.scaling = numBuckets / (params.max - params.min);
_params.geoHashConverter.reset(new GeoHashConverter(params));
-
- BSONObjBuilder b;
- b.appendNull("");
- _nullObj = b.obj();
- _nullElt = _nullObj.firstElement();
}
/** Finds the key objects to put in an index */
void TwoDAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) {
- getKeys(obj, keys, NULL);
+ get2DKeys(obj, _params, keys, NULL);
}
/** Finds all locations in a geo-indexed object */
void TwoDAccessMethod::getKeys(const BSONObj& obj, vector<BSONObj>& locs) const {
- getKeys(obj, NULL, &locs);
- }
-
- /** Finds the key objects and/or locations for a geo-indexed object */
- void TwoDAccessMethod::getKeys(const BSONObj &obj, BSONObjSet* keys,
- vector<BSONObj>* locs) const {
- BSONElementMSet bSet;
-
- // Get all the nested location fields, but don't return individual elements from
- // the last array, if it exists.
- obj.getFieldsDotted(_params.geo.c_str(), bSet, false);
-
- if (bSet.empty())
- return;
-
- for (BSONElementMSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) {
- BSONElement geo = *setI;
-
- if (geo.eoo() || !geo.isABSONObj())
- continue;
-
- //
- // Grammar for location lookup:
- // locs ::= [loc,loc,...,loc]|{<k>:loc,<k>:loc,...,<k>:loc}|loc
- // loc ::= { <k1> : #, <k2> : # }|[#, #]|{}
- //
- // Empty locations are ignored, preserving single-location semantics
- //
-
- BSONObj embed = geo.embeddedObject();
- if (embed.isEmpty())
- continue;
-
- // Differentiate between location arrays and locations
- // by seeing if the first element value is a number
- bool singleElement = embed.firstElement().isNumber();
-
- BSONObjIterator oi(embed);
-
- while (oi.more()) {
- BSONObj locObj;
-
- if (singleElement) {
- locObj = embed;
- } else {
- BSONElement locElement = oi.next();
-
- uassert(16804, str::stream() << "location object expected, location "
- "array not in correct format",
- locElement.isABSONObj());
-
- locObj = locElement.embeddedObject();
- if(locObj.isEmpty())
- continue;
- }
-
- BSONObjBuilder b(64);
-
- // Remember the actual location object if needed
- if (locs)
- locs->push_back(locObj);
-
- // Stop if we don't need to get anything but location objects
- if (!keys) {
- if (singleElement) break;
- else continue;
- }
-
- _params.geoHashConverter->hash(locObj, &obj).appendToBuilder(&b, "");
-
- // Go through all the other index keys
- for (vector<pair<string, int> >::const_iterator i = _params.other.begin();
- i != _params.other.end(); ++i) {
- // Get *all* fields for the index key
- BSONElementSet eSet;
- obj.getFieldsDotted(i->first, eSet);
-
- if (eSet.size() == 0)
- b.appendAs(_nullElt, "");
- else if (eSet.size() == 1)
- b.appendAs(*(eSet.begin()), "");
- else {
- // If we have more than one key, store as an array of the objects
- BSONArrayBuilder aBuilder;
-
- for (BSONElementSet::iterator ei = eSet.begin(); ei != eSet.end();
- ++ei) {
- aBuilder.append(*ei);
- }
-
- b.append("", aBuilder.arr());
- }
- }
- keys->insert(b.obj());
- if(singleElement) break;
- }
- }
+ get2DKeys(obj, _params, NULL, &locs);
}
} // namespace mongo
diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h
index f387c121aad..594ae3ab27c 100644
--- a/src/mongo/db/index/2d_access_method.h
+++ b/src/mongo/db/index/2d_access_method.h
@@ -103,11 +103,6 @@ namespace mongo {
virtual void getKeys(const BSONObj& obj, BSONObjSet* keys);
- // This is called by the two getKeys above.
- void getKeys(const BSONObj &obj, BSONObjSet* keys, vector<BSONObj>* locs) const;
-
- BSONObj _nullObj;
- BSONElement _nullElt;
TwoDIndexingParams _params;
};
diff --git a/src/mongo/db/index/expression_key_generator.cpp b/src/mongo/db/index/expression_key_generator.cpp
new file mode 100644
index 00000000000..8b2061cb00a
--- /dev/null
+++ b/src/mongo/db/index/expression_key_generator.cpp
@@ -0,0 +1,436 @@
+/**
+* Copyright (C) 2014 MongoDB Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects for
+* all of the code used other than as permitted herein. If you modify file(s)
+* with this exception, you may extend this exception to your version of the
+* file(s), but you are not obligated to do so. If you do not wish to do so,
+* delete this exception statement from your version. If you delete this
+* exception statement from all source files in the program, then also delete
+* it in the license file.
+*/
+
+#include "mongo/db/index/expression_key_generator.h"
+
+#include <utility>
+#include "mongo/db/fts/fts_index_format.h"
+#include "mongo/db/geo/s2common.h"
+#include "mongo/db/index_names.h"
+#include "mongo/db/index/2d_common.h"
+#include "mongo/util/assert_util.h"
+#include "mongo/util/mongoutils/str.h"
+
+namespace {
+
+ using namespace mongo;
+
+ //
+ // Helper functions for getHaystackKeys
+ //
+
+ /**
+ * Build a new BSONObj with root in it. If e is non-empty, append that to the key.
+ * Insert the BSONObj into keys.
+ * Used by getHaystackKeys.
+ */
+ void addKey(const string& root, const BSONElement& e, BSONObjSet* keys) {
+ BSONObjBuilder buf;
+ buf.append("", root);
+
+ if (e.eoo())
+ buf.appendNull("");
+ else
+ buf.appendAs(e, "");
+
+ keys->insert(buf.obj());
+ }
+
+
+
+ //
+ // Helper functions for getS2Keys
+ //
+
+ /**
+ * Get the index keys for elements that are GeoJSON.
+ * Used by getS2Keys.
+ */
+ void getGeoKeys(const BSONObj& document, const BSONElementSet& elements,
+ const S2IndexingParams& params,
+ BSONObjSet* out) {
+ for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) {
+ uassert(16754, "Can't parse geometry from element: " + i->toString(),
+ i->isABSONObj());
+ const BSONObj &geoObj = i->Obj();
+
+ vector<string> cells;
+ bool succeeded = S2SearchUtil::getKeysForObject(geoObj, params, &cells);
+ uassert(16755, "Can't extract geo keys from object, malformed geometry?: "
+ + document.toString(), succeeded);
+
+ uassert(16756, "Unable to generate keys for (likely malformed) geometry: "
+ + document.toString(),
+ cells.size() > 0);
+
+ for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) {
+ BSONObjBuilder b;
+ b.append("", *it);
+ out->insert(b.obj());
+ }
+ }
+
+ if (0 == out->size()) {
+ BSONObjBuilder b;
+ b.appendNull("");
+ out->insert(b.obj());
+ }
+ }
+
+ /**
+ * Expands array and appends items to 'out'.
+ * Used by getOneLiteralKey.
+ */
+ void getLiteralKeysArray(const BSONObj& obj, BSONObjSet* out) {
+ BSONObjIterator objIt(obj);
+ if (!objIt.more()) {
+ // Empty arrays are indexed as undefined.
+ BSONObjBuilder b;
+ b.appendUndefined("");
+ out->insert(b.obj());
+ } else {
+ // Non-empty arrays are exploded.
+ while (objIt.more()) {
+ BSONObjBuilder b;
+ b.appendAs(objIt.next(), "");
+ out->insert(b.obj());
+ }
+ }
+ }
+
+ /**
+ * If 'elt' is an array, expands elt and adds items to 'out'.
+ * Otherwise, adds 'elt' as a single element.
+ * Used by getLiteralKeys.
+ */
+ void getOneLiteralKey(const BSONElement& elt, BSONObjSet* out) {
+ if (Array == elt.type()) {
+ getLiteralKeysArray(elt.Obj(), out);
+ } else {
+ // One thing, not an array, index as-is.
+ BSONObjBuilder b;
+ b.appendAs(elt, "");
+ out->insert(b.obj());
+ }
+ }
+
+ /**
+ * elements is a non-geo field. Add the values literally, expanding arrays.
+ * Used by getS2Keys.
+ */
+ void getLiteralKeys(const BSONElementSet& elements, BSONObjSet* out) {
+ if (0 == elements.size()) {
+ // Missing fields are indexed as null.
+ BSONObjBuilder b;
+ b.appendNull("");
+ out->insert(b.obj());
+ } else {
+ for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) {
+ getOneLiteralKey(*i, out);
+ }
+ }
+ }
+
+} // namespace
+
+namespace mongo {
+
+ using std::pair;
+ using std::string;
+ using std::vector;
+
+
+
+ //
+ // 2D
+ //
+
+ // static
+ void get2DKeys(const BSONObj &obj, const TwoDIndexingParams& params,
+ BSONObjSet* keys, std::vector<BSONObj>* locs) {
+ BSONElementMSet bSet;
+
+ // Get all the nested location fields, but don't return individual elements from
+ // the last array, if it exists.
+ obj.getFieldsDotted(params.geo.c_str(), bSet, false);
+
+ if (bSet.empty())
+ return;
+
+ for (BSONElementMSet::iterator setI = bSet.begin(); setI != bSet.end(); ++setI) {
+ BSONElement geo = *setI;
+
+ if (geo.eoo() || !geo.isABSONObj())
+ continue;
+
+ //
+ // Grammar for location lookup:
+ // locs ::= [loc,loc,...,loc]|{<k>:loc,<k>:loc,...,<k>:loc}|loc
+ // loc ::= { <k1> : #, <k2> : # }|[#, #]|{}
+ //
+ // Empty locations are ignored, preserving single-location semantics
+ //
+
+ BSONObj embed = geo.embeddedObject();
+ if (embed.isEmpty())
+ continue;
+
+ // Differentiate between location arrays and locations
+ // by seeing if the first element value is a number
+ bool singleElement = embed.firstElement().isNumber();
+
+ BSONObjIterator oi(embed);
+
+ while (oi.more()) {
+ BSONObj locObj;
+
+ if (singleElement) {
+ locObj = embed;
+ } else {
+ BSONElement locElement = oi.next();
+
+ uassert(16804, mongoutils::str::stream() <<
+ "location object expected, location array not in correct format",
+ locElement.isABSONObj());
+
+ locObj = locElement.embeddedObject();
+ if(locObj.isEmpty())
+ continue;
+ }
+
+ BSONObjBuilder b(64);
+
+ // Remember the actual location object if needed
+ if (locs)
+ locs->push_back(locObj);
+
+ // Stop if we don't need to get anything but location objects
+ if (!keys) {
+ if (singleElement) break;
+ else continue;
+ }
+
+ params.geoHashConverter->hash(locObj, &obj).appendToBuilder(&b, "");
+
+ // Go through all the other index keys
+ for (vector<pair<string, int> >::const_iterator i = params.other.begin();
+ i != params.other.end(); ++i) {
+ // Get *all* fields for the index key
+ BSONElementSet eSet;
+ obj.getFieldsDotted(i->first, eSet);
+
+ if (eSet.size() == 0)
+ b.appendNull("");
+ else if (eSet.size() == 1)
+ b.appendAs(*(eSet.begin()), "");
+ else {
+ // If we have more than one key, store as an array of the objects
+ BSONArrayBuilder aBuilder;
+
+ for (BSONElementSet::iterator ei = eSet.begin(); ei != eSet.end();
+ ++ei) {
+ aBuilder.append(*ei);
+ }
+
+ b.append("", aBuilder.arr());
+ }
+ }
+ keys->insert(b.obj());
+ if(singleElement) break;
+ }
+ }
+ }
+
+
+
+ //
+ // FTS
+ //
+
+ // static
+ void getFTSKeys(const BSONObj &obj, const fts::FTSSpec& ftsSpec, BSONObjSet* keys) {
+ fts::FTSIndexFormat::getKeys(ftsSpec, obj, keys);
+ }
+
+
+
+ //
+ // Hash
+ //
+
+ // static
+ void getHashKeys(const BSONObj& obj, const string& hashedField, HashSeed seed,
+ int hashVersion, bool isSparse, BSONObjSet* keys) {
+ const char* cstr = hashedField.c_str();
+ BSONElement fieldVal = obj.getFieldDottedOrArray(cstr);
+ uassert(16766, "Error: hashed indexes do not currently support array values",
+ fieldVal.type() != Array );
+
+ if (!fieldVal.eoo()) {
+ BSONObj key = BSON( "" << makeSingleHashKey(fieldVal, seed, hashVersion));
+ keys->insert(key);
+ }
+ else if (!isSparse) {
+ BSONObj nullObj = BSON("" << BSONNULL);
+ keys->insert(BSON("" << makeSingleHashKey(nullObj.firstElement(), seed, hashVersion)));
+ }
+ }
+
+ // static
+ long long int makeSingleHashKey(const BSONElement& e, HashSeed seed, int v) {
+ massert(16767, "Only HashVersion 0 has been defined" , v == 0 );
+ return BSONElementHasher::hash64(e, seed);
+ }
+
+
+
+ //
+ // Haystack
+ //
+
+ // static
+ void getHaystackKeys(const BSONObj& obj, const std::string& geoField,
+ const std::vector<std::string>& otherFields,
+ double bucketSize, BSONObjSet* keys) {
+ BSONElement loc = obj.getFieldDotted(geoField);
+
+ if (loc.eoo()) { return; }
+
+ uassert(16775, "latlng not an array", loc.isABSONObj());
+ string root;
+ {
+ BSONObjIterator i(loc.Obj());
+ BSONElement x = i.next();
+ BSONElement y = i.next();
+ root = makeHaystackString(hashHaystackElement(x, bucketSize),
+ hashHaystackElement(y, bucketSize));
+ }
+
+ verify(otherFields.size() == 1);
+
+ BSONElementSet all;
+
+ // This is getFieldsDotted (plural not singular) since the object we're indexing
+ // may be an array.
+ obj.getFieldsDotted(otherFields[0], all);
+
+ if (all.size() == 0) {
+ // We're indexing a document that doesn't have the secondary non-geo field present.
+ // XXX: do we want to add this even if all.size() > 0? result:empty search terms
+ // match everything instead of only things w/empty search terms)
+ addKey(root, BSONElement(), keys);
+ } else {
+ // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"},
+ // all.size()==1. We can query on the complete field.
+ // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values
+ // "A" and "B". The query looks for any of the fields in the array.
+ for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) {
+ addKey(root, *i, keys);
+ }
+ }
+ }
+
+ // static
+ int hashHaystackElement(const BSONElement& e, double bucketSize) {
+ uassert(16776, "geo field is not a number", e.isNumber());
+ double d = e.numberDouble();
+ d += 180;
+ d /= bucketSize;
+ return static_cast<int>(d);
+ }
+
+ // static
+ std::string makeHaystackString(int hashedX, int hashedY) {
+ mongoutils::str::stream ss;
+ ss << hashedX << "_" << hashedY;
+ return ss;
+ }
+
+
+
+ //
+ // S2
+ //
+
+ void getS2Keys(const BSONObj& obj, const BSONObj& keyPattern,
+ const S2IndexingParams& params, BSONObjSet* keys) {
+ BSONObjSet keysToAdd;
+ // We output keys in the same order as the fields we index.
+ BSONObjIterator i(keyPattern);
+ while (i.more()) {
+ BSONElement e = i.next();
+
+ // First, we get the keys that this field adds. Either they're added literally from
+ // the value of the field, or they're transformed if the field is geo.
+ BSONElementSet fieldElements;
+ // false means Don't expand the last array, duh.
+ obj.getFieldsDotted(e.fieldName(), fieldElements, false);
+
+ BSONObjSet keysForThisField;
+ if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
+ getGeoKeys(obj, fieldElements, params, &keysForThisField);
+ } else {
+ getLiteralKeys(fieldElements, &keysForThisField);
+ }
+
+ // We expect there to be the missing field element present in the keys if data is
+ // missing. So, this should be non-empty.
+ verify(!keysForThisField.empty());
+
+ // We take the Cartesian product of all of the keys. This requires that we have
+ // some keys to take the Cartesian product with. If keysToAdd.empty(), we
+ // initialize it.
+ if (keysToAdd.empty()) {
+ keysToAdd = keysForThisField;
+ continue;
+ }
+
+ BSONObjSet updatedKeysToAdd;
+ for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end();
+ ++it) {
+ for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
+ newIt!= keysForThisField.end(); ++newIt) {
+ BSONObjBuilder b;
+ b.appendElements(*it);
+ b.append(newIt->firstElement());
+ updatedKeysToAdd.insert(b.obj());
+ }
+ }
+ keysToAdd = updatedKeysToAdd;
+ }
+
+ if (keysToAdd.size() > params.maxKeysPerInsert) {
+ warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
+ << ") consider creating larger buckets. obj="
+ << obj;
+ }
+
+ *keys = keysToAdd;
+ }
+
+} // namespace mongo
diff --git a/src/mongo/db/index/expression_key_generator.h b/src/mongo/db/index/expression_key_generator.h
new file mode 100644
index 00000000000..679c776a675
--- /dev/null
+++ b/src/mongo/db/index/expression_key_generator.h
@@ -0,0 +1,128 @@
+/**
+* Copyright (C) 2014 MongoDB Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+* As a special exception, the copyright holders give permission to link the
+* code of portions of this program with the OpenSSL library under certain
+* conditions as described in each individual source file and distribute
+* linked combinations including the program with the OpenSSL library. You
+* must comply with the GNU Affero General Public License in all respects for
+* all of the code used other than as permitted herein. If you modify file(s)
+* with this exception, you may extend this exception to your version of the
+* file(s), but you are not obligated to do so. If you do not wish to do so,
+* delete this exception statement from your version. If you delete this
+* exception statement from all source files in the program, then also delete
+* it in the license file.
+*/
+
+#pragma once
+
+#include <vector>
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/bsonmisc.h"
+#include "mongo/db/hasher.h"
+
+namespace mongo {
+
+ struct TwoDIndexingParams;
+ struct S2IndexingParams;
+
+ namespace fts {
+ class FTSSpec;
+ } // namespace fts
+
+
+
+ //
+ // 2D
+ //
+
+ /**
+ * Generate keys for 2d access method.
+ * Finds the key objects and/or locations for a geo-indexed object.
+ */
+ void get2DKeys(const BSONObj &obj, const TwoDIndexingParams& params,
+ BSONObjSet* keys, std::vector<BSONObj>* locs);
+
+
+
+ //
+ // FTS
+ //
+
+ /**
+ * Generates keys for FTS access method
+ */
+ void getFTSKeys(const BSONObj &obj, const fts::FTSSpec& ftsSpec, BSONObjSet* keys);
+
+
+
+ //
+ // Hash
+ //
+
+ /**
+ * Generates keys for hash access method.
+ */
+ void getHashKeys(const BSONObj& obj, const std::string& hashedField, HashSeed seed,
+ int hashVersion, bool isSparse, BSONObjSet* keys);
+
+ /**
+ * Hashing function used by both getHashKeys and the cursors we create.
+ * Exposed for testing in dbtests/namespacetests.cpp and
+ * so mongo/db/index_legacy.cpp can use it.
+ */
+ long long int makeSingleHashKey(const BSONElement& e, HashSeed seed, int v);
+
+
+
+ //
+ // Haystack
+ //
+
+ /**
+ * Generates keys for hay stack access method.
+ */
+ void getHaystackKeys(const BSONObj& obj, const std::string& geoField,
+ const std::vector<std::string>& otherFields,
+ double bucketSize,
+ BSONObjSet* keys);
+
+ /**
+ * Returns a hash of a BSON element.
+ * Used by getHaystackKeys and HaystackAccessMethod::searchCommand.
+ */
+ int hashHaystackElement(const BSONElement& e, double bucketSize);
+
+
+
+ /**
+ * Joins two strings using underscore as separator.
+ * Used by getHaystackKeys and HaystackAccessMethod::searchCommand.
+ */
+ std::string makeHaystackString(int hashedX, int hashedY);
+
+
+
+ //
+ // S2
+ //
+
+ /**
+ * Generates keys for S2 access method.
+ */
+ void getS2Keys(const BSONObj& obj, const BSONObj& keyPattern, const S2IndexingParams& params,
+ BSONObjSet* keys);
+
+} // namespace mongo
diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp
index 47cb519f3bc..3eac70d8e71 100644
--- a/src/mongo/db/index/fts_access_method.cpp
+++ b/src/mongo/db/index/fts_access_method.cpp
@@ -27,7 +27,7 @@
*/
#include "mongo/db/index/fts_access_method.h"
-#include "mongo/db/fts/fts_index_format.h"
+#include "mongo/db/index/expression_key_generator.h"
namespace mongo {
@@ -35,7 +35,7 @@ namespace mongo {
: BtreeBasedAccessMethod(btreeState), _ftsSpec(btreeState->descriptor()->infoObj()) { }
void FTSAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) {
- fts::FTSIndexFormat::getKeys(_ftsSpec, obj, keys);
+ getFTSKeys(obj, _ftsSpec, keys);
}
} // namespace mongo
diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp
index eb78f1934d8..a658e7078f4 100644
--- a/src/mongo/db/index/hash_access_method.cpp
+++ b/src/mongo/db/index/hash_access_method.cpp
@@ -28,15 +28,11 @@
#include "mongo/db/structure/btree/btree.h"
#include "mongo/db/hasher.h"
+#include "mongo/db/index/expression_key_generator.h"
#include "mongo/db/index/hash_access_method.h"
namespace mongo {
- long long int HashAccessMethod::makeSingleKey(const BSONElement& e, HashSeed seed, int v) {
- massert(16767, "Only HashVersion 0 has been defined" , v == 0 );
- return BSONElementHasher::hash64(e, seed);
- }
-
HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState)
: BtreeBasedAccessMethod(btreeState) {
@@ -66,7 +62,7 @@ namespace mongo {
//In case we have hashed indexes based on other hash functions in
//the future, we store a hashVersion number. If hashVersion changes,
- // "makeSingleKey" will need to change accordingly.
+ // "makeSingleHashKey" will need to change accordingly.
//Defaults to 0 if "hashVersion" is not included in the index spec
//or if the value of "hashversion" is not a number
_hashVersion = descriptor->getInfoElement("hashVersion").numberInt();
@@ -79,25 +75,7 @@ namespace mongo {
}
void HashAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) {
- getKeysImpl(obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), keys);
- }
-
- // static
- void HashAccessMethod::getKeysImpl(const BSONObj& obj, const string& hashedField, HashSeed seed,
- int hashVersion, bool isSparse, BSONObjSet* keys) {
- const char* cstr = hashedField.c_str();
- BSONElement fieldVal = obj.getFieldDottedOrArray(cstr);
- uassert(16766, "Error: hashed indexes do not currently support array values",
- fieldVal.type() != Array );
-
- if (!fieldVal.eoo()) {
- BSONObj key = BSON( "" << makeSingleKey(fieldVal, seed, hashVersion));
- keys->insert(key);
- }
- else if (!isSparse) {
- BSONObj nullObj = BSON("" << BSONNULL);
- keys->insert(BSON("" << makeSingleKey(nullObj.firstElement(), seed, hashVersion)));
- }
+ getHashKeys(obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), keys);
}
} // namespace mongo
diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h
index c2b9fa836b6..d0f46620334 100644
--- a/src/mongo/db/index/hash_access_method.h
+++ b/src/mongo/db/index/hash_access_method.h
@@ -53,18 +53,6 @@ namespace mongo {
return Status::OK();
}
- /**
- * Hashing function used by both this class and the cursors we create.
- * Exposed for testing and so mongo/db/index_legacy.cpp can use it.
- */
- static long long int makeSingleKey(const BSONElement& e, HashSeed seed, int v);
-
- /**
- * Exposed externally for testing purposes.
- */
- static void getKeysImpl(const BSONObj& obj, const string& hashedField, HashSeed seed,
- int hashVersion, bool isSparse, BSONObjSet* keys);
-
private:
virtual void getKeys(const BSONObj& obj, BSONObjSet* keys);
diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp
index 0fdf9dfa78f..39d19070a38 100644
--- a/src/mongo/db/index/haystack_access_method.cpp
+++ b/src/mongo/db/index/haystack_access_method.cpp
@@ -30,6 +30,7 @@
#include "mongo/base/status.h"
#include "mongo/db/geo/hash.h"
+#include "mongo/db/index/expression_key_generator.h"
#include "mongo/db/index/haystack_access_method_internal.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/pdfile.h"
@@ -71,70 +72,7 @@ namespace mongo {
}
void HaystackAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) {
- BSONElement loc = obj.getFieldDotted(_geoField);
-
- if (loc.eoo()) { return; }
-
- uassert(16775, "latlng not an array", loc.isABSONObj());
- string root;
- {
- BSONObjIterator i(loc.Obj());
- BSONElement x = i.next();
- BSONElement y = i.next();
- root = makeString(hash(x), hash(y));
- }
-
- verify(_otherFields.size() == 1);
-
- BSONElementSet all;
-
- // This is getFieldsDotted (plural not singular) since the object we're indexing
- // may be an array.
- obj.getFieldsDotted(_otherFields[0], all);
-
- if (all.size() == 0) {
- // We're indexing a document that doesn't have the secondary non-geo field present.
- // XXX: do we want to add this even if all.size() > 0? result:empty search terms
- // match everything instead of only things w/empty search terms)
- addKey(root, BSONElement(), keys);
- } else {
- // Ex:If our secondary field is type: "foo" or type: {a:"foo", b:"bar"},
- // all.size()==1. We can query on the complete field.
- // Ex: If our secondary field is type: ["A", "B"] all.size()==2 and all has values
- // "A" and "B". The query looks for any of the fields in the array.
- for (BSONElementSet::iterator i = all.begin(); i != all.end(); ++i) {
- addKey(root, *i, keys);
- }
- }
- }
-
- int HaystackAccessMethod::hash(const BSONElement& e) const {
- uassert(16776, "geo field is not a number", e.isNumber());
- double d = e.numberDouble();
- d += 180;
- d /= _bucketSize;
- return static_cast<int>(d);
- }
-
- string HaystackAccessMethod::makeString(int hashedX, int hashedY) const {
- stringstream ss;
- ss << hashedX << "_" << hashedY;
- return ss.str();
- }
-
- // Build a new BSONObj with root in it. If e is non-empty, append that to the key. Insert
- // the BSONObj into keys.
- void HaystackAccessMethod::addKey(const string& root, const BSONElement& e,
- BSONObjSet* keys) const {
- BSONObjBuilder buf;
- buf.append("", root);
-
- if (e.eoo())
- buf.appendNull("");
- else
- buf.appendAs(e, "");
-
- keys->insert(buf.obj());
+ getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys);
}
void HaystackAccessMethod::searchCommand(const BSONObj& nearObj, double maxDistance,
@@ -147,8 +85,8 @@ namespace mongo {
int x, y;
{
BSONObjIterator i(nearObj);
- x = hash(i.next());
- y = hash(i.next());
+ x = hashHaystackElement(i.next(), _bucketSize);
+ y = hashHaystackElement(i.next(), _bucketSize);
}
int scale = static_cast<int>(ceil(maxDistance / _bucketSize));
@@ -159,7 +97,7 @@ namespace mongo {
for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) {
for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) {
BSONObjBuilder bb;
- bb.append("", makeString(x + a, y + b));
+ bb.append("", makeHaystackString(x + a, y + b));
for (unsigned i = 0; i < _otherFields.size(); i++) {
// See if the non-geo field we're indexing on is in the provided search term.
diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h
index 0e1dd3426c7..29b6485f7d2 100644
--- a/src/mongo/db/index/haystack_access_method.h
+++ b/src/mongo/db/index/haystack_access_method.h
@@ -66,11 +66,6 @@ namespace mongo {
private:
virtual void getKeys(const BSONObj& obj, BSONObjSet* keys);
- // Helper methods called by getKeys:
- int hash(const BSONElement& e) const;
- string makeString(int hashedX, int hashedY) const;
- void addKey(const string& root, const BSONElement& e, BSONObjSet* keys) const;
-
string _geoField;
vector<string> _otherFields;
double _bucketSize;
diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp
index fff6fbba7f3..5548be3b429 100644
--- a/src/mongo/db/index/s2_access_method.cpp
+++ b/src/mongo/db/index/s2_access_method.cpp
@@ -35,6 +35,7 @@
#include "mongo/db/geo/geoconstants.h"
#include "mongo/db/geo/s2common.h"
#include "mongo/db/index_names.h"
+#include "mongo/db/index/expression_key_generator.h"
#include "mongo/db/jsobj.h"
namespace mongo {
@@ -122,132 +123,7 @@ namespace mongo {
}
void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) {
- BSONObjSet keysToAdd;
- // We output keys in the same order as the fields we index.
- BSONObjIterator i(_descriptor->keyPattern());
- while (i.more()) {
- BSONElement e = i.next();
-
- // First, we get the keys that this field adds. Either they're added literally from
- // the value of the field, or they're transformed if the field is geo.
- BSONElementSet fieldElements;
- // false means Don't expand the last array, duh.
- obj.getFieldsDotted(e.fieldName(), fieldElements, false);
-
- BSONObjSet keysForThisField;
- if (IndexNames::GEO_2DSPHERE == e.valuestr()) {
- getGeoKeys(obj, fieldElements, &keysForThisField);
- } else {
- getLiteralKeys(fieldElements, &keysForThisField);
- }
-
- // We expect there to be the missing field element present in the keys if data is
- // missing. So, this should be non-empty.
- verify(!keysForThisField.empty());
-
- // We take the Cartesian product of all of the keys. This requires that we have
- // some keys to take the Cartesian product with. If keysToAdd.empty(), we
- // initialize it.
- if (keysToAdd.empty()) {
- keysToAdd = keysForThisField;
- continue;
- }
-
- BSONObjSet updatedKeysToAdd;
- for (BSONObjSet::const_iterator it = keysToAdd.begin(); it != keysToAdd.end();
- ++it) {
- for (BSONObjSet::const_iterator newIt = keysForThisField.begin();
- newIt!= keysForThisField.end(); ++newIt) {
- BSONObjBuilder b;
- b.appendElements(*it);
- b.append(newIt->firstElement());
- updatedKeysToAdd.insert(b.obj());
- }
- }
- keysToAdd = updatedKeysToAdd;
- }
-
- if (keysToAdd.size() > _params.maxKeysPerInsert) {
- warning() << "insert of geo object generated lots of keys (" << keysToAdd.size()
- << ") consider creating larger buckets. obj="
- << obj;
- }
-
- *keys = keysToAdd;
- }
-
- // Get the index keys for elements that are GeoJSON.
- void S2AccessMethod::getGeoKeys(const BSONObj& document, const BSONElementSet& elements,
- BSONObjSet* out) const {
- for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) {
- uassert(16754, "Can't parse geometry from element: " + i->toString(),
- i->isABSONObj());
- const BSONObj &geoObj = i->Obj();
-
- vector<string> cells;
- bool succeeded = S2SearchUtil::getKeysForObject(geoObj, _params, &cells);
- uassert(16755, "Can't extract geo keys from object, malformed geometry?: "
- + document.toString(), succeeded);
-
- uassert(16756, "Unable to generate keys for (likely malformed) geometry: "
- + document.toString(),
- cells.size() > 0);
-
- for (vector<string>::const_iterator it = cells.begin(); it != cells.end(); ++it) {
- BSONObjBuilder b;
- b.append("", *it);
- out->insert(b.obj());
- }
- }
-
- if (0 == out->size()) {
- BSONObjBuilder b;
- b.appendNull("");
- out->insert(b.obj());
- }
- }
-
- void S2AccessMethod::getLiteralKeysArray(const BSONObj& obj, BSONObjSet* out) const {
- BSONObjIterator objIt(obj);
- if (!objIt.more()) {
- // Empty arrays are indexed as undefined.
- BSONObjBuilder b;
- b.appendUndefined("");
- out->insert(b.obj());
- } else {
- // Non-empty arrays are exploded.
- while (objIt.more()) {
- BSONObjBuilder b;
- b.appendAs(objIt.next(), "");
- out->insert(b.obj());
- }
- }
- }
-
- void S2AccessMethod::getOneLiteralKey(const BSONElement& elt, BSONObjSet* out) const {
- if (Array == elt.type()) {
- getLiteralKeysArray(elt.Obj(), out);
- } else {
- // One thing, not an array, index as-is.
- BSONObjBuilder b;
- b.appendAs(elt, "");
- out->insert(b.obj());
- }
- }
-
- // elements is a non-geo field. Add the values literally, expanding arrays.
- void S2AccessMethod::getLiteralKeys(const BSONElementSet& elements,
- BSONObjSet* out) const {
- if (0 == elements.size()) {
- // Missing fields are indexed as null.
- BSONObjBuilder b;
- b.appendNull("");
- out->insert(b.obj());
- } else {
- for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) {
- getOneLiteralKey(*i, out);
- }
- }
+ getS2Keys(obj, _descriptor->keyPattern(), _params, keys);
}
} // namespace mongo
diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h
index 228d6b9da77..df12026395b 100644
--- a/src/mongo/db/index/s2_access_method.h
+++ b/src/mongo/db/index/s2_access_method.h
@@ -57,13 +57,6 @@ namespace mongo {
private:
virtual void getKeys(const BSONObj& obj, BSONObjSet* keys);
- // getKeys calls the helper methods below.
- void getGeoKeys(const BSONObj& document, const BSONElementSet& elements,
- BSONObjSet* out) const;
- void getLiteralKeys(const BSONElementSet& elements, BSONObjSet* out) const;
- void getLiteralKeysArray(const BSONObj& obj, BSONObjSet* out) const;
- void getOneLiteralKey(const BSONElement& elt, BSONObjSet *out) const;
-
S2IndexingParams _params;
};