diff options
author | Hari Khalsa <hkhalsa@10gen.com> | 2014-02-19 16:25:10 -0500 |
---|---|---|
committer | Hari Khalsa <hkhalsa@10gen.com> | 2014-02-19 21:51:35 -0500 |
commit | f03fe8518989be07ebf741725a7d50ca66577d6f (patch) | |
tree | 23b0b28ea8ccedfbbb44b234bf971f1f90985b42 /src/mongo/db/index | |
parent | 269c683220692299b5fb9cf431359e3ec299f8fc (diff) | |
download | mongo-f03fe8518989be07ebf741725a7d50ca66577d6f.tar.gz |
SERVER-8391 introduce a (indexObj x doc) -> keys function for upgrade checking
Diffstat (limited to 'src/mongo/db/index')
-rw-r--r-- | src/mongo/db/index/2d_access_method.cpp | 42 | ||||
-rw-r--r-- | src/mongo/db/index/SConscript | 18 | ||||
-rw-r--r-- | src/mongo/db/index/expression_key_generator.h | 128 | ||||
-rw-r--r-- | src/mongo/db/index/expression_keys_private.cpp (renamed from src/mongo/db/index/expression_key_generator.cpp) | 188 | ||||
-rw-r--r-- | src/mongo/db/index/expression_keys_private.h | 130 | ||||
-rw-r--r-- | src/mongo/db/index/expression_params.h | 189 | ||||
-rw-r--r-- | src/mongo/db/index/external_key_generator.cpp | 97 | ||||
-rw-r--r-- | src/mongo/db/index/external_key_generator.h | 47 | ||||
-rw-r--r-- | src/mongo/db/index/fts_access_method.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/index/hash_access_method.cpp | 42 | ||||
-rw-r--r-- | src/mongo/db/index/haystack_access_method.cpp | 41 | ||||
-rw-r--r-- | src/mongo/db/index/s2_access_method.cpp | 41 |
12 files changed, 621 insertions, 346 deletions
diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp index aebc92f1970..7a5cbc9ff0b 100644 --- a/src/mongo/db/index/2d_access_method.cpp +++ b/src/mongo/db/index/2d_access_method.cpp @@ -34,61 +34,29 @@ #include "mongo/db/geo/core.h" #include "mongo/db/index_names.h" #include "mongo/db/index/2d_common.h" -#include "mongo/db/index/expression_key_generator.h" +#include "mongo/db/index/expression_keys_private.h" +#include "mongo/db/index/expression_params.h" #include "mongo/db/jsobj.h" #include "mongo/db/pdfile.h" namespace mongo { - static double configValueWithDefault(const IndexDescriptor *desc, const string& name, double def) { - BSONElement e = desc->getInfoElement(name); - if (e.isNumber()) { return e.numberDouble(); } - return def; - } - TwoDAccessMethod::TwoDAccessMethod(IndexCatalogEntry* btreeState) : BtreeBasedAccessMethod(btreeState) { const IndexDescriptor* descriptor = btreeState->descriptor(); - BSONObjIterator i(descriptor->keyPattern()); - while (i.more()) { - BSONElement e = i.next(); - if (e.type() == String && IndexNames::GEO_2D == e.valuestr()) { - uassert(16800, "can't have 2 geo fields", _params.geo.size() == 0); - uassert(16801, "2d has to be first in index", _params.other.size() == 0); - _params.geo = e.fieldName(); - } else { - int order = 1; - if (e.isNumber()) { - order = static_cast<int>(e.Number()); - } - _params.other.push_back(make_pair(e.fieldName(), order)); - } - } - uassert(16802, "no geo field specified", _params.geo.size()); - - double bits = configValueWithDefault(descriptor, "bits", 26); // for lat/long, ~ 1ft - uassert(16803, "bits in geo index must be between 1 and 32", bits > 0 && bits <= 32); - - GeoHashConverter::Parameters params; - params.bits = static_cast<unsigned>(bits); - params.max = configValueWithDefault(descriptor, "max", 180.0); - params.min = configValueWithDefault(descriptor, "min", -180.0); - double numBuckets = (1024 * 1024 * 1024 * 4.0); - params.scaling = numBuckets 
/ (params.max - params.min); - - _params.geoHashConverter.reset(new GeoHashConverter(params)); + ExpressionParams::parseTwoDParams(descriptor->infoObj(), &_params); } /** Finds the key objects to put in an index */ void TwoDAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - get2DKeys(obj, _params, keys, NULL); + ExpressionKeysPrivate::get2DKeys(obj, _params, keys, NULL); } /** Finds all locations in a geo-indexed object */ void TwoDAccessMethod::getKeys(const BSONObj& obj, vector<BSONObj>& locs) const { - get2DKeys(obj, _params, NULL, &locs); + ExpressionKeysPrivate::get2DKeys(obj, _params, NULL, &locs); } } // namespace mongo diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index d1ea2e844d9..c8d757d150c 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -6,8 +6,26 @@ env.Library( target='key_generator', source=[ 'btree_key_generator.cpp', + 'expression_keys_private.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/bson', + '$BUILD_DIR/mongo/db/fts/base', + '$BUILD_DIR/mongo/geometry', + '$BUILD_DIR/mongo/geoparser', + '$BUILD_DIR/mongo/geoquery', + '$BUILD_DIR/mongo/index_names', + '$BUILD_DIR/third_party/s2/s2', + ], +) + +env.Library( + target='external_key_generator', + source=[ + 'external_key_generator.cpp', + ], + LIBDEPS=[ + 'key_generator', + '$BUILD_DIR/mongo/bson', ], ) diff --git a/src/mongo/db/index/expression_key_generator.h b/src/mongo/db/index/expression_key_generator.h deleted file mode 100644 index 679c776a675..00000000000 --- a/src/mongo/db/index/expression_key_generator.h +++ /dev/null @@ -1,128 +0,0 @@ -/** -* Copyright (C) 2014 MongoDB Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3, -* as published by the Free Software Foundation. 
-* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - -#pragma once - -#include <vector> -#include "mongo/bson/bsonobj.h" -#include "mongo/bson/bsonmisc.h" -#include "mongo/db/hasher.h" - -namespace mongo { - - struct TwoDIndexingParams; - struct S2IndexingParams; - - namespace fts { - class FTSSpec; - } // namespace fts - - - - // - // 2D - // - - /** - * Generate keys for 2d access method. - * Finds the key objects and/or locations for a geo-indexed object. - */ - void get2DKeys(const BSONObj &obj, const TwoDIndexingParams& params, - BSONObjSet* keys, std::vector<BSONObj>* locs); - - - - // - // FTS - // - - /** - * Generates keys for FTS access method - */ - void getFTSKeys(const BSONObj &obj, const fts::FTSSpec& ftsSpec, BSONObjSet* keys); - - - - // - // Hash - // - - /** - * Generates keys for hash access method. 
- */ - void getHashKeys(const BSONObj& obj, const std::string& hashedField, HashSeed seed, - int hashVersion, bool isSparse, BSONObjSet* keys); - - /** - * Hashing function used by both getHashKeys and the cursors we create. - * Exposed for testing in dbtests/namespacetests.cpp and - * so mongo/db/index_legacy.cpp can use it. - */ - long long int makeSingleHashKey(const BSONElement& e, HashSeed seed, int v); - - - - // - // Haystack - // - - /** - * Generates keys for hay stack access method. - */ - void getHaystackKeys(const BSONObj& obj, const std::string& geoField, - const std::vector<std::string>& otherFields, - double bucketSize, - BSONObjSet* keys); - - /** - * Returns a hash of a BSON element. - * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. - */ - int hashHaystackElement(const BSONElement& e, double bucketSize); - - - - /** - * Joins two strings using underscore as separator. - * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. - */ - std::string makeHaystackString(int hashedX, int hashedY); - - - - // - // S2 - // - - /** - * Generates keys for S2 access method. - */ - void getS2Keys(const BSONObj& obj, const BSONObj& keyPattern, const S2IndexingParams& params, - BSONObjSet* keys); - -} // namespace mongo diff --git a/src/mongo/db/index/expression_key_generator.cpp b/src/mongo/db/index/expression_keys_private.cpp index e916ebccb44..0f94a7b7b58 100644 --- a/src/mongo/db/index/expression_key_generator.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -1,40 +1,47 @@ /** -* Copyright (C) 2014 MongoDB Inc. -* -* This program is free software: you can redistribute it and/or modify -* it under the terms of the GNU Affero General Public License, version 3 -* as published by the Free Software Foundation. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -* GNU Affero General Public License for more details. -* -* You should have received a copy of the GNU Affero General Public License -* along with this program. If not, see <http://www.gnu.org/licenses/>. -* -* As a special exception, the copyright holders give permission to link the -* code of portions of this program with the OpenSSL library under certain -* conditions as described in each individual source file and distribute -* linked combinations including the program with the OpenSSL library. You -* must comply with the GNU Affero General Public License in all respects for -* all of the code used other than as permitted herein. If you modify file(s) -* with this exception, you may extend this exception to your version of the -* file(s), but you are not obligated to do so. If you do not wish to do so, -* delete this exception statement from your version. If you delete this -* exception statement from all source files in the program, then also delete -* it in the license file. -*/ - -#include "mongo/db/index/expression_key_generator.h" + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/index/expression_keys_private.h" #include <utility> + #include "mongo/db/fts/fts_index_format.h" +#include "mongo/db/geo/geoconstants.h" +#include "mongo/db/geo/geoparser.h" +#include "mongo/db/geo/geoquery.h" #include "mongo/db/geo/s2common.h" +#include "mongo/db/geo/s2.h" #include "mongo/db/index_names.h" #include "mongo/db/index/2d_common.h" #include "mongo/util/assert_util.h" #include "mongo/util/mongoutils/str.h" +#include "third_party/s2/s2cell.h" +#include "third_party/s2/s2regioncoverer.h" namespace { @@ -61,17 +68,48 @@ namespace { keys->insert(buf.obj()); } - - - // // Helper functions for getS2Keys // + static void S2KeysFromRegion(S2RegionCoverer *coverer, const S2Region &region, + vector<string> *out) { + vector<S2CellId> covering; + coverer->GetCovering(region, &covering); + for (size_t i = 0; i < covering.size(); ++i) { + out->push_back(covering[i].toString()); + } + } + + + bool S2GetKeysForObject(const BSONObj& obj, + const S2IndexingParams& params, + vector<string>* out) { + S2RegionCoverer coverer; + params.configureCoverer(&coverer); + + GeometryContainer geoContainer; + if (!geoContainer.parseFrom(obj)) { return false; } + + // Only 
certain geometries can be indexed in the old index format S2_INDEX_VERSION_1. See + // definition of S2IndexVersion for details. + if (params.indexVersion == S2_INDEX_VERSION_1 && !geoContainer.isSimpleContainer()) { + return false; + } + + if (!geoContainer.hasS2Region()) { return false; } + + S2KeysFromRegion(&coverer, geoContainer.getRegion(), out); + + return true; + } + + /** * Get the index keys for elements that are GeoJSON. * Used by getS2Keys. */ - void getGeoKeys(const BSONObj& document, const BSONElementSet& elements, + void getS2GeoKeys(const BSONObj& document, const BSONElementSet& elements, const S2IndexingParams& params, BSONObjSet* out) { for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { @@ -80,7 +118,7 @@ namespace { const BSONObj &geoObj = i->Obj(); vector<string> cells; - bool succeeded = S2SearchUtil::getKeysForObject(geoObj, params, &cells); + bool succeeded = S2GetKeysForObject(geoObj, params, &cells); uassert(16755, "Can't extract geo keys from object, malformed geometry?: " + document.toString(), succeeded); @@ -106,7 +144,7 @@ namespace { * Expands array and appends items to 'out'. * Used by getOneLiteralKey. */ - void getLiteralKeysArray(const BSONObj& obj, BSONObjSet* out) { + void getS2LiteralKeysArray(const BSONObj& obj, BSONObjSet* out) { BSONObjIterator objIt(obj); if (!objIt.more()) { // Empty arrays are indexed as undefined. @@ -128,9 +166,9 @@ namespace { * Otherwise, adds 'elt' as a single element. * Used by getLiteralKeys. */ - void getOneLiteralKey(const BSONElement& elt, BSONObjSet* out) { + void getS2OneLiteralKey(const BSONElement& elt, BSONObjSet* out) { if (Array == elt.type()) { - getLiteralKeysArray(elt.Obj(), out); + getS2LiteralKeysArray(elt.Obj(), out); } else { // One thing, not an array, index as-is. BSONObjBuilder b; @@ -143,7 +181,7 @@ namespace { * elements is a non-geo field. Add the values literally, expanding arrays. * Used by getS2Keys. 
*/ - void getLiteralKeys(const BSONElementSet& elements, BSONObjSet* out) { + void getS2LiteralKeys(const BSONElementSet& elements, BSONObjSet* out) { if (0 == elements.size()) { // Missing fields are indexed as null. BSONObjBuilder b; @@ -151,7 +189,7 @@ namespace { out->insert(b.obj()); } else { for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { - getOneLiteralKey(*i, out); + getS2OneLiteralKey(*i, out); } } } @@ -164,15 +202,11 @@ namespace mongo { using std::string; using std::vector; - - - // - // 2D - // - // static - void get2DKeys(const BSONObj &obj, const TwoDIndexingParams& params, - BSONObjSet* keys, std::vector<BSONObj>* locs) { + void ExpressionKeysPrivate::get2DKeys(const BSONObj &obj, + const TwoDIndexingParams& params, + BSONObjSet* keys, + std::vector<BSONObj>* locs) { BSONElementMSet bSet; // Get all the nested location fields, but don't return individual elements from @@ -266,26 +300,21 @@ namespace mongo { } } - - - // - // FTS - // - // static - void getFTSKeys(const BSONObj &obj, const fts::FTSSpec& ftsSpec, BSONObjSet* keys) { + void ExpressionKeysPrivate::getFTSKeys(const BSONObj &obj, + const fts::FTSSpec& ftsSpec, + BSONObjSet* keys) { fts::FTSIndexFormat::getKeys(ftsSpec, obj, keys); } - - - // - // Hash - // - // static - void getHashKeys(const BSONObj& obj, const string& hashedField, HashSeed seed, - int hashVersion, bool isSparse, BSONObjSet* keys) { + void ExpressionKeysPrivate::getHashKeys(const BSONObj& obj, + const string& hashedField, + HashSeed seed, + int hashVersion, + bool isSparse, + BSONObjSet* keys) { + const char* cstr = hashedField.c_str(); BSONElement fieldVal = obj.getFieldDottedOrArray(cstr); uassert(16766, "Error: hashed indexes do not currently support array values", @@ -302,21 +331,20 @@ namespace mongo { } // static - long long int makeSingleHashKey(const BSONElement& e, HashSeed seed, int v) { + long long int ExpressionKeysPrivate::makeSingleHashKey(const BSONElement& e, + HashSeed 
seed, + int v) { massert(16767, "Only HashVersion 0 has been defined" , v == 0 ); return BSONElementHasher::hash64(e, seed); } - - - // - // Haystack - // - // static - void getHaystackKeys(const BSONObj& obj, const std::string& geoField, - const std::vector<std::string>& otherFields, - double bucketSize, BSONObjSet* keys) { + void ExpressionKeysPrivate::getHaystackKeys(const BSONObj& obj, + const std::string& geoField, + const std::vector<std::string>& otherFields, + double bucketSize, + BSONObjSet* keys) { + BSONElement loc = obj.getFieldDotted(geoField); if (loc.eoo()) { return; } @@ -356,7 +384,7 @@ namespace mongo { } // static - int hashHaystackElement(const BSONElement& e, double bucketSize) { + int ExpressionKeysPrivate::hashHaystackElement(const BSONElement& e, double bucketSize) { uassert(16776, "geo field is not a number", e.isNumber()); double d = e.numberDouble(); d += 180; @@ -365,20 +393,16 @@ namespace mongo { } // static - std::string makeHaystackString(int hashedX, int hashedY) { + std::string ExpressionKeysPrivate::makeHaystackString(int hashedX, int hashedY) { mongoutils::str::stream ss; ss << hashedX << "_" << hashedY; return ss; } - - - // - // S2 - // - - void getS2Keys(const BSONObj& obj, const BSONObj& keyPattern, - const S2IndexingParams& params, BSONObjSet* keys) { + void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj, + const BSONObj& keyPattern, + const S2IndexingParams& params, + BSONObjSet* keys) { BSONObjSet keysToAdd; // Does one of our documents have a geo field? 
@@ -426,9 +450,9 @@ namespace mongo { } } - getGeoKeys(obj, fieldElements, params, &keysForThisField); + getS2GeoKeys(obj, fieldElements, params, &keysForThisField); } else { - getLiteralKeys(fieldElements, &keysForThisField); + getS2LiteralKeys(fieldElements, &keysForThisField); } // We expect there to be the missing field element present in the keys if data is diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h new file mode 100644 index 00000000000..6e3fb1ea9a0 --- /dev/null +++ b/src/mongo/db/index/expression_keys_private.h @@ -0,0 +1,130 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <vector> + +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonmisc.h" +#include "mongo/db/hasher.h" + +namespace mongo { + + struct TwoDIndexingParams; + struct S2IndexingParams; + + namespace fts { + + class FTSSpec; + + } // namespace fts + + /** + * Do not use this class or any of its methods directly. The key generation of btree-indexed + * expression indices is kept outside of the access method for testing and for upgrade + * compatibility checking. + */ + class ExpressionKeysPrivate { + public: + + // + // 2d + // + + static void get2DKeys(const BSONObj &obj, + const TwoDIndexingParams& params, + BSONObjSet* keys, + std::vector<BSONObj>* locs); + + // + // FTS + // + + static void getFTSKeys(const BSONObj &obj, const fts::FTSSpec& ftsSpec, BSONObjSet* keys); + + // + // Hash + // + + /** + * Generates keys for hash access method. + */ + static void getHashKeys(const BSONObj& obj, + const std::string& hashedField, + HashSeed seed, + int hashVersion, + bool isSparse, + BSONObjSet* keys); + + /** + * Hashing function used by both getHashKeys and the cursors we create. + * Exposed for testing in dbtests/namespacetests.cpp and + * so mongo/db/index_legacy.cpp can use it. + */ + static long long int makeSingleHashKey(const BSONElement& e, HashSeed seed, int v); + + // + // Haystack + // + + /** + * Generates keys for haystack access method. + */ + static void getHaystackKeys(const BSONObj& obj, + const std::string& geoField, + const std::vector<std::string>& otherFields, + double bucketSize, + BSONObjSet* keys); + + /** + * Returns a hash of a BSON element. + * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. + */ + static int hashHaystackElement(const BSONElement& e, double bucketSize); + + /** + * Joins two strings using underscore as separator. 
+ * Used by getHaystackKeys and HaystackAccessMethod::searchCommand. + */ + static std::string makeHaystackString(int hashedX, int hashedY); + + // + // S2 + // + + /** + * Generates keys for S2 access method. + */ + static void getS2Keys(const BSONObj& obj, + const BSONObj& keyPattern, + const S2IndexingParams& params, + BSONObjSet* keys); + }; + +} // namespace mongo diff --git a/src/mongo/db/index/expression_params.h b/src/mongo/db/index/expression_params.h new file mode 100644 index 00000000000..90d294c5800 --- /dev/null +++ b/src/mongo/db/index/expression_params.h @@ -0,0 +1,189 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. 
If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/geo/s2common.h" +#include "mongo/db/index_names.h" +#include "mongo/db/index/2d_common.h" +#include "mongo/db/jsobj.h" +#include "mongo/util/mongoutils/str.h" + +namespace mongo { + + class ExpressionParams { + public: + static void parseTwoDParams(const BSONObj& infoObj, TwoDIndexingParams* out) { + BSONObjIterator i(infoObj.getObjectField("key")); + + while (i.more()) { + BSONElement e = i.next(); + if (e.type() == String && IndexNames::GEO_2D == e.valuestr()) { + uassert(16800, "can't have 2 geo fields", out->geo.size() == 0); + uassert(16801, "2d has to be first in index", out->other.size() == 0); + out->geo = e.fieldName(); + } else { + int order = 1; + if (e.isNumber()) { + order = static_cast<int>(e.Number()); + } + out->other.push_back(make_pair(e.fieldName(), order)); + } + } + + uassert(16802, "no geo field specified", out->geo.size()); + + double bits = configValueWithDefaultDouble(infoObj, "bits", 26); // for lat/long, ~ 1ft + uassert(16803, "bits in geo index must be between 1 and 32", bits > 0 && bits <= 32); + + GeoHashConverter::Parameters hashParams; + hashParams.bits = static_cast<unsigned>(bits); + hashParams.max = configValueWithDefaultDouble(infoObj, "max", 180.0); + hashParams.min = configValueWithDefaultDouble(infoObj, "min", -180.0); + double numBuckets = (1024 * 1024 * 1024 * 4.0); + hashParams.scaling = numBuckets / (hashParams.max - hashParams.min); + + out->geoHashConverter.reset(new GeoHashConverter(hashParams)); + } + + static void parseHashParams(const BSONObj& infoObj, + HashSeed* seedOut, + int* versionOut, + string* fieldOut) { + + // Default _seed to DEFAULT_HASH_SEED if "seed" is not included in the index spec + // or if the value of "seed" is not a number + + // *** WARNING *** + // Choosing non-default seeds will invalidate hashed sharding + // Changing the seed default will 
break existing indexes and sharded collections + if (infoObj["seed"].eoo()) { + *seedOut = BSONElementHasher::DEFAULT_HASH_SEED; + } + else { + *seedOut = infoObj["seed"].numberInt(); + } + + // In case we have hashed indexes based on other hash functions in the future, we store + // a hashVersion number. If hashVersion changes, "makeSingleHashKey" will need to change + // accordingly. Defaults to 0 if "hashVersion" is not included in the index spec or if + // the value of "hashversion" is not a number + *versionOut = infoObj["hashVersion"].numberInt(); + + // Get the hashfield name + BSONElement firstElt = infoObj.getObjectField("key").firstElement(); + massert(16765, "error: no hashed index field", + firstElt.str().compare(IndexNames::HASHED) == 0); + *fieldOut = firstElt.fieldName(); + } + + static void parseHaystackParams(const BSONObj& infoObj, + string* geoFieldOut, + vector<string>* otherFieldsOut, + double* bucketSizeOut) { + + BSONElement e = infoObj["bucketSize"]; + uassert(16777, "need bucketSize", e.isNumber()); + *bucketSizeOut = e.numberDouble(); + uassert(16769, "bucketSize cannot be zero", *bucketSizeOut != 0.0); + + // Example: + // db.foo.ensureIndex({ pos : "geoHaystack", type : 1 }, { bucketSize : 1 }) + BSONObjIterator i(infoObj.getObjectField("key")); + while (i.more()) { + BSONElement e = i.next(); + if (e.type() == String && IndexNames::GEO_HAYSTACK == e.valuestr()) { + uassert(16770, "can't have more than one geo field", geoFieldOut->size() == 0); + uassert(16771, "the geo field has to be first in index", + otherFieldsOut->size() == 0); + *geoFieldOut = e.fieldName(); + } else { + uassert(16772, "geoSearch can only have 1 non-geo field for now", + otherFieldsOut->size() == 0); + otherFieldsOut->push_back(e.fieldName()); + } + } + } + + static void parse2dsphereParams(const BSONObj& infoObj, + S2IndexingParams* out) { + // Set up basic params. + out->maxKeysPerInsert = 200; + + // This is advisory. 
+ out->maxCellsInCovering = 50; + + // Near distances are specified in meters...sometimes. + out->radius = kRadiusOfEarthInMeters; + + // These are not advisory. + out->finestIndexedLevel = configValueWithDefaultInt(infoObj, + "finestIndexedLevel", + S2::kAvgEdge.GetClosestLevel(500.0 / out->radius)); + + out->coarsestIndexedLevel = configValueWithDefaultInt(infoObj, + "coarsestIndexedLevel", + S2::kAvgEdge.GetClosestLevel(100 * 1000.0 / out->radius)); + + static const string kIndexVersionFieldName("2dsphereIndexVersion"); + + // Determine which version of this index we're using. If none was set in the descriptor, + // assume S2_INDEX_VERSION_1 (alas, the first version predates the existence of the version + // field). + out->indexVersion = static_cast<S2IndexVersion>(configValueWithDefaultInt(infoObj, + kIndexVersionFieldName, + S2_INDEX_VERSION_1)); + + uassert(16747, "coarsestIndexedLevel must be >= 0", out->coarsestIndexedLevel >= 0); + uassert(16748, "finestIndexedLevel must be <= 30", out->finestIndexedLevel <= 30); + uassert(16749, "finestIndexedLevel must be >= coarsestIndexedLevel", + out->finestIndexedLevel >= out->coarsestIndexedLevel); + + massert(17395, + mongoutils::str::stream() << "unsupported geo index version { " << kIndexVersionFieldName + << " : " << out->indexVersion << " }, only support versions: [" + << S2_INDEX_VERSION_1 << "," << S2_INDEX_VERSION_2 << "]", + out->indexVersion == S2_INDEX_VERSION_2 || out->indexVersion == S2_INDEX_VERSION_1); + } + + private: + static double configValueWithDefaultDouble(const BSONObj& infoObj, + const string& name, + double def) { + BSONElement e = infoObj[name]; + if (e.isNumber()) { return e.numberDouble(); } + return def; + } + + static int configValueWithDefaultInt(const BSONObj& infoObj, const string& name, int def) { + BSONElement e = infoObj[name]; + if (e.isNumber()) { return e.numberInt(); } + return def; + } + + }; + +} // namespace mongo diff --git a/src/mongo/db/index/external_key_generator.cpp 
b/src/mongo/db/index/external_key_generator.cpp new file mode 100644 index 00000000000..ec639b6ad5f --- /dev/null +++ b/src/mongo/db/index/external_key_generator.cpp @@ -0,0 +1,97 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/db/index/external_key_generator.h" + +#include "mongo/db/fts/fts_index_format.h" +#include "mongo/db/geo/s2common.h" +#include "mongo/db/index_names.h" +#include "mongo/db/index/2d_common.h" +#include "mongo/db/index/btree_key_generator.h" +#include "mongo/db/index/expression_keys_private.h" +#include "mongo/db/index/expression_params.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + + void getKeysForUpgradeChecking(const BSONObj& infoObj, + const BSONObj& doc, + BSONObjSet* keys) { + + BSONObj keyPattern = infoObj.getObjectField("key"); + + string type = IndexNames::findPluginName(keyPattern); + + if (IndexNames::GEO_2D == type) { + TwoDIndexingParams params; + ExpressionParams::parseTwoDParams(infoObj, &params); + ExpressionKeysPrivate::get2DKeys(doc, params, keys, NULL); + } + else if (IndexNames::GEO_HAYSTACK == type) { + string geoField; + vector<string> otherFields; + double bucketSize; + ExpressionParams::parseHaystackParams(infoObj, &geoField, &otherFields, &bucketSize); + ExpressionKeysPrivate::getHaystackKeys(doc, geoField, otherFields, bucketSize, keys); + } + else if (IndexNames::GEO_2DSPHERE == type) { + S2IndexingParams params; + ExpressionParams::parse2dsphereParams(infoObj, &params); + ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, keys); + } + else if (IndexNames::TEXT == type) { + fts::FTSSpec spec(infoObj); + ExpressionKeysPrivate::getFTSKeys(doc, spec, keys); + } + else if (IndexNames::HASHED == type) { + HashSeed seed; + int version; + string field; + ExpressionParams::parseHashParams(infoObj, &seed, &version, &field); + ExpressionKeysPrivate::getHashKeys(doc, field, seed, version, infoObj["sparse"].trueValue(), keys); + } + else { + invariant(IndexNames::BTREE == type); + + std::vector<const char *> fieldNames; + std::vector<BSONElement> fixed; + BSONObjIterator keyIt(keyPattern); + while (keyIt.more()) { + BSONElement patternElt = keyIt.next(); + fieldNames.push_back(patternElt.fieldName()); + 
fixed.push_back(BSONElement()); + } + + // XXX: do we care about version + BtreeKeyGeneratorV1 keyGen(fieldNames, fixed, infoObj["sparse"].trueValue()); + + keyGen.getKeys(doc, keys); + } + } + +} // namespace mongo diff --git a/src/mongo/db/index/external_key_generator.h b/src/mongo/db/index/external_key_generator.h new file mode 100644 index 00000000000..8379dc981ac --- /dev/null +++ b/src/mongo/db/index/external_key_generator.h @@ -0,0 +1,47 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#pragma once + +#include <vector> + +#include "mongo/db/jsobj.h" + +namespace mongo { + + /** + * Please, please, please do not rely on this existing. If you write code that uses + * it please talk to hk first. + * + * This is visible only for for upgrade checking. + */ + void getKeysForUpgradeChecking(const BSONObj& infoObj, + const BSONObj& doc, + BSONObjSet* keys); + +} // namespace mongo diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp index 3eac70d8e71..c803867b812 100644 --- a/src/mongo/db/index/fts_access_method.cpp +++ b/src/mongo/db/index/fts_access_method.cpp @@ -27,7 +27,7 @@ */ #include "mongo/db/index/fts_access_method.h" -#include "mongo/db/index/expression_key_generator.h" +#include "mongo/db/index/expression_keys_private.h" namespace mongo { @@ -35,7 +35,7 @@ namespace mongo { : BtreeBasedAccessMethod(btreeState), _ftsSpec(btreeState->descriptor()->infoObj()) { } void FTSAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - getFTSKeys(obj, _ftsSpec, keys); + ExpressionKeysPrivate::getFTSKeys(obj, _ftsSpec, keys); } } // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp index a658e7078f4..34729a1cf77 100644 --- a/src/mongo/db/index/hash_access_method.cpp +++ b/src/mongo/db/index/hash_access_method.cpp @@ -28,7 +28,8 @@ #include "mongo/db/structure/btree/btree.h" #include "mongo/db/hasher.h" -#include "mongo/db/index/expression_key_generator.h" +#include "mongo/db/index/expression_keys_private.h" +#include "mongo/db/index/expression_params.h" #include "mongo/db/index/hash_access_method.h" namespace mongo { @@ -38,44 +39,21 @@ namespace mongo { const IndexDescriptor* descriptor = btreeState->descriptor(); - const string HASHED_INDEX_TYPE_IDENTIFIER = "hashed"; - - //change these if single-field limitation lifted later - uassert(16763, "Currently only single field hashed index supported." 
, + // We can change these if the single-field limitation is lifted later. + uassert(16763, "Currently only single field hashed index supported.", 1 == descriptor->getNumFields()); + uassert(16764, "Currently hashed indexes cannot guarantee uniqueness. Use a regular index.", !descriptor->unique()); - // Default _seed to DEFAULT_HASH_SEED if "seed" is not included in the index spec - // or if the value of "seed" is not a number - - // *** WARNING *** - // Choosing non-default seeds will invalidate hashed sharding - // Changing the seed default will break existing indexes and sharded collections - - if ( descriptor->getInfoElement( "seed" ).eoo() ) { - _seed = BSONElementHasher::DEFAULT_HASH_SEED; - } - else { - _seed = descriptor->getInfoElement("seed").numberInt(); - } - - //In case we have hashed indexes based on other hash functions in - //the future, we store a hashVersion number. If hashVersion changes, - // "makeSingleHashKey" will need to change accordingly. - //Defaults to 0 if "hashVersion" is not included in the index spec - //or if the value of "hashversion" is not a number - _hashVersion = descriptor->getInfoElement("hashVersion").numberInt(); - - //Get the hashfield name - BSONElement firstElt = descriptor->keyPattern().firstElement(); - massert(16765, "error: no hashed index field", - firstElt.str().compare(HASHED_INDEX_TYPE_IDENTIFIER) == 0); - _hashedField = firstElt.fieldName(); + ExpressionParams::parseHashParams(descriptor->infoObj(), + &_seed, + &_hashVersion, + &_hashedField); } void HashAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - getHashKeys(obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), keys); + ExpressionKeysPrivate::getHashKeys(obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), keys); } } // namespace mongo diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp index 39d19070a38..76250d6797e 100644 --- 
a/src/mongo/db/index/haystack_access_method.cpp +++ b/src/mongo/db/index/haystack_access_method.cpp @@ -30,7 +30,8 @@ #include "mongo/base/status.h" #include "mongo/db/geo/hash.h" -#include "mongo/db/index/expression_key_generator.h" +#include "mongo/db/index/expression_keys_private.h" +#include "mongo/db/index/expression_params.h" #include "mongo/db/index/haystack_access_method_internal.h" #include "mongo/db/jsobj.h" #include "mongo/db/pdfile.h" @@ -38,41 +39,22 @@ namespace mongo { - static const string GEOSEARCHNAME = "geoHaystack"; - HaystackAccessMethod::HaystackAccessMethod(IndexCatalogEntry* btreeState) : BtreeBasedAccessMethod(btreeState) { const IndexDescriptor* descriptor = btreeState->descriptor(); - BSONElement e = descriptor->getInfoElement("bucketSize"); - uassert(16777, "need bucketSize", e.isNumber()); - _bucketSize = e.numberDouble(); - uassert(16769, "bucketSize cannot be zero", _bucketSize != 0.0); - - // Example: - // db.foo.ensureIndex({ pos : "geoHaystack", type : 1 }, { bucketSize : 1 }) - BSONObjIterator i(descriptor->keyPattern()); - while (i.more()) { - BSONElement e = i.next(); - if (e.type() == String && GEOSEARCHNAME == e.valuestr()) { - uassert(16770, "can't have more than one geo field", _geoField.size() == 0); - uassert(16771, "the geo field has to be first in index", - _otherFields.size() == 0); - _geoField = e.fieldName(); - } else { - uassert(16772, "geoSearch can only have 1 non-geo field for now", - _otherFields.size() == 0); - _otherFields.push_back(e.fieldName()); - } - } + ExpressionParams::parseHaystackParams(descriptor->infoObj(), + &_geoField, + &_otherFields, + &_bucketSize); uassert(16773, "no geo field specified", _geoField.size()); uassert(16774, "no non-geo fields specified", _otherFields.size()); } void HaystackAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys); + ExpressionKeysPrivate::getHaystackKeys(obj, _geoField, _otherFields, 
_bucketSize, keys); } void HaystackAccessMethod::searchCommand(const BSONObj& nearObj, double maxDistance, @@ -85,8 +67,8 @@ namespace mongo { int x, y; { BSONObjIterator i(nearObj); - x = hashHaystackElement(i.next(), _bucketSize); - y = hashHaystackElement(i.next(), _bucketSize); + x = ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize); + y = ExpressionKeysPrivate::hashHaystackElement(i.next(), _bucketSize); } int scale = static_cast<int>(ceil(maxDistance / _bucketSize)); @@ -97,7 +79,7 @@ namespace mongo { for (int a = -scale; a <= scale && !hopper.limitReached(); ++a) { for (int b = -scale; b <= scale && !hopper.limitReached(); ++b) { BSONObjBuilder bb; - bb.append("", makeHaystackString(x + a, y + b)); + bb.append("", ExpressionKeysPrivate::makeHaystackString(x + a, y + b)); for (unsigned i = 0; i < _otherFields.size(); i++) { // See if the non-geo field we're indexing on is in the provided search term. @@ -110,9 +92,6 @@ namespace mongo { BSONObj key = bb.obj(); - // TODO(hk): this keeps a set of all DiskLoc seen in this pass so that we don't - // consider the element twice. Do we want to instead store a hash of the set? - // Is this often big? 
unordered_set<DiskLoc, DiskLoc::Hasher> thisPass; diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp index 493fd7da300..9cbfd2d418b 100644 --- a/src/mongo/db/index/s2_access_method.cpp +++ b/src/mongo/db/index/s2_access_method.cpp @@ -35,49 +35,21 @@ #include "mongo/db/geo/geoconstants.h" #include "mongo/db/geo/s2common.h" #include "mongo/db/index_names.h" -#include "mongo/db/index/expression_key_generator.h" +#include "mongo/db/index/expression_keys_private.h" +#include "mongo/db/index/expression_params.h" #include "mongo/db/jsobj.h" namespace mongo { - static const string kIndexVersionFieldName("2dsphereIndexVersion"); - static int configValueWithDefault(const IndexDescriptor *desc, const string& name, int def) { - BSONElement e = desc->getInfoElement(name); - if (e.isNumber()) { return e.numberInt(); } - return def; - } + static const string kIndexVersionFieldName("2dsphereIndexVersion"); S2AccessMethod::S2AccessMethod(IndexCatalogEntry* btreeState) : BtreeBasedAccessMethod(btreeState) { const IndexDescriptor* descriptor = btreeState->descriptor(); - // Set up basic params. - _params.maxKeysPerInsert = 200; - // This is advisory. - _params.maxCellsInCovering = 50; - // Near distances are specified in meters...sometimes. - _params.radius = kRadiusOfEarthInMeters; - // These are not advisory. - _params.finestIndexedLevel = configValueWithDefault(descriptor, "finestIndexedLevel", - S2::kAvgEdge.GetClosestLevel(500.0 / _params.radius)); - _params.coarsestIndexedLevel = configValueWithDefault(descriptor, "coarsestIndexedLevel", - S2::kAvgEdge.GetClosestLevel(100 * 1000.0 / _params.radius)); - // Determine which version of this index we're using. If none was set in the descriptor, - // assume S2_INDEX_VERSION_1 (alas, the first version predates the existence of the version - // field). 
- _params.indexVersion = static_cast<S2IndexVersion>(configValueWithDefault( - descriptor, kIndexVersionFieldName, S2_INDEX_VERSION_1)); - uassert(16747, "coarsestIndexedLevel must be >= 0", _params.coarsestIndexedLevel >= 0); - uassert(16748, "finestIndexedLevel must be <= 30", _params.finestIndexedLevel <= 30); - uassert(16749, "finestIndexedLevel must be >= coarsestIndexedLevel", - _params.finestIndexedLevel >= _params.coarsestIndexedLevel); - massert(17395, - str::stream() << "unsupported geo index version { " << kIndexVersionFieldName - << " : " << _params.indexVersion << " }, only support versions: [" - << S2_INDEX_VERSION_1 << "," << S2_INDEX_VERSION_2 << "]", - _params.indexVersion == S2_INDEX_VERSION_2 - || _params.indexVersion == S2_INDEX_VERSION_1); + ExpressionParams::parse2dsphereParams(descriptor->infoObj(), + &_params); int geoFields = 0; @@ -95,6 +67,7 @@ namespace mongo { e.isNumber() ); } } + uassert(16750, "Expect at least one geo field, spec=" + descriptor->keyPattern().toString(), geoFields >= 1); @@ -128,7 +101,7 @@ namespace mongo { } void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { - getS2Keys(obj, _descriptor->keyPattern(), _params, keys); + ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys); } } // namespace mongo |