diff options
22 files changed, 589 insertions, 25 deletions
diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp index 2a23509fd0a..5e001a2d4e6 100644 --- a/src/mongo/db/catalog/index_catalog.cpp +++ b/src/mongo/db/catalog/index_catalog.cpp @@ -562,8 +562,8 @@ Status IndexCatalog::_isSpecOk(OperationContext* txn, const BSONObj& spec) const BSONElement collationElement = spec.getField("collation"); if (collationElement) { string pluginName = IndexNames::findPluginName(key); - if ((pluginName != IndexNames::BTREE) && (pluginName != IndexNames::GEO_2D) && - (pluginName != IndexNames::GEO_2DSPHERE) && (pluginName != IndexNames::HASHED)) { + if ((pluginName != IndexNames::BTREE) && (pluginName != IndexNames::GEO_2DSPHERE) && + (pluginName != IndexNames::HASHED)) { return Status(ErrorCodes::CannotCreateIndex, str::stream() << "\"collation\" not supported for index type " << pluginName); diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h index 1befbb82eb0..909b93520ba 100644 --- a/src/mongo/db/catalog/index_catalog_entry.h +++ b/src/mongo/db/catalog/index_catalog_entry.h @@ -87,6 +87,10 @@ public: return _filterExpression.get(); } + CollatorInterface* getCollator() { + return _collator.get(); + } + /// --------------------- const RecordId& head(OperationContext* txn) const; diff --git a/src/mongo/db/exec/geo_near.cpp b/src/mongo/db/exec/geo_near.cpp index 208c3b36783..65bd1defd98 100644 --- a/src/mongo/db/exec/geo_near.cpp +++ b/src/mongo/db/exec/geo_near.cpp @@ -743,7 +743,9 @@ GeoNear2DSphereStage::GeoNear2DSphereStage(const GeoNearParams& nearParams, _specificStats.keyPattern = s2Index->keyPattern(); _specificStats.indexName = s2Index->indexName(); _specificStats.indexVersion = s2Index->version(); - ExpressionParams::parse2dsphereParams(s2Index->infoObj(), &_indexParams); + // TODO SERVER-23968: change nullptr to the appropriate collator. 
+ CollatorInterface* collator = nullptr; + ExpressionParams::initialize2dsphereParams(s2Index->infoObj(), collator, &_indexParams); } GeoNear2DSphereStage::~GeoNear2DSphereStage() {} diff --git a/src/mongo/db/index/2d_key_generator_test.cpp b/src/mongo/db/index/2d_key_generator_test.cpp new file mode 100644 index 00000000000..256dc7ecc2f --- /dev/null +++ b/src/mongo/db/index/2d_key_generator_test.cpp @@ -0,0 +1,122 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex + +#include "mongo/platform/basic.h" + +#include "mongo/db/index/expression_keys_private.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/index/2d_common.h" +#include "mongo/db/index/expression_params.h" +#include "mongo/db/json.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/log.h" + +using namespace mongo; + +namespace { + +std::string dumpKeyset(const BSONObjSet& objs) { + std::stringstream ss; + ss << "[ "; + for (BSONObjSet::iterator i = objs.begin(); i != objs.end(); ++i) { + ss << i->toString() << " "; + } + ss << "]"; + + return ss.str(); +} + +bool assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) { + if (expectedKeys != actualKeys) { + log() << "Expected: " << dumpKeyset(expectedKeys) << ", " + << "Actual: " << dumpKeyset(actualKeys); + return false; + } + return true; +} + +BSONObj make2DKey(const TwoDIndexingParams& params, int x, int y, BSONElement trailingFields) { + BSONObjBuilder bob; + BSONObj locObj = BSON_ARRAY(x << y); + params.geoHashConverter->hash(locObj, nullptr).appendHashMin(&bob, ""); + bob.append(trailingFields); + return bob.obj(); +} + +TEST(2dKeyGeneratorTest, TrailingField) { + BSONObj obj = fromjson("{a: [0, 0], b: 5}"); + BSONObj infoObj = fromjson("{key: {a: '2d', b: 1}}"); + TwoDIndexingParams params; + ExpressionParams::parseTwoDParams(infoObj, &params); + BSONObjSet actualKeys; + std::vector<BSONObj> locs; + ExpressionKeysPrivate::get2DKeys(obj, params, &actualKeys, &locs); + + BSONObjSet expectedKeys; + BSONObj trailingFields = BSON("" << 5); + expectedKeys.insert(make2DKey(params, 0, 0, trailingFields.firstElement())); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(2dKeyGeneratorTest, ArrayTrailingField) { + BSONObj obj = fromjson("{a: [0, 0], b: [5, 6]}"); + BSONObj infoObj = fromjson("{key: {a: '2d', b: 1}}"); + TwoDIndexingParams params; + 
ExpressionParams::parseTwoDParams(infoObj, &params); + BSONObjSet actualKeys; + std::vector<BSONObj> locs; + ExpressionKeysPrivate::get2DKeys(obj, params, &actualKeys, &locs); + + BSONObjSet expectedKeys; + BSONObj trailingFields = BSON("" << BSON_ARRAY(5 << 6)); + expectedKeys.insert(make2DKey(params, 0, 0, trailingFields.firstElement())); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(2dKeyGeneratorTest, ArrayOfObjectsTrailingField) { + BSONObj obj = fromjson("{a: [0, 0], b: [{c: 5}, {c: 6}]}"); + BSONObj infoObj = fromjson("{key: {a: '2d', 'b.c': 1}}"); + TwoDIndexingParams params; + ExpressionParams::parseTwoDParams(infoObj, &params); + BSONObjSet actualKeys; + std::vector<BSONObj> locs; + ExpressionKeysPrivate::get2DKeys(obj, params, &actualKeys, &locs); + + BSONObjSet expectedKeys; + BSONObj trailingFields = BSON("" << BSON_ARRAY(5 << 6)); + expectedKeys.insert(make2DKey(params, 0, 0, trailingFields.firstElement())); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +} // namespace diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index c0e6a9241ac..97fba307203 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -54,14 +54,18 @@ env.Library( '$BUILD_DIR/mongo/db/geo/geometry', '$BUILD_DIR/mongo/db/geo/geoparser', '$BUILD_DIR/mongo/db/mongohasher', + '$BUILD_DIR/mongo/db/query/collation/collation_serializer', '$BUILD_DIR/third_party/s2/s2', ] ) env.CppUnitTest( - target='btree_key_generator_test', + target='key_generator_test', source=[ + '2d_key_generator_test.cpp', 'btree_key_generator_test.cpp', + 'hash_key_generator_test.cpp', + 's2_key_generator_test.cpp', ], LIBDEPS=[ 'key_generator', diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp index 941b35fc8b6..a3708e8caed 100644 --- a/src/mongo/db/index/btree_access_method.cpp +++ b/src/mongo/db/index/btree_access_method.cpp @@ -55,9 +55,8 @@ 
BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn if (0 == _descriptor->version()) { _keyGenerator.reset(new BtreeKeyGeneratorV0(fieldNames, fixed, _descriptor->isSparse())); } else if (1 == _descriptor->version()) { - // TODO SERVER-23092: change nullptr to the appropriate CollatorInterface*. - _keyGenerator.reset( - new BtreeKeyGeneratorV1(fieldNames, fixed, _descriptor->isSparse(), nullptr)); + _keyGenerator.reset(new BtreeKeyGeneratorV1( + fieldNames, fixed, _descriptor->isSparse(), btreeState->getCollator())); } else { massert(16745, "Invalid index version for key generation.", false); } diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp index 8ed6803cc04..e8a9a3b0a2e 100644 --- a/src/mongo/db/index/expression_keys_private.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -40,6 +40,7 @@ #include "mongo/db/index_names.h" #include "mongo/db/index/2d_common.h" #include "mongo/db/index/s2_common.h" +#include "mongo/db/query/collation/collation_index_key.h" #include "mongo/util/assert_util.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" @@ -148,7 +149,7 @@ void getS2GeoKeys(const BSONObj& document, * Expands array and appends items to 'out'. * Used by getOneLiteralKey. */ -void getS2LiteralKeysArray(const BSONObj& obj, BSONObjSet* out) { +void getS2LiteralKeysArray(const BSONObj& obj, CollatorInterface* collator, BSONObjSet* out) { BSONObjIterator objIt(obj); if (!objIt.more()) { // Empty arrays are indexed as undefined. @@ -159,7 +160,7 @@ void getS2LiteralKeysArray(const BSONObj& obj, BSONObjSet* out) { // Non-empty arrays are exploded. while (objIt.more()) { BSONObjBuilder b; - b.appendAs(objIt.next(), ""); + CollationIndexKey::collationAwareIndexKeyAppend(objIt.next(), collator, &b); out->insert(b.obj()); } } @@ -170,13 +171,13 @@ void getS2LiteralKeysArray(const BSONObj& obj, BSONObjSet* out) { * Otherwise, adds 'elt' as a single element. 
* Used by getLiteralKeys. */ -void getS2OneLiteralKey(const BSONElement& elt, BSONObjSet* out) { +void getS2OneLiteralKey(const BSONElement& elt, CollatorInterface* collator, BSONObjSet* out) { if (Array == elt.type()) { - getS2LiteralKeysArray(elt.Obj(), out); + getS2LiteralKeysArray(elt.Obj(), collator, out); } else { // One thing, not an array, index as-is. BSONObjBuilder b; - b.appendAs(elt, ""); + CollationIndexKey::collationAwareIndexKeyAppend(elt, collator, &b); out->insert(b.obj()); } } @@ -185,7 +186,9 @@ void getS2OneLiteralKey(const BSONElement& elt, BSONObjSet* out) { * elements is a non-geo field. Add the values literally, expanding arrays. * Used by getS2Keys. */ -void getS2LiteralKeys(const BSONElementSet& elements, BSONObjSet* out) { +void getS2LiteralKeys(const BSONElementSet& elements, + CollatorInterface* collator, + BSONObjSet* out) { if (0 == elements.size()) { // Missing fields are indexed as null. BSONObjBuilder b; @@ -193,7 +196,7 @@ void getS2LiteralKeys(const BSONElementSet& elements, BSONObjSet* out) { out->insert(b.obj()); } else { for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) { - getS2OneLiteralKey(*i, out); + getS2OneLiteralKey(*i, collator, out); } } } @@ -321,9 +324,20 @@ void ExpressionKeysPrivate::getHashKeys(const BSONObj& obj, HashSeed seed, int hashVersion, bool isSparse, + CollatorInterface* collator, BSONObjSet* keys) { const char* cstr = hashedField.c_str(); BSONElement fieldVal = obj.getFieldDottedOrArray(cstr); + + // Convert strings to comparison keys. 
+ BSONObj fieldValObj; + if (!fieldVal.eoo()) { + BSONObjBuilder bob; + CollationIndexKey::collationAwareIndexKeyAppend(fieldVal, collator, &bob); + fieldValObj = bob.obj(); + fieldVal = fieldValObj.firstElement(); + } + uassert(16766, "Error: hashed indexes do not currently support array values", fieldVal.type() != Array); @@ -462,7 +476,7 @@ void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj, getS2GeoKeys(obj, fieldElements, params, &keysForThisField); } else { - getS2LiteralKeys(fieldElements, &keysForThisField); + getS2LiteralKeys(fieldElements, params.collator, &keysForThisField); } // We expect there to be the missing field element present in the keys if data is diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h index 9aba295b579..4267e4d66db 100644 --- a/src/mongo/db/index/expression_keys_private.h +++ b/src/mongo/db/index/expression_keys_private.h @@ -36,6 +36,7 @@ namespace mongo { +class CollatorInterface; struct TwoDIndexingParams; struct S2IndexingParams; @@ -79,6 +80,7 @@ public: HashSeed seed, int hashVersion, bool isSparse, + CollatorInterface* collator, BSONObjSet* keys); /** diff --git a/src/mongo/db/index/expression_params.cpp b/src/mongo/db/index/expression_params.cpp index deae1875d2f..a3f201ceb12 100644 --- a/src/mongo/db/index/expression_params.cpp +++ b/src/mongo/db/index/expression_params.cpp @@ -123,8 +123,11 @@ void ExpressionParams::parseHaystackParams(const BSONObj& infoObj, } } -void ExpressionParams::parse2dsphereParams(const BSONObj& infoObj, S2IndexingParams* out) { +void ExpressionParams::initialize2dsphereParams(const BSONObj& infoObj, + CollatorInterface* collator, + S2IndexingParams* out) { // Set up basic params. + out->collator = collator; out->maxKeysPerInsert = 200; // Near distances are specified in meters...sometimes. 
diff --git a/src/mongo/db/index/expression_params.h b/src/mongo/db/index/expression_params.h index 598ffd388f5..9eb28132ceb 100644 --- a/src/mongo/db/index/expression_params.h +++ b/src/mongo/db/index/expression_params.h @@ -36,6 +36,7 @@ namespace mongo { +class CollatorInterface; struct TwoDIndexingParams; struct S2IndexingParams; @@ -53,7 +54,9 @@ void parseHaystackParams(const BSONObj& infoObj, std::vector<std::string>* otherFieldsOut, double* bucketSizeOut); -void parse2dsphereParams(const BSONObj& infoObj, S2IndexingParams* out); +void initialize2dsphereParams(const BSONObj& infoObj, + CollatorInterface* collator, + S2IndexingParams* out); } // namespace ExpressionParams diff --git a/src/mongo/db/index/external_key_generator.cpp b/src/mongo/db/index/external_key_generator.cpp index efc1d8ce32e..de1aec11d64 100644 --- a/src/mongo/db/index/external_key_generator.cpp +++ b/src/mongo/db/index/external_key_generator.cpp @@ -60,7 +60,10 @@ void getKeysForUpgradeChecking(const BSONObj& infoObj, const BSONObj& doc, BSONO ExpressionKeysPrivate::getHaystackKeys(doc, geoField, otherFields, bucketSize, keys); } else if (IndexNames::GEO_2DSPHERE == type) { S2IndexingParams params; - ExpressionParams::parse2dsphereParams(infoObj, &params); + // TODO SERVER-22251: If the index has a collator, it should be passed here, or the keys + // generated will be wrong. + CollatorInterface* collator = nullptr; + ExpressionParams::initialize2dsphereParams(infoObj, collator, &params); ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, keys); } else if (IndexNames::TEXT == type) { fts::FTSSpec spec(infoObj); @@ -70,8 +73,11 @@ void getKeysForUpgradeChecking(const BSONObj& infoObj, const BSONObj& doc, BSONO int version; std::string field; ExpressionParams::parseHashParams(infoObj, &seed, &version, &field); + // TODO SERVER-22251: If the index has a collator, it should be passed here, or the keys + // generated will be wrong. 
+ CollatorInterface* collator = nullptr; ExpressionKeysPrivate::getHashKeys( - doc, field, seed, version, infoObj["sparse"].trueValue(), keys); + doc, field, seed, version, infoObj["sparse"].trueValue(), collator, keys); } else { invariant(IndexNames::BTREE == type); diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp index 8262c9019ab..ef94a249f9c 100644 --- a/src/mongo/db/index/hash_access_method.cpp +++ b/src/mongo/db/index/hash_access_method.cpp @@ -47,11 +47,13 @@ HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte !descriptor->unique()); ExpressionParams::parseHashParams(descriptor->infoObj(), &_seed, &_hashVersion, &_hashedField); + + _collator = btreeState->getCollator(); } void HashAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const { ExpressionKeysPrivate::getHashKeys( - obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), keys); + obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), _collator, keys); } } // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h index ea3f36bb647..610bf6db015 100644 --- a/src/mongo/db/index/hash_access_method.h +++ b/src/mongo/db/index/hash_access_method.h @@ -58,6 +58,10 @@ private: int _hashVersion; BSONObj _missingKey; + + // Null if this index orders strings according to the simple binary compare. If non-null, + // represents the collator used to generate index keys for indexed strings. + CollatorInterface* _collator; }; } // namespace mongo diff --git a/src/mongo/db/index/hash_key_generator_test.cpp b/src/mongo/db/index/hash_key_generator_test.cpp new file mode 100644 index 00000000000..047c59b9994 --- /dev/null +++ b/src/mongo/db/index/hash_key_generator_test.cpp @@ -0,0 +1,126 @@ +/** + * Copyright (C) 2014 MongoDB Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex + +#include "mongo/platform/basic.h" + +#include "mongo/db/index/expression_keys_private.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/hasher.h" +#include "mongo/db/json.h" +#include "mongo/db/query/collation/collator_interface_mock.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/log.h" + +using namespace mongo; + +namespace { + +const HashSeed kHashSeed = 0; +const int kHashVersion = 0; + +std::string dumpKeyset(const BSONObjSet& objs) { + std::stringstream ss; + ss << "[ "; + for (BSONObjSet::iterator i = objs.begin(); i != objs.end(); ++i) { + ss << i->toString() << " "; + } + ss << "]"; + + return ss.str(); +} + +bool assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) { + if (expectedKeys != actualKeys) { + log() << "Expected: " << dumpKeyset(expectedKeys) << ", " + << "Actual: " << dumpKeyset(actualKeys); + return false; + } + return true; +} + +BSONObj makeHashKey(BSONElement elt) { + return BSON("" << BSONElementHasher::hash64(elt, kHashSeed)); +} + +TEST(HashKeyGeneratorTest, CollationAppliedBeforeHashing) { + BSONObj obj = fromjson("{a: 'string'}"); + BSONObjSet actualKeys; + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionKeysPrivate::getHashKeys( + obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys); + + BSONObj backwardsObj = fromjson("{a: 'gnirts'}"); + BSONObjSet expectedKeys; + expectedKeys.insert(makeHashKey(backwardsObj["a"])); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(HashKeyGeneratorTest, CollationDoesNotAffectNonStringFields) { + BSONObj obj = fromjson("{a: 5}"); + BSONObjSet actualKeys; + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionKeysPrivate::getHashKeys( + obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys); + + BSONObjSet expectedKeys; + 
expectedKeys.insert(makeHashKey(obj["a"])); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +// TODO SERVER-23172: remove test. +TEST(HashKeyGeneratorTest, CollationDoesNotAffectStringsInEmbeddedDocuments) { + BSONObj obj = fromjson("{a: {b: 'string'}}"); + BSONObjSet actualKeys; + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionKeysPrivate::getHashKeys( + obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(makeHashKey(obj["a"])); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(HashKeyGeneratorTest, NoCollation) { + BSONObj obj = fromjson("{a: 'string'}"); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getHashKeys( + obj, "a", kHashSeed, kHashVersion, false, nullptr, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(makeHashKey(obj["a"])); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +} // namespace diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp index 15079d17620..6c3c5c18302 100644 --- a/src/mongo/db/index/s2_access_method.cpp +++ b/src/mongo/db/index/s2_access_method.cpp @@ -49,7 +49,8 @@ S2AccessMethod::S2AccessMethod(IndexCatalogEntry* btreeState, SortedDataInterfac : IndexAccessMethod(btreeState, btree) { const IndexDescriptor* descriptor = btreeState->descriptor(); - ExpressionParams::parse2dsphereParams(descriptor->infoObj(), &_params); + ExpressionParams::initialize2dsphereParams( + descriptor->infoObj(), btreeState->getCollator(), &_params); int geoFields = 0; diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h index add49eccd10..a6b10d625f5 100644 --- a/src/mongo/db/index/s2_access_method.h +++ b/src/mongo/db/index/s2_access_method.h @@ -52,6 +52,10 @@ private: virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const; S2IndexingParams _params; + + // Null if this index orders 
strings according to the simple binary compare. If non-null, + // represents the collator used to generate index keys for indexed strings. + CollatorInterface* _collator; }; } // namespace mongo diff --git a/src/mongo/db/index/s2_common.cpp b/src/mongo/db/index/s2_common.cpp index e1db08edae9..9592cdab1f1 100644 --- a/src/mongo/db/index/s2_common.cpp +++ b/src/mongo/db/index/s2_common.cpp @@ -32,6 +32,7 @@ #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/geo/geometry_container.h" +#include "mongo/db/query/collation/collation_serializer.h" #include "third_party/s2/s2cellid.h" #include "third_party/s2/s2regioncoverer.h" @@ -47,6 +48,10 @@ std::string S2IndexingParams::toString() const { ss << "finestIndexedLevel: " << finestIndexedLevel << std::endl; ss << "coarsestIndexedLevel: " << coarsestIndexedLevel << std::endl; ss << "indexVersion: " << indexVersion << std::endl; + if (collator) { + ss << "collation: " << CollationSerializer::specToBSON(collator->getSpec()).toString() + << std::endl; + } return ss.str(); } diff --git a/src/mongo/db/index/s2_common.h b/src/mongo/db/index/s2_common.h index 10632e3cb84..5afbbec5fa0 100644 --- a/src/mongo/db/index/s2_common.h +++ b/src/mongo/db/index/s2_common.h @@ -31,6 +31,7 @@ #include <string> #include "mongo/db/jsobj.h" +#include "mongo/db/query/collation/collator_interface.h" class S2CellId; class S2RegionCoverer; @@ -71,6 +72,9 @@ struct S2IndexingParams { S2IndexVersion indexVersion; // Radius of the earth in meters double radius; + // Null if this index orders strings according to the simple binary compare. If non-null, + // represents the collator used to generate index keys for indexed strings. 
+ CollatorInterface* collator = nullptr; std::string toString() const; diff --git a/src/mongo/db/index/s2_key_generator_test.cpp b/src/mongo/db/index/s2_key_generator_test.cpp new file mode 100644 index 00000000000..e13c14c133d --- /dev/null +++ b/src/mongo/db/index/s2_key_generator_test.cpp @@ -0,0 +1,255 @@ +/** + * Copyright (C) 2014 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex + +#include "mongo/platform/basic.h" + +#include "mongo/db/index/expression_keys_private.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/index/s2_common.h" +#include "mongo/db/index/expression_params.h" +#include "mongo/db/json.h" +#include "mongo/db/query/collation/collator_interface_mock.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/log.h" + +using namespace mongo; + +namespace { + +std::string dumpKeyset(const BSONObjSet& objs) { + std::stringstream ss; + ss << "[ "; + for (BSONObjSet::iterator i = objs.begin(); i != objs.end(); ++i) { + ss << i->toString() << " "; + } + ss << "]"; + + return ss.str(); +} + +bool assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) { + if (expectedKeys != actualKeys) { + log() << "Expected: " << dumpKeyset(expectedKeys) << ", " + << "Actual: " << dumpKeyset(actualKeys); + return false; + } + return true; +} + +long long getCellID(int x, int y) { + BSONObj obj = BSON("a" << BSON("type" + << "Point" + << "coordinates" << BSON_ARRAY(x << y))); + BSONObj keyPattern = fromjson("{a: '2dsphere'}"); + BSONObj infoObj = fromjson("{key: {a: '2dsphere'}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterface* collator = nullptr; + ExpressionParams::initialize2dsphereParams(infoObj, collator, &params); + BSONObjSet keys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &keys); + ASSERT_EQUALS(1U, keys.size()); + return (*keys.begin()).firstElement().Long(); +} + +TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldAfterGeoField) { + BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: 'string'}"); + BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}"); + BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock 
collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" + << "gnirts")); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldBeforeGeoField) { + BSONObj obj = fromjson("{a: 'string', b: {type: 'Point', coordinates: [0, 0]}}"); + BSONObj keyPattern = fromjson("{a: 1, b: '2dsphere'}"); + BSONObj infoObj = fromjson("{key: {a: 1, b: '2dsphere'}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" + << "gnirts" + << "" << getCellID(0, 0))); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(S2KeyGeneratorTest, CollationAppliedToAllNonGeoStringFields) { + BSONObj obj = fromjson("{a: 'string', b: {type: 'Point', coordinates: [0, 0]}, c: 'string2'}"); + BSONObj keyPattern = fromjson("{a: 1, b: '2dsphere', c: 1}"); + BSONObj infoObj = fromjson("{key: {a: 1, b: '2dsphere', c: 1}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" + << "gnirts" + << "" << getCellID(0, 0) << "" + << "2gnirts")); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(S2KeyGeneratorTest, 
CollationAppliedToNonGeoStringFieldWithMultiplePathComponents) { + BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: {c: {d: 'string'}}}"); + BSONObj keyPattern = fromjson("{a: '2dsphere', 'b.c.d': 1}"); + BSONObj infoObj = fromjson("{key: {a: '2dsphere', 'b.c.d': 1}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" + << "gnirts")); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(S2KeyGeneratorTest, CollationAppliedToStringsInArray) { + BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: ['string', 'string2']}"); + BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}"); + BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" + << "gnirts")); + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" + << "2gnirts")); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(S2KeyGeneratorTest, CollationAppliedToStringsInAllArrays) { + BSONObj obj = fromjson( + "{a: {type: 'Point', coordinates: [0, 0]}, b: ['string', 'string2'], c: ['abc', 'def']}"); + BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1, c: 1}"); + BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1, c: 1}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock 
collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" + << "gnirts" + << "" + << "cba")); + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" + << "gnirts" + << "" + << "fed")); + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" + << "2gnirts" + << "" + << "cba")); + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" + << "2gnirts" + << "" + << "fed")); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(S2KeyGeneratorTest, CollationDoesNotAffectNonStringFields) { + BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: 5}"); + BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}"); + BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << 5)); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +// TODO SERVER-23172: remove test. 
+TEST(S2KeyGeneratorTest, CollationDoesNotAffectStringsInEmbeddedDocuments) { + BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: {c: 'string'}}"); + BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}"); + BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << BSON("c" + << "string"))); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(S2KeyGeneratorTest, NoCollation) { + BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: 'string'}"); + BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}"); + BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}"); + S2IndexingParams params; + CollatorInterface* collator = nullptr; + ExpressionParams::initialize2dsphereParams(infoObj, collator, &params); + BSONObjSet actualKeys; + ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys); + + BSONObjSet expectedKeys; + expectedKeys.insert(BSON("" << getCellID(0, 0) << "" + << "string")); + + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +} // namespace diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 3f89d5dd605..859caf4b737 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -596,7 +596,7 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr, verify(gme->getGeoExpression().getGeometry().hasS2Region()); const S2Region& region = gme->getGeoExpression().getGeometry().getS2Region(); S2IndexingParams indexParams; - 
ExpressionParams::parse2dsphereParams(index.infoObj, &indexParams); + ExpressionParams::initialize2dsphereParams(index.infoObj, index.collator, &indexParams); ExpressionMapping::cover2dsphere(region, indexParams, oilOut); *tightnessOut = IndexBoundsBuilder::INEXACT_FETCH; } else if (mongoutils::str::equals("2d", elt.valuestrsafe())) { diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index 8be4d834ca9..b789395cb44 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -300,7 +300,8 @@ void QueryPlannerAnalysis::analyzeGeo(const QueryPlannerParams& params, } S2IndexingParams params; - ExpressionParams::parse2dsphereParams(indexEntry.infoObj, &params); + ExpressionParams::initialize2dsphereParams( + indexEntry.infoObj, indexEntry.collator, &params); if (params.indexVersion < S2_INDEX_VERSION_3) { continue; diff --git a/src/mongo/dbtests/namespacetests.cpp b/src/mongo/dbtests/namespacetests.cpp index eadb0a4d85e..e7f7a1220db 100644 --- a/src/mongo/dbtests/namespacetests.cpp +++ b/src/mongo/dbtests/namespacetests.cpp @@ -101,7 +101,8 @@ public: // Call getKeys on the nullObj. 
BSONObjSet nullFieldKeySet; - ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0, 0, false, &nullFieldKeySet); + CollatorInterface* collator = nullptr; + ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0, 0, false, collator, &nullFieldKeySet); BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement(); ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0, 0), @@ -127,7 +128,9 @@ public: BSONObj nullObj = BSON("a" << BSONNULL); BSONObjSet nullFieldKeySet; - ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0x5eed, 0, false, &nullFieldKeySet); + CollatorInterface* collator = nullptr; + ExpressionKeysPrivate::getHashKeys( + nullObj, "a", 0x5eed, 0, false, collator, &nullFieldKeySet); BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement(); ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0x5eed, 0), |