summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tess Avitabile <tess.avitabile@mongodb.com> 2016-03-24 13:54:25 -0400
committer: Tess Avitabile <tess.avitabile@mongodb.com> 2016-05-02 10:23:51 -0400
commit: 1c5be329f5e3903d5cd4e9d106022733507b5e3f (patch)
tree: 14fb8dc680efd0dac4fe4967a98e692c121852c5
parent: ef7e7261f934d2b29c9a1cbf7731a4f733e91627 (diff)
download: mongo-1c5be329f5e3903d5cd4e9d106022733507b5e3f.tar.gz
SERVER-23092 Collation-aware index key generation
-rw-r--r-- src/mongo/db/catalog/index_catalog.cpp | 4
-rw-r--r-- src/mongo/db/catalog/index_catalog_entry.h | 4
-rw-r--r-- src/mongo/db/exec/geo_near.cpp | 4
-rw-r--r-- src/mongo/db/index/2d_key_generator_test.cpp | 122
-rw-r--r-- src/mongo/db/index/SConscript | 6
-rw-r--r-- src/mongo/db/index/btree_access_method.cpp | 5
-rw-r--r-- src/mongo/db/index/expression_keys_private.cpp | 30
-rw-r--r-- src/mongo/db/index/expression_keys_private.h | 2
-rw-r--r-- src/mongo/db/index/expression_params.cpp | 5
-rw-r--r-- src/mongo/db/index/expression_params.h | 5
-rw-r--r-- src/mongo/db/index/external_key_generator.cpp | 10
-rw-r--r-- src/mongo/db/index/hash_access_method.cpp | 4
-rw-r--r-- src/mongo/db/index/hash_access_method.h | 4
-rw-r--r-- src/mongo/db/index/hash_key_generator_test.cpp | 126
-rw-r--r-- src/mongo/db/index/s2_access_method.cpp | 3
-rw-r--r-- src/mongo/db/index/s2_access_method.h | 4
-rw-r--r-- src/mongo/db/index/s2_common.cpp | 5
-rw-r--r-- src/mongo/db/index/s2_common.h | 4
-rw-r--r-- src/mongo/db/index/s2_key_generator_test.cpp | 255
-rw-r--r-- src/mongo/db/query/index_bounds_builder.cpp | 2
-rw-r--r-- src/mongo/db/query/planner_analysis.cpp | 3
-rw-r--r-- src/mongo/dbtests/namespacetests.cpp | 7
22 files changed, 589 insertions, 25 deletions
diff --git a/src/mongo/db/catalog/index_catalog.cpp b/src/mongo/db/catalog/index_catalog.cpp
index 2a23509fd0a..5e001a2d4e6 100644
--- a/src/mongo/db/catalog/index_catalog.cpp
+++ b/src/mongo/db/catalog/index_catalog.cpp
@@ -562,8 +562,8 @@ Status IndexCatalog::_isSpecOk(OperationContext* txn, const BSONObj& spec) const
BSONElement collationElement = spec.getField("collation");
if (collationElement) {
string pluginName = IndexNames::findPluginName(key);
- if ((pluginName != IndexNames::BTREE) && (pluginName != IndexNames::GEO_2D) &&
- (pluginName != IndexNames::GEO_2DSPHERE) && (pluginName != IndexNames::HASHED)) {
+ if ((pluginName != IndexNames::BTREE) && (pluginName != IndexNames::GEO_2DSPHERE) &&
+ (pluginName != IndexNames::HASHED)) {
return Status(ErrorCodes::CannotCreateIndex,
str::stream() << "\"collation\" not supported for index type "
<< pluginName);
diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h
index 1befbb82eb0..909b93520ba 100644
--- a/src/mongo/db/catalog/index_catalog_entry.h
+++ b/src/mongo/db/catalog/index_catalog_entry.h
@@ -87,6 +87,10 @@ public:
return _filterExpression.get();
}
+    /**
+     * Returns the raw pointer held by '_collator'. The accessor does not modify this entry, so
+     * it is const-qualified and can also be called through a const IndexCatalogEntry.
+     * NOTE(review): presumably null when the index has no collation -- confirm against the
+     * code that populates '_collator'.
+     */
+    CollatorInterface* getCollator() const {
+        return _collator.get();
+    }
+
/// ---------------------
const RecordId& head(OperationContext* txn) const;
diff --git a/src/mongo/db/exec/geo_near.cpp b/src/mongo/db/exec/geo_near.cpp
index 208c3b36783..65bd1defd98 100644
--- a/src/mongo/db/exec/geo_near.cpp
+++ b/src/mongo/db/exec/geo_near.cpp
@@ -743,7 +743,9 @@ GeoNear2DSphereStage::GeoNear2DSphereStage(const GeoNearParams& nearParams,
_specificStats.keyPattern = s2Index->keyPattern();
_specificStats.indexName = s2Index->indexName();
_specificStats.indexVersion = s2Index->version();
- ExpressionParams::parse2dsphereParams(s2Index->infoObj(), &_indexParams);
+ // TODO SERVER-23968: change nullptr to the appropriate collator.
+ CollatorInterface* collator = nullptr;
+ ExpressionParams::initialize2dsphereParams(s2Index->infoObj(), collator, &_indexParams);
}
GeoNear2DSphereStage::~GeoNear2DSphereStage() {}
diff --git a/src/mongo/db/index/2d_key_generator_test.cpp b/src/mongo/db/index/2d_key_generator_test.cpp
new file mode 100644
index 00000000000..256dc7ecc2f
--- /dev/null
+++ b/src/mongo/db/index/2d_key_generator_test.cpp
@@ -0,0 +1,122 @@
+/**
+ * Copyright (C) 2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/index/expression_keys_private.h"
+
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/index/2d_common.h"
+#include "mongo/db/index/expression_params.h"
+#include "mongo/db/json.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/log.h"
+
+using namespace mongo;
+
+namespace {
+
+/**
+ * Renders the keys in 'objs' as "[ key1 key2 ... ]", using each key's toString() output.
+ */
+std::string dumpKeyset(const BSONObjSet& objs) {
+    std::stringstream rendered;
+    rendered << "[ ";
+    for (const auto& key : objs) {
+        rendered << key.toString() << " ";
+    }
+    rendered << "]";
+    return rendered.str();
+}
+
+/**
+ * Returns true when the two key sets compare equal. Otherwise logs both sets and returns false.
+ */
+bool assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) {
+    const bool keysDiffer = (expectedKeys != actualKeys);
+    if (!keysDiffer) {
+        return true;
+    }
+
+    log() << "Expected: " << dumpKeyset(expectedKeys) << ", "
+          << "Actual: " << dumpKeyset(actualKeys);
+    return false;
+}
+
+/**
+ * Builds the expected 2d index key for the point [x, y]: the minimum hash produced by
+ * 'params.geoHashConverter' followed by the element 'trailingFields'.
+ */
+BSONObj make2DKey(const TwoDIndexingParams& params, int x, int y, BSONElement trailingFields) {
+    BSONObj point = BSON_ARRAY(x << y);
+
+    BSONObjBuilder keyBuilder;
+    params.geoHashConverter->hash(point, nullptr).appendHashMin(&keyBuilder, "");
+    keyBuilder.append(trailingFields);
+    return keyBuilder.obj();
+}
+
+// A scalar trailing field is expected to follow the geo hash in the generated key.
+TEST(2dKeyGeneratorTest, TrailingField) {
+    BSONObj indexSpec = fromjson("{key: {a: '2d', b: 1}}");
+    TwoDIndexingParams params;
+    ExpressionParams::parseTwoDParams(indexSpec, &params);
+
+    // Expected: a single key for the point [0, 0] with trailing value 5.
+    BSONObj trailing = BSON("" << 5);
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(make2DKey(params, 0, 0, trailing.firstElement()));
+
+    // Actual: keys generated from the document itself.
+    BSONObj doc = fromjson("{a: [0, 0], b: 5}");
+    BSONObjSet actualKeys;
+    std::vector<BSONObj> locs;
+    ExpressionKeysPrivate::get2DKeys(doc, params, &actualKeys, &locs);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+// An array-valued trailing field is expected to appear in the key as the whole array.
+TEST(2dKeyGeneratorTest, ArrayTrailingField) {
+    BSONObj indexSpec = fromjson("{key: {a: '2d', b: 1}}");
+    TwoDIndexingParams params;
+    ExpressionParams::parseTwoDParams(indexSpec, &params);
+
+    // Expected: a single key for the point [0, 0] with trailing value [5, 6].
+    BSONObj trailing = BSON("" << BSON_ARRAY(5 << 6));
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(make2DKey(params, 0, 0, trailing.firstElement()));
+
+    // Actual: keys generated from the document itself.
+    BSONObj doc = fromjson("{a: [0, 0], b: [5, 6]}");
+    BSONObjSet actualKeys;
+    std::vector<BSONObj> locs;
+    ExpressionKeysPrivate::get2DKeys(doc, params, &actualKeys, &locs);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+// A dotted trailing path through an array of objects is expected to yield the array of the
+// matched values as the trailing key component.
+TEST(2dKeyGeneratorTest, ArrayOfObjectsTrailingField) {
+    BSONObj indexSpec = fromjson("{key: {a: '2d', 'b.c': 1}}");
+    TwoDIndexingParams params;
+    ExpressionParams::parseTwoDParams(indexSpec, &params);
+
+    // Expected: a single key for the point [0, 0] with trailing value [5, 6].
+    BSONObj trailing = BSON("" << BSON_ARRAY(5 << 6));
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(make2DKey(params, 0, 0, trailing.firstElement()));
+
+    // Actual: keys generated from the document itself.
+    BSONObj doc = fromjson("{a: [0, 0], b: [{c: 5}, {c: 6}]}");
+    BSONObjSet actualKeys;
+    std::vector<BSONObj> locs;
+    ExpressionKeysPrivate::get2DKeys(doc, params, &actualKeys, &locs);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+} // namespace
diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript
index c0e6a9241ac..97fba307203 100644
--- a/src/mongo/db/index/SConscript
+++ b/src/mongo/db/index/SConscript
@@ -54,14 +54,18 @@ env.Library(
'$BUILD_DIR/mongo/db/geo/geometry',
'$BUILD_DIR/mongo/db/geo/geoparser',
'$BUILD_DIR/mongo/db/mongohasher',
+ '$BUILD_DIR/mongo/db/query/collation/collation_serializer',
'$BUILD_DIR/third_party/s2/s2',
]
)
env.CppUnitTest(
- target='btree_key_generator_test',
+ target='key_generator_test',
source=[
+ '2d_key_generator_test.cpp',
'btree_key_generator_test.cpp',
+ 'hash_key_generator_test.cpp',
+ 's2_key_generator_test.cpp',
],
LIBDEPS=[
'key_generator',
diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp
index 941b35fc8b6..a3708e8caed 100644
--- a/src/mongo/db/index/btree_access_method.cpp
+++ b/src/mongo/db/index/btree_access_method.cpp
@@ -55,9 +55,8 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn
if (0 == _descriptor->version()) {
_keyGenerator.reset(new BtreeKeyGeneratorV0(fieldNames, fixed, _descriptor->isSparse()));
} else if (1 == _descriptor->version()) {
- // TODO SERVER-23092: change nullptr to the appropriate CollatorInterface*.
- _keyGenerator.reset(
- new BtreeKeyGeneratorV1(fieldNames, fixed, _descriptor->isSparse(), nullptr));
+ _keyGenerator.reset(new BtreeKeyGeneratorV1(
+ fieldNames, fixed, _descriptor->isSparse(), btreeState->getCollator()));
} else {
massert(16745, "Invalid index version for key generation.", false);
}
diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp
index 8ed6803cc04..e8a9a3b0a2e 100644
--- a/src/mongo/db/index/expression_keys_private.cpp
+++ b/src/mongo/db/index/expression_keys_private.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/index_names.h"
#include "mongo/db/index/2d_common.h"
#include "mongo/db/index/s2_common.h"
+#include "mongo/db/query/collation/collation_index_key.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
@@ -148,7 +149,7 @@ void getS2GeoKeys(const BSONObj& document,
* Expands array and appends items to 'out'.
* Used by getOneLiteralKey.
*/
-void getS2LiteralKeysArray(const BSONObj& obj, BSONObjSet* out) {
+void getS2LiteralKeysArray(const BSONObj& obj, CollatorInterface* collator, BSONObjSet* out) {
BSONObjIterator objIt(obj);
if (!objIt.more()) {
// Empty arrays are indexed as undefined.
@@ -159,7 +160,7 @@ void getS2LiteralKeysArray(const BSONObj& obj, BSONObjSet* out) {
// Non-empty arrays are exploded.
while (objIt.more()) {
BSONObjBuilder b;
- b.appendAs(objIt.next(), "");
+ CollationIndexKey::collationAwareIndexKeyAppend(objIt.next(), collator, &b);
out->insert(b.obj());
}
}
@@ -170,13 +171,13 @@ void getS2LiteralKeysArray(const BSONObj& obj, BSONObjSet* out) {
* Otherwise, adds 'elt' as a single element.
* Used by getLiteralKeys.
*/
-void getS2OneLiteralKey(const BSONElement& elt, BSONObjSet* out) {
+void getS2OneLiteralKey(const BSONElement& elt, CollatorInterface* collator, BSONObjSet* out) {
if (Array == elt.type()) {
- getS2LiteralKeysArray(elt.Obj(), out);
+ getS2LiteralKeysArray(elt.Obj(), collator, out);
} else {
// One thing, not an array, index as-is.
BSONObjBuilder b;
- b.appendAs(elt, "");
+ CollationIndexKey::collationAwareIndexKeyAppend(elt, collator, &b);
out->insert(b.obj());
}
}
@@ -185,7 +186,9 @@ void getS2OneLiteralKey(const BSONElement& elt, BSONObjSet* out) {
* elements is a non-geo field. Add the values literally, expanding arrays.
* Used by getS2Keys.
*/
-void getS2LiteralKeys(const BSONElementSet& elements, BSONObjSet* out) {
+void getS2LiteralKeys(const BSONElementSet& elements,
+ CollatorInterface* collator,
+ BSONObjSet* out) {
if (0 == elements.size()) {
// Missing fields are indexed as null.
BSONObjBuilder b;
@@ -193,7 +196,7 @@ void getS2LiteralKeys(const BSONElementSet& elements, BSONObjSet* out) {
out->insert(b.obj());
} else {
for (BSONElementSet::iterator i = elements.begin(); i != elements.end(); ++i) {
- getS2OneLiteralKey(*i, out);
+ getS2OneLiteralKey(*i, collator, out);
}
}
}
@@ -321,9 +324,20 @@ void ExpressionKeysPrivate::getHashKeys(const BSONObj& obj,
HashSeed seed,
int hashVersion,
bool isSparse,
+ CollatorInterface* collator,
BSONObjSet* keys) {
const char* cstr = hashedField.c_str();
BSONElement fieldVal = obj.getFieldDottedOrArray(cstr);
+
+ // Convert strings to comparison keys.
+ BSONObj fieldValObj;
+ if (!fieldVal.eoo()) {
+ BSONObjBuilder bob;
+ CollationIndexKey::collationAwareIndexKeyAppend(fieldVal, collator, &bob);
+ fieldValObj = bob.obj();
+ fieldVal = fieldValObj.firstElement();
+ }
+
uassert(16766,
"Error: hashed indexes do not currently support array values",
fieldVal.type() != Array);
@@ -462,7 +476,7 @@ void ExpressionKeysPrivate::getS2Keys(const BSONObj& obj,
getS2GeoKeys(obj, fieldElements, params, &keysForThisField);
} else {
- getS2LiteralKeys(fieldElements, &keysForThisField);
+ getS2LiteralKeys(fieldElements, params.collator, &keysForThisField);
}
// We expect there to be the missing field element present in the keys if data is
diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h
index 9aba295b579..4267e4d66db 100644
--- a/src/mongo/db/index/expression_keys_private.h
+++ b/src/mongo/db/index/expression_keys_private.h
@@ -36,6 +36,7 @@
namespace mongo {
+class CollatorInterface;
struct TwoDIndexingParams;
struct S2IndexingParams;
@@ -79,6 +80,7 @@ public:
HashSeed seed,
int hashVersion,
bool isSparse,
+ CollatorInterface* collator,
BSONObjSet* keys);
/**
diff --git a/src/mongo/db/index/expression_params.cpp b/src/mongo/db/index/expression_params.cpp
index deae1875d2f..a3f201ceb12 100644
--- a/src/mongo/db/index/expression_params.cpp
+++ b/src/mongo/db/index/expression_params.cpp
@@ -123,8 +123,11 @@ void ExpressionParams::parseHaystackParams(const BSONObj& infoObj,
}
}
-void ExpressionParams::parse2dsphereParams(const BSONObj& infoObj, S2IndexingParams* out) {
+void ExpressionParams::initialize2dsphereParams(const BSONObj& infoObj,
+ CollatorInterface* collator,
+ S2IndexingParams* out) {
// Set up basic params.
+ out->collator = collator;
out->maxKeysPerInsert = 200;
// Near distances are specified in meters...sometimes.
diff --git a/src/mongo/db/index/expression_params.h b/src/mongo/db/index/expression_params.h
index 598ffd388f5..9eb28132ceb 100644
--- a/src/mongo/db/index/expression_params.h
+++ b/src/mongo/db/index/expression_params.h
@@ -36,6 +36,7 @@
namespace mongo {
+class CollatorInterface;
struct TwoDIndexingParams;
struct S2IndexingParams;
@@ -53,7 +54,9 @@ void parseHaystackParams(const BSONObj& infoObj,
std::vector<std::string>* otherFieldsOut,
double* bucketSizeOut);
-void parse2dsphereParams(const BSONObj& infoObj, S2IndexingParams* out);
+void initialize2dsphereParams(const BSONObj& infoObj,
+ CollatorInterface* collator,
+ S2IndexingParams* out);
} // namespace ExpressionParams
diff --git a/src/mongo/db/index/external_key_generator.cpp b/src/mongo/db/index/external_key_generator.cpp
index efc1d8ce32e..de1aec11d64 100644
--- a/src/mongo/db/index/external_key_generator.cpp
+++ b/src/mongo/db/index/external_key_generator.cpp
@@ -60,7 +60,10 @@ void getKeysForUpgradeChecking(const BSONObj& infoObj, const BSONObj& doc, BSONO
ExpressionKeysPrivate::getHaystackKeys(doc, geoField, otherFields, bucketSize, keys);
} else if (IndexNames::GEO_2DSPHERE == type) {
S2IndexingParams params;
- ExpressionParams::parse2dsphereParams(infoObj, &params);
+ // TODO SERVER-22251: If the index has a collator, it should be passed here, or the keys
+ // generated will be wrong.
+ CollatorInterface* collator = nullptr;
+ ExpressionParams::initialize2dsphereParams(infoObj, collator, &params);
ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, keys);
} else if (IndexNames::TEXT == type) {
fts::FTSSpec spec(infoObj);
@@ -70,8 +73,11 @@ void getKeysForUpgradeChecking(const BSONObj& infoObj, const BSONObj& doc, BSONO
int version;
std::string field;
ExpressionParams::parseHashParams(infoObj, &seed, &version, &field);
+ // TODO SERVER-22251: If the index has a collator, it should be passed here, or the keys
+ // generated will be wrong.
+ CollatorInterface* collator = nullptr;
ExpressionKeysPrivate::getHashKeys(
- doc, field, seed, version, infoObj["sparse"].trueValue(), keys);
+ doc, field, seed, version, infoObj["sparse"].trueValue(), collator, keys);
} else {
invariant(IndexNames::BTREE == type);
diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp
index 8262c9019ab..ef94a249f9c 100644
--- a/src/mongo/db/index/hash_access_method.cpp
+++ b/src/mongo/db/index/hash_access_method.cpp
@@ -47,11 +47,13 @@ HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte
!descriptor->unique());
ExpressionParams::parseHashParams(descriptor->infoObj(), &_seed, &_hashVersion, &_hashedField);
+
+ _collator = btreeState->getCollator();
}
void HashAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const {
ExpressionKeysPrivate::getHashKeys(
- obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), keys);
+ obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), _collator, keys);
}
} // namespace mongo
diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h
index ea3f36bb647..610bf6db015 100644
--- a/src/mongo/db/index/hash_access_method.h
+++ b/src/mongo/db/index/hash_access_method.h
@@ -58,6 +58,10 @@ private:
int _hashVersion;
BSONObj _missingKey;
+
+ // Null if this index orders strings according to the simple binary compare. If non-null,
+ // represents the collator used to generate index keys for indexed strings.
+ CollatorInterface* _collator;
};
} // namespace mongo
diff --git a/src/mongo/db/index/hash_key_generator_test.cpp b/src/mongo/db/index/hash_key_generator_test.cpp
new file mode 100644
index 00000000000..047c59b9994
--- /dev/null
+++ b/src/mongo/db/index/hash_key_generator_test.cpp
@@ -0,0 +1,126 @@
+/**
+ * Copyright (C) 2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/index/expression_keys_private.h"
+
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/hasher.h"
+#include "mongo/db/json.h"
+#include "mongo/db/query/collation/collator_interface_mock.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/log.h"
+
+using namespace mongo;
+
+namespace {
+
+const HashSeed kHashSeed = 0;  // Seed given to getHashKeys() and hash64() in these tests.
+const int kHashVersion = 0;  // Hash version argument given to getHashKeys() in these tests.
+
+/**
+ * Formats 'objs' as "[ key1 key2 ... ]", one toString() rendering per key.
+ */
+std::string dumpKeyset(const BSONObjSet& objs) {
+    std::stringstream buffer;
+    buffer << "[ ";
+    for (const auto& entry : objs) {
+        buffer << entry.toString() << " ";
+    }
+    buffer << "]";
+    return buffer.str();
+}
+
+/**
+ * Compares the two key sets. On a mismatch both sets are logged and false is returned;
+ * otherwise returns true.
+ */
+bool assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) {
+    const bool mismatch = (expectedKeys != actualKeys);
+    if (mismatch) {
+        log() << "Expected: " << dumpKeyset(expectedKeys) << ", "
+              << "Actual: " << dumpKeyset(actualKeys);
+    }
+    return !mismatch;
+}
+
+/**
+ * Builds the expected hashed index key for 'elt': a single unnamed field holding
+ * BSONElementHasher::hash64(elt, kHashSeed).
+ */
+BSONObj makeHashKey(BSONElement elt) {
+    const auto hashedValue = BSONElementHasher::hash64(elt, kHashSeed);
+    return BSON("" << hashedValue);
+}
+
+// With the reverse-string mock collator, hashing 'string' must match hashing 'gnirts'.
+TEST(HashKeyGeneratorTest, CollationAppliedBeforeHashing) {
+    // Expected: the hash of the already-reversed string.
+    BSONObj reversedDoc = fromjson("{a: 'gnirts'}");
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(makeHashKey(reversedDoc["a"]));
+
+    // Actual: keys generated for the original string with the collator applied.
+    CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+    BSONObj doc = fromjson("{a: 'string'}");
+    BSONObjSet actualKeys;
+    ExpressionKeysPrivate::getHashKeys(
+        doc, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+// A numeric field must hash to the same key whether or not a collator is supplied.
+TEST(HashKeyGeneratorTest, CollationDoesNotAffectNonStringFields) {
+    BSONObj doc = fromjson("{a: 5}");
+
+    // Expected: the hash of the unmodified element.
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(makeHashKey(doc["a"]));
+
+    // Actual: keys generated with the reverse-string mock collator.
+    CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+    BSONObjSet actualKeys;
+    ExpressionKeysPrivate::getHashKeys(
+        doc, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+// TODO SERVER-23172: remove test.
+// A string nested inside an embedded document is expected to be hashed without the collator
+// being applied to it.
+TEST(HashKeyGeneratorTest, CollationDoesNotAffectStringsInEmbeddedDocuments) {
+    BSONObj doc = fromjson("{a: {b: 'string'}}");
+
+    // Expected: the hash of the unmodified embedded document.
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(makeHashKey(doc["a"]));
+
+    // Actual: keys generated with the reverse-string mock collator.
+    CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+    BSONObjSet actualKeys;
+    ExpressionKeysPrivate::getHashKeys(
+        doc, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+// With a null collator the string is expected to be hashed as-is.
+TEST(HashKeyGeneratorTest, NoCollation) {
+    BSONObj doc = fromjson("{a: 'string'}");
+
+    // Expected: the hash of the unmodified string element.
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(makeHashKey(doc["a"]));
+
+    // Actual: keys generated without a collator.
+    BSONObjSet actualKeys;
+    ExpressionKeysPrivate::getHashKeys(
+        doc, "a", kHashSeed, kHashVersion, false, nullptr, &actualKeys);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+} // namespace
diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp
index 15079d17620..6c3c5c18302 100644
--- a/src/mongo/db/index/s2_access_method.cpp
+++ b/src/mongo/db/index/s2_access_method.cpp
@@ -49,7 +49,8 @@ S2AccessMethod::S2AccessMethod(IndexCatalogEntry* btreeState, SortedDataInterfac
: IndexAccessMethod(btreeState, btree) {
const IndexDescriptor* descriptor = btreeState->descriptor();
- ExpressionParams::parse2dsphereParams(descriptor->infoObj(), &_params);
+ ExpressionParams::initialize2dsphereParams(
+ descriptor->infoObj(), btreeState->getCollator(), &_params);
int geoFields = 0;
diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h
index add49eccd10..a6b10d625f5 100644
--- a/src/mongo/db/index/s2_access_method.h
+++ b/src/mongo/db/index/s2_access_method.h
@@ -52,6 +52,10 @@ private:
virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const;
S2IndexingParams _params;
+
+    // Null if this index orders strings according to the simple binary compare. If non-null,
+    // represents the collator used to generate index keys for indexed strings. Defaults to null
+    // so the pointer never holds an indeterminate value, matching S2IndexingParams::collator.
+    CollatorInterface* _collator = nullptr;
};
} // namespace mongo
diff --git a/src/mongo/db/index/s2_common.cpp b/src/mongo/db/index/s2_common.cpp
index e1db08edae9..9592cdab1f1 100644
--- a/src/mongo/db/index/s2_common.cpp
+++ b/src/mongo/db/index/s2_common.cpp
@@ -32,6 +32,7 @@
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/geo/geometry_container.h"
+#include "mongo/db/query/collation/collation_serializer.h"
#include "third_party/s2/s2cellid.h"
#include "third_party/s2/s2regioncoverer.h"
@@ -47,6 +48,10 @@ std::string S2IndexingParams::toString() const {
ss << "finestIndexedLevel: " << finestIndexedLevel << std::endl;
ss << "coarsestIndexedLevel: " << coarsestIndexedLevel << std::endl;
ss << "indexVersion: " << indexVersion << std::endl;
+ if (collator) {
+ ss << "collation: " << CollationSerializer::specToBSON(collator->getSpec()).toString()
+ << std::endl;
+ }
return ss.str();
}
diff --git a/src/mongo/db/index/s2_common.h b/src/mongo/db/index/s2_common.h
index 10632e3cb84..5afbbec5fa0 100644
--- a/src/mongo/db/index/s2_common.h
+++ b/src/mongo/db/index/s2_common.h
@@ -31,6 +31,7 @@
#include <string>
#include "mongo/db/jsobj.h"
+#include "mongo/db/query/collation/collator_interface.h"
class S2CellId;
class S2RegionCoverer;
@@ -71,6 +72,9 @@ struct S2IndexingParams {
S2IndexVersion indexVersion;
// Radius of the earth in meters
double radius;
+ // Null if this index orders strings according to the simple binary compare. If non-null,
+ // represents the collator used to generate index keys for indexed strings.
+ CollatorInterface* collator = nullptr;
std::string toString() const;
diff --git a/src/mongo/db/index/s2_key_generator_test.cpp b/src/mongo/db/index/s2_key_generator_test.cpp
new file mode 100644
index 00000000000..e13c14c133d
--- /dev/null
+++ b/src/mongo/db/index/s2_key_generator_test.cpp
@@ -0,0 +1,255 @@
+/**
+ * Copyright (C) 2014 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kIndex
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/index/expression_keys_private.h"
+
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/index/s2_common.h"
+#include "mongo/db/index/expression_params.h"
+#include "mongo/db/json.h"
+#include "mongo/db/query/collation/collator_interface_mock.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/log.h"
+
+using namespace mongo;
+
+namespace {
+
+/**
+ * Produces "[ key1 key2 ... ]" from the toString() form of every key in 'objs'.
+ */
+std::string dumpKeyset(const BSONObjSet& objs) {
+    std::stringstream text;
+    text << "[ ";
+    for (const auto& keyObj : objs) {
+        text << keyObj.toString() << " ";
+    }
+    text << "]";
+    return text.str();
+}
+
+/**
+ * Returns whether the two key sets compare equal, logging both sets when they do not.
+ */
+bool assertKeysetsEqual(const BSONObjSet& expectedKeys, const BSONObjSet& actualKeys) {
+    const bool setsDiffer = (expectedKeys != actualKeys);
+    if (!setsDiffer) {
+        return true;
+    }
+
+    log() << "Expected: " << dumpKeyset(expectedKeys) << ", "
+          << "Actual: " << dumpKeyset(actualKeys);
+    return false;
+}
+
+/**
+ * Generates the 2dsphere (version 3) index keys for a GeoJSON Point at (x, y) stored in field
+ * 'a', asserts that exactly one key results, and returns that key's first element as a long.
+ */
+long long getCellID(int x, int y) {
+    BSONObj geometry = BSON("type"
+                            << "Point"
+                            << "coordinates" << BSON_ARRAY(x << y));
+    BSONObj doc = BSON("a" << geometry);
+
+    BSONObj indexSpec = fromjson("{key: {a: '2dsphere'}, '2dsphereIndexVersion': 3}");
+    CollatorInterface* noCollator = nullptr;
+    S2IndexingParams params;
+    ExpressionParams::initialize2dsphereParams(indexSpec, noCollator, &params);
+
+    BSONObj keyPattern = fromjson("{a: '2dsphere'}");
+    BSONObjSet keys;
+    ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, &keys);
+    ASSERT_EQUALS(1U, keys.size());
+    return keys.begin()->firstElement().Long();
+}
+
+// A string field that follows the geo field must be stored as its collated (reversed) form.
+TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldAfterGeoField) {
+    CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+    BSONObj indexSpec = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}");
+    S2IndexingParams params;
+    ExpressionParams::initialize2dsphereParams(indexSpec, &collator, &params);
+
+    // Expected: the cell id of (0, 0) followed by the reversed string.
+    const long long cellId = getCellID(0, 0);
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(BSON("" << cellId << ""
+                                << "gnirts"));
+
+    // Actual: keys generated from the document.
+    BSONObj doc = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: 'string'}");
+    BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}");
+    BSONObjSet actualKeys;
+    ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, &actualKeys);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+// A string field that precedes the geo field must be stored as its collated (reversed) form.
+TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldBeforeGeoField) {
+    CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+    BSONObj indexSpec = fromjson("{key: {a: 1, b: '2dsphere'}, '2dsphereIndexVersion': 3}");
+    S2IndexingParams params;
+    ExpressionParams::initialize2dsphereParams(indexSpec, &collator, &params);
+
+    // Expected: the reversed string followed by the cell id of (0, 0).
+    const long long cellId = getCellID(0, 0);
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(BSON(""
+                             << "gnirts"
+                             << "" << cellId));
+
+    // Actual: keys generated from the document.
+    BSONObj doc = fromjson("{a: 'string', b: {type: 'Point', coordinates: [0, 0]}}");
+    BSONObj keyPattern = fromjson("{a: 1, b: '2dsphere'}");
+    BSONObjSet actualKeys;
+    ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, &actualKeys);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+// String fields on both sides of the geo field must each be stored in collated (reversed) form.
+TEST(S2KeyGeneratorTest, CollationAppliedToAllNonGeoStringFields) {
+    CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+    BSONObj indexSpec =
+        fromjson("{key: {a: 1, b: '2dsphere', c: 1}, '2dsphereIndexVersion': 3}");
+    S2IndexingParams params;
+    ExpressionParams::initialize2dsphereParams(indexSpec, &collator, &params);
+
+    // Expected: reversed 'a', the cell id of (0, 0), then reversed 'c'.
+    const long long cellId = getCellID(0, 0);
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(BSON(""
+                             << "gnirts"
+                             << "" << cellId << ""
+                             << "2gnirts"));
+
+    // Actual: keys generated from the document.
+    BSONObj doc =
+        fromjson("{a: 'string', b: {type: 'Point', coordinates: [0, 0]}, c: 'string2'}");
+    BSONObj keyPattern = fromjson("{a: 1, b: '2dsphere', c: 1}");
+    BSONObjSet actualKeys;
+    ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, &actualKeys);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+// A string reached through a dotted path ('b.c.d') must be stored in collated (reversed) form.
+TEST(S2KeyGeneratorTest, CollationAppliedToNonGeoStringFieldWithMultiplePathComponents) {
+    CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+    BSONObj indexSpec =
+        fromjson("{key: {a: '2dsphere', 'b.c.d': 1}, '2dsphereIndexVersion': 3}");
+    S2IndexingParams params;
+    ExpressionParams::initialize2dsphereParams(indexSpec, &collator, &params);
+
+    // Expected: the cell id of (0, 0) followed by the reversed string.
+    const long long cellId = getCellID(0, 0);
+    BSONObjSet expectedKeys;
+    expectedKeys.insert(BSON("" << cellId << ""
+                                << "gnirts"));
+
+    // Actual: keys generated from the document.
+    BSONObj doc =
+        fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: {c: {d: 'string'}}}");
+    BSONObj keyPattern = fromjson("{a: '2dsphere', 'b.c.d': 1}");
+    BSONObjSet actualKeys;
+    ExpressionKeysPrivate::getS2Keys(doc, keyPattern, params, &actualKeys);
+
+    ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+// Checks that each string element of the array in 'b' produces its own key with the string collated.
+TEST(S2KeyGeneratorTest, CollationAppliedToStringsInArray) {
+ BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: ['string', 'string2']}");
+ BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}");
+ BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}");
+ S2IndexingParams params;  // getS2Keys() below is given only params, not the collator itself
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+ BSONObjSet actualKeys;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ // Two keys expected, one per array element: "gnirts" and "2gnirts", each after the cell-id slot.
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
+ << "gnirts"));
+ expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
+ << "2gnirts"));
+ // getCellID/assertKeysetsEqual are defined outside this excerpt; presumably cell lookup and set compare.
+ ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+// Checks collation when two indexed fields after the geo field ('b' and 'c') are both string arrays.
+TEST(S2KeyGeneratorTest, CollationAppliedToStringsInAllArrays) {
+ BSONObj obj = fromjson(
+ "{a: {type: 'Point', coordinates: [0, 0]}, b: ['string', 'string2'], c: ['abc', 'def']}");
+ BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1, c: 1}");
+ BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1, c: 1}, '2dsphereIndexVersion': 3}");
+ S2IndexingParams params;  // getS2Keys() below is given only params, not the collator itself
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+ BSONObjSet actualKeys;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ // Four keys expected: every pairing of a reversed 'b' element with a reversed 'c' element (2 x 2).
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
+ << "gnirts"
+ << ""
+ << "cba"));
+ expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
+ << "gnirts"
+ << ""
+ << "fed"));
+ expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
+ << "2gnirts"
+ << ""
+ << "cba"));
+ expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
+ << "2gnirts"
+ << ""
+ << "fed"));
+ // getCellID/assertKeysetsEqual are defined outside this excerpt; presumably cell lookup and set compare.
+ ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+// Checks that the numeric value 5 in 'b' appears unchanged in the key even though a collator is set.
+TEST(S2KeyGeneratorTest, CollationDoesNotAffectNonStringFields) {
+ BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: 5}");
+ BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}");
+ BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}");
+ S2IndexingParams params;  // getS2Keys() below is given only params, not the collator itself
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+ BSONObjSet actualKeys;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ // One key expected: the getCellID(0, 0) value followed by the integer 5 exactly as stored in obj.
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << 5));
+ // getCellID/assertKeysetsEqual are defined outside this excerpt; presumably cell lookup and set compare.
+ ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+// Checks that a string inside an embedded document indexed as a whole ('b') is left unreversed.
+// TODO SERVER-23172: remove test.
+TEST(S2KeyGeneratorTest, CollationDoesNotAffectStringsInEmbeddedDocuments) {
+ BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: {c: 'string'}}");
+ BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}");
+ BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}");
+ S2IndexingParams params;  // getS2Keys() below is given only params, not the collator itself
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ ExpressionParams::initialize2dsphereParams(infoObj, &collator, &params);
+ BSONObjSet actualKeys;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ // One key expected: the cell-id slot, then the sub-document {c: 'string'} with its string untouched.
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(BSON("" << getCellID(0, 0) << "" << BSON("c"
+ << "string")));
+ // getCellID/assertKeysetsEqual are defined outside this excerpt; presumably cell lookup and set compare.
+ ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+// Checks that with a null collator the string in 'b' is placed in the key unmodified.
+TEST(S2KeyGeneratorTest, NoCollation) {
+ BSONObj obj = fromjson("{a: {type: 'Point', coordinates: [0, 0]}, b: 'string'}");
+ BSONObj keyPattern = fromjson("{a: '2dsphere', b: 1}");
+ BSONObj infoObj = fromjson("{key: {a: '2dsphere', b: 1}, '2dsphereIndexVersion': 3}");
+ S2IndexingParams params;
+ CollatorInterface* collator = nullptr;  // no collator: the other tests here pass a mock instead
+ ExpressionParams::initialize2dsphereParams(infoObj, collator, &params);
+ BSONObjSet actualKeys;
+ ExpressionKeysPrivate::getS2Keys(obj, keyPattern, params, &actualKeys);
+ // One key expected: the getCellID(0, 0) value followed by "string" exactly as it appears in obj.
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(BSON("" << getCellID(0, 0) << ""
+ << "string"));
+ // getCellID/assertKeysetsEqual are defined outside this excerpt; presumably cell lookup and set compare.
+ ASSERT(assertKeysetsEqual(expectedKeys, actualKeys));
+}
+
+} // namespace
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 3f89d5dd605..859caf4b737 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -596,7 +596,7 @@ void IndexBoundsBuilder::translate(const MatchExpression* expr,
verify(gme->getGeoExpression().getGeometry().hasS2Region());
const S2Region& region = gme->getGeoExpression().getGeometry().getS2Region();
S2IndexingParams indexParams;
- ExpressionParams::parse2dsphereParams(index.infoObj, &indexParams);
+ ExpressionParams::initialize2dsphereParams(index.infoObj, index.collator, &indexParams);
ExpressionMapping::cover2dsphere(region, indexParams, oilOut);
*tightnessOut = IndexBoundsBuilder::INEXACT_FETCH;
} else if (mongoutils::str::equals("2d", elt.valuestrsafe())) {
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index 8be4d834ca9..b789395cb44 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -300,7 +300,8 @@ void QueryPlannerAnalysis::analyzeGeo(const QueryPlannerParams& params,
}
S2IndexingParams params;
- ExpressionParams::parse2dsphereParams(indexEntry.infoObj, &params);
+ ExpressionParams::initialize2dsphereParams(
+ indexEntry.infoObj, indexEntry.collator, &params);
if (params.indexVersion < S2_INDEX_VERSION_3) {
continue;
diff --git a/src/mongo/dbtests/namespacetests.cpp b/src/mongo/dbtests/namespacetests.cpp
index eadb0a4d85e..e7f7a1220db 100644
--- a/src/mongo/dbtests/namespacetests.cpp
+++ b/src/mongo/dbtests/namespacetests.cpp
@@ -101,7 +101,8 @@ public:
// Call getKeys on the nullObj.
BSONObjSet nullFieldKeySet;
- ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0, 0, false, &nullFieldKeySet);
+ CollatorInterface* collator = nullptr;
+ ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0, 0, false, collator, &nullFieldKeySet);
BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement();
ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0, 0),
@@ -127,7 +128,9 @@ public:
BSONObj nullObj = BSON("a" << BSONNULL);
BSONObjSet nullFieldKeySet;
- ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0x5eed, 0, false, &nullFieldKeySet);
+ CollatorInterface* collator = nullptr;
+ ExpressionKeysPrivate::getHashKeys(
+ nullObj, "a", 0x5eed, 0, false, collator, &nullFieldKeySet);
BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement();
ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0x5eed, 0),