summary refs log tree commit diff
diff options
context:
space:
mode:
author Tess Avitabile <tess.avitabile@mongodb.com> 2016-03-22 16:07:55 -0400
committer Tess Avitabile <tess.avitabile@mongodb.com> 2016-03-23 11:38:59 -0400
commit 9316aca8fb95265cd6e3b2343b2cfab2ad90e7c8 (patch)
tree 362206347f23d4782b2fab165eb6a13c7d3b4556
parent 54cb8dac783c78709342da942f1a94d4a900dcff (diff)
download mongo-9316aca8fb95265cd6e3b2343b2cfab2ad90e7c8.tar.gz
SERVER-23092 Update BtreeKeyGeneratorV1 to be collation-aware
-rw-r--r-- src/mongo/db/exec/sort_key_generator.cpp 3
-rw-r--r-- src/mongo/db/index/SConscript 2
-rw-r--r-- src/mongo/db/index/btree_access_method.cpp 4
-rw-r--r-- src/mongo/db/index/btree_key_generator.cpp 60
-rw-r--r-- src/mongo/db/index/btree_key_generator.h 10
-rw-r--r-- src/mongo/db/index/btree_key_generator_test.cpp 107
-rw-r--r-- src/mongo/db/index/external_key_generator.cpp 3
7 files changed, 164 insertions, 25 deletions
diff --git a/src/mongo/db/exec/sort_key_generator.cpp b/src/mongo/db/exec/sort_key_generator.cpp
index f186d9617ac..7ffa4fe3f52 100644
--- a/src/mongo/db/exec/sort_key_generator.cpp
+++ b/src/mongo/db/exec/sort_key_generator.cpp
@@ -96,7 +96,8 @@ SortKeyGenerator::SortKeyGenerator(const BSONObj& sortSpec, const BSONObj& query
fixed.push_back(BSONElement());
}
- _keyGen.reset(new BtreeKeyGeneratorV1(fieldNames, fixed, false /* not sparse */));
+ // TODO SERVER-23095: change nullptr to the appropriate CollatorInterface*.
+ _keyGen.reset(new BtreeKeyGeneratorV1(fieldNames, fixed, false /* not sparse */, nullptr));
// The bounds checker only works on the Btree part of the sort key.
getBoundsForSort(queryObj, _btreeObj);
diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript
index cf096712f11..c0e6a9241ac 100644
--- a/src/mongo/db/index/SConscript
+++ b/src/mongo/db/index/SConscript
@@ -25,6 +25,7 @@ env.Library(
'$BUILD_DIR/mongo/db/geo/geoparser',
'$BUILD_DIR/mongo/db/index_names',
'$BUILD_DIR/mongo/db/mongohasher',
+ '$BUILD_DIR/mongo/db/query/collation/collation_index_key',
'$BUILD_DIR/third_party/s2/s2',
],
)
@@ -65,5 +66,6 @@ env.CppUnitTest(
LIBDEPS=[
'key_generator',
'$BUILD_DIR/mongo/db/mongohasher',
+ '$BUILD_DIR/mongo/db/query/collation/collator_interface_mock',
],
)
diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp
index 40c4bc77251..941b35fc8b6 100644
--- a/src/mongo/db/index/btree_access_method.cpp
+++ b/src/mongo/db/index/btree_access_method.cpp
@@ -55,7 +55,9 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn
if (0 == _descriptor->version()) {
_keyGenerator.reset(new BtreeKeyGeneratorV0(fieldNames, fixed, _descriptor->isSparse()));
} else if (1 == _descriptor->version()) {
- _keyGenerator.reset(new BtreeKeyGeneratorV1(fieldNames, fixed, _descriptor->isSparse()));
+ // TODO SERVER-23092: change nullptr to the appropriate CollatorInterface*.
+ _keyGenerator.reset(
+ new BtreeKeyGeneratorV1(fieldNames, fixed, _descriptor->isSparse(), nullptr));
} else {
massert(16745, "Invalid index version for key generation.", false);
}
diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp
index ccf4df2de96..14c48085b03 100644
--- a/src/mongo/db/index/btree_key_generator.cpp
+++ b/src/mongo/db/index/btree_key_generator.cpp
@@ -32,6 +32,8 @@
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/field_ref.h"
+#include "mongo/db/query/collation/collation_index_key.h"
+#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/util/mongoutils/str.h"
namespace mongo {
@@ -65,21 +67,6 @@ BtreeKeyGenerator::BtreeKeyGenerator(std::vector<const char*> fieldNames,
void BtreeKeyGenerator::getKeys(const BSONObj& obj,
BSONObjSet* keys,
MultikeyPaths* multikeyPaths) const {
- if (_isIdIndex) {
- // we special case for speed
- BSONElement e = obj["_id"];
- if (e.eoo()) {
- keys->insert(_nullKey);
- } else {
- int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */;
- BSONObjBuilder b(size);
- b.appendAs(e, "");
- keys->insert(b.obj());
- invariant(keys->begin()->objsize() == size);
- }
- return;
- }
-
// '_fieldNames' and '_fixed' are passed by value so that they can be mutated as part of the
// getKeys call. :|
getKeysImpl(_fieldNames, _fixed, obj, keys, multikeyPaths);
@@ -104,6 +91,21 @@ void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames,
const BSONObj& obj,
BSONObjSet* keys,
MultikeyPaths* multikeyPaths) const {
+ if (_isIdIndex) {
+ // we special case for speed
+ BSONElement e = obj["_id"];
+ if (e.eoo()) {
+ keys->insert(_nullKey);
+ } else {
+ int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */;
+ BSONObjBuilder b(size);
+ b.appendAs(e, "");
+ keys->insert(b.obj());
+ invariant(keys->begin()->objsize() == size);
+ }
+ return;
+ }
+
BSONElement arrElt;
unsigned arrIdx = ~0;
unsigned numNotFound = 0;
@@ -216,8 +218,11 @@ void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames,
BtreeKeyGeneratorV1::BtreeKeyGeneratorV1(std::vector<const char*> fieldNames,
std::vector<BSONElement> fixed,
- bool isSparse)
- : BtreeKeyGenerator(fieldNames, fixed, isSparse), _emptyPositionalInfo(fieldNames.size()) {
+ bool isSparse,
+ CollatorInterface* collator)
+ : BtreeKeyGenerator(fieldNames, fixed, isSparse),
+ _emptyPositionalInfo(fieldNames.size()),
+ _collator(collator) {
for (const char* fieldName : fieldNames) {
size_t pathLength = FieldRef{fieldName}.numParts();
invariant(pathLength > 0);
@@ -287,6 +292,25 @@ void BtreeKeyGeneratorV1::getKeysImpl(std::vector<const char*> fieldNames,
const BSONObj& obj,
BSONObjSet* keys,
MultikeyPaths* multikeyPaths) const {
+ if (_isIdIndex) {
+ // we special case for speed
+ BSONElement e = obj["_id"];
+ if (e.eoo()) {
+ keys->insert(_nullKey);
+ } else {
+ BSONObjBuilder b;
+ CollationIndexKey::collationAwareIndexKeyAppend(e, _collator, &b);
+ keys->insert(b.obj());
+ }
+
+ // The {_id: 1} index can never be multikey because the _id field isn't allowed to be an
+ // array value. We therefore always set 'multikeyPaths' as [ [ ] ].
+ if (multikeyPaths) {
+ multikeyPaths->resize(1);
+ }
+ return;
+ }
+
if (multikeyPaths) {
multikeyPaths->resize(fieldNames.size());
}
@@ -371,7 +395,7 @@ void BtreeKeyGeneratorV1::getKeysImplWithArray(
}
BSONObjBuilder b(_sizeTracker);
for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i) {
- b.appendAs(*i, "");
+ CollationIndexKey::collationAwareIndexKeyAppend(*i, _collator, &b);
}
keys->insert(b.obj());
} else if (arrElt.embeddedObject().firstElement().eoo()) {
diff --git a/src/mongo/db/index/btree_key_generator.h b/src/mongo/db/index/btree_key_generator.h
index 85ada2c2c1a..7ddbe8a0732 100644
--- a/src/mongo/db/index/btree_key_generator.h
+++ b/src/mongo/db/index/btree_key_generator.h
@@ -36,6 +36,8 @@
namespace mongo {
+class CollatorInterface;
+
/**
* Internal class used by BtreeAccessMethod to generate keys for indexed documents.
* This class is meant to be kept under the index access layer.
@@ -61,7 +63,6 @@ protected:
BSONSizeTracker _sizeTracker;
private:
- // We have V0 and V1. Sigh.
virtual void getKeysImpl(std::vector<const char*> fieldNames,
std::vector<BSONElement> fixed,
const BSONObj& obj,
@@ -98,7 +99,8 @@ class BtreeKeyGeneratorV1 : public BtreeKeyGenerator {
public:
BtreeKeyGeneratorV1(std::vector<const char*> fieldNames,
std::vector<BSONElement> fixed,
- bool isSparse);
+ bool isSparse,
+ CollatorInterface* collator);
virtual ~BtreeKeyGeneratorV1() {}
@@ -245,6 +247,10 @@ private:
// A vector with size equal to the number of elements in the index key pattern. Each element in
// the vector is the number of path components in the indexed field.
std::vector<size_t> _pathLengths;
+
+ // Null if this key generator orders strings according to the simple binary compare. If
+ // non-null, represents the collator used to generate index keys for indexed strings.
+ CollatorInterface* _collator;
};
} // namespace mongo
diff --git a/src/mongo/db/index/btree_key_generator_test.cpp b/src/mongo/db/index/btree_key_generator_test.cpp
index 834ce3fa61f..b258487d114 100644
--- a/src/mongo/db/index/btree_key_generator_test.cpp
+++ b/src/mongo/db/index/btree_key_generator_test.cpp
@@ -35,6 +35,7 @@
#include <iostream>
#include "mongo/db/json.h"
+#include "mongo/db/query/collation/collator_interface_mock.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/log.h"
@@ -81,7 +82,8 @@ bool testKeygen(const BSONObj& kp,
const BSONObj& obj,
const BSONObjSet& expectedKeys,
const MultikeyPaths& expectedMultikeyPaths,
- bool sparse = false) {
+ bool sparse = false,
+ CollatorInterface* collator = nullptr) {
invariant(expectedMultikeyPaths.size() == static_cast<size_t>(kp.nFields()));
//
@@ -98,7 +100,8 @@ bool testKeygen(const BSONObj& kp,
fixed.push_back(BSONElement());
}
- unique_ptr<BtreeKeyGenerator> keyGen(new BtreeKeyGeneratorV1(fieldNames, fixed, sparse));
+ unique_ptr<BtreeKeyGenerator> keyGen(
+ new BtreeKeyGeneratorV1(fieldNames, fixed, sparse, collator));
//
// Step 2: ask 'keyGen' to generate index keys for the object 'obj' and report any prefixes of
@@ -132,6 +135,16 @@ bool testKeygen(const BSONObj& kp,
// Unit tests
//
+
+TEST(BtreeKeyGeneratorTest, GetIdKeyFromObject) {
+ BSONObj keyPattern = fromjson("{_id: 1}");
+ BSONObj genKeysFrom = fromjson("{_id: 'foo', b: 4}");
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(fromjson("{'': 'foo'}"));
+ MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}};
+ ASSERT(testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths));
+}
+
TEST(BtreeKeyGeneratorTest, GetKeysFromObjectSimple) {
BSONObj keyPattern = fromjson("{a: 1}");
BSONObj genKeysFrom = fromjson("{b: 4, a: 5}");
@@ -989,4 +1002,94 @@ TEST(BtreeKeyGeneratorTest, PositionalKeyPatternNestedArrays7) {
ASSERT(testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths));
}
+TEST(BtreeKeyGeneratorTest, GetCollationAwareIdKeyFromObject) {
+ BSONObj keyPattern = fromjson("{_id: 1}");
+ BSONObj genKeysFrom = fromjson("{_id: 'foo', b: 4}");
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(fromjson("{'': 'oof'}"));
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}};
+ ASSERT(
+ testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator));
+}
+
+TEST(BtreeKeyGeneratorTest, GetCollationAwareKeysFromObjectSimple) {
+ BSONObj keyPattern = fromjson("{a: 1}");
+ BSONObj genKeysFrom = fromjson("{b: 4, a: 'foo'}");
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(fromjson("{'': 'oof'}"));
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}};
+ ASSERT(
+ testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator));
+}
+
+TEST(BtreeKeyGeneratorTest, GetCollationAwareKeysFromObjectDotted) {
+ BSONObj keyPattern = fromjson("{'a.b': 1}");
+ BSONObj genKeysFrom = fromjson("{a: {b: 'foo'}, c: 4}");
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(fromjson("{'': 'oof'}"));
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}};
+ ASSERT(
+ testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator));
+}
+
+TEST(BtreeKeyGeneratorTest, GetCollationAwareKeysFromArraySimple) {
+ BSONObj keyPattern = fromjson("{a: 1}");
+ BSONObj genKeysFrom = fromjson("{a: ['foo', 'bar', 'baz']}");
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(fromjson("{'': 'oof'}"));
+ expectedKeys.insert(fromjson("{'': 'rab'}"));
+ expectedKeys.insert(fromjson("{'': 'zab'}"));
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ MultikeyPaths expectedMultikeyPaths{{0U}};
+ ASSERT(
+ testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator));
+}
+
+TEST(BtreeKeyGeneratorTest, CollatorDoesNotAffectNonStringIdKey) {
+ BSONObj keyPattern = fromjson("{_id: 1}");
+ BSONObj genKeysFrom = fromjson("{_id: 5, b: 4}");
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(fromjson("{'': 5}"));
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}};
+ ASSERT(
+ testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator));
+}
+
+TEST(BtreeKeyGeneratorTest, CollatorDoesNotAffectNonStringKeys) {
+ BSONObj keyPattern = fromjson("{a: 1}");
+ BSONObj genKeysFrom = fromjson("{b: 4, a: 5}");
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(fromjson("{'': 5}"));
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}};
+ ASSERT(
+ testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator));
+}
+
+TEST(BtreeKeyGeneratorTest, CollatorDoesNotAffectNestedObjectKeys) {
+ BSONObj keyPattern = fromjson("{a: 1}");
+ BSONObj genKeysFrom = fromjson("{b: 4, a: {c: 'foo'}}");
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(fromjson("{'': {c: 'foo'}}"));
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}};
+ ASSERT(
+ testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator));
+}
+
+TEST(BtreeKeyGeneratorTest, CollatorDoesNotAffectNestedArrayKeys) {
+ BSONObj keyPattern = fromjson("{a: 1}");
+ BSONObj genKeysFrom = fromjson("{b: 4, a: {c: ['foo', 'bar', 'baz']}}");
+ BSONObjSet expectedKeys;
+ expectedKeys.insert(fromjson("{'': {c: ['foo', 'bar', 'baz']}}"));
+ CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString);
+ MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}};
+ ASSERT(
+ testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator));
+}
+
} // namespace
diff --git a/src/mongo/db/index/external_key_generator.cpp b/src/mongo/db/index/external_key_generator.cpp
index c7f09d78bf0..efc1d8ce32e 100644
--- a/src/mongo/db/index/external_key_generator.cpp
+++ b/src/mongo/db/index/external_key_generator.cpp
@@ -85,7 +85,8 @@ void getKeysForUpgradeChecking(const BSONObj& infoObj, const BSONObj& doc, BSONO
}
// XXX: do we care about version
- BtreeKeyGeneratorV1 keyGen(fieldNames, fixed, infoObj["sparse"].trueValue());
+ // TODO: change nullptr to a collator, if a collation spec is given.
+ BtreeKeyGeneratorV1 keyGen(fieldNames, fixed, infoObj["sparse"].trueValue(), nullptr);
// There's no need to compute the prefixes of the indexed fields that cause the index to be
// multikey when checking if any index key is too large.