diff options
author | Tess Avitabile <tess.avitabile@mongodb.com> | 2016-03-15 17:20:24 -0400 |
---|---|---|
committer | Tess Avitabile <tess.avitabile@mongodb.com> | 2016-03-22 13:17:36 -0400 |
commit | 0af6e7685ab00249c38c8c874eda7cb895240f94 (patch) | |
tree | 74d873d8cfa7b48caa85be03b649bf40134e9cc3 /src/mongo/db | |
parent | 622707b5ce4bde79cbf6df07bafc98038d5c7e8d (diff) | |
download | mongo-0af6e7685ab00249c38c8c874eda7cb895240f94.tar.gz |
SERVER-23092 Update BtreeKeyGeneratorV1 to be collation-aware
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/exec/sort_key_generator.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/index/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/db/index/btree_access_method.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator.cpp | 58 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator.h | 10 | ||||
-rw-r--r-- | src/mongo/db/index/btree_key_generator_test.cpp | 107 | ||||
-rw-r--r-- | src/mongo/db/index/external_key_generator.cpp | 3 |
7 files changed, 162 insertions, 25 deletions
diff --git a/src/mongo/db/exec/sort_key_generator.cpp b/src/mongo/db/exec/sort_key_generator.cpp index f186d9617ac..7ffa4fe3f52 100644 --- a/src/mongo/db/exec/sort_key_generator.cpp +++ b/src/mongo/db/exec/sort_key_generator.cpp @@ -96,7 +96,8 @@ SortKeyGenerator::SortKeyGenerator(const BSONObj& sortSpec, const BSONObj& query fixed.push_back(BSONElement()); } - _keyGen.reset(new BtreeKeyGeneratorV1(fieldNames, fixed, false /* not sparse */)); + // TODO SERVER-23095: change nullptr to the appropriate CollatorInterface*. + _keyGen.reset(new BtreeKeyGeneratorV1(fieldNames, fixed, false /* not sparse */, nullptr)); // The bounds checker only works on the Btree part of the sort key. getBoundsForSort(queryObj, _btreeObj); diff --git a/src/mongo/db/index/SConscript b/src/mongo/db/index/SConscript index cf096712f11..c0e6a9241ac 100644 --- a/src/mongo/db/index/SConscript +++ b/src/mongo/db/index/SConscript @@ -25,6 +25,7 @@ env.Library( '$BUILD_DIR/mongo/db/geo/geoparser', '$BUILD_DIR/mongo/db/index_names', '$BUILD_DIR/mongo/db/mongohasher', + '$BUILD_DIR/mongo/db/query/collation/collation_index_key', '$BUILD_DIR/third_party/s2/s2', ], ) @@ -65,5 +66,6 @@ env.CppUnitTest( LIBDEPS=[ 'key_generator', '$BUILD_DIR/mongo/db/mongohasher', + '$BUILD_DIR/mongo/db/query/collation/collator_interface_mock', ], ) diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp index 40c4bc77251..941b35fc8b6 100644 --- a/src/mongo/db/index/btree_access_method.cpp +++ b/src/mongo/db/index/btree_access_method.cpp @@ -55,7 +55,9 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn if (0 == _descriptor->version()) { _keyGenerator.reset(new BtreeKeyGeneratorV0(fieldNames, fixed, _descriptor->isSparse())); } else if (1 == _descriptor->version()) { - _keyGenerator.reset(new BtreeKeyGeneratorV1(fieldNames, fixed, _descriptor->isSparse())); + // TODO SERVER-23092: change nullptr to the appropriate CollatorInterface*. 
+ _keyGenerator.reset( + new BtreeKeyGeneratorV1(fieldNames, fixed, _descriptor->isSparse(), nullptr)); } else { massert(16745, "Invalid index version for key generation.", false); } diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp index ccf4df2de96..6e90ab3a068 100644 --- a/src/mongo/db/index/btree_key_generator.cpp +++ b/src/mongo/db/index/btree_key_generator.cpp @@ -32,6 +32,8 @@ #include "mongo/bson/bsonobjbuilder.h" #include "mongo/db/field_ref.h" +#include "mongo/db/query/collation/collation_index_key.h" +#include "mongo/db/query/collation/collator_interface.h" #include "mongo/util/mongoutils/str.h" namespace mongo { @@ -65,21 +67,6 @@ BtreeKeyGenerator::BtreeKeyGenerator(std::vector<const char*> fieldNames, void BtreeKeyGenerator::getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { - if (_isIdIndex) { - // we special case for speed - BSONElement e = obj["_id"]; - if (e.eoo()) { - keys->insert(_nullKey); - } else { - int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */; - BSONObjBuilder b(size); - b.appendAs(e, ""); - keys->insert(b.obj()); - invariant(keys->begin()->objsize() == size); - } - return; - } - // '_fieldNames' and '_fixed' are passed by value so that they can be mutated as part of the // getKeys call. 
:| getKeysImpl(_fieldNames, _fixed, obj, keys, multikeyPaths); @@ -104,6 +91,21 @@ void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames, const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { + if (_isIdIndex) { + // we special case for speed + BSONElement e = obj["_id"]; + if (e.eoo()) { + keys->insert(_nullKey); + } else { + int size = e.size() + 5 /* bson over head*/ - 3 /* remove _id string */; + BSONObjBuilder b(size); + b.appendAs(e, ""); + keys->insert(b.obj()); + invariant(keys->begin()->objsize() == size); + } + return; + } + BSONElement arrElt; unsigned arrIdx = ~0; unsigned numNotFound = 0; @@ -216,8 +218,11 @@ void BtreeKeyGeneratorV0::getKeysImpl(std::vector<const char*> fieldNames, BtreeKeyGeneratorV1::BtreeKeyGeneratorV1(std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, - bool isSparse) - : BtreeKeyGenerator(fieldNames, fixed, isSparse), _emptyPositionalInfo(fieldNames.size()) { + bool isSparse, + CollatorInterface* collator) + : BtreeKeyGenerator(fieldNames, fixed, isSparse), + _emptyPositionalInfo(fieldNames.size()), + _collator(collator) { for (const char* fieldName : fieldNames) { size_t pathLength = FieldRef{fieldName}.numParts(); invariant(pathLength > 0); @@ -287,6 +292,23 @@ void BtreeKeyGeneratorV1::getKeysImpl(std::vector<const char*> fieldNames, const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { + if (_isIdIndex) { + // we special case for speed + BSONElement e = obj["_id"]; + if (e.eoo()) { + keys->insert(_nullKey); + } else { + BSONObjBuilder b; + CollationIndexKey::collationAwareIndexKeyAppend(e, _collator, &b); + keys->insert(b.obj()); + } + + // The {_id: 1} index can never be multikey because the _id field isn't allowed to be an + // array value. We therefore always set 'multikeyPaths' as [ [ ] ]. 
+ multikeyPaths->resize(1); + return; + } + if (multikeyPaths) { multikeyPaths->resize(fieldNames.size()); } @@ -371,7 +393,7 @@ void BtreeKeyGeneratorV1::getKeysImplWithArray( } BSONObjBuilder b(_sizeTracker); for (std::vector<BSONElement>::iterator i = fixed.begin(); i != fixed.end(); ++i) { - b.appendAs(*i, ""); + CollationIndexKey::collationAwareIndexKeyAppend(*i, _collator, &b); } keys->insert(b.obj()); } else if (arrElt.embeddedObject().firstElement().eoo()) { diff --git a/src/mongo/db/index/btree_key_generator.h b/src/mongo/db/index/btree_key_generator.h index 85ada2c2c1a..7ddbe8a0732 100644 --- a/src/mongo/db/index/btree_key_generator.h +++ b/src/mongo/db/index/btree_key_generator.h @@ -36,6 +36,8 @@ namespace mongo { +class CollatorInterface; + /** * Internal class used by BtreeAccessMethod to generate keys for indexed documents. * This class is meant to be kept under the index access layer. @@ -61,7 +63,6 @@ protected: BSONSizeTracker _sizeTracker; private: - // We have V0 and V1. Sigh. virtual void getKeysImpl(std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, const BSONObj& obj, @@ -98,7 +99,8 @@ class BtreeKeyGeneratorV1 : public BtreeKeyGenerator { public: BtreeKeyGeneratorV1(std::vector<const char*> fieldNames, std::vector<BSONElement> fixed, - bool isSparse); + bool isSparse, + CollatorInterface* collator); virtual ~BtreeKeyGeneratorV1() {} @@ -245,6 +247,10 @@ private: // A vector with size equal to the number of elements in the index key pattern. Each element in // the vector is the number of path components in the indexed field. std::vector<size_t> _pathLengths; + + // Null if this key generator orders strings according to the simple binary compare. If + // non-null, represents the collator used to generate index keys for indexed strings. 
+ CollatorInterface* _collator; }; } // namespace mongo diff --git a/src/mongo/db/index/btree_key_generator_test.cpp b/src/mongo/db/index/btree_key_generator_test.cpp index 834ce3fa61f..b258487d114 100644 --- a/src/mongo/db/index/btree_key_generator_test.cpp +++ b/src/mongo/db/index/btree_key_generator_test.cpp @@ -35,6 +35,7 @@ #include <iostream> #include "mongo/db/json.h" +#include "mongo/db/query/collation/collator_interface_mock.h" #include "mongo/unittest/unittest.h" #include "mongo/util/log.h" @@ -81,7 +82,8 @@ bool testKeygen(const BSONObj& kp, const BSONObj& obj, const BSONObjSet& expectedKeys, const MultikeyPaths& expectedMultikeyPaths, - bool sparse = false) { + bool sparse = false, + CollatorInterface* collator = nullptr) { invariant(expectedMultikeyPaths.size() == static_cast<size_t>(kp.nFields())); // @@ -98,7 +100,8 @@ bool testKeygen(const BSONObj& kp, fixed.push_back(BSONElement()); } - unique_ptr<BtreeKeyGenerator> keyGen(new BtreeKeyGeneratorV1(fieldNames, fixed, sparse)); + unique_ptr<BtreeKeyGenerator> keyGen( + new BtreeKeyGeneratorV1(fieldNames, fixed, sparse, collator)); // // Step 2: ask 'keyGen' to generate index keys for the object 'obj' and report any prefixes of @@ -132,6 +135,16 @@ bool testKeygen(const BSONObj& kp, // Unit tests // + +TEST(BtreeKeyGeneratorTest, GetIdKeyFromObject) { + BSONObj keyPattern = fromjson("{_id: 1}"); + BSONObj genKeysFrom = fromjson("{_id: 'foo', b: 4}"); + BSONObjSet expectedKeys; + expectedKeys.insert(fromjson("{'': 'foo'}")); + MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}}; + ASSERT(testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths)); +} + TEST(BtreeKeyGeneratorTest, GetKeysFromObjectSimple) { BSONObj keyPattern = fromjson("{a: 1}"); BSONObj genKeysFrom = fromjson("{b: 4, a: 5}"); @@ -989,4 +1002,94 @@ TEST(BtreeKeyGeneratorTest, PositionalKeyPatternNestedArrays7) { ASSERT(testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths)); } 
+TEST(BtreeKeyGeneratorTest, GetCollationAwareIdKeyFromObject) { + BSONObj keyPattern = fromjson("{_id: 1}"); + BSONObj genKeysFrom = fromjson("{_id: 'foo', b: 4}"); + BSONObjSet expectedKeys; + expectedKeys.insert(fromjson("{'': 'oof'}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}}; + ASSERT( + testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator)); +} + +TEST(BtreeKeyGeneratorTest, GetCollationAwareKeysFromObjectSimple) { + BSONObj keyPattern = fromjson("{a: 1}"); + BSONObj genKeysFrom = fromjson("{b: 4, a: 'foo'}"); + BSONObjSet expectedKeys; + expectedKeys.insert(fromjson("{'': 'oof'}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}}; + ASSERT( + testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator)); +} + +TEST(BtreeKeyGeneratorTest, GetCollationAwareKeysFromObjectDotted) { + BSONObj keyPattern = fromjson("{'a.b': 1}"); + BSONObj genKeysFrom = fromjson("{a: {b: 'foo'}, c: 4}"); + BSONObjSet expectedKeys; + expectedKeys.insert(fromjson("{'': 'oof'}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}}; + ASSERT( + testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator)); +} + +TEST(BtreeKeyGeneratorTest, GetCollationAwareKeysFromArraySimple) { + BSONObj keyPattern = fromjson("{a: 1}"); + BSONObj genKeysFrom = fromjson("{a: ['foo', 'bar', 'baz']}"); + BSONObjSet expectedKeys; + expectedKeys.insert(fromjson("{'': 'oof'}")); + expectedKeys.insert(fromjson("{'': 'rab'}")); + expectedKeys.insert(fromjson("{'': 'zab'}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + MultikeyPaths expectedMultikeyPaths{{0U}}; + ASSERT( + testKeygen(keyPattern, 
genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator)); +} + +TEST(BtreeKeyGeneratorTest, CollatorDoesNotAffectNonStringIdKey) { + BSONObj keyPattern = fromjson("{_id: 1}"); + BSONObj genKeysFrom = fromjson("{_id: 5, b: 4}"); + BSONObjSet expectedKeys; + expectedKeys.insert(fromjson("{'': 5}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}}; + ASSERT( + testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator)); +} + +TEST(BtreeKeyGeneratorTest, CollatorDoesNotAffectNonStringKeys) { + BSONObj keyPattern = fromjson("{a: 1}"); + BSONObj genKeysFrom = fromjson("{b: 4, a: 5}"); + BSONObjSet expectedKeys; + expectedKeys.insert(fromjson("{'': 5}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}}; + ASSERT( + testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator)); +} + +TEST(BtreeKeyGeneratorTest, CollatorDoesNotAffectNestedObjectKeys) { + BSONObj keyPattern = fromjson("{a: 1}"); + BSONObj genKeysFrom = fromjson("{b: 4, a: {c: 'foo'}}"); + BSONObjSet expectedKeys; + expectedKeys.insert(fromjson("{'': {c: 'foo'}}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}}; + ASSERT( + testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator)); +} + +TEST(BtreeKeyGeneratorTest, CollatorDoesNotAffectNestedArrayKeys) { + BSONObj keyPattern = fromjson("{a: 1}"); + BSONObj genKeysFrom = fromjson("{b: 4, a: {c: ['foo', 'bar', 'baz']}}"); + BSONObjSet expectedKeys; + expectedKeys.insert(fromjson("{'': {c: ['foo', 'bar', 'baz']}}")); + CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); + MultikeyPaths expectedMultikeyPaths{std::set<size_t>{}}; + ASSERT( + 
testKeygen(keyPattern, genKeysFrom, expectedKeys, expectedMultikeyPaths, false, &collator)); +} + } // namespace diff --git a/src/mongo/db/index/external_key_generator.cpp b/src/mongo/db/index/external_key_generator.cpp index c7f09d78bf0..efc1d8ce32e 100644 --- a/src/mongo/db/index/external_key_generator.cpp +++ b/src/mongo/db/index/external_key_generator.cpp @@ -85,7 +85,8 @@ void getKeysForUpgradeChecking(const BSONObj& infoObj, const BSONObj& doc, BSONO } // XXX: do we care about version - BtreeKeyGeneratorV1 keyGen(fieldNames, fixed, infoObj["sparse"].trueValue()); + // TODO: change nullptr to a collator, if a collation spec is given. + BtreeKeyGeneratorV1 keyGen(fieldNames, fixed, infoObj["sparse"].trueValue(), nullptr); // There's no need to compute the prefixes of the indexed fields that cause the index to be // multikey when checking if any index key is too large. |