diff options
author | Arun Banala <arun.banala@10gen.com> | 2019-11-20 14:46:44 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-11-20 14:46:44 +0000 |
commit | 2c1ef409e145a8adfb845415e17033de7ed170d7 (patch) | |
tree | 31a7275b58d0060c66aff693a25bf99f38854647 | |
parent | 4b8c6ec607794113ce697559135fae69a96ab15d (diff) | |
download | mongo-2c1ef409e145a8adfb845415e17033de7ed170d7.tar.gz |
SERVER-44571 Documents involved in SERVER-44050 corruption scenario cannot be updated or deleted after upgrade
(cherry picked from commit 35c6778143fc55eb9617ab4a54e616ba1e537ad5)
(cherry picked from commit 6dd33f3f725d8df801603b8f1dcbd7b13a85f1ce)
(cherry picked from commit 30701d77ca133ecd1b184587f61c832b1d028a4b)
(cherry picked from commit bb379d5efd111b8d2221c94c857f93f8aff3bc1d)
23 files changed, 273 insertions, 36 deletions
diff --git a/etc/evergreen.yml b/etc/evergreen.yml index c12b262415c..1a6d2a4a613 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -874,7 +874,7 @@ functions: # 3.2.1 is needed to check cross version compatibility as it is the final version # to use the old style replSetUpdatePosition command. # 3.2.1 is also needed to check for a correct implementation of SERVER-23299. - ${python|/opt/mongodbtoolchain/v2/bin/python2} buildscripts/setup_multiversion_mongodb.py /data/install /data/multiversion ${multiversion_edition|"base"} ${multiversion_platform_arch|"linux/x86_64"} "2.6" "3.0" "3.2" "3.2.1" + ${python|/opt/mongodbtoolchain/v2/bin/python2} buildscripts/setup_multiversion_mongodb.py /data/install /data/multiversion ${multiversion_edition|"base"} ${multiversion_platform_arch|"linux/x86_64"} "2.6" "3.0" "3.2" "3.2.1" "3.4.23" "cleanup environment" : command: shell.exec diff --git a/jstests/multiVersion/hashed_index_bad_keys_cleanup.js b/jstests/multiVersion/hashed_index_bad_keys_cleanup.js new file mode 100644 index 00000000000..e7e3a4a3f5d --- /dev/null +++ b/jstests/multiVersion/hashed_index_bad_keys_cleanup.js @@ -0,0 +1,120 @@ +/** + * Prior to SERVER-44050, for hashed indexes, if there is an array along index field path, we did + * not fail insertion. We incorrectly stored empty index key value for those cases. This lead to + * corruption of index keys. + * + * In this test we verify that we are able to successfully update and delete documents that were + * involved in creating corrupt indexes. + * + */ +(function() { + "use strict"; + + load("jstests/multiVersion/libs/multi_rs.js"); // For upgradeSet. + load("jstests/multiVersion/libs/verify_versions.js"); // For binVersion. + + const preBackportVersion = "3.4.23"; + const preBackportNodeOptions = {binVersion: preBackportVersion}; + const nodeOptionsOfLatestVersion = {binVersion: "latest"}; + + // Set up a new replSet consisting of 2 nodes, initially running on bad binaries. + const rst = new ReplSetTest({nodes: 2, nodeOptions: preBackportNodeOptions}); + rst.startSet(); + rst.initiate(); + + let testDB = rst.getPrimary().getDB(jsTestName()); + let coll = testDB.coll; + coll.drop(); + + // Verify that the replset is on binary version specified in 'preBackportVersion'. + assert.binVersion(testDB.getMongo(), preBackportVersion); + + // Insert bad documents using older version. + assert.commandWorked(coll.createIndex({"p.q.r": "hashed"})); + assert.writeOK(coll.insert({_id: 1, p: []})); + assert.writeOK(coll.insert({_id: 2, p: {q: [1]}})); + assert.writeOK(coll.insert({_id: 3, p: [{q: 1}]})); + assert.writeOK(coll.insert({_id: 4, a: 1, p: [{q: 1}]})); + assert.writeOK(coll.insert({_id: 5, a: 1, p: [{q: 1}]})); + + // Helper function which runs validate() on primary and secondary nodes, then verifies that the + // command returned the expected result. + function assertValidateCmdReturned(expectedResult) { + const resFromPrimary = assert.commandWorked(coll.validate({full: true})); + assert.eq(resFromPrimary.valid, expectedResult, resFromPrimary); + + rst.awaitReplication(); + const testDBOnSecondary = rst.getSecondary().getDB(jsTestName()); + const resFromSecondary = + assert.commandWorked(testDBOnSecondary.coll.validate({full: true})); + assert.eq(resFromSecondary.valid, expectedResult, resFromSecondary); + } + + // Confirm that validate() does not perceive a problem with the malformed documents. + assertValidateCmdReturned(true); + + // Upgrade the set to the new binary version. + rst.upgradeSet(nodeOptionsOfLatestVersion); + testDB = rst.getPrimary().getDB(jsTestName()); + coll = testDB.coll; + + // Verify that the five documents inserted earlier have their index keys. + let res = coll.find().hint({"p.q.r": "hashed"}).returnKey().itcount(); + assert.eq(res, 5); + + // Verify that after upgrade, inserting bad documents is not allowed. + const arrayAlongPathFailCode = 16766; + assert.writeErrorWithCode(coll.insert({p: []}), arrayAlongPathFailCode); + assert.writeErrorWithCode(coll.insert({p: [{q: 1}]}), arrayAlongPathFailCode); + assert.writeErrorWithCode(coll.insert({p: {q: {r: [3]}}}), arrayAlongPathFailCode); + + // After upgrade, validate() should now fail since there are existing bad documents. + assertValidateCmdReturned(false); + + // Deleting bad documents succeeds. + assert.eq(coll.deleteOne({_id: 1}).deletedCount, 1); + assert.eq(coll.deleteMany({a: 1}).deletedCount, 2); + + // Updating documents to contain array along field path should fail. + assert.writeErrorWithCode(coll.update({_id: 2}, {p: {q: [{r: 1}]}}), arrayAlongPathFailCode); + assert.commandFailedWithCode( + testDB.runCommand( + {findAndModify: coll.getName(), query: {_id: 2}, update: {p: {q: [{r: 1}]}}}), + arrayAlongPathFailCode); + + assert.writeErrorWithCode(coll.update({_id: 2}, {p: {q: {r: [3]}}}), arrayAlongPathFailCode); + const updateRes = testDB.runCommand({ + update: coll.getName(), + updates: [ + {q: {_id: 3}, u: {$set: {p: {q: [{r: 1}]}}}}, + {q: {_id: 2}, u: {$set: {p: {q: [{r: 1}]}}}} + ], + ordered: false + }); + assert.eq(updateRes.writeErrors.length, 2); + assert.eq(updateRes.writeErrors[0].code, arrayAlongPathFailCode); + assert.eq(updateRes.writeErrors[1].code, arrayAlongPathFailCode); + + // Verify that updating to a valid index field works. + assert.writeOK(coll.update({_id: 2}, {p: {q: {r: 4}}})); + + // Verify that the index key is updated correctly by quering with hashed index. + res = coll.find({"p.q.r": 4}).hint({"p.q.r": "hashed"}).toArray(); + assert.eq(res, [{_id: 2, p: {q: {r: 4}}}]); + + // Validate should still fail since a bad document {_id: 3} exists. + assertValidateCmdReturned(false); + + // Delete the last remaining bad document. + assert.eq(coll.deleteOne({_id: 3}).deletedCount, 1); + + // Now that all the bad documents are deleted or updated, verify that validate succeeds. + assertValidateCmdReturned(true); + + // Verify that there is only one index key left (for {_id: 2}). + res = coll.find().hint({"p.q.r": "hashed"}).returnKey().itcount(); + assert.eq(res, 1); + + rst.awaitReplication(); + rst.stopSet(); +}()); diff --git a/src/mongo/db/catalog/collection.cpp b/src/mongo/db/catalog/collection.cpp index 39bb1870256..a20cdde1bd8 100644 --- a/src/mongo/db/catalog/collection.cpp +++ b/src/mongo/db/catalog/collection.cpp @@ -1092,6 +1092,7 @@ public: MultikeyPaths* multikeyPaths = nullptr; iam->getKeys(recordBson, IndexAccessMethod::GetKeysMode::kEnforceConstraints, + IndexAccessMethod::GetKeysContext::kReadOrAddKeys, &documentKeySet, multikeyPaths); diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp index a9035311bba..ed376b4548a 100644 --- a/src/mongo/db/exec/working_set_common.cpp +++ b/src/mongo/db/exec/working_set_common.cpp @@ -120,6 +120,7 @@ bool WorkingSetCommon::fetch(OperationContext* txn, MultikeyPaths* multikeyPaths = nullptr; member->keyData[i].index->getKeys(member->obj.value(), IndexAccessMethod::GetKeysMode::kEnforceConstraints, + IndexAccessMethod::GetKeysContext::kReadOrAddKeys, &keys, multikeyPaths); if (!keys.count(member->keyData[i].keyData)) { diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp index 603df21bbdb..bebc91a9452 100644 --- a/src/mongo/db/index/2d_access_method.cpp +++ b/src/mongo/db/index/2d_access_method.cpp @@ -48,6 +48,7 @@ TwoDAccessMethod::TwoDAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte /** Finds the key objects to put in an index */ void TwoDAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::get2DKeys(obj, _params, keys, NULL); diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h index 72b0e641e03..95479bcfdca 100644 --- a/src/mongo/db/index/2d_access_method.h +++ b/src/mongo/db/index/2d_access_method.h @@ -60,7 +60,10 @@ private: * This function ignores the 'multikeyPaths' pointer because 2d indexes don't support tracking * path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; TwoDIndexingParams _params; }; diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp index 1e1d7f4e090..288ade31496 100644 --- a/src/mongo/db/index/btree_access_method.cpp +++ b/src/mongo/db/index/btree_access_method.cpp @@ -62,6 +62,7 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn } void BtreeAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { _keyGenerator->getKeys(obj, keys, multikeyPaths); diff --git a/src/mongo/db/index/btree_access_method.h b/src/mongo/db/index/btree_access_method.h index caed0eccab5..3209aa9e6c3 100644 --- a/src/mongo/db/index/btree_access_method.h +++ b/src/mongo/db/index/btree_access_method.h @@ -48,7 +48,10 @@ public: BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree); private: - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; // Our keys differ for V0 and V1. std::unique_ptr<BtreeKeyGenerator> _keyGenerator; diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp index 0a4cd2e42ed..ee5118614f8 100644 --- a/src/mongo/db/index/expression_keys_private.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -359,20 +359,35 @@ void ExpressionKeysPrivate::getHashKeys(const BSONObj& obj, int hashVersion, bool isSparse, const CollatorInterface* collator, - BSONObjSet* keys) { + BSONObjSet* keys, + bool ignoreArraysAlongPath) { + static const BSONObj nullObj = BSON("" << BSONNULL); const char* cstr = hashedField.c_str(); BSONElement fieldVal = dps::extractElementAtPathOrArrayAlongPath(obj, cstr); // If we hit an array while traversing the path, 'cstr' will point to the path component - // immediately following the array, or the null termination byte if the final field in the path - // was an array. - uassert( - 16766, - str::stream() - << "Error: hashed indexes do not currently support array values. Found array at path: " - << hashedField.substr( - 0, hashedField.size() - StringData(cstr).size() - !StringData(cstr).empty()), - fieldVal.type() != BSONType::Array); + // immediately following the array, or the null termination byte if the terminal path + // component was an array. In the latter case, 'remainingPath' will be empty. + auto remainingPath = StringData(cstr); + + // If 'ignoreArraysAlongPath' is set, we want to use the behaviour prior to SERVER-44050, + // which is to allow arrays along the field path (except the terminal path). This is done so + // that the document keys inserted prior to SERVER-44050 can be deleted or updated after the + // upgrade, allowing users to recover from the possible index corruption. The old behaviour + // before SERVER-44050 was to store 'null' index key if we encountered an array along the + // index field path. We will use the same logic in the context of removing index keys. + if (ignoreArraysAlongPath && fieldVal.type() == BSONType::Array && !remainingPath.empty()) { + fieldVal = nullObj.firstElement(); + } + + // Otherwise, throw if an array was encountered at any point along the path. + uassert(16766, + str::stream() << "Error: hashed indexes do not currently support array values. " + "Found array at path: " + << hashedField.substr(0, + hashedField.size() - remainingPath.size() - + !remainingPath.empty()), + fieldVal.type() != BSONType::Array); // Convert strings to comparison keys. BSONObj fieldValObj; @@ -384,7 +399,6 @@ void ExpressionKeysPrivate::getHashKeys(const BSONObj& obj, BSONObj key = BSON("" << makeSingleHashKey(fieldVal, seed, hashVersion)); keys->insert(key); } else if (!isSparse) { - BSONObj nullObj = BSON("" << BSONNULL); keys->insert(BSON("" << makeSingleHashKey(nullObj.firstElement(), seed, hashVersion))); } } diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h index cbb8c9508ee..c0a95f7aaa6 100644 --- a/src/mongo/db/index/expression_keys_private.h +++ b/src/mongo/db/index/expression_keys_private.h @@ -82,7 +82,8 @@ public: int hashVersion, bool isSparse, const CollatorInterface* collator, - BSONObjSet* keys); + BSONObjSet* keys, + bool ignoreArraysAlongPath); /** * Hashing function used by both getHashKeys and the cursors we create. diff --git a/src/mongo/db/index/external_key_generator.cpp b/src/mongo/db/index/external_key_generator.cpp index ed62b1afe74..08d67550d14 100644 --- a/src/mongo/db/index/external_key_generator.cpp +++ b/src/mongo/db/index/external_key_generator.cpp @@ -82,7 +82,7 @@ void getKeysForUpgradeChecking(const BSONObj& infoObj, const BSONObj& doc, BSONO // generated will be wrong. CollatorInterface* collator = nullptr; ExpressionKeysPrivate::getHashKeys( - doc, field, seed, version, infoObj["sparse"].trueValue(), collator, keys); + doc, field, seed, version, infoObj["sparse"].trueValue(), collator, keys, false); } else { invariant(IndexNames::BTREE == type); diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp index 6741b5ef95e..cdca3fe9df9 100644 --- a/src/mongo/db/index/fts_access_method.cpp +++ b/src/mongo/db/index/fts_access_method.cpp @@ -35,6 +35,7 @@ FTSAccessMethod::FTSAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterf : IndexAccessMethod(btreeState, btree), _ftsSpec(btreeState->descriptor()->infoObj()) {} void FTSAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getFTSKeys(obj, _ftsSpec, keys); diff --git a/src/mongo/db/index/fts_access_method.h b/src/mongo/db/index/fts_access_method.h index 8f843e32bc6..b7f1dfc49be 100644 --- a/src/mongo/db/index/fts_access_method.h +++ b/src/mongo/db/index/fts_access_method.h @@ -51,7 +51,10 @@ private: * This function ignores the 'multikeyPaths' pointer because text indexes don't support tracking * path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; fts::FTSSpec _ftsSpec; }; diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp index 87efa44b6a3..f160a9cfdbd 100644 --- a/src/mongo/db/index/hash_access_method.cpp +++ b/src/mongo/db/index/hash_access_method.cpp @@ -52,10 +52,17 @@ HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte } void HashAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { - ExpressionKeysPrivate::getHashKeys( - obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), _collator, keys); + ExpressionKeysPrivate::getHashKeys(obj, + _hashedField, + _seed, + _hashVersion, + _descriptor->isSparse(), + _collator, + keys, + (context == GetKeysContext::kRemovingKeys)); } } // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h index fdcf3aa6183..21cdc453324 100644 --- a/src/mongo/db/index/hash_access_method.h +++ b/src/mongo/db/index/hash_access_method.h @@ -52,7 +52,10 @@ private: * This function ignores the 'multikeyPaths' pointer because hashed indexes don't support * tracking path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; // Only one of our fields is hashed. This is the field name for it. std::string _hashedField; diff --git a/src/mongo/db/index/hash_key_generator_test.cpp b/src/mongo/db/index/hash_key_generator_test.cpp index 07436bda0fd..449474bfcce 100644 --- a/src/mongo/db/index/hash_key_generator_test.cpp +++ b/src/mongo/db/index/hash_key_generator_test.cpp @@ -88,7 +88,7 @@ TEST(HashKeyGeneratorTest, CollationAppliedBeforeHashing) { BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionKeysPrivate::getHashKeys( - obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys); + obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys, false); BSONObj backwardsObj = fromjson("{a: 'gnirts'}"); BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); @@ -102,7 +102,7 @@ TEST(HashKeyGeneratorTest, CollationDoesNotAffectNonStringFields) { BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionKeysPrivate::getHashKeys( - obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys); + obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys, false); BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); expectedKeys.insert(makeHashKey(obj["a"])); @@ -116,7 +116,7 @@ TEST(HashKeyGeneratorTest, CollatorAppliedBeforeHashingNestedObject) { BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionKeysPrivate::getHashKeys( - obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys); + obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys, false); BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); expectedKeys.insert(makeHashKey(backwardsObj["a"])); @@ -128,7 +128,7 @@ TEST(HashKeyGeneratorTest, NoCollation) { BSONObj obj = fromjson("{a: 'string'}"); BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); ExpressionKeysPrivate::getHashKeys( - obj, "a", kHashSeed, kHashVersion, false, nullptr, &actualKeys); + obj, "a", kHashSeed, kHashVersion, false, nullptr, &actualKeys, false); BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); expectedKeys.insert(makeHashKey(obj["a"])); @@ -136,4 +136,48 @@ TEST(HashKeyGeneratorTest, NoCollation) { ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); } +TEST(HashKeyGeneratorTest, ArrayAlongIndexFieldPathFails) { + BSONObj obj = fromjson("{a: []}"); + BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + ASSERT_THROWS_CODE( + ExpressionKeysPrivate::getHashKeys( + obj, "a.b.c", kHashSeed, kHashVersion, false, nullptr, &actualKeys, false), + DBException, + 16766); +} + +TEST(HashKeyGeneratorTest, ArrayAlongIndexFieldPathDoesNotFailWhenIgnoreFlagIsSet) { + BSONObj obj = fromjson("{a: []}"); + BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + ExpressionKeysPrivate::getHashKeys(obj, + "a.b.c", + kHashSeed, + kHashVersion, + false, + nullptr, + &actualKeys, + true // ignoreArraysAlongPath + ); + + BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + expectedKeys.insert(makeHashKey(BSON("" << BSONNULL).firstElement())); + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(HashKeyGeneratorTest, ArrayAtTerminalPathAlwaysFails) { + BSONObj obj = fromjson("{a : {b: {c: [1]}}}"); + BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + ASSERT_THROWS_CODE(ExpressionKeysPrivate::getHashKeys(obj, + "a.b.c", + kHashSeed, + kHashVersion, + true, // isSparse + nullptr, + &actualKeys, + true // ignoreArraysAlongPath + ), + DBException, + 16766); +} + } // namespace diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp index 877e94c0c7d..771995cb228 100644 --- a/src/mongo/db/index/haystack_access_method.cpp +++ b/src/mongo/db/index/haystack_access_method.cpp @@ -63,6 +63,7 @@ HaystackAccessMethod::HaystackAccessMethod(IndexCatalogEntry* btreeState, } void HaystackAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys); diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h index a6ff68bdd9f..ec21744d5d7 100644 --- a/src/mongo/db/index/haystack_access_method.h +++ b/src/mongo/db/index/haystack_access_method.h @@ -75,7 +75,10 @@ private: * This function ignores the 'multikeyPaths' pointer because geoHaystack indexes don't support * tracking path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; std::string _geoField; std::vector<std::string> _otherFields; diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index ada7e27ab92..5dfc812e6d8 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -131,7 +131,7 @@ Status IndexAccessMethod::insert(OperationContext* txn, BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths multikeyPaths; // Delegate to the subclass. - getKeys(obj, options.getKeysMode, &keys, &multikeyPaths); + getKeys(obj, options.getKeysMode, GetKeysContext::kReadOrAddKeys, &keys, &multikeyPaths); Status ret = Status::OK(); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { @@ -211,7 +211,7 @@ Status IndexAccessMethod::remove(OperationContext* txn, // multikey when removing a document since the index metadata isn't updated when keys are // deleted. MultikeyPaths* multikeyPaths = nullptr; - getKeys(obj, options.getKeysMode, &keys, multikeyPaths); + getKeys(obj, options.getKeysMode, GetKeysContext::kRemovingKeys, &keys, multikeyPaths); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { removeOneKey(txn, *i, loc, options.dupsAllowed); @@ -230,7 +230,11 @@ Status IndexAccessMethod::touch(OperationContext* txn, const BSONObj& obj) { // There's no need to compute the prefixes of the indexed fields that cause the index to be // multikey when paging a document's index entries into memory. MultikeyPaths* multikeyPaths = nullptr; - getKeys(obj, GetKeysMode::kEnforceConstraints, &keys, multikeyPaths); + getKeys(obj, + GetKeysMode::kEnforceConstraints, + GetKeysContext::kReadOrAddKeys, + &keys, + multikeyPaths); std::unique_ptr<SortedDataInterface::Cursor> cursor(_newInterface->newCursor(txn)); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { @@ -252,7 +256,11 @@ RecordId IndexAccessMethod::findSingle(OperationContext* txn, const BSONObj& req // For performance, call get keys only if there is a non-simple collation. BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths* multikeyPaths = nullptr; - getKeys(requestedKey, GetKeysMode::kEnforceConstraints, &keys, multikeyPaths); + getKeys(requestedKey, + GetKeysMode::kEnforceConstraints, + GetKeysContext::kReadOrAddKeys, + &keys, + multikeyPaths); invariant(keys.size() == 1); actualKey = *keys.begin(); } else { @@ -341,11 +349,19 @@ Status IndexAccessMethod::validateUpdate(OperationContext* txn, // index to be multikey when the old version of the document was written since the index // metadata isn't updated when keys are deleted. MultikeyPaths* multikeyPaths = nullptr; - getKeys(from, options.getKeysMode, &ticket->oldKeys, multikeyPaths); + getKeys(from, + options.getKeysMode, + GetKeysContext::kRemovingKeys, + &ticket->oldKeys, + multikeyPaths); } if (!indexFilter || indexFilter->matchesBSON(to)) { - getKeys(to, options.getKeysMode, &ticket->newKeys, &ticket->newMultikeyPaths); + getKeys(to, + options.getKeysMode, + GetKeysContext::kReadOrAddKeys, + &ticket->newKeys, + &ticket->newMultikeyPaths); } ticket->loc = record; @@ -427,7 +443,7 @@ Status IndexAccessMethod::BulkBuilder::insert(OperationContext* txn, BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths multikeyPaths; - _real->getKeys(obj, options.getKeysMode, &keys, &multikeyPaths); + _real->getKeys(obj, options.getKeysMode, GetKeysContext::kReadOrAddKeys, &keys, &multikeyPaths); _everGeneratedMultipleKeys = _everGeneratedMultipleKeys || (keys.size() > 1); @@ -542,6 +558,7 @@ Status IndexAccessMethod::commitBulk(OperationContext* txn, void IndexAccessMethod::getKeys(const BSONObj& obj, GetKeysMode mode, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { static stdx::unordered_set<int> whiteList{ErrorCodes::CannotBuildIndexKeys, @@ -567,7 +584,7 @@ void IndexAccessMethod::getKeys(const BSONObj& obj, 13026, 13027}; try { - doGetKeys(obj, keys, multikeyPaths); + doGetKeys(obj, context, keys, multikeyPaths); } catch (const UserException& ex) { if (mode == GetKeysMode::kEnforceConstraints) { throw; diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 2e52ce7990d..dd7bbf4a48a 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -262,6 +262,12 @@ public: enum class GetKeysMode { kRelaxConstraints, kEnforceConstraints }; /** + * Specifies whether getKeys is being used in the context of creating new keys or deleting + * existing keys. + */ + enum class GetKeysContext { kRemovingKeys, kReadOrAddKeys }; + + /** * Fills 'keys' with the keys that should be generated for 'obj' on this index. * Based on 'mode', it will honor or ignore index constraints, e.g. duplicated key, key too * long, and geo index parsing errors. The ignoring of constraints is for replication due to @@ -275,6 +281,7 @@ public: */ void getKeys(const BSONObj& obj, GetKeysMode mode, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const; @@ -300,6 +307,7 @@ protected: * a result of inserting 'keys'. */ virtual void doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const = 0; diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp index 995da4dbeb7..6db46cfaf90 100644 --- a/src/mongo/db/index/s2_access_method.cpp +++ b/src/mongo/db/index/s2_access_method.cpp @@ -140,6 +140,7 @@ StatusWith<BSONObj> S2AccessMethod::fixSpec(const BSONObj& specObj) { } void S2AccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys, multikeyPaths); diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h index ad0044dc128..97a07e4f274 100644 --- a/src/mongo/db/index/s2_access_method.h +++ b/src/mongo/db/index/s2_access_method.h @@ -63,7 +63,10 @@ private: * and fills each element with the prefixes of the indexed field that would cause this index to * be multikey as a result of inserting 'keys'. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; S2IndexingParams _params; diff --git a/src/mongo/dbtests/namespacetests.cpp b/src/mongo/dbtests/namespacetests.cpp index 021d5b59293..4dd7db545d2 100644 --- a/src/mongo/dbtests/namespacetests.cpp +++ b/src/mongo/dbtests/namespacetests.cpp @@ -103,7 +103,8 @@ public: // Call getKeys on the nullObj. BSONObjSet nullFieldKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); const CollatorInterface* collator = nullptr; - ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0, 0, false, collator, &nullFieldKeySet); + ExpressionKeysPrivate::getHashKeys( + nullObj, "a", 0, 0, false, collator, &nullFieldKeySet, false); BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement(); ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0, 0), @@ -133,7 +134,7 @@ public: BSONObjSet nullFieldKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); const CollatorInterface* collator = nullptr; ExpressionKeysPrivate::getHashKeys( - nullObj, "a", 0x5eed, 0, false, collator, &nullFieldKeySet); + nullObj, "a", 0x5eed, 0, false, collator, &nullFieldKeySet, false); BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement(); ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0x5eed, 0), |