diff options
22 files changed, 274 insertions, 35 deletions
diff --git a/etc/evergreen.yml b/etc/evergreen.yml index 698799e4b4a..2dff08cd664 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -1777,7 +1777,7 @@ functions: --edition ${multiversion_edition|base} \ --platform ${multiversion_platform|linux} \ --architecture ${multiversion_architecture|x86_64} \ - --useLatest 3.2 3.4 3.6 3.6.7 4.0.5 + --useLatest 3.2 3.4 3.6 3.6.7 4.0.5 4.0.13 "do snmp setup" : command: shell.exec diff --git a/jstests/multiVersion/hashed_index_bad_keys_cleanup.js b/jstests/multiVersion/hashed_index_bad_keys_cleanup.js new file mode 100644 index 00000000000..e43141365d6 --- /dev/null +++ b/jstests/multiVersion/hashed_index_bad_keys_cleanup.js @@ -0,0 +1,118 @@ +/** + * Prior to SERVER-44050, for hashed indexes, if there is an array along index field path, we did + * not fail insertion. We incorrectly stored empty index key value for those cases. This lead to + * corruption of index keys. + * + * In this test we verify that we are able to successfully update and delete documents that were + * involved in creating corrupt indexes. + * + */ +(function() { + "use strict"; + + load("jstests/multiVersion/libs/multi_rs.js"); // For upgradeSet. + load("jstests/multiVersion/libs/verify_versions.js"); // For binVersion. + + const preBackportVersion = "4.0.13"; + const preBackportNodeOptions = {binVersion: preBackportVersion}; + const nodeOptionsOfLatestVersion = {binVersion: "latest"}; + + // Set up a new replSet consisting of 2 nodes, initially running on bad binaries. + const rst = new ReplSetTest({nodes: 2, nodeOptions: preBackportNodeOptions}); + rst.startSet(); + rst.initiate(); + + let testDB = rst.getPrimary().getDB(jsTestName()); + let coll = testDB.coll; + coll.drop(); + + // Verify that the replset is on binary version specified in 'preBackportVersion'. + assert.binVersion(testDB.getMongo(), preBackportVersion); + + // Insert bad documents using older version. + assert.commandWorked(coll.createIndex({"p.q.r": "hashed"})); + assert.commandWorked(coll.insert({_id: 1, p: []})); + assert.commandWorked(coll.insert({_id: 2, p: {q: [1]}})); + assert.commandWorked(coll.insert({_id: 3, p: [{q: 1}]})); + assert.commandWorked(coll.insert({_id: 4, a: 1, p: [{q: 1}]})); + assert.commandWorked(coll.insert({_id: 5, a: 1, p: [{q: 1}]})); + + // Helper function which runs validate() on primary and secondary nodes, then verifies that the + // command returned the expected result. + function assertValidateCmdReturned(expectedResult) { + const resFromPrimary = assert.commandWorked(coll.validate({full: true})); + assert.eq(resFromPrimary.valid, expectedResult, resFromPrimary); + + rst.awaitReplication(); + const testDBOnSecondary = rst.getSecondary().getDB(jsTestName()); + const resFromSecondary = + assert.commandWorked(testDBOnSecondary.coll.validate({full: true})); + assert.eq(resFromSecondary.valid, expectedResult, resFromSecondary); + } + + // Confirm that validate() does not perceive a problem with the malformed documents. + assertValidateCmdReturned(true); + + // Upgrade the set to the new binary version. + rst.upgradeSet(nodeOptionsOfLatestVersion); + testDB = rst.getPrimary().getDB(jsTestName()); + coll = testDB.coll; + + // Verify that the five documents inserted earlier have their index keys. + let res = coll.find().hint({"p.q.r": "hashed"}).returnKey().itcount(); + assert.eq(res, 5); + + // Verify that after upgrade, inserting bad documents is not allowed. + const arrayAlongPathFailCode = 16766; + assert.commandFailedWithCode(coll.insert({p: []}), arrayAlongPathFailCode); + assert.commandFailedWithCode(coll.insert({p: [{q: 1}]}), arrayAlongPathFailCode); + assert.commandFailedWithCode(coll.insert({p: {q: {r: [3]}}}), arrayAlongPathFailCode); + + // After upgrade, validate() should now fail since there are existing bad documents. + assertValidateCmdReturned(false); + + // Deleting bad documents succeeds. + assert.commandWorked(coll.deleteOne({_id: 1})); + assert.commandWorked(coll.deleteMany({a: 1})); + + // Updating documents to contain array along field path should fail. + assert.commandFailedWithCode(coll.update({_id: 2}, {p: {q: [{r: 1}]}}), arrayAlongPathFailCode); + assert.commandFailedWithCode( + testDB.runCommand( + {findAndModify: coll.getName(), query: {_id: 2}, update: {p: {q: [{r: 1}]}}}), + arrayAlongPathFailCode); + + assert.commandFailedWithCode(coll.update({_id: 2}, {p: {q: {r: [3]}}}), arrayAlongPathFailCode); + assert.commandFailedWithCode(testDB.runCommand({ + update: coll.getName(), + updates: [ + {q: {_id: 3}, u: {$set: {p: {q: [{r: 1}]}}}}, + {q: {_id: 2}, u: {$set: {p: {q: [{r: 1}]}}}} + ], + ordered: false + }), + arrayAlongPathFailCode); + + // Verify that updating to a valid index field works. + assert.commandWorked(coll.update({_id: 2}, {p: {q: {r: 4}}})); + + // Verify that the index key is updated correctly by quering with hashed index. + res = coll.find({"p.q.r": 4}).hint({"p.q.r": "hashed"}).toArray(); + assert.eq(res, [{_id: 2, p: {q: {r: 4}}}]); + + // Validate should still fail since a bad document {_id: 3} exists. + assertValidateCmdReturned(false); + + // Delete the last remaining bad document. + assert.commandWorked(coll.deleteOne({_id: 3})); + + // Now that all the bad documents are deleted or updated, verify that validate succeeds. + assertValidateCmdReturned(true); + + // Verify that there is only one index key left (for {_id: 2}). + res = coll.find().hint({"p.q.r": "hashed"}).returnKey().itcount(); + assert.eq(res, 1); + + rst.awaitReplication(); + rst.stopSet(); +}()); diff --git a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp index bd82cfe531d..ed342ef4a97 100644 --- a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp +++ b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp @@ -83,6 +83,7 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId, MultikeyPaths* multikeyPaths = nullptr; iam->getKeys(recordBson, IndexAccessMethod::GetKeysMode::kEnforceConstraints, + IndexAccessMethod::GetKeysContext::kReadOrAddKeys, &documentKeySet, multikeyPaths); diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp index 97276fcfc28..15ec18c6121 100644 --- a/src/mongo/db/exec/working_set_common.cpp +++ b/src/mongo/db/exec/working_set_common.cpp @@ -122,6 +122,7 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx, MultikeyPaths* multikeyPaths = nullptr; member->keyData[i].index->getKeys(member->obj.value(), IndexAccessMethod::GetKeysMode::kEnforceConstraints, + IndexAccessMethod::GetKeysContext::kReadOrAddKeys, &keys, multikeyPaths); if (!keys.count(member->keyData[i].keyData)) { diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp index c6df95e5fba..55b7a2d7159 100644 --- a/src/mongo/db/index/2d_access_method.cpp +++ b/src/mongo/db/index/2d_access_method.cpp @@ -51,6 +51,7 @@ TwoDAccessMethod::TwoDAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte /** Finds the key objects to put in an index */ void TwoDAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::get2DKeys(obj, _params, keys); diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h index 4a889fc676c..dc23a36167a 100644 --- a/src/mongo/db/index/2d_access_method.h +++ b/src/mongo/db/index/2d_access_method.h @@ -59,7 +59,10 @@ private: * This function ignores the 'multikeyPaths' pointer because 2d indexes don't support tracking * path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; TwoDIndexingParams _params; }; diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp index 0a5811be033..bddb464bf06 100644 --- a/src/mongo/db/index/btree_access_method.cpp +++ b/src/mongo/db/index/btree_access_method.cpp @@ -65,6 +65,7 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn } void BtreeAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { _keyGenerator->getKeys(obj, keys, multikeyPaths); diff --git a/src/mongo/db/index/btree_access_method.h b/src/mongo/db/index/btree_access_method.h index e84f48e4faa..d596c318601 100644 --- a/src/mongo/db/index/btree_access_method.h +++ b/src/mongo/db/index/btree_access_method.h @@ -50,7 +50,10 @@ public: BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree); private: - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; // Our keys differ for V0 and V1. std::unique_ptr<BtreeKeyGenerator> _keyGenerator; diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp index 424db842960..6b595c4c790 100644 --- a/src/mongo/db/index/expression_keys_private.cpp +++ b/src/mongo/db/index/expression_keys_private.cpp @@ -345,20 +345,35 @@ void ExpressionKeysPrivate::getHashKeys(const BSONObj& obj, int hashVersion, bool isSparse, const CollatorInterface* collator, - BSONObjSet* keys) { + BSONObjSet* keys, + bool ignoreArraysAlongPath) { + static const BSONObj nullObj = BSON("" << BSONNULL); const char* cstr = hashedField.c_str(); BSONElement fieldVal = dps::extractElementAtPathOrArrayAlongPath(obj, cstr); // If we hit an array while traversing the path, 'cstr' will point to the path component - // immediately following the array, or the null termination byte if the final field in the path - // was an array. - uassert( - 16766, - str::stream() - << "Error: hashed indexes do not currently support array values. Found array at path: " - << hashedField.substr( - 0, hashedField.size() - StringData(cstr).size() - !StringData(cstr).empty()), - fieldVal.type() != BSONType::Array); + // immediately following the array, or the null termination byte if the terminal path + // component was an array. In the latter case, 'remainingPath' will be empty. + auto remainingPath = StringData(cstr); + + // If 'ignoreArraysAlongPath' is set, we want to use the behaviour prior to SERVER-44050, + // which is to allow arrays along the field path (except the terminal path). This is done so + // that the document keys inserted prior to SERVER-44050 can be deleted or updated after the + // upgrade, allowing users to recover from the possible index corruption. The old behaviour + // before SERVER-44050 was to store 'null' index key if we encountered an array along the + // index field path. We will use the same logic in the context of removing index keys. + if (ignoreArraysAlongPath && fieldVal.type() == BSONType::Array && !remainingPath.empty()) { + fieldVal = nullObj.firstElement(); + } + + // Otherwise, throw if an array was encountered at any point along the path. + uassert(16766, + str::stream() << "Error: hashed indexes do not currently support array values. " + "Found array at path: " + << hashedField.substr(0, + hashedField.size() - remainingPath.size() - + !remainingPath.empty()), + fieldVal.type() != BSONType::Array); // Convert strings to comparison keys. BSONObj fieldValObj; @@ -370,7 +385,6 @@ void ExpressionKeysPrivate::getHashKeys(const BSONObj& obj, BSONObj key = BSON("" << makeSingleHashKey(fieldVal, seed, hashVersion)); keys->insert(key); } else if (!isSparse) { - BSONObj nullObj = BSON("" << BSONNULL); keys->insert(BSON("" << makeSingleHashKey(nullObj.firstElement(), seed, hashVersion))); } } diff --git a/src/mongo/db/index/expression_keys_private.h b/src/mongo/db/index/expression_keys_private.h index 87d48aaaded..8fc8e612cba 100644 --- a/src/mongo/db/index/expression_keys_private.h +++ b/src/mongo/db/index/expression_keys_private.h @@ -81,7 +81,8 @@ public: int hashVersion, bool isSparse, const CollatorInterface* collator, - BSONObjSet* keys); + BSONObjSet* keys, + bool ignoreArraysAlongPath); /** * Hashing function used by both getHashKeys and the cursors we create. diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp index 08ab9c11933..27164bfdc97 100644 --- a/src/mongo/db/index/fts_access_method.cpp +++ b/src/mongo/db/index/fts_access_method.cpp @@ -39,6 +39,7 @@ FTSAccessMethod::FTSAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterf : IndexAccessMethod(btreeState, btree), _ftsSpec(btreeState->descriptor()->infoObj()) {} void FTSAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getFTSKeys(obj, _ftsSpec, keys); diff --git a/src/mongo/db/index/fts_access_method.h b/src/mongo/db/index/fts_access_method.h index cbf254207ba..a200c4451d4 100644 --- a/src/mongo/db/index/fts_access_method.h +++ b/src/mongo/db/index/fts_access_method.h @@ -53,7 +53,10 @@ private: * This function ignores the 'multikeyPaths' pointer because text indexes don't support tracking * path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; fts::FTSSpec _ftsSpec; }; diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp index af3382a60c1..d7b79f1c0c9 100644 --- a/src/mongo/db/index/hash_access_method.cpp +++ b/src/mongo/db/index/hash_access_method.cpp @@ -56,10 +56,17 @@ HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte } void HashAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { - ExpressionKeysPrivate::getHashKeys( - obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), _collator, keys); + ExpressionKeysPrivate::getHashKeys(obj, + _hashedField, + _seed, + _hashVersion, + _descriptor->isSparse(), + _collator, + keys, + (context == GetKeysContext::kRemovingKeys)); } } // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h index 80d0eec4500..83dd52b8a29 100644 --- a/src/mongo/db/index/hash_access_method.h +++ b/src/mongo/db/index/hash_access_method.h @@ -56,7 +56,10 @@ private: * This function ignores the 'multikeyPaths' pointer because hashed indexes don't support * tracking path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; // Only one of our fields is hashed. This is the field name for it. std::string _hashedField; diff --git a/src/mongo/db/index/hash_key_generator_test.cpp b/src/mongo/db/index/hash_key_generator_test.cpp index cd1ac3acace..9354a9a9507 100644 --- a/src/mongo/db/index/hash_key_generator_test.cpp +++ b/src/mongo/db/index/hash_key_generator_test.cpp @@ -90,7 +90,7 @@ TEST(HashKeyGeneratorTest, CollationAppliedBeforeHashing) { BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionKeysPrivate::getHashKeys( - obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys); + obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys, false); BSONObj backwardsObj = fromjson("{a: 'gnirts'}"); BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); @@ -104,7 +104,7 @@ TEST(HashKeyGeneratorTest, CollationDoesNotAffectNonStringFields) { BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionKeysPrivate::getHashKeys( - obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys); + obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys, false); BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); expectedKeys.insert(makeHashKey(obj["a"])); @@ -118,7 +118,7 @@ TEST(HashKeyGeneratorTest, CollatorAppliedBeforeHashingNestedObject) { BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); CollatorInterfaceMock collator(CollatorInterfaceMock::MockType::kReverseString); ExpressionKeysPrivate::getHashKeys( - obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys); + obj, "a", kHashSeed, kHashVersion, false, &collator, &actualKeys, false); BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); expectedKeys.insert(makeHashKey(backwardsObj["a"])); @@ -130,7 +130,7 @@ TEST(HashKeyGeneratorTest, NoCollation) { BSONObj obj = fromjson("{a: 'string'}"); BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); ExpressionKeysPrivate::getHashKeys( - obj, "a", kHashSeed, kHashVersion, false, nullptr, &actualKeys); + obj, "a", kHashSeed, kHashVersion, false, nullptr, &actualKeys, false); BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); expectedKeys.insert(makeHashKey(obj["a"])); @@ -138,4 +138,48 @@ TEST(HashKeyGeneratorTest, NoCollation) { ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); } +TEST(HashKeyGeneratorTest, ArrayAlongIndexFieldPathFails) { + BSONObj obj = fromjson("{a: []}"); + BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + ASSERT_THROWS_CODE( + ExpressionKeysPrivate::getHashKeys( + obj, "a.b.c", kHashSeed, kHashVersion, false, nullptr, &actualKeys, false), + DBException, + 16766); +} + +TEST(HashKeyGeneratorTest, ArrayAlongIndexFieldPathDoesNotFailWhenIgnoreFlagIsSet) { + BSONObj obj = fromjson("{a: []}"); + BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + ExpressionKeysPrivate::getHashKeys(obj, + "a.b.c", + kHashSeed, + kHashVersion, + false, + nullptr, + &actualKeys, + true // ignoreArraysAlongPath + ); + + BSONObjSet expectedKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + expectedKeys.insert(makeHashKey(BSON("" << BSONNULL).firstElement())); + ASSERT(assertKeysetsEqual(expectedKeys, actualKeys)); +} + +TEST(HashKeyGeneratorTest, ArrayAtTerminalPathAlwaysFails) { + BSONObj obj = fromjson("{a : {b: {c: [1]}}}"); + BSONObjSet actualKeys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + ASSERT_THROWS_CODE(ExpressionKeysPrivate::getHashKeys(obj, + "a.b.c", + kHashSeed, + kHashVersion, + true, // isSparse + nullptr, + &actualKeys, + true // ignoreArraysAlongPath + ), + DBException, + 16766); +} + } // namespace diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp index 120b5806eee..d562b1ffa69 100644 --- a/src/mongo/db/index/haystack_access_method.cpp +++ b/src/mongo/db/index/haystack_access_method.cpp @@ -66,6 +66,7 @@ HaystackAccessMethod::HaystackAccessMethod(IndexCatalogEntry* btreeState, } void HaystackAccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys); diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h index f4d743c1021..a15933c98f6 100644 --- a/src/mongo/db/index/haystack_access_method.h +++ b/src/mongo/db/index/haystack_access_method.h @@ -77,7 +77,10 @@ private: * This function ignores the 'multikeyPaths' pointer because geoHaystack indexes don't support * tracking path-level multikey information. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; std::string _geoField; std::vector<std::string> _otherFields; diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index 35df1dd865b..583a2cd3bf0 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -138,7 +138,7 @@ Status IndexAccessMethod::insert(OperationContext* opCtx, BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths multikeyPaths; // Delegate to the subclass. - getKeys(obj, options.getKeysMode, &keys, &multikeyPaths); + getKeys(obj, options.getKeysMode, GetKeysContext::kReadOrAddKeys, &keys, &multikeyPaths); Status ret = Status::OK(); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { @@ -225,7 +225,11 @@ Status IndexAccessMethod::remove(OperationContext* opCtx, // Relax key constraints on removal when deleting documents with invalid formats, but only // those that don't apply to the partialIndex filter. - getKeys(obj, GetKeysMode::kRelaxConstraintsUnfiltered, &keys, multikeyPaths); + getKeys(obj, + GetKeysMode::kRelaxConstraintsUnfiltered, + GetKeysContext::kRemovingKeys, + &keys, + multikeyPaths); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { removeOneKey(opCtx, *i, loc, options.dupsAllowed); @@ -244,7 +248,11 @@ Status IndexAccessMethod::touch(OperationContext* opCtx, const BSONObj& obj) { // There's no need to compute the prefixes of the indexed fields that cause the index to be // multikey when paging a document's index entries into memory. MultikeyPaths* multikeyPaths = nullptr; - getKeys(obj, GetKeysMode::kEnforceConstraints, &keys, multikeyPaths); + getKeys(obj, + GetKeysMode::kEnforceConstraints, + GetKeysContext::kReadOrAddKeys, + &keys, + multikeyPaths); std::unique_ptr<SortedDataInterface::Cursor> cursor(_newInterface->newCursor(opCtx)); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { @@ -266,7 +274,11 @@ RecordId IndexAccessMethod::findSingle(OperationContext* opCtx, const BSONObj& r // For performance, call get keys only if there is a non-simple collation. BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths* multikeyPaths = nullptr; - getKeys(requestedKey, GetKeysMode::kEnforceConstraints, &keys, multikeyPaths); + getKeys(requestedKey, + GetKeysMode::kEnforceConstraints, + GetKeysContext::kReadOrAddKeys, + &keys, + multikeyPaths); invariant(keys.size() == 1); actualKey = *keys.begin(); } else { @@ -354,11 +366,19 @@ Status IndexAccessMethod::validateUpdate(OperationContext* opCtx, // index to be multikey when the old version of the document was written since the index // metadata isn't updated when keys are deleted. MultikeyPaths* multikeyPaths = nullptr; - getKeys(from, options.getKeysMode, &ticket->oldKeys, multikeyPaths); + getKeys(from, + options.getKeysMode, + GetKeysContext::kRemovingKeys, + &ticket->oldKeys, + multikeyPaths); } if (!indexFilter || indexFilter->matchesBSON(to)) { - getKeys(to, options.getKeysMode, &ticket->newKeys, &ticket->newMultikeyPaths); + getKeys(to, + options.getKeysMode, + GetKeysContext::kReadOrAddKeys, + &ticket->newKeys, + &ticket->newMultikeyPaths); } ticket->loc = record; @@ -445,7 +465,7 @@ Status IndexAccessMethod::BulkBuilder::insert(OperationContext* opCtx, BSONObjSet keys = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); MultikeyPaths multikeyPaths; - _real->getKeys(obj, options.getKeysMode, &keys, &multikeyPaths); + _real->getKeys(obj, options.getKeysMode, GetKeysContext::kReadOrAddKeys, &keys, &multikeyPaths); _everGeneratedMultipleKeys = _everGeneratedMultipleKeys || (keys.size() > 1); @@ -554,6 +574,7 @@ void IndexAccessMethod::setIndexIsMultikey(OperationContext* opCtx, MultikeyPath void IndexAccessMethod::getKeys(const BSONObj& obj, GetKeysMode mode, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { static stdx::unordered_set<int> whiteList{ErrorCodes::CannotBuildIndexKeys, @@ -581,7 +602,7 @@ void IndexAccessMethod::getKeys(const BSONObj& obj, 13026, 13027}; try { - doGetKeys(obj, keys, multikeyPaths); + doGetKeys(obj, context, keys, multikeyPaths); } catch (const AssertionException& ex) { // Suppress all indexing errors when mode is kRelaxConstraints. if (mode == GetKeysMode::kEnforceConstraints) { diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index a1c7ac9283c..af9fd01f7e8 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -281,6 +281,12 @@ public: }; /** + * Specifies whether getKeys is being used in the context of creating new keys or deleting + * existing keys. + */ + enum class GetKeysContext { kRemovingKeys, kReadOrAddKeys }; + + /** * Fills 'keys' with the keys that should be generated for 'obj' on this index. * Based on 'mode', it will honor or ignore index constraints, e.g. duplicated key, key too * long, and geo index parsing errors. The ignoring of constraints is for replication due to @@ -294,6 +300,7 @@ public: */ void getKeys(const BSONObj& obj, GetKeysMode mode, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const; @@ -319,6 +326,7 @@ protected: * a result of inserting 'keys'. */ virtual void doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const = 0; diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp index b86b08fd97e..0f414aaa242 100644 --- a/src/mongo/db/index/s2_access_method.cpp +++ b/src/mongo/db/index/s2_access_method.cpp @@ -143,6 +143,7 @@ StatusWith<BSONObj> S2AccessMethod::fixSpec(const BSONObj& specObj) { } void S2AccessMethod::doGetKeys(const BSONObj& obj, + GetKeysContext context, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const { ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys, multikeyPaths); diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h index 52153b73f65..dc45f33f6e3 100644 --- a/src/mongo/db/index/s2_access_method.h +++ b/src/mongo/db/index/s2_access_method.h @@ -65,7 +65,10 @@ private: * and fills each element with the prefixes of the indexed field that would cause this index to * be multikey as a result of inserting 'keys'. */ - void doGetKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final; + void doGetKeys(const BSONObj& obj, + GetKeysContext context, + BSONObjSet* keys, + MultikeyPaths* multikeyPaths) const final; S2IndexingParams _params; diff --git a/src/mongo/dbtests/namespacetests.cpp b/src/mongo/dbtests/namespacetests.cpp index 8abce8bab8a..84e7bf335e7 100644 --- a/src/mongo/dbtests/namespacetests.cpp +++ b/src/mongo/dbtests/namespacetests.cpp @@ -102,7 +102,8 @@ public: // Call getKeys on the nullObj. BSONObjSet nullFieldKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); const CollatorInterface* collator = nullptr; - ExpressionKeysPrivate::getHashKeys(nullObj, "a", 0, 0, false, collator, &nullFieldKeySet); + ExpressionKeysPrivate::getHashKeys( + nullObj, "a", 0, 0, false, collator, &nullFieldKeySet, false); BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement(); ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0, 0), @@ -132,7 +133,7 @@ public: BSONObjSet nullFieldKeySet = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); const CollatorInterface* collator = nullptr; ExpressionKeysPrivate::getHashKeys( - nullObj, "a", 0x5eed, 0, false, collator, &nullFieldKeySet); + nullObj, "a", 0x5eed, 0, false, collator, &nullFieldKeySet, false); BSONElement nullFieldFromKey = nullFieldKeySet.begin()->firstElement(); ASSERT_EQUALS(ExpressionKeysPrivate::makeSingleHashKey(nullObj.firstElement(), 0x5eed, 0), |