diff options
author | Louis Williams <louis.williams@mongodb.com> | 2021-01-07 17:04:00 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-01-12 14:53:00 +0000 |
commit | 858d7caf8b79fd43a2054f4960ab205208699b0f (patch) | |
tree | b0067958d6048be7701874f56c0030f155f9b841 | |
parent | 87d0144984752767599297de915c45d828316896 (diff) | |
download | mongo-858d7caf8b79fd43a2054f4960ab205208699b0f.tar.gz |
SERVER-48471 Hashed indexes may be incorrectly marked multikey and be ineligible as a shard key
(cherry picked from commit 8fa255bc05cd99ff649bd0ef9407417b7e439b5e)
(cherry picked from commit 39f10d9d3bf7783593dbc7083d1afaec7369c4ca)
-rw-r--r-- | jstests/noPassthrough/hybrid_multikey.js | 137 | ||||
-rw-r--r-- | src/mongo/db/catalog/multi_index_block.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/index/index_build_interceptor.cpp | 37 | ||||
-rw-r--r-- | src/mongo/db/index/index_build_interceptor.h | 2 | ||||
-rw-r--r-- | src/mongo/db/multi_key_path_tracker.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/multi_key_path_tracker.h | 6 |
6 files changed, 143 insertions, 54 deletions
diff --git a/jstests/noPassthrough/hybrid_multikey.js b/jstests/noPassthrough/hybrid_multikey.js new file mode 100644 index 00000000000..6fa1845834d --- /dev/null +++ b/jstests/noPassthrough/hybrid_multikey.js @@ -0,0 +1,137 @@ +/** + * Tests that hybrid index builds result in a consistent state and correct multikey behavior across + * various index types. + */ +load("jstests/noPassthrough/libs/index_build.js"); + +(function() { +"use strict"; + +const conn = MongoRunner.runMongod(); +const dbName = 'test'; +const collName = 'foo'; +const testDB = conn.getDB(dbName); +const testColl = testDB[collName]; + +const runTest = (config) => { + // Populate the collection. + const nDocs = 10; + testColl.drop(); + for (let i = 0; i < nDocs; i++) { + assert.commandWorked(testColl.insert({x: i})); + } + + jsTestLog("Testing: " + tojson(config)); + + // Prevent the index build from completing. + IndexBuildTest.pauseIndexBuilds(conn); + + // Start the index build and wait for it to start. + const indexName = "test_index"; + let indexOptions = config.indexOptions || {}; + indexOptions.name = indexName; + const awaitIndex = IndexBuildTest.startIndexBuild( + conn, testColl.getFullName(), config.indexSpec, indexOptions); + IndexBuildTest.waitForIndexBuildToStart(testDB, collName, indexName); + + // Perform writes while the index build is in progress. + assert.commandWorked(testColl.insert({x: nDocs + 1})); + assert.commandWorked(testColl.update({x: 0}, {x: -1})); + assert.commandWorked(testColl.remove({x: -1})); + + let extraDocs = config.extraDocs || []; + extraDocs.forEach((doc) => { + assert.commandWorked(testColl.insert(doc)); + }); + + // Allow the index build to complete. + IndexBuildTest.resumeIndexBuilds(conn); + awaitIndex(); + + // Ensure the index is usable and has the expected multikey state. + let explain = testColl.find({x: 1}).hint(indexName).explain(); + let plan = explain.queryPlanner.winningPlan; + assert.eq("FETCH", plan.stage, explain); + assert.eq("IXSCAN", plan.inputStage.stage, explain); + assert.eq( + config.expectMultikey, + plan.inputStage.isMultiKey, + `Index multikey state "${plan.inputStage.isMultiKey}" was not "${config.expectMultikey}"`); + + const validateRes = assert.commandWorked(testColl.validate()); + assert(validateRes.valid, validateRes); +}; + +// Hashed indexes should never be multikey. +runTest({ + indexSpec: {x: 'hashed'}, + expectMultikey: false, +}); + +// Wildcard indexes are not multikey unless they have multikey documents. +runTest({ + indexSpec: {'$**': 1}, + expectMultikey: false, +}); +runTest({ + indexSpec: {'$**': 1}, + expectMultikey: true, + extraDocs: [{x: [1, 2, 3]}], +}); + +// '2dsphere' indexes are not multikey unless they have multikey documents. +runTest({ + indexSpec: {x: 1, b: '2dsphere'}, + expectMultikey: false, +}); + +// '2dsphere' indexes are automatically sparse. If we insert a document where 'x' is multikey, even +// though 'b' is omitted, the index is still considered multikey. See SERVER-39705. +runTest({ + indexSpec: {x: 1, b: '2dsphere'}, + extraDocs: [ + {x: [1, 2]}, + ], + expectMultikey: true, +}); + +// Test that a partial index is not multikey when a multikey document is not indexed. +runTest({ + indexSpec: {x: 1}, + indexOptions: {partialFilterExpression: {a: {$gt: 0}}}, + extraDocs: [ + {x: [0, 1, 2], a: 0}, + ], + expectMultikey: false, +}); + +// Test that a partial index is multikey when a multikey document is indexed. +runTest({ + indexSpec: {x: 1}, + indexOptions: {partialFilterExpression: {a: {$gt: 0}}}, + extraDocs: [ + {x: [0, 1, 2], a: 1}, + ], + expectMultikey: true, +}); + +// Text indexes are not multikey unless documents make them multikey. +runTest({ + indexSpec: {x: 'text'}, + extraDocs: [ + {x: 'hello'}, + ], + expectMultikey: false, +}); + +// Text indexes can be multikey if a field has multiple words. +runTest({ + indexSpec: {x: 'text'}, + extraDocs: [ + {x: 'hello world'}, + ], + expectMultikey: true, +}); + +MongoRunner.stopMongod(conn); +})(); diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index 27545c0c070..60720c89443 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -877,12 +877,6 @@ Status MultiIndexBlock::commit(OperationContext* opCtx, auto multikeyMetadataKeys = MultikeyPathTracker::get(opCtx).getMultikeyMetadataKeys( collection->ns(), _indexes[i].block->getIndexName()); if (multikeyPaths || multikeyMetadataKeys) { - // Upon reaching this point, multikeyPaths must either have at least one (possibly - // empty) element unless it is of an index with a type that doesn't support tracking - // multikeyPaths via the multikeyPaths array or has "special" multikey semantics. - invariant(multikeyMetadataKeys || !multikeyPaths.get().empty() || - !IndexBuildInterceptor::typeCanFastpathMultikeyUpdates( - _indexes[i].block->getEntry()->descriptor()->getIndexType())); _indexes[i].block->getEntry()->setMultikey( opCtx, multikeyMetadataKeys.value_or(std::vector<BSONObj>{}), diff --git a/src/mongo/db/index/index_build_interceptor.cpp b/src/mongo/db/index/index_build_interceptor.cpp index 708bfbc8fdf..59756d1bfe9 100644 --- a/src/mongo/db/index/index_build_interceptor.cpp +++ b/src/mongo/db/index/index_build_interceptor.cpp @@ -55,18 +55,6 @@ namespace mongo { MONGO_FAIL_POINT_DEFINE(hangDuringIndexBuildDrainYield); -bool IndexBuildInterceptor::typeCanFastpathMultikeyUpdates(IndexType indexType) { - // Ensure no new indexes are added without considering whether they use the multikeyPaths - // vector. - invariant(indexType == INDEX_BTREE || indexType == INDEX_2D || indexType == INDEX_HAYSTACK || - indexType == INDEX_2DSPHERE || indexType == INDEX_TEXT || indexType == INDEX_HASHED || - indexType == INDEX_WILDCARD); - // Only BTREE indexes are guaranteed to use the multikeyPaths vector. Other index types either - // do not track path-level multikey information or have "special" handling of multikey - // information. - return (indexType == INDEX_BTREE); -} - IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx, IndexCatalogEntry* entry) : _indexCatalogEntry(entry), _sideWritesTable( @@ -76,15 +64,6 @@ IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx, IndexCatal if (entry->descriptor()->unique()) { _duplicateKeyTracker = std::make_unique<DuplicateKeyTracker>(opCtx, entry); } - // `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey - // "shape". Initialize `_multikeyPaths` with the right shape from the IndexCatalogEntry. - auto indexType = entry->descriptor()->getIndexType(); - if (typeCanFastpathMultikeyUpdates(indexType)) { - auto numFields = entry->descriptor()->getNumFields(); - _multikeyPaths = MultikeyPaths{}; - auto it = _multikeyPaths->begin(); - _multikeyPaths->insert(it, numFields, {}); - } } void IndexBuildInterceptor::deleteTemporaryTables(OperationContext* opCtx) { @@ -396,13 +375,13 @@ Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx, // `multikeyMetadataKeys` when inserting. *numKeysOut = keys.size() + (op == Op::kInsert ? multikeyMetadataKeys.size() : 0); - auto indexType = _indexCatalogEntry->descriptor()->getIndexType(); + // Maintain parity with IndexAccessMethod's handling of whether keys could change the multikey + // state on the index. + bool isMultikey = _indexCatalogEntry->accessMethod()->shouldMarkIndexAsMultikey( + keys, {multikeyMetadataKeys.begin(), multikeyMetadataKeys.end()}, multikeyPaths); - // No need to take the multikeyPaths mutex if this is a trivial multikey update. - bool canBypassMultikeyMutex = typeCanFastpathMultikeyUpdates(indexType) && - MultikeyPathTracker::isMultikeyPathsTrivial(multikeyPaths); - - if (op == Op::kInsert && !canBypassMultikeyMutex) { + // No need to take the multikeyPaths mutex if this would not change any multikey state. + if (op == Op::kInsert && isMultikey) { // SERVER-39705: It's worth noting that a document may not generate any keys, but be // described as being multikey. This step must be done to maintain parity with `validate`s // expectations. @@ -410,10 +389,6 @@ Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx, if (_multikeyPaths) { MultikeyPathTracker::mergeMultikeyPaths(&_multikeyPaths.get(), multikeyPaths); } else { - // All indexes that support pre-initialization of _multikeyPaths during - // IndexBuildInterceptor construction time should have been initialized already. - invariant(!typeCanFastpathMultikeyUpdates(indexType)); - // `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey // "shape". Initialize `_multikeyPaths` with the right shape from the first result. _multikeyPaths = multikeyPaths; diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h index 70a7d28d1f6..55bcf1f24ae 100644 --- a/src/mongo/db/index/index_build_interceptor.h +++ b/src/mongo/db/index/index_build_interceptor.h @@ -48,8 +48,6 @@ class IndexBuildInterceptor { public: enum class Op { kInsert, kDelete }; - static bool typeCanFastpathMultikeyUpdates(IndexType type); - /** * Creates a temporary table for writes during an index build. Additionally creates a temporary * table to store any duplicate key constraint violations found during the build, if the index diff --git a/src/mongo/db/multi_key_path_tracker.cpp b/src/mongo/db/multi_key_path_tracker.cpp index a80a5e48194..52ac75d58f5 100644 --- a/src/mongo/db/multi_key_path_tracker.cpp +++ b/src/mongo/db/multi_key_path_tracker.cpp @@ -68,15 +68,6 @@ void MultikeyPathTracker::mergeMultikeyPaths(MultikeyPaths* toMergeInto, } } -bool MultikeyPathTracker::isMultikeyPathsTrivial(const MultikeyPaths& paths) { - for (auto&& path : paths) { - if (!path.empty()) { - return false; - } - } - return true; -} - void MultikeyPathTracker::addMultikeyPathInfo(MultikeyPathInfo info) { invariant(_trackMultikeyPathInfo); // Merge the `MultikeyPathInfo` input into the accumulated value being tracked for the diff --git a/src/mongo/db/multi_key_path_tracker.h b/src/mongo/db/multi_key_path_tracker.h index 0849a812c8a..949a7a7d319 100644 --- a/src/mongo/db/multi_key_path_tracker.h +++ b/src/mongo/db/multi_key_path_tracker.h @@ -64,12 +64,6 @@ public: static void mergeMultikeyPaths(MultikeyPaths* toMergeInto, const MultikeyPaths& newPaths); - /** - * Returns whether paths contains only empty sets, i.e., {{}, {}, {}}. This includes the case - * where the MultikeyPaths vector itself has no elements, e.g., {}. - */ - static bool isMultikeyPathsTrivial(const MultikeyPaths& paths); - // Decoration requires a default constructor. MultikeyPathTracker() = default; |