summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLouis Williams <louis.williams@mongodb.com>2021-01-07 17:04:00 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-01-12 14:53:00 +0000
commit858d7caf8b79fd43a2054f4960ab205208699b0f (patch)
treeb0067958d6048be7701874f56c0030f155f9b841
parent87d0144984752767599297de915c45d828316896 (diff)
downloadmongo-858d7caf8b79fd43a2054f4960ab205208699b0f.tar.gz
SERVER-48471 Hashed indexes may be incorrectly marked multikey and be ineligible as a shard key
(cherry picked from commit 8fa255bc05cd99ff649bd0ef9407417b7e439b5e) (cherry picked from commit 39f10d9d3bf7783593dbc7083d1afaec7369c4ca)
-rw-r--r--jstests/noPassthrough/hybrid_multikey.js137
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp6
-rw-r--r--src/mongo/db/index/index_build_interceptor.cpp37
-rw-r--r--src/mongo/db/index/index_build_interceptor.h2
-rw-r--r--src/mongo/db/multi_key_path_tracker.cpp9
-rw-r--r--src/mongo/db/multi_key_path_tracker.h6
6 files changed, 143 insertions, 54 deletions
diff --git a/jstests/noPassthrough/hybrid_multikey.js b/jstests/noPassthrough/hybrid_multikey.js
new file mode 100644
index 00000000000..6fa1845834d
--- /dev/null
+++ b/jstests/noPassthrough/hybrid_multikey.js
@@ -0,0 +1,137 @@
+/**
+ * Tests that hybrid index builds result in a consistent state and correct multikey behavior across
+ * various index types.
+ */
+load("jstests/noPassthrough/libs/index_build.js");
+
+(function() {
+"use strict";
+
+const conn = MongoRunner.runMongod();
+const dbName = 'test';
+const collName = 'foo';
+const testDB = conn.getDB(dbName);
+const testColl = testDB[collName];
+
+const runTest = (config) => {
+ // Populate the collection.
+ const nDocs = 10;
+ testColl.drop();
+ for (let i = 0; i < nDocs; i++) {
+ assert.commandWorked(testColl.insert({x: i}));
+ }
+
+ jsTestLog("Testing: " + tojson(config));
+
+ // Prevent the index build from completing.
+ IndexBuildTest.pauseIndexBuilds(conn);
+
+ // Start the index build and wait for it to start.
+ const indexName = "test_index";
+ let indexOptions = config.indexOptions || {};
+ indexOptions.name = indexName;
+ const awaitIndex = IndexBuildTest.startIndexBuild(
+ conn, testColl.getFullName(), config.indexSpec, indexOptions);
+ IndexBuildTest.waitForIndexBuildToStart(testDB, collName, indexName);
+
+ // Perform writes while the index build is in progress.
+ assert.commandWorked(testColl.insert({x: nDocs + 1}));
+ assert.commandWorked(testColl.update({x: 0}, {x: -1}));
+ assert.commandWorked(testColl.remove({x: -1}));
+
+ let extraDocs = config.extraDocs || [];
+ extraDocs.forEach((doc) => {
+ assert.commandWorked(testColl.insert(doc));
+ });
+
+ // Allow the index build to complete.
+ IndexBuildTest.resumeIndexBuilds(conn);
+ awaitIndex();
+
+ // Ensure the index is usable and has the expected multikey state.
+ let explain = testColl.find({x: 1}).hint(indexName).explain();
+ let plan = explain.queryPlanner.winningPlan;
+ assert.eq("FETCH", plan.stage, explain);
+ assert.eq("IXSCAN", plan.inputStage.stage, explain);
+ assert.eq(
+ config.expectMultikey,
+ plan.inputStage.isMultiKey,
+ `Index multikey state "${plan.inputStage.isMultiKey}" was not "${config.expectMultikey}"`);
+
+ const validateRes = assert.commandWorked(testColl.validate());
+ assert(validateRes.valid, validateRes);
+};
+
+// Hashed indexes should never be multikey.
+runTest({
+ indexSpec: {x: 'hashed'},
+ expectMultikey: false,
+});
+
+// Wildcard indexes are not multikey unless they have multikey documents.
+runTest({
+ indexSpec: {'$**': 1},
+ expectMultikey: false,
+});
+runTest({
+ indexSpec: {'$**': 1},
+ expectMultikey: true,
+ extraDocs: [{x: [1, 2, 3]}],
+});
+
+// '2dsphere' indexes are not multikey unless they have multikey documents.
+runTest({
+ indexSpec: {x: 1, b: '2dsphere'},
+ expectMultikey: false,
+});
+
+// '2dsphere' indexes are automatically sparse. If we insert a document where 'x' is multikey, even
+// though 'b' is omitted, the index is still considered multikey. See SERVER-39705.
+runTest({
+ indexSpec: {x: 1, b: '2dsphere'},
+ extraDocs: [
+ {x: [1, 2]},
+ ],
+ expectMultikey: true,
+});
+
+// Test that a partial index is not multikey when a multikey document is not indexed.
+runTest({
+ indexSpec: {x: 1},
+ indexOptions: {partialFilterExpression: {a: {$gt: 0}}},
+ extraDocs: [
+ {x: [0, 1, 2], a: 0},
+ ],
+ expectMultikey: false,
+});
+
+// Test that a partial index is multikey when a multikey document is indexed.
+runTest({
+ indexSpec: {x: 1},
+ indexOptions: {partialFilterExpression: {a: {$gt: 0}}},
+ extraDocs: [
+ {x: [0, 1, 2], a: 1},
+ ],
+ expectMultikey: true,
+});
+
+// Text indexes are not multikey unless documents make them multikey.
+runTest({
+ indexSpec: {x: 'text'},
+ extraDocs: [
+ {x: 'hello'},
+ ],
+ expectMultikey: false,
+});
+
+// Text indexes can be multikey if a field has multiple words.
+runTest({
+ indexSpec: {x: 'text'},
+ extraDocs: [
+ {x: 'hello world'},
+ ],
+ expectMultikey: true,
+});
+
+MongoRunner.stopMongod(conn);
+})();
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 27545c0c070..60720c89443 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -877,12 +877,6 @@ Status MultiIndexBlock::commit(OperationContext* opCtx,
auto multikeyMetadataKeys = MultikeyPathTracker::get(opCtx).getMultikeyMetadataKeys(
collection->ns(), _indexes[i].block->getIndexName());
if (multikeyPaths || multikeyMetadataKeys) {
- // Upon reaching this point, multikeyPaths must either have at least one (possibly
- // empty) element unless it is of an index with a type that doesn't support tracking
- // multikeyPaths via the multikeyPaths array or has "special" multikey semantics.
- invariant(multikeyMetadataKeys || !multikeyPaths.get().empty() ||
- !IndexBuildInterceptor::typeCanFastpathMultikeyUpdates(
- _indexes[i].block->getEntry()->descriptor()->getIndexType()));
_indexes[i].block->getEntry()->setMultikey(
opCtx,
multikeyMetadataKeys.value_or(std::vector<BSONObj>{}),
diff --git a/src/mongo/db/index/index_build_interceptor.cpp b/src/mongo/db/index/index_build_interceptor.cpp
index 708bfbc8fdf..59756d1bfe9 100644
--- a/src/mongo/db/index/index_build_interceptor.cpp
+++ b/src/mongo/db/index/index_build_interceptor.cpp
@@ -55,18 +55,6 @@ namespace mongo {
MONGO_FAIL_POINT_DEFINE(hangDuringIndexBuildDrainYield);
-bool IndexBuildInterceptor::typeCanFastpathMultikeyUpdates(IndexType indexType) {
- // Ensure no new indexes are added without considering whether they use the multikeyPaths
- // vector.
- invariant(indexType == INDEX_BTREE || indexType == INDEX_2D || indexType == INDEX_HAYSTACK ||
- indexType == INDEX_2DSPHERE || indexType == INDEX_TEXT || indexType == INDEX_HASHED ||
- indexType == INDEX_WILDCARD);
- // Only BTREE indexes are guaranteed to use the multikeyPaths vector. Other index types either
- // do not track path-level multikey information or have "special" handling of multikey
- // information.
- return (indexType == INDEX_BTREE);
-}
-
IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx, IndexCatalogEntry* entry)
: _indexCatalogEntry(entry),
_sideWritesTable(
@@ -76,15 +64,6 @@ IndexBuildInterceptor::IndexBuildInterceptor(OperationContext* opCtx, IndexCatal
if (entry->descriptor()->unique()) {
_duplicateKeyTracker = std::make_unique<DuplicateKeyTracker>(opCtx, entry);
}
- // `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey
- // "shape". Initialize `_multikeyPaths` with the right shape from the IndexCatalogEntry.
- auto indexType = entry->descriptor()->getIndexType();
- if (typeCanFastpathMultikeyUpdates(indexType)) {
- auto numFields = entry->descriptor()->getNumFields();
- _multikeyPaths = MultikeyPaths{};
- auto it = _multikeyPaths->begin();
- _multikeyPaths->insert(it, numFields, {});
- }
}
void IndexBuildInterceptor::deleteTemporaryTables(OperationContext* opCtx) {
@@ -396,13 +375,13 @@ Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx,
// `multikeyMetadataKeys` when inserting.
*numKeysOut = keys.size() + (op == Op::kInsert ? multikeyMetadataKeys.size() : 0);
- auto indexType = _indexCatalogEntry->descriptor()->getIndexType();
+ // Maintain parity with IndexAccessMethod's handling of whether keys could change the multikey
+ // state on the index.
+ bool isMultikey = _indexCatalogEntry->accessMethod()->shouldMarkIndexAsMultikey(
+ keys, {multikeyMetadataKeys.begin(), multikeyMetadataKeys.end()}, multikeyPaths);
- // No need to take the multikeyPaths mutex if this is a trivial multikey update.
- bool canBypassMultikeyMutex = typeCanFastpathMultikeyUpdates(indexType) &&
- MultikeyPathTracker::isMultikeyPathsTrivial(multikeyPaths);
-
- if (op == Op::kInsert && !canBypassMultikeyMutex) {
+ // No need to take the multikeyPaths mutex if this would not change any multikey state.
+ if (op == Op::kInsert && isMultikey) {
// SERVER-39705: It's worth noting that a document may not generate any keys, but be
// described as being multikey. This step must be done to maintain parity with `validate`s
// expectations.
@@ -410,10 +389,6 @@ Status IndexBuildInterceptor::sideWrite(OperationContext* opCtx,
if (_multikeyPaths) {
MultikeyPathTracker::mergeMultikeyPaths(&_multikeyPaths.get(), multikeyPaths);
} else {
- // All indexes that support pre-initialization of _multikeyPaths during
- // IndexBuildInterceptor construction time should have been initialized already.
- invariant(!typeCanFastpathMultikeyUpdates(indexType));
-
// `mergeMultikeyPaths` is sensitive to the two inputs having the same multikey
// "shape". Initialize `_multikeyPaths` with the right shape from the first result.
_multikeyPaths = multikeyPaths;
diff --git a/src/mongo/db/index/index_build_interceptor.h b/src/mongo/db/index/index_build_interceptor.h
index 70a7d28d1f6..55bcf1f24ae 100644
--- a/src/mongo/db/index/index_build_interceptor.h
+++ b/src/mongo/db/index/index_build_interceptor.h
@@ -48,8 +48,6 @@ class IndexBuildInterceptor {
public:
enum class Op { kInsert, kDelete };
- static bool typeCanFastpathMultikeyUpdates(IndexType type);
-
/**
* Creates a temporary table for writes during an index build. Additionally creates a temporary
* table to store any duplicate key constraint violations found during the build, if the index
diff --git a/src/mongo/db/multi_key_path_tracker.cpp b/src/mongo/db/multi_key_path_tracker.cpp
index a80a5e48194..52ac75d58f5 100644
--- a/src/mongo/db/multi_key_path_tracker.cpp
+++ b/src/mongo/db/multi_key_path_tracker.cpp
@@ -68,15 +68,6 @@ void MultikeyPathTracker::mergeMultikeyPaths(MultikeyPaths* toMergeInto,
}
}
-bool MultikeyPathTracker::isMultikeyPathsTrivial(const MultikeyPaths& paths) {
- for (auto&& path : paths) {
- if (!path.empty()) {
- return false;
- }
- }
- return true;
-}
-
void MultikeyPathTracker::addMultikeyPathInfo(MultikeyPathInfo info) {
invariant(_trackMultikeyPathInfo);
// Merge the `MultikeyPathInfo` input into the accumulated value being tracked for the
diff --git a/src/mongo/db/multi_key_path_tracker.h b/src/mongo/db/multi_key_path_tracker.h
index 0849a812c8a..949a7a7d319 100644
--- a/src/mongo/db/multi_key_path_tracker.h
+++ b/src/mongo/db/multi_key_path_tracker.h
@@ -64,12 +64,6 @@ public:
static void mergeMultikeyPaths(MultikeyPaths* toMergeInto, const MultikeyPaths& newPaths);
- /**
- * Returns whether paths contains only empty sets, i.e., {{}, {}, {}}. This includes the case
- * where the MultikeyPaths vector itself has no elements, e.g., {}.
- */
- static bool isMultikeyPathsTrivial(const MultikeyPaths& paths);
-
// Decoration requires a default constructor.
MultikeyPathTracker() = default;