summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Max Hirschhorn <max.hirschhorn@mongodb.com> 2016-05-08 14:16:24 -0400
committer: Max Hirschhorn <max.hirschhorn@mongodb.com> 2016-05-08 14:16:24 -0400
commit: 3e26b6b80f0a4904d477f8c4bd189945941ee09e (patch)
tree: 1a503037b273c922f28209abcda546ac7006938d
parent: 3a0d6ee6a2b6f82c5775380b7184501916338331 (diff)
download: mongo-3e26b6b80f0a4904d477f8c4bd189945941ee09e.tar.gz
SERVER-22726 Propagate multikey paths computed during key generation.
-rw-r--r-- src/mongo/db/catalog/collection.cpp | 5
-rw-r--r-- src/mongo/db/catalog/collection_info_cache.cpp | 1
-rw-r--r-- src/mongo/db/catalog/index_catalog_entry.cpp | 116
-rw-r--r-- src/mongo/db/catalog/index_catalog_entry.h | 61
-rw-r--r-- src/mongo/db/exec/sort_key_generator.cpp | 11
-rw-r--r-- src/mongo/db/exec/working_set_common.cpp | 5
-rw-r--r-- src/mongo/db/index/2d_access_method.cpp | 4
-rw-r--r-- src/mongo/db/index/2d_access_method.h | 8
-rw-r--r-- src/mongo/db/index/btree_access_method.cpp | 8
-rw-r--r-- src/mongo/db/index/btree_access_method.h | 2
-rw-r--r-- src/mongo/db/index/btree_key_generator.cpp | 1
-rw-r--r-- src/mongo/db/index/fts_access_method.cpp | 4
-rw-r--r-- src/mongo/db/index/fts_access_method.h | 9
-rw-r--r-- src/mongo/db/index/hash_access_method.cpp | 4
-rw-r--r-- src/mongo/db/index/hash_access_method.h | 8
-rw-r--r-- src/mongo/db/index/haystack_access_method.cpp | 4
-rw-r--r-- src/mongo/db/index/haystack_access_method.h | 8
-rw-r--r-- src/mongo/db/index/index_access_method.cpp | 73
-rw-r--r-- src/mongo/db/index/index_access_method.h | 24
-rw-r--r-- src/mongo/db/index/s2_access_method.cpp | 4
-rw-r--r-- src/mongo/db/index/s2_access_method.h | 11
-rw-r--r-- src/mongo/db/query/get_executor.cpp | 2
-rw-r--r-- src/mongo/db/query/index_entry.h | 2
-rw-r--r-- src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp | 9
-rw-r--r-- src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp | 10
-rw-r--r-- src/mongo/dbtests/SConscript | 1
-rw-r--r-- src/mongo/dbtests/multikey_paths_test.cpp | 353
-rw-r--r-- src/mongo/s/chunk_manager.cpp | 1
28 files changed, 674 insertions, 75 deletions
diff --git a/src/mongo/db/catalog/collection.cpp b/src/mongo/db/catalog/collection.cpp
index 7c359b728cb..40c2cd100a6 100644
--- a/src/mongo/db/catalog/collection.cpp
+++ b/src/mongo/db/catalog/collection.cpp
@@ -1021,7 +1021,10 @@ public:
const IndexAccessMethod* iam = _indexCatalog->getIndex(descriptor);
BSONObjSet documentKeySet;
- iam->getKeys(recordBson, &documentKeySet);
+ // There's no need to compute the prefixes of the indexed fields that cause the
+ // index to be multikey when validating the index keys.
+ MultikeyPaths* multikeyPaths = nullptr;
+ iam->getKeys(recordBson, &documentKeySet, multikeyPaths);
if (descriptor->isPartial()) {
const IndexCatalogEntry* ice = _indexCatalog->getEntry(descriptor);
diff --git a/src/mongo/db/catalog/collection_info_cache.cpp b/src/mongo/db/catalog/collection_info_cache.cpp
index 383099aefd8..c2a6890f041 100644
--- a/src/mongo/db/catalog/collection_info_cache.cpp
+++ b/src/mongo/db/catalog/collection_info_cache.cpp
@@ -156,6 +156,7 @@ void CollectionInfoCache::updatePlanCacheIndexEntries(OperationContext* txn) {
indexEntries.emplace_back(desc->keyPattern(),
desc->getAccessMethodName(),
desc->isMultikey(txn),
+ ice->getMultikeyPaths(txn),
desc->isSparse(),
desc->unique(),
desc->indexName(),
diff --git a/src/mongo/db/catalog/index_catalog_entry.cpp b/src/mongo/db/catalog/index_catalog_entry.cpp
index 65b48166249..2a3dfac1ed2 100644
--- a/src/mongo/db/catalog/index_catalog_entry.cpp
+++ b/src/mongo/db/catalog/index_catalog_entry.cpp
@@ -34,6 +34,8 @@
#include "mongo/db/catalog/index_catalog_entry.h"
+#include <algorithm>
+
#include "mongo/db/catalog/collection_catalog_entry.h"
#include "mongo/db/catalog/head_manager.h"
#include "mongo/db/concurrency/write_conflict_exception.h"
@@ -99,7 +101,12 @@ void IndexCatalogEntry::init(OperationContext* txn, IndexAccessMethod* accessMet
_isReady = _catalogIsReady(txn);
_head = _catalogHead(txn);
- _isMultikey = _catalogIsMultikey(txn);
+
+ {
+ stdx::lock_guard<stdx::mutex> lk(_indexMultikeyPathsMutex);
+ _isMultikey.store(_catalogIsMultikey(txn, &_indexMultikeyPaths));
+ _indexTracksPathLevelMultikeyInfo = !_indexMultikeyPaths.empty();
+ }
if (BSONElement filterElement = _descriptor->getInfoElement("partialFilterExpression")) {
invariant(filterElement.isABSONObj());
@@ -135,7 +142,12 @@ bool IndexCatalogEntry::isReady(OperationContext* txn) const {
}
bool IndexCatalogEntry::isMultikey() const {
- return _isMultikey;
+ return _isMultikey.load();
+}
+
+MultikeyPaths IndexCatalogEntry::getMultikeyPaths(OperationContext* txn) const {
+ stdx::lock_guard<stdx::mutex> lk(_indexMultikeyPathsMutex);
+ return _indexMultikeyPaths;
}
// ---
@@ -167,7 +179,7 @@ void IndexCatalogEntry::setHead(OperationContext* txn, RecordId newHead) {
/**
* RAII class, which associates a new RecoveryUnit with an OperationContext for the purposes
- * of simulating a sub-transaction. Takes ownership of the new recovery unit and frees it at
+ * of simulating a side-transaction. Takes ownership of the new recovery unit and frees it at
* destruction time.
*/
class RecoveryUnitSwap {
@@ -196,43 +208,86 @@ private:
const std::unique_ptr<RecoveryUnit> _newRecoveryUnit;
};
-void IndexCatalogEntry::setMultikey(OperationContext* txn) {
- if (isMultikey()) {
+void IndexCatalogEntry::setMultikey(OperationContext* txn, const MultikeyPaths& multikeyPaths) {
+ if (!_indexTracksPathLevelMultikeyInfo && isMultikey()) {
+ // If the index is already set as multikey and we don't have any path-level information to
+ // update, then there's nothing more for us to do.
return;
}
- // Only one thread should set the multi-key value per collection, because the metadata for
- // a collection is one large document.
- Lock::ResourceLock collMDLock(txn->lockState(), ResourceId(RESOURCE_METADATA, _ns), MODE_X);
+ if (_indexTracksPathLevelMultikeyInfo) {
+ stdx::lock_guard<stdx::mutex> lk(_indexMultikeyPathsMutex);
+ invariant(multikeyPaths.size() == _indexMultikeyPaths.size());
+
+ bool newPathIsMultikey = false;
+ for (size_t i = 0; i < multikeyPaths.size(); ++i) {
+ if (!std::includes(_indexMultikeyPaths[i].begin(),
+ _indexMultikeyPaths[i].end(),
+ multikeyPaths[i].begin(),
+ multikeyPaths[i].end())) {
+ // If 'multikeyPaths' contains a new path component that causes this index to be
+ // multikey, then we must update the index metadata in the CollectionCatalogEntry.
+ newPathIsMultikey = true;
+ break;
+ }
+ }
- // Check again in case we blocked on the MD lock and another thread beat us to setting the
- // multiKey metadata for this index.
- if (isMultikey()) {
- return;
+ if (!newPathIsMultikey) {
+ // Otherwise, if all the path components in 'multikeyPaths' are already tracked in
+ // '_indexMultikeyPaths', then there's nothing more for us to do.
+ return;
+ }
}
- // This effectively emulates a sub-transaction off the main transaction, which invoked
- // setMultikey. The reason we need is to avoid artificial WriteConflicts, which happen
- // with snapshot isolation.
{
- StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
- RecoveryUnitSwap ruSwap(txn, storageEngine->newRecoveryUnit());
-
- WriteUnitOfWork wuow(txn);
+ // Only one thread should set the multi-key value per collection, because the metadata for a
+ // collection is one large document.
+ Lock::ResourceLock collMDLock(txn->lockState(), ResourceId(RESOURCE_METADATA, _ns), MODE_X);
+
+ if (!_indexTracksPathLevelMultikeyInfo && isMultikey()) {
+ // It's possible that we raced with another thread when acquiring the MD lock. If the
+ // index is already set as multikey and we don't have any path-level information to
+ // update, then there's nothing more for us to do.
+ return;
+ }
- // TODO SERVER-22726: Propagate multikey paths computed during index key generation.
- if (_collection->setIndexIsMultikey(txn, _descriptor->indexName(), MultikeyPaths{})) {
- if (_infoCache) {
- LOG(1) << _ns << ": clearing plan cache - index " << _descriptor->keyPattern()
- << " set to multi key.";
- _infoCache->clearQueryCache();
+ // This effectively emulates a side-transaction off the main transaction, which invoked
+ // setMultikey. We need this in order to avoid artificial WriteConflicts, which happen with
+ // snapshot isolation.
+ {
+ StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
+ RecoveryUnitSwap ruSwap(txn, storageEngine->newRecoveryUnit());
+
+ WriteUnitOfWork wuow(txn);
+
+ // It's possible that the index type (e.g. ascending/descending index) supports tracking
+ // path-level multikey information, but this particular index doesn't.
+ // CollectionCatalogEntry::setIndexIsMultikey() requires that we discard the path-level
+ // multikey information in order to avoid unintentionally setting path-level multikey
+ // information on an index created before 3.4.
+ if (_collection->setIndexIsMultikey(
+ txn,
+ _descriptor->indexName(),
+ _indexTracksPathLevelMultikeyInfo ? multikeyPaths : MultikeyPaths{})) {
+ if (_infoCache) {
+ LOG(1) << _ns << ": clearing plan cache - index " << _descriptor->keyPattern()
+ << " set to multi key.";
+ _infoCache->clearQueryCache();
+ }
}
- }
- wuow.commit();
+ wuow.commit();
+ }
}
- _isMultikey = true;
+ _isMultikey.store(true);
+
+ if (_indexTracksPathLevelMultikeyInfo) {
+ stdx::lock_guard<stdx::mutex> lk(_indexMultikeyPathsMutex);
+ for (size_t i = 0; i < multikeyPaths.size(); ++i) {
+ _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end());
+ }
+ }
}
// ----
@@ -245,8 +300,9 @@ RecordId IndexCatalogEntry::_catalogHead(OperationContext* txn) const {
return _collection->getIndexHead(txn, _descriptor->indexName());
}
-bool IndexCatalogEntry::_catalogIsMultikey(OperationContext* txn) const {
- return _collection->isIndexMultikey(txn, _descriptor->indexName(), nullptr);
+bool IndexCatalogEntry::_catalogIsMultikey(OperationContext* txn,
+ MultikeyPaths* multikeyPaths) const {
+ return _collection->isIndexMultikey(txn, _descriptor->indexName(), multikeyPaths);
}
// ------------------
diff --git a/src/mongo/db/catalog/index_catalog_entry.h b/src/mongo/db/catalog/index_catalog_entry.h
index 5d9aad728ac..08cad3bcbb4 100644
--- a/src/mongo/db/catalog/index_catalog_entry.h
+++ b/src/mongo/db/catalog/index_catalog_entry.h
@@ -30,12 +30,16 @@
#pragma once
+#include <boost/optional.hpp>
#include <string>
#include "mongo/base/owned_pointer_vector.h"
#include "mongo/bson/ordering.h"
+#include "mongo/db/index/multikey_paths.h"
#include "mongo/db/record_id.h"
#include "mongo/db/storage/snapshot_name.h"
+#include "mongo/platform/atomic_word.h"
+#include "mongo/stdx/mutex.h"
namespace mongo {
@@ -105,9 +109,33 @@ public:
// --
+ /**
+ * Returns true if this index is multikey, and returns false otherwise.
+ */
bool isMultikey() const;
- void setMultikey(OperationContext* txn);
+ /**
+ * Returns the path components that cause this index to be multikey if this index supports
+ * path-level multikey tracking, and returns an empty vector if path-level multikey tracking
+ * isn't supported.
+ *
+ * If this index supports path-level multikey tracking but isn't multikey, then this function
+ * returns a vector with size equal to the number of elements in the index key pattern where
+ * each element in the vector is an empty set.
+ */
+ MultikeyPaths getMultikeyPaths(OperationContext* txn) const;
+
+ /**
+ * Sets this index to be multikey. Information regarding which newly detected path components
+ * cause this index to be multikey can also be specified.
+ *
+ * If this index doesn't support path-level multikey tracking, then 'multikeyPaths' is ignored.
+ *
+ * If this index supports path-level multikey tracking, then 'multikeyPaths' must be a vector
+ * with size equal to the number of elements in the index key pattern. Additionally, at least
+ * one path component of the indexed fields must cause this index to be multikey.
+ */
+ void setMultikey(OperationContext* txn, const MultikeyPaths& multikeyPaths);
// if this ready is ready for queries
bool isReady(OperationContext* txn) const;
@@ -130,7 +158,13 @@ private:
bool _catalogIsReady(OperationContext* txn) const;
RecordId _catalogHead(OperationContext* txn) const;
- bool _catalogIsMultikey(OperationContext* txn) const;
+
+ /**
+ * Retrieves the multikey information associated with this index from '_collection'.
+ *
+ * See CollectionCatalogEntry::isIndexMultikey() for more details.
+ */
+ bool _catalogIsMultikey(OperationContext* txn, MultikeyPaths* multikeyPaths) const;
// -----
@@ -154,7 +188,28 @@ private:
Ordering _ordering; // TODO: this might be b-tree specific
bool _isReady; // cache of NamespaceDetails info
RecordId _head; // cache of IndexDetails
- bool _isMultikey; // cache of NamespaceDetails info
+
+ // Set to true if this index supports path-level multikey tracking.
+ // '_indexTracksPathLevelMultikeyInfo' is effectively const after IndexCatalogEntry::init() is
+ // called.
+ bool _indexTracksPathLevelMultikeyInfo = false;
+
+ // Set to true if this index is multikey. '_isMultikey' serves as a cache of the information
+ // stored in the NamespaceDetails or KVCatalog.
+ AtomicWord<bool> _isMultikey;
+
+ // Controls concurrent access to '_indexMultikeyPaths'. We acquire this mutex rather than the
+ // RESOURCE_METADATA lock as a performance optimization so that it is cheaper to detect whether
+ // there is actually any path-level multikey information to update or not.
+ mutable stdx::mutex _indexMultikeyPathsMutex;
+
+ // Non-empty only if '_indexTracksPathLevelMultikeyInfo' is true.
+ //
+ // If non-empty, '_indexMultikeyPaths' is a vector with size equal to the number of elements
+ // in the index key pattern. Each element in the vector is an ordered set of positions (starting
+ // at 0) into the corresponding indexed field that represent which prefixes of the indexed field
+ // cause the index to be multikey.
+ MultikeyPaths _indexMultikeyPaths;
// The earliest snapshot that is allowed to read this index.
boost::optional<SnapshotName> _minVisibleSnapshot;
diff --git a/src/mongo/db/exec/sort_key_generator.cpp b/src/mongo/db/exec/sort_key_generator.cpp
index 0625e1553fc..87554b85f5f 100644
--- a/src/mongo/db/exec/sort_key_generator.cpp
+++ b/src/mongo/db/exec/sort_key_generator.cpp
@@ -229,8 +229,15 @@ void SortKeyGenerator::getBoundsForSort(OperationContext* txn,
params.options = QueryPlannerParams::NO_TABLE_SCAN;
// We're creating a "virtual index" with key pattern equal to the sort order.
- IndexEntry sortOrder(
- sortObj, IndexNames::BTREE, true, false, false, "doesnt_matter", NULL, BSONObj());
+ IndexEntry sortOrder(sortObj,
+ IndexNames::BTREE,
+ true,
+ MultikeyPaths{},
+ false,
+ false,
+ "doesnt_matter",
+ NULL,
+ BSONObj());
params.indices.push_back(sortOrder);
auto statusWithQueryForSort = CanonicalQuery::canonicalize(
diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp
index 6cbfa82a21a..1981985a831 100644
--- a/src/mongo/db/exec/working_set_common.cpp
+++ b/src/mongo/db/exec/working_set_common.cpp
@@ -114,7 +114,10 @@ bool WorkingSetCommon::fetch(OperationContext* txn,
invariant(!member->keyData.empty());
for (size_t i = 0; i < member->keyData.size(); i++) {
BSONObjSet keys;
- member->keyData[i].index->getKeys(member->obj.value(), &keys);
+ // There's no need to compute the prefixes of the indexed fields that cause the index to
+ // be multikey when ensuring the keyData is still valid.
+ MultikeyPaths* multikeyPaths = nullptr;
+ member->keyData[i].index->getKeys(member->obj.value(), &keys, multikeyPaths);
if (!keys.count(member->keyData[i].keyData)) {
// document would no longer be at this position in the index.
return false;
diff --git a/src/mongo/db/index/2d_access_method.cpp b/src/mongo/db/index/2d_access_method.cpp
index bc5bad9283b..c4e5e6e8843 100644
--- a/src/mongo/db/index/2d_access_method.cpp
+++ b/src/mongo/db/index/2d_access_method.cpp
@@ -47,7 +47,9 @@ TwoDAccessMethod::TwoDAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte
}
/** Finds the key objects to put in an index */
-void TwoDAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const {
+void TwoDAccessMethod::getKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::get2DKeys(obj, _params, keys, NULL);
}
diff --git a/src/mongo/db/index/2d_access_method.h b/src/mongo/db/index/2d_access_method.h
index 644b6addc5b..5d181904ead 100644
--- a/src/mongo/db/index/2d_access_method.h
+++ b/src/mongo/db/index/2d_access_method.h
@@ -54,7 +54,13 @@ private:
// This really gets the 'locs' from the provided obj.
void getKeys(const BSONObj& obj, std::vector<BSONObj>& locs) const;
- virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const;
+ /**
+ * Fills 'keys' with the keys that should be generated for 'obj' on this index.
+ *
+ * This function ignores the 'multikeyPaths' pointer because 2d indexes don't support tracking
+ * path-level multikey information.
+ */
+ void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
TwoDIndexingParams _params;
};
diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp
index a3708e8caed..10cbe4ab6d1 100644
--- a/src/mongo/db/index/btree_access_method.cpp
+++ b/src/mongo/db/index/btree_access_method.cpp
@@ -62,11 +62,9 @@ BtreeAccessMethod::BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataIn
}
}
-void BtreeAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const {
- // SERVER-22726 represents the work to gather and persist the path-level multikey information.
- // Until that's done, we may as well avoid computing the prefixes of the indexed fields that
- // cause the index to be multikey.
- MultikeyPaths* multikeyPaths = nullptr;
+void BtreeAccessMethod::getKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const {
_keyGenerator->getKeys(obj, keys, multikeyPaths);
}
diff --git a/src/mongo/db/index/btree_access_method.h b/src/mongo/db/index/btree_access_method.h
index 4c20deeb931..5873514c01c 100644
--- a/src/mongo/db/index/btree_access_method.h
+++ b/src/mongo/db/index/btree_access_method.h
@@ -48,7 +48,7 @@ public:
BtreeAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree);
private:
- virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const;
+ void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
// Our keys differ for V0 and V1.
std::unique_ptr<BtreeKeyGenerator> _keyGenerator;
diff --git a/src/mongo/db/index/btree_key_generator.cpp b/src/mongo/db/index/btree_key_generator.cpp
index ad99eaa8a77..1f1eb949b06 100644
--- a/src/mongo/db/index/btree_key_generator.cpp
+++ b/src/mongo/db/index/btree_key_generator.cpp
@@ -312,6 +312,7 @@ void BtreeKeyGeneratorV1::getKeysImpl(std::vector<const char*> fieldNames,
}
if (multikeyPaths) {
+ invariant(multikeyPaths->empty());
multikeyPaths->resize(fieldNames.size());
}
getKeysImplWithArray(fieldNames, fixed, obj, keys, 0, _emptyPositionalInfo, multikeyPaths);
diff --git a/src/mongo/db/index/fts_access_method.cpp b/src/mongo/db/index/fts_access_method.cpp
index 9676fcbec45..e54e1760f55 100644
--- a/src/mongo/db/index/fts_access_method.cpp
+++ b/src/mongo/db/index/fts_access_method.cpp
@@ -34,7 +34,9 @@ namespace mongo {
FTSAccessMethod::FTSAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree)
: IndexAccessMethod(btreeState, btree), _ftsSpec(btreeState->descriptor()->infoObj()) {}
-void FTSAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const {
+void FTSAccessMethod::getKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::getFTSKeys(obj, _ftsSpec, keys);
}
diff --git a/src/mongo/db/index/fts_access_method.h b/src/mongo/db/index/fts_access_method.h
index 794d1efe360..aa3a7dfc23e 100644
--- a/src/mongo/db/index/fts_access_method.h
+++ b/src/mongo/db/index/fts_access_method.h
@@ -45,8 +45,13 @@ public:
}
private:
- // Implemented:
- virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const;
+ /**
+ * Fills 'keys' with the keys that should be generated for 'obj' on this index.
+ *
+ * This function ignores the 'multikeyPaths' pointer because text indexes don't support tracking
+ * path-level multikey information.
+ */
+ void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
fts::FTSSpec _ftsSpec;
};
diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp
index ef94a249f9c..10339f7eae9 100644
--- a/src/mongo/db/index/hash_access_method.cpp
+++ b/src/mongo/db/index/hash_access_method.cpp
@@ -51,7 +51,9 @@ HashAccessMethod::HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInte
_collator = btreeState->getCollator();
}
-void HashAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const {
+void HashAccessMethod::getKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::getHashKeys(
obj, _hashedField, _seed, _hashVersion, _descriptor->isSparse(), _collator, keys);
}
diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h
index 5af9d0e4153..8fc5db36636 100644
--- a/src/mongo/db/index/hash_access_method.h
+++ b/src/mongo/db/index/hash_access_method.h
@@ -46,7 +46,13 @@ public:
HashAccessMethod(IndexCatalogEntry* btreeState, SortedDataInterface* btree);
private:
- virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const;
+ /**
+ * Fills 'keys' with the keys that should be generated for 'obj' on this index.
+ *
+ * This function ignores the 'multikeyPaths' pointer because hashed indexes don't support
+ * tracking path-level multikey information.
+ */
+ void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
// Only one of our fields is hashed. This is the field name for it.
std::string _hashedField;
diff --git a/src/mongo/db/index/haystack_access_method.cpp b/src/mongo/db/index/haystack_access_method.cpp
index 946fdc7d444..d6a50fdac88 100644
--- a/src/mongo/db/index/haystack_access_method.cpp
+++ b/src/mongo/db/index/haystack_access_method.cpp
@@ -59,7 +59,9 @@ HaystackAccessMethod::HaystackAccessMethod(IndexCatalogEntry* btreeState,
uassert(16774, "no non-geo fields specified", _otherFields.size());
}
-void HaystackAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const {
+void HaystackAccessMethod::getKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::getHaystackKeys(obj, _geoField, _otherFields, _bucketSize, keys);
}
diff --git a/src/mongo/db/index/haystack_access_method.h b/src/mongo/db/index/haystack_access_method.h
index d79de3bfffc..0f5e519e1e5 100644
--- a/src/mongo/db/index/haystack_access_method.h
+++ b/src/mongo/db/index/haystack_access_method.h
@@ -69,7 +69,13 @@ protected:
unsigned limit);
private:
- virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const;
+ /**
+ * Fills 'keys' with the keys that should be generated for 'obj' on this index.
+ *
+ * This function ignores the 'multikeyPaths' pointer because geoHaystack indexes don't support
+ * tracking path-level multikey information.
+ */
+ void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
std::string _geoField;
std::vector<std::string> _otherFields;
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index 2eb283a818a..4670a74f59d 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -55,6 +55,20 @@ using std::pair;
using std::set;
using std::vector;
+namespace {
+
+/**
+ * Returns true if at least one prefix of any of the indexed fields causes the index to be multikey,
+ * and returns false otherwise. This function returns false if the 'multikeyPaths' vector is empty.
+ */
+bool isMultikeyFromPaths(const MultikeyPaths& multikeyPaths) {
+ return std::any_of(multikeyPaths.cbegin(),
+ multikeyPaths.cend(),
+ [](const std::set<std::size_t>& components) { return !components.empty(); });
+}
+
+} // namespace
+
MONGO_EXPORT_SERVER_PARAMETER(failIndexKeyTooLong, bool, true);
//
@@ -107,8 +121,9 @@ Status IndexAccessMethod::insert(OperationContext* txn,
invariant(numInserted);
*numInserted = 0;
BSONObjSet keys;
+ MultikeyPaths multikeyPaths;
// Delegate to the subclass.
- getKeys(obj, &keys);
+ getKeys(obj, &keys, &multikeyPaths);
Status ret = Status::OK();
for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
@@ -144,8 +159,8 @@ Status IndexAccessMethod::insert(OperationContext* txn,
return status;
}
- if (*numInserted > 1) {
- _btreeState->setMultikey(txn);
+ if (*numInserted > 1 || isMultikeyFromPaths(multikeyPaths)) {
+ _btreeState->setMultikey(txn, multikeyPaths);
}
return ret;
@@ -184,7 +199,11 @@ Status IndexAccessMethod::remove(OperationContext* txn,
invariant(numDeleted);
*numDeleted = 0;
BSONObjSet keys;
- getKeys(obj, &keys);
+ // There's no need to compute the prefixes of the indexed fields that cause the index to be
+ // multikey when removing a document since the index metadata isn't updated when keys are
+ // deleted.
+ MultikeyPaths* multikeyPaths = nullptr;
+ getKeys(obj, &keys, multikeyPaths);
for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
removeOneKey(txn, *i, loc, options.dupsAllowed);
@@ -200,7 +219,10 @@ Status IndexAccessMethod::initializeAsEmpty(OperationContext* txn) {
Status IndexAccessMethod::touch(OperationContext* txn, const BSONObj& obj) {
BSONObjSet keys;
- getKeys(obj, &keys);
+ // There's no need to compute the prefixes of the indexed fields that cause the index to be
+ // multikey when paging a document's index entries into memory.
+ MultikeyPaths* multikeyPaths = nullptr;
+ getKeys(obj, &keys, multikeyPaths);
std::unique_ptr<SortedDataInterface::Cursor> cursor(_newInterface->newCursor(txn));
for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) {
@@ -292,10 +314,18 @@ Status IndexAccessMethod::validateUpdate(OperationContext* txn,
const InsertDeleteOptions& options,
UpdateTicket* ticket,
const MatchExpression* indexFilter) {
- if (indexFilter == NULL || indexFilter->matchesBSON(from))
- getKeys(from, &ticket->oldKeys);
- if (indexFilter == NULL || indexFilter->matchesBSON(to))
- getKeys(to, &ticket->newKeys);
+ if (!indexFilter || indexFilter->matchesBSON(from)) {
+ // There's no need to compute the prefixes of the indexed fields that possibly caused the
+ // index to be multikey when the old version of the document was written since the index
+ // metadata isn't updated when keys are deleted.
+ MultikeyPaths* multikeyPaths = nullptr;
+ getKeys(from, &ticket->oldKeys, multikeyPaths);
+ }
+
+ if (!indexFilter || indexFilter->matchesBSON(to)) {
+ getKeys(to, &ticket->newKeys, &ticket->newMultikeyPaths);
+ }
+
ticket->loc = record;
ticket->dupsAllowed = options.dupsAllowed;
@@ -320,8 +350,9 @@ Status IndexAccessMethod::update(OperationContext* txn,
return Status(ErrorCodes::InternalError, "Invalid UpdateTicket in update");
}
- if (ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size() > 1) {
- _btreeState->setMultikey(txn);
+ if (ticket.oldKeys.size() + ticket.added.size() - ticket.removed.size() > 1 ||
+ isMultikeyFromPaths(ticket.newMultikeyPaths)) {
+ _btreeState->setMultikey(txn, ticket.newMultikeyPaths);
}
for (size_t i = 0; i < ticket.removed.size(); ++i) {
@@ -370,9 +401,21 @@ Status IndexAccessMethod::BulkBuilder::insert(OperationContext* txn,
const InsertDeleteOptions& options,
int64_t* numInserted) {
BSONObjSet keys;
- _real->getKeys(obj, &keys);
+ MultikeyPaths multikeyPaths;
+ _real->getKeys(obj, &keys, &multikeyPaths);
- _isMultiKey = _isMultiKey || (keys.size() > 1);
+ _everGeneratedMultipleKeys = _everGeneratedMultipleKeys || (keys.size() > 1);
+
+ if (!multikeyPaths.empty()) {
+ if (_indexMultikeyPaths.empty()) {
+ _indexMultikeyPaths = multikeyPaths;
+ } else {
+ invariant(_indexMultikeyPaths.size() == multikeyPaths.size());
+ for (size_t i = 0; i < multikeyPaths.size(); ++i) {
+ _indexMultikeyPaths[i].insert(multikeyPaths[i].begin(), multikeyPaths[i].end());
+ }
+ }
+ }
for (BSONObjSet::iterator it = keys.begin(); it != keys.end(); ++it) {
_sorter->add(*it, loc);
@@ -408,8 +451,8 @@ Status IndexAccessMethod::commitBulk(OperationContext* txn,
MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN {
WriteUnitOfWork wunit(txn);
- if (bulk->_isMultiKey) {
- _btreeState->setMultikey(txn);
+ if (bulk->_everGeneratedMultipleKeys || isMultikeyFromPaths(bulk->_indexMultikeyPaths)) {
+ _btreeState->setMultikey(txn, bulk->_indexMultikeyPaths);
}
builder.reset(_newInterface->getBulkBuilder(txn, dupsAllowed));
diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h
index 85da9227586..35455f5425c 100644
--- a/src/mongo/db/index/index_access_method.h
+++ b/src/mongo/db/index/index_access_method.h
@@ -216,7 +216,14 @@ public:
std::unique_ptr<Sorter> _sorter;
const IndexAccessMethod* _real;
int64_t _keysInserted = 0;
- bool _isMultiKey = false;
+
+ // Set to true if at least one document causes IndexAccessMethod::getKeys() to return a
+ // BSONObjSet with size strictly greater than one.
+ bool _everGeneratedMultipleKeys = false;
+
+ // Holds the path components that cause this index to be multikey. The '_indexMultikeyPaths'
+ // vector remains empty if this index doesn't support path-level multikey tracking.
+ MultikeyPaths _indexMultikeyPaths;
};
/**
@@ -245,8 +252,16 @@ public:
/**
* Fills 'keys' with the keys that should be generated for 'obj' on this index.
+ *
+ * If the 'multikeyPaths' pointer is non-null, then it must point to an empty vector. If this
+ * index type supports tracking path-level multikey information, then this function resizes
+ * 'multikeyPaths' to have the same number of elements as the index key pattern and fills each
+ * element with the prefixes of the indexed field that would cause this index to be multikey as
+ * a result of inserting 'keys'.
*/
- virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const = 0;
+ virtual void getKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const = 0;
/**
* Splits the sets 'left' and 'right' into two vectors, the first containing the elements that
@@ -294,6 +309,11 @@ private:
RecordId loc;
bool dupsAllowed;
+
+ // Holds the path components that would cause this index to be multikey as a result of inserting
+ // 'newKeys'. The 'newMultikeyPaths' vector remains empty if this index doesn't support
+ // path-level multikey tracking.
+ MultikeyPaths newMultikeyPaths;
};
/**
diff --git a/src/mongo/db/index/s2_access_method.cpp b/src/mongo/db/index/s2_access_method.cpp
index 6c3c5c18302..1abc17c0979 100644
--- a/src/mongo/db/index/s2_access_method.cpp
+++ b/src/mongo/db/index/s2_access_method.cpp
@@ -103,7 +103,9 @@ BSONObj S2AccessMethod::fixSpec(const BSONObj& specObj) {
return specObj;
}
-void S2AccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) const {
+void S2AccessMethod::getKeys(const BSONObj& obj,
+ BSONObjSet* keys,
+ MultikeyPaths* multikeyPaths) const {
ExpressionKeysPrivate::getS2Keys(obj, _descriptor->keyPattern(), _params, keys);
}
diff --git a/src/mongo/db/index/s2_access_method.h b/src/mongo/db/index/s2_access_method.h
index db615977461..7bc819adcce 100644
--- a/src/mongo/db/index/s2_access_method.h
+++ b/src/mongo/db/index/s2_access_method.h
@@ -49,7 +49,16 @@ public:
static BSONObj fixSpec(const BSONObj& specObj);
private:
- virtual void getKeys(const BSONObj& obj, BSONObjSet* keys) const;
+ /**
+ * Fills 'keys' with the keys that should be generated for 'obj' on this index.
+ *
+ * This function ignores the 'multikeyPaths' pointer because 2dsphere indexes don't support
+ * tracking path-level multikey information.
+ *
+ * TODO SERVER-23114: Return prefixes of the indexed fields that cause the index to be multikey
+ * as a result of inserting 'keys'.
+ */
+ void getKeys(const BSONObj& obj, BSONObjSet* keys, MultikeyPaths* multikeyPaths) const final;
S2IndexingParams _params;
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 33e6f533fb3..dceff9d9c85 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -137,6 +137,7 @@ void fillOutPlannerParams(OperationContext* txn,
plannerParams->indices.push_back(IndexEntry(desc->keyPattern(),
desc->getAccessMethodName(),
desc->isMultikey(txn),
+ ice->getMultikeyPaths(txn),
desc->isSparse(),
desc->unique(),
desc->indexName(),
@@ -1318,6 +1319,7 @@ StatusWith<unique_ptr<PlanExecutor>> getExecutorDistinct(OperationContext* txn,
plannerParams.indices.push_back(IndexEntry(desc->keyPattern(),
desc->getAccessMethodName(),
desc->isMultikey(txn),
+ ice->getMultikeyPaths(txn),
desc->isSparse(),
desc->unique(),
desc->indexName(),
diff --git a/src/mongo/db/query/index_entry.h b/src/mongo/db/query/index_entry.h
index 185b7ae6bed..a8d51c7c5d8 100644
--- a/src/mongo/db/query/index_entry.h
+++ b/src/mongo/db/query/index_entry.h
@@ -50,6 +50,7 @@ struct IndexEntry {
IndexEntry(const BSONObj& kp,
const std::string& accessMethod,
bool mk,
+ const MultikeyPaths& mkp,
bool sp,
bool unq,
const std::string& n,
@@ -57,6 +58,7 @@ struct IndexEntry {
const BSONObj& io)
: keyPattern(kp),
multikey(mk),
+ multikeyPaths(mkp),
sparse(sp),
unique(unq),
name(n),
diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp
index 10a853ced75..aafcd1b0c3e 100644
--- a/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp
+++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry.cpp
@@ -100,7 +100,10 @@ bool KVCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn,
invariant(offset >= 0);
const bool tracksPathLevelMultikeyInfo = !md.indexes[offset].multikeyPaths.empty();
- if (!tracksPathLevelMultikeyInfo) {
+ if (tracksPathLevelMultikeyInfo) {
+ invariant(!multikeyPaths.empty());
+ invariant(multikeyPaths.size() == md.indexes[offset].multikeyPaths.size());
+ } else {
invariant(multikeyPaths.empty());
if (md.indexes[offset].multikey) {
@@ -112,9 +115,7 @@ bool KVCollectionCatalogEntry::setIndexIsMultikey(OperationContext* txn,
md.indexes[offset].multikey = true;
- if (tracksPathLevelMultikeyInfo && !multikeyPaths.empty()) {
- invariant(multikeyPaths.size() == md.indexes[offset].multikeyPaths.size());
-
+ if (tracksPathLevelMultikeyInfo) {
bool newPathIsMultikey = false;
bool somePathIsMultikey = false;
diff --git a/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp b/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp
index 57729826aa4..a37eb986256 100644
--- a/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp
+++ b/src/mongo/db/storage/kv/kv_collection_catalog_entry_test.cpp
@@ -239,6 +239,16 @@ TEST_F(KVCollectionCatalogEntryTest, CanSetMultipleFieldsAndComponentsAsMultikey
}
DEATH_TEST_F(KVCollectionCatalogEntryTest,
+             CannotOmitPathLevelMultikeyInfoWithBtreeIndex,
+             "Invariant failure !multikeyPaths.empty()") {
+    // Calls setIndexIsMultikey() with an empty MultikeyPaths for the index created below; the
+    // expected death message above corresponds to the 'invariant(!multikeyPaths.empty())' check
+    // that setIndexIsMultikey() performs for indexes that track path-level multikey information.
+    std::string indexName = createIndex(BSON("a" << 1 << "b" << 1));
+    CollectionCatalogEntry* collEntry = getCollectionCatalogEntry();
+
+    auto opCtx = newOperationContext();
+    collEntry->setIndexIsMultikey(opCtx.get(), indexName, MultikeyPaths{});
+}
+
+DEATH_TEST_F(KVCollectionCatalogEntryTest,
AtLeastOnePathComponentMustCauseIndexToBeMultikey,
"Invariant failure somePathIsMultikey") {
std::string indexName = createIndex(BSON("a" << 1 << "b" << 1));
diff --git a/src/mongo/dbtests/SConscript b/src/mongo/dbtests/SConscript
index 23c0f1258f5..0a4301fa180 100644
--- a/src/mongo/dbtests/SConscript
+++ b/src/mongo/dbtests/SConscript
@@ -73,6 +73,7 @@ dbtest = env.Program(
'mmaptests.cpp',
'mock_dbclient_conn_test.cpp',
'mock_replica_set_test.cpp',
+ 'multikey_paths_test.cpp',
'namespacetests.cpp',
'oplogstarttests.cpp',
'pdfiletests.cpp',
diff --git a/src/mongo/dbtests/multikey_paths_test.cpp b/src/mongo/dbtests/multikey_paths_test.cpp
new file mode 100644
index 00000000000..429bc9447eb
--- /dev/null
+++ b/src/mongo/dbtests/multikey_paths_test.cpp
@@ -0,0 +1,353 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include <iostream>
+#include <string>
+
+#include "mongo/db/client.h"
+#include "mongo/db/db_raii.h"
+#include "mongo/db/catalog/index_create.h"
+#include "mongo/db/index/multikey_paths.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/service_context.h"
+#include "mongo/dbtests/dbtests.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/mongoutils/str.h"
+
+namespace mongo {
+namespace {
+
+/**
+ * Fixture for testing correctness of multikey paths.
+ *
+ * Has helper functions for creating indexes and asserting that the multikey paths after performing
+ * write operations are as expected.
+ */
+class MultikeyPathsTest : public unittest::Test {
+public:
+    MultikeyPathsTest() : _nss("unittests.multikey_paths") {}
+
+    /**
+     * Creates the collection named by '_nss' inside a WriteUnitOfWork, obtaining (or creating)
+     * its database under MODE_X first.
+     */
+    void setUp() final {
+        AutoGetOrCreateDb autoDb(_opCtx.get(), _nss.db(), MODE_X);
+        Database* database = autoDb.getDb();
+        {
+            WriteUnitOfWork wuow(_opCtx.get());
+            ASSERT(database->createCollection(_opCtx.get(), _nss.ns()));
+            wuow.commit();
+        }
+    }
+
+    /**
+     * Drops the collection named by '_nss' inside a WriteUnitOfWork. Does nothing if the database
+     * could not be obtained.
+     */
+    void tearDown() final {
+        AutoGetDb autoDb(_opCtx.get(), _nss.db(), MODE_X);
+        Database* database = autoDb.getDb();
+        if (database) {
+            WriteUnitOfWork wuow(_opCtx.get());
+            ASSERT_OK(database->dropCollection(_opCtx.get(), _nss.ns()));
+            wuow.commit();
+        }
+    }
+
+    /**
+     * Forwards 'indexSpec' to dbtests::createIndexFromSpec() for the namespace of 'collection' and
+     * returns the resulting Status. Callers should check the returned Status.
+     */
+    Status createIndex(Collection* collection, BSONObj indexSpec) {
+        return dbtests::createIndexFromSpec(_opCtx.get(), collection->ns().ns(), indexSpec);
+    }
+
+    /**
+     * Looks up the index on 'collection' with key pattern 'keyPattern' and asserts on the multikey
+     * paths reported by its catalog entry:
+     *   - if the storage engine supports path-level multikey tracking, they must be non-empty and
+     *     equal to 'expectedMultikeyPaths';
+     *   - otherwise, they must be empty.
+     */
+    void assertMultikeyPaths(Collection* collection,
+                             BSONObj keyPattern,
+                             const MultikeyPaths& expectedMultikeyPaths) {
+        IndexCatalog* indexCatalog = collection->getIndexCatalog();
+        IndexDescriptor* desc = indexCatalog->findIndexByKeyPattern(_opCtx.get(), keyPattern);
+        // Fail the test with a clear assertion instead of handing a missing descriptor to
+        // getEntry() when the lookup did not find an index for 'keyPattern'.
+        ASSERT(desc);
+        const IndexCatalogEntry* ice = indexCatalog->getEntry(desc);
+
+        auto actualMultikeyPaths = ice->getMultikeyPaths(_opCtx.get());
+        if (storageEngineSupportsPathLevelMultikeyTracking()) {
+            ASSERT_FALSE(actualMultikeyPaths.empty());
+            const bool match = (expectedMultikeyPaths == actualMultikeyPaths);
+            if (!match) {
+                // Report both values in a readable form before the plain boolean assertion.
+                FAIL(str::stream() << "Expected: " << dumpMultikeyPaths(expectedMultikeyPaths)
+                                   << ", Actual: " << dumpMultikeyPaths(actualMultikeyPaths));
+            }
+            ASSERT_TRUE(match);
+        } else {
+            ASSERT_TRUE(actualMultikeyPaths.empty());
+        }
+    }
+
+protected:
+    const ServiceContext::UniqueOperationContext _opCtx = cc().makeOperationContext();
+    const NamespaceString _nss;
+
+private:
+    /**
+     * Returns true unless the global storage engine reports that it is MMAPv1.
+     */
+    bool storageEngineSupportsPathLevelMultikeyTracking() {
+        // Path-level multikey tracking is supported for all storage engines that use the KVCatalog.
+        // MMAPv1 is the only storage engine that does not.
+        //
+        // TODO SERVER-22727: Store path-level multikey information in MMAPv1 index catalog.
+        return !getGlobalServiceContext()->getGlobalStorageEngine()->isMmapV1();
+    }
+
+    /**
+     * Renders 'multikeyPaths' as a string of the form "[ [ 0 1 ] [ ] ]" for use in failure
+     * messages: one inner bracket pair per element, listing that element's components.
+     */
+    std::string dumpMultikeyPaths(const MultikeyPaths& multikeyPaths) {
+        std::stringstream ss;
+
+        ss << "[ ";
+        // Iterate by const reference so that each set of components is not copied.
+        for (const auto& multikeyComponents : multikeyPaths) {
+            ss << "[ ";
+            for (const auto multikeyComponent : multikeyComponents) {
+                ss << multikeyComponent << " ";
+            }
+            ss << "] ";
+        }
+        ss << "]";
+
+        return ss.str();
+    }
+};
+
+// Inserts a document whose 'b' field is an array, then creates the {a: 1, b: 1} index and checks
+// that only the first path component of 'b' is reported as multikey.
+TEST_F(MultikeyPathsTest, PathsUpdatedOnIndexCreation) {
+    AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X);
+    Collection* collection = autoColl.getCollection();
+    invariant(collection);
+
+    {
+        WriteUnitOfWork wuow(_opCtx.get());
+        OpDebug* const nullOpDebug = nullptr;
+        const bool enforceQuota = true;
+        ASSERT_OK(collection->insertDocument(
+            _opCtx.get(),
+            BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)),
+            nullOpDebug,
+            enforceQuota));
+        wuow.commit();
+    }
+
+    BSONObj keyPattern = BSON("a" << 1 << "b" << 1);
+    // Check the returned Status so that a failed index build is reported here rather than later.
+    ASSERT_OK(createIndex(collection,
+                          BSON("name"
+                               << "a_1_b_1"
+                               << "ns" << _nss.ns() << "key" << keyPattern)));
+
+    assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}});
+}
+
+// Inserts one document with an array in 'b' and another with an array in 'a', then creates the
+// {a: 1, b: 1} index and checks that the first path component of both fields is multikey.
+TEST_F(MultikeyPathsTest, PathsUpdatedOnIndexCreationWithMultipleDocuments) {
+    AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X);
+    Collection* collection = autoColl.getCollection();
+    invariant(collection);
+
+    {
+        WriteUnitOfWork wuow(_opCtx.get());
+        OpDebug* const nullOpDebug = nullptr;
+        const bool enforceQuota = true;
+        ASSERT_OK(collection->insertDocument(
+            _opCtx.get(),
+            BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)),
+            nullOpDebug,
+            enforceQuota));
+        ASSERT_OK(collection->insertDocument(
+            _opCtx.get(),
+            BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2 << 3) << "b" << 5),
+            nullOpDebug,
+            enforceQuota));
+        wuow.commit();
+    }
+
+    BSONObj keyPattern = BSON("a" << 1 << "b" << 1);
+    // Check the returned Status so that a failed index build is reported here rather than later.
+    ASSERT_OK(createIndex(collection,
+                          BSON("name"
+                               << "a_1_b_1"
+                               << "ns" << _nss.ns() << "key" << keyPattern)));
+
+    assertMultikeyPaths(collection, keyPattern, {{0U}, {0U}});
+}
+
+// Creates the {a: 1, b: 1} index first, then inserts documents one at a time and checks the
+// multikey paths after each insert: first only 'b' is multikey, then both 'a' and 'b'.
+TEST_F(MultikeyPathsTest, PathsUpdatedOnDocumentInsert) {
+    AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X);
+    Collection* collection = autoColl.getCollection();
+    invariant(collection);
+
+    BSONObj keyPattern = BSON("a" << 1 << "b" << 1);
+    // Check the returned Status so that a failed index build is reported here rather than later.
+    ASSERT_OK(createIndex(collection,
+                          BSON("name"
+                               << "a_1_b_1"
+                               << "ns" << _nss.ns() << "key" << keyPattern)));
+
+    {
+        WriteUnitOfWork wuow(_opCtx.get());
+        OpDebug* const nullOpDebug = nullptr;
+        const bool enforceQuota = true;
+        ASSERT_OK(collection->insertDocument(
+            _opCtx.get(),
+            BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)),
+            nullOpDebug,
+            enforceQuota));
+        wuow.commit();
+    }
+
+    assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}});
+
+    {
+        WriteUnitOfWork wuow(_opCtx.get());
+        OpDebug* const nullOpDebug = nullptr;
+        const bool enforceQuota = true;
+        ASSERT_OK(collection->insertDocument(
+            _opCtx.get(),
+            BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2 << 3) << "b" << 5),
+            nullOpDebug,
+            enforceQuota));
+        wuow.commit();
+    }
+
+    assertMultikeyPaths(collection, keyPattern, {{0U}, {0U}});
+}
+
+// Creates the {a: 1, b: 1} index, inserts a document without arrays (no multikey paths expected),
+// then updates it to contain an array in 'b' and checks that 'b' becomes multikey.
+TEST_F(MultikeyPathsTest, PathsUpdatedOnDocumentUpdate) {
+    AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X);
+    Collection* collection = autoColl.getCollection();
+    invariant(collection);
+
+    BSONObj keyPattern = BSON("a" << 1 << "b" << 1);
+    // Check the returned Status so that a failed index build is reported here rather than later.
+    ASSERT_OK(createIndex(collection,
+                          BSON("name"
+                               << "a_1_b_1"
+                               << "ns" << _nss.ns() << "key" << keyPattern)));
+
+    {
+        WriteUnitOfWork wuow(_opCtx.get());
+        OpDebug* const nullOpDebug = nullptr;
+        const bool enforceQuota = true;
+        ASSERT_OK(collection->insertDocument(
+            _opCtx.get(), BSON("_id" << 0 << "a" << 5), nullOpDebug, enforceQuota));
+        wuow.commit();
+    }
+
+    assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, std::set<size_t>{}});
+
+    {
+        // Fetch the single document inserted above so that it can be replaced.
+        auto cursor = collection->getCursor(_opCtx.get());
+        auto record = cursor->next();
+        invariant(record);
+
+        auto oldDoc = collection->docFor(_opCtx.get(), record->id);
+        {
+            WriteUnitOfWork wuow(_opCtx.get());
+            const bool enforceQuota = true;
+            const bool indexesAffected = true;
+            OpDebug* opDebug = nullptr;
+            OplogUpdateEntryArgs args;
+            // NOTE(review): any result returned by updateDocument() is discarded here; confirm
+            // whether it should be asserted on.
+            collection->updateDocument(
+                _opCtx.get(),
+                record->id,
+                oldDoc,
+                BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)),
+                enforceQuota,
+                indexesAffected,
+                opDebug,
+                &args);
+            wuow.commit();
+        }
+    }
+
+    assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}});
+}
+
+// Creates the {a: 1, b: 1} index, inserts a document with an array in 'b', then deletes it and
+// checks that the multikey paths are the same after the delete as they were before it.
+TEST_F(MultikeyPathsTest, PathsNotUpdatedOnDocumentDelete) {
+    AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X);
+    Collection* collection = autoColl.getCollection();
+    invariant(collection);
+
+    BSONObj keyPattern = BSON("a" << 1 << "b" << 1);
+    // Check the returned Status so that a failed index build is reported here rather than later.
+    ASSERT_OK(createIndex(collection,
+                          BSON("name"
+                               << "a_1_b_1"
+                               << "ns" << _nss.ns() << "key" << keyPattern)));
+
+    {
+        WriteUnitOfWork wuow(_opCtx.get());
+        OpDebug* const nullOpDebug = nullptr;
+        const bool enforceQuota = true;
+        ASSERT_OK(collection->insertDocument(
+            _opCtx.get(),
+            BSON("_id" << 0 << "a" << 5 << "b" << BSON_ARRAY(1 << 2 << 3)),
+            nullOpDebug,
+            enforceQuota));
+        wuow.commit();
+    }
+
+    assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}});
+
+    {
+        // Fetch the single document inserted above so that it can be deleted.
+        auto cursor = collection->getCursor(_opCtx.get());
+        auto record = cursor->next();
+        invariant(record);
+
+        {
+            WriteUnitOfWork wuow(_opCtx.get());
+            OpDebug* const nullOpDebug = nullptr;
+            collection->deleteDocument(_opCtx.get(), record->id, nullOpDebug);
+            wuow.commit();
+        }
+    }
+
+    assertMultikeyPaths(collection, keyPattern, {std::set<size_t>{}, {0U}});
+}
+
+// Creates the {a: 1, b: 1} and {a: 1, c: 1} indexes, inserts a single document with an array in
+// 'a', and checks that both indexes report the first path component of 'a' as multikey.
+TEST_F(MultikeyPathsTest, PathsUpdatedForMultipleIndexesOnDocumentInsert) {
+    AutoGetCollection autoColl(_opCtx.get(), _nss, MODE_X);
+    Collection* collection = autoColl.getCollection();
+    invariant(collection);
+
+    // Check each returned Status so that a failed index build is reported here rather than later.
+    BSONObj keyPatternAB = BSON("a" << 1 << "b" << 1);
+    ASSERT_OK(createIndex(collection,
+                          BSON("name"
+                               << "a_1_b_1"
+                               << "ns" << _nss.ns() << "key" << keyPatternAB)));
+
+    BSONObj keyPatternAC = BSON("a" << 1 << "c" << 1);
+    ASSERT_OK(createIndex(collection,
+                          BSON("name"
+                               << "a_1_c_1"
+                               << "ns" << _nss.ns() << "key" << keyPatternAC)));
+    {
+        WriteUnitOfWork wuow(_opCtx.get());
+        OpDebug* const nullOpDebug = nullptr;
+        const bool enforceQuota = true;
+        ASSERT_OK(collection->insertDocument(
+            _opCtx.get(),
+            BSON("_id" << 0 << "a" << BSON_ARRAY(1 << 2 << 3) << "b" << 5 << "c" << 8),
+            nullOpDebug,
+            enforceQuota));
+        wuow.commit();
+    }
+
+    assertMultikeyPaths(collection, keyPatternAB, {{0U}, std::set<size_t>{}});
+    assertMultikeyPaths(collection, keyPatternAC, {{0U}, std::set<size_t>{}});
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp
index 328045ce868..a7e72788a73 100644
--- a/src/mongo/s/chunk_manager.cpp
+++ b/src/mongo/s/chunk_manager.cpp
@@ -590,6 +590,7 @@ IndexBounds ChunkManager::getIndexBoundsForQuery(const BSONObj& key,
IndexEntry indexEntry(key,
accessMethod,
false /* multiKey */,
+ MultikeyPaths{},
false /* sparse */,
false /* unique */,
"shardkey",