author     Vishnu Kaushik <vishnu.kaushik@mongodb.com>       2020-10-16 14:01:47 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-10-16 14:33:28 +0000
commit     b8017175e3ac2c6d37f60d3f4cd2efb5011d1a32 (patch)
tree       207ae8d4e2758d31e2eeb7e3a994f78667b7bb23
parent     b6a04c494f73ca267547dcd79541c0977651037f (diff)
download   mongo-b8017175e3ac2c6d37f60d3f4cd2efb5011d1a32.tar.gz
SERVER-51461 Support building indexes from existing indexes guarded by feature flag
-rw-r--r--  jstests/noPassthrough/index_build_from_ref.js  190
-rw-r--r--  src/mongo/db/catalog/SConscript                  2
-rw-r--r--  src/mongo/db/catalog/multi_index_block.cpp     219
-rw-r--r--  src/mongo/db/catalog/multi_index_block.h         9
-rw-r--r--  src/mongo/db/catalog/multi_index_block.idl       8
-rw-r--r--  src/mongo/db/index/index_access_method.cpp      10
-rw-r--r--  src/mongo/db/index/index_access_method.h        13
7 files changed, 451 insertions(+), 0 deletions(-)
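
Overview: a minimal shell sketch of how the new fast path is exercised, distilled from the
jstest in this patch. The server parameter and log id 3620203 come from this commit;
MongoRunner and checkLog are the standard shell test helpers; the collection name is
hypothetical.

// Start a server with the startup-only feature flag enabled.
const conn = MongoRunner.runMongod({setParameter: "useReferenceIndexForIndexBuild=true"});
const coll = conn.getDB("test").refidx_demo;  // hypothetical collection name
assert.commandWorked(coll.insert({a: 1, b: 2}));
// Build a compound "reference" index first...
assert.commandWorked(coll.createIndex({a: 1, b: 1}));
// ...then an index on a prefix of its key pattern. This build may scan the reference
// index instead of the collection, logging id 3620203 when it does.
assert.commandWorked(coll.createIndex({a: 1}));
assert(checkLog.checkContainsOnceJson(conn, 3620203, {}));
MongoRunner.stopMongod(conn);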
diff --git a/jstests/noPassthrough/index_build_from_ref.js b/jstests/noPassthrough/index_build_from_ref.js
new file mode 100644
index 00000000000..ce1332130ad
--- /dev/null
+++ b/jstests/noPassthrough/index_build_from_ref.js
@@ -0,0 +1,190 @@
+// Test the useReferenceIndexForIndexBuild optimization. This test goes through the conditions
+// under which a reference index can be used instead of performing a collection scan. For
+// example, a reference index cannot be used when the child index (the index we are trying to
+// build) is unique.
+(function() {
+
+const conn = MongoRunner.runMongod({setParameter: "useReferenceIndexForIndexBuild=true"});
+const db = conn.getDB(jsTestName());
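+
+// Log id 3620203 is emitted (from multi_index_block.cpp) whenever an index build uses a
+// reference index instead of scanning the collection; the checkLog assertions below key off it.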
+
+const t = db.index_refidx;
+t.drop();
+
+// Insert a batch of documents; duplicate {a, b} values will form multi-entry key classes.
+t.insert({a: "a", b: "red", c: 80, d: "asdf"}); // recId 1
+t.insert({a: "a", b: "blue", c: 800, d: "asdf"}); // recId 2 - Will be inverted on { a: 1, b: 1 }
+t.insert({a: "b", b: "red", c: 80, d: "asdf"});
+t.insert({a: "c", b: "blue", c: 800, d: "asdf"});
+t.insert({a: "d", b: "red", c: 80, d: "asdf"});
+t.insert({a: "a", b: "blue", c: 800, d: "asdf"});
+t.insert({a: "a", b: "red", c: 80, d: "asdf"});
+t.insert({a: "b", b: "blue", c: 800, d: "asdf"});
+t.insert({a: "b", b: "red", c: 80, d: "asdf"});
+t.insert({a: "b", b: "blue", c: 800, d: "asdf"});
+t.insert({a: "a", b: "red", c: 80, d: "asdf"});
+t.insert({a: "c", b: "blue", c: 800, d: "asdf"});
+
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_1")); // Should take the normal path.
+assert.eq(5, t.find({a: "a"}).count());
+
+// Ensure key class sorting works properly (inverted keys were inserted earlier).
+assert.commandWorked(t.createIndex({a: 1})); // Should take the fast path.
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_1"}));
+
+assert.eq(5, t.find({a: "a"}).hint({a: 1}).count());
+assert.eq(4, t.find({a: "b"}).hint({a: 1}).count());
+assert.eq(0, t.find({a: "aa"}).hint({a: 1}).count());
+
+let result = assert.commandWorked(t.validate());
+assert(result.valid);
+
+t.drop();
+
+// Insert documents whose keys are all unique.
+for (var i = 0; i < 30; i++) {
+ t.insert({a: i, b: (i * 10)});
+ t.insert({a: (-i), b: (i * -10)});
+}
+
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_2"));
+assert.commandWorked(t.createIndex({a: 1}));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_2"}));
+assert.eq(29, t.find({a: {$gt: 0}}).hint({a: 1}).count());
+
+// {a: -1} is not a prefix of any existing key pattern, so this build shouldn't take the fast
+// path; the only fast-path log entry should still be the one from building {a: 1} above.
+assert.commandWorked(t.createIndex({a: -1}));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_2"}));
+assert.eq(0, t.find({a: "blah"}).hint({a: 1}).count()); // None should be found.
+
+// No problems inserting new documents into the existing index.
+for (var i = 0; i < 40; i++) {
+ t.insert({a: i, b: (i + 1)});
+}
+
+assert.eq(68, t.find({a: {$gt: 0}}).hint({a: 1}).count());
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_3"));
+
+// This test case covers a key class distribution of the form
+// [ a a ... a  b b ... b  c d e f  g g ... g ].
+// We are trying to make sure documents sitting on key class boundaries are not skipped.
+for (var i = 0; i < 20; i++) {
+ t.insert({a: "a", b: i});
+ t.insert({a: "b", b: (2 * i)});
+ t.insert({a: "b", b: i});
+ t.insert({a: "g", b: (i - 1)});
+}
+
+t.insert({a: "c", b: 2});
+t.insert({a: "f", b: 2});
+t.insert({a: "e", b: 2});
+t.insert({a: "d", b: 2});
+
+assert.commandWorked(t.createIndex({b: 1, a: 1}, "refidx_xyz_4"));
+assert.commandWorked(t.createIndex({a: 1}));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_3"}));
+assert.eq(20, t.find({a: "a"}).hint({a: 1}).count());
+assert.eq(40, t.find({a: "b"}).hint({a: 1}).count());
+assert.eq(20, t.find({a: "g"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "c"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "d"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "e"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "f"}).hint({a: 1}).count());
+assert.eq(0, t.find({a: "h"}).hint({a: 1}).count());
+
+assert.commandWorked(
+ t.createIndex({a: 1, b: 1, c: 1}, "refidx_xyz_5")); // shouldn't take the fast path
+assert.commandWorked(t.createIndex({a: 1, c: 1})); // shouldn't take the fast path
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_3"}));
+
+assert.commandWorked(t.dropIndex({a: 1, b: 1})); // Drops refidx_xyz_3.
+assert.commandWorked(t.createIndex({a: 1, b: 1})); // should take the fast path
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_5"}));
+assert.commandWorked(t.createIndex({b: 1})); // should take the fast path
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_4"}));
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+
+t.drop();
+
+// An empty index:
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_6"));
+assert.commandWorked(t.createIndex({a: 1}));
+// Empty collections are handled elsewhere.
+// assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_6"}));
+assert.eq(0, t.find().hint({a: 1}).count());
+assert.eq(0, t.find().count());
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+
+t.drop();
+
+// An index with one element
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_7"));
+t.insert({a: "we are the robots", b: "kraftwerk"});
+assert.commandWorked(t.createIndex({a: 1}));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_7"}));
+assert.eq(0, t.find({a: "spacelab"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "we are the robots"}).hint({a: 1}).count());
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
+// Parent can be unique, and we can build a non-unique child from it. Check partial indexes as well.
+for (var i = 0; i < 30; i++) {
+ t.insert({a: 1, b: i});
+}
+
+assert.commandWorked(t.createIndex({a: 1, b: 1}, {unique: true, name: "refidx_uniq_a_b"}));
+assert.commandWorked(t.createIndex({b: 1, a: 1}, {unique: true, name: "refidx_uniq_b_a"}));
+assert.commandWorked(t.createIndex({a: 1}, "childidx_a1"));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_uniq_a_b"}));
+assert.commandWorked(t.createIndex({b: 1}, {unique: true})); // shouldn't use the fast path.
+assert(!checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_uniq_b_a"}));
+assert.commandWorked(t.dropIndex("childidx_a1"));
+assert.commandWorked(t.dropIndex("refidx_uniq_a_b"));
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_partial_a_b"));
+assert.commandWorked(t.createIndex({a: 1}, {partialFilterExpression: {b: {$gt: 5}}}));
+assert(!checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_partial_a_b"}));
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
+// Make sure we're using the smallest available reference index.
+for (var i = 0; i < 30; i++) {
+ t.insert({a: 1, b: i, c: (i + 1), d: (i * 3)});
+}
+
+assert.commandWorked(t.createIndex({a: 1, b: 1, c: 1, d: 1}, "refidx_abcd_xyz"));
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_ab_xyz"));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_abcd_xyz"}));
+assert.commandWorked(t.createIndex({a: 1, b: 1, c: 1}, "refidx_abc_xyz"));
+assert.commandWorked(t.createIndex({a: 1, c: 1}, "refidx_ac_xyz"));
+assert.commandWorked(t.createIndex({a: 1}, "childidx_a_smallest"));
+assert(checkLog.checkContainsOnceJson(
+ conn, 3620203, {childIdx: "childidx_a_smallest", refIdx: "refidx_ab_xyz"}));
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
+// Make sure a multikey index is never used as a reference index.
+t.insert({a: ["me", "mee", "and louie"], b: 78});
+t.insert({a: ["summer", "is a good season"], b: 8});
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "multikey_ab_xyz"));
+assert.commandWorked(t.createIndex({a: 1}, "child_shouldn_use_ref"));
+assert(!checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "multikey_ab_xyz"}));
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
+MongoRunner.stopMongod(conn);
+})();
\ No newline at end of file
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index f493b8a7b4b..e9b90a65aa7 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -248,7 +248,9 @@ env.Library(
'index_build_block',
],
LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/index/index_access_method',
'$BUILD_DIR/mongo/db/index/index_build_interceptor',
+ '$BUILD_DIR/mongo/db/storage/execution_context',
'$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/idl/server_parameter',
'collection_catalog',
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 109b73a0780..24ac0c90eb2 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -53,6 +53,8 @@
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/tenant_migration_committed_info.h"
#include "mongo/db/repl/tenant_migration_conflict_info.h"
+#include "mongo/db/storage/execution_context.h"
+#include "mongo/db/storage/index_entry_comparison.h"
#include "mongo/db/storage/storage_options.h"
#include "mongo/db/storage/write_unit_of_work.h"
#include "mongo/logv2/log.h"
@@ -208,6 +210,7 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(
static_cast<std::size_t>(maxIndexBuildMemoryUsageMegabytes.load()) * 1024 * 1024 /
indexSpecs.size();
}
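+ // Saved so the reference-index build path can re-create the sorter with the same
+ // per-index memory budget (see _scanReferenceIdxInsertAndCommit()).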
+ _eachIndexBuildMaxMemoryUsageBytes = eachIndexBuildMaxMemoryUsageBytes;
// Initializing individual index build blocks below performs un-timestamped writes to the
// durable catalog. It's possible for the onInit function to set multiple timestamps
@@ -365,6 +368,18 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(
}
MultikeyPathTracker::get(opCtx).startTrackingMultikeyPathInfo();
+ const IndexCatalogEntry* refIdx = nullptr;
+ if (gUseReferenceIndexForIndexBuild && _indexes.size() == 1 &&
+ (refIdx = _findSmallestReferenceIdx(opCtx, collection))) {
+ LOGV2(3620203,
+ "Index Build: using existing index instead of scanning collection",
+ "refIdx"_attr = refIdx->descriptor()->indexName(),
+ "childIdx"_attr =
+ _indexes[0].block->getEntry(opCtx, collection)->descriptor()->indexName());
+ uassertStatusOK(_scanReferenceIdxInsertAndCommit(opCtx, collection, refIdx));
+ return Status::OK();
+ }
+
const char* curopMessage = "Index Build: scanning collection";
const auto numRecords = collection->numRecords(opCtx);
ProgressMeterHolder progress;
@@ -970,4 +985,208 @@ Status MultiIndexBlock::_failPointHangDuringBuild(OperationContext* opCtx,
return Status::OK();
}
+
+const IndexCatalogEntry* MultiIndexBlock::_findSmallestReferenceIdx(
+ OperationContext* opCtx, const CollectionPtr& collection) const {
+ // Find a suitable reference index for the index we are trying to build. First make sure
+ // none of the indexes being built is partial, sparse or unique; the logic to handle these
+ // cases is complicated (we would have to verify that a partial reference index covers the
+ // partial index being built, a unique reference index does not necessarily yield a unique
+ // child index, etc.). We do not support multikey indexes either.
+ for (size_t i = 0; i < _indexes.size(); i++) {
+ auto entry = _indexes[i].block->getEntry(opCtx, collection);
+ auto descriptor = entry->descriptor();
+ if (descriptor->isPartial() || descriptor->isSparse() || descriptor->unique())
+ return nullptr;
+ }
+
+ const IndexCatalogEntry* smallestRefIdx = nullptr;
+
+ int smallestSize = -1;
+ auto it = collection->getIndexCatalog()->getIndexIterator(opCtx,
+ false /* includeUnfinishedIndexes */);
+ while (it->more()) {
+ const auto candidateEntry = it->next();
+ const auto candidateDescriptor = candidateEntry->descriptor();
+ // A partial / sparse reference index may or may not cover the index we're trying to build.
+ // More complex logic is required to check if the reference index covers the one we're
+ // building, and so as a simplification, we're avoiding using partial / sparse ref indexes.
+ // A candidate reference index being unique, however, is not a problem.
+ if (candidateEntry->isMultikey() || candidateDescriptor->isPartial() ||
+ candidateDescriptor->isSparse())
+ continue;
+
+ // Make sure the candidate we are looking at is compatible with all the indexes we are
+ // attempting to build:
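+ // For example, a child index {a: 1} can be built from a candidate {a: 1, b: 1}, since
+ // {a: 1} is a key-pattern prefix of the candidate, but not from {b: 1, a: 1} or {a: -1}.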
+ bool compatible = true;
+ for (size_t i = 0; i < _indexes.size(); i++) {
+ auto descriptor = _indexes[i].block->getEntry(opCtx, collection)->descriptor();
+ compatible = compatible &&
+ descriptor->keyPattern().isPrefixOf(candidateDescriptor->keyPattern(),
+ SimpleBSONElementComparator::kInstance);
+ if (!compatible)
+ break;
+ }
+
+ if (compatible) {
+ int candidateSize = candidateDescriptor->keyPattern().nFields();
+ if (smallestRefIdx && smallestSize <= candidateSize)
+ continue;
+
+ smallestSize = candidateSize;
+ smallestRefIdx = candidateEntry;
+ }
+ }
+
+ return smallestRefIdx;
+}
+
+Status MultiIndexBlock::_scanReferenceIdxInsertAndCommit(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ const IndexCatalogEntry* refIdx) {
+ auto cursor = refIdx->accessMethod()->newCursor(opCtx);
+ KeyString::Value startKeyString = IndexEntryComparison::makeKeyStringFromBSONKeyForSeek(
+ kMinBSONKey,
+ refIdx->accessMethod()->getSortedDataInterface()->getKeyStringVersion(),
+ refIdx->accessMethod()->getSortedDataInterface()->getOrdering(),
+ true /* forward */,
+ false /* inclusive */);
+
+ auto childIdx = _indexes[0].block->getEntry(opCtx, collection);
+ auto childDescriptor = childIdx->descriptor();
+ invariant(!childDescriptor->isPartial() && !childDescriptor->isSparse() &&
+ !childDescriptor->unique() && _indexes[0].options.dupsAllowed);
+
+ auto bulkLoader = _indexes[0].real->makeBulkBuilder(opCtx, _indexes[0].options.dupsAllowed);
+
+ // We are returning by value: "keys" within the lambda clears itself on destruction, so
+ // returning by reference would hand back a reference to a destroyed object.
+ auto produceKey = [&](const BSONObj& key, const RecordId& loc) -> KeyString::Value {
+ auto& executionCtx = StorageExecutionContext::get(opCtx);
+ auto keys = executionCtx.keys();
+ auto multikeyMetadataKeys = executionCtx.multikeyMetadataKeys();
+ auto multikeyPaths = executionCtx.multikeyPaths();
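+ // Rehydrate a document-shaped BSONObj from the reference index key (field names come
+ // from the reference index's key pattern) so getKeys() below can derive the child
+ // index's key from it.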
+ auto obj = IndexKeyEntry::rehydrateKey(refIdx->descriptor()->keyPattern(), key);
+ childIdx->accessMethod()->getKeys(executionCtx.pooledBufferBuilder(),
+ obj,
+ _indexes[0].options.getKeysMode,
+ IndexAccessMethod::GetKeysContext::kAddingKeys,
+ keys.get(),
+ multikeyMetadataKeys.get(),
+ multikeyPaths.get(),
+ loc,
+ IndexAccessMethod::kNoopOnSuppressedErrorFn);
+
+ // Should produce only one key:
+ invariant(keys->size() == 1, "Expected one key, got " + std::to_string(keys->size()));
+ return *(keys->begin());
+ };
+
+ auto dumpSorter = [&]() {
+ std::unique_ptr<IndexAccessMethod::BulkBuilder::Sorter::Iterator> it(
+ _indexes[0].bulk->done());
+
+ WriteUnitOfWork wuow(opCtx);
+ while (it->more()) {
+ auto key = it->next().first;
+ auto stat = bulkLoader->addKey(key);
+ if (!stat.isOK())
+ return stat;
+ }
+ wuow.commit();
+
+ return Status::OK();
+ };
+
+ // We "refresh" the sorter (create a new one) to empty it out. We require an empty sorter for
+ // every key class we encounter. A key class is a contiguous group of keys that are in order in
+ // the reference index, but may be out of order in the child index due to the record ID.
+ // For example, consider a reference index { a: 1, b: 1 } and the corresponding keyStrings:
+ //
+ // RecordID 2: { a: "a", b: "blue" } -> "a,blue,2"
+ // RecordID 1: { a: "a", b: "red" } -> "a,red,1"
+ //
+ // Note that in the reference index, the above are in order (sorted by { a: 1, b: 1 }), but the
+ // document with a greater record ID appears first.
+ //
+ // When trying to build a child index { a: 1 }, we produce these corresponding keyStrings:
+ // "a,2"
+ // "a,1"
+ // Though the keyStrings were in order in the reference index, they are not in order when it
+ // comes to the child index. As a result, we need to sort each set of keys that differ only in
+ // their record IDs. We're calling this set of keys a key class.
+ auto refreshSorter = [&]() {
+ _indexes[0].bulk =
+ _indexes[0].real->initiateBulk(_eachIndexBuildMaxMemoryUsageBytes, boost::none);
+ };
+
+ auto addToSorter = [&](const KeyString::Value& keyString) {
+ _indexes[0].bulk->addToSorter(keyString);
+ };
+
+ auto insertBulkBypassingSorter = [&](const KeyString::Value& keyString) {
+ uassertStatusOK(bulkLoader->addKey(keyString));
+ };
+
+ auto refIdxEntry = cursor->seek(startKeyString);
+
+ if (!refIdxEntry) {
+ LOGV2(3620204,
+ "Reference index is empty.",
+ "refIdx"_attr = refIdx->descriptor()->indexName());
+ _phase = IndexBuildPhaseEnum::kBulkLoad;
+ WriteUnitOfWork wuow(opCtx);
+ // Allow the commit operation to be interruptible:
+ bulkLoader->commit(true);
+ wuow.commit();
+ return Status::OK();
+ }
+
+ KeyString::Value currKS = produceKey(refIdxEntry->key, refIdxEntry->loc);
+ KeyString::Value nextKS;
+ bool processingKeyClass = false;
+
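+ // Invariant: currKS is the last key produced but not yet written out, and
+ // processingKeyClass is true while earlier members of currKS's key class are already
+ // buffered in the sorter.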
+ while ((refIdxEntry = cursor->next())) {
+ nextKS = produceKey(refIdxEntry->key, refIdxEntry->loc);
+ if (currKS.compareWithoutRecordId(nextKS) == 0) {
+ addToSorter(currKS);
+ processingKeyClass = true;
+ currKS = nextKS;
+ continue;
+ }
+
+ if (processingKeyClass) {
+ addToSorter(currKS);
+ auto stat = dumpSorter();
+ if (!stat.isOK())
+ return stat;
+ refreshSorter();
+ currKS = nextKS;
+ processingKeyClass = false;
+ continue;
+ }
+
+ insertBulkBypassingSorter(currKS);
+ currKS = nextKS;
+ }
+
+ if (processingKeyClass) {
+ addToSorter(currKS);
+ auto stat = dumpSorter();
+ if (!stat.isOK())
+ return stat;
+ } else {
+ insertBulkBypassingSorter(currKS);
+ }
+
+ _phase = IndexBuildPhaseEnum::kBulkLoad;
+
+ WriteUnitOfWork wuow(opCtx);
+ // Allow the commit operation to be interruptible:
+ bulkLoader->commit(true);
+ wuow.commit();
+
+ return Status::OK();
+}
+
} // namespace mongo
diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h
index 54b12fb622a..9c82ceb9a3c 100644
--- a/src/mongo/db/catalog/multi_index_block.h
+++ b/src/mongo/db/catalog/multi_index_block.h
@@ -310,6 +310,13 @@ private:
const BSONObj& doc,
unsigned long long iteration) const;
+ const IndexCatalogEntry* _findSmallestReferenceIdx(OperationContext* opCtx,
+ const CollectionPtr& collection) const;
+
+ Status _scanReferenceIdxInsertAndCommit(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ const IndexCatalogEntry* refIdx);
+
// Is set during init() and ensures subsequent function calls act on the same Collection.
boost::optional<UUID> _collectionUUID;
@@ -319,6 +326,8 @@ private:
bool _ignoreUnique = false;
+ std::size_t _eachIndexBuildMaxMemoryUsageBytes = 0;
+
// Set to true when no work remains to be done, the object can safely destruct without leaving
// incorrect state set anywhere.
bool _buildIsCleanedUp = true;
diff --git a/src/mongo/db/catalog/multi_index_block.idl b/src/mongo/db/catalog/multi_index_block.idl
index c7f5ddddcb9..93b7869a606 100644
--- a/src/mongo/db/catalog/multi_index_block.idl
+++ b/src/mongo/db/catalog/multi_index_block.idl
@@ -52,3 +52,11 @@ server_parameters:
default: 200
validator:
gte: 50
+
+ useReferenceIndexForIndexBuild:
+ description: "When true, the server attempts to use an existing index to build a new index instead of performing a collection scan"
+ set_at:
+ - startup
+ cpp_varname: gUseReferenceIndexForIndexBuild
+ cpp_vartype: bool
+ default: false
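
The parameter is settable only at startup, so it cannot be toggled at runtime via setParameter.
A quick way to confirm its value from the shell (a sketch using the generic getParameter
command; the default shown follows the IDL above):

// Returns {useReferenceIndexForIndexBuild: false, ok: 1} unless enabled at startup.
db.adminCommand({getParameter: 1, useReferenceIndexForIndexBuild: 1});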
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index 6bdd126d250..5635e275e05 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -486,6 +486,10 @@ public:
const RecordId& loc,
const InsertDeleteOptions& options) final;
+ void addToSorter(const KeyString::Value& keyString) final {
+ _sorter->add(keyString, mongo::NullValue());
+ }
+
const MultikeyPaths& getMultikeyPaths() const final;
bool isMultikey() const final;
@@ -841,6 +845,12 @@ bool AbstractIndexAccessMethod::shouldMarkIndexAsMultikey(
return numberOfKeys > 1 || isMultikeyFromPaths(multikeyPaths);
}
+std::unique_ptr<SortedDataBuilderInterface> AbstractIndexAccessMethod::makeBulkBuilder(
+ OperationContext* opCtx, bool dupsAllowed) {
+ return std::unique_ptr<SortedDataBuilderInterface>(
+ _newInterface->getBulkBuilder(opCtx, dupsAllowed));
+}
+
SortedDataInterface* AbstractIndexAccessMethod::getSortedDataInterface() const {
return _newInterface.get();
}
diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h
index 013a6663c3a..b5adc80e051 100644
--- a/src/mongo/db/index/index_access_method.h
+++ b/src/mongo/db/index/index_access_method.h
@@ -160,6 +160,10 @@ public:
int64_t* numInserted,
int64_t* numDeleted) = 0;
+ virtual std::unique_ptr<SortedDataBuilderInterface> makeBulkBuilder(OperationContext* opCtx,
+ bool dupsAllowed) = 0;
+
/**
* Returns an unpositioned cursor over 'this' index.
*/
@@ -242,6 +246,12 @@ public:
const RecordId& loc,
const InsertDeleteOptions& options) = 0;
+ /**
+ * Inserts the keyString directly into the sorter. No additional logic (related to multikey
+ * paths, etc.) is performed.
+ */
+ virtual void addToSorter(const KeyString::Value& keyString) = 0;
+
virtual const MultikeyPaths& getMultikeyPaths() const = 0;
virtual bool isMultikey() const = 0;
@@ -508,6 +518,9 @@ public:
bool isForward) const final;
std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* opCtx) const final;
+ std::unique_ptr<SortedDataBuilderInterface> makeBulkBuilder(OperationContext* opCtx,
+ bool dupsAllowed) final;
+
Status initializeAsEmpty(OperationContext* opCtx) final;
void validate(OperationContext* opCtx,