From b8017175e3ac2c6d37f60d3f4cd2efb5011d1a32 Mon Sep 17 00:00:00 2001
From: Vishnu Kaushik
Date: Fri, 16 Oct 2020 14:01:47 +0000
Subject: SERVER-51461 Support building indexes from existing indexes guarded
 by feature flag

---
 jstests/noPassthrough/index_build_from_ref.js | 190 ++++++++++++++++++++++
 src/mongo/db/catalog/SConscript               |   2 +
 src/mongo/db/catalog/multi_index_block.cpp    | 219 ++++++++++++++++++++++++++
 src/mongo/db/catalog/multi_index_block.h      |   9 ++
 src/mongo/db/catalog/multi_index_block.idl    |   8 +
 src/mongo/db/index/index_access_method.cpp    |  10 ++
 src/mongo/db/index/index_access_method.h      |  13 ++
 7 files changed, 451 insertions(+)
 create mode 100644 jstests/noPassthrough/index_build_from_ref.js

diff --git a/jstests/noPassthrough/index_build_from_ref.js b/jstests/noPassthrough/index_build_from_ref.js
new file mode 100644
index 00000000000..ce1332130ad
--- /dev/null
+++ b/jstests/noPassthrough/index_build_from_ref.js
@@ -0,0 +1,190 @@
+// Test the useReferenceIndexForIndexBuild optimization. This test goes through the conditions
+// under which a reference index can be used instead of performing a collection scan. For example,
+// a reference index cannot be used when the child index (the index we are trying to build) is
+// unique.
+(function() {
+
+const conn = MongoRunner.runMongod({setParameter: "useReferenceIndexForIndexBuild=true"});
+const db = conn.getDB(jsTestName());
+
+let t = db.index_refidx;
+t.drop();
+
+// Insert a couple of items.
+t.insert({a: "a", b: "red", c: 80, d: "asdf"});    // recId 1
+t.insert({a: "a", b: "blue", c: 800, d: "asdf"});  // recId 2 - Will be inverted on { a: 1, b: 1 }
+t.insert({a: "b", b: "red", c: 80, d: "asdf"});
+t.insert({a: "c", b: "blue", c: 800, d: "asdf"});
+t.insert({a: "d", b: "red", c: 80, d: "asdf"});
+t.insert({a: "a", b: "blue", c: 800, d: "asdf"});
+t.insert({a: "a", b: "red", c: 80, d: "asdf"});
+t.insert({a: "b", b: "blue", c: 800, d: "asdf"});
+t.insert({a: "b", b: "red", c: 80, d: "asdf"});
+t.insert({a: "b", b: "blue", c: 800, d: "asdf"});
+t.insert({a: "a", b: "red", c: 80, d: "asdf"});
+t.insert({a: "c", b: "blue", c: 800, d: "asdf"});
+
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_1"));  // Should take the normal path.
+assert.eq(5, t.find({a: "a"}).count());
+
+// Ensure key class sorts are working properly (inverted keys were inserted earlier).
+assert.commandWorked(t.createIndex({a: 1}));  // Should take the fast path.
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_1"}));
+
+assert.eq(5, t.find({a: "a"}).hint({a: 1}).count());
+assert.eq(4, t.find({a: "b"}).hint({a: 1}).count());
+assert.eq(0, t.find({a: "aa"}).hint({a: 1}).count());
+
+let result = assert.commandWorked(t.validate());
+assert(result.valid);
+
+t.drop();
+
+// Insert all unique keys.
+for (var i = 0; i < 30; i++) {
+    t.insert({a: i, b: (i * 10)});
+    t.insert({a: (-i), b: (i * -10)});
+}
+
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_2"));
+assert.commandWorked(t.createIndex({a: 1}));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_2"}));
+assert.eq(29, t.find({a: {$gt: 0}}).hint({a: 1}).count());
+
+// Shouldn't take the fast path: {a: -1} is not a prefix of {a: 1, b: 1}, so the fast-path log
+// message for refidx_xyz_2 should still appear exactly once.
+assert.commandWorked(t.createIndex({a: -1}));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_2"}));
+assert.eq(0, t.find({a: "blah"}).hint({a: 1}).count());  // None should be found.
+
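+// Sanity check (an illustrative addition, not part of the original test; the query shape is
+// chosen arbitrarily): the fast-path-built {a: 1} index should agree with the reference index
+// {a: 1, b: 1} on an equality predicate.
+assert.eq(t.find({a: 7}).hint({a: 1}).itcount(), t.find({a: 7}).hint({a: 1, b: 1}).itcount());
+
+// No problems inserting new documents into the existing index.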
+for (var i = 0; i < 40; i++) {
+    t.insert({a: i, b: (i + 1)});
+}
+
+assert.eq(68, t.find({a: {$gt: 0}}).hint({a: 1}).count());
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_3"));
+
+// This test case covers a key class distribution of the form
+// [ a a ... a b b b b ... b c d e f g g g g ... g ].
+// We are trying to make sure documents sitting on key class boundaries are not skipped.
+for (var i = 0; i < 20; i++) {
+    t.insert({a: "a", b: i});
+    t.insert({a: "b", b: (2 * i)});
+    t.insert({a: "b", b: i});
+    t.insert({a: "g", b: (i - 1)});
+}
+
+t.insert({a: "c", b: 2});
+t.insert({a: "f", b: 2});
+t.insert({a: "e", b: 2});
+t.insert({a: "d", b: 2});
+
+assert.commandWorked(t.createIndex({b: 1, a: 1}, "refidx_xyz_4"));
+assert.commandWorked(t.createIndex({a: 1}));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_3"}));
+assert.eq(20, t.find({a: "a"}).hint({a: 1}).count());
+assert.eq(40, t.find({a: "b"}).hint({a: 1}).count());
+assert.eq(20, t.find({a: "g"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "c"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "d"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "e"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "f"}).hint({a: 1}).count());
+assert.eq(0, t.find({a: "h"}).hint({a: 1}).count());
+
+assert.commandWorked(
+    t.createIndex({a: 1, b: 1, c: 1}, "refidx_xyz_5"));  // Shouldn't take the fast path.
+assert.commandWorked(t.createIndex({a: 1, c: 1}));       // Shouldn't take the fast path.
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_3"}));
+
+assert.commandWorked(t.dropIndex({a: 1, b: 1}));
+assert.commandWorked(t.createIndex({a: 1, b: 1}));  // Should take the fast path.
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_5"}));
+assert.commandWorked(t.createIndex({b: 1}));        // Should take the fast path.
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_4"}));
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+
+t.drop();
+
+// An empty index:
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_6"));
+assert.commandWorked(t.createIndex({a: 1}));
+// Empty collections are handled elsewhere, so no fast-path log message is expected here.
+// assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_6"}));
+assert.eq(0, t.find().hint({a: 1}).count());
+assert.eq(0, t.find().count());
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+
+t.drop();
+
+// An index with one element:
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_xyz_7"));
+t.insert({a: "we are the robots", b: "kraftwerk"});
+assert.commandWorked(t.createIndex({a: 1}));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_xyz_7"}));
+assert.eq(0, t.find({a: "spacelab"}).hint({a: 1}).count());
+assert.eq(1, t.find({a: "we are the robots"}).hint({a: 1}).count());
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
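+// Note the preconditions for the fast path (see _findSmallestReferenceIdx in
+// multi_index_block.cpp): exactly one index is being built; the child index may not be partial,
+// sparse, or unique; and the reference index may not be multikey, partial, or sparse, and must
+// contain the child's key pattern as a prefix. A unique reference index, however, is acceptable.
+
+// Parent can be unique, and we can build a non-unique child from it. Check partial indexes as
+// well.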
+for (var i = 0; i < 30; i++) {
+    t.insert({a: 1, b: i});
+}
+
+assert.commandWorked(t.createIndex({a: 1, b: 1}, {unique: true, name: "refidx_uniq_a_b"}));
+assert.commandWorked(t.createIndex({b: 1, a: 1}, {unique: true, name: "refidx_uniq_b_a"}));
+assert.commandWorked(t.createIndex({a: 1}, "childidx_a1"));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_uniq_a_b"}));
+assert.commandWorked(t.createIndex({b: 1}, {unique: true}));  // Shouldn't use the fast path.
+assert(!checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_uniq_b_a"}));
+assert.commandWorked(t.dropIndex("childidx_a1"));
+assert.commandWorked(t.dropIndex("refidx_uniq_a_b"));
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_partial_a_b"));
+assert.commandWorked(t.createIndex({a: 1}, {partialFilterExpression: {b: {$gt: 5}}}));
+assert(!checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_partial_a_b"}));
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
+// Make sure we're using the smallest available reference index.
+for (var i = 0; i < 30; i++) {
+    t.insert({a: 1, b: i, c: (i + 1), d: (i * 3)});
+}
+
+assert.commandWorked(t.createIndex({a: 1, b: 1, c: 1, d: 1}, "refidx_abcd_xyz"));
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "refidx_ab_xyz"));
+assert(checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "refidx_abcd_xyz"}));
+assert.commandWorked(t.createIndex({a: 1, b: 1, c: 1}, "refidx_abc_xyz"));
+assert.commandWorked(t.createIndex({a: 1, c: 1}, "refidx_ac_xyz"));
+assert.commandWorked(t.createIndex({a: 1}, "childidx_a_smallest"));
+assert(checkLog.checkContainsOnceJson(
+    conn, 3620203, {childIdx: "childidx_a_smallest", refIdx: "refidx_ab_xyz"}));
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
+// Make sure a multikey index is never used as a reference index.
+t.insert({a: ["me", "mee", "and louie"], b: 78});
+t.insert({a: ["summer", "is a good season"], b: 8});
+assert.commandWorked(t.createIndex({a: 1, b: 1}, "multikey_ab_xyz"));
+assert.commandWorked(t.createIndex({a: 1}, "child_shouldnt_use_ref"));
+assert(!checkLog.checkContainsOnceJson(conn, 3620203, {refIdx: "multikey_ab_xyz"}));
+
+result = assert.commandWorked(t.validate());
+assert(result.valid);
+t.drop();
+
+MongoRunner.stopMongod(conn);
+})();
\ No newline at end of file
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index f493b8a7b4b..e9b90a65aa7 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -248,7 +248,9 @@ env.Library(
         'index_build_block',
     ],
     LIBDEPS_PRIVATE=[
+        '$BUILD_DIR/mongo/db/index/index_access_method',
         '$BUILD_DIR/mongo/db/index/index_build_interceptor',
+        '$BUILD_DIR/mongo/db/storage/execution_context',
         '$BUILD_DIR/mongo/db/storage/storage_options',
         '$BUILD_DIR/mongo/idl/server_parameter',
         'collection_catalog',
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 109b73a0780..24ac0c90eb2 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -53,6 +53,8 @@
 #include "mongo/db/repl/replication_coordinator.h"
 #include "mongo/db/repl/tenant_migration_committed_info.h"
 #include "mongo/db/repl/tenant_migration_conflict_info.h"
+#include "mongo/db/storage/execution_context.h"
+#include "mongo/db/storage/index_entry_comparison.h"
 #include "mongo/db/storage/storage_options.h"
 #include "mongo/db/storage/write_unit_of_work.h"
 #include "mongo/logv2/log.h"
@@ -208,6 +210,7 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(
             static_cast<std::size_t>(maxIndexBuildMemoryUsageMegabytes.load()) * 1024 * 1024 /
             indexSpecs.size();
     }
+    _eachIndexBuildMaxMemoryUsageBytes = eachIndexBuildMaxMemoryUsageBytes;
 
     // Initializing individual index build blocks below performs un-timestamped writes to the
     // durable catalog. It's possible for the onInit function to set multiple timestamps
@@ -365,6 +368,18 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(
     }
 
     MultikeyPathTracker::get(opCtx).startTrackingMultikeyPathInfo();
+    const IndexCatalogEntry* refIdx = nullptr;
+    if (gUseReferenceIndexForIndexBuild && _indexes.size() == 1 &&
+        (refIdx = _findSmallestReferenceIdx(opCtx, collection))) {
+        LOGV2(3620203,
+              "Index Build: using existing index instead of scanning collection",
+              "refIdx"_attr = refIdx->descriptor()->indexName(),
+              "childIdx"_attr =
+                  _indexes[0].block->getEntry(opCtx, collection)->descriptor()->indexName());
+        uassertStatusOK(_scanReferenceIdxInsertAndCommit(opCtx, collection, refIdx));
+        return Status::OK();
+    }
+
     const char* curopMessage = "Index Build: scanning collection";
     const auto numRecords = collection->numRecords(opCtx);
     ProgressMeterHolder progress;
@@ -970,4 +985,208 @@ Status MultiIndexBlock::_failPointHangDuringBuild(OperationContext* opCtx,
 
     return Status::OK();
 }
+
+const IndexCatalogEntry* MultiIndexBlock::_findSmallestReferenceIdx(
+    OperationContext* opCtx, const CollectionPtr& collection) const {
+    // Find a suitable reference index for the first index we are trying to build. First make
+    // sure we're not trying to build a partial, sparse or unique index; the logic to handle
+    // these cases is complicated (we'd have to determine whether a partial reference index
+    // covers the partial index we're trying to build, a unique reference index will not
+    // necessarily yield a unique child index, etc.). We do not support multikey indexes either.
+    for (size_t i = 0; i < _indexes.size(); i++) {
+        auto entry = _indexes[i].block->getEntry(opCtx, collection);
+        auto descriptor = entry->descriptor();
+        if (descriptor->isPartial() || descriptor->isSparse() || descriptor->unique())
+            return nullptr;
+    }
+
+    const IndexCatalogEntry* smallestRefIdx = nullptr;
+
+    int smallestSize = -1;
+    auto it = collection->getIndexCatalog()->getIndexIterator(
+        opCtx, false /* includeUnfinishedIndexes */);
+    while (it->more()) {
+        const auto candidateEntry = it->next();
+        const auto candidateDescriptor = candidateEntry->descriptor();
+        // A partial / sparse reference index may or may not cover the index we're trying to
+        // build. More complex logic is required to check whether the reference index covers the
+        // one we're building, and so as a simplification, we avoid partial / sparse reference
+        // indexes. A candidate reference index being unique, however, is not a problem.
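+        // For example, a child index { a: 1 } can be built from a candidate { a: 1, b: 1 } or
+        // { a: 1, b: 1, c: 1 }, but not from { b: 1, a: 1 } or { a: -1 }: the compatibility
+        // check below requires the child's key pattern to be a prefix of the candidate's.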
+        if (candidateEntry->isMultikey() || candidateDescriptor->isPartial() ||
+            candidateDescriptor->isSparse())
+            continue;
+
+        // Make sure the candidate we are looking at is compatible with all the indexes we are
+        // attempting to build:
+        bool compatible = true;
+        for (size_t i = 0; i < _indexes.size(); i++) {
+            auto descriptor = _indexes[i].block->getEntry(opCtx, collection)->descriptor();
+            compatible = compatible &&
+                descriptor->keyPattern().isPrefixOf(candidateDescriptor->keyPattern(),
+                                                    SimpleBSONElementComparator::kInstance);
+            if (!compatible)
+                break;
+        }
+
+        if (compatible) {
+            int candidateSize = candidateDescriptor->keyPattern().nFields();
+            if (smallestRefIdx && smallestSize <= candidateSize)
+                continue;
+
+            smallestSize = candidateSize;
+            smallestRefIdx = candidateEntry;
+        }
+    }
+
+    return smallestRefIdx;
+}
+
+Status MultiIndexBlock::_scanReferenceIdxInsertAndCommit(OperationContext* opCtx,
+                                                         const CollectionPtr& collection,
+                                                         const IndexCatalogEntry* refIdx) {
+    auto cursor = refIdx->accessMethod()->newCursor(opCtx);
+    KeyString::Value startKeyString = IndexEntryComparison::makeKeyStringFromBSONKeyForSeek(
+        kMinBSONKey,
+        refIdx->accessMethod()->getSortedDataInterface()->getKeyStringVersion(),
+        refIdx->accessMethod()->getSortedDataInterface()->getOrdering(),
+        true /* forward */,
+        false /* inclusive */);
+
+    auto childIdx = _indexes[0].block->getEntry(opCtx, collection);
+    auto childDescriptor = childIdx->descriptor();
+    invariant(!childDescriptor->isPartial() && !childDescriptor->isSparse() &&
+              !childDescriptor->unique() && _indexes[0].options.dupsAllowed);
+
+    auto bulkLoader = _indexes[0].real->makeBulkBuilder(opCtx, _indexes[0].options.dupsAllowed);
+
+    // We return by value: "keys" within the lambda clears itself on destruction, so returning by
+    // reference would hand back a reference to a deleted object.
+    auto produceKey = [&](const BSONObj& key, const RecordId& loc) -> KeyString::Value {
+        auto& executionCtx = StorageExecutionContext::get(opCtx);
+        auto keys = executionCtx.keys();
+        auto multikeyMetadataKeys = executionCtx.multikeyMetadataKeys();
+        auto multikeyPaths = executionCtx.multikeyPaths();
+        // rehydrateKey re-labels the stored key's values with the reference index's field names,
+        // e.g. pattern { a: 1, b: 1 } and key values ("a", "blue") yield { a: "a", b: "blue" }.
+        auto obj = IndexKeyEntry::rehydrateKey(refIdx->descriptor()->keyPattern(), key);
+        childIdx->accessMethod()->getKeys(executionCtx.pooledBufferBuilder(),
+                                          obj,
+                                          _indexes[0].options.getKeysMode,
+                                          IndexAccessMethod::GetKeysContext::kAddingKeys,
+                                          keys.get(),
+                                          multikeyMetadataKeys.get(),
+                                          multikeyPaths.get(),
+                                          loc,
+                                          IndexAccessMethod::kNoopOnSuppressedErrorFn);
+
+        // Should produce exactly one key:
+        invariant(keys->size() == 1, "Expected one key, got " + std::to_string(keys->size()));
+        return *(keys->begin());
+    };
+
+    auto dumpSorter = [&]() {
+        std::unique_ptr<IndexAccessMethod::BulkBuilder::Sorter::Iterator> it(
+            _indexes[0].bulk->done());
+
+        WriteUnitOfWork wuow(opCtx);
+        while (it->more()) {
+            auto key = it->next().first;
+            auto stat = bulkLoader->addKey(key);
+            if (!stat.isOK())
+                return stat;
+        }
+        wuow.commit();
+
+        return Status::OK();
+    };
+
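+    // In the scan loop below, runs of keys that compare equal ignoring their record IDs are
+    // routed through the sorter (addToSorter, then dumpSorter and refreshSorter once the run
+    // ends), while keys with no such neighbor bypass the sorter entirely
+    // (insertBulkBypassingSorter).
+
+    // We "refresh" the sorter (create a new one) to empty it out. We require an empty sorter for
+    // every key class we encounter. A key class is a contiguous group of keys that are in order
+    // in the reference index, but may be out of order in the child index due to the record ID.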
+    // For example, consider a reference index { a: 1, b: 1 } and the corresponding keyStrings:
+    //
+    // RecordID 2: { a: "a", b: "blue" } -> "a,blue,2"
+    // RecordID 1: { a: "a", b: "red" }  -> "a,red,1"
+    //
+    // Note that in the reference index, the above are in order (sorted by { a: 1, b: 1 }), but
+    // the document with the greater record ID appears first.
+    //
+    // When trying to build a child index { a: 1 }, we produce these corresponding keyStrings:
+    // "a,2"
+    // "a,1"
+    // Though the keyStrings were in order in the reference index, they are not in order when it
+    // comes to the child index. As a result, we need to sort each set of keys that differ only
+    // in their record IDs. We call this set of keys a key class.
+    auto refreshSorter = [&]() {
+        _indexes[0].bulk =
+            _indexes[0].real->initiateBulk(_eachIndexBuildMaxMemoryUsageBytes, boost::none);
+    };
+
+    auto addToSorter = [&](const KeyString::Value& keyString) {
+        _indexes[0].bulk->addToSorter(keyString);
+    };
+
+    auto insertBulkBypassingSorter = [&](const KeyString::Value& keyString) {
+        uassertStatusOK(bulkLoader->addKey(keyString));
+    };
+
+    auto refIdxEntry = cursor->seek(startKeyString);
+
+    if (!refIdxEntry) {
+        LOGV2(3620204,
+              "Reference index is empty",
+              "refIdx"_attr = refIdx->descriptor()->indexName());
+        _phase = IndexBuildPhaseEnum::kBulkLoad;
+        WriteUnitOfWork wuow(opCtx);
+        // Allow the commit operation to be interruptible:
+        bulkLoader->commit(true);
+        wuow.commit();
+        return Status::OK();
+    }
+
+    KeyString::Value currKS = produceKey(refIdxEntry->key, refIdxEntry->loc);
+    KeyString::Value nextKS;
+    bool processingKeyClass = false;
+
+    while ((refIdxEntry = cursor->next())) {
+        nextKS = produceKey(refIdxEntry->key, refIdxEntry->loc);
+        if (currKS.compareWithoutRecordId(nextKS) == 0) {
+            addToSorter(currKS);
+            processingKeyClass = true;
+            currKS = nextKS;
+            continue;
+        }
+
+        if (processingKeyClass) {
+            addToSorter(currKS);
+            auto stat = dumpSorter();
+            if (!stat.isOK())
+                return stat;
+            refreshSorter();
+            currKS = nextKS;
+            processingKeyClass = false;
+            continue;
+        }
+
+        insertBulkBypassingSorter(currKS);
+        currKS = nextKS;
+    }
+
+    if (processingKeyClass) {
+        addToSorter(currKS);
+        auto stat = dumpSorter();
+        if (!stat.isOK())
+            return stat;
+    } else {
+        insertBulkBypassingSorter(currKS);
+    }
+
+    _phase = IndexBuildPhaseEnum::kBulkLoad;
+
+    WriteUnitOfWork wuow(opCtx);
+    // Allow the commit operation to be interruptible:
+    bulkLoader->commit(true);
+    wuow.commit();
+
+    return Status::OK();
+}
+
 }  // namespace mongo
diff --git a/src/mongo/db/catalog/multi_index_block.h b/src/mongo/db/catalog/multi_index_block.h
index 54b12fb622a..9c82ceb9a3c 100644
--- a/src/mongo/db/catalog/multi_index_block.h
+++ b/src/mongo/db/catalog/multi_index_block.h
@@ -310,6 +310,13 @@ private:
                                       const BSONObj& doc,
                                       unsigned long long iteration) const;
 
+    const IndexCatalogEntry* _findSmallestReferenceIdx(OperationContext* opCtx,
+                                                       const CollectionPtr& collection) const;
+
+    Status _scanReferenceIdxInsertAndCommit(OperationContext* opCtx,
+                                            const CollectionPtr& collection,
+                                            const IndexCatalogEntry* refIdx);
+
     // Is set during init() and ensures subsequent function calls act on the same Collection.
     boost::optional<UUID> _collectionUUID;
 
@@ -319,6 +326,8 @@ private:
 
     bool _ignoreUnique = false;
 
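+    // The per-index memory budget for the bulk sorter, captured in init(); the reference-index
+    // fast path reuses it whenever it re-initiates the sorter for a new key class.
+    std::size_t _eachIndexBuildMaxMemoryUsageBytes = 0;
+
     // Set to true when no work remains to be done, the object can safely destruct without leaving
     // incorrect state set anywhere.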
     bool _buildIsCleanedUp = true;
diff --git a/src/mongo/db/catalog/multi_index_block.idl b/src/mongo/db/catalog/multi_index_block.idl
index c7f5ddddcb9..93b7869a606 100644
--- a/src/mongo/db/catalog/multi_index_block.idl
+++ b/src/mongo/db/catalog/multi_index_block.idl
@@ -52,3 +52,11 @@ server_parameters:
     default: 200
     validator:
       gte: 50
+
+  useReferenceIndexForIndexBuild:
+    description: "When true, attempts to use an existing index to build a new index instead of performing a collection scan"
+    set_at:
+      - startup
+    cpp_varname: gUseReferenceIndexForIndexBuild
+    cpp_vartype: bool
+    default: false
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index 6bdd126d250..5635e275e05 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -486,6 +486,10 @@ public:
                 const RecordId& loc,
                 const InsertDeleteOptions& options) final;
 
+    void addToSorter(const KeyString::Value& keyString) final {
+        _sorter->add(keyString, mongo::NullValue());
+    }
+
     const MultikeyPaths& getMultikeyPaths() const final;
 
     bool isMultikey() const final;
@@ -841,6 +845,12 @@ bool AbstractIndexAccessMethod::shouldMarkIndexAsMultikey(
     return numberOfKeys > 1 || isMultikeyFromPaths(multikeyPaths);
 }
 
+std::unique_ptr<SortedDataBuilderInterface> AbstractIndexAccessMethod::makeBulkBuilder(
+    OperationContext* opCtx, bool dupsAllowed) {
+    return std::unique_ptr<SortedDataBuilderInterface>(
+        _newInterface->getBulkBuilder(opCtx, dupsAllowed));
+}
+
 SortedDataInterface* AbstractIndexAccessMethod::getSortedDataInterface() const {
     return _newInterface.get();
 }
diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h
index 013a6663c3a..b5adc80e051 100644
--- a/src/mongo/db/index/index_access_method.h
+++ b/src/mongo/db/index/index_access_method.h
@@ -160,6 +160,10 @@ public:
                            int64_t* numInserted,
                            int64_t* numDeleted) = 0;
 
+    virtual std::unique_ptr<SortedDataBuilderInterface> makeBulkBuilder(OperationContext* opCtx,
+                                                                        bool dupsAllowed) = 0;
+
+
     /**
      * Returns an unpositioned cursor over 'this' index.
      */
@@ -242,6 +246,12 @@ public:
                           const RecordId& loc,
                           const InsertDeleteOptions& options) = 0;
 
+    /**
+     * Inserts the keyString directly into the sorter. No additional logic (related to multikey
+     * paths, etc.) is performed.
+     */
+    virtual void addToSorter(const KeyString::Value& keyString) = 0;
+
     virtual const MultikeyPaths& getMultikeyPaths() const = 0;
 
     virtual bool isMultikey() const = 0;
@@ -508,6 +518,9 @@ public:
                                                   bool isForward) const final;
 
     std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* opCtx) const final;
+    std::unique_ptr<SortedDataBuilderInterface> makeBulkBuilder(OperationContext* opCtx,
+                                                                bool dupsAllowed) final;
+
     Status initializeAsEmpty(OperationContext* opCtx) final;
 
     void validate(OperationContext* opCtx,
-- 
cgit v1.2.1