diff options
-rw-r--r-- | src/mongo/db/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/exec/index_iterator.cpp | 87 | ||||
-rw-r--r-- | src/mongo/db/exec/index_iterator.h | 93 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.h | 4 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.cpp | 45 | ||||
-rw-r--r-- | src/mongo/db/query/stage_builder.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/query/stage_types.h | 3 | ||||
-rw-r--r-- | src/mongo/db/storage/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/storage/sorted_data_interface.h | 18 | ||||
-rw-r--r-- | src/mongo/db/storage/sorted_data_interface_test_rand_cursor.cpp | 248 |
11 files changed, 8 insertions, 498 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index a33f95db62f..c91df3a29de 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -959,7 +959,6 @@ env.Library( 'exec/fetch.cpp', 'exec/geo_near.cpp', 'exec/idhack.cpp', - 'exec/index_iterator.cpp', 'exec/index_scan.cpp', 'exec/keep_mutations.cpp', 'exec/limit.cpp', diff --git a/src/mongo/db/exec/index_iterator.cpp b/src/mongo/db/exec/index_iterator.cpp deleted file mode 100644 index 54cb130d6d1..00000000000 --- a/src/mongo/db/exec/index_iterator.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Copyright (C) 2015 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/platform/basic.h" - -#include "mongo/db/exec/index_iterator.h" - -#include "mongo/db/catalog/collection.h" -#include "mongo/db/exec/scoped_timer.h" -#include "mongo/db/index/index_access_method.h" -#include "mongo/stdx/memory.h" - -namespace mongo { - -using std::unique_ptr; -using stdx::make_unique; - -const char* IndexIteratorStage::kStageType = "INDEX_ITERATOR"; - -IndexIteratorStage::IndexIteratorStage(OperationContext* opCtx, - WorkingSet* ws, - Collection* collection, - IndexAccessMethod* iam, - BSONObj keyPattern, - unique_ptr<SortedDataInterface::Cursor> cursor) - : PlanStage(kStageType, opCtx), - _collection(collection), - _ws(ws), - _iam(iam), - _cursor(std::move(cursor)), - _keyPattern(keyPattern.getOwned()) { - invariant(_collection); // It is illegal to use this stage without a collection. -} - -PlanStage::StageState IndexIteratorStage::doWork(WorkingSetID* out) { - if (auto entry = _cursor->next()) { - if (!entry->key.isOwned()) - entry->key = entry->key.getOwned(); - - WorkingSetID id = _ws->allocate(); - WorkingSetMember* member = _ws->get(id); - member->recordId = entry->loc; - member->keyData.push_back(IndexKeyDatum(_keyPattern, entry->key, _iam)); - _ws->transitionToRecordIdAndIdx(id); - - *out = id; - return PlanStage::ADVANCED; - } - - _commonStats.isEOF = true; - return PlanStage::IS_EOF; -} - -bool IndexIteratorStage::isEOF() { - return _commonStats.isEOF; -} - -unique_ptr<PlanStageStats> IndexIteratorStage::getStats() { - return make_unique<PlanStageStats>(_commonStats, STAGE_INDEX_ITERATOR); -} - -} // namespace mongo diff --git a/src/mongo/db/exec/index_iterator.h b/src/mongo/db/exec/index_iterator.h deleted file mode 100644 index 3e989eb9aae..00000000000 --- a/src/mongo/db/exec/index_iterator.h +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Copyright (C) 2015 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include <memory> -#include <vector> - -#include "mongo/db/exec/plan_stage.h" -#include "mongo/db/exec/plan_stats.h" -#include "mongo/db/storage/sorted_data_interface.h" - -namespace mongo { -class Collection; - -/** - * A simple wrapper around a SortedDataInterface::Cursor, which will advance the cursor until it is - * exhausted. - */ -class IndexIteratorStage final : public PlanStage { -public: - IndexIteratorStage(OperationContext* opCtx, - WorkingSet* ws, - Collection* collection, - IndexAccessMethod* iam, - BSONObj keyPattern, - std::unique_ptr<SortedDataInterface::Cursor> cursor); - - PlanStage::StageState doWork(WorkingSetID* out) final; - - bool isEOF() final; - - void doSaveState() final { - _cursor->save(); - } - void doRestoreState() final { - _cursor->restore(); - } - void doDetachFromOperationContext() final { - _cursor->detachFromOperationContext(); - } - void doReattachToOperationContext() final { - _cursor->reattachToOperationContext(getOpCtx()); - } - - std::unique_ptr<PlanStageStats> getStats() final; - - // Not used. - SpecificStats* getSpecificStats() const final { - return nullptr; - } - - StageType stageType() const final { - return STAGE_INDEX_ITERATOR; - } - - static const char* kStageType; - -private: - Collection* _collection; - WorkingSet* _ws; - IndexAccessMethod* _iam; // owned by Collection -> IndexCatalog. - - std::unique_ptr<SortedDataInterface::Cursor> _cursor; - const BSONObj _keyPattern; -}; - -} // namespace mongo diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index 7ab26cbb9c2..04ebed5b623 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -190,11 +190,6 @@ std::unique_ptr<SortedDataInterface::Cursor> IndexAccessMethod::newCursor(Operat return _newInterface->newCursor(opCtx, isForward); } -std::unique_ptr<SortedDataInterface::Cursor> IndexAccessMethod::newRandomCursor( - OperationContext* opCtx) const { - return _newInterface->newRandomCursor(opCtx); -} - // Remove the provided doc from the index. Status IndexAccessMethod::remove(OperationContext* opCtx, const BSONObj& obj, diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 5035a636085..feb51712f1a 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -133,10 +133,6 @@ public: */ std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* opCtx, bool isForward = true) const; - /** - * Returns a pseudo-random cursor over 'this' index. - */ - std::unique_ptr<SortedDataInterface::Cursor> newRandomCursor(OperationContext* opCtx) const; // ------ index level operations ------ diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 1c1a16883aa..653cc389344 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -46,7 +46,6 @@ #include "mongo/db/db_raii.h" #include "mongo/db/dbdirectclient.h" #include "mongo/db/exec/fetch.h" -#include "mongo/db/exec/index_iterator.h" #include "mongo/db/exec/multi_iterator.h" #include "mongo/db/exec/shard_filter.h" #include "mongo/db/exec/working_set.h" @@ -109,44 +108,16 @@ StatusWith<unique_ptr<PlanExecutor, PlanExecutor::Deleter>> createRandomCursorEx return {nullptr}; } - // Attempt to get a random cursor from the RecordStore. If the RecordStore does not support - // random cursors, attempt to get one from the _id index. - std::unique_ptr<RecordCursor> rsRandCursor = - collection->getRecordStore()->getRandomCursor(opCtx); + // Attempt to get a random cursor from the RecordStore. + auto rsRandCursor = collection->getRecordStore()->getRandomCursor(opCtx); + if (!rsRandCursor) { + // The storage engine has no random cursor support. + return {nullptr}; + } auto ws = stdx::make_unique<WorkingSet>(); - std::unique_ptr<PlanStage> stage; - - if (rsRandCursor) { - stage = stdx::make_unique<MultiIteratorStage>(opCtx, ws.get(), collection); - static_cast<MultiIteratorStage*>(stage.get())->addIterator(std::move(rsRandCursor)); - - } else { - auto indexCatalog = collection->getIndexCatalog(); - auto indexDescriptor = indexCatalog->findIdIndex(opCtx); - - if (!indexDescriptor) { - // There was no _id index. - return {nullptr}; - } - - IndexAccessMethod* idIam = indexCatalog->getIndex(indexDescriptor); - auto idxRandCursor = idIam->newRandomCursor(opCtx); - - if (!idxRandCursor) { - // Storage engine does not support any type of random cursor. - return {nullptr}; - } - - auto idxIterator = stdx::make_unique<IndexIteratorStage>(opCtx, - ws.get(), - collection, - idIam, - indexDescriptor->keyPattern(), - std::move(idxRandCursor)); - stage = stdx::make_unique<FetchStage>( - opCtx, ws.get(), idxIterator.release(), nullptr, collection); - } + auto stage = stdx::make_unique<MultiIteratorStage>(opCtx, ws.get(), collection); + stage->addIterator(std::move(rsRandCursor)); { AutoGetCollectionForRead autoColl(opCtx, collection->ns()); diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index 16b80841311..16b4be88766 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -359,7 +359,6 @@ PlanStage* buildStages(OperationContext* opCtx, case STAGE_EOF: case STAGE_GROUP: case STAGE_IDHACK: - case STAGE_INDEX_ITERATOR: case STAGE_MULTI_ITERATOR: case STAGE_MULTI_PLAN: case STAGE_PIPELINE_PROXY: diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h index eb385828944..610aadbdeaf 100644 --- a/src/mongo/db/query/stage_types.h +++ b/src/mongo/db/query/stage_types.h @@ -72,9 +72,6 @@ enum StageType { STAGE_IDHACK, - // Simple wrapper to iterate a SortedDataInterface::Cursor. - STAGE_INDEX_ITERATOR, - STAGE_IXSCAN, STAGE_LIMIT, diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript index 2aeb45e3f0e..202902ecfc7 100644 --- a/src/mongo/db/storage/SConscript +++ b/src/mongo/db/storage/SConscript @@ -121,7 +121,6 @@ env.Library( 'sorted_data_interface_test_harness.cpp', 'sorted_data_interface_test_insert.cpp', 'sorted_data_interface_test_isempty.cpp', - 'sorted_data_interface_test_rand_cursor.cpp', 'sorted_data_interface_test_rollback.cpp', 'sorted_data_interface_test_spaceused.cpp', 'sorted_data_interface_test_touch.cpp', diff --git a/src/mongo/db/storage/sorted_data_interface.h b/src/mongo/db/storage/sorted_data_interface.h index 7dfcc8554f3..aa6d1e6e51d 100644 --- a/src/mongo/db/storage/sorted_data_interface.h +++ b/src/mongo/db/storage/sorted_data_interface.h @@ -363,24 +363,6 @@ public: virtual std::unique_ptr<Cursor> newCursor(OperationContext* opCtx, bool isForward = true) const = 0; - /** - * Constructs a cursor over an index that returns entries in a randomized order, and allows - * storage engines to provide a more efficient way to randomly sample a collection than - * MongoDB's default sampling methods, which are used when this method returns {}. Note if it is - * possible to implement RecordStore::getRandomCursor(), that method is preferred, as it will - * return the entire document, whereas this method will only return the index key and the - * RecordId, requiring an extra lookup. - * - * This method may be implemented using a pseudo-random walk over B-trees or a similar approach. - * Different cursors should return entries in a different order. Random cursors may return the - * same entry more than once and, as a result, may return more entries than exist in the index. - * Implementations should avoid obvious biases toward older, newer, larger smaller or other - * specific classes of entries. - */ - virtual std::unique_ptr<Cursor> newRandomCursor(OperationContext* opCtx) const { - return {}; - } - // // Index creation // diff --git a/src/mongo/db/storage/sorted_data_interface_test_rand_cursor.cpp b/src/mongo/db/storage/sorted_data_interface_test_rand_cursor.cpp deleted file mode 100644 index afada4a0fc1..00000000000 --- a/src/mongo/db/storage/sorted_data_interface_test_rand_cursor.cpp +++ /dev/null @@ -1,248 +0,0 @@ -/** - * Copyright (C) 2015 MongoDB Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the GNU Affero General Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - - -#include "mongo/platform/basic.h" - -#include "mongo/db/storage/sorted_data_interface_test_harness.h" - -#include <memory> - -#include "mongo/db/storage/sorted_data_interface.h" -#include "mongo/unittest/unittest.h" - -using std::unique_ptr; -using std::set; -using std::string; -using std::stringstream; - -namespace mongo { -namespace { - -// A random iterator should never return any entries from an empty index. -TEST(SortedDataInterface, GetRandomIteratorEmpty) { - const auto harnessHelper(newSortedDataInterfaceHarnessHelper()); - unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true)); - - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - ASSERT(sorted->isEmpty(opCtx.get())); - } - - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - auto cursor = sorted->newRandomCursor(opCtx.get()); - if (!cursor) { - // newRandomCursor is not supported. - return; - } - ASSERT(!cursor->next()); - } -} - -// General test for 'randomness' of the cursor. With N entries in the index, we should see at least -// N/4 distinct entries after iterating N - 1 times. -TEST(SortedDataInterface, GetRandomIteratorNonEmpty) { - const auto harnessHelper(newSortedDataInterfaceHarnessHelper()); - unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false)); - - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - ASSERT_EQUALS(0, sorted->numEntries(opCtx.get())); - } - - // Insert enough entries to generate multiple levels of a tree. - const unsigned nToInsert = 5000; - RecordId rids[nToInsert]; - for (unsigned i = 0; i < nToInsert; i++) { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - { - BSONObj data = BSON("" << i); - - WriteUnitOfWork uow(opCtx.get()); - rids[i] = RecordId(42, i * 2); - Status res = sorted->insert(opCtx.get(), data, rids[i], true); - ASSERT_OK(res); - uow.commit(); - } - } - - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get())); - } - - set<RecordId> remain(rids, rids + nToInsert); - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - auto cursor = sorted->newRandomCursor(opCtx.get()); - if (!cursor) { - // newRandomCursor is not supported. - return; - } - - // Iterate documents and mark those visited, but let at least one remain. - for (unsigned i = 0; i < nToInsert - 1; i++) { - // Get a new cursor once in a while, shouldn't affect things. - if (i % (nToInsert / 8) == 0) { - cursor = sorted->newRandomCursor(opCtx.get()); - } - remain.erase(cursor->next()->loc); // Can happen more than once per doc. - } - ASSERT(!remain.empty()); - ASSERT(cursor->next()); - - // We should have at least visited a quarter of the items if we're any random at all - // The expected fraction of visited entrys is 62.3%. - ASSERT_LT(remain.size(), nToInsert * 3 / 4); - } -} - -// With only a single entry in the index, we should always receive that entry via a random cursor. -TEST(SortedDataInterface, GetRandomIteratorSingleton) { - const auto harnessHelper(newSortedDataInterfaceHarnessHelper()); - unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true)); - - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - ASSERT(sorted->isEmpty(opCtx.get())); - } - - // Insert one entry. - RecordId idToRetrieve(42, 0); - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - WriteUnitOfWork uow(opCtx.get()); - Status res = sorted->insert(opCtx.get(), BSON("" << 0), idToRetrieve, false); - ASSERT_OK(res); - uow.commit(); - } - - // Double-check that the index has one entry in it now. - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - ASSERT_EQ(1, sorted->numEntries(opCtx.get())); - } - - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - auto cursor = sorted->newRandomCursor(opCtx.get()); - if (!cursor) { - // newRandomCursor is not supported. - return; - } - - // The random cursor should keep returning the single existing entry. - for (int i = 0; i < 10; i++) { - auto entry = cursor->next(); - ASSERT_EQ(entry->loc, idToRetrieve); - } - - // Check deattaching / reattaching. - cursor->save(); - cursor->detachFromOperationContext(); - auto newClient = harnessHelper->serviceContext()->makeClient( - "New client for SortedDataInterface::GetRandomIteratorSingleton"); - auto newOpCtx(harnessHelper->newOperationContext(newClient.get())); - cursor->reattachToOperationContext(newOpCtx.get()); - cursor->restore(); - - auto entry = cursor->next(); - ASSERT_EQ(entry->loc, idToRetrieve); - - // The random cursor should keep returning the single existing entry. - for (int i = 0; i < 10; i++) { - entry = cursor->next(); - ASSERT_EQ(entry->loc, idToRetrieve); - } - } -} - -// With enough samples, we should eventually have returned every document in the tree at least once. -TEST(SortedDataInterface, GetRandomIteratorLargeDocs) { - const auto harnessHelper(newSortedDataInterfaceHarnessHelper()); - unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true)); - - // Seed the random number generator. - harnessHelper->client()->getPrng() = PseudoRandom(1234); - - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - ASSERT_EQUALS(0, sorted->numEntries(opCtx.get())); - } - - // Insert enough entries to generate multiple levels of a tree. - const unsigned nToInsert = 200; - RecordId rids[nToInsert]; - // Add a large string to the entries, to encourage more branching in the tree. - int strBytes = 500; - std::string keyStr('c', strBytes / sizeof('c')); - for (unsigned i = 0; i < nToInsert; i++) { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - { - BSONObj data = BSON("" << i << "" << keyStr); - - WriteUnitOfWork uow(opCtx.get()); - rids[i] = RecordId(42, i * 2); - Status res = sorted->insert(opCtx.get(), data, rids[i], false); - ASSERT_OK(res); - uow.commit(); - } - } - - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get())); - } - - set<RecordId> remain(rids, rids + nToInsert); - { - auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); - auto cursor = sorted->newRandomCursor(opCtx.get()); - if (!cursor) { - // newRandomCursor is not supported. - return; - } - - // Iterate the cursor enough times so that there should be a large probability of seeing all - // the documents. - for (unsigned i = 0; i < nToInsert * 15; i++) { - // Get a new cursor once in a while, shouldn't affect things. - if (i % (nToInsert / 8) == 0) { - cursor = sorted->newRandomCursor(opCtx.get()); - } - remain.erase(cursor->next()->loc); // Can happen more than once per doc. - } - // By this point, we should have visited all entries. - ASSERT_EQ(remain.size(), 0UL); - } -} - -} // namespace -} // namespace mongo |