diff options
author | Charlie Swanson <cswanson310@gmail.com> | 2015-10-23 16:04:42 -0400 |
---|---|---|
committer | Charlie Swanson <cswanson310@gmail.com> | 2015-10-23 17:46:09 -0400 |
commit | 9284edb11f0626c77017117ef02790881586aa26 (patch) | |
tree | 69bf0c8d5120085228b70f7e4b94bdd427e5d007 | |
parent | d4d8955022819016cca2d5555ddec69c12f8c9dc (diff) | |
download | mongo-9284edb11f0626c77017117ef02790881586aa26.tar.gz |
SERVER-20906 Add a random cursor implementation for mmap_v1 storage engine
-rw-r--r-- | src/mongo/db/exec/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/exec/index_iterator.cpp | 93 | ||||
-rw-r--r-- | src/mongo/db/exec/index_iterator.h | 93 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/index/index_access_method.h | 4 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.cpp | 48 | ||||
-rw-r--r-- | src/mongo/db/query/stage_types.h | 4 | ||||
-rw-r--r-- | src/mongo/db/storage/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp | 53 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp | 98 | ||||
-rw-r--r-- | src/mongo/db/storage/mmap_v1/btree/btree_logic.h | 14 | ||||
-rw-r--r-- | src/mongo/db/storage/sorted_data_interface.h | 18 | ||||
-rw-r--r-- | src/mongo/db/storage/sorted_data_interface_test_harness.h | 26 | ||||
-rw-r--r-- | src/mongo/db/storage/sorted_data_interface_test_rand_cursor.cpp | 246 |
14 files changed, 692 insertions, 12 deletions
diff --git a/src/mongo/db/exec/SConscript b/src/mongo/db/exec/SConscript index bc4db81ddaa..dc8c83ae743 100644 --- a/src/mongo/db/exec/SConscript +++ b/src/mongo/db/exec/SConscript @@ -50,6 +50,7 @@ env.Library( "geo_near.cpp", "group.cpp", "idhack.cpp", + "index_iterator.cpp", "index_scan.cpp", "keep_mutations.cpp", "limit.cpp", diff --git a/src/mongo/db/exec/index_iterator.cpp b/src/mongo/db/exec/index_iterator.cpp new file mode 100644 index 00000000000..f12947b996d --- /dev/null +++ b/src/mongo/db/exec/index_iterator.cpp @@ -0,0 +1,93 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/exec/index_iterator.h" + +#include "mongo/db/catalog/collection.h" +#include "mongo/db/exec/scoped_timer.h" +#include "mongo/db/index/index_access_method.h" +#include "mongo/stdx/memory.h" + +namespace mongo { + +using std::unique_ptr; +using stdx::make_unique; + +const char* IndexIteratorStage::kStageType = "INDEX_ITERATOR"; + +IndexIteratorStage::IndexIteratorStage(OperationContext* txn, + WorkingSet* ws, + Collection* collection, + IndexAccessMethod* iam, + BSONObj keyPattern, + unique_ptr<SortedDataInterface::Cursor> cursor) + : PlanStage(kStageType, txn), + _collection(collection), + _ws(ws), + _iam(iam), + _cursor(std::move(cursor)), + _keyPattern(keyPattern.getOwned()) { + invariant(_collection); // It is illegal to use this stage without a collection. +} + +PlanStage::StageState IndexIteratorStage::work(WorkingSetID* out) { + ++_commonStats.works; + + // Adds the amount of time taken by work() to executionTimeMillis. + ScopedTimer timer(&_commonStats.executionTimeMillis); + + if (auto entry = _cursor->next()) { + if (!entry->key.isOwned()) + entry->key = entry->key.getOwned(); + + WorkingSetID id = _ws->allocate(); + WorkingSetMember* member = _ws->get(id); + member->loc = entry->loc; + member->keyData.push_back(IndexKeyDatum(_keyPattern, entry->key, _iam)); + _ws->transitionToLocAndIdx(id); + + *out = id; + ++_commonStats.advanced; + return PlanStage::ADVANCED; + } + + _commonStats.isEOF = true; + return PlanStage::IS_EOF; +} + +bool IndexIteratorStage::isEOF() { + return _commonStats.isEOF; +} + +unique_ptr<PlanStageStats> IndexIteratorStage::getStats() { + return make_unique<PlanStageStats>(_commonStats, STAGE_INDEX_ITERATOR); +} + +} // namespace mongo diff --git a/src/mongo/db/exec/index_iterator.h b/src/mongo/db/exec/index_iterator.h new file mode 100644 index 00000000000..3adb1a33104 --- /dev/null +++ b/src/mongo/db/exec/index_iterator.h @@ -0,0 +1,93 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <memory> +#include <vector> + +#include "mongo/db/exec/plan_stage.h" +#include "mongo/db/exec/plan_stats.h" +#include "mongo/db/storage/sorted_data_interface.h" + +namespace mongo { +class Collection; + +/** + * A simple wrapper around a SortedDataInterface::Cursor, which will advance the cursor until it is + * exhausted. + */ +class IndexIteratorStage final : public PlanStage { +public: + IndexIteratorStage(OperationContext* txn, + WorkingSet* ws, + Collection* collection, + IndexAccessMethod* iam, + BSONObj keyPattern, + std::unique_ptr<SortedDataInterface::Cursor> cursor); + + PlanStage::StageState work(WorkingSetID* out) final; + + bool isEOF() final; + + void doSaveState() final { + _cursor->save(); + } + void doRestoreState() final { + _cursor->restore(); + } + void doDetachFromOperationContext() final { + _cursor->detachFromOperationContext(); + } + void doReattachToOperationContext() final { + _cursor->reattachToOperationContext(getOpCtx()); + } + + std::unique_ptr<PlanStageStats> getStats() final; + + // Not used. + SpecificStats* getSpecificStats() const final { + return nullptr; + } + + StageType stageType() const final { + return STAGE_INDEX_ITERATOR; + } + + static const char* kStageType; + +private: + Collection* _collection; + WorkingSet* _ws; + IndexAccessMethod* _iam; // owned by Collection -> IndexCatalog. + + std::unique_ptr<SortedDataInterface::Cursor> _cursor; + const BSONObj _keyPattern; +}; + +} // namespace mongo diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp index b40c2319a8a..d731f9cbfed 100644 --- a/src/mongo/db/index/index_access_method.cpp +++ b/src/mongo/db/index/index_access_method.cpp @@ -168,6 +168,11 @@ std::unique_ptr<SortedDataInterface::Cursor> IndexAccessMethod::newCursor(Operat return _newInterface->newCursor(txn, isForward); } +std::unique_ptr<SortedDataInterface::Cursor> IndexAccessMethod::newRandomCursor( + OperationContext* txn) const { + return _newInterface->newRandomCursor(txn); +} + // Remove the provided doc from the index. Status IndexAccessMethod::remove(OperationContext* txn, const BSONObj& obj, diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 5dc88d30d80..df62c4ae937 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -123,6 +123,10 @@ public: */ std::unique_ptr<SortedDataInterface::Cursor> newCursor(OperationContext* txn, bool isForward = true) const; + /** + * Returns a pseudo-random cursor over 'this' index. + */ + std::unique_ptr<SortedDataInterface::Cursor> newRandomCursor(OperationContext* txn) const; // ------ index level operations ------ diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 5005e2ccdb5..27eda2730f1 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -35,17 +35,21 @@ #include "mongo/db/catalog/database.h" #include "mongo/db/catalog/document_validation.h" #include "mongo/db/concurrency/write_conflict_exception.h" +#include "mongo/db/exec/fetch.h" +#include "mongo/db/exec/index_iterator.h" #include "mongo/db/exec/multi_iterator.h" -#include "mongo/db/exec/working_set.h" #include "mongo/db/exec/shard_filter.h" +#include "mongo/db/exec/working_set.h" #include "mongo/db/db_raii.h" #include "mongo/db/dbdirectclient.h" +#include "mongo/db/index/index_access_method.h" #include "mongo/db/pipeline/document_source.h" #include "mongo/db/pipeline/pipeline.h" #include "mongo/db/query/get_executor.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/service_context.h" #include "mongo/db/storage/record_store.h" +#include "mongo/db/storage/sorted_data_interface.h" #include "mongo/db/s/sharded_connection_info.h" #include "mongo/db/s/sharding_state.h" #include "mongo/s/chunk_version.h" @@ -130,15 +134,43 @@ shared_ptr<PlanExecutor> createRandomCursorExecutor(Collection* collection, if (sampleSize > numRecords * kMaxSampleRatioForRandCursor || numRecords <= 100) return {}; - // Attempt to get a random cursor from the storage engine. - auto randCursor = collection->getRecordStore()->getRandomCursor(txn); - - if (!randCursor) - return {}; + // Attempt to get a random cursor from the RecordStore. If the RecordStore does not support + // random cursors, attempt to get one from the _id index. + std::unique_ptr<RecordCursor> rsRandCursor = collection->getRecordStore()->getRandomCursor(txn); auto ws = stdx::make_unique<WorkingSet>(); - auto stage = stdx::make_unique<MultiIteratorStage>(txn, ws.get(), collection); - stage->addIterator(std::move(randCursor)); + std::unique_ptr<PlanStage> stage; + + if (rsRandCursor) { + stage = stdx::make_unique<MultiIteratorStage>(txn, ws.get(), collection); + static_cast<MultiIteratorStage*>(stage.get())->addIterator(std::move(rsRandCursor)); + + } else { + auto indexCatalog = collection->getIndexCatalog(); + auto indexDescriptor = indexCatalog->findIdIndex(txn); + + if (!indexDescriptor) { + // There was no _id index. + return {}; + } + + IndexAccessMethod* idIam = indexCatalog->getIndex(indexDescriptor); + auto idxRandCursor = idIam->newRandomCursor(txn); + + if (!idxRandCursor) { + // Storage engine does not support any type of random cursor. + return {}; + } + + auto idxIterator = stdx::make_unique<IndexIteratorStage>(txn, + ws.get(), + collection, + idIam, + indexDescriptor->keyPattern(), + idxRandCursor.release()); + stage = stdx::make_unique<FetchStage>( + txn, ws.get(), idxIterator.release(), nullptr, collection); + } ShardingState* const shardingState = ShardingState::get(txn); diff --git a/src/mongo/db/query/stage_types.h b/src/mongo/db/query/stage_types.h index 35964248e48..29d16653f72 100644 --- a/src/mongo/db/query/stage_types.h +++ b/src/mongo/db/query/stage_types.h @@ -72,6 +72,10 @@ enum StageType { STAGE_GROUP, STAGE_IDHACK, + + // Simple wrapper to iterate a SortedDataInterface::Cursor. + STAGE_INDEX_ITERATOR, + STAGE_IXSCAN, STAGE_LIMIT, diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript index d1470c8c04e..1215d5afbbd 100644 --- a/src/mongo/db/storage/SConscript +++ b/src/mongo/db/storage/SConscript @@ -88,6 +88,7 @@ env.Library( 'sorted_data_interface_test_harness.cpp', 'sorted_data_interface_test_insert.cpp', 'sorted_data_interface_test_isempty.cpp', + 'sorted_data_interface_test_rand_cursor.cpp', 'sorted_data_interface_test_rollback.cpp', 'sorted_data_interface_test_spaceused.cpp', 'sorted_data_interface_test_touch.cpp', diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp index 9232d3f5910..2ae4b0e38bb 100644 --- a/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp +++ b/src/mongo/db/storage/mmap_v1/btree/btree_interface.cpp @@ -346,6 +346,59 @@ public: return stdx::make_unique<Cursor>(txn, _btree.get(), isForward); } + class RandomCursor final : public SortedDataInterface::Cursor { + public: + RandomCursor(OperationContext* txn, const BtreeLogic<OnDiskFormat>* btree) + : _txn(txn), _btree(btree) {} + + boost::optional<IndexKeyEntry> next(RequestedInfo parts) override { + if (_btree->isEmpty(_txn)) { + return {}; + } + return _btree->getRandomEntry(_txn); + } + + void detachFromOperationContext() final { + _txn = nullptr; + } + + void reattachToOperationContext(OperationContext* txn) final { + _txn = txn; + } + + // + // Should never be called. + // + void setEndPosition(const BSONObj& key, bool inclusive) override { + MONGO_UNREACHABLE; + } + boost::optional<IndexKeyEntry> seek(const BSONObj& key, + bool inclusive, + RequestedInfo parts) override { + MONGO_UNREACHABLE; + } + boost::optional<IndexKeyEntry> seek(const IndexSeekPoint& seekPoint, + RequestedInfo parts) override { + MONGO_UNREACHABLE; + } + + // + // May be called, but are no-ops. + // + void save() override {} + void saveUnpositioned() override {} + void restore() override {} + + private: + OperationContext* _txn; + const BtreeLogic<OnDiskFormat>* const _btree; + }; + + virtual std::unique_ptr<SortedDataInterface::Cursor> newRandomCursor( + OperationContext* txn) const { + return stdx::make_unique<RandomCursor>(txn, _btree.get()); + } + virtual Status initAsEmpty(OperationContext* txn) { return _btree->initAsEmpty(txn); } diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp b/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp index 11e31b3fce7..6e2d19d6bce 100644 --- a/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp +++ b/src/mongo/db/storage/mmap_v1/btree/btree_logic.cpp @@ -30,6 +30,9 @@ #include "mongo/platform/basic.h" +#include <numeric> + +#include "mongo/db/client.h" #include "mongo/db/jsobj.h" #include "mongo/db/operation_context.h" #include "mongo/db/storage/mmap_v1/btree/btree_logic.h" @@ -1911,6 +1914,101 @@ BSONObj BtreeLogic<BtreeLayout>::getKey(OperationContext* txn, } template <class BtreeLayout> +IndexKeyEntry BtreeLogic<BtreeLayout>::getRandomEntry(OperationContext* txn) const { + // To ensure a uniform distribution, all keys must have an equal probability of being selected. + // Specifically, a key from the root should have the same probability of being selected as a key + // from a leaf. + // + // Here we do a random walk until we get to a leaf, storing a random key from each bucket along + // the way down. Because the root is always present in the random walk, but any given leaf would + // seldom be seen, we assign weights to each key such that the key from the leaf is much more + // likely to be selected than the key from the root. These weights attempt to ensure each entry + // is equally likely to be selected and avoid bias towards the entries closer to the root. + // + // As a simplification, we treat all buckets in a given level as having the same number of + // children. While this is inaccurate if the tree isn't perfectly balanced or if key-size + // greatly varies, it is assumed to be good enough for this purpose. + invariant(!isEmpty(txn)); + BucketType* root = getRoot(txn); + + vector<int64_t> nKeysInLevel; + vector<FullKey> selectedKeys; + + auto& prng = txn->getClient()->getPrng(); + + int nRetries = 0; + const int kMaxRetries = 5; + do { + // See documentation below for description of parameters. + recordRandomWalk(txn, &prng, root, 1, &nKeysInLevel, &selectedKeys); + } while (selectedKeys.empty() && nRetries++ < kMaxRetries); + massert(28826, + str::stream() << "index " << _indexName << " may be corrupt, please repair", + !selectedKeys.empty()); + + invariant(nKeysInLevel.size() == selectedKeys.size()); + // Select a key from the random walk such that each key from the B-tree has an equal probability + // of being selected. + // + // Let N be the sum of 'nKeysInLevel'. That is, the total number of keys in the B-tree. + // + // On our walk down the tree, we selected exactly one key from each level of the B-tree, where + // 'selectedKeys[i]' came from the ith level of the tree. On any given level, each key has an + // equal probability of being selected. Specifically, a key on level i has a probability of + // 1/'nKeysInLevel[i]' of being selected as 'selectedKeys[i]'. Then if, given our selected keys, + // we choose to return 'selectedKeys[i]' with a probability of 'nKeysInLevel[i]'/N, that key + // will be returned with a probability of 1/'nKeysInLevel[i]' * 'nKeysInLevel[i]'/N = 1/N. + // + // So 'selectedKeys[i]' should have a probability of 'nKeysInLevel[i]'/N of being returned. We + // will do so by picking a random number X in the range [0, N). Then, if X is in the first + // 'nKeysInLevel[0]' numbers, we will return 'selectedKeys[0]'. If X is in the next + // 'nKeysInLevel[1]' numbers, we will return 'selectedKeys[1]', and so on. + int64_t choice = prng.nextInt64(std::accumulate(nKeysInLevel.begin(), nKeysInLevel.end(), 0)); + for (size_t i = 0; i < nKeysInLevel.size(); i++) { + if (choice < nKeysInLevel[i]) { + return {selectedKeys[i].data.toBson(), selectedKeys[i].header.recordLoc.toRecordId()}; + } + choice -= nKeysInLevel[i]; + } + MONGO_UNREACHABLE; +} + +/** + * Does a random walk through the tree, recording information about the walk along the way. + * + * 'nKeysInLevel' will be filled in such that 'nKeysInLevel[i]' is an approximation of the number of + * keys in the ith level of the B-tree. + * + * 'selectedKeys' will be filled in such that 'selectedKeys[i]' will be a pseudo-random key selected + * from the bucket we went through on the ith level of the B-tree. + */ +template <class BtreeLayout> +void BtreeLogic<BtreeLayout>::recordRandomWalk(OperationContext* txn, + PseudoRandom* prng, + BucketType* curBucket, + int64_t nBucketsInCurrentLevel, + vector<int64_t>* nKeysInLevel, + vector<FullKey>* selectedKeys) const { + // Select a random key from this bucket, and record it. + int nKeys = curBucket->n; + int keyToReturn = prng->nextInt32(nKeys); + auto fullKey = getFullKey(curBucket, keyToReturn); + // If the key is not used, just skip this level. + if (fullKey.header.isUsed()) { + selectedKeys->push_back(std::move(fullKey)); + nKeysInLevel->push_back(nBucketsInCurrentLevel * nKeys); + } + + // Select a random child and descend (if there are any). + int nChildren = nKeys + 1; + int nextChild = prng->nextInt32(nChildren); + if (auto child = childForPos(txn, curBucket, nextChild)) { + recordRandomWalk( + txn, prng, child, nBucketsInCurrentLevel * nChildren, nKeysInLevel, selectedKeys); + } +} + +template <class BtreeLayout> Status BtreeLogic<BtreeLayout>::touch(OperationContext* txn) const { return _recordStore->touch(txn, NULL); } diff --git a/src/mongo/db/storage/mmap_v1/btree/btree_logic.h b/src/mongo/db/storage/mmap_v1/btree/btree_logic.h index 3c742170bcd..4859b476723 100644 --- a/src/mongo/db/storage/mmap_v1/btree/btree_logic.h +++ b/src/mongo/db/storage/mmap_v1/btree/btree_logic.h @@ -41,6 +41,7 @@ namespace mongo { +class PseudoRandom; class RecordStore; class SavedCursorRegistry; @@ -177,6 +178,12 @@ public: BSONObj getKey(OperationContext* txn, const DiskLoc& bucketLoc, const int keyOffset) const; + /** + * Returns a pseudo-random element from the tree. It is an error to call this method if the tree + * is empty. + */ + IndexKeyEntry getRandomEntry(OperationContext* txn) const; + DiskLoc getHead(OperationContext* txn) const { return DiskLoc::fromRecordId(_headManager->getHead(txn)); } @@ -541,6 +548,13 @@ private: DiskLoc getRootLoc(OperationContext* txn) const; + void recordRandomWalk(OperationContext* txn, + PseudoRandom* prng, + BucketType* curBucket, + int64_t nBucketsInCurrentLevel, + std::vector<int64_t>* nKeysInLevel, + std::vector<FullKey>* selectedKeys) const; + // // Data // diff --git a/src/mongo/db/storage/sorted_data_interface.h b/src/mongo/db/storage/sorted_data_interface.h index d6b176f449e..9507339995a 100644 --- a/src/mongo/db/storage/sorted_data_interface.h +++ b/src/mongo/db/storage/sorted_data_interface.h @@ -355,6 +355,24 @@ public: virtual std::unique_ptr<Cursor> newCursor(OperationContext* txn, bool isForward = true) const = 0; + /** + * Constructs a cursor over an index that returns entries in a randomized order, and allows + * storage engines to provide a more efficient way to randomly sample a collection than + * MongoDB's default sampling methods, which are used when this method returns {}. Note if it is + * possible to implement RecordStore::getRandomCursor(), that method is preferred, as it will + * return the entire document, whereas this method will only return the index key and the + * RecordId, requiring an extra lookup. + * + * This method may be implemented using a pseudo-random walk over B-trees or a similar approach. + * Different cursors should return entries in a different order. Random cursors may return the + * same entry more than once and, as a result, may return more entries than exist in the index. + * Implementations should avoid obvious biases toward older, newer, larger smaller or other + * specific classes of entries. + */ + virtual std::unique_ptr<Cursor> newRandomCursor(OperationContext* txn) const { + return {}; + } + // // Index creation // diff --git a/src/mongo/db/storage/sorted_data_interface_test_harness.h b/src/mongo/db/storage/sorted_data_interface_test_harness.h index e90f6606e86..ddc34c6419b 100644 --- a/src/mongo/db/storage/sorted_data_interface_test_harness.h +++ b/src/mongo/db/storage/sorted_data_interface_test_harness.h @@ -36,6 +36,8 @@ #include "mongo/db/jsobj.h" #include "mongo/db/operation_context_noop.h" #include "mongo/db/record_id.h" +#include "mongo/db/service_context.h" +#include "mongo/db/service_context_noop.h" #include "mongo/db/storage/sorted_data_interface.h" #include "mongo/stdx/memory.h" #include "mongo/util/unowned_ptr.h" @@ -84,14 +86,18 @@ class RecoveryUnit; class HarnessHelper { public: - HarnessHelper() {} - virtual ~HarnessHelper() = default; + HarnessHelper() : _serviceContext(), _client(_serviceContext.makeClient("hh")) {} + virtual ~HarnessHelper() {} virtual std::unique_ptr<SortedDataInterface> newSortedDataInterface(bool unique) = 0; virtual std::unique_ptr<RecoveryUnit> newRecoveryUnit() = 0; - virtual std::unique_ptr<OperationContext> newOperationContext() { - return stdx::make_unique<OperationContextNoop>(newRecoveryUnit().release()); + std::unique_ptr<OperationContext> newOperationContext(Client* client) { + return stdx::make_unique<OperationContextNoop>(client, 1, newRecoveryUnit().release()); + } + + std::unique_ptr<OperationContext> newOperationContext() { + return newOperationContext(nullptr); } /** @@ -101,6 +107,18 @@ public: */ std::unique_ptr<SortedDataInterface> newSortedDataInterface( bool unique, std::initializer_list<IndexKeyEntry> toInsert); + + Client* client() { + return _client.get(); + } + + ServiceContext* serviceContext() { + return &_serviceContext; + } + +private: + ServiceContextNoop _serviceContext; + ServiceContext::UniqueClient _client; }; /** diff --git a/src/mongo/db/storage/sorted_data_interface_test_rand_cursor.cpp b/src/mongo/db/storage/sorted_data_interface_test_rand_cursor.cpp new file mode 100644 index 00000000000..123776e1d8c --- /dev/null +++ b/src/mongo/db/storage/sorted_data_interface_test_rand_cursor.cpp @@ -0,0 +1,246 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/sorted_data_interface_test_harness.h" + +#include <memory> + +#include "mongo/db/storage/sorted_data_interface.h" +#include "mongo/unittest/unittest.h" + +using std::unique_ptr; +using std::set; +using std::string; +using std::stringstream; + +namespace mongo { + +// A random iterator should never return any entries from an empty index. +TEST(SortedDataInterface, GetRandomIteratorEmpty) { + unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper()); + unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true)); + + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + ASSERT(sorted->isEmpty(opCtx.get())); + } + + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + auto cursor = sorted->newRandomCursor(opCtx.get()); + if (!cursor) { + // newRandomCursor is not supported. + return; + } + ASSERT(!cursor->next()); + } +} + +// General test for 'randomness' of the cursor. With N entries in the index, we should see at least +// N/4 distinct entries after iterating N - 1 times. +TEST(SortedDataInterface, GetRandomIteratorNonEmpty) { + unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper()); + unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(false)); + + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + ASSERT_EQUALS(0, sorted->numEntries(opCtx.get())); + } + + // Insert enough entries to generate multiple levels of a tree. + const unsigned nToInsert = 5000; + RecordId rids[nToInsert]; + for (unsigned i = 0; i < nToInsert; i++) { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + { + BSONObj data = BSON("" << i); + + WriteUnitOfWork uow(opCtx.get()); + rids[i] = RecordId(42, i * 2); + Status res = sorted->insert(opCtx.get(), data, rids[i], true); + ASSERT_OK(res); + uow.commit(); + } + } + + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get())); + } + + set<RecordId> remain(rids, rids + nToInsert); + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + auto cursor = sorted->newRandomCursor(opCtx.get()); + if (!cursor) { + // newRandomCursor is not supported. + return; + } + + // Iterate documents and mark those visited, but let at least one remain. + for (unsigned i = 0; i < nToInsert - 1; i++) { + // Get a new cursor once in a while, shouldn't affect things. + if (i % (nToInsert / 8) == 0) { + cursor = sorted->newRandomCursor(opCtx.get()); + } + remain.erase(cursor->next()->loc); // Can happen more than once per doc. + } + ASSERT(!remain.empty()); + ASSERT(cursor->next()); + + // We should have at least visited a quarter of the items if we're any random at all + // The expected fraction of visited entrys is 62.3%. + ASSERT_LT(remain.size(), nToInsert * 3 / 4); + } +} + +// With only a single entry in the index, we should always receive that entry via a random cursor. +TEST(SortedDataInterface, GetRandomIteratorSingleton) { + unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper()); + unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true)); + + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + ASSERT(sorted->isEmpty(opCtx.get())); + } + + // Insert one entry. + RecordId idToRetrieve(42, 0); + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + WriteUnitOfWork uow(opCtx.get()); + Status res = sorted->insert(opCtx.get(), BSON("" << 0), idToRetrieve, false); + ASSERT_OK(res); + uow.commit(); + } + + // Double-check that the index has one entry in it now. + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + ASSERT_EQ(1, sorted->numEntries(opCtx.get())); + } + + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + auto cursor = sorted->newRandomCursor(opCtx.get()); + if (!cursor) { + // newRandomCursor is not supported. + return; + } + + // The random cursor should keep returning the single existing entry. + for (int i = 0; i < 10; i++) { + auto entry = cursor->next(); + ASSERT_EQ(entry->loc, idToRetrieve); + } + + // Check deattaching / reattaching. + cursor->save(); + cursor->detachFromOperationContext(); + auto newClient = harnessHelper->serviceContext()->makeClient( + "New client for SortedDataInterface::GetRandomIteratorSingleton"); + auto newOpCtx(harnessHelper->newOperationContext(newClient.get())); + cursor->reattachToOperationContext(newOpCtx.get()); + cursor->restore(); + + auto entry = cursor->next(); + ASSERT_EQ(entry->loc, idToRetrieve); + + // The random cursor should keep returning the single existing entry. + for (int i = 0; i < 10; i++) { + entry = cursor->next(); + ASSERT_EQ(entry->loc, idToRetrieve); + } + } +} + +// With enough samples, we should eventually have returned every document in the tree at least once. +TEST(SortedDataInterface, GetRandomIteratorLargeDocs) { + unique_ptr<HarnessHelper> harnessHelper(newHarnessHelper()); + unique_ptr<SortedDataInterface> sorted(harnessHelper->newSortedDataInterface(true)); + + // Seed the random number generator. + harnessHelper->client()->getPrng() = PseudoRandom(1234); + + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + ASSERT_EQUALS(0, sorted->numEntries(opCtx.get())); + } + + // Insert enough entries to generate multiple levels of a tree. + const unsigned nToInsert = 200; + RecordId rids[nToInsert]; + // Add a large string to the entries, to encourage more branching in the tree. + int strBytes = 500; + std::string keyStr('c', strBytes / sizeof('c')); + for (unsigned i = 0; i < nToInsert; i++) { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + { + BSONObj data = BSON("" << i << "" << keyStr); + + WriteUnitOfWork uow(opCtx.get()); + rids[i] = RecordId(42, i * 2); + Status res = sorted->insert(opCtx.get(), data, rids[i], false); + ASSERT_OK(res); + uow.commit(); + } + } + + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + ASSERT_EQUALS(nToInsert, sorted->numEntries(opCtx.get())); + } + + set<RecordId> remain(rids, rids + nToInsert); + { + auto opCtx(harnessHelper->newOperationContext(harnessHelper->client())); + auto cursor = sorted->newRandomCursor(opCtx.get()); + if (!cursor) { + // newRandomCursor is not supported. + return; + } + + // Iterate the cursor enough times so that there should be a large probability of seeing all + // the documents. + for (unsigned i = 0; i < nToInsert * 15; i++) { + // Get a new cursor once in a while, shouldn't affect things. + if (i % (nToInsert / 8) == 0) { + cursor = sorted->newRandomCursor(opCtx.get()); + } + remain.erase(cursor->next()->loc); // Can happen more than once per doc. + } + // By this point, we should have visited all entries. + ASSERT_EQ(remain.size(), 0UL); + } +} + +} // namespace mongo |