diff options
Diffstat (limited to 'src')
22 files changed, 1194 insertions, 299 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript index 5ea323b5d54..160db497a9d 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -394,7 +394,11 @@ serverOnlyFiles = [ "db/curop.cpp", "db/repl/write_concern.cpp", "db/btreecursor.cpp", "db/index/btree_access_method.cpp", + "db/index/btree_index_cursor.cpp", + "db/index/btree_interface.cpp", "db/index/btree_key_generator.cpp", + "db/index/hash_access_method.cpp", + "db/index/hash_index_cursor.cpp", "db/intervalbtreecursor.cpp", "db/btreeposition.cpp", "db/cloner.cpp", diff --git a/src/mongo/db/btreecursor.cpp b/src/mongo/db/btreecursor.cpp index 166469b32b0..20ca81eb2e9 100644 --- a/src/mongo/db/btreecursor.cpp +++ b/src/mongo/db/btreecursor.cpp @@ -75,7 +75,7 @@ namespace mongo { _independentFieldRanges = false; dassert( d->idxNo((IndexDetails&) indexDetails) == idxNo ); - _indexDescriptor.reset(CatalogHack::getDescriptor(d->idx(idxNo))); + _indexDescriptor.reset(CatalogHack::getDescriptor(d, idxNo)); _indexAM.reset(CatalogHack::getBtreeIndex(_indexDescriptor.get())); IndexCursor *cursor; @@ -102,7 +102,7 @@ namespace mongo { _boundsIterator->advance( startKey ); // handles initialization _boundsIterator->prepDive(); - _indexDescriptor.reset(CatalogHack::getDescriptor(d->idx(idxNo))); + _indexDescriptor.reset(CatalogHack::getDescriptor(d, idxNo)); _indexAM.reset(CatalogHack::getBtreeIndex(_indexDescriptor.get())); IndexCursor *cursor; diff --git a/src/mongo/db/hashindex.cpp b/src/mongo/db/hashindex.cpp index a769dc73a98..f93d56e8e70 100644 --- a/src/mongo/db/hashindex.cpp +++ b/src/mongo/db/hashindex.cpp @@ -1,3 +1,5 @@ +// XXX THIS FILE IS DEPRECATED. PLEASE DON'T MODIFY. + // mongo/db/hashindex.cpp /** diff --git a/src/mongo/db/hashindex.h b/src/mongo/db/hashindex.h index 8c020acf8c0..7451ba307f1 100644 --- a/src/mongo/db/hashindex.h +++ b/src/mongo/db/hashindex.h @@ -1,3 +1,5 @@ +// XXX THIS FILE IS DEPRECATED. PLEASE DON'T MODIFY WITHOUT TALKING TO HK + // hashindex.h /** diff --git a/src/mongo/db/index/btree_access_method.cpp b/src/mongo/db/index/btree_access_method.cpp index ea33289984b..fe7b0df86b4 100644 --- a/src/mongo/db/index/btree_access_method.cpp +++ b/src/mongo/db/index/btree_access_method.cpp @@ -18,19 +18,25 @@ #include <vector> +#include "mongo/base/status.h" #include "mongo/db/index/btree_index_cursor.h" +#include "mongo/db/index/btree_interface.h" +#include "mongo/db/jsobj.h" #include "mongo/db/pdfile.h" #include "mongo/db/pdfile_private.h" namespace mongo { - template <class Key> BtreeBasedAccessMethod<Key>::BtreeBasedAccessMethod( - IndexDescriptor *descriptor) : _descriptor(descriptor), - _ordering(Ordering::make(_descriptor->keyPattern())) { } + BtreeBasedAccessMethod::BtreeBasedAccessMethod(IndexDescriptor *descriptor) + : _descriptor(descriptor), _ordering(Ordering::make(_descriptor->keyPattern())) { + + verify(0 == descriptor->version() || 1 == descriptor->version()); + _interface = BtreeInterface::interfaces[descriptor->version()]; + } // Find the keys for obj, put them in the tree pointing to loc - template <class Key> Status BtreeBasedAccessMethod<Key>::insert(const BSONObj& obj, - const DiskLoc& loc, const InsertDeleteOptions& options, int64_t* numInserted) { + Status BtreeBasedAccessMethod::insert(const BSONObj& obj, const DiskLoc& loc, + const InsertDeleteOptions& options, int64_t* numInserted) { *numInserted = 0; @@ -42,9 +48,8 @@ namespace mongo { for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { try { - _descriptor->getHead().btree<Key>()->bt_insert( - _descriptor->getHead(), loc, *i, _ordering, options.dupsAllowed, - _descriptor->getOnDisk(), true); + _interface->bt_insert(_descriptor->getHead(), loc, *i, _ordering, + options.dupsAllowed, _descriptor->getOnDisk(), true); ++*numInserted; } catch (AssertionException& e) { if (10287 == e.getCode() && options.dupsAllowed) { @@ -69,14 +74,11 @@ namespace mongo { return ret; } - template <class Key> bool BtreeBasedAccessMethod<Key>::removeOneKey(const BSONObj& key, - const DiskLoc& loc) { - + bool BtreeBasedAccessMethod::removeOneKey(const BSONObj& key, const DiskLoc& loc) { bool ret = false; try { - ret = _descriptor->getHead().btree<Key>()->unindex(_descriptor->getHead(), - _descriptor->getOnDisk(), key, loc); + ret = _interface->unindex(_descriptor->getHead(), _descriptor->getOnDisk(), key, loc); } catch (AssertionException& e) { problem() << "Assertion failure: _unindex failed " << _descriptor->indexNamespace() << endl; @@ -91,9 +93,8 @@ namespace mongo { } // Remove the provided doc from the index. - template <class Key> Status BtreeBasedAccessMethod<Key>::remove( - const BSONObj &obj, const DiskLoc& loc, const InsertDeleteOptions &options, - int64_t* numDeleted) { + Status BtreeBasedAccessMethod::remove(const BSONObj &obj, const DiskLoc& loc, + const InsertDeleteOptions &options, int64_t* numDeleted) { BSONObjSet keys; getKeys(obj, &keys); @@ -133,22 +134,22 @@ namespace mongo { } } - template <class Key> Status BtreeBasedAccessMethod<Key>::touch(const BSONObj &obj) { + Status BtreeBasedAccessMethod::touch(const BSONObj& obj) { BSONObjSet keys; getKeys(obj, &keys); for (BSONObjSet::const_iterator i = keys.begin(); i != keys.end(); ++i) { - DiskLoc unusedDiskLoc; int unusedPos; bool unusedFound; - _descriptor->getHead().btree<Key>()->locate(_descriptor->getOnDisk(), - _descriptor->getHead(), *i, _ordering, unusedPos, unusedFound, unusedDiskLoc, 1); + DiskLoc unusedDiskLoc; + _interface->locate(_descriptor->getOnDisk(), _descriptor->getHead(), *i, _ordering, + unusedPos, unusedFound, unusedDiskLoc, 1); } return Status::OK(); } - template <class Key> Status BtreeBasedAccessMethod<Key>::validateUpdate( + Status BtreeBasedAccessMethod::validateUpdate( const BSONObj &from, const BSONObj &to, const DiskLoc &record, const InsertDeleteOptions &options, UpdateTicket* status) { @@ -167,14 +168,14 @@ namespace mongo { // Check for dups. if (!data->added.empty() && _descriptor->unique() && !options.dupsAllowed) { - const BtreeBucket<Key> *head = _descriptor->getHead().btree<Key>(); for (vector<BSONObj*>::iterator i = data->added.begin(); i != data->added.end(); i++) { - typename Key::KeyOwned key(**i); - if (head->wouldCreateDup(_descriptor->getOnDisk(), _descriptor->getHead(), - key, _ordering, record)) { + if (_interface->wouldCreateDup(_descriptor->getOnDisk(), _descriptor->getHead(), + **i, _ordering, record)) { status->_isValid = false; return Status(ErrorCodes::DuplicateKey, - head->dupKeyError(_descriptor->getOnDisk(), key)); + _interface->dupKeyError(_descriptor->getHead(), + _descriptor->getOnDisk(), + **i)); } } } @@ -184,30 +185,30 @@ namespace mongo { return Status::OK(); } - template <class Key> Status BtreeBasedAccessMethod<Key>::update(const UpdateTicket& ticket) { + Status BtreeBasedAccessMethod::update(const UpdateTicket& ticket) { if (!ticket._isValid) { return Status(ErrorCodes::InternalError, "Invalid updateticket in update"); } - BtreeBasedPrivateUpdateData *data = + BtreeBasedPrivateUpdateData* data = static_cast<BtreeBasedPrivateUpdateData*>(ticket._indexSpecificUpdateData.get()); for (size_t i = 0; i < data->added.size(); ++i) { - _descriptor->getHead().btree<Key>()->bt_insert(_descriptor->getHead(), data->loc, - *data->added[i], _ordering, data->dupsAllowed, _descriptor->getOnDisk(), true); + _interface->bt_insert(_descriptor->getHead(), data->loc, *data->added[i], _ordering, + data->dupsAllowed, _descriptor->getOnDisk(), true); } for (size_t i = 0; i < data->removed.size(); ++i) { - _descriptor->getHead().btree<Key>()->unindex(_descriptor->getHead(), - _descriptor->getOnDisk(), *data->removed[i], data->loc); + _interface->unindex(_descriptor->getHead(), _descriptor->getOnDisk(), *data->removed[i], + data->loc); } return Status::OK(); } // Standard Btree implementation below. - template <class Key> BtreeAccessMethod<Key>::BtreeAccessMethod(IndexDescriptor *descriptor) - : BtreeBasedAccessMethod<Key>(descriptor) { + BtreeAccessMethod::BtreeAccessMethod(IndexDescriptor* descriptor) + : BtreeBasedAccessMethod(descriptor) { // The key generation wants these values. vector<const char*> fieldNames; @@ -231,20 +232,13 @@ namespace mongo { } } - template <class Key> void BtreeAccessMethod<Key>::getKeys(const BSONObj &obj, - BSONObjSet *keys) { + void BtreeAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { _keyGenerator->getKeys(obj, keys); } - template <class Key> Status BtreeAccessMethod<Key>::newCursor(IndexCursor **out) { - *out = new BtreeIndexCursor<Key>(_descriptor, _ordering); + Status BtreeAccessMethod::newCursor(IndexCursor** out) { + *out = new BtreeIndexCursor(_descriptor, _ordering, _interface); return Status::OK(); } - template class BtreeBasedAccessMethod<V0>; - template class BtreeBasedAccessMethod<V1>; - - template class BtreeAccessMethod<V0>; - template class BtreeAccessMethod<V1>; - } // namespace mongo diff --git a/src/mongo/db/index/btree_access_method.h b/src/mongo/db/index/btree_access_method.h index 96f67e71e59..d8c011a2b10 100644 --- a/src/mongo/db/index/btree_access_method.h +++ b/src/mongo/db/index/btree_access_method.h @@ -27,13 +27,21 @@ namespace mongo { + class BtreeInterface; class IndexCursor; class IndexDescriptor; - template <class Key> class BtreeAccessMethod : public BtreeBasedAccessMethod<Key> { + /** + * The IndexAccessMethod for a Btree index. + * Any index created with {field: 1} or {field: -1} uses this. + */ + class BtreeAccessMethod : public BtreeBasedAccessMethod { public: - using BtreeBasedAccessMethod<Key>::_descriptor; - using BtreeBasedAccessMethod<Key>::_ordering; + // Every Btree-based index needs these. We put them in the BtreeBasedAccessMethod + // superclass and subclasses (like this) can use them. + using BtreeBasedAccessMethod::_descriptor; + using BtreeBasedAccessMethod::_interface; + using BtreeBasedAccessMethod::_ordering; BtreeAccessMethod(IndexDescriptor* descriptor); virtual ~BtreeAccessMethod() { } diff --git a/src/mongo/db/index/btree_access_method_internal.h b/src/mongo/db/index/btree_access_method_internal.h index 4d2c61bd6a6..89c6ba57e5b 100644 --- a/src/mongo/db/index/btree_access_method_internal.h +++ b/src/mongo/db/index/btree_access_method_internal.h @@ -21,6 +21,7 @@ #include "mongo/bson/ordering.h" #include "mongo/db/diskloc.h" #include "mongo/db/jsobj.h" +#include "mongo/db/index/btree_interface.h" #include "mongo/db/index/index_access_method.h" #include "mongo/db/index/index_cursor.h" #include "mongo/db/index/index_descriptor.h" @@ -35,7 +36,7 @@ namespace mongo { * 2. override newCursor, and * 3. override getKeys. */ - template <class Key> class BtreeBasedAccessMethod : public IndexAccessMethod { + class BtreeBasedAccessMethod : public IndexAccessMethod { public: BtreeBasedAccessMethod(IndexDescriptor *descriptor); virtual ~BtreeBasedAccessMethod() { } @@ -72,19 +73,29 @@ namespace mongo { virtual void getKeys(const BSONObj &obj, BSONObjSet *keys) = 0; - IndexDescriptor *_descriptor; + IndexDescriptor* _descriptor; Ordering _ordering; + // There are 2 types of Btree disk formats. We put them both behind one interface. + BtreeInterface* _interface; + private: bool removeOneKey(const BSONObj& key, const DiskLoc& loc); }; - template <class Key> class BtreeBasedAccessMethod<Key>::BtreeBasedPrivateUpdateData + /** + * What data do we need to perform an update? + */ + class BtreeBasedAccessMethod::BtreeBasedPrivateUpdateData : public UpdateTicket::PrivateUpdateData { public: virtual ~BtreeBasedPrivateUpdateData() { } + BSONObjSet oldKeys, newKeys; + + // These point into the sets oldKeys and newKeys. vector<BSONObj*> removed, added; + DiskLoc loc; bool dupsAllowed; }; diff --git a/src/mongo/db/index/btree_index_cursor.cpp b/src/mongo/db/index/btree_index_cursor.cpp new file mode 100644 index 00000000000..0c57ad6768e --- /dev/null +++ b/src/mongo/db/index/btree_index_cursor.cpp @@ -0,0 +1,231 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "mongo/db/index/btree_index_cursor.h" + +#include <vector> + +#include "mongo/base/status.h" +#include "mongo/db/diskloc.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/index/index_cursor.h" +#include "mongo/db/index/index_descriptor.h" + +namespace mongo { + + // Go forward by default. + BtreeIndexCursor::BtreeIndexCursor(IndexDescriptor *descriptor, Ordering ordering, + BtreeInterface *interface) + : _direction(1), _descriptor(descriptor), _ordering(ordering), _interface(interface), + _bucket(descriptor->getHead()), _keyOffset(0) { } + + bool BtreeIndexCursor::isEOF() const { return _bucket.isNull(); } + + // XXX SHORT TERM HACKS THAT MUST DIE: 2d index + DiskLoc BtreeIndexCursor::getBucket() const { return _bucket; } + + // XXX SHORT TERM HACKS THAT MUST DIE: 2d index + int BtreeIndexCursor::getKeyOfs() const { return _keyOffset; } + + // XXX SHORT TERM HACKS THAT MUST DIE: btree deletion + void BtreeIndexCursor::aboutToDeleteBucket(const DiskLoc& bucket) { + if (bucket == _bucket) { + _keyOffset = -1; + } + } + + Status BtreeIndexCursor::setOptions(const CursorOptions& options) { + if (CursorOptions::DECREASING == options.direction) { + _direction = -1; + } else { + _direction = 1; + } + return Status::OK(); + } + + Status BtreeIndexCursor::seek(const BSONObj& position) { + // Unused out parameter. + bool found; + + _bucket = _interface->locate( + _descriptor->getOnDisk(), + _descriptor->getHead(), + position, + _ordering, + _keyOffset, + found, + 1 == _direction ? minDiskLoc : maxDiskLoc, + _direction); + + skipUnusedKeys(); + + return Status::OK(); + } + + Status BtreeIndexCursor::seek(const vector<const BSONElement*>& position, + const vector<bool>& inclusive) { + pair<DiskLoc, int> ignored; + + // Bucket is modified by customLocate. Seeks start @ the root, so we set _bucket to the + // root here. + _bucket = _descriptor->getHead(); + + _interface->customLocate( + _bucket, + _keyOffset, + _emptyObj, + 0, + false, + position, + inclusive, + _ordering, + (int)_direction, + ignored); + + skipUnusedKeys(); + + return Status::OK(); + } + + Status BtreeIndexCursor::skip(const vector<const BSONElement*>& position, + const vector<bool>& inclusive) { + _interface->advanceTo( + _bucket, + _keyOffset, + _emptyObj, + 0, + false, + position, + inclusive, + _ordering, + (int)_direction); + + skipUnusedKeys(); + + return Status::OK(); + } + + BSONObj BtreeIndexCursor::getKey() const { + verify(!_bucket.isNull()); + return _interface->keyAt(_bucket, _keyOffset); + } + + DiskLoc BtreeIndexCursor::getValue() const { + verify(!_bucket.isNull()); + return _interface->recordAt(_bucket, _keyOffset); + } + + void BtreeIndexCursor::next() { advance("BtreeIndexCursor::next"); skipUnusedKeys(); } + + Status BtreeIndexCursor::savePosition() { + if (!isEOF()) { + _savedKey = getKey().getOwned(); + _savedLoc = getValue(); + return Status::OK(); + } else { + return Status(ErrorCodes::IllegalOperation, "Can't save position when EOF"); + } + } + + Status BtreeIndexCursor::restorePosition() { + // _keyOffset could be -1 if the bucket was deleted. When buckets are deleted, the + // Btree calls a clientcursor function that calls down to all BTree buckets. Really, + // this deletion thing should be kept BTree-internal. + if (_keyOffset >= 0) { + verify(!_savedKey.isEmpty()); + + try { + if (isSavedPositionValid()) { return Status::OK(); } + if (_keyOffset > 0) { + --_keyOffset; + // "we check one key earlier too, in case a key was just deleted. this is + // important so that multi updates are reasonably fast." -- btreecursor.cpp + if (isSavedPositionValid()) { return Status::OK(); } + } + // Object isn't at the saved position. Fall through to calling seek. + } catch (UserException& e) { + // deletedBucketCode is what keyAt throws if the bucket was deleted. Not a + // problem... + if (BtreeInterface::deletedBucketCode != e.getCode()) { + return e.toStatus(); + } + // Our bucket was deleted, so we look for the saved key. + DEV log() << "debug info: bucket was deleted" << endl; + } + } + + // Our old position was invalidated (keyOfs was set to -1) or our saved position + // is no longer valid, so we must re-find. + RARELY log() << "key seems to have moved in the index, refinding. " + << _bucket.toString() << endl; + + bool found; + + // Why don't we just call seek? Because we want to pass _savedLoc. + _bucket = _interface->locate( + _descriptor->getOnDisk(), + _descriptor->getHead(), + _savedKey, + _ordering, + _keyOffset, + found, + _savedLoc, + _direction); + + skipUnusedKeys(); + + return Status::OK(); + } + + string BtreeIndexCursor::toString() { return "I AM A BTREE INDEX CURSOR!\n"; } + + void BtreeIndexCursor::skipUnusedKeys() { + int skipped = 0; + + while (!isEOF() && !_interface->keyIsUsed(_bucket, _keyOffset)) { + advance("BtreeIndexCursor::skipUnusedKeys"); + ++skipped; + } + + if (skipped > 10) { + OCCASIONALLY log() << "btree unused skipped: " << skipped << endl; + } + } + + bool BtreeIndexCursor::isSavedPositionValid() { + // We saved the key. If it's in the same position we saved it from... + if (_interface->keyAt(_bucket, _keyOffset).binaryEqual(_savedKey)) { + // And the record it points to is the same record... + if (_interface->recordAt(_bucket, _keyOffset) == _savedLoc) { + // Success! We found it. However! + if (!_interface->keyIsUsed(_bucket, _keyOffset)) { + // We could have been deleted but still exist as a "vacant" key, so skip + // over any unused keys. + skipUnusedKeys(); + } + return true; + } + } + + return false; + } + + // Move to the next/prev. key. Used by normal getNext and also skipping unused keys. + void BtreeIndexCursor::advance(const char* caller) { + _bucket = _interface->advance(_bucket, _keyOffset, _direction, caller); + } + +} // namespace mongo diff --git a/src/mongo/db/index/btree_index_cursor.h b/src/mongo/db/index/btree_index_cursor.h index 9c24f8fe7e8..77deeeb813c 100644 --- a/src/mongo/db/index/btree_index_cursor.h +++ b/src/mongo/db/index/btree_index_cursor.h @@ -14,280 +14,83 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#pragma once + #include <vector> + #include "mongo/base/status.h" -#include "mongo/db/btree.h" #include "mongo/db/diskloc.h" #include "mongo/db/jsobj.h" +#include "mongo/db/index/btree_interface.h" #include "mongo/db/index/index_cursor.h" #include "mongo/db/index/index_descriptor.h" namespace mongo { - template <class Key> class BtreeIndexCursor : public IndexCursor { + class BtreeIndexCursor : public IndexCursor { public: - // The in-memory wrapper, not part of Btree storage. - typedef typename BucketBasics<Key>::KeyNode KeyNode; - // The on-disk data. - typedef typename Key::_KeyNode OnDiskKeyNode; - virtual ~BtreeIndexCursor() { } - bool isEOF() const { return _bucket.isNull(); } + bool isEOF() const; // XXX SHORT TERM HACKS THAT MUST DIE: 2d index - virtual DiskLoc getBucket() const { return _bucket; } + virtual DiskLoc getBucket() const; // XXX SHORT TERM HACKS THAT MUST DIE: 2d index - virtual int getKeyOfs() const { return _keyOffset; } + virtual int getKeyOfs() const; // XXX SHORT TERM HACKS THAT MUST DIE: btree deletion - virtual void aboutToDeleteBucket(const DiskLoc& bucket) { - if (bucket == _bucket) { - _keyOffset = -1; - } - } - - virtual Status setOptions(const CursorOptions& options) { - if (CursorOptions::DECREASING == options.direction) { - _direction = -1; - } else { - _direction = 1; - } - return Status::OK(); - } - - virtual Status seek(const BSONObj &position) { - // Unused out parameter. - bool found; - - _bucket = _descriptor->getHead().btree<Key>()->locate( - _descriptor->getOnDisk(), - _descriptor->getHead(), - position, - _ordering, - _keyOffset, - found, - 1 == _direction ? minDiskLoc : maxDiskLoc, - _direction); - - skipUnusedKeys(); - - return Status::OK(); - } - - virtual Status seek(const vector<const BSONElement*> &position, - const vector<bool> &inclusive) { - pair<DiskLoc, int> ignored; - - // Bucket is modified by customLocate. Seeks start @ the root, so we set _bucket to the - // root here. - _bucket = _descriptor->getHead(); + virtual void aboutToDeleteBucket(const DiskLoc& bucket); - _descriptor->getHead().btree<Key>()->customLocate( - _bucket, - _keyOffset, - _emptyObj, - 0, - false, - position, - inclusive, - _ordering, - (int)_direction, - ignored); + virtual Status setOptions(const CursorOptions& options); - skipUnusedKeys(); + virtual Status seek(const BSONObj& position); - return Status::OK(); - } + virtual Status seek(const vector<const BSONElement*>& position, + const vector<bool>& inclusive); - virtual Status skip(const vector<const BSONElement*> &position, - const vector<bool> &inclusive) { - _bucket.btree<Key>()->advanceTo( - _bucket, - _keyOffset, - _emptyObj, - 0, - false, - position, - inclusive, - _ordering, - (int)_direction); + virtual Status skip(const vector<const BSONElement*>& position, + const vector<bool>& inclusive); - skipUnusedKeys(); + virtual BSONObj getKey() const; + virtual DiskLoc getValue() const; + virtual void next(); - return Status::OK(); - } + virtual Status savePosition(); - virtual BSONObj getKey() const { return currKey(); } - virtual DiskLoc getValue() const { return currLoc(); } - virtual void next() { advance("BtreeIndexCursor::next"); skipUnusedKeys(); } + virtual Status restorePosition(); - virtual Status savePosition() { - if (!isEOF()) { - _savedKey = currKey().getOwned(); - _savedLoc = currLoc(); - return Status::OK(); - } else { - return Status(ErrorCodes::IllegalOperation, "Can't save position when EOF"); - } - } - - virtual Status restorePosition() { - // _keyOffset could be -1 if the bucket was deleted. When buckets are deleted, the - // Btree calls a clientcursor function that calls down to all BTree buckets. Really, - // this deletion thing should be kept BTree-internal. - if (_keyOffset >= 0) { - verify(!_savedKey.isEmpty()); - - try { - if (isSavedPositionValid()) { return Status::OK(); } - if (_keyOffset > 0) { - --_keyOffset; - // "we check one key earlier too, in case a key was just deleted. this is - // important so that multi updates are reasonably fast." -- btreecursor.cpp - if (isSavedPositionValid()) { return Status::OK(); } - } - // Object isn't at the saved position. Fall through to calling seek. - } catch (UserException& e) { - // deletedBucketCode is what keyAt throws if the bucket was deleted. Not a - // problem... - if (deletedBucketCode != e.getCode()) { - return e.toStatus(); - } - // Our bucket was deleted, so we look for the saved key. - DEV log() << "debug info: bucket was deleted" << endl; - } - } - - // Our old position was invalidated (keyOfs was set to -1) or our saved position - // is no longer valid, so we must re-find. - RARELY log() << "key seems to have moved in the index, refinding. " - << _bucket.toString() << endl; - - bool found; - - // Why don't we just call seek? Because we want to pass _savedLoc. - _bucket = _descriptor->getHead().btree<Key>()->locate( - _descriptor->getOnDisk(), - _descriptor->getHead(), - _savedKey, - _ordering, - _keyOffset, - found, - _savedLoc, - _direction); - - skipUnusedKeys(); - - return Status::OK(); - } - - virtual string toString() { return "I AM A BTREE INDEX CURSOR!\n"; } + virtual string toString(); private: // We keep the constructor private and only allow the AM to create us. - friend class BtreeAccessMethod<V0>; - friend class BtreeAccessMethod<V1>; + friend class BtreeAccessMethod; // Go forward by default. - BtreeIndexCursor(IndexDescriptor *descriptor, Ordering ordering) - : _direction(1), _descriptor(descriptor), _ordering(ordering) { - _keyOffset = 0; - _bucket = descriptor->getHead(); - } - - void skipUnusedKeys() { - int skipped = 0; + BtreeIndexCursor(IndexDescriptor *descriptor, Ordering ordering, BtreeInterface *interface); - while (!isEOF() && !onDiskKeyNodeAt(_bucket, _keyOffset).isUsed()) { - advance("BtreeIndexCursor::skipUnusedKeys"); - ++skipped; - } + void skipUnusedKeys(); - if (skipped > 10) { - OCCASIONALLY log() << "btree unused skipped: " << skipped << endl; - } - } - - bool isSavedPositionValid() { - // We saved the key. If it's in the same position we saved it from... - if (keyAt(_bucket, _keyOffset).binaryEqual(_savedKey)) { - const OnDiskKeyNode& kn = onDiskKeyNodeAt(_bucket, _keyOffset); - // And the record it points to is the same record... - if (kn.recordLoc == _savedLoc) { - // Success! We found it. However! - if (!kn.isUsed()) { - // We could have been deleted but still exist as a "vacant" key, so skip - // over any unused keys. - skipUnusedKeys(); - } - return true; - } - } - - return false; - } - - // Get the on-disk representation of the key at (bucket, keyOffset) - const OnDiskKeyNode& onDiskKeyNodeAt(DiskLoc bucket, int keyOffset) const { - return bucket.btree<Key>()->k(keyOffset); - } - - static const int deletedBucketCode = 16738; - - // Get the BSON value of the key that's at (bucket, ofs) - // If there is no such key, return a BSONObj(). - BSONObj keyAt(DiskLoc bucket, int ofs) const { - verify(!bucket.isNull()); - const BtreeBucket<Key> *b = bucket.btree<Key>(); - int n = b->getN(); - if (n == b->INVALID_N_SENTINEL) { - throw UserException(deletedBucketCode, "keyAt bucket deleted"); - } - dassert( n >= 0 && n < 10000 ); - return ofs >= n ? BSONObj() : b->keyNode(ofs).key.toBson(); - } + bool isSavedPositionValid(); // Move to the next/prev. key. Used by normal getNext and also skipping unused keys. - void advance(const char* caller) { - _bucket = _bucket.btree<Key>()->advance(_bucket, _keyOffset, _direction, caller); - } - - // Accessors for our current state. - const KeyNode getKeyNode() const { - verify(!_bucket.isNull()); - const BtreeBucket<Key> *b = _bucket.btree<Key>(); - return b->keyNode(_keyOffset); - } - - const BSONObj currKey() const { - verify(!_bucket.isNull()); - return getKeyNode().key.toBson(); - } - - DiskLoc currLoc() const { - verify(!_bucket.isNull()); - return getKeyNode().recordLoc; - } + void advance(const char* caller); // For saving/restoring position. BSONObj _savedKey; DiskLoc _savedLoc; - // What are we looking at RIGHT NOW? We look at a bucket. - DiskLoc _bucket; - // And we look at an offset in the bucket. - int _keyOffset; - BSONObj _emptyObj; int _direction; - IndexDescriptor *_descriptor; + IndexDescriptor* _descriptor; Ordering _ordering; - }; + BtreeInterface* _interface; - template class BtreeIndexCursor<V0>; - template class BtreeIndexCursor<V1>; + // What are we looking at RIGHT NOW? We look at a bucket. + DiskLoc _bucket; + // And we look at an offset in the bucket. + int _keyOffset; + }; -} // namespace +} // namespace mongo diff --git a/src/mongo/db/index/btree_interface.cpp b/src/mongo/db/index/btree_interface.cpp new file mode 100644 index 00000000000..952100958fd --- /dev/null +++ b/src/mongo/db/index/btree_interface.cpp @@ -0,0 +1,169 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "mongo/db/btree.h" +#include "mongo/db/index/btree_interface.h" + +namespace mongo { + + template <class Version> + class BtreeInterfaceImpl : public BtreeInterface { + public: + // typedef typename BucketBasics<Version>::VersionNode VersionNode; + + virtual ~BtreeInterfaceImpl() { } + + virtual int bt_insert(const DiskLoc thisLoc, + const DiskLoc recordLoc, + const BSONObj& key, + const Ordering &order, + bool dupsAllowed, + IndexDetails& idx, + bool toplevel) const { + // FYI: toplevel has a default value of true in btree.h + return thisLoc.btree<Version>()->bt_insert( + thisLoc, + recordLoc, + key, + order, + dupsAllowed, + idx, + toplevel); + } + + virtual bool unindex(const DiskLoc thisLoc, + IndexDetails& id, + const BSONObj& key, + const DiskLoc recordLoc) const { + return thisLoc.btree<Version>()->unindex(thisLoc, id, key, recordLoc); + } + + virtual DiskLoc locate(const IndexDetails& idx, + const DiskLoc& thisLoc, + const BSONObj& key, + const Ordering& order, + int& pos, + bool& found, + const DiskLoc& recordLoc, + int direction) const { + // FYI: direction has a default of 1 + return thisLoc.btree<Version>()->locate( + idx, + thisLoc, + key, + order, + pos, + found, + recordLoc, + direction); + } + + virtual bool wouldCreateDup(const IndexDetails& idx, + const DiskLoc& thisLoc, + const BSONObj& key, + const Ordering& order, + const DiskLoc& self) const { + typename Version::KeyOwned ownedVersion(key); + return thisLoc.btree<Version>()->wouldCreateDup( + idx, + thisLoc, + ownedVersion, + order, + self); + } + + virtual void customLocate(DiskLoc& locInOut, + int& keyOfs, + const BSONObj& keyBegin, + int keyBeginLen, bool afterVersion, + const vector<const BSONElement*>& keyEnd, + const vector<bool>& keyEndInclusive, + const Ordering& order, + int direction, + pair<DiskLoc, int>& bestParent) { + locInOut.btree<Version>()->customLocate( + locInOut, + keyOfs, + keyBegin, + keyBeginLen, + afterVersion, + keyEnd, + keyEndInclusive, + order, + direction, + bestParent); + } + + virtual void advanceTo(DiskLoc &thisLoc, + int &keyOfs, + const BSONObj &keyBegin, + int keyBeginLen, + bool afterVersion, + const vector<const BSONElement*>& keyEnd, + const vector<bool>& keyEndInclusive, + const Ordering& order, int direction) const { + thisLoc.btree<Version>()->advanceTo( + thisLoc, + keyOfs, + keyBegin, + keyBeginLen, + afterVersion, + keyEnd, + keyEndInclusive, + order, + direction); + } + + + virtual bool keyIsUsed(DiskLoc bucket, int keyOffset) const { + return bucket.btree<Version>()->k(keyOffset).isUsed(); + } + + virtual BSONObj keyAt(DiskLoc bucket, int keyOffset) const { + verify(!bucket.isNull()); + const BtreeBucket<Version> *b = bucket.btree<Version>(); + int n = b->getN(); + if (n == b->INVALID_N_SENTINEL) { + throw UserException(deletedBucketCode, "keyAt bucket deleted"); + } + dassert( n >= 0 && n < 10000 ); + return keyOffset >= n ? BSONObj() : b->keyNode(keyOffset).key.toBson(); + } + + virtual DiskLoc recordAt(DiskLoc bucket, int keyOffset) const { + const BtreeBucket<Version> *b = bucket.btree<Version>(); + return b->keyNode(keyOffset).recordLoc; + } + + virtual string dupKeyError(DiskLoc bucket, const IndexDetails &idx, + const BSONObj& keyObj) const { + typename Version::KeyOwned key(keyObj); + return bucket.btree<Version>()->dupKeyError(idx, key); + } + + virtual DiskLoc advance(const DiskLoc& thisLoc, + int& keyOfs, + int direction, + const char* caller) const { + return thisLoc.btree<Version>()->advance(thisLoc, keyOfs, direction, caller); + } + }; + + BtreeInterfaceImpl<V0> interface_v0; + BtreeInterfaceImpl<V1> interface_v1; + BtreeInterface* BtreeInterface::interfaces[] = { &interface_v0, &interface_v1 }; + +} // namespace mongo diff --git a/src/mongo/db/index/btree_interface.h b/src/mongo/db/index/btree_interface.h new file mode 100644 index 00000000000..e9c40e55ada --- /dev/null +++ b/src/mongo/db/index/btree_interface.h @@ -0,0 +1,119 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include "mongo/db/btree.h" +#include "mongo/db/diskloc.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + + /** + * We have two Btree on-disk formats which support identical operations. We hide this as much + * as possible by having one implementation of this interface per format. + * + * For documentation on all of the methods here, look at mongo/db/btree.h + */ + class BtreeInterface { + public: + virtual ~BtreeInterface() { } + + static BtreeInterface *interfaces[]; + + // This is the # of the exception that is thrown if we're trying to access a bucket that + // was deleted. Calling code needs to be able to recognize this and possibly ignore it. + static const int deletedBucketCode = 16738; + + virtual int bt_insert(const DiskLoc thisLoc, + const DiskLoc recordLoc, + const BSONObj& key, + const Ordering &order, + bool dupsAllowed, + IndexDetails& idx, + bool toplevel = true) const = 0; + + virtual bool unindex(const DiskLoc thisLoc, + IndexDetails& id, + const BSONObj& key, + const DiskLoc recordLoc) const = 0; + + virtual DiskLoc locate(const IndexDetails& idx, + const DiskLoc& thisLoc, + const BSONObj& key, + const Ordering& order, + int& pos, + bool& found, + const DiskLoc& recordLoc, + int direction = 1) const = 0; + + virtual bool wouldCreateDup(const IndexDetails& idx, + const DiskLoc& thisLoc, + const BSONObj& key, + const Ordering& order, + const DiskLoc& self) const = 0; + + virtual void customLocate(DiskLoc& locInOut, + int& keyOfs, + const BSONObj& keyBegin, + int keyBeginLen, bool afterKey, + const vector<const BSONElement*>& keyEnd, + const vector<bool>& keyEndInclusive, + const Ordering& order, + int direction, + pair<DiskLoc, int>& bestParent) = 0 ; + + virtual void advanceTo(DiskLoc &thisLoc, + int &keyOfs, + const BSONObj &keyBegin, + int keyBeginLen, + bool afterKey, + const vector<const BSONElement*>& keyEnd, + const vector<bool>& keyEndInclusive, + const Ordering& order, int direction) const = 0; + + virtual string dupKeyError(DiskLoc bucket, + const IndexDetails &idx, + const BSONObj& keyObj) const =0; + + virtual DiskLoc advance(const DiskLoc& thisLoc, + int& keyOfs, + int direction, + const char* caller) const = 0; + + /** + * These methods are here so that the BtreeCursor doesn't need to do any templating for the + * two on-disk formats. + */ + + /** + * Is the key at (bucket, keyOffset) being used or not? + * Some keys are marked as not used and skipped. + */ + virtual bool keyIsUsed(DiskLoc bucket, int keyOffset) const = 0; + + /** + * Get the BSON representation of the key at (bucket, keyOffset). + */ + virtual BSONObj keyAt(DiskLoc bucket, int keyOffset) const = 0; + + /** + * Get the DiskLoc that the key at (bucket, keyOffset) points at. + */ + virtual DiskLoc recordAt(DiskLoc bucket, int keyOffset) const = 0; + }; + +} // namespace mongo diff --git a/src/mongo/db/index/catalog_hack.h b/src/mongo/db/index/catalog_hack.h index 541904668f2..c45cc6b0d60 100644 --- a/src/mongo/db/index/catalog_hack.h +++ b/src/mongo/db/index/catalog_hack.h @@ -14,17 +14,37 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include "mongo/db/keypattern.h" #include "mongo/db/index/btree_access_method.h" +#include "mongo/db/index/hash_access_method.h" #include "mongo/db/index/index_access_method.h" #include "mongo/db/index/index_descriptor.h" +#include "mongo/db/keypattern.h" namespace mongo { + /** + * Fake some catalog behavior until the catalog is finalized. + */ class CatalogHack { public: - static IndexDescriptor* getDescriptor(IndexDetails &id) { - return new IndexDescriptor(&id, id.info.obj()); + static IndexDescriptor* getDescriptor(NamespaceDetails* nsd, int idxNo) { + IndexDetails& id = nsd->idx(idxNo); + return new IndexDescriptor(nsd, idxNo, &id, id.info.obj()); + } + + static bool isIndexMigrated(const BSONObj& keyPattern) { + string type = KeyPattern::findPluginName(keyPattern); + return "hashed" == type; + } + + static IndexAccessMethod* getSpecialIndex(IndexDescriptor* desc) { + string type = KeyPattern::findPluginName(desc->keyPattern()); + if ("hashed" == type) { + return new HashAccessMethod(desc); + } else { + verify(0); + return NULL; + } } // The IndexDetails passed in might refer to a Btree-backed index that is not a proper Btree @@ -32,14 +52,7 @@ namespace mongo { // for the backed index; it wants to talk Btree directly. So BtreeCursor always asks for a // Btree index. static IndexAccessMethod* getBtreeIndex(IndexDescriptor* desc) { - if (0 == desc->version()) { - return new BtreeAccessMethod<V0>(desc); - } else if (1 == desc->version()) { - return new BtreeAccessMethod<V1>(desc); - } else { - verify(0); - return NULL; - } + return new BtreeAccessMethod(desc); } }; diff --git a/src/mongo/db/index/emulated_cursor.h b/src/mongo/db/index/emulated_cursor.h new file mode 100644 index 00000000000..a1ecbc282c5 --- /dev/null +++ b/src/mongo/db/index/emulated_cursor.h @@ -0,0 +1,156 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include <boost/scoped_ptr.hpp> +#include <set> + +#include "mongo/db/cursor.h" +#include "mongo/db/index/index_access_method.h" +#include "mongo/db/index/index_descriptor.h" +#include "mongo/db/index/index_cursor.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + + /** + * This class is a crutch to help migrate from the old everything-is-a-Cursor world to the new + * index API world. It wraps a new IndexCursor in the old Cursor. We only use this for special + * (2d, 2dsphere, text, haystack, hash) indices. + */ + class EmulatedCursor : public Cursor { + public: + /** + * Takes ownership of the provided indexAccessMethod indexCursor. + * Takes ownership of the IndexDescriptor inside the indexAccessMethod. + */ + EmulatedCursor(IndexDescriptor* descriptor, IndexAccessMethod* indexAccessMethod, + const BSONObj& query, const BSONObj& order, int numWanted) + : _descriptor(descriptor), _indexAccessMethod(indexAccessMethod) { + + IndexCursor *cursor; + indexAccessMethod->newCursor(&cursor); + _indexCursor.reset(cursor); + _indexCursor->seek(query); + + if (!_indexCursor->isEOF()) { + _nscanned = 1; + } else { + _nscanned = 0; + } + + if ("hashed" == KeyPattern::findPluginName(descriptor->keyPattern())) { + // Quoted from hashindex.cpp: + // Force a match of the query against the actual document by giving + // the cursor a matcher with an empty indexKeyPattern. This ensures the + // index is not used as a covered index. + // NOTE: this forcing is necessary due to potential hash collisions + _matcher = shared_ptr<CoveredIndexMatcher>( + new CoveredIndexMatcher(query, BSONObj())); + _supportYields = true; + _supportGetMore = true; + _modifiedKeys = true; + } else { + verify(0); + } + + checkMultiKeyProperties(); + } + + // We defer everything we can to the underlying cursor. + virtual bool ok() { return !_indexCursor->isEOF(); } + virtual Record* _current() { return currLoc().rec(); } + virtual BSONObj current() { return BSONObj::make(_current()); } + virtual DiskLoc currLoc() { return _indexCursor->getValue(); } + virtual BSONObj currKey() const { return _indexCursor->getKey(); } + virtual DiskLoc refLoc() { return currLoc(); } + virtual long long nscanned() { return _nscanned; } + virtual string toString() { return _indexCursor->toString(); } + + virtual bool advance() { + _indexCursor->next(); + if (ok()) { + ++_nscanned; + } + return ok(); + } + + virtual void noteLocation() { + verify(_supportYields); + _indexCursor->savePosition(); + } + + virtual void checkLocation() { + verify(_supportYields); + _indexCursor->restorePosition(); + checkMultiKeyProperties(); + } + + // Below this is where the Cursor <---> IndexCursor mapping breaks down. + virtual CoveredIndexMatcher* matcher() const { + return _matcher.get(); + } + + // XXX: this is true for everything but '2d'. + virtual bool supportGetMore() { return _supportGetMore; } + + // XXX: this is true for everything but '2d'. + virtual bool supportYields() { return _supportYields; } + + // XXX: true for 2dsphere + // XXX: false for 2d, hash + // XXX: I think it's correct yet slow to leave as 'true'. + virtual bool isMultiKey() const { return _isMultiKey; } + + // XXX: currently is set to 'true' for 2d and 2dsphere. + // XXX: default is false, though hash doesn't set it to true (?) + // XXX: I think it's correct yet slow to leave as 'true'. + virtual bool modifiedKeys() const { return _modifiedKeys; } + + virtual bool getsetdup(DiskLoc loc) { + if (_shouldGetSetDup) { + pair<set<DiskLoc>::iterator, bool> p = _dups.insert(loc); + return !p.second; + } else { + return false; + } + } + + private: + void checkMultiKeyProperties() { + if ("hashed" == KeyPattern::findPluginName(_descriptor->keyPattern())) { + _shouldGetSetDup = _descriptor->isMultikey(); + } + } + + set<DiskLoc> _dups; + + scoped_ptr<IndexDescriptor> _descriptor; + scoped_ptr<IndexAccessMethod> _indexAccessMethod; + scoped_ptr<IndexCursor> _indexCursor; + + long long _nscanned; + shared_ptr<CoveredIndexMatcher> _matcher; + + bool _supportYields; + bool _supportGetMore; + bool _isMultiKey; + bool _modifiedKeys; + bool _shouldGetSetDup; + }; + +} // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.cpp b/src/mongo/db/index/hash_access_method.cpp new file mode 100644 index 00000000000..d547f7fce1b --- /dev/null +++ b/src/mongo/db/index/hash_access_method.cpp @@ -0,0 +1,87 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "mongo/db/btree.h" +#include "mongo/db/btreecursor.h" +#include "mongo/db/hasher.h" +#include "mongo/db/index/hash_access_method.h" +#include "mongo/db/index/hash_index_cursor.h" +#include "mongo/db/queryutil.h" + +namespace mongo { + + long long int HashAccessMethod::makeSingleKey(const BSONElement& e, HashSeed seed, int v) { + massert(16767, "Only HashVersion 0 has been defined" , v == 0 ); + return BSONElementHasher::hash64(e, seed); + } + + HashAccessMethod::HashAccessMethod(IndexDescriptor* descriptor) + : BtreeBasedAccessMethod(descriptor) { + + const string HASHED_INDEX_TYPE_IDENTIFIER = "hashed"; + + //change these if single-field limitation lifted later + uassert(16763, "Currently only single field hashed index supported." , + 1 == descriptor->getNumFields()); + uassert(16764, "Currently hashed indexes cannot guarantee uniqueness. Use a regular index.", + !descriptor->unique()); + + //Default _seed to 0 if "seed" is not included in the index spec + //or if the value of "seed" is not a number + _seed = descriptor->getInfoElement("seed").numberInt(); + + //In case we have hashed indexes based on other hash functions in + //the future, we store a hashVersion number. If hashVersion changes, + // "makeSingleKey" will need to change accordingly. + //Defaults to 0 if "hashVersion" is not included in the index spec + //or if the value of "hashversion" is not a number + _hashVersion = descriptor->getInfoElement("hashVersion").numberInt(); + + //Get the hashfield name + BSONElement firstElt = descriptor->keyPattern().firstElement(); + massert(16765, "error: no hashed index field", + firstElt.str().compare(HASHED_INDEX_TYPE_IDENTIFIER) == 0); + _hashedField = firstElt.fieldName(); + + // Explicit null valued fields and missing fields are both represented in hashed indexes + // using the hash value of the null BSONElement. This is partly for historical reasons + // (hash of null was used in the initial release of hashed indexes and changing would alter + // the data format). Additionally, in certain places the hashed index code and the index + // bound calculation code assume null and missing are indexed identically. + BSONObj nullObj = BSON("" << BSONNULL); + _missingKey = BSON("" << makeSingleKey(nullObj.firstElement(), _seed, _hashVersion)); + } + + Status HashAccessMethod::newCursor(IndexCursor** out) { + *out = new HashIndexCursor(_hashedField, _seed, _hashVersion, _descriptor); + return Status::OK(); + } + + void HashAccessMethod::getKeys(const BSONObj& obj, BSONObjSet* keys) { + const char* cstr = _hashedField.c_str(); + BSONElement fieldVal = obj.getFieldDottedOrArray(cstr); + uassert(16766, "Error: hashed indexes do not currently support array values", + fieldVal.type() != Array ); + + if (!fieldVal.eoo()) { + BSONObj key = BSON( "" << makeSingleKey( fieldVal , _seed , _hashVersion ) ); + keys->insert( key ); + } else if (!_descriptor->isSparse()) { + keys->insert( _missingKey.copy() ); + } + } + +} // namespace mongo diff --git a/src/mongo/db/index/hash_access_method.h b/src/mongo/db/index/hash_access_method.h new file mode 100644 index 00000000000..9fe10dc463c --- /dev/null +++ b/src/mongo/db/index/hash_access_method.h @@ -0,0 +1,69 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include <string> + +#include "mongo/base/status.h" +#include "mongo/db/hasher.h" // For HashSeed. +#include "mongo/db/index/index_descriptor.h" +#include "mongo/db/index/btree_access_method_internal.h" +#include "mongo/db/jsobj.h" + +namespace mongo { + + /** + * This is the access method for "hashed" indices. + */ + class HashAccessMethod : public BtreeBasedAccessMethod { + public: + using BtreeBasedAccessMethod::_descriptor; + using BtreeBasedAccessMethod::_ordering; + + HashAccessMethod(IndexDescriptor* descriptor); + + virtual Status newCursor(IndexCursor** out); + + // This is a NO-OP. + virtual Status setOptions(const CursorOptions& options) { + return Status::OK(); + } + + /** + * Hashing function used by both this class and the cursors we create. + */ + static long long int makeSingleKey(const BSONElement& e, HashSeed seed, int v); + + private: + virtual void getKeys(const BSONObj& obj, BSONObjSet* keys); + + // Only one of our fields is hashed. This is the field name for it. + string _hashedField; + + // _seed defaults to zero. + HashSeed _seed; + + // _hashVersion defaults to zero. + int _hashVersion; + + // What key do we insert when the field is missing? + // TODO: fix migration code to do the right thing. + // TODO: see http://codereview.10gen.com/9497028/patch/3001/4007 + BSONObj _missingKey; + }; + +} // namespace mongo diff --git a/src/mongo/db/index/hash_index_cursor.cpp b/src/mongo/db/index/hash_index_cursor.cpp new file mode 100644 index 00000000000..47b07fce9e3 --- /dev/null +++ b/src/mongo/db/index/hash_index_cursor.cpp @@ -0,0 +1,114 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "hash_index_cursor.h" + +#include <boost/scoped_ptr.hpp> +#include <vector> + +#include "mongo/db/btreecursor.h" +#include "mongo/db/hasher.h" +#include "mongo/db/index/hash_access_method.h" // for HashAccessMethod::makeSingleKey +#include "mongo/db/index/index_cursor.h" +#include "mongo/db/pdfile.h" +#include "mongo/db/queryutil.h" + +namespace mongo { + + HashIndexCursor::HashIndexCursor(const string& hashedField, HashSeed seed, int hashVersion, + IndexDescriptor* descriptor) + : _hashedField(hashedField), _seed(seed), _hashVersion(hashVersion), + _descriptor(descriptor) { } + + Status HashIndexCursor::setOptions(const CursorOptions& options) { + return Status::OK(); + } + + Status HashIndexCursor::seek(const BSONObj& position) { + //Use FieldRangeSet to parse the query into a vector of intervals + //These should be point-intervals if this cursor is ever used + //So the FieldInterval vector will be, e.g. <[1,1], [3,3], [6,6]> + FieldRangeSet frs( "" , position, true, true ); + const vector<FieldInterval>& intervals = frs.range( _hashedField.c_str() ).intervals(); + + //Construct a new query based on the hashes of the previous point-intervals + //e.g. {a : {$in : [ hash(1) , hash(3) , hash(6) ]}} + BSONObjBuilder newQueryBuilder; + BSONObjBuilder inObj( newQueryBuilder.subobjStart( _hashedField ) ); + BSONArrayBuilder inArray( inObj.subarrayStart("$in") ); + vector<FieldInterval>::const_iterator i; + for( i = intervals.begin(); i != intervals.end(); ++i ){ + if ( ! i->equality() ){ + _oldCursor.reset( + BtreeCursor::make( nsdetails( _descriptor->parentNS()), + _descriptor->getOnDisk(), + BSON( "" << MINKEY ) , + BSON( "" << MAXKEY ) , + true , + 1 ) ); + return Status::OK(); + } + inArray.append(HashAccessMethod::makeSingleKey(i->_lower._bound, _seed, _hashVersion)); + } + inArray.done(); + inObj.done(); + BSONObj newQuery = newQueryBuilder.obj(); + + // FieldRangeVector needs an IndexSpec so we make it one. + BSONObjBuilder specBuilder; + BSONObjIterator it(_descriptor->keyPattern()); + while (it.more()) { + BSONElement e = it.next(); + specBuilder.append(e.fieldName(), 1); + } + BSONObj spec = specBuilder.obj(); + IndexSpec specForFRV(spec); + + //Use the point-intervals of the new query to create a Btree cursor + FieldRangeSet newfrs( "" , newQuery , true, true ); + shared_ptr<FieldRangeVector> newVector( + new FieldRangeVector( newfrs , specForFRV, 1 ) ); + + _oldCursor.reset( + BtreeCursor::make(nsdetails(_descriptor->parentNS()), + _descriptor->getOnDisk(), + newVector, + 0, + 1)); + + return Status::OK(); + } + + Status HashIndexCursor::seek(const vector<const BSONElement*>& position, + const vector<bool>& inclusive) { + return Status(ErrorCodes::IllegalOperation, "Unimplemented seek called on Hash index"); + } + + Status HashIndexCursor::skip(const vector<const BSONElement*>& position, + const vector<bool>& inclusive) { + return Status(ErrorCodes::IllegalOperation, "Unimplemented seek called on Hash index"); + } + + bool HashIndexCursor::isEOF() const { return _oldCursor->eof(); } + BSONObj HashIndexCursor::getKey() const { return _oldCursor->currKey(); } + DiskLoc HashIndexCursor::getValue() const { return _oldCursor->currLoc(); } + void HashIndexCursor::next() { _oldCursor->advance(); } + string HashIndexCursor::toString() { return _oldCursor->toString(); } + + Status HashIndexCursor::savePosition() { _oldCursor->noteLocation(); return Status::OK(); } + Status HashIndexCursor::restorePosition() { _oldCursor->checkLocation(); return Status::OK(); } + +} // namespace mongo diff --git a/src/mongo/db/index/hash_index_cursor.h b/src/mongo/db/index/hash_index_cursor.h new file mode 100644 index 00000000000..334f32eb0f4 --- /dev/null +++ b/src/mongo/db/index/hash_index_cursor.h @@ -0,0 +1,69 @@ +/** +* Copyright (C) 2013 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include <boost/scoped_ptr.hpp> +#include <vector> + +#include "mongo/db/btreecursor.h" +#include "mongo/db/hasher.h" +#include "mongo/db/index/index_cursor.h" +#include "mongo/db/pdfile.h" + +namespace mongo { + + class HashIndexCursor : public IndexCursor { + public: + HashIndexCursor(const string& hashedField, HashSeed seed, int hashVersion, + IndexDescriptor* descriptor); + + virtual ~HashIndexCursor() { } + + virtual Status setOptions(const CursorOptions& options); + + virtual Status seek(const BSONObj &position); + + // Returns an error. + virtual Status seek(const vector<const BSONElement*>& position, + const vector<bool>& inclusive); + + // Returns an error. + virtual Status skip(const vector<const BSONElement*>& position, + const vector<bool>& inclusive); + + bool isEOF() const; + BSONObj getKey() const; + DiskLoc getValue() const; + void next(); + string toString(); + + Status savePosition(); + Status restorePosition(); + + private: + string _hashedField; + scoped_ptr<BtreeCursor> _oldCursor; + + // Default of zero. + HashSeed _seed; + + // Default of zero. + int _hashVersion; + IndexDescriptor *_descriptor; + }; + +} // namespace mongo diff --git a/src/mongo/db/index/index_access_method.h b/src/mongo/db/index/index_access_method.h index 9d7125b996c..8488b484f77 100644 --- a/src/mongo/db/index/index_access_method.h +++ b/src/mongo/db/index/index_access_method.h @@ -130,7 +130,7 @@ namespace mongo { protected: // These friends are the classes that actually fill out an UpdateStatus. - template <class Key> friend class BtreeBasedAccessMethod; + friend class BtreeBasedAccessMethod; class PrivateUpdateData; diff --git a/src/mongo/db/index/index_descriptor.h b/src/mongo/db/index/index_descriptor.h index 8734abea54d..9e58cbb7c46 100644 --- a/src/mongo/db/index/index_descriptor.h +++ b/src/mongo/db/index/index_descriptor.h @@ -20,6 +20,7 @@ #include "mongo/db/index.h" // For IndexDetails. #include "mongo/db/jsobj.h" +#include "mongo/db/namespace_details.h" // For NamespaceDetails. namespace mongo { @@ -44,10 +45,10 @@ namespace mongo { * OnDiskIndexData is a pointer to the memory mapped per-index data. * infoObj is a copy of the index-describing BSONObj contained in the OnDiskIndexData. */ - IndexDescriptor(OnDiskIndexData* data, BSONObj infoObj) - : _onDiskData(data) - , _infoObj(infoObj) - , _numFields(infoObj.getObjectField("key").nFields()) { } + IndexDescriptor(NamespaceDetails* namespaceDetails, int indexNumber, OnDiskIndexData* data, + BSONObj infoObj) + : _namespaceDetails(namespaceDetails), _indexNumber(indexNumber), _onDiskData(data), + _infoObj(infoObj), _numFields(infoObj.getObjectField("key").nFields()) { } // // Information about the key pattern. @@ -105,6 +106,9 @@ namespace mongo { // Is this index sparse? bool isSparse() const { return _infoObj["sparse"].trueValue(); } + // Is this index multikey? + bool isMultikey() const { return _namespaceDetails->isMultikey(_indexNumber); } + // // Properties that are Index-specific. // @@ -127,6 +131,13 @@ namespace mongo { string toString() { return _infoObj.toString(); } private: + // Related catalog information. + NamespaceDetails* _namespaceDetails; + + // What # index are we in the catalog represented by _namespaceDetails? Needed for setting + // and getting multikey. + int _indexNumber; + OnDiskIndexData* _onDiskData; // The BSONObj describing the index. Accessed through the various members above. diff --git a/src/mongo/db/keypattern.cpp b/src/mongo/db/keypattern.cpp index 89eca1f16c6..7379c69bd2e 100644 --- a/src/mongo/db/keypattern.cpp +++ b/src/mongo/db/keypattern.cpp @@ -40,6 +40,18 @@ namespace mongo { } } + string KeyPattern::findPluginName(const BSONObj& keyPattern) { + BSONObjIterator i(keyPattern); + + while (i.more()) { + BSONElement e = i.next(); + if (String != e.type()) { continue; } + return e.String(); + } + + return ""; + } + BSONObj KeyPattern::extractSingleKey(const BSONObj& doc ) const { if ( _pattern.isEmpty() ) return BSONObj(); diff --git a/src/mongo/db/keypattern.h b/src/mongo/db/keypattern.h index a25ce2b87e5..4c52dac2c9d 100644 --- a/src/mongo/db/keypattern.h +++ b/src/mongo/db/keypattern.h @@ -80,6 +80,12 @@ namespace mongo { return _pattern.isPrefixOf( other.toBSON() ); } + /** + * Return the first string value in the provided object. For an index key pattern, + * a field with a non-string value indicates a "special" (not straight Btree) index. + */ + static string findPluginName(const BSONObj& keyPattern); + /* Takes a BSONObj whose field names are a prefix of the fields in this keyPattern, and * outputs a new bound with MinKey values appended to match the fields in this keyPattern * (or MaxKey values for descending -1 fields). This is useful in sharding for diff --git a/src/mongo/db/query_plan.cpp b/src/mongo/db/query_plan.cpp index 40442d63a4e..ca97e47ca87 100644 --- a/src/mongo/db/query_plan.cpp +++ b/src/mongo/db/query_plan.cpp @@ -18,6 +18,10 @@ #include "mongo/db/btreecursor.h" #include "mongo/db/cmdline.h" +#include "mongo/db/index/catalog_hack.h" +#include "mongo/db/index/emulated_cursor.h" +#include "mongo/db/index/index_descriptor.h" +#include "mongo/db/index/index_access_method.h" #include "mongo/db/intervalbtreecursor.h" #include "mongo/db/pdfile.h" #include "mongo/db/parsed_query.h" @@ -247,6 +251,17 @@ doneCheckOrder: // SERVER-5390 numWanted = _parsedQuery->getSkip() + _parsedQuery->getNumToReturn(); } + + // TODO(hk): Migrate! + bool testIndexMigrations = true; + + if (testIndexMigrations && CatalogHack::isIndexMigrated(_type->keyPattern())) { + IndexDescriptor* descriptor = CatalogHack::getDescriptor(_d, _idxNo); + IndexAccessMethod* iam = CatalogHack::getSpecialIndex(descriptor); + return shared_ptr<Cursor>(new EmulatedCursor(descriptor, iam, _originalQuery, + _order, numWanted)); + } + return _type->newCursor( _originalQuery, _order, numWanted ); } |