diff options
Diffstat (limited to 'db/btree.h')
-rw-r--r-- | db/btree.h | 356 |
1 files changed, 195 insertions, 161 deletions
diff --git a/db/btree.h b/db/btree.h index 2439784df15..d5f3f334389 100644 --- a/db/btree.h +++ b/db/btree.h @@ -2,16 +2,16 @@ /** * Copyright (C) 2008 10gen Inc. -* +* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. -* +* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. -* +* * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ @@ -26,22 +26,34 @@ #pragma pack(push,1) struct _KeyNode { - DiskLoc prevChildBucket; - DiskLoc recordLoc; - short keyDataOfs() { return (short) _kdo; } - unsigned short _kdo; - void setKeyDataOfs(short s) { _kdo = s; assert(s>=0); } - void setKeyDataOfsSavingUse(short s) { _kdo = s; assert(s>=0); } - void setUnused() { - /* Setting ofs to odd is the sentinel for unused, as real recordLoc's are always - even numbers. - Note we need to keep its value basically the same as we use the recordLoc - as part of the key in the index (to handle duplicate keys efficiently). - */ - recordLoc.GETOFS() |= 1; - } - int isUnused() { return recordLoc.getOfs() & 1; } - int isUsed() { return !isUnused(); } + DiskLoc prevChildBucket; + DiskLoc recordLoc; + short keyDataOfs() { + return (short) _kdo; + } + unsigned short _kdo; + void setKeyDataOfs(short s) { + _kdo = s; + assert(s>=0); + } + void setKeyDataOfsSavingUse(short s) { + _kdo = s; + assert(s>=0); + } + void setUnused() { + /* Setting ofs to odd is the sentinel for unused, as real recordLoc's are always + even numbers. + Note we need to keep its value basically the same as we use the recordLoc + as part of the key in the index (to handle duplicate keys efficiently). + */ + recordLoc.GETOFS() |= 1; + } + int isUnused() { + return recordLoc.getOfs() & 1; + } + int isUsed() { + return !isUnused(); + } }; #pragma pack(pop) @@ -51,182 +63,204 @@ class BucketBasics; /* wrapper - this is our in memory representation of the key. _KeyNode is the disk representation. */ class KeyNode { public: - KeyNode(BucketBasics& bb, _KeyNode &k); - DiskLoc& prevChildBucket; - DiskLoc& recordLoc; - BSONObj key; + KeyNode(BucketBasics& bb, _KeyNode &k); + DiskLoc& prevChildBucket; + DiskLoc& recordLoc; + BSONObj key; }; #pragma pack(push,1) /* this class is all about the storage management */ class BucketBasics { - friend class KeyNode; + friend class KeyNode; public: - void dumpTree(DiskLoc thisLoc); - bool isHead() { return parent.isNull(); } - void assertValid(bool force = false); - int fullValidate(const DiskLoc& thisLoc); /* traverses everything */ + void dumpTree(DiskLoc thisLoc); + bool isHead() { + return parent.isNull(); + } + void assertValid(bool force = false); + int fullValidate(const DiskLoc& thisLoc); /* traverses everything */ protected: - DiskLoc& getChild(int pos) { - assert( pos >= 0 && pos <= n ); - return pos == n ? nextChild : k(pos).prevChildBucket; - } - KeyNode keyNode(int i) { - assert( i < n ); - return KeyNode(*this, k(i)); - } - - char * dataAt(short ofs) { return data + ofs; } - - void init(); // initialize a new node - - /* returns false if node is full and must be split - keypos is where to insert -- inserted after that key #. so keypos=0 is the leftmost one. - */ - bool basicInsert(int keypos, const DiskLoc& recordLoc, BSONObj& key); - void pushBack(const DiskLoc& recordLoc, BSONObj& key, DiskLoc prevChild); - void _delKeyAtPos(int keypos); // low level version that doesn't deal with child ptrs. - - /* !Packed means there is deleted fragment space within the bucket. + DiskLoc& getChild(int pos) { + assert( pos >= 0 && pos <= n ); + return pos == n ? nextChild : k(pos).prevChildBucket; + } + KeyNode keyNode(int i) { + assert( i < n ); + return KeyNode(*this, k(i)); + } + + char * dataAt(short ofs) { + return data + ofs; + } + + void init(); // initialize a new node + + /* returns false if node is full and must be split + keypos is where to insert -- inserted after that key #. so keypos=0 is the leftmost one. + */ + bool basicInsert(int keypos, const DiskLoc& recordLoc, BSONObj& key); + void pushBack(const DiskLoc& recordLoc, BSONObj& key, DiskLoc prevChild); + void _delKeyAtPos(int keypos); // low level version that doesn't deal with child ptrs. + + /* !Packed means there is deleted fragment space within the bucket. We "repack" when we run out of space before considering the node - to be full. - */ - enum Flags { Packed=1 }; - - DiskLoc childForPos(int p) { - return p == n ? nextChild : k(p).prevChildBucket; - } - - int totalDataSize() const; - void pack(); void setNotPacked(); void setPacked(); - int _alloc(int bytes); - void truncateTo(int N); - void markUnused(int keypos); + to be full. + */ + enum Flags { Packed=1 }; + + DiskLoc childForPos(int p) { + return p == n ? nextChild : k(p).prevChildBucket; + } + + int totalDataSize() const; + void pack(); + void setNotPacked(); + void setPacked(); + int _alloc(int bytes); + void truncateTo(int N); + void markUnused(int keypos); public: - DiskLoc parent; - - string bucketSummary() const { - stringstream ss; - ss << " Bucket info:" << endl; - ss << " n: " << n << endl; - ss << " parent: " << parent.toString() << endl; - ss << " nextChild: " << parent.toString() << endl; - ss << " Size: " << _Size << " flags:" << flags << endl; - ss << " emptySize: " << emptySize << " topSize: " << topSize << endl; - return ss.str(); - } + DiskLoc parent; + + string bucketSummary() const { + stringstream ss; + ss << " Bucket info:" << endl; + ss << " n: " << n << endl; + ss << " parent: " << parent.toString() << endl; + ss << " nextChild: " << parent.toString() << endl; + ss << " Size: " << _Size << " flags:" << flags << endl; + ss << " emptySize: " << emptySize << " topSize: " << topSize << endl; + return ss.str(); + } protected: - void _shape(int level, stringstream&); - DiskLoc nextChild; // child bucket off and to the right of the highest key. - int _Size; // total size of this btree node in bytes. constant. - int Size() const; - int flags; - int emptySize; // size of the empty region - int topSize; // size of the data at the top of the bucket (keys are at the beginning or 'bottom') - int n; // # of keys so far. - int reserved; - _KeyNode& k(int i) { return ((_KeyNode*)data)[i]; } - char data[4]; + void _shape(int level, stringstream&); + DiskLoc nextChild; // child bucket off and to the right of the highest key. + int _Size; // total size of this btree node in bytes. constant. + int Size() const; + int flags; + int emptySize; // size of the empty region + int topSize; // size of the data at the top of the bucket (keys are at the beginning or 'bottom') + int n; // # of keys so far. + int reserved; + _KeyNode& k(int i) { + return ((_KeyNode*)data)[i]; + } + char data[4]; }; -class BtreeBucket : public BucketBasics { - friend class BtreeCursor; +class BtreeBucket : public BucketBasics { + friend class BtreeCursor; public: - void dump(); + void dump(); - static DiskLoc addHead(IndexDetails&); /* start a new index off, empty */ - int insert(DiskLoc thisLoc, DiskLoc recordLoc, - BSONObj& key, bool dupsAllowed, IndexDetails& idx, bool toplevel); + static DiskLoc addHead(IndexDetails&); /* start a new index off, empty */ + int insert(DiskLoc thisLoc, DiskLoc recordLoc, + BSONObj& key, bool dupsAllowed, IndexDetails& idx, bool toplevel); - bool unindex(const DiskLoc& thisLoc, IndexDetails& id, BSONObj& key, const DiskLoc& recordLoc); + bool unindex(const DiskLoc& thisLoc, IndexDetails& id, BSONObj& key, const DiskLoc& recordLoc); - /* locate may return an "unused" key that is just a marker. so be careful. - looks for a key:recordloc pair. - */ - DiskLoc locate(const DiskLoc& thisLoc, BSONObj& key, int& pos, bool& found, DiskLoc recordLoc, int direction=1); + /* locate may return an "unused" key that is just a marker. so be careful. + looks for a key:recordloc pair. + */ + DiskLoc locate(const DiskLoc& thisLoc, BSONObj& key, int& pos, bool& found, DiskLoc recordLoc, int direction=1); - /* advance one key position in the index: */ - DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller); - DiskLoc getHead(const DiskLoc& thisLoc); + /* advance one key position in the index: */ + DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller); + DiskLoc getHead(const DiskLoc& thisLoc); - /* get tree shape */ - void shape(stringstream&); + /* get tree shape */ + void shape(stringstream&); private: - void fixParentPtrs(const DiskLoc& thisLoc); - void delBucket(const DiskLoc& thisLoc, IndexDetails&); - void delKeyAtPos(const DiskLoc& thisLoc, IndexDetails& id, int p); - BSONObj keyAt(int keyOfs) { return keyOfs >= n ? BSONObj() : keyNode(keyOfs).key; } - static BtreeBucket* allocTemp(); /* caller must release with free() */ - void insertHere(DiskLoc thisLoc, int keypos, - DiskLoc recordLoc, BSONObj& key, - DiskLoc lchild, DiskLoc rchild, IndexDetails&); - int _insert(DiskLoc thisLoc, DiskLoc recordLoc, - BSONObj& key, bool dupsAllowed, - DiskLoc lChild, DiskLoc rChild, IndexDetails&); - bool find(BSONObj& key, DiskLoc recordLoc, int& pos); - static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey); + void fixParentPtrs(const DiskLoc& thisLoc); + void delBucket(const DiskLoc& thisLoc, IndexDetails&); + void delKeyAtPos(const DiskLoc& thisLoc, IndexDetails& id, int p); + BSONObj keyAt(int keyOfs) { + return keyOfs >= n ? BSONObj() : keyNode(keyOfs).key; + } + static BtreeBucket* allocTemp(); /* caller must release with free() */ + void insertHere(DiskLoc thisLoc, int keypos, + DiskLoc recordLoc, BSONObj& key, + DiskLoc lchild, DiskLoc rchild, IndexDetails&); + int _insert(DiskLoc thisLoc, DiskLoc recordLoc, + BSONObj& key, bool dupsAllowed, + DiskLoc lChild, DiskLoc rChild, IndexDetails&); + bool find(BSONObj& key, DiskLoc recordLoc, int& pos); + static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey); }; class BtreeCursor : public Cursor { - friend class BtreeBucket; - BSONObj startKey; - BSONObj endKey; + friend class BtreeBucket; + BSONObj startKey; + BSONObj endKey; // BSONObj query; // the query we are working on in association with the cursor -- see noMoreMatches() public: - BtreeCursor(IndexDetails&, const BSONObj& startKey, int direction, BSONObj& query); - virtual bool ok() { return !bucket.isNull(); } - bool eof() { return !ok(); } - virtual bool advance(); - - virtual void noteLocation(); // updates keyAtKeyOfs... - virtual void checkLocation(); - - _KeyNode& _currKeyNode() { - assert( !bucket.isNull() ); - _KeyNode& kn = bucket.btree()->k(keyOfs); - assert( kn.isUsed() ); - return kn; - } - KeyNode currKeyNode() { - assert( !bucket.isNull() ); - return bucket.btree()->keyNode(keyOfs); - } - BSONObj currKey() { return currKeyNode().key; } - - virtual BSONObj indexKeyPattern() { + BtreeCursor(IndexDetails&, const BSONObj& startKey, int direction, BSONObj& query); + virtual bool ok() { + return !bucket.isNull(); + } + bool eof() { + return !ok(); + } + virtual bool advance(); + + virtual void noteLocation(); // updates keyAtKeyOfs... + virtual void checkLocation(); + + _KeyNode& _currKeyNode() { + assert( !bucket.isNull() ); + _KeyNode& kn = bucket.btree()->k(keyOfs); + assert( kn.isUsed() ); + return kn; + } + KeyNode currKeyNode() { + assert( !bucket.isNull() ); + return bucket.btree()->keyNode(keyOfs); + } + BSONObj currKey() { + return currKeyNode().key; + } + + virtual BSONObj indexKeyPattern() { return indexDetails.keyPattern(); } - virtual void aboutToDeleteBucket(const DiskLoc& b) { - if( bucket == b ) - keyOfs = -1; - } + virtual void aboutToDeleteBucket(const DiskLoc& b) { + if ( bucket == b ) + keyOfs = -1; + } - virtual DiskLoc currLoc() { return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc(); } - virtual Record* _current() { return currLoc().rec(); } - virtual BSONObj current() { return BSONObj(_current()); } - virtual string toString() { - string s = string("BtreeCursor ") + indexDetails.indexName(); - if( direction < 0 ) s += " reverse"; + virtual DiskLoc currLoc() { + return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc(); + } + virtual Record* _current() { + return currLoc().rec(); + } + virtual BSONObj current() { + return BSONObj(_current()); + } + virtual string toString() { + string s = string("BtreeCursor ") + indexDetails.indexName(); + if ( direction < 0 ) s += " reverse"; return s; } private: - void findExtremeKeys( const BSONObj &query ); - void findExtremeInequalityValues( const BSONElement &e, - BSONElement &lowest, - BSONElement &highest ); - static void getFields( const BSONObj &key, set< string > &fields ); - void checkUnused(); - void checkEnd(); - IndexDetails& indexDetails; - DiskLoc bucket; - int keyOfs; - int direction; // 1=fwd,-1=reverse - BSONObj keyAtKeyOfs; // so we can tell if things moved around on us between the query and the getMore call - DiskLoc locAtKeyOfs; + void findExtremeKeys( const BSONObj &query ); + void findExtremeInequalityValues( const BSONElement &e, + BSONElement &lowest, + BSONElement &highest ); + static void getFields( const BSONObj &key, set< string > &fields ); + void checkUnused(); + void checkEnd(); + IndexDetails& indexDetails; + DiskLoc bucket; + int keyOfs; + int direction; // 1=fwd,-1=reverse + BSONObj keyAtKeyOfs; // so we can tell if things moved around on us between the query and the getMore call + DiskLoc locAtKeyOfs; }; #pragma pack(pop) |