diff options
author | Mathias Stearn <mathias@10gen.com> | 2012-10-26 19:35:31 -0400 |
---|---|---|
committer | Mathias Stearn <mathias@10gen.com> | 2012-11-16 17:32:59 -0500 |
commit | ffd194f68edf4a61bff35fac591452b715cff849 (patch) | |
tree | 67ed1e6300f04d1333fee769e9c1ba2f8b4d41d6 /src/mongo/db/pipeline/document_internal.h | |
parent | 7eb3fc28fbd9cb0464cbf6dba5ffb497ba2088e9 (diff) | |
download | mongo-ffd194f68edf4a61bff35fac591452b715cff849.tar.gz |
Rewrite Document and Value classes
Diffstat (limited to 'src/mongo/db/pipeline/document_internal.h')
-rw-r--r-- | src/mongo/db/pipeline/document_internal.h | 302 |
1 files changed, 302 insertions, 0 deletions
diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h new file mode 100644 index 00000000000..39677bb5698 --- /dev/null +++ b/src/mongo/db/pipeline/document_internal.h @@ -0,0 +1,302 @@ +/** + * Copyright 2012 (c) 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <third_party/murmurhash3/MurmurHash3.h> + +#include "mongo/util/intrusive_counter.h" +#include "mongo/db/pipeline/value.h" + +namespace mongo { + /** Helper class to make the position in a document abstract + * Warning: This is NOT guaranteed to be the ordered position. + * eg. the first field may not be at Position(0) + */ + class Position { + public: + // This represents "not found" similar to string::npos + Position() :index(static_cast<unsigned>(-1)) {} + bool found() const { return index != Position().index; } + + bool operator == (Position rhs) const { return this->index == rhs.index; } + bool operator != (Position rhs) const { return !(*this == rhs); } + + // For debugging and ASSERT_EQUALS in tests. + template <typename OStream> + friend OStream& operator<<(OStream& stream, Position p) { return stream << p.index; } + + private: + explicit Position(size_t i) :index(i) {} + unsigned index; + friend class DocumentStorage; + friend class DocumentStorageIterator; + }; + +#pragma pack(1) + /** This is how values are stored in the DocumentStorage buffer + * Internal class. Consumers shouldn't care about this. + */ + class ValueElement : boost::noncopyable { + public: + Value val; + Position nextCollision; // Position of next field with same hashBucket + const int nameLen; // doesn't include '\0' + const char name[1]; // pointer to start of name + + // implicit conversions to Value + operator Value&() { return val; } + operator const Value&() const { return val; } + + ValueElement* next() { + return align(plusBytes(sizeof(ValueElement) + nameLen)); + } + + const ValueElement* next() const { + return align(plusBytes(sizeof(ValueElement) + nameLen)); + } + + StringData nameSD() const { return StringData(name, nameLen); } + + + // helpers for doing pointer arithmetic with this class + // Note: These don't dereference 'this' so they are safe to use with NULL + char* ptr() { return reinterpret_cast<char*>(this); } + const char* ptr() const { return reinterpret_cast<const char*>(this); } + const ValueElement* plusBytes(size_t bytes) const { + return reinterpret_cast<const ValueElement*>(ptr() + bytes); + } + ValueElement* plusBytes(size_t bytes) { + return reinterpret_cast<ValueElement*>(ptr() + bytes); + } + + // Round number or pointer up to N-byte boundary. No change if already aligned. + template <typename T> + static T align(T size) { + const intmax_t ALIGNMENT = 8; // must be power of 2 and <= 16 (malloc alignment) + // Can't use c++ cast because of conversion between intmax_t and both ints and pointers + return (T)(((intmax_t)(size) + (ALIGNMENT-1)) & ~(ALIGNMENT-1)); + } + + private: + ValueElement(); // this class should never be constructed + ~ValueElement(); // or destructed + }; + // Real size is sizeof(ValueElement) + nameLen +#pragma pack() + BOOST_STATIC_ASSERT(sizeof(ValueElement) == (sizeof(Value) + + sizeof(Position) + + sizeof(int) + + 1)); + + // This is an internal class for Document. See FieldIterator for the public version. + class DocumentStorageIterator { + public: + // DocumentStorage::iterator() and iteratorAll() are easier to use + DocumentStorageIterator(const ValueElement* first, + const ValueElement* end, + bool includeMissing) + : _first(first) + , _it(first) + , _end(end) + , _includeMissing(includeMissing) { + skipMissing(); + } + + bool atEnd() const { return _it == _end; } + + const ValueElement& get() const { return *_it; } + + Position position() const { return Position(_it->ptr() - _first->ptr()); } + + void advance() { + advanceOne(); + skipMissing(); + } + + const ValueElement* operator-> () { return _it; } + const ValueElement& operator* () { return *_it; } + + private: + void advanceOne() { + _it = _it->next(); + } + + void skipMissing() { + if (!_includeMissing) { + while (!atEnd() && _it->val.missing()) { + advanceOne(); + } + } + } + + const ValueElement* _first; + const ValueElement* _it; + const ValueElement* _end; + bool _includeMissing; + }; + + /// Storage class used by both Document and MutableDocument + class DocumentStorage : public RefCountable { + public: + // Note: default constructor should zero-init to support emptyDoc() + DocumentStorage() : _buffer(NULL) + , _bufferEnd(NULL) + , _usedBytes(0) + , _numFields(0) + , _hashTabMask(0) + {} + ~DocumentStorage(); + + static const DocumentStorage& emptyDoc() { + static const char emptyBytes[sizeof(DocumentStorage)] = {0}; + return *reinterpret_cast<const DocumentStorage*>(emptyBytes); + } + + size_t size() const { + // can't use _numFields because it includes removed Fields + size_t count = 0; + for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance()) + count++; + return count; + } + + /// Returns the position of the next field to be inserted + Position getNextPosition() const { return Position(_usedBytes); } + + /// Returns the position of the named field (may be missing) or Position() + Position findField(StringData name) const; + + // Document uses these + const ValueElement& getField(Position pos) const { + verify(pos.found()); + return *(_firstElement->plusBytes(pos.index)); + } + Value getField(StringData name) const { + Position pos = findField(name); + if (!pos.found()) + return Value(); + return getField(pos); + } + + // MutableDocument uses these + ValueElement& getField(Position pos) { + verify(pos.found()); + return *(_firstElement->plusBytes(pos.index)); + } + Value& getField(StringData name) { + Position pos = findField(name); + if (!pos.found()) + return appendField(name); // TODO: find a way to avoid hashing name twice + return getField(pos); + } + + /// Adds a new field with missing Value at the end of the document + Value& appendField(StringData name); + + /** Preallocates space for fields. Use this to attempt to prevent buffer growth. + * This is only valid to call before anything is added to the document. + */ + void reserveFields(size_t expectedFields); + + /// This skips missing values + DocumentStorageIterator iterator() const { + return DocumentStorageIterator(_firstElement, end(), false); + } + + /// This includes missing values + DocumentStorageIterator iteratorAll() const { + return DocumentStorageIterator(_firstElement, end(), true); + } + + /// Shallow copy of this. Caller owns memory. + intrusive_ptr<DocumentStorage> clone() const; + + size_t allocatedBytes() const { + return !_buffer ? 0 : (_bufferEnd - _buffer + hashTabBytes()); + } + + private: + + /// Same as lastElement->next() or firstElement() if empty. + const ValueElement* end() const { return _firstElement->plusBytes(_usedBytes); } + + /// Allocates space in _buffer. Copies existing data if there is any. + void alloc(unsigned newSize); + + /// Call after adding field to _buffer and increasing _numFields + void addFieldToHashTable(Position pos); + + // assumes _hashTabMask is (power of two) - 1 + unsigned hashTabBuckets() const { return _hashTabMask + 1; } + unsigned hashTabBytes() const { return hashTabBuckets() * sizeof(Position); } + + /// rehash on buffer growth if load-factor > .5 (attempt to keep lf < 1 when full) + bool needRehash() const { return _numFields*2 > hashTabBuckets(); } + + /// Initialize empty hash table + void hashTabInit() { memset(_hashTab, -1, hashTabBytes()); } + + static unsigned hashKey(StringData name) { + // TODO consider FNV-1a once we have a better benchmark corpus + unsigned out; + MurmurHash3_x86_32(name.data(), name.size(), 0, &out); + return out; + } + + unsigned bucketForKey(StringData name) const { + return hashKey(name) & _hashTabMask; + } + + /// Adds all fields to the hash table + void rehash() { + hashTabInit(); + for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) + addFieldToHashTable(it.position()); + } + + enum { + HASH_TAB_INIT_SIZE = 8, // must be power of 2 + HASH_TAB_MIN = 4, // don't hash fields for docs smaller than this + // set to 1 to always hash + }; + + // _buffer layout: + // ------------------------------------------------------------------------------- + // | ValueElement1 Name1 | ValueElement2 Name2 | ... FREE SPACE ... | Hash Table | + // ------------------------------------------------------------------------------- + // ^ _buffer and _firstElement point here ^ + // _bufferEnd and _hashTab point here ^ + // + // + // When the buffer grows, the hash table moves to the new end. + union { + char* _buffer; + ValueElement* _firstElement; + }; + + union { + // pointer to "end" of _buffer element space and start of hash table (same position) + char* _bufferEnd; + Position* _hashTab; // table lazily initialized once _numFields == HASH_TAB_MIN + }; + + unsigned _usedBytes; // position where next field would start + unsigned _numFields; // this includes removed fields + unsigned _hashTabMask; // equal to hashTabBuckets()-1 but used more often + // When adding a field, make sure to update clone() method + }; +} |