/** * Copyright 2012 (c) 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the GNU Affero General Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #pragma once #include #include #include "mongo/util/intrusive_counter.h" #include "mongo/db/pipeline/value.h" namespace mongo { /** Helper class to make the position in a document abstract * Warning: This is NOT guaranteed to be the ordered position. * eg. the first field may not be at Position(0) */ class Position { public: // This represents "not found" similar to std::string::npos Position() : index(static_cast(-1)) {} bool found() const { return index != Position().index; } bool operator==(Position rhs) const { return this->index == rhs.index; } bool operator!=(Position rhs) const { return !(*this == rhs); } // For debugging and ASSERT_EQUALS in tests. template friend OStream& operator<<(OStream& stream, Position p) { return stream << p.index; } private: explicit Position(size_t i) : index(i) {} unsigned index; friend class DocumentStorage; friend class DocumentStorageIterator; }; #pragma pack(1) /** This is how values are stored in the DocumentStorage buffer * Internal class. Consumers shouldn't care about this. */ class ValueElement { MONGO_DISALLOW_COPYING(ValueElement); public: Value val; Position nextCollision; // Position of next field with same hashBucket const int nameLen; // doesn't include '\0' const char _name[1]; // pointer to start of name (use nameSD instead) ValueElement* next() { return align(plusBytes(sizeof(ValueElement) + nameLen)); } const ValueElement* next() const { return align(plusBytes(sizeof(ValueElement) + nameLen)); } StringData nameSD() const { return StringData(_name, nameLen); } // helpers for doing pointer arithmetic with this class // Note: These don't dereference 'this' so they are safe to use with NULL char* ptr() { return reinterpret_cast(this); } const char* ptr() const { return reinterpret_cast(this); } const ValueElement* plusBytes(size_t bytes) const { return reinterpret_cast(ptr() + bytes); } ValueElement* plusBytes(size_t bytes) { return reinterpret_cast(ptr() + bytes); } // Round number or pointer up to N-byte boundary. No change if already aligned. template static T align(T size) { const intmax_t ALIGNMENT = 8; // must be power of 2 and <= 16 (malloc alignment) // Can't use c++ cast because of conversion between intmax_t and both ints and pointers return (T)(((intmax_t)(size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1)); } private: ValueElement(); // this class should never be constructed ~ValueElement(); // or destructed }; // Real size is sizeof(ValueElement) + nameLen #pragma pack() BOOST_STATIC_ASSERT(sizeof(ValueElement) == (sizeof(Value) + sizeof(Position) + sizeof(int) + 1)); // This is an internal class for Document. See FieldIterator for the public version. class DocumentStorageIterator { public: // DocumentStorage::iterator() and iteratorAll() are easier to use DocumentStorageIterator(const ValueElement* first, const ValueElement* end, bool includeMissing) : _first(first), _it(first), _end(end), _includeMissing(includeMissing) { if (!_includeMissing) skipMissing(); } bool atEnd() const { return _it == _end; } const ValueElement& get() const { return *_it; } Position position() const { return Position(_it->ptr() - _first->ptr()); } void advance() { advanceOne(); if (!_includeMissing) skipMissing(); } const ValueElement* operator->() { return _it; } const ValueElement& operator*() { return *_it; } private: void advanceOne() { _it = _it->next(); } void skipMissing() { while (!atEnd() && _it->val.missing()) { advanceOne(); } } const ValueElement* _first; const ValueElement* _it; const ValueElement* _end; bool _includeMissing; }; /// Storage class used by both Document and MutableDocument class DocumentStorage : public RefCountable { public: // Note: default constructor should zero-init to support emptyDoc() DocumentStorage() : _buffer(NULL), _bufferEnd(NULL), _usedBytes(0), _numFields(0), _hashTabMask(0), _hasTextScore(false), _textScore(0) {} ~DocumentStorage(); static const DocumentStorage& emptyDoc() { static const char emptyBytes[sizeof(DocumentStorage)] = {0}; return *reinterpret_cast(emptyBytes); } size_t size() const { // can't use _numFields because it includes removed Fields size_t count = 0; for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance()) count++; return count; } /// Returns the position of the next field to be inserted Position getNextPosition() const { return Position(_usedBytes); } /// Returns the position of the named field (may be missing) or Position() Position findField(StringData name) const; // Document uses these const ValueElement& getField(Position pos) const { verify(pos.found()); return *(_firstElement->plusBytes(pos.index)); } Value getField(StringData name) const { Position pos = findField(name); if (!pos.found()) return Value(); return getField(pos).val; } // MutableDocument uses these ValueElement& getField(Position pos) { verify(pos.found()); return *(_firstElement->plusBytes(pos.index)); } Value& getField(StringData name) { Position pos = findField(name); if (!pos.found()) return appendField(name); // TODO: find a way to avoid hashing name twice return getField(pos).val; } /// Adds a new field with missing Value at the end of the document Value& appendField(StringData name); /** Preallocates space for fields. Use this to attempt to prevent buffer growth. * This is only valid to call before anything is added to the document. */ void reserveFields(size_t expectedFields); /// This skips missing values DocumentStorageIterator iterator() const { return DocumentStorageIterator(_firstElement, end(), false); } /// This includes missing values DocumentStorageIterator iteratorAll() const { return DocumentStorageIterator(_firstElement, end(), true); } /// Shallow copy of this. Caller owns memory. boost::intrusive_ptr clone() const; size_t allocatedBytes() const { return !_buffer ? 0 : (_bufferEnd - _buffer + hashTabBytes()); } /** * Copies all metadata from source if it has any. * Note: does not clear metadata from this. */ void copyMetaDataFrom(const DocumentStorage& source) { if (source.hasTextScore()) { setTextScore(source.getTextScore()); } } bool hasTextScore() const { return _hasTextScore; } double getTextScore() const { return _textScore; } void setTextScore(double score) { _hasTextScore = true; _textScore = score; } private: /// Same as lastElement->next() or firstElement() if empty. const ValueElement* end() const { return _firstElement->plusBytes(_usedBytes); } /// Allocates space in _buffer. Copies existing data if there is any. void alloc(unsigned newSize); /// Call after adding field to _buffer and increasing _numFields void addFieldToHashTable(Position pos); // assumes _hashTabMask is (power of two) - 1 unsigned hashTabBuckets() const { return _hashTabMask + 1; } unsigned hashTabBytes() const { return hashTabBuckets() * sizeof(Position); } /// rehash on buffer growth if load-factor > .5 (attempt to keep lf < 1 when full) bool needRehash() const { return _numFields * 2 > hashTabBuckets(); } /// Initialize empty hash table void hashTabInit() { memset(_hashTab, -1, hashTabBytes()); } static unsigned hashKey(StringData name) { // TODO consider FNV-1a once we have a better benchmark corpus unsigned out; MurmurHash3_x86_32(name.rawData(), name.size(), 0, &out); return out; } unsigned bucketForKey(StringData name) const { return hashKey(name) & _hashTabMask; } /// Adds all fields to the hash table void rehash() { hashTabInit(); for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) addFieldToHashTable(it.position()); } enum { HASH_TAB_INIT_SIZE = 8, // must be power of 2 HASH_TAB_MIN = 4, // don't hash fields for docs smaller than this // set to 1 to always hash }; // _buffer layout: // ------------------------------------------------------------------------------- // | ValueElement1 Name1 | ValueElement2 Name2 | ... FREE SPACE ... | Hash Table | // ------------------------------------------------------------------------------- // ^ _buffer and _firstElement point here ^ // _bufferEnd and _hashTab point here ^ // // // When the buffer grows, the hash table moves to the new end. union { char* _buffer; ValueElement* _firstElement; }; union { // pointer to "end" of _buffer element space and start of hash table (same position) char* _bufferEnd; Position* _hashTab; // table lazily initialized once _numFields == HASH_TAB_MIN }; unsigned _usedBytes; // position where next field would start unsigned _numFields; // this includes removed fields unsigned _hashTabMask; // equal to hashTabBuckets()-1 but used more often bool _hasTextScore; // When adding more metadata fields, this should become a bitvector double _textScore; // When adding a field, make sure to update clone() method }; }