summaryrefslogtreecommitdiff
path: root/src/mongo/db/pipeline/document_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/db/pipeline/document_internal.h')
-rw-r--r--src/mongo/db/pipeline/document_internal.h594
1 files changed, 313 insertions, 281 deletions
diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h
index 93188ffb6f3..fa5988611b5 100644
--- a/src/mongo/db/pipeline/document_internal.h
+++ b/src/mongo/db/pipeline/document_internal.h
@@ -36,300 +36,332 @@
#include "mongo/db/pipeline/value.h"
namespace mongo {
- /** Helper class to make the position in a document abstract
- * Warning: This is NOT guaranteed to be the ordered position.
- * eg. the first field may not be at Position(0)
- */
- class Position {
- public:
- // This represents "not found" similar to std::string::npos
- Position() :index(static_cast<unsigned>(-1)) {}
- bool found() const { return index != Position().index; }
-
- bool operator == (Position rhs) const { return this->index == rhs.index; }
- bool operator != (Position rhs) const { return !(*this == rhs); }
-
- // For debugging and ASSERT_EQUALS in tests.
- template <typename OStream>
- friend OStream& operator<<(OStream& stream, Position p) { return stream << p.index; }
-
- private:
- explicit Position(size_t i) :index(i) {}
- unsigned index;
- friend class DocumentStorage;
- friend class DocumentStorageIterator;
- };
+/** Helper class to make the position in a document abstract
+ * Warning: This is NOT guaranteed to be the ordered position.
+ * eg. the first field may not be at Position(0)
+ */
+class Position {
+public:
+ // This represents "not found" similar to std::string::npos
+ Position() : index(static_cast<unsigned>(-1)) {}
+ bool found() const {
+ return index != Position().index;
+ }
+
+ bool operator==(Position rhs) const {
+ return this->index == rhs.index;
+ }
+ bool operator!=(Position rhs) const {
+ return !(*this == rhs);
+ }
+
+ // For debugging and ASSERT_EQUALS in tests.
+ template <typename OStream>
+ friend OStream& operator<<(OStream& stream, Position p) {
+ return stream << p.index;
+ }
+
+private:
+ explicit Position(size_t i) : index(i) {}
+ unsigned index;
+ friend class DocumentStorage;
+ friend class DocumentStorageIterator;
+};
#pragma pack(1)
- /** This is how values are stored in the DocumentStorage buffer
- * Internal class. Consumers shouldn't care about this.
- */
- class ValueElement {
- MONGO_DISALLOW_COPYING(ValueElement);
- public:
- Value val;
- Position nextCollision; // Position of next field with same hashBucket
- const int nameLen; // doesn't include '\0'
- const char _name[1]; // pointer to start of name (use nameSD instead)
-
- ValueElement* next() {
- return align(plusBytes(sizeof(ValueElement) + nameLen));
- }
-
- const ValueElement* next() const {
- return align(plusBytes(sizeof(ValueElement) + nameLen));
- }
-
- StringData nameSD() const { return StringData(_name, nameLen); }
-
-
- // helpers for doing pointer arithmetic with this class
- // Note: These don't dereference 'this' so they are safe to use with NULL
- char* ptr() { return reinterpret_cast<char*>(this); }
- const char* ptr() const { return reinterpret_cast<const char*>(this); }
- const ValueElement* plusBytes(size_t bytes) const {
- return reinterpret_cast<const ValueElement*>(ptr() + bytes);
- }
- ValueElement* plusBytes(size_t bytes) {
- return reinterpret_cast<ValueElement*>(ptr() + bytes);
- }
-
- // Round number or pointer up to N-byte boundary. No change if already aligned.
- template <typename T>
- static T align(T size) {
- const intmax_t ALIGNMENT = 8; // must be power of 2 and <= 16 (malloc alignment)
- // Can't use c++ cast because of conversion between intmax_t and both ints and pointers
- return (T)(((intmax_t)(size) + (ALIGNMENT-1)) & ~(ALIGNMENT-1));
- }
-
- private:
- ValueElement(); // this class should never be constructed
- ~ValueElement(); // or destructed
- };
- // Real size is sizeof(ValueElement) + nameLen
+/** This is how values are stored in the DocumentStorage buffer
+ * Internal class. Consumers shouldn't care about this.
+ */
+class ValueElement {
+ MONGO_DISALLOW_COPYING(ValueElement);
+
+public:
+ Value val;
+ Position nextCollision; // Position of next field with same hashBucket
+ const int nameLen; // doesn't include '\0'
+ const char _name[1]; // pointer to start of name (use nameSD instead)
+
+ ValueElement* next() {
+ return align(plusBytes(sizeof(ValueElement) + nameLen));
+ }
+
+ const ValueElement* next() const {
+ return align(plusBytes(sizeof(ValueElement) + nameLen));
+ }
+
+ StringData nameSD() const {
+ return StringData(_name, nameLen);
+ }
+
+
+ // helpers for doing pointer arithmetic with this class
+ // Note: These don't dereference 'this' so they are safe to use with NULL
+ char* ptr() {
+ return reinterpret_cast<char*>(this);
+ }
+ const char* ptr() const {
+ return reinterpret_cast<const char*>(this);
+ }
+ const ValueElement* plusBytes(size_t bytes) const {
+ return reinterpret_cast<const ValueElement*>(ptr() + bytes);
+ }
+ ValueElement* plusBytes(size_t bytes) {
+ return reinterpret_cast<ValueElement*>(ptr() + bytes);
+ }
+
+ // Round number or pointer up to N-byte boundary. No change if already aligned.
+ template <typename T>
+ static T align(T size) {
+ const intmax_t ALIGNMENT = 8; // must be power of 2 and <= 16 (malloc alignment)
+ // Can't use c++ cast because of conversion between intmax_t and both ints and pointers
+ return (T)(((intmax_t)(size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1));
+ }
+
+private:
+ ValueElement(); // this class should never be constructed
+ ~ValueElement(); // or destructed
+};
+// Real size is sizeof(ValueElement) + nameLen
#pragma pack()
- BOOST_STATIC_ASSERT(sizeof(ValueElement) == (sizeof(Value) +
- sizeof(Position) +
- sizeof(int) +
- 1));
-
- // This is an internal class for Document. See FieldIterator for the public version.
- class DocumentStorageIterator {
- public:
- // DocumentStorage::iterator() and iteratorAll() are easier to use
- DocumentStorageIterator(const ValueElement* first,
- const ValueElement* end,
- bool includeMissing)
- : _first(first)
- , _it(first)
- , _end(end)
- , _includeMissing(includeMissing) {
- if (!_includeMissing)
- skipMissing();
- }
-
- bool atEnd() const { return _it == _end; }
-
- const ValueElement& get() const { return *_it; }
-
- Position position() const { return Position(_it->ptr() - _first->ptr()); }
-
- void advance() {
+BOOST_STATIC_ASSERT(sizeof(ValueElement) == (sizeof(Value) + sizeof(Position) + sizeof(int) + 1));
+
+// This is an internal class for Document. See FieldIterator for the public version.
+class DocumentStorageIterator {
+public:
+ // DocumentStorage::iterator() and iteratorAll() are easier to use
+ DocumentStorageIterator(const ValueElement* first, const ValueElement* end, bool includeMissing)
+ : _first(first), _it(first), _end(end), _includeMissing(includeMissing) {
+ if (!_includeMissing)
+ skipMissing();
+ }
+
+ bool atEnd() const {
+ return _it == _end;
+ }
+
+ const ValueElement& get() const {
+ return *_it;
+ }
+
+ Position position() const {
+ return Position(_it->ptr() - _first->ptr());
+ }
+
+ void advance() {
+ advanceOne();
+ if (!_includeMissing)
+ skipMissing();
+ }
+
+ const ValueElement* operator->() {
+ return _it;
+ }
+ const ValueElement& operator*() {
+ return *_it;
+ }
+
+private:
+ void advanceOne() {
+ _it = _it->next();
+ }
+
+ void skipMissing() {
+ while (!atEnd() && _it->val.missing()) {
advanceOne();
- if (!_includeMissing)
- skipMissing();
- }
-
- const ValueElement* operator-> () { return _it; }
- const ValueElement& operator* () { return *_it; }
-
- private:
- void advanceOne() {
- _it = _it->next();
- }
-
- void skipMissing() {
- while (!atEnd() && _it->val.missing()) {
- advanceOne();
- }
- }
-
- const ValueElement* _first;
- const ValueElement* _it;
- const ValueElement* _end;
- bool _includeMissing;
- };
-
- /// Storage class used by both Document and MutableDocument
- class DocumentStorage : public RefCountable {
- public:
- // Note: default constructor should zero-init to support emptyDoc()
- DocumentStorage() : _buffer(NULL)
- , _bufferEnd(NULL)
- , _usedBytes(0)
- , _numFields(0)
- , _hashTabMask(0)
- , _hasTextScore(false)
- , _textScore(0)
- {}
- ~DocumentStorage();
-
- static const DocumentStorage& emptyDoc() {
- static const char emptyBytes[sizeof(DocumentStorage)] = {0};
- return *reinterpret_cast<const DocumentStorage*>(emptyBytes);
- }
-
- size_t size() const {
- // can't use _numFields because it includes removed Fields
- size_t count = 0;
- for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance())
- count++;
- return count;
- }
-
- /// Returns the position of the next field to be inserted
- Position getNextPosition() const { return Position(_usedBytes); }
-
- /// Returns the position of the named field (may be missing) or Position()
- Position findField(StringData name) const;
-
- // Document uses these
- const ValueElement& getField(Position pos) const {
- verify(pos.found());
- return *(_firstElement->plusBytes(pos.index));
- }
- Value getField(StringData name) const {
- Position pos = findField(name);
- if (!pos.found())
- return Value();
- return getField(pos).val;
- }
-
- // MutableDocument uses these
- ValueElement& getField(Position pos) {
- verify(pos.found());
- return *(_firstElement->plusBytes(pos.index));
- }
- Value& getField(StringData name) {
- Position pos = findField(name);
- if (!pos.found())
- return appendField(name); // TODO: find a way to avoid hashing name twice
- return getField(pos).val;
- }
-
- /// Adds a new field with missing Value at the end of the document
- Value& appendField(StringData name);
-
- /** Preallocates space for fields. Use this to attempt to prevent buffer growth.
- * This is only valid to call before anything is added to the document.
- */
- void reserveFields(size_t expectedFields);
-
- /// This skips missing values
- DocumentStorageIterator iterator() const {
- return DocumentStorageIterator(_firstElement, end(), false);
}
+ }
+
+ const ValueElement* _first;
+ const ValueElement* _it;
+ const ValueElement* _end;
+ bool _includeMissing;
+};
+
+/// Storage class used by both Document and MutableDocument
+class DocumentStorage : public RefCountable {
+public:
+ // Note: default constructor should zero-init to support emptyDoc()
+ DocumentStorage()
+ : _buffer(NULL),
+ _bufferEnd(NULL),
+ _usedBytes(0),
+ _numFields(0),
+ _hashTabMask(0),
+ _hasTextScore(false),
+ _textScore(0) {}
+ ~DocumentStorage();
+
+ static const DocumentStorage& emptyDoc() {
+ static const char emptyBytes[sizeof(DocumentStorage)] = {0};
+ return *reinterpret_cast<const DocumentStorage*>(emptyBytes);
+ }
+
+ size_t size() const {
+ // can't use _numFields because it includes removed Fields
+ size_t count = 0;
+ for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance())
+ count++;
+ return count;
+ }
+
+ /// Returns the position of the next field to be inserted
+ Position getNextPosition() const {
+ return Position(_usedBytes);
+ }
+
+ /// Returns the position of the named field (may be missing) or Position()
+ Position findField(StringData name) const;
+
+ // Document uses these
+ const ValueElement& getField(Position pos) const {
+ verify(pos.found());
+ return *(_firstElement->plusBytes(pos.index));
+ }
+ Value getField(StringData name) const {
+ Position pos = findField(name);
+ if (!pos.found())
+ return Value();
+ return getField(pos).val;
+ }
+
+ // MutableDocument uses these
+ ValueElement& getField(Position pos) {
+ verify(pos.found());
+ return *(_firstElement->plusBytes(pos.index));
+ }
+ Value& getField(StringData name) {
+ Position pos = findField(name);
+ if (!pos.found())
+ return appendField(name); // TODO: find a way to avoid hashing name twice
+ return getField(pos).val;
+ }
+
+ /// Adds a new field with missing Value at the end of the document
+ Value& appendField(StringData name);
+
+ /** Preallocates space for fields. Use this to attempt to prevent buffer growth.
+ * This is only valid to call before anything is added to the document.
+ */
+ void reserveFields(size_t expectedFields);
- /// This includes missing values
- DocumentStorageIterator iteratorAll() const {
- return DocumentStorageIterator(_firstElement, end(), true);
- }
+ /// This skips missing values
+ DocumentStorageIterator iterator() const {
+ return DocumentStorageIterator(_firstElement, end(), false);
+ }
- /// Shallow copy of this. Caller owns memory.
- boost::intrusive_ptr<DocumentStorage> clone() const;
+ /// This includes missing values
+ DocumentStorageIterator iteratorAll() const {
+ return DocumentStorageIterator(_firstElement, end(), true);
+ }
- size_t allocatedBytes() const {
- return !_buffer ? 0 : (_bufferEnd - _buffer + hashTabBytes());
- }
+ /// Shallow copy of this. Caller owns memory.
+ boost::intrusive_ptr<DocumentStorage> clone() const;
- /**
- * Copies all metadata from source if it has any.
- * Note: does not clear metadata from this.
- */
- void copyMetaDataFrom(const DocumentStorage& source) {
- if (source.hasTextScore()) {
- setTextScore(source.getTextScore());
- }
- }
+ size_t allocatedBytes() const {
+ return !_buffer ? 0 : (_bufferEnd - _buffer + hashTabBytes());
+ }
- bool hasTextScore() const { return _hasTextScore; }
- double getTextScore() const { return _textScore; }
- void setTextScore(double score) {
- _hasTextScore = true;
- _textScore = score;
+ /**
+ * Copies all metadata from source if it has any.
+ * Note: does not clear metadata from this.
+ */
+ void copyMetaDataFrom(const DocumentStorage& source) {
+ if (source.hasTextScore()) {
+ setTextScore(source.getTextScore());
}
+ }
+
+ bool hasTextScore() const {
+ return _hasTextScore;
+ }
+ double getTextScore() const {
+ return _textScore;
+ }
+ void setTextScore(double score) {
+ _hasTextScore = true;
+ _textScore = score;
+ }
+
+private:
+ /// Same as lastElement->next() or firstElement() if empty.
+ const ValueElement* end() const {
+ return _firstElement->plusBytes(_usedBytes);
+ }
+
+ /// Allocates space in _buffer. Copies existing data if there is any.
+ void alloc(unsigned newSize);
+
+ /// Call after adding field to _buffer and increasing _numFields
+ void addFieldToHashTable(Position pos);
+
+ // assumes _hashTabMask is (power of two) - 1
+ unsigned hashTabBuckets() const {
+ return _hashTabMask + 1;
+ }
+ unsigned hashTabBytes() const {
+ return hashTabBuckets() * sizeof(Position);
+ }
+
+ /// rehash on buffer growth if load-factor > .5 (attempt to keep lf < 1 when full)
+ bool needRehash() const {
+ return _numFields * 2 > hashTabBuckets();
+ }
+
+ /// Initialize empty hash table
+ void hashTabInit() {
+ memset(_hashTab, -1, hashTabBytes());
+ }
+
+ static unsigned hashKey(StringData name) {
+ // TODO consider FNV-1a once we have a better benchmark corpus
+ unsigned out;
+ MurmurHash3_x86_32(name.rawData(), name.size(), 0, &out);
+ return out;
+ }
+
+ unsigned bucketForKey(StringData name) const {
+ return hashKey(name) & _hashTabMask;
+ }
+
+ /// Adds all fields to the hash table
+ void rehash() {
+ hashTabInit();
+ for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance())
+ addFieldToHashTable(it.position());
+ }
+
+ enum {
+ HASH_TAB_INIT_SIZE = 8, // must be power of 2
+ HASH_TAB_MIN = 4, // don't hash fields for docs smaller than this
+ // set to 1 to always hash
+ };
- private:
-
- /// Same as lastElement->next() or firstElement() if empty.
- const ValueElement* end() const { return _firstElement->plusBytes(_usedBytes); }
-
- /// Allocates space in _buffer. Copies existing data if there is any.
- void alloc(unsigned newSize);
-
- /// Call after adding field to _buffer and increasing _numFields
- void addFieldToHashTable(Position pos);
-
- // assumes _hashTabMask is (power of two) - 1
- unsigned hashTabBuckets() const { return _hashTabMask + 1; }
- unsigned hashTabBytes() const { return hashTabBuckets() * sizeof(Position); }
-
- /// rehash on buffer growth if load-factor > .5 (attempt to keep lf < 1 when full)
- bool needRehash() const { return _numFields*2 > hashTabBuckets(); }
-
- /// Initialize empty hash table
- void hashTabInit() { memset(_hashTab, -1, hashTabBytes()); }
-
- static unsigned hashKey(StringData name) {
- // TODO consider FNV-1a once we have a better benchmark corpus
- unsigned out;
- MurmurHash3_x86_32(name.rawData(), name.size(), 0, &out);
- return out;
- }
+ // _buffer layout:
+ // -------------------------------------------------------------------------------
+ // | ValueElement1 Name1 | ValueElement2 Name2 | ... FREE SPACE ... | Hash Table |
+ // -------------------------------------------------------------------------------
+ // ^ _buffer and _firstElement point here ^
+ // _bufferEnd and _hashTab point here ^
+ //
+ //
+ // When the buffer grows, the hash table moves to the new end.
+ union {
+ char* _buffer;
+ ValueElement* _firstElement;
+ };
- unsigned bucketForKey(StringData name) const {
- return hashKey(name) & _hashTabMask;
- }
+ union {
+ // pointer to "end" of _buffer element space and start of hash table (same position)
+ char* _bufferEnd;
+ Position* _hashTab; // table lazily initialized once _numFields == HASH_TAB_MIN
+ };
- /// Adds all fields to the hash table
- void rehash() {
- hashTabInit();
- for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance())
- addFieldToHashTable(it.position());
- }
+ unsigned _usedBytes; // position where next field would start
+ unsigned _numFields; // this includes removed fields
+ unsigned _hashTabMask; // equal to hashTabBuckets()-1 but used more often
- enum {
- HASH_TAB_INIT_SIZE = 8, // must be power of 2
- HASH_TAB_MIN = 4, // don't hash fields for docs smaller than this
- // set to 1 to always hash
- };
-
- // _buffer layout:
- // -------------------------------------------------------------------------------
- // | ValueElement1 Name1 | ValueElement2 Name2 | ... FREE SPACE ... | Hash Table |
- // -------------------------------------------------------------------------------
- // ^ _buffer and _firstElement point here ^
- // _bufferEnd and _hashTab point here ^
- //
- //
- // When the buffer grows, the hash table moves to the new end.
- union {
- char* _buffer;
- ValueElement* _firstElement;
- };
-
- union {
- // pointer to "end" of _buffer element space and start of hash table (same position)
- char* _bufferEnd;
- Position* _hashTab; // table lazily initialized once _numFields == HASH_TAB_MIN
- };
-
- unsigned _usedBytes; // position where next field would start
- unsigned _numFields; // this includes removed fields
- unsigned _hashTabMask; // equal to hashTabBuckets()-1 but used more often
-
- bool _hasTextScore; // When adding more metadata fields, this should become a bitvector
- double _textScore;
- // When adding a field, make sure to update clone() method
- };
+ bool _hasTextScore; // When adding more metadata fields, this should become a bitvector
+ double _textScore;
+ // When adding a field, make sure to update clone() method
+};
}