1 files changed, 313 insertions, 281 deletions
diff --git a/src/mongo/db/pipeline/document_internal.h b/src/mongo/db/pipeline/document_internal.h
index 93188ffb6f3..fa5988611b5 100644
--- a/src/mongo/db/pipeline/document_internal.h
+++ b/src/mongo/db/pipeline/document_internal.h
@@ -36,300 +36,332 @@
 #include "mongo/db/pipeline/value.h"
 
 namespace mongo {
-    /** Helper class to make the position in a document abstract
-     *  Warning: This is NOT guaranteed to be the ordered position.
-     *           eg. the first field may not be at Position(0)
-     */
-    class Position {
-    public:
-        // This represents "not found" similar to std::string::npos
-        Position() :index(static_cast<unsigned>(-1)) {}
-        bool found() const { return index != Position().index; }
-
-        bool operator == (Position rhs) const { return this->index == rhs.index; }
-        bool operator != (Position rhs) const { return !(*this == rhs); }
-
-        // For debugging and ASSERT_EQUALS in tests.
-        template <typename OStream>
-        friend OStream& operator<<(OStream& stream, Position p) { return stream << p.index; }
-
-    private:
-        explicit Position(size_t i) :index(i) {}
-        unsigned index;
-        friend class DocumentStorage;
-        friend class DocumentStorageIterator;
-    };
+/** Helper class to make the position in a document abstract
+ *  Warning: This is NOT guaranteed to be the ordered position.
+ *           eg. the first field may not be at Position(0)
+ */
+class Position {
+public:
+    // This represents "not found" similar to std::string::npos
+    Position() : index(static_cast<unsigned>(-1)) {}
+    bool found() const {
+        return index != Position().index;
+    }
+
+    bool operator==(Position rhs) const {
+        return this->index == rhs.index;
+    }
+    bool operator!=(Position rhs) const {
+        return !(*this == rhs);
+    }
+
+    // For debugging and ASSERT_EQUALS in tests.
+    template <typename OStream>
+    friend OStream& operator<<(OStream& stream, Position p) {
+        return stream << p.index;
+    }
+
+private:
+    explicit Position(size_t i) : index(i) {}
+    unsigned index;
+    friend class DocumentStorage;
+    friend class DocumentStorageIterator;
+};
 
 #pragma pack(1)
-    /** This is how values are stored in the DocumentStorage buffer
-     *  Internal class. Consumers shouldn't care about this.
-     */
-    class ValueElement {
-        MONGO_DISALLOW_COPYING(ValueElement);
-    public:
-        Value val;
-        Position nextCollision; // Position of next field with same hashBucket
-        const int nameLen; // doesn't include '\0'
-        const char _name[1]; // pointer to start of name (use nameSD instead)
-
-        ValueElement* next() {
-            return align(plusBytes(sizeof(ValueElement) + nameLen));
-        }
-
-        const ValueElement* next() const {
-            return align(plusBytes(sizeof(ValueElement) + nameLen));
-        }
-
-        StringData nameSD() const { return StringData(_name, nameLen); }
-
-
-        // helpers for doing pointer arithmetic with this class
-        // Note: These don't dereference 'this' so they are safe to use with NULL
-        char* ptr() { return reinterpret_cast<char*>(this); }
-        const char* ptr() const { return reinterpret_cast<const char*>(this); }
-        const ValueElement* plusBytes(size_t bytes) const {
-            return reinterpret_cast<const ValueElement*>(ptr() + bytes);
-        }
-        ValueElement* plusBytes(size_t bytes) {
-            return reinterpret_cast<ValueElement*>(ptr() + bytes);
-        }
-
-        // Round number or pointer up to N-byte boundary. No change if already aligned.
-        template <typename T>
-        static T align(T size) {
-            const intmax_t ALIGNMENT = 8; // must be power of 2 and <= 16 (malloc alignment)
-            // Can't use c++ cast because of conversion between intmax_t and both ints and pointers
-            return (T)(((intmax_t)(size) + (ALIGNMENT-1)) & ~(ALIGNMENT-1));
-        }
-
-    private:
-        ValueElement(); // this class should never be constructed
-        ~ValueElement(); // or destructed
-    };
-    // Real size is sizeof(ValueElement) + nameLen
+/** This is how values are stored in the DocumentStorage buffer
+ *  Internal class. Consumers shouldn't care about this.
+ */
+class ValueElement {
+    MONGO_DISALLOW_COPYING(ValueElement);
+
+public:
+    Value val;
+    Position nextCollision;  // Position of next field with same hashBucket
+    const int nameLen;       // doesn't include '\0'
+    const char _name[1];     // pointer to start of name (use nameSD instead)
+
+    ValueElement* next() {
+        return align(plusBytes(sizeof(ValueElement) + nameLen));
+    }
+
+    const ValueElement* next() const {
+        return align(plusBytes(sizeof(ValueElement) + nameLen));
+    }
+
+    StringData nameSD() const {
+        return StringData(_name, nameLen);
+    }
+
+
+    // helpers for doing pointer arithmetic with this class
+    // Note: These don't dereference 'this' so they are safe to use with NULL
+    char* ptr() {
+        return reinterpret_cast<char*>(this);
+    }
+    const char* ptr() const {
+        return reinterpret_cast<const char*>(this);
+    }
+    const ValueElement* plusBytes(size_t bytes) const {
+        return reinterpret_cast<const ValueElement*>(ptr() + bytes);
+    }
+    ValueElement* plusBytes(size_t bytes) {
+        return reinterpret_cast<ValueElement*>(ptr() + bytes);
+    }
+
+    // Round number or pointer up to N-byte boundary. No change if already aligned.
+    template <typename T>
+    static T align(T size) {
+        const intmax_t ALIGNMENT = 8;  // must be power of 2 and <= 16 (malloc alignment)
+        // Can't use c++ cast because of conversion between intmax_t and both ints and pointers
+        return (T)(((intmax_t)(size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1));
+    }
+
+private:
+    ValueElement();   // this class should never be constructed
+    ~ValueElement();  // or destructed
+};
+// Real size is sizeof(ValueElement) + nameLen
 #pragma pack()
-    BOOST_STATIC_ASSERT(sizeof(ValueElement) == (sizeof(Value) +
-                                                 sizeof(Position) +
-                                                 sizeof(int) +
-                                                 1));
-
-    // This is an internal class for Document. See FieldIterator for the public version.
-    class DocumentStorageIterator {
-    public:
-        // DocumentStorage::iterator() and iteratorAll() are easier to use
-        DocumentStorageIterator(const ValueElement* first,
-                                const ValueElement* end,
-                                bool includeMissing)
-                : _first(first)
-                , _it(first)
-                , _end(end)
-                , _includeMissing(includeMissing) {
-            if (!_includeMissing)
-                skipMissing();
-        }
-
-        bool atEnd() const { return _it == _end; }
-
-        const ValueElement& get() const { return *_it; }
-
-        Position position() const { return Position(_it->ptr() - _first->ptr()); }
-
-        void advance() {
+BOOST_STATIC_ASSERT(sizeof(ValueElement) == (sizeof(Value) + sizeof(Position) + sizeof(int) + 1));
+
+// This is an internal class for Document. See FieldIterator for the public version.
+class DocumentStorageIterator {
+public:
+    // DocumentStorage::iterator() and iteratorAll() are easier to use
+    DocumentStorageIterator(const ValueElement* first, const ValueElement* end, bool includeMissing)
+        : _first(first), _it(first), _end(end), _includeMissing(includeMissing) {
+        if (!_includeMissing)
+            skipMissing();
+    }
+
+    bool atEnd() const {
+        return _it == _end;
+    }
+
+    const ValueElement& get() const {
+        return *_it;
+    }
+
+    Position position() const {
+        return Position(_it->ptr() - _first->ptr());
+    }
+
+    void advance() {
+        advanceOne();
+        if (!_includeMissing)
+            skipMissing();
+    }
+
+    const ValueElement* operator->() {
+        return _it;
+    }
+    const ValueElement& operator*() {
+        return *_it;
+    }
+
+private:
+    void advanceOne() {
+        _it = _it->next();
+    }
+
+    void skipMissing() {
+        while (!atEnd() && _it->val.missing()) {
             advanceOne();
-            if (!_includeMissing)
-                skipMissing();
-        }
-
-        const ValueElement* operator-> () { return _it; }
-        const ValueElement& operator* () { return *_it; }
-
-    private:
-        void advanceOne() {
-            _it = _it->next();
-        }
-
-        void skipMissing() {
-            while (!atEnd() && _it->val.missing()) {
-                advanceOne();
-            }
-        }
-
-        const ValueElement* _first;
-        const ValueElement* _it;
-        const ValueElement* _end;
-        bool _includeMissing;
-    };
-
-    /// Storage class used by both Document and MutableDocument
-    class DocumentStorage :  public RefCountable {
-    public:
-        // Note: default constructor should zero-init to support emptyDoc()
-        DocumentStorage() : _buffer(NULL)
-                          , _bufferEnd(NULL)
-                          , _usedBytes(0)
-                          , _numFields(0)
-                          , _hashTabMask(0)
-                          , _hasTextScore(false)
-                          , _textScore(0)
-        {}
-        ~DocumentStorage();
-
-        static const DocumentStorage& emptyDoc() {
-            static const char emptyBytes[sizeof(DocumentStorage)] = {0};
-            return *reinterpret_cast<const DocumentStorage*>(emptyBytes);
-        }
-
-        size_t size() const {
-            // can't use _numFields because it includes removed Fields
-            size_t count = 0;
-            for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance())
-                count++;
-            return count;
-        }
-
-        /// Returns the position of the next field to be inserted
-        Position getNextPosition() const { return Position(_usedBytes); }
-
-        /// Returns the position of the named field (may be missing) or Position()
-        Position findField(StringData name) const;
-
-        // Document uses these
-        const ValueElement& getField(Position pos) const {
-            verify(pos.found());
-            return *(_firstElement->plusBytes(pos.index));
-        }
-        Value getField(StringData name) const {
-            Position pos = findField(name);
-            if (!pos.found())
-                return Value();
-            return getField(pos).val;
-        }
-
-        // MutableDocument uses these
-        ValueElement& getField(Position pos) {
-            verify(pos.found());
-            return *(_firstElement->plusBytes(pos.index));
-        }
-        Value& getField(StringData name) {
-            Position pos = findField(name);
-            if (!pos.found())
-                return appendField(name); // TODO: find a way to avoid hashing name twice
-            return getField(pos).val;
-        }
-
-        /// Adds a new field with missing Value at the end of the document
-        Value& appendField(StringData name);
-
-        /** Preallocates space for fields. Use this to attempt to prevent buffer growth.
-         *  This is only valid to call before anything is added to the document.
-         */
-        void reserveFields(size_t expectedFields);
-
-        /// This skips missing values
-        DocumentStorageIterator iterator() const {
-            return DocumentStorageIterator(_firstElement, end(), false);
         }
+    }
+
+    const ValueElement* _first;
+    const ValueElement* _it;
+    const ValueElement* _end;
+    bool _includeMissing;
+};
+
+/// Storage class used by both Document and MutableDocument
+class DocumentStorage : public RefCountable {
+public:
+    // Note: default constructor should zero-init to support emptyDoc()
+    DocumentStorage()
+        : _buffer(NULL),
+          _bufferEnd(NULL),
+          _usedBytes(0),
+          _numFields(0),
+          _hashTabMask(0),
+          _hasTextScore(false),
+          _textScore(0) {}
+    ~DocumentStorage();
+
+    static const DocumentStorage& emptyDoc() {
+        static const char emptyBytes[sizeof(DocumentStorage)] = {0};
+        return *reinterpret_cast<const DocumentStorage*>(emptyBytes);
+    }
+
+    size_t size() const {
+        // can't use _numFields because it includes removed Fields
+        size_t count = 0;
+        for (DocumentStorageIterator it = iterator(); !it.atEnd(); it.advance())
+            count++;
+        return count;
+    }
+
+    /// Returns the position of the next field to be inserted
+    Position getNextPosition() const {
+        return Position(_usedBytes);
+    }
+
+    /// Returns the position of the named field (may be missing) or Position()
+    Position findField(StringData name) const;
+
+    // Document uses these
+    const ValueElement& getField(Position pos) const {
+        verify(pos.found());
+        return *(_firstElement->plusBytes(pos.index));
+    }
+    Value getField(StringData name) const {
+        Position pos = findField(name);
+        if (!pos.found())
+            return Value();
+        return getField(pos).val;
+    }
+
+    // MutableDocument uses these
+    ValueElement& getField(Position pos) {
+        verify(pos.found());
+        return *(_firstElement->plusBytes(pos.index));
+    }
+    Value& getField(StringData name) {
+        Position pos = findField(name);
+        if (!pos.found())
+            return appendField(name);  // TODO: find a way to avoid hashing name twice
+        return getField(pos).val;
+    }
+
+    /// Adds a new field with missing Value at the end of the document
+    Value& appendField(StringData name);
+
+    /** Preallocates space for fields. Use this to attempt to prevent buffer growth.
+     *  This is only valid to call before anything is added to the document.
+     */
+    void reserveFields(size_t expectedFields);
 
-        /// This includes missing values
-        DocumentStorageIterator iteratorAll() const {
-            return DocumentStorageIterator(_firstElement, end(), true);
-        }
+    /// This skips missing values
+    DocumentStorageIterator iterator() const {
+        return DocumentStorageIterator(_firstElement, end(), false);
+    }
 
-        /// Shallow copy of this. Caller owns memory.
-        boost::intrusive_ptr<DocumentStorage> clone() const;
+    /// This includes missing values
+    DocumentStorageIterator iteratorAll() const {
+        return DocumentStorageIterator(_firstElement, end(), true);
+    }
 
-        size_t allocatedBytes() const {
-            return !_buffer ? 0 : (_bufferEnd - _buffer + hashTabBytes());
-        }
+    /// Shallow copy of this. Caller owns memory.
+    boost::intrusive_ptr<DocumentStorage> clone() const;
 
-        /**
-         * Copies all metadata from source if it has any.
-         * Note: does not clear metadata from this.
-         */
-        void copyMetaDataFrom(const DocumentStorage& source) {
-            if (source.hasTextScore()) {
-                setTextScore(source.getTextScore());
-            }
-        }
+    size_t allocatedBytes() const {
+        return !_buffer ? 0 : (_bufferEnd - _buffer + hashTabBytes());
+    }
 
-        bool hasTextScore() const { return _hasTextScore; }
-        double getTextScore() const { return _textScore; }
-        void setTextScore(double score) {
-            _hasTextScore = true;
-            _textScore = score;
+    /**
+     * Copies all metadata from source if it has any.
+     * Note: does not clear metadata from this.
+     */
+    void copyMetaDataFrom(const DocumentStorage& source) {
+        if (source.hasTextScore()) {
+            setTextScore(source.getTextScore());
         }
+    }
+
+    bool hasTextScore() const {
+        return _hasTextScore;
+    }
+    double getTextScore() const {
+        return _textScore;
+    }
+    void setTextScore(double score) {
+        _hasTextScore = true;
+        _textScore = score;
+    }
+
+private:
+    /// Same as lastElement->next() or firstElement() if empty.
+    const ValueElement* end() const {
+        return _firstElement->plusBytes(_usedBytes);
+    }
+
+    /// Allocates space in _buffer. Copies existing data if there is any.
+    void alloc(unsigned newSize);
+
+    /// Call after adding field to _buffer and increasing _numFields
+    void addFieldToHashTable(Position pos);
+
+    // assumes _hashTabMask is (power of two) - 1
+    unsigned hashTabBuckets() const {
+        return _hashTabMask + 1;
+    }
+    unsigned hashTabBytes() const {
+        return hashTabBuckets() * sizeof(Position);
+    }
+
+    /// rehash on buffer growth if load-factor > .5 (attempt to keep lf < 1 when full)
+    bool needRehash() const {
+        return _numFields * 2 > hashTabBuckets();
+    }
+
+    /// Initialize empty hash table
+    void hashTabInit() {
+        memset(_hashTab, -1, hashTabBytes());
+    }
+
+    static unsigned hashKey(StringData name) {
+        // TODO consider FNV-1a once we have a better benchmark corpus
+        unsigned out;
+        MurmurHash3_x86_32(name.rawData(), name.size(), 0, &out);
+        return out;
+    }
+
+    unsigned bucketForKey(StringData name) const {
+        return hashKey(name) & _hashTabMask;
+    }
+
+    /// Adds all fields to the hash table
+    void rehash() {
+        hashTabInit();
+        for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance())
+            addFieldToHashTable(it.position());
+    }
+
+    enum {
+        HASH_TAB_INIT_SIZE = 8,  // must be power of 2
+        HASH_TAB_MIN = 4,        // don't hash fields for docs smaller than this
+                                 // set to 1 to always hash
+    };
 
-    private:
-
-        /// Same as lastElement->next() or firstElement() if empty.
-        const ValueElement* end() const { return _firstElement->plusBytes(_usedBytes); }
-
-        /// Allocates space in _buffer. Copies existing data if there is any.
-        void alloc(unsigned newSize);
-
-        /// Call after adding field to _buffer and increasing _numFields
-        void addFieldToHashTable(Position pos);
-
-        // assumes _hashTabMask is (power of two) - 1
-        unsigned hashTabBuckets() const { return _hashTabMask + 1; }
-        unsigned hashTabBytes() const { return hashTabBuckets() * sizeof(Position); }
-
-        /// rehash on buffer growth if load-factor > .5 (attempt to keep lf < 1 when full)
-        bool needRehash() const { return _numFields*2 > hashTabBuckets(); }
-
-        /// Initialize empty hash table
-        void hashTabInit() { memset(_hashTab, -1, hashTabBytes()); }
-
-        static unsigned hashKey(StringData name) {
-            // TODO consider FNV-1a once we have a better benchmark corpus
-            unsigned out;
-            MurmurHash3_x86_32(name.rawData(), name.size(), 0, &out);
-            return out;
-        }
+    // _buffer layout:
+    // -------------------------------------------------------------------------------
+    // | ValueElement1 Name1 | ValueElement2 Name2 | ... FREE SPACE ... | Hash Table |
+    // -------------------------------------------------------------------------------
+    //  ^ _buffer and _firstElement point here                           ^
+    //                                _bufferEnd and _hashTab point here ^
+    //
+    //
+    // When the buffer grows, the hash table moves to the new end.
+    union {
+        char* _buffer;
+        ValueElement* _firstElement;
+    };
 
-        unsigned bucketForKey(StringData name) const {
-            return hashKey(name) & _hashTabMask;
-        }
+    union {
+        // pointer to "end" of _buffer element space and start of hash table (same position)
+        char* _bufferEnd;
+        Position* _hashTab;  // table lazily initialized once _numFields == HASH_TAB_MIN
+    };
 
-        /// Adds all fields to the hash table
-        void rehash() {
-            hashTabInit();
-            for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance())
-                addFieldToHashTable(it.position());
-        }
+    unsigned _usedBytes;    // position where next field would start
+    unsigned _numFields;    // this includes removed fields
+    unsigned _hashTabMask;  // equal to hashTabBuckets()-1 but used more often
 
-        enum {
-            HASH_TAB_INIT_SIZE = 8, // must be power of 2
-            HASH_TAB_MIN = 4, // don't hash fields for docs smaller than this
-                              // set to 1 to always hash
-        };
-
-        // _buffer layout:
-        // -------------------------------------------------------------------------------
-        // | ValueElement1 Name1 | ValueElement2 Name2 | ... FREE SPACE ... | Hash Table |
-        // -------------------------------------------------------------------------------
-        //  ^ _buffer and _firstElement point here                           ^
-        //                                _bufferEnd and _hashTab point here ^
-        //
-        //
-        // When the buffer grows, the hash table moves to the new end.
-        union {
-            char* _buffer;
-            ValueElement* _firstElement;
-        };
-
-        union {
-            // pointer to "end" of _buffer element space and start of hash table (same position)
-            char* _bufferEnd;
-            Position* _hashTab; // table lazily initialized once _numFields == HASH_TAB_MIN
-        };
-
-        unsigned _usedBytes; // position where next field would start
-        unsigned _numFields; // this includes removed fields
-        unsigned _hashTabMask; // equal to hashTabBuckets()-1 but used more often
-
-        bool _hasTextScore; // When adding more metadata fields, this should become a bitvector
-        double _textScore;
-        // When adding a field, make sure to update clone() method
-    };
+    bool _hasTextScore;  // When adding more metadata fields, this should become a bitvector
+    double _textScore;
+    // When adding a field, make sure to update clone() method
+};
 }