Rewrite Document and Value classes

author: Mathias Stearn <mathias@10gen.com> 2012-10-26 19:35:31 -0400
committer: Mathias Stearn <mathias@10gen.com> 2012-11-16 17:32:59 -0500
commit: ffd194f68edf4a61bff35fac591452b715cff849 (patch)
tree: 67ed1e6300f04d1333fee769e9c1ba2f8b4d41d6 /src/mongo/db/pipeline/document.cpp
parent: 7eb3fc28fbd9cb0464cbf6dba5ffb497ba2088e9 (diff)
download: mongo-ffd194f68edf4a61bff35fac591452b715cff849.tar.gz
1 files changed, 254 insertions, 129 deletions
diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp
index e34af40b8dd..65b0ec6e2de 100755..100644
--- a/src/mongo/db/pipeline/document.cpp
+++ b/src/mongo/db/pipeline/document.cpp
@@ -15,159 +15,292 @@
  */
 
 #include "pch.h"
+
+#include "mongo/db/pipeline/document.h"
+
 #include <boost/functional/hash.hpp>
-#include "db/jsobj.h"
-#include "db/pipeline/document.h"
-#include "db/pipeline/value.h"
-#include "util/mongoutils/str.h"
+
+#include "mongo/db/jsobj.h"
+#include "mongo/db/pipeline/field_path.h"
+#include "mongo/util/mongoutils/str.h"
 
 namespace mongo {
     using namespace mongoutils;
 
-    string Document::idName("_id");
+    Position DocumentStorage::findField(StringData requested) const {
+        if (_numFields >= HASH_TAB_MIN) { // hash lookup
+            const unsigned bucket = bucketForKey(requested);
 
-    intrusive_ptr<Document> Document::createFromBsonObj(BSONObj* pBsonObj) {
-        return new Document(pBsonObj);
-    }
+            Position pos = _hashTab[bucket];
+            while (pos.found()) {
+                const ValueElement& elem = getField(pos);
+                if (str::equals(requested.data(), elem.name))
+                    return pos;
 
-    Document::Document(BSONObj* pBsonObj) {
-        const int fields = pBsonObj->nFields();
-        vFieldName.reserve(fields);
-        vpValue.reserve(fields);
-        BSONObjIterator bsonIterator(pBsonObj->begin());
-        while(bsonIterator.more()) {
-            BSONElement bsonElement(bsonIterator.next());
-            string fieldName(bsonElement.fieldName());
+                // possible collision
+                pos = elem.nextCollision;
+            }
+        }
+        else if (_numFields) { // linear scan
+            for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) {
+                if (size_t(it->nameLen) == requested.size()
+                    && str::equals(it->name, requested.data())) {
+                    return it.position();
+                }
+            }
+        }
 
-            // LATER grovel through structures???
-            intrusive_ptr<const Value> pValue(
-                Value::createFromBsonElement(&bsonElement));
+        // if we got here, there's no such field
+        return Position();
+    }
 
-            vFieldName.push_back(fieldName);
-            vpValue.push_back(pValue);
+    Value& DocumentStorage::appendField(StringData name) {
+        Position pos = getNextPosition();
+        const int nameSize = name.size();
+
+        // these are the same for everyone
+        const Position nextCollision;
+        const Value value;
+
+        // Make room for new field (and padding at end for alignment)
+        const unsigned newUsed = ValueElement::align(_usedBytes + sizeof(ValueElement) + nameSize);
+        if (_buffer + newUsed > _bufferEnd)
+            alloc(newUsed);
+        _usedBytes = newUsed;
+
+        // Append structure of a ValueElement
+        char* dest = _buffer + pos.index; // must be after alloc since it changes _buffer
+#define append(x) memcpy(dest, &(x), sizeof(x)); dest += sizeof(x)
+        append(value);
+        append(nextCollision);
+        append(nameSize);
+        memcpy(dest, name.data(), nameSize + 1/*NUL*/);
+        // Padding for alignment handled above
+#undef append
+
+        // Make sure next field starts where we expect it
+        fassert(16486, getField(pos).next()->ptr() == _buffer + _usedBytes);
+
+        _numFields++;
+
+        if (_numFields > HASH_TAB_MIN) {
+            addFieldToHashTable(pos);
+        }
+        else if (_numFields == HASH_TAB_MIN) {
+            // adds all fields to hash table (including the one we just added)
+            rehash();
         }
-    }
 
-    void Document::toBson(BSONObjBuilder* pBuilder) const {
-        const size_t n = vFieldName.size();
-        for(size_t i = 0; i < n; ++i)
-            vpValue[i]->addToBsonObj(pBuilder, vFieldName[i]);
+        return getField(pos);
     }
 
-    intrusive_ptr<Document> Document::create(size_t sizeHint) {
-        intrusive_ptr<Document> pDocument(new Document(sizeHint));
-        return pDocument;
+    // Call after adding field to _fields and increasing _numFields
+    void DocumentStorage::addFieldToHashTable(Position pos) {
+        ValueElement& elem = getField(pos);
+        elem.nextCollision = Position();
+
+        const unsigned bucket = bucketForKey(elem.nameSD());
+
+        Position* posPtr = &_hashTab[bucket];
+        while (posPtr->found()) {
+            // collision: walk links and add new to end
+            posPtr = &getField(*posPtr).nextCollision;
+        }
+        *posPtr = Position(pos.index);
     }
 
-    Document::Document(size_t sizeHint):
-        vFieldName(),
-        vpValue() {
-        if (sizeHint) {
-            vFieldName.reserve(sizeHint);
-            vpValue.reserve(sizeHint);
+    void DocumentStorage::alloc(unsigned newSize) {
+        const bool firstAlloc = !_buffer;
+        const bool doingRehash = needRehash();
+        const size_t oldCapacity = _bufferEnd - _buffer;
+
+        // make new bucket count big enough
+        while (needRehash() || hashTabBuckets() < HASH_TAB_INIT_SIZE)
+            _hashTabMask = hashTabBuckets()*2 - 1;
+
+        // only allocate power-of-two sized space > 128 bytes
+        size_t capacity = 128;
+        while (capacity < newSize + hashTabBytes())
+            capacity *= 2;
+
+        uassert(16490, "Tried to make oversized document",
+                capacity <= size_t(BufferMaxSize));
+
+        boost::scoped_array<char> oldBuf(_buffer);
+        _buffer = new char[capacity];
+        _bufferEnd = _buffer + capacity - hashTabBytes();
+
+        if (!firstAlloc) {
+            // This just copies the elements
+            memcpy(_buffer, oldBuf.get(), _usedBytes);
+
+            if (_numFields >= HASH_TAB_MIN) {
+                // if we were hashing, deal with the hash table
+                if (doingRehash) {
+                    rehash();
+                }
+                else {
+                    // no rehash needed so just slide table down to new position
+                    memcpy(_hashTab, oldBuf.get() + oldCapacity, hashTabBytes());
+                }
+            }
         }
     }
 
-    intrusive_ptr<Document> Document::clone() {
-        const size_t n = vFieldName.size();
-        intrusive_ptr<Document> pNew(Document::create(n));
-        for(size_t i = 0; i < n; ++i)
-            pNew->addField(vFieldName[i], vpValue[i]);
+    void DocumentStorage::reserveFields(size_t expectedFields) {
+        fassert(16487, !_buffer);
+
+        unsigned buckets = HASH_TAB_INIT_SIZE;
+        while (buckets < expectedFields)
+            buckets *= 2;
+        _hashTabMask = buckets - 1;
+
+        // Using expectedFields+1 to allow space for long field names
+        const size_t newSize = (expectedFields+1) * ValueElement::align(sizeof(ValueElement));
 
-        return pNew;
+        uassert(16491, "Tried to make oversized document",
+                newSize <= size_t(BufferMaxSize));
+
+        _buffer = new char[newSize + hashTabBytes()];
+        _bufferEnd = _buffer + newSize;
     }
 
-    Document::~Document() {
+    intrusive_ptr<DocumentStorage> DocumentStorage::clone() const {
+        intrusive_ptr<DocumentStorage> out (new DocumentStorage());
+
+        // Make a copy of the buffer.
+        // It is very important that the positions of each field are the same after cloning.
+        const size_t bufferBytes = (_bufferEnd + hashTabBytes()) - _buffer;
+        out->_buffer = new char[bufferBytes];
+        out->_bufferEnd = out->_buffer + (_bufferEnd - _buffer);
+        memcpy(out->_buffer, _buffer, bufferBytes);
+
+        // Copy remaining fields
+        out->_usedBytes = _usedBytes;
+        out->_numFields = _numFields;
+        out->_hashTabMask = _hashTabMask;
+
+        // Tell values that they have been memcpyed (updates ref counts)
+        for (DocumentStorageIterator it = out->iteratorAll(); !it.atEnd(); it.advance()) {
+            it->val.memcpyed();
+        }
+
+        return out;
     }
 
-    FieldIterator *Document::createFieldIterator() {
-        return new FieldIterator(intrusive_ptr<Document>(this));
+    DocumentStorage::~DocumentStorage() {
+        boost::scoped_array<char> deleteBufferAtScopeEnd (_buffer);
+
+        for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) {
+            it->val.~Value(); // explicit destructor call
+        }
     }
 
-    intrusive_ptr<const Value> Document::getValue(const string &fieldName) {
-        /*
-          For now, assume the number of fields is small enough that iteration
-          is ok.  Later, if this gets large, we can create a map into the
-          vector for these lookups.
+    Document::Document(const BSONObj& bson) {
+        MutableDocument md(bson.nFields());
 
-          Note that because of the schema-less nature of this data, we always
-          have to look, and can't assume that the requested field is always
-          in a particular place as we would with a statically compilable
-          reference.
-        */
-        const size_t n = vFieldName.size();
-        for(size_t i = 0; i < n; ++i) {
-            if (fieldName.compare(vFieldName[i]) == 0)
-                return vpValue[i];
+        BSONObjIterator it(bson);
+        while(it.more()) {
+            BSONElement bsonElement(it.next());
+            md.addField(bsonElement.fieldName(), Value(bsonElement));
         }
 
-        return(intrusive_ptr<const Value>());
+        *this = md.freeze();
     }
 
-    void Document::addField(const string &fieldName,
-                            const intrusive_ptr<const Value> &pValue) {
-        vFieldName.push_back(fieldName);
-        vpValue.push_back(pValue);
+    void Document::toBson(BSONObjBuilder* pBuilder) const {
+        for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
+            it->val.addToBsonObj(pBuilder, it->nameSD());
+        }
     }
 
-    void Document::setField(size_t index,
-                            const string &fieldName,
-                            const intrusive_ptr<const Value> &pValue) {
-        /* special case:  should this field be removed? */
-        if (!pValue.get()) {
-            vFieldName.erase(vFieldName.begin() + index);
-            vpValue.erase(vpValue.begin() + index);
-            return;
+    MutableDocument::MutableDocument(size_t expectedFields)
+        : _storageHolder(NULL)
+        , _storage(_storageHolder)
+    {
+        if (expectedFields) {
+            storage().reserveFields(expectedFields);
         }
-
-        /* set the indicated field */
-        vFieldName[index] = fieldName;
-        vpValue[index] = pValue;
     }
 
-    intrusive_ptr<const Value> Document::getField(const string &fieldName) const {
-        const size_t n = vFieldName.size();
-        for(size_t i = 0; i < n; ++i) {
-            if (fieldName.compare(vFieldName[i]) == 0)
-                return vpValue[i];
+    MutableValue MutableDocument::getNestedFieldHelper(MutableDocument& md,
+                                                       const vector<Position>& positions,
+                                                       size_t level) {
+        fassert(16488, !positions.empty());
+
+        if (level == positions.size()-1) {
+            return md[positions[level]];
         }
+        else {
+            MutableDocument nested (md[positions[level]]);
+            return getNestedFieldHelper(nested, positions, level+1);
+        }
+    }
 
-        /* if we got here, there's no such field */
-        return intrusive_ptr<const Value>();
+    MutableValue MutableDocument::getNestedField(const vector<Position>& positions) {
+        return getNestedFieldHelper(*this, positions, 0);
     }
 
-    size_t Document::getApproximateSize() const {
-        size_t size = sizeof(Document);
-        const size_t n = vpValue.size();
-        for(size_t i = 0; i < n; ++i)
-            size += vpValue[i]->getApproximateSize();
+    static Value getNestedFieldHelper(const Document& doc,
+                                      const FieldPath& fieldNames,
+                                      vector<Position>* positions,
+                                      size_t level) {
 
-        return size;
+        fassert(16489, fieldNames.getPathLength());
+
+        const string& fieldName = fieldNames.getFieldName(level);
+        const Position pos = doc.positionOf(fieldName);
+
+        if (!pos.found())
+            return Value();
+
+        if (positions)
+            positions->push_back(pos);
+
+        if (level == fieldNames.getPathLength()-1)
+            return doc.getField(pos);
+
+        Value val = doc.getField(pos);
+        if (val.getType() != Object)
+            return Value();
+
+        return getNestedFieldHelper(val.getDocument(), fieldNames, positions, level+1);
+    }
+
+    const Value Document::getNestedField(const FieldPath& fieldNames,
+                                         vector<Position>* positions) const {
+        return getNestedFieldHelper(*this, fieldNames, positions, 0);
     }
 
-    size_t Document::getFieldIndex(const string &fieldName) const {
-        const size_t n = vFieldName.size();
-        size_t i = 0;
-        for(; i < n; ++i) {
-            if (fieldName.compare(vFieldName[i]) == 0)
-                break;
+    size_t Document::getApproximateSize() const {
+        if (!_storage)
+            return 0; // we've allocated no memory
+
+        size_t size = sizeof(DocumentStorage);
+        size += storage().allocatedBytes();
+
+        for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
+            size += it->val.getApproximateSize();
+            size -= sizeof(Value); // already accounted for above
         }
 
-        return i;
+        return size;
     }
 
     void Document::hash_combine(size_t &seed) const {
-        const size_t n = vFieldName.size();
-        for(size_t i = 0; i < n; ++i) {
-            boost::hash_combine(seed, vFieldName[i]);
-            vpValue[i]->hash_combine(seed);
+        for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
+            StringData name = it->nameSD();
+            boost::hash_range(seed, name.data(), name.data()+name.size());
+            it->val.hash_combine(seed);
         }
     }
 
-    int Document::compare(const intrusive_ptr<Document> &rL,
-                          const intrusive_ptr<Document> &rR) {
-        const size_t lSize = rL->vFieldName.size();
-        const size_t rSize = rR->vFieldName.size();
+    int Document::compare(const Document& rL, const Document& rR) {
+        const size_t lSize = rL->storage().size();
+        const size_t rSize = rR->storage().size();
+
+        DocumentStorageIterator lIt = rL.storage().iterator();
+        DocumentStorageIterator rIt = rR.storage().iterator();
 
         for(size_t i = 0; true; ++i) {
             if (i >= lSize) {
@@ -180,43 +313,35 @@ namespace mongo {
             if (i >= rSize)
                 return 1; // right document is shorter
 
-            const int nameCmp = rL->vFieldName[i].compare(rR->vFieldName[i]);
+            const ValueElement& rField = rIt.get();
+            const ValueElement& lField = lIt.get();
+
+            rIt.advance();
+            lIt.advance();
+
+            const int nameCmp = strcmp(lField.name, rField.name);
             if (nameCmp)
                 return nameCmp; // field names are unequal
 
-            const int valueCmp = Value::compare(rL->vpValue[i], rR->vpValue[i]);
+            const int valueCmp = Value::compare(lField.val, rField.val);
             if (valueCmp)
                 return valueCmp; // fields are unequal
         }
-
-        /* NOTREACHED */
-        verify(false);
-        return 0;
     }
 
     string Document::toString() const {
-        // this is a temporary hack and it should only be used for debugging
-        BSONObjBuilder bb;
-        toBson(&bb);
-        return bb.done().toString();
-    }
-
-    /* ----------------------- FieldIterator ------------------------------- */
+        if (empty())
+            return "{}";
 
-    FieldIterator::FieldIterator(const intrusive_ptr<Document> &pTheDocument):
-        pDocument(pTheDocument),
-        index(0) {
-    }
+        StringBuilder out;
+        const char* prefix = "{";
 
-    bool FieldIterator::more() const {
-        return (index < pDocument->vFieldName.size());
-    }
+        for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) {
+            out << prefix << it->name << ": " << it->val.toString();
+            prefix = ", ";
+        }
+        out << '}';
 
-    pair<string, intrusive_ptr<const Value> > FieldIterator::next() {
-        verify(more());
-        pair<string, intrusive_ptr<const Value> > result(
-            pDocument->vFieldName[index], pDocument->vpValue[index]);
-        ++index;
-        return result;
+        return out.str();
     }
 }
author	Mathias Stearn <mathias@10gen.com>	2012-10-26 19:35:31 -0400
committer	Mathias Stearn <mathias@10gen.com>	2012-11-16 17:32:59 -0500
commit	ffd194f68edf4a61bff35fac591452b715cff849 (patch)
tree	67ed1e6300f04d1333fee769e9c1ba2f8b4d41d6 /src/mongo/db/pipeline/document.cpp
parent	7eb3fc28fbd9cb0464cbf6dba5ffb497ba2088e9 (diff)
download	mongo-ffd194f68edf4a61bff35fac591452b715cff849.tar.gz