diff options
author | Mathias Stearn <mathias@10gen.com> | 2012-10-26 19:35:31 -0400 |
---|---|---|
committer | Mathias Stearn <mathias@10gen.com> | 2012-11-16 17:32:59 -0500 |
commit | ffd194f68edf4a61bff35fac591452b715cff849 (patch) | |
tree | 67ed1e6300f04d1333fee769e9c1ba2f8b4d41d6 /src/mongo/db/pipeline/document.cpp | |
parent | 7eb3fc28fbd9cb0464cbf6dba5ffb497ba2088e9 (diff) | |
download | mongo-ffd194f68edf4a61bff35fac591452b715cff849.tar.gz |
Rewrite Document and Value classes
Diffstat (limited to 'src/mongo/db/pipeline/document.cpp')
-rw-r--r--[-rwxr-xr-x] | src/mongo/db/pipeline/document.cpp | 383 |
1 files changed, 254 insertions, 129 deletions
diff --git a/src/mongo/db/pipeline/document.cpp b/src/mongo/db/pipeline/document.cpp index e34af40b8dd..65b0ec6e2de 100755..100644 --- a/src/mongo/db/pipeline/document.cpp +++ b/src/mongo/db/pipeline/document.cpp @@ -15,159 +15,292 @@ */ #include "pch.h" + +#include "mongo/db/pipeline/document.h" + #include <boost/functional/hash.hpp> -#include "db/jsobj.h" -#include "db/pipeline/document.h" -#include "db/pipeline/value.h" -#include "util/mongoutils/str.h" + +#include "mongo/db/jsobj.h" +#include "mongo/db/pipeline/field_path.h" +#include "mongo/util/mongoutils/str.h" namespace mongo { using namespace mongoutils; - string Document::idName("_id"); + Position DocumentStorage::findField(StringData requested) const { + if (_numFields >= HASH_TAB_MIN) { // hash lookup + const unsigned bucket = bucketForKey(requested); - intrusive_ptr<Document> Document::createFromBsonObj(BSONObj* pBsonObj) { - return new Document(pBsonObj); - } + Position pos = _hashTab[bucket]; + while (pos.found()) { + const ValueElement& elem = getField(pos); + if (str::equals(requested.data(), elem.name)) + return pos; - Document::Document(BSONObj* pBsonObj) { - const int fields = pBsonObj->nFields(); - vFieldName.reserve(fields); - vpValue.reserve(fields); - BSONObjIterator bsonIterator(pBsonObj->begin()); - while(bsonIterator.more()) { - BSONElement bsonElement(bsonIterator.next()); - string fieldName(bsonElement.fieldName()); + // possible collision + pos = elem.nextCollision; + } + } + else if (_numFields) { // linear scan + for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) { + if (size_t(it->nameLen) == requested.size() + && str::equals(it->name, requested.data())) { + return it.position(); + } + } + } - // LATER grovel through structures??? - intrusive_ptr<const Value> pValue( - Value::createFromBsonElement(&bsonElement)); + // if we got here, there's no such field + return Position(); + } - vFieldName.push_back(fieldName); - vpValue.push_back(pValue); + Value& DocumentStorage::appendField(StringData name) { + Position pos = getNextPosition(); + const int nameSize = name.size(); + + // these are the same for everyone + const Position nextCollision; + const Value value; + + // Make room for new field (and padding at end for alignment) + const unsigned newUsed = ValueElement::align(_usedBytes + sizeof(ValueElement) + nameSize); + if (_buffer + newUsed > _bufferEnd) + alloc(newUsed); + _usedBytes = newUsed; + + // Append structure of a ValueElement + char* dest = _buffer + pos.index; // must be after alloc since it changes _buffer +#define append(x) memcpy(dest, &(x), sizeof(x)); dest += sizeof(x) + append(value); + append(nextCollision); + append(nameSize); + memcpy(dest, name.data(), nameSize + 1/*NUL*/); + // Padding for alignment handled above +#undef append + + // Make sure next field starts where we expect it + fassert(16486, getField(pos).next()->ptr() == _buffer + _usedBytes); + + _numFields++; + + if (_numFields > HASH_TAB_MIN) { + addFieldToHashTable(pos); + } + else if (_numFields == HASH_TAB_MIN) { + // adds all fields to hash table (including the one we just added) + rehash(); } - } - void Document::toBson(BSONObjBuilder* pBuilder) const { - const size_t n = vFieldName.size(); - for(size_t i = 0; i < n; ++i) - vpValue[i]->addToBsonObj(pBuilder, vFieldName[i]); + return getField(pos); } - intrusive_ptr<Document> Document::create(size_t sizeHint) { - intrusive_ptr<Document> pDocument(new Document(sizeHint)); - return pDocument; + // Call after adding field to _fields and increasing _numFields + void DocumentStorage::addFieldToHashTable(Position pos) { + ValueElement& elem = getField(pos); + elem.nextCollision = Position(); + + const unsigned bucket = bucketForKey(elem.nameSD()); + + Position* posPtr = &_hashTab[bucket]; + while (posPtr->found()) { + // collision: walk links and add new to end + posPtr = &getField(*posPtr).nextCollision; + } + *posPtr = Position(pos.index); } - Document::Document(size_t sizeHint): - vFieldName(), - vpValue() { - if (sizeHint) { - vFieldName.reserve(sizeHint); - vpValue.reserve(sizeHint); + void DocumentStorage::alloc(unsigned newSize) { + const bool firstAlloc = !_buffer; + const bool doingRehash = needRehash(); + const size_t oldCapacity = _bufferEnd - _buffer; + + // make new bucket count big enough + while (needRehash() || hashTabBuckets() < HASH_TAB_INIT_SIZE) + _hashTabMask = hashTabBuckets()*2 - 1; + + // only allocate power-of-two sized space > 128 bytes + size_t capacity = 128; + while (capacity < newSize + hashTabBytes()) + capacity *= 2; + + uassert(16490, "Tried to make oversized document", + capacity <= size_t(BufferMaxSize)); + + boost::scoped_array<char> oldBuf(_buffer); + _buffer = new char[capacity]; + _bufferEnd = _buffer + capacity - hashTabBytes(); + + if (!firstAlloc) { + // This just copies the elements + memcpy(_buffer, oldBuf.get(), _usedBytes); + + if (_numFields >= HASH_TAB_MIN) { + // if we were hashing, deal with the hash table + if (doingRehash) { + rehash(); + } + else { + // no rehash needed so just slide table down to new position + memcpy(_hashTab, oldBuf.get() + oldCapacity, hashTabBytes()); + } + } } } - intrusive_ptr<Document> Document::clone() { - const size_t n = vFieldName.size(); - intrusive_ptr<Document> pNew(Document::create(n)); - for(size_t i = 0; i < n; ++i) - pNew->addField(vFieldName[i], vpValue[i]); + void DocumentStorage::reserveFields(size_t expectedFields) { + fassert(16487, !_buffer); + + unsigned buckets = HASH_TAB_INIT_SIZE; + while (buckets < expectedFields) + buckets *= 2; + _hashTabMask = buckets - 1; + + // Using expectedFields+1 to allow space for long field names + const size_t newSize = (expectedFields+1) * ValueElement::align(sizeof(ValueElement)); - return pNew; + uassert(16491, "Tried to make oversized document", + newSize <= size_t(BufferMaxSize)); + + _buffer = new char[newSize + hashTabBytes()]; + _bufferEnd = _buffer + newSize; } - Document::~Document() { + intrusive_ptr<DocumentStorage> DocumentStorage::clone() const { + intrusive_ptr<DocumentStorage> out (new DocumentStorage()); + + // Make a copy of the buffer. + // It is very important that the positions of each field are the same after cloning. + const size_t bufferBytes = (_bufferEnd + hashTabBytes()) - _buffer; + out->_buffer = new char[bufferBytes]; + out->_bufferEnd = out->_buffer + (_bufferEnd - _buffer); + memcpy(out->_buffer, _buffer, bufferBytes); + + // Copy remaining fields + out->_usedBytes = _usedBytes; + out->_numFields = _numFields; + out->_hashTabMask = _hashTabMask; + + // Tell values that they have been memcpyed (updates ref counts) + for (DocumentStorageIterator it = out->iteratorAll(); !it.atEnd(); it.advance()) { + it->val.memcpyed(); + } + + return out; } - FieldIterator *Document::createFieldIterator() { - return new FieldIterator(intrusive_ptr<Document>(this)); + DocumentStorage::~DocumentStorage() { + boost::scoped_array<char> deleteBufferAtScopeEnd (_buffer); + + for (DocumentStorageIterator it = iteratorAll(); !it.atEnd(); it.advance()) { + it->val.~Value(); // explicit destructor call + } } - intrusive_ptr<const Value> Document::getValue(const string &fieldName) { - /* - For now, assume the number of fields is small enough that iteration - is ok. Later, if this gets large, we can create a map into the - vector for these lookups. + Document::Document(const BSONObj& bson) { + MutableDocument md(bson.nFields()); - Note that because of the schema-less nature of this data, we always - have to look, and can't assume that the requested field is always - in a particular place as we would with a statically compilable - reference. - */ - const size_t n = vFieldName.size(); - for(size_t i = 0; i < n; ++i) { - if (fieldName.compare(vFieldName[i]) == 0) - return vpValue[i]; + BSONObjIterator it(bson); + while(it.more()) { + BSONElement bsonElement(it.next()); + md.addField(bsonElement.fieldName(), Value(bsonElement)); } - return(intrusive_ptr<const Value>()); + *this = md.freeze(); } - void Document::addField(const string &fieldName, - const intrusive_ptr<const Value> &pValue) { - vFieldName.push_back(fieldName); - vpValue.push_back(pValue); + void Document::toBson(BSONObjBuilder* pBuilder) const { + for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { + it->val.addToBsonObj(pBuilder, it->nameSD()); + } } - void Document::setField(size_t index, - const string &fieldName, - const intrusive_ptr<const Value> &pValue) { - /* special case: should this field be removed? */ - if (!pValue.get()) { - vFieldName.erase(vFieldName.begin() + index); - vpValue.erase(vpValue.begin() + index); - return; + MutableDocument::MutableDocument(size_t expectedFields) + : _storageHolder(NULL) + , _storage(_storageHolder) + { + if (expectedFields) { + storage().reserveFields(expectedFields); } - - /* set the indicated field */ - vFieldName[index] = fieldName; - vpValue[index] = pValue; } - intrusive_ptr<const Value> Document::getField(const string &fieldName) const { - const size_t n = vFieldName.size(); - for(size_t i = 0; i < n; ++i) { - if (fieldName.compare(vFieldName[i]) == 0) - return vpValue[i]; + MutableValue MutableDocument::getNestedFieldHelper(MutableDocument& md, + const vector<Position>& positions, + size_t level) { + fassert(16488, !positions.empty()); + + if (level == positions.size()-1) { + return md[positions[level]]; } + else { + MutableDocument nested (md[positions[level]]); + return getNestedFieldHelper(nested, positions, level+1); + } + } - /* if we got here, there's no such field */ - return intrusive_ptr<const Value>(); + MutableValue MutableDocument::getNestedField(const vector<Position>& positions) { + return getNestedFieldHelper(*this, positions, 0); } - size_t Document::getApproximateSize() const { - size_t size = sizeof(Document); - const size_t n = vpValue.size(); - for(size_t i = 0; i < n; ++i) - size += vpValue[i]->getApproximateSize(); + static Value getNestedFieldHelper(const Document& doc, + const FieldPath& fieldNames, + vector<Position>* positions, + size_t level) { - return size; + fassert(16489, fieldNames.getPathLength()); + + const string& fieldName = fieldNames.getFieldName(level); + const Position pos = doc.positionOf(fieldName); + + if (!pos.found()) + return Value(); + + if (positions) + positions->push_back(pos); + + if (level == fieldNames.getPathLength()-1) + return doc.getField(pos); + + Value val = doc.getField(pos); + if (val.getType() != Object) + return Value(); + + return getNestedFieldHelper(val.getDocument(), fieldNames, positions, level+1); + } + + const Value Document::getNestedField(const FieldPath& fieldNames, + vector<Position>* positions) const { + return getNestedFieldHelper(*this, fieldNames, positions, 0); } - size_t Document::getFieldIndex(const string &fieldName) const { - const size_t n = vFieldName.size(); - size_t i = 0; - for(; i < n; ++i) { - if (fieldName.compare(vFieldName[i]) == 0) - break; + size_t Document::getApproximateSize() const { + if (!_storage) + return 0; // we've allocated no memory + + size_t size = sizeof(DocumentStorage); + size += storage().allocatedBytes(); + + for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { + size += it->val.getApproximateSize(); + size -= sizeof(Value); // already accounted for above } - return i; + return size; } void Document::hash_combine(size_t &seed) const { - const size_t n = vFieldName.size(); - for(size_t i = 0; i < n; ++i) { - boost::hash_combine(seed, vFieldName[i]); - vpValue[i]->hash_combine(seed); + for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { + StringData name = it->nameSD(); + boost::hash_range(seed, name.data(), name.data()+name.size()); + it->val.hash_combine(seed); } } - int Document::compare(const intrusive_ptr<Document> &rL, - const intrusive_ptr<Document> &rR) { - const size_t lSize = rL->vFieldName.size(); - const size_t rSize = rR->vFieldName.size(); + int Document::compare(const Document& rL, const Document& rR) { + const size_t lSize = rL->storage().size(); + const size_t rSize = rR->storage().size(); + + DocumentStorageIterator lIt = rL.storage().iterator(); + DocumentStorageIterator rIt = rR.storage().iterator(); for(size_t i = 0; true; ++i) { if (i >= lSize) { @@ -180,43 +313,35 @@ namespace mongo { if (i >= rSize) return 1; // right document is shorter - const int nameCmp = rL->vFieldName[i].compare(rR->vFieldName[i]); + const ValueElement& rField = rIt.get(); + const ValueElement& lField = lIt.get(); + + rIt.advance(); + lIt.advance(); + + const int nameCmp = strcmp(lField.name, rField.name); if (nameCmp) return nameCmp; // field names are unequal - const int valueCmp = Value::compare(rL->vpValue[i], rR->vpValue[i]); + const int valueCmp = Value::compare(lField.val, rField.val); if (valueCmp) return valueCmp; // fields are unequal } - - /* NOTREACHED */ - verify(false); - return 0; } string Document::toString() const { - // this is a temporary hack and it should only be used for debugging - BSONObjBuilder bb; - toBson(&bb); - return bb.done().toString(); - } - - /* ----------------------- FieldIterator ------------------------------- */ + if (empty()) + return "{}"; - FieldIterator::FieldIterator(const intrusive_ptr<Document> &pTheDocument): - pDocument(pTheDocument), - index(0) { - } + StringBuilder out; + const char* prefix = "{"; - bool FieldIterator::more() const { - return (index < pDocument->vFieldName.size()); - } + for (DocumentStorageIterator it = storage().iterator(); !it.atEnd(); it.advance()) { + out << prefix << it->name << ": " << it->val.toString(); + prefix = ", "; + } + out << '}'; - pair<string, intrusive_ptr<const Value> > FieldIterator::next() { - verify(more()); - pair<string, intrusive_ptr<const Value> > result( - pDocument->vFieldName[index], pDocument->vpValue[index]); - ++index; - return result; + return out.str(); } } |