From e0303b45fcbef6882820212f3f724030ed4b9ccb Mon Sep 17 00:00:00 2001
From: Eric Cox
Date: Tue, 26 Jan 2021 21:37:11 +0000
Subject: SERVER-53467 Investigate performance of $_internalUnpackBucket over BSON

---
 .../document_source_internal_unpack_bucket.cpp | 48 +++++++++++++---------
 .../document_source_internal_unpack_bucket.h   | 21 +++++-----
 2 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index 6b629bb7d52..c5ce3957bc7 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -44,19 +44,30 @@ REGISTER_DOCUMENT_SOURCE(_internalUnpackBucket,
                          LiteParsedDocumentSourceDefault::parse,
                          DocumentSourceInternalUnpackBucket::createFromBson);
 
-void BucketUnpacker::reset(Document&& bucket) {
+void BucketUnpacker::reset(BSONObj&& bucket) {
     _fieldIters.clear();
     _timeFieldIter = boost::none;
 
     _bucket = std::move(bucket);
-    uassert(5346510, "An empty bucket cannot be unpacked", !_bucket.empty());
+    uassert(5346510, "An empty bucket cannot be unpacked", !_bucket.isEmpty());
+    tassert(5346701,
+            "The $_internalUnpackBucket stage requires the bucket to be owned",
+            _bucket.isOwned());
 
-    if (_bucket[kBucketDataFieldName].getDocument().empty()) {
+    auto&& dataRegion = _bucket.getField(kBucketDataFieldName).Obj();
+    if (dataRegion.isEmpty()) {
         // If the data field of a bucket is present but it holds an empty object, there's nothing to
         // unpack.
         return;
     }
 
+    auto&& timeFieldElem = dataRegion.getField(_spec.timeField);
+    uassert(5346700,
+            "The $_internalUnpackBucket stage requires the data region to have a timeField object",
+            timeFieldElem);
+
+    _timeFieldIter = BSONObjIterator{timeFieldElem.Obj()};
+
     _metaValue = _bucket[kBucketMetaFieldName];
     if (_spec.metaField) {
         // The spec indicates that there should be a metadata region. Missing metadata in this case
@@ -65,23 +76,20 @@ void BucketUnpacker::reset(Document&& bucket) {
         uassert(5369600,
                 "The $_internalUnpackBucket stage requires metadata to be present in a bucket if "
                 "metaField parameter is provided",
-                (_metaValue.getType() != BSONType::Undefined) && !_metaValue.missing());
+                (_metaValue.type() != BSONType::Undefined) && _metaValue);
     } else {
         // If the spec indicates that the time series collection has no metadata field, then we
         // should not find a metadata region in the underlying bucket documents.
         uassert(5369601,
                 "The $_internalUnpackBucket stage expects buckets to have missing metadata regions "
                 "if the metaField parameter is not provided",
-                _metaValue.missing());
+                !_metaValue);
     }
 
-    _timeFieldIter = _bucket[kBucketDataFieldName][_spec.timeField].getDocument().fieldIterator();
-
     // Walk the data region of the bucket, and decide if an iterator should be set up based on the
     // include or exclude case.
-    auto colIter = _bucket[kBucketDataFieldName].getDocument().fieldIterator();
-    while (colIter.more()) {
-        auto&& [colName, colVal] = colIter.next();
+    for (auto&& elem : dataRegion) {
+        auto& colName = elem.fieldNameStringData();
         if (colName == _spec.timeField) {
             // Skip adding a FieldIterator for the timeField since the timestamp value from
             // _timeFieldIter can be placed accordingly in the materialized measurement.
@@ -89,7 +97,7 @@ void BucketUnpacker::reset(Document&& bucket) {
         }
         auto found = _spec.fieldSet.find(colName.toString()) != _spec.fieldSet.end();
         if ((_unpackerBehavior == Behavior::kInclude) == found) {
-            _fieldIters.push_back({colName.toString(), colVal.getDocument().fieldIterator()});
+            _fieldIters.push_back({colName.toString(), BSONObjIterator{elem.Obj()}});
         }
     }
 }
@@ -98,20 +106,20 @@ Document BucketUnpacker::getNext() {
     invariant(hasNext());
 
     auto measurement = MutableDocument{};
-
-    auto&& [currentIdx, timeVal] = _timeFieldIter->next();
+    auto&& timeElem = _timeFieldIter->next();
     if (_includeTimeField) {
-        measurement.addField(_spec.timeField, timeVal);
+        measurement.addField(_spec.timeField, Value{timeElem});
     }
 
-    if (_includeMetaField && !_metaValue.nullish()) {
-        measurement.addField(*_spec.metaField, _metaValue);
+    if (_includeMetaField && !_metaValue.isNull()) {
+        measurement.addField(*_spec.metaField, Value{_metaValue});
     }
 
+    auto& currentIdx = timeElem.fieldNameStringData();
     for (auto&& [colName, colIter] : _fieldIters) {
-        if (colIter.more() && colIter.fieldName() == currentIdx) {
-            auto&& [_, val] = colIter.next();
-            measurement.addField(colName, val);
+        if (auto&& elem = *colIter; colIter.more() && elem.fieldNameStringData() == currentIdx) {
+            measurement.addField(colName, Value{elem});
+            colIter.advance(elem);
         }
     }
 
@@ -228,7 +236,7 @@ DocumentSource::GetNextResult DocumentSourceInternalUnpackBucket::doGetNext() {
 
     auto nextResult = pSource->getNext();
     if (nextResult.isAdvanced()) {
-        auto bucket = nextResult.getDocument();
+        auto bucket = nextResult.getDocument().toBson();
         _bucketUnpacker.reset(std::move(bucket));
         uassert(
             5346509,
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
index bb389310fe0..7731e5032db 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
@@ -83,7 +83,7 @@ public:
     /**
      * This resets the unpacker to prepare to unpack a new bucket described by the given document.
      */
-    void reset(Document&& bucket);
+    void reset(BSONObj&& bucket);
 
     Behavior behavior() const {
         return _unpackerBehavior;
@@ -93,7 +93,7 @@ public:
         return _spec;
     }
 
-    const Document& bucket() const {
+    const BSONObj& bucket() const {
         return _bucket;
     }
 
@@ -102,24 +102,25 @@ private:
     const Behavior _unpackerBehavior;
 
     // Iterates the timestamp section of the bucket to drive the unpacking iteration.
-    boost::optional<FieldIterator> _timeFieldIter;
+    boost::optional<BSONObjIterator> _timeFieldIter;
 
     // A flag used to mark that the timestamp value should be materialized in measurements.
    const bool _includeTimeField;
 
-    // Since the metadata value is the same across all materialized measurements we can cache the
-    // metadata value in the reset phase and use it to materialize the metadata in each measurement.
-    Value _metaValue;
-
-    // A flag used to mark that a bucket's metadata value should be materialized in measurements.
+    // A flag used to mark that a bucket's metadata element should be materialized in measurements.
     const bool _includeMetaField;
 
     // The bucket being unpacked.
-    Document _bucket;
+    BSONObj _bucket;
+
+    // Since the metadata value is the same across all materialized measurements we can cache the
+    // metadata BSONElement in the reset phase and use it to materialize the metadata in each
+    // measurement.
+    BSONElement _metaValue;
 
     // Iterators used to unpack the columns of the above bucket that are populated during the reset
     // phase according to the provided 'Behavior' and 'BucketSpec'.
-    std::vector<std::pair<std::string, FieldIterator>> _fieldIters;
+    std::vector<std::pair<std::string, BSONObjIterator>> _fieldIters;
 };
 
 class DocumentSourceInternalUnpackBucket : public DocumentSource {
-- 
cgit v1.2.1
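
The patch above swaps the Document-based field iterators for BSONObjIterator so that each measurement is materialized straight from the bucket's column-oriented BSON: the time column drives the row iteration, and every other column contributes a value only when its next element carries the same row index. The standalone sketch below illustrates that unpacking pattern using plain standard-library containers in place of BSONObj and BSONObjIterator; it is not MongoDB code, and all names in it are hypothetical.

#include <iostream>
#include <map>
#include <string>

// A column maps row index -> value, like the per-field objects ("0": v0, "1": v1, ...)
// inside a bucket's data region. Sparse columns simply omit some row indices.
using Column = std::map<int, double>;

struct Bucket {
    std::string metaValue;                 // cached once per bucket; added to every measurement
    Column timeColumn;                     // drives the unpacking iteration
    std::map<std::string, Column> fields;  // the remaining columns of the data region
};

int main() {
    Bucket bucket{"sensor-A",
                  {{0, 1000.0}, {1, 1001.0}, {2, 1002.0}},
                  {{"pressure", {{0, 101.0}, {1, 100.5}, {2, 100.9}}},
                   {"temp", {{0, 21.5}, {2, 22.0}}}}};  // "temp" is sparse: no row 1

    // Set up one iterator per field column, analogous to the _fieldIters vector.
    std::map<std::string, Column::const_iterator> fieldIters;
    for (auto&& [name, col] : bucket.fields)
        fieldIters.emplace(name, col.begin());

    // Walk the time column; each entry produces one row-oriented measurement.
    for (auto&& [rowIdx, timeVal] : bucket.timeColumn) {
        std::cout << "{time: " << timeVal << ", meta: " << bucket.metaValue;

        // A field contributes a value only if its iterator is positioned at the same
        // row index as the current time entry; otherwise the field is missing here.
        for (auto&& [name, it] : fieldIters) {
            if (it != bucket.fields.at(name).end() && it->first == rowIdx) {
                std::cout << ", " << name << ": " << it->second;
                ++it;  // advance only the columns that matched this row
            }
        }
        std::cout << "}\n";
    }
}

Running the sketch prints three measurements, with "temp" absent from the second one, which mirrors how the unpacker skips a column whose iterator is not positioned at the current row index.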