summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Cox <eric.cox@mongodb.com>2021-01-26 21:37:11 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-02-09 19:25:26 +0000
commite0303b45fcbef6882820212f3f724030ed4b9ccb (patch)
tree5f9befa95d957373a9ee691339f3ba9ad886511a
parent54e6a589f127043ac42c88e1f6f44d66ad1713f6 (diff)
downloadmongo-e0303b45fcbef6882820212f3f724030ed4b9ccb.tar.gz
SERVER-53467 Investigate performance of $_internalUnpackBucket over BSON
-rw-r--r--src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp48
-rw-r--r--src/mongo/db/pipeline/document_source_internal_unpack_bucket.h21
2 files changed, 39 insertions(+), 30 deletions(-)
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index 6b629bb7d52..c5ce3957bc7 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -44,19 +44,30 @@ REGISTER_DOCUMENT_SOURCE(_internalUnpackBucket,
LiteParsedDocumentSourceDefault::parse,
DocumentSourceInternalUnpackBucket::createFromBson);
-void BucketUnpacker::reset(Document&& bucket) {
+void BucketUnpacker::reset(BSONObj&& bucket) {
_fieldIters.clear();
_timeFieldIter = boost::none;
_bucket = std::move(bucket);
- uassert(5346510, "An empty bucket cannot be unpacked", !_bucket.empty());
+ uassert(5346510, "An empty bucket cannot be unpacked", !_bucket.isEmpty());
+ tassert(5346701,
+ "The $_internalUnpackBucket stage requires the bucket to be owned",
+ _bucket.isOwned());
- if (_bucket[kBucketDataFieldName].getDocument().empty()) {
+ auto&& dataRegion = _bucket.getField(kBucketDataFieldName).Obj();
+ if (dataRegion.isEmpty()) {
// If the data field of a bucket is present but it holds an empty object, there's nothing to
// unpack.
return;
}
+ auto&& timeFieldElem = dataRegion.getField(_spec.timeField);
+ uassert(5346700,
+ "The $_internalUnpackBucket stage requires the data region to have a timeField object",
+ timeFieldElem);
+
+ _timeFieldIter = BSONObjIterator{timeFieldElem.Obj()};
+
_metaValue = _bucket[kBucketMetaFieldName];
if (_spec.metaField) {
// The spec indicates that there should be a metadata region. Missing metadata in this case
@@ -65,23 +76,20 @@ void BucketUnpacker::reset(Document&& bucket) {
uassert(5369600,
"The $_internalUnpackBucket stage requires metadata to be present in a bucket if "
"metaField parameter is provided",
- (_metaValue.getType() != BSONType::Undefined) && !_metaValue.missing());
+ (_metaValue.type() != BSONType::Undefined) && _metaValue);
} else {
// If the spec indicates that the time series collection has no metadata field, then we
// should not find a metadata region in the underlying bucket documents.
uassert(5369601,
"The $_internalUnpackBucket stage expects buckets to have missing metadata regions "
"if the metaField parameter is not provided",
- _metaValue.missing());
+ !_metaValue);
}
- _timeFieldIter = _bucket[kBucketDataFieldName][_spec.timeField].getDocument().fieldIterator();
-
// Walk the data region of the bucket, and decide if an iterator should be set up based on the
// include or exclude case.
- auto colIter = _bucket[kBucketDataFieldName].getDocument().fieldIterator();
- while (colIter.more()) {
- auto&& [colName, colVal] = colIter.next();
+ for (auto&& elem : dataRegion) {
+ auto&& colName = elem.fieldNameStringData();
if (colName == _spec.timeField) {
// Skip adding a FieldIterator for the timeField since the timestamp value from
// _timeFieldIter can be placed accordingly in the materialized measurement.
@@ -89,7 +97,7 @@ void BucketUnpacker::reset(Document&& bucket) {
}
auto found = _spec.fieldSet.find(colName.toString()) != _spec.fieldSet.end();
if ((_unpackerBehavior == Behavior::kInclude) == found) {
- _fieldIters.push_back({colName.toString(), colVal.getDocument().fieldIterator()});
+ _fieldIters.push_back({colName.toString(), BSONObjIterator{elem.Obj()}});
}
}
}
@@ -98,20 +106,20 @@ Document BucketUnpacker::getNext() {
invariant(hasNext());
auto measurement = MutableDocument{};
-
- auto&& [currentIdx, timeVal] = _timeFieldIter->next();
+ auto&& timeElem = _timeFieldIter->next();
if (_includeTimeField) {
- measurement.addField(_spec.timeField, timeVal);
+ measurement.addField(_spec.timeField, Value{timeElem});
}
- if (_includeMetaField && !_metaValue.nullish()) {
- measurement.addField(*_spec.metaField, _metaValue);
+ if (_includeMetaField && !_metaValue.isNull()) {
+ measurement.addField(*_spec.metaField, Value{_metaValue});
}
+ auto&& currentIdx = timeElem.fieldNameStringData();
for (auto&& [colName, colIter] : _fieldIters) {
- if (colIter.more() && colIter.fieldName() == currentIdx) {
- auto&& [_, val] = colIter.next();
- measurement.addField(colName, val);
+ if (auto&& elem = *colIter; colIter.more() && elem.fieldNameStringData() == currentIdx) {
+ measurement.addField(colName, Value{elem});
+ colIter.advance(elem);
}
}
@@ -228,7 +236,7 @@ DocumentSource::GetNextResult DocumentSourceInternalUnpackBucket::doGetNext() {
auto nextResult = pSource->getNext();
if (nextResult.isAdvanced()) {
- auto bucket = nextResult.getDocument();
+ auto bucket = nextResult.getDocument().toBson();
_bucketUnpacker.reset(std::move(bucket));
uassert(
5346509,
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
index bb389310fe0..7731e5032db 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
@@ -83,7 +83,7 @@ public:
/**
* This resets the unpacker to prepare to unpack a new bucket described by the given document.
*/
- void reset(Document&& bucket);
+ void reset(BSONObj&& bucket);
Behavior behavior() const {
return _unpackerBehavior;
@@ -93,7 +93,7 @@ public:
return _spec;
}
- const Document& bucket() const {
+ const BSONObj& bucket() const {
return _bucket;
}
@@ -102,24 +102,25 @@ private:
const Behavior _unpackerBehavior;
// Iterates the timestamp section of the bucket to drive the unpacking iteration.
- boost::optional<FieldIterator> _timeFieldIter;
+ boost::optional<BSONObjIterator> _timeFieldIter;
// A flag used to mark that the timestamp value should be materialized in measurements.
const bool _includeTimeField;
- // Since the metadata value is the same across all materialized measurements we can cache the
- // metadata value in the reset phase and use it to materialize the metadata in each measurement.
- Value _metaValue;
-
- // A flag used to mark that a bucket's metadata value should be materialized in measurements.
+ // A flag used to mark that a bucket's metadata element should be materialized in measurements.
const bool _includeMetaField;
// The bucket being unpacked.
- Document _bucket;
+ BSONObj _bucket;
+
+ // Since the metadata value is the same across all materialized measurements we can cache the
+ // metadata BSONElement in the reset phase and use it to materialize the metadata in each
+ // measurement.
+ BSONElement _metaValue;
// Iterators used to unpack the columns of the above bucket that are populated during the reset
// phase according to the provided 'Behavior' and 'BucketSpec'.
- std::vector<std::pair<std::string, FieldIterator>> _fieldIters;
+ std::vector<std::pair<std::string, BSONObjIterator>> _fieldIters;
};
class DocumentSourceInternalUnpackBucket : public DocumentSource {