summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHenrik Edin <henrik.edin@mongodb.com>2021-04-01 13:23:08 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-04-07 12:47:32 +0000
commit659a367887558c4ed71bead6e1da9b5a5fe3d84b (patch)
tree8e68ea73bc46f2361e329a8c454409c2ff976a6e
parent623aa8fee52c5a63f950aa7cc8e34c551c41ead5 (diff)
downloadmongo-659a367887558c4ed71bead6e1da9b5a5fe3d84b.tar.gz
SERVER-55482 Optimize timeseries minmax calculation algorithm
* Only traverse inserted object once, calculate both min and max at the same time * Allow inlining of comparator when possible * Reuse buffer to store values if buffer size is sufficient * Don't store field name in value as it is already stored in map owning MinMax object. Overall this should cut CPU usage in about half.
-rw-r--r--src/mongo/db/timeseries/bucket_catalog.cpp288
-rw-r--r--src/mongo/db/timeseries/bucket_catalog.h142
2 files changed, 300 insertions, 130 deletions
diff --git a/src/mongo/db/timeseries/bucket_catalog.cpp b/src/mongo/db/timeseries/bucket_catalog.cpp
index 727653de2c7..450470632d5 100644
--- a/src/mongo/db/timeseries/bucket_catalog.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog.cpp
@@ -847,162 +847,242 @@ Date_t BucketCatalog::BucketAccess::getTime() const {
return _bucket->id().asDateT();
}
+void BucketCatalog::MinMax::Data::setValue(const BSONElement& elem) {
+ auto requiredSize = elem.size() - elem.fieldNameSize() + 1;
+ if (_totalSize < requiredSize) {
+ _value = std::make_unique<char[]>(requiredSize);
+ }
+ // Store element as BSONElement buffer but strip out the field name
+ _value[0] = elem.type();
+ _value[1] = '\0';
+ memcpy(_value.get() + 2, elem.value(), elem.valuesize());
+ _totalSize = requiredSize;
+ _type = Type::kValue;
+ _updated = true;
+}
+
+void BucketCatalog::MinMax::Data::setObject() {
+ if (std::exchange(_type, Type::kObject) != Type::kObject) {
+ _updated = true;
+ }
+}
+
+void BucketCatalog::MinMax::Data::setRootObject() {
+ _type = Type::kObject;
+}
+
+void BucketCatalog::MinMax::Data::setArray() {
+ if (std::exchange(_type, Type::kArray) != Type::kArray) {
+ _updated = true;
+ }
+}
+
+BSONElement BucketCatalog::MinMax::Data::value() const {
+ return BSONElement(_value.get(), 1, _totalSize, BSONElement::CachedSizeTag{});
+}
+
+BSONType BucketCatalog::MinMax::Data::valueType() const {
+ return (BSONType)_value[0];
+}
+
+int BucketCatalog::MinMax::Data::valueSize() const {
+ return _totalSize;
+}
+
void BucketCatalog::MinMax::update(const BSONObj& doc,
boost::optional<StringData> metaField,
- const StringData::ComparatorInterface* stringComparator,
- const std::function<bool(int, int)>& comp) {
- invariant(_type == Type::kObject || _type == Type::kUnset);
+ const StringData::ComparatorInterface* stringComparator) {
+ invariant(_min.type() == Type::kObject || _min.type() == Type::kUnset);
+ invariant(_max.type() == Type::kObject || _max.type() == Type::kUnset);
- _type = Type::kObject;
+
+ _min.setRootObject();
+ _max.setRootObject();
for (auto&& elem : doc) {
if (metaField && elem.fieldNameStringData() == metaField) {
continue;
}
- _updateWithMemoryUsage(&_object[elem.fieldName()], elem, stringComparator, comp);
+ _updateWithMemoryUsage(&_object[elem.fieldNameStringData()], elem, stringComparator);
}
}
void BucketCatalog::MinMax::_update(BSONElement elem,
- const StringData::ComparatorInterface* stringComparator,
- const std::function<bool(int, int)>& comp) {
+ const StringData::ComparatorInterface* stringComparator) {
auto typeComp = [&](BSONType type) {
- return comp(elem.canonicalType() - canonicalizeBSONType(type), 0);
+ return elem.canonicalType() - canonicalizeBSONType(type);
};
if (elem.type() == Object) {
- if (_type == Type::kObject || _type == Type::kUnset ||
- (_type == Type::kArray && typeComp(Array)) ||
- (_type == Type::kValue && typeComp(_value.firstElement().type()))) {
- // Compare objects element-wise.
- if (std::exchange(_type, Type::kObject) != Type::kObject) {
- _updated = true;
- _memoryUsage = 0;
- }
+ auto shouldUpdateObject = [&](Data& data, auto compare) {
+ return data.type() == Type::kObject || data.type() == Type::kUnset ||
+ (data.type() == Type::kArray && compare(typeComp(Array), 0)) ||
+ (data.type() == Type::kValue && compare(typeComp(data.valueType()), 0));
+ };
+ bool updateMin = shouldUpdateObject(_min, std::less<int>{});
+ if (updateMin) {
+ _min.setObject();
+ }
+ bool updateMax = shouldUpdateObject(_max, std::greater<int>{});
+ if (updateMax) {
+ _max.setObject();
+ }
+
+ // Compare objects element-wise if min or max need to be updated
+ if (updateMin || updateMax) {
for (auto&& subElem : elem.Obj()) {
_updateWithMemoryUsage(
- &_object[subElem.fieldName()], subElem, stringComparator, comp);
+ &_object[subElem.fieldNameStringData()], subElem, stringComparator);
}
}
return;
}
if (elem.type() == Array) {
- if (_type == Type::kArray || _type == Type::kUnset ||
- (_type == Type::kObject && typeComp(Object)) ||
- (_type == Type::kValue && typeComp(_value.firstElement().type()))) {
- // Compare arrays element-wise.
- if (std::exchange(_type, Type::kArray) != Type::kArray) {
- _updated = true;
- _memoryUsage = 0;
- }
+ auto shouldUpdateArray = [&](Data& data, auto compare) {
+ return data.type() == Type::kArray || data.type() == Type::kUnset ||
+ (data.type() == Type::kObject && compare(typeComp(Object), 0)) ||
+ (data.type() == Type::kValue && compare(typeComp(data.valueType()), 0));
+ };
+ bool updateMin = shouldUpdateArray(_min, std::less<int>{});
+ if (updateMin) {
+ _min.setArray();
+ }
+ bool updateMax = shouldUpdateArray(_max, std::greater<int>{});
+ if (updateMax) {
+ _max.setArray();
+ }
+ // Compare objects element-wise if min or max need to be updated
+ if (updateMin || updateMax) {
auto elemArray = elem.Array();
if (_array.size() < elemArray.size()) {
_array.resize(elemArray.size());
}
for (size_t i = 0; i < elemArray.size(); i++) {
- _updateWithMemoryUsage(&_array[i], elemArray[i], stringComparator, comp);
+ _updateWithMemoryUsage(&_array[i], elemArray[i], stringComparator);
}
}
return;
}
- if (_type == Type::kUnset || (_type == Type::kObject && typeComp(Object)) ||
- (_type == Type::kArray && typeComp(Array)) ||
- (_type == Type::kValue &&
- comp(elem.woCompare(_value.firstElement(), false, stringComparator), 0))) {
- _type = Type::kValue;
- _value = elem.wrap();
- _updated = true;
- _memoryUsage = _value.objsize();
- }
+ auto maybeUpdateValue = [&](Data& data, auto compare) {
+ if (data.type() == Type::kUnset ||
+ (data.type() == Type::kObject && compare(typeComp(Object), 0)) ||
+ (data.type() == Type::kArray && compare(typeComp(Array), 0)) ||
+ (data.type() == Type::kValue &&
+ compare(elem.woCompare(data.value(), false, stringComparator), 0))) {
+ data.setValue(elem);
+ }
+ };
+ maybeUpdateValue(_min, std::less<int>{});
+ maybeUpdateValue(_max, std::greater<int>{});
}
void BucketCatalog::MinMax::_updateWithMemoryUsage(
- MinMax* minMax,
- BSONElement elem,
- const StringData::ComparatorInterface* stringComparator,
- const std::function<bool(int, int)>& comp) {
+ MinMax* minMax, BSONElement elem, const StringData::ComparatorInterface* stringComparator) {
_memoryUsage -= minMax->getMemoryUsage();
- minMax->_update(elem, stringComparator, comp);
+ minMax->_update(elem, stringComparator);
_memoryUsage += minMax->getMemoryUsage();
}
-BSONObj BucketCatalog::MinMax::toBSON() const {
- invariant(_type == Type::kObject);
+BSONObj BucketCatalog::MinMax::min() const {
+ invariant(_min.type() == Type::kObject);
BSONObjBuilder builder;
- _append(&builder);
+ _append(&builder, GetMin());
return builder.obj();
}
-void BucketCatalog::MinMax::_append(BSONObjBuilder* builder) const {
- invariant(_type == Type::kObject);
+BSONObj BucketCatalog::MinMax::max() const {
+ invariant(_max.type() == Type::kObject);
+
+ BSONObjBuilder builder;
+ _append(&builder, GetMax());
+ return builder.obj();
+}
+
+template <typename GetDataFn>
+void BucketCatalog::MinMax::_append(BSONObjBuilder* builder, GetDataFn getData) const {
+ invariant(getData(*this).type() == Type::kObject);
for (const auto& minMax : _object) {
- invariant(minMax.second._type != Type::kUnset);
- if (minMax.second._type == Type::kObject) {
+ const Data& data = getData(minMax.second);
+ invariant(data.type() != Type::kUnset);
+ if (data.type() == Type::kObject) {
BSONObjBuilder subObj(builder->subobjStart(minMax.first));
- minMax.second._append(&subObj);
- } else if (minMax.second._type == Type::kArray) {
+ minMax.second._append(&subObj, getData);
+ } else if (data.type() == Type::kArray) {
BSONArrayBuilder subArr(builder->subarrayStart(minMax.first));
- minMax.second._append(&subArr);
+ minMax.second._append(&subArr, getData);
} else {
- builder->append(minMax.second._value.firstElement());
+ builder->appendAs(data.value(), minMax.first);
}
}
}
-void BucketCatalog::MinMax::_append(BSONArrayBuilder* builder) const {
- invariant(_type == Type::kArray);
+template <typename GetDataFn>
+void BucketCatalog::MinMax::_append(BSONArrayBuilder* builder, GetDataFn getData) const {
+ invariant(getData(*this).type() == Type::kArray);
for (const auto& minMax : _array) {
- invariant(minMax._type != Type::kUnset);
- if (minMax._type == Type::kObject) {
+ const Data& data = getData(minMax);
+ invariant(data.type() != Type::kUnset);
+ if (data.type() == Type::kObject) {
BSONObjBuilder subObj(builder->subobjStart());
- minMax._append(&subObj);
- } else if (minMax._type == Type::kArray) {
+ minMax._append(&subObj, getData);
+ } else if (data.type() == Type::kArray) {
BSONArrayBuilder subArr(builder->subarrayStart());
- minMax._append(&subArr);
+ minMax._append(&subArr, getData);
} else {
- builder->append(minMax._value.firstElement());
+ builder->append(data.value());
}
}
}
-BSONObj BucketCatalog::MinMax::getUpdates() {
- invariant(_type == Type::kObject);
+BSONObj BucketCatalog::MinMax::minUpdates() {
+ invariant(_min.type() == Type::kObject);
+
+ BSONObjBuilder builder;
+ _appendUpdates(&builder, GetMin());
+ return builder.obj();
+}
+
+BSONObj BucketCatalog::MinMax::maxUpdates() {
+ invariant(_max.type() == Type::kObject);
BSONObjBuilder builder;
- _appendUpdates(&builder);
+ _appendUpdates(&builder, GetMax());
return builder.obj();
}
-bool BucketCatalog::MinMax::_appendUpdates(BSONObjBuilder* builder) {
- invariant(_type == Type::kObject || _type == Type::kArray);
+template <typename GetDataFn>
+bool BucketCatalog::MinMax::_appendUpdates(BSONObjBuilder* builder, GetDataFn getData) {
+ const auto& data = getData(*this);
+ invariant(data.type() == Type::kObject || data.type() == Type::kArray);
bool appended = false;
- if (_type == Type::kObject) {
+ if (data.type() == Type::kObject) {
bool hasUpdateSection = false;
BSONObjBuilder updateSection;
StringMap<BSONObj> subDiffs;
for (auto& minMax : _object) {
- invariant(minMax.second._type != Type::kUnset);
- if (minMax.second._updated) {
- if (minMax.second._type == Type::kObject) {
+ const auto& subdata = getData(minMax.second);
+ invariant(subdata.type() != Type::kUnset);
+ if (subdata.updated()) {
+ if (subdata.type() == Type::kObject) {
BSONObjBuilder subObj(updateSection.subobjStart(minMax.first));
- minMax.second._append(&subObj);
- } else if (minMax.second._type == Type::kArray) {
+ minMax.second._append(&subObj, getData);
+ } else if (subdata.type() == Type::kArray) {
BSONArrayBuilder subArr(updateSection.subarrayStart(minMax.first));
- minMax.second._append(&subArr);
+ minMax.second._append(&subArr, getData);
} else {
- updateSection.append(minMax.second._value.firstElement());
+ updateSection.appendAs(subdata.value(), minMax.first);
}
- minMax.second._clearUpdated();
+ minMax.second._clearUpdated(getData);
appended = true;
hasUpdateSection = true;
- } else if (minMax.second._type != Type::kValue) {
+ } else if (subdata.type() != Type::kValue) {
BSONObjBuilder subDiff;
- if (minMax.second._appendUpdates(&subDiff)) {
+ if (minMax.second._appendUpdates(&subDiff, getData)) {
// An update occurred at a lower level, so append the sub diff.
subDiffs[doc_diff::kSubDiffSectionFieldPrefix + minMax.first] = subDiff.obj();
appended = true;
@@ -1021,24 +1101,25 @@ bool BucketCatalog::MinMax::_appendUpdates(BSONObjBuilder* builder) {
builder->append(doc_diff::kArrayHeader, true);
DecimalCounter<size_t> count;
for (auto& minMax : _array) {
- invariant(minMax._type != Type::kUnset);
- if (minMax._updated) {
+ const auto& subdata = getData(minMax);
+ invariant(subdata.type() != Type::kUnset);
+ if (subdata.updated()) {
std::string updateFieldName = str::stream()
<< doc_diff::kUpdateSectionFieldName << StringData(count);
- if (minMax._type == Type::kObject) {
+ if (subdata.type() == Type::kObject) {
BSONObjBuilder subObj(builder->subobjStart(updateFieldName));
- minMax._append(&subObj);
- } else if (minMax._type == Type::kArray) {
+ minMax._append(&subObj, getData);
+ } else if (subdata.type() == Type::kArray) {
BSONArrayBuilder subArr(builder->subarrayStart(updateFieldName));
- minMax._append(&subArr);
+ minMax._append(&subArr, getData);
} else {
- builder->appendAs(minMax._value.firstElement(), updateFieldName);
+ builder->appendAs(subdata.value(), updateFieldName);
}
- minMax._clearUpdated();
+ minMax._clearUpdated(getData);
appended = true;
- } else if (minMax._type != Type::kValue) {
+ } else if (subdata.type() != Type::kValue) {
BSONObjBuilder subDiff;
- if (minMax._appendUpdates(&subDiff)) {
+ if (minMax._appendUpdates(&subDiff, getData)) {
// An update occurred at a lower level, so append the sub diff.
builder->append(str::stream() << doc_diff::kSubDiffSectionFieldPrefix
<< StringData(count),
@@ -1053,23 +1134,26 @@ bool BucketCatalog::MinMax::_appendUpdates(BSONObjBuilder* builder) {
return appended;
}
-void BucketCatalog::MinMax::_clearUpdated() {
- invariant(_type != Type::kUnset);
+template <typename GetDataFn>
+void BucketCatalog::MinMax::_clearUpdated(GetDataFn getData) {
+ auto& data = getData(*this);
+ invariant(data.type() != Type::kUnset);
- _updated = false;
- if (_type == Type::kObject) {
+ data.clearUpdated();
+ if (data.type() == Type::kObject) {
for (auto& minMax : _object) {
- minMax.second._clearUpdated();
+ minMax.second._clearUpdated(getData);
}
- } else if (_type == Type::kArray) {
+ } else if (_min.type() == Type::kArray) {
for (auto& minMax : _array) {
- minMax._clearUpdated();
+ minMax._clearUpdated(getData);
}
}
}
uint64_t BucketCatalog::MinMax::getMemoryUsage() const {
- return _memoryUsage + (sizeof(MinMax) * (_object.size() + _array.size()));
+ return _memoryUsage + _min.valueSize() + _min.valueSize() +
+ (sizeof(MinMax) * (_object.size() + _array.size()));
}
BucketCatalog::WriteBatch::WriteBatch(Bucket* bucket,
@@ -1160,22 +1244,16 @@ void BucketCatalog::WriteBatch::_prepareCommit() {
}
_newFieldNamesToBeInserted = std::move(newFieldNamesToBeInserted);
- _bucket->_memoryUsage -= _bucket->_min.getMemoryUsage() + _bucket->_max.getMemoryUsage();
+ _bucket->_memoryUsage -= _bucket->_minmax.getMemoryUsage();
for (const auto& doc : _measurements) {
- _bucket->_min.update(doc,
- _bucket->_metadata.getMetaField(),
- _bucket->_metadata.getComparator(),
- std::less<>{});
- _bucket->_max.update(doc,
- _bucket->_metadata.getMetaField(),
- _bucket->_metadata.getComparator(),
- std::greater<>{});
+ _bucket->_minmax.update(
+ doc, _bucket->_metadata.getMetaField(), _bucket->_metadata.getComparator());
}
- _bucket->_memoryUsage += _bucket->_min.getMemoryUsage() + _bucket->_max.getMemoryUsage();
+ _bucket->_memoryUsage += _bucket->_minmax.getMemoryUsage();
const bool isUpdate = _numPreviouslyCommittedMeasurements > 0;
- _min = isUpdate ? _bucket->_min.getUpdates() : _bucket->_min.toBSON();
- _max = isUpdate ? _bucket->_max.getUpdates() : _bucket->_max.toBSON();
+ _min = isUpdate ? _bucket->_minmax.minUpdates() : _bucket->_minmax.min();
+ _max = isUpdate ? _bucket->_minmax.maxUpdates() : _bucket->_minmax.max();
}
void BucketCatalog::WriteBatch::_finish(const CommitInfo& info) {
diff --git a/src/mongo/db/timeseries/bucket_catalog.h b/src/mongo/db/timeseries/bucket_catalog.h
index 501814a64bd..1b4bbd423d5 100644
--- a/src/mongo/db/timeseries/bucket_catalog.h
+++ b/src/mongo/db/timeseries/bucket_catalog.h
@@ -296,26 +296,27 @@ private:
class MinMax {
public:
- /*
+ /**
* Updates the min/max according to 'comp', ignoring the 'metaField' field.
*/
void update(const BSONObj& doc,
boost::optional<StringData> metaField,
- const StringData::ComparatorInterface* stringComparator,
- const std::function<bool(int, int)>& comp);
+ const StringData::ComparatorInterface* stringComparator);
/**
* Returns the full min/max object.
*/
- BSONObj toBSON() const;
+ BSONObj min() const;
+ BSONObj max() const;
/**
* Returns the updates since the previous time this function was called in the format for
* an update op.
*/
- BSONObj getUpdates();
+ BSONObj minUpdates();
+ BSONObj maxUpdates();
- /*
+ /**
* Returns the approximate memory usage of this MinMax.
*/
uint64_t getMemoryUsage() const;
@@ -328,36 +329,130 @@ private:
kUnset,
};
- void _update(BSONElement elem,
- const StringData::ComparatorInterface* stringComparator,
- const std::function<bool(int, int)>& comp);
+ void _update(BSONElement elem, const StringData::ComparatorInterface* stringComparator);
void _updateWithMemoryUsage(MinMax* minMax,
BSONElement elem,
- const StringData::ComparatorInterface* stringComparator,
- const std::function<bool(int, int)>& comp);
+ const StringData::ComparatorInterface* stringComparator);
- void _append(BSONObjBuilder* builder) const;
- void _append(BSONArrayBuilder* builder) const;
+ template <typename GetDataFn>
+ void _append(BSONObjBuilder* builder, GetDataFn getData) const;
+ template <typename GetDataFn>
+ void _append(BSONArrayBuilder* builder, GetDataFn getData) const;
- /*
+ /**
* Appends updates, if any, to the builder. Returns whether any updates were appended by
* this MinMax or any MinMaxes below it.
*/
- bool _appendUpdates(BSONObjBuilder* builder);
+ template <typename GetDataFn>
+ bool _appendUpdates(BSONObjBuilder* builder, GetDataFn getData);
- /*
+ /**
* Clears the '_updated' flag on this MinMax and all MinMaxes below it.
*/
- void _clearUpdated();
+ template <typename GetDataFn>
+ void _clearUpdated(GetDataFn getData);
StringMap<MinMax> _object;
std::vector<MinMax> _array;
- BSONObj _value;
- Type _type = Type::kUnset;
+ /**
+ * Data bearing representation for MinMax. Can represent unset, Object, Array or Value
+ * (BSONElement).
+ */
+ class Data {
+ public:
+ /**
+ * Set type to value and store provided element without its field name.
+ */
+ void setValue(const BSONElement& elem);
+
+ /**
+ * Set type to object.
+ */
+ void setObject();
+
+ /**
+ * Set type to array.
+ */
+ void setArray();
+
+ /**
+ * Set to be the root object.
+ */
+ void setRootObject();
+
+ /**
+ * Returns stored BSONElement with field name as empty string..
+ */
+ BSONElement value() const;
+
+ /**
+ * Returns stored value type and size without needing to construct BSONElement above.
+ */
+ BSONType valueType() const;
+ int valueSize() const;
+
+ /**
+ * Type this MinMax::Data represents, Object, Array, Value or Unset.
+ */
+ Type type() const {
+ return _type;
+ }
+
+ /**
+ * Flag to indicate if this MinMax::Data was updated since last clear.
+ */
+ bool updated() const {
+ return _updated;
+ }
+
+ /**
+ * Clear update flag.
+ */
+ void clearUpdated() {
+ _updated = false;
+ }
+
+ private:
+ // Memory buffer to store BSONElement without the field name
+ std::unique_ptr<char[]> _value;
+
+ // Size of buffer above
+ int _totalSize = 0;
- bool _updated = false;
+ // Type that this MinMax::Data represents
+ Type _type = Type::kUnset;
+ // Flag to indicate if we got updated as part of this MinMax update.
+ bool _updated = false;
+ };
+
+ /**
+ * Helper for the recursive internal functions to access the min data component.
+ */
+ struct GetMin {
+ Data& operator()(MinMax& minmax) const {
+ return minmax._min;
+ }
+ const Data& operator()(const MinMax& minmax) const {
+ return minmax._min;
+ }
+ };
+
+ /**
+ * Helper for the recursive internal functions to access the max data component.
+ */
+ struct GetMax {
+ Data& operator()(MinMax& minmax) const {
+ return minmax._max;
+ }
+ const Data& operator()(const MinMax& minmax) const {
+ return minmax._max;
+ }
+ };
+
+ Data _min;
+ Data _max;
uint64_t _memoryUsage = 0;
};
@@ -416,11 +511,8 @@ public:
// Top-level field names of the measurements that have been inserted into the bucket.
StringSet _fieldNames;
- // The minimum values for each field in the bucket.
- MinMax _min;
-
- // The maximum values for each field in the bucket.
- MinMax _max;
+ // The minimum and maximum values for each field in the bucket.
+ MinMax _minmax;
// The latest time that has been inserted into the bucket.
Date_t _latestTime;