summaryrefslogtreecommitdiff
path: root/src/mongo/db/timeseries
diff options
context:
space:
mode:
author: Gregory Wlodarek <gregory.wlodarek@mongodb.com> 2022-06-13 19:11:14 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-06-13 21:14:26 +0000
commit: 6ebd2850069b676b7fa51a6e139d596d57ba94cd (patch)
tree: 87a6e6b1c49fa5004fa282f03b0b3b319d247b1a /src/mongo/db/timeseries
parent: 037f714353bb314aa28c7ec6be6f77865ac41273 (diff)
download: mongo-6ebd2850069b676b7fa51a6e139d596d57ba94cd.tar.gz
SERVER-65703 Approximate control.min and control.max space usage for time-series inserts
Diffstat (limited to 'src/mongo/db/timeseries')
-rw-r--r--  src/mongo/db/timeseries/bucket_catalog.cpp | 44
-rw-r--r--  src/mongo/db/timeseries/bucket_catalog.h   |  2
2 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/src/mongo/db/timeseries/bucket_catalog.cpp b/src/mongo/db/timeseries/bucket_catalog.cpp
index abd3a869f74..0b365571616 100644
--- a/src/mongo/db/timeseries/bucket_catalog.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog.cpp
@@ -349,7 +349,6 @@ private:
void _calculateBucketFieldsAndSizeChange(const BSONObj& doc,
boost::optional<StringData> metaField,
NewFieldNames* newFieldNamesToBeInserted,
- uint32_t* newFieldNamesSize,
uint32_t* sizeToBeAdded) const {
// BSON size for an object with an empty object field where field name is empty string.
// We can use this as an offset to know the size when we have real field names.
@@ -358,7 +357,6 @@ private:
dassert(emptyObjSize == BSON("" << BSONObj()).objsize());
newFieldNamesToBeInserted->clear();
- *newFieldNamesSize = 0;
*sizeToBeAdded = 0;
auto numMeasurementsFieldLength = numDigits(_numMeasurements);
for (const auto& elem : doc) {
@@ -368,12 +366,24 @@ private:
continue;
}
- // If the field name is new, add the size of an empty object with that field name.
auto hashedKey = StringSet::hasher().hashed_key(fieldName);
if (!_fieldNames.contains(hashedKey)) {
+ // Record the new field name only if it hasn't been committed yet. There could be
+ // concurrent batches writing to this bucket with the same new field name, but
+ // they're not guaranteed to commit successfully.
newFieldNamesToBeInserted->push_back(hashedKey);
- *newFieldNamesSize += elem.fieldNameSize();
- *sizeToBeAdded += emptyObjSize + fieldName.size();
+
+ // Only update the bucket size once to account for the new field name if it isn't
+ // already pending a commit from another batch.
+ if (!_uncommittedFieldNames.contains(hashedKey)) {
+ // Add the size of an empty object with that field name.
+ *sizeToBeAdded += emptyObjSize + fieldName.size();
+
+ // The control.min and control.max summaries don't have any information for this
+ // new field name yet. Add two measurements worth of data to account for this.
+ // As this is the first measurement for this field, min == max.
+ *sizeToBeAdded += elem.size() * 2;
+ }
}
// Add the element size, taking into account that the name will be changed to its
@@ -420,6 +430,9 @@ private:
// Top-level hashed field names of the measurements that have been inserted into the bucket.
StringSet _fieldNames;
+ // Top-level hashed new field names that have not yet been committed into the bucket.
+ StringSet _uncommittedFieldNames;
+
// Time field for the measurements that have been inserted into the bucket.
std::string _timeField;
@@ -536,9 +549,10 @@ void BucketCatalog::WriteBatch::_addMeasurement(const BSONObj& doc) {
_measurements.push_back(doc);
}
-void BucketCatalog::WriteBatch::_recordNewFields(NewFieldNames&& fields) {
+void BucketCatalog::WriteBatch::_recordNewFields(Bucket* bucket, NewFieldNames&& fields) {
for (auto&& field : fields) {
_newFieldNamesToBeInserted[field] = field.hash();
+ bucket->_uncommittedFieldNames.emplace(field);
}
}
@@ -550,6 +564,7 @@ void BucketCatalog::WriteBatch::_prepareCommit(Bucket* bucket) {
// by someone else.
for (auto it = _newFieldNamesToBeInserted.begin(); it != _newFieldNamesToBeInserted.end();) {
StringMapHashedKey fieldName(it->first, it->second);
+ bucket->_uncommittedFieldNames.erase(fieldName);
if (bucket->_fieldNames.contains(fieldName)) {
_newFieldNamesToBeInserted.erase(it++);
continue;
@@ -761,13 +776,9 @@ StatusWith<BucketCatalog::InsertResult> BucketCatalog::insert(
invariant(bucket);
NewFieldNames newFieldNamesToBeInserted;
- uint32_t newFieldNamesSize = 0;
uint32_t sizeToBeAdded = 0;
- bucket->_calculateBucketFieldsAndSizeChange(doc,
- options.getMetaField(),
- &newFieldNamesToBeInserted,
- &newFieldNamesSize,
- &sizeToBeAdded);
+ bucket->_calculateBucketFieldsAndSizeChange(
+ doc, options.getMetaField(), &newFieldNamesToBeInserted, &sizeToBeAdded);
auto shouldCloseBucket = [&](Bucket* bucket) -> bool {
if (bucket->schemaIncompatible(doc, metaFieldName, comparator)) {
@@ -798,16 +809,13 @@ StatusWith<BucketCatalog::InsertResult> BucketCatalog::insert(
info.openedDuetoMetadata = false;
bucket = _rollover(&stripe, stripeLock, bucket, info);
- bucket->_calculateBucketFieldsAndSizeChange(doc,
- options.getMetaField(),
- &newFieldNamesToBeInserted,
- &newFieldNamesSize,
- &sizeToBeAdded);
+ bucket->_calculateBucketFieldsAndSizeChange(
+ doc, options.getMetaField(), &newFieldNamesToBeInserted, &sizeToBeAdded);
}
auto batch = bucket->_activeBatch(getOpId(opCtx, combine), stats);
batch->_addMeasurement(doc);
- batch->_recordNewFields(std::move(newFieldNamesToBeInserted));
+ batch->_recordNewFields(bucket, std::move(newFieldNamesToBeInserted));
bucket->_numMeasurements++;
bucket->_size += sizeToBeAdded;
diff --git a/src/mongo/db/timeseries/bucket_catalog.h b/src/mongo/db/timeseries/bucket_catalog.h
index ad7d7161d1b..c133fea3864 100644
--- a/src/mongo/db/timeseries/bucket_catalog.h
+++ b/src/mongo/db/timeseries/bucket_catalog.h
@@ -181,7 +181,7 @@ public:
/**
* Records a set of new-to-the-bucket fields. Active batches only.
*/
- void _recordNewFields(NewFieldNames&& fields);
+ void _recordNewFields(Bucket* bucket, NewFieldNames&& fields);
/**
* Prepares the batch for commit. Sets min/max appropriately, records the number of