summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/noPassthrough/timeseries_collStats.js25
-rw-r--r--src/mongo/bson/util/bsoncolumnbuilder.cpp5
-rw-r--r--src/mongo/bson/util/bsoncolumnbuilder.h6
-rw-r--r--src/mongo/db/commands/write_commands.cpp14
-rw-r--r--src/mongo/db/timeseries/bucket_compression.cpp14
-rw-r--r--src/mongo/db/timeseries/bucket_compression.h8
-rw-r--r--src/mongo/db/timeseries/timeseries_stats.cpp8
-rw-r--r--src/mongo/db/timeseries/timeseries_stats.h11
8 files changed, 78 insertions, 13 deletions
diff --git a/jstests/noPassthrough/timeseries_collStats.js b/jstests/noPassthrough/timeseries_collStats.js
index fde24660e27..f1343cbab96 100644
--- a/jstests/noPassthrough/timeseries_collStats.js
+++ b/jstests/noPassthrough/timeseries_collStats.js
@@ -51,6 +51,7 @@ const clearCollection = function() {
expectedStats.numMeasurementsCommitted = 0;
expectedStats.numCompressedBuckets = 0;
expectedStats.numUncompressedBuckets = 0;
+ expectedStats.numSubObjCompressionRestart = 0;
};
clearCollection();
@@ -150,6 +151,30 @@ if (isTimeseriesBucketCompressionEnabled) {
}
checkCollStats();
+// Assumes each bucket has a limit of 1000 measurements. We change the order of fields twice in
+// the subobj we are storing. 'numSubObjCompressionRestart' should be 2 if bucket compression is
+// enabled.
+docs = Array(500).fill({[timeFieldName]: ISODate(), [metaFieldName]: {a: 37}, x: {'a': 1, 'b': 1}});
+docs = docs.concat(
+ Array(1).fill({[timeFieldName]: ISODate(), [metaFieldName]: {a: 37}, x: {'b': 1, 'a': 1}}));
+docs = docs.concat(
+ Array(500).fill({[timeFieldName]: ISODate(), [metaFieldName]: {a: 37}, x: {'a': 1, 'b': 1}}));
+assert.commandWorked(coll.insert(docs, {ordered: false}));
+expectedStats.bucketCount += 2;
+expectedStats.numBucketInserts += 2;
+expectedStats.numBucketsOpenedDueToMetadata++;
+expectedStats.numBucketsClosedDueToCount++;
+expectedStats.numCommits += 2;
+expectedStats.numMeasurementsCommitted += 1001;
+expectedStats.avgNumMeasurementsPerCommit =
+ Math.floor(expectedStats.numMeasurementsCommitted / expectedStats.numCommits);
+if (isTimeseriesBucketCompressionEnabled) {
+ expectedStats.numCompressedBuckets++;
+ expectedStats.numSubObjCompressionRestart += 2;
+}
+
+checkCollStats();
+
// Assumes each bucket has a limit of 125kB on the measurements stored in the 'data' field.
const bucketMaxSizeKB = 125;
numDocs = 2;
diff --git a/src/mongo/bson/util/bsoncolumnbuilder.cpp b/src/mongo/bson/util/bsoncolumnbuilder.cpp
index c7fd747efa8..bedd270a12e 100644
--- a/src/mongo/bson/util/bsoncolumnbuilder.cpp
+++ b/src/mongo/bson/util/bsoncolumnbuilder.cpp
@@ -388,6 +388,10 @@ BufBuilder BSONColumnBuilder::detach() {
return std::move(_bufBuilder);
}
+int BSONColumnBuilder::numInterleavedStartWritten() const {
+ return _numInterleavedStartWritten;
+}
+
BSONColumnBuilder::EncodingState::EncodingState(
BufBuilder* bufBuilder, std::function<void(const char*, size_t)> controlBlockWriter)
: _simple8bBuilder64(_createBufferWriter()),
@@ -924,6 +928,7 @@ void BSONColumnBuilder::_finishDetermineSubObjReference() {
// Done determining reference sub-object. Write this control byte and object to stream.
_bufBuilder.appendChar(bsoncolumn::kInterleavedStartControlByte);
_bufBuilder.appendBuf(_referenceSubObj.objdata(), _referenceSubObj.objsize());
+ ++_numInterleavedStartWritten;
// Initialize all encoding states. We do this by traversing in lock-step between the reference
// object and first buffered element. We can use the fact if sub-element exists in reference to
diff --git a/src/mongo/bson/util/bsoncolumnbuilder.h b/src/mongo/bson/util/bsoncolumnbuilder.h
index 3348f512dde..28cea020ca5 100644
--- a/src/mongo/bson/util/bsoncolumnbuilder.h
+++ b/src/mongo/bson/util/bsoncolumnbuilder.h
@@ -85,6 +85,11 @@ public:
*/
BufBuilder detach();
+ /**
+ * Returns the number of interleaved start control bytes this BSONColumnBuilder has written.
+ */
+ int numInterleavedStartWritten() const;
+
private:
/**
* State for encoding scalar BSONElement as BSONColumn using delta or delta-of-delta
@@ -186,6 +191,7 @@ private:
Mode _mode = Mode::kRegular;
std::string _fieldName;
+ int _numInterleavedStartWritten = 0;
};
} // namespace mongo
diff --git a/src/mongo/db/commands/write_commands.cpp b/src/mongo/db/commands/write_commands.cpp
index 75ab791c5ba..fd50c346163 100644
--- a/src/mongo/db/commands/write_commands.cpp
+++ b/src/mongo/db/commands/write_commands.cpp
@@ -710,13 +710,16 @@ public:
}
boost::optional<int> beforeSize;
- boost::optional<int> afterSize;
+ boost::optional<TimeseriesStats::CompressedBucketInfo> compressionStats;
+
auto bucketCompressionFunc = [&](const BSONObj& bucketDoc) -> boost::optional<BSONObj> {
beforeSize = bucketDoc.objsize();
// Reset every time we run to ensure we never use a stale value
- afterSize = boost::none;
- auto compressed = timeseries::compressBucket(bucketDoc, closedBucket.timeField);
+ compressionStats = boost::none;
+ int numInterleavedRestarts = 0;
+ auto compressed = timeseries::compressBucket(
+ bucketDoc, closedBucket.timeField, &numInterleavedRestarts);
// If compressed object size is larger than uncompressed, skip compression update.
if (compressed && compressed->objsize() >= *beforeSize) {
LOGV2_DEBUG(5857802,
@@ -727,7 +730,8 @@ public:
"compressedSize"_attr = compressed->objsize());
return boost::none;
}
- afterSize = compressed->objsize();
+ compressionStats = TimeseriesStats::CompressedBucketInfo{compressed->objsize(),
+ numInterleavedRestarts};
return compressed;
};
@@ -743,7 +747,7 @@ public:
opCtx, compressionOp.getNamespace());
if (coll) {
const auto& stats = TimeseriesStats::get(coll.get());
- stats.onBucketClosed(*beforeSize, afterSize);
+ stats.onBucketClosed(*beforeSize, compressionStats);
}
}
diff --git a/src/mongo/db/timeseries/bucket_compression.cpp b/src/mongo/db/timeseries/bucket_compression.cpp
index 669a24be224..6267cc39314 100644
--- a/src/mongo/db/timeseries/bucket_compression.cpp
+++ b/src/mongo/db/timeseries/bucket_compression.cpp
@@ -41,13 +41,19 @@ namespace mongo {
namespace timeseries {
-boost::optional<BSONObj> compressBucket(const BSONObj& bucketDoc, StringData timeFieldName) try {
+boost::optional<BSONObj> compressBucket(const BSONObj& bucketDoc,
+ StringData timeFieldName,
+ int* numInterleavedRestarts) try {
// Helper for uncompressed measurements
struct Measurement {
BSONElement timeField;
std::vector<BSONElement> dataFields;
};
+ if (numInterleavedRestarts) {
+ *numInterleavedRestarts = 0;
+ }
+
BSONObjBuilder builder; // builder to build the compressed bucket
std::vector<Measurement> measurements; // Extracted measurements from uncompressed bucket
boost::optional<BSONObjIterator> time; // Iterator to read time fields from uncompressed bucket
@@ -200,6 +206,12 @@ boost::optional<BSONObj> compressBucket(const BSONObj& bucketDoc, StringData tim
}
}
dataBuilder.append(column.fieldName(), column.finalize());
+            // We only record when interleaved mode has to restart, i.e. when more than one
+            // interleaved start control byte was written in the binary.
+ if (int interleavedStarts = column.numInterleavedStartWritten();
+ numInterleavedRestarts && interleavedStarts > 1) {
+ *numInterleavedRestarts += interleavedStarts - 1;
+ }
columnBuffer = column.detach();
}
}
diff --git a/src/mongo/db/timeseries/bucket_compression.h b/src/mongo/db/timeseries/bucket_compression.h
index f28360bf786..b7a21d5f7a2 100644
--- a/src/mongo/db/timeseries/bucket_compression.h
+++ b/src/mongo/db/timeseries/bucket_compression.h
@@ -40,11 +40,15 @@ namespace timeseries {
/**
* Returns a compressed timeseries bucket in v2 format for a given uncompressed v1 bucket and time
- * field. The compressed bucket will have all measurements sorted by time.
+ * field. The compressed bucket will have all measurements sorted by time. 'numInterleavedRestarts'
+ * may be provided to get the number of times subobject compression had to restart, i.e. the number
+ * of interleaved start control bytes written in excess of one per column. Useful for statistics.
*
* If bucket compression is not possible for any reason, boost::none is returned.
*/
-boost::optional<BSONObj> compressBucket(const BSONObj& bucketDoc, StringData timeFieldName);
+boost::optional<BSONObj> compressBucket(const BSONObj& bucketDoc,
+ StringData timeFieldName,
+ int* numInterleavedRestarts = nullptr);
} // namespace timeseries
} // namespace mongo
diff --git a/src/mongo/db/timeseries/timeseries_stats.cpp b/src/mongo/db/timeseries/timeseries_stats.cpp
index 33cafb386ff..4333bf39a7d 100644
--- a/src/mongo/db/timeseries/timeseries_stats.cpp
+++ b/src/mongo/db/timeseries/timeseries_stats.cpp
@@ -44,10 +44,11 @@ const TimeseriesStats& TimeseriesStats::get(const Collection* coll) {
}
void TimeseriesStats::onBucketClosed(int uncompressedSize,
- boost::optional<int> compressedSize) const {
+ boost::optional<CompressedBucketInfo> compressed) const {
_uncompressedSize.fetchAndAddRelaxed(uncompressedSize);
- if (compressedSize) {
- _compressedSize.fetchAndAddRelaxed(*compressedSize);
+ if (compressed) {
+ _compressedSize.fetchAndAddRelaxed(compressed->size);
+ _compressedSubObjRestart.fetchAndAddRelaxed(compressed->numInterleaveRestarts);
_numCompressedBuckets.fetchAndAddRelaxed(1);
} else {
_compressedSize.fetchAndAddRelaxed(uncompressedSize);
@@ -58,6 +59,7 @@ void TimeseriesStats::onBucketClosed(int uncompressedSize,
void TimeseriesStats::append(BSONObjBuilder* builder) const {
builder->appendNumber("numBytesUncompressed", _uncompressedSize.load());
builder->appendNumber("numBytesCompressed", _compressedSize.load());
+ builder->appendNumber("numSubObjCompressionRestart", _compressedSubObjRestart.load());
builder->appendNumber("numCompressedBuckets", _numCompressedBuckets.load());
builder->appendNumber("numUncompressedBuckets", _numUncompressedBuckets.load());
}
diff --git a/src/mongo/db/timeseries/timeseries_stats.h b/src/mongo/db/timeseries/timeseries_stats.h
index c7870f34d35..3219e793add 100644
--- a/src/mongo/db/timeseries/timeseries_stats.h
+++ b/src/mongo/db/timeseries/timeseries_stats.h
@@ -44,11 +44,17 @@ class TimeseriesStats {
public:
static const TimeseriesStats& get(const Collection* coll);
+ struct CompressedBucketInfo {
+ int size = 0;
+ int numInterleaveRestarts = 0;
+ };
+
/**
- * Records stats for a closed time-series bucket. 'boost::none' for compressedSize means
+ * Records stats for a closed time-series bucket. 'boost::none' for compressed means
* compression failed for any reason.
*/
- void onBucketClosed(int uncompressedSize, boost::optional<int> compressedSize) const;
+ void onBucketClosed(int uncompressedSize,
+ boost::optional<CompressedBucketInfo> compressed) const;
/**
* Appends current stats to the given BSONObjBuilder.
@@ -60,6 +66,7 @@ private:
// non-const Collection (which requires MODE_X collection lock).
mutable AtomicWord<long long> _uncompressedSize;
mutable AtomicWord<long long> _compressedSize;
+ mutable AtomicWord<long long> _compressedSubObjRestart;
mutable AtomicWord<long long> _numCompressedBuckets;
mutable AtomicWord<long long> _numUncompressedBuckets;
};