From 2fc1d7177e4b8c79c8e307d37cf901bed4bfadf8 Mon Sep 17 00:00:00 2001
From: Gregory Wlodarek
Date: Thu, 16 Jun 2022 20:41:14 +0000
Subject: SERVER-66738 Improve bucketing performance for large time-series
 measurements

---
 .../timeseries/timeseries_bucket_limit_size.js    | 124 ---------------------
 .../timeseries/timeseries_large_measurements.js   |  22 ++--
 2 files changed, 14 insertions(+), 132 deletions(-)
 delete mode 100644 jstests/core/timeseries/timeseries_bucket_limit_size.js

diff --git a/jstests/core/timeseries/timeseries_bucket_limit_size.js b/jstests/core/timeseries/timeseries_bucket_limit_size.js
deleted file mode 100644
index 09b4ba7d8d8..00000000000
--- a/jstests/core/timeseries/timeseries_bucket_limit_size.js
+++ /dev/null
@@ -1,124 +0,0 @@
-/**
- * Tests maximum size of measurements held in each bucket in a time-series buckets collection.
- * @tags: [
- *   does_not_support_stepdowns,
- *   does_not_support_transactions,
- *   tenant_migration_incompatible,
- *   requires_fcv_61,
- * ]
- */
-(function() {
-"use strict";
-
-load("jstests/core/timeseries/libs/timeseries.js");  // For 'TimeseriesTest'.
-
-TimeseriesTest.run((insert) => {
-    const isTimeseriesBucketCompressionEnabled =
-        TimeseriesTest.timeseriesBucketCompressionEnabled(db);
-    const areTimeseriesScalabilityImprovementsEnabled =
-        TimeseriesTest.timeseriesScalabilityImprovementsEnabled(db);
-
-    const collNamePrefix = 'timeseries_bucket_limit_size_';
-
-    const timeFieldName = 'time';
-
-    // Assumes each bucket has a limit of 125kB on the measurements stored in the 'data' field.
-    const bucketMaxSizeKB = 125;
-    const numDocs = 3;
-
-    // The measurement data should not take up all of the 'bucketMaxSizeKB' limit because we need to
-    // leave room for the control.min and control.max summaries (two measurements worth of data). We
-    // need to fit two measurements within this limit to trigger compression if enabled.
-    const largeValue = 'x'.repeat(((bucketMaxSizeKB - 1) / 4) * 1024);
-
-    const runTest = function(numDocsPerInsert) {
-        const coll = db.getCollection(collNamePrefix + numDocsPerInsert);
-        const bucketsColl = db.getCollection('system.buckets.' + coll.getName());
-        coll.drop();
-
-        assert.commandWorked(
-            db.createCollection(coll.getName(), {timeseries: {timeField: timeFieldName}}));
-        assert.contains(bucketsColl.getName(), db.getCollectionNames());
-
-        let docs = [];
-        for (let i = 0; i < numDocs; i++) {
-            docs.push({_id: i, [timeFieldName]: ISODate(), x: largeValue});
-            if ((i + 1) % numDocsPerInsert === 0) {
-                assert.commandWorked(insert(coll, docs), 'failed to insert docs: ' + tojson(docs));
-                docs = [];
-            }
-        }
-
-        // Check view.
-        const viewDocs = coll.find({}, {x: 1}).sort({_id: 1}).toArray();
-        assert.eq(numDocs, viewDocs.length, viewDocs);
-        for (let i = 0; i < numDocs; i++) {
-            const viewDoc = viewDocs[i];
-            assert.eq(i, viewDoc._id, 'unexpected _id in doc: ' + i + ': ' + tojson(viewDoc));
-            assert.eq(
-                largeValue, viewDoc.x, 'unexpected field x in doc: ' + i + ': ' + tojson(viewDoc));
-        }
-
-        // Check bucket collection.
-        const bucketDocs = bucketsColl.find().sort({'control.min._id': 1}).toArray();
-        assert.eq(2, bucketDocs.length, bucketDocs);
-
-        // Check both buckets.
-        // First bucket should be full with two documents since we spill the third document over
-        // into the second bucket due to size constraints on 'data'.
-        assert.eq(0,
-                  bucketDocs[0].control.min._id,
-                  'invalid control.min for _id in first bucket: ' + tojson(bucketDocs[0].control));
-        assert.eq(largeValue,
-                  bucketDocs[0].control.min.x,
-                  'invalid control.min for x in first bucket: ' + tojson(bucketDocs[0].control));
-        assert.eq(1,
-                  bucketDocs[0].control.max._id,
-                  'invalid control.max for _id in first bucket: ' + tojson(bucketDocs[0].control));
-        assert.eq(largeValue,
-                  bucketDocs[0].control.max.x,
-                  'invalid control.max for x in first bucket: ' + tojson(bucketDocs[0].control));
-        assert.eq(isTimeseriesBucketCompressionEnabled ? 2 : 1,
-                  bucketDocs[0].control.version,
-                  'unexpected control.version in first bucket: ' + tojson(bucketDocs));
-
-        if (areTimeseriesScalabilityImprovementsEnabled) {
-            assert.eq(true,
-                      bucketDocs[0].control.closed,
-                      'unexpected control.closed in first bucket: ' + tojson(bucketDocs));
-        } else {
-            assert(!bucketDocs[0].control.hasOwnProperty("closed"),
-                   'unexpected control.closed in first bucket: ' + tojson(bucketDocs));
-        }
-
-        // Second bucket should contain the remaining document.
-        assert.eq(numDocs - 1,
-                  bucketDocs[1].control.min._id,
-                  'invalid control.min for _id in second bucket: ' + tojson(bucketDocs[1].control));
-        assert.eq(largeValue,
-                  bucketDocs[1].control.min.x,
-                  'invalid control.min for x in second bucket: ' + tojson(bucketDocs[1].control));
-        assert.eq(numDocs - 1,
-                  bucketDocs[1].control.max._id,
-                  'invalid control.max for _id in second bucket: ' + tojson(bucketDocs[1].control));
-        assert.eq(largeValue,
-                  bucketDocs[1].control.max.x,
-                  'invalid control.max for x in second bucket: ' + tojson(bucketDocs[1].control));
-        assert.eq(1,
-                  bucketDocs[1].control.version,
-                  'unexpected control.version in second bucket: ' + tojson(bucketDocs));
-
-        if (areTimeseriesScalabilityImprovementsEnabled) {
-            assert.eq(false,
-                      bucketDocs[1].control.closed,
-                      'unexpected control.closed in second bucket: ' + tojson(bucketDocs));
-        } else {
-            assert(!bucketDocs[1].control.hasOwnProperty("closed"),
-                   'unexpected control.closed in second bucket: ' + tojson(bucketDocs));
-        }
-    };
-
-    runTest(1);
-    runTest(numDocs);
-});
-})();
diff --git a/jstests/core/timeseries/timeseries_large_measurements.js b/jstests/core/timeseries/timeseries_large_measurements.js
index 310c97b0d7a..409af5a80d3 100644
--- a/jstests/core/timeseries/timeseries_large_measurements.js
+++ b/jstests/core/timeseries/timeseries_large_measurements.js
@@ -5,6 +5,7 @@
  * @tags: [
  *   does_not_support_stepdowns,
  *   does_not_support_transactions,
+ *   tenant_migration_incompatible,
  *   requires_collstats,
  *   requires_fcv_61,
  * ]
@@ -13,6 +14,7 @@
 "use strict";
 
 const coll = db.getCollection(jsTestName());
+const bucketColl = db.getCollection("system.buckets." + jsTestName());
 const timeFieldName = "time";
 
 const resetCollection = (() => {
@@ -33,15 +35,19 @@ const checkAverageBucketSize = (() => {
     jsTestLog("Average bucket size: " + averageBucketSize);
     assert.lte(averageBucketSize, timeseriesBucketMaxSize);
+
+    const firstBucket = bucketColl.find().sort({'control.min._id': 1}).toArray()[0];
+    assert.eq(0, firstBucket.control.min._id);
+    assert.eq(9, firstBucket.control.max._id);
 });
 
-// Each measurement inserted will consume roughly 1/5th of the bucket max size. In theory, we'll
-// only be able to fit three measurements per bucket. The first measurement will also create the
+// Each measurement inserted will consume roughly 1/12th of the bucket max size. In theory, we'll
+// only be able to fit ten measurements per bucket. The first measurement will also create the
 // control.min and control.max summaries, which will account for two measurements worth of data.
-// The second and third measurements will not modify the control.min and control.max fields to the
-// same degree as they're going to insert the same-length values. The remaining ~5% of the bucket
-// size is left for other internal fields that need to be written out.
+// The other measurements will not modify the control.min and control.max fields to the same degree
+// as they're going to insert the same-length values. The remaining ~4% of the bucket size is left
+// for other internal fields that need to be written out.
-const measurementValueLength = Math.floor(timeseriesBucketMaxSize * 0.19);
+const measurementValueLength = Math.floor(timeseriesBucketMaxSize * 0.08);
 
 const numMeasurements = 100;
 
@@ -49,7 +55,7 @@ jsTestLog("Testing single inserts");
 resetCollection();
 
 for (let i = 0; i < numMeasurements; i++) {
-    const doc = {[timeFieldName]: ISODate(), value: "a".repeat(measurementValueLength)};
+    const doc = {_id: i, [timeFieldName]: ISODate(), value: "a".repeat(measurementValueLength)};
     assert.commandWorked(coll.insert(doc));
 }
 checkAverageBucketSize();
@@ -59,7 +65,7 @@ resetCollection();
 
 let batch = [];
 for (let i = 0; i < numMeasurements; i++) {
-    const doc = {[timeFieldName]: ISODate(), value: "a".repeat(measurementValueLength)};
+    const doc = {_id: i, [timeFieldName]: ISODate(), value: "a".repeat(measurementValueLength)};
     batch.push(doc);
 }
 assert.commandWorked(coll.insertMany(batch));
-- 
cgit v1.2.1
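Editorial note (not part of the patch): a quick sanity check of the sizing arithmetic in the updated comment. This sketch assumes the 125kB bucket limit documented in the deleted test; the real test reads timeseriesBucketMaxSize from the server at runtime, and the assert helper is the one available in the mongo shell / jstests environment.

    // Sketch: ten measurements plus two measurements' worth of control.min/control.max
    // summary data, each ~8% (~1/12th) of the limit, should fit within one bucket.
    const timeseriesBucketMaxSize = 125 * 1024;  // assumption taken from the deleted test
    const measurementValueLength = Math.floor(timeseriesBucketMaxSize * 0.08);  // 10240 bytes
    const measurementsPerBucket = 10;  // matches the control.min._id 0 / control.max._id 9 asserts
    const bytesUsed = (measurementsPerBucket + 2) * measurementValueLength;
    assert.lte(bytesUsed, timeseriesBucketMaxSize);  // 122880 <= 128000: ~96% full, ~4% spare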