diff options
author | Dan Larkin-York <dan.larkin-york@mongodb.com> | 2022-06-24 12:16:36 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-06-24 12:41:01 +0000 |
commit | be7d9cd2d857f8179af2be6e274cd4ff16c27ce7 (patch) | |
tree | 94c70a4e72a10b359669b09aeb208b3512641e58 | |
parent | 09546e0661b4730bd78bb64b330fd8f3ef155c25 (diff) | |
download | mongo-be7d9cd2d857f8179af2be6e274cd4ff16c27ce7.tar.gz |
SERVER-66727 Use full-precision timestamp for time-series bucketing decision
-rw-r--r-- | jstests/core/timeseries/timeseries_bucket_spanning_epoch.js | 88 | ||||
-rw-r--r-- | src/mongo/db/timeseries/bucket_catalog.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/timeseries/bucket_catalog.h | 11 |
3 files changed, 104 insertions, 9 deletions
diff --git a/jstests/core/timeseries/timeseries_bucket_spanning_epoch.js b/jstests/core/timeseries/timeseries_bucket_spanning_epoch.js new file mode 100644 index 00000000000..7c5be1c0f3f --- /dev/null +++ b/jstests/core/timeseries/timeseries_bucket_spanning_epoch.js @@ -0,0 +1,88 @@ +/** + * Tests correctness of time-series bucketing when measurements cross the Unix Epoch and other + * interesting boundaries. + * + * @tags: [ + * requires_timeseries, + * does_not_support_transactions, + * # This test depends on certain writes ending up in the same bucket. Stepdowns may result in + * # writes splitting between two primaries, and thus different buckets. + * does_not_support_stepdowns, + * # Same goes for tenant migrations. + * tenant_migration_incompatible, + * requires_fcv_50, + * ] + */ + +(function() { + +// Test that measurements spanning the Unix Epoch end up in the same bucket. +(function testUnixEpoch() { + let coll = db.timeseries_bucket_spanning_epoch; + let bucketsColl = db.getCollection('system.buckets.' + coll.getName()); + coll.drop(); + + assert.commandWorked(db.createCollection( + coll.getName(), {timeseries: {timeField: 't', metaField: 'm', granularity: 'hours'}})); + + // All measurements land in the same bucket. + assert.commandWorked(coll.insert({m: 1, t: ISODate("1969-12-31T23:00:00.000Z")})); + assert.commandWorked(coll.insert({m: 1, t: ISODate("1970-01-01T01:00:00.000Z")})); + assert.eq(1, bucketsColl.find().itcount()); +})(); + +// Test that measurements spanning multiples of the Unix Epoch width end up in the different buckets +(function testUnixEpoch() { + let coll = db.timeseries_bucket_spanning_epoch; + let bucketsColl = db.getCollection('system.buckets.' + coll.getName()); + coll.drop(); + + assert.commandWorked(db.createCollection( + coll.getName(), {timeseries: {timeField: 't', metaField: 'm', granularity: 'hours'}})); + + // Measurements land in same buckets. + assert.commandWorked(coll.insert({m: 1, t: ISODate("1900-01-01T00:00:00.000Z")})); + assert.commandWorked(coll.insert({m: 1, t: ISODate("1900-01-01T01:00:00.000Z")})); + assert.eq(1, bucketsColl.find().itcount()); + + // Measurements land in different buckets. + assert.commandWorked(coll.insert({m: 1, t: ISODate("1970-01-01T01:00:00.000Z")})); + assert.eq(2, bucketsColl.find().itcount()); + + // Measurements land in different buckets. + assert.commandWorked(coll.insert({m: 1, t: ISODate("2110-01-01T00:00:00.000Z")})); + assert.commandWorked(coll.insert({m: 1, t: ISODate("2110-01-01T01:00:00.000Z")})); + assert.eq(3, bucketsColl.find().itcount()); +})(); + +// Test that measurements with timestamps equivalent modulo 2^32 end up in the same bucket. +(function testUnixEpochPlus32BitsOverflow() { + let coll = db.timeseries_bucket_spanning_epoch; + let bucketsColl = db.getCollection('system.buckets.' + coll.getName()); + coll.drop(); + + assert.commandWorked(db.createCollection( + coll.getName(), {timeseries: {timeField: 't', metaField: 'm', granularity: 'hours'}})); + + // Measurements land in different buckets. + assert.commandWorked(coll.insert({m: 2, t: ISODate("2106-07-02T06:28:16.000Z")})); + assert.commandWorked(coll.insert({m: 2, t: ISODate("1970-01-01T00:00:00.000Z")})); + assert.eq(2, bucketsColl.find().itcount()); +})(); + +// Test that measurements with a difference of more than the maximum time span expressible in 32-bit +// precision seconds-count cannot overflow to end up in the same bucket. +(function testUnixEpochPlus32BitsAndSomeOverflow() { + let coll = db.timeseries_bucket_spanning_epoch; + let bucketsColl = db.getCollection('system.buckets.' + coll.getName()); + coll.drop(); + + assert.commandWorked(db.createCollection( + coll.getName(), {timeseries: {timeField: 't', metaField: 'm', granularity: 'hours'}})); + + // Measurements land in different buckets. + assert.commandWorked(coll.insert({m: 2, t: ISODate("2105-06-24T06:28:16Z")})); + assert.commandWorked(coll.insert({m: 2, t: ISODate("1969-05-18T00:00:00.000Z")})); + assert.eq(2, bucketsColl.find().itcount()); +})(); +})(); diff --git a/src/mongo/db/timeseries/bucket_catalog.cpp b/src/mongo/db/timeseries/bucket_catalog.cpp index 461a90bb213..7093a63d0eb 100644 --- a/src/mongo/db/timeseries/bucket_catalog.cpp +++ b/src/mongo/db/timeseries/bucket_catalog.cpp @@ -166,7 +166,7 @@ std::pair<OID, Date_t> generateBucketId(const Date_t& time, const TimeseriesOpti // together into predictable chunks for sharding. This way we know from a measurement timestamp // what the bucket timestamp will be, so we can route measurements to the right shard chunk. auto roundedTime = timeseries::roundTimestampToGranularity(time, options.getGranularity()); - uint64_t const roundedSeconds = durationCount<Seconds>(roundedTime.toDurationSinceEpoch()); + int64_t const roundedSeconds = durationCount<Seconds>(roundedTime.toDurationSinceEpoch()); bucketId.setTimestamp(roundedSeconds); // Now, if we stopped here we could end up with bucket OID collisions. Consider the case where @@ -265,7 +265,7 @@ StatusWith<std::shared_ptr<BucketCatalog::WriteBatch>> BucketCatalog::insert( stats->numBucketsClosedDueToSize.fetchAndAddRelaxed(1); return true; } - auto bucketTime = (*bucket).getTime(); + auto bucketTime = (*bucket)->getTime(); if (time - bucketTime >= Seconds(*options.getBucketMaxSpanSeconds())) { stats->numBucketsClosedDueToTimeForward.fetchAndAddRelaxed(1); return true; @@ -671,6 +671,8 @@ BucketCatalog::Bucket* BucketCatalog::_allocateBucket(const BucketKey& key, stats->numBucketsOpenedDueToMetadata.fetchAndAddRelaxed(1); } + bucket->_minTime = roundedTime; + // Make sure we set the control.min time field to match the rounded _id timestamp. auto controlDoc = buildControlMinTimestampDoc(options.getTimeField(), roundedTime); bucket->_minmax.update( @@ -851,6 +853,10 @@ bool BucketCatalog::Bucket::allCommitted() const { return _batches.empty() && !_preparedBatch; } +Date_t BucketCatalog::Bucket::getTime() const { + return _minTime; +} + std::shared_ptr<BucketCatalog::WriteBatch> BucketCatalog::Bucket::_activeBatch( OperationId opId, const std::shared_ptr<ExecutionStats>& stats) { auto it = _batches.find(opId); @@ -1126,10 +1132,6 @@ void BucketCatalog::BucketAccess::rollover(const std::function<bool(BucketAccess } } -Date_t BucketCatalog::BucketAccess::getTime() const { - return _bucket->id().asDateT(); -} - BucketCatalog::WriteBatch::WriteBatch(const OID& bucketId, OperationId opId, const std::shared_ptr<ExecutionStats>& stats) diff --git a/src/mongo/db/timeseries/bucket_catalog.h b/src/mongo/db/timeseries/bucket_catalog.h index f00475a0c71..59266a6d765 100644 --- a/src/mongo/db/timeseries/bucket_catalog.h +++ b/src/mongo/db/timeseries/bucket_catalog.h @@ -349,6 +349,11 @@ public: */ bool allCommitted() const; + /** + * Retrieve the time associated with the bucket + */ + Date_t getTime() const; + private: /** * Determines the effect of adding 'doc' to this bucket. If adding 'doc' causes this bucket @@ -393,6 +398,9 @@ public: // Top-level field names of the measurements that have been inserted into the bucket. StringSet _fieldNames; + // Minimum timestamp over contained measurements + Date_t _minTime; + // The minimum and maximum values for each field in the bucket. timeseries::MinMax _minmax; @@ -575,9 +583,6 @@ private: */ void rollover(const std::function<bool(BucketAccess*)>& isBucketFull); - // Retrieve the time associated with the bucket (id) - Date_t getTime() const; - private: /** * Returns the state of the bucket, or boost::none if there is no state for the bucket. |