author     Dan Larkin-York <dan.larkin-york@mongodb.com>  2022-06-24 12:16:36 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2022-06-24 12:41:01 +0000
commit     be7d9cd2d857f8179af2be6e274cd4ff16c27ce7 (patch)
tree       94c70a4e72a10b359669b09aeb208b3512641e58
parent     09546e0661b4730bd78bb64b330fd8f3ef155c25 (diff)
download   mongo-be7d9cd2d857f8179af2be6e274cd4ff16c27ce7.tar.gz
SERVER-66727 Use full-precision timestamp for time-series bucketing decision
-rw-r--r--  jstests/core/timeseries/timeseries_bucket_spanning_epoch.js  88
-rw-r--r--  src/mongo/db/timeseries/bucket_catalog.cpp                    14
-rw-r--r--  src/mongo/db/timeseries/bucket_catalog.h                      11
3 files changed, 104 insertions, 9 deletions
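
The root cause: a bucket's time was previously recovered from its OID, and an OID embeds only a 32-bit seconds-since-epoch counter. Round-tripping a full Date_t through that field truncates timestamps before 1970 or after early 2106, so two measurements roughly 2^32 seconds apart could be mistaken for near neighbors in time. A minimal standalone sketch of the truncation (plain C++ for illustration, not MongoDB's actual OID API):

    #include <cstdint>
    #include <iostream>

    int main() {
        // Full-precision seconds since the Unix Epoch.
        int64_t t1 = 0;                 // 1970-01-01T00:00:00Z
        int64_t t2 = int64_t{1} << 32;  // 2106-02-07T06:28:16Z, 2^32 seconds later

        // An OID-style timestamp keeps only the low 32 bits of the seconds count.
        uint32_t o1 = static_cast<uint32_t>(t1);
        uint32_t o2 = static_cast<uint32_t>(t2);

        std::cout << (o1 == o2) << "\n";  // prints 1: the two times collide
    }

The patch below stops consulting the OID for bucketing decisions and instead stores the full-precision rounded timestamp on the bucket itself.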
diff --git a/jstests/core/timeseries/timeseries_bucket_spanning_epoch.js b/jstests/core/timeseries/timeseries_bucket_spanning_epoch.js
new file mode 100644
index 00000000000..7c5be1c0f3f
--- /dev/null
+++ b/jstests/core/timeseries/timeseries_bucket_spanning_epoch.js
@@ -0,0 +1,88 @@
+/**
+ * Tests correctness of time-series bucketing when measurements cross the Unix Epoch and other
+ * interesting boundaries.
+ *
+ * @tags: [
+ * requires_timeseries,
+ * does_not_support_transactions,
+ * # This test depends on certain writes ending up in the same bucket. Stepdowns may result in
+ * # writes splitting between two primaries, and thus different buckets.
+ * does_not_support_stepdowns,
+ * # Same goes for tenant migrations.
+ * tenant_migration_incompatible,
+ * requires_fcv_50,
+ * ]
+ */
+
+(function() {
+
+// Test that measurements spanning the Unix Epoch end up in the same bucket.
+(function testUnixEpoch() {
+ let coll = db.timeseries_bucket_spanning_epoch;
+ let bucketsColl = db.getCollection('system.buckets.' + coll.getName());
+ coll.drop();
+
+ assert.commandWorked(db.createCollection(
+ coll.getName(), {timeseries: {timeField: 't', metaField: 'm', granularity: 'hours'}}));
+
+ // All measurements land in the same bucket.
+ assert.commandWorked(coll.insert({m: 1, t: ISODate("1969-12-31T23:00:00.000Z")}));
+ assert.commandWorked(coll.insert({m: 1, t: ISODate("1970-01-01T01:00:00.000Z")}));
+ assert.eq(1, bucketsColl.find().itcount());
+})();
+
+// Test that measurements in eras far from the Unix Epoch end up in different buckets.
+(function testErasFarFromEpoch() {
+ let coll = db.timeseries_bucket_spanning_epoch;
+ let bucketsColl = db.getCollection('system.buckets.' + coll.getName());
+ coll.drop();
+
+ assert.commandWorked(db.createCollection(
+ coll.getName(), {timeseries: {timeField: 't', metaField: 'm', granularity: 'hours'}}));
+
+ // Measurements land in the same bucket.
+ assert.commandWorked(coll.insert({m: 1, t: ISODate("1900-01-01T00:00:00.000Z")}));
+ assert.commandWorked(coll.insert({m: 1, t: ISODate("1900-01-01T01:00:00.000Z")}));
+ assert.eq(1, bucketsColl.find().itcount());
+
+ // Measurements land in different buckets.
+ assert.commandWorked(coll.insert({m: 1, t: ISODate("1970-01-01T01:00:00.000Z")}));
+ assert.eq(2, bucketsColl.find().itcount());
+
+ // Measurements land in different buckets.
+ assert.commandWorked(coll.insert({m: 1, t: ISODate("2110-01-01T00:00:00.000Z")}));
+ assert.commandWorked(coll.insert({m: 1, t: ISODate("2110-01-01T01:00:00.000Z")}));
+ assert.eq(3, bucketsColl.find().itcount());
+})();
+
+// Test that measurements with timestamps equivalent modulo 2^32 do not end up in the same bucket.
+(function testUnixEpochPlus32BitsOverflow() {
+ let coll = db.timeseries_bucket_spanning_epoch;
+ let bucketsColl = db.getCollection('system.buckets.' + coll.getName());
+ coll.drop();
+
+ assert.commandWorked(db.createCollection(
+ coll.getName(), {timeseries: {timeField: 't', metaField: 'm', granularity: 'hours'}}));
+
+ // Measurements land in different buckets.
+ assert.commandWorked(coll.insert({m: 2, t: ISODate("2106-07-02T06:28:16.000Z")}));
+ assert.commandWorked(coll.insert({m: 2, t: ISODate("1970-01-01T00:00:00.000Z")}));
+ assert.eq(2, bucketsColl.find().itcount());
+})();
+
+// Test that measurements separated by more than the maximum time span expressible as a 32-bit
+// seconds count cannot wrap around into the same bucket.
+(function testUnixEpochPlus32BitsAndSomeOverflow() {
+ let coll = db.timeseries_bucket_spanning_epoch;
+ let bucketsColl = db.getCollection('system.buckets.' + coll.getName());
+ coll.drop();
+
+ assert.commandWorked(db.createCollection(
+ coll.getName(), {timeseries: {timeField: 't', metaField: 'm', granularity: 'hours'}}));
+
+ // Measurements land in different buckets.
+ assert.commandWorked(coll.insert({m: 2, t: ISODate("2105-06-24T06:28:16Z")}));
+ assert.commandWorked(coll.insert({m: 2, t: ISODate("1969-05-18T00:00:00.000Z")}));
+ assert.eq(2, bucketsColl.find().itcount());
+})();
+})();
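
For context on the constants used in the tests above: 2^32 seconds is 4294967296 s, about 136.1 years, so a 32-bit seconds counter that starts at the Unix Epoch wraps at 1970-01-01T00:00:00Z + 2^32 s = 2106-02-07T06:28:16Z. The 2105/2106 test dates sit around that wrap point, where the old OID-derived bucket time could alias with times near the epoch. With granularity 'hours', the bucket _id timestamp is rounded down according to the granularity and a bucket may span a wide time window, which is why measurements an hour or two apart land in a single bucket above.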
diff --git a/src/mongo/db/timeseries/bucket_catalog.cpp b/src/mongo/db/timeseries/bucket_catalog.cpp
index 461a90bb213..7093a63d0eb 100644
--- a/src/mongo/db/timeseries/bucket_catalog.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog.cpp
@@ -166,7 +166,7 @@ std::pair<OID, Date_t> generateBucketId(const Date_t& time, const TimeseriesOpti
// together into predictable chunks for sharding. This way we know from a measurement timestamp
// what the bucket timestamp will be, so we can route measurements to the right shard chunk.
auto roundedTime = timeseries::roundTimestampToGranularity(time, options.getGranularity());
- uint64_t const roundedSeconds = durationCount<Seconds>(roundedTime.toDurationSinceEpoch());
+ int64_t const roundedSeconds = durationCount<Seconds>(roundedTime.toDurationSinceEpoch());
bucketId.setTimestamp(roundedSeconds);
// Now, if we stopped here we could end up with bucket OID collisions. Consider the case where
@@ -265,7 +265,7 @@ StatusWith<std::shared_ptr<BucketCatalog::WriteBatch>> BucketCatalog::insert(
stats->numBucketsClosedDueToSize.fetchAndAddRelaxed(1);
return true;
}
- auto bucketTime = (*bucket).getTime();
+ auto bucketTime = (*bucket)->getTime();
if (time - bucketTime >= Seconds(*options.getBucketMaxSpanSeconds())) {
stats->numBucketsClosedDueToTimeForward.fetchAndAddRelaxed(1);
return true;
@@ -671,6 +671,8 @@ BucketCatalog::Bucket* BucketCatalog::_allocateBucket(const BucketKey& key,
stats->numBucketsOpenedDueToMetadata.fetchAndAddRelaxed(1);
}
+ bucket->_minTime = roundedTime;
+
// Make sure we set the control.min time field to match the rounded _id timestamp.
auto controlDoc = buildControlMinTimestampDoc(options.getTimeField(), roundedTime);
bucket->_minmax.update(
@@ -851,6 +853,10 @@ bool BucketCatalog::Bucket::allCommitted() const {
return _batches.empty() && !_preparedBatch;
}
+Date_t BucketCatalog::Bucket::getTime() const {
+ return _minTime;
+}
+
std::shared_ptr<BucketCatalog::WriteBatch> BucketCatalog::Bucket::_activeBatch(
OperationId opId, const std::shared_ptr<ExecutionStats>& stats) {
auto it = _batches.find(opId);
@@ -1126,10 +1132,6 @@ void BucketCatalog::BucketAccess::rollover(const std::function<bool(BucketAccess
}
}
-Date_t BucketCatalog::BucketAccess::getTime() const {
- return _bucket->id().asDateT();
-}
-
BucketCatalog::WriteBatch::WriteBatch(const OID& bucketId,
OperationId opId,
const std::shared_ptr<ExecutionStats>& stats)
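In sketch form, the behavioral change above (illustrative stand-in types only; the real mongo::Date_t and mongo::OID carry more behavior than shown):

    #include <cstdint>

    // Stand-ins for illustration; not the real MongoDB types.
    using Date_t = int64_t;  // seconds since the Unix Epoch

    struct OID {
        uint32_t timestamp;  // an OID embeds only 32 bits of seconds
        Date_t asDateT() const { return static_cast<Date_t>(timestamp); }
    };

    // Before: the bucket time was reconstructed from the OID, so any
    // timestamp outside the 32-bit range came back wrong.
    Date_t getTimeOld(const OID& id) {
        return id.asDateT();
    }

    // After: the rounded allocation time is stored on the bucket at full
    // precision and returned directly.
    struct Bucket {
        Date_t _minTime;
        Date_t getTime() const { return _minTime; }
    };

This keeps the comparison in insert() — time - bucketTime >= Seconds(*options.getBucketMaxSpanSeconds()) — correct for timestamps anywhere in Date_t's range.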
diff --git a/src/mongo/db/timeseries/bucket_catalog.h b/src/mongo/db/timeseries/bucket_catalog.h
index f00475a0c71..59266a6d765 100644
--- a/src/mongo/db/timeseries/bucket_catalog.h
+++ b/src/mongo/db/timeseries/bucket_catalog.h
@@ -349,6 +349,11 @@ public:
*/
bool allCommitted() const;
+ /**
+ * Retrieves the full-precision time associated with the bucket.
+ */
+ Date_t getTime() const;
+
private:
/**
* Determines the effect of adding 'doc' to this bucket. If adding 'doc' causes this bucket
@@ -393,6 +398,9 @@ public:
// Top-level field names of the measurements that have been inserted into the bucket.
StringSet _fieldNames;
+ // Minimum timestamp over the measurements contained in the bucket.
+ Date_t _minTime;
+
// The minimum and maximum values for each field in the bucket.
timeseries::MinMax _minmax;
@@ -575,9 +583,6 @@ private:
*/
void rollover(const std::function<bool(BucketAccess*)>& isBucketFull);
- // Retrieve the time associated with the bucket (id)
- Date_t getTime() const;
-
private:
/**
* Returns the state of the bucket, or boost::none if there is no state for the bucket.