diff options
author | David Percy <david.percy@mongodb.com> | 2021-11-19 22:46:15 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-22 22:57:25 +0000 |
commit | a39a201c7c6991132494187f25e34151db1ef5c7 (patch) | |
tree | bc145e0a8540522a2fb42275497ab0a467e83cce | |
parent | a0ca381d51c085a0ba3299b2086ae7663cc29f45 (diff) | |
download | mongo-a39a201c7c6991132494187f25e34151db1ef5c7.tar.gz |
SERVER-61548 Handle dotted partitionByFields in $densify
-rw-r--r-- | jstests/aggregation/sources/densify/partitions.js | 91 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_densify.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_densify.h | 6 |
3 files changed, 107 insertions, 7 deletions
diff --git a/jstests/aggregation/sources/densify/partitions.js b/jstests/aggregation/sources/densify/partitions.js index ffb8d2df29d..8cadffc07b2 100644 --- a/jstests/aggregation/sources/densify/partitions.js +++ b/jstests/aggregation/sources/densify/partitions.js @@ -522,6 +522,94 @@ function singleDocumentTest() { assert(arrayEq(resultArray, testExpected), buildErrorString(resultArray, testExpected)); } +function testDottedField() { + coll.drop(); + const input = [ + { + "_id": 0, + "metadata": {"sensorId": 5578, "type": "temperature"}, + "timestamp": ISODate("2021-05-18T00:00:00.000Z"), + "temp": 12 + }, + { + "_id": 1, + "metadata": {"sensorId": 5578, "type": "temperature"}, + "timestamp": ISODate("2021-05-18T02:00:00.000Z"), + "temp": 14 + } + ]; + const pipeline = [{ + $densify: { + field: "timestamp", + // Dots are interpreted as path separators. + partitionByFields: ["metadata.sensorId"], + range: {step: 1, unit: "hour", bounds: "full"} + } + }]; + const expectedOutput = [ + { + "_id": 0, + "metadata": {"sensorId": 5578, "type": "temperature"}, + "timestamp": ISODate("2021-05-18T00:00:00Z"), + "temp": 12 + }, + { + // Because dotted fields are interpreted as paths, when we write to 'metadata.sensorId' + // it should create a nested document. 
+ "metadata": {"sensorId": 5578}, + "timestamp": ISODate("2021-05-18T01:00:00Z"), + }, + { + "_id": 1, + "metadata": {"sensorId": 5578, "type": "temperature"}, + "timestamp": ISODate("2021-05-18T02:00:00Z"), + "temp": 14 + } + ]; + + assert.commandWorked(coll.insert(input)); + const actualOutput = coll.aggregate(pipeline).toArray(); + + assert(arrayEq(actualOutput, expectedOutput), {actualOutput, expectedOutput}); +} + +function testArrayTraversalDisallowed() { + const pipeline = [{ + $densify: { + field: "timestamp", + partitionByFields: ["metadata.sensorId"], + } + }]; + + let input = [ + { + "_id": 0, + // In this case, the dot in 'metadata.sensorId' traverses through an array, because + // the whole 'metadata' subdocument is an array. + "metadata": [{"sensorId": 5578, "type": "temperature"}], + "timestamp": ISODate("2021-05-18T00:00:00.000Z"), + "temp": 12 + }, + ]; + coll.drop(); + assert.commandWorked(coll.insert(input)); + assert.throws(() => coll.aggregate(pipeline)); + + input = [ + { + "_id": 0, + // In this case, the dot does not traverse an array. But the path evaluates to an array, + // so this is also an error. 
+ "metadata": {"sensorId": [5578], "type": "temperature"}, + "timestamp": ISODate("2021-05-18T00:00:00.000Z"), + "temp": 12 + }, + ]; + coll.drop(); + assert.commandWorked(coll.insert(input)); + assert.throws(() => coll.aggregate(pipeline)); +} + testOne(); testTwo(); testThree(); @@ -543,4 +631,7 @@ fullTestTwoDates(); rangeTestTwoDates(); singleDocumentTest(); + +testDottedField(); +testArrayTraversalDisallowed(); })(); diff --git a/src/mongo/db/pipeline/document_source_densify.cpp b/src/mongo/db/pipeline/document_source_densify.cpp index 89bb2ad437f..11a4c973675 100644 --- a/src/mongo/db/pipeline/document_source_densify.cpp +++ b/src/mongo/db/pipeline/document_source_densify.cpp @@ -640,14 +640,19 @@ Value DocumentSourceInternalDensify::serialize( } void DocumentSourceInternalDensify::initializePartitionState(Document initialDoc) { + // Initialize _partitionExpr from _partitions. + // We check whether there is anything in _partitions during parsing. - std::vector<std::pair<std::string, boost::intrusive_ptr<mongo::Expression>>> partitionExp; - for (FieldPath p : _partitions) { - partitionExp.push_back({p.fullPath(), - ExpressionFieldPath::createPathFromString( - pExpCtx.get(), p.fullPath(), pExpCtx->variablesParseState)}); + tassert( + 6154800, "Expected at least one field when partitioning is enabled.", !_partitions.empty()); + + MutableDocument partitionExpr; + for (auto&& p : _partitions) { + partitionExpr.setNestedField(p.fullPath(), Value{"$"_sd + p.fullPath()}); } - _partitionExpr = ExpressionObject::create(pExpCtx.get(), std::move(partitionExp)); + _partitionExpr = ExpressionObject::parse( + pExpCtx.get(), partitionExpr.freeze().toBson(), pExpCtx->variablesParseState); + setPartitionValue(initialDoc); } diff --git a/src/mongo/db/pipeline/document_source_densify.h b/src/mongo/db/pipeline/document_source_densify.h index b4e6ff16a37..24c44b52922 100644 --- a/src/mongo/db/pipeline/document_source_densify.h +++ b/src/mongo/db/pipeline/document_source_densify.h 
@@ -542,7 +542,11 @@ private: boost::optional<DensifyValue> _globalMin = boost::none; boost::optional<DensifyValue> _globalMax = boost::none; - // Expression to be used to compare partitions. + // _partitionExpr has two purposes: + // 1. to determine which partition a document belongs in. + // 2. to initialize new documents with the right partition key. + // For example, if the stage had 'partitionByFields: ["a", "x.y"]' then this expression + // would be {a: "$a", x: {y: "$x.y"}}. boost::intrusive_ptr<ExpressionObject> _partitionExpr; bool _eof = false; |