summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Percy <david.percy@mongodb.com>2021-11-19 22:46:15 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-11-22 22:57:25 +0000
commita39a201c7c6991132494187f25e34151db1ef5c7 (patch)
treebc145e0a8540522a2fb42275497ab0a467e83cce
parenta0ca381d51c085a0ba3299b2086ae7663cc29f45 (diff)
downloadmongo-a39a201c7c6991132494187f25e34151db1ef5c7.tar.gz
SERVER-61548 Handle dotted partitionByFields in $densify
-rw-r--r--jstests/aggregation/sources/densify/partitions.js91
-rw-r--r--src/mongo/db/pipeline/document_source_densify.cpp17
-rw-r--r--src/mongo/db/pipeline/document_source_densify.h6
3 files changed, 107 insertions, 7 deletions
diff --git a/jstests/aggregation/sources/densify/partitions.js b/jstests/aggregation/sources/densify/partitions.js
index ffb8d2df29d..8cadffc07b2 100644
--- a/jstests/aggregation/sources/densify/partitions.js
+++ b/jstests/aggregation/sources/densify/partitions.js
@@ -522,6 +522,94 @@ function singleDocumentTest() {
assert(arrayEq(resultArray, testExpected), buildErrorString(resultArray, testExpected));
}
+function testDottedField() {
+ coll.drop();
+ const input = [
+ {
+ "_id": 0,
+ "metadata": {"sensorId": 5578, "type": "temperature"},
+ "timestamp": ISODate("2021-05-18T00:00:00.000Z"),
+ "temp": 12
+ },
+ {
+ "_id": 1,
+ "metadata": {"sensorId": 5578, "type": "temperature"},
+ "timestamp": ISODate("2021-05-18T02:00:00.000Z"),
+ "temp": 14
+ }
+ ];
+ const pipeline = [{
+ $densify: {
+ field: "timestamp",
+ // Dots are interpreted as path separators.
+ partitionByFields: ["metadata.sensorId"],
+ range: {step: 1, unit: "hour", bounds: "full"}
+ }
+ }];
+ const expectedOutput = [
+ {
+ "_id": 0,
+ "metadata": {"sensorId": 5578, "type": "temperature"},
+ "timestamp": ISODate("2021-05-18T00:00:00Z"),
+ "temp": 12
+ },
+ {
+ // Because dotted fields are interpreted as paths, when we write to 'metadata.sensorId'
+ // it should create a nested document.
+ "metadata": {"sensorId": 5578},
+ "timestamp": ISODate("2021-05-18T01:00:00Z"),
+ },
+ {
+ "_id": 1,
+ "metadata": {"sensorId": 5578, "type": "temperature"},
+ "timestamp": ISODate("2021-05-18T02:00:00Z"),
+ "temp": 14
+ }
+ ];
+
+ assert.commandWorked(coll.insert(input));
+ const actualOutput = coll.aggregate(pipeline).toArray();
+
+ assert(arrayEq(actualOutput, expectedOutput), {actualOutput, expectedOutput});
+}
+
+function testArrayTraversalDisallowed() {
+ const pipeline = [{
+ $densify: {
+ field: "timestamp",
+ partitionByFields: ["metadata.sensorId"],
+ }
+ }];
+
+ let input = [
+ {
+ "_id": 0,
+ // In this case, the dot in 'metadata.sensorId' traverses through an array, because
+ // the value of 'metadata' is itself an array (of subdocuments).
+ "metadata": [{"sensorId": 5578, "type": "temperature"}],
+ "timestamp": ISODate("2021-05-18T00:00:00.000Z"),
+ "temp": 12
+ },
+ ];
+ coll.drop();
+ assert.commandWorked(coll.insert(input));
+ assert.throws(() => coll.aggregate(pipeline));
+
+ input = [
+ {
+ "_id": 0,
+ // In this case, the dot does not traverse an array. But the path evaluates to an array,
+ // so this is also an error.
+ "metadata": {"sensorId": [5578], "type": "temperature"},
+ "timestamp": ISODate("2021-05-18T00:00:00.000Z"),
+ "temp": 12
+ },
+ ];
+ coll.drop();
+ assert.commandWorked(coll.insert(input));
+ assert.throws(() => coll.aggregate(pipeline));
+}
+
testOne();
testTwo();
testThree();
@@ -543,4 +631,7 @@ fullTestTwoDates();
rangeTestTwoDates();
singleDocumentTest();
+
+testDottedField();
+testArrayTraversalDisallowed();
})();
diff --git a/src/mongo/db/pipeline/document_source_densify.cpp b/src/mongo/db/pipeline/document_source_densify.cpp
index 89bb2ad437f..11a4c973675 100644
--- a/src/mongo/db/pipeline/document_source_densify.cpp
+++ b/src/mongo/db/pipeline/document_source_densify.cpp
@@ -640,14 +640,19 @@ Value DocumentSourceInternalDensify::serialize(
}
void DocumentSourceInternalDensify::initializePartitionState(Document initialDoc) {
+ // Initialize _partitionExpr from _partitions.
+
// We check whether there is anything in _partitions during parsing.
- std::vector<std::pair<std::string, boost::intrusive_ptr<mongo::Expression>>> partitionExp;
- for (FieldPath p : _partitions) {
- partitionExp.push_back({p.fullPath(),
- ExpressionFieldPath::createPathFromString(
- pExpCtx.get(), p.fullPath(), pExpCtx->variablesParseState)});
+ tassert(
+ 6154800, "Expected at least one field when partitioning is enabled.", !_partitions.empty());
+
+ MutableDocument partitionExpr;
+ for (auto&& p : _partitions) {
+ partitionExpr.setNestedField(p.fullPath(), Value{"$"_sd + p.fullPath()});
}
- _partitionExpr = ExpressionObject::create(pExpCtx.get(), std::move(partitionExp));
+ _partitionExpr = ExpressionObject::parse(
+ pExpCtx.get(), partitionExpr.freeze().toBson(), pExpCtx->variablesParseState);
+
setPartitionValue(initialDoc);
}
diff --git a/src/mongo/db/pipeline/document_source_densify.h b/src/mongo/db/pipeline/document_source_densify.h
index b4e6ff16a37..24c44b52922 100644
--- a/src/mongo/db/pipeline/document_source_densify.h
+++ b/src/mongo/db/pipeline/document_source_densify.h
@@ -542,7 +542,11 @@ private:
boost::optional<DensifyValue> _globalMin = boost::none;
boost::optional<DensifyValue> _globalMax = boost::none;
- // Expression to be used to compare partitions.
+ // _partitionExpr has two purposes:
+ // 1. to determine which partition a document belongs in.
+ // 2. to initialize new documents with the right partition key.
+ // For example, if the stage had 'partitionByFields: ["a", "x.y"]' then this expression
+ // would be {a: "$a", x: {y: "$x.y"}}.
boost::intrusive_ptr<ExpressionObject> _partitionExpr;
bool _eof = false;