diff options
author | Justin Seyster <justin.seyster@mongodb.com> | 2019-12-13 01:35:54 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-12-13 01:35:54 +0000 |
commit | 3a62fec23a50653994e01d1b1725d80a10fc208d (patch) | |
tree | 6eda6709522577d39a5d82a3afe25c7feb252e70 | |
parent | 07ba9da4013e908e7a9e37c4a7f482eddd9c3edc (diff) | |
download | mongo-3a62fec23a50653994e01d1b1725d80a10fc208d.tar.gz |
SERVER-43669 Serialize "sortKey" as BSONArray
26 files changed, 254 insertions, 73 deletions
diff --git a/jstests/core/return_key.js b/jstests/core/return_key.js index ddcbaf9d3b5..bb91a810223 100644 --- a/jstests/core/return_key.js +++ b/jstests/core/return_key.js @@ -64,11 +64,11 @@ assert(isIndexOnly(db, explain.queryPlanner.winningPlan)); // Unlike other projections, sortKey meta-projection can co-exist with returnKey. results = coll.find({}, {c: {$meta: 'sortKey'}}).hint({a: 1}).sort({a: -1}).returnKey().toArray(); -assert.eq(results, [{a: 3, c: {'': 3}}, {a: 2, c: {'': 2}}, {a: 1, c: {'': 1}}]); +assert.eq(results, [{a: 3, c: [3]}, {a: 2, c: [2]}, {a: 1, c: [1]}]); // returnKey with sortKey $meta where there is an in-memory sort. results = coll.find({}, {c: {$meta: 'sortKey'}}).hint({a: 1}).sort({b: 1}).returnKey().toArray(); -assert.eq(results, [{a: 3, c: {'': 1}}, {a: 2, c: {'': 2}}, {a: 1, c: {'': 3}}]); +assert.eq(results, [{a: 3, c: [1]}, {a: 2, c: [2]}, {a: 1, c: [3]}]); // returnKey with multiple sortKey $meta projections. results = coll.find({}, {c: {$meta: 'sortKey'}, d: {$meta: 'sortKey'}}) @@ -76,14 +76,10 @@ results = coll.find({}, {c: {$meta: 'sortKey'}, d: {$meta: 'sortKey'}}) .sort({b: 1}) .returnKey() .toArray(); -assert.eq(results, [ - {a: 3, c: {'': 1}, d: {'': 1}}, - {a: 2, c: {'': 2}, d: {'': 2}}, - {a: 1, c: {'': 3}, d: {'': 3}} -]); +assert.eq(results, [{a: 3, c: [1], d: [1]}, {a: 2, c: [2], d: [2]}, {a: 1, c: [3], d: [3]}]); // returnKey with a sortKey $meta projection on a nested field. results = coll.find({}, {"c.d": {$meta: "sortKey"}}).hint({a: 1}).sort({b: 1}).returnKey().toArray(); -assert.eq(results, [{a: 3, c: {d: {'': 1}}}, {a: 2, c: {d: {'': 2}}}, {a: 1, c: {d: {'': 3}}}]); +assert.eq(results, [{a: 3, c: {d: [1]}}, {a: 2, c: {d: [2]}}, {a: 1, c: {d: [3]}}]); })(); diff --git a/jstests/core/sortl.js b/jstests/core/sortl.js index 3d1a4adcd12..014f578f70d 100644 --- a/jstests/core/sortl.js +++ b/jstests/core/sortl.js @@ -1,5 +1,10 @@ // Tests equality query on _id with a sort, intended to be tested on both mongos and mongod. For // SERVER-20641. +// +// Always run on a fully upgraded cluster, so that {$meta: "sortKey"} projections use the newest +// sort key format. +// @tags: [requires_fcv_44] + (function() { 'use strict'; var coll = db.sortl; @@ -11,7 +16,7 @@ assert.eq(res.next(), {_id: 1, a: 2}); assert.eq(res.hasNext(), false); res = coll.find({_id: 1}, {b: {$meta: "sortKey"}}).sort({a: 1}); -assert.eq(res.next(), {_id: 1, a: 2, b: {"": 2}}); +assert.eq(res.next(), {_id: 1, a: 2, b: [2]}); assert.eq(res.hasNext(), false); res = db.runCommand({ diff --git a/jstests/sharding/find_getmore_cmd.js b/jstests/sharding/find_getmore_cmd.js index 3591d938e07..957a2408c3c 100644 --- a/jstests/sharding/find_getmore_cmd.js +++ b/jstests/sharding/find_getmore_cmd.js @@ -1,5 +1,9 @@ /** * Test issuing raw find and getMore commands to mongos using db.runCommand(). + * + * Always run on a fully upgraded cluster, so that {$meta: "sortKey"} projections use the newest + * sort key format. + * @tags: [requires_fcv_44] */ (function() { "use strict"; @@ -153,12 +157,12 @@ assert.commandWorked(cmdRes); assert.eq(cmdRes.cursor.id, NumberLong(0)); assert.eq(cmdRes.cursor.ns, coll.getFullName()); assert.eq(cmdRes.cursor.firstBatch.length, 6); -assert.eq(cmdRes.cursor.firstBatch[0], {key: {"": -9}}); -assert.eq(cmdRes.cursor.firstBatch[1], {key: {"": -5}}); -assert.eq(cmdRes.cursor.firstBatch[2], {key: {"": -1}}); -assert.eq(cmdRes.cursor.firstBatch[3], {key: {"": 1}}); -assert.eq(cmdRes.cursor.firstBatch[4], {key: {"": 5}}); -assert.eq(cmdRes.cursor.firstBatch[5], {key: {"": 9}}); +assert.eq(cmdRes.cursor.firstBatch[0], {key: [-9]}); +assert.eq(cmdRes.cursor.firstBatch[1], {key: [-5]}); +assert.eq(cmdRes.cursor.firstBatch[2], {key: [-1]}); +assert.eq(cmdRes.cursor.firstBatch[3], {key: [1]}); +assert.eq(cmdRes.cursor.firstBatch[4], {key: [5]}); +assert.eq(cmdRes.cursor.firstBatch[5], {key: [9]}); st.stop(); })(); diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index 7102a2c8840..8c1e299fdb1 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -115,6 +115,17 @@ boost::intrusive_ptr<ExpressionContext> makeExpressionContext( boost::none // uuid ); expCtx->tempDir = storageGlobalParams.dbpath + "/_tmp"; + // TODO (SERVER-43361): We will not need to set 'sortKeyFormat' after branching for 4.5. + auto assumeInternalClient = !opCtx->getClient()->session() || + (opCtx->getClient()->session()->getTags() & transport::Session::kInternalClient); + if (assumeInternalClient) { + expCtx->sortKeyFormat = + queryRequest.use44SortKeys() ? SortKeyFormat::k44SortKey : SortKeyFormat::k42SortKey; + } else { + // The client is not a mongoS, so we will not need to use an older sort key format to + // support it. Use default value for the ExpressionContext's 'sortKeyFormat' member + // variable, which is the newest format. + } return expCtx; } diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index 687e5e32de8..d0129eaba5c 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -295,8 +295,10 @@ public: while (!FindCommon::enoughForGetMore(request.batchSize.value_or(0), *numResults) && PlanExecutor::ADVANCED == (*state = exec->getNext(&doc, nullptr))) { auto* expCtx = exec->getExpCtx().get(); + // Note that "needsMerge" implies a find or aggregate operation, which should + // always have a non-NULL 'expCtx' value. BSONObj obj = cursor->needsMerge() - ? doc.toBsonWithMetaData(expCtx ? expCtx->use42ChangeStreamSortKeys : false) + ? doc.toBsonWithMetaData(expCtx->sortKeyFormat) : doc.toBson(); // If adding this object will cause us to exceed the message size limit, then we diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp index 8569562dd1d..283e5516991 100644 --- a/src/mongo/db/commands/run_aggregate.cpp +++ b/src/mongo/db/commands/run_aggregate.cpp @@ -214,9 +214,8 @@ bool handleCursorCommand(OperationContext* opCtx, // for later. auto* expCtx = exec->getExpCtx().get(); - BSONObj next = expCtx->needsMerge - ? nextDoc.toBsonWithMetaData(expCtx ? expCtx->use42ChangeStreamSortKeys : false) - : nextDoc.toBson(); + BSONObj next = expCtx->needsMerge ? nextDoc.toBsonWithMetaData(expCtx->sortKeyFormat) + : nextDoc.toBson(); if (!FindCommon::haveSpaceForNext(next, objCount, responseBuilder.bytesUsed())) { exec->enqueue(nextDoc); stashedResult = true; @@ -618,6 +617,30 @@ Status runAggregate(OperationContext* opCtx, invariant(collatorToUse); expCtx = makeExpressionContext(opCtx, request, std::move(*collatorToUse), uuid); + // If this is a shard server, we need to use the correct sort key format for the mongoS that + // generated this query. We determine the version by checking for the "use44SortKeys" flag + // in the aggregation request. + // TODO (SERVER-43361): This check will be unnecessary after branching for 4.5. + if (expCtx->fromMongos) { + if (request.getUse44SortKeys()) { + // This request originated with 4.4-or-newer mongoS, which can understand the new + // sort key format. Note: it's possible that merging will actually occur on a mongoD + // (for pipelines that merge on a shard), but if the mongoS is 4.4 or newer, all + // shard servers must also be 4.4 or newer. + expCtx->sortKeyFormat = SortKeyFormat::k44SortKey; + } else { + // This request originated with an older mongoS that will not understand the new + // sort key format. We must use the older format, which differs depending on whether + // or not the pipeline is a change stream. Non-$changeStream pipelines may still + // merge on a mongoD, but a 4.4 mongoD can still understand the 4.2 sort key format. + expCtx->sortKeyFormat = liteParsedPipeline.hasChangeStream() + ? SortKeyFormat::k42ChangeStreamSortKey + : SortKeyFormat::k42SortKey; + } + } else { + // Use default value for the ExpressionContext's 'sortKeyFormat' member variable. + } + auto pipeline = uassertStatusOK(Pipeline::parse(request.getPipeline(), expCtx)); // Check that the view's collation matches the collation of any views involved in the diff --git a/src/mongo/db/exec/document_value/document.cpp b/src/mongo/db/exec/document_value/document.cpp index 17c18ba3c79..7400e88058d 100644 --- a/src/mongo/db/exec/document_value/document.cpp +++ b/src/mongo/db/exec/document_value/document.cpp @@ -470,7 +470,7 @@ constexpr StringData Document::metaFieldGeoNearPoint; constexpr StringData Document::metaFieldSearchScore; constexpr StringData Document::metaFieldSearchHighlights; -BSONObj Document::toBsonWithMetaData(bool use42ChangeStreamSortKeys) const { +BSONObj Document::toBsonWithMetaData(SortKeyFormat sortKeyFormat) const { BSONObjBuilder bb; toBson(&bb); if (!metadata()) { @@ -482,17 +482,21 @@ BSONObj Document::toBsonWithMetaData(bool use42ChangeStreamSortKeys) const { if (metadata().hasRandVal()) bb.append(metaFieldRandVal, metadata().getRandVal()); if (metadata().hasSortKey()) { - if (use42ChangeStreamSortKeys) { - // TODO (SERVER-43361): In 4.2 and earlier, the "sort key" for a change stream document - // gets serialized differently than sort keys for normal pipeline documents. Once we no - // longer need to support that format, we can remove the 'use42ChangeStreamSortKeys' - // flag and this special case along with it. - invariant(metadata().isSingleElementKey()); - metadata().getSortKey().addToBsonObj(&bb, metaFieldSortKey); - } else { - bb.append(metaFieldSortKey, - DocumentMetadataFields::serializeSortKey(metadata().isSingleElementKey(), - metadata().getSortKey())); + switch (sortKeyFormat) { + case SortKeyFormat::k42ChangeStreamSortKey: + invariant(metadata().isSingleElementKey()); + metadata().getSortKey().addToBsonObj(&bb, metaFieldSortKey); + break; + case SortKeyFormat::k42SortKey: + bb.append(metaFieldSortKey, + DocumentMetadataFields::serializeSortKeyAsObject( + metadata().isSingleElementKey(), metadata().getSortKey())); + break; + case SortKeyFormat::k44SortKey: + bb.append(metaFieldSortKey, + DocumentMetadataFields::serializeSortKeyAsArray( + metadata().isSingleElementKey(), metadata().getSortKey())); + break; } } if (metadata().hasGeoNearDistance()) diff --git a/src/mongo/db/exec/document_value/document.h b/src/mongo/db/exec/document_value/document.h index 9e03aa09475..5b9d953b287 100644 --- a/src/mongo/db/exec/document_value/document.h +++ b/src/mongo/db/exec/document_value/document.h @@ -59,6 +59,26 @@ class MutableDocument; */ class Position; +/** + * "Sort keys" are stored in memory as a Value with Array type (with an exception for singleton sort + * keys). When serializing a sort key for storage in document metadata or as part of a + * {$meta: "sortKey"} projection, there are three possible storage formats: + */ +enum class SortKeyFormat { + // We expect the in-memory sort key to have one object, which has the format: + // {_data: ..., _typeBits:...}. This same object becomes the serialized sort key. This format + // exists for compatibility with 4.2 and will be deleted in 4.6 (SERVER-43361). + k42ChangeStreamSortKey, + + // A sort key with values "a" and "b" would get translated to an object that looks like: + // {"": "a", "": "b"}. Also scheduled for deletion in 4.6. + k42SortKey, + + // A sort key with values "a" and "b" would get translated to an array that looks like: + // ["a", "b"]. + k44SortKey, +}; + /** A Document is similar to a BSONObj but with a different in-memory representation. * * A Document can be treated as a const std::map<std::string, const Value> that is @@ -239,14 +259,16 @@ public: boost::optional<BSONObj> toBsonIfTriviallyConvertible() const; /** - * Like the 'toBson()' method, but includes metadata at the top-level. When - * 'use42ChangeStreamSortKeys' is true, we assume that any Value in the "sortKey" metadata - * represents the resume token, which gets assigned directly to the "$sortKey" field. Otherwise, - * the "$sortKey" field gets assigned using DocumentMetadataFields::serializeSortKey(). Output - * is parseable by the 'fromBsonWithMetaData()' method. Note that parsing is able to infer the - * value of 'use42ChangeStreamSortKeys' from the format of the '$sortKey' field. - */ - BSONObj toBsonWithMetaData(bool use42ChangeStreamSortKeys = false) const; + * Like the 'toBson()' method, but includes metadata at the top-level. When the metadata + * includes a sort key, the 'sortKeyFormat' parameter controls how it gets converted from its + * in-memory representation as a Value to its serialized representation as either a BSONObj or + * BSONArray. The possible formats are described in the comment above the 'SortKeyFormat' enum. + * + * Note that the 'fromBsonWithMetaData()' function does not need a corresponding 'sortKeyFormat' + * parameter, because sort key deserialization is able to infer the sort key format based on the + * layout of the object. + */ + BSONObj toBsonWithMetaData(SortKeyFormat sortKeyFormat) const; /** * Like Document(BSONObj) but treats top-level fields with special names as metadata. diff --git a/src/mongo/db/exec/document_value/document_metadata_fields.cpp b/src/mongo/db/exec/document_value/document_metadata_fields.cpp index c7c2e4d202d..61438cd2d2d 100644 --- a/src/mongo/db/exec/document_value/document_metadata_fields.cpp +++ b/src/mongo/db/exec/document_value/document_metadata_fields.cpp @@ -207,7 +207,8 @@ void DocumentMetadataFields::deserializeForSorter(BufReader& buf, DocumentMetada } } -BSONObj DocumentMetadataFields::serializeSortKey(bool isSingleElementKey, const Value& value) { +BSONObj DocumentMetadataFields::serializeSortKeyAsObject(bool isSingleElementKey, + const Value& value) { // Missing values don't serialize correctly in this format, so use nulls instead, since they are // considered equivalent with woCompare(). if (isSingleElementKey) { @@ -221,6 +222,21 @@ BSONObj DocumentMetadataFields::serializeSortKey(bool isSingleElementKey, const return bb.obj(); } +BSONArray DocumentMetadataFields::serializeSortKeyAsArray(bool isSingleElementKey, + const Value& value) { + // Missing values don't serialize correctly in this format, so use nulls instead, since they are + // considered equivalent with woCompare(). + if (isSingleElementKey) { + return BSON_ARRAY(missingToNull(value)); + } + invariant(value.isArray()); + BSONArrayBuilder bb; + for (auto&& val : value.getArray()) { + bb << missingToNull(val); + } + return bb.arr(); +} + Value DocumentMetadataFields::deserializeSortKey(bool isSingleElementKey, const BSONObj& bsonSortKey) { std::vector<Value> keys; diff --git a/src/mongo/db/exec/document_value/document_metadata_fields.h b/src/mongo/db/exec/document_value/document_metadata_fields.h index ef4627b14c6..83482a32abc 100644 --- a/src/mongo/db/exec/document_value/document_metadata_fields.h +++ b/src/mongo/db/exec/document_value/document_metadata_fields.h @@ -81,8 +81,22 @@ public: * sort key. If 'isSingleElementKey' is true, returns a BSON object with 'value' as its only * value - and an empty field name. Otherwise returns a BSONObj with one field for each value in * the array, each field using the empty string as the key name. + * + * Note that this format for serializing a sort key is deprecated and will be removed as part of + * SERVER-43361. The serialized sort key is a BSONObj with a field for each key component. Each + * field name is the empty string, meaning that the fields have the same name + * (e.g., {"": 1, "": 2}). The new preferred way to serialize a sort key is as a BSONArray + * (e.g.: [1, 2]), which can be done with the 'serializeSortKeyAsArray()' function. + */ + static BSONObj serializeSortKeyAsObject(bool isSingleElementKey, const Value& value); + + /** + * Converts a Value representing an in-memory sort key to a BSONArray representing a serialized + * sort key. If 'isSingleElementKey' is true, returns a BSONArray with 'value' as its only + * element. Otherwise, converts 'value' (which is expected to be an Array) to a BSONArray. Any + * Value elements whose value is "missing" get converted to BSONNull. */ - static BSONObj serializeSortKey(bool isSingleElementKey, const Value& value); + static BSONArray serializeSortKeyAsArray(bool isSingleElementKey, const Value& value); /** * Converts a BSONObj representing a serialized sort key into a Value, which we use for diff --git a/src/mongo/db/exec/document_value/document_value_test.cpp b/src/mongo/db/exec/document_value/document_value_test.cpp index a3092b636b1..d641b699ae5 100644 --- a/src/mongo/db/exec/document_value/document_value_test.cpp +++ b/src/mongo/db/exec/document_value/document_value_test.cpp @@ -595,7 +595,7 @@ TEST(MetaFields, IndexKeyMetadataSerializesCorrectly) { ASSERT_TRUE(doc.metadata().hasIndexKey()); ASSERT_BSONOBJ_EQ(doc.metadata().getIndexKey(), BSON("b" << 1)); - auto serialized = doc.toBsonWithMetaData(); + auto serialized = doc.toBsonWithMetaData(SortKeyFormat::k42SortKey); ASSERT_BSONOBJ_EQ(serialized, BSON("a" << 1 << "$indexKey" << BSON("b" << 1))); } @@ -709,7 +709,7 @@ TEST(MetaFields, ToAndFromBson) { docBuilder.metadata().setSearchHighlights(DOC_ARRAY("abc"_sd << "def"_sd)); Document doc = docBuilder.freeze(); - BSONObj obj = doc.toBsonWithMetaData(); + BSONObj obj = doc.toBsonWithMetaData(SortKeyFormat::k42SortKey); ASSERT_EQ(10.0, obj[Document::metaFieldTextScore].Double()); ASSERT_EQ(20, obj[Document::metaFieldRandVal].numberLong()); ASSERT_EQ(30.0, obj[Document::metaFieldSearchScore].Double()); diff --git a/src/mongo/db/exec/exclusion_projection_executor_test.cpp b/src/mongo/db/exec/exclusion_projection_executor_test.cpp index 651b2637bba..881ec1e17eb 100644 --- a/src/mongo/db/exec/exclusion_projection_executor_test.cpp +++ b/src/mongo/db/exec/exclusion_projection_executor_test.cpp @@ -316,7 +316,7 @@ TEST(ExclusionProjectionExecutionTest, ShouldAlwaysKeepMetadataFromOriginalDoc) ASSERT_DOCUMENT_EQ(result, expectedDoc.freeze()); } -TEST(ExclusionProjectionExecutionTest, ShouldEvalauateMetaExpressions) { +TEST(ExclusionProjectionExecutionTest, ShouldEvaluateMetaExpressions) { auto exclusion = makeExclusionProjectionWithDefaultPolicies(fromjson("{a: 0, c: {$meta: 'textScore'}, " "d: {$meta: 'randVal'}, " @@ -344,7 +344,7 @@ TEST(ExclusionProjectionExecutionTest, ShouldEvalauateMetaExpressions) { ASSERT_DOCUMENT_EQ(result, Document{fromjson("{b: 2, c: 0.0, d: 1.0, e: 2.0, f: 'foo', g: 3.0, " - "h: [4, 5], i: 6, j: {foo: 7}, k: {'': {bar: 8}}}")}); + "h: [4, 5], i: 6, j: {foo: 7}, k: [{bar: 8}]}")}); } TEST(ExclusionProjectionExecutionTest, ShouldAddMetaExpressionsToDependencies) { diff --git a/src/mongo/db/exec/inclusion_projection_executor_test.cpp b/src/mongo/db/exec/inclusion_projection_executor_test.cpp index 73647ac38af..e31e5867522 100644 --- a/src/mongo/db/exec/inclusion_projection_executor_test.cpp +++ b/src/mongo/db/exec/inclusion_projection_executor_test.cpp @@ -797,7 +797,7 @@ TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ASSERT_TRUE(deps.metadataDeps()[DocumentMetadataFields::kSortKey]); } -TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ShouldEvalauateMetaExpressions) { +TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ShouldEvaluateMetaExpressions) { auto inclusion = makeInclusionProjectionWithDefaultPolicies(fromjson("{a: 1, c: {$meta: 'textScore'}, " "d: {$meta: 'randVal'}, " @@ -825,7 +825,7 @@ TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ShouldEvalauateMet ASSERT_DOCUMENT_EQ(result, Document{fromjson("{a: 1, c: 0.0, d: 1.0, e: 2.0, f: 'foo', g: 3.0, " - "h: [4, 5], i: 6, j: {foo: 7}, k: {'': {bar: 8}}}")}); + "h: [4, 5], i: 6, j: {foo: 7}, k: [{bar: 8}]}")}); } // diff --git a/src/mongo/db/exec/return_key.cpp b/src/mongo/db/exec/return_key.cpp index c133d0418f2..0bf7d446988 100644 --- a/src/mongo/db/exec/return_key.cpp +++ b/src/mongo/db/exec/return_key.cpp @@ -84,10 +84,6 @@ Status ReturnKeyStage::_extractIndexKey(WorkingSetMember* member) { } auto indexKey = member->metadata().hasIndexKey() ? member->metadata().getIndexKey() : BSONObj(); - auto sortKey = member->metadata().hasSortKey() - ? DocumentMetadataFields::serializeSortKey(member->metadata().isSingleElementKey(), - member->metadata().getSortKey()) - : BSONObj(); MutableDocument md; @@ -96,7 +92,27 @@ Status ReturnKeyStage::_extractIndexKey(WorkingSetMember* member) { } for (const auto& fieldPath : _sortKeyMetaFields) { - md.setNestedField(fieldPath, Value(sortKey)); + if (!member->metadata().hasSortKey()) { + md.setNestedField(fieldPath, Value{}); + continue; + } + + switch (_sortKeyFormat) { + case SortKeyFormat::k44SortKey: + md.setNestedField( + fieldPath, + Value(DocumentMetadataFields::serializeSortKeyAsArray( + member->metadata().isSingleElementKey(), member->metadata().getSortKey()))); + break; + case SortKeyFormat::k42SortKey: + md.setNestedField( + fieldPath, + Value(DocumentMetadataFields::serializeSortKeyAsObject( + member->metadata().isSingleElementKey(), member->metadata().getSortKey()))); + break; + default: + MONGO_UNREACHABLE; + } } member->keyData.clear(); diff --git a/src/mongo/db/exec/return_key.h b/src/mongo/db/exec/return_key.h index 9a6f2926b2d..687b949d182 100644 --- a/src/mongo/db/exec/return_key.h +++ b/src/mongo/db/exec/return_key.h @@ -49,10 +49,12 @@ public: ReturnKeyStage(OperationContext* opCtx, std::vector<FieldPath> sortKeyMetaFields, WorkingSet* ws, + SortKeyFormat sortKeyFormat, std::unique_ptr<PlanStage> child) : PlanStage(opCtx, std::move(child), kStageName.rawData()), _ws(*ws), - _sortKeyMetaFields(std::move(sortKeyMetaFields)) {} + _sortKeyMetaFields(std::move(sortKeyMetaFields)), + _sortKeyFormat(sortKeyFormat) {} StageType stageType() const final { return STAGE_RETURN_KEY; @@ -79,5 +81,7 @@ private: // The field names associated with any sortKey meta-projection(s). Empty if there is no sortKey // meta-projection. std::vector<FieldPath> _sortKeyMetaFields; + + const SortKeyFormat _sortKeyFormat; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp index e07b2d950b5..aba6a767663 100644 --- a/src/mongo/db/pipeline/expression.cpp +++ b/src/mongo/db/pipeline/expression.cpp @@ -2700,10 +2700,24 @@ Value ExpressionMeta::evaluate(const Document& root, Variables* variables) const case MetaType::kIndexKey: return metadata.hasIndexKey() ? Value(metadata.getIndexKey()) : Value(); case MetaType::kSortKey: - return metadata.hasSortKey() - ? Value(DocumentMetadataFields::serializeSortKey(metadata.isSingleElementKey(), - metadata.getSortKey())) - : Value(); + if (metadata.hasSortKey()) { + switch (getExpressionContext()->sortKeyFormat) { + case SortKeyFormat::k42ChangeStreamSortKey: + invariant(metadata.isSingleElementKey()); + return Value(metadata.getSortKey()); + case SortKeyFormat::k42SortKey: + return Value(DocumentMetadataFields::serializeSortKeyAsObject( + metadata.isSingleElementKey(), metadata.getSortKey())); + case SortKeyFormat::k44SortKey: + return Value(DocumentMetadataFields::serializeSortKeyAsArray( + metadata.isSingleElementKey(), metadata.getSortKey())); + break; + default: + MONGO_UNREACHABLE; + } + } else { + return Value(); + } default: MONGO_UNREACHABLE; } diff --git a/src/mongo/db/pipeline/expression_context.h b/src/mongo/db/pipeline/expression_context.h index 75cb77fb0ba..6fde76c2c07 100644 --- a/src/mongo/db/pipeline/expression_context.h +++ b/src/mongo/db/pipeline/expression_context.h @@ -290,9 +290,11 @@ public: boost::optional<ServerGlobalParams::FeatureCompatibility::Version> maxFeatureCompatibilityVersion; - // True if this ExpressionContext is associated with a Change Stream that should serialize its - // "$sortKey" using the 4.2 format. - bool use42ChangeStreamSortKeys = false; + // In a shard mongoD, we use this to determine the format for "sortKey" data. This format has to + // match the format that the mongoS expects. + // TODO (SERVER-43361): After branching for 4.5, there will be no need to support any format + // other than the "k44SortKey" format, so this will be removed. + SortKeyFormat sortKeyFormat = SortKeyFormat::k44SortKey; // True if this context is associated with a pipeline which is permitted to use the new // upsertSupplied mechanism for applicable $merge modes. diff --git a/src/mongo/db/pipeline/expression_test.cpp b/src/mongo/db/pipeline/expression_test.cpp index 9e724edbfdb..522e2a750f4 100644 --- a/src/mongo/db/pipeline/expression_test.cpp +++ b/src/mongo/db/pipeline/expression_test.cpp @@ -2666,7 +2666,7 @@ TEST(ExpressionMetaTest, ExpressionMetaSortKey) { Value sortKey = Value(std::vector<Value>{Value(1), Value(2)}); doc.metadata().setSortKey(sortKey, /* isSingleElementSortKey = */ false); Value val = expressionMeta->evaluate(doc.freeze(), &expCtx->variables); - ASSERT_BSONOBJ_EQ(val.getDocument().toBson(), BSON("" << 1 << "" << 2)); + ASSERT_VALUE_EQ(val, Value(std::vector<Value>{Value(1), Value(2)})); } TEST(ExpressionMetaTest, ExpressionMetaTextScore) { diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 3b44b87a88b..d97a7d7f47e 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -659,12 +659,6 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep if (pipeline->peekFront() && pipeline->peekFront()->constraints().isChangeStreamStage()) { invariant(expCtx->tailableMode == TailableModeEnum::kTailableAndAwaitData); plannerOpts |= QueryPlannerParams::TRACK_LATEST_OPLOG_TS; - - // SERVER-42713: If "use44SortKeys" isn't set, then this aggregation request is from an - // earlier version of mongos, and we must fall back to the old way of serializing change - // stream sort keys from 4.2 and earlier. - invariant(aggRequest); - expCtx->use42ChangeStreamSortKeys = !aggRequest->getUse44SortKeys(); } // If there is a sort stage eligible for pushdown, serialize its SortPattern to a BSONObj. The diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp index df61af88f4e..d236fd1f167 100644 --- a/src/mongo/db/query/get_executor.cpp +++ b/src/mongo/db/query/get_executor.cpp @@ -415,6 +415,7 @@ StatusWith<PrepareExecutionResult> prepareExecution(OperationContext* opCtx, ? QueryPlannerCommon::extractSortKeyMetaFieldsFromProjection(*cqProjection) : std::vector<FieldPath>{}, ws, + canonicalQuery->getExpCtx()->sortKeyFormat, std::move(root)); } else if (cqProjection) { // There might be a projection. The idhack stage will always fetch the full diff --git a/src/mongo/db/query/query_request.cpp b/src/mongo/db/query/query_request.cpp index a33557263ac..c12a34630ee 100644 --- a/src/mongo/db/query/query_request.cpp +++ b/src/mongo/db/query/query_request.cpp @@ -107,6 +107,7 @@ const char kAllowSpeculativeMajorityReadField[] = "allowSpeculativeMajorityRead" const char kInternalReadAtClusterTimeField[] = "$_internalReadAtClusterTime"; const char kRequestResumeTokenField[] = "$_requestResumeToken"; const char kResumeAfterField[] = "$_resumeAfter"; +const char kUse44SortKeys[] = "_use44SortKeys"; // Field names for sorting options. const char kNaturalSortField[] = "$natural"; @@ -411,6 +412,12 @@ StatusWith<unique_ptr<QueryRequest>> QueryRequest::parseFromFindCommand(unique_p return status; } qr->_requestResumeToken = el.boolean(); + } else if (fieldName == kUse44SortKeys) { + Status status = checkFieldType(el, Bool); + if (!status.isOK()) { + return status; + } + qr->_use44SortKeys = el.boolean(); } else if (!isGenericArgument(fieldName)) { return Status(ErrorCodes::FailedToParse, str::stream() << "Failed to parse: " << cmdObj.toString() << ". " @@ -595,6 +602,10 @@ void QueryRequest::asFindCommandInternal(BSONObjBuilder* cmdBuilder) const { if (!_resumeAfter.isEmpty()) { cmdBuilder->append(kResumeAfterField, _resumeAfter); } + + if (_use44SortKeys) { + cmdBuilder->append(kUse44SortKeys, true); + } } void QueryRequest::addShowRecordIdMetaProj() { diff --git a/src/mongo/db/query/query_request.h b/src/mongo/db/query/query_request.h index 07da6b23f8e..1457480b3dd 100644 --- a/src/mongo/db/query/query_request.h +++ b/src/mongo/db/query/query_request.h @@ -429,6 +429,13 @@ public: _resumeAfter = resumeAfter; } + bool use44SortKeys() const { + return _use44SortKeys; + } + + void setUse44SortKeys(bool use44SortKeys) { + _use44SortKeys = use44SortKeys; + } /** * Return options as a bit vector. @@ -567,6 +574,8 @@ private: // The Timestamp that RecoveryUnit::setTimestampReadSource() should be called with. The optional // should only ever be engaged when testing commands are enabled. boost::optional<Timestamp> _internalReadAtClusterTime; + + bool _use44SortKeys = false; }; } // namespace mongo diff --git a/src/mongo/db/query/query_request_test.cpp b/src/mongo/db/query/query_request_test.cpp index 1b44fbbae8a..b909e004d1b 100644 --- a/src/mongo/db/query/query_request_test.cpp +++ b/src/mongo/db/query/query_request_test.cpp @@ -496,6 +496,7 @@ TEST(QueryRequestTest, ParseFromCommandAllFlagsTrue) { "awaitData: true," "allowPartialResults: true," "readOnce: true," + "_use44SortKeys: true," "allowSpeculativeMajorityRead: true}"); const NamespaceString nss("test.testns"); bool isExplain = false; @@ -510,6 +511,7 @@ TEST(QueryRequestTest, ParseFromCommandAllFlagsTrue) { ASSERT(qr->isTailableAndAwaitData()); ASSERT(qr->isAllowPartialResults()); ASSERT(qr->isReadOnce()); + ASSERT(qr->use44SortKeys()); ASSERT(qr->allowSpeculativeMajorityRead()); } @@ -522,6 +524,15 @@ TEST(QueryRequestTest, ParseFromCommandReadOnceDefaultsToFalse) { ASSERT(!qr->isReadOnce()); } +TEST(QueryRequestTest, ParseFromCommandUse44SortKeysDefaultsToFalse) { + BSONObj cmdObj = fromjson("{find: 'testns'}"); + const NamespaceString nss("test.testns"); + bool isExplain = false; + unique_ptr<QueryRequest> qr( + assertGet(QueryRequest::makeFromFindCommand(nss, cmdObj, isExplain))); + ASSERT(!qr->use44SortKeys()); +} + TEST(QueryRequestTest, ParseFromCommandValidMinMax) { BSONObj cmdObj = fromjson( "{find: 'testns'," @@ -901,6 +912,17 @@ TEST(QueryRequestTest, ParseFromCommandRuntimeConstantsSubfieldsWrongType) { ErrorCodes::TypeMismatch); } +TEST(QueryRequestTest, ParseFromCommandUse44SortKeysWrongType) { + BSONObj cmdObj = BSON("find" + << "testns" + << "_use44SortKeys" + << "shouldBeBool"); + const NamespaceString nss("test.testns"); + bool isExplain = false; + auto result = QueryRequest::makeFromFindCommand(nss, cmdObj, isExplain); + ASSERT_NOT_OK(result.getStatus()); +} + // // Parsing errors where a field has the right type but a bad value. // diff --git a/src/mongo/db/query/stage_builder.cpp b/src/mongo/db/query/stage_builder.cpp index fe81269291f..8f5906716c3 100644 --- a/src/mongo/db/query/stage_builder.cpp +++ b/src/mongo/db/query/stage_builder.cpp @@ -141,8 +141,11 @@ std::unique_ptr<PlanStage> buildStages(OperationContext* opCtx, auto returnKeyNode = static_cast<const ReturnKeyNode*>(root); auto childStage = buildStages(opCtx, collection, cq, qsol, returnKeyNode->children[0], ws); - return std::make_unique<ReturnKeyStage>( - opCtx, std::move(returnKeyNode->sortKeyMetaFields), ws, std::move(childStage)); + return std::make_unique<ReturnKeyStage>(opCtx, + std::move(returnKeyNode->sortKeyMetaFields), + ws, + cq.getExpCtx()->sortKeyFormat, + std::move(childStage)); } case STAGE_PROJECTION_DEFAULT: { auto pn = static_cast<const ProjectionNodeDefault*>(root); diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp index c94e309891c..bff8b0902aa 100644 --- a/src/mongo/s/query/async_results_merger.cpp +++ b/src/mongo/s/query/async_results_merger.cpp @@ -64,7 +64,11 @@ BSONObj extractSortKey(BSONObj obj, bool compareWholeSortKey) { if (compareWholeSortKey) { return key.wrap(); } - invariant(key.type() == BSONType::Object); + // TODO (SERVER-43361): We expect the sort key to be an array, but if the sort key originated + // from a 4.2 mongod, it will be a document, instead. Either way, 'isABSONObj()' will return + // true, and 'compareSortKeys()' will behave the same way. After branching for 4.5, we can + // tighten this invariant to specifically require a 'BSONArray' type. + invariant(key.isABSONObj()); return key.Obj(); } @@ -75,8 +79,8 @@ BSONObj extractSortKey(BSONObj obj, bool compareWholeSortKey) { int compareSortKeys(BSONObj leftSortKey, BSONObj rightSortKey, BSONObj sortKeyPattern) { // This does not need to sort with a collator, since mongod has already mapped strings to their // ICU comparison keys as part of the $sortKey meta projection. - const bool considerFieldName = false; - return leftSortKey.woCompare(rightSortKey, sortKeyPattern, considerFieldName); + const BSONObj::ComparisonRulesSet rules = 0; // 'considerFieldNames' flag is not set. + return leftSortKey.woCompare(rightSortKey, sortKeyPattern, rules); } } // namespace @@ -679,7 +683,7 @@ bool AsyncResultsMerger::_addBatchToBuffer(WithLock lk, str::stream() << "Missing field '" << AsyncResultsMerger::kSortKeyField << "' in document: " << obj); return false; - } else if (!_params.getCompareWholeSortKey() && key.type() != BSONType::Object) { + } else if (!_params.getCompareWholeSortKey() && !key.isABSONObj()) { remote.status = Status(ErrorCodes::InternalError, str::stream() << "Field '" << AsyncResultsMerger::kSortKeyField diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index 5c91dd0e28e..111bcb2660f 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -190,6 +190,10 @@ StatusWith<std::unique_ptr<QueryRequest>> transformQueryForShards( // Any expansion of the 'showRecordId' flag should have already happened on mongos. newQR->setShowRecordId(false); + // Indicate to shard servers that this is a 4.4 or newer mongoS, and they should serialize sort + // keys in the new format. + newQR->setUse44SortKeys(true); + invariant(newQR->validate()); return std::move(newQR); } |