diff options
author | Davis Haupt <davis.haupt@mongodb.com> | 2023-05-03 19:59:01 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-05-03 21:25:10 +0000 |
commit | 1470c88daaa805408f083a1c2f7bb22f1632b5d1 (patch) | |
tree | 205ab378fea5664b95d33a541345ef70abd2f4c5 | |
parent | 693d1b831df752bc37e866733229f6dcf88c6944 (diff) | |
download | mongo-1470c88daaa805408f083a1c2f7bb22f1632b5d1.tar.gz |
SERVER-76208 support query shape serialization in IDL for types with custom serializers
35 files changed, 420 insertions, 214 deletions
diff --git a/buildscripts/idl/idl/ast.py b/buildscripts/idl/idl/ast.py index 30acba185eb..0e0763c32b1 100644 --- a/buildscripts/idl/idl/ast.py +++ b/buildscripts/idl/idl/ast.py @@ -210,6 +210,31 @@ class Validator(common.SourceLocation): super(Validator, self).__init__(file_name, line, column) +@enum.unique +class QueryShapeFieldType(enum.Enum): + # Abstract literal from shape. + LITERAL = enum.auto() + # Leave value as-is in shape. + PARAMETER = enum.auto() + # Anonymize string value. + ANONYMIZE = enum.auto() + # IDL type uses custom serializer -- defer to that serializer. + CUSTOM = enum.auto() + + @classmethod + def bind(cls, string_value): + # type: (Optional[str]) -> Optional[QueryShapeFieldType] + if string_value is None: + return None + bindings = { + "literal": cls.LITERAL, + "parameter": cls.PARAMETER, + "anonymize": cls.ANONYMIZE, + "custom": cls.CUSTOM, + } + return bindings.get(string_value, None) + + class Field(common.SourceLocation): """ An instance of a field in a struct. @@ -255,16 +280,21 @@ class Field(common.SourceLocation): # Determines whether or not this field represents a literal value that should be abstracted when serializing a query shape. # See WRITING-13831 for details on query shape. - self.query_shape_literal = None # type: Optional[bool] - # Determines whether or not this field represents a fieldpath that should be anonymized. - self.query_shape_anonymize = None # type: Optional[bool] + self.query_shape = None # type: Optional[QueryShapeFieldType] super(Field, self).__init__(file_name, line, column) @property - def should_serialize_query_shape(self): + def should_serialize_with_options(self): # type: () -> bool - return self.query_shape_anonymize or self.query_shape_literal + """Returns true if the IDL compiler should add a call to serialization options for this field.""" + return self.query_shape is not None and self.query_shape in [ + QueryShapeFieldType.LITERAL, QueryShapeFieldType.ANONYMIZE + ] + + @property + def should_shapify(self): + return self.query_shape is not None and self.query_shape != QueryShapeFieldType.PARAMETER class Privilege(common.SourceLocation): diff --git a/buildscripts/idl/idl/binder.py b/buildscripts/idl/idl/binder.py index 47ef2c02d40..4fb2e0c4f2c 100644 --- a/buildscripts/idl/idl/binder.py +++ b/buildscripts/idl/idl/binder.py @@ -333,16 +333,15 @@ def _bind_struct_common(ctxt, parsed_spec, struct, ast_struct): # Verify that each field on the struct defines a query shape type on the field if and only if # query_shape_component is defined on the struct. - defined_query_shape_type = ast_field.query_shape_literal is not None or ast_field.query_shape_anonymize is not None - if not field.hidden and struct.query_shape_component and not defined_query_shape_type: + if not field.hidden and struct.query_shape_component and ast_field.query_shape is None: ctxt.add_must_declare_shape_type(ast_field, ast_struct.name, ast_field.name) - if not struct.query_shape_component and defined_query_shape_type: + if not struct.query_shape_component and ast_field.query_shape is not None: ctxt.add_must_be_query_shape_component(ast_field, ast_struct.name, ast_field.name) - if ast_field.query_shape_anonymize and ast_field.type.cpp_type not in [ - "std::string", "std::vector<std::string>" - ]: + if ast_field.query_shape == ast.QueryShapeFieldType.ANONYMIZE and not ( + ast_field.type.cpp_type in ["std::string", "std::vector<std::string>"] + or 'string' in ast_field.type.bson_serialization_type): ctxt.add_query_shape_anonymize_must_be_string(ast_field, ast_field.name, ast_field.type.cpp_type) @@ -1056,8 +1055,11 @@ def _bind_field(ctxt, parsed_spec, field): ast_field.stability = field.stability ast_field.always_serialize = field.always_serialize ast_field.preparse = field.preparse - ast_field.query_shape_literal = field.query_shape_literal - ast_field.query_shape_anonymize = field.query_shape_anonymize + + if field.query_shape is not None: + ast_field.query_shape = ast.QueryShapeFieldType.bind(field.query_shape) + if ast_field.query_shape is None: + ctxt.add_invalid_query_shape_value(ast_field, field.query_shape) ast_field.cpp_name = field.name if field.cpp_name: @@ -1067,13 +1069,6 @@ def _bind_field(ctxt, parsed_spec, field): if ast_field.name.startswith("array<"): ctxt.add_array_not_valid_error(ast_field, "field", ast_field.name) - # Validate that 'field' is not both a query shape literal and query shape fieldpath. The two are mutually exclusive. - if ast_field.query_shape_literal is not None and ast_field.query_shape_anonymize is not None: - ctxt.add_field_cannot_be_literal_and_fieldpath(ast_field, ast_field.name) - - if ast_field.query_shape_anonymize is False: - ctxt.add_field_cannot_have_query_shape_anonymize_false(ast_field) - if field.ignore: ast_field.ignore = field.ignore _validate_ignored_field(ctxt, field) @@ -1146,7 +1141,7 @@ def _bind_field(ctxt, parsed_spec, field): if ast_field.validator is None: return None - if ast_field.should_serialize_query_shape and not ast_field.type.is_query_shape_component: + if ast_field.should_shapify and not ast_field.type.is_query_shape_component: ctxt.add_must_be_query_shape_component(ast_field, ast_field.type.name, ast_field.name) return ast_field diff --git a/buildscripts/idl/idl/cpp_types.py b/buildscripts/idl/idl/cpp_types.py index a2631bb4f72..d8315772199 100644 --- a/buildscripts/idl/idl/cpp_types.py +++ b/buildscripts/idl/idl/cpp_types.py @@ -511,14 +511,14 @@ class BsonCppTypeBase(object, metaclass=ABCMeta): pass @abstractmethod - def gen_serializer_expression(self, indented_writer, expression): - # type: (writer.IndentedTextWriter, str) -> str + def gen_serializer_expression(self, indented_writer, expression, should_shapify=False): + # type: (writer.IndentedTextWriter, str, bool) -> str """Generate code with the text writer and return an expression to serialize the type.""" pass -def _call_method_or_global_function(expression, ast_type): - # type: (str, ast.Type) -> str +def _call_method_or_global_function(expression, ast_type, should_shapify=False): + # type: (str, ast.Type, bool) -> str """ Given a fully-qualified method name, call it correctly. @@ -528,18 +528,31 @@ def _call_method_or_global_function(expression, ast_type): """ method_name = ast_type.serializer serialization_context = 'getSerializationContext()' if ast_type.deserialize_with_tenant else '' + shape_options = '' + if should_shapify: + shape_options = 'options' + if serialization_context != '': + shape_options = ', ' + shape_options short_method_name = writer.get_method_name(method_name) if writer.is_function(method_name): if ast_type.deserialize_with_tenant: serialization_context = ', ' + serialization_context - return common.template_args('${method_name}(${expression}${serialization_context})', - expression=expression, method_name=method_name, - serialization_context=serialization_context) + return common.template_args( + '${method_name}(${expression}${serialization_context}${shape_options})', + expression=expression, + method_name=method_name, + serialization_context=serialization_context, + shape_options=shape_options, + ) - return common.template_args('${expression}.${method_name}(${serialization_context})', - expression=expression, method_name=short_method_name, - serialization_context=serialization_context) + return common.template_args( + '${expression}.${method_name}(${serialization_context}${shape_options})', + expression=expression, + method_name=short_method_name, + serialization_context=serialization_context, + shape_options=shape_options, + ) class _CommonBsonCppTypeBase(BsonCppTypeBase): @@ -560,9 +573,9 @@ class _CommonBsonCppTypeBase(BsonCppTypeBase): # type: () -> bool return self._ast_type.serializer is not None - def gen_serializer_expression(self, indented_writer, expression): - # type: (writer.IndentedTextWriter, str) -> str - return _call_method_or_global_function(expression, self._ast_type) + def gen_serializer_expression(self, indented_writer, expression, should_shapify=False): + # type: (writer.IndentedTextWriter, str, bool) -> str + return _call_method_or_global_function(expression, self._ast_type, should_shapify) class _ObjectBsonCppTypeBase(BsonCppTypeBase): @@ -584,8 +597,8 @@ class _ObjectBsonCppTypeBase(BsonCppTypeBase): # type: () -> bool return self._ast_type.serializer is not None - def gen_serializer_expression(self, indented_writer, expression): - # type: (writer.IndentedTextWriter, str) -> str + def gen_serializer_expression(self, indented_writer, expression, should_shapify=False): + # type: (writer.IndentedTextWriter, str, bool) -> str method_name = writer.get_method_name(self._ast_type.serializer) if self._ast_type.deserialize_with_tenant: # SerializationContext is tied to tenant deserialization indented_writer.write_line( @@ -618,8 +631,8 @@ class _ArrayBsonCppTypeBase(BsonCppTypeBase): # type: () -> bool return self._ast_type.serializer is not None - def gen_serializer_expression(self, indented_writer, expression): - # type: (writer.IndentedTextWriter, str) -> str + def gen_serializer_expression(self, indented_writer, expression, should_shapify=False): + # type: (writer.IndentedTextWriter, str, bool) -> str method_name = writer.get_method_name(self._ast_type.serializer) indented_writer.write_line( common.template_args('BSONArray localArray(${expression}.${method_name}());', @@ -642,8 +655,8 @@ class _BinDataBsonCppTypeBase(BsonCppTypeBase): # type: () -> bool return True - def gen_serializer_expression(self, indented_writer, expression): - # type: (writer.IndentedTextWriter, str) -> str + def gen_serializer_expression(self, indented_writer, expression, should_shapify=False): + # type: (writer.IndentedTextWriter, str, bool) -> str if self._ast_type.serializer: method_name = writer.get_method_name(self._ast_type.serializer) indented_writer.write_line( diff --git a/buildscripts/idl/idl/errors.py b/buildscripts/idl/idl/errors.py index 677cf9eca8f..26bd0600695 100644 --- a/buildscripts/idl/idl/errors.py +++ b/buildscripts/idl/idl/errors.py @@ -133,11 +133,12 @@ ERROR_ID_INVALID_ARRAY_VARIANT = "ID0093" ERROR_ID_FIELD_MUST_DECLARE_SHAPE_LITERAL = "ID0094" ERROR_ID_CANNOT_DECLARE_SHAPE_LITERAL = "ID0095" ERROR_ID_INVALID_TYPE_FOR_SHAPIFY = "ID0096" -ERROR_ID_CANNOT_BE_LITERAL_AND_FIELDPATH = "ID0097" -ERROR_ID_QUERY_SHAPE_FIELDPATH_CANNOT_BE_FALSE = "ID0098" +ERROR_ID_QUERY_SHAPE_PROPERTIES_MUTUALLY_EXCLUSIVE = "ID0097" +ERROR_ID_QUERY_SHAPE_PROPERTY_CANNOT_BE_FALSE = "ID0098" ERROR_ID_STRICT_AND_DISABLE_CHECK_NOT_ALLOWED = "ID0099" ERROR_ID_INHERITANCE_AND_DISABLE_CHECK_NOT_ALLOWED = "ID0100" ERROR_ID_FEATURE_FLAG_SHOULD_BE_FCV_GATED_FALSE_HAS_VERSION = "ID0101" +ERROR_ID_QUERY_SHAPE_INVALID_VALUE = "ID0102" class IDLError(Exception): @@ -980,15 +981,9 @@ class ParserContext(object): f"In order for {field_name} to be marked as a query shape fieldpath, it must have a string type, not {field_type}." ) - def add_field_cannot_be_literal_and_fieldpath(self, location, field_name): - self._add_error( - location, ERROR_ID_CANNOT_BE_LITERAL_AND_FIELDPATH, - f"{field_name} cannot be marked as both a query shape literal and query shape fieldpath." - ) - - def add_field_cannot_have_query_shape_anonymize_false(self, location): - self._add_error(location, ERROR_ID_QUERY_SHAPE_FIELDPATH_CANNOT_BE_FALSE, - "'query_shape_anonymize' cannot be defined as false if it is set.") + def add_invalid_query_shape_value(self, location, query_shape_value): + self._add_error(location, ERROR_ID_QUERY_SHAPE_INVALID_VALUE, + f"'{query_shape_value}' is not a valid value for 'query_shape'.") def add_strict_and_disable_check_not_allowed(self, location): self._add_error( diff --git a/buildscripts/idl/idl/generator.py b/buildscripts/idl/idl/generator.py index 0fd4f74a49e..b896f7610fa 100644 --- a/buildscripts/idl/idl/generator.py +++ b/buildscripts/idl/idl/generator.py @@ -2169,21 +2169,24 @@ class _CppSourceFileWriter(_CppFileWriterBase): self._writer.write_template( 'BSONArrayBuilder arrayBuilder(builder->subarrayStart(${field_name}));') with self._block('for (const auto& item : ${access_member}) {', '}'): - expression = bson_cpp_type.gen_serializer_expression(self._writer, 'item') + expression = bson_cpp_type.gen_serializer_expression( + self._writer, 'item', + field.query_shape == ast.QueryShapeFieldType.CUSTOM) template_params['expression'] = expression self._writer.write_template('arrayBuilder.append(${expression});') else: expression = bson_cpp_type.gen_serializer_expression( - self._writer, _access_member(field)) + self._writer, _access_member(field), + field.query_shape == ast.QueryShapeFieldType.CUSTOM) template_params['expression'] = expression - if not field.should_serialize_query_shape: + if not field.should_serialize_with_options: self._writer.write_template( 'builder->append(${field_name}, ${expression});') - elif field.query_shape_literal: + elif field.query_shape == ast.QueryShapeFieldType.LITERAL: self._writer.write_template( 'options.serializeLiteralValue(${expression}).serializeForIDL(${field_name}, builder);' ) - elif field.query_shape_anonymize: + elif field.query_shape == ast.QueryShapeFieldType.ANONYMIZE: self._writer.write_template( 'builder->append(${field_name}, options.serializeFieldPathFromString(${expression}));' ) @@ -2260,7 +2263,7 @@ class _CppSourceFileWriter(_CppFileWriterBase): if field.chained: # Just directly call the serializer for chained structs without opening up a nested # document. - if not field.should_serialize_query_shape: + if not field.should_serialize_with_options: self._writer.write_template('${access_member}.serialize(builder);') else: self._writer.write_template('${access_member}.serialize(builder, options);') @@ -2271,14 +2274,14 @@ class _CppSourceFileWriter(_CppFileWriterBase): with self._block('for (const auto& item : ${access_member}) {', '}'): self._writer.write_line( 'BSONObjBuilder subObjBuilder(arrayBuilder.subobjStart());') - if not field.should_serialize_query_shape: + if not field.should_serialize_with_options: self._writer.write_line('item.serialize(&subObjBuilder);') else: self._writer.write_line('item.serialize(&subObjBuilder, options);') else: self._writer.write_template( 'BSONObjBuilder subObjBuilder(builder->subobjStart(${field_name}));') - if not field.should_serialize_query_shape: + if not field.should_serialize_with_options: self._writer.write_template('${access_member}.serialize(&subObjBuilder);') else: self._writer.write_template( @@ -2321,7 +2324,7 @@ class _CppSourceFileWriter(_CppFileWriterBase): 'cpp_type'] = 'std::vector<' + variant_type.cpp_type + '>' if variant_type.is_array else variant_type.cpp_type template_params['param_opt'] = "" - if field.should_serialize_query_shape: + if field.should_serialize_with_options: template_params['param_opt'] = ', options' with self._block('[%s${param_opt}](const ${cpp_type}& value) {' % builder, '},'): bson_cpp_type = cpp_types.get_bson_cpp_type(variant_type) @@ -2329,30 +2332,32 @@ class _CppSourceFileWriter(_CppFileWriterBase): self._writer.write_template('value.serialize(%s);' % builder) elif bson_cpp_type and bson_cpp_type.has_serializer(): assert not field.type.is_array - expression = bson_cpp_type.gen_serializer_expression(self._writer, 'value') + expression = bson_cpp_type.gen_serializer_expression( + self._writer, 'value', + field.query_shape == ast.QueryShapeFieldType.CUSTOM) template_params['expression'] = expression - if not field.should_serialize_query_shape: + if not field.should_serialize_with_options: self._writer.write_template( 'builder->append(${field_name}, ${expression});') - elif field.query_shape_literal: + elif field.query_shape == ast.QueryShapeFieldType.LITERAL: self._writer.write_template( 'options.serializeLiteralValue(${expression}).serializeForIDL(${field_name}, builder);' ) - elif field.query_shape_anonymize: + elif field.query_shape == ast.QueryShapeFieldType.ANONYMIZE: self._writer.write_template( 'builder->append(${field_name}, options.serializeFieldPathFromString(${expression}));' ) else: assert False else: - if not field.should_serialize_query_shape: + if not field.should_serialize_with_options: self._writer.write_template( 'idl::idlSerialize(builder, ${field_name}, value);') - elif field.query_shape_literal: + elif field.query_shape == ast.QueryShapeFieldType.LITERAL: self._writer.write_template( 'options.serializeLiteralValue(value).serializeForIDL(${field_name}, builder);' ) - elif field.query_shape_anonymize: + elif field.query_shape == ast.QueryShapeFieldType.ANONYMIZE: self._writer.write_template( 'idl::idlSerialize(builder, ${field_name}, options.serializeFieldPathFromString(value));' ) @@ -2391,15 +2396,15 @@ class _CppSourceFileWriter(_CppFileWriterBase): else: # Generate default serialization # Note: BSONObjBuilder::append, which all three branches use, has overrides for std::vector also - if not field.should_serialize_query_shape: + if not field.should_serialize_with_options: self._writer.write_line( 'builder->append(%s, %s);' % (_get_field_constant_name(field), _access_member(field))) - elif field.query_shape_literal: + elif field.query_shape == ast.QueryShapeFieldType.LITERAL: self._writer.write_line( 'options.serializeLiteralValue(%s).serializeForIDL(%s, builder);' % (_access_member(field), _get_field_constant_name(field))) - elif field.query_shape_anonymize: + elif field.query_shape == ast.QueryShapeFieldType.ANONYMIZE: self._writer.write_line( 'builder->append(%s, options.serializeFieldPathFromString(%s));' % (_get_field_constant_name(field), _access_member(field))) diff --git a/buildscripts/idl/idl/parser.py b/buildscripts/idl/idl/parser.py index 349ef8ff64a..41b73966d26 100644 --- a/buildscripts/idl/idl/parser.py +++ b/buildscripts/idl/idl/parser.py @@ -406,10 +406,8 @@ def _parse_field(ctxt, name, node): _RuleDesc('bool_scalar'), "forward_from_shards": _RuleDesc('bool_scalar'), - "query_shape_literal": - _RuleDesc('required_bool_scalar'), - "query_shape_anonymize": - _RuleDesc('required_bool_scalar'), + "query_shape": + _RuleDesc('scalar'), }) return field diff --git a/buildscripts/idl/idl/syntax.py b/buildscripts/idl/idl/syntax.py index cf23b4931c5..c8e4697525b 100644 --- a/buildscripts/idl/idl/syntax.py +++ b/buildscripts/idl/idl/syntax.py @@ -493,8 +493,7 @@ class Field(common.SourceLocation): self.serialize_op_msg_request_only = False # type: bool self.constructed = False # type: bool - self.query_shape_literal = None # type: Optional[bool] - self.query_shape_anonymize = None # type: Optional[bool] + self.query_shape = None # type: Optional[str] self.hidden = False # type: bool diff --git a/buildscripts/idl/tests/test_binder.py b/buildscripts/idl/tests/test_binder.py index 0118f38c7b9..e1e948051ec 100644 --- a/buildscripts/idl/tests/test_binder.py +++ b/buildscripts/idl/tests/test_binder.py @@ -2717,11 +2717,11 @@ class TestBinder(testcase.IDLTestcase): description: "" fields: field1: - query_shape_literal: true + query_shape: literal type: string field2: type: bool - query_shape_literal: false + query_shape: parameter """)) self.assert_bind_fail( @@ -2736,7 +2736,7 @@ class TestBinder(testcase.IDLTestcase): type: string field2: type: bool - query_shape_literal: false + query_shape: parameter """), idl.errors.ERROR_ID_FIELD_MUST_DECLARE_SHAPE_LITERAL) self.assert_bind_fail( @@ -2750,7 +2750,7 @@ class TestBinder(testcase.IDLTestcase): type: string field2: type: bool - query_shape_literal: false + query_shape: parameter """), idl.errors.ERROR_ID_CANNOT_DECLARE_SHAPE_LITERAL) # Validating query_shape_anonymize relies on std::string @@ -2780,10 +2780,10 @@ class TestBinder(testcase.IDLTestcase): description: "" fields: field1: - query_shape_anonymize: true + query_shape: anonymize type: string field2: - query_shape_literal: false + query_shape: parameter type: bool """)) @@ -2795,10 +2795,10 @@ class TestBinder(testcase.IDLTestcase): description: "" fields: field1: - query_shape_anonymize: true + query_shape: anonymize type: array<string> field2: - query_shape_literal: false + query_shape: parameter type: bool """)) @@ -2806,17 +2806,13 @@ class TestBinder(testcase.IDLTestcase): basic_types + textwrap.dedent(""" structs: struct1: - query_shape_component: true strict: true description: "" fields: field1: - query_shape_anonymize: false + query_shape: blah type: string - field2: - query_shape_literal: false - type: bool - """), idl.errors.ERROR_ID_QUERY_SHAPE_FIELDPATH_CANNOT_BE_FALSE) + """), idl.errors.ERROR_ID_QUERY_SHAPE_INVALID_VALUE) self.assert_bind_fail( basic_types + textwrap.dedent(""" @@ -2827,10 +2823,10 @@ class TestBinder(testcase.IDLTestcase): description: "" fields: field1: - query_shape_anonymize: true + query_shape: anonymize type: bool field2: - query_shape_literal: false + query_shape: parameter type: bool """), idl.errors.ERROR_ID_INVALID_TYPE_FOR_SHAPIFY) @@ -2843,39 +2839,22 @@ class TestBinder(testcase.IDLTestcase): description: "" fields: field1: - query_shape_anonymize: true + query_shape: anonymize type: array<bool> field2: - query_shape_literal: false + query_shape: parameter type: bool """), idl.errors.ERROR_ID_INVALID_TYPE_FOR_SHAPIFY) self.assert_bind_fail( basic_types + textwrap.dedent(""" structs: - struct1: - query_shape_component: true - strict: true - description: "" - fields: - field1: - query_shape_anonymize: true - query_shape_literal: true - type: string - field2: - query_shape_literal: false - type: bool - """), idl.errors.ERROR_ID_CANNOT_BE_LITERAL_AND_FIELDPATH) - - self.assert_bind_fail( - basic_types + textwrap.dedent(""" - structs: StructZero: strict: true description: "" fields: field1: - query_shape_literal: true + query_shape: literal type: string """), idl.errors.ERROR_ID_CANNOT_DECLARE_SHAPE_LITERAL) @@ -2896,7 +2875,7 @@ class TestBinder(testcase.IDLTestcase): field2: type: StructZero description: "" - query_shape_literal: true + query_shape: literal """), idl.errors.ERROR_ID_CANNOT_DECLARE_SHAPE_LITERAL) # pylint: disable=invalid-name diff --git a/src/mongo/db/exec/document_value/value.h b/src/mongo/db/exec/document_value/value.h index c3e3dc78546..a6255dfcae2 100644 --- a/src/mongo/db/exec/document_value/value.h +++ b/src/mongo/db/exec/document_value/value.h @@ -97,6 +97,9 @@ public: explicit Value(bool value) : _storage(Bool, value) {} explicit Value(int value) : _storage(NumberInt, value) {} explicit Value(long long value) : _storage(NumberLong, value) {} +#if !defined(_WIN32) + explicit Value(int64_t value) : _storage(NumberLong, (long long)value) {} +#endif explicit Value(double value) : _storage(NumberDouble, value) {} explicit Value(const Decimal128& value) : _storage(NumberDecimal, value) {} explicit Value(const Timestamp& value) : _storage(bsonTimestamp, value) {} diff --git a/src/mongo/db/keypattern.h b/src/mongo/db/keypattern.h index 986d259eebd..1be9e0cc9eb 100644 --- a/src/mongo/db/keypattern.h +++ b/src/mongo/db/keypattern.h @@ -33,6 +33,7 @@ #include "mongo/base/string_data.h" #include "mongo/bson/util/builder.h" #include "mongo/db/jsobj.h" +#include "mongo/db/query/serialization_options.h" #include "mongo/util/str.h" namespace mongo { @@ -84,6 +85,14 @@ public: return _pattern; } + BSONObj serializeForIDL(const SerializationOptions& options = {}) const { + BSONObjBuilder bob; + for (const auto& e : _pattern) { + bob.appendAs(e, options.serializeIdentifier(e.fieldNameStringData())); + } + return bob.obj(); + } + /** * Returns a string representation of this KeyPattern. */ diff --git a/src/mongo/db/keypattern.idl b/src/mongo/db/keypattern.idl index 158c742faab..cd67f8e5428 100644 --- a/src/mongo/db/keypattern.idl +++ b/src/mongo/db/keypattern.idl @@ -38,5 +38,5 @@ types: bson_serialization_type: object description: An expression describing a transformation of a document into a document key. cpp_type: KeyPattern - serializer: KeyPattern::toBSON + serializer: KeyPattern::serializeForIDL deserializer: KeyPattern::fromBSON diff --git a/src/mongo/db/pipeline/document_source_coll_stats.idl b/src/mongo/db/pipeline/document_source_coll_stats.idl index 0f146db02cf..09da6d65065 100644 --- a/src/mongo/db/pipeline/document_source_coll_stats.idl +++ b/src/mongo/db/pipeline/document_source_coll_stats.idl @@ -46,7 +46,7 @@ structs: type: optionalBool # Do not abstract this literal, since it is parameterizing the stage like an enum rather than representing # real user input. - query_shape_literal: false + query_shape: parameter DocumentSourceCollStatsSpec: description: Specification for a $collStats stage. strict: true @@ -56,29 +56,29 @@ structs: description: A request to include latency stats in the $collStats output. type: LatencyStatsSpec optional: true - query_shape_literal: true + query_shape: literal storageStats: description: Adds storage statistics to the return document. type: StorageStatsSpec optional: true - query_shape_literal: true + query_shape: literal count: description: Adds the total number of documents in the collection to the return document. type: object validator: callback: validateObjectIsEmpty optional: true - query_shape_literal: true + query_shape: literal queryExecStats: description: Adds query execution statistics to the return document. type: object validator: callback: validateObjectIsEmpty optional: true - query_shape_literal: true + query_shape: literal $_requestOnTimeseriesView: description: When set to true, $collStats stage requests statistics from the view namespace. When set to false, $collStats stage requests statistics from the underlying collection. cpp_name: requestOnTimeseriesView type: optionalBool - query_shape_literal: false + query_shape: parameter diff --git a/src/mongo/db/pipeline/document_source_internal_all_collection_stats.idl b/src/mongo/db/pipeline/document_source_internal_all_collection_stats.idl index 8c3c43e637d..247992a02ee 100644 --- a/src/mongo/db/pipeline/document_source_internal_all_collection_stats.idl +++ b/src/mongo/db/pipeline/document_source_internal_all_collection_stats.idl @@ -46,4 +46,4 @@ structs: description: Specification for a $collStats stage. type: DocumentSourceCollStatsSpec optional: true - query_shape_literal: true + query_shape: literal diff --git a/src/mongo/db/pipeline/document_source_list_sessions.idl b/src/mongo/db/pipeline/document_source_list_sessions.idl index 5fcdeb000d4..85e32a37641 100644 --- a/src/mongo/db/pipeline/document_source_list_sessions.idl +++ b/src/mongo/db/pipeline/document_source_list_sessions.idl @@ -44,10 +44,10 @@ structs: fields: user: type: string - query_shape_anonymize: true + query_shape: anonymize db: type: string - query_shape_anonymize: true + query_shape: anonymize ListSessionsSpec: description: "$listSessions and $listLocalSessions pipeline spec" @@ -58,13 +58,13 @@ structs: type: bool default: false # This boolean parameterizes the stage rather than representing user input, so do not abstract the literal. - query_shape_literal: false + query_shape: parameter users: type: array<ListSessionsUser> optional: true - query_shape_literal: true + query_shape: literal $_internalPredicate: cpp_name: predicate type: object optional: true - query_shape_literal: true # This is a MatchExpression predicate and could be shape-ified rather than completely abstracted. + query_shape: literal # This is a MatchExpression predicate and could be shape-ified rather than completely abstracted. diff --git a/src/mongo/db/pipeline/document_source_merge.cpp b/src/mongo/db/pipeline/document_source_merge.cpp index 22f271babb6..722de5936f9 100644 --- a/src/mongo/db/pipeline/document_source_merge.cpp +++ b/src/mongo/db/pipeline/document_source_merge.cpp @@ -535,11 +535,6 @@ boost::optional<DocumentSource::DistributedPlanLogic> DocumentSourceMerge::distr Value DocumentSourceMerge::serialize(SerializationOptions opts) const { auto explain = opts.verbosity; - if (opts.applyHmacToIdentifiers || opts.replacementForLiteralArgs) { - // TODO: SERVER-76208 support query shapification for IDL types with custom serializers. - MONGO_UNIMPLEMENTED_TASSERT(7484324); - } - DocumentSourceMergeSpec spec; spec.setTargetNss(_outputNs); spec.setLet([&]() -> boost::optional<BSONObj> { @@ -553,7 +548,23 @@ Value DocumentSourceMerge::serialize(SerializationOptions opts) const { } return bob.obj(); }()); - spec.setWhenMatched(MergeWhenMatchedPolicy{_descriptor.mode.first, _pipeline}); + spec.setWhenMatched(MergeWhenMatchedPolicy{ + _descriptor.mode.first, [&]() -> boost::optional<std::vector<BSONObj>> { + if (!_pipeline.has_value()) { + return boost::none; + } + auto expCtxWithLetVariables = pExpCtx->copyWith(pExpCtx->ns); + if (spec.getLet()) { + BSONObjBuilder cleanLetSpecBuilder; + for (auto& elt : spec.getLet().value()) { + cleanLetSpecBuilder.append(elt.fieldNameStringData(), BSONObj{}); + } + expCtxWithLetVariables->variables.seedVariablesWithLetParameters( + expCtxWithLetVariables.get(), cleanLetSpecBuilder.obj()); + } + return Pipeline::parse(_pipeline.value(), expCtxWithLetVariables) + ->serializeToBson(opts); + }()}); spec.setWhenNotMatched(_descriptor.mode.second); spec.setOn([&]() { std::vector<std::string> mergeOnFields; diff --git a/src/mongo/db/pipeline/document_source_merge_cursors_test.cpp b/src/mongo/db/pipeline/document_source_merge_cursors_test.cpp index 9829838aba8..ffd97c755f7 100644 --- a/src/mongo/db/pipeline/document_source_merge_cursors_test.cpp +++ b/src/mongo/db/pipeline/document_source_merge_cursors_test.cpp @@ -56,6 +56,7 @@ #include "mongo/s/client/shard_registry.h" #include "mongo/s/sharding_router_test_fixture.h" #include "mongo/stdx/thread.h" +#include "mongo/unittest/bson_test_util.h" #include "mongo/unittest/unittest.h" namespace mongo { @@ -531,4 +532,41 @@ TEST_F(DocumentSourceMergeCursorsMultiTenancyAndFeatureFlagTest, // AsyncResultsMergerParams. ASSERT(DocumentSourceMergeCursors::createFromBson(newSpec.firstElement(), getExpCtx())); } +using DocumentSourceMergeCursorsShapeTest = AggregationContextFixture; +TEST_F(DocumentSourceMergeCursorsShapeTest, QueryShape) { + auto expCtx = getExpCtx(); + AsyncResultsMergerParams armParams; + armParams.setNss( + NamespaceString::createNamespaceString_forTest(boost::none, kMergeCursorNsStr)); + std::vector<RemoteCursor> cursors; + cursors.emplace_back( + makeRemoteCursor(kTestShardIds[0], kTestShardHosts[0], CursorResponse(expCtx->ns, 1, {}))); + cursors.emplace_back( + makeRemoteCursor(kTestShardIds[1], kTestShardHosts[1], CursorResponse(expCtx->ns, 2, {}))); + armParams.setRemotes(std::move(cursors)); + auto stage = DocumentSourceMergeCursors::create(expCtx, std::move(armParams)); + + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "$mergeCursors": { + "compareWholeSortKey": "?", + "remotes": [ + { + "shardId": "HASH<FakeShard1>", + "hostAndPort": "HASH<FakeShard1Host:12345>", + "cursorResponse": "?" + }, + { + "shardId": "HASH<FakeShard2>", + "hostAndPort": "HASH<FakeShard2Host:12345>", + "cursorResponse": "?" + } + ], + "nss": "HASH<test.mergeCursors>", + "allowPartialResults": false, + "recordRemoteOpWaitTime": false + } + })", + redact(*stage)); +} } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_merge_test.cpp b/src/mongo/db/pipeline/document_source_merge_test.cpp index 1cd0aa0dcd6..14ca9c52368 100644 --- a/src/mongo/db/pipeline/document_source_merge_test.cpp +++ b/src/mongo/db/pipeline/document_source_merge_test.cpp @@ -29,6 +29,7 @@ #include "mongo/platform/basic.h" +#include "mongo/unittest/bson_test_util.h" #include <boost/intrusive_ptr.hpp> #include "mongo/db/exec/document_value/document.h" @@ -794,12 +795,12 @@ TEST_F(DocumentSourceMergeTest, SerializeDefaultLetVariable) { // Test the behaviour of 'let' serialization for each whenNotMatched mode. TEST_F(DocumentSourceMergeTest, SerializeLetVariables) { - auto pipeline = BSON_ARRAY(BSON("$project" << BSON("x" - << "$$v1" - << "y" - << "$$v2" - << "z" - << "$$v3"))); + auto pipeline = BSON_ARRAY(BSON("$project" << BSON("_id" << true << "x" + << "$$v1" + << "y" + << "$$v2" + << "z" + << "$$v3"))); const auto createAndSerializeMergeStage = [this, &pipeline](StringData whenNotMatched) { auto spec = BSON("$merge" << BSON("into" @@ -845,8 +846,8 @@ TEST_F(DocumentSourceMergeTest, SerializeLetVariables) { TEST_F(DocumentSourceMergeTest, SerializeLetArrayVariable) { for (auto&& whenNotMatched : {"insert", "fail", "discard"}) { - auto pipeline = BSON_ARRAY(BSON("$project" << BSON("x" - << "$$v1"))); + auto pipeline = BSON_ARRAY(BSON("$project" << BSON("_id" << true << "x" + << "$$v1"))); auto spec = BSON( "$merge" << BSON("into" << "target_collection" @@ -874,8 +875,9 @@ TEST_F(DocumentSourceMergeTest, SerializeLetArrayVariable) { // SERVER-41272, this test should be updated to accordingly. TEST_F(DocumentSourceMergeTest, SerializeNullLetVariablesAsDefault) { for (auto&& whenNotMatched : {"insert", "fail", "discard"}) { - auto pipeline = BSON_ARRAY(BSON("$project" << BSON("x" - << "1"))); + auto pipeline = BSON_ARRAY(BSON("$project" << BSON("_id" << true << "x" + << BSON("$const" + << "1")))); auto spec = BSON("$merge" << BSON("into" << "target_collection" << "let" << BSONNULL << "whenMatched" << pipeline @@ -892,8 +894,9 @@ TEST_F(DocumentSourceMergeTest, SerializeNullLetVariablesAsDefault) { TEST_F(DocumentSourceMergeTest, SerializeEmptyLetVariables) { for (auto&& whenNotMatched : {"insert", "fail", "discard"}) { - auto pipeline = BSON_ARRAY(BSON("$project" << BSON("x" - << "1"))); + auto pipeline = BSON_ARRAY(BSON("$project" << BSON("_id" << true << "x" + << BSON("$const" + << "1")))); auto spec = BSON("$merge" << BSON("into" << "target_collection" << "let" << BSONObj() << "whenMatched" << pipeline @@ -909,6 +912,41 @@ TEST_F(DocumentSourceMergeTest, SerializeEmptyLetVariables) { } } +TEST_F(DocumentSourceMergeTest, SerializeEmptyLetVariableMentionNew) { + auto pipeline = BSON_ARRAY(fromjson("{$project: {_id: true, x: '$$new'}}")); + auto spec = + BSON("$merge" << BSON("into" + << "target_collection" + << "let" << BSONObj() << "whenMatched" << pipeline << "whenNotMatched" + << "insert")); + auto mergeStage = createMergeStage(spec); + ASSERT(mergeStage); + auto serialized = mergeStage->serialize().getDocument(); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "$merge": { + "into": { + "db": "unittests", + "coll": "target_collection" + }, + "on": "_id", + "let": { + "new": "$$ROOT" + }, + "whenMatched": [ + { + "$project": { + "_id": true, + "x": "$$new" + } + } + ], + "whenNotMatched": "insert" + } + })", + serialized.toBson()); +} + TEST_F(DocumentSourceMergeTest, OnlyObjectCanBeUsedAsLetVariables) { for (auto&& whenNotMatched : {"insert", "fail", "discard"}) { auto pipeline = BSON_ARRAY(BSON("$project" << BSON("x" @@ -1138,6 +1176,41 @@ TEST_F(DocumentSourceMergeServerlessTest, ASSERT_DOCUMENT_EQ(serialized["$merge"][kIntoFieldName].getDocument(), expectedDoc); } +TEST_F(DocumentSourceMergeTest, QueryShape) { + auto pipeline = BSON_ARRAY(BSON("$project" << BSON("x" + << "1"))); + auto spec = + BSON("$merge" << BSON("into" + << "target_collection" + << "let" << BSONObj() << "whenMatched" << pipeline << "whenNotMatched" + << "insert")); + auto mergeStage = createMergeStage(spec); + ASSERT(mergeStage); + auto serialized = mergeStage->serialize().getDocument(); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "$merge": { + "into": { + "db": "unittests", + "coll": "target_collection" + }, + "on": "_id", + "let": { + "new": "$$ROOT" + }, + "whenMatched": [ + { + "$project": { + "HASH<_id>": true, + "HASH<x>": "?string" + } + } + ], + "whenNotMatched": "insert" + } + })", + redact(*mergeStage)); +} } // namespace } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_out.idl b/src/mongo/db/pipeline/document_source_out.idl index 47f0e6d1c23..9581c3de800 100644 --- a/src/mongo/db/pipeline/document_source_out.idl +++ b/src/mongo/db/pipeline/document_source_out.idl @@ -46,17 +46,17 @@ structs: description: "Target collection name to write documents from $out to." type: string optional: false - query_shape_anonymize: true + query_shape: anonymize db: description: "Target database name to write documents from $out to." type: string optional: false - query_shape_anonymize: true + query_shape: anonymize timeseries: cpp_name: timeseries description: "If set, the aggregation stage will use these options to create or replace a time-series collection in the given namespace." type: TimeseriesOptions optional: true - query_shape_literal: true + query_shape: literal diff --git a/src/mongo/db/pipeline/document_source_set_variable_from_subpipeline.cpp b/src/mongo/db/pipeline/document_source_set_variable_from_subpipeline.cpp index a85e0e68af2..c1c946933c4 100644 --- a/src/mongo/db/pipeline/document_source_set_variable_from_subpipeline.cpp +++ b/src/mongo/db/pipeline/document_source_set_variable_from_subpipeline.cpp @@ -54,16 +54,10 @@ REGISTER_INTERNAL_DOCUMENT_SOURCE(setVariableFromSubPipeline, true); Value DocumentSourceSetVariableFromSubPipeline::serialize(SerializationOptions opts) const { - if (opts.applyHmacToIdentifiers || opts.replacementForLiteralArgs) { - // TODO: SERVER-76208 support query shapification for IDL types like pipeline with custom - // serializers. - MONGO_UNIMPLEMENTED_TASSERT(7484314); - } - const auto var = "$$" + Variables::getBuiltinVariableName(_variableID); SetVariableFromSubPipelineSpec spec; tassert(625298, "SubPipeline cannot be null during serialization", _subPipeline); - spec.setSetVariable(var); + spec.setSetVariable(opts.serializeIdentifier(var)); spec.setPipeline(_subPipeline->serializeToBson(opts)); return Value(DOC(getSourceName() << spec.toBSON())); } diff --git a/src/mongo/db/pipeline/document_source_set_variable_from_subpipeline_test.cpp b/src/mongo/db/pipeline/document_source_set_variable_from_subpipeline_test.cpp index e7cc8d71a27..8b30d3c95b7 100644 --- a/src/mongo/db/pipeline/document_source_set_variable_from_subpipeline_test.cpp +++ b/src/mongo/db/pipeline/document_source_set_variable_from_subpipeline_test.cpp @@ -158,6 +158,41 @@ TEST_F(DocumentSourceSetVariableFromSubPipelineTest, testDoGetNext) { Value((BSON("d" << 1))), nullptr) == 0); } +TEST_F(DocumentSourceSetVariableFromSubPipelineTest, QueryShape) { + const auto inputDocs = + std::vector{Document{{"a", 1}}, Document{{"b", 1}}, Document{{"c", 1}}, Document{{"d", 1}}}; + auto expCtx = getExpCtx(); + const auto mockSourceForSetVarStage = DocumentSourceMock::createForTest(inputDocs[1], expCtx); + auto ctxForSubPipeline = expCtx->copyForSubPipeline(expCtx->ns); + const auto mockSourceForSubPipeline = + DocumentSourceMock::createForTest(inputDocs, ctxForSubPipeline); + auto setVariableFromSubPipeline = DocumentSourceSetVariableFromSubPipeline::create( + expCtx, + Pipeline::create({DocumentSourceMatch::create(BSON("d" << 1), ctxForSubPipeline)}, + ctxForSubPipeline), + Variables::kSearchMetaId); + setVariableFromSubPipeline->addSubPipelineInitialSource(mockSourceForSubPipeline); + setVariableFromSubPipeline->setSource(mockSourceForSetVarStage.get()); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "$setVariableFromSubPipeline": { + "setVariable": "HASH<$$SEARCH_META>", + "pipeline": [ + { + "mock": {} + }, + { + "$match": { + "HASH<d>": { + "$eq": "?number" + } + } + } + ] + } + })", + redact(*setVariableFromSubPipeline)); +} } // namespace } // namespace mongo diff --git a/src/mongo/db/pipeline/exchange_spec.idl b/src/mongo/db/pipeline/exchange_spec.idl index 9bf0a5d9e84..a765f159e1b 100644 --- a/src/mongo/db/pipeline/exchange_spec.idl +++ b/src/mongo/db/pipeline/exchange_spec.idl @@ -52,24 +52,24 @@ structs: type: ExchangePolicy description: A string indicating a policy of how documents are distributed to consumers. stability: stable - query_shape_literal: false + query_shape: parameter consumers: type: int description: Number of consumers. stability: stable - query_shape_literal: true + query_shape: literal orderPreserving: type: bool default: false description: A flag indicating documents are merged while preserving the order. stability: stable - query_shape_literal: false + query_shape: parameter bufferSize: type: int default: 16777216 description: The size of exchange buffers. stability: stable - query_shape_literal: true + query_shape: literal key: type: object default: "BSONObj()" @@ -79,17 +79,17 @@ structs: field listed here, or if any prefix of any path is multikey (i.e. an array is encountered while traversing a path listed here), then it is by definition sent to consumer 0. - query_shape_literal: true + query_shape: literal boundaries: type: array<object> optional: true description: Range/hash split points. stability: stable - query_shape_literal: true + query_shape: literal consumerIds: type: array<int> optional: true description: Mapping from a range index to a consumer id. stability: stable - query_shape_literal: true + query_shape: literal diff --git a/src/mongo/db/pipeline/process_interface/mongo_process_interface.h b/src/mongo/db/pipeline/process_interface/mongo_process_interface.h index 7189ab9413e..1aa7d6be8de 100644 --- a/src/mongo/db/pipeline/process_interface/mongo_process_interface.h +++ b/src/mongo/db/pipeline/process_interface/mongo_process_interface.h @@ -84,7 +84,7 @@ public: * 2. write_ops::UpdateModification - either the new document we want to upsert or insert into * the collection (i.e. a 'classic' replacement update), or the pipeline to run to compute * the new document. - * 3. boost::optional<BSONObj> - for pipeline-style updated, specifies variables that can be + * 3. boost::optional<BSONObj> - for pipeline-style updates, specifies variables that can be * referred to in the pipeline performing the custom update. */ using BatchObject = diff --git a/src/mongo/db/pipeline/storage_stats_spec.idl b/src/mongo/db/pipeline/storage_stats_spec.idl index f74818fd729..59bfe5c5230 100644 --- a/src/mongo/db/pipeline/storage_stats_spec.idl +++ b/src/mongo/db/pipeline/storage_stats_spec.idl @@ -43,16 +43,16 @@ structs: type: safeInt optional: true validator: { gte: 1 } - query_shape_literal: true + query_shape: literal verbose: type: optionalBool default: false - query_shape_literal: false + query_shape: parameter waitForLock: type: optionalBool default: true - query_shape_literal: false + query_shape: parameter numericOnly: type: optionalBool default: false - query_shape_literal: false + query_shape: parameter diff --git a/src/mongo/db/query/query_shape_test.cpp b/src/mongo/db/query/query_shape_test.cpp index 2e0ec2e1fe7..c2f61933f5e 100644 --- a/src/mongo/db/query/query_shape_test.cpp +++ b/src/mongo/db/query/query_shape_test.cpp @@ -599,7 +599,9 @@ TEST(QueryShapeIDL, ShapifyIDLStruct) { "hello", {1, 2, 3, 4}, "field.path", - {"field.path.1", "fieldpath2"}); + {"field.path.1", "fieldpath2"}, + NamespaceString{"db", "coll"}, + NamespaceString{"db", "coll"}); ASSERT_BSONOBJ_EQ_AUTO( // NOLINT R"({ "stringField": "value", @@ -616,7 +618,9 @@ TEST(QueryShapeIDL, ShapifyIDLStruct) { "fieldpathList": [ "field.path.1", "fieldpath2" - ] + ], + "nss": "db.coll", + "plainNss": "db.coll" })", nested.toBSON()); @@ -631,7 +635,9 @@ TEST(QueryShapeIDL, ShapifyIDLStruct) { "fieldpathList": [ "HASH<field>.HASH<path>.HASH<1>", "HASH<fieldpath2>" - ] + ], + "nss": "HASH<db.coll>", + "plainNss": "db.coll" })", nested.toBSON(options)); @@ -654,7 +660,9 @@ TEST(QueryShapeIDL, ShapifyIDLStruct) { "fieldpathList": [ "field.path.1", "fieldpath2" - ] + ], + "nss": "db.coll", + "plainNss": "db.coll" }, "nested_no_shape": { "stringField": "value", @@ -671,7 +679,9 @@ TEST(QueryShapeIDL, ShapifyIDLStruct) { "fieldpathList": [ "field.path.1", "fieldpath2" - ] + ], + "nss": "db.coll", + "plainNss": "db.coll" } })", parent.toBSON()); @@ -688,7 +698,9 @@ TEST(QueryShapeIDL, ShapifyIDLStruct) { "fieldpathList": [ "HASH<field>.HASH<path>.HASH<1>", "HASH<fieldpath2>" - ] + ], + "nss": "HASH<db.coll>", + "plainNss": "db.coll" }, "nested_no_shape": { "stringField": "value", @@ -705,7 +717,9 @@ TEST(QueryShapeIDL, ShapifyIDLStruct) { "fieldpathList": [ "field.path.1", "fieldpath2" - ] + ], + "nss": "db.coll", + "plainNss": "db.coll" } })", parent.toBSON(options)); diff --git a/src/mongo/db/query/query_shape_test.idl b/src/mongo/db/query/query_shape_test.idl index caea731e651..ac5f4c6fce3 100644 --- a/src/mongo/db/query/query_shape_test.idl +++ b/src/mongo/db/query/query_shape_test.idl @@ -47,28 +47,34 @@ structs: description: "" fields: stringField: - query_shape_literal: true + query_shape: literal type: string enumField: - query_shape_literal: false + query_shape: parameter type: ExampleEnum stringIntVariant: - query_shape_literal: true + query_shape: literal type: variant: [string, int] stringIntVariantEnum: - query_shape_literal: false + query_shape: parameter type: variant: [string, int] arrayOfInts: - query_shape_literal: true + query_shape: literal type: array<int> fieldpath: - query_shape_anonymize: true + query_shape: anonymize type: string fieldpathList: - query_shape_anonymize: true + query_shape: anonymize type: array<string> + nss: + query_shape: custom + type: namespacestring + plainNss: + query_shape: parameter + type: namespacestring ParentStruct: query_shape_component: true @@ -76,8 +82,8 @@ structs: description: "" fields: nested_shape: - query_shape_literal: true + query_shape: literal type: NestedStruct nested_no_shape: - query_shape_literal: false + query_shape: parameter type: NestedStruct diff --git a/src/mongo/db/s/document_source_analyze_shard_key_read_write_distribution.cpp b/src/mongo/db/s/document_source_analyze_shard_key_read_write_distribution.cpp index 6783b6f8833..d3d43f07ae9 100644 --- a/src/mongo/db/s/document_source_analyze_shard_key_read_write_distribution.cpp +++ b/src/mongo/db/s/document_source_analyze_shard_key_read_write_distribution.cpp @@ -314,13 +314,7 @@ DocumentSourceAnalyzeShardKeyReadWriteDistribution::createFromBson( Value DocumentSourceAnalyzeShardKeyReadWriteDistribution::serialize( SerializationOptions opts) const { - if (opts.applyHmacToIdentifiers || opts.replacementForLiteralArgs) { - // TODO: SERVER-76208 support query shapification for IDL types like KeyPattern with custom - // serializers. - MONGO_UNIMPLEMENTED_TASSERT(7484305); - } - - return Value(Document{{getSourceName(), _spec.toBSON()}}); + return Value(Document{{getSourceName(), _spec.toBSON(opts)}}); } DocumentSource::GetNextResult DocumentSourceAnalyzeShardKeyReadWriteDistribution::doGetNext() { diff --git a/src/mongo/db/s/document_source_analyze_shard_key_read_write_distribution.idl b/src/mongo/db/s/document_source_analyze_shard_key_read_write_distribution.idl index 3a41239c348..84ba8129dd4 100644 --- a/src/mongo/db/s/document_source_analyze_shard_key_read_write_distribution.idl +++ b/src/mongo/db/s/document_source_analyze_shard_key_read_write_distribution.idl @@ -40,22 +40,27 @@ structs: DocumentSourceAnalyzeShardKeyReadWriteDistributionSpec: description: Specification for a $_analyzeShardKeyReadWriteDistribution stage. strict: false + query_shape_component: true fields: key: description: The shard key to evaluate. type: KeyPattern + query_shape: custom validator: callback: validateShardKeyPattern splitPointsFilter: + query_shape: literal description: The filter to use to fetch the split point documents generated by the command running this aggregation stage. type: object_owned splitPointsAfterClusterTime: + query_shape: literal description: The afterClusterTime to use when fetching the split point documents. This must be greater or equal to the timestamp at which the insert for the last split point document occurred. type: timestamp splitPointsShardId: + query_shape: anonymize description: The id of the shard that the analyzeShardKey command is running on, and therefore contains the temporary collection storing the split points for the shard key. Only set when running on a sharded cluster. diff --git a/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp b/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp index 0df5fde9dcd..67b94aa7df6 100644 --- a/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp +++ b/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp @@ -93,16 +93,10 @@ StageConstraints DocumentSourceReshardingOwnershipMatch::constraints( } Value DocumentSourceReshardingOwnershipMatch::serialize(SerializationOptions opts) const { - if (opts.applyHmacToIdentifiers || opts.replacementForLiteralArgs) { - // TODO: SERVER-76208 support query shapification for IDL types like KeyPattern with custom - // serializers. - MONGO_UNIMPLEMENTED_TASSERT(7484302); - } - return Value{Document{{kStageName, DocumentSourceReshardingOwnershipMatchSpec( _recipientShardId, _reshardingKey.getKeyPattern()) - .toBSON()}}}; + .toBSON(opts)}}}; } DepsTracker::State DocumentSourceReshardingOwnershipMatch::getDependencies( diff --git a/src/mongo/db/timeseries/timeseries.idl b/src/mongo/db/timeseries/timeseries.idl index 687f65cd9e4..0a582c2994e 100644 --- a/src/mongo/db/timeseries/timeseries.idl +++ b/src/mongo/db/timeseries/timeseries.idl @@ -115,7 +115,7 @@ structs: datetime type (0x9)" type: string stability: stable - query_shape_anonymize: true + query_shape: anonymize metaField: description: "The name of the top-level field describing the series. This field is used to group related data and may be of any BSON type. This may not @@ -123,13 +123,13 @@ structs: type: string optional: true stability: stable - query_shape_anonymize: true + query_shape: anonymize granularity: description: "Describes the expected interval between subsequent measurements" type: BucketGranularity optional: true stability: stable - query_shape_literal: false + query_shape: parameter bucketRoundingSeconds: description: "Used to determine the minimum time boundary when opening a new bucket by rounding the first timestamp down to the next multiple of this @@ -138,14 +138,14 @@ structs: optional: true validator: { gte: 1, lte: 31536000 } stability: stable - query_shape_literal: true + query_shape: literal bucketMaxSpanSeconds: description: "The maximum range of time values for a bucket, in seconds" type: safeInt optional: true validator: { gte: 1, lte: 31536000 } stability: stable - query_shape_literal: true + query_shape: literal CollModTimeseries: description: "A type representing the adjustable options on timeseries collections" diff --git a/src/mongo/s/SConscript b/src/mongo/s/SConscript index 240410508e5..324602ed191 100644 --- a/src/mongo/s/SConscript +++ b/src/mongo/s/SConscript @@ -288,6 +288,7 @@ env.Library( '$BUILD_DIR/mongo/db/commands/set_user_write_block_mode_idl', '$BUILD_DIR/mongo/db/common', '$BUILD_DIR/mongo/db/index_commands_idl', + '$BUILD_DIR/mongo/db/serialization_options', '$BUILD_DIR/mongo/rpc/message', '$BUILD_DIR/mongo/util/caching', 'analyze_shard_key_common', diff --git a/src/mongo/s/query/async_results_merger_params.idl b/src/mongo/s/query/async_results_merger_params.idl index 42074b7b62e..2145840972f 100644 --- a/src/mongo/s/query/async_results_merger_params.idl +++ b/src/mongo/s/query/async_results_merger_params.idl @@ -51,15 +51,19 @@ types: structs: RemoteCursor: description: A description of a cursor opened on a remote server. + query_shape_component: true fields: shardId: type: string description: The shardId of the shard on which the cursor resides. + query_shape: anonymize hostAndPort: type: HostAndPort description: The exact host (within the shard) on which the cursor resides. + query_shape: anonymize cursorResponse: type: CursorResponse + query_shape: literal description: The response after establishing a cursor on the remote shard, including the first batch. @@ -67,35 +71,46 @@ structs: description: The parameters needed to establish an AsyncResultsMerger. chained_structs: OperationSessionInfoFromClient : OperationSessionInfo + query_shape_component: true fields: sort: type: object description: The sort requested on the merging operation. Empty if there is no sort. optional: true + query_shape: literal compareWholeSortKey: type: bool default: false + query_shape: literal description: >- When 'compareWholeSortKey' is true, $sortKey is a scalar value, rather than an object. We extract the sort key {$sortKey: <value>}. The sort key pattern is verified to be {$sortKey: 1}. - remotes: array<RemoteCursor> + remotes: + type: array<RemoteCursor> + query_shape: literal tailableMode: type: TailableMode optional: true description: If set, the tailability mode of this cursor. + query_shape: parameter batchSize: type: safeInt64 optional: true description: The batch size for this cursor. - nss: namespacestring + query_shape: literal + nss: + type: namespacestring + query_shape: custom allowPartialResults: type: bool default: false description: If set, error responses are ignored. + query_shape: parameter recordRemoteOpWaitTime: type: bool default: false + query_shape: parameter description: >- This parameter is not used anymore but should stay for a while for backward compatibility. diff --git a/src/mongo/s/query/document_source_merge_cursors.cpp b/src/mongo/s/query/document_source_merge_cursors.cpp index 1d49bcdf916..c707596fdc7 100644 --- a/src/mongo/s/query/document_source_merge_cursors.cpp +++ b/src/mongo/s/query/document_source_merge_cursors.cpp @@ -117,13 +117,7 @@ DocumentSource::GetNextResult DocumentSourceMergeCursors::doGetNext() { Value DocumentSourceMergeCursors::serialize(SerializationOptions opts) const { invariant(!_blockingResultsMerger); invariant(_armParams); - if (opts.applyHmacToIdentifiers || opts.replacementForLiteralArgs) { - // TODO: SERVER-76208 support query shapification for IDL types like namespacestring with - // custom serializers. - MONGO_UNIMPLEMENTED_TASSERT(7484301); - } - - return Value(Document{{kStageName, _armParams->toBSON()}}); + return Value(Document{{kStageName, _armParams->toBSON(opts)}}); } boost::intrusive_ptr<DocumentSource> DocumentSourceMergeCursors::createFromBson( diff --git a/src/mongo/s/resharding/common_types.idl b/src/mongo/s/resharding/common_types.idl index b28385881d2..c9c8dd7dad3 100644 --- a/src/mongo/s/resharding/common_types.idl +++ b/src/mongo/s/resharding/common_types.idl @@ -271,10 +271,13 @@ structs: description: "A struct representing the information needed for a resharding pipeline to determine which documents belong to a particular shard." strict: true + query_shape_component: true fields: recipientShardId: type: shard_id description: "The id of the recipient shard." + query_shape: anonymize reshardingKey: type: KeyPattern description: "The index specification document to use as the new shard key." + query_shape: custom diff --git a/src/mongo/util/namespace_string_util.cpp b/src/mongo/util/namespace_string_util.cpp index 5ec39d33f29..c3e0184aa89 100644 --- a/src/mongo/util/namespace_string_util.cpp +++ b/src/mongo/util/namespace_string_util.cpp @@ -37,9 +37,10 @@ namespace mongo { std::string NamespaceStringUtil::serialize(const NamespaceString& ns, - const SerializationContext& context) { + const SerializationContext& context, + const SerializationOptions& options) { if (!gMultitenancySupport) - return ns.toString(); + return options.serializeIdentifier(ns.toString()); // TODO SERVER-74284: uncomment to redirect command-sepcific serialization requests // if (context.getSource() == SerializationContext::Source::Command && @@ -47,7 +48,7 @@ std::string NamespaceStringUtil::serialize(const NamespaceString& ns, // return serializeForCommands(ns, context); // if we're not serializing a Command Reply, use the default serializing rules - return serializeForStorage(ns, context); + return options.serializeIdentifier(serializeForStorage(ns, context)); } std::string NamespaceStringUtil::serializeForStorage(const NamespaceString& ns, diff --git a/src/mongo/util/namespace_string_util.h b/src/mongo/util/namespace_string_util.h index 9d9ce77817a..acdd805c56b 100644 --- a/src/mongo/util/namespace_string_util.h +++ b/src/mongo/util/namespace_string_util.h @@ -30,6 +30,7 @@ #pragma once #include "mongo/db/namespace_string.h" +#include "mongo/db/query/serialization_options.h" #include "mongo/db/tenant_id.h" #include "mongo/util/serialization_context.h" @@ -57,7 +58,8 @@ public: * eg. serialize(NamespaceString(boost::none, "foo.bar")) -> "foo.bar" */ static std::string serialize(const NamespaceString& ns, - const SerializationContext& context = SerializationContext()); + const SerializationContext& context = SerializationContext(), + const SerializationOptions& = {}); // TODO SERVER-74284: Privatize the worker functions static std::string serializeForStorage( |