diff options
author | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2017-03-29 11:32:58 -0400 |
---|---|---|
committer | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2017-03-29 11:34:59 -0400 |
commit | 97f86c66421ca3e16fbc260e833fd400d83b71c1 (patch) | |
tree | 09b5d5a8f01411e2efe6f5a62dee1c4cb286e4a3 /buildscripts/idl/idl/parser.py | |
parent | c3f504f2a32dabd609e3c9d0abb1f6860b9fb569 (diff) | |
download | mongo-97f86c66421ca3e16fbc260e833fd400d83b71c1.tar.gz |
SERVER-28305 IDL Parser
Diffstat (limited to 'buildscripts/idl/idl/parser.py')
-rw-r--r-- | buildscripts/idl/idl/parser.py | 309 |
1 files changed, 309 insertions, 0 deletions
diff --git a/buildscripts/idl/idl/parser.py b/buildscripts/idl/idl/parser.py new file mode 100644 index 00000000000..c945142ce7f --- /dev/null +++ b/buildscripts/idl/idl/parser.py @@ -0,0 +1,309 @@ +# Copyright (C) 2017 MongoDB Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License, version 3, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +""" +IDL Parser. + +Converts a YAML document to an idl.syntax tree. +Only validates the document is syntatically correct, not semantically. +""" +from __future__ import absolute_import, print_function, unicode_literals + +# from typing import Any, Callable, Dict, List, Set, Union +from yaml import nodes +import yaml + +from . import errors +from . import syntax + + +class _RuleDesc(object): + """ + Describe a simple parser rule for the generic YAML node parser. + + node_type is either (scalar, scalar_bool, scalar_or_sequence, or mapping) + - scalar_bool - means a scalar node which is a valid bool, populates a bool + - scalar_or_sequence - means a scalar or sequence node, populates a list + mapping_parser_func is only called when parsing a mapping yaml node + """ + + # TODO: after porting to Python 3, use an enum + REQUIRED = 1 + OPTIONAL = 2 + + def __init__(self, node_type, required=OPTIONAL, mapping_parser_func=None): + # type: (unicode, int, Callable[[errors.ParserContext,yaml.nodes.MappingNode], Any]) -> None + """Construct a parser rule description.""" + assert required == _RuleDesc.REQUIRED or required == _RuleDesc.OPTIONAL + + self.node_type = node_type # type: unicode + self.required = required # type: int + self.mapping_parser_func = mapping_parser_func # type: Callable[[errors.ParserContext,yaml.nodes.MappingNode], Any] + + +def _generic_parser( + ctxt, # type: errors.ParserContext + node, # type: Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode] + syntax_node_name, # type: unicode + syntax_node, # type: Any + mapping_rules # type: Dict[unicode, _RuleDesc] +): + # pylint: disable=too-many-branches + field_name_set = set() # type: Set[str] + + for node_pair in node.value: + first_node = node_pair[0] + second_node = node_pair[1] + + first_name = first_node.value + + if first_name in field_name_set: + ctxt.add_duplicate_error(first_node, first_name) + continue + + if first_name in mapping_rules: + rule_desc = mapping_rules[first_name] + + if rule_desc.node_type == "scalar": + if ctxt.is_scalar_node(second_node, first_name): + syntax_node.__dict__[first_name] = second_node.value + elif rule_desc.node_type == "bool_scalar": + if ctxt.is_scalar_bool_node(second_node, first_name): + syntax_node.__dict__[first_name] = ctxt.get_bool(second_node) + elif rule_desc.node_type == "scalar_or_sequence": + if ctxt.is_sequence_or_scalar_node(second_node, first_name): + syntax_node.__dict__[first_name] = ctxt.get_list(second_node) + elif rule_desc.node_type == "mapping": + if ctxt.is_mapping_node(second_node, first_name): + syntax_node.__dict__[first_name] = rule_desc.mapping_parser_func(ctxt, + second_node) + else: + raise errors.IDLError("Unknown node_type '%s' for parser rule" % + (rule_desc.node_type)) + else: + ctxt.add_unknown_node_error(first_node, syntax_node_name) + + field_name_set.add(first_name) + + # Check for any missing required fields + for name, rule_desc in mapping_rules.items(): + if not rule_desc.required == _RuleDesc.REQUIRED: + continue + + # A bool is never "None" like other types, it simply defaults to "false". + # It means "if bool is None" will always return false and there is no support for required + # 'bool' at this time. + if not rule_desc.node_type == 'bool_scalar': + if syntax_node.__dict__[name] is None: + ctxt.add_missing_required_field_error(node, syntax_node_name, name) + else: + raise errors.IDLError("Unknown node_type '%s' for parser required rule" % + (rule_desc.node_type)) + + +def _parse_global(ctxt, spec, node): + # type: (errors.ParserContext, syntax.IDLSpec, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None + """Parse a global section in the IDL file.""" + if not ctxt.is_mapping_node(node, "global"): + return + + idlglobal = syntax.Global(ctxt.file_name, node.start_mark.line, node.start_mark.column) + + _generic_parser(ctxt, node, "global", idlglobal, { + "cpp_namespace": _RuleDesc("scalar"), + "cpp_includes": _RuleDesc("scalar_or_sequence"), + }) + + if spec.globals: + ctxt.add_duplicate_error(node, "global") + return + + spec.globals = idlglobal + + +def _parse_type(ctxt, spec, name, node): + # type: (errors.ParserContext, syntax.IDLSpec, unicode, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None + """Parse a type section in the IDL file.""" + if not ctxt.is_mapping_node(node, "type"): + return + + idltype = syntax.Type(ctxt.file_name, node.start_mark.line, node.start_mark.column) + idltype.name = name + + _generic_parser(ctxt, node, "type", idltype, { + "description": _RuleDesc('scalar', _RuleDesc.REQUIRED), + "cpp_type": _RuleDesc('scalar', _RuleDesc.REQUIRED), + "bson_serialization_type": _RuleDesc('scalar_or_sequence', _RuleDesc.REQUIRED), + "bindata_subtype": _RuleDesc('scalar'), + "serializer": _RuleDesc('scalar'), + "deserializer": _RuleDesc('scalar'), + "default": _RuleDesc('scalar'), + }) + + spec.symbols.add_type(ctxt, idltype) + + +def _parse_types(ctxt, spec, node): + # type: (errors.ParserContext, syntax.IDLSpec, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None + """Parse a types section in the IDL file.""" + if not ctxt.is_mapping_node(node, "types"): + return + + for node_pair in node.value: + first_node = node_pair[0] + second_node = node_pair[1] + + first_name = first_node.value + + _parse_type(ctxt, spec, first_name, second_node) + + +def _parse_field(ctxt, name, node): + # type: (errors.ParserContext, str, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> syntax.Field + """Parse a field in a struct/command in the IDL file.""" + field = syntax.Field(ctxt.file_name, node.start_mark.line, node.start_mark.column) + field.name = name + + _generic_parser(ctxt, node, "field", field, { + "description": _RuleDesc('scalar'), + "type": _RuleDesc('scalar', _RuleDesc.REQUIRED), + "ignore": _RuleDesc("bool_scalar"), + "optional": _RuleDesc("bool_scalar"), + "default": _RuleDesc('scalar'), + }) + + return field + + +def _parse_fields(ctxt, node): + # type: (errors.ParserContext, yaml.nodes.MappingNode) -> List[syntax.Field] + """Parse a fields section in a struct in the IDL file.""" + + fields = [] + + field_name_set = set() # type: Set[str] + + for node_pair in node.value: + first_node = node_pair[0] + second_node = node_pair[1] + + first_name = first_node.value + + if first_name in field_name_set: + ctxt.add_duplicate_error(first_node, first_name) + continue + + # Simple Type + if second_node.id == "scalar": + field = syntax.Field(ctxt.file_name, node.start_mark.line, node.start_mark.column) + field.name = first_name + field.type = second_node.value + fields.append(field) + else: + field = _parse_field(ctxt, first_name, second_node) + fields.append(field) + + field_name_set.add(first_name) + + return fields + + +def _parse_struct(ctxt, spec, name, node): + # type: (errors.ParserContext, syntax.IDLSpec, unicode, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None + """Parse a struct section in the IDL file.""" + if not ctxt.is_mapping_node(node, "struct"): + return + + struct = syntax.Struct(ctxt.file_name, node.start_mark.line, node.start_mark.column) + struct.name = name + + _generic_parser(ctxt, node, "struct", struct, { + "description": _RuleDesc('scalar', _RuleDesc.REQUIRED), + "fields": _RuleDesc('mapping', mapping_parser_func=_parse_fields), + "strict": _RuleDesc("bool_scalar"), + }) + + if struct.fields is None: + ctxt.add_empty_struct_error(node, struct.name) + + spec.symbols.add_struct(ctxt, struct) + + +def _parse_structs(ctxt, spec, node): + # type: (errors.ParserContext, syntax.IDLSpec, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None + """Parse a structs section in the IDL file.""" + if not ctxt.is_mapping_node(node, "structs"): + return + + for node_pair in node.value: + first_node = node_pair[0] + second_node = node_pair[1] + + first_name = first_node.value + + _parse_struct(ctxt, spec, first_name, second_node) + + +def parse(stream, error_file_name="unknown"): + # type: (Any, unicode) -> syntax.IDLParsedSpec + """ + Parse a YAML document into an idl.syntax tree. + + stream: is a io.Stream. + error_file_name: just a file name for error messages to use. + """ + + # This will raise an exception if the YAML parse fails + root_node = yaml.compose(stream) + + ctxt = errors.ParserContext(error_file_name, errors.ParserErrorCollection()) + + spec = syntax.IDLSpec() + + # If the document is empty, we are done + if not root_node: + return syntax.IDLParsedSpec(spec, None) + + if not root_node.id == "mapping": + raise errors.IDLError( + "Expected a YAML mapping node as root node of IDL document, got '%s' instead" % + root_node.id) + + field_name_set = set() # type: Set[str] + + for node_pair in root_node.value: + first_node = node_pair[0] + second_node = node_pair[1] + + first_name = first_node.value + + if first_name in field_name_set: + ctxt.add_duplicate_error(first_node, first_name) + continue + + if first_name == "global": + _parse_global(ctxt, spec, second_node) + elif first_name == "types": + _parse_types(ctxt, spec, second_node) + elif first_name == "structs": + _parse_structs(ctxt, spec, second_node) + else: + ctxt.add_unknown_root_node_error(first_node) + + field_name_set.add(first_name) + + if ctxt.errors.has_errors(): + return syntax.IDLParsedSpec(None, ctxt.errors) + else: + return syntax.IDLParsedSpec(spec, None) |