summaryrefslogtreecommitdiff
path: root/buildscripts/idl/idl/parser.py
diff options
context:
space:
mode:
authorMark Benvenuto <mark.benvenuto@mongodb.com>2017-03-29 11:32:58 -0400
committerMark Benvenuto <mark.benvenuto@mongodb.com>2017-03-29 11:34:59 -0400
commit97f86c66421ca3e16fbc260e833fd400d83b71c1 (patch)
tree09b5d5a8f01411e2efe6f5a62dee1c4cb286e4a3 /buildscripts/idl/idl/parser.py
parentc3f504f2a32dabd609e3c9d0abb1f6860b9fb569 (diff)
downloadmongo-97f86c66421ca3e16fbc260e833fd400d83b71c1.tar.gz
SERVER-28305 IDL Parser
Diffstat (limited to 'buildscripts/idl/idl/parser.py')
-rw-r--r--buildscripts/idl/idl/parser.py309
1 files changed, 309 insertions, 0 deletions
diff --git a/buildscripts/idl/idl/parser.py b/buildscripts/idl/idl/parser.py
new file mode 100644
index 00000000000..c945142ce7f
--- /dev/null
+++ b/buildscripts/idl/idl/parser.py
@@ -0,0 +1,309 @@
+# Copyright (C) 2017 MongoDB Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License, version 3,
+# as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+"""
+IDL Parser.
+
+Converts a YAML document to an idl.syntax tree.
+Only validates that the document is syntactically correct, not semantically.
+"""
+from __future__ import absolute_import, print_function, unicode_literals
+
+# from typing import Any, Callable, Dict, List, Set, Union
+from yaml import nodes
+import yaml
+
+from . import errors
+from . import syntax
+
+
+class _RuleDesc(object):
+    """
+    Describe how the generic YAML node parser should treat one mapping key.
+
+    node_type selects the branch taken by the generic parser and is one of:
+    - scalar - a scalar node, stored as its raw value
+    - bool_scalar - a scalar node which is a valid bool, stored as a bool
+    - scalar_or_sequence - a scalar or sequence node, stored as a list
+    - mapping - a mapping node, handed to mapping_parser_func
+    required is either _RuleDesc.REQUIRED or _RuleDesc.OPTIONAL (the default).
+    mapping_parser_func is only called when parsing a mapping yaml node.
+    """
+
+    # TODO: after porting to Python 3, use an enum
+    REQUIRED = 1
+    OPTIONAL = 2
+
+    def __init__(self, node_type, required=OPTIONAL, mapping_parser_func=None):
+        # type: (unicode, int, Callable[[errors.ParserContext,yaml.nodes.MappingNode], Any]) -> None
+        """Record the node type, required-ness and optional mapping callback of a rule."""
+        # Only the two class-level constants are meaningful values for 'required'.
+        assert required in (_RuleDesc.REQUIRED, _RuleDesc.OPTIONAL)
+
+        self.node_type = node_type  # type: unicode
+        self.required = required  # type: int
+        self.mapping_parser_func = mapping_parser_func  # type: Callable[[errors.ParserContext,yaml.nodes.MappingNode], Any]
+
+
+def _generic_parser(
+        ctxt,  # type: errors.ParserContext
+        node,  # type: Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]
+        syntax_node_name,  # type: unicode
+        syntax_node,  # type: Any
+        mapping_rules  # type: Dict[unicode, _RuleDesc]
+):
+    # type: (...) -> None
+    # pylint: disable=too-many-branches
+    """
+    Fill in attributes of syntax_node from the key/value pairs of a YAML mapping node.
+
+    ctxt: parser context used to validate nodes and to record errors.
+    node: the YAML node to read; a node rejected by ctxt.is_mapping_node is skipped entirely.
+    syntax_node_name: name of the section being parsed, passed along for error reporting.
+    syntax_node: object that receives one attribute per recognized key.
+    mapping_rules: maps each permitted key to the _RuleDesc describing its value.
+
+    Raises errors.IDLError if a rule has an unrecognized node_type, or if a 'bool_scalar'
+    rule is marked as required.
+    """
+    # Not every caller checks the node type first (_parse_field does not). Walking the value
+    # of a non-mapping node as key/value pairs would raise instead of reporting a parse error.
+    if not ctxt.is_mapping_node(node, syntax_node_name):
+        return
+
+    field_name_set = set()  # type: Set[str]
+
+    for first_node, second_node in node.value:
+        first_name = first_node.value
+
+        # Only the first occurrence of a key is parsed; later ones are reported and skipped.
+        if first_name in field_name_set:
+            ctxt.add_duplicate_error(first_node, first_name)
+            continue
+
+        if first_name in mapping_rules:
+            rule_desc = mapping_rules[first_name]
+
+            # Each is_* check guards the assignment: a value of the wrong kind leaves the
+            # attribute untouched.
+            if rule_desc.node_type == "scalar":
+                if ctxt.is_scalar_node(second_node, first_name):
+                    setattr(syntax_node, first_name, second_node.value)
+            elif rule_desc.node_type == "bool_scalar":
+                if ctxt.is_scalar_bool_node(second_node, first_name):
+                    setattr(syntax_node, first_name, ctxt.get_bool(second_node))
+            elif rule_desc.node_type == "scalar_or_sequence":
+                if ctxt.is_sequence_or_scalar_node(second_node, first_name):
+                    setattr(syntax_node, first_name, ctxt.get_list(second_node))
+            elif rule_desc.node_type == "mapping":
+                if ctxt.is_mapping_node(second_node, first_name):
+                    setattr(syntax_node, first_name,
+                            rule_desc.mapping_parser_func(ctxt, second_node))
+            else:
+                raise errors.IDLError("Unknown node_type '%s' for parser rule" %
+                                      (rule_desc.node_type))
+        else:
+            ctxt.add_unknown_node_error(first_node, syntax_node_name)
+
+        field_name_set.add(first_name)
+
+    # Check for any missing required fields
+    for name, rule_desc in mapping_rules.items():
+        if rule_desc.required != _RuleDesc.REQUIRED:
+            continue
+
+        # A bool is never "None" like other types, it simply defaults to "false".
+        # It means "if bool is None" will always return false and there is no support for required
+        # 'bool' at this time.
+        if rule_desc.node_type == 'bool_scalar':
+            raise errors.IDLError("Required rules are not supported for node_type '%s'" %
+                                  (rule_desc.node_type))
+
+        # getattr tolerates an attribute that is absent from the instance dictionary.
+        if getattr(syntax_node, name, None) is None:
+            ctxt.add_missing_required_field_error(node, syntax_node_name, name)
+
+
+def _parse_global(ctxt, spec, node):
+    # type: (errors.ParserContext, syntax.IDLSpec, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None
+    """
+    Parse the global section of the IDL file and store it in spec.globals.
+
+    Nothing is stored when node is not a mapping, or when spec.globals is already set; the
+    latter is reported as a duplicate.
+    """
+    if not ctxt.is_mapping_node(node, "global"):
+        return
+
+    start = node.start_mark
+    global_section = syntax.Global(ctxt.file_name, start.line, start.column)
+
+    # Both keys are optional.
+    rules = {
+        "cpp_namespace": _RuleDesc("scalar"),
+        "cpp_includes": _RuleDesc("scalar_or_sequence"),
+    }
+    # The section is parsed before the duplicate check, so its own errors are still recorded.
+    _generic_parser(ctxt, node, "global", global_section, rules)
+
+    if spec.globals:
+        ctxt.add_duplicate_error(node, "global")
+    else:
+        spec.globals = global_section
+
+
+def _parse_type(ctxt, spec, name, node):
+    # type: (errors.ParserContext, syntax.IDLSpec, unicode, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None
+    """
+    Parse one named entry of the types section and add it to spec.symbols.
+
+    Returns without adding anything when node is not a mapping.
+    """
+    if not ctxt.is_mapping_node(node, "type"):
+        return
+
+    start = node.start_mark
+    type_def = syntax.Type(ctxt.file_name, start.line, start.column)
+    type_def.name = name
+
+    # The first three keys are mandatory, the rest are optional.
+    rules = {
+        "description": _RuleDesc('scalar', _RuleDesc.REQUIRED),
+        "cpp_type": _RuleDesc('scalar', _RuleDesc.REQUIRED),
+        "bson_serialization_type": _RuleDesc('scalar_or_sequence', _RuleDesc.REQUIRED),
+        "bindata_subtype": _RuleDesc('scalar'),
+        "serializer": _RuleDesc('scalar'),
+        "deserializer": _RuleDesc('scalar'),
+        "default": _RuleDesc('scalar'),
+    }
+    _generic_parser(ctxt, node, "type", type_def, rules)
+
+    spec.symbols.add_type(ctxt, type_def)
+
+
+def _parse_types(ctxt, spec, node):
+    # type: (errors.ParserContext, syntax.IDLSpec, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None
+    """
+    Parse the types section of the IDL file.
+
+    Every key of the mapping is a type name and its value is handed to _parse_type.
+    """
+    if not ctxt.is_mapping_node(node, "types"):
+        return
+
+    for name_node, body_node in node.value:
+        _parse_type(ctxt, spec, name_node.value, body_node)
+
+
+def _parse_field(ctxt, name, node):
+    # type: (errors.ParserContext, str, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> syntax.Field
+    """
+    Parse the long form of a field in a struct/command in the IDL file.
+
+    Returns a syntax.Field named 'name' and located at the start of node. This function does
+    not itself verify that node is a mapping node before handing it to the generic parser.
+    """
+    start = node.start_mark
+    parsed_field = syntax.Field(ctxt.file_name, start.line, start.column)
+    parsed_field.name = name
+
+    # Only 'type' is mandatory for a field.
+    rules = {
+        "description": _RuleDesc('scalar'),
+        "type": _RuleDesc('scalar', _RuleDesc.REQUIRED),
+        "ignore": _RuleDesc("bool_scalar"),
+        "optional": _RuleDesc("bool_scalar"),
+        "default": _RuleDesc('scalar'),
+    }
+    _generic_parser(ctxt, node, "field", parsed_field, rules)
+
+    return parsed_field
+
+
+def _parse_fields(ctxt, node):
+    # type: (errors.ParserContext, yaml.nodes.MappingNode) -> List[syntax.Field]
+    """
+    Parse a fields section in a struct in the IDL file.
+
+    Each entry of the mapping is either the shorthand form, where the value is a scalar
+    holding the type name, or the long form, which is handed to _parse_field. A repeated
+    field name is reported through ctxt and only its first occurrence is kept.
+
+    Returns the parsed syntax.Field objects in document order.
+    """
+    fields = []
+
+    field_name_set = set()  # type: Set[str]
+
+    for first_node, second_node in node.value:
+        first_name = first_node.value
+
+        if first_name in field_name_set:
+            ctxt.add_duplicate_error(first_node, first_name)
+            continue
+
+        # Simple Type
+        if second_node.id == "scalar":
+            # Locate the field at its own value node, as _parse_field does, rather than at
+            # the start of the enclosing fields mapping, which is the same for every entry.
+            field = syntax.Field(ctxt.file_name, second_node.start_mark.line,
+                                 second_node.start_mark.column)
+            field.name = first_name
+            field.type = second_node.value
+        else:
+            field = _parse_field(ctxt, first_name, second_node)
+
+        fields.append(field)
+        field_name_set.add(first_name)
+
+    return fields
+
+
+def _parse_struct(ctxt, spec, name, node):
+    # type: (errors.ParserContext, syntax.IDLSpec, unicode, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None
+    """
+    Parse one named entry of the structs section and add it to spec.symbols.
+
+    Returns without adding anything when node is not a mapping. A struct whose fields are
+    still None after parsing is reported as empty but is added regardless.
+    """
+    if not ctxt.is_mapping_node(node, "struct"):
+        return
+
+    start = node.start_mark
+    struct_def = syntax.Struct(ctxt.file_name, start.line, start.column)
+    struct_def.name = name
+
+    rules = {
+        "description": _RuleDesc('scalar', _RuleDesc.REQUIRED),
+        "fields": _RuleDesc('mapping', mapping_parser_func=_parse_fields),
+        "strict": _RuleDesc("bool_scalar"),
+    }
+    _generic_parser(ctxt, node, "struct", struct_def, rules)
+
+    if struct_def.fields is None:
+        ctxt.add_empty_struct_error(node, struct_def.name)
+
+    spec.symbols.add_struct(ctxt, struct_def)
+
+
+def _parse_structs(ctxt, spec, node):
+    # type: (errors.ParserContext, syntax.IDLSpec, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None
+    """
+    Parse the structs section of the IDL file.
+
+    Every key of the mapping is a struct name and its value is handed to _parse_struct.
+    """
+    if not ctxt.is_mapping_node(node, "structs"):
+        return
+
+    for name_node, body_node in node.value:
+        _parse_struct(ctxt, spec, name_node.value, body_node)
+
+
+def parse(stream, error_file_name="unknown"):
+    # type: (Any, unicode) -> syntax.IDLParsedSpec
+    """
+    Parse a YAML document into an idl.syntax tree.
+
+    stream: the YAML input handed to yaml.compose.
+    error_file_name: just a file name for error messages to use.
+
+    Returns a syntax.IDLParsedSpec holding either the parsed spec or, if any error was
+    recorded, the collected errors. Raises errors.IDLError when the root node of the
+    document is not a mapping.
+    """
+    # This will raise an exception if the YAML parse fails
+    root_node = yaml.compose(stream)
+
+    ctxt = errors.ParserContext(error_file_name, errors.ParserErrorCollection())
+
+    spec = syntax.IDLSpec()
+
+    # If the document is empty, we are done
+    if not root_node:
+        return syntax.IDLParsedSpec(spec, None)
+
+    if root_node.id != "mapping":
+        raise errors.IDLError(
+            "Expected a YAML mapping node as root node of IDL document, got '%s' instead" %
+            root_node.id)
+
+    # One parser per recognized top-level section.
+    section_parsers = {
+        "global": _parse_global,
+        "types": _parse_types,
+        "structs": _parse_structs,
+    }
+
+    seen_sections = set()  # type: Set[str]
+
+    for section_node, body_node in root_node.value:
+        section_name = section_node.value
+
+        # A repeated top-level key is reported and its body is not parsed again.
+        if section_name in seen_sections:
+            ctxt.add_duplicate_error(section_node, section_name)
+            continue
+
+        section_parser = section_parsers.get(section_name)
+        if section_parser is None:
+            ctxt.add_unknown_root_node_error(section_node)
+        else:
+            section_parser(ctxt, spec, body_node)
+
+        seen_sections.add(section_name)
+
+    if ctxt.errors.has_errors():
+        return syntax.IDLParsedSpec(None, ctxt.errors)
+
+    return syntax.IDLParsedSpec(spec, None)