diff options
author | Dieter Verfaillie <dieterv@optionexplicit.be> | 2013-08-12 16:54:11 +0200 |
---|---|---|
committer | Dieter Verfaillie <dieterv@optionexplicit.be> | 2013-10-08 20:55:46 +0200 |
commit | a2b22ce75937d2d996ef90e0ab683d36031365d0 (patch) | |
tree | 42a1e8e7ba992a89efd98fa41b2d5e013f7165b2 /giscanner/annotationparser.py | |
parent | 839e4f10a6b291a261c200484ff05ec44a31d93e (diff) | |
download | gobject-introspection-a2b22ce75937d2d996ef90e0ab683d36031365d0.tar.gz |
giscanner: flesh out annotation parsing and storage
- remove annotations regex, restore proper parens parsing
- drop weird DocOption() storage class and use lists/dicts
as appropriate
- make GtkDocAnnotations a simple OrderedDict subclass instead
of a weird hybrid dict/list storage class
- Deprecate the Attributes: tag, replace it with an (attributes) annotation
on the identifier
Diffstat (limited to 'giscanner/annotationparser.py')
-rw-r--r-- | giscanner/annotationparser.py | 978 |
1 files changed, 611 insertions, 367 deletions
diff --git a/giscanner/annotationparser.py b/giscanner/annotationparser.py index 26e001e1..25445b49 100644 --- a/giscanner/annotationparser.py +++ b/giscanner/annotationparser.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # -*- Mode: Python -*- + # GObject-Introspection - a framework for introspecting GObject libraries # Copyright (C) 2008-2010 Johan Dahlin # Copyright (C) 2012-2013 Dieter Verfaillie <dieterv@optionexplicit.be> @@ -109,7 +110,7 @@ import os import re from .collections import OrderedDict -from .message import Position, warn +from .message import Position, warn, error # GTK-Doc comment block parts @@ -148,7 +149,6 @@ DEPRECATED_GI_TAGS = [TAG_RETURN, TAG_RETURNS_VALUE] # 4) GObject-Introspection annotation tags. -TAG_ATTRIBUTES = 'attributes' TAG_GET_VALUE_FUNC = 'get value func' TAG_REF_FUNC = 'ref func' TAG_RENAME_TO = 'rename to' @@ -159,8 +159,7 @@ TAG_UNREF_FUNC = 'unref func' TAG_VALUE = 'value' TAG_VFUNC = 'virtual' -GI_ANN_TAGS = [TAG_ATTRIBUTES, - TAG_GET_VALUE_FUNC, +GI_ANN_TAGS = [TAG_GET_VALUE_FUNC, TAG_REF_FUNC, TAG_RENAME_TO, TAG_SET_VALUE_FUNC, @@ -170,12 +169,28 @@ GI_ANN_TAGS = [TAG_ATTRIBUTES, TAG_VALUE, TAG_VFUNC] -ALL_TAGS = GTKDOC_TAGS + DEPRECATED_GTKDOC_TAGS + DEPRECATED_GI_TAGS + GI_ANN_TAGS +# 5) Deprecated GObject-Introspection annotation tags. +# Accepted by old versions of this module while they should have been +# annotations on the identifier part instead. +# Note: This list can not be extended ever again. The GObject-Introspection project is not +# allowed to invent GTK-Doc tags. Please create new annotations instead. 
+TAG_ATTRIBUTES = 'attributes' + +DEPRECATED_GI_ANN_TAGS = [TAG_ATTRIBUTES] + +ALL_TAGS = (GTKDOC_TAGS + DEPRECATED_GTKDOC_TAGS + DEPRECATED_GI_TAGS + GI_ANN_TAGS + + DEPRECATED_GI_ANN_TAGS) + +# GObject-Introspection annotation start/end tokens +ANN_LPAR = '(' +ANN_RPAR = ')' # GObject-Introspection annotations +# 1) Supported annotations +# Note: when adding new annotations, GTK-Doc project's gtkdoc-mkdb needs to be modified too! ANN_ALLOW_NONE = 'allow-none' ANN_ARRAY = 'array' -ANN_ATTRIBUTE = 'attribute' +ANN_ATTRIBUTES = 'attributes' ANN_CLOSURE = 'closure' ANN_CONSTRUCTOR = 'constructor' ANN_DESTROY = 'destroy' @@ -183,7 +198,6 @@ ANN_ELEMENT_TYPE = 'element-type' ANN_FOREIGN = 'foreign' ANN_IN = 'in' ANN_INOUT = 'inout' -ANN_INOUT_ALT = 'in-out' ANN_METHOD = 'method' ANN_OUT = 'out' ANN_SCOPE = 'scope' @@ -191,24 +205,33 @@ ANN_SKIP = 'skip' ANN_TRANSFER = 'transfer' ANN_TYPE = 'type' -ALL_ANNOTATIONS = [ - ANN_ALLOW_NONE, - ANN_ARRAY, - ANN_ATTRIBUTE, - ANN_CLOSURE, - ANN_CONSTRUCTOR, - ANN_DESTROY, - ANN_ELEMENT_TYPE, - ANN_FOREIGN, - ANN_IN, - ANN_INOUT, - ANN_INOUT_ALT, - ANN_METHOD, - ANN_OUT, - ANN_SCOPE, - ANN_SKIP, - ANN_TRANSFER, - ANN_TYPE] +GI_ANNS = [ANN_ALLOW_NONE, + ANN_ARRAY, + ANN_ATTRIBUTES, + ANN_CLOSURE, + ANN_CONSTRUCTOR, + ANN_DESTROY, + ANN_ELEMENT_TYPE, + ANN_FOREIGN, + ANN_IN, + ANN_INOUT, + ANN_METHOD, + ANN_OUT, + ANN_SCOPE, + ANN_SKIP, + ANN_TRANSFER, + ANN_TYPE] + +# 2) Deprecated GObject-Introspection annotations +ANN_ATTRIBUTE = 'attribute' +ANN_INOUT_ALT = 'in-out' + +DEPRECATED_GI_ANNS = [ANN_ATTRIBUTE, + ANN_INOUT_ALT] + +ALL_ANNOTATIONS = GI_ANNS + DEPRECATED_GI_ANNS +DICT_ANNOTATIONS = [ANN_ARRAY, ANN_ATTRIBUTES] +LIST_ANNOTATIONS = [ann for ann in ALL_ANNOTATIONS if ann not in DICT_ANNOTATIONS] # (array) annotation options OPT_ARRAY_FIXED_SIZE = 'fixed-size' @@ -318,12 +341,7 @@ SECTION_RE = re.compile( ''', re.UNICODE | re.VERBOSE) -# Program matching symbol (function, constant, struct and enum) identifiers. 
-# -# Results in 3 symbolic groups: -# - group 1 = symbol_name -# - group 2 = delimiter -# - group 3 = annotations +# Pattern matching symbol (function, constant, struct and enum) identifiers. SYMBOL_RE = re.compile( r''' ^ # start @@ -332,44 +350,36 @@ SYMBOL_RE = re.compile( \s* # 0 or more whitespace characters (?P<delimiter>:?) # delimiter \s* # 0 or more whitespace characters - (?P<annotations>(?:\(.*?\)\s*)*) # annotations + (?P<fields>.*?) # annotations + description + \s* # 0 or more whitespace characters + :? # invalid delimiter \s* # 0 or more whitespace characters $ # end ''', re.UNICODE | re.VERBOSE) -# Program matching property identifiers. -# -# Results in 4 symbolic groups: -# - group 1 = class_name -# - group 2 = property_name -# - group 3 = delimiter -# - group 4 = annotations +# Pattern matching property identifiers. PROPERTY_RE = re.compile( r''' ^ # start \s* # 0 or more whitespace characters (?P<class_name>[\w]+) # class name \s* # 0 or more whitespace characters - :{1} # required colon + :{1} # 1 required colon \s* # 0 or more whitespace characters (?P<property_name>[\w-]*\w) # property name \s* # 0 or more whitespace characters (?P<delimiter>:?) # delimiter \s* # 0 or more whitespace characters - (?P<annotations>(?:\(.*?\)\s*)*) # annotations + (?P<fields>.*?) # annotations + description + \s* # 0 or more whitespace characters + :? # invalid delimiter \s* # 0 or more whitespace characters $ # end ''', re.UNICODE | re.VERBOSE) -# Program matching signal identifiers. -# -# Results in 4 symbolic groups: -# - group 1 = class_name -# - group 2 = signal_name -# - group 3 = delimiter -# - group 4 = annotations +# Pattern matching signal identifiers. SIGNAL_RE = re.compile( r''' ^ # start @@ -382,19 +392,15 @@ SIGNAL_RE = re.compile( \s* # 0 or more whitespace characters (?P<delimiter>:?) # delimiter \s* # 0 or more whitespace characters - (?P<annotations>(?:\(.*?\)\s*)*) # annotations + (?P<fields>.*?) 
# annotations + description + \s* # 0 or more whitespace characters + :? # invalid delimiter \s* # 0 or more whitespace characters $ # end ''', re.UNICODE | re.VERBOSE) -# Program matching parameters. -# -# Results in 4 symbolic groups: -# - group 1 = parameter_name -# - group 2 = annotations -# - group 3 = delimiter -# - group 4 = description +# Pattern matching parameters. PARAMETER_RE = re.compile( r''' ^ # start @@ -402,63 +408,30 @@ PARAMETER_RE = re.compile( @ # @ character (?P<parameter_name>[\w-]*\w|.*?\.\.\.) # parameter name \s* # 0 or more whitespace characters - :{1} # required colon + :{1} # 1 required delimiter \s* # 0 or more whitespace characters - (?P<annotations>(?:\(.*?\)\s*)*) # annotations - (?P<delimiter>:?) # delimiter - \s* # 0 or more whitespace characters - (?P<description>.*?) # description + (?P<fields>.*?) # annotations + description \s* # 0 or more whitespace characters $ # end ''', re.UNICODE | re.VERBOSE) -# Program matching tags. -# -# Results in 4 symbolic groups: -# - group 1 = tag_name -# - group 2 = annotations -# - group 3 = delimiter -# - group 4 = description -_all_tags = '|'.join(ALL_TAGS).replace(' ', '\\ ') +# Pattern matching tags. +_all_tags = '|'.join(ALL_TAGS).replace(' ', r'\s') TAG_RE = re.compile( r''' ^ # start \s* # 0 or more whitespace characters (?P<tag_name>''' + _all_tags + r''') # tag name \s* # 0 or more whitespace characters - :{1} # required colon - \s* # 0 or more whitespace characters - (?P<annotations>(?:\(.*?\)\s*)*) # annotations - (?P<delimiter>:?) # delimiter + :{1} # 1 required delimiter \s* # 0 or more whitespace characters - (?P<description>.*?) # description + (?P<fields>.*?) # annotations + value + description \s* # 0 or more whitespace characters $ # end ''', re.UNICODE | re.VERBOSE | re.IGNORECASE) -# Program matching multiline annotation continuations. 
-# This is used on multiline parameters and tags (but not on the first line) to -# generate warnings about invalid annotations spanning multiple lines. -# -# Results in 3 symbolic groups: -# - group 2 = annotations -# - group 3 = delimiter -# - group 4 = description -MULTILINE_ANNOTATION_CONTINUATION_RE = re.compile( - r''' - ^ # start - \s* # 0 or more whitespace characters - (?P<annotations>(?:\(.*?\)\s*)*) # annotations - (?P<delimiter>:) # delimiter - \s* # 0 or more whitespace characters - (?P<description>.*?) # description - \s* # 0 or more whitespace characters - $ # end - ''', - re.UNICODE | re.VERBOSE) - # Pattern matching value and description fields for TAG_DEPRECATED & TAG_SINCE tags. TAG_VALUE_VERSION_RE = re.compile( r''' @@ -490,82 +463,21 @@ TAG_VALUE_STABILITY_RE = re.compile( re.UNICODE | re.VERBOSE | re.IGNORECASE) -class DocOption(object): - - __slots__ = ('_array', '_dict') - - def __init__(self, option): - self._array = [] - self._dict = OrderedDict() - # (annotation option1=value1 option2=value2) etc - for p in option.split(' '): - if '=' in p: - name, value = p.split('=', 1) - else: - name = p - value = None - self._dict[name] = value - if value is None: - self._array.append(name) - else: - self._array.append((name, value)) - - def __repr__(self): - return '<DocOption %r>' % (self._array, ) - - def length(self): - return len(self._array) - - def one(self): - assert len(self._array) == 1 - return self._array[0] - - def flat(self): - return self._array - - def all(self): - return self._dict - - -class GtkDocAnnotations(object): - - __slots__ = ('values', 'position') - - def __init__(self): - self.values = [] - self.position = None - - def __repr__(self): - return '<GtkDocAnnotations %r>' % (self.values, ) - - def __getitem__(self, item): - for key, value in self.values: - if key == item: - return value - raise KeyError - - def __nonzero__(self): - return bool(self.values) - - def __iter__(self): - return (k for k, v in self.values) - - def 
add(self, name, value): - self.values.append((name, value)) +class GtkDocAnnotations(OrderedDict): + ''' + An ordered dictionary mapping annotation names to annotation options (if any). Annotation + options can be either a :class:`list`, a :class:`giscanner.collections.OrderedDict` + (depending on the annotation name) or :const:`None`. + ''' - def get(self, item, default=None): - for key, value in self.values: - if key == item: - return value - return default + __slots__ = ('position') - def getall(self, item): - for key, value in self.values: - if key == item: - yield value + def __init__(self, position=None): + OrderedDict.__init__(self) - def items(self): - return iter(self.values) + #: A :class:`giscanner.message.Position` instance specifying the location of the + #: annotations in the source file or :const:`None`. + self.position = position class GtkDocTag(object): @@ -584,7 +496,7 @@ class GtkDocTag(object): def _validate_annotation(self, ann_name, options, required=False, n_params=None, choices=None): - if required and options is None: + if required and len(options) == 0: warn('%s annotation needs a value' % (ann_name, ), self.position) return @@ -595,28 +507,25 @@ class GtkDocTag(object): s = 'one value' else: s = '%d values' % (n_params, ) - if ((n_params > 0 and (options is None or options.length() != n_params)) - or n_params == 0 and options is not None): - if options is None: - length = 0 - else: - length = options.length() + if ((n_params > 0 and (len(options) == 0 or len(options) != n_params)) + or n_params == 0 and len(options) != 0): + length = len(options) warn('%s annotation needs %s, not %d' % (ann_name, s, length), self.position) return if choices is not None: - option = options.one() + option = options[0] if option not in choices: warn('invalid %s annotation value: %r' % (ann_name, option, ), self.position) return def _validate_array(self, ann_name, options): - if options is None: + if len(options) == 0: return - for option, value in 
options.all().items(): + for option, value in options.items(): if option in [OPT_ARRAY_ZERO_TERMINATED, OPT_ARRAY_FIXED_SIZE]: try: int(value) @@ -637,29 +546,29 @@ class GtkDocTag(object): self.position) def _validate_closure(self, ann_name, options): - if options is not None and options.length() > 1: - warn('closure takes at most 1 value, %d given' % (options.length(), ), + if len(options) != 0 and len(options) > 1: + warn('closure takes at most 1 value, %d given' % (len(options), ), self.position) def _validate_element_type(self, ann_name, options): self._validate_annotation(ann_name, options, required=True) - if options is None: + if len(options) == 0: warn('element-type takes at least one value, none given', self.position) return - if options.length() > 2: - warn('element-type takes at most 2 values, %d given' % (options.length(), ), + if len(options) > 2: + warn('element-type takes at most 2 values, %d given' % (len(options), ), self.position) return def _validate_out(self, ann_name, options): - if options is None: + if len(options) == 0: return - if options.length() > 1: - warn('out annotation takes at most 1 value, %d given' % (options.length(), ), + if len(options) > 1: + warn('out annotation takes at most 1 value, %d given' % (len(options), ), self.position) return - option = options.one() + option = options[0] if option not in [OPT_OUT_CALLEE_ALLOCATES, OPT_OUT_CALLER_ALLOCATES]: warn("out annotation value is invalid: %r" % (option, ), @@ -670,8 +579,11 @@ class GtkDocTag(object): def serialize_one(option, value, fmt, fmt2): if value: if type(value) != str: - value = ' '.join((serialize_one(k, v, '%s=%s', '%s') - for k, v in value.all().items())) + if isinstance(value, list): + value = ' '.join(value) + else: + value = ' '.join((serialize_one(k, v, '%s=%s', '%s') + for k, v in value.items())) return fmt % (option, value) else: return fmt2 % (option, ) @@ -698,18 +610,14 @@ class GtkDocTag(object): self.description or '') def validate(self): - if 
self.name == TAG_ATTRIBUTES: - # The 'Attributes:' tag allows free form annotations so the - # validation below is most certainly going to fail. - return - for ann_name, value in self.annotations.items(): if ann_name == ANN_ALLOW_NONE: self._validate_annotation(ann_name, value, n_params=0) elif ann_name == ANN_ARRAY: self._validate_array(ann_name, value) - elif ann_name == ANN_ATTRIBUTE: - self._validate_annotation(ann_name, value, n_params=2) + elif ann_name == ANN_ATTRIBUTES: + # The 'attributes' annotation allows free form annotations. + pass elif ann_name == ANN_CLOSURE: self._validate_closure(ann_name, value) elif ann_name == ANN_DESTROY: @@ -827,73 +735,34 @@ class GtkDocCommentBlock(object): class GtkDocCommentBlockParser(object): - """ - GTK-Doc comment block parser. - + ''' Parse GTK-Doc comment blocks into a parse tree built out of :class:`GtkDocCommentBlock`, - :class:`GtkDocTag`, :class:`GtkDocAnnotations` and :class:`DocOption` objects. This - parser tries to accept malformed input whenever possible and does not emit - syntax errors. However, it does emit warnings at the slightest indication - of malformed input when possible. It is usually a good idea to heed these - warnings as malformed input is known to result in invalid GTK-Doc output. - - A GTK-Doc comment block can be constructed out of multiple parts that can - be combined to write different types of documentation. - See `GTK-Doc's documentation`_ to learn more about possible valid combinations. - Each part can be further divided into fields which are separated by `:` characters. - - Possible parts and the fields they are constructed from look like the - following (optional fields are enclosed in square brackets): - - .. 
code-block:: c - /** - * identifier_name [:annotations] - * @parameter_name [:annotations] [:description] - * - * comment_block_description - * tag_name [:annotations] [:description] - */ - - The order in which the different parts have to be specified is important:: - - - There has to be exactly 1 `identifier` part on the first line of the - comment block which consists of: - * an `identifier_name` field - * an optional `annotations` field - - Followed by 0 or more `parameters` parts, each consisting of: - * a `parameter_name` field - * an optional `annotations` field - * an optional `description` field - - Followed by at least 1 empty line signaling the beginning of - the `comment_block_description` part - - Followed by an optional `comment block description` part. - - Followed by 0 or more `tag` parts, each consisting of: - * a `tag_name` field - * an optional `annotations` field - * an optional `description` field - - Additionally, the following restrictions are in effect:: - - - Parts can optionally be separated by an empty line, except between - the `parameter` parts and the `comment block description` part where - an empty line is required (see above). - - Parts and fields cannot span multiple lines, except for - `parameter descriptions`, `tag descriptions` and the - `comment_block_description` fields. - - `parameter descriptions` fields can not span multiple paragraphs. - - `tag descriptions` and `comment block description` fields can - span multiple paragraphs. + :class:`GtkDocParameter`, :class:`GtkDocTag` and :class:`GtkDocAnnotations` + objects. This parser tries to accept malformed input whenever possible and does + not cause the process to exit on syntax errors. It does however emit: + + * warning messages at the slightest indication of recoverable malformed input and + * error messages for unrecoverable malformed input + + whenever possible. 
Recoverable, in this context, means that we can serialize the + :class:`GtkDocCommentBlock` instance using a :class:`GtkDocCommentBlockWriter` without + information being lost. It is usually a good idea to heed these warning and error messages + as malformed input can result in both: + + * invalid GTK-Doc output (HTML, pdf, ...) when the comment blocks are parsed + with GTK-Doc's gtkdoc-mkdb + * unexpected introspection behavior, for example missing parameters in the + generated .gir and .typelib files .. NOTE:: :class:`GtkDocCommentBlockParser` functionality is heavily based on gtkdoc-mkdb's `ScanSourceFile()`_ function and is currently in sync with GTK-Doc commit `47abcd5`_. - .. _GTK-Doc's documentation: - http://developer.gnome.org/gtk-doc-manual/1.18/documenting.html.en .. _ScanSourceFile(): - http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722 - .. _47abcd5: 47abcd53b8489ebceec9e394676512a181c1f1f6 - """ + http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722 + .. _47abcd5: + https://git.gnome.org/browse/gtk-doc/commit/?id=47abcd53b8489ebceec9e394676512a181c1f1f6 + ''' def parse_comment_blocks(self, comments): ''' @@ -1030,32 +899,40 @@ class GtkDocCommentBlockParser(object): #################################################################### # Check for GTK-Doc comment block identifier. 
#################################################################### - if not comment_block: + if comment_block is None: result = SECTION_RE.match(line) if result: identifier_name = 'SECTION:%s' % (result.group('section_name'), ) - column = result.start('section_name') + column_offset + identifier_delimiter = None + identifier_fields = None + identifier_fields_start = None else: result = PROPERTY_RE.match(line) if result: identifier_name = '%s:%s' % (result.group('class_name'), result.group('property_name')) - column = result.start('property_name') + column_offset + identifier_delimiter = result.group('delimiter') + identifier_fields = result.group('fields') + identifier_fields_start = result.start('fields') else: result = SIGNAL_RE.match(line) if result: identifier_name = '%s::%s' % (result.group('class_name'), result.group('signal_name')) - column = result.start('signal_name') + column_offset + identifier_delimiter = result.group('delimiter') + identifier_fields = result.group('fields') + identifier_fields_start = result.start('fields') else: result = SYMBOL_RE.match(line) if result: identifier_name = '%s' % (result.group('symbol_name'), ) - column = result.start('symbol_name') + column_offset + identifier_delimiter = result.group('delimiter') + identifier_fields = result.group('fields') + identifier_fields_start = result.start('fields') if result: in_part = PART_IDENTIFIER @@ -1064,17 +941,26 @@ class GtkDocCommentBlockParser(object): comment_block = GtkDocCommentBlock(identifier_name) comment_block.position = position - if 'annotations' in result.groupdict() and result.group('annotations') != '': - comment_block.annotations = self.parse_annotations(comment_block, - result.group('annotations')) + if identifier_fields: + (a, d) = self._parse_fields(position, + column_offset + identifier_fields_start, + original_line, + identifier_fields, True, False) + if d: + # Not an identifier due to invalid trailing description field + in_part = None + part_indent = None + 
comment_block = None + result = None + else: + comment_block.annotations = a - if 'delimiter' in result.groupdict() and result.group('delimiter') != ':': - delimiter_start = result.start('delimiter') - delimiter_column = column_offset + delimiter_start - marker = ' ' * delimiter_column + '^' - warn("missing ':' at column %s:\n%s\n%s" % - (delimiter_column + 1, original_line, marker), - position) + if not identifier_delimiter and a: + marker_position = column_offset + result.start('delimiter') + marker = ' ' * marker_position + '^' + warn('missing ":" at column %s:\n%s\n%s' % + (marker_position + 1, original_line, marker), + position) if not result: # Emit a single warning when the identifier is not found on the first line @@ -1091,10 +977,11 @@ class GtkDocCommentBlockParser(object): #################################################################### result = PARAMETER_RE.match(line) if result: - marker = ' ' * (result.start('parameter_name') + column_offset) + '^' param_name = result.group('parameter_name') - param_annotations = result.group('annotations') - param_description = result.group('description') + param_name_lower = param_name.lower() + param_fields = result.group('fields') + param_fields_start = result.start('fields') + marker = ' ' * (result.start('parameter_name') + column_offset) + '^' if in_part == PART_IDENTIFIER: in_part = PART_PARAMETERS @@ -1109,7 +996,7 @@ class GtkDocCommentBlockParser(object): # Old style GTK-Doc allowed return values to be specified as # parameters instead of tags. 
- if param_name.lower() == TAG_RETURNS: + if param_name_lower == TAG_RETURNS: param_name = TAG_RETURNS if not returns_seen: @@ -1133,9 +1020,14 @@ class GtkDocCommentBlockParser(object): tag = GtkDocTag(param_name) tag.position = position - tag.description = param_description - if param_annotations: - tag.annotations = self.parse_annotations(tag, param_annotations) + + if param_fields: + (a, d) = self._parse_fields(position, + column_offset + param_fields_start, + original_line, param_fields) + tag.annotations = a + tag.description = d + if param_name == TAG_RETURNS: comment_block.tags[param_name] = tag else: @@ -1146,9 +1038,13 @@ class GtkDocCommentBlockParser(object): #################################################################### # Check for comment block description. # - # When we are parsing comment block parameters or the comment block - # identifier (when there are no parameters) and encounter an empty - # line, we must be parsing the comment block description. + # When we are parsing parameter parts or the identifier part (when + # there are no parameters) and encounter an empty line, we must be + # parsing the comment block description. + # + # Note: it is unclear why GTK-Doc does not allow paragraph breaks + # at this location as those might be handy describing + # parameters from time to time... 
#################################################################### if (EMPTY_LINE_RE.match(line) and in_part in [PART_IDENTIFIER, PART_PARAMETERS]): in_part = PART_DESCRIPTION @@ -1160,42 +1056,99 @@ class GtkDocCommentBlockParser(object): #################################################################### result = TAG_RE.match(line) if result and line_indent <= part_indent: + part_indent = line_indent tag_name = result.group('tag_name') - tag_annotations = result.group('annotations') - tag_description = result.group('description') - + tag_name_lower = tag_name.lower() + tag_fields = result.group('fields') + tag_fields_start = result.start('fields') marker = ' ' * (result.start('tag_name') + column_offset) + '^' - # Deprecated GTK-Doc Description: tag - if tag_name.lower() == TAG_DESCRIPTION: - warn("GTK-Doc tag \"Description:\" has been deprecated:\n%s\n%s" % + if tag_name_lower in DEPRECATED_GI_ANN_TAGS: + # Deprecated GObject-Introspection specific tags. + # Emit a warning and transform these into annotations on the identifier + # instead, as agreed upon in http://bugzilla.gnome.org/show_bug.cgi?id=676133 + warn('GObject-Introspection specific GTK-Doc tag "%s" ' + 'has been deprecated, please use annotations on the identifier ' + 'instead:\n%s\n%s' % (tag_name, original_line, marker), + position) + + # Translate deprecated tag name into corresponding annotation name + ann_name = tag_name_lower.replace(' ', '-') + + if tag_name_lower == TAG_ATTRIBUTES: + transformed = '' + (a, d) = self._parse_fields(position, + result.start('tag_name') + column_offset, + line, + tag_fields.strip(), + False, + False) + + if a: + for annotation in a: + ann_options = self._parse_annotation_options_list(position, marker, + line, annotation) + n_options = len(ann_options) + if n_options == 1: + transformed = '%s %s' % (transformed, ann_options[0], ) + elif n_options == 2: + transformed = '%s %s=%s' % (transformed, ann_options[0], + ann_options[1]) + else: + # Malformed Attributes: 
tag + error('malformed "Attributes:" tag will be ignored:\n%s\n%s' % + (original_line, marker), + position) + transformed = None + + if transformed: + transformed = '%s %s' % (ann_name, transformed.strip()) + ann_name, docannotation = self._parse_annotation( + position, + column_offset + tag_fields_start, + original_line, + transformed) + stored_annotation = comment_block.annotations.get('attributes') + if stored_annotation: + error('Duplicate "Attributes:" annotation will ' + 'be ignored:\n%s\n%s' % (original_line, marker), + position) + else: + comment_block.annotations[ann_name] = docannotation + else: + ann_name, options = self._parse_annotation(position, + column_offset + tag_fields_start, + line, + '%s %s' % (ann_name, tag_fields)) + comment_block.annotations[ann_name] = options + + continue + elif tag_name_lower == TAG_DESCRIPTION: + # Deprecated GTK-Doc Description: tag + warn('GTK-Doc tag "Description:" has been deprecated:\n%s\n%s' % (original_line, marker), position) in_part = PART_DESCRIPTION - part_indent = line_indent if not comment_block.description: - comment_block.description = tag_description + comment_block.description = tag_fields else: - comment_block.description += '\n' + tag_description + comment_block.description += '\n' + tag_fields continue # Now that the deprecated stuff is out of the way, continue parsing real tags if in_part == PART_DESCRIPTION: in_part = PART_TAGS - part_indent = line_indent - if in_part != PART_TAGS: column = result.start('tag_name') + column_offset - marker = ' ' * column + '^' warn("'%s:' tag unexpected at this location:\n%s\n%s" % (tag_name, original_line, marker), position) - if tag_name.lower() in [TAG_RETURN, TAG_RETURNS, - TAG_RETURN_VALUE, TAG_RETURNS_VALUE]: + if tag_name_lower in [TAG_RETURN, TAG_RETURNS, + TAG_RETURN_VALUE, TAG_RETURNS_VALUE]: if not returns_seen: returns_seen = True else: @@ -1205,44 +1158,49 @@ class GtkDocCommentBlockParser(object): tag = GtkDocTag(TAG_RETURNS) tag.position = position - 
tag.description = tag_description - if tag_annotations: - tag.annotations = self.parse_annotations(tag, tag_annotations) + + if tag_fields: + (a, d) = self._parse_fields(position, + column_offset + tag_fields_start, + original_line, + tag_fields) + tag.annotations = a + tag.description = d + comment_block.tags[TAG_RETURNS] = tag current_tag = tag continue else: - if tag_name.lower() in comment_block.tags.keys(): - column = result.start('tag_name') + column_offset - marker = ' ' * column + '^' + if tag_name_lower in comment_block.tags.keys(): warn("multiple '%s:' tags for identifier '%s':\n%s\n%s" % (tag_name, comment_block.name, original_line, marker), position) - tag = GtkDocTag(tag_name.lower()) + tag = GtkDocTag(tag_name_lower) tag.position = position - if tag_annotations: - if tag_name.lower() == TAG_ATTRIBUTES: - tag.annotations = self.parse_annotations(tag, tag_annotations) - else: - warn("annotations not supported for tag '%s:'." % - (tag_name, ), - position) - - if tag_name.lower() in [TAG_DEPRECATED, TAG_SINCE]: - result = TAG_VALUE_VERSION_RE.match(tag_description) - tag.value = result.group('value') - tag.description = result.group('description') - elif tag_name.lower() == TAG_STABILITY: - result = TAG_VALUE_STABILITY_RE.match(tag_description) - tag.value = result.group('value').capitalize() - tag.description = result.group('description') - elif tag_name.lower() in GI_ANN_TAGS: - tag.value = tag_description - tag.description = '' - - comment_block.tags[tag_name.lower()] = tag + if tag_fields: + (a, d) = self._parse_fields(position, + column_offset + tag_fields_start, + original_line, + tag_fields) + if a: + error('annotations not supported for tag "%s:".' 
% (tag_name, ), + position) + + if tag_name_lower in [TAG_DEPRECATED, TAG_SINCE]: + result = TAG_VALUE_VERSION_RE.match(d) + tag.value = result.group('value') + tag.description = result.group('description') + elif tag_name_lower == TAG_STABILITY: + result = TAG_VALUE_STABILITY_RE.match(d) + tag.value = result.group('value').capitalize() + tag.description = result.group('description') + elif tag_name_lower in GI_ANN_TAGS: + tag.value = d + tag.description = '' + + comment_block.tags[tag_name_lower] = tag current_tag = tag continue @@ -1257,16 +1215,22 @@ class GtkDocCommentBlockParser(object): comment_block.description += '\n' + line continue elif in_part == PART_PARAMETERS: - self._validate_multiline_annotation_continuation(line, original_line, - column_offset, position) + if not current_param.description: + self._validate_multiline_annotation_continuation(line, original_line, + column_offset, position) # Append to parameter description. - current_param.description += ' ' + line.strip() + if current_param.description is None: + current_param.description = line + else: + current_param.description += ' ' + line.strip() continue elif in_part == PART_TAGS: - self._validate_multiline_annotation_continuation(line, original_line, - column_offset, position) + if not current_tag.description: + self._validate_multiline_annotation_continuation(line, original_line, + column_offset, position) + + # Append to tag description. current_tag.description += ' ' + line.strip() - continue ######################################################################## # Finished parsing this comment block. @@ -1303,48 +1267,328 @@ class GtkDocCommentBlockParser(object): def _validate_multiline_annotation_continuation(self, line, original_line, column_offset, position): ''' - Validate parameters and tags (except the first line) and generate - warnings about invalid annotations spanning multiple lines. + Validate annotatable parts' source text ensuring annotations don't span multiple lines. 
+ For example, the following comment block would result in a warning being emitted for + the fourth line:: + + /** + * shiny_function: + * @array_: (out caller-allocates) (array) + * (element-type utf8) (transfer full): A beautiful array + */ + + :param line: line to validate, stripped from ("``*/``") at start of the line. + :param original_line: original line (including ("``*/``")) being validated + :param column_offset: number of characters stripped from `line` when ("``*/``") + was removed + :param position: :class:`giscanner.message.Position` of `line` in the source file + ''' + + success, annotations, start_pos, end_pos = self._parse_annotations(position, column_offset, + original_line, line, + False) + if annotations: + marker = ' ' * (start_pos + column_offset) + '^' + warn('ignoring invalid multiline annotation continuation:\n%s\n%s' % + (original_line, marker), + position) - :param line: line to validate, stripped from ' * ' at start of the line. - :param original_line: original line to validate (used in warning messages) - :param column_offset: column width of ' * ' at the time it was stripped from `line` - :param position: position of `line` in the source file + def _parse_annotation_options_list(self, position, column, line, options): + ''' + Parse annotation options into a list. 
For example:: + + ┌──────────────────────────────────────────────────────────────┐ + │ 'option1 option2 option3' │ ─▷ source + ├──────────────────────────────────────────────────────────────┤ + │ ['option1', 'option2', 'option3'] │ ◁─ parsed options + └──────────────────────────────────────────────────────────────┘ + + :param position: :class:`giscanner.message.Position` of `line` in the source file + :param column: start column of the `options` in the source file + :param line: complete source line + :param options: annotation options to parse + :returns: a list of annotation options ''' - result = MULTILINE_ANNOTATION_CONTINUATION_RE.match(line) - if result: - column = result.start('annotations') + column_offset - marker = ' ' * column + '^' - warn('ignoring invalid multiline annotation continuation:\n' - '%s\n%s' % (original_line, marker), + parsed = [] + + if options: + result = options.find('=') + if result >= 0: + marker = ' ' * (column + result) + '^' + warn('invalid annotation options: expected a "list" but ' + 'received "key=value pairs":\n%s\n%s' % (line, marker), + position) + parsed = self._parse_annotation_options_unknown(position, column, line, options) + else: + parsed = options.split(' ') + + return parsed + + def _parse_annotation_options_dict(self, position, column, line, options): + ''' + Parse annotation options into a dict. 
For example:: + + ┌──────────────────────────────────────────────────────────────┐ + │ 'option1=value1 option2 option3=value2' │ ─▷ source + ├──────────────────────────────────────────────────────────────┤ + │ {'option1': 'value1', 'option2': None, 'option3': 'value2'} │ ◁─ parsed options + └──────────────────────────────────────────────────────────────┘ + + :param position: :class:`giscanner.message.Position` of `line` in the source file + :param column: start column of the `options` in the source file + :param line: complete source line + :param options: annotation options to parse + :returns: an ordered dictionary of annotation options + ''' + + parsed = OrderedDict() + + if options: + for p in options.split(' '): + parts = p.split('=', 1) + key = parts[0] + value = parts[1] if len(parts) == 2 else None + parsed[key] = value + + return parsed + + def _parse_annotation_options_unknown(self, position, column, line, options): + ''' + Parse annotation options into a list holding a single item. This is used when the + annotation options to parse is not known to be a list or a dict. For example:: + + ┌──────────────────────────────────────────────────────────────┐ + │ ' option1 option2 option3=value1 ' │ ─▷ source + ├──────────────────────────────────────────────────────────────┤ + │ ['option1 option2 option3=value1'] │ ◁─ parsed options + └──────────────────────────────────────────────────────────────┘ + + :param position: :class:`giscanner.message.Position` of `line` in the source file + :param column: start column of the `options` in the source file + :param line: complete source line + :param options: annotation options to parse + :returns: a list of annotation options, or :const:`None` if `options` is empty + ''' + + if options: + return [options.strip()] + + def _parse_annotation(self, position, column, line, annotation): + ''' + Parse an annotation into the annotation name and a list or dict (depending on the + name of the annotation) holding the options. 
For example:: + + ┌──────────────────────────────────────────────────────────────┐ + │ 'name opt1=value1 opt2=value2 opt3' │ ─▷ source + ├──────────────────────────────────────────────────────────────┤ + │ 'name', {'opt1': 'value1', 'opt2':'value2', 'opt3':None} │ ◁─ parsed annotation + └──────────────────────────────────────────────────────────────┘ + + ┌──────────────────────────────────────────────────────────────┐ + │ 'name opt1 opt2' │ ─▷ source + ├──────────────────────────────────────────────────────────────┤ + │ 'name', ['opt1', 'opt2'] │ ◁─ parsed annotation + └──────────────────────────────────────────────────────────────┘ + + ┌──────────────────────────────────────────────────────────────┐ + │ 'unkownname unknown list of options' │ ─▷ source + ├──────────────────────────────────────────────────────────────┤ + │ 'unkownname', ['unknown list of options'] │ ◁─ parsed annotation + └──────────────────────────────────────────────────────────────┘ + + :param position: :class:`giscanner.message.Position` of `line` in the source file + :param column: start column of the `annotation` in the source file + :param line: complete source line + :param annotation: annotation to parse + :returns: a tuple containing the annotation name and options + ''' + + # Transform deprecated type syntax "tokens" + annotation = annotation.replace('<', ANN_LPAR).replace('>', ANN_RPAR) + + parts = annotation.split(' ', 1) + ann_name = parts[0].lower() + ann_options = parts[1] if len(parts) == 2 else None + + if ann_name == ANN_INOUT_ALT: + marker = ' ' * (column) + '^' + warn('"%s" annotation has been deprecated, please use "%s" instead:\n%s\n%s' % + (ANN_INOUT_ALT, ANN_INOUT, line, marker), position) - @classmethod - def parse_annotations(cls, tag, value): - # (annotation) - # (annotation opt1 opt2 ...) - # (annotation opt1=value1 opt2=value2 ...) 
- opened = -1 - annotations = GtkDocAnnotations() - annotations.position = tag.position - - for i, c in enumerate(value): - if c == '(' and opened == -1: - opened = i + 1 - if c == ')' and opened != -1: - segment = value[opened:i] - parts = segment.split(' ', 1) - if len(parts) == 2: - name, option = parts - elif len(parts) == 1: - name = parts[0] - option = None + ann_name = ANN_INOUT + elif ann_name == ANN_ATTRIBUTE: + marker = ' ' * (column) + '^' + warn('"%s" annotation has been deprecated, please use "%s" instead:\n%s\n%s' % + (ANN_ATTRIBUTE, ANN_ATTRIBUTES, line, marker), + position) + + ann_name = ANN_ATTRIBUTES + ann_options = self._parse_annotation_options_list(position, column, line, ann_options) + n_options = len(ann_options) + if n_options == 1: + ann_options = ann_options[0] + elif n_options == 2: + ann_options = '%s=%s' % (ann_options[0], ann_options[1]) + else: + marker = ' ' * (column) + '^' + error('malformed "(attribute)" annotation will be ignored:\n%s\n%s' % + (line, marker), + position) + return None, None + + column += len(ann_name) + 2 + + if ann_name in LIST_ANNOTATIONS: + ann_options = self._parse_annotation_options_list(position, column, line, ann_options) + elif ann_name in DICT_ANNOTATIONS: + ann_options = self._parse_annotation_options_dict(position, column, line, ann_options) + else: + ann_options = self._parse_annotation_options_unknown(position, column, line, + ann_options) + + return ann_name, ann_options + + def _parse_annotations(self, position, column, line, fields, parse_options=True): + ''' + Parse annotations into a :class:`GtkDocAnnotations` object. 
+ + :param position: :class:`giscanner.message.Position` of `line` in the source file + :param column: start column of the `annotations` in the source file + :param line: complete source line + :param fields: string containing the fields to parse + :param parse_options: whether options will be parsed into a :class:`GtkDocAnnotations` + object or into a :class:`list` + :returns: a ``(success, annotations, start_pos, end_pos)`` tuple. On success, + `annotations` is a :class:`GtkDocAnnotations` object if `parse_options` + evaluates to True, a :class:`list` otherwise, and `start_pos` and `end_pos` + are the offsets in `fields` of the last annotation's opening parenthesis and + of the character following its closing parenthesis. On a parse error, + ``(False, None, None, None)`` is returned + ''' + + if parse_options: + parsed_annotations = GtkDocAnnotations(position) + else: + parsed_annotations = [] + + i = 0 + parens_level = 0 + prev_char = '' + char_buffer = [] + start_pos = 0 + end_pos = 0 + + for i, cur_char in enumerate(fields): + cur_char_is_space = cur_char.isspace() + + if cur_char == ANN_LPAR: + parens_level += 1 + + if parens_level == 1: + start_pos = i + + if prev_char == ANN_LPAR: + marker = ' ' * (column + i) + '^' + error('unexpected parentheses, annotations will be ignored:\n%s\n%s' % + (line, marker), + position) + return (False, None, None, None) + elif parens_level > 1: + char_buffer.append(cur_char) + elif cur_char == ANN_RPAR: + parens_level -= 1 + + if prev_char == ANN_LPAR: + marker = ' ' * (column + i) + '^' + error('unexpected parentheses, annotations will be ignored:\n%s\n%s' % + (line, marker), + position) + return (False, None, None, None) + elif parens_level < 0: + marker = ' ' * (column + i) + '^' + error('unbalanced parentheses, annotations will be ignored:\n%s\n%s' % + (line, marker), + position) + return (False, None, None, None) + elif parens_level == 0: + end_pos = i + 1 + + if parse_options is True: + name, options = self._parse_annotation(position, + column + start_pos, + line, + ''.join(char_buffer).strip()) + if name is not None: + if name in parsed_annotations: + marker = ' ' * (column + i) + '^' + error('multiple "%s" annotations:\n%s\n%s' % + (name, line, marker), position) + 
parsed_annotations[name] = options + else: + parsed_annotations.append(''.join(char_buffer).strip()) + + char_buffer = [] + else: + char_buffer.append(cur_char) + elif cur_char_is_space: + if parens_level > 0: + char_buffer.append(cur_char) + else: + if parens_level == 0: + break else: - raise AssertionError - if option is not None: - option = DocOption(option) - annotations.add(name, option) - opened = -1 + char_buffer.append(cur_char) + + prev_char = cur_char + + if parens_level > 0: + marker = ' ' * (column + i) + '^' + error('unbalanced parentheses, annotations will be ignored:\n%s\n%s' % + (line, marker), + position) + return (False, None, None, None) + else: + return (True, parsed_annotations, start_pos, end_pos) + + def _parse_fields(self, position, column, line, fields, parse_options=True, + validate_description_field=True): + ''' + Parse annotations out of field data. For example:: + + ┌──────────────────────────────────────────────────────────────┐ + │ '(skip): description of some parameter' │ ─▷ source + ├──────────────────────────────────────────────────────────────┤ + │ ({'skip': []}, 'description of some parameter') │ ◁─ annotations and + └──────────────────────────────────────────────────────────────┘ remaining fields + + :param position: :class:`giscanner.message.Position` of `line` in the source file + :param column: start column of `fields` in the source file + :param line: complete source line + :param fields: string containing the fields to parse + :param parse_options: whether options will be parsed into a :class:`GtkDocAnnotations` + object or into a :class:`list` + :param validate_description_field: :const:`True` to validate the description field + :returns: if `parse_options` evaluates to True a :class:`GtkDocAnnotations` object, + a :class:`list` otherwise. 
If `line` does not contain any annotations, + :const:`None` and a string holding the remaining fields + ''' + description_field = '' + success, annotations, start_pos, end_pos = self._parse_annotations(position, + column, + line, + fields, + parse_options) + if success: + description_field = fields[end_pos:].strip() + + if description_field and validate_description_field: + if description_field.startswith(':'): + description_field = description_field[1:] + else: + if end_pos > 0: + marker_position = column + end_pos + marker = ' ' * marker_position + '^' + warn('missing ":" at column %s:\n%s\n%s' % + (marker_position + 1, line, marker), + position) - return annotations + return (annotations, description_field) |