summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDieter Verfaillie <dieterv@optionexplicit.be>2012-04-03 07:56:47 +0200
committerJohan Dahlin <jdahlin@litl.com>2012-04-05 10:22:59 -0300
commitf1d2547d0fba03431d5f85376088638bb461869e (patch)
treefe0b0c63a5bbdf2f7b482b682685bc25b605275b
parent3288b284607e8fa7a53e061c6f10348bdb67e223 (diff)
downloadgobject-introspection-f1d2547d0fba03431d5f85376088638bb461869e.tar.gz
Make AnnotationParser._parse_comment() do what gtk-doc does.
https://bugzilla.gnome.org/show_bug.cgi?id=672254
-rw-r--r--giscanner/annotationparser.py395
1 files changed, 226 insertions, 169 deletions
diff --git a/giscanner/annotationparser.py b/giscanner/annotationparser.py
index e0a7cec6..98e8519d 100644
--- a/giscanner/annotationparser.py
+++ b/giscanner/annotationparser.py
@@ -432,184 +432,241 @@ class DocOption(object):
class AnnotationParser(object):
- COMMENT_HEADER_RE = re.compile(r'^\*[ \t]*\n[\t ]')
- COMMENT_HEADER_START_RE = re.compile(r'\n[\t ]')
- WHITESPACE_RE = re.compile(r'^\s*$')
- OPTION_RE = re.compile(r'\([A-Za-z]+[^(]*\)')
- RETURNS_RE = re.compile(r'^return(s?)( value)?:', re.IGNORECASE)
+ PARAM_FIELD_COUNT = 2
+
+ COMMENT_BLOCK_RE = re.compile(r'^\s*/\*\*\s')
+ COMMENT_BLOCK_END_RE = re.compile(r'^\s*\*+/')
+ COMMENT_STAR_RE = re.compile(r'^\s*\*\s?')
+ SYMBOL_SECTION_RE = re.compile(r'^\s*(SECTION:\s*\S+)')
+ SYMBOL_RE = re.compile(r'^\s*([\w:-]*\w)\s*:?\s*(\([a-z ]+\)\s*)*$')
+ DESCRIPTION_RE = re.compile(r'^\s*Description:')
+ RETURNS_RE = re.compile(r'^\s*(returns:|return\s+value:)', re.IGNORECASE)
+ BROKEN_RETURNS_RE = re.compile(r'^\s*(returns\b\s*)', re.IGNORECASE)
+ SINCE_RE = re.compile(r'^\s*since:', re.IGNORECASE)
+ DEPRECATED_RE = re.compile(r'^\s*deprecated:', re.IGNORECASE)
+ STABILITY_RE = re.compile(r'^\s*stability:', re.IGNORECASE)
+ EMPTY_LINE_RE = re.compile(r'^\s*$')
+ PARAMETER_NAME_RE = re.compile(r'^\s*@(\S+)\s*:\s*')
+ VARARGS_RE = re.compile(r'^.*\.\.\.$')
def __init__(self):
self._blocks = {}
def parse(self, comments):
+ in_comment_block = False
+ symbol = ''
+ symbol_options = ''
+ in_description = False
+ in_return = False
+ in_since = False
+ in_stability = False
+ in_deprecated = False
+ in_part = ''
+ description = ''
+ return_desc = ''
+ return_start = ''
+ return_style = ''
+ since_desc = ''
+ deprecated_desc = ''
+ stability_desc = ''
+ deprecated_desc = ''
+ current_param = 0
+ ignore_broken_returns = False
+ params = odict()
+
for comment in comments:
- self._parse_comment(comment)
- return self._blocks
+ comment, filename, lineno = comment
+
+ for line in comment.split('\n'):
+ line += '\n'
+
+ # Check if we are at the start of a comment block
+ if not in_comment_block:
+ if self.COMMENT_BLOCK_RE.search(line):
+ in_comment_block = True
+
+ # Reset all the symbol data
+ symbol = ''
+ symbol_options = ''
+ in_description = False
+ in_return = False
+ in_since = False
+ in_stability = False
+ in_deprecated = False
+ in_part = ''
+ description = ''
+ return_desc = ''
+ return_start = ''
+ return_style = ''
+ since_desc = ''
+ deprecated_desc = ''
+ stability_desc = ''
+ deprecated_desc = ''
+ current_param = 0
+ ignore_broken_returns = False
+ params = odict()
- def _parse_comment(self, cmt):
- # We're looking for gtk-doc comments here, they look like this:
- # /**
- # * symbol:
- #
- # Or, alternatively, with options:
- # /**
- # * symbol: (name value) ...
- #
- # symbol is currently one of:
- # - function: gtk_widget_show
- # - signal: GtkWidget::destroy
- # - property: GtkWidget:visible
- #
- comment, filename, lineno = cmt
- comment = comment.lstrip()
- if not self.COMMENT_HEADER_RE.search(comment):
- return
- comment = self.COMMENT_HEADER_RE.sub('', comment, count=1)
- comment = comment.strip()
- if not comment.startswith('* '):
- return
- comment = comment[2:]
+ continue
- match = self.COMMENT_HEADER_START_RE.search(comment)
- if match is None:
- return
- pos = match.start()
- block_header = comment[:pos]
- block_header = block_header.strip()
- cpos = block_header.find(': ')
- block_name = block_header
- raw_name = block_header
- if cpos != -1:
- block_name = block_name[:cpos].strip()
- if block_name.endswith(':'):
- block_name = block_name[:-1]
- block = DocBlock(block_name)
- block.set_position(message.Position(filename, lineno))
-
- if cpos:
- block.options = self.parse_options(block, block_header[cpos+2:])
- comment_lines = []
- parsing_parameters = True
- last_param_tag = None
-
- # Second phase: parse parameters, return values, Tag: format
- # annotations.
- #
- # Valid lines look like:
- # * @foo: some comment here
- # * @baz: (inout): This has an annotation
- # * @bar: (out) (allow-none): this is a long parameter comment
- # * that gets wrapped to the next line.
- # *
- # * Some documentation for the function.
- # *
- # * Returns: (transfer none): A value
-
- # offset of the first doctag in relation to the start of
- # the docblock, we parsed /** and the xxx: lines already
- lineno = 2
- for line in comment[pos+1:].split('\n'):
- line = line.lstrip()
- if not line.startswith('*'):
- lineno += 1
- continue
- nostar_line = line[1:]
- is_whitespace = self.WHITESPACE_RE.match(nostar_line) is not None
- if parsing_parameters and is_whitespace:
- # As soon as we find a line that's just whitespace,
- # we're done parsing the parameters.
- parsing_parameters = False
- lineno += 1
- continue
- elif is_whitespace:
- comment_lines.append('')
- lineno += 1
- continue
-
- # Explicitly only accept parameters of the form "* @foo" with one space.
- is_parameter = nostar_line.startswith(' @')
-
- # Strip the rest of the leading whitespace for the rest of
- # the code; may not actually be necessary, but still doing
- # it to avoid regressions.
- line = nostar_line.lstrip()
-
- # Look for a parameter or return value. Both of these can
- # have parenthesized options.
- first_colonspace_index = line.find(': ')
- is_return_value = self.RETURNS_RE.search(line)
- parse_options = True
- if ((is_parameter or is_return_value)
- and first_colonspace_index > 0):
- # Skip lines which has non-whitespace before first (
- first_paren = line[first_colonspace_index+1:].find('(')
- if (first_paren != -1 and
- line[first_colonspace_index+1:first_paren].strip()):
- parse_options = False
-
- if is_parameter:
- argname = line[1:first_colonspace_index]
- else:
- argname = TAG_RETURNS
- tag = DocTag(block, argname)
- tag.set_position(block.position.offset(lineno))
- line_after_first_colon_space = line[first_colonspace_index + 2:]
- second_colon_index = line_after_first_colon_space.find(':')
- if second_colon_index >= 0:
- second_colon_index += first_colonspace_index + 2
- assert line[second_colon_index] == ':'
- found_options = False
- if second_colon_index > first_colonspace_index:
- value_line = \
- line[first_colonspace_index+2:second_colon_index]
- if ')' in value_line:
- after_last_paren = value_line[value_line.rfind(')'):]
- if not after_last_paren.rstrip().endswith(')'):
- parse_options = False
- if parse_options and self.OPTION_RE.search(value_line):
- # The OPTION_RE is a little bit heuristic. If
- # we found two colons, we scan inside for something
- # that looks like (foo).
- # *Ideally* we'd change the gtk-doc format to
- # require double colons, and then there'd be
- # no ambiguity. I.e.:
- # @foo:: Some documentation here
- # But that'd be a rather incompatible change.
- found_options = True
- tag.comment = line[second_colon_index+1:].strip()
- tag.options = self.parse_options(tag, value_line)
- if not found_options:
- # We didn't find any options, so just take the whole thing
- # as documentation.
- tag.comment = line[first_colonspace_index+2:].strip()
- block.tags[argname] = tag
- last_param_tag = tag
- if is_parameter:
- block.params.append(argname)
- elif (not is_parameter) and parsing_parameters and last_param_tag:
- # We need to handle continuation lines on parameters. The
- # conditional above - if a line doesn't start with '@', we're
- # not yet in the documentation block for the whole function,
- # and we've seen at least one parameter.
- last_param_tag.comment += (' ' + line.strip())
- elif first_colonspace_index > 0:
- # The line is of the form "Tag: some value here", like:
- # Since: 0.8
- tag_name = line[:first_colonspace_index]
- if tag_name.lower() in _ALL_TAGS:
- tag_name = tag_name.lower()
- tag = DocTag(block, tag_name)
- tag.value = line[first_colonspace_index+2:]
- tag.position = block.position.offset(lineno)
- block.tags[tag_name] = tag
+ # Check if we are at the end of a comment block
+ if self.COMMENT_BLOCK_END_RE.search(line):
+ if not symbol:
+ # maybe its not even meant to be a gtk-doc comment?
+ pass
+ else:
+ block = DocBlock(symbol)
+ block.set_position(message.Position(filename, lineno))
+ block.comment = description.strip()
+
+ if symbol_options:
+ block.options = self.parse_options(block, symbol_options)
+
+ for param_name, param_desc in params.iteritems():
+ tag = DocTag(block, param_name)
+ tag.set_position(lineno)
+ block.tags[param_name] = tag
+
+ # Add the return value description onto the end of the params.
+ if return_desc:
+ tag = DocTag(block, TAG_RETURNS)
+ tag.value = return_desc.strip()
+ block.tags[TAG_RETURNS] = tag
+
+ if return_style == 'broken':
+ pass
+
+ # gtk-doc handles Section docs here, but we have no need
+ # for them...
+
+ if since_desc:
+ tag = DocTag(block, TAG_SINCE)
+ tag.value = since_desc.strip()
+ block.tags[TAG_SINCE] = tag
+
+ if stability_desc:
+ tag = DocTag(block, TAG_STABILITY)
+ tag.value = stability_desc.strip()
+ block.tags[TAG_STABILITY] = tag
+
+ if deprecated_desc:
+ tag = DocTag(block, TAG_DEPRECATED)
+ tag.value = deprecated_desc.strip()
+ block.tags[TAG_DEPRECATED] = tag
+
+ block.validate()
+ self._blocks[symbol] = block
+
+ in_comment_block = False
+ continue
+
+ # Get rid of ' * ' at start of every line in the comment block.
+ line = re.sub(self.COMMENT_STAR_RE, '', line)
+ # But make sure we don't get rid of the newline at the end.
+ if not line.endswith('\n'):
+ line += '\n'
+
+ # If we haven't found the symbol name yet, look for it.
+ if not symbol:
+ symbol_section_match = self.SECTION_RE.search(line)
+ symbol_match = self.SYMBOL_RE.search(line)
+
+ if symbol_section_match:
+ symbol = symbol_section_match.group(1)
+ ignore_broken_returns = True
+ elif symbol_match:
+ symbol = symbol_match.group(1).strip()
+ symbol_options = line[symbol_match.end(0):].strip()
+
+ continue
+
+ return_match = self.RETURNS_RE.search(line)
+ broken_return_match = self.BROKEN_RETURNS_RE.search(line)
+ since_match = self.SINCE_RE.search(line)
+ deprecated_match = self.DEPRECATED_RE.search(line)
+ stability_match = self.STABILITY_RE.search(line)
+
+ if return_match:
+ if return_style == 'broken':
+ description += return_start + return_desc
+
+ return_start = return_match.group(0)
+
+ if return_style == 'sane':
+ pass
+
+ return_style = 'sane'
+ ignore_broken_returns = True
+ return_desc = line[return_match.end(0):]
+ in_part = 'return'
+ continue
+ elif not ignore_broken_returns and broken_return_match:
+ return_start = broken_return_match.group(0)
+ return_style = 'broken'
+ return_desc = line[broken_return_match.end(0):]
+ in_part = 'return'
+ continue
+ elif since_match:
+ since_desc = line[since_match.end(0):]
+ in_part = 'since'
+ continue
+ elif deprecated_match:
+ deprecated_desc = line[deprecated_match.end(0):]
+ in_part = 'deprecated'
+ continue
+ elif stability_match:
+ stability_desc = line[stability_match.end(0):]
+ in_part = 'stability'
+ continue
+
+ if in_part == 'description':
+ # Get rid of 'Description:'
+ line = re.sub(self.DESCRIPTION_RE, '', line)
+ description += line
+ continue
+ elif in_part == 'return':
+ return_desc += line
+ continue
+ elif in_part == 'since':
+ since_desc += line
+ continue
+ elif in_part == 'stability':
+ stability_desc += line
+ continue
+ elif in_part == 'deprecated':
+ deprecated_desc += line
+ continue
+
+ # We must be in the parameters. Check for the empty line below them.
+ if self.EMPTY_LINE_RE.search(line):
+ in_part = 'description'
+ continue
+
+ # Look for a parameter name.
+ match = self.PARAMETER_NAME_RE.search(line)
+ if match:
+ param_name = match.group(1)
+ param_desc = line[match.end(0):]
+
+ # Allow varargs variations
+ if self.VARARGS_RE.search(param_name):
+ param_name = '...'
+ if param_name.lower() == 'returns':
+ return_style = 'sane'
+ ignore_broken_returns = True
+
+ params[param_name] = param_desc
+ current_param += 1
+ continue
+
+ # We must be in the middle of a parameter description, so add it on
+ # to the last element in @params.
+ if current_param:
+ params[params.keys()[-1]] += line
else:
- comment_lines.append(line)
- elif not parsing_parameters:
- comment_lines.append(line)
- lineno += 1
- block.comment = '\n'.join(comment_lines).strip()
- block.validate()
- self._blocks[block.name] = block
+ pass
+
+ return self._blocks
+
@classmethod
def parse_options(cls, tag, value):