7 files changed, 1266 insertions, 229 deletions
diff --git a/Makefile-giscanner.am b/Makefile-giscanner.am
index c529c4eb..f20135ae 100644
--- a/Makefile-giscanner.am
+++ b/Makefile-giscanner.am
@@ -30,6 +30,7 @@ pkgpyexec_PYTHON =			\
 	giscanner/__init__.py		\
 	giscanner/annotationmain.py	\
 	giscanner/annotationparser.py	\
+	giscanner/annotationpatterns.py	\
 	giscanner/ast.py		\
 	giscanner/cachestore.py		\
 	giscanner/codegen.py		\
diff --git a/Makefile.am b/Makefile.am
index 9cc54cfb..a36c200a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -61,8 +61,10 @@ check-local:
 	pre-commit hook.\n\ncopy misc/pre-commit to .git/hooks"; false))
 	@test ! -d $(top_srcdir)/.git || (test -x $(top_srcdir)/.git/hooks/pre-commit || (echo -e "ERROR: pre-commit \
 	is not executable\n\nrun chmod +x .git/hooks/pre-commit"; false))
-	@echo "  PEP-8 INQUISITION"
+	@echo "TEST: PEP-8 INQUISITION"
 	@find $(top_srcdir)/giscanner -name \*.py | sort | uniq | xargs $(PYTHON) $(top_srcdir)/misc/pep8.py --repeat --exclude=config.py
+	@echo "TEST: Annotation pattern programs"
+	$(PYTHON) $(top_srcdir)/giscanner/annotationpatterns.py
 	@touch $(top_builddir)/.make-check-passed
 
 check-pyflakes:
diff --git a/giscanner/annotationmain.py b/giscanner/annotationmain.py
index 97897e7f..4df6e831 100644
--- a/giscanner/annotationmain.py
+++ b/giscanner/annotationmain.py
@@ -52,7 +52,7 @@ def annotation_main(args):
     if options.packages:
         process_packages(options, options.packages)
 
-    logger = message.MessageLogger.get(namespace='')
+    logger = message.MessageLogger.get(namespace=None)
 
     ss = create_source_scanner(options, args)
 
diff --git a/giscanner/annotationparser.py b/giscanner/annotationparser.py
index 98e8519d..a4e8b732 100644
--- a/giscanner/annotationparser.py
+++ b/giscanner/annotationparser.py
@@ -1,6 +1,7 @@
 # -*- Mode: Python -*-
 # GObject-Introspection - a framework for introspecting GObject libraries
 # Copyright (C) 2008-2010 Johan Dahlin
+# Copyright (C) 2012 Dieter Verfaillie <dieterv@optionexplicit.be>
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -18,19 +19,40 @@
 # 02110-1301, USA.
 #
 
+
 # AnnotationParser - extract annotations from gtk-doc comments
 
+
 import re
 
 from . import message
+from .annotationpatterns import (COMMENT_START_RE, COMMENT_END_RE,
+                                 COMMENT_STAR_RE, EMPTY_LINE_RE,
+                                 SECTION_RE, SYMBOL_RE, PROPERTY_RE, SIGNAL_RE,
+                                 PARAMETER_RE, DESCRIPTION_TAG_RE, TAG_RE,
+                                 MULTILINE_ANNOTATION_CONTINUATION_RE)
 from .odict import odict
 
+
+# GTK-Doc comment block parts
+PART_IDENTIFIER = 'identifier'
+PART_PARAMETERS = 'parameters'
+PART_DESCRIPTION = 'description'
+PART_TAGS = 'tags'
+
+# Identifiers
+IDENTIFIER_SECTION = 'section'
+IDENTIFIER_SYMBOL = 'symbol'
+IDENTIFIER_PROPERTY = 'property'
+IDENTIFIER_SIGNAL = 'signal'
+
 # Tags - annotations applied to comment blocks
 TAG_VFUNC = 'virtual'
 TAG_SINCE = 'since'
 TAG_STABILITY = 'stability'
 TAG_DEPRECATED = 'deprecated'
 TAG_RETURNS = 'returns'
+TAG_RETURNVALUE = 'return value'
 TAG_ATTRIBUTES = 'attributes'
 TAG_RENAME_TO = 'rename to'
 TAG_TYPE = 'type'
@@ -45,6 +67,7 @@ _ALL_TAGS = [TAG_VFUNC,
              TAG_STABILITY,
              TAG_DEPRECATED,
              TAG_RETURNS,
+             TAG_RETURNVALUE,
              TAG_ATTRIBUTES,
              TAG_RENAME_TO,
              TAG_TYPE,
@@ -163,9 +186,6 @@ class DocBlock(object):
                 lines.append(tag.to_gtk_doc_tag())
 
         comment = ''
-        #comment += '# %d \"%s\"\n' % (
-        #    self.position.line,
-        #    self.position.filename)
         comment += '/**\n'
         for line in lines:
             line = line.rstrip()
@@ -256,6 +276,11 @@ class DocTag(object):
                              self.comment or '')
 
     def validate(self):
+        if self.name == TAG_ATTRIBUTES:
+            # The 'Attributes:' tag allows free form annotations so the
+            # validation below is most certainly going to fail.
+            return
+
         for option in self.options:
             value = self.options[option]
             if option == OPT_ALLOW_NONE:
@@ -432,250 +457,447 @@ class DocOption(object):
 
 
 class AnnotationParser(object):
-    PARAM_FIELD_COUNT = 2
-
-    COMMENT_BLOCK_RE = re.compile(r'^\s*/\*\*\s')
-    COMMENT_BLOCK_END_RE = re.compile(r'^\s*\*+/')
-    COMMENT_STAR_RE = re.compile(r'^\s*\*\s?')
-    SYMBOL_SECTION_RE = re.compile(r'^\s*(SECTION:\s*\S+)')
-    SYMBOL_RE = re.compile(r'^\s*([\w:-]*\w)\s*:?\s*(\([a-z ]+\)\s*)*$')
-    DESCRIPTION_RE = re.compile(r'^\s*Description:')
-    RETURNS_RE = re.compile(r'^\s*(returns:|return\s+value:)', re.IGNORECASE)
-    BROKEN_RETURNS_RE = re.compile(r'^\s*(returns\b\s*)', re.IGNORECASE)
-    SINCE_RE = re.compile(r'^\s*since:', re.IGNORECASE)
-    DEPRECATED_RE = re.compile(r'^\s*deprecated:', re.IGNORECASE)
-    STABILITY_RE = re.compile(r'^\s*stability:', re.IGNORECASE)
-    EMPTY_LINE_RE = re.compile(r'^\s*$')
-    PARAMETER_NAME_RE = re.compile(r'^\s*@(\S+)\s*:\s*')
-    VARARGS_RE = re.compile(r'^.*\.\.\.$')
-
-    def __init__(self):
-        self._blocks = {}
+    '''
+    GTK-Doc comment block parser.
+
+    Parses GTK-Doc comment blocks into a parse tree built out of :class:`DockBlock`,
+    :class:`DocTag`, :class:`DocOptions` and :class:`DocOption` objects. This
+    parser tries to accept malformed input whenever possible and does not emit
+    syntax errors. However, it does emit warnings at the slightest indication
+    of malformed input when possible.
+
+    A GTK-Doc comment block can be constructed out of multiple parts that can
+    be combined to write different types of documentation.
+    See `GTK-Doc's documentation`_ to learn more about possible valid combinations.
+    Each part can be further divided into fields which are separated by `:` characters.
+
+    Possible parts and the fields they are constructed from look like the
+    following (optional fields are enclosed in square brackets):
+
+    .. code-block:: c
+        /**
+         * identifier_name: [annotations]
+         * @parameter_name: [annotations:] [description]
+         *
+         * description
+         * tag_name: [annotations:] [description]
+         */
+
+    - Parts and fields cannot span multiple lines, except for parameter descriptions,
+      tag descriptions and comment block descriptions.
+    - There has to be exactly 1 `identifier` part on the first line of the
+      comment block which consists of:
+          * an `identifier_name` field
+          * an optional `annotations` field
+    - There can be 0 or more `parameter` parts following the `identifier` part,
+      each consisting of:
+          * a `parameter_name` filed
+          * an optional `annotations` field
+          * an optional `description` field
+    - An empty lines signals the end of the `parameter` parts and the beginning
+      of the (free form) comment block `description` part.
+    - There can be 0 or 1 `description` parts following the `description` part.
+    - There can be 0 or more `tag` parts following the `description` part,
+      each consisting of:
+          * a `tag_name` field
+          * an optional `annotations` field
+          * an optional `description` field
+
+    .. NOTE:: :class:`AnnotationParser` functionality is heavily based on gtkdoc-mkdb's
+        `ScanSourceFile()`_ function and is currently in sync with gtk-doc
+        commit `b41641b`_.
+
+    .. _types of documentation:
+            http://developer.gnome.org/gtk-doc-manual/1.18/documenting.html.en
+    .. _ScanSourceFile():
+            http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
+    .. _b41641b: b41641bd75f870afff7561ceed8a08456da57565
+    '''
 
     def parse(self, comments):
-        in_comment_block = False
-        symbol = ''
-        symbol_options = ''
-        in_description = False
-        in_return = False
-        in_since = False
-        in_stability = False
-        in_deprecated = False
-        in_part = ''
-        description = ''
-        return_desc = ''
-        return_start = ''
-        return_style = ''
-        since_desc = ''
-        deprecated_desc = ''
-        stability_desc = ''
-        deprecated_desc = ''
-        current_param = 0
-        ignore_broken_returns = False
-        params = odict()
+        '''
+        Parses multiple GTK-Doc comment blocks.
 
-        for comment in comments:
-            comment, filename, lineno = comment
-
-            for line in comment.split('\n'):
-                line += '\n'
-
-                # Check if we are at the start of a comment block
-                if not in_comment_block:
-                    if self.COMMENT_BLOCK_RE.search(line):
-                        in_comment_block = True
-
-                        # Reset all the symbol data
-                        symbol = ''
-                        symbol_options = ''
-                        in_description = False
-                        in_return = False
-                        in_since = False
-                        in_stability = False
-                        in_deprecated = False
-                        in_part = ''
-                        description = ''
-                        return_desc = ''
-                        return_start = ''
-                        return_style = ''
-                        since_desc = ''
-                        deprecated_desc = ''
-                        stability_desc = ''
-                        deprecated_desc = ''
-                        current_param = 0
-                        ignore_broken_returns = False
-                        params = odict()
+        :param comments: a list of (comment, filename, lineno) tuples
+        :returns: a list of :class:`DocBlock` or ``None`` objects
+        '''
 
-                    continue
+        comment_blocks = {}
 
-                # Check if we are at the end of a comment block
-                if self.COMMENT_BLOCK_END_RE.search(line):
-                    if not symbol:
-                        # maybe its not even meant to be a gtk-doc comment?
-                        pass
-                    else:
-                        block = DocBlock(symbol)
-                        block.set_position(message.Position(filename, lineno))
-                        block.comment = description.strip()
-
-                        if symbol_options:
-                            block.options = self.parse_options(block, symbol_options)
-
-                        for param_name, param_desc in params.iteritems():
-                            tag = DocTag(block, param_name)
-                            tag.set_position(lineno)
-                            block.tags[param_name] = tag
-
-                        # Add the return value description onto the end of the params.
-                        if return_desc:
-                            tag = DocTag(block, TAG_RETURNS)
-                            tag.value = return_desc.strip()
-                            block.tags[TAG_RETURNS] = tag
-
-                            if return_style == 'broken':
-                                pass
-
-                        # gtk-doc handles Section docs here, but we have no need
-                        # for them...
-
-                        if since_desc:
-                            tag = DocTag(block, TAG_SINCE)
-                            tag.value = since_desc.strip()
-                            block.tags[TAG_SINCE] = tag
-
-                        if stability_desc:
-                            tag = DocTag(block, TAG_STABILITY)
-                            tag.value = stability_desc.strip()
-                            block.tags[TAG_STABILITY] = tag
-
-                        if deprecated_desc:
-                            tag = DocTag(block, TAG_DEPRECATED)
-                            tag.value = deprecated_desc.strip()
-                            block.tags[TAG_DEPRECATED] = tag
-
-                        block.validate()
-                        self._blocks[symbol] = block
-
-                        in_comment_block = False
-                        continue
-
-                # Get rid of ' * ' at start of every line in the comment block.
-                line = re.sub(self.COMMENT_STAR_RE, '', line)
-                # But make sure we don't get rid of the newline at the end.
-                if not line.endswith('\n'):
-                    line += '\n'
-
-                # If we haven't found the symbol name yet, look for it.
-                if not symbol:
-                    symbol_section_match = self.SECTION_RE.search(line)
-                    symbol_match = self.SYMBOL_RE.search(line)
-
-                    if symbol_section_match:
-                        symbol = symbol_section_match.group(1)
-                        ignore_broken_returns = True
-                    elif symbol_match:
-                        symbol = symbol_match.group(1).strip()
-                        symbol_options = line[symbol_match.end(0):].strip()
+        for comment in comments:
+            comment_block = self.parse_comment_block(comment)
 
-                    continue
+            if comment_block is not None:
+                if comment_block.name in comment_blocks:
+                    message.warn("multiple comment blocks documenting '%s:' identifier." %
+                                 (comment_block.name),
+                                 comment_block.position)
 
-                return_match = self.RETURNS_RE.search(line)
-                broken_return_match = self.BROKEN_RETURNS_RE.search(line)
-                since_match = self.SINCE_RE.search(line)
-                deprecated_match = self.DEPRECATED_RE.search(line)
-                stability_match = self.STABILITY_RE.search(line)
+                # Always store the block even if it's a duplicate for
+                # backward compatibility...
+                comment_blocks[comment_block.name] = comment_block
 
-                if return_match:
-                    if return_style == 'broken':
-                        description += return_start + return_desc
+        return comment_blocks
 
-                    return_start = return_match.group(0)
+    def parse_comment_block(self, comment):
+        '''
+        Parses a single GTK-Doc comment block.
 
-                    if return_style == 'sane':
-                        pass
+        :param comment: a (comment, filename, lineno) tuple
+        :returns: a :class:`DocBlock` object or ``None``
+        '''
 
-                    return_style = 'sane'
-                    ignore_broken_returns = True
-                    return_desc = line[return_match.end(0):]
-                    in_part = 'return'
-                    continue
-                elif not ignore_broken_returns and broken_return_match:
-                    return_start = broken_return_match.group(0)
-                    return_style = 'broken'
-                    return_desc = line[broken_return_match.end(0):]
-                    in_part = 'return'
-                    continue
-                elif since_match:
-                    since_desc = line[since_match.end(0):]
-                    in_part = 'since'
-                    continue
-                elif deprecated_match:
-                    deprecated_desc = line[deprecated_match.end(0):]
-                    in_part = 'deprecated'
-                    continue
-                elif stability_match:
-                    stability_desc = line[stability_match.end(0):]
-                    in_part = 'stability'
-                    continue
+        comment, filename, lineno = comment
+        comment_lines = list(enumerate(comment.split('\n')))
 
-                if in_part == 'description':
-                    # Get rid of 'Description:'
-                    line = re.sub(self.DESCRIPTION_RE, '', line)
-                    description += line
-                    continue
-                elif in_part == 'return':
-                    return_desc += line
-                    continue
-                elif in_part == 'since':
-                    since_desc += line
-                    continue
-                elif in_part == 'stability':
-                    stability_desc += line
-                    continue
-                elif in_part == 'deprecated':
-                    deprecated_desc += line
+        # Check for the start the comment block.
+        if COMMENT_START_RE.search(comment_lines[0][1]):
+            del comment_lines[0]
+        else:
+            # Not a GTK-Doc comment block.
+            return None
+
+        # Check for the end the comment block.
+        if COMMENT_END_RE.search(comment_lines[-1][1]):
+            del comment_lines[-1]
+
+        # If we get this far, we are inside a GTK-Doc comment block.
+        return self._parse_comment_block(comment_lines, filename, lineno)
+
+    def _parse_comment_block(self, comment_lines, filename, lineno):
+        '''
+        Parses a single GTK-Doc comment block stripped from it's
+        comment start (/**) and comment end (*/) marker lines.
+
+        :param comment_lines: GTK-Doc comment block stripped from it's comment
+                              start (/**) and comment end (*/) marker lines
+        :param filename: source file name where the comment block originated from
+        :param lineno:  line in the source file where the comment block starts
+        :returns: a :class:`DocBlock` object or ``None``
+
+        .. NOTE:: If you are tempted to refactor this method and split it
+            further up (for example into _parse_identifier(), _parse_parameters(),
+            _parse_description(), _parse_tags() methods) then please resist the
+            urge. It is considered important that this method should be more or
+            less easily comparable with gtkdoc-mkdb's `ScanSourceFile()`_ function.
+
+            The different parsing steps are marked with a comment surrounded
+            by `#` characters in an attempt to make it clear what is going on.
+
+        .. _ScanSourceFile():
+                http://git.gnome.org/browse/gtk-doc/tree/gtkdoc-mkdb.in#n3722
+        '''
+        comment_block = None
+        in_part = None
+        identifier = None
+        current_param = None
+        current_tag = None
+        returns_seen = False
+
+        for line_offset, line in comment_lines:
+            position = message.Position(filename, line_offset + lineno)
+
+            result = COMMENT_STAR_RE.match(line)
+            if result:
+                # Store the original line (without \n) and column offset
+                # so we can generate meaningful warnings later on.
+                original_line = line
+                column_offset = result.end(0)
+
+                # Get rid of ' * ' at start of the line.
+                line = line[result.end(0):]
+
+            ####################################################################
+            # Check for GTK-Doc comment block identifier.
+            ####################################################################
+            if not comment_block:
+                # The correct identifier name would have the colon at the end
+                # but maintransformer.py does not expect us to do that. So
+                # make sure to compute an identifier_name without the colon and
+                # a real_identifier_name with the colon.
+
+                if not identifier:
+                    result = SECTION_RE.search(line)
+                    if result:
+                        identifier = IDENTIFIER_SECTION
+                        real_identifier_name = 'SECTION:%s' % (result.group('section_name'))
+                        identifier_name = real_identifier_name
+                        column = result.start('section_name') + column_offset
+
+                if not identifier:
+                    result = SYMBOL_RE.search(line)
+                    if result:
+                        identifier = IDENTIFIER_SYMBOL
+                        real_identifier_name = '%s:' % (result.group('symbol_name'))
+                        identifier_name = '%s' % (result.group('symbol_name'))
+                        column = result.start('symbol_name') + column_offset
+
+                if not identifier:
+                    result = PROPERTY_RE.search(line)
+                    if result:
+                        identifier = IDENTIFIER_PROPERTY
+                        real_identifier_name = '%s:%s:' % (result.group('class_name'),
+                                                           result.group('property_name'))
+                        identifier_name = '%s:%s' % (result.group('class_name'),
+                                                     result.group('property_name'))
+                        column = result.start('property_name') + column_offset
+
+                if not identifier:
+                    result = SIGNAL_RE.search(line)
+                    if result:
+                        identifier = IDENTIFIER_SIGNAL
+                        real_identifier_name = '%s::%s:' % (result.group('class_name'),
+                                                            result.group('signal_name'))
+                        identifier_name = '%s::%s' % (result.group('class_name'),
+                                                      result.group('signal_name'))
+                        column = result.start('signal_name') + column_offset
+
+                if identifier:
+                    in_part = PART_IDENTIFIER
+
+                    comment_block = DocBlock(identifier_name)
+                    comment_block.set_position(position)
+
+                    if 'annotations' in result.groupdict():
+                        comment_block.options = self.parse_options(comment_block,
+                                                                   result.group('annotations'))
+
+                    if 'colon' in result.groupdict() and result.group('colon') != ':':
+                        colon_start = result.start('colon')
+                        colon_column = column_offset + colon_start
+                        marker = ' '*colon_column + '^'
+                        message.warn("missing ':' at column %s:\n%s\n%s" %
+                                     (colon_start, original_line, marker),
+                                     position)
                     continue
-
-                # We must be in the parameters. Check for the empty line below them.
-                if self.EMPTY_LINE_RE.search(line):
-                    in_part = 'description'
+                else:
+                    # If we get here, the identifier was not recognized, so
+                    # ignore the rest of the block just like the old annotation
+                    # parser did. Doing this is a bit more strict than
+                    # gtkdoc-mkdb (which continues to search for the identifier
+                    # until either it is found or the end of the block is
+                    # reached). In practice, however, ignoring the block is the
+                    # right thing to do because sooner or later some long
+                    # descriptions will contain something matching an identifier
+                    # pattern by accident.
+                    marker = ' '*column_offset + '^'
+                    message.warn('ignoring unrecognized GTK-Doc comment block, identifier not '
+                                 'found:\n%s\n%s' % (original_line, marker),
+                                 position)
+
+                    return None
+
+            ####################################################################
+            # Check for comment block parameters.
+            ####################################################################
+            result = PARAMETER_RE.search(line)
+            if result:
+                param_name = result.group('parameter_name')
+                param_annotations = result.group('annotations')
+                param_description = result.group('description')
+
+                if in_part == PART_IDENTIFIER:
+                    in_part = PART_PARAMETERS
+
+                if in_part != PART_PARAMETERS:
+                    column = result.start('parameter_name') + column_offset
+                    marker = ' '*column + '^'
+                    message.warn("'@%s' parameter unexpected at this location:\n%s\n%s" %
+                                 (param_name, original_line, marker),
+                                 position)
+
+                # Old style GTK-Doc allowed return values to be specified as
+                # parameters instead of tags.
+                if param_name.lower() == TAG_RETURNS:
+                    param_name = TAG_RETURNS
+
+                    if not returns_seen:
+                        returns_seen = True
+                    else:
+                        message.warn("encountered multiple 'Returns' parameters or tags for "
+                                     "'%s'." % (comment_block.name),
+                                     position)
+                elif param_name in comment_block.tags.keys():
+                    column = result.start('parameter_name') + column_offset
+                    marker = ' '*column + '^'
+                    message.warn("multiple '@%s' parameters for identifier '%s':\n%s\n%s" %
+                                 (param_name, comment_block.name, original_line, marker),
+                                 position)
+
+                tag = DocTag(comment_block, param_name)
+                tag.set_position(position)
+                tag.comment = param_description
+                if param_annotations:
+                    tag.options = self.parse_options(tag, param_annotations)
+                comment_block.tags[param_name] = tag
+                if not param_name == TAG_RETURNS:
+                    comment_block.params.append(param_name)
+                current_param = tag
+                continue
+
+            ####################################################################
+            # Check for comment block description.
+            #
+            # When we are parsing comment block parameters or the comment block
+            # identifier (when there are no parameters) and encounter an empty
+            # line, we must be parsing the comment block description
+            ####################################################################
+            if (EMPTY_LINE_RE.search(line)
+            and (in_part == PART_IDENTIFIER or in_part == PART_PARAMETERS)):
+                in_part = PART_DESCRIPTION
+                continue
+
+            ####################################################################
+            # Check for GTK-Doc comment block tags.
+            ####################################################################
+            result = TAG_RE.search(line)
+            if result:
+                tag_name = result.group('tag_name')
+                tag_annotations = result.group('annotations')
+                tag_description = result.group('description')
+
+                if in_part == PART_DESCRIPTION:
+                    in_part = PART_TAGS
+
+                if in_part != PART_TAGS:
+                    column = result.start('tag_name') + column_offset
+                    marker = ' '*column + '^'
+                    message.warn("'%s:' tag unexpected at this location:\n%s\n%s" %
+                                 (tag_name, original_line, marker),
+                                 position)
+
+                if tag_name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
+                    if not returns_seen:
+                        returns_seen = True
+                    else:
+                        message.warn("encountered multiple 'Returns' parameters or tags for "
+                                     "'%s'." % (comment_block.name),
+                                     position)
+
+                    tag = DocTag(comment_block, TAG_RETURNS)
+                    tag.position = position
+                    tag.comment = tag_description
+                    if tag_annotations:
+                        tag.options = self.parse_options(tag, tag_annotations)
+                    comment_block.tags[TAG_RETURNS] = tag
+                    current_tag = tag
                     continue
-
-                # Look for a parameter name.
-                match = self.PARAMETER_NAME_RE.search(line)
-                if match:
-                    param_name = match.group(1)
-                    param_desc = line[match.end(0):]
-
-                    # Allow varargs variations
-                    if self.VARARGS_RE.search(param_name):
-                        param_name = '...'
-                    if param_name.lower() == 'returns':
-                        return_style = 'sane'
-                        ignore_broken_returns = True
-
-                    params[param_name] = param_desc
-                    current_param += 1
+                elif tag_name.lower() in _ALL_TAGS:
+                    if tag_name.lower() in comment_block.tags.keys():
+                        column = result.start('tag_name') + column_offset
+                        marker = ' '*column + '^'
+                        message.warn("multiple '%s:' tags for identifier '%s':\n%s\n%s" %
+                                     (tag_name, comment_block.name, original_line, marker),
+                                     position)
+
+                    tag = DocTag(comment_block, tag_name.lower())
+                    tag.position = position
+                    tag.value = tag_description
+                    if tag_annotations:
+                        if tag_name.lower() == TAG_ATTRIBUTES:
+                            tag.options = self.parse_options(tag, tag_annotations)
+                        else:
+                            message.warn("annotations not supported for tag '%s'." %
+                                         (tag_name),
+                                         position)
+                    comment_block.tags[tag_name.lower()] = tag
+                    current_tag = tag
                     continue
 
-                # We must be in the middle of a parameter description, so add it on
-                # to the last element in @params.
-                if current_param:
-                    params[params.keys()[-1]] += line
+            ####################################################################
+            # If we get here, we must be in the middle of a multiline
+            # comment block, parameter or tag description.
+            ####################################################################
+            if in_part == PART_DESCRIPTION:
+                if not comment_block.comment:
+                    # Backwards compatibility with old style GTK-Doc
+                    # comment blocks where Description used to be a comment
+                    # block tag. Simply get rid of 'Description:'.
+                    line = re.sub(DESCRIPTION_TAG_RE, '', line)
+                    comment_block.comment = line
+                else:
+                    comment_block.comment += '\n' + line
+                continue
+            elif in_part == PART_PARAMETERS:
+                if not current_param:
+                    message.warn('parameter expected:\n%s' %
+                                 (line),
+                                 position)
+                else:
+                    self._validate_multiline_annotation_continuation(line, original_line,
+                                                                     column_offset, position)
+
+                    # Append to parameter description.
+                    current_param.comment += ' ' + line.strip()
+            elif in_part == PART_TAGS:
+                if not current_tag:
+                    message.warn('tag expected:\n%s' %
+                                 (line),
+                                 position)
                 else:
-                    pass
+                    self._validate_multiline_annotation_continuation(line, original_line,
+                                                                     column_offset, position)
 
-        return self._blocks
+                    # Append to tag description.
+                    if current_tag.name.lower() in [TAG_RETURNS, TAG_RETURNVALUE]:
+                        current_tag.comment += ' ' + line.strip()
+                    else:
+                        current_tag.value += ' ' + line.strip()
+
+        ########################################################################
+        # Finished parsing this comment block.
+        ########################################################################
+        # We have picked up a couple of \n characters that where not
+        # intended. Strip those.
+        if comment_block.comment:
+            comment_block.comment = comment_block.comment.strip()
+        else:
+            comment_block.comment = ''
 
+        for tag in comment_block.tags.itervalues():
+            if tag.comment:
+                tag.comment = tag.comment.strip()
+            else:
+                tag.comment = None
+
+            if tag.value:
+                tag.value = tag.value.strip()
+            else:
+                tag.value = ''
+
+        # Validate and store block.
+        comment_block.validate()
+        return comment_block
+
+    def _validate_multiline_annotation_continuation(self, line, original_line,
+                                                          column_offset, position):
+        '''
+        Validate parameters and tags (except the first line) and generate
+        warnings about invalid annotations spanning multiple lines.
+
+        :param line: line to validate, stripped from ' * ' at start of the line.
+        :param original_line: original line to validate (used in warning messages)
+        :param column_offset: column width of ' * ' at the time it was stripped from `line`
+        :param position: position of `line` in the source file
+        '''
+
+        result = MULTILINE_ANNOTATION_CONTINUATION_RE.search(line)
+        if result:
+            line = result.group('description')
+            column = result.start('annotations') + column_offset
+            marker = ' '*column + '^'
+            message.warn('ignoring invalid multiline annotation continuation:\n'
+                         '%s\n%s' % (original_line, marker),
+                         position)
 
     @classmethod
     def parse_options(cls, tag, value):
         # (foo)
-        # (bar opt1 opt2...)
+        # (bar opt1 opt2 ...)
         opened = -1
         options = DocOptions()
         options.position = tag.position
-        last = None
+
         for i, c in enumerate(value):
             if c == '(' and opened == -1:
                 opened = i+1
@@ -692,7 +914,6 @@ class AnnotationParser(object):
                 if option is not None:
                     option = DocOption(tag, option)
                 options.add(name, option)
-                last = i + 2
                 opened = -1
 
         return options
diff --git a/giscanner/annotationpatterns.py b/giscanner/annotationpatterns.py
new file mode 100644
index 00000000..80b9da41
--- /dev/null
+++ b/giscanner/annotationpatterns.py
@@ -0,0 +1,808 @@
+# -*- Mode: Python -*-
+# GObject-Introspection - a framework for introspecting GObject libraries
+# Copyright (C) 2012 Dieter Verfaillie <dieterv@optionexplicit.be>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
+
+
+'''
+This module provides regular expression programs that can be used to identify
+and extract useful information from different parts of GTK-Doc comment blocks.
+These programs are built to:
+ - match (or substitute) a single comment block line at a time;
+ - support MULTILINE mode and should support (but remains untested)
+   LOCALE and UNICODE modes.
+'''
+
+
+import re
+
+
+# Program matching the start of a comment block.
+#
+# Results in 0 symbolic groups.
+COMMENT_START_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    /                                        # 1 forward slash character
+    \*{2}                                    # exactly 2 asterisk characters
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    $                                        # end
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+# Program matching the end of a comment block.
+#
+# Results in 0 symbolic groups.
+COMMENT_END_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    \*+                                      # 1 or more asterisk characters
+    /                                        # 1 forward slash character
+    $                                        # end
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+# Program matching the "*" at the beginning of every
+# line inside a comment block.
+#
+# Results in 0 symbolic groups.
+COMMENT_STAR_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    \*                                       # 1 asterisk character
+    [^\S\n\r]?                               # 0 or 1 whitespace characters
+                                             #   Carefull: removing more would
+                                             #   break embedded example program
+                                             #   indentation
+    ''',
+    re.VERBOSE)
+
+# Program matching an empty line.
+#
+# Results in 0 symbolic groups.
+EMPTY_LINE_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or 1 whitespace characters
+    $                                        # end
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+# Program matching SECTION identifiers.
+#
+# Results in 2 symbolic groups:
+#   - group 1 = colon
+#   - group 2 = section_name
+SECTION_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    SECTION                                  # SECTION
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<colon>:?)                            # colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<section_name>\w\S+)?                 # section name
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    $
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+# Program matching symbol (function, constant, struct and enum) identifiers.
+#
+# Results in 3 symbolic groups:
+#   - group 1 = symbol_name
+#   - group 2 = colon
+#   - group 3 = annotations
+SYMBOL_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<symbol_name>[\w-]*\w)                # symbol name
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<colon>:?)                            # colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    $                                        # end
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+# Program matching property identifiers.
+#
+# Results in 4 symbolic groups:
+#   - group 1 = class_name
+#   - group 2 = property_name
+#   - group 3 = colon
+#   - group 4 = annotations
+PROPERTY_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<class_name>[\w]+)                    # class name
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    :{1}                                     # required colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<property_name>[\w-]*\w)              # property name
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<colon>:?)                            # colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    $                                        # end
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+# Program matching signal identifiers.
+#
+# Results in 4 symbolic groups:
+#   - group 1 = class_name
+#   - group 2 = signal_name
+#   - group 3 = colon
+#   - group 4 = annotations
+SIGNAL_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<class_name>[\w]+)                    # class name
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    :{2}                                     # 2 required colons
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<signal_name>[\w-]*\w)                # signal name
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<colon>:?)                            # colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    $                                        # end
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+# Program matching parameters.
+#
+# Results in 4 symbolic groups:
+#   - group 1 = parameter_name
+#   - group 2 = annotations
+#   - group 3 = colon
+#   - group 4 = description
+PARAMETER_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    @                                        # @ character
+    (?P<parameter_name>[\w-]*\w|\.\.\.)      # parameter name
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    :{1}                                     # required colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
+    (?P<colon>:?)                            # colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<description>.*?)                     # description
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    $                                        # end
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+# Program matching old style "Description:" tag.
+#
+# Results in 0 symbolic groups.
+DESCRIPTION_TAG_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    Description:                             # 'Description:' literal
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+# Program matching tags.
+#
+# Results in 4 symbolic groups:
+#   - group 1 = tag_name
+#   - group 2 = annotations
+#   - group 3 = colon
+#   - group 4 = description
+TAG_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<tag_name>virtual|since|stability|
+                 deprecated|returns|
+                 return\ value|attributes|
+                 rename\ to|type|
+                 unref\ func|ref\ func|
+                 set\ value\ func|
+                 get\ value\ func|
+                 transfer|value)             # tag name
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    :{1}                                     # required colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
+    (?P<colon>:?)                            # colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<description>.*?)                     # description
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    $                                        # end
+    ''',
+    re.VERBOSE | re.MULTILINE | re.IGNORECASE)
+
+# Program matching multiline annotation continuations.
+# This is used on multiline parameters and tags (but not on the first line) to
+# generate warnings about invalid annotations spanning multiple lines.
+#
+# Results in 3 symbolic groups:
+#   - group 2 = annotations
+#   - group 3 = colon
+#   - group 4 = description
+MULTILINE_ANNOTATION_CONTINUATION_RE = re.compile(r'''
+    ^                                        # start
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<annotations>(?:\(.*?\)[^\S\n\r]*)*)  # annotations
+    (?P<colon>:)                             # colon
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    (?P<description>.*?)                     # description
+    [^\S\n\r]*                               # 0 or more whitespace characters
+    $                                        # end
+    ''',
+    re.VERBOSE | re.MULTILINE)
+
+
+if __name__ == '__main__':
+    import unittest
+
+    identifier_section_tests = [
+        (SECTION_RE, 'TSIEOCN',
+             None),
+        (SECTION_RE, 'section',
+             None),
+        (SECTION_RE, 'section:',
+             None),
+        (SECTION_RE, 'section:test',
+             None),
+        (SECTION_RE, 'SECTION',
+             {'colon': '',
+              'section_name': None}),
+        (SECTION_RE, 'SECTION  \t   ',
+             {'colon': '',
+              'section_name': None}),
+        (SECTION_RE, '   \t  SECTION  \t   ',
+             {'colon': '',
+              'section_name': None}),
+        (SECTION_RE, 'SECTION:   \t ',
+             {'colon': ':',
+              'section_name': None}),
+        (SECTION_RE, 'SECTION   :   ',
+             {'colon': ':',
+              'section_name': None}),
+        (SECTION_RE, '   SECTION : ',
+             {'colon': ':',
+              'section_name': None}),
+        (SECTION_RE, 'SECTION:gtkwidget',
+             {'colon': ':',
+              'section_name': 'gtkwidget'}),
+        (SECTION_RE, 'SECTION:gtkwidget  ',
+             {'colon': ':',
+              'section_name': 'gtkwidget'}),
+        (SECTION_RE, '  SECTION:gtkwidget',
+             {'colon': ':',
+              'section_name': 'gtkwidget'}),
+        (SECTION_RE, '  SECTION:gtkwidget\t  ',
+             {'colon': ':',
+              'section_name': 'gtkwidget'}),
+        (SECTION_RE, 'SECTION:    gtkwidget   ',
+             {'colon': ':',
+              'section_name': 'gtkwidget'}),
+        (SECTION_RE, 'SECTION   :  gtkwidget',
+             {'colon': ':',
+              'section_name': 'gtkwidget'}),
+        (SECTION_RE, 'SECTION    gtkwidget \f  ',
+             {'colon': '',
+              'section_name': 'gtkwidget'})]
+
+    identifier_symbol_tests = [
+        (SYMBOL_RE, 'GBaseFinalizeFunc:',
+             {'colon': ':',
+              'symbol_name': 'GBaseFinalizeFunc',
+              'annotations': ''}),
+        (SYMBOL_RE, 'gtk_widget_show  ',
+             {'colon': '',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, '  gtk_widget_show',
+             {'colon': '',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, '  gtk_widget_show  ',
+             {'colon': '',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, 'gtk_widget_show:',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, 'gtk_widget_show :',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, 'gtk_widget_show:  ',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, 'gtk_widget_show :  ',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, '  gtk_widget_show:',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, '  gtk_widget_show :',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, '  gtk_widget_show:  ',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, '  gtk_widget_show :  ',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': ''}),
+        (SYMBOL_RE, 'gtk_widget_show (skip)',
+             {'colon': '',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)'}),
+        (SYMBOL_RE, 'gtk_widget_show: (skip)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)'}),
+        (SYMBOL_RE, 'gtk_widget_show : (skip)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)'}),
+        (SYMBOL_RE, 'gtk_widget_show:  (skip)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)'}),
+        (SYMBOL_RE, 'gtk_widget_show :  (skip)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)'}),
+        (SYMBOL_RE, '  gtk_widget_show:(skip)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)'}),
+        (SYMBOL_RE, '  gtk_widget_show :(skip)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)'}),
+        (SYMBOL_RE, '  gtk_widget_show:  (skip)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)'}),
+        (SYMBOL_RE, '  gtk_widget_show :  (skip)    \t    ',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)    \t    '}),
+        (SYMBOL_RE, '  gtk_widget_show  :  (skip)   \t    ',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)   \t    '}),
+        (SYMBOL_RE, 'gtk_widget_show:(skip)(test1)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)(test1)'}),
+        (SYMBOL_RE, 'gtk_widget_show (skip)(test1)',
+             {'colon': '',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)(test1)'}),
+        (SYMBOL_RE, 'gtk_widget_show: (skip) (test1)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1)'}),
+        (SYMBOL_RE, 'gtk_widget_show : (skip) (test1)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1)'}),
+        (SYMBOL_RE, 'gtk_widget_show:  (skip) (test1)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1)'}),
+        (SYMBOL_RE, 'gtk_widget_show :  (skip) (test1)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1)'}),
+        (SYMBOL_RE, '  gtk_widget_show:(skip) (test1)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1)'}),
+        (SYMBOL_RE, '  gtk_widget_show :(skip) (test1)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1)'}),
+        (SYMBOL_RE, '  gtk_widget_show:  (skip) (test1)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1)'}),
+        (SYMBOL_RE, '  gtk_widget_show :  (skip) (test1)  ',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1)  '}),
+        (SYMBOL_RE, 'gtk_widget_show: (skip) (test1) (test-2)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1) (test-2)'}),
+        (SYMBOL_RE, 'gtk_widget_show : (skip) (test1) (test-2)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1) (test-2)'}),
+        (SYMBOL_RE, 'gtk_widget_show:  (skip) (test1) (test-2)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1) (test-2)'}),
+        (SYMBOL_RE, 'gtk_widget_show :  (skip) (test1) (test-2)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1) (test-2)'}),
+        (SYMBOL_RE, '  gtk_widget_show:(skip) (test1) (test-2)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1) (test-2)'}),
+        (SYMBOL_RE, '  gtk_widget_show :(skip) (test1) (test-2)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1) (test-2)'}),
+        (SYMBOL_RE, '  gtk_widget_show:  (skip) (test1) (test-2)',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1) (test-2)'}),
+        (SYMBOL_RE, '  gtk_widget_show :  (skip) (test1) (test-2)  ',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip) (test1) (test-2)  '}),
+        (SYMBOL_RE, '  gtk_widget_show  :  (skip)  (test1)  (test-2)  ',
+             {'colon': ':',
+              'symbol_name': 'gtk_widget_show',
+              'annotations': '(skip)  (test1)  (test-2)  '}),
+        # constants
+        (SYMBOL_RE, 'MY_CONSTANT:',
+             {'colon': ':',
+              'symbol_name': 'MY_CONSTANT',
+              'annotations': ''}),
+        # structs
+        (SYMBOL_RE, 'FooWidget:',
+             {'colon': ':',
+              'symbol_name': 'FooWidget',
+              'annotations': ''}),
+        # enums
+        (SYMBOL_RE, 'Something:',
+             {'colon': ':',
+              'symbol_name': 'Something',
+              'annotations': ''})]
+
+    identifier_property_tests = [
+        # simple property name
+        (PROPERTY_RE, 'GtkWidget:name (skip)',
+             {'class_name': 'GtkWidget',
+              'property_name': 'name',
+              'colon': '',
+              'annotations': '(skip)'}),
+        (PROPERTY_RE, 'GtkWidget:name',
+             {'class_name': 'GtkWidget',
+              'property_name': 'name',
+              'colon': '',
+              'annotations': ''}),
+        (PROPERTY_RE, ' GtkWidget :name',
+             {'class_name': 'GtkWidget',
+              'property_name': 'name',
+              'colon': '',
+              'annotations': ''}),
+        (PROPERTY_RE, 'GtkWidget: name ',
+             {'class_name': 'GtkWidget',
+              'property_name': 'name',
+              'colon': '',
+              'annotations': ''}),
+        (PROPERTY_RE, '  GtkWidget  :  name  ',
+             {'class_name': 'GtkWidget',
+              'property_name': 'name',
+              'colon': '',
+              'annotations': ''}),
+        (PROPERTY_RE, 'GtkWidget:name:',
+             {'class_name': 'GtkWidget',
+              'property_name': 'name',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, 'GtkWidget:name:  ',
+             {'class_name': 'GtkWidget',
+              'property_name': 'name',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, '  GtkWidget:name:',
+             {'class_name': 'GtkWidget',
+              'property_name': 'name',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, 'Something:name:',
+             {'class_name': 'Something',
+              'property_name': 'name',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, 'Something:name:  ',
+             {'class_name': 'Something',
+              'property_name': 'name',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, '  Something:name:',
+             {'class_name': 'Something',
+              'property_name': 'name',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, 'Weird-thing:name:',
+             None),
+        (PROPERTY_RE, 'really-weird_thing:name:',
+             None),
+        (PROPERTY_RE, 'GWin32InputStream:handle:',
+             {'class_name': 'GWin32InputStream',
+              'property_name': 'handle',
+              'colon': ':',
+              'annotations': ''}),
+        # property name that contains a dash
+        (PROPERTY_RE, 'GtkWidget:double-buffered (skip)',
+             {'class_name': 'GtkWidget',
+              'property_name': 'double-buffered',
+              'colon': '',
+              'annotations': '(skip)'}),
+        (PROPERTY_RE, 'GtkWidget:double-buffered',
+             {'class_name': 'GtkWidget',
+              'property_name': 'double-buffered',
+              'colon': '',
+              'annotations': ''}),
+        (PROPERTY_RE, ' GtkWidget :double-buffered',
+             {'class_name': 'GtkWidget',
+              'property_name': 'double-buffered',
+              'colon': '',
+              'annotations': ''}),
+        (PROPERTY_RE, 'GtkWidget: double-buffered ',
+             {'class_name': 'GtkWidget',
+              'property_name': 'double-buffered',
+              'colon': '',
+              'annotations': ''}),
+        (PROPERTY_RE, '  GtkWidget  :  double-buffered  ',
+             {'class_name': 'GtkWidget',
+              'property_name': 'double-buffered',
+              'colon': '',
+              'annotations': ''}),
+        (PROPERTY_RE, 'GtkWidget:double-buffered:',
+             {'class_name': 'GtkWidget',
+              'property_name': 'double-buffered',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, 'GtkWidget:double-buffered:  ',
+             {'class_name': 'GtkWidget',
+              'property_name': 'double-buffered',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, '  GtkWidget:double-buffered:',
+             {'class_name': 'GtkWidget',
+              'property_name': 'double-buffered',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, 'Something:double-buffered:',
+             {'class_name': 'Something',
+              'property_name': 'double-buffered',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, 'Something:double-buffered:  ',
+             {'class_name': 'Something',
+              'property_name': 'double-buffered',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, '  Something:double-buffered:',
+             {'class_name': 'Something',
+              'property_name': 'double-buffered',
+              'colon': ':',
+              'annotations': ''}),
+        (PROPERTY_RE, 'Weird-thing:double-buffered:',
+             None),
+        (PROPERTY_RE, 'really-weird_thing:double-buffered:',
+             None),
+        (PROPERTY_RE, ' GMemoryOutputStream:realloc-function: (skip)',
+             {'class_name': 'GMemoryOutputStream',
+              'property_name': 'realloc-function',
+              'colon': ':',
+              'annotations': '(skip)'})]
+
+    identifier_signal_tests = [
+        # simple property name
+        (SIGNAL_RE, 'GtkWidget::changed: (skip)',
+             {'class_name': 'GtkWidget',
+              'signal_name': 'changed',
+              'colon': ':',
+              'annotations': '(skip)'}),
+        (SIGNAL_RE, 'GtkWidget::changed:',
+             {'class_name': 'GtkWidget',
+              'signal_name': 'changed',
+              'colon': ':',
+              'annotations': ''}),
+        (SIGNAL_RE, 'Something::changed:',
+             {'class_name': 'Something',
+              'signal_name': 'changed',
+              'colon': ':',
+              'annotations': ''}),
+        (SIGNAL_RE, 'Weird-thing::changed:',
+             None),
+        (SIGNAL_RE, 'really-weird_thing::changed:',
+             None),
+        # property name that contains a dash
+        (SIGNAL_RE, 'GtkWidget::hierarchy-changed: (skip)',
+             {'class_name': 'GtkWidget',
+              'signal_name': 'hierarchy-changed',
+              'colon': ':',
+              'annotations': '(skip)'}),
+        (SIGNAL_RE, 'GtkWidget::hierarchy-changed:',
+             {'class_name': 'GtkWidget',
+              'signal_name': 'hierarchy-changed',
+              'colon': ':',
+              'annotations': ''}),
+        (SIGNAL_RE, 'Something::hierarchy-changed:',
+             {'class_name': 'Something',
+              'signal_name': 'hierarchy-changed',
+              'colon': ':',
+              'annotations': ''}),
+        (SIGNAL_RE, 'Weird-thing::hierarchy-changed:',
+             None),
+        (SIGNAL_RE, 'really-weird_thing::hierarchy-changed:',
+             None)]
+
+    parameter_tests = [
+        (PARAMETER_RE, '@Short_description: Base class for all widgets  ',
+             {'parameter_name': 'Short_description',
+              'annotations': '',
+              'colon': '',
+              'description': 'Base class for all widgets'}),
+        (PARAMETER_RE, '@...: the value of the first property, followed optionally by more',
+             {'parameter_name': '...',
+              'annotations': '',
+              'colon': '',
+              'description': 'the value of the first property, followed optionally by more'}),
+        (PARAMETER_RE, '@widget: a #GtkWidget',
+             {'parameter_name': 'widget',
+              'annotations': '',
+              'colon': '',
+              'description': 'a #GtkWidget'}),
+        (PARAMETER_RE, '@widget_pointer: (inout) (transfer none): '
+                       'address of a variable that contains @widget',
+             {'parameter_name': 'widget_pointer',
+              'annotations': '(inout) (transfer none)',
+              'colon': ':',
+              'description': 'address of a variable that contains @widget'}),
+        (PARAMETER_RE, '@weird_thing: (inout) (transfer none) (allow-none) (attribute) (destroy) '
+                       '(foreign) (inout) (out) (transfer) (skip) (method): some weird @thing',
+             {'parameter_name': 'weird_thing',
+              'annotations': '(inout) (transfer none) (allow-none) (attribute) (destroy) '
+                                '(foreign) (inout) (out) (transfer) (skip) (method)',
+              'colon': ':',
+              'description': 'some weird @thing'}),
+        (PARAMETER_RE, '@data: a pointer to the element data. The data may be moved as elements '
+                       'are added to the #GByteArray.',
+             {'parameter_name': 'data',
+              'annotations': '',
+              'colon': '',
+              'description': 'a pointer to the element data. The data may be moved as elements '
+                                'are added to the #GByteArray.'}),
+        (PARAMETER_RE, '@a: a #GSequenceIter',
+             {'parameter_name': 'a',
+              'annotations': '',
+              'colon': '',
+              'description': 'a #GSequenceIter'}),
+        (PARAMETER_RE, '@keys: (array length=n_keys) (element-type GQuark) (allow-none):',
+             {'parameter_name': 'keys',
+              'annotations': '(array length=n_keys) (element-type GQuark) (allow-none)',
+              'colon': ':',
+              'description': ''})]
+
+    tag_tests = [
+        (TAG_RE, 'Since 3.0',
+             None),
+        (TAG_RE, 'Since: 3.0',
+             {'tag_name': 'Since',
+              'annotations': '',
+              'colon': '',
+              'description': '3.0'}),
+        (TAG_RE, 'Attributes: (inout) (transfer none): some note about attributes',
+             {'tag_name': 'Attributes',
+              'annotations': '(inout) (transfer none)',
+              'colon': ':',
+              'description': 'some note about attributes'}),
+        (TAG_RE, 'Rename to: something_else',
+             {'tag_name': 'Rename to',
+              'annotations': '',
+              'colon': '',
+              'description': 'something_else'}),
+        (TAG_RE, '@Deprecated: Since 2.8, reference counting is done atomically',
+             None),
+        (TAG_RE, 'Returns %TRUE and does weird things',
+             None),
+        (TAG_RE, 'Returns: a #GtkWidget',
+             {'tag_name': 'Returns',
+              'annotations': '',
+              'colon': '',
+              'description': 'a #GtkWidget'}),
+        (TAG_RE, 'Return value: (transfer none): The binary data that @text responds. '
+                 'This pointer',
+             {'tag_name': 'Return value',
+              'annotations': '(transfer none)',
+              'colon': ':',
+              'description': 'The binary data that @text responds. This pointer'}),
+        (TAG_RE, 'Return value: (transfer full) (array length=out_len) (element-type guint8):',
+             {'tag_name': 'Return value',
+              'annotations': '(transfer full) (array length=out_len) (element-type guint8)',
+              'colon': ':',
+              'description': ''}),
+        (TAG_RE, 'Returns: A boolean value, but let me tell you a bit about this boolean.  It',
+             {'tag_name': 'Returns',
+              'annotations': '',
+              'colon': '',
+              'description': 'A boolean value, but let me tell you a bit about this boolean.  '
+                             'It'}),
+        (TAG_RE, 'Returns: (transfer container) (element-type GObject.ParamSpec): a',
+             {'tag_name': 'Returns',
+              'annotations': '(transfer container) (element-type GObject.ParamSpec)',
+              'colon': ':',
+              'description': 'a'}),
+        (TAG_RE, 'Return value: (type GLib.HashTable<utf8,GLib.HashTable<utf8,utf8>>) '
+                 '(transfer full):',
+             {'tag_name': 'Return value',
+              'annotations': '(type GLib.HashTable<utf8,GLib.HashTable<utf8,utf8>>) '
+                             '(transfer full)',
+              'colon': ':',
+              'description': ''})]
+
+
+    def create_tests(cls, test_name, testcases):
+        for (index, testcase) in enumerate(testcases):
+            real_test_name = '%s_%03d' % (test_name, index)
+
+            test_method = cls.__create_test__(testcase)
+            test_method.__name__ = real_test_name
+            setattr(cls, real_test_name, test_method)
+
+
+    class TestProgram(unittest.TestCase):
+        @classmethod
+        def __create_test__(cls, testcase):
+            def do_test(self):
+                (program, text, expected) = testcase
+
+                match = program.search(text)
+
+                if expected is None:
+                    msg = 'Program matched text but shouldn\'t:\n"%s"'
+                    self.assertTrue(match is None, msg % (text))
+                else:
+                    msg = 'Program should match text but didn\'t:\n"%s"'
+                    self.assertTrue(match is not None, msg % (text))
+
+                    for key, value in expected.items():
+                        msg = 'expected "%s" for "%s" but match returned "%s"'
+                        msg = msg % (value, key, match.group(key))
+                        self.assertEqual(match.group(key), value, msg)
+
+            return do_test
+
+
+    # Create tests from data
+    create_tests(TestProgram, 'test_identifier_section', identifier_section_tests)
+    create_tests(TestProgram, 'test_identifier_symbol', identifier_symbol_tests)
+    create_tests(TestProgram, 'test_identifier_property', identifier_property_tests)
+    create_tests(TestProgram, 'test_identifier_signal', identifier_signal_tests)
+    create_tests(TestProgram, 'test_parameter', parameter_tests)
+    create_tests(TestProgram, 'test_tag', tag_tests)
+
+    # Run test suite
+    unittest.main()
diff --git a/giscanner/maintransformer.py b/giscanner/maintransformer.py
index 7d274b34..a71d6ef9 100644
--- a/giscanner/maintransformer.py
+++ b/giscanner/maintransformer.py
@@ -607,7 +607,9 @@ usage is void (*_gtk_reserved1)(void);"""
         if deprecated_tag is not None:
             value = deprecated_tag.value
             if ': ' in value:
-                version, desc = value.split(': ')
+                colon = value.find(': ')
+                version = value[:colon]
+                desc = value[colon+2:]
             else:
                 desc = value
                 version = None
@@ -617,8 +619,7 @@ usage is void (*_gtk_reserved1)(void);"""
 
         annos_tag = block.get(TAG_ATTRIBUTES)
         if annos_tag is not None:
-            options = AnnotationParser.parse_options(annos_tag, annos_tag.value)
-            for key, value in options.iteritems():
+            for key, value in annos_tag.options.iteritems():
                 if value:
                     node.attributes.append((key, value.one()))
 
diff --git a/giscanner/message.py b/giscanner/message.py
index 1f7195de..8a948cd3 100644
--- a/giscanner/message.py
+++ b/giscanner/message.py
@@ -90,7 +90,7 @@ class MessageLogger(object):
 
     def log(self, log_type, text, positions=None, prefix=None):
         """Log a warning, using optional file positioning information.
-If the warning is related to a ast.Node type, see log_node_warning()."""
+If the warning is related to a ast.Node type, see log_node()."""
         utils.break_on_debug_flag('warning')
 
         self._warning_count += 1
@@ -123,8 +123,12 @@ If the warning is related to a ast.Node type, see log_node_warning()."""
 '''%s: %s: %s: %s: %s\n''' % (last_position, error_type, self._namespace.name,
                             prefix, text))
         else:
-            text = (
+            if self._namespace:
+                text = (
 '''%s: %s: %s: %s\n''' % (last_position, error_type, self._namespace.name, text))
+            else:
+                text = (
+'''%s: %s: %s\n''' % (last_position, error_type, text))
 
         self._output.write(text)
         if log_type == FATAL: