giscanner: store code before and after comment block

The stored code lets us later rewrite source files containing broken GTK-Doc comment blocks, i.e. blocks whose start token "/**" is preceded by code and/or whose end token "*/" is followed by code.
author: Dieter Verfaillie <dieterv@optionexplicit.be> 2013-08-21 12:19:40 +0200
committer: Dieter Verfaillie <dieterv@optionexplicit.be> 2013-10-08 20:56:07 +0200
commit: 07ad34a094cb00aef16250dfbf05d6eef05c3aff (patch)
tree: 547d8776e7994d106e1fbb5bf6eaeb2b713630fa
parent: 3177c9868cc5daab1c7915c27e19f7236bb338df (diff)
download: gobject-introspection-07ad34a094cb00aef16250dfbf05d6eef05c3aff.tar.gz
5 files changed, 298 insertions, 93 deletions
diff --git a/girepository/gitypes.h b/girepository/gitypes.h
index a90ed4c8..660a98f9 100644
--- a/girepository/gitypes.h
+++ b/girepository/gitypes.h
@@ -225,7 +225,7 @@ typedef enum
   GI_INFO_TYPE_OBJECT,
   GI_INFO_TYPE_INTERFACE,
   GI_INFO_TYPE_CONSTANT,
-  GI_INFO_TYPE_INVALID_0,    /* 10 */  /** DELETED - used to be ERROR_DOMAIN **/
+  GI_INFO_TYPE_INVALID_0,    /* 10 */  /* DELETED - used to be ERROR_DOMAIN */
   GI_INFO_TYPE_UNION,
   GI_INFO_TYPE_VALUE,
   GI_INFO_TYPE_SIGNAL,
diff --git a/giscanner/annotationparser.py b/giscanner/annotationparser.py
index 88a8c73a..3dd54ef8 100644
--- a/giscanner/annotationparser.py
+++ b/giscanner/annotationparser.py
@@ -285,34 +285,31 @@ TRANSFER_OPTIONS = [OPT_TRANSFER_CONTAINER,
 # Pattern used to normalize different types of line endings
 LINE_BREAK_RE = re.compile(r'\r\n|\r|\n', re.UNICODE)
 
-# Program matching the start of a comment block.
-#
-# Results in 0 symbolic groups.
-COMMENT_START_RE = re.compile(
+# Pattern matching the start token of a comment block.
+COMMENT_BLOCK_START_RE = re.compile(
     r'''
     ^                                                    # start
+    (?P<code>.*?)                                        # whitespace, code, ...
+    \s*                                                  # 0 or more whitespace characters
+    (?P<token>/\*{2}(?!\*))                              # 1 forward slash character followed
+                                                         #   by exactly 2 asterisk characters
     \s*                                                  # 0 or more whitespace characters
-    /                                                    # 1 forward slash character
-    \*{2}                                                # exactly 2 asterisk characters
+    (?P<comment>.*?)                                     # GTK-Doc comment text
     \s*                                                  # 0 or more whitespace characters
     $                                                    # end
     ''',
     re.UNICODE | re.VERBOSE)
 
-# Program matching the end of a comment block. We need to take care
-# of comment ends that aren't on their own line for legacy support
-# reasons. See https://bugzilla.gnome.org/show_bug.cgi?id=689354
-#
-# Results in 1 symbolic group:
-#    - group 1 = description
-COMMENT_END_RE = re.compile(
+# Pattern matching the end token of a comment block.
+COMMENT_BLOCK_END_RE = re.compile(
     r'''
     ^                                                    # start
     \s*                                                  # 0 or more whitespace characters
-    (?P<description>.*?)                                 # description text
+    (?P<comment>.*?)                                     # GTK-Doc comment text
     \s*                                                  # 0 or more whitespace characters
-    \*+                                                  # 1 or more asterisk characters
-    /                                                    # 1 forward slash character
+    (?P<token>\*+/)                                      # 1 or more asterisk characters followed
+                                                         #   by exactly 1 forward slash character
+    (?P<code>.*?)                                        # whitespace, code, ...
     \s*                                                  # 0 or more whitespace characters
     $                                                    # end
     ''',
@@ -976,7 +973,7 @@ class GtkDocCommentBlock(GtkDocAnnotatable):
     Represents a GTK-Doc comment block.
     '''
 
-    __slots__ = ('name', 'params', 'description', 'tags')
+    __slots__ = ('code_before', 'code_after', 'name', 'params', 'description', 'tags')
 
     #: Valid annotation names for the GTK-Doc comment block identifier part.
     valid_annotations = (ANN_ATTRIBUTES, ANN_CONSTRUCTOR, ANN_FOREIGN, ANN_GET_VALUE_FUNC,
@@ -986,6 +983,12 @@ class GtkDocCommentBlock(GtkDocAnnotatable):
     def __init__(self, name, position=None):
         GtkDocAnnotatable.__init__(self, position)
 
+        #: Code preceding the GTK-Doc comment block start token ("``/**``"), if any.
+        self.code_before = None
+
+        #: Code following the GTK-Doc comment block end token ("``*/``"), if any.
+        self.code_after = None
+
         #: Identifier name.
         self.name = name
 
@@ -1146,27 +1149,67 @@ class GtkDocCommentBlockParser(object):
         :returns: a :class:`GtkDocCommentBlock` object or ``None``
         '''
 
+        code_before = ''
+        code_after = ''
         comment_lines = list(enumerate(re.sub(LINE_BREAK_RE, '\n', comment).split('\n')))
+        comment_lines_len = len(comment_lines)
+
+        # Check for the start of the comment block.
+        result = COMMENT_BLOCK_START_RE.match(comment_lines[0][1])
+        if result:
+            # Skip single line comment blocks
+            if comment_lines_len == 1:
+                position = Position(filename, lineno)
+                marker = ' ' * result.end('code') + '^'
+                error('Skipping invalid GTK-Doc comment block:'
+                      '\n%s\n%s' % (comment_lines[0][1], marker),
+                     position)
+                return None
 
-        # Check for the start the comment block.
-        if COMMENT_START_RE.match(comment_lines[0][1]):
-            del comment_lines[0]
+            code_before = result.group('code')
+            comment = result.group('comment')
+
+            if code_before:
+                position = Position(filename, lineno)
+                marker = ' ' * result.end('code') + '^'
+                warn('GTK-Doc comment block start token "/**" should '
+                     'not be preceded by code:\n%s\n%s' % (comment_lines[0][1], marker),
+                     position)
+
+            if comment:
+                position = Position(filename, lineno)
+                marker = ' ' * result.start('comment') + '^'
+                warn('GTK-Doc comment block start token "/**" should '
+                     'not be followed by comment text:\n%s\n%s' % (comment_lines[0][1], marker),
+                     position)
+
+                comment_lines[0] = (comment_lines[0][0], comment)
+            else:
+                del comment_lines[0]
         else:
             # Not a GTK-Doc comment block.
             return None
 
-        # Check for the end the comment block.
-        line_offset, line = comment_lines[-1]
-        result = COMMENT_END_RE.match(line)
+        # Check for the end of the comment block.
+        result = COMMENT_BLOCK_END_RE.match(comment_lines[-1][1])
         if result:
-            description = result.group('description')
-            if description:
-                comment_lines[-1] = (line_offset, description)
-                position = Position(filename, lineno + line_offset)
-                marker = ' ' * result.end('description') + '^'
-                warn("Comments should end with */ on a new line:\n%s\n%s" %
-                     (line, marker),
+            code_after = result.group('code')
+            comment = result.group('comment')
+            if code_after:
+                position = Position(filename, lineno + comment_lines_len - 1)
+                marker = ' ' * result.end('code') + '^'
+                warn('GTK-Doc comment block end token "*/" should '
+                     'not be followed by code:\n%s\n%s' % (comment_lines[-1][1], marker),
+                     position)
+
+            if comment:
+                position = Position(filename, lineno + comment_lines_len - 1)
+                marker = ' ' * result.end('comment') + '^'
+                warn('GTK-Doc comment block end token "*/" should '
+                     'not be preceded by comment text:\n%s\n%s' % (comment_lines[-1][1], marker),
                      position)
+
+                comment_lines[-1] = (comment_lines[-1][0], comment)
             else:
                 del comment_lines[-1]
         else:
@@ -1243,8 +1286,9 @@ class GtkDocCommentBlockParser(object):
                     in_part = PART_IDENTIFIER
                     part_indent = line_indent
 
-                    comment_block = GtkDocCommentBlock(identifier_name)
-                    comment_block.position = position
+                    comment_block = GtkDocCommentBlock(identifier_name, position)
+                    comment_block.code_before = code_before
+                    comment_block.code_after = code_after
 
                     if identifier_fields:
                         (a, d) = self._parse_fields(position,
diff --git a/tests/offsets/offsets.h b/tests/offsets/offsets.h
index 1985f39a..7bdc1ad7 100644
--- a/tests/offsets/offsets.h
+++ b/tests/offsets/offsets.h
@@ -119,8 +119,7 @@ struct _OffsetsArray
   gpointer some_ptrs[5];
 };
 
-/** Test object offsets
- */
+/* Test object offsets */
 
 typedef struct _OffsetsObj OffsetsObj;
 typedef struct _OffsetsObjClass OffsetsObjClass;
diff --git a/tests/scanner/annotationparser/gi/syntax.xml b/tests/scanner/annotationparser/gi/syntax.xml
index 32528fb0..b8c85b50 100644
--- a/tests/scanner/annotationparser/gi/syntax.xml
+++ b/tests/scanner/annotationparser/gi/syntax.xml
@@ -20,6 +20,14 @@
   <!--
   Not GTK-Doc
   -->
+  <input>/**
+//</input>
+</test>
+
+<test>
+  <!--
+  Not GTK-Doc
+  -->
   <input>/*Test*/</input>
 </test>
 
@@ -51,37 +59,77 @@ something */</input>
   <!--
   Not GTK-Doc
   -->
-  <input>/**Test*/</input>
+  <input>/**
+*/</input>
 </test>
 
 <test>
-  <!--
-  Not GTK-Doc
-  -->
-  <input>/** Test */</input>
+  <input>/** Test **/</input>
+  <parser>
+    <messages>
+      <message>1: Error: Test: Skipping invalid GTK-Doc comment block:
+/** Test **/
+^</message>
+    </messages>
+  </parser>
 </test>
 
 <test>
-  <!--
-  Not GTK-Doc
-  -->
-  <input>/** Test
-something */</input>
+  <input>/**Test
+*/</input>
+  <parser>
+    <docblock>
+      <identifier>
+        <name>Test</name>
+      </identifier>
+    </docblock>
+    <messages>
+      <message>1: Warning: Test: GTK-Doc comment block start token "/**" should not be followed by comment text:
+/**Test
+   ^</message>
+    </messages>
+  </parser>
 </test>
 
 <test>
-  <!--
-  Not GTK-Doc
-  -->
   <input>/**
-*/</input>
+Test */</input>
+  <parser>
+    <docblock>
+      <identifier>
+        <name>Test</name>
+      </identifier>
+    </docblock>
+    <messages>
+      <message>2: Warning: Test: GTK-Doc comment block end token "*/" should not be preceded by comment text:
+Test */
+    ^</message>
+    </messages>
+  </parser>
+</test>
+
+<test>
+  <input>/** Test
+something */</input>
+  <parser>
+    <docblock>
+      <identifier>
+        <name>Test</name>
+      </identifier>
+      <description>something</description>
+    </docblock>
+    <messages>
+      <message>1: Warning: Test: GTK-Doc comment block start token "/**" should not be followed by comment text:
+/** Test
+    ^</message>
+      <message>2: Warning: Test: GTK-Doc comment block end token "*/" should not be preceded by comment text:
+something */
+         ^</message>
+    </messages>
+  </parser>
 </test>
 
 <test>
-  <!--
-  Technically not GTK-Doc, but we need to support this for backwards compatibility
-  with the old annotationparser.
-  -->
   <input>/**
 Test
 something */</input>
@@ -93,7 +141,7 @@ something */</input>
       <description>something</description>
     </docblock>
     <messages>
-      <message>3: Warning: Test: Comments should end with */ on a new line:
+      <message>3: Warning: Test: GTK-Doc comment block end token "*/" should not be preceded by comment text:
 something */
          ^</message>
     </messages>
@@ -101,10 +149,6 @@ something */
 </test>
 
 <test>
-  <!--
-  Technically not GTK-Doc, but we need to support this for backwards compatibility
-  with the old annotationparser.
-  -->
   <input>/**
 Test
 something **/</input>
@@ -116,7 +160,7 @@ something **/</input>
       <description>something</description>
     </docblock>
     <messages>
-      <message>3: Warning: Test: Comments should end with */ on a new line:
+      <message>3: Warning: Test: GTK-Doc comment block end token "*/" should not be preceded by comment text:
 something **/
          ^</message>
     </messages>
@@ -124,8 +168,35 @@ something **/
 </test>
 
 <test>
+  <input>code goes here /** Test
+something */ code goes here</input>
+  <parser>
+    <docblock>
+      <identifier>
+        <name>Test</name>
+      </identifier>
+      <description>something</description>
+    </docblock>
+    <messages>
+      <message>1: Warning: Test: GTK-Doc comment block start token "/**" should not be preceded by code:
+code goes here /** Test
+              ^</message>
+      <message>1: Warning: Test: GTK-Doc comment block start token "/**" should not be followed by comment text:
+code goes here /** Test
+                   ^</message>
+      <message>2: Warning: Test: GTK-Doc comment block end token "*/" should not be followed by code:
+something */ code goes here
+                           ^</message>
+      <message>2: Warning: Test: GTK-Doc comment block end token "*/" should not be preceded by comment text:
+something */ code goes here
+         ^</message>
+    </messages>
+  </parser>
+</test>
+
+<test>
   <!--
-  Broken comment block, signal the start of the comment block description followed
+  Malformed comment block, signal the start of the comment block description followed
   by a parameter instead.
   -->
   <input>/**
diff --git a/tests/scanner/annotationparser/test_patterns.py b/tests/scanner/annotationparser/test_patterns.py
index 023f61f6..ce82cf90 100644
--- a/tests/scanner/annotationparser/test_patterns.py
+++ b/tests/scanner/annotationparser/test_patterns.py
@@ -31,13 +31,132 @@ against the expected output.
 '''
 
 
-from giscanner.annotationparser import (SECTION_RE, SYMBOL_RE, PROPERTY_RE,
+from giscanner.annotationparser import (COMMENT_BLOCK_START_RE, COMMENT_BLOCK_END_RE,
+                                        SECTION_RE, SYMBOL_RE, PROPERTY_RE,
                                         SIGNAL_RE, PARAMETER_RE, TAG_RE,
-                                        TAG_VALUE_VERSION_RE, TAG_VALUE_STABILITY_RE,
-                                        COMMENT_END_RE)
+                                        TAG_VALUE_VERSION_RE, TAG_VALUE_STABILITY_RE)
 from unittest import (TestCase, main)
 
 
+comment_start_tests = [
+    (COMMENT_BLOCK_START_RE, '/**',
+         {'code': '',
+          'token': '/**',
+          'comment': ''}),
+    (COMMENT_BLOCK_START_RE, '   /**',
+         {'code': '',
+          'token': '/**',
+          'comment': ''}),
+    (COMMENT_BLOCK_START_RE, ' /** ',
+         {'code': '',
+          'token': '/**',
+          'comment': ''}),
+    (COMMENT_BLOCK_START_RE, 'xyz /** ',
+         {'code': 'xyz',
+          'token': '/**',
+          'comment': ''}),
+    (COMMENT_BLOCK_START_RE, '    xyz    /** ',
+         {'code': '    xyz',
+          'token': '/**',
+          'comment': ''}),
+    (COMMENT_BLOCK_START_RE, '/** xyz',
+         {'code': '',
+          'token': '/**',
+          'comment': 'xyz'}),
+    (COMMENT_BLOCK_START_RE, ' /**xyz',
+         {'code': '',
+          'token': '/**',
+          'comment': 'xyz'}),
+    (COMMENT_BLOCK_START_RE, ' /** xyz',
+         {'code': '',
+          'token': '/**',
+          'comment': 'xyz'}),
+    (COMMENT_BLOCK_START_RE, '/***',
+         None),
+    (COMMENT_BLOCK_START_RE, ' /***',
+         None),
+    (COMMENT_BLOCK_START_RE, ' /*** ',
+         None),
+    (COMMENT_BLOCK_START_RE, '/*** xyz',
+         None),
+    (COMMENT_BLOCK_START_RE, '/***** xyz',
+         None),
+    (COMMENT_BLOCK_START_RE, ' /*****xyz',
+         None),
+]
+
+
+comment_end_tests = [
+    (COMMENT_BLOCK_END_RE, '*/',
+         {'comment': '',
+          'token': '*/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, '   */',
+         {'comment': '',
+          'token': '*/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, ' */ ',
+         {'comment': '',
+          'token': '*/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, '*/xyz',
+         {'comment': '',
+          'token': '*/',
+          'code': 'xyz'}),
+    (COMMENT_BLOCK_END_RE, '   */xyz',
+         {'comment': '',
+          'token': '*/',
+          'code': 'xyz'}),
+    (COMMENT_BLOCK_END_RE, ' */ xyz',
+         {'comment': '',
+          'token': '*/',
+          'code': ' xyz'}),
+    (COMMENT_BLOCK_END_RE, '**/',
+         {'comment': '',
+          'token': '**/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, ' **/',
+         {'comment': '',
+          'token': '**/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, ' **/ ',
+         {'comment': '',
+          'token': '**/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, 'test */',
+         {'comment': 'test',
+          'token': '*/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, ' test*/',
+         {'comment': 'test',
+          'token': '*/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, 'test */ xyz',
+         {'comment': 'test',
+          'token': '*/',
+          'code': ' xyz'}),
+    (COMMENT_BLOCK_END_RE, ' test*/  xyz  ',
+         {'comment': 'test',
+          'token': '*/',
+          'code': '  xyz'}),
+    (COMMENT_BLOCK_END_RE, 'test **/',
+         {'comment': 'test',
+          'token': '**/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, ' test**/',
+         {'comment': 'test',
+          'token': '**/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, 'test *****/',
+         {'comment': 'test',
+          'token': '*****/',
+          'code': ''}),
+    (COMMENT_BLOCK_END_RE, ' test*****/',
+         {'comment': 'test',
+          'token': '*****/',
+          'code': ''})]
+
+
 identifier_section_tests = [
     (SECTION_RE, 'TSIEOCN',
          None),
@@ -571,33 +690,6 @@ tag_value_stability_tests = [
           'description': 'xyz: abc'})]
 
 
-comment_end_tests = [
-    (COMMENT_END_RE, '*/',
-         {'description': ''}),
-    (COMMENT_END_RE, '   */',
-         {'description': ''}),
-    (COMMENT_END_RE, ' */ ',
-         {'description': ''}),
-    (COMMENT_END_RE, '**/',
-         {'description': ''}),
-    (COMMENT_END_RE, ' **/',
-         {'description': ''}),
-    (COMMENT_END_RE, ' **/ ',
-         {'description': ''}),
-    (COMMENT_END_RE, 'test */',
-         {'description': 'test'}),
-    (COMMENT_END_RE, ' test*/',
-         {'description': 'test'}),
-    (COMMENT_END_RE, 'test **/',
-         {'description': 'test'}),
-    (COMMENT_END_RE, ' test**/',
-         {'description': 'test'}),
-    (COMMENT_END_RE, 'test *****/',
-         {'description': 'test'}),
-    (COMMENT_END_RE, ' test*****/',
-         {'description': 'test'})]
-
-
 def create_tests(tests_name, testcases):
     for (index, testcase) in enumerate(testcases):
         real_test_name = '%s_%03d' % (tests_name, index)
@@ -639,16 +731,15 @@ class TestProgram(TestCase):
 
 
 if __name__ == '__main__':
-    # Create tests from data
+    create_tests('test_comment_start', comment_start_tests)
+    create_tests('test_comment_end', comment_end_tests)
     create_tests('test_identifier_section', identifier_section_tests)
     create_tests('test_identifier_symbol', identifier_symbol_tests)
     create_tests('test_identifier_property', identifier_property_tests)
     create_tests('test_identifier_signal', identifier_signal_tests)
     create_tests('test_parameter', parameter_tests)
     create_tests('test_tag', tag_tests)
-    create_tests('test_comment_end', comment_end_tests)
     create_tests('test_tag_value_version', tag_value_version_tests)
     create_tests('test_tag_value_stability', tag_value_stability_tests)
 
-    # Run test suite
     main()
author	Dieter Verfaillie <dieterv@optionexplicit.be>	2013-08-21 12:19:40 +0200
committer	Dieter Verfaillie <dieterv@optionexplicit.be>	2013-10-08 20:56:07 +0200
commit	07ad34a094cb00aef16250dfbf05d6eef05c3aff (patch)
tree	547d8776e7994d106e1fbb5bf6eaeb2b713630fa
parent	3177c9868cc5daab1c7915c27e19f7236bb338df (diff)
download	gobject-introspection-07ad34a094cb00aef16250dfbf05d6eef05c3aff.tar.gz