Fix comments within function declarations in C (#1891) (#2140)

Co-authored-by: Jean Abou Samra <jean@abou-samra.fr>
author: lambda-karlculus <80796236+lambda-karlculus@users.noreply.github.com> 2022-05-31 03:18:11 +0930
committer: GitHub <noreply@github.com> 2022-05-30 19:48:11 +0200
commit: a9641d731c2f657f377fce7be98f935a33a163df (patch)
tree: 6395dd1c9ae2d6af332d40e09c61488f93c8ab33
parent: cd5cf1aba811f55295827858f5af9823f348c16d (diff)
download: pygments-git-a9641d731c2f657f377fce7be98f935a33a163df.tar.gz
5 files changed, 446 insertions, 20 deletions
diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py
index 1f27b89c..5d3b9c7d 100644
--- a/pygments/lexers/c_cpp.py
+++ b/pygments/lexers/c_cpp.py
@@ -42,6 +42,17 @@ class CFamilyLexer(RegexLexer):
     _ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})+'
     _namespaced_ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|::)+'
 
+    # Single and multiline comment regexes
+    # Beware not to use *? for the inner content! When these regexes
+    # are embedded in larger regexes and the rest of the larger regex
+    # fails to match, a lazy *? is extended past the comment's own
+    # terminator, so it matches more than it would have if the regex
+    # had been used standalone.
+    _comment_single = r'//(?:.|(?<=\\)\n)*\n'
+    _comment_multiline = r'/(?:\\\n)?[*](?:[^*]|[*](?!(?:\\\n)?/))*[*](?:\\\n)?/'
+
+    # Regex to match optional comments
+    _possible_comments = rf'\s*(?:(?:(?:{_comment_single})|(?:{_comment_multiline}))\s*)*'
+
     tokens = {
         'whitespace': [
             # preprocessor directives: without whitespace
@@ -60,8 +71,8 @@ class CFamilyLexer(RegexLexer):
             (r'\n', Whitespace),
             (r'[^\S\n]+', Whitespace),
             (r'\\\n', Text),  # line continuation
-            (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
-            (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
+            (_comment_single, Comment.Single),
+            (_comment_multiline, Comment.Multiline),
             # Open until EOF, so no ending delimiter
             (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
         ],
@@ -118,19 +129,25 @@ class CFamilyLexer(RegexLexer):
             include('keywords'),
             # functions
             (r'(' + _namespaced_ident + r'(?:[&*\s])+)'  # return arguments
+             r'(' + _possible_comments + r')'    # possible comments
              r'(' + _namespaced_ident + r')'             # method name
-             r'(\s*\([^;]*?\))'                          # signature
-             r'([^;{]*)(\{)',
-             bygroups(using(this), Name.Function, using(this), using(this),
-                      Punctuation),
+             r'(' + _possible_comments + r')'    # possible comments
+             r'(\([^;]*?\))'                          # signature
+             r'(' + _possible_comments + r')'    # possible comments
+             r'([^;{/]*)(\{)',
+             bygroups(using(this), using(this, state='whitespace'), Name.Function, using(this, state='whitespace'),
+                      using(this), using(this, state='whitespace'), using(this), Punctuation),
              'function'),
             # function declarations
             (r'(' + _namespaced_ident + r'(?:[&*\s])+)'  # return arguments
+             r'(' + _possible_comments + r')'    # possible comments
              r'(' + _namespaced_ident + r')'             # method name
-             r'(\s*\([^;]*?\))'                          # signature
-             r'([^;]*)(;)',
-             bygroups(using(this), Name.Function, using(this), using(this),
-                      Punctuation)),
+             r'(' + _possible_comments + r')'    # possible comments
+             r'(\([^;]*?\))'                          # signature
+             r'(' + _possible_comments + r')'    # possible comments
+             r'([^;/]*)(;)',
+             bygroups(using(this), using(this, state='whitespace'), Name.Function, using(this, state='whitespace'),
+                      using(this), using(this, state='whitespace'), using(this), Punctuation)),
             include('types'),
             default('statement'),
         ],
@@ -222,9 +239,9 @@ class CFamilyLexer(RegexLexer):
         self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True)
         RegexLexer.__init__(self, **options)
 
-    def get_tokens_unprocessed(self, text):
+    def get_tokens_unprocessed(self, text, stack=('root',)):
         for index, token, value in \
-                RegexLexer.get_tokens_unprocessed(self, text):
+                RegexLexer.get_tokens_unprocessed(self, text, stack):
             if token is Name:
                 if self.stdlibhighlighting and value in self.stdlib_types:
                     token = Keyword.Type
diff --git a/pygments/lexers/c_like.py b/pygments/lexers/c_like.py
index 6b511ded..fb09ca8d 100644
--- a/pygments/lexers/c_like.py
+++ b/pygments/lexers/c_like.py
@@ -304,8 +304,8 @@ class CudaLexer(CLexer):
                  '__syncthreads_or'}
     execution_confs = {'<<<', '>>>'}
 
-    def get_tokens_unprocessed(self, text):
-        for index, token, value in CLexer.get_tokens_unprocessed(self, text):
+    def get_tokens_unprocessed(self, text, stack=('root',)):
+        for index, token, value in CLexer.get_tokens_unprocessed(self, text, stack):
             if token is Name:
                 if value in self.variable_qualifiers:
                     token = Keyword.Type
@@ -525,8 +525,8 @@ class ArduinoLexer(CppLexer):
         'typename', 'this', 'alignof', 'constexpr', 'decltype', 'noexcept',
         'static_assert', 'thread_local', 'restrict'}
 
-    def get_tokens_unprocessed(self, text):
-        for index, token, value in CppLexer.get_tokens_unprocessed(self, text):
+    def get_tokens_unprocessed(self, text, stack=('root',)):
+        for index, token, value in CppLexer.get_tokens_unprocessed(self, text, stack):
             if value in self.structure:
                 yield index, Name.Builtin, value
             elif value in self.operators:
diff --git a/pygments/lexers/freefem.py b/pygments/lexers/freefem.py
index 7947b107..a7b15a7e 100644
--- a/pygments/lexers/freefem.py
+++ b/pygments/lexers/freefem.py
@@ -876,8 +876,8 @@ class FreeFemLexer(CppLexer):
                 'using'
     }
 
-    def get_tokens_unprocessed(self, text):
-        for index, token, value in CppLexer.get_tokens_unprocessed(self, text):
+    def get_tokens_unprocessed(self, text, stack=('root',)):
+        for index, token, value in CppLexer.get_tokens_unprocessed(self, text, stack):
             if value in self.operators:
                 yield index, Operator, value
             elif value in self.types:
diff --git a/pygments/lexers/objective.py b/pygments/lexers/objective.py
index bbe94962..c9c4278f 100644
--- a/pygments/lexers/objective.py
+++ b/pygments/lexers/objective.py
@@ -175,12 +175,12 @@ def objective(baselexer):
                 return 0.8
             return 0
 
-        def get_tokens_unprocessed(self, text):
+        def get_tokens_unprocessed(self, text, stack=('root',)):
             from pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
                 COCOA_PROTOCOLS, COCOA_PRIMITIVES
 
             for index, token, value in \
-                    baselexer.get_tokens_unprocessed(self, text):
+                    baselexer.get_tokens_unprocessed(self, text, stack):
                 if token is Name or token is Name.Class:
                     if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
                        or value in COCOA_PRIMITIVES:
diff --git a/tests/snippets/c/test_function_comments.txt b/tests/snippets/c/test_function_comments.txt
new file mode 100644
index 00000000..e8d8a69d
--- /dev/null
+++ b/tests/snippets/c/test_function_comments.txt
@@ -0,0 +1,409 @@
+---input---
+int func1(int x, int y)
+    /*@requires y >= 0*/
+{
+    return x / y;
+}
+
+
+int func2(int x, int y)    //@requires y >= 0;
+{
+    return x / y;
+}
+
+
+void func3()
+//#test{};
+{
+    fun(2,3)//test1;
+    ;
+}
+
+
+int func4(int x, int y)
+    /*@requires y >= 0;*/
+{
+    return x / y;
+}
+
+
+int func5(int x, int y)
+    /*@requires y >= 0
+    {
+        return x / y;
+    }
+    */
+   {
+       return 2;
+   }
+
+
+//@requires y >= 0;
+//@requires y >= 0
+/*
+calling(2,5)
+*/
+/*
+calling(2,5);
+*/
+int func6(int x, int y)
+    //@requires y >= 0
+    //@requires y >= 0;
+    /*
+    hello(2,3);
+    */
+    /*
+    hello(2,3)
+    */
+    {
+        // haha(2,3);
+        return x / y;
+        /*
+        callblabla(x, y);
+        */
+    }
+//@requires y >= 0;
+//@requires y >= 0
+/*
+calling(2,5)
+*/
+/*
+calling(2,5);
+*/
+
+
+int * //@# a pointer to int
+func7 /* @# why a comment here?  */ (
+  int /* the index has to be an int */ a, // index into the array
+  int *b    //the array @!
+)
+/*
+    The end of the func params @ (@ will result error if parsed incorrectly)
+*/
+{
+  // yet another comment
+  return b[a];
+}
+
+---tokens---
+'int'         Keyword.Type
+' '           Text.Whitespace
+'func1'       Name.Function
+'('           Punctuation
+'int'         Keyword.Type
+' '           Text.Whitespace
+'x'           Name
+','           Punctuation
+' '           Text.Whitespace
+'int'         Keyword.Type
+' '           Text.Whitespace
+'y'           Name
+')'           Punctuation
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'/*@requires y >= 0*/' Comment.Multiline
+'\n'          Text.Whitespace
+
+'{'           Punctuation
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'return'      Keyword
+' '           Text.Whitespace
+'x'           Name
+' '           Text.Whitespace
+'/'           Operator
+' '           Text.Whitespace
+'y'           Name
+';'           Punctuation
+'\n'          Text.Whitespace
+
+'}'           Punctuation
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'int'         Keyword.Type
+' '           Text.Whitespace
+'func2'       Name.Function
+'('           Punctuation
+'int'         Keyword.Type
+' '           Text.Whitespace
+'x'           Name
+','           Punctuation
+' '           Text.Whitespace
+'int'         Keyword.Type
+' '           Text.Whitespace
+'y'           Name
+')'           Punctuation
+'    '        Text.Whitespace
+'//@requires y >= 0;\n' Comment.Single
+
+'{'           Punctuation
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'return'      Keyword
+' '           Text.Whitespace
+'x'           Name
+' '           Text.Whitespace
+'/'           Operator
+' '           Text.Whitespace
+'y'           Name
+';'           Punctuation
+'\n'          Text.Whitespace
+
+'}'           Punctuation
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'void'        Keyword.Type
+' '           Text.Whitespace
+'func3'       Name.Function
+'('           Punctuation
+')'           Punctuation
+'\n'          Text.Whitespace
+
+'//#test{};\n' Comment.Single
+
+'{'           Punctuation
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'fun'         Name
+'('           Punctuation
+'2'           Literal.Number.Integer
+','           Punctuation
+'3'           Literal.Number.Integer
+')'           Punctuation
+'//test1;\n'  Comment.Single
+
+'    '        Text.Whitespace
+';'           Punctuation
+'\n'          Text.Whitespace
+
+'}'           Punctuation
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'int'         Keyword.Type
+' '           Text.Whitespace
+'func4'       Name.Function
+'('           Punctuation
+'int'         Keyword.Type
+' '           Text.Whitespace
+'x'           Name
+','           Punctuation
+' '           Text.Whitespace
+'int'         Keyword.Type
+' '           Text.Whitespace
+'y'           Name
+')'           Punctuation
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'/*@requires y >= 0;*/' Comment.Multiline
+'\n'          Text.Whitespace
+
+'{'           Punctuation
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'return'      Keyword
+' '           Text.Whitespace
+'x'           Name
+' '           Text.Whitespace
+'/'           Operator
+' '           Text.Whitespace
+'y'           Name
+';'           Punctuation
+'\n'          Text.Whitespace
+
+'}'           Punctuation
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'int'         Keyword.Type
+' '           Text.Whitespace
+'func5'       Name.Function
+'('           Punctuation
+'int'         Keyword.Type
+' '           Text.Whitespace
+'x'           Name
+','           Punctuation
+' '           Text.Whitespace
+'int'         Keyword.Type
+' '           Text.Whitespace
+'y'           Name
+')'           Punctuation
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'/*@requires y >= 0\n    {\n        return x / y;\n    }\n    */' Comment.Multiline
+'\n'          Text.Whitespace
+
+'   '         Text.Whitespace
+'{'           Punctuation
+'\n'          Text.Whitespace
+
+'       '     Text.Whitespace
+'return'      Keyword
+' '           Text.Whitespace
+'2'           Literal.Number.Integer
+';'           Punctuation
+'\n'          Text.Whitespace
+
+'   '         Text.Whitespace
+'}'           Punctuation
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'//@requires y >= 0;\n' Comment.Single
+
+'//@requires y >= 0\n' Comment.Single
+
+'/*\ncalling(2,5)\n*/' Comment.Multiline
+'\n'          Text.Whitespace
+
+'/*\ncalling(2,5);\n*/' Comment.Multiline
+'\n'          Text.Whitespace
+
+'int'         Keyword.Type
+' '           Text.Whitespace
+'func6'       Name.Function
+'('           Punctuation
+'int'         Keyword.Type
+' '           Text.Whitespace
+'x'           Name
+','           Punctuation
+' '           Text.Whitespace
+'int'         Keyword.Type
+' '           Text.Whitespace
+'y'           Name
+')'           Punctuation
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'//@requires y >= 0\n' Comment.Single
+
+'    '        Text.Whitespace
+'//@requires y >= 0;\n' Comment.Single
+
+'    '        Text.Whitespace
+'/*\n    hello(2,3);\n    */' Comment.Multiline
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'/*\n    hello(2,3)\n    */' Comment.Multiline
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'{'           Punctuation
+'\n'          Text.Whitespace
+
+'        '    Text.Whitespace
+'// haha(2,3);\n' Comment.Single
+
+'        '    Text.Whitespace
+'return'      Keyword
+' '           Text.Whitespace
+'x'           Name
+' '           Text.Whitespace
+'/'           Operator
+' '           Text.Whitespace
+'y'           Name
+';'           Punctuation
+'\n'          Text.Whitespace
+
+'        '    Text.Whitespace
+'/*\n        callblabla(x, y);\n        */' Comment.Multiline
+'\n'          Text.Whitespace
+
+'    '        Text.Whitespace
+'}'           Punctuation
+'\n'          Text.Whitespace
+
+'//@requires y >= 0;\n' Comment.Single
+
+'//@requires y >= 0\n' Comment.Single
+
+'/*\ncalling(2,5)\n*/' Comment.Multiline
+'\n'          Text.Whitespace
+
+'/*\ncalling(2,5);\n*/' Comment.Multiline
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'\n'          Text.Whitespace
+
+'int'         Keyword.Type
+' '           Text.Whitespace
+'*'           Operator
+' '           Text.Whitespace
+'//@# a pointer to int\n' Comment.Single
+
+'func7'       Name.Function
+' '           Text.Whitespace
+'/* @# why a comment here?  */' Comment.Multiline
+' '           Text.Whitespace
+'('           Punctuation
+'\n'          Text.Whitespace
+
+'  '          Text.Whitespace
+'int'         Keyword.Type
+' '           Text.Whitespace
+'/* the index has to be an int */' Comment.Multiline
+' '           Text.Whitespace
+'a'           Name
+','           Punctuation
+' '           Text.Whitespace
+'// index into the array\n' Comment.Single
+
+'  '          Text.Whitespace
+'int'         Keyword.Type
+' '           Text.Whitespace
+'*'           Operator
+'b'           Name
+'    '        Text.Whitespace
+'//the array @!\n' Comment.Single
+
+')'           Punctuation
+'\n'          Text.Whitespace
+
+'/*\n    The end of the func params @ (@ will result error if parsed incorrectly)\n*/' Comment.Multiline
+'\n'          Text.Whitespace
+
+'{'           Punctuation
+'\n'          Text.Whitespace
+
+'  '          Text.Whitespace
+'// yet another comment\n' Comment.Single
+
+'  '          Text.Whitespace
+'return'      Keyword
+' '           Text.Whitespace
+'b'           Name
+'['           Punctuation
+'a'           Name
+']'           Punctuation
+';'           Punctuation
+'\n'          Text.Whitespace
+
+'}'           Punctuation
+'\n'          Text.Whitespace
author	lambda-karlculus <80796236+lambda-karlculus@users.noreply.github.com>	2022-05-31 03:18:11 +0930
committer	GitHub <noreply@github.com>	2022-05-30 19:48:11 +0200
commit	a9641d731c2f657f377fce7be98f935a33a163df (patch)
tree	6395dd1c9ae2d6af332d40e09c61488f93c8ab33
parent	cd5cf1aba811f55295827858f5af9823f348c16d (diff)
download	pygments-git-a9641d731c2f657f377fce7be98f935a33a163df.tar.gz