summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: lambda-karlculus <80796236+lambda-karlculus@users.noreply.github.com> 2022-05-31 03:18:11 +0930
committer: GitHub <noreply@github.com> 2022-05-30 19:48:11 +0200
commit: a9641d731c2f657f377fce7be98f935a33a163df (patch)
tree: 6395dd1c9ae2d6af332d40e09c61488f93c8ab33
parent: cd5cf1aba811f55295827858f5af9823f348c16d (diff)
download: pygments-git-a9641d731c2f657f377fce7be98f935a33a163df.tar.gz
Fix comments within function declarations in C (#1891) (#2140)
Co-authored-by: Jean Abou Samra <jean@abou-samra.fr>
-rw-r--r--  pygments/lexers/c_cpp.py  41
-rw-r--r--  pygments/lexers/c_like.py  8
-rw-r--r--  pygments/lexers/freefem.py  4
-rw-r--r--  pygments/lexers/objective.py  4
-rw-r--r--  tests/snippets/c/test_function_comments.txt  409
5 files changed, 446 insertions, 20 deletions
diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py
index 1f27b89c..5d3b9c7d 100644
--- a/pygments/lexers/c_cpp.py
+++ b/pygments/lexers/c_cpp.py
@@ -42,6 +42,17 @@ class CFamilyLexer(RegexLexer):
_ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})+'
_namespaced_ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|::)+'
+ # Single and multiline comment regexes
+ # Beware not to use *? for the inner content! When these regexes
+ # are embedded in larger regexes, that can cause the stuff*? to
+ # match more than it would have if the regex had been used in
+ # a standalone way ...
+ _comment_single = r'//(?:.|(?<=\\)\n)*\n'
+ _comment_multiline = r'/(?:\\\n)?[*](?:[^*]|[*](?!(?:\\\n)?/))*[*](?:\\\n)?/'
+
+ # Regex to match optional comments
+ _possible_comments = rf'\s*(?:(?:(?:{_comment_single})|(?:{_comment_multiline}))\s*)*'
+
tokens = {
'whitespace': [
# preprocessor directives: without whitespace
@@ -60,8 +71,8 @@ class CFamilyLexer(RegexLexer):
(r'\n', Whitespace),
(r'[^\S\n]+', Whitespace),
(r'\\\n', Text), # line continuation
- (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
- (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
+ (_comment_single, Comment.Single),
+ (_comment_multiline, Comment.Multiline),
# Open until EOF, so no ending delimiter
(r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
],
@@ -118,19 +129,25 @@ class CFamilyLexer(RegexLexer):
include('keywords'),
# functions
(r'(' + _namespaced_ident + r'(?:[&*\s])+)' # return arguments
+ r'(' + _possible_comments + r')' # possible comments
r'(' + _namespaced_ident + r')' # method name
- r'(\s*\([^;]*?\))' # signature
- r'([^;{]*)(\{)',
- bygroups(using(this), Name.Function, using(this), using(this),
- Punctuation),
+ r'(' + _possible_comments + r')' # possible comments
+ r'(\([^;]*?\))' # signature
+ r'(' + _possible_comments + r')' # possible comments
+ r'([^;{/]*)(\{)',
+ bygroups(using(this), using(this, state='whitespace'), Name.Function, using(this, state='whitespace'),
+ using(this), using(this, state='whitespace'), using(this), Punctuation),
'function'),
# function declarations
(r'(' + _namespaced_ident + r'(?:[&*\s])+)' # return arguments
+ r'(' + _possible_comments + r')' # possible comments
r'(' + _namespaced_ident + r')' # method name
- r'(\s*\([^;]*?\))' # signature
- r'([^;]*)(;)',
- bygroups(using(this), Name.Function, using(this), using(this),
- Punctuation)),
+ r'(' + _possible_comments + r')' # possible comments
+ r'(\([^;]*?\))' # signature
+ r'(' + _possible_comments + r')' # possible comments
+ r'([^;/]*)(;)',
+ bygroups(using(this), using(this, state='whitespace'), Name.Function, using(this, state='whitespace'),
+ using(this), using(this, state='whitespace'), using(this), Punctuation)),
include('types'),
default('statement'),
],
@@ -222,9 +239,9 @@ class CFamilyLexer(RegexLexer):
self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True)
RegexLexer.__init__(self, **options)
- def get_tokens_unprocessed(self, text):
+ def get_tokens_unprocessed(self, text, stack=('root',)):
for index, token, value in \
- RegexLexer.get_tokens_unprocessed(self, text):
+ RegexLexer.get_tokens_unprocessed(self, text, stack):
if token is Name:
if self.stdlibhighlighting and value in self.stdlib_types:
token = Keyword.Type
diff --git a/pygments/lexers/c_like.py b/pygments/lexers/c_like.py
index 6b511ded..fb09ca8d 100644
--- a/pygments/lexers/c_like.py
+++ b/pygments/lexers/c_like.py
@@ -304,8 +304,8 @@ class CudaLexer(CLexer):
'__syncthreads_or'}
execution_confs = {'<<<', '>>>'}
- def get_tokens_unprocessed(self, text):
- for index, token, value in CLexer.get_tokens_unprocessed(self, text):
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ for index, token, value in CLexer.get_tokens_unprocessed(self, text, stack):
if token is Name:
if value in self.variable_qualifiers:
token = Keyword.Type
@@ -525,8 +525,8 @@ class ArduinoLexer(CppLexer):
'typename', 'this', 'alignof', 'constexpr', 'decltype', 'noexcept',
'static_assert', 'thread_local', 'restrict'}
- def get_tokens_unprocessed(self, text):
- for index, token, value in CppLexer.get_tokens_unprocessed(self, text):
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ for index, token, value in CppLexer.get_tokens_unprocessed(self, text, stack):
if value in self.structure:
yield index, Name.Builtin, value
elif value in self.operators:
diff --git a/pygments/lexers/freefem.py b/pygments/lexers/freefem.py
index 7947b107..a7b15a7e 100644
--- a/pygments/lexers/freefem.py
+++ b/pygments/lexers/freefem.py
@@ -876,8 +876,8 @@ class FreeFemLexer(CppLexer):
'using'
}
- def get_tokens_unprocessed(self, text):
- for index, token, value in CppLexer.get_tokens_unprocessed(self, text):
+ def get_tokens_unprocessed(self, text, stack=('root',)):
+ for index, token, value in CppLexer.get_tokens_unprocessed(self, text, stack):
if value in self.operators:
yield index, Operator, value
elif value in self.types:
diff --git a/pygments/lexers/objective.py b/pygments/lexers/objective.py
index bbe94962..c9c4278f 100644
--- a/pygments/lexers/objective.py
+++ b/pygments/lexers/objective.py
@@ -175,12 +175,12 @@ def objective(baselexer):
return 0.8
return 0
- def get_tokens_unprocessed(self, text):
+ def get_tokens_unprocessed(self, text, stack=('root',)):
from pygments.lexers._cocoa_builtins import COCOA_INTERFACES, \
COCOA_PROTOCOLS, COCOA_PRIMITIVES
for index, token, value in \
- baselexer.get_tokens_unprocessed(self, text):
+ baselexer.get_tokens_unprocessed(self, text, stack):
if token is Name or token is Name.Class:
if value in COCOA_INTERFACES or value in COCOA_PROTOCOLS \
or value in COCOA_PRIMITIVES:
diff --git a/tests/snippets/c/test_function_comments.txt b/tests/snippets/c/test_function_comments.txt
new file mode 100644
index 00000000..e8d8a69d
--- /dev/null
+++ b/tests/snippets/c/test_function_comments.txt
@@ -0,0 +1,409 @@
+---input---
+int func1(int x, int y)
+ /*@requires y >= 0*/
+{
+ return x / y;
+}
+
+
+int func2(int x, int y) //@requires y >= 0;
+{
+ return x / y;
+}
+
+
+void func3()
+//#test{};
+{
+ fun(2,3)//test1;
+ ;
+}
+
+
+int func4(int x, int y)
+ /*@requires y >= 0;*/
+{
+ return x / y;
+}
+
+
+int func5(int x, int y)
+ /*@requires y >= 0
+ {
+ return x / y;
+ }
+ */
+ {
+ return 2;
+ }
+
+
+//@requires y >= 0;
+//@requires y >= 0
+/*
+calling(2,5)
+*/
+/*
+calling(2,5);
+*/
+int func6(int x, int y)
+ //@requires y >= 0
+ //@requires y >= 0;
+ /*
+ hello(2,3);
+ */
+ /*
+ hello(2,3)
+ */
+ {
+ // haha(2,3);
+ return x / y;
+ /*
+ callblabla(x, y);
+ */
+ }
+//@requires y >= 0;
+//@requires y >= 0
+/*
+calling(2,5)
+*/
+/*
+calling(2,5);
+*/
+
+
+int * //@# a pointer to int
+func7 /* @# why a comment here? */ (
+ int /* the index has to be an int */ a, // index into the array
+ int *b //the array @!
+)
+/*
+ The end of the func params @ (@ will result error if parsed incorrectly)
+*/
+{
+ // yet another comment
+ return b[a];
+}
+
+---tokens---
+'int' Keyword.Type
+' ' Text.Whitespace
+'func1' Name.Function
+'(' Punctuation
+'int' Keyword.Type
+' ' Text.Whitespace
+'x' Name
+',' Punctuation
+' ' Text.Whitespace
+'int' Keyword.Type
+' ' Text.Whitespace
+'y' Name
+')' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'/*@requires y >= 0*/' Comment.Multiline
+'\n' Text.Whitespace
+
+'{' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'return' Keyword
+' ' Text.Whitespace
+'x' Name
+' ' Text.Whitespace
+'/' Operator
+' ' Text.Whitespace
+'y' Name
+';' Punctuation
+'\n' Text.Whitespace
+
+'}' Punctuation
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'int' Keyword.Type
+' ' Text.Whitespace
+'func2' Name.Function
+'(' Punctuation
+'int' Keyword.Type
+' ' Text.Whitespace
+'x' Name
+',' Punctuation
+' ' Text.Whitespace
+'int' Keyword.Type
+' ' Text.Whitespace
+'y' Name
+')' Punctuation
+' ' Text.Whitespace
+'//@requires y >= 0;\n' Comment.Single
+
+'{' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'return' Keyword
+' ' Text.Whitespace
+'x' Name
+' ' Text.Whitespace
+'/' Operator
+' ' Text.Whitespace
+'y' Name
+';' Punctuation
+'\n' Text.Whitespace
+
+'}' Punctuation
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'void' Keyword.Type
+' ' Text.Whitespace
+'func3' Name.Function
+'(' Punctuation
+')' Punctuation
+'\n' Text.Whitespace
+
+'//#test{};\n' Comment.Single
+
+'{' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'fun' Name
+'(' Punctuation
+'2' Literal.Number.Integer
+',' Punctuation
+'3' Literal.Number.Integer
+')' Punctuation
+'//test1;\n' Comment.Single
+
+' ' Text.Whitespace
+';' Punctuation
+'\n' Text.Whitespace
+
+'}' Punctuation
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'int' Keyword.Type
+' ' Text.Whitespace
+'func4' Name.Function
+'(' Punctuation
+'int' Keyword.Type
+' ' Text.Whitespace
+'x' Name
+',' Punctuation
+' ' Text.Whitespace
+'int' Keyword.Type
+' ' Text.Whitespace
+'y' Name
+')' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'/*@requires y >= 0;*/' Comment.Multiline
+'\n' Text.Whitespace
+
+'{' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'return' Keyword
+' ' Text.Whitespace
+'x' Name
+' ' Text.Whitespace
+'/' Operator
+' ' Text.Whitespace
+'y' Name
+';' Punctuation
+'\n' Text.Whitespace
+
+'}' Punctuation
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'int' Keyword.Type
+' ' Text.Whitespace
+'func5' Name.Function
+'(' Punctuation
+'int' Keyword.Type
+' ' Text.Whitespace
+'x' Name
+',' Punctuation
+' ' Text.Whitespace
+'int' Keyword.Type
+' ' Text.Whitespace
+'y' Name
+')' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'/*@requires y >= 0\n {\n return x / y;\n }\n */' Comment.Multiline
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'{' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'return' Keyword
+' ' Text.Whitespace
+'2' Literal.Number.Integer
+';' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'}' Punctuation
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'//@requires y >= 0;\n' Comment.Single
+
+'//@requires y >= 0\n' Comment.Single
+
+'/*\ncalling(2,5)\n*/' Comment.Multiline
+'\n' Text.Whitespace
+
+'/*\ncalling(2,5);\n*/' Comment.Multiline
+'\n' Text.Whitespace
+
+'int' Keyword.Type
+' ' Text.Whitespace
+'func6' Name.Function
+'(' Punctuation
+'int' Keyword.Type
+' ' Text.Whitespace
+'x' Name
+',' Punctuation
+' ' Text.Whitespace
+'int' Keyword.Type
+' ' Text.Whitespace
+'y' Name
+')' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'//@requires y >= 0\n' Comment.Single
+
+' ' Text.Whitespace
+'//@requires y >= 0;\n' Comment.Single
+
+' ' Text.Whitespace
+'/*\n hello(2,3);\n */' Comment.Multiline
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'/*\n hello(2,3)\n */' Comment.Multiline
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'{' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'// haha(2,3);\n' Comment.Single
+
+' ' Text.Whitespace
+'return' Keyword
+' ' Text.Whitespace
+'x' Name
+' ' Text.Whitespace
+'/' Operator
+' ' Text.Whitespace
+'y' Name
+';' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'/*\n callblabla(x, y);\n */' Comment.Multiline
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'}' Punctuation
+'\n' Text.Whitespace
+
+'//@requires y >= 0;\n' Comment.Single
+
+'//@requires y >= 0\n' Comment.Single
+
+'/*\ncalling(2,5)\n*/' Comment.Multiline
+'\n' Text.Whitespace
+
+'/*\ncalling(2,5);\n*/' Comment.Multiline
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'\n' Text.Whitespace
+
+'int' Keyword.Type
+' ' Text.Whitespace
+'*' Operator
+' ' Text.Whitespace
+'//@# a pointer to int\n' Comment.Single
+
+'func7' Name.Function
+' ' Text.Whitespace
+'/* @# why a comment here? */' Comment.Multiline
+' ' Text.Whitespace
+'(' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'int' Keyword.Type
+' ' Text.Whitespace
+'/* the index has to be an int */' Comment.Multiline
+' ' Text.Whitespace
+'a' Name
+',' Punctuation
+' ' Text.Whitespace
+'// index into the array\n' Comment.Single
+
+' ' Text.Whitespace
+'int' Keyword.Type
+' ' Text.Whitespace
+'*' Operator
+'b' Name
+' ' Text.Whitespace
+'//the array @!\n' Comment.Single
+
+')' Punctuation
+'\n' Text.Whitespace
+
+'/*\n The end of the func params @ (@ will result error if parsed incorrectly)\n*/' Comment.Multiline
+'\n' Text.Whitespace
+
+'{' Punctuation
+'\n' Text.Whitespace
+
+' ' Text.Whitespace
+'// yet another comment\n' Comment.Single
+
+' ' Text.Whitespace
+'return' Keyword
+' ' Text.Whitespace
+'b' Name
+'[' Punctuation
+'a' Name
+']' Punctuation
+';' Punctuation
+'\n' Text.Whitespace
+
+'}' Punctuation
+'\n' Text.Whitespace