summaryrefslogtreecommitdiff
path: root/pygments/lexers/c_cpp.py
diff options
context:
space:
mode:
Diffstat (limited to 'pygments/lexers/c_cpp.py')
-rw-r--r--pygments/lexers/c_cpp.py41
1 files changed, 29 insertions, 12 deletions
diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py
index 1f27b89c..5d3b9c7d 100644
--- a/pygments/lexers/c_cpp.py
+++ b/pygments/lexers/c_cpp.py
@@ -42,6 +42,17 @@ class CFamilyLexer(RegexLexer):
_ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})+'
_namespaced_ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|::)+'
+ # Regexes for single-line and multiline comments.
+ # Do not use a lazy quantifier (*?) for the inner content! When these
+ # regexes are embedded in a larger regex, the lazy part can end up
+ # matching more than it would if the regex were used on its own.
+ _comment_single = r'//(?:.|(?<=\\)\n)*\n'
+ _comment_multiline = r'/(?:\\\n)?[*](?:[^*]|[*](?!(?:\\\n)?/))*[*](?:\\\n)?/'
+
+ # Regex to match optional comments
+ _possible_comments = rf'\s*(?:(?:(?:{_comment_single})|(?:{_comment_multiline}))\s*)*'
+
tokens = {
'whitespace': [
# preprocessor directives: without whitespace
@@ -60,8 +71,8 @@ class CFamilyLexer(RegexLexer):
(r'\n', Whitespace),
(r'[^\S\n]+', Whitespace),
(r'\\\n', Text), # line continuation
- (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single),
- (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline),
+ (_comment_single, Comment.Single),
+ (_comment_multiline, Comment.Multiline),
# Open until EOF, so no ending delimiter
(r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
],
@@ -118,19 +129,25 @@ class CFamilyLexer(RegexLexer):
include('keywords'),
# functions
(r'(' + _namespaced_ident + r'(?:[&*\s])+)' # return arguments
+ r'(' + _possible_comments + r')' # possible comments
r'(' + _namespaced_ident + r')' # method name
- r'(\s*\([^;]*?\))' # signature
- r'([^;{]*)(\{)',
- bygroups(using(this), Name.Function, using(this), using(this),
- Punctuation),
+ r'(' + _possible_comments + r')' # possible comments
+ r'(\([^;]*?\))' # signature
+ r'(' + _possible_comments + r')' # possible comments
+ r'([^;{/]*)(\{)',
+ bygroups(using(this), using(this, state='whitespace'), Name.Function, using(this, state='whitespace'),
+ using(this), using(this, state='whitespace'), using(this), Punctuation),
'function'),
# function declarations
(r'(' + _namespaced_ident + r'(?:[&*\s])+)' # return arguments
+ r'(' + _possible_comments + r')' # possible comments
r'(' + _namespaced_ident + r')' # method name
- r'(\s*\([^;]*?\))' # signature
- r'([^;]*)(;)',
- bygroups(using(this), Name.Function, using(this), using(this),
- Punctuation)),
+ r'(' + _possible_comments + r')' # possible comments
+ r'(\([^;]*?\))' # signature
+ r'(' + _possible_comments + r')' # possible comments
+ r'([^;/]*)(;)',
+ bygroups(using(this), using(this, state='whitespace'), Name.Function, using(this, state='whitespace'),
+ using(this), using(this, state='whitespace'), using(this), Punctuation)),
include('types'),
default('statement'),
],
@@ -222,9 +239,9 @@ class CFamilyLexer(RegexLexer):
self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True)
RegexLexer.__init__(self, **options)
- def get_tokens_unprocessed(self, text):
+ def get_tokens_unprocessed(self, text, stack=('root',)):
for index, token, value in \
- RegexLexer.get_tokens_unprocessed(self, text):
+ RegexLexer.get_tokens_unprocessed(self, text, stack):
if token is Name:
if self.stdlibhighlighting and value in self.stdlib_types:
token = Keyword.Type