diff options
Diffstat (limited to 'pygments/lexers/c_cpp.py')
-rw-r--r-- | pygments/lexers/c_cpp.py | 41 |
1 files changed, 29 insertions, 12 deletions
diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py index 1f27b89c..5d3b9c7d 100644 --- a/pygments/lexers/c_cpp.py +++ b/pygments/lexers/c_cpp.py @@ -42,6 +42,17 @@ class CFamilyLexer(RegexLexer): _ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})+' _namespaced_ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|::)+' + # Single and multiline comment regexes + # Beware not to use *? for the inner content! When these regexes + # are embedded in larger regexes, that can cause the stuff*? to + # match more than it would have if the regex had been used in + # a standalone way ... + _comment_single = r'//(?:.|(?<=\\)\n)*\n' + _comment_multiline = r'/(?:\\\n)?[*](?:[^*]|[*](?!(?:\\\n)?/))*[*](?:\\\n)?/' + + # Regex to match optional comments + _possible_comments = rf'\s*(?:(?:(?:{_comment_single})|(?:{_comment_multiline}))\s*)*' + tokens = { 'whitespace': [ # preprocessor directives: without whitespace @@ -60,8 +71,8 @@ class CFamilyLexer(RegexLexer): (r'\n', Whitespace), (r'[^\S\n]+', Whitespace), (r'\\\n', Text), # line continuation - (r'//(\n|[\w\W]*?[^\\]\n)', Comment.Single), - (r'/(\\\n)?[*][\w\W]*?[*](\\\n)?/', Comment.Multiline), + (_comment_single, Comment.Single), + (_comment_multiline, Comment.Multiline), # Open until EOF, so no ending delimiter (r'/(\\\n)?[*][\w\W]*', Comment.Multiline), ], @@ -118,19 +129,25 @@ class CFamilyLexer(RegexLexer): include('keywords'), # functions (r'(' + _namespaced_ident + r'(?:[&*\s])+)' # return arguments + r'(' + _possible_comments + r')' # possible comments r'(' + _namespaced_ident + r')' # method name - r'(\s*\([^;]*?\))' # signature - r'([^;{]*)(\{)', - bygroups(using(this), Name.Function, using(this), using(this), - Punctuation), + r'(' + _possible_comments + r')' # possible comments + r'(\([^;]*?\))' # signature + r'(' + _possible_comments + r')' # possible comments + r'([^;{/]*)(\{)', + bygroups(using(this), using(this, state='whitespace'), Name.Function, using(this, state='whitespace'), + using(this), using(this, state='whitespace'), using(this), Punctuation), 'function'), # function declarations (r'(' + _namespaced_ident + r'(?:[&*\s])+)' # return arguments + r'(' + _possible_comments + r')' # possible comments r'(' + _namespaced_ident + r')' # method name - r'(\s*\([^;]*?\))' # signature - r'([^;]*)(;)', - bygroups(using(this), Name.Function, using(this), using(this), - Punctuation)), + r'(' + _possible_comments + r')' # possible comments + r'(\([^;]*?\))' # signature + r'(' + _possible_comments + r')' # possible comments + r'([^;/]*)(;)', + bygroups(using(this), using(this, state='whitespace'), Name.Function, using(this, state='whitespace'), + using(this), using(this, state='whitespace'), using(this), Punctuation)), include('types'), default('statement'), ], @@ -222,9 +239,9 @@ class CFamilyLexer(RegexLexer): self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True) RegexLexer.__init__(self, **options) - def get_tokens_unprocessed(self, text): + def get_tokens_unprocessed(self, text, stack=('root',)): for index, token, value in \ - RegexLexer.get_tokens_unprocessed(self, text): + RegexLexer.get_tokens_unprocessed(self, text, stack): if token is Name: if self.stdlibhighlighting and value in self.stdlib_types: token = Keyword.Type |