summaryrefslogtreecommitdiff
path: root/pygments/lexers/parsers.py
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2020-12-25 13:16:56 +0100
committerGeorg Brandl <georg@python.org>2020-12-25 13:21:48 +0100
commit681487f82f55fba66f01f9913e4ff103e5b2ef4c (patch)
tree0e3f82343796411c1e9c71f5fa7c31e392d06bf5 /pygments/lexers/parsers.py
parent6c820019a73a606940d7477619a5a9e6ad38761d (diff)
downloadpygments-git-681487f82f55fba66f01f9913e4ff103e5b2ef4c.tar.gz
all: weed out more backtracking string regexes
Diffstat (limited to 'pygments/lexers/parsers.py')
-rw-r--r--pygments/lexers/parsers.py48
1 files changed, 24 insertions, 24 deletions
diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py
index 13a3a83c..ec2f39e0 100644
--- a/pygments/lexers/parsers.py
+++ b/pygments/lexers/parsers.py
@@ -64,10 +64,10 @@ class RagelLexer(RegexLexer):
(r'[+-]?[0-9]+', Number.Integer),
],
'literals': [
- (r'"(\\\\|\\"|[^"])*"', String), # double quote string
- (r"'(\\\\|\\'|[^'])*'", String), # single quote string
- (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals
- (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
+ (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String), # square bracket literals
+ (r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', String.Regex), # regular expressions
],
'identifiers': [
(r'[a-zA-Z_]\w*', Name.Variable),
@@ -106,15 +106,15 @@ class RagelLexer(RegexLexer):
r'[^\\]\\[{}]', # allow escaped { or }
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
r'//.*$\n?', # single line comment
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
r'\#.*$\n?', # ruby comment
# regular expression: There's no reason for it to start
# with a * and this stops confusion with comments.
- r'/(?!\*)(\\\\|\\/|[^/])*/',
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
# / is safe now that we've handled regex and javadoc comments
r'/',
@@ -147,12 +147,12 @@ class RagelEmbeddedLexer(RegexLexer):
r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
r'//.*$\n?', # single line comment
r'\#.*$\n?', # ruby/ragel comment
- r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', # regular expression
# / is safe now that we've handled regex and javadoc comments
r'/',
@@ -182,7 +182,7 @@ class RagelEmbeddedLexer(RegexLexer):
# specifically allow regex followed immediately by *
# so it doesn't get mistaken for a comment
- r'/(?!\*)(\\\\|\\/|[^/])*/\*',
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*',
# allow / as long as it's not followed by another / or by a *
r'/(?=[^/*]|$)',
@@ -193,9 +193,9 @@ class RagelEmbeddedLexer(RegexLexer):
)) + r')+',
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
- r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
+ r"\[(\\\\|\\[^\\]|[^\]\\])*\]", # square bracket literal
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
r'//.*$\n?', # single line comment
r'\#.*$\n?', # ruby/ragel comment
@@ -416,8 +416,8 @@ class AntlrLexer(RegexLexer):
(r':', Punctuation),
# literals
- (r"'(\\\\|\\'|[^'])*'", String),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r'<<([^>]|>[^>])>>', String),
# identifiers
# Tokens start with capital letter.
@@ -456,14 +456,14 @@ class AntlrLexer(RegexLexer):
r'[^${}\'"/\\]+', # exclude unsafe characters
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
r'//.*$\n?', # single line comment
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
# regular expression: There's no reason for it to start
# with a * and this stops confusion with comments.
- r'/(?!\*)(\\\\|\\/|[^/])*/',
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
# backslashes are okay, as long as we are not backslashing a %
r'\\(?!%)',
@@ -483,14 +483,14 @@ class AntlrLexer(RegexLexer):
r'[^$\[\]\'"/]+', # exclude unsafe characters
# strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'"(\\\\|\\[^\\]|[^"\\])*"',
+ r"'(\\\\|\\[^\\]|[^'\\])*'",
r'//.*$\n?', # single line comment
r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
# regular expression: There's no reason for it to start
# with a * and this stops confusion with comments.
- r'/(?!\*)(\\\\|\\/|[^/])*/',
+ r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
# Now that we've handled regex and javadoc comments
# it's safe to let / through.
@@ -701,8 +701,8 @@ class TreetopBaseLexer(RegexLexer):
'rule': [
include('space'),
include('end'),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+ (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
(r'[A-Za-z_]\w*', Name),
(r'[()]', Punctuation),