all: weed out more backtracking string regexes

author: Georg Brandl <georg@python.org> 2020-12-25 13:16:56 +0100
committer: Georg Brandl <georg@python.org> 2020-12-25 13:21:48 +0100
commit: 681487f82f55fba66f01f9913e4ff103e5b2ef4c (patch)
tree: 0e3f82343796411c1e9c71f5fa7c31e392d06bf5 /pygments/lexers/parsers.py
parent: 6c820019a73a606940d7477619a5a9e6ad38761d (diff)
download: pygments-git-681487f82f55fba66f01f9913e4ff103e5b2ef4c.tar.gz
1 file changed, 24 insertions, 24 deletions
diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py
index 13a3a83c..ec2f39e0 100644
--- a/pygments/lexers/parsers.py
+++ b/pygments/lexers/parsers.py
@@ -64,10 +64,10 @@ class RagelLexer(RegexLexer):
             (r'[+-]?[0-9]+', Number.Integer),
         ],
         'literals': [
-            (r'"(\\\\|\\"|[^"])*"', String),              # double quote string
-            (r"'(\\\\|\\'|[^'])*'", String),              # single quote string
-            (r'\[(\\\\|\\\]|[^\]])*\]', String),          # square bracket literals
-            (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex),  # regular expressions
+            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
+            (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String),          # square bracket literals
+            (r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', String.Regex),  # regular expressions
         ],
         'identifiers': [
             (r'[a-zA-Z_]\w*', Name.Variable),
@@ -106,15 +106,15 @@ class RagelLexer(RegexLexer):
                 r'[^\\]\\[{}]',  # allow escaped { or }
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"',  # double quote string
-                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'"(\\\\|\\[^\\]|[^"\\])*"',
+                r"'(\\\\|\\[^\\]|[^'\\])*'",
                 r'//.*$\n?',            # single line comment
                 r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
                 r'\#.*$\n?',            # ruby comment
 
                 # regular expression: There's no reason for it to start
                 # with a * and this stops confusion with comments.
-                r'/(?!\*)(\\\\|\\/|[^/])*/',
+                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
 
                 # / is safe now that we've handled regex and javadoc comments
                 r'/',
@@ -147,12 +147,12 @@ class RagelEmbeddedLexer(RegexLexer):
                 r'%(?=[^%]|$)',   # a single % sign is okay, just not 2 of them
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"',  # double quote string
-                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'"(\\\\|\\[^\\]|[^"\\])*"',
+                r"'(\\\\|\\[^\\]|[^'\\])*'",
                 r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
                 r'//.*$\n?',  # single line comment
                 r'\#.*$\n?',  # ruby/ragel comment
-                r'/(?!\*)(\\\\|\\/|[^/])*/',  # regular expression
+                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',  # regular expression
 
                 # / is safe now that we've handled regex and javadoc comments
                 r'/',
@@ -182,7 +182,7 @@ class RagelEmbeddedLexer(RegexLexer):
 
                     # specifically allow regex followed immediately by *
                     # so it doesn't get mistaken for a comment
-                    r'/(?!\*)(\\\\|\\/|[^/])*/\*',
+                    r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*',
 
                     # allow / as long as it's not followed by another / or by a *
                     r'/(?=[^/*]|$)',
@@ -193,9 +193,9 @@ class RagelEmbeddedLexer(RegexLexer):
                 )) + r')+',
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"',      # double quote string
-                r"'(\\\\|\\'|[^'])*'",      # single quote string
-                r"\[(\\\\|\\\]|[^\]])*\]",  # square bracket literal
+                r'"(\\\\|\\[^\\]|[^"\\])*"',
+                r"'(\\\\|\\[^\\]|[^'\\])*'",
+                r"\[(\\\\|\\[^\\]|[^\]\\])*\]",  # square bracket literal
                 r'/\*(.|\n)*?\*/',          # multi-line javadoc-style comment
                 r'//.*$\n?',                # single line comment
                 r'\#.*$\n?',                # ruby/ragel comment
@@ -416,8 +416,8 @@ class AntlrLexer(RegexLexer):
             (r':', Punctuation),
 
             # literals
-            (r"'(\\\\|\\'|[^'])*'", String),
-            (r'"(\\\\|\\"|[^"])*"', String),
+            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
             (r'<<([^>]|>[^>])>>', String),
             # identifiers
             # Tokens start with capital letter.
@@ -456,14 +456,14 @@ class AntlrLexer(RegexLexer):
                 r'[^${}\'"/\\]+',  # exclude unsafe characters
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"',  # double quote string
-                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'"(\\\\|\\[^\\]|[^"\\])*"',
+                r"'(\\\\|\\[^\\]|[^'\\])*'",
                 r'//.*$\n?',            # single line comment
                 r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
 
                 # regular expression: There's no reason for it to start
                 # with a * and this stops confusion with comments.
-                r'/(?!\*)(\\\\|\\/|[^/])*/',
+                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
 
                 # backslashes are okay, as long as we are not backslashing a %
                 r'\\(?!%)',
@@ -483,14 +483,14 @@ class AntlrLexer(RegexLexer):
                 r'[^$\[\]\'"/]+',  # exclude unsafe characters
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"',  # double quote string
-                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'"(\\\\|\\[^\\]|[^"\\])*"',
+                r"'(\\\\|\\[^\\]|[^'\\])*'",
                 r'//.*$\n?',            # single line comment
                 r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
 
                 # regular expression: There's no reason for it to start
                 # with a * and this stops confusion with comments.
-                r'/(?!\*)(\\\\|\\/|[^/])*/',
+                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',
 
                 # Now that we've handled regex and javadoc comments
                 # it's safe to let / through.
@@ -701,8 +701,8 @@ class TreetopBaseLexer(RegexLexer):
         'rule': [
             include('space'),
             include('end'),
-            (r'"(\\\\|\\"|[^"])*"', String.Double),
-            (r"'(\\\\|\\'|[^'])*'", String.Single),
+            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
+            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
             (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
             (r'[A-Za-z_]\w*', Name),
             (r'[()]', Punctuation),
author	Georg Brandl <georg@python.org>	2020-12-25 13:16:56 +0100
committer	Georg Brandl <georg@python.org>	2020-12-25 13:21:48 +0100
commit	681487f82f55fba66f01f9913e4ff103e5b2ef4c (patch)
tree	0e3f82343796411c1e9c71f5fa7c31e392d06bf5 /pygments/lexers/parsers.py
parent	6c820019a73a606940d7477619a5a9e6ad38761d (diff)
download	pygments-git-681487f82f55fba66f01f9913e4ff103e5b2ef4c.tar.gz