summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mike bayer <mike_mp@zzzcomputing.com> 2022-08-29 17:59:10 +0000
committer: Gerrit Code Review <gerrit@ci3.zzzcomputing.com> 2022-08-29 17:59:10 +0000
commit: b5ac84bafc80c3e56a1ede317b7ae4173e762a4a (patch)
tree: 6ed749d86ca00b55aa8c22e49db352bbc2f1de2c
parent: dbbaad3918c7d19cb71ca4b0b7ebe12661fba47b (diff)
parent: 925760291d6efec64fda6e9dd1fd9cfbd5be068c (diff)
download: mako-b5ac84bafc80c3e56a1ede317b7ae4173e762a4a.tar.gz
Merge "fix tag regexp to match quoted groups correctly" into main
-rw-r--r-- doc/build/unreleased/366.rst 9
-rw-r--r-- mako/lexer.py 12
-rw-r--r-- test/test_lexer.py 21
3 files changed, 34 insertions, 8 deletions
diff --git a/doc/build/unreleased/366.rst b/doc/build/unreleased/366.rst
new file mode 100644
index 0000000..27b0278
--- /dev/null
+++ b/doc/build/unreleased/366.rst
@@ -0,0 +1,9 @@
+.. change::
+ :tags: bug, lexer
+ :tickets: 366
+
+ Fixed issue in lexer where the regexp used to match tags would not
+ correctly interpret quoted sections individually. While this parsing issue
+ still produced the same expected tag structure later on, the mis-handling
+ of quoted sections was also subject to a regexp crash if a tag had a large
+ number of quotes within its quoted sections.
\ No newline at end of file
diff --git a/mako/lexer.py b/mako/lexer.py
index bfcf286..77a2483 100644
--- a/mako/lexer.py
+++ b/mako/lexer.py
@@ -272,20 +272,24 @@ class Lexer:
return self.template
def match_tag_start(self):
- match = self.match(
- r"""
+ reg = r"""
\<% # opening tag
([\w\.\:]+) # keyword
- ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \
+ ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*) # attrname, = \
# sign, string expression
+ # comma is for backwards compat
+ # identified in #366
\s* # more whitespace
(/)?> # closing
- """,
+ """
+
+ match = self.match(
+ reg,
re.I | re.S | re.X,
)
diff --git a/test/test_lexer.py b/test/test_lexer.py
index 255c128..a7b6fe3 100644
--- a/test/test_lexer.py
+++ b/test/test_lexer.py
@@ -1,5 +1,7 @@
import re
+import pytest
+
from mako import compat
from mako import exceptions
from mako import parsetree
@@ -146,6 +148,10 @@ class LexerTest(TemplateTest):
"""
assert_raises(exceptions.CompileException, Lexer(template).parse)
+ def test_tag_many_quotes(self):
+ template = "<%0" + '"' * 3000
+ assert_raises(exceptions.SyntaxException, Lexer(template).parse)
+
def test_unmatched_tag(self):
template = """
<%namespace name="bar">
@@ -432,9 +438,16 @@ class LexerTest(TemplateTest):
),
)
- def test_pagetag(self):
- template = """
- <%page cached="True", args="a, b"/>
+ @pytest.mark.parametrize("comma,numchars", [(",", 48), ("", 47)])
+ def test_pagetag(self, comma, numchars):
+ # note that the comma here looks like:
+ # <%page cached="True", args="a, b"/>
+ # that's what this test has looked like for decades, however, the
+ # comma there is not actually the right syntax. When issue #366
+ # was fixed, the reg was altered to accommodate for this comma to allow
+ # backwards compat
+ template = f"""
+ <%page cached="True"{comma} args="a, b"/>
some template
"""
@@ -453,7 +466,7 @@ class LexerTest(TemplateTest):
some template
""",
- (2, 48),
+ (2, numchars),
),
],
),