diff options
author | mike bayer <mike_mp@zzzcomputing.com> | 2022-08-29 17:59:10 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@ci3.zzzcomputing.com> | 2022-08-29 17:59:10 +0000 |
commit | b5ac84bafc80c3e56a1ede317b7ae4173e762a4a (patch) | |
tree | 6ed749d86ca00b55aa8c22e49db352bbc2f1de2c | |
parent | dbbaad3918c7d19cb71ca4b0b7ebe12661fba47b (diff) | |
parent | 925760291d6efec64fda6e9dd1fd9cfbd5be068c (diff) | |
download | mako-b5ac84bafc80c3e56a1ede317b7ae4173e762a4a.tar.gz |
Merge "fix tag regexp to match quoted groups correctly" into main
-rw-r--r-- | doc/build/unreleased/366.rst | 9 | ||||
-rw-r--r-- | mako/lexer.py | 12 | ||||
-rw-r--r-- | test/test_lexer.py | 21 |
3 files changed, 34 insertions, 8 deletions
diff --git a/doc/build/unreleased/366.rst b/doc/build/unreleased/366.rst new file mode 100644 index 0000000..27b0278 --- /dev/null +++ b/doc/build/unreleased/366.rst @@ -0,0 +1,9 @@ +.. change:: + :tags: bug, lexer + :tickets: 366 + + Fixed issue in lexer where the regexp used to match tags would not + correctly interpret quoted sections individually. While this parsing issue + still produced the same expected tag structure later on, the mis-handling + of quoted sections was also subject to a regexp crash if a tag had a large + number of quotes within its quoted sections.
\ No newline at end of file diff --git a/mako/lexer.py b/mako/lexer.py index bfcf286..77a2483 100644 --- a/mako/lexer.py +++ b/mako/lexer.py @@ -272,20 +272,24 @@ class Lexer: return self.template def match_tag_start(self): - match = self.match( - r""" + reg = r""" \<% # opening tag ([\w\.\:]+) # keyword - ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \ + ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*) # attrname, = \ # sign, string expression + # comma is for backwards compat + # identified in #366 \s* # more whitespace (/)?> # closing - """, + """ + + match = self.match( + reg, re.I | re.S | re.X, ) diff --git a/test/test_lexer.py b/test/test_lexer.py index 255c128..a7b6fe3 100644 --- a/test/test_lexer.py +++ b/test/test_lexer.py @@ -1,5 +1,7 @@ import re +import pytest + from mako import compat from mako import exceptions from mako import parsetree @@ -146,6 +148,10 @@ class LexerTest(TemplateTest): """ assert_raises(exceptions.CompileException, Lexer(template).parse) + def test_tag_many_quotes(self): + template = "<%0" + '"' * 3000 + assert_raises(exceptions.SyntaxException, Lexer(template).parse) + def test_unmatched_tag(self): template = """ <%namespace name="bar"> @@ -432,9 +438,16 @@ class LexerTest(TemplateTest): ), ) - def test_pagetag(self): - template = """ - <%page cached="True", args="a, b"/> + @pytest.mark.parametrize("comma,numchars", [(",", 48), ("", 47)]) + def test_pagetag(self, comma, numchars): + # note that the comma here looks like: + # <%page cached="True", args="a, b"/> + # that's what this test has looked like for decades, however, the + # comma there is not actually the right syntax. When issue #366 + # was fixed, the reg was altered to accommodate for this comma to allow + # backwards compat + template = f""" + <%page cached="True"{comma} args="a, b"/> some template """ @@ -453,7 +466,7 @@ class LexerTest(TemplateTest): some template """, - (2, 48), + (2, numchars), ), ], ), |