diff options
author | David Lord <davidism@gmail.com> | 2022-03-14 08:49:47 -0700 |
---|---|---|
committer | David Lord <davidism@gmail.com> | 2022-03-14 08:49:47 -0700 |
commit | b15d9d6c848bfef272d6091ce02e5c528ec381a0 (patch) | |
tree | d7044965d2712ad392b46fb4c03e0f5bc9b5d6bb | |
parent | 9ddec7a83a304a3e7ac112c9372a0ed4dea17830 (diff) | |
download | markupsafe-b15d9d6c848bfef272d6091ce02e5c528ec381a0.tar.gz |
avoid ambiguous regex in striptags
-rw-r--r-- | CHANGES.rst | 8 | ||||
-rw-r--r-- | src/markupsafe/__init__.py | 12 | ||||
-rw-r--r-- | tests/test_markupsafe.py | 10 |
3 files changed, 25 insertions, 5 deletions
```diff
diff --git a/CHANGES.rst b/CHANGES.rst
index 25f7737..649aebe 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,3 +1,11 @@
+Version 2.1.1
+-------------
+
+Unreleased
+
+- Avoid ambiguous regex matches in ``striptags``. :pr:`293`
+
+
 Version 2.1.0
 -------------
 
diff --git a/src/markupsafe/__init__.py b/src/markupsafe/__init__.py
index 2acb04e..e066ebd 100644
--- a/src/markupsafe/__init__.py
+++ b/src/markupsafe/__init__.py
@@ -11,9 +11,10 @@ if t.TYPE_CHECKING:
             pass
 
 
-__version__ = "2.1.0"
+__version__ = "2.1.1.dev0"
 
-_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)")
+_strip_comments_re = re.compile(r"<!--.*?-->")
+_strip_tags_re = re.compile(r"<.*?>")
 
 
 def _simple_escaping_wrapper(name: str) -> t.Callable[..., "Markup"]:
@@ -158,8 +159,11 @@ class Markup(str):
         >>> Markup("Main &raquo;\t<em>About</em>").striptags()
         'Main » About'
         """
-        stripped = " ".join(_striptags_re.sub("", self).split())
-        return Markup(stripped).unescape()
+        # Use two regexes to avoid ambiguous matches.
+        value = _strip_comments_re.sub("", self)
+        value = _strip_tags_re.sub("", value)
+        value = " ".join(value.split())
+        return Markup(value).unescape()
 
     @classmethod
     def escape(cls, s: t.Any) -> "Markup":
diff --git a/tests/test_markupsafe.py b/tests/test_markupsafe.py
index 2f13885..236f35e 100644
--- a/tests/test_markupsafe.py
+++ b/tests/test_markupsafe.py
@@ -69,7 +69,15 @@ def test_dict_interpol():
 
 def test_escaping(escape):
     assert escape("\"<>&'") == "&#34;&lt;&gt;&amp;&#39;"
-    assert Markup("<em>Foo &amp; Bar</em>").striptags() == "Foo & Bar"
+    assert (
+        Markup(
+            "<!-- outer comment -->"
+            "<em>Foo &amp; Bar"
+            "<!-- inner comment about <em> -->"
+            "</em>"
+        ).striptags()
+        == "Foo & Bar"
+    )
 
 
 def test_unescape():
```