summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Lord <davidism@gmail.com>2022-03-14 08:49:47 -0700
committerDavid Lord <davidism@gmail.com>2022-03-14 08:49:47 -0700
commitb15d9d6c848bfef272d6091ce02e5c528ec381a0 (patch)
treed7044965d2712ad392b46fb4c03e0f5bc9b5d6bb
parent9ddec7a83a304a3e7ac112c9372a0ed4dea17830 (diff)
downloadmarkupsafe-b15d9d6c848bfef272d6091ce02e5c528ec381a0.tar.gz
avoid ambiguous regex in striptags
-rw-r--r--CHANGES.rst8
-rw-r--r--src/markupsafe/__init__.py12
-rw-r--r--tests/test_markupsafe.py10
3 files changed, 25 insertions, 5 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 25f7737..649aebe 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,3 +1,11 @@
+Version 2.1.1
+-------------
+
+Unreleased
+
+- Avoid ambiguous regex matches in ``striptags``. :pr:`293`
+
+
Version 2.1.0
-------------
diff --git a/src/markupsafe/__init__.py b/src/markupsafe/__init__.py
index 2acb04e..e066ebd 100644
--- a/src/markupsafe/__init__.py
+++ b/src/markupsafe/__init__.py
@@ -11,9 +11,10 @@ if t.TYPE_CHECKING:
pass
-__version__ = "2.1.0"
+__version__ = "2.1.1.dev0"
-_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)")
+_strip_comments_re = re.compile(r"<!--.*?-->")
+_strip_tags_re = re.compile(r"<.*?>")
def _simple_escaping_wrapper(name: str) -> t.Callable[..., "Markup"]:
@@ -158,8 +159,11 @@ class Markup(str):
>>> Markup("Main &raquo;\t<em>About</em>").striptags()
'Main » About'
"""
- stripped = " ".join(_striptags_re.sub("", self).split())
- return Markup(stripped).unescape()
+ # Use two regexes to avoid ambiguous matches.
+ value = _strip_comments_re.sub("", self)
+ value = _strip_tags_re.sub("", value)
+ value = " ".join(value.split())
+ return Markup(value).unescape()
@classmethod
def escape(cls, s: t.Any) -> "Markup":
diff --git a/tests/test_markupsafe.py b/tests/test_markupsafe.py
index 2f13885..236f35e 100644
--- a/tests/test_markupsafe.py
+++ b/tests/test_markupsafe.py
@@ -69,7 +69,15 @@ def test_dict_interpol():
def test_escaping(escape):
assert escape("\"<>&'") == "&#34;&lt;&gt;&amp;&#39;"
- assert Markup("<em>Foo &amp; Bar</em>").striptags() == "Foo & Bar"
+ assert (
+ Markup(
+ "<!-- outer comment -->"
+ "<em>Foo &amp; Bar"
+ "<!-- inner comment about <em> -->"
+ "</em>"
+ ).striptags()
+ == "Foo & Bar"
+ )
def test_unescape():