summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Buck Golemon <buck@yelp.com> 2012-07-06 08:33:23 -0700
committer: Buck Golemon <workitharder@gmail.com> 2014-04-17 08:33:46 -0700
commit: 810bae60461fd7c00c853b91c8e03dce3103b020 (patch)
tree: f2c838936ce985fec4b8e308f376a2eb535eb93f
parent: 3257d6c7e6ae26098ed5e1ada041235a3a18a957 (diff)
download: markupsafe-810bae60461fd7c00c853b91c8e03dce3103b020.tar.gz
idempotent unescape
If we examine the XML spec for entities, we find that ampersand and space are not allowed characters in an entity name. I've also modified the unescape function to leave unexpected inputs (such as &foo;) unmodified. This is a common best practice when dealing with layered systems.

http://www.w3.org/TR/REC-xml/#sec-references

    EntityRef     ::= '&' Name ';'
    Name          ::= NameStartChar (NameChar)*
    NameChar      ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
    NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
-rw-r--r--  markupsafe/__init__.py  5
-rw-r--r--  markupsafe/tests.py  14
2 files changed, 16 insertions, 3 deletions
diff --git a/markupsafe/__init__.py b/markupsafe/__init__.py
index d6c2ef4..d3d9ac9 100644
--- a/markupsafe/__init__.py
+++ b/markupsafe/__init__.py
@@ -18,7 +18,7 @@ __all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
-_entity_re = re.compile(r'&([^;]+);')
+_entity_re = re.compile(r'&([^& ;]+);')
class Markup(text_type):
@@ -140,7 +140,8 @@ class Markup(text_type):
return unichr(int(name[1:]))
except ValueError:
pass
- return u''
+ # Don't modify unexpected input.
+ return m.group()
return _entity_re.sub(handle_match, text_type(self))
def striptags(self):
diff --git a/markupsafe/tests.py b/markupsafe/tests.py
index 13e8b8c..9431767 100644
--- a/markupsafe/tests.py
+++ b/markupsafe/tests.py
@@ -60,10 +60,22 @@ class MarkupTestCase(unittest.TestCase):
}, Markup(u'<em>&lt;foo&gt;:&lt;bar&gt;</em>'))
def test_escaping(self):
- # escaping and unescaping
+ # escaping
assert escape('"<>&\'') == '&#34;&lt;&gt;&amp;&#39;'
assert Markup("<em>Foo &amp; Bar</em>").striptags() == "Foo & Bar"
+
+ def test_unescape(self):
assert Markup("&lt;test&gt;").unescape() == "<test>"
+ assert "jack & tavi are cooler than mike & russ" == \
+ Markup("jack & tavi are cooler than mike &amp; russ").unescape(), \
+ Markup("jack & tavi are cooler than mike &amp; russ").unescape()
+
+ # Test that unescape is idempotent
+ original = '&foo&#x3b;'
+ once = Markup(original).unescape()
+ twice = Markup(once).unescape()
+ expected = "&foo;"
+ assert expected == once == twice, (once, twice)
def test_formatting(self):
for actual, expected in (