diff options
author | Buck Golemon <buck@yelp.com> | 2012-07-06 08:33:23 -0700 |
---|---|---|
committer | Buck Golemon <workitharder@gmail.com> | 2014-04-17 08:33:46 -0700 |
commit | 810bae60461fd7c00c853b91c8e03dce3103b020 (patch) | |
tree | f2c838936ce985fec4b8e308f376a2eb535eb93f | |
parent | 3257d6c7e6ae26098ed5e1ada041235a3a18a957 (diff) | |
download | markupsafe-810bae60461fd7c00c853b91c8e03dce3103b020.tar.gz |
idempotent unescape
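According to the XML spec for entity references, ampersand and space are
not allowed characters in an entity name, so the entity regex no longer
matches across them. I've also modified the unescape function to leave
unexpected inputs (such as &foo;) unchanged instead of deleting them,
which makes unescape idempotent. Leaving unrecognized input untouched is
a common best practice when dealing with layered systems.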
http://www.w3.org/TR/REC-xml/#sec-references
EntityRef ::= '&' Name ';'
Name ::= NameStartChar (NameChar)*
NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
-rw-r--r-- | markupsafe/__init__.py | 5 | ||||
-rw-r--r-- | markupsafe/tests.py | 14 |
2 files changed, 16 insertions, 3 deletions
diff --git a/markupsafe/__init__.py b/markupsafe/__init__.py
index d6c2ef4..d3d9ac9 100644
--- a/markupsafe/__init__.py
+++ b/markupsafe/__init__.py
@@ -18,7 +18,7 @@ __all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
 
 
 _striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
-_entity_re = re.compile(r'&([^;]+);')
+_entity_re = re.compile(r'&([^& ;]+);')
 
 
 class Markup(text_type):
@@ -140,7 +140,8 @@ class Markup(text_type):
                     return unichr(int(name[1:]))
             except ValueError:
                 pass
-            return u''
+            # Don't modify unexpected input.
+            return m.group()
         return _entity_re.sub(handle_match, text_type(self))
 
     def striptags(self):
diff --git a/markupsafe/tests.py b/markupsafe/tests.py
index 13e8b8c..9431767 100644
--- a/markupsafe/tests.py
+++ b/markupsafe/tests.py
@@ -60,10 +60,22 @@ class MarkupTestCase(unittest.TestCase):
         }, Markup(u'<em>&lt;foo&gt;:&lt;bar&gt;</em>'))
 
     def test_escaping(self):
-        # escaping and unescaping
+        # escaping
         assert escape('"<>&\'') == '&#34;&lt;&gt;&amp;&#39;'
         assert Markup("<em>Foo &amp; Bar</em>").striptags() == "Foo & Bar"
+
+    def test_unescape(self):
         assert Markup("&lt;test&gt;").unescape() == "<test>"
+        assert "jack & tavi are cooler than mike & russ" == \
+            Markup("jack & tavi are cooler than mike &amp; russ").unescape(), \
+            Markup("jack & tavi are cooler than mike &amp; russ").unescape()
+
+        # Test that unescape is idempotent
+        original = '&foo&#x3b;'
+        once = Markup(original).unescape()
+        twice = Markup(once).unescape()
+        expected = "&foo;"
+        assert expected == once == twice, (once, twice)
 
     def test_formatting(self):
         for actual, expected in (