summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Lord <davidism@gmail.com> 2020-04-09 14:10:37 -0700
committer GitHub <noreply@github.com> 2020-04-09 14:10:37 -0700
commit 0d5fa7e644a70be8447dff42c1ecc0025e3391d5 (patch)
tree b207541a5ee2d42e389c4250b8eb2256dd54e93b
parent 10b931f4456d094303248c7662438dfdea909aba (diff)
parent c35603a9037d3dd679732089ecd1084abc6e8cb7 (diff)
download markupsafe-0d5fa7e644a70be8447dff42c1ecc0025e3391d5.tar.gz
Merge pull request #117 from pallets/html-unescape
use html.unescape
-rw-r--r-- CHANGES.rst 2
-rw-r--r-- src/markupsafe/__init__.py 19
-rw-r--r-- src/markupsafe/_constants.py 255
3 files changed, 4 insertions, 272 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 39c7227..7f729e7 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,6 +4,8 @@ Version 2.0.0
Unreleased
- Drop Python 2.7, 3.4, and 3.5 support.
+- ``Markup.unescape`` uses :func:`html.unescape` to support HTML5
+ character references. :pr:`117`
Version 1.1.1
diff --git a/src/markupsafe/__init__.py b/src/markupsafe/__init__.py
index 4da294e..54d32bf 100644
--- a/src/markupsafe/__init__.py
+++ b/src/markupsafe/__init__.py
@@ -5,7 +5,6 @@ from collections import abc
__version__ = "2.0.0a1"
_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)")
-_entity_re = re.compile(r"&([^& ;]+);")
class Markup(str):
@@ -110,23 +109,9 @@ class Markup(str):
>>> Markup("Main &raquo; <em>About</em>").unescape()
'Main » <em>About</em>'
"""
- from ._constants import HTML_ENTITIES
+ from html import unescape
- def handle_match(m):
- name = m.group(1)
- if name in HTML_ENTITIES:
- return chr(HTML_ENTITIES[name])
- try:
- if name[:2] in ("#x", "#X"):
- return chr(int(name[2:], 16))
- elif name.startswith("#"):
- return chr(int(name[1:]))
- except ValueError:
- pass
- # Don't modify unexpected input.
- return m.group()
-
- return _entity_re.sub(handle_match, str(self))
+ return unescape(str(self))
def striptags(self):
""":meth:`unescape` the markup, remove tags, and normalize
diff --git a/src/markupsafe/_constants.py b/src/markupsafe/_constants.py
deleted file mode 100644
index 7638937..0000000
--- a/src/markupsafe/_constants.py
+++ /dev/null
@@ -1,255 +0,0 @@
-HTML_ENTITIES = {
- "AElig": 198,
- "Aacute": 193,
- "Acirc": 194,
- "Agrave": 192,
- "Alpha": 913,
- "Aring": 197,
- "Atilde": 195,
- "Auml": 196,
- "Beta": 914,
- "Ccedil": 199,
- "Chi": 935,
- "Dagger": 8225,
- "Delta": 916,
- "ETH": 208,
- "Eacute": 201,
- "Ecirc": 202,
- "Egrave": 200,
- "Epsilon": 917,
- "Eta": 919,
- "Euml": 203,
- "Gamma": 915,
- "Iacute": 205,
- "Icirc": 206,
- "Igrave": 204,
- "Iota": 921,
- "Iuml": 207,
- "Kappa": 922,
- "Lambda": 923,
- "Mu": 924,
- "Ntilde": 209,
- "Nu": 925,
- "OElig": 338,
- "Oacute": 211,
- "Ocirc": 212,
- "Ograve": 210,
- "Omega": 937,
- "Omicron": 927,
- "Oslash": 216,
- "Otilde": 213,
- "Ouml": 214,
- "Phi": 934,
- "Pi": 928,
- "Prime": 8243,
- "Psi": 936,
- "Rho": 929,
- "Scaron": 352,
- "Sigma": 931,
- "THORN": 222,
- "Tau": 932,
- "Theta": 920,
- "Uacute": 218,
- "Ucirc": 219,
- "Ugrave": 217,
- "Upsilon": 933,
- "Uuml": 220,
- "Xi": 926,
- "Yacute": 221,
- "Yuml": 376,
- "Zeta": 918,
- "aacute": 225,
- "acirc": 226,
- "acute": 180,
- "aelig": 230,
- "agrave": 224,
- "alefsym": 8501,
- "alpha": 945,
- "amp": 38,
- "and": 8743,
- "ang": 8736,
- "apos": 39,
- "aring": 229,
- "asymp": 8776,
- "atilde": 227,
- "auml": 228,
- "bdquo": 8222,
- "beta": 946,
- "brvbar": 166,
- "bull": 8226,
- "cap": 8745,
- "ccedil": 231,
- "cedil": 184,
- "cent": 162,
- "chi": 967,
- "circ": 710,
- "clubs": 9827,
- "cong": 8773,
- "copy": 169,
- "crarr": 8629,
- "cup": 8746,
- "curren": 164,
- "dArr": 8659,
- "dagger": 8224,
- "darr": 8595,
- "deg": 176,
- "delta": 948,
- "diams": 9830,
- "divide": 247,
- "eacute": 233,
- "ecirc": 234,
- "egrave": 232,
- "empty": 8709,
- "emsp": 8195,
- "ensp": 8194,
- "epsilon": 949,
- "equiv": 8801,
- "eta": 951,
- "eth": 240,
- "euml": 235,
- "euro": 8364,
- "exist": 8707,
- "fnof": 402,
- "forall": 8704,
- "frac12": 189,
- "frac14": 188,
- "frac34": 190,
- "frasl": 8260,
- "gamma": 947,
- "ge": 8805,
- "gt": 62,
- "hArr": 8660,
- "harr": 8596,
- "hearts": 9829,
- "hellip": 8230,
- "iacute": 237,
- "icirc": 238,
- "iexcl": 161,
- "igrave": 236,
- "image": 8465,
- "infin": 8734,
- "int": 8747,
- "iota": 953,
- "iquest": 191,
- "isin": 8712,
- "iuml": 239,
- "kappa": 954,
- "lArr": 8656,
- "lambda": 955,
- "lang": 9001,
- "laquo": 171,
- "larr": 8592,
- "lceil": 8968,
- "ldquo": 8220,
- "le": 8804,
- "lfloor": 8970,
- "lowast": 8727,
- "loz": 9674,
- "lrm": 8206,
- "lsaquo": 8249,
- "lsquo": 8216,
- "lt": 60,
- "macr": 175,
- "mdash": 8212,
- "micro": 181,
- "middot": 183,
- "minus": 8722,
- "mu": 956,
- "nabla": 8711,
- "nbsp": 160,
- "ndash": 8211,
- "ne": 8800,
- "ni": 8715,
- "not": 172,
- "notin": 8713,
- "nsub": 8836,
- "ntilde": 241,
- "nu": 957,
- "oacute": 243,
- "ocirc": 244,
- "oelig": 339,
- "ograve": 242,
- "oline": 8254,
- "omega": 969,
- "omicron": 959,
- "oplus": 8853,
- "or": 8744,
- "ordf": 170,
- "ordm": 186,
- "oslash": 248,
- "otilde": 245,
- "otimes": 8855,
- "ouml": 246,
- "para": 182,
- "part": 8706,
- "permil": 8240,
- "perp": 8869,
- "phi": 966,
- "pi": 960,
- "piv": 982,
- "plusmn": 177,
- "pound": 163,
- "prime": 8242,
- "prod": 8719,
- "prop": 8733,
- "psi": 968,
- "quot": 34,
- "rArr": 8658,
- "radic": 8730,
- "rang": 9002,
- "raquo": 187,
- "rarr": 8594,
- "rceil": 8969,
- "rdquo": 8221,
- "real": 8476,
- "reg": 174,
- "rfloor": 8971,
- "rho": 961,
- "rlm": 8207,
- "rsaquo": 8250,
- "rsquo": 8217,
- "sbquo": 8218,
- "scaron": 353,
- "sdot": 8901,
- "sect": 167,
- "shy": 173,
- "sigma": 963,
- "sigmaf": 962,
- "sim": 8764,
- "spades": 9824,
- "sub": 8834,
- "sube": 8838,
- "sum": 8721,
- "sup": 8835,
- "sup1": 185,
- "sup2": 178,
- "sup3": 179,
- "supe": 8839,
- "szlig": 223,
- "tau": 964,
- "there4": 8756,
- "theta": 952,
- "thetasym": 977,
- "thinsp": 8201,
- "thorn": 254,
- "tilde": 732,
- "times": 215,
- "trade": 8482,
- "uArr": 8657,
- "uacute": 250,
- "uarr": 8593,
- "ucirc": 251,
- "ugrave": 249,
- "uml": 168,
- "upsih": 978,
- "upsilon": 965,
- "uuml": 252,
- "weierp": 8472,
- "xi": 958,
- "yacute": 253,
- "yen": 165,
- "yuml": 255,
- "zeta": 950,
- "zwj": 8205,
- "zwnj": 8204,
-}