move to src directory

author: David Lord <davidism@gmail.com> 2018-10-20 21:35:14 -0700
committer: David Lord <davidism@gmail.com> 2018-10-20 21:35:14 -0700
commit: c640a7ef30b72268530920cb5e01978e22e6a4fc (patch)
tree: a389ab9391c09d3195f0532ac5b4968b8e43dac9 /src
parent: 7f261419f3b5a8fbbf126d73f34ca73e957f1954 (diff)
download: markupsafe-c640a7ef30b72268530920cb5e01978e22e6a4fc.tar.gz
5 files changed, 1075 insertions, 0 deletions
diff --git a/src/markupsafe/__init__.py b/src/markupsafe/__init__.py
new file mode 100644
index 0000000..0bd6fd2
--- /dev/null
+++ b/src/markupsafe/__init__.py
@@ -0,0 +1,297 @@
+# -*- coding: utf-8 -*-
+"""
+markupsafe
+~~~~~~~~~~
+
+Implements an escape function and a Markup string to replace HTML
+special characters with safe representations.
+
+:copyright: © 2010 by the Pallets team.
+:license: BSD, see LICENSE for more details.
+"""
+import re
+import string
+
+from markupsafe._compat import (
+    PY2, int_types, iteritems, string_types, text_type, unichr, Mapping
+)
+
+__version__ = '1.1'
+
+__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
+
+_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
+_entity_re = re.compile(r'&([^& ;]+);')
+
+
+class Markup(text_type):
+    """A string that is ready to be safely inserted into an HTML or XML
+    document, either because it was escaped or because it was marked
+    safe.
+
+    Passing an object to the constructor converts it to text and wraps
+    it to mark it safe without escaping. To escape the text, use the
+    :meth:`escape` class method instead.
+
+    >>> Markup('Hello, <em>World</em>!')
+    Markup('Hello, <em>World</em>!')
+    >>> Markup(42)
+    Markup('42')
+    >>> Markup.escape('Hello, <em>World</em>!')
+    Markup('Hello &lt;em&gt;World&lt;/em&gt;!')
+
+    This implements the ``__html__()`` interface that some frameworks
+    use. Passing an object that implements ``__html__()`` will wrap the
+    output of that method, marking it safe.
+
+    >>> class Foo:
+    ...     def __html__(self):
+    ...         return '<a href="/foo">foo</a>'
+    ...
+    >>> Markup(Foo())
+    Markup('<a href="/foo">foo</a>')
+
+    This is a subclass of the text type (``str`` in Python 3,
+    ``unicode`` in Python 2). It has the same methods as that type, but
+    all methods escape their arguments and return a ``Markup`` instance.
+
+    >>> Markup('<em>%s</em>') % 'foo & bar'
+    Markup('<em>foo &amp; bar</em>')
+    >>> Markup('<em>Hello</em> ') + '<foo>'
+    Markup('<em>Hello</em> &lt;foo&gt;')
+    """
+    __slots__ = ()
+
+    def __new__(cls, base=u'', encoding=None, errors='strict'):
+        if hasattr(base, '__html__'):
+            base = base.__html__()
+        if encoding is None:
+            return text_type.__new__(cls, base)
+        return text_type.__new__(cls, base, encoding, errors)
+
+    def __html__(self):
+        return self
+
+    def __add__(self, other):
+        if isinstance(other, string_types) or hasattr(other, '__html__'):
+            return self.__class__(super(Markup, self).__add__(self.escape(other)))
+        return NotImplemented
+
+    def __radd__(self, other):
+        if hasattr(other, '__html__') or isinstance(other, string_types):
+            return self.escape(other).__add__(self)
+        return NotImplemented
+
+    def __mul__(self, num):
+        if isinstance(num, int_types):
+            return self.__class__(text_type.__mul__(self, num))
+        return NotImplemented
+    __rmul__ = __mul__
+
+    def __mod__(self, arg):
+        if isinstance(arg, tuple):
+            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
+        else:
+            arg = _MarkupEscapeHelper(arg, self.escape)
+        return self.__class__(text_type.__mod__(self, arg))
+
+    def __repr__(self):
+        return '%s(%s)' % (
+            self.__class__.__name__,
+            text_type.__repr__(self)
+        )
+
+    def join(self, seq):
+        return self.__class__(text_type.join(self, map(self.escape, seq)))
+    join.__doc__ = text_type.join.__doc__
+
+    def split(self, *args, **kwargs):
+        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
+    split.__doc__ = text_type.split.__doc__
+
+    def rsplit(self, *args, **kwargs):
+        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
+    rsplit.__doc__ = text_type.rsplit.__doc__
+
+    def splitlines(self, *args, **kwargs):
+        return list(map(self.__class__, text_type.splitlines(
+            self, *args, **kwargs)))
+    splitlines.__doc__ = text_type.splitlines.__doc__
+
+    def unescape(self):
+        """Convert escaped markup back into a text string. This replaces
+        HTML entities with the characters they represent.
+
+        >>> Markup('Main &raquo; <em>About</em>').unescape()
+        'Main » <em>About</em>'
+        """
+        from markupsafe._constants import HTML_ENTITIES
+        def handle_match(m):
+            name = m.group(1)
+            if name in HTML_ENTITIES:
+                return unichr(HTML_ENTITIES[name])
+            try:
+                if name[:2] in ('#x', '#X'):
+                    return unichr(int(name[2:], 16))
+                elif name.startswith('#'):
+                    return unichr(int(name[1:]))
+            except ValueError:
+                pass
+            # Don't modify unexpected input.
+            return m.group()
+        return _entity_re.sub(handle_match, text_type(self))
+
+    def striptags(self):
+        """:meth:`unescape` the markup, remove tags, and normalize
+        whitespace to single spaces.
+
+        >>> Markup('Main &raquo;\t<em>About</em>').striptags()
+        'Main » About'
+        """
+        stripped = u' '.join(_striptags_re.sub('', self).split())
+        return Markup(stripped).unescape()
+
+    @classmethod
+    def escape(cls, s):
+        """Escape a string. Calls :func:`escape` and ensures that for
+        subclasses the correct type is returned.
+        """
+        rv = escape(s)
+        if rv.__class__ is not cls:
+            return cls(rv)
+        return rv
+
+    def make_simple_escaping_wrapper(name):
+        orig = getattr(text_type, name)
+        def func(self, *args, **kwargs):
+            args = _escape_argspec(list(args), enumerate(args), self.escape)
+            _escape_argspec(kwargs, iteritems(kwargs), self.escape)
+            return self.__class__(orig(self, *args, **kwargs))
+        func.__name__ = orig.__name__
+        func.__doc__ = orig.__doc__
+        return func
+
+    for method in '__getitem__', 'capitalize', \
+                  'title', 'lower', 'upper', 'replace', 'ljust', \
+                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
+                  'translate', 'expandtabs', 'swapcase', 'zfill':
+        locals()[method] = make_simple_escaping_wrapper(method)
+
+    def partition(self, sep):
+        return tuple(map(self.__class__,
+                         text_type.partition(self, self.escape(sep))))
+
+    def rpartition(self, sep):
+        return tuple(map(self.__class__,
+                         text_type.rpartition(self, self.escape(sep))))
+
+    def format(*args, **kwargs):
+        self, args = args[0], args[1:]
+        formatter = EscapeFormatter(self.escape)
+        kwargs = _MagicFormatMapping(args, kwargs)
+        return self.__class__(formatter.vformat(self, args, kwargs))
+
+        def __html_format__(self, format_spec):
+            if format_spec:
+                raise ValueError('Unsupported format specification '
+                                 'for Markup.')
+            return self
+
+    # not in python 3
+    if hasattr(text_type, '__getslice__'):
+        __getslice__ = make_simple_escaping_wrapper('__getslice__')
+
+    del method, make_simple_escaping_wrapper
+
+
+class _MagicFormatMapping(Mapping):
+    """This class implements a dummy wrapper to fix a bug in the Python
+    standard library for string formatting.
+
+    See http://bugs.python.org/issue13598 for information about why
+    this is necessary.
+    """
+
+    def __init__(self, args, kwargs):
+        self._args = args
+        self._kwargs = kwargs
+        self._last_index = 0
+
+    def __getitem__(self, key):
+        if key == '':
+            idx = self._last_index
+            self._last_index += 1
+            try:
+                return self._args[idx]
+            except LookupError:
+                pass
+            key = str(idx)
+        return self._kwargs[key]
+
+    def __iter__(self):
+        return iter(self._kwargs)
+
+    def __len__(self):
+        return len(self._kwargs)
+
+
+if hasattr(text_type, 'format'):
+    class EscapeFormatter(string.Formatter):
+
+        def __init__(self, escape):
+            self.escape = escape
+
+        def format_field(self, value, format_spec):
+            if hasattr(value, '__html_format__'):
+                rv = value.__html_format__(format_spec)
+            elif hasattr(value, '__html__'):
+                if format_spec:
+                    raise ValueError(
+                        'Format specifier {0} given, but {1} does not'
+                        ' define __html_format__. A class that defines'
+                        ' __html__ must define __html_format__ to work'
+                        ' with format specifiers.'.format(
+                            format_spec, type(value)))
+                rv = value.__html__()
+            else:
+                # We need to make sure the format spec is unicode here as
+                # otherwise the wrong callback methods are invoked.  For
+                # instance a byte string there would invoke __str__ and
+                # not __unicode__.
+                rv = string.Formatter.format_field(
+                    self, value, text_type(format_spec))
+            return text_type(self.escape(rv))
+
+
+def _escape_argspec(obj, iterable, escape):
+    """Helper for various string-wrapped functions."""
+    for key, value in iterable:
+        if hasattr(value, '__html__') or isinstance(value, string_types):
+            obj[key] = escape(value)
+    return obj
+
+
+class _MarkupEscapeHelper(object):
+    """Helper for Markup.__mod__"""
+
+    def __init__(self, obj, escape):
+        self.obj = obj
+        self.escape = escape
+
+    __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape)
+    __unicode__ = __str__ = lambda s: text_type(s.escape(s.obj))
+    __repr__ = lambda s: str(s.escape(repr(s.obj)))
+    __int__ = lambda s: int(s.obj)
+    __float__ = lambda s: float(s.obj)
+
+
+# we have to import it down here as the speedups and native
+# modules imports the markup type which is define above.
+try:
+    from markupsafe._speedups import escape, escape_silent, soft_unicode
+except ImportError:
+    from markupsafe._native import escape, escape_silent, soft_unicode
+
+if not PY2:
+    soft_str = soft_unicode
+    __all__.append('soft_str')
diff --git a/src/markupsafe/_compat.py b/src/markupsafe/_compat.py
new file mode 100644
index 0000000..4d2e2cd
--- /dev/null
+++ b/src/markupsafe/_compat.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+"""
+markupsafe._compat
+~~~~~~~~~~~~~~~~~~
+
+:copyright: © 2010 by the Pallets team.
+:license: BSD, see LICENSE for more details.
+"""
+import sys
+
+PY2 = sys.version_info[0] == 2
+
+if not PY2:
+    text_type = str
+    string_types = (str,)
+    unichr = chr
+    int_types = (int,)
+    iteritems = lambda x: iter(x.items())
+    from collections.abc import Mapping
+else:
+    text_type = unicode
+    string_types = (str, unicode)
+    unichr = unichr
+    int_types = (int, long)
+    iteritems = lambda x: x.iteritems()
+    from collections import Mapping
diff --git a/src/markupsafe/_constants.py b/src/markupsafe/_constants.py
new file mode 100644
index 0000000..ea6ac2f
--- /dev/null
+++ b/src/markupsafe/_constants.py
@@ -0,0 +1,264 @@
+# -*- coding: utf-8 -*-
+"""
+markupsafe._constants
+~~~~~~~~~~~~~~~~~~~~~
+
+:copyright: © 2010 by the Pallets team.
+:license: BSD, see LICENSE for more details.
+"""
+
+HTML_ENTITIES = {
+    'AElig': 198,
+    'Aacute': 193,
+    'Acirc': 194,
+    'Agrave': 192,
+    'Alpha': 913,
+    'Aring': 197,
+    'Atilde': 195,
+    'Auml': 196,
+    'Beta': 914,
+    'Ccedil': 199,
+    'Chi': 935,
+    'Dagger': 8225,
+    'Delta': 916,
+    'ETH': 208,
+    'Eacute': 201,
+    'Ecirc': 202,
+    'Egrave': 200,
+    'Epsilon': 917,
+    'Eta': 919,
+    'Euml': 203,
+    'Gamma': 915,
+    'Iacute': 205,
+    'Icirc': 206,
+    'Igrave': 204,
+    'Iota': 921,
+    'Iuml': 207,
+    'Kappa': 922,
+    'Lambda': 923,
+    'Mu': 924,
+    'Ntilde': 209,
+    'Nu': 925,
+    'OElig': 338,
+    'Oacute': 211,
+    'Ocirc': 212,
+    'Ograve': 210,
+    'Omega': 937,
+    'Omicron': 927,
+    'Oslash': 216,
+    'Otilde': 213,
+    'Ouml': 214,
+    'Phi': 934,
+    'Pi': 928,
+    'Prime': 8243,
+    'Psi': 936,
+    'Rho': 929,
+    'Scaron': 352,
+    'Sigma': 931,
+    'THORN': 222,
+    'Tau': 932,
+    'Theta': 920,
+    'Uacute': 218,
+    'Ucirc': 219,
+    'Ugrave': 217,
+    'Upsilon': 933,
+    'Uuml': 220,
+    'Xi': 926,
+    'Yacute': 221,
+    'Yuml': 376,
+    'Zeta': 918,
+    'aacute': 225,
+    'acirc': 226,
+    'acute': 180,
+    'aelig': 230,
+    'agrave': 224,
+    'alefsym': 8501,
+    'alpha': 945,
+    'amp': 38,
+    'and': 8743,
+    'ang': 8736,
+    'apos': 39,
+    'aring': 229,
+    'asymp': 8776,
+    'atilde': 227,
+    'auml': 228,
+    'bdquo': 8222,
+    'beta': 946,
+    'brvbar': 166,
+    'bull': 8226,
+    'cap': 8745,
+    'ccedil': 231,
+    'cedil': 184,
+    'cent': 162,
+    'chi': 967,
+    'circ': 710,
+    'clubs': 9827,
+    'cong': 8773,
+    'copy': 169,
+    'crarr': 8629,
+    'cup': 8746,
+    'curren': 164,
+    'dArr': 8659,
+    'dagger': 8224,
+    'darr': 8595,
+    'deg': 176,
+    'delta': 948,
+    'diams': 9830,
+    'divide': 247,
+    'eacute': 233,
+    'ecirc': 234,
+    'egrave': 232,
+    'empty': 8709,
+    'emsp': 8195,
+    'ensp': 8194,
+    'epsilon': 949,
+    'equiv': 8801,
+    'eta': 951,
+    'eth': 240,
+    'euml': 235,
+    'euro': 8364,
+    'exist': 8707,
+    'fnof': 402,
+    'forall': 8704,
+    'frac12': 189,
+    'frac14': 188,
+    'frac34': 190,
+    'frasl': 8260,
+    'gamma': 947,
+    'ge': 8805,
+    'gt': 62,
+    'hArr': 8660,
+    'harr': 8596,
+    'hearts': 9829,
+    'hellip': 8230,
+    'iacute': 237,
+    'icirc': 238,
+    'iexcl': 161,
+    'igrave': 236,
+    'image': 8465,
+    'infin': 8734,
+    'int': 8747,
+    'iota': 953,
+    'iquest': 191,
+    'isin': 8712,
+    'iuml': 239,
+    'kappa': 954,
+    'lArr': 8656,
+    'lambda': 955,
+    'lang': 9001,
+    'laquo': 171,
+    'larr': 8592,
+    'lceil': 8968,
+    'ldquo': 8220,
+    'le': 8804,
+    'lfloor': 8970,
+    'lowast': 8727,
+    'loz': 9674,
+    'lrm': 8206,
+    'lsaquo': 8249,
+    'lsquo': 8216,
+    'lt': 60,
+    'macr': 175,
+    'mdash': 8212,
+    'micro': 181,
+    'middot': 183,
+    'minus': 8722,
+    'mu': 956,
+    'nabla': 8711,
+    'nbsp': 160,
+    'ndash': 8211,
+    'ne': 8800,
+    'ni': 8715,
+    'not': 172,
+    'notin': 8713,
+    'nsub': 8836,
+    'ntilde': 241,
+    'nu': 957,
+    'oacute': 243,
+    'ocirc': 244,
+    'oelig': 339,
+    'ograve': 242,
+    'oline': 8254,
+    'omega': 969,
+    'omicron': 959,
+    'oplus': 8853,
+    'or': 8744,
+    'ordf': 170,
+    'ordm': 186,
+    'oslash': 248,
+    'otilde': 245,
+    'otimes': 8855,
+    'ouml': 246,
+    'para': 182,
+    'part': 8706,
+    'permil': 8240,
+    'perp': 8869,
+    'phi': 966,
+    'pi': 960,
+    'piv': 982,
+    'plusmn': 177,
+    'pound': 163,
+    'prime': 8242,
+    'prod': 8719,
+    'prop': 8733,
+    'psi': 968,
+    'quot': 34,
+    'rArr': 8658,
+    'radic': 8730,
+    'rang': 9002,
+    'raquo': 187,
+    'rarr': 8594,
+    'rceil': 8969,
+    'rdquo': 8221,
+    'real': 8476,
+    'reg': 174,
+    'rfloor': 8971,
+    'rho': 961,
+    'rlm': 8207,
+    'rsaquo': 8250,
+    'rsquo': 8217,
+    'sbquo': 8218,
+    'scaron': 353,
+    'sdot': 8901,
+    'sect': 167,
+    'shy': 173,
+    'sigma': 963,
+    'sigmaf': 962,
+    'sim': 8764,
+    'spades': 9824,
+    'sub': 8834,
+    'sube': 8838,
+    'sum': 8721,
+    'sup': 8835,
+    'sup1': 185,
+    'sup2': 178,
+    'sup3': 179,
+    'supe': 8839,
+    'szlig': 223,
+    'tau': 964,
+    'there4': 8756,
+    'theta': 952,
+    'thetasym': 977,
+    'thinsp': 8201,
+    'thorn': 254,
+    'tilde': 732,
+    'times': 215,
+    'trade': 8482,
+    'uArr': 8657,
+    'uacute': 250,
+    'uarr': 8593,
+    'ucirc': 251,
+    'ugrave': 249,
+    'uml': 168,
+    'upsih': 978,
+    'upsilon': 965,
+    'uuml': 252,
+    'weierp': 8472,
+    'xi': 958,
+    'yacute': 253,
+    'yen': 165,
+    'yuml': 255,
+    'zeta': 950,
+    'zwj': 8205,
+    'zwnj': 8204
+}
diff --git a/src/markupsafe/_native.py b/src/markupsafe/_native.py
new file mode 100644
index 0000000..801f285
--- /dev/null
+++ b/src/markupsafe/_native.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+"""
+markupsafe._native
+~~~~~~~~~~~~~~~~~~
+
+Native Python implementation used when the C module is not compiled.
+
+:copyright: © 2010 by the Pallets team.
+:license: BSD, see LICENSE for more details.
+"""
+from markupsafe import Markup
+from markupsafe._compat import text_type
+
+
+def escape(s):
+    """Replace the characters ``&``, ``<``, ``>``, ``'``, and ``"`` in
+    the string with HTML-safe sequences. Use this if you need to display
+    text that might contain such characters in HTML.
+
+    If the object has an ``__html__`` method, it is called and the
+    return value is assumed to already be safe for HTML.
+
+    :param s: An object to be converted to a string and escaped.
+    :return: A :class:`Markup` string with the escaped text.
+    """
+    if hasattr(s, '__html__'):
+        return Markup(s.__html__())
+    return Markup(text_type(s)
+        .replace('&', '&amp;')
+        .replace('>', '&gt;')
+        .replace('<', '&lt;')
+        .replace("'", '&#39;')
+        .replace('"', '&#34;')
+    )
+
+
+def escape_silent(s):
+    """Like :func:`escape` but treats ``None`` as the empty string.
+    Useful with optional values, as otherwise you get the string
+    ``'None'`` when the value is ``None``.
+
+    >>> escape(None)
+    Markup('None')
+    >>> escape_silent(None)
+    Markup('')
+    """
+    if s is None:
+        return Markup()
+    return escape(s)
+
+
+def soft_unicode(s):
+    """Convert an object to a string if it isn't already. This preserves
+    a :class:`Markup` string rather than converting it back to a basic
+    string, so it will still be marked as safe and won't be escaped
+    again.
+
+    >>> value = escape('<User 1>')
+    >>> value
+    Markup('&lt;User 1&gt;')
+    >>> escape(str(value))
+    Markup('&amp;lt;User 1&amp;gt;')
+    >>> escape(soft_unicode(value))
+    Markup('&lt;User 1&gt;')
+    """
+    if not isinstance(s, text_type):
+        s = text_type(s)
+    return s
diff --git a/src/markupsafe/_speedups.c b/src/markupsafe/_speedups.c
new file mode 100644
index 0000000..22a604d
--- /dev/null
+++ b/src/markupsafe/_speedups.c
@@ -0,0 +1,420 @@
+/**
+ * markupsafe._speedups
+ * ~~~~~~~~~~~~~~~~~~~~
+ *
+ * C implementation of escaping for better performance. Used instead of
+ * the native Python implementation when compiled.
+ *
+ * :copyright: © 2010 by the Pallets team.
+ * :license: BSD, see LICENSE for more details.
+ */
+#include <Python.h>
+
+#if PY_MAJOR_VERSION < 3
+#define ESCAPED_CHARS_TABLE_SIZE 63
+#define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL)));
+
+static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
+static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
+#endif
+
+static PyObject* markup;
+
+static int
+init_constants(void)
+{
+	PyObject *module;
+
+#if PY_MAJOR_VERSION < 3
+	/* mapping of characters to replace */
+	escaped_chars_repl['"'] = UNICHR("&#34;");
+	escaped_chars_repl['\''] = UNICHR("&#39;");
+	escaped_chars_repl['&'] = UNICHR("&amp;");
+	escaped_chars_repl['<'] = UNICHR("&lt;");
+	escaped_chars_repl['>'] = UNICHR("&gt;");
+
+	/* lengths of those characters when replaced - 1 */
+	memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len));
+	escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
+		escaped_chars_delta_len['&'] = 4;
+	escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
+#endif
+
+	/* import markup type so that we can mark the return value */
+	module = PyImport_ImportModule("markupsafe");
+	if (!module)
+		return 0;
+	markup = PyObject_GetAttrString(module, "Markup");
+	Py_DECREF(module);
+
+	return 1;
+}
+
+#if PY_MAJOR_VERSION < 3
+static PyObject*
+escape_unicode(PyUnicodeObject *in)
+{
+	PyUnicodeObject *out;
+	Py_UNICODE *inp = PyUnicode_AS_UNICODE(in);
+	const Py_UNICODE *inp_end = PyUnicode_AS_UNICODE(in) + PyUnicode_GET_SIZE(in);
+	Py_UNICODE *next_escp;
+	Py_UNICODE *outp;
+	Py_ssize_t delta=0, erepl=0, delta_len=0;
+
+	/* First we need to figure out how long the escaped string will be */
+	while (*(inp) || inp < inp_end) {
+		if (*inp < ESCAPED_CHARS_TABLE_SIZE) {
+			delta += escaped_chars_delta_len[*inp];
+			erepl += !!escaped_chars_delta_len[*inp];
+		}
+		++inp;
+	}
+
+	/* Do we need to escape anything at all? */
+	if (!erepl) {
+		Py_INCREF(in);
+		return (PyObject*)in;
+	}
+
+	out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(in) + delta);
+	if (!out)
+		return NULL;
+
+	outp = PyUnicode_AS_UNICODE(out);
+	inp = PyUnicode_AS_UNICODE(in);
+	while (erepl-- > 0) {
+		/* look for the next substitution */
+		next_escp = inp;
+		while (next_escp < inp_end) {
+			if (*next_escp < ESCAPED_CHARS_TABLE_SIZE &&
+			    (delta_len = escaped_chars_delta_len[*next_escp])) {
+				++delta_len;
+				break;
+			}
+			++next_escp;
+		}
+
+		if (next_escp > inp) {
+			/* copy unescaped chars between inp and next_escp */
+			Py_UNICODE_COPY(outp, inp, next_escp-inp);
+			outp += next_escp - inp;
+		}
+
+		/* escape 'next_escp' */
+		Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len);
+		outp += delta_len;
+
+		inp = next_escp + 1;
+	}
+	if (inp < inp_end)
+		Py_UNICODE_COPY(outp, inp, PyUnicode_GET_SIZE(in) - (inp - PyUnicode_AS_UNICODE(in)));
+
+	return (PyObject*)out;
+}
+#else /* PY_MAJOR_VERSION < 3 */
+
+#define GET_DELTA(inp, inp_end, delta) \
+	while (inp < inp_end) {	 \
+		switch (*inp++) {	   \
+		case '"':			   \
+		case '\'':			  \
+		case '&':			   \
+			delta += 4;		 \
+			break;			  \
+		case '<':			   \
+		case '>':			   \
+			delta += 3;		 \
+			break;			  \
+		}					   \
+	}
+
+#define DO_ESCAPE(inp, inp_end, outp) \
+	{  \
+		Py_ssize_t ncopy = 0;  \
+		while (inp < inp_end) {  \
+			switch (*inp) {  \
+			case '"':  \
+				memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
+				outp += ncopy; ncopy = 0; \
+				*outp++ = '&';  \
+				*outp++ = '#';  \
+				*outp++ = '3';  \
+				*outp++ = '4';  \
+				*outp++ = ';';  \
+				break;  \
+			case '\'':  \
+				memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
+				outp += ncopy; ncopy = 0; \
+				*outp++ = '&';  \
+				*outp++ = '#';  \
+				*outp++ = '3';  \
+				*outp++ = '9';  \
+				*outp++ = ';';  \
+				break;  \
+			case '&':  \
+				memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
+				outp += ncopy; ncopy = 0; \
+				*outp++ = '&';  \
+				*outp++ = 'a';  \
+				*outp++ = 'm';  \
+				*outp++ = 'p';  \
+				*outp++ = ';';  \
+				break;  \
+			case '<':  \
+				memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
+				outp += ncopy; ncopy = 0; \
+				*outp++ = '&';  \
+				*outp++ = 'l';  \
+				*outp++ = 't';  \
+				*outp++ = ';';  \
+				break;  \
+			case '>':  \
+				memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
+				outp += ncopy; ncopy = 0; \
+				*outp++ = '&';  \
+				*outp++ = 'g';  \
+				*outp++ = 't';  \
+				*outp++ = ';';  \
+				break;  \
+			default:  \
+				ncopy++; \
+			}  \
+            inp++; \
+		}  \
+		memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \
+	}
+
+static PyObject*
+escape_unicode_kind1(PyUnicodeObject *in)
+{
+	Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in);
+	Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in);
+	Py_UCS1 *outp;
+	PyObject *out;
+	Py_ssize_t delta = 0;
+
+	GET_DELTA(inp, inp_end, delta);
+	if (!delta) {
+		Py_INCREF(in);
+		return (PyObject*)in;
+	}
+
+	out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta,
+						PyUnicode_IS_ASCII(in) ? 127 : 255);
+	if (!out)
+		return NULL;
+
+	inp = PyUnicode_1BYTE_DATA(in);
+	outp = PyUnicode_1BYTE_DATA(out);
+	DO_ESCAPE(inp, inp_end, outp);
+	return out;
+}
+
+static PyObject*
+escape_unicode_kind2(PyUnicodeObject *in)
+{
+	Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in);
+	Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in);
+	Py_UCS2 *outp;
+	PyObject *out;
+	Py_ssize_t delta = 0;
+
+	GET_DELTA(inp, inp_end, delta);
+	if (!delta) {
+		Py_INCREF(in);
+		return (PyObject*)in;
+	}
+
+	out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535);
+	if (!out)
+		return NULL;
+
+	inp = PyUnicode_2BYTE_DATA(in);
+	outp = PyUnicode_2BYTE_DATA(out);
+	DO_ESCAPE(inp, inp_end, outp);
+	return out;
+}
+
+
+static PyObject*
+escape_unicode_kind4(PyUnicodeObject *in)
+{
+	Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in);
+	Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in);
+	Py_UCS4 *outp;
+	PyObject *out;
+	Py_ssize_t delta = 0;
+
+	GET_DELTA(inp, inp_end, delta);
+	if (!delta) {
+		Py_INCREF(in);
+		return (PyObject*)in;
+	}
+
+	out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111);
+	if (!out)
+		return NULL;
+
+	inp = PyUnicode_4BYTE_DATA(in);
+	outp = PyUnicode_4BYTE_DATA(out);
+	DO_ESCAPE(inp, inp_end, outp);
+	return out;
+}
+
+static PyObject*
+escape_unicode(PyUnicodeObject *in)
+{
+	if (PyUnicode_READY(in))
+		return NULL;
+
+	switch (PyUnicode_KIND(in)) {
+	case PyUnicode_1BYTE_KIND:
+		return escape_unicode_kind1(in);
+	case PyUnicode_2BYTE_KIND:
+		return escape_unicode_kind2(in);
+	case PyUnicode_4BYTE_KIND:
+		return escape_unicode_kind4(in);
+	}
+	assert(0);  /* shouldn't happen */
+	return NULL;
+}
+#endif /* PY_MAJOR_VERSION < 3 */
+
+static PyObject*
+escape(PyObject *self, PyObject *text)
+{
+	static PyObject *id_html;
+	PyObject *s = NULL, *rv = NULL, *html;
+
+	if (id_html == NULL) {
+#if PY_MAJOR_VERSION < 3
+		id_html = PyString_InternFromString("__html__");
+#else
+		id_html = PyUnicode_InternFromString("__html__");
+#endif
+		if (id_html == NULL) {
+			return NULL;
+		}
+	}
+
+	/* we don't have to escape integers, bools or floats */
+	if (PyLong_CheckExact(text) ||
+#if PY_MAJOR_VERSION < 3
+	    PyInt_CheckExact(text) ||
+#endif
+	    PyFloat_CheckExact(text) || PyBool_Check(text) ||
+	    text == Py_None)
+		return PyObject_CallFunctionObjArgs(markup, text, NULL);
+
+	/* if the object has an __html__ method that performs the escaping */
+	html = PyObject_GetAttr(text ,id_html);
+	if (html) {
+		s = PyObject_CallObject(html, NULL);
+		Py_DECREF(html);
+		/* Convert to Markup object */
+		rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
+		Py_DECREF(s);
+		return rv;
+	}
+
+	/* otherwise make the object unicode if it isn't, then escape */
+	PyErr_Clear();
+	if (!PyUnicode_Check(text)) {
+#if PY_MAJOR_VERSION < 3
+		PyObject *unicode = PyObject_Unicode(text);
+#else
+		PyObject *unicode = PyObject_Str(text);
+#endif
+		if (!unicode)
+			return NULL;
+		s = escape_unicode((PyUnicodeObject*)unicode);
+		Py_DECREF(unicode);
+	}
+	else
+		s = escape_unicode((PyUnicodeObject*)text);
+
+	/* convert the unicode string into a markup object. */
+	rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL);
+	Py_DECREF(s);
+	return rv;
+}
+
+
+static PyObject*
+escape_silent(PyObject *self, PyObject *text)
+{
+	if (text != Py_None)
+		return escape(self, text);
+	return PyObject_CallFunctionObjArgs(markup, NULL);
+}
+
+
+static PyObject*
+soft_unicode(PyObject *self, PyObject *s)
+{
+	if (!PyUnicode_Check(s))
+#if PY_MAJOR_VERSION < 3
+		return PyObject_Unicode(s);
+#else
+		return PyObject_Str(s);
+#endif
+	Py_INCREF(s);
+	return s;
+}
+
+
+static PyMethodDef module_methods[] = {
+	{"escape", (PyCFunction)escape, METH_O,
+	 "escape(s) -> markup\n\n"
+	 "Convert the characters &, <, >, ', and \" in string s to HTML-safe\n"
+	 "sequences.  Use this if you need to display text that might contain\n"
+	 "such characters in HTML.  Marks return value as markup string."},
+	{"escape_silent", (PyCFunction)escape_silent, METH_O,
+	 "escape_silent(s) -> markup\n\n"
+	 "Like escape but converts None to an empty string."},
+	{"soft_unicode", (PyCFunction)soft_unicode, METH_O,
+	 "soft_unicode(object) -> string\n\n"
+         "Make a string unicode if it isn't already.  That way a markup\n"
+         "string is not converted back to unicode."},
+	{NULL, NULL, 0, NULL}		/* Sentinel */
+};
+
+
+#if PY_MAJOR_VERSION < 3
+
+#ifndef PyMODINIT_FUNC	/* declarations for DLL import/export */
+#define PyMODINIT_FUNC void
+#endif
+PyMODINIT_FUNC
+init_speedups(void)
+{
+	if (!init_constants())
+		return;
+
+	Py_InitModule3("markupsafe._speedups", module_methods, "");
+}
+
+#else /* Python 3.x module initialization */
+
+static struct PyModuleDef module_definition = {
+        PyModuleDef_HEAD_INIT,
+	"markupsafe._speedups",
+	NULL,
+	-1,
+	module_methods,
+	NULL,
+	NULL,
+	NULL,
+	NULL
+};
+
+PyMODINIT_FUNC
+PyInit__speedups(void)
+{
+	if (!init_constants())
+		return NULL;
+
+	return PyModule_Create(&module_definition);
+}
+
+#endif
author	David Lord <davidism@gmail.com>	2018-10-20 21:35:14 -0700
committer	David Lord <davidism@gmail.com>	2018-10-20 21:35:14 -0700
commit	c640a7ef30b72268530920cb5e01978e22e6a4fc (patch)
tree	a389ab9391c09d3195f0532ac5b4968b8e43dac9 /src
parent	7f261419f3b5a8fbbf126d73f34ca73e957f1954 (diff)
download	markupsafe-c640a7ef30b72268530920cb5e01978e22e6a4fc.tar.gz