summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author INADA Naoki <songofacandy@gmail.com> 2017-02-12 23:56:37 +0900
committer David Lord <davidism@gmail.com> 2018-05-04 08:07:32 -0700
commit a03ecd5efc3d5a2d12a3bc72d7ce9e776cccfc66 (patch)
tree 4564aa15cb084c7d36baab4ad0a452ae8af239b1
parent ce443dd4ab6453607166e75211396cae0f811309 (diff)
download markupsafe-a03ecd5efc3d5a2d12a3bc72d7ce9e776cccfc66.tar.gz
Support PEP 393 new Unicode APIs
-rw-r--r-- markupsafe/_speedups.c 177
-rwxr-xr-x tests.py 271
2 files changed, 446 insertions, 2 deletions
diff --git a/markupsafe/_speedups.c b/markupsafe/_speedups.c
index fb4a03e..8362490 100644
--- a/markupsafe/_speedups.c
+++ b/markupsafe/_speedups.c
@@ -8,20 +8,24 @@
* :copyright: © 2010 by the Pallets team.
* :license: BSD, see LICENSE for more details.
*/
-
#include <Python.h>
+#if PY_MAJOR_VERSION < 3
#define ESCAPED_CHARS_TABLE_SIZE 63
#define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL)));
-static PyObject* markup;
static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE];
static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE];
+#endif
+
+static PyObject* markup;
static int
init_constants(void)
{
PyObject *module;
+
+#if PY_MAJOR_VERSION < 3
/* mapping of characters to replace */
escaped_chars_repl['"'] = UNICHR("&#34;");
escaped_chars_repl['\''] = UNICHR("&#39;");
@@ -34,6 +38,7 @@ init_constants(void)
escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \
escaped_chars_delta_len['&'] = 4;
escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3;
+#endif
/* import markup type so that we can mark the return value */
module = PyImport_ImportModule("markupsafe");
@@ -45,6 +50,7 @@ init_constants(void)
return 1;
}
+#if PY_MAJOR_VERSION < 3
static PyObject*
escape_unicode(PyUnicodeObject *in)
{
@@ -105,7 +111,174 @@ escape_unicode(PyUnicodeObject *in)
return (PyObject*)out;
}
+#else /* PY_MAJOR_VERSION < 3 */
+
+#define GET_DELTA(inp, inp_end, delta) /* add to delta the growth needed to escape [inp, inp_end); leaves inp == inp_end */ \
+ while (inp < inp_end) { \
+ switch (*inp++) { \
+ case '"': \
+ case '\'': \
+ case '&': \
+ delta += 4; /* DO_ESCAPE writes 5 units for each of these in place of 1 */ \
+ break; \
+ case '<': \
+ case '>': \
+ delta += 3; /* DO_ESCAPE writes 4 units for each of these in place of 1 */ \
+ break; \
+ } \
+ }
+
+#define DO_ESCAPE(inp, inp_end, outp) /* copy [inp, inp_end) to outp, replacing " ' & < > with &#34; &#39; &amp; &lt; &gt;; advances inp and outp */ \
+ { \
+ Py_ssize_t ncopy = 0; /* length of the run of ordinary units seen but not yet copied */ \
+ while (inp < inp_end) { \
+ switch (*inp) { \
+ case '"': \
+ memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); /* flush the pending run that ends just before inp */ \
+ outp += ncopy; ncopy = 0; \
+ *outp++ = '&'; \
+ *outp++ = '#'; \
+ *outp++ = '3'; \
+ *outp++ = '4'; \
+ *outp++ = ';'; \
+ break; \
+ case '\'': \
+ memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); /* flush pending run */ \
+ outp += ncopy; ncopy = 0; \
+ *outp++ = '&'; \
+ *outp++ = '#'; \
+ *outp++ = '3'; \
+ *outp++ = '9'; \
+ *outp++ = ';'; \
+ break; \
+ case '&': \
+ memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); /* flush pending run */ \
+ outp += ncopy; ncopy = 0; \
+ *outp++ = '&'; \
+ *outp++ = 'a'; \
+ *outp++ = 'm'; \
+ *outp++ = 'p'; \
+ *outp++ = ';'; \
+ break; \
+ case '<': \
+ memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); /* flush pending run */ \
+ outp += ncopy; ncopy = 0; \
+ *outp++ = '&'; \
+ *outp++ = 'l'; \
+ *outp++ = 't'; \
+ *outp++ = ';'; \
+ break; \
+ case '>': \
+ memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); /* flush pending run */ \
+ outp += ncopy; ncopy = 0; \
+ *outp++ = '&'; \
+ *outp++ = 'g'; \
+ *outp++ = 't'; \
+ *outp++ = ';'; \
+ break; \
+ default: \
+ ncopy++; /* ordinary unit: extend the pending run, copied later in bulk */ \
+ } \
+ inp++; \
+ } \
+ memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); /* flush the trailing run; outp is not advanced past it */ \
+ }
+
+static PyObject* /* escape a string read through PyUnicode_1BYTE_DATA; NULL if PyUnicode_New fails */
+escape_unicode_kind1(PyUnicodeObject *in)
+{
+ Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in);
+ Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in);
+ Py_UCS1 *outp;
+ PyObject *out;
+ Py_ssize_t delta = 0; /* extra length the escaped result needs */
+
+ GET_DELTA(inp, inp_end, delta); /* note: advances inp up to inp_end */
+ if (!delta) { /* nothing to escape: hand back the input itself with an extra reference */
+ Py_INCREF(in);
+ return (PyObject*)in;
+ }
+
+ out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta,
+ PyUnicode_IS_ASCII(in) ? 127 : 255); /* maxchar: DO_ESCAPE only adds ASCII characters, so the input's range carries over */
+ if (!out)
+ return NULL;
+
+ inp = PyUnicode_1BYTE_DATA(in); /* rewind, since GET_DELTA consumed inp */
+ outp = PyUnicode_1BYTE_DATA(out);
+ DO_ESCAPE(inp, inp_end, outp);
+ return out;
+}
+
+static PyObject* /* escape a string read through PyUnicode_2BYTE_DATA; NULL if PyUnicode_New fails */
+escape_unicode_kind2(PyUnicodeObject *in)
+{
+ Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in);
+ Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in);
+ Py_UCS2 *outp;
+ PyObject *out;
+ Py_ssize_t delta = 0; /* extra length the escaped result needs */
+
+ GET_DELTA(inp, inp_end, delta); /* note: advances inp up to inp_end */
+ if (!delta) { /* nothing to escape: hand back the input itself with an extra reference */
+ Py_INCREF(in);
+ return (PyObject*)in;
+ }
+
+ out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535); /* 65535 (0xFFFF) is passed as maxchar */
+ if (!out)
+ return NULL;
+
+ inp = PyUnicode_2BYTE_DATA(in); /* rewind, since GET_DELTA consumed inp */
+ outp = PyUnicode_2BYTE_DATA(out);
+ DO_ESCAPE(inp, inp_end, outp);
+ return out;
+}
+
+static PyObject* /* escape a string read through PyUnicode_4BYTE_DATA; NULL if PyUnicode_New fails */
+escape_unicode_kind4(PyUnicodeObject *in)
+{
+ Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in);
+ Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in);
+ Py_UCS4 *outp;
+ PyObject *out;
+ Py_ssize_t delta = 0; /* extra length the escaped result needs */
+
+ GET_DELTA(inp, inp_end, delta); /* note: advances inp up to inp_end */
+ if (!delta) { /* nothing to escape: hand back the input itself with an extra reference */
+ Py_INCREF(in);
+ return (PyObject*)in;
+ }
+
+ out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111); /* 1114111 (0x10FFFF) is passed as maxchar */
+ if (!out)
+ return NULL;
+
+ inp = PyUnicode_4BYTE_DATA(in); /* rewind, since GET_DELTA consumed inp */
+ outp = PyUnicode_4BYTE_DATA(out);
+ DO_ESCAPE(inp, inp_end, outp);
+ return out;
+}
+
+static PyObject* /* Python 3 build: escape by dispatching on the string's storage kind */
+escape_unicode(PyUnicodeObject *in)
+{
+ if (PyUnicode_READY(in)) /* a nonzero result is treated as failure */
+ return NULL;
+
+ switch (PyUnicode_KIND(in)) { /* each kind has its own helper typed for that unit width */
+ case PyUnicode_1BYTE_KIND:
+ return escape_unicode_kind1(in);
+ case PyUnicode_2BYTE_KIND:
+ return escape_unicode_kind2(in);
+ case PyUnicode_4BYTE_KIND:
+ return escape_unicode_kind4(in);
+ }
+ assert(0); /* shouldn't happen */
+ return NULL; /* reached only if no case matched and asserts are compiled out */
+}
+#endif /* PY_MAJOR_VERSION < 3 */
static PyObject*
escape(PyObject *self, PyObject *text)
diff --git a/tests.py b/tests.py
new file mode 100755
index 0000000..da4b486
--- /dev/null
+++ b/tests.py
@@ -0,0 +1,271 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import gc
+import sys
+import unittest
+from markupsafe import Markup, escape, escape_silent
+from markupsafe._compat import text_type, PY2
+from markupsafe import _native
+try:
+ from markupsafe import _speedups
+ have_speedups = True
+except ImportError:
+ have_speedups = False
+
+
+class MarkupTestCase(unittest.TestCase):  # Tests for Markup operators, formatting, escaping and unescaping.
+
+ def test_adding(self):
+ # adding two strings should escape the unsafe one
+ unsafe = '<script type="application/x-some-script">alert("foo");</script>'
+ safe = Markup('<em>username</em>')
+ assert unsafe + safe == text_type(escape(unsafe)) + text_type(safe)
+
+ def test_string_interpolation(self):
+ # string interpolations are safe to use too
+ assert Markup('<em>%s</em>') % '<bad user>' == \
+ '<em>&lt;bad user&gt;</em>'
+ assert Markup('<em>%(username)s</em>') % {
+ 'username': '<bad user>'
+ } == '<em>&lt;bad user&gt;</em>'
+
+ assert Markup('%i') % 3.14 == '3'  # numeric conversions are applied to numeric arguments as usual
+ assert Markup('%.2f') % 3.14 == '3.14'
+
+ def test_type_behavior(self):
+ # an escaped object is markup too
+ assert type(Markup('foo') + 'bar') is Markup
+
+ # and it implements __html__ by returning itself
+ x = Markup("foo")
+ assert x.__html__() is x
+
+ def test_html_interop(self):
+ # it also knows how to treat __html__ objects
+ class Foo(object):
+ def __html__(self):
+ return '<em>awesome</em>'
+ def __unicode__(self):
+ return 'awesome'
+ __str__ = __unicode__  # both string conversions return the plain text
+ assert Markup(Foo()) == '<em>awesome</em>'  # the __html__ result is used, not the plain text
+ assert Markup('<strong>%s</strong>') % Foo() == \
+ '<strong><em>awesome</em></strong>'
+
+ def test_tuple_interpol(self):
+ self.assertEqual(Markup('<em>%s:%s</em>') % (
+ '<foo>',
+ '<bar>',
+ ), Markup(u'<em>&lt;foo&gt;:&lt;bar&gt;</em>'))  # every tuple element is escaped
+
+ def test_dict_interpol(self):
+ self.assertEqual(Markup('<em>%(foo)s</em>') % {
+ 'foo': '<foo>',
+ }, Markup(u'<em>&lt;foo&gt;</em>'))
+ self.assertEqual(Markup('<em>%(foo)s:%(bar)s</em>') % {
+ 'foo': '<foo>',
+ 'bar': '<bar>',
+ }, Markup(u'<em>&lt;foo&gt;:&lt;bar&gt;</em>'))  # every mapping value is escaped
+
+ def test_escaping(self):
+ # escaping
+ assert escape('"<>&\'') == '&#34;&lt;&gt;&amp;&#39;'
+ assert Markup("<em>Foo &amp; Bar</em>").striptags() == "Foo & Bar"  # tags removed and the entity decoded
+
+ def test_unescape(self):
+ assert Markup("&lt;test&gt;").unescape() == "<test>"
+ assert "jack & tavi are cooler than mike & russ" == \
+ Markup("jack & tavi are cooler than mike &amp; russ").unescape(), \
+ Markup("jack & tavi are cooler than mike &amp; russ").unescape()
+
+ # Test that unescape is idempotent
+ original = '&foo&#x3b;'  # &#x3b; is the hexadecimal character reference for ';'
+ once = Markup(original).unescape()
+ twice = Markup(once).unescape()
+ expected = "&foo;"
+ assert expected == once == twice, (once, twice)
+
+ def test_formatting(self):
+ for actual, expected in (  # pairs of (formatted Markup, expected text)
+ (Markup('%i') % 3.14, '3'),
+ (Markup('%.2f') % 3.14159, '3.14'),
+ (Markup('%s %s %s') % ('<', 123, '>'), '&lt; 123 &gt;'),
+ (Markup('<em>{awesome}</em>').format(awesome='<awesome>'),
+ '<em>&lt;awesome&gt;</em>'),
+ (Markup('{0[1][bar]}').format([0, {'bar': '<bar/>'}]),
+ '&lt;bar/&gt;'),
+ (Markup('{0[1][bar]}').format([0, {'bar': Markup('<bar/>')}]),
+ '<bar/>')):  # a value that is already Markup is inserted unescaped
+ assert actual == expected, "%r should be %r!" % (actual, expected)
+
+ # This is new in 2.7
+ if sys.version_info >= (2, 7):  # the test below is only defined on Python 2.7 and newer
+ def test_formatting_empty(self):
+ formatted = Markup('{}').format(0)
+ assert formatted == Markup('0')
+
+ def test_custom_formatting(self):
+ class HasHTMLOnly(object):
+ def __html__(self):
+ return Markup('<foo>')
+
+ class HasHTMLAndFormat(object):
+ def __html__(self):
+ return Markup('<foo>')
+ def __html_format__(self, spec):
+ return Markup('<FORMAT>')
+
+ assert Markup('{0}').format(HasHTMLOnly()) == Markup('<foo>')  # only __html__ available, so it is used
+ assert Markup('{0}').format(HasHTMLAndFormat()) == Markup('<FORMAT>')  # __html_format__ is preferred when present
+
+ def test_complex_custom_formatting(self):
+ class User(object):
+ def __init__(self, id, username):
+ self.id = id
+ self.username = username
+ def __html_format__(self, format_spec):
+ if format_spec == 'link':
+ return Markup('<a href="/user/{0}">{1}</a>').format(
+ self.id,
+ self.__html__(),
+ )
+ elif format_spec:  # any other non-empty spec is rejected
+ raise ValueError('Invalid format spec')
+ return self.__html__()
+ def __html__(self):
+ return Markup('<span class=user>{0}</span>').format(self.username)
+
+ user = User(1, 'foo')
+ assert Markup('<p>User: {0:link}').format(user) == \
+ Markup('<p>User: <a href="/user/1"><span class=user>foo</span></a>')
+
+ def test_formatting_with_objects(self):
+ class Stringable(object):
+ def __unicode__(self):
+ return u'строка'
+ if PY2:  # on Python 2, __str__ returns a different value than __unicode__
+ def __str__(self):
+ return 'some other value'
+ else:
+ __str__ = __unicode__
+
+ assert Markup('{s}').format(s=Stringable()) == \
+ Markup(u'строка')  # the expected text is the one __unicode__ returns
+
+ def test_all_set(self):
+ import markupsafe as markup
+ for item in markup.__all__:
+ getattr(markup, item)  # raises AttributeError if an exported name is missing
+
+ def test_escape_silent(self):
+ assert escape_silent(None) == Markup()  # None yields empty markup
+ assert escape(None) == Markup(None)
+ assert escape_silent('<foo>') == Markup(u'&lt;foo&gt;')
+
+ def test_splitting(self):
+ self.assertEqual(Markup('a b').split(), [
+ Markup('a'),
+ Markup('b')
+ ])
+ self.assertEqual(Markup('a b').rsplit(), [
+ Markup('a'),
+ Markup('b')
+ ])
+ self.assertEqual(Markup('a\nb').splitlines(), [
+ Markup('a'),
+ Markup('b')
+ ])
+
+ def test_mul(self):
+ self.assertEqual(Markup('a') * 3, Markup('aaa'))
+
+ def test_escape_return_type(self):
+ self.assertTrue(isinstance(escape('a'), Markup))
+ self.assertTrue(isinstance(escape(Markup('a')), Markup))
+ class Foo:
+ def __html__(self):
+ return '<strong>Foo</strong>'
+ self.assertTrue(isinstance(escape(Foo()), Markup))  # objects providing __html__ also yield Markup
+
+
+class MarkupLeakTestCase(unittest.TestCase):  # Checks that repeated escape() calls do not change the gc object count.
+
+ def test_markup_leaks(self):
+ counts = set()  # distinct gc object counts observed
+ for count in range(20):
+ for item in range(1000):
+ escape("foo")
+ escape("<foo>")
+ escape(u"foo")
+ escape(u"<foo>")
+ if hasattr(sys, 'pypy_version_info'):  # attribute checked to detect PyPy
+ gc.collect()
+ counts.add(len(gc.get_objects()))  # record how many objects the gc currently tracks
+ assert len(counts) == 1, 'ouch, c extension seems to ' \
+ 'leak objects, got: ' + str(len(counts))  # the message reports the number of distinct counts seen
+
+
+class NativeEscapeTestCase(unittest.TestCase):  # Checks _native.escape on empty, ASCII, Japanese and emoji input.
+
+ escape = staticmethod(_native.escape)  # staticmethod so self.escape(x) does not pass self
+
+ def test_empty(self):
+ self.assertEqual(Markup(u''), self.escape(u''))
+
+ def test_ascii(self):  # special characters in the middle, at the start, and at the end
+ self.assertEqual(
+ Markup(u'abcd&amp;&gt;&lt;&#39;&#34;efgh'),
+ self.escape(u'abcd&><\'"efgh'))
+ self.assertEqual(
+ Markup(u'&amp;&gt;&lt;&#39;&#34;efgh'),
+ self.escape(u'&><\'"efgh'))
+ self.assertEqual(
+ Markup(u'abcd&amp;&gt;&lt;&#39;&#34;'),
+ self.escape(u'abcd&><\'"'))
+
+ def test_2byte(self):  # same three positions, surrounded by Japanese text
+ self.assertEqual(
+ Markup(u'こんにちは&amp;&gt;&lt;&#39;&#34;こんばんは'),
+ self.escape(u'こんにちは&><\'"こんばんは'))
+ self.assertEqual(
+ Markup(u'&amp;&gt;&lt;&#39;&#34;こんばんは'),
+ self.escape(u'&><\'"こんばんは'))
+ self.assertEqual(
+ Markup(u'こんにちは&amp;&gt;&lt;&#39;&#34;'),
+ self.escape(u'こんにちは&><\'"'))
+
+ def test_4byte(self):  # same three positions, with code points above U+FFFF
+ self.assertEqual(
+ Markup(u'\U0001F363\U0001F362&amp;&gt;&lt;&#39;&#34;\U0001F37A xyz'),
+ self.escape(u'\U0001F363\U0001F362&><\'"\U0001F37A xyz'))
+ self.assertEqual(
+ Markup(u'&amp;&gt;&lt;&#39;&#34;\U0001F37A xyz'),
+ self.escape(u'&><\'"\U0001F37A xyz'))
+ self.assertEqual(
+ Markup(u'\U0001F363\U0001F362&amp;&gt;&lt;&#39;&#34;'),
+ self.escape(u'\U0001F363\U0001F362&><\'"'))
+
+if have_speedups:  # only defined when markupsafe._speedups could be imported
+ class SpeedupEscapeTestCase(NativeEscapeTestCase):  # inherits every test from NativeEscapeTestCase
+ escape = _speedups.escape  # run the inherited tests against _speedups.escape instead
+
+
+def suite():  # Build and return the TestSuite containing this module's test cases.
+ suite = unittest.TestSuite()  # local name shadows the function inside its own body
+ suite.addTest(unittest.makeSuite(MarkupTestCase))
+
+ # this test only tests the c extension
+ if not hasattr(escape, 'func_code'):  # NOTE(review): func_code presumably marks a pure-Python function on Python 2 - confirm
+ suite.addTest(unittest.makeSuite(MarkupLeakTestCase))
+
+ suite.addTest(unittest.makeSuite(NativeEscapeTestCase))
+ if have_speedups:  # SpeedupEscapeTestCase exists only in this case
+ suite.addTest(unittest.makeSuite(SpeedupEscapeTestCase))
+
+ return suite
+
+
+if __name__ == '__main__':  # executed as a script
+ unittest.main(defaultTest='suite')  # run the tests returned by suite() by default
+
+# vim:sts=4:sw=4:et: