diff options
author | INADA Naoki <songofacandy@gmail.com> | 2017-02-12 23:56:37 +0900 |
---|---|---|
committer | David Lord <davidism@gmail.com> | 2018-05-04 08:07:32 -0700 |
commit | a03ecd5efc3d5a2d12a3bc72d7ce9e776cccfc66 (patch) | |
tree | 4564aa15cb084c7d36baab4ad0a452ae8af239b1 | |
parent | ce443dd4ab6453607166e75211396cae0f811309 (diff) | |
download | markupsafe-a03ecd5efc3d5a2d12a3bc72d7ce9e776cccfc66.tar.gz |
Support PEP 393 new Unicode APIs
-rw-r--r-- | markupsafe/_speedups.c | 177 | ||||
-rwxr-xr-x | tests.py | 271 |
2 files changed, 446 insertions, 2 deletions
diff --git a/markupsafe/_speedups.c b/markupsafe/_speedups.c index fb4a03e..8362490 100644 --- a/markupsafe/_speedups.c +++ b/markupsafe/_speedups.c @@ -8,20 +8,24 @@ * :copyright: © 2010 by the Pallets team. * :license: BSD, see LICENSE for more details. */ - #include <Python.h> +#if PY_MAJOR_VERSION < 3 #define ESCAPED_CHARS_TABLE_SIZE 63 #define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL))); -static PyObject* markup; static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE]; static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE]; +#endif + +static PyObject* markup; static int init_constants(void) { PyObject *module; + +#if PY_MAJOR_VERSION < 3 /* mapping of characters to replace */ escaped_chars_repl['"'] = UNICHR("""); escaped_chars_repl['\''] = UNICHR("'"); @@ -34,6 +38,7 @@ init_constants(void) escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \ escaped_chars_delta_len['&'] = 4; escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3; +#endif /* import markup type so that we can mark the return value */ module = PyImport_ImportModule("markupsafe"); @@ -45,6 +50,7 @@ init_constants(void) return 1; } +#if PY_MAJOR_VERSION < 3 static PyObject* escape_unicode(PyUnicodeObject *in) { @@ -105,7 +111,174 @@ escape_unicode(PyUnicodeObject *in) return (PyObject*)out; } +#else /* PY_MAJOR_VERSION < 3 */ + +#define GET_DELTA(inp, inp_end, delta) \ + while (inp < inp_end) { \ + switch (*inp++) { \ + case '"': \ + case '\'': \ + case '&': \ + delta += 4; \ + break; \ + case '<': \ + case '>': \ + delta += 3; \ + break; \ + } \ + } + +#define DO_ESCAPE(inp, inp_end, outp) \ + { \ + Py_ssize_t ncopy = 0; \ + while (inp < inp_end) { \ + switch (*inp) { \ + case '"': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = '#'; \ + *outp++ = '3'; \ + *outp++ = '4'; \ + *outp++ = ';'; \ + break; \ + case '\'': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = '#'; \ + *outp++ = '3'; \ + *outp++ = '9'; \ + *outp++ = ';'; \ + break; \ + case '&': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = 'a'; \ + *outp++ = 'm'; \ + *outp++ = 'p'; \ + *outp++ = ';'; \ + break; \ + case '<': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = 'l'; \ + *outp++ = 't'; \ + *outp++ = ';'; \ + break; \ + case '>': \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + outp += ncopy; ncopy = 0; \ + *outp++ = '&'; \ + *outp++ = 'g'; \ + *outp++ = 't'; \ + *outp++ = ';'; \ + break; \ + default: \ + ncopy++; \ + } \ + inp++; \ + } \ + memcpy(outp, inp-ncopy, sizeof(*outp)*ncopy); \ + } + +static PyObject* +escape_unicode_kind1(PyUnicodeObject *in) +{ + Py_UCS1 *inp = PyUnicode_1BYTE_DATA(in); + Py_UCS1 *inp_end = inp + PyUnicode_GET_LENGTH(in); + Py_UCS1 *outp; + PyObject *out; + Py_ssize_t delta = 0; + + GET_DELTA(inp, inp_end, delta); + if (!delta) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, + PyUnicode_IS_ASCII(in) ? 127 : 255); + if (!out) + return NULL; + + inp = PyUnicode_1BYTE_DATA(in); + outp = PyUnicode_1BYTE_DATA(out); + DO_ESCAPE(inp, inp_end, outp); + return out; +} + +static PyObject* +escape_unicode_kind2(PyUnicodeObject *in) +{ + Py_UCS2 *inp = PyUnicode_2BYTE_DATA(in); + Py_UCS2 *inp_end = inp + PyUnicode_GET_LENGTH(in); + Py_UCS2 *outp; + PyObject *out; + Py_ssize_t delta = 0; + + GET_DELTA(inp, inp_end, delta); + if (!delta) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 65535); + if (!out) + return NULL; + + inp = PyUnicode_2BYTE_DATA(in); + outp = PyUnicode_2BYTE_DATA(out); + DO_ESCAPE(inp, inp_end, outp); + return out; +} + +static PyObject* +escape_unicode_kind4(PyUnicodeObject *in) +{ + Py_UCS4 *inp = PyUnicode_4BYTE_DATA(in); + Py_UCS4 *inp_end = inp + PyUnicode_GET_LENGTH(in); + Py_UCS4 *outp; + PyObject *out; + Py_ssize_t delta = 0; + + GET_DELTA(inp, inp_end, delta); + if (!delta) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = PyUnicode_New(PyUnicode_GET_LENGTH(in) + delta, 1114111); + if (!out) + return NULL; + + inp = PyUnicode_4BYTE_DATA(in); + outp = PyUnicode_4BYTE_DATA(out); + DO_ESCAPE(inp, inp_end, outp); + return out; +} + +static PyObject* +escape_unicode(PyUnicodeObject *in) +{ + if (PyUnicode_READY(in)) + return NULL; + + switch (PyUnicode_KIND(in)) { + case PyUnicode_1BYTE_KIND: + return escape_unicode_kind1(in); + case PyUnicode_2BYTE_KIND: + return escape_unicode_kind2(in); + case PyUnicode_4BYTE_KIND: + return escape_unicode_kind4(in); + } + assert(0); /* shouldn't happen */ + return NULL; +} +#endif /* PY_MAJOR_VERSION < 3 */ static PyObject* escape(PyObject *self, PyObject *text) diff --git a/tests.py b/tests.py new file mode 100755 index 0000000..da4b486 --- /dev/null +++ b/tests.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import gc +import sys +import unittest +from markupsafe import Markup, escape, escape_silent +from markupsafe._compat import text_type, PY2 +from markupsafe import _native +try: + from markupsafe import _speedups + have_speedups = True +except ImportError: + have_speedups = False + + +class MarkupTestCase(unittest.TestCase): + + def test_adding(self): + # adding two strings should escape the unsafe one + unsafe = '<script type="application/x-some-script">alert("foo");</script>' + safe = Markup('<em>username</em>') + assert unsafe + safe == text_type(escape(unsafe)) + text_type(safe) + + def test_string_interpolation(self): + # string interpolations are safe to use too + assert Markup('<em>%s</em>') % '<bad user>' == \ + '<em><bad user></em>' + assert Markup('<em>%(username)s</em>') % { + 'username': '<bad user>' + } == '<em><bad user></em>' + + assert Markup('%i') % 3.14 == '3' + assert Markup('%.2f') % 3.14 == '3.14' + + def test_type_behavior(self): + # an escaped object is markup too + assert type(Markup('foo') + 'bar') is Markup + + # and it implements __html__ by returning itself + x = Markup("foo") + assert x.__html__() is x + + def test_html_interop(self): + # it also knows how to treat __html__ objects + class Foo(object): + def __html__(self): + return '<em>awesome</em>' + def __unicode__(self): + return 'awesome' + __str__ = __unicode__ + assert Markup(Foo()) == '<em>awesome</em>' + assert Markup('<strong>%s</strong>') % Foo() == \ + '<strong><em>awesome</em></strong>' + + def test_tuple_interpol(self): + self.assertEqual(Markup('<em>%s:%s</em>') % ( + '<foo>', + '<bar>', + ), Markup(u'<em><foo>:<bar></em>')) + + def test_dict_interpol(self): + self.assertEqual(Markup('<em>%(foo)s</em>') % { + 'foo': '<foo>', + }, Markup(u'<em><foo></em>')) + self.assertEqual(Markup('<em>%(foo)s:%(bar)s</em>') % { + 'foo': '<foo>', + 'bar': '<bar>', + }, Markup(u'<em><foo>:<bar></em>')) + + def test_escaping(self): + # escaping + assert escape('"<>&\'') == '"<>&'' + assert Markup("<em>Foo & Bar</em>").striptags() == "Foo & Bar" + + def test_unescape(self): + assert Markup("<test>").unescape() == "<test>" + assert "jack & tavi are cooler than mike & russ" == \ + Markup("jack & tavi are cooler than mike & russ").unescape(), \ + Markup("jack & tavi are cooler than mike & russ").unescape() + + # Test that unescape is idempotent + original = '&foo;' + once = Markup(original).unescape() + twice = Markup(once).unescape() + expected = "&foo;" + assert expected == once == twice, (once, twice) + + def test_formatting(self): + for actual, expected in ( + (Markup('%i') % 3.14, '3'), + (Markup('%.2f') % 3.14159, '3.14'), + (Markup('%s %s %s') % ('<', 123, '>'), '< 123 >'), + (Markup('<em>{awesome}</em>').format(awesome='<awesome>'), + '<em><awesome></em>'), + (Markup('{0[1][bar]}').format([0, {'bar': '<bar/>'}]), + '<bar/>'), + (Markup('{0[1][bar]}').format([0, {'bar': Markup('<bar/>')}]), + '<bar/>')): + assert actual == expected, "%r should be %r!" % (actual, expected) + + # This is new in 2.7 + if sys.version_info >= (2, 7): + def test_formatting_empty(self): + formatted = Markup('{}').format(0) + assert formatted == Markup('0') + + def test_custom_formatting(self): + class HasHTMLOnly(object): + def __html__(self): + return Markup('<foo>') + + class HasHTMLAndFormat(object): + def __html__(self): + return Markup('<foo>') + def __html_format__(self, spec): + return Markup('<FORMAT>') + + assert Markup('{0}').format(HasHTMLOnly()) == Markup('<foo>') + assert Markup('{0}').format(HasHTMLAndFormat()) == Markup('<FORMAT>') + + def test_complex_custom_formatting(self): + class User(object): + def __init__(self, id, username): + self.id = id + self.username = username + def __html_format__(self, format_spec): + if format_spec == 'link': + return Markup('<a href="/user/{0}">{1}</a>').format( + self.id, + self.__html__(), + ) + elif format_spec: + raise ValueError('Invalid format spec') + return self.__html__() + def __html__(self): + return Markup('<span class=user>{0}</span>').format(self.username) + + user = User(1, 'foo') + assert Markup('<p>User: {0:link}').format(user) == \ + Markup('<p>User: <a href="/user/1"><span class=user>foo</span></a>') + + def test_formatting_with_objects(self): + class Stringable(object): + def __unicode__(self): + return u'строка' + if PY2: + def __str__(self): + return 'some other value' + else: + __str__ = __unicode__ + + assert Markup('{s}').format(s=Stringable()) == \ + Markup(u'строка') + + def test_all_set(self): + import markupsafe as markup + for item in markup.__all__: + getattr(markup, item) + + def test_escape_silent(self): + assert escape_silent(None) == Markup() + assert escape(None) == Markup(None) + assert escape_silent('<foo>') == Markup(u'<foo>') + + def test_splitting(self): + self.assertEqual(Markup('a b').split(), [ + Markup('a'), + Markup('b') + ]) + self.assertEqual(Markup('a b').rsplit(), [ + Markup('a'), + Markup('b') + ]) + self.assertEqual(Markup('a\nb').splitlines(), [ + Markup('a'), + Markup('b') + ]) + + def test_mul(self): + self.assertEqual(Markup('a') * 3, Markup('aaa')) + + def test_escape_return_type(self): + self.assertTrue(isinstance(escape('a'), Markup)) + self.assertTrue(isinstance(escape(Markup('a')), Markup)) + class Foo: + def __html__(self): + return '<strong>Foo</strong>' + self.assertTrue(isinstance(escape(Foo()), Markup)) + + +class MarkupLeakTestCase(unittest.TestCase): + + def test_markup_leaks(self): + counts = set() + for count in range(20): + for item in range(1000): + escape("foo") + escape("<foo>") + escape(u"foo") + escape(u"<foo>") + if hasattr(sys, 'pypy_version_info'): + gc.collect() + counts.add(len(gc.get_objects())) + assert len(counts) == 1, 'ouch, c extension seems to ' \ + 'leak objects, got: ' + str(len(counts)) + + +class NativeEscapeTestCase(unittest.TestCase): + + escape = staticmethod(_native.escape) + + def test_empty(self): + self.assertEqual(Markup(u''), self.escape(u'')) + + def test_ascii(self): + self.assertEqual( + Markup(u'abcd&><'"efgh'), + self.escape(u'abcd&><\'"efgh')) + self.assertEqual( + Markup(u'&><'"efgh'), + self.escape(u'&><\'"efgh')) + self.assertEqual( + Markup(u'abcd&><'"'), + self.escape(u'abcd&><\'"')) + + def test_2byte(self): + self.assertEqual( + Markup(u'こんにちは&><'"こんばんは'), + self.escape(u'こんにちは&><\'"こんばんは')) + self.assertEqual( + Markup(u'&><'"こんばんは'), + self.escape(u'&><\'"こんばんは')) + self.assertEqual( + Markup(u'こんにちは&><'"'), + self.escape(u'こんにちは&><\'"')) + + def test_4byte(self): + self.assertEqual( + Markup(u'\U0001F363\U0001F362&><'"\U0001F37A xyz'), + self.escape(u'\U0001F363\U0001F362&><\'"\U0001F37A xyz')) + self.assertEqual( + Markup(u'&><'"\U0001F37A xyz'), + self.escape(u'&><\'"\U0001F37A xyz')) + self.assertEqual( + Markup(u'\U0001F363\U0001F362&><'"'), + self.escape(u'\U0001F363\U0001F362&><\'"')) + +if have_speedups: + class SpeedupEscapeTestCase(NativeEscapeTestCase): + escape = _speedups.escape + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(MarkupTestCase)) + + # this test only tests the c extension + if not hasattr(escape, 'func_code'): + suite.addTest(unittest.makeSuite(MarkupLeakTestCase)) + + suite.addTest(unittest.makeSuite(NativeEscapeTestCase)) + if have_speedups: + suite.addTest(unittest.makeSuite(SpeedupEscapeTestCase)) + + return suite + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') + +# vim:sts=4:sw=4:et: |