diff options
author | Bob Ippolito <bob@redivi.com> | 2013-01-01 13:17:38 -0800 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2013-01-01 13:17:38 -0800 |
commit | 03396cb26b6165814150461ec9f49343c151d862 (patch) | |
tree | f804ca341d700d8c14d2d2a016532d93f6c46c97 | |
parent | 02e24f4f856b065aa31d79d4726ce6668a20779e (diff) | |
download | simplejson-3.0.1.tar.gz |
_Py_Accu style encoder optimization, bump to 3.0.1 (tag: v3.0.1)
-rw-r--r-- | CHANGES.txt | 6 | ||||
-rw-r--r-- | conf.py | 2 | ||||
-rw-r--r-- | setup.py | 2 | ||||
-rw-r--r-- | simplejson/__init__.py | 2 | ||||
-rw-r--r-- | simplejson/_speedups.c | 193 | ||||
-rw-r--r-- | simplejson/tests/test_dump.py | 5 |
6 files changed, 171 insertions, 39 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 37b5e65..500252b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,9 @@ +Version 3.0.1 released 2013-01-01 + +* Add accumulator optimization to encoder, equivalent to the usage of + `_Py_Accu` in the Python 3.3 json library. Only relevant if encoding + very large JSON documents. + Version 3.0.0 released 2012-12-30 * Python 3.3 is now supported, thanks to Vinay Sajip #8 @@ -44,7 +44,7 @@ copyright = '2012, Bob Ippolito' # The short X.Y version. version = '3.0' # The full version, including alpha/beta/rc tags. -release = '3.0.0' +release = '3.0.1' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: @@ -7,7 +7,7 @@ from distutils.errors import CCompilerError, DistutilsExecError, \ DistutilsPlatformError IS_PYPY = hasattr(sys, 'pypy_translation_info') -VERSION = '3.0.0' +VERSION = '3.0.1' DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python" LONG_DESCRIPTION = open('README.rst', 'r').read() diff --git a/simplejson/__init__.py b/simplejson/__init__.py index c655e92..84fea33 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -99,7 +99,7 @@ Using simplejson.tool from the shell to validate and pretty-print:: Expecting property name: line 1 column 2 (char 2) """ from __future__ import absolute_import -__version__ = '3.0.0' +__version__ = '3.0.1' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index 37a43ef..beceeb7 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -95,6 +95,24 @@ typedef int Py_ssize_t; static PyTypeObject PyScannerType; static PyTypeObject PyEncoderType; +#undef small /* defined by some Windows headers */ + +typedef PyObject *(*joinerfunc)(PyObject *); +typedef struct { + PyObject *large; /* A list of previously accumulated large strings */ + PyObject *small; /* Pending small strings */ + 
joinerfunc joiner; +} JSON_Accu; + +static int +JSON_Accu_Init(JSON_Accu *acc); +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode); +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc); +static void +JSON_Accu_Destroy(JSON_Accu *acc); + typedef struct _PyScannerObject { PyObject_HEAD PyObject *encoding; @@ -157,6 +175,8 @@ static PyMemberDef encoder_members[] = { }; static PyObject * +join_list_unicode(PyObject *lst); +static PyObject * JSON_ParseEncoding(PyObject *encoding); static PyObject * JSON_UnicodeFromChar(JSON_UNICHR c); @@ -174,6 +194,8 @@ static PyObject * py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); #if PY_MAJOR_VERSION < 3 static PyObject * +join_list_string(PyObject *lst); +static PyObject * scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); static PyObject * scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr); @@ -205,11 +227,11 @@ encoder_clear(PyObject *self); static PyObject * encoder_stringify_key(PyEncoderObject *s, PyObject *key); static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level); static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level); static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level); static PyObject * _encoded_const(PyObject *obj); static void @@ -233,6 +255,97 @@ moduleinit(void); #define MIN_EXPANSION 6 static int +JSON_Accu_Init(JSON_Accu *acc) +{ + /* Lazily allocated */ + acc->large = NULL; + acc->small = PyList_New(0); + 
if (acc->small == NULL) + return -1; +#if PY_MAJOR_VERSION >= 3 + acc->joiner = join_list_unicode; +#else /* PY_MAJOR_VERSION >= 3 */ + acc->joiner = join_list_string; +#endif /* PY_MAJOR_VERSION < 3 */ + return 0; +} + +static int +flush_accumulator(JSON_Accu *acc) +{ + Py_ssize_t nsmall = PyList_GET_SIZE(acc->small); + if (nsmall) { + int ret; + PyObject *joined; + if (acc->large == NULL) { + acc->large = PyList_New(0); + if (acc->large == NULL) + return -1; + } + joined = acc->joiner(acc->small); + if (joined == NULL) + return -1; + if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) { + Py_DECREF(joined); + return -1; + } + ret = PyList_Append(acc->large, joined); + Py_DECREF(joined); + return ret; + } + return 0; +} + +static int +JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode) +{ + Py_ssize_t nsmall; +#if PY_MAJOR_VERSION >= 3 + assert(PyUnicode_Check(unicode)); +#else /* PY_MAJOR_VERSION >= 3 */ + assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode)); +#endif /* PY_MAJOR_VERSION < 3 */ + + if (PyList_Append(acc->small, unicode)) + return -1; + nsmall = PyList_GET_SIZE(acc->small); + /* Each item in a list of unicode objects has an overhead (in 64-bit + * builds) of: + * - 8 bytes for the list slot + * - 56 bytes for the header of the unicode object + * that is, 64 bytes. 100000 such objects waste more than 6MB + * compared to a single concatenated string. 
+ */ + if (nsmall < 100000) + return 0; + return flush_accumulator(acc); +} + +static PyObject * +JSON_Accu_FinishAsList(JSON_Accu *acc) +{ + int ret; + PyObject *res; + + ret = flush_accumulator(acc); + Py_CLEAR(acc->small); + if (ret) { + Py_CLEAR(acc->large); + return NULL; + } + res = acc->large; + acc->large = NULL; + return res; +} + +static void +JSON_Accu_Destroy(JSON_Accu *acc) +{ + Py_CLEAR(acc->small); + Py_CLEAR(acc->large); +} + +static int IS_DIGIT(JSON_UNICHR c) { return c >= '0' && c <= '9'; @@ -1983,8 +2096,6 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n PyErr_SetNone(PyExc_StopIteration); return NULL; } - if (Py_EnterRecursiveCall(" while decoding a JSON document")) - return NULL; switch (str[idx]) { case '"': /* string */ @@ -1995,11 +2106,19 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n break; case '{': /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a string")) + return NULL; rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case '[': /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a string")) + return NULL; rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case 'n': /* null */ @@ -2061,7 +2180,6 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n /* Didn't find a string, object, array, or named constant. Look for a number. 
*/ if (fallthrough) rval = _match_number_str(s, pystr, idx, next_idx_ptr); - Py_LeaveRecursiveCall(); return rval; } #endif /* PY_MAJOR_VERSION < 3 */ @@ -2086,8 +2204,6 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyErr_SetNone(PyExc_StopIteration); return NULL; } - if (Py_EnterRecursiveCall(" while decoding a JSON document")) - return NULL; switch (PyUnicode_READ(kind, str, idx)) { case '"': /* string */ @@ -2097,11 +2213,17 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ break; case '{': /* object */ + if (Py_EnterRecursiveCall(" while decoding a JSON object " + "from a unicode string")) rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case '[': /* array */ + if (Py_EnterRecursiveCall(" while decoding a JSON array " + "from a unicode string")) rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); + Py_LeaveRecursiveCall(); break; case 'n': /* null */ @@ -2481,22 +2603,21 @@ encoder_call(PyObject *self, PyObject *args, PyObject *kwds) /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; PyObject *obj; - PyObject *rval; Py_ssize_t indent_level; PyEncoderObject *s; + JSON_Accu rval; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, &obj, _convertPyInt_AsSsize_t, &indent_level)) return NULL; - rval = PyList_New(0); - if (rval == NULL) + if (JSON_Accu_Init(&rval)) return NULL; - if (encoder_listencode_obj(s, rval, obj, indent_level)) { - Py_DECREF(rval); + if (encoder_listencode_obj(s, &rval, obj, indent_level)) { + JSON_Accu_Destroy(&rval); return NULL; } - return rval; + return JSON_Accu_FinishAsList(&rval); } static PyObject * @@ -2583,16 +2704,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_list_append(PyObject *lst, PyObject *stolen) 
+_steal_accumulate(JSON_Accu *lst, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = PyList_Append(lst, stolen); + int rval = JSON_Accu_Accumulate(lst, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) +encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level) { /* Encode Python object obj to a JSON term, rval is a PyList */ int rv = -1; @@ -2602,13 +2723,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi if (obj == Py_None || obj == Py_True || obj == Py_False) { PyObject *cstr = _encoded_const(obj); if (cstr != NULL) - rv = _steal_list_append(rval, cstr); + rv = _steal_accumulate(rval, cstr); } else if (PyString_Check(obj) || PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); if (encoded != NULL) - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } else if (PyInt_Check(obj) || PyLong_Check(obj)) { PyObject *encoded = PyObject_Str(obj); @@ -2618,13 +2739,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi if (encoded == NULL) break; } - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } } else if (PyFloat_Check(obj)) { PyObject *encoded = encoder_encode_float(s, obj); if (encoded != NULL) - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } else if (s->namedtuple_as_object && _is_namedtuple(obj)) { PyObject *newobj = PyObject_CallMethod(obj, "_asdict", NULL); @@ -2642,7 +2763,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) { PyObject *encoded = PyObject_Str(obj); if (encoded != NULL) - rv = _steal_list_append(rval, encoded); + rv = _steal_accumulate(rval, encoded); } else { 
PyObject *ident = NULL; @@ -2689,9 +2810,9 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi } static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) +encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level) { - /* Encode Python dict dct a JSON term, rval is a PyList */ + /* Encode Python dict dct a JSON term */ static PyObject *open_dict = NULL; static PyObject *close_dict = NULL; static PyObject *empty_dict = NULL; @@ -2711,7 +2832,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss return -1; } if (PyDict_Size(dct) == 0) - return PyList_Append(rval, empty_dict); + return JSON_Accu_Accumulate(rval, empty_dict); if (s->markers != Py_None) { int has_key; @@ -2729,7 +2850,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss } } - if (PyList_Append(rval, open_dict)) + if (JSON_Accu_Accumulate(rval, open_dict)) goto bail; if (s->indent != Py_None) { @@ -2775,7 +2896,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss } } if (idx) { - if (PyList_Append(rval, s->item_separator)) + if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } if (encoded == NULL) { @@ -2786,11 +2907,11 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss if (PyDict_SetItem(s->key_memo, key, encoded)) goto bail; } - if (PyList_Append(rval, encoded)) { + if (JSON_Accu_Accumulate(rval, encoded)) { goto bail; } Py_CLEAR(encoded); - if (PyList_Append(rval, s->key_separator)) + if (JSON_Accu_Accumulate(rval, s->key_separator)) goto bail; if (encoder_listencode_obj(s, rval, value, indent_level)) goto bail; @@ -2812,7 +2933,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss yield '\n' + (_indent * _current_indent_level) */ } - if (PyList_Append(rval, close_dict)) + if (JSON_Accu_Accumulate(rval, 
close_dict)) goto bail; return 0; @@ -2827,9 +2948,9 @@ bail: static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) +encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level) { - /* Encode Python list seq to a JSON term, rval is a PyList */ + /* Encode Python list seq to a JSON term */ static PyObject *open_array = NULL; static PyObject *close_array = NULL; static PyObject *empty_array = NULL; @@ -2851,7 +2972,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss if (is_true == -1) return -1; else if (is_true == 0) - return PyList_Append(rval, empty_array); + return JSON_Accu_Accumulate(rval, empty_array); if (s->markers != Py_None) { int has_key; @@ -2873,7 +2994,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss if (iter == NULL) goto bail; - if (PyList_Append(rval, open_array)) + if (JSON_Accu_Accumulate(rval, open_array)) goto bail; if (s->indent != Py_None) { /* TODO: DOES NOT RUN */ @@ -2886,7 +3007,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss } while ((obj = PyIter_Next(iter))) { if (i) { - if (PyList_Append(rval, s->item_separator)) + if (JSON_Accu_Accumulate(rval, s->item_separator)) goto bail; } if (encoder_listencode_obj(s, rval, obj, indent_level)) @@ -2909,7 +3030,7 @@ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss yield '\n' + (_indent * _current_indent_level) */ } - if (PyList_Append(rval, close_array)) + if (JSON_Accu_Accumulate(rval, close_array)) goto bail; return 0; diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py index e8c6054..0ad2c40 100644 --- a/simplejson/tests/test_dump.py +++ b/simplejson/tests/test_dump.py @@ -114,3 +114,8 @@ class TestDump(TestCase): s = json.dumps([0, 1, 2], indent=AwesomeInt(3)) self.assertEqual(s, '[\n 0,\n 1,\n 2\n]') + + def test_accumulator(self): + # 
the C API uses an accumulator that collects after 100,000 appends + lst = [0] * 100000 + self.assertEqual(json.loads(json.dumps(lst)), lst) |