diff options
author | Bob Ippolito <bob@redivi.com> | 2012-03-06 20:24:05 -0800 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2012-03-06 20:24:05 -0800 |
commit | cee27e64e5bc5fb923589139079d086265d2593e (patch) | |
tree | 8c2d866d3bc4b9eb0c73fd1b807f14c575657ce0 | |
parent | 9069fbfe3d856e4b66b42c9a354423812e5a593b (diff) | |
parent | ed1c14ad163595159a9df8a100b2cfa152180f8c (diff) | |
download | simplejson-cee27e64e5bc5fb923589139079d086265d2593e.tar.gz |
Merge branch 'bigint_as_string-gh31' v2.4.0
-rw-r--r-- | CHANGES.txt | 6 | ||||
-rw-r--r-- | conf.py | 4 | ||||
-rw-r--r-- | setup.py | 2 | ||||
-rw-r--r-- | simplejson/__init__.py | 25 | ||||
-rw-r--r-- | simplejson/_speedups.c | 54 | ||||
-rw-r--r-- | simplejson/encoder.py | 29 | ||||
-rw-r--r-- | simplejson/tests/__init__.py | 1 | ||||
-rw-r--r-- | simplejson/tests/test_bigint_as_string.py | 55 |
8 files changed, 155 insertions, 21 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index b98a371..4b770e3 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,9 @@ +Version 2.4.0 released 2012-03-XX + +* New bigint_as_string option for encoder to trade JavaScript number precision + issues for type issues. + https://github.com/simplejson/simplejson/issues/31 + Version 2.3.3 released 2012-02-27 * Allow unknown numerical types for indent parameter @@ -42,9 +42,9 @@ copyright = '2011, Bob Ippolito' # other places throughout the built documents. # # The short X.Y version. -version = '2.3' +version = '2.4' # The full version, including alpha/beta/rc tags. -release = '2.3.3' +release = '2.4.0' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: @@ -7,7 +7,7 @@ from distutils.errors import CCompilerError, DistutilsExecError, \ DistutilsPlatformError IS_PYPY = hasattr(sys, 'pypy_translation_info') -VERSION = '2.3.3' +VERSION = '2.4.0' DESCRIPTION = "Simple, fast, extensible JSON encoder/decoder for Python" LONG_DESCRIPTION = open('README.rst', 'r').read() diff --git a/simplejson/__init__.py b/simplejson/__init__.py index 3ee7893..4a5e5df 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -97,7 +97,7 @@ Using simplejson.tool from the shell to validate and pretty-print:: $ echo '{ 1.2:3.4}' | python -m simplejson.tool Expecting property name: line 1 column 2 (char 2) """ -__version__ = '2.3.3' +__version__ = '2.4.0' __all__ = [ 'dump', 'dumps', 'load', 'loads', 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', @@ -138,13 +138,14 @@ _default_encoder = JSONEncoder( use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, - **kw): + bigint_as_string=False, **kw): """Serialize ``obj`` 
as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). @@ -193,6 +194,12 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, If *tuple_as_array* is true (default: ``True``), :class:`tuple` (and subclasses) will be encoded as JSON arrays. + If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. Note that this is still a + lossy operation that will not round-trip correctly and should be used + sparingly. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg. @@ -203,7 +210,8 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, check_circular and allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal - and namedtuple_as_object and tuple_as_array and not kw): + and namedtuple_as_object and tuple_as_array + and not bigint_as_string and not kw): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -214,6 +222,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, default=default, use_decimal=use_decimal, namedtuple_as_object=namedtuple_as_object, tuple_as_array=tuple_as_array, + bigint_as_string=bigint_as_string, **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost @@ -225,7 +234,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, - tuple_as_array=True, + tuple_as_array=True, bigint_as_string=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. 
@@ -272,6 +281,10 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, If *tuple_as_array* is true (default: ``True``), :class:`tuple` (and subclasses) will be encoded as JSON arrays. + If bigint_as_string is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with the ``cls`` kwarg. @@ -282,7 +295,8 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, check_circular and allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal - and namedtuple_as_object and tuple_as_array and not kw): + and namedtuple_as_object and tuple_as_array + and not bigint_as_string and not kw): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -293,6 +307,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, use_decimal=use_decimal, namedtuple_as_object=namedtuple_as_object, tuple_as_array=tuple_as_array, + bigint_as_string=bigint_as_string, **kw).encode(obj) diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index c25909e..a0e5667 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -89,6 +89,7 @@ typedef struct _PyEncoderObject { int use_decimal; int namedtuple_as_object; int tuple_as_array; + int bigint_as_string; } PyEncoderObject; static PyMemberDef encoder_members[] = { @@ -104,6 +105,9 @@ static PyMemberDef encoder_members[] = { {NULL} }; +static PyObject * +maybe_quote_bigint(PyObject *encoded, PyObject *obj); + static Py_ssize_t ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); static PyObject * @@ -166,6 +170,35 @@ _is_namedtuple(PyObject *obj); #define MAX_EXPANSION MIN_EXPANSION #endif +static PyObject * +maybe_quote_bigint(PyObject 
*encoded, PyObject *obj) +{ + static PyObject *big_long = NULL; + static PyObject *small_long = NULL; + if (big_long == NULL) { + big_long = PyLong_FromLongLong(1LL << 53); + if (big_long == NULL) { + Py_DECREF(encoded); + return NULL; + } + } + if (small_long == NULL) { + small_long = PyLong_FromLongLong(-1LL << 53); + if (small_long == NULL) { + Py_DECREF(encoded); + return NULL; + } + } + if (PyObject_RichCompareBool(obj, big_long, Py_GE) || + PyObject_RichCompareBool(obj, small_long, Py_LE)) { + PyObject* quoted = PyString_FromFormat("\"%s\"", + PyString_AsString(encoded)); + Py_DECREF(encoded); + encoded = quoted; + } + return encoded; +} + static int _is_namedtuple(PyObject *obj) { @@ -1159,7 +1192,7 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss if (rval == NULL) return NULL; } - + /* skip whitespace after { */ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; @@ -1900,7 +1933,7 @@ scanner_init(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) return -1; - + if (s->memo == NULL) { s->memo = PyDict_New(); if (s->memo == NULL) @@ -2025,19 +2058,19 @@ static int encoder_init(PyObject *self, PyObject *args, PyObject *kwds) { /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", NULL}; + static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", NULL}; PyEncoderObject *s; PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; - PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal, *namedtuple_as_object, *tuple_as_array; + PyObject *item_separator, 
*sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal, *namedtuple_as_object, *tuple_as_array, *bigint_as_string; assert(PyEncoder_Check(self)); s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOO:make_encoder", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOO:make_encoder", kwlist, &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal, - &namedtuple_as_object, &tuple_as_array)) + &namedtuple_as_object, &tuple_as_array, &bigint_as_string)) return -1; s->markers = markers; @@ -2054,6 +2087,7 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) s->use_decimal = PyObject_IsTrue(use_decimal); s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object); s->tuple_as_array = PyObject_IsTrue(tuple_as_array); + s->bigint_as_string = PyObject_IsTrue(bigint_as_string); Py_INCREF(s->markers); Py_INCREF(s->defaultfn); @@ -2189,8 +2223,14 @@ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi } else if (PyInt_Check(obj) || PyLong_Check(obj)) { PyObject *encoded = PyObject_Str(obj); - if (encoded != NULL) + if (encoded != NULL) { + if (s->bigint_as_string) { + encoded = maybe_quote_bigint(encoded, obj); + if (encoded == NULL) + break; + } rv = _steal_list_append(rval, encoded); + } } else if (PyFloat_Check(obj)) { PyObject *encoded = encoder_encode_float(s, obj); diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 7f4f1cb..d13d26a 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -107,7 +107,7 @@ class JSONEncoder(object): check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, - tuple_as_array=True): + tuple_as_array=True, bigint_as_string=False): """Constructor for JSONEncoder, with sensible defaults. 
If skipkeys is false, then it is a TypeError to attempt @@ -160,6 +160,10 @@ class JSONEncoder(object): If tuple_as_array is true (the default), tuple (and subclasses) will be encoded as JSON arrays. + + If bigint_as_string is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. """ self.skipkeys = skipkeys @@ -170,6 +174,7 @@ class JSONEncoder(object): self.use_decimal = use_decimal self.namedtuple_as_object = namedtuple_as_object self.tuple_as_array = tuple_as_array + self.bigint_as_string = bigint_as_string if indent is not None and not isinstance(indent, basestring): indent = indent * ' ' self.indent = indent @@ -285,13 +290,15 @@ class JSONEncoder(object): markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, self.allow_nan, key_memo, self.use_decimal, - self.namedtuple_as_object, self.tuple_as_array) + self.namedtuple_as_object, self.tuple_as_array, + self.bigint_as_string) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, _one_shot, self.use_decimal, - self.namedtuple_as_object, self.tuple_as_array) + self.namedtuple_as_object, self.tuple_as_array, + self.bigint_as_string) try: return _iterencode(o, 0) finally: @@ -328,6 +335,7 @@ class JSONEncoderForHTML(JSONEncoder): def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, _use_decimal, _namedtuple_as_object, _tuple_as_array, + _bigint_as_string, ## HACK: hand-optimized bytecode; turn globals into locals False=False, True=True, @@ -378,7 +386,10 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, elif value is False: yield buf + 'false' elif isinstance(value, (int, long)): - yield buf + str(value) + yield ((buf + 
str(value)) + if (not _bigint_as_string or + (-1 << 53) < value < (1 << 53)) + else (buf + '"' + str(value) + '"')) elif isinstance(value, float): yield buf + _floatstr(value) elif _use_decimal and isinstance(value, Decimal): @@ -465,7 +476,10 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, elif value is False: yield 'false' elif isinstance(value, (int, long)): - yield str(value) + yield (str(value) + if (not _bigint_as_string or + (-1 << 53) < value < (1 << 53)) + else ('"' + str(value) + '"')) elif isinstance(value, float): yield _floatstr(value) elif _use_decimal and isinstance(value, Decimal): @@ -503,7 +517,10 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, elif o is False: yield 'false' elif isinstance(o, (int, long)): - yield str(o) + yield (str(o) + if (not _bigint_as_string or + (-1 << 53) < o < (1 << 53)) + else ('"' + str(o) + '"')) elif isinstance(o, float): yield _floatstr(o) elif isinstance(o, list): diff --git a/simplejson/tests/__init__.py b/simplejson/tests/__init__.py index 8d2e443..12289b6 100644 --- a/simplejson/tests/__init__.py +++ b/simplejson/tests/__init__.py @@ -27,6 +27,7 @@ def additional_tests(suite=None): def all_tests_suite(): suite = unittest.TestLoader().loadTestsFromNames([ + 'simplejson.tests.test_bigint_as_string', 'simplejson.tests.test_check_circular', 'simplejson.tests.test_decode', 'simplejson.tests.test_default', diff --git a/simplejson/tests/test_bigint_as_string.py b/simplejson/tests/test_bigint_as_string.py new file mode 100644 index 0000000..f74f75d --- /dev/null +++ b/simplejson/tests/test_bigint_as_string.py @@ -0,0 +1,55 @@ +from unittest import TestCase + +import simplejson as json + +class TestBigintAsString(TestCase): + values = [(200, 200), + ((2 ** 53) - 1, 9007199254740991), + ((2 ** 53), '9007199254740992'), + ((2 ** 53) + 1, '9007199254740993'), + (-100, -100), + ((-2 ** 53), '-9007199254740992'), + ((-2 ** 53) - 1, '-9007199254740993'), + ((-2 ** 53) + 1, 
-9007199254740991)] + + def test_ints(self): + for val, expect in self.values: + self.assertEquals( + val, + json.loads(json.dumps(val))) + self.assertEquals( + expect, + json.loads(json.dumps(val, bigint_as_string=True))) + + def test_lists(self): + for val, expect in self.values: + val = [val, val] + expect = [expect, expect] + self.assertEquals( + val, + json.loads(json.dumps(val))) + self.assertEquals( + expect, + json.loads(json.dumps(val, bigint_as_string=True))) + + def test_dicts(self): + for val, expect in self.values: + val = {'k': val} + expect = {'k': expect} + self.assertEquals( + val, + json.loads(json.dumps(val))) + self.assertEquals( + expect, + json.loads(json.dumps(val, bigint_as_string=True))) + + def test_dict_keys(self): + for val, _ in self.values: + expect = {str(val): 'value'} + val = {val: 'value'} + self.assertEquals( + expect, + json.loads(json.dumps(val))) + self.assertEquals( + expect, + json.loads(json.dumps(val, bigint_as_string=True))) |