diff options
author | Bob Ippolito <bob@redivi.com> | 2023-04-05 21:24:51 -0700 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2023-04-05 21:24:51 -0700 |
commit | 2cbc419a31208dd9d0ed5706d5f3aa333ebd3e30 (patch) | |
tree | c3e879b38d37d8ae25eb76efc8dd2a6f21fce34a | |
parent | 440a5e45be508bb0a0ed25af62b7dbfe8cf1e142 (diff) | |
download | simplejson-2cbc419a31208dd9d0ed5706d5f3aa333ebd3e30.tar.gz |
Additional security hardening improvements:
* Remove unused namedtuple_as_object and tuple_as_array arguments from
simplejson.load (SJ-PT-23-102)
* Remove vestigial _one_shot code from iterencode (SJ-PT-23-103)
* Change default of allow_nan from True to False and add allow_nan
to decoder (SJ-PT-23-107)
-rw-r--r-- | CHANGES.txt | 5 | ||||
-rw-r--r-- | index.rst | 91 | ||||
-rw-r--r-- | simplejson/__init__.py | 100 | ||||
-rw-r--r-- | simplejson/_speedups.c | 18 | ||||
-rw-r--r-- | simplejson/decoder.py | 18 | ||||
-rw-r--r-- | simplejson/encoder.py | 28 | ||||
-rw-r--r-- | simplejson/scanner.py | 6 | ||||
-rw-r--r-- | simplejson/tests/test_float.py | 7 |
8 files changed, 152 insertions, 121 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 4b600ef..2ceab57 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -9,6 +9,11 @@ Version 3.19.0 released 2023-04-XX * Fix inconsistencies with error messages between the C and Python implementations (SJ-PT-23-100) * Remove unused unichr import from encoder (SJ-PT-23-101) +* Remove unused namedtuple_as_object and tuple_as_array arguments from + simplejson.load (SJ-PT-23-102) +* Remove vestigial _one_shot code from iterencode (SJ-PT-23-103) +* Change default of allow_nan from True to False and add allow_nan + to decoder (SJ-PT-23-107) Version 3.18.4 released 2023-03-14 @@ -160,7 +160,7 @@ Basic Usage ----------- .. function:: dump(obj, fp, skipkeys=False, ensure_ascii=True, \ - check_circular=True, allow_nan=True, cls=None, \ + check_circular=True, allow_nan=False, cls=None, \ indent=None, separators=None, encoding='utf-8', \ default=None, use_decimal=True, \ namedtuple_as_object=True, tuple_as_array=True, \ @@ -191,7 +191,7 @@ Basic Usage is highly optimized. .. function:: dumps(obj, skipkeys=False, ensure_ascii=True, \ - check_circular=True, allow_nan=True, cls=None, \ + check_circular=True, allow_nan=False, cls=None, \ indent=None, separators=None, encoding='utf-8', \ default=None, use_decimal=True, \ namedtuple_as_object=True, tuple_as_array=True, \ @@ -225,13 +225,17 @@ Basic Usage reference check for container types will be skipped and a circular reference will result in an :exc:`OverflowError` (or worse). - If *allow_nan* is false (default: ``True``), then it will be a + If *allow_nan* is false (default: ``False``), then it will be a :exc:`ValueError` to serialize out of range :class:`float` values (``nan``, ``inf``, ``-inf``) in strict compliance of the original JSON specification. If *allow_nan* is true, their JavaScript equivalents will be used (``NaN``, ``Infinity``, ``-Infinity``). See also *ignore_nan* for ECMA-262 compliant behavior. + .. versionchanged:: 3.19.0 + The default for *allow_nan* was changed to False for better spec + compliance. + If *indent* is a string, then JSON array elements and object members will be pretty-printed with a newline followed by that string repeated for each level of nesting. ``None`` (the default) selects the most compact @@ -324,7 +328,7 @@ Basic Usage .. function:: load(fp, encoding='utf-8', cls=None, object_hook=None, \ parse_float=None, parse_int=None, \ parse_constant=None, object_pairs_hook=None, \ - use_decimal=None, **kw) + use_decimal=None, allow_nan=False, **kw) Deserialize *fp* (a ``.read()``-supporting file-like object containing a JSON document) to a Python object using this @@ -367,7 +371,7 @@ Basic Usage .. function:: loads(s, encoding='utf-8', cls=None, object_hook=None, \ parse_float=None, parse_int=None, \ parse_constant=None, object_pairs_hook=None, \ - use_decimal=None, **kw) + use_decimal=None, allow_nan=False, **kw) Deserialize *s* (a :class:`str` or :class:`unicode` instance containing a JSON document) to a Python object. :exc:`JSONDecodeError` will be @@ -419,10 +423,6 @@ Basic Usage suitable alternative parser is specified (e.g. :class:`decimal.Decimal`) - *parse_constant*, if specified, will be called with one of the following - strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This can be used to - raise an exception if invalid JSON numbers are encountered. - If *use_decimal* is true (default: ``False``) then *parse_float* is set to :class:`decimal.Decimal`. This is a convenience for parity with the :func:`dump` parameter. @@ -443,12 +443,28 @@ Basic Usage Subclassing is not recommended. You should use *object_hook* or *object_pairs_hook*. This is faster and more portable than subclassing. + + *allow_nan*, if True (default false), will allow the parser to + accept the non-standard floats + ``NaN``, ``Infinity``, and ``-Infinity``. + + .. versionchanged:: 3.19.0 + + This argument was added to make it possible to use the legacy behavior + now that the parser is more strict about compliance to the standard. + + *parse_constant*, if specified, will be + called with one of the following strings: ``'-Infinity'``, + ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature, + as it is rare to parse non-compliant JSON containing these values. + + Encoders and decoders --------------------- .. class:: JSONDecoder(encoding='utf-8', object_hook=None, parse_float=None, \ parse_int=None, parse_constant=None, \ - object_pairs_hook=None, strict=True) + object_pairs_hook=None, strict=True, allow_nan=False) Simple JSON decoder. @@ -476,7 +492,8 @@ Encoders and decoders | null | None | None | +---------------+-----------+-----------+ - It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their + When *allow_nan* is True, it also understands + ``NaN``, ``Infinity``, and ``-Infinity`` as their corresponding ``float`` values, which is outside the JSON spec. *encoding* determines the encoding used to interpret any :class:`str` objects @@ -516,15 +533,24 @@ Encoders and decoders suitable alternative parser is specified (e.g. :class:`decimal.Decimal`) - *parse_constant*, if specified, will be called with one of the following - strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This can be used to - raise an exception if invalid JSON numbers are encountered. + *parse_constant*, if specified, will be + called with one of the following strings: ``'-Infinity'``, + ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature, + as it is rare to parse non-compliant JSON containing these values. *strict* controls the parser's behavior when it encounters an invalid control character in a string. The default setting of ``True`` means that unescaped control characters are parse errors, if ``False`` then control characters will be allowed in strings. + *allow_nan* when True (not the default), the decoder will allow + ``NaN``, ``Infinity``, and ``-Infinity`` as their corresponding floats. + + .. versionchanged:: 3.19.0 + This argument was added to make it behave closer to the spec by + default. The previous behavior can be restored by setting this to + False. + .. method:: decode(s) Return the Python representation of the JSON document *s*. See @@ -546,7 +572,7 @@ Encoders and decoders document is not valid. .. class:: JSONEncoder(skipkeys=False, ensure_ascii=True, \ - check_circular=True, allow_nan=True, sort_keys=False, \ + check_circular=True, allow_nan=False, sort_keys=False, \ indent=None, separators=None, encoding='utf-8', \ default=None, use_decimal=True, \ namedtuple_as_object=True, tuple_as_array=True, \ @@ -587,7 +613,8 @@ Encoders and decoders wrapped in another type with an appropriate `for_json` method to transform the keys during encoding. - It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their + When *allow_nan* is True, it also understands + ``NaN``, ``Infinity``, and ``-Infinity`` as their corresponding ``float`` values, which is outside the JSON spec. To extend this to recognize other objects, subclass and implement a @@ -613,12 +640,16 @@ Encoders and decoders prevent an infinite recursion (which would cause an :exc:`OverflowError`). Otherwise, no such check takes place. - If *allow_nan* is true (the default), then ``NaN``, ``Infinity``, and + If *allow_nan* is true (not the default), then ``NaN``, ``Infinity``, and ``-Infinity`` will be encoded as such. This behavior is not JSON - specification compliant, but is consistent with most JavaScript based - encoders and decoders. Otherwise, it will be a :exc:`ValueError` to encode + specification compliant. Otherwise, it will be a :exc:`ValueError` to encode such floats. See also *ignore_nan* for ECMA-262 compliant behavior. + .. versionchanged:: 3.19.0 + This default is now False to make it behave closer to the spec. + The previous behavior can be restored by setting this to + False. + If *sort_keys* is true (not the default), then the output of dictionaries will be sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. @@ -730,7 +761,7 @@ Encoders and decoders :meth:`iterencode`. .. class:: JSONEncoderForHTML(skipkeys=False, ensure_ascii=True, \ - check_circular=True, allow_nan=True, \ + check_circular=True, allow_nan=False, \ sort_keys=False, indent=None, separators=None, \ encoding='utf-8', \ default=None, use_decimal=True, \ @@ -840,22 +871,28 @@ Infinite and NaN Number Values The RFC does not permit the representation of infinite or NaN number values. Despite that, by default, this module accepts and outputs ``Infinity``, -``-Infinity``, and ``NaN`` as if they were valid JSON number literal values:: +``-Infinity``, and ``NaN`` as if they were valid JSON number literal values +if the allow_nan flag is enabled:: >>> # Neither of these calls raises an exception, but the results are not valid JSON - >>> json.dumps(float('-inf')) + >>> json.dumps(float('-inf'), allow_nan=True) '-Infinity' - >>> json.dumps(float('nan')) + >>> json.dumps(float('nan'), allow_nan=True) 'NaN' >>> # Same when deserializing - >>> json.loads('-Infinity') + >>> json.loads('-Infinity', allow_nan=True) -inf - >>> json.loads('NaN') + >>> json.loads('NaN', allow_nan=True) nan + >>> # ignore_nan uses the ECMA-262 behavior to serialize these as null + >>> json.dumps(float('-inf'), ignore_nan=True) + 'null' + >>> json.dumps(float('nan'), ignore_nan=True) + 'null' In the serializer, the *allow_nan* parameter can be used to alter this -behavior. In the deserializer, the *parse_constant* parameter can be used to -alter this behavior. +behavior. In the deserializer, the *allow_nan* and +*parse_constant* parameters can be used to alter this behavior. Repeated Names Within an Object diff --git a/simplejson/__init__.py b/simplejson/__init__.py index 92b6009..206e22d 100644 --- a/simplejson/__init__.py +++ b/simplejson/__init__.py @@ -149,28 +149,10 @@ def _import_c_make_encoder(): except ImportError: return None -_default_encoder = JSONEncoder( - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - indent=None, - separators=None, - encoding='utf-8', - default=None, - use_decimal=True, - namedtuple_as_object=True, - tuple_as_array=True, - iterable_as_array=False, - bigint_as_string=False, - item_sort_key=None, - for_json=False, - ignore_nan=False, - int_as_string_bitcount=None, -) +_default_encoder = JSONEncoder() def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, + allow_nan=False, cls=None, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, sort_keys=False, item_sort_key=None, @@ -187,10 +169,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, contain non-ASCII characters, so long as they do not need to be escaped by JSON. When it is true, all non-ASCII characters are escaped. - If *allow_nan* is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the original JSON specification, instead of using - the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See + If *allow_nan* is true (default: ``False``), then out of range ``float`` + values (``nan``, ``inf``, ``-inf``) will be serialized to + their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``) + instead of raising a ValueError. See *ignore_nan* for ECMA-262 compliant behavior. If *indent* is a string, then JSON array elements and object members @@ -258,7 +240,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, """ # cached encoder if (not skipkeys and ensure_ascii and - check_circular and allow_nan and + check_circular and not allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal and namedtuple_as_object and tuple_as_array and not iterable_as_array @@ -292,7 +274,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, + allow_nan=False, cls=None, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, sort_keys=False, item_sort_key=None, @@ -312,10 +294,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, for container types will be skipped and a circular reference will result in an ``OverflowError`` (or worse). - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in - strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + If *allow_nan* is true (default: ``False``), then out of range ``float`` + values (``nan``, ``inf``, ``-inf``) will be serialized to + their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``) + instead of raising a ValueError. See + *ignore_nan* for ECMA-262 compliant behavior. If ``indent`` is a string, then JSON array elements and object members will be pretty-printed with a newline followed by that string repeated @@ -383,7 +366,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, """ # cached encoder if (not skipkeys and ensure_ascii and - check_circular and allow_nan and + check_circular and not allow_nan and cls is None and indent is None and separators is None and encoding == 'utf-8' and default is None and use_decimal and namedtuple_as_object and tuple_as_array and not iterable_as_array @@ -412,14 +395,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, **kw).encode(obj) -_default_decoder = JSONDecoder(encoding=None, object_hook=None, - object_pairs_hook=None) +_default_decoder = JSONDecoder() def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, - use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, - **kw): + use_decimal=False, allow_nan=False, **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document as `str` or `bytes`) to a Python object. @@ -451,14 +432,18 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, ``int(num_str)``. This can be used to use another datatype or parser for JSON integers (e.g. :class:`float`). - *parse_constant*, if specified, will be called with one of the - following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This - can be used to raise an exception if invalid JSON numbers are - encountered. + *allow_nan*, if True (default false), will allow the parser to + accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity`` + and enable the use of the deprecated *parse_constant*. If *use_decimal* is true (default: ``False``) then it implies parse_float=decimal.Decimal for parity with ``dump``. + *parse_constant*, if specified, will be + called with one of the following strings: ``'-Infinity'``, + ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature, + as it is rare to parse non-compliant JSON containing these values. + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead of subclassing whenever possible. @@ -468,12 +453,12 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, - use_decimal=use_decimal, **kw) + use_decimal=use_decimal, allow_nan=allow_nan, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, - use_decimal=False, **kw): + use_decimal=False, allow_nan=False, **kw): """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. @@ -505,14 +490,18 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, ``int(num_str)``. This can be used to use another datatype or parser for JSON integers (e.g. :class:`float`). - *parse_constant*, if specified, will be called with one of the - following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This - can be used to raise an exception if invalid JSON numbers are - encountered. + *allow_nan*, if True (default false), will allow the parser to + accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity`` + and enable the use of the deprecated *parse_constant*. If *use_decimal* is true (default: ``False``) then it implies parse_float=decimal.Decimal for parity with ``dump``. + *parse_constant*, if specified, will be + called with one of the following strings: ``'-Infinity'``, + ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature, + as it is rare to parse non-compliant JSON containing these values. + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead of subclassing whenever possible. @@ -521,7 +510,7 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and parse_constant is None and object_pairs_hook is None - and not use_decimal and not kw): + and not use_decimal and not allow_nan and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder @@ -539,6 +528,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, if parse_float is not None: raise TypeError("use_decimal=True implies parse_float=Decimal") kw['parse_float'] = Decimal + if allow_nan: + kw['allow_nan'] = True return cls(encoding=encoding, **kw).decode(s) @@ -560,22 +551,9 @@ def _toggle_speedups(enabled): scan.make_scanner = scan.py_make_scanner dec.make_scanner = scan.make_scanner global _default_decoder - _default_decoder = JSONDecoder( - encoding=None, - object_hook=None, - object_pairs_hook=None, - ) + _default_decoder = JSONDecoder() global _default_encoder - _default_encoder = JSONEncoder( - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - indent=None, - separators=None, - encoding='utf-8', - default=None, - ) + _default_encoder = JSONEncoder() def simple_first(kv): """Helper function to pass to item_sort_key to sort simple diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c index e08f4c4..bd56b4d 100644 --- a/simplejson/_speedups.c +++ b/simplejson/_speedups.c @@ -1843,7 +1843,7 @@ bail: } static PyObject * -_parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) +_parse_constant(PyScannerObject *s, PyObject *pystr, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { /* Read a JSON constant from PyString pystr. constant is the Python string that was found @@ -1855,6 +1855,10 @@ _parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize Returns the result of parse_constant */ PyObject *rval; + if (s->parse_constant == Py_None) { + raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx); + return NULL; + } /* rval = parse_constant(constant) */ rval = PyObject_CallOneArg(s->parse_constant, constant); @@ -2158,7 +2162,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case 'N': /* NaN */ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr); } else fallthrough = 1; @@ -2166,7 +2170,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case 'I': /* Infinity */ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2174,7 +2178,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n case '-': /* -Infinity */ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2277,7 +2281,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' && PyUnicode_READ(kind, str, idx + 2) == 'N') { - rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr); } else fallthrough = 1; @@ -2292,7 +2296,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyUnicode_READ(kind, str, idx + 5) == 'i' && PyUnicode_READ(kind, str, idx + 6) == 't' && PyUnicode_READ(kind, str, idx + 7) == 'y') { - rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr); } else fallthrough = 1; @@ -2308,7 +2312,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ PyUnicode_READ(kind, str, idx + 6) == 'i' && PyUnicode_READ(kind, str, idx + 7) == 't' && PyUnicode_READ(kind, str, idx + 8) == 'y') { - rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr); + rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr); } else fallthrough = 1; diff --git a/simplejson/decoder.py b/simplejson/decoder.py index bbde82c..c99a976 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -306,14 +306,15 @@ class JSONDecoder(object): | null | None | +---------------+-------------------+ - It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + When allow_nan=True, it also understands + ``NaN``, ``Infinity``, and ``-Infinity`` as their corresponding ``float`` values, which is outside the JSON spec. """ def __init__(self, encoding=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, - object_pairs_hook=None): + object_pairs_hook=None, allow_nan=False): """ *encoding* determines the encoding used to interpret any :class:`str` objects decoded by this instance (``'utf-8'`` by @@ -346,10 +347,13 @@ class JSONDecoder(object): ``int(num_str)``. This can be used to use another datatype or parser for JSON integers (e.g. :class:`float`). - *parse_constant*, if specified, will be called with one of the - following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This - can be used to raise an exception if invalid JSON numbers are - encountered. + *allow_nan*, if True (default false), will allow the parser to + accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``. + + *parse_constant*, if specified, will be + called with one of the following strings: ``'-Infinity'``, + ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature, + as it is rare to parse non-compliant JSON containing these values. *strict* controls the parser's behavior when it encounters an invalid control character in a string. The default setting of @@ -364,7 +368,7 @@ class JSONDecoder(object): self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or bounded_int - self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + self.parse_constant = parse_constant or (allow_nan and _CONSTANTS.__getitem__ or None) self.strict = strict self.parse_object = JSONObject self.parse_array = JSONArray diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 3f94f3a..661ff36 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -140,7 +140,7 @@ class JSONEncoder(object): key_separator = ': ' def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, + check_circular=True, allow_nan=False, sort_keys=False, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, @@ -161,10 +161,11 @@ class JSONEncoder(object): prevent an infinite recursion (which would cause an OverflowError). Otherwise, no such check takes place. - If allow_nan is true, then NaN, Infinity, and -Infinity will be - encoded as such. This behavior is not JSON specification compliant, - but is consistent with most JavaScript based encoders and decoders. - Otherwise, it will be a ValueError to encode such floats. + If allow_nan is true (default: False), then out of range float + values (nan, inf, -inf) will be serialized to + their JavaScript equivalents (NaN, Infinity, -Infinity) + instead of raising a ValueError. See + ignore_nan for ECMA-262 compliant behavior. If sort_keys is true, then the output of dictionaries will be sorted by key; this is useful for regression tests to ensure @@ -294,7 +295,7 @@ class JSONEncoder(object): # This doesn't pass the iterator directly to ''.join() because the # exceptions aren't as detailed. The list call should be roughly # equivalent to the PySequence_Fast that ''.join() would do. - chunks = self.iterencode(o, _one_shot=True) + chunks = self.iterencode(o) if not isinstance(chunks, (list, tuple)): chunks = list(chunks) if self.ensure_ascii: @@ -302,7 +303,7 @@ class JSONEncoder(object): else: return u''.join(chunks) - def iterencode(self, o, _one_shot=False): + def iterencode(self, o): """Encode the given object and yield each string representation as available. @@ -356,8 +357,7 @@ class JSONEncoder(object): key_memo = {} int_as_string_bitcount = ( 53 if self.bigint_as_string else self.int_as_string_bitcount) - if (_one_shot and c_make_encoder is not None - and self.indent is None): + if (c_make_encoder is not None and self.indent is None): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, @@ -370,7 +370,7 @@ class JSONEncoder(object): _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot, self.use_decimal, + self.skipkeys, self.use_decimal, self.namedtuple_as_object, self.tuple_as_array, int_as_string_bitcount, self.item_sort_key, self.encoding, self.for_json, @@ -398,14 +398,14 @@ class JSONEncoderForHTML(JSONEncoder): def encode(self, o): # Override JSONEncoder.encode because it has hacks for # performance that make things more complicated. - chunks = self.iterencode(o, True) + chunks = self.iterencode(o) if self.ensure_ascii: return ''.join(chunks) else: return u''.join(chunks) - def iterencode(self, o, _one_shot=False): - chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot) + def iterencode(self, o): + chunks = super(JSONEncoderForHTML, self).iterencode(o) for chunk in chunks: chunk = chunk.replace('&', '\\u0026') chunk = chunk.replace('<', '\\u003c') @@ -419,7 +419,7 @@ class JSONEncoderForHTML(JSONEncoder): def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, - _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + _key_separator, _item_separator, _sort_keys, _skipkeys, _use_decimal, _namedtuple_as_object, _tuple_as_array, _int_as_string_bitcount, _item_sort_key, _encoding,_for_json, diff --git a/simplejson/scanner.py b/simplejson/scanner.py index 85e385e..34710d6 100644 --- a/simplejson/scanner.py +++ b/simplejson/scanner.py @@ -60,11 +60,11 @@ def py_make_scanner(context): else: res = parse_int(integer) return res, m.end() - elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': + elif parse_constant and nextchar == 'N' and string[idx:idx + 3] == 'NaN': return parse_constant('NaN'), idx + 3 - elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': + elif parse_constant and nextchar == 'I' and string[idx:idx + 8] == 'Infinity': return parse_constant('Infinity'), idx + 8 - elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': + elif parse_constant and nextchar == '-' and string[idx:idx + 9] == '-Infinity': return parse_constant('-Infinity'), idx + 9 else: raise JSONDecodeError(errmsg, string, idx) diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py index e382ec2..a977969 100644 --- a/simplejson/tests/test_float.py +++ b/simplejson/tests/test_float.py @@ -7,9 +7,9 @@ from simplejson.decoder import NaN, PosInf, NegInf class TestFloat(TestCase): def test_degenerates_allow(self): for inf in (PosInf, NegInf): - self.assertEqual(json.loads(json.dumps(inf)), inf) + self.assertEqual(json.loads(json.dumps(inf, allow_nan=True), allow_nan=True), inf) # Python 2.5 doesn't have math.isnan - nan = json.loads(json.dumps(NaN)) + nan = json.loads(json.dumps(NaN, allow_nan=True), allow_nan=True) self.assertTrue((0 + nan) != nan) def test_degenerates_ignore(self): @@ -19,6 +19,9 @@ class TestFloat(TestCase): def test_degenerates_deny(self): for f in (PosInf, NegInf, NaN): self.assertRaises(ValueError, json.dumps, f, allow_nan=False) + for s in ('Infinity', '-Infinity', 'NaN'): + self.assertRaises(ValueError, json.loads, s, allow_nan=False) + self.assertRaises(ValueError, json.loads, s) def test_floats(self): for num in [1617161771.7650001, math.pi, math.pi**100, |