summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBob Ippolito <bob@redivi.com>2023-04-06 10:26:00 -0700
committerGitHub <noreply@github.com>2023-04-06 10:26:00 -0700
commit1a4995dca71e02957d81becd7a053c9b4f19aceb (patch)
treed97740dcd5e2185c30ab20550fe35558806dfac0
parent9559fc756deaf20b6bae961b58c5289d8582c8b7 (diff)
parentec4a3d5c7299b16a9bf4d431fa16f466cc453697 (diff)
downloadsimplejson-1a4995dca71e02957d81becd7a053c9b4f19aceb.tar.gz
Merge pull request #313 from simplejson/audit-fixesv3.19.0
Implement recommended fixes from OSTIF audit
-rw-r--r--CHANGES.txt28
-rw-r--r--conf.py4
-rw-r--r--index.rst103
-rw-r--r--simplejson/__init__.py106
-rw-r--r--simplejson/_speedups.c30
-rw-r--r--simplejson/decoder.py82
-rw-r--r--simplejson/encoder.py30
-rw-r--r--simplejson/scanner.py6
-rw-r--r--simplejson/tests/test_decode.py8
-rw-r--r--simplejson/tests/test_fail.py4
-rw-r--r--simplejson/tests/test_float.py7
-rw-r--r--simplejson/tests/test_scanstring.py4
12 files changed, 252 insertions, 160 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index d2f7a94..c3e176c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,31 @@
+Version 3.19.0 released 2023-04-06
+
+* This release contains security hardening measures based on recommendations
+ by a security audit sponsored by OSTIF and conducted by X41 D-Sec GmbH.
+ Several of these measures include changing defaults to be more strict;
+ by default simplejson will now only consume and produce compliant JSON,
+ but the flags still exist for any backwards compatibility needs.
+ No high priority issues were discovered; the reference count
+ leak is thought to be unreachable since the digits of the float are
+ checked before PyOS_string_to_double is called.
+ A link to the public version of this report will be included in a
+ future release of simplejson. The following fixes were implemented in
+ one PR: https://github.com/simplejson/simplejson/pull/313
+* Fix invalid handling of unicode escape sequences in the pure Python
+ implementation of the decoder (SJ-PT-23-01)
+* Fix missing reference count decrease if PyOS_string_to_double raises
+ an exception in Python 2.x; was probably unreachable (SJ-PT-23-02)
+* Backport the integer string length limitation from Python 3.11 to
+ limit quadratic number parsing (SJ-PT-23-03)
+* Fix inconsistencies with error messages between the C and Python
+ implementations (SJ-PT-23-100)
+* Remove unused unichr import from encoder (SJ-PT-23-101)
+* Remove unused namedtuple_as_object and tuple_as_array arguments from
+ simplejson.load (SJ-PT-23-102)
+* Remove vestigial _one_shot code from iterencode (SJ-PT-23-103)
+* Change default of allow_nan from True to False and add allow_nan
+ to decoder (SJ-PT-23-107)
+
Version 3.18.4 released 2023-03-14
* Test the sdist to prevent future regressions
diff --git a/conf.py b/conf.py
index 921bbef..5a2dded 100644
--- a/conf.py
+++ b/conf.py
@@ -42,9 +42,9 @@ copyright = '2023, Bob Ippolito'
# other places throughout the built documents.
#
# The short X.Y version.
-version = '3.18'
+version = '3.19'
# The full version, including alpha/beta/rc tags.
-release = '3.18.4'
+release = '3.19.0'
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
diff --git a/index.rst b/index.rst
index fbb52b5..f57d650 100644
--- a/index.rst
+++ b/index.rst
@@ -160,7 +160,7 @@ Basic Usage
-----------
.. function:: dump(obj, fp, skipkeys=False, ensure_ascii=True, \
- check_circular=True, allow_nan=True, cls=None, \
+ check_circular=True, allow_nan=False, cls=None, \
indent=None, separators=None, encoding='utf-8', \
default=None, use_decimal=True, \
namedtuple_as_object=True, tuple_as_array=True, \
@@ -191,7 +191,7 @@ Basic Usage
is highly optimized.
.. function:: dumps(obj, skipkeys=False, ensure_ascii=True, \
- check_circular=True, allow_nan=True, cls=None, \
+ check_circular=True, allow_nan=False, cls=None, \
indent=None, separators=None, encoding='utf-8', \
default=None, use_decimal=True, \
namedtuple_as_object=True, tuple_as_array=True, \
@@ -225,13 +225,17 @@ Basic Usage
reference check for container types will be skipped and a circular
reference will result in an :exc:`OverflowError` (or worse).
- If *allow_nan* is false (default: ``True``), then it will be a
+ If *allow_nan* is false (default: ``False``), then it will be a
:exc:`ValueError` to serialize out of range :class:`float` values
(``nan``, ``inf``, ``-inf``) in strict compliance of the original
JSON specification. If *allow_nan* is true, their JavaScript equivalents
will be used (``NaN``, ``Infinity``, ``-Infinity``). See also *ignore_nan*
for ECMA-262 compliant behavior.
+ .. versionchanged:: 3.19.0
+ The default for *allow_nan* was changed to False for better spec
+ compliance.
+
If *indent* is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
for each level of nesting. ``None`` (the default) selects the most compact
@@ -324,7 +328,7 @@ Basic Usage
.. function:: load(fp, encoding='utf-8', cls=None, object_hook=None, \
parse_float=None, parse_int=None, \
parse_constant=None, object_pairs_hook=None, \
- use_decimal=None, **kw)
+ use_decimal=None, allow_nan=False, **kw)
Deserialize *fp* (a ``.read()``-supporting file-like object containing a JSON
document) to a Python object using this
@@ -367,7 +371,7 @@ Basic Usage
.. function:: loads(s, encoding='utf-8', cls=None, object_hook=None, \
parse_float=None, parse_int=None, \
parse_constant=None, object_pairs_hook=None, \
- use_decimal=None, **kw)
+ use_decimal=None, allow_nan=False, **kw)
Deserialize *s* (a :class:`str` or :class:`unicode` instance containing a JSON
document) to a Python object. :exc:`JSONDecodeError` will be
@@ -412,9 +416,12 @@ Basic Usage
be used to use another datatype or parser for JSON integers
(e.g. :class:`float`).
- *parse_constant*, if specified, will be called with one of the following
- strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This can be used to
- raise an exception if invalid JSON numbers are encountered.
+ .. versionchanged:: 3.19.0
+ The integer to string conversion length limitation introduced in
+ Python 3.11 has been backported. An attempt to parse an integer
+ with more than 4300 digits will result in an exception unless a
+ suitable alternative parser is specified
+ (e.g. :class:`decimal.Decimal`)
If *use_decimal* is true (default: ``False``) then *parse_float* is set to
:class:`decimal.Decimal`. This is a convenience for parity with the
@@ -436,12 +443,28 @@ Basic Usage
Subclassing is not recommended. You should use *object_hook* or
*object_pairs_hook*. This is faster and more portable than subclassing.
+
+ *allow_nan*, if True (default false), will allow the parser to
+ accept the non-standard floats
+ ``NaN``, ``Infinity``, and ``-Infinity``.
+
+ .. versionchanged:: 3.19.0
+
+ This argument was added to make it possible to use the legacy behavior
+ now that the parser is more strict about compliance to the standard.
+
+ *parse_constant*, if specified, will be
+ called with one of the following strings: ``'-Infinity'``,
+ ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
+ as it is rare to parse non-compliant JSON containing these values.
+
+
Encoders and decoders
---------------------
.. class:: JSONDecoder(encoding='utf-8', object_hook=None, parse_float=None, \
parse_int=None, parse_constant=None, \
- object_pairs_hook=None, strict=True)
+ object_pairs_hook=None, strict=True, allow_nan=False)
Simple JSON decoder.
@@ -469,7 +492,8 @@ Encoders and decoders
| null | None | None |
+---------------+-----------+-----------+
- It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their
+ When *allow_nan* is True, it also understands
+ ``NaN``, ``Infinity``, and ``-Infinity`` as their
corresponding ``float`` values, which is outside the JSON spec.
*encoding* determines the encoding used to interpret any :class:`str` objects
@@ -502,15 +526,31 @@ Encoders and decoders
be used to use another datatype or parser for JSON integers
(e.g. :class:`float`).
- *parse_constant*, if specified, will be called with one of the following
- strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This can be used to
- raise an exception if invalid JSON numbers are encountered.
+ .. versionchanged:: 3.19.0
+ The integer to string conversion length limitation introduced in
+ Python 3.11 has been backported. An attempt to parse an integer
+ with more than 4300 digits will result in an exception unless a
+ suitable alternative parser is specified
+ (e.g. :class:`decimal.Decimal`)
+
+ *parse_constant*, if specified, will be
+ called with one of the following strings: ``'-Infinity'``,
+ ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
+ as it is rare to parse non-compliant JSON containing these values.
*strict* controls the parser's behavior when it encounters an invalid
control character in a string. The default setting of ``True`` means that
unescaped control characters are parse errors, if ``False`` then control
characters will be allowed in strings.
+ When *allow_nan* is True (not the default), the decoder will allow
+ ``NaN``, ``Infinity``, and ``-Infinity`` as their corresponding floats.
+
+ .. versionchanged:: 3.19.0
+ This argument was added to make it behave closer to the spec by
+ default. The previous behavior can be restored by setting this to
+ True.
+
.. method:: decode(s)
Return the Python representation of the JSON document *s*. See
@@ -532,7 +572,7 @@ Encoders and decoders
document is not valid.
.. class:: JSONEncoder(skipkeys=False, ensure_ascii=True, \
- check_circular=True, allow_nan=True, sort_keys=False, \
+ check_circular=True, allow_nan=False, sort_keys=False, \
indent=None, separators=None, encoding='utf-8', \
default=None, use_decimal=True, \
namedtuple_as_object=True, tuple_as_array=True, \
@@ -573,7 +613,8 @@ Encoders and decoders
wrapped in another type with an appropriate `for_json` method to
transform the keys during encoding.
- It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their
+ When *allow_nan* is True, it also understands
+ ``NaN``, ``Infinity``, and ``-Infinity`` as their
corresponding ``float`` values, which is outside the JSON spec.
To extend this to recognize other objects, subclass and implement a
@@ -599,12 +640,16 @@ Encoders and decoders
prevent an infinite recursion (which would cause an :exc:`OverflowError`).
Otherwise, no such check takes place.
- If *allow_nan* is true (the default), then ``NaN``, ``Infinity``, and
+ If *allow_nan* is true (not the default), then ``NaN``, ``Infinity``, and
``-Infinity`` will be encoded as such. This behavior is not JSON
- specification compliant, but is consistent with most JavaScript based
- encoders and decoders. Otherwise, it will be a :exc:`ValueError` to encode
+ specification compliant. Otherwise, it will be a :exc:`ValueError` to encode
such floats. See also *ignore_nan* for ECMA-262 compliant behavior.
+ .. versionchanged:: 3.19.0
+ This default is now False to make it behave closer to the spec.
+ The previous behavior can be restored by setting this to
+ True.
+
If *sort_keys* is true (not the default), then the output of dictionaries
will be sorted by key; this is useful for regression tests to ensure that
JSON serializations can be compared on a day-to-day basis.
@@ -716,7 +761,7 @@ Encoders and decoders
:meth:`iterencode`.
.. class:: JSONEncoderForHTML(skipkeys=False, ensure_ascii=True, \
- check_circular=True, allow_nan=True, \
+ check_circular=True, allow_nan=False, \
sort_keys=False, indent=None, separators=None, \
encoding='utf-8', \
default=None, use_decimal=True, \
@@ -826,22 +871,28 @@ Infinite and NaN Number Values
The RFC does not permit the representation of infinite or NaN number values.
Despite that, by default, this module accepts and outputs ``Infinity``,
-``-Infinity``, and ``NaN`` as if they were valid JSON number literal values::
+``-Infinity``, and ``NaN`` as if they were valid JSON number literal values
+if the allow_nan flag is enabled::
>>> # Neither of these calls raises an exception, but the results are not valid JSON
- >>> json.dumps(float('-inf'))
+ >>> json.dumps(float('-inf'), allow_nan=True)
'-Infinity'
- >>> json.dumps(float('nan'))
+ >>> json.dumps(float('nan'), allow_nan=True)
'NaN'
>>> # Same when deserializing
- >>> json.loads('-Infinity')
+ >>> json.loads('-Infinity', allow_nan=True)
-inf
- >>> json.loads('NaN')
+ >>> json.loads('NaN', allow_nan=True)
nan
+ >>> # ignore_nan uses the ECMA-262 behavior to serialize these as null
+ >>> json.dumps(float('-inf'), ignore_nan=True)
+ 'null'
+ >>> json.dumps(float('nan'), ignore_nan=True)
+ 'null'
In the serializer, the *allow_nan* parameter can be used to alter this
-behavior. In the deserializer, the *parse_constant* parameter can be used to
-alter this behavior.
+behavior. In the deserializer, the *allow_nan* and
+*parse_constant* parameters can be used to alter this behavior.
Repeated Names Within an Object
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
index 47e49a3..206e22d 100644
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -118,7 +118,7 @@ Serializing multiple objects to JSON lines (newline-delimited JSON)::
"""
from __future__ import absolute_import
-__version__ = '3.18.4'
+__version__ = '3.19.0'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
@@ -149,28 +149,10 @@ def _import_c_make_encoder():
except ImportError:
return None
-_default_encoder = JSONEncoder(
- skipkeys=False,
- ensure_ascii=True,
- check_circular=True,
- allow_nan=True,
- indent=None,
- separators=None,
- encoding='utf-8',
- default=None,
- use_decimal=True,
- namedtuple_as_object=True,
- tuple_as_array=True,
- iterable_as_array=False,
- bigint_as_string=False,
- item_sort_key=None,
- for_json=False,
- ignore_nan=False,
- int_as_string_bitcount=None,
-)
+_default_encoder = JSONEncoder()
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
- allow_nan=True, cls=None, indent=None, separators=None,
+ allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
@@ -187,10 +169,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
contain non-ASCII characters, so long as they do not need to be escaped
by JSON. When it is true, all non-ASCII characters are escaped.
- If *allow_nan* is false, then it will be a ``ValueError`` to
- serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
- in strict compliance of the original JSON specification, instead of using
- the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
+ If *allow_nan* is true (default: ``False``), then out of range ``float``
+ values (``nan``, ``inf``, ``-inf``) will be serialized to
+ their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
+ instead of raising a ValueError. See
*ignore_nan* for ECMA-262 compliant behavior.
If *indent* is a string, then JSON array elements and object members
@@ -258,7 +240,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
"""
# cached encoder
if (not skipkeys and ensure_ascii and
- check_circular and allow_nan and
+ check_circular and not allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and not iterable_as_array
@@ -292,7 +274,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
- allow_nan=True, cls=None, indent=None, separators=None,
+ allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
@@ -312,10 +294,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
- If ``allow_nan`` is false, then it will be a ``ValueError`` to
- serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
- strict compliance of the JSON specification, instead of using the
- JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+ If *allow_nan* is true (default: ``False``), then out of range ``float``
+ values (``nan``, ``inf``, ``-inf``) will be serialized to
+ their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
+ instead of raising a ValueError. See
+ *ignore_nan* for ECMA-262 compliant behavior.
If ``indent`` is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
@@ -383,7 +366,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
"""
# cached encoder
if (not skipkeys and ensure_ascii and
- check_circular and allow_nan and
+ check_circular and not allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and not iterable_as_array
@@ -412,14 +395,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
**kw).encode(obj)
-_default_decoder = JSONDecoder(encoding=None, object_hook=None,
- object_pairs_hook=None)
+_default_decoder = JSONDecoder()
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
- use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
- **kw):
+ use_decimal=False, allow_nan=False, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document as `str` or `bytes`) to a Python object.
@@ -442,23 +423,27 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
takes priority.
*parse_float*, if specified, will be called with the string of every
- JSON float to be decoded. By default, this is equivalent to
+ JSON float to be decoded. By default, this is equivalent to
``float(num_str)``. This can be used to use another datatype or parser
for JSON floats (e.g. :class:`decimal.Decimal`).
*parse_int*, if specified, will be called with the string of every
- JSON int to be decoded. By default, this is equivalent to
+ JSON int to be decoded. By default, this is equivalent to
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
- *parse_constant*, if specified, will be called with one of the
- following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
- can be used to raise an exception if invalid JSON numbers are
- encountered.
+ *allow_nan*, if True (default false), will allow the parser to
+ accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
+ and enable the use of the deprecated *parse_constant*.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
+ *parse_constant*, if specified, will be
+ called with one of the following strings: ``'-Infinity'``,
+ ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
+ as it is rare to parse non-compliant JSON containing these values.
+
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
@@ -468,12 +453,12 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
- use_decimal=use_decimal, **kw)
+ use_decimal=use_decimal, allow_nan=allow_nan, **kw)
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
- use_decimal=False, **kw):
+ use_decimal=False, allow_nan=False, **kw):
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
document) to a Python object.
@@ -505,14 +490,18 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
- *parse_constant*, if specified, will be called with one of the
- following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
- can be used to raise an exception if invalid JSON numbers are
- encountered.
+ *allow_nan*, if True (default false), will allow the parser to
+ accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
+ and enable the use of the deprecated *parse_constant*.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
+ *parse_constant*, if specified, will be
+ called with one of the following strings: ``'-Infinity'``,
+ ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
+ as it is rare to parse non-compliant JSON containing these values.
+
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
@@ -521,7 +510,7 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None
- and not use_decimal and not kw):
+ and not use_decimal and not allow_nan and not kw):
return _default_decoder.decode(s)
if cls is None:
cls = JSONDecoder
@@ -539,6 +528,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
if parse_float is not None:
raise TypeError("use_decimal=True implies parse_float=Decimal")
kw['parse_float'] = Decimal
+ if allow_nan:
+ kw['allow_nan'] = True
return cls(encoding=encoding, **kw).decode(s)
@@ -560,22 +551,9 @@ def _toggle_speedups(enabled):
scan.make_scanner = scan.py_make_scanner
dec.make_scanner = scan.make_scanner
global _default_decoder
- _default_decoder = JSONDecoder(
- encoding=None,
- object_hook=None,
- object_pairs_hook=None,
- )
+ _default_decoder = JSONDecoder()
global _default_encoder
- _default_encoder = JSONEncoder(
- skipkeys=False,
- ensure_ascii=True,
- check_circular=True,
- allow_nan=True,
- indent=None,
- separators=None,
- encoding='utf-8',
- default=None,
- )
+ _default_encoder = JSONEncoder()
def simple_first(kv):
"""Helper function to pass to item_sort_key to sort simple
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
index ec054c7..bd56b4d 100644
--- a/simplejson/_speedups.c
+++ b/simplejson/_speedups.c
@@ -1843,7 +1843,7 @@ bail:
}
static PyObject *
-_parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+_parse_constant(PyScannerObject *s, PyObject *pystr, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
{
/* Read a JSON constant from PyString pystr.
constant is the Python string that was found
@@ -1855,6 +1855,10 @@ _parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize
Returns the result of parse_constant
*/
PyObject *rval;
+ if (s->parse_constant == Py_None) {
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+ return NULL;
+ }
/* rval = parse_constant(constant) */
rval = PyObject_CallOneArg(s->parse_constant, constant);
@@ -1886,7 +1890,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
/* read a sign if it's there, make sure it's not the end of the string */
if (str[idx] == '-') {
if (idx >= end_idx) {
- raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
return NULL;
}
idx++;
@@ -1903,7 +1907,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
}
/* no integer digits, error */
else {
- raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
return NULL;
}
@@ -1949,8 +1953,10 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
/* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
NULL, NULL);
- if (d == -1.0 && PyErr_Occurred())
+ if (d == -1.0 && PyErr_Occurred()) {
+ Py_DECREF(numstr);
return NULL;
+ }
rval = PyFloat_FromDouble(d);
}
}
@@ -1993,7 +1999,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
/* read a sign if it's there, make sure it's not the end of the string */
if (PyUnicode_READ(kind, str, idx) == '-') {
if (idx >= end_idx) {
- raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
return NULL;
}
idx++;
@@ -2013,7 +2019,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
}
else {
/* no integer digits, error */
- raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+ raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
return NULL;
}
@@ -2156,7 +2162,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
case 'N':
/* NaN */
if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
- rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr);
+ rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr);
}
else
fallthrough = 1;
@@ -2164,7 +2170,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
case 'I':
/* Infinity */
if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
- rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr);
+ rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr);
}
else
fallthrough = 1;
@@ -2172,7 +2178,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
case '-':
/* -Infinity */
if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
- rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr);
+ rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr);
}
else
fallthrough = 1;
@@ -2275,7 +2281,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
if ((idx + 2 < length) &&
PyUnicode_READ(kind, str, idx + 1) == 'a' &&
PyUnicode_READ(kind, str, idx + 2) == 'N') {
- rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr);
+ rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr);
}
else
fallthrough = 1;
@@ -2290,7 +2296,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
PyUnicode_READ(kind, str, idx + 5) == 'i' &&
PyUnicode_READ(kind, str, idx + 6) == 't' &&
PyUnicode_READ(kind, str, idx + 7) == 'y') {
- rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr);
+ rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr);
}
else
fallthrough = 1;
@@ -2306,7 +2312,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
PyUnicode_READ(kind, str, idx + 6) == 'i' &&
PyUnicode_READ(kind, str, idx + 7) == 't' &&
PyUnicode_READ(kind, str, idx + 8) == 'y') {
- rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr);
+ rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr);
}
else
fallthrough = 1;
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index 1a8f772..c99a976 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -46,9 +46,35 @@ BACKSLASH = {
DEFAULT_ENCODING = "utf-8"
+if hasattr(sys, 'get_int_max_str_digits'):
+ bounded_int = int
+else:
+ def bounded_int(s, INT_MAX_STR_DIGITS=4300):
+ """Backport of the integer string length conversion limitation
+
+ https://docs.python.org/3/library/stdtypes.html#int-max-str-digits
+ """
+ if len(s) > INT_MAX_STR_DIGITS:
+ raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, len(s)))
+ return int(s)
+
+
+def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
+ """Scan a four digit hex number from s[end:end + 4]
+ """
+ msg = "Invalid \\uXXXX escape sequence"
+ esc = s[end:end + 4]
+ if not _m(esc):
+ raise JSONDecodeError(msg, s, end - 2)
+ try:
+ return int(esc, 16), end + 4
+ except ValueError:
+ raise JSONDecodeError(msg, s, end - 2)
+
def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
- _PY3=PY3, _maxunicode=sys.maxunicode):
+ _PY3=PY3, _maxunicode=sys.maxunicode,
+ _scan_four_digit_hex=scan_four_digit_hex):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
@@ -67,6 +93,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
if chunk is None:
raise JSONDecodeError(
"Unterminated string starting at", s, begin)
+ prev_end = end
end = chunk.end()
content, terminator = chunk.groups()
# Content is contains zero or more unescaped string characters
@@ -81,7 +108,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
elif terminator != '\\':
if strict:
msg = "Invalid control character %r at"
- raise JSONDecodeError(msg, s, end)
+ raise JSONDecodeError(msg, s, prev_end)
else:
_append(terminator)
continue
@@ -100,35 +127,18 @@ def py_scanstring(s, end, encoding=None, strict=True,
end += 1
else:
# Unicode escape sequence
- msg = "Invalid \\uXXXX escape sequence"
- esc = s[end + 1:end + 5]
- escX = esc[1:2]
- if len(esc) != 4 or escX == 'x' or escX == 'X':
- raise JSONDecodeError(msg, s, end - 1)
- try:
- uni = int(esc, 16)
- except ValueError:
- raise JSONDecodeError(msg, s, end - 1)
- if uni < 0 or uni > _maxunicode:
- raise JSONDecodeError(msg, s, end - 1)
- end += 5
+ uni, end = _scan_four_digit_hex(s, end + 1)
# Check for surrogate pair on UCS-4 systems
# Note that this will join high/low surrogate pairs
# but will also pass unpaired surrogates through
if (_maxunicode > 65535 and
uni & 0xfc00 == 0xd800 and
s[end:end + 2] == '\\u'):
- esc2 = s[end + 2:end + 6]
- escX = esc2[1:2]
- if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
- try:
- uni2 = int(esc2, 16)
- except ValueError:
- raise JSONDecodeError(msg, s, end)
- if uni2 & 0xfc00 == 0xdc00:
- uni = 0x10000 + (((uni - 0xd800) << 10) |
- (uni2 - 0xdc00))
- end += 6
+ uni2, end2 = _scan_four_digit_hex(s, end + 2)
+ if uni2 & 0xfc00 == 0xdc00:
+ uni = 0x10000 + (((uni - 0xd800) << 10) |
+ (uni2 - 0xdc00))
+ end = end2
char = unichr(uni)
# Append the unescaped character
_append(char)
@@ -169,7 +179,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
return pairs, end + 1
elif nextchar != '"':
raise JSONDecodeError(
- "Expecting property name enclosed in double quotes",
+ "Expecting property name enclosed in double quotes or '}'",
s, end)
end += 1
while True:
@@ -296,14 +306,15 @@ class JSONDecoder(object):
| null | None |
+---------------+-------------------+
- It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
+ When allow_nan=True, it also understands
+ ``NaN``, ``Infinity``, and ``-Infinity`` as
their corresponding ``float`` values, which is outside the JSON spec.
"""
def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True,
- object_pairs_hook=None):
+ object_pairs_hook=None, allow_nan=False):
"""
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
@@ -336,10 +347,13 @@ class JSONDecoder(object):
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
- *parse_constant*, if specified, will be called with one of the
- following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
- can be used to raise an exception if invalid JSON numbers are
- encountered.
+ *allow_nan*, if True (default false), will allow the parser to
+ accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``.
+
+ *parse_constant*, if specified, will be
+ called with one of the following strings: ``'-Infinity'``,
+ ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
+ as it is rare to parse non-compliant JSON containing these values.
*strict* controls the parser's behavior when it encounters an
invalid control character in a string. The default setting of
@@ -353,8 +367,8 @@ class JSONDecoder(object):
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
self.parse_float = parse_float or float
- self.parse_int = parse_int or int
- self.parse_constant = parse_constant or _CONSTANTS.__getitem__
+ self.parse_int = parse_int or bounded_int
+ self.parse_constant = parse_constant or (allow_nan and _CONSTANTS.__getitem__ or None)
self.strict = strict
self.parse_object = JSONObject
self.parse_array = JSONArray
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index e93fe43..661ff36 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -5,7 +5,7 @@ import re
from operator import itemgetter
# Do not import Decimal directly to avoid reload issues
import decimal
-from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3
+from .compat import binary_type, text_type, string_types, integer_types, PY3
def _import_speedups():
try:
from . import _speedups
@@ -140,7 +140,7 @@ class JSONEncoder(object):
key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True,
- check_circular=True, allow_nan=True, sort_keys=False,
+ check_circular=True, allow_nan=False, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False,
@@ -161,10 +161,11 @@ class JSONEncoder(object):
prevent an infinite recursion (which would cause an OverflowError).
Otherwise, no such check takes place.
- If allow_nan is true, then NaN, Infinity, and -Infinity will be
- encoded as such. This behavior is not JSON specification compliant,
- but is consistent with most JavaScript based encoders and decoders.
- Otherwise, it will be a ValueError to encode such floats.
+ If allow_nan is true (default: False), then out of range float
+ values (nan, inf, -inf) will be serialized to
+ their JavaScript equivalents (NaN, Infinity, -Infinity)
+ instead of raising a ValueError. See
+ ignore_nan for ECMA-262 compliant behavior.
If sort_keys is true, then the output of dictionaries will be
sorted by key; this is useful for regression tests to ensure
@@ -294,7 +295,7 @@ class JSONEncoder(object):
# This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly
# equivalent to the PySequence_Fast that ''.join() would do.
- chunks = self.iterencode(o, _one_shot=True)
+ chunks = self.iterencode(o)
if not isinstance(chunks, (list, tuple)):
chunks = list(chunks)
if self.ensure_ascii:
@@ -302,7 +303,7 @@ class JSONEncoder(object):
else:
return u''.join(chunks)
- def iterencode(self, o, _one_shot=False):
+ def iterencode(self, o):
"""Encode the given object and yield each string
representation as available.
@@ -356,8 +357,7 @@ class JSONEncoder(object):
key_memo = {}
int_as_string_bitcount = (
53 if self.bigint_as_string else self.int_as_string_bitcount)
- if (_one_shot and c_make_encoder is not None
- and self.indent is None):
+ if (c_make_encoder is not None and self.indent is None):
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys,
@@ -370,7 +370,7 @@ class JSONEncoder(object):
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
- self.skipkeys, _one_shot, self.use_decimal,
+ self.skipkeys, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
int_as_string_bitcount,
self.item_sort_key, self.encoding, self.for_json,
@@ -398,14 +398,14 @@ class JSONEncoderForHTML(JSONEncoder):
def encode(self, o):
# Override JSONEncoder.encode because it has hacks for
# performance that make things more complicated.
- chunks = self.iterencode(o, True)
+ chunks = self.iterencode(o)
if self.ensure_ascii:
return ''.join(chunks)
else:
return u''.join(chunks)
- def iterencode(self, o, _one_shot=False):
- chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
+ def iterencode(self, o):
+ chunks = super(JSONEncoderForHTML, self).iterencode(o)
for chunk in chunks:
chunk = chunk.replace('&', '\\u0026')
chunk = chunk.replace('<', '\\u003c')
@@ -419,7 +419,7 @@ class JSONEncoderForHTML(JSONEncoder):
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
- _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
+ _key_separator, _item_separator, _sort_keys, _skipkeys,
_use_decimal, _namedtuple_as_object, _tuple_as_array,
_int_as_string_bitcount, _item_sort_key,
_encoding,_for_json,
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
index 85e385e..34710d6 100644
--- a/simplejson/scanner.py
+++ b/simplejson/scanner.py
@@ -60,11 +60,11 @@ def py_make_scanner(context):
else:
res = parse_int(integer)
return res, m.end()
- elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+ elif parse_constant and nextchar == 'N' and string[idx:idx + 3] == 'NaN':
return parse_constant('NaN'), idx + 3
- elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+ elif parse_constant and nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
return parse_constant('Infinity'), idx + 8
- elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+ elif parse_constant and nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise JSONDecodeError(errmsg, string, idx)
diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py
index 6960ee5..317b4f9 100644
--- a/simplejson/tests/test_decode.py
+++ b/simplejson/tests/test_decode.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import
import decimal
from unittest import TestCase
+import sys
import simplejson as json
from simplejson.compat import StringIO, b, binary_type
from simplejson import OrderedDict
@@ -117,3 +118,10 @@ class TestDecode(TestCase):
diff = id(x) - id(y)
self.assertRaises(ValueError, j.scan_once, y, diff)
self.assertRaises(ValueError, j.raw_decode, y, i)
+
+ def test_bounded_int(self):
+ # SJ-PT-23-03, limit quadratic number parsing per Python 3.11
+ max_str_digits = getattr(sys, 'get_int_max_str_digits', lambda: 4300)()
+ s = '1' + '0' * (max_str_digits - 1)
+ self.assertEqual(json.loads(s), int(s))
+ self.assertRaises(ValueError, json.loads, s + '0')
diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py
index 788f3a5..5f9a8f6 100644
--- a/simplejson/tests/test_fail.py
+++ b/simplejson/tests/test_fail.py
@@ -145,7 +145,7 @@ class TestFail(TestCase):
('["spam', 'Unterminated string starting at', 1),
('["spam"', "Expecting ',' delimiter", 7),
('["spam",', 'Expecting value', 8),
- ('{', 'Expecting property name enclosed in double quotes', 1),
+ ('{', "Expecting property name enclosed in double quotes or '}'", 1),
('{"', 'Unterminated string starting at', 1),
('{"spam', 'Unterminated string starting at', 1),
('{"spam"', "Expecting ':' delimiter", 7),
@@ -156,6 +156,8 @@ class TestFail(TestCase):
('"', 'Unterminated string starting at', 0),
('"spam', 'Unterminated string starting at', 0),
('[,', "Expecting value", 1),
+ ('--', 'Expecting value', 0),
+ ('"\x18d', "Invalid control character %r", 1),
]
for data, msg, idx in test_cases:
try:
diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py
index e382ec2..a977969 100644
--- a/simplejson/tests/test_float.py
+++ b/simplejson/tests/test_float.py
@@ -7,9 +7,9 @@ from simplejson.decoder import NaN, PosInf, NegInf
class TestFloat(TestCase):
def test_degenerates_allow(self):
for inf in (PosInf, NegInf):
- self.assertEqual(json.loads(json.dumps(inf)), inf)
+ self.assertEqual(json.loads(json.dumps(inf, allow_nan=True), allow_nan=True), inf)
# Python 2.5 doesn't have math.isnan
- nan = json.loads(json.dumps(NaN))
+ nan = json.loads(json.dumps(NaN, allow_nan=True), allow_nan=True)
self.assertTrue((0 + nan) != nan)
def test_degenerates_ignore(self):
@@ -19,6 +19,9 @@ class TestFloat(TestCase):
def test_degenerates_deny(self):
for f in (PosInf, NegInf, NaN):
self.assertRaises(ValueError, json.dumps, f, allow_nan=False)
+ for s in ('Infinity', '-Infinity', 'NaN'):
+ self.assertRaises(ValueError, json.loads, s, allow_nan=False)
+ self.assertRaises(ValueError, json.loads, s)
def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100,
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
index c6c53b8..1f54483 100644
--- a/simplejson/tests/test_scanstring.py
+++ b/simplejson/tests/test_scanstring.py
@@ -132,7 +132,9 @@ class TestScanString(TestCase):
self.assertRaises(ValueError,
scanstring, '\\ud834\\x0123"', 0, None, True)
- self.assertRaises(json.JSONDecodeError, scanstring, "\\u-123", 0, None, True)
+ self.assertRaises(json.JSONDecodeError, scanstring, '\\u-123"', 0, None, True)
+ # SJ-PT-23-01: Invalid Handling of Broken Unicode Escape Sequences
+ self.assertRaises(json.JSONDecodeError, scanstring, '\\u EDD"', 0, None, True)
def test_issue3623(self):
self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,