Merge pull request #313 from simplejson/audit-fixes v3.19.0

Implement recommended fixes from OSTIF audit
author: Bob Ippolito <bob@redivi.com> 2023-04-06 10:26:00 -0700
committer: GitHub <noreply@github.com> 2023-04-06 10:26:00 -0700
commit: 1a4995dca71e02957d81becd7a053c9b4f19aceb (patch)
tree: d97740dcd5e2185c30ab20550fe35558806dfac0
parent: 9559fc756deaf20b6bae961b58c5289d8582c8b7 (diff)
parent: ec4a3d5c7299b16a9bf4d431fa16f466cc453697 (diff)
download: simplejson-1a4995dca71e02957d81becd7a053c9b4f19aceb.tar.gz
12 files changed, 252 insertions, 160 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index d2f7a94..c3e176c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,31 @@
+Version 3.19.0 released 2023-04-06
+
+* This release contains security hardening measures based on recommendations
+  by a security audit sponsored by OSTIF and conducted by X41 D-Sec GmbH.
+  Several of these measures include changing defaults to be more strict;
+  by default simplejson will now only consume and produce compliant JSON,
+  but the flags still exist for any backwards compatibility needs.
+  No high priority issues were discovered; the reference count
+  leak is thought to be unreachable since the digits of the float are
+  checked before PyOS_string_to_double is called.
+  A link to the public version of this report will be included in a
+  future release of simplejson. The following fixes were implemented in
+  one PR: https://github.com/simplejson/simplejson/pull/313
+* Fix invalid handling of unicode escape sequences in the pure Python
+  implementation of the decoder (SJ-PT-23-01)
+* Fix missing reference count decrease if PyOS_string_to_double raises
+  an exception in Python 2.x; was probably unreachable (SJ-PT-23-02)
+* Backport the integer string length limitation from Python 3.11 to
+  limit quadratic number parsing (SJ-PT-23-03)
+* Fix inconsistencies with error messages between the C and Python
+  implementations (SJ-PT-23-100)
+* Remove unused unichr import from encoder (SJ-PT-23-101)
+* Remove unused namedtuple_as_object and tuple_as_array arguments from
+  simplejson.load (SJ-PT-23-102)
+* Remove vestigial _one_shot code from iterencode (SJ-PT-23-103)
+* Change default of allow_nan from True to False and add allow_nan
+  to decoder (SJ-PT-23-107)
+
 Version 3.18.4 released 2023-03-14
 
 * Test the sdist to prevent future regressions
diff --git a/conf.py b/conf.py
index 921bbef..5a2dded 100644
--- a/conf.py
+++ b/conf.py
@@ -42,9 +42,9 @@ copyright = '2023, Bob Ippolito'
 # other places throughout the built documents.
 #
 # The short X.Y version.
-version = '3.18'
+version = '3.19'
 # The full version, including alpha/beta/rc tags.
-release = '3.18.4'
+release = '3.19.0'
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
diff --git a/index.rst b/index.rst
index fbb52b5..f57d650 100644
--- a/index.rst
+++ b/index.rst
@@ -160,7 +160,7 @@ Basic Usage
 -----------
 
 .. function:: dump(obj, fp, skipkeys=False, ensure_ascii=True, \
-                   check_circular=True, allow_nan=True, cls=None, \
+                   check_circular=True, allow_nan=False, cls=None, \
                    indent=None, separators=None, encoding='utf-8', \
                    default=None, use_decimal=True, \
                    namedtuple_as_object=True, tuple_as_array=True, \
@@ -191,7 +191,7 @@ Basic Usage
         is highly optimized.
 
 .. function:: dumps(obj, skipkeys=False, ensure_ascii=True, \
-                    check_circular=True, allow_nan=True, cls=None, \
+                    check_circular=True, allow_nan=False, cls=None, \
                     indent=None, separators=None, encoding='utf-8', \
                     default=None, use_decimal=True, \
                     namedtuple_as_object=True, tuple_as_array=True, \
@@ -225,13 +225,17 @@ Basic Usage
     reference check for container types will be skipped and a circular
     reference will result in an :exc:`OverflowError` (or worse).
 
-    If *allow_nan* is false (default: ``True``), then it will be a
+    If *allow_nan* is false (default: ``False``), then it will be a
     :exc:`ValueError` to serialize out of range :class:`float` values
     (``nan``, ``inf``, ``-inf``) in strict compliance of the original
     JSON specification. If *allow_nan* is true, their JavaScript equivalents
     will be used (``NaN``, ``Infinity``, ``-Infinity``). See also *ignore_nan*
     for ECMA-262 compliant behavior.
 
+    .. versionchanged:: 3.19.0
+        The default for *allow_nan* was changed to False for better spec
+        compliance.
+
     If *indent* is a string, then JSON array elements and object members
     will be pretty-printed with a newline followed by that string repeated
     for each level of nesting. ``None`` (the default) selects the most compact
@@ -324,7 +328,7 @@ Basic Usage
 .. function:: load(fp, encoding='utf-8', cls=None, object_hook=None, \
                    parse_float=None, parse_int=None, \
                    parse_constant=None, object_pairs_hook=None, \
-                   use_decimal=None, **kw)
+                   use_decimal=None, allow_nan=False, **kw)
 
    Deserialize *fp* (a ``.read()``-supporting file-like object containing a JSON
    document) to a Python object using this
@@ -367,7 +371,7 @@ Basic Usage
 .. function:: loads(s, encoding='utf-8', cls=None, object_hook=None, \
                     parse_float=None, parse_int=None, \
                     parse_constant=None, object_pairs_hook=None, \
-                    use_decimal=None, **kw)
+                    use_decimal=None, allow_nan=False, **kw)
 
     Deserialize *s* (a :class:`str` or :class:`unicode` instance containing a JSON
     document) to a Python object. :exc:`JSONDecodeError` will be
@@ -412,9 +416,12 @@ Basic Usage
     be used to use another datatype or parser for JSON integers
     (e.g. :class:`float`).
 
-    *parse_constant*, if specified, will be called with one of the following
-    strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This can be used to
-    raise an exception if invalid JSON numbers are encountered.
+    .. versionchanged:: 3.19.0
+        The integer string conversion length limitation introduced in
+        Python 3.11 has been backported. An attempt to parse an integer
+        with more than 4300 digits will result in an exception unless a
+        suitable alternative parser is specified
+        (e.g. :class:`decimal.Decimal`).
 
     If *use_decimal* is true (default: ``False``) then *parse_float* is set to
     :class:`decimal.Decimal`. This is a convenience for parity with the
@@ -436,12 +443,28 @@ Basic Usage
         Subclassing is not recommended. You should use *object_hook* or
         *object_pairs_hook*. This is faster and more portable than subclassing.
 
+
+    *allow_nan*, if True (default false), will allow the parser to
+    accept the non-standard floats
+    ``NaN``, ``Infinity``, and ``-Infinity``.
+
+    .. versionchanged:: 3.19.0
+
+        This argument was added to make it possible to use the legacy behavior
+        now that the parser is more strict about compliance to the standard.
+
+    *parse_constant*, if specified, will be
+    called with one of the following strings: ``'-Infinity'``,
+    ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
+    as it is rare to parse non-compliant JSON containing these values.
+
+
 Encoders and decoders
 ---------------------
 
 .. class:: JSONDecoder(encoding='utf-8', object_hook=None, parse_float=None, \
                        parse_int=None, parse_constant=None, \
-                       object_pairs_hook=None, strict=True)
+                       object_pairs_hook=None, strict=True, allow_nan=False)
 
    Simple JSON decoder.
 
@@ -469,7 +492,8 @@ Encoders and decoders
    | null          | None      | None      |
    +---------------+-----------+-----------+
 
-   It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their
+   When *allow_nan* is True, it also understands
+   ``NaN``, ``Infinity``, and ``-Infinity`` as their
    corresponding ``float`` values, which is outside the JSON spec.
 
    *encoding* determines the encoding used to interpret any :class:`str` objects
@@ -502,15 +526,31 @@ Encoders and decoders
    be used to use another datatype or parser for JSON integers
    (e.g. :class:`float`).
 
-   *parse_constant*, if specified, will be called with one of the following
-   strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This can be used to
-   raise an exception if invalid JSON numbers are encountered.
+    .. versionchanged:: 3.19.0
+        The integer string conversion length limitation introduced in
+        Python 3.11 has been backported. An attempt to parse an integer
+        with more than 4300 digits will result in an exception unless a
+        suitable alternative parser is specified
+        (e.g. :class:`decimal.Decimal`).
+
+    *parse_constant*, if specified, will be
+    called with one of the following strings: ``'-Infinity'``,
+    ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
+    as it is rare to parse non-compliant JSON containing these values.
 
    *strict* controls the parser's behavior when it encounters an invalid
    control character in a string. The default setting of ``True`` means that
    unescaped control characters are parse errors, if ``False`` then control
    characters will be allowed in strings.
 
+   *allow_nan* when True (not the default), the decoder will allow
+   ``NaN``, ``Infinity``, and ``-Infinity`` as their corresponding floats.
+
+    .. versionchanged:: 3.19.0
+        This argument was added to make it behave closer to the spec by
+        default. The previous behavior can be restored by setting this to
+        True.
+
    .. method:: decode(s)
 
       Return the Python representation of the JSON document *s*. See
@@ -532,7 +572,7 @@ Encoders and decoders
       document is not valid.
 
 .. class:: JSONEncoder(skipkeys=False, ensure_ascii=True, \
-                       check_circular=True, allow_nan=True, sort_keys=False, \
+                       check_circular=True, allow_nan=False, sort_keys=False, \
                        indent=None, separators=None, encoding='utf-8', \
                        default=None, use_decimal=True, \
                        namedtuple_as_object=True, tuple_as_array=True, \
@@ -573,7 +613,8 @@ Encoders and decoders
       wrapped in another type with an appropriate `for_json` method to
       transform the keys during encoding.
 
-   It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their
+   When *allow_nan* is True, it also understands
+   ``NaN``, ``Infinity``, and ``-Infinity`` as their
    corresponding ``float`` values, which is outside the JSON spec.
 
    To extend this to recognize other objects, subclass and implement a
@@ -599,12 +640,16 @@ Encoders and decoders
    prevent an infinite recursion (which would cause an :exc:`OverflowError`).
    Otherwise, no such check takes place.
 
-   If *allow_nan* is true (the default), then ``NaN``, ``Infinity``, and
+   If *allow_nan* is true (not the default), then ``NaN``, ``Infinity``, and
    ``-Infinity`` will be encoded as such. This behavior is not JSON
-   specification compliant, but is consistent with most JavaScript based
-   encoders and decoders.  Otherwise, it will be a :exc:`ValueError` to encode
+   specification compliant. Otherwise, it will be a :exc:`ValueError` to encode
    such floats. See also *ignore_nan* for ECMA-262 compliant behavior.
 
+    .. versionchanged:: 3.19.0
+        This default is now False to make it behave closer to the spec.
+        The previous behavior can be restored by setting this to
+        True.
+
    If *sort_keys* is true (not the default), then the output of dictionaries
    will be sorted by key; this is useful for regression tests to ensure that
    JSON serializations can be compared on a day-to-day basis.
@@ -716,7 +761,7 @@ Encoders and decoders
       :meth:`iterencode`.
 
 .. class:: JSONEncoderForHTML(skipkeys=False, ensure_ascii=True, \
-                              check_circular=True, allow_nan=True, \
+                              check_circular=True, allow_nan=False, \
                               sort_keys=False, indent=None, separators=None, \
                               encoding='utf-8', \
                               default=None, use_decimal=True, \
@@ -826,22 +871,28 @@ Infinite and NaN Number Values
 
 The RFC does not permit the representation of infinite or NaN number values.
 Despite that, by default, this module accepts and outputs ``Infinity``,
-``-Infinity``, and ``NaN`` as if they were valid JSON number literal values::
+``-Infinity``, and ``NaN`` as if they were valid JSON number literal values
+if the allow_nan flag is enabled::
 
    >>> # Neither of these calls raises an exception, but the results are not valid JSON
-   >>> json.dumps(float('-inf'))
+   >>> json.dumps(float('-inf'), allow_nan=True)
    '-Infinity'
-   >>> json.dumps(float('nan'))
+   >>> json.dumps(float('nan'), allow_nan=True)
    'NaN'
    >>> # Same when deserializing
-   >>> json.loads('-Infinity')
+   >>> json.loads('-Infinity', allow_nan=True)
    -inf
-   >>> json.loads('NaN')
+   >>> json.loads('NaN', allow_nan=True)
    nan
+   >>> # ignore_nan uses the ECMA-262 behavior to serialize these as null
+   >>> json.dumps(float('-inf'), ignore_nan=True)
+   'null'
+   >>> json.dumps(float('nan'), ignore_nan=True)
+   'null'
 
 In the serializer, the *allow_nan* parameter can be used to alter this
-behavior.  In the deserializer, the *parse_constant* parameter can be used to
-alter this behavior.
+behavior. In the deserializer, the *allow_nan* and
+*parse_constant* parameters can be used to alter this behavior.
 
 
 Repeated Names Within an Object
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
index 47e49a3..206e22d 100644
--- a/simplejson/__init__.py
+++ b/simplejson/__init__.py
@@ -118,7 +118,7 @@ Serializing multiple objects to JSON lines (newline-delimited JSON)::
 
 """
 from __future__ import absolute_import
-__version__ = '3.18.4'
+__version__ = '3.19.0'
 __all__ = [
     'dump', 'dumps', 'load', 'loads',
     'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
@@ -149,28 +149,10 @@ def _import_c_make_encoder():
     except ImportError:
         return None
 
-_default_encoder = JSONEncoder(
-    skipkeys=False,
-    ensure_ascii=True,
-    check_circular=True,
-    allow_nan=True,
-    indent=None,
-    separators=None,
-    encoding='utf-8',
-    default=None,
-    use_decimal=True,
-    namedtuple_as_object=True,
-    tuple_as_array=True,
-    iterable_as_array=False,
-    bigint_as_string=False,
-    item_sort_key=None,
-    for_json=False,
-    ignore_nan=False,
-    int_as_string_bitcount=None,
-)
+_default_encoder = JSONEncoder()
 
 def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
-         allow_nan=True, cls=None, indent=None, separators=None,
+         allow_nan=False, cls=None, indent=None, separators=None,
          encoding='utf-8', default=None, use_decimal=True,
          namedtuple_as_object=True, tuple_as_array=True,
          bigint_as_string=False, sort_keys=False, item_sort_key=None,
@@ -187,10 +169,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
     contain non-ASCII characters, so long as they do not need to be escaped
     by JSON. When it is true, all non-ASCII characters are escaped.
 
-    If *allow_nan* is false, then it will be a ``ValueError`` to
-    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
-    in strict compliance of the original JSON specification, instead of using
-    the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
+    If *allow_nan* is true (default: ``False``), then out of range ``float``
+    values (``nan``, ``inf``, ``-inf``) will be serialized to
+    their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
+    instead of raising a ValueError. See
     *ignore_nan* for ECMA-262 compliant behavior.
 
     If *indent* is a string, then JSON array elements and object members
@@ -258,7 +240,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
     """
     # cached encoder
     if (not skipkeys and ensure_ascii and
-        check_circular and allow_nan and
+        check_circular and not allow_nan and
         cls is None and indent is None and separators is None and
         encoding == 'utf-8' and default is None and use_decimal
         and namedtuple_as_object and tuple_as_array and not iterable_as_array
@@ -292,7 +274,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
 
 
 def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
-          allow_nan=True, cls=None, indent=None, separators=None,
+          allow_nan=False, cls=None, indent=None, separators=None,
           encoding='utf-8', default=None, use_decimal=True,
           namedtuple_as_object=True, tuple_as_array=True,
           bigint_as_string=False, sort_keys=False, item_sort_key=None,
@@ -312,10 +294,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
     for container types will be skipped and a circular reference will
     result in an ``OverflowError`` (or worse).
 
-    If ``allow_nan`` is false, then it will be a ``ValueError`` to
-    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
-    strict compliance of the JSON specification, instead of using the
-    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+    If *allow_nan* is true (default: ``False``), then out of range ``float``
+    values (``nan``, ``inf``, ``-inf``) will be serialized to
+    their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
+    instead of raising a ValueError. See
+    *ignore_nan* for ECMA-262 compliant behavior.
 
     If ``indent`` is a string, then JSON array elements and object members
     will be pretty-printed with a newline followed by that string repeated
@@ -383,7 +366,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
     """
     # cached encoder
     if (not skipkeys and ensure_ascii and
-        check_circular and allow_nan and
+        check_circular and not allow_nan and
         cls is None and indent is None and separators is None and
         encoding == 'utf-8' and default is None and use_decimal
         and namedtuple_as_object and tuple_as_array and not iterable_as_array
@@ -412,14 +395,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
         **kw).encode(obj)
 
 
-_default_decoder = JSONDecoder(encoding=None, object_hook=None,
-                               object_pairs_hook=None)
+_default_decoder = JSONDecoder()
 
 
 def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
         parse_int=None, parse_constant=None, object_pairs_hook=None,
-        use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
-        **kw):
+        use_decimal=False, allow_nan=False, **kw):
     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
     a JSON document as `str` or `bytes`) to a Python object.
 
@@ -442,23 +423,27 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
     takes priority.
 
     *parse_float*, if specified, will be called with the string of every
-    JSON float to be decoded.  By default, this is equivalent to
+    JSON float to be decoded. By default, this is equivalent to
     ``float(num_str)``. This can be used to use another datatype or parser
     for JSON floats (e.g. :class:`decimal.Decimal`).
 
     *parse_int*, if specified, will be called with the string of every
-    JSON int to be decoded.  By default, this is equivalent to
+    JSON int to be decoded. By default, this is equivalent to
     ``int(num_str)``.  This can be used to use another datatype or parser
     for JSON integers (e.g. :class:`float`).
 
-    *parse_constant*, if specified, will be called with one of the
-    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
-    can be used to raise an exception if invalid JSON numbers are
-    encountered.
+    *allow_nan*, if True (default false), will allow the parser to
+    accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
+    and enable the use of the deprecated *parse_constant*.
 
     If *use_decimal* is true (default: ``False``) then it implies
     parse_float=decimal.Decimal for parity with ``dump``.
 
+    *parse_constant*, if specified, will be
+    called with one of the following strings: ``'-Infinity'``,
+    ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
+    as it is rare to parse non-compliant JSON containing these values.
+
     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
     kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
     of subclassing whenever possible.
@@ -468,12 +453,12 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
         encoding=encoding, cls=cls, object_hook=object_hook,
         parse_float=parse_float, parse_int=parse_int,
         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
-        use_decimal=use_decimal, **kw)
+        use_decimal=use_decimal, allow_nan=allow_nan, **kw)
 
 
 def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
         parse_int=None, parse_constant=None, object_pairs_hook=None,
-        use_decimal=False, **kw):
+        use_decimal=False, allow_nan=False, **kw):
     """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
     document) to a Python object.
 
@@ -505,14 +490,18 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
     ``int(num_str)``.  This can be used to use another datatype or parser
     for JSON integers (e.g. :class:`float`).
 
-    *parse_constant*, if specified, will be called with one of the
-    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
-    can be used to raise an exception if invalid JSON numbers are
-    encountered.
+    *allow_nan*, if True (default false), will allow the parser to
+    accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
+    and enable the use of the deprecated *parse_constant*.
 
     If *use_decimal* is true (default: ``False``) then it implies
     parse_float=decimal.Decimal for parity with ``dump``.
 
+    *parse_constant*, if specified, will be
+    called with one of the following strings: ``'-Infinity'``,
+    ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
+    as it is rare to parse non-compliant JSON containing these values.
+
     To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
     kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
     of subclassing whenever possible.
@@ -521,7 +510,7 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
     if (cls is None and encoding is None and object_hook is None and
             parse_int is None and parse_float is None and
             parse_constant is None and object_pairs_hook is None
-            and not use_decimal and not kw):
+            and not use_decimal and not allow_nan and not kw):
         return _default_decoder.decode(s)
     if cls is None:
         cls = JSONDecoder
@@ -539,6 +528,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
         if parse_float is not None:
             raise TypeError("use_decimal=True implies parse_float=Decimal")
         kw['parse_float'] = Decimal
+    if allow_nan:
+        kw['allow_nan'] = True
     return cls(encoding=encoding, **kw).decode(s)
 
 
@@ -560,22 +551,9 @@ def _toggle_speedups(enabled):
         scan.make_scanner = scan.py_make_scanner
     dec.make_scanner = scan.make_scanner
     global _default_decoder
-    _default_decoder = JSONDecoder(
-        encoding=None,
-        object_hook=None,
-        object_pairs_hook=None,
-    )
+    _default_decoder = JSONDecoder()
     global _default_encoder
-    _default_encoder = JSONEncoder(
-       skipkeys=False,
-       ensure_ascii=True,
-       check_circular=True,
-       allow_nan=True,
-       indent=None,
-       separators=None,
-       encoding='utf-8',
-       default=None,
-   )
+    _default_encoder = JSONEncoder()
 
 def simple_first(kv):
     """Helper function to pass to item_sort_key to sort simple
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
index ec054c7..bd56b4d 100644
--- a/simplejson/_speedups.c
+++ b/simplejson/_speedups.c
@@ -1843,7 +1843,7 @@ bail:
 }
 
 static PyObject *
-_parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+_parse_constant(PyScannerObject *s, PyObject *pystr, PyObject *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
 {
     /* Read a JSON constant from PyString pystr.
     constant is the Python string that was found
@@ -1855,6 +1855,10 @@ _parse_constant(PyScannerObject *s, PyObject *constant, Py_ssize_t idx, Py_ssize
     Returns the result of parse_constant
     */
     PyObject *rval;
+    if (s->parse_constant == Py_None) {
+        raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+        return NULL;
+    }
 
     /* rval = parse_constant(constant) */
     rval = PyObject_CallOneArg(s->parse_constant, constant);
@@ -1886,7 +1890,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
     /* read a sign if it's there, make sure it's not the end of the string */
     if (str[idx] == '-') {
         if (idx >= end_idx) {
-            raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+            raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
             return NULL;
         }
         idx++;
@@ -1903,7 +1907,7 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
     }
     /* no integer digits, error */
     else {
-        raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+        raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
         return NULL;
     }
 
@@ -1949,8 +1953,10 @@ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz
             /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
             double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
                                              NULL, NULL);
-            if (d == -1.0 && PyErr_Occurred())
+            if (d == -1.0 && PyErr_Occurred()) {
+                Py_DECREF(numstr);
                 return NULL;
+            }
             rval = PyFloat_FromDouble(d);
         }
     }
@@ -1993,7 +1999,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
     /* read a sign if it's there, make sure it's not the end of the string */
     if (PyUnicode_READ(kind, str, idx) == '-') {
         if (idx >= end_idx) {
-            raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+            raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
             return NULL;
         }
         idx++;
@@ -2013,7 +2019,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
     }
     else {
         /* no integer digits, error */
-        raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+        raise_errmsg(ERR_EXPECTING_VALUE, pystr, start);
         return NULL;
     }
 
@@ -2156,7 +2162,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
         case 'N':
             /* NaN */
             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
-                rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr);
+                rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr);
             }
             else
                 fallthrough = 1;
@@ -2164,7 +2170,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
         case 'I':
             /* Infinity */
             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
-                rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr);
+                rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr);
             }
             else
                 fallthrough = 1;
@@ -2172,7 +2178,7 @@ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n
         case '-':
             /* -Infinity */
             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
-                rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr);
+                rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr);
             }
             else
                 fallthrough = 1;
@@ -2275,7 +2281,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
             if ((idx + 2 < length) &&
                 PyUnicode_READ(kind, str, idx + 1) == 'a' &&
                 PyUnicode_READ(kind, str, idx + 2) == 'N') {
-                rval = _parse_constant(s, JSON_NaN, idx, next_idx_ptr);
+                rval = _parse_constant(s, pystr, JSON_NaN, idx, next_idx_ptr);
             }
             else
                 fallthrough = 1;
@@ -2290,7 +2296,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
                 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
                 PyUnicode_READ(kind, str, idx + 6) == 't' &&
                 PyUnicode_READ(kind, str, idx + 7) == 'y') {
-                rval = _parse_constant(s, JSON_Infinity, idx, next_idx_ptr);
+                rval = _parse_constant(s, pystr, JSON_Infinity, idx, next_idx_ptr);
             }
             else
                 fallthrough = 1;
@@ -2306,7 +2312,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
                 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
                 PyUnicode_READ(kind, str, idx + 7) == 't' &&
                 PyUnicode_READ(kind, str, idx + 8) == 'y') {
-                rval = _parse_constant(s, JSON_NegInfinity, idx, next_idx_ptr);
+                rval = _parse_constant(s, pystr, JSON_NegInfinity, idx, next_idx_ptr);
             }
             else
                 fallthrough = 1;
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index 1a8f772..c99a976 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -46,9 +46,35 @@ BACKSLASH = {
 
 DEFAULT_ENCODING = "utf-8"
 
+if hasattr(sys, 'get_int_max_str_digits'):
+    bounded_int = int
+else:
+    def bounded_int(s, INT_MAX_STR_DIGITS=4300):
+        """Backport of the integer string length conversion limitation
+
+        https://docs.python.org/3/library/stdtypes.html#int-max-str-digits
+        """
+        if len(s) > INT_MAX_STR_DIGITS:
+            raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, len(s)))
+        return int(s)
+
+
+def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
+    """Scan a four digit hex number from s[end:end + 4]
+    """
+    msg = "Invalid \\uXXXX escape sequence"
+    esc = s[end:end + 4]
+    if not _m(esc):
+        raise JSONDecodeError(msg, s, end - 2)
+    try:
+        return int(esc, 16), end + 4
+    except ValueError:
+        raise JSONDecodeError(msg, s, end - 2)
+
 def py_scanstring(s, end, encoding=None, strict=True,
         _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
-        _PY3=PY3, _maxunicode=sys.maxunicode):
+        _PY3=PY3, _maxunicode=sys.maxunicode,
+        _scan_four_digit_hex=scan_four_digit_hex):
     """Scan the string s for a JSON string. End is the index of the
     character in s after the quote that started the JSON string.
     Unescapes all valid JSON string escape sequences and raises ValueError
@@ -67,6 +93,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
         if chunk is None:
             raise JSONDecodeError(
                 "Unterminated string starting at", s, begin)
+        prev_end = end
         end = chunk.end()
         content, terminator = chunk.groups()
         # Content is contains zero or more unescaped string characters
@@ -81,7 +108,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
         elif terminator != '\\':
             if strict:
                 msg = "Invalid control character %r at"
-                raise JSONDecodeError(msg, s, end)
+                raise JSONDecodeError(msg, s, prev_end)
             else:
                 _append(terminator)
                 continue
@@ -100,35 +127,18 @@ def py_scanstring(s, end, encoding=None, strict=True,
             end += 1
         else:
             # Unicode escape sequence
-            msg = "Invalid \\uXXXX escape sequence"
-            esc = s[end + 1:end + 5]
-            escX = esc[1:2]
-            if len(esc) != 4 or escX == 'x' or escX == 'X':
-                raise JSONDecodeError(msg, s, end - 1)
-            try:
-                uni = int(esc, 16)
-            except ValueError:
-                raise JSONDecodeError(msg, s, end - 1)
-            if uni < 0 or uni > _maxunicode:
-                raise JSONDecodeError(msg, s, end - 1)
-            end += 5
+            uni, end = _scan_four_digit_hex(s, end + 1)
             # Check for surrogate pair on UCS-4 systems
             # Note that this will join high/low surrogate pairs
             # but will also pass unpaired surrogates through
             if (_maxunicode > 65535 and
                 uni & 0xfc00 == 0xd800 and
                 s[end:end + 2] == '\\u'):
-                esc2 = s[end + 2:end + 6]
-                escX = esc2[1:2]
-                if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
-                    try:
-                        uni2 = int(esc2, 16)
-                    except ValueError:
-                        raise JSONDecodeError(msg, s, end)
-                    if uni2 & 0xfc00 == 0xdc00:
-                        uni = 0x10000 + (((uni - 0xd800) << 10) |
-                                         (uni2 - 0xdc00))
-                        end += 6
+                uni2, end2 = _scan_four_digit_hex(s, end + 2)
+                if uni2 & 0xfc00 == 0xdc00:
+                    uni = 0x10000 + (((uni - 0xd800) << 10) |
+                                        (uni2 - 0xdc00))
+                    end = end2
             char = unichr(uni)
         # Append the unescaped character
         _append(char)
@@ -169,7 +179,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
             return pairs, end + 1
         elif nextchar != '"':
             raise JSONDecodeError(
-                "Expecting property name enclosed in double quotes",
+                "Expecting property name enclosed in double quotes or '}'",
                 s, end)
     end += 1
     while True:
@@ -296,14 +306,15 @@ class JSONDecoder(object):
     | null          | None              |
     +---------------+-------------------+
 
-    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
+    When allow_nan=True, it also understands
+    ``NaN``, ``Infinity``, and ``-Infinity`` as
     their corresponding ``float`` values, which is outside the JSON spec.
 
     """
 
     def __init__(self, encoding=None, object_hook=None, parse_float=None,
             parse_int=None, parse_constant=None, strict=True,
-            object_pairs_hook=None):
+            object_pairs_hook=None, allow_nan=False):
         """
         *encoding* determines the encoding used to interpret any
         :class:`str` objects decoded by this instance (``'utf-8'`` by
@@ -336,10 +347,13 @@ class JSONDecoder(object):
         ``int(num_str)``.  This can be used to use another datatype or parser
         for JSON integers (e.g. :class:`float`).
 
-        *parse_constant*, if specified, will be called with one of the
-        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
-        can be used to raise an exception if invalid JSON numbers are
-        encountered.
+        *allow_nan*, if True (default: False), will allow the parser to
+        accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``.
+
+        *parse_constant*, if specified, will be called with one of the
+        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``, even
+        when *allow_nan* is false. It is not recommended to use this feature,
+        as it is rare to parse non-compliant JSON containing these values.
 
         *strict* controls the parser's behavior when it encounters an
         invalid control character in a string. The default setting of
@@ -353,8 +367,8 @@ class JSONDecoder(object):
         self.object_hook = object_hook
         self.object_pairs_hook = object_pairs_hook
         self.parse_float = parse_float or float
-        self.parse_int = parse_int or int
-        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
+        self.parse_int = parse_int or bounded_int
+        self.parse_constant = parse_constant or (allow_nan and _CONSTANTS.__getitem__ or None)
         self.strict = strict
         self.parse_object = JSONObject
         self.parse_array = JSONArray
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index e93fe43..661ff36 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -5,7 +5,7 @@ import re
 from operator import itemgetter
 # Do not import Decimal directly to avoid reload issues
 import decimal
-from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3
+from .compat import binary_type, text_type, string_types, integer_types, PY3
 def _import_speedups():
     try:
         from . import _speedups
@@ -140,7 +140,7 @@ class JSONEncoder(object):
     key_separator = ': '
 
     def __init__(self, skipkeys=False, ensure_ascii=True,
-                 check_circular=True, allow_nan=True, sort_keys=False,
+                 check_circular=True, allow_nan=False, sort_keys=False,
                  indent=None, separators=None, encoding='utf-8', default=None,
                  use_decimal=True, namedtuple_as_object=True,
                  tuple_as_array=True, bigint_as_string=False,
@@ -161,10 +161,11 @@ class JSONEncoder(object):
         prevent an infinite recursion (which would cause an OverflowError).
         Otherwise, no such check takes place.
 
-        If allow_nan is true, then NaN, Infinity, and -Infinity will be
-        encoded as such.  This behavior is not JSON specification compliant,
-        but is consistent with most JavaScript based encoders and decoders.
-        Otherwise, it will be a ValueError to encode such floats.
+        If allow_nan is true (default: False), then out of range float
+        values (nan, inf, -inf) will be serialized to
+        their JavaScript equivalents (NaN, Infinity, -Infinity)
+        instead of raising a ValueError. See
+        ignore_nan for ECMA-262 compliant behavior.
 
         If sort_keys is true, then the output of dictionaries will be
         sorted by key; this is useful for regression tests to ensure
@@ -294,7 +295,7 @@ class JSONEncoder(object):
         # This doesn't pass the iterator directly to ''.join() because the
         # exceptions aren't as detailed.  The list call should be roughly
         # equivalent to the PySequence_Fast that ''.join() would do.
-        chunks = self.iterencode(o, _one_shot=True)
+        chunks = self.iterencode(o)
         if not isinstance(chunks, (list, tuple)):
             chunks = list(chunks)
         if self.ensure_ascii:
@@ -302,7 +303,7 @@ class JSONEncoder(object):
         else:
             return u''.join(chunks)
 
-    def iterencode(self, o, _one_shot=False):
+    def iterencode(self, o):
         """Encode the given object and yield each string
         representation as available.
 
@@ -356,8 +357,7 @@ class JSONEncoder(object):
         key_memo = {}
         int_as_string_bitcount = (
             53 if self.bigint_as_string else self.int_as_string_bitcount)
-        if (_one_shot and c_make_encoder is not None
-                and self.indent is None):
+        if (c_make_encoder is not None and self.indent is None):
             _iterencode = c_make_encoder(
                 markers, self.default, _encoder, self.indent,
                 self.key_separator, self.item_separator, self.sort_keys,
@@ -370,7 +370,7 @@ class JSONEncoder(object):
             _iterencode = _make_iterencode(
                 markers, self.default, _encoder, self.indent, floatstr,
                 self.key_separator, self.item_separator, self.sort_keys,
-                self.skipkeys, _one_shot, self.use_decimal,
+                self.skipkeys, self.use_decimal,
                 self.namedtuple_as_object, self.tuple_as_array,
                 int_as_string_bitcount,
                 self.item_sort_key, self.encoding, self.for_json,
@@ -398,14 +398,14 @@ class JSONEncoderForHTML(JSONEncoder):
     def encode(self, o):
         # Override JSONEncoder.encode because it has hacks for
         # performance that make things more complicated.
-        chunks = self.iterencode(o, True)
+        chunks = self.iterencode(o)
         if self.ensure_ascii:
             return ''.join(chunks)
         else:
             return u''.join(chunks)
 
-    def iterencode(self, o, _one_shot=False):
-        chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
+    def iterencode(self, o):
+        chunks = super(JSONEncoderForHTML, self).iterencode(o)
         for chunk in chunks:
             chunk = chunk.replace('&', '\\u0026')
             chunk = chunk.replace('<', '\\u003c')
@@ -419,7 +419,7 @@ class JSONEncoderForHTML(JSONEncoder):
 
 
 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
-        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
+        _key_separator, _item_separator, _sort_keys, _skipkeys,
         _use_decimal, _namedtuple_as_object, _tuple_as_array,
         _int_as_string_bitcount, _item_sort_key,
         _encoding,_for_json,
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
index 85e385e..34710d6 100644
--- a/simplejson/scanner.py
+++ b/simplejson/scanner.py
@@ -60,11 +60,11 @@ def py_make_scanner(context):
             else:
                 res = parse_int(integer)
             return res, m.end()
-        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+        elif parse_constant and nextchar == 'N' and string[idx:idx + 3] == 'NaN':
             return parse_constant('NaN'), idx + 3
-        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+        elif parse_constant and nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
             return parse_constant('Infinity'), idx + 8
-        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+        elif parse_constant and nextchar == '-' and string[idx:idx + 9] == '-Infinity':
             return parse_constant('-Infinity'), idx + 9
         else:
             raise JSONDecodeError(errmsg, string, idx)
diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py
index 6960ee5..317b4f9 100644
--- a/simplejson/tests/test_decode.py
+++ b/simplejson/tests/test_decode.py
@@ -2,6 +2,7 @@ from __future__ import absolute_import
 import decimal
 from unittest import TestCase
 
+import sys
 import simplejson as json
 from simplejson.compat import StringIO, b, binary_type
 from simplejson import OrderedDict
@@ -117,3 +118,10 @@ class TestDecode(TestCase):
         diff = id(x) - id(y)
         self.assertRaises(ValueError, j.scan_once, y, diff)
         self.assertRaises(ValueError, j.raw_decode, y, i)
+
+    def test_bounded_int(self):
+        # SJ-PT-23-03, limit quadratic number parsing per Python 3.11
+        max_str_digits = getattr(sys, 'get_int_max_str_digits', lambda: 4300)()
+        s = '1' + '0' * (max_str_digits - 1)
+        self.assertEqual(json.loads(s), int(s))
+        self.assertRaises(ValueError, json.loads, s + '0')
diff --git a/simplejson/tests/test_fail.py b/simplejson/tests/test_fail.py
index 788f3a5..5f9a8f6 100644
--- a/simplejson/tests/test_fail.py
+++ b/simplejson/tests/test_fail.py
@@ -145,7 +145,7 @@ class TestFail(TestCase):
             ('["spam', 'Unterminated string starting at', 1),
             ('["spam"', "Expecting ',' delimiter", 7),
             ('["spam",', 'Expecting value', 8),
-            ('{', 'Expecting property name enclosed in double quotes', 1),
+            ('{', "Expecting property name enclosed in double quotes or '}'", 1),
             ('{"', 'Unterminated string starting at', 1),
             ('{"spam', 'Unterminated string starting at', 1),
             ('{"spam"', "Expecting ':' delimiter", 7),
@@ -156,6 +156,8 @@ class TestFail(TestCase):
             ('"', 'Unterminated string starting at', 0),
             ('"spam', 'Unterminated string starting at', 0),
             ('[,', "Expecting value", 1),
+            ('--', 'Expecting value', 0),
+            ('"\x18d', "Invalid control character %r", 1),
         ]
         for data, msg, idx in test_cases:
             try:
diff --git a/simplejson/tests/test_float.py b/simplejson/tests/test_float.py
index e382ec2..a977969 100644
--- a/simplejson/tests/test_float.py
+++ b/simplejson/tests/test_float.py
@@ -7,9 +7,9 @@ from simplejson.decoder import NaN, PosInf, NegInf
 class TestFloat(TestCase):
     def test_degenerates_allow(self):
         for inf in (PosInf, NegInf):
-            self.assertEqual(json.loads(json.dumps(inf)), inf)
+            self.assertEqual(json.loads(json.dumps(inf, allow_nan=True), allow_nan=True), inf)
         # Python 2.5 doesn't have math.isnan
-        nan = json.loads(json.dumps(NaN))
+        nan = json.loads(json.dumps(NaN, allow_nan=True), allow_nan=True)
         self.assertTrue((0 + nan) != nan)
 
     def test_degenerates_ignore(self):
@@ -19,6 +19,9 @@ class TestFloat(TestCase):
     def test_degenerates_deny(self):
         for f in (PosInf, NegInf, NaN):
             self.assertRaises(ValueError, json.dumps, f, allow_nan=False)
+        for s in ('Infinity', '-Infinity', 'NaN'):
+            self.assertRaises(ValueError, json.loads, s, allow_nan=False)
+            self.assertRaises(ValueError, json.loads, s)
 
     def test_floats(self):
         for num in [1617161771.7650001, math.pi, math.pi**100,
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
index c6c53b8..1f54483 100644
--- a/simplejson/tests/test_scanstring.py
+++ b/simplejson/tests/test_scanstring.py
@@ -132,7 +132,9 @@ class TestScanString(TestCase):
             self.assertRaises(ValueError,
                               scanstring, '\\ud834\\x0123"', 0, None, True)
 
-        self.assertRaises(json.JSONDecodeError, scanstring, "\\u-123", 0, None, True)
+        self.assertRaises(json.JSONDecodeError, scanstring, '\\u-123"', 0, None, True)
+        # SJ-PT-23-01: Invalid Handling of Broken Unicode Escape Sequences
+        self.assertRaises(json.JSONDecodeError, scanstring, '\\u EDD"', 0, None, True)
 
     def test_issue3623(self):
         self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
author	Bob Ippolito <bob@redivi.com>	2023-04-06 10:26:00 -0700
committer	GitHub <noreply@github.com>	2023-04-06 10:26:00 -0700
commit	1a4995dca71e02957d81becd7a053c9b4f19aceb (patch)
tree	d97740dcd5e2185c30ab20550fe35558806dfac0
parent	9559fc756deaf20b6bae961b58c5289d8582c8b7 (diff)
parent	ec4a3d5c7299b16a9bf4d431fa16f466cc453697 (diff)
download	simplejson-1a4995dca71e02957d81becd7a053c9b4f19aceb.tar.gz