merge heads

author: Georg Brandl <georg@python.org> 2012-10-28 13:37:05 +0100
committer: Georg Brandl <georg@python.org> 2012-10-28 13:37:05 +0100
commit: 62fd9533cad18f89521beef054cc1cab3f0c880d (patch)
tree: d5382e5a23ace120249f510ceb57c79c7c7fbd64
parent: f6bd4d0339d6dd7eae8f94e4fbfe3c70e1d9865c (diff)
parent: 2203af1417a8a4c26457b6e28405bcca2e3736a6 (diff)
download: cpython-62fd9533cad18f89521beef054cc1cab3f0c880d.tar.gz
7 files changed, 68 insertions, 28 deletions
diff --git a/Doc/library/json.rst b/Doc/library/json.rst
index 95f120cc7f..f9547cb358 100644
--- a/Doc/library/json.rst
+++ b/Doc/library/json.rst
@@ -116,7 +116,10 @@ Using json.tool from the shell to validate and pretty-print::
 Basic Usage
 -----------
 
-.. function:: dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, default=None, **kw)
+.. function:: dump(obj, fp, skipkeys=False, ensure_ascii=True, \
+                   check_circular=True, allow_nan=True, cls=None, \
+                   indent=None, separators=None, default=None, \
+                   sort_keys=False, **kw)
 
    Serialize *obj* as a JSON formatted stream to *fp* (a ``.write()``-supporting
    :term:`file-like object`).
@@ -159,12 +162,18 @@ Basic Usage
    *default(obj)* is a function that should return a serializable version of
    *obj* or raise :exc:`TypeError`.  The default simply raises :exc:`TypeError`.
 
+   If *sort_keys* is ``True`` (default: ``False``), then the output of
+   dictionaries will be sorted by key.
+
    To use a custom :class:`JSONEncoder` subclass (e.g. one that overrides the
    :meth:`default` method to serialize additional types), specify it with the
    *cls* kwarg; otherwise :class:`JSONEncoder` is used.
 
 
-.. function:: dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, default=None, **kw)
+.. function:: dumps(obj, skipkeys=False, ensure_ascii=True, \
+                    check_circular=True, allow_nan=True, cls=None, \
+                    indent=None, separators=None, default=None, \
+                    sort_keys=False, **kw)
 
    Serialize *obj* to a JSON formatted :class:`str`.  The arguments have the
    same meaning as in :func:`dump`.
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 725b5cdc88..86a7a3e50a 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -122,7 +122,7 @@ _default_encoder = JSONEncoder(
 
 def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
         allow_nan=True, cls=None, indent=None, separators=None,
-        default=None, **kw):
+        default=None, sort_keys=False, **kw):
     """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
     ``.write()``-supporting file-like object).
 
@@ -155,6 +155,9 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
     ``default(obj)`` is a function that should return a serializable version
     of obj or raise TypeError. The default simply raises TypeError.
 
+    If *sort_keys* is ``True`` (default: ``False``), then the output of
+    dictionaries will be sorted by key.
+
     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
     ``.default()`` method to serialize additional types), specify it with
     the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
@@ -164,7 +167,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
     if (not skipkeys and ensure_ascii and
         check_circular and allow_nan and
         cls is None and indent is None and separators is None and
-        default is None and not kw):
+        default is None and not sort_keys and not kw):
         iterable = _default_encoder.iterencode(obj)
     else:
         if cls is None:
@@ -172,7 +175,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
         iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
             check_circular=check_circular, allow_nan=allow_nan, indent=indent,
             separators=separators,
-            default=default, **kw).iterencode(obj)
+            default=default, sort_keys=sort_keys, **kw).iterencode(obj)
     # could accelerate with writelines in some versions of Python, at
     # a debuggability cost
     for chunk in iterable:
@@ -181,7 +184,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
 
 def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
         allow_nan=True, cls=None, indent=None, separators=None,
-        default=None, **kw):
+        default=None, sort_keys=False, **kw):
     """Serialize ``obj`` to a JSON formatted ``str``.
 
     If ``skipkeys`` is false then ``dict`` keys that are not basic types
@@ -213,6 +216,9 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
     ``default(obj)`` is a function that should return a serializable version
     of obj or raise TypeError. The default simply raises TypeError.
 
+    If *sort_keys* is ``True`` (default: ``False``), then the output of
+    dictionaries will be sorted by key.
+
     To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
     ``.default()`` method to serialize additional types), specify it with
     the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
@@ -222,14 +228,14 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
     if (not skipkeys and ensure_ascii and
         check_circular and allow_nan and
         cls is None and indent is None and separators is None and
-        default is None and not kw):
+        default is None and not sort_keys and not kw):
         return _default_encoder.encode(obj)
     if cls is None:
         cls = JSONEncoder
     return cls(
         skipkeys=skipkeys, ensure_ascii=ensure_ascii,
         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
-        separators=separators, default=default,
+        separators=separators, default=default, sort_keys=sort_keys,
         **kw).encode(obj)
 
 
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 19b06a034d..000ae6a430 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -906,6 +906,21 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertRaises(ValueError, '{}'.format_map, 'a')
         self.assertRaises(ValueError, '{a} {}'.format_map, {"a" : 2, "b" : 1})
 
+    def test_format_huge_precision(self):
+        format_string = ".{}f".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format(2.34, format_string)
+
+    def test_format_huge_width(self):
+        format_string = "{}f".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format(2.34, format_string)
+
+    def test_format_huge_item_number(self):
+        format_string = "{{{}:.6f}}".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format_string.format(2.34)
+
     def test_format_auto_numbering(self):
         class C:
             def __init__(self, x=100):
@@ -990,6 +1005,18 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual('%f' % INF, 'inf')
         self.assertEqual('%F' % INF, 'INF')
 
+    @support.cpython_only
+    def test_formatting_huge_precision(self):
+        from _testcapi import INT_MAX
+        format_string = "%.{}f".format(INT_MAX + 1)
+        with self.assertRaises(ValueError):
+            result = format_string % 2.34
+
+    def test_formatting_huge_width(self):
+        format_string = "%{}f".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format_string % 2.34
+
     def test_startswith_endswith_errors(self):
         for meth in ('foo'.startswith, 'foo'.endswith):
             with self.assertRaises(TypeError) as cm:
diff --git a/Misc/NEWS b/Misc/NEWS
index 6c306d8eae..f2ba9826ad 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2.4
 Core and Builtins
 -----------------
 
+- Issue #14700: Fix buggy overflow checks when handling large precisions and
+  widths in old-style and new-style formatting.
+
 - Issue #6074: Ensure cached bytecode files can always be updated by the
   user that created them, even when the source file is read-only.
 
diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h
index 4fdc62d650..139b56cd8e 100644
--- a/Objects/stringlib/formatter.h
+++ b/Objects/stringlib/formatter.h
@@ -73,7 +73,7 @@ static int
 get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
                   Py_ssize_t *result)
 {
-    Py_ssize_t accumulator, digitval, oldaccumulator;
+    Py_ssize_t accumulator, digitval;
     int numdigits;
     accumulator = numdigits = 0;
     for (;;(*ptr)++, numdigits++) {
@@ -83,19 +83,17 @@ get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
         if (digitval < 0)
             break;
         /*
-           This trick was copied from old Unicode format code.  It's cute,
-           but would really suck on an old machine with a slow divide
-           implementation.  Fortunately, in the normal case we do not
-           expect too many digits.
+           Detect possible overflow before it happens:
+
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
         */
-        oldaccumulator = accumulator;
-        accumulator *= 10;
-        if ((accumulator+10)/10 != oldaccumulator+1) {
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
             PyErr_Format(PyExc_ValueError,
                          "Too many decimal digits in format string");
             return -1;
         }
-        accumulator += digitval;
+        accumulator = accumulator * 10 + digitval;
     }
     *result = accumulator;
     return numdigits;
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h
index 6c7adcb01e..c46bdc2656 100644
--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
@@ -197,7 +197,6 @@ get_integer(const SubString *str)
 {
     Py_ssize_t accumulator = 0;
     Py_ssize_t digitval;
-    Py_ssize_t oldaccumulator;
     STRINGLIB_CHAR *p;
 
     /* empty string is an error */
@@ -209,19 +208,17 @@ get_integer(const SubString *str)
         if (digitval < 0)
             return -1;
         /*
-           This trick was copied from old Unicode format code.  It's cute,
-           but would really suck on an old machine with a slow divide
-           implementation.  Fortunately, in the normal case we do not
-           expect too many digits.
+           Detect possible overflow before it happens:
+
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
         */
-        oldaccumulator = accumulator;
-        accumulator *= 10;
-        if ((accumulator+10)/10 != oldaccumulator+1) {
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
             PyErr_Format(PyExc_ValueError,
                          "Too many decimal digits in format string");
             return -1;
         }
-        accumulator += digitval;
+        accumulator = accumulator * 10 + digitval;
     }
     return accumulator;
 }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1dd3a852f2..3ef9c9bbaf 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -9648,7 +9648,7 @@ PyObject *PyUnicode_Format(PyObject *format,
                     c = *fmt++;
                     if (c < '0' || c > '9')
                         break;
-                    if ((width*10) / 10 != width) {
+                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
                         PyErr_SetString(PyExc_ValueError,
                                         "width too big");
                         goto onError;
@@ -9683,7 +9683,7 @@ PyObject *PyUnicode_Format(PyObject *format,
                         c = *fmt++;
                         if (c < '0' || c > '9')
                             break;
-                        if ((prec*10) / 10 != prec) {
+                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
                             PyErr_SetString(PyExc_ValueError,
                                             "prec too big");
                             goto onError;
author	Georg Brandl <georg@python.org>	2012-10-28 13:37:05 +0100
committer	Georg Brandl <georg@python.org>	2012-10-28 13:37:05 +0100
commit	62fd9533cad18f89521beef054cc1cab3f0c880d (patch)
tree	d5382e5a23ace120249f510ceb57c79c7c7fbd64
parent	f6bd4d0339d6dd7eae8f94e4fbfe3c70e1d9865c (diff)
parent	2203af1417a8a4c26457b6e28405bcca2e3736a6 (diff)
download	cpython-62fd9533cad18f89521beef054cc1cab3f0c880d.tar.gz