summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sam Sneddon <me@gsnedders.com> 2020-04-20 18:58:32 +0100
committer: Stefan Behnel <stefan_ml@behnel.de> 2020-04-20 19:59:33 +0200
commit: 631475acbad73ac665e5f3fe921be5e81adbfb9d (patch)
tree: 50124f140afbd7825ab09d7efe2cd3799fd9ccf1
parent: 43517bfbd79813d2c600728c171eda62b307d751 (diff)
download: cython-631475acbad73ac665e5f3fe921be5e81adbfb9d.tar.gz
Avoid integer overflow when decoding bytes/charptr (GH-3535)
Fixes GH-3534.
-rw-r--r-- Cython/Utility/StringTools.c 8
-rw-r--r-- tests/run/bytesmethods.pyx 16
-rw-r--r-- tests/run/charptr_decode.pyx 18
3 files changed, 38 insertions, 4 deletions
diff --git a/Cython/Utility/StringTools.c b/Cython/Utility/StringTools.c
index b97538f70..68315d949 100644
--- a/Cython/Utility/StringTools.c
+++ b/Cython/Utility/StringTools.c
@@ -466,9 +466,9 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
if (stop < 0)
stop += length;
}
- length = stop - start;
- if (unlikely(length <= 0))
+ if (unlikely(stop <= start))
return PyUnicode_FromUnicode(NULL, 0);
+ length = stop - start;
cstring += start;
if (decode_func) {
return decode_func(cstring, length, errors);
@@ -502,9 +502,9 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
}
if (stop > length)
stop = length;
- length = stop - start;
- if (unlikely(length <= 0))
+ if (unlikely(stop <= start))
return PyUnicode_FromUnicode(NULL, 0);
+ length = stop - start;
cstring += start;
if (decode_func) {
return decode_func(cstring, length, errors);
diff --git a/tests/run/bytesmethods.pyx b/tests/run/bytesmethods.pyx
index 14a06c42d..f2a10e1d2 100644
--- a/tests/run/bytesmethods.pyx
+++ b/tests/run/bytesmethods.pyx
@@ -1,5 +1,13 @@
cimport cython
+cdef extern from *:
+ cdef Py_ssize_t PY_SSIZE_T_MIN
+ cdef Py_ssize_t PY_SSIZE_T_MAX
+
+SSIZE_T_MAX = PY_SSIZE_T_MAX
+SSIZE_T_MIN = PY_SSIZE_T_MIN
+
+
b_a = b'a'
b_b = b'b'
@@ -114,6 +122,14 @@ def bytes_decode(bytes s, start=None, stop=None):
<BLANKLINE>
>>> print(bytes_decode(s, -300, -500))
<BLANKLINE>
+ >>> print(bytes_decode(s, SSIZE_T_MIN, SSIZE_T_MIN))
+ <BLANKLINE>
+ >>> print(bytes_decode(s, SSIZE_T_MIN, SSIZE_T_MAX))
+ abaab
+ >>> print(bytes_decode(s, SSIZE_T_MAX, SSIZE_T_MIN))
+ <BLANKLINE>
+ >>> print(bytes_decode(s, SSIZE_T_MAX, SSIZE_T_MAX))
+ <BLANKLINE>
>>> s[:'test'] # doctest: +ELLIPSIS
Traceback (most recent call last):
diff --git a/tests/run/charptr_decode.pyx b/tests/run/charptr_decode.pyx
index e7f9b541f..e7c26db1b 100644
--- a/tests/run/charptr_decode.pyx
+++ b/tests/run/charptr_decode.pyx
@@ -1,6 +1,11 @@
cimport cython
+cdef extern from *:
+ cdef Py_ssize_t PY_SSIZE_T_MIN
+ cdef Py_ssize_t PY_SSIZE_T_MAX
+
+
############################################################
# tests for char* slicing
@@ -118,6 +123,19 @@ def slice_charptr_dynamic_bounds_non_name():
(cstring+1)[:].decode('UTF-8'),
(cstring+1)[return1():return5()].decode('UTF-8'))
+@cython.test_assert_path_exists("//PythonCapiCallNode")
+@cython.test_fail_if_path_exists("//AttributeNode")
+def slice_charptr_decode_large_bounds():
+ """
+ >>> print(str(slice_charptr_decode_large_bounds()).replace("u'", "'"))
+ ('abcABCqtp', '', '', '')
+ """
+ return (cstring[PY_SSIZE_T_MIN:9].decode('UTF-8'),
+ cstring[PY_SSIZE_T_MAX:PY_SSIZE_T_MIN].decode('UTF-8'),
+ cstring[PY_SSIZE_T_MIN:PY_SSIZE_T_MIN].decode('UTF-8'),
+ cstring[PY_SSIZE_T_MAX:PY_SSIZE_T_MAX].decode('UTF-8'))
+
+
cdef return1(): return 1
cdef return3(): return 3
cdef return4(): return 4