1 files changed, 173 insertions, 273 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 796e400a82..14bd8e68c5 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -41,10 +41,6 @@ static PyBytesObject *nullstring;
 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
 
 /*
-   For both PyBytes_FromString() and PyBytes_FromStringAndSize(), the
-   parameter `size' denotes number of characters to allocate, not counting any
-   null terminating character.
-
    For PyBytes_FromString(), the parameter `str' points to a null-terminated
    string containing exactly `size' bytes.
 
@@ -61,8 +57,8 @@ static PyBytesObject *nullstring;
 
    The PyObject member `op->ob_size', which denotes the number of "extra
    items" in a variable-size object, will contain the number of bytes
-   allocated for string data, not counting the null terminating character.  It
-   is therefore equal to the equal to the `size' parameter (for
+   allocated for string data, not counting the null terminating character.
+   It is therefore equal to the `size' parameter (for
    PyBytes_FromStringAndSize()) or the length of the string in the `str'
    parameter (for PyBytes_FromString()).
 */
@@ -568,76 +564,70 @@ PyBytes_AsStringAndSize(register PyObject *obj,
 PyObject *
 PyBytes_Repr(PyObject *obj, int smartquotes)
 {
-    static const char *hexdigits = "0123456789abcdef";
     register PyBytesObject* op = (PyBytesObject*) obj;
-    Py_ssize_t length = Py_SIZE(op);
-    size_t newsize;
+    Py_ssize_t i, length = Py_SIZE(op);
+    size_t newsize, squotes, dquotes;
     PyObject *v;
-    if (length > (PY_SSIZE_T_MAX - 3) / 4) {
+    unsigned char quote, *s, *p;
+
+    /* Compute size of output string */
+    squotes = dquotes = 0;
+    newsize = 3; /* b'' */
+    s = (unsigned char*)op->ob_sval;
+    for (i = 0; i < length; i++) {
+        switch(s[i]) {
+        case '\'': squotes++; newsize++; break;
+        case '"':  dquotes++; newsize++; break;
+        case '\\': case '\t': case '\n': case '\r':
+            newsize += 2; break; /* \C */
+        default:
+            if (s[i] < ' ' || s[i] >= 0x7f)
+                newsize += 4; /* \xHH */
+            else
+                newsize++;
+        }
+    }
+    quote = '\'';
+    if (smartquotes && squotes && !dquotes)
+        quote = '"';
+    if (squotes && quote == '\'')
+        newsize += squotes;
+
+    if (newsize > (PY_SSIZE_T_MAX - sizeof(PyUnicodeObject) - 1)) {
         PyErr_SetString(PyExc_OverflowError,
             "bytes object is too large to make repr");
         return NULL;
     }
-    newsize = 3 + 4 * length;
-    v = PyUnicode_FromUnicode(NULL, newsize);
+
+    v = PyUnicode_New(newsize, 127);
     if (v == NULL) {
         return NULL;
     }
-    else {
-        register Py_ssize_t i;
-        register Py_UNICODE c;
-        register Py_UNICODE *p = PyUnicode_AS_UNICODE(v);
-        int quote;
-
-        /* Figure out which quote to use; single is preferred */
-        quote = '\'';
-        if (smartquotes) {
-            char *test, *start;
-            start = PyBytes_AS_STRING(op);
-            for (test = start; test < start+length; ++test) {
-                if (*test == '"') {
-                    quote = '\''; /* back to single */
-                    goto decided;
-                }
-                else if (*test == '\'')
-                    quote = '"';
-            }
-            decided:
-            ;
-        }
-
-        *p++ = 'b', *p++ = quote;
-        for (i = 0; i < length; i++) {
-            /* There's at least enough room for a hex escape
-               and a closing quote. */
-            assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 5);
-            c = op->ob_sval[i];
-            if (c == quote || c == '\\')
-                *p++ = '\\', *p++ = c;
-            else if (c == '\t')
-                *p++ = '\\', *p++ = 't';
-            else if (c == '\n')
-                *p++ = '\\', *p++ = 'n';
-            else if (c == '\r')
-                *p++ = '\\', *p++ = 'r';
-            else if (c < ' ' || c >= 0x7f) {
-                *p++ = '\\';
-                *p++ = 'x';
-                *p++ = hexdigits[(c & 0xf0) >> 4];
-                *p++ = hexdigits[c & 0xf];
-            }
-            else
-                *p++ = c;
-        }
-        assert(newsize - (p - PyUnicode_AS_UNICODE(v)) >= 1);
-        *p++ = quote;
-        *p = '\0';
-        if (PyUnicode_Resize(&v, (p - PyUnicode_AS_UNICODE(v)))) {
-            Py_DECREF(v);
-            return NULL;
+    p = PyUnicode_1BYTE_DATA(v);
+
+    *p++ = 'b', *p++ = quote;
+    for (i = 0; i < length; i++) {
+        unsigned char c = op->ob_sval[i];
+        if (c == quote || c == '\\')
+            *p++ = '\\', *p++ = c;
+        else if (c == '\t')
+            *p++ = '\\', *p++ = 't';
+        else if (c == '\n')
+            *p++ = '\\', *p++ = 'n';
+        else if (c == '\r')
+            *p++ = '\\', *p++ = 'r';
+        else if (c < ' ' || c >= 0x7f) {
+            *p++ = '\\';
+            *p++ = 'x';
+            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
+            *p++ = Py_hexdigits[c & 0xf];
         }
-        return v;
+        else
+            *p++ = c;
     }
+    *p++ = quote;
+    assert(_PyUnicode_CheckConsistency(v, 1));
+    return v;
 }
 
 static PyObject *
@@ -871,35 +861,11 @@ bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
 static Py_hash_t
 bytes_hash(PyBytesObject *a)
 {
-    register Py_ssize_t len;
-    register unsigned char *p;
-    register Py_hash_t x;
-
-#ifdef Py_DEBUG
-    assert(_Py_HashSecret_Initialized);
-#endif
-    if (a->ob_shash != -1)
-        return a->ob_shash;
-    len = Py_SIZE(a);
-    /*
-      We make the hash of the empty string be 0, rather than using
-      (prefix ^ suffix), since this slightly obfuscates the hash secret
-    */
-    if (len == 0) {
-        a->ob_shash = 0;
-        return 0;
-    }
-    p = (unsigned char *) a->ob_sval;
-    x = _Py_HashSecret.prefix;
-    x ^= *p << 7;
-    while (--len >= 0)
-        x = (_PyHASH_MULTIPLIER*x) ^ *p++;
-    x ^= Py_SIZE(a);
-    x ^= _Py_HashSecret.suffix;
-    if (x == -1)
-        x = -2;
-    a->ob_shash = x;
-    return x;
+    if (a->ob_shash == -1) {
+        /* Can't fail */
+        a->ob_shash = _Py_HashBytes((unsigned char *) a->ob_sval, Py_SIZE(a));
+    }
+    return a->ob_shash;
 }
 
 static PyObject*
@@ -1007,7 +973,7 @@ static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
 #define STRIPNAME(i) (stripformat[i]+3)
 
 PyDoc_STRVAR(split__doc__,
-"B.split([sep[, maxsplit]]) -> list of bytes\n\
+"B.split(sep=None, maxsplit=-1) -> list of bytes\n\
 \n\
 Return a list of the sections in B, using sep as the delimiter.\n\
 If sep is not specified or is None, B is split on ASCII whitespace\n\
@@ -1015,15 +981,17 @@ characters (space, tab, return, newline, formfeed, vertical tab).\n\
 If maxsplit is given, at most maxsplit splits are done.");
 
 static PyObject *
-bytes_split(PyBytesObject *self, PyObject *args)
+bytes_split(PyBytesObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"sep", "maxsplit", 0};
     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
     Py_ssize_t maxsplit = -1;
     const char *s = PyBytes_AS_STRING(self), *sub;
     Py_buffer vsub;
     PyObject *list, *subobj = Py_None;
 
-    if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:split",
+                                     kwlist, &subobj, &maxsplit))
         return NULL;
     if (maxsplit < 0)
         maxsplit = PY_SSIZE_T_MAX;
@@ -1095,7 +1063,7 @@ bytes_rpartition(PyBytesObject *self, PyObject *sep_obj)
 }
 
 PyDoc_STRVAR(rsplit__doc__,
-"B.rsplit([sep[, maxsplit]]) -> list of bytes\n\
+"B.rsplit(sep=None, maxsplit=-1) -> list of bytes\n\
 \n\
 Return a list of the sections in B, using sep as the delimiter,\n\
 starting at the end of B and working to the front.\n\
@@ -1105,15 +1073,17 @@ If maxsplit is given, at most maxsplit splits are done.");
 
 
 static PyObject *
-bytes_rsplit(PyBytesObject *self, PyObject *args)
+bytes_rsplit(PyBytesObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"sep", "maxsplit", 0};
     Py_ssize_t len = PyBytes_GET_SIZE(self), n;
     Py_ssize_t maxsplit = -1;
     const char *s = PyBytes_AS_STRING(self), *sub;
     Py_buffer vsub;
     PyObject *list, *subobj = Py_None;
 
-    if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|On:rsplit",
+                                     kwlist, &subobj, &maxsplit))
         return NULL;
     if (maxsplit < 0)
         maxsplit = PY_SSIZE_T_MAX;
@@ -1254,31 +1224,42 @@ Py_LOCAL_INLINE(Py_ssize_t)
 bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
 {
     PyObject *subobj;
+    char byte;
+    Py_buffer subbuf;
     const char *sub;
     Py_ssize_t sub_len;
     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
+    Py_ssize_t res;
 
-    if (!stringlib_parse_args_finds("find/rfind/index/rindex",
-                                    args, &subobj, &start, &end))
+    if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex",
+                                         args, &subobj, &byte, &start, &end))
         return -2;
 
-    if (PyBytes_Check(subobj)) {
-        sub = PyBytes_AS_STRING(subobj);
-        sub_len = PyBytes_GET_SIZE(subobj);
+    if (subobj) {
+        if (_getbuffer(subobj, &subbuf) < 0)
+            return -2;
+
+        sub = subbuf.buf;
+        sub_len = subbuf.len;
+    }
+    else {
+        sub = &byte;
+        sub_len = 1;
     }
-    else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
-        /* XXX - the "expected a character buffer object" is pretty
-           confusing for a non-expert.  remap to something else ? */
-        return -2;
 
     if (dir > 0)
-        return stringlib_find_slice(
+        res = stringlib_find_slice(
             PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
             sub, sub_len, start, end);
     else
-        return stringlib_rfind_slice(
+        res = stringlib_rfind_slice(
             PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
             sub, sub_len, start, end);
+
+    if (subobj)
+        PyBuffer_Release(&subbuf);
+
+    return res;
 }
 
 
@@ -1504,23 +1485,38 @@ bytes_count(PyBytesObject *self, PyObject *args)
     PyObject *sub_obj;
     const char *str = PyBytes_AS_STRING(self), *sub;
     Py_ssize_t sub_len;
+    char byte;
     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
 
-    if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
+    Py_buffer vsub;
+    PyObject *count_obj;
+
+    if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte,
+                                         &start, &end))
         return NULL;
 
-    if (PyBytes_Check(sub_obj)) {
-        sub = PyBytes_AS_STRING(sub_obj);
-        sub_len = PyBytes_GET_SIZE(sub_obj);
+    if (sub_obj) {
+        if (_getbuffer(sub_obj, &vsub) < 0)
+            return NULL;
+
+        sub = vsub.buf;
+        sub_len = vsub.len;
+    }
+    else {
+        sub = &byte;
+        sub_len = 1;
     }
-    else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
-        return NULL;
 
     ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self));
 
-    return PyLong_FromSsize_t(
+    count_obj = PyLong_FromSsize_t(
         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
         );
+
+    if (sub_obj)
+        PyBuffer_Release(&vsub);
+
+    return count_obj;
 }
 
 
@@ -2329,11 +2325,13 @@ Line breaks are not included in the resulting list unless keepends\n\
 is given and true.");
 
 static PyObject*
-bytes_splitlines(PyObject *self, PyObject *args)
+bytes_splitlines(PyObject *self, PyObject *args, PyObject *kwds)
 {
+    static char *kwlist[] = {"keepends", 0};
     int keepends = 0;
 
-    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:splitlines",
+                                     kwlist, &keepends))
         return NULL;
 
     return stringlib_splitlines(
@@ -2351,7 +2349,7 @@ Spaces between two numbers are accepted.\n\
 Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.");
 
 static int
-hex_digit_to_int(Py_UNICODE c)
+hex_digit_to_int(Py_UCS4 c)
 {
     if (c >= 128)
         return -1;
@@ -2371,15 +2369,20 @@ bytes_fromhex(PyObject *cls, PyObject *args)
 {
     PyObject *newstring, *hexobj;
     char *buf;
-    Py_UNICODE *hex;
     Py_ssize_t hexlen, byteslen, i, j;
     int top, bot;
+    void *data;
+    unsigned int kind;
 
     if (!PyArg_ParseTuple(args, "U:fromhex", &hexobj))
         return NULL;
     assert(PyUnicode_Check(hexobj));
-    hexlen = PyUnicode_GET_SIZE(hexobj);
-    hex = PyUnicode_AS_UNICODE(hexobj);
+    if (PyUnicode_READY(hexobj))
+        return NULL;
+    kind = PyUnicode_KIND(hexobj);
+    data = PyUnicode_DATA(hexobj);
+    hexlen = PyUnicode_GET_LENGTH(hexobj);
+
     byteslen = hexlen/2; /* This overestimates if there are spaces */
     newstring = PyBytes_FromStringAndSize(NULL, byteslen);
     if (!newstring)
@@ -2387,12 +2390,12 @@ bytes_fromhex(PyObject *cls, PyObject *args)
     buf = PyBytes_AS_STRING(newstring);
     for (i = j = 0; i < hexlen; i += 2) {
         /* skip over spaces in the input */
-        while (hex[i] == ' ')
+        while (PyUnicode_READ(kind, data, i) == ' ')
             i++;
         if (i >= hexlen)
             break;
-        top = hex_digit_to_int(hex[i]);
-        bot = hex_digit_to_int(hex[i+1]);
+        top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
+        bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
         if (top == -1 || bot == -1) {
             PyErr_Format(PyExc_ValueError,
                          "non-hexadecimal number found in "
@@ -2472,10 +2475,10 @@ bytes_methods[] = {
     {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__},
     {"rpartition", (PyCFunction)bytes_rpartition, METH_O,
      rpartition__doc__},
-    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS, rsplit__doc__},
+    {"rsplit", (PyCFunction)bytes_rsplit, METH_VARARGS | METH_KEYWORDS, rsplit__doc__},
     {"rstrip", (PyCFunction)bytes_rstrip, METH_VARARGS, rstrip__doc__},
-    {"split", (PyCFunction)bytes_split, METH_VARARGS, split__doc__},
-    {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS,
+    {"split", (PyCFunction)bytes_split, METH_VARARGS | METH_KEYWORDS, split__doc__},
+    {"splitlines", (PyCFunction)bytes_splitlines, METH_VARARGS | METH_KEYWORDS,
      splitlines__doc__},
     {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
      startswith__doc__},
@@ -2574,6 +2577,12 @@ PyBytes_FromObject(PyObject *x)
         PyErr_BadInternalCall();
         return NULL;
     }
+
+    if (PyBytes_CheckExact(x)) {
+        Py_INCREF(x);
+        return x;
+    }
+
     /* Use the modern buffer interface */
     if (PyObject_CheckBuffer(x)) {
         Py_buffer view;
@@ -2857,149 +2866,6 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
     return 0;
 }
 
-/* _PyBytes_FormatLong emulates the format codes d, u, o, x and X, and
- * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
- * Python's regular ints.
- * Return value:  a new PyBytes*, or NULL if error.
- *  .  *pbuf is set to point into it,
- *     *plen set to the # of chars following that.
- *     Caller must decref it when done using pbuf.
- *     The string starting at *pbuf is of the form
- *         "-"? ("0x" | "0X")? digit+
- *     "0x"/"0X" are present only for x and X conversions, with F_ALT
- *         set in flags.  The case of hex digits will be correct,
- *     There will be at least prec digits, zero-filled on the left if
- *         necessary to get that many.
- * val          object to be converted
- * flags        bitmask of format flags; only F_ALT is looked at
- * prec         minimum number of digits; 0-fill on left if needed
- * type         a character in [duoxX]; u acts the same as d
- *
- * CAUTION:  o, x and X conversions on regular ints can never
- * produce a '-' sign, but can for Python's unbounded ints.
- */
-PyObject*
-_PyBytes_FormatLong(PyObject *val, int flags, int prec, int type,
-                     char **pbuf, int *plen)
-{
-    PyObject *result = NULL;
-    char *buf;
-    Py_ssize_t i;
-    int sign;           /* 1 if '-', else 0 */
-    int len;            /* number of characters */
-    Py_ssize_t llen;
-    int numdigits;      /* len == numnondigits + numdigits */
-    int numnondigits = 0;
-
-    /* Avoid exceeding SSIZE_T_MAX */
-    if (prec > INT_MAX-3) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "precision too large");
-        return NULL;
-    }
-
-    switch (type) {
-    case 'd':
-    case 'u':
-        /* Special-case boolean: we want 0/1 */
-        if (PyBool_Check(val))
-            result = PyNumber_ToBase(val, 10);
-        else
-            result = Py_TYPE(val)->tp_str(val);
-        break;
-    case 'o':
-        numnondigits = 2;
-        result = PyNumber_ToBase(val, 8);
-        break;
-    case 'x':
-    case 'X':
-        numnondigits = 2;
-        result = PyNumber_ToBase(val, 16);
-        break;
-    default:
-        assert(!"'type' not in [duoxX]");
-    }
-    if (!result)
-        return NULL;
-
-    buf = _PyUnicode_AsString(result);
-    if (!buf) {
-        Py_DECREF(result);
-        return NULL;
-    }
-
-    /* To modify the string in-place, there can only be one reference. */
-    if (Py_REFCNT(result) != 1) {
-        PyErr_BadInternalCall();
-        return NULL;
-    }
-    llen = PyUnicode_GetSize(result);
-    if (llen > INT_MAX) {
-        PyErr_SetString(PyExc_ValueError,
-                        "string too large in _PyBytes_FormatLong");
-        return NULL;
-    }
-    len = (int)llen;
-    if (buf[len-1] == 'L') {
-        --len;
-        buf[len] = '\0';
-    }
-    sign = buf[0] == '-';
-    numnondigits += sign;
-    numdigits = len - numnondigits;
-    assert(numdigits > 0);
-
-    /* Get rid of base marker unless F_ALT */
-    if (((flags & F_ALT) == 0 &&
-        (type == 'o' || type == 'x' || type == 'X'))) {
-        assert(buf[sign] == '0');
-        assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
-               buf[sign+1] == 'o');
-        numnondigits -= 2;
-        buf += 2;
-        len -= 2;
-        if (sign)
-            buf[0] = '-';
-        assert(len == numnondigits + numdigits);
-        assert(numdigits > 0);
-    }
-
-    /* Fill with leading zeroes to meet minimum width. */
-    if (prec > numdigits) {
-        PyObject *r1 = PyBytes_FromStringAndSize(NULL,
-                                numnondigits + prec);
-        char *b1;
-        if (!r1) {
-            Py_DECREF(result);
-            return NULL;
-        }
-        b1 = PyBytes_AS_STRING(r1);
-        for (i = 0; i < numnondigits; ++i)
-            *b1++ = *buf++;
-        for (i = 0; i < prec - numdigits; i++)
-            *b1++ = '0';
-        for (i = 0; i < numdigits; i++)
-            *b1++ = *buf++;
-        *b1 = '\0';
-        Py_DECREF(result);
-        result = r1;
-        buf = PyBytes_AS_STRING(result);
-        len = numnondigits + prec;
-    }
-
-    /* Fix up case for hex conversions. */
-    if (type == 'X') {
-        /* Need to convert all lower case letters to upper case.
-           and need to convert 0x to 0X (and -0x to -0X). */
-        for (i = 0; i < len; i++)
-            if (buf[i] >= 'a' && buf[i] <= 'x')
-                buf[i] -= 'a'-'A';
-    }
-    *pbuf = buf;
-    *plen = len;
-    return result;
-}
-
 void
 PyBytes_Fini(void)
 {
@@ -3072,9 +2938,43 @@ striter_len(striterobject *it)
 PyDoc_STRVAR(length_hint_doc,
              "Private method returning an estimate of len(list(it)).");
 
+static PyObject *
+striter_reduce(striterobject *it)
+{
+    if (it->it_seq != NULL) {
+        return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
+                             it->it_seq, it->it_index);
+    } else {
+        PyObject *u = PyUnicode_FromUnicode(NULL, 0);
+        if (u == NULL)
+            return NULL;
+        return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
+    }
+}
+
+PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
+
+static PyObject *
+striter_setstate(striterobject *it, PyObject *state)
+{
+    Py_ssize_t index = PyLong_AsSsize_t(state);
+    if (index == -1 && PyErr_Occurred())
+        return NULL;
+    if (index < 0)
+        index = 0;
+    it->it_index = index;
+    Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
+
 static PyMethodDef striter_methods[] = {
     {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
      length_hint_doc},
+    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
+     reduce_doc},
+    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
+     setstate_doc},
     {NULL,              NULL}           /* sentinel */
 };