summaryrefslogtreecommitdiff
path: root/Objects/bytesobject.c
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2016-03-30 20:41:15 +0300
committerSerhiy Storchaka <storchaka@gmail.com>2016-03-30 20:41:15 +0300
commit83e9d2102aaa99c1096ca590511fb744a29bd273 (patch)
tree73a621780a2252c68ffef100c45ccb7f87472e1f /Objects/bytesobject.c
parent6a20be2164da28d43d634469088710e3e4a6bfdd (diff)
parentd3e6ebfc3d5c888a4837e2b804779186d725dcc0 (diff)
downloadcpython-83e9d2102aaa99c1096ca590511fb744a29bd273.tar.gz
Issue #26494: Fixed crash on iterating exhausting iterators.
Affected classes are generic sequence iterators, iterators of str, bytes, bytearray, list, tuple, set, frozenset, dict, OrderedDict, corresponding views and os.scandir() iterator.
Diffstat (limited to 'Objects/bytesobject.c')
-rw-r--r--Objects/bytesobject.c1372
1 files changed, 912 insertions, 460 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 495c3ebc23..5bbbcde3fb 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -30,6 +30,10 @@ static PyBytesObject *nullstring;
*/
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
+/* Forward declaration */
+Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
+ char *str);
+
/*
For PyBytes_FromString(), the parameter `str' points to a null-terminated
string containing exactly `size' bytes.
@@ -174,190 +178,184 @@ PyBytes_FromString(const char *str)
PyObject *
PyBytes_FromFormatV(const char *format, va_list vargs)
{
- va_list count;
- Py_ssize_t n = 0;
- const char* f;
char *s;
- PyObject* string;
+ const char *f;
+ const char *p;
+ Py_ssize_t prec;
+ int longflag;
+ int size_tflag;
+ /* Longest 64-bit formatted numbers:
+ - "18446744073709551615\0" (21 bytes)
+ - "-9223372036854775808\0" (21 bytes)
+ Decimal takes the most space (it isn't enough for octal.)
+
+ Longest 64-bit pointer representation:
+ "0xffffffffffffffff\0" (19 bytes). */
+ char buffer[21];
+ _PyBytesWriter writer;
+
+ _PyBytesWriter_Init(&writer);
+
+ s = _PyBytesWriter_Alloc(&writer, strlen(format));
+ if (s == NULL)
+ return NULL;
+ writer.overallocate = 1;
+
+#define WRITE_BYTES(str) \
+ do { \
+ s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
+ if (s == NULL) \
+ goto error; \
+ } while (0)
- Py_VA_COPY(count, vargs);
- /* step 1: figure out how large a buffer we need */
for (f = format; *f; f++) {
- if (*f == '%') {
- const char* p = f;
- while (*++f && *f != '%' && !Py_ISALPHA(*f))
- ;
-
- /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
- * they don't affect the amount of space we reserve.
- */
- if ((*f == 'l' || *f == 'z') &&
- (f[1] == 'd' || f[1] == 'u'))
- ++f;
-
- switch (*f) {
- case 'c':
- {
- int c = va_arg(count, int);
- if (c < 0 || c > 255) {
- PyErr_SetString(PyExc_OverflowError,
- "PyBytes_FromFormatV(): %c format "
- "expects an integer in range [0; 255]");
- return NULL;
- }
- n++;
- break;
+ if (*f != '%') {
+ *s++ = *f;
+ continue;
+ }
+
+ p = f++;
+
+ /* ignore the width (ex: 10 in "%10s") */
+ while (Py_ISDIGIT(*f))
+ f++;
+
+ /* parse the precision (ex: 10 in "%.10s") */
+ prec = 0;
+ if (*f == '.') {
+ f++;
+ for (; Py_ISDIGIT(*f); f++) {
+ prec = (prec * 10) + (*f - '0');
}
- case '%':
- n++;
- break;
- case 'd': case 'u': case 'i': case 'x':
- (void) va_arg(count, int);
- /* 20 bytes is enough to hold a 64-bit
- integer. Decimal takes the most space.
- This isn't enough for octal. */
- n += 20;
- break;
- case 's':
- s = va_arg(count, char*);
- n += strlen(s);
- break;
- case 'p':
- (void) va_arg(count, int);
- /* maximum 64-bit pointer representation:
- * 0xffffffffffffffff
- * so 19 characters is enough.
- * XXX I count 18 -- what's the extra for?
- */
- n += 19;
- break;
- default:
- /* if we stumble upon an unknown
- formatting code, copy the rest of
- the format string to the output
- string. (we cannot just skip the
- code, since there's no way to know
- what's in the argument list) */
- n += strlen(p);
- goto expand;
+ }
+
+ while (*f && *f != '%' && !Py_ISALPHA(*f))
+ f++;
+
+ /* handle the long flag ('l'), but only for %ld and %lu.
+ others can be added when necessary. */
+ longflag = 0;
+ if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
+ longflag = 1;
+ ++f;
+ }
+
+ /* handle the size_t flag ('z'). */
+ size_tflag = 0;
+ if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
+ size_tflag = 1;
+ ++f;
+ }
+
+ /* substract bytes preallocated for the format string
+ (ex: 2 for "%s") */
+ writer.min_size -= (f - p + 1);
+
+ switch (*f) {
+ case 'c':
+ {
+ int c = va_arg(vargs, int);
+ if (c < 0 || c > 255) {
+ PyErr_SetString(PyExc_OverflowError,
+ "PyBytes_FromFormatV(): %c format "
+ "expects an integer in range [0; 255]");
+ goto error;
}
- } else
- n++;
- }
- expand:
- /* step 2: fill the buffer */
- /* Since we've analyzed how much space we need for the worst case,
- use sprintf directly instead of the slower PyOS_snprintf. */
- string = PyBytes_FromStringAndSize(NULL, n);
- if (!string)
- return NULL;
+ writer.min_size++;
+ *s++ = (unsigned char)c;
+ break;
+ }
- s = PyBytes_AsString(string);
+ case 'd':
+ if (longflag)
+ sprintf(buffer, "%ld", va_arg(vargs, long));
+ else if (size_tflag)
+ sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
+ va_arg(vargs, Py_ssize_t));
+ else
+ sprintf(buffer, "%d", va_arg(vargs, int));
+ assert(strlen(buffer) < sizeof(buffer));
+ WRITE_BYTES(buffer);
+ break;
- for (f = format; *f; f++) {
- if (*f == '%') {
- const char* p = f++;
+ case 'u':
+ if (longflag)
+ sprintf(buffer, "%lu",
+ va_arg(vargs, unsigned long));
+ else if (size_tflag)
+ sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
+ va_arg(vargs, size_t));
+ else
+ sprintf(buffer, "%u",
+ va_arg(vargs, unsigned int));
+ assert(strlen(buffer) < sizeof(buffer));
+ WRITE_BYTES(buffer);
+ break;
+
+ case 'i':
+ sprintf(buffer, "%i", va_arg(vargs, int));
+ assert(strlen(buffer) < sizeof(buffer));
+ WRITE_BYTES(buffer);
+ break;
+
+ case 'x':
+ sprintf(buffer, "%x", va_arg(vargs, int));
+ assert(strlen(buffer) < sizeof(buffer));
+ WRITE_BYTES(buffer);
+ break;
+
+ case 's':
+ {
Py_ssize_t i;
- int longflag = 0;
- int size_tflag = 0;
- /* parse the width.precision part (we're only
- interested in the precision value, if any) */
- n = 0;
- while (Py_ISDIGIT(*f))
- n = (n*10) + *f++ - '0';
- if (*f == '.') {
- f++;
- n = 0;
- while (Py_ISDIGIT(*f))
- n = (n*10) + *f++ - '0';
- }
- while (*f && *f != '%' && !Py_ISALPHA(*f))
- f++;
- /* handle the long flag, but only for %ld and %lu.
- others can be added when necessary. */
- if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
- longflag = 1;
- ++f;
- }
- /* handle the size_t flag. */
- if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
- size_tflag = 1;
- ++f;
- }
- switch (*f) {
- case 'c':
- {
- int c = va_arg(vargs, int);
- /* c has been checked for overflow in the first step */
- *s++ = (unsigned char)c;
- break;
+ p = va_arg(vargs, const char*);
+ i = strlen(p);
+ if (prec > 0 && i > prec)
+ i = prec;
+ s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
+ if (s == NULL)
+ goto error;
+ break;
+ }
+
+ case 'p':
+ sprintf(buffer, "%p", va_arg(vargs, void*));
+ assert(strlen(buffer) < sizeof(buffer));
+ /* %p is ill-defined: ensure leading 0x. */
+ if (buffer[1] == 'X')
+ buffer[1] = 'x';
+ else if (buffer[1] != 'x') {
+ memmove(buffer+2, buffer, strlen(buffer)+1);
+ buffer[0] = '0';
+ buffer[1] = 'x';
}
- case 'd':
- if (longflag)
- sprintf(s, "%ld", va_arg(vargs, long));
- else if (size_tflag)
- sprintf(s, "%" PY_FORMAT_SIZE_T "d",
- va_arg(vargs, Py_ssize_t));
- else
- sprintf(s, "%d", va_arg(vargs, int));
- s += strlen(s);
- break;
- case 'u':
- if (longflag)
- sprintf(s, "%lu",
- va_arg(vargs, unsigned long));
- else if (size_tflag)
- sprintf(s, "%" PY_FORMAT_SIZE_T "u",
- va_arg(vargs, size_t));
- else
- sprintf(s, "%u",
- va_arg(vargs, unsigned int));
- s += strlen(s);
- break;
- case 'i':
- sprintf(s, "%i", va_arg(vargs, int));
- s += strlen(s);
- break;
- case 'x':
- sprintf(s, "%x", va_arg(vargs, int));
- s += strlen(s);
- break;
- case 's':
- p = va_arg(vargs, char*);
- i = strlen(p);
- if (n > 0 && i > n)
- i = n;
- Py_MEMCPY(s, p, i);
- s += i;
- break;
- case 'p':
- sprintf(s, "%p", va_arg(vargs, void*));
- /* %p is ill-defined: ensure leading 0x. */
- if (s[1] == 'X')
- s[1] = 'x';
- else if (s[1] != 'x') {
- memmove(s+2, s, strlen(s)+1);
- s[0] = '0';
- s[1] = 'x';
- }
- s += strlen(s);
- break;
- case '%':
- *s++ = '%';
- break;
- default:
- strcpy(s, p);
- s += strlen(s);
- goto end;
+ WRITE_BYTES(buffer);
+ break;
+
+ case '%':
+ writer.min_size++;
+ *s++ = '%';
+ break;
+
+ default:
+ if (*f == 0) {
+ /* fix min_size if we reached the end of the format string */
+ writer.min_size++;
}
- } else
- *s++ = *f;
+
+ /* invalid format string: copy unformatted string and exit */
+ WRITE_BYTES(p);
+ return _PyBytesWriter_Finish(&writer, s);
+ }
}
- end:
- _PyBytes_Resize(&string, s - PyBytes_AS_STRING(string));
- return string;
+#undef WRITE_BYTES
+
+ return _PyBytesWriter_Finish(&writer, s);
+
+ error:
+ _PyBytesWriter_Dealloc(&writer);
+ return NULL;
}
PyObject *
@@ -409,12 +407,14 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
/* Returns a new reference to a PyBytes object, or NULL on failure. */
-static PyObject *
-formatfloat(PyObject *v, int flags, int prec, int type)
+static char*
+formatfloat(PyObject *v, int flags, int prec, int type,
+ PyObject **p_result, _PyBytesWriter *writer, char *str)
{
char *p;
PyObject *result;
double x;
+ size_t len;
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred()) {
@@ -431,9 +431,21 @@ formatfloat(PyObject *v, int flags, int prec, int type)
if (p == NULL)
return NULL;
- result = PyBytes_FromStringAndSize(p, strlen(p));
+
+ len = strlen(p);
+ if (writer != NULL) {
+ str = _PyBytesWriter_Prepare(writer, str, len);
+ if (str == NULL)
+ return NULL;
+ Py_MEMCPY(str, p, len);
+ str += len;
+ return str;
+ }
+
+ result = PyBytes_FromStringAndSize(p, len);
PyMem_Free(p);
- return result;
+ *p_result = result;
+ return str;
}
static PyObject *
@@ -557,36 +569,36 @@ format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
return NULL;
}
-/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
- FORMATBUFLEN is the length of the buffer in which the ints &
- chars are formatted. XXX This is a magic number. Each formatting
- routine does bounds checking to ensure no overflow, but a better
- solution may be to malloc a buffer of appropriate size for each
- format. For now, the current solution is sufficient.
-*/
-#define FORMATBUFLEN (size_t)120
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
PyObject *
-_PyBytes_Format(PyObject *format, PyObject *args)
+_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
+ PyObject *args, int use_bytearray)
{
- char *fmt, *res;
+ const char *fmt;
+ char *res;
Py_ssize_t arglen, argidx;
- Py_ssize_t reslen, rescnt, fmtcnt;
+ Py_ssize_t fmtcnt;
int args_owned = 0;
- PyObject *result;
PyObject *dict = NULL;
- if (format == NULL || !PyBytes_Check(format) || args == NULL) {
+ _PyBytesWriter writer;
+
+ if (args == NULL) {
PyErr_BadInternalCall();
return NULL;
}
- fmt = PyBytes_AS_STRING(format);
- fmtcnt = PyBytes_GET_SIZE(format);
- reslen = rescnt = fmtcnt + 100;
- result = PyBytes_FromStringAndSize((char *)NULL, reslen);
- if (result == NULL)
+ fmt = format;
+ fmtcnt = format_len;
+
+ _PyBytesWriter_Init(&writer);
+ writer.use_bytearray = use_bytearray;
+
+ res = _PyBytesWriter_Alloc(&writer, fmtcnt);
+ if (res == NULL)
return NULL;
- res = PyBytes_AsString(result);
+ if (!use_bytearray)
+ writer.overallocate = 1;
+
if (PyTuple_Check(args)) {
arglen = PyTuple_GET_SIZE(args);
argidx = 0;
@@ -600,18 +612,23 @@ _PyBytes_Format(PyObject *format, PyObject *args)
!PyByteArray_Check(args)) {
dict = args;
}
+
while (--fmtcnt >= 0) {
if (*fmt != '%') {
- if (--rescnt < 0) {
- rescnt = fmtcnt + 100;
- reslen += rescnt;
- if (_PyBytes_Resize(&result, reslen))
- return NULL;
- res = PyBytes_AS_STRING(result)
- + reslen - rescnt;
- --rescnt;
- }
- *res++ = *fmt++;
+ Py_ssize_t len;
+ char *pos;
+
+ pos = strchr(fmt + 1, '%');
+ if (pos != NULL)
+ len = pos - fmt;
+ else
+ len = format_len - (fmt - format);
+ assert(len != 0);
+
+ Py_MEMCPY(res, fmt, len);
+ res += len;
+ fmt += len;
+ fmtcnt -= (len - 1);
}
else {
/* Got a format specifier */
@@ -626,10 +643,14 @@ _PyBytes_Format(PyObject *format, PyObject *args)
int sign;
Py_ssize_t len = 0;
char onechar; /* For byte_converter() */
+ Py_ssize_t alloc;
+#ifdef Py_DEBUG
+ char *before;
+#endif
fmt++;
if (*fmt == '(') {
- char *keystart;
+ const char *keystart;
Py_ssize_t keylen;
PyObject *key;
int pcount = 1;
@@ -673,6 +694,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
arglen = -1;
argidx = -2;
}
+
+ /* Parse flags. Example: "%+i" => flags=F_SIGN. */
while (--fmtcnt >= 0) {
switch (c = *fmt++) {
case '-': flags |= F_LJUST; continue;
@@ -683,6 +706,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
}
break;
}
+
+ /* Parse width. Example: "%10s" => width=10 */
if (c == '*') {
v = getnextarg(args, arglen, &argidx);
if (v == NULL)
@@ -717,6 +742,8 @@ _PyBytes_Format(PyObject *format, PyObject *args)
width = width*10 + (c - '0');
}
}
+
+ /* Parse precision. Example: "%.3f" => prec=3 */
if (c == '.') {
prec = 0;
if (--fmtcnt >= 0)
@@ -771,13 +798,19 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (v == NULL)
goto error;
}
+
+ if (fmtcnt < 0) {
+ /* last writer: disable writer overallocation */
+ writer.overallocate = 0;
+ }
+
sign = 0;
fill = ' ';
switch (c) {
case '%':
- pbuf = "%";
- len = 1;
- break;
+ *res++ = '%';
+ continue;
+
case 'r':
// %r is only for 2/3 code; 3 only code should use %a
case 'a':
@@ -790,6 +823,7 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (prec >= 0 && len > prec)
len = prec;
break;
+
case 's':
// %s is only for 2/3 code; 3 only code should use %b
case 'b':
@@ -799,12 +833,49 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (prec >= 0 && len > prec)
len = prec;
break;
+
case 'i':
case 'd':
case 'u':
case 'o':
case 'x':
case 'X':
+ if (PyLong_CheckExact(v)
+ && width == -1 && prec == -1
+ && !(flags & (F_SIGN | F_BLANK))
+ && c != 'X')
+ {
+ /* Fast path */
+ int alternate = flags & F_ALT;
+ int base;
+
+ switch(c)
+ {
+ default:
+ assert(0 && "'type' not in [diuoxX]");
+ case 'd':
+ case 'i':
+ case 'u':
+ base = 10;
+ break;
+ case 'o':
+ base = 8;
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ break;
+ }
+
+ /* Fast path */
+ writer.min_size -= 2; /* size preallocated for "%d" */
+ res = _PyLong_FormatBytesWriter(&writer, res,
+ v, base, alternate);
+ if (res == NULL)
+ goto error;
+ continue;
+ }
+
temp = formatlong(v, flags, prec, c);
if (!temp)
goto error;
@@ -815,14 +886,25 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (flags & F_ZERO)
fill = '0';
break;
+
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
- temp = formatfloat(v, flags, prec, c);
- if (temp == NULL)
+ if (width == -1 && prec == -1
+ && !(flags & (F_SIGN | F_BLANK)))
+ {
+ /* Fast path */
+ writer.min_size -= 2; /* size preallocated for "%f" */
+ res = formatfloat(v, flags, prec, c, NULL, &writer, res);
+ if (res == NULL)
+ goto error;
+ continue;
+ }
+
+ if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
goto error;
pbuf = PyBytes_AS_STRING(temp);
len = PyBytes_GET_SIZE(temp);
@@ -830,21 +912,28 @@ _PyBytes_Format(PyObject *format, PyObject *args)
if (flags & F_ZERO)
fill = '0';
break;
+
case 'c':
pbuf = &onechar;
len = byte_converter(v, &onechar);
if (!len)
goto error;
+ if (width == -1) {
+ /* Fast path */
+ *res++ = onechar;
+ continue;
+ }
break;
+
default:
PyErr_Format(PyExc_ValueError,
"unsupported format character '%c' (0x%x) "
"at index %zd",
c, c,
- (Py_ssize_t)(fmt - 1 -
- PyBytes_AsString(format)));
+ (Py_ssize_t)(fmt - 1 - format));
goto error;
}
+
if (sign) {
if (*pbuf == '-' || *pbuf == '+') {
sign = *pbuf++;
@@ -859,29 +948,31 @@ _PyBytes_Format(PyObject *format, PyObject *args)
}
if (width < len)
width = len;
- if (rescnt - (sign != 0) < width) {
- reslen -= rescnt;
- rescnt = width + fmtcnt + 100;
- reslen += rescnt;
- if (reslen < 0) {
- Py_DECREF(result);
- Py_XDECREF(temp);
- return PyErr_NoMemory();
- }
- if (_PyBytes_Resize(&result, reslen)) {
- Py_XDECREF(temp);
- return NULL;
- }
- res = PyBytes_AS_STRING(result)
- + reslen - rescnt;
+
+ alloc = width;
+ if (sign != 0 && len == width)
+ alloc++;
+ /* 2: size preallocated for %s */
+ if (alloc > 2) {
+ res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
+ if (res == NULL)
+ goto error;
}
+#ifdef Py_DEBUG
+ before = res;
+#endif
+
+ /* Write the sign if needed */
if (sign) {
if (fill != ' ')
*res++ = sign;
- rescnt--;
if (width > len)
width--;
}
+
+ /* Write the numeric prefix for "x", "X" and "o" formats
+ if the alternate form is used.
+ For example, write "0x" for the "%#x" format. */
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
assert(pbuf[0] == '0');
assert(pbuf[1] == c);
@@ -889,18 +980,21 @@ _PyBytes_Format(PyObject *format, PyObject *args)
*res++ = *pbuf++;
*res++ = *pbuf++;
}
- rescnt -= 2;
width -= 2;
if (width < 0)
width = 0;
len -= 2;
}
+
+ /* Pad left with the fill character if needed */
if (width > len && !(flags & F_LJUST)) {
- do {
- --rescnt;
- *res++ = fill;
- } while (--width > len);
+ memset(res, fill, width - len);
+ res += (width - len);
+ width = len;
}
+
+ /* If padding with spaces: write sign if needed and/or numeric
+ prefix if the alternate form is used */
if (fill == ' ') {
if (sign)
*res++ = sign;
@@ -912,13 +1006,17 @@ _PyBytes_Format(PyObject *format, PyObject *args)
*res++ = *pbuf++;
}
}
+
+ /* Copy bytes */
Py_MEMCPY(res, pbuf, len);
res += len;
- rescnt -= len;
- while (--width >= len) {
- --rescnt;
- *res++ = ' ';
+
+ /* Pad right with the fill character if needed */
+ if (width > len) {
+ memset(res, ' ', width - len);
+ res += (width - len);
}
+
if (dict && (argidx < arglen) && c != '%') {
PyErr_SetString(PyExc_TypeError,
"not all arguments converted during bytes formatting");
@@ -926,22 +1024,31 @@ _PyBytes_Format(PyObject *format, PyObject *args)
goto error;
}
Py_XDECREF(temp);
+
+#ifdef Py_DEBUG
+ /* check that we computed the exact size for this write */
+ assert((res - before) == alloc);
+#endif
} /* '%' */
+
+ /* If overallocation was disabled, ensure that it was the last
+ write. Otherwise, we missed an optimization */
+ assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
} /* until end */
+
if (argidx < arglen && !dict) {
PyErr_SetString(PyExc_TypeError,
"not all arguments converted during bytes formatting");
goto error;
}
+
if (args_owned) {
Py_DECREF(args);
}
- if (_PyBytes_Resize(&result, reslen - rescnt))
- return NULL;
- return result;
+ return _PyBytesWriter_Finish(&writer, res);
error:
- Py_DECREF(result);
+ _PyBytesWriter_Dealloc(&writer);
if (args_owned) {
Py_DECREF(args);
}
@@ -961,6 +1068,42 @@ bytes_dealloc(PyObject *op)
the string is UTF-8 encoded and should be re-encoded in the
specified encoding. */
+static char *
+_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
+ const char *errors, const char *recode_encoding,
+ _PyBytesWriter *writer, char *p)
+{
+ PyObject *u, *w;
+ const char* t;
+
+ t = *s;
+ /* Decode non-ASCII bytes as UTF-8. */
+ while (t < end && (*t & 0x80))
+ t++;
+ u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
+ if (u == NULL)
+ return NULL;
+
+ /* Recode them in target encoding. */
+ w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
+ Py_DECREF(u);
+ if (w == NULL)
+ return NULL;
+ assert(PyBytes_Check(w));
+
+ /* Append bytes to output buffer. */
+ writer->min_size--; /* substract 1 preallocated byte */
+ p = _PyBytesWriter_WriteBytes(writer, p,
+ PyBytes_AS_STRING(w),
+ PyBytes_GET_SIZE(w));
+ Py_DECREF(w);
+ if (p == NULL)
+ return NULL;
+
+ *s = t;
+ return p;
+}
+
PyObject *PyBytes_DecodeEscape(const char *s,
Py_ssize_t len,
const char *errors,
@@ -968,54 +1111,42 @@ PyObject *PyBytes_DecodeEscape(const char *s,
const char *recode_encoding)
{
int c;
- char *p, *buf;
+ char *p;
const char *end;
- PyObject *v;
- Py_ssize_t newlen = recode_encoding ? 4*len:len;
- v = PyBytes_FromStringAndSize((char *)NULL, newlen);
- if (v == NULL)
+ _PyBytesWriter writer;
+
+ _PyBytesWriter_Init(&writer);
+
+ p = _PyBytesWriter_Alloc(&writer, len);
+ if (p == NULL)
return NULL;
- p = buf = PyBytes_AsString(v);
+ writer.overallocate = 1;
+
end = s + len;
while (s < end) {
if (*s != '\\') {
non_esc:
- if (recode_encoding && (*s & 0x80)) {
- PyObject *u, *w;
- char *r;
- const char* t;
- Py_ssize_t rn;
- t = s;
- /* Decode non-ASCII bytes as UTF-8. */
- while (t < end && (*t & 0x80)) t++;
- u = PyUnicode_DecodeUTF8(s, t - s, errors);
- if(!u) goto failed;
-
- /* Recode them in target encoding. */
- w = PyUnicode_AsEncodedString(
- u, recode_encoding, errors);
- Py_DECREF(u);
- if (!w) goto failed;
-
- /* Append bytes to output buffer. */
- assert(PyBytes_Check(w));
- r = PyBytes_AS_STRING(w);
- rn = PyBytes_GET_SIZE(w);
- Py_MEMCPY(p, r, rn);
- p += rn;
- Py_DECREF(w);
- s = t;
- } else {
+ if (!(recode_encoding && (*s & 0x80))) {
*p++ = *s++;
}
+ else {
+ /* non-ASCII character and need to recode */
+ p = _PyBytes_DecodeEscapeRecode(&s, end,
+ errors, recode_encoding,
+ &writer, p);
+ if (p == NULL)
+ goto failed;
+ }
continue;
}
+
s++;
- if (s==end) {
+ if (s == end) {
PyErr_SetString(PyExc_ValueError,
"Trailing \\ in string");
goto failed;
}
+
switch (*s++) {
/* XXX This assumes ASCII! */
case '\n': break;
@@ -1040,28 +1171,18 @@ PyObject *PyBytes_DecodeEscape(const char *s,
*p++ = c;
break;
case 'x':
- if (s+1 < end && Py_ISXDIGIT(s[0]) && Py_ISXDIGIT(s[1])) {
- unsigned int x = 0;
- c = Py_CHARMASK(*s);
- s++;
- if (Py_ISDIGIT(c))
- x = c - '0';
- else if (Py_ISLOWER(c))
- x = 10 + c - 'a';
- else
- x = 10 + c - 'A';
- x = x << 4;
- c = Py_CHARMASK(*s);
- s++;
- if (Py_ISDIGIT(c))
- x += c - '0';
- else if (Py_ISLOWER(c))
- x += 10 + c - 'a';
- else
- x += 10 + c - 'A';
- *p++ = x;
- break;
+ if (s+1 < end) {
+ int digit1, digit2;
+ digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
+ digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
+ if (digit1 < 16 && digit2 < 16) {
+ *p++ = (unsigned char)((digit1 << 4) + digit2);
+ s += 2;
+ break;
+ }
}
+ /* invalid hexadecimal digits */
+
if (!errors || strcmp(errors, "strict") == 0) {
PyErr_Format(PyExc_ValueError,
"invalid \\x escape at position %d",
@@ -1083,6 +1204,7 @@ PyObject *PyBytes_DecodeEscape(const char *s,
if (s < end && Py_ISXDIGIT(s[0]))
s++; /* and a hexdigit */
break;
+
default:
*p++ = '\\';
s--;
@@ -1090,11 +1212,11 @@ PyObject *PyBytes_DecodeEscape(const char *s,
UTF-8 bytes may follow. */
}
}
- if (p-buf < newlen)
- _PyBytes_Resize(&v, p - buf);
- return v;
+
+ return _PyBytesWriter_Finish(&writer, p);
+
failed:
- Py_DECREF(v);
+ _PyBytesWriter_Dealloc(&writer);
return NULL;
}
@@ -1815,16 +1937,15 @@ bytes_find_internal(PyBytesObject *self, PyObject *args, int dir)
ADJUST_INDICES(start, end, len);
if (end - start < sub_len)
res = -1;
- else if (sub_len == 1
-#ifndef HAVE_MEMRCHR
- && dir > 0
-#endif
- ) {
- unsigned char needle = *sub;
- int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH;
- res = stringlib_fastsearch_memchr_1char(
- PyBytes_AS_STRING(self) + start, end - start,
- needle, needle, mode);
+ else if (sub_len == 1) {
+ if (dir > 0)
+ res = stringlib_find_char(
+ PyBytes_AS_STRING(self) + start, end - start,
+ *sub);
+ else
+ res = stringlib_rfind_char(
+ PyBytes_AS_STRING(self) + start, end - start,
+ *sub);
if (res >= 0)
res += start;
}
@@ -2343,17 +2464,27 @@ replace_interleave(PyBytesObject *self,
self_s = PyBytes_AS_STRING(self);
result_s = PyBytes_AS_STRING(result);
- /* TODO: special case single character, which doesn't need memcpy */
-
- /* Lay the first one down (guaranteed this will occur) */
- Py_MEMCPY(result_s, to_s, to_len);
- result_s += to_len;
- count -= 1;
-
- for (i=0; i<count; i++) {
- *result_s++ = *self_s++;
+ if (to_len > 1) {
+ /* Lay the first one down (guaranteed this will occur) */
Py_MEMCPY(result_s, to_s, to_len);
result_s += to_len;
+ count -= 1;
+
+ for (i = 0; i < count; i++) {
+ *result_s++ = *self_s++;
+ Py_MEMCPY(result_s, to_s, to_len);
+ result_s += to_len;
+ }
+ }
+ else {
+ result_s[0] = to_s[0];
+ result_s += to_len;
+ count -= 1;
+ for (i = 0; i < count; i++) {
+ *result_s++ = *self_s++;
+ result_s[0] = to_s[0];
+ result_s += to_len;
+ }
}
/* Copy the rest of the original string */
@@ -2975,22 +3106,6 @@ bytes_splitlines_impl(PyBytesObject*self, int keepends)
);
}
-static int
-hex_digit_to_int(Py_UCS4 c)
-{
- if (c >= 128)
- return -1;
- if (Py_ISDIGIT(c))
- return c - '0';
- else {
- if (Py_ISUPPER(c))
- c = Py_TOLOWER(c);
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- }
- return -1;
-}
-
/*[clinic input]
@classmethod
bytes.fromhex
@@ -3008,47 +3123,83 @@ static PyObject *
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
{
- PyObject *newstring;
+ return _PyBytes_FromHex(string, 0);
+}
+
+PyObject*
+_PyBytes_FromHex(PyObject *string, int use_bytearray)
+{
char *buf;
- Py_ssize_t hexlen, byteslen, i, j;
- int top, bot;
- void *data;
- unsigned int kind;
+ Py_ssize_t hexlen, invalid_char;
+ unsigned int top, bot;
+ Py_UCS1 *str, *end;
+ _PyBytesWriter writer;
+
+ _PyBytesWriter_Init(&writer);
+ writer.use_bytearray = use_bytearray;
assert(PyUnicode_Check(string));
if (PyUnicode_READY(string))
return NULL;
- kind = PyUnicode_KIND(string);
- data = PyUnicode_DATA(string);
hexlen = PyUnicode_GET_LENGTH(string);
- byteslen = hexlen/2; /* This overestimates if there are spaces */
- newstring = PyBytes_FromStringAndSize(NULL, byteslen);
- if (!newstring)
+ if (!PyUnicode_IS_ASCII(string)) {
+ void *data = PyUnicode_DATA(string);
+ unsigned int kind = PyUnicode_KIND(string);
+ Py_ssize_t i;
+
+ /* search for the first non-ASCII character */
+ for (i = 0; i < hexlen; i++) {
+ if (PyUnicode_READ(kind, data, i) >= 128)
+ break;
+ }
+ invalid_char = i;
+ goto error;
+ }
+
+ assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
+ str = PyUnicode_1BYTE_DATA(string);
+
+ /* This overestimates if there are spaces */
+ buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
+ if (buf == NULL)
return NULL;
- buf = PyBytes_AS_STRING(newstring);
- for (i = j = 0; i < hexlen; i += 2) {
+
+ end = str + hexlen;
+ while (str < end) {
/* skip over spaces in the input */
- while (PyUnicode_READ(kind, data, i) == ' ')
- i++;
- if (i >= hexlen)
- break;
- top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
- bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
- if (top == -1 || bot == -1) {
- PyErr_Format(PyExc_ValueError,
- "non-hexadecimal number found in "
- "fromhex() arg at position %zd", i);
+ if (*str == ' ') {
+ do {
+ str++;
+ } while (*str == ' ');
+ if (str >= end)
+ break;
+ }
+
+ top = _PyLong_DigitValue[*str];
+ if (top >= 16) {
+ invalid_char = str - PyUnicode_1BYTE_DATA(string);
goto error;
}
- buf[j++] = (top << 4) + bot;
+ str++;
+
+ bot = _PyLong_DigitValue[*str];
+ if (bot >= 16) {
+ invalid_char = str - PyUnicode_1BYTE_DATA(string);
+ goto error;
+ }
+ str++;
+
+ *buf++ = (unsigned char)((top << 4) + bot);
}
- if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
- goto error;
- return newstring;
+
+ return _PyBytesWriter_Finish(&writer, buf);
error:
- Py_XDECREF(newstring);
+ PyErr_Format(PyExc_ValueError,
+ "non-hexadecimal number found in "
+ "fromhex() arg at position %zd", invalid_char);
+ _PyBytesWriter_Dealloc(&writer);
return NULL;
}
@@ -3131,11 +3282,15 @@ bytes_methods[] = {
};
static PyObject *
-bytes_mod(PyObject *v, PyObject *w)
+bytes_mod(PyObject *self, PyObject *args)
{
- if (!PyBytes_Check(v))
- Py_RETURN_NOTIMPLEMENTED;
- return _PyBytes_Format(v, w);
+ if (self == NULL || !PyBytes_Check(self)) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+
+ return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
+ args, 0);
}
static PyNumberMethods bytes_as_number = {
@@ -3244,103 +3399,95 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return PyBytes_FromObject(x);
}
-PyObject *
-PyBytes_FromObject(PyObject *x)
+static PyObject*
+_PyBytes_FromBuffer(PyObject *x)
{
- PyObject *new, *it;
- Py_ssize_t i, size;
+ PyObject *new;
+ Py_buffer view;
- if (x == NULL) {
- PyErr_BadInternalCall();
+ if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
return NULL;
- }
- if (PyBytes_CheckExact(x)) {
- Py_INCREF(x);
- return x;
- }
+ new = PyBytes_FromStringAndSize(NULL, view.len);
+ if (!new)
+ goto fail;
+ if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
+ &view, view.len, 'C') < 0)
+ goto fail;
+ PyBuffer_Release(&view);
+ return new;
- /* Use the modern buffer interface */
- if (PyObject_CheckBuffer(x)) {
- Py_buffer view;
- if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
- return NULL;
- new = PyBytes_FromStringAndSize(NULL, view.len);
- if (!new)
- goto fail;
- if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
- &view, view.len, 'C') < 0)
- goto fail;
- PyBuffer_Release(&view);
- return new;
- fail:
- Py_XDECREF(new);
- PyBuffer_Release(&view);
- return NULL;
- }
- if (PyUnicode_Check(x)) {
- PyErr_SetString(PyExc_TypeError,
- "cannot convert unicode object to bytes");
- return NULL;
- }
+fail:
+ Py_XDECREF(new);
+ PyBuffer_Release(&view);
+ return NULL;
+}
- if (PyList_CheckExact(x)) {
- new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
- if (new == NULL)
- return NULL;
- for (i = 0; i < Py_SIZE(x); i++) {
- Py_ssize_t value = PyNumber_AsSsize_t(
- PyList_GET_ITEM(x, i), PyExc_ValueError);
- if (value == -1 && PyErr_Occurred()) {
- Py_DECREF(new);
- return NULL;
- }
- if (value < 0 || value >= 256) {
- PyErr_SetString(PyExc_ValueError,
- "bytes must be in range(0, 256)");
- Py_DECREF(new);
- return NULL;
- }
- ((PyBytesObject *)new)->ob_sval[i] = (char) value;
- }
- return new;
- }
- if (PyTuple_CheckExact(x)) {
- new = PyBytes_FromStringAndSize(NULL, Py_SIZE(x));
- if (new == NULL)
- return NULL;
- for (i = 0; i < Py_SIZE(x); i++) {
- Py_ssize_t value = PyNumber_AsSsize_t(
- PyTuple_GET_ITEM(x, i), PyExc_ValueError);
- if (value == -1 && PyErr_Occurred()) {
- Py_DECREF(new);
- return NULL;
- }
- if (value < 0 || value >= 256) {
- PyErr_SetString(PyExc_ValueError,
- "bytes must be in range(0, 256)");
- Py_DECREF(new);
- return NULL;
- }
- ((PyBytesObject *)new)->ob_sval[i] = (char) value;
- }
- return new;
- }
+#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM) \
+ do { \
+ PyObject *bytes; \
+ Py_ssize_t i; \
+ Py_ssize_t value; \
+ char *str; \
+ PyObject *item; \
+ \
+ bytes = PyBytes_FromStringAndSize(NULL, Py_SIZE(x)); \
+ if (bytes == NULL) \
+ return NULL; \
+ str = ((PyBytesObject *)bytes)->ob_sval; \
+ \
+ for (i = 0; i < Py_SIZE(x); i++) { \
+ item = GET_ITEM((x), i); \
+ value = PyNumber_AsSsize_t(item, PyExc_ValueError); \
+ if (value == -1 && PyErr_Occurred()) \
+ goto error; \
+ \
+ if (value < 0 || value >= 256) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "bytes must be in range(0, 256)"); \
+ goto error; \
+ } \
+ *str++ = (char) value; \
+ } \
+ return bytes; \
+ \
+ error: \
+ Py_DECREF(bytes); \
+ return NULL; \
+ } while (0)
+
+static PyObject*
+_PyBytes_FromList(PyObject *x)
+{
+ _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
+}
+
+static PyObject*
+_PyBytes_FromTuple(PyObject *x)
+{
+ _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
+}
+
+static PyObject *
+_PyBytes_FromIterator(PyObject *x)
+{
+ char *str;
+ PyObject *it;
+ Py_ssize_t i, size;
+ _PyBytesWriter writer;
+
+ _PyBytesWriter_Init(&writer);
/* For iterator version, create a string object and resize as needed */
size = PyObject_LengthHint(x, 64);
if (size == -1 && PyErr_Occurred())
return NULL;
- /* Allocate an extra byte to prevent PyBytes_FromStringAndSize() from
- returning a shared empty bytes string. This required because we
- want to call _PyBytes_Resize() the returned object, which we can
- only do on bytes objects with refcount == 1. */
- if (size == 0)
- size = 1;
- new = PyBytes_FromStringAndSize(NULL, size);
- if (new == NULL)
+
+ str = _PyBytesWriter_Alloc(&writer, size);
+ if (str == NULL)
return NULL;
- assert(Py_REFCNT(new) == 1);
+ writer.overallocate = 1;
+ size = writer.allocated;
/* Get the iterator */
it = PyObject_GetIter(x);
@@ -3375,24 +3522,55 @@ PyBytes_FromObject(PyObject *x)
/* Append the byte */
if (i >= size) {
- size = 2 * size + 1;
- if (_PyBytes_Resize(&new, size) < 0)
- goto error;
+ str = _PyBytesWriter_Resize(&writer, str, size+1);
+ if (str == NULL)
+ return NULL;
+ size = writer.allocated;
}
- ((PyBytesObject *)new)->ob_sval[i] = (char) value;
+ *str++ = (char) value;
}
- _PyBytes_Resize(&new, i);
-
- /* Clean up and return success */
Py_DECREF(it);
- return new;
+
+ return _PyBytesWriter_Finish(&writer, str);
error:
+ _PyBytesWriter_Dealloc(&writer);
Py_XDECREF(it);
- Py_XDECREF(new);
return NULL;
}
+PyObject *
+PyBytes_FromObject(PyObject *x)
+{
+ if (x == NULL) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+
+ if (PyBytes_CheckExact(x)) {
+ Py_INCREF(x);
+ return x;
+ }
+
+ /* Use the modern buffer interface */
+ if (PyObject_CheckBuffer(x))
+ return _PyBytes_FromBuffer(x);
+
+ if (PyList_CheckExact(x))
+ return _PyBytes_FromList(x);
+
+ if (PyTuple_CheckExact(x))
+ return _PyBytes_FromTuple(x);
+
+ if (PyUnicode_Check(x)) {
+ PyErr_SetString(PyExc_TypeError,
+ "cannot convert unicode object to bytes");
+ return NULL;
+ }
+
+ return _PyBytes_FromIterator(x);
+}
+
static PyObject *
bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
@@ -3740,3 +3918,277 @@ bytes_iter(PyObject *seq)
_PyObject_GC_TRACK(it);
return (PyObject *)it;
}
+
+
+/* _PyBytesWriter API */
+
+#ifdef MS_WINDOWS
+ /* On Windows, overallocate by 50% is the best factor */
+# define OVERALLOCATE_FACTOR 2
+#else
+ /* On Linux, overallocate by 25% is the best factor */
+# define OVERALLOCATE_FACTOR 4
+#endif
+
+void
+_PyBytesWriter_Init(_PyBytesWriter *writer)
+{
+ /* Set all attributes before small_buffer to 0 */
+ memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
+#ifdef Py_DEBUG
+ memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
+#endif
+}
+
+void
+_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
+{
+ Py_CLEAR(writer->buffer);
+}
+
+Py_LOCAL_INLINE(char*)
+_PyBytesWriter_AsString(_PyBytesWriter *writer)
+{
+ if (writer->use_small_buffer) {
+ assert(writer->buffer == NULL);
+ return writer->small_buffer;
+ }
+ else if (writer->use_bytearray) {
+ assert(writer->buffer != NULL);
+ return PyByteArray_AS_STRING(writer->buffer);
+ }
+ else {
+ assert(writer->buffer != NULL);
+ return PyBytes_AS_STRING(writer->buffer);
+ }
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
+{
+ char *start = _PyBytesWriter_AsString(writer);
+ assert(str != NULL);
+ assert(str >= start);
+ assert(str - start <= writer->allocated);
+ return str - start;
+}
+
+Py_LOCAL_INLINE(void)
+_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
+{
+#ifdef Py_DEBUG
+ char *start, *end;
+
+ if (writer->use_small_buffer) {
+ assert(writer->buffer == NULL);
+ }
+ else {
+ assert(writer->buffer != NULL);
+ if (writer->use_bytearray)
+ assert(PyByteArray_CheckExact(writer->buffer));
+ else
+ assert(PyBytes_CheckExact(writer->buffer));
+ assert(Py_REFCNT(writer->buffer) == 1);
+ }
+
+ if (writer->use_bytearray) {
+ /* bytearray has its own overallocation algorithm,
+ writer overallocation must be disabled */
+ assert(!writer->overallocate);
+ }
+
+ assert(0 <= writer->allocated);
+ assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
+ /* the last byte must always be null */
+ start = _PyBytesWriter_AsString(writer);
+ assert(start[writer->allocated] == 0);
+
+ end = start + writer->allocated;
+ assert(str != NULL);
+ assert(start <= str && str <= end);
+#endif
+}
+
+void*
+_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
+{
+ Py_ssize_t allocated, pos;
+
+ _PyBytesWriter_CheckConsistency(writer, str);
+ assert(writer->allocated < size);
+
+ allocated = size;
+ if (writer->overallocate
+ && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
+ /* overallocate to limit the number of realloc() */
+ allocated += allocated / OVERALLOCATE_FACTOR;
+ }
+
+ pos = _PyBytesWriter_GetSize(writer, str);
+ if (!writer->use_small_buffer) {
+ if (writer->use_bytearray) {
+ if (PyByteArray_Resize(writer->buffer, allocated))
+ goto error;
+ /* writer->allocated can be smaller than writer->buffer->ob_alloc,
+ but we cannot use ob_alloc because bytes may need to be moved
+ to use the whole buffer. bytearray uses an internal optimization
+ to avoid moving or copying bytes when bytes are removed at the
+ beginning (ex: del bytearray[:1]). */
+ }
+ else {
+ if (_PyBytes_Resize(&writer->buffer, allocated))
+ goto error;
+ }
+ }
+ else {
+ /* convert from stack buffer to bytes object buffer */
+ assert(writer->buffer == NULL);
+
+ if (writer->use_bytearray)
+ writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
+ else
+ writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
+ if (writer->buffer == NULL)
+ goto error;
+
+ if (pos != 0) {
+ char *dest;
+ if (writer->use_bytearray)
+ dest = PyByteArray_AS_STRING(writer->buffer);
+ else
+ dest = PyBytes_AS_STRING(writer->buffer);
+ Py_MEMCPY(dest,
+ writer->small_buffer,
+ pos);
+ }
+
+ writer->use_small_buffer = 0;
+#ifdef Py_DEBUG
+ memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
+#endif
+ }
+ writer->allocated = allocated;
+
+ str = _PyBytesWriter_AsString(writer) + pos;
+ _PyBytesWriter_CheckConsistency(writer, str);
+ return str;
+
+error:
+ _PyBytesWriter_Dealloc(writer);
+ return NULL;
+}
+
+void*
+_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
+{
+ Py_ssize_t new_min_size;
+
+ _PyBytesWriter_CheckConsistency(writer, str);
+ assert(size >= 0);
+
+ if (size == 0) {
+ /* nothing to do */
+ return str;
+ }
+
+ if (writer->min_size > PY_SSIZE_T_MAX - size) {
+ PyErr_NoMemory();
+ _PyBytesWriter_Dealloc(writer);
+ return NULL;
+ }
+ new_min_size = writer->min_size + size;
+
+ if (new_min_size > writer->allocated)
+ str = _PyBytesWriter_Resize(writer, str, new_min_size);
+
+ writer->min_size = new_min_size;
+ return str;
+}
+
+/* Allocate the buffer to write size bytes.
+ Return the pointer to the beginning of buffer data.
+ Raise an exception and return NULL on error. */
+void*
+_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
+{
+ /* ensure that _PyBytesWriter_Alloc() is only called once */
+ assert(writer->min_size == 0 && writer->buffer == NULL);
+ assert(size >= 0);
+
+ writer->use_small_buffer = 1;
+#ifdef Py_DEBUG
+ writer->allocated = sizeof(writer->small_buffer) - 1;
+ /* In debug mode, don't use the full small buffer because it is less
+ efficient than bytes and bytearray objects to detect buffer underflow
+ and buffer overflow. Use 10 bytes of the small buffer to test also
+ code using the smaller buffer in debug mode.
+
+ Don't modify the _PyBytesWriter structure (use a shorter small buffer)
+ in debug mode to also be able to detect stack overflow when running
+ tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
+ if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
+ stack overflow. */
+ writer->allocated = Py_MIN(writer->allocated, 10);
+ /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
+ to detect buffer overflow */
+ writer->small_buffer[writer->allocated] = 0;
+#else
+ writer->allocated = sizeof(writer->small_buffer);
+#endif
+ return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
+}
+
+PyObject *
+_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
+{
+ Py_ssize_t size;
+ PyObject *result;
+
+ _PyBytesWriter_CheckConsistency(writer, str);
+
+ size = _PyBytesWriter_GetSize(writer, str);
+ if (size == 0 && !writer->use_bytearray) {
+ Py_CLEAR(writer->buffer);
+ /* Get the empty byte string singleton */
+ result = PyBytes_FromStringAndSize(NULL, 0);
+ }
+ else if (writer->use_small_buffer) {
+ result = PyBytes_FromStringAndSize(writer->small_buffer, size);
+ }
+ else {
+ result = writer->buffer;
+ writer->buffer = NULL;
+
+ if (size != writer->allocated) {
+ if (writer->use_bytearray) {
+ if (PyByteArray_Resize(result, size)) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ }
+ else {
+ if (_PyBytes_Resize(&result, size)) {
+ assert(result == NULL);
+ return NULL;
+ }
+ }
+ }
+ }
+ return result;
+}
+
+void*
+_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
+ const void *bytes, Py_ssize_t size)
+{
+ char *str = (char *)ptr;
+
+ str = _PyBytesWriter_Prepare(writer, str, size);
+ if (str == NULL)
+ return NULL;
+
+ Py_MEMCPY(str, bytes, size);
+ str += size;
+
+ return str;
+}