diff options
author | Zachary Ware <zachary.ware@gmail.com> | 2015-04-09 15:56:22 -0500 |
---|---|---|
committer | Zachary Ware <zachary.ware@gmail.com> | 2015-04-09 15:56:22 -0500 |
commit | 99526d469deab2fa7e3d22c93dd1df06f97dcc66 (patch) | |
tree | 646406a79fa03b935f5c18238918b376f7f77b70 /Modules/_io | |
parent | c41867cada0d1f9d2020b501d1f0825814ff6964 (diff) | |
parent | 54ccd26b64e7f398ae92942d73564b5d412b0244 (diff) | |
download | cpython-99526d469deab2fa7e3d22c93dd1df06f97dcc66.tar.gz |
Null merge with 3.4
Diffstat (limited to 'Modules/_io')
-rw-r--r-- | Modules/_io/_iomodule.c | 28 | ||||
-rw-r--r-- | Modules/_io/_iomodule.h | 2 | ||||
-rw-r--r-- | Modules/_io/bufferedio.c | 70 | ||||
-rw-r--r-- | Modules/_io/bytesio.c | 234 | ||||
-rw-r--r-- | Modules/_io/fileio.c | 225 | ||||
-rw-r--r-- | Modules/_io/textio.c | 42 |
6 files changed, 319 insertions, 282 deletions
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 45c31a550e..e70c4b7611 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -237,8 +237,8 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) PyObject *raw, *modeobj = NULL, *buffer, *wrapper, *result = NULL; + _Py_IDENTIFIER(_blksize); _Py_IDENTIFIER(isatty); - _Py_IDENTIFIER(fileno); _Py_IDENTIFIER(mode); _Py_IDENTIFIER(close); @@ -380,24 +380,14 @@ io_open(PyObject *self, PyObject *args, PyObject *kwds) line_buffering = 0; if (buffering < 0) { - buffering = DEFAULT_BUFFER_SIZE; -#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE - { - struct stat st; - long fileno; - PyObject *res = _PyObject_CallMethodId(raw, &PyId_fileno, NULL); - if (res == NULL) - goto error; - - fileno = PyLong_AsLong(res); - Py_DECREF(res); - if (fileno == -1 && PyErr_Occurred()) - goto error; - - if (fstat(fileno, &st) >= 0 && st.st_blksize > 1) - buffering = st.st_blksize; - } -#endif + PyObject *blksize_obj; + blksize_obj = _PyObject_GetAttrId(raw, &PyId__blksize); + if (blksize_obj == NULL) + goto error; + buffering = PyLong_AsLong(blksize_obj); + Py_DECREF(blksize_obj); + if (buffering == -1 && PyErr_Occurred()) + goto error; } if (buffering < 0) { PyErr_SetString(PyExc_ValueError, diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 8927864e8c..9d5205ec30 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -69,7 +69,7 @@ extern int _PyIO_trap_eintr(void); * Offset type for positioning. */ -/* Printing a variable of type off_t (with e.g., PyString_FromFormat) +/* Printing a variable of type off_t (with e.g., PyUnicode_FromFormat) correctly and without producing compiler warnings is surprisingly painful. We identify an integer type whose size matches off_t and then: (1) cast the off_t to that integer type and (2) use the appropriate conversion diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 445c8708ad..ea9f5332a4 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -24,6 +24,7 @@ _Py_IDENTIFIER(read); _Py_IDENTIFIER(read1); _Py_IDENTIFIER(readable); _Py_IDENTIFIER(readinto); +_Py_IDENTIFIER(readinto1); _Py_IDENTIFIER(writable); _Py_IDENTIFIER(write); @@ -47,17 +48,21 @@ PyDoc_STRVAR(bufferediobase_doc, ); static PyObject * -bufferediobase_readinto(PyObject *self, PyObject *args) +_bufferediobase_readinto_generic(PyObject *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t len; PyObject *data; - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) { + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) { return NULL; } - data = _PyObject_CallMethodId(self, &PyId_read, "n", buf.len); + data = _PyObject_CallMethodId(self, + readinto1 ? &PyId_read1 : &PyId_read, + "n", buf.len); if (data == NULL) goto error; @@ -89,6 +94,18 @@ bufferediobase_readinto(PyObject *self, PyObject *args) } static PyObject * +bufferediobase_readinto(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 0); +} + +static PyObject * +bufferediobase_readinto1(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 1); +} + +static PyObject * bufferediobase_unsupported(const char *message) { _PyIO_State *state = IO_STATE(); @@ -167,6 +184,7 @@ static PyMethodDef bufferediobase_methods[] = { {"read", bufferediobase_read, METH_VARARGS, bufferediobase_read_doc}, {"read1", bufferediobase_read1, METH_VARARGS, bufferediobase_read1_doc}, {"readinto", bufferediobase_readinto, METH_VARARGS, NULL}, + {"readinto1", bufferediobase_readinto1, METH_VARARGS, NULL}, {"write", bufferediobase_write, METH_VARARGS, bufferediobase_write_doc}, {NULL, NULL} }; @@ -662,11 +680,7 @@ static void _set_BlockingIOError(char *msg, Py_ssize_t written) { PyObject *err; -#ifdef Py_DEBUG - /* in debug mode, PyEval_EvalFrameEx() fails with an assertion error - if an exception is set when it is called */ PyErr_Clear(); -#endif err = PyObject_CallFunction(PyExc_BlockingIOError, "isn", errno, msg, written); if (err) @@ -977,7 +991,7 @@ buffered_read1(buffered *self, PyObject *args) } static PyObject * -buffered_readinto(buffered *self, PyObject *args) +_buffered_readinto_generic(buffered *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t n, written = 0, remaining; @@ -985,7 +999,9 @@ buffered_readinto(buffered *self, PyObject *args) CHECK_INITIALIZED(self) - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) return NULL; n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); @@ -1023,7 +1039,10 @@ buffered_readinto(buffered *self, PyObject *args) n = _bufferedreader_raw_read(self, (char *) buf.buf + written, remaining); } - else { + + /* In readinto1 mode, we do not want to fill the internal + buffer if we already have some data to return */ + else if (!(readinto1 && written)) { n = _bufferedreader_fill_buffer(self); if (n > 0) { if (n > remaining) @@ -1034,6 +1053,9 @@ buffered_readinto(buffered *self, PyObject *args) continue; /* short circuit */ } } + else + n = 0; + if (n == 0 || (n == -2 && written > 0)) break; if (n < 0) { @@ -1043,6 +1065,12 @@ buffered_readinto(buffered *self, PyObject *args) } goto end; } + + /* At most one read in readinto1 mode */ + if (readinto1) { + written += n; + break; + } } res = PyLong_FromSsize_t(written); @@ -1054,6 +1082,19 @@ end_unlocked: } static PyObject * +buffered_readinto(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 0); +} + +static PyObject * +buffered_readinto1(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 1); +} + + +static PyObject * _buffered_readline(buffered *self, Py_ssize_t limit) { PyObject *res = NULL; @@ -1738,6 +1779,7 @@ static PyMethodDef bufferedreader_methods[] = { {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, @@ -2339,6 +2381,12 @@ bufferedrwpair_readinto(rwpair *self, PyObject *args) } static PyObject * +bufferedrwpair_readinto1(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, &PyId_readinto1, args); +} + +static PyObject * bufferedrwpair_write(rwpair *self, PyObject *args) { return _forward_call(self->writer, &PyId_write, args); @@ -2409,6 +2457,7 @@ static PyMethodDef bufferedrwpair_methods[] = { {"peek", (PyCFunction)bufferedrwpair_peek, METH_VARARGS}, {"read1", (PyCFunction)bufferedrwpair_read1, METH_VARARGS}, {"readinto", (PyCFunction)bufferedrwpair_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)bufferedrwpair_readinto1, METH_VARARGS}, {"write", (PyCFunction)bufferedrwpair_write, METH_VARARGS}, {"flush", (PyCFunction)bufferedrwpair_flush, METH_NOARGS}, @@ -2557,6 +2606,7 @@ static PyMethodDef bufferedrandom_methods[] = { {"read", (PyCFunction)buffered_read, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"write", (PyCFunction)bufferedwriter_write, METH_VARARGS}, diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 1537d979a3..fc4ea74b23 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -4,10 +4,9 @@ typedef struct { PyObject_HEAD - char *buf; + PyObject *buf; Py_ssize_t pos; Py_ssize_t string_size; - size_t buf_size; PyObject *dict; PyObject *weakreflist; Py_ssize_t exports; @@ -18,6 +17,12 @@ typedef struct { bytesio *source; } bytesiobuf; +/* The bytesio object can be in three states: + * Py_REFCNT(buf) == 1, exports == 0. + * Py_REFCNT(buf) > 1. exports == 0, + first modification or export causes the internal buffer copying. + * exports > 0. Py_REFCNT(buf) == 1, any modifications are forbidden. +*/ #define CHECK_CLOSED(self) \ if ((self)->buf == NULL) { \ @@ -33,40 +38,60 @@ typedef struct { return NULL; \ } +#define SHARED_BUF(self) (Py_REFCNT((self)->buf) > 1) + /* Internal routine to get a line from the buffer of a BytesIO object. Returns the length between the current position to the next newline character. */ static Py_ssize_t -get_line(bytesio *self, char **output) +scan_eol(bytesio *self, Py_ssize_t len) { - char *n; - const char *str_end; - Py_ssize_t len; + const char *start, *n; + Py_ssize_t maxlen; assert(self->buf != NULL); /* Move to the end of the line, up to the end of the string, s. */ - str_end = self->buf + self->string_size; - for (n = self->buf + self->pos; - n < str_end && *n != '\n'; - n++); - - /* Skip the newline character */ - if (n < str_end) - n++; - - /* Get the length from the current position to the end of the line. */ - len = n - (self->buf + self->pos); - *output = self->buf + self->pos; - + start = PyBytes_AS_STRING(self->buf) + self->pos; + maxlen = self->string_size - self->pos; + if (len < 0 || len > maxlen) + len = maxlen; + + if (len) { + n = memchr(start, '\n', len); + if (n) + /* Get the length from the current position to the end of + the line. */ + len = n - start + 1; + } assert(len >= 0); assert(self->pos < PY_SSIZE_T_MAX - len); - self->pos += len; return len; } +/* Internal routine for detaching the shared buffer of BytesIO objects. + The caller should ensure that the 'size' argument is non-negative and + not lesser than self->string_size. Returns 0 on success, -1 otherwise. */ +static int +unshare_buffer(bytesio *self, size_t size) +{ + PyObject *new_buf, *old_buf; + assert(SHARED_BUF(self)); + assert(self->exports == 0); + assert(size >= (size_t)self->string_size); + new_buf = PyBytes_FromStringAndSize(NULL, size); + if (new_buf == NULL) + return -1; + memcpy(PyBytes_AS_STRING(new_buf), PyBytes_AS_STRING(self->buf), + self->string_size); + old_buf = self->buf; + self->buf = new_buf; + Py_DECREF(old_buf); + return 0; +} + /* Internal routine for changing the size of the buffer of BytesIO objects. The caller should ensure that the 'size' argument is non-negative. Returns 0 on success, -1 otherwise. */ @@ -75,8 +100,7 @@ resize_buffer(bytesio *self, size_t size) { /* Here, unsigned types are used to avoid dealing with signed integer overflow, which is undefined in C. */ - size_t alloc = self->buf_size; - char *new_buf = NULL; + size_t alloc = PyBytes_GET_SIZE(self->buf); assert(self->buf != NULL); @@ -104,13 +128,15 @@ resize_buffer(bytesio *self, size_t size) if (alloc > ((size_t)-1) / sizeof(char)) goto overflow; - new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char)); - if (new_buf == NULL) { - PyErr_NoMemory(); - return -1; + + if (SHARED_BUF(self)) { + if (unshare_buffer(self, alloc) < 0) + return -1; + } + else { + if (_PyBytes_Resize(&self->buf, alloc) < 0) + return -1; } - self->buf_size = alloc; - self->buf = new_buf; return 0; @@ -125,12 +151,18 @@ resize_buffer(bytesio *self, size_t size) static Py_ssize_t write_bytes(bytesio *self, const char *bytes, Py_ssize_t len) { + size_t endpos; assert(self->buf != NULL); assert(self->pos >= 0); assert(len >= 0); - if ((size_t)self->pos + len > self->buf_size) { - if (resize_buffer(self, (size_t)self->pos + len) < 0) + endpos = (size_t)self->pos + len; + if (endpos > (size_t)PyBytes_GET_SIZE(self->buf)) { + if (resize_buffer(self, endpos) < 0) + return -1; + } + else if (SHARED_BUF(self)) { + if (unshare_buffer(self, Py_MAX(endpos, (size_t)self->string_size)) < 0) return -1; } @@ -143,18 +175,18 @@ write_bytes(bytesio *self, const char *bytes, Py_ssize_t len) | | <--to pad-->|<---to write---> | 0 buf position */ - memset(self->buf + self->string_size, '\0', + memset(PyBytes_AS_STRING(self->buf) + self->string_size, '\0', (self->pos - self->string_size) * sizeof(char)); } /* Copy the data to the internal buffer, overwriting some of the existing data if self->pos < self->string_size. */ - memcpy(self->buf + self->pos, bytes, len); - self->pos += len; + memcpy(PyBytes_AS_STRING(self->buf) + self->pos, bytes, len); + self->pos = endpos; /* Set the new length of the internal string if it has changed. */ - if (self->string_size < self->pos) { - self->string_size = self->pos; + if ((size_t)self->string_size < endpos) { + self->string_size = endpos; } return len; @@ -231,7 +263,22 @@ static PyObject * bytesio_getvalue(bytesio *self) { CHECK_CLOSED(self); - return PyBytes_FromStringAndSize(self->buf, self->string_size); + if (self->string_size <= 1 || self->exports > 0) + return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self->buf), + self->string_size); + + if (self->string_size != PyBytes_GET_SIZE(self->buf)) { + if (SHARED_BUF(self)) { + if (unshare_buffer(self, self->string_size) < 0) + return NULL; + } + else { + if (_PyBytes_Resize(&self->buf, self->string_size) < 0) + return NULL; + } + } + Py_INCREF(self->buf); + return self->buf; } PyDoc_STRVAR(isatty_doc, @@ -257,6 +304,26 @@ bytesio_tell(bytesio *self) return PyLong_FromSsize_t(self->pos); } +static PyObject * +read_bytes(bytesio *self, Py_ssize_t size) +{ + char *output; + + assert(self->buf != NULL); + assert(size <= self->string_size); + if (size > 1 && + self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) && + self->exports == 0) { + self->pos += size; + Py_INCREF(self->buf); + return self->buf; + } + + output = PyBytes_AS_STRING(self->buf) + self->pos; + self->pos += size; + return PyBytes_FromStringAndSize(output, size); +} + PyDoc_STRVAR(read_doc, "read([size]) -> read at most size bytes, returned as a string.\n" "\n" @@ -267,7 +334,6 @@ static PyObject * bytesio_read(bytesio *self, PyObject *args) { Py_ssize_t size, n; - char *output; PyObject *arg = Py_None; CHECK_CLOSED(self); @@ -298,11 +364,7 @@ bytesio_read(bytesio *self, PyObject *args) size = 0; } - assert(self->buf != NULL); - output = self->buf + self->pos; - self->pos += size; - - return PyBytes_FromStringAndSize(output, size); + return read_bytes(self, size); } @@ -336,7 +398,6 @@ static PyObject * bytesio_readline(bytesio *self, PyObject *args) { Py_ssize_t size, n; - char *output; PyObject *arg = Py_None; CHECK_CLOSED(self); @@ -359,15 +420,9 @@ bytesio_readline(bytesio *self, PyObject *args) return NULL; } - n = get_line(self, &output); - - if (size >= 0 && size < n) { - size = n - size; - n -= size; - self->pos -= size; - } + n = scan_eol(self, size); - return PyBytes_FromStringAndSize(output, n); + return read_bytes(self, n); } PyDoc_STRVAR(readlines_doc, @@ -410,7 +465,9 @@ bytesio_readlines(bytesio *self, PyObject *args) if (!result) return NULL; - while ((n = get_line(self, &output)) != 0) { + output = PyBytes_AS_STRING(self->buf) + self->pos; + while ((n = scan_eol(self, -1)) != 0) { + self->pos += n; line = PyBytes_FromStringAndSize(output, n); if (!line) goto on_error; @@ -422,6 +479,7 @@ bytesio_readlines(bytesio *self, PyObject *args) size += n; if (maxsize > 0 && size >= maxsize) break; + output += n; } return result; @@ -456,7 +514,7 @@ bytesio_readinto(bytesio *self, PyObject *arg) len = 0; } - memcpy(buffer.buf, self->buf + self->pos, len); + memcpy(buffer.buf, PyBytes_AS_STRING(self->buf) + self->pos, len); assert(self->pos + len < PY_SSIZE_T_MAX); assert(len >= 0); self->pos += len; @@ -516,17 +574,16 @@ bytesio_truncate(bytesio *self, PyObject *args) static PyObject * bytesio_iternext(bytesio *self) { - char *next; Py_ssize_t n; CHECK_CLOSED(self); - n = get_line(self, &next); + n = scan_eol(self, -1); - if (!next || n == 0) + if (n == 0) return NULL; - return PyBytes_FromStringAndSize(next, n); + return read_bytes(self, n); } PyDoc_STRVAR(seek_doc, @@ -658,10 +715,7 @@ static PyObject * bytesio_close(bytesio *self) { CHECK_EXPORTS(self); - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + Py_CLEAR(self->buf); Py_RETURN_NONE; } @@ -791,10 +845,7 @@ bytesio_dealloc(bytesio *self) "deallocated BytesIO object has exported buffers"); PyErr_Print(); } - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + Py_CLEAR(self->buf); Py_CLEAR(self->dict); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); @@ -814,7 +865,7 @@ bytesio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) /* tp_alloc initializes all the fields to zero. So we don't have to initialize them here. */ - self->buf = (char *)PyMem_Malloc(0); + self->buf = PyBytes_FromStringAndSize(NULL, 0); if (self->buf == NULL) { Py_DECREF(self); return PyErr_NoMemory(); @@ -837,13 +888,26 @@ bytesio_init(bytesio *self, PyObject *args, PyObject *kwds) self->string_size = 0; self->pos = 0; + if (self->exports > 0) { + PyErr_SetString(PyExc_BufferError, + "Existing exports of data: object cannot be re-sized"); + return -1; + } if (initvalue && initvalue != Py_None) { - PyObject *res; - res = bytesio_write(self, initvalue); - if (res == NULL) - return -1; - Py_DECREF(res); - self->pos = 0; + if (PyBytes_CheckExact(initvalue)) { + Py_INCREF(initvalue); + Py_XDECREF(self->buf); + self->buf = initvalue; + self->string_size = PyBytes_GET_SIZE(initvalue); + } + else { + PyObject *res; + res = bytesio_write(self, initvalue); + if (res == NULL) + return -1; + Py_DECREF(res); + self->pos = 0; + } } return 0; @@ -855,8 +919,8 @@ bytesio_sizeof(bytesio *self, void *unused) Py_ssize_t res; res = sizeof(bytesio); - if (self->buf) - res += self->buf_size; + if (self->buf && !SHARED_BUF(self)) + res += _PySys_GetSizeOf(self->buf); return PyLong_FromSsize_t(res); } @@ -964,18 +1028,24 @@ PyTypeObject PyBytesIO_Type = { static int bytesiobuf_getbuffer(bytesiobuf *obj, Py_buffer *view, int flags) { - int ret; bytesio *b = (bytesio *) obj->source; + if (view == NULL) { - b->exports++; - return 0; + PyErr_SetString(PyExc_BufferError, + "bytesiobuf_getbuffer: view==NULL argument is obsolete"); + return -1; } - ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size, - 0, flags); - if (ret >= 0) { - b->exports++; + if (SHARED_BUF(b)) { + if (unshare_buffer(b, b->string_size) < 0) + return -1; } - return ret; + + /* cannot fail if view != NULL and readonly == 0 */ + (void)PyBuffer_FillInfo(view, (PyObject*)obj, + PyBytes_AS_STRING(b->buf), b->string_size, + 0, flags); + b->exports++; + return 0; } static void diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 80ca99c5bc..b56a9c3f43 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -53,6 +53,7 @@ typedef struct { signed int seekable : 2; /* -1 means unknown */ unsigned int closefd : 1; char finalizing; + unsigned int blksize; PyObject *weakreflist; PyObject *dict; } fileio; @@ -168,6 +169,7 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->writable = 0; self->appending = 0; self->seekable = -1; + self->blksize = 0; self->closefd = 1; self->weakreflist = NULL; } @@ -175,44 +177,6 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *) self; } -/* On Unix, open will succeed for directories. - In Python, there should be no file objects referring to - directories, so we need a check. */ - -static int -dircheck(fileio* self, PyObject *nameobj) -{ -#if defined(HAVE_FSTAT) && defined(S_ISDIR) && defined(EISDIR) - struct stat buf; - if (self->fd < 0) - return 0; - if (fstat(self->fd, &buf) == 0 && S_ISDIR(buf.st_mode)) { - errno = EISDIR; - PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, nameobj); - return -1; - } -#endif - return 0; -} - -static int -check_fd(int fd) -{ -#if defined(HAVE_FSTAT) - struct stat buf; - if (!_PyVerify_fd(fd) || (fstat(fd, &buf) < 0 && errno == EBADF)) { - PyObject *exc; - char *msg = strerror(EBADF); - exc = PyObject_CallFunction(PyExc_OSError, "(is)", - EBADF, msg); - PyErr_SetObject(PyExc_OSError, exc); - Py_XDECREF(exc); - return -1; - } -#endif - return 0; -} - #ifdef O_CLOEXEC extern int _Py_open_cloexec_works; #endif @@ -240,6 +204,8 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) #elif !defined(MS_WINDOWS) int *atomic_flag_works = NULL; #endif + struct _Py_stat_struct fdfstat; + int async_err = 0; assert(PyFileIO_Check(oself)); if (self->fd >= 0) { @@ -278,7 +244,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) int rv = _PyUnicode_HasNULChars(nameobj); if (rv) { if (rv != -1) - PyErr_SetString(PyExc_TypeError, "embedded NUL character"); + PyErr_SetString(PyExc_ValueError, "embedded null character"); return -1; } widename = PyUnicode_AsUnicode(nameobj); @@ -367,8 +333,6 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) #endif if (fd >= 0) { - if (check_fd(fd)) - goto error; self->fd = fd; self->closefd = closefd; } @@ -382,15 +346,20 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) errno = 0; if (opener == Py_None) { - Py_BEGIN_ALLOW_THREADS + do { + Py_BEGIN_ALLOW_THREADS #ifdef MS_WINDOWS - if (widename != NULL) - self->fd = _wopen(widename, flags, 0666); - else + if (widename != NULL) + self->fd = _wopen(widename, flags, 0666); + else #endif - self->fd = open(name, flags, 0666); + self->fd = open(name, flags, 0666); + Py_END_ALLOW_THREADS + } while (self->fd < 0 && errno == EINTR && + !(async_err = PyErr_CheckSignals())); - Py_END_ALLOW_THREADS + if (async_err) + goto error; } else { PyObject *fdobj; @@ -428,8 +397,24 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds) goto error; #endif } - if (dircheck(self, nameobj) < 0) + + self->blksize = DEFAULT_BUFFER_SIZE; + if (_Py_fstat(self->fd, &fdfstat) < 0) + goto error; +#if defined(S_ISDIR) && defined(EISDIR) + /* On Unix, open will succeed for directories. + In Python, there should be no file objects referring to + directories, so we need a check. */ + if (S_ISDIR(fdfstat.st_mode)) { + errno = EISDIR; + PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, nameobj); goto error; + } +#endif /* defined(S_ISDIR) */ +#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE + if (fdfstat.st_blksize > 1) + self->blksize = fdfstat.st_blksize; +#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */ #if defined(MS_WINDOWS) || defined(__CYGWIN__) /* don't translate newlines (\r\n <=> \n) */ @@ -553,7 +538,7 @@ static PyObject * fileio_readinto(fileio *self, PyObject *args) { Py_buffer pbuf; - Py_ssize_t n, len; + Py_ssize_t n; int err; if (self->fd < 0) @@ -564,45 +549,22 @@ fileio_readinto(fileio *self, PyObject *args) if (!PyArg_ParseTuple(args, "w*", &pbuf)) return NULL; - if (_PyVerify_fd(self->fd)) { - len = pbuf.len; - Py_BEGIN_ALLOW_THREADS - errno = 0; -#ifdef MS_WINDOWS - if (len > INT_MAX) - len = INT_MAX; - n = read(self->fd, pbuf.buf, (int)len); -#else - n = read(self->fd, pbuf.buf, len); -#endif - Py_END_ALLOW_THREADS - } else - n = -1; + n = _Py_read(self->fd, pbuf.buf, pbuf.len); + /* copy errno because PyBuffer_Release() can indirectly modify it */ err = errno; PyBuffer_Release(&pbuf); - if (n < 0) { - if (err == EAGAIN) + + if (n == -1) { + if (err == EAGAIN) { + PyErr_Clear(); Py_RETURN_NONE; - errno = err; - PyErr_SetFromErrno(PyExc_IOError); + } return NULL; } return PyLong_FromSsize_t(n); } -#ifndef HAVE_FSTAT - -static PyObject * -fileio_readall(fileio *self) -{ - _Py_IDENTIFIER(readall); - return _PyObject_CallMethodId((PyObject*)&PyRawIOBase_Type, - &PyId_readall, "O", self); -} - -#else - static size_t new_buffersize(fileio *self, size_t currentsize) { @@ -625,7 +587,7 @@ new_buffersize(fileio *self, size_t currentsize) static PyObject * fileio_readall(fileio *self) { - struct stat st; + struct _Py_stat_struct status; Py_off_t pos, end; PyObject *result; Py_ssize_t bytes_read = 0; @@ -642,8 +604,8 @@ fileio_readall(fileio *self) #else pos = lseek(self->fd, 0L, SEEK_CUR); #endif - if (fstat(self->fd, &st) == 0) - end = st.st_size; + if (_Py_fstat_noraise(self->fd, &status) == 0) + end = status.st_size; else end = (Py_off_t)-1; @@ -677,35 +639,22 @@ fileio_readall(fileio *self) return NULL; } } - Py_BEGIN_ALLOW_THREADS - errno = 0; - n = bufsize - bytes_read; -#ifdef MS_WINDOWS - if (n > INT_MAX) - n = INT_MAX; - n = read(self->fd, PyBytes_AS_STRING(result) + bytes_read, (int)n); -#else - n = read(self->fd, PyBytes_AS_STRING(result) + bytes_read, n); -#endif - Py_END_ALLOW_THREADS + + n = _Py_read(self->fd, + PyBytes_AS_STRING(result) + bytes_read, + bufsize - bytes_read); + if (n == 0) break; - if (n < 0) { - if (errno == EINTR) { - if (PyErr_CheckSignals()) { - Py_DECREF(result); - return NULL; - } - continue; - } + if (n == -1) { if (errno == EAGAIN) { + PyErr_Clear(); if (bytes_read > 0) break; Py_DECREF(result); Py_RETURN_NONE; } Py_DECREF(result); - PyErr_SetFromErrno(PyExc_IOError); return NULL; } bytes_read += n; @@ -719,8 +668,6 @@ fileio_readall(fileio *self) return result; } -#endif /* HAVE_FSTAT */ - static PyObject * fileio_read(fileio *self, PyObject *args) { @@ -737,38 +684,29 @@ fileio_read(fileio *self, PyObject *args) if (!PyArg_ParseTuple(args, "|O&", &_PyIO_ConvertSsize_t, &size)) return NULL; - if (size < 0) { + if (size < 0) return fileio_readall(self); - } #ifdef MS_WINDOWS + /* On Windows, the count parameter of read() is an int */ if (size > INT_MAX) size = INT_MAX; #endif + bytes = PyBytes_FromStringAndSize(NULL, size); if (bytes == NULL) return NULL; ptr = PyBytes_AS_STRING(bytes); - if (_PyVerify_fd(self->fd)) { - Py_BEGIN_ALLOW_THREADS - errno = 0; -#ifdef MS_WINDOWS - n = read(self->fd, ptr, (int)size); -#else - n = read(self->fd, ptr, size); -#endif - Py_END_ALLOW_THREADS - } else - n = -1; - - if (n < 0) { + n = _Py_read(self->fd, ptr, size); + if (n == -1) { + /* copy errno because Py_DECREF() can indirectly modify it */ int err = errno; Py_DECREF(bytes); - if (err == EAGAIN) + if (err == EAGAIN) { + PyErr_Clear(); Py_RETURN_NONE; - errno = err; - PyErr_SetFromErrno(PyExc_IOError); + } return NULL; } @@ -786,7 +724,7 @@ static PyObject * fileio_write(fileio *self, PyObject *args) { Py_buffer pbuf; - Py_ssize_t n, len; + Py_ssize_t n; int err; if (self->fd < 0) @@ -797,36 +735,16 @@ fileio_write(fileio *self, PyObject *args) if (!PyArg_ParseTuple(args, "y*", &pbuf)) return NULL; - if (_PyVerify_fd(self->fd)) { - Py_BEGIN_ALLOW_THREADS - errno = 0; - len = pbuf.len; -#ifdef MS_WINDOWS - if (len > 32767 && isatty(self->fd)) { - /* Issue #11395: the Windows console returns an error (12: not - enough space error) on writing into stdout if stdout mode is - binary and the length is greater than 66,000 bytes (or less, - depending on heap usage). */ - len = 32767; - } - else if (len > INT_MAX) - len = INT_MAX; - n = write(self->fd, pbuf.buf, (int)len); -#else - n = write(self->fd, pbuf.buf, len); -#endif - Py_END_ALLOW_THREADS - } else - n = -1; + n = _Py_write(self->fd, pbuf.buf, pbuf.len); + /* copy errno because PyBuffer_Release() can indirectly modify it */ err = errno; - PyBuffer_Release(&pbuf); if (n < 0) { - if (err == EAGAIN) + if (err == EAGAIN) { + PyErr_Clear(); Py_RETURN_NONE; - errno = err; - PyErr_SetFromErrno(PyExc_IOError); + } return NULL; } @@ -1058,12 +976,14 @@ fileio_repr(fileio *self) PyErr_Clear(); else return NULL; - res = PyUnicode_FromFormat("<_io.FileIO fd=%d mode='%s'>", - self->fd, mode_string(self)); + res = PyUnicode_FromFormat( + "<_io.FileIO fd=%d mode='%s' closefd=%s>", + self->fd, mode_string(self), self->closefd ? "True" : "False"); } else { - res = PyUnicode_FromFormat("<_io.FileIO name=%R mode='%s'>", - nameobj, mode_string(self)); + res = PyUnicode_FromFormat( + "<_io.FileIO name=%R mode='%s' closefd=%s>", + nameobj, mode_string(self), self->closefd ? "True" : "False"); Py_DECREF(nameobj); } return res; @@ -1225,6 +1145,7 @@ static PyGetSetDef fileio_getsetlist[] = { }; static PyMemberDef fileio_members[] = { + {"_blksize", T_UINT, offsetof(fileio, blksize), 0}, {"_finalizing", T_BOOL, offsetof(fileio, finalizing), 0}, {NULL} }; diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index d1c0d01232..af232bf6ed 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -224,8 +224,8 @@ typedef struct { PyObject_HEAD PyObject *decoder; PyObject *errors; - signed int pendingcr: 1; - signed int translate: 1; + unsigned int pendingcr: 1; + unsigned int translate: 1; unsigned int seennl: 3; } nldecoder_object; @@ -546,7 +546,7 @@ incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state) if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) return NULL; - self->pendingcr = (int) flag & 1; + self->pendingcr = (int) (flag & 1); flag >>= 1; if (self->decoder != Py_None) @@ -1441,6 +1441,7 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) PyObject *dec_buffer = NULL; PyObject *dec_flags = NULL; PyObject *input_chunk = NULL; + Py_buffer input_chunk_buf; PyObject *decoded_chars, *chunk_size; Py_ssize_t nbytes, nchars; int eof; @@ -1472,6 +1473,15 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) Py_DECREF(state); return -1; } + + if (!PyBytes_Check(dec_buffer)) { + PyErr_Format(PyExc_TypeError, + "decoder getstate() should have returned a bytes " + "object, not '%.200s'", + Py_TYPE(dec_buffer)->tp_name); + Py_DECREF(state); + return -1; + } Py_INCREF(dec_buffer); Py_INCREF(dec_flags); Py_DECREF(state); @@ -1484,23 +1494,24 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint)); if (chunk_size == NULL) goto fail; + input_chunk = PyObject_CallMethodObjArgs(self->buffer, (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read), chunk_size, NULL); Py_DECREF(chunk_size); if (input_chunk == NULL) goto fail; - if (!PyBytes_Check(input_chunk)) { + + if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) { PyErr_Format(PyExc_TypeError, - "underlying %s() should have returned a bytes object, " + "underlying %s() should have returned a bytes-like object, " "not '%.200s'", (self->has_read1 ? "read1": "read"), Py_TYPE(input_chunk)->tp_name); goto fail; } - nbytes = PyBytes_Size(input_chunk); + nbytes = input_chunk_buf.len; eof = (nbytes == 0); - if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) { decoded_chars = _PyIncrementalNewlineDecoder_decode( self->decoder, input_chunk, eof); @@ -1509,6 +1520,7 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) decoded_chars = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL); } + PyBuffer_Release(&input_chunk_buf); if (check_decoded(decoded_chars) < 0) goto fail; @@ -1525,18 +1537,12 @@ textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) /* At the snapshot point, len(dec_buffer) bytes before the read, the * next input to be decoded is dec_buffer + input_chunk. */ - PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk); - if (next_input == NULL) - goto fail; - if (!PyBytes_Check(next_input)) { - PyErr_Format(PyExc_TypeError, - "decoder getstate() should have returned a bytes " - "object, not '%.200s'", - Py_TYPE(next_input)->tp_name); - Py_DECREF(next_input); + PyObject *next_input = dec_buffer; + PyBytes_Concat(&next_input, input_chunk); + if (next_input == NULL) { + dec_buffer = NULL; /* Reference lost to PyBytes_Concat */ goto fail; } - Py_DECREF(dec_buffer); Py_CLEAR(self->snapshot); self->snapshot = Py_BuildValue("NN", dec_flags, next_input); } @@ -1725,7 +1731,7 @@ _PyIO_find_line_ending( else { /* Non-universal mode. */ Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl); - char *nl = PyUnicode_DATA(readnl); + Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl); /* Assume that readnl is an ASCII character. */ assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND); if (readnl_len == 1) { |