diff options
author | Siddhesh Poyarekar <siddhesh@redhat.com> | 2014-03-04 07:45:58 +0530 |
---|---|---|
committer | Siddhesh Poyarekar <siddhesh@redhat.com> | 2014-03-04 07:45:58 +0530 |
commit | 000232b9bcbf194f1e5fd0ff380000f341505405 (patch) | |
tree | 252e22a971caa7021ae130c080ccb161daba33cc /libio/fileops.c | |
parent | 116f4d1ad64a061488b29ef89f1a60f5e8c5963b (diff) | |
download | glibc-000232b9bcbf194f1e5fd0ff380000f341505405.tar.gz |
Separate ftell from fseek logic and avoid modifying FILE data (#16532)
ftell semantics are distinct from fseek(SEEK_CUR), especially when
ftell is called on a file handle that is not yet active (i.e. before
fread, fwrite, etc. have been used on it). Because of this caveat, much
care needs to be taken while modifying the handle data. Hence, this
first iteration of separating out ftell focuses on maintaining the
integrity of the handle data at all times while it figures out the
current stream offset. The result is that it makes a syscall for
every offset request.
There is scope for optimizing this by caching offsets when we know
that the handle is active. A simple way to detect this is to check
whether the buffers have data. It is not so simple to detect this when
the buffer is empty without adding some kind of flag.
Diffstat (limited to 'libio/fileops.c')
-rw-r--r-- | libio/fileops.c | 105 |
1 files changed, 75 insertions, 30 deletions
diff --git a/libio/fileops.c b/libio/fileops.c index a3499be460..500629564a 100644 --- a/libio/fileops.c +++ b/libio/fileops.c @@ -929,6 +929,71 @@ _IO_file_sync_mmap (_IO_FILE *fp) return 0; } +/* Get the current file offset using a system call. This is the safest method + to get the current file offset, since we are sure that we get the current + state of the file. Before the stream handle is activated (by using fread, + fwrite, etc.), an application may alter the state of the file descriptor + underlying it by calling read/write/lseek on it. Using a cached offset at + this point will result in returning the incorrect value. Same is the case + when one switches from reading in a+ mode to writing, where the buffer has + not been flushed - the cached offset would reflect the reading position + while the actual write position would be at the end of the file. + + do_ftell and do_ftell_wide may resort to using the cached offset in some + special cases instead of calling get_file_offset, but those cases should be + thoroughly described. */ +_IO_off64_t +get_file_offset (_IO_FILE *fp) +{ + if ((fp->_flags & _IO_IS_APPENDING) == _IO_IS_APPENDING) + { + struct stat64 st; + bool ret = (_IO_SYSSTAT (fp, &st) == 0 && S_ISREG (st.st_mode)); + if (ret) + return st.st_size; + else + return EOF; + } + else + return _IO_SYSSEEK (fp, 0, _IO_seek_cur); +} + + +/* ftell{,o} implementation. Don't modify any state of the file pointer while + we try to get the current state of the stream. */ +static _IO_off64_t +do_ftell (_IO_FILE *fp) +{ + _IO_off64_t result; + + result = get_file_offset (fp); + + if (result == EOF) + return result; + + /* No point looking at unflushed data if we haven't allocated buffers + yet. */ + if (fp->_IO_buf_base != NULL) + { + bool was_writing = (fp->_IO_write_ptr > fp->_IO_write_base + || _IO_in_put_mode (fp)); + + /* Adjust for unflushed data. 
*/ + if (!was_writing) + result -= fp->_IO_read_end - fp->_IO_read_ptr; + else + result += fp->_IO_write_ptr - fp->_IO_read_end; + } + + if (result < 0) + { + __set_errno (EINVAL); + return EOF; + } + + return result; +} + _IO_off64_t _IO_new_file_seekoff (fp, offset, dir, mode) @@ -940,6 +1005,13 @@ _IO_new_file_seekoff (fp, offset, dir, mode) _IO_off64_t result; _IO_off64_t delta, new_offset; long count; + + /* Short-circuit into a separate function. We don't want to mix any + functionality and we don't want to touch anything inside the FILE + object. */ + if (mode == 0) + return do_ftell (fp); + /* POSIX.1 8.2.3.7 says that after a call the fflush() the file offset of the underlying file must be exact. */ int must_be_exact = (fp->_IO_read_base == fp->_IO_read_end @@ -948,9 +1020,6 @@ _IO_new_file_seekoff (fp, offset, dir, mode) bool was_writing = (fp->_IO_write_ptr > fp->_IO_write_base || _IO_in_put_mode (fp)); - if (mode == 0) - dir = _IO_seek_cur, offset = 0; /* Don't move any pointers. */ - /* Flush unwritten characters. (This may do an unneeded write if we seek within the buffer. But to be able to switch to reading, we would need to set @@ -958,7 +1027,7 @@ _IO_new_file_seekoff (fp, offset, dir, mode) which assumes file_ptr() is eGptr. Anyway, since we probably end up flushing when we close(), it doesn't make much difference.) FIXME: simulate mem-mapped files. */ - else if (was_writing && _IO_switch_to_get_mode (fp)) + if (was_writing && _IO_switch_to_get_mode (fp)) return EOF; if (fp->_IO_buf_base == NULL) @@ -978,30 +1047,10 @@ _IO_new_file_seekoff (fp, offset, dir, mode) { case _IO_seek_cur: /* Adjust for read-ahead (bytes is buffer). */ - if (mode != 0 || !was_writing) - offset -= fp->_IO_read_end - fp->_IO_read_ptr; - else - { - /* _IO_read_end coincides with fp._offset, so the actual file position - is fp._offset - (_IO_read_end - new_write_ptr). 
This is fine - even if fp._offset is not set, since fp->_IO_read_end is then at - _IO_buf_base and this adjustment is for unbuffered output. */ - offset -= fp->_IO_read_end - fp->_IO_write_ptr; - } + offset -= fp->_IO_read_end - fp->_IO_read_ptr; if (fp->_offset == _IO_pos_BAD) - { - if (mode != 0) - goto dumb; - else - { - result = _IO_SYSSEEK (fp, 0, dir); - if (result == EOF) - return result; - - fp->_offset = result; - } - } + goto dumb; /* Make offset absolute, assuming current pointer is file_ptr(). */ offset += fp->_offset; if (offset < 0) @@ -1028,10 +1077,6 @@ _IO_new_file_seekoff (fp, offset, dir, mode) } /* At this point, dir==_IO_seek_set. */ - /* If we are only interested in the current position we've found it now. */ - if (mode == 0) - return offset; - /* If destination is within current buffer, optimize: */ if (fp->_offset != _IO_pos_BAD && fp->_IO_read_base != NULL && !_IO_in_backup (fp)) |