diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2023-03-04 11:42:16 -0800 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2023-03-04 14:49:45 -0800 |
commit | aa266f1b3dc4e12acdc46cc0f562adc03c2c0b8f (patch) | |
tree | 3c34b524283b144e8e437eccbf5b11690279eb3d | |
parent | fe64f8be015050500b2be4678a8ce954fde576db (diff) | |
download | coreutils-aa266f1b3dc4e12acdc46cc0f562adc03c2c0b8f.tar.gz |
split: port ‘split -n N /dev/null’ better to macOS
* src/split.c (input_file_size): Do not bother with lseek if the
initial read probe reaches EOF, since the file size is known then.
This works better on macOS, which doesn’t allow lseek on /dev/null.
Do not special-case size-zero files, as the issue can occur
with any size file (though /proc files are the most common).
If the current position is past end of file, treat this as
size zero regardless of whether the file has a usable st_size.
Pass through lseek -1 return values rather than using ‘return -1’;
this makes the code a bit easier to analyze (and a bit faster).
Avoid undefined behavior if the size calculation overflows.
(lines_chunk_split, bytes_chunk_extract): Do not bother with lseek
if it would have no effect if successful. This works better on macOS,
which doesn’t allow lseek on /dev/null.
* tests/split/l-chunk.sh: Adjust to match fixed behavior.
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | src/split.c | 66 | ||||
-rwxr-xr-x | tests/split/l-chunk.sh | 7 |
3 files changed, 40 insertions, 37 deletions
@@ -54,6 +54,10 @@ GNU coreutils NEWS -*- outline -*-
   long been documented to be platform-dependent.
   [bug introduced 1999-05-02 and only partly fixed in coreutils-8.14]
 
+  split with -l or -n no longer misbehaves on small piped input, on
+  small GNU/Linux /proc files, or on macOS /dev/null.
+  [bug introduced in coreutils-8.8]
+
   stty ispeed and ospeed options no longer accept and silently ignore
   invalid speed arguments, or give false warnings for valid speeds.
   Now they're validated against both the general accepted set,
diff --git a/src/split.c b/src/split.c
index c66bc69a2..424ca9fe0 100644
--- a/src/split.c
+++ b/src/split.c
@@ -283,14 +283,6 @@ CHUNKS may be:\n\
 static off_t
 input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize)
 {
-  off_t cur = lseek (fd, 0, SEEK_CUR);
-  if (cur < 0)
-    {
-      if (errno == ESPIPE)
-        errno = 0; /* Suppress confusing seek error. */
-      return -1;
-    }
-
   off_t size = 0;
   do
     {
@@ -303,45 +295,49 @@ input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize)
     }
   while (size < bufsize);
 
-  /* Note we check st_size _after_ the read() above
-     because /proc files on GNU/Linux are seekable
-     but have st_size == 0. */
-  if (st->st_size == 0)
+  off_t cur = lseek (fd, 0, SEEK_CUR);
+  if (cur < 0)
     {
-      /* We've filled the buffer, from a seekable file,
-         which has an st_size==0, E.g., /dev/zero on GNU/Linux.
-         Assume there is no limit to file size. */
-      errno = EOVERFLOW;
-      return -1;
+      if (errno == ESPIPE)
+        errno = 0; /* Suppress confusing seek error. */
+      return cur;
     }
 
-  cur += size;
   off_t end;
-  if (usable_st_size (st) && cur <= st->st_size)
+  if (usable_st_size (st))
     end = st->st_size;
   else
     {
       end = lseek (fd, 0, SEEK_END);
       if (end < 0)
-        return -1;
-      if (end != cur)
+        return end;
+      if (end == OFF_T_MAX)
+        goto overflow; /* E.g., /dev/zero on GNU/Hurd. */
+      if (cur < end)
         {
-          if (lseek (fd, cur, SEEK_SET) < 0)
-            return -1;
-          if (end < cur)
-            end = cur;
+          off_t cur1 = lseek (fd, cur, SEEK_SET);
+          if (cur1 < 0)
+            return cur1;
         }
     }
 
-  size += end - cur;
-  if (size == OFF_T_MAX)
-    {
-      /* E.g., /dev/zero on GNU/Hurd. */
-      errno = EOVERFLOW;
-      return -1;
-    }
+  /* Report overflow if we filled the buffer from a file with more
+     bytes than stat or lseek reports. This can happen with mutating
+     (e.g., /proc) files that are larger than the input block size.
+     FIXME: Handle this properly, e.g., by copying the growing file's
+     data into the first output file, and then splitting that output
+     file (which should not grow) into the other output files. */
+  if (end < size)
+    goto overflow;
+
+  if (cur < end && INT_ADD_WRAPV (size, end - cur, &size))
+    goto overflow;
 
   return size;
+
+ overflow:
+  errno = EOVERFLOW;
+  return -1;
 }
 
 /* Compute the next sequential output file name and store it into the
@@ -886,7 +882,8 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
         }
       else
         {
-          if (lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
+          if (initial_read < start
+              && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
             die (EXIT_FAILURE, errno, "%s", quotef (infile));
           initial_read = SIZE_MAX;
         }
@@ -1005,7 +1002,8 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
     }
   else
     {
-      if (lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
+      if (initial_read < start
+          && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
         die (EXIT_FAILURE, errno, "%s", quotef (infile));
       initial_read = SIZE_MAX;
     }
diff --git a/tests/split/l-chunk.sh b/tests/split/l-chunk.sh
index cdb201746..c94380e87 100755
--- a/tests/split/l-chunk.sh
+++ b/tests/split/l-chunk.sh
@@ -24,9 +24,10 @@ echo "split: invalid number of chunks: '1o'" > exp
 returns_ 1 split -n l/1o 2>err || fail=1
 compare exp err || fail=1
 
-echo "split: -: cannot determine file size" > exp
-: | returns_ 1 split -n l/1 2>err || fail=1
-compare exp err || fail=1
+rm -f x* || fail=1
+: | split -n l/1 || fail=1
+compare /dev/null xaa || fail=1
+test ! -f xab || fail=1
 
 # N can be greater than the file size
 # in which case no data is extracted, or empty files are written