diff options
Diffstat (limited to 'src/wc.c')
-rw-r--r-- | src/wc.c | 801 |
1 files changed, 465 insertions, 336 deletions
@@ -1,10 +1,10 @@ /* wc - print the number of lines, words, and bytes in files - Copyright (C) 85, 91, 1995-2006 Free Software Foundation, Inc. + Copyright (C) 1985-2016 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -12,42 +12,46 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* Written by Paul Rubin, phr@ocf.berkeley.edu and David MacKenzie, djm@gnu.ai.mit.edu. */ - + #include <config.h> #include <stdio.h> +#include <assert.h> #include <getopt.h> #include <sys/types.h> +#include <wchar.h> +#include <wctype.h> #include "system.h" +#include "argv-iter.h" #include "error.h" -#include "inttostr.h" -#include "quote.h" +#include "fadvise.h" +#include "mbchar.h" +#include "physmem.h" #include "readtokens0.h" #include "safe-read.h" -#include "wcwidth.h" +#include "stat-size.h" +#include "xfreopen.h" #if !defined iswspace && !HAVE_ISWSPACE # define iswspace(wc) \ ((wc) == to_uchar (wc) && isspace (to_uchar (wc))) #endif -/* The official name of this program (e.g., no `g' prefix). */ +/* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "wc" -#define AUTHORS "Paul Rubin", "David MacKenzie" +#define AUTHORS \ + proper_name ("Paul Rubin"), \ + proper_name ("David MacKenzie") /* Size of atomic reads. */ #define BUFFER_SIZE (16 * 1024) -/* The name this program was run with. */ -char *program_name; - /* Cumulative number of lines, words, chars and bytes in all files so far. max_line_length is the maximum over all files processed so far. */ static uintmax_t total_lines; @@ -101,32 +105,40 @@ void usage (int status) { if (status != EXIT_SUCCESS) - fprintf (stderr, _("Try `%s --help' for more information.\n"), - program_name); + emit_try_help (); else { printf (_("\ Usage: %s [OPTION]... [FILE]...\n\ or: %s [OPTION]... --files0-from=F\n\ "), - program_name, program_name); + program_name, program_name); fputs (_("\ Print newline, word, and byte counts for each FILE, and a total line if\n\ -more than one FILE is specified. With no FILE, or when FILE is -,\n\ -read standard input.\n\ +more than one FILE is specified. A word is a non-zero-length sequence of\n\ +characters delimited by white space.\n\ +"), stdout); + + emit_stdin_note (); + + fputs (_("\ +\n\ +The options below may be used to select which counts are printed, always in\n\ +the following order: newline, word, character, byte, maximum line length.\n\ -c, --bytes print the byte counts\n\ -m, --chars print the character counts\n\ -l, --lines print the newline counts\n\ "), stdout); fputs (_("\ --files0-from=F read input from the files specified by\n\ - NUL-terminated names in file F\n\ - -L, --max-line-length print the length of the longest line\n\ + NUL-terminated names in file F;\n\ + If F is - then read names from standard input\n\ + -L, --max-line-length print the maximum display width\n\ -w, --words print the word counts\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); - printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); + emit_ancillary_info (PROGRAM_NAME); } exit (status); } @@ -135,11 +147,11 @@ read standard input.\n\ associated with the specified counters. */ static void write_counts (uintmax_t lines, - uintmax_t words, - uintmax_t chars, - uintmax_t bytes, - uintmax_t linelength, - const char *file) + uintmax_t words, + uintmax_t chars, + uintmax_t bytes, + uintmax_t linelength, + const char *file) { static char const format_sp_int[] = " %*s"; char const *format_int = format_sp_int + 1; @@ -170,15 +182,16 @@ write_counts (uintmax_t lines, printf (format_int, number_width, umaxtostr (linelength, buf)); } if (file) - printf (" %s", file); + printf (" %s", strchr (file, '\n') ? quotef (file) : file); putchar ('\n'); } /* Count words. FILE_X is the name of the file (or NULL for standard input) that is open on descriptor FD. *FSTATUS is its status. + CURRENT_POS is the current file offset if known, negative if unknown. Return true if successful. */ static bool -wc (int fd, char const *file_x, struct fstatus *fstatus) +wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) { bool ok = true; char buf[BUFFER_SIZE + 1]; @@ -191,7 +204,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus) /* If in the current locale, chars are equivalent to bytes, we prefer counting bytes, because that's easier. */ -#if HAVE_MBRTOWC && (MB_LEN_MAX > 1) +#if MB_LEN_MAX > 1 if (MB_CUR_MAX > 1) { count_bytes = print_bytes; @@ -200,207 +213,242 @@ wc (int fd, char const *file_x, struct fstatus *fstatus) else #endif { - count_bytes = print_bytes | print_chars; + count_bytes = print_bytes || print_chars; count_chars = false; } - count_complicated = print_words | print_linelength; + count_complicated = print_words || print_linelength; + + /* Advise the kernel of our access pattern only if we will read(). */ + if (!count_bytes || count_chars || print_lines || count_complicated) + fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); /* When counting only bytes, save some line- and word-counting - overhead. If FD is a `regular' Unix file, using lseek is enough - to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE - bytes at a time until EOF. Note that the `size' (number of bytes) + overhead. If FD is a 'regular' Unix file, using lseek is enough + to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE + bytes at a time until EOF. Note that the 'size' (number of bytes) that wc reports is smaller than stats.st_size when the file is not positioned at its beginning. That's why the lseek calls below are necessary. For example the command - `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group' - should make wc report `0' bytes. */ + '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group' + should make wc report '0' bytes. */ - if (count_bytes & !count_chars & !print_lines & !count_complicated) + if (count_bytes && !count_chars && !print_lines && !count_complicated) { - off_t current_pos, end_pos; - if (0 < fstatus->failed) - fstatus->failed = fstat (fd, &fstatus->st); - - if (! fstatus->failed && S_ISREG (fstatus->st.st_mode) - && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1 - && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1) - { - /* Be careful here. The current position may actually be - beyond the end of the file. As in the example above. */ - bytes = end_pos < current_pos ? 0 : end_pos - current_pos; - } - else - { - while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) - { - if (bytes_read == SAFE_READ_ERROR) - { - error (0, errno, "%s", file); - ok = false; - break; - } - bytes += bytes_read; - } - } + fstatus->failed = fstat (fd, &fstatus->st); + + /* For sized files, seek to one st_blksize before EOF rather than to EOF. + This works better for files in proc-like file systems where + the size is only approximate. */ + if (! fstatus->failed && usable_st_size (&fstatus->st) + && 0 <= fstatus->st.st_size) + { + size_t end_pos = fstatus->st.st_size; + off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1); + if (current_pos < 0) + current_pos = lseek (fd, 0, SEEK_CUR); + if (0 <= current_pos && current_pos < hi_pos + && 0 <= lseek (fd, hi_pos, SEEK_CUR)) + bytes = hi_pos - current_pos; + } + + fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); + while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) + { + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + bytes += bytes_read; + } } - else if (!count_chars & !count_complicated) + else if (!count_chars && !count_complicated) { /* Use a separate loop when counting only lines or lines and bytes -- - but not chars or words. */ + but not chars or words. */ + bool long_lines = false; while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) - { - char *p = buf; - - if (bytes_read == SAFE_READ_ERROR) - { - error (0, errno, "%s", file); - ok = false; - break; - } - - while ((p = memchr (p, '\n', (buf + bytes_read) - p))) - { - ++p; - ++lines; - } - bytes += bytes_read; - } + { + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + + bytes += bytes_read; + + char *p = buf; + char *end = p + bytes_read; + uintmax_t plines = lines; + + if (! long_lines) + { + /* Avoid function call overhead for shorter lines. */ + while (p != end) + lines += *p++ == '\n'; + } + else + { + /* memchr is more efficient with longer lines. */ + while ((p = memchr (p, '\n', end - p))) + { + ++p; + ++lines; + } + } + + /* If the average line length in the block is >= 15, then use + memchr for the next block, where system specific optimizations + may outweigh function call overhead. + FIXME: This line length was determined in 2015, on both + x86_64 and ppc64, but it's worth re-evaluating in future with + newer compilers, CPUs, or memchr() implementations etc. */ + if (lines - plines <= bytes_read / 15) + long_lines = true; + else + long_lines = false; + } } -#if HAVE_MBRTOWC && (MB_LEN_MAX > 1) +#if MB_LEN_MAX > 1 # define SUPPORT_OLD_MBRTOWC 1 else if (MB_CUR_MAX > 1) { bool in_word = false; uintmax_t linepos = 0; mbstate_t state = { 0, }; - uintmax_t last_error_line = 0; - int last_error_errno = 0; + bool in_shift = false; # if SUPPORT_OLD_MBRTOWC /* Back-up the state before each multibyte character conversion and - move the last incomplete character of the buffer to the front - of the buffer. This is needed because we don't know whether - the `mbrtowc' function updates the state when it returns -2, - - this is the ISO C 99 and glibc-2.2 behaviour - or not - amended - ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an - autoconf test for this, yet. */ + move the last incomplete character of the buffer to the front + of the buffer. This is needed because we don't know whether + the 'mbrtowc' function updates the state when it returns -2, -- + this is the ISO C 99 and glibc-2.2 behaviour - or not - amended + ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an + autoconf test for this, yet. */ size_t prev = 0; /* number of bytes carried over from previous round */ # else const size_t prev = 0; # endif while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0) - { - const char *p; + { + const char *p; # if SUPPORT_OLD_MBRTOWC - mbstate_t backup_state; + mbstate_t backup_state; # endif - if (bytes_read == SAFE_READ_ERROR) - { - error (0, errno, "%s", file); - ok = false; - break; - } - - bytes += bytes_read; - p = buf; - bytes_read += prev; - do - { - wchar_t wide_char; - size_t n; - + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + + bytes += bytes_read; + p = buf; + bytes_read += prev; + do + { + wchar_t wide_char; + size_t n; + + if (!in_shift && is_basic (*p)) + { + /* Handle most ASCII characters quickly, without calling + mbrtowc(). */ + n = 1; + wide_char = *p; + } + else + { + in_shift = true; # if SUPPORT_OLD_MBRTOWC - backup_state = state; + backup_state = state; # endif - n = mbrtowc (&wide_char, p, bytes_read, &state); - if (n == (size_t) -2) - { + n = mbrtowc (&wide_char, p, bytes_read, &state); + if (n == (size_t) -2) + { # if SUPPORT_OLD_MBRTOWC - state = backup_state; + state = backup_state; # endif - break; - } - if (n == (size_t) -1) - { - /* Signal repeated errors only once per line. */ - if (!(lines + 1 == last_error_line - && errno == last_error_errno)) - { - char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)]; - last_error_line = lines + 1; - last_error_errno = errno; - error (0, errno, "%s:%s", file, - umaxtostr (last_error_line, line_number_buf)); - ok = false; - } - p++; - bytes_read--; - } - else - { - if (n == 0) - { - wide_char = 0; - n = 1; - } - p += n; - bytes_read -= n; - chars++; - switch (wide_char) - { - case '\n': - lines++; - /* Fall through. */ - case '\r': - case '\f': - if (linepos > linelength) - linelength = linepos; - linepos = 0; - goto mb_word_separator; - case '\t': - linepos += 8 - (linepos % 8); - goto mb_word_separator; - case ' ': - linepos++; - /* Fall through. */ - case '\v': - mb_word_separator: - words += in_word; - in_word = false; - break; - default: - if (iswprint (wide_char)) - { - int width = wcwidth (wide_char); - if (width > 0) - linepos += width; - if (iswspace (wide_char)) - goto mb_word_separator; - in_word = true; - } - break; - } - } - } - while (bytes_read > 0); + break; + } + if (n == (size_t) -1) + { + /* Remember that we read a byte, but don't complain + about the error. Because of the decoding error, + this is a considered to be byte but not a + character (that is, chars is not incremented). */ + p++; + bytes_read--; + continue; + } + if (mbsinit (&state)) + in_shift = false; + if (n == 0) + { + wide_char = 0; + n = 1; + } + } + p += n; + bytes_read -= n; + chars++; + switch (wide_char) + { + case '\n': + lines++; + /* Fall through. */ + case '\r': + case '\f': + if (linepos > linelength) + linelength = linepos; + linepos = 0; + goto mb_word_separator; + case '\t': + linepos += 8 - (linepos % 8); + goto mb_word_separator; + case ' ': + linepos++; + /* Fall through. */ + case '\v': + mb_word_separator: + words += in_word; + in_word = false; + break; + default: + if (iswprint (wide_char)) + { + int width = wcwidth (wide_char); + if (width > 0) + linepos += width; + if (iswspace (wide_char)) + goto mb_word_separator; + in_word = true; + } + break; + } + } + while (bytes_read > 0); # if SUPPORT_OLD_MBRTOWC - if (bytes_read > 0) - { - if (bytes_read == BUFFER_SIZE) - { - /* Encountered a very long redundant shift sequence. */ - p++; - bytes_read--; - } - memmove (buf, p, bytes_read); - } - prev = bytes_read; + if (bytes_read > 0) + { + if (bytes_read == BUFFER_SIZE) + { + /* Encountered a very long redundant shift sequence. */ + p++; + bytes_read--; + } + memmove (buf, p, bytes_read); + } + prev = bytes_read; # endif - } + } if (linepos > linelength) - linelength = linepos; + linelength = linepos; words += in_word; } #endif @@ -410,55 +458,55 @@ wc (int fd, char const *file_x, struct fstatus *fstatus) uintmax_t linepos = 0; while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) - { - const char *p = buf; - if (bytes_read == SAFE_READ_ERROR) - { - error (0, errno, "%s", file); - ok = false; - break; - } - - bytes += bytes_read; - do - { - switch (*p++) - { - case '\n': - lines++; - /* Fall through. */ - case '\r': - case '\f': - if (linepos > linelength) - linelength = linepos; - linepos = 0; - goto word_separator; - case '\t': - linepos += 8 - (linepos % 8); - goto word_separator; - case ' ': - linepos++; - /* Fall through. */ - case '\v': - word_separator: - words += in_word; - in_word = false; - break; - default: - if (isprint (to_uchar (p[-1]))) - { - linepos++; - if (isspace (to_uchar (p[-1]))) - goto word_separator; - in_word = true; - } - break; - } - } - while (--bytes_read); - } + { + const char *p = buf; + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + + bytes += bytes_read; + do + { + switch (*p++) + { + case '\n': + lines++; + /* Fall through. */ + case '\r': + case '\f': + if (linepos > linelength) + linelength = linepos; + linepos = 0; + goto word_separator; + case '\t': + linepos += 8 - (linepos % 8); + goto word_separator; + case ' ': + linepos++; + /* Fall through. */ + case '\v': + word_separator: + words += in_word; + in_word = false; + break; + default: + if (isprint (to_uchar (p[-1]))) + { + linepos++; + if (isspace (to_uchar (p[-1]))) + goto word_separator; + in_word = true; + } + break; + } + } + while (--bytes_read); + } if (linepos > linelength) - linelength = linepos; + linelength = linepos; words += in_word; } @@ -483,53 +531,55 @@ wc_file (char const *file, struct fstatus *fstatus) { have_read_stdin = true; if (O_BINARY && ! isatty (STDIN_FILENO)) - freopen (NULL, "rb", stdin); - return wc (STDIN_FILENO, file, fstatus); + xfreopen (NULL, "rb", stdin); + return wc (STDIN_FILENO, file, fstatus, -1); } else { int fd = open (file, O_RDONLY | O_BINARY); if (fd == -1) - { - error (0, errno, "%s", file); - return false; - } + { + error (0, errno, "%s", quotef (file)); + return false; + } else - { - bool ok = wc (fd, file, fstatus); - if (close (fd) != 0) - { - error (0, errno, "%s", file); - return false; - } - return ok; - } + { + bool ok = wc (fd, file, fstatus, 0); + if (close (fd) != 0) + { + error (0, errno, "%s", quotef (file)); + return false; + } + return ok; + } } } /* Return the file status for the NFILES files addressed by FILE. Optimize the case where only one number is printed, for just one file; in that case we can use a print width of 1, so we don't need - to stat the file. */ + to stat the file. Handle the case of (nfiles == 0) in the same way; + that happens when we don't know how long the list of file names will be. */ static struct fstatus * -get_input_fstatus (int nfiles, char * const *file) +get_input_fstatus (size_t nfiles, char *const *file) { - struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus); + struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus); - if (nfiles == 1 - && ((print_lines + print_words + print_chars - + print_bytes + print_linelength) - == 1)) + if (nfiles == 0 + || (nfiles == 1 + && ((print_lines + print_words + print_chars + + print_bytes + print_linelength) + == 1))) fstatus[0].failed = 1; else { - int i; + size_t i; for (i = 0; i < nfiles; i++) - fstatus[i].failed = (! file[i] || STREQ (file[i], "-") - ? fstat (STDIN_FILENO, &fstatus[i].st) - : stat (file[i], &fstatus[i].st)); + fstatus[i].failed = (! file[i] || STREQ (file[i], "-") + ? fstat (STDIN_FILENO, &fstatus[i].st) + : stat (file[i], &fstatus[i].st)); } return fstatus; @@ -539,8 +589,8 @@ get_input_fstatus (int nfiles, char * const *file) recorded in FSTATUS. Optimize the same special case that get_input_fstatus optimizes. */ -static int -compute_number_width (int nfiles, struct fstatus const *fstatus) +static int _GL_ATTRIBUTE_PURE +compute_number_width (size_t nfiles, struct fstatus const *fstatus) { int width = 1; @@ -548,21 +598,21 @@ compute_number_width (int nfiles, struct fstatus const *fstatus) { int minimum_width = 1; uintmax_t regular_total = 0; - int i; + size_t i; for (i = 0; i < nfiles; i++) - if (! fstatus[i].failed) - { - if (S_ISREG (fstatus[i].st.st_mode)) - regular_total += fstatus[i].st.st_size; - else - minimum_width = 7; - } + if (! fstatus[i].failed) + { + if (S_ISREG (fstatus[i].st.st_mode)) + regular_total += fstatus[i].st.st_size; + else + minimum_width = 7; + } for (; 10 <= regular_total; regular_total /= 10) - width++; + width++; if (width < minimum_width) - width = minimum_width; + width = minimum_width; } return width; @@ -572,23 +622,26 @@ compute_number_width (int nfiles, struct fstatus const *fstatus) int main (int argc, char **argv) { - int i; bool ok; int optc; - int nfiles; + size_t nfiles; char **files; char *files_from = NULL; struct fstatus *fstatus; struct Tokens tok; initialize_main (&argc, &argv); - program_name = argv[0]; + set_program_name (argv[0]); setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); atexit (close_stdout); + /* Line buffer stdout to ensure lines are written atomically and immediately + so that processes running in parallel do not intersperse their output. */ + setvbuf (stdout, NULL, _IOLBF, 0); + print_lines = print_words = print_chars = print_bytes = false; print_linelength = false; total_lines = total_words = total_chars = total_bytes = max_line_length = 0; @@ -597,108 +650,184 @@ main (int argc, char **argv) switch (optc) { case 'c': - print_bytes = true; - break; + print_bytes = true; + break; case 'm': - print_chars = true; - break; + print_chars = true; + break; case 'l': - print_lines = true; - break; + print_lines = true; + break; case 'w': - print_words = true; - break; + print_words = true; + break; case 'L': - print_linelength = true; - break; + print_linelength = true; + break; case FILES0_FROM_OPTION: - files_from = optarg; - break; + files_from = optarg; + break; case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: - usage (EXIT_FAILURE); + usage (EXIT_FAILURE); } - if (! (print_lines | print_words | print_chars | print_bytes - | print_linelength)) + if (! (print_lines || print_words || print_chars || print_bytes + || print_linelength)) print_lines = print_words = print_bytes = true; + bool read_tokens = false; + struct argv_iterator *ai; if (files_from) { FILE *stream; /* When using --files0-from=F, you may not specify any files - on the command-line. */ + on the command-line. */ if (optind < argc) - { - error (0, 0, _("extra operand %s"), quote (argv[optind])); - fprintf (stderr, "%s\n", - _("File operands cannot be combined with --files0-from.")); - usage (EXIT_FAILURE); - } + { + error (0, 0, _("extra operand %s"), quoteaf (argv[optind])); + fprintf (stderr, "%s\n", + _("file operands cannot be combined with --files0-from")); + usage (EXIT_FAILURE); + } if (STREQ (files_from, "-")) - stream = stdin; + stream = stdin; else - { - stream = fopen (files_from, "r"); - if (stream == NULL) - error (EXIT_FAILURE, errno, _("cannot open %s for reading"), - quote (files_from)); - } - - readtokens0_init (&tok); - - if (! readtokens0 (stream, &tok) || fclose (stream) != 0) - error (EXIT_FAILURE, 0, _("cannot read file names from %s"), - quote (files_from)); - - files = tok.tok; - nfiles = tok.n_tok; + { + stream = fopen (files_from, "r"); + if (stream == NULL) + error (EXIT_FAILURE, errno, _("cannot open %s for reading"), + quoteaf (files_from)); + } + + /* Read the file list into RAM if we can detect its size and that + size is reasonable. Otherwise, we'll read a name at a time. */ + struct stat st; + if (fstat (fileno (stream), &st) == 0 + && S_ISREG (st.st_mode) + && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2)) + { + read_tokens = true; + readtokens0_init (&tok); + if (! readtokens0 (stream, &tok) || fclose (stream) != 0) + error (EXIT_FAILURE, 0, _("cannot read file names from %s"), + quoteaf (files_from)); + files = tok.tok; + nfiles = tok.n_tok; + ai = argv_iter_init_argv (files); + } + else + { + files = NULL; + nfiles = 0; + ai = argv_iter_init_stream (stream); + } } else { - static char *stdin_only[2]; + static char *stdin_only[] = { NULL }; files = (optind < argc ? argv + optind : stdin_only); nfiles = (optind < argc ? argc - optind : 1); - stdin_only[0] = NULL; + ai = argv_iter_init_argv (files); } + if (!ai) + xalloc_die (); + fstatus = get_input_fstatus (nfiles, files); number_width = compute_number_width (nfiles, fstatus); + int i; ok = true; - for (i = 0; i < nfiles; i++) + for (i = 0; /* */; i++) { - if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-")) - { - ok = false; - error (0, 0, - _("when reading file names from stdin, " - "no file name of %s allowed"), - quote ("-")); - continue; - } - ok &= wc_file (files[i], &fstatus[i]); + bool skip_file = false; + enum argv_iter_err ai_err; + char *file_name = argv_iter (ai, &ai_err); + if (!file_name) + { + switch (ai_err) + { + case AI_ERR_EOF: + goto argv_iter_done; + case AI_ERR_READ: + error (0, errno, _("%s: read error"), + quotef (files_from)); + ok = false; + goto argv_iter_done; + case AI_ERR_MEM: + xalloc_die (); + default: + assert (!"unexpected error code from argv_iter"); + } + } + if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-")) + { + /* Give a better diagnostic in an unusual case: + printf - | wc --files0-from=- */ + error (0, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quoteaf (file_name)); + skip_file = true; + } + + if (!file_name[0]) + { + /* Diagnose a zero-length file name. When it's one + among many, knowing the record number may help. + FIXME: currently print the record number only with + --files0-from=FILE. Maybe do it for argv, too? */ + if (files_from == NULL) + error (0, 0, "%s", _("invalid zero-length file name")); + else + { + /* Using the standard 'filename:line-number:' prefix here is + not totally appropriate, since NUL is the separator, not NL, + but it might be better than nothing. */ + unsigned long int file_number = argv_iter_n_args (ai); + error (0, 0, "%s:%lu: %s", quotef (files_from), + file_number, _("invalid zero-length file name")); + } + skip_file = true; + } + + if (skip_file) + ok = false; + else + ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]); } + argv_iter_done: + + /* No arguments on the command line is fine. That means read from stdin. + However, no arguments on the --files0-from input stream is an error + means don't read anything. */ + if (ok && !files_from && argv_iter_n_args (ai) == 0) + ok &= wc_file (NULL, &fstatus[0]); - if (1 < nfiles) + if (read_tokens) + readtokens0_free (&tok); + + if (1 < argv_iter_n_args (ai)) write_counts (total_lines, total_words, total_chars, total_bytes, - max_line_length, _("total")); + max_line_length, _("total")); + + argv_iter_free (ai); free (fstatus); if (have_read_stdin && close (STDIN_FILENO) != 0) error (EXIT_FAILURE, errno, "-"); - exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); + return ok ? EXIT_SUCCESS : EXIT_FAILURE; } |