summary | refs | log | tree | commit | diff
path: root/src/wc.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/wc.c')
-rw-r--r-- src/wc.c 801
1 files changed, 465 insertions, 336 deletions
diff --git a/src/wc.c b/src/wc.c
index 332f32d..94cbaff 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -1,10 +1,10 @@
/* wc - print the number of lines, words, and bytes in files
- Copyright (C) 85, 91, 1995-2006 Free Software Foundation, Inc.
+ Copyright (C) 1985-2016 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -12,42 +12,46 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation,
- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Written by Paul Rubin, phr@ocf.berkeley.edu
and David MacKenzie, djm@gnu.ai.mit.edu. */
-
+
#include <config.h>
#include <stdio.h>
+#include <assert.h>
#include <getopt.h>
#include <sys/types.h>
+#include <wchar.h>
+#include <wctype.h>
#include "system.h"
+#include "argv-iter.h"
#include "error.h"
-#include "inttostr.h"
-#include "quote.h"
+#include "fadvise.h"
+#include "mbchar.h"
+#include "physmem.h"
#include "readtokens0.h"
#include "safe-read.h"
-#include "wcwidth.h"
+#include "stat-size.h"
+#include "xfreopen.h"
#if !defined iswspace && !HAVE_ISWSPACE
# define iswspace(wc) \
((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
#endif
-/* The official name of this program (e.g., no `g' prefix). */
+/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "wc"
-#define AUTHORS "Paul Rubin", "David MacKenzie"
+#define AUTHORS \
+ proper_name ("Paul Rubin"), \
+ proper_name ("David MacKenzie")
/* Size of atomic reads. */
#define BUFFER_SIZE (16 * 1024)
-/* The name this program was run with. */
-char *program_name;
-
/* Cumulative number of lines, words, chars and bytes in all files so far.
max_line_length is the maximum over all files processed so far. */
static uintmax_t total_lines;
@@ -101,32 +105,40 @@ void
usage (int status)
{
if (status != EXIT_SUCCESS)
- fprintf (stderr, _("Try `%s --help' for more information.\n"),
- program_name);
+ emit_try_help ();
else
{
printf (_("\
Usage: %s [OPTION]... [FILE]...\n\
or: %s [OPTION]... --files0-from=F\n\
"),
- program_name, program_name);
+ program_name, program_name);
fputs (_("\
Print newline, word, and byte counts for each FILE, and a total line if\n\
-more than one FILE is specified. With no FILE, or when FILE is -,\n\
-read standard input.\n\
+more than one FILE is specified. A word is a non-zero-length sequence of\n\
+characters delimited by white space.\n\
+"), stdout);
+
+ emit_stdin_note ();
+
+ fputs (_("\
+\n\
+The options below may be used to select which counts are printed, always in\n\
+the following order: newline, word, character, byte, maximum line length.\n\
-c, --bytes print the byte counts\n\
-m, --chars print the character counts\n\
-l, --lines print the newline counts\n\
"), stdout);
fputs (_("\
--files0-from=F read input from the files specified by\n\
- NUL-terminated names in file F\n\
- -L, --max-line-length print the length of the longest line\n\
+ NUL-terminated names in file F;\n\
+ If F is - then read names from standard input\n\
+ -L, --max-line-length print the maximum display width\n\
-w, --words print the word counts\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
- printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+ emit_ancillary_info (PROGRAM_NAME);
}
exit (status);
}
@@ -135,11 +147,11 @@ read standard input.\n\
associated with the specified counters. */
static void
write_counts (uintmax_t lines,
- uintmax_t words,
- uintmax_t chars,
- uintmax_t bytes,
- uintmax_t linelength,
- const char *file)
+ uintmax_t words,
+ uintmax_t chars,
+ uintmax_t bytes,
+ uintmax_t linelength,
+ const char *file)
{
static char const format_sp_int[] = " %*s";
char const *format_int = format_sp_int + 1;
@@ -170,15 +182,16 @@ write_counts (uintmax_t lines,
printf (format_int, number_width, umaxtostr (linelength, buf));
}
if (file)
- printf (" %s", file);
+ printf (" %s", strchr (file, '\n') ? quotef (file) : file);
putchar ('\n');
}
/* Count words. FILE_X is the name of the file (or NULL for standard
input) that is open on descriptor FD. *FSTATUS is its status.
+ CURRENT_POS is the current file offset if known, negative if unknown.
Return true if successful. */
static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
{
bool ok = true;
char buf[BUFFER_SIZE + 1];
@@ -191,7 +204,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
/* If in the current locale, chars are equivalent to bytes, we prefer
counting bytes, because that's easier. */
-#if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
+#if MB_LEN_MAX > 1
if (MB_CUR_MAX > 1)
{
count_bytes = print_bytes;
@@ -200,207 +213,242 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
else
#endif
{
- count_bytes = print_bytes | print_chars;
+ count_bytes = print_bytes || print_chars;
count_chars = false;
}
- count_complicated = print_words | print_linelength;
+ count_complicated = print_words || print_linelength;
+
+ /* Advise the kernel of our access pattern only if we will read(). */
+ if (!count_bytes || count_chars || print_lines || count_complicated)
+ fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
/* When counting only bytes, save some line- and word-counting
- overhead. If FD is a `regular' Unix file, using lseek is enough
- to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
- bytes at a time until EOF. Note that the `size' (number of bytes)
+ overhead. If FD is a 'regular' Unix file, using lseek is enough
+ to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE
+ bytes at a time until EOF. Note that the 'size' (number of bytes)
that wc reports is smaller than stats.st_size when the file is not
positioned at its beginning. That's why the lseek calls below are
necessary. For example the command
- `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
- should make wc report `0' bytes. */
+ '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
+ should make wc report '0' bytes. */
- if (count_bytes & !count_chars & !print_lines & !count_complicated)
+ if (count_bytes && !count_chars && !print_lines && !count_complicated)
{
- off_t current_pos, end_pos;
-
if (0 < fstatus->failed)
- fstatus->failed = fstat (fd, &fstatus->st);
-
- if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
- && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
- && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
- {
- /* Be careful here. The current position may actually be
- beyond the end of the file. As in the example above. */
- bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
- }
- else
- {
- while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
- {
- if (bytes_read == SAFE_READ_ERROR)
- {
- error (0, errno, "%s", file);
- ok = false;
- break;
- }
- bytes += bytes_read;
- }
- }
+ fstatus->failed = fstat (fd, &fstatus->st);
+
+ /* For sized files, seek to one st_blksize before EOF rather than to EOF.
+ This works better for files in proc-like file systems where
+ the size is only approximate. */
+ if (! fstatus->failed && usable_st_size (&fstatus->st)
+ && 0 <= fstatus->st.st_size)
+ {
+ size_t end_pos = fstatus->st.st_size;
+ off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
+ if (current_pos < 0)
+ current_pos = lseek (fd, 0, SEEK_CUR);
+ if (0 <= current_pos && current_pos < hi_pos
+ && 0 <= lseek (fd, hi_pos, SEEK_CUR))
+ bytes = hi_pos - current_pos;
+ }
+
+ fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+ while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+ {
+ if (bytes_read == SAFE_READ_ERROR)
+ {
+ error (0, errno, "%s", quotef (file));
+ ok = false;
+ break;
+ }
+ bytes += bytes_read;
+ }
}
- else if (!count_chars & !count_complicated)
+ else if (!count_chars && !count_complicated)
{
/* Use a separate loop when counting only lines or lines and bytes --
- but not chars or words. */
+ but not chars or words. */
+ bool long_lines = false;
while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
- {
- char *p = buf;
-
- if (bytes_read == SAFE_READ_ERROR)
- {
- error (0, errno, "%s", file);
- ok = false;
- break;
- }
-
- while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
- {
- ++p;
- ++lines;
- }
- bytes += bytes_read;
- }
+ {
+ if (bytes_read == SAFE_READ_ERROR)
+ {
+ error (0, errno, "%s", quotef (file));
+ ok = false;
+ break;
+ }
+
+ bytes += bytes_read;
+
+ char *p = buf;
+ char *end = p + bytes_read;
+ uintmax_t plines = lines;
+
+ if (! long_lines)
+ {
+ /* Avoid function call overhead for shorter lines. */
+ while (p != end)
+ lines += *p++ == '\n';
+ }
+ else
+ {
+ /* memchr is more efficient with longer lines. */
+ while ((p = memchr (p, '\n', end - p)))
+ {
+ ++p;
+ ++lines;
+ }
+ }
+
+ /* If the average line length in the block is >= 15, then use
+ memchr for the next block, where system specific optimizations
+ may outweigh function call overhead.
+ FIXME: This line length was determined in 2015, on both
+ x86_64 and ppc64, but it's worth re-evaluating in future with
+ newer compilers, CPUs, or memchr() implementations etc. */
+ if (lines - plines <= bytes_read / 15)
+ long_lines = true;
+ else
+ long_lines = false;
+ }
}
-#if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
+#if MB_LEN_MAX > 1
# define SUPPORT_OLD_MBRTOWC 1
else if (MB_CUR_MAX > 1)
{
bool in_word = false;
uintmax_t linepos = 0;
mbstate_t state = { 0, };
- uintmax_t last_error_line = 0;
- int last_error_errno = 0;
+ bool in_shift = false;
# if SUPPORT_OLD_MBRTOWC
/* Back-up the state before each multibyte character conversion and
- move the last incomplete character of the buffer to the front
- of the buffer. This is needed because we don't know whether
- the `mbrtowc' function updates the state when it returns -2, -
- this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
- ANSI C, glibc-2.1 and Solaris 5.7 behaviour. We don't have an
- autoconf test for this, yet. */
+ move the last incomplete character of the buffer to the front
+ of the buffer. This is needed because we don't know whether
+ the 'mbrtowc' function updates the state when it returns -2
+ (the ISO C 99 and glibc-2.2 behaviour) or does not (the amended
+ ANSI C, glibc-2.1 and Solaris 5.7 behaviour). We don't have an
+ autoconf test for this, yet. */
size_t prev = 0; /* number of bytes carried over from previous round */
# else
const size_t prev = 0;
# endif
while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
- {
- const char *p;
+ {
+ const char *p;
# if SUPPORT_OLD_MBRTOWC
- mbstate_t backup_state;
+ mbstate_t backup_state;
# endif
- if (bytes_read == SAFE_READ_ERROR)
- {
- error (0, errno, "%s", file);
- ok = false;
- break;
- }
-
- bytes += bytes_read;
- p = buf;
- bytes_read += prev;
- do
- {
- wchar_t wide_char;
- size_t n;
-
+ if (bytes_read == SAFE_READ_ERROR)
+ {
+ error (0, errno, "%s", quotef (file));
+ ok = false;
+ break;
+ }
+
+ bytes += bytes_read;
+ p = buf;
+ bytes_read += prev;
+ do
+ {
+ wchar_t wide_char;
+ size_t n;
+
+ if (!in_shift && is_basic (*p))
+ {
+ /* Handle most ASCII characters quickly, without calling
+ mbrtowc(). */
+ n = 1;
+ wide_char = *p;
+ }
+ else
+ {
+ in_shift = true;
# if SUPPORT_OLD_MBRTOWC
- backup_state = state;
+ backup_state = state;
# endif
- n = mbrtowc (&wide_char, p, bytes_read, &state);
- if (n == (size_t) -2)
- {
+ n = mbrtowc (&wide_char, p, bytes_read, &state);
+ if (n == (size_t) -2)
+ {
# if SUPPORT_OLD_MBRTOWC
- state = backup_state;
+ state = backup_state;
# endif
- break;
- }
- if (n == (size_t) -1)
- {
- /* Signal repeated errors only once per line. */
- if (!(lines + 1 == last_error_line
- && errno == last_error_errno))
- {
- char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
- last_error_line = lines + 1;
- last_error_errno = errno;
- error (0, errno, "%s:%s", file,
- umaxtostr (last_error_line, line_number_buf));
- ok = false;
- }
- p++;
- bytes_read--;
- }
- else
- {
- if (n == 0)
- {
- wide_char = 0;
- n = 1;
- }
- p += n;
- bytes_read -= n;
- chars++;
- switch (wide_char)
- {
- case '\n':
- lines++;
- /* Fall through. */
- case '\r':
- case '\f':
- if (linepos > linelength)
- linelength = linepos;
- linepos = 0;
- goto mb_word_separator;
- case '\t':
- linepos += 8 - (linepos % 8);
- goto mb_word_separator;
- case ' ':
- linepos++;
- /* Fall through. */
- case '\v':
- mb_word_separator:
- words += in_word;
- in_word = false;
- break;
- default:
- if (iswprint (wide_char))
- {
- int width = wcwidth (wide_char);
- if (width > 0)
- linepos += width;
- if (iswspace (wide_char))
- goto mb_word_separator;
- in_word = true;
- }
- break;
- }
- }
- }
- while (bytes_read > 0);
+ break;
+ }
+ if (n == (size_t) -1)
+ {
+ /* Remember that we read a byte, but don't complain
+ about the error. Because of the decoding error,
+ this is considered to be a byte but not a
+ character (that is, chars is not incremented). */
+ p++;
+ bytes_read--;
+ continue;
+ }
+ if (mbsinit (&state))
+ in_shift = false;
+ if (n == 0)
+ {
+ wide_char = 0;
+ n = 1;
+ }
+ }
+ p += n;
+ bytes_read -= n;
+ chars++;
+ switch (wide_char)
+ {
+ case '\n':
+ lines++;
+ /* Fall through. */
+ case '\r':
+ case '\f':
+ if (linepos > linelength)
+ linelength = linepos;
+ linepos = 0;
+ goto mb_word_separator;
+ case '\t':
+ linepos += 8 - (linepos % 8);
+ goto mb_word_separator;
+ case ' ':
+ linepos++;
+ /* Fall through. */
+ case '\v':
+ mb_word_separator:
+ words += in_word;
+ in_word = false;
+ break;
+ default:
+ if (iswprint (wide_char))
+ {
+ int width = wcwidth (wide_char);
+ if (width > 0)
+ linepos += width;
+ if (iswspace (wide_char))
+ goto mb_word_separator;
+ in_word = true;
+ }
+ break;
+ }
+ }
+ while (bytes_read > 0);
# if SUPPORT_OLD_MBRTOWC
- if (bytes_read > 0)
- {
- if (bytes_read == BUFFER_SIZE)
- {
- /* Encountered a very long redundant shift sequence. */
- p++;
- bytes_read--;
- }
- memmove (buf, p, bytes_read);
- }
- prev = bytes_read;
+ if (bytes_read > 0)
+ {
+ if (bytes_read == BUFFER_SIZE)
+ {
+ /* Encountered a very long redundant shift sequence. */
+ p++;
+ bytes_read--;
+ }
+ memmove (buf, p, bytes_read);
+ }
+ prev = bytes_read;
# endif
- }
+ }
if (linepos > linelength)
- linelength = linepos;
+ linelength = linepos;
words += in_word;
}
#endif
@@ -410,55 +458,55 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
uintmax_t linepos = 0;
while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
- {
- const char *p = buf;
- if (bytes_read == SAFE_READ_ERROR)
- {
- error (0, errno, "%s", file);
- ok = false;
- break;
- }
-
- bytes += bytes_read;
- do
- {
- switch (*p++)
- {
- case '\n':
- lines++;
- /* Fall through. */
- case '\r':
- case '\f':
- if (linepos > linelength)
- linelength = linepos;
- linepos = 0;
- goto word_separator;
- case '\t':
- linepos += 8 - (linepos % 8);
- goto word_separator;
- case ' ':
- linepos++;
- /* Fall through. */
- case '\v':
- word_separator:
- words += in_word;
- in_word = false;
- break;
- default:
- if (isprint (to_uchar (p[-1])))
- {
- linepos++;
- if (isspace (to_uchar (p[-1])))
- goto word_separator;
- in_word = true;
- }
- break;
- }
- }
- while (--bytes_read);
- }
+ {
+ const char *p = buf;
+ if (bytes_read == SAFE_READ_ERROR)
+ {
+ error (0, errno, "%s", quotef (file));
+ ok = false;
+ break;
+ }
+
+ bytes += bytes_read;
+ do
+ {
+ switch (*p++)
+ {
+ case '\n':
+ lines++;
+ /* Fall through. */
+ case '\r':
+ case '\f':
+ if (linepos > linelength)
+ linelength = linepos;
+ linepos = 0;
+ goto word_separator;
+ case '\t':
+ linepos += 8 - (linepos % 8);
+ goto word_separator;
+ case ' ':
+ linepos++;
+ /* Fall through. */
+ case '\v':
+ word_separator:
+ words += in_word;
+ in_word = false;
+ break;
+ default:
+ if (isprint (to_uchar (p[-1])))
+ {
+ linepos++;
+ if (isspace (to_uchar (p[-1])))
+ goto word_separator;
+ in_word = true;
+ }
+ break;
+ }
+ }
+ while (--bytes_read);
+ }
if (linepos > linelength)
- linelength = linepos;
+ linelength = linepos;
words += in_word;
}
@@ -483,53 +531,55 @@ wc_file (char const *file, struct fstatus *fstatus)
{
have_read_stdin = true;
if (O_BINARY && ! isatty (STDIN_FILENO))
- freopen (NULL, "rb", stdin);
- return wc (STDIN_FILENO, file, fstatus);
+ xfreopen (NULL, "rb", stdin);
+ return wc (STDIN_FILENO, file, fstatus, -1);
}
else
{
int fd = open (file, O_RDONLY | O_BINARY);
if (fd == -1)
- {
- error (0, errno, "%s", file);
- return false;
- }
+ {
+ error (0, errno, "%s", quotef (file));
+ return false;
+ }
else
- {
- bool ok = wc (fd, file, fstatus);
- if (close (fd) != 0)
- {
- error (0, errno, "%s", file);
- return false;
- }
- return ok;
- }
+ {
+ bool ok = wc (fd, file, fstatus, 0);
+ if (close (fd) != 0)
+ {
+ error (0, errno, "%s", quotef (file));
+ return false;
+ }
+ return ok;
+ }
}
}
/* Return the file status for the NFILES files addressed by FILE.
Optimize the case where only one number is printed, for just one
file; in that case we can use a print width of 1, so we don't need
- to stat the file. */
+ to stat the file. Handle the case of (nfiles == 0) in the same way;
+ that happens when we don't know how long the list of file names will be. */
static struct fstatus *
-get_input_fstatus (int nfiles, char * const *file)
+get_input_fstatus (size_t nfiles, char *const *file)
{
- struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
+ struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
- if (nfiles == 1
- && ((print_lines + print_words + print_chars
- + print_bytes + print_linelength)
- == 1))
+ if (nfiles == 0
+ || (nfiles == 1
+ && ((print_lines + print_words + print_chars
+ + print_bytes + print_linelength)
+ == 1)))
fstatus[0].failed = 1;
else
{
- int i;
+ size_t i;
for (i = 0; i < nfiles; i++)
- fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
- ? fstat (STDIN_FILENO, &fstatus[i].st)
- : stat (file[i], &fstatus[i].st));
+ fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
+ ? fstat (STDIN_FILENO, &fstatus[i].st)
+ : stat (file[i], &fstatus[i].st));
}
return fstatus;
@@ -539,8 +589,8 @@ get_input_fstatus (int nfiles, char * const *file)
recorded in FSTATUS. Optimize the same special case that
get_input_fstatus optimizes. */
-static int
-compute_number_width (int nfiles, struct fstatus const *fstatus)
+static int _GL_ATTRIBUTE_PURE
+compute_number_width (size_t nfiles, struct fstatus const *fstatus)
{
int width = 1;
@@ -548,21 +598,21 @@ compute_number_width (int nfiles, struct fstatus const *fstatus)
{
int minimum_width = 1;
uintmax_t regular_total = 0;
- int i;
+ size_t i;
for (i = 0; i < nfiles; i++)
- if (! fstatus[i].failed)
- {
- if (S_ISREG (fstatus[i].st.st_mode))
- regular_total += fstatus[i].st.st_size;
- else
- minimum_width = 7;
- }
+ if (! fstatus[i].failed)
+ {
+ if (S_ISREG (fstatus[i].st.st_mode))
+ regular_total += fstatus[i].st.st_size;
+ else
+ minimum_width = 7;
+ }
for (; 10 <= regular_total; regular_total /= 10)
- width++;
+ width++;
if (width < minimum_width)
- width = minimum_width;
+ width = minimum_width;
}
return width;
@@ -572,23 +622,26 @@ compute_number_width (int nfiles, struct fstatus const *fstatus)
int
main (int argc, char **argv)
{
- int i;
bool ok;
int optc;
- int nfiles;
+ size_t nfiles;
char **files;
char *files_from = NULL;
struct fstatus *fstatus;
struct Tokens tok;
initialize_main (&argc, &argv);
- program_name = argv[0];
+ set_program_name (argv[0]);
setlocale (LC_ALL, "");
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
atexit (close_stdout);
+ /* Line buffer stdout to ensure lines are written atomically and immediately
+ so that processes running in parallel do not intersperse their output. */
+ setvbuf (stdout, NULL, _IOLBF, 0);
+
print_lines = print_words = print_chars = print_bytes = false;
print_linelength = false;
total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
@@ -597,108 +650,184 @@ main (int argc, char **argv)
switch (optc)
{
case 'c':
- print_bytes = true;
- break;
+ print_bytes = true;
+ break;
case 'm':
- print_chars = true;
- break;
+ print_chars = true;
+ break;
case 'l':
- print_lines = true;
- break;
+ print_lines = true;
+ break;
case 'w':
- print_words = true;
- break;
+ print_words = true;
+ break;
case 'L':
- print_linelength = true;
- break;
+ print_linelength = true;
+ break;
case FILES0_FROM_OPTION:
- files_from = optarg;
- break;
+ files_from = optarg;
+ break;
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
default:
- usage (EXIT_FAILURE);
+ usage (EXIT_FAILURE);
}
- if (! (print_lines | print_words | print_chars | print_bytes
- | print_linelength))
+ if (! (print_lines || print_words || print_chars || print_bytes
+ || print_linelength))
print_lines = print_words = print_bytes = true;
+ bool read_tokens = false;
+ struct argv_iterator *ai;
if (files_from)
{
FILE *stream;
/* When using --files0-from=F, you may not specify any files
- on the command-line. */
+ on the command-line. */
if (optind < argc)
- {
- error (0, 0, _("extra operand %s"), quote (argv[optind]));
- fprintf (stderr, "%s\n",
- _("File operands cannot be combined with --files0-from."));
- usage (EXIT_FAILURE);
- }
+ {
+ error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
+ fprintf (stderr, "%s\n",
+ _("file operands cannot be combined with --files0-from"));
+ usage (EXIT_FAILURE);
+ }
if (STREQ (files_from, "-"))
- stream = stdin;
+ stream = stdin;
else
- {
- stream = fopen (files_from, "r");
- if (stream == NULL)
- error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
- quote (files_from));
- }
-
- readtokens0_init (&tok);
-
- if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
- error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
- quote (files_from));
-
- files = tok.tok;
- nfiles = tok.n_tok;
+ {
+ stream = fopen (files_from, "r");
+ if (stream == NULL)
+ error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
+ quoteaf (files_from));
+ }
+
+ /* Read the file list into RAM if we can detect its size and that
+ size is reasonable. Otherwise, we'll read a name at a time. */
+ struct stat st;
+ if (fstat (fileno (stream), &st) == 0
+ && S_ISREG (st.st_mode)
+ && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
+ {
+ read_tokens = true;
+ readtokens0_init (&tok);
+ if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
+ error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
+ quoteaf (files_from));
+ files = tok.tok;
+ nfiles = tok.n_tok;
+ ai = argv_iter_init_argv (files);
+ }
+ else
+ {
+ files = NULL;
+ nfiles = 0;
+ ai = argv_iter_init_stream (stream);
+ }
}
else
{
- static char *stdin_only[2];
+ static char *stdin_only[] = { NULL };
files = (optind < argc ? argv + optind : stdin_only);
nfiles = (optind < argc ? argc - optind : 1);
- stdin_only[0] = NULL;
+ ai = argv_iter_init_argv (files);
}
+ if (!ai)
+ xalloc_die ();
+
fstatus = get_input_fstatus (nfiles, files);
number_width = compute_number_width (nfiles, fstatus);
+ int i;
ok = true;
- for (i = 0; i < nfiles; i++)
+ for (i = 0; /* */; i++)
{
- if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-"))
- {
- ok = false;
- error (0, 0,
- _("when reading file names from stdin, "
- "no file name of %s allowed"),
- quote ("-"));
- continue;
- }
- ok &= wc_file (files[i], &fstatus[i]);
+ bool skip_file = false;
+ enum argv_iter_err ai_err;
+ char *file_name = argv_iter (ai, &ai_err);
+ if (!file_name)
+ {
+ switch (ai_err)
+ {
+ case AI_ERR_EOF:
+ goto argv_iter_done;
+ case AI_ERR_READ:
+ error (0, errno, _("%s: read error"),
+ quotef (files_from));
+ ok = false;
+ goto argv_iter_done;
+ case AI_ERR_MEM:
+ xalloc_die ();
+ default:
+ assert (!"unexpected error code from argv_iter");
+ }
+ }
+ if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
+ {
+ /* Give a better diagnostic in an unusual case:
+ printf - | wc --files0-from=- */
+ error (0, 0, _("when reading file names from stdin, "
+ "no file name of %s allowed"),
+ quoteaf (file_name));
+ skip_file = true;
+ }
+
+ if (!file_name[0])
+ {
+ /* Diagnose a zero-length file name. When it's one
+ among many, knowing the record number may help.
+ FIXME: currently print the record number only with
+ --files0-from=FILE. Maybe do it for argv, too? */
+ if (files_from == NULL)
+ error (0, 0, "%s", _("invalid zero-length file name"));
+ else
+ {
+ /* Using the standard 'filename:line-number:' prefix here is
+ not totally appropriate, since NUL is the separator, not NL,
+ but it might be better than nothing. */
+ unsigned long int file_number = argv_iter_n_args (ai);
+ error (0, 0, "%s:%lu: %s", quotef (files_from),
+ file_number, _("invalid zero-length file name"));
+ }
+ skip_file = true;
+ }
+
+ if (skip_file)
+ ok = false;
+ else
+ ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
}
+ argv_iter_done:
+
+ /* No arguments on the command line is fine. That means read from stdin.
+ However, no arguments on the --files0-from input stream
+ means don't read anything. */
+ if (ok && !files_from && argv_iter_n_args (ai) == 0)
+ ok &= wc_file (NULL, &fstatus[0]);
- if (1 < nfiles)
+ if (read_tokens)
+ readtokens0_free (&tok);
+
+ if (1 < argv_iter_n_args (ai))
write_counts (total_lines, total_words, total_chars, total_bytes,
- max_line_length, _("total"));
+ max_line_length, _("total"));
+
+ argv_iter_free (ai);
free (fstatus);
if (have_read_stdin && close (STDIN_FILENO) != 0)
error (EXIT_FAILURE, errno, "-");
- exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
+ return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}