1 files changed, 465 insertions, 336 deletions
diff --git a/src/wc.c b/src/wc.c
index 332f32d..94cbaff 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -1,10 +1,10 @@
 /* wc - print the number of lines, words, and bytes in files
-   Copyright (C) 85, 91, 1995-2006 Free Software Foundation, Inc.
+   Copyright (C) 1985-2016 Free Software Foundation, Inc.
 
-   This program is free software; you can redistribute it and/or modify
+   This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
 
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -12,42 +12,46 @@
    GNU General Public License for more details.
 
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software Foundation,
-   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 /* Written by Paul Rubin, phr@ocf.berkeley.edu
    and David MacKenzie, djm@gnu.ai.mit.edu. */
-
+
 #include <config.h>
 
 #include <stdio.h>
+#include <assert.h>
 #include <getopt.h>
 #include <sys/types.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "system.h"
+#include "argv-iter.h"
 #include "error.h"
-#include "inttostr.h"
-#include "quote.h"
+#include "fadvise.h"
+#include "mbchar.h"
+#include "physmem.h"
 #include "readtokens0.h"
 #include "safe-read.h"
-#include "wcwidth.h"
+#include "stat-size.h"
+#include "xfreopen.h"
 
 #if !defined iswspace && !HAVE_ISWSPACE
 # define iswspace(wc) \
     ((wc) == to_uchar (wc) && isspace (to_uchar (wc)))
 #endif
 
-/* The official name of this program (e.g., no `g' prefix).  */
+/* The official name of this program (e.g., no 'g' prefix).  */
 #define PROGRAM_NAME "wc"
 
-#define AUTHORS "Paul Rubin", "David MacKenzie"
+#define AUTHORS \
+  proper_name ("Paul Rubin"), \
+  proper_name ("David MacKenzie")
 
 /* Size of atomic reads. */
 #define BUFFER_SIZE (16 * 1024)
 
-/* The name this program was run with. */
-char *program_name;
-
 /* Cumulative number of lines, words, chars and bytes in all files so far.
    max_line_length is the maximum over all files processed so far.  */
 static uintmax_t total_lines;
@@ -101,32 +105,40 @@ void
 usage (int status)
 {
   if (status != EXIT_SUCCESS)
-    fprintf (stderr, _("Try `%s --help' for more information.\n"),
-	     program_name);
+    emit_try_help ();
   else
     {
       printf (_("\
 Usage: %s [OPTION]... [FILE]...\n\
   or:  %s [OPTION]... --files0-from=F\n\
 "),
-	      program_name, program_name);
+              program_name, program_name);
       fputs (_("\
 Print newline, word, and byte counts for each FILE, and a total line if\n\
-more than one FILE is specified.  With no FILE, or when FILE is -,\n\
-read standard input.\n\
+more than one FILE is specified.  A word is a non-zero-length sequence of\n\
+characters delimited by white space.\n\
+"), stdout);
+
+      emit_stdin_note ();
+
+      fputs (_("\
+\n\
+The options below may be used to select which counts are printed, always in\n\
+the following order: newline, word, character, byte, maximum line length.\n\
   -c, --bytes            print the byte counts\n\
   -m, --chars            print the character counts\n\
   -l, --lines            print the newline counts\n\
 "), stdout);
       fputs (_("\
       --files0-from=F    read input from the files specified by\n\
-                           NUL-terminated names in file F\n\
-  -L, --max-line-length  print the length of the longest line\n\
+                           NUL-terminated names in file F;\n\
+                           If F is - then read names from standard input\n\
+  -L, --max-line-length  print the maximum display width\n\
   -w, --words            print the word counts\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
-      printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+      emit_ancillary_info (PROGRAM_NAME);
     }
   exit (status);
 }
@@ -135,11 +147,11 @@ read standard input.\n\
    associated with the specified counters.  */
 static void
 write_counts (uintmax_t lines,
-	      uintmax_t words,
-	      uintmax_t chars,
-	      uintmax_t bytes,
-	      uintmax_t linelength,
-	      const char *file)
+              uintmax_t words,
+              uintmax_t chars,
+              uintmax_t bytes,
+              uintmax_t linelength,
+              const char *file)
 {
   static char const format_sp_int[] = " %*s";
   char const *format_int = format_sp_int + 1;
@@ -170,15 +182,16 @@ write_counts (uintmax_t lines,
       printf (format_int, number_width, umaxtostr (linelength, buf));
     }
   if (file)
-    printf (" %s", file);
+    printf (" %s", strchr (file, '\n') ? quotef (file) : file);
   putchar ('\n');
 }
 
 /* Count words.  FILE_X is the name of the file (or NULL for standard
    input) that is open on descriptor FD.  *FSTATUS is its status.
+   CURRENT_POS is the current file offset if known, negative if unknown.
    Return true if successful.  */
 static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
 {
   bool ok = true;
   char buf[BUFFER_SIZE + 1];
@@ -191,7 +204,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
 
   /* If in the current locale, chars are equivalent to bytes, we prefer
      counting bytes, because that's easier.  */
-#if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
+#if MB_LEN_MAX > 1
   if (MB_CUR_MAX > 1)
     {
       count_bytes = print_bytes;
@@ -200,207 +213,242 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
   else
 #endif
     {
-      count_bytes = print_bytes | print_chars;
+      count_bytes = print_bytes || print_chars;
       count_chars = false;
     }
-  count_complicated = print_words | print_linelength;
+  count_complicated = print_words || print_linelength;
+
+  /* Advise the kernel of our access pattern only if we will read().  */
+  if (!count_bytes || count_chars || print_lines || count_complicated)
+    fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
 
   /* When counting only bytes, save some line- and word-counting
-     overhead.  If FD is a `regular' Unix file, using lseek is enough
-     to get its `size' in bytes.  Otherwise, read blocks of BUFFER_SIZE
-     bytes at a time until EOF.  Note that the `size' (number of bytes)
+     overhead.  If FD is a 'regular' Unix file, using lseek is enough
+     to get its 'size' in bytes.  Otherwise, read blocks of BUFFER_SIZE
+     bytes at a time until EOF.  Note that the 'size' (number of bytes)
      that wc reports is smaller than stats.st_size when the file is not
      positioned at its beginning.  That's why the lseek calls below are
      necessary.  For example the command
-     `(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
-     should make wc report `0' bytes.  */
+     '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group'
+     should make wc report '0' bytes.  */
 
-  if (count_bytes & !count_chars & !print_lines & !count_complicated)
+  if (count_bytes && !count_chars && !print_lines && !count_complicated)
     {
-      off_t current_pos, end_pos;
-
       if (0 < fstatus->failed)
-	fstatus->failed = fstat (fd, &fstatus->st);
-
-      if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
-	  && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1
-	  && (end_pos = lseek (fd, (off_t) 0, SEEK_END)) != -1)
-	{
-	  /* Be careful here.  The current position may actually be
-	     beyond the end of the file.  As in the example above.  */
-	  bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
-	}
-      else
-	{
-	  while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
-	    {
-	      if (bytes_read == SAFE_READ_ERROR)
-		{
-		  error (0, errno, "%s", file);
-		  ok = false;
-		  break;
-		}
-	      bytes += bytes_read;
-	    }
-	}
+        fstatus->failed = fstat (fd, &fstatus->st);
+
+      /* For sized files, seek to just short of EOF (an absolute offset,
+         hence SEEK_SET) and read the rest.  This works better for files in
+         proc-like file systems where the size is only approximate.  */
+      if (! fstatus->failed && usable_st_size (&fstatus->st)
+          && 0 <= fstatus->st.st_size)
+        {
+          off_t end_pos = fstatus->st.st_size;
+          off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1);
+          if (current_pos < 0)
+            current_pos = lseek (fd, 0, SEEK_CUR);
+          if (0 <= current_pos && current_pos < hi_pos
+              && 0 <= lseek (fd, hi_pos, SEEK_SET))
+            bytes = hi_pos - current_pos;
+        }
+
+      fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+      while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+        {
+          if (bytes_read == SAFE_READ_ERROR)
+            {
+              error (0, errno, "%s", quotef (file));
+              ok = false;
+              break;
+            }
+          bytes += bytes_read;
+        }
     }
-  else if (!count_chars & !count_complicated)
+  else if (!count_chars && !count_complicated)
     {
       /* Use a separate loop when counting only lines or lines and bytes --
-	 but not chars or words.  */
+         but not chars or words.  */
+      bool long_lines = false;
       while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
-	{
-	  char *p = buf;
-
-	  if (bytes_read == SAFE_READ_ERROR)
-	    {
-	      error (0, errno, "%s", file);
-	      ok = false;
-	      break;
-	    }
-
-	  while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
-	    {
-	      ++p;
-	      ++lines;
-	    }
-	  bytes += bytes_read;
-	}
+        {
+          if (bytes_read == SAFE_READ_ERROR)
+            {
+              error (0, errno, "%s", quotef (file));
+              ok = false;
+              break;
+            }
+
+          bytes += bytes_read;
+
+          char *p = buf;
+          char *end = p + bytes_read;
+          uintmax_t plines = lines;
+
+          if (! long_lines)
+            {
+              /* Avoid function call overhead for shorter lines.  */
+              while (p != end)
+                lines += *p++ == '\n';
+            }
+          else
+            {
+              /* memchr is more efficient with longer lines.  */
+              while ((p = memchr (p, '\n', end - p)))
+                {
+                  ++p;
+                  ++lines;
+                }
+            }
+
+          /* If the average line length in the block is >= 15, then use
+             memchr for the next block, where system specific optimizations
+             may outweigh function call overhead.
+             FIXME: This line length was determined in 2015, on both
+             x86_64 and ppc64, but it's worth re-evaluating in future with
+             newer compilers, CPUs, or memchr() implementations etc.  */
+          if (lines - plines <= bytes_read / 15)
+            long_lines = true;
+          else
+            long_lines = false;
+        }
     }
-#if HAVE_MBRTOWC && (MB_LEN_MAX > 1)
+#if MB_LEN_MAX > 1
 # define SUPPORT_OLD_MBRTOWC 1
   else if (MB_CUR_MAX > 1)
     {
       bool in_word = false;
       uintmax_t linepos = 0;
       mbstate_t state = { 0, };
-      uintmax_t last_error_line = 0;
-      int last_error_errno = 0;
+      bool in_shift = false;
 # if SUPPORT_OLD_MBRTOWC
       /* Back-up the state before each multibyte character conversion and
-	 move the last incomplete character of the buffer to the front
-	 of the buffer.  This is needed because we don't know whether
-	 the `mbrtowc' function updates the state when it returns -2, -
-	 this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
-	 ANSI C, glibc-2.1 and Solaris 5.7 behaviour.  We don't have an
-	 autoconf test for this, yet.  */
+         move the last incomplete character of the buffer to the front
+         of the buffer.  This is needed because we don't know whether
+         the 'mbrtowc' function updates the state when it returns -2, --
+         this is the ISO C 99 and glibc-2.2 behaviour - or not - amended
+         ANSI C, glibc-2.1 and Solaris 5.7 behaviour.  We don't have an
+         autoconf test for this, yet.  */
       size_t prev = 0; /* number of bytes carried over from previous round */
 # else
       const size_t prev = 0;
 # endif
 
       while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0)
-	{
-	  const char *p;
+        {
+          const char *p;
 # if SUPPORT_OLD_MBRTOWC
-	  mbstate_t backup_state;
+          mbstate_t backup_state;
 # endif
-	  if (bytes_read == SAFE_READ_ERROR)
-	    {
-	      error (0, errno, "%s", file);
-	      ok = false;
-	      break;
-	    }
-
-	  bytes += bytes_read;
-	  p = buf;
-	  bytes_read += prev;
-	  do
-	    {
-	      wchar_t wide_char;
-	      size_t n;
-
+          if (bytes_read == SAFE_READ_ERROR)
+            {
+              error (0, errno, "%s", quotef (file));
+              ok = false;
+              break;
+            }
+
+          bytes += bytes_read;
+          p = buf;
+          bytes_read += prev;
+          do
+            {
+              wchar_t wide_char;
+              size_t n;
+
+              if (!in_shift && is_basic (*p))
+                {
+                  /* Handle most ASCII characters quickly, without calling
+                     mbrtowc().  */
+                  n = 1;
+                  wide_char = *p;
+                }
+              else
+                {
+                  in_shift = true;
 # if SUPPORT_OLD_MBRTOWC
-	      backup_state = state;
+                  backup_state = state;
 # endif
-	      n = mbrtowc (&wide_char, p, bytes_read, &state);
-	      if (n == (size_t) -2)
-		{
+                  n = mbrtowc (&wide_char, p, bytes_read, &state);
+                  if (n == (size_t) -2)
+                    {
 # if SUPPORT_OLD_MBRTOWC
-		  state = backup_state;
+                      state = backup_state;
 # endif
-		  break;
-		}
-	      if (n == (size_t) -1)
-		{
-		  /* Signal repeated errors only once per line.  */
-		  if (!(lines + 1 == last_error_line
-			&& errno == last_error_errno))
-		    {
-		      char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
-		      last_error_line = lines + 1;
-		      last_error_errno = errno;
-		      error (0, errno, "%s:%s", file,
-			     umaxtostr (last_error_line, line_number_buf));
-		      ok = false;
-		    }
-		  p++;
-		  bytes_read--;
-		}
-	      else
-		{
-		  if (n == 0)
-		    {
-		      wide_char = 0;
-		      n = 1;
-		    }
-		  p += n;
-		  bytes_read -= n;
-		  chars++;
-		  switch (wide_char)
-		    {
-		    case '\n':
-		      lines++;
-		      /* Fall through. */
-		    case '\r':
-		    case '\f':
-		      if (linepos > linelength)
-			linelength = linepos;
-		      linepos = 0;
-		      goto mb_word_separator;
-		    case '\t':
-		      linepos += 8 - (linepos % 8);
-		      goto mb_word_separator;
-		    case ' ':
-		      linepos++;
-		      /* Fall through. */
-		    case '\v':
-		    mb_word_separator:
-		      words += in_word;
-		      in_word = false;
-		      break;
-		    default:
-		      if (iswprint (wide_char))
-			{
-			  int width = wcwidth (wide_char);
-			  if (width > 0)
-			    linepos += width;
-			  if (iswspace (wide_char))
-			    goto mb_word_separator;
-			  in_word = true;
-			}
-		      break;
-		    }
-		}
-	    }
-	  while (bytes_read > 0);
+                      break;
+                    }
+                  if (n == (size_t) -1)
+                    {
+                      /* Remember that we read a byte, but don't complain
+                         about the error.  Because of the decoding error,
+                         this is considered to be a byte but not a
+                         character (that is, chars is not incremented).  */
+                      p++;
+                      bytes_read--;
+                      continue;
+                    }
+                  if (mbsinit (&state))
+                    in_shift = false;
+                  if (n == 0)
+                    {
+                      wide_char = 0;
+                      n = 1;
+                    }
+                }
+              p += n;
+              bytes_read -= n;
+              chars++;
+              switch (wide_char)
+                {
+                case '\n':
+                  lines++;
+                  /* Fall through. */
+                case '\r':
+                case '\f':
+                  if (linepos > linelength)
+                    linelength = linepos;
+                  linepos = 0;
+                  goto mb_word_separator;
+                case '\t':
+                  linepos += 8 - (linepos % 8);
+                  goto mb_word_separator;
+                case ' ':
+                  linepos++;
+                  /* Fall through. */
+                case '\v':
+                mb_word_separator:
+                  words += in_word;
+                  in_word = false;
+                  break;
+                default:
+                  if (iswprint (wide_char))
+                    {
+                      int width = wcwidth (wide_char);
+                      if (width > 0)
+                        linepos += width;
+                      if (iswspace (wide_char))
+                        goto mb_word_separator;
+                      in_word = true;
+                    }
+                  break;
+                }
+            }
+          while (bytes_read > 0);
 
 # if SUPPORT_OLD_MBRTOWC
-	  if (bytes_read > 0)
-	    {
-	      if (bytes_read == BUFFER_SIZE)
-		{
-		  /* Encountered a very long redundant shift sequence.  */
-		  p++;
-		  bytes_read--;
-		}
-	      memmove (buf, p, bytes_read);
-	    }
-	  prev = bytes_read;
+          if (bytes_read > 0)
+            {
+              if (bytes_read == BUFFER_SIZE)
+                {
+                  /* Encountered a very long redundant shift sequence.  */
+                  p++;
+                  bytes_read--;
+                }
+              memmove (buf, p, bytes_read);
+            }
+          prev = bytes_read;
 # endif
-	}
+        }
       if (linepos > linelength)
-	linelength = linepos;
+        linelength = linepos;
       words += in_word;
     }
 #endif
@@ -410,55 +458,55 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
       uintmax_t linepos = 0;
 
       while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
-	{
-	  const char *p = buf;
-	  if (bytes_read == SAFE_READ_ERROR)
-	    {
-	      error (0, errno, "%s", file);
-	      ok = false;
-	      break;
-	    }
-
-	  bytes += bytes_read;
-	  do
-	    {
-	      switch (*p++)
-		{
-		case '\n':
-		  lines++;
-		  /* Fall through. */
-		case '\r':
-		case '\f':
-		  if (linepos > linelength)
-		    linelength = linepos;
-		  linepos = 0;
-		  goto word_separator;
-		case '\t':
-		  linepos += 8 - (linepos % 8);
-		  goto word_separator;
-		case ' ':
-		  linepos++;
-		  /* Fall through. */
-		case '\v':
-		word_separator:
-		  words += in_word;
-		  in_word = false;
-		  break;
-		default:
-		  if (isprint (to_uchar (p[-1])))
-		    {
-		      linepos++;
-		      if (isspace (to_uchar (p[-1])))
-			goto word_separator;
-		      in_word = true;
-		    }
-		  break;
-		}
-	    }
-	  while (--bytes_read);
-	}
+        {
+          const char *p = buf;
+          if (bytes_read == SAFE_READ_ERROR)
+            {
+              error (0, errno, "%s", quotef (file));
+              ok = false;
+              break;
+            }
+
+          bytes += bytes_read;
+          do
+            {
+              switch (*p++)
+                {
+                case '\n':
+                  lines++;
+                  /* Fall through. */
+                case '\r':
+                case '\f':
+                  if (linepos > linelength)
+                    linelength = linepos;
+                  linepos = 0;
+                  goto word_separator;
+                case '\t':
+                  linepos += 8 - (linepos % 8);
+                  goto word_separator;
+                case ' ':
+                  linepos++;
+                  /* Fall through. */
+                case '\v':
+                word_separator:
+                  words += in_word;
+                  in_word = false;
+                  break;
+                default:
+                  if (isprint (to_uchar (p[-1])))
+                    {
+                      linepos++;
+                      if (isspace (to_uchar (p[-1])))
+                        goto word_separator;
+                      in_word = true;
+                    }
+                  break;
+                }
+            }
+          while (--bytes_read);
+        }
       if (linepos > linelength)
-	linelength = linepos;
+        linelength = linepos;
       words += in_word;
     }
 
@@ -483,53 +531,55 @@ wc_file (char const *file, struct fstatus *fstatus)
     {
       have_read_stdin = true;
       if (O_BINARY && ! isatty (STDIN_FILENO))
-	freopen (NULL, "rb", stdin);
-      return wc (STDIN_FILENO, file, fstatus);
+        xfreopen (NULL, "rb", stdin);
+      return wc (STDIN_FILENO, file, fstatus, -1);
     }
   else
     {
       int fd = open (file, O_RDONLY | O_BINARY);
       if (fd == -1)
-	{
-	  error (0, errno, "%s", file);
-	  return false;
-	}
+        {
+          error (0, errno, "%s", quotef (file));
+          return false;
+        }
       else
-	{
-	  bool ok = wc (fd, file, fstatus);
-	  if (close (fd) != 0)
-	    {
-	      error (0, errno, "%s", file);
-	      return false;
-	    }
-	  return ok;
-	}
+        {
+          bool ok = wc (fd, file, fstatus, 0);
+          if (close (fd) != 0)
+            {
+              error (0, errno, "%s", quotef (file));
+              return false;
+            }
+          return ok;
+        }
     }
 }
 
 /* Return the file status for the NFILES files addressed by FILE.
    Optimize the case where only one number is printed, for just one
    file; in that case we can use a print width of 1, so we don't need
-   to stat the file.  */
+   to stat the file.  Handle the case of (nfiles == 0) in the same way;
+   that happens when we don't know how long the list of file names will be.  */
 
 static struct fstatus *
-get_input_fstatus (int nfiles, char * const *file)
+get_input_fstatus (size_t nfiles, char *const *file)
 {
-  struct fstatus *fstatus = xnmalloc (nfiles, sizeof *fstatus);
+  struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus);
 
-  if (nfiles == 1
-      && ((print_lines + print_words + print_chars
-	   + print_bytes + print_linelength)
-	  == 1))
+  if (nfiles == 0
+      || (nfiles == 1
+          && ((print_lines + print_words + print_chars
+               + print_bytes + print_linelength)
+              == 1)))
     fstatus[0].failed = 1;
   else
     {
-      int i;
+      size_t i;
 
       for (i = 0; i < nfiles; i++)
-	fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
-			     ? fstat (STDIN_FILENO, &fstatus[i].st)
-			     : stat (file[i], &fstatus[i].st));
+        fstatus[i].failed = (! file[i] || STREQ (file[i], "-")
+                             ? fstat (STDIN_FILENO, &fstatus[i].st)
+                             : stat (file[i], &fstatus[i].st));
     }
 
   return fstatus;
@@ -539,8 +589,8 @@ get_input_fstatus (int nfiles, char * const *file)
    recorded in FSTATUS.  Optimize the same special case that
    get_input_fstatus optimizes.  */
 
-static int
-compute_number_width (int nfiles, struct fstatus const *fstatus)
+static int _GL_ATTRIBUTE_PURE
+compute_number_width (size_t nfiles, struct fstatus const *fstatus)
 {
   int width = 1;
 
@@ -548,21 +598,21 @@ compute_number_width (int nfiles, struct fstatus const *fstatus)
     {
       int minimum_width = 1;
       uintmax_t regular_total = 0;
-      int i;
+      size_t i;
 
       for (i = 0; i < nfiles; i++)
-	if (! fstatus[i].failed)
-	  {
-	    if (S_ISREG (fstatus[i].st.st_mode))
-	      regular_total += fstatus[i].st.st_size;
-	    else
-	      minimum_width = 7;
-	  }
+        if (! fstatus[i].failed)
+          {
+            if (S_ISREG (fstatus[i].st.st_mode))
+              regular_total += fstatus[i].st.st_size;
+            else
+              minimum_width = 7;
+          }
 
       for (; 10 <= regular_total; regular_total /= 10)
-	width++;
+        width++;
       if (width < minimum_width)
-	width = minimum_width;
+        width = minimum_width;
     }
 
   return width;
@@ -572,23 +622,26 @@ compute_number_width (int nfiles, struct fstatus const *fstatus)
 int
 main (int argc, char **argv)
 {
-  int i;
   bool ok;
   int optc;
-  int nfiles;
+  size_t nfiles;
   char **files;
   char *files_from = NULL;
   struct fstatus *fstatus;
   struct Tokens tok;
 
   initialize_main (&argc, &argv);
-  program_name = argv[0];
+  set_program_name (argv[0]);
   setlocale (LC_ALL, "");
   bindtextdomain (PACKAGE, LOCALEDIR);
   textdomain (PACKAGE);
 
   atexit (close_stdout);
 
+  /* Line buffer stdout to ensure lines are written atomically and immediately
+     so that processes running in parallel do not intersperse their output.  */
+  setvbuf (stdout, NULL, _IOLBF, 0);
+
   print_lines = print_words = print_chars = print_bytes = false;
   print_linelength = false;
   total_lines = total_words = total_chars = total_bytes = max_line_length = 0;
@@ -597,108 +650,184 @@ main (int argc, char **argv)
     switch (optc)
       {
       case 'c':
-	print_bytes = true;
-	break;
+        print_bytes = true;
+        break;
 
       case 'm':
-	print_chars = true;
-	break;
+        print_chars = true;
+        break;
 
       case 'l':
-	print_lines = true;
-	break;
+        print_lines = true;
+        break;
 
       case 'w':
-	print_words = true;
-	break;
+        print_words = true;
+        break;
 
       case 'L':
-	print_linelength = true;
-	break;
+        print_linelength = true;
+        break;
 
       case FILES0_FROM_OPTION:
-	files_from = optarg;
-	break;
+        files_from = optarg;
+        break;
 
       case_GETOPT_HELP_CHAR;
 
       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
 
       default:
-	usage (EXIT_FAILURE);
+        usage (EXIT_FAILURE);
       }
 
-  if (! (print_lines | print_words | print_chars | print_bytes
-	 | print_linelength))
+  if (! (print_lines || print_words || print_chars || print_bytes
+         || print_linelength))
     print_lines = print_words = print_bytes = true;
 
+  bool read_tokens = false;
+  struct argv_iterator *ai;
   if (files_from)
     {
       FILE *stream;
 
       /* When using --files0-from=F, you may not specify any files
-	 on the command-line.  */
+         on the command-line.  */
       if (optind < argc)
-	{
-	  error (0, 0, _("extra operand %s"), quote (argv[optind]));
-	  fprintf (stderr, "%s\n",
-		   _("File operands cannot be combined with --files0-from."));
-	  usage (EXIT_FAILURE);
-	}
+        {
+          error (0, 0, _("extra operand %s"), quoteaf (argv[optind]));
+          fprintf (stderr, "%s\n",
+                   _("file operands cannot be combined with --files0-from"));
+          usage (EXIT_FAILURE);
+        }
 
       if (STREQ (files_from, "-"))
-	stream = stdin;
+        stream = stdin;
       else
-	{
-	  stream = fopen (files_from, "r");
-	  if (stream == NULL)
-	    error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
-		   quote (files_from));
-	}
-
-      readtokens0_init (&tok);
-
-      if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
-	error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
-	       quote (files_from));
-
-      files = tok.tok;
-      nfiles = tok.n_tok;
+        {
+          stream = fopen (files_from, "r");
+          if (stream == NULL)
+            error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
+                   quoteaf (files_from));
+        }
+
+      /* Read the file list into RAM if we can detect its size and that
+         size is reasonable.  Otherwise, we'll read a name at a time.  */
+      struct stat st;
+      if (fstat (fileno (stream), &st) == 0
+          && S_ISREG (st.st_mode)
+          && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2))
+        {
+          read_tokens = true;
+          readtokens0_init (&tok);
+          if (! readtokens0 (stream, &tok) || fclose (stream) != 0)
+            error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
+                   quoteaf (files_from));
+          files = tok.tok;
+          nfiles = tok.n_tok;
+          ai = argv_iter_init_argv (files);
+        }
+      else
+        {
+          files = NULL;
+          nfiles = 0;
+          ai = argv_iter_init_stream (stream);
+        }
     }
   else
     {
-      static char *stdin_only[2];
+      static char *stdin_only[] = { NULL };
       files = (optind < argc ? argv + optind : stdin_only);
       nfiles = (optind < argc ? argc - optind : 1);
-      stdin_only[0] = NULL;
+      ai = argv_iter_init_argv (files);
     }
 
+  if (!ai)
+    xalloc_die ();
+
   fstatus = get_input_fstatus (nfiles, files);
   number_width = compute_number_width (nfiles, fstatus);
 
+  int i;
   ok = true;
-  for (i = 0; i < nfiles; i++)
+  for (i = 0; /* */; i++)
     {
-      if (files_from && STREQ (files_from, "-") && STREQ (files[i], "-"))
-	{
-	  ok = false;
-	  error (0, 0,
-		 _("when reading file names from stdin, "
-		   "no file name of %s allowed"),
-		 quote ("-"));
-	  continue;
-	}
-      ok &= wc_file (files[i], &fstatus[i]);
+      bool skip_file = false;
+      enum argv_iter_err ai_err;
+      char *file_name = argv_iter (ai, &ai_err);
+      if (!file_name)
+        {
+          switch (ai_err)
+            {
+            case AI_ERR_EOF:
+              goto argv_iter_done;
+            case AI_ERR_READ:
+              error (0, errno, _("%s: read error"),
+                     quotef (files_from));
+              ok = false;
+              goto argv_iter_done;
+            case AI_ERR_MEM:
+              xalloc_die ();
+            default:
+              assert (!"unexpected error code from argv_iter");
+            }
+        }
+      if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-"))
+        {
+          /* Give a better diagnostic in an unusual case:
+             printf - | wc --files0-from=- */
+          error (0, 0, _("when reading file names from stdin, "
+                         "no file name of %s allowed"),
+                 quoteaf (file_name));
+          skip_file = true;
+        }
+
+      if (!file_name[0])
+        {
+          /* Diagnose a zero-length file name.  When it's one
+             among many, knowing the record number may help.
+             FIXME: currently print the record number only with
+             --files0-from=FILE.  Maybe do it for argv, too?  */
+          if (files_from == NULL)
+            error (0, 0, "%s", _("invalid zero-length file name"));
+          else
+            {
+              /* Using the standard 'filename:line-number:' prefix here is
+                 not totally appropriate, since NUL is the separator, not NL,
+                 but it might be better than nothing.  */
+              unsigned long int file_number = argv_iter_n_args (ai);
+              error (0, 0, "%s:%lu: %s", quotef (files_from),
+                     file_number, _("invalid zero-length file name"));
+            }
+          skip_file = true;
+        }
+
+      if (skip_file)
+        ok = false;
+      else
+        ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]);
     }
+ argv_iter_done:
+
+  /* No arguments on the command line is fine.  That means read from stdin.
+     However, no arguments on the --files0-from input stream means
+     don't read anything.  */
+  if (ok && !files_from && argv_iter_n_args (ai) == 0)
+    ok &= wc_file (NULL, &fstatus[0]);
 
-  if (1 < nfiles)
+  if (read_tokens)
+    readtokens0_free (&tok);
+
+  if (1 < argv_iter_n_args (ai))
     write_counts (total_lines, total_words, total_chars, total_bytes,
-		  max_line_length, _("total"));
+                  max_line_length, _("total"));
+
+  argv_iter_free (ai);
 
   free (fstatus);
 
   if (have_read_stdin && close (STDIN_FILENO) != 0)
     error (EXIT_FAILURE, errno, "-");
 
-  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
+  return ok ? EXIT_SUCCESS : EXIT_FAILURE;
 }