coreutils-8.25HEAD coreutils-8.25 master

author: Lorry Tar Creator <lorry-tar-importer@lorry> 2016-01-20 10:55:18 +0000
committer: Lorry Tar Creator <lorry-tar-importer@lorry> 2016-01-20 10:55:18 +0000
commit: 70e9163c9c18e995515598085cb824e554eb7ae7 (patch)
tree: a42dc8b2a6c031354bf31472de888bfc8a060132 /src/tr.c
parent: cbf5993c43f49281173f185863577d86bfac6eae (diff)
download: coreutils-tarball-master.tar.gz
1 file changed, 779 insertions, 728 deletions
diff --git a/src/tr.c b/src/tr.c
index 214eb2b..c6a1540 100644
--- a/src/tr.c
+++ b/src/tr.c
@@ -1,10 +1,10 @@
 /* tr -- a filter to translate characters
-   Copyright (C) 91, 1995-2006 Free Software Foundation, Inc.
+   Copyright (C) 1991-2016 Free Software Foundation, Inc.
 
-   This program is free software; you can redistribute it and/or modify
+   This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
 
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -12,8 +12,7 @@
    GNU General Public License for more details.
 
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software Foundation,
-   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
 /* Written by Jim Meyering */
 
@@ -26,14 +25,16 @@
 
 #include "system.h"
 #include "error.h"
+#include "fadvise.h"
 #include "quote.h"
 #include "safe-read.h"
+#include "xfreopen.h"
 #include "xstrtol.h"
 
-/* The official name of this program (e.g., no `g' prefix).  */
+/* The official name of this program (e.g., no 'g' prefix).  */
 #define PROGRAM_NAME "tr"
 
-#define AUTHORS "Jim Meyering"
+#define AUTHORS proper_name ("Jim Meyering")
 
 enum { N_CHARS = UCHAR_MAX + 1 };
 
@@ -100,28 +101,28 @@ enum Range_element_type
    For example, consider the POSIX version of the classic tr command:
        tr -cs 'a-zA-Z_' '[\n*]'
    String1 has 3 constructs, two of which are ranges (a-z and A-Z),
-   and a single normal character, `_'.  String2 has one construct.  */
+   and a single normal character, '_'.  String2 has one construct.  */
 struct List_element
   {
     enum Range_element_type type;
     struct List_element *next;
     union
       {
-	unsigned char normal_char;
-	struct			/* unnamed */
-	  {
-	    unsigned char first_char;
-	    unsigned char last_char;
-	  }
-	range;
-	enum Char_class char_class;
-	unsigned char equiv_code;
-	struct			/* unnamed */
-	  {
-	    unsigned char the_repeated_char;
-	    count repeat_count;
-	  }
-	repeated_char;
+        unsigned char normal_char;
+        struct			/* unnamed */
+          {
+            unsigned char first_char;
+            unsigned char last_char;
+          }
+        range;
+        enum Char_class char_class;
+        unsigned char equiv_code;
+        struct			/* unnamed */
+          {
+            unsigned char the_repeated_char;
+            count repeat_count;
+          }
+        repeated_char;
       }
     u;
   };
@@ -132,9 +133,9 @@ struct List_element
    the corresponding argument string.  The attributes are used mainly
    to verify that the strings are valid in the context of any options
    specified (like -s, -d, or -c).  The main exception is the member
-   `tail', which is first used to construct the list.  After construction,
+   'tail', which is first used to construct the list.  After construction,
    it is used by get_next to save its state when traversing the list.
-   The member `state' serves a similar function.  */
+   The member 'state' serves a similar function.  */
 struct Spec_list
   {
     /* Points to the head of the list of range elements.
@@ -194,9 +195,6 @@ es_match (struct E_string const *es, size_t i, char c)
   return es->s[i] == c && !es->escaped[i];
 }
 
-/* The name by which this program was run.  */
-char *program_name;
-
 /* When true, each sequence in the input of a repeated character
    (call it c) is replaced (in the output) by a single occurrence of c
    for every c in the squeeze set.  */
@@ -249,15 +247,14 @@ static char const *const char_class_name[] =
   "alnum", "alpha", "blank", "cntrl", "digit", "graph",
   "lower", "print", "punct", "space", "upper", "xdigit"
 };
-enum { N_CHAR_CLASSES = sizeof char_class_name / sizeof char_class_name[0] };
 
-/* Array of boolean values.  A character `c' is a member of the
+/* Array of boolean values.  A character 'c' is a member of the
    squeeze set if and only if in_squeeze_set[c] is true.  The squeeze
    set is defined by the last (possibly, the only) string argument
    on the command line when the squeeze option is given.  */
 static bool in_squeeze_set[N_CHARS];
 
-/* Array of boolean values.  A character `c' is a member of the
+/* Array of boolean values.  A character 'c' is a member of the
    delete set if and only if in_delete_set[c] is true.  The delete
    set is defined by the first (or only) string argument on the
    command line when the delete option is given.  */
@@ -278,28 +275,27 @@ static struct option const long_options[] =
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
 };
-
+
 void
 usage (int status)
 {
   if (status != EXIT_SUCCESS)
-    fprintf (stderr, _("Try `%s --help' for more information.\n"),
-	     program_name);
+    emit_try_help ();
   else
     {
       printf (_("\
 Usage: %s [OPTION]... SET1 [SET2]\n\
 "),
-	      program_name);
+              program_name);
       fputs (_("\
 Translate, squeeze, and/or delete characters from standard input,\n\
 writing to standard output.\n\
 \n\
-  -c, -C, --complement    first complement SET1\n\
+  -c, -C, --complement    use the complement of SET1\n\
   -d, --delete            delete characters in SET1, do not translate\n\
-  -s, --squeeze-repeats   replace each input sequence of a repeated character\n\
-                            that is listed in SET1 with a single occurrence\n\
-                            of that character\n\
+  -s, --squeeze-repeats   replace each sequence of a repeated character\n\
+                            that is listed in the last specified SET,\n\
+                            with a single occurrence of that character\n\
   -t, --truncate-set1     first truncate SET1 to length of SET2\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -343,20 +339,13 @@ Interpreted sequences are:\n\
 \n\
 Translation occurs if -d is not given and both SET1 and SET2 appear.\n\
 -t may be used only when translating.  SET2 is extended to length of\n\
-SET1 by repeating its last character as necessary.  \
-"), stdout);
-     fputs (_("\
-Excess characters\n\
+SET1 by repeating its last character as necessary.  Excess characters\n\
 of SET2 are ignored.  Only [:lower:] and [:upper:] are guaranteed to\n\
 expand in ascending order; used in SET2 while translating, they may\n\
-only be used in pairs to specify case conversion.  \
+only be used in pairs to specify case conversion.  -s uses the last\n\
+specified SET, and occurs after translation or deletion.\n\
 "), stdout);
-     fputs (_("\
--s uses SET1 if not\n\
-translating nor deleting; else squeezing uses SET2 and occurs after\n\
-translation or deletion.\n\
-"), stdout);
-      printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+      emit_ancillary_info (PROGRAM_NAME);
     }
   exit (status);
 }
@@ -373,7 +362,7 @@ is_equiv_class_member (unsigned char equiv_class, unsigned char c)
 /* Return true if the character C is a member of the
    character class CHAR_CLASS.  */
 
-static bool
+static bool _GL_ATTRIBUTE_PURE
 is_char_class_member (enum Char_class char_class, unsigned char c)
 {
   int result;
@@ -455,93 +444,94 @@ unquote (char const *s, struct E_string *es)
       int oct_digit;
 
       switch (s[i])
-	{
-	case '\\':
-	  es->escaped[j] = true;
-	  switch (s[i + 1])
-	    {
-	    case '\\':
-	      c = '\\';
-	      break;
-	    case 'a':
-	      c = '\a';
-	      break;
-	    case 'b':
-	      c = '\b';
-	      break;
-	    case 'f':
-	      c = '\f';
-	      break;
-	    case 'n':
-	      c = '\n';
-	      break;
-	    case 'r':
-	      c = '\r';
-	      break;
-	    case 't':
-	      c = '\t';
-	      break;
-	    case 'v':
-	      c = '\v';
-	      break;
-	    case '0':
-	    case '1':
-	    case '2':
-	    case '3':
-	    case '4':
-	    case '5':
-	    case '6':
-	    case '7':
-	      c = s[i + 1] - '0';
-	      oct_digit = s[i + 2] - '0';
-	      if (0 <= oct_digit && oct_digit <= 7)
-		{
-		  c = 8 * c + oct_digit;
-		  ++i;
-		  oct_digit = s[i + 2] - '0';
-		  if (0 <= oct_digit && oct_digit <= 7)
-		    {
-		      if (8 * c + oct_digit < N_CHARS)
-			{
-			  c = 8 * c + oct_digit;
-			  ++i;
-			}
-		      else
-			{
-			  /* A 3-digit octal number larger than \377 won't
-			     fit in 8 bits.  So we stop when adding the
-			     next digit would put us over the limit and
-			     give a warning about the ambiguity.  POSIX
-			     isn't clear on this, and we interpret this
-			     lack of clarity as meaning the resulting behavior
-			     is undefined, which means we're allowed to issue
-			     a warning.  */
-			  error (0, 0, _("warning: the ambiguous octal escape \
-\\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, %c"),
-				 s[i], s[i + 1], s[i + 2],
-				 s[i], s[i + 1], s[i + 2]);
-			}
-		    }
-		}
-	      break;
-	    case '\0':
-	      /* POSIX seems to require that a trailing backslash must
-		 stand for itself.  Weird.  */
-	      es->escaped[j] = false;
-	      i--;
-	      c = '\\';
-	      break;
-	    default:
-	      c = s[i + 1];
-	      break;
-	    }
-	  ++i;
-	  es->s[j++] = c;
-	  break;
-	default:
-	  es->s[j++] = s[i];
-	  break;
-	}
+        {
+        case '\\':
+          es->escaped[j] = true;
+          switch (s[i + 1])
+            {
+            case '\\':
+              c = '\\';
+              break;
+            case 'a':
+              c = '\a';
+              break;
+            case 'b':
+              c = '\b';
+              break;
+            case 'f':
+              c = '\f';
+              break;
+            case 'n':
+              c = '\n';
+              break;
+            case 'r':
+              c = '\r';
+              break;
+            case 't':
+              c = '\t';
+              break;
+            case 'v':
+              c = '\v';
+              break;
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+              c = s[i + 1] - '0';
+              oct_digit = s[i + 2] - '0';
+              if (0 <= oct_digit && oct_digit <= 7)
+                {
+                  c = 8 * c + oct_digit;
+                  ++i;
+                  oct_digit = s[i + 2] - '0';
+                  if (0 <= oct_digit && oct_digit <= 7)
+                    {
+                      if (8 * c + oct_digit < N_CHARS)
+                        {
+                          c = 8 * c + oct_digit;
+                          ++i;
+                        }
+                      else
+                        {
+                          /* A 3-digit octal number larger than \377 won't
+                             fit in 8 bits.  So we stop when adding the
+                             next digit would put us over the limit and
+                             give a warning about the ambiguity.  POSIX
+                             isn't clear on this, and we interpret this
+                             lack of clarity as meaning the resulting behavior
+                             is undefined, which means we're allowed to issue
+                             a warning.  */
+                          error (0, 0, _("warning: the ambiguous octal escape\
+ \\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, %c"),
+                                 s[i], s[i + 1], s[i + 2],
+                                 s[i], s[i + 1], s[i + 2]);
+                        }
+                    }
+                }
+              break;
+            case '\0':
+              error (0, 0, _("warning: an unescaped backslash "
+                             "at end of string is not portable"));
+              /* POSIX is not clear about this.  */
+              es->escaped[j] = false;
+              i--;
+              c = '\\';
+              break;
+            default:
+              c = s[i + 1];
+              break;
+            }
+          ++i;
+          es->s[j++] = c;
+          break;
+        default:
+          es->s[j++] = s[i];
+          break;
+        }
     }
   es->len = j;
   return true;
@@ -550,14 +540,14 @@ unquote (char const *s, struct E_string *es)
 /* If CLASS_STR is a valid character class string, return its index
    in the global char_class_name array.  Otherwise, return CC_NO_CLASS.  */
 
-static enum Char_class
+static enum Char_class _GL_ATTRIBUTE_PURE
 look_up_char_class (char const *class_str, size_t len)
 {
   enum Char_class i;
 
-  for (i = 0; i < N_CHAR_CLASSES; i++)
-    if (strncmp (class_str, char_class_name[i], len) == 0
-	&& strlen (char_class_name[i]) == len)
+  for (i = 0; i < ARRAY_CARDINALITY (char_class_name); i++)
+    if (STREQ_LEN (class_str, char_class_name[i], len)
+        && strlen (char_class_name[i]) == len)
       return i;
   return CC_NO_CLASS;
 }
@@ -605,42 +595,42 @@ make_printable_str (char const *s, size_t len)
       unsigned char c = s[i];
 
       switch (c)
-	{
-	case '\\':
-	  tmp = "\\";
-	  break;
-	case '\a':
-	  tmp = "\\a";
-	  break;
-	case '\b':
-	  tmp = "\\b";
-	  break;
-	case '\f':
-	  tmp = "\\f";
-	  break;
-	case '\n':
-	  tmp = "\\n";
-	  break;
-	case '\r':
-	  tmp = "\\r";
-	  break;
-	case '\t':
-	  tmp = "\\t";
-	  break;
-	case '\v':
-	  tmp = "\\v";
-	  break;
-	default:
-	  if (isprint (c))
-	    {
-	      buf[0] = c;
-	      buf[1] = '\0';
-	    }
-	  else
-	    sprintf (buf, "\\%03o", c);
-	  tmp = buf;
-	  break;
-	}
+        {
+        case '\\':
+          tmp = "\\";
+          break;
+        case '\a':
+          tmp = "\\a";
+          break;
+        case '\b':
+          tmp = "\\b";
+          break;
+        case '\f':
+          tmp = "\\f";
+          break;
+        case '\n':
+          tmp = "\\n";
+          break;
+        case '\r':
+          tmp = "\\r";
+          break;
+        case '\t':
+          tmp = "\\t";
+          break;
+        case '\v':
+          tmp = "\\v";
+          break;
+        default:
+          if (isprint (c))
+            {
+              buf[0] = c;
+              buf[1] = '\0';
+            }
+          else
+            sprintf (buf, "\\%03o", c);
+          tmp = buf;
+          break;
+        }
       p = stpcpy (p, tmp);
     }
   return printable_buf;
@@ -679,8 +669,8 @@ append_range (struct Spec_list *list, unsigned char first, unsigned char last)
       char *tmp2 = make_printable_char (last);
 
       error (0, 0,
-       _("range-endpoints of `%s-%s' are in reverse collating sequence order"),
-	     tmp1, tmp2);
+       _("range-endpoints of '%s-%s' are in reverse collating sequence order"),
+             tmp1, tmp2);
       free (tmp1);
       free (tmp2);
       return false;
@@ -703,7 +693,7 @@ append_range (struct Spec_list *list, unsigned char first, unsigned char last)
 
 static bool
 append_char_class (struct Spec_list *list,
-		   char const *char_class_str, size_t len)
+                   char const *char_class_str, size_t len)
 {
   enum Char_class char_class;
   struct List_element *new;
@@ -728,7 +718,7 @@ append_char_class (struct Spec_list *list,
 
 static void
 append_repeated_char (struct Spec_list *list, unsigned char the_char,
-		      count repeat_count)
+                      count repeat_count)
 {
   struct List_element *new;
 
@@ -750,7 +740,7 @@ append_repeated_char (struct Spec_list *list, unsigned char the_char,
 
 static bool
 append_equiv_class (struct Spec_list *list,
-		    char const *equiv_class_str, size_t len)
+                    char const *equiv_class_str, size_t len)
 {
   struct List_element *new;
 
@@ -774,16 +764,16 @@ append_equiv_class (struct Spec_list *list,
 
 static bool
 find_closing_delim (const struct E_string *es, size_t start_idx,
-		    char pre_bracket_char, size_t *result_idx)
+                    char pre_bracket_char, size_t *result_idx)
 {
   size_t i;
 
   for (i = start_idx; i < es->len - 1; i++)
     if (es->s[i] == pre_bracket_char && es->s[i + 1] == ']'
-	&& !es->escaped[i] && !es->escaped[i + 1])
+        && !es->escaped[i] && !es->escaped[i + 1])
       {
-	*result_idx = i;
-	return true;
+        *result_idx = i;
+        return true;
       }
   return false;
 }
@@ -792,16 +782,16 @@ find_closing_delim (const struct E_string *es, size_t start_idx,
    beginning with P[ START_IDX ] comprise a valid [c*n] construct,
    then set *CHAR_TO_REPEAT, *REPEAT_COUNT, and *CLOSING_BRACKET_IDX
    and return zero. If the second character following
-   the opening bracket is not `*' or if no closing bracket can be
+   the opening bracket is not '*' or if no closing bracket can be
    found, return -1.  If a closing bracket is found and the
-   second char is `*', but the string between the `*' and `]' isn't
+   second char is '*', but the string between the '*' and ']' isn't
    empty, an octal number, or a decimal number, print an error message
    and return -2.  */
 
 static int
 find_bracketed_repeat (const struct E_string *es, size_t start_idx,
-		       unsigned char *char_to_repeat, count *repeat_count,
-		       size_t *closing_bracket_idx)
+                       unsigned char *char_to_repeat, count *repeat_count,
+                       size_t *closing_bracket_idx)
 {
   size_t i;
 
@@ -812,47 +802,47 @@ find_bracketed_repeat (const struct E_string *es, size_t start_idx,
   for (i = start_idx + 2; i < es->len && !es->escaped[i]; i++)
     {
       if (es->s[i] == ']')
-	{
-	  size_t digit_str_len = i - start_idx - 2;
-
-	  *char_to_repeat = es->s[start_idx];
-	  if (digit_str_len == 0)
-	    {
-	      /* We've matched [c*] -- no explicit repeat count.  */
-	      *repeat_count = 0;
-	    }
-	  else
-	    {
-	      /* Here, we have found [c*s] where s should be a string
-		 of octal (if it starts with `0') or decimal digits.  */
-	      char const *digit_str = &es->s[start_idx + 2];
-	      char *d_end;
-	      if ((xstrtoumax (digit_str, &d_end, *digit_str == '0' ? 8 : 10,
-			       repeat_count, NULL)
-		   != LONGINT_OK)
-		  || REPEAT_COUNT_MAXIMUM < *repeat_count
-		  || digit_str + digit_str_len != d_end)
-		{
-		  char *tmp = make_printable_str (digit_str, digit_str_len);
-		  error (0, 0,
-			 _("invalid repeat count %s in [c*n] construct"),
-			 quote (tmp));
-		  free (tmp);
-		  return -2;
-		}
-	    }
-	  *closing_bracket_idx = i;
-	  return 0;
-	}
+        {
+          size_t digit_str_len = i - start_idx - 2;
+
+          *char_to_repeat = es->s[start_idx];
+          if (digit_str_len == 0)
+            {
+              /* We've matched [c*] -- no explicit repeat count.  */
+              *repeat_count = 0;
+            }
+          else
+            {
+              /* Here, we have found [c*s] where s should be a string
+                 of octal (if it starts with '0') or decimal digits.  */
+              char const *digit_str = &es->s[start_idx + 2];
+              char *d_end;
+              if ((xstrtoumax (digit_str, &d_end, *digit_str == '0' ? 8 : 10,
+                               repeat_count, NULL)
+                   != LONGINT_OK)
+                  || REPEAT_COUNT_MAXIMUM < *repeat_count
+                  || digit_str + digit_str_len != d_end)
+                {
+                  char *tmp = make_printable_str (digit_str, digit_str_len);
+                  error (0, 0,
+                         _("invalid repeat count %s in [c*n] construct"),
+                         quote (tmp));
+                  free (tmp);
+                  return -2;
+                }
+            }
+          *closing_bracket_idx = i;
+          return 0;
+        }
     }
   return -1;			/* No bracket found.  */
 }
 
 /* Return true if the string at ES->s[IDX] matches the regular
-   expression `\*[0-9]*\]', false otherwise.  The string does not
+   expression '\*[0-9]*\]', false otherwise.  The string does not
    match if any of its characters are escaped.  */
 
-static bool
+static bool _GL_ATTRIBUTE_PURE
 star_digits_closebracket (const struct E_string *es, size_t idx)
 {
   size_t i;
@@ -869,13 +859,13 @@ star_digits_closebracket (const struct E_string *es, size_t idx)
 /* Convert string UNESCAPED_STRING (which has been preprocessed to
    convert backslash-escape sequences) of length LEN characters into
    a linked list of the following 5 types of constructs:
-      - [:str:] Character class where `str' is one of the 12 valid strings.
-      - [=c=] Equivalence class where `c' is any single character.
-      - [c*n] Repeat the single character `c' `n' times. n may be omitted.
-	  However, if `n' is present, it must be a non-negative octal or
-	  decimal integer.
-      - r-s Range of characters from `r' to `s'.  The second endpoint must
-	  not precede the first in the current collating sequence.
+      - [:str:] Character class where 'str' is one of the 12 valid strings.
+      - [=c=] Equivalence class where 'c' is any single character.
+      - [c*n] Repeat the single character 'c' 'n' times. n may be omitted.
+          However, if 'n' is present, it must be a non-negative octal or
+          decimal integer.
+      - r-s Range of characters from 'r' to 's'.  The second endpoint must
+          not precede the first in the current collating sequence.
       - c Any other character is interpreted as itself.  */
 
 static bool
@@ -888,7 +878,7 @@ build_spec_list (const struct E_string *es, struct Spec_list *result)
 
   /* The main for-loop below recognizes the 4 multi-character constructs.
      A character that matches (in its context) none of the multi-character
-     constructs is classified as `normal'.  Since all multi-character
+     constructs is classified as 'normal'.  Since all multi-character
      constructs have at least 3 characters, any strings of length 2 or
      less are composed solely of normal characters.  Hence, the index of
      the outer for-loop runs only as far as LEN-2.  */
@@ -896,120 +886,120 @@ build_spec_list (const struct E_string *es, struct Spec_list *result)
   for (i = 0; i + 2 < es->len; /* empty */)
     {
       if (es_match (es, i, '['))
-	{
-	  bool matched_multi_char_construct;
-	  size_t closing_bracket_idx;
-	  unsigned char char_to_repeat;
-	  count repeat_count;
-	  int err;
-
-	  matched_multi_char_construct = true;
-	  if (es_match (es, i + 1, ':') || es_match (es, i + 1, '='))
-	    {
-	      size_t closing_delim_idx;
-
-	      if (find_closing_delim (es, i + 2, p[i + 1], &closing_delim_idx))
-		{
-		  size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1;
-		  char const *opnd_str = p + i + 2;
-
-		  if (opnd_str_len == 0)
-		    {
-		      if (p[i + 1] == ':')
-			error (0, 0, _("missing character class name `[::]'"));
-		      else
-			error (0, 0,
-			       _("missing equivalence class character `[==]'"));
-		      return false;
-		    }
-
-		  if (p[i + 1] == ':')
-		    {
-		      /* FIXME: big comment.  */
-		      if (!append_char_class (result, opnd_str, opnd_str_len))
-			{
-			  if (star_digits_closebracket (es, i + 2))
-			    goto try_bracketed_repeat;
-			  else
-			    {
-			      char *tmp = make_printable_str (opnd_str,
-							      opnd_str_len);
-			      error (0, 0, _("invalid character class %s"),
-				     quote (tmp));
-			      free (tmp);
-			      return false;
-			    }
-			}
-		    }
-		  else
-		    {
-		      /* FIXME: big comment.  */
-		      if (!append_equiv_class (result, opnd_str, opnd_str_len))
-			{
-			  if (star_digits_closebracket (es, i + 2))
-			    goto try_bracketed_repeat;
-			  else
-			    {
-			      char *tmp = make_printable_str (opnd_str,
-							      opnd_str_len);
-			      error (0, 0,
-	       _("%s: equivalence class operand must be a single character"),
-				     tmp);
-			      free (tmp);
-			      return false;
-			    }
-			}
-		    }
-
-		  i = closing_delim_idx + 2;
-		  continue;
-		}
-	      /* Else fall through.  This could be [:*] or [=*].  */
-	    }
-
-	try_bracketed_repeat:
-
-	  /* Determine whether this is a bracketed repeat range
-	     matching the RE \[.\*(dec_or_oct_number)?\].  */
-	  err = find_bracketed_repeat (es, i + 1, &char_to_repeat,
-				       &repeat_count,
-				       &closing_bracket_idx);
-	  if (err == 0)
-	    {
-	      append_repeated_char (result, char_to_repeat, repeat_count);
-	      i = closing_bracket_idx + 1;
-	    }
-	  else if (err == -1)
-	    {
-	      matched_multi_char_construct = false;
-	    }
-	  else
-	    {
-	      /* Found a string that looked like [c*n] but the
-		 numeric part was invalid.  */
-	      return false;
-	    }
-
-	  if (matched_multi_char_construct)
-	    continue;
-
-	  /* We reach this point if P does not match [:str:], [=c=],
-	     [c*n], or [c*].  Now, see if P looks like a range `[-c'
-	     (from `[' to `c').  */
-	}
+        {
+          bool matched_multi_char_construct;
+          size_t closing_bracket_idx;
+          unsigned char char_to_repeat;
+          count repeat_count;
+          int err;
+
+          matched_multi_char_construct = true;
+          if (es_match (es, i + 1, ':') || es_match (es, i + 1, '='))
+            {
+              size_t closing_delim_idx;
+
+              if (find_closing_delim (es, i + 2, p[i + 1], &closing_delim_idx))
+                {
+                  size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1;
+                  char const *opnd_str = p + i + 2;
+
+                  if (opnd_str_len == 0)
+                    {
+                      if (p[i + 1] == ':')
+                        error (0, 0, _("missing character class name '[::]'"));
+                      else
+                        error (0, 0,
+                               _("missing equivalence class character '[==]'"));
+                      return false;
+                    }
+
+                  if (p[i + 1] == ':')
+                    {
+                      /* FIXME: big comment.  */
+                      if (!append_char_class (result, opnd_str, opnd_str_len))
+                        {
+                          if (star_digits_closebracket (es, i + 2))
+                            goto try_bracketed_repeat;
+                          else
+                            {
+                              char *tmp = make_printable_str (opnd_str,
+                                                              opnd_str_len);
+                              error (0, 0, _("invalid character class %s"),
+                                     quote (tmp));
+                              free (tmp);
+                              return false;
+                            }
+                        }
+                    }
+                  else
+                    {
+                      /* FIXME: big comment.  */
+                      if (!append_equiv_class (result, opnd_str, opnd_str_len))
+                        {
+                          if (star_digits_closebracket (es, i + 2))
+                            goto try_bracketed_repeat;
+                          else
+                            {
+                              char *tmp = make_printable_str (opnd_str,
+                                                              opnd_str_len);
+                              error (0, 0,
+               _("%s: equivalence class operand must be a single character"),
+                                     tmp);
+                              free (tmp);
+                              return false;
+                            }
+                        }
+                    }
+
+                  i = closing_delim_idx + 2;
+                  continue;
+                }
+              /* Else fall through.  This could be [:*] or [=*].  */
+            }
+
+        try_bracketed_repeat:
+
+          /* Determine whether this is a bracketed repeat range
+             matching the RE \[.\*(dec_or_oct_number)?\].  */
+          err = find_bracketed_repeat (es, i + 1, &char_to_repeat,
+                                       &repeat_count,
+                                       &closing_bracket_idx);
+          if (err == 0)
+            {
+              append_repeated_char (result, char_to_repeat, repeat_count);
+              i = closing_bracket_idx + 1;
+            }
+          else if (err == -1)
+            {
+              matched_multi_char_construct = false;
+            }
+          else
+            {
+              /* Found a string that looked like [c*n] but the
+                 numeric part was invalid.  */
+              return false;
+            }
+
+          if (matched_multi_char_construct)
+            continue;
+
+          /* We reach this point if P does not match [:str:], [=c=],
+             [c*n], or [c*].  Now, see if P looks like a range '[-c'
+             (from '[' to 'c').  */
+        }
 
       /* Look ahead one char for ranges like a-z.  */
       if (es_match (es, i + 1, '-'))
-	{
-	  if (!append_range (result, p[i], p[i + 2]))
-	    return false;
-	  i += 3;
-	}
+        {
+          if (!append_range (result, p[i], p[i + 2]))
+            return false;
+          i += 3;
+        }
       else
-	{
-	  append_normal_char (result, p[i]);
-	  ++i;
-	}
+        {
+          append_normal_char (result, p[i]);
+          ++i;
+        }
     }
 
   /* Now handle the (2 or fewer) remaining characters p[i]..p[es->len - 1].  */
@@ -1019,8 +1009,17 @@ build_spec_list (const struct E_string *es, struct Spec_list *result)
   return true;
 }
 
+/* Advance past the current construct.
+   S->tail must be non-NULL.  */
+static void
+skip_construct (struct Spec_list *s)
+{
+  s->tail = s->tail->next;
+  s->state = NEW_ELEMENT;
+}
+
 /* Given a Spec_list S (with its saved state implicit in the values
-   of its members `tail' and `state'), return the next single character
+   of its members 'tail' and 'state'), return the next single character
    in the expansion of S's constructs.  If the last character of S was
    returned on the previous call or if S was empty, this function
    returns -1.  For example, successive calls to get_next where S
@@ -1063,65 +1062,53 @@ get_next (struct Spec_list *s, enum Upper_Lower_class *class)
 
     case RE_RANGE:
       if (s->state == NEW_ELEMENT)
-	s->state = p->u.range.first_char;
+        s->state = p->u.range.first_char;
       else
-	++(s->state);
+        ++(s->state);
       return_val = s->state;
       if (s->state == p->u.range.last_char)
-	{
-	  s->tail = p->next;
-	  s->state = NEW_ELEMENT;
-	}
+        {
+          s->tail = p->next;
+          s->state = NEW_ELEMENT;
+        }
       break;
 
     case RE_CHAR_CLASS:
       if (class)
-	{
-	  bool upper_or_lower;
-	  switch (p->u.char_class)
-	    {
-	    case CC_LOWER:
-	      *class = UL_LOWER;
-	      upper_or_lower = true;
-	      break;
-	    case CC_UPPER:
-	      *class = UL_UPPER;
-	      upper_or_lower = true;
-	      break;
-	    default:
-	      upper_or_lower = false;
-	      break;
-	    }
-
-	  if (upper_or_lower)
-	    {
-	      s->tail = p->next;
-	      s->state = NEW_ELEMENT;
-	      return_val = 0;
-	      break;
-	    }
-	}
+        {
+          switch (p->u.char_class)
+            {
+            case CC_LOWER:
+              *class = UL_LOWER;
+              break;
+            case CC_UPPER:
+              *class = UL_UPPER;
+              break;
+            default:
+              break;
+            }
+        }
 
       if (s->state == NEW_ELEMENT)
-	{
-	  for (i = 0; i < N_CHARS; i++)
-	    if (is_char_class_member (p->u.char_class, i))
-	      break;
-	  assert (i < N_CHARS);
-	  s->state = i;
-	}
+        {
+          for (i = 0; i < N_CHARS; i++)
+            if (is_char_class_member (p->u.char_class, i))
+              break;
+          assert (i < N_CHARS);
+          s->state = i;
+        }
       assert (is_char_class_member (p->u.char_class, s->state));
       return_val = s->state;
       for (i = s->state + 1; i < N_CHARS; i++)
-	if (is_char_class_member (p->u.char_class, i))
-	  break;
+        if (is_char_class_member (p->u.char_class, i))
+          break;
       if (i < N_CHARS)
-	s->state = i;
+        s->state = i;
       else
-	{
-	  s->tail = p->next;
-	  s->state = NEW_ELEMENT;
-	}
+        {
+          s->tail = p->next;
+          s->state = NEW_ELEMENT;
+        }
       break;
 
     case RE_EQUIV_CLASS:
@@ -1138,25 +1125,25 @@ get_next (struct Spec_list *s, enum Upper_Lower_class *class)
     case RE_REPEATED_CHAR:
       /* Here, a repeat count of n == 0 means don't repeat at all.  */
       if (p->u.repeated_char.repeat_count == 0)
-	{
-	  s->tail = p->next;
-	  s->state = NEW_ELEMENT;
-	  return_val = get_next (s, class);
-	}
+        {
+          s->tail = p->next;
+          s->state = NEW_ELEMENT;
+          return_val = get_next (s, class);
+        }
       else
-	{
-	  if (s->state == NEW_ELEMENT)
-	    {
-	      s->state = 0;
-	    }
-	  ++(s->state);
-	  return_val = p->u.repeated_char.the_repeated_char;
-	  if (s->state == p->u.repeated_char.repeat_count)
-	    {
-	      s->tail = p->next;
-	      s->state = NEW_ELEMENT;
-	    }
-	}
+        {
+          if (s->state == NEW_ELEMENT)
+            {
+              s->state = 0;
+            }
+          ++(s->state);
+          return_val = p->u.repeated_char.the_repeated_char;
+          if (s->state == p->u.repeated_char.repeat_count)
+            {
+              s->tail = p->next;
+              s->state = NEW_ELEMENT;
+            }
+        }
       break;
 
     default:
@@ -1188,6 +1175,78 @@ card_of_complement (struct Spec_list *s)
   return cardinality;
 }
 
+/* Discard the lengths associated with a case conversion,
+   as using the actual number of upper or lower case characters
+   is problematic when they don't match in some locales.
+   Also ensure the case conversion classes in string2 are
+   aligned correctly with those in string1.
+   Note POSIX says the behavior of 'tr "[:upper:]" "[:upper:]"'
+   is undefined.  Therefore we allow it (unlike Solaris)
+   and treat it as a no-op.  */
+
+static void
+validate_case_classes (struct Spec_list *s1, struct Spec_list *s2)
+{
+  size_t n_upper = 0;
+  size_t n_lower = 0;
+  unsigned int i;
+  int c1 = 0;
+  int c2 = 0;
+  count old_s1_len = s1->length;
+  count old_s2_len = s2->length;
+  struct List_element *s1_tail = s1->tail;
+  struct List_element *s2_tail = s2->tail;
+  bool s1_new_element = true;
+  bool s2_new_element = true;
+
+  if (!s2->has_char_class)
+    return;
+
+  for (i = 0; i < N_CHARS; i++)
+    {
+      if (isupper (i))
+        n_upper++;
+      if (islower (i))
+        n_lower++;
+    }
+
+  s1->state = BEGIN_STATE;
+  s2->state = BEGIN_STATE;
+
+  while (c1 != -1 && c2 != -1)
+    {
+      enum Upper_Lower_class class_s1, class_s2;
+
+      c1 = get_next (s1, &class_s1);
+      c2 = get_next (s2, &class_s2);
+
+      /* If c2 transitions to a new case class, then
+         c1 must also transition at the same time.  */
+      if (s2_new_element && class_s2 != UL_NONE
+          && !(s1_new_element && class_s1 != UL_NONE))
+        error (EXIT_FAILURE, 0,
+               _("misaligned [:upper:] and/or [:lower:] construct"));
+
+      /* If case converting, quickly skip over the elements.  */
+      if (class_s2 != UL_NONE)
+        {
+          skip_construct (s1);
+          skip_construct (s2);
+          /* Discount insignificant/problematic lengths.  */
+          s1->length -= (class_s1 == UL_UPPER ? n_upper : n_lower) - 1;
+          s2->length -= (class_s2 == UL_UPPER ? n_upper : n_lower) - 1;
+        }
+
+      s1_new_element = s1->state == NEW_ELEMENT; /* Next element is new.  */
+      s2_new_element = s2->state == NEW_ELEMENT; /* Next element is new.  */
+    }
+
+  assert (old_s1_len >= s1->length && old_s2_len >= s2->length);
+
+  s1->tail = s1_tail;
+  s2->tail = s2_tail;
+}
+
 /* Gather statistics about the spec-list S in preparation for the tests
    in validate that determine the consistency of the specs.  This function
    is called at most twice; once for string1, and again for any string2.
@@ -1218,61 +1277,61 @@ get_spec_stats (struct Spec_list *s)
       count new_length;
 
       switch (p->type)
-	{
-	case RE_NORMAL_CHAR:
-	  len = 1;
-	  break;
-
-	case RE_RANGE:
-	  assert (p->u.range.last_char >= p->u.range.first_char);
-	  len = p->u.range.last_char - p->u.range.first_char + 1;
-	  break;
-
-	case RE_CHAR_CLASS:
-	  s->has_char_class = true;
-	  for (i = 0; i < N_CHARS; i++)
-	    if (is_char_class_member (p->u.char_class, i))
-	      ++len;
-	  switch (p->u.char_class)
-	    {
-	    case CC_UPPER:
-	    case CC_LOWER:
-	      break;
-	    default:
-	      s->has_restricted_char_class = true;
-	      break;
-	    }
-	  break;
-
-	case RE_EQUIV_CLASS:
-	  for (i = 0; i < N_CHARS; i++)
-	    if (is_equiv_class_member (p->u.equiv_code, i))
-	      ++len;
-	  s->has_equiv_class = true;
-	  break;
-
-	case RE_REPEATED_CHAR:
-	  if (p->u.repeated_char.repeat_count > 0)
-	    len = p->u.repeated_char.repeat_count;
-	  else
-	    {
-	      s->indefinite_repeat_element = p;
-	      ++(s->n_indefinite_repeats);
-	    }
-	  break;
-
-	default:
-	  abort ();
-	  break;
-	}
+        {
+        case RE_NORMAL_CHAR:
+          len = 1;
+          break;
+
+        case RE_RANGE:
+          assert (p->u.range.last_char >= p->u.range.first_char);
+          len = p->u.range.last_char - p->u.range.first_char + 1;
+          break;
+
+        case RE_CHAR_CLASS:
+          s->has_char_class = true;
+          for (i = 0; i < N_CHARS; i++)
+            if (is_char_class_member (p->u.char_class, i))
+              ++len;
+          switch (p->u.char_class)
+            {
+            case CC_UPPER:
+            case CC_LOWER:
+              break;
+            default:
+              s->has_restricted_char_class = true;
+              break;
+            }
+          break;
+
+        case RE_EQUIV_CLASS:
+          for (i = 0; i < N_CHARS; i++)
+            if (is_equiv_class_member (p->u.equiv_code, i))
+              ++len;
+          s->has_equiv_class = true;
+          break;
+
+        case RE_REPEATED_CHAR:
+          if (p->u.repeated_char.repeat_count > 0)
+            len = p->u.repeated_char.repeat_count;
+          else
+            {
+              s->indefinite_repeat_element = p;
+              ++(s->n_indefinite_repeats);
+            }
+          break;
+
+        default:
+          abort ();
+          break;
+        }
 
       /* Check for arithmetic overflow in computing length.  Also, reject
-	 any length greater than the maximum repeat count, in case the
-	 length is later used to compute the repeat count for an
-	 indefinite element.  */
+         any length greater than the maximum repeat count, in case the
+         length is later used to compute the repeat count for an
+         indefinite element.  */
       new_length = length + len;
       if (! (length <= new_length && new_length <= REPEAT_COUNT_MAXIMUM))
-	error (EXIT_FAILURE, 0, _("too many characters in set"));
+        error (EXIT_FAILURE, 0, _("too many characters in set"));
       length = new_length;
     }
 
@@ -1294,7 +1353,7 @@ get_s2_spec_stats (struct Spec_list *s2, count len_s1)
   if (len_s1 >= s2->length && s2->n_indefinite_repeats == 1)
     {
       s2->indefinite_repeat_element->u.repeated_char.repeat_count =
-	len_s1 - s2->length;
+        len_s1 - s2->length;
       s2->length = len_s1;
     }
 }
@@ -1329,20 +1388,14 @@ parse_str (char const *s, struct Spec_list *spec_list)
    Upon successful completion, S2->length is set to S1->length.  The only
    way this function can fail to make S2 as long as S1 is when S2 has
    zero-length, since in that case, there is no last character to repeat.
-   So S2->length is required to be at least 1.
+   So S2->length is required to be at least 1.  */
 
-   Providing this functionality allows the user to do some pretty
-   non-BSD (and non-portable) things:  For example, the command
-       tr -cs '[:upper:]0-9' '[:lower:]'
-   is almost guaranteed to give results that depend on your collating
-   sequence.  */
 
 static void
 string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
 {
   struct List_element *p;
   unsigned char char_to_repeat;
-  int i;
 
   assert (translating);
   assert (s1->length > s2->length);
@@ -1358,11 +1411,14 @@ string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
       char_to_repeat = p->u.range.last_char;
       break;
     case RE_CHAR_CLASS:
-      for (i = N_CHARS - 1; i >= 0; i--)
-	if (is_char_class_member (p->u.char_class, i))
-	  break;
-      assert (i >= 0);
-      char_to_repeat = i;
+      /* Note BSD allows extending of classes in string2.  For example:
+           tr '[:upper:]0-9' '[:lower:]'
+         That's not portable however, contradicts POSIX and is dependent
+         on your collating sequence.  */
+      error (EXIT_FAILURE, 0,
+             _("when translating with string1 longer than string2,\nthe\
+ latter string must not end with a character class"));
+      abort (); /* inform gcc that the above use of error never returns. */
       break;
 
     case RE_REPEATED_CHAR:
@@ -1420,7 +1476,7 @@ validate (struct Spec_list *s1, struct Spec_list *s2)
   if (s1->n_indefinite_repeats > 0)
     {
       error (EXIT_FAILURE, 0,
-	     _("the [c*] repeat construct may not appear in string1"));
+             _("the [c*] repeat construct may not appear in string1"));
     }
 
   if (s2)
@@ -1428,57 +1484,59 @@ validate (struct Spec_list *s1, struct Spec_list *s2)
       get_s2_spec_stats (s2, s1->length);
 
       if (s2->n_indefinite_repeats > 1)
-	{
-	  error (EXIT_FAILURE, 0,
-		 _("only one [c*] repeat construct may appear in string2"));
-	}
+        {
+          error (EXIT_FAILURE, 0,
+                 _("only one [c*] repeat construct may appear in string2"));
+        }
 
       if (translating)
-	{
-	  if (s2->has_equiv_class)
-	    {
-	      error (EXIT_FAILURE, 0,
-		     _("[=c=] expressions may not appear in string2 \
-when translating"));
-	    }
-
-	  if (s1->length > s2->length)
-	    {
-	      if (!truncate_set1)
-		{
-		  /* string2 must be non-empty unless --truncate-set1 is
-		     given or string1 is empty.  */
-
-		  if (s2->length == 0)
-		    error (EXIT_FAILURE, 0,
-		     _("when not truncating set1, string2 must be non-empty"));
-		  string2_extend (s1, s2);
-		}
-	    }
-
-	  if (complement && s1->has_char_class
-	      && ! (s2->length == s1->length && homogeneous_spec_list (s2)))
-	    {
-	      error (EXIT_FAILURE, 0,
-		     _("when translating with complemented character classes,\
+        {
+          if (s2->has_equiv_class)
+            {
+              error (EXIT_FAILURE, 0,
+                     _("[=c=] expressions may not appear in string2\
+ when translating"));
+            }
+
+          if (s2->has_restricted_char_class)
+            {
+              error (EXIT_FAILURE, 0,
+                     _("when translating, the only character classes that may\
+ appear in\nstring2 are 'upper' and 'lower'"));
+            }
+
+          validate_case_classes (s1, s2);
+
+          if (s1->length > s2->length)
+            {
+              if (!truncate_set1)
+                {
+                  /* string2 must be non-empty unless --truncate-set1 is
+                     given or string1 is empty.  */
+
+                  if (s2->length == 0)
+                    error (EXIT_FAILURE, 0,
+                     _("when not truncating set1, string2 must be non-empty"));
+                  string2_extend (s1, s2);
+                }
+            }
+
+          if (complement && s1->has_char_class
+              && ! (s2->length == s1->length && homogeneous_spec_list (s2)))
+            {
+              error (EXIT_FAILURE, 0,
+                     _("when translating with complemented character classes,\
 \nstring2 must map all characters in the domain to one"));
-	    }
-
-	  if (s2->has_restricted_char_class)
-	    {
-	      error (EXIT_FAILURE, 0,
-		     _("when translating, the only character classes that may \
-appear in\nstring2 are `upper' and `lower'"));
-	    }
-	}
+            }
+        }
       else
-	/* Not translating.  */
-	{
-	  if (s2->n_indefinite_repeats > 0)
-	    error (EXIT_FAILURE, 0,
-		   _("the [c*] construct may appear in string2 only \
-when translating"));
-	}
+        /* Not translating.  */
+        {
+          if (s2->n_indefinite_repeats > 0)
+            error (EXIT_FAILURE, 0,
+                   _("the [c*] construct may appear in string2 only\
+ when translating"));
+        }
     }
 }
 
@@ -1495,85 +1553,85 @@ squeeze_filter (char *buf, size_t size, size_t (*reader) (char *, size_t))
 {
   /* A value distinct from any character that may have been stored in a
      buffer as the result of a block-read in the function squeeze_filter.  */
-  enum { NOT_A_CHAR = CHAR_MAX + 1 };
+  const int NOT_A_CHAR = INT_MAX;
 
   int char_to_squeeze = NOT_A_CHAR;
   size_t i = 0;
   size_t nr = 0;
 
-  for (;;)
+  while (true)
     {
       size_t begin;
 
       if (i >= nr)
-	{
-	  nr = reader (buf, size);
-	  if (nr == 0)
-	    break;
-	  i = 0;
-	}
+        {
+          nr = reader (buf, size);
+          if (nr == 0)
+            break;
+          i = 0;
+        }
 
       begin = i;
 
       if (char_to_squeeze == NOT_A_CHAR)
-	{
-	  size_t out_len;
-	  /* Here, by being a little tricky, we can get a significant
-	     performance increase in most cases when the input is
-	     reasonably large.  Since tr will modify the input only
-	     if two consecutive (and identical) input characters are
-	     in the squeeze set, we can step by two through the data
-	     when searching for a character in the squeeze set.  This
-	     means there may be a little more work in a few cases and
-	     perhaps twice as much work in the worst cases where most
-	     of the input is removed by squeezing repeats.  But most
-	     uses of this functionality seem to remove less than 20-30%
-	     of the input.  */
-	  for (; i < nr && !in_squeeze_set[to_uchar (buf[i])]; i += 2)
-	    continue;
-
-	  /* There is a special case when i == nr and we've just
-	     skipped a character (the last one in buf) that is in
-	     the squeeze set.  */
-	  if (i == nr && in_squeeze_set[to_uchar (buf[i - 1])])
-	    --i;
-
-	  if (i >= nr)
-	    out_len = nr - begin;
-	  else
-	    {
-	      char_to_squeeze = buf[i];
-	      /* We're about to output buf[begin..i].  */
-	      out_len = i - begin + 1;
-
-	      /* But since we stepped by 2 in the loop above,
-	         out_len may be one too large.  */
-	      if (i > 0 && buf[i - 1] == char_to_squeeze)
-		--out_len;
-
-	      /* Advance i to the index of first character to be
-	         considered when looking for a char different from
-	         char_to_squeeze.  */
-	      ++i;
-	    }
-	  if (out_len > 0
-	      && fwrite (&buf[begin], 1, out_len, stdout) != out_len)
-	    error (EXIT_FAILURE, errno, _("write error"));
-	}
+        {
+          size_t out_len;
+          /* Here, by being a little tricky, we can get a significant
+             performance increase in most cases when the input is
+             reasonably large.  Since tr will modify the input only
+             if two consecutive (and identical) input characters are
+             in the squeeze set, we can step by two through the data
+             when searching for a character in the squeeze set.  This
+             means there may be a little more work in a few cases and
+             perhaps twice as much work in the worst cases where most
+             of the input is removed by squeezing repeats.  But most
+             uses of this functionality seem to remove less than 20-30%
+             of the input.  */
+          for (; i < nr && !in_squeeze_set[to_uchar (buf[i])]; i += 2)
+            continue;
+
+          /* There is a special case when i == nr and we've just
+             skipped a character (the last one in buf) that is in
+             the squeeze set.  */
+          if (i == nr && in_squeeze_set[to_uchar (buf[i - 1])])
+            --i;
+
+          if (i >= nr)
+            out_len = nr - begin;
+          else
+            {
+              char_to_squeeze = buf[i];
+              /* We're about to output buf[begin..i].  */
+              out_len = i - begin + 1;
+
+              /* But since we stepped by 2 in the loop above,
+                 out_len may be one too large.  */
+              if (i > 0 && buf[i - 1] == char_to_squeeze)
+                --out_len;
+
+              /* Advance i to the index of first character to be
+                 considered when looking for a char different from
+                 char_to_squeeze.  */
+              ++i;
+            }
+          if (out_len > 0
+              && fwrite (&buf[begin], 1, out_len, stdout) != out_len)
+            error (EXIT_FAILURE, errno, _("write error"));
+        }
 
       if (char_to_squeeze != NOT_A_CHAR)
-	{
-	  /* Advance i to index of first char != char_to_squeeze
-	     (or to nr if all the rest of the characters in this
-	     buffer are the same as char_to_squeeze).  */
-	  for (; i < nr && buf[i] == char_to_squeeze; i++)
-	    continue;
-	  if (i < nr)
-	    char_to_squeeze = NOT_A_CHAR;
-	  /* If (i >= nr) we've squeezed the last character in this buffer.
-	     So now we have to read a new buffer and continue comparing
-	     characters against char_to_squeeze.  */
-	}
+        {
+          /* Advance i to index of first char != char_to_squeeze
+             (or to nr if all the rest of the characters in this
+             buffer are the same as char_to_squeeze).  */
+          for (; i < nr && buf[i] == char_to_squeeze; i++)
+            continue;
+          if (i < nr)
+            char_to_squeeze = NOT_A_CHAR;
+          /* If (i >= nr) we've squeezed the last character in this buffer.
+             So now we have to read a new buffer and continue comparing
+             characters against char_to_squeeze.  */
+        }
     }
 }
 
@@ -1606,7 +1664,7 @@ read_and_delete (char *buf, size_t size)
       size_t nr = plain_read (buf, size);
 
       if (nr == 0)
-	return 0;
+        return 0;
 
       /* This first loop may be a waste of code, but gives much
          better performance when no characters are deleted in
@@ -1614,12 +1672,12 @@ read_and_delete (char *buf, size_t size)
          of buf[i] into buf[n_saved] when it would be a NOP.  */
 
       for (i = 0; i < nr && !in_delete_set[to_uchar (buf[i])]; i++)
-	continue;
+        continue;
       n_saved = i;
 
       for (++i; i < nr; i++)
-	if (!in_delete_set[to_uchar (buf[i])])
-	  buf[n_saved++] = buf[i];
+        if (!in_delete_set[to_uchar (buf[i])])
+          buf[n_saved++] = buf[i];
     }
   while (n_saved == 0);
 
@@ -1628,7 +1686,7 @@ read_and_delete (char *buf, size_t size)
 
 /* Read at most SIZE bytes from stdin into the array BUF.  Then
    perform the in-place and one-to-one mapping specified by the global
-   array `xlate'.  Return the number of characters read, or 0 upon EOF.  */
+   array 'xlate'.  Return the number of characters read, or 0 upon EOF.  */
 
 static size_t
 read_and_xlate (char *buf, size_t size)
@@ -1644,7 +1702,7 @@ read_and_xlate (char *buf, size_t size)
 
 /* Initialize a boolean membership set, IN_SET, with the character
    values obtained by traversing the linked list of constructs S
-   using the function `get_next'.  IN_SET is expected to have been
+   using the function 'get_next'.  IN_SET is expected to have been
    initialized to all zeros by the caller.  If COMPLEMENT_THIS_SET
    is true the resulting set is complemented.  */
 
@@ -1674,7 +1732,7 @@ main (int argc, char **argv)
   struct Spec_list *s2 = &buf2;
 
   initialize_main (&argc, &argv);
-  program_name = argv[0];
+  set_program_name (argv[0]);
   setlocale (LC_ALL, "");
   bindtextdomain (PACKAGE, LOCALEDIR);
   textdomain (PACKAGE);
@@ -1684,32 +1742,32 @@ main (int argc, char **argv)
   while ((c = getopt_long (argc, argv, "+cCdst", long_options, NULL)) != -1)
     {
       switch (c)
-	{
-	case 'c':
-	case 'C':
-	  complement = true;
-	  break;
+        {
+        case 'c':
+        case 'C':
+          complement = true;
+          break;
 
-	case 'd':
-	  delete = true;
-	  break;
+        case 'd':
+          delete = true;
+          break;
 
-	case 's':
-	  squeeze_repeats = true;
-	  break;
+        case 's':
+          squeeze_repeats = true;
+          break;
 
-	case 't':
-	  truncate_set1 = true;
-	  break;
+        case 't':
+          truncate_set1 = true;
+          break;
 
-	case_GETOPT_HELP_CHAR;
+        case_GETOPT_HELP_CHAR;
 
-	case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
 
-	default:
-	  usage (EXIT_FAILURE);
-	  break;
-	}
+        default:
+          usage (EXIT_FAILURE);
+          break;
+        }
     }
 
   non_option_args = argc - optind;
@@ -1720,16 +1778,16 @@ main (int argc, char **argv)
   if (non_option_args < min_operands)
     {
       if (non_option_args == 0)
-	error (0, 0, _("missing operand"));
+        error (0, 0, _("missing operand"));
       else
-	{
-	  error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
-	  fprintf (stderr, "%s\n",
-		   _(squeeze_repeats
-		     ? ("Two strings must be given when "
-			"both deleting and squeezing repeats.")
-		     : "Two strings must be given when translating."));
-	}
+        {
+          error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
+          fprintf (stderr, "%s\n",
+                   _(squeeze_repeats
+                     ? N_("Two strings must be given when "
+                          "both deleting and squeezing repeats.")
+                     : N_("Two strings must be given when translating.")));
+        }
       usage (EXIT_FAILURE);
     }
 
@@ -1737,34 +1795,36 @@ main (int argc, char **argv)
     {
       error (0, 0, _("extra operand %s"), quote (argv[optind + max_operands]));
       if (non_option_args == 2)
-	fprintf (stderr, "%s\n",
-		 _("Only one string may be given when "
-		   "deleting without squeezing repeats."));
+        fprintf (stderr, "%s\n",
+                 _("Only one string may be given when "
+                   "deleting without squeezing repeats."));
       usage (EXIT_FAILURE);
     }
 
   spec_init (s1);
   if (!parse_str (argv[optind], s1))
-    exit (EXIT_FAILURE);
+    return EXIT_FAILURE;
 
   if (non_option_args == 2)
     {
       spec_init (s2);
       if (!parse_str (argv[optind + 1], s2))
-	exit (EXIT_FAILURE);
+        return EXIT_FAILURE;
     }
   else
     s2 = NULL;
 
   validate (s1, s2);
 
-  /* Use binary I/O, since `tr' is sometimes used to transliterate
+  /* Use binary I/O, since 'tr' is sometimes used to transliterate
      non-printable characters, or characters which are stripped away
      by text-mode reads (like CR and ^Z).  */
   if (O_BINARY && ! isatty (STDIN_FILENO))
-    freopen (NULL, "rb", stdin);
+    xfreopen (NULL, "rb", stdin);
   if (O_BINARY && ! isatty (STDOUT_FILENO))
-    freopen (NULL, "wb", stdout);
+    xfreopen (NULL, "wb", stdout);
+
+  fadvise (stdin, FADVISE_SEQUENTIAL);
 
   if (squeeze_repeats && non_option_args == 1)
     {
@@ -1775,14 +1835,14 @@ main (int argc, char **argv)
     {
       set_initialize (s1, complement, in_delete_set);
 
-      for (;;)
-	{
-	  size_t nr = read_and_delete (io_buf, sizeof io_buf);
-	  if (nr == 0)
-	    break;
-	  if (fwrite (io_buf, 1, nr, stdout) != nr)
-	    error (EXIT_FAILURE, errno, _("write error"));
-	}
+      while (true)
+        {
+          size_t nr = read_and_delete (io_buf, sizeof io_buf);
+          if (nr == 0)
+            break;
+          if (fwrite (io_buf, 1, nr, stdout) != nr)
+            error (EXIT_FAILURE, errno, _("write error"));
+        }
     }
   else if (squeeze_repeats && delete && non_option_args == 2)
     {
@@ -1793,104 +1853,95 @@ main (int argc, char **argv)
   else if (translating)
     {
       if (complement)
-	{
-	  int i;
-	  bool *in_s1 = in_delete_set;
-
-	  set_initialize (s1, false, in_s1);
-	  s2->state = BEGIN_STATE;
-	  for (i = 0; i < N_CHARS; i++)
-	    xlate[i] = i;
-	  for (i = 0; i < N_CHARS; i++)
-	    {
-	      if (!in_s1[i])
-		{
-		  int ch = get_next (s2, NULL);
-		  assert (ch != -1 || truncate_set1);
-		  if (ch == -1)
-		    {
-		      /* This will happen when tr is invoked like e.g.
-		         tr -cs A-Za-z0-9 '\012'.  */
-		      break;
-		    }
-		  xlate[i] = ch;
-		}
-	    }
-	  assert (get_next (s2, NULL) == -1 || truncate_set1);
-	}
+        {
+          int i;
+          bool *in_s1 = in_delete_set;
+
+          set_initialize (s1, false, in_s1);
+          s2->state = BEGIN_STATE;
+          for (i = 0; i < N_CHARS; i++)
+            xlate[i] = i;
+          for (i = 0; i < N_CHARS; i++)
+            {
+              if (!in_s1[i])
+                {
+                  int ch = get_next (s2, NULL);
+                  assert (ch != -1 || truncate_set1);
+                  if (ch == -1)
+                    {
+                      /* This will happen when tr is invoked like e.g.
+                         tr -cs A-Za-z0-9 '\012'.  */
+                      break;
+                    }
+                  xlate[i] = ch;
+                }
+            }
+        }
       else
-	{
-	  int c1, c2;
-	  int i;
-	  enum Upper_Lower_class class_s1;
-	  enum Upper_Lower_class class_s2;
-
-	  for (i = 0; i < N_CHARS; i++)
-	    xlate[i] = i;
-	  s1->state = BEGIN_STATE;
-	  s2->state = BEGIN_STATE;
-	  for (;;)
-	    {
-	      c1 = get_next (s1, &class_s1);
-	      c2 = get_next (s2, &class_s2);
-
-	      /* When constructing the translation array, either one of the
-		 values returned by paired calls to get_next must be from
-		 [:upper:] and the other is [:lower:], or neither can be from
-		 upper or lower.  */
-
-	      if ((class_s1 == UL_NONE) != (class_s2 == UL_NONE))
-		error (EXIT_FAILURE, 0,
-		       _("misaligned [:upper:] and/or [:lower:] construct"));
-
-	      if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
-		{
-		  for (i = 0; i < N_CHARS; i++)
-		    if (islower (i))
-		      xlate[i] = toupper (i);
-		}
-	      else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
-		{
-		  for (i = 0; i < N_CHARS; i++)
-		    if (isupper (i))
-		      xlate[i] = tolower (i);
-		}
-	      else if ((class_s1 == UL_LOWER && class_s2 == UL_LOWER)
-		       || (class_s1 == UL_UPPER && class_s2 == UL_UPPER))
-		{
-		  /* POSIX says the behavior of `tr "[:upper:]" "[:upper:]"'
-		     is undefined.  Treat it as a no-op.  */
-		}
-	      else
-		{
-		  /* The following should have been checked by validate...  */
-		  if (c1 == -1 || c2 == -1)
-		    break;
-		  xlate[c1] = c2;
-		}
-	    }
-	  assert (c1 == -1 || truncate_set1);
-	}
+        {
+          int c1, c2;
+          int i;
+          enum Upper_Lower_class class_s1;
+          enum Upper_Lower_class class_s2;
+
+          for (i = 0; i < N_CHARS; i++)
+            xlate[i] = i;
+          s1->state = BEGIN_STATE;
+          s2->state = BEGIN_STATE;
+          while (true)
+            {
+              c1 = get_next (s1, &class_s1);
+              c2 = get_next (s2, &class_s2);
+
+              if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
+                {
+                  for (i = 0; i < N_CHARS; i++)
+                    if (islower (i))
+                      xlate[i] = toupper (i);
+                }
+              else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
+                {
+                  for (i = 0; i < N_CHARS; i++)
+                    if (isupper (i))
+                      xlate[i] = tolower (i);
+                }
+              else
+                {
+                  /* The following should have been checked by validate...  */
+                  if (c1 == -1 || c2 == -1)
+                    break;
+                  xlate[c1] = c2;
+                }
+
+              /* When case-converting, skip the elements as an optimization.  */
+              if (class_s2 != UL_NONE)
+                {
+                  skip_construct (s1);
+                  skip_construct (s2);
+                }
+            }
+          assert (c1 == -1 || truncate_set1);
+        }
       if (squeeze_repeats)
-	{
-	  set_initialize (s2, false, in_squeeze_set);
-	  squeeze_filter (io_buf, sizeof io_buf, read_and_xlate);
-	}
+        {
+          set_initialize (s2, false, in_squeeze_set);
+          squeeze_filter (io_buf, sizeof io_buf, read_and_xlate);
+        }
       else
-	{
-	  for (;;)
-	    {
-	      size_t bytes_read = read_and_xlate (io_buf, sizeof io_buf);
-	      if (bytes_read == 0)
-		break;
-	      if (fwrite (io_buf, 1, bytes_read, stdout) != bytes_read)
-		error (EXIT_FAILURE, errno, _("write error"));
-	    }
-	}
+        {
+          while (true)
+            {
+              size_t bytes_read = read_and_xlate (io_buf, sizeof io_buf);
+              if (bytes_read == 0)
+                break;
+              if (fwrite (io_buf, 1, bytes_read, stdout) != bytes_read)
+                error (EXIT_FAILURE, errno, _("write error"));
+            }
+        }
     }
 
   if (close (STDIN_FILENO) != 0)
     error (EXIT_FAILURE, errno, _("standard input"));
 
-  exit (EXIT_SUCCESS);
+  return EXIT_SUCCESS;
 }
author	Lorry Tar Creator <lorry-tar-importer@lorry>	2016-01-20 10:55:18 +0000
committer	Lorry Tar Creator <lorry-tar-importer@lorry>	2016-01-20 10:55:18 +0000
commit	70e9163c9c18e995515598085cb824e554eb7ae7 (patch)
tree	a42dc8b2a6c031354bf31472de888bfc8a060132 /src/tr.c
parent	cbf5993c43f49281173f185863577d86bfac6eae (diff)
download	coreutils-tarball-master.tar.gz