diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
commit | 70e9163c9c18e995515598085cb824e554eb7ae7 (patch) | |
tree | a42dc8b2a6c031354bf31472de888bfc8a060132 /src/tr.c | |
parent | cbf5993c43f49281173f185863577d86bfac6eae (diff) | |
download | coreutils-tarball-master.tar.gz |
coreutils-8.25 HEAD coreutils-8.25 master
Diffstat (limited to 'src/tr.c')
-rw-r--r-- | src/tr.c | 1507 |
1 files changed, 779 insertions, 728 deletions
@@ -1,10 +1,10 @@ /* tr -- a filter to translate characters - Copyright (C) 91, 1995-2006 Free Software Foundation, Inc. + Copyright (C) 1991-2016 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -12,8 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* Written by Jim Meyering */ @@ -26,14 +25,16 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "quote.h" #include "safe-read.h" +#include "xfreopen.h" #include "xstrtol.h" -/* The official name of this program (e.g., no `g' prefix). */ +/* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "tr" -#define AUTHORS "Jim Meyering" +#define AUTHORS proper_name ("Jim Meyering") enum { N_CHARS = UCHAR_MAX + 1 }; @@ -100,28 +101,28 @@ enum Range_element_type For example, consider the POSIX version of the classic tr command: tr -cs 'a-zA-Z_' '[\n*]' String1 has 3 constructs, two of which are ranges (a-z and A-Z), - and a single normal character, `_'. String2 has one construct. */ + and a single normal character, '_'. String2 has one construct. 
*/ struct List_element { enum Range_element_type type; struct List_element *next; union { - unsigned char normal_char; - struct /* unnamed */ - { - unsigned char first_char; - unsigned char last_char; - } - range; - enum Char_class char_class; - unsigned char equiv_code; - struct /* unnamed */ - { - unsigned char the_repeated_char; - count repeat_count; - } - repeated_char; + unsigned char normal_char; + struct /* unnamed */ + { + unsigned char first_char; + unsigned char last_char; + } + range; + enum Char_class char_class; + unsigned char equiv_code; + struct /* unnamed */ + { + unsigned char the_repeated_char; + count repeat_count; + } + repeated_char; } u; }; @@ -132,9 +133,9 @@ struct List_element the corresponding argument string. The attributes are used mainly to verify that the strings are valid in the context of any options specified (like -s, -d, or -c). The main exception is the member - `tail', which is first used to construct the list. After construction, + 'tail', which is first used to construct the list. After construction, it is used by get_next to save its state when traversing the list. - The member `state' serves a similar function. */ + The member 'state' serves a similar function. */ struct Spec_list { /* Points to the head of the list of range elements. @@ -194,9 +195,6 @@ es_match (struct E_string const *es, size_t i, char c) return es->s[i] == c && !es->escaped[i]; } -/* The name by which this program was run. */ -char *program_name; - /* When true, each sequence in the input of a repeated character (call it c) is replaced (in the output) by a single occurrence of c for every c in the squeeze set. */ @@ -249,15 +247,14 @@ static char const *const char_class_name[] = "alnum", "alpha", "blank", "cntrl", "digit", "graph", "lower", "print", "punct", "space", "upper", "xdigit" }; -enum { N_CHAR_CLASSES = sizeof char_class_name / sizeof char_class_name[0] }; -/* Array of boolean values. 
A character `c' is a member of the +/* Array of boolean values. A character 'c' is a member of the squeeze set if and only if in_squeeze_set[c] is true. The squeeze set is defined by the last (possibly, the only) string argument on the command line when the squeeze option is given. */ static bool in_squeeze_set[N_CHARS]; -/* Array of boolean values. A character `c' is a member of the +/* Array of boolean values. A character 'c' is a member of the delete set if and only if in_delete_set[c] is true. The delete set is defined by the first (or only) string argument on the command line when the delete option is given. */ @@ -278,28 +275,27 @@ static struct option const long_options[] = {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} }; - + void usage (int status) { if (status != EXIT_SUCCESS) - fprintf (stderr, _("Try `%s --help' for more information.\n"), - program_name); + emit_try_help (); else { printf (_("\ Usage: %s [OPTION]... SET1 [SET2]\n\ "), - program_name); + program_name); fputs (_("\ Translate, squeeze, and/or delete characters from standard input,\n\ writing to standard output.\n\ \n\ - -c, -C, --complement first complement SET1\n\ + -c, -C, --complement use the complement of SET1\n\ -d, --delete delete characters in SET1, do not translate\n\ - -s, --squeeze-repeats replace each input sequence of a repeated character\n\ - that is listed in SET1 with a single occurrence\n\ - of that character\n\ + -s, --squeeze-repeats replace each sequence of a repeated character\n\ + that is listed in the last specified SET,\n\ + with a single occurrence of that character\n\ -t, --truncate-set1 first truncate SET1 to length of SET2\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); @@ -343,20 +339,13 @@ Interpreted sequences are:\n\ \n\ Translation occurs if -d is not given and both SET1 and SET2 appear.\n\ -t may be used only when translating. SET2 is extended to length of\n\ -SET1 by repeating its last character as necessary. 
\ -"), stdout); - fputs (_("\ -Excess characters\n\ +SET1 by repeating its last character as necessary. Excess characters\n\ of SET2 are ignored. Only [:lower:] and [:upper:] are guaranteed to\n\ expand in ascending order; used in SET2 while translating, they may\n\ -only be used in pairs to specify case conversion. \ +only be used in pairs to specify case conversion. -s uses the last\n\ +specified SET, and occurs after translation or deletion.\n\ "), stdout); - fputs (_("\ --s uses SET1 if not\n\ -translating nor deleting; else squeezing uses SET2 and occurs after\n\ -translation or deletion.\n\ -"), stdout); - printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); + emit_ancillary_info (PROGRAM_NAME); } exit (status); } @@ -373,7 +362,7 @@ is_equiv_class_member (unsigned char equiv_class, unsigned char c) /* Return true if the character C is a member of the character class CHAR_CLASS. */ -static bool +static bool _GL_ATTRIBUTE_PURE is_char_class_member (enum Char_class char_class, unsigned char c) { int result; @@ -455,93 +444,94 @@ unquote (char const *s, struct E_string *es) int oct_digit; switch (s[i]) - { - case '\\': - es->escaped[j] = true; - switch (s[i + 1]) - { - case '\\': - c = '\\'; - break; - case 'a': - c = '\a'; - break; - case 'b': - c = '\b'; - break; - case 'f': - c = '\f'; - break; - case 'n': - c = '\n'; - break; - case 'r': - c = '\r'; - break; - case 't': - c = '\t'; - break; - case 'v': - c = '\v'; - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - c = s[i + 1] - '0'; - oct_digit = s[i + 2] - '0'; - if (0 <= oct_digit && oct_digit <= 7) - { - c = 8 * c + oct_digit; - ++i; - oct_digit = s[i + 2] - '0'; - if (0 <= oct_digit && oct_digit <= 7) - { - if (8 * c + oct_digit < N_CHARS) - { - c = 8 * c + oct_digit; - ++i; - } - else - { - /* A 3-digit octal number larger than \377 won't - fit in 8 bits. 
So we stop when adding the - next digit would put us over the limit and - give a warning about the ambiguity. POSIX - isn't clear on this, and we interpret this - lack of clarity as meaning the resulting behavior - is undefined, which means we're allowed to issue - a warning. */ - error (0, 0, _("warning: the ambiguous octal escape \ -\\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, %c"), - s[i], s[i + 1], s[i + 2], - s[i], s[i + 1], s[i + 2]); - } - } - } - break; - case '\0': - /* POSIX seems to require that a trailing backslash must - stand for itself. Weird. */ - es->escaped[j] = false; - i--; - c = '\\'; - break; - default: - c = s[i + 1]; - break; - } - ++i; - es->s[j++] = c; - break; - default: - es->s[j++] = s[i]; - break; - } + { + case '\\': + es->escaped[j] = true; + switch (s[i + 1]) + { + case '\\': + c = '\\'; + break; + case 'a': + c = '\a'; + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + c = s[i + 1] - '0'; + oct_digit = s[i + 2] - '0'; + if (0 <= oct_digit && oct_digit <= 7) + { + c = 8 * c + oct_digit; + ++i; + oct_digit = s[i + 2] - '0'; + if (0 <= oct_digit && oct_digit <= 7) + { + if (8 * c + oct_digit < N_CHARS) + { + c = 8 * c + oct_digit; + ++i; + } + else + { + /* A 3-digit octal number larger than \377 won't + fit in 8 bits. So we stop when adding the + next digit would put us over the limit and + give a warning about the ambiguity. POSIX + isn't clear on this, and we interpret this + lack of clarity as meaning the resulting behavior + is undefined, which means we're allowed to issue + a warning. 
*/ + error (0, 0, _("warning: the ambiguous octal escape\ + \\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, %c"), + s[i], s[i + 1], s[i + 2], + s[i], s[i + 1], s[i + 2]); + } + } + } + break; + case '\0': + error (0, 0, _("warning: an unescaped backslash " + "at end of string is not portable")); + /* POSIX is not clear about this. */ + es->escaped[j] = false; + i--; + c = '\\'; + break; + default: + c = s[i + 1]; + break; + } + ++i; + es->s[j++] = c; + break; + default: + es->s[j++] = s[i]; + break; + } } es->len = j; return true; @@ -550,14 +540,14 @@ unquote (char const *s, struct E_string *es) /* If CLASS_STR is a valid character class string, return its index in the global char_class_name array. Otherwise, return CC_NO_CLASS. */ -static enum Char_class +static enum Char_class _GL_ATTRIBUTE_PURE look_up_char_class (char const *class_str, size_t len) { enum Char_class i; - for (i = 0; i < N_CHAR_CLASSES; i++) - if (strncmp (class_str, char_class_name[i], len) == 0 - && strlen (char_class_name[i]) == len) + for (i = 0; i < ARRAY_CARDINALITY (char_class_name); i++) + if (STREQ_LEN (class_str, char_class_name[i], len) + && strlen (char_class_name[i]) == len) return i; return CC_NO_CLASS; } @@ -605,42 +595,42 @@ make_printable_str (char const *s, size_t len) unsigned char c = s[i]; switch (c) - { - case '\\': - tmp = "\\"; - break; - case '\a': - tmp = "\\a"; - break; - case '\b': - tmp = "\\b"; - break; - case '\f': - tmp = "\\f"; - break; - case '\n': - tmp = "\\n"; - break; - case '\r': - tmp = "\\r"; - break; - case '\t': - tmp = "\\t"; - break; - case '\v': - tmp = "\\v"; - break; - default: - if (isprint (c)) - { - buf[0] = c; - buf[1] = '\0'; - } - else - sprintf (buf, "\\%03o", c); - tmp = buf; - break; - } + { + case '\\': + tmp = "\\"; + break; + case '\a': + tmp = "\\a"; + break; + case '\b': + tmp = "\\b"; + break; + case '\f': + tmp = "\\f"; + break; + case '\n': + tmp = "\\n"; + break; + case '\r': + tmp = "\\r"; + break; + case '\t': + 
tmp = "\\t"; + break; + case '\v': + tmp = "\\v"; + break; + default: + if (isprint (c)) + { + buf[0] = c; + buf[1] = '\0'; + } + else + sprintf (buf, "\\%03o", c); + tmp = buf; + break; + } p = stpcpy (p, tmp); } return printable_buf; @@ -679,8 +669,8 @@ append_range (struct Spec_list *list, unsigned char first, unsigned char last) char *tmp2 = make_printable_char (last); error (0, 0, - _("range-endpoints of `%s-%s' are in reverse collating sequence order"), - tmp1, tmp2); + _("range-endpoints of '%s-%s' are in reverse collating sequence order"), + tmp1, tmp2); free (tmp1); free (tmp2); return false; @@ -703,7 +693,7 @@ append_range (struct Spec_list *list, unsigned char first, unsigned char last) static bool append_char_class (struct Spec_list *list, - char const *char_class_str, size_t len) + char const *char_class_str, size_t len) { enum Char_class char_class; struct List_element *new; @@ -728,7 +718,7 @@ append_char_class (struct Spec_list *list, static void append_repeated_char (struct Spec_list *list, unsigned char the_char, - count repeat_count) + count repeat_count) { struct List_element *new; @@ -750,7 +740,7 @@ append_repeated_char (struct Spec_list *list, unsigned char the_char, static bool append_equiv_class (struct Spec_list *list, - char const *equiv_class_str, size_t len) + char const *equiv_class_str, size_t len) { struct List_element *new; @@ -774,16 +764,16 @@ append_equiv_class (struct Spec_list *list, static bool find_closing_delim (const struct E_string *es, size_t start_idx, - char pre_bracket_char, size_t *result_idx) + char pre_bracket_char, size_t *result_idx) { size_t i; for (i = start_idx; i < es->len - 1; i++) if (es->s[i] == pre_bracket_char && es->s[i + 1] == ']' - && !es->escaped[i] && !es->escaped[i + 1]) + && !es->escaped[i] && !es->escaped[i + 1]) { - *result_idx = i; - return true; + *result_idx = i; + return true; } return false; } @@ -792,16 +782,16 @@ find_closing_delim (const struct E_string *es, size_t start_idx, beginning 
with P[ START_IDX ] comprise a valid [c*n] construct, then set *CHAR_TO_REPEAT, *REPEAT_COUNT, and *CLOSING_BRACKET_IDX and return zero. If the second character following - the opening bracket is not `*' or if no closing bracket can be + the opening bracket is not '*' or if no closing bracket can be found, return -1. If a closing bracket is found and the - second char is `*', but the string between the `*' and `]' isn't + second char is '*', but the string between the '*' and ']' isn't empty, an octal number, or a decimal number, print an error message and return -2. */ static int find_bracketed_repeat (const struct E_string *es, size_t start_idx, - unsigned char *char_to_repeat, count *repeat_count, - size_t *closing_bracket_idx) + unsigned char *char_to_repeat, count *repeat_count, + size_t *closing_bracket_idx) { size_t i; @@ -812,47 +802,47 @@ find_bracketed_repeat (const struct E_string *es, size_t start_idx, for (i = start_idx + 2; i < es->len && !es->escaped[i]; i++) { if (es->s[i] == ']') - { - size_t digit_str_len = i - start_idx - 2; - - *char_to_repeat = es->s[start_idx]; - if (digit_str_len == 0) - { - /* We've matched [c*] -- no explicit repeat count. */ - *repeat_count = 0; - } - else - { - /* Here, we have found [c*s] where s should be a string - of octal (if it starts with `0') or decimal digits. */ - char const *digit_str = &es->s[start_idx + 2]; - char *d_end; - if ((xstrtoumax (digit_str, &d_end, *digit_str == '0' ? 8 : 10, - repeat_count, NULL) - != LONGINT_OK) - || REPEAT_COUNT_MAXIMUM < *repeat_count - || digit_str + digit_str_len != d_end) - { - char *tmp = make_printable_str (digit_str, digit_str_len); - error (0, 0, - _("invalid repeat count %s in [c*n] construct"), - quote (tmp)); - free (tmp); - return -2; - } - } - *closing_bracket_idx = i; - return 0; - } + { + size_t digit_str_len = i - start_idx - 2; + + *char_to_repeat = es->s[start_idx]; + if (digit_str_len == 0) + { + /* We've matched [c*] -- no explicit repeat count. 
*/ + *repeat_count = 0; + } + else + { + /* Here, we have found [c*s] where s should be a string + of octal (if it starts with '0') or decimal digits. */ + char const *digit_str = &es->s[start_idx + 2]; + char *d_end; + if ((xstrtoumax (digit_str, &d_end, *digit_str == '0' ? 8 : 10, + repeat_count, NULL) + != LONGINT_OK) + || REPEAT_COUNT_MAXIMUM < *repeat_count + || digit_str + digit_str_len != d_end) + { + char *tmp = make_printable_str (digit_str, digit_str_len); + error (0, 0, + _("invalid repeat count %s in [c*n] construct"), + quote (tmp)); + free (tmp); + return -2; + } + } + *closing_bracket_idx = i; + return 0; + } } return -1; /* No bracket found. */ } /* Return true if the string at ES->s[IDX] matches the regular - expression `\*[0-9]*\]', false otherwise. The string does not + expression '\*[0-9]*\]', false otherwise. The string does not match if any of its characters are escaped. */ -static bool +static bool _GL_ATTRIBUTE_PURE star_digits_closebracket (const struct E_string *es, size_t idx) { size_t i; @@ -869,13 +859,13 @@ star_digits_closebracket (const struct E_string *es, size_t idx) /* Convert string UNESCAPED_STRING (which has been preprocessed to convert backslash-escape sequences) of length LEN characters into a linked list of the following 5 types of constructs: - - [:str:] Character class where `str' is one of the 12 valid strings. - - [=c=] Equivalence class where `c' is any single character. - - [c*n] Repeat the single character `c' `n' times. n may be omitted. - However, if `n' is present, it must be a non-negative octal or - decimal integer. - - r-s Range of characters from `r' to `s'. The second endpoint must - not precede the first in the current collating sequence. + - [:str:] Character class where 'str' is one of the 12 valid strings. + - [=c=] Equivalence class where 'c' is any single character. + - [c*n] Repeat the single character 'c' 'n' times. n may be omitted. 
+ However, if 'n' is present, it must be a non-negative octal or + decimal integer. + - r-s Range of characters from 'r' to 's'. The second endpoint must + not precede the first in the current collating sequence. - c Any other character is interpreted as itself. */ static bool @@ -888,7 +878,7 @@ build_spec_list (const struct E_string *es, struct Spec_list *result) /* The main for-loop below recognizes the 4 multi-character constructs. A character that matches (in its context) none of the multi-character - constructs is classified as `normal'. Since all multi-character + constructs is classified as 'normal'. Since all multi-character constructs have at least 3 characters, any strings of length 2 or less are composed solely of normal characters. Hence, the index of the outer for-loop runs only as far as LEN-2. */ @@ -896,120 +886,120 @@ build_spec_list (const struct E_string *es, struct Spec_list *result) for (i = 0; i + 2 < es->len; /* empty */) { if (es_match (es, i, '[')) - { - bool matched_multi_char_construct; - size_t closing_bracket_idx; - unsigned char char_to_repeat; - count repeat_count; - int err; - - matched_multi_char_construct = true; - if (es_match (es, i + 1, ':') || es_match (es, i + 1, '=')) - { - size_t closing_delim_idx; - - if (find_closing_delim (es, i + 2, p[i + 1], &closing_delim_idx)) - { - size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1; - char const *opnd_str = p + i + 2; - - if (opnd_str_len == 0) - { - if (p[i + 1] == ':') - error (0, 0, _("missing character class name `[::]'")); - else - error (0, 0, - _("missing equivalence class character `[==]'")); - return false; - } - - if (p[i + 1] == ':') - { - /* FIXME: big comment. 
*/ - if (!append_char_class (result, opnd_str, opnd_str_len)) - { - if (star_digits_closebracket (es, i + 2)) - goto try_bracketed_repeat; - else - { - char *tmp = make_printable_str (opnd_str, - opnd_str_len); - error (0, 0, _("invalid character class %s"), - quote (tmp)); - free (tmp); - return false; - } - } - } - else - { - /* FIXME: big comment. */ - if (!append_equiv_class (result, opnd_str, opnd_str_len)) - { - if (star_digits_closebracket (es, i + 2)) - goto try_bracketed_repeat; - else - { - char *tmp = make_printable_str (opnd_str, - opnd_str_len); - error (0, 0, - _("%s: equivalence class operand must be a single character"), - tmp); - free (tmp); - return false; - } - } - } - - i = closing_delim_idx + 2; - continue; - } - /* Else fall through. This could be [:*] or [=*]. */ - } - - try_bracketed_repeat: - - /* Determine whether this is a bracketed repeat range - matching the RE \[.\*(dec_or_oct_number)?\]. */ - err = find_bracketed_repeat (es, i + 1, &char_to_repeat, - &repeat_count, - &closing_bracket_idx); - if (err == 0) - { - append_repeated_char (result, char_to_repeat, repeat_count); - i = closing_bracket_idx + 1; - } - else if (err == -1) - { - matched_multi_char_construct = false; - } - else - { - /* Found a string that looked like [c*n] but the - numeric part was invalid. */ - return false; - } - - if (matched_multi_char_construct) - continue; - - /* We reach this point if P does not match [:str:], [=c=], - [c*n], or [c*]. Now, see if P looks like a range `[-c' - (from `[' to `c'). 
*/ - } + { + bool matched_multi_char_construct; + size_t closing_bracket_idx; + unsigned char char_to_repeat; + count repeat_count; + int err; + + matched_multi_char_construct = true; + if (es_match (es, i + 1, ':') || es_match (es, i + 1, '=')) + { + size_t closing_delim_idx; + + if (find_closing_delim (es, i + 2, p[i + 1], &closing_delim_idx)) + { + size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1; + char const *opnd_str = p + i + 2; + + if (opnd_str_len == 0) + { + if (p[i + 1] == ':') + error (0, 0, _("missing character class name '[::]'")); + else + error (0, 0, + _("missing equivalence class character '[==]'")); + return false; + } + + if (p[i + 1] == ':') + { + /* FIXME: big comment. */ + if (!append_char_class (result, opnd_str, opnd_str_len)) + { + if (star_digits_closebracket (es, i + 2)) + goto try_bracketed_repeat; + else + { + char *tmp = make_printable_str (opnd_str, + opnd_str_len); + error (0, 0, _("invalid character class %s"), + quote (tmp)); + free (tmp); + return false; + } + } + } + else + { + /* FIXME: big comment. */ + if (!append_equiv_class (result, opnd_str, opnd_str_len)) + { + if (star_digits_closebracket (es, i + 2)) + goto try_bracketed_repeat; + else + { + char *tmp = make_printable_str (opnd_str, + opnd_str_len); + error (0, 0, + _("%s: equivalence class operand must be a single character"), + tmp); + free (tmp); + return false; + } + } + } + + i = closing_delim_idx + 2; + continue; + } + /* Else fall through. This could be [:*] or [=*]. */ + } + + try_bracketed_repeat: + + /* Determine whether this is a bracketed repeat range + matching the RE \[.\*(dec_or_oct_number)?\]. 
*/ + err = find_bracketed_repeat (es, i + 1, &char_to_repeat, + &repeat_count, + &closing_bracket_idx); + if (err == 0) + { + append_repeated_char (result, char_to_repeat, repeat_count); + i = closing_bracket_idx + 1; + } + else if (err == -1) + { + matched_multi_char_construct = false; + } + else + { + /* Found a string that looked like [c*n] but the + numeric part was invalid. */ + return false; + } + + if (matched_multi_char_construct) + continue; + + /* We reach this point if P does not match [:str:], [=c=], + [c*n], or [c*]. Now, see if P looks like a range '[-c' + (from '[' to 'c'). */ + } /* Look ahead one char for ranges like a-z. */ if (es_match (es, i + 1, '-')) - { - if (!append_range (result, p[i], p[i + 2])) - return false; - i += 3; - } + { + if (!append_range (result, p[i], p[i + 2])) + return false; + i += 3; + } else - { - append_normal_char (result, p[i]); - ++i; - } + { + append_normal_char (result, p[i]); + ++i; + } } /* Now handle the (2 or fewer) remaining characters p[i]..p[es->len - 1]. */ @@ -1019,8 +1009,17 @@ build_spec_list (const struct E_string *es, struct Spec_list *result) return true; } +/* Advance past the current construct. + S->tail must be non-NULL. */ +static void +skip_construct (struct Spec_list *s) +{ + s->tail = s->tail->next; + s->state = NEW_ELEMENT; +} + /* Given a Spec_list S (with its saved state implicit in the values - of its members `tail' and `state'), return the next single character + of its members 'tail' and 'state'), return the next single character in the expansion of S's constructs. If the last character of S was returned on the previous call or if S was empty, this function returns -1. 
For example, successive calls to get_next where S @@ -1063,65 +1062,53 @@ get_next (struct Spec_list *s, enum Upper_Lower_class *class) case RE_RANGE: if (s->state == NEW_ELEMENT) - s->state = p->u.range.first_char; + s->state = p->u.range.first_char; else - ++(s->state); + ++(s->state); return_val = s->state; if (s->state == p->u.range.last_char) - { - s->tail = p->next; - s->state = NEW_ELEMENT; - } + { + s->tail = p->next; + s->state = NEW_ELEMENT; + } break; case RE_CHAR_CLASS: if (class) - { - bool upper_or_lower; - switch (p->u.char_class) - { - case CC_LOWER: - *class = UL_LOWER; - upper_or_lower = true; - break; - case CC_UPPER: - *class = UL_UPPER; - upper_or_lower = true; - break; - default: - upper_or_lower = false; - break; - } - - if (upper_or_lower) - { - s->tail = p->next; - s->state = NEW_ELEMENT; - return_val = 0; - break; - } - } + { + switch (p->u.char_class) + { + case CC_LOWER: + *class = UL_LOWER; + break; + case CC_UPPER: + *class = UL_UPPER; + break; + default: + break; + } + } if (s->state == NEW_ELEMENT) - { - for (i = 0; i < N_CHARS; i++) - if (is_char_class_member (p->u.char_class, i)) - break; - assert (i < N_CHARS); - s->state = i; - } + { + for (i = 0; i < N_CHARS; i++) + if (is_char_class_member (p->u.char_class, i)) + break; + assert (i < N_CHARS); + s->state = i; + } assert (is_char_class_member (p->u.char_class, s->state)); return_val = s->state; for (i = s->state + 1; i < N_CHARS; i++) - if (is_char_class_member (p->u.char_class, i)) - break; + if (is_char_class_member (p->u.char_class, i)) + break; if (i < N_CHARS) - s->state = i; + s->state = i; else - { - s->tail = p->next; - s->state = NEW_ELEMENT; - } + { + s->tail = p->next; + s->state = NEW_ELEMENT; + } break; case RE_EQUIV_CLASS: @@ -1138,25 +1125,25 @@ get_next (struct Spec_list *s, enum Upper_Lower_class *class) case RE_REPEATED_CHAR: /* Here, a repeat count of n == 0 means don't repeat at all. 
*/ if (p->u.repeated_char.repeat_count == 0) - { - s->tail = p->next; - s->state = NEW_ELEMENT; - return_val = get_next (s, class); - } + { + s->tail = p->next; + s->state = NEW_ELEMENT; + return_val = get_next (s, class); + } else - { - if (s->state == NEW_ELEMENT) - { - s->state = 0; - } - ++(s->state); - return_val = p->u.repeated_char.the_repeated_char; - if (s->state == p->u.repeated_char.repeat_count) - { - s->tail = p->next; - s->state = NEW_ELEMENT; - } - } + { + if (s->state == NEW_ELEMENT) + { + s->state = 0; + } + ++(s->state); + return_val = p->u.repeated_char.the_repeated_char; + if (s->state == p->u.repeated_char.repeat_count) + { + s->tail = p->next; + s->state = NEW_ELEMENT; + } + } break; default: @@ -1188,6 +1175,78 @@ card_of_complement (struct Spec_list *s) return cardinality; } +/* Discard the lengths associated with a case conversion, + as using the actual number of upper or lower case characters + is problematic when they don't match in some locales. + Also ensure the case conversion classes in string2 are + aligned correctly with those in string1. + Note POSIX says the behavior of 'tr "[:upper:]" "[:upper:]"' + is undefined. Therefore we allow it (unlike Solaris) + and treat it as a no-op. 
*/ + +static void +validate_case_classes (struct Spec_list *s1, struct Spec_list *s2) +{ + size_t n_upper = 0; + size_t n_lower = 0; + unsigned int i; + int c1 = 0; + int c2 = 0; + count old_s1_len = s1->length; + count old_s2_len = s2->length; + struct List_element *s1_tail = s1->tail; + struct List_element *s2_tail = s2->tail; + bool s1_new_element = true; + bool s2_new_element = true; + + if (!s2->has_char_class) + return; + + for (i = 0; i < N_CHARS; i++) + { + if (isupper (i)) + n_upper++; + if (islower (i)) + n_lower++; + } + + s1->state = BEGIN_STATE; + s2->state = BEGIN_STATE; + + while (c1 != -1 && c2 != -1) + { + enum Upper_Lower_class class_s1, class_s2; + + c1 = get_next (s1, &class_s1); + c2 = get_next (s2, &class_s2); + + /* If c2 transitions to a new case class, then + c1 must also transition at the same time. */ + if (s2_new_element && class_s2 != UL_NONE + && !(s1_new_element && class_s1 != UL_NONE)) + error (EXIT_FAILURE, 0, + _("misaligned [:upper:] and/or [:lower:] construct")); + + /* If case converting, quickly skip over the elements. */ + if (class_s2 != UL_NONE) + { + skip_construct (s1); + skip_construct (s2); + /* Discount insignificant/problematic lengths. */ + s1->length -= (class_s1 == UL_UPPER ? n_upper : n_lower) - 1; + s2->length -= (class_s2 == UL_UPPER ? n_upper : n_lower) - 1; + } + + s1_new_element = s1->state == NEW_ELEMENT; /* Next element is new. */ + s2_new_element = s2->state == NEW_ELEMENT; /* Next element is new. */ + } + + assert (old_s1_len >= s1->length && old_s2_len >= s2->length); + + s1->tail = s1_tail; + s2->tail = s2_tail; +} + /* Gather statistics about the spec-list S in preparation for the tests in validate that determine the consistency of the specs. This function is called at most twice; once for string1, and again for any string2. 
@@ -1218,61 +1277,61 @@ get_spec_stats (struct Spec_list *s) count new_length; switch (p->type) - { - case RE_NORMAL_CHAR: - len = 1; - break; - - case RE_RANGE: - assert (p->u.range.last_char >= p->u.range.first_char); - len = p->u.range.last_char - p->u.range.first_char + 1; - break; - - case RE_CHAR_CLASS: - s->has_char_class = true; - for (i = 0; i < N_CHARS; i++) - if (is_char_class_member (p->u.char_class, i)) - ++len; - switch (p->u.char_class) - { - case CC_UPPER: - case CC_LOWER: - break; - default: - s->has_restricted_char_class = true; - break; - } - break; - - case RE_EQUIV_CLASS: - for (i = 0; i < N_CHARS; i++) - if (is_equiv_class_member (p->u.equiv_code, i)) - ++len; - s->has_equiv_class = true; - break; - - case RE_REPEATED_CHAR: - if (p->u.repeated_char.repeat_count > 0) - len = p->u.repeated_char.repeat_count; - else - { - s->indefinite_repeat_element = p; - ++(s->n_indefinite_repeats); - } - break; - - default: - abort (); - break; - } + { + case RE_NORMAL_CHAR: + len = 1; + break; + + case RE_RANGE: + assert (p->u.range.last_char >= p->u.range.first_char); + len = p->u.range.last_char - p->u.range.first_char + 1; + break; + + case RE_CHAR_CLASS: + s->has_char_class = true; + for (i = 0; i < N_CHARS; i++) + if (is_char_class_member (p->u.char_class, i)) + ++len; + switch (p->u.char_class) + { + case CC_UPPER: + case CC_LOWER: + break; + default: + s->has_restricted_char_class = true; + break; + } + break; + + case RE_EQUIV_CLASS: + for (i = 0; i < N_CHARS; i++) + if (is_equiv_class_member (p->u.equiv_code, i)) + ++len; + s->has_equiv_class = true; + break; + + case RE_REPEATED_CHAR: + if (p->u.repeated_char.repeat_count > 0) + len = p->u.repeated_char.repeat_count; + else + { + s->indefinite_repeat_element = p; + ++(s->n_indefinite_repeats); + } + break; + + default: + abort (); + break; + } /* Check for arithmetic overflow in computing length. 
Also, reject - any length greater than the maximum repeat count, in case the - length is later used to compute the repeat count for an - indefinite element. */ + any length greater than the maximum repeat count, in case the + length is later used to compute the repeat count for an + indefinite element. */ new_length = length + len; if (! (length <= new_length && new_length <= REPEAT_COUNT_MAXIMUM)) - error (EXIT_FAILURE, 0, _("too many characters in set")); + error (EXIT_FAILURE, 0, _("too many characters in set")); length = new_length; } @@ -1294,7 +1353,7 @@ get_s2_spec_stats (struct Spec_list *s2, count len_s1) if (len_s1 >= s2->length && s2->n_indefinite_repeats == 1) { s2->indefinite_repeat_element->u.repeated_char.repeat_count = - len_s1 - s2->length; + len_s1 - s2->length; s2->length = len_s1; } } @@ -1329,20 +1388,14 @@ parse_str (char const *s, struct Spec_list *spec_list) Upon successful completion, S2->length is set to S1->length. The only way this function can fail to make S2 as long as S1 is when S2 has zero-length, since in that case, there is no last character to repeat. - So S2->length is required to be at least 1. + So S2->length is required to be at least 1. */ - Providing this functionality allows the user to do some pretty - non-BSD (and non-portable) things: For example, the command - tr -cs '[:upper:]0-9' '[:lower:]' - is almost guaranteed to give results that depend on your collating - sequence. */ static void string2_extend (const struct Spec_list *s1, struct Spec_list *s2) { struct List_element *p; unsigned char char_to_repeat; - int i; assert (translating); assert (s1->length > s2->length); @@ -1358,11 +1411,14 @@ string2_extend (const struct Spec_list *s1, struct Spec_list *s2) char_to_repeat = p->u.range.last_char; break; case RE_CHAR_CLASS: - for (i = N_CHARS - 1; i >= 0; i--) - if (is_char_class_member (p->u.char_class, i)) - break; - assert (i >= 0); - char_to_repeat = i; + /* Note BSD allows extending of classes in string2. 
For example: + tr '[:upper:]0-9' '[:lower:]' + That's not portable however, contradicts POSIX and is dependent + on your collating sequence. */ + error (EXIT_FAILURE, 0, + _("when translating with string1 longer than string2,\nthe\ + latter string must not end with a character class")); + abort (); /* inform gcc that the above use of error never returns. */ break; case RE_REPEATED_CHAR: @@ -1420,7 +1476,7 @@ validate (struct Spec_list *s1, struct Spec_list *s2) if (s1->n_indefinite_repeats > 0) { error (EXIT_FAILURE, 0, - _("the [c*] repeat construct may not appear in string1")); + _("the [c*] repeat construct may not appear in string1")); } if (s2) @@ -1428,57 +1484,59 @@ validate (struct Spec_list *s1, struct Spec_list *s2) get_s2_spec_stats (s2, s1->length); if (s2->n_indefinite_repeats > 1) - { - error (EXIT_FAILURE, 0, - _("only one [c*] repeat construct may appear in string2")); - } + { + error (EXIT_FAILURE, 0, + _("only one [c*] repeat construct may appear in string2")); + } if (translating) - { - if (s2->has_equiv_class) - { - error (EXIT_FAILURE, 0, - _("[=c=] expressions may not appear in string2 \ -when translating")); - } - - if (s1->length > s2->length) - { - if (!truncate_set1) - { - /* string2 must be non-empty unless --truncate-set1 is - given or string1 is empty. */ - - if (s2->length == 0) - error (EXIT_FAILURE, 0, - _("when not truncating set1, string2 must be non-empty")); - string2_extend (s1, s2); - } - } - - if (complement && s1->has_char_class - && ! 
(s2->length == s1->length && homogeneous_spec_list (s2))) - { - error (EXIT_FAILURE, 0, - _("when translating with complemented character classes,\ + { + if (s2->has_equiv_class) + { + error (EXIT_FAILURE, 0, + _("[=c=] expressions may not appear in string2\ + when translating")); + } + + if (s2->has_restricted_char_class) + { + error (EXIT_FAILURE, 0, + _("when translating, the only character classes that may\ + appear in\nstring2 are 'upper' and 'lower'")); + } + + validate_case_classes (s1, s2); + + if (s1->length > s2->length) + { + if (!truncate_set1) + { + /* string2 must be non-empty unless --truncate-set1 is + given or string1 is empty. */ + + if (s2->length == 0) + error (EXIT_FAILURE, 0, + _("when not truncating set1, string2 must be non-empty")); + string2_extend (s1, s2); + } + } + + if (complement && s1->has_char_class + && ! (s2->length == s1->length && homogeneous_spec_list (s2))) + { + error (EXIT_FAILURE, 0, + _("when translating with complemented character classes,\ \nstring2 must map all characters in the domain to one")); - } - - if (s2->has_restricted_char_class) - { - error (EXIT_FAILURE, 0, - _("when translating, the only character classes that may \ -appear in\nstring2 are `upper' and `lower'")); - } - } + } + } else - /* Not translating. */ - { - if (s2->n_indefinite_repeats > 0) - error (EXIT_FAILURE, 0, - _("the [c*] construct may appear in string2 only \ -when translating")); - } + /* Not translating. */ + { + if (s2->n_indefinite_repeats > 0) + error (EXIT_FAILURE, 0, + _("the [c*] construct may appear in string2 only\ + when translating")); + } } } @@ -1495,85 +1553,85 @@ squeeze_filter (char *buf, size_t size, size_t (*reader) (char *, size_t)) { /* A value distinct from any character that may have been stored in a buffer as the result of a block-read in the function squeeze_filter. 
*/ - enum { NOT_A_CHAR = CHAR_MAX + 1 }; + const int NOT_A_CHAR = INT_MAX; int char_to_squeeze = NOT_A_CHAR; size_t i = 0; size_t nr = 0; - for (;;) + while (true) { size_t begin; if (i >= nr) - { - nr = reader (buf, size); - if (nr == 0) - break; - i = 0; - } + { + nr = reader (buf, size); + if (nr == 0) + break; + i = 0; + } begin = i; if (char_to_squeeze == NOT_A_CHAR) - { - size_t out_len; - /* Here, by being a little tricky, we can get a significant - performance increase in most cases when the input is - reasonably large. Since tr will modify the input only - if two consecutive (and identical) input characters are - in the squeeze set, we can step by two through the data - when searching for a character in the squeeze set. This - means there may be a little more work in a few cases and - perhaps twice as much work in the worst cases where most - of the input is removed by squeezing repeats. But most - uses of this functionality seem to remove less than 20-30% - of the input. */ - for (; i < nr && !in_squeeze_set[to_uchar (buf[i])]; i += 2) - continue; - - /* There is a special case when i == nr and we've just - skipped a character (the last one in buf) that is in - the squeeze set. */ - if (i == nr && in_squeeze_set[to_uchar (buf[i - 1])]) - --i; - - if (i >= nr) - out_len = nr - begin; - else - { - char_to_squeeze = buf[i]; - /* We're about to output buf[begin..i]. */ - out_len = i - begin + 1; - - /* But since we stepped by 2 in the loop above, - out_len may be one too large. */ - if (i > 0 && buf[i - 1] == char_to_squeeze) - --out_len; - - /* Advance i to the index of first character to be - considered when looking for a char different from - char_to_squeeze. */ - ++i; - } - if (out_len > 0 - && fwrite (&buf[begin], 1, out_len, stdout) != out_len) - error (EXIT_FAILURE, errno, _("write error")); - } + { + size_t out_len; + /* Here, by being a little tricky, we can get a significant + performance increase in most cases when the input is + reasonably large. 
Since tr will modify the input only + if two consecutive (and identical) input characters are + in the squeeze set, we can step by two through the data + when searching for a character in the squeeze set. This + means there may be a little more work in a few cases and + perhaps twice as much work in the worst cases where most + of the input is removed by squeezing repeats. But most + uses of this functionality seem to remove less than 20-30% + of the input. */ + for (; i < nr && !in_squeeze_set[to_uchar (buf[i])]; i += 2) + continue; + + /* There is a special case when i == nr and we've just + skipped a character (the last one in buf) that is in + the squeeze set. */ + if (i == nr && in_squeeze_set[to_uchar (buf[i - 1])]) + --i; + + if (i >= nr) + out_len = nr - begin; + else + { + char_to_squeeze = buf[i]; + /* We're about to output buf[begin..i]. */ + out_len = i - begin + 1; + + /* But since we stepped by 2 in the loop above, + out_len may be one too large. */ + if (i > 0 && buf[i - 1] == char_to_squeeze) + --out_len; + + /* Advance i to the index of first character to be + considered when looking for a char different from + char_to_squeeze. */ + ++i; + } + if (out_len > 0 + && fwrite (&buf[begin], 1, out_len, stdout) != out_len) + error (EXIT_FAILURE, errno, _("write error")); + } if (char_to_squeeze != NOT_A_CHAR) - { - /* Advance i to index of first char != char_to_squeeze - (or to nr if all the rest of the characters in this - buffer are the same as char_to_squeeze). */ - for (; i < nr && buf[i] == char_to_squeeze; i++) - continue; - if (i < nr) - char_to_squeeze = NOT_A_CHAR; - /* If (i >= nr) we've squeezed the last character in this buffer. - So now we have to read a new buffer and continue comparing - characters against char_to_squeeze. */ - } + { + /* Advance i to index of first char != char_to_squeeze + (or to nr if all the rest of the characters in this + buffer are the same as char_to_squeeze). 
*/ + for (; i < nr && buf[i] == char_to_squeeze; i++) + continue; + if (i < nr) + char_to_squeeze = NOT_A_CHAR; + /* If (i >= nr) we've squeezed the last character in this buffer. + So now we have to read a new buffer and continue comparing + characters against char_to_squeeze. */ + } } } @@ -1606,7 +1664,7 @@ read_and_delete (char *buf, size_t size) size_t nr = plain_read (buf, size); if (nr == 0) - return 0; + return 0; /* This first loop may be a waste of code, but gives much better performance when no characters are deleted in @@ -1614,12 +1672,12 @@ read_and_delete (char *buf, size_t size) of buf[i] into buf[n_saved] when it would be a NOP. */ for (i = 0; i < nr && !in_delete_set[to_uchar (buf[i])]; i++) - continue; + continue; n_saved = i; for (++i; i < nr; i++) - if (!in_delete_set[to_uchar (buf[i])]) - buf[n_saved++] = buf[i]; + if (!in_delete_set[to_uchar (buf[i])]) + buf[n_saved++] = buf[i]; } while (n_saved == 0); @@ -1628,7 +1686,7 @@ read_and_delete (char *buf, size_t size) /* Read at most SIZE bytes from stdin into the array BUF. Then perform the in-place and one-to-one mapping specified by the global - array `xlate'. Return the number of characters read, or 0 upon EOF. */ + array 'xlate'. Return the number of characters read, or 0 upon EOF. */ static size_t read_and_xlate (char *buf, size_t size) @@ -1644,7 +1702,7 @@ read_and_xlate (char *buf, size_t size) /* Initialize a boolean membership set, IN_SET, with the character values obtained by traversing the linked list of constructs S - using the function `get_next'. IN_SET is expected to have been + using the function 'get_next'. IN_SET is expected to have been initialized to all zeros by the caller. If COMPLEMENT_THIS_SET is true the resulting set is complemented. 
*/ @@ -1674,7 +1732,7 @@ main (int argc, char **argv) struct Spec_list *s2 = &buf2; initialize_main (&argc, &argv); - program_name = argv[0]; + set_program_name (argv[0]); setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); @@ -1684,32 +1742,32 @@ main (int argc, char **argv) while ((c = getopt_long (argc, argv, "+cCdst", long_options, NULL)) != -1) { switch (c) - { - case 'c': - case 'C': - complement = true; - break; + { + case 'c': + case 'C': + complement = true; + break; - case 'd': - delete = true; - break; + case 'd': + delete = true; + break; - case 's': - squeeze_repeats = true; - break; + case 's': + squeeze_repeats = true; + break; - case 't': - truncate_set1 = true; - break; + case 't': + truncate_set1 = true; + break; - case_GETOPT_HELP_CHAR; + case_GETOPT_HELP_CHAR; - case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); - default: - usage (EXIT_FAILURE); - break; - } + default: + usage (EXIT_FAILURE); + break; + } } non_option_args = argc - optind; @@ -1720,16 +1778,16 @@ main (int argc, char **argv) if (non_option_args < min_operands) { if (non_option_args == 0) - error (0, 0, _("missing operand")); + error (0, 0, _("missing operand")); else - { - error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); - fprintf (stderr, "%s\n", - _(squeeze_repeats - ? ("Two strings must be given when " - "both deleting and squeezing repeats.") - : "Two strings must be given when translating.")); - } + { + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + fprintf (stderr, "%s\n", + _(squeeze_repeats + ? 
N_("Two strings must be given when " + "both deleting and squeezing repeats.") + : N_("Two strings must be given when translating."))); + } usage (EXIT_FAILURE); } @@ -1737,34 +1795,36 @@ main (int argc, char **argv) { error (0, 0, _("extra operand %s"), quote (argv[optind + max_operands])); if (non_option_args == 2) - fprintf (stderr, "%s\n", - _("Only one string may be given when " - "deleting without squeezing repeats.")); + fprintf (stderr, "%s\n", + _("Only one string may be given when " + "deleting without squeezing repeats.")); usage (EXIT_FAILURE); } spec_init (s1); if (!parse_str (argv[optind], s1)) - exit (EXIT_FAILURE); + return EXIT_FAILURE; if (non_option_args == 2) { spec_init (s2); if (!parse_str (argv[optind + 1], s2)) - exit (EXIT_FAILURE); + return EXIT_FAILURE; } else s2 = NULL; validate (s1, s2); - /* Use binary I/O, since `tr' is sometimes used to transliterate + /* Use binary I/O, since 'tr' is sometimes used to transliterate non-printable characters, or characters which are stripped away by text-mode reads (like CR and ^Z). */ if (O_BINARY && ! isatty (STDIN_FILENO)) - freopen (NULL, "rb", stdin); + xfreopen (NULL, "rb", stdin); if (O_BINARY && ! 
isatty (STDOUT_FILENO)) - freopen (NULL, "wb", stdout); + xfreopen (NULL, "wb", stdout); + + fadvise (stdin, FADVISE_SEQUENTIAL); if (squeeze_repeats && non_option_args == 1) { @@ -1775,14 +1835,14 @@ main (int argc, char **argv) { set_initialize (s1, complement, in_delete_set); - for (;;) - { - size_t nr = read_and_delete (io_buf, sizeof io_buf); - if (nr == 0) - break; - if (fwrite (io_buf, 1, nr, stdout) != nr) - error (EXIT_FAILURE, errno, _("write error")); - } + while (true) + { + size_t nr = read_and_delete (io_buf, sizeof io_buf); + if (nr == 0) + break; + if (fwrite (io_buf, 1, nr, stdout) != nr) + error (EXIT_FAILURE, errno, _("write error")); + } } else if (squeeze_repeats && delete && non_option_args == 2) { @@ -1793,104 +1853,95 @@ main (int argc, char **argv) else if (translating) { if (complement) - { - int i; - bool *in_s1 = in_delete_set; - - set_initialize (s1, false, in_s1); - s2->state = BEGIN_STATE; - for (i = 0; i < N_CHARS; i++) - xlate[i] = i; - for (i = 0; i < N_CHARS; i++) - { - if (!in_s1[i]) - { - int ch = get_next (s2, NULL); - assert (ch != -1 || truncate_set1); - if (ch == -1) - { - /* This will happen when tr is invoked like e.g. - tr -cs A-Za-z0-9 '\012'. */ - break; - } - xlate[i] = ch; - } - } - assert (get_next (s2, NULL) == -1 || truncate_set1); - } + { + int i; + bool *in_s1 = in_delete_set; + + set_initialize (s1, false, in_s1); + s2->state = BEGIN_STATE; + for (i = 0; i < N_CHARS; i++) + xlate[i] = i; + for (i = 0; i < N_CHARS; i++) + { + if (!in_s1[i]) + { + int ch = get_next (s2, NULL); + assert (ch != -1 || truncate_set1); + if (ch == -1) + { + /* This will happen when tr is invoked like e.g. + tr -cs A-Za-z0-9 '\012'. 
*/ + break; + } + xlate[i] = ch; + } + } + } else - { - int c1, c2; - int i; - enum Upper_Lower_class class_s1; - enum Upper_Lower_class class_s2; - - for (i = 0; i < N_CHARS; i++) - xlate[i] = i; - s1->state = BEGIN_STATE; - s2->state = BEGIN_STATE; - for (;;) - { - c1 = get_next (s1, &class_s1); - c2 = get_next (s2, &class_s2); - - /* When constructing the translation array, either one of the - values returned by paired calls to get_next must be from - [:upper:] and the other is [:lower:], or neither can be from - upper or lower. */ - - if ((class_s1 == UL_NONE) != (class_s2 == UL_NONE)) - error (EXIT_FAILURE, 0, - _("misaligned [:upper:] and/or [:lower:] construct")); - - if (class_s1 == UL_LOWER && class_s2 == UL_UPPER) - { - for (i = 0; i < N_CHARS; i++) - if (islower (i)) - xlate[i] = toupper (i); - } - else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER) - { - for (i = 0; i < N_CHARS; i++) - if (isupper (i)) - xlate[i] = tolower (i); - } - else if ((class_s1 == UL_LOWER && class_s2 == UL_LOWER) - || (class_s1 == UL_UPPER && class_s2 == UL_UPPER)) - { - /* POSIX says the behavior of `tr "[:upper:]" "[:upper:]"' - is undefined. Treat it as a no-op. */ - } - else - { - /* The following should have been checked by validate... */ - if (c1 == -1 || c2 == -1) - break; - xlate[c1] = c2; - } - } - assert (c1 == -1 || truncate_set1); - } + { + int c1, c2; + int i; + enum Upper_Lower_class class_s1; + enum Upper_Lower_class class_s2; + + for (i = 0; i < N_CHARS; i++) + xlate[i] = i; + s1->state = BEGIN_STATE; + s2->state = BEGIN_STATE; + while (true) + { + c1 = get_next (s1, &class_s1); + c2 = get_next (s2, &class_s2); + + if (class_s1 == UL_LOWER && class_s2 == UL_UPPER) + { + for (i = 0; i < N_CHARS; i++) + if (islower (i)) + xlate[i] = toupper (i); + } + else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER) + { + for (i = 0; i < N_CHARS; i++) + if (isupper (i)) + xlate[i] = tolower (i); + } + else + { + /* The following should have been checked by validate... 
*/ + if (c1 == -1 || c2 == -1) + break; + xlate[c1] = c2; + } + + /* When case-converting, skip the elements as an optimization. */ + if (class_s2 != UL_NONE) + { + skip_construct (s1); + skip_construct (s2); + } + } + assert (c1 == -1 || truncate_set1); + } if (squeeze_repeats) - { - set_initialize (s2, false, in_squeeze_set); - squeeze_filter (io_buf, sizeof io_buf, read_and_xlate); - } + { + set_initialize (s2, false, in_squeeze_set); + squeeze_filter (io_buf, sizeof io_buf, read_and_xlate); + } else - { - for (;;) - { - size_t bytes_read = read_and_xlate (io_buf, sizeof io_buf); - if (bytes_read == 0) - break; - if (fwrite (io_buf, 1, bytes_read, stdout) != bytes_read) - error (EXIT_FAILURE, errno, _("write error")); - } - } + { + while (true) + { + size_t bytes_read = read_and_xlate (io_buf, sizeof io_buf); + if (bytes_read == 0) + break; + if (fwrite (io_buf, 1, bytes_read, stdout) != bytes_read) + error (EXIT_FAILURE, errno, _("write error")); + } + } } if (close (STDIN_FILENO) != 0) error (EXIT_FAILURE, errno, _("standard input")); - exit (EXIT_SUCCESS); + return EXIT_SUCCESS; } |