summary | refs | log | tree | commit | diff
path: root/src/tr.c
diff options
context:
space:
mode:
author: Lorry Tar Creator <lorry-tar-importer@lorry> 2016-01-20 10:55:18 +0000
committer: Lorry Tar Creator <lorry-tar-importer@lorry> 2016-01-20 10:55:18 +0000
commit: 70e9163c9c18e995515598085cb824e554eb7ae7 (patch)
tree: a42dc8b2a6c031354bf31472de888bfc8a060132 /src/tr.c
parent: cbf5993c43f49281173f185863577d86bfac6eae (diff)
download: coreutils-tarball-master.tar.gz
Diffstat (limited to 'src/tr.c')
-rw-r--r-- src/tr.c 1507
1 file changed, 779 insertions, 728 deletions
diff --git a/src/tr.c b/src/tr.c
index 214eb2b..c6a1540 100644
--- a/src/tr.c
+++ b/src/tr.c
@@ -1,10 +1,10 @@
/* tr -- a filter to translate characters
- Copyright (C) 91, 1995-2006 Free Software Foundation, Inc.
+ Copyright (C) 1991-2016 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -12,8 +12,7 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation,
- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Written by Jim Meyering */
@@ -26,14 +25,16 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "quote.h"
#include "safe-read.h"
+#include "xfreopen.h"
#include "xstrtol.h"
-/* The official name of this program (e.g., no `g' prefix). */
+/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "tr"
-#define AUTHORS "Jim Meyering"
+#define AUTHORS proper_name ("Jim Meyering")
enum { N_CHARS = UCHAR_MAX + 1 };
@@ -100,28 +101,28 @@ enum Range_element_type
For example, consider the POSIX version of the classic tr command:
tr -cs 'a-zA-Z_' '[\n*]'
String1 has 3 constructs, two of which are ranges (a-z and A-Z),
- and a single normal character, `_'. String2 has one construct. */
+ and a single normal character, '_'. String2 has one construct. */
struct List_element
{
enum Range_element_type type;
struct List_element *next;
union
{
- unsigned char normal_char;
- struct /* unnamed */
- {
- unsigned char first_char;
- unsigned char last_char;
- }
- range;
- enum Char_class char_class;
- unsigned char equiv_code;
- struct /* unnamed */
- {
- unsigned char the_repeated_char;
- count repeat_count;
- }
- repeated_char;
+ unsigned char normal_char;
+ struct /* unnamed */
+ {
+ unsigned char first_char;
+ unsigned char last_char;
+ }
+ range;
+ enum Char_class char_class;
+ unsigned char equiv_code;
+ struct /* unnamed */
+ {
+ unsigned char the_repeated_char;
+ count repeat_count;
+ }
+ repeated_char;
}
u;
};
@@ -132,9 +133,9 @@ struct List_element
the corresponding argument string. The attributes are used mainly
to verify that the strings are valid in the context of any options
specified (like -s, -d, or -c). The main exception is the member
- `tail', which is first used to construct the list. After construction,
+ 'tail', which is first used to construct the list. After construction,
it is used by get_next to save its state when traversing the list.
- The member `state' serves a similar function. */
+ The member 'state' serves a similar function. */
struct Spec_list
{
/* Points to the head of the list of range elements.
@@ -194,9 +195,6 @@ es_match (struct E_string const *es, size_t i, char c)
return es->s[i] == c && !es->escaped[i];
}
-/* The name by which this program was run. */
-char *program_name;
-
/* When true, each sequence in the input of a repeated character
(call it c) is replaced (in the output) by a single occurrence of c
for every c in the squeeze set. */
@@ -249,15 +247,14 @@ static char const *const char_class_name[] =
"alnum", "alpha", "blank", "cntrl", "digit", "graph",
"lower", "print", "punct", "space", "upper", "xdigit"
};
-enum { N_CHAR_CLASSES = sizeof char_class_name / sizeof char_class_name[0] };
-/* Array of boolean values. A character `c' is a member of the
+/* Array of boolean values. A character 'c' is a member of the
squeeze set if and only if in_squeeze_set[c] is true. The squeeze
set is defined by the last (possibly, the only) string argument
on the command line when the squeeze option is given. */
static bool in_squeeze_set[N_CHARS];
-/* Array of boolean values. A character `c' is a member of the
+/* Array of boolean values. A character 'c' is a member of the
delete set if and only if in_delete_set[c] is true. The delete
set is defined by the first (or only) string argument on the
command line when the delete option is given. */
@@ -278,28 +275,27 @@ static struct option const long_options[] =
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
};
-
+
void
usage (int status)
{
if (status != EXIT_SUCCESS)
- fprintf (stderr, _("Try `%s --help' for more information.\n"),
- program_name);
+ emit_try_help ();
else
{
printf (_("\
Usage: %s [OPTION]... SET1 [SET2]\n\
"),
- program_name);
+ program_name);
fputs (_("\
Translate, squeeze, and/or delete characters from standard input,\n\
writing to standard output.\n\
\n\
- -c, -C, --complement first complement SET1\n\
+ -c, -C, --complement use the complement of SET1\n\
-d, --delete delete characters in SET1, do not translate\n\
- -s, --squeeze-repeats replace each input sequence of a repeated character\n\
- that is listed in SET1 with a single occurrence\n\
- of that character\n\
+ -s, --squeeze-repeats replace each sequence of a repeated character\n\
+ that is listed in the last specified SET,\n\
+ with a single occurrence of that character\n\
-t, --truncate-set1 first truncate SET1 to length of SET2\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
@@ -343,20 +339,13 @@ Interpreted sequences are:\n\
\n\
Translation occurs if -d is not given and both SET1 and SET2 appear.\n\
-t may be used only when translating. SET2 is extended to length of\n\
-SET1 by repeating its last character as necessary. \
-"), stdout);
- fputs (_("\
-Excess characters\n\
+SET1 by repeating its last character as necessary. Excess characters\n\
of SET2 are ignored. Only [:lower:] and [:upper:] are guaranteed to\n\
expand in ascending order; used in SET2 while translating, they may\n\
-only be used in pairs to specify case conversion. \
+only be used in pairs to specify case conversion. -s uses the last\n\
+specified SET, and occurs after translation or deletion.\n\
"), stdout);
- fputs (_("\
--s uses SET1 if not\n\
-translating nor deleting; else squeezing uses SET2 and occurs after\n\
-translation or deletion.\n\
-"), stdout);
- printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+ emit_ancillary_info (PROGRAM_NAME);
}
exit (status);
}
@@ -373,7 +362,7 @@ is_equiv_class_member (unsigned char equiv_class, unsigned char c)
/* Return true if the character C is a member of the
character class CHAR_CLASS. */
-static bool
+static bool _GL_ATTRIBUTE_PURE
is_char_class_member (enum Char_class char_class, unsigned char c)
{
int result;
@@ -455,93 +444,94 @@ unquote (char const *s, struct E_string *es)
int oct_digit;
switch (s[i])
- {
- case '\\':
- es->escaped[j] = true;
- switch (s[i + 1])
- {
- case '\\':
- c = '\\';
- break;
- case 'a':
- c = '\a';
- break;
- case 'b':
- c = '\b';
- break;
- case 'f':
- c = '\f';
- break;
- case 'n':
- c = '\n';
- break;
- case 'r':
- c = '\r';
- break;
- case 't':
- c = '\t';
- break;
- case 'v':
- c = '\v';
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- c = s[i + 1] - '0';
- oct_digit = s[i + 2] - '0';
- if (0 <= oct_digit && oct_digit <= 7)
- {
- c = 8 * c + oct_digit;
- ++i;
- oct_digit = s[i + 2] - '0';
- if (0 <= oct_digit && oct_digit <= 7)
- {
- if (8 * c + oct_digit < N_CHARS)
- {
- c = 8 * c + oct_digit;
- ++i;
- }
- else
- {
- /* A 3-digit octal number larger than \377 won't
- fit in 8 bits. So we stop when adding the
- next digit would put us over the limit and
- give a warning about the ambiguity. POSIX
- isn't clear on this, and we interpret this
- lack of clarity as meaning the resulting behavior
- is undefined, which means we're allowed to issue
- a warning. */
- error (0, 0, _("warning: the ambiguous octal escape \
-\\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, %c"),
- s[i], s[i + 1], s[i + 2],
- s[i], s[i + 1], s[i + 2]);
- }
- }
- }
- break;
- case '\0':
- /* POSIX seems to require that a trailing backslash must
- stand for itself. Weird. */
- es->escaped[j] = false;
- i--;
- c = '\\';
- break;
- default:
- c = s[i + 1];
- break;
- }
- ++i;
- es->s[j++] = c;
- break;
- default:
- es->s[j++] = s[i];
- break;
- }
+ {
+ case '\\':
+ es->escaped[j] = true;
+ switch (s[i + 1])
+ {
+ case '\\':
+ c = '\\';
+ break;
+ case 'a':
+ c = '\a';
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'v':
+ c = '\v';
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ c = s[i + 1] - '0';
+ oct_digit = s[i + 2] - '0';
+ if (0 <= oct_digit && oct_digit <= 7)
+ {
+ c = 8 * c + oct_digit;
+ ++i;
+ oct_digit = s[i + 2] - '0';
+ if (0 <= oct_digit && oct_digit <= 7)
+ {
+ if (8 * c + oct_digit < N_CHARS)
+ {
+ c = 8 * c + oct_digit;
+ ++i;
+ }
+ else
+ {
+ /* A 3-digit octal number larger than \377 won't
+ fit in 8 bits. So we stop when adding the
+ next digit would put us over the limit and
+ give a warning about the ambiguity. POSIX
+ isn't clear on this, and we interpret this
+ lack of clarity as meaning the resulting behavior
+ is undefined, which means we're allowed to issue
+ a warning. */
+ error (0, 0, _("warning: the ambiguous octal escape\
+ \\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, %c"),
+ s[i], s[i + 1], s[i + 2],
+ s[i], s[i + 1], s[i + 2]);
+ }
+ }
+ }
+ break;
+ case '\0':
+ error (0, 0, _("warning: an unescaped backslash "
+ "at end of string is not portable"));
+ /* POSIX is not clear about this. */
+ es->escaped[j] = false;
+ i--;
+ c = '\\';
+ break;
+ default:
+ c = s[i + 1];
+ break;
+ }
+ ++i;
+ es->s[j++] = c;
+ break;
+ default:
+ es->s[j++] = s[i];
+ break;
+ }
}
es->len = j;
return true;
@@ -550,14 +540,14 @@ unquote (char const *s, struct E_string *es)
/* If CLASS_STR is a valid character class string, return its index
in the global char_class_name array. Otherwise, return CC_NO_CLASS. */
-static enum Char_class
+static enum Char_class _GL_ATTRIBUTE_PURE
look_up_char_class (char const *class_str, size_t len)
{
enum Char_class i;
- for (i = 0; i < N_CHAR_CLASSES; i++)
- if (strncmp (class_str, char_class_name[i], len) == 0
- && strlen (char_class_name[i]) == len)
+ for (i = 0; i < ARRAY_CARDINALITY (char_class_name); i++)
+ if (STREQ_LEN (class_str, char_class_name[i], len)
+ && strlen (char_class_name[i]) == len)
return i;
return CC_NO_CLASS;
}
@@ -605,42 +595,42 @@ make_printable_str (char const *s, size_t len)
unsigned char c = s[i];
switch (c)
- {
- case '\\':
- tmp = "\\";
- break;
- case '\a':
- tmp = "\\a";
- break;
- case '\b':
- tmp = "\\b";
- break;
- case '\f':
- tmp = "\\f";
- break;
- case '\n':
- tmp = "\\n";
- break;
- case '\r':
- tmp = "\\r";
- break;
- case '\t':
- tmp = "\\t";
- break;
- case '\v':
- tmp = "\\v";
- break;
- default:
- if (isprint (c))
- {
- buf[0] = c;
- buf[1] = '\0';
- }
- else
- sprintf (buf, "\\%03o", c);
- tmp = buf;
- break;
- }
+ {
+ case '\\':
+ tmp = "\\";
+ break;
+ case '\a':
+ tmp = "\\a";
+ break;
+ case '\b':
+ tmp = "\\b";
+ break;
+ case '\f':
+ tmp = "\\f";
+ break;
+ case '\n':
+ tmp = "\\n";
+ break;
+ case '\r':
+ tmp = "\\r";
+ break;
+ case '\t':
+ tmp = "\\t";
+ break;
+ case '\v':
+ tmp = "\\v";
+ break;
+ default:
+ if (isprint (c))
+ {
+ buf[0] = c;
+ buf[1] = '\0';
+ }
+ else
+ sprintf (buf, "\\%03o", c);
+ tmp = buf;
+ break;
+ }
p = stpcpy (p, tmp);
}
return printable_buf;
@@ -679,8 +669,8 @@ append_range (struct Spec_list *list, unsigned char first, unsigned char last)
char *tmp2 = make_printable_char (last);
error (0, 0,
- _("range-endpoints of `%s-%s' are in reverse collating sequence order"),
- tmp1, tmp2);
+ _("range-endpoints of '%s-%s' are in reverse collating sequence order"),
+ tmp1, tmp2);
free (tmp1);
free (tmp2);
return false;
@@ -703,7 +693,7 @@ append_range (struct Spec_list *list, unsigned char first, unsigned char last)
static bool
append_char_class (struct Spec_list *list,
- char const *char_class_str, size_t len)
+ char const *char_class_str, size_t len)
{
enum Char_class char_class;
struct List_element *new;
@@ -728,7 +718,7 @@ append_char_class (struct Spec_list *list,
static void
append_repeated_char (struct Spec_list *list, unsigned char the_char,
- count repeat_count)
+ count repeat_count)
{
struct List_element *new;
@@ -750,7 +740,7 @@ append_repeated_char (struct Spec_list *list, unsigned char the_char,
static bool
append_equiv_class (struct Spec_list *list,
- char const *equiv_class_str, size_t len)
+ char const *equiv_class_str, size_t len)
{
struct List_element *new;
@@ -774,16 +764,16 @@ append_equiv_class (struct Spec_list *list,
static bool
find_closing_delim (const struct E_string *es, size_t start_idx,
- char pre_bracket_char, size_t *result_idx)
+ char pre_bracket_char, size_t *result_idx)
{
size_t i;
for (i = start_idx; i < es->len - 1; i++)
if (es->s[i] == pre_bracket_char && es->s[i + 1] == ']'
- && !es->escaped[i] && !es->escaped[i + 1])
+ && !es->escaped[i] && !es->escaped[i + 1])
{
- *result_idx = i;
- return true;
+ *result_idx = i;
+ return true;
}
return false;
}
@@ -792,16 +782,16 @@ find_closing_delim (const struct E_string *es, size_t start_idx,
beginning with P[ START_IDX ] comprise a valid [c*n] construct,
then set *CHAR_TO_REPEAT, *REPEAT_COUNT, and *CLOSING_BRACKET_IDX
and return zero. If the second character following
- the opening bracket is not `*' or if no closing bracket can be
+ the opening bracket is not '*' or if no closing bracket can be
found, return -1. If a closing bracket is found and the
- second char is `*', but the string between the `*' and `]' isn't
+ second char is '*', but the string between the '*' and ']' isn't
empty, an octal number, or a decimal number, print an error message
and return -2. */
static int
find_bracketed_repeat (const struct E_string *es, size_t start_idx,
- unsigned char *char_to_repeat, count *repeat_count,
- size_t *closing_bracket_idx)
+ unsigned char *char_to_repeat, count *repeat_count,
+ size_t *closing_bracket_idx)
{
size_t i;
@@ -812,47 +802,47 @@ find_bracketed_repeat (const struct E_string *es, size_t start_idx,
for (i = start_idx + 2; i < es->len && !es->escaped[i]; i++)
{
if (es->s[i] == ']')
- {
- size_t digit_str_len = i - start_idx - 2;
-
- *char_to_repeat = es->s[start_idx];
- if (digit_str_len == 0)
- {
- /* We've matched [c*] -- no explicit repeat count. */
- *repeat_count = 0;
- }
- else
- {
- /* Here, we have found [c*s] where s should be a string
- of octal (if it starts with `0') or decimal digits. */
- char const *digit_str = &es->s[start_idx + 2];
- char *d_end;
- if ((xstrtoumax (digit_str, &d_end, *digit_str == '0' ? 8 : 10,
- repeat_count, NULL)
- != LONGINT_OK)
- || REPEAT_COUNT_MAXIMUM < *repeat_count
- || digit_str + digit_str_len != d_end)
- {
- char *tmp = make_printable_str (digit_str, digit_str_len);
- error (0, 0,
- _("invalid repeat count %s in [c*n] construct"),
- quote (tmp));
- free (tmp);
- return -2;
- }
- }
- *closing_bracket_idx = i;
- return 0;
- }
+ {
+ size_t digit_str_len = i - start_idx - 2;
+
+ *char_to_repeat = es->s[start_idx];
+ if (digit_str_len == 0)
+ {
+ /* We've matched [c*] -- no explicit repeat count. */
+ *repeat_count = 0;
+ }
+ else
+ {
+ /* Here, we have found [c*s] where s should be a string
+ of octal (if it starts with '0') or decimal digits. */
+ char const *digit_str = &es->s[start_idx + 2];
+ char *d_end;
+ if ((xstrtoumax (digit_str, &d_end, *digit_str == '0' ? 8 : 10,
+ repeat_count, NULL)
+ != LONGINT_OK)
+ || REPEAT_COUNT_MAXIMUM < *repeat_count
+ || digit_str + digit_str_len != d_end)
+ {
+ char *tmp = make_printable_str (digit_str, digit_str_len);
+ error (0, 0,
+ _("invalid repeat count %s in [c*n] construct"),
+ quote (tmp));
+ free (tmp);
+ return -2;
+ }
+ }
+ *closing_bracket_idx = i;
+ return 0;
+ }
}
return -1; /* No bracket found. */
}
/* Return true if the string at ES->s[IDX] matches the regular
- expression `\*[0-9]*\]', false otherwise. The string does not
+ expression '\*[0-9]*\]', false otherwise. The string does not
match if any of its characters are escaped. */
-static bool
+static bool _GL_ATTRIBUTE_PURE
star_digits_closebracket (const struct E_string *es, size_t idx)
{
size_t i;
@@ -869,13 +859,13 @@ star_digits_closebracket (const struct E_string *es, size_t idx)
/* Convert string UNESCAPED_STRING (which has been preprocessed to
convert backslash-escape sequences) of length LEN characters into
a linked list of the following 5 types of constructs:
- - [:str:] Character class where `str' is one of the 12 valid strings.
- - [=c=] Equivalence class where `c' is any single character.
- - [c*n] Repeat the single character `c' `n' times. n may be omitted.
- However, if `n' is present, it must be a non-negative octal or
- decimal integer.
- - r-s Range of characters from `r' to `s'. The second endpoint must
- not precede the first in the current collating sequence.
+ - [:str:] Character class where 'str' is one of the 12 valid strings.
+ - [=c=] Equivalence class where 'c' is any single character.
+ - [c*n] Repeat the single character 'c' 'n' times. n may be omitted.
+ However, if 'n' is present, it must be a non-negative octal or
+ decimal integer.
+ - r-s Range of characters from 'r' to 's'. The second endpoint must
+ not precede the first in the current collating sequence.
- c Any other character is interpreted as itself. */
static bool
@@ -888,7 +878,7 @@ build_spec_list (const struct E_string *es, struct Spec_list *result)
/* The main for-loop below recognizes the 4 multi-character constructs.
A character that matches (in its context) none of the multi-character
- constructs is classified as `normal'. Since all multi-character
+ constructs is classified as 'normal'. Since all multi-character
constructs have at least 3 characters, any strings of length 2 or
less are composed solely of normal characters. Hence, the index of
the outer for-loop runs only as far as LEN-2. */
@@ -896,120 +886,120 @@ build_spec_list (const struct E_string *es, struct Spec_list *result)
for (i = 0; i + 2 < es->len; /* empty */)
{
if (es_match (es, i, '['))
- {
- bool matched_multi_char_construct;
- size_t closing_bracket_idx;
- unsigned char char_to_repeat;
- count repeat_count;
- int err;
-
- matched_multi_char_construct = true;
- if (es_match (es, i + 1, ':') || es_match (es, i + 1, '='))
- {
- size_t closing_delim_idx;
-
- if (find_closing_delim (es, i + 2, p[i + 1], &closing_delim_idx))
- {
- size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1;
- char const *opnd_str = p + i + 2;
-
- if (opnd_str_len == 0)
- {
- if (p[i + 1] == ':')
- error (0, 0, _("missing character class name `[::]'"));
- else
- error (0, 0,
- _("missing equivalence class character `[==]'"));
- return false;
- }
-
- if (p[i + 1] == ':')
- {
- /* FIXME: big comment. */
- if (!append_char_class (result, opnd_str, opnd_str_len))
- {
- if (star_digits_closebracket (es, i + 2))
- goto try_bracketed_repeat;
- else
- {
- char *tmp = make_printable_str (opnd_str,
- opnd_str_len);
- error (0, 0, _("invalid character class %s"),
- quote (tmp));
- free (tmp);
- return false;
- }
- }
- }
- else
- {
- /* FIXME: big comment. */
- if (!append_equiv_class (result, opnd_str, opnd_str_len))
- {
- if (star_digits_closebracket (es, i + 2))
- goto try_bracketed_repeat;
- else
- {
- char *tmp = make_printable_str (opnd_str,
- opnd_str_len);
- error (0, 0,
- _("%s: equivalence class operand must be a single character"),
- tmp);
- free (tmp);
- return false;
- }
- }
- }
-
- i = closing_delim_idx + 2;
- continue;
- }
- /* Else fall through. This could be [:*] or [=*]. */
- }
-
- try_bracketed_repeat:
-
- /* Determine whether this is a bracketed repeat range
- matching the RE \[.\*(dec_or_oct_number)?\]. */
- err = find_bracketed_repeat (es, i + 1, &char_to_repeat,
- &repeat_count,
- &closing_bracket_idx);
- if (err == 0)
- {
- append_repeated_char (result, char_to_repeat, repeat_count);
- i = closing_bracket_idx + 1;
- }
- else if (err == -1)
- {
- matched_multi_char_construct = false;
- }
- else
- {
- /* Found a string that looked like [c*n] but the
- numeric part was invalid. */
- return false;
- }
-
- if (matched_multi_char_construct)
- continue;
-
- /* We reach this point if P does not match [:str:], [=c=],
- [c*n], or [c*]. Now, see if P looks like a range `[-c'
- (from `[' to `c'). */
- }
+ {
+ bool matched_multi_char_construct;
+ size_t closing_bracket_idx;
+ unsigned char char_to_repeat;
+ count repeat_count;
+ int err;
+
+ matched_multi_char_construct = true;
+ if (es_match (es, i + 1, ':') || es_match (es, i + 1, '='))
+ {
+ size_t closing_delim_idx;
+
+ if (find_closing_delim (es, i + 2, p[i + 1], &closing_delim_idx))
+ {
+ size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1;
+ char const *opnd_str = p + i + 2;
+
+ if (opnd_str_len == 0)
+ {
+ if (p[i + 1] == ':')
+ error (0, 0, _("missing character class name '[::]'"));
+ else
+ error (0, 0,
+ _("missing equivalence class character '[==]'"));
+ return false;
+ }
+
+ if (p[i + 1] == ':')
+ {
+ /* FIXME: big comment. */
+ if (!append_char_class (result, opnd_str, opnd_str_len))
+ {
+ if (star_digits_closebracket (es, i + 2))
+ goto try_bracketed_repeat;
+ else
+ {
+ char *tmp = make_printable_str (opnd_str,
+ opnd_str_len);
+ error (0, 0, _("invalid character class %s"),
+ quote (tmp));
+ free (tmp);
+ return false;
+ }
+ }
+ }
+ else
+ {
+ /* FIXME: big comment. */
+ if (!append_equiv_class (result, opnd_str, opnd_str_len))
+ {
+ if (star_digits_closebracket (es, i + 2))
+ goto try_bracketed_repeat;
+ else
+ {
+ char *tmp = make_printable_str (opnd_str,
+ opnd_str_len);
+ error (0, 0,
+ _("%s: equivalence class operand must be a single character"),
+ tmp);
+ free (tmp);
+ return false;
+ }
+ }
+ }
+
+ i = closing_delim_idx + 2;
+ continue;
+ }
+ /* Else fall through. This could be [:*] or [=*]. */
+ }
+
+ try_bracketed_repeat:
+
+ /* Determine whether this is a bracketed repeat range
+ matching the RE \[.\*(dec_or_oct_number)?\]. */
+ err = find_bracketed_repeat (es, i + 1, &char_to_repeat,
+ &repeat_count,
+ &closing_bracket_idx);
+ if (err == 0)
+ {
+ append_repeated_char (result, char_to_repeat, repeat_count);
+ i = closing_bracket_idx + 1;
+ }
+ else if (err == -1)
+ {
+ matched_multi_char_construct = false;
+ }
+ else
+ {
+ /* Found a string that looked like [c*n] but the
+ numeric part was invalid. */
+ return false;
+ }
+
+ if (matched_multi_char_construct)
+ continue;
+
+ /* We reach this point if P does not match [:str:], [=c=],
+ [c*n], or [c*]. Now, see if P looks like a range '[-c'
+ (from '[' to 'c'). */
+ }
/* Look ahead one char for ranges like a-z. */
if (es_match (es, i + 1, '-'))
- {
- if (!append_range (result, p[i], p[i + 2]))
- return false;
- i += 3;
- }
+ {
+ if (!append_range (result, p[i], p[i + 2]))
+ return false;
+ i += 3;
+ }
else
- {
- append_normal_char (result, p[i]);
- ++i;
- }
+ {
+ append_normal_char (result, p[i]);
+ ++i;
+ }
}
/* Now handle the (2 or fewer) remaining characters p[i]..p[es->len - 1]. */
@@ -1019,8 +1009,17 @@ build_spec_list (const struct E_string *es, struct Spec_list *result)
return true;
}
+/* Advance S past its current construct: point S->tail at the next list
+ element and reset S->state to NEW_ELEMENT. S->tail must be non-NULL. */
+static void
+skip_construct (struct Spec_list *s)
+{
+ s->tail = s->tail->next;
+ s->state = NEW_ELEMENT;
+}
+
/* Given a Spec_list S (with its saved state implicit in the values
- of its members `tail' and `state'), return the next single character
+ of its members 'tail' and 'state'), return the next single character
in the expansion of S's constructs. If the last character of S was
returned on the previous call or if S was empty, this function
returns -1. For example, successive calls to get_next where S
@@ -1063,65 +1062,53 @@ get_next (struct Spec_list *s, enum Upper_Lower_class *class)
case RE_RANGE:
if (s->state == NEW_ELEMENT)
- s->state = p->u.range.first_char;
+ s->state = p->u.range.first_char;
else
- ++(s->state);
+ ++(s->state);
return_val = s->state;
if (s->state == p->u.range.last_char)
- {
- s->tail = p->next;
- s->state = NEW_ELEMENT;
- }
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ }
break;
case RE_CHAR_CLASS:
if (class)
- {
- bool upper_or_lower;
- switch (p->u.char_class)
- {
- case CC_LOWER:
- *class = UL_LOWER;
- upper_or_lower = true;
- break;
- case CC_UPPER:
- *class = UL_UPPER;
- upper_or_lower = true;
- break;
- default:
- upper_or_lower = false;
- break;
- }
-
- if (upper_or_lower)
- {
- s->tail = p->next;
- s->state = NEW_ELEMENT;
- return_val = 0;
- break;
- }
- }
+ {
+ switch (p->u.char_class)
+ {
+ case CC_LOWER:
+ *class = UL_LOWER;
+ break;
+ case CC_UPPER:
+ *class = UL_UPPER;
+ break;
+ default:
+ break;
+ }
+ }
if (s->state == NEW_ELEMENT)
- {
- for (i = 0; i < N_CHARS; i++)
- if (is_char_class_member (p->u.char_class, i))
- break;
- assert (i < N_CHARS);
- s->state = i;
- }
+ {
+ for (i = 0; i < N_CHARS; i++)
+ if (is_char_class_member (p->u.char_class, i))
+ break;
+ assert (i < N_CHARS);
+ s->state = i;
+ }
assert (is_char_class_member (p->u.char_class, s->state));
return_val = s->state;
for (i = s->state + 1; i < N_CHARS; i++)
- if (is_char_class_member (p->u.char_class, i))
- break;
+ if (is_char_class_member (p->u.char_class, i))
+ break;
if (i < N_CHARS)
- s->state = i;
+ s->state = i;
else
- {
- s->tail = p->next;
- s->state = NEW_ELEMENT;
- }
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ }
break;
case RE_EQUIV_CLASS:
@@ -1138,25 +1125,25 @@ get_next (struct Spec_list *s, enum Upper_Lower_class *class)
case RE_REPEATED_CHAR:
/* Here, a repeat count of n == 0 means don't repeat at all. */
if (p->u.repeated_char.repeat_count == 0)
- {
- s->tail = p->next;
- s->state = NEW_ELEMENT;
- return_val = get_next (s, class);
- }
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ return_val = get_next (s, class);
+ }
else
- {
- if (s->state == NEW_ELEMENT)
- {
- s->state = 0;
- }
- ++(s->state);
- return_val = p->u.repeated_char.the_repeated_char;
- if (s->state == p->u.repeated_char.repeat_count)
- {
- s->tail = p->next;
- s->state = NEW_ELEMENT;
- }
- }
+ {
+ if (s->state == NEW_ELEMENT)
+ {
+ s->state = 0;
+ }
+ ++(s->state);
+ return_val = p->u.repeated_char.the_repeated_char;
+ if (s->state == p->u.repeated_char.repeat_count)
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ }
+ }
break;
default:
@@ -1188,6 +1175,78 @@ card_of_complement (struct Spec_list *s)
return cardinality;
}
+/* Discard the lengths associated with a case conversion,
+ as using the actual number of upper or lower case characters
+ is problematic when they don't match in some locales.
+ Also ensure the case conversion classes in S2 (string2) are aligned
+ with those in S1 (string1), exiting with a diagnostic if not.
+ Return immediately if S2 contains no character class.
+ Note POSIX says the behavior of 'tr "[:upper:]" "[:upper:]"' is undefined.
+ Therefore we allow it (unlike Solaris) and treat it as a no-op. */
+
+static void
+validate_case_classes (struct Spec_list *s1, struct Spec_list *s2)
+{
+ size_t n_upper = 0;
+ size_t n_lower = 0;
+ unsigned int i;
+ int c1 = 0;
+ int c2 = 0;
+ count old_s1_len = s1->length;
+ count old_s2_len = s2->length;
+ struct List_element *s1_tail = s1->tail;
+ struct List_element *s2_tail = s2->tail;
+ bool s1_new_element = true;
+ bool s2_new_element = true;
+
+ if (!s2->has_char_class)
+ return;
+
+ for (i = 0; i < N_CHARS; i++)
+ {
+ if (isupper (i))
+ n_upper++;
+ if (islower (i))
+ n_lower++;
+ }
+
+ s1->state = BEGIN_STATE;
+ s2->state = BEGIN_STATE;
+
+ while (c1 != -1 && c2 != -1)
+ {
+ enum Upper_Lower_class class_s1, class_s2;
+
+ c1 = get_next (s1, &class_s1);
+ c2 = get_next (s2, &class_s2);
+
+ /* If c2 transitions to a new case class, then
+ c1 must also transition at the same time. */
+ if (s2_new_element && class_s2 != UL_NONE
+ && !(s1_new_element && class_s1 != UL_NONE))
+ error (EXIT_FAILURE, 0,
+ _("misaligned [:upper:] and/or [:lower:] construct"));
+
+ /* If case converting, quickly skip over the elements. */
+ if (class_s2 != UL_NONE)
+ {
+ skip_construct (s1);
+ skip_construct (s2);
+ /* Discount all but one of the class's characters from each length. */
+ s1->length -= (class_s1 == UL_UPPER ? n_upper : n_lower) - 1;
+ s2->length -= (class_s2 == UL_UPPER ? n_upper : n_lower) - 1;
+ }
+
+ s1_new_element = s1->state == NEW_ELEMENT; /* Next element is new. */
+ s2_new_element = s2->state == NEW_ELEMENT; /* Next element is new. */
+ }
+
+ assert (old_s1_len >= s1->length && old_s2_len >= s2->length);
+
+ s1->tail = s1_tail; /* Restore the list position saved on entry. */
+ s2->tail = s2_tail; /* Restore the list position saved on entry. */
+}
+
/* Gather statistics about the spec-list S in preparation for the tests
in validate that determine the consistency of the specs. This function
is called at most twice; once for string1, and again for any string2.
@@ -1218,61 +1277,61 @@ get_spec_stats (struct Spec_list *s)
count new_length;
switch (p->type)
- {
- case RE_NORMAL_CHAR:
- len = 1;
- break;
-
- case RE_RANGE:
- assert (p->u.range.last_char >= p->u.range.first_char);
- len = p->u.range.last_char - p->u.range.first_char + 1;
- break;
-
- case RE_CHAR_CLASS:
- s->has_char_class = true;
- for (i = 0; i < N_CHARS; i++)
- if (is_char_class_member (p->u.char_class, i))
- ++len;
- switch (p->u.char_class)
- {
- case CC_UPPER:
- case CC_LOWER:
- break;
- default:
- s->has_restricted_char_class = true;
- break;
- }
- break;
-
- case RE_EQUIV_CLASS:
- for (i = 0; i < N_CHARS; i++)
- if (is_equiv_class_member (p->u.equiv_code, i))
- ++len;
- s->has_equiv_class = true;
- break;
-
- case RE_REPEATED_CHAR:
- if (p->u.repeated_char.repeat_count > 0)
- len = p->u.repeated_char.repeat_count;
- else
- {
- s->indefinite_repeat_element = p;
- ++(s->n_indefinite_repeats);
- }
- break;
-
- default:
- abort ();
- break;
- }
+ {
+ case RE_NORMAL_CHAR:
+ len = 1;
+ break;
+
+ case RE_RANGE:
+ assert (p->u.range.last_char >= p->u.range.first_char);
+ len = p->u.range.last_char - p->u.range.first_char + 1;
+ break;
+
+ case RE_CHAR_CLASS:
+ s->has_char_class = true;
+ for (i = 0; i < N_CHARS; i++)
+ if (is_char_class_member (p->u.char_class, i))
+ ++len;
+ switch (p->u.char_class)
+ {
+ case CC_UPPER:
+ case CC_LOWER:
+ break;
+ default:
+ s->has_restricted_char_class = true;
+ break;
+ }
+ break;
+
+ case RE_EQUIV_CLASS:
+ for (i = 0; i < N_CHARS; i++)
+ if (is_equiv_class_member (p->u.equiv_code, i))
+ ++len;
+ s->has_equiv_class = true;
+ break;
+
+ case RE_REPEATED_CHAR:
+ if (p->u.repeated_char.repeat_count > 0)
+ len = p->u.repeated_char.repeat_count;
+ else
+ {
+ s->indefinite_repeat_element = p;
+ ++(s->n_indefinite_repeats);
+ }
+ break;
+
+ default:
+ abort ();
+ break;
+ }
/* Check for arithmetic overflow in computing length. Also, reject
- any length greater than the maximum repeat count, in case the
- length is later used to compute the repeat count for an
- indefinite element. */
+ any length greater than the maximum repeat count, in case the
+ length is later used to compute the repeat count for an
+ indefinite element. */
new_length = length + len;
if (! (length <= new_length && new_length <= REPEAT_COUNT_MAXIMUM))
- error (EXIT_FAILURE, 0, _("too many characters in set"));
+ error (EXIT_FAILURE, 0, _("too many characters in set"));
length = new_length;
}
@@ -1294,7 +1353,7 @@ get_s2_spec_stats (struct Spec_list *s2, count len_s1)
if (len_s1 >= s2->length && s2->n_indefinite_repeats == 1)
{
s2->indefinite_repeat_element->u.repeated_char.repeat_count =
- len_s1 - s2->length;
+ len_s1 - s2->length;
s2->length = len_s1;
}
}
@@ -1329,20 +1388,14 @@ parse_str (char const *s, struct Spec_list *spec_list)
Upon successful completion, S2->length is set to S1->length. The only
way this function can fail to make S2 as long as S1 is when S2 has
zero-length, since in that case, there is no last character to repeat.
- So S2->length is required to be at least 1.
+ So S2->length is required to be at least 1. */
- Providing this functionality allows the user to do some pretty
- non-BSD (and non-portable) things: For example, the command
- tr -cs '[:upper:]0-9' '[:lower:]'
- is almost guaranteed to give results that depend on your collating
- sequence. */
static void
string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
{
struct List_element *p;
unsigned char char_to_repeat;
- int i;
assert (translating);
assert (s1->length > s2->length);
@@ -1358,11 +1411,14 @@ string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
char_to_repeat = p->u.range.last_char;
break;
case RE_CHAR_CLASS:
- for (i = N_CHARS - 1; i >= 0; i--)
- if (is_char_class_member (p->u.char_class, i))
- break;
- assert (i >= 0);
- char_to_repeat = i;
+ /* Note BSD allows extending of classes in string2. For example:
+ tr '[:upper:]0-9' '[:lower:]'
+ That's not portable however, contradicts POSIX and is dependent
+ on your collating sequence. */
+ error (EXIT_FAILURE, 0,
+ _("when translating with string1 longer than string2,\nthe\
+ latter string must not end with a character class"));
+ abort (); /* inform gcc that the above use of error never returns. */
break;
case RE_REPEATED_CHAR:
@@ -1420,7 +1476,7 @@ validate (struct Spec_list *s1, struct Spec_list *s2)
if (s1->n_indefinite_repeats > 0)
{
error (EXIT_FAILURE, 0,
- _("the [c*] repeat construct may not appear in string1"));
+ _("the [c*] repeat construct may not appear in string1"));
}
if (s2)
@@ -1428,57 +1484,59 @@ validate (struct Spec_list *s1, struct Spec_list *s2)
get_s2_spec_stats (s2, s1->length);
if (s2->n_indefinite_repeats > 1)
- {
- error (EXIT_FAILURE, 0,
- _("only one [c*] repeat construct may appear in string2"));
- }
+ {
+ error (EXIT_FAILURE, 0,
+ _("only one [c*] repeat construct may appear in string2"));
+ }
if (translating)
- {
- if (s2->has_equiv_class)
- {
- error (EXIT_FAILURE, 0,
- _("[=c=] expressions may not appear in string2 \
-when translating"));
- }
-
- if (s1->length > s2->length)
- {
- if (!truncate_set1)
- {
- /* string2 must be non-empty unless --truncate-set1 is
- given or string1 is empty. */
-
- if (s2->length == 0)
- error (EXIT_FAILURE, 0,
- _("when not truncating set1, string2 must be non-empty"));
- string2_extend (s1, s2);
- }
- }
-
- if (complement && s1->has_char_class
- && ! (s2->length == s1->length && homogeneous_spec_list (s2)))
- {
- error (EXIT_FAILURE, 0,
- _("when translating with complemented character classes,\
+ {
+ if (s2->has_equiv_class)
+ {
+ error (EXIT_FAILURE, 0,
+ _("[=c=] expressions may not appear in string2\
+ when translating"));
+ }
+
+ if (s2->has_restricted_char_class)
+ {
+ error (EXIT_FAILURE, 0,
+ _("when translating, the only character classes that may\
+ appear in\nstring2 are 'upper' and 'lower'"));
+ }
+
+ validate_case_classes (s1, s2);
+
+ if (s1->length > s2->length)
+ {
+ if (!truncate_set1)
+ {
+ /* string2 must be non-empty unless --truncate-set1 is
+ given or string1 is empty. */
+
+ if (s2->length == 0)
+ error (EXIT_FAILURE, 0,
+ _("when not truncating set1, string2 must be non-empty"));
+ string2_extend (s1, s2);
+ }
+ }
+
+ if (complement && s1->has_char_class
+ && ! (s2->length == s1->length && homogeneous_spec_list (s2)))
+ {
+ error (EXIT_FAILURE, 0,
+ _("when translating with complemented character classes,\
\nstring2 must map all characters in the domain to one"));
- }
-
- if (s2->has_restricted_char_class)
- {
- error (EXIT_FAILURE, 0,
- _("when translating, the only character classes that may \
-appear in\nstring2 are `upper' and `lower'"));
- }
- }
+ }
+ }
else
- /* Not translating. */
- {
- if (s2->n_indefinite_repeats > 0)
- error (EXIT_FAILURE, 0,
- _("the [c*] construct may appear in string2 only \
-when translating"));
- }
+ /* Not translating. */
+ {
+ if (s2->n_indefinite_repeats > 0)
+ error (EXIT_FAILURE, 0,
+ _("the [c*] construct may appear in string2 only\
+ when translating"));
+ }
}
}
@@ -1495,85 +1553,85 @@ squeeze_filter (char *buf, size_t size, size_t (*reader) (char *, size_t))
{
/* A value distinct from any character that may have been stored in a
buffer as the result of a block-read in the function squeeze_filter. */
- enum { NOT_A_CHAR = CHAR_MAX + 1 };
+ const int NOT_A_CHAR = INT_MAX;
int char_to_squeeze = NOT_A_CHAR;
size_t i = 0;
size_t nr = 0;
- for (;;)
+ while (true)
{
size_t begin;
if (i >= nr)
- {
- nr = reader (buf, size);
- if (nr == 0)
- break;
- i = 0;
- }
+ {
+ nr = reader (buf, size);
+ if (nr == 0)
+ break;
+ i = 0;
+ }
begin = i;
if (char_to_squeeze == NOT_A_CHAR)
- {
- size_t out_len;
- /* Here, by being a little tricky, we can get a significant
- performance increase in most cases when the input is
- reasonably large. Since tr will modify the input only
- if two consecutive (and identical) input characters are
- in the squeeze set, we can step by two through the data
- when searching for a character in the squeeze set. This
- means there may be a little more work in a few cases and
- perhaps twice as much work in the worst cases where most
- of the input is removed by squeezing repeats. But most
- uses of this functionality seem to remove less than 20-30%
- of the input. */
- for (; i < nr && !in_squeeze_set[to_uchar (buf[i])]; i += 2)
- continue;
-
- /* There is a special case when i == nr and we've just
- skipped a character (the last one in buf) that is in
- the squeeze set. */
- if (i == nr && in_squeeze_set[to_uchar (buf[i - 1])])
- --i;
-
- if (i >= nr)
- out_len = nr - begin;
- else
- {
- char_to_squeeze = buf[i];
- /* We're about to output buf[begin..i]. */
- out_len = i - begin + 1;
-
- /* But since we stepped by 2 in the loop above,
- out_len may be one too large. */
- if (i > 0 && buf[i - 1] == char_to_squeeze)
- --out_len;
-
- /* Advance i to the index of first character to be
- considered when looking for a char different from
- char_to_squeeze. */
- ++i;
- }
- if (out_len > 0
- && fwrite (&buf[begin], 1, out_len, stdout) != out_len)
- error (EXIT_FAILURE, errno, _("write error"));
- }
+ {
+ size_t out_len;
+ /* Here, by being a little tricky, we can get a significant
+ performance increase in most cases when the input is
+ reasonably large. Since tr will modify the input only
+ if two consecutive (and identical) input characters are
+ in the squeeze set, we can step by two through the data
+ when searching for a character in the squeeze set. This
+ means there may be a little more work in a few cases and
+ perhaps twice as much work in the worst cases where most
+ of the input is removed by squeezing repeats. But most
+ uses of this functionality seem to remove less than 20-30%
+ of the input. */
+ for (; i < nr && !in_squeeze_set[to_uchar (buf[i])]; i += 2)
+ continue;
+
+ /* There is a special case when i == nr and we've just
+ skipped a character (the last one in buf) that is in
+ the squeeze set. */
+ if (i == nr && in_squeeze_set[to_uchar (buf[i - 1])])
+ --i;
+
+ if (i >= nr)
+ out_len = nr - begin;
+ else
+ {
+ char_to_squeeze = buf[i];
+ /* We're about to output buf[begin..i]. */
+ out_len = i - begin + 1;
+
+ /* But since we stepped by 2 in the loop above,
+ out_len may be one too large. */
+ if (i > 0 && buf[i - 1] == char_to_squeeze)
+ --out_len;
+
+ /* Advance i to the index of first character to be
+ considered when looking for a char different from
+ char_to_squeeze. */
+ ++i;
+ }
+ if (out_len > 0
+ && fwrite (&buf[begin], 1, out_len, stdout) != out_len)
+ error (EXIT_FAILURE, errno, _("write error"));
+ }
if (char_to_squeeze != NOT_A_CHAR)
- {
- /* Advance i to index of first char != char_to_squeeze
- (or to nr if all the rest of the characters in this
- buffer are the same as char_to_squeeze). */
- for (; i < nr && buf[i] == char_to_squeeze; i++)
- continue;
- if (i < nr)
- char_to_squeeze = NOT_A_CHAR;
- /* If (i >= nr) we've squeezed the last character in this buffer.
- So now we have to read a new buffer and continue comparing
- characters against char_to_squeeze. */
- }
+ {
+ /* Advance i to index of first char != char_to_squeeze
+ (or to nr if all the rest of the characters in this
+ buffer are the same as char_to_squeeze). */
+ for (; i < nr && buf[i] == char_to_squeeze; i++)
+ continue;
+ if (i < nr)
+ char_to_squeeze = NOT_A_CHAR;
+ /* If (i >= nr) we've squeezed the last character in this buffer.
+ So now we have to read a new buffer and continue comparing
+ characters against char_to_squeeze. */
+ }
}
}
@@ -1606,7 +1664,7 @@ read_and_delete (char *buf, size_t size)
size_t nr = plain_read (buf, size);
if (nr == 0)
- return 0;
+ return 0;
/* This first loop may be a waste of code, but gives much
better performance when no characters are deleted in
@@ -1614,12 +1672,12 @@ read_and_delete (char *buf, size_t size)
of buf[i] into buf[n_saved] when it would be a NOP. */
for (i = 0; i < nr && !in_delete_set[to_uchar (buf[i])]; i++)
- continue;
+ continue;
n_saved = i;
for (++i; i < nr; i++)
- if (!in_delete_set[to_uchar (buf[i])])
- buf[n_saved++] = buf[i];
+ if (!in_delete_set[to_uchar (buf[i])])
+ buf[n_saved++] = buf[i];
}
while (n_saved == 0);
@@ -1628,7 +1686,7 @@ read_and_delete (char *buf, size_t size)
/* Read at most SIZE bytes from stdin into the array BUF. Then
perform the in-place and one-to-one mapping specified by the global
- array `xlate'. Return the number of characters read, or 0 upon EOF. */
+ array 'xlate'. Return the number of characters read, or 0 upon EOF. */
static size_t
read_and_xlate (char *buf, size_t size)
@@ -1644,7 +1702,7 @@ read_and_xlate (char *buf, size_t size)
/* Initialize a boolean membership set, IN_SET, with the character
values obtained by traversing the linked list of constructs S
- using the function `get_next'. IN_SET is expected to have been
+ using the function 'get_next'. IN_SET is expected to have been
initialized to all zeros by the caller. If COMPLEMENT_THIS_SET
is true the resulting set is complemented. */
@@ -1674,7 +1732,7 @@ main (int argc, char **argv)
struct Spec_list *s2 = &buf2;
initialize_main (&argc, &argv);
- program_name = argv[0];
+ set_program_name (argv[0]);
setlocale (LC_ALL, "");
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
@@ -1684,32 +1742,32 @@ main (int argc, char **argv)
while ((c = getopt_long (argc, argv, "+cCdst", long_options, NULL)) != -1)
{
switch (c)
- {
- case 'c':
- case 'C':
- complement = true;
- break;
+ {
+ case 'c':
+ case 'C':
+ complement = true;
+ break;
- case 'd':
- delete = true;
- break;
+ case 'd':
+ delete = true;
+ break;
- case 's':
- squeeze_repeats = true;
- break;
+ case 's':
+ squeeze_repeats = true;
+ break;
- case 't':
- truncate_set1 = true;
- break;
+ case 't':
+ truncate_set1 = true;
+ break;
- case_GETOPT_HELP_CHAR;
+ case_GETOPT_HELP_CHAR;
- case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
- default:
- usage (EXIT_FAILURE);
- break;
- }
+ default:
+ usage (EXIT_FAILURE);
+ break;
+ }
}
non_option_args = argc - optind;
@@ -1720,16 +1778,16 @@ main (int argc, char **argv)
if (non_option_args < min_operands)
{
if (non_option_args == 0)
- error (0, 0, _("missing operand"));
+ error (0, 0, _("missing operand"));
else
- {
- error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
- fprintf (stderr, "%s\n",
- _(squeeze_repeats
- ? ("Two strings must be given when "
- "both deleting and squeezing repeats.")
- : "Two strings must be given when translating."));
- }
+ {
+ error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
+ fprintf (stderr, "%s\n",
+ _(squeeze_repeats
+ ? N_("Two strings must be given when "
+ "both deleting and squeezing repeats.")
+ : N_("Two strings must be given when translating.")));
+ }
usage (EXIT_FAILURE);
}
@@ -1737,34 +1795,36 @@ main (int argc, char **argv)
{
error (0, 0, _("extra operand %s"), quote (argv[optind + max_operands]));
if (non_option_args == 2)
- fprintf (stderr, "%s\n",
- _("Only one string may be given when "
- "deleting without squeezing repeats."));
+ fprintf (stderr, "%s\n",
+ _("Only one string may be given when "
+ "deleting without squeezing repeats."));
usage (EXIT_FAILURE);
}
spec_init (s1);
if (!parse_str (argv[optind], s1))
- exit (EXIT_FAILURE);
+ return EXIT_FAILURE;
if (non_option_args == 2)
{
spec_init (s2);
if (!parse_str (argv[optind + 1], s2))
- exit (EXIT_FAILURE);
+ return EXIT_FAILURE;
}
else
s2 = NULL;
validate (s1, s2);
- /* Use binary I/O, since `tr' is sometimes used to transliterate
+ /* Use binary I/O, since 'tr' is sometimes used to transliterate
non-printable characters, or characters which are stripped away
by text-mode reads (like CR and ^Z). */
if (O_BINARY && ! isatty (STDIN_FILENO))
- freopen (NULL, "rb", stdin);
+ xfreopen (NULL, "rb", stdin);
if (O_BINARY && ! isatty (STDOUT_FILENO))
- freopen (NULL, "wb", stdout);
+ xfreopen (NULL, "wb", stdout);
+
+ fadvise (stdin, FADVISE_SEQUENTIAL);
if (squeeze_repeats && non_option_args == 1)
{
@@ -1775,14 +1835,14 @@ main (int argc, char **argv)
{
set_initialize (s1, complement, in_delete_set);
- for (;;)
- {
- size_t nr = read_and_delete (io_buf, sizeof io_buf);
- if (nr == 0)
- break;
- if (fwrite (io_buf, 1, nr, stdout) != nr)
- error (EXIT_FAILURE, errno, _("write error"));
- }
+ while (true)
+ {
+ size_t nr = read_and_delete (io_buf, sizeof io_buf);
+ if (nr == 0)
+ break;
+ if (fwrite (io_buf, 1, nr, stdout) != nr)
+ error (EXIT_FAILURE, errno, _("write error"));
+ }
}
else if (squeeze_repeats && delete && non_option_args == 2)
{
@@ -1793,104 +1853,95 @@ main (int argc, char **argv)
else if (translating)
{
if (complement)
- {
- int i;
- bool *in_s1 = in_delete_set;
-
- set_initialize (s1, false, in_s1);
- s2->state = BEGIN_STATE;
- for (i = 0; i < N_CHARS; i++)
- xlate[i] = i;
- for (i = 0; i < N_CHARS; i++)
- {
- if (!in_s1[i])
- {
- int ch = get_next (s2, NULL);
- assert (ch != -1 || truncate_set1);
- if (ch == -1)
- {
- /* This will happen when tr is invoked like e.g.
- tr -cs A-Za-z0-9 '\012'. */
- break;
- }
- xlate[i] = ch;
- }
- }
- assert (get_next (s2, NULL) == -1 || truncate_set1);
- }
+ {
+ int i;
+ bool *in_s1 = in_delete_set;
+
+ set_initialize (s1, false, in_s1);
+ s2->state = BEGIN_STATE;
+ for (i = 0; i < N_CHARS; i++)
+ xlate[i] = i;
+ for (i = 0; i < N_CHARS; i++)
+ {
+ if (!in_s1[i])
+ {
+ int ch = get_next (s2, NULL);
+ assert (ch != -1 || truncate_set1);
+ if (ch == -1)
+ {
+ /* This will happen when tr is invoked like e.g.
+ tr -cs A-Za-z0-9 '\012'. */
+ break;
+ }
+ xlate[i] = ch;
+ }
+ }
+ }
else
- {
- int c1, c2;
- int i;
- enum Upper_Lower_class class_s1;
- enum Upper_Lower_class class_s2;
-
- for (i = 0; i < N_CHARS; i++)
- xlate[i] = i;
- s1->state = BEGIN_STATE;
- s2->state = BEGIN_STATE;
- for (;;)
- {
- c1 = get_next (s1, &class_s1);
- c2 = get_next (s2, &class_s2);
-
- /* When constructing the translation array, either one of the
- values returned by paired calls to get_next must be from
- [:upper:] and the other is [:lower:], or neither can be from
- upper or lower. */
-
- if ((class_s1 == UL_NONE) != (class_s2 == UL_NONE))
- error (EXIT_FAILURE, 0,
- _("misaligned [:upper:] and/or [:lower:] construct"));
-
- if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
- {
- for (i = 0; i < N_CHARS; i++)
- if (islower (i))
- xlate[i] = toupper (i);
- }
- else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
- {
- for (i = 0; i < N_CHARS; i++)
- if (isupper (i))
- xlate[i] = tolower (i);
- }
- else if ((class_s1 == UL_LOWER && class_s2 == UL_LOWER)
- || (class_s1 == UL_UPPER && class_s2 == UL_UPPER))
- {
- /* POSIX says the behavior of `tr "[:upper:]" "[:upper:]"'
- is undefined. Treat it as a no-op. */
- }
- else
- {
- /* The following should have been checked by validate... */
- if (c1 == -1 || c2 == -1)
- break;
- xlate[c1] = c2;
- }
- }
- assert (c1 == -1 || truncate_set1);
- }
+ {
+ int c1, c2;
+ int i;
+ enum Upper_Lower_class class_s1;
+ enum Upper_Lower_class class_s2;
+
+ for (i = 0; i < N_CHARS; i++)
+ xlate[i] = i;
+ s1->state = BEGIN_STATE;
+ s2->state = BEGIN_STATE;
+ while (true)
+ {
+ c1 = get_next (s1, &class_s1);
+ c2 = get_next (s2, &class_s2);
+
+ if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
+ {
+ for (i = 0; i < N_CHARS; i++)
+ if (islower (i))
+ xlate[i] = toupper (i);
+ }
+ else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
+ {
+ for (i = 0; i < N_CHARS; i++)
+ if (isupper (i))
+ xlate[i] = tolower (i);
+ }
+ else
+ {
+ /* The following should have been checked by validate... */
+ if (c1 == -1 || c2 == -1)
+ break;
+ xlate[c1] = c2;
+ }
+
+ /* When case-converting, skip the elements as an optimization. */
+ if (class_s2 != UL_NONE)
+ {
+ skip_construct (s1);
+ skip_construct (s2);
+ }
+ }
+ assert (c1 == -1 || truncate_set1);
+ }
if (squeeze_repeats)
- {
- set_initialize (s2, false, in_squeeze_set);
- squeeze_filter (io_buf, sizeof io_buf, read_and_xlate);
- }
+ {
+ set_initialize (s2, false, in_squeeze_set);
+ squeeze_filter (io_buf, sizeof io_buf, read_and_xlate);
+ }
else
- {
- for (;;)
- {
- size_t bytes_read = read_and_xlate (io_buf, sizeof io_buf);
- if (bytes_read == 0)
- break;
- if (fwrite (io_buf, 1, bytes_read, stdout) != bytes_read)
- error (EXIT_FAILURE, errno, _("write error"));
- }
- }
+ {
+ while (true)
+ {
+ size_t bytes_read = read_and_xlate (io_buf, sizeof io_buf);
+ if (bytes_read == 0)
+ break;
+ if (fwrite (io_buf, 1, bytes_read, stdout) != bytes_read)
+ error (EXIT_FAILURE, errno, _("write error"));
+ }
+ }
}
if (close (STDIN_FILENO) != 0)
error (EXIT_FAILURE, errno, _("standard input"));
- exit (EXIT_SUCCESS);
+ return EXIT_SUCCESS;
}