summary refs log tree commit diff
path: root/src/cut.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cut.c')
-rw-r--r-- src/cut.c 825
1 files changed, 275 insertions, 550 deletions
diff --git a/src/cut.c b/src/cut.c
index c9b8359..7ab6be4 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -1,11 +1,11 @@
/* cut - remove parts of lines of files
- Copyright (C) 1997-2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
Copyright (C) 1984 David M. Ihnat
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -13,8 +13,7 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation,
- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Written by David Ihnat. */
@@ -32,15 +31,20 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "getndelim2.h"
#include "hash.h"
-#include "quote.h"
#include "xstrndup.h"
-/* The official name of this program (e.g., no `g' prefix). */
+#include "set-fields.h"
+
+/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "cut"
-#define AUTHORS "David Ihnat", "David MacKenzie", "Jim Meyering"
+#define AUTHORS \
+ proper_name ("David M. Ihnat"), \
+ proper_name ("David MacKenzie"), \
+ proper_name ("Jim Meyering")
#define FATAL_ERROR(Message) \
do \
@@ -50,28 +54,12 @@
} \
while (0)
-/* Append LOW, HIGH to the list RP of range pairs, allocating additional
- space if necessary. Update local variable N_RP. When allocating,
- update global variable N_RP_ALLOCATED. */
-
-#define ADD_RANGE_PAIR(rp, low, high) \
- do \
- { \
- if (n_rp >= n_rp_allocated) \
- { \
- (rp) = X2NREALLOC (rp, &n_rp_allocated); \
- } \
- rp[n_rp].lo = (low); \
- rp[n_rp].hi = (high); \
- ++n_rp; \
- } \
- while (0)
-struct range_pair
- {
- size_t lo;
- size_t hi;
- };
+/* Pointer into the FRP array of range pairs. A byte or field index is
+ selected by a finite range when it lies between CURRENT_RP->LO and
+ CURRENT_RP->HI. Once the index becomes greater than CURRENT_RP->HI,
+ CURRENT_RP is advanced to point to the next range pair. */
+static struct field_range_pair *current_rp;
/* This buffer is used to support the semantics of the -s option
(or lack of same) when the specified field list includes (does
@@ -85,26 +73,6 @@ static char *field_1_buffer;
/* The number of bytes allocated for FIELD_1_BUFFER. */
static size_t field_1_bufsize;
-/* The largest field or byte index used as an endpoint of a closed
- or degenerate range specification; this doesn't include the starting
- index of right-open-ended ranges. For example, with either range spec
- `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
-static size_t max_range_endpoint;
-
-/* If nonzero, this is the index of the first field in a range that goes
- to end of line. */
-static size_t eol_range_start;
-
-/* This is a bit vector.
- In byte mode, which bytes to output.
- In field mode, which DELIM-separated fields to output.
- Both bytes and fields are numbered starting with 1,
- so the zeroth bit of this array is unused.
- A field or byte K has been selected if
- (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
- || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
-static unsigned char *printable_field;
-
enum operating_mode
{
undefined_mode,
@@ -112,27 +80,27 @@ enum operating_mode
/* Output characters that are in the given bytes. */
byte_mode,
- /* Output the given delimeter-separated fields. */
+ /* Output the given delimiter-separated fields. */
field_mode
};
-/* The name this program was run with. */
-char *program_name;
-
static enum operating_mode operating_mode;
-/* If true do not output lines containing no delimeter characters.
+/* If true do not output lines containing no delimiter characters.
Otherwise, all such lines are printed. This option is valid only
with field mode. */
static bool suppress_non_delimited;
-/* If nonzero, print all bytes, characters, or fields _except_
+/* If true, print all bytes, characters, or fields _except_
those that were specified. */
static bool complement;
-/* The delimeter character for field mode. */
+/* The delimiter character for field mode. */
static unsigned char delim;
+/* The delimiter for each line/record. */
+static unsigned char line_delim = '\n';
+
/* True if the --output-delimiter=STRING option was specified. */
static bool output_delimiter_specified;
@@ -146,15 +114,6 @@ static char *output_delimiter_string;
/* True if we have ever read standard input. */
static bool have_read_stdin;
-#define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31
-
-/* The set of range-start indices. For example, given a range-spec list like
- `-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15.
- Note that although `4' looks like a range-start index, it is in the middle
- of the `3-5' range, so it doesn't count.
- This table is created/used IFF output_delimiter_specified is set. */
-static Hash_table *range_start_ht;
-
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum
@@ -172,6 +131,7 @@ static struct option const longopts[] =
{"only-delimited", no_argument, NULL, 's'},
{"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
{"complement", no_argument, NULL, COMPLEMENT_OPTION},
+ {"zero-terminated", no_argument, NULL, 'z'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
@@ -181,21 +141,20 @@ void
usage (int status)
{
if (status != EXIT_SUCCESS)
- fprintf (stderr, _("Try `%s --help' for more information.\n"),
- program_name);
+ emit_try_help ();
else
{
printf (_("\
-Usage: %s [OPTION]... [FILE]...\n\
+Usage: %s OPTION... [FILE]...\n\
"),
- program_name);
+ program_name);
fputs (_("\
Print selected parts of lines from each FILE to standard output.\n\
-\n\
-"), stdout);
- fputs (_("\
-Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);
+
+ emit_stdin_note ();
+ emit_mandatory_arg_note ();
+
fputs (_("\
-b, --bytes=LIST select only these bytes\n\
-c, --characters=LIST select only these characters\n\
@@ -209,13 +168,16 @@ Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);
fputs (_("\
--complement complement the set of selected bytes, characters\n\
- or fields.\n\
+ or fields\n\
"), stdout);
fputs (_("\
-s, --only-delimited do not print lines not containing delimiters\n\
--output-delimiter=STRING use STRING as the output delimiter\n\
the default is to use the input delimiter\n\
"), stdout);
+ fputs (_("\
+ -z, --zero-terminated line delimiter is NUL, not newline\n\
+"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
fputs (_("\
@@ -231,295 +193,38 @@ Each range is one of:\n\
N- from N'th byte, character or field, to end of line\n\
N-M from N'th to M'th (included) byte, character or field\n\
-M from first to M'th (included) byte, character or field\n\
-\n\
-With no FILE, or when FILE is -, read standard input.\n\
"), stdout);
- printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+ emit_ancillary_info (PROGRAM_NAME);
}
exit (status);
}
-static inline void
-mark_range_start (size_t i)
-{
- /* Record the fact that `i' is a range-start index. */
- void *ent_from_table = hash_insert (range_start_ht, (void*) i);
- if (ent_from_table == NULL)
- {
- /* Insertion failed due to lack of memory. */
- xalloc_die ();
- }
- assert ((size_t) ent_from_table == i);
-}
-
-static inline void
-mark_printable_field (size_t i)
-{
- size_t n = i / CHAR_BIT;
- printable_field[n] |= (1 << (i % CHAR_BIT));
-}
-
-static inline bool
-is_printable_field (size_t i)
-{
- size_t n = i / CHAR_BIT;
- return (printable_field[n] >> (i % CHAR_BIT)) & 1;
-}
-static size_t
-hash_int (const void *x, size_t tablesize)
-{
-#ifdef UINTPTR_MAX
- uintptr_t y = (uintptr_t) x;
-#else
- size_t y = (size_t) x;
-#endif
- return y % tablesize;
-}
+/* Increment *ITEM_IDX (a field or byte index) and, if the new index
+ is greater than CURRENT_RP->HI, advance CURRENT_RP to the next
+ range pair. */
-static bool
-hash_compare_ints (void const *x, void const *y)
-{
- return (x == y) ? true : false;
-}
-
-static bool
-is_range_start_index (size_t i)
+static inline void
+next_item (size_t *item_idx)
{
- return hash_lookup (range_start_ht, (void *) i) ? true : false;
+ (*item_idx)++;
+ if ((*item_idx) > current_rp->hi)
+ current_rp++;
}
-/* Return nonzero if the K'th field or byte is printable.
- When returning nonzero, if RANGE_START is non-NULL,
- set *RANGE_START to true if K is the beginning of a range, and to
- false otherwise. */
-
-static bool
-print_kth (size_t k, bool *range_start)
-{
- bool k_selected
- = ((0 < eol_range_start && eol_range_start <= k)
- || (k <= max_range_endpoint && is_printable_field (k)));
-
- bool is_selected = k_selected ^ complement;
- if (range_start && is_selected)
- *range_start = is_range_start_index (k);
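+/* Return true if the K'th field or byte is printable, i.e., if K is at
+ or past CURRENT_RP->LO. Only the lower bound is tested here; next_item
+ advances CURRENT_RP once the index passes CURRENT_RP->HI. */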
- return is_selected;
-}
-
-/* Comparison function for qsort to order the list of
- struct range_pairs. */
-static int
-compare_ranges (const void *a, const void *b)
+static inline bool
+print_kth (size_t k)
{
- int a_start = ((const struct range_pair *) a)->lo;
- int b_start = ((const struct range_pair *) b)->lo;
- return a_start < b_start ? -1 : a_start > b_start;
+ return current_rp->lo <= k;
}
-/* Given the list of field or byte range specifications FIELDSTR, set
- MAX_RANGE_ENDPOINT and allocate and initialize the PRINTABLE_FIELD
- array. If there is a right-open-ended range, set EOL_RANGE_START
- to its starting index. FIELDSTR should be composed of one or more
- numbers or ranges of numbers, separated by blanks or commas.
- Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n'
- through end of line. Return true if FIELDSTR contains at least
- one field specification, false otherwise. */
-
-/* FIXME-someday: What if the user wants to cut out the 1,000,000-th
- field of some huge input file? This function shouldn't have to
- allocate a table of a million bits just so we can test every
- field < 10^6 with an array dereference. Instead, consider using
- an adaptive approach: if the range of selected fields is too large,
- but only a few fields/byte-offsets are actually selected, use a
- hash table. If the range of selected fields is too large, and
- too many are selected, then resort to using the range-pairs (the
- `rp' array) directly. */
+/* Return true if the K'th byte is the beginning of the current range,
+ i.e., if K equals CURRENT_RP->LO. */
-static bool
-set_fields (const char *fieldstr)
+static inline bool
+is_range_start_index (size_t k)
{
- size_t initial = 1; /* Value of first number in a range. */
- size_t value = 0; /* If nonzero, a number being accumulated. */
- bool dash_found = false; /* True if a '-' is found in this field. */
- bool field_found = false; /* True if at least one field spec
- has been processed. */
-
- struct range_pair *rp = NULL;
- size_t n_rp = 0;
- size_t n_rp_allocated = 0;
- size_t i;
- bool in_digits = false;
-
- /* Collect and store in RP the range end points.
- It also sets EOL_RANGE_START if appropriate. */
-
- for (;;)
- {
- if (*fieldstr == '-')
- {
- in_digits = false;
- /* Starting a range. */
- if (dash_found)
- FATAL_ERROR (_("invalid byte or field list"));
- dash_found = true;
- fieldstr++;
-
- if (value)
- {
- initial = value;
- value = 0;
- }
- else
- initial = 1;
- }
- else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
- {
- in_digits = false;
- /* Ending the string, or this field/byte sublist. */
- if (dash_found)
- {
- dash_found = false;
-
- /* A range. Possibilites: -n, m-n, n-.
- In any case, `initial' contains the start of the range. */
- if (value == 0)
- {
- /* `n-'. From `initial' to end of line. */
- eol_range_start = initial;
- field_found = true;
- }
- else
- {
- /* `m-n' or `-n' (1-n). */
- if (value < initial)
- FATAL_ERROR (_("invalid byte or field list"));
-
- /* Is there already a range going to end of line? */
- if (eol_range_start != 0)
- {
- /* Yes. Is the new sequence already contained
- in the old one? If so, no processing is
- necessary. */
- if (initial < eol_range_start)
- {
- /* No, the new sequence starts before the
- old. Does the old range going to end of line
- extend into the new range? */
- if (eol_range_start <= value)
- {
- /* Yes. Simply move the end of line marker. */
- eol_range_start = initial;
- }
- else
- {
- /* No. A simple range, before and disjoint from
- the range going to end of line. Fill it. */
- ADD_RANGE_PAIR (rp, initial, value);
- }
-
- /* In any case, some fields were selected. */
- field_found = true;
- }
- }
- else
- {
- /* There is no range going to end of line. */
- ADD_RANGE_PAIR (rp, initial, value);
- field_found = true;
- }
- value = 0;
- }
- }
- else if (value != 0)
- {
- /* A simple field number, not a range. */
- ADD_RANGE_PAIR (rp, value, value);
- value = 0;
- field_found = true;
- }
-
- if (*fieldstr == '\0')
- {
- break;
- }
-
- fieldstr++;
- }
- else if (ISDIGIT (*fieldstr))
- {
- /* Record beginning of digit string, in case we have to
- complain about it. */
- static char const *num_start;
- if (!in_digits || !num_start)
- num_start = fieldstr;
- in_digits = true;
-
- /* Detect overflow. */
- if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t))
- {
- /* In case the user specified -c4294967296,22,
- complain only about the first number. */
- /* Determine the length of the offending number. */
- size_t len = strspn (num_start, "0123456789");
- char *bad_num = xstrndup (num_start, len);
- if (operating_mode == byte_mode)
- error (0, 0,
- _("byte offset %s is too large"), quote (bad_num));
- else
- error (0, 0,
- _("field number %s is too large"), quote (bad_num));
- free (bad_num);
- exit (EXIT_FAILURE);
- }
-
- fieldstr++;
- }
- else
- FATAL_ERROR (_("invalid byte or field list"));
- }
-
- max_range_endpoint = 0;
- for (i = 0; i < n_rp; i++)
- {
- if (rp[i].hi > max_range_endpoint)
- max_range_endpoint = rp[i].hi;
- }
-
- /* Allocate an array large enough so that it may be indexed by
- the field numbers corresponding to all finite ranges
- (i.e. `2-6' or `-4', but not `5-') in FIELDSTR. */
-
- printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
-
- qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
-
- /* Set the array entries corresponding to integers in the ranges of RP. */
- for (i = 0; i < n_rp; i++)
- {
- size_t j;
- size_t rsi_candidate;
-
- /* Record the range-start indices, i.e., record each start
- index that is not part of any other (lo..hi] range. */
- rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
- if (output_delimiter_specified
- && !is_printable_field (rsi_candidate))
- mark_range_start (rsi_candidate);
-
- for (j = rp[i].lo; j <= rp[i].hi; j++)
- mark_printable_field (j);
- }
-
- if (output_delimiter_specified
- && !complement
- && eol_range_start && !is_printable_field (eol_range_start))
- mark_range_start (eol_range_start);
-
- free (rp);
-
- return field_found;
+ return k == current_rp->lo;
}
/* Read from stream STREAM, printing to standard output any selected bytes. */
@@ -534,39 +239,44 @@ cut_bytes (FILE *stream)
byte_idx = 0;
print_delimiter = false;
- while (1)
+ current_rp = frp;
+ while (true)
{
int c; /* Each character from the file. */
c = getc (stream);
- if (c == '\n')
- {
- putchar ('\n');
- byte_idx = 0;
- print_delimiter = false;
- }
+ if (c == line_delim)
+ {
+ putchar (c);
+ byte_idx = 0;
+ print_delimiter = false;
+ current_rp = frp;
+ }
else if (c == EOF)
- {
- if (byte_idx > 0)
- putchar ('\n');
- break;
- }
+ {
+ if (byte_idx > 0)
+ putchar (line_delim);
+ break;
+ }
else
- {
- bool range_start;
- bool *rs = output_delimiter_specified ? &range_start : NULL;
- if (print_kth (++byte_idx, rs))
- {
- if (rs && *rs && print_delimiter)
- {
- fwrite (output_delimiter_string, sizeof (char),
- output_delimiter_length, stdout);
- }
- print_delimiter = true;
- putchar (c);
- }
- }
+ {
+ next_item (&byte_idx);
+ if (print_kth (byte_idx))
+ {
+ if (output_delimiter_specified)
+ {
+ if (print_delimiter && is_range_start_index (byte_idx))
+ {
+ fwrite (output_delimiter_string, sizeof (char),
+ output_delimiter_length, stdout);
+ }
+ print_delimiter = true;
+ }
+
+ putchar (c);
+ }
+ }
}
}
@@ -580,115 +290,138 @@ cut_fields (FILE *stream)
bool found_any_selected_field = false;
bool buffer_first_field;
+ current_rp = frp;
+
c = getc (stream);
if (c == EOF)
return;
ungetc (c, stream);
+ c = 0;
/* To support the semantics of the -s flag, we may have to buffer
- all of the first field to determine whether it is `delimited.'
+ all of the first field to determine whether it is 'delimited.'
But that is unnecessary if all non-delimited lines must be printed
and the first field has been selected, or if non-delimited lines
must be suppressed and the first field has *not* been selected.
That is because a non-delimited line has exactly one field. */
- buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
while (1)
{
if (field_idx == 1 && buffer_first_field)
- {
- ssize_t len;
- size_t n_bytes;
-
- len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
- GETNLINE_NO_LIMIT, delim, '\n', stream);
- if (len < 0)
- {
- free (field_1_buffer);
- field_1_buffer = NULL;
- if (ferror (stream) || feof (stream))
- break;
- xalloc_die ();
- }
-
- n_bytes = len;
- assert (n_bytes != 0);
-
- /* If the first field extends to the end of line (it is not
- delimited) and we are printing all non-delimited lines,
- print this one. */
- if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
- {
- if (suppress_non_delimited)
- {
- /* Empty. */
- }
- else
- {
- fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
- /* Make sure the output line is newline terminated. */
- if (field_1_buffer[n_bytes - 1] != '\n')
- putchar ('\n');
- }
- continue;
- }
- if (print_kth (1, NULL))
- {
- /* Print the field, but not the trailing delimiter. */
- fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
- found_any_selected_field = true;
- }
- ++field_idx;
- }
-
- if (c != EOF)
- {
- if (print_kth (field_idx, NULL))
- {
- if (found_any_selected_field)
- {
- fwrite (output_delimiter_string, sizeof (char),
- output_delimiter_length, stdout);
- }
- found_any_selected_field = true;
-
- while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
- {
- putchar (c);
- }
- }
- else
- {
- while ((c = getc (stream)) != delim && c != '\n' && c != EOF)
- {
- /* Empty. */
- }
- }
- }
-
- if (c == '\n')
- {
- c = getc (stream);
- if (c != EOF)
- {
- ungetc (c, stream);
- c = '\n';
- }
- }
+ {
+ ssize_t len;
+ size_t n_bytes;
+
+ len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0,
+ GETNLINE_NO_LIMIT, delim, line_delim, stream);
+ if (len < 0)
+ {
+ free (field_1_buffer);
+ field_1_buffer = NULL;
+ if (ferror (stream) || feof (stream))
+ break;
+ xalloc_die ();
+ }
+
+ n_bytes = len;
+ assert (n_bytes != 0);
+
+ c = 0;
+
+ /* If the first field extends to the end of line (it is not
+ delimited) and we are printing all non-delimited lines,
+ print this one. */
+ if (to_uchar (field_1_buffer[n_bytes - 1]) != delim)
+ {
+ if (suppress_non_delimited)
+ {
+ /* Empty. */
+ }
+ else
+ {
+ fwrite (field_1_buffer, sizeof (char), n_bytes, stdout);
+ /* Make sure the output line is terminated by LINE_DELIM. */
+ if (field_1_buffer[n_bytes - 1] != line_delim)
+ putchar (line_delim);
+ c = line_delim;
+ }
+ continue;
+ }
+ if (print_kth (1))
+ {
+ /* Print the field, but not the trailing delimiter. */
+ fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
+
+ /* With -d$'\n' don't treat the last '\n' as a delimiter. */
+ if (delim == line_delim)
+ {
+ int last_c = getc (stream);
+ if (last_c != EOF)
+ {
+ ungetc (last_c, stream);
+ found_any_selected_field = true;
+ }
+ }
+ else
+ found_any_selected_field = true;
+ }
+ next_item (&field_idx);
+ }
+
+ int prev_c = c;
+
+ if (print_kth (field_idx))
+ {
+ if (found_any_selected_field)
+ {
+ fwrite (output_delimiter_string, sizeof (char),
+ output_delimiter_length, stdout);
+ }
+ found_any_selected_field = true;
+
+ while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
+ {
+ putchar (c);
+ prev_c = c;
+ }
+ }
+ else
+ {
+ while ((c = getc (stream)) != delim && c != line_delim && c != EOF)
+ {
+ prev_c = c;
+ }
+ }
+
+ /* With -d$'\n' don't treat the last '\n' as a delimiter. */
+ if (delim == line_delim && c == delim)
+ {
+ int last_c = getc (stream);
+ if (last_c != EOF)
+ ungetc (last_c, stream);
+ else
+ c = last_c;
+ }
if (c == delim)
- ++field_idx;
- else if (c == '\n' || c == EOF)
- {
- if (found_any_selected_field
- || !(suppress_non_delimited && field_idx == 1))
- putchar ('\n');
- if (c == EOF)
- break;
- field_idx = 1;
- found_any_selected_field = false;
- }
+ next_item (&field_idx);
+ else if (c == line_delim || c == EOF)
+ {
+ if (found_any_selected_field
+ || !(suppress_non_delimited && field_idx == 1))
+ {
+ if (c == line_delim || prev_c != line_delim
+ || delim == line_delim)
+ putchar (line_delim);
+ }
+ if (c == EOF)
+ break;
+ field_idx = 1;
+ current_rp = frp;
+ found_any_selected_field = false;
+ }
}
}
@@ -718,24 +451,26 @@ cut_file (char const *file)
{
stream = fopen (file, "r");
if (stream == NULL)
- {
- error (0, errno, "%s", file);
- return false;
- }
+ {
+ error (0, errno, "%s", quotef (file));
+ return false;
+ }
}
+ fadvise (stream, FADVISE_SEQUENTIAL);
+
cut_stream (stream);
if (ferror (stream))
{
- error (0, errno, "%s", file);
+ error (0, errno, "%s", quotef (file));
return false;
}
if (STREQ (file, "-"))
clearerr (stream); /* Also clear EOF. */
else if (fclose (stream) == EOF)
{
- error (0, errno, "%s", file);
+ error (0, errno, "%s", quotef (file));
return false;
}
return true;
@@ -747,10 +482,10 @@ main (int argc, char **argv)
int optc;
bool ok;
bool delim_specified = false;
- char *spec_list_string IF_LINT(= NULL);
+ char *spec_list_string IF_LINT ( = NULL);
initialize_main (&argc, &argv);
- program_name = argv[0];
+ set_program_name (argv[0]);
setlocale (LC_ALL, "");
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
@@ -765,69 +500,73 @@ main (int argc, char **argv)
delim = '\0';
have_read_stdin = false;
- while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, NULL)) != -1)
+ while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1)
{
switch (optc)
- {
- case 'b':
- case 'c':
- /* Build the byte list. */
- if (operating_mode != undefined_mode)
- FATAL_ERROR (_("only one type of list may be specified"));
- operating_mode = byte_mode;
- spec_list_string = optarg;
- break;
-
- case 'f':
- /* Build the field list. */
- if (operating_mode != undefined_mode)
- FATAL_ERROR (_("only one type of list may be specified"));
- operating_mode = field_mode;
- spec_list_string = optarg;
- break;
-
- case 'd':
- /* New delimiter. */
- /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
- if (optarg[0] != '\0' && optarg[1] != '\0')
- FATAL_ERROR (_("the delimiter must be a single character"));
- delim = optarg[0];
- delim_specified = true;
- break;
-
- case OUTPUT_DELIMITER_OPTION:
- output_delimiter_specified = true;
- /* Interpret --output-delimiter='' to mean
- `use the NUL byte as the delimiter.' */
- output_delimiter_length = (optarg[0] == '\0'
- ? 1 : strlen (optarg));
- output_delimiter_string = xstrdup (optarg);
- break;
-
- case 'n':
- break;
-
- case 's':
- suppress_non_delimited = true;
- break;
-
- case COMPLEMENT_OPTION:
- complement = true;
- break;
-
- case_GETOPT_HELP_CHAR;
-
- case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
-
- default:
- usage (EXIT_FAILURE);
- }
+ {
+ case 'b':
+ case 'c':
+ /* Select byte mode; the list string is passed to set_fields later. */
+ if (operating_mode != undefined_mode)
+ FATAL_ERROR (_("only one type of list may be specified"));
+ operating_mode = byte_mode;
+ spec_list_string = optarg;
+ break;
+
+ case 'f':
+ /* Select field mode; the list string is passed to set_fields later. */
+ if (operating_mode != undefined_mode)
+ FATAL_ERROR (_("only one type of list may be specified"));
+ operating_mode = field_mode;
+ spec_list_string = optarg;
+ break;
+
+ case 'd':
+ /* New delimiter. */
+ /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */
+ if (optarg[0] != '\0' && optarg[1] != '\0')
+ FATAL_ERROR (_("the delimiter must be a single character"));
+ delim = optarg[0];
+ delim_specified = true;
+ break;
+
+ case OUTPUT_DELIMITER_OPTION:
+ output_delimiter_specified = true;
+ /* Interpret --output-delimiter='' to mean
+ 'use the NUL byte as the delimiter.' */
+ output_delimiter_length = (optarg[0] == '\0'
+ ? 1 : strlen (optarg));
+ output_delimiter_string = xstrdup (optarg);
+ break;
+
+ case 'n':
+ break;
+
+ case 's':
+ suppress_non_delimited = true;
+ break;
+
+ case 'z':
+ line_delim = '\0';
+ break;
+
+ case COMPLEMENT_OPTION:
+ complement = true;
+ break;
+
+ case_GETOPT_HELP_CHAR;
+
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+ default:
+ usage (EXIT_FAILURE);
+ }
}
if (operating_mode == undefined_mode)
FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
- if (delim != '\0' && operating_mode != field_mode)
+ if (delim_specified && operating_mode != field_mode)
FATAL_ERROR (_("an input delimiter may be specified only\
when operating on fields"));
@@ -835,23 +574,9 @@ main (int argc, char **argv)
FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
\tonly when operating on fields"));
- if (output_delimiter_specified)
- {
- range_start_ht = hash_initialize (HT_RANGE_START_INDEX_INITIAL_CAPACITY,
- NULL, hash_int,
- hash_compare_ints, NULL);
- if (range_start_ht == NULL)
- xalloc_die ();
-
- }
-
- if (! set_fields (spec_list_string))
- {
- if (operating_mode == field_mode)
- FATAL_ERROR (_("missing list of fields"));
- else
- FATAL_ERROR (_("missing list of positions"));
- }
+ set_fields (spec_list_string,
+ ( (operating_mode == field_mode) ? 0 : SETFLD_ERRMSG_USE_POS)
+ | (complement ? SETFLD_COMPLEMENT : 0) );
if (!delim_specified)
delim = '\t';
@@ -871,8 +596,6 @@ main (int argc, char **argv)
for (ok = true; optind < argc; optind++)
ok &= cut_file (argv[optind]);
- if (range_start_ht)
- hash_free (range_start_ht);
if (have_read_stdin && fclose (stdin) == EOF)
{
@@ -880,5 +603,7 @@ main (int argc, char **argv)
ok = false;
}
- exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
+ IF_LINT (reset_fields ());
+
+ return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}