diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
commit | 70e9163c9c18e995515598085cb824e554eb7ae7 (patch) | |
tree | a42dc8b2a6c031354bf31472de888bfc8a060132 /src/join.c | |
parent | cbf5993c43f49281173f185863577d86bfac6eae (diff) | |
download | coreutils-tarball-master.tar.gz |
coreutils-8.25HEADcoreutils-8.25master
Diffstat (limited to 'src/join.c')
-rw-r--r-- | src/join.c | 1043 |
1 files changed, 653 insertions, 390 deletions
@@ -1,10 +1,10 @@ /* join - join lines of two files on a common field - Copyright (C) 91, 1995-2006 Free Software Foundation, Inc. + Copyright (C) 1991-2016 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -12,8 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + along with this program. If not, see <http://www.gnu.org/licenses/>. Written by Mike Haertel, mike@gnu.ai.mit.edu. */ @@ -25,6 +24,7 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "hard-locale.h" #include "linebuffer.h" #include "memcasecmp.h" @@ -32,14 +32,21 @@ #include "stdio--.h" #include "xmemcoll.h" #include "xstrtol.h" +#include "argmatch.h" -/* The official name of this program (e.g., no `g' prefix). */ +/* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "join" -#define AUTHORS "Mike Haertel" +#define AUTHORS proper_name ("Mike Haertel") #define join system_join +#define SWAPLINES(a, b) do { \ + struct line *tmp = a; \ + a = b; \ + b = tmp; \ +} while (0); + /* An element of the list identifying which fields to print for each output line. */ struct outlist @@ -65,8 +72,8 @@ struct field struct line { struct linebuffer buf; /* The line itself. */ - size_t nfields; /* Number of elements in `fields'. */ - size_t nfields_allocated; /* Number of elements allocated for `fields'. */ + size_t nfields; /* Number of elements in 'fields'. */ + size_t nfields_allocated; /* Number of elements allocated for 'fields'. */ struct field *fields; }; @@ -74,13 +81,24 @@ struct line same join field value. */ struct seq { - size_t count; /* Elements used in `lines'. */ - size_t alloc; /* Elements allocated in `lines'. */ - struct line *lines; + size_t count; /* Elements used in 'lines'. */ + size_t alloc; /* Elements allocated in 'lines'. */ + struct line **lines; }; -/* The name this program was run with. */ -char *program_name; +/* The previous line read from each file. */ +static struct line *prevline[2] = {NULL, NULL}; + +/* The number of lines read from each file. */ +static uintmax_t line_no[2] = {0, 0}; + +/* The input file names. */ +static char *g_names[2]; + +/* This provides an extra line buffer for each file. We need these if we + try to read two consecutive lines into the same buffer, since we don't + want to overwrite the previous buffer before we check order. */ +static struct line *spareline[2] = {NULL, NULL}; /* True if the LC_COLLATE locale is hard. */ static bool hard_LC_COLLATE; @@ -91,9 +109,22 @@ static bool print_unpairables_1, print_unpairables_2; /* If nonzero, print pairable lines. */ static bool print_pairables; +/* If nonzero, we have seen at least one unpairable line. */ +static bool seen_unpairable; + +/* If nonzero, we have warned about disorder in that file. */ +static bool issued_disorder_warning[2]; + /* Empty output field filler. */ static char const *empty_filler; +/* Whether to ensure the same number of fields are output from each line. */ +static bool autoformat; +/* The number of fields to output for each line. + Only significant when autoformat is true. */ +static size_t autocount_1; +static size_t autocount_2; + /* Field to join on; SIZE_MAX means they haven't been determined yet. */ static size_t join_field_1 = SIZE_MAX; static size_t join_field_2 = SIZE_MAX; @@ -101,7 +132,7 @@ static size_t join_field_2 = SIZE_MAX; /* List of fields to print. */ static struct outlist outlist_head; -/* Last element in `outlist', where a new element can be added. */ +/* Last element in 'outlist', where a new element can be added. */ static struct outlist *outlist_end = &outlist_head; /* Tab character separating fields. If negative, fields are separated @@ -109,9 +140,29 @@ static struct outlist *outlist_end = &outlist_head; tab character whose value (when cast to unsigned char) equals TAB. */ static int tab = -1; +/* If nonzero, check that the input is correctly ordered. */ +static enum + { + CHECK_ORDER_DEFAULT, + CHECK_ORDER_ENABLED, + CHECK_ORDER_DISABLED + } check_input_order; + +enum +{ + CHECK_ORDER_OPTION = CHAR_MAX + 1, + NOCHECK_ORDER_OPTION, + HEADER_LINE_OPTION +}; + + static struct option const longopts[] = { {"ignore-case", no_argument, NULL, 'i'}, + {"check-order", no_argument, NULL, CHECK_ORDER_OPTION}, + {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION}, + {"zero-terminated", no_argument, NULL, 'z'}, + {"header", no_argument, NULL, HEADER_LINE_OPTION}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} @@ -123,30 +174,42 @@ static struct line uni_blank; /* If nonzero, ignore case when comparing join fields. */ static bool ignore_case; +/* If nonzero, treat the first line of each file as column headers -- + join them without checking for ordering */ +static bool join_header_lines; + +/* The character marking end of line. Default to \n. */ +static char eolchar = '\n'; + void usage (int status) { if (status != EXIT_SUCCESS) - fprintf (stderr, _("Try `%s --help' for more information.\n"), - program_name); + emit_try_help (); else { printf (_("\ Usage: %s [OPTION]... FILE1 FILE2\n\ "), - program_name); + program_name); fputs (_("\ For each pair of input lines with identical join fields, write a line to\n\ -standard output. The default join field is the first, delimited\n\ -by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\ +standard output. The default join field is the first, delimited by blanks.\ \n\ - -a FILENUM print unpairable lines coming from file FILENUM, where\n\ +"), stdout); + fputs (_("\ +\n\ +When FILE1 or FILE2 (not both) is -, read standard input.\n\ +"), stdout); + fputs (_("\ +\n\ + -a FILENUM also print unpairable lines from file FILENUM, where\n\ FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\ -e EMPTY replace missing input fields with EMPTY\n\ "), stdout); fputs (_("\ -i, --ignore-case ignore differences in case when comparing fields\n\ - -j FIELD equivalent to `-1 FIELD -2 FIELD'\n\ + -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\ -o FORMAT obey FORMAT while constructing output line\n\ -t CHAR use CHAR as input and output field separator\n\ "), stdout); @@ -154,6 +217,14 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\ -v FILENUM like -a FILENUM, but suppress joined output lines\n\ -1 FIELD join on this FIELD of file 1\n\ -2 FIELD join on this FIELD of file 2\n\ + --check-order check that the input is correctly sorted, even\n\ + if all input lines are pairable\n\ + --nocheck-order do not check that the input is correctly sorted\n\ + --header treat the first line in each file as field headers,\n\ + print them without trying to pair them\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); @@ -162,14 +233,19 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\ Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\ else fields are separated by CHAR. Any FIELD is a field number counted\n\ from 1. FORMAT is one or more comma or blank separated specifications,\n\ -each being `FILENUM.FIELD' or `0'. Default FORMAT outputs the join field,\n\ +each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\ the remaining fields from FILE1, the remaining fields from FILE2, all\n\ -separated by CHAR.\n\ +separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\ +line of each file determines the number of fields output for each line.\n\ \n\ Important: FILE1 and FILE2 must be sorted on the join fields.\n\ -E.g., use `sort -k 1b,1' if `join' has no options.\n\ +E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\ +or use \"join -t ''\" if 'sort' has no options.\n\ +Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\ +If the input is not sorted and some lines cannot be joined, a\n\ +warning message will be given.\n\ "), stdout); - printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); + emit_ancillary_info (PROGRAM_NAME); } exit (status); } @@ -188,7 +264,7 @@ extract_field (struct line *line, char *field, size_t len) ++(line->nfields); } -/* Fill in the `fields' structure in LINE. */ +/* Fill in the 'fields' structure in LINE. */ static void xfields (struct line *line) @@ -199,108 +275,55 @@ xfields (struct line *line) if (ptr == lim) return; - if (0 <= tab) + if (0 <= tab && tab != '\n') { char *sep; for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1) - extract_field (line, ptr, sep - ptr); + extract_field (line, ptr, sep - ptr); } - else + else if (tab < 0) { /* Skip leading blanks before the first field. */ - while (isblank (to_uchar (*ptr))) - if (++ptr == lim) - return; + while (field_sep (*ptr)) + if (++ptr == lim) + return; do - { - char *sep; - for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++) - continue; - extract_field (line, ptr, sep - ptr); - if (sep == lim) - return; - for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++) - continue; - } + { + char *sep; + for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++) + continue; + extract_field (line, ptr, sep - ptr); + if (sep == lim) + return; + for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++) + continue; + } while (ptr != lim); } extract_field (line, ptr, lim - ptr); } -/* Read a line from FP into LINE and split it into fields. - Return true if successful. */ - -static bool -get_line (FILE *fp, struct line *line) -{ - initbuffer (&line->buf); - - if (! readlinebuffer (&line->buf, fp)) - { - if (ferror (fp)) - error (EXIT_FAILURE, errno, _("read error")); - free (line->buf.buffer); - line->buf.buffer = NULL; - return false; - } - - line->nfields_allocated = 0; - line->nfields = 0; - line->fields = NULL; - xfields (line); - return true; -} - static void freeline (struct line *line) { + if (line == NULL) + return; free (line->fields); + line->fields = NULL; free (line->buf.buffer); line->buf.buffer = NULL; } -static void -initseq (struct seq *seq) -{ - seq->count = 0; - seq->alloc = 0; - seq->lines = NULL; -} - -/* Read a line from FP and add it to SEQ. Return true if successful. */ - -static bool -getseq (FILE *fp, struct seq *seq) -{ - if (seq->count == seq->alloc) - seq->lines = X2NREALLOC (seq->lines, &seq->alloc); - - if (get_line (fp, &seq->lines[seq->count])) - { - ++seq->count; - return true; - } - return false; -} - -static void -delseq (struct seq *seq) -{ - size_t i; - for (i = 0; i < seq->count; i++) - if (seq->lines[i].buf.buffer) - freeline (&seq->lines[i]); - free (seq->lines); -} - /* Return <0 if the join field in LINE1 compares less than the one in LINE2; >0 if it compares greater; 0 if it compares equal. - Report an error and exit if the comparison fails. */ + Report an error and exit if the comparison fails. + Use join fields JF_1 and JF_2 respectively. */ static int -keycmp (struct line const *line1, struct line const *line2) +keycmp (struct line const *line1, struct line const *line2, + size_t jf_1, size_t jf_2) { /* Start of field to compare in each file. */ char *beg1; @@ -310,10 +333,10 @@ keycmp (struct line const *line1, struct line const *line2) size_t len2; /* Length of fields to compare. */ int diff; - if (join_field_1 < line1->nfields) + if (jf_1 < line1->nfields) { - beg1 = line1->fields[join_field_1].beg; - len1 = line1->fields[join_field_1].len; + beg1 = line1->fields[jf_1].beg; + len1 = line1->fields[jf_1].len; } else { @@ -321,10 +344,10 @@ keycmp (struct line const *line1, struct line const *line2) len1 = 0; } - if (join_field_2 < line2->nfields) + if (jf_2 < line2->nfields) { - beg2 = line2->fields[join_field_2].beg; - len2 = line2->fields[join_field_2].len; + beg2 = line2->fields[jf_2].beg; + len2 = line2->fields[jf_2].len; } else { @@ -346,7 +369,7 @@ keycmp (struct line const *line1, struct line const *line2) else { if (hard_LC_COLLATE) - return xmemcoll (beg1, len1, beg2, len2); + return xmemcoll (beg1, len1, beg2, len2); diff = memcmp (beg1, beg2, MIN (len1, len2)); } @@ -355,8 +378,174 @@ keycmp (struct line const *line1, struct line const *line2) return len1 < len2 ? -1 : len1 != len2; } +/* Check that successive input lines PREV and CURRENT from input file + WHATFILE are presented in order, unless the user may be relying on + the GNU extension that input lines may be out of order if no input + lines are unpairable. + + If the user specified --nocheck-order, the check is not made. + If the user specified --check-order, the problem is fatal. + Otherwise (the default), the message is simply a warning. + + A message is printed at most once per input file. */ + +static void +check_order (const struct line *prev, + const struct line *current, + int whatfile) +{ + if (check_input_order != CHECK_ORDER_DISABLED + && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable)) + { + if (!issued_disorder_warning[whatfile-1]) + { + size_t join_field = whatfile == 1 ? join_field_1 : join_field_2; + if (keycmp (prev, current, join_field, join_field) > 0) + { + /* Exclude any trailing newline. */ + size_t len = current->buf.length; + if (0 < len && current->buf.buffer[len - 1] == '\n') + --len; + + /* If the offending line is longer than INT_MAX, output + only the first INT_MAX bytes in this diagnostic. */ + len = MIN (INT_MAX, len); + + error ((check_input_order == CHECK_ORDER_ENABLED + ? EXIT_FAILURE : 0), + 0, _("%s:%"PRIuMAX": is not sorted: %.*s"), + g_names[whatfile - 1], line_no[whatfile - 1], + (int) len, current->buf.buffer); + + /* If we get to here, the message was merely a warning. + Arrange to issue it only once per file. */ + issued_disorder_warning[whatfile-1] = true; + } + } + } +} + +static inline void +reset_line (struct line *line) +{ + line->nfields = 0; +} + +static struct line * +init_linep (struct line **linep) +{ + struct line *line = xcalloc (1, sizeof *line); + *linep = line; + return line; +} + +/* Read a line from FP into LINE and split it into fields. + Return true if successful. */ + +static bool +get_line (FILE *fp, struct line **linep, int which) +{ + struct line *line = *linep; + + if (line == prevline[which - 1]) + { + SWAPLINES (line, spareline[which - 1]); + *linep = line; + } + + if (line) + reset_line (line); + else + line = init_linep (linep); + + if (! readlinebuffer_delim (&line->buf, fp, eolchar)) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("read error")); + freeline (line); + return false; + } + ++line_no[which - 1]; + + xfields (line); + + if (prevline[which - 1]) + check_order (prevline[which - 1], line, which); + + prevline[which - 1] = line; + return true; +} + +static void +free_spareline (void) +{ + size_t i; + + for (i = 0; i < ARRAY_CARDINALITY (spareline); i++) + { + if (spareline[i]) + { + freeline (spareline[i]); + free (spareline[i]); + } + } +} + +static void +initseq (struct seq *seq) +{ + seq->count = 0; + seq->alloc = 0; + seq->lines = NULL; +} + +/* Read a line from FP and add it to SEQ. Return true if successful. */ + +static bool +getseq (FILE *fp, struct seq *seq, int whichfile) +{ + if (seq->count == seq->alloc) + { + size_t i; + seq->lines = X2NREALLOC (seq->lines, &seq->alloc); + for (i = seq->count; i < seq->alloc; i++) + seq->lines[i] = NULL; + } + + if (get_line (fp, &seq->lines[seq->count], whichfile)) + { + ++seq->count; + return true; + } + return false; +} + +/* Read a line from FP and add it to SEQ, as the first item if FIRST is + true, else as the next. */ +static bool +advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile) +{ + if (first) + seq->count = 0; + + return getseq (fp, seq, whichfile); +} + +static void +delseq (struct seq *seq) +{ + size_t i; + for (i = 0; i < seq->alloc; i++) + { + freeline (seq->lines[i]); + free (seq->lines[i]); + } + free (seq->lines); +} + + /* Print field N of LINE if it exists and is nonempty, otherwise - `empty_filler' if it is nonempty. */ + 'empty_filler' if it is nonempty. */ static void prfield (size_t n, struct line const *line) @@ -367,14 +556,35 @@ prfield (size_t n, struct line const *line) { len = line->fields[n].len; if (len) - fwrite (line->fields[n].beg, 1, len, stdout); + fwrite (line->fields[n].beg, 1, len, stdout); else if (empty_filler) - fputs (empty_filler, stdout); + fputs (empty_filler, stdout); } else if (empty_filler) fputs (empty_filler, stdout); } +/* Output all the fields in line, other than the join field. */ + +static void +prfields (struct line const *line, size_t join_field, size_t autocount) +{ + size_t i; + size_t nfields = autoformat ? autocount : line->nfields; + char output_separator = tab < 0 ? ' ' : tab; + + for (i = 0; i < join_field && i < nfields; ++i) + { + putchar (output_separator); + prfield (i, line); + } + for (i = join_field + 1; i < nfields; ++i) + { + putchar (output_separator); + prfield (i, line); + } +} + /* Print the join of LINE1 and LINE2. */ static void @@ -382,6 +592,8 @@ prjoin (struct line const *line1, struct line const *line2) { const struct outlist *outlist; char output_separator = tab < 0 ? ' ' : tab; + size_t field; + struct line const *line; outlist = outlist_head.next; if (outlist) @@ -390,70 +602,54 @@ prjoin (struct line const *line1, struct line const *line2) o = outlist; while (1) - { - size_t field; - struct line const *line; - - if (o->file == 0) - { - if (line1 == &uni_blank) - { - line = line2; - field = join_field_2; - } - else - { - line = line1; - field = join_field_1; - } - } - else - { - line = (o->file == 1 ? line1 : line2); - field = o->field; - } - prfield (field, line); - o = o->next; - if (o == NULL) - break; - putchar (output_separator); - } - putchar ('\n'); + { + if (o->file == 0) + { + if (line1 == &uni_blank) + { + line = line2; + field = join_field_2; + } + else + { + line = line1; + field = join_field_1; + } + } + else + { + line = (o->file == 1 ? line1 : line2); + field = o->field; + } + prfield (field, line); + o = o->next; + if (o == NULL) + break; + putchar (output_separator); + } + putchar (eolchar); } else { - size_t i; - if (line1 == &uni_blank) - { - struct line const *t; - t = line1; - line1 = line2; - line2 = t; - } - prfield (join_field_1, line1); - for (i = 0; i < join_field_1 && i < line1->nfields; ++i) - { - putchar (output_separator); - prfield (i, line1); - } - for (i = join_field_1 + 1; i < line1->nfields; ++i) - { - putchar (output_separator); - prfield (i, line1); - } - - for (i = 0; i < join_field_2 && i < line2->nfields; ++i) - { - putchar (output_separator); - prfield (i, line2); - } - for (i = join_field_2 + 1; i < line2->nfields; ++i) - { - putchar (output_separator); - prfield (i, line2); - } - putchar ('\n'); + { + line = line2; + field = join_field_2; + } + else + { + line = line1; + field = join_field_1; + } + + /* Output the join field. */ + prfield (field, line); + + /* Output other fields. */ + prfields (line1, join_field_1, autocount_1); + prfields (line2, join_field_2, autocount_2); + + putchar (eolchar); } } @@ -463,121 +659,161 @@ static void join (FILE *fp1, FILE *fp2) { struct seq seq1, seq2; - struct line line; int diff; bool eof1, eof2; + fadvise (fp1, FADVISE_SEQUENTIAL); + fadvise (fp2, FADVISE_SEQUENTIAL); + /* Read the first line of each file. */ initseq (&seq1); - getseq (fp1, &seq1); + getseq (fp1, &seq1, 1); initseq (&seq2); - getseq (fp2, &seq2); + getseq (fp2, &seq2, 2); + + if (autoformat) + { + autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0; + autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0; + } + + if (join_header_lines && (seq1.count || seq2.count)) + { + struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank; + struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank; + prjoin (hline1, hline2); + prevline[0] = NULL; + prevline[1] = NULL; + if (seq1.count) + advance_seq (fp1, &seq1, true, 1); + if (seq2.count) + advance_seq (fp2, &seq2, true, 2); + } while (seq1.count && seq2.count) { size_t i; - diff = keycmp (&seq1.lines[0], &seq2.lines[0]); + diff = keycmp (seq1.lines[0], seq2.lines[0], + join_field_1, join_field_2); if (diff < 0) - { - if (print_unpairables_1) - prjoin (&seq1.lines[0], &uni_blank); - freeline (&seq1.lines[0]); - seq1.count = 0; - getseq (fp1, &seq1); - continue; - } + { + if (print_unpairables_1) + prjoin (seq1.lines[0], &uni_blank); + advance_seq (fp1, &seq1, true, 1); + seen_unpairable = true; + continue; + } if (diff > 0) - { - if (print_unpairables_2) - prjoin (&uni_blank, &seq2.lines[0]); - freeline (&seq2.lines[0]); - seq2.count = 0; - getseq (fp2, &seq2); - continue; - } + { + if (print_unpairables_2) + prjoin (&uni_blank, seq2.lines[0]); + advance_seq (fp2, &seq2, true, 2); + seen_unpairable = true; + continue; + } /* Keep reading lines from file1 as long as they continue to match the current line from file2. */ eof1 = false; do - if (!getseq (fp1, &seq1)) - { - eof1 = true; - ++seq1.count; - break; - } - while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0])); + if (!advance_seq (fp1, &seq1, false, 1)) + { + eof1 = true; + ++seq1.count; + break; + } + while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0], + join_field_1, join_field_2)); /* Keep reading lines from file2 as long as they continue to match the current line from file1. */ eof2 = false; do - if (!getseq (fp2, &seq2)) - { - eof2 = true; - ++seq2.count; - break; - } - while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1])); + if (!advance_seq (fp2, &seq2, false, 2)) + { + eof2 = true; + ++seq2.count; + break; + } + while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1], + join_field_1, join_field_2)); if (print_pairables) - { - for (i = 0; i < seq1.count - 1; ++i) - { - size_t j; - for (j = 0; j < seq2.count - 1; ++j) - prjoin (&seq1.lines[i], &seq2.lines[j]); - } - } - - for (i = 0; i < seq1.count - 1; ++i) - freeline (&seq1.lines[i]); + { + for (i = 0; i < seq1.count - 1; ++i) + { + size_t j; + for (j = 0; j < seq2.count - 1; ++j) + prjoin (seq1.lines[i], seq2.lines[j]); + } + } + if (!eof1) - { - seq1.lines[0] = seq1.lines[seq1.count - 1]; - seq1.count = 1; - } + { + SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]); + seq1.count = 1; + } else - seq1.count = 0; + seq1.count = 0; - for (i = 0; i < seq2.count - 1; ++i) - freeline (&seq2.lines[i]); if (!eof2) - { - seq2.lines[0] = seq2.lines[seq2.count - 1]; - seq2.count = 1; - } + { + SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]); + seq2.count = 1; + } else - seq2.count = 0; + seq2.count = 0; } - if (print_unpairables_1 && seq1.count) + /* If the user did not specify --nocheck-order, then we read the + tail ends of both inputs to verify that they are in order. We + skip the rest of the tail once we have issued a warning for that + file, unless we actually need to print the unpairable lines. */ + struct line *line = NULL; + bool checktail = false; + + if (check_input_order != CHECK_ORDER_DISABLED + && !(issued_disorder_warning[0] && issued_disorder_warning[1])) + checktail = true; + + if ((print_unpairables_1 || checktail) && seq1.count) { - prjoin (&seq1.lines[0], &uni_blank); - freeline (&seq1.lines[0]); - while (get_line (fp1, &line)) - { - prjoin (&line, &uni_blank); - freeline (&line); - } + if (print_unpairables_1) + prjoin (seq1.lines[0], &uni_blank); + if (seq2.count) + seen_unpairable = true; + while (get_line (fp1, &line, 1)) + { + if (print_unpairables_1) + prjoin (line, &uni_blank); + if (issued_disorder_warning[0] && !print_unpairables_1) + break; + } } - if (print_unpairables_2 && seq2.count) + if ((print_unpairables_2 || checktail) && seq2.count) { - prjoin (&uni_blank, &seq2.lines[0]); - freeline (&seq2.lines[0]); - while (get_line (fp2, &line)) - { - prjoin (&uni_blank, &line); - freeline (&line); - } + if (print_unpairables_2) + prjoin (&uni_blank, seq2.lines[0]); + if (seq1.count) + seen_unpairable = true; + while (get_line (fp2, &line, 2)) + { + if (print_unpairables_2) + prjoin (&uni_blank, line); + if (issued_disorder_warning[1] && !print_unpairables_2) + break; + } } + freeline (line); + free (line); + delseq (&seq1); delseq (&seq2); } -/* Add a field spec for field FIELD of file FILE to `outlist'. */ +/* Add a field spec for field FIELD of file FILE to 'outlist'. */ static void add_field (int file, size_t field) @@ -634,9 +870,9 @@ decode_field_spec (const char *s, int *file_index, size_t *field_index) case '0': if (s[1]) { - /* `0' must be all alone -- no `.FIELD'. */ - error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s)); - } + /* '0' must be all alone -- no '.FIELD'. */ + error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s)); + } *file_index = 0; *field_index = 0; break; @@ -644,25 +880,25 @@ decode_field_spec (const char *s, int *file_index, size_t *field_index) case '1': case '2': if (s[1] != '.') - error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s)); + error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s)); *file_index = s[0] - '0'; *field_index = string_to_join_field (s + 2); break; default: error (EXIT_FAILURE, 0, - _("invalid file number in field spec: %s"), quote (s)); + _("invalid file number in field spec: %s"), quote (s)); /* Tell gcc -W -Wall that we can't get beyond this point. - This avoids a warning (otherwise legit) that the caller's copies - of *file_index and *field_index might be used uninitialized. */ + This avoids a warning (otherwise legit) that the caller's copies + of *file_index and *field_index might be used uninitialized. */ abort (); break; } } -/* Add the comma or blank separated field spec(s) in STR to `outlist'. */ +/* Add the comma or blank separated field spec(s) in STR to 'outlist'. */ static void add_field_list (char *str) @@ -695,7 +931,7 @@ set_join_field (size_t *var, size_t val) unsigned long int var1 = *var + 1; unsigned long int val1 = val + 1; error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"), - var1, val1); + var1, val1); } *var = val; } @@ -723,8 +959,8 @@ enum operand_status static void add_file_name (char *name, char *names[2], - int operand_status[2], int joption_count[2], int *nfiles, - int *prev_optc_status, int *optc_status) + int operand_status[2], int joption_count[2], int *nfiles, + int *prev_optc_status, int *optc_status) { int n = *nfiles; @@ -733,30 +969,30 @@ add_file_name (char *name, char *names[2], bool op0 = (operand_status[0] == MUST_BE_OPERAND); char *arg = names[op0]; switch (operand_status[op0]) - { - case MUST_BE_OPERAND: - error (0, 0, _("extra operand %s"), quote (name)); - usage (EXIT_FAILURE); - - case MIGHT_BE_J1_ARG: - joption_count[0]--; - set_join_field (&join_field_1, string_to_join_field (arg)); - break; - - case MIGHT_BE_J2_ARG: - joption_count[1]--; - set_join_field (&join_field_2, string_to_join_field (arg)); - break; - - case MIGHT_BE_O_ARG: - add_field_list (arg); - break; - } + { + case MUST_BE_OPERAND: + error (0, 0, _("extra operand %s"), quoteaf (name)); + usage (EXIT_FAILURE); + + case MIGHT_BE_J1_ARG: + joption_count[0]--; + set_join_field (&join_field_1, string_to_join_field (arg)); + break; + + case MIGHT_BE_J2_ARG: + joption_count[1]--; + set_join_field (&join_field_2, string_to_join_field (arg)); + break; + + case MIGHT_BE_O_ARG: + add_field_list (arg); + break; + } if (!op0) - { - operand_status[0] = operand_status[1]; - names[0] = names[1]; - } + { + operand_status[0] = operand_status[1]; + names[0] = names[1]; + } n = 1; } @@ -774,120 +1010,144 @@ main (int argc, char **argv) int prev_optc_status = MUST_BE_OPERAND; int operand_status[2]; int joption_count[2] = { 0, 0 }; - char *names[2]; FILE *fp1, *fp2; int optc; int nfiles = 0; int i; initialize_main (&argc, &argv); - program_name = argv[0]; + set_program_name (argv[0]); setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); hard_LC_COLLATE = hard_locale (LC_COLLATE); atexit (close_stdout); + atexit (free_spareline); print_pairables = true; + seen_unpairable = false; + issued_disorder_warning[0] = issued_disorder_warning[1] = false; + check_input_order = CHECK_ORDER_DEFAULT; - while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:", - longopts, NULL)) - != -1) + while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z", + longopts, NULL)) + != -1) { optc_status = MUST_BE_OPERAND; switch (optc) - { - case 'v': - print_pairables = false; - /* Fall through. */ - - case 'a': - { - unsigned long int val; - if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK - || (val != 1 && val != 2)) - error (EXIT_FAILURE, 0, - _("invalid field number: %s"), quote (optarg)); - if (val == 1) - print_unpairables_1 = true; - else - print_unpairables_2 = true; - } - break; - - case 'e': - if (empty_filler && ! STREQ (empty_filler, optarg)) - error (EXIT_FAILURE, 0, - _("conflicting empty-field replacement strings")); - empty_filler = optarg; - break; - - case 'i': - ignore_case = true; - break; - - case '1': - set_join_field (&join_field_1, string_to_join_field (optarg)); - break; - - case '2': - set_join_field (&join_field_2, string_to_join_field (optarg)); - break; - - case 'j': - if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1] - && optarg == argv[optind - 1] + 2) - { - /* The argument was either "-j1" or "-j2". */ - bool is_j2 = (optarg[0] == '2'); - joption_count[is_j2]++; - optc_status = MIGHT_BE_J1_ARG + is_j2; - } - else - { - set_join_field (&join_field_1, string_to_join_field (optarg)); - set_join_field (&join_field_2, join_field_1); - } - break; - - case 'o': - add_field_list (optarg); - optc_status = MIGHT_BE_O_ARG; - break; - - case 't': - { - unsigned char newtab = optarg[0]; - if (! newtab) - error (EXIT_FAILURE, 0, _("empty tab")); - if (optarg[1]) - { - if (STREQ (optarg, "\\0")) - newtab = '\0'; - else - error (EXIT_FAILURE, 0, _("multi-character tab %s"), - quote (optarg)); - } - if (0 <= tab && tab != newtab) - error (EXIT_FAILURE, 0, _("incompatible tabs")); - tab = newtab; - } - break; - - case 1: /* Non-option argument. */ - add_file_name (optarg, names, operand_status, joption_count, - &nfiles, &prev_optc_status, &optc_status); - break; - - case_GETOPT_HELP_CHAR; - - case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); - - default: - usage (EXIT_FAILURE); - } + { + case 'v': + print_pairables = false; + /* Fall through. */ + + case 'a': + { + unsigned long int val; + if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK + || (val != 1 && val != 2)) + error (EXIT_FAILURE, 0, + _("invalid field number: %s"), quote (optarg)); + if (val == 1) + print_unpairables_1 = true; + else + print_unpairables_2 = true; + } + break; + + case 'e': + if (empty_filler && ! STREQ (empty_filler, optarg)) + error (EXIT_FAILURE, 0, + _("conflicting empty-field replacement strings")); + empty_filler = optarg; + break; + + case 'i': + ignore_case = true; + break; + + case '1': + set_join_field (&join_field_1, string_to_join_field (optarg)); + break; + + case '2': + set_join_field (&join_field_2, string_to_join_field (optarg)); + break; + + case 'j': + if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1] + && optarg == argv[optind - 1] + 2) + { + /* The argument was either "-j1" or "-j2". */ + bool is_j2 = (optarg[0] == '2'); + joption_count[is_j2]++; + optc_status = MIGHT_BE_J1_ARG + is_j2; + } + else + { + set_join_field (&join_field_1, string_to_join_field (optarg)); + set_join_field (&join_field_2, join_field_1); + } + break; + + case 'o': + if (STREQ (optarg, "auto")) + autoformat = true; + else + { + add_field_list (optarg); + optc_status = MIGHT_BE_O_ARG; + } + break; + + case 't': + { + unsigned char newtab = optarg[0]; + if (! newtab) + newtab = '\n'; /* '' => process the whole line. */ + else if (optarg[1]) + { + if (STREQ (optarg, "\\0")) + newtab = '\0'; + else + error (EXIT_FAILURE, 0, _("multi-character tab %s"), + quote (optarg)); + } + if (0 <= tab && tab != newtab) + error (EXIT_FAILURE, 0, _("incompatible tabs")); + tab = newtab; + } + break; + + case 'z': + eolchar = 0; + break; + + case NOCHECK_ORDER_OPTION: + check_input_order = CHECK_ORDER_DISABLED; + break; + + case CHECK_ORDER_OPTION: + check_input_order = CHECK_ORDER_ENABLED; + break; + + case 1: /* Non-option argument. */ + add_file_name (optarg, g_names, operand_status, joption_count, + &nfiles, &prev_optc_status, &optc_status); + break; + + case HEADER_LINE_OPTION: + join_header_lines = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } prev_optc_status = optc_status; } @@ -895,15 +1155,15 @@ main (int argc, char **argv) /* Process any operands after "--". */ prev_optc_status = MUST_BE_OPERAND; while (optind < argc) - add_file_name (argv[optind++], names, operand_status, joption_count, - &nfiles, &prev_optc_status, &optc_status); + add_file_name (argv[optind++], g_names, operand_status, joption_count, + &nfiles, &prev_optc_status, &optc_status); if (nfiles != 2) { if (nfiles == 0) - error (0, 0, _("missing operand")); + error (0, 0, _("missing operand")); else - error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); usage (EXIT_FAILURE); } @@ -912,8 +1172,8 @@ main (int argc, char **argv) for (i = 0; i < 2; i++) if (joption_count[i] != 0) { - set_join_field (&join_field_1, i); - set_join_field (&join_field_2, i); + set_join_field (&join_field_1, i); + set_join_field (&join_field_2, i); } if (join_field_1 == SIZE_MAX) @@ -921,20 +1181,23 @@ main (int argc, char **argv) if (join_field_2 == SIZE_MAX) join_field_2 = 0; - fp1 = STREQ (names[0], "-") ? stdin : fopen (names[0], "r"); + fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r"); if (!fp1) - error (EXIT_FAILURE, errno, "%s", names[0]); - fp2 = STREQ (names[1], "-") ? stdin : fopen (names[1], "r"); + error (EXIT_FAILURE, errno, "%s", quotef (g_names[0])); + fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r"); if (!fp2) - error (EXIT_FAILURE, errno, "%s", names[1]); + error (EXIT_FAILURE, errno, "%s", quotef (g_names[1])); if (fp1 == fp2) error (EXIT_FAILURE, errno, _("both files cannot be standard input")); join (fp1, fp2); if (fclose (fp1) != 0) - error (EXIT_FAILURE, errno, "%s", names[0]); + error (EXIT_FAILURE, errno, "%s", quotef (g_names[0])); if (fclose (fp2) != 0) - error (EXIT_FAILURE, errno, "%s", names[1]); + error (EXIT_FAILURE, errno, "%s", quotef (g_names[1])); - exit (EXIT_SUCCESS); + if (issued_disorder_warning[0] || issued_disorder_warning[1]) + return EXIT_FAILURE; + else + return EXIT_SUCCESS; } |