summary | refs | log | tree | commit | diff
path: root/src/join.c
diff options
context:
space:
mode:
author Lorry Tar Creator <lorry-tar-importer@lorry> 2016-01-20 10:55:18 +0000
committer Lorry Tar Creator <lorry-tar-importer@lorry> 2016-01-20 10:55:18 +0000
commit 70e9163c9c18e995515598085cb824e554eb7ae7 (patch)
tree a42dc8b2a6c031354bf31472de888bfc8a060132 /src/join.c
parent cbf5993c43f49281173f185863577d86bfac6eae (diff)
download coreutils-tarball-master.tar.gz
Diffstat (limited to 'src/join.c')
-rw-r--r-- src/join.c 1043
1 files changed, 653 insertions, 390 deletions
diff --git a/src/join.c b/src/join.c
index b113c54..9b25da6 100644
--- a/src/join.c
+++ b/src/join.c
@@ -1,10 +1,10 @@
/* join - join lines of two files on a common field
- Copyright (C) 91, 1995-2006 Free Software Foundation, Inc.
+ Copyright (C) 1991-2016 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -12,8 +12,7 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation,
- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
Written by Mike Haertel, mike@gnu.ai.mit.edu. */
@@ -25,6 +24,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "hard-locale.h"
#include "linebuffer.h"
#include "memcasecmp.h"
@@ -32,14 +32,21 @@
#include "stdio--.h"
#include "xmemcoll.h"
#include "xstrtol.h"
+#include "argmatch.h"
-/* The official name of this program (e.g., no `g' prefix). */
+/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "join"
-#define AUTHORS "Mike Haertel"
+#define AUTHORS proper_name ("Mike Haertel")
#define join system_join
+/* Exchange the two 'struct line *' lvalues A and B.
+ The body is wrapped in do { ... } while (0) with no trailing
+ semicolon, so that an invocation followed by ';' forms exactly one
+ statement and can safely precede an 'else'. The arguments are
+ parenthesized; neither may be an expression named 'tmp'. */
+#define SWAPLINES(a, b) do { \
+ struct line *tmp = (a); \
+ (a) = (b); \
+ (b) = tmp; \
+} while (0)
+
/* An element of the list identifying which fields to print for each
output line. */
struct outlist
@@ -65,8 +72,8 @@ struct field
struct line
{
struct linebuffer buf; /* The line itself. */
- size_t nfields; /* Number of elements in `fields'. */
- size_t nfields_allocated; /* Number of elements allocated for `fields'. */
+ size_t nfields; /* Number of elements in 'fields'. */
+ size_t nfields_allocated; /* Number of elements allocated for 'fields'. */
struct field *fields;
};
@@ -74,13 +81,24 @@ struct line
same join field value. */
struct seq
{
- size_t count; /* Elements used in `lines'. */
- size_t alloc; /* Elements allocated in `lines'. */
- struct line *lines;
+ size_t count; /* Elements used in 'lines'. */
+ size_t alloc; /* Elements allocated in 'lines'. */
+ struct line **lines;
};
-/* The name this program was run with. */
-char *program_name;
+/* The previous line read from each file. */
+static struct line *prevline[2] = {NULL, NULL};
+
+/* The number of lines read from each file. */
+static uintmax_t line_no[2] = {0, 0};
+
+/* The input file names. */
+static char *g_names[2];
+
+/* This provides an extra line buffer for each file. We need these if we
+ try to read two consecutive lines into the same buffer, since we don't
+ want to overwrite the previous buffer before we check order. */
+static struct line *spareline[2] = {NULL, NULL};
/* True if the LC_COLLATE locale is hard. */
static bool hard_LC_COLLATE;
@@ -91,9 +109,22 @@ static bool print_unpairables_1, print_unpairables_2;
/* If nonzero, print pairable lines. */
static bool print_pairables;
+/* If nonzero, we have seen at least one unpairable line. */
+static bool seen_unpairable;
+
+/* If nonzero, we have warned about disorder in that file. */
+static bool issued_disorder_warning[2];
+
/* Empty output field filler. */
static char const *empty_filler;
+/* Whether to ensure the same number of fields are output from each line. */
+static bool autoformat;
+/* The number of fields to output for each line.
+ Only significant when autoformat is true. */
+static size_t autocount_1;
+static size_t autocount_2;
+
/* Field to join on; SIZE_MAX means they haven't been determined yet. */
static size_t join_field_1 = SIZE_MAX;
static size_t join_field_2 = SIZE_MAX;
@@ -101,7 +132,7 @@ static size_t join_field_2 = SIZE_MAX;
/* List of fields to print. */
static struct outlist outlist_head;
-/* Last element in `outlist', where a new element can be added. */
+/* Last element in 'outlist', where a new element can be added. */
static struct outlist *outlist_end = &outlist_head;
/* Tab character separating fields. If negative, fields are separated
@@ -109,9 +140,29 @@ static struct outlist *outlist_end = &outlist_head;
tab character whose value (when cast to unsigned char) equals TAB. */
static int tab = -1;
+/* Whether to check that the input is correctly ordered.
+ main sets CHECK_ORDER_DEFAULT before option parsing; --check-order
+ selects CHECK_ORDER_ENABLED and --nocheck-order selects
+ CHECK_ORDER_DISABLED. In the default state check_order compares
+ lines only once an unpairable line has been seen, and disorder is
+ reported as a warning rather than a fatal error. */
+static enum
+ {
+ CHECK_ORDER_DEFAULT,
+ CHECK_ORDER_ENABLED,
+ CHECK_ORDER_DISABLED
+ } check_input_order;
+
+/* getopt_long return values for the long options --check-order,
+ --nocheck-order and --header (see longopts). Numbering starts at
+ CHAR_MAX + 1. */
+enum
+{
+ CHECK_ORDER_OPTION = CHAR_MAX + 1,
+ NOCHECK_ORDER_OPTION,
+ HEADER_LINE_OPTION
+};
+
+
static struct option const longopts[] =
{
{"ignore-case", no_argument, NULL, 'i'},
+ {"check-order", no_argument, NULL, CHECK_ORDER_OPTION},
+ {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION},
+ {"zero-terminated", no_argument, NULL, 'z'},
+ {"header", no_argument, NULL, HEADER_LINE_OPTION},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
@@ -123,30 +174,42 @@ static struct line uni_blank;
/* If nonzero, ignore case when comparing join fields. */
static bool ignore_case;
+/* If nonzero, treat the first line of each file as column headers:
+ join them without checking for ordering. */
+static bool join_header_lines;
+
+/* The character marking end of line. Default to \n. */
+static char eolchar = '\n';
+
void
usage (int status)
{
if (status != EXIT_SUCCESS)
- fprintf (stderr, _("Try `%s --help' for more information.\n"),
- program_name);
+ emit_try_help ();
else
{
printf (_("\
Usage: %s [OPTION]... FILE1 FILE2\n\
"),
- program_name);
+ program_name);
fputs (_("\
For each pair of input lines with identical join fields, write a line to\n\
-standard output. The default join field is the first, delimited\n\
-by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
+standard output. The default join field is the first, delimited by blanks.\
\n\
- -a FILENUM print unpairable lines coming from file FILENUM, where\n\
+"), stdout);
+ fputs (_("\
+\n\
+When FILE1 or FILE2 (not both) is -, read standard input.\n\
+"), stdout);
+ fputs (_("\
+\n\
+ -a FILENUM also print unpairable lines from file FILENUM, where\n\
FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\
-e EMPTY replace missing input fields with EMPTY\n\
"), stdout);
fputs (_("\
-i, --ignore-case ignore differences in case when comparing fields\n\
- -j FIELD equivalent to `-1 FIELD -2 FIELD'\n\
+ -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\
-o FORMAT obey FORMAT while constructing output line\n\
-t CHAR use CHAR as input and output field separator\n\
"), stdout);
@@ -154,6 +217,14 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
-v FILENUM like -a FILENUM, but suppress joined output lines\n\
-1 FIELD join on this FIELD of file 1\n\
-2 FIELD join on this FIELD of file 2\n\
+ --check-order check that the input is correctly sorted, even\n\
+ if all input lines are pairable\n\
+ --nocheck-order do not check that the input is correctly sorted\n\
+ --header treat the first line in each file as field headers,\n\
+ print them without trying to pair them\n\
+"), stdout);
+ fputs (_("\
+ -z, --zero-terminated line delimiter is NUL, not newline\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
@@ -162,14 +233,19 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\
Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\
else fields are separated by CHAR. Any FIELD is a field number counted\n\
from 1. FORMAT is one or more comma or blank separated specifications,\n\
-each being `FILENUM.FIELD' or `0'. Default FORMAT outputs the join field,\n\
+each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\
the remaining fields from FILE1, the remaining fields from FILE2, all\n\
-separated by CHAR.\n\
+separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
+line of each file determines the number of fields output for each line.\n\
\n\
Important: FILE1 and FILE2 must be sorted on the join fields.\n\
-E.g., use `sort -k 1b,1' if `join' has no options.\n\
+E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\
+or use \"join -t ''\" if 'sort' has no options.\n\
+Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\
+If the input is not sorted and some lines cannot be joined, a\n\
+warning message will be given.\n\
"), stdout);
- printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+ emit_ancillary_info (PROGRAM_NAME);
}
exit (status);
}
@@ -188,7 +264,7 @@ extract_field (struct line *line, char *field, size_t len)
++(line->nfields);
}
-/* Fill in the `fields' structure in LINE. */
+/* Fill in the 'fields' structure in LINE. */
static void
xfields (struct line *line)
@@ -199,108 +275,55 @@ xfields (struct line *line)
if (ptr == lim)
return;
- if (0 <= tab)
+ if (0 <= tab && tab != '\n')
{
char *sep;
for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
- extract_field (line, ptr, sep - ptr);
+ extract_field (line, ptr, sep - ptr);
}
- else
+ else if (tab < 0)
{
/* Skip leading blanks before the first field. */
- while (isblank (to_uchar (*ptr)))
- if (++ptr == lim)
- return;
+ while (field_sep (*ptr))
+ if (++ptr == lim)
+ return;
do
- {
- char *sep;
- for (sep = ptr + 1; sep != lim && ! isblank (to_uchar (*sep)); sep++)
- continue;
- extract_field (line, ptr, sep - ptr);
- if (sep == lim)
- return;
- for (ptr = sep + 1; ptr != lim && isblank (to_uchar (*ptr)); ptr++)
- continue;
- }
+ {
+ char *sep;
+ for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++)
+ continue;
+ extract_field (line, ptr, sep - ptr);
+ if (sep == lim)
+ return;
+ for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++)
+ continue;
+ }
while (ptr != lim);
}
extract_field (line, ptr, lim - ptr);
}
-/* Read a line from FP into LINE and split it into fields.
- Return true if successful. */
-
-static bool
-get_line (FILE *fp, struct line *line)
-{
- initbuffer (&line->buf);
-
- if (! readlinebuffer (&line->buf, fp))
- {
- if (ferror (fp))
- error (EXIT_FAILURE, errno, _("read error"));
- free (line->buf.buffer);
- line->buf.buffer = NULL;
- return false;
- }
-
- line->nfields_allocated = 0;
- line->nfields = 0;
- line->fields = NULL;
- xfields (line);
- return true;
-}
-
static void
freeline (struct line *line)
{
+ if (line == NULL)
+ return;
free (line->fields);
+ line->fields = NULL;
free (line->buf.buffer);
line->buf.buffer = NULL;
}
-static void
-initseq (struct seq *seq)
-{
- seq->count = 0;
- seq->alloc = 0;
- seq->lines = NULL;
-}
-
-/* Read a line from FP and add it to SEQ. Return true if successful. */
-
-static bool
-getseq (FILE *fp, struct seq *seq)
-{
- if (seq->count == seq->alloc)
- seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
-
- if (get_line (fp, &seq->lines[seq->count]))
- {
- ++seq->count;
- return true;
- }
- return false;
-}
-
-static void
-delseq (struct seq *seq)
-{
- size_t i;
- for (i = 0; i < seq->count; i++)
- if (seq->lines[i].buf.buffer)
- freeline (&seq->lines[i]);
- free (seq->lines);
-}
-
/* Return <0 if the join field in LINE1 compares less than the one in LINE2;
>0 if it compares greater; 0 if it compares equal.
- Report an error and exit if the comparison fails. */
+ Report an error and exit if the comparison fails.
+ Use join fields JF_1 and JF_2 respectively. */
static int
-keycmp (struct line const *line1, struct line const *line2)
+keycmp (struct line const *line1, struct line const *line2,
+ size_t jf_1, size_t jf_2)
{
/* Start of field to compare in each file. */
char *beg1;
@@ -310,10 +333,10 @@ keycmp (struct line const *line1, struct line const *line2)
size_t len2; /* Length of fields to compare. */
int diff;
- if (join_field_1 < line1->nfields)
+ if (jf_1 < line1->nfields)
{
- beg1 = line1->fields[join_field_1].beg;
- len1 = line1->fields[join_field_1].len;
+ beg1 = line1->fields[jf_1].beg;
+ len1 = line1->fields[jf_1].len;
}
else
{
@@ -321,10 +344,10 @@ keycmp (struct line const *line1, struct line const *line2)
len1 = 0;
}
- if (join_field_2 < line2->nfields)
+ if (jf_2 < line2->nfields)
{
- beg2 = line2->fields[join_field_2].beg;
- len2 = line2->fields[join_field_2].len;
+ beg2 = line2->fields[jf_2].beg;
+ len2 = line2->fields[jf_2].len;
}
else
{
@@ -346,7 +369,7 @@ keycmp (struct line const *line1, struct line const *line2)
else
{
if (hard_LC_COLLATE)
- return xmemcoll (beg1, len1, beg2, len2);
+ return xmemcoll (beg1, len1, beg2, len2);
diff = memcmp (beg1, beg2, MIN (len1, len2));
}
@@ -355,8 +378,174 @@ keycmp (struct line const *line1, struct line const *line2)
return len1 < len2 ? -1 : len1 != len2;
}
+/* Check that successive input lines PREV and CURRENT from input file
+ WHATFILE are presented in order, unless the user may be relying on
+ the GNU extension that input lines may be out of order if no input
+ lines are unpairable.
+
+ WHATFILE is 1 or 2: it selects join_field_1 or join_field_2 and,
+ less one, indexes g_names, line_no and issued_disorder_warning.
+
+ If the user specified --nocheck-order, the check is not made.
+ If the user specified --check-order, the problem is fatal.
+ Otherwise (the default), the message is simply a warning.
+
+ A message is printed at most once per input file. */
+
+static void
+check_order (const struct line *prev,
+ const struct line *current,
+ int whatfile)
+{
+ /* In the default mode, check only after an unpairable line was seen. */
+ if (check_input_order != CHECK_ORDER_DISABLED
+ && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable))
+ {
+ if (!issued_disorder_warning[whatfile-1])
+ {
+ /* Both lines come from the same file, so the same join field
+ index is used for each side of the comparison. */
+ size_t join_field = whatfile == 1 ? join_field_1 : join_field_2;
+ if (keycmp (prev, current, join_field, join_field) > 0)
+ {
+ /* Exclude any trailing newline. */
+ size_t len = current->buf.length;
+ if (0 < len && current->buf.buffer[len - 1] == '\n')
+ --len;
+
+ /* If the offending line is longer than INT_MAX, output
+ only the first INT_MAX bytes in this diagnostic. */
+ len = MIN (INT_MAX, len);
+
+ /* The status argument is EXIT_FAILURE only for --check-order;
+ otherwise it is 0 and execution continues below. */
+ error ((check_input_order == CHECK_ORDER_ENABLED
+ ? EXIT_FAILURE : 0),
+ 0, _("%s:%"PRIuMAX": is not sorted: %.*s"),
+ g_names[whatfile - 1], line_no[whatfile - 1],
+ (int) len, current->buf.buffer);
+
+ /* If we get to here, the message was merely a warning.
+ Arrange to issue it only once per file. */
+ issued_disorder_warning[whatfile-1] = true;
+ }
+ }
+ }
+}
+
+/* Prepare LINE for reuse by setting its field count to zero.
+ Nothing else in LINE is touched. */
+static inline void
+reset_line (struct line *line)
+{
+ line->nfields = 0;
+}
+
+/* Allocate a zero-filled 'struct line' with xcalloc, store its address
+ in *LINEP, and return that same address. */
+static struct line *
+init_linep (struct line **linep)
+{
+ struct line *line = xcalloc (1, sizeof *line);
+ *linep = line;
+ return line;
+}
+
+/* Read a line from FP, delimited by eolchar, into the line that *LINEP
+ points to, and split it into fields. WHICH is the input file number
+ (1 or 2); WHICH - 1 indexes prevline, spareline and line_no.
+
+ If *LINEP is the same pointer as prevline[WHICH - 1], it is first
+ exchanged with spareline[WHICH - 1] so that the previous line is not
+ overwritten. If the resulting pointer is null, a new line is
+ allocated and stored in *LINEP.
+
+ On success, increment the line count, check the order against the
+ previous line if there is one, remember this line as the previous
+ line, and return true. If no line could be read, free the line's
+ fields and buffer and return false; a read error is reported through
+ error with EXIT_FAILURE. */
+
+static bool
+get_line (FILE *fp, struct line **linep, int which)
+{
+ struct line *line = *linep;
+
+ /* Do not read into the buffer that still holds the previous line. */
+ if (line == prevline[which - 1])
+ {
+ SWAPLINES (line, spareline[which - 1]);
+ *linep = line;
+ }
+
+ if (line)
+ reset_line (line);
+ else
+ line = init_linep (linep);
+
+ if (! readlinebuffer_delim (&line->buf, fp, eolchar))
+ {
+ if (ferror (fp))
+ error (EXIT_FAILURE, errno, _("read error"));
+ freeline (line);
+ return false;
+ }
+ ++line_no[which - 1];
+
+ xfields (line);
+
+ if (prevline[which - 1])
+ check_order (prevline[which - 1], line, which);
+
+ prevline[which - 1] = line;
+ return true;
+}
+
+/* Release every non-null entry of spareline: first its contents via
+ freeline, then the 'struct line' itself. main registers this
+ function with atexit. */
+static void
+free_spareline (void)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_CARDINALITY (spareline); i++)
+ {
+ if (spareline[i])
+ {
+ freeline (spareline[i]);
+ free (spareline[i]);
+ }
+ }
+}
+
+/* Initialize SEQ to the empty state: no lines in use, none allocated,
+ and a null 'lines' array. */
+static void
+initseq (struct seq *seq)
+{
+ seq->count = 0;
+ seq->alloc = 0;
+ seq->lines = NULL;
+}
+
+/* Read a line from FP and add it to SEQ. Return true if successful.
+ WHICHFILE is passed through to get_line as the input file number.
+ SEQ->count is incremented only when a line was read. */
+
+static bool
+getseq (FILE *fp, struct seq *seq, int whichfile)
+{
+ if (seq->count == seq->alloc)
+ {
+ size_t i;
+ seq->lines = X2NREALLOC (seq->lines, &seq->alloc);
+ /* Null out the newly added slots; get_line allocates a line
+ when it is handed a null pointer. */
+ for (i = seq->count; i < seq->alloc; i++)
+ seq->lines[i] = NULL;
+ }
+
+ if (get_line (fp, &seq->lines[seq->count], whichfile))
+ {
+ ++seq->count;
+ return true;
+ }
+ return false;
+}
+
+/* Read a line from FP and add it to SEQ, as the first item if FIRST is
+ true, else as the next. When FIRST is true only SEQ->count is reset;
+ the line slots themselves are kept. WHICHFILE is passed through to
+ getseq, whose result is returned. */
+static bool
+advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile)
+{
+ if (first)
+ seq->count = 0;
+
+ return getseq (fp, seq, whichfile);
+}
+
+/* Free all storage held by SEQ: every slot up to SEQ->alloc (not just
+ the SEQ->count slots in use), then the 'lines' array itself.
+ Slots that were never filled are null (getseq nulls new slots);
+ freeline returns early for a null pointer. */
+static void
+delseq (struct seq *seq)
+{
+ size_t i;
+ for (i = 0; i < seq->alloc; i++)
+ {
+ freeline (seq->lines[i]);
+ free (seq->lines[i]);
+ }
+ free (seq->lines);
+}
+
+
/* Print field N of LINE if it exists and is nonempty, otherwise
- `empty_filler' if it is nonempty. */
+ 'empty_filler' if it is nonempty. */
static void
prfield (size_t n, struct line const *line)
@@ -367,14 +556,35 @@ prfield (size_t n, struct line const *line)
{
len = line->fields[n].len;
if (len)
- fwrite (line->fields[n].beg, 1, len, stdout);
+ fwrite (line->fields[n].beg, 1, len, stdout);
else if (empty_filler)
- fputs (empty_filler, stdout);
+ fputs (empty_filler, stdout);
}
else if (empty_filler)
fputs (empty_filler, stdout);
}
+/* Output all the fields in LINE, other than the join field.
+ JOIN_FIELD is the index of the field to skip. Each field printed is
+ preceded by the output separator: a space when 'tab' is negative,
+ otherwise 'tab'. When autoformat is set, AUTOCOUNT is used as the
+ number of fields instead of LINE's own field count. */
+
+static void
+prfields (struct line const *line, size_t join_field, size_t autocount)
+{
+ size_t i;
+ size_t nfields = autoformat ? autocount : line->nfields;
+ char output_separator = tab < 0 ? ' ' : tab;
+
+ /* Fields before the join field. */
+ for (i = 0; i < join_field && i < nfields; ++i)
+ {
+ putchar (output_separator);
+ prfield (i, line);
+ }
+ /* Fields after the join field. */
+ for (i = join_field + 1; i < nfields; ++i)
+ {
+ putchar (output_separator);
+ prfield (i, line);
+ }
+}
+
/* Print the join of LINE1 and LINE2. */
static void
@@ -382,6 +592,8 @@ prjoin (struct line const *line1, struct line const *line2)
{
const struct outlist *outlist;
char output_separator = tab < 0 ? ' ' : tab;
+ size_t field;
+ struct line const *line;
outlist = outlist_head.next;
if (outlist)
@@ -390,70 +602,54 @@ prjoin (struct line const *line1, struct line const *line2)
o = outlist;
while (1)
- {
- size_t field;
- struct line const *line;
-
- if (o->file == 0)
- {
- if (line1 == &uni_blank)
- {
- line = line2;
- field = join_field_2;
- }
- else
- {
- line = line1;
- field = join_field_1;
- }
- }
- else
- {
- line = (o->file == 1 ? line1 : line2);
- field = o->field;
- }
- prfield (field, line);
- o = o->next;
- if (o == NULL)
- break;
- putchar (output_separator);
- }
- putchar ('\n');
+ {
+ if (o->file == 0)
+ {
+ if (line1 == &uni_blank)
+ {
+ line = line2;
+ field = join_field_2;
+ }
+ else
+ {
+ line = line1;
+ field = join_field_1;
+ }
+ }
+ else
+ {
+ line = (o->file == 1 ? line1 : line2);
+ field = o->field;
+ }
+ prfield (field, line);
+ o = o->next;
+ if (o == NULL)
+ break;
+ putchar (output_separator);
+ }
+ putchar (eolchar);
}
else
{
- size_t i;
-
if (line1 == &uni_blank)
- {
- struct line const *t;
- t = line1;
- line1 = line2;
- line2 = t;
- }
- prfield (join_field_1, line1);
- for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
- {
- putchar (output_separator);
- prfield (i, line1);
- }
- for (i = join_field_1 + 1; i < line1->nfields; ++i)
- {
- putchar (output_separator);
- prfield (i, line1);
- }
-
- for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
- {
- putchar (output_separator);
- prfield (i, line2);
- }
- for (i = join_field_2 + 1; i < line2->nfields; ++i)
- {
- putchar (output_separator);
- prfield (i, line2);
- }
- putchar ('\n');
+ {
+ line = line2;
+ field = join_field_2;
+ }
+ else
+ {
+ line = line1;
+ field = join_field_1;
+ }
+
+ /* Output the join field. */
+ prfield (field, line);
+
+ /* Output other fields. */
+ prfields (line1, join_field_1, autocount_1);
+ prfields (line2, join_field_2, autocount_2);
+
+ putchar (eolchar);
}
}
@@ -463,121 +659,161 @@ static void
join (FILE *fp1, FILE *fp2)
{
struct seq seq1, seq2;
- struct line line;
int diff;
bool eof1, eof2;
+ fadvise (fp1, FADVISE_SEQUENTIAL);
+ fadvise (fp2, FADVISE_SEQUENTIAL);
+
/* Read the first line of each file. */
initseq (&seq1);
- getseq (fp1, &seq1);
+ getseq (fp1, &seq1, 1);
initseq (&seq2);
- getseq (fp2, &seq2);
+ getseq (fp2, &seq2, 2);
+
+ if (autoformat)
+ {
+ autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
+ autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
+ }
+
+ if (join_header_lines && (seq1.count || seq2.count))
+ {
+ struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank;
+ struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank;
+ prjoin (hline1, hline2);
+ prevline[0] = NULL;
+ prevline[1] = NULL;
+ if (seq1.count)
+ advance_seq (fp1, &seq1, true, 1);
+ if (seq2.count)
+ advance_seq (fp2, &seq2, true, 2);
+ }
while (seq1.count && seq2.count)
{
size_t i;
- diff = keycmp (&seq1.lines[0], &seq2.lines[0]);
+ diff = keycmp (seq1.lines[0], seq2.lines[0],
+ join_field_1, join_field_2);
if (diff < 0)
- {
- if (print_unpairables_1)
- prjoin (&seq1.lines[0], &uni_blank);
- freeline (&seq1.lines[0]);
- seq1.count = 0;
- getseq (fp1, &seq1);
- continue;
- }
+ {
+ if (print_unpairables_1)
+ prjoin (seq1.lines[0], &uni_blank);
+ advance_seq (fp1, &seq1, true, 1);
+ seen_unpairable = true;
+ continue;
+ }
if (diff > 0)
- {
- if (print_unpairables_2)
- prjoin (&uni_blank, &seq2.lines[0]);
- freeline (&seq2.lines[0]);
- seq2.count = 0;
- getseq (fp2, &seq2);
- continue;
- }
+ {
+ if (print_unpairables_2)
+ prjoin (&uni_blank, seq2.lines[0]);
+ advance_seq (fp2, &seq2, true, 2);
+ seen_unpairable = true;
+ continue;
+ }
/* Keep reading lines from file1 as long as they continue to
match the current line from file2. */
eof1 = false;
do
- if (!getseq (fp1, &seq1))
- {
- eof1 = true;
- ++seq1.count;
- break;
- }
- while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0]));
+ if (!advance_seq (fp1, &seq1, false, 1))
+ {
+ eof1 = true;
+ ++seq1.count;
+ break;
+ }
+ while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0],
+ join_field_1, join_field_2));
/* Keep reading lines from file2 as long as they continue to
match the current line from file1. */
eof2 = false;
do
- if (!getseq (fp2, &seq2))
- {
- eof2 = true;
- ++seq2.count;
- break;
- }
- while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1]));
+ if (!advance_seq (fp2, &seq2, false, 2))
+ {
+ eof2 = true;
+ ++seq2.count;
+ break;
+ }
+ while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1],
+ join_field_1, join_field_2));
if (print_pairables)
- {
- for (i = 0; i < seq1.count - 1; ++i)
- {
- size_t j;
- for (j = 0; j < seq2.count - 1; ++j)
- prjoin (&seq1.lines[i], &seq2.lines[j]);
- }
- }
-
- for (i = 0; i < seq1.count - 1; ++i)
- freeline (&seq1.lines[i]);
+ {
+ for (i = 0; i < seq1.count - 1; ++i)
+ {
+ size_t j;
+ for (j = 0; j < seq2.count - 1; ++j)
+ prjoin (seq1.lines[i], seq2.lines[j]);
+ }
+ }
+
if (!eof1)
- {
- seq1.lines[0] = seq1.lines[seq1.count - 1];
- seq1.count = 1;
- }
+ {
+ SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]);
+ seq1.count = 1;
+ }
else
- seq1.count = 0;
+ seq1.count = 0;
- for (i = 0; i < seq2.count - 1; ++i)
- freeline (&seq2.lines[i]);
if (!eof2)
- {
- seq2.lines[0] = seq2.lines[seq2.count - 1];
- seq2.count = 1;
- }
+ {
+ SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]);
+ seq2.count = 1;
+ }
else
- seq2.count = 0;
+ seq2.count = 0;
}
- if (print_unpairables_1 && seq1.count)
+ /* If the user did not specify --nocheck-order, then we read the
+ tail ends of both inputs to verify that they are in order. We
+ skip the rest of the tail once we have issued a warning for that
+ file, unless we actually need to print the unpairable lines. */
+ struct line *line = NULL;
+ bool checktail = false;
+
+ if (check_input_order != CHECK_ORDER_DISABLED
+ && !(issued_disorder_warning[0] && issued_disorder_warning[1]))
+ checktail = true;
+
+ if ((print_unpairables_1 || checktail) && seq1.count)
{
- prjoin (&seq1.lines[0], &uni_blank);
- freeline (&seq1.lines[0]);
- while (get_line (fp1, &line))
- {
- prjoin (&line, &uni_blank);
- freeline (&line);
- }
+ if (print_unpairables_1)
+ prjoin (seq1.lines[0], &uni_blank);
+ if (seq2.count)
+ seen_unpairable = true;
+ while (get_line (fp1, &line, 1))
+ {
+ if (print_unpairables_1)
+ prjoin (line, &uni_blank);
+ if (issued_disorder_warning[0] && !print_unpairables_1)
+ break;
+ }
}
- if (print_unpairables_2 && seq2.count)
+ if ((print_unpairables_2 || checktail) && seq2.count)
{
- prjoin (&uni_blank, &seq2.lines[0]);
- freeline (&seq2.lines[0]);
- while (get_line (fp2, &line))
- {
- prjoin (&uni_blank, &line);
- freeline (&line);
- }
+ if (print_unpairables_2)
+ prjoin (&uni_blank, seq2.lines[0]);
+ if (seq1.count)
+ seen_unpairable = true;
+ while (get_line (fp2, &line, 2))
+ {
+ if (print_unpairables_2)
+ prjoin (&uni_blank, line);
+ if (issued_disorder_warning[1] && !print_unpairables_2)
+ break;
+ }
}
+ freeline (line);
+ free (line);
+
delseq (&seq1);
delseq (&seq2);
}
-/* Add a field spec for field FIELD of file FILE to `outlist'. */
+/* Add a field spec for field FIELD of file FILE to 'outlist'. */
static void
add_field (int file, size_t field)
@@ -634,9 +870,9 @@ decode_field_spec (const char *s, int *file_index, size_t *field_index)
case '0':
if (s[1])
{
- /* `0' must be all alone -- no `.FIELD'. */
- error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
- }
+ /* '0' must be all alone -- no '.FIELD'. */
+ error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
+ }
*file_index = 0;
*field_index = 0;
break;
@@ -644,25 +880,25 @@ decode_field_spec (const char *s, int *file_index, size_t *field_index)
case '1':
case '2':
if (s[1] != '.')
- error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
+ error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s));
*file_index = s[0] - '0';
*field_index = string_to_join_field (s + 2);
break;
default:
error (EXIT_FAILURE, 0,
- _("invalid file number in field spec: %s"), quote (s));
+ _("invalid file number in field spec: %s"), quote (s));
/* Tell gcc -W -Wall that we can't get beyond this point.
- This avoids a warning (otherwise legit) that the caller's copies
- of *file_index and *field_index might be used uninitialized. */
+ This avoids a warning (otherwise legit) that the caller's copies
+ of *file_index and *field_index might be used uninitialized. */
abort ();
break;
}
}
-/* Add the comma or blank separated field spec(s) in STR to `outlist'. */
+/* Add the comma or blank separated field spec(s) in STR to 'outlist'. */
static void
add_field_list (char *str)
@@ -695,7 +931,7 @@ set_join_field (size_t *var, size_t val)
unsigned long int var1 = *var + 1;
unsigned long int val1 = val + 1;
error (EXIT_FAILURE, 0, _("incompatible join fields %lu, %lu"),
- var1, val1);
+ var1, val1);
}
*var = val;
}
@@ -723,8 +959,8 @@ enum operand_status
static void
add_file_name (char *name, char *names[2],
- int operand_status[2], int joption_count[2], int *nfiles,
- int *prev_optc_status, int *optc_status)
+ int operand_status[2], int joption_count[2], int *nfiles,
+ int *prev_optc_status, int *optc_status)
{
int n = *nfiles;
@@ -733,30 +969,30 @@ add_file_name (char *name, char *names[2],
bool op0 = (operand_status[0] == MUST_BE_OPERAND);
char *arg = names[op0];
switch (operand_status[op0])
- {
- case MUST_BE_OPERAND:
- error (0, 0, _("extra operand %s"), quote (name));
- usage (EXIT_FAILURE);
-
- case MIGHT_BE_J1_ARG:
- joption_count[0]--;
- set_join_field (&join_field_1, string_to_join_field (arg));
- break;
-
- case MIGHT_BE_J2_ARG:
- joption_count[1]--;
- set_join_field (&join_field_2, string_to_join_field (arg));
- break;
-
- case MIGHT_BE_O_ARG:
- add_field_list (arg);
- break;
- }
+ {
+ case MUST_BE_OPERAND:
+ error (0, 0, _("extra operand %s"), quoteaf (name));
+ usage (EXIT_FAILURE);
+
+ case MIGHT_BE_J1_ARG:
+ joption_count[0]--;
+ set_join_field (&join_field_1, string_to_join_field (arg));
+ break;
+
+ case MIGHT_BE_J2_ARG:
+ joption_count[1]--;
+ set_join_field (&join_field_2, string_to_join_field (arg));
+ break;
+
+ case MIGHT_BE_O_ARG:
+ add_field_list (arg);
+ break;
+ }
if (!op0)
- {
- operand_status[0] = operand_status[1];
- names[0] = names[1];
- }
+ {
+ operand_status[0] = operand_status[1];
+ names[0] = names[1];
+ }
n = 1;
}
@@ -774,120 +1010,144 @@ main (int argc, char **argv)
int prev_optc_status = MUST_BE_OPERAND;
int operand_status[2];
int joption_count[2] = { 0, 0 };
- char *names[2];
FILE *fp1, *fp2;
int optc;
int nfiles = 0;
int i;
initialize_main (&argc, &argv);
- program_name = argv[0];
+ set_program_name (argv[0]);
setlocale (LC_ALL, "");
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
hard_LC_COLLATE = hard_locale (LC_COLLATE);
atexit (close_stdout);
+ atexit (free_spareline);
print_pairables = true;
+ seen_unpairable = false;
+ issued_disorder_warning[0] = issued_disorder_warning[1] = false;
+ check_input_order = CHECK_ORDER_DEFAULT;
- while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:",
- longopts, NULL))
- != -1)
+ while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z",
+ longopts, NULL))
+ != -1)
{
optc_status = MUST_BE_OPERAND;
switch (optc)
- {
- case 'v':
- print_pairables = false;
- /* Fall through. */
-
- case 'a':
- {
- unsigned long int val;
- if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
- || (val != 1 && val != 2))
- error (EXIT_FAILURE, 0,
- _("invalid field number: %s"), quote (optarg));
- if (val == 1)
- print_unpairables_1 = true;
- else
- print_unpairables_2 = true;
- }
- break;
-
- case 'e':
- if (empty_filler && ! STREQ (empty_filler, optarg))
- error (EXIT_FAILURE, 0,
- _("conflicting empty-field replacement strings"));
- empty_filler = optarg;
- break;
-
- case 'i':
- ignore_case = true;
- break;
-
- case '1':
- set_join_field (&join_field_1, string_to_join_field (optarg));
- break;
-
- case '2':
- set_join_field (&join_field_2, string_to_join_field (optarg));
- break;
-
- case 'j':
- if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
- && optarg == argv[optind - 1] + 2)
- {
- /* The argument was either "-j1" or "-j2". */
- bool is_j2 = (optarg[0] == '2');
- joption_count[is_j2]++;
- optc_status = MIGHT_BE_J1_ARG + is_j2;
- }
- else
- {
- set_join_field (&join_field_1, string_to_join_field (optarg));
- set_join_field (&join_field_2, join_field_1);
- }
- break;
-
- case 'o':
- add_field_list (optarg);
- optc_status = MIGHT_BE_O_ARG;
- break;
-
- case 't':
- {
- unsigned char newtab = optarg[0];
- if (! newtab)
- error (EXIT_FAILURE, 0, _("empty tab"));
- if (optarg[1])
- {
- if (STREQ (optarg, "\\0"))
- newtab = '\0';
- else
- error (EXIT_FAILURE, 0, _("multi-character tab %s"),
- quote (optarg));
- }
- if (0 <= tab && tab != newtab)
- error (EXIT_FAILURE, 0, _("incompatible tabs"));
- tab = newtab;
- }
- break;
-
- case 1: /* Non-option argument. */
- add_file_name (optarg, names, operand_status, joption_count,
- &nfiles, &prev_optc_status, &optc_status);
- break;
-
- case_GETOPT_HELP_CHAR;
-
- case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
-
- default:
- usage (EXIT_FAILURE);
- }
+ {
+ case 'v':
+ print_pairables = false;
+ /* Fall through. */
+
+ case 'a':
+ {
+ unsigned long int val;
+ if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
+ || (val != 1 && val != 2))
+ error (EXIT_FAILURE, 0,
+ _("invalid field number: %s"), quote (optarg));
+ if (val == 1)
+ print_unpairables_1 = true;
+ else
+ print_unpairables_2 = true;
+ }
+ break;
+
+ case 'e':
+ if (empty_filler && ! STREQ (empty_filler, optarg))
+ error (EXIT_FAILURE, 0,
+ _("conflicting empty-field replacement strings"));
+ empty_filler = optarg;
+ break;
+
+ case 'i':
+ ignore_case = true;
+ break;
+
+ case '1':
+ set_join_field (&join_field_1, string_to_join_field (optarg));
+ break;
+
+ case '2':
+ set_join_field (&join_field_2, string_to_join_field (optarg));
+ break;
+
+ case 'j':
+ if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1]
+ && optarg == argv[optind - 1] + 2)
+ {
+ /* The argument was either "-j1" or "-j2". */
+ bool is_j2 = (optarg[0] == '2');
+ joption_count[is_j2]++;
+ optc_status = MIGHT_BE_J1_ARG + is_j2;
+ }
+ else
+ {
+ set_join_field (&join_field_1, string_to_join_field (optarg));
+ set_join_field (&join_field_2, join_field_1);
+ }
+ break;
+
+ case 'o':
+ if (STREQ (optarg, "auto"))
+ autoformat = true;
+ else
+ {
+ add_field_list (optarg);
+ optc_status = MIGHT_BE_O_ARG;
+ }
+ break;
+
+ case 't':
+ {
+ unsigned char newtab = optarg[0];
+ if (! newtab)
+ newtab = '\n'; /* '' => process the whole line. */
+ else if (optarg[1])
+ {
+ if (STREQ (optarg, "\\0"))
+ newtab = '\0';
+ else
+ error (EXIT_FAILURE, 0, _("multi-character tab %s"),
+ quote (optarg));
+ }
+ if (0 <= tab && tab != newtab)
+ error (EXIT_FAILURE, 0, _("incompatible tabs"));
+ tab = newtab;
+ }
+ break;
+
+ case 'z':
+ eolchar = 0;
+ break;
+
+ case NOCHECK_ORDER_OPTION:
+ check_input_order = CHECK_ORDER_DISABLED;
+ break;
+
+ case CHECK_ORDER_OPTION:
+ check_input_order = CHECK_ORDER_ENABLED;
+ break;
+
+ case 1: /* Non-option argument. */
+ add_file_name (optarg, g_names, operand_status, joption_count,
+ &nfiles, &prev_optc_status, &optc_status);
+ break;
+
+ case HEADER_LINE_OPTION:
+ join_header_lines = true;
+ break;
+
+ case_GETOPT_HELP_CHAR;
+
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+ default:
+ usage (EXIT_FAILURE);
+ }
prev_optc_status = optc_status;
}
@@ -895,15 +1155,15 @@ main (int argc, char **argv)
/* Process any operands after "--". */
prev_optc_status = MUST_BE_OPERAND;
while (optind < argc)
- add_file_name (argv[optind++], names, operand_status, joption_count,
- &nfiles, &prev_optc_status, &optc_status);
+ add_file_name (argv[optind++], g_names, operand_status, joption_count,
+ &nfiles, &prev_optc_status, &optc_status);
if (nfiles != 2)
{
if (nfiles == 0)
- error (0, 0, _("missing operand"));
+ error (0, 0, _("missing operand"));
else
- error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
+ error (0, 0, _("missing operand after %s"), quote (argv[argc - 1]));
usage (EXIT_FAILURE);
}
@@ -912,8 +1172,8 @@ main (int argc, char **argv)
for (i = 0; i < 2; i++)
if (joption_count[i] != 0)
{
- set_join_field (&join_field_1, i);
- set_join_field (&join_field_2, i);
+ set_join_field (&join_field_1, i);
+ set_join_field (&join_field_2, i);
}
if (join_field_1 == SIZE_MAX)
@@ -921,20 +1181,23 @@ main (int argc, char **argv)
if (join_field_2 == SIZE_MAX)
join_field_2 = 0;
- fp1 = STREQ (names[0], "-") ? stdin : fopen (names[0], "r");
+ fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r");
if (!fp1)
- error (EXIT_FAILURE, errno, "%s", names[0]);
- fp2 = STREQ (names[1], "-") ? stdin : fopen (names[1], "r");
+ error (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
+ fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r");
if (!fp2)
- error (EXIT_FAILURE, errno, "%s", names[1]);
+ error (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
if (fp1 == fp2)
error (EXIT_FAILURE, errno, _("both files cannot be standard input"));
join (fp1, fp2);
if (fclose (fp1) != 0)
- error (EXIT_FAILURE, errno, "%s", names[0]);
+ error (EXIT_FAILURE, errno, "%s", quotef (g_names[0]));
if (fclose (fp2) != 0)
- error (EXIT_FAILURE, errno, "%s", names[1]);
+ error (EXIT_FAILURE, errno, "%s", quotef (g_names[1]));
- exit (EXIT_SUCCESS);
+ if (issued_disorder_warning[0] || issued_disorder_warning[1])
+ return EXIT_FAILURE;
+ else
+ return EXIT_SUCCESS;
}