summary | refs | log | tree | commit | diff
path: root/src/split.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/split.c')
-rw-r--r-- src/split.c 1640
1 files changed, 1335 insertions, 305 deletions
diff --git a/src/split.c b/src/split.c
index 1f0f3d7..510af13 100644
--- a/src/split.c
+++ b/src/split.c
@@ -1,10 +1,10 @@
/* split.c -- split a file into pieces.
- Copyright (C) 1988, 1991, 1995-2006 Free Software Foundation, Inc.
+ Copyright (C) 1988-2016 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
+ This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -12,42 +12,56 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation,
- Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
-
-/* By tege@sics.se, with rms.
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
- To do:
- * Implement -t CHAR or -t REGEX to specify break characters other
- than newline. */
+/* By tege@sics.se, with rms.
+ TODO:
+ * support -p REGEX as in BSD's split.
+ * support --suppress-matched as in csplit. */
#include <config.h>
+#include <assert.h>
#include <stdio.h>
#include <getopt.h>
+#include <signal.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include "system.h"
#include "error.h"
#include "fd-reopen.h"
#include "fcntl--.h"
-#include "getpagesize.h"
-#include "full-read.h"
#include "full-write.h"
-#include "inttostr.h"
+#include "ioblksize.h"
#include "quote.h"
#include "safe-read.h"
+#include "sig2str.h"
+#include "xfreopen.h"
+#include "xdectoint.h"
#include "xstrtol.h"
-/* The official name of this program (e.g., no `g' prefix). */
+/* The official name of this program (e.g., no 'g' prefix). */
#define PROGRAM_NAME "split"
-#define AUTHORS "Torbjorn Granlund", "Richard M. Stallman"
+#define AUTHORS \
+ proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
+ proper_name ("Richard M. Stallman")
-#define DEFAULT_SUFFIX_LENGTH 2
+/* Shell command to filter through, instead of creating files. */
+static char const *filter_command;
-/* The name this program was run with. */
-char *program_name;
+/* Process ID of the filter. */
+static int filter_pid;
+
+/* Array of open pipes. */
+static int *open_pipes;
+static size_t open_pipes_alloc;
+static size_t n_open_pipes;
+
+/* Blocked signals. */
+static sigset_t oldblocked;
+static sigset_t newblocked;
/* Base name of output files. */
static char const *outbase;
@@ -59,27 +73,59 @@ static char *outfile;
Suffixes are inserted here. */
static char *outfile_mid;
+/* Generate new suffix when suffixes are exhausted. */
+static bool suffix_auto = true;
+
/* Length of OUTFILE's suffix. */
-static size_t suffix_length = DEFAULT_SUFFIX_LENGTH;
+static size_t suffix_length;
/* Alphabet of characters to use in suffix. */
static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
+/* Numerical suffix start value. */
+static const char *numeric_suffix_start;
+
+/* Additional suffix to append to output file names. */
+static char const *additional_suffix;
+
/* Name of input file. May be "-". */
static char *infile;
+/* stat buf for input file. */
+static struct stat in_stat_buf;
+
/* Descriptor on which output file is open. */
-static int output_desc;
+static int output_desc = -1;
/* If true, print a diagnostic on standard error just before each
output file is opened. */
static bool verbose;
+/* If true, don't generate zero length output files. */
+static bool elide_empty_files;
+
+/* If true, in round robin mode, immediately copy
+ input to output, which is much slower, so disabled by default. */
+static bool unbuffered;
+
+/* The character marking end of line. Defaults to \n below. */
+static int eolchar = -1;
+
+/* The split mode to use.
+ set_suffix_length treats type_chunk_bytes, type_chunk_lines and
+ type_rr as the modes in which the number of output files is given,
+ and computes the suffix length from that number for them. */
+enum Split_type
+{
+ type_undef, type_bytes, type_byteslines, type_lines, type_digits,
+ type_chunk_bytes, type_chunk_lines, type_rr
+};
+
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum
{
- VERBOSE_OPTION = CHAR_MAX + 1
+ VERBOSE_OPTION = CHAR_MAX + 1,
+ FILTER_OPTION,
+ IO_BLKSIZE_OPTION,
+ ADDITIONAL_SUFFIX_OPTION
};
static struct option const longopts[] =
@@ -87,92 +133,259 @@ static struct option const longopts[] =
{"bytes", required_argument, NULL, 'b'},
{"lines", required_argument, NULL, 'l'},
{"line-bytes", required_argument, NULL, 'C'},
+ {"number", required_argument, NULL, 'n'},
+ {"elide-empty-files", no_argument, NULL, 'e'},
+ {"unbuffered", no_argument, NULL, 'u'},
{"suffix-length", required_argument, NULL, 'a'},
- {"numeric-suffixes", no_argument, NULL, 'd'},
+ {"additional-suffix", required_argument, NULL,
+ ADDITIONAL_SUFFIX_OPTION},
+ {"numeric-suffixes", optional_argument, NULL, 'd'},
+ {"filter", required_argument, NULL, FILTER_OPTION},
{"verbose", no_argument, NULL, VERBOSE_OPTION},
+ {"separator", required_argument, NULL, 't'},
+ {"-io-blksize", required_argument, NULL,
+ IO_BLKSIZE_OPTION}, /* do not document */
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
};
+/* Report whether ERR is an errno value that may be disregarded.
+   Only EPIPE qualifies, and only while FILTER_COMMAND is set.  */
+static inline bool
+ignorable (int err)
+{
+  if (! filter_command)
+    return false;
+  return err == EPIPE;
+}
+
+/* Choose the length of the generated output file name suffixes.
+   N_UNITS is the number of output files requested; it is consulted
+   only when SPLIT_TYPE is type_chunk_bytes, type_chunk_lines or
+   type_rr.  On return SUFFIX_LENGTH is nonzero.  If the user supplied
+   a suffix length that is too short to name every file, exit with a
+   diagnostic.  SUFFIX_AUTO is cleared whenever automatic widening of
+   the suffix must not be used.  */
+static void
+set_suffix_length (uintmax_t n_units, enum Split_type split_type)
+{
+#define DEFAULT_SUFFIX_LENGTH 2
+
+  uintmax_t suffix_needed = 0;
+
+  /* The suffix auto length feature is incompatible with
+     a user specified start value as the generated suffixes
+     are not all consecutive.  */
+  if (numeric_suffix_start)
+    suffix_auto = false;
+
+  /* Auto-calculate the suffix length if the number of files is given.  */
+  if (split_type == type_chunk_bytes || split_type == type_chunk_lines
+      || split_type == type_rr)
+    {
+      /* Index of the last suffix that will be generated.  Suffixes
+         count from 0, so N_UNITS files end at index N_UNITS - 1.  */
+      uintmax_t n_units_end = n_units ? n_units - 1 : 0;
+      if (numeric_suffix_start)
+        {
+          uintmax_t n_start;
+          strtol_error e = xstrtoumax (numeric_suffix_start, NULL, 10,
+                                       &n_start, "");
+          if (e == LONGINT_OK && n_start <= UINTMAX_MAX - n_units)
+            {
+              /* Restrict auto adjustment so we don't keep
+                 incrementing a suffix size arbitrarily,
+                 as that would break sort order for files
+                 generated from multiple split runs.  */
+              if (n_start < n_units)
+                n_units_end += n_start;
+            }
+        }
+
+      /* The suffix must be able to spell N_UNITS_END in base
+         ALPHABET_LEN, so count its digits.  Counting the digits of the
+         file count itself, with a correction only when that count is
+         not a multiple of ALPHABET_LEN, comes out one short for counts
+         such as 20 with a 10 character alphabet.  */
+      size_t alphabet_len = strlen (suffix_alphabet);
+      do
+        suffix_needed++;
+      while (n_units_end /= alphabet_len);
+
+      suffix_auto = false;
+    }
+
+  if (suffix_length)            /* set by user */
+    {
+      if (suffix_length < suffix_needed)
+        {
+          error (EXIT_FAILURE, 0,
+                 _("the suffix length needs to be at least %"PRIuMAX),
+                 suffix_needed);
+        }
+      suffix_auto = false;
+      return;
+    }
+  else
+    suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
+}
+
/* Print usage information and exit with STATUS.  When STATUS is not
   EXIT_SUCCESS only emit_try_help () is called; otherwise the option
   summary is printed with printf/fputs on stdout, together with the
   emit_* helper notes.  Does not return.  */
void
usage (int status)
{
if (status != EXIT_SUCCESS)
- fprintf (stderr, _("Try `%s --help' for more information.\n"),
- program_name);
+ emit_try_help ();
else
{
printf (_("\
-Usage: %s [OPTION] [INPUT [PREFIX]]\n\
+Usage: %s [OPTION]... [FILE [PREFIX]]\n\
"),
- program_name);
- fputs (_("\
-Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default\n\
-size is 1000 lines, and default PREFIX is `x'. With no INPUT, or when INPUT\n\
-is -, read standard input.\n\
-\n\
-"), stdout);
+ program_name);
fputs (_("\
-Mandatory arguments to long options are mandatory for short options too.\n\
+Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\
+default size is 1000 lines, and default PREFIX is 'x'.\n\
"), stdout);
+
+ emit_stdin_note ();
+ emit_mandatory_arg_note ();
+
fprintf (stdout, _("\
- -a, --suffix-length=N use suffixes of length N (default %d)\n\
+ -a, --suffix-length=N generate suffixes of length N (default %d)\n\
+ --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\
-b, --bytes=SIZE put SIZE bytes per output file\n\
- -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
- -d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
- -l, --lines=NUMBER put NUMBER lines per output file\n\
+ -C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\
+ -d use numeric suffixes starting at 0, not alphabetic\n\
+ --numeric-suffixes[=FROM] same as -d, but allow setting the start value\
+\n\
+ -e, --elide-empty-files do not generate empty output files with '-n'\n\
+ --filter=COMMAND write to shell COMMAND; file name is $FILE\n\
+ -l, --lines=NUMBER put NUMBER lines/records per output file\n\
+ -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\
+ -t, --separator=SEP use SEP instead of newline as the record separator;\n\
+ '\\0' (zero) specifies the NUL character\n\
+ -u, --unbuffered immediately copy input to output with '-n r/...'\n\
"), DEFAULT_SUFFIX_LENGTH); /* fills the %d of the -a line */
fputs (_("\
- --verbose print a diagnostic to standard error just\n\
- before each output file is opened\n\
+ --verbose print a diagnostic just before each\n\
+ output file is opened\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
- fputs (_("\
-\n\
-SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
+ emit_size_note ();
+ fputs (_("\n\
+CHUNKS may be:\n\
+ N split into N files based on size of input\n\
+ K/N output Kth of N to stdout\n\
+ l/N split into N files without splitting lines/records\n\
+ l/K/N output Kth of N to stdout without splitting lines/records\n\
+ r/N like 'l' but use round robin distribution\n\
+ r/K/N likewise but only output Kth of N to stdout\n\
"), stdout);
- printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
+ emit_ancillary_info (PROGRAM_NAME);
}
exit (status);
}
+/* Return the number of bytes that can be read from FD, a file with
+ apparent size SIZE. Actually read the data into BUF (of size
+ BUFSIZE) if the file appears to be smaller than BUFSIZE, as this
+ works better on proc-like file systems. If the returned value is
+ less than BUFSIZE, store all the file's data into BUF; otherwise,
+ restore the input file's position so that the file can be reread if
+ needed.
+ When SIZE is already at least BUFSIZE nothing is read and SIZE is
+ returned unchanged. A read or seek failure exits with a diagnostic
+ naming INFILE. */
+
+static off_t
+input_file_size (int fd, off_t size, char *buf, size_t bufsize)
+{
+ if (size < bufsize)
+ {
+ size = 0; /* recount from the bytes actually read */
+ while (true)
+ {
+ size_t save = size < bufsize ? size : 0; /* offset in BUF of the next read; 0 once SIZE has reached BUFSIZE, after which BUF is only scratch space for counting */
+ size_t n_read = safe_read (fd, buf + save, bufsize - save);
+ if (n_read == 0)
+ break; /* end of input */
+ if (n_read == SAFE_READ_ERROR)
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ size += n_read;
+ }
+ if (bufsize <= size && lseek (fd, - size, SEEK_CUR) < 0) /* BUF cannot hold it all: seek back over the SIZE bytes consumed */
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ }
+
+ return size;
+}
+
/* Compute the next sequential output file name and store it into the
- string `outfile'. */
+ string 'outfile'. */
static void
next_file_name (void)
{
/* Index in suffix_alphabet of each character in the suffix. */
static size_t *sufindex;
+ static size_t outbase_length; /* length of the part of OUTFILE that precedes the suffix */
+ static size_t outfile_length; /* length of OUTFILE, not counting the terminating NUL */
+ static size_t addsuf_length; /* strlen (additional_suffix), or 0 when there is none */
if (! outfile)
{
- /* Allocate and initialize the first file name. */
+ bool widen;
+
+new_name: /* also entered by goto from the increment loop below when the suffix has to grow */
+ widen = !! outfile_length; /* OUTFILE_LENGTH is nonzero only after a first name was built */
+
+ if (! widen)
+ {
+ /* Allocate and initialize the first file name. */
+
+ outbase_length = strlen (outbase);
+ addsuf_length = additional_suffix ? strlen (additional_suffix) : 0;
+ outfile_length = outbase_length + suffix_length + addsuf_length;
+ }
+ else
+ {
+ /* Reallocate and initialize a new wider file name.
+ We do this by subsuming the unchanging part of
+ the generated suffix into the prefix (base), and
+ reinitializing the now one longer suffix. */
+
+ outfile_length += 2; /* one character joins the prefix and the suffix gets one longer */
+ suffix_length++;
+ }
- size_t outbase_length = strlen (outbase);
- size_t outfile_length = outbase_length + suffix_length;
if (outfile_length + 1 < outbase_length)
- xalloc_die ();
- outfile = xmalloc (outfile_length + 1);
+ xalloc_die ();
+ outfile = xrealloc (outfile, outfile_length + 1);
+
+ if (! widen)
+ memcpy (outfile, outbase, outbase_length);
+ else
+ {
+ /* Append the last alphabet character to the file name prefix. */
+ outfile[outbase_length] = suffix_alphabet[sufindex[0]];
+ outbase_length++;
+ }
+
outfile_mid = outfile + outbase_length;
- memcpy (outfile, outbase, outbase_length);
memset (outfile_mid, suffix_alphabet[0], suffix_length);
+ if (additional_suffix)
+ memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length);
outfile[outfile_length] = 0;
+
+ free (sufindex);
sufindex = xcalloc (suffix_length, sizeof *sufindex);
+ if (numeric_suffix_start)
+ {
+ assert (! widen); /* widening needs SUFFIX_AUTO, which set_suffix_length clears when a start value is given */
+
+ /* Update the output file name. */
+ size_t i = strlen (numeric_suffix_start);
+ memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i);
+
+ /* Update the suffix index. */
+ size_t *sufindex_end = sufindex + suffix_length;
+ while (i-- != 0)
+ *--sufindex_end = numeric_suffix_start[i] - '0';
+ }
+
#if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX
/* POSIX requires that if the output file name is too long for
- its directory, `split' must fail without creating any files.
- This must be checked for explicitly on operating systems that
- silently truncate file names. */
+ its directory, 'split' must fail without creating any files.
+ This must be checked for explicitly on operating systems that
+ silently truncate file names. */
{
- char *dir = dir_name (outfile);
- long name_max = pathconf (dir, _PC_NAME_MAX);
- if (0 <= name_max && name_max < base_len (last_component (outfile)))
- error (EXIT_FAILURE, ENAMETOOLONG, "%s", outfile);
- free (dir);
+ char *dir = dir_name (outfile);
+ long name_max = pathconf (dir, _PC_NAME_MAX);
+ if (0 <= name_max && name_max < base_len (last_component (outfile)))
+ error (EXIT_FAILURE, ENAMETOOLONG, "%s", quotef (outfile));
+ free (dir);
}
#endif
}
@@ -182,15 +395,153 @@ next_file_name (void)
size_t i = suffix_length;
while (i-- != 0)
- {
- sufindex[i]++;
- outfile_mid[i] = suffix_alphabet[sufindex[i]];
- if (outfile_mid[i])
- return;
- sufindex[i] = 0;
- outfile_mid[i] = suffix_alphabet[sufindex[i]];
- }
- error (EXIT_FAILURE, 0, _("Output file suffixes exhausted"));
+ {
+ sufindex[i]++;
+ if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1]) /* first suffix position just reached the last alphabet character: widen instead */
+ goto new_name;
+ outfile_mid[i] = suffix_alphabet[sufindex[i]];
+ if (outfile_mid[i]) /* not the alphabet's terminating NUL, so no carry is needed */
+ return;
+ sufindex[i] = 0; /* wrap this position and carry into the one before it */
+ outfile_mid[i] = suffix_alphabet[sufindex[i]];
+ }
+ error (EXIT_FAILURE, 0, _("output file suffixes exhausted"));
+ }
+}
+
+/* Create or truncate a file.
+ When FILTER_COMMAND is unset: open NAME for writing, exit with a
+ diagnostic if it is the same inode as the input (IN_STAT_BUF),
+ truncate it, and return the descriptor; a failed open returns the
+ negative value from open without a diagnostic.
+ When FILTER_COMMAND is set: put FILE=NAME in the environment, fork a
+ child that runs FILTER_COMMAND via the shell with its standard input
+ connected to a new pipe, store the child's pid in FILTER_PID, append
+ the pipe's write end to OPEN_PIPES, and return that write end. */
+
+static int
+create (const char *name)
+{
+ if (!filter_command)
+ {
+ if (verbose)
+ fprintf (stdout, _("creating file %s\n"), quoteaf (name));
+
+ int fd = open (name, O_WRONLY | O_CREAT | O_BINARY, MODE_RW_UGO); /* no O_TRUNC here: truncation happens below, after the same-inode check */
+ if (fd < 0)
+ return fd;
+ struct stat out_stat_buf;
+ if (fstat (fd, &out_stat_buf) != 0)
+ error (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (name));
+ if (SAME_INODE (in_stat_buf, out_stat_buf))
+ error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"),
+ quoteaf (name));
+ if (ftruncate (fd, 0) != 0)
+ error (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name));
+
+ return fd;
+ }
+ else
+ {
+ int fd_pair[2];
+ pid_t child_pid;
+ char const *shell_prog = getenv ("SHELL");
+ if (shell_prog == NULL)
+ shell_prog = "/bin/sh";
+ if (setenv ("FILE", name, 1) != 0)
+ error (EXIT_FAILURE, errno,
+ _("failed to set FILE environment variable"));
+ if (verbose)
+ fprintf (stdout, _("executing with FILE=%s\n"), quotef (name));
+ if (pipe (fd_pair) != 0)
+ error (EXIT_FAILURE, errno, _("failed to create pipe"));
+ child_pid = fork ();
+ if (child_pid == 0)
+ {
+ /* This is the child process. If an error occurs here, the
+ parent will eventually learn about it after doing a wait,
+ at which time it will emit its own error message. */
+ int j;
+ /* We have to close any pipes that were opened during an
+ earlier call, otherwise this process will be holding a
+ write-pipe that will prevent the earlier process from
+ reading an EOF on the corresponding read-pipe. */
+ for (j = 0; j < n_open_pipes; ++j)
+ if (close (open_pipes[j]) != 0)
+ error (EXIT_FAILURE, errno, _("closing prior pipe"));
+ if (close (fd_pair[1]))
+ error (EXIT_FAILURE, errno, _("closing output pipe"));
+ if (fd_pair[0] != STDIN_FILENO)
+ {
+ if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO)
+ error (EXIT_FAILURE, errno, _("moving input pipe"));
+ if (close (fd_pair[0]) != 0)
+ error (EXIT_FAILURE, errno, _("closing input pipe"));
+ }
+ sigprocmask (SIG_SETMASK, &oldblocked, NULL); /* the child runs with the OLDBLOCKED signal mask */
+ execl (shell_prog, last_component (shell_prog), "-c",
+ filter_command, (char *) NULL);
+ error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""),
+ shell_prog, filter_command); /* reached only when execl failed */
+ }
+ if (child_pid == -1)
+ error (EXIT_FAILURE, errno, _("fork system call failed"));
+ if (close (fd_pair[0]) != 0) /* the parent keeps only the write end */
+ error (EXIT_FAILURE, errno, _("failed to close input pipe"));
+ filter_pid = child_pid;
+ if (n_open_pipes == open_pipes_alloc)
+ open_pipes = x2nrealloc (open_pipes, &open_pipes_alloc,
+ sizeof *open_pipes);
+ open_pipes[n_open_pipes++] = fd_pair[1];
+ return fd_pair[1];
+ }
+}
+
+/* Close the output file, and do any associated cleanup.
+ If FP and FD are both specified, they refer to the same open file;
+ in this case FP is closed, but FD is still used in cleanup.
+ FD, when nonnegative, is also removed from OPEN_PIPES if present.
+ If PID is positive, wait for that child and exit with a diagnostic
+ when it was killed by a signal other than SIGPIPE or exited with a
+ nonzero status. NAME is used only in diagnostics. */
+static void
+closeout (FILE *fp, int fd, pid_t pid, char const *name)
+{
+ if (fp != NULL && fclose (fp) != 0 && ! ignorable (errno))
+ error (EXIT_FAILURE, errno, "%s", quotef (name));
+ if (fd >= 0)
+ {
+ if (fp == NULL && close (fd) < 0)
+ error (EXIT_FAILURE, errno, "%s", quotef (name));
+ int j;
+ for (j = 0; j < n_open_pipes; ++j)
+ {
+ if (open_pipes[j] == fd)
+ {
+ open_pipes[j] = open_pipes[--n_open_pipes]; /* fill the hole with the last entry; order is not kept */
+ break;
+ }
+ }
+ }
+ if (pid > 0)
+ {
+ int wstatus = 0;
+ if (waitpid (pid, &wstatus, 0) == -1 && errno != ECHILD) /* ECHILD is tolerated; WSTATUS then keeps its initial 0 */
+ error (EXIT_FAILURE, errno, _("waiting for child process"));
+ if (WIFSIGNALED (wstatus))
+ {
+ int sig = WTERMSIG (wstatus);
+ if (sig != SIGPIPE) /* termination by SIGPIPE is not reported */
+ {
+ char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))];
+ if (sig2str (sig, signame) != 0)
+ sprintf (signame, "%d", sig); /* no name available: print the number */
+ error (sig + 128, 0,
+ _("with FILE=%s, signal %s from command: %s"),
+ quotef (name), signame, filter_command);
+ }
+ }
+ else if (WIFEXITED (wstatus))
+ {
+ int ex = WEXITSTATUS (wstatus);
+ if (ex != 0)
+ error (ex, 0, _("with FILE=%s, exit %d from command: %s"),
+ quotef (name), ex, filter_command); /* exits with the command's own status */
+ }
+ else
+ {
+ /* shouldn't happen. */
+ error (EXIT_FAILURE, 0,
+ _("unknown status from command (0x%X)"), wstatus + 0u);
+ }
}
}
@@ -203,66 +554,86 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes)
{
if (new_file_flag)
{
- if (output_desc >= 0 && close (output_desc) < 0)
- error (EXIT_FAILURE, errno, "%s", outfile);
-
+ if (!bp && bytes == 0 && elide_empty_files) /* a NULL, zero-length write only requests an empty file; skip it when eliding */
+ return;
+ closeout (NULL, output_desc, filter_pid, outfile); /* finish the previous output before naming the next one */
next_file_name ();
- if (verbose)
- fprintf (stderr, _("creating file %s\n"), quote (outfile));
- output_desc = open (outfile,
- O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
- (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP
- | S_IROTH | S_IWOTH));
- if (output_desc < 0)
- error (EXIT_FAILURE, errno, "%s", outfile);
- }
- if (full_write (output_desc, bp, bytes) != bytes)
- error (EXIT_FAILURE, errno, "%s", outfile);
+ if ((output_desc = create (outfile)) < 0)
+ error (EXIT_FAILURE, errno, "%s", quotef (outfile));
+ }
+ if (full_write (output_desc, bp, bytes) != bytes && ! ignorable (errno)) /* a short write is fatal unless errno is ignorable */
+ error (EXIT_FAILURE, errno, "%s", quotef (outfile));
}
/* Split into pieces of exactly N_BYTES bytes.
- Use buffer BUF, whose size is BUFSIZE. */
+ Use buffer BUF, whose size is BUFSIZE.
+ If INITIAL_READ != SIZE_MAX, the entire input file has already been
+ partly read into BUF and BUF contains INITIAL_READ input bytes. */
static void
-bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
+bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
+ uintmax_t max_files)
{
size_t n_read;
bool new_file_flag = true;
size_t to_read;
uintmax_t to_write = n_bytes;
char *bp_out;
+ uintmax_t opened = 0; /* number of output files started so far */
do
{
- n_read = full_read (STDIN_FILENO, buf, bufsize);
- if (n_read == SAFE_READ_ERROR)
- error (EXIT_FAILURE, errno, "%s", infile);
+ if (initial_read != SIZE_MAX)
+ {
+ n_read = initial_read; /* first pass: use the data already in BUF */
+ initial_read = SIZE_MAX;
+ }
+ else
+ {
+ n_read = safe_read (STDIN_FILENO, buf, bufsize);
+ if (n_read == SAFE_READ_ERROR)
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ }
bp_out = buf;
to_read = n_read;
- for (;;)
- {
- if (to_read < to_write)
- {
- if (to_read) /* do not write 0 bytes! */
- {
- cwrite (new_file_flag, bp_out, to_read);
- to_write -= to_read;
- new_file_flag = false;
- }
- break;
- }
- else
- {
- size_t w = to_write;
- cwrite (new_file_flag, bp_out, w);
- bp_out += w;
- to_read -= w;
- new_file_flag = true;
- to_write = n_bytes;
- }
- }
- }
- while (n_read == bufsize);
+ while (true)
+ {
+ if (to_read < to_write)
+ {
+ if (to_read) /* do not write 0 bytes! */
+ {
+ cwrite (new_file_flag, bp_out, to_read);
+ opened += new_file_flag;
+ to_write -= to_read;
+ new_file_flag = false;
+ }
+ break;
+ }
+ else
+ {
+ size_t w = to_write;
+ cwrite (new_file_flag, bp_out, w);
+ opened += new_file_flag;
+ new_file_flag = !max_files || (opened < max_files); /* MAX_FILES of 0 means no limit; at the limit keep appending to the current file */
+ if (!new_file_flag && ignorable (errno))
+ {
+ /* If filter no longer accepting input, stop reading. */
+ n_read = 0;
+ break;
+ }
+ bp_out += w;
+ to_read -= w;
+ to_write = n_bytes;
+ }
+ }
+ }
+ while (n_read); /* a zero-length read ends the loop */
+
+ /* Ensure NUMBER files are created, which truncates
+ any existing files or notifies any consumers on fifos.
+ FIXME: Should we do this before EXIT_FAILURE? */
+ while (opened++ < max_files)
+ cwrite (true, NULL, 0);
}
/* Split into pieces of exactly N_LINES lines.
@@ -278,90 +649,555 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
do
{
- n_read = full_read (STDIN_FILENO, buf, bufsize);
+ n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
- error (EXIT_FAILURE, errno, "%s", infile);
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
bp = bp_out = buf;
eob = bp + n_read;
- *eob = '\n';
- for (;;)
- {
- bp = memchr (bp, '\n', eob - bp + 1);
- if (bp == eob)
- {
- if (eob != bp_out) /* do not write 0 bytes! */
- {
- size_t len = eob - bp_out;
- cwrite (new_file_flag, bp_out, len);
- new_file_flag = false;
- }
- break;
- }
-
- ++bp;
- if (++n >= n_lines)
- {
- cwrite (new_file_flag, bp_out, bp - bp_out);
- bp_out = bp;
- new_file_flag = true;
- n = 0;
- }
- }
- }
- while (n_read == bufsize);
+ *eob = eolchar;
+ while (true)
+ {
+ bp = memchr (bp, eolchar, eob - bp + 1);
+ if (bp == eob)
+ {
+ if (eob != bp_out) /* do not write 0 bytes! */
+ {
+ size_t len = eob - bp_out;
+ cwrite (new_file_flag, bp_out, len);
+ new_file_flag = false;
+ }
+ break;
+ }
+
+ ++bp;
+ if (++n >= n_lines)
+ {
+ cwrite (new_file_flag, bp_out, bp - bp_out);
+ bp_out = bp;
+ new_file_flag = true;
+ n = 0;
+ }
+ }
+ }
+ while (n_read);
}
-
+
/* Split into pieces that are as large as possible while still not more
than N_BYTES bytes, and are split on line boundaries except
- where lines longer than N_BYTES bytes occur.
- FIXME: Allow N_BYTES to be any uintmax_t value, and don't require a
- buffer of size N_BYTES, in case N_BYTES is very large. */
+ where lines longer than N_BYTES bytes occur. */
static void
-line_bytes_split (size_t n_bytes)
+line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
{
size_t n_read;
- char *bp;
- bool eof = false;
- size_t n_buffered = 0;
- char *buf = xmalloc (n_bytes);
+ uintmax_t n_out = 0; /* for each split. */
+ size_t n_hold = 0; /* bytes currently stored in HOLD */
+ char *hold = NULL; /* for lines > bufsize. */
+ size_t hold_size = 0; /* size recorded for HOLD */
+ bool split_line = false; /* Whether a \n was output in a split. */
do
{
- /* Fill up the full buffer size from the input file. */
+ n_read = safe_read (STDIN_FILENO, buf, bufsize);
+ if (n_read == SAFE_READ_ERROR)
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ size_t n_left = n_read; /* bytes of BUF not yet written or held */
+ char *sob = buf; /* start of those bytes */
+ while (n_left)
+ {
+ size_t split_rest = 0;
+ char *eoc = NULL;
+ char *eol;
+
+ /* Determine End Of Chunk and/or End of Line,
+ which are used below to select what to write or buffer. */
+ if (n_bytes - n_out - n_hold <= n_left)
+ {
+ /* Have enough for split. */
+ split_rest = n_bytes - n_out - n_hold;
+ eoc = sob + split_rest - 1;
+ eol = memrchr (sob, eolchar, split_rest); /* last separator that still fits in this split */
+ }
+ else
+ eol = memrchr (sob, eolchar, n_left);
+
+ /* Output hold space if possible. */
+ if (n_hold && !(!eol && n_out)) /* skipped only when no separator was found and this split already has output */
+ {
+ cwrite (n_out == 0, hold, n_hold);
+ n_out += n_hold;
+ if (n_hold > bufsize)
+ hold = xrealloc (hold, bufsize);
+ n_hold = 0;
+ hold_size = bufsize;
+ }
+
+ /* Output to eol if present. */
+ if (eol)
+ {
+ split_line = true;
+ size_t n_write = eol - sob + 1;
+ cwrite (n_out == 0, sob, n_write);
+ n_out += n_write;
+ n_left -= n_write;
+ sob += n_write;
+ if (eoc)
+ split_rest -= n_write;
+ }
+
+ /* Output to eoc or eob if possible. */
+ if (n_left && !split_line) /* no separator written in this split yet, so raw bytes are written */
+ {
+ size_t n_write = eoc ? split_rest : n_left;
+ cwrite (n_out == 0, sob, n_write);
+ n_out += n_write;
+ n_left -= n_write;
+ sob += n_write;
+ if (eoc)
+ split_rest -= n_write;
+ }
+
+ /* Update hold if needed. */
+ if ((eoc && split_rest) || (!eoc && n_left))
+ {
+ size_t n_buf = eoc ? split_rest : n_left;
+ if (hold_size - n_hold < n_buf)
+ {
+ if (hold_size <= SIZE_MAX - bufsize)
+ hold_size += bufsize; /* grow by one BUFSIZE, guarding against size_t overflow */
+ else
+ xalloc_die ();
+ hold = xrealloc (hold, hold_size);
+ }
+ memcpy (hold + n_hold, sob, n_buf);
+ n_hold += n_buf;
+ n_left -= n_buf;
+ sob += n_buf;
+ }
+
+ /* Reset for new split. */
+ if (eoc)
+ {
+ n_out = 0;
+ split_line = false;
+ }
+ }
+ }
+ while (n_read);
+
+ /* Handle no eol at end of file. */
+ if (n_hold)
+ cwrite (n_out == 0, hold, n_hold);
+
+ free (hold);
+}
+
+/* -n l/[K/]N: Write lines to files of approximately file size / N.
+ The file is partitioned into file size / N sized portions, with the
+ last assigned any excess. If a line _starts_ within a partition
+ it is written completely to the corresponding file. Since lines
+ are not split even if they overlap a partition, the files written
+ can be larger or smaller than the partition size, and even empty
+ if a line is so long as to completely overlap the partition.
+ When K is nonzero only chunk K is produced, on standard output;
+ when K is zero every chunk goes to its own file via cwrite.
+ If INITIAL_READ != SIZE_MAX, BUF already holds INITIAL_READ bytes
+ of input, which are consumed before anything more is read. */
+
+static void
+lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
+ size_t initial_read, off_t file_size)
+{
+ assert (n && k <= n && n <= file_size);
+
+ const off_t chunk_size = file_size / n;
+ uintmax_t chunk_no = 1;
+ off_t chunk_end = chunk_size - 1; /* offset of the last byte of the current chunk */
+ off_t n_written = 0; /* input offset processed so far, including skipped bytes */
+ bool new_file_flag = true;
+ bool chunk_truncated = false;
+
+ if (k > 1)
+ {
+ /* Start reading 1 byte before kth chunk of file. */
+ off_t start = (k - 1) * chunk_size - 1;
+ if (initial_read != SIZE_MAX)
+ {
+ memmove (buf, buf + start, initial_read - start);
+ initial_read -= start;
+ }
+ else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ n_written = start;
+ chunk_no = k - 1;
+ chunk_end = chunk_no * chunk_size - 1; /* equals START: the first byte examined is the last byte of chunk K-1 */
+ }
+
+ while (n_written < file_size)
+ {
+ char *bp = buf, *eob;
+ size_t n_read;
+ if (initial_read != SIZE_MAX)
+ {
+ n_read = initial_read;
+ initial_read = SIZE_MAX;
+ }
+ else
+ {
+ n_read = safe_read (STDIN_FILENO, buf, bufsize);
+ if (n_read == SAFE_READ_ERROR)
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ }
+ if (n_read == 0)
+ break; /* eof. */
+ n_read = MIN (n_read, file_size - n_written); /* never process more than FILE_SIZE bytes in total */
+ chunk_truncated = false;
+ eob = buf + n_read;
+
+ while (bp != eob)
+ {
+ size_t to_write;
+ bool next = false;
+
+ /* Begin looking for '\n' at last byte of chunk. */
+ off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
+ char *bp_out = memchr (bp + skip, eolchar, n_read - skip);
+ if (bp_out++)
+ next = true; /* separator found: this chunk ends just after it */
+ else
+ bp_out = eob;
+ to_write = bp_out - bp;
+
+ if (k == chunk_no)
+ {
+ /* We don't use the stdout buffer here since we're writing
+ large chunks from an existing file, so it's more efficient
+ to write out directly. */
+ if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
+ error (EXIT_FAILURE, errno, "%s", _("write error"));
+ }
+ else if (! k)
+ cwrite (new_file_flag, bp, to_write);
+ n_written += to_write;
+ bp += to_write;
+ n_read -= to_write;
+ new_file_flag = next;
+
+ /* A line could have been so long that it skipped
+ entire chunks. So create empty files in that case. */
+ while (next || chunk_end <= n_written - 1)
+ {
+ if (!next && bp == eob)
+ {
+ /* replenish buf, before going to next chunk. */
+ chunk_truncated = true;
+ break;
+ }
+ chunk_no++;
+ if (k && chunk_no > k)
+ return; /* the requested chunk has been written in full */
+ if (chunk_no == n)
+ chunk_end = file_size - 1; /* >= chunk_size. */
+ else
+ chunk_end += chunk_size;
+ if (chunk_end <= n_written - 1)
+ {
+ if (! k)
+ cwrite (true, NULL, 0);
+ }
+ else
+ next = false;
+ }
+ }
+ }
+
+ if (chunk_truncated)
+ chunk_no++;
+
+ /* Ensure NUMBER files are created, which truncates
+ any existing files or notifies any consumers on fifos.
+ FIXME: Should we do this before EXIT_FAILURE? */
+ while (!k && chunk_no++ <= n)
+ cwrite (true, NULL, 0);
+}
+
+/* -n K/N: copy the Kth of N equal-sized byte ranges of the input to
+   standard output.  The last range also takes the bytes left over
+   by the division.  BUF, of size BUFSIZE, is the transfer buffer;
+   when INITIAL_READ != SIZE_MAX it already contains INITIAL_READ
+   bytes of input, which are used before anything more is read.  */
+
+static void
+bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
+                     size_t initial_read, off_t file_size)
+{
+  assert (k && n && k <= n && n <= file_size);
+
+  /* Offsets of the first byte of the range and of the byte after it.  */
+  off_t pos = (k - 1) * (file_size / n);
+  off_t limit = (k == n) ? file_size : k * (file_size / n);
+
+  /* Position the input, whether buffered or not, at POS.  */
+  if (initial_read == SIZE_MAX)
+    {
+      if (lseek (STDIN_FILENO, pos, SEEK_CUR) < 0)
+        error (EXIT_FAILURE, errno, "%s", quotef (infile));
+    }
+  else
+    {
+      memmove (buf, buf + pos, initial_read - pos);
+      initial_read -= pos;
+    }
+
+  size_t n_read;
+  for (; pos < limit; pos += n_read)
+    {
+      if (initial_read == SIZE_MAX)
+        {
+          n_read = safe_read (STDIN_FILENO, buf, bufsize);
+          if (n_read == SAFE_READ_ERROR)
+            error (EXIT_FAILURE, errno, "%s", quotef (infile));
+        }
+      else
+        {
+          /* Consume the preloaded data exactly once.  */
+          n_read = initial_read;
+          initial_read = SIZE_MAX;
+        }
+
+      if (n_read == 0)
+        break;                  /* eof.  */
+
+      /* Do not write past the end of the requested range.  */
+      n_read = MIN (n_read, limit - pos);
+      if (full_write (STDOUT_FILENO, buf, n_read) != n_read
+          && ! ignorable (errno))
+        error (EXIT_FAILURE, errno, "%s", quotef ("-"));
+    }
+}
+
+typedef struct of_info /* per-output-file state used by ofile_open */
+{
+ char *of_name; /* output file name */
+ int ofd; /* open descriptor, or OFD_NEW / OFD_APPEND when not open */
+ FILE *ofile; /* stdio stream on OFD; NULL when not open */
+ int opid; /* value of FILTER_PID captured when the file was opened */
+} of_t;
+
+enum
+{
+ OFD_NEW = -1, /* not opened yet: ofile_open uses create () */
+ OFD_APPEND = -2 /* closed by ofile_open to free a descriptor: reopened with O_APPEND */
+};
+
+/* Rotate file descriptors when we're writing to more output files than we
+ have available file descriptors.
+ Return whether we came under file resource pressure.
+ If so, it's probably best to close each file when finished with it.
+ FILES is an array of NFILES entries; FILES[I_CHECK] is the entry to
+ make writable. Nothing is done if that entry is already open.
+ Any failure other than EMFILE/ENFILE from the open attempt, or
+ running out of other entries to close, exits with a diagnostic. */
+
+static bool
+ofile_open (of_t *files, size_t i_check, size_t nfiles)
+{
+ bool file_limit = false;
+
+ if (files[i_check].ofd <= OFD_NEW) /* true for both OFD_NEW and OFD_APPEND */
+ {
+ int fd;
+ size_t i_reopen = i_check ? i_check - 1 : nfiles - 1; /* first candidate to close: the entry before I_CHECK, wrapping around */
+
+ /* Another process could have opened a file in between the calls to
+ close and open, so we should keep trying until open succeeds or
+ we've closed all of our files. */
+ while (true)
+ {
+ if (files[i_check].ofd == OFD_NEW)
+ fd = create (files[i_check].of_name);
+ else /* OFD_APPEND */
+ {
+ /* Attempt to append to previously opened file.
+ We use O_NONBLOCK to support writing to fifos,
+ where the other end has closed because of our
+ previous close. In that case we'll immediately
+ get an error, rather than waiting indefinitely.
+ In specialised cases the consumer can keep reading
+ from the fifo, terminating on conditions in the data
+ itself, or perhaps never in the case of 'tail -f'.
+ I.e., for fifos it is valid to attempt this reopen.
+
+ We don't handle the filter_command case here, as create()
+ will exit if there are not enough files in that case.
+ I.e., we don't support restarting filters, as that would
+ put too much burden on users specifying --filter commands. */
+ fd = open (files[i_check].of_name,
+ O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
+ }
+
+ if (-1 < fd)
+ break;
+
+ if (!(errno == EMFILE || errno == ENFILE)) /* only descriptor exhaustion is retried */
+ error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
+
+ file_limit = true;
+
+ /* Search backwards for an open file to close. */
+ while (files[i_reopen].ofd < 0)
+ {
+ i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
+ /* No more open files to close, exit with E[NM]FILE. */
+ if (i_reopen == i_check)
+ error (EXIT_FAILURE, errno, "%s",
+ quotef (files[i_check].of_name));
+ }
+
+ if (fclose (files[i_reopen].ofile) != 0)
+ error (EXIT_FAILURE, errno, "%s", quotef (files[i_reopen].of_name));
+ files[i_reopen].ofile = NULL;
+ files[i_reopen].ofd = OFD_APPEND; /* a later ofile_open on this entry reopens it in append mode */
+ }
+
+ files[i_check].ofd = fd;
+ if (!(files[i_check].ofile = fdopen (fd, "a")))
+ error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name));
+ files[i_check].opid = filter_pid; /* take over the pid that create () may have stored */
+ filter_pid = 0;
+ }
+
+ return file_limit;
+}
- n_read = full_read (STDIN_FILENO, buf + n_buffered, n_bytes - n_buffered);
+/* -n r/[K/]N: Divide file into N chunks in round robin fashion.
+ When K == 0, we try to keep the files open in parallel.
+ If we run out of file resources, then we revert
+ to opening and closing each file for each line.
+ When K != 0, only the lines that fall to chunk K are written, to stdout.
+ BUF is the I/O buffer; up to BUFSIZE bytes are read into it at a time. */
+
+static void
+lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
+{
+ bool wrapped = false; /* Set once the last of the N files has been reached. */
+ bool wrote = false; /* Whether a write in the current pass was not ignored. */
+ bool file_limit; /* Only used when K == 0. */
+ size_t i_file;
+ of_t *files IF_LINT (= NULL);
+ uintmax_t line_no; /* Only used when K != 0: chunk the current line falls to. */
+
+ if (k)
+ line_no = 1;
+ else
+ {
+ if (SIZE_MAX < n)
+ xalloc_die ();
+ files = xnmalloc (n, sizeof *files);
+
+ /* Generate output file names. */
+ for (i_file = 0; i_file < n; i_file++)
+ {
+ next_file_name ();
+ files[i_file].of_name = xstrdup (outfile);
+ files[i_file].ofd = OFD_NEW;
+ files[i_file].ofile = NULL;
+ files[i_file].opid = 0;
+ }
+ i_file = 0;
+ file_limit = false;
+ }
+
+ while (true)
+ {
+ char *bp = buf, *eob;
+ size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
- error (EXIT_FAILURE, errno, "%s", infile);
-
- n_buffered += n_read;
- if (n_buffered != n_bytes)
- eof = true;
-
- /* Find where to end this chunk. */
- bp = buf + n_buffered;
- if (n_buffered == n_bytes)
- {
- while (bp > buf && bp[-1] != '\n')
- bp--;
- }
-
- /* If chunk has no newlines, use all the chunk. */
- if (bp == buf)
- bp = buf + n_buffered;
-
- /* Output the chars as one output file. */
- cwrite (true, buf, bp - buf);
-
- /* Discard the chars we just output; move rest of chunk
- down to be the start of the next chunk. Source and
- destination probably overlap. */
- n_buffered -= bp - buf;
- if (n_buffered > 0)
- memmove (buf, bp, n_buffered);
- }
- while (!eof);
- free (buf);
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ else if (n_read == 0)
+ break; /* eof. */
+ eob = buf + n_read;
+
+ while (bp != eob)
+ {
+ size_t to_write;
+ bool next = false; /* True when this piece ends with the separator. */
+
+ /* Find end of line. */
+ char *bp_out = memchr (bp, eolchar, eob - bp);
+ if (bp_out)
+ {
+ bp_out++;
+ next = true;
+ }
+ else
+ bp_out = eob;
+ to_write = bp_out - bp;
+
+ if (k)
+ {
+ if (line_no == k && unbuffered)
+ {
+ if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
+ error (EXIT_FAILURE, errno, "%s", _("write error"));
+ }
+ else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
+ {
+ clearerr (stdout); /* To silence close_stdout(). */
+ error (EXIT_FAILURE, errno, "%s", _("write error"));
+ }
+ if (next)
+ line_no = (line_no == n) ? 1 : line_no + 1;
+ }
+ else
+ {
+ /* Secure file descriptor. */
+ file_limit |= ofile_open (files, i_file, n);
+ if (unbuffered)
+ {
+ /* Note writing to fd, rather than flushing the FILE gives
+ an 8% performance benefit, due to reduced data copying. */
+ if (full_write (files[i_file].ofd, bp, to_write) != to_write
+ && ! ignorable (errno))
+ {
+ error (EXIT_FAILURE, errno, "%s",
+ quotef (files[i_file].of_name));
+ }
+ }
+ else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1
+ && ! ignorable (errno))
+ {
+ error (EXIT_FAILURE, errno, "%s",
+ quotef (files[i_file].of_name));
+ }
+ if (! ignorable (errno))
+ wrote = true;
+
+ if (file_limit)
+ {
+ if (fclose (files[i_file].ofile) != 0)
+ {
+ error (EXIT_FAILURE, errno, "%s",
+ quotef (files[i_file].of_name));
+ }
+ files[i_file].ofile = NULL;
+ files[i_file].ofd = OFD_APPEND;
+ }
+ if (next && ++i_file == n)
+ {
+ wrapped = true;
+ /* If no filters are accepting input, stop reading. */
+ if (! wrote)
+ goto no_filters;
+ wrote = false;
+ i_file = 0;
+ }
+ }
+
+ bp = bp_out;
+ }
+ }
+
+no_filters:
+ /* Ensure all files created, so that any existing files are truncated,
+ and to signal any waiting fifo consumers.
+ Also, close any open file descriptors.
+ FIXME: Should we do this before EXIT_FAILURE? */
+ if (!k)
+ {
+ /* First index that has not been opened by the loop above.
+ N was checked against SIZE_MAX before the allocation, so size_t
+ holds it; an int here would narrow the value and then be compared
+ against the unsigned I_FILE. */
+ size_t ceiling = (wrapped ? n : i_file);
+ for (i_file = 0; i_file < n; i_file++)
+ {
+ if (i_file >= ceiling && !elide_empty_files)
+ file_limit |= ofile_open (files, i_file, n);
+ if (files[i_file].ofd >= 0)
+ closeout (files[i_file].ofile, files[i_file].ofd,
+ files[i_file].opid, files[i_file].of_name);
+ files[i_file].ofd = OFD_APPEND;
+ }
+ }
+ IF_LINT (free (files));
}
#define FAIL_ONLY_ONE_WAY() \
@@ -372,23 +1208,39 @@ line_bytes_split (size_t n_bytes)
} \
while (0)
+
+/* Parse the K/N form of a chunk option argument.
+   SLASH points at the '/' inside optarg.  The number after it is stored
+   in *N_UNITS.  When text precedes the '/', the '/' is overwritten with
+   NUL and that leading number, which must lie in 1..N, is stored in
+   *K_UNITS; otherwise *K_UNITS is left untouched.  */
+
+static void
+parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
+{
+  char *n_text = slash + 1;
+
+  *n_units = xdectoumax (n_text, 1, UINTMAX_MAX, "",
+                         _("invalid number of chunks"), 0);
+
+  /* Nothing before the '/': there is no chunk number to parse.  */
+  if (slash == optarg)
+    return;
+
+  /* Cut optarg at the '/' so that only the leading number is parsed.  */
+  *slash = '\0';
+  *k_units = xdectoumax (optarg, 1, *n_units, "",
+                         _("invalid chunk number"), 0);
+}
+
+
int
main (int argc, char **argv)
{
- struct stat stat_buf;
- enum
- {
- type_undef, type_bytes, type_byteslines, type_lines, type_digits
- } split_type = type_undef;
- size_t in_blk_size; /* optimal block size of input file device */
- char *buf; /* file i/o buffer */
+ enum Split_type split_type = type_undef;
+ size_t in_blk_size = 0; /* optimal block size of input file device */
size_t page_size = getpagesize ();
- uintmax_t n_units;
+ uintmax_t k_units = 0;
+ uintmax_t n_units = 0;
+
+ static char const multipliers[] = "bEGKkMmPTYZ0";
int c;
int digits_optind = 0;
+ off_t file_size IF_LINT (= 0);
initialize_main (&argc, &argv);
- program_name = argv[0];
+ set_program_name (argv[0]);
setlocale (LC_ALL, "");
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
@@ -397,113 +1249,204 @@ main (int argc, char **argv)
/* Parse command line options. */
- infile = "-";
- outbase = "x";
+ infile = bad_cast ("-");
+ outbase = bad_cast ("x");
- while (1)
+ while (true)
{
/* This is the argv-index of the option we will read next. */
int this_optind = optind ? optind : 1;
+ char *slash;
- c = getopt_long (argc, argv, "0123456789C:a:b:dl:", longopts, NULL);
+ c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:u",
+ longopts, NULL);
if (c == -1)
- break;
+ break;
switch (c)
- {
- case 'a':
- {
- unsigned long tmp;
- if (xstrtoul (optarg, NULL, 10, &tmp, "") != LONGINT_OK
- || SIZE_MAX / sizeof (size_t) < tmp)
- {
- error (0, 0, _("%s: invalid suffix length"), optarg);
- usage (EXIT_FAILURE);
- }
- suffix_length = tmp;
- }
- break;
-
- case 'b':
- if (split_type != type_undef)
- FAIL_ONLY_ONE_WAY ();
- split_type = type_bytes;
- if (xstrtoumax (optarg, NULL, 10, &n_units, "bkm") != LONGINT_OK
- || n_units == 0)
- {
- error (0, 0, _("%s: invalid number of bytes"), optarg);
- usage (EXIT_FAILURE);
- }
- break;
-
- case 'l':
- if (split_type != type_undef)
- FAIL_ONLY_ONE_WAY ();
- split_type = type_lines;
- if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
- || n_units == 0)
- {
- error (0, 0, _("%s: invalid number of lines"), optarg);
- usage (EXIT_FAILURE);
- }
- break;
-
- case 'C':
- if (split_type != type_undef)
- FAIL_ONLY_ONE_WAY ();
- split_type = type_byteslines;
- if (xstrtoumax (optarg, NULL, 10, &n_units, "bkm") != LONGINT_OK
- || n_units == 0 || SIZE_MAX < n_units)
- {
- error (0, 0, _("%s: invalid number of bytes"), optarg);
- usage (EXIT_FAILURE);
- }
- break;
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (split_type == type_undef)
- {
- split_type = type_digits;
- n_units = 0;
- }
- if (split_type != type_undef && split_type != type_digits)
- FAIL_ONLY_ONE_WAY ();
- if (digits_optind != 0 && digits_optind != this_optind)
- n_units = 0; /* More than one number given; ignore other. */
- digits_optind = this_optind;
- if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
- {
- char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
- error (EXIT_FAILURE, 0,
- _("line count option -%s%c... is too large"),
- umaxtostr (n_units, buffer), c);
- }
- break;
-
- case 'd':
- suffix_alphabet = "0123456789";
- break;
-
- case VERBOSE_OPTION:
- verbose = true;
- break;
-
- case_GETOPT_HELP_CHAR;
-
- case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
-
- default:
- usage (EXIT_FAILURE);
- }
+ {
+ case 'a':
+ suffix_length = xdectoumax (optarg, 0, SIZE_MAX / sizeof (size_t),
+ "", _("invalid suffix length"), 0);
+ break;
+
+ case ADDITIONAL_SUFFIX_OPTION:
+ if (last_component (optarg) != optarg)
+ {
+ error (0, 0,
+ _("invalid suffix %s, contains directory separator"),
+ quote (optarg));
+ usage (EXIT_FAILURE);
+ }
+ additional_suffix = optarg;
+ break;
+
+ case 'b':
+ if (split_type != type_undef)
+ FAIL_ONLY_ONE_WAY ();
+ split_type = type_bytes;
+ /* Limit to OFF_T_MAX, because if input is a pipe, we could get more
+ data than is possible to write to a single file, so indicate that
+ immediately rather than having possibly future invocations fail. */
+ n_units = xdectoumax (optarg, 1, OFF_T_MAX, multipliers,
+ _("invalid number of bytes"), 0);
+ break;
+
+ case 'l':
+ if (split_type != type_undef)
+ FAIL_ONLY_ONE_WAY ();
+ split_type = type_lines;
+ n_units = xdectoumax (optarg, 1, UINTMAX_MAX, "",
+ _("invalid number of lines"), 0);
+ break;
+
+ case 'C':
+ if (split_type != type_undef)
+ FAIL_ONLY_ONE_WAY ();
+ split_type = type_byteslines;
+ n_units = xdectoumax (optarg, 1, MIN (SIZE_MAX, OFF_T_MAX),
+ multipliers, _("invalid number of bytes"), 0);
+ break;
+
+ case 'n':
+ if (split_type != type_undef)
+ FAIL_ONLY_ONE_WAY ();
+ /* skip any whitespace */
+ while (isspace (to_uchar (*optarg)))
+ optarg++;
+ if (STRNCMP_LIT (optarg, "r/") == 0)
+ {
+ split_type = type_rr;
+ optarg += 2;
+ }
+ else if (STRNCMP_LIT (optarg, "l/") == 0)
+ {
+ split_type = type_chunk_lines;
+ optarg += 2;
+ }
+ else
+ split_type = type_chunk_bytes;
+ if ((slash = strchr (optarg, '/')))
+ parse_chunk (&k_units, &n_units, slash);
+ else
+ n_units = xdectoumax (optarg, 1, UINTMAX_MAX, "",
+ _("invalid number of chunks"), 0);
+ break;
+
+ case 'u':
+ unbuffered = true;
+ break;
+
+ case 't':
+ {
+ /* Read the byte as unsigned char: a negative eolchar means
+ "no separator chosen yet", so a separator with the high bit
+ set must not turn negative where plain char is signed, or it
+ would be replaced by newline and escape the check below.
+ NOTE(review): confirm every other use of eolchar accepts a
+ value in the range 0..UCHAR_MAX. */
+ int neweol = to_uchar (optarg[0]);
+ if (! neweol)
+ error (EXIT_FAILURE, 0, _("empty record separator"));
+ if (optarg[1])
+ {
+ if (STREQ (optarg, "\\0"))
+ neweol = '\0';
+ else
+ {
+ /* Provoke with 'split -txx'. Complain about a
+ "multi-character separator" instead of a "multibyte
+ separator", so that the diagnostic's wording does not
+ need to be changed once multibyte characters are
+ supported. */
+ error (EXIT_FAILURE, 0, _("multi-character separator %s"),
+ quote (optarg));
+ }
+ }
+ /* Make it explicit we don't support multiple separators. */
+ if (0 <= eolchar && neweol != eolchar)
+ {
+ error (EXIT_FAILURE, 0,
+ _("multiple separator characters specified"));
+ }
+
+ eolchar = neweol;
+ }
+ break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (split_type == type_undef)
+ {
+ split_type = type_digits;
+ n_units = 0;
+ }
+ if (split_type != type_undef && split_type != type_digits)
+ FAIL_ONLY_ONE_WAY ();
+ if (digits_optind != 0 && digits_optind != this_optind)
+ n_units = 0; /* More than one number given; ignore other. */
+ digits_optind = this_optind;
+ if (!DECIMAL_DIGIT_ACCUMULATE (n_units, c - '0', uintmax_t))
+ {
+ char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
+ error (EXIT_FAILURE, 0,
+ _("line count option -%s%c... is too large"),
+ umaxtostr (n_units, buffer), c);
+ }
+ break;
+
+ case 'd':
+ suffix_alphabet = "0123456789";
+ if (optarg)
+ {
+ if (strlen (optarg) != strspn (optarg, suffix_alphabet))
+ {
+ error (0, 0,
+ _("%s: invalid start value for numerical suffix"),
+ quote (optarg));
+ usage (EXIT_FAILURE);
+ }
+ else
+ {
+ /* Skip any leading zero. */
+ while (*optarg == '0' && *(optarg + 1) != '\0')
+ optarg++;
+ numeric_suffix_start = optarg;
+ }
+ }
+ break;
+
+ case 'e':
+ elide_empty_files = true;
+ break;
+
+ case FILTER_OPTION:
+ filter_command = optarg;
+ break;
+
+ case IO_BLKSIZE_OPTION:
+ in_blk_size = xdectoumax (optarg, 1, SIZE_MAX - page_size,
+ multipliers, _("invalid IO block size"), 0);
+ break;
+
+ case VERBOSE_OPTION:
+ verbose = true;
+ break;
+
+ case_GETOPT_HELP_CHAR;
+
+ case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+ default:
+ usage (EXIT_FAILURE);
+ }
+ }
+
+ if (k_units != 0 && filter_command)
+ {
+ error (0, 0, _("--filter does not process a chunk extracted to stdout"));
+ usage (EXIT_FAILURE);
}
/* Handle default case. */
@@ -515,10 +1458,15 @@ main (int argc, char **argv)
if (n_units == 0)
{
- error (0, 0, _("invalid number of lines: 0"));
+ error (0, 0, "%s: %s", _("invalid number of lines"), quote ("0"));
usage (EXIT_FAILURE);
}
+ if (eolchar < 0)
+ eolchar = '\n';
+
+ set_suffix_length (n_units, split_type);
+
/* Get out the filename arguments. */
if (optind < argc)
@@ -533,26 +1481,87 @@ main (int argc, char **argv)
usage (EXIT_FAILURE);
}
+ /* Check that the suffix length is large enough for the numerical
+ suffix start value. */
+ if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length)
+ {
+ error (0, 0, _("numerical suffix start value is too large "
+ "for the suffix length"));
+ usage (EXIT_FAILURE);
+ }
+
/* Open the input file. */
if (! STREQ (infile, "-")
&& fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0)
error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
- quote (infile));
+ quoteaf (infile));
- /* Binary I/O is safer when bytecounts are used. */
+ /* Binary I/O is safer when byte counts are used. */
if (O_BINARY && ! isatty (STDIN_FILENO))
- freopen (NULL, "rb", stdin);
-
- /* No output file is open now. */
- output_desc = -1;
+ xfreopen (NULL, "rb", stdin);
/* Get the optimal block size of input device and make a buffer. */
- if (fstat (STDIN_FILENO, &stat_buf) != 0)
- error (EXIT_FAILURE, errno, "%s", infile);
- in_blk_size = ST_BLKSIZE (stat_buf);
+ if (fstat (STDIN_FILENO, &in_stat_buf) != 0)
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+
+ bool specified_buf_size = !! in_blk_size;
+ if (! specified_buf_size)
+ in_blk_size = io_blksize (in_stat_buf);
+
+ void *b = xmalloc (in_blk_size + 1 + page_size - 1);
+ char *buf = ptr_align (b, page_size);
+ size_t initial_read = SIZE_MAX;
- buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);
+ if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
+ {
+ off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
+ if (0 <= input_offset)
+ {
+ if (usable_st_size (&in_stat_buf) && ! specified_buf_size)
+ {
+ assert (ST_BLKSIZE (in_stat_buf) <= in_blk_size);
+ file_size = input_file_size (STDIN_FILENO, in_stat_buf.st_size,
+ buf, in_blk_size);
+ if (file_size < in_blk_size)
+ initial_read = file_size;
+ }
+ else
+ {
+ file_size = lseek (STDIN_FILENO, 0, SEEK_END);
+ input_offset = (file_size < 0
+ ? file_size
+ : lseek (STDIN_FILENO, input_offset, SEEK_SET));
+ file_size -= input_offset;
+ }
+ }
+ if (input_offset < 0)
+ error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
+ quotef (infile));
+ /* Overflow, and sanity checking. */
+ if (OFF_T_MAX < n_units)
+ {
+ char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
+ error (EXIT_FAILURE, EOVERFLOW, "%s: %s",
+ _("invalid number of chunks"),
+ quote (umaxtostr (n_units, buffer)));
+ }
+ /* increase file_size to n_units here, so that we still process
+ any input data, and create empty files for the rest. */
+ file_size = MAX (file_size, n_units);
+ }
+
+ /* When filtering, closure of one pipe must not terminate the process,
+ as there may still be other streams expecting input from us. */
+ if (filter_command)
+ {
+ struct sigaction act;
+ sigemptyset (&newblocked);
+ sigaction (SIGPIPE, NULL, &act);
+ if (act.sa_handler != SIG_IGN)
+ sigaddset (&newblocked, SIGPIPE);
+ sigprocmask (SIG_BLOCK, &newblocked, &oldblocked);
+ }
switch (split_type)
{
@@ -562,21 +1571,42 @@ main (int argc, char **argv)
break;
case type_bytes:
- bytes_split (n_units, buf, in_blk_size);
+ bytes_split (n_units, buf, in_blk_size, SIZE_MAX, 0);
break;
case type_byteslines:
- line_bytes_split (n_units);
+ line_bytes_split (n_units, buf, in_blk_size);
+ break;
+
+ case type_chunk_bytes:
+ if (k_units == 0)
+ bytes_split (file_size / n_units, buf, in_blk_size, initial_read,
+ n_units);
+ else
+ bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
+ file_size);
+ break;
+
+ case type_chunk_lines:
+ lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
+ file_size);
+ break;
+
+ case type_rr:
+ /* Note, this is like 'sed -n ${k}~${n}p' when k > 0,
+ but the functionality is provided for symmetry. */
+ lines_rr (k_units, n_units, buf, in_blk_size);
break;
default:
abort ();
}
+ IF_LINT (free (b));
+
if (close (STDIN_FILENO) != 0)
- error (EXIT_FAILURE, errno, "%s", infile);
- if (output_desc >= 0 && close (output_desc) < 0)
- error (EXIT_FAILURE, errno, "%s", outfile);
+ error (EXIT_FAILURE, errno, "%s", quotef (infile));
+ closeout (NULL, output_desc, filter_pid, outfile);
- exit (EXIT_SUCCESS);
+ return EXIT_SUCCESS;
}