diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
commit | 70e9163c9c18e995515598085cb824e554eb7ae7 (patch) | |
tree | a42dc8b2a6c031354bf31472de888bfc8a060132 /src/fmt.c | |
parent | cbf5993c43f49281173f185863577d86bfac6eae (diff) | |
download | coreutils-tarball-master.tar.gz |
coreutils-8.25HEADcoreutils-8.25master
Diffstat (limited to 'src/fmt.c')
-rw-r--r-- | src/fmt.c | 392 |
1 files changed, 203 insertions, 189 deletions
@@ -1,10 +1,10 @@ /* GNU fmt -- simple text formatter. - Copyright (C) 1994-2006 Free Software Foundation, Inc. + Copyright (C) 1994-2016 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -12,8 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* Written by Ross Paterson <rap@doc.ic.ac.uk>. */ @@ -21,6 +20,7 @@ #include <stdio.h> #include <sys/types.h> #include <getopt.h> +#include <assert.h> /* Redefine. Otherwise, systems (Unicos for one) with headers that define it to be a type get syntax errors for the variable declaration below. */ @@ -28,13 +28,13 @@ #include "system.h" #include "error.h" -#include "quote.h" -#include "xstrtol.h" +#include "fadvise.h" +#include "xdectoint.h" -/* The official name of this program (e.g., no `g' prefix). */ +/* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "fmt" -#define AUTHORS "Ross Paterson" +#define AUTHORS proper_name ("Ross Paterson") /* The following parameters represent the program's idea of what is "best". Adjust to taste, subject to the caveats given. */ @@ -68,7 +68,7 @@ typedef long int COST; #define SQR(n) ((n) * (n)) #define EQUIV(n) SQR ((COST) (n)) -/* Cost of a filled line n chars longer or shorter than best_width. */ +/* Cost of a filled line n chars longer or shorter than goal_width. */ #define SHORT_COST(n) EQUIV ((n) * 10) /* Cost of the difference between adjacent filled lines. */ @@ -116,7 +116,7 @@ typedef long int COST; /* Extra ctype(3)-style macros. */ -#define isopen(c) (strchr ("([`'\"", c) != NULL) +#define isopen(c) (strchr ("(['`\"", c) != NULL) #define isclose(c) (strchr (")]'\"", c) != NULL) #define isperiod(c) (strchr (".?!", c) != NULL) @@ -168,9 +168,6 @@ static void put_line (WORD *w, int indent); static void put_word (WORD *w); static void put_space (int space); -/* The name this program was run with. */ -const char *program_name; - /* Option values. */ /* If true, first 2 lines may have different indent (default false). */ @@ -204,7 +201,7 @@ static int prefix_lead_space; static int prefix_length; /* The preferred width of text lines, set to LEEWAY % less than max_width. */ -static int best_width; +static int goal_width; /* Dynamic variables. */ @@ -266,45 +263,43 @@ void usage (int status) { if (status != EXIT_SUCCESS) - fprintf (stderr, _("Try `%s --help' for more information.\n"), - program_name); + emit_try_help (); else { - printf (_("Usage: %s [-DIGITS] [OPTION]... [FILE]...\n"), program_name); + printf (_("Usage: %s [-WIDTH] [OPTION]... [FILE]...\n"), program_name); fputs (_("\ Reformat each paragraph in the FILE(s), writing to standard output.\n\ -If no FILE or if FILE is `-', read standard input.\n\ -\n\ -"), stdout); - fputs (_("\ -Mandatory arguments to long options are mandatory for short options too.\n\ +The option -WIDTH is an abbreviated form of --width=DIGITS.\n\ "), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + fputs (_("\ -c, --crown-margin preserve indentation of first two lines\n\ -p, --prefix=STRING reformat only lines beginning with STRING,\n\ reattaching the prefix to reformatted lines\n\ -s, --split-only split long lines, but do not refill\n\ "), - stdout); + stdout); + /* Tell xgettext that the "% o" below is not a printf-style + format string: xgettext:no-c-format */ fputs (_("\ -t, --tagged-paragraph indentation of first line different from second\n\ -u, --uniform-spacing one space between words, two after sentences\n\ -w, --width=WIDTH maximum line width (default of 75 columns)\n\ + -g, --goal=WIDTH goal width (default of 93% of width)\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); - fputs (_("\ -\n\ -With no FILE, or when FILE is -, read standard input.\n"), - stdout); - printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); + emit_ancillary_info (PROGRAM_NAME); } exit (status); } /* Decode options and launch execution. */ -static const struct option long_options[] = +static struct option const long_options[] = { {"crown-margin", no_argument, NULL, 'c'}, {"prefix", required_argument, NULL, 'p'}, @@ -312,6 +307,7 @@ static const struct option long_options[] = {"tagged-paragraph", no_argument, NULL, 't'}, {"uniform-spacing", no_argument, NULL, 'u'}, {"width", required_argument, NULL, 'w'}, + {"goal", required_argument, NULL, 'g'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0}, @@ -323,9 +319,10 @@ main (int argc, char **argv) int optchar; bool ok = true; char const *max_width_option = NULL; + char const *goal_width_option = NULL; initialize_main (&argc, &argv); - program_name = argv[0]; + set_program_name (argv[0]); setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); @@ -348,41 +345,45 @@ main (int argc, char **argv) argc--; } - while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:", - long_options, NULL)) - != -1) + while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:g:", + long_options, NULL)) + != -1) switch (optchar) { default: - if (ISDIGIT (optchar)) - error (0, 0, _("invalid option -- %c; -WIDTH is recognized\ + if (ISDIGIT (optchar)) + error (0, 0, _("invalid option -- %c; -WIDTH is recognized\ only when it is the first\noption; use -w N instead"), - optchar); - usage (EXIT_FAILURE); + optchar); + usage (EXIT_FAILURE); case 'c': - crown = true; - break; + crown = true; + break; case 's': - split = true; - break; + split = true; + break; case 't': - tagged = true; - break; + tagged = true; + break; case 'u': - uniform = true; - break; + uniform = true; + break; case 'w': - max_width_option = optarg; - break; + max_width_option = optarg; + break; + + case 'g': + goal_width_option = optarg; + break; case 'p': - set_prefix (optarg); - break; + set_prefix (optarg); + break; case_GETOPT_HELP_CHAR; @@ -393,50 +394,57 @@ main (int argc, char **argv) if (max_width_option) { /* Limit max_width to MAXCHARS / 2; otherwise, the resulting - output can be quite ugly. */ - unsigned long int tmp; - if (! (xstrtoul (max_width_option, NULL, 10, &tmp, "") == LONGINT_OK - && tmp <= MAXCHARS / 2)) - error (EXIT_FAILURE, 0, _("invalid width: %s"), - quote (max_width_option)); - max_width = tmp; + output can be quite ugly. */ + max_width = xdectoumax (max_width_option, 0, MAXCHARS / 2, "", + _("invalid width"), 0); } - best_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; + if (goal_width_option) + { + /* Limit goal_width to max_width. */ + goal_width = xdectoumax (goal_width_option, 0, max_width, "", + _("invalid width"), 0); + if (max_width_option == NULL) + max_width = goal_width + 10; + } + else + { + goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; + } if (optind == argc) fmt (stdin); else { for (; optind < argc; optind++) - { - char *file = argv[optind]; - if (STREQ (file, "-")) - fmt (stdin); - else - { - FILE *in_stream; - in_stream = fopen (file, "r"); - if (in_stream != NULL) - { - fmt (in_stream); - if (fclose (in_stream) == EOF) - { - error (0, errno, "%s", file); - ok = false; - } - } - else - { - error (0, errno, _("cannot open %s for reading"), - quote (file)); - ok = false; - } - } - } + { + char *file = argv[optind]; + if (STREQ (file, "-")) + fmt (stdin); + else + { + FILE *in_stream; + in_stream = fopen (file, "r"); + if (in_stream != NULL) + { + fmt (in_stream); + if (fclose (in_stream) == EOF) + { + error (0, errno, "%s", quotef (file)); + ok = false; + } + } + else + { + error (0, errno, _("cannot open %s for reading"), + quoteaf (file)); + ok = false; + } + } + } } - exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); + return ok ? EXIT_SUCCESS : EXIT_FAILURE; } /* Trim space from the front and back of the string P, yielding the prefix, @@ -467,6 +475,7 @@ set_prefix (char *p) static void fmt (FILE *f) { + fadvise (f, FADVISE_SEQUENTIAL); tabs = false; other_indent = 0; next_char = get_prefix (f); @@ -477,7 +486,7 @@ fmt (FILE *f) } } -/* Set the global variable `other_indent' according to SAME_PARAGRAPH +/* Set the global variable 'other_indent' according to SAME_PARAGRAPH and other global variables. */ static void @@ -492,9 +501,9 @@ set_other_indent (bool same_paragraph) else if (tagged) { if (same_paragraph && in_column != first_indent) - { - other_indent = in_column; - } + { + other_indent = in_column; + } /* Only one line: use the secondary indent from last time if it splits, or 0 if there have been no multi-line paragraphs in the @@ -502,7 +511,7 @@ set_other_indent (bool same_paragraph) pick a new secondary indent. */ else if (other_indent == first_indent) - other_indent = first_indent == 0 ? DEF_INDENT : 0; + other_indent = first_indent == 0 ? DEF_INDENT : 0; } else { @@ -536,15 +545,15 @@ get_paragraph (FILE *f) /* Scan (and copy) blank lines, and lines not introduced by the prefix. */ while (c == '\n' || c == EOF - || next_prefix_indent < prefix_lead_space - || in_column < next_prefix_indent + prefix_full_length) + || next_prefix_indent < prefix_lead_space + || in_column < next_prefix_indent + prefix_full_length) { c = copy_rest (f, c); if (c == EOF) - { - next_char = EOF; - return false; - } + { + next_char = EOF; + return false; + } putchar ('\n'); c = get_prefix (f); } @@ -567,30 +576,35 @@ get_paragraph (FILE *f) else if (crown) { if (same_para (c)) - { - do - { /* for each line till the end of the para */ - c = get_line (f, c); - } - while (same_para (c) && in_column == other_indent); - } + { + do + { /* for each line till the end of the para */ + c = get_line (f, c); + } + while (same_para (c) && in_column == other_indent); + } } else if (tagged) { if (same_para (c) && in_column != first_indent) - { - do - { /* for each line till the end of the para */ - c = get_line (f, c); - } - while (same_para (c) && in_column == other_indent); - } + { + do + { /* for each line till the end of the para */ + c = get_line (f, c); + } + while (same_para (c) && in_column == other_indent); + } } else { while (same_para (c) && in_column == other_indent) - c = get_line (f, c); + c = get_line (f, c); } + + /* Tell static analysis tools that using word_limit[-1] is ok. + word_limit is guaranteed to have been incremented by get_line. */ + assert (word < word_limit); + (word_limit - 1)->period = (word_limit - 1)->final = true; next_char = c; return true; @@ -611,11 +625,11 @@ copy_rest (FILE *f, int c) { put_space (next_prefix_indent); for (s = prefix; out_column != in_column && *s; out_column++) - putchar (*s++); + putchar (*s++); if (c != EOF && c != '\n') - put_space (in_column - out_column); + put_space (in_column - out_column); if (c == EOF && in_column >= next_prefix_indent + prefix_length) - putchar ('\n'); + putchar ('\n'); } while (c != '\n' && c != EOF) { @@ -633,8 +647,8 @@ static bool same_para (int c) { return (next_prefix_indent == prefix_indent - && in_column >= next_prefix_indent + prefix_full_length - && c != '\n' && c != EOF); + && in_column >= next_prefix_indent + prefix_full_length + && c != '\n' && c != EOF); } /* Read a line from input file F, given first non-blank character C @@ -662,15 +676,15 @@ get_line (FILE *f, int c) word_limit->text = wptr; do - { - if (wptr == end_of_parabuf) - { - set_other_indent (true); - flush_paragraph (); - } - *wptr++ = c; - c = getc (f); - } + { + if (wptr == end_of_parabuf) + { + set_other_indent (true); + flush_paragraph (); + } + *wptr++ = c; + c = getc (f); + } while (c != EOF && !isspace (c)); in_column += word_limit->length = wptr - word_limit->text; check_punctuation (word_limit); @@ -681,15 +695,15 @@ get_line (FILE *f, int c) c = get_space (f, c); word_limit->space = in_column - start; word_limit->final = (c == EOF - || (word_limit->period - && (c == '\n' || word_limit->space > 1))); + || (word_limit->period + && (c == '\n' || word_limit->space > 1))); if (c == '\n' || c == EOF || uniform) - word_limit->space = word_limit->final ? 2 : 1; + word_limit->space = word_limit->final ? 2 : 1; if (word_limit == end_of_word) - { - set_other_indent (true); - flush_paragraph (); - } + { + set_other_indent (true); + flush_paragraph (); + } word_limit++; } while (c != '\n' && c != EOF); @@ -714,13 +728,13 @@ get_prefix (FILE *f) const char *p; next_prefix_indent = in_column; for (p = prefix; *p != '\0'; p++) - { - unsigned char pc = *p; - if (c != pc) - return c; - in_column++; - c = getc (f); - } + { + unsigned char pc = *p; + if (c != pc) + return c; + in_column++; + c = getc (f); + } c = get_space (f, c); } return c; @@ -732,17 +746,17 @@ get_prefix (FILE *f) static int get_space (FILE *f, int c) { - for (;;) + while (true) { if (c == ' ') - in_column++; + in_column++; else if (c == '\t') - { - tabs = true; - in_column = (in_column / TABWIDTH + 1) * TABWIDTH; - } + { + tabs = true; + in_column = (in_column / TABWIDTH + 1) * TABWIDTH; + } else - return c; + return c; c = getc (f); } } @@ -798,12 +812,12 @@ flush_paragraph (void) for (w = word->next_break; w != word_limit; w = w->next_break) { if (w->best_cost - w->next_break->best_cost < best_break) - { - split_point = w; - best_break = w->best_cost - w->next_break->best_cost; - } + { + split_point = w; + best_break = w->best_cost - w->next_break->best_cost; + } if (best_break <= MAXCOST - LINE_CREDIT) - best_break += LINE_CREDIT; + best_break += LINE_CREDIT; } put_paragraph (split_point); @@ -852,30 +866,30 @@ fmt_paragraph (void) w = start; len += w->length; do - { - w++; - - /* Consider breaking before w. */ - - wcost = line_cost (w, len) + w->best_cost; - if (start == word && last_line_length > 0) - wcost += RAGGED_COST (len - last_line_length); - if (wcost < best) - { - best = wcost; - start->next_break = w; - start->line_length = len; - } - - /* This is a kludge to keep us from computing `len' as the - sum of the sentinel length and some non-zero number. - Since the sentinel w->length may be INT_MAX, adding - to that would give a negative result. */ - if (w == word_limit) - break; - - len += (w - 1)->space + w->length; /* w > start >= word */ - } + { + w++; + + /* Consider breaking before w. */ + + wcost = line_cost (w, len) + w->best_cost; + if (start == word && last_line_length > 0) + wcost += RAGGED_COST (len - last_line_length); + if (wcost < best) + { + best = wcost; + start->next_break = w; + start->line_length = len; + } + + /* This is a kludge to keep us from computing 'len' as the + sum of the sentinel length and some non-zero number. + Since the sentinel w->length may be INT_MAX, adding + to that would give a negative result. */ + if (w == word_limit) + break; + + len += (w - 1)->space + w->length; /* w > start >= word */ + } while (len < max_width); start->best_cost = best + base_cost (start); } @@ -896,16 +910,16 @@ base_cost (WORD *this) if (this > word) { if ((this - 1)->period) - { - if ((this - 1)->final) - cost -= SENTENCE_BONUS; - else - cost += NOBREAK_COST; - } + { + if ((this - 1)->final) + cost -= SENTENCE_BONUS; + else + cost += NOBREAK_COST; + } else if ((this - 1)->punct) - cost -= PUNCT_BONUS; + cost -= PUNCT_BONUS; else if (this > word + 1 && (this - 2)->final) - cost += WIDOW_COST ((this - 1)->length); + cost += WIDOW_COST ((this - 1)->length); } if (this->paren) @@ -927,7 +941,7 @@ line_cost (WORD *next, int len) if (next == word_limit) return 0; - n = best_width - len; + n = goal_width - len; cost = SHORT_COST (n); if (next->next_break != word_limit) { @@ -1001,11 +1015,11 @@ put_space (int space) { tab_target = space_target / TABWIDTH * TABWIDTH; if (out_column + 1 < tab_target) - while (out_column < tab_target) - { - putchar ('\t'); - out_column = (out_column / TABWIDTH + 1) * TABWIDTH; - } + while (out_column < tab_target) + { + putchar ('\t'); + out_column = (out_column / TABWIDTH + 1) * TABWIDTH; + } } while (out_column < space_target) { |