From 86a40dd6acc84e5633d2fc6b45f9193fe01b23f8 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Sun, 14 Aug 2011 14:37:01 -0700 Subject: diff, sdiff: new option --ignore-trailing-space (-Z) Derived from Roland McGrath's patch (dated June 2004!) in: http://lists.gnu.org/archive/html/bug-gnu-utils/2004-07/msg00000.html * NEWS: * doc/diffutils.texi (White Space, Blank Lines) (sdiff Option Summary, diff Options, sdiff Options): Document -Z. * src/diff.h (IGNORE_TRAILING_SPACE) (IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE): New constants, for -Z. * src/diff.c (shortopts, longopts, main, option_help_msgid): * src/sdiff.c (longopts, option_help_msgid, main): * src/io.c (find_and_hash_each_line): * src/util.c (lines_differ, analyze_hunk): Support -Z. --- NEWS | 4 ++ doc/diffutils.texi | 23 +++++++--- src/diff.c | 13 ++++-- src/diff.h | 8 ++++ src/io.c | 122 ++++++++++++++++++++++++++++++++++------------------- src/sdiff.c | 8 +++- src/util.c | 54 +++++++++++++++++++++--- 7 files changed, 171 insertions(+), 61 deletions(-) diff --git a/NEWS b/NEWS index 72f6395..2ef515f 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,10 @@ GNU diffutils NEWS -*- outline -*- --ignore-file-name-case now applies at the top level too. For example, "diff dir inIt" might compare "dir/Init" to "inIt". +** New features + + diff and sdiff have a new option --ignore-trailing-space (-Z). + * Noteworthy changes in release 3.1 (2011-08-10) [stable] ** Bug fixes diff --git a/doc/diffutils.texi b/doc/diffutils.texi index f29e9a1..222e9b7 100644 --- a/doc/diffutils.texi +++ b/doc/diffutils.texi @@ -289,7 +289,11 @@ The @option{--ignore-tab-expansion} (@option{-E}) option ignores the distinction between tabs and spaces on input. A tab is considered to be equivalent to the number of spaces to the next tab stop (@pxref{Tabs}). -The @option{--ignore-space-change} (@option{-b}) option is stronger. +The @option{--ignore-trailing-space} (@option{-Z}) option ignores white +space at line end. + +The @option{--ignore-space-change} (@option{-b}) option is stronger than +@option{-E} and @option{-Z} combined. It ignores white space at line end, and considers all other sequences of one or more white space characters within a line to be equivalent. With this option, @command{diff} considers the following two lines to be equivalent, @@ -344,12 +348,11 @@ is considered identical to a file containing @end example Normally this option affects only lines that are completely empty, but -if you also specify the @option{--ignore-space-change} (@option{-b}) -option, or the @option{--ignore-all-space} (@option{-w}) option, +if you also specify an option that ignores trailing spaces, lines are also affected if they look empty but contain white space. In other words, @option{-B} is equivalent to @samp{-I '^$'} by default, but it is equivalent to @option{-I '^[[:space:]]*$'} if -@option{-b} or @option{-w} is also specified. +@option{-b}, @option{-w} or @option{-Z} is also specified. @node Specified Lines @section Suppressing Differences Whose Lines All Match a Regular Expression @@ -2446,12 +2449,12 @@ The following @command{sdiff} options have the same meaning as for @example -a -b -d -i -t -v --B -E -I @var{regexp} +-B -E -I @var{regexp} -Z --expand-tabs --ignore-blank-lines --ignore-case --ignore-matching-lines=@var{regexp} --ignore-space-change ---ignore-tab-expansion +--ignore-tab-expansion --ignore-trailing-space --left-column --minimal --speed-large-files --strip-trailing-cr --suppress-common-lines --tabsize=@var{columns} --text --version --width=@var{columns} @@ -3966,6 +3969,10 @@ match any pattern contained in @var{file}. @xref{Comparing Directories}. @item -y @itemx --side-by-side Use the side by side output format. @xref{Side by Side Format}. + +@item -Z +@itemx --ignore-trailing-space +Ignore white space at line end. @xref{White Space}. @end table @node Invoking diff3 @@ -4446,6 +4453,10 @@ option is @option{-W} in @command{diff}, @option{-w} in @command{sdiff}. Ignore white space when comparing lines. @xref{White Space}. Note that for historical reasons, this option is @option{-w} in @command{diff}, @option{-W} in @command{sdiff}. + +@item -Z +@itemx --ignore-trailing-space +Ignore white space at line end. @xref{White Space}. @end table @node Standards conformance diff --git a/src/diff.c b/src/diff.c index 1e334b7..c096406 100644 --- a/src/diff.c +++ b/src/diff.c @@ -107,7 +107,7 @@ static bool unidirectional_new_file; static bool report_identical_files; static char const shortopts[] = -"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y"; +"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ"; /* Values for long options that do not have single-letter equivalents. */ enum @@ -178,6 +178,7 @@ static struct option const longopts[] = {"ignore-matching-lines", 1, 0, 'I'}, {"ignore-space-change", 0, 0, 'b'}, {"ignore-tab-expansion", 0, 0, 'E'}, + {"ignore-trailing-space", 0, 0, 'Z'}, {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION}, {"initial-tab", 0, 0, 'T'}, {"label", 1, 0, 'L'}, @@ -320,6 +321,11 @@ main (int argc, char **argv) ignore_white_space = IGNORE_SPACE_CHANGE; break; + case 'Z': + if (ignore_white_space < IGNORE_SPACE_CHANGE) + ignore_white_space |= IGNORE_TRAILING_SPACE; + break; + case 'B': ignore_blank_lines = true; break; @@ -381,8 +387,8 @@ main (int argc, char **argv) break; case 'E': - if (ignore_white_space < IGNORE_TAB_EXPANSION) - ignore_white_space = IGNORE_TAB_EXPANSION; + if (ignore_white_space < IGNORE_SPACE_CHANGE) + ignore_white_space |= IGNORE_TAB_EXPANSION; break; case 'f': @@ -880,6 +886,7 @@ static char const * const option_help_msgid[] = { "", N_("-i, --ignore-case ignore case differences in file contents"), N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"), + N_("-Z, --ignore-trailing-space ignore white space at line end"), N_("-b, --ignore-space-change ignore changes in the amount of white space"), N_("-w, --ignore-all-space ignore all white space"), N_("-B, --ignore-blank-lines ignore changes whose lines are all blank"), diff --git a/src/diff.h b/src/diff.h index b1c90c7..b44a157 100644 --- a/src/diff.h +++ b/src/diff.h @@ -106,6 +106,14 @@ enum DIFF_white_space /* Ignore changes due to tab expansion (-E). */ IGNORE_TAB_EXPANSION, + /* Ignore changes in trailing horizontal white space (-Z). */ + IGNORE_TRAILING_SPACE, + + /* IGNORE_TAB_EXPANSION and IGNORE_TRAILING_SPACE are a special case + because they are independent and can be ORed together, yielding + IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE. */ + IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE, + /* Ignore changes in horizontal white space (-b). */ IGNORE_SPACE_CHANGE, diff --git a/src/io.c b/src/io.c index 9e3e1ee..fdb4654 100644 --- a/src/io.c +++ b/src/io.c @@ -255,36 +255,53 @@ find_and_hash_each_line (struct file_data *current) break; case IGNORE_TAB_EXPANSION: + case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE: + case IGNORE_TRAILING_SPACE: { size_t column = 0; while ((c = *p++) != '\n') { - size_t repetitions = 1; - - switch (c) + if (ignore_white_space & IGNORE_TRAILING_SPACE + && isspace (c)) { - case '\b': - column -= 0 < column; - break; - - case '\t': - c = ' '; - repetitions = tabsize - column % tabsize; - column = (column + repetitions < column - ? 0 - : column + repetitions); - break; - - case '\r': - column = 0; - break; - - default: - c = tolower (c); - column++; - break; + char const *p1 = p; + unsigned char c1; + do + if ((c1 = *p1++) == '\n') + { + p = p1; + goto hashing_done; + } + while (isspace (c1)); } + size_t repetitions = 1; + + if (ignore_white_space & IGNORE_TAB_EXPANSION) + switch (c) + { + case '\b': + column -= 0 < column; + break; + + case '\t': + c = ' '; + repetitions = tabsize - column % tabsize; + column = (column + repetitions < column + ? 0 + : column + repetitions); + break; + + case '\r': + column = 0; + break; + + default: + column++; + break; + } + + c = tolower (c); do h = HASH (h, c); while (--repetitions != 0); @@ -325,35 +342,52 @@ find_and_hash_each_line (struct file_data *current) break; case IGNORE_TAB_EXPANSION: + case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE: + case IGNORE_TRAILING_SPACE: { size_t column = 0; while ((c = *p++) != '\n') { size_t repetitions = 1; - switch (c) + if (ignore_white_space & IGNORE_TRAILING_SPACE + && isspace (c)) { - case '\b': - column -= 0 < column; - break; - - case '\t': - c = ' '; - repetitions = tabsize - column % tabsize; - column = (column + repetitions < column - ? 0 - : column + repetitions); - break; - - case '\r': - column = 0; - break; - - default: - column++; - break; + char const *p1 = p; + unsigned char c1; + do + if ((c1 = *p1++) == '\n') + { + p = p1; + goto hashing_done; + } + while (isspace (c1)); } + if (ignore_white_space & IGNORE_TAB_EXPANSION) + switch (c) + { + case '\b': + column -= 0 < column; + break; + + case '\t': + c = ' '; + repetitions = tabsize - column % tabsize; + column = (column + repetitions < column + ? 0 + : column + repetitions); + break; + + case '\r': + column = 0; + break; + + default: + column++; + break; + } + do h = HASH (h, c); while (--repetitions != 0); @@ -381,7 +415,7 @@ find_and_hash_each_line (struct file_data *current) complete line, put it into buckets[-1] so that it can compare equal only to the other file's incomplete line (if one exists). */ - if (ignore_white_space < IGNORE_SPACE_CHANGE) + if (ignore_white_space < IGNORE_TRAILING_SPACE) bucket = &buckets[-1]; } diff --git a/src/sdiff.c b/src/sdiff.c index 6d2e72d..e0b29f0 100644 --- a/src/sdiff.c +++ b/src/sdiff.c @@ -132,6 +132,7 @@ static struct option const longopts[] = {"ignore-matching-lines", 1, 0, 'I'}, {"ignore-space-change", 0, 0, 'b'}, {"ignore-tab-expansion", 0, 0, 'E'}, + {"ignore-trailing-space", 0, 0, 'Z'}, {"left-column", 0, 0, 'l'}, {"minimal", 0, 0, 'd'}, {"output", 1, 0, 'o'}, @@ -170,6 +171,7 @@ static char const * const option_help_msgid[] = { "", N_("-i, --ignore-case consider upper- and lower-case to be the same"), N_("-E, --ignore-tab-expansion ignore changes due to tab expansion"), + N_("-Z, --ignore-trailing-space ignore white space at line end"), N_("-b, --ignore-space-change ignore changes in the amount of white space"), N_("-W, --ignore-all-space ignore all white space"), N_("-B, --ignore-blank-lines ignore changes whose lines are all blank"), @@ -458,7 +460,7 @@ main (int argc, char *argv[]) diffarg (DEFAULT_DIFF_PROGRAM); /* parse command line args */ - while ((opt = getopt_long (argc, argv, "abBdEHiI:lo:stvw:W", longopts, 0)) + while ((opt = getopt_long (argc, argv, "abBdEHiI:lo:stvw:WZ", longopts, 0)) != -1) { switch (opt) @@ -527,6 +529,10 @@ main (int argc, char *argv[]) diffarg ("-w"); break; + case 'Z': + diffarg ("-Z"); + break; + case DIFF_PROGRAM_OPTION: diffargv[0] = optarg; break; diff --git a/src/util.c b/src/util.c index bba51a5..5808434 100644 --- a/src/util.c +++ b/src/util.c @@ -395,6 +395,33 @@ lines_differ (char const *s1, char const *s2) break; + case IGNORE_TRAILING_SPACE: + case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE: + if (isspace (c1) && isspace (c2)) + { + unsigned char c; + if (c1 != '\n') + { + char const *p = t1; + while ((c = *p) != '\n' && isspace (c)) + ++p; + if (c != '\n') + break; + } + if (c2 != '\n') + { + char const *p = t2; + while ((c = *p) != '\n' && isspace (c)) + ++p; + if (c != '\n') + break; + } + /* Both lines have nothing but whitespace left. */ + return false; + } + if (ignore_white_space == IGNORE_TRAILING_SPACE) + break; + /* Fall through. */ case IGNORE_TAB_EXPANSION: if ((c1 == ' ' && c2 == '\t') || (c1 == '\t' && c2 == ' ')) @@ -674,8 +701,11 @@ analyze_hunk (struct change *hunk, size_t trivial_length = ignore_blank_lines - 1; /* If 0, ignore zero-length lines; if SIZE_MAX, do not ignore lines just because of their length. */ + + bool skip_white_space = + ignore_blank_lines && IGNORE_TRAILING_SPACE <= ignore_white_space; bool skip_leading_white_space = - (ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space); + skip_white_space && IGNORE_SPACE_CHANGE <= ignore_white_space; char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */ char const * const *linbuf1 = files[1].linbuf; @@ -699,9 +729,14 @@ analyze_hunk (struct change *hunk, char const *newline = linbuf0[i + 1] - 1; size_t len = newline - line; char const *p = line; - if (skip_leading_white_space) - while (isspace ((unsigned char) *p) && *p != '\n') - p++; + if (skip_white_space) + for (; *p != '\n'; p++) + if (! isspace ((unsigned char) *p)) + { + if (! skip_leading_white_space) + p = line; + break; + } if (newline - p != trivial_length && (! ignore_regexp.fastmap || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) @@ -714,9 +749,14 @@ analyze_hunk (struct change *hunk, char const *newline = linbuf1[i + 1] - 1; size_t len = newline - line; char const *p = line; - if (skip_leading_white_space) - while (isspace ((unsigned char) *p) && *p != '\n') - p++; + if (skip_white_space) + for (; *p != '\n'; p++) + if (! isspace ((unsigned char) *p)) + { + if (! skip_leading_white_space) + p = line; + break; + } if (newline - p != trivial_length && (! ignore_regexp.fastmap || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) -- cgit v1.2.1