diff, sdiff: new option --ignore-trailing-space (-Z)

Derived from Roland McGrath's patch (dated June 2004!) in:
http://lists.gnu.org/archive/html/bug-gnu-utils/2004-07/msg00000.html
* NEWS:
* doc/diffutils.texi (White Space, Blank Lines)
(sdiff Option Summary, diff Options, sdiff Options): Document -Z.
* src/diff.h (IGNORE_TRAILING_SPACE)
(IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE): New constants, for -Z.
* src/diff.c (shortopts, longopts, main, option_help_msgid):
* src/sdiff.c (longopts, option_help_msgid, main):
* src/io.c (find_and_hash_each_line):
* src/util.c (lines_differ, analyze_hunk): Support -Z.
author: Roland McGrath <roland@hack.frob.com> 2011-08-14 14:37:01 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2011-08-14 22:09:21 -0700
commit: 86a40dd6acc84e5633d2fc6b45f9193fe01b23f8 (patch)
tree: 467c39c61d653b8a6a13b401948efb8d1bae9278
parent: efb0557f4cd63628957880ffbf63702b604e0ae2 (diff)
download: diffutils-86a40dd6acc84e5633d2fc6b45f9193fe01b23f8.tar.gz
7 files changed, 171 insertions, 61 deletions
diff --git a/NEWS b/NEWS
index 72f6395..2ef515f 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,10 @@ GNU diffutils NEWS                                    -*- outline -*-
   --ignore-file-name-case now applies at the top level too.
   For example, "diff dir inIt" might compare "dir/Init" to "inIt".
 
+** New features
+
+  diff and sdiff have a new option --ignore-trailing-space (-Z).
+
 * Noteworthy changes in release 3.1 (2011-08-10) [stable]
 
 ** Bug fixes
diff --git a/doc/diffutils.texi b/doc/diffutils.texi
index f29e9a1..222e9b7 100644
--- a/doc/diffutils.texi
+++ b/doc/diffutils.texi
@@ -289,7 +289,11 @@ The @option{--ignore-tab-expansion} (@option{-E}) option ignores the
 distinction between tabs and spaces on input.  A tab is considered to be
 equivalent to the number of spaces to the next tab stop (@pxref{Tabs}).
 
-The @option{--ignore-space-change} (@option{-b}) option is stronger.
+The @option{--ignore-trailing-space} (@option{-Z}) option ignores white
+space at line end.
+
+The @option{--ignore-space-change} (@option{-b}) option is stronger than
+@option{-E} and @option{-Z} combined.
 It ignores white space at line end, and considers all other sequences of
 one or more white space characters within a line to be equivalent.  With this
 option, @command{diff} considers the following two lines to be equivalent,
@@ -344,12 +348,11 @@ is considered identical to a file containing
 @end example
 
 Normally this option affects only lines that are completely empty, but
-if you also specify the @option{--ignore-space-change} (@option{-b})
-option, or the @option{--ignore-all-space} (@option{-w}) option,
+if you also specify an option that ignores trailing spaces,
 lines are also affected if they look empty but contain white space.
 In other words, @option{-B} is equivalent to @samp{-I '^$'} by
 default, but it is equivalent to @option{-I '^[[:space:]]*$'} if
-@option{-b} or @option{-w} is also specified.
+@option{-b}, @option{-w} or @option{-Z} is also specified.
 
 @node Specified Lines
 @section Suppressing Differences Whose Lines All Match a Regular Expression
@@ -2446,12 +2449,12 @@ The following @command{sdiff} options have the same meaning as for
 
 @example
 -a -b -d -i -t -v
--B -E -I @var{regexp}
+-B -E -I @var{regexp} -Z
 
 --expand-tabs
 --ignore-blank-lines  --ignore-case
 --ignore-matching-lines=@var{regexp}  --ignore-space-change
---ignore-tab-expansion
+--ignore-tab-expansion  --ignore-trailing-space
 --left-column  --minimal  --speed-large-files
 --strip-trailing-cr  --suppress-common-lines
 --tabsize=@var{columns}  --text  --version  --width=@var{columns}
@@ -3966,6 +3969,10 @@ match any pattern contained in @var{file}.  @xref{Comparing Directories}.
 @item -y
 @itemx --side-by-side
 Use the side by side output format.  @xref{Side by Side Format}.
+
+@item -Z
+@itemx --ignore-trailing-space
+Ignore white space at line end.  @xref{White Space}.
 @end table
 
 @node Invoking diff3
@@ -4446,6 +4453,10 @@ option is @option{-W} in @command{diff}, @option{-w} in @command{sdiff}.
 Ignore white space when comparing lines.  @xref{White Space}.
 Note that for historical reasons, this option is @option{-w} in @command{diff},
 @option{-W} in @command{sdiff}.
+
+@item -Z
+@itemx --ignore-trailing-space
+Ignore white space at line end.  @xref{White Space}.
 @end table
 
 @node Standards conformance
diff --git a/src/diff.c b/src/diff.c
index 1e334b7..c096406 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -107,7 +107,7 @@ static bool unidirectional_new_file;
 static bool report_identical_files;
 
 static char const shortopts[] =
-"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";
+"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yZ";
 
 /* Values for long options that do not have single-letter equivalents.  */
 enum
@@ -178,6 +178,7 @@ static struct option const longopts[] =
   {"ignore-matching-lines", 1, 0, 'I'},
   {"ignore-space-change", 0, 0, 'b'},
   {"ignore-tab-expansion", 0, 0, 'E'},
+  {"ignore-trailing-space", 0, 0, 'Z'},
   {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
   {"initial-tab", 0, 0, 'T'},
   {"label", 1, 0, 'L'},
@@ -320,6 +321,11 @@ main (int argc, char **argv)
 	    ignore_white_space = IGNORE_SPACE_CHANGE;
 	  break;
 
+	case 'Z':
+	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
+	    ignore_white_space |= IGNORE_TRAILING_SPACE;
+	  break;
+
 	case 'B':
 	  ignore_blank_lines = true;
 	  break;
@@ -381,8 +387,8 @@ main (int argc, char **argv)
 	  break;
 
 	case 'E':
-	  if (ignore_white_space < IGNORE_TAB_EXPANSION)
-	    ignore_white_space = IGNORE_TAB_EXPANSION;
+	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
+	    ignore_white_space |= IGNORE_TAB_EXPANSION;
 	  break;
 
 	case 'f':
@@ -880,6 +886,7 @@ static char const * const option_help_msgid[] = {
   "",
   N_("-i, --ignore-case               ignore case differences in file contents"),
   N_("-E, --ignore-tab-expansion      ignore changes due to tab expansion"),
+  N_("-Z, --ignore-trailing-space     ignore white space at line end"),
   N_("-b, --ignore-space-change       ignore changes in the amount of white space"),
   N_("-w, --ignore-all-space          ignore all white space"),
   N_("-B, --ignore-blank-lines        ignore changes whose lines are all blank"),
diff --git a/src/diff.h b/src/diff.h
index b1c90c7..b44a157 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -106,6 +106,14 @@ enum DIFF_white_space
   /* Ignore changes due to tab expansion (-E).  */
   IGNORE_TAB_EXPANSION,
 
+  /* Ignore changes in trailing horizontal white space (-Z).  */
+  IGNORE_TRAILING_SPACE,
+
+  /* IGNORE_TAB_EXPANSION and IGNORE_TRAILING_SPACE are a special case
+     because they are independent and can be ORed together, yielding
+     IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE.  */
+  IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE,
+
   /* Ignore changes in horizontal white space (-b).  */
   IGNORE_SPACE_CHANGE,
 
diff --git a/src/io.c b/src/io.c
index 9e3e1ee..fdb4654 100644
--- a/src/io.c
+++ b/src/io.c
@@ -255,36 +255,53 @@ find_and_hash_each_line (struct file_data *current)
 	    break;
 
 	  case IGNORE_TAB_EXPANSION:
+	  case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+	  case IGNORE_TRAILING_SPACE:
 	    {
 	      size_t column = 0;
 	      while ((c = *p++) != '\n')
 		{
-		  size_t repetitions = 1;
-
-		  switch (c)
+		  if (ignore_white_space & IGNORE_TRAILING_SPACE
+		      && isspace (c))
 		    {
-		    case '\b':
-		      column -= 0 < column;
-		      break;
-
-		    case '\t':
-		      c = ' ';
-		      repetitions = tabsize - column % tabsize;
-		      column = (column + repetitions < column
-				? 0
-				: column + repetitions);
-		      break;
-
-		    case '\r':
-		      column = 0;
-		      break;
-
-		    default:
-		      c = tolower (c);
-		      column++;
-		      break;
+		      char const *p1 = p;
+		      unsigned char c1;
+		      do
+			if ((c1 = *p1++) == '\n')
+			  {
+			    p = p1;
+			    goto hashing_done;
+			  }
+		      while (isspace (c1));
 		    }
 
+		  size_t repetitions = 1;
+
+		  if (ignore_white_space & IGNORE_TAB_EXPANSION)
+		    switch (c)
+		      {
+		      case '\b':
+			column -= 0 < column;
+			break;
+
+		      case '\t':
+			c = ' ';
+			repetitions = tabsize - column % tabsize;
+			column = (column + repetitions < column
+				  ? 0
+				  : column + repetitions);
+			break;
+
+		      case '\r':
+			column = 0;
+			break;
+
+		      default:
+			column++;
+			break;
+		      }
+
+		  c = tolower (c);
 		  do
 		    h = HASH (h, c);
 		  while (--repetitions != 0);
@@ -325,35 +342,52 @@ find_and_hash_each_line (struct file_data *current)
 	    break;
 
 	  case IGNORE_TAB_EXPANSION:
+	  case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+	  case IGNORE_TRAILING_SPACE:
 	    {
 	      size_t column = 0;
 	      while ((c = *p++) != '\n')
 		{
 		  size_t repetitions = 1;
 
-		  switch (c)
+		  if (ignore_white_space & IGNORE_TRAILING_SPACE
+		      && isspace (c))
 		    {
-		    case '\b':
-		      column -= 0 < column;
-		      break;
-
-		    case '\t':
-		      c = ' ';
-		      repetitions = tabsize - column % tabsize;
-		      column = (column + repetitions < column
-				? 0
-				: column + repetitions);
-		      break;
-
-		    case '\r':
-		      column = 0;
-		      break;
-
-		    default:
-		      column++;
-		      break;
+		      char const *p1 = p;
+		      unsigned char c1;
+		      do
+			if ((c1 = *p1++) == '\n')
+			  {
+			    p = p1;
+			    goto hashing_done;
+			  }
+		      while (isspace (c1));
 		    }
 
+		  if (ignore_white_space & IGNORE_TAB_EXPANSION)
+		    switch (c)
+		      {
+		      case '\b':
+			column -= 0 < column;
+			break;
+
+		      case '\t':
+			c = ' ';
+			repetitions = tabsize - column % tabsize;
+			column = (column + repetitions < column
+				  ? 0
+				  : column + repetitions);
+			break;
+
+		      case '\r':
+			column = 0;
+			break;
+
+		      default:
+			column++;
+			break;
+		      }
+
 		  do
 		    h = HASH (h, c);
 		  while (--repetitions != 0);
@@ -381,7 +415,7 @@ find_and_hash_each_line (struct file_data *current)
 	     complete line, put it into buckets[-1] so that it can
 	     compare equal only to the other file's incomplete line
 	     (if one exists).  */
-	  if (ignore_white_space < IGNORE_SPACE_CHANGE)
+	  if (ignore_white_space < IGNORE_TRAILING_SPACE)
 	    bucket = &buckets[-1];
 	}
 
diff --git a/src/sdiff.c b/src/sdiff.c
index 6d2e72d..e0b29f0 100644
--- a/src/sdiff.c
+++ b/src/sdiff.c
@@ -132,6 +132,7 @@ static struct option const longopts[] =
   {"ignore-matching-lines", 1, 0, 'I'},
   {"ignore-space-change", 0, 0, 'b'},
   {"ignore-tab-expansion", 0, 0, 'E'},
+  {"ignore-trailing-space", 0, 0, 'Z'},
   {"left-column", 0, 0, 'l'},
   {"minimal", 0, 0, 'd'},
   {"output", 1, 0, 'o'},
@@ -170,6 +171,7 @@ static char const * const option_help_msgid[] = {
   "",
   N_("-i, --ignore-case            consider upper- and lower-case to be the same"),
   N_("-E, --ignore-tab-expansion   ignore changes due to tab expansion"),
+  N_("-Z, --ignore-trailing-space  ignore white space at line end"),
   N_("-b, --ignore-space-change    ignore changes in the amount of white space"),
   N_("-W, --ignore-all-space       ignore all white space"),
   N_("-B, --ignore-blank-lines     ignore changes whose lines are all blank"),
@@ -458,7 +460,7 @@ main (int argc, char *argv[])
   diffarg (DEFAULT_DIFF_PROGRAM);
 
   /* parse command line args */
-  while ((opt = getopt_long (argc, argv, "abBdEHiI:lo:stvw:W", longopts, 0))
+  while ((opt = getopt_long (argc, argv, "abBdEHiI:lo:stvw:WZ", longopts, 0))
 	 != -1)
     {
       switch (opt)
@@ -527,6 +529,10 @@ main (int argc, char *argv[])
 	  diffarg ("-w");
 	  break;
 
+	case 'Z':
+	  diffarg ("-Z");
+	  break;
+
 	case DIFF_PROGRAM_OPTION:
 	  diffargv[0] = optarg;
 	  break;
diff --git a/src/util.c b/src/util.c
index bba51a5..5808434 100644
--- a/src/util.c
+++ b/src/util.c
@@ -395,6 +395,33 @@ lines_differ (char const *s1, char const *s2)
 
 	      break;
 
+	    case IGNORE_TRAILING_SPACE:
+	    case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
+	      if (isspace (c1) && isspace (c2))
+		{
+		  unsigned char c;
+		  if (c1 != '\n')
+		    {
+		      char const *p = t1;
+		      while ((c = *p) != '\n' && isspace (c))
+			++p;
+		      if (c != '\n')
+			break;
+		    }
+		  if (c2 != '\n')
+		    {
+		      char const *p = t2;
+		      while ((c = *p) != '\n' && isspace (c))
+			++p;
+		      if (c != '\n')
+			break;
+		    }
+		  /* Both lines have nothing but whitespace left.  */
+		  return false;
+		}
+	      if (ignore_white_space == IGNORE_TRAILING_SPACE)
+		break;
+	      /* Fall through.  */
 	    case IGNORE_TAB_EXPANSION:
 	      if ((c1 == ' ' && c2 == '\t')
 		  || (c1 == '\t' && c2 == ' '))
@@ -674,8 +701,11 @@ analyze_hunk (struct change *hunk,
   size_t trivial_length = ignore_blank_lines - 1;
     /* If 0, ignore zero-length lines;
        if SIZE_MAX, do not ignore lines just because of their length.  */
+
+  bool skip_white_space =
+    ignore_blank_lines && IGNORE_TRAILING_SPACE <= ignore_white_space;
   bool skip_leading_white_space =
-    (ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space);
+    skip_white_space && IGNORE_SPACE_CHANGE <= ignore_white_space;
 
   char const * const *linbuf0 = files[0].linbuf;  /* Help the compiler.  */
   char const * const *linbuf1 = files[1].linbuf;
@@ -699,9 +729,14 @@ analyze_hunk (struct change *hunk,
 	  char const *newline = linbuf0[i + 1] - 1;
 	  size_t len = newline - line;
 	  char const *p = line;
-	  if (skip_leading_white_space)
-	    while (isspace ((unsigned char) *p) && *p != '\n')
-	      p++;
+	  if (skip_white_space)
+	    for (; *p != '\n'; p++)
+	      if (! isspace ((unsigned char) *p))
+		{
+		  if (! skip_leading_white_space)
+		    p = line;
+		  break;
+		}
 	  if (newline - p != trivial_length
 	      && (! ignore_regexp.fastmap
 		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
@@ -714,9 +749,14 @@ analyze_hunk (struct change *hunk,
 	  char const *newline = linbuf1[i + 1] - 1;
 	  size_t len = newline - line;
 	  char const *p = line;
-	  if (skip_leading_white_space)
-	    while (isspace ((unsigned char) *p) && *p != '\n')
-	      p++;
+	  if (skip_white_space)
+	    for (; *p != '\n'; p++)
+	      if (! isspace ((unsigned char) *p))
+		{
+		  if (! skip_leading_white_space)
+		    p = line;
+		  break;
+		}
 	  if (newline - p != trivial_length
 	      && (! ignore_regexp.fastmap
 		  || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
author	Roland McGrath <roland@hack.frob.com>	2011-08-14 14:37:01 -0700
committer	Paul Eggert <eggert@cs.ucla.edu>	2011-08-14 22:09:21 -0700
commit	86a40dd6acc84e5633d2fc6b45f9193fe01b23f8 (patch)
tree	467c39c61d653b8a6a13b401948efb8d1bae9278
parent	efb0557f4cd63628957880ffbf63702b604e0ae2 (diff)
download	diffutils-86a40dd6acc84e5633d2fc6b45f9193fe01b23f8.tar.gz