summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Rast <trast@student.ethz.ch>2009-01-17 17:29:48 +0100
committerJunio C Hamano <gitster@pobox.com>2009-01-17 10:44:21 -0800
commit80c49c3de2d5a3aa12b0980a65f1163c8aef0c16 (patch)
tree6a87dd650320b12e63e1578ba69280f53b1d09b5
parentc4b252c3d894673968b144d8e10b79ef22c17b0a (diff)
downloadgit-80c49c3de2d5a3aa12b0980a65f1163c8aef0c16.tar.gz
color-words: make regex configurable via attributes
Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--Documentation/diff-options.txt4
-rw-r--r--Documentation/gitattributes.txt21
-rw-r--r--diff.c10
-rwxr-xr-xt/t4034-diff-words.sh36
-rw-r--r--userdiff.c78
-rw-r--r--userdiff.h1
6 files changed, 135 insertions, 15 deletions
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 8689a92d8d..1edb82e8e1 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -102,6 +102,10 @@ differences. You may want to append `|[^[:space:]]` to your regular
expression to make sure that it matches all non-whitespace characters.
A match that contains a newline is silently truncated(!) at the
newline.
++
+The regex can also be set via a diff driver, see
+linkgit:gitattributes[1]; giving it explicitly overrides any diff
+driver setting.
--no-renames::
Turn off rename detection, even when the configuration
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 8af22eccac..ba3ba12730 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -317,6 +317,8 @@ patterns are available:
- `bibtex` suitable for files with BibTeX coded references.
+- `cpp` suitable for source code in the C and C++ languages.
+
- `html` suitable for HTML/XHTML documents.
- `java` suitable for source code in the Java language.
@@ -334,6 +336,25 @@ patterns are available:
- `tex` suitable for source code for LaTeX documents.
+Customizing word diff
+^^^^^^^^^^^^^^^^^^^^^
+
+You can customize the rules that `git diff --color-words` uses to
+split words in a line, by specifying an appropriate regular expression
+in the "diff.*.wordregex" configuration variable. For example, in TeX
+a backslash followed by a sequence of letters forms a command, but
+several such commands can be run together without intervening
+whitespace. To separate them, use a regular expression such as
+
+------------------------
+[diff "tex"]
+ wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+"
+------------------------
+
+A built-in pattern is provided for all languages listed in the
+previous section.
+
+
Performing text diffs of binary files
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/diff.c b/diff.c
index 00c661f82e..9fcde963db 100644
--- a/diff.c
+++ b/diff.c
@@ -1380,6 +1380,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe
return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
}
+static const char *userdiff_word_regex(struct diff_filespec *one)
+{
+ diff_filespec_load_driver(one);
+ return one->driver->word_regex;
+}
+
void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
{
if (!options->a_prefix)
@@ -1540,6 +1546,10 @@ static void builtin_diff(const char *name_a,
ecbdata.diff_words =
xcalloc(1, sizeof(struct diff_words_data));
ecbdata.diff_words->file = o->file;
+ if (!o->word_regex)
+ o->word_regex = userdiff_word_regex(one);
+ if (!o->word_regex)
+ o->word_regex = userdiff_word_regex(two);
if (o->word_regex) {
ecbdata.diff_words->word_regex = (regex_t *)
xmalloc(sizeof(regex_t));
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index 4873486301..744221bef9 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -84,6 +84,41 @@ test_expect_success 'word diff with a regular expression' '
'
+test_expect_success 'set a diff driver' '
+ git config diff.testdriver.wordregex "[^[:space:]]" &&
+ cat <<EOF > .gitattributes
+pre diff=testdriver
+post diff=testdriver
+EOF
+'
+
+test_expect_success 'option overrides default' '
+
+ word_diff --color-words="[a-z]+"
+
+'
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 330b04f..5ed8eff 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1,3 +1,7 @@<RESET>
+h(4)<GREEN>,hh[44]<RESET>
+<RESET>
+a = b + c<RESET>
+
+<GREEN>aa = a<RESET>
+
+<GREEN>aeff = aeff * ( aaa )<RESET>
+EOF
+
+test_expect_success 'use default supplied by driver' '
+
+ word_diff --color-words
+
+'
+
echo 'aaa (aaa)' > pre
echo 'aaa (aaa) aaa' > post
@@ -100,6 +135,7 @@ test_expect_success 'test parsing words for newline' '
word_diff --color-words="a+"
+
'
echo '(:' > pre
diff --git a/userdiff.c b/userdiff.c
index 3681062ebf..2b55509485 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -6,14 +6,20 @@ static struct userdiff_driver *drivers;
static int ndrivers;
static int drivers_alloc;
-#define FUNCNAME(name, pattern) \
- { name, NULL, -1, { pattern, REG_EXTENDED } }
+#define PATTERNS(name, pattern, wordregex) \
+ { name, NULL, -1, { pattern, REG_EXTENDED }, wordregex }
static struct userdiff_driver builtin_drivers[] = {
-FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"),
-FUNCNAME("java",
+PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$",
+ "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("java",
"!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
- "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"),
-FUNCNAME("objc",
+ "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
+ "[a-zA-Z_][a-zA-Z0-9_]*"
+ "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
+ "|[-+*/<>%&^|=!]="
+ "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"
+ "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("objc",
/* Negate C statements that can look like functions */
"!^[ \t]*(do|for|if|else|return|switch|while)\n"
/* Objective-C methods */
@@ -21,20 +27,60 @@ FUNCNAME("objc",
/* C functions */
"^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n"
/* Objective-C class/protocol definitions */
- "^(@(implementation|interface|protocol)[ \t].*)$"),
-FUNCNAME("pascal",
+ "^(@(implementation|interface|protocol)[ \t].*)$",
+ /* -- */
+ "[a-zA-Z_][a-zA-Z0-9_]*"
+ "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
+ "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
+ "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("pascal",
"^((procedure|function|constructor|destructor|interface|"
"implementation|initialization|finalization)[ \t]*.*)$"
"\n"
- "^(.*=[ \t]*(class|record).*)$"),
-FUNCNAME("php", "^[\t ]*((function|class).*)"),
-FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"),
-FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"),
-FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"),
-FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"),
+ "^(.*=[ \t]*(class|record).*)$",
+ /* -- */
+ "[a-zA-Z_][a-zA-Z0-9_]*"
+ "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
+ "|<>|<=|>=|:=|\\.\\."
+ "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("php", "^[\t ]*((function|class).*)",
+ /* -- */
+ "[a-zA-Z_][a-zA-Z0-9_]*"
+ "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
+ "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->"
+ "|[^[:space:]]|[\x80-\xff]+"),
+PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$",
+ /* -- */
+ "[a-zA-Z_][a-zA-Z0-9_]*"
+ "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?"
+ "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?"
+ "|[^[:space:]|[\x80-\xff]+"),
+ /* -- */
+PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$",
+ /* -- */
+ "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*"
+ "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?."
+ "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"
+ "|[^[:space:]|[\x80-\xff]+"),
+PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+ "[={}\"]|[^={}\" \t]+"),
+PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
+ "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"),
+PATTERNS("cpp",
+ /* Jump targets or access declarations */
+ "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n"
+ /* C/++ functions/methods at top level */
+ "^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n"
+ /* compound type at top level */
+ "^((struct|class|enum)[^;]*)$",
+ /* -- */
+ "[a-zA-Z_][a-zA-Z0-9_]*"
+ "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
+ "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
+ "|[^[:space:]]|[\x80-\xff]+"),
{ "default", NULL, -1, { NULL, 0 } },
};
-#undef FUNCNAME
+#undef PATTERNS
static struct userdiff_driver driver_true = {
"diff=true",
@@ -134,6 +180,8 @@ int userdiff_config(const char *k, const char *v)
return parse_string(&drv->external, k, v);
if ((drv = parse_driver(k, v, "textconv")))
return parse_string(&drv->textconv, k, v);
+ if ((drv = parse_driver(k, v, "wordregex")))
+ return parse_string(&drv->word_regex, k, v);
return 0;
}
diff --git a/userdiff.h b/userdiff.h
index ba2945770b..c3151594f5 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -11,6 +11,7 @@ struct userdiff_driver {
const char *external;
int binary;
struct userdiff_funcname funcname;
+ const char *word_regex;
const char *textconv;
};