diff options
-rw-r--r-- | Documentation/config.txt | 6 | ||||
-rw-r--r-- | Documentation/diff-options.txt | 18 | ||||
-rw-r--r-- | Documentation/gitattributes.txt | 21 | ||||
-rw-r--r-- | color.c | 28 | ||||
-rw-r--r-- | color.h | 1 | ||||
-rw-r--r-- | diff.c | 225 | ||||
-rw-r--r-- | diff.h | 1 | ||||
-rwxr-xr-x | t/t4034-diff-words.sh | 200 | ||||
-rw-r--r-- | userdiff.c | 78 | ||||
-rw-r--r-- | userdiff.h | 1 |
10 files changed, 492 insertions, 87 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt index 290cb48eb9..e2b8775dd3 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -639,6 +639,12 @@ diff.suppressBlankEmpty:: A boolean to inhibit the standard behavior of printing a space before each empty output line. Defaults to false. +diff.wordRegex:: + A POSIX Extended Regular Expression used to determine what is a "word" + when performing word-by-word difference calculations. Character + sequences that match the regular expression are "words", all other + characters are *ignorable* whitespace. + fetch.unpackLimit:: If the number of objects fetched over the git native transfer is below this diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index a8ed23e7f8..813a7b11b9 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -94,8 +94,22 @@ endif::git-format-patch[] Turn off colored diff, even when the configuration file gives the default to color output. ---color-words:: - Show colored word diff, i.e. color words which have changed. +--color-words[=<regex>]:: + Show colored word diff, i.e., color words which have changed. + By default, words are separated by whitespace. ++ +When a <regex> is specified, every non-overlapping match of the +<regex> is considered a word. Anything between these matches is +considered whitespace and ignored(!) for the purposes of finding +differences. You may want to append `|[^[:space:]]` to your regular +expression to make sure that it matches all non-whitespace characters. +A match that contains a newline is silently truncated(!) at the +newline. ++ +The regex can also be set via a diff driver or configuration option, see +linkgit:gitattributes[1] or linkgit:git-config[1]. Giving it explicitly +overrides any diff driver or configuration setting. Diff drivers +override configuration settings. --no-renames:: Turn off rename detection, even when the configuration diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 8af22eccac..227934f59a 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -317,6 +317,8 @@ patterns are available: - `bibtex` suitable for files with BibTeX coded references. +- `cpp` suitable for source code in the C and C++ languages. + - `html` suitable for HTML/XHTML documents. - `java` suitable for source code in the Java language. @@ -334,6 +336,25 @@ patterns are available: - `tex` suitable for source code for LaTeX documents. +Customizing word diff +^^^^^^^^^^^^^^^^^^^^^ + +You can customize the rules that `git diff --color-words` uses to +split words in a line, by specifying an appropriate regular expression +in the "diff.*.wordRegex" configuration variable. For example, in TeX +a backslash followed by a sequence of letters forms a command, but +several such commands can be run together without intervening +whitespace. To separate them, use a regular expression such as + +------------------------ +[diff "tex"] + wordRegex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+" +------------------------ + +A built-in pattern is provided for all languages listed in the +previous section. + + Performing text diffs of binary files ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -202,3 +202,31 @@ int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...) va_end(args); return r; } + +/* + * This function splits the buffer by newlines and colors the lines individually. + * + * Returns 0 on success. + */ +int color_fwrite_lines(FILE *fp, const char *color, + size_t count, const char *buf) +{ + if (!*color) + return fwrite(buf, count, 1, fp) != 1; + while (count) { + char *p = memchr(buf, '\n', count); + if (p != buf && (fputs(color, fp) < 0 || + fwrite(buf, p ? p - buf : count, 1, fp) != 1 || + fputs(COLOR_RESET, fp) < 0)) + return -1; + if (!p) + return 0; + if (fputc('\n', fp) < 0) + return -1; + count -= p + 1 - buf; + buf = p + 1; + } + return 0; +} + + @@ -20,5 +20,6 @@ void color_parse(const char *value, const char *var, char *dst); void color_parse_mem(const char *value, int len, const char *var, char *dst); int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); +int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); #endif /* COLOR_H */ @@ -23,6 +23,7 @@ static int diff_detect_rename_default; static int diff_rename_limit_default = 200; static int diff_suppress_blank_empty; int diff_use_color_default = -1; +static const char *diff_word_regex_cfg; static const char *external_diff_cmd_cfg; int diff_auto_refresh_index = 1; static int diff_mnemonic_prefix; @@ -92,6 +93,8 @@ int git_diff_ui_config(const char *var, const char *value, void *cb) } if (!strcmp(var, "diff.external")) return git_config_string(&external_diff_cmd_cfg, var, value); + if (!strcmp(var, "diff.wordregex")) + return git_config_string(&diff_word_regex_cfg, var, value); return git_diff_basic_config(var, value, cb); } @@ -321,82 +324,138 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) struct diff_words_buffer { mmfile_t text; long alloc; - long current; /* output pointer */ - int suppressed_newline; + struct diff_words_orig { + const char *begin, *end; + } *orig; + int orig_nr, orig_alloc; }; static void diff_words_append(char *line, unsigned long len, struct diff_words_buffer *buffer) { - if (buffer->text.size + len > buffer->alloc) { - buffer->alloc = (buffer->text.size + len) * 3 / 2; - buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc); - } + ALLOC_GROW(buffer->text.ptr, buffer->text.size + len, buffer->alloc); line++; len--; memcpy(buffer->text.ptr + buffer->text.size, line, len); buffer->text.size += len; + buffer->text.ptr[buffer->text.size] = '\0'; } struct diff_words_data { struct diff_words_buffer minus, plus; + const char *current_plus; FILE *file; + regex_t *word_regex; }; -static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color, - int suppress_newline) +static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len) { - const char *ptr; - int eol = 0; + struct diff_words_data *diff_words = priv; + int minus_first, minus_len, plus_first, plus_len; + const char *minus_begin, *minus_end, *plus_begin, *plus_end; - if (len == 0) + if (line[0] != '@' || parse_hunk_header(line, len, + &minus_first, &minus_len, &plus_first, &plus_len)) return; - ptr = buffer->text.ptr + buffer->current; - buffer->current += len; + /* POSIX requires that first be decremented by one if len == 0... */ + if (minus_len) { + minus_begin = diff_words->minus.orig[minus_first].begin; + minus_end = + diff_words->minus.orig[minus_first + minus_len - 1].end; + } else + minus_begin = minus_end = + diff_words->minus.orig[minus_first].end; - if (ptr[len - 1] == '\n') { - eol = 1; - len--; + if (plus_len) { + plus_begin = diff_words->plus.orig[plus_first].begin; + plus_end = diff_words->plus.orig[plus_first + plus_len - 1].end; + } else + plus_begin = plus_end = diff_words->plus.orig[plus_first].end; + + if (diff_words->current_plus != plus_begin) + fwrite(diff_words->current_plus, + plus_begin - diff_words->current_plus, 1, + diff_words->file); + if (minus_begin != minus_end) + color_fwrite_lines(diff_words->file, + diff_get_color(1, DIFF_FILE_OLD), + minus_end - minus_begin, minus_begin); + if (plus_begin != plus_end) + color_fwrite_lines(diff_words->file, + diff_get_color(1, DIFF_FILE_NEW), + plus_end - plus_begin, plus_begin); + + diff_words->current_plus = plus_end; +} + +/* This function starts looking at *begin, and returns 0 iff a word was found. */ +static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, + int *begin, int *end) +{ + if (word_regex && *begin < buffer->size) { + regmatch_t match[1]; + if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) { + char *p = memchr(buffer->ptr + *begin + match[0].rm_so, + '\n', match[0].rm_eo - match[0].rm_so); + *end = p ? p - buffer->ptr : match[0].rm_eo + *begin; + *begin += match[0].rm_so; + return *begin >= *end; + } + return -1; } - fputs(diff_get_color(1, color), file); - fwrite(ptr, len, 1, file); - fputs(diff_get_color(1, DIFF_RESET), file); + /* find the next word */ + while (*begin < buffer->size && isspace(buffer->ptr[*begin])) + (*begin)++; + if (*begin >= buffer->size) + return -1; - if (eol) { - if (suppress_newline) - buffer->suppressed_newline = 1; - else - putc('\n', file); - } + /* find the end of the word */ + *end = *begin + 1; + while (*end < buffer->size && !isspace(buffer->ptr[*end])) + (*end)++; + + return 0; } -static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len) +/* + * This function splits the words in buffer->text, stores the list with + * newline separator into out, and saves the offsets of the original words + * in buffer->orig. + */ +static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out, + regex_t *word_regex) { - struct diff_words_data *diff_words = priv; + int i, j; + long alloc = 0; - if (diff_words->minus.suppressed_newline) { - if (line[0] != '+') - putc('\n', diff_words->file); - diff_words->minus.suppressed_newline = 0; - } + out->size = 0; + out->ptr = NULL; - len--; - switch (line[0]) { - case '-': - print_word(diff_words->file, - &diff_words->minus, len, DIFF_FILE_OLD, 1); - break; - case '+': - print_word(diff_words->file, - &diff_words->plus, len, DIFF_FILE_NEW, 0); - break; - case ' ': - print_word(diff_words->file, - &diff_words->plus, len, DIFF_PLAIN, 0); - diff_words->minus.current += len; - break; + /* fake an empty "0th" word */ + ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc); + buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr; + buffer->orig_nr = 1; + + for (i = 0; i < buffer->text.size; i++) { + if (find_word_boundaries(&buffer->text, word_regex, &i, &j)) + return; + + /* store original boundaries */ + ALLOC_GROW(buffer->orig, buffer->orig_nr + 1, + buffer->orig_alloc); + buffer->orig[buffer->orig_nr].begin = buffer->text.ptr + i; + buffer->orig[buffer->orig_nr].end = buffer->text.ptr + j; + buffer->orig_nr++; + + /* store one word */ + ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc); + memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i); + out->ptr[out->size + j - i] = '\n'; + out->size += j - i + 1; + + i = j - 1; } } @@ -407,38 +466,36 @@ static void diff_words_show(struct diff_words_data *diff_words) xdemitconf_t xecfg; xdemitcb_t ecb; mmfile_t minus, plus; - int i; + + /* special case: only removal */ + if (!diff_words->plus.text.size) { + color_fwrite_lines(diff_words->file, + diff_get_color(1, DIFF_FILE_OLD), + diff_words->minus.text.size, diff_words->minus.text.ptr); + diff_words->minus.text.size = 0; + return; + } + + diff_words->current_plus = diff_words->plus.text.ptr; memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); - minus.size = diff_words->minus.text.size; - minus.ptr = xmalloc(minus.size); - memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size); - for (i = 0; i < minus.size; i++) - if (isspace(minus.ptr[i])) - minus.ptr[i] = '\n'; - diff_words->minus.current = 0; - - plus.size = diff_words->plus.text.size; - plus.ptr = xmalloc(plus.size); - memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size); - for (i = 0; i < plus.size; i++) - if (isspace(plus.ptr[i])) - plus.ptr[i] = '\n'; - diff_words->plus.current = 0; - + diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex); + diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex); xpp.flags = XDF_NEED_MINIMAL; - xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc; + /* as only the hunk header will be parsed, we need a 0-context */ + xecfg.ctxlen = 0; xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words, &xpp, &xecfg, &ecb); free(minus.ptr); free(plus.ptr); + if (diff_words->current_plus != diff_words->plus.text.ptr + + diff_words->plus.text.size) + fwrite(diff_words->current_plus, + diff_words->plus.text.ptr + diff_words->plus.text.size + - diff_words->current_plus, 1, + diff_words->file); diff_words->minus.text.size = diff_words->plus.text.size = 0; - - if (diff_words->minus.suppressed_newline) { - putc('\n', diff_words->file); - diff_words->minus.suppressed_newline = 0; - } } typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len); @@ -462,7 +519,10 @@ static void free_diff_words_data(struct emit_callback *ecbdata) diff_words_show(ecbdata->diff_words); free (ecbdata->diff_words->minus.text.ptr); + free (ecbdata->diff_words->minus.orig); free (ecbdata->diff_words->plus.text.ptr); + free (ecbdata->diff_words->plus.orig); + free(ecbdata->diff_words->word_regex); free(ecbdata->diff_words); ecbdata->diff_words = NULL; } @@ -1325,6 +1385,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe return one->driver->funcname.pattern ? &one->driver->funcname : NULL; } +static const char *userdiff_word_regex(struct diff_filespec *one) +{ + diff_filespec_load_driver(one); + return one->driver->word_regex; +} + void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) { if (!options->a_prefix) @@ -1485,6 +1551,21 @@ static void builtin_diff(const char *name_a, ecbdata.diff_words = xcalloc(1, sizeof(struct diff_words_data)); ecbdata.diff_words->file = o->file; + if (!o->word_regex) + o->word_regex = userdiff_word_regex(one); + if (!o->word_regex) + o->word_regex = userdiff_word_regex(two); + if (!o->word_regex) + o->word_regex = diff_word_regex_cfg; + if (o->word_regex) { + ecbdata.diff_words->word_regex = (regex_t *) + xmalloc(sizeof(regex_t)); + if (regcomp(ecbdata.diff_words->word_regex, + o->word_regex, + REG_EXTENDED | REG_NEWLINE)) + die ("Invalid regular expression: %s", + o->word_regex); + } } xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata, &xpp, &xecfg, &ecb); @@ -2498,6 +2579,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) DIFF_OPT_CLR(options, COLOR_DIFF); else if (!strcmp(arg, "--color-words")) options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS; + else if (!prefixcmp(arg, "--color-words=")) { + options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS; + options->word_regex = arg + 14; + } else if (!strcmp(arg, "--exit-code")) DIFF_OPT_SET(options, EXIT_WITH_STATUS); else if (!strcmp(arg, "--quiet")) @@ -98,6 +98,7 @@ struct diff_options { int stat_width; int stat_name_width; + const char *word_regex; /* this is set by diffcore for DIFF_FORMAT_PATCH */ int found_changes; diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh new file mode 100755 index 0000000000..4508effcaa --- /dev/null +++ b/t/t4034-diff-words.sh @@ -0,0 +1,200 @@ +#!/bin/sh + +test_description='word diff colors' + +. ./test-lib.sh + +test_expect_success setup ' + + git config diff.color.old red + git config diff.color.new green + +' + +decrypt_color () { + sed \ + -e 's/.\[1m/<WHITE>/g' \ + -e 's/.\[31m/<RED>/g' \ + -e 's/.\[32m/<GREEN>/g' \ + -e 's/.\[36m/<BROWN>/g' \ + -e 's/.\[m/<RESET>/g' +} + +word_diff () { + test_must_fail git diff --no-index "$@" pre post > output && + decrypt_color < output > output.decrypted && + test_cmp expect output.decrypted +} + +cat > pre <<\EOF +h(4) + +a = b + c +EOF + +cat > post <<\EOF +h(4),hh[44] + +a = b + c + +aa = a + +aeff = aeff * ( aaa ) +EOF + +cat > expect <<\EOF +<WHITE>diff --git a/pre b/post<RESET> +<WHITE>index 330b04f..5ed8eff 100644<RESET> +<WHITE>--- a/pre<RESET> +<WHITE>+++ b/post<RESET> +<BROWN>@@ -1,3 +1,7 @@<RESET> +<RED>h(4)<RESET><GREEN>h(4),hh[44]<RESET> +<RESET> +a = b + c<RESET> + +<GREEN>aa = a<RESET> + +<GREEN>aeff = aeff * ( aaa )<RESET> +EOF + +test_expect_success 'word diff with runs of whitespace' ' + + word_diff --color-words + +' + +cat > expect <<\EOF +<WHITE>diff --git a/pre b/post<RESET> +<WHITE>index 330b04f..5ed8eff 100644<RESET> +<WHITE>--- a/pre<RESET> +<WHITE>+++ b/post<RESET> +<BROWN>@@ -1,3 +1,7 @@<RESET> +h(4),<GREEN>hh<RESET>[44] +<RESET> +a = b + c<RESET> + +<GREEN>aa = a<RESET> + +<GREEN>aeff = aeff * ( aaa<RESET> ) +EOF +cp expect expect.letter-runs-are-words + +test_expect_success 'word diff with a regular expression' ' + + word_diff --color-words="[a-z]+" + +' + +test_expect_success 'set a diff driver' ' + git config diff.testdriver.wordRegex "[^[:space:]]" && + cat <<EOF > .gitattributes +pre diff=testdriver +post diff=testdriver +EOF +' + +test_expect_success 'option overrides .gitattributes' ' + + word_diff --color-words="[a-z]+" + +' + +cat > expect <<\EOF +<WHITE>diff --git a/pre b/post<RESET> +<WHITE>index 330b04f..5ed8eff 100644<RESET> +<WHITE>--- a/pre<RESET> +<WHITE>+++ b/post<RESET> +<BROWN>@@ -1,3 +1,7 @@<RESET> +h(4)<GREEN>,hh[44]<RESET> +<RESET> +a = b + c<RESET> + +<GREEN>aa = a<RESET> + +<GREEN>aeff = aeff * ( aaa )<RESET> +EOF +cp expect expect.non-whitespace-is-word + +test_expect_success 'use regex supplied by driver' ' + + word_diff --color-words + +' + +test_expect_success 'set diff.wordRegex option' ' + git config diff.wordRegex "[[:alnum:]]+" +' + +cp expect.letter-runs-are-words expect + +test_expect_success 'command-line overrides config' ' + word_diff --color-words="[a-z]+" +' + +cp expect.non-whitespace-is-word expect + +test_expect_success '.gitattributes override config' ' + word_diff --color-words +' + +test_expect_success 'remove diff driver regex' ' + git config --unset diff.testdriver.wordRegex +' + +cat > expect <<\EOF +<WHITE>diff --git a/pre b/post<RESET> +<WHITE>index 330b04f..5ed8eff 100644<RESET> +<WHITE>--- a/pre<RESET> +<WHITE>+++ b/post<RESET> +<BROWN>@@ -1,3 +1,7 @@<RESET> +h(4),<GREEN>hh[44<RESET>] +<RESET> +a = b + c<RESET> + +<GREEN>aa = a<RESET> + +<GREEN>aeff = aeff * ( aaa<RESET> ) +EOF + +test_expect_success 'use configured regex' ' + word_diff --color-words +' + +echo 'aaa (aaa)' > pre +echo 'aaa (aaa) aaa' > post + +cat > expect <<\EOF +<WHITE>diff --git a/pre b/post<RESET> +<WHITE>index c29453b..be22f37 100644<RESET> +<WHITE>--- a/pre<RESET> +<WHITE>+++ b/post<RESET> +<BROWN>@@ -1 +1 @@<RESET> +aaa (aaa) <GREEN>aaa<RESET> +EOF + +test_expect_success 'test parsing words for newline' ' + + word_diff --color-words="a+" + + +' + +echo '(:' > pre +echo '(' > post + +cat > expect <<\EOF +<WHITE>diff --git a/pre b/post<RESET> +<WHITE>index 289cb9d..2d06f37 100644<RESET> +<WHITE>--- a/pre<RESET> +<WHITE>+++ b/post<RESET> +<BROWN>@@ -1 +1 @@<RESET> +(<RED>:<RESET> +EOF + +test_expect_success 'test when words are only removed at the end' ' + + word_diff --color-words=. + +' + +test_done diff --git a/userdiff.c b/userdiff.c index 3681062ebf..d556da9751 100644 --- a/userdiff.c +++ b/userdiff.c @@ -6,14 +6,20 @@ static struct userdiff_driver *drivers; static int ndrivers; static int drivers_alloc; -#define FUNCNAME(name, pattern) \ - { name, NULL, -1, { pattern, REG_EXTENDED } } +#define PATTERNS(name, pattern, word_regex) \ + { name, NULL, -1, { pattern, REG_EXTENDED }, word_regex } static struct userdiff_driver builtin_drivers[] = { -FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"), -FUNCNAME("java", +PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", + "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("java", "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" - "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"), -FUNCNAME("objc", + "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$", + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=" + "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|" + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("objc", /* Negate C statements that can look like functions */ "!^[ \t]*(do|for|if|else|return|switch|while)\n" /* Objective-C methods */ @@ -21,20 +27,60 @@ FUNCNAME("objc", /* C functions */ "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n" /* Objective-C class/protocol definitions */ - "^(@(implementation|interface|protocol)[ \t].*)$"), -FUNCNAME("pascal", + "^(@(implementation|interface|protocol)[ \t].*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("pascal", "^((procedure|function|constructor|destructor|interface|" "implementation|initialization|finalization)[ \t]*.*)$" "\n" - "^(.*=[ \t]*(class|record).*)$"), -FUNCNAME("php", "^[\t ]*((function|class).*)"), -FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"), -FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"), -FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"), -FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"), + "^(.*=[ \t]*(class|record).*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" + "|<>|<=|>=|:=|\\.\\." + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("php", "^[\t ]*((function|class).*)", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" + "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?" + "|[^[:space:]|[\x80-\xff]+"), + /* -- */ +PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", + /* -- */ + "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." + "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~" + "|[^[:space:]|[\x80-\xff]+"), +PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", + "[={}\"]|[^={}\" \t]+"), +PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", + "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"), +PATTERNS("cpp", + /* Jump targets or access declarations */ + "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n" + /* C/++ functions/methods at top level */ + "^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n" + /* compound type at top level */ + "^((struct|class|enum)[^;]*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), { "default", NULL, -1, { NULL, 0 } }, }; -#undef FUNCNAME +#undef PATTERNS static struct userdiff_driver driver_true = { "diff=true", @@ -134,6 +180,8 @@ int userdiff_config(const char *k, const char *v) return parse_string(&drv->external, k, v); if ((drv = parse_driver(k, v, "textconv"))) return parse_string(&drv->textconv, k, v); + if ((drv = parse_driver(k, v, "wordregex"))) + return parse_string(&drv->word_regex, k, v); return 0; } diff --git a/userdiff.h b/userdiff.h index ba2945770b..c3151594f5 100644 --- a/userdiff.h +++ b/userdiff.h @@ -11,6 +11,7 @@ struct userdiff_driver { const char *external; int binary; struct userdiff_funcname funcname; + const char *word_regex; const char *textconv; }; |