diff options
Diffstat (limited to 'userdiff.c')
-rw-r--r-- | userdiff.c | 48 |
1 files changed, 40 insertions, 8 deletions
diff --git a/userdiff.c b/userdiff.c index 151d9a5278..eaec6ebb5e 100644 --- a/userdiff.c +++ b/userdiff.c @@ -1,7 +1,9 @@ -#include "cache.h" +#include "git-compat-util.h" +#include "alloc.h" #include "config.h" #include "userdiff.h" #include "attr.h" +#include "strbuf.h" static struct userdiff_driver *drivers; static int ndrivers; @@ -15,6 +17,7 @@ static int drivers_alloc; .cflags = REG_EXTENDED, \ }, \ .word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \ + .word_regex_multi_byte = wrx "|[^[:space:]]", \ } #define IPATTERN(lang, rx, wrx) { \ .name = lang, \ @@ -24,6 +27,7 @@ static int drivers_alloc; .cflags = REG_EXTENDED | REG_ICASE, \ }, \ .word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \ + .word_regex_multi_byte = wrx "|[^[:space:]]", \ } /* @@ -170,8 +174,8 @@ PATTERNS("html", "[^<>= \t]+"), PATTERNS("java", "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" - /* Class, enum, and interface declarations */ - "^[ \t]*(([a-z]+[ \t]+)*(class|enum|interface)[ \t]+[A-Za-z][A-Za-z0-9_$]*[ \t]+.*)$\n" + /* Class, enum, interface, and record declarations */ + "^[ \t]*(([a-z-]+[ \t]+)*(class|enum|interface|record)[ \t]+.*)$\n" /* Method definitions; note that constructor signatures are not */ /* matched because they are indistinguishable from method calls. */ "^[ \t]*(([A-Za-z_<>&][][?&<>.,A-Za-z_0-9]*[ \t]+)+[A-Za-z_][A-Za-z_0-9]*[ \t]*\\([^;]*)$", @@ -292,8 +296,8 @@ PATTERNS("scheme", /* All other words should be delimited by spaces or parentheses */ "|([^][)(}{[ \t])+"), PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", - "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+"), -{ "default", NULL, -1, { NULL, 0 } }, + "\\\\[a-zA-Z@]+|\\\\.|([a-zA-Z0-9]|[^\x01-\x7f])+"), +{ "default", NULL, NULL, -1, { NULL, 0 } }, }; #undef PATTERNS #undef IPATTERN @@ -315,7 +319,8 @@ struct find_by_namelen_data { }; static int userdiff_find_by_namelen_cb(struct userdiff_driver *driver, - enum userdiff_driver_type type, void *priv) + enum userdiff_driver_type type UNUSED, + void *priv) { struct find_by_namelen_data *cb_data = priv; @@ -327,6 +332,25 @@ static int userdiff_find_by_namelen_cb(struct userdiff_driver *driver, return 0; } +static int regexec_supports_multi_byte_chars(void) +{ + static const char not_space[] = "[^[:space:]]"; + static const char utf8_multi_byte_char[] = "\xc2\xa3"; + regex_t re; + regmatch_t match; + static int result = -1; + + if (result != -1) + return result; + if (regcomp(&re, not_space, REG_EXTENDED)) + BUG("invalid regular expression: %s", not_space); + result = !regexec(&re, utf8_multi_byte_char, 1, &match, 0) && + match.rm_so == 0 && + match.rm_eo == strlen(utf8_multi_byte_char); + regfree(&re); + return result; +} + static struct userdiff_driver *userdiff_find_by_namelen(const char *name, size_t len) { struct find_by_namelen_data udcbdata = { @@ -393,6 +417,8 @@ int userdiff_config(const char *k, const char *v) return parse_bool(&drv->textconv_want_cache, k, v); if (!strcmp(type, "wordregex")) return git_config_string(&drv->word_regex, k, v); + if (!strcmp(type, "algorithm")) + return git_config_string(&drv->algorithm, k, v); return 0; } @@ -400,7 +426,13 @@ int userdiff_config(const char *k, const char *v) struct userdiff_driver *userdiff_find_by_name(const char *name) { int len = strlen(name); - return userdiff_find_by_namelen(name, len); + struct userdiff_driver *driver = userdiff_find_by_namelen(name, len); + if (driver && driver->word_regex_multi_byte) { + if (regexec_supports_multi_byte_chars()) + driver->word_regex = driver->word_regex_multi_byte; + driver->word_regex_multi_byte = NULL; + } + return driver; } struct userdiff_driver *userdiff_find_by_path(struct index_state *istate, @@ -412,7 +444,7 @@ struct userdiff_driver *userdiff_find_by_path(struct index_state *istate, check = attr_check_initl("diff", NULL); if (!path) return NULL; - git_check_attr(istate, path, check); + git_check_attr(istate, NULL, path, check); if (ATTR_TRUE(check->items[0].value)) return &driver_true; |