diff options
author | Russell Belfer <rb@github.com> | 2013-06-11 11:22:22 -0700 |
---|---|---|
committer | Russell Belfer <rb@github.com> | 2013-06-11 11:22:22 -0700 |
commit | 5dc98298a14a9adae3cf8b21fb01f682791c29c7 (patch) | |
tree | ed0e5ab97a3e7d6d03b9959265693665f950cef6 /src | |
parent | 3eadfecd325d355d3f8a9631d9c89b7e8eede98b (diff) | |
download | libgit2-5dc98298a14a9adae3cf8b21fb01f682791c29c7.tar.gz |
Implement regex pattern diff driver
This implements the loading of regular expression pattern lists
for diff drivers that search for function context in that way.
This also changes the way that diff drivers update options and
interface with xdiff APIs to make them a little more flexible.
Diffstat (limited to 'src')
-rw-r--r-- | src/diff_driver.c | 282 | ||||
-rw-r--r-- | src/diff_driver.h | 21 | ||||
-rw-r--r-- | src/diff_file.c | 51 | ||||
-rw-r--r-- | src/diff_file.h | 3 | ||||
-rw-r--r-- | src/diff_patch.c | 5 | ||||
-rw-r--r-- | src/diff_xdiff.c | 12 |
6 files changed, 292 insertions, 82 deletions
diff --git a/src/diff_driver.c b/src/diff_driver.c index 58a903261..9d2508024 100644 --- a/src/diff_driver.c +++ b/src/diff_driver.c @@ -12,17 +12,17 @@ #include "diff_patch.h" #include "diff_driver.h" #include "strmap.h" -#include "pool.h" #include "map.h" #include "buf_text.h" +#include "repository.h" GIT__USE_STRMAP; typedef enum { DIFF_DRIVER_AUTO = 0, - DIFF_DRIVER_FALSE = 1, - DIFF_DRIVER_TRUE = 2, - DIFF_DRIVER_NAMED = 3, + DIFF_DRIVER_BINARY = 1, + DIFF_DRIVER_TEXT = 2, + DIFF_DRIVER_PATTERNLIST = 3, } git_diff_driver_t; enum { @@ -34,19 +34,22 @@ enum { /* data for finding function context for a given file type */ struct git_diff_driver { git_diff_driver_t type; - git_strarray fn_patterns; - int binary; /* 0 => treat as text, 1 => treat as binary, -1 => auto */ + uint32_t binary_flags; + uint32_t other_flags; + git_array_t(regex_t) fn_patterns; + regex_t word_pattern; }; struct git_diff_driver_registry { git_strmap *drivers; - git_pool strings; }; +#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY) + static git_diff_driver global_drivers[3] = { - { DIFF_DRIVER_AUTO, { NULL, 0 }, -1 }, - { DIFF_DRIVER_FALSE, { NULL, 0 }, 1 }, - { DIFF_DRIVER_TRUE, { NULL, 0 }, 0 }, + { DIFF_DRIVER_AUTO, 0, 0, }, + { DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 }, + { DIFF_DRIVER_TEXT, GIT_DIFF_FORCE_TEXT, 0 }, }; git_diff_driver_registry *git_diff_driver_registry_new() @@ -56,9 +59,7 @@ git_diff_driver_registry *git_diff_driver_registry_new() if (!reg) return NULL; - if (git_pool_init(&reg->strings, 1, 0) < 0 || - (reg->drivers = git_strmap_alloc()) == NULL) - { + if ((reg->drivers = git_strmap_alloc()) == NULL) { git_diff_driver_registry_free(reg); return NULL; } @@ -68,22 +69,165 @@ git_diff_driver_registry *git_diff_driver_registry_new() void git_diff_driver_registry_free(git_diff_driver_registry *reg) { + git_diff_driver *drv; + if (!reg) return; + git_strmap_foreach_value(reg->drivers, drv, git_diff_driver_free(drv)); git_strmap_free(reg->drivers); - 
git_pool_clear(&reg->strings); git__free(reg); } +static int diff_driver_add_funcname( + git_diff_driver *drv, const char *name, int regex_flags) +{ + int error; + regex_t re, *re_ptr; + + if ((error = regcomp(&re, name, regex_flags)) != 0) { + /* TODO: warning about bad regex instead of failure */ + error = giterr_set_regex(&re, error); + regfree(&re); + return error; + } + + git_array_alloc(drv->fn_patterns, re_ptr); + GITERR_CHECK_ALLOC(re_ptr); + + memcpy(re_ptr, &re, sizeof(re)); + return 0; +} + +static int diff_driver_xfuncname(const git_config_entry *entry, void *payload) +{ + return diff_driver_add_funcname(payload, entry->value, REG_EXTENDED); +} + +static int diff_driver_funcname(const git_config_entry *entry, void *payload) +{ + return diff_driver_add_funcname(payload, entry->value, 0); +} + +static git_diff_driver_registry *git_repository_driver_registry( + git_repository *repo) +{ + if (!repo->diff_drivers) { + git_diff_driver_registry *reg = git_diff_driver_registry_new(); + reg = git__compare_and_swap(&repo->diff_drivers, NULL, reg); + + if (reg != NULL) /* if we race, free losing allocation */ + git_diff_driver_registry_free(reg); + } + + if (!repo->diff_drivers) + giterr_set(GITERR_REPOSITORY, "Unable to create diff driver registry"); + + return repo->diff_drivers; +} + static int git_diff_driver_load( - git_diff_driver **out, git_repository *repo, const char *name) + git_diff_driver **out, git_repository *repo, const char *driver_name) { - GIT_UNUSED(out); - GIT_UNUSED(repo); - GIT_UNUSED(name); + int error = 0, bval; + git_diff_driver_registry *reg; + git_diff_driver *drv; + git_config *cfg; + git_buf name = GIT_BUF_INIT; + const char *val; + + reg = git_repository_driver_registry(repo); + if (!reg) + return -1; + else { + khiter_t pos = git_strmap_lookup_index(reg->drivers, driver_name); + if (git_strmap_valid_index(reg->drivers, pos)) { + *out = git_strmap_value_at(reg->drivers, pos); + return 0; + } + } + + /* if you can't read config for repo, 
just use default driver */ + if (git_repository_config__weakptr(&cfg, repo) < 0) { + giterr_clear(); + return GIT_ENOTFOUND; + } + + drv = git__calloc(1, sizeof(git_diff_driver)); + GITERR_CHECK_ALLOC(drv); + drv->type = DIFF_DRIVER_AUTO; + + if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0) + goto fail; + if ((error = git_config_get_string(&val, cfg, name.ptr)) < 0) { + if (error != GIT_ENOTFOUND) + goto fail; + /* diff.<driver>.binary unspecified, so just continue */ + giterr_clear(); + } else if (git_config_parse_bool(&bval, val) < 0) { + /* TODO: warn that diff.<driver>.binary has invalid value */ + giterr_clear(); + } else if (bval) { + /* if diff.<driver>.binary is true, just return the binary driver */ + git__free(drv); + *out = &global_drivers[DIFF_DRIVER_BINARY]; + return 0; + } else { + /* if diff.<driver>.binary is false, force binary checks off */ + /* but still may have custom function context patterns, etc. */ + drv->binary_flags = GIT_DIFF_FORCE_TEXT; + } + + /* TODO: warn if diff.<name>.command or diff.<name>.textconv are set */ + + if ((error = git_buf_printf(&name, "diff.%s.xfuncname", driver_name)) < 0) + goto fail; + if ((error = git_config_get_multivar( + cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) { + if (error != GIT_ENOTFOUND) + goto fail; + /* no diff.<driver>.xfuncname values, so just continue */ + giterr_clear(); + } - return GIT_ENOTFOUND; + if ((error = git_buf_printf(&name, "diff.%s.funcname", driver_name)) < 0) + goto fail; + if ((error = git_config_get_multivar( + cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) { + if (error != GIT_ENOTFOUND) + goto fail; + /* no diff.<driver>.funcname values, so just continue */ + giterr_clear(); + } + + /* if we found any patterns, set driver type to use correct callback */ + if (git_array_size(drv->fn_patterns) > 0) + drv->type = DIFF_DRIVER_PATTERNLIST; + + if ((error = git_buf_printf(&name, "diff.%s.wordregex", driver_name)) < 0) + goto fail; + if ((error = 
git_config_get_string(&val, cfg, name.ptr)) < 0) { + if (error != GIT_ENOTFOUND) + goto fail; + /* no diff.<driver>.wordregex, so just continue */ + giterr_clear(); + } else if ((error = regcomp(&drv->word_pattern, val, REG_EXTENDED)) != 0) { + /* TODO: warning about bad regex instead of failure */ + error = giterr_set_regex(&drv->word_pattern, error); + goto fail; + } + + /* TODO: look up diff.<driver>.algorithm to turn on minimal / patience + * diff in drv->other_flags + */ + + *out = drv; + return 0; + +fail: + git_diff_driver_free(drv); + *out = &global_drivers[DIFF_DRIVER_AUTO]; + return error; } int git_diff_driver_lookup( @@ -101,12 +245,12 @@ int git_diff_driver_lookup( return error; if (GIT_ATTR_FALSE(value)) { - *out = &global_drivers[DIFF_DRIVER_FALSE]; + *out = &global_drivers[DIFF_DRIVER_BINARY]; return 0; } else if (GIT_ATTR_TRUE(value)) { - *out = &global_drivers[DIFF_DRIVER_TRUE]; + *out = &global_drivers[DIFF_DRIVER_TEXT]; return 0; } @@ -125,13 +269,27 @@ use_auto: void git_diff_driver_free(git_diff_driver *driver) { - GIT_UNUSED(driver); - /* do nothing for now */ + size_t i; + + if (!driver) + return; + + /* regfree every compiled entry of fn_patterns before the array is cleared */ + for (i = 0; i < git_array_size(driver->fn_patterns); ++i) + regfree(git_array_get(driver->fn_patterns, i)); + git_array_clear(driver->fn_patterns); + + regfree(&driver->word_pattern); + + git__free(driver); } -int git_diff_driver_is_binary(git_diff_driver *driver) +void git_diff_driver_update_options( + uint32_t *option_flags, git_diff_driver *driver) { - return driver ? 
driver->binary : -1; + if ((*option_flags & FORCE_DIFFABLE) == 0) + *option_flags |= driver->binary_flags; + + *option_flags |= driver->other_flags; } int git_diff_driver_content_is_binary( @@ -153,6 +311,29 @@ int git_diff_driver_content_is_binary( return 0; } +static int diff_context_line__simple( + git_diff_driver *driver, const char *line, long line_len) +{ + GIT_UNUSED(driver); + GIT_UNUSED(line_len); + return (git__isalpha(*line) || *line == '_' || *line == '$'); +} + +static int diff_context_line__pattern_match( + git_diff_driver *driver, const char *line, long line_len) +{ + size_t i; + + GIT_UNUSED(line_len); + + /* return true as soon as regexec() yields 0 for any entry of fn_patterns */ + for (i = 0; i < git_array_size(driver->fn_patterns); ++i) { + if (!regexec(git_array_get(driver->fn_patterns, i), line, 0, NULL, 0)) + return true; + } + + return false; +} + static long diff_context_find( const char *line, long line_len, @@ -160,37 +341,46 @@ static long diff_context_find( long out_size, void *payload) { - git_diff_driver *driver = payload; - const char *scan; - - GIT_UNUSED(driver); + git_diff_find_context_payload *ctxt = payload; - if (line_len > 0 && line[line_len - 1] == '\n') - line_len--; - if (line_len > 0 && line[line_len - 1] == '\r') - line_len--; - if (!line_len) + if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0) return -1; + git_buf_rtrim(&ctxt->line); - if (!git__isalpha(*line) && *line != '_' && *line != '$') + if (!ctxt->line.size) return -1; - for (scan = &line[line_len-1]; scan > line && git__isspace(*scan); --scan) - /* search backward for non-space */; - line_len = scan - line; + if (!ctxt->match_line || + !ctxt->match_line(ctxt->driver, ctxt->line.ptr, ctxt->line.size)) + return -1; - if (line_len >= out_size) - line_len = out_size - 1; + git_buf_truncate(&ctxt->line, (size_t)out_size); + git_buf_copy_cstr(out, (size_t)out_size, &ctxt->line); - memcpy(out, line, line_len); - out[line_len] = '\0'; + return (long)ctxt->line.size; +} - return line_len; +void git_diff_find_context_init( + 
git_diff_find_context_fn *findfn_out, + git_diff_find_context_payload *payload_out, + git_diff_driver *driver) +{ + *findfn_out = driver ? diff_context_find : NULL; + + memset(payload_out, 0, sizeof(*payload_out)); + if (driver) { + payload_out->driver = driver; + payload_out->match_line = (driver->type == DIFF_DRIVER_PATTERNLIST) ? + diff_context_line__pattern_match : diff_context_line__simple; + git_buf_init(&payload_out->line, 0); + } } -git_diff_find_context_fn git_diff_driver_find_content_fn(git_diff_driver *driver) +void git_diff_find_context_clear(git_diff_find_context_payload *payload) { - GIT_UNUSED(driver); - return diff_context_find; + if (payload) { + git_buf_free(&payload->line); + payload->driver = NULL; + } } diff --git a/src/diff_driver.h b/src/diff_driver.h index af9fa073e..3db7df000 100644 --- a/src/diff_driver.h +++ b/src/diff_driver.h @@ -8,6 +8,7 @@ #define INCLUDE_diff_driver_h__ #include "common.h" +#include "buffer.h" typedef struct git_diff_driver_registry git_diff_driver_registry; @@ -19,8 +20,8 @@ typedef struct git_diff_driver git_diff_driver; int git_diff_driver_lookup(git_diff_driver **, git_repository *, const char *); void git_diff_driver_free(git_diff_driver *); -/* returns -1 meaning "unknown", 0 meaning not binary, 1 meaning binary */ -int git_diff_driver_is_binary(git_diff_driver *); +/* diff option flags to force off and on for this driver */ +void git_diff_driver_update_options(uint32_t *option_flags, git_diff_driver *); /* returns -1 meaning "unknown", 0 meaning not binary, 1 meaning binary */ int git_diff_driver_content_is_binary( @@ -29,6 +30,20 @@ int git_diff_driver_content_is_binary( typedef long (*git_diff_find_context_fn)( const char *, long, char *, long, void *); -git_diff_find_context_fn git_diff_driver_find_content_fn(git_diff_driver *); +typedef int (*git_diff_find_context_line)( + git_diff_driver *, const char *, long); + +typedef struct { + git_diff_driver *driver; + git_diff_find_context_line match_line; + 
git_buf line; +} git_diff_find_context_payload; + +void git_diff_find_context_init( + git_diff_find_context_fn *findfn_out, + git_diff_find_context_payload *payload_out, + git_diff_driver *driver); + +void git_diff_find_context_clear(git_diff_find_context_payload *); #endif diff --git a/src/diff_file.c b/src/diff_file.c index e4f8ca1e8..5bdb9e4bf 100644 --- a/src/diff_file.c +++ b/src/diff_file.c @@ -19,14 +19,9 @@ static bool diff_file_content_binary_by_size(git_diff_file_content *fc) { /* if we have diff opts, check max_size vs file size */ if ((fc->file.flags & DIFF_FLAGS_KNOWN_BINARY) == 0 && - fc->opts && fc->opts->max_size >= 0) - { - git_off_t threshold = DIFF_MAX_FILESIZE; - if (fc->opts->max_size > 0) - threshold = fc->opts->max_size; - if (fc->file.size > threshold) - fc->file.flags |= GIT_DIFF_FLAG_BINARY; - } + fc->opts_max_size > 0 && + fc->file.size > fc->opts_max_size) + fc->file.flags |= GIT_DIFF_FLAG_BINARY; return ((fc->file.flags & GIT_DIFF_FLAG_BINARY) != 0); } @@ -44,9 +39,14 @@ static void diff_file_content_binary_by_content(git_diff_file_content *fc) } } -static int diff_file_content_init_common(git_diff_file_content *fc) +static int diff_file_content_init_common( + git_diff_file_content *fc, const git_diff_options *opts) { - uint32_t flags = fc->opts ? fc->opts->flags : GIT_DIFF_NORMAL; + fc->opts_flags = opts ? opts->flags : GIT_DIFF_NORMAL; + + if (opts && opts->max_size >= 0) + fc->opts_max_size = opts->max_size ? 
+ opts->max_size : DIFF_MAX_FILESIZE; if (!fc->driver) { if (git_diff_driver_lookup(&fc->driver, fc->repo, "") < 0) @@ -54,20 +54,22 @@ static int diff_file_content_init_common(git_diff_file_content *fc) fc->src = GIT_ITERATOR_TYPE_TREE; } + /* give driver a chance to modify options */ + git_diff_driver_update_options(&fc->opts_flags, fc->driver); + /* make sure file is conceivable mmap-able */ if ((git_off_t)((size_t)fc->file.size) != fc->file.size) fc->file.flags |= GIT_DIFF_FLAG_BINARY; - - /* check if user is forcing is to text diff the file */ - else if (flags & GIT_DIFF_FORCE_TEXT) + /* check if user is forcing text diff the file */ + else if (fc->opts_flags & GIT_DIFF_FORCE_TEXT) { + fc->file.flags &= ~GIT_DIFF_FLAG_BINARY; fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; - - /* otherwise see if diff driver forces a behavior */ - else switch (git_diff_driver_is_binary(fc->driver)) { - case 0: fc->file.flags |= GIT_DIFF_FLAG_NOT_BINARY; break; - case 1: fc->file.flags |= GIT_DIFF_FLAG_BINARY; break; - default: break; - } + } + /* check if user is forcing binary diff the file */ + else if (fc->opts_flags & GIT_DIFF_FORCE_BINARY) { + fc->file.flags &= ~GIT_DIFF_FLAG_NOT_BINARY; + fc->file.flags |= GIT_DIFF_FLAG_BINARY; + } diff_file_content_binary_by_size(fc); @@ -95,7 +97,6 @@ int diff_file_content_init_from_diff( memset(fc, 0, sizeof(*fc)); fc->repo = diff->repo; - fc->opts = &diff->opts; fc->src = use_old ? 
diff->old_src : diff->new_src; memcpy(&fc->file, file, sizeof(fc->file)); @@ -123,7 +124,7 @@ int diff_file_content_init_from_diff( if (!has_data) fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; - return diff_file_content_init_common(fc); + return diff_file_content_init_common(fc, &diff->opts); } int diff_file_content_init_from_blob( @@ -134,7 +135,6 @@ int diff_file_content_init_from_blob( { memset(fc, 0, sizeof(*fc)); fc->repo = repo; - fc->opts = opts; fc->blob = blob; if (!blob) { @@ -149,7 +149,7 @@ int diff_file_content_init_from_blob( fc->map.data = (char *)git_blob_rawcontent(blob); } - return diff_file_content_init_common(fc); + return diff_file_content_init_common(fc, opts); } int diff_file_content_init_from_raw( @@ -161,7 +161,6 @@ int diff_file_content_init_from_raw( { memset(fc, 0, sizeof(*fc)); fc->repo = repo; - fc->opts = opts; if (!buf) { fc->file.flags |= GIT_DIFF_FLAG__NO_DATA; @@ -175,7 +174,7 @@ int diff_file_content_init_from_raw( fc->map.data = (char *)buf; } - return diff_file_content_init_common(fc); + return diff_file_content_init_common(fc, opts); } static int diff_file_content_commit_to_str( diff --git a/src/diff_file.h b/src/diff_file.h index 51c6878a9..ab7b1dc1f 100644 --- a/src/diff_file.h +++ b/src/diff_file.h @@ -15,9 +15,10 @@ /* expanded information for one side of a delta */ typedef struct { git_repository *repo; - const git_diff_options *opts; git_diff_file file; git_diff_driver *driver; + uint32_t opts_flags; + git_off_t opts_max_size; git_iterator_type_t src; const git_blob *blob; git_map map; diff --git a/src/diff_patch.c b/src/diff_patch.c index d7eb69db6..fe22d678c 100644 --- a/src/diff_patch.c +++ b/src/diff_patch.c @@ -96,8 +96,7 @@ static int diff_patch_load(git_diff_patch *patch, git_diff_output *output) /* if no hunk and data callbacks and user doesn't care if data looks * binary, then there is no need to actually load the data */ - if (patch->ofile.opts && - (patch->ofile.opts->flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0 && + 
if ((patch->ofile.opts_flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0 && output && !output->hunk_cb && !output->data_cb) return 0; @@ -718,6 +717,6 @@ static void diff_output_init( static void diff_output_to_patch(git_diff_output *out, git_diff_patch *patch) { diff_output_init( - out, patch->ofile.opts, + out, NULL, diff_patch_file_cb, diff_patch_hunk_cb, diff_patch_line_cb, patch); } diff --git a/src/diff_xdiff.c b/src/diff_xdiff.c index 1d1c2d54c..91c56f727 100644 --- a/src/diff_xdiff.c +++ b/src/diff_xdiff.c @@ -109,6 +109,7 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch) { git_xdiff_output *xo = (git_xdiff_output *)output; git_xdiff_info info; + git_diff_find_context_payload findctxt; mmfile_t old_xdiff_data, new_xdiff_data; memset(&info, 0, sizeof(info)); @@ -117,15 +118,18 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch) xo->callback.priv = &info; - xo->config.find_func_priv = patch->ofile.driver; - xo->config.find_func = patch->ofile.driver ? - git_diff_driver_find_content_fn(patch->ofile.driver) : NULL; + git_diff_find_context_init( + &xo->config.find_func, &findctxt, patch->ofile.driver); + xo->config.find_func_priv = &findctxt; if (xo->config.find_func != NULL) xo->config.flags |= XDL_EMIT_FUNCNAMES; else xo->config.flags &= ~XDL_EMIT_FUNCNAMES; + /* TODO: check ofile.opts_flags to see if driver-specific per-file + * updates are needed to xo->params.flags + */ old_xdiff_data.ptr = patch->ofile.map.data; old_xdiff_data.size = patch->ofile.map.len; @@ -135,6 +139,8 @@ static int git_xdiff(git_diff_output *output, git_diff_patch *patch) xdl_diff(&old_xdiff_data, &new_xdiff_data, &xo->params, &xo->config, &xo->callback); + git_diff_find_context_clear(&findctxt); + return xo->output.error; } |