diff options
author | Russell Belfer <rb@github.com> | 2012-08-30 14:24:16 -0700 |
---|---|---|
committer | Russell Belfer <rb@github.com> | 2012-09-05 15:17:24 -0700 |
commit | f335ecd6e126aa9dea28786522c0e6ce71596e91 (patch) | |
tree | 3393df3457e423ddc1aaad4d89fb29184246f3a8 /src | |
parent | 4d3834038bd0aaef63d62c54900f6ddafec09515 (diff) | |
download | libgit2-f335ecd6e126aa9dea28786522c0e6ce71596e91.tar.gz |
Diff iterators
This refactors the diff output code so that an iterator object
can be used to traverse and generate the diffs, instead of just
the `foreach()` style with callbacks. The code has been rearranged
so that the two styles can still share most functions.
This also replaces `GIT_REVWALKOVER` with `GIT_ITEROVER` and uses
that as a common error code for marking the end of iteration when
using a iterator style of object.
Diffstat (limited to 'src')
-rw-r--r-- | src/attr.c | 1 | ||||
-rw-r--r-- | src/config_file.c | 1 | ||||
-rw-r--r-- | src/diff.c | 14 | ||||
-rw-r--r-- | src/diff.h | 1 | ||||
-rw-r--r-- | src/diff_output.c | 1016 | ||||
-rw-r--r-- | src/fetch.c | 2 | ||||
-rw-r--r-- | src/pool.h | 11 | ||||
-rw-r--r-- | src/refs.c | 1 | ||||
-rw-r--r-- | src/revparse.c | 2 | ||||
-rw-r--r-- | src/revwalk.c | 30 | ||||
-rw-r--r-- | src/status.c | 3 | ||||
-rw-r--r-- | src/submodule.c | 1 |
12 files changed, 809 insertions, 274 deletions
diff --git a/src/attr.c b/src/attr.c index 993220667..68f8d7de6 100644 --- a/src/attr.c +++ b/src/attr.c @@ -188,6 +188,7 @@ int git_attr_foreach( error = callback(assign->name, assign->value, payload); if (error) { + giterr_clear(); error = GIT_EUSER; goto cleanup; } diff --git a/src/config_file.c b/src/config_file.c index d3fb56aaa..c575649af 100644 --- a/src/config_file.c +++ b/src/config_file.c @@ -221,6 +221,7 @@ static int file_foreach( /* abort iterator on non-zero return value */ if (fn(key, var->value, data)) { + giterr_clear(); result = GIT_EUSER; goto cleanup; } diff --git a/src/diff.c b/src/diff.c index 430f52e0a..f8a01086c 100644 --- a/src/diff.c +++ b/src/diff.c @@ -316,6 +316,7 @@ static git_diff_list *git_diff_list_alloc( if (diff == NULL) return NULL; + GIT_REFCOUNT_INC(diff); diff->repo = repo; if (git_vector_init(&diff->deltas, 0, diff_delta__cmp) < 0 || @@ -391,15 +392,12 @@ fail: return NULL; } -void git_diff_list_free(git_diff_list *diff) +static void diff_list_free(git_diff_list *diff) { git_diff_delta *delta; git_attr_fnmatch *match; unsigned int i; - if (!diff) - return; - git_vector_foreach(&diff->deltas, i, delta) { git__free(delta); diff->deltas.contents[i] = NULL; @@ -416,6 +414,14 @@ void git_diff_list_free(git_diff_list *diff) git__free(diff); } +void git_diff_list_free(git_diff_list *diff) +{ + if (!diff) + return; + + GIT_REFCOUNT_DEC(diff, diff_list_free); +} + static int oid_for_workdir_item( git_repository *repo, const git_index_entry *item, diff --git a/src/diff.h b/src/diff.h index 6cc854fbd..2785fa425 100644 --- a/src/diff.h +++ b/src/diff.h @@ -26,6 +26,7 @@ enum { }; struct git_diff_list { + git_refcount rc; git_repository *repo; git_diff_options opts; git_vector pathspec; diff --git a/src/diff_output.c b/src/diff_output.c index 1f2c7233f..69921741c 100644 --- a/src/diff_output.c +++ b/src/diff_output.c @@ -16,16 +16,35 @@ #include "fileops.h" #include "filter.h" +/* + * A diff_delta_context represents all of the information that goes into + * processing the diff of an observed file change. In the case of the + * git_diff_foreach() call it is an emphemeral structure that is filled + * in to execute each diff. In the case of a git_diff_iterator, it holds + * most of the information for the diff in progress. + */ typedef struct { - git_diff_list *diff; - void *cb_data; - git_diff_hunk_fn hunk_cb; - git_diff_data_fn line_cb; - unsigned int index; + git_repository *repo; + git_diff_options *opts; + xdemitconf_t xdiff_config; + xpparam_t xdiff_params; git_diff_delta *delta; + uint32_t prepped : 1; + uint32_t loaded : 1; + uint32_t diffable : 1; + uint32_t diffed : 1; + git_iterator_type_t old_src; + git_iterator_type_t new_src; + git_blob *old_blob; + git_blob *new_blob; + git_map old_data; + git_map new_data; + void *cb_data; + git_diff_hunk_fn per_hunk; + git_diff_data_fn per_line; + int cb_error; git_diff_range range; - int error; -} diff_output_info; +} diff_delta_context; static int read_next_int(const char **str, int *value) { @@ -41,71 +60,89 @@ static int read_next_int(const char **str, int *value) return (digits > 0) ? 0 : -1; } -static int diff_output_cb(void *priv, mmbuffer_t *bufs, int len) +static int parse_hunk_header(git_diff_range *range, const char *header) { - diff_output_info *info = priv; - - if (len == 1 && info->hunk_cb) { - git_diff_range range = { -1, 0, -1, 0 }; - const char *scan = bufs[0].ptr; + /* expect something of the form "@@ -%d[,%d] +%d[,%d] @@" */ + if (*header != '@') + return -1; + if (read_next_int(&header, &range->old_start) < 0) + return -1; + if (*header == ',') { + if (read_next_int(&header, &range->old_lines) < 0) + return -1; + } else + range->old_lines = 1; + if (read_next_int(&header, &range->new_start) < 0) + return -1; + if (*header == ',') { + if (read_next_int(&header, &range->new_lines) < 0) + return -1; + } else + range->new_lines = 1; + if (range->old_start < 0 || range->new_start < 0) + return -1; - /* expect something of the form "@@ -%d[,%d] +%d[,%d] @@" */ - if (*scan != '@') - info->error = -1; - else if (read_next_int(&scan, &range.old_start) < 0) - info->error = -1; - else if (*scan == ',' && read_next_int(&scan, &range.old_lines) < 0) - info->error = -1; - else if (read_next_int(&scan, &range.new_start) < 0) - info->error = -1; - else if (*scan == ',' && read_next_int(&scan, &range.new_lines) < 0) - info->error = -1; - else if (range.old_start < 0 || range.new_start < 0) - info->error = -1; - else { - memcpy(&info->range, &range, sizeof(git_diff_range)); + return 0; +} - if (info->hunk_cb( - info->cb_data, info->delta, &range, bufs[0].ptr, bufs[0].size)) - info->error = GIT_EUSER; - } +static int format_hunk_header(char *header, size_t len, git_diff_range *range) +{ + if (range->old_lines != 1) { + if (range->new_lines != 1) + return snprintf( + header, len, "@@ -%d,%d +%d,%d @@", + range->old_start, range->old_lines, + range->new_start, range->new_lines); + else + return snprintf( + header, len, "@@ -%d,%d +%d @@", + range->old_start, range->old_lines, range->new_start); + } else { + if (range->new_lines != 1) + return snprintf( + header, len, "@@ -%d +%d,%d @@", + range->old_start, range->new_start, range->new_lines); + else + return snprintf( + header, len, "@@ -%d +%d @@", + range->old_start, range->new_start); } +} - if ((len == 2 || len == 3) && info->line_cb) { - int origin; - - /* expect " "/"-"/"+", then data, then maybe newline */ - origin = - (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_ADDITION : - (*bufs[0].ptr == '-') ? GIT_DIFF_LINE_DELETION : - GIT_DIFF_LINE_CONTEXT; +static bool diff_delta_is_ambiguous(git_diff_delta *delta) +{ + return (git_oid_iszero(&delta->new_file.oid) && + (delta->new_file.flags & GIT_DIFF_FILE_VALID_OID) == 0 && + delta->status == GIT_DELTA_MODIFIED); +} - if (info->line_cb( - info->cb_data, info->delta, &info->range, origin, bufs[1].ptr, bufs[1].size)) - info->error = GIT_EUSER; +static bool diff_delta_should_skip(git_diff_options *opts, git_diff_delta *delta) +{ + if (delta->status == GIT_DELTA_UNMODIFIED && + (opts->flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) + return true; - /* This should only happen if we are adding a line that does not - * have a newline at the end and the old code did. In that case, - * we have a ADD with a DEL_EOFNL as a pair. - */ - else if (len == 3) { - origin = (origin == GIT_DIFF_LINE_ADDITION) ? - GIT_DIFF_LINE_DEL_EOFNL : GIT_DIFF_LINE_ADD_EOFNL; + if (delta->status == GIT_DELTA_IGNORED && + (opts->flags & GIT_DIFF_INCLUDE_IGNORED) == 0) + return true; - if (info->line_cb( - info->cb_data, info->delta, &info->range, origin, bufs[2].ptr, bufs[2].size)) - info->error = GIT_EUSER; - } - } + if (delta->status == GIT_DELTA_UNTRACKED && + (opts->flags & GIT_DIFF_INCLUDE_UNTRACKED) == 0) + return true; - return info->error; + return false; } #define BINARY_DIFF_FLAGS (GIT_DIFF_FILE_BINARY|GIT_DIFF_FILE_NOT_BINARY) -static int update_file_is_binary_by_attr(git_repository *repo, git_diff_file *file) +static int update_file_is_binary_by_attr( + git_repository *repo, git_diff_file *file) { const char *value; + + if (!repo) + return 0; + if (git_attr_get(&value, repo, 0, file->path, "diff") < 0) return -1; @@ -129,11 +166,10 @@ static void update_delta_is_binary(git_diff_delta *delta) /* otherwise leave delta->binary value untouched */ } -static int file_is_binary_by_attr( - git_diff_list *diff, - git_diff_delta *delta) +static int diff_delta_is_binary_by_attr(diff_delta_context *ctxt) { int error = 0, mirror_new; + git_diff_delta *delta = ctxt->delta; delta->binary = -1; @@ -148,7 +184,7 @@ static int file_is_binary_by_attr( } /* check if user is forcing us to text diff these files */ - if (diff->opts.flags & GIT_DIFF_FORCE_TEXT) { + if (ctxt->opts->flags & GIT_DIFF_FORCE_TEXT) { delta->old_file.flags |= GIT_DIFF_FILE_NOT_BINARY; delta->new_file.flags |= GIT_DIFF_FILE_NOT_BINARY; delta->binary = 0; @@ -156,7 +192,7 @@ static int file_is_binary_by_attr( } /* check diff attribute +, -, or 0 */ - if (update_file_is_binary_by_attr(diff->repo, &delta->old_file) < 0) + if (update_file_is_binary_by_attr(ctxt->repo, &delta->old_file) < 0) return -1; mirror_new = (delta->new_file.path == delta->old_file.path || @@ -164,23 +200,21 @@ static int file_is_binary_by_attr( if (mirror_new) delta->new_file.flags |= (delta->old_file.flags & BINARY_DIFF_FLAGS); else - error = update_file_is_binary_by_attr(diff->repo, &delta->new_file); + error = update_file_is_binary_by_attr(ctxt->repo, &delta->new_file); update_delta_is_binary(delta); return error; } -static int file_is_binary_by_content( - git_diff_delta *delta, - git_map *old_data, - git_map *new_data) +static int diff_delta_is_binary_by_content(diff_delta_context *ctxt) { + git_diff_delta *delta = ctxt->delta; git_buf search; if ((delta->old_file.flags & BINARY_DIFF_FLAGS) == 0) { - search.ptr = old_data->data; - search.size = min(old_data->len, 4000); + search.ptr = ctxt->old_data.data; + search.size = min(ctxt->old_data.len, 4000); if (git_buf_is_binary(&search)) delta->old_file.flags |= GIT_DIFF_FILE_BINARY; @@ -189,8 +223,8 @@ static int file_is_binary_by_content( } if ((delta->new_file.flags & BINARY_DIFF_FLAGS) == 0) { - search.ptr = new_data->data; - search.size = min(new_data->len, 4000); + search.ptr = ctxt->new_data.data; + search.size = min(ctxt->new_data.len, 4000); if (git_buf_is_binary(&search)) delta->new_file.flags |= GIT_DIFF_FILE_BINARY; @@ -256,16 +290,21 @@ static int get_workdir_content( return -1; if (S_ISLNK(file->mode)) { - ssize_t read_len; + ssize_t alloc_len, read_len; file->flags |= GIT_DIFF_FILE_FREE_DATA; file->flags |= GIT_DIFF_FILE_BINARY; - map->data = git__malloc((size_t)file->size + 1); + /* link path on disk could be UTF-16, so prepare a buffer that is + * big enough to handle some UTF-8 data expansion + */ + alloc_len = (ssize_t)(file->size * 2) + 1; + + map->data = git__malloc(alloc_len); GITERR_CHECK_ALLOC(map->data); - read_len = p_readlink(path.ptr, map->data, (size_t)file->size + 1); - if (read_len != (ssize_t)file->size) { + read_len = p_readlink(path.ptr, map->data, (int)alloc_len); + if (read_len < 0) { giterr_set(GITERR_OS, "Failed to read symlink '%s'", file->path); error = -1; } else @@ -286,189 +325,304 @@ static void release_content(git_diff_file *file, git_map *map, git_blob *blob) if (file->flags & GIT_DIFF_FILE_FREE_DATA) { git__free(map->data); - map->data = NULL; + map->data = ""; + map->len = 0; file->flags &= ~GIT_DIFF_FILE_FREE_DATA; } else if (file->flags & GIT_DIFF_FILE_UNMAP_DATA) { git_futils_mmap_free(map); - map->data = NULL; + map->data = ""; + map->len = 0; file->flags &= ~GIT_DIFF_FILE_UNMAP_DATA; } } -static void fill_map_from_mmfile(git_map *dst, mmfile_t *src) { - assert(dst && src); +static void diff_delta_init_context( + diff_delta_context *ctxt, + git_repository *repo, + git_diff_options *opts, + git_iterator_type_t old_src, + git_iterator_type_t new_src) +{ + memset(ctxt, 0, sizeof(diff_delta_context)); + + ctxt->repo = repo; + ctxt->opts = opts; + ctxt->old_src = old_src; + ctxt->new_src = new_src; - dst->data = src->ptr; - dst->len = src->size; -#ifdef GIT_WIN32 - dst->fmh = NULL; -#endif + setup_xdiff_options(opts, &ctxt->xdiff_config, &ctxt->xdiff_params); } -int git_diff_foreach( - git_diff_list *diff, - void *data, - git_diff_file_fn file_cb, - git_diff_hunk_fn hunk_cb, - git_diff_data_fn line_cb) +static void diff_delta_init_context_from_diff_list( + diff_delta_context *ctxt, + git_diff_list *diff) { - int error = 0; - diff_output_info info; - git_diff_delta *delta; - xpparam_t xdiff_params; - xdemitconf_t xdiff_config; - xdemitcb_t xdiff_callback; + diff_delta_init_context( + ctxt, diff->repo, &diff->opts, diff->old_src, diff->new_src); +} - memset(&info, 0, sizeof(info)); - info.diff = diff; - info.cb_data = data; - info.hunk_cb = hunk_cb; - info.line_cb = line_cb; +static void diff_delta_unload(diff_delta_context *ctxt) +{ + ctxt->diffed = 0; - setup_xdiff_options(&diff->opts, &xdiff_config, &xdiff_params); - memset(&xdiff_callback, 0, sizeof(xdiff_callback)); - xdiff_callback.outf = diff_output_cb; - xdiff_callback.priv = &info; + if (ctxt->loaded) { + release_content(&ctxt->delta->old_file, &ctxt->old_data, ctxt->old_blob); + release_content(&ctxt->delta->new_file, &ctxt->new_data, ctxt->new_blob); + ctxt->loaded = 0; + } - git_vector_foreach(&diff->deltas, info.index, delta) { - git_blob *old_blob = NULL, *new_blob = NULL; - git_map old_data, new_data; - mmfile_t old_xdiff_data, new_xdiff_data; + ctxt->delta = NULL; + ctxt->prepped = 0; +} - if (delta->status == GIT_DELTA_UNMODIFIED && - (diff->opts.flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) - continue; +static int diff_delta_prep(diff_delta_context *ctxt) +{ + int error; - if (delta->status == GIT_DELTA_IGNORED && - (diff->opts.flags & GIT_DIFF_INCLUDE_IGNORED) == 0) - continue; + if (ctxt->prepped || !ctxt->delta) + return 0; - if (delta->status == GIT_DELTA_UNTRACKED && - (diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED) == 0) - continue; + error = diff_delta_is_binary_by_attr(ctxt); - if ((error = file_is_binary_by_attr(diff, delta)) < 0) - goto cleanup; + ctxt->prepped = !error; - old_data.data = ""; - old_data.len = 0; - new_data.data = ""; - new_data.len = 0; + return error; +} - /* TODO: Partial blob reading to defer loading whole blob. - * I.e. I want a blob with just the first 4kb loaded, then - * later on I will read the rest of the blob if needed. - */ +static int diff_delta_load(diff_delta_context *ctxt) +{ + int error = 0; + git_diff_delta *delta = ctxt->delta; - /* map files */ - if (delta->binary != 1 && - (hunk_cb || line_cb || git_oid_iszero(&delta->old_file.oid)) && - (delta->status == GIT_DELTA_DELETED || - delta->status == GIT_DELTA_MODIFIED)) - { - if (diff->old_src == GIT_ITERATOR_WORKDIR) - error = get_workdir_content(diff->repo, &delta->old_file, &old_data); - else - error = get_blob_content( - diff->repo, &delta->old_file.oid, &old_data, &old_blob); + if (ctxt->loaded || !ctxt->delta) + return 0; - if (error < 0) - goto cleanup; + if (!ctxt->prepped && (error = diff_delta_prep(ctxt)) < 0) + goto cleanup; + + ctxt->old_data.data = ""; + ctxt->old_data.len = 0; + ctxt->old_blob = NULL; + + if (!error && delta->binary != 1 && + (delta->status == GIT_DELTA_DELETED || + delta->status == GIT_DELTA_MODIFIED)) + { + if (ctxt->old_src == GIT_ITERATOR_WORKDIR) + error = get_workdir_content( + ctxt->repo, &delta->old_file, &ctxt->old_data); + else { + error = get_blob_content( + ctxt->repo, &delta->old_file.oid, + &ctxt->old_data, &ctxt->old_blob); + + if (ctxt->new_src == GIT_ITERATOR_WORKDIR) { + /* TODO: convert crlf of blob content */ + } } + } - if (delta->binary != 1 && - (hunk_cb || line_cb || git_oid_iszero(&delta->new_file.oid)) && - (delta->status == GIT_DELTA_ADDED || - delta->status == GIT_DELTA_MODIFIED)) - { - if (diff->new_src == GIT_ITERATOR_WORKDIR) - error = get_workdir_content(diff->repo, &delta->new_file, &new_data); - else - error = get_blob_content( - diff->repo, &delta->new_file.oid, &new_data, &new_blob); + ctxt->new_data.data = ""; + ctxt->new_data.len = 0; + ctxt->new_blob = NULL; + + if (!error && delta->binary != 1 && + (delta->status == GIT_DELTA_ADDED || + delta->status == GIT_DELTA_MODIFIED)) + { + if (ctxt->new_src == GIT_ITERATOR_WORKDIR) + error = get_workdir_content( + ctxt->repo, &delta->new_file, &ctxt->new_data); + else { + error = get_blob_content( + ctxt->repo, &delta->new_file.oid, + &ctxt->new_data, &ctxt->new_blob); + if (ctxt->old_src == GIT_ITERATOR_WORKDIR) { + /* TODO: convert crlf of blob content */ + } + } + + if (!error && !(delta->new_file.flags & GIT_DIFF_FILE_VALID_OID)) { + error = git_odb_hash( + &delta->new_file.oid, ctxt->new_data.data, + ctxt->new_data.len, GIT_OBJ_BLOB); if (error < 0) goto cleanup; - if ((delta->new_file.flags & GIT_DIFF_FILE_VALID_OID) == 0) { - error = git_odb_hash( - &delta->new_file.oid, new_data.data, new_data.len, GIT_OBJ_BLOB); + delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID; - if (error < 0) + /* since we did not have the definitive oid, we may have + * incorrect status and need to skip this item. + */ + if (delta->old_file.mode == delta->new_file.mode && + !git_oid_cmp(&delta->old_file.oid, &delta->new_file.oid)) + { + delta->status = GIT_DELTA_UNMODIFIED; + + if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) goto cleanup; - delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID; - - /* since we did not have the definitive oid, we may have - * incorrect status and need to skip this item. - */ - if (delta->old_file.mode == delta->new_file.mode && - !git_oid_cmp(&delta->old_file.oid, &delta->new_file.oid)) - { - delta->status = GIT_DELTA_UNMODIFIED; - if ((diff->opts.flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) - goto cleanup; - } } } + } + + /* if we have not already decided whether file is binary, + * check the first 4K for nul bytes to decide... + */ + if (!error && delta->binary == -1) + error = diff_delta_is_binary_by_content(ctxt); + +cleanup: + ctxt->loaded = !error; + + /* flag if we would want to diff the contents of these files */ + if (ctxt->loaded) + ctxt->diffable = + (delta->binary != 1 && + delta->status != GIT_DELTA_UNMODIFIED && + (ctxt->old_data.len || ctxt->new_data.len) && + git_oid_cmp(&delta->old_file.oid, &delta->new_file.oid)); + + return error; +} + +static int diff_delta_cb(void *priv, mmbuffer_t *bufs, int len) +{ + diff_delta_context *ctxt = priv; + + if (len == 1) { + if ((ctxt->cb_error = parse_hunk_header(&ctxt->range, bufs[0].ptr)) < 0) + return ctxt->cb_error; + + if (ctxt->per_hunk != NULL && + ctxt->per_hunk(ctxt->cb_data, ctxt->delta, &ctxt->range, + bufs[0].ptr, bufs[0].size)) + ctxt->cb_error = GIT_EUSER; + } - /* if we have not already decided whether file is binary, - * check the first 4K for nul bytes to decide... + if (len == 2 || len == 3) { + /* expect " "/"-"/"+", then data */ + char origin = + (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_ADDITION : + (*bufs[0].ptr == '-') ? GIT_DIFF_LINE_DELETION : + GIT_DIFF_LINE_CONTEXT; + + if (ctxt->per_line != NULL && + ctxt->per_line(ctxt->cb_data, ctxt->delta, &ctxt->range, origin, + bufs[1].ptr, bufs[1].size)) + ctxt->cb_error = GIT_EUSER; + } + + if (len == 3 && !ctxt->cb_error) { + /* This should only happen if we are adding a line that does not + * have a newline at the end and the old code did. In that case, + * we have a ADD with a DEL_EOFNL as a pair. */ - if (delta->binary == -1) { - error = file_is_binary_by_content( - delta, &old_data, &new_data); - if (error < 0) + char origin = + (*bufs[0].ptr == '+') ? GIT_DIFF_LINE_DEL_EOFNL : + (*bufs[0].ptr == '-') ? GIT_DIFF_LINE_ADD_EOFNL : + GIT_DIFF_LINE_CONTEXT; + + if (ctxt->per_line != NULL && + ctxt->per_line(ctxt->cb_data, ctxt->delta, &ctxt->range, origin, + bufs[2].ptr, bufs[2].size)) + ctxt->cb_error = GIT_EUSER; + } + + return ctxt->cb_error; +} + +static int diff_delta_exec( + diff_delta_context *ctxt, + void *cb_data, + git_diff_hunk_fn per_hunk, + git_diff_data_fn per_line) +{ + int error = 0; + xdemitcb_t xdiff_callback; + mmfile_t old_xdiff_data, new_xdiff_data; + + if (ctxt->diffed || !ctxt->delta) + return 0; + + if (!ctxt->loaded && (error = diff_delta_load(ctxt)) < 0) + goto cleanup; + + if (!ctxt->diffable) + return 0; + + ctxt->cb_data = cb_data; + ctxt->per_hunk = per_hunk; + ctxt->per_line = per_line; + ctxt->cb_error = 0; + + memset(&xdiff_callback, 0, sizeof(xdiff_callback)); + xdiff_callback.outf = diff_delta_cb; + xdiff_callback.priv = ctxt; + + old_xdiff_data.ptr = ctxt->old_data.data; + old_xdiff_data.size = ctxt->old_data.len; + new_xdiff_data.ptr = ctxt->new_data.data; + new_xdiff_data.size = ctxt->new_data.len; + + xdl_diff(&old_xdiff_data, &new_xdiff_data, + &ctxt->xdiff_params, &ctxt->xdiff_config, &xdiff_callback); + + error = ctxt->cb_error; + +cleanup: + ctxt->diffed = !error; + + return error; +} + +int git_diff_foreach( + git_diff_list *diff, + void *data, + git_diff_file_fn file_cb, + git_diff_hunk_fn hunk_cb, + git_diff_data_fn line_cb) +{ + int error = 0; + diff_delta_context ctxt; + size_t idx; + + diff_delta_init_context_from_diff_list(&ctxt, diff); + + git_vector_foreach(&diff->deltas, idx, ctxt.delta) { + if (diff_delta_is_ambiguous(ctxt.delta)) + if ((error = diff_delta_load(&ctxt)) < 0) goto cleanup; - } - /* TODO: if ignore_whitespace is set, then we *must* do text - * diffs to tell if a file has really been changed. - */ + if (diff_delta_should_skip(ctxt.opts, ctxt.delta)) + continue; + + if ((error = diff_delta_load(&ctxt)) < 0) + goto cleanup; if (file_cb != NULL && - file_cb(data, delta, (float)info.index / diff->deltas.length)) + file_cb(data, ctxt.delta, (float)idx / diff->deltas.length) != 0) { error = GIT_EUSER; goto cleanup; } - /* don't do hunk and line diffs if file is binary */ - if (delta->binary == 1) - goto cleanup; - - /* nothing to do if we did not get data */ - if (!old_data.len && !new_data.len) - goto cleanup; - - /* nothing to do if only diff was a mode change */ - if (!git_oid_cmp(&delta->old_file.oid, &delta->new_file.oid)) - goto cleanup; - - assert(hunk_cb || line_cb); - - info.delta = delta; - old_xdiff_data.ptr = old_data.data; - old_xdiff_data.size = old_data.len; - new_xdiff_data.ptr = new_data.data; - new_xdiff_data.size = new_data.len; - - xdl_diff(&old_xdiff_data, &new_xdiff_data, - &xdiff_params, &xdiff_config, &xdiff_callback); - error = info.error; + error = diff_delta_exec(&ctxt, data, hunk_cb, line_cb); cleanup: - release_content(&delta->old_file, &old_data, old_blob); - release_content(&delta->new_file, &new_data, new_blob); + diff_delta_unload(&ctxt); if (error < 0) break; } + if (error == GIT_EUSER) + giterr_clear(); + return error; } - typedef struct { git_diff_list *diff; git_diff_data_fn print_cb; @@ -531,7 +685,10 @@ static int print_compact(void *data, git_diff_delta *delta, float progress) if (pi->print_cb(pi->cb_data, delta, NULL, GIT_DIFF_LINE_FILE_HDR, git_buf_cstr(pi->buf), git_buf_len(pi->buf))) + { + giterr_clear(); return GIT_EUSER; + } return 0; } @@ -628,7 +785,10 @@ static int print_patch_file(void *data, git_diff_delta *delta, float progress) return -1; if (pi->print_cb(pi->cb_data, delta, NULL, GIT_DIFF_LINE_FILE_HDR, git_buf_cstr(pi->buf), git_buf_len(pi->buf))) + { + giterr_clear(); return GIT_EUSER; + } if (delta->binary != 1) return 0; @@ -642,7 +802,10 @@ static int print_patch_file(void *data, git_diff_delta *delta, float progress) if (pi->print_cb(pi->cb_data, delta, NULL, GIT_DIFF_LINE_BINARY, git_buf_cstr(pi->buf), git_buf_len(pi->buf))) + { + giterr_clear(); return GIT_EUSER; + } return 0; } @@ -662,7 +825,10 @@ static int print_patch_hunk( if (pi->print_cb(pi->cb_data, d, r, GIT_DIFF_LINE_HUNK_HDR, git_buf_cstr(pi->buf), git_buf_len(pi->buf))) + { + giterr_clear(); return GIT_EUSER; + } return 0; } @@ -691,7 +857,10 @@ static int print_patch_line( if (pi->print_cb(pi->cb_data, delta, range, line_origin, git_buf_cstr(pi->buf), git_buf_len(pi->buf))) + { + giterr_clear(); return GIT_EUSER; + } return 0; } @@ -726,17 +895,36 @@ int git_diff_entrycount(git_diff_list *diff, int delta_t) assert(diff); - if (delta_t < 0) - return (int)diff->deltas.length; - git_vector_foreach(&diff->deltas, i, delta) { - if (delta->status == (git_delta_t)delta_t) + if (diff_delta_should_skip(&diff->opts, delta)) + continue; + + if (delta_t < 0 || delta->status == (git_delta_t)delta_t) count++; } + /* It is possible that this has overcounted the number of diffs because + * there may be entries that are marked as MODIFIED due to differences + * in stat() output that will turn out to be the same once we calculate + * the actual SHA of the data on disk. + */ + return count; } +static void set_data_from_blob( + git_blob *blob, git_map *map, git_diff_file *file) +{ + if (blob) { + map->data = (char *)git_blob_rawcontent(blob); + file->size = map->len = git_blob_rawsize(blob); + git_oid_cpy(&file->oid, git_object_id((const git_object *)blob)); + } else { + map->data = ""; + file->size = map->len = 0; + } +} + int git_diff_blobs( git_blob *old_blob, git_blob *new_blob, @@ -746,17 +934,11 @@ int git_diff_blobs( git_diff_hunk_fn hunk_cb, git_diff_data_fn line_cb) { - diff_output_info info; + int error; + diff_delta_context ctxt; git_diff_delta delta; - mmfile_t old_data, new_data; - git_map old_map, new_map; - xpparam_t xdiff_params; - xdemitconf_t xdiff_config; - xdemitcb_t xdiff_callback; git_blob *new, *old; - memset(&delta, 0, sizeof(delta)); - new = new_blob; old = old_blob; @@ -766,25 +948,16 @@ int git_diff_blobs( new = swap; } - if (old) { - old_data.ptr = (char *)git_blob_rawcontent(old); - old_data.size = git_blob_rawsize(old); - git_oid_cpy(&delta.old_file.oid, git_object_id((const git_object *)old)); - } else { - old_data.ptr = ""; - old_data.size = 0; - } - - if (new) { - new_data.ptr = (char *)git_blob_rawcontent(new); - new_data.size = git_blob_rawsize(new); - git_oid_cpy(&delta.new_file.oid, git_object_id((const git_object *)new)); - } else { - new_data.ptr = ""; - new_data.size = 0; - } + diff_delta_init_context( + &ctxt, NULL, options, GIT_ITERATOR_TREE, GIT_ITERATOR_TREE); /* populate a "fake" delta record */ + + memset(&delta, 0, sizeof(delta)); + + set_data_from_blob(old, &ctxt.old_data, &delta.old_file); + set_data_from_blob(new, &ctxt.new_data, &delta.new_file); + delta.status = new ? (old ? GIT_DELTA_MODIFIED : GIT_DELTA_ADDED) : (old ? GIT_DELTA_DELETED : GIT_DELTA_UNTRACKED); @@ -792,39 +965,370 @@ int git_diff_blobs( if (git_oid_cmp(&delta.new_file.oid, &delta.old_file.oid) == 0) delta.status = GIT_DELTA_UNMODIFIED; - delta.old_file.size = old_data.size; - delta.new_file.size = new_data.size; + ctxt.delta = δ + + if ((error = diff_delta_prep(&ctxt)) < 0) + goto cleanup; + + if (delta.binary == -1 && + (error = diff_delta_is_binary_by_content(&ctxt)) < 0) + goto cleanup; + + ctxt.loaded = 1; + ctxt.diffable = (delta.binary != 1 && delta.status != GIT_DELTA_UNMODIFIED); + + /* do diffs */ + + if (file_cb != NULL && file_cb(cb_data, &delta, 1)) { + error = GIT_EUSER; + goto cleanup; + } + + error = diff_delta_exec(&ctxt, cb_data, hunk_cb, line_cb); + +cleanup: + if (error == GIT_EUSER) + giterr_clear(); + + diff_delta_unload(&ctxt); + + return error; +} + +typedef struct diffiter_line diffiter_line; +struct diffiter_line { + diffiter_line *next; + char origin; + const char *ptr; + size_t len; +}; + +typedef struct diffiter_hunk diffiter_hunk; +struct diffiter_hunk { + diffiter_hunk *next; + git_diff_range range; + diffiter_line *line_head; + size_t line_count; +}; + +struct git_diff_iterator { + git_diff_list *diff; + diff_delta_context ctxt; + size_t file_index; + size_t next_index; + size_t file_count; + git_pool hunks; + size_t hunk_count; + diffiter_hunk *hunk_head; + diffiter_hunk *hunk_curr; + char hunk_header[128]; + git_pool lines; + size_t line_count; + diffiter_line *line_curr; +}; + +typedef struct { + git_diff_iterator *iter; + diffiter_hunk *last_hunk; + diffiter_line *last_line; +} diffiter_cb_info; - fill_map_from_mmfile(&old_map, &old_data); - fill_map_from_mmfile(&new_map, &new_data); +static int diffiter_hunk_cb( + void *cb_data, + git_diff_delta *delta, + git_diff_range *range, + const char *header, + size_t header_len) +{ + diffiter_cb_info *info = cb_data; + git_diff_iterator *iter = info->iter; + diffiter_hunk *hunk; + + GIT_UNUSED(delta); + GIT_UNUSED(header); + GIT_UNUSED(header_len); - if (file_is_binary_by_content(&delta, &old_map, &new_map) < 0) + if ((hunk = git_pool_mallocz(&iter->hunks, 1)) == NULL) { + iter->ctxt.cb_error = -1; return -1; + } - if (file_cb != NULL && file_cb(cb_data, &delta, 1)) - return GIT_EUSER; + if (info->last_hunk) + info->last_hunk->next = hunk; + info->last_hunk = hunk; - /* don't do hunk and line diffs if the two blobs are identical */ - if (delta.status == GIT_DELTA_UNMODIFIED) - return 0; + memcpy(&hunk->range, range, sizeof(hunk->range)); + + iter->hunk_count++; + + if (iter->hunk_head == NULL) + iter->hunk_curr = iter->hunk_head = hunk; + + return 0; +} - /* don't do hunk and line diffs if file is binary */ - if (delta.binary == 1) +static int diffiter_line_cb( + void *cb_data, + git_diff_delta *delta, + git_diff_range *range, + char line_origin, + const char *content, + size_t content_len) +{ + diffiter_cb_info *info = cb_data; + git_diff_iterator *iter = info->iter; + diffiter_line *line; + + GIT_UNUSED(delta); + GIT_UNUSED(range); + + if ((line = git_pool_mallocz(&iter->lines, 1)) == NULL) { + iter->ctxt.cb_error = -1; + return -1; + } + + if (info->last_line) + info->last_line->next = line; + info->last_line = line; + + line->origin = line_origin; + line->ptr = content; + line->len = content_len; + + info->last_hunk->line_count++; + iter->line_count++; + + if (info->last_hunk->line_head == NULL) + info->last_hunk->line_head = line; + + return 0; +} + +static int diffiter_do_diff_file(git_diff_iterator *iter) +{ + int error; + diffiter_cb_info info; + + if (iter->ctxt.diffed || !iter->ctxt.delta) return 0; memset(&info, 0, sizeof(info)); - info.diff = NULL; - info.delta = δ - info.cb_data = cb_data; - info.hunk_cb = hunk_cb; - info.line_cb = line_cb; + info.iter = iter; - setup_xdiff_options(options, &xdiff_config, &xdiff_params); - memset(&xdiff_callback, 0, sizeof(xdiff_callback)); - xdiff_callback.outf = diff_output_cb; - xdiff_callback.priv = &info; + error = diff_delta_exec( + &iter->ctxt, &info, diffiter_hunk_cb, diffiter_line_cb); + + if (error == GIT_EUSER) + error = iter->ctxt.cb_error; + + return error; +} - xdl_diff(&old_data, &new_data, &xdiff_params, &xdiff_config, &xdiff_callback); +static void diffiter_do_unload_file(git_diff_iterator *iter) +{ + if (iter->ctxt.loaded) { + diff_delta_unload(&iter->ctxt); + + git_pool_clear(&iter->lines); + git_pool_clear(&iter->hunks); + } + + iter->ctxt.delta = NULL; + iter->hunk_head = NULL; + iter->hunk_count = 0; + iter->line_count = 0; +} + +int git_diff_iterator_new( + git_diff_iterator **iterator_ptr, + git_diff_list *diff) +{ + size_t i; + git_diff_delta *delta; + git_diff_iterator *iter; + + assert(diff && iterator_ptr); + + *iterator_ptr = NULL; + + iter = git__malloc(sizeof(git_diff_iterator)); + GITERR_CHECK_ALLOC(iter); + + memset(iter, 0, sizeof(*iter)); + + iter->diff = diff; + GIT_REFCOUNT_INC(iter->diff); + + diff_delta_init_context_from_diff_list(&iter->ctxt, diff); + + if (git_pool_init(&iter->hunks, sizeof(diffiter_hunk), 0) < 0 || + git_pool_init(&iter->lines, sizeof(diffiter_line), 0) < 0) + goto fail; + + git_vector_foreach(&diff->deltas, i, delta) { + if (diff_delta_should_skip(iter->ctxt.opts, delta)) + continue; + iter->file_count++; + } + + *iterator_ptr = iter; + + return 0; + +fail: + git_diff_iterator_free(iter); + + return -1; +} + +void git_diff_iterator_free(git_diff_iterator *iter) +{ + diffiter_do_unload_file(iter); + git_diff_list_free(iter->diff); /* decrement ref count */ + git__free(iter); +} + +int git_diff_iterator_num_files(git_diff_iterator *iter) +{ + return (int)iter->file_count; +} + +int git_diff_iterator_num_hunks_in_file(git_diff_iterator *iter) +{ + int error = diffiter_do_diff_file(iter); + return (error != 0) ? error : (int)iter->hunk_count; +} + +int git_diff_iterator_num_lines_in_hunk(git_diff_iterator *iter) +{ + int error = diffiter_do_diff_file(iter); + return (error != 0) ? error : (int)iter->line_count; +} + +int git_diff_iterator_next_file( + git_diff_delta **delta_ptr, + git_diff_iterator *iter) +{ + int error = 0; + + assert(iter); + + iter->file_index = iter->next_index; + + diffiter_do_unload_file(iter); - return info.error; + while (!error) { + iter->ctxt.delta = git_vector_get(&iter->diff->deltas, iter->file_index); + if (!iter->ctxt.delta) { + error = GIT_ITEROVER; + break; + } + + if (diff_delta_is_ambiguous(iter->ctxt.delta) && + (error = diff_delta_load(&iter->ctxt)) < 0) + break; + + if (!diff_delta_should_skip(iter->ctxt.opts, iter->ctxt.delta)) + break; + + iter->file_index++; + } + + if (!error) { + iter->next_index = iter->file_index + 1; + + error = diff_delta_prep(&iter->ctxt); + } + + if (iter->ctxt.delta == NULL) { + iter->hunk_curr = NULL; + iter->line_curr = NULL; + } + + if (delta_ptr != NULL) + *delta_ptr = !error ? iter->ctxt.delta : NULL; + + return error; +} + +int git_diff_iterator_next_hunk( + git_diff_range **range_ptr, + const char **header, + size_t *header_len, + git_diff_iterator *iter) +{ + int error = diffiter_do_diff_file(iter); + git_diff_range *range; + + if (error) + return error; + + if (iter->hunk_curr == NULL) { + if (range_ptr) *range_ptr = NULL; + if (header) *header = NULL; + if (header_len) *header_len = 0; + iter->line_curr = NULL; + return GIT_ITEROVER; + } + + range = &iter->hunk_curr->range; + + if (range_ptr) + *range_ptr = range; + + if (header) { + int out = format_hunk_header( + iter->hunk_header, sizeof(iter->hunk_header), range); + + /* TODO: append function name to header */ + + *(iter->hunk_header + out++) = '\n'; + + *header = iter->hunk_header; + + if (header_len) + *header_len = (size_t)out; + } + + iter->line_curr = iter->hunk_curr->line_head; + iter->hunk_curr = iter->hunk_curr->next; + + return error; +} + +int git_diff_iterator_next_line( + char *line_origin, /**< GIT_DIFF_LINE_... value from above */ + const char **content_ptr, + size_t *content_len, + git_diff_iterator *iter) +{ + int error = diffiter_do_diff_file(iter); + + if (error) + return error; + + /* if the user has not called next_hunk yet, call it implicitly (OK?) */ + if (iter->hunk_curr == iter->hunk_head) { + error = git_diff_iterator_next_hunk(NULL, NULL, NULL, iter); + if (error) + return error; + } + + if (iter->line_curr == NULL) { + if (line_origin) *line_origin = GIT_DIFF_LINE_CONTEXT; + if (content_ptr) *content_ptr = NULL; + if (content_len) *content_len = 0; + return GIT_ITEROVER; + } + + if (line_origin) + *line_origin = iter->line_curr->origin; + if (content_ptr) + *content_ptr = iter->line_curr->ptr; + if (content_len) + *content_len = iter->line_curr->len; + + iter->line_curr = iter->line_curr->next; + + return error; } diff --git a/src/fetch.c b/src/fetch.c index 278ba3c50..98e1f0b13 100644 --- a/src/fetch.c +++ b/src/fetch.c @@ -221,7 +221,7 @@ int git_fetch_negotiate(git_remote *remote) } } - if (error < 0 && error != GIT_REVWALKOVER) + if (error < 0 && error != GIT_ITEROVER) goto on_error; /* Tell the other end that we're done negotiating */ diff --git a/src/pool.h b/src/pool.h index 05d339244..dee6ecdae 100644 --- a/src/pool.h +++ b/src/pool.h @@ -76,6 +76,17 @@ extern void git_pool_swap(git_pool *a, git_pool *b); extern void *git_pool_malloc(git_pool *pool, uint32_t items); /** + * Allocate space and zero it out. + */ +GIT_INLINE(void *) git_pool_mallocz(git_pool *pool, uint32_t items) +{ + void *ptr = git_pool_malloc(pool, items); + if (ptr) + memset(ptr, 0, (size_t)items * (size_t)pool->item_size); + return ptr; +} + +/** * Allocate space and duplicate string data into it. * * This is allowed only for pools with item_size == sizeof(char) diff --git a/src/refs.c b/src/refs.c index 1589bc37d..211a5870c 100644 --- a/src/refs.c +++ b/src/refs.c @@ -1643,7 +1643,6 @@ int git_reference_normalize_name( } } - *buffer_out++ = *current++; buffer_size--; } diff --git a/src/revparse.c b/src/revparse.c index 3855c29f1..17266b944 100644 --- a/src/revparse.c +++ b/src/revparse.c @@ -493,7 +493,7 @@ static int walk_and_search(git_object **out, git_revwalk *walk, regex_t *regex) git_object_free(obj); } - if (error < 0 && error == GIT_REVWALKOVER) + if (error < 0 && error == GIT_ITEROVER) error = GIT_ENOTFOUND; return error; diff --git a/src/revwalk.c b/src/revwalk.c index 8b0e93baf..1a0927719 100644 --- a/src/revwalk.c +++ b/src/revwalk.c @@ -449,6 +449,7 @@ int git_merge_base(git_oid *out, git_repository *repo, git_oid *one, git_oid *tw if (!result) { git_revwalk_free(walk); + giterr_clear(); return GIT_ENOTFOUND; } @@ -682,7 +683,8 @@ static int revwalk_next_timesort(commit_object **object_out, git_revwalk *walk) } } - return GIT_REVWALKOVER; + giterr_clear(); + return GIT_ITEROVER; } static int revwalk_next_unsorted(commit_object **object_out, git_revwalk *walk) @@ -700,7 +702,8 @@ static int revwalk_next_unsorted(commit_object **object_out, git_revwalk *walk) } } - return GIT_REVWALKOVER; + giterr_clear(); + return GIT_ITEROVER; } static int revwalk_next_toposort(commit_object **object_out, git_revwalk *walk) @@ -710,8 +713,10 @@ static int revwalk_next_toposort(commit_object **object_out, git_revwalk *walk) for (;;) { next = commit_list_pop(&walk->iterator_topo); - if (next == NULL) - return GIT_REVWALKOVER; + if (next == NULL) { + giterr_clear(); + return GIT_ITEROVER; + } if (next->in_degree > 0) { next->topo_delay = 1; @@ -736,7 +741,7 @@ static int revwalk_next_toposort(commit_object **object_out, git_revwalk *walk) static int revwalk_next_reverse(commit_object **object_out, git_revwalk *walk) { *object_out = commit_list_pop(&walk->iterator_reverse); - return *object_out ? 0 : GIT_REVWALKOVER; + return *object_out ? 0 : GIT_ITEROVER; } @@ -751,8 +756,10 @@ static int prepare_walk(git_revwalk *walk) * If walk->one is NULL, there were no positive references, * so we know that the walk is already over. */ - if (walk->one == NULL) - return GIT_REVWALKOVER; + if (walk->one == NULL) { + giterr_clear(); + return GIT_ITEROVER; + } /* first figure out what the merge bases are */ if (merge_bases_many(&bases, walk, walk->one, &walk->twos) < 0) @@ -780,7 +787,7 @@ static int prepare_walk(git_revwalk *walk) return -1; } - if (error != GIT_REVWALKOVER) + if (error != GIT_ITEROVER) return error; walk->get_next = &revwalk_next_toposort; @@ -792,7 +799,7 @@ static int prepare_walk(git_revwalk *walk) if (commit_list_insert(next, &walk->iterator_reverse) == NULL) return -1; - if (error != GIT_REVWALKOVER) + if (error != GIT_ITEROVER) return error; walk->get_next = &revwalk_next_reverse; @@ -891,9 +898,10 @@ int git_revwalk_next(git_oid *oid, git_revwalk *walk) error = walk->get_next(&next, walk); - if (error == GIT_REVWALKOVER) { + if (error == GIT_ITEROVER) { git_revwalk_reset(walk); - return GIT_REVWALKOVER; + giterr_clear(); + return GIT_ITEROVER; } if (!error) diff --git a/src/status.c b/src/status.c index 3d3d15d77..0a5fbdcbf 100644 --- a/src/status.c +++ b/src/status.c @@ -151,6 +151,9 @@ cleanup: git_diff_list_free(idx2head); git_diff_list_free(wd2idx); + if (err == GIT_EUSER) + giterr_clear(); + return err; } diff --git a/src/submodule.c b/src/submodule.c index a9de9ee6e..66f1f84b4 100644 --- a/src/submodule.c +++ b/src/submodule.c @@ -163,6 +163,7 @@ int git_submodule_foreach( } if (callback(sm, sm->name, payload)) { + giterr_clear(); error = GIT_EUSER; break; } |