diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/checkout.c | 11 | ||||
| -rw-r--r-- | src/clone.c | 11 | ||||
| -rw-r--r-- | src/common.h | 6 | ||||
| -rw-r--r-- | src/config.c | 6 | ||||
| -rw-r--r-- | src/diff.c | 8 | ||||
| -rw-r--r-- | src/diff.h | 10 | ||||
| -rw-r--r-- | src/diff_output.c | 207 | ||||
| -rw-r--r-- | src/diff_tform.c | 324 | ||||
| -rw-r--r-- | src/fileops.c | 35 | ||||
| -rw-r--r-- | src/fileops.h | 10 | ||||
| -rw-r--r-- | src/hash/hash_generic.c | 2 | ||||
| -rw-r--r-- | src/hashsig.c | 365 | ||||
| -rw-r--r-- | src/hashsig.h | 72 | ||||
| -rw-r--r-- | src/index.c | 43 | ||||
| -rw-r--r-- | src/index.h | 2 | ||||
| -rw-r--r-- | src/indexer.c | 371 | ||||
| -rw-r--r-- | src/pack-objects.c | 2 | ||||
| -rw-r--r-- | src/pack.c | 11 | ||||
| -rw-r--r-- | src/pack.h | 4 | ||||
| -rw-r--r-- | src/repository.c | 1 | ||||
| -rw-r--r-- | src/tree.c | 16 | ||||
| -rw-r--r-- | src/win32/git2.rc | 4 | ||||
| -rw-r--r-- | src/win32/msvc-compat.h | 9 | ||||
| -rw-r--r-- | src/win32/posix_w32.c | 6 |
24 files changed, 977 insertions, 559 deletions
diff --git a/src/checkout.c b/src/checkout.c index 59cd218a9..19ac913d3 100644 --- a/src/checkout.c +++ b/src/checkout.c @@ -78,7 +78,7 @@ static int checkout_notify( git_oid_cpy(&wdfile.oid, &wditem->oid); wdfile.path = wditem->path; wdfile.size = wditem->file_size; - wdfile.flags = GIT_DIFF_FILE_VALID_OID; + wdfile.flags = GIT_DIFF_FLAG_VALID_OID; wdfile.mode = wditem->mode; workdir = &wdfile; @@ -456,7 +456,7 @@ static int checkout_action( while (1) { if (!wd) return checkout_action_no_wd(data, delta); - + cmp = strcomp(wd->path, delta->old_file.path); /* 1. wd before delta ("a/a" before "a/b") @@ -475,6 +475,8 @@ static int checkout_action( /* case 2 - entry prefixed by workdir tree */ if (git_iterator_advance_into_directory(workdir, &wd) < 0) goto fail; + + *wditem_ptr = wd; continue; } @@ -608,7 +610,7 @@ static int checkout_get_actions( if (act & CHECKOUT_ACTION__CONFLICT) counts[CHECKOUT_ACTION__CONFLICT]++; } - + error = checkout_remaining_wd_items(data, workdir, wditem, &pathspec); if (error < 0) goto fail; @@ -1141,6 +1143,9 @@ static int checkout_data_init( if ((error = git_repository_index(&data->index, data->repo)) < 0 || (error = git_index_read(data->index)) < 0) goto cleanup; + + /* clear the REUC when doing a tree or commit checkout */ + git_index_reuc_clear(data->index); } } diff --git a/src/clone.c b/src/clone.c index 409a77f92..0bbccd44b 100644 --- a/src/clone.c +++ b/src/clone.c @@ -429,6 +429,7 @@ int git_clone( int retcode = GIT_ERROR; git_repository *repo = NULL; git_clone_options normOptions; + int remove_directory_on_failure = 0; assert(out && url && local_path); @@ -439,11 +440,19 @@ int git_clone( return GIT_ERROR; } + /* Only remove the directory on failure if we create it */ + remove_directory_on_failure = !git_path_exists(local_path); + if (!(retcode = git_repository_init(&repo, local_path, normOptions.bare))) { if ((retcode = setup_remotes_and_fetch(repo, url, &normOptions)) < 0) { /* Failed to fetch; clean up */ git_repository_free(repo); - git_futils_rmdir_r(local_path, NULL, GIT_RMDIR_REMOVE_FILES); + + if (remove_directory_on_failure) + git_futils_rmdir_r(local_path, NULL, GIT_RMDIR_REMOVE_FILES); + else + git_futils_cleanupdir_r(local_path); + } else { *out = repo; retcode = 0; diff --git a/src/common.h b/src/common.h index ca203ee5c..e3a9e1984 100644 --- a/src/common.h +++ b/src/common.h @@ -33,14 +33,14 @@ # include "win32/pthread.h" #endif -# define snprintf _snprintf - #else -# include <unistd.h> +# include <unistd.h> # ifdef GIT_THREADS # include <pthread.h> # endif +#define GIT_STDLIB_CALL + #endif #include "git2/types.h" diff --git a/src/config.c b/src/config.c index ce105089e..d6aa3078c 100644 --- a/src/config.c +++ b/src/config.c @@ -426,8 +426,6 @@ static int get_string(const char **out, const git_config *cfg, const char *name) file_internal *internal; unsigned int i; - assert(cfg->files.length); - git_vector_foreach(&cfg->files, i, internal) { int res = get_string_at_file(out, internal->file, name); @@ -466,8 +464,6 @@ int git_config_get_entry(const git_config_entry **out, const git_config *cfg, co file_internal *internal; unsigned int i; - assert(cfg->files.length); - *out = NULL; git_vector_foreach(&cfg->files, i, internal) { @@ -488,8 +484,6 @@ int git_config_get_multivar(const git_config *cfg, const char *name, const char int ret = GIT_ENOTFOUND; size_t i; - assert(cfg->files.length); - /* * This loop runs the "wrong" way 'round because we need to * look at every value from the most general to most specific diff --git a/src/diff.c b/src/diff.c index d9bc32a37..0861b13eb 100644 --- a/src/diff.c +++ b/src/diff.c @@ -92,11 +92,11 @@ static int diff_delta__from_one( git_oid_cpy(&delta->new_file.oid, &entry->oid); } - delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID; if (delta->status == GIT_DELTA_DELETED || !git_oid_iszero(&delta->new_file.oid)) - delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID; + delta->new_file.flags |= GIT_DIFF_FLAG_VALID_OID; notify_res = diff_notify(diff, delta, matched_pathspec); @@ -142,7 +142,7 @@ static int diff_delta__from_two( git_oid_cpy(&delta->old_file.oid, &old_entry->oid); delta->old_file.size = old_entry->file_size; delta->old_file.mode = old_mode; - delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID; git_oid_cpy(&delta->new_file.oid, &new_entry->oid); delta->new_file.size = new_entry->file_size; @@ -156,7 +156,7 @@ static int diff_delta__from_two( } if (new_oid || !git_oid_iszero(&new_entry->oid)) - delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID; + delta->new_file.flags |= GIT_DIFF_FLAG_VALID_OID; notify_res = diff_notify(diff, delta, matched_pathspec); diff --git a/src/diff.h b/src/diff.h index 16fbf71e6..8e3cbcd46 100644 --- a/src/diff.h +++ b/src/diff.h @@ -28,8 +28,14 @@ enum { GIT_DIFFCAPS_USE_DEV = (1 << 4), /* use st_dev? */ }; -#define GIT_DELTA__TO_DELETE 10 -#define GIT_DELTA__TO_SPLIT 11 +enum { + GIT_DIFF_FLAG__FREE_PATH = (1 << 7), /* `path` is allocated memory */ + GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */ + GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */ + GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */ + GIT_DIFF_FLAG__TO_DELETE = (1 << 11), /* delete entry during rename det. */ + GIT_DIFF_FLAG__TO_SPLIT = (1 << 12), /* split entry during rename det. */ +}; struct git_diff_list { git_refcount rc; diff --git a/src/diff_output.c b/src/diff_output.c index 88ccc9d45..209a6e017 100644 --- a/src/diff_output.c +++ b/src/diff_output.c @@ -52,8 +52,8 @@ static int parse_hunk_header(git_diff_range *range, const char *header) return 0; } -#define KNOWN_BINARY_FLAGS (GIT_DIFF_FILE_BINARY|GIT_DIFF_FILE_NOT_BINARY) -#define NOT_BINARY_FLAGS (GIT_DIFF_FILE_NOT_BINARY|GIT_DIFF_FILE_NO_DATA) +#define KNOWN_BINARY_FLAGS (GIT_DIFF_FLAG_BINARY|GIT_DIFF_FLAG_NOT_BINARY) +#define NOT_BINARY_FLAGS (GIT_DIFF_FLAG_NOT_BINARY|GIT_DIFF_FLAG__NO_DATA) static int update_file_is_binary_by_attr( git_repository *repo, git_diff_file *file) @@ -68,9 +68,9 @@ static int update_file_is_binary_by_attr( return -1; if (GIT_ATTR_FALSE(value)) - file->flags |= GIT_DIFF_FILE_BINARY; + file->flags |= GIT_DIFF_FLAG_BINARY; else if (GIT_ATTR_TRUE(value)) - file->flags |= GIT_DIFF_FILE_NOT_BINARY; + file->flags |= GIT_DIFF_FLAG_NOT_BINARY; /* otherwise leave file->flags alone */ return 0; @@ -78,15 +78,15 @@ static int update_file_is_binary_by_attr( static void update_delta_is_binary(git_diff_delta *delta) { - if ((delta->old_file.flags & GIT_DIFF_FILE_BINARY) != 0 || - (delta->new_file.flags & GIT_DIFF_FILE_BINARY) != 0) - delta->binary = 1; + if ((delta->old_file.flags & GIT_DIFF_FLAG_BINARY) != 0 || + (delta->new_file.flags & GIT_DIFF_FLAG_BINARY) != 0) + delta->flags |= GIT_DIFF_FLAG_BINARY; else if ((delta->old_file.flags & NOT_BINARY_FLAGS) != 0 && (delta->new_file.flags & NOT_BINARY_FLAGS) != 0) - delta->binary = 0; + delta->flags |= GIT_DIFF_FLAG_NOT_BINARY; - /* otherwise leave delta->binary value untouched */ + /* otherwise leave delta->flags binary value untouched */ } /* returns if we forced binary setting (and no further checks needed) */ @@ -95,24 +95,24 @@ static bool diff_delta_is_binary_forced( git_diff_delta *delta) { /* return true if binary-ness has already been settled */ - if (delta->binary != -1) + if ((delta->flags & KNOWN_BINARY_FLAGS) != 0) return true; /* make sure files are conceivably mmap-able */ if ((git_off_t)((size_t)delta->old_file.size) != delta->old_file.size || (git_off_t)((size_t)delta->new_file.size) != delta->new_file.size) { - delta->old_file.flags |= GIT_DIFF_FILE_BINARY; - delta->new_file.flags |= GIT_DIFF_FILE_BINARY; - delta->binary = 1; + delta->old_file.flags |= GIT_DIFF_FLAG_BINARY; + delta->new_file.flags |= GIT_DIFF_FLAG_BINARY; + delta->flags |= GIT_DIFF_FLAG_BINARY; return true; } /* check if user is forcing us to text diff these files */ if (ctxt->opts && (ctxt->opts->flags & GIT_DIFF_FORCE_TEXT) != 0) { - delta->old_file.flags |= GIT_DIFF_FILE_NOT_BINARY; - delta->new_file.flags |= GIT_DIFF_FILE_NOT_BINARY; - delta->binary = 0; + delta->old_file.flags |= GIT_DIFF_FLAG_NOT_BINARY; + delta->new_file.flags |= GIT_DIFF_FLAG_NOT_BINARY; + delta->flags |= GIT_DIFF_FLAG_NOT_BINARY; return true; } @@ -125,8 +125,6 @@ static int diff_delta_is_binary_by_attr( int error = 0, mirror_new; git_diff_delta *delta = patch->delta; - delta->binary = -1; - if (diff_delta_is_binary_forced(ctxt, delta)) return 0; @@ -152,23 +150,21 @@ static int diff_delta_is_binary_by_content( git_diff_file *file, const git_map *map) { + const git_buf search = { map->data, 0, min(map->len, 4000) }; + if (diff_delta_is_binary_forced(ctxt, delta)) return 0; - if ((file->flags & KNOWN_BINARY_FLAGS) == 0) { - const git_buf search = { map->data, 0, min(map->len, 4000) }; - - /* TODO: provide encoding / binary detection callbacks that can - * be UTF-8 aware, etc. For now, instead of trying to be smart, - * let's just use the simple NUL-byte detection that core git uses. - */ + /* TODO: provide encoding / binary detection callbacks that can + * be UTF-8 aware, etc. For now, instead of trying to be smart, + * let's just use the simple NUL-byte detection that core git uses. + */ - /* previously was: if (git_buf_text_is_binary(&search)) */ - if (git_buf_text_contains_nul(&search)) - file->flags |= GIT_DIFF_FILE_BINARY; - else - file->flags |= GIT_DIFF_FILE_NOT_BINARY; - } + /* previously was: if (git_buf_text_is_binary(&search)) */ + if (git_buf_text_contains_nul(&search)) + file->flags |= GIT_DIFF_FLAG_BINARY; + else + file->flags |= GIT_DIFF_FLAG_NOT_BINARY; update_delta_is_binary(delta); @@ -192,7 +188,7 @@ static int diff_delta_is_binary_by_size( } if (file->size > threshold) - file->flags |= GIT_DIFF_FILE_BINARY; + file->flags |= GIT_DIFF_FLAG_BINARY; update_delta_is_binary(delta); @@ -247,7 +243,7 @@ static int get_blob_content( map->data = git_buf_detach(&content); map->len = strlen(map->data); - file->flags |= GIT_DIFF_FILE_FREE_DATA; + file->flags |= GIT_DIFF_FLAG__FREE_DATA; return 0; } @@ -270,7 +266,7 @@ static int get_blob_content( /* if blob is too large to diff, mark as binary */ if ((error = diff_delta_is_binary_by_size(ctxt, delta, file)) < 0) return error; - if (delta->binary == 1) + if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) return 0; if (odb_obj != NULL) { @@ -306,14 +302,14 @@ static int get_workdir_sm_content( return error; /* update OID if we didn't have it previously */ - if ((file->flags & GIT_DIFF_FILE_VALID_OID) == 0) { + if ((file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) { const git_oid* sm_head; if ((sm_head = git_submodule_wd_id(sm)) != NULL || (sm_head = git_submodule_head_id(sm)) != NULL) { git_oid_cpy(&file->oid, sm_head); - file->flags |= GIT_DIFF_FILE_VALID_OID; + file->flags |= GIT_DIFF_FLAG_VALID_OID; } } @@ -329,7 +325,7 @@ static int get_workdir_sm_content( map->data = git_buf_detach(&content); map->len = strlen(map->data); - file->flags |= GIT_DIFF_FILE_FREE_DATA; + file->flags |= GIT_DIFF_FLAG__FREE_DATA; return 0; } @@ -356,8 +352,8 @@ static int get_workdir_content( if (S_ISLNK(file->mode)) { ssize_t alloc_len, read_len; - file->flags |= GIT_DIFF_FILE_FREE_DATA; - file->flags |= GIT_DIFF_FILE_BINARY; + file->flags |= GIT_DIFF_FLAG__FREE_DATA; + file->flags |= GIT_DIFF_FLAG_BINARY; /* link path on disk could be UTF-16, so prepare a buffer that is * big enough to handle some UTF-8 data expansion @@ -389,7 +385,7 @@ static int get_workdir_content( file->size = git_futils_filesize(fd); if ((error = diff_delta_is_binary_by_size(ctxt, delta, file)) < 0 || - delta->binary == 1) + (delta->flags & GIT_DIFF_FLAG_BINARY) != 0) goto close_and_cleanup; error = git_filters_load( @@ -402,7 +398,7 @@ static int get_workdir_content( goto close_and_cleanup; error = git_futils_mmap_ro(map, fd, 0, (size_t)file->size); - file->flags |= GIT_DIFF_FILE_UNMAP_DATA; + file->flags |= GIT_DIFF_FLAG__UNMAP_DATA; } else { git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT; @@ -412,7 +408,7 @@ static int get_workdir_content( map->len = git_buf_len(&filtered); map->data = git_buf_detach(&filtered); - file->flags |= GIT_DIFF_FILE_FREE_DATA; + file->flags |= GIT_DIFF_FLAG__FREE_DATA; } git_buf_free(&raw); @@ -425,11 +421,11 @@ close_and_cleanup: } /* once data is loaded, update OID if we didn't have it previously */ - if (!error && (file->flags & GIT_DIFF_FILE_VALID_OID) == 0) { + if (!error && (file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) { error = git_odb_hash( &file->oid, map->data, map->len, GIT_OBJ_BLOB); if (!error) - file->flags |= GIT_DIFF_FILE_VALID_OID; + file->flags |= GIT_DIFF_FLAG_VALID_OID; } if (!error) @@ -445,22 +441,22 @@ static void release_content(git_diff_file *file, git_map *map, git_blob *blob) if (blob != NULL) git_blob_free(blob); - if (file->flags & GIT_DIFF_FILE_FREE_DATA) { + if (file->flags & GIT_DIFF_FLAG__FREE_DATA) { git__free(map->data); map->data = ""; map->len = 0; - file->flags &= ~GIT_DIFF_FILE_FREE_DATA; + file->flags &= ~GIT_DIFF_FLAG__FREE_DATA; } - else if (file->flags & GIT_DIFF_FILE_UNMAP_DATA) { + else if (file->flags & GIT_DIFF_FLAG__UNMAP_DATA) { git_futils_mmap_free(map); map->data = ""; map->len = 0; - file->flags &= ~GIT_DIFF_FILE_UNMAP_DATA; + file->flags &= ~GIT_DIFF_FLAG__UNMAP_DATA; } } -static void diff_context_init( +static int diff_context_init( diff_context *ctxt, git_diff_list *diff, git_repository *repo, @@ -472,6 +468,12 @@ static void diff_context_init( { memset(ctxt, 0, sizeof(diff_context)); + if (!repo && diff) + repo = diff->repo; + + if (!opts && diff) + opts = &diff->opts; + ctxt->repo = repo; ctxt->diff = diff; ctxt->opts = opts; @@ -482,6 +484,8 @@ static void diff_context_init( ctxt->error = 0; setup_xdiff_options(ctxt->opts, &ctxt->xdiff_config, &ctxt->xdiff_params); + + return 0; } static int diff_delta_file_callback( @@ -555,7 +559,7 @@ static int diff_patch_load( patch->new_data.len = 0; patch->new_blob = NULL; - if (delta->binary == 1) + if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) goto cleanup; if (!ctxt->hunk_cb && @@ -565,25 +569,25 @@ static int diff_patch_load( switch (delta->status) { case GIT_DELTA_ADDED: - delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA; + delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA; break; case GIT_DELTA_DELETED: - delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA; + delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA; break; case GIT_DELTA_MODIFIED: break; case GIT_DELTA_UNTRACKED: - delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA; + delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA; if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) == 0) - delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA; + delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA; break; default: - delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA; - delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA; + delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA; + delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA; break; } -#define CHECK_UNMODIFIED (GIT_DIFF_FILE_NO_DATA | GIT_DIFF_FILE_VALID_OID) +#define CHECK_UNMODIFIED (GIT_DIFF_FLAG__NO_DATA | GIT_DIFF_FLAG_VALID_OID) check_if_unmodified = (delta->old_file.flags & CHECK_UNMODIFIED) == 0 && @@ -594,41 +598,41 @@ static int diff_patch_load( * memory footprint during diff. */ - if ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + if ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && patch->old_src == GIT_ITERATOR_TYPE_WORKDIR) { if ((error = get_workdir_content( ctxt, delta, &delta->old_file, &patch->old_data)) < 0) goto cleanup; - if (delta->binary == 1) + if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) goto cleanup; } - if ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + if ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && patch->new_src == GIT_ITERATOR_TYPE_WORKDIR) { if ((error = get_workdir_content( ctxt, delta, &delta->new_file, &patch->new_data)) < 0) goto cleanup; - if (delta->binary == 1) + if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) goto cleanup; } - if ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + if ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && patch->old_src != GIT_ITERATOR_TYPE_WORKDIR) { if ((error = get_blob_content( ctxt, delta, &delta->old_file, &patch->old_data, &patch->old_blob)) < 0) goto cleanup; - if (delta->binary == 1) + if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) goto cleanup; } - if ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 && + if ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 && patch->new_src != GIT_ITERATOR_TYPE_WORKDIR) { if ((error = get_blob_content( ctxt, delta, &delta->new_file, &patch->new_data, &patch->new_blob)) < 0) goto cleanup; - if (delta->binary == 1) + if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0) goto cleanup; } @@ -646,13 +650,13 @@ static int diff_patch_load( } cleanup: - if (delta->binary == -1) + if ((delta->flags & KNOWN_BINARY_FLAGS) == 0) update_delta_is_binary(delta); if (!error) { patch->flags |= GIT_DIFF_PATCH_LOADED; - if (delta->binary != 1 && + if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0 && delta->status != GIT_DELTA_UNMODIFIED && (patch->old_data.len || patch->new_data.len) && !git_oid_equal(&delta->old_file.oid, &delta->new_file.oid)) @@ -926,6 +930,15 @@ static int diff_patch_line_cb( return 0; } +static int diff_required(git_diff_list *diff, const char *action) +{ + if (!diff) { + giterr_set(GITERR_INVALID, "Must provide valid diff to %s", action); + return -1; + } + + return 0; +} int git_diff_foreach( git_diff_list *diff, @@ -939,9 +952,12 @@ int git_diff_foreach( size_t idx; git_diff_patch patch; - diff_context_init( - &ctxt, diff, diff->repo, &diff->opts, - file_cb, hunk_cb, data_cb, payload); + if (diff_required(diff, "git_diff_foreach") < 0) + return -1; + + if (diff_context_init( + &ctxt, diff, NULL, NULL, file_cb, hunk_cb, data_cb, payload) < 0) + return -1; diff_patch_init(&ctxt, &patch); @@ -1138,7 +1154,7 @@ static int print_patch_file( newpath = "/dev/null"; } - if (delta->binary != 1) { + if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) { git_buf_printf(pi->buf, "--- %s%s\n", oldpfx, oldpath); git_buf_printf(pi->buf, "+++ %s%s\n", newpfx, newpath); } @@ -1153,7 +1169,7 @@ static int print_patch_file( return GIT_EUSER; } - if (delta->binary != 1) + if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) return 0; git_buf_clear(pi->buf); @@ -1268,7 +1284,7 @@ static void set_data_from_blob( map->data = (char *)git_blob_rawcontent(blob); } else { file->size = 0; - file->flags |= GIT_DIFF_FILE_NO_DATA; + file->flags |= GIT_DIFF_FLAG__NO_DATA; map->len = 0; map->data = ""; @@ -1283,7 +1299,7 @@ static void set_data_from_buffer( map->len = buffer_len; if (!buffer) { - file->flags |= GIT_DIFF_FILE_NO_DATA; + file->flags |= GIT_DIFF_FLAG__NO_DATA; map->data = NULL; } else { map->data = (char *)buffer; @@ -1310,8 +1326,10 @@ static int diff_single_init( memset(data, 0, sizeof(*data)); - diff_context_init( - &data->ctxt, NULL, repo, opts, file_cb, hunk_cb, data_cb, payload); + if (diff_context_init( + &data->ctxt, NULL, repo, opts, + file_cb, hunk_cb, data_cb, payload) < 0) + return -1; diff_patch_init(&data->ctxt, &data->patch); @@ -1322,13 +1340,13 @@ static int diff_single_apply(diff_single_data *data) { int error; git_diff_delta *delta = &data->delta; - bool has_old = ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0); - bool has_new = ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0); + bool has_old = ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); + bool has_new = ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0); /* finish setting up fake git_diff_delta record and loaded data */ data->patch.delta = delta; - delta->binary = -1; + delta->flags = delta->flags & ~KNOWN_BINARY_FLAGS; delta->status = has_new ? (has_old ? GIT_DELTA_MODIFIED : GIT_DELTA_ADDED) : @@ -1345,7 +1363,8 @@ static int diff_single_apply(diff_single_data *data) data->patch.flags |= GIT_DIFF_PATCH_LOADED; - if (delta->binary != 1 && delta->status != GIT_DELTA_UNMODIFIED) + if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0 && + delta->status != GIT_DELTA_UNMODIFIED) data->patch.flags |= GIT_DIFF_PATCH_DIFFABLE; /* do diffs */ @@ -1377,6 +1396,9 @@ int git_diff_blobs( new_blob ? git_object_owner((const git_object *)new_blob) : old_blob ? git_object_owner((const git_object *)old_blob) : NULL; + if (!repo) /* Hmm, given two NULL blobs, silently do no callbacks? */ + return 0; + if ((error = diff_single_init( &d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0) return error; @@ -1408,6 +1430,9 @@ int git_diff_blob_to_buffer( git_repository *repo = old_blob ? git_object_owner((const git_object *)old_blob) : NULL; + if (!repo && !buf) /* Hmm, given NULLs, silently do no callbacks? */ + return 0; + if ((error = diff_single_init( &d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0) return error; @@ -1456,11 +1481,19 @@ int git_diff_get_patch( if (patch_ptr) *patch_ptr = NULL; + if (delta_ptr) + *delta_ptr = NULL; + + if (diff_required(diff, "git_diff_get_patch") < 0) + return -1; + + if (diff_context_init( + &ctxt, diff, NULL, NULL, + NULL, diff_patch_hunk_cb, diff_patch_line_cb, NULL) < 0) + return -1; delta = git_vector_get(&diff->deltas, idx); if (!delta) { - if (delta_ptr) - *delta_ptr = NULL; giterr_set(GITERR_INVALID, "Index out of range for delta in diff"); return GIT_ENOTFOUND; } @@ -1469,17 +1502,17 @@ int git_diff_get_patch( *delta_ptr = delta; if (!patch_ptr && - (delta->binary != -1 || + ((delta->flags & KNOWN_BINARY_FLAGS) != 0 || (diff->opts.flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0)) return 0; - diff_context_init( - &ctxt, diff, diff->repo, &diff->opts, - NULL, diff_patch_hunk_cb, diff_patch_line_cb, NULL); - if (git_diff_delta__should_skip(ctxt.opts, delta)) return 0; + /* Don't load the patch if the user doesn't want it */ + if (!patch_ptr) + return 0; + patch = diff_patch_alloc(&ctxt, delta); if (!patch) return -1; diff --git a/src/diff_tform.c b/src/diff_tform.c index 2c2e1fb19..958d2bfec 100644 --- a/src/diff_tform.c +++ b/src/diff_tform.c @@ -7,6 +7,8 @@ #include "common.h" #include "diff.h" #include "git2/config.h" +#include "git2/blob.h" +#include "hashsig.h" static git_diff_delta *diff_delta__dup( const git_diff_delta *d, git_pool *pool) @@ -168,6 +170,36 @@ int git_diff_merge( return error; } +static int find_similar__hashsig_for_file( + void **out, const git_diff_file *f, const char *path, void *p) +{ + git_hashsig_option_t opt = (git_hashsig_option_t)p; + GIT_UNUSED(f); + return git_hashsig_create_fromfile((git_hashsig **)out, path, opt); +} + +static int find_similar__hashsig_for_buf( + void **out, const git_diff_file *f, const char *buf, size_t len, void *p) +{ + git_hashsig_option_t opt = (git_hashsig_option_t)p; + GIT_UNUSED(f); + return git_hashsig_create((git_hashsig **)out, buf, len, opt); +} + +static void find_similar__hashsig_free(void *sig, void *payload) +{ + GIT_UNUSED(payload); + git_hashsig_free(sig); +} + +static int find_similar__calc_similarity( + int *score, void *siga, void *sigb, void *payload) +{ + GIT_UNUSED(payload); + *score = git_hashsig_compare(siga, sigb); + return 0; +} + #define DEFAULT_THRESHOLD 50 #define DEFAULT_BREAK_REWRITE_THRESHOLD 60 #define DEFAULT_TARGET_LIMIT 200 @@ -178,7 +210,6 @@ static int normalize_find_opts( git_diff_find_options *given) { git_config *cfg = NULL; - const char *val; if (diff->repo != NULL && git_repository_config__weakptr(&cfg, diff->repo) < 0) @@ -187,8 +218,9 @@ static int normalize_find_opts( if (given != NULL) memcpy(opts, given, sizeof(*opts)); else { - git_diff_find_options init = GIT_DIFF_FIND_OPTIONS_INIT; - memmove(opts, &init, sizeof(init)); + const char *val = NULL; + + GIT_INIT_STRUCTURE(opts, GIT_DIFF_FIND_OPTIONS_VERSION); opts->flags = GIT_DIFF_FIND_RENAMES; @@ -236,6 +268,24 @@ static int normalize_find_opts( opts->target_limit = limit; } + /* assign the internal metric with whitespace flag as payload */ + if (!opts->metric) { + opts->metric = git__malloc(sizeof(git_diff_similarity_metric)); + GITERR_CHECK_ALLOC(opts->metric); + + opts->metric->file_signature = find_similar__hashsig_for_file; + opts->metric->buffer_signature = find_similar__hashsig_for_buf; + opts->metric->free_signature = find_similar__hashsig_free; + opts->metric->similarity = find_similar__calc_similarity; + + if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE) + opts->metric->payload = (void *)GIT_HASHSIG_IGNORE_WHITESPACE; + else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE) + opts->metric->payload = (void *)GIT_HASHSIG_NORMAL; + else + opts->metric->payload = (void *)GIT_HASHSIG_SMART_WHITESPACE; + } + return 0; } @@ -250,10 +300,10 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size) /* build new delta list without TO_DELETE and splitting TO_SPLIT */ git_vector_foreach(&diff->deltas, i, delta) { - if (delta->status == GIT_DELTA__TO_DELETE) + if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) continue; - if (delta->status == GIT_DELTA__TO_SPLIT) { + if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) { git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool); if (!deleted) goto on_error; @@ -261,7 +311,7 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size) deleted->status = GIT_DELTA_DELETED; memset(&deleted->new_file, 0, sizeof(deleted->new_file)); deleted->new_file.path = deleted->old_file.path; - deleted->new_file.flags |= GIT_DIFF_FILE_VALID_OID; + deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID; if (git_vector_insert(&onto, deleted) < 0) goto on_error; @@ -269,7 +319,7 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size) delta->status = GIT_DELTA_ADDED; memset(&delta->old_file, 0, sizeof(delta->old_file)); delta->old_file.path = delta->new_file.path; - delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID; } if (git_vector_insert(&onto, delta) < 0) @@ -278,7 +328,7 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size) /* cannot return an error past this point */ git_vector_foreach(&diff->deltas, i, delta) - if (delta->status == GIT_DELTA__TO_DELETE) + if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0) git__free(delta); /* swap new delta list into place */ @@ -297,17 +347,86 @@ on_error: return -1; } -static unsigned int calc_similarity( - void *cache, git_diff_file *old_file, git_diff_file *new_file) +GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx) +{ + git_diff_delta *delta = git_vector_get(&diff->deltas, idx / 2); + return (idx & 1) ? &delta->new_file : &delta->old_file; +} + +static int similarity_calc( + git_diff_list *diff, + git_diff_find_options *opts, + size_t file_idx, + void **cache) { - GIT_UNUSED(cache); + int error = 0; + git_diff_file *file = similarity_get_file(diff, file_idx); + git_iterator_type_t src = (file_idx & 1) ? diff->old_src : diff->new_src; + + if (src == GIT_ITERATOR_TYPE_WORKDIR) { /* compute hashsig from file */ + git_buf path = GIT_BUF_INIT; + + /* TODO: apply wd-to-odb filters to file data if necessary */ - if (git_oid_cmp(&old_file->oid, &new_file->oid) == 0) + if (!(error = git_buf_joinpath( + &path, git_repository_workdir(diff->repo), file->path))) + error = opts->metric->file_signature( + &cache[file_idx], file, path.ptr, opts->metric->payload); + + git_buf_free(&path); + } else { /* compute hashsig from blob buffer */ + git_blob *blob = NULL; + + /* TODO: add max size threshold a la diff? */ + + if ((error = git_blob_lookup(&blob, diff->repo, &file->oid)) < 0) + return error; + + error = opts->metric->buffer_signature( + &cache[file_idx], file, git_blob_rawcontent(blob), + git_blob_rawsize(blob), opts->metric->payload); + + git_blob_free(blob); + } + + return error; +} + +static int similarity_measure( + git_diff_list *diff, + git_diff_find_options *opts, + void **cache, + size_t a_idx, + size_t b_idx) +{ + int score = 0; + git_diff_file *a_file = similarity_get_file(diff, a_idx); + git_diff_file *b_file = similarity_get_file(diff, b_idx); + + if (GIT_MODE_TYPE(a_file->mode) != GIT_MODE_TYPE(b_file->mode)) + return 0; + + if (git_oid_cmp(&a_file->oid, &b_file->oid) == 0) return 100; - /* TODO: insert actual similarity algo here */ + /* update signature cache if needed */ + if (!cache[a_idx] && similarity_calc(diff, opts, a_idx, cache) < 0) + return -1; + if (!cache[b_idx] && similarity_calc(diff, opts, b_idx, cache) < 0) + return -1; - return 0; + /* compare signatures */ + if (opts->metric->similarity( + &score, cache[a_idx], cache[b_idx], opts->metric->payload) < 0) + return -1; + + /* clip score */ + if (score < 0) + score = 0; + else if (score > 100) + score = 100; + + return score; } #define FLAG_SET(opts,flag_name) ((opts.flags & flag_name) != 0) @@ -316,109 +435,135 @@ int git_diff_find_similar( git_diff_list *diff, git_diff_find_options *given_opts) { - unsigned int i, j, similarity; + size_t i, j, cache_size, *matches; + int error = 0, similarity; git_diff_delta *from, *to; git_diff_find_options opts; - unsigned int tried_targets, num_changes = 0; - git_vector matches = GIT_VECTOR_INIT; + size_t tried_targets, num_rewrites = 0; + void **cache; - if (normalize_find_opts(diff, &opts, given_opts) < 0) - return -1; + if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0) + return error; - /* first do splits if requested */ + /* TODO: maybe abort if deltas.length > target_limit ??? */ + + cache_size = diff->deltas.length * 2; /* must store b/c length may change */ + cache = git__calloc(cache_size, sizeof(void *)); + GITERR_CHECK_ALLOC(cache); + + matches = git__calloc(diff->deltas.length, sizeof(size_t)); + GITERR_CHECK_ALLOC(matches); + + /* first break MODIFIED records that are too different (if requested) */ if (FLAG_SET(opts, GIT_DIFF_FIND_AND_BREAK_REWRITES)) { git_vector_foreach(&diff->deltas, i, from) { if (from->status != GIT_DELTA_MODIFIED) continue; - /* Right now, this doesn't work right because the similarity - * algorithm isn't actually implemented... - */ - similarity = 100; - /* calc_similarity(NULL, &from->old_file, from->new_file); */ + similarity = similarity_measure( + diff, &opts, cache, 2 * i, 2 * i + 1); - if (similarity < opts.break_rewrite_threshold) { - from->status = GIT_DELTA__TO_SPLIT; - num_changes++; + if (similarity < 0) { + error = similarity; + goto cleanup; } - } - /* apply splits as needed */ - if (num_changes > 0 && - apply_splits_and_deletes( - diff, diff->deltas.length + num_changes) < 0) - return -1; + if ((unsigned int)similarity < opts.break_rewrite_threshold) { + from->flags |= GIT_DIFF_FLAG__TO_SPLIT; + num_rewrites++; + } + } } /* next find the most similar delta for each rename / copy candidate */ - if (git_vector_init(&matches, diff->deltas.length, git_diff_delta__cmp) < 0) - return -1; - git_vector_foreach(&diff->deltas, i, from) { tried_targets = 0; + /* skip things that aren't blobs */ + if (GIT_MODE_TYPE(from->old_file.mode) != + GIT_MODE_TYPE(GIT_FILEMODE_BLOB)) + continue; + + /* don't check UNMODIFIED files as source unless given option */ + if (from->status == GIT_DELTA_UNMODIFIED && + !FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) + continue; + + /* skip all but DELETED files unless copy detection is on */ + if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES) && + from->status != GIT_DELTA_DELETED && + (from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) + continue; + git_vector_foreach(&diff->deltas, j, to) { if (i == j) continue; + /* skip things that aren't blobs */ + if (GIT_MODE_TYPE(to->new_file.mode) != + GIT_MODE_TYPE(GIT_FILEMODE_BLOB)) + continue; + switch (to->status) { case GIT_DELTA_ADDED: case GIT_DELTA_UNTRACKED: case GIT_DELTA_RENAMED: case GIT_DELTA_COPIED: break; + case GIT_DELTA_MODIFIED: + if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) + continue; + break; default: /* only the above status values should be checked */ continue; } - /* skip all but DELETED files unless copy detection is on */ - if (from->status != GIT_DELTA_DELETED && - !FLAG_SET(opts, GIT_DIFF_FIND_COPIES)) - continue; - - /* don't check UNMODIFIED files as source unless given option */ - if (from->status == GIT_DELTA_UNMODIFIED && - !FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED)) - continue; - - /* cap on maximum files we'll examine */ + /* cap on maximum files we'll examine (per "from" file) */ if (++tried_targets > opts.target_limit) break; /* calculate similarity and see if this pair beats the * similarity score of the current best pair. */ - similarity = calc_similarity(NULL, &from->old_file, &to->new_file); + similarity = similarity_measure( + diff, &opts, cache, 2 * i, 2 * j + 1); + + if (similarity < 0) { + error = similarity; + goto cleanup; + } - if (to->similarity < similarity) { - to->similarity = similarity; - if (git_vector_set(NULL, &matches, j, from) < 0) - return -1; + if (to->similarity < (unsigned int)similarity) { + to->similarity = (unsigned int)similarity; + matches[j] = i + 1; } } } /* next rewrite the diffs with renames / copies */ - num_changes = 0; - git_vector_foreach(&diff->deltas, j, to) { - from = GIT_VECTOR_GET(&matches, j); - if (!from) { + if (!matches[j]) { assert(to->similarity == 0); continue; } - /* three possible outcomes here: + i = matches[j] - 1; + from = GIT_VECTOR_GET(&diff->deltas, i); + assert(from); + + /* four possible outcomes here: * 1. old DELETED and if over rename threshold, * new becomes RENAMED and old goes away - * 2. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and + * 2. old SPLIT and if over rename threshold, + * new becomes RENAMED and old becomes ADDED (clear SPLIT) + * 3. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and * old is more similar to new than it is to itself, in which * case, new becomes RENAMED and old becomed ADDED - * 3. otherwise if over copy threshold, new becomes COPIED + * 4. otherwise if over copy threshold, new becomes COPIED */ if (from->status == GIT_DELTA_DELETED) { @@ -430,8 +575,27 @@ int git_diff_find_similar( to->status = GIT_DELTA_RENAMED; memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); - from->status = GIT_DELTA__TO_DELETE; - num_changes++; + from->flags |= GIT_DIFF_FLAG__TO_DELETE; + num_rewrites++; + + continue; + } + + if (from->status == GIT_DELTA_MODIFIED && + (from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) + { + if (to->similarity < opts.rename_threshold) { + to->similarity = 0; + continue; + } + + to->status = GIT_DELTA_RENAMED; + memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); + + from->status = GIT_DELTA_ADDED; + from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; + memset(&from->old_file, 0, sizeof(from->old_file)); + num_rewrites--; continue; } @@ -440,17 +604,22 @@ int git_diff_find_similar( FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) && to->similarity > opts.rename_threshold) { - similarity = 100; - /* calc_similarity(NULL, &from->old_file, from->new_file); */ + similarity = similarity_measure( + diff, &opts, cache, 2 * i, 2 * i + 1); + + if (similarity < 0) { + error = similarity; + goto cleanup; + } - if (similarity < opts.rename_from_rewrite_threshold) { + if ((unsigned int)similarity < opts.rename_from_rewrite_threshold) { to->status = GIT_DELTA_RENAMED; memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); from->status = GIT_DELTA_ADDED; memset(&from->old_file, 0, sizeof(from->old_file)); from->old_file.path = to->old_file.path; - from->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID; continue; } @@ -466,17 +635,26 @@ int git_diff_find_similar( memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); } - git_vector_free(&matches); + if (num_rewrites > 0) { + assert(num_rewrites < diff->deltas.length); - if (num_changes > 0) { - assert(num_changes < diff->deltas.length); + error = apply_splits_and_deletes( + diff, diff->deltas.length - num_rewrites); + } + +cleanup: + git__free(matches); - if (apply_splits_and_deletes( - diff, diff->deltas.length - num_changes) < 0) - return -1; + for (i = 0; i < cache_size; ++i) { + if (cache[i] != NULL) + opts.metric->free_signature(cache[i], opts.metric->payload); } + git__free(cache); - return 0; + if (!given_opts || !given_opts->metric) + git__free(opts.metric); + + return error; } #undef FLAG_SET diff --git a/src/fileops.c b/src/fileops.c index 90ca11fb7..c1824e812 100644 --- a/src/fileops.c +++ b/src/fileops.c @@ -523,6 +523,41 @@ int git_futils_rmdir_r( return error; } +int git_futils_cleanupdir_r(const char *path) +{ + int error; + git_buf fullpath = GIT_BUF_INIT; + futils__rmdir_data data; + + if ((error = git_buf_put(&fullpath, path, strlen(path)) < 0)) + goto clean_up; + + data.base = ""; + data.baselen = 0; + data.flags = GIT_RMDIR_REMOVE_FILES; + data.error = 0; + + if (!git_path_exists(path)) { + giterr_set(GITERR_OS, "Path does not exist: %s" , path); + error = GIT_ERROR; + goto clean_up; + } + + if (!git_path_isdir(path)) { + giterr_set(GITERR_OS, "Path is not a directory: %s" , path); + error = GIT_ERROR; + goto clean_up; + } + + error = git_path_direach(&fullpath, futils__rmdir_recurs_foreach, &data); + if (error == GIT_EUSER) + error = data.error; + +clean_up: + git_buf_free(&fullpath); + return error; +} + int git_futils_find_system_file(git_buf *path, const char *filename) { #ifdef GIT_WIN32 diff --git a/src/fileops.h b/src/fileops.h index 988cc661a..7ba99d3d9 100644 --- a/src/fileops.h +++ b/src/fileops.h @@ -130,7 +130,7 @@ typedef enum { /** * Remove path and any files and directories beneath it. * - * @param path Path to to top level directory to process. + * @param path Path to the top level directory to process. * @param base Root for relative path. * @param flags Combination of git_futils_rmdir_flags values * @return 0 on success; -1 on error. @@ -138,6 +138,14 @@ typedef enum { extern int git_futils_rmdir_r(const char *path, const char *base, uint32_t flags); /** + * Remove all files and directories beneath the specified path. + * + * @param path Path to the top level directory to process. + * @return 0 on success; -1 on error. + */ +extern int git_futils_cleanupdir_r(const char *path); + +/** * Create and open a temporary file with a `_git2_` suffix. * Writes the filename into path_out. * @return On success, an open file descriptor, else an error code < 0. diff --git a/src/hash/hash_generic.c b/src/hash/hash_generic.c index 0723bfaf9..32fcd869c 100644 --- a/src/hash/hash_generic.c +++ b/src/hash/hash_generic.c @@ -232,7 +232,7 @@ int git_hash_init(git_hash_ctx *ctx) ctx->H[3] = 0x10325476; ctx->H[4] = 0xc3d2e1f0; - return 0; + return 0; } int git_hash_update(git_hash_ctx *ctx, const void *data, size_t len) diff --git a/src/hashsig.c b/src/hashsig.c new file mode 100644 index 000000000..e9c5164a4 --- /dev/null +++ b/src/hashsig.c @@ -0,0 +1,365 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#include "hashsig.h" +#include "fileops.h" + +typedef uint32_t hashsig_t; +typedef uint64_t hashsig_state; + +#define HASHSIG_SCALE 100 + +#define HASHSIG_HASH_WINDOW 32 +#define HASHSIG_HASH_START 0 +#define HASHSIG_HASH_SHIFT 5 +#define HASHSIG_HASH_MASK 0x7FFFFFFF + +#define HASHSIG_HEAP_SIZE ((1 << 7) - 1) + +typedef int (GIT_STDLIB_CALL *hashsig_cmp)(const void *a, const void *b); + +typedef struct { + int size, asize; + hashsig_cmp cmp; + hashsig_t values[HASHSIG_HEAP_SIZE]; +} hashsig_heap; + +typedef struct { + hashsig_state state, shift_n; + char window[HASHSIG_HASH_WINDOW]; + int win_len, win_pos, saw_lf; +} hashsig_in_progress; + +#define HASHSIG_IN_PROGRESS_INIT { HASHSIG_HASH_START, 1, {0}, 0, 0, 1 } + +struct git_hashsig { + hashsig_heap mins; + hashsig_heap maxs; + git_hashsig_option_t opt; + int considered; +}; + +#define HEAP_LCHILD_OF(I) (((I)*2)+1) +#define HEAP_RCHILD_OF(I) (((I)*2)+2) +#define HEAP_PARENT_OF(I) (((I)-1)>>1) + +static void hashsig_heap_init(hashsig_heap *h, hashsig_cmp cmp) +{ + h->size = 0; + h->asize = HASHSIG_HEAP_SIZE; + h->cmp = cmp; +} + +static int GIT_STDLIB_CALL hashsig_cmp_max(const void *a, const void *b) +{ + hashsig_t av = *(const hashsig_t *)a, bv = *(const hashsig_t *)b; + return (av < bv) ? -1 : (av > bv) ? 1 : 0; +} + +static int GIT_STDLIB_CALL hashsig_cmp_min(const void *a, const void *b) +{ + hashsig_t av = *(const hashsig_t *)a, bv = *(const hashsig_t *)b; + return (av > bv) ? -1 : (av < bv) ? 1 : 0; +} + +static void hashsig_heap_up(hashsig_heap *h, int el) +{ + int parent_el = HEAP_PARENT_OF(el); + + while (el > 0 && h->cmp(&h->values[parent_el], &h->values[el]) > 0) { + hashsig_t t = h->values[el]; + h->values[el] = h->values[parent_el]; + h->values[parent_el] = t; + + el = parent_el; + parent_el = HEAP_PARENT_OF(el); + } +} + +static void hashsig_heap_down(hashsig_heap *h, int el) +{ + hashsig_t v, lv, rv; + + /* 'el < h->size / 2' tests if el is bottom row of heap */ + + while (el < h->size / 2) { + int lel = HEAP_LCHILD_OF(el), rel = HEAP_RCHILD_OF(el), swapel; + + v = h->values[el]; + lv = h->values[lel]; + rv = h->values[rel]; + + if (h->cmp(&v, &lv) < 0 && h->cmp(&v, &rv) < 0) + break; + + swapel = (h->cmp(&lv, &rv) < 0) ? lel : rel; + + h->values[el] = h->values[swapel]; + h->values[swapel] = v; + + el = swapel; + } +} + +static void hashsig_heap_sort(hashsig_heap *h) +{ + /* only need to do this at the end for signature comparison */ + qsort(h->values, h->size, sizeof(hashsig_t), h->cmp); +} + +static void hashsig_heap_insert(hashsig_heap *h, hashsig_t val) +{ + /* if heap is full, pop top if new element should replace it */ + if (h->size == h->asize && h->cmp(&val, &h->values[0]) > 0) { + h->size--; + h->values[0] = h->values[h->size]; + hashsig_heap_down(h, 0); + } + + /* if heap is not full, insert new element */ + if (h->size < h->asize) { + h->values[h->size++] = val; + hashsig_heap_up(h, h->size - 1); + } +} + +GIT_INLINE(bool) hashsig_include_char( + char ch, git_hashsig_option_t opt, int *saw_lf) +{ + if ((opt & GIT_HASHSIG_IGNORE_WHITESPACE) && git__isspace(ch)) + return false; + + if (opt & GIT_HASHSIG_SMART_WHITESPACE) { + if (ch == '\r' || (*saw_lf && git__isspace(ch))) + return false; + + *saw_lf = (ch == '\n'); + } + + return true; +} + +static void hashsig_initial_window( + git_hashsig *sig, + const char **data, + size_t size, + hashsig_in_progress *prog) +{ + hashsig_state state, shift_n; + int win_len; + const char *scan, *end; + + /* init until we have processed at least HASHSIG_HASH_WINDOW data */ + + if (prog->win_len >= HASHSIG_HASH_WINDOW) + return; + + state = prog->state; + win_len = prog->win_len; + shift_n = prog->shift_n; + + scan = *data; + end = scan + size; + + while (scan < end && win_len < HASHSIG_HASH_WINDOW) { + char ch = *scan++; + + if (!hashsig_include_char(ch, sig->opt, &prog->saw_lf)) + continue; + + state = (state * HASHSIG_HASH_SHIFT + ch) & HASHSIG_HASH_MASK; + + if (!win_len) + shift_n = 1; + else + shift_n = (shift_n * HASHSIG_HASH_SHIFT) & HASHSIG_HASH_MASK; + + prog->window[win_len++] = ch; + } + + /* insert initial hash if we just finished */ + + if (win_len == HASHSIG_HASH_WINDOW) { + hashsig_heap_insert(&sig->mins, (hashsig_t)state); + hashsig_heap_insert(&sig->maxs, (hashsig_t)state); + sig->considered = 1; + } + + prog->state = state; + prog->win_len = win_len; + prog->shift_n = shift_n; + + *data = scan; +} + +static int hashsig_add_hashes( + git_hashsig *sig, + const char *data, + size_t size, + hashsig_in_progress *prog) +{ + const char *scan = data, *end = data + size; + hashsig_state state, shift_n, rmv; + + if (prog->win_len < HASHSIG_HASH_WINDOW) + hashsig_initial_window(sig, &scan, size, prog); + + state = prog->state; + shift_n = prog->shift_n; + + /* advance window, adding new chars and removing old */ + + for (; scan < end; ++scan) { + char ch = *scan; + + if (!hashsig_include_char(ch, sig->opt, &prog->saw_lf)) + continue; + + rmv = shift_n * prog->window[prog->win_pos]; + + state = (state - rmv) & HASHSIG_HASH_MASK; + state = (state * HASHSIG_HASH_SHIFT) & HASHSIG_HASH_MASK; + state = (state + ch) & HASHSIG_HASH_MASK; + + hashsig_heap_insert(&sig->mins, (hashsig_t)state); + hashsig_heap_insert(&sig->maxs, (hashsig_t)state); + sig->considered++; + + prog->window[prog->win_pos] = ch; + prog->win_pos = (prog->win_pos + 1) % HASHSIG_HASH_WINDOW; + } + + prog->state = state; + + return 0; +} + +static int hashsig_finalize_hashes(git_hashsig *sig) +{ + if (sig->mins.size < HASHSIG_HEAP_SIZE) { + giterr_set(GITERR_INVALID, + "File too small for similarity signature calculation"); + return GIT_EBUFS; + } + + hashsig_heap_sort(&sig->mins); + hashsig_heap_sort(&sig->maxs); + + return 0; +} + +static git_hashsig *hashsig_alloc(git_hashsig_option_t opts) +{ + git_hashsig *sig = git__calloc(1, sizeof(git_hashsig)); + if (!sig) + return NULL; + + hashsig_heap_init(&sig->mins, hashsig_cmp_min); + hashsig_heap_init(&sig->maxs, hashsig_cmp_max); + sig->opt = opts; + + return sig; +} + +int git_hashsig_create( + git_hashsig **out, + const char *buf, + size_t buflen, + git_hashsig_option_t opts) +{ + int error; + hashsig_in_progress prog = HASHSIG_IN_PROGRESS_INIT; + git_hashsig *sig = hashsig_alloc(opts); + GITERR_CHECK_ALLOC(sig); + + error = hashsig_add_hashes(sig, buf, buflen, &prog); + + if (!error) + error = hashsig_finalize_hashes(sig); + + if (!error) + *out = sig; + else + git_hashsig_free(sig); + + return error; +} + +int git_hashsig_create_fromfile( + git_hashsig **out, + const char *path, + git_hashsig_option_t opts) +{ + char buf[4096]; + ssize_t buflen = 0; + int error = 0, fd; + hashsig_in_progress prog = HASHSIG_IN_PROGRESS_INIT; + git_hashsig *sig = hashsig_alloc(opts); + GITERR_CHECK_ALLOC(sig); + + if ((fd = git_futils_open_ro(path)) < 0) { + git__free(sig); + return fd; + } + + while (!error) { + if ((buflen = p_read(fd, buf, sizeof(buf))) <= 0) { + if ((error = (int)buflen) < 0) + giterr_set(GITERR_OS, + "Read error on '%s' calculating similarity hashes", path); + break; + } + + error = hashsig_add_hashes(sig, buf, buflen, &prog); + } + + p_close(fd); + + if (!error) + error = hashsig_finalize_hashes(sig); + + if (!error) + *out = sig; + else + git_hashsig_free(sig); + + return error; +} + +void git_hashsig_free(git_hashsig *sig) +{ + git__free(sig); +} + +static int hashsig_heap_compare(const hashsig_heap *a, const hashsig_heap *b) +{ + int matches = 0, i, j, cmp; + + assert(a->cmp == b->cmp); + + /* hash heaps are sorted - just look for overlap vs total */ + + for (i = 0, j = 0; i < a->size && j < b->size; ) { + cmp = a->cmp(&a->values[i], &b->values[j]); + + if (cmp < 0) + ++i; + else if (cmp > 0) + ++j; + else { + ++i; ++j; ++matches; + } + } + + return HASHSIG_SCALE * (matches * 2) / (a->size + b->size); +} + +int git_hashsig_compare(const git_hashsig *a, const git_hashsig *b) +{ + return (hashsig_heap_compare(&a->mins, &b->mins) + + hashsig_heap_compare(&a->maxs, &b->maxs)) / 2; +} + diff --git a/src/hashsig.h b/src/hashsig.h new file mode 100644 index 000000000..8c920cbf1 --- /dev/null +++ b/src/hashsig.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_hashsig_h__ +#define INCLUDE_hashsig_h__ + +#include "common.h" + +/** + * Similarity signature of line hashes for a buffer + */ +typedef struct git_hashsig git_hashsig; + +typedef enum { + GIT_HASHSIG_NORMAL = 0, /* use all data */ + GIT_HASHSIG_IGNORE_WHITESPACE = 1, /* ignore whitespace */ + GIT_HASHSIG_SMART_WHITESPACE = 2, /* ignore \r and all space after \n */ +} git_hashsig_option_t; + +/** + * Build a similarity signature for a buffer + * + * If you have passed a whitespace-ignoring buffer, then the whitespace + * will be removed from the buffer while it is being processed, modifying + * the buffer in place. Sorry about that! + * + * This will return an error if the buffer doesn't contain enough data to + * compute a valid signature. + * + * @param out The array of hashed runs representing the file content + * @param buf The contents of the file to hash + * @param buflen The length of the data at `buf` + * @param generate_pairwise_hashes Should pairwise runs be hashed + */ +extern int git_hashsig_create( + git_hashsig **out, + const char *buf, + size_t buflen, + git_hashsig_option_t opts); + +/** + * Build a similarity signature from a file + * + * This walks through the file, only loading a maximum of 4K of file data at + * a time. Otherwise, it acts just like `git_hashsig_create`. + * + * This will return an error if the file doesn't contain enough data to + * compute a valid signature. + */ +extern int git_hashsig_create_fromfile( + git_hashsig **out, + const char *path, + git_hashsig_option_t opts); + +/** + * Release memory for a content similarity signature + */ +extern void git_hashsig_free(git_hashsig *sig); + +/** + * Measure similarity between two files + * + * @return <0 for error, [0 to 100] as similarity score + */ +extern int git_hashsig_compare( + const git_hashsig *a, + const git_hashsig *b); + +#endif diff --git a/src/index.c b/src/index.c index 59649083b..4deafd77f 100644 --- a/src/index.c +++ b/src/index.c @@ -242,8 +242,10 @@ static unsigned int index_merge_mode( return index_create_mode(mode); } -static void index_set_ignore_case(git_index *index, bool ignore_case) +void git_index__set_ignore_case(git_index *index, bool ignore_case) { + index->ignore_case = ignore_case; + index->entries._cmp = ignore_case ? index_icmp : index_cmp; index->entries_cmp_path = ignore_case ? index_icmp_path : index_cmp_path; index->entries_search = ignore_case ? index_isrch : index_srch; @@ -297,18 +299,8 @@ int git_index_new(git_index **out) static void index_free(git_index *index) { - git_index_entry *e; - git_index_reuc_entry *reuc; - size_t i; - git_index_clear(index); - git_vector_foreach(&index->entries, i, e) { - index_entry_free(e); - } git_vector_free(&index->entries); - git_vector_foreach(&index->reuc, i, reuc) { - index_entry_reuc_free(reuc); - } git_vector_free(&index->reuc); git__free(index->index_file_path); @@ -335,16 +327,10 @@ void git_index_clear(git_index *index) git__free(e->path); git__free(e); } - - for (i = 0; i < index->reuc.length; ++i) { - git_index_reuc_entry *e; - e = git_vector_get(&index->reuc, i); - git__free(e->path); - git__free(e); - } - git_vector_clear(&index->entries); - git_vector_clear(&index->reuc); + + git_index_reuc_clear(index); + git_futils_filestamp_set(&index->stamp, NULL); git_tree_cache_free(index->tree); @@ -388,7 +374,7 @@ int git_index_set_caps(git_index *index, unsigned int caps) } if (old_ignore_case != index->ignore_case) { - index_set_ignore_case(index, index->ignore_case); + git_index__set_ignore_case(index, index->ignore_case); } return 0; @@ -1151,6 +1137,21 @@ int git_index_reuc_remove(git_index *index, size_t position) return error; } +void git_index_reuc_clear(git_index *index) +{ + size_t i; + git_index_reuc_entry *reuc; + + assert(index); + + git_vector_foreach(&index->reuc, i, reuc) { + git__free(reuc->path); + git__free(reuc); + } + + git_vector_clear(&index->reuc); +} + static int index_error_invalid(const char *message) { giterr_set(GITERR_INDEX, "Invalid data in index - %s", message); diff --git a/src/index.h b/src/index.h index 9304b5539..2beaa6375 100644 --- a/src/index.h +++ b/src/index.h @@ -48,6 +48,8 @@ extern size_t git_index__prefix_position(git_index *index, const char *path); extern int git_index_entry__cmp(const void *a, const void *b); extern int git_index_entry__cmp_icase(const void *a, const void *b); +extern void git_index__set_ignore_case(git_index *index, bool ignore_case); + extern int git_index_read_tree_match( git_index *index, git_tree *tree, git_strarray *strspec); diff --git a/src/indexer.c b/src/indexer.c index c4648e400..c7e142baf 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -17,6 +17,7 @@ #include "posix.h" #include "pack.h" #include "filebuf.h" +#include "oidmap.h" #define UINT31_MAX (0x7FFFFFFF) @@ -27,15 +28,6 @@ struct entry { uint64_t offset_long; }; -struct git_indexer { - struct git_pack_file *pack; - size_t nr_objects; - git_vector objects; - git_filebuf file; - unsigned int fanout[256]; - git_oid hash; -}; - struct git_indexer_stream { unsigned int parsed_header :1, opened_pack :1, @@ -61,11 +53,6 @@ struct delta_info { git_off_t delta_off; }; -const git_oid *git_indexer_hash(const git_indexer *idx) -{ - return &idx->hash; -} - const git_oid *git_indexer_stream_hash(const git_indexer_stream *idx) { return &idx->hash; @@ -136,14 +123,6 @@ static int objects_cmp(const void *a, const void *b) return git_oid_cmp(&entrya->oid, &entryb->oid); } -static int cache_cmp(const void *a, const void *b) -{ - const struct git_pack_entry *ea = a; - const struct git_pack_entry *eb = b; - - return git_oid_cmp(&ea->sha1, &eb->sha1); -} - int git_indexer_stream_new( git_indexer_stream **out, const char *prefix, @@ -285,7 +264,8 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, static int store_object(git_indexer_stream *idx) { - int i; + int i, error; + khiter_t k; git_oid oid; struct entry *entry; git_off_t entry_size; @@ -310,11 +290,15 @@ static int store_object(git_indexer_stream *idx) git_oid_cpy(&pentry->sha1, &oid); pentry->offset = entry_start; - if (git_vector_insert(&idx->pack->cache, pentry) < 0) { + + k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error); + if (!error) { git__free(pentry); goto on_error; } + kh_value(idx->pack->idx_cache, k) = pentry; + git_oid_cpy(&entry->oid, &oid); if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) @@ -338,7 +322,8 @@ on_error: static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start) { - int i; + int i, error; + khiter_t k; git_oid oid; size_t entry_size; struct entry *entry; @@ -365,11 +350,14 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent git_oid_cpy(&pentry->sha1, &oid); pentry->offset = entry_start; - if (git_vector_insert(&idx->pack->cache, pentry) < 0) { + k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error); + if (!error) { git__free(pentry); goto on_error; } + kh_value(idx->pack->idx_cache, k) = pentry; + git_oid_cpy(&entry->oid, &oid); entry->crc = crc32(0L, Z_NULL, 0); @@ -440,8 +428,8 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz /* for now, limit to 2^32 objects */ assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects)); - if (git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp) < 0) - return -1; + idx->pack->idx_cache = git_oidmap_alloc(); + GITERR_CHECK_ALLOC(idx->pack->idx_cache); idx->pack->has_cache = 1; if (git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp) < 0) @@ -451,7 +439,7 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz return -1; stats->received_objects = 0; - stats->indexed_objects = 0; + processed = stats->indexed_objects = 0; stats->total_objects = (unsigned int)idx->nr_objects; do_progress_callback(idx, stats); } @@ -732,9 +720,9 @@ on_error: void git_indexer_stream_free(git_indexer_stream *idx) { + khiter_t k; unsigned int i; struct entry *e; - struct git_pack_entry *pe; struct delta_info *delta; if (idx == NULL) @@ -743,11 +731,16 @@ void git_indexer_stream_free(git_indexer_stream *idx) git_vector_foreach(&idx->objects, i, e) git__free(e); git_vector_free(&idx->objects); + if (idx->pack) { - git_vector_foreach(&idx->pack->cache, i, pe) - git__free(pe); - git_vector_free(&idx->pack->cache); + for (k = kh_begin(idx->pack->idx_cache); k != kh_end(idx->pack->idx_cache); k++) { + if (kh_exist(idx->pack->idx_cache, k)) + git__free(kh_value(idx->pack->idx_cache, k)); + } + + git_oidmap_free(idx->pack->idx_cache); } + git_vector_foreach(&idx->deltas, i, delta) git__free(delta); git_vector_free(&idx->deltas); @@ -755,315 +748,3 @@ void git_indexer_stream_free(git_indexer_stream *idx) git_filebuf_cleanup(&idx->pack_file); git__free(idx); } - -int git_indexer_new(git_indexer **out, const char *packname) -{ - git_indexer *idx; - struct git_pack_header hdr; - int error; - - assert(out && packname); - - idx = git__calloc(1, sizeof(git_indexer)); - GITERR_CHECK_ALLOC(idx); - - open_pack(&idx->pack, packname); - - if ((error = parse_header(&hdr, idx->pack)) < 0) - goto cleanup; - - idx->nr_objects = ntohl(hdr.hdr_entries); - - /* for now, limit to 2^32 objects */ - assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects)); - - error = git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp); - if (error < 0) - goto cleanup; - - idx->pack->has_cache = 1; - error = git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp); - if (error < 0) - goto cleanup; - - *out = idx; - - return 0; - -cleanup: - git_indexer_free(idx); - - return -1; -} - -static int index_path(git_buf *path, git_indexer *idx) -{ - const char prefix[] = "pack-", suffix[] = ".idx"; - size_t slash = (size_t)path->size; - - /* search backwards for '/' */ - while (slash > 0 && path->ptr[slash - 1] != '/') - slash--; - - if (git_buf_grow(path, slash + 1 + strlen(prefix) + - GIT_OID_HEXSZ + strlen(suffix) + 1) < 0) - return -1; - - git_buf_truncate(path, slash); - git_buf_puts(path, prefix); - git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash); - path->size += GIT_OID_HEXSZ; - git_buf_puts(path, suffix); - - return git_buf_oom(path) ? -1 : 0; -} - -int git_indexer_write(git_indexer *idx) -{ - git_mwindow *w = NULL; - int error; - unsigned int i, long_offsets = 0, left; - struct git_pack_idx_header hdr; - git_buf filename = GIT_BUF_INIT; - struct entry *entry; - void *packfile_hash; - git_oid file_hash; - git_hash_ctx ctx; - - if (git_hash_ctx_init(&ctx) < 0) - return -1; - - git_vector_sort(&idx->objects); - - git_buf_sets(&filename, idx->pack->pack_name); - git_buf_truncate(&filename, filename.size - strlen("pack")); - git_buf_puts(&filename, "idx"); - if (git_buf_oom(&filename)) - return -1; - - error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS); - if (error < 0) - goto cleanup; - - /* Write out the header */ - hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); - hdr.idx_version = htonl(2); - error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr)); - if (error < 0) - goto cleanup; - - /* Write out the fanout table */ - for (i = 0; i < 256; ++i) { - uint32_t n = htonl(idx->fanout[i]); - error = git_filebuf_write(&idx->file, &n, sizeof(n)); - if (error < 0) - goto cleanup; - } - - /* Write out the object names (SHA-1 hashes) */ - git_vector_foreach(&idx->objects, i, entry) { - if ((error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid))) < 0 || - (error = git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ)) < 0) - goto cleanup; - } - - if ((error = git_hash_final(&idx->hash, &ctx)) < 0) - goto cleanup; - - /* Write out the CRC32 values */ - git_vector_foreach(&idx->objects, i, entry) { - error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t)); - if (error < 0) - goto cleanup; - } - - /* Write out the offsets */ - git_vector_foreach(&idx->objects, i, entry) { - uint32_t n; - - if (entry->offset == UINT32_MAX) - n = htonl(0x80000000 | long_offsets++); - else - n = htonl(entry->offset); - - error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t)); - if (error < 0) - goto cleanup; - } - - /* Write out the long offsets */ - git_vector_foreach(&idx->objects, i, entry) { - uint32_t split[2]; - - if (entry->offset != UINT32_MAX) - continue; - - split[0] = htonl(entry->offset_long >> 32); - split[1] = htonl(entry->offset_long & 0xffffffff); - - error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2); - if (error < 0) - goto cleanup; - } - - /* Write out the packfile trailer */ - - packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); - git_mwindow_close(&w); - if (packfile_hash == NULL) { - error = -1; - goto cleanup; - } - - memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ); - - git_mwindow_close(&w); - - error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); - if (error < 0) - goto cleanup; - - /* Write out the index sha */ - error = git_filebuf_hash(&file_hash, &idx->file); - if (error < 0) - goto cleanup; - - error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); - if (error < 0) - goto cleanup; - - /* Figure out what the final name should be */ - error = index_path(&filename, idx); - if (error < 0) - goto cleanup; - - /* Commit file */ - error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE); - -cleanup: - git_mwindow_free_all(&idx->pack->mwf); - git_mwindow_file_deregister(&idx->pack->mwf); - if (error < 0) - git_filebuf_cleanup(&idx->file); - git_buf_free(&filename); - git_hash_ctx_cleanup(&ctx); - - return error; -} - -int git_indexer_run(git_indexer *idx, git_transfer_progress *stats) -{ - git_mwindow_file *mwf; - git_off_t off = sizeof(struct git_pack_header); - int error; - struct entry *entry; - unsigned int left, processed; - - assert(idx && stats); - - mwf = &idx->pack->mwf; - error = git_mwindow_file_register(mwf); - if (error < 0) - return error; - - stats->total_objects = (unsigned int)idx->nr_objects; - stats->indexed_objects = processed = 0; - - while (processed < idx->nr_objects) { - git_rawobj obj; - git_oid oid; - struct git_pack_entry *pentry; - git_mwindow *w = NULL; - int i; - git_off_t entry_start = off; - void *packed; - size_t entry_size; - char fmt[GIT_OID_HEXSZ] = {0}; - - entry = git__calloc(1, sizeof(*entry)); - GITERR_CHECK_ALLOC(entry); - - if (off > UINT31_MAX) { - entry->offset = UINT32_MAX; - entry->offset_long = off; - } else { - entry->offset = (uint32_t)off; - } - - error = git_packfile_unpack(&obj, idx->pack, &off); - if (error < 0) - goto cleanup; - - /* FIXME: Parse the object instead of hashing it */ - error = git_odb__hashobj(&oid, &obj); - if (error < 0) { - giterr_set(GITERR_INDEXER, "Failed to hash object"); - goto cleanup; - } - - pentry = git__malloc(sizeof(struct git_pack_entry)); - if (pentry == NULL) { - error = -1; - goto cleanup; - } - - git_oid_cpy(&pentry->sha1, &oid); - pentry->offset = entry_start; - git_oid_fmt(fmt, &oid); - error = git_vector_insert(&idx->pack->cache, pentry); - if (error < 0) - goto cleanup; - - git_oid_cpy(&entry->oid, &oid); - entry->crc = crc32(0L, Z_NULL, 0); - - entry_size = (size_t)(off - entry_start); - packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left); - if (packed == NULL) { - error = -1; - goto cleanup; - } - entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size)); - git_mwindow_close(&w); - - /* Add the object to the list */ - error = git_vector_insert(&idx->objects, entry); - if (error < 0) - goto cleanup; - - for (i = oid.id[0]; i < 256; ++i) { - idx->fanout[i]++; - } - - git__free(obj.data); - - stats->indexed_objects = ++processed; - } - -cleanup: - git_mwindow_free_all(mwf); - - return error; - -} - -void git_indexer_free(git_indexer *idx) -{ - unsigned int i; - struct entry *e; - struct git_pack_entry *pe; - - if (idx == NULL) - return; - - git_mwindow_file_deregister(&idx->pack->mwf); - git_vector_foreach(&idx->objects, i, e) - git__free(e); - git_vector_free(&idx->objects); - git_vector_foreach(&idx->pack->cache, i, pe) - git__free(pe); - git_vector_free(&idx->pack->cache); - git_packfile_free(idx->pack); - git__free(idx); -} - diff --git a/src/pack-objects.c b/src/pack-objects.c index e4b67192d..459201f58 100644 --- a/src/pack-objects.c +++ b/src/pack-objects.c @@ -21,8 +21,6 @@ #include "git2/indexer.h" #include "git2/config.h" -GIT__USE_OIDMAP; - struct unpacked { git_pobject *object; void *data; diff --git a/src/pack.c b/src/pack.c index f36f3cf6b..75ac98186 100644 --- a/src/pack.c +++ b/src/pack.c @@ -760,13 +760,14 @@ git_off_t get_delta_base( } else if (type == GIT_OBJ_REF_DELTA) { /* If we have the cooperative cache, search in it first */ if (p->has_cache) { - size_t pos; - struct git_pack_entry key; + khiter_t k; + git_oid oid; - git_oid_fromraw(&key.sha1, base_info); - if (!git_vector_bsearch(&pos, &p->cache, &key)) { + git_oid_fromraw(&oid, base_info); + k = kh_get(oid, p->idx_cache, &oid); + if (k != kh_end(p->idx_cache)) { *curpos += 20; - return ((struct git_pack_entry *)git_vector_get(&p->cache, pos))->offset; + return ((struct git_pack_entry *)kh_value(p->idx_cache, k))->offset; } } /* The base entry _must_ be in the same pack */ diff --git a/src/pack.h b/src/pack.h index 6c43d8f5b..8d7e33dfe 100644 --- a/src/pack.h +++ b/src/pack.h @@ -16,6 +16,7 @@ #include "map.h" #include "mwindow.h" #include "odb.h" +#include "oidmap.h" #define GIT_PACK_FILE_MODE 0444 @@ -62,6 +63,7 @@ typedef struct git_pack_cache_entry { #include "offmap.h" GIT__USE_OFFMAP; +GIT__USE_OIDMAP; #define GIT_PACK_CACHE_MEMORY_LIMIT 16 * 1024 * 1024 #define GIT_PACK_CACHE_SIZE_LIMIT 1024 * 1024 /* don't bother caching anything over 1MB */ @@ -86,7 +88,7 @@ struct git_pack_file { git_time_t mtime; unsigned pack_local:1, pack_keep:1, has_cache:1; git_oid sha1; - git_vector cache; + git_oidmap *idx_cache; git_oid **oids; git_pack_cache bases; /* delta base cache */ diff --git a/src/repository.c b/src/repository.c index cd1e658cf..278abfaf2 100644 --- a/src/repository.c +++ b/src/repository.c @@ -553,6 +553,7 @@ void git_repository_set_config(git_repository *repo, git_config *config) repo->_config = config; GIT_REFCOUNT_OWN(repo->_config, repo); + GIT_REFCOUNT_INC(repo->_config); } int git_repository_odb__weakptr(git_odb **out, git_repository *repo) diff --git a/src/tree.c b/src/tree.c index ec57e8bb8..11123a18a 100644 --- a/src/tree.c +++ b/src/tree.c @@ -566,6 +566,7 @@ int git_tree__write_index( git_oid *oid, git_index *index, git_repository *repo) { int ret; + bool old_ignore_case = false; assert(oid && index && repo); @@ -580,8 +581,21 @@ int git_tree__write_index( return 0; } - /* The tree cache didn't help us */ + /* The tree cache didn't help us; we'll have to write + * out a tree. If the index is ignore_case, we must + * make it case-sensitive for the duration of the tree-write + * operation. */ + + if (index->ignore_case) { + old_ignore_case = true; + git_index__set_ignore_case(index, false); + } + ret = write_tree(oid, repo, index, "", 0); + + if (old_ignore_case) + git_index__set_ignore_case(index, true); + return ret < 0 ? ret : 0; } diff --git a/src/win32/git2.rc b/src/win32/git2.rc index 892008b77..436913228 100644 --- a/src/win32/git2.rc +++ b/src/win32/git2.rc @@ -12,13 +12,13 @@ VS_VERSION_INFO VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE PRODUCTVERSION LIBGIT2_VER_MAJOR,LIBGIT2_VER_MINOR,LIBGIT2_VER_REVISION,0 FILEFLAGSMASK VS_FFI_FILEFLAGSMASK #ifdef _DEBUG - FILEFLAGS 1 + FILEFLAGS VS_FF_DEBUG #else FILEFLAGS 0 #endif FILEOS VOS_NT_WINDOWS32 FILETYPE VFT_DLL - FILESUBTYPE 0 // not used + FILESUBTYPE VFT2_UNKNOWN BEGIN BLOCK "StringFileInfo" BEGIN diff --git a/src/win32/msvc-compat.h b/src/win32/msvc-compat.h index 714a85e21..50865ed17 100644 --- a/src/win32/msvc-compat.h +++ b/src/win32/msvc-compat.h @@ -37,6 +37,15 @@ /* MSVC doesn't define ssize_t at all */ typedef SSIZE_T ssize_t; +/* define snprintf using variadic macro support if available */ +#if _MSC_VER >= 1400 +# define snprintf(BUF, SZ, FMT, ...) _snprintf_s(BUF, SZ, _TRUNCATE, FMT, __VA_ARGS__) +#else +# define snprintf _snprintf #endif +#endif + +#define GIT_STDLIB_CALL __cdecl + #endif /* INCLUDE_msvc_compat__ */ diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c index f533eaa5e..4d56299f7 100644 --- a/src/win32/posix_w32.c +++ b/src/win32/posix_w32.c @@ -375,7 +375,8 @@ int p_vsnprintf(char *buffer, size_t count, const char *format, va_list argptr) #ifdef _MSC_VER int len; - if (count == 0 || (len = _vsnprintf(buffer, count, format, argptr)) < 0) + if (count == 0 || + (len = _vsnprintf_s(buffer, count, _TRUNCATE, format, argptr)) < 0) return _vscprintf(format, argptr); return len; @@ -487,11 +488,14 @@ p_gmtime_r (const time_t *timer, struct tm *result) #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL #endif +#ifndef _TIMEZONE_DEFINED +#define _TIMEZONE_DEFINED struct timezone { int tz_minuteswest; /* minutes W of Greenwich */ int tz_dsttime; /* type of dst correction */ }; +#endif int p_gettimeofday(struct timeval *tv, struct timezone *tz) { |
