summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/diff.c | 33
-rw-r--r-- src/diff.h | 2
-rw-r--r-- src/diff_output.c | 214
-rw-r--r-- src/fileops.c | 62
-rw-r--r-- src/fileops.h | 1
-rw-r--r-- src/odb.c | 33
-rw-r--r-- src/odb.h | 17
-rw-r--r-- tests-clar/status/worktree.c | 4
8 files changed, 260 insertions, 106 deletions
diff --git a/src/diff.c b/src/diff.c
index f8a01086c..499b95b44 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -11,6 +11,7 @@
#include "fileops.h"
#include "config.h"
#include "attr_file.h"
+#include "filter.h"
static char *diff_prefix_from_pathspec(const git_strarray *pathspec)
{
@@ -63,8 +64,8 @@ static bool diff_path_matches_pathspec(git_diff_list *diff, const char *path)
git_vector_foreach(&diff->pathspec, i, match) {
int result = strcmp(match->pattern, path) ? FNM_NOMATCH : 0;
-
- if (((diff->opts.flags & GIT_DIFF_DISABLE_PATHSPEC_MATCH) == 0) &&
+
+ if (((diff->opts.flags & GIT_DIFF_DISABLE_PATHSPEC_MATCH) == 0) &&
result == FNM_NOMATCH)
result = p_fnmatch(match->pattern, path, 0);
@@ -262,12 +263,14 @@ static int diff_delta__from_two(
delta = diff_delta__alloc(diff, status, old_entry->path);
GITERR_CHECK_ALLOC(delta);
- delta->old_file.mode = old_mode;
git_oid_cpy(&delta->old_file.oid, &old_entry->oid);
+ delta->old_file.size = old_entry->file_size;
+ delta->old_file.mode = old_mode;
delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
- delta->new_file.mode = new_mode;
git_oid_cpy(&delta->new_file.oid, new_oid ? new_oid : &new_entry->oid);
+ delta->new_file.size = new_entry->file_size;
+ delta->new_file.mode = new_mode;
if (new_oid || !git_oid_iszero(&new_entry->oid))
delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
@@ -440,14 +443,22 @@ static int oid_for_workdir_item(
giterr_set(GITERR_OS, "File size overflow for 32-bit systems");
result = -1;
} else {
- int fd = git_futils_open_ro(full_path.ptr);
- if (fd < 0)
- result = fd;
- else {
- result = git_odb__hashfd(
- oid, fd, (size_t)item->file_size, GIT_OBJ_BLOB);
- p_close(fd);
+ git_vector filters = GIT_VECTOR_INIT;
+
+ result = git_filters_load(
+ &filters, repo, item->path, GIT_FILTER_TO_ODB);
+ if (result >= 0) {
+ int fd = git_futils_open_ro(full_path.ptr);
+ if (fd < 0)
+ result = fd;
+ else {
+ result = git_odb__hashfd_filtered(
+ oid, fd, (size_t)item->file_size, GIT_OBJ_BLOB, &filters);
+ p_close(fd);
+ }
}
+
+ git_filters_free(&filters);
}
git_buf_free(&full_path);
diff --git a/src/diff.h b/src/diff.h
index 2785fa425..def746323 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -25,6 +25,8 @@ enum {
GIT_DIFFCAPS_USE_DEV = (1 << 4), /* use st_dev? */
};
+#define MAX_DIFF_FILESIZE 0x20000000
+
struct git_diff_list {
git_refcount rc;
git_repository *repo;
diff --git a/src/diff_output.c b/src/diff_output.c
index 2c64b92ee..e2ca8cf3e 100644
--- a/src/diff_output.c
+++ b/src/diff_output.c
@@ -22,7 +22,18 @@
* git_diff_foreach() call it is an emphemeral structure that is filled
* in to execute each diff. In the case of a git_diff_iterator, it holds
* most of the information for the diff in progress.
- */
+ *
+ * As each delta is processed, it goes through 3 phases: prep, load, exec.
+ *
+ * - In the prep phase, we just set the delta and quickly check the file
+ * attributes to see if it should be treated as binary.
+ * - In the load phase, we actually load the file content into memory.
+ * At this point, if we had deferred calculating OIDs, we might have to
+ * correct the delta to be UNMODIFIED.
+ * - In the exec phase, we actually run the diff and execute the callbacks.
+ * For foreach, this is just a pass-through to the user's callbacks. For
+ * iterators, we record the hunks and data spans into memory.
+ */
typedef struct {
git_repository *repo;
git_diff_options *opts;
@@ -263,18 +274,40 @@ static void setup_xdiff_options(
static int get_blob_content(
git_repository *repo,
- const git_oid *oid,
+ git_diff_file *file,
git_map *map,
git_blob **blob)
{
- if (git_oid_iszero(oid))
+ int error;
+ git_odb *odb;
+ size_t len;
+ git_otype type;
+
+ if (git_oid_iszero(&file->oid))
return 0;
- if (git_blob_lookup(blob, repo, oid) < 0)
- return -1;
+ /* peek at object header to avoid loading if too large */
+ if ((error = git_repository_odb__weakptr(&odb, repo)) < 0 ||
+ (error = git_odb_read_header(&len, &type, odb, &file->oid)) < 0)
+ return error;
+
+ assert(type == GIT_OBJ_BLOB);
+
+ /* if blob is too large to diff, mark as binary */
+ if (len > MAX_DIFF_FILESIZE) {
+ file->flags |= GIT_DIFF_FILE_BINARY;
+ return 0;
+ }
+
+ if (!file->size)
+ file->size = len;
+
+ if ((error = git_blob_lookup(blob, repo, &file->oid)) < 0)
+ return error;
map->data = (void *)git_blob_rawcontent(*blob);
map->len = git_blob_rawsize(*blob);
+
return 0;
}
@@ -307,13 +340,66 @@ static int get_workdir_content(
if (read_len < 0) {
giterr_set(GITERR_OS, "Failed to read symlink '%s'", file->path);
error = -1;
- } else
- map->len = read_len;
+ goto cleanup;
+ }
+
+ map->len = read_len;
}
else {
- error = git_futils_mmap_ro_file(map, path.ptr);
- file->flags |= GIT_DIFF_FILE_UNMAP_DATA;
+ git_file fd = git_futils_open_ro(path.ptr);
+ git_vector filters = GIT_VECTOR_INIT;
+
+ if (fd < 0) {
+ error = fd;
+ goto cleanup;
+ }
+
+ if (!file->size)
+ file->size = git_futils_filesize(fd);
+
+ /* if file is too large to diff, mark as binary */
+ if (file->size > MAX_DIFF_FILESIZE) {
+ file->flags |= GIT_DIFF_FILE_BINARY;
+ goto close_and_cleanup;
+ }
+
+ error = git_filters_load(&filters, repo, file->path, GIT_FILTER_TO_ODB);
+ if (error < 0)
+ goto close_and_cleanup;
+
+ if (error == 0) { /* note: git_filters_load returns filter count */
+ error = git_futils_mmap_ro(map, fd, 0, (size_t)file->size);
+ file->flags |= GIT_DIFF_FILE_UNMAP_DATA;
+ } else {
+ git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT;
+
+ if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)file->size)) &&
+ !(error = git_filters_apply(&filtered, &raw, &filters)))
+ {
+ map->len = git_buf_len(&filtered);
+ map->data = git_buf_detach(&filtered);
+
+ file->flags |= GIT_DIFF_FILE_FREE_DATA;
+ }
+
+ git_buf_free(&raw);
+ git_buf_free(&filtered);
+ }
+
+close_and_cleanup:
+ git_filters_free(&filters);
+ p_close(fd);
+ }
+
+ /* once data is loaded, update OID if we didn't have it previously */
+ if (!error && (file->flags & GIT_DIFF_FILE_VALID_OID) == 0) {
+ error = git_odb_hash(
+ &file->oid, map->data, map->len, GIT_OBJ_BLOB);
+ if (!error)
+ file->flags |= GIT_DIFF_FILE_VALID_OID;
}
+
+cleanup:
git_buf_free(&path);
return error;
}
@@ -393,7 +479,9 @@ static int diff_delta_prep(diff_delta_context *ctxt)
static int diff_delta_load(diff_delta_context *ctxt)
{
int error = 0;
+ git_repository *repo = ctxt->repo;
git_diff_delta *delta = ctxt->delta;
+ bool load_old = false, load_new = false, check_if_unmodified = false;
if (ctxt->loaded || !ctxt->delta)
return 0;
@@ -405,75 +493,77 @@ static int diff_delta_load(diff_delta_context *ctxt)
ctxt->old_data.len = 0;
ctxt->old_blob = NULL;
- if (!error && delta->binary != 1 &&
- (delta->status == GIT_DELTA_DELETED ||
- delta->status == GIT_DELTA_MODIFIED))
- {
- if (ctxt->old_src == GIT_ITERATOR_WORKDIR)
- error = get_workdir_content(
- ctxt->repo, &delta->old_file, &ctxt->old_data);
- else {
- error = get_blob_content(
- ctxt->repo, &delta->old_file.oid,
- &ctxt->old_data, &ctxt->old_blob);
-
- if (ctxt->new_src == GIT_ITERATOR_WORKDIR) {
- /* TODO: convert crlf of blob content */
- }
- }
- }
-
ctxt->new_data.data = "";
ctxt->new_data.len = 0;
ctxt->new_blob = NULL;
- if (!error && delta->binary != 1 &&
- (delta->status == GIT_DELTA_ADDED ||
- delta->status == GIT_DELTA_MODIFIED))
- {
- if (ctxt->new_src == GIT_ITERATOR_WORKDIR)
- error = get_workdir_content(
- ctxt->repo, &delta->new_file, &ctxt->new_data);
- else {
- error = get_blob_content(
- ctxt->repo, &delta->new_file.oid,
- &ctxt->new_data, &ctxt->new_blob);
-
- if (ctxt->old_src == GIT_ITERATOR_WORKDIR) {
- /* TODO: convert crlf of blob content */
- }
- }
+ if (delta->binary == 1)
+ goto cleanup;
- if (!error && !(delta->new_file.flags & GIT_DIFF_FILE_VALID_OID)) {
- error = git_odb_hash(
- &delta->new_file.oid, ctxt->new_data.data,
- ctxt->new_data.len, GIT_OBJ_BLOB);
- if (error < 0)
- goto cleanup;
+ switch (delta->status) {
+ case GIT_DELTA_ADDED: load_new = true; break;
+ case GIT_DELTA_DELETED: load_old = true; break;
+ case GIT_DELTA_MODIFIED: load_new = load_old = true; break;
+ default: break;
+ }
- delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
+ check_if_unmodified =
+ (load_old && (delta->old_file.flags & GIT_DIFF_FILE_VALID_OID) == 0) ||
+ (load_new && (delta->new_file.flags & GIT_DIFF_FILE_VALID_OID) == 0);
- /* since we did not have the definitive oid, we may have
- * incorrect status and need to skip this item.
- */
- if (delta->old_file.mode == delta->new_file.mode &&
- !git_oid_cmp(&delta->old_file.oid, &delta->new_file.oid))
- {
- delta->status = GIT_DELTA_UNMODIFIED;
+ /* Always try to load workdir content first, since it may need to be
+ * filtered (and hence use 2x memory) and we want to minimize the max
+ * memory footprint during diff.
+ */
- if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0)
- goto cleanup;
- }
- }
+ if (load_old && ctxt->old_src == GIT_ITERATOR_WORKDIR) {
+ if ((error = get_workdir_content(
+ repo, &delta->old_file, &ctxt->old_data)) < 0)
+ goto cleanup;
+
+ if ((delta->old_file.flags & GIT_DIFF_FILE_BINARY) != 0)
+ goto cleanup;
+ }
+
+ if (load_new && ctxt->new_src == GIT_ITERATOR_WORKDIR) {
+ if ((error = get_workdir_content(
+ repo, &delta->new_file, &ctxt->new_data)) < 0)
+ goto cleanup;
+
+ if ((delta->new_file.flags & GIT_DIFF_FILE_BINARY) != 0)
+ goto cleanup;
}
+ if (load_old && ctxt->old_src != GIT_ITERATOR_WORKDIR &&
+ (error = get_blob_content(
+ repo, &delta->old_file, &ctxt->old_data, &ctxt->old_blob)) < 0)
+ goto cleanup;
+
+ if (load_new && ctxt->new_src != GIT_ITERATOR_WORKDIR &&
+ (error = get_blob_content(
+ repo, &delta->new_file, &ctxt->new_data, &ctxt->new_blob)) < 0)
+ goto cleanup;
+
+ /* if we did not previously have the definitive oid, we may have
+ * incorrect status and need to switch this to UNMODIFIED.
+ */
+ if (check_if_unmodified &&
+ delta->old_file.mode == delta->new_file.mode &&
+ !git_oid_cmp(&delta->old_file.oid, &delta->new_file.oid))
+ {
+ delta->status = GIT_DELTA_UNMODIFIED;
+
+ if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0)
+ goto cleanup;
+ }
+
+cleanup:
/* if we have not already decided whether file is binary,
* check the first 4K for nul bytes to decide...
*/
if (!error && delta->binary == -1)
error = diff_delta_is_binary_by_content(ctxt);
-cleanup:
ctxt->loaded = !error;
/* flag if we would want to diff the contents of these files */
diff --git a/src/fileops.c b/src/fileops.c
index 95eacb5f1..d4def1a9a 100644
--- a/src/fileops.c
+++ b/src/fileops.c
@@ -115,10 +115,47 @@ mode_t git_futils_canonical_mode(mode_t raw_mode)
return 0;
}
-int git_futils_readbuffer_updated(git_buf *buf, const char *path, time_t *mtime, int *updated)
+#define MAX_READ_STALLS 10
+
+int git_futils_readbuffer_fd(git_buf *buf, git_file fd, size_t len)
+{
+ int stalls = MAX_READ_STALLS;
+
+ git_buf_clear(buf);
+
+ if (git_buf_grow(buf, len + 1) < 0)
+ return -1;
+
+ buf->ptr[len] = '\0';
+
+ while (len > 0) {
+ ssize_t read_size = p_read(fd, buf->ptr + buf->size, len);
+
+ if (read_size < 0) {
+ giterr_set(GITERR_OS, "Failed to read descriptor");
+ return -1;
+ }
+
+ if (read_size == 0) {
+ stalls--;
+
+ if (!stalls) {
+ giterr_set(GITERR_OS, "Too many stalls reading descriptor");
+ return -1;
+ }
+ }
+
+ len -= read_size;
+ buf->size += read_size;
+ }
+
+ return 0;
+}
+
+int git_futils_readbuffer_updated(
+ git_buf *buf, const char *path, time_t *mtime, int *updated)
{
git_file fd;
- size_t len;
struct stat st;
assert(buf && path && *path);
@@ -147,30 +184,11 @@ int git_futils_readbuffer_updated(git_buf *buf, const char *path, time_t *mtime,
if (mtime != NULL)
*mtime = st.st_mtime;
- len = (size_t) st.st_size;
-
- git_buf_clear(buf);
-
- if (git_buf_grow(buf, len + 1) < 0) {
+ if (git_futils_readbuffer_fd(buf, fd, (size_t)st.st_size) < 0) {
p_close(fd);
return -1;
}
- buf->ptr[len] = '\0';
-
- while (len > 0) {
- ssize_t read_size = p_read(fd, buf->ptr, len);
-
- if (read_size < 0) {
- p_close(fd);
- giterr_set(GITERR_OS, "Failed to read descriptor for '%s'", path);
- return -1;
- }
-
- len -= read_size;
- buf->size += read_size;
- }
-
p_close(fd);
if (updated != NULL)
diff --git a/src/fileops.h b/src/fileops.h
index 5c23ce30b..d2944f460 100644
--- a/src/fileops.h
+++ b/src/fileops.h
@@ -19,6 +19,7 @@
*/
extern int git_futils_readbuffer(git_buf *obj, const char *path);
extern int git_futils_readbuffer_updated(git_buf *obj, const char *path, time_t *mtime, int *updated);
+extern int git_futils_readbuffer_fd(git_buf *obj, git_file fd, size_t len);
/**
* File utils
diff --git a/src/odb.c b/src/odb.c
index 34033d15c..83c7a80fc 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -12,6 +12,7 @@
#include "hash.h"
#include "odb.h"
#include "delta-apply.h"
+#include "filter.h"
#include "git2/odb_backend.h"
#include "git2/oid.h"
@@ -118,11 +119,12 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)
hdr_len = format_object_header(hdr, sizeof(hdr), size, type);
ctx = git_hash_new_ctx();
+ GITERR_CHECK_ALLOC(ctx);
git_hash_update(ctx, hdr, hdr_len);
while (size > 0) {
- ssize_t read_len = read(fd, buffer, sizeof(buffer));
+ ssize_t read_len = p_read(fd, buffer, sizeof(buffer));
if (read_len < 0) {
git_hash_free_ctx(ctx);
@@ -140,6 +142,33 @@ int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)
return 0;
}
+int git_odb__hashfd_filtered(
+ git_oid *out, git_file fd, size_t size, git_otype type, git_vector *filters)
+{
+ int error;
+ git_buf raw = GIT_BUF_INIT;
+ git_buf filtered = GIT_BUF_INIT;
+
+ if (!filters || !filters->length)
+ return git_odb__hashfd(out, fd, size, type);
+
+ /* size of data is used in header, so we have to read the whole file
+ * into memory to apply filters before beginning to calculate the hash
+ */
+
+ if (!(error = git_futils_readbuffer_fd(&raw, fd, size)))
+ error = git_filters_apply(&filtered, &raw, filters);
+
+ git_buf_free(&raw);
+
+ if (!error)
+ error = git_odb_hash(out, filtered.ptr, filtered.size, type);
+
+ git_buf_free(&filtered);
+
+ return error;
+}
+
int git_odb__hashlink(git_oid *out, const char *path)
{
struct stat st;
@@ -171,7 +200,7 @@ int git_odb__hashlink(git_oid *out, const char *path)
result = git_odb_hash(out, link_data, (size_t)size, GIT_OBJ_BLOB);
git__free(link_data);
- } else {
+ } else {
int fd = git_futils_open_ro(path);
if (fd < 0)
return -1;
diff --git a/src/odb.h b/src/odb.h
index 263e4c30b..696e12943 100644
--- a/src/odb.h
+++ b/src/odb.h
@@ -58,12 +58,19 @@ int git_odb__hashobj(git_oid *id, git_rawobj *obj);
int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type);
/*
- * Hash a `path`, assuming it could be a POSIX symlink: if the path is a symlink,
- * then the raw contents of the symlink will be hashed. Otherwise, this will
- * fallback to `git_odb__hashfd`.
+ * Hash an open file descriptor applying an array of filters
+ * Acts just like git_odb__hashfd with the addition of filters...
+ */
+int git_odb__hashfd_filtered(
+ git_oid *out, git_file fd, size_t len, git_otype type, git_vector *filters);
+
+/*
+ * Hash a `path`, assuming it could be a POSIX symlink: if the path is a
+ * symlink, then the raw contents of the symlink will be hashed. Otherwise,
+ * this will fallback to `git_odb__hashfd`.
*
- * The hash type for this call is always `GIT_OBJ_BLOB` because symlinks may only
- * point to blobs.
+ * The hash type for this call is always `GIT_OBJ_BLOB` because symlinks may
+ * only point to blobs.
*/
int git_odb__hashlink(git_oid *out, const char *path);
diff --git a/tests-clar/status/worktree.c b/tests-clar/status/worktree.c
index c0412ef96..05e396e1f 100644
--- a/tests-clar/status/worktree.c
+++ b/tests-clar/status/worktree.c
@@ -839,9 +839,5 @@ void test_status_worktree__line_endings_dont_count_as_changes_with_autocrlf(void
cl_git_pass(git_status_file(&status, repo, "current_file"));
-#ifdef GIT_WIN32
cl_assert_equal_i(GIT_STATUS_CURRENT, status);
-#else
- cl_assert_equal_i(GIT_STATUS_WT_MODIFIED, status);
-#endif
}