diff options
author | Junio C Hamano <gitster@pobox.com> | 2014-09-11 10:33:32 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2014-09-11 10:33:33 -0700 |
commit | bedd3b4b7b25df0b994abf444ee2136995dfeffa (patch) | |
tree | 1f5866f73cce9cf92d602f8d61fb6a52d1c3d3c9 | |
parent | 08ad26a63deaa7eaaaa987ed2cc84ac0f83c080b (diff) | |
parent | 1aaf69e669b7fd67073d3024b386ac25ac77d0f8 (diff) | |
download | git-bedd3b4b7b25df0b994abf444ee2136995dfeffa.tar.gz |
Merge branch 'nd/large-blobs'
Teach a few codepaths to punt (instead of dying) when large blobs
that would not fit in core are involved in the operation.
* nd/large-blobs:
diff: shortcut for diff'ing two binary SHA-1 objects
diff --stat: mark any file larger than core.bigfilethreshold binary
diff.c: allow to pass more flags to diff_populate_filespec
sha1_file.c: do not die failing to malloc in unpack_compressed_entry
wrapper.c: introduce gentle xmallocz that does not die()
-rw-r--r-- | Documentation/config.txt | 3 | ||||
-rw-r--r-- | Documentation/gitattributes.txt | 4 | ||||
-rw-r--r-- | diff.c | 52 | ||||
-rw-r--r-- | diffcore-rename.c | 6 | ||||
-rw-r--r-- | diffcore.h | 4 | ||||
-rw-r--r-- | git-compat-util.h | 1 | ||||
-rw-r--r-- | sha1_file.c | 4 | ||||
-rwxr-xr-x | t/t1050-large.sh | 20 | ||||
-rw-r--r-- | wrapper.c | 68 |
9 files changed, 125 insertions, 37 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt index c55c22ab7b..3b5b24aeb7 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -499,7 +499,8 @@ core.bigFileThreshold:: Files larger than this size are stored deflated, without attempting delta compression. Storing large files without delta compression avoids excessive memory usage, at the - slight expense of increased disk usage. + slight expense of increased disk usage. Additionally files + larger than this size are always treated as binary. + Default is 512 MiB on all platforms. This should be reasonable for most projects as source code and other text files can still diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 643c1ba929..9b45bda748 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -440,8 +440,8 @@ Unspecified:: A path to which the `diff` attribute is unspecified first gets its contents inspected, and if it looks like - text, it is treated as text. Otherwise it would - generate `Binary files differ`. + text and is smaller than core.bigFileThreshold, it is treated + as text. Otherwise it would generate `Binary files differ`. String:: @@ -376,7 +376,7 @@ static unsigned long diff_filespec_size(struct diff_filespec *one) { if (!DIFF_FILE_VALID(one)) return 0; - diff_populate_filespec(one, 1); + diff_populate_filespec(one, CHECK_SIZE_ONLY); return one->size; } @@ -1910,11 +1910,11 @@ static void show_dirstat(struct diff_options *options) diff_free_filespec_data(p->one); diff_free_filespec_data(p->two); } else if (DIFF_FILE_VALID(p->one)) { - diff_populate_filespec(p->one, 1); + diff_populate_filespec(p->one, CHECK_SIZE_ONLY); copied = added = 0; diff_free_filespec_data(p->one); } else if (DIFF_FILE_VALID(p->two)) { - diff_populate_filespec(p->two, 1); + diff_populate_filespec(p->two, CHECK_SIZE_ONLY); copied = 0; added = p->two->size; diff_free_filespec_data(p->two); @@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one) one->is_binary = one->driver->binary; else { if (!one->data && DIFF_FILE_VALID(one)) - diff_populate_filespec(one, 0); - if (one->data) + diff_populate_filespec(one, CHECK_BINARY); + if (one->is_binary == -1 && one->data) one->is_binary = buffer_is_binary(one->data, one->size); if (one->is_binary == -1) @@ -2324,6 +2324,19 @@ static void builtin_diff(const char *name_a, } else if (!DIFF_OPT_TST(o, TEXT) && ( (!textconv_one && diff_filespec_is_binary(one)) || (!textconv_two && diff_filespec_is_binary(two)) )) { + if (!one->data && !two->data && + S_ISREG(one->mode) && S_ISREG(two->mode) && + !DIFF_OPT_TST(o, BINARY)) { + if (!hashcmp(one->sha1, two->sha1)) { + if (must_show_header) + fprintf(o->file, "%s", header.buf); + goto free_ab_and_return; + } + fprintf(o->file, "%s", header.buf); + fprintf(o->file, "%sBinary files %s and %s differ\n", + line_prefix, lbl[0], lbl[1]); + goto free_ab_and_return; + } if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) die("unable to read files to diff"); /* Quite common confusing case */ @@ -2668,8 +2681,9 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only) * grab the data for the blob (or file) for our own in-core comparison. * diff_filespec has data and size fields for this purpose. */ -int diff_populate_filespec(struct diff_filespec *s, int size_only) +int diff_populate_filespec(struct diff_filespec *s, unsigned int flags) { + int size_only = flags & CHECK_SIZE_ONLY; int err = 0; /* * demote FAIL to WARN to allow inspecting the situation @@ -2724,6 +2738,11 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only) } if (size_only) return 0; + if ((flags & CHECK_BINARY) && + s->size > big_file_threshold && s->is_binary == -1) { + s->is_binary = 1; + return 0; + } fd = open(s->path, O_RDONLY); if (fd < 0) goto err_empty; @@ -2745,16 +2764,21 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only) } else { enum object_type type; - if (size_only) { + if (size_only || (flags & CHECK_BINARY)) { type = sha1_object_info(s->sha1, &s->size); if (type < 0) die("unable to read %s", sha1_to_hex(s->sha1)); - } else { - s->data = read_sha1_file(s->sha1, &type, &s->size); - if (!s->data) - die("unable to read %s", sha1_to_hex(s->sha1)); - s->should_free = 1; + if (size_only) + return 0; + if (s->size > big_file_threshold && s->is_binary == -1) { + s->is_binary = 1; + return 0; + } } + s->data = read_sha1_file(s->sha1, &type, &s->size); + if (!s->data) + die("unable to read %s", sha1_to_hex(s->sha1)); + s->should_free = 1; } return 0; } @@ -4688,8 +4712,8 @@ static int diff_filespec_check_stat_unmatch(struct diff_filepair *p) !DIFF_FILE_VALID(p->two) || (p->one->sha1_valid && p->two->sha1_valid) || (p->one->mode != p->two->mode) || - diff_populate_filespec(p->one, 1) || - diff_populate_filespec(p->two, 1) || + diff_populate_filespec(p->one, CHECK_SIZE_ONLY) || + diff_populate_filespec(p->two, CHECK_SIZE_ONLY) || (p->one->size != p->two->size) || !diff_filespec_is_identical(p->one, p->two)) /* (2) */ p->skip_stat_unmatch_result = 1; diff --git a/diffcore-rename.c b/diffcore-rename.c index 2e44a37459..4e132f1fdb 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -147,9 +147,11 @@ static int estimate_similarity(struct diff_filespec *src, * is a possible size - we really should have a flag to * say whether the size is valid or not!) */ - if (!src->cnt_data && diff_populate_filespec(src, 1)) + if (!src->cnt_data && + diff_populate_filespec(src, CHECK_SIZE_ONLY)) return 0; - if (!dst->cnt_data && diff_populate_filespec(dst, 1)) + if (!dst->cnt_data && + diff_populate_filespec(dst, CHECK_SIZE_ONLY)) return 0; max_size = ((src->size > dst->size) ? src->size : dst->size); diff --git a/diffcore.h b/diffcore.h index c876dac71a..33ea2de348 100644 --- a/diffcore.h +++ b/diffcore.h @@ -55,7 +55,9 @@ extern void free_filespec(struct diff_filespec *); extern void fill_filespec(struct diff_filespec *, const unsigned char *, int, unsigned short); -extern int diff_populate_filespec(struct diff_filespec *, int); +#define CHECK_SIZE_ONLY 1 +#define CHECK_BINARY 2 +extern int diff_populate_filespec(struct diff_filespec *, unsigned int); extern void diff_free_filespec_data(struct diff_filespec *); extern void diff_free_filespec_blob(struct diff_filespec *); extern int diff_filespec_is_binary(struct diff_filespec *); diff --git a/git-compat-util.h b/git-compat-util.h index d675c89603..4e7e3f8726 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -609,6 +609,7 @@ extern try_to_free_t set_try_to_free_routine(try_to_free_t); extern char *xstrdup(const char *str); extern void *xmalloc(size_t size); extern void *xmallocz(size_t size); +extern void *xmallocz_gently(size_t size); extern void *xmemdupz(const void *data, size_t len); extern char *xstrndup(const char *str, size_t len); extern void *xrealloc(void *ptr, size_t size); diff --git a/sha1_file.c b/sha1_file.c index 95afd20910..c08c0cbea8 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1923,7 +1923,9 @@ static void *unpack_compressed_entry(struct packed_git *p, git_zstream stream; unsigned char *buffer, *in; - buffer = xmallocz(size); + buffer = xmallocz_gently(size); + if (!buffer) + return NULL; memset(&stream, 0, sizeof(stream)); stream.next_out = buffer; stream.avail_out = size + 1; diff --git a/t/t1050-large.sh b/t/t1050-large.sh index aea493646e..05a1e1d270 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -112,6 +112,20 @@ test_expect_success 'diff --raw' ' git diff --raw HEAD^ ' +test_expect_success 'diff --stat' ' + git diff --stat HEAD^ HEAD +' + +test_expect_success 'diff' ' + git diff HEAD^ HEAD >actual && + grep "Binary files.*differ" actual +' + +test_expect_success 'diff --cached' ' + git diff --cached HEAD^ >actual && + grep "Binary files.*differ" actual +' + test_expect_success 'hash-object' ' git hash-object large1 ' @@ -163,4 +177,10 @@ test_expect_success 'zip achiving, deflate' ' git archive --format=zip HEAD >/dev/null ' +test_expect_success 'fsck' ' + test_must_fail git fsck 2>err && + n=$(grep "error: attempting to allocate .* over limit" err | wc -l) && + test "$n" -gt 1 +' + test_done @@ -9,16 +9,23 @@ static void do_nothing(size_t size) static void (*try_to_free_routine)(size_t size) = do_nothing; -static void memory_limit_check(size_t size) +static int memory_limit_check(size_t size, int gentle) { static int limit = -1; if (limit == -1) { const char *env = getenv("GIT_ALLOC_LIMIT"); limit = env ? atoi(env) * 1024 : 0; } - if (limit && size > limit) - die("attempting to allocate %"PRIuMAX" over limit %d", - (intmax_t)size, limit); + if (limit && size > limit) { + if (gentle) { + error("attempting to allocate %"PRIuMAX" over limit %d", + (intmax_t)size, limit); + return -1; + } else + die("attempting to allocate %"PRIuMAX" over limit %d", + (intmax_t)size, limit); + } + return 0; } try_to_free_t set_try_to_free_routine(try_to_free_t routine) @@ -42,11 +49,12 @@ char *xstrdup(const char *str) return ret; } -void *xmalloc(size_t size) +static void *do_xmalloc(size_t size, int gentle) { void *ret; - memory_limit_check(size); + if (memory_limit_check(size, gentle)) + return NULL; ret = malloc(size); if (!ret && !size) ret = malloc(1); @@ -55,9 +63,16 @@ void *xmalloc(size_t size) ret = malloc(size); if (!ret && !size) ret = malloc(1); - if (!ret) - die("Out of memory, malloc failed (tried to allocate %lu bytes)", - (unsigned long)size); + if (!ret) { + if (!gentle) + die("Out of memory, malloc failed (tried to allocate %lu bytes)", + (unsigned long)size); + else { + error("Out of memory, malloc failed (tried to allocate %lu bytes)", + (unsigned long)size); + return NULL; + } + } } #ifdef XMALLOC_POISON memset(ret, 0xA5, size); @@ -65,16 +80,37 @@ void *xmalloc(size_t size) return ret; } -void *xmallocz(size_t size) +void *xmalloc(size_t size) +{ + return do_xmalloc(size, 0); +} + +static void *do_xmallocz(size_t size, int gentle) { void *ret; - if (unsigned_add_overflows(size, 1)) - die("Data too large to fit into virtual memory space."); - ret = xmalloc(size + 1); - ((char*)ret)[size] = 0; + if (unsigned_add_overflows(size, 1)) { + if (gentle) { + error("Data too large to fit into virtual memory space."); + return NULL; + } else + die("Data too large to fit into virtual memory space."); + } + ret = do_xmalloc(size + 1, gentle); + if (ret) + ((char*)ret)[size] = 0; return ret; } +void *xmallocz(size_t size) +{ + return do_xmallocz(size, 0); +} + +void *xmallocz_gently(size_t size) +{ + return do_xmallocz(size, 1); +} + /* * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of * "data" to the allocated memory, zero terminates the allocated memory, @@ -96,7 +132,7 @@ void *xrealloc(void *ptr, size_t size) { void *ret; - memory_limit_check(size); + memory_limit_check(size, 0); ret = realloc(ptr, size); if (!ret && !size) ret = realloc(ptr, 1); @@ -115,7 +151,7 @@ void *xcalloc(size_t nmemb, size_t size) { void *ret; - memory_limit_check(size * nmemb); + memory_limit_check(size * nmemb, 0); ret = calloc(nmemb, size); if (!ret && (!nmemb || !size)) ret = calloc(1, 1); |