diff options
author | Junio C Hamano <gitster@pobox.com> | 2014-10-08 13:05:32 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2014-10-08 13:05:32 -0700 |
commit | f0d89001750a27c1c447b2eb3149b998521fa52c (patch) | |
tree | 01a5829fe13f88fa399429c56fdc1d3f798309cf | |
parent | 9342f49738a6b539b94a344fdfd45cf29c93ffcd (diff) | |
parent | 9079ab7cb6768fa04e086303f90be043b423cca4 (diff) | |
download | git-f0d89001750a27c1c447b2eb3149b998521fa52c.tar.gz |
Merge branch 'sp/stream-clean-filter'
When running a required clean filter, we do not have to mmap the
original before feeding the filter. Instead, stream the file
contents directly to the filter and process its output.
* sp/stream-clean-filter:
sha1_file: don't convert off_t to size_t too early to avoid potential die()
convert: stream from fd to required clean filter to reduce used address space
copy_fd(): do not close the input file descriptor
mmap_limit: introduce GIT_MMAP_LIMIT to allow testing expected mmap size
memory_limit: use git_env_ulong() to parse GIT_ALLOC_LIMIT
config.c: add git_env_ulong() to parse environment variable
convert: drop arguments other than 'path' from would_convert_to_git()
-rw-r--r-- | cache.h | 1 | ||||
-rw-r--r-- | config.c | 16 | ||||
-rw-r--r-- | convert.c | 55 | ||||
-rw-r--r-- | convert.h | 10 | ||||
-rw-r--r-- | copy.c | 26 | ||||
-rw-r--r-- | lockfile.c | 3 | ||||
-rw-r--r-- | sha1_file.c | 60 | ||||
-rwxr-xr-x | t/t0021-conversion.sh | 24 | ||||
-rwxr-xr-x | t/t1050-large.sh | 2 | ||||
-rw-r--r-- | wrapper.c | 19 |
10 files changed, 164 insertions, 52 deletions
@@ -1324,6 +1324,7 @@ extern int git_config_rename_section_in_file(const char *, const char *, const c extern const char *git_etc_gitconfig(void); extern int check_repository_format_version(const char *var, const char *value, void *cb); extern int git_env_bool(const char *, int); +extern unsigned long git_env_ulong(const char *, unsigned long); extern int git_config_system(void); extern int config_error_nonbool(const char *); #if defined(__GNUC__) @@ -1139,12 +1139,28 @@ const char *git_etc_gitconfig(void) return system_wide; } +/* + * Parse environment variable 'k' as a boolean (in various + * possible spellings); if missing, use the default value 'def'. + */ int git_env_bool(const char *k, int def) { const char *v = getenv(k); return v ? git_config_bool(k, v) : def; } +/* + * Parse environment variable 'k' as ulong with possibly a unit + * suffix; if missing, use the default value 'val'. + */ +unsigned long git_env_ulong(const char *k, unsigned long val) +{ + const char *v = getenv(k); + if (v && !git_parse_ulong(v, &val)) + die("failed to parse %s", k); + return val; +} + int git_config_system(void) { return !git_env_bool("GIT_CONFIG_NOSYSTEM", 0); @@ -312,11 +312,12 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len, struct filter_params { const char *src; unsigned long size; + int fd; const char *cmd; const char *path; }; -static int filter_buffer(int in, int out, void *data) +static int filter_buffer_or_fd(int in, int out, void *data) { /* * Spawn cmd and feed the buffer contents through its stdin. @@ -354,7 +355,12 @@ static int filter_buffer(int in, int out, void *data) sigchain_push(SIGPIPE, SIG_IGN); - write_err = (write_in_full(child_process.in, params->src, params->size) < 0); + if (params->src) { + write_err = (write_in_full(child_process.in, params->src, params->size) < 0); + } else { + write_err = copy_fd(params->fd, child_process.in); + } + if (close(child_process.in)) write_err = 1; if (write_err) @@ -370,7 +376,7 @@ static int filter_buffer(int in, int out, void *data) return (write_err || status); } -static int apply_filter(const char *path, const char *src, size_t len, +static int apply_filter(const char *path, const char *src, size_t len, int fd, struct strbuf *dst, const char *cmd) { /* @@ -391,11 +397,12 @@ static int apply_filter(const char *path, const char *src, size_t len, return 1; memset(&async, 0, sizeof(async)); - async.proc = filter_buffer; + async.proc = filter_buffer_or_fd; async.data = ¶ms; async.out = -1; params.src = src; params.size = len; + params.fd = fd; params.cmd = cmd; params.path = path; @@ -746,6 +753,25 @@ static void convert_attrs(struct conv_attrs *ca, const char *path) } } +int would_convert_to_git_filter_fd(const char *path) +{ + struct conv_attrs ca; + + convert_attrs(&ca, path); + if (!ca.drv) + return 0; + + /* + * Apply a filter to an fd only if the filter is required to succeed. + * We must die if the filter fails, because the original data before + * filtering is not available. + */ + if (!ca.drv->required) + return 0; + + return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean); +} + int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) { @@ -760,7 +786,7 @@ int convert_to_git(const char *path, const char *src, size_t len, required = ca.drv->required; } - ret |= apply_filter(path, src, len, dst, filter); + ret |= apply_filter(path, src, len, -1, dst, filter); if (!ret && required) die("%s: clean filter '%s' failed", path, ca.drv->name); @@ -777,6 +803,23 @@ int convert_to_git(const char *path, const char *src, size_t len, return ret | ident_to_git(path, src, len, dst, ca.ident); } +void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst, + enum safe_crlf checksafe) +{ + struct conv_attrs ca; + convert_attrs(&ca, path); + + assert(ca.drv); + assert(ca.drv->clean); + + if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean)) + die("%s: clean filter '%s' failed", path, ca.drv->name); + + ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe); + ident_to_git(path, dst->buf, dst->len, dst, ca.ident); +} + static int convert_to_working_tree_internal(const char *path, const char *src, size_t len, struct strbuf *dst, int normalizing) @@ -810,7 +853,7 @@ static int convert_to_working_tree_internal(const char *path, const char *src, } } - ret_filter = apply_filter(path, src, len, dst, filter); + ret_filter = apply_filter(path, src, len, -1, dst, filter); if (!ret_filter && required) die("%s: smudge filter %s failed", path, ca.drv->name); @@ -40,11 +40,15 @@ extern int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst); extern int renormalize_buffer(const char *path, const char *src, size_t len, struct strbuf *dst); -static inline int would_convert_to_git(const char *path, const char *src, - size_t len, enum safe_crlf checksafe) +static inline int would_convert_to_git(const char *path) { - return convert_to_git(path, src, len, NULL, checksafe); + return convert_to_git(path, NULL, 0, NULL, 0); } +/* Precondition: would_convert_to_git_filter_fd(path) == true */ +extern void convert_to_git_filter_fd(const char *path, int fd, + struct strbuf *dst, + enum safe_crlf checksafe); +extern int would_convert_to_git_filter_fd(const char *path); /***************************************************************** * @@ -4,34 +4,17 @@ int copy_fd(int ifd, int ofd) { while (1) { char buffer[8192]; - char *buf = buffer; ssize_t len = xread(ifd, buffer, sizeof(buffer)); if (!len) break; if (len < 0) { - int read_error = errno; - close(ifd); return error("copy-fd: read returned %s", - strerror(read_error)); - } - while (len) { - int written = xwrite(ofd, buf, len); - if (written > 0) { - buf += written; - len -= written; - } - else if (!written) { - close(ifd); - return error("copy-fd: write returned 0"); - } else { - int write_error = errno; - close(ifd); - return error("copy-fd: write returned %s", - strerror(write_error)); - } + strerror(errno)); } + if (write_in_full(ofd, buffer, len) < 0) + return error("copy-fd: write returned %s", + strerror(errno)); } - close(ifd); return 0; } @@ -60,6 +43,7 @@ int copy_file(const char *dst, const char *src, int mode) return fdo; } status = copy_fd(fdi, fdo); + close(fdi); if (close(fdo) != 0) return error("%s: close error: %s", dst, strerror(errno)); diff --git a/lockfile.c b/lockfile.c index 2a800cef33..d34a96df4f 100644 --- a/lockfile.c +++ b/lockfile.c @@ -224,8 +224,11 @@ int hold_lock_file_for_append(struct lock_file *lk, const char *path, int flags) } else if (copy_fd(orig_fd, fd)) { if (flags & LOCK_DIE_ON_ERROR) exit(128); + close(orig_fd); close(fd); return -1; + } else { + close(orig_fd); } return fd; } diff --git a/sha1_file.c b/sha1_file.c index c08c0cbea8..6f18c22ab1 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -663,10 +663,26 @@ void release_pack_memory(size_t need) ; /* nothing */ } +static void mmap_limit_check(size_t length) +{ + static size_t limit = 0; + if (!limit) { + limit = git_env_ulong("GIT_MMAP_LIMIT", 0); + if (!limit) + limit = SIZE_MAX; + } + if (length > limit) + die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX, + (uintmax_t)length, (uintmax_t)limit); +} + void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { - void *ret = mmap(start, length, prot, flags, fd, offset); + void *ret; + + mmap_limit_check(length); + ret = mmap(start, length, prot, flags, fd, offset); if (ret == MAP_FAILED) { if (!length) return NULL; @@ -3076,6 +3092,29 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, return ret; } +static int index_stream_convert_blob(unsigned char *sha1, int fd, + const char *path, unsigned flags) +{ + int ret; + const int write_object = flags & HASH_WRITE_OBJECT; + struct strbuf sbuf = STRBUF_INIT; + + assert(path); + assert(would_convert_to_git_filter_fd(path)); + + convert_to_git_filter_fd(path, fd, &sbuf, + write_object ? safe_crlf : SAFE_CRLF_FALSE); + + if (write_object) + ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB), + sha1); + else + ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB), + sha1); + strbuf_release(&sbuf); + return ret; +} + static int index_pipe(unsigned char *sha1, int fd, enum object_type type, const char *path, unsigned flags) { @@ -3141,15 +3180,22 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags) { int ret; - size_t size = xsize_t(st->st_size); - if (!S_ISREG(st->st_mode)) + /* + * Call xsize_t() only when needed to avoid potentially unnecessary + * die() for large files. + */ + if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path)) + ret = index_stream_convert_blob(sha1, fd, path, flags); + else if (!S_ISREG(st->st_mode)) ret = index_pipe(sha1, fd, type, path, flags); - else if (size <= big_file_threshold || type != OBJ_BLOB || - (path && would_convert_to_git(path, NULL, 0, 0))) - ret = index_core(sha1, fd, size, type, path, flags); + else if (st->st_size <= big_file_threshold || type != OBJ_BLOB || + (path && would_convert_to_git(path))) + ret = index_core(sha1, fd, xsize_t(st->st_size), type, path, + flags); else - ret = index_stream(sha1, fd, size, type, path, flags); + ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path, + flags); close(fd); return ret; } diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index f890c54d13..ca7d2a630a 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -153,17 +153,23 @@ test_expect_success 'filter shell-escaped filenames' ' : ' -test_expect_success 'required filter success' ' - git config filter.required.smudge cat && - git config filter.required.clean cat && +test_expect_success 'required filter should filter data' ' + git config filter.required.smudge ./rot13.sh && + git config filter.required.clean ./rot13.sh && git config filter.required.required true && echo "*.r filter=required" >.gitattributes && - echo test >test.r && + cat test.o >test.r && git add test.r && + rm -f test.r && - git checkout -- test.r + git checkout -- test.r && + cmp test.o test.r && + + ./rot13.sh <test.o >expected && + git cat-file blob :test.r >actual && + cmp expected actual ' test_expect_success 'required filter smudge failure' ' @@ -190,6 +196,14 @@ test_expect_success 'required filter clean failure' ' test_must_fail git add test.fc ' +test_expect_success 'filtering large input to small output should use little memory' ' + git config filter.devnull.clean "cat >/dev/null" && + git config filter.devnull.required true && + for i in $(test_seq 1 30); do printf "%1048576d" 1; done >30MB && + echo "30MB filter=devnull" >.gitattributes && + GIT_MMAP_LIMIT=1m GIT_ALLOC_LIMIT=1m git add 30MB +' + test_expect_success EXPENSIVE 'filter large file' ' git config filter.largefile.smudge cat && git config filter.largefile.clean cat && diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 05a1e1d270..f5a9119290 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -13,7 +13,7 @@ test_expect_success setup ' echo X | dd of=large2 bs=1k seek=2000 && echo X | dd of=large3 bs=1k seek=2000 && echo Y | dd of=huge bs=1k seek=2500 && - GIT_ALLOC_LIMIT=1500 && + GIT_ALLOC_LIMIT=1500k && export GIT_ALLOC_LIMIT ' @@ -11,19 +11,20 @@ static void (*try_to_free_routine)(size_t size) = do_nothing; static int memory_limit_check(size_t size, int gentle) { - static int limit = -1; - if (limit == -1) { - const char *env = getenv("GIT_ALLOC_LIMIT"); - limit = env ? atoi(env) * 1024 : 0; + static size_t limit = 0; + if (!limit) { + limit = git_env_ulong("GIT_ALLOC_LIMIT", 0); + if (!limit) + limit = SIZE_MAX; } - if (limit && size > limit) { + if (size > limit) { if (gentle) { - error("attempting to allocate %"PRIuMAX" over limit %d", - (intmax_t)size, limit); + error("attempting to allocate %"PRIuMAX" over limit %"PRIuMAX, + (uintmax_t)size, (uintmax_t)limit); return -1; } else - die("attempting to allocate %"PRIuMAX" over limit %d", - (intmax_t)size, limit); + die("attempting to allocate %"PRIuMAX" over limit %"PRIuMAX, + (uintmax_t)size, (uintmax_t)limit); } return 0; } |