summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2014-10-08 13:05:32 -0700
committerJunio C Hamano <gitster@pobox.com>2014-10-08 13:05:32 -0700
commitf0d89001750a27c1c447b2eb3149b998521fa52c (patch)
tree01a5829fe13f88fa399429c56fdc1d3f798309cf
parent9342f49738a6b539b94a344fdfd45cf29c93ffcd (diff)
parent9079ab7cb6768fa04e086303f90be043b423cca4 (diff)
downloadgit-f0d89001750a27c1c447b2eb3149b998521fa52c.tar.gz
Merge branch 'sp/stream-clean-filter'
When running a required clean filter, we do not have to mmap the original before feeding the filter. Instead, stream the file contents directly to the filter and process its output. * sp/stream-clean-filter: sha1_file: don't convert off_t to size_t too early to avoid potential die() convert: stream from fd to required clean filter to reduce used address space copy_fd(): do not close the input file descriptor mmap_limit: introduce GIT_MMAP_LIMIT to allow testing expected mmap size memory_limit: use git_env_ulong() to parse GIT_ALLOC_LIMIT config.c: add git_env_ulong() to parse environment variable convert: drop arguments other than 'path' from would_convert_to_git()
-rw-r--r--cache.h1
-rw-r--r--config.c16
-rw-r--r--convert.c55
-rw-r--r--convert.h10
-rw-r--r--copy.c26
-rw-r--r--lockfile.c3
-rw-r--r--sha1_file.c60
-rwxr-xr-xt/t0021-conversion.sh24
-rwxr-xr-xt/t1050-large.sh2
-rw-r--r--wrapper.c19
10 files changed, 164 insertions, 52 deletions
diff --git a/cache.h b/cache.h
index 8206039cfe..3e6a914dba 100644
--- a/cache.h
+++ b/cache.h
@@ -1324,6 +1324,7 @@ extern int git_config_rename_section_in_file(const char *, const char *, const c
extern const char *git_etc_gitconfig(void);
extern int check_repository_format_version(const char *var, const char *value, void *cb);
extern int git_env_bool(const char *, int);
+extern unsigned long git_env_ulong(const char *, unsigned long);
extern int git_config_system(void);
extern int config_error_nonbool(const char *);
#if defined(__GNUC__)
diff --git a/config.c b/config.c
index a677eb6c59..039647d247 100644
--- a/config.c
+++ b/config.c
@@ -1139,12 +1139,28 @@ const char *git_etc_gitconfig(void)
return system_wide;
}
+/*
+ * Parse environment variable 'k' as a boolean (in various
+ * possible spellings); if missing, use the default value 'def'.
+ */
int git_env_bool(const char *k, int def)
{
const char *v = getenv(k);
return v ? git_config_bool(k, v) : def;
}
+/*
+ * Parse environment variable 'k' as ulong with possibly a unit
+ * suffix; if missing, use the default value 'val'.
+ */
+unsigned long git_env_ulong(const char *k, unsigned long val)
+{
+ const char *v = getenv(k);
+ if (v && !git_parse_ulong(v, &val))
+ die("failed to parse %s", k);
+ return val;
+}
+
int git_config_system(void)
{
return !git_env_bool("GIT_CONFIG_NOSYSTEM", 0);
diff --git a/convert.c b/convert.c
index aa7a139bcf..9a5612e93d 100644
--- a/convert.c
+++ b/convert.c
@@ -312,11 +312,12 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
struct filter_params {
const char *src;
unsigned long size;
+ int fd;
const char *cmd;
const char *path;
};
-static int filter_buffer(int in, int out, void *data)
+static int filter_buffer_or_fd(int in, int out, void *data)
{
/*
* Spawn cmd and feed the buffer contents through its stdin.
@@ -354,7 +355,12 @@ static int filter_buffer(int in, int out, void *data)
sigchain_push(SIGPIPE, SIG_IGN);
- write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
+ if (params->src) {
+ write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
+ } else {
+ write_err = copy_fd(params->fd, child_process.in);
+ }
+
if (close(child_process.in))
write_err = 1;
if (write_err)
@@ -370,7 +376,7 @@ static int filter_buffer(int in, int out, void *data)
return (write_err || status);
}
-static int apply_filter(const char *path, const char *src, size_t len,
+static int apply_filter(const char *path, const char *src, size_t len, int fd,
struct strbuf *dst, const char *cmd)
{
/*
@@ -391,11 +397,12 @@ static int apply_filter(const char *path, const char *src, size_t len,
return 1;
memset(&async, 0, sizeof(async));
- async.proc = filter_buffer;
+ async.proc = filter_buffer_or_fd;
async.data = &params;
async.out = -1;
params.src = src;
params.size = len;
+ params.fd = fd;
params.cmd = cmd;
params.path = path;
@@ -746,6 +753,25 @@ static void convert_attrs(struct conv_attrs *ca, const char *path)
}
}
+int would_convert_to_git_filter_fd(const char *path)
+{
+ struct conv_attrs ca;
+
+ convert_attrs(&ca, path);
+ if (!ca.drv)
+ return 0;
+
+ /*
+ * Apply a filter to an fd only if the filter is required to succeed.
+ * We must die if the filter fails, because the original data before
+ * filtering is not available.
+ */
+ if (!ca.drv->required)
+ return 0;
+
+ return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
+}
+
int convert_to_git(const char *path, const char *src, size_t len,
struct strbuf *dst, enum safe_crlf checksafe)
{
@@ -760,7 +786,7 @@ int convert_to_git(const char *path, const char *src, size_t len,
required = ca.drv->required;
}
- ret |= apply_filter(path, src, len, dst, filter);
+ ret |= apply_filter(path, src, len, -1, dst, filter);
if (!ret && required)
die("%s: clean filter '%s' failed", path, ca.drv->name);
@@ -777,6 +803,23 @@ int convert_to_git(const char *path, const char *src, size_t len,
return ret | ident_to_git(path, src, len, dst, ca.ident);
}
+void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
+ enum safe_crlf checksafe)
+{
+ struct conv_attrs ca;
+ convert_attrs(&ca, path);
+
+ assert(ca.drv);
+ assert(ca.drv->clean);
+
+ if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean))
+ die("%s: clean filter '%s' failed", path, ca.drv->name);
+
+ ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
+ crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
+ ident_to_git(path, dst->buf, dst->len, dst, ca.ident);
+}
+
static int convert_to_working_tree_internal(const char *path, const char *src,
size_t len, struct strbuf *dst,
int normalizing)
@@ -810,7 +853,7 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
}
}
- ret_filter = apply_filter(path, src, len, dst, filter);
+ ret_filter = apply_filter(path, src, len, -1, dst, filter);
if (!ret_filter && required)
die("%s: smudge filter %s failed", path, ca.drv->name);
diff --git a/convert.h b/convert.h
index 0c2143c152..d9d853cd3d 100644
--- a/convert.h
+++ b/convert.h
@@ -40,11 +40,15 @@ extern int convert_to_working_tree(const char *path, const char *src,
size_t len, struct strbuf *dst);
extern int renormalize_buffer(const char *path, const char *src, size_t len,
struct strbuf *dst);
-static inline int would_convert_to_git(const char *path, const char *src,
- size_t len, enum safe_crlf checksafe)
+static inline int would_convert_to_git(const char *path)
{
- return convert_to_git(path, src, len, NULL, checksafe);
+ return convert_to_git(path, NULL, 0, NULL, 0);
}
+/* Precondition: would_convert_to_git_filter_fd(path) == true */
+extern void convert_to_git_filter_fd(const char *path, int fd,
+ struct strbuf *dst,
+ enum safe_crlf checksafe);
+extern int would_convert_to_git_filter_fd(const char *path);
/*****************************************************************
*
diff --git a/copy.c b/copy.c
index a7f58fd905..f2970ec462 100644
--- a/copy.c
+++ b/copy.c
@@ -4,34 +4,17 @@ int copy_fd(int ifd, int ofd)
{
while (1) {
char buffer[8192];
- char *buf = buffer;
ssize_t len = xread(ifd, buffer, sizeof(buffer));
if (!len)
break;
if (len < 0) {
- int read_error = errno;
- close(ifd);
return error("copy-fd: read returned %s",
- strerror(read_error));
- }
- while (len) {
- int written = xwrite(ofd, buf, len);
- if (written > 0) {
- buf += written;
- len -= written;
- }
- else if (!written) {
- close(ifd);
- return error("copy-fd: write returned 0");
- } else {
- int write_error = errno;
- close(ifd);
- return error("copy-fd: write returned %s",
- strerror(write_error));
- }
+ strerror(errno));
}
+ if (write_in_full(ofd, buffer, len) < 0)
+ return error("copy-fd: write returned %s",
+ strerror(errno));
}
- close(ifd);
return 0;
}
@@ -60,6 +43,7 @@ int copy_file(const char *dst, const char *src, int mode)
return fdo;
}
status = copy_fd(fdi, fdo);
+ close(fdi);
if (close(fdo) != 0)
return error("%s: close error: %s", dst, strerror(errno));
diff --git a/lockfile.c b/lockfile.c
index 2a800cef33..d34a96df4f 100644
--- a/lockfile.c
+++ b/lockfile.c
@@ -224,8 +224,11 @@ int hold_lock_file_for_append(struct lock_file *lk, const char *path, int flags)
} else if (copy_fd(orig_fd, fd)) {
if (flags & LOCK_DIE_ON_ERROR)
exit(128);
+ close(orig_fd);
close(fd);
return -1;
+ } else {
+ close(orig_fd);
}
return fd;
}
diff --git a/sha1_file.c b/sha1_file.c
index c08c0cbea8..6f18c22ab1 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -663,10 +663,26 @@ void release_pack_memory(size_t need)
; /* nothing */
}
+static void mmap_limit_check(size_t length)
+{
+ static size_t limit = 0;
+ if (!limit) {
+ limit = git_env_ulong("GIT_MMAP_LIMIT", 0);
+ if (!limit)
+ limit = SIZE_MAX;
+ }
+ if (length > limit)
+ die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
+ (uintmax_t)length, (uintmax_t)limit);
+}
+
void *xmmap(void *start, size_t length,
int prot, int flags, int fd, off_t offset)
{
- void *ret = mmap(start, length, prot, flags, fd, offset);
+ void *ret;
+
+ mmap_limit_check(length);
+ ret = mmap(start, length, prot, flags, fd, offset);
if (ret == MAP_FAILED) {
if (!length)
return NULL;
@@ -3076,6 +3092,29 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
return ret;
}
+static int index_stream_convert_blob(unsigned char *sha1, int fd,
+ const char *path, unsigned flags)
+{
+ int ret;
+ const int write_object = flags & HASH_WRITE_OBJECT;
+ struct strbuf sbuf = STRBUF_INIT;
+
+ assert(path);
+ assert(would_convert_to_git_filter_fd(path));
+
+ convert_to_git_filter_fd(path, fd, &sbuf,
+ write_object ? safe_crlf : SAFE_CRLF_FALSE);
+
+ if (write_object)
+ ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
+ sha1);
+ else
+ ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
+ sha1);
+ strbuf_release(&sbuf);
+ return ret;
+}
+
static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
const char *path, unsigned flags)
{
@@ -3141,15 +3180,22 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
enum object_type type, const char *path, unsigned flags)
{
int ret;
- size_t size = xsize_t(st->st_size);
- if (!S_ISREG(st->st_mode))
+ /*
+ * Call xsize_t() only when needed to avoid potentially unnecessary
+ * die() for large files.
+ */
+ if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
+ ret = index_stream_convert_blob(sha1, fd, path, flags);
+ else if (!S_ISREG(st->st_mode))
ret = index_pipe(sha1, fd, type, path, flags);
- else if (size <= big_file_threshold || type != OBJ_BLOB ||
- (path && would_convert_to_git(path, NULL, 0, 0)))
- ret = index_core(sha1, fd, size, type, path, flags);
+ else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
+ (path && would_convert_to_git(path)))
+ ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
+ flags);
else
- ret = index_stream(sha1, fd, size, type, path, flags);
+ ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
+ flags);
close(fd);
return ret;
}
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index f890c54d13..ca7d2a630a 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -153,17 +153,23 @@ test_expect_success 'filter shell-escaped filenames' '
:
'
-test_expect_success 'required filter success' '
- git config filter.required.smudge cat &&
- git config filter.required.clean cat &&
+test_expect_success 'required filter should filter data' '
+ git config filter.required.smudge ./rot13.sh &&
+ git config filter.required.clean ./rot13.sh &&
git config filter.required.required true &&
echo "*.r filter=required" >.gitattributes &&
- echo test >test.r &&
+ cat test.o >test.r &&
git add test.r &&
+
rm -f test.r &&
- git checkout -- test.r
+ git checkout -- test.r &&
+ cmp test.o test.r &&
+
+ ./rot13.sh <test.o >expected &&
+ git cat-file blob :test.r >actual &&
+ cmp expected actual
'
test_expect_success 'required filter smudge failure' '
@@ -190,6 +196,14 @@ test_expect_success 'required filter clean failure' '
test_must_fail git add test.fc
'
+test_expect_success 'filtering large input to small output should use little memory' '
+ git config filter.devnull.clean "cat >/dev/null" &&
+ git config filter.devnull.required true &&
+ for i in $(test_seq 1 30); do printf "%1048576d" 1; done >30MB &&
+ echo "30MB filter=devnull" >.gitattributes &&
+ GIT_MMAP_LIMIT=1m GIT_ALLOC_LIMIT=1m git add 30MB
+'
+
test_expect_success EXPENSIVE 'filter large file' '
git config filter.largefile.smudge cat &&
git config filter.largefile.clean cat &&
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 05a1e1d270..f5a9119290 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -13,7 +13,7 @@ test_expect_success setup '
echo X | dd of=large2 bs=1k seek=2000 &&
echo X | dd of=large3 bs=1k seek=2000 &&
echo Y | dd of=huge bs=1k seek=2500 &&
- GIT_ALLOC_LIMIT=1500 &&
+ GIT_ALLOC_LIMIT=1500k &&
export GIT_ALLOC_LIMIT
'
diff --git a/wrapper.c b/wrapper.c
index 25074d71b6..5b77d2a1ae 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -11,19 +11,20 @@ static void (*try_to_free_routine)(size_t size) = do_nothing;
static int memory_limit_check(size_t size, int gentle)
{
- static int limit = -1;
- if (limit == -1) {
- const char *env = getenv("GIT_ALLOC_LIMIT");
- limit = env ? atoi(env) * 1024 : 0;
+ static size_t limit = 0;
+ if (!limit) {
+ limit = git_env_ulong("GIT_ALLOC_LIMIT", 0);
+ if (!limit)
+ limit = SIZE_MAX;
}
- if (limit && size > limit) {
+ if (size > limit) {
if (gentle) {
- error("attempting to allocate %"PRIuMAX" over limit %d",
- (intmax_t)size, limit);
+ error("attempting to allocate %"PRIuMAX" over limit %"PRIuMAX,
+ (uintmax_t)size, (uintmax_t)limit);
return -1;
} else
- die("attempting to allocate %"PRIuMAX" over limit %d",
- (intmax_t)size, limit);
+ die("attempting to allocate %"PRIuMAX" over limit %"PRIuMAX,
+ (uintmax_t)size, (uintmax_t)limit);
}
return 0;
}