diff options
-rw-r--r-- | builtin/hash-object.c | 5 | ||||
-rw-r--r-- | builtin/update-index.c | 3 | ||||
-rw-r--r-- | cache.h | 7 | ||||
-rw-r--r-- | notes-merge.c | 2 | ||||
-rw-r--r-- | read-cache.c | 4 | ||||
-rw-r--r-- | sha1_file.c | 147 | ||||
-rwxr-xr-x | t/t1050-large.sh | 27 |
7 files changed, 164 insertions, 31 deletions
diff --git a/builtin/hash-object.c b/builtin/hash-object.c index b96f46acf5..33911fd5e9 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -14,8 +14,11 @@ static void hash_fd(int fd, const char *type, int write_object, const char *path { struct stat st; unsigned char sha1[20]; + unsigned flags = (HASH_FORMAT_CHECK | + (write_object ? HASH_WRITE_OBJECT : 0)); + if (fstat(fd, &st) < 0 || - index_fd(sha1, fd, &st, write_object, type_from_string(type), path, 1)) + index_fd(sha1, fd, &st, type_from_string(type), path, flags)) die(write_object ? "Unable to add %s to database" : "Unable to hash %s", path); diff --git a/builtin/update-index.c b/builtin/update-index.c index d7850c6309..f14bc90830 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -99,7 +99,8 @@ static int add_one_path(struct cache_entry *old, const char *path, int len, stru fill_stat_cache_info(ce, st); ce->ce_mode = ce_mode_from_stat(old, st->st_mode); - if (index_path(ce->sha1, path, st, !info_only)) + if (index_path(ce->sha1, path, st, + info_only ? 0 : HASH_WRITE_OBJECT)) return -1; option = allow_add ? ADD_CACHE_OK_TO_ADD : 0; option |= allow_replace ? ADD_CACHE_OK_TO_REPLACE : 0; @@ -518,8 +518,11 @@ struct pathspec { extern int init_pathspec(struct pathspec *, const char **); extern void free_pathspec(struct pathspec *); extern int ce_path_match(const struct cache_entry *ce, const struct pathspec *pathspec); -extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path, int format_check); -extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object); + +#define HASH_WRITE_OBJECT 1 +#define HASH_FORMAT_CHECK 2 +extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); +extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags); extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st); #define REFRESH_REALLY 0x0001 /* ignore_valid */ diff --git a/notes-merge.c b/notes-merge.c index 28046a9984..e1aaf43b43 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -707,7 +707,7 @@ int notes_merge_commit(struct notes_merge_options *o, /* write file as blob, and add to partial_tree */ if (stat(ent->name, &st)) die_errno("Failed to stat '%s'", ent->name); - if (index_path(blob_sha1, ent->name, &st, 1)) + if (index_path(blob_sha1, ent->name, &st, HASH_WRITE_OBJECT)) die("Failed to write blob object from '%s'", ent->name); if (add_note(partial_tree, obj_sha1, blob_sha1, NULL)) die("Failed to add resolved note '%s' to notes tree", diff --git a/read-cache.c b/read-cache.c index f38471cac3..4ac9a037f4 100644 --- a/read-cache.c +++ b/read-cache.c @@ -92,7 +92,7 @@ static int ce_compare_data(struct cache_entry *ce, struct stat *st) if (fd >= 0) { unsigned char sha1[20]; - if (!index_fd(sha1, fd, st, 0, OBJ_BLOB, ce->name, 0)) + if (!index_fd(sha1, fd, st, OBJ_BLOB, ce->name, 0)) match = hashcmp(sha1, ce->sha1); /* index_fd() closed the file descriptor already */ } @@ -641,7 +641,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st, return 0; } if (!intent_only) { - if (index_path(ce->sha1, path, st, 1)) + if (index_path(ce->sha1, path, st, HASH_WRITE_OBJECT)) return error("unable to index file %s", path); } else record_intent_to_add(ce); diff --git a/sha1_file.c b/sha1_file.c index 357f9ab7ff..064a330408 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -11,6 +11,7 @@ #include "pack.h" #include "blob.h" #include "commit.h" +#include "run-command.h" #include "tag.h" #include "tree.h" #include "tree-walk.h" @@ -2578,10 +2579,11 @@ static void check_tag(const void *buf, size_t size) } static int index_mem(unsigned char *sha1, void *buf, size_t size, - int write_object, enum object_type type, - const char *path, int format_check) + enum object_type type, + const char *path, unsigned flags) { int ret, re_allocated = 0; + int write_object = flags & HASH_WRITE_OBJECT; if (!type) type = OBJ_BLOB; @@ -2597,7 +2599,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, re_allocated = 1; } } - if (format_check) { + if (flags & HASH_FORMAT_CHECK) { if (type == OBJ_TREE) check_tree(buf, size); if (type == OBJ_COMMIT) @@ -2615,44 +2617,141 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, return ret; } +static int index_pipe(unsigned char *sha1, int fd, enum object_type type, + const char *path, unsigned flags) +{ + struct strbuf sbuf = STRBUF_INIT; + int ret; + + if (strbuf_read(&sbuf, fd, 4096) >= 0) + ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags); + else + ret = -1; + strbuf_release(&sbuf); + return ret; +} + #define SMALL_FILE_SIZE (32*1024) -int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, - enum object_type type, const char *path, int format_check) +static int index_core(unsigned char *sha1, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) { int ret; - size_t size = xsize_t(st->st_size); - if (!S_ISREG(st->st_mode)) { - struct strbuf sbuf = STRBUF_INIT; - if (strbuf_read(&sbuf, fd, 4096) >= 0) - ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object, - type, path, format_check); - else - ret = -1; - strbuf_release(&sbuf); - } else if (!size) { - ret = index_mem(sha1, NULL, size, write_object, type, path, - format_check); + if (!size) { + ret = index_mem(sha1, NULL, size, type, path, flags); } else if (size <= SMALL_FILE_SIZE) { char *buf = xmalloc(size); if (size == read_in_full(fd, buf, size)) - ret = index_mem(sha1, buf, size, write_object, type, - path, format_check); + ret = index_mem(sha1, buf, size, type, path, flags); else ret = error("short read %s", strerror(errno)); free(buf); } else { void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - ret = index_mem(sha1, buf, size, write_object, type, path, - format_check); + ret = index_mem(sha1, buf, size, type, path, flags); munmap(buf, size); } + return ret; +} + +/* + * This creates one packfile per large blob, because the caller + * immediately wants the result sha1, and fast-import can report the + * object name via marks mechanism only by closing the created + * packfile. + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above), but the primary + * motivation for trying to stream from the working tree file and to + * avoid mmaping it in core is to deal with large binary blobs, and + * by definition they do _not_ want to get any conversion. + */ +static int index_stream(unsigned char *sha1, int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) +{ + struct child_process fast_import; + char export_marks[512]; + const char *argv[] = { "fast-import", "--quiet", export_marks, NULL }; + char tmpfile[512]; + char fast_import_cmd[512]; + char buf[512]; + int len, tmpfd; + + strcpy(tmpfile, git_path("hashstream_XXXXXX")); + tmpfd = git_mkstemp_mode(tmpfile, 0600); + if (tmpfd < 0) + die_errno("cannot create tempfile: %s", tmpfile); + if (close(tmpfd)) + die_errno("cannot close tempfile: %s", tmpfile); + sprintf(export_marks, "--export-marks=%s", tmpfile); + + memset(&fast_import, 0, sizeof(fast_import)); + fast_import.in = -1; + fast_import.argv = argv; + fast_import.git_cmd = 1; + if (start_command(&fast_import)) + die_errno("index-stream: git fast-import failed"); + + len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n", + (unsigned long) size); + write_or_whine(fast_import.in, fast_import_cmd, len, + "index-stream: feeding fast-import"); + while (size) { + char buf[10240]; + size_t sz = size < sizeof(buf) ? size : sizeof(buf); + size_t actual; + + actual = read_in_full(fd, buf, sz); + if (actual < 0) + die_errno("index-stream: reading input"); + if (write_in_full(fast_import.in, buf, actual) != actual) + die_errno("index-stream: feeding fast-import"); + size -= actual; + } + if (close(fast_import.in)) + die_errno("index-stream: closing fast-import"); + if (finish_command(&fast_import)) + die_errno("index-stream: finishing fast-import"); + + tmpfd = open(tmpfile, O_RDONLY); + if (tmpfd < 0) + die_errno("index-stream: cannot open fast-import mark"); + len = read(tmpfd, buf, sizeof(buf)); + if (len < 0) + die_errno("index-stream: reading fast-import mark"); + if (close(tmpfd) < 0) + die_errno("index-stream: closing fast-import mark"); + if (unlink(tmpfile)) + die_errno("index-stream: unlinking fast-import mark"); + if (len != 44 || + memcmp(":1 ", buf, 3) || + get_sha1_hex(buf + 3, sha1)) + die_errno("index-stream: unexpected fast-import mark: <%s>", buf); + return 0; +} + +int index_fd(unsigned char *sha1, int fd, struct stat *st, + enum object_type type, const char *path, unsigned flags) +{ + int ret; + size_t size = xsize_t(st->st_size); + + if (!S_ISREG(st->st_mode)) + ret = index_pipe(sha1, fd, type, path, flags); + else if (size <= big_file_threshold || type != OBJ_BLOB) + ret = index_core(sha1, fd, size, type, path, flags); + else + ret = index_stream(sha1, fd, size, type, path, flags); close(fd); return ret; } -int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object) +int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags) { int fd; struct strbuf sb = STRBUF_INIT; @@ -2663,7 +2762,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write if (fd < 0) return error("open(\"%s\"): %s", path, strerror(errno)); - if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path, 0) < 0) + if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0) return error("%s: failed to insert into database", path); break; @@ -2673,7 +2772,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write return error("readlink(\"%s\"): %s", path, errstr); } - if (!write_object) + if (!(flags & HASH_WRITE_OBJECT)) hash_sha1_file(sb.buf, sb.len, blob_type, sha1); else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1)) return error("%s: failed to insert into database", diff --git a/t/t1050-large.sh b/t/t1050-large.sh new file mode 100755 index 0000000000..deba111bd7 --- /dev/null +++ b/t/t1050-large.sh @@ -0,0 +1,27 @@ +#!/bin/sh +# Copyright (c) 2011, Google Inc. + +test_description='adding and checking out large blobs' + +. ./test-lib.sh + +test_expect_success setup ' + git config core.bigfilethreshold 200k && + echo X | dd of=large bs=1k seek=2000 +' + +test_expect_success 'add a large file' ' + git add large && + # make sure we got a packfile and no loose objects + test -f .git/objects/pack/pack-*.pack && + test ! -f .git/objects/??/?????????????????????????????????????? +' + +test_expect_success 'checkout a large file' ' + large=$(git rev-parse :large) && + git update-index --add --cacheinfo 100644 $large another && + git checkout another && + cmp large another ;# this must not be test_cmp +' + +test_done |