diff options
author | Junio C Hamano <gitster@pobox.com> | 2011-12-16 22:33:40 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2011-12-16 22:33:40 -0800 |
commit | 48b303675aa238c209e527feadcbb7ba1c025c97 (patch) | |
tree | 6a48f4388867f836db0e4c015098348079a9185d | |
parent | e45c9b03c32620c444f464403c23534160998624 (diff) | |
parent | 568508e76570e9ea36aad6446959424cebcf0535 (diff) | |
download | git-48b303675aa238c209e527feadcbb7ba1c025c97.tar.gz |
Merge branch 'jc/stream-to-pack'

* jc/stream-to-pack:
  bulk-checkin: replace fast-import based implementation
  csum-file: introduce sha1file_checkpoint
  finish_tmp_packfile(): a helper function
  create_tmp_packfile(): a helper function
  write_pack_header(): a helper function

Conflicts:
  pack.h
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | builtin/add.c | 5 | ||||
-rw-r--r-- | builtin/pack-objects.c | 62 | ||||
-rw-r--r-- | bulk-checkin.c | 275 | ||||
-rw-r--r-- | bulk-checkin.h | 16 | ||||
-rw-r--r-- | cache.h | 2 | ||||
-rw-r--r-- | config.c | 4 | ||||
-rw-r--r-- | csum-file.c | 20 | ||||
-rw-r--r-- | csum-file.h | 9 | ||||
-rw-r--r-- | environment.c | 1 | ||||
-rw-r--r-- | fast-import.c | 25 | ||||
-rw-r--r-- | pack-write.c | 53 | ||||
-rw-r--r-- | pack.h | 6 | ||||
-rw-r--r-- | sha1_file.c | 67 | ||||
-rwxr-xr-x | t/t1050-large.sh | 94 | ||||
-rw-r--r-- | zlib.c | 9 |
16 files changed, 516 insertions, 134 deletions
@@ -511,6 +511,7 @@ LIB_H += argv-array.h LIB_H += attr.h LIB_H += blob.h LIB_H += builtin.h +LIB_H += bulk-checkin.h LIB_H += cache.h LIB_H += cache-tree.h LIB_H += color.h @@ -600,6 +601,7 @@ LIB_OBJS += base85.o LIB_OBJS += bisect.o LIB_OBJS += blob.o LIB_OBJS += branch.o +LIB_OBJS += bulk-checkin.o LIB_OBJS += bundle.o LIB_OBJS += cache-tree.o LIB_OBJS += color.o diff --git a/builtin/add.c b/builtin/add.c index c59b0c98fe..1c42900ff8 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -13,6 +13,7 @@ #include "diff.h" #include "diffcore.h" #include "revision.h" +#include "bulk-checkin.h" static const char * const builtin_add_usage[] = { "git add [options] [--] <filepattern>...", @@ -458,11 +459,15 @@ int cmd_add(int argc, const char **argv, const char *prefix) free(seen); } + plug_bulk_checkin(); + exit_status |= add_files_to_cache(prefix, pathspec, flags); if (add_new_files) exit_status |= add_files(&dir, flags); + unplug_bulk_checkin(); + finish: if (active_cache_changed) { if (write_cache(newfd, active_cache, active_nr) || diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b1895aaaa1..96c1680976 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -76,7 +76,7 @@ static struct pack_idx_option pack_idx_opts; static const char *base_name; static int progress = 1; static int window = 10; -static unsigned long pack_size_limit, pack_size_limit_cfg; +static unsigned long pack_size_limit; static int depth = 50; static int delta_search_threads; static int pack_to_stdout; @@ -638,7 +638,6 @@ static void write_pack_file(void) uint32_t i = 0, j; struct sha1file *f; off_t offset; - struct pack_header hdr; uint32_t nr_remaining = nr_result; time_t last_mtime = 0; struct object_entry **write_order; @@ -652,22 +651,14 @@ static void write_pack_file(void) unsigned char sha1[20]; char *pack_tmp_name = NULL; - if (pack_to_stdout) { + if (pack_to_stdout) f = sha1fd_throughput(1, "<stdout>", progress_state); - } else { - char tmpname[PATH_MAX]; - int 
fd; - fd = odb_mkstemp(tmpname, sizeof(tmpname), - "pack/tmp_pack_XXXXXX"); - pack_tmp_name = xstrdup(tmpname); - f = sha1fd(fd, pack_tmp_name); - } - - hdr.hdr_signature = htonl(PACK_SIGNATURE); - hdr.hdr_version = htonl(PACK_VERSION); - hdr.hdr_entries = htonl(nr_remaining); - sha1write(f, &hdr, sizeof(hdr)); - offset = sizeof(hdr); + else + f = create_tmp_packfile(&pack_tmp_name); + + offset = write_pack_header(f, nr_remaining); + if (!offset) + die_errno("unable to write pack header"); nr_written = 0; for (; i < nr_objects; i++) { struct object_entry *e = write_order[i]; @@ -693,20 +684,8 @@ static void write_pack_file(void) if (!pack_to_stdout) { struct stat st; - const char *idx_tmp_name; char tmpname[PATH_MAX]; - idx_tmp_name = write_idx_file(NULL, written_list, nr_written, - &pack_idx_opts, sha1); - - snprintf(tmpname, sizeof(tmpname), "%s-%s.pack", - base_name, sha1_to_hex(sha1)); - free_pack_by_name(tmpname); - if (adjust_shared_perm(pack_tmp_name)) - die_errno("unable to make temporary pack file readable"); - if (rename(pack_tmp_name, tmpname)) - die_errno("unable to rename temporary pack file"); - /* * Packs are runtime accessed in their mtime * order since newer packs are more likely to contain @@ -714,28 +693,27 @@ static void write_pack_file(void) * packs then we should modify the mtime of later ones * to preserve this property. 
*/ - if (stat(tmpname, &st) < 0) { + if (stat(pack_tmp_name, &st) < 0) { warning("failed to stat %s: %s", - tmpname, strerror(errno)); + pack_tmp_name, strerror(errno)); } else if (!last_mtime) { last_mtime = st.st_mtime; } else { struct utimbuf utb; utb.actime = st.st_atime; utb.modtime = --last_mtime; - if (utime(tmpname, &utb) < 0) + if (utime(pack_tmp_name, &utb) < 0) warning("failed utime() on %s: %s", tmpname, strerror(errno)); } - snprintf(tmpname, sizeof(tmpname), "%s-%s.idx", - base_name, sha1_to_hex(sha1)); - if (adjust_shared_perm(idx_tmp_name)) - die_errno("unable to make temporary index file readable"); - if (rename(idx_tmp_name, tmpname)) - die_errno("unable to rename temporary index file"); - - free((void *) idx_tmp_name); + /* Enough space for "-<sha-1>.pack"? */ + if (sizeof(tmpname) <= strlen(base_name) + 50) + die("pack base name '%s' too long", base_name); + snprintf(tmpname, sizeof(tmpname), "%s-", base_name); + finish_tmp_packfile(tmpname, pack_tmp_name, + written_list, nr_written, + &pack_idx_opts, sha1); free(pack_tmp_name); puts(sha1_to_hex(sha1)); } @@ -2098,10 +2076,6 @@ static int git_pack_config(const char *k, const char *v, void *cb) pack_idx_opts.version); return 0; } - if (!strcmp(k, "pack.packsizelimit")) { - pack_size_limit_cfg = git_config_ulong(k, v); - return 0; - } return git_default_config(k, v, cb); } diff --git a/bulk-checkin.c b/bulk-checkin.c new file mode 100644 index 0000000000..6b0b6d4904 --- /dev/null +++ b/bulk-checkin.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2011, Google Inc. 
+ */ +#include "bulk-checkin.h" +#include "csum-file.h" +#include "pack.h" + +static int pack_compression_level = Z_DEFAULT_COMPRESSION; + +static struct bulk_checkin_state { + unsigned plugged:1; + + char *pack_tmp_name; + struct sha1file *f; + off_t offset; + struct pack_idx_option pack_idx_opts; + + struct pack_idx_entry **written; + uint32_t alloc_written; + uint32_t nr_written; +} state; + +static void finish_bulk_checkin(struct bulk_checkin_state *state) +{ + unsigned char sha1[20]; + char packname[PATH_MAX]; + int i; + + if (!state->f) + return; + + if (state->nr_written == 0) { + close(state->f->fd); + unlink(state->pack_tmp_name); + goto clear_exit; + } else if (state->nr_written == 1) { + sha1close(state->f, sha1, CSUM_FSYNC); + } else { + int fd = sha1close(state->f, sha1, 0); + fixup_pack_header_footer(fd, sha1, state->pack_tmp_name, + state->nr_written, sha1, + state->offset); + close(fd); + } + + sprintf(packname, "%s/pack/pack-", get_object_directory()); + finish_tmp_packfile(packname, state->pack_tmp_name, + state->written, state->nr_written, + &state->pack_idx_opts, sha1); + for (i = 0; i < state->nr_written; i++) + free(state->written[i]); + +clear_exit: + free(state->written); + memset(state, 0, sizeof(*state)); + + /* Make objects we just wrote available to ourselves */ + reprepare_packed_git(); +} + +static int already_written(struct bulk_checkin_state *state, unsigned char sha1[]) +{ + int i; + + /* The object may already exist in the repository */ + if (has_sha1_file(sha1)) + return 1; + + /* Might want to keep the list sorted */ + for (i = 0; i < state->nr_written; i++) + if (!hashcmp(state->written[i]->sha1, sha1)) + return 1; + + /* This is a new object we need to keep */ + return 0; +} + +/* + * Read the contents from fd for size bytes, streaming it to the + * packfile in state while updating the hash in ctx. 
Signal a failure + * by returning a negative value when the resulting pack would exceed + * the pack size limit and this is not the first object in the pack, + * so that the caller can discard what we wrote from the current pack + * by truncating it and opening a new one. The caller will then call + * us again after rewinding the input fd. + * + * The already_hashed_to pointer is kept untouched by the caller to + * make sure we do not hash the same byte when we are called + * again. This way, the caller does not have to checkpoint its hash + * status before calling us just in case we ask it to call us again + * with a new pack. + */ +static int stream_to_pack(struct bulk_checkin_state *state, + git_SHA_CTX *ctx, off_t *already_hashed_to, + int fd, size_t size, enum object_type type, + const char *path, unsigned flags) +{ + git_zstream s; + unsigned char obuf[16384]; + unsigned hdrlen; + int status = Z_OK; + int write_object = (flags & HASH_WRITE_OBJECT); + off_t offset = 0; + + memset(&s, 0, sizeof(s)); + git_deflate_init(&s, pack_compression_level); + + hdrlen = encode_in_pack_object_header(type, size, obuf); + s.next_out = obuf + hdrlen; + s.avail_out = sizeof(obuf) - hdrlen; + + while (status != Z_STREAM_END) { + unsigned char ibuf[16384]; + + if (size && !s.avail_in) { + ssize_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); + if (xread(fd, ibuf, rsize) != rsize) + die("failed to read %d bytes from '%s'", + (int)rsize, path); + offset += rsize; + if (*already_hashed_to < offset) { + size_t hsize = offset - *already_hashed_to; + if (rsize < hsize) + hsize = rsize; + if (hsize) + git_SHA1_Update(ctx, ibuf, hsize); + *already_hashed_to = offset; + } + s.next_in = ibuf; + s.avail_in = rsize; + size -= rsize; + } + + status = git_deflate(&s, size ? 0 : Z_FINISH); + + if (!s.avail_out || status == Z_STREAM_END) { + if (write_object) { + size_t written = s.next_out - obuf; + + /* would we bust the size limit? 
*/ + if (state->nr_written && + pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + written) { + git_deflate_abort(&s); + return -1; + } + + sha1write(state->f, obuf, written); + state->offset += written; + } + s.next_out = obuf; + s.avail_out = sizeof(obuf); + } + + switch (status) { + case Z_OK: + case Z_BUF_ERROR: + case Z_STREAM_END: + continue; + default: + die("unexpected deflate failure: %d", status); + } + } + git_deflate_end(&s); + return 0; +} + +/* Lazily create backing packfile for the state */ +static void prepare_to_stream(struct bulk_checkin_state *state, + unsigned flags) +{ + if (!(flags & HASH_WRITE_OBJECT) || state->f) + return; + + state->f = create_tmp_packfile(&state->pack_tmp_name); + reset_pack_idx_option(&state->pack_idx_opts); + + /* Pretend we are going to write only one object */ + state->offset = write_pack_header(state->f, 1); + if (!state->offset) + die_errno("unable to write pack header"); +} + +static int deflate_to_pack(struct bulk_checkin_state *state, + unsigned char result_sha1[], + int fd, size_t size, + enum object_type type, const char *path, + unsigned flags) +{ + off_t seekback, already_hashed_to; + git_SHA_CTX ctx; + unsigned char obuf[16384]; + unsigned header_len; + struct sha1file_checkpoint checkpoint; + struct pack_idx_entry *idx = NULL; + + seekback = lseek(fd, 0, SEEK_CUR); + if (seekback == (off_t) -1) + return error("cannot find the current offset"); + + header_len = sprintf((char *)obuf, "%s %" PRIuMAX, + typename(type), (uintmax_t)size) + 1; + git_SHA1_Init(&ctx); + git_SHA1_Update(&ctx, obuf, header_len); + + /* Note: idx is non-NULL when we are writing */ + if ((flags & HASH_WRITE_OBJECT) != 0) + idx = xcalloc(1, sizeof(*idx)); + + already_hashed_to = 0; + + while (1) { + prepare_to_stream(state, flags); + if (idx) { + sha1file_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + } + if (!stream_to_pack(state, &ctx, &already_hashed_to, + fd, size, type, path, 
flags)) + break; + /* + * Writing this object to the current pack will make + * it too big; we need to truncate it, start a new + * pack, and write into it. + */ + if (!idx) + die("BUG: should not happen"); + sha1file_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + finish_bulk_checkin(state); + if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) + return error("cannot seek back"); + } + git_SHA1_Final(result_sha1, &ctx); + if (!idx) + return 0; + + idx->crc32 = crc32_end(state->f); + if (already_written(state, result_sha1)) { + sha1file_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + free(idx); + } else { + hashcpy(idx->sha1, result_sha1); + ALLOC_GROW(state->written, + state->nr_written + 1, + state->alloc_written); + state->written[state->nr_written++] = idx; + } + return 0; +} + +int index_bulk_checkin(unsigned char *sha1, + int fd, size_t size, enum object_type type, + const char *path, unsigned flags) +{ + int status = deflate_to_pack(&state, sha1, fd, size, type, + path, flags); + if (!state.plugged) + finish_bulk_checkin(&state); + return status; +} + +void plug_bulk_checkin(void) +{ + state.plugged = 1; +} + +void unplug_bulk_checkin(void) +{ + state.plugged = 0; + if (state.f) + finish_bulk_checkin(&state); +} diff --git a/bulk-checkin.h b/bulk-checkin.h new file mode 100644 index 0000000000..4f599f8841 --- /dev/null +++ b/bulk-checkin.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2011, Google Inc. 
+ */ +#ifndef BULK_CHECKIN_H +#define BULK_CHECKIN_H + +#include "cache.h" + +extern int index_bulk_checkin(unsigned char sha1[], + int fd, size_t size, enum object_type type, + const char *path, unsigned flags); + +extern void plug_bulk_checkin(void); +extern void unplug_bulk_checkin(void); + +#endif @@ -35,6 +35,7 @@ int git_inflate(git_zstream *, int flush); void git_deflate_init(git_zstream *, int level); void git_deflate_init_gzip(git_zstream *, int level); void git_deflate_end(git_zstream *); +int git_deflate_abort(git_zstream *); int git_deflate_end_gently(git_zstream *); int git_deflate(git_zstream *, int flush); unsigned long git_deflate_bound(git_zstream *, unsigned long); @@ -597,6 +598,7 @@ extern size_t packed_git_window_size; extern size_t packed_git_limit; extern size_t delta_base_cache_limit; extern unsigned long big_file_threshold; +extern unsigned long pack_size_limit_cfg; extern int read_replace_refs; extern int fsync_object_files; extern int core_preload_index; @@ -818,6 +818,10 @@ int git_default_config(const char *var, const char *value, void *dummy) return 0; } + if (!strcmp(var, "pack.packsizelimit")) { + pack_size_limit_cfg = git_config_ulong(var, value); + return 0; + } /* Add other config variables here and to Documentation/config.txt. 
*/ return 0; } diff --git a/csum-file.c b/csum-file.c index fc97d6e045..53f5375b6c 100644 --- a/csum-file.c +++ b/csum-file.c @@ -158,6 +158,26 @@ struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp return f; } +void sha1file_checkpoint(struct sha1file *f, struct sha1file_checkpoint *checkpoint) +{ + sha1flush(f); + checkpoint->offset = f->total; + checkpoint->ctx = f->ctx; +} + +int sha1file_truncate(struct sha1file *f, struct sha1file_checkpoint *checkpoint) +{ + off_t offset = checkpoint->offset; + + if (ftruncate(f->fd, offset) || + lseek(f->fd, offset, SEEK_SET) != offset) + return -1; + f->total = offset; + f->ctx = checkpoint->ctx; + f->offset = 0; /* sha1flush() was called in checkpoint */ + return 0; +} + void crc32_begin(struct sha1file *f) { f->crc32 = crc32(0, NULL, 0); diff --git a/csum-file.h b/csum-file.h index 6a7967c6bf..3b540bdc21 100644 --- a/csum-file.h +++ b/csum-file.h @@ -17,6 +17,15 @@ struct sha1file { unsigned char buffer[8192]; }; +/* Checkpoint */ +struct sha1file_checkpoint { + off_t offset; + git_SHA_CTX ctx; +}; + +extern void sha1file_checkpoint(struct sha1file *, struct sha1file_checkpoint *); +extern int sha1file_truncate(struct sha1file *, struct sha1file_checkpoint *); + /* sha1close flags */ #define CSUM_CLOSE 1 #define CSUM_FSYNC 2 diff --git a/environment.c b/environment.c index 2c41d7d6cb..c93b8f44df 100644 --- a/environment.c +++ b/environment.c @@ -62,6 +62,7 @@ int grafts_replace_parents = 1; int core_apply_sparse_checkout; int merge_log_config = -1; struct startup_info *startup_info; +unsigned long pack_size_limit_cfg; /* Parallel index stat data preload? 
*/ int core_preload_index = 0; diff --git a/fast-import.c b/fast-import.c index f4bfe0f665..350b2e9e10 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1143,17 +1143,11 @@ static int store_object( return 0; } -static void truncate_pack(off_t to, git_SHA_CTX *ctx) +static void truncate_pack(struct sha1file_checkpoint *checkpoint) { - if (ftruncate(pack_data->pack_fd, to) - || lseek(pack_data->pack_fd, to, SEEK_SET) != to) + if (sha1file_truncate(pack_file, checkpoint)) die_errno("cannot truncate pack to skip duplicate"); - pack_size = to; - - /* yes this is a layering violation */ - pack_file->total = to; - pack_file->offset = 0; - pack_file->ctx = *ctx; + pack_size = checkpoint->offset; } static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) @@ -1166,8 +1160,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) unsigned long hdrlen; off_t offset; git_SHA_CTX c; - git_SHA_CTX pack_file_ctx; git_zstream s; + struct sha1file_checkpoint checkpoint; int status = Z_OK; /* Determine if we should auto-checkpoint. */ @@ -1175,11 +1169,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) || (pack_size + 60 + len) < pack_size) cycle_packfile(); - offset = pack_size; - - /* preserve the pack_file SHA1 ctx in case we have to truncate later */ - sha1flush(pack_file); - pack_file_ctx = pack_file->ctx; + sha1file_checkpoint(pack_file, &checkpoint); + offset = checkpoint.offset; hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1; if (out_sz <= hdrlen) @@ -1245,14 +1236,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; - truncate_pack(offset, &pack_file_ctx); + truncate_pack(&checkpoint); } else if (find_sha1_pack(sha1, packed_git)) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; e->idx.offset = 1; /* just not zero! 
*/ duplicate_count_by_type[OBJ_BLOB]++; - truncate_pack(offset, &pack_file_ctx); + truncate_pack(&checkpoint); } else { e->depth = 0; diff --git a/pack-write.c b/pack-write.c index f84adde3eb..de2bd01414 100644 --- a/pack-write.c +++ b/pack-write.c @@ -182,6 +182,18 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec return index_name; } +off_t write_pack_header(struct sha1file *f, uint32_t nr_entries) +{ + struct pack_header hdr; + + hdr.hdr_signature = htonl(PACK_SIGNATURE); + hdr.hdr_version = htonl(PACK_VERSION); + hdr.hdr_entries = htonl(nr_entries); + if (sha1write(f, &hdr, sizeof(hdr))) + return 0; + return sizeof(hdr); +} + /* * Update pack header with object_count and compute new SHA1 for pack data * associated to pack_fd, and write that SHA1 at the end. That new SHA1 @@ -320,3 +332,44 @@ int encode_in_pack_object_header(enum object_type type, uintmax_t size, unsigned *hdr = c; return n; } + +struct sha1file *create_tmp_packfile(char **pack_tmp_name) +{ + char tmpname[PATH_MAX]; + int fd; + + fd = odb_mkstemp(tmpname, sizeof(tmpname), "pack/tmp_pack_XXXXXX"); + *pack_tmp_name = xstrdup(tmpname); + return sha1fd(fd, *pack_tmp_name); +} + +void finish_tmp_packfile(char *name_buffer, + const char *pack_tmp_name, + struct pack_idx_entry **written_list, + uint32_t nr_written, + struct pack_idx_option *pack_idx_opts, + unsigned char sha1[]) +{ + const char *idx_tmp_name; + char *end_of_name_prefix = strrchr(name_buffer, 0); + + if (adjust_shared_perm(pack_tmp_name)) + die_errno("unable to make temporary pack file readable"); + + idx_tmp_name = write_idx_file(NULL, written_list, nr_written, + pack_idx_opts, sha1); + if (adjust_shared_perm(idx_tmp_name)) + die_errno("unable to make temporary index file readable"); + + sprintf(end_of_name_prefix, "%s.pack", sha1_to_hex(sha1)); + free_pack_by_name(name_buffer); + + if (rename(pack_tmp_name, name_buffer)) + die_errno("unable to rename temporary pack file"); + + 
sprintf(end_of_name_prefix, "%s.idx", sha1_to_hex(sha1)); + if (rename(idx_tmp_name, name_buffer)) + die_errno("unable to rename temporary index file"); + + free((void *)idx_tmp_name); +} @@ -2,6 +2,7 @@ #define PACK_H #include "object.h" +#include "csum-file.h" /* * Packed object header @@ -79,6 +80,7 @@ extern const char *write_idx_file(const char *index_name, struct pack_idx_entry extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr); extern int verify_pack_index(struct packed_git *); extern int verify_pack(struct packed_git *, verify_fn fn, struct progress *, uint32_t); +extern off_t write_pack_header(struct sha1file *f, uint32_t); extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t); extern char *index_pack_lockfile(int fd); extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *); @@ -87,4 +89,8 @@ extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned ch #define PH_ERROR_PACK_SIGNATURE (-2) #define PH_ERROR_PROTOCOL (-3) extern int read_pack_header(int fd, struct pack_header *); + +extern struct sha1file *create_tmp_packfile(char **pack_tmp_name); +extern void finish_tmp_packfile(char *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[]); + #endif diff --git a/sha1_file.c b/sha1_file.c index 956422ba4a..f291f3f0f7 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -18,6 +18,7 @@ #include "refs.h" #include "pack-revindex.h" #include "sha1-lookup.h" +#include "bulk-checkin.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -2680,10 +2681,8 @@ static int index_core(unsigned char *sha1, int fd, size_t size, } /* - * This creates one packfile per large blob, because the caller - * immediately wants the result sha1, and fast-import can report the - * object 
name via marks mechanism only by closing the created - * packfile. + * This creates one packfile per large blob unless bulk-checkin + * machinery is "plugged". * * This also bypasses the usual "convert-to-git" dance, and that is on * purpose. We could write a streaming version of the converting @@ -2697,65 +2696,7 @@ static int index_stream(unsigned char *sha1, int fd, size_t size, enum object_type type, const char *path, unsigned flags) { - struct child_process fast_import; - char export_marks[512]; - const char *argv[] = { "fast-import", "--quiet", export_marks, NULL }; - char tmpfile[512]; - char fast_import_cmd[512]; - char buf[512]; - int len, tmpfd; - - strcpy(tmpfile, git_path("hashstream_XXXXXX")); - tmpfd = git_mkstemp_mode(tmpfile, 0600); - if (tmpfd < 0) - die_errno("cannot create tempfile: %s", tmpfile); - if (close(tmpfd)) - die_errno("cannot close tempfile: %s", tmpfile); - sprintf(export_marks, "--export-marks=%s", tmpfile); - - memset(&fast_import, 0, sizeof(fast_import)); - fast_import.in = -1; - fast_import.argv = argv; - fast_import.git_cmd = 1; - if (start_command(&fast_import)) - die_errno("index-stream: git fast-import failed"); - - len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n", - (unsigned long) size); - write_or_whine(fast_import.in, fast_import_cmd, len, - "index-stream: feeding fast-import"); - while (size) { - char buf[10240]; - size_t sz = size < sizeof(buf) ? 
size : sizeof(buf); - ssize_t actual; - - actual = read_in_full(fd, buf, sz); - if (actual < 0) - die_errno("index-stream: reading input"); - if (write_in_full(fast_import.in, buf, actual) != actual) - die_errno("index-stream: feeding fast-import"); - size -= actual; - } - if (close(fast_import.in)) - die_errno("index-stream: closing fast-import"); - if (finish_command(&fast_import)) - die_errno("index-stream: finishing fast-import"); - - tmpfd = open(tmpfile, O_RDONLY); - if (tmpfd < 0) - die_errno("index-stream: cannot open fast-import mark"); - len = read(tmpfd, buf, sizeof(buf)); - if (len < 0) - die_errno("index-stream: reading fast-import mark"); - if (close(tmpfd) < 0) - die_errno("index-stream: closing fast-import mark"); - if (unlink(tmpfile)) - die_errno("index-stream: unlinking fast-import mark"); - if (len != 44 || - memcmp(":1 ", buf, 3) || - get_sha1_hex(buf + 3, sha1)) - die_errno("index-stream: unexpected fast-import mark: <%s>", buf); - return 0; + return index_bulk_checkin(sha1, fd, size, type, path, flags); } int index_fd(unsigned char *sha1, int fd, struct stat *st, diff --git a/t/t1050-large.sh b/t/t1050-large.sh index deba111bd7..29d6024b7f 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -7,21 +7,97 @@ test_description='adding and checking out large blobs' test_expect_success setup ' git config core.bigfilethreshold 200k && - echo X | dd of=large bs=1k seek=2000 + echo X | dd of=large1 bs=1k seek=2000 && + echo X | dd of=large2 bs=1k seek=2000 && + echo X | dd of=large3 bs=1k seek=2000 && + echo Y | dd of=huge bs=1k seek=2500 ' -test_expect_success 'add a large file' ' - git add large && - # make sure we got a packfile and no loose objects - test -f .git/objects/pack/pack-*.pack && - test ! -f .git/objects/??/?????????????????????????????????????? 
+test_expect_success 'add a large file or two' ' + git add large1 huge large2 && + # make sure we got a single packfile and no loose objects + bad= count=0 idx= && + for p in .git/objects/pack/pack-*.pack + do + count=$(( $count + 1 )) + if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx" + then + continue + fi + bad=t + done && + test -z "$bad" && + test $count = 1 && + cnt=$(git show-index <"$idx" | wc -l) && + test $cnt = 2 && + for l in .git/objects/??/?????????????????????????????????????? + do + test -f "$l" || continue + bad=t + done && + test -z "$bad" && + + # attempt to add another copy of the same + git add large3 && + bad= count=0 && + for p in .git/objects/pack/pack-*.pack + do + count=$(( $count + 1 )) + if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx" + then + continue + fi + bad=t + done && + test -z "$bad" && + test $count = 1 ' test_expect_success 'checkout a large file' ' - large=$(git rev-parse :large) && - git update-index --add --cacheinfo 100644 $large another && + large1=$(git rev-parse :large1) && + git update-index --add --cacheinfo 100644 $large1 another && git checkout another && - cmp large another ;# this must not be test_cmp + cmp large1 another ;# this must not be test_cmp +' + +test_expect_success 'packsize limit' ' + test_create_repo mid && + ( + cd mid && + git config core.bigfilethreshold 64k && + git config pack.packsizelimit 256k && + + # mid1 and mid2 will fit within 256k limit but + # appending mid3 will bust the limit and will + # result in a separate packfile. 
+ test-genrandom "a" $(( 66 * 1024 )) >mid1 && + test-genrandom "b" $(( 80 * 1024 )) >mid2 && + test-genrandom "c" $(( 128 * 1024 )) >mid3 && + git add mid1 mid2 mid3 && + + count=0 + for pi in .git/objects/pack/pack-*.idx + do + test -f "$pi" && count=$(( $count + 1 )) + done && + test $count = 2 && + + ( + git hash-object --stdin <mid1 + git hash-object --stdin <mid2 + git hash-object --stdin <mid3 + ) | + sort >expect && + + for pi in .git/objects/pack/pack-*.idx + do + git show-index <"$pi" + done | + sed -e "s/^[0-9]* \([0-9a-f]*\) .*/\1/" | + sort >actual && + + test_cmp expect actual + ) ' test_done @@ -188,13 +188,20 @@ void git_deflate_init_gzip(git_zstream *strm, int level) strm->z.msg ? strm->z.msg : "no message"); } -void git_deflate_end(git_zstream *strm) +int git_deflate_abort(git_zstream *strm) { int status; zlib_pre_call(strm); status = deflateEnd(&strm->z); zlib_post_call(strm); + return status; +} + +void git_deflate_end(git_zstream *strm) +{ + int status = git_deflate_abort(strm); + if (status == Z_OK) return; error("deflateEnd: %s (%s)", zerr_to_string(status), |