diff options
author | Junio C Hamano <gitster@pobox.com> | 2012-05-10 10:49:13 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2012-05-10 10:49:13 -0700 |
commit | d41355fc3126c73d7fd57d0194fa17dab8f4fc21 (patch) | |
tree | 4bfe9e5daf7bd5073a967f790d0d455829c7218e | |
parent | aa6912b081d11dfa8afaab3a3c5665833e8349df (diff) | |
parent | 2dd42334dea6619c0774511beda9a02642088f67 (diff) | |
download | git-d41355fc3126c73d7fd57d0194fa17dab8f4fc21.tar.gz |
Merge branch 'nd/stream-to-archive'
Stream large blobs directly out to archive files without slurping
everything in memory first.
By René Scharfe (6) and Nguyễn Thái Ngọc Duy (4)
* nd/stream-to-archive:
t5000: rationalize unzip tests
archive-zip: streaming for deflated files
archive-zip: streaming for stored files
archive-zip: factor out helpers for writing sizes and CRC
archive-zip: remove uncompressed_size
archive-tar: stream large blobs to tar file
archive: delegate blob reading to backend
archive-tar: unindent write_tar_entry by one level
archive-tar: turn write_tar_entry into blob-writing only
streaming: void pointer instead of char pointer
-rw-r--r-- | archive-tar.c | 207 | ||||
-rw-r--r-- | archive-zip.c | 200 | ||||
-rw-r--r-- | archive.c | 28 | ||||
-rw-r--r-- | archive.h | 10 | ||||
-rw-r--r-- | streaming.c | 2 | ||||
-rw-r--r-- | streaming.h | 2 | ||||
-rwxr-xr-x | t/t1050-large.sh | 12 | ||||
-rwxr-xr-x | t/t5000-tar-tree.sh | 78 |
8 files changed, 404 insertions, 135 deletions
diff --git a/archive-tar.c b/archive-tar.c index 20af0051a3..93387ea336 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -4,6 +4,7 @@ #include "cache.h" #include "tar.h" #include "archive.h" +#include "streaming.h" #include "run-command.h" #define RECORDSIZE (512) @@ -30,10 +31,9 @@ static void write_if_needed(void) * queues up writes, so that all our write(2) calls write exactly one * full block; pads writes to RECORDSIZE */ -static void write_blocked(const void *data, unsigned long size) +static void do_write_blocked(const void *data, unsigned long size) { const char *buf = data; - unsigned long tail; if (offset) { unsigned long chunk = BLOCKSIZE - offset; @@ -54,6 +54,11 @@ static void write_blocked(const void *data, unsigned long size) memcpy(block + offset, buf, size); offset += size; } +} + +static void finish_record(void) +{ + unsigned long tail; tail = offset % RECORDSIZE; if (tail) { memset(block + offset, 0, RECORDSIZE - tail); @@ -62,6 +67,12 @@ static void write_blocked(const void *data, unsigned long size) write_if_needed(); } +static void write_blocked(const void *data, unsigned long size) +{ + do_write_blocked(data, size); + finish_record(); +} + /* * The end of tar archives is marked by 2*512 nul bytes and after that * follows the rest of the block (if any). @@ -78,6 +89,33 @@ static void write_trailer(void) } /* + * queues up writes, so that all our write(2) calls write exactly one + * full block; pads writes to RECORDSIZE + */ +static int stream_blocked(const unsigned char *sha1) +{ + struct git_istream *st; + enum object_type type; + unsigned long sz; + char buf[BLOCKSIZE]; + ssize_t readlen; + + st = open_istream(sha1, &type, &sz, NULL); + if (!st) + return error("cannot stream blob %s", sha1_to_hex(sha1)); + for (;;) { + readlen = read_istream(st, buf, sizeof(buf)); + if (readlen <= 0) + break; + do_write_blocked(buf, readlen); + } + close_istream(st); + if (!readlen) + finish_record(); + return readlen; +} + +/* * pax extended header records have the format "%u %s=%s\n". %u contains * the size of the whole string (including the %u), the first %s is the * keyword, the second one is the value. This function constructs such a @@ -123,56 +161,101 @@ static size_t get_path_prefix(const char *path, size_t pathlen, size_t maxlen) return i; } +static void prepare_header(struct archiver_args *args, + struct ustar_header *header, + unsigned int mode, unsigned long size) +{ + sprintf(header->mode, "%07o", mode & 07777); + sprintf(header->size, "%011lo", S_ISREG(mode) ? size : 0); + sprintf(header->mtime, "%011lo", (unsigned long) args->time); + + sprintf(header->uid, "%07o", 0); + sprintf(header->gid, "%07o", 0); + strlcpy(header->uname, "root", sizeof(header->uname)); + strlcpy(header->gname, "root", sizeof(header->gname)); + sprintf(header->devmajor, "%07o", 0); + sprintf(header->devminor, "%07o", 0); + + memcpy(header->magic, "ustar", 6); + memcpy(header->version, "00", 2); + + sprintf(header->chksum, "%07o", ustar_header_chksum(header)); +} + +static int write_extended_header(struct archiver_args *args, + const unsigned char *sha1, + const void *buffer, unsigned long size) +{ + struct ustar_header header; + unsigned int mode; + memset(&header, 0, sizeof(header)); + *header.typeflag = TYPEFLAG_EXT_HEADER; + mode = 0100666; + sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1)); + prepare_header(args, &header, mode, size); + write_blocked(&header, sizeof(header)); + write_blocked(buffer, size); + return 0; +} + static int write_tar_entry(struct archiver_args *args, - const unsigned char *sha1, const char *path, size_t pathlen, - unsigned int mode, void *buffer, unsigned long size) + const unsigned char *sha1, + const char *path, size_t pathlen, + unsigned int mode) { struct ustar_header header; struct strbuf ext_header = STRBUF_INIT; + unsigned int old_mode = mode; + unsigned long size; + void *buffer; int err = 0; memset(&header, 0, sizeof(header)); - if (!sha1) { - *header.typeflag = TYPEFLAG_GLOBAL_HEADER; - mode = 0100666; - strcpy(header.name, "pax_global_header"); - } else if (!path) { - *header.typeflag = TYPEFLAG_EXT_HEADER; - mode = 0100666; - sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1)); + if (S_ISDIR(mode) || S_ISGITLINK(mode)) { + *header.typeflag = TYPEFLAG_DIR; + mode = (mode | 0777) & ~tar_umask; + } else if (S_ISLNK(mode)) { + *header.typeflag = TYPEFLAG_LNK; + mode |= 0777; + } else if (S_ISREG(mode)) { + *header.typeflag = TYPEFLAG_REG; + mode = (mode | ((mode & 0100) ? 0777 : 0666)) & ~tar_umask; } else { - if (S_ISDIR(mode) || S_ISGITLINK(mode)) { - *header.typeflag = TYPEFLAG_DIR; - mode = (mode | 0777) & ~tar_umask; - } else if (S_ISLNK(mode)) { - *header.typeflag = TYPEFLAG_LNK; - mode |= 0777; - } else if (S_ISREG(mode)) { - *header.typeflag = TYPEFLAG_REG; - mode = (mode | ((mode & 0100) ? 0777 : 0666)) & ~tar_umask; + return error("unsupported file mode: 0%o (SHA1: %s)", + mode, sha1_to_hex(sha1)); + } + if (pathlen > sizeof(header.name)) { + size_t plen = get_path_prefix(path, pathlen, + sizeof(header.prefix)); + size_t rest = pathlen - plen - 1; + if (plen > 0 && rest <= sizeof(header.name)) { + memcpy(header.prefix, path, plen); + memcpy(header.name, path + plen + 1, rest); } else { - return error("unsupported file mode: 0%o (SHA1: %s)", - mode, sha1_to_hex(sha1)); + sprintf(header.name, "%s.data", + sha1_to_hex(sha1)); + strbuf_append_ext_header(&ext_header, "path", + path, pathlen); } - if (pathlen > sizeof(header.name)) { - size_t plen = get_path_prefix(path, pathlen, - sizeof(header.prefix)); - size_t rest = pathlen - plen - 1; - if (plen > 0 && rest <= sizeof(header.name)) { - memcpy(header.prefix, path, plen); - memcpy(header.name, path + plen + 1, rest); - } else { - sprintf(header.name, "%s.data", - sha1_to_hex(sha1)); - strbuf_append_ext_header(&ext_header, "path", - path, pathlen); - } - } else - memcpy(header.name, path, pathlen); + } else + memcpy(header.name, path, pathlen); + + if (S_ISREG(mode) && !args->convert && + sha1_object_info(sha1, &size) == OBJ_BLOB && + size > big_file_threshold) + buffer = NULL; + else if (S_ISLNK(mode) || S_ISREG(mode)) { + enum object_type type; + buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size); + if (!buffer) + return error("cannot read %s", sha1_to_hex(sha1)); + } else { + buffer = NULL; + size = 0; } - if (S_ISLNK(mode) && buffer) { + if (S_ISLNK(mode)) { if (size > sizeof(header.linkname)) { sprintf(header.linkname, "see %s.paxheader", sha1_to_hex(sha1)); @@ -182,32 +265,25 @@ static int write_tar_entry(struct archiver_args *args, memcpy(header.linkname, buffer, size); } - sprintf(header.mode, "%07o", mode & 07777); - sprintf(header.size, "%011lo", S_ISREG(mode) ? size : 0); - sprintf(header.mtime, "%011lo", (unsigned long) args->time); - - sprintf(header.uid, "%07o", 0); - sprintf(header.gid, "%07o", 0); - strlcpy(header.uname, "root", sizeof(header.uname)); - strlcpy(header.gname, "root", sizeof(header.gname)); - sprintf(header.devmajor, "%07o", 0); - sprintf(header.devminor, "%07o", 0); - - memcpy(header.magic, "ustar", 6); - memcpy(header.version, "00", 2); - - sprintf(header.chksum, "%07o", ustar_header_chksum(&header)); + prepare_header(args, &header, mode, size); if (ext_header.len > 0) { - err = write_tar_entry(args, sha1, NULL, 0, 0, ext_header.buf, - ext_header.len); - if (err) + err = write_extended_header(args, sha1, ext_header.buf, + ext_header.len); + if (err) { + free(buffer); return err; + } } strbuf_release(&ext_header); write_blocked(&header, sizeof(header)); - if (S_ISREG(mode) && buffer && size > 0) - write_blocked(buffer, size); + if (S_ISREG(mode) && size > 0) { + if (buffer) + write_blocked(buffer, size); + else + err = stream_blocked(sha1); + } + free(buffer); return err; } @@ -215,11 +291,18 @@ static int write_global_extended_header(struct archiver_args *args) { const unsigned char *sha1 = args->commit_sha1; struct strbuf ext_header = STRBUF_INIT; - int err; + struct ustar_header header; + unsigned int mode; + int err = 0; strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40); - err = write_tar_entry(args, NULL, NULL, 0, 0, ext_header.buf, - ext_header.len); + memset(&header, 0, sizeof(header)); + *header.typeflag = TYPEFLAG_GLOBAL_HEADER; + mode = 0100666; + strcpy(header.name, "pax_global_header"); + prepare_header(args, &header, mode, ext_header.len); + write_blocked(&header, sizeof(header)); + write_blocked(ext_header.buf, ext_header.len); strbuf_release(&ext_header); return err; } diff --git a/archive-zip.c b/archive-zip.c index 02d1f3787a..f5af81f904 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -3,6 +3,7 @@ */ #include "cache.h" #include "archive.h" +#include "streaming.h" static int zip_date; static int zip_time; @@ -15,6 +16,7 @@ static unsigned int zip_dir_offset; static unsigned int zip_dir_entries; #define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024) +#define ZIP_STREAM (8) struct zip_local_header { unsigned char magic[4]; @@ -31,6 +33,14 @@ struct zip_local_header { unsigned char _end[1]; }; +struct zip_data_desc { + unsigned char magic[4]; + unsigned char crc32[4]; + unsigned char compressed_size[4]; + unsigned char size[4]; + unsigned char _end[1]; +}; + struct zip_dir_header { unsigned char magic[4]; unsigned char creator_version[2]; @@ -70,6 +80,7 @@ struct zip_dir_trailer { * we're interested in. */ #define ZIP_LOCAL_HEADER_SIZE offsetof(struct zip_local_header, _end) +#define ZIP_DATA_DESC_SIZE offsetof(struct zip_data_desc, _end) #define ZIP_DIR_HEADER_SIZE offsetof(struct zip_dir_header, _end) #define ZIP_DIR_TRAILER_SIZE offsetof(struct zip_dir_trailer, _end) @@ -120,20 +131,59 @@ static void *zlib_deflate(void *data, unsigned long size, return buffer; } +static void write_zip_data_desc(unsigned long size, + unsigned long compressed_size, + unsigned long crc) +{ + struct zip_data_desc trailer; + + copy_le32(trailer.magic, 0x08074b50); + copy_le32(trailer.crc32, crc); + copy_le32(trailer.compressed_size, compressed_size); + copy_le32(trailer.size, size); + write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE); +} + +static void set_zip_dir_data_desc(struct zip_dir_header *header, + unsigned long size, + unsigned long compressed_size, + unsigned long crc) +{ + copy_le32(header->crc32, crc); + copy_le32(header->compressed_size, compressed_size); + copy_le32(header->size, size); +} + +static void set_zip_header_data_desc(struct zip_local_header *header, + unsigned long size, + unsigned long compressed_size, + unsigned long crc) +{ + copy_le32(header->crc32, crc); + copy_le32(header->compressed_size, compressed_size); + copy_le32(header->size, size); +} + +#define STREAM_BUFFER_SIZE (1024 * 16) + static int write_zip_entry(struct archiver_args *args, - const unsigned char *sha1, const char *path, size_t pathlen, - unsigned int mode, void *buffer, unsigned long size) + const unsigned char *sha1, + const char *path, size_t pathlen, + unsigned int mode) { struct zip_local_header header; struct zip_dir_header dirent; unsigned long attr2; unsigned long compressed_size; - unsigned long uncompressed_size; unsigned long crc; unsigned long direntsize; int method; unsigned char *out; void *deflated = NULL; + void *buffer; + struct git_istream *stream = NULL; + unsigned long flags = 0; + unsigned long size; crc = crc32(0, NULL, 0); @@ -146,24 +196,43 @@ static int write_zip_entry(struct archiver_args *args, method = 0; attr2 = 16; out = NULL; - uncompressed_size = 0; + size = 0; compressed_size = 0; + buffer = NULL; + size = 0; } else if (S_ISREG(mode) || S_ISLNK(mode)) { + enum object_type type = sha1_object_info(sha1, &size); + method = 0; attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : (mode & 0111) ? ((mode) << 16) : 0; - if (S_ISREG(mode) && args->compression_level != 0) + if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = 8; - crc = crc32(crc, buffer, size); - out = buffer; - uncompressed_size = size; compressed_size = size; + + if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && + size > big_file_threshold) { + stream = open_istream(sha1, &type, &size, NULL); + if (!stream) + return error("cannot stream blob %s", + sha1_to_hex(sha1)); + flags |= ZIP_STREAM; + out = buffer = NULL; + } else { + buffer = sha1_file_to_archive(args, path, sha1, mode, + &type, &size); + if (!buffer) + return error("cannot read %s", + sha1_to_hex(sha1)); + crc = crc32(crc, buffer, size); + out = buffer; + } } else { return error("unsupported file mode: 0%o (SHA1: %s)", mode, sha1_to_hex(sha1)); } - if (method == 8) { + if (buffer && method == 8) { deflated = zlib_deflate(buffer, size, args->compression_level, &compressed_size); if (deflated && compressed_size - 6 < size) { @@ -188,13 +257,11 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.creator_version, S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0); copy_le16(dirent.version, 10); - copy_le16(dirent.flags, 0); + copy_le16(dirent.flags, flags); copy_le16(dirent.compression_method, method); copy_le16(dirent.mtime, zip_time); copy_le16(dirent.mdate, zip_date); - copy_le32(dirent.crc32, crc); - copy_le32(dirent.compressed_size, compressed_size); - copy_le32(dirent.size, uncompressed_size); + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); copy_le16(dirent.filename_length, pathlen); copy_le16(dirent.extra_length, 0); copy_le16(dirent.comment_length, 0); @@ -202,33 +269,120 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.attr1, 0); copy_le32(dirent.attr2, attr2); copy_le32(dirent.offset, zip_offset); - memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); - zip_dir_offset += ZIP_DIR_HEADER_SIZE; - memcpy(zip_dir + zip_dir_offset, path, pathlen); - zip_dir_offset += pathlen; - zip_dir_entries++; copy_le32(header.magic, 0x04034b50); copy_le16(header.version, 10); - copy_le16(header.flags, 0); + copy_le16(header.flags, flags); copy_le16(header.compression_method, method); copy_le16(header.mtime, zip_time); copy_le16(header.mdate, zip_date); - copy_le32(header.crc32, crc); - copy_le32(header.compressed_size, compressed_size); - copy_le32(header.size, uncompressed_size); + if (flags & ZIP_STREAM) + set_zip_header_data_desc(&header, 0, 0, 0); + else + set_zip_header_data_desc(&header, size, compressed_size, crc); copy_le16(header.filename_length, pathlen); copy_le16(header.extra_length, 0); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); zip_offset += ZIP_LOCAL_HEADER_SIZE; write_or_die(1, path, pathlen); zip_offset += pathlen; - if (compressed_size > 0) { + if (stream && method == 0) { + unsigned char buf[STREAM_BUFFER_SIZE]; + ssize_t readlen; + + for (;;) { + readlen = read_istream(stream, buf, sizeof(buf)); + if (readlen <= 0) + break; + crc = crc32(crc, buf, readlen); + write_or_die(1, buf, readlen); + } + close_istream(stream); + if (readlen) + return readlen; + + compressed_size = size; + zip_offset += compressed_size; + + write_zip_data_desc(size, compressed_size, crc); + zip_offset += ZIP_DATA_DESC_SIZE; + + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); + } else if (stream && method == 8) { + unsigned char buf[STREAM_BUFFER_SIZE]; + ssize_t readlen; + git_zstream zstream; + int result; + size_t out_len; + unsigned char compressed[STREAM_BUFFER_SIZE * 2]; + + memset(&zstream, 0, sizeof(zstream)); + git_deflate_init(&zstream, args->compression_level); + + compressed_size = 0; + zstream.next_out = compressed; + zstream.avail_out = sizeof(compressed); + + for (;;) { + readlen = read_istream(stream, buf, sizeof(buf)); + if (readlen <= 0) + break; + crc = crc32(crc, buf, readlen); + + zstream.next_in = buf; + zstream.avail_in = readlen; + result = git_deflate(&zstream, 0); + if (result != Z_OK) + die("deflate error (%d)", result); + out = compressed; + if (!compressed_size) + out += 2; + out_len = zstream.next_out - out; + + if (out_len > 0) { + write_or_die(1, out, out_len); + compressed_size += out_len; + zstream.next_out = compressed; + zstream.avail_out = sizeof(compressed); + } + + } + close_istream(stream); + if (readlen) + return readlen; + + zstream.next_in = buf; + zstream.avail_in = 0; + result = git_deflate(&zstream, Z_FINISH); + if (result != Z_STREAM_END) + die("deflate error (%d)", result); + + git_deflate_end(&zstream); + out = compressed; + if (!compressed_size) + out += 2; + out_len = zstream.next_out - out - 4; + write_or_die(1, out, out_len); + compressed_size += out_len; + zip_offset += compressed_size; + + write_zip_data_desc(size, compressed_size, crc); + zip_offset += ZIP_DATA_DESC_SIZE; + + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); + } else if (compressed_size > 0) { write_or_die(1, out, compressed_size); zip_offset += compressed_size; } free(deflated); + free(buffer); + + memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); + zip_dir_offset += ZIP_DIR_HEADER_SIZE; + memcpy(zip_dir + zip_dir_offset, path, pathlen); + zip_dir_offset += pathlen; + zip_dir_entries++; return 0; } @@ -59,12 +59,15 @@ static void format_subst(const struct commit *commit, free(to_free); } -static void *sha1_file_to_archive(const char *path, const unsigned char *sha1, - unsigned int mode, enum object_type *type, - unsigned long *sizep, const struct commit *commit) +void *sha1_file_to_archive(const struct archiver_args *args, + const char *path, const unsigned char *sha1, + unsigned int mode, enum object_type *type, + unsigned long *sizep) { void *buffer; + const struct commit *commit = args->convert ? args->commit : NULL; + path += args->baselen; buffer = read_sha1_file(sha1, type, sizep); if (buffer && S_ISREG(mode)) { struct strbuf buf = STRBUF_INIT; @@ -109,12 +112,9 @@ static int write_archive_entry(const unsigned char *sha1, const char *base, write_archive_entry_fn_t write_entry = c->write_entry; struct git_attr_check check[2]; const char *path_without_prefix; - int convert = 0; int err; - enum object_type type; - unsigned long size; - void *buffer; + args->convert = 0; strbuf_reset(&path); strbuf_grow(&path, PATH_MAX); strbuf_add(&path, args->base, args->baselen); @@ -126,28 +126,22 @@ static int write_archive_entry(const unsigned char *sha1, const char *base, if (!git_check_attr(path_without_prefix, ARRAY_SIZE(check), check)) { if (ATTR_TRUE(check[0].value)) return 0; - convert = ATTR_TRUE(check[1].value); + args->convert = ATTR_TRUE(check[1].value); } if (S_ISDIR(mode) || S_ISGITLINK(mode)) { strbuf_addch(&path, '/'); if (args->verbose) fprintf(stderr, "%.*s\n", (int)path.len, path.buf); - err = write_entry(args, sha1, path.buf, path.len, mode, NULL, 0); + err = write_entry(args, sha1, path.buf, path.len, mode); if (err) return err; return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0); } - buffer = sha1_file_to_archive(path_without_prefix, sha1, mode, - &type, &size, convert ? args->commit : NULL); - if (!buffer) - return error("cannot read %s", sha1_to_hex(sha1)); if (args->verbose) fprintf(stderr, "%.*s\n", (int)path.len, path.buf); - err = write_entry(args, sha1, path.buf, path.len, mode, buffer, size); - free(buffer); - return err; + return write_entry(args, sha1, path.buf, path.len, mode); } int write_archive_entries(struct archiver_args *args, @@ -167,7 +161,7 @@ int write_archive_entries(struct archiver_args *args, if (args->verbose) fprintf(stderr, "%.*s\n", (int)len, args->base); err = write_entry(args, args->tree->object.sha1, args->base, - len, 040777, NULL, 0); + len, 040777); if (err) return err; } @@ -11,6 +11,7 @@ struct archiver_args { const char **pathspec; unsigned int verbose : 1; unsigned int worktree_attributes : 1; + unsigned int convert : 1; int compression_level; }; @@ -27,11 +28,18 @@ extern void register_archiver(struct archiver *); extern void init_tar_archiver(void); extern void init_zip_archiver(void); -typedef int (*write_archive_entry_fn_t)(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, unsigned int mode, void *buffer, unsigned long size); +typedef int (*write_archive_entry_fn_t)(struct archiver_args *args, + const unsigned char *sha1, + const char *path, size_t pathlen, + unsigned int mode); extern int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry); extern int write_archive(int argc, const char **argv, const char *prefix, int setup_prefix, const char *name_hint, int remote); const char *archive_format_from_filename(const char *filename); +extern void *sha1_file_to_archive(const struct archiver_args *args, + const char *path, const unsigned char *sha1, + unsigned int mode, enum object_type *type, + unsigned long *sizep); #endif /* ARCHIVE_H */ diff --git a/streaming.c b/streaming.c index 7e7ee2be6f..3a3cd1206a 100644 --- a/streaming.c +++ b/streaming.c @@ -99,7 +99,7 @@ int close_istream(struct git_istream *st) return r; } -ssize_t read_istream(struct git_istream *st, char *buf, size_t sz) +ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) { return st->vtbl->read(st, buf, sz); } diff --git a/streaming.h b/streaming.h index 3e827709c8..1d05c2a465 100644 --- a/streaming.h +++ b/streaming.h @@ -10,7 +10,7 @@ struct git_istream; extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *); extern int close_istream(struct git_istream *); -extern ssize_t read_istream(struct git_istream *, char *, size_t); +extern ssize_t read_istream(struct git_istream *, void *, size_t); extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek); diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 4d127f19b7..55ed955cef 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -134,4 +134,16 @@ test_expect_success 'repack' ' git repack -ad ' +test_expect_success 'tar achiving' ' + git archive --format=tar HEAD >/dev/null +' + +test_expect_success 'zip achiving, store only' ' + git archive --format=zip -0 HEAD >/dev/null +' + +test_expect_success 'zip achiving, deflate' ' + git archive --format=zip HEAD >/dev/null +' + test_done diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 527c9e7548..ecf00edab2 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -31,6 +31,26 @@ GUNZIP=${GUNZIP:-gzip -d} SUBSTFORMAT=%H%n +check_zip() { + zipfile=$1.zip + listfile=$1.lst + dir=$1 + dir_with_prefix=$dir/$2 + + test_expect_success UNZIP " extract ZIP archive" " + (mkdir $dir && cd $dir && $UNZIP ../$zipfile) + " + + test_expect_success UNZIP " validate filenames" " + (cd ${dir_with_prefix}a && find .) | sort >$listfile && + test_cmp a.lst $listfile + " + + test_expect_success UNZIP " validate file contents" " + diff -r a ${dir_with_prefix}a + " +} + test_expect_success \ 'populate workdir' \ 'mkdir a b c && @@ -84,6 +104,12 @@ test_expect_success \ 'git archive vs. git tar-tree' \ 'test_cmp b.tar b2.tar' +test_expect_success 'git archive on large files' ' + test_config core.bigfilethreshold 1 && + git archive HEAD >b3.tar && + test_cmp b.tar b3.tar +' + test_expect_success \ 'git archive in a bare repo' \ '(cd bare.git && git archive HEAD) >b3.tar' @@ -175,10 +201,19 @@ test_expect_success \ test_cmp a/substfile2 g/prefix/a/substfile2 ' +$UNZIP -v >/dev/null 2>&1 +if [ $? -eq 127 ]; then + say "Skipping ZIP tests, because unzip was not found" +else + test_set_prereq UNZIP +fi + test_expect_success \ 'git archive --format=zip' \ 'git archive --format=zip HEAD >d.zip' +check_zip d + test_expect_success \ 'git archive --format=zip in a bare repo' \ '(cd bare.git && git archive --format=zip HEAD) >d1.zip' @@ -201,42 +236,25 @@ test_expect_success 'git archive with --output, override inferred format' ' test_cmp b.tar d4.zip ' -$UNZIP -v >/dev/null 2>&1 -if [ $? -eq 127 ]; then - say "Skipping ZIP tests, because unzip was not found" -else - test_set_prereq UNZIP -fi - -test_expect_success UNZIP \ - 'extract ZIP archive' \ - '(mkdir d && cd d && $UNZIP ../d.zip)' - -test_expect_success UNZIP \ - 'validate filenames' \ - '(cd d/a && find .) | sort >d.lst && - test_cmp a.lst d.lst' - -test_expect_success UNZIP \ - 'validate file contents' \ - 'diff -r a d/a' - test_expect_success \ 'git archive --format=zip with prefix' \ 'git archive --format=zip --prefix=prefix/ HEAD >e.zip' -test_expect_success UNZIP \ - 'extract ZIP archive with prefix' \ - '(mkdir e && cd e && $UNZIP ../e.zip)' +check_zip e prefix/ -test_expect_success UNZIP \ - 'validate filenames with prefix' \ - '(cd e/prefix/a && find .) | sort >e.lst && - test_cmp a.lst e.lst' +test_expect_success 'git archive -0 --format=zip on large files' ' + test_config core.bigfilethreshold 1 && + git archive -0 --format=zip HEAD >large.zip +' -test_expect_success UNZIP \ - 'validate file contents with prefix' \ - 'diff -r a e/prefix/a' +check_zip large + +test_expect_success 'git archive --format=zip on large files' ' + test_config core.bigfilethreshold 1 && + git archive --format=zip HEAD >large-compressed.zip +' + +check_zip large-compressed test_expect_success \ 'git archive --list outside of a git repo' \ |