summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNguyễn Thái Ngọc Duy <pclouds@gmail.com>2013-09-09 20:57:54 +0700
committerNicolas Pitre <nico@fluxnic.net>2013-09-13 21:00:27 -0400
commite31f2ebc76225d36381108a40111363aca8ebadf (patch)
tree5cd90e54cc3fba1775b02558d814d12f91edb0a6
parentdf40f05971d3a59fad5c8176aa9d3499413caa45 (diff)
downloadgit-e31f2ebc76225d36381108a40111363aca8ebadf.tar.gz
pack v4: move packv4-create.c to libgit.a
git-packv4-create now becomes test-packv4. Code that will not be used by pack-objects.c is moved to test-packv4.c. It may be removed when the code transition to pack-objects completes. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
-rw-r--r--Makefile4
-rw-r--r--packv4-create.c481
-rw-r--r--packv4-create.h28
-rw-r--r--test-packv4.c477
4 files changed, 515 insertions, 475 deletions
diff --git a/Makefile b/Makefile
index 22fc276b61..af2e3e3c3b 100644
--- a/Makefile
+++ b/Makefile
@@ -550,7 +550,6 @@ PROGRAM_OBJS += shell.o
PROGRAM_OBJS += show-index.o
PROGRAM_OBJS += upload-pack.o
PROGRAM_OBJS += remote-testsvn.o
-PROGRAM_OBJS += packv4-create.o
# Binary suffix, set to .exe for Windows builds
X =
@@ -568,6 +567,7 @@ TEST_PROGRAMS_NEED_X += test-line-buffer
TEST_PROGRAMS_NEED_X += test-match-trees
TEST_PROGRAMS_NEED_X += test-mergesort
TEST_PROGRAMS_NEED_X += test-mktemp
+TEST_PROGRAMS_NEED_X += test-packv4
TEST_PROGRAMS_NEED_X += test-parse-options
TEST_PROGRAMS_NEED_X += test-path-utils
TEST_PROGRAMS_NEED_X += test-prio-queue
@@ -702,6 +702,7 @@ LIB_H += notes.h
LIB_H += object.h
LIB_H += pack-revindex.h
LIB_H += pack.h
+LIB_H += packv4-create.h
LIB_H += packv4-parse.h
LIB_H += parse-options.h
LIB_H += patch-ids.h
@@ -839,6 +840,7 @@ LIB_OBJS += object.o
LIB_OBJS += pack-check.o
LIB_OBJS += pack-revindex.o
LIB_OBJS += pack-write.o
+LIB_OBJS += packv4-create.o
LIB_OBJS += packv4-parse.o
LIB_OBJS += pager.o
LIB_OBJS += parse-options.o
diff --git a/packv4-create.c b/packv4-create.c
index 7a69c78964..83a63369cc 100644
--- a/packv4-create.c
+++ b/packv4-create.c
@@ -18,9 +18,9 @@
#include "packv4-create.h"
-static int pack_compression_seen;
-static int pack_compression_level = Z_DEFAULT_COMPRESSION;
-static int min_tree_copy = 1;
+int pack_compression_seen;
+int pack_compression_level = Z_DEFAULT_COMPRESSION;
+int min_tree_copy = 1;
struct data_entry {
unsigned offset;
@@ -139,7 +139,7 @@ static int cmp_dict_entries(const void *a_, const void *b_)
return diff;
}
-static void sort_dict_entries_by_hits(struct dict_table *t)
+void sort_dict_entries_by_hits(struct dict_table *t)
{
qsort(t->entry, t->nb_entries, sizeof(*t->entry), cmp_dict_entries);
t->hash_size = (t->nb_entries * 4 / 3) / 2;
@@ -208,7 +208,7 @@ int add_commit_dict_entries(struct dict_table *commit_ident_table,
return 0;
}
-static int add_tree_dict_entries(struct dict_table *tree_path_table,
+int add_tree_dict_entries(struct dict_table *tree_path_table,
void *buf, unsigned long size)
{
struct tree_desc desc;
@@ -224,7 +224,7 @@ static int add_tree_dict_entries(struct dict_table *tree_path_table,
return 0;
}
-void dump_dict_table(struct dict_table *t)
+static void dump_dict_table(struct dict_table *t)
{
int i;
@@ -241,7 +241,7 @@ void dump_dict_table(struct dict_table *t)
}
}
-static void dict_dump(struct packv4_tables *v4)
+void dict_dump(struct packv4_tables *v4)
{
dump_dict_table(v4->commit_ident_table);
dump_dict_table(v4->tree_path_table);
@@ -611,103 +611,6 @@ void *pv4_encode_tree(const struct packv4_tables *v4,
return buffer;
}
-static struct pack_idx_entry *get_packed_object_list(struct packed_git *p)
-{
- unsigned i, nr_objects = p->num_objects;
- struct pack_idx_entry *objects;
-
- objects = xmalloc((nr_objects + 1) * sizeof(*objects));
- objects[nr_objects].offset = p->pack_size - 20;
- for (i = 0; i < nr_objects; i++) {
- hashcpy(objects[i].sha1, nth_packed_object_sha1(p, i));
- objects[i].offset = nth_packed_object_offset(p, i);
- }
-
- return objects;
-}
-
-static int sort_by_offset(const void *e1, const void *e2)
-{
- const struct pack_idx_entry * const *entry1 = e1;
- const struct pack_idx_entry * const *entry2 = e2;
- if ((*entry1)->offset < (*entry2)->offset)
- return -1;
- if ((*entry1)->offset > (*entry2)->offset)
- return 1;
- return 0;
-}
-
-static struct pack_idx_entry **sort_objs_by_offset(struct pack_idx_entry *list,
- unsigned nr_objects)
-{
- unsigned i;
- struct pack_idx_entry **sorted;
-
- sorted = xmalloc((nr_objects + 1) * sizeof(*sorted));
- for (i = 0; i < nr_objects + 1; i++)
- sorted[i] = &list[i];
- qsort(sorted, nr_objects + 1, sizeof(*sorted), sort_by_offset);
-
- return sorted;
-}
-
-static int create_pack_dictionaries(struct packv4_tables *v4,
- struct packed_git *p,
- struct pack_idx_entry **obj_list)
-{
- struct progress *progress_state;
- unsigned int i;
-
- v4->commit_ident_table = create_dict_table();
- v4->tree_path_table = create_dict_table();
-
- progress_state = start_progress("Scanning objects", p->num_objects);
- for (i = 0; i < p->num_objects; i++) {
- struct pack_idx_entry *obj = obj_list[i];
- void *data;
- enum object_type type;
- unsigned long size;
- struct object_info oi = {};
- int (*add_dict_entries)(struct dict_table *, void *, unsigned long);
- struct dict_table *dict;
-
- display_progress(progress_state, i+1);
-
- oi.typep = &type;
- oi.sizep = &size;
- if (packed_object_info(p, obj->offset, &oi) < 0)
- die("cannot get type of %s from %s",
- sha1_to_hex(obj->sha1), p->pack_name);
-
- switch (type) {
- case OBJ_COMMIT:
- add_dict_entries = add_commit_dict_entries;
- dict = v4->commit_ident_table;
- break;
- case OBJ_TREE:
- add_dict_entries = add_tree_dict_entries;
- dict = v4->tree_path_table;
- break;
- default:
- continue;
- }
- data = unpack_entry(p, obj->offset, &type, &size);
- if (!data)
- die("cannot unpack %s from %s",
- sha1_to_hex(obj->sha1), p->pack_name);
- if (check_sha1_signature(obj->sha1, data, size, typename(type)))
- die("packed %s from %s is corrupt",
- sha1_to_hex(obj->sha1), p->pack_name);
- if (add_dict_entries(dict, data, size) < 0)
- die("can't process %s object %s",
- typename(type), sha1_to_hex(obj->sha1));
- free(data);
- }
-
- stop_progress(&progress_state);
- return 0;
-}
-
static unsigned long write_dict_table(struct sha1file *f, struct dict_table *t)
{
unsigned char buffer[1024];
@@ -757,28 +660,6 @@ static unsigned long write_dict_table(struct sha1file *f, struct dict_table *t)
return hdrlen + datalen;
}
-static struct sha1file * packv4_open(char *path)
-{
- int fd;
-
- fd = open(path, O_CREAT|O_EXCL|O_WRONLY, 0600);
- if (fd < 0)
- die_errno("unable to create '%s'", path);
- return sha1fd(fd, path);
-}
-
-static unsigned int packv4_write_header(struct sha1file *f, unsigned nr_objects)
-{
- struct pack_header hdr;
-
- hdr.hdr_signature = htonl(PACK_SIGNATURE);
- hdr.hdr_version = htonl(4);
- hdr.hdr_entries = htonl(nr_objects);
- sha1write(f, &hdr, sizeof(hdr));
-
- return sizeof(hdr);
-}
-
unsigned long packv4_write_tables(struct sha1file *f,
const struct packv4_tables *v4)
{
@@ -802,351 +683,3 @@ unsigned long packv4_write_tables(struct sha1file *f,
return written;
}
-
-static int write_object_header(struct sha1file *f, enum object_type type, unsigned long size)
-{
- unsigned char buf[16];
- uint64_t val;
- int len;
-
- /*
- * We really have only one kind of delta object.
- */
- if (type == OBJ_OFS_DELTA)
- type = OBJ_REF_DELTA;
-
- /*
- * We allocate 4 bits in the LSB for the object type which should
- * be good for quite a while, given that we effectively encodes
- * only 5 object types: commit, tree, blob, delta, tag.
- */
- val = size;
- if (MSB(val, 4))
- die("fixme: the code doesn't currently cope with big sizes");
- val <<= 4;
- val |= type;
- len = encode_varint(val, buf);
- sha1write(f, buf, len);
- return len;
-}
-
-static unsigned long copy_object_data(struct packv4_tables *v4,
- struct sha1file *f, struct packed_git *p,
- off_t offset)
-{
- struct pack_window *w_curs = NULL;
- struct revindex_entry *revidx;
- enum object_type type;
- unsigned long avail, size, datalen, written;
- int hdrlen, reflen, idx_nr;
- unsigned char *src, buf[24];
-
- revidx = find_pack_revindex(p, offset);
- idx_nr = revidx->nr;
- datalen = revidx[1].offset - offset;
-
- src = use_pack(p, &w_curs, offset, &avail);
- hdrlen = unpack_object_header_buffer(src, avail, &type, &size);
-
- written = write_object_header(f, type, size);
-
- if (type == OBJ_OFS_DELTA) {
- const unsigned char *cp = src + hdrlen;
- off_t base_offset = decode_varint(&cp);
- hdrlen = cp - src;
- base_offset = offset - base_offset;
- if (base_offset <= 0 || base_offset >= offset)
- die("delta offset out of bound");
- revidx = find_pack_revindex(p, base_offset);
- reflen = encode_sha1ref(v4,
- nth_packed_object_sha1(p, revidx->nr),
- buf);
- sha1write(f, buf, reflen);
- written += reflen;
- } else if (type == OBJ_REF_DELTA) {
- reflen = encode_sha1ref(v4, src + hdrlen, buf);
- hdrlen += 20;
- sha1write(f, buf, reflen);
- written += reflen;
- }
-
- if (p->index_version > 1 &&
- check_pack_crc(p, &w_curs, offset, datalen, idx_nr))
- die("bad CRC for object at offset %"PRIuMAX" in %s",
- (uintmax_t)offset, p->pack_name);
-
- offset += hdrlen;
- datalen -= hdrlen;
-
- while (datalen) {
- src = use_pack(p, &w_curs, offset, &avail);
- if (avail > datalen)
- avail = datalen;
- sha1write(f, src, avail);
- written += avail;
- offset += avail;
- datalen -= avail;
- }
- unuse_pack(&w_curs);
-
- return written;
-}
-
-static unsigned char *get_delta_base(struct packed_git *p, off_t offset,
- unsigned char *sha1_buf)
-{
- struct pack_window *w_curs = NULL;
- enum object_type type;
- unsigned long avail, size;
- int hdrlen;
- unsigned char *src;
- const unsigned char *base_sha1 = NULL; ;
-
- src = use_pack(p, &w_curs, offset, &avail);
- hdrlen = unpack_object_header_buffer(src, avail, &type, &size);
-
- if (type == OBJ_OFS_DELTA) {
- const unsigned char *cp = src + hdrlen;
- off_t base_offset = decode_varint(&cp);
- base_offset = offset - base_offset;
- if (base_offset <= 0 || base_offset >= offset) {
- error("delta offset out of bound");
- } else {
- struct revindex_entry *revidx;
- revidx = find_pack_revindex(p, base_offset);
- base_sha1 = nth_packed_object_sha1(p, revidx->nr);
- }
- } else if (type == OBJ_REF_DELTA) {
- base_sha1 = src + hdrlen;
- } else
- error("expected to get a delta but got a %s", typename(type));
-
- unuse_pack(&w_curs);
-
- if (!base_sha1)
- return NULL;
- hashcpy(sha1_buf, base_sha1);
- return sha1_buf;
-}
-
-static off_t packv4_write_object(struct packv4_tables *v4,
- struct sha1file *f, struct packed_git *p,
- struct pack_idx_entry *obj)
-{
- void *src, *result;
- struct object_info oi = {};
- enum object_type type, packed_type;
- unsigned long obj_size, buf_size;
- unsigned int hdrlen;
-
- oi.typep = &type;
- oi.sizep = &obj_size;
- packed_type = packed_object_info(p, obj->offset, &oi);
- if (packed_type < 0)
- die("cannot get type of %s from %s",
- sha1_to_hex(obj->sha1), p->pack_name);
-
- /* Some objects are copied without decompression */
- switch (type) {
- case OBJ_COMMIT:
- case OBJ_TREE:
- break;
- default:
- return copy_object_data(v4, f, p, obj->offset);
- }
-
- /* The rest is converted into their new format */
- src = unpack_entry(p, obj->offset, &type, &buf_size);
- if (!src || obj_size != buf_size)
- die("cannot unpack %s from %s",
- sha1_to_hex(obj->sha1), p->pack_name);
- if (check_sha1_signature(obj->sha1, src, buf_size, typename(type)))
- die("packed %s from %s is corrupt",
- sha1_to_hex(obj->sha1), p->pack_name);
-
- switch (type) {
- case OBJ_COMMIT:
- result = pv4_encode_commit(v4, src, &buf_size);
- break;
- case OBJ_TREE:
- if (packed_type != OBJ_TREE) {
- unsigned char sha1_buf[20], *ref_sha1;
- void *ref;
- enum object_type ref_type;
- unsigned long ref_size;
-
- ref_sha1 = get_delta_base(p, obj->offset, sha1_buf);
- if (!ref_sha1)
- die("unable to get delta base sha1 for %s",
- sha1_to_hex(obj->sha1));
- ref = read_sha1_file(ref_sha1, &ref_type, &ref_size);
- if (!ref || ref_type != OBJ_TREE)
- die("cannot obtain delta base for %s",
- sha1_to_hex(obj->sha1));
- result = pv4_encode_tree(v4, src, &buf_size,
- ref, ref_size, ref_sha1);
- free(ref);
- } else {
- result = pv4_encode_tree(v4, src, &buf_size,
- NULL, 0, NULL);
- }
- break;
- default:
- die("unexpected object type %d", type);
- }
- free(src);
- if (!result) {
- warning("can't convert %s object %s",
- typename(type), sha1_to_hex(obj->sha1));
- /* fall back to copy the object in its original form */
- return copy_object_data(v4, f, p, obj->offset);
- }
-
- /* Use bit 3 to indicate a special type encoding */
- type += 8;
- hdrlen = write_object_header(f, type, obj_size);
- sha1write(f, result, buf_size);
- free(result);
- return hdrlen + buf_size;
-}
-
-static char *normalize_pack_name(const char *path)
-{
- char buf[PATH_MAX];
- int len;
-
- len = strlcpy(buf, path, PATH_MAX);
- if (len >= PATH_MAX - 6)
- die("name too long: %s", path);
-
- /*
- * In addition to "foo.idx" we accept "foo.pack" and "foo";
- * normalize these forms to "foo.pack".
- */
- if (has_extension(buf, ".idx")) {
- strcpy(buf + len - 4, ".pack");
- len++;
- } else if (!has_extension(buf, ".pack")) {
- strcpy(buf + len, ".pack");
- len += 5;
- }
-
- return xstrdup(buf);
-}
-
-static struct packed_git *open_pack(const char *path)
-{
- char *packname = normalize_pack_name(path);
- int len = strlen(packname);
- struct packed_git *p;
-
- strcpy(packname + len - 5, ".idx");
- p = add_packed_git(packname, len - 1, 1);
- if (!p)
- die("packfile %s not found.", packname);
-
- install_packed_git(p);
- if (open_pack_index(p))
- die("packfile %s index not opened", p->pack_name);
-
- free(packname);
- return p;
-}
-
-static void process_one_pack(struct packv4_tables *v4, char *src_pack, char *dst_pack)
-{
- struct packed_git *p;
- struct sha1file *f;
- struct pack_idx_entry *objs, **p_objs;
- struct pack_idx_option idx_opts;
- unsigned i, nr_objects;
- off_t written = 0;
- char *packname;
- unsigned char pack_sha1[20];
- struct progress *progress_state;
-
- p = open_pack(src_pack);
- if (!p)
- die("unable to open source pack");
-
- nr_objects = p->num_objects;
- objs = get_packed_object_list(p);
- p_objs = sort_objs_by_offset(objs, nr_objects);
-
- v4->all_objs_nr = nr_objects;
- v4->all_objs = objs;
-
- create_pack_dictionaries(v4, p, p_objs);
- sort_dict_entries_by_hits(v4->commit_ident_table);
- sort_dict_entries_by_hits(v4->tree_path_table);
-
- packname = normalize_pack_name(dst_pack);
- f = packv4_open(packname);
- if (!f)
- die("unable to open destination pack");
- written += packv4_write_header(f, nr_objects);
- written += packv4_write_tables(f, v4);
-
- /* Let's write objects out, updating the object index list in place */
- progress_state = start_progress("Writing objects", nr_objects);
- for (i = 0; i < nr_objects; i++) {
- off_t obj_pos = written;
- struct pack_idx_entry *obj = p_objs[i];
- crc32_begin(f);
- written += packv4_write_object(v4, f, p, obj);
- obj->offset = obj_pos;
- obj->crc32 = crc32_end(f);
- display_progress(progress_state, i+1);
- }
- stop_progress(&progress_state);
-
- sha1close(f, pack_sha1, CSUM_CLOSE | CSUM_FSYNC);
-
- reset_pack_idx_option(&idx_opts);
- idx_opts.version = 3;
- strcpy(packname + strlen(packname) - 5, ".idx");
- write_idx_file(packname, p_objs, nr_objects, &idx_opts, pack_sha1);
-
- free(packname);
-}
-
-static int git_pack_config(const char *k, const char *v, void *cb)
-{
- if (!strcmp(k, "pack.compression")) {
- int level = git_config_int(k, v);
- if (level == -1)
- level = Z_DEFAULT_COMPRESSION;
- else if (level < 0 || level > Z_BEST_COMPRESSION)
- die("bad pack compression level %d", level);
- pack_compression_level = level;
- pack_compression_seen = 1;
- return 0;
- }
- return git_default_config(k, v, cb);
-}
-
-int main(int argc, char *argv[])
-{
- struct packv4_tables v4;
- char *src_pack, *dst_pack;
-
- if (argc == 3) {
- src_pack = argv[1];
- dst_pack = argv[2];
- } else if (argc == 4 && !prefixcmp(argv[1], "--min-tree-copy=")) {
- min_tree_copy = atoi(argv[1] + strlen("--min-tree-copy="));
- src_pack = argv[2];
- dst_pack = argv[3];
- } else {
- fprintf(stderr, "Usage: %s [--min-tree-copy=<n>] <src_packfile> <dst_packfile>\n", argv[0]);
- exit(1);
- }
-
- git_config(git_pack_config, NULL);
- if (!pack_compression_seen && core_compression_seen)
- pack_compression_level = core_compression_level;
- process_one_pack(&v4, src_pack, dst_pack);
- if (0)
- dict_dump(&v4);
- return 0;
-}
diff --git a/packv4-create.h b/packv4-create.h
index 0c8c77be92..ba4929a961 100644
--- a/packv4-create.h
+++ b/packv4-create.h
@@ -8,4 +8,32 @@ struct packv4_tables {
struct dict_table *tree_path_table;
};
+struct dict_table;
+struct sha1file;
+
+struct dict_table *create_dict_table(void);
+int dict_add_entry(struct dict_table *t, int val, const char *str, int str_len);
+void destroy_dict_table(struct dict_table *t);
+void dict_dump(struct packv4_tables *v4);
+
+int add_commit_dict_entries(struct dict_table *commit_ident_table,
+ void *buf, unsigned long size);
+int add_tree_dict_entries(struct dict_table *tree_path_table,
+ void *buf, unsigned long size);
+void sort_dict_entries_by_hits(struct dict_table *t);
+
+int encode_sha1ref(const struct packv4_tables *v4,
+ const unsigned char *sha1, unsigned char *buf);
+unsigned long packv4_write_tables(struct sha1file *f,
+ const struct packv4_tables *v4);
+void *pv4_encode_commit(const struct packv4_tables *v4,
+ void *buffer, unsigned long *sizep);
+void *pv4_encode_tree(const struct packv4_tables *v4,
+ void *_buffer, unsigned long *sizep,
+ void *delta, unsigned long delta_size,
+ const unsigned char *delta_sha1);
+
+void process_one_pack(struct packv4_tables *v4,
+ char *src_pack, char *dst_pack);
+
#endif
diff --git a/test-packv4.c b/test-packv4.c
new file mode 100644
index 0000000000..9379f50d78
--- /dev/null
+++ b/test-packv4.c
@@ -0,0 +1,477 @@
+#include "cache.h"
+#include "pack.h"
+#include "pack-revindex.h"
+#include "progress.h"
+#include "varint.h"
+#include "packv4-create.h"
+
+extern int pack_compression_seen;
+extern int pack_compression_level;
+extern int min_tree_copy;
+
+static struct pack_idx_entry *get_packed_object_list(struct packed_git *p)
+{
+ unsigned i, nr_objects = p->num_objects;
+ struct pack_idx_entry *objects;
+
+ objects = xmalloc((nr_objects + 1) * sizeof(*objects));
+ objects[nr_objects].offset = p->pack_size - 20;
+ for (i = 0; i < nr_objects; i++) {
+ hashcpy(objects[i].sha1, nth_packed_object_sha1(p, i));
+ objects[i].offset = nth_packed_object_offset(p, i);
+ }
+
+ return objects;
+}
+
+static int sort_by_offset(const void *e1, const void *e2)
+{
+ const struct pack_idx_entry * const *entry1 = e1;
+ const struct pack_idx_entry * const *entry2 = e2;
+ if ((*entry1)->offset < (*entry2)->offset)
+ return -1;
+ if ((*entry1)->offset > (*entry2)->offset)
+ return 1;
+ return 0;
+}
+
+static struct pack_idx_entry **sort_objs_by_offset(struct pack_idx_entry *list,
+ unsigned nr_objects)
+{
+ unsigned i;
+ struct pack_idx_entry **sorted;
+
+ sorted = xmalloc((nr_objects + 1) * sizeof(*sorted));
+ for (i = 0; i < nr_objects + 1; i++)
+ sorted[i] = &list[i];
+ qsort(sorted, nr_objects + 1, sizeof(*sorted), sort_by_offset);
+
+ return sorted;
+}
+
+static int create_pack_dictionaries(struct packv4_tables *v4,
+ struct packed_git *p,
+ struct pack_idx_entry **obj_list)
+{
+ struct progress *progress_state;
+ unsigned int i;
+
+ v4->commit_ident_table = create_dict_table();
+ v4->tree_path_table = create_dict_table();
+
+ progress_state = start_progress("Scanning objects", p->num_objects);
+ for (i = 0; i < p->num_objects; i++) {
+ struct pack_idx_entry *obj = obj_list[i];
+ void *data;
+ enum object_type type;
+ unsigned long size;
+ struct object_info oi = {};
+ int (*add_dict_entries)(struct dict_table *, void *, unsigned long);
+ struct dict_table *dict;
+
+ display_progress(progress_state, i+1);
+
+ oi.typep = &type;
+ oi.sizep = &size;
+ if (packed_object_info(p, obj->offset, &oi) < 0)
+ die("cannot get type of %s from %s",
+ sha1_to_hex(obj->sha1), p->pack_name);
+
+ switch (type) {
+ case OBJ_COMMIT:
+ add_dict_entries = add_commit_dict_entries;
+ dict = v4->commit_ident_table;
+ break;
+ case OBJ_TREE:
+ add_dict_entries = add_tree_dict_entries;
+ dict = v4->tree_path_table;
+ break;
+ default:
+ continue;
+ }
+ data = unpack_entry(p, obj->offset, &type, &size);
+ if (!data)
+ die("cannot unpack %s from %s",
+ sha1_to_hex(obj->sha1), p->pack_name);
+ if (check_sha1_signature(obj->sha1, data, size, typename(type)))
+ die("packed %s from %s is corrupt",
+ sha1_to_hex(obj->sha1), p->pack_name);
+ if (add_dict_entries(dict, data, size) < 0)
+ die("can't process %s object %s",
+ typename(type), sha1_to_hex(obj->sha1));
+ free(data);
+ }
+
+ stop_progress(&progress_state);
+ return 0;
+}
+
+static struct sha1file * packv4_open(char *path)
+{
+ int fd;
+
+ fd = open(path, O_CREAT|O_EXCL|O_WRONLY, 0600);
+ if (fd < 0)
+ die_errno("unable to create '%s'", path);
+ return sha1fd(fd, path);
+}
+
+static unsigned int packv4_write_header(struct sha1file *f, unsigned nr_objects)
+{
+ struct pack_header hdr;
+
+ hdr.hdr_signature = htonl(PACK_SIGNATURE);
+ hdr.hdr_version = htonl(4);
+ hdr.hdr_entries = htonl(nr_objects);
+ sha1write(f, &hdr, sizeof(hdr));
+
+ return sizeof(hdr);
+}
+
+static int write_object_header(struct sha1file *f, enum object_type type, unsigned long size)
+{
+ unsigned char buf[16];
+ uint64_t val;
+ int len;
+
+ /*
+ * We really have only one kind of delta object.
+ */
+ if (type == OBJ_OFS_DELTA)
+ type = OBJ_REF_DELTA;
+
+ /*
+ * We allocate 4 bits in the LSB for the object type which should
+ * be good for quite a while, given that we effectively encodes
+ * only 5 object types: commit, tree, blob, delta, tag.
+ */
+ val = size;
+ if (MSB(val, 4))
+ die("fixme: the code doesn't currently cope with big sizes");
+ val <<= 4;
+ val |= type;
+ len = encode_varint(val, buf);
+ sha1write(f, buf, len);
+ return len;
+}
+
+static unsigned long copy_object_data(struct packv4_tables *v4,
+ struct sha1file *f, struct packed_git *p,
+ off_t offset)
+{
+ struct pack_window *w_curs = NULL;
+ struct revindex_entry *revidx;
+ enum object_type type;
+ unsigned long avail, size, datalen, written;
+ int hdrlen, reflen, idx_nr;
+ unsigned char *src, buf[24];
+
+ revidx = find_pack_revindex(p, offset);
+ idx_nr = revidx->nr;
+ datalen = revidx[1].offset - offset;
+
+ src = use_pack(p, &w_curs, offset, &avail);
+ hdrlen = unpack_object_header_buffer(src, avail, &type, &size);
+
+ written = write_object_header(f, type, size);
+
+ if (type == OBJ_OFS_DELTA) {
+ const unsigned char *cp = src + hdrlen;
+ off_t base_offset = decode_varint(&cp);
+ hdrlen = cp - src;
+ base_offset = offset - base_offset;
+ if (base_offset <= 0 || base_offset >= offset)
+ die("delta offset out of bound");
+ revidx = find_pack_revindex(p, base_offset);
+ reflen = encode_sha1ref(v4,
+ nth_packed_object_sha1(p, revidx->nr),
+ buf);
+ sha1write(f, buf, reflen);
+ written += reflen;
+ } else if (type == OBJ_REF_DELTA) {
+ reflen = encode_sha1ref(v4, src + hdrlen, buf);
+ hdrlen += 20;
+ sha1write(f, buf, reflen);
+ written += reflen;
+ }
+
+ if (p->index_version > 1 &&
+ check_pack_crc(p, &w_curs, offset, datalen, idx_nr))
+ die("bad CRC for object at offset %"PRIuMAX" in %s",
+ (uintmax_t)offset, p->pack_name);
+
+ offset += hdrlen;
+ datalen -= hdrlen;
+
+ while (datalen) {
+ src = use_pack(p, &w_curs, offset, &avail);
+ if (avail > datalen)
+ avail = datalen;
+ sha1write(f, src, avail);
+ written += avail;
+ offset += avail;
+ datalen -= avail;
+ }
+ unuse_pack(&w_curs);
+
+ return written;
+}
+
+static unsigned char *get_delta_base(struct packed_git *p, off_t offset,
+ unsigned char *sha1_buf)
+{
+ struct pack_window *w_curs = NULL;
+ enum object_type type;
+ unsigned long avail, size;
+ int hdrlen;
+ unsigned char *src;
+ const unsigned char *base_sha1 = NULL; ;
+
+ src = use_pack(p, &w_curs, offset, &avail);
+ hdrlen = unpack_object_header_buffer(src, avail, &type, &size);
+
+ if (type == OBJ_OFS_DELTA) {
+ const unsigned char *cp = src + hdrlen;
+ off_t base_offset = decode_varint(&cp);
+ base_offset = offset - base_offset;
+ if (base_offset <= 0 || base_offset >= offset) {
+ error("delta offset out of bound");
+ } else {
+ struct revindex_entry *revidx;
+ revidx = find_pack_revindex(p, base_offset);
+ base_sha1 = nth_packed_object_sha1(p, revidx->nr);
+ }
+ } else if (type == OBJ_REF_DELTA) {
+ base_sha1 = src + hdrlen;
+ } else
+ error("expected to get a delta but got a %s", typename(type));
+
+ unuse_pack(&w_curs);
+
+ if (!base_sha1)
+ return NULL;
+ hashcpy(sha1_buf, base_sha1);
+ return sha1_buf;
+}
+
+static off_t packv4_write_object(struct packv4_tables *v4,
+ struct sha1file *f, struct packed_git *p,
+ struct pack_idx_entry *obj)
+{
+ void *src, *result;
+ struct object_info oi = {};
+ enum object_type type, packed_type;
+ unsigned long obj_size, buf_size;
+ unsigned int hdrlen;
+
+ oi.typep = &type;
+ oi.sizep = &obj_size;
+ packed_type = packed_object_info(p, obj->offset, &oi);
+ if (packed_type < 0)
+ die("cannot get type of %s from %s",
+ sha1_to_hex(obj->sha1), p->pack_name);
+
+ /* Some objects are copied without decompression */
+ switch (type) {
+ case OBJ_COMMIT:
+ case OBJ_TREE:
+ break;
+ default:
+ return copy_object_data(v4, f, p, obj->offset);
+ }
+
+ /* The rest is converted into their new format */
+ src = unpack_entry(p, obj->offset, &type, &buf_size);
+ if (!src || obj_size != buf_size)
+ die("cannot unpack %s from %s",
+ sha1_to_hex(obj->sha1), p->pack_name);
+ if (check_sha1_signature(obj->sha1, src, buf_size, typename(type)))
+ die("packed %s from %s is corrupt",
+ sha1_to_hex(obj->sha1), p->pack_name);
+
+ switch (type) {
+ case OBJ_COMMIT:
+ result = pv4_encode_commit(v4, src, &buf_size);
+ break;
+ case OBJ_TREE:
+ if (packed_type != OBJ_TREE) {
+ unsigned char sha1_buf[20], *ref_sha1;
+ void *ref;
+ enum object_type ref_type;
+ unsigned long ref_size;
+
+ ref_sha1 = get_delta_base(p, obj->offset, sha1_buf);
+ if (!ref_sha1)
+ die("unable to get delta base sha1 for %s",
+ sha1_to_hex(obj->sha1));
+ ref = read_sha1_file(ref_sha1, &ref_type, &ref_size);
+ if (!ref || ref_type != OBJ_TREE)
+ die("cannot obtain delta base for %s",
+ sha1_to_hex(obj->sha1));
+ result = pv4_encode_tree(v4, src, &buf_size,
+ ref, ref_size, ref_sha1);
+ free(ref);
+ } else {
+ result = pv4_encode_tree(v4, src, &buf_size,
+ NULL, 0, NULL);
+ }
+ break;
+ default:
+ die("unexpected object type %d", type);
+ }
+ free(src);
+ if (!result) {
+ warning("can't convert %s object %s",
+ typename(type), sha1_to_hex(obj->sha1));
+ /* fall back to copy the object in its original form */
+ return copy_object_data(v4, f, p, obj->offset);
+ }
+
+ /* Use bit 3 to indicate a special type encoding */
+ type += 8;
+ hdrlen = write_object_header(f, type, obj_size);
+ sha1write(f, result, buf_size);
+ free(result);
+ return hdrlen + buf_size;
+}
+
+static char *normalize_pack_name(const char *path)
+{
+ char buf[PATH_MAX];
+ int len;
+
+ len = strlcpy(buf, path, PATH_MAX);
+ if (len >= PATH_MAX - 6)
+ die("name too long: %s", path);
+
+ /*
+ * In addition to "foo.idx" we accept "foo.pack" and "foo";
+ * normalize these forms to "foo.pack".
+ */
+ if (has_extension(buf, ".idx")) {
+ strcpy(buf + len - 4, ".pack");
+ len++;
+ } else if (!has_extension(buf, ".pack")) {
+ strcpy(buf + len, ".pack");
+ len += 5;
+ }
+
+ return xstrdup(buf);
+}
+
+static struct packed_git *open_pack(const char *path)
+{
+ char *packname = normalize_pack_name(path);
+ int len = strlen(packname);
+ struct packed_git *p;
+
+ strcpy(packname + len - 5, ".idx");
+ p = add_packed_git(packname, len - 1, 1);
+ if (!p)
+ die("packfile %s not found.", packname);
+
+ install_packed_git(p);
+ if (open_pack_index(p))
+ die("packfile %s index not opened", p->pack_name);
+
+ free(packname);
+ return p;
+}
+
+void process_one_pack(struct packv4_tables *v4, char *src_pack, char *dst_pack)
+{
+ struct packed_git *p;
+ struct sha1file *f;
+ struct pack_idx_entry *objs, **p_objs;
+ struct pack_idx_option idx_opts;
+ unsigned i, nr_objects;
+ off_t written = 0;
+ char *packname;
+ unsigned char pack_sha1[20];
+ struct progress *progress_state;
+
+ p = open_pack(src_pack);
+ if (!p)
+ die("unable to open source pack");
+
+ nr_objects = p->num_objects;
+ objs = get_packed_object_list(p);
+ p_objs = sort_objs_by_offset(objs, nr_objects);
+
+ v4->all_objs = objs;
+ v4->all_objs_nr = nr_objects;
+
+ create_pack_dictionaries(v4, p, p_objs);
+ sort_dict_entries_by_hits(v4->commit_ident_table);
+ sort_dict_entries_by_hits(v4->tree_path_table);
+
+ packname = normalize_pack_name(dst_pack);
+ f = packv4_open(packname);
+ if (!f)
+ die("unable to open destination pack");
+ written += packv4_write_header(f, nr_objects);
+ written += packv4_write_tables(f, v4);
+
+ /* Let's write objects out, updating the object index list in place */
+ progress_state = start_progress("Writing objects", nr_objects);
+ for (i = 0; i < nr_objects; i++) {
+ off_t obj_pos = written;
+ struct pack_idx_entry *obj = p_objs[i];
+ crc32_begin(f);
+ written += packv4_write_object(v4, f, p, obj);
+ obj->offset = obj_pos;
+ obj->crc32 = crc32_end(f);
+ display_progress(progress_state, i+1);
+ }
+ stop_progress(&progress_state);
+
+ sha1close(f, pack_sha1, CSUM_CLOSE | CSUM_FSYNC);
+
+ reset_pack_idx_option(&idx_opts);
+ idx_opts.version = 3;
+ strcpy(packname + strlen(packname) - 5, ".idx");
+ write_idx_file(packname, p_objs, nr_objects, &idx_opts, pack_sha1);
+
+ free(packname);
+}
+
+static int git_pack_config(const char *k, const char *v, void *cb)
+{
+ if (!strcmp(k, "pack.compression")) {
+ int level = git_config_int(k, v);
+ if (level == -1)
+ level = Z_DEFAULT_COMPRESSION;
+ else if (level < 0 || level > Z_BEST_COMPRESSION)
+ die("bad pack compression level %d", level);
+ pack_compression_level = level;
+ pack_compression_seen = 1;
+ return 0;
+ }
+ return git_default_config(k, v, cb);
+}
+
+int main(int argc, char *argv[])
+{
+ struct packv4_tables v4;
+ char *src_pack, *dst_pack;
+
+ if (argc == 3) {
+ src_pack = argv[1];
+ dst_pack = argv[2];
+ } else if (argc == 4 && !prefixcmp(argv[1], "--min-tree-copy=")) {
+ min_tree_copy = atoi(argv[1] + strlen("--min-tree-copy="));
+ src_pack = argv[2];
+ dst_pack = argv[3];
+ } else {
+ fprintf(stderr, "Usage: %s [--min-tree-copy=<n>] <src_packfile> <dst_packfile>\n", argv[0]);
+ exit(1);
+ }
+
+ git_config(git_pack_config, NULL);
+ if (!pack_compression_seen && core_compression_seen)
+ pack_compression_level = core_compression_level;
+ process_one_pack(&v4, src_pack, dst_pack);
+ if (0)
+ dict_dump(&v4);
+ return 0;
+}