summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaylor Blau <me@ttaylorr.com>2021-01-25 18:37:18 -0500
committerJunio C Hamano <gitster@pobox.com>2021-01-25 18:32:43 -0800
commit8ef50d9958f7be21e38026433d30f72521b4de47 (patch)
treeb653d9be420dde954169b0af92ebf331ff607deb
parent2f4ba2a867f0390f139b622dbafcab766cb88e80 (diff)
downloadgit-8ef50d9958f7be21e38026433d30f72521b4de47.tar.gz
pack-write.c: prepare to write 'pack-*.rev' files
This patch prepares for callers to be able to write reverse index files to disk. It adds the necessary machinery to write a format-compliant .rev file from within 'write_rev_file()', which is called from 'finish_tmp_packfile()'. Similar to the process by which the reverse index is computed in memory, these new paths also have to sort a list of objects by their offsets within a packfile. These new paths use a qsort() (as opposed to a radix sort), since our specialized radix sort requires a full revindex_entry struct per object, which is more memory than we need to allocate. The qsort is obviously slower, but the theoretical slowdown would require a repository with a large amount of objects, likely implying that the time spent in, say, pack-objects during a repack would dominate the overall runtime. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--pack-write.c120
-rw-r--r--pack.h4
2 files changed, 123 insertions, 1 deletions
diff --git a/pack-write.c b/pack-write.c
index e9bb3fd949..680c36755d 100644
--- a/pack-write.c
+++ b/pack-write.c
@@ -167,6 +167,113 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
return index_name;
}
+static int pack_order_cmp(const void *va, const void *vb, void *ctx)
+{
+ struct pack_idx_entry **objects = ctx;
+
+ off_t oa = objects[*(uint32_t*)va]->offset;
+ off_t ob = objects[*(uint32_t*)vb]->offset;
+
+ if (oa < ob)
+ return -1;
+ if (oa > ob)
+ return 1;
+ return 0;
+}
+
+static void write_rev_header(struct hashfile *f)
+{
+ uint32_t oid_version;
+ switch (hash_algo_by_ptr(the_hash_algo)) {
+ case GIT_HASH_SHA1:
+ oid_version = 1;
+ break;
+ case GIT_HASH_SHA256:
+ oid_version = 2;
+ break;
+ default:
+ die("write_rev_header: unknown hash version");
+ }
+
+ hashwrite_be32(f, RIDX_SIGNATURE);
+ hashwrite_be32(f, RIDX_VERSION);
+ hashwrite_be32(f, oid_version);
+}
+
+static void write_rev_index_positions(struct hashfile *f,
+ struct pack_idx_entry **objects,
+ uint32_t nr_objects)
+{
+ uint32_t *pack_order;
+ uint32_t i;
+
+ ALLOC_ARRAY(pack_order, nr_objects);
+ for (i = 0; i < nr_objects; i++)
+ pack_order[i] = i;
+ QSORT_S(pack_order, nr_objects, pack_order_cmp, objects);
+
+ for (i = 0; i < nr_objects; i++)
+ hashwrite_be32(f, pack_order[i]);
+
+ free(pack_order);
+}
+
+static void write_rev_trailer(struct hashfile *f, const unsigned char *hash)
+{
+ hashwrite(f, hash, the_hash_algo->rawsz);
+}
+
+const char *write_rev_file(const char *rev_name,
+ struct pack_idx_entry **objects,
+ uint32_t nr_objects,
+ const unsigned char *hash,
+ unsigned flags)
+{
+ struct hashfile *f;
+ int fd;
+
+ if ((flags & WRITE_REV) && (flags & WRITE_REV_VERIFY))
+ die(_("cannot both write and verify reverse index"));
+
+ if (flags & WRITE_REV) {
+ if (!rev_name) {
+ struct strbuf tmp_file = STRBUF_INIT;
+ fd = odb_mkstemp(&tmp_file, "pack/tmp_rev_XXXXXX");
+ rev_name = strbuf_detach(&tmp_file, NULL);
+ } else {
+ unlink(rev_name);
+ fd = open(rev_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
+ if (fd < 0)
+ die_errno("unable to create '%s'", rev_name);
+ }
+ f = hashfd(fd, rev_name);
+ } else if (flags & WRITE_REV_VERIFY) {
+ struct stat statbuf;
+ if (stat(rev_name, &statbuf)) {
+ if (errno == ENOENT) {
+ /* .rev files are optional */
+ return NULL;
+ } else
+ die_errno(_("could not stat: %s"), rev_name);
+ }
+ f = hashfd_check(rev_name);
+ } else
+ return NULL;
+
+ write_rev_header(f);
+
+ write_rev_index_positions(f, objects, nr_objects);
+ write_rev_trailer(f, hash);
+
+ if (rev_name && adjust_shared_perm(rev_name) < 0)
+ die(_("failed to make %s readable"), rev_name);
+
+ finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
+ ((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
+
+ return rev_name;
+}
+
off_t write_pack_header(struct hashfile *f, uint32_t nr_entries)
{
struct pack_header hdr;
@@ -342,7 +449,7 @@ void finish_tmp_packfile(struct strbuf *name_buffer,
struct pack_idx_option *pack_idx_opts,
unsigned char hash[])
{
- const char *idx_tmp_name;
+ const char *idx_tmp_name, *rev_tmp_name = NULL;
int basename_len = name_buffer->len;
if (adjust_shared_perm(pack_tmp_name))
@@ -353,6 +460,9 @@ void finish_tmp_packfile(struct strbuf *name_buffer,
if (adjust_shared_perm(idx_tmp_name))
die_errno("unable to make temporary index file readable");
+ rev_tmp_name = write_rev_file(NULL, written_list, nr_written, hash,
+ pack_idx_opts->flags);
+
strbuf_addf(name_buffer, "%s.pack", hash_to_hex(hash));
if (rename(pack_tmp_name, name_buffer->buf))
@@ -366,6 +476,14 @@ void finish_tmp_packfile(struct strbuf *name_buffer,
strbuf_setlen(name_buffer, basename_len);
+ if (rev_tmp_name) {
+ strbuf_addf(name_buffer, "%s.rev", hash_to_hex(hash));
+ if (rename(rev_tmp_name, name_buffer->buf))
+ die_errno("unable to rename temporary reverse-index file");
+ }
+
+ strbuf_setlen(name_buffer, basename_len);
+
free((void *)idx_tmp_name);
}
diff --git a/pack.h b/pack.h
index 9ae640f417..afdcf8f5c7 100644
--- a/pack.h
+++ b/pack.h
@@ -42,6 +42,8 @@ struct pack_idx_option {
/* flag bits */
#define WRITE_IDX_VERIFY 01 /* verify only, do not write the idx file */
#define WRITE_IDX_STRICT 02
+#define WRITE_REV 04
+#define WRITE_REV_VERIFY 010
uint32_t version;
uint32_t off32_limit;
@@ -91,6 +93,8 @@ struct ref;
void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought);
+const char *write_rev_file(const char *rev_name, struct pack_idx_entry **objects, uint32_t nr_objects, const unsigned char *hash, unsigned flags);
+
/*
* The "hdr" output buffer should be at least this big, which will handle sizes
* up to 2^67.