summaryrefslogtreecommitdiff
path: root/refs/packed-format-v2.c
diff options
context:
space:
mode:
Diffstat (limited to 'refs/packed-format-v2.c')
-rw-r--r--refs/packed-format-v2.c624
1 files changed, 624 insertions, 0 deletions
diff --git a/refs/packed-format-v2.c b/refs/packed-format-v2.c
new file mode 100644
index 0000000000..ada34bf9bf
--- /dev/null
+++ b/refs/packed-format-v2.c
@@ -0,0 +1,624 @@
+#include "../cache.h"
+#include "../config.h"
+#include "../refs.h"
+#include "refs-internal.h"
+#include "packed-backend.h"
+#include "../iterator.h"
+#include "../lockfile.h"
+#include "../chdir-notify.h"
+#include "../chunk-format.h"
+#include "../csum-file.h"
+
+#define OFFSET_IS_PEELED (((uint64_t)1) << 63)
+
+#define PACKED_REFS_SIGNATURE 0x50524546 /* "PREF" */
+#define CHREFS_CHUNKID_OFFSETS 0x524F4646 /* "ROFF" */
+#define CHREFS_CHUNKID_REFS 0x52454653 /* "REFS" */
+#define CHREFS_CHUNKID_PREFIX_DATA 0x50465844 /* "PFXD" */
+#define CHREFS_CHUNKID_PREFIX_OFFSETS 0x5046584F /* "PFXO" */
+
+static const char *get_nth_prefix(struct snapshot *snapshot,
+ size_t n, size_t *len)
+{
+ uint64_t offset, next_offset;
+
+ if (n >= snapshot->prefixes_nr)
+ BUG("asking for prefix %"PRIu64" outside of bounds (%"PRIu64")",
+ (uint64_t)n, (uint64_t)snapshot->prefixes_nr);
+
+ if (n)
+ offset = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * (n - 1));
+ else
+ offset = 0;
+
+ if (len) {
+ next_offset = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * n);
+
+ /* Prefix includes null terminator. */
+ *len = next_offset - offset - 1;
+ }
+
+ return snapshot->prefix_chunk + offset;
+}
+
+/*
+ * Find the place in `snapshot->buf` where the start of the record for
+ * `refname` starts. If `mustexist` is true and the reference doesn't
+ * exist, then return NULL. If `mustexist` is false and the reference
+ * doesn't exist, then return the point where that reference would be
+ * inserted, or `snapshot->eof` (which might be NULL) if it would be
+ * inserted at the end of the file. In the latter mode, `refname`
+ * doesn't have to be a proper reference name; for example, one could
+ * search for "refs/replace/" to find the start of any replace
+ * references.
+ *
+ * The record is sought using a binary search, so `snapshot->buf` must
+ * be sorted.
+ */
+static const char *find_prefix_location(struct snapshot *snapshot,
+ const char *refname, size_t *pos)
+{
+ size_t lo = 0, hi = snapshot->prefixes_nr;
+
+ while (lo != hi) {
+ const char *rec;
+ int cmp;
+ size_t len;
+ size_t mid = lo + (hi - lo) / 2;
+
+ rec = get_nth_prefix(snapshot, mid, &len);
+ cmp = strncmp(rec, refname, len);
+ if (cmp < 0) {
+ lo = mid + 1;
+ } else if (cmp > 0) {
+ hi = mid;
+ } else {
+ /* we have a prefix match! */
+ *pos = mid;
+ return rec;
+ }
+ }
+
+ *pos = lo;
+ if (lo < snapshot->prefixes_nr)
+ return get_nth_prefix(snapshot, lo, NULL);
+ else
+ return NULL;
+}
+
+int detect_packed_format_v2_header(struct packed_ref_store *refs,
+ struct snapshot *snapshot)
+{
+ /*
+ * packed-refs v1 might not have a header, so check instead
+ * that the v2 signature is not present.
+ */
+ return get_be32(snapshot->buf) == PACKED_REFS_SIGNATURE;
+}
+
+static const char *get_nth_ref(struct snapshot *snapshot,
+ size_t n)
+{
+ uint64_t offset;
+
+ if (n >= snapshot->nr)
+ BUG("asking for position %"PRIu64" outside of bounds (%"PRIu64")",
+ (uint64_t)n, (uint64_t)snapshot->nr);
+
+ if (n)
+ offset = get_be64(snapshot->offset_chunk + (n-1) * sizeof(uint64_t))
+ & ~OFFSET_IS_PEELED;
+ else
+ offset = 0;
+
+ return snapshot->refs_chunk + offset;
+}
+
+/*
+ * Find the place in `snapshot->buf` where the start of the record for
+ * `refname` starts. If `mustexist` is true and the reference doesn't
+ * exist, then return NULL. If `mustexist` is false and the reference
+ * doesn't exist, then return the point where that reference would be
+ * inserted, or `snapshot->eof` (which might be NULL) if it would be
+ * inserted at the end of the file. In the latter mode, `refname`
+ * doesn't have to be a proper reference name; for example, one could
+ * search for "refs/replace/" to find the start of any replace
+ * references.
+ *
+ * The record is sought using a binary search, so `snapshot->buf` must
+ * be sorted.
+ */
+const char *find_reference_location_v2(struct snapshot *snapshot,
+ const char *refname, int mustexist,
+ size_t *pos)
+{
+ size_t lo = 0, hi = snapshot->nr;
+
+ if (snapshot->prefix_chunk) {
+ size_t prefix_row;
+ const char *prefix;
+ int found = 1;
+
+ prefix = find_prefix_location(snapshot, refname, &prefix_row);
+
+ if (!prefix || !starts_with(refname, prefix)) {
+ if (mustexist)
+ return NULL;
+ found = 0;
+ }
+
+ /* The second 4-byte column of the prefix offsets */
+ if (prefix_row) {
+ /* if prefix_row == 0, then lo = 0, which is already true. */
+ lo = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * (prefix_row - 1) + sizeof(uint32_t));
+ }
+
+ if (!found) {
+ const char *ret;
+ /* Terminate early with this lo position as the insertion point. */
+ if (pos)
+ *pos = lo;
+
+ if (lo >= snapshot->nr)
+ return NULL;
+
+ ret = get_nth_ref(snapshot, lo);
+ return ret;
+ }
+
+ hi = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * prefix_row + sizeof(uint32_t));
+
+ if (prefix)
+ refname += strlen(prefix);
+ }
+
+ while (lo != hi) {
+ const char *rec;
+ int cmp;
+ size_t mid = lo + (hi - lo) / 2;
+
+ rec = get_nth_ref(snapshot, mid);
+ cmp = strcmp(rec, refname);
+ if (cmp < 0) {
+ lo = mid + 1;
+ } else if (cmp > 0) {
+ hi = mid;
+ } else {
+ if (pos)
+ *pos = mid;
+ return rec;
+ }
+ }
+
+ if (mustexist) {
+ return NULL;
+ } else {
+ const char *ret;
+ /*
+ * We are likely doing a prefix match, so use the current
+ * 'lo' position as the indicator.
+ */
+ if (pos)
+ *pos = lo;
+ if (lo >= snapshot->nr)
+ return NULL;
+
+ ret = get_nth_ref(snapshot, lo);
+ return ret;
+ }
+}
+
+int packed_read_raw_ref_v2(struct packed_ref_store *refs, struct snapshot *snapshot,
+ const char *refname, struct object_id *oid,
+ unsigned int *type, int *failure_errno)
+{
+ const char *rec;
+
+ *type = 0;
+
+ rec = find_reference_location_v2(snapshot, refname, 1, NULL);
+
+ if (!rec) {
+ /* refname is not a packed reference. */
+ *failure_errno = ENOENT;
+ return -1;
+ }
+
+ hashcpy(oid->hash, (const unsigned char *)rec + strlen(rec) + 1);
+ oid->algo = hash_algo_by_ptr(the_hash_algo);
+
+ *type = REF_ISPACKED;
+ return 0;
+}
+
+static int packed_refs_read_offsets(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct snapshot *snapshot = data;
+
+ snapshot->offset_chunk = chunk_start;
+ snapshot->nr = chunk_size / sizeof(uint64_t);
+ return 0;
+}
+
+static int packed_refs_read_prefix_offsets(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct snapshot *snapshot = data;
+
+ snapshot->prefix_offsets_chunk = chunk_start;
+ snapshot->prefixes_nr = chunk_size / sizeof(uint64_t);
+ return 0;
+}
+
+void fill_snapshot_v2(struct snapshot *snapshot)
+{
+ uint32_t file_signature, file_version, hash_version;
+ struct chunkfile *cf;
+
+ file_signature = get_be32(snapshot->buf);
+ if (file_signature != PACKED_REFS_SIGNATURE)
+ die(_("%s file signature %X does not match signature %X"),
+ "packed-ref", file_signature, PACKED_REFS_SIGNATURE);
+
+ file_version = get_be32(snapshot->buf + sizeof(uint32_t));
+ if (file_version != 2)
+ die(_("format version %u does not match expected file version %u"),
+ file_version, 2);
+
+ hash_version = get_be32(snapshot->buf + 2 * sizeof(uint32_t));
+ if (hash_version != the_hash_algo->format_id)
+ die(_("hash version %X does not match expected hash version %X"),
+ hash_version, the_hash_algo->format_id);
+
+ cf = init_chunkfile(NULL);
+
+ if (read_trailing_table_of_contents(cf, (const unsigned char *)snapshot->buf, snapshot->buflen)) {
+ release_snapshot(snapshot);
+ snapshot = NULL;
+ goto cleanup;
+ }
+
+ read_chunk(cf, CHREFS_CHUNKID_OFFSETS, packed_refs_read_offsets, snapshot);
+ pair_chunk(cf, CHREFS_CHUNKID_REFS, (const unsigned char**)&snapshot->refs_chunk);
+
+ read_chunk(cf, CHREFS_CHUNKID_PREFIX_OFFSETS, packed_refs_read_prefix_offsets, snapshot);
+ pair_chunk(cf, CHREFS_CHUNKID_PREFIX_DATA, (const unsigned char**)&snapshot->prefix_chunk);
+
+ /* TODO: add error checks for invalid chunk combinations. */
+
+cleanup:
+ free_chunkfile(cf);
+}
+
+/*
+ * Move the iterator to the next record in the snapshot, without
+ * respect for whether the record is actually required by the current
+ * iteration. Adjust the fields in `iter` and return `ITER_OK` or
+ * `ITER_DONE`. This function does not free the iterator in the case
+ * of `ITER_DONE`.
+ */
+int next_record_v2(struct packed_ref_iterator *iter)
+{
+ uint64_t offset;
+ const char *pos = iter->pos;
+ strbuf_reset(&iter->refname_buf);
+
+ if (iter->row == iter->snapshot->nr)
+ return ITER_DONE;
+
+ iter->base.flags = REF_ISPACKED;
+
+ if (iter->cur_prefix)
+ strbuf_addstr(&iter->refname_buf, iter->cur_prefix);
+ strbuf_addstr(&iter->refname_buf, pos);
+ iter->base.refname = iter->refname_buf.buf;
+ pos += strlen(pos) + 1;
+
+ hashcpy(iter->oid.hash, (const unsigned char *)pos);
+ iter->oid.algo = hash_algo_by_ptr(the_hash_algo);
+ pos += the_hash_algo->rawsz;
+
+ if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) {
+ if (!refname_is_safe(iter->base.refname))
+ die("packed refname is dangerous: %s",
+ iter->base.refname);
+ oidclr(&iter->oid);
+ iter->base.flags |= REF_BAD_NAME | REF_ISBROKEN;
+ }
+
+ /* We always know the peeled value! */
+ iter->base.flags |= REF_KNOWS_PEELED;
+
+ offset = get_be64(iter->snapshot->offset_chunk + sizeof(uint64_t) * iter->row);
+ if (offset & OFFSET_IS_PEELED) {
+ hashcpy(iter->peeled.hash, (const unsigned char *)pos);
+ iter->peeled.algo = hash_algo_by_ptr(the_hash_algo);
+ } else {
+ oidclr(&iter->peeled);
+ }
+
+ /* TODO: somehow all tags are getting OFFSET_IS_PEELED even though
+ * some are not annotated tags.
+ */
+ iter->pos = iter->snapshot->refs_chunk + (offset & (~OFFSET_IS_PEELED));
+
+ iter->row++;
+
+ if (iter->row == iter->prefix_row_end && iter->snapshot->prefix_chunk) {
+ size_t prefix_pos = get_be32(iter->snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * iter->prefix_i);
+ iter->cur_prefix = iter->snapshot->prefix_chunk + prefix_pos;
+ iter->prefix_i++;
+ iter->prefix_row_end = get_be32(iter->snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * iter->prefix_i + sizeof(uint32_t));
+ }
+
+ return ITER_OK;
+}
+
+void init_iterator_prefix_info(const char *prefix,
+ struct packed_ref_iterator *iter)
+{
+ struct snapshot *snapshot = iter->snapshot;
+
+ if (snapshot->version != 2 || !snapshot->prefix_chunk) {
+ iter->prefix_row_end = snapshot->nr;
+ return;
+ }
+
+ if (prefix)
+ iter->cur_prefix = find_prefix_location(snapshot, prefix, &iter->prefix_i);
+ else {
+ iter->cur_prefix = snapshot->prefix_chunk;
+ iter->prefix_i = 0;
+ }
+
+ iter->prefix_row_end = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * iter->prefix_i +
+ sizeof(uint32_t));
+}
+
+struct write_packed_refs_v2_context {
+ struct packed_ref_store *refs;
+ struct string_list *updates;
+ struct strbuf *err;
+
+ struct hashfile *f;
+ struct chunkfile *cf;
+
+ /*
+ * As we stream the ref names to the refs chunk, store these
+ * values in-memory. These arrays are populated one for every ref.
+ */
+ uint64_t *offsets;
+ size_t nr;
+ size_t offsets_alloc;
+
+ int write_prefixes;
+ const char *cur_prefix;
+ size_t cur_prefix_len;
+
+ char **prefixes;
+ uint32_t *prefix_offsets;
+ uint32_t *prefix_rows;
+ size_t prefix_nr;
+ size_t prefixes_alloc;
+ size_t prefix_offsets_alloc;
+ size_t prefix_rows_alloc;
+};
+
+struct write_packed_refs_v2_context *create_v2_context(struct packed_ref_store *refs,
+ struct string_list *updates,
+ struct strbuf *err)
+{
+ struct write_packed_refs_v2_context *ctx;
+ int do_skip_hash;
+ CALLOC_ARRAY(ctx, 1);
+
+ ctx->refs = refs;
+ ctx->updates = updates;
+ ctx->err = err;
+
+ if (!fdopen_tempfile(refs->tempfile, "w")) {
+ strbuf_addf(err, "unable to fdopen packed-refs tempfile: %s",
+ strerror(errno));
+ return ctx;
+ }
+
+ ctx->f = hashfd(refs->tempfile->fd, refs->tempfile->filename.buf);
+
+ /* Default to true, so skip_hash if not set. */
+ if (git_config_get_maybe_bool("refs.hashpackedrefs", &do_skip_hash) ||
+ do_skip_hash)
+ ctx->f->skip_hash = 1;
+
+ ctx->cf = init_chunkfile(ctx->f);
+
+ return ctx;
+}
+
+static int write_packed_entry_v2(const char *refname,
+ const struct object_id *oid,
+ const struct object_id *peeled,
+ void *write_data)
+{
+ struct write_packed_refs_v2_context *ctx = write_data;
+ size_t reflen = strlen(refname) + 1;
+ size_t i = ctx->nr;
+
+ ALLOC_GROW(ctx->offsets, i + 1, ctx->offsets_alloc);
+
+ if (ctx->write_prefixes) {
+ if (ctx->cur_prefix && starts_with(refname, ctx->cur_prefix)) {
+ /* skip ahead! */
+ refname += ctx->cur_prefix_len;
+ reflen -= ctx->cur_prefix_len;
+ } else {
+ size_t len;
+ const char *slash, *slashslash = NULL;
+ if (ctx->prefix_nr) {
+ /* close out the old prefix. */
+ ctx->prefix_rows[ctx->prefix_nr - 1] = ctx->nr;
+ }
+
+ /* Find the new prefix. */
+ slash = strchr(refname, '/');
+ if (slash)
+ slashslash = strchr(slash + 1, '/');
+ /* If there are two slashes, use that. */
+ slash = slashslash ? slashslash : slash;
+ /*
+ * If there is at least one slash, use that,
+ * and include the slash in the string.
+ * Otherwise, use the end of the ref.
+ */
+ slash = slash ? slash + 1 : refname + strlen(refname);
+
+ len = slash - refname;
+ ALLOC_GROW(ctx->prefixes, ctx->prefix_nr + 1, ctx->prefixes_alloc);
+ ALLOC_GROW(ctx->prefix_offsets, ctx->prefix_nr + 1, ctx->prefix_offsets_alloc);
+ ALLOC_GROW(ctx->prefix_rows, ctx->prefix_nr + 1, ctx->prefix_rows_alloc);
+
+ if (ctx->prefix_nr)
+ ctx->prefix_offsets[ctx->prefix_nr] = ctx->prefix_offsets[ctx->prefix_nr - 1] + len + 1;
+ else
+ ctx->prefix_offsets[ctx->prefix_nr] = len + 1;
+
+ ctx->prefixes[ctx->prefix_nr] = xstrndup(refname, len);
+ ctx->cur_prefix = ctx->prefixes[ctx->prefix_nr];
+ ctx->prefix_nr++;
+
+ refname += len;
+ reflen -= len;
+ ctx->cur_prefix_len = len;
+ }
+
+ /* Update the last row continually. */
+ ctx->prefix_rows[ctx->prefix_nr - 1] = i + 1;
+ }
+
+
+ /* Write entire ref, including null terminator. */
+ hashwrite(ctx->f, refname, reflen);
+ hashwrite(ctx->f, oid->hash, the_hash_algo->rawsz);
+ if (peeled)
+ hashwrite(ctx->f, peeled->hash, the_hash_algo->rawsz);
+
+ if (i)
+ ctx->offsets[i] = (ctx->offsets[i - 1] & (~OFFSET_IS_PEELED));
+ else
+ ctx->offsets[i] = 0;
+ ctx->offsets[i] += reflen + the_hash_algo->rawsz;
+
+ if (peeled) {
+ ctx->offsets[i] += the_hash_algo->rawsz;
+ ctx->offsets[i] |= OFFSET_IS_PEELED;
+ }
+
+ ctx->nr++;
+ return 0;
+}
+
+static int write_refs_chunk_refs(struct hashfile *f,
+ void *data)
+{
+ struct write_packed_refs_v2_context *ctx = data;
+ int ok;
+
+ trace2_region_enter("refs", "refs-chunk", the_repository);
+ ok = merge_iterator_and_updates(ctx->refs, ctx->updates, ctx->err,
+ write_packed_entry_v2, ctx);
+ trace2_region_leave("refs", "refs-chunk", the_repository);
+
+ return ok != ITER_DONE;
+}
+
+static int write_refs_chunk_offsets(struct hashfile *f,
+ void *data)
+{
+ struct write_packed_refs_v2_context *ctx = data;
+ size_t i;
+
+ trace2_region_enter("refs", "offsets", the_repository);
+ for (i = 0; i < ctx->nr; i++)
+ hashwrite_be64(f, ctx->offsets[i]);
+
+ trace2_region_leave("refs", "offsets", the_repository);
+ return 0;
+}
+
+static int write_refs_chunk_prefix_data(struct hashfile *f,
+ void *data)
+{
+ struct write_packed_refs_v2_context *ctx = data;
+ size_t i;
+
+ trace2_region_enter("refs", "prefix-data", the_repository);
+ for (i = 0; i < ctx->prefix_nr; i++) {
+ size_t len = strlen(ctx->prefixes[i]) + 1;
+ hashwrite(f, ctx->prefixes[i], len);
+
+ /* TODO: assert the prefix lengths match the stored offsets? */
+ }
+
+ trace2_region_leave("refs", "prefix-data", the_repository);
+ return 0;
+}
+
+static int write_refs_chunk_prefix_offsets(struct hashfile *f,
+ void *data)
+{
+ struct write_packed_refs_v2_context *ctx = data;
+ size_t i;
+
+ trace2_region_enter("refs", "prefix-offsets", the_repository);
+ for (i = 0; i < ctx->prefix_nr; i++) {
+ hashwrite_be32(f, ctx->prefix_offsets[i]);
+ hashwrite_be32(f, ctx->prefix_rows[i]);
+ }
+
+ trace2_region_leave("refs", "prefix-offsets", the_repository);
+ return 0;
+}
+
+int write_packed_refs_v2(struct write_packed_refs_v2_context *ctx)
+{
+ unsigned char file_hash[GIT_MAX_RAWSZ];
+
+ ctx->write_prefixes = git_env_bool("GIT_TEST_WRITE_PACKED_REFS_PREFIXES", 1);
+
+ add_chunk(ctx->cf, CHREFS_CHUNKID_REFS, 0, write_refs_chunk_refs);
+ add_chunk(ctx->cf, CHREFS_CHUNKID_OFFSETS, 0, write_refs_chunk_offsets);
+
+ if (ctx->write_prefixes) {
+ add_chunk(ctx->cf, CHREFS_CHUNKID_PREFIX_DATA, 0, write_refs_chunk_prefix_data);
+ add_chunk(ctx->cf, CHREFS_CHUNKID_PREFIX_OFFSETS, 0, write_refs_chunk_prefix_offsets);
+ }
+
+ hashwrite_be32(ctx->f, PACKED_REFS_SIGNATURE);
+ hashwrite_be32(ctx->f, 2);
+ hashwrite_be32(ctx->f, the_hash_algo->format_id);
+
+ if (write_chunkfile(ctx->cf, CHUNKFILE_TRAILING_TOC, ctx))
+ goto failure;
+
+ finalize_hashfile(ctx->f, file_hash, FSYNC_COMPONENT_REFERENCE,
+ CSUM_HASH_IN_STREAM | CSUM_FSYNC);
+
+ return 0;
+
+failure:
+ return -1;
+}
+
+void free_v2_context(struct write_packed_refs_v2_context *ctx)
+{
+ if (ctx->cf)
+ free_chunkfile(ctx->cf);
+ free(ctx);
+}