1 files changed, 456 insertions, 0 deletions
diff --git a/refs/packed-format-v1.c b/refs/packed-format-v1.c
new file mode 100644
index 0000000000..2d071567c0
--- /dev/null
+++ b/refs/packed-format-v1.c
@@ -0,0 +1,456 @@
+#include "../cache.h"
+#include "../config.h"
+#include "../refs.h"
+#include "refs-internal.h"
+#include "packed-backend.h"
+#include "../iterator.h"
+#include "../lockfile.h"
+#include "../chdir-notify.h"
+
+static NORETURN void die_unterminated_line(const char *path,
+					   const char *p, size_t len)
+{
+	if (len < 80)
+		die("unterminated line in %s: %.*s", path, (int)len, p);
+	else
+		die("unterminated line in %s: %.75s...", path, p);
+}
+
+static NORETURN void die_invalid_line(const char *path,
+				      const char *p, size_t len)
+{
+	const char *eol = memchr(p, '\n', len);
+
+	if (!eol)
+		die_unterminated_line(path, p, len);
+	else if (eol - p < 80)
+		die("unexpected line in %s: %.*s", path, (int)(eol - p), p);
+	else
+		die("unexpected line in %s: %.75s...", path, p);
+}
+
+struct snapshot_record {
+	const char *start;
+	size_t len;
+};
+
+static int cmp_packed_ref_records(const void *v1, const void *v2)
+{
+	const struct snapshot_record *e1 = v1, *e2 = v2;
+	const char *r1 = e1->start + the_hash_algo->hexsz + 1;
+	const char *r2 = e2->start + the_hash_algo->hexsz + 1;
+
+	while (1) {
+		if (*r1 == '\n')
+			return *r2 == '\n' ? 0 : -1;
+		if (*r1 != *r2) {
+			if (*r2 == '\n')
+				return 1;
+			else
+				return (unsigned char)*r1 < (unsigned char)*r2 ? -1 : +1;
+		}
+		r1++;
+		r2++;
+	}
+}
+
+/*
+ * Compare a snapshot record at `rec` to the specified NUL-terminated
+ * refname.
+ */
+static int cmp_record_to_refname(const char *rec, const char *refname)
+{
+	const char *r1 = rec + the_hash_algo->hexsz + 1;
+	const char *r2 = refname;
+
+	while (1) {
+		if (*r1 == '\n')
+			return *r2 ? -1 : 0;
+		if (!*r2)
+			return 1;
+		if (*r1 != *r2)
+			return (unsigned char)*r1 < (unsigned char)*r2 ? -1 : +1;
+		r1++;
+		r2++;
+	}
+}
+
+/*
+ * `snapshot->buf` is not known to be sorted. Check whether it is, and
+ * if not, sort it into new memory and munmap/free the old storage.
+ */
+void sort_snapshot_v1(struct snapshot *snapshot)
+{
+	struct snapshot_record *records = NULL;
+	size_t alloc = 0, nr = 0;
+	int sorted = 1;
+	const char *pos, *eof, *eol;
+	size_t len, i;
+	char *new_buffer, *dst;
+
+	pos = snapshot->start;
+	eof = snapshot->eof;
+
+	if (pos == eof)
+		return;
+
+	len = eof - pos;
+
+	/*
+	 * Initialize records based on a crude estimate of the number
+	 * of references in the file (we'll grow it below if needed):
+	 */
+	ALLOC_GROW(records, len / 80 + 20, alloc);
+
+	while (pos < eof) {
+		eol = memchr(pos, '\n', eof - pos);
+		if (!eol)
+			/* The safety check should prevent this. */
+			BUG("unterminated line found in packed-refs");
+		if (eol - pos < the_hash_algo->hexsz + 2)
+			die_invalid_line(snapshot->refs->path,
+					 pos, eof - pos);
+		eol++;
+		if (eol < eof && *eol == '^') {
+			/*
+			 * Keep any peeled line together with its
+			 * reference:
+			 */
+			const char *peeled_start = eol;
+
+			eol = memchr(peeled_start, '\n', eof - peeled_start);
+			if (!eol)
+				/* The safety check should prevent this. */
+				BUG("unterminated peeled line found in packed-refs");
+			eol++;
+		}
+
+		ALLOC_GROW(records, nr + 1, alloc);
+		records[nr].start = pos;
+		records[nr].len = eol - pos;
+		nr++;
+
+		if (sorted &&
+		    nr > 1 &&
+		    cmp_packed_ref_records(&records[nr - 2],
+					   &records[nr - 1]) >= 0)
+			sorted = 0;
+
+		pos = eol;
+	}
+
+	if (sorted)
+		goto cleanup;
+
+	/* We need to sort the memory. First we sort the records array: */
+	QSORT(records, nr, cmp_packed_ref_records);
+
+	/*
+	 * Allocate a new chunk of memory, and copy the old memory to
+	 * the new in the order indicated by `records` (not bothering
+	 * with the header line):
+	 */
+	new_buffer = xmalloc(len);
+	for (dst = new_buffer, i = 0; i < nr; i++) {
+		memcpy(dst, records[i].start, records[i].len);
+		dst += records[i].len;
+	}
+
+	/*
+	 * Now munmap the old buffer and use the sorted buffer in its
+	 * place:
+	 */
+	clear_snapshot_buffer(snapshot);
+	snapshot->buf = snapshot->start = new_buffer;
+	snapshot->eof = new_buffer + len;
+
+cleanup:
+	free(records);
+}
+
+/*
+ * Return a pointer to the start of the record that contains the
+ * character `*p` (which must be within the buffer). If no other
+ * record start is found, return `buf`.
+ */
+static const char *find_start_of_record(const char *buf, const char *p)
+{
+	while (p > buf && (p[-1] != '\n' || p[0] == '^'))
+		p--;
+	return p;
+}
+
+/*
+ * Return a pointer to the start of the record following the record
+ * that contains `*p`. If none is found before `end`, return `end`.
+ */
+static const char *find_end_of_record(const char *p, const char *end)
+{
+	while (++p < end && (p[-1] != '\n' || p[0] == '^'))
+		;
+	return p;
+}
+
+/*
+ * We want to be able to compare mmapped reference records quickly,
+ * without totally parsing them. We can do so because the records are
+ * LF-terminated, and the refname should start exactly (GIT_SHA1_HEXSZ
+ * + 1) bytes past the beginning of the record.
+ *
+ * But what if the `packed-refs` file contains garbage? We're willing
+ * to tolerate not detecting the problem, as long as we don't produce
+ * totally garbled output (we can't afford to check the integrity of
+ * the whole file during every Git invocation). But we do want to be
+ * sure that we never read past the end of the buffer in memory and
+ * perform an illegal memory access.
+ *
+ * Guarantee that minimum level of safety by verifying that the last
+ * record in the file is LF-terminated, and that it has at least
+ * (GIT_SHA1_HEXSZ + 1) characters before the LF. Die if either of
+ * these checks fails.
+ */
+void verify_buffer_safe_v1(struct snapshot *snapshot)
+{
+	const char *start = snapshot->start;
+	const char *eof = snapshot->eof;
+	const char *last_line;
+
+	if (start == eof)
+		return;
+
+	last_line = find_start_of_record(start, eof - 1);
+	if (*(eof - 1) != '\n' || eof - last_line < the_hash_algo->hexsz + 2)
+		die_invalid_line(snapshot->refs->path,
+				 last_line, eof - last_line);
+}
+
+/*
+ * Find the place in `snapshot->buf` where the start of the record for
+ * `refname` starts. If `mustexist` is true and the reference doesn't
+ * exist, then return NULL. If `mustexist` is false and the reference
+ * doesn't exist, then return the point where that reference would be
+ * inserted, or `snapshot->eof` (which might be NULL) if it would be
+ * inserted at the end of the file. In the latter mode, `refname`
+ * doesn't have to be a proper reference name; for example, one could
+ * search for "refs/replace/" to find the start of any replace
+ * references.
+ *
+ * The record is sought using a binary search, so `snapshot->buf` must
+ * be sorted.
+ */
+const char *find_reference_location_v1(struct snapshot *snapshot,
+				       const char *refname, int mustexist)
+{
+	/*
+	 * This is not *quite* a garden-variety binary search, because
+	 * the data we're searching is made up of records, and we
+	 * always need to find the beginning of a record to do a
+	 * comparison. A "record" here is one line for the reference
+	 * itself and zero or one peel lines that start with '^'. Our
+	 * loop invariant is described in the next two comments.
+	 */
+
+	/*
+	 * A pointer to the character at the start of a record whose
+	 * preceding records all have reference names that come
+	 * *before* `refname`.
+	 */
+	const char *lo = snapshot->start;
+
+	/*
+	 * A pointer to a the first character of a record whose
+	 * reference name comes *after* `refname`.
+	 */
+	const char *hi = snapshot->eof;
+
+	while (lo != hi) {
+		const char *mid, *rec;
+		int cmp;
+
+		mid = lo + (hi - lo) / 2;
+		rec = find_start_of_record(lo, mid);
+		cmp = cmp_record_to_refname(rec, refname);
+		if (cmp < 0) {
+			lo = find_end_of_record(mid, hi);
+		} else if (cmp > 0) {
+			hi = rec;
+		} else {
+			return rec;
+		}
+	}
+
+	if (mustexist)
+		return NULL;
+	else
+		return lo;
+}
+
+int parse_packed_format_v1_header(struct packed_ref_store *refs,
+				  struct snapshot *snapshot,
+				  int *sorted)
+{
+	*sorted = 0;
+	/* If the file has a header line, process it: */
+	if (snapshot->buf < snapshot->eof && *snapshot->buf == '#') {
+		char *tmp, *p, *eol;
+		struct string_list traits = STRING_LIST_INIT_NODUP;
+
+		eol = memchr(snapshot->buf, '\n',
+			     snapshot->eof - snapshot->buf);
+		if (!eol)
+			die_unterminated_line(refs->path,
+					      snapshot->buf,
+					      snapshot->eof - snapshot->buf);
+
+		tmp = xmemdupz(snapshot->buf, eol - snapshot->buf);
+
+		if (!skip_prefix(tmp, "# pack-refs with:", (const char **)&p))
+			die_invalid_line(refs->path,
+					 snapshot->buf,
+					 snapshot->eof - snapshot->buf);
+
+		string_list_split_in_place(&traits, p, ' ', -1);
+
+		if (unsorted_string_list_has_string(&traits, "fully-peeled"))
+			snapshot->peeled = PEELED_FULLY;
+		else if (unsorted_string_list_has_string(&traits, "peeled"))
+			snapshot->peeled = PEELED_TAGS;
+
+		*sorted = unsorted_string_list_has_string(&traits, "sorted");
+
+		/* perhaps other traits later as well */
+
+		/* The "+ 1" is for the LF character. */
+		snapshot->start = eol + 1;
+
+		string_list_clear(&traits, 0);
+		free(tmp);
+	}
+
+	return 0;
+}
+
+int packed_read_raw_ref_v1(struct packed_ref_store *refs, struct snapshot *snapshot,
+			   const char *refname, struct object_id *oid,
+			   unsigned int *type, int *failure_errno)
+{
+	const char *rec;
+
+	*type = 0;
+
+	rec = find_reference_location_v1(snapshot, refname, 1);
+
+	if (!rec) {
+		/* refname is not a packed reference. */
+		*failure_errno = ENOENT;
+		return -1;
+	}
+
+	if (get_oid_hex(rec, oid))
+		die_invalid_line(refs->path, rec, snapshot->eof - rec);
+
+	*type = REF_ISPACKED;
+	return 0;
+}
+
+int next_record_v1(struct packed_ref_iterator *iter)
+{
+	const char *p = iter->pos, *eol;
+
+	strbuf_reset(&iter->refname_buf);
+
+	if (iter->pos == iter->eof)
+		return ITER_DONE;
+
+	iter->base.flags = REF_ISPACKED;
+
+	if (iter->eof - p < the_hash_algo->hexsz + 2 ||
+	    parse_oid_hex(p, &iter->oid, &p) ||
+	    !isspace(*p++))
+		die_invalid_line(iter->snapshot->refs->path,
+				 iter->pos, iter->eof - iter->pos);
+
+	eol = memchr(p, '\n', iter->eof - p);
+	if (!eol)
+		die_unterminated_line(iter->snapshot->refs->path,
+				      iter->pos, iter->eof - iter->pos);
+
+	strbuf_add(&iter->refname_buf, p, eol - p);
+	iter->base.refname = iter->refname_buf.buf;
+
+	if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) {
+		if (!refname_is_safe(iter->base.refname))
+			die("packed refname is dangerous: %s",
+			    iter->base.refname);
+		oidclr(&iter->oid);
+		iter->base.flags |= REF_BAD_NAME | REF_ISBROKEN;
+	}
+	if (iter->snapshot->peeled == PEELED_FULLY ||
+	    (iter->snapshot->peeled == PEELED_TAGS &&
+	     starts_with(iter->base.refname, "refs/tags/")))
+		iter->base.flags |= REF_KNOWS_PEELED;
+
+	iter->pos = eol + 1;
+
+	if (iter->pos < iter->eof && *iter->pos == '^') {
+		p = iter->pos + 1;
+		if (iter->eof - p < the_hash_algo->hexsz + 1 ||
+		    parse_oid_hex(p, &iter->peeled, &p) ||
+		    *p++ != '\n')
+			die_invalid_line(iter->snapshot->refs->path,
+					 iter->pos, iter->eof - iter->pos);
+		iter->pos = p;
+
+		/*
+		 * Regardless of what the file header said, we
+		 * definitely know the value of *this* reference. But
+		 * we suppress it if the reference is broken:
+		 */
+		if ((iter->base.flags & REF_ISBROKEN)) {
+			oidclr(&iter->peeled);
+			iter->base.flags &= ~REF_KNOWS_PEELED;
+		} else {
+			iter->base.flags |= REF_KNOWS_PEELED;
+		}
+	} else {
+		oidclr(&iter->peeled);
+	}
+
+	return ITER_OK;
+}
+
+/*
+ * The packed-refs header line that we write out. Perhaps other traits
+ * will be added later.
+ *
+ * Note that earlier versions of Git used to parse these traits by
+ * looking for " trait " in the line. For this reason, the space after
+ * the colon and the trailing space are required.
+ */
+static const char PACKED_REFS_HEADER[] =
+	"# pack-refs with: peeled fully-peeled sorted \n";
+
+int write_packed_file_header_v1(FILE *out)
+{
+	return fprintf(out, "%s", PACKED_REFS_HEADER);
+}
+
+/*
+ * Write an entry to the packed-refs file for the specified refname.
+ * If peeled is non-NULL, write it as the entry's peeled value. On
+ * error, return a nonzero value and leave errno set at the value left
+ * by the failing call to `fprintf()`.
+ */
+int write_packed_entry_v1(const char *refname,
+			  const struct object_id *oid,
+			  const struct object_id *peeled,
+			  void *write_data)
+{
+	FILE *fh = write_data;
+
+	if (fprintf(fh, "%s %s\n", oid_to_hex(oid), refname) < 0 ||
+	    (peeled && fprintf(fh, "^%s\n", oid_to_hex(peeled)) < 0))
+		return -1;
+
+	return 0;
+}