diff options
| -rw-r--r-- | pack-objects.c | 84 | ||||
| -rw-r--r-- | pack.h | 23 | ||||
| -rw-r--r-- | sha1_file.c | 75 | ||||
| -rw-r--r-- | unpack-objects.c | 65 | 
4 files changed, 174 insertions, 73 deletions
diff --git a/pack-objects.c b/pack-objects.c index e9764604f6..d1e62dc019 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -2,25 +2,18 @@  #include "cache.h"  #include "object.h"  #include "delta.h" +#include "pack.h"  #include "csum-file.h"  static const char pack_usage[] = "git-pack-objects [--window=N] [--depth=N] {--stdout | base-name} < object-list"; -/* - * The object type is a single-character shorthand: - *  - 'C' for "Commit" - *  - 'T' for "Tree" - *  - 'B' for "Blob" - *  - 'G' for "taG" - *  - 'D' for "Delta" - */  struct object_entry {  	unsigned char sha1[20];  	unsigned long size;  	unsigned long offset;  	unsigned int depth;  	unsigned int hash; -	unsigned char type; +	enum object_type type;  	unsigned long delta_size;  	struct object_entry *delta;  }; @@ -49,13 +42,54 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e  	return delta_buf;  } +/* + * The per-object header is a pretty dense thing, which is + *  - first byte: low four bits are "size", then three bits of "type", + *    and the high bit is "size continues". + *  - each byte afterwards: low seven bits are size continuation, + *    with the high bit being "size continues" + */ +static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr) +{ +	int n = 1, i; +	unsigned char c; + +	if (type < OBJ_COMMIT || type > OBJ_DELTA) +		die("bad type %d", type); + +	/* +	 * Shift the size up by 7 bits at a time, +	 * until you get bits in the "high four". +	 * That will be our beginning. We'll have +	 * four size bits in 28..31, then groups +	 * of seven in 21..27, 14..20, 7..13 and +	 * finally 0..6. +	 */ +	if (size) { +		n = 5; +		while (!(size & 0xfe000000)) { +			size <<= 7; +			n--; +		} +	} +	c = (type << 4) | (size >> 28); +	for (i = 1; i < n; i++) { +		*hdr++ = c | 0x80; +		c = (size >> 21) & 0x7f; +		size <<= 7; +	} +	*hdr = c; +	return n; +} +  static unsigned long write_object(struct sha1file *f, struct object_entry *entry)  {  	unsigned long size;  	char type[10];  	void *buf = read_sha1_file(entry->sha1, type, &size); -	char header[25]; +	unsigned char header[10];  	unsigned hdrlen, datalen; +	enum object_type obj_type;  	if (!buf)  		die("unable to read %s", sha1_to_hex(entry->sha1)); @@ -67,18 +101,18 @@ static unsigned long write_object(struct sha1file *f, struct object_entry *entry  	 * length, except for deltas that has the 20 bytes of delta sha  	 * instead.  	 */ -	header[0] = entry->type; -	hdrlen = 5; +	obj_type = entry->type;  	if (entry->delta) { -		header[0] = 'D'; -		memcpy(header+5, entry->delta, 20);  		buf = delta_against(buf, size, entry);  		size = entry->delta_size; -		hdrlen = 25; +		obj_type = OBJ_DELTA;  	} -	datalen = htonl(size); -	memcpy(header+1, &datalen, 4); +	hdrlen = encode_header(obj_type, size, header);  	sha1write(f, header, hdrlen); +	if (entry->delta) { +		sha1write(f, entry->delta, 20); +		hdrlen += 20; +	}  	datalen = sha1write_compressed(f, buf, size);  	free(buf);  	return hdrlen + datalen; @@ -88,13 +122,19 @@ static void write_pack_file(void)  {  	int i;  	struct sha1file *f; -	unsigned long offset = 0; +	unsigned long offset;  	unsigned long mb; +	struct pack_header hdr;  	if (!base_name)  		f = sha1fd(1, "<stdout>");  	else  		f = sha1create("%s.%s", base_name, "pack"); +	hdr.hdr_signature = htonl(PACK_SIGNATURE); +	hdr.hdr_version = htonl(1); +	hdr.hdr_entries = htonl(nr_objects); +	sha1write(f, &hdr, sizeof(hdr)); +	offset = sizeof(hdr);  	for (i = 0; i < nr_objects; i++) {  		struct object_entry *entry = objects + i;  		entry->offset = offset; @@ -168,13 +208,13 @@ static void check_object(struct object_entry *entry)  	if (!sha1_object_info(entry->sha1, type, &entry->size)) {  		if (!strcmp(type, "commit")) { -			entry->type = 'C'; +			entry->type = OBJ_COMMIT;  		} else if (!strcmp(type, "tree")) { -			entry->type = 'T'; +			entry->type = OBJ_TREE;  		} else if (!strcmp(type, "blob")) { -			entry->type = 'B'; +			entry->type = OBJ_BLOB;  		} else if (!strcmp(type, "tag")) { -			entry->type = 'G'; +			entry->type = OBJ_TAG;  		} else  			die("unable to pack object %s of type %s",  			    sha1_to_hex(entry->sha1), type); diff --git a/pack.h b/pack.h new file mode 100644 index 0000000000..08e120dca6 --- /dev/null +++ b/pack.h @@ -0,0 +1,23 @@ +#ifndef PACK_H +#define PACK_H + +enum object_type { +	OBJ_NONE, +	OBJ_COMMIT, +	OBJ_TREE, +	OBJ_BLOB, +	OBJ_TAG, +	OBJ_DELTA, +}; + +/* + * Packed object header + */ +#define PACK_SIGNATURE 0x5041434b	/* "PACK" */ +struct pack_header { +	unsigned int hdr_signature; +	unsigned int hdr_version; +	unsigned int hdr_entries; +}; + +#endif diff --git a/sha1_file.c b/sha1_file.c index e27affb288..6e3fd180f9 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -10,6 +10,7 @@  #include <dirent.h>  #include "cache.h"  #include "delta.h" +#include "pack.h"  #ifndef O_NOATIME  #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -665,37 +666,60 @@ static int packed_delta_info(unsigned char *base_sha1,  	return 0;  } +static unsigned long unpack_object_header(struct packed_git *p, unsigned long offset, +	enum object_type *type, unsigned long *sizep) +{ +	unsigned char *pack, c; +	unsigned long size; + +	if (offset >= p->pack_size) +		die("object offset outside of pack file"); + +	pack =  p->pack_base + offset; +	c = *pack++; +	offset++; +	*type = (c >> 4) & 7; +	size = c & 15; +	while (c & 0x80) { +		if (offset >= p->pack_size) +			die("object offset outside of pack file"); +		c = *pack++; +		offset++; +		size = (size << 7) | (c & 0x7f); +	} +	*sizep = size; +	return offset; +} +  static int packed_object_info(struct pack_entry *entry,  			      char *type, unsigned long *sizep)  {  	struct packed_git *p = entry->p;  	unsigned long offset, size, left;  	unsigned char *pack; - -	offset = entry->offset; -	if (p->pack_size - 5 < offset) -		die("object offset outside of pack file"); +	enum object_type kind;  	if (use_packed_git(p))  		die("cannot map packed file"); +	offset = unpack_object_header(p, entry->offset, &kind, &size);  	pack = p->pack_base + offset; -	size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4]; -	left = p->pack_size - offset - 5; -	switch (*pack) { -	case 'D': -		return packed_delta_info(pack+5, size, left, type, sizep); +	left = p->pack_size - offset; + +	switch (kind) { +	case OBJ_DELTA: +		return packed_delta_info(pack, size, left, type, sizep);  		break; -	case 'C': +	case OBJ_COMMIT:  		strcpy(type, "commit");  		break; -	case 'T': +	case OBJ_TREE:  		strcpy(type, "tree");  		break; -	case 'B': +	case OBJ_BLOB:  		strcpy(type, "blob");  		break; -	case 'G': +	case OBJ_TAG:  		strcpy(type, "tag");  		break;  	default: @@ -787,37 +811,34 @@ static void *unpack_entry(struct pack_entry *entry,  	struct packed_git *p = entry->p;  	unsigned long offset, size, left;  	unsigned char *pack; - -	offset = entry->offset; -	if (p->pack_size - 5 < offset) -		die("object offset outside of pack file"); +	enum object_type kind;  	if (use_packed_git(p))  		die("cannot map packed file"); +	offset = unpack_object_header(p, entry->offset, &kind, &size);  	pack = p->pack_base + offset; -	size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4]; -	left = p->pack_size - offset - 5; -	switch (*pack) { -	case 'D': -		return unpack_delta_entry(pack+5, size, left, type, sizep); -	case 'C': +	left = p->pack_size - offset; +	switch (kind) { +	case OBJ_DELTA: +		return unpack_delta_entry(pack, size, left, type, sizep); +	case OBJ_COMMIT:  		strcpy(type, "commit");  		break; -	case 'T': +	case OBJ_TREE:  		strcpy(type, "tree");  		break; -	case 'B': +	case OBJ_BLOB:  		strcpy(type, "blob");  		break; -	case 'G': +	case OBJ_TAG:  		strcpy(type, "tag");  		break;  	default:  		die("corrupted pack file");  	}  	*sizep = size; -	return unpack_non_delta_entry(pack+5, size, left); +	return unpack_non_delta_entry(pack, size, left);  }  static int find_pack_entry_1(const unsigned char *sha1, diff --git a/unpack-objects.c b/unpack-objects.c index 57f3c9b6bb..98b696cf2c 100644 --- a/unpack-objects.c +++ b/unpack-objects.c @@ -1,6 +1,7 @@  #include "cache.h"  #include "object.h"  #include "delta.h" +#include "pack.h"  static int dry_run;  static int nr_entries; @@ -92,7 +93,7 @@ static int check_index(void)  }  static int unpack_non_delta_entry(struct pack_entry *entry, -				  int kind, +				  enum object_type kind,  				  unsigned char *data,  				  unsigned long size,  				  unsigned long left) @@ -101,9 +102,9 @@ static int unpack_non_delta_entry(struct pack_entry *entry,  	z_stream stream;  	char *buffer;  	unsigned char sha1[20]; -	char *type_s; +	char *type; -	printf("%s %c %lu\n", sha1_to_hex(entry->sha1), kind, size); +	printf("%s %c %lu\n", sha1_to_hex(entry->sha1), ".CTBGD"[kind], size);  	if (dry_run)  		return 0; @@ -121,18 +122,18 @@ static int unpack_non_delta_entry(struct pack_entry *entry,  	if ((st != Z_STREAM_END) || stream.total_out != size)  		goto err_finish;  	switch (kind) { -	case 'C': type_s = "commit"; break; -	case 'T': type_s = "tree"; break; -	case 'B': type_s = "blob"; break; -	case 'G': type_s = "tag"; break; +	case OBJ_COMMIT: type = "commit"; break; +	case OBJ_TREE:   type = "tree"; break; +	case OBJ_BLOB:   type = "blob"; break; +	case OBJ_TAG:    type = "tag"; break;  	default: goto err_finish;  	} -	if (write_sha1_file(buffer, size, type_s, sha1) < 0) +	if (write_sha1_file(buffer, size, type, sha1) < 0)  		die("failed to write %s (%s)", -		    sha1_to_hex(entry->sha1), type_s); -	printf("%s %s\n", sha1_to_hex(sha1), type_s); +		    sha1_to_hex(entry->sha1), type); +	printf("%s %s\n", sha1_to_hex(sha1), type);  	if (memcmp(sha1, entry->sha1, 20)) -		die("resulting %s have wrong SHA1", type_s); +		die("resulting %s have wrong SHA1", type);   finish:  	st = 0; @@ -237,28 +238,44 @@ static int unpack_delta_entry(struct pack_entry *entry,  static void unpack_entry(struct pack_entry *entry)  {  	unsigned long offset, size, left; -	unsigned char *pack; +	unsigned char *pack, c; +	int type;  	/* Have we done this one already due to deltas based on it? */  	if (lookup_object(entry->sha1))  		return;  	offset = ntohl(entry->offset); -	if (offset > pack_size - 5) -		die("object offset outside of pack file"); +	if (offset >= pack_size) +		goto bad; +  	pack = pack_base + offset; -	size = (pack[1] << 24) + (pack[2] << 16) + (pack[3] << 8) + pack[4]; -	left = pack_size - offset - 5; -	switch (*pack) { -	case 'C': case 'T': case 'B': case 'G': -		unpack_non_delta_entry(entry, *pack, pack+5, size, left); -		break; -	case 'D': +	c = *pack++; +	offset++; +	type = (c >> 4) & 7; +	size = (c & 15); +	while (c & 0x80) { +		if (offset >= pack_size) +			goto bad; +		offset++; +		c = *pack++; +		size = (size << 7) + (c & 0x7f); +		 +	} +	left = pack_size - offset; +	switch (type) { +	case OBJ_COMMIT: +	case OBJ_TREE: +	case OBJ_BLOB: +	case OBJ_TAG: +		unpack_non_delta_entry(entry, type, pack, size, left); +		return; +	case OBJ_DELTA:  		unpack_delta_entry(entry, pack+5, size, left); -		break; -	default: -		die("corrupted pack file"); +		return;  	} +bad: +	die("corrupted pack file");  }  /*  | 
