summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEdward Thomson <ethomson@edwardthomson.com>2017-06-07 14:42:12 +0200
committerGitHub <noreply@github.com>2017-06-07 14:42:12 +0200
commit3bc95cfe3e46e4107535720335bed3edc0caf7e0 (patch)
tree850d102a6dd4bb2edd813fd73c0219ff6f05e275
parent6a13cf1e00fba5f963b273cb00aa54411478fdc5 (diff)
parent92d3ea4e398e8fbe3bf59088dc1a9ce8642b23fe (diff)
downloadlibgit2-3bc95cfe3e46e4107535720335bed3edc0caf7e0.tar.gz
Merge pull request #4236 from pks-t/pks/index-v4-fixes
Fix path computations for compressed index entries
-rw-r--r--src/index.c117
-rw-r--r--src/varint.c2
-rw-r--r--tests/core/encoding.c3
-rw-r--r--tests/index/version.c126
-rw-r--r--tests/resources/indexv4/.gitted/HEAD1
-rw-r--r--tests/resources/indexv4/.gitted/config5
-rw-r--r--tests/resources/indexv4/.gitted/indexbin0 -> 572 bytes
-rw-r--r--tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99bin0 -> 70 bytes
-rw-r--r--tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7bin0 -> 154 bytes
-rw-r--r--tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391bin0 -> 15 bytes
-rw-r--r--tests/resources/indexv4/.gitted/refs/heads/master1
-rw-r--r--tests/resources/indexv4/file.tx0
-rw-r--r--tests/resources/indexv4/file.txt0
-rw-r--r--tests/resources/indexv4/file.txz0
-rw-r--r--tests/resources/indexv4/foo0
-rw-r--r--tests/resources/indexv4/zzz0
16 files changed, 197 insertions, 58 deletions
diff --git a/src/index.c b/src/index.c
index 932a5306a..c29e90fb0 100644
--- a/src/index.c
+++ b/src/index.c
@@ -54,10 +54,6 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr
unsigned int flags,
git_index_matched_path_cb cb, void *payload);
-#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7)
-#define short_entry_size(len) entry_size(struct entry_short, len)
-#define long_entry_size(len) entry_size(struct entry_long, len)
-
#define minimal_entry_size (offsetof(struct entry_short, path))
static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ;
@@ -2282,12 +2278,29 @@ out_err:
return 0;
}
+static size_t index_entry_size(size_t path_len, size_t varint_len, uint32_t flags)
+{
+ if (varint_len) {
+ if (flags & GIT_IDXENTRY_EXTENDED)
+ return offsetof(struct entry_long, path) + path_len + 1 + varint_len;
+ else
+ return offsetof(struct entry_short, path) + path_len + 1 + varint_len;
+ } else {
+#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7)
+ if (flags & GIT_IDXENTRY_EXTENDED)
+ return entry_size(struct entry_long, path_len);
+ else
+ return entry_size(struct entry_short, path_len);
+#undef entry_size
+ }
+}
+
static size_t read_entry(
git_index_entry **out,
git_index *index,
const void *buffer,
size_t buffer_size,
- const char **last)
+ const char *last)
{
size_t path_length, entry_size;
const char *path_ptr;
@@ -2344,35 +2357,34 @@ static size_t read_entry(
path_length = path_end - path_ptr;
}
- if (entry.flags & GIT_IDXENTRY_EXTENDED)
- entry_size = long_entry_size(path_length);
- else
- entry_size = short_entry_size(path_length);
-
- if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
- return 0;
-
+ entry_size = index_entry_size(path_length, 0, entry.flags);
entry.path = (char *)path_ptr;
} else {
size_t varint_len;
- size_t shared = git_decode_varint((const unsigned char *)path_ptr,
- &varint_len);
- size_t len = strlen(path_ptr + varint_len);
- size_t last_len = strlen(*last);
- size_t tmp_path_len;
+ size_t strip_len = git_decode_varint((const unsigned char *)path_ptr,
+ &varint_len);
+ size_t last_len = strlen(last);
+ size_t prefix_len = last_len - strip_len;
+ size_t suffix_len = strlen(path_ptr + varint_len);
+ size_t path_len;
if (varint_len == 0)
return index_error_invalid("incorrect prefix length");
- GITERR_CHECK_ALLOC_ADD(&tmp_path_len, shared, len + 1);
- tmp_path = git__malloc(tmp_path_len);
+ GITERR_CHECK_ALLOC_ADD(&path_len, prefix_len, suffix_len);
+ GITERR_CHECK_ALLOC_ADD(&path_len, path_len, 1);
+ tmp_path = git__malloc(path_len);
GITERR_CHECK_ALLOC(tmp_path);
- memcpy(tmp_path, last, last_len);
- memcpy(tmp_path + last_len, path_ptr + varint_len, len);
- entry_size = long_entry_size(shared + len);
+
+ memcpy(tmp_path, last, prefix_len);
+ memcpy(tmp_path + prefix_len, path_ptr + varint_len, suffix_len + 1);
+ entry_size = index_entry_size(suffix_len, varint_len, entry.flags);
entry.path = tmp_path;
}
+ if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
+ return 0;
+
if (index_entry_dup(out, index, &entry) < 0) {
git__free(tmp_path);
return 0;
@@ -2445,7 +2457,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
unsigned int i;
struct index_header header = { 0 };
git_oid checksum_calculated, checksum_expected;
- const char **last = NULL;
+ const char *last = NULL;
const char *empty = "";
#define seek_forward(_increase) { \
@@ -2469,7 +2481,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
index->version = header.version;
if (index->version >= INDEX_VERSION_NUMBER_COMP)
- last = &empty;
+ last = empty;
seek_forward(INDEX_HEADER_SIZE);
@@ -2504,6 +2516,9 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
}
error = 0;
+ if (index->version >= INDEX_VERSION_NUMBER_COMP)
+ last = entry->path;
+
seek_forward(entry_size);
}
@@ -2574,11 +2589,12 @@ static bool is_index_extended(git_index *index)
return (extended > 0);
}
-static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char **last)
+static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char *last)
{
void *mem = NULL;
struct entry_short *ondisk;
size_t path_len, disk_size;
+ int varint_len = 0;
char *path;
const char *path_start = entry->path;
size_t same_len = 0;
@@ -2586,7 +2602,7 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha
path_len = ((struct entry_internal *)entry)->pathlen;
if (last) {
- const char *last_c = *last;
+ const char *last_c = last;
while (*path_start == *last_c) {
if (!*path_start || !*last_c)
@@ -2596,13 +2612,10 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha
++same_len;
}
path_len -= same_len;
- *last = entry->path;
+ varint_len = git_encode_varint(NULL, 0, same_len);
}
- if (entry->flags & GIT_IDXENTRY_EXTENDED)
- disk_size = long_entry_size(path_len);
- else
- disk_size = short_entry_size(path_len);
+ disk_size = index_entry_size(path_len, varint_len, entry->flags);
if (git_filebuf_reserve(file, &mem, disk_size) < 0)
return -1;
@@ -2642,16 +2655,34 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha
ondisk_ext->flags_extended = htons(entry->flags_extended &
GIT_IDXENTRY_EXTENDED_FLAGS);
path = ondisk_ext->path;
- }
- else
+ disk_size -= offsetof(struct entry_long, path);
+ } else {
path = ondisk->path;
+ disk_size -= offsetof(struct entry_short, path);
+ }
if (last) {
- path += git_encode_varint((unsigned char *) path,
- disk_size,
- path_len - same_len);
+ varint_len = git_encode_varint((unsigned char *) path,
+ disk_size, same_len);
+ assert(varint_len > 0);
+ path += varint_len;
+ disk_size -= varint_len;
+
+ /*
+ * If using path compression, we are not allowed
+ * to have additional trailing NULs.
+ */
+ assert(disk_size == path_len + 1);
+ } else {
+ /*
+ * If no path compression is used, we do have
+ * NULs as padding. As such, simply assert that
+ * we have enough space left to write the path.
+ */
+ assert(disk_size > path_len);
}
- memcpy(path, path_start, path_len);
+
+ memcpy(path, path_start, path_len + 1);
return 0;
}
@@ -2662,8 +2693,7 @@ static int write_entries(git_index *index, git_filebuf *file)
size_t i;
git_vector case_sorted, *entries;
git_index_entry *entry;
- const char **last = NULL;
- const char *empty = "";
+ const char *last = NULL;
/* If index->entries is sorted case-insensitively, then we need
* to re-sort it case-sensitively before writing */
@@ -2676,11 +2706,14 @@ static int write_entries(git_index *index, git_filebuf *file)
}
if (index->version >= INDEX_VERSION_NUMBER_COMP)
- last = &empty;
+ last = "";
- git_vector_foreach(entries, i, entry)
+ git_vector_foreach(entries, i, entry) {
if ((error = write_disk_entry(file, entry, last)) < 0)
break;
+ if (index->version >= INDEX_VERSION_NUMBER_COMP)
+ last = entry->path;
+ }
if (index->ignore_case)
git_vector_free(&case_sorted);
diff --git a/src/varint.c b/src/varint.c
index 2f868607c..beac8c709 100644
--- a/src/varint.c
+++ b/src/varint.c
@@ -36,7 +36,7 @@ int git_encode_varint(unsigned char *buf, size_t bufsize, uintmax_t value)
while (value >>= 7)
varint[--pos] = 128 | (--value & 127);
if (buf) {
- if (bufsize < pos)
+ if (bufsize < (sizeof(varint) - pos))
return -1;
memcpy(buf, varint + pos, sizeof(varint) - pos);
}
diff --git a/tests/core/encoding.c b/tests/core/encoding.c
index 7d91720f4..a677afe2e 100644
--- a/tests/core/encoding.c
+++ b/tests/core/encoding.c
@@ -29,6 +29,9 @@ void test_core_encoding__encode(void)
cl_assert(git_encode_varint(buf, 100, 65) == 1);
cl_assert(buf[0] == 'A');
+ cl_assert(git_encode_varint(buf, 1, 1) == 1);
+ cl_assert(!memcmp(buf, "\x01", 1));
+
cl_assert(git_encode_varint(buf, 100, 267869656) == 4);
cl_assert(!memcmp(buf, "\xfe\xdc\xbaX", 4));
diff --git a/tests/index/version.c b/tests/index/version.c
index 3fd240d3c..7ada302b5 100644
--- a/tests/index/version.c
+++ b/tests/index/version.c
@@ -3,39 +3,135 @@
static git_repository *g_repo = NULL;
-void test_index_version__can_write_v4(void)
+void test_index_version__cleanup(void)
+{
+ cl_git_sandbox_cleanup();
+ g_repo = NULL;
+}
+
+void test_index_version__can_read_v4(void)
{
+ const char *paths[] = {
+ "file.tx", "file.txt", "file.txz", "foo", "zzz",
+ };
git_index *index;
- const git_index_entry *entry;
+ size_t i;
+
+ g_repo = cl_git_sandbox_init("indexv4");
- g_repo = cl_git_sandbox_init("filemodes");
cl_git_pass(git_repository_index(&index, g_repo));
+ cl_assert_equal_sz(git_index_entrycount(index), 5);
- cl_assert(index->on_disk);
- cl_assert(git_index_version(index) == 2);
+ for (i = 0; i < ARRAY_SIZE(paths); i++) {
+ const git_index_entry *entry =
+ git_index_get_bypath(index, paths[i], GIT_INDEX_STAGE_NORMAL);
- cl_assert(git_index_entrycount(index) == 6);
+ cl_assert(entry != NULL);
+ }
+ git_index_free(index);
+}
+
+void test_index_version__can_write_v4(void)
+{
+ const char *paths[] = {
+ "foo",
+ "foox",
+ "foobar",
+ "foobal",
+ "x",
+ "xz",
+ "xyzzyx"
+ };
+ git_index_entry entry;
+ git_index *index;
+ size_t i;
+
+ g_repo = cl_git_sandbox_init("empty_standard_repo");
+ cl_git_pass(git_repository_index(&index, g_repo));
cl_git_pass(git_index_set_version(index, 4));
+ for (i = 0; i < ARRAY_SIZE(paths); i++) {
+ memset(&entry, 0, sizeof(entry));
+ entry.path = paths[i];
+ entry.mode = GIT_FILEMODE_BLOB;
+ cl_git_pass(git_index_add_frombuffer(index, &entry, paths[i],
+ strlen(paths[i]) + 1));
+ }
+ cl_assert_equal_sz(git_index_entrycount(index), ARRAY_SIZE(paths));
+
cl_git_pass(git_index_write(index));
git_index_free(index);
cl_git_pass(git_repository_index(&index, g_repo));
cl_assert(git_index_version(index) == 4);
- entry = git_index_get_bypath(index, "exec_off", 0);
- cl_assert(entry);
- entry = git_index_get_bypath(index, "exec_off2on_staged", 0);
- cl_assert(entry);
- entry = git_index_get_bypath(index, "exec_on", 0);
- cl_assert(entry);
+ for (i = 0; i < ARRAY_SIZE(paths); i++) {
+ const git_index_entry *e;
+
+ cl_assert(e = git_index_get_bypath(index, paths[i], 0));
+ cl_assert_equal_s(paths[i], e->path);
+ }
git_index_free(index);
}
-void test_index_version__cleanup(void)
+void test_index_version__v4_uses_path_compression(void)
{
- cl_git_sandbox_cleanup();
- g_repo = NULL;
+ git_index_entry entry;
+ git_index *index;
+ char path[250], buf[1];
+ struct stat st;
+ char i, j;
+
+ memset(path, 'a', sizeof(path));
+ memset(buf, 'a', sizeof(buf));
+
+ memset(&entry, 0, sizeof(entry));
+ entry.path = path;
+ entry.mode = GIT_FILEMODE_BLOB;
+
+ g_repo = cl_git_sandbox_init("indexv4");
+ cl_git_pass(git_repository_index(&index, g_repo));
+
+ /* write 676 paths of 250 bytes length */
+ for (i = 'a'; i <= 'z'; i++) {
+ for (j = 'a'; j < 'z'; j++) {
+ path[ARRAY_SIZE(path) - 3] = i;
+ path[ARRAY_SIZE(path) - 2] = j;
+ path[ARRAY_SIZE(path) - 1] = '\0';
+ cl_git_pass(git_index_add_frombuffer(index, &entry, buf, sizeof(buf)));
+ }
+ }
+
+ cl_git_pass(git_index_write(index));
+ cl_git_pass(p_stat(git_index_path(index), &st));
+
+ /*
+ * Without path compression, the written paths would at
+ * least take
+ *
+ * (entries * pathlen) = len
+ * (676 * 250) = 169000
+ *
+ * bytes. As index v4 uses suffix-compression and our
+ * written paths only differ in the last two entries,
+ * this number will be much smaller, e.g.
+ *
+ * (1 * pathlen) + (675 * 2) = len
+ * 676 + 1350 = 2026
+ *
+ * bytes.
+ *
+ * Note that the above calculations do not include
+ * additional metadata of the index, e.g. OIDs or
+ * index extensions. Including those we get an index
+ * of approx. 200kB without compression and 40kB with
+ * compression. As this is a lot smaller than without
+ * compression, we can verify that path compression is
+ * used.
+ */
+ cl_assert_(st.st_size < 75000, "path compression not enabled");
+
+ git_index_free(index);
}
diff --git a/tests/resources/indexv4/.gitted/HEAD b/tests/resources/indexv4/.gitted/HEAD
new file mode 100644
index 000000000..cb089cd89
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/HEAD
@@ -0,0 +1 @@
+ref: refs/heads/master
diff --git a/tests/resources/indexv4/.gitted/config b/tests/resources/indexv4/.gitted/config
new file mode 100644
index 000000000..515f48362
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/config
@@ -0,0 +1,5 @@
+[core]
+ repositoryformatversion = 0
+ filemode = true
+ bare = false
+ logallrefupdates = true
diff --git a/tests/resources/indexv4/.gitted/index b/tests/resources/indexv4/.gitted/index
new file mode 100644
index 000000000..e8fc61736
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/index
Binary files differ
diff --git a/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99 b/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99
new file mode 100644
index 000000000..cedd594b0
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99
Binary files differ
diff --git a/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7 b/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7
new file mode 100644
index 000000000..0ddc1d1a9
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7
Binary files differ
diff --git a/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 b/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
new file mode 100644
index 000000000..711223894
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
Binary files differ
diff --git a/tests/resources/indexv4/.gitted/refs/heads/master b/tests/resources/indexv4/.gitted/refs/heads/master
new file mode 100644
index 000000000..f3e960eb3
--- /dev/null
+++ b/tests/resources/indexv4/.gitted/refs/heads/master
@@ -0,0 +1 @@
+b0952dbb50bed5f01e03e31b296184cb183e54a7
diff --git a/tests/resources/indexv4/file.tx b/tests/resources/indexv4/file.tx
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/resources/indexv4/file.tx
diff --git a/tests/resources/indexv4/file.txt b/tests/resources/indexv4/file.txt
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/resources/indexv4/file.txt
diff --git a/tests/resources/indexv4/file.txz b/tests/resources/indexv4/file.txz
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/resources/indexv4/file.txz
diff --git a/tests/resources/indexv4/foo b/tests/resources/indexv4/foo
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/resources/indexv4/foo
diff --git a/tests/resources/indexv4/zzz b/tests/resources/indexv4/zzz
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/resources/indexv4/zzz