diff options
author | Edward Thomson <ethomson@edwardthomson.com> | 2017-06-07 14:42:12 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-07 14:42:12 +0200 |
commit | 3bc95cfe3e46e4107535720335bed3edc0caf7e0 (patch) | |
tree | 850d102a6dd4bb2edd813fd73c0219ff6f05e275 | |
parent | 6a13cf1e00fba5f963b273cb00aa54411478fdc5 (diff) | |
parent | 92d3ea4e398e8fbe3bf59088dc1a9ce8642b23fe (diff) | |
download | libgit2-3bc95cfe3e46e4107535720335bed3edc0caf7e0.tar.gz |
Merge pull request #4236 from pks-t/pks/index-v4-fixes
Fix path computations for compressed index entries
-rw-r--r-- | src/index.c | 117 | ||||
-rw-r--r-- | src/varint.c | 2 | ||||
-rw-r--r-- | tests/core/encoding.c | 3 | ||||
-rw-r--r-- | tests/index/version.c | 126 | ||||
-rw-r--r-- | tests/resources/indexv4/.gitted/HEAD | 1 | ||||
-rw-r--r-- | tests/resources/indexv4/.gitted/config | 5 | ||||
-rw-r--r-- | tests/resources/indexv4/.gitted/index | bin | 0 -> 572 bytes | |||
-rw-r--r-- | tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99 | bin | 0 -> 70 bytes | |||
-rw-r--r-- | tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7 | bin | 0 -> 154 bytes | |||
-rw-r--r-- | tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 | bin | 0 -> 15 bytes | |||
-rw-r--r-- | tests/resources/indexv4/.gitted/refs/heads/master | 1 | ||||
-rw-r--r-- | tests/resources/indexv4/file.tx | 0 | ||||
-rw-r--r-- | tests/resources/indexv4/file.txt | 0 | ||||
-rw-r--r-- | tests/resources/indexv4/file.txz | 0 | ||||
-rw-r--r-- | tests/resources/indexv4/foo | 0 | ||||
-rw-r--r-- | tests/resources/indexv4/zzz | 0 |
16 files changed, 197 insertions, 58 deletions
diff --git a/src/index.c b/src/index.c index 932a5306a..c29e90fb0 100644 --- a/src/index.c +++ b/src/index.c @@ -54,10 +54,6 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr unsigned int flags, git_index_matched_path_cb cb, void *payload); -#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7) -#define short_entry_size(len) entry_size(struct entry_short, len) -#define long_entry_size(len) entry_size(struct entry_long, len) - #define minimal_entry_size (offsetof(struct entry_short, path)) static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ; @@ -2282,12 +2278,29 @@ out_err: return 0; } +static size_t index_entry_size(size_t path_len, size_t varint_len, uint32_t flags) +{ + if (varint_len) { + if (flags & GIT_IDXENTRY_EXTENDED) + return offsetof(struct entry_long, path) + path_len + 1 + varint_len; + else + return offsetof(struct entry_short, path) + path_len + 1 + varint_len; + } else { +#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7) + if (flags & GIT_IDXENTRY_EXTENDED) + return entry_size(struct entry_long, path_len); + else + return entry_size(struct entry_short, path_len); +#undef entry_size + } +} + static size_t read_entry( git_index_entry **out, git_index *index, const void *buffer, size_t buffer_size, - const char **last) + const char *last) { size_t path_length, entry_size; const char *path_ptr; @@ -2344,35 +2357,34 @@ static size_t read_entry( path_length = path_end - path_ptr; } - if (entry.flags & GIT_IDXENTRY_EXTENDED) - entry_size = long_entry_size(path_length); - else - entry_size = short_entry_size(path_length); - - if (INDEX_FOOTER_SIZE + entry_size > buffer_size) - return 0; - + entry_size = index_entry_size(path_length, 0, entry.flags); entry.path = (char *)path_ptr; } else { size_t varint_len; - size_t shared = git_decode_varint((const unsigned char *)path_ptr, - &varint_len); - size_t len = strlen(path_ptr + varint_len); - size_t last_len = strlen(*last); - size_t tmp_path_len; + size_t strip_len = git_decode_varint((const unsigned char *)path_ptr, + &varint_len); + size_t last_len = strlen(last); + size_t prefix_len = last_len - strip_len; + size_t suffix_len = strlen(path_ptr + varint_len); + size_t path_len; if (varint_len == 0) return index_error_invalid("incorrect prefix length"); - GITERR_CHECK_ALLOC_ADD(&tmp_path_len, shared, len + 1); - tmp_path = git__malloc(tmp_path_len); + GITERR_CHECK_ALLOC_ADD(&path_len, prefix_len, suffix_len); + GITERR_CHECK_ALLOC_ADD(&path_len, path_len, 1); + tmp_path = git__malloc(path_len); GITERR_CHECK_ALLOC(tmp_path); - memcpy(tmp_path, last, last_len); - memcpy(tmp_path + last_len, path_ptr + varint_len, len); - entry_size = long_entry_size(shared + len); + + memcpy(tmp_path, last, prefix_len); + memcpy(tmp_path + prefix_len, path_ptr + varint_len, suffix_len + 1); + entry_size = index_entry_size(suffix_len, varint_len, entry.flags); entry.path = tmp_path; } + if (INDEX_FOOTER_SIZE + entry_size > buffer_size) + return 0; + if (index_entry_dup(out, index, &entry) < 0) { git__free(tmp_path); return 0; @@ -2445,7 +2457,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) unsigned int i; struct index_header header = { 0 }; git_oid checksum_calculated, checksum_expected; - const char **last = NULL; + const char *last = NULL; const char *empty = ""; #define seek_forward(_increase) { \ @@ -2469,7 +2481,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) index->version = header.version; if (index->version >= INDEX_VERSION_NUMBER_COMP) - last = ∅ + last = empty; seek_forward(INDEX_HEADER_SIZE); @@ -2504,6 +2516,9 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size) } error = 0; + if (index->version >= INDEX_VERSION_NUMBER_COMP) + last = entry->path; + seek_forward(entry_size); } @@ -2574,11 +2589,12 @@ static bool is_index_extended(git_index *index) return (extended > 0); } -static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char **last) +static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char *last) { void *mem = NULL; struct entry_short *ondisk; size_t path_len, disk_size; + int varint_len = 0; char *path; const char *path_start = entry->path; size_t same_len = 0; @@ -2586,7 +2602,7 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha path_len = ((struct entry_internal *)entry)->pathlen; if (last) { - const char *last_c = *last; + const char *last_c = last; while (*path_start == *last_c) { if (!*path_start || !*last_c) @@ -2596,13 +2612,10 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha ++same_len; } path_len -= same_len; - *last = entry->path; + varint_len = git_encode_varint(NULL, 0, same_len); } - if (entry->flags & GIT_IDXENTRY_EXTENDED) - disk_size = long_entry_size(path_len); - else - disk_size = short_entry_size(path_len); + disk_size = index_entry_size(path_len, varint_len, entry->flags); if (git_filebuf_reserve(file, &mem, disk_size) < 0) return -1; @@ -2642,16 +2655,34 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha ondisk_ext->flags_extended = htons(entry->flags_extended & GIT_IDXENTRY_EXTENDED_FLAGS); path = ondisk_ext->path; - } - else + disk_size -= offsetof(struct entry_long, path); + } else { path = ondisk->path; + disk_size -= offsetof(struct entry_short, path); + } if (last) { - path += git_encode_varint((unsigned char *) path, - disk_size, - path_len - same_len); + varint_len = git_encode_varint((unsigned char *) path, + disk_size, same_len); + assert(varint_len > 0); + path += varint_len; + disk_size -= varint_len; + + /* + * If using path compression, we are not allowed + * to have additional trailing NULs. + */ + assert(disk_size == path_len + 1); + } else { + /* + * If no path compression is used, we do have + * NULs as padding. As such, simply assert that + * we have enough space left to write the path. + */ + assert(disk_size > path_len); } - memcpy(path, path_start, path_len); + + memcpy(path, path_start, path_len + 1); return 0; } @@ -2662,8 +2693,7 @@ static int write_entries(git_index *index, git_filebuf *file) size_t i; git_vector case_sorted, *entries; git_index_entry *entry; - const char **last = NULL; - const char *empty = ""; + const char *last = NULL; /* If index->entries is sorted case-insensitively, then we need * to re-sort it case-sensitively before writing */ @@ -2676,11 +2706,14 @@ static int write_entries(git_index *index, git_filebuf *file) } if (index->version >= INDEX_VERSION_NUMBER_COMP) - last = ∅ + last = ""; - git_vector_foreach(entries, i, entry) + git_vector_foreach(entries, i, entry) { if ((error = write_disk_entry(file, entry, last)) < 0) break; + if (index->version >= INDEX_VERSION_NUMBER_COMP) + last = entry->path; + } if (index->ignore_case) git_vector_free(&case_sorted); diff --git a/src/varint.c b/src/varint.c index 2f868607c..beac8c709 100644 --- a/src/varint.c +++ b/src/varint.c @@ -36,7 +36,7 @@ int git_encode_varint(unsigned char *buf, size_t bufsize, uintmax_t value) while (value >>= 7) varint[--pos] = 128 | (--value & 127); if (buf) { - if (bufsize < pos) + if (bufsize < (sizeof(varint) - pos)) return -1; memcpy(buf, varint + pos, sizeof(varint) - pos); } diff --git a/tests/core/encoding.c b/tests/core/encoding.c index 7d91720f4..a677afe2e 100644 --- a/tests/core/encoding.c +++ b/tests/core/encoding.c @@ -29,6 +29,9 @@ void test_core_encoding__encode(void) cl_assert(git_encode_varint(buf, 100, 65) == 1); cl_assert(buf[0] == 'A'); + cl_assert(git_encode_varint(buf, 1, 1) == 1); + cl_assert(!memcmp(buf, "\x01", 1)); + cl_assert(git_encode_varint(buf, 100, 267869656) == 4); cl_assert(!memcmp(buf, "\xfe\xdc\xbaX", 4)); diff --git a/tests/index/version.c b/tests/index/version.c index 3fd240d3c..7ada302b5 100644 --- a/tests/index/version.c +++ b/tests/index/version.c @@ -3,39 +3,135 @@ static git_repository *g_repo = NULL; -void test_index_version__can_write_v4(void) +void test_index_version__cleanup(void) +{ + cl_git_sandbox_cleanup(); + g_repo = NULL; +} + +void test_index_version__can_read_v4(void) { + const char *paths[] = { + "file.tx", "file.txt", "file.txz", "foo", "zzz", + }; git_index *index; - const git_index_entry *entry; + size_t i; + + g_repo = cl_git_sandbox_init("indexv4"); - g_repo = cl_git_sandbox_init("filemodes"); cl_git_pass(git_repository_index(&index, g_repo)); + cl_assert_equal_sz(git_index_entrycount(index), 5); - cl_assert(index->on_disk); - cl_assert(git_index_version(index) == 2); + for (i = 0; i < ARRAY_SIZE(paths); i++) { + const git_index_entry *entry = + git_index_get_bypath(index, paths[i], GIT_INDEX_STAGE_NORMAL); - cl_assert(git_index_entrycount(index) == 6); + cl_assert(entry != NULL); + } + git_index_free(index); +} + +void test_index_version__can_write_v4(void) +{ + const char *paths[] = { + "foo", + "foox", + "foobar", + "foobal", + "x", + "xz", + "xyzzyx" + }; + git_index_entry entry; + git_index *index; + size_t i; + + g_repo = cl_git_sandbox_init("empty_standard_repo"); + cl_git_pass(git_repository_index(&index, g_repo)); cl_git_pass(git_index_set_version(index, 4)); + for (i = 0; i < ARRAY_SIZE(paths); i++) { + memset(&entry, 0, sizeof(entry)); + entry.path = paths[i]; + entry.mode = GIT_FILEMODE_BLOB; + cl_git_pass(git_index_add_frombuffer(index, &entry, paths[i], + strlen(paths[i]) + 1)); + } + cl_assert_equal_sz(git_index_entrycount(index), ARRAY_SIZE(paths)); + cl_git_pass(git_index_write(index)); git_index_free(index); cl_git_pass(git_repository_index(&index, g_repo)); cl_assert(git_index_version(index) == 4); - entry = git_index_get_bypath(index, "exec_off", 0); - cl_assert(entry); - entry = git_index_get_bypath(index, "exec_off2on_staged", 0); - cl_assert(entry); - entry = git_index_get_bypath(index, "exec_on", 0); - cl_assert(entry); + for (i = 0; i < ARRAY_SIZE(paths); i++) { + const git_index_entry *e; + + cl_assert(e = git_index_get_bypath(index, paths[i], 0)); + cl_assert_equal_s(paths[i], e->path); + } git_index_free(index); } -void test_index_version__cleanup(void) +void test_index_version__v4_uses_path_compression(void) { - cl_git_sandbox_cleanup(); - g_repo = NULL; + git_index_entry entry; + git_index *index; + char path[250], buf[1]; + struct stat st; + char i, j; + + memset(path, 'a', sizeof(path)); + memset(buf, 'a', sizeof(buf)); + + memset(&entry, 0, sizeof(entry)); + entry.path = path; + entry.mode = GIT_FILEMODE_BLOB; + + g_repo = cl_git_sandbox_init("indexv4"); + cl_git_pass(git_repository_index(&index, g_repo)); + + /* write 676 paths of 250 bytes length */ + for (i = 'a'; i <= 'z'; i++) { + for (j = 'a'; j < 'z'; j++) { + path[ARRAY_SIZE(path) - 3] = i; + path[ARRAY_SIZE(path) - 2] = j; + path[ARRAY_SIZE(path) - 1] = '\0'; + cl_git_pass(git_index_add_frombuffer(index, &entry, buf, sizeof(buf))); + } + } + + cl_git_pass(git_index_write(index)); + cl_git_pass(p_stat(git_index_path(index), &st)); + + /* + * Without path compression, the written paths would at + * least take + * + * (entries * pathlen) = len + * (676 * 250) = 169000 + * + * bytes. As index v4 uses suffix-compression and our + * written paths only differ in the last two entries, + * this number will be much smaller, e.g. + * + * (1 * pathlen) + (675 * 2) = len + * 676 + 1350 = 2026 + * + * bytes. + * + * Note that the above calculations do not include + * additional metadata of the index, e.g. OIDs or + * index extensions. Including those we get an index + * of approx. 200kB without compression and 40kB with + * compression. As this is a lot smaller than without + * compression, we can verify that path compression is + * used. + */ + cl_assert_(st.st_size < 75000, "path compression not enabled"); + + git_index_free(index); } diff --git a/tests/resources/indexv4/.gitted/HEAD b/tests/resources/indexv4/.gitted/HEAD new file mode 100644 index 000000000..cb089cd89 --- /dev/null +++ b/tests/resources/indexv4/.gitted/HEAD @@ -0,0 +1 @@ +ref: refs/heads/master diff --git a/tests/resources/indexv4/.gitted/config b/tests/resources/indexv4/.gitted/config new file mode 100644 index 000000000..515f48362 --- /dev/null +++ b/tests/resources/indexv4/.gitted/config @@ -0,0 +1,5 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true diff --git a/tests/resources/indexv4/.gitted/index b/tests/resources/indexv4/.gitted/index Binary files differnew file mode 100644 index 000000000..e8fc61736 --- /dev/null +++ b/tests/resources/indexv4/.gitted/index diff --git a/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99 b/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99 Binary files differnew file mode 100644 index 000000000..cedd594b0 --- /dev/null +++ b/tests/resources/indexv4/.gitted/objects/4c/9109b3e671d851eec87e0e72f6305b582e7e99 diff --git a/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7 b/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7 Binary files differnew file mode 100644 index 000000000..0ddc1d1a9 --- /dev/null +++ b/tests/resources/indexv4/.gitted/objects/b0/952dbb50bed5f01e03e31b296184cb183e54a7 diff --git a/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 b/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 Binary files differnew file mode 100644 index 000000000..711223894 --- /dev/null +++ b/tests/resources/indexv4/.gitted/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/resources/indexv4/.gitted/refs/heads/master b/tests/resources/indexv4/.gitted/refs/heads/master new file mode 100644 index 000000000..f3e960eb3 --- /dev/null +++ b/tests/resources/indexv4/.gitted/refs/heads/master @@ -0,0 +1 @@ +b0952dbb50bed5f01e03e31b296184cb183e54a7 diff --git a/tests/resources/indexv4/file.tx b/tests/resources/indexv4/file.tx new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tests/resources/indexv4/file.tx diff --git a/tests/resources/indexv4/file.txt b/tests/resources/indexv4/file.txt new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tests/resources/indexv4/file.txt diff --git a/tests/resources/indexv4/file.txz b/tests/resources/indexv4/file.txz new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tests/resources/indexv4/file.txz diff --git a/tests/resources/indexv4/foo b/tests/resources/indexv4/foo new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tests/resources/indexv4/foo diff --git a/tests/resources/indexv4/zzz b/tests/resources/indexv4/zzz new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/tests/resources/indexv4/zzz |