diff options
author | Stan Hu <stanhu@gmail.com> | 2018-02-22 22:55:50 -0800 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2018-05-05 14:54:27 -0700 |
commit | 9d83a2b08724211e564bffca740cd5fdc93d890e (patch) | |
tree | 769c2369cf8b20156ecc8d2d74791597a3ec271c /tests/diff | |
parent | 0ad2372b4309f511c48c8e293f1eec396468595a (diff) | |
download | libgit2-9d83a2b08724211e564bffca740cd5fdc93d890e.tar.gz |
Sanitize the hunk header to ensure it contains valid UTF-8 data
The diff driver truncates the hunk header text to 80 bytes, which can cut a
multi-byte UTF-8 character in half and leave invalid bytes (garbage characters)
in the diff output. This change sanitizes the hunk header before it is displayed.
The new test mirrors the hunk-header test in git: https://github.com/git/git/blob/master/t/t4025-hunk-header.sh
Closes https://github.com/libgit2/rugged/issues/716
Diffstat (limited to 'tests/diff')
-rw-r--r-- | tests/diff/patch.c | 92 |
1 file changed, 92 insertions, 0 deletions
diff --git a/tests/diff/patch.c b/tests/diff/patch.c index 1184d1968..4c836289d 100644 --- a/tests/diff/patch.c +++ b/tests/diff/patch.c @@ -25,6 +25,12 @@ void test_diff_patch__cleanup(void) #define EXPECTED_HUNK "@@ -1,2 +0,0 @@\n" +#define UTF8_HUNK_HEADER "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\n" + +#define UTF8_TRUNCATED_A_HUNK_HEADER "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\n" + +#define UTF8_TRUNCATED_L_HUNK_HEADER "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\n" + static int check_removal_cb( const git_diff_delta *delta, const git_diff_hunk *hunk, @@ -610,3 +616,89 @@ void test_diff_patch__line_counts_with_eofnl(void) git_buf_free(&content); } + +void test_diff_patch__can_strip_bad_utf8(void) +{ + const char *a = "A " UTF8_HUNK_HEADER + " B\n" + " C\n" + " D\n" + " E\n" + " F\n" + " G\n" + " H\n" + " I\n" + " J\n" + " K\n" + "L " UTF8_HUNK_HEADER + " M\n" + " N\n" + " O\n" + " P\n" + " Q\n" + " R\n" + " S\n" + " T\n" + " U\n" + " V\n"; + + const char *b = "A " UTF8_HUNK_HEADER + " B\n" + " C\n" + " D\n" + " E 
modified\n" + " F\n" + " G\n" + " H\n" + " I\n" + " J\n" + " K\n" + "L " UTF8_HUNK_HEADER + " M\n" + " N\n" + " O\n" + " P modified\n" + " Q\n" + " R\n" + " S\n" + " T\n" + " U\n" + " V\n"; + + const char *expected = "diff --git a/file b/file\n" + "index d0647c4..7827ce5 100644\n" + "--- a/file\n" + "+++ b/file\n" + "@@ -2,7 +2,7 @@ A " UTF8_TRUNCATED_A_HUNK_HEADER + " B\n" + " C\n" + " D\n" + "- E\n" + "+ E modified\n" + " F\n" + " G\n" + " H\n" + "@@ -13,7 +13,7 @@ L " UTF8_TRUNCATED_L_HUNK_HEADER + " M\n" + " N\n" + " O\n" + "- P\n" + "+ P modified\n" + " Q\n" + " R\n" + " S\n"; + + git_diff_options opts; + git_patch *patch; + git_buf buf = GIT_BUF_INIT; + + cl_git_pass(git_diff_init_options(&opts, GIT_DIFF_OPTIONS_VERSION)); + + cl_git_pass(git_patch_from_buffers(&patch, a, strlen(a), NULL, b, strlen(b), NULL, &opts)); + cl_git_pass(git_patch_to_buf(&buf, patch)); + + cl_assert_equal_s(expected, buf.ptr); + + git_patch_free(patch); + git_buf_free(&buf); +} |