diff options
| -rw-r--r-- | src/diff_xdiff.c | 12 | ||||
| -rw-r--r-- | src/util.c | 16 | ||||
| -rw-r--r-- | src/util.h | 10 | ||||
| -rw-r--r-- | tests/diff/patch.c | 92 |
4 files changed, 130 insertions, 0 deletions
diff --git a/src/diff_xdiff.c b/src/diff_xdiff.c index 701eb1b5f..6907e52e1 100644 --- a/src/diff_xdiff.c +++ b/src/diff_xdiff.c @@ -6,6 +6,7 @@ */ #include "diff_xdiff.h" +#include "util.h" #include "git2/errors.h" #include "diff.h" @@ -115,6 +116,7 @@ static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len) const git_diff_delta *delta = patch->base.delta; git_patch_generated_output *output = &info->xo->output; git_diff_line line; + size_t buffer_len; if (len == 1) { output->error = git_xdiff_parse_hunk(&info->hunk, bufs[0].ptr); @@ -124,6 +126,16 @@ static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len) info->hunk.header_len = bufs[0].size; if (info->hunk.header_len >= sizeof(info->hunk.header)) info->hunk.header_len = sizeof(info->hunk.header) - 1; + + /* Sanitize the hunk header in case there is invalid Unicode */ + buffer_len = git__utf8_valid_buf_length((const uint8_t *) bufs[0].ptr, info->hunk.header_len); + /* Sanitizing the hunk header may delete the newline, so add it back again if there is room */ + if (buffer_len < info->hunk.header_len) { + bufs[0].ptr[buffer_len] = '\n'; + buffer_len += 1; + info->hunk.header_len = buffer_len; + } + memcpy(info->hunk.header, bufs[0].ptr, info->hunk.header_len); info->hunk.header[info->hunk.header_len] = '\0'; diff --git a/src/util.c b/src/util.c index 2955b7ca0..bf778a949 100644 --- a/src/util.c +++ b/src/util.c @@ -806,6 +806,22 @@ double git_time_monotonic(void) return git__timer(); } +size_t git__utf8_valid_buf_length(const uint8_t *str, size_t str_len) +{ + size_t offset = 0; + + while (offset < str_len) { + int length = git__utf8_charlen(str + offset, str_len - offset); + + if (length < 0) + break; + + offset += length; + } + + return offset; +} + #ifdef GIT_WIN32 int git__getenv(git_buf *out, const char *name) { diff --git a/src/util.h b/src/util.h index f6d19cfde..67ae4ef70 100644 --- a/src/util.h +++ b/src/util.h @@ -454,6 +454,16 @@ extern size_t git__unescape(char *str); extern int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst); /* + * Iterate through an UTF-8 string and stops after finding any invalid UTF-8 + * codepoints. + * + * @param str string to scan + * @param str_len size of the string + * @return length in bytes of the string that contains valid data + */ +extern size_t git__utf8_valid_buf_length(const uint8_t *str, size_t str_len); + +/* * Safely zero-out memory, making sure that the compiler * doesn't optimize away the operation. */ diff --git a/tests/diff/patch.c b/tests/diff/patch.c index 1184d1968..4c836289d 100644 --- a/tests/diff/patch.c +++ b/tests/diff/patch.c @@ -25,6 +25,12 @@ void test_diff_patch__cleanup(void) #define EXPECTED_HUNK "@@ -1,2 +0,0 @@\n" +#define UTF8_HUNK_HEADER "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\n" + +#define UTF8_TRUNCATED_A_HUNK_HEADER "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\n" + +#define UTF8_TRUNCATED_L_HUNK_HEADER "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\n" + static int check_removal_cb( const git_diff_delta *delta, const git_diff_hunk *hunk, @@ -610,3 +616,89 @@ void test_diff_patch__line_counts_with_eofnl(void) git_buf_free(&content); } + +void test_diff_patch__can_strip_bad_utf8(void) +{ + const char *a = "A " UTF8_HUNK_HEADER + " B\n" + " C\n" + " D\n" + " E\n" + " F\n" + " G\n" + " H\n" + " I\n" + " J\n" + " K\n" + "L " UTF8_HUNK_HEADER + " M\n" + " N\n" + " O\n" + " P\n" + " Q\n" + " R\n" + " S\n" + " T\n" + " U\n" + " V\n"; + + const char *b = "A " UTF8_HUNK_HEADER + " B\n" + " C\n" + " D\n" + " E modified\n" + " F\n" + " G\n" + " H\n" + " I\n" + " J\n" + " K\n" + "L " UTF8_HUNK_HEADER + " M\n" + " N\n" + " O\n" + " P modified\n" + " Q\n" + " R\n" + " S\n" + " T\n" + " U\n" + " V\n"; + + const char *expected = "diff --git a/file b/file\n" + "index d0647c4..7827ce5 100644\n" + "--- a/file\n" + "+++ b/file\n" + "@@ -2,7 +2,7 @@ A " UTF8_TRUNCATED_A_HUNK_HEADER + " B\n" + " C\n" + " D\n" + "- E\n" + "+ E modified\n" + " F\n" + " G\n" + " H\n" + "@@ -13,7 +13,7 @@ L " UTF8_TRUNCATED_L_HUNK_HEADER + " M\n" + " N\n" + " O\n" + "- P\n" + "+ P modified\n" + " Q\n" + " R\n" + " S\n"; + + git_diff_options opts; + git_patch *patch; + git_buf buf = GIT_BUF_INIT; + + cl_git_pass(git_diff_init_options(&opts, GIT_DIFF_OPTIONS_VERSION)); + + cl_git_pass(git_patch_from_buffers(&patch, a, strlen(a), NULL, b, strlen(b), NULL, &opts)); + cl_git_pass(git_patch_to_buf(&buf, patch)); + + cl_assert_equal_s(expected, buf.ptr); + + git_patch_free(patch); + git_buf_free(&buf); +} |
