summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/diff_xdiff.c 12
-rw-r--r-- src/util.c 16
-rw-r--r-- src/util.h 10
-rw-r--r-- tests/diff/patch.c 92
4 files changed, 130 insertions, 0 deletions
diff --git a/src/diff_xdiff.c b/src/diff_xdiff.c
index 701eb1b5f..6907e52e1 100644
--- a/src/diff_xdiff.c
+++ b/src/diff_xdiff.c
@@ -6,6 +6,7 @@
*/
#include "diff_xdiff.h"
+#include "util.h"
#include "git2/errors.h"
#include "diff.h"
@@ -115,6 +116,7 @@ static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len)
const git_diff_delta *delta = patch->base.delta;
git_patch_generated_output *output = &info->xo->output;
git_diff_line line;
+ size_t buffer_len;
if (len == 1) {
output->error = git_xdiff_parse_hunk(&info->hunk, bufs[0].ptr);
@@ -124,6 +126,16 @@ static int git_xdiff_cb(void *priv, mmbuffer_t *bufs, int len)
info->hunk.header_len = bufs[0].size;
if (info->hunk.header_len >= sizeof(info->hunk.header))
info->hunk.header_len = sizeof(info->hunk.header) - 1;
+
+ /* Sanitize the hunk header in case there is invalid Unicode */
+ buffer_len = git__utf8_valid_buf_length((const uint8_t *) bufs[0].ptr, info->hunk.header_len);
+ /* Sanitizing the hunk header may delete the newline, so add it back again if there is room */
+ if (buffer_len < info->hunk.header_len) {
+ bufs[0].ptr[buffer_len] = '\n';
+ buffer_len += 1;
+ info->hunk.header_len = buffer_len;
+ }
+
memcpy(info->hunk.header, bufs[0].ptr, info->hunk.header_len);
info->hunk.header[info->hunk.header_len] = '\0';
diff --git a/src/util.c b/src/util.c
index 2955b7ca0..bf778a949 100644
--- a/src/util.c
+++ b/src/util.c
@@ -806,6 +806,22 @@ double git_time_monotonic(void)
return git__timer();
}
+size_t git__utf8_valid_buf_length(const uint8_t *str, size_t str_len)
+{
+ size_t offset = 0; /* number of leading bytes accepted so far */
+ /* Walk the buffer one character at a time until the end or the first rejected sequence. */
+ while (offset < str_len) {
+ int length = git__utf8_charlen(str + offset, str_len - offset);
+ /* NOTE(review): a negative result presumably marks an invalid or truncated sequence -- confirm against git__utf8_charlen. */
+ if (length < 0)
+ break;
+ /* Step past the character that was just accepted. */
+ offset += length;
+ }
+ /* offset is now the byte length of the prefix that was accepted in full. */
+ return offset;
+}
+
#ifdef GIT_WIN32
int git__getenv(git_buf *out, const char *name)
{
diff --git a/src/util.h b/src/util.h
index f6d19cfde..67ae4ef70 100644
--- a/src/util.h
+++ b/src/util.h
@@ -454,6 +454,16 @@ extern size_t git__unescape(char *str);
extern int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst);
/*
+ * Iterates through a UTF-8 string and stops at the first invalid UTF-8
+ * sequence.
+ *
+ * @param str string to scan
+ * @param str_len size of the string in bytes
+ * @return length in bytes of the leading part of the string that is valid
+ */
+extern size_t git__utf8_valid_buf_length(const uint8_t *str, size_t str_len);
+
+/*
* Safely zero-out memory, making sure that the compiler
* doesn't optimize away the operation.
*/
diff --git a/tests/diff/patch.c b/tests/diff/patch.c
index 1184d1968..4c836289d 100644
--- a/tests/diff/patch.c
+++ b/tests/diff/patch.c
@@ -25,6 +25,12 @@ void test_diff_patch__cleanup(void)
#define EXPECTED_HUNK "@@ -1,2 +0,0 @@\n"
+#define UTF8_HUNK_HEADER "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\n"
+
+#define UTF8_TRUNCATED_A_HUNK_HEADER "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\n"
+
+#define UTF8_TRUNCATED_L_HUNK_HEADER "\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE6\x97\xA5\n"
+
static int check_removal_cb(
const git_diff_delta *delta,
const git_diff_hunk *hunk,
@@ -610,3 +616,89 @@ void test_diff_patch__line_counts_with_eofnl(void)
git_buf_free(&content);
}
+
+void test_diff_patch__can_strip_bad_utf8(void)
+{
+ const char *a = "A " UTF8_HUNK_HEADER
+ " B\n"
+ " C\n"
+ " D\n"
+ " E\n"
+ " F\n"
+ " G\n"
+ " H\n"
+ " I\n"
+ " J\n"
+ " K\n"
+ "L " UTF8_HUNK_HEADER
+ " M\n"
+ " N\n"
+ " O\n"
+ " P\n"
+ " Q\n"
+ " R\n"
+ " S\n"
+ " T\n"
+ " U\n"
+ " V\n";
+ /* b is the same text as a, except that the E and P lines are modified. */
+ const char *b = "A " UTF8_HUNK_HEADER
+ " B\n"
+ " C\n"
+ " D\n"
+ " E modified\n"
+ " F\n"
+ " G\n"
+ " H\n"
+ " I\n"
+ " J\n"
+ " K\n"
+ "L " UTF8_HUNK_HEADER
+ " M\n"
+ " N\n"
+ " O\n"
+ " P modified\n"
+ " Q\n"
+ " R\n"
+ " S\n"
+ " T\n"
+ " U\n"
+ " V\n";
+ /* Expected patch text: each hunk header ends with the truncated form of the "A " / "L " line. */
+ const char *expected = "diff --git a/file b/file\n"
+ "index d0647c4..7827ce5 100644\n"
+ "--- a/file\n"
+ "+++ b/file\n"
+ "@@ -2,7 +2,7 @@ A " UTF8_TRUNCATED_A_HUNK_HEADER
+ " B\n"
+ " C\n"
+ " D\n"
+ "- E\n"
+ "+ E modified\n"
+ " F\n"
+ " G\n"
+ " H\n"
+ "@@ -13,7 +13,7 @@ L " UTF8_TRUNCATED_L_HUNK_HEADER
+ " M\n"
+ " N\n"
+ " O\n"
+ "- P\n"
+ "+ P modified\n"
+ " Q\n"
+ " R\n"
+ " S\n";
+
+ git_diff_options opts;
+ git_patch *patch;
+ git_buf buf = GIT_BUF_INIT;
+ /* Initialize opts through git_diff_init_options before using it. */
+ cl_git_pass(git_diff_init_options(&opts, GIT_DIFF_OPTIONS_VERSION));
+ /* Build a patch from the two in-memory buffers and write it into buf. */
+ cl_git_pass(git_patch_from_buffers(&patch, a, strlen(a), NULL, b, strlen(b), NULL, &opts));
+ cl_git_pass(git_patch_to_buf(&buf, patch));
+ /* The rendered patch must equal the expected text, truncated headers included. */
+ cl_assert_equal_s(expected, buf.ptr);
+ /* Release the patch and the output buffer. */
+ git_patch_free(patch);
+ git_buf_free(&buf);
+}