summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVicent Marti <vicent@github.com>2014-05-17 02:37:13 +0200
committerVicent Marti <vicent@github.com>2014-05-17 02:37:13 +0200
commit191ff93609efd393bcb2d4747d5cb9779a78a73f (patch)
tree90447539df3b3186d656be60c48dffa805f528d2
parentadebcb1645f7bd652ff4449ecf41c39ded12d0c7 (diff)
parent8af4966db15ed35832235627e2d01068d4734dea (diff)
downloadlibgit2-191ff93609efd393bcb2d4747d5cb9779a78a73f.tar.gz
Merge pull request #2362 from libgit2/rb/update-4k-to-8k
Test and fix Git diff binary detection compatibility
-rw-r--r--src/blob.c3
-rw-r--r--src/diff_driver.c6
-rw-r--r--src/filter.h4
-rw-r--r--tests/diff/workdir.c114
4 files changed, 125 insertions, 2 deletions
diff --git a/src/blob.c b/src/blob.c
index ab7dec67f..30d5b705b 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -334,7 +334,8 @@ int git_blob_is_binary(const git_blob *blob)
assert(blob);
content.ptr = blob->odb_object->buffer;
- content.size = min(blob->odb_object->cached.size, 4000);
+ content.size =
+ min(blob->odb_object->cached.size, GIT_FILTER_BYTES_TO_CHECK_NUL);
content.asize = 0;
return git_buf_text_is_binary(&content);
diff --git a/src/diff_driver.c b/src/diff_driver.c
index dc8e79e25..c3c5f365b 100644
--- a/src/diff_driver.c
+++ b/src/diff_driver.c
@@ -397,7 +397,11 @@ void git_diff_driver_update_options(
int git_diff_driver_content_is_binary(
git_diff_driver *driver, const char *content, size_t content_len)
{
- const git_buf search = { (char *)content, 0, min(content_len, 4000) };
+ git_buf search;
+
+ search.ptr = (char *)content;
+ search.size = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL);
+ search.asize = 0;
GIT_UNUSED(driver);
diff --git a/src/filter.h b/src/filter.h
index d0ace0f9a..5a366108b 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -10,6 +10,10 @@
#include "common.h"
#include "git2/filter.h"
+/* Amount of file to examine for NUL byte when checking binary-ness */
+#define GIT_FILTER_BYTES_TO_CHECK_NUL 8000
+
+/* Possible CRLF values */
typedef enum {
GIT_CRLF_GUESS = -1,
GIT_CRLF_BINARY = 0,
diff --git a/tests/diff/workdir.c b/tests/diff/workdir.c
index a6d48abc6..f82bb00e8 100644
--- a/tests/diff/workdir.c
+++ b/tests/diff/workdir.c
@@ -1580,3 +1580,117 @@ void test_diff_workdir__can_update_index(void)
git_diff_free(diff);
}
+
+#define STR7 "0123456"
+#define STR8 "01234567"
+#define STR40 STR8 STR8 STR8 STR8 STR8
+#define STR200 STR40 STR40 STR40 STR40 STR40
+#define STR999Z STR200 STR200 STR200 STR200 STR40 STR40 STR40 STR40 \
+ STR8 STR8 STR8 STR8 STR7 "\0"
+#define STR1000 STR200 STR200 STR200 STR200 STR200
+#define STR3999Z STR1000 STR1000 STR1000 STR999Z
+#define STR4000 STR1000 STR1000 STR1000 STR1000
+
+static void assert_delta_binary(git_diff *diff, size_t idx, int is_binary)
+{
+ git_patch *patch;
+ const git_diff_delta *delta;
+
+ cl_git_pass(git_patch_from_diff(&patch, diff, idx));
+ delta = git_patch_get_delta(patch);
+ cl_assert_equal_b((delta->flags & GIT_DIFF_FLAG_BINARY), is_binary);
+ git_patch_free(patch);
+}
+
+void test_diff_workdir__binary_detection(void)
+{
+ git_index *idx;
+ git_diff *diff = NULL;
+ git_buf b = GIT_BUF_INIT;
+ int i;
+ git_buf data[10] = {
+ { "1234567890", 0, 0 }, /* 0 - all ascii text control */
+ { "Åü†HøπΩ", 0, 0 }, /* 1 - UTF-8 multibyte text */
+ { "\xEF\xBB\xBFÜ⤒ƒ8£€", 0, 0 }, /* 2 - UTF-8 with BOM */
+ { STR999Z, 0, 1000 }, /* 3 - ASCII with NUL at 1000 */
+ { STR3999Z, 0, 4000 }, /* 4 - ASCII with NUL at 4000 */
+ { STR4000 STR3999Z "x", 0, 8001 }, /* 5 - ASCII with NUL at 8000 */
+ { STR4000 STR4000 "\0", 0, 8001 }, /* 6 - ASCII with NUL at 8001 */
+ { "\x00\xDC\x00\x6E\x21\x39\xFE\x0E\x00\x63\x00\xF8"
+ "\x00\x64\x00\x65\x20\x48", 0, 18 }, /* 7 - UTF-16 text */
+ { "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d"
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d",
+ 0, 26 }, /* 8 - All non-printable characters (no NUL) */
+ { "Hello \x01\x02\x03\x04\x05\x06 World!\x01\x02\x03\x04"
+ "\x05\x06\x07", 0, 26 }, /* 9 - 50-50 non-printable (no NUL) */
+ };
+
+ g_repo = cl_git_sandbox_init("empty_standard_repo");
+ cl_git_pass(git_repository_index(&idx, g_repo));
+
+ /* We start with ASCII in index and test data in workdir,
+ * then we will try with test data in index and ASCII in workdir.
+ */
+
+ cl_git_pass(git_buf_sets(&b, "empty_standard_repo/0"));
+ for (i = 0; i < 10; ++i) {
+ b.ptr[b.size - 1] = '0' + i;
+ cl_git_mkfile(b.ptr, "baseline");
+ cl_git_pass(git_index_add_bypath(idx, &b.ptr[b.size - 1]));
+
+ if (data[i].size == 0)
+ data[i].size = strlen(data[i].ptr);
+ cl_git_write2file(
+ b.ptr, data[i].ptr, data[i].size, O_WRONLY|O_TRUNC, 0664);
+ }
+ git_index_write(idx);
+
+ cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, NULL, NULL));
+
+ cl_assert_equal_i(10, git_diff_num_deltas(diff));
+
+ /* using diff binary detection (i.e. looking for NUL byte) */
+ assert_delta_binary(diff, 0, false);
+ assert_delta_binary(diff, 1, false);
+ assert_delta_binary(diff, 2, false);
+ assert_delta_binary(diff, 3, true);
+ assert_delta_binary(diff, 4, true);
+ assert_delta_binary(diff, 5, true);
+ assert_delta_binary(diff, 6, false);
+ assert_delta_binary(diff, 7, true);
+ assert_delta_binary(diff, 8, false);
+ assert_delta_binary(diff, 9, false);
+ /* The above have been checked to match command-line Git */
+
+ git_diff_free(diff);
+
+ cl_git_pass(git_buf_sets(&b, "empty_standard_repo/0"));
+ for (i = 0; i < 10; ++i) {
+ b.ptr[b.size - 1] = '0' + i;
+ cl_git_pass(git_index_add_bypath(idx, &b.ptr[b.size - 1]));
+
+ cl_git_write2file(b.ptr, "baseline\n", 9, O_WRONLY|O_TRUNC, 0664);
+ }
+ git_index_write(idx);
+
+ cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, NULL, NULL));
+
+ cl_assert_equal_i(10, git_diff_num_deltas(diff));
+
+ /* using diff binary detection (i.e. looking for NUL byte) */
+ assert_delta_binary(diff, 0, false);
+ assert_delta_binary(diff, 1, false);
+ assert_delta_binary(diff, 2, false);
+ assert_delta_binary(diff, 3, true);
+ assert_delta_binary(diff, 4, true);
+ assert_delta_binary(diff, 5, true);
+ assert_delta_binary(diff, 6, false);
+ assert_delta_binary(diff, 7, true);
+ assert_delta_binary(diff, 8, false);
+ assert_delta_binary(diff, 9, false);
+
+ git_diff_free(diff);
+
+ git_index_free(idx);
+ git_buf_free(&b);
+}