summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/buf_text.c 5
-rw-r--r-- src/buf_text.h 8
-rw-r--r-- src/diff_output.c 7
-rw-r--r-- tests-clar/core/buffer.c 23
4 files changed, 42 insertions, 1 deletions
diff --git a/src/buf_text.c b/src/buf_text.c
index a7122dc0c..0104a9057 100644
--- a/src/buf_text.c
+++ b/src/buf_text.c
@@ -109,6 +109,11 @@ bool git_buf_text_is_binary(const git_buf *buf)
return ((printable >> 7) < nonprintable);
}
+/*
+ * Report whether any of the first buf->size bytes at buf->ptr is NUL.
+ *
+ * memchr() is used instead of strnlen() because strnlen() is a POSIX
+ * extension rather than ISO C and is not available on every toolchain;
+ * the result is the same, including for an empty (size 0) buffer.
+ */
+bool git_buf_text_contains_nul(const git_buf *buf)
+{
+ return (memchr(buf->ptr, '\0', buf->size) != NULL);
+}
+
int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
{
const char *ptr;
diff --git a/src/buf_text.h b/src/buf_text.h
index ae5e6ca30..458ee33c9 100644
--- a/src/buf_text.h
+++ b/src/buf_text.h
@@ -71,6 +71,14 @@ extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
extern bool git_buf_text_is_binary(const git_buf *buf);
/**
+ * Check quickly if buffer contains a NUL byte
+ *
+ * Only the first `buf->size` bytes starting at `buf->ptr` are examined.
+ *
+ * @param buf Buffer to check
+ * @return true if buffer contains a NUL byte
+ */
+extern bool git_buf_text_contains_nul(const git_buf *buf);
+
+/**
* Check if a buffer begins with a UTF BOM
*
* @param bom Set to the type of BOM detected or GIT_BOM_NONE
diff --git a/src/diff_output.c b/src/diff_output.c
index f98665dfb..dcff78871 100644
--- a/src/diff_output.c
+++ b/src/diff_output.c
@@ -142,7 +142,12 @@ static int diff_delta_is_binary_by_content(
GIT_UNUSED(ctxt);
if ((file->flags & KNOWN_BINARY_FLAGS) == 0) {
- if (git_buf_text_is_binary(&search))
+ /* TODO: provide encoding / binary detection callbacks that can
+ * be UTF-8 aware, etc. For now, instead of trying to be smart,
+ * let's just use the simple NUL-byte detection that core git uses.
+ */
+ /* previously was: if (git_buf_text_is_binary(&search)) */
+ if (git_buf_text_contains_nul(&search))
file->flags |= GIT_DIFF_FILE_BINARY;
else
file->flags |= GIT_DIFF_FILE_NOT_BINARY;
diff --git a/tests-clar/core/buffer.c b/tests-clar/core/buffer.c
index 40fc4c571..5d9b7850c 100644
--- a/tests-clar/core/buffer.c
+++ b/tests-clar/core/buffer.c
@@ -704,3 +704,26 @@ void test_core_buffer__base64(void)
git_buf_free(&buf);
}
+
+/*
+ * Exercise git_buf_text_is_binary() and git_buf_text_contains_nul() on
+ * three inputs: plain ASCII text, text holding a multi-byte UTF-8
+ * sequence, and text with an embedded NUL byte.
+ */
+void test_core_buffer__classify_with_utf8(void)
+{
+ /* Arrays (not pointers) so sizeof gives the exact byte count, even
+  * for the literal that contains an embedded NUL. */
+ char data0[] = "Simple text\n";
+ /* U+2026 (horizontal ellipsis) written as its raw UTF-8 bytes so the
+  * literal does not depend on the encoding of this source file. */
+ char data1[] = "Is that UTF-8 data I see\xe2\x80\xa6\nYep!\n";
+ char data2[] = "Internal NUL!!!\000\n\nI see you!\n";
+ git_buf b;
+
+ /* sizeof - 1 excludes the implicit terminating NUL of each literal. */
+ b.ptr = data0; b.size = b.asize = sizeof(data0) - 1;
+ cl_assert(!git_buf_text_is_binary(&b));
+ cl_assert(!git_buf_text_contains_nul(&b));
+
+ /* High-bit bytes trip the heuristic, but there is no NUL byte. */
+ b.ptr = data1; b.size = b.asize = sizeof(data1) - 1;
+ cl_assert(git_buf_text_is_binary(&b));
+ cl_assert(!git_buf_text_contains_nul(&b));
+
+ /* An embedded NUL must be reported by both checks. */
+ b.ptr = data2; b.size = b.asize = sizeof(data2) - 1;
+ cl_assert(git_buf_text_is_binary(&b));
+ cl_assert(git_buf_text_contains_nul(&b));
+}