diff options
-rw-r--r-- | src/buf_text.c | 5 | ||||
-rw-r--r-- | src/buf_text.h | 8 | ||||
-rw-r--r-- | src/diff_output.c | 7 | ||||
-rw-r--r-- | tests-clar/core/buffer.c | 23 |
4 files changed, 42 insertions, 1 deletions
diff --git a/src/buf_text.c b/src/buf_text.c index a7122dc0c..0104a9057 100644 --- a/src/buf_text.c +++ b/src/buf_text.c @@ -109,6 +109,11 @@ bool git_buf_text_is_binary(const git_buf *buf) return ((printable >> 7) < nonprintable); } +bool git_buf_text_contains_nul(const git_buf *buf) +{ + return (strnlen(buf->ptr, buf->size) != buf->size); +} + int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset) { const char *ptr; diff --git a/src/buf_text.h b/src/buf_text.h index ae5e6ca30..458ee33c9 100644 --- a/src/buf_text.h +++ b/src/buf_text.h @@ -71,6 +71,14 @@ extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs); extern bool git_buf_text_is_binary(const git_buf *buf); /** + * Check quickly if buffer contains a NUL byte + * + * @param buf Buffer to check + * @return true if buffer contains a NUL byte + */ +extern bool git_buf_text_contains_nul(const git_buf *buf); + +/** * Check if a buffer begins with a UTF BOM * * @param bom Set to the type of BOM detected or GIT_BOM_NONE diff --git a/src/diff_output.c b/src/diff_output.c index f98665dfb..dcff78871 100644 --- a/src/diff_output.c +++ b/src/diff_output.c @@ -142,7 +142,12 @@ static int diff_delta_is_binary_by_content( GIT_UNUSED(ctxt); if ((file->flags & KNOWN_BINARY_FLAGS) == 0) { - if (git_buf_text_is_binary(&search)) + /* TODO: provide encoding / binary detection callbacks that can + * be UTF-8 aware, etc. For now, instead of trying to be smart, + * let's just use the simple NUL-byte detection that core git uses. 
+ */ + /* previously was: if (git_buf_text_is_binary(&search)) */ + if (git_buf_text_contains_nul(&search)) file->flags |= GIT_DIFF_FILE_BINARY; else file->flags |= GIT_DIFF_FILE_NOT_BINARY; diff --git a/tests-clar/core/buffer.c b/tests-clar/core/buffer.c index 40fc4c571..5d9b7850c 100644 --- a/tests-clar/core/buffer.c +++ b/tests-clar/core/buffer.c @@ -704,3 +704,26 @@ void test_core_buffer__base64(void) git_buf_free(&buf); } + +void test_core_buffer__classify_with_utf8(void) +{ + char *data0 = "Simple text\n"; + size_t data0len = 12; + char *data1 = "Is that UTF-8 data I see…\nYep!\n"; + size_t data1len = 31; + char *data2 = "Internal NUL!!!\000\n\nI see you!\n"; + size_t data2len = 29; + git_buf b; + + b.ptr = data0; b.size = b.asize = data0len; + cl_assert(!git_buf_text_is_binary(&b)); + cl_assert(!git_buf_text_contains_nul(&b)); + + b.ptr = data1; b.size = b.asize = data1len; + cl_assert(git_buf_text_is_binary(&b)); + cl_assert(!git_buf_text_contains_nul(&b)); + + b.ptr = data2; b.size = b.asize = data2len; + cl_assert(git_buf_text_is_binary(&b)); + cl_assert(git_buf_text_contains_nul(&b)); +} |