summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sven Strickroth <email@cs-ware.de>, 2014-11-13 19:30:47 +0100
committer: Edward Thomson <ethomson@microsoft.com>, 2015-01-20 16:17:37 -0600
commit: 0161e096a30912e0721cf3e6446595d3400d55b7 (patch)
tree: de53a009898a7f21d33626d859d3839bee30aadc
parent: 2136240dbd35cf2b4308f92008a24c0c36665811 (diff)
download: libgit2-0161e096a30912e0721cf3e6446595d3400d55b7.tar.gz
Make binary detection work similar to vanilla git
Main change: Don't treat bytes >= 128 (0x80) as non-printable; such bytes are common in UTF-8 files.

Signed-off-by: Sven Strickroth <email@cs-ware.de>
-rw-r--r-- CHANGELOG.md | 3
-rw-r--r-- src/buf_text.c | 5
-rw-r--r-- tests/core/buffer.c | 2
3 files changed, 8 insertions, 2 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96bd9a16e..e1c02f965 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,9 @@ v0.22 + 1
### Changes or improvements
+* Updated binary identification in CRLF filtering to avoid false positives in
+ UTF-8 files.
+
* Rename and copy detection is enabled for small files.
### API additions
diff --git a/src/buf_text.c b/src/buf_text.c
index cead599f4..cb3661edb 100644
--- a/src/buf_text.c
+++ b/src/buf_text.c
@@ -191,7 +191,10 @@ bool git_buf_text_is_binary(const git_buf *buf)
while (scan < end) {
unsigned char c = *scan++;
- if (c > 0x1F && c < 0x7F)
+ /* Printable characters are those from SPACE (0x20) upward, excluding
+ * DEL (0x7F), plus the control characters BS, ESC and FF.
+ */
+ if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
printable++;
else if (c == '\0')
return true;
diff --git a/tests/core/buffer.c b/tests/core/buffer.c
index 87dec4607..d28aa218f 100644
--- a/tests/core/buffer.c
+++ b/tests/core/buffer.c
@@ -830,7 +830,7 @@ void test_core_buffer__classify_with_utf8(void)
cl_assert(!git_buf_text_contains_nul(&b));
b.ptr = data1; b.size = b.asize = data1len;
- cl_assert(git_buf_text_is_binary(&b));
+ cl_assert(!git_buf_text_is_binary(&b));
cl_assert(!git_buf_text_contains_nul(&b));
b.ptr = data2; b.size = b.asize = data2len;