summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/buf_text.c208
-rw-r--r--src/buf_text.h100
-rw-r--r--src/buffer.c114
-rw-r--r--src/buffer.h56
-rw-r--r--src/config.c3
-rw-r--r--src/config_file.c16
-rw-r--r--src/crlf.c13
-rw-r--r--src/diff_output.c2
-rw-r--r--src/filter.c69
-rw-r--r--src/filter.h29
-rw-r--r--src/path.c2
-rw-r--r--src/pathspec.c5
-rw-r--r--src/submodule.c3
-rw-r--r--tests-clar/core/buffer.c25
-rw-r--r--tests-clar/object/blob/filter.c32
15 files changed, 388 insertions, 289 deletions
diff --git a/src/buf_text.c b/src/buf_text.c
new file mode 100644
index 000000000..3c5024e6c
--- /dev/null
+++ b/src/buf_text.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2009-2012 the libgit2 contributors
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#include "buf_text.h"
+
+/*
+ * Append `string` to `buf`, writing `esc_with` in front of every character
+ * that occurs in `esc_chars`.  A NULL `string` appends nothing.
+ *
+ * Returns 0 on success, -1 if the buffer could not be grown.
+ */
+int git_buf_text_puts_escaped(
+	git_buf *buf,
+	const char *string,
+	const char *esc_chars,
+	const char *esc_with)
+{
+	const char *src;
+	size_t needed = 0, run, prefix_len = strlen(esc_with);
+
+	if (string == NULL)
+		return 0;
+
+	/* first pass: measure how many bytes the escaped output will take */
+	src = string;
+	while (*src != '\0') {
+		/* stretch of characters copied through unchanged */
+		run = strcspn(src, esc_chars);
+		needed += run;
+		src += run;
+
+		/* stretch of characters that each need the prefix */
+		run = strspn(src, esc_chars);
+		needed += run * (prefix_len + 1);
+		src += run;
+	}
+
+	if (git_buf_grow(buf, buf->size + needed + 1) < 0)
+		return -1;
+
+	/* second pass: copy, inserting the prefix where required */
+	src = string;
+	while (*src != '\0') {
+		run = strcspn(src, esc_chars);
+		memmove(buf->ptr + buf->size, src, run);
+		buf->size += run;
+		src += run;
+
+		run = strspn(src, esc_chars);
+		while (run > 0) {
+			/* prefix first, then the character itself */
+			memmove(buf->ptr + buf->size, esc_with, prefix_len);
+			buf->size += prefix_len;
+			buf->ptr[buf->size] = *src;
+			buf->size++;
+			src++;
+			run--;
+		}
+	}
+
+	buf->ptr[buf->size] = '\0';
+
+	return 0;
+}
+
+/*
+ * Unescape the buffer contents in place by handing the raw character
+ * storage to git__unescape, then store that function's return value as
+ * the new buffer size.
+ */
+void git_buf_text_unescape(git_buf *buf)
+{
+ /* NOTE(review): presumably git__unescape returns the new string length -- confirm */
+ buf->size = git__unescape(buf->ptr);
+}
+
+/*
+ * Set `buf` to the longest leading substring shared by every entry of
+ * `strings`.  The buffer is left empty when the array is NULL or has no
+ * entries, or when the entries share no prefix.
+ *
+ * Returns 0 on success, -1 if the first string could not be stored.
+ */
+int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
+{
+	size_t idx;
+
+	git_buf_clear(buf);
+
+	if (strings == NULL || strings->count == 0)
+		return 0;
+
+	/* seed the prefix with the first string, then only ever shrink it */
+	if (git_buf_sets(buf, strings->strings[0]) < 0)
+		return -1;
+
+	for (idx = 1; idx < strings->count; ++idx) {
+		const char *candidate = strings->strings[idx];
+		const char *shared = buf->ptr;
+
+		/* advance through both strings for as long as they agree */
+		while (*candidate != '\0' && *candidate == *shared) {
+			candidate++;
+			shared++;
+		}
+
+		git_buf_truncate(buf, shared - buf->ptr);
+
+		/* an empty prefix cannot shrink further, so stop early */
+		if (buf->size == 0)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Quick heuristic for "does this buffer hold binary data".
+ *
+ * Any NUL byte means binary right away.  Otherwise bytes in 0x20..0x7E
+ * count as printable, whitespace is ignored, and everything else counts
+ * as non-printable; the buffer is binary when the non-printable count
+ * exceeds the printable count divided by 128.
+ */
+bool git_buf_text_is_binary(const git_buf *buf)
+{
+	const char *cur, *limit = buf->ptr + buf->size;
+	int text_count = 0, other_count = 0;
+
+	for (cur = buf->ptr; cur < limit; ++cur) {
+		unsigned char c = *cur;
+
+		if (c == '\0')
+			return true;
+
+		if (c > 0x1F && c < 0x7F)
+			text_count++;
+		else if (!git__isspace(c))
+			other_count++;
+	}
+
+	return other_count > (text_count >> 7);
+}
+
+/*
+ * Look for a Unicode byte-order mark starting `offset` bytes into `buf`.
+ *
+ * `*bom` is always written: it receives the detected BOM type, or
+ * GIT_BOM_NONE when no BOM is present (including when fewer than two
+ * bytes remain after `offset`).  Callers can therefore test `*bom`
+ * without having initialized it themselves.
+ *
+ * Returns the length of the BOM in bytes (2, 3 or 4), or 0 if none found.
+ */
+int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf, size_t offset)
+{
+	const char *ptr;
+	size_t len;
+
+	/* default result, so every "return 0" path leaves a defined value */
+	*bom = GIT_BOM_NONE;
+
+	/* need at least 2 bytes after offset to look for any BOM */
+	if (buf->size < offset + 2)
+		return 0;
+
+	ptr = buf->ptr + offset;
+	len = buf->size - offset;
+
+	/* dispatch on the first byte; afterwards `ptr` is at the second byte */
+	switch (*ptr++) {
+	case 0:
+		/* 00 00 FE FF */
+		if (len >= 4 && ptr[0] == 0 && ptr[1] == '\xFE' && ptr[2] == '\xFF') {
+			*bom = GIT_BOM_UTF32_BE;
+			return 4;
+		}
+		break;
+	case '\xEF':
+		/* EF BB BF */
+		if (len >= 3 && ptr[0] == '\xBB' && ptr[1] == '\xBF') {
+			*bom = GIT_BOM_UTF8;
+			return 3;
+		}
+		break;
+	case '\xFE':
+		/* FE FF (second byte is readable: len >= 2 was checked above) */
+		if (*ptr == '\xFF') {
+			*bom = GIT_BOM_UTF16_BE;
+			return 2;
+		}
+		break;
+	case '\xFF':
+		if (*ptr != '\xFE')
+			break;
+		/* FF FE 00 00 is UTF-32 LE; FF FE followed by anything else is UTF-16 LE */
+		if (len >= 4 && ptr[1] == 0 && ptr[2] == 0) {
+			*bom = GIT_BOM_UTF32_LE;
+			return 4;
+		}
+		*bom = GIT_BOM_UTF16_LE;
+		return 2;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill `stats` with character-class counts for `buf` and report whether
+ * the content heuristically looks binary.
+ *
+ * The BOM type is always recorded in `stats->bom`; the BOM bytes are left
+ * out of the counts only when `skip_bom` is true.  A trailing \032 (EOF
+ * character) is never counted.
+ *
+ * Returns true when a NUL byte was seen or when the non-printable count
+ * exceeds the printable count divided by 128.
+ */
+bool git_buf_text_gather_stats(
+	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
+{
+	const char *cur = buf->ptr;
+	const char *limit = buf->ptr + buf->size;
+	int bom_len;
+
+	memset(stats, 0, sizeof(*stats));
+
+	/* detect the BOM unconditionally, step over it only on request */
+	bom_len = git_buf_text_detect_bom(&stats->bom, buf, 0);
+	if (skip_bom)
+		cur += bom_len;
+
+	/* leave a trailing EOF character out of the scan */
+	if (buf->size > 0 && limit[-1] == '\032')
+		limit--;
+
+	for (; cur < limit; ++cur) {
+		unsigned char c = *cur;
+
+		if (c == '\0') {
+			/* NUL is tallied separately and as non-printable */
+			stats->nul++;
+			stats->nonprintable++;
+		} else if (c == '\n') {
+			stats->lf++;
+		} else if (c == '\r') {
+			stats->cr++;
+			/* a CR directly followed by LF also counts as a CRLF */
+			if (cur + 1 < limit && cur[1] == '\n')
+				stats->crlf++;
+		} else if ((c > 0x1F && c < 0x7F) || c > 0x9f ||
+			c == '\t' || c == '\f' || c == '\v' || c == '\b' ||
+			c == 0x1b /*ESC*/) {
+			stats->printable++;
+		} else {
+			stats->nonprintable++;
+		}
+	}
+
+	if (stats->nul > 0)
+		return true;
+
+	return (stats->printable >> 7) < stats->nonprintable;
+}
diff --git a/src/buf_text.h b/src/buf_text.h
new file mode 100644
index 000000000..c22499bfe
--- /dev/null
+++ b/src/buf_text.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2009-2012 the libgit2 contributors
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_buf_text_h__
+#define INCLUDE_buf_text_h__
+
+#include "buffer.h"
+
+/*
+ * Byte-order marks reported by git_buf_text_detect_bom.  The byte
+ * sequences noted below are the ones that function compares against.
+ */
+typedef enum {
+ GIT_BOM_NONE = 0, /* also the value left by zero-filling a stats struct */
+ GIT_BOM_UTF8 = 1, /* EF BB BF */
+ GIT_BOM_UTF16_LE = 2, /* FF FE */
+ GIT_BOM_UTF16_BE = 3, /* FE FF */
+ GIT_BOM_UTF32_LE = 4, /* FF FE 00 00 */
+ GIT_BOM_UTF32_BE = 5 /* 00 00 FE FF */
+} git_bom_t;
+
+/* Character statistics filled in by git_buf_text_gather_stats */
+typedef struct {
+ git_bom_t bom; /* BOM found at head of text */
+ unsigned int nul, cr, lf, crlf; /* NUL, CR, LF and CRLF counts */
+ unsigned int printable, nonprintable; /* These are just approximations! */
+} git_buf_text_stats;
+
+/**
+ * Append string to buffer, prefixing each character from `esc_chars` with
+ * `esc_with` string.
+ *
+ * @param buf Buffer to append data to
+ * @param string String to escape and append
+ * @param esc_chars Characters to be escaped
+ * @param esc_with String to insert in front of each found character
+ * @return 0 on success, <0 on failure (probably allocation problem)
+ */
+extern int git_buf_text_puts_escaped(
+ git_buf *buf,
+ const char *string,
+ const char *esc_chars,
+ const char *esc_with);
+
+/**
+ * Append string escaping characters that are regex special
+ *
+ * Every character of `string` that is one of ^ . [ ] $ ( ) | * + ? { }
+ * or a backslash is written with a backslash in front of it.
+ *
+ * @param buf Buffer to append data to
+ * @param string String to escape and append
+ * @return Result of `git_buf_text_puts_escaped`: 0 on success, <0 on failure
+ */
+GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string)
+{
+ return git_buf_text_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\");
+}
+
+/**
+ * Unescape all characters in a buffer in place
+ *
+ * I.e. remove backslashes
+ */
+extern void git_buf_text_unescape(git_buf *buf);
+
+/**
+ * Fill buffer with the common prefix of an array of strings
+ *
+ * Buffer will be set to empty if there is no common prefix
+ */
+extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
+
+/**
+ * Check quickly if buffer looks like it contains binary data
+ *
+ * @param buf Buffer to check
+ * @return true if buffer looks like non-text data
+ */
+extern bool git_buf_text_is_binary(const git_buf *buf);
+
+/**
+ * Check if a buffer begins with a UTF BOM
+ *
+ * @param bom Set to the type of BOM detected or GIT_BOM_NONE
+ * @param buf Buffer in which to check the first bytes for a BOM
+ * @param offset Offset into buffer to look for BOM
+ * @return Number of bytes of BOM data (or 0 if no BOM found)
+ */
+extern int git_buf_text_detect_bom(
+ git_bom_t *bom, const git_buf *buf, size_t offset);
+
+/**
+ * Gather stats for a piece of text
+ *
+ * Fill the `stats` structure with counts of unreadable characters, carriage
+ * returns, etc, so it can be used in heuristics. This automatically skips
+ * a trailing EOF (\032 character). Also it will look for a BOM at the
+ * start of the text and can be told to skip that as well.
+ *
+ * @param stats Structure to be filled in
+ * @param buf Text to process
+ * @param skip_bom Exclude leading BOM from stats if true
+ * @return Does the buffer heuristically look like binary data
+ */
+extern bool git_buf_text_gather_stats(
+ git_buf_text_stats *stats, const git_buf *buf, bool skip_bom);
+
+#endif
diff --git a/src/buffer.c b/src/buffer.c
index e55b0a230..0e2c005df 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -31,15 +31,7 @@ void git_buf_init(git_buf *buf, size_t initial_size)
git_buf_grow(buf, initial_size);
}
-int git_buf_grow(git_buf *buf, size_t target_size)
-{
- int error = git_buf_try_grow(buf, target_size);
- if (error != 0)
- buf->ptr = git_buf__oom;
- return error;
-}
-
-int git_buf_try_grow(git_buf *buf, size_t target_size)
+int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom)
{
char *new_ptr;
size_t new_size;
@@ -67,8 +59,12 @@ int git_buf_try_grow(git_buf *buf, size_t target_size)
new_size = (new_size + 7) & ~7;
new_ptr = git__realloc(new_ptr, new_size);
- if (!new_ptr)
+
+ if (!new_ptr) {
+ if (mark_oom)
+ buf->ptr = git_buf__oom;
return -1;
+ }
buf->asize = new_size;
buf->ptr = new_ptr;
@@ -141,51 +137,6 @@ int git_buf_puts(git_buf *buf, const char *string)
return git_buf_put(buf, string, strlen(string));
}
-int git_buf_puts_escaped(
- git_buf *buf, const char *string, const char *esc_chars, const char *esc_with)
-{
- const char *scan;
- size_t total = 0, esc_len = strlen(esc_with), count;
-
- if (!string)
- return 0;
-
- for (scan = string; *scan; ) {
- /* count run of non-escaped characters */
- count = strcspn(scan, esc_chars);
- total += count;
- scan += count;
- /* count run of escaped characters */
- count = strspn(scan, esc_chars);
- total += count * (esc_len + 1);
- scan += count;
- }
-
- ENSURE_SIZE(buf, buf->size + total + 1);
-
- for (scan = string; *scan; ) {
- count = strcspn(scan, esc_chars);
-
- memmove(buf->ptr + buf->size, scan, count);
- scan += count;
- buf->size += count;
-
- for (count = strspn(scan, esc_chars); count > 0; --count) {
- /* copy escape sequence */
- memmove(buf->ptr + buf->size, esc_with, esc_len);
- buf->size += esc_len;
- /* copy character to be escaped */
- buf->ptr[buf->size] = *scan;
- buf->size++;
- scan++;
- }
- }
-
- buf->ptr[buf->size] = '\0';
-
- return 0;
-}
-
static const char b64str[64] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@@ -497,59 +448,6 @@ int git_buf_cmp(const git_buf *a, const git_buf *b)
(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
}
-int git_buf_common_prefix(git_buf *buf, const git_strarray *strings)
-{
- size_t i;
- const char *str, *pfx;
-
- git_buf_clear(buf);
-
- if (!strings || !strings->count)
- return 0;
-
- /* initialize common prefix to first string */
- if (git_buf_sets(buf, strings->strings[0]) < 0)
- return -1;
-
- /* go through the rest of the strings, truncating to shared prefix */
- for (i = 1; i < strings->count; ++i) {
-
- for (str = strings->strings[i], pfx = buf->ptr;
- *str && *str == *pfx; str++, pfx++)
- /* scanning */;
-
- git_buf_truncate(buf, pfx - buf->ptr);
-
- if (!buf->size)
- break;
- }
-
- return 0;
-}
-
-bool git_buf_is_binary(const git_buf *buf)
-{
- size_t i;
- int printable = 0, nonprintable = 0;
-
- for (i = 0; i < buf->size; i++) {
- unsigned char c = buf->ptr[i];
- if (c > 0x1F && c < 0x7F)
- printable++;
- else if (c == '\0')
- return true;
- else if (!git__isspace(c))
- nonprintable++;
- }
-
- return ((printable >> 7) < nonprintable);
-}
-
-void git_buf_unescape(git_buf *buf)
-{
- buf->size = git__unescape(buf->ptr);
-}
-
int git_buf_splice(
git_buf *buf,
size_t where,
diff --git a/src/buffer.h b/src/buffer.h
index a2896d486..379216bfc 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -27,30 +27,35 @@ extern char git_buf__oom[];
* For the cases where GIT_BUF_INIT cannot be used to do static
* initialization.
*/
-void git_buf_init(git_buf *buf, size_t initial_size);
+extern void git_buf_init(git_buf *buf, size_t initial_size);
/**
- * Grow the buffer to hold at least `target_size` bytes.
+ * Attempt to grow the buffer to hold at least `target_size` bytes.
*
- * If the allocation fails, this will return an error and the buffer
- * will be marked as invalid for future operations. The existing
- * contents of the buffer will be preserved however.
- * @return 0 on success or -1 on failure
+ * If the allocation fails, this will return an error. If mark_oom is true,
+ * this will mark the buffer as invalid for future operations; if false,
+ * existing buffer content will be preserved, but calling code must handle
+ * that buffer was not expanded.
*/
-int git_buf_grow(git_buf *buf, size_t target_size);
+extern int git_buf_try_grow(git_buf *buf, size_t target_size, bool mark_oom);
/**
- * Attempt to grow the buffer to hold at least `target_size` bytes.
+ * Grow the buffer to hold at least `target_size` bytes.
*
- * This is just like `git_buf_grow` except that even if the allocation
- * fails, the git_buf will still be left in a valid state.
+ * If the allocation fails, this will return an error and the buffer will be
+ * marked as invalid for future operations, invalidating its contents.
+ *
+ * @return 0 on success or -1 on failure
*/
-int git_buf_try_grow(git_buf *buf, size_t target_size);
+GIT_INLINE(int) git_buf_grow(git_buf *buf, size_t target_size)
+{
+ /* mark_oom = true: on allocation failure the buffer is flagged as OOM */
+ return git_buf_try_grow(buf, target_size, true);
+}
-void git_buf_free(git_buf *buf);
-void git_buf_swap(git_buf *buf_a, git_buf *buf_b);
-char *git_buf_detach(git_buf *buf);
-void git_buf_attach(git_buf *buf, char *ptr, size_t asize);
+extern void git_buf_free(git_buf *buf);
+extern void git_buf_swap(git_buf *buf_a, git_buf *buf_b);
+extern char *git_buf_detach(git_buf *buf);
+extern void git_buf_attach(git_buf *buf, char *ptr, size_t asize);
/**
* Test if there have been any reallocation failures with this git_buf.
@@ -92,18 +97,6 @@ int git_buf_join_n(git_buf *buf, char separator, int nbuf, ...);
int git_buf_join(git_buf *buf, char separator, const char *str_a, const char *str_b);
/**
- * Copy string into buf prefixing every character that is contained in the
- * esc_chars string with the esc_with string.
- */
-int git_buf_puts_escaped(
- git_buf *buf, const char *string, const char *esc_chars, const char *esc_with);
-
-GIT_INLINE(int) git_buf_puts_escape_regex(git_buf *buf, const char *string)
-{
- return git_buf_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\");
-}
-
-/**
* Join two strings as paths, inserting a slash between as needed.
* @return 0 on success, -1 on failure
*/
@@ -146,15 +139,6 @@ void git_buf_rtrim(git_buf *buf);
int git_buf_cmp(const git_buf *a, const git_buf *b);
-/* Fill buf with the common prefix of a array of strings */
-int git_buf_common_prefix(git_buf *buf, const git_strarray *strings);
-
-/* Check if buffer looks like it contains binary data */
-bool git_buf_is_binary(const git_buf *buf);
-
-/* Unescape all characters in a buffer */
-void git_buf_unescape(git_buf *buf);
-
/* Write data as base64 encoded in buffer */
int git_buf_put_base64(git_buf *buf, const char *data, size_t len);
diff --git a/src/config.c b/src/config.c
index 5ee0d39ff..6347f7df7 100644
--- a/src/config.c
+++ b/src/config.c
@@ -10,6 +10,7 @@
#include "config.h"
#include "git2/config.h"
#include "vector.h"
+#include "buf_text.h"
#if GIT_WIN32
# include <windows.h>
#endif
@@ -803,7 +804,7 @@ int git_config_rename_section(
int error = -1;
struct rename_data data;
- git_buf_puts_escape_regex(&pattern, old_section_name);
+ git_buf_text_puts_escape_regex(&pattern, old_section_name);
git_buf_puts(&pattern, "\\..+");
if (git_buf_oom(&pattern))
goto cleanup;
diff --git a/src/config_file.c b/src/config_file.c
index 7cc812aa4..354a91986 100644
--- a/src/config_file.c
+++ b/src/config_file.c
@@ -10,6 +10,7 @@
#include "fileops.h"
#include "filebuf.h"
#include "buffer.h"
+#include "buf_text.h"
#include "git2/config.h"
#include "git2/types.h"
#include "strmap.h"
@@ -854,17 +855,14 @@ fail_parse:
static int skip_bom(diskfile_backend *cfg)
{
- static const char utf8_bom[] = { '\xef', '\xbb', '\xbf' };
+ git_bom_t bom;
+ int bom_offset = git_buf_text_detect_bom(&bom,
+ &cfg->reader.buffer, cfg->reader.read_ptr - cfg->reader.buffer.ptr);
- if (cfg->reader.buffer.size < sizeof(utf8_bom))
- return 0;
-
- if (memcmp(cfg->reader.read_ptr, utf8_bom, sizeof(utf8_bom)) == 0)
- cfg->reader.read_ptr += sizeof(utf8_bom);
+ if (bom == GIT_BOM_UTF8)
+ cfg->reader.read_ptr += bom_offset;
- /* TODO: the reference implementation does pretty stupid
- stuff with the BoM
- */
+ /* TODO: reference implementation is pretty stupid with BoM */
return 0;
}
diff --git a/src/crlf.c b/src/crlf.c
index 5e86b4eb6..80204ebf0 100644
--- a/src/crlf.c
+++ b/src/crlf.c
@@ -148,8 +148,11 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
if (filter->attrs.crlf_action == GIT_CRLF_AUTO ||
filter->attrs.crlf_action == GIT_CRLF_GUESS) {
- git_text_stats stats;
- git_text_gather_stats(&stats, source);
+ git_buf_text_stats stats;
+
+ /* Check heuristics for binary vs text... */
+ if (git_buf_text_gather_stats(&stats, source, false))
+ return -1;
/*
* We're currently not going to even try to convert stuff
@@ -159,12 +162,6 @@ static int crlf_apply_to_odb(git_filter *self, git_buf *dest, const git_buf *sou
if (stats.cr != stats.crlf)
return -1;
- /*
- * And add some heuristics for binary vs text, of course...
- */
- if (git_text_is_binary(&stats))
- return -1;
-
#if 0
if (crlf_action == CRLF_GUESS) {
/*
diff --git a/src/diff_output.c b/src/diff_output.c
index 3d5e03a29..e137fd0f2 100644
--- a/src/diff_output.c
+++ b/src/diff_output.c
@@ -142,7 +142,7 @@ static int diff_delta_is_binary_by_content(
search.ptr = map->data;
search.size = min(map->len, 4000);
- if (git_buf_is_binary(&search))
+ if (git_buf_text_is_binary(&search))
file->flags |= GIT_DIFF_FILE_BINARY;
else
file->flags |= GIT_DIFF_FILE_NOT_BINARY;
diff --git a/src/filter.c b/src/filter.c
index f2ab1b85a..6d27c0c46 100644
--- a/src/filter.c
+++ b/src/filter.c
@@ -13,75 +13,6 @@
#include "git2/config.h"
#include "blob.h"
-/* Tweaked from Core Git. I wonder what we could use this for... */
-void git_text_gather_stats(git_text_stats *stats, const git_buf *text)
-{
- size_t i;
-
- memset(stats, 0, sizeof(*stats));
-
- for (i = 0; i < git_buf_len(text); i++) {
- unsigned char c = text->ptr[i];
-
- if (c == '\r') {
- stats->cr++;
-
- if (i + 1 < git_buf_len(text) && text->ptr[i + 1] == '\n')
- stats->crlf++;
- }
-
- else if (c == '\n')
- stats->lf++;
-
- else if (c == 127)
- /* DEL */
- stats->nonprintable++;
-
- else if (c <= 0x1F || (c >= 0x80 && c <= 0x9F)) {
- switch (c) {
- /* BS, HT, ESC and FF */
- case '\b': case '\t': case '\033': case '\014':
- stats->printable++;
- break;
- case 0:
- stats->nul++;
- /* fall through */
- default:
- stats->nonprintable++;
- }
- }
-
- else
- stats->printable++;
- }
-
- /* If file ends with EOF then don't count this EOF as non-printable. */
- if (git_buf_len(text) >= 1 && text->ptr[text->size - 1] == '\032')
- stats->nonprintable--;
-}
-
-/*
- * Fresh from Core Git
- */
-int git_text_is_binary(git_text_stats *stats)
-{
- if (stats->nul)
- return 1;
-
- if ((stats->printable >> 7) < stats->nonprintable)
- return 1;
- /*
- * Other heuristics? Average line length might be relevant,
- * as might LF vs CR vs CRLF counts..
- *
- * NOTE! It might be normal to have a low ratio of CRLF to LF
- * (somebody starts with a LF-only file and edits it with an editor
- * that adds CRLF only to lines that are added..). But do we
- * want to support CR-only? Probably not.
- */
- return 0;
-}
-
int git_filters_load(git_vector *filters, git_repository *repo, const char *path, int mode)
{
int error;
diff --git a/src/filter.h b/src/filter.h
index b9beb4942..2b5051c69 100644
--- a/src/filter.h
+++ b/src/filter.h
@@ -9,6 +9,7 @@
#include "common.h"
#include "buffer.h"
+#include "buf_text.h"
#include "git2/odb.h"
#include "git2/repository.h"
@@ -31,14 +32,6 @@ typedef enum {
GIT_CRLF_AUTO,
} git_crlf_t;
-typedef struct {
- /* NUL, CR, LF and CRLF counts */
- unsigned int nul, cr, lf, crlf;
-
- /* These are just approximations! */
- unsigned int printable, nonprintable;
-} git_text_stats;
-
/*
* FILTER API
*/
@@ -99,24 +92,4 @@ extern int git_filter_add__crlf_to_odb(git_vector *filters, git_repository *repo
/* Add CRLF, from ODB to worktree */
extern int git_filter_add__crlf_to_workdir(git_vector *filters, git_repository *repo, const char *path);
-
-/*
- * PLAINTEXT API
- */
-
-/*
- * Gather stats for a piece of text
- *
- * Fill the `stats` structure with information on the number of
- * unreadable characters, carriage returns, etc, so it can be
- * used in heuristics.
- */
-extern void git_text_gather_stats(git_text_stats *stats, const git_buf *text);
-
-/*
- * Process `git_text_stats` data generated by `git_text_stat` to see
- * if it qualifies as a binary file
- */
-extern int git_text_is_binary(git_text_stats *stats);
-
#endif
diff --git a/src/path.c b/src/path.c
index 98351bec3..87eded3c4 100644
--- a/src/path.c
+++ b/src/path.c
@@ -511,7 +511,7 @@ static bool _check_dir_contents(
size_t sub_size = strlen(sub);
/* leave base valid even if we could not make space for subdir */
- if (git_buf_try_grow(dir, dir_size + sub_size + 2) < 0)
+ if (git_buf_try_grow(dir, dir_size + sub_size + 2, false) < 0)
return false;
/* save excursion */
diff --git a/src/pathspec.c b/src/pathspec.c
index fc6547afe..993b44667 100644
--- a/src/pathspec.c
+++ b/src/pathspec.c
@@ -6,6 +6,7 @@
*/
#include "pathspec.h"
+#include "buf_text.h"
#include "attr_file.h"
/* what is the common non-wildcard prefix for all items in the pathspec */
@@ -15,7 +16,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec)
const char *scan;
if (!pathspec || !pathspec->count ||
- git_buf_common_prefix(&prefix, pathspec) < 0)
+ git_buf_text_common_prefix(&prefix, pathspec) < 0)
return NULL;
/* diff prefix will only be leading non-wildcards */
@@ -31,7 +32,7 @@ char *git_pathspec_prefix(const git_strarray *pathspec)
return NULL;
}
- git_buf_unescape(&prefix);
+ git_buf_text_unescape(&prefix);
return git_buf_detach(&prefix);
}
diff --git a/src/submodule.c b/src/submodule.c
index 15158f0d8..c117255d4 100644
--- a/src/submodule.c
+++ b/src/submodule.c
@@ -12,6 +12,7 @@
#include "git2/index.h"
#include "git2/submodule.h"
#include "buffer.h"
+#include "buf_text.h"
#include "vector.h"
#include "posix.h"
#include "config_file.h"
@@ -782,7 +783,7 @@ int git_submodule_reload(git_submodule *submodule)
git_buf path = GIT_BUF_INIT;
git_buf_sets(&path, "submodule\\.");
- git_buf_puts_escape_regex(&path, submodule->name);
+ git_buf_text_puts_escape_regex(&path, submodule->name);
git_buf_puts(&path, ".*");
if (git_buf_oom(&path))
diff --git a/tests-clar/core/buffer.c b/tests-clar/core/buffer.c
index 236bf39da..40fc4c571 100644
--- a/tests-clar/core/buffer.c
+++ b/tests-clar/core/buffer.c
@@ -1,5 +1,6 @@
#include "clar_libgit2.h"
#include "buffer.h"
+#include "buf_text.h"
#define TESTSTR "Have you seen that? Have you seeeen that??"
const char *test_string = TESTSTR;
@@ -576,37 +577,37 @@ void test_core_buffer__11(void)
t.strings = t1;
t.count = 3;
- cl_git_pass(git_buf_common_prefix(&a, &t));
+ cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "");
t.strings = t2;
t.count = 3;
- cl_git_pass(git_buf_common_prefix(&a, &t));
+ cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "some");
t.strings = t3;
t.count = 3;
- cl_git_pass(git_buf_common_prefix(&a, &t));
+ cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "");
t.strings = t4;
t.count = 3;
- cl_git_pass(git_buf_common_prefix(&a, &t));
+ cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "happ");
t.strings = t5;
t.count = 3;
- cl_git_pass(git_buf_common_prefix(&a, &t));
+ cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "happ");
t.strings = t6;
t.count = 3;
- cl_git_pass(git_buf_common_prefix(&a, &t));
+ cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "");
t.strings = t7;
t.count = 3;
- cl_git_pass(git_buf_common_prefix(&a, &t));
+ cl_git_pass(git_buf_text_common_prefix(&a, &t));
cl_assert_equal_s(a.ptr, "");
git_buf_free(&a);
@@ -641,19 +642,19 @@ void test_core_buffer__puts_escaped(void)
git_buf a = GIT_BUF_INIT;
git_buf_clear(&a);
- cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "", ""));
+ cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "", ""));
cl_assert_equal_s("this is a test", a.ptr);
git_buf_clear(&a);
- cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "t", "\\"));
+ cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "t", "\\"));
cl_assert_equal_s("\\this is a \\tes\\t", a.ptr);
git_buf_clear(&a);
- cl_git_pass(git_buf_puts_escaped(&a, "this is a test", "i ", "__"));
+ cl_git_pass(git_buf_text_puts_escaped(&a, "this is a test", "i ", "__"));
cl_assert_equal_s("th__is__ __is__ a__ test", a.ptr);
git_buf_clear(&a);
- cl_git_pass(git_buf_puts_escape_regex(&a, "^match\\s*[A-Z]+.*"));
+ cl_git_pass(git_buf_text_puts_escape_regex(&a, "^match\\s*[A-Z]+.*"));
cl_assert_equal_s("\\^match\\\\s\\*\\[A-Z\\]\\+\\.\\*", a.ptr);
git_buf_free(&a);
@@ -663,7 +664,7 @@ static void assert_unescape(char *expected, char *to_unescape) {
git_buf buf = GIT_BUF_INIT;
cl_git_pass(git_buf_sets(&buf, to_unescape));
- git_buf_unescape(&buf);
+ git_buf_text_unescape(&buf);
cl_assert_equal_s(expected, buf.ptr);
cl_assert_equal_sz(strlen(expected), buf.size);
diff --git a/tests-clar/object/blob/filter.c b/tests-clar/object/blob/filter.c
index 785489849..b9bbfff0c 100644
--- a/tests-clar/object/blob/filter.c
+++ b/tests-clar/object/blob/filter.c
@@ -4,7 +4,7 @@
#include "filter.h"
static git_repository *g_repo = NULL;
-#define NUM_TEST_OBJECTS 6
+#define NUM_TEST_OBJECTS 8
static git_oid g_oids[NUM_TEST_OBJECTS];
static const char *g_raw[NUM_TEST_OBJECTS] = {
"",
@@ -12,16 +12,20 @@ static const char *g_raw[NUM_TEST_OBJECTS] = {
"foo\rbar\r",
"foo\r\nbar\r\n",
"foo\nbar\rboth\r\nreversed\n\ragain\nproblems\r",
- "123\n\000\001\002\003\004abc\255\254\253\r\n"
+ "123\n\000\001\002\003\004abc\255\254\253\r\n",
+ "\xEF\xBB\xBFThis is UTF-8\n",
+ "\xFE\xFF\x00T\x00h\x00i\x00s\x00!"
};
-static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17 };
-static git_text_stats g_stats[NUM_TEST_OBJECTS] = {
- { 0, 0, 0, 0, 0, 0 },
- { 0, 0, 2, 0, 6, 0 },
- { 0, 2, 0, 0, 6, 0 },
- { 0, 2, 2, 2, 6, 0 },
- { 0, 4, 4, 1, 31, 0 },
- { 1, 1, 2, 1, 9, 5 }
+static git_off_t g_len[NUM_TEST_OBJECTS] = { -1, -1, -1, -1, -1, 17, -1, 12 };
+static git_buf_text_stats g_stats[NUM_TEST_OBJECTS] = {
+ { 0, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 2, 0, 6, 0 },
+ { 0, 0, 2, 0, 0, 6, 0 },
+ { 0, 0, 2, 2, 2, 6, 0 },
+ { 0, 0, 4, 4, 1, 31, 0 },
+ { 0, 1, 1, 2, 1, 9, 5 },
+ { GIT_BOM_UTF8, 0, 0, 1, 0, 16, 0 },
+ { GIT_BOM_UTF16_BE, 5, 0, 0, 0, 7, 5 },
};
static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
{ "", 0, 0 },
@@ -29,7 +33,9 @@ static git_buf g_crlf_filtered[NUM_TEST_OBJECTS] = {
{ "foo\rbar\r", 0, 8 },
{ "foo\nbar\n", 0, 8 },
{ "foo\nbar\rboth\nreversed\n\ragain\nproblems\r", 0, 38 },
- { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 }
+ { "123\n\000\001\002\003\004abc\255\254\253\n", 0, 16 },
+ { "\xEF\xBB\xBFThis is UTF-8\n", 0, 17 },
+ { "\xFE\xFF\x00T\x00h\x00i\x00s\x00!", 0, 12 }
};
void test_object_blob_filter__initialize(void)
@@ -76,12 +82,12 @@ void test_object_blob_filter__stats(void)
int i;
git_blob *blob;
git_buf buf = GIT_BUF_INIT;
- git_text_stats stats;
+ git_buf_text_stats stats;
for (i = 0; i < NUM_TEST_OBJECTS; i++) {
cl_git_pass(git_blob_lookup(&blob, g_repo, &g_oids[i]));
cl_git_pass(git_blob__getbuf(&buf, blob));
- git_text_gather_stats(&stats, &buf);
+ git_buf_text_gather_stats(&stats, &buf, false);
cl_assert(memcmp(&g_stats[i], &stats, sizeof(stats)) == 0);
git_blob_free(blob);
}