summary refs log tree commit diff
path: root/util/buf_text.c
diff options
context:
space:
mode:
Diffstat (limited to 'util/buf_text.c')
-rw-r--r-- util/buf_text.c 316
1 files changed, 316 insertions, 0 deletions
diff --git a/util/buf_text.c b/util/buf_text.c
new file mode 100644
index 000000000..88fcb87dd
--- /dev/null
+++ b/util/buf_text.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#include "buf_text.h"
+
+/*
+ * Append `string` to `buf`, writing `esc_with` in front of every character
+ * of `string` that also appears in `esc_chars`.
+ *
+ * A NULL `string` appends nothing and returns 0. Otherwise the function
+ * returns 0 on success and -1 when the buffer cannot be grown. The result
+ * is NUL-terminated.
+ *
+ * NOTE(review): GIT_ERROR_CHECK_ALLOC_ADD is defined elsewhere; presumably
+ * it bails out of the function when the size addition overflows -- confirm.
+ */
+int git_buf_text_puts_escaped(
+	git_buf *buf,
+	const char *string,
+	const char *esc_chars,
+	const char *esc_with)
+{
+	const char *cursor;
+	size_t needed = 0, prefix_len = strlen(esc_with), run, alloclen;
+
+	if (!string)
+		return 0;
+
+	/* Pass 1: measure how many bytes the escaped text will occupy. */
+	cursor = string;
+	while (*cursor) {
+		/* stretch of characters copied verbatim */
+		run = strcspn(cursor, esc_chars);
+		needed += run;
+		cursor += run;
+
+		/* stretch of characters that each gain an escape prefix */
+		run = strspn(cursor, esc_chars);
+		needed += run * (prefix_len + 1);
+		cursor += run;
+	}
+
+	/* one extra byte for the trailing NUL */
+	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, needed, 1);
+	if (git_buf_grow_by(buf, alloclen) < 0)
+		return -1;
+
+	/* Pass 2: emit the text into the space reserved above. */
+	cursor = string;
+	while (*cursor) {
+		run = strcspn(cursor, esc_chars);
+		memmove(buf->ptr + buf->size, cursor, run);
+		cursor += run;
+		buf->size += run;
+
+		run = strspn(cursor, esc_chars);
+		while (run > 0) {
+			/* escape sequence first, then the character itself */
+			memmove(buf->ptr + buf->size, esc_with, prefix_len);
+			buf->size += prefix_len;
+			buf->ptr[buf->size] = *cursor;
+			buf->size++;
+			cursor++;
+			run--;
+		}
+	}
+
+	buf->ptr[buf->size] = '\0';
+
+	return 0;
+}
+
+/*
+ * Run git__unescape() over the buffer's contents and store its return
+ * value as the new buffer size.
+ *
+ * NOTE(review): presumably git__unescape() rewrites the string in place and
+ * returns the resulting length -- confirm against its definition.
+ */
+void git_buf_text_unescape(git_buf *buf)
+{
+ buf->size = git__unescape(buf->ptr);
+}
+
+/*
+ * Write a copy of `src` into `tgt` with every "\r\n" pair reduced to "\n".
+ * A '\r' that is not immediately followed by '\n' (including one at the very
+ * end of the input) is kept.
+ *
+ * `tgt` and `src` must be different buffers. When `src` holds no '\r' at
+ * all the contents are copied with git_buf_set() and its result is
+ * returned; otherwise returns 0 on success or -1 if `tgt` cannot be grown.
+ */
+int git_buf_text_crlf_to_lf(git_buf *tgt, const git_buf *src)
+{
+	const char *in = src->ptr;
+	const char *in_end = src->ptr + src->size;
+	const char *cr = memchr(in, '\r', src->size);
+	size_t new_size;
+	char *out;
+
+	assert(tgt != src);
+
+	/* nothing to strip: plain copy */
+	if (cr == NULL)
+		return git_buf_set(tgt, src->ptr, src->size);
+
+	/*
+	 * The output can never be longer than the input, so a single
+	 * allocation of size + 1 (for the NUL) covers the whole conversion.
+	 */
+	GIT_ERROR_CHECK_ALLOC_ADD(&new_size, src->size, 1);
+	if (git_buf_grow(tgt, new_size) < 0)
+		return -1;
+
+	out = tgt->ptr;
+	tgt->size = 0;
+
+	while (cr != NULL) {
+		size_t chunk = (size_t)(cr - in);
+
+		/* copy everything between the previous position and this CR */
+		if (chunk > 0) {
+			memcpy(out, in, chunk);
+			out += chunk;
+		}
+
+		/* only a CR directly in front of an LF gets dropped */
+		if (cr + 1 == in_end || cr[1] != '\n')
+			*out++ = '\r';
+
+		in = cr + 1;
+		cr = memchr(in, '\r', in_end - in);
+	}
+
+	/* whatever follows the last CR */
+	if (in < in_end) {
+		size_t tail = (size_t)(in_end - in);
+		memcpy(out, in, tail);
+		out += tail;
+	}
+
+	tgt->size = (size_t)(out - tgt->ptr);
+	tgt->ptr[tgt->size] = '\0';
+
+	return 0;
+}
+
+/*
+ * Write a copy of `src` into `tgt` in which every line ends with "\r\n".
+ * A line that already ends in "\r\n" is left with a single '\r' (the
+ * existing one is skipped and a fresh "\r\n" is written).
+ *
+ * `tgt` and `src` must be different buffers. When `src` holds no '\n' the
+ * contents are copied with git_buf_set() and its result is returned.
+ * Otherwise returns -1 if `tgt` cannot be grown, or the result of the final
+ * git_buf_put() that appends the text after the last '\n'.
+ */
+int git_buf_text_lf_to_crlf(git_buf *tgt, const git_buf *src)
+{
+	const char *end = src->ptr + src->size;
+	const char *line = src->ptr;
+	const char *lf = memchr(line, '\n', src->size);
+	size_t alloclen;
+
+	assert(tgt != src);
+
+	if (lf == NULL)
+		return git_buf_set(tgt, src->ptr, src->size);
+
+	/*
+	 * Initial guess: input size plus 1/16th for the added CRs, plus the
+	 * NUL. The loop below still grows the buffer whenever it has to.
+	 */
+	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, src->size, src->size >> 4);
+	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 1);
+	if (git_buf_grow(tgt, alloclen) < 0)
+		return -1;
+	tgt->size = 0;
+
+	while (lf != NULL) {
+		size_t linelen = lf - line;
+
+		/* input has mixed endings: do not duplicate an existing CR */
+		if (linelen > 0 && lf[-1] == '\r')
+			linelen--;
+
+		/* room for the line body, "\r\n" and a NUL */
+		GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, linelen, 3);
+		if (git_buf_grow_by(tgt, alloclen) < 0)
+			return -1;
+
+		if (linelen > 0) {
+			memcpy(tgt->ptr + tgt->size, line, linelen);
+			tgt->size += linelen;
+		}
+
+		tgt->ptr[tgt->size++] = '\r';
+		tgt->ptr[tgt->size++] = '\n';
+
+		line = lf + 1;
+		lf = memchr(line, '\n', end - line);
+	}
+
+	tgt->ptr[tgt->size] = '\0';
+
+	/* append the unterminated tail, if any */
+	return git_buf_put(tgt, line, end - line);
+}
+
+/*
+ * Store in `buf` the longest leading run of characters shared by every
+ * entry of `strings`.
+ *
+ * `buf` is cleared first; a NULL or empty array leaves it cleared and
+ * returns 0. Returns -1 if the first string cannot be copied into `buf`,
+ * 0 otherwise.
+ */
+int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strings)
+{
+	size_t idx;
+
+	git_buf_clear(buf);
+
+	if (!strings || !strings->count)
+		return 0;
+
+	/* start with the whole first string as the candidate prefix */
+	if (git_buf_sets(buf, strings->strings[0]) < 0)
+		return -1;
+
+	/* shrink the candidate against each remaining string */
+	for (idx = 1; idx < strings->count; ++idx) {
+		const char *candidate = strings->strings[idx];
+		const char *shared = buf->ptr;
+
+		while (*candidate && *candidate == *shared) {
+			candidate++;
+			shared++;
+		}
+
+		git_buf_truncate(buf, shared - buf->ptr);
+
+		/* nothing in common any more: no point in looking further */
+		if (buf->size == 0)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Heuristically decide whether the buffer holds binary data.
+ *
+ * Returns true when the detected BOM compares greater than GIT_BOM_UTF8,
+ * when a NUL byte is found after the BOM, or when the number of
+ * nonprintable bytes exceeds 1/128th of the printable ones.
+ *
+ * NOTE(review): the enum order of git_bom_t is not visible here; presumably
+ * the values above GIT_BOM_UTF8 are the UTF-16/UTF-32 BOMs -- confirm.
+ */
+bool git_buf_text_is_binary(const git_buf *buf)
+{
+	const char *cur = buf->ptr, *limit = buf->ptr + buf->size;
+	git_bom_t bom;
+	int printable = 0, nonprintable = 0;
+
+	/* step over the BOM, if there is one */
+	cur += git_buf_text_detect_bom(&bom, buf);
+
+	if (bom > GIT_BOM_UTF8)
+		return true;
+
+	for (; cur < limit; cur++) {
+		unsigned char c = *cur;
+
+		/* a single NUL is enough to call the content binary */
+		if (c == '\0')
+			return true;
+
+		/*
+		 * Printable: every byte from 0x20 (SPACE) upwards except DEL
+		 * (0x7F), plus BS, ESC and FF. Remaining bytes count as
+		 * nonprintable unless git__isspace() accepts them, in which
+		 * case they are not counted at all.
+		 */
+		if ((c > 0x1F && c != 127) || c == '\b' || c == '\033' || c == '\014')
+			printable++;
+		else if (!git__isspace(c))
+			nonprintable++;
+	}
+
+	return ((printable >> 7) < nonprintable);
+}
+
+/*
+ * Report whether any of the first `buf->size` bytes of the buffer is NUL.
+ */
+bool git_buf_text_contains_nul(const git_buf *buf)
+{
+	const void *nul = memchr(buf->ptr, '\0', buf->size);
+
+	return nul != NULL;
+}
+
+/*
+ * Look for a Unicode byte order mark at the start of the buffer.
+ *
+ * `*bom` is always written: GIT_BOM_NONE when nothing is recognised,
+ * otherwise the matching GIT_BOM_* value. The return value is the length of
+ * the BOM in bytes (2, 3 or 4), or 0 when none was found.
+ *
+ * Recognised sequences:
+ *   00 00 FE FF  UTF-32 BE (4 bytes)
+ *   EF BB BF     UTF-8     (3 bytes)
+ *   FE FF        UTF-16 BE (2 bytes)
+ *   FF FE 00 00  UTF-32 LE (4 bytes)
+ *   FF FE        UTF-16 LE (2 bytes)
+ */
+int git_buf_text_detect_bom(git_bom_t *bom, const git_buf *buf)
+{
+	const char *bytes;
+	size_t len;
+
+	*bom = GIT_BOM_NONE;
+
+	/* the shortest BOM is two bytes long */
+	if (buf->size < 2)
+		return 0;
+
+	bytes = buf->ptr;
+	len = buf->size;
+
+	switch (bytes[0]) {
+	case 0:
+		if (len >= 4 && bytes[1] == 0 &&
+		    bytes[2] == '\xFE' && bytes[3] == '\xFF') {
+			*bom = GIT_BOM_UTF32_BE;
+			return 4;
+		}
+		break;
+	case '\xEF':
+		if (len >= 3 && bytes[1] == '\xBB' && bytes[2] == '\xBF') {
+			*bom = GIT_BOM_UTF8;
+			return 3;
+		}
+		break;
+	case '\xFE':
+		if (bytes[1] == '\xFF') {
+			*bom = GIT_BOM_UTF16_BE;
+			return 2;
+		}
+		break;
+	case '\xFF':
+		if (bytes[1] == '\xFE') {
+			/* FF FE followed by 00 00 is UTF-32 LE, not UTF-16 LE */
+			if (len >= 4 && bytes[2] == 0 && bytes[3] == 0) {
+				*bom = GIT_BOM_UTF32_LE;
+				return 4;
+			}
+
+			*bom = GIT_BOM_UTF16_LE;
+			return 2;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill `stats` with character-class counts for the buffer and report
+ * whether the content looks binary.
+ *
+ * `stats` is zeroed first and `stats->bom` receives the detected BOM. The
+ * BOM bytes are left out of the counts only when `skip_bom` is true. A
+ * trailing 0x1A (Ctrl-Z, EOF) byte is never counted.
+ *
+ * Returns true when there is a CR not followed by LF, when there is at
+ * least one NUL, or when the nonprintable count exceeds 1/128th of the
+ * printable count.
+ */
+bool git_buf_text_gather_stats(
+	git_buf_text_stats *stats, const git_buf *buf, bool skip_bom)
+{
+	const char *cur = buf->ptr, *end = buf->ptr + buf->size;
+	int bom_len;
+
+	memset(stats, 0, sizeof(*stats));
+
+	/* the BOM is always recorded, but only optionally skipped */
+	bom_len = git_buf_text_detect_bom(&stats->bom, buf);
+	if (skip_bom)
+		cur += bom_len;
+
+	/* leave a trailing EOF marker out of the statistics */
+	if (buf->size > 0 && end[-1] == '\032')
+		end--;
+
+	for (; cur < end; cur++) {
+		unsigned char c = *cur;
+
+		/* 0x20 and above, DEL excepted, is printable */
+		if (c > 0x1F && c != 0x7F) {
+			stats->printable++;
+			continue;
+		}
+
+		switch (c) {
+		case '\0':
+			stats->nul++;
+			stats->nonprintable++;
+			break;
+		case '\n':
+			stats->lf++;
+			break;
+		case '\r':
+			stats->cr++;
+			/* peek at the next byte to spot a CRLF pair */
+			if (cur + 1 < end && cur[1] == '\n')
+				stats->crlf++;
+			break;
+		case '\t': case '\f': case '\v': case '\b': case 0x1b: /*ESC*/
+			stats->printable++;
+			break;
+		default:
+			stats->nonprintable++;
+			break;
+		}
+	}
+
+	/* Treat files with a bare CR as binary */
+	return (stats->cr != stats->crlf || stats->nul > 0 ||
+		((stats->printable >> 7) < stats->nonprintable));
+}