summaryrefslogtreecommitdiff
path: root/src/buf_text.h
diff options
context:
space:
mode:
authorRussell Belfer <rb@github.com>2012-11-28 09:58:48 -0800
committerRussell Belfer <rb@github.com>2012-11-28 09:58:48 -0800
commit7bf87ab6987cf6b9e166e23d2d9dbdcd2511fb32 (patch)
treedcc8a92ce69b2a0d9d8cca98d67f0cc71177ce40 /src/buf_text.h
parent693021262ba0eeac2923bbce1b2262717019c807 (diff)
downloadlibgit2-7bf87ab6987cf6b9e166e23d2d9dbdcd2511fb32.tar.gz
Consolidate text buffer functions
There are many scattered functions that look into the contents of buffers to do various text manipulations (such as escaping or unescaping data, calculating text stats, guessing if content is binary, etc). This groups all those functions together into a new file and converts the code to use that. This has two enhancements to existing functionality. The old text stats function is significantly rewritten and the BOM detection code was extended (although largely we can't deal with anything other than a UTF8 BOM).
Diffstat (limited to 'src/buf_text.h')
-rw-r--r--src/buf_text.h100
1 files changed, 100 insertions, 0 deletions
diff --git a/src/buf_text.h b/src/buf_text.h
new file mode 100644
index 000000000..c22499bfe
--- /dev/null
+++ b/src/buf_text.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2009-2012 the libgit2 contributors
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_buf_text_h__
+#define INCLUDE_buf_text_h__
+
+#include "buffer.h"
+
+typedef enum { /* Kinds of byte order mark (BOM) that BOM detection can report */
+ GIT_BOM_NONE = 0, /* no BOM detected */
+ GIT_BOM_UTF8 = 1, /* UTF-8 BOM */
+ GIT_BOM_UTF16_LE = 2, /* UTF-16 BOM, little-endian */
+ GIT_BOM_UTF16_BE = 3, /* UTF-16 BOM, big-endian */
+ GIT_BOM_UTF32_LE = 4, /* UTF-32 BOM, little-endian */
+ GIT_BOM_UTF32_BE = 5 /* UTF-32 BOM, big-endian */
+} git_bom_t;
+
+typedef struct { /* Text statistics, filled in by git_buf_text_gather_stats() */
+ git_bom_t bom; /* BOM found at head of text */
+ unsigned int nul, cr, lf, crlf; /* NUL, CR, LF and CRLF counts */
+ unsigned int printable, nonprintable; /* Character counts; these are just approximations! */
+} git_buf_text_stats;
+
+/**
+ * Append string to buffer, prefixing each character from `esc_chars` with
+ * `esc_with` string.
+ *
+ * @param buf Buffer to append data to
+ * @param string String to escape and append
+ * @param esc_chars Characters to be escaped
+ * @param esc_with String to insert in front of each found character
+ * @return 0 on success, <0 on failure (probably allocation problem)
+ */
+extern int git_buf_text_puts_escaped(
+ git_buf *buf,
+ const char *string,
+ const char *esc_chars,
+ const char *esc_with);
+
+/**
+ * Append `string` to `buf`, putting a backslash before each regex-special character
+ */
+GIT_INLINE(int) git_buf_text_puts_escape_regex(git_buf *buf, const char *string)
+{
+ return git_buf_text_puts_escaped(buf, string, "^.[]$()|*+?{}\\", "\\");
+}
+
+/**
+ * Unescape all characters in a buffer in place
+ *
+ * I.e. remove backslashes; the contents of `buf` itself are rewritten
+ */
+extern void git_buf_text_unescape(git_buf *buf);
+
+/**
+ * Fill buffer with the common prefix of an array of strings
+ *
+ * Buffer will be set to empty if there is no common prefix
+ */
+extern int git_buf_text_common_prefix(git_buf *buf, const git_strarray *strs);
+
+/**
+ * Check quickly if buffer looks like it contains binary data
+ *
+ * @param buf Buffer to check
+ * @return true if buffer looks like non-text data, false otherwise
+ */
+extern bool git_buf_text_is_binary(const git_buf *buf);
+
+/**
+ * Check if a buffer contains a UTF BOM at the given offset
+ *
+ * @param bom Set to the type of BOM detected or GIT_BOM_NONE
+ * @param buf Buffer in which to check the first bytes for a BOM
+ * @param offset Offset into buffer to look for BOM
+ * @return Number of bytes of BOM data (or 0 if no BOM found)
+ */
+extern int git_buf_text_detect_bom(
+ git_bom_t *bom, const git_buf *buf, size_t offset);
+
+/**
+ * Gather stats for a piece of text
+ *
+ * Fill the `stats` structure with counts of unreadable characters, carriage
+ * returns, etc, so it can be used in heuristics. This automatically skips
+ * a trailing EOF (\032 character). Also it will look for a BOM at the
+ * start of the text and can be told to skip that as well.
+ *
+ * @param stats Structure to be filled in
+ * @param buf Text to process
+ * @param skip_bom Exclude leading BOM from stats if true
+ * @return true if the buffer heuristically looks like binary data
+ */
+extern bool git_buf_text_gather_stats(
+ git_buf_text_stats *stats, const git_buf *buf, bool skip_bom);
+
+#endif