summaryrefslogtreecommitdiff
path: root/src/utf8.h
diff options
context:
space:
mode:
authorEdward Thomson <ethomson@edwardthomson.com>2021-04-14 15:47:27 +0100
committerEdward Thomson <ethomson@edwardthomson.com>2021-04-14 23:02:51 +0100
commit1d95b59b4dbd8eda3f83f8af2a4ae07c7cdfc245 (patch)
tree88e04d8ab4d21597002f8b74d8775677f69b104f /src/utf8.h
parent4f4b1139d23a7b38cceb9d83acbfaf73151f522f (diff)
downloadlibgit2-1d95b59b4dbd8eda3f83f8af2a4ae07c7cdfc245.tar.gz
utf8: refactor utf8 functions
Move the utf8 functions into a proper namespace `git_utf8` instead of being in the namespaceless `git__` function group. Update them to have out-params first and use `char *` instead of `uint8_t *` to match our API treating strings as `char *` (even if they truly contain `uchar`s inside).
Diffstat (limited to 'src/utf8.h')
-rw-r--r--src/utf8.h32
1 files changed, 32 insertions, 0 deletions
diff --git a/src/utf8.h b/src/utf8.h
new file mode 100644
index 000000000..71c8f3bee
--- /dev/null
+++ b/src/utf8.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_utf8_h__
+#define INCLUDE_utf8_h__
+
+#include "common.h"
+
+/**
+ * Iterate through a UTF-8 string, yielding one codepoint at a time.
+ *
+ * @param out pointer to where the current codepoint is stored
+ * @param str current position in the string
+ * @param str_len size remaining in the string
+ * @return length in bytes of the read codepoint; -1 if the codepoint was invalid
+ */
+extern int git_utf8_iterate(uint32_t *out, const char *str, size_t str_len);
+
+/**
+ * Iterate through a UTF-8 string and stop after finding any invalid UTF-8
+ * codepoint.
+ *
+ * @param str string to scan
+ * @param str_len size of the string
+ * @return length in bytes of the string that contains valid data
+ */
+extern size_t git_utf8_valid_buf_length(const char *str, size_t str_len);
+
+#endif