summaryrefslogtreecommitdiff
path: root/src/utf8.c
diff options
context:
space:
mode:
author: Edward Thomson <ethomson@edwardthomson.com> 2021-04-14 22:22:11 +0100
committer: Edward Thomson <ethomson@edwardthomson.com> 2021-04-14 23:02:51 +0100
commit: cb136cddd93046d46c6a8c5ee2a3f5c30a001e97 (patch)
tree: f5d88259016e7b5b95fda5d190ec416c25b56c74 /src/utf8.c
parent: 1d95b59b4dbd8eda3f83f8af2a4ae07c7cdfc245 (diff)
download: libgit2-cb136cddd93046d46c6a8c5ee2a3f5c30a001e97.tar.gz
utf8: introduce git_utf8_char_length
Introduce a function to determine the number of Unicode characters in a given UTF-8 string.
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- src/utf8.c | 18
1 files changed, 18 insertions, 0 deletions
diff --git a/src/utf8.c b/src/utf8.c
index 1a37da6fb..77065cb71 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -114,6 +114,24 @@ int git_utf8_iterate(uint32_t *out, const char *_str, size_t str_len)
return length;
}
+/*
+ * Count the characters in the first str_len bytes of the UTF-8 string _str.
+ */
+size_t git_utf8_char_length(const char *_str, size_t str_len)
+{
+	const uint8_t *bytes = (const uint8_t *)_str;
+	size_t pos, chars = 0;
+
+	for (pos = 0; pos < str_len; chars++) {
+		int step = utf8_charlen(bytes + pos, str_len - pos);
+
+		/* utf8_charlen reported failure: count this byte alone. */
+		pos += (step < 0) ? 1 : step;
+	}
+
+	return chars;
+}
+
size_t git_utf8_valid_buf_length(const char *_str, size_t str_len)
{
const uint8_t *str = (const uint8_t *)_str;