diff options
| author | Edward Thomson <ethomson@edwardthomson.com> | 2021-04-14 15:47:27 +0100 |
|---|---|---|
| committer | Edward Thomson <ethomson@edwardthomson.com> | 2021-04-14 23:02:51 +0100 |
| commit | 1d95b59b4dbd8eda3f83f8af2a4ae07c7cdfc245 (patch) | |
| tree | 88e04d8ab4d21597002f8b74d8775677f69b104f /src/util.c | |
| parent | 4f4b1139d23a7b38cceb9d83acbfaf73151f522f (diff) | |
| download | libgit2-1d95b59b4dbd8eda3f83f8af2a4ae07c7cdfc245.tar.gz | |
utf8: refactor utf8 functions
Move the utf8 functions into a proper namespace `git_utf8` instead of
being in the namespaceless `git__` function group. Update them to
have out-params first and use `char *` instead of `uint8_t *` to match
our API treating strings as `char *` (even if they truly contain `uchar`s
inside).
Diffstat (limited to 'src/util.c')
| -rw-r--r-- | src/util.c | 117 |
1 files changed, 0 insertions, 117 deletions
diff --git a/src/util.c b/src/util.c index af825e4d2..c7af2961a 100644 --- a/src/util.c +++ b/src/util.c @@ -734,123 +734,6 @@ void git__qsort_r( #endif } -/* - * git__utf8_iterate is taken from the utf8proc project, - * http://www.public-software-group.org/utf8proc - * - * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the ""Software""), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -static const int8_t utf8proc_utf8class[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static int util_utf8_charlen(const uint8_t *str, size_t str_len) -{ - size_t length, i; - - length = utf8proc_utf8class[str[0]]; - if (!length) - return -1; - - if (str_len > 0 && length > str_len) - return -1; - - for (i = 1; i < length; i++) { - if ((str[i] & 0xC0) != 0x80) - return -1; - } - - return (int)length; -} - -int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst) -{ - int length; - int32_t uc = -1; - - *dst = -1; - length = util_utf8_charlen(str, str_len); - if (length < 0) - return -1; - - switch (length) { - case 1: - uc = str[0]; - break; - case 2: - uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F); - if (uc < 0x80) uc = -1; - break; - case 3: - uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) - + (str[2] & 0x3F); - if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) || - (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1; - break; - case 4: - uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) - + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F); - if (uc < 0x10000 || uc >= 0x110000) uc = -1; - break; - } - - if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE)) - return -1; - - *dst = uc; - return length; -} - -size_t git__utf8_valid_buf_length(const uint8_t *str, size_t str_len) -{ - size_t offset = 0; - - while (offset < str_len) { - int length = util_utf8_charlen(str + offset, str_len - offset); - - if (length < 0) - break; - - offset += length; - } - - return offset; -} - #ifdef GIT_WIN32 int git__getenv(git_buf *out, const char *name) { |
