summaryrefslogtreecommitdiff
path: root/src/util.c
diff options
context:
space:
mode:
authorEdward Thomson <ethomson@edwardthomson.com>2021-04-14 15:47:27 +0100
committerEdward Thomson <ethomson@edwardthomson.com>2021-04-14 23:02:51 +0100
commit1d95b59b4dbd8eda3f83f8af2a4ae07c7cdfc245 (patch)
tree88e04d8ab4d21597002f8b74d8775677f69b104f /src/util.c
parent4f4b1139d23a7b38cceb9d83acbfaf73151f522f (diff)
downloadlibgit2-1d95b59b4dbd8eda3f83f8af2a4ae07c7cdfc245.tar.gz
utf8: refactor utf8 functions
Move the utf8 functions into a proper namespace `git_utf8` instead of being in the namespaceless `git__` function group. Update them to have out-params first and use `char *` instead of `uint8_t *` to match our API treating strings as `char *` (even if they truly contain `uchar`s inside).
Diffstat (limited to 'src/util.c')
-rw-r--r--src/util.c117
1 files changed, 0 insertions, 117 deletions
diff --git a/src/util.c b/src/util.c
index af825e4d2..c7af2961a 100644
--- a/src/util.c
+++ b/src/util.c
@@ -734,123 +734,6 @@ void git__qsort_r(
#endif
}
-/*
- * git__utf8_iterate is taken from the utf8proc project,
- * http://www.public-software-group.org/utf8proc
- *
- * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the ""Software""),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-static const int8_t utf8proc_utf8class[256] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-static int util_utf8_charlen(const uint8_t *str, size_t str_len)
-{
- size_t length, i;
-
- length = utf8proc_utf8class[str[0]];
- if (!length)
- return -1;
-
- if (str_len > 0 && length > str_len)
- return -1;
-
- for (i = 1; i < length; i++) {
- if ((str[i] & 0xC0) != 0x80)
- return -1;
- }
-
- return (int)length;
-}
-
-int git__utf8_iterate(const uint8_t *str, int str_len, int32_t *dst)
-{
- int length;
- int32_t uc = -1;
-
- *dst = -1;
- length = util_utf8_charlen(str, str_len);
- if (length < 0)
- return -1;
-
- switch (length) {
- case 1:
- uc = str[0];
- break;
- case 2:
- uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
- if (uc < 0x80) uc = -1;
- break;
- case 3:
- uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
- + (str[2] & 0x3F);
- if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
- (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
- break;
- case 4:
- uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
- + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
- if (uc < 0x10000 || uc >= 0x110000) uc = -1;
- break;
- }
-
- if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE))
- return -1;
-
- *dst = uc;
- return length;
-}
-
-size_t git__utf8_valid_buf_length(const uint8_t *str, size_t str_len)
-{
- size_t offset = 0;
-
- while (offset < str_len) {
- int length = util_utf8_charlen(str + offset, str_len - offset);
-
- if (length < 0)
- break;
-
- offset += length;
- }
-
- return offset;
-}
-
#ifdef GIT_WIN32
int git__getenv(git_buf *out, const char *name)
{