diff options
author | Vincent Sanders <vince@kyllikki.org> | 2022-11-26 14:44:08 +0000 |
---|---|---|
committer | Vincent Sanders <vince@kyllikki.org> | 2022-11-26 15:21:16 +0000 |
commit | 1d82ef411a65095f218c15441fb804715e59f0eb (patch) | |
tree | 5d456b371af051f11c742b24a1138496b6baae38 | |
parent | 6780766fb7c27145415baa2c40251e386b3e894a (diff) | |
download | netsurf-1d82ef411a65095f218c15441fb804715e59f0eb.tar.gz |
consolidate duplicated conversion descriptor cache code
-rw-r--r-- | utils/utf8.c | 119 |
1 files changed, 61 insertions, 58 deletions
diff --git a/utils/utf8.c b/utils/utf8.c index 84cacd78c..7aa7d935b 100644 --- a/utils/utf8.c +++ b/utils/utf8.c @@ -44,7 +44,7 @@ uint32_t utf8_to_ucs4(const char *s_in, size_t l) parserutils_error perror; perror = parserutils_charset_utf8_to_ucs4((const uint8_t *) s_in, l, - &ucs4, &len); + &ucs4, &len); if (perror != PARSERUTILS_OK) ucs4 = 0xfffd; @@ -106,7 +106,7 @@ size_t utf8_char_byte_length(const char *s) parserutils_error perror; perror = parserutils_charset_utf8_char_byte_length((const uint8_t *) s, - &len); + &len); assert(perror == PARSERUTILS_OK); return len; @@ -131,7 +131,7 @@ size_t utf8_next(const char *s, size_t l, size_t o) parserutils_error perror; perror = parserutils_charset_utf8_next((const uint8_t *) s, l, o, - &next); + &next); assert(perror == PARSERUTILS_OK); return next; @@ -151,6 +151,47 @@ static inline void utf8_clear_cd_cache(void) last_cd.cd = 0; } +/** + * obtain a cached conversion descriptor + * + * either return the cached conversion descriptor or create one if required + */ +static nserror +get_cached_cd(const char *enc_from, const char *enc_to, iconv_t *cd_out) +{ + iconv_t cd; + /* we cache the last used conversion descriptor, + * so check if we're trying to use it here */ + if (strncasecmp(last_cd.from, enc_from, sizeof(last_cd.from)) == 0 && + strncasecmp(last_cd.to, enc_to, sizeof(last_cd.to)) == 0 && + last_cd.cd != 0) { + *cd_out = last_cd.cd; + return NSERROR_OK; + } + + /* no match, so create a new cd */ + cd = iconv_open(enc_to, enc_from); + if (cd == (iconv_t) -1) { + if (errno == EINVAL) { + return NSERROR_BAD_ENCODING; + } + /* default to no memory */ + return NSERROR_NOMEM; + } + + /* close the last cd - we don't care if this fails */ + if (last_cd.cd) { + iconv_close(last_cd.cd); + } + + /* and safely copy the to/from/cd data into last_cd */ + snprintf(last_cd.from, sizeof(last_cd.from), "%s", enc_from); + snprintf(last_cd.to, sizeof(last_cd.to), "%s", enc_to); + *cd_out = last_cd.cd = cd; + + return NSERROR_OK; +} + 
/* exported interface documented in utils/utf8.h */ nserror utf8_finalise(void) { @@ -187,6 +228,7 @@ utf8_convert(const char *string, iconv_t cd; char *temp, *out, *in, *result; size_t result_len; + nserror res; assert(string && from && to && result_out); @@ -215,29 +257,9 @@ utf8_convert(const char *string, in = (char *)string; - /* we cache the last used conversion descriptor, - * so check if we're trying to use it here */ - if (strncasecmp(last_cd.from, from, sizeof(last_cd.from)) == 0 && - strncasecmp(last_cd.to, to, sizeof(last_cd.to)) == 0) { - cd = last_cd.cd; - } else { - /* no match, so create a new cd */ - cd = iconv_open(to, from); - if (cd == (iconv_t)-1) { - if (errno == EINVAL) - return NSERROR_BAD_ENCODING; - /* default to no memory */ - return NSERROR_NOMEM; - } - - /* close the last cd - we don't care if this fails */ - if (last_cd.cd) - iconv_close(last_cd.cd); - - /* and copy the to/from/cd data into last_cd */ - snprintf(last_cd.from, sizeof(last_cd.from), "%s", from); - snprintf(last_cd.to, sizeof(last_cd.to), "%s", to); - last_cd.cd = cd; + res = get_cached_cd(from, to, &cd); + if (res != NSERROR_OK) { + return res; } /* Worst case = ASCII -> UCS4, so allocate an output buffer @@ -289,14 +311,14 @@ utf8_convert(const char *string, /* exported interface documented in utils/utf8.h */ nserror utf8_to_enc(const char *string, const char *encname, - size_t len, char **result) + size_t len, char **result) { return utf8_convert(string, len, "UTF-8", encname, result, NULL); } /* exported interface documented in utils/utf8.h */ nserror utf8_from_enc(const char *string, const char *encname, - size_t len, char **result, size_t *result_len) + size_t len, char **result, size_t *result_len) { return utf8_convert(string, len, encname, "UTF-8", result, result_len); } @@ -327,7 +349,7 @@ utf8_convert_html_chunk(iconv_t cd, esclen = snprintf(escape, sizeof(escape), "&#x%06x;", ucs4); pescape = escape; ret = iconv(cd, (void *) &pescape, &esclen, - (void *) out, 
outlen); + (void *) out, outlen); if (ret == (size_t) -1) return NSERROR_NOMEM; @@ -339,6 +361,8 @@ utf8_convert_html_chunk(iconv_t cd, return NSERROR_OK; } + + /* exported interface documented in utils/utf8.h */ nserror utf8_to_html(const char *string, const char *encname, size_t len, char **result) @@ -349,35 +373,14 @@ utf8_to_html(const char *string, const char *encname, size_t len, char **result) size_t off, prev_off, inlen, outlen, origoutlen, esclen; nserror ret; char *pescape, escape[11]; + nserror res; if (len == 0) len = strlen(string); - /* we cache the last used conversion descriptor, - * so check if we're trying to use it here */ - if (strncasecmp(last_cd.from, "UTF-8", sizeof(last_cd.from)) == 0 && - strncasecmp(last_cd.to, encname, - sizeof(last_cd.to)) == 0 && - last_cd.cd != 0) { - cd = last_cd.cd; - } else { - /* no match, so create a new cd */ - cd = iconv_open(encname, "UTF-8"); - if (cd == (iconv_t) -1) { - if (errno == EINVAL) - return NSERROR_BAD_ENCODING; - /* default to no memory */ - return NSERROR_NOMEM; - } - - /* close the last cd - we don't care if this fails */ - if (last_cd.cd) - iconv_close(last_cd.cd); - - /* and safely copy the to/from/cd data into last_cd */ - snprintf(last_cd.from, sizeof(last_cd.from), "UTF-8"); - snprintf(last_cd.to, sizeof(last_cd.to), "%s", encname); - last_cd.cd = cd; + res = get_cached_cd("UTF-8", encname, &cd); + if (res != NSERROR_OK) { + return res; } /* Worst case is ASCII -> UCS4, with all characters escaped: @@ -397,13 +400,13 @@ utf8_to_html(const char *string, const char *encname, size_t len, char **result) while (off < len) { /* Must escape '&', '<', and '>' */ if (string[off] == '&' || string[off] == '<' || - string[off] == '>') { + string[off] == '>') { if (off - prev_off > 0) { /* Emit chunk */ in = string + prev_off; inlen = off - prev_off; ret = utf8_convert_html_chunk(cd, in, inlen, - &out, &outlen); + &out, &outlen); if (ret != NSERROR_OK) { free(origout); iconv_close(cd); @@ -414,10 
+417,10 @@ utf8_to_html(const char *string, const char *encname, size_t len, char **result) /* Emit mandatory escape */ esclen = snprintf(escape, sizeof(escape), - "&#x%06x;", string[off]); + "&#x%06x;", string[off]); pescape = escape; ret = utf8_convert_html_chunk(cd, pescape, esclen, - &out, &outlen); + &out, &outlen); if (ret != NSERROR_OK) { free(origout); iconv_close(cd); |