From 9f3d94cdd34de1fdfc85a589adf11a1a17929830 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Wed, 18 May 2022 14:06:42 -0400 Subject: Factor out UTF8 validation This is in preparation for optimizing this function a bit. --- pango/pango-utils.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'pango/pango-utils.c') diff --git a/pango/pango-utils.c b/pango/pango-utils.c index b942921e..a0ff000e 100644 --- a/pango/pango-utils.c +++ b/pango/pango-utils.c @@ -1257,3 +1257,49 @@ pango_find_paragraph_boundary (const char *text, if (start && next_paragraph_start) *next_paragraph_start = start - text; } + + +/*< private > + * pango_utf8_make_valid: + * @str: the string to convert to valid UTF-8 + * @n_bytes: return location for byte count + * @n_chars: return location for character count + * + * Validate that @str is valid UTF-8, and make it + * so if it isn't. + * + * Invalid bytes get replaced by -1 (which gets ultimatively + * turned into PANGO_GLYPH_INVALID_INPUT). + * + * Returns: `TRUE` if @str was valid without any modification + */ +gboolean +pango_utf8_make_valid (char *str, + int *n_bytes, + int *n_chars) +{ + char *start, *end; + + start = str; + + for (;;) + { + gboolean valid; + + valid = g_utf8_validate (start, -1, (const char **)&end); + + if (!*end) + break; + + if (!valid) + *end++ = -1; + + start = end; + } + + *n_bytes = strlen (str); + *n_chars = g_utf8_strlen (str, -1); + + return start == str; +} + -- cgit v1.2.1