diff options
author | Matthias Clasen <mclasen@redhat.com> | 2022-05-18 14:06:42 -0400 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2022-05-18 14:06:42 -0400 |
commit | 9f3d94cdd34de1fdfc85a589adf11a1a17929830 (patch) | |
tree | ca97995978323e7d0d82ac3d951a2da363c2983f | |
parent | c3152630a107665c35db438f5e1707455b674942 (diff) | |
download | pango-9f3d94cdd34de1fdfc85a589adf11a1a17929830.tar.gz |
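Factor out UTF-8 validation
Move the UTF-8 validation loop out of pango_layout_set_text() into a
new internal helper, pango_utf8_make_valid(). This is in preparation
for optimizing the validation a bit.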
-rw-r--r-- | pango/pango-layout.c | 51 | ||||
-rw-r--r-- | pango/pango-utils-internal.h | 5 | ||||
-rw-r--r-- | pango/pango-utils.c | 46 |
3 files changed, 61 insertions, 41 deletions
diff --git a/pango/pango-layout.c b/pango/pango-layout.c index 2b03ee2c..de43dbc7 100644 --- a/pango/pango-layout.c +++ b/pango/pango-layout.c @@ -83,6 +83,7 @@ #include <string.h> #include <math.h> #include <locale.h> +#include "pango-utils-internal.h" #include <hb-ot.h> @@ -1230,7 +1231,7 @@ pango_layout_set_text (PangoLayout *layout, const char *text, int length) { - char *old_text, *start, *end; + char *old_text; g_return_if_fail (layout != NULL); g_return_if_fail (length == 0 || text != NULL); @@ -1238,50 +1239,18 @@ pango_layout_set_text (PangoLayout *layout, old_text = layout->text; if (length < 0) - { - layout->length = strlen (text); - layout->text = g_strndup (text, layout->length); - } + layout->text = g_strdup (text); else if (length > 0) - { - /* This is not exactly what we want. We don't need the padding... - */ - layout->length = length; - layout->text = g_strndup (text, length); - } + layout->text = g_strndup (text, length); else - { - layout->length = 0; - layout->text = g_malloc0 (1); - } + layout->text = g_malloc0 (1); /* validate it, and replace invalid bytes with -1 */ - start = layout->text; - for (;;) { - gboolean valid; - - valid = g_utf8_validate (start, -1, (const char **)&end); - - if (!*end) - break; - - /* Replace invalid bytes with -1. The -1 will be converted to - * ((gunichar) -1) by glib, and that in turn yields a glyph value of - * ((PangoGlyph) -1) by PANGO_GET_UNKNOWN_GLYPH(-1), - * and that's PANGO_GLYPH_INVALID_INPUT. - */ - if (!valid) - *end++ = -1; - - start = end; - } - - if (start != layout->text) - /* TODO: Write out the beginning excerpt of text? */ - g_warning ("Invalid UTF-8 string passed to pango_layout_set_text()"); - - layout->n_chars = pango_utf8_strlen (layout->text, -1); - layout->length = strlen (layout->text); + if (!pango_utf8_make_valid (layout->text, &layout->length, &layout->n_chars)) + { + /* TODO: Write out the beginning excerpt of text? 
*/ + g_warning ("Invalid UTF-8 string passed to pango_layout_set_text()"); + } g_clear_pointer (&layout->log_attrs, g_free); layout_changed (layout); diff --git a/pango/pango-utils-internal.h b/pango/pango-utils-internal.h index 0bc355e0..162295c3 100644 --- a/pango/pango-utils-internal.h +++ b/pango/pango-utils-internal.h @@ -44,6 +44,11 @@ gboolean pango_parse_flags (GType type, char *_pango_trim_string (const char *str); +gboolean pango_utf8_make_valid (char *str, + int *n_bytes, + int *n_chars); + + G_END_DECLS #endif /* __PANGO_UTILS_H__ */ diff --git a/pango/pango-utils.c b/pango/pango-utils.c index b942921e..a0ff000e 100644 --- a/pango/pango-utils.c +++ b/pango/pango-utils.c @@ -1257,3 +1257,49 @@ pango_find_paragraph_boundary (const char *text, if (start && next_paragraph_start) *next_paragraph_start = start - text; } + + +/*< private > + * pango_utf8_make_valid: + * @str: the string to convert to valid UTF-8 + * @n_bytes: return location for byte count + * @n_chars: return location for character count + * + * Validate that @str is valid UTF-8, and make it + * so if it isn't. + * + * Invalid bytes get replaced by -1 (which gets ultimatively + * turned into PANGO_GLYPH_INVALID_INPUT). + * + * Returns: `TRUE` if @str was valid without any modification + */ +gboolean +pango_utf8_make_valid (char *str, + int *n_bytes, + int *n_chars) +{ + char *start, *end; + + start = str; + + for (;;) + { + gboolean valid; + + valid = g_utf8_validate (start, -1, (const char **)&end); + + if (!*end) + break; + + if (!valid) + *end++ = -1; + + start = end; + } + + *n_bytes = strlen (str); + *n_chars = g_utf8_strlen (str, -1); + + return start == str; +} + |