summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matthias Clasen <mclasen@redhat.com> 2022-05-18 14:06:42 -0400
committer Matthias Clasen <mclasen@redhat.com> 2022-05-18 14:06:42 -0400
commit 9f3d94cdd34de1fdfc85a589adf11a1a17929830 (patch)
tree ca97995978323e7d0d82ac3d951a2da363c2983f
parent c3152630a107665c35db438f5e1707455b674942 (diff)
download pango-9f3d94cdd34de1fdfc85a589adf11a1a17929830.tar.gz
Factor out UTF8 validation
This is in preparation for optimizing this function a bit.
-rw-r--r-- pango/pango-layout.c 51
-rw-r--r-- pango/pango-utils-internal.h 5
-rw-r--r-- pango/pango-utils.c 46
3 files changed, 61 insertions, 41 deletions
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index 2b03ee2c..de43dbc7 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -83,6 +83,7 @@
#include <string.h>
#include <math.h>
#include <locale.h>
+#include "pango-utils-internal.h"
#include <hb-ot.h>
@@ -1230,7 +1231,7 @@ pango_layout_set_text (PangoLayout *layout,
const char *text,
int length)
{
- char *old_text, *start, *end;
+ char *old_text;
g_return_if_fail (layout != NULL);
g_return_if_fail (length == 0 || text != NULL);
@@ -1238,50 +1239,18 @@ pango_layout_set_text (PangoLayout *layout,
old_text = layout->text;
if (length < 0)
- {
- layout->length = strlen (text);
- layout->text = g_strndup (text, layout->length);
- }
+ layout->text = g_strdup (text);
else if (length > 0)
- {
- /* This is not exactly what we want. We don't need the padding...
- */
- layout->length = length;
- layout->text = g_strndup (text, length);
- }
+ layout->text = g_strndup (text, length);
else
- {
- layout->length = 0;
- layout->text = g_malloc0 (1);
- }
+ layout->text = g_malloc0 (1);
/* validate it, and replace invalid bytes with -1 */
- start = layout->text;
- for (;;) {
- gboolean valid;
-
- valid = g_utf8_validate (start, -1, (const char **)&end);
-
- if (!*end)
- break;
-
- /* Replace invalid bytes with -1. The -1 will be converted to
- * ((gunichar) -1) by glib, and that in turn yields a glyph value of
- * ((PangoGlyph) -1) by PANGO_GET_UNKNOWN_GLYPH(-1),
- * and that's PANGO_GLYPH_INVALID_INPUT.
- */
- if (!valid)
- *end++ = -1;
-
- start = end;
- }
-
- if (start != layout->text)
- /* TODO: Write out the beginning excerpt of text? */
- g_warning ("Invalid UTF-8 string passed to pango_layout_set_text()");
-
- layout->n_chars = pango_utf8_strlen (layout->text, -1);
- layout->length = strlen (layout->text);
+ if (!pango_utf8_make_valid (layout->text, &layout->length, &layout->n_chars))
+ {
+ /* TODO: Write out the beginning excerpt of text? */
+ g_warning ("Invalid UTF-8 string passed to pango_layout_set_text()");
+ }
g_clear_pointer (&layout->log_attrs, g_free);
layout_changed (layout);
diff --git a/pango/pango-utils-internal.h b/pango/pango-utils-internal.h
index 0bc355e0..162295c3 100644
--- a/pango/pango-utils-internal.h
+++ b/pango/pango-utils-internal.h
@@ -44,6 +44,11 @@ gboolean pango_parse_flags (GType type,
char *_pango_trim_string (const char *str);
+gboolean pango_utf8_make_valid (char *str,
+ int *n_bytes,
+ int *n_chars);
+
+
G_END_DECLS
#endif /* __PANGO_UTILS_H__ */
diff --git a/pango/pango-utils.c b/pango/pango-utils.c
index b942921e..a0ff000e 100644
--- a/pango/pango-utils.c
+++ b/pango/pango-utils.c
@@ -1257,3 +1257,49 @@ pango_find_paragraph_boundary (const char *text,
if (start && next_paragraph_start)
*next_paragraph_start = start - text;
}
+
+
+/*< private >
+ * pango_utf8_make_valid:
+ * @str: the string to convert to valid UTF-8
+ * @n_bytes: return location for byte count
+ * @n_chars: return location for character count
+ *
+ * Validate that @str is valid UTF-8, and make it
+ * so if it isn't.
+ *
+ * Invalid bytes get replaced by -1 (which ultimately gets
+ * turned into PANGO_GLYPH_INVALID_INPUT).
+ *
+ * Returns: `TRUE` if @str was valid without any modification
+ */
+gboolean
+pango_utf8_make_valid (char *str,
+ int *n_bytes,
+ int *n_chars)
+{
+ char *start, *end;
+
+ start = str;
+
+ for (;;)
+ {
+ gboolean valid;
+
+ valid = g_utf8_validate (start, -1, (const char **)&end);
+
+ if (!*end)
+ break;
+
+ if (!valid)
+ *end++ = -1;
+
+ start = end;
+ }
+
+ *n_bytes = strlen (str);
+ *n_chars = g_utf8_strlen (str, -1);
+
+ return start == str;
+}
+