summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Clasen <mclasen@redhat.com>2022-05-18 14:32:45 -0400
committerMatthias Clasen <mclasen@redhat.com>2022-05-18 14:32:45 -0400
commitc408685494cd446899870838e11ad1351177c7eb (patch)
treedc91460ed3e133f307bad9654d6738c4c68e73c1
parent9f3d94cdd34de1fdfc85a589adf11a1a17929830 (diff)
downloadpango-performance-test.tar.gz
Optimize pango_utf8_make_validperformance-test
Avoid a separate g_utf8_strlen call. Difference in layout-performance create-layout: before: 1.1M layouts/s after: 3.3M layouts/s
-rw-r--r--pango/pango-utils.c114
1 files changed, 104 insertions, 10 deletions
diff --git a/pango/pango-utils.c b/pango/pango-utils.c
index a0ff000e..018f5dd2 100644
--- a/pango/pango-utils.c
+++ b/pango/pango-utils.c
@@ -1258,6 +1258,92 @@ pango_find_paragraph_boundary (const char *text,
*next_paragraph_start = start - text;
}
+#define VALIDATE_BYTE(mask, expect) \
+ G_STMT_START { \
+ if (G_UNLIKELY((*(guchar *)p & (mask)) != (expect))) \
+ goto error; \
+ } G_STMT_END
+
+static inline const char *
+utf8_validate (const char *str,
+ int *n_chars)
+
+{
+ const char *p;
+ int chars = 0;
+
+ for (p = str; *p; p++)
+ {
+ if (*(guchar *)p < 128)
+ {
+ chars++;
+ /* done */;
+ }
+ else
+ {
+ const char *last;
+
+ last = p;
+ if (*(guchar *)p < 0xe0) /* 110xxxxx */
+ {
+ if (G_UNLIKELY (*(guchar *)p < 0xc2))
+ goto error;
+ }
+ else
+ {
+ if (*(guchar *)p < 0xf0) /* 1110xxxx */
+ {
+ switch (*(guchar *)p++ & 0x0f)
+ {
+ case 0:
+ VALIDATE_BYTE(0xe0, 0xa0); /* 0xa0 ... 0xbf */
+ break;
+ case 0x0d:
+ VALIDATE_BYTE(0xe0, 0x80); /* 0x80 ... 0x9f */
+ break;
+ default:
+ VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
+ }
+ }
+ else if (*(guchar *)p < 0xf5) /* 11110xxx excluding out-of-range */
+ {
+ switch (*(guchar *)p++ & 0x07)
+ {
+ case 0:
+ VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
+ if (G_UNLIKELY((*(guchar *)p & 0x30) == 0))
+ goto error;
+ break;
+ case 4:
+ VALIDATE_BYTE(0xf0, 0x80); /* 0x80 ... 0x8f */
+ break;
+ default:
+ VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
+ }
+ p++;
+ VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
+ }
+ else
+ goto error;
+ }
+
+ p++;
+ VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
+
+ chars++;
+
+ continue;
+
+ error:
+ *n_chars = chars;
+ return last;
+ }
+ }
+
+ *n_chars = chars;
+
+ return p;
+}
/*< private >
* pango_utf8_make_valid:
@@ -1275,31 +1361,39 @@ pango_find_paragraph_boundary (const char *text,
*/
gboolean
pango_utf8_make_valid (char *str,
- int *n_bytes,
- int *n_chars)
+ int *num_bytes,
+ int *num_chars)
{
- char *start, *end;
+ char *start;
+ int n_bytes, n_chars;
start = str;
+ n_bytes = 0;
+ n_chars = 0;
+
for (;;)
{
- gboolean valid;
+ char *end;
+ int chars;
+
+ end = (char *)utf8_validate (start, &chars);
- valid = g_utf8_validate (start, -1, (const char **)&end);
+ n_bytes += end - start;
+ n_chars += chars;
if (!*end)
break;
- if (!valid)
- *end++ = -1;
+ *end++ = -1;
+ n_bytes += 1;
+ n_chars += 1;
start = end;
}
- *n_bytes = strlen (str);
- *n_chars = g_utf8_strlen (str, -1);
+ *num_bytes = n_bytes;
+ *num_chars = n_chars;
return start == str;
}
-