diff options
author | Matthias Clasen <mclasen@redhat.com> | 2021-07-28 16:10:26 -0400 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2021-07-30 07:58:16 -0400 |
commit | 43cdd9fbab3ababaae7308feec9e4dabdf8dc3a0 (patch) | |
tree | ebfe5d942562148551e864a29f2857e72c854da1 | |
parent | d16db0d6730ac4f93f6557bd01f3ba5fa75264a3 (diff) | |
download | pango-43cdd9fbab3ababaae7308feec9e4dabdf8dc3a0.tar.gz |
break: Cache per-character data
-rw-r--r-- | pango/break.c | 114 |
1 file changed, 66 insertions, 48 deletions
diff --git a/pango/break.c b/pango/break.c index b9cf3cae..aedf54c4 100644 --- a/pango/break.c +++ b/pango/break.c @@ -138,6 +138,37 @@ typedef enum WordNumbers } WordType; +static inline void +get_unichar_data (gunichar wc, + GUnicodeType *type, + GUnicodeBreakType *break_type, + PangoScript *script, + gboolean *extended_pictographic) +{ + static struct { + gunichar wc; + GUnicodeType type; + GUnicodeBreakType break_type; + PangoScript script; + gboolean extended_pictographic; + } cache[256] = { 0, }, *p; + + p = &cache[wc & 0xff]; + + if (G_UNLIKELY (p->wc != wc)) + { + p->wc = wc; + p->type = g_unichar_type (wc); + p->break_type = g_unichar_break_type (wc); + p->script = (PangoScript)g_unichar_get_script (wc); + p->extended_pictographic = _pango_Is_Emoji_Extended_Pictographic (wc); + } + + *type = p->type; + *break_type = p->break_type; + *script = p->script; + *extended_pictographic = p->extended_pictographic; +} /** * pango_default_break: @@ -182,10 +213,14 @@ pango_default_break (const gchar *text, JamoType prev_jamo; + GUnicodeType next_type; GUnicodeBreakType next_break_type; GUnicodeBreakType prev_break_type; GUnicodeBreakType prev_prev_break_type; + PangoScript next_script; + gboolean next_Extended_Pictographic; + /* See Grapheme_Cluster_Break Property Values table of UAX#29 */ typedef enum { @@ -256,6 +291,7 @@ pango_default_break (const gchar *text, LB_RI_Odd, LB_RI_Even, } LineBreakType; + LineBreakType LB_type; LineBreakType prev_LB_type = LB_Other; WordType current_word_type = WordNone; @@ -286,8 +322,7 @@ pango_default_break (const gchar *text, else next_wc = g_utf8_get_char (next); - next_break_type = g_unichar_break_type (next_wc); - next_break_type = BREAK_TYPE_SAFE (next_break_type); + get_unichar_data (next_wc, &next_type, &next_break_type, &next_script, &next_Extended_Pictographic); for (i = 0; !done ; i++) { @@ -299,6 +334,8 @@ pango_default_break (const gchar *text, JamoType jamo; gboolean makes_hangul_syllable; + PangoScript script; + /* 
UAX#29 boundaries */ gboolean is_grapheme_boundary; gboolean is_word_boundary; @@ -310,7 +347,10 @@ pango_default_break (const gchar *text, gboolean can_break; wc = next_wc; + type = next_type; break_type = next_break_type; + script = next_script; + is_Extended_Pictographic = next_Extended_Pictographic; if (almost_done) { @@ -319,6 +359,7 @@ pango_default_break (const gchar *text, * may not increment next */ next_wc = 0; + next_type = 0; next_break_type = G_UNICODE_BREAK_UNKNOWN; done = TRUE; } @@ -338,11 +379,9 @@ pango_default_break (const gchar *text, else next_wc = g_utf8_get_char (next); - next_break_type = g_unichar_break_type (next_wc); - next_break_type = BREAK_TYPE_SAFE (next_break_type); + get_unichar_data (next_wc, &next_type, &next_break_type, &next_script, &next_Extended_Pictographic); } - type = g_unichar_type (wc); jamo = JAMO_TYPE (break_type); /* Determine wheter this forms a Hangul syllable with prev. */ @@ -380,9 +419,6 @@ pango_default_break (const gchar *text, break; } - is_Extended_Pictographic = - _pango_Is_Emoji_Extended_Pictographic (wc); - /* ---- UAX#29 Grapheme Boundaries ---- */ { @@ -558,11 +594,8 @@ pango_default_break (const gchar *text, if (is_grapheme_boundary || G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) /* Rules WB3 and WB4 */ { - PangoScript script; WordBreakType WB_type; - script = (PangoScript)g_unichar_get_script (wc); - /* Find the WordBreakType of wc */ WB_type = WB_Other; @@ -1025,8 +1058,10 @@ pango_default_break (const gchar *text, */ can_break = attrs[i].is_cursor_position; - /* Rule LB1: - assign a line breaking class to each code point of the input. */ + LB_type = LB_Other; + + /* Rule LB1: assign a line breaking class to each code point of the input. 
*/ + /* Also determine if we can break */ switch (break_type) { case G_UNICODE_BREAK_AMBIGUOUS: @@ -1058,50 +1093,33 @@ pango_default_break (const gchar *text, case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE: case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE: case G_UNICODE_BREAK_EMOJI_MODIFIER: - case G_UNICODE_BREAK_REGIONAL_INDICATOR: can_break = TRUE; break; + case G_UNICODE_BREAK_REGIONAL_INDICATOR: + can_break = TRUE; + if (prev_LB_type == LB_RI_Odd) + LB_type = LB_RI_Even; + else + LB_type = LB_RI_Odd; + break; + + case G_UNICODE_BREAK_NUMERIC: + LB_type = LB_Numeric; + break; + + case G_UNICODE_BREAK_CLOSE_PUNCTUATION: + case G_UNICODE_BREAK_CLOSE_PARANTHESIS: + if (prev_LB_type == LB_Numeric) + LB_type = LB_Numeric_Close; + break; + default: ; } if (can_break) { - LineBreakType LB_type; - - /* Find the LineBreakType of wc */ - LB_type = LB_Other; - - if (break_type == G_UNICODE_BREAK_NUMERIC) - LB_type = LB_Numeric; - - if (break_type == G_UNICODE_BREAK_SYMBOL || - break_type == G_UNICODE_BREAK_INFIX_SEPARATOR) - { - if (!(prev_LB_type == LB_Numeric)) - LB_type = LB_Other; - } - - if (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION || - break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS) - { - if (prev_LB_type == LB_Numeric) - LB_type = LB_Numeric_Close; - else - LB_type = LB_Other; - } - - if (break_type == G_UNICODE_BREAK_REGIONAL_INDICATOR) - { - if (prev_LB_type == LB_RI_Odd) - LB_type = LB_RI_Even; - else if (prev_LB_type == LB_RI_Even) - LB_type = LB_RI_Odd; - else - LB_type = LB_RI_Odd; - } - attrs[i].is_line_break = TRUE; /* Rule LB31 */ /* Unicode doesn't specify char wrap; we wrap around all chars currently. */ |