diff options
author | Peng Wu <alexepico@gmail.com> | 2017-05-19 09:52:11 +0800 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2017-07-31 18:07:22 +0100 |
commit | 93474c366309feec2c562637711c8b2f0dd27790 (patch) | |
tree | 018f3518bdb75b4635f754f793b124a9e378095c /pango/break.c | |
parent | 59ccc00de377aa74ca11f191d9dc9fabfb808507 (diff) | |
download | pango-93474c366309feec2c562637711c8b2f0dd27790.tar.gz |
Update pango_default_break function for Emoji ZWJ sequence
Support Grapheme Cluster Boundary Rules GB10, GB11, GB12 and GB13.
https://bugzilla.gnome.org/show_bug.cgi?id=782813
Diffstat (limited to 'pango/break.c')
-rw-r--r-- | pango/break.c | 117 |
1 files changed, 116 insertions, 1 deletions
diff --git a/pango/break.c b/pango/break.c index 498f7642..b650ca59 100644 --- a/pango/break.c +++ b/pango/break.c @@ -521,6 +521,15 @@ pango_default_break (const gchar *text, GB_Prepend, GB_SpacingMark, GB_InHangulSyllable, /* Handles all of L, V, T, LV, LVT rules */ + /* Use state machine to handle emoji sequence */ + /* Rule GB10 and GB11 */ + GB_E_Base, + GB_E_Modifier, + GB_Glue_After_Zwj, + GB_E_Base_GAZ, + /* Rule GB12 and GB13 */ + GB_RI_Odd, /* Meets odd number of RI */ + GB_RI_Even, /* Meets even number of RI */ } GraphemeBreakType; GraphemeBreakType prev_GB_type = GB_Other; @@ -671,6 +680,17 @@ pango_default_break (const gchar *text, GB_type = GB_Extend; /* U+200C and U+200D are Other_Grapheme_Extend */ break; } + if (G_UNLIKELY((wc >= 0x600 && wc <= 0x605) || + wc == 0x6DD || + wc == 0x70F || + wc == 0x8E2 || + wc == 0xD4E || + wc == 0x110BD || + (wc >= 0x111C2 && wc <= 0x111C3))) + { + GB_type = GB_Prepend; + break; + } /* fall through */ case G_UNICODE_CONTROL: case G_UNICODE_LINE_SEPARATOR: @@ -715,9 +735,86 @@ pango_default_break (const gchar *text, case G_UNICODE_NON_SPACING_MARK: GB_type = GB_Extend; /* Grapheme_Extend */ break; + + case G_UNICODE_OTHER_SYMBOL: + + if (G_UNLIKELY(wc == 0x261D || + wc == 0x26F9 || + (wc >= 0x270A && wc <= 0x270D) || + wc == 0x1F385 || + (wc >= 0x1F3C2 && wc <= 0x1F3C4) || + wc == 0x1F3C7 || + (wc >= 0x1F3CA && wc <= 0x1F3CC) || + (wc >= 0x1F442 && wc <= 0x1F443) || + (wc >= 0x1F446 && wc <= 0x1F450) || + wc == 0x1F46E || + (wc >= 0x1F470 && wc <= 0x1F478) || + wc == 0x1F47C || + (wc >= 0x1F481 && wc <= 0x1F483) || + (wc >= 0x1F485 && wc <= 0x1F487) || + wc == 0x1F4AA || + (wc >= 0x1F574 && wc <= 0x1F575) || + wc == 0x1F57A || + wc == 0x1F590 || + (wc >= 0x1F595 && wc <= 0x1F596) || + (wc >= 0x1F645 && wc <= 0x1F647) || + (wc >= 0x1F64B && wc <= 0x1F64F) || + wc == 0x1F6A3 || + (wc >= 0x1F6B4 && wc <= 0x1F6B6) || + wc == 0x1F6C0 || + wc == 0x1F6CC || + (wc >= 0x1F918 && wc <= 0x1F91C) || + (wc >= 0x1F91E && wc <= 
0x1F91F) || + wc == 0x1F926 || + (wc >= 0x1F930 && wc <= 0x1F939) || + (wc >= 0x1F93D && wc <= 0x1F93E) || + (wc >= 0x1F9D1 && wc <= 0x1F9DD))) + GB_type = GB_E_Base; + + if (G_UNLIKELY(wc == 0x2640 || + wc == 0x2642 || + (wc >= 0x2695 && wc <= 0x2696) || + wc == 0x2708 || + wc == 0x2764 || + wc == 0x1F308 || + wc == 0x1F33E || + wc == 0x1F373 || + wc == 0x1F393 || + wc == 0x1F3A4 || + wc == 0x1F3A8 || + wc == 0x1F3EB || + wc == 0x1F3ED || + wc == 0x1F48B || + (wc >= 0x1F4BB && wc <= 0x1F4BC) || + wc == 0x1F527 || + wc == 0x1F52C || + wc == 0x1F5E8 || + wc == 0x1F680 || + wc == 0x1F692)) + GB_type = GB_Glue_After_Zwj; + + if (G_UNLIKELY(wc >= 0x1F466 && wc <= 0x1F469)) + GB_type = GB_E_Base_GAZ; + + if (G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) + { + if (prev_GB_type == GB_RI_Odd) + GB_type = GB_RI_Even; + else if (prev_GB_type == GB_RI_Even) + GB_type = GB_RI_Odd; + else + GB_type = GB_RI_Odd; + } + break; + + case G_UNICODE_MODIFIER_SYMBOL: + if (wc >= 0x1F3FB && wc <= 0x1F3FF) + GB_type = GB_E_Modifier; + break; } /* Grapheme Cluster Boundary Rules */ + /* We apply Rules GB1 and GB2 at the end of the function */ if (wc == '\n' && prev_wc == '\r') is_grapheme_boundary = FALSE; /* Rule GB3 */ @@ -726,13 +823,31 @@ pango_default_break (const gchar *text, else if (GB_type == GB_InHangulSyllable) is_grapheme_boundary = FALSE; /* Rules GB6, GB7, GB8 */ else if (GB_type == GB_Extend) + { + /* Rule GB10 */ + if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ) + GB_type = prev_GB_type; is_grapheme_boundary = FALSE; /* Rule GB9 */ + } else if (GB_type == GB_SpacingMark) is_grapheme_boundary = FALSE; /* Rule GB9a */ else if (prev_GB_type == GB_Prepend) is_grapheme_boundary = FALSE; /* Rule GB9b */ + /* Rule GB10 */ + else if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ) + { + if (GB_type == GB_E_Modifier) + is_grapheme_boundary = FALSE; + else + is_grapheme_boundary = TRUE; + } + else if (prev_wc == 0x200D && + (GB_type == GB_Glue_After_Zwj || 
GB_type == GB_E_Base_GAZ)) + is_grapheme_boundary = FALSE; /* Rule GB11 */ + else if (prev_GB_type == GB_RI_Odd && GB_type == GB_RI_Even) + is_grapheme_boundary = FALSE; /* Rule GB12 and GB13 */ else - is_grapheme_boundary = TRUE; /* Rule GB10 */ + is_grapheme_boundary = TRUE; /* Rule GB999 */ prev_GB_type = GB_type; |