summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Peng Wu <alexepico@gmail.com> 2017-05-19 09:52:11 +0800
committer: Matthias Clasen <mclasen@redhat.com> 2017-07-31 18:07:22 +0100
commit: 93474c366309feec2c562637711c8b2f0dd27790 (patch)
tree: 018f3518bdb75b4635f754f793b124a9e378095c
parent: 59ccc00de377aa74ca11f191d9dc9fabfb808507 (diff)
download: pango-93474c366309feec2c562637711c8b2f0dd27790.tar.gz
Update pango_default_break function for Emoji ZWJ sequence

Support Grapheme Cluster Boundary Rules GB10, GB11, GB12 and GB13.

https://bugzilla.gnome.org/show_bug.cgi?id=782813
-rw-r--r-- pango/break.c 117
1 files changed, 116 insertions, 1 deletions
diff --git a/pango/break.c b/pango/break.c
index 498f7642..b650ca59 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -521,6 +521,15 @@ pango_default_break (const gchar *text,
GB_Prepend,
GB_SpacingMark,
GB_InHangulSyllable, /* Handles all of L, V, T, LV, LVT rules */
+ /* Use state machine to handle emoji sequence */
+ /* Rule GB10 and GB11 */
+ GB_E_Base,
+ GB_E_Modifier,
+ GB_Glue_After_Zwj,
+ GB_E_Base_GAZ,
+ /* Rule GB12 and GB13 */
+ GB_RI_Odd, /* Meets odd number of RI */
+ GB_RI_Even, /* Meets even number of RI */
} GraphemeBreakType;
GraphemeBreakType prev_GB_type = GB_Other;
@@ -671,6 +680,17 @@ pango_default_break (const gchar *text,
GB_type = GB_Extend; /* U+200C and U+200D are Other_Grapheme_Extend */
break;
}
+ if (G_UNLIKELY((wc >= 0x600 && wc <= 0x605) ||
+ wc == 0x6DD ||
+ wc == 0x70F ||
+ wc == 0x8E2 ||
+ wc == 0xD4E ||
+ wc == 0x110BD ||
+ (wc >= 0x111C2 && wc <= 0x111C3)))
+ {
+ GB_type = GB_Prepend;
+ break;
+ }
/* fall through */
case G_UNICODE_CONTROL:
case G_UNICODE_LINE_SEPARATOR:
@@ -715,9 +735,86 @@ pango_default_break (const gchar *text,
case G_UNICODE_NON_SPACING_MARK:
GB_type = GB_Extend; /* Grapheme_Extend */
break;
+
+ case G_UNICODE_OTHER_SYMBOL:
+
+ if (G_UNLIKELY(wc == 0x261D ||
+ wc == 0x26F9 ||
+ (wc >= 0x270A && wc <= 0x270D) ||
+ wc == 0x1F385 ||
+ (wc >= 0x1F3C2 && wc <= 0x1F3C4) ||
+ wc == 0x1F3C7 ||
+ (wc >= 0x1F3CA && wc <= 0x1F3CC) ||
+ (wc >= 0x1F442 && wc <= 0x1F443) ||
+ (wc >= 0x1F446 && wc <= 0x1F450) ||
+ wc == 0x1F46E ||
+ (wc >= 0x1F470 && wc <= 0x1F478) ||
+ wc == 0x1F47C ||
+ (wc >= 0x1F481 && wc <= 0x1F483) ||
+ (wc >= 0x1F485 && wc <= 0x1F487) ||
+ wc == 0x1F4AA ||
+ (wc >= 0x1F574 && wc <= 0x1F575) ||
+ wc == 0x1F57A ||
+ wc == 0x1F590 ||
+ (wc >= 0x1F595 && wc <= 0x1F596) ||
+ (wc >= 0x1F645 && wc <= 0x1F647) ||
+ (wc >= 0x1F64B && wc <= 0x1F64F) ||
+ wc == 0x1F6A3 ||
+ (wc >= 0x1F6B4 && wc <= 0x1F6B6) ||
+ wc == 0x1F6C0 ||
+ wc == 0x1F6CC ||
+ (wc >= 0x1F918 && wc <= 0x1F91C) ||
+ (wc >= 0x1F91E && wc <= 0x1F91F) ||
+ wc == 0x1F926 ||
+ (wc >= 0x1F930 && wc <= 0x1F939) ||
+ (wc >= 0x1F93D && wc <= 0x1F93E) ||
+ (wc >= 0x1F9D1 && wc <= 0x1F9DD)))
+ GB_type = GB_E_Base;
+
+ if (G_UNLIKELY(wc == 0x2640 ||
+ wc == 0x2642 ||
+ (wc >= 0x2695 && wc <= 0x2696) ||
+ wc == 0x2708 ||
+ wc == 0x2764 ||
+ wc == 0x1F308 ||
+ wc == 0x1F33E ||
+ wc == 0x1F373 ||
+ wc == 0x1F393 ||
+ wc == 0x1F3A4 ||
+ wc == 0x1F3A8 ||
+ wc == 0x1F3EB ||
+ wc == 0x1F3ED ||
+ wc == 0x1F48B ||
+ (wc >= 0x1F4BB && wc <= 0x1F4BC) ||
+ wc == 0x1F527 ||
+ wc == 0x1F52C ||
+ wc == 0x1F5E8 ||
+ wc == 0x1F680 ||
+ wc == 0x1F692))
+ GB_type = GB_Glue_After_Zwj;
+
+ if (G_UNLIKELY(wc >= 0x1F466 && wc <= 0x1F469))
+ GB_type = GB_E_Base_GAZ;
+
+ if (G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF))
+ {
+ if (prev_GB_type == GB_RI_Odd)
+ GB_type = GB_RI_Even;
+ else if (prev_GB_type == GB_RI_Even)
+ GB_type = GB_RI_Odd;
+ else
+ GB_type = GB_RI_Odd;
+ }
+ break;
+
+ case G_UNICODE_MODIFIER_SYMBOL:
+ if (wc >= 0x1F3FB && wc <= 0x1F3FF)
+ GB_type = GB_E_Modifier;
+ break;
}
/* Grapheme Cluster Boundary Rules */
+
/* We apply Rules GB1 and GB2 at the end of the function */
if (wc == '\n' && prev_wc == '\r')
is_grapheme_boundary = FALSE; /* Rule GB3 */
@@ -726,13 +823,31 @@ pango_default_break (const gchar *text,
else if (GB_type == GB_InHangulSyllable)
is_grapheme_boundary = FALSE; /* Rules GB6, GB7, GB8 */
else if (GB_type == GB_Extend)
+ {
+ /* Rule GB10 */
+ if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ)
+ GB_type = prev_GB_type;
is_grapheme_boundary = FALSE; /* Rule GB9 */
+ }
else if (GB_type == GB_SpacingMark)
is_grapheme_boundary = FALSE; /* Rule GB9a */
else if (prev_GB_type == GB_Prepend)
is_grapheme_boundary = FALSE; /* Rule GB9b */
+ /* Rule GB10 */
+ else if (prev_GB_type == GB_E_Base || prev_GB_type == GB_E_Base_GAZ)
+ {
+ if (GB_type == GB_E_Modifier)
+ is_grapheme_boundary = FALSE;
+ else
+ is_grapheme_boundary = TRUE;
+ }
+ else if (prev_wc == 0x200D &&
+ (GB_type == GB_Glue_After_Zwj || GB_type == GB_E_Base_GAZ))
+ is_grapheme_boundary = FALSE; /* Rule GB11 */
+ else if (prev_GB_type == GB_RI_Odd && GB_type == GB_RI_Even)
+ is_grapheme_boundary = FALSE; /* Rule GB12 and GB13 */
else
- is_grapheme_boundary = TRUE; /* Rule GB10 */
+ is_grapheme_boundary = TRUE; /* Rule GB999 */
prev_GB_type = GB_type;