diff options
author | Peng Wu <alexepico@gmail.com> | 2017-09-12 14:57:11 +0800 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2017-10-27 17:37:48 -0400 |
commit | c4619480e536e393e2d4a8e26a6ceb5af1fe80e3 (patch) | |
tree | cbea23f7ab3a7ce453c8354be61ca3715f61a581 /pango/break.c | |
parent | 7721b0bed57038b342655220ad9bc32f4599f174 (diff) | |
download | pango-c4619480e536e393e2d4a8e26a6ceb5af1fe80e3.tar.gz |
Update pango_default_break function for Line Break
Support the line breaking rules of Unicode 9.0 (UAX #14, Unicode Line Breaking Algorithm).
https://bugzilla.gnome.org/show_bug.cgi?id=788115
Diffstat (limited to 'pango/break.c')
-rw-r--r-- | pango/break.c | 880 |
1 file changed, 421 insertions, 459 deletions
diff --git a/pango/break.c b/pango/break.c index 34a7cd28..c46f3382 100644 --- a/pango/break.c +++ b/pango/break.c @@ -40,323 +40,16 @@ typedef enum BREAK_PROHIBITED, /* no break, even if spaces intervene */ BREAK_IF_SPACES, /* "indirect break" (only if there are spaces) */ BREAK_ALLOWED /* "direct break" (can always break here) */ - /* TR 14 has one more break-opportunity class, + /* TR 14 has two more break-opportunity classes, * "indirect break opportunity for combining marks following a space" + * and "prohibited break for combining marks" * but we handle that inline in the code. */ } BreakOpportunity; - -enum -{ - INDEX_OPEN_PUNCTUATION, - INDEX_CLOSE_PUNCTUATION, - INDEX_QUOTATION, - INDEX_NON_BREAKING_GLUE, - INDEX_NON_STARTER, - INDEX_EXCLAMATION, - INDEX_SYMBOL, - INDEX_INFIX_SEPARATOR, - INDEX_PREFIX, - INDEX_POSTFIX, - INDEX_NUMERIC, - INDEX_ALPHABETIC, - INDEX_IDEOGRAPHIC, - INDEX_INSEPARABLE, - INDEX_HYPHEN, - INDEX_AFTER, - INDEX_BEFORE, - INDEX_BEFORE_AND_AFTER, - INDEX_ZERO_WIDTH_SPACE, - INDEX_COMBINING_MARK, - INDEX_WORD_JOINER, - - /* End of the table */ - - INDEX_END_OF_TABLE, - - /* The following are not in the tables */ - INDEX_MANDATORY, - INDEX_CARRIAGE_RETURN, - INDEX_LINE_FEED, - INDEX_SURROGATE, - INDEX_CONTINGENT, - INDEX_SPACE, - INDEX_COMPLEX_CONTEXT, - INDEX_AMBIGUOUS, - INDEX_UNKNOWN, - INDEX_NEXT_LINE, - INDEX_HANGUL_L_JAMO, - INDEX_HANGUL_V_JAMO, - INDEX_HANGUL_T_JAMO, - INDEX_HANGUL_LV_SYLLABLE, - INDEX_HANGUL_LVT_SYLLABLE, -}; - -static const BreakOpportunity row_OPEN_PUNCTUATION[INDEX_END_OF_TABLE] = { - BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity 
row_CLOSE_PUNCTUATION[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_QUOTATION[INDEX_END_OF_TABLE] = { - BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_NON_BREAKING_GLUE[INDEX_END_OF_TABLE] = { - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_NON_STARTER[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_EXCLAMATION[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, 
BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_SYMBOL[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_INFIX_SEPARATOR[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_PREFIX[INDEX_END_OF_TABLE] = { - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_POSTFIX[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_NUMERIC[INDEX_END_OF_TABLE] = { - 
BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_ALPHABETIC[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_IDEOGRAPHIC[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_INSEPARABLE[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_HYPHEN[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, 
BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_AFTER[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_BEFORE[INDEX_END_OF_TABLE] = { - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_BEFORE_AND_AFTER[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_ZERO_WIDTH_SPACE[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED -}; - -static const BreakOpportunity row_COMBINING_MARK[INDEX_END_OF_TABLE] = { - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, 
BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity row_WORD_JOINER[INDEX_END_OF_TABLE] = { - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED -}; - -static const BreakOpportunity *const line_break_rows[INDEX_END_OF_TABLE] = { - row_OPEN_PUNCTUATION, /* INDEX_OPEN_PUNCTUATION */ - row_CLOSE_PUNCTUATION, /* INDEX_CLOSE_PUNCTUATION */ - row_QUOTATION, /* INDEX_QUOTATION */ - row_NON_BREAKING_GLUE, /* INDEX_NON_BREAKING_GLUE */ - row_NON_STARTER, /* INDEX_NON_STARTER */ - row_EXCLAMATION, /* INDEX_EXCLAMATION */ - row_SYMBOL, /* INDEX_SYMBOL */ - row_INFIX_SEPARATOR, /* INDEX_INFIX_SEPARATOR */ - row_PREFIX, /* INDEX_PREFIX */ - row_POSTFIX, /* INDEX_POSTFIX */ - row_NUMERIC, /* INDEX_NUMERIC */ - row_ALPHABETIC, /* INDEX_ALPHABETIC */ - row_IDEOGRAPHIC, /* INDEX_IDEOGRAPHIC */ - row_INSEPARABLE, /* INDEX_INSEPARABLE */ - row_HYPHEN, /* INDEX_HYPHEN */ - row_AFTER, /* INDEX_AFTER */ - row_BEFORE, /* INDEX_BEFORE */ - row_BEFORE_AND_AFTER, /* INDEX_BEFORE_AND_AFTER */ - row_ZERO_WIDTH_SPACE, /* INDEX_ZERO_WIDTH_SPACE */ - row_COMBINING_MARK, /* INDEX_COMBINING_MARK */ - row_WORD_JOINER /* INDEX_WORD_JOINER */ -}; - -/* Map GUnicodeBreakType to table indexes */ -static const int line_break_indexes[] = { - INDEX_MANDATORY, - INDEX_CARRIAGE_RETURN, - INDEX_LINE_FEED, - INDEX_COMBINING_MARK, - INDEX_SURROGATE, - INDEX_ZERO_WIDTH_SPACE, - INDEX_INSEPARABLE, - 
INDEX_NON_BREAKING_GLUE, - INDEX_CONTINGENT, - INDEX_SPACE, - INDEX_AFTER, - INDEX_BEFORE, - INDEX_BEFORE_AND_AFTER, - INDEX_HYPHEN, - INDEX_NON_STARTER, - INDEX_OPEN_PUNCTUATION, - INDEX_CLOSE_PUNCTUATION, - INDEX_QUOTATION, - INDEX_EXCLAMATION, - INDEX_IDEOGRAPHIC, - INDEX_NUMERIC, - INDEX_INFIX_SEPARATOR, - INDEX_SYMBOL, - INDEX_ALPHABETIC, - INDEX_PREFIX, - INDEX_POSTFIX, - INDEX_COMPLEX_CONTEXT, - INDEX_AMBIGUOUS, - INDEX_UNKNOWN, - INDEX_NEXT_LINE, - INDEX_WORD_JOINER, - INDEX_HANGUL_L_JAMO, - INDEX_HANGUL_V_JAMO, - INDEX_HANGUL_T_JAMO, - INDEX_HANGUL_LV_SYLLABLE, - INDEX_HANGUL_LVT_SYLLABLE -}; - +/* need to sync the break range to glib/gunicode.h . */ #define BREAK_TYPE_SAFE(btype) \ - ((btype) < G_N_ELEMENTS(line_break_indexes) ? (btype) : G_UNICODE_BREAK_UNKNOWN) -#define BREAK_INDEX(btype) \ - (line_break_indexes[(btype)]) -#define BREAK_ROW(before_type) \ - (line_break_rows[BREAK_INDEX (before_type)]) -#define BREAK_OP(before_type, after_type) \ - (BREAK_ROW (before_type)[BREAK_INDEX (after_type)]) -#define IN_BREAK_TABLE(btype) \ - ((btype) < G_N_ELEMENTS(line_break_indexes) && BREAK_INDEX((btype)) < INDEX_END_OF_TABLE) - + ((btype) <= G_UNICODE_BREAK_ZERO_WIDTH_JOINER ? 
(btype) : G_UNICODE_BREAK_UNKNOWN) /* @@ -487,8 +180,8 @@ pango_default_break (const gchar *text, JamoType prev_jamo; GUnicodeBreakType next_break_type; - GUnicodeBreakType prev_break_type; /* skips spaces */ - gboolean prev_was_break_space; + GUnicodeBreakType prev_break_type; + GUnicodeBreakType prev_prev_break_type; /* See Grapheme_Cluster_Break Property Values table of UAX#29 */ typedef enum @@ -554,6 +247,17 @@ pango_default_break (const gchar *text, SentenceBreakType prev_prev_SB_type = SB_Other, prev_SB_type = SB_Other; gint prev_SB_i = -1; + /* Rule LB25 with Example 7 of Customization */ + typedef enum + { + LB_Other, + LB_Numeric, + LB_Numeric_Close, + LB_RI_Odd, + LB_RI_Even, + } LineBreakType; + LineBreakType prev_LB_type = LB_Other; + WordType current_word_type = WordNone; gunichar last_word_letter = 0; gunichar base_character = 0; @@ -570,7 +274,7 @@ pango_default_break (const gchar *text, next = text; prev_break_type = G_UNICODE_BREAK_UNKNOWN; - prev_was_break_space = FALSE; + prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN; prev_wc = 0; prev_jamo = NO_JAMO; @@ -590,6 +294,7 @@ pango_default_break (const gchar *text, GUnicodeType type; gunichar wc; GUnicodeBreakType break_type; + GUnicodeBreakType row_break_type; BreakOpportunity break_op; JamoType jamo; gboolean makes_hangul_syllable; @@ -1374,168 +1079,379 @@ pango_default_break (const gchar *text, break_op = BREAK_ALREADY_HANDLED; - g_assert (prev_break_type != G_UNICODE_BREAK_SPACE); + row_break_type = prev_break_type == G_UNICODE_BREAK_SPACE ? + prev_prev_break_type : prev_break_type; + g_assert (row_break_type != G_UNICODE_BREAK_SPACE); attrs[i].is_char_break = FALSE; attrs[i].is_line_break = FALSE; attrs[i].is_mandatory_break = FALSE; - if (attrs[i].is_cursor_position) /* If it's not a grapheme boundary, - * it's not a line break either - */ + /* Rule LB1: + assign a line breaking class to each code point of the input. 
*/ + switch ((int) break_type) { - /* space followed by a combining mark is handled - * specially; (rule 7a from TR 14) - */ - if (break_type == G_UNICODE_BREAK_SPACE && - next_break_type == G_UNICODE_BREAK_COMBINING_MARK) - break_type = G_UNICODE_BREAK_IDEOGRAPHIC; + case G_UNICODE_BREAK_AMBIGUOUS: + case G_UNICODE_BREAK_SURROGATE: + case G_UNICODE_BREAK_UNKNOWN: + break_type = G_UNICODE_BREAK_ALPHABETIC; + break; - /* Unicode doesn't specify char wrap; we wrap around all chars - * except where a line break is prohibited, which means we - * effectively break everywhere except inside runs of spaces. - */ - attrs[i].is_char_break = TRUE; + case G_UNICODE_BREAK_COMPLEX_CONTEXT: + if (type == G_UNICODE_NON_SPACING_MARK || + type == G_UNICODE_SPACING_MARK) + break_type = G_UNICODE_BREAK_COMBINING_MARK; + else + break_type = G_UNICODE_BREAK_ALPHABETIC; + break; - /* Make any necessary replacements first */ - switch ((int) prev_break_type) - { - case G_UNICODE_BREAK_HANGUL_L_JAMO: - case G_UNICODE_BREAK_HANGUL_V_JAMO: - case G_UNICODE_BREAK_HANGUL_T_JAMO: - case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE: - case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE: - /* treat Jamo as IDEOGRAPHIC from now - */ - prev_break_type = G_UNICODE_BREAK_IDEOGRAPHIC; - break; + case G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER: + break_type = G_UNICODE_BREAK_NON_STARTER; + break; - case G_UNICODE_BREAK_AMBIGUOUS: - /* FIXME - * we need to resolve the East Asian width - * to decide what to do here - */ - case G_UNICODE_BREAK_COMPLEX_CONTEXT: - /* FIXME - * language engines should handle this case... 
- */ - case G_UNICODE_BREAK_UNKNOWN: - /* convert unknown, complex, ambiguous to ALPHABETIC - */ - prev_break_type = G_UNICODE_BREAK_ALPHABETIC; - break; + default: + ; + } - default: - ; - } + /* If it's not a grapheme boundary, it's not a line break either */ + if (attrs[i].is_cursor_position || + break_type == G_UNICODE_BREAK_COMBINING_MARK || + break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER || + break_type == G_UNICODE_BREAK_HANGUL_L_JAMO || + break_type == G_UNICODE_BREAK_HANGUL_V_JAMO || + break_type == G_UNICODE_BREAK_HANGUL_T_JAMO || + break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE || + break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE || + break_type == G_UNICODE_BREAK_REGIONAL_INDICATOR) + { + LineBreakType LB_type; - switch ((int) prev_break_type) - { - case G_UNICODE_BREAK_MANDATORY: - case G_UNICODE_BREAK_LINE_FEED: - case G_UNICODE_BREAK_NEXT_LINE: - attrs[i].is_line_break = TRUE; - attrs[i].is_mandatory_break = TRUE; - break; + /* Find the LineBreakType of wc */ + LB_type = LB_Other; - case G_UNICODE_BREAK_CARRIAGE_RETURN: - if (wc != '\n') - { - attrs[i].is_line_break = TRUE; - attrs[i].is_mandatory_break = TRUE; - } - break; + if (break_type == G_UNICODE_BREAK_NUMERIC) + LB_type = LB_Numeric; - case G_UNICODE_BREAK_CONTINGENT: - /* can break after 0xFFFC by default, though we might want - * to eventually have a PangoLayout setting or - * PangoAttribute that disables this, if for some - * application breaking after objects is not desired. - */ - break_op = BREAK_ALLOWED; - break; + if (break_type == G_UNICODE_BREAK_SYMBOL || + break_type == G_UNICODE_BREAK_INFIX_SEPARATOR) + { + if (!(prev_LB_type == LB_Numeric)) + LB_type = LB_Other; + } - case G_UNICODE_BREAK_SURROGATE: - /* Undefined according to UTR#14, but ALLOWED in test data. 
*/ - break_op = BREAK_ALLOWED; - break; + if (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION || + break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS) + { + if (prev_LB_type == LB_Numeric) + LB_type = LB_Numeric_Close; + else + LB_type = LB_Other; + } - default: - g_assert (IN_BREAK_TABLE (prev_break_type)); - - /* Note that our table assumes that combining marks - * are only applied to alphabetic characters; - * tech report 14 explains how to remove this assumption - * from the code, if anyone ever cares, but it shouldn't - * be a problem. Also this issue sort of goes - * away since we only look for breaks on grapheme - * boundaries. - */ + if (break_type == G_UNICODE_BREAK_REGIONAL_INDICATOR) + { + if (prev_LB_type == LB_RI_Odd) + LB_type = LB_RI_Even; + else if (prev_LB_type == LB_RI_Even) + LB_type = LB_RI_Odd; + else + LB_type = LB_RI_Odd; + } - switch ((int) break_type) - { - case G_UNICODE_BREAK_MANDATORY: - case G_UNICODE_BREAK_LINE_FEED: - case G_UNICODE_BREAK_CARRIAGE_RETURN: - case G_UNICODE_BREAK_NEXT_LINE: - case G_UNICODE_BREAK_SPACE: - /* These types all "pile up" at the end of lines and - * get elided. - */ - break_op = BREAK_PROHIBITED; - break; + attrs[i].is_line_break = TRUE; /* Rule LB31 */ + /* Unicode doesn't specify char wrap; we wrap around all chars + * except where a line break is prohibited, which means we + * effectively break everywhere except inside runs of spaces. + */ + attrs[i].is_char_break = TRUE; - case G_UNICODE_BREAK_CONTINGENT: - /* break before 0xFFFC by default, eventually - * make this configurable? - */ - break_op = BREAK_ALLOWED; - break; + /* Make any necessary replacements first */ + if (row_break_type == G_UNICODE_BREAK_UNKNOWN) + row_break_type = G_UNICODE_BREAK_ALPHABETIC; + + /* add the line break rules in reverse order to override + the lower priority rules. 
*/ + + /* Rule LB30 */ + if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC || + prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER || + prev_break_type == G_UNICODE_BREAK_NUMERIC) && + break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION) + break_op = BREAK_PROHIBITED; + + if (prev_break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS && + (break_type == G_UNICODE_BREAK_ALPHABETIC || + break_type == G_UNICODE_BREAK_HEBREW_LETTER || + break_type == G_UNICODE_BREAK_NUMERIC)) + break_op = BREAK_PROHIBITED; + + /* Rule LB30a */ + if (prev_LB_type == LB_RI_Odd && LB_type == LB_RI_Even) + break_op = BREAK_PROHIBITED; + + /* Rule LB30b */ + if (prev_break_type == G_UNICODE_BREAK_EMOJI_BASE && + break_type == G_UNICODE_BREAK_EMOJI_MODIFIER) + break_op = BREAK_PROHIBITED; + + /* Rule LB29 */ + if (prev_break_type == G_UNICODE_BREAK_INFIX_SEPARATOR && + (break_type == G_UNICODE_BREAK_ALPHABETIC || + break_type == G_UNICODE_BREAK_HEBREW_LETTER)) + break_op = BREAK_PROHIBITED; + + /* Rule LB28 */ + if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC || + prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) && + (break_type == G_UNICODE_BREAK_ALPHABETIC || + break_type == G_UNICODE_BREAK_HEBREW_LETTER)) + break_op = BREAK_PROHIBITED; + + /* Rule LB27 */ + if ((prev_break_type == G_UNICODE_BREAK_HANGUL_L_JAMO || + prev_break_type == G_UNICODE_BREAK_HANGUL_V_JAMO || + prev_break_type == G_UNICODE_BREAK_HANGUL_T_JAMO || + prev_break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE || + prev_break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE) && + (break_type == G_UNICODE_BREAK_INSEPARABLE || + break_type == G_UNICODE_BREAK_POSTFIX)) + break_op = BREAK_PROHIBITED; + + if (prev_break_type == G_UNICODE_BREAK_PREFIX && + (break_type == G_UNICODE_BREAK_HANGUL_L_JAMO || + break_type == G_UNICODE_BREAK_HANGUL_V_JAMO || + break_type == G_UNICODE_BREAK_HANGUL_T_JAMO || + break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE || + break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE)) + break_op = BREAK_PROHIBITED; + + /* 
Rule LB26 */ + if (prev_break_type == G_UNICODE_BREAK_HANGUL_L_JAMO && + (break_type == G_UNICODE_BREAK_HANGUL_L_JAMO || + break_type == G_UNICODE_BREAK_HANGUL_V_JAMO || + break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE || + break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE)) + break_op = BREAK_PROHIBITED; + + if ((prev_break_type == G_UNICODE_BREAK_HANGUL_V_JAMO || + prev_break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE) && + (break_type == G_UNICODE_BREAK_HANGUL_V_JAMO || + break_type == G_UNICODE_BREAK_HANGUL_T_JAMO)) + break_op = BREAK_PROHIBITED; + + if ((prev_break_type == G_UNICODE_BREAK_HANGUL_T_JAMO || + prev_break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE) && + break_type == G_UNICODE_BREAK_HANGUL_T_JAMO) + break_op = BREAK_PROHIBITED; + + /* Rule LB25 with Example 7 of Customization */ + if ((prev_break_type == G_UNICODE_BREAK_PREFIX || + prev_break_type == G_UNICODE_BREAK_POSTFIX) && + break_type == G_UNICODE_BREAK_NUMERIC) + break_op = BREAK_PROHIBITED; + + if ((prev_break_type == G_UNICODE_BREAK_PREFIX || + prev_break_type == G_UNICODE_BREAK_POSTFIX) && + (break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION || + break_type == G_UNICODE_BREAK_HYPHEN) && + next_break_type == G_UNICODE_BREAK_NUMERIC) + break_op = BREAK_PROHIBITED; + + if ((prev_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION || + prev_break_type == G_UNICODE_BREAK_HYPHEN) && + break_type == G_UNICODE_BREAK_NUMERIC) + break_op = BREAK_PROHIBITED; + + if (prev_break_type == G_UNICODE_BREAK_NUMERIC && + (break_type == G_UNICODE_BREAK_NUMERIC || + break_type == G_UNICODE_BREAK_SYMBOL || + break_type == G_UNICODE_BREAK_INFIX_SEPARATOR)) + break_op = BREAK_PROHIBITED; + + if (prev_LB_type == LB_Numeric && + (break_type == G_UNICODE_BREAK_NUMERIC || + break_type == G_UNICODE_BREAK_SYMBOL || + break_type == G_UNICODE_BREAK_INFIX_SEPARATOR || + break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION || + break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS)) + break_op = BREAK_PROHIBITED; + + if ((prev_LB_type 
== LB_Numeric || + prev_LB_type == LB_Numeric_Close) && + (break_type == G_UNICODE_BREAK_POSTFIX || + break_type == G_UNICODE_BREAK_PREFIX)) + break_op = BREAK_PROHIBITED; + + /* Rule LB24 */ + if ((prev_break_type == G_UNICODE_BREAK_PREFIX || + prev_break_type == G_UNICODE_BREAK_POSTFIX) && + (break_type == G_UNICODE_BREAK_ALPHABETIC || + break_type == G_UNICODE_BREAK_HEBREW_LETTER)) + break_op = BREAK_PROHIBITED; + + if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC || + prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) && + (break_type == G_UNICODE_BREAK_PREFIX || + break_type == G_UNICODE_BREAK_POSTFIX)) + break_op = BREAK_PROHIBITED; + + /* Rule LB23 */ + if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC || + prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) && + break_type == G_UNICODE_BREAK_NUMERIC) + break_op = BREAK_PROHIBITED; + + if (prev_break_type == G_UNICODE_BREAK_NUMERIC && + (break_type == G_UNICODE_BREAK_ALPHABETIC || + break_type == G_UNICODE_BREAK_HEBREW_LETTER)) + break_op = BREAK_PROHIBITED; + + /* Rule LB23a */ + if (prev_break_type == G_UNICODE_BREAK_PREFIX && + (break_type == G_UNICODE_BREAK_IDEOGRAPHIC || + break_type == G_UNICODE_BREAK_EMOJI_BASE || + break_type == G_UNICODE_BREAK_EMOJI_MODIFIER)) + break_op = BREAK_PROHIBITED; + + if ((prev_break_type == G_UNICODE_BREAK_IDEOGRAPHIC || + prev_break_type == G_UNICODE_BREAK_EMOJI_BASE || + prev_break_type == G_UNICODE_BREAK_EMOJI_MODIFIER) && + break_type == G_UNICODE_BREAK_POSTFIX) + break_op = BREAK_PROHIBITED; + + /* Rule LB22 */ + if (break_type == G_UNICODE_BREAK_INSEPARABLE) + { + if (prev_break_type == G_UNICODE_BREAK_ALPHABETIC || + prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) + break_op = BREAK_PROHIBITED; - case G_UNICODE_BREAK_SURROGATE: - /* Undefined according to UTR#14, but ALLOWED in test data. 
*/ - break_op = BREAK_ALLOWED; - break; + if (prev_break_type == G_UNICODE_BREAK_EXCLAMATION) + break_op = BREAK_PROHIBITED; - /* Hangul additions are from Unicode 4.1 UAX#14 */ - case G_UNICODE_BREAK_HANGUL_L_JAMO: - case G_UNICODE_BREAK_HANGUL_V_JAMO: - case G_UNICODE_BREAK_HANGUL_T_JAMO: - case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE: - case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE: - /* treat Jamo as IDEOGRAPHIC from now - */ - break_type = G_UNICODE_BREAK_IDEOGRAPHIC; - - if (makes_hangul_syllable) - break_op = BREAK_IF_SPACES; - else - break_op = BREAK_ALLOWED; - break; + if (prev_break_type == G_UNICODE_BREAK_IDEOGRAPHIC || + prev_break_type == G_UNICODE_BREAK_EMOJI_BASE || + prev_break_type == G_UNICODE_BREAK_EMOJI_MODIFIER) + break_op = BREAK_PROHIBITED; - case G_UNICODE_BREAK_AMBIGUOUS: - /* FIXME: - * we need to resolve the East Asian width - * to decide what to do here - */ - case G_UNICODE_BREAK_COMPLEX_CONTEXT: - /* FIXME: - * language engines should handle this case... - */ - case G_UNICODE_BREAK_UNKNOWN: - /* treat unknown, complex, and ambiguous like ALPHABETIC - * for now - */ - break_op = BREAK_OP (prev_break_type, G_UNICODE_BREAK_ALPHABETIC); - break; + if (prev_break_type == G_UNICODE_BREAK_INSEPARABLE) + break_op = BREAK_PROHIBITED; - default: + if (prev_break_type == G_UNICODE_BREAK_NUMERIC) + break_op = BREAK_PROHIBITED; + } - g_assert (IN_BREAK_TABLE (break_type)); - break_op = BREAK_OP (prev_break_type, break_type); - break; - } - break; + if (break_type == G_UNICODE_BREAK_AFTER || + break_type == G_UNICODE_BREAK_HYPHEN || + break_type == G_UNICODE_BREAK_NON_STARTER || + prev_break_type == G_UNICODE_BREAK_BEFORE) + break_op = BREAK_PROHIBITED; /* Rule LB21 */ + + if (prev_prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER && + (prev_break_type == G_UNICODE_BREAK_HYPHEN || + prev_break_type == G_UNICODE_BREAK_AFTER)) + break_op = BREAK_PROHIBITED; /* Rule LB21a */ + + if (prev_break_type == G_UNICODE_BREAK_SYMBOL && + break_type == 
G_UNICODE_BREAK_HEBREW_LETTER) + break_op = BREAK_PROHIBITED; /* Rule LB21b */ + + if (prev_break_type == G_UNICODE_BREAK_CONTINGENT || + break_type == G_UNICODE_BREAK_CONTINGENT) + break_op = BREAK_ALLOWED; /* Rule LB20 */ + + if (prev_break_type == G_UNICODE_BREAK_QUOTATION || + break_type == G_UNICODE_BREAK_QUOTATION) + break_op = BREAK_PROHIBITED; /* Rule LB19 */ + + /* handle related rules for Space as state machine here, + and override the pair table result. */ + if (prev_break_type == G_UNICODE_BREAK_SPACE) /* Rule LB18 */ + break_op = BREAK_ALLOWED; + + if (row_break_type == G_UNICODE_BREAK_BEFORE_AND_AFTER && + break_type == G_UNICODE_BREAK_BEFORE_AND_AFTER) + break_op = BREAK_PROHIBITED; /* Rule LB17 */ + + if ((row_break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION || + row_break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS) && + break_type == G_UNICODE_BREAK_NON_STARTER) + break_op = BREAK_PROHIBITED; /* Rule LB16 */ + + if (row_break_type == G_UNICODE_BREAK_QUOTATION && + break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION) + break_op = BREAK_PROHIBITED; /* Rule LB15 */ + + if (row_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION) + break_op = BREAK_PROHIBITED; /* Rule LB14 */ + + /* Rule LB13 with Example 7 of Customization */ + if (break_type == G_UNICODE_BREAK_EXCLAMATION) + break_op = BREAK_PROHIBITED; + + if (prev_break_type != G_UNICODE_BREAK_NUMERIC && + (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION || + break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS || + break_type == G_UNICODE_BREAK_INFIX_SEPARATOR || + break_type == G_UNICODE_BREAK_SYMBOL)) + break_op = BREAK_PROHIBITED; + + if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE) + break_op = BREAK_PROHIBITED; /* Rule LB12 */ + + if (break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE && + (prev_break_type != G_UNICODE_BREAK_SPACE && + prev_break_type != G_UNICODE_BREAK_AFTER && + prev_break_type != G_UNICODE_BREAK_HYPHEN)) + break_op = BREAK_PROHIBITED; /* Rule LB12a */ + + if (prev_break_type == 
G_UNICODE_BREAK_WORD_JOINER || + break_type == G_UNICODE_BREAK_WORD_JOINER) + break_op = BREAK_PROHIBITED; /* Rule LB11 */ + + + /* Rule LB9 */ + if (break_type == G_UNICODE_BREAK_COMBINING_MARK || + break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER) + { + if (!(prev_break_type == G_UNICODE_BREAK_MANDATORY || + prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN || + prev_break_type == G_UNICODE_BREAK_LINE_FEED || + prev_break_type == G_UNICODE_BREAK_NEXT_LINE || + prev_break_type == G_UNICODE_BREAK_SPACE || + prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)) + break_op = BREAK_PROHIBITED; + } + + if (row_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE) + break_op = BREAK_ALLOWED; /* Rule LB8 */ + + if (prev_wc == 0x200D && + (break_type == G_UNICODE_BREAK_IDEOGRAPHIC || + break_type == G_UNICODE_BREAK_EMOJI_BASE || + break_type == G_UNICODE_BREAK_EMOJI_MODIFIER)) + break_op = BREAK_PROHIBITED; /* Rule LB8a */ + + if (break_type == G_UNICODE_BREAK_SPACE || + break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE) + break_op = BREAK_PROHIBITED; /* Rule LB7 */ + + /* Rule LB6 */ + if (break_type == G_UNICODE_BREAK_MANDATORY || + break_type == G_UNICODE_BREAK_CARRIAGE_RETURN || + break_type == G_UNICODE_BREAK_LINE_FEED || + break_type == G_UNICODE_BREAK_NEXT_LINE) + break_op = BREAK_PROHIBITED; + + /* Rules LB4 and LB5 */ + if (prev_break_type == G_UNICODE_BREAK_MANDATORY || + (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN && + wc != '\n') || + prev_break_type == G_UNICODE_BREAK_LINE_FEED || + prev_break_type == G_UNICODE_BREAK_NEXT_LINE) + { + attrs[i].is_mandatory_break = TRUE; + break_op = BREAK_ALLOWED; } switch (break_op) @@ -1543,12 +1459,13 @@ pango_default_break (const gchar *text, case BREAK_PROHIBITED: /* can't break here */ attrs[i].is_char_break = FALSE; + attrs[i].is_line_break = FALSE; break; case BREAK_IF_SPACES: /* break if prev char was space */ - if (prev_was_break_space) - attrs[i].is_line_break = TRUE; + if (prev_break_type != G_UNICODE_BREAK_SPACE) + 
attrs[i].is_line_break = FALSE; break; case BREAK_ALLOWED: @@ -1562,16 +1479,61 @@ pango_default_break (const gchar *text, g_assert_not_reached (); break; } + + /* Rule LB9 */ + if (!(break_type == G_UNICODE_BREAK_COMBINING_MARK || + break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)) + { + /* Rule LB25 with Example 7 of Customization */ + if (break_type == G_UNICODE_BREAK_NUMERIC || + break_type == G_UNICODE_BREAK_SYMBOL || + break_type == G_UNICODE_BREAK_INFIX_SEPARATOR) + { + if (prev_LB_type != LB_Numeric) + prev_LB_type = LB_type; + /* else don't change the prev_LB_type */ + } + else + { + prev_LB_type = LB_type; + } + } + /* else don't change the prev_LB_type for Rule LB9 */ } if (break_type != G_UNICODE_BREAK_SPACE) { - prev_break_type = break_type; - prev_was_break_space = FALSE; + /* Rule LB9 */ + if (break_type == G_UNICODE_BREAK_COMBINING_MARK || + break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER) + { + if (i == 0 /* start of text */ || + prev_break_type == G_UNICODE_BREAK_MANDATORY || + prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN || + prev_break_type == G_UNICODE_BREAK_LINE_FEED || + prev_break_type == G_UNICODE_BREAK_NEXT_LINE || + prev_break_type == G_UNICODE_BREAK_SPACE || + prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE) + prev_break_type = G_UNICODE_BREAK_ALPHABETIC; /* Rule LB10 */ + /* else don't change the prev_break_type for Rule LB9 */ + } + else + { + prev_prev_break_type = prev_break_type; + prev_break_type = break_type; + } + prev_jamo = jamo; } else - prev_was_break_space = TRUE; + { + if (prev_break_type != G_UNICODE_BREAK_SPACE) + { + prev_prev_break_type = prev_break_type; + prev_break_type = break_type; + } + /* else don't change the prev_break_type */ + } /* ---- Word breaks ---- */ |