summaryrefslogtreecommitdiff
path: root/pango/break.c
diff options
context:
space:
mode:
authorPeng Wu <alexepico@gmail.com>2017-09-12 14:57:11 +0800
committerMatthias Clasen <mclasen@redhat.com>2017-10-27 17:37:48 -0400
commitc4619480e536e393e2d4a8e26a6ceb5af1fe80e3 (patch)
treecbea23f7ab3a7ce453c8354be61ca3715f61a581 /pango/break.c
parent7721b0bed57038b342655220ad9bc32f4599f174 (diff)
downloadpango-c4619480e536e393e2d4a8e26a6ceb5af1fe80e3.tar.gz
Update pango_default_break function for Line Break
Support Line Break of Unicode 9.0. https://bugzilla.gnome.org/show_bug.cgi?id=788115
Diffstat (limited to 'pango/break.c')
-rw-r--r--pango/break.c880
1 files changed, 421 insertions, 459 deletions
diff --git a/pango/break.c b/pango/break.c
index 34a7cd28..c46f3382 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -40,323 +40,16 @@ typedef enum
BREAK_PROHIBITED, /* no break, even if spaces intervene */
BREAK_IF_SPACES, /* "indirect break" (only if there are spaces) */
BREAK_ALLOWED /* "direct break" (can always break here) */
- /* TR 14 has one more break-opportunity class,
+ /* TR 14 has two more break-opportunity classes,
* "indirect break opportunity for combining marks following a space"
+ * and "prohibited break for combining marks"
* but we handle that inline in the code.
*/
} BreakOpportunity;
-
-enum
-{
- INDEX_OPEN_PUNCTUATION,
- INDEX_CLOSE_PUNCTUATION,
- INDEX_QUOTATION,
- INDEX_NON_BREAKING_GLUE,
- INDEX_NON_STARTER,
- INDEX_EXCLAMATION,
- INDEX_SYMBOL,
- INDEX_INFIX_SEPARATOR,
- INDEX_PREFIX,
- INDEX_POSTFIX,
- INDEX_NUMERIC,
- INDEX_ALPHABETIC,
- INDEX_IDEOGRAPHIC,
- INDEX_INSEPARABLE,
- INDEX_HYPHEN,
- INDEX_AFTER,
- INDEX_BEFORE,
- INDEX_BEFORE_AND_AFTER,
- INDEX_ZERO_WIDTH_SPACE,
- INDEX_COMBINING_MARK,
- INDEX_WORD_JOINER,
-
- /* End of the table */
-
- INDEX_END_OF_TABLE,
-
- /* The following are not in the tables */
- INDEX_MANDATORY,
- INDEX_CARRIAGE_RETURN,
- INDEX_LINE_FEED,
- INDEX_SURROGATE,
- INDEX_CONTINGENT,
- INDEX_SPACE,
- INDEX_COMPLEX_CONTEXT,
- INDEX_AMBIGUOUS,
- INDEX_UNKNOWN,
- INDEX_NEXT_LINE,
- INDEX_HANGUL_L_JAMO,
- INDEX_HANGUL_V_JAMO,
- INDEX_HANGUL_T_JAMO,
- INDEX_HANGUL_LV_SYLLABLE,
- INDEX_HANGUL_LVT_SYLLABLE,
-};
-
-static const BreakOpportunity row_OPEN_PUNCTUATION[INDEX_END_OF_TABLE] = {
- BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_CLOSE_PUNCTUATION[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_QUOTATION[INDEX_END_OF_TABLE] = {
- BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_NON_BREAKING_GLUE[INDEX_END_OF_TABLE] = {
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_NON_STARTER[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_EXCLAMATION[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_SYMBOL[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_INFIX_SEPARATOR[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_PREFIX[INDEX_END_OF_TABLE] = {
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_POSTFIX[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_NUMERIC[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_ALPHABETIC[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_IDEOGRAPHIC[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_INSEPARABLE[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_HYPHEN[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_AFTER[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_BEFORE[INDEX_END_OF_TABLE] = {
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_BEFORE_AND_AFTER[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_ZERO_WIDTH_SPACE[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED
-};
-
-static const BreakOpportunity row_COMBINING_MARK[INDEX_END_OF_TABLE] = {
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_WORD_JOINER[INDEX_END_OF_TABLE] = {
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED
-};
-
-static const BreakOpportunity *const line_break_rows[INDEX_END_OF_TABLE] = {
- row_OPEN_PUNCTUATION, /* INDEX_OPEN_PUNCTUATION */
- row_CLOSE_PUNCTUATION, /* INDEX_CLOSE_PUNCTUATION */
- row_QUOTATION, /* INDEX_QUOTATION */
- row_NON_BREAKING_GLUE, /* INDEX_NON_BREAKING_GLUE */
- row_NON_STARTER, /* INDEX_NON_STARTER */
- row_EXCLAMATION, /* INDEX_EXCLAMATION */
- row_SYMBOL, /* INDEX_SYMBOL */
- row_INFIX_SEPARATOR, /* INDEX_INFIX_SEPARATOR */
- row_PREFIX, /* INDEX_PREFIX */
- row_POSTFIX, /* INDEX_POSTFIX */
- row_NUMERIC, /* INDEX_NUMERIC */
- row_ALPHABETIC, /* INDEX_ALPHABETIC */
- row_IDEOGRAPHIC, /* INDEX_IDEOGRAPHIC */
- row_INSEPARABLE, /* INDEX_INSEPARABLE */
- row_HYPHEN, /* INDEX_HYPHEN */
- row_AFTER, /* INDEX_AFTER */
- row_BEFORE, /* INDEX_BEFORE */
- row_BEFORE_AND_AFTER, /* INDEX_BEFORE_AND_AFTER */
- row_ZERO_WIDTH_SPACE, /* INDEX_ZERO_WIDTH_SPACE */
- row_COMBINING_MARK, /* INDEX_COMBINING_MARK */
- row_WORD_JOINER /* INDEX_WORD_JOINER */
-};
-
-/* Map GUnicodeBreakType to table indexes */
-static const int line_break_indexes[] = {
- INDEX_MANDATORY,
- INDEX_CARRIAGE_RETURN,
- INDEX_LINE_FEED,
- INDEX_COMBINING_MARK,
- INDEX_SURROGATE,
- INDEX_ZERO_WIDTH_SPACE,
- INDEX_INSEPARABLE,
- INDEX_NON_BREAKING_GLUE,
- INDEX_CONTINGENT,
- INDEX_SPACE,
- INDEX_AFTER,
- INDEX_BEFORE,
- INDEX_BEFORE_AND_AFTER,
- INDEX_HYPHEN,
- INDEX_NON_STARTER,
- INDEX_OPEN_PUNCTUATION,
- INDEX_CLOSE_PUNCTUATION,
- INDEX_QUOTATION,
- INDEX_EXCLAMATION,
- INDEX_IDEOGRAPHIC,
- INDEX_NUMERIC,
- INDEX_INFIX_SEPARATOR,
- INDEX_SYMBOL,
- INDEX_ALPHABETIC,
- INDEX_PREFIX,
- INDEX_POSTFIX,
- INDEX_COMPLEX_CONTEXT,
- INDEX_AMBIGUOUS,
- INDEX_UNKNOWN,
- INDEX_NEXT_LINE,
- INDEX_WORD_JOINER,
- INDEX_HANGUL_L_JAMO,
- INDEX_HANGUL_V_JAMO,
- INDEX_HANGUL_T_JAMO,
- INDEX_HANGUL_LV_SYLLABLE,
- INDEX_HANGUL_LVT_SYLLABLE
-};
-
+/* need to sync the break range to glib/gunicode.h . */
#define BREAK_TYPE_SAFE(btype) \
- ((btype) < G_N_ELEMENTS(line_break_indexes) ? (btype) : G_UNICODE_BREAK_UNKNOWN)
-#define BREAK_INDEX(btype) \
- (line_break_indexes[(btype)])
-#define BREAK_ROW(before_type) \
- (line_break_rows[BREAK_INDEX (before_type)])
-#define BREAK_OP(before_type, after_type) \
- (BREAK_ROW (before_type)[BREAK_INDEX (after_type)])
-#define IN_BREAK_TABLE(btype) \
- ((btype) < G_N_ELEMENTS(line_break_indexes) && BREAK_INDEX((btype)) < INDEX_END_OF_TABLE)
-
+ ((btype) <= G_UNICODE_BREAK_ZERO_WIDTH_JOINER ? (btype) : G_UNICODE_BREAK_UNKNOWN)
/*
@@ -487,8 +180,8 @@ pango_default_break (const gchar *text,
JamoType prev_jamo;
GUnicodeBreakType next_break_type;
- GUnicodeBreakType prev_break_type; /* skips spaces */
- gboolean prev_was_break_space;
+ GUnicodeBreakType prev_break_type;
+ GUnicodeBreakType prev_prev_break_type;
/* See Grapheme_Cluster_Break Property Values table of UAX#29 */
typedef enum
@@ -554,6 +247,17 @@ pango_default_break (const gchar *text,
SentenceBreakType prev_prev_SB_type = SB_Other, prev_SB_type = SB_Other;
gint prev_SB_i = -1;
+ /* Rule LB25 with Example 7 of Customization */
+ typedef enum
+ {
+ LB_Other,
+ LB_Numeric,
+ LB_Numeric_Close,
+ LB_RI_Odd,
+ LB_RI_Even,
+ } LineBreakType;
+ LineBreakType prev_LB_type = LB_Other;
+
WordType current_word_type = WordNone;
gunichar last_word_letter = 0;
gunichar base_character = 0;
@@ -570,7 +274,7 @@ pango_default_break (const gchar *text,
next = text;
prev_break_type = G_UNICODE_BREAK_UNKNOWN;
- prev_was_break_space = FALSE;
+ prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_wc = 0;
prev_jamo = NO_JAMO;
@@ -590,6 +294,7 @@ pango_default_break (const gchar *text,
GUnicodeType type;
gunichar wc;
GUnicodeBreakType break_type;
+ GUnicodeBreakType row_break_type;
BreakOpportunity break_op;
JamoType jamo;
gboolean makes_hangul_syllable;
@@ -1374,168 +1079,379 @@ pango_default_break (const gchar *text,
break_op = BREAK_ALREADY_HANDLED;
- g_assert (prev_break_type != G_UNICODE_BREAK_SPACE);
+ row_break_type = prev_break_type == G_UNICODE_BREAK_SPACE ?
+ prev_prev_break_type : prev_break_type;
+ g_assert (row_break_type != G_UNICODE_BREAK_SPACE);
attrs[i].is_char_break = FALSE;
attrs[i].is_line_break = FALSE;
attrs[i].is_mandatory_break = FALSE;
- if (attrs[i].is_cursor_position) /* If it's not a grapheme boundary,
- * it's not a line break either
- */
+ /* Rule LB1:
+ assign a line breaking class to each code point of the input. */
+ switch ((int) break_type)
{
- /* space followed by a combining mark is handled
- * specially; (rule 7a from TR 14)
- */
- if (break_type == G_UNICODE_BREAK_SPACE &&
- next_break_type == G_UNICODE_BREAK_COMBINING_MARK)
- break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
+ case G_UNICODE_BREAK_AMBIGUOUS:
+ case G_UNICODE_BREAK_SURROGATE:
+ case G_UNICODE_BREAK_UNKNOWN:
+ break_type = G_UNICODE_BREAK_ALPHABETIC;
+ break;
- /* Unicode doesn't specify char wrap; we wrap around all chars
- * except where a line break is prohibited, which means we
- * effectively break everywhere except inside runs of spaces.
- */
- attrs[i].is_char_break = TRUE;
+ case G_UNICODE_BREAK_COMPLEX_CONTEXT:
+ if (type == G_UNICODE_NON_SPACING_MARK ||
+ type == G_UNICODE_SPACING_MARK)
+ break_type = G_UNICODE_BREAK_COMBINING_MARK;
+ else
+ break_type = G_UNICODE_BREAK_ALPHABETIC;
+ break;
- /* Make any necessary replacements first */
- switch ((int) prev_break_type)
- {
- case G_UNICODE_BREAK_HANGUL_L_JAMO:
- case G_UNICODE_BREAK_HANGUL_V_JAMO:
- case G_UNICODE_BREAK_HANGUL_T_JAMO:
- case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE:
- case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE:
- /* treat Jamo as IDEOGRAPHIC from now
- */
- prev_break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
- break;
+ case G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER:
+ break_type = G_UNICODE_BREAK_NON_STARTER;
+ break;
- case G_UNICODE_BREAK_AMBIGUOUS:
- /* FIXME
- * we need to resolve the East Asian width
- * to decide what to do here
- */
- case G_UNICODE_BREAK_COMPLEX_CONTEXT:
- /* FIXME
- * language engines should handle this case...
- */
- case G_UNICODE_BREAK_UNKNOWN:
- /* convert unknown, complex, ambiguous to ALPHABETIC
- */
- prev_break_type = G_UNICODE_BREAK_ALPHABETIC;
- break;
+ default:
+ ;
+ }
- default:
- ;
- }
+ /* If it's not a grapheme boundary, it's not a line break either */
+ if (attrs[i].is_cursor_position ||
+ break_type == G_UNICODE_BREAK_COMBINING_MARK ||
+ break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER ||
+ break_type == G_UNICODE_BREAK_HANGUL_L_JAMO ||
+ break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+ break_type == G_UNICODE_BREAK_HANGUL_T_JAMO ||
+ break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE ||
+ break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE ||
+ break_type == G_UNICODE_BREAK_REGIONAL_INDICATOR)
+ {
+ LineBreakType LB_type;
- switch ((int) prev_break_type)
- {
- case G_UNICODE_BREAK_MANDATORY:
- case G_UNICODE_BREAK_LINE_FEED:
- case G_UNICODE_BREAK_NEXT_LINE:
- attrs[i].is_line_break = TRUE;
- attrs[i].is_mandatory_break = TRUE;
- break;
+ /* Find the LineBreakType of wc */
+ LB_type = LB_Other;
- case G_UNICODE_BREAK_CARRIAGE_RETURN:
- if (wc != '\n')
- {
- attrs[i].is_line_break = TRUE;
- attrs[i].is_mandatory_break = TRUE;
- }
- break;
+ if (break_type == G_UNICODE_BREAK_NUMERIC)
+ LB_type = LB_Numeric;
- case G_UNICODE_BREAK_CONTINGENT:
- /* can break after 0xFFFC by default, though we might want
- * to eventually have a PangoLayout setting or
- * PangoAttribute that disables this, if for some
- * application breaking after objects is not desired.
- */
- break_op = BREAK_ALLOWED;
- break;
+ if (break_type == G_UNICODE_BREAK_SYMBOL ||
+ break_type == G_UNICODE_BREAK_INFIX_SEPARATOR)
+ {
+ if (!(prev_LB_type == LB_Numeric))
+ LB_type = LB_Other;
+ }
- case G_UNICODE_BREAK_SURROGATE:
- /* Undefined according to UTR#14, but ALLOWED in test data. */
- break_op = BREAK_ALLOWED;
- break;
+ if (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION ||
+ break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS)
+ {
+ if (prev_LB_type == LB_Numeric)
+ LB_type = LB_Numeric_Close;
+ else
+ LB_type = LB_Other;
+ }
- default:
- g_assert (IN_BREAK_TABLE (prev_break_type));
-
- /* Note that our table assumes that combining marks
- * are only applied to alphabetic characters;
- * tech report 14 explains how to remove this assumption
- * from the code, if anyone ever cares, but it shouldn't
- * be a problem. Also this issue sort of goes
- * away since we only look for breaks on grapheme
- * boundaries.
- */
+ if (break_type == G_UNICODE_BREAK_REGIONAL_INDICATOR)
+ {
+ if (prev_LB_type == LB_RI_Odd)
+ LB_type = LB_RI_Even;
+ else if (prev_LB_type == LB_RI_Even)
+ LB_type = LB_RI_Odd;
+ else
+ LB_type = LB_RI_Odd;
+ }
- switch ((int) break_type)
- {
- case G_UNICODE_BREAK_MANDATORY:
- case G_UNICODE_BREAK_LINE_FEED:
- case G_UNICODE_BREAK_CARRIAGE_RETURN:
- case G_UNICODE_BREAK_NEXT_LINE:
- case G_UNICODE_BREAK_SPACE:
- /* These types all "pile up" at the end of lines and
- * get elided.
- */
- break_op = BREAK_PROHIBITED;
- break;
+ attrs[i].is_line_break = TRUE; /* Rule LB31 */
+ /* Unicode doesn't specify char wrap; we wrap around all chars
+ * except where a line break is prohibited, which means we
+ * effectively break everywhere except inside runs of spaces.
+ */
+ attrs[i].is_char_break = TRUE;
- case G_UNICODE_BREAK_CONTINGENT:
- /* break before 0xFFFC by default, eventually
- * make this configurable?
- */
- break_op = BREAK_ALLOWED;
- break;
+ /* Make any necessary replacements first */
+ if (row_break_type == G_UNICODE_BREAK_UNKNOWN)
+ row_break_type = G_UNICODE_BREAK_ALPHABETIC;
+
+ /* add the line break rules in reverse order to override
+ the lower priority rules. */
+
+ /* Rule LB30 */
+ if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER ||
+ prev_break_type == G_UNICODE_BREAK_NUMERIC) &&
+ break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION)
+ break_op = BREAK_PROHIBITED;
+
+ if (prev_break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS &&
+ (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ break_type == G_UNICODE_BREAK_HEBREW_LETTER ||
+ break_type == G_UNICODE_BREAK_NUMERIC))
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB30a */
+ if (prev_LB_type == LB_RI_Odd && LB_type == LB_RI_Even)
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB30b */
+ if (prev_break_type == G_UNICODE_BREAK_EMOJI_BASE &&
+ break_type == G_UNICODE_BREAK_EMOJI_MODIFIER)
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB29 */
+ if (prev_break_type == G_UNICODE_BREAK_INFIX_SEPARATOR &&
+ (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ break_type == G_UNICODE_BREAK_HEBREW_LETTER))
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB28 */
+ if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) &&
+ (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ break_type == G_UNICODE_BREAK_HEBREW_LETTER))
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB27 */
+ if ((prev_break_type == G_UNICODE_BREAK_HANGUL_L_JAMO ||
+ prev_break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+ prev_break_type == G_UNICODE_BREAK_HANGUL_T_JAMO ||
+ prev_break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE ||
+ prev_break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE) &&
+ (break_type == G_UNICODE_BREAK_INSEPARABLE ||
+ break_type == G_UNICODE_BREAK_POSTFIX))
+ break_op = BREAK_PROHIBITED;
+
+ if (prev_break_type == G_UNICODE_BREAK_PREFIX &&
+ (break_type == G_UNICODE_BREAK_HANGUL_L_JAMO ||
+ break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+ break_type == G_UNICODE_BREAK_HANGUL_T_JAMO ||
+ break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE ||
+ break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE))
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB26 */
+ if (prev_break_type == G_UNICODE_BREAK_HANGUL_L_JAMO &&
+ (break_type == G_UNICODE_BREAK_HANGUL_L_JAMO ||
+ break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+ break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE ||
+ break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE))
+ break_op = BREAK_PROHIBITED;
+
+ if ((prev_break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+ prev_break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE) &&
+ (break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+ break_type == G_UNICODE_BREAK_HANGUL_T_JAMO))
+ break_op = BREAK_PROHIBITED;
+
+ if ((prev_break_type == G_UNICODE_BREAK_HANGUL_T_JAMO ||
+ prev_break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE) &&
+ break_type == G_UNICODE_BREAK_HANGUL_T_JAMO)
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB25 with Example 7 of Customization */
+ if ((prev_break_type == G_UNICODE_BREAK_PREFIX ||
+ prev_break_type == G_UNICODE_BREAK_POSTFIX) &&
+ break_type == G_UNICODE_BREAK_NUMERIC)
+ break_op = BREAK_PROHIBITED;
+
+ if ((prev_break_type == G_UNICODE_BREAK_PREFIX ||
+ prev_break_type == G_UNICODE_BREAK_POSTFIX) &&
+ (break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION ||
+ break_type == G_UNICODE_BREAK_HYPHEN) &&
+ next_break_type == G_UNICODE_BREAK_NUMERIC)
+ break_op = BREAK_PROHIBITED;
+
+ if ((prev_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION ||
+ prev_break_type == G_UNICODE_BREAK_HYPHEN) &&
+ break_type == G_UNICODE_BREAK_NUMERIC)
+ break_op = BREAK_PROHIBITED;
+
+ if (prev_break_type == G_UNICODE_BREAK_NUMERIC &&
+ (break_type == G_UNICODE_BREAK_NUMERIC ||
+ break_type == G_UNICODE_BREAK_SYMBOL ||
+ break_type == G_UNICODE_BREAK_INFIX_SEPARATOR))
+ break_op = BREAK_PROHIBITED;
+
+ if (prev_LB_type == LB_Numeric &&
+ (break_type == G_UNICODE_BREAK_NUMERIC ||
+ break_type == G_UNICODE_BREAK_SYMBOL ||
+ break_type == G_UNICODE_BREAK_INFIX_SEPARATOR ||
+ break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION ||
+ break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS))
+ break_op = BREAK_PROHIBITED;
+
+ if ((prev_LB_type == LB_Numeric ||
+ prev_LB_type == LB_Numeric_Close) &&
+ (break_type == G_UNICODE_BREAK_POSTFIX ||
+ break_type == G_UNICODE_BREAK_PREFIX))
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB24 */
+ if ((prev_break_type == G_UNICODE_BREAK_PREFIX ||
+ prev_break_type == G_UNICODE_BREAK_POSTFIX) &&
+ (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ break_type == G_UNICODE_BREAK_HEBREW_LETTER))
+ break_op = BREAK_PROHIBITED;
+
+ if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) &&
+ (break_type == G_UNICODE_BREAK_PREFIX ||
+ break_type == G_UNICODE_BREAK_POSTFIX))
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB23 */
+ if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) &&
+ break_type == G_UNICODE_BREAK_NUMERIC)
+ break_op = BREAK_PROHIBITED;
+
+ if (prev_break_type == G_UNICODE_BREAK_NUMERIC &&
+ (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ break_type == G_UNICODE_BREAK_HEBREW_LETTER))
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB23a */
+ if (prev_break_type == G_UNICODE_BREAK_PREFIX &&
+ (break_type == G_UNICODE_BREAK_IDEOGRAPHIC ||
+ break_type == G_UNICODE_BREAK_EMOJI_BASE ||
+ break_type == G_UNICODE_BREAK_EMOJI_MODIFIER))
+ break_op = BREAK_PROHIBITED;
+
+ if ((prev_break_type == G_UNICODE_BREAK_IDEOGRAPHIC ||
+ prev_break_type == G_UNICODE_BREAK_EMOJI_BASE ||
+ prev_break_type == G_UNICODE_BREAK_EMOJI_MODIFIER) &&
+ break_type == G_UNICODE_BREAK_POSTFIX)
+ break_op = BREAK_PROHIBITED;
+
+ /* Rule LB22 */
+ if (break_type == G_UNICODE_BREAK_INSEPARABLE)
+ {
+ if (prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+ prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER)
+ break_op = BREAK_PROHIBITED;
- case G_UNICODE_BREAK_SURROGATE:
- /* Undefined according to UTR#14, but ALLOWED in test data. */
- break_op = BREAK_ALLOWED;
- break;
+ if (prev_break_type == G_UNICODE_BREAK_EXCLAMATION)
+ break_op = BREAK_PROHIBITED;
- /* Hangul additions are from Unicode 4.1 UAX#14 */
- case G_UNICODE_BREAK_HANGUL_L_JAMO:
- case G_UNICODE_BREAK_HANGUL_V_JAMO:
- case G_UNICODE_BREAK_HANGUL_T_JAMO:
- case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE:
- case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE:
- /* treat Jamo as IDEOGRAPHIC from now
- */
- break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
-
- if (makes_hangul_syllable)
- break_op = BREAK_IF_SPACES;
- else
- break_op = BREAK_ALLOWED;
- break;
+ if (prev_break_type == G_UNICODE_BREAK_IDEOGRAPHIC ||
+ prev_break_type == G_UNICODE_BREAK_EMOJI_BASE ||
+ prev_break_type == G_UNICODE_BREAK_EMOJI_MODIFIER)
+ break_op = BREAK_PROHIBITED;
- case G_UNICODE_BREAK_AMBIGUOUS:
- /* FIXME:
- * we need to resolve the East Asian width
- * to decide what to do here
- */
- case G_UNICODE_BREAK_COMPLEX_CONTEXT:
- /* FIXME:
- * language engines should handle this case...
- */
- case G_UNICODE_BREAK_UNKNOWN:
- /* treat unknown, complex, and ambiguous like ALPHABETIC
- * for now
- */
- break_op = BREAK_OP (prev_break_type, G_UNICODE_BREAK_ALPHABETIC);
- break;
+ if (prev_break_type == G_UNICODE_BREAK_INSEPARABLE)
+ break_op = BREAK_PROHIBITED;
- default:
+ if (prev_break_type == G_UNICODE_BREAK_NUMERIC)
+ break_op = BREAK_PROHIBITED;
+ }
- g_assert (IN_BREAK_TABLE (break_type));
- break_op = BREAK_OP (prev_break_type, break_type);
- break;
- }
- break;
+ if (break_type == G_UNICODE_BREAK_AFTER ||
+ break_type == G_UNICODE_BREAK_HYPHEN ||
+ break_type == G_UNICODE_BREAK_NON_STARTER ||
+ prev_break_type == G_UNICODE_BREAK_BEFORE)
+ break_op = BREAK_PROHIBITED; /* Rule LB21 */
+
+ if (prev_prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER &&
+ (prev_break_type == G_UNICODE_BREAK_HYPHEN ||
+ prev_break_type == G_UNICODE_BREAK_AFTER))
+ break_op = BREAK_PROHIBITED; /* Rule LB21a */
+
+ if (prev_break_type == G_UNICODE_BREAK_SYMBOL &&
+ break_type == G_UNICODE_BREAK_HEBREW_LETTER)
+ break_op = BREAK_PROHIBITED; /* Rule LB21b */
+
+ if (prev_break_type == G_UNICODE_BREAK_CONTINGENT ||
+ break_type == G_UNICODE_BREAK_CONTINGENT)
+ break_op = BREAK_ALLOWED; /* Rule LB20 */
+
+ if (prev_break_type == G_UNICODE_BREAK_QUOTATION ||
+ break_type == G_UNICODE_BREAK_QUOTATION)
+ break_op = BREAK_PROHIBITED; /* Rule LB19 */
+
+ /* handle related rules for Space as state machine here,
+ and override the pair table result. */
+ if (prev_break_type == G_UNICODE_BREAK_SPACE) /* Rule LB18 */
+ break_op = BREAK_ALLOWED;
+
+ if (row_break_type == G_UNICODE_BREAK_BEFORE_AND_AFTER &&
+ break_type == G_UNICODE_BREAK_BEFORE_AND_AFTER)
+ break_op = BREAK_PROHIBITED; /* Rule LB17 */
+
+ if ((row_break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION ||
+ row_break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS) &&
+ break_type == G_UNICODE_BREAK_NON_STARTER)
+ break_op = BREAK_PROHIBITED; /* Rule LB16 */
+
+ if (row_break_type == G_UNICODE_BREAK_QUOTATION &&
+ break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION)
+ break_op = BREAK_PROHIBITED; /* Rule LB15 */
+
+ if (row_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION)
+ break_op = BREAK_PROHIBITED; /* Rule LB14 */
+
+ /* Rule LB13 with Example 7 of Customization */
+ if (break_type == G_UNICODE_BREAK_EXCLAMATION)
+ break_op = BREAK_PROHIBITED;
+
+ if (prev_break_type != G_UNICODE_BREAK_NUMERIC &&
+ (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION ||
+ break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS ||
+ break_type == G_UNICODE_BREAK_INFIX_SEPARATOR ||
+ break_type == G_UNICODE_BREAK_SYMBOL))
+ break_op = BREAK_PROHIBITED;
+
+ if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE)
+ break_op = BREAK_PROHIBITED; /* Rule LB12 */
+
+ if (break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE &&
+ (prev_break_type != G_UNICODE_BREAK_SPACE &&
+ prev_break_type != G_UNICODE_BREAK_AFTER &&
+ prev_break_type != G_UNICODE_BREAK_HYPHEN))
+ break_op = BREAK_PROHIBITED; /* Rule LB12a */
+
+ if (prev_break_type == G_UNICODE_BREAK_WORD_JOINER ||
+ break_type == G_UNICODE_BREAK_WORD_JOINER)
+ break_op = BREAK_PROHIBITED; /* Rule LB11 */
+
+
+ /* Rule LB9 */
+ if (break_type == G_UNICODE_BREAK_COMBINING_MARK ||
+ break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)
+ {
+ if (!(prev_break_type == G_UNICODE_BREAK_MANDATORY ||
+ prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+ prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+ prev_break_type == G_UNICODE_BREAK_NEXT_LINE ||
+ prev_break_type == G_UNICODE_BREAK_SPACE ||
+ prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE))
+ break_op = BREAK_PROHIBITED;
+ }
+
+ if (row_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+ break_op = BREAK_ALLOWED; /* Rule LB8 */
+
+ if (prev_wc == 0x200D &&
+ (break_type == G_UNICODE_BREAK_IDEOGRAPHIC ||
+ break_type == G_UNICODE_BREAK_EMOJI_BASE ||
+ break_type == G_UNICODE_BREAK_EMOJI_MODIFIER))
+ break_op = BREAK_PROHIBITED; /* Rule LB8a */
+
+ if (break_type == G_UNICODE_BREAK_SPACE ||
+ break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+ break_op = BREAK_PROHIBITED; /* Rule LB7 */
+
+ /* Rule LB6 */
+ if (break_type == G_UNICODE_BREAK_MANDATORY ||
+ break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+ break_type == G_UNICODE_BREAK_LINE_FEED ||
+ break_type == G_UNICODE_BREAK_NEXT_LINE)
+ break_op = BREAK_PROHIBITED;
+
+ /* Rules LB4 and LB5 */
+ if (prev_break_type == G_UNICODE_BREAK_MANDATORY ||
+ (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN &&
+ wc != '\n') ||
+ prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+ prev_break_type == G_UNICODE_BREAK_NEXT_LINE)
+ {
+ attrs[i].is_mandatory_break = TRUE;
+ break_op = BREAK_ALLOWED;
}
switch (break_op)
@@ -1543,12 +1459,13 @@ pango_default_break (const gchar *text,
case BREAK_PROHIBITED:
/* can't break here */
attrs[i].is_char_break = FALSE;
+ attrs[i].is_line_break = FALSE;
break;
case BREAK_IF_SPACES:
/* break if prev char was space */
- if (prev_was_break_space)
- attrs[i].is_line_break = TRUE;
+ if (prev_break_type != G_UNICODE_BREAK_SPACE)
+ attrs[i].is_line_break = FALSE;
break;
case BREAK_ALLOWED:
@@ -1562,16 +1479,61 @@ pango_default_break (const gchar *text,
g_assert_not_reached ();
break;
}
+
+ /* Rule LB9 */
+ if (!(break_type == G_UNICODE_BREAK_COMBINING_MARK ||
+ break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER))
+ {
+ /* Rule LB25 with Example 7 of Customization */
+ if (break_type == G_UNICODE_BREAK_NUMERIC ||
+ break_type == G_UNICODE_BREAK_SYMBOL ||
+ break_type == G_UNICODE_BREAK_INFIX_SEPARATOR)
+ {
+ if (prev_LB_type != LB_Numeric)
+ prev_LB_type = LB_type;
+ /* else don't change the prev_LB_type */
+ }
+ else
+ {
+ prev_LB_type = LB_type;
+ }
+ }
+ /* else don't change the prev_LB_type for Rule LB9 */
}
if (break_type != G_UNICODE_BREAK_SPACE)
{
- prev_break_type = break_type;
- prev_was_break_space = FALSE;
+ /* Rule LB9 */
+ if (break_type == G_UNICODE_BREAK_COMBINING_MARK ||
+ break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)
+ {
+ if (i == 0 /* start of text */ ||
+ prev_break_type == G_UNICODE_BREAK_MANDATORY ||
+ prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+ prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+ prev_break_type == G_UNICODE_BREAK_NEXT_LINE ||
+ prev_break_type == G_UNICODE_BREAK_SPACE ||
+ prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+ prev_break_type = G_UNICODE_BREAK_ALPHABETIC; /* Rule LB10 */
+ /* else don't change the prev_break_type for Rule LB9 */
+ }
+ else
+ {
+ prev_prev_break_type = prev_break_type;
+ prev_break_type = break_type;
+ }
+
prev_jamo = jamo;
}
else
- prev_was_break_space = TRUE;
+ {
+ if (prev_break_type != G_UNICODE_BREAK_SPACE)
+ {
+ prev_prev_break_type = prev_break_type;
+ prev_break_type = break_type;
+ }
+ /* else don't change the prev_break_type */
+ }
/* ---- Word breaks ---- */