diff options
author | Owen Taylor <otaylor@redhat.com> | 2004-06-22 18:53:00 +0000 |
---|---|---|
committer | Owen Taylor <otaylor@src.gnome.org> | 2004-06-22 18:53:00 +0000 |
commit | 1939a027b1d4e6c7d83af9e42e191f1bfb0cf8a0 (patch) | |
tree | 1fcdd2ab1b502022d5604ac5c99724a0d56efe69 /pango/break.c | |
parent | 8ba821e11ccc3c357ae244b8a6707418c78a5e29 (diff) | |
download | pango-1939a027b1d4e6c7d83af9e42e191f1bfb0cf8a0.tar.gz |
Handle new Unicode-4.0 WORD_JOINER and NEXT_LINE classes. (#143436, Jeroen Zwartepoorte)
Tue Jun 22 14:10:41 2004 Owen Taylor <otaylor@redhat.com>
* pango/break.c: Handle new Unicode-4.0 WORD_JOINER
and NEXT_LINE classes. (#143436, Jeroen Zwartepoorte)
* pango/break.c (pango_default_break): Handle space
followed by a combining character as called for by
TR 14.
* tests/testboundaries.c (check_line_char): Fix up
for space+combining mark.
* pango/break.c (pango_default_break): Change fixme
for G_UNICODE_BREAK_SURROGATE to g_assert_not_reached().
Diffstat (limited to 'pango/break.c')
-rw-r--r-- | pango/break.c | 106 |
1 files changed, 73 insertions, 33 deletions
diff --git a/pango/break.c b/pango/break.c index 3eb1b32a..a1850767 100644 --- a/pango/break.c +++ b/pango/break.c @@ -32,7 +32,11 @@ typedef enum BREAK_ALREADY_HANDLED, /* didn't use the table */ BREAK_PROHIBITED, /* no break, even if spaces intervene */ BREAK_IF_SPACES, /* "indirect break" (only if there are spaces) */ - BREAK_ALLOWED /* "direct break" (can always break here) */ + BREAK_ALLOWED, /* "direct break" (can always break here) */ + /* TR 14 has one more break-opportunity class, + * "indirect break opportunity for combining marks following a space" + * but we handle that inline in the code. + */ } BreakOpportunity; enum @@ -57,8 +61,10 @@ enum INDEX_BEFORE_AND_AFTER, INDEX_ZERO_WIDTH_SPACE, INDEX_COMBINING_MARK, + INDEX_WORD_JOINER, /* End of the table */ + INDEX_END_OF_TABLE, /* The following are not in the tables */ @@ -70,7 +76,8 @@ enum INDEX_SPACE, INDEX_COMPLEX_CONTEXT, INDEX_AMBIGUOUS, - INDEX_UNKNOWN + INDEX_UNKNOWN, + INDEX_NEXT_LINE }; static BreakOpportunity row_OPEN_PUNCTUATION[INDEX_END_OF_TABLE] = { @@ -78,7 +85,8 @@ static BreakOpportunity row_OPEN_PUNCTUATION[INDEX_END_OF_TABLE] = { BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_CLOSE_PUNCTUATION[INDEX_END_OF_TABLE] = { @@ -86,7 +94,8 @@ static BreakOpportunity row_CLOSE_PUNCTUATION[INDEX_END_OF_TABLE] = { BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, 
BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_QUOTATION[INDEX_END_OF_TABLE] = { @@ -94,7 +103,8 @@ static BreakOpportunity row_QUOTATION[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_NON_BREAKING_GLUE[INDEX_END_OF_TABLE] = { @@ -102,7 +112,8 @@ static BreakOpportunity row_NON_BREAKING_GLUE[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_NON_STARTER[INDEX_END_OF_TABLE] = { @@ -110,7 +121,8 @@ static BreakOpportunity row_NON_STARTER[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_EXCLAMATION[INDEX_END_OF_TABLE] = { @@ -118,7 +130,8 @@ static BreakOpportunity row_EXCLAMATION[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + 
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_SYMBOL[INDEX_END_OF_TABLE] = { @@ -126,15 +139,17 @@ static BreakOpportunity row_SYMBOL[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_INFIX_SEPARATOR[INDEX_END_OF_TABLE] = { BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_PREFIX[INDEX_END_OF_TABLE] = { @@ -142,7 +157,8 @@ static BreakOpportunity row_PREFIX[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_POSTFIX[INDEX_END_OF_TABLE] = { @@ -150,7 +166,8 @@ static BreakOpportunity row_POSTFIX[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, 
BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_NUMERIC[INDEX_END_OF_TABLE] = { @@ -158,7 +175,8 @@ static BreakOpportunity row_NUMERIC[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_ALPHABETIC[INDEX_END_OF_TABLE] = { @@ -166,7 +184,8 @@ static BreakOpportunity row_ALPHABETIC[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_IDEOGRAPHIC[INDEX_END_OF_TABLE] = { @@ -174,7 +193,8 @@ static BreakOpportunity row_IDEOGRAPHIC[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_INSEPARABLE[INDEX_END_OF_TABLE] = { @@ -182,15 +202,17 @@ static BreakOpportunity row_INSEPARABLE[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, 
BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_HYPHEN[INDEX_END_OF_TABLE] = { BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_AFTER[INDEX_END_OF_TABLE] = { @@ -198,7 +220,8 @@ static BreakOpportunity row_AFTER[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_BEFORE[INDEX_END_OF_TABLE] = { @@ -206,7 +229,8 @@ static BreakOpportunity row_BEFORE[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_BEFORE_AND_AFTER[INDEX_END_OF_TABLE] = { @@ -214,7 +238,8 @@ static BreakOpportunity row_BEFORE_AND_AFTER[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, 
BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity row_ZERO_WIDTH_SPACE[INDEX_END_OF_TABLE] = { @@ -222,7 +247,8 @@ static BreakOpportunity row_ZERO_WIDTH_SPACE[INDEX_END_OF_TABLE] = { BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_ALLOWED }; static BreakOpportunity row_COMBINING_MARK[INDEX_END_OF_TABLE] = { @@ -230,7 +256,8 @@ static BreakOpportunity row_COMBINING_MARK[INDEX_END_OF_TABLE] = { BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, - BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES + BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, + BREAK_PROHIBITED }; static BreakOpportunity *line_break_rows[INDEX_END_OF_TABLE] = { @@ -286,7 +313,9 @@ static int line_break_indexes[] = { INDEX_POSTFIX, INDEX_COMPLEX_CONTEXT, INDEX_AMBIGUOUS, - INDEX_UNKNOWN + INDEX_UNKNOWN, + INDEX_NEXT_LINE, + INDEX_WORD_JOINER }; #define BREAK_INDEX(btype) \ @@ -296,7 +325,7 @@ static int line_break_indexes[] = { #define BREAK_OP(before_type, after_type) \ (BREAK_ROW (before_type)[BREAK_INDEX (after_type)]) #define IN_BREAK_TABLE(btype) \ - (BREAK_INDEX(btype) < INDEX_END_OF_TABLE) + (btype <= G_N_ELEMENTS(line_break_indexes) && BREAK_INDEX(btype) < INDEX_END_OF_TABLE) /* Keep these in sync with the same macros in the test program */ @@ -406,6 +435,7 @@ pango_default_break (const gchar *text, gint i; gunichar prev_wc; gunichar next_wc; + GUnicodeBreakType 
next_break_type; GUnicodeType prev_type; GUnicodeBreakType prev_break_type; /* skips spaces */ gboolean prev_was_break_space; @@ -448,6 +478,8 @@ pango_default_break (const gchar *text, else next_wc = '\n'; + next_break_type = g_unichar_break_type (next_wc); + for (i = 0; i <= n_chars; i++) { GUnicodeType type; @@ -456,6 +488,7 @@ pango_default_break (const gchar *text, BreakOpportunity break_op; wc = next_wc; + break_type = next_break_type; if (i == n_chars) { @@ -464,6 +497,7 @@ pango_default_break (const gchar *text, * may not increment next */ next_wc = 0; + next_break_type = G_UNICODE_BREAK_UNKNOWN; } else { @@ -481,6 +515,8 @@ pango_default_break (const gchar *text, next_wc = g_utf8_get_char (next); g_assert (next_wc != 0); } + + next_break_type = g_unichar_break_type (next_wc); } type = g_unichar_type (wc); @@ -613,7 +649,6 @@ pango_default_break (const gchar *text, /* ---- Line breaking ---- */ - break_type = g_unichar_break_type (wc); break_op = BREAK_ALREADY_HANDLED; g_assert (prev_break_type != G_UNICODE_BREAK_SPACE); @@ -625,6 +660,13 @@ pango_default_break (const gchar *text, * it's not a line break either */ { + /* space followed by a combining mark is handled + * specially; (rule 7a from TR 14) + */ + if (break_type == G_UNICODE_BREAK_SPACE && + next_break_type == G_UNICODE_BREAK_COMBINING_MARK) + break_type = G_UNICODE_BREAK_IDEOGRAPHIC; + /* Unicode doesn't specify char wrap; we wrap around all chars * except where a line break is prohibited, which means we * effectively break everywhere except inside runs of spaces. 
@@ -635,6 +677,7 @@ pango_default_break (const gchar *text, { case G_UNICODE_BREAK_MANDATORY: case G_UNICODE_BREAK_LINE_FEED: + case G_UNICODE_BREAK_NEXT_LINE: attrs[i].is_line_break = TRUE; attrs[i].is_mandatory_break = TRUE; break; @@ -657,9 +700,7 @@ pango_default_break (const gchar *text, break; case G_UNICODE_BREAK_SURROGATE: - /* FIXME I have no clue what to do with these, - * but we should do something with them - */ + g_assert_not_reached (); break; case G_UNICODE_BREAK_AMBIGUOUS: @@ -693,6 +734,7 @@ pango_default_break (const gchar *text, case G_UNICODE_BREAK_MANDATORY: case G_UNICODE_BREAK_LINE_FEED: case G_UNICODE_BREAK_CARRIAGE_RETURN: + case G_UNICODE_BREAK_NEXT_LINE: case G_UNICODE_BREAK_SPACE: /* These types all "pile up" at the end of lines and * get elided. @@ -718,9 +760,7 @@ pango_default_break (const gchar *text, break; case G_UNICODE_BREAK_SURROGATE: - /* FIXME this case needs to be handled - */ - break_op = BREAK_IF_SPACES; /* not right at all */ + g_assert_not_reached (); break; default: |