summary | refs | log | tree | commit | diff
path: root/pango/break.c
diff options
context:
space:
mode:
Diffstat (limited to 'pango/break.c')
-rw-r--r-- pango/break.c 106
1 files changed, 73 insertions, 33 deletions
diff --git a/pango/break.c b/pango/break.c
index 3eb1b32a..a1850767 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -32,7 +32,11 @@ typedef enum
BREAK_ALREADY_HANDLED, /* didn't use the table */
BREAK_PROHIBITED, /* no break, even if spaces intervene */
BREAK_IF_SPACES, /* "indirect break" (only if there are spaces) */
- BREAK_ALLOWED /* "direct break" (can always break here) */
+ BREAK_ALLOWED, /* "direct break" (can always break here) */
+ /* TR 14 has one more break-opportunity class,
+ * "indirect break opportunity for combining marks following a space"
+ * but we handle that inline in the code.
+ */
} BreakOpportunity;
enum
@@ -57,8 +61,10 @@ enum
INDEX_BEFORE_AND_AFTER,
INDEX_ZERO_WIDTH_SPACE,
INDEX_COMBINING_MARK,
+ INDEX_WORD_JOINER,
/* End of the table */
+
INDEX_END_OF_TABLE,
/* The following are not in the tables */
@@ -70,7 +76,8 @@ enum
INDEX_SPACE,
INDEX_COMPLEX_CONTEXT,
INDEX_AMBIGUOUS,
- INDEX_UNKNOWN
+ INDEX_UNKNOWN,
+ INDEX_NEXT_LINE
};
static BreakOpportunity row_OPEN_PUNCTUATION[INDEX_END_OF_TABLE] = {
@@ -78,7 +85,8 @@ static BreakOpportunity row_OPEN_PUNCTUATION[INDEX_END_OF_TABLE] = {
BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_CLOSE_PUNCTUATION[INDEX_END_OF_TABLE] = {
@@ -86,7 +94,8 @@ static BreakOpportunity row_CLOSE_PUNCTUATION[INDEX_END_OF_TABLE] = {
BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_QUOTATION[INDEX_END_OF_TABLE] = {
@@ -94,7 +103,8 @@ static BreakOpportunity row_QUOTATION[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_NON_BREAKING_GLUE[INDEX_END_OF_TABLE] = {
@@ -102,7 +112,8 @@ static BreakOpportunity row_NON_BREAKING_GLUE[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_NON_STARTER[INDEX_END_OF_TABLE] = {
@@ -110,7 +121,8 @@ static BreakOpportunity row_NON_STARTER[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_EXCLAMATION[INDEX_END_OF_TABLE] = {
@@ -118,7 +130,8 @@ static BreakOpportunity row_EXCLAMATION[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_SYMBOL[INDEX_END_OF_TABLE] = {
@@ -126,15 +139,17 @@ static BreakOpportunity row_SYMBOL[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_INFIX_SEPARATOR[INDEX_END_OF_TABLE] = {
BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_PREFIX[INDEX_END_OF_TABLE] = {
@@ -142,7 +157,8 @@ static BreakOpportunity row_PREFIX[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_POSTFIX[INDEX_END_OF_TABLE] = {
@@ -150,7 +166,8 @@ static BreakOpportunity row_POSTFIX[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_NUMERIC[INDEX_END_OF_TABLE] = {
@@ -158,7 +175,8 @@ static BreakOpportunity row_NUMERIC[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_ALPHABETIC[INDEX_END_OF_TABLE] = {
@@ -166,7 +184,8 @@ static BreakOpportunity row_ALPHABETIC[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_IDEOGRAPHIC[INDEX_END_OF_TABLE] = {
@@ -174,7 +193,8 @@ static BreakOpportunity row_IDEOGRAPHIC[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_INSEPARABLE[INDEX_END_OF_TABLE] = {
@@ -182,15 +202,17 @@ static BreakOpportunity row_INSEPARABLE[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_HYPHEN[INDEX_END_OF_TABLE] = {
BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_AFTER[INDEX_END_OF_TABLE] = {
@@ -198,7 +220,8 @@ static BreakOpportunity row_AFTER[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_BEFORE[INDEX_END_OF_TABLE] = {
@@ -206,7 +229,8 @@ static BreakOpportunity row_BEFORE[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_BEFORE_AND_AFTER[INDEX_END_OF_TABLE] = {
@@ -214,7 +238,8 @@ static BreakOpportunity row_BEFORE_AND_AFTER[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity row_ZERO_WIDTH_SPACE[INDEX_END_OF_TABLE] = {
@@ -222,7 +247,8 @@ static BreakOpportunity row_ZERO_WIDTH_SPACE[INDEX_END_OF_TABLE] = {
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_ALLOWED
};
static BreakOpportunity row_COMBINING_MARK[INDEX_END_OF_TABLE] = {
@@ -230,7 +256,8 @@ static BreakOpportunity row_COMBINING_MARK[INDEX_END_OF_TABLE] = {
BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
- BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES
+ BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
+ BREAK_PROHIBITED
};
static BreakOpportunity *line_break_rows[INDEX_END_OF_TABLE] = {
@@ -286,7 +313,9 @@ static int line_break_indexes[] = {
INDEX_POSTFIX,
INDEX_COMPLEX_CONTEXT,
INDEX_AMBIGUOUS,
- INDEX_UNKNOWN
+ INDEX_UNKNOWN,
+ INDEX_NEXT_LINE,
+ INDEX_WORD_JOINER
};
#define BREAK_INDEX(btype) \
@@ -296,7 +325,7 @@ static int line_break_indexes[] = {
#define BREAK_OP(before_type, after_type) \
(BREAK_ROW (before_type)[BREAK_INDEX (after_type)])
#define IN_BREAK_TABLE(btype) \
- (BREAK_INDEX(btype) < INDEX_END_OF_TABLE)
+ ((btype) < G_N_ELEMENTS(line_break_indexes) && BREAK_INDEX(btype) < INDEX_END_OF_TABLE) /* strict '<': btype == G_N_ELEMENTS would be one past the end of line_break_indexes */
/* Keep these in sync with the same macros in the test program */
@@ -406,6 +435,7 @@ pango_default_break (const gchar *text,
gint i;
gunichar prev_wc;
gunichar next_wc;
+ GUnicodeBreakType next_break_type;
GUnicodeType prev_type;
GUnicodeBreakType prev_break_type; /* skips spaces */
gboolean prev_was_break_space;
@@ -448,6 +478,8 @@ pango_default_break (const gchar *text,
else
next_wc = '\n';
+ next_break_type = g_unichar_break_type (next_wc);
+
for (i = 0; i <= n_chars; i++)
{
GUnicodeType type;
@@ -456,6 +488,7 @@ pango_default_break (const gchar *text,
BreakOpportunity break_op;
wc = next_wc;
+ break_type = next_break_type;
if (i == n_chars)
{
@@ -464,6 +497,7 @@ pango_default_break (const gchar *text,
* may not increment next
*/
next_wc = 0;
+ next_break_type = G_UNICODE_BREAK_UNKNOWN;
}
else
{
@@ -481,6 +515,8 @@ pango_default_break (const gchar *text,
next_wc = g_utf8_get_char (next);
g_assert (next_wc != 0);
}
+
+ next_break_type = g_unichar_break_type (next_wc);
}
type = g_unichar_type (wc);
@@ -613,7 +649,6 @@ pango_default_break (const gchar *text,
/* ---- Line breaking ---- */
- break_type = g_unichar_break_type (wc);
break_op = BREAK_ALREADY_HANDLED;
g_assert (prev_break_type != G_UNICODE_BREAK_SPACE);
@@ -625,6 +660,13 @@ pango_default_break (const gchar *text,
* it's not a line break either
*/
{
+ /* space followed by a combining mark is handled
+ * specially; (rule 7a from TR 14)
+ */
+ if (break_type == G_UNICODE_BREAK_SPACE &&
+ next_break_type == G_UNICODE_BREAK_COMBINING_MARK)
+ break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
+
/* Unicode doesn't specify char wrap; we wrap around all chars
* except where a line break is prohibited, which means we
* effectively break everywhere except inside runs of spaces.
@@ -635,6 +677,7 @@ pango_default_break (const gchar *text,
{
case G_UNICODE_BREAK_MANDATORY:
case G_UNICODE_BREAK_LINE_FEED:
+ case G_UNICODE_BREAK_NEXT_LINE:
attrs[i].is_line_break = TRUE;
attrs[i].is_mandatory_break = TRUE;
break;
@@ -657,9 +700,7 @@ pango_default_break (const gchar *text,
break;
case G_UNICODE_BREAK_SURROGATE:
- /* FIXME I have no clue what to do with these,
- * but we should do something with them
- */
+ g_assert_not_reached ();
break;
case G_UNICODE_BREAK_AMBIGUOUS:
@@ -693,6 +734,7 @@ pango_default_break (const gchar *text,
case G_UNICODE_BREAK_MANDATORY:
case G_UNICODE_BREAK_LINE_FEED:
case G_UNICODE_BREAK_CARRIAGE_RETURN:
+ case G_UNICODE_BREAK_NEXT_LINE:
case G_UNICODE_BREAK_SPACE:
/* These types all "pile up" at the end of lines and
* get elided.
@@ -718,9 +760,7 @@ pango_default_break (const gchar *text,
break;
case G_UNICODE_BREAK_SURROGATE:
- /* FIXME this case needs to be handled
- */
- break_op = BREAK_IF_SPACES; /* not right at all */
+ g_assert_not_reached ();
break;
default: