summaryrefslogtreecommitdiff
path: root/pango/break.c
diff options
context:
space:
mode:
Diffstat (limited to 'pango/break.c')
-rw-r--r--pango/break.c1506
1 files changed, 753 insertions, 753 deletions
diff --git a/pango/break.c b/pango/break.c
index d3c033d3..e159b024 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -345,15 +345,15 @@ static const int line_break_indexes[] = {
};
#define BREAK_TYPE_SAFE(btype) \
- (btype < G_N_ELEMENTS(line_break_indexes) ? btype : G_UNICODE_BREAK_UNKNOWN)
+ (btype < G_N_ELEMENTS(line_break_indexes) ? btype : G_UNICODE_BREAK_UNKNOWN)
#define BREAK_INDEX(btype) \
- (line_break_indexes[(btype)])
+ (line_break_indexes[(btype)])
#define BREAK_ROW(before_type) \
- (line_break_rows[BREAK_INDEX (before_type)])
+ (line_break_rows[BREAK_INDEX (before_type)])
#define BREAK_OP(before_type, after_type) \
- (BREAK_ROW (before_type)[BREAK_INDEX (after_type)])
+ (BREAK_ROW (before_type)[BREAK_INDEX (after_type)])
#define IN_BREAK_TABLE(btype) \
- (btype < G_N_ELEMENTS(line_break_indexes) && BREAK_INDEX(btype) < INDEX_END_OF_TABLE)
+ (btype < G_N_ELEMENTS(line_break_indexes) && BREAK_INDEX(btype) < INDEX_END_OF_TABLE)
@@ -425,19 +425,19 @@ static const CharJamoProps HangulJamoProps[] = {
*/
#define VIRAMA_SCRIPT(wc) ((wc) >= 0x0901 && (wc) <= 0x17FF)
#define VIRAMA(wc) ((wc) == 0x094D || \
- (wc) == 0x09CD || \
- (wc) == 0x0A4D || \
- (wc) == 0x0ACD || \
- (wc) == 0x0B4D || \
- (wc) == 0x0BCD || \
- (wc) == 0x0C4D || \
- (wc) == 0x0CCD || \
- (wc) == 0x0D4D || \
- (wc) == 0x0DCA || \
- (wc) == 0x0E3A || \
- (wc) == 0x0F84 || \
- (wc) == 0x1039 || \
- (wc) == 0x17D2)
+ (wc) == 0x09CD || \
+ (wc) == 0x0A4D || \
+ (wc) == 0x0ACD || \
+ (wc) == 0x0B4D || \
+ (wc) == 0x0BCD || \
+ (wc) == 0x0C4D || \
+ (wc) == 0x0CCD || \
+ (wc) == 0x0D4D || \
+ (wc) == 0x0DCA || \
+ (wc) == 0x0E3A || \
+ (wc) == 0x0F84 || \
+ (wc) == 0x1039 || \
+ (wc) == 0x17D2)
/* Types of Japanese characters */
#define JAPANESE(wc) ((wc) >= 0x2F00 && (wc) <= 0x30FF)
#define KANJI(wc) ((wc) >= 0x2F00 && (wc) <= 0x2FDF)
@@ -502,10 +502,10 @@ typedef enum
**/
void
pango_default_break (const gchar *text,
- gint length,
- PangoAnalysis *analysis,
- PangoLogAttr *attrs,
- int attrs_len)
+ gint length,
+ PangoAnalysis *analysis,
+ PangoLogAttr *attrs,
+ int attrs_len)
{
/* The rationale for all this is in section 5.15 of the Unicode 3.0 book,
* the line breaking stuff is also in TR14 on unicode.org
@@ -578,41 +578,41 @@ pango_default_break (const gchar *text,
break_type = next_break_type;
if (almost_done)
- {
- /*
- * If we have already reached the end of @text g_utf8_next_char()
- * may not increment next
- */
- next_wc = 0;
+ {
+ /*
+ * If we have already reached the end of @text g_utf8_next_char()
+ * may not increment next
+ */
+ next_wc = 0;
next_break_type = G_UNICODE_BREAK_UNKNOWN;
done = TRUE;
- }
+ }
else
- {
- next = g_utf8_next_char (next);
+ {
+ next = g_utf8_next_char (next);
if ((length >= 0 && next >= text + length) || *next == '\0')
- {
- /* This is how we fill in the last element (end position) of the
- * attr array - assume there's a paragraph separator off the end
+ {
+ /* This is how we fill in the last element (end position) of the
+ * attr array - assume there's a paragraph separator off the end
* of @text.
- */
+ */
next_wc = PARAGRAPH_SEPARATOR;
almost_done = TRUE;
- }
- else
+ }
+ else
next_wc = g_utf8_get_char (next);
next_break_type = g_unichar_break_type (next_wc);
- next_break_type = BREAK_TYPE_SAFE (next_break_type);
- }
+ next_break_type = BREAK_TYPE_SAFE (next_break_type);
+ }
type = g_unichar_type (wc);
jamo = JAMO_TYPE (break_type);
/* Determine whether this forms a Hangul syllable with prev. */
if (jamo == NO_JAMO)
- makes_hangul_syllable = FALSE;
+ makes_hangul_syllable = FALSE;
else
{
JamoType prev_end = HangulJamoProps[prev_jamo].end ;
@@ -631,106 +631,106 @@ pango_default_break (const gchar *text,
/* ---- Cursor position breaks (Grapheme breaks) ---- */
if (wc == '\n')
- {
- /* Break before line feed unless prev char is a CR */
-
- if (prev_wc != '\r')
- attrs[i].is_cursor_position = TRUE;
- else
- attrs[i].is_cursor_position = FALSE;
- }
+ {
+ /* Break before line feed unless prev char is a CR */
+
+ if (prev_wc != '\r')
+ attrs[i].is_cursor_position = TRUE;
+ else
+ attrs[i].is_cursor_position = FALSE;
+ }
else if (i == 0 ||
- prev_type == G_UNICODE_CONTROL ||
- prev_type == G_UNICODE_FORMAT)
- {
- /* Break at first position (must be special cased, or if the
- * first char is say a combining mark there won't be a
- * cursor position at the start, which seems wrong to me
- * ???? - maybe it makes sense though, who knows)
- */
- /* break after all format or control characters */
- attrs[i].is_cursor_position = TRUE;
- }
+ prev_type == G_UNICODE_CONTROL ||
+ prev_type == G_UNICODE_FORMAT)
+ {
+ /* Break at first position (must be special cased, or if the
+ * first char is say a combining mark there won't be a
+ * cursor position at the start, which seems wrong to me
+ * ???? - maybe it makes sense though, who knows)
+ */
+ /* break after all format or control characters */
+ attrs[i].is_cursor_position = TRUE;
+ }
else
- {
- switch (type)
- {
- case G_UNICODE_CONTROL:
- case G_UNICODE_FORMAT:
- /* Break before all format or control characters */
- attrs[i].is_cursor_position = TRUE;
- break;
-
- case G_UNICODE_COMBINING_MARK:
- case G_UNICODE_ENCLOSING_MARK:
- case G_UNICODE_NON_SPACING_MARK:
- /* Unicode spec includes "Combining marks plus Tibetan
- * subjoined characters" as joining chars, but lists the
- * Tibetan subjoined characters as combining marks, and
- * g_unichar_type() returns NON_SPACING_MARK for the Tibetan
- * subjoined characters. So who knows, beats me.
- */
-
- /* It's a joining character, break only if preceded by
- * control or format; we already handled the case where
- * it was preceded earlier, so here we know it wasn't,
- * don't break
- */
- attrs[i].is_cursor_position = FALSE;
- break;
-
- case G_UNICODE_LOWERCASE_LETTER:
- case G_UNICODE_MODIFIER_LETTER:
- case G_UNICODE_OTHER_LETTER:
- case G_UNICODE_TITLECASE_LETTER:
- case G_UNICODE_UPPERCASE_LETTER:
+ {
+ switch (type)
+ {
+ case G_UNICODE_CONTROL:
+ case G_UNICODE_FORMAT:
+ /* Break before all format or control characters */
+ attrs[i].is_cursor_position = TRUE;
+ break;
+
+ case G_UNICODE_COMBINING_MARK:
+ case G_UNICODE_ENCLOSING_MARK:
+ case G_UNICODE_NON_SPACING_MARK:
+ /* Unicode spec includes "Combining marks plus Tibetan
+ * subjoined characters" as joining chars, but lists the
+ * Tibetan subjoined characters as combining marks, and
+ * g_unichar_type() returns NON_SPACING_MARK for the Tibetan
+ * subjoined characters. So who knows, beats me.
+ */
+
+ /* It's a joining character, break only if preceded by
+ * control or format; we already handled the case where
+ * it was preceded earlier, so here we know it wasn't,
+ * don't break
+ */
+ attrs[i].is_cursor_position = FALSE;
+ break;
+
+ case G_UNICODE_LOWERCASE_LETTER:
+ case G_UNICODE_MODIFIER_LETTER:
+ case G_UNICODE_OTHER_LETTER:
+ case G_UNICODE_TITLECASE_LETTER:
+ case G_UNICODE_UPPERCASE_LETTER:
if (makes_hangul_syllable)
- attrs[i].is_cursor_position = FALSE;
- else
- {
- /* Handle non-Hangul-syllable non-combining chars */
+ attrs[i].is_cursor_position = FALSE;
+ else
+ {
+ /* Handle non-Hangul-syllable non-combining chars */
- /* Break before Jamo if they are in a broken sequence or
- * next to non-Jamo; break if preceded by Jamo; don't
+ /* Break before Jamo if they are in a broken sequence or
+ * next to non-Jamo; break if preceded by Jamo; don't
* break if a letter is preceded by a virama; break in
* all other cases. No need to check whether we are or are
- * preceded by Jamo explicitly, since a Jamo is not
- * a virama, we just break in all cases where we
- * aren't a virama or preceded by one. Don't fool with
+ * preceded by Jamo explicitly, since a Jamo is not
+ * a virama, we just break in all cases where we
+ * aren't a virama or preceded by one. Don't fool with
* viramas if we aren't part of a script that uses them.
- */
-
- if (VIRAMA_SCRIPT (wc))
- {
- /* Check whether we're preceded by a virama; this
- * could use some optimization.
- */
- if (VIRAMA (prev_wc))
- attrs[i].is_cursor_position = FALSE;
- else
- attrs[i].is_cursor_position = TRUE;
- }
- else
- {
- attrs[i].is_cursor_position = TRUE;
- }
- }
- break;
-
- default:
- /* Some weirdo char, just break here, why not */
- attrs[i].is_cursor_position = TRUE;
- break;
- }
- }
+ */
+
+ if (VIRAMA_SCRIPT (wc))
+ {
+ /* Check whether we're preceded by a virama; this
+ * could use some optimization.
+ */
+ if (VIRAMA (prev_wc))
+ attrs[i].is_cursor_position = FALSE;
+ else
+ attrs[i].is_cursor_position = TRUE;
+ }
+ else
+ {
+ attrs[i].is_cursor_position = TRUE;
+ }
+ }
+ break;
+
+ default:
+ /* Some weirdo char, just break here, why not */
+ attrs[i].is_cursor_position = TRUE;
+ break;
+ }
+ }
/* If this is a grapheme boundary, we have to decide if backspace
* deletes a character or the whole grapheme cluster */
if (attrs[i].is_cursor_position)
- attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character);
+ attrs[i].backspace_deletes_character = BACKSPACE_DELETES_CHARACTER (base_character);
else
- attrs[i].backspace_deletes_character = FALSE;
+ attrs[i].backspace_deletes_character = FALSE;
/* ---- Line breaking ---- */
@@ -742,9 +742,9 @@ pango_default_break (const gchar *text,
attrs[i].is_mandatory_break = FALSE;
if (attrs[i].is_cursor_position) /* If it's not a grapheme boundary,
- * it's not a line break either
- */
- {
+ * it's not a line break either
+ */
+ {
/* space followed by a combining mark is handled
* specially; (rule 7a from TR 14)
*/
@@ -752,15 +752,15 @@ pango_default_break (const gchar *text,
next_break_type == G_UNICODE_BREAK_COMBINING_MARK)
break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
- /* Unicode doesn't specify char wrap; we wrap around all chars
- * except where a line break is prohibited, which means we
- * effectively break everywhere except inside runs of spaces.
- */
- attrs[i].is_char_break = TRUE;
+ /* Unicode doesn't specify char wrap; we wrap around all chars
+ * except where a line break is prohibited, which means we
+ * effectively break everywhere except inside runs of spaces.
+ */
+ attrs[i].is_char_break = TRUE;
/* Make any necessary replacements first */
- switch (prev_break_type)
- {
+ switch (prev_break_type)
+ {
case G_UNICODE_BREAK_HANGUL_L_JAMO:
case G_UNICODE_BREAK_HANGUL_V_JAMO:
case G_UNICODE_BREAK_HANGUL_T_JAMO:
@@ -771,16 +771,16 @@ pango_default_break (const gchar *text,
prev_break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
break;
- case G_UNICODE_BREAK_AMBIGUOUS:
+ case G_UNICODE_BREAK_AMBIGUOUS:
/* FIXME
- * we need to resolve the East Asian width
- * to decide what to do here
+ * we need to resolve the East Asian width
+ * to decide what to do here
*/
- case G_UNICODE_BREAK_COMPLEX_CONTEXT:
+ case G_UNICODE_BREAK_COMPLEX_CONTEXT:
/* FIXME
- * language engines should handle this case...
+ * language engines should handle this case...
*/
- case G_UNICODE_BREAK_UNKNOWN:
+ case G_UNICODE_BREAK_UNKNOWN:
/* convert unknown, complex, ambiguous to ALPHABETIC
*/
prev_break_type = G_UNICODE_BREAK_ALPHABETIC;
@@ -790,71 +790,71 @@ pango_default_break (const gchar *text,
;
}
- switch (prev_break_type)
- {
- case G_UNICODE_BREAK_MANDATORY:
- case G_UNICODE_BREAK_LINE_FEED:
- case G_UNICODE_BREAK_NEXT_LINE:
- attrs[i].is_line_break = TRUE;
- attrs[i].is_mandatory_break = TRUE;
- break;
-
- case G_UNICODE_BREAK_CARRIAGE_RETURN:
- if (wc != '\n')
- {
- attrs[i].is_line_break = TRUE;
- attrs[i].is_mandatory_break = TRUE;
- }
- break;
-
- case G_UNICODE_BREAK_CONTINGENT:
- /* can break after 0xFFFC by default, though we might want
- * to eventually have a PangoLayout setting or
- * PangoAttribute that disables this, if for some
- * application breaking after objects is not desired.
- */
- break_op = BREAK_ALLOWED;
- break;
-
- case G_UNICODE_BREAK_SURROGATE:
+ switch (prev_break_type)
+ {
+ case G_UNICODE_BREAK_MANDATORY:
+ case G_UNICODE_BREAK_LINE_FEED:
+ case G_UNICODE_BREAK_NEXT_LINE:
+ attrs[i].is_line_break = TRUE;
+ attrs[i].is_mandatory_break = TRUE;
+ break;
+
+ case G_UNICODE_BREAK_CARRIAGE_RETURN:
+ if (wc != '\n')
+ {
+ attrs[i].is_line_break = TRUE;
+ attrs[i].is_mandatory_break = TRUE;
+ }
+ break;
+
+ case G_UNICODE_BREAK_CONTINGENT:
+ /* can break after 0xFFFC by default, though we might want
+ * to eventually have a PangoLayout setting or
+ * PangoAttribute that disables this, if for some
+ * application breaking after objects is not desired.
+ */
+ break_op = BREAK_ALLOWED;
+ break;
+
+ case G_UNICODE_BREAK_SURROGATE:
g_assert_not_reached ();
- break;
-
- default:
- g_assert (IN_BREAK_TABLE (prev_break_type));
-
- /* Note that our table assumes that combining marks
- * are only applied to alphabetic characters;
- * tech report 14 explains how to remove this assumption
- * from the code, if anyone ever cares, but it shouldn't
- * be a problem. Also this issue sort of goes
- * away since we only look for breaks on grapheme
- * boundaries.
- */
-
- switch (break_type)
- {
- case G_UNICODE_BREAK_MANDATORY:
- case G_UNICODE_BREAK_LINE_FEED:
- case G_UNICODE_BREAK_CARRIAGE_RETURN:
- case G_UNICODE_BREAK_NEXT_LINE:
- case G_UNICODE_BREAK_SPACE:
- /* These types all "pile up" at the end of lines and
- * get elided.
- */
- break_op = BREAK_PROHIBITED;
- break;
-
- case G_UNICODE_BREAK_CONTINGENT:
- /* break before 0xFFFC by default, eventually
- * make this configurable?
- */
- break_op = BREAK_ALLOWED;
- break;
-
- case G_UNICODE_BREAK_SURROGATE:
+ break;
+
+ default:
+ g_assert (IN_BREAK_TABLE (prev_break_type));
+
+ /* Note that our table assumes that combining marks
+ * are only applied to alphabetic characters;
+ * tech report 14 explains how to remove this assumption
+ * from the code, if anyone ever cares, but it shouldn't
+ * be a problem. Also this issue sort of goes
+ * away since we only look for breaks on grapheme
+ * boundaries.
+ */
+
+ switch (break_type)
+ {
+ case G_UNICODE_BREAK_MANDATORY:
+ case G_UNICODE_BREAK_LINE_FEED:
+ case G_UNICODE_BREAK_CARRIAGE_RETURN:
+ case G_UNICODE_BREAK_NEXT_LINE:
+ case G_UNICODE_BREAK_SPACE:
+ /* These types all "pile up" at the end of lines and
+ * get elided.
+ */
+ break_op = BREAK_PROHIBITED;
+ break;
+
+ case G_UNICODE_BREAK_CONTINGENT:
+ /* break before 0xFFFC by default, eventually
+ * make this configurable?
+ */
+ break_op = BREAK_ALLOWED;
+ break;
+
+ case G_UNICODE_BREAK_SURROGATE:
g_assert_not_reached ();
- break;
+ break;
/* Hangul additions are from Unicode 4.1 UAX#14 */
case G_UNICODE_BREAK_HANGUL_L_JAMO:
@@ -862,9 +862,9 @@ pango_default_break (const gchar *text,
case G_UNICODE_BREAK_HANGUL_T_JAMO:
case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE:
case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE:
- /* treat Jamo as IDEOGRAPHIC from now
+ /* treat Jamo as IDEOGRAPHIC from now
*/
- break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
+ break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
if (makes_hangul_syllable)
break_op = BREAK_IF_SPACES;
@@ -882,55 +882,55 @@ pango_default_break (const gchar *text,
* language engines should handle this case...
*/
case G_UNICODE_BREAK_UNKNOWN:
- /* treat unknown, complex, and ambiguous like ALPHABETIC
+ /* treat unknown, complex, and ambiguous like ALPHABETIC
* for now
- */
- break_op = BREAK_OP (prev_break_type, G_UNICODE_BREAK_ALPHABETIC);
- break;
+ */
+ break_op = BREAK_OP (prev_break_type, G_UNICODE_BREAK_ALPHABETIC);
+ break;
default:
g_assert (IN_BREAK_TABLE (break_type));
- break_op = BREAK_OP (prev_break_type, break_type);
- break;
- }
- break;
- }
-
- if (break_op != BREAK_ALREADY_HANDLED)
- {
- switch (break_op)
- {
- case BREAK_PROHIBITED:
- /* can't break here */
- attrs[i].is_char_break = FALSE;
- break;
-
- case BREAK_IF_SPACES:
- /* break if prev char was space */
- if (prev_was_break_space)
- attrs[i].is_line_break = TRUE;
- break;
-
- case BREAK_ALLOWED:
- attrs[i].is_line_break = TRUE;
- break;
-
- default:
- g_assert_not_reached ();
- break;
- }
- }
- }
+ break_op = BREAK_OP (prev_break_type, break_type);
+ break;
+ }
+ break;
+ }
+
+ if (break_op != BREAK_ALREADY_HANDLED)
+ {
+ switch (break_op)
+ {
+ case BREAK_PROHIBITED:
+ /* can't break here */
+ attrs[i].is_char_break = FALSE;
+ break;
+
+ case BREAK_IF_SPACES:
+ /* break if prev char was space */
+ if (prev_was_break_space)
+ attrs[i].is_line_break = TRUE;
+ break;
+
+ case BREAK_ALLOWED:
+ attrs[i].is_line_break = TRUE;
+ break;
+
+ default:
+ g_assert_not_reached ();
+ break;
+ }
+ }
+ }
if (break_type != G_UNICODE_BREAK_SPACE)
- {
- prev_break_type = break_type;
- prev_was_break_space = FALSE;
+ {
+ prev_break_type = break_type;
+ prev_was_break_space = FALSE;
prev_jamo = jamo;
- }
+ }
else
- prev_was_break_space = TRUE;
+ prev_was_break_space = TRUE;
/* ---- Word breaks ---- */
@@ -939,100 +939,100 @@ pango_default_break (const gchar *text,
attrs[i].is_word_end = FALSE;
if (current_word_type != WordNone)
- {
- /* Check for a word end */
- switch (type)
- {
- case G_UNICODE_COMBINING_MARK:
- case G_UNICODE_ENCLOSING_MARK:
- case G_UNICODE_NON_SPACING_MARK:
+ {
+ /* Check for a word end */
+ switch (type)
+ {
+ case G_UNICODE_COMBINING_MARK:
+ case G_UNICODE_ENCLOSING_MARK:
+ case G_UNICODE_NON_SPACING_MARK:
case G_UNICODE_FORMAT:
- /* nothing, we just eat these up as part of the word */
- break;
-
- case G_UNICODE_LOWERCASE_LETTER:
- case G_UNICODE_MODIFIER_LETTER:
- case G_UNICODE_OTHER_LETTER:
- case G_UNICODE_TITLECASE_LETTER:
- case G_UNICODE_UPPERCASE_LETTER:
- if (current_word_type == WordLetters)
- {
- /* Japanese special cases for ending the word */
- if (JAPANESE (last_word_letter) ||
- JAPANESE (wc))
- {
- if ((HIRAGANA (last_word_letter) &&
- !HIRAGANA (wc)) ||
- (KATAKANA (last_word_letter) &&
- !(KATAKANA (wc) || HIRAGANA (wc))) ||
- (KANJI (last_word_letter) &&
- !(HIRAGANA (wc) || KANJI (wc))) ||
- (JAPANESE (last_word_letter) &&
- !JAPANESE (wc)) ||
- (!JAPANESE (last_word_letter) &&
- JAPANESE (wc)))
- attrs[i].is_word_end = TRUE;
- }
- }
- else
- {
- /* end the number word, start the letter word */
- attrs[i].is_word_end = TRUE;
- attrs[i].is_word_start = TRUE;
- current_word_type = WordLetters;
- }
-
- last_word_letter = wc;
- break;
-
- case G_UNICODE_DECIMAL_NUMBER:
- case G_UNICODE_LETTER_NUMBER:
- case G_UNICODE_OTHER_NUMBER:
- if (current_word_type != WordNumbers)
- {
- attrs[i].is_word_end = TRUE;
- attrs[i].is_word_start = TRUE;
- current_word_type = WordNumbers;
- }
-
- last_word_letter = wc;
- break;
-
- default:
- /* Punctuation, control/format chars, etc. all end a word. */
- attrs[i].is_word_end = TRUE;
+ /* nothing, we just eat these up as part of the word */
+ break;
+
+ case G_UNICODE_LOWERCASE_LETTER:
+ case G_UNICODE_MODIFIER_LETTER:
+ case G_UNICODE_OTHER_LETTER:
+ case G_UNICODE_TITLECASE_LETTER:
+ case G_UNICODE_UPPERCASE_LETTER:
+ if (current_word_type == WordLetters)
+ {
+ /* Japanese special cases for ending the word */
+ if (JAPANESE (last_word_letter) ||
+ JAPANESE (wc))
+ {
+ if ((HIRAGANA (last_word_letter) &&
+ !HIRAGANA (wc)) ||
+ (KATAKANA (last_word_letter) &&
+ !(KATAKANA (wc) || HIRAGANA (wc))) ||
+ (KANJI (last_word_letter) &&
+ !(HIRAGANA (wc) || KANJI (wc))) ||
+ (JAPANESE (last_word_letter) &&
+ !JAPANESE (wc)) ||
+ (!JAPANESE (last_word_letter) &&
+ JAPANESE (wc)))
+ attrs[i].is_word_end = TRUE;
+ }
+ }
+ else
+ {
+ /* end the number word, start the letter word */
+ attrs[i].is_word_end = TRUE;
+ attrs[i].is_word_start = TRUE;
+ current_word_type = WordLetters;
+ }
+
+ last_word_letter = wc;
+ break;
+
+ case G_UNICODE_DECIMAL_NUMBER:
+ case G_UNICODE_LETTER_NUMBER:
+ case G_UNICODE_OTHER_NUMBER:
+ if (current_word_type != WordNumbers)
+ {
+ attrs[i].is_word_end = TRUE;
+ attrs[i].is_word_start = TRUE;
+ current_word_type = WordNumbers;
+ }
+
+ last_word_letter = wc;
+ break;
+
+ default:
+ /* Punctuation, control/format chars, etc. all end a word. */
+ attrs[i].is_word_end = TRUE;
current_word_type = WordNone;
- break;
- }
- }
+ break;
+ }
+ }
else
- {
- /* Check for a word start */
- switch (type)
- {
- case G_UNICODE_LOWERCASE_LETTER:
- case G_UNICODE_MODIFIER_LETTER:
- case G_UNICODE_OTHER_LETTER:
- case G_UNICODE_TITLECASE_LETTER:
- case G_UNICODE_UPPERCASE_LETTER:
- current_word_type = WordLetters;
- last_word_letter = wc;
- attrs[i].is_word_start = TRUE;
- break;
-
- case G_UNICODE_DECIMAL_NUMBER:
- case G_UNICODE_LETTER_NUMBER:
- case G_UNICODE_OTHER_NUMBER:
- current_word_type = WordNumbers;
- last_word_letter = wc;
- attrs[i].is_word_start = TRUE;
- break;
-
- default:
- /* No word here */
- break;
- }
- }
+ {
+ /* Check for a word start */
+ switch (type)
+ {
+ case G_UNICODE_LOWERCASE_LETTER:
+ case G_UNICODE_MODIFIER_LETTER:
+ case G_UNICODE_OTHER_LETTER:
+ case G_UNICODE_TITLECASE_LETTER:
+ case G_UNICODE_UPPERCASE_LETTER:
+ current_word_type = WordLetters;
+ last_word_letter = wc;
+ attrs[i].is_word_start = TRUE;
+ break;
+
+ case G_UNICODE_DECIMAL_NUMBER:
+ case G_UNICODE_LETTER_NUMBER:
+ case G_UNICODE_OTHER_NUMBER:
+ current_word_type = WordNumbers;
+ last_word_letter = wc;
+ attrs[i].is_word_start = TRUE;
+ break;
+
+ default:
+ /* No word here */
+ break;
+ }
+ }
/* ---- Sentence breaks ---- */
@@ -1050,21 +1050,21 @@ pango_default_break (const gchar *text,
*/
#define MAYBE_START_NEW_SENTENCE \
- switch (type) \
- { \
- case G_UNICODE_LINE_SEPARATOR: \
- case G_UNICODE_PARAGRAPH_SEPARATOR: \
- case G_UNICODE_CONTROL: \
- case G_UNICODE_FORMAT: \
- case G_UNICODE_SPACE_SEPARATOR: \
- sentence_state = STATE_SENTENCE_OUTSIDE; \
- break; \
- \
- default: \
- sentence_state = STATE_SENTENCE_BODY; \
- attrs[i].is_sentence_start = TRUE; \
- break; \
- }
+ switch (type) \
+ { \
+ case G_UNICODE_LINE_SEPARATOR: \
+ case G_UNICODE_PARAGRAPH_SEPARATOR: \
+ case G_UNICODE_CONTROL: \
+ case G_UNICODE_FORMAT: \
+ case G_UNICODE_SPACE_SEPARATOR: \
+ sentence_state = STATE_SENTENCE_OUTSIDE; \
+ break; \
+ \
+ default: \
+ sentence_state = STATE_SENTENCE_BODY; \
+ attrs[i].is_sentence_start = TRUE; \
+ break; \
+ }
/* No sentence break at the start of the text */
@@ -1084,374 +1084,374 @@ pango_default_break (const gchar *text,
* followed by newline
*/
switch (prev_type)
- {
- case G_UNICODE_LINE_SEPARATOR:
- case G_UNICODE_PARAGRAPH_SEPARATOR:
- case G_UNICODE_CONTROL:
- case G_UNICODE_FORMAT:
- if (wc == '\r')
- {
- if (next_wc != '\n')
- attrs[i].is_sentence_boundary = TRUE;
- }
- else
- attrs[i].is_sentence_boundary = TRUE;
- break;
-
- default:
- break;
- }
+ {
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ case G_UNICODE_CONTROL:
+ case G_UNICODE_FORMAT:
+ if (wc == '\r')
+ {
+ if (next_wc != '\n')
+ attrs[i].is_sentence_boundary = TRUE;
+ }
+ else
+ attrs[i].is_sentence_boundary = TRUE;
+ break;
+
+ default:
+ break;
+ }
/* break before para/line separators except newline following
* carriage return
*/
switch (type)
- {
- case G_UNICODE_LINE_SEPARATOR:
- case G_UNICODE_PARAGRAPH_SEPARATOR:
- case G_UNICODE_CONTROL:
- case G_UNICODE_FORMAT:
- if (wc == '\n')
- {
- if (prev_wc != '\r')
- attrs[i].is_sentence_boundary = TRUE;
- }
- else
- attrs[i].is_sentence_boundary = TRUE;
- break;
-
- default:
- break;
- }
+ {
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ case G_UNICODE_CONTROL:
+ case G_UNICODE_FORMAT:
+ if (wc == '\n')
+ {
+ if (prev_wc != '\r')
+ attrs[i].is_sentence_boundary = TRUE;
+ }
+ else
+ attrs[i].is_sentence_boundary = TRUE;
+ break;
+
+ default:
+ break;
+ }
switch (sentence_state)
- {
- case STATE_SENTENCE_OUTSIDE:
- /* Start sentence if we have non-whitespace/format/control */
- switch (type)
- {
- case G_UNICODE_LINE_SEPARATOR:
- case G_UNICODE_PARAGRAPH_SEPARATOR:
- case G_UNICODE_CONTROL:
- case G_UNICODE_FORMAT:
- case G_UNICODE_SPACE_SEPARATOR:
- break;
-
- default:
- attrs[i].is_sentence_start = TRUE;
- sentence_state = STATE_SENTENCE_BODY;
- break;
- }
- break;
-
- case STATE_SENTENCE_BODY:
- /* If we already broke here due to separators, end the sentence. */
- if (attrs[i].is_sentence_boundary)
- {
- attrs[i].is_sentence_end = TRUE;
-
- MAYBE_START_NEW_SENTENCE;
- }
- else
- {
- if (wc == '.')
- sentence_state = STATE_SENTENCE_DOT;
- else if (wc == '?' || wc == '!')
- sentence_state = STATE_SENTENCE_TERM;
- }
- break;
-
- case STATE_SENTENCE_TERM:
- /* End sentence on anything but close punctuation and some
- * loosely-specified OTHER_PUNCTUATION such as period,
- * comma, etc.; follow Unicode rules for breaks
- */
- switch (type)
- {
- case G_UNICODE_OTHER_PUNCTUATION:
- case G_UNICODE_CLOSE_PUNCTUATION:
- if (type == G_UNICODE_CLOSE_PUNCTUATION ||
- wc == '.' ||
- wc == ',' ||
- wc == '?' ||
- wc == '!')
- sentence_state = STATE_SENTENCE_POST_TERM_CLOSE;
- else
- {
- attrs[i].is_sentence_end = TRUE;
- attrs[i].is_sentence_boundary = TRUE;
-
- MAYBE_START_NEW_SENTENCE;
- }
- break;
-
- case G_UNICODE_SPACE_SEPARATOR:
- attrs[i].is_sentence_end = TRUE;
- sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
- break;
-
- case G_UNICODE_LINE_SEPARATOR:
- case G_UNICODE_PARAGRAPH_SEPARATOR:
- attrs[i].is_sentence_end = TRUE;
- sentence_state = STATE_SENTENCE_POST_TERM_SEP;
- break;
-
- default:
- attrs[i].is_sentence_end = TRUE;
- attrs[i].is_sentence_boundary = TRUE;
-
- MAYBE_START_NEW_SENTENCE;
-
- break;
- }
- break;
-
- case STATE_SENTENCE_POST_TERM_CLOSE:
- /* End sentence on anything besides more punctuation; follow
- * rules for breaks
- */
- switch (type)
- {
- case G_UNICODE_OTHER_PUNCTUATION:
- case G_UNICODE_CLOSE_PUNCTUATION:
- if (type == G_UNICODE_CLOSE_PUNCTUATION ||
- wc == '.' ||
- wc == ',' ||
- wc == '?' ||
- wc == '!')
- /* continue in this state */
- ;
- else
- {
- attrs[i].is_sentence_end = TRUE;
- attrs[i].is_sentence_boundary = TRUE;
-
- MAYBE_START_NEW_SENTENCE;
- }
- break;
-
- case G_UNICODE_SPACE_SEPARATOR:
- attrs[i].is_sentence_end = TRUE;
- sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
- break;
-
- case G_UNICODE_LINE_SEPARATOR:
- case G_UNICODE_PARAGRAPH_SEPARATOR:
- attrs[i].is_sentence_end = TRUE;
- /* undo the unconditional break-at-all-line/para-separators
- * from above; I'm not sure this is what the Unicode spec
- * intends, but it seems right - we get to include
- * a single line/para separator in the sentence according
- * to their rules
- */
- attrs[i].is_sentence_boundary = FALSE;
- sentence_state = STATE_SENTENCE_POST_TERM_SEP;
- break;
-
- default:
- attrs[i].is_sentence_end = TRUE;
- attrs[i].is_sentence_boundary = TRUE;
-
- MAYBE_START_NEW_SENTENCE;
-
- break;
- }
- break;
-
- case STATE_SENTENCE_POST_TERM_SPACE:
-
- /* Sentence is definitely already ended; to enter this state
- * we had to see a space, which ends the sentence.
- */
-
- switch (type)
- {
- case G_UNICODE_SPACE_SEPARATOR:
- /* continue in this state */
- break;
-
- case G_UNICODE_LINE_SEPARATOR:
- case G_UNICODE_PARAGRAPH_SEPARATOR:
- /* undo the unconditional break-at-all-line/para-separators
- * from above; I'm not sure this is what the Unicode spec
- * intends, but it seems right
- */
- attrs[i].is_sentence_boundary = FALSE;
- sentence_state = STATE_SENTENCE_POST_TERM_SEP;
- break;
-
- default:
- attrs[i].is_sentence_boundary = TRUE;
-
- MAYBE_START_NEW_SENTENCE;
-
- break;
- }
- break;
-
- case STATE_SENTENCE_POST_TERM_SEP:
- /* Break is forced at this point, unless we're a newline
- * after a CR, then we will break after the newline on the
- * next iteration. Only a single Sep can be in the
- * sentence.
- */
- if (!(prev_wc == '\r' && wc == '\n'))
- attrs[i].is_sentence_boundary = TRUE;
-
- MAYBE_START_NEW_SENTENCE;
-
- break;
-
- case STATE_SENTENCE_DOT:
- switch (type)
- {
- case G_UNICODE_CLOSE_PUNCTUATION:
- sentence_state = STATE_SENTENCE_POST_DOT_CLOSE;
- break;
-
- case G_UNICODE_SPACE_SEPARATOR:
- possible_sentence_end = i;
- sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
- break;
-
- default:
- /* If we broke on a control/format char, end the
- * sentence; else this was not a sentence end, since
- * we didn't enter the POST_DOT_SPACE state.
- */
- if (attrs[i].is_sentence_boundary)
- {
- attrs[i].is_sentence_end = TRUE;
-
- MAYBE_START_NEW_SENTENCE;
- }
- else
- sentence_state = STATE_SENTENCE_BODY;
- break;
- }
- break;
-
- case STATE_SENTENCE_POST_DOT_CLOSE:
- switch (type)
- {
- case G_UNICODE_SPACE_SEPARATOR:
- possible_sentence_end = i;
- sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
- break;
-
- default:
- /* If we broke on a control/format char, end the
- * sentence; else this was not a sentence end, since
- * we didn't enter the POST_DOT_SPACE state.
- */
- if (attrs[i].is_sentence_boundary)
- {
- attrs[i].is_sentence_end = TRUE;
-
- MAYBE_START_NEW_SENTENCE;
- }
- else
- sentence_state = STATE_SENTENCE_BODY;
- break;
- }
- break;
-
- case STATE_SENTENCE_POST_DOT_SPACE:
-
- possible_sentence_boundary = i;
-
- switch (type)
- {
- case G_UNICODE_SPACE_SEPARATOR:
- /* remain in current state */
- break;
-
- case G_UNICODE_OPEN_PUNCTUATION:
- sentence_state = STATE_SENTENCE_POST_DOT_OPEN;
- break;
-
- case G_UNICODE_LOWERCASE_LETTER:
- /* wasn't a sentence-ending period; so re-enter the sentence
- * body
- */
- sentence_state = STATE_SENTENCE_BODY;
- break;
-
- default:
- /* End the sentence, break, maybe start a new one */
-
- g_assert (possible_sentence_end >= 0);
- g_assert (possible_sentence_boundary >= 0);
-
- attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
- attrs[possible_sentence_end].is_sentence_end = TRUE;
-
- possible_sentence_end = -1;
- possible_sentence_boundary = -1;
-
- MAYBE_START_NEW_SENTENCE;
-
- break;
- }
- break;
-
- case STATE_SENTENCE_POST_DOT_OPEN:
- switch (type)
- {
- case G_UNICODE_OPEN_PUNCTUATION:
- /* continue in current state */
- break;
-
- case G_UNICODE_LOWERCASE_LETTER:
- /* wasn't a sentence-ending period; so re-enter the sentence
- * body
- */
- sentence_state = STATE_SENTENCE_BODY;
- break;
-
- default:
- /* End the sentence, break, maybe start a new one */
-
- g_assert (possible_sentence_end >= 0);
- g_assert (possible_sentence_boundary >= 0);
-
- attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
- attrs[possible_sentence_end].is_sentence_end = TRUE;
-
- possible_sentence_end = -1;
- possible_sentence_boundary = -1;
-
- MAYBE_START_NEW_SENTENCE;
-
- break;
- }
- break;
-
- case STATE_SENTENCE_POST_DOT_SEP:
- /* Break is forced at this point, unless we're a newline
- * after a CR, then we will break after the newline on the
- * next iteration. Only a single Sep can be in the
- * sentence.
- */
- if (!(prev_wc == '\r' && wc == '\n'))
- attrs[i].is_sentence_boundary = TRUE;
+ {
+ case STATE_SENTENCE_OUTSIDE:
+ /* Start sentence if we have non-whitespace/format/control */
+ switch (type)
+ {
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ case G_UNICODE_CONTROL:
+ case G_UNICODE_FORMAT:
+ case G_UNICODE_SPACE_SEPARATOR:
+ break;
- g_assert (possible_sentence_end >= 0);
- g_assert (possible_sentence_boundary >= 0);
+ default:
+ attrs[i].is_sentence_start = TRUE;
+ sentence_state = STATE_SENTENCE_BODY;
+ break;
+ }
+ break;
- attrs[possible_sentence_end].is_sentence_end = TRUE;
+ case STATE_SENTENCE_BODY:
+ /* If we already broke here due to separators, end the sentence. */
+ if (attrs[i].is_sentence_boundary)
+ {
+ attrs[i].is_sentence_end = TRUE;
- possible_sentence_end = -1;
- possible_sentence_boundary = -1;
+ MAYBE_START_NEW_SENTENCE;
+ }
+ else
+ {
+ if (wc == '.')
+ sentence_state = STATE_SENTENCE_DOT;
+ else if (wc == '?' || wc == '!')
+ sentence_state = STATE_SENTENCE_TERM;
+ }
+ break;
- MAYBE_START_NEW_SENTENCE;
+ case STATE_SENTENCE_TERM:
+ /* End sentence on anything but close punctuation and some
+ * loosely-specified OTHER_PUNCTUATION such as period,
+ * comma, etc.; follow Unicode rules for breaks
+ */
+ switch (type)
+ {
+ case G_UNICODE_OTHER_PUNCTUATION:
+ case G_UNICODE_CLOSE_PUNCTUATION:
+ if (type == G_UNICODE_CLOSE_PUNCTUATION ||
+ wc == '.' ||
+ wc == ',' ||
+ wc == '?' ||
+ wc == '!')
+ sentence_state = STATE_SENTENCE_POST_TERM_CLOSE;
+ else
+ {
+ attrs[i].is_sentence_end = TRUE;
+ attrs[i].is_sentence_boundary = TRUE;
+
+ MAYBE_START_NEW_SENTENCE;
+ }
+ break;
+
+ case G_UNICODE_SPACE_SEPARATOR:
+ attrs[i].is_sentence_end = TRUE;
+ sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
+ break;
+
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ attrs[i].is_sentence_end = TRUE;
+ sentence_state = STATE_SENTENCE_POST_TERM_SEP;
+ break;
+
+ default:
+ attrs[i].is_sentence_end = TRUE;
+ attrs[i].is_sentence_boundary = TRUE;
+
+ MAYBE_START_NEW_SENTENCE;
+
+ break;
+ }
+ break;
+
+ case STATE_SENTENCE_POST_TERM_CLOSE:
+ /* End sentence on anything besides more punctuation; follow
+ * rules for breaks
+ */
+ switch (type)
+ {
+ case G_UNICODE_OTHER_PUNCTUATION:
+ case G_UNICODE_CLOSE_PUNCTUATION:
+ if (type == G_UNICODE_CLOSE_PUNCTUATION ||
+ wc == '.' ||
+ wc == ',' ||
+ wc == '?' ||
+ wc == '!')
+ /* continue in this state */
+ ;
+ else
+ {
+ attrs[i].is_sentence_end = TRUE;
+ attrs[i].is_sentence_boundary = TRUE;
+
+ MAYBE_START_NEW_SENTENCE;
+ }
+ break;
+
+ case G_UNICODE_SPACE_SEPARATOR:
+ attrs[i].is_sentence_end = TRUE;
+ sentence_state = STATE_SENTENCE_POST_TERM_SPACE;
+ break;
+
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ attrs[i].is_sentence_end = TRUE;
+ /* undo the unconditional break-at-all-line/para-separators
+ * from above; I'm not sure this is what the Unicode spec
+ * intends, but it seems right - we get to include
+ * a single line/para separator in the sentence according
+ * to their rules
+ */
+ attrs[i].is_sentence_boundary = FALSE;
+ sentence_state = STATE_SENTENCE_POST_TERM_SEP;
+ break;
+
+ default:
+ attrs[i].is_sentence_end = TRUE;
+ attrs[i].is_sentence_boundary = TRUE;
+
+ MAYBE_START_NEW_SENTENCE;
+
+ break;
+ }
+ break;
+
+ case STATE_SENTENCE_POST_TERM_SPACE:
+
+ /* Sentence is definitely already ended; to enter this state
+ * we had to see a space, which ends the sentence.
+ */
+
+ switch (type)
+ {
+ case G_UNICODE_SPACE_SEPARATOR:
+ /* continue in this state */
+ break;
+
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ /* undo the unconditional break-at-all-line/para-separators
+ * from above; I'm not sure this is what the Unicode spec
+ * intends, but it seems right
+ */
+ attrs[i].is_sentence_boundary = FALSE;
+ sentence_state = STATE_SENTENCE_POST_TERM_SEP;
+ break;
+
+ default:
+ attrs[i].is_sentence_boundary = TRUE;
+
+ MAYBE_START_NEW_SENTENCE;
+
+ break;
+ }
+ break;
+
+ case STATE_SENTENCE_POST_TERM_SEP:
+ /* Break is forced at this point, unless we're a newline
+ * after a CR, then we will break after the newline on the
+ * next iteration. Only a single Sep can be in the
+ * sentence.
+ */
+ if (!(prev_wc == '\r' && wc == '\n'))
+ attrs[i].is_sentence_boundary = TRUE;
- break;
+ MAYBE_START_NEW_SENTENCE;
- default:
- g_assert_not_reached ();
- break;
- }
+ break;
+
+ case STATE_SENTENCE_DOT:
+ switch (type)
+ {
+ case G_UNICODE_CLOSE_PUNCTUATION:
+ sentence_state = STATE_SENTENCE_POST_DOT_CLOSE;
+ break;
+
+ case G_UNICODE_SPACE_SEPARATOR:
+ possible_sentence_end = i;
+ sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
+ break;
+
+ default:
+ /* If we broke on a control/format char, end the
+ * sentence; else this was not a sentence end, since
+ * we didn't enter the POST_DOT_SPACE state.
+ */
+ if (attrs[i].is_sentence_boundary)
+ {
+ attrs[i].is_sentence_end = TRUE;
+
+ MAYBE_START_NEW_SENTENCE;
+ }
+ else
+ sentence_state = STATE_SENTENCE_BODY;
+ break;
+ }
+ break;
+
+ case STATE_SENTENCE_POST_DOT_CLOSE:
+ switch (type)
+ {
+ case G_UNICODE_SPACE_SEPARATOR:
+ possible_sentence_end = i;
+ sentence_state = STATE_SENTENCE_POST_DOT_SPACE;
+ break;
+
+ default:
+ /* If we broke on a control/format char, end the
+ * sentence; else this was not a sentence end, since
+ * we didn't enter the POST_DOT_SPACE state.
+ */
+ if (attrs[i].is_sentence_boundary)
+ {
+ attrs[i].is_sentence_end = TRUE;
+
+ MAYBE_START_NEW_SENTENCE;
+ }
+ else
+ sentence_state = STATE_SENTENCE_BODY;
+ break;
+ }
+ break;
+
+ case STATE_SENTENCE_POST_DOT_SPACE:
+
+ possible_sentence_boundary = i;
+
+ switch (type)
+ {
+ case G_UNICODE_SPACE_SEPARATOR:
+ /* remain in current state */
+ break;
+
+ case G_UNICODE_OPEN_PUNCTUATION:
+ sentence_state = STATE_SENTENCE_POST_DOT_OPEN;
+ break;
+
+ case G_UNICODE_LOWERCASE_LETTER:
+ /* wasn't a sentence-ending period; so re-enter the sentence
+ * body
+ */
+ sentence_state = STATE_SENTENCE_BODY;
+ break;
+
+ default:
+ /* End the sentence, break, maybe start a new one */
+
+ g_assert (possible_sentence_end >= 0);
+ g_assert (possible_sentence_boundary >= 0);
+
+ attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
+ attrs[possible_sentence_end].is_sentence_end = TRUE;
+
+ possible_sentence_end = -1;
+ possible_sentence_boundary = -1;
+
+ MAYBE_START_NEW_SENTENCE;
+
+ break;
+ }
+ break;
+
+ case STATE_SENTENCE_POST_DOT_OPEN:
+ switch (type)
+ {
+ case G_UNICODE_OPEN_PUNCTUATION:
+ /* continue in current state */
+ break;
+
+ case G_UNICODE_LOWERCASE_LETTER:
+ /* wasn't a sentence-ending period; so re-enter the sentence
+ * body
+ */
+ sentence_state = STATE_SENTENCE_BODY;
+ break;
+
+ default:
+ /* End the sentence, break, maybe start a new one */
+
+ g_assert (possible_sentence_end >= 0);
+ g_assert (possible_sentence_boundary >= 0);
+
+ attrs[possible_sentence_boundary].is_sentence_boundary = TRUE;
+ attrs[possible_sentence_end].is_sentence_end = TRUE;
+
+ possible_sentence_end = -1;
+ possible_sentence_boundary = -1;
+
+ MAYBE_START_NEW_SENTENCE;
+
+ break;
+ }
+ break;
+
+ case STATE_SENTENCE_POST_DOT_SEP:
+ /* Break is forced at this point, unless we're a newline
+ * after a CR, then we will break after the newline on the
+ * next iteration. Only a single Sep can be in the
+ * sentence.
+ */
+ if (!(prev_wc == '\r' && wc == '\n'))
+ attrs[i].is_sentence_boundary = TRUE;
+
+ g_assert (possible_sentence_end >= 0);
+ g_assert (possible_sentence_boundary >= 0);
+
+ attrs[possible_sentence_end].is_sentence_end = TRUE;
+
+ possible_sentence_end = -1;
+ possible_sentence_boundary = -1;
+
+ MAYBE_START_NEW_SENTENCE;
+
+ break;
+
+ default:
+ g_assert_not_reached ();
+ break;
+ }
prev_type = type;
prev_wc = wc;
@@ -1459,18 +1459,18 @@ pango_default_break (const gchar *text,
/* wc might not be a valid Unicode base character, but really all we
* need to know is the last non-combining character */
if (type != G_UNICODE_COMBINING_MARK &&
- type != G_UNICODE_ENCLOSING_MARK &&
- type != G_UNICODE_NON_SPACING_MARK)
- base_character = wc;
+ type != G_UNICODE_ENCLOSING_MARK &&
+ type != G_UNICODE_NON_SPACING_MARK)
+ base_character = wc;
}
}
static gboolean
tailor_break (const gchar *text,
- gint length,
- PangoAnalysis *analysis,
- PangoLogAttr *attrs,
- int attrs_len)
+ gint length,
+ PangoAnalysis *analysis,
+ PangoLogAttr *attrs,
+ int attrs_len)
{
if (analysis->lang_engine && PANGO_ENGINE_LANG_GET_CLASS (analysis->lang_engine)->script_break)
{
@@ -1499,10 +1499,10 @@ tailor_break (const gchar *text,
*/
void
pango_break (const gchar *text,
- gint length,
- PangoAnalysis *analysis,
- PangoLogAttr *attrs,
- int attrs_len)
+ gint length,
+ PangoAnalysis *analysis,
+ PangoLogAttr *attrs,
+ int attrs_len)
{
g_return_if_fail (analysis != NULL);
g_return_if_fail (attrs != NULL);
@@ -1531,9 +1531,9 @@ pango_break (const gchar *text,
**/
void
pango_find_paragraph_boundary (const gchar *text,
- gint length,
- gint *paragraph_delimiter_index,
- gint *next_paragraph_start)
+ gint length,
+ gint *paragraph_delimiter_index,
+ gint *next_paragraph_start)
{
const gchar *p = text;
const gchar *end;
@@ -1570,29 +1570,29 @@ pango_find_paragraph_boundary (const gchar *text,
while (p != end)
{
if (prev_sep == '\n' ||
- prev_sep == PARAGRAPH_SEPARATOR_STRING[0])
- {
- g_assert (delimiter);
- start = p;
- break;
- }
+ prev_sep == PARAGRAPH_SEPARATOR_STRING[0])
+ {
+ g_assert (delimiter);
+ start = p;
+ break;
+ }
else if (prev_sep == '\r')
- {
- /* don't break between \r and \n */
- if (*p != '\n')
- {
- g_assert (delimiter);
- start = p;
- break;
- }
- }
+ {
+ /* don't break between \r and \n */
+ if (*p != '\n')
+ {
+ g_assert (delimiter);
+ start = p;
+ break;
+ }
+ }
if (*p == '\n' ||
- *p == '\r' ||
- !strncmp(p, PARAGRAPH_SEPARATOR_STRING,
+ *p == '\r' ||
+ !strncmp(p, PARAGRAPH_SEPARATOR_STRING,
strlen(PARAGRAPH_SEPARATOR_STRING)))
- {
- if (delimiter == NULL)
+ {
+ if (delimiter == NULL)
delimiter = p;
prev_sep = *p;
}
@@ -1663,11 +1663,11 @@ tailor_segment (const char *range_start,
*/
void
pango_get_log_attrs (const char *text,
- int length,
- int level,
- PangoLanguage *language,
- PangoLogAttr *log_attrs,
- int attrs_len)
+ int length,
+ int level,
+ PangoLanguage *language,
+ PangoLogAttr *log_attrs,
+ int attrs_len)
{
PangoMap *lang_map;
int chars_broken;
@@ -1711,15 +1711,15 @@ pango_get_log_attrs (const char *text,
g_assert (range_end == run_start);
if (range_engine != run_engine)
- {
- /* Engine has changed; do the tailoring for the current range,
- * then start a new range.
- */
+ {
+ /* Engine has changed; do the tailoring for the current range,
+ * then start a new range.
+ */
chars_broken += tailor_segment (range_start, range_end, range_engine, chars_broken, &analysis, log_attrs);
- range_start = run_start;
+ range_start = run_start;
range_engine = run_engine;
- }
+ }
range_end = run_end;
}
pango_script_iter_free (iter);