diff options
-rw-r--r-- | pango/break.c | 124 | ||||
-rw-r--r-- | pango/pango-break.h | 6 | ||||
-rw-r--r-- | pango/pango-layout.c | 179 | ||||
-rw-r--r-- | pango/shape.c | 49 | ||||
-rw-r--r-- | tests/breaks/eleven.expected | 13 | ||||
-rw-r--r-- | tests/breaks/fifteen.break | 1 | ||||
-rw-r--r-- | tests/breaks/fifteen.expected | 7 | ||||
-rw-r--r-- | tests/breaks/four.expected | 13 | ||||
-rw-r--r-- | tests/breaks/nine.expected | 13 | ||||
-rw-r--r-- | tests/breaks/one.expected | 1 | ||||
-rw-r--r-- | tests/breaks/sixteen.break | 1 | ||||
-rw-r--r-- | tests/breaks/sixteen.expected | 7 | ||||
-rw-r--r-- | tests/breaks/ten.expected | 13 | ||||
-rw-r--r-- | tests/breaks/three.expected | 13 | ||||
-rw-r--r-- | tests/breaks/two.expected | 1 | ||||
-rw-r--r-- | tests/layouts/valid-17.expected | 2 | ||||
-rw-r--r-- | tests/test-break.c | 31 | ||||
-rw-r--r-- | tools/gen-script-for-lang.c | 2 |
18 files changed, 275 insertions, 201 deletions
diff --git a/pango/break.c b/pango/break.c index c55d5f22..d348f9b8 100644 --- a/pango/break.c +++ b/pango/break.c @@ -249,6 +249,8 @@ default_break (const char *text, gint last_sentence_start = -1; gint last_non_space = -1; + gboolean prev_space_or_hyphen; + gboolean almost_done = FALSE; gboolean done = FALSE; @@ -261,6 +263,7 @@ default_break (const char *text, prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN; prev_wc = 0; prev_jamo = NO_JAMO; + prev_space_or_hyphen = FALSE; if (length == 0 || *text == '\0') { @@ -291,6 +294,8 @@ default_break (const char *text, /* Emoji extended pictographics */ gboolean is_Extended_Pictographic; + PangoScript script; + wc = next_wc; break_type = next_break_type; @@ -533,17 +538,16 @@ default_break (const char *text, prev_GB_type = GB_type; } + script = (PangoScript)g_unichar_get_script (wc); + /* ---- UAX#29 Word Boundaries ---- */ { is_word_boundary = FALSE; if (is_grapheme_boundary || G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) /* Rules WB3 and WB4 */ { - PangoScript script; WordBreakType WB_type; - script = (PangoScript)g_unichar_get_script (wc); - /* Find the WordBreakType of wc */ WB_type = WB_Other; @@ -1552,7 +1556,78 @@ default_break (const char *text, attrs[i - 1].is_white) { last_sentence_start++; } + } + + /* --- Hyphens --- */ + + { + gboolean insert_hyphens; + gboolean space_or_hyphen = FALSE; + + attrs[i].break_inserts_hyphen = FALSE; + attrs[i].break_removes_preceding = FALSE; + + switch ((int)script) + { + case PANGO_SCRIPT_COMMON: + case PANGO_SCRIPT_HAN: + case PANGO_SCRIPT_HANGUL: + case PANGO_SCRIPT_HIRAGANA: + case PANGO_SCRIPT_KATAKANA: + insert_hyphens = FALSE; + break; + default: + insert_hyphens = TRUE; + break; + } + switch ((int)type) + { + case G_UNICODE_SPACE_SEPARATOR: + case G_UNICODE_LINE_SEPARATOR: + case G_UNICODE_PARAGRAPH_SEPARATOR: + space_or_hyphen = TRUE; + break; + case G_UNICODE_CONTROL: + if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f') + space_or_hyphen = TRUE; + break; + default: 
+ break; + } + + if (!space_or_hyphen) + { + if (wc == '-' || /* Hyphen-minus */ + wc == 0x058a || /* Armenian hyphen */ + wc == 0x1400 || /* Canadian syllabics hyphen */ + wc == 0x1806 || /* Mongolian todo hyphen */ + wc == 0x2010 || /* Hyphen */ + wc == 0x2e17 || /* Double oblique hyphen */ + wc == 0x2e40 || /* Double hyphen */ + wc == 0x30a0 || /* Katakana-Hiragana double hyphen */ + wc == 0xfe63 || /* Small hyphen-minus */ + wc == 0xff0d) /* Fullwidth hyphen-minus */ + space_or_hyphen = TRUE; + } + + if (attrs[i].is_word_boundary) + attrs[i].break_inserts_hyphen = FALSE; + else if (prev_space_or_hyphen) + attrs[i].break_inserts_hyphen = FALSE; + else if (space_or_hyphen) + attrs[i].break_inserts_hyphen = FALSE; + else + attrs[i].break_inserts_hyphen = insert_hyphens; + + if (prev_wc == 0x007C || /* Vertical Line */ + prev_wc == 0x2027) /* Hyphenation point */ + { + attrs[i].break_inserts_hyphen = TRUE; + attrs[i].break_removes_preceding = TRUE; + } + + prev_space_or_hyphen = space_or_hyphen; } prev_wc = wc; @@ -1633,22 +1708,21 @@ break_attrs (const char *text, int log_attrs_len) { PangoAttrList list; + PangoAttrList hyphens; PangoAttrIterator iter; GSList *l; _pango_attr_list_init (&list); + _pango_attr_list_init (&hyphens); + for (l = attributes; l; l = l->next) { PangoAttribute *attr = l->data; if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS) pango_attr_list_insert (&list, pango_attribute_copy (attr)); - } - - if (!_pango_attr_list_has_attributes (&list)) - { - _pango_attr_list_destroy (&list); - return FALSE; + else if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS) + pango_attr_list_insert (&hyphens, pango_attribute_copy (attr)); } _pango_attr_list_get_iterator (&list, &iter); @@ -1681,7 +1755,39 @@ break_attrs (const char *text, } while (pango_attr_iterator_next (&iter)); _pango_attr_iterator_destroy (&iter); + + _pango_attr_list_get_iterator (&hyphens, &iter); + do { + const PangoAttribute *attr = pango_attr_iterator_get (&iter, 
PANGO_ATTR_INSERT_HYPHENS); + + if (attr && ((PangoAttrInt*)attr)->value == 0) + { + int start, end; + int start_pos, end_pos; + int pos; + + pango_attr_iterator_range (&iter, &start, &end); + if (start < offset) + start_pos = 0; + else + start_pos = g_utf8_pointer_to_offset (text, text + start - offset); + if (end >= offset + length) + end_pos = log_attrs_len; + else + end_pos = g_utf8_pointer_to_offset (text, text + end - offset); + + for (pos = start_pos + 1; pos < end_pos; pos++) + { + if (!log_attrs[pos].break_removes_preceding) + log_attrs[pos].break_inserts_hyphen = FALSE; + } + } + } while (pango_attr_iterator_next (&iter)); + + _pango_attr_iterator_destroy (&iter); + _pango_attr_list_destroy (&list); + _pango_attr_list_destroy (&hyphens); return TRUE; } diff --git a/pango/pango-break.h b/pango/pango-break.h index a8e6c5b9..5d791e27 100644 --- a/pango/pango-break.h +++ b/pango/pango-break.h @@ -72,6 +72,10 @@ G_BEGIN_DECLS * This flag is particularly useful when selecting text word-by-word. This flag * implements Unicode's [Word Boundaries](http://www.unicode.org/reports/tr29/) * semantics. (Since: 1.22) + * @break_inserts_hyphen: when breaking lines before this char, insert a hyphen. + * Since: 1.50 + * @break_removes_preceding: when breaking lines before this char, remove the + * preceding char. Since: 1.50 * * The `PangoLogAttr` structure stores information about the attributes of a * single character. 
@@ -91,6 +95,8 @@ struct _PangoLogAttr guint backspace_deletes_character : 1; guint is_expandable_space : 1; guint is_word_boundary : 1; + guint break_inserts_hyphen : 1; + guint break_removes_preceding : 1; }; PANGO_DEPRECATED_IN_1_44 diff --git a/pango/pango-layout.c b/pango/pango-layout.c index ccebc557..85f12f0a 100644 --- a/pango/pango-layout.c +++ b/pango/pango-layout.c @@ -3595,7 +3595,6 @@ struct _ParaBreakState int log_widths_offset; /* Offset into log_widths to the point corresponding * to the remaining portion of the first item */ - int *need_hyphen; /* Insert a hyphen if breaking here ? */ int line_start_index; /* Start index (byte offset) of line in layout->text */ int line_start_offset; /* Character offset of line in layout->text */ @@ -3684,140 +3683,19 @@ insert_run (PangoLayoutLine *line, state->glyphs = NULL; g_free (state->log_widths); state->log_widths = NULL; - g_free (state->need_hyphen); - state->need_hyphen = NULL; } line->runs = g_slist_prepend (line->runs, run); line->length += run_item->length; } -static void -get_need_hyphen (PangoItem *item, - const char *text, - int *need_hyphen) -{ - int i; - const char *p; - gboolean prev_space; - gboolean prev_hyphen; - PangoAttrList attrs; - PangoAttrIterator iter; - GSList *l; - - _pango_attr_list_init (&attrs); - for (l = item->analysis.extra_attrs; l; l = l->next) - { - PangoAttribute *attr = l->data; - if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS) - pango_attr_list_change (&attrs, pango_attribute_copy (attr)); - } - _pango_attr_list_get_iterator (&attrs, &iter); - - prev_space = prev_hyphen = TRUE; - - for (i = 0, p = text + item->offset; i < item->num_chars; i++, p = g_utf8_next_char (p)) - { - gunichar wc = g_utf8_get_char (p); - gboolean space; - gboolean hyphen; - int start, end, pos; - gboolean insert_hyphens = TRUE; - - pos = p - text; - do { - pango_attr_iterator_range (&iter, &start, &end); - if (end > pos) - break; - } while (pango_attr_iterator_next (&iter)); - - if (start <= 
pos && pos < end) - { - PangoAttribute *attr; - attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS); - if (attr) - insert_hyphens = ((PangoAttrInt*)attr)->value; - - /* Some scripts don't use hyphen.*/ - switch (item->analysis.script) - { - case PANGO_SCRIPT_COMMON: - case PANGO_SCRIPT_HAN: - case PANGO_SCRIPT_HANGUL: - case PANGO_SCRIPT_HIRAGANA: - case PANGO_SCRIPT_KATAKANA: - insert_hyphens = FALSE; - break; - default: - break; - } - } - - switch ((int)g_unichar_type (wc)) - { - case G_UNICODE_SPACE_SEPARATOR: - case G_UNICODE_LINE_SEPARATOR: - case G_UNICODE_PARAGRAPH_SEPARATOR: - space = TRUE; - break; - case G_UNICODE_CONTROL: - if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f') - space = TRUE; - else - space = FALSE; - break; - default: - space = FALSE; - break; - } - - if (wc == '-' || /* Hyphen-minus */ - wc == 0x058a || /* Armenian hyphen */ - wc == 0x1400 || /* Canadian syllabics hyphen */ - wc == 0x1806 || /* Mongolian todo hyphen */ - wc == 0x2010 || /* Hyphen */ - wc == 0x2027 || /* Hyphenation point */ - wc == 0x2e17 || /* Double oblique hyphen */ - wc == 0x2e40 || /* Double hyphen */ - wc == 0x30a0 || /* Katakana-Hiragana double hyphen */ - wc == 0xfe63 || /* Small hyphen-minus */ - wc == 0xff0d) /* Fullwidth hyphen-minus */ - hyphen = TRUE; - else - hyphen = FALSE; - - if (prev_space || space) - need_hyphen[i] = FALSE; - else if (prev_hyphen || hyphen) - need_hyphen[i] = FALSE; - else - need_hyphen[i] = insert_hyphens; - - prev_space = space; - prev_hyphen = hyphen; - } - - need_hyphen[item->num_chars - 1] = FALSE; - - _pango_attr_iterator_destroy (&iter); - _pango_attr_list_destroy (&attrs); -} - static gboolean break_needs_hyphen (PangoLayout *layout, ParaBreakState *state, int pos) { - if (state->log_widths_offset + pos == 0) - return FALSE; - - if (layout->log_attrs[state->start_offset + pos].is_word_boundary) - return FALSE; - - if (state->need_hyphen[state->log_widths_offset + pos - 1]) - return TRUE; - - return FALSE; + 
return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen || + layout->log_attrs[state->start_offset + pos].break_removes_preceding; } static int @@ -3843,23 +3721,56 @@ find_hyphen_width (PangoItem *item) } static int +find_char_width (PangoItem *item, + gunichar wc) +{ + hb_font_t *hb_font; + hb_codepoint_t glyph; + + if (!item->analysis.font) + return 0; + + hb_font = pango_font_get_hb_font (item->analysis.font); + if (hb_font_get_nominal_glyph (hb_font, wc, &glyph)) + return hb_font_get_glyph_h_advance (hb_font, glyph); + + return 0; +} + +static inline void +ensure_hyphen_width (ParaBreakState *state) +{ + if (state->hyphen_width < 0) + { + PangoItem *item = state->items->data; + state->hyphen_width = find_hyphen_width (item); + } +} + +static int find_break_extra_width (PangoLayout *layout, ParaBreakState *state, int pos) { /* Check whether to insert a hyphen */ - if (break_needs_hyphen (layout, state, pos)) + if (layout->log_attrs[state->start_offset + pos].break_inserts_hyphen) { - if (state->hyphen_width < 0) + ensure_hyphen_width (state); + + if (layout->log_attrs[state->start_offset + pos].break_removes_preceding) { PangoItem *item = state->items->data; - state->hyphen_width = find_hyphen_width (item); - } + gunichar wc; - return state->hyphen_width; + wc = g_utf8_get_char (g_utf8_offset_to_pointer (layout->text, state->start_offset + pos - 1)); + + return state->hyphen_width - find_char_width (item, wc); + } + else + return state->hyphen_width; } - else - return 0; + + return 0; } #if 0 @@ -3923,7 +3834,6 @@ process_item (PangoLayout *layout, state->glyphs = shape_run (line, state, item); state->log_widths = NULL; - state->need_hyphen = NULL; state->log_widths_offset = 0; processing_new_item = TRUE; @@ -3980,8 +3890,6 @@ process_item (PangoLayout *layout, PangoGlyphItem glyph_item = {item, state->glyphs}; state->log_widths = g_new (int, item->num_chars); pango_glyph_item_get_logical_widths (&glyph_item, layout->text, state->log_widths); - 
state->need_hyphen = g_new (int, item->num_chars); - get_need_hyphen (item, layout->text, state->need_hyphen); } retry_break: @@ -4082,8 +3990,6 @@ process_item (PangoLayout *layout, state->glyphs = NULL; g_free (state->log_widths); state->log_widths = NULL; - g_free (state->need_hyphen); - state->need_hyphen = NULL; return BREAK_NONE_FIT; } @@ -4629,7 +4535,6 @@ pango_layout_check_lines (PangoLayout *layout) state.glyphs = NULL; state.log_widths = NULL; - state.need_hyphen = NULL; /* for deterministic bug hunting's sake set everything! */ state.line_width = -1; diff --git a/pango/shape.c b/pango/shape.c index aeb9aa47..62c0f025 100644 --- a/pango/shape.c +++ b/pango/shape.c @@ -344,6 +344,7 @@ pango_hb_shape (const char *item_text, int paragraph_length, const PangoAnalysis *analysis, PangoLogAttr *log_attrs, + int num_chars, PangoGlyphString *glyphs, PangoShapeFlags flags) { @@ -362,6 +363,7 @@ pango_hb_shape (const char *item_text, unsigned int num_features = 0; PangoGlyphInfo *infos; PangoTextTransform transform; + int hyphen_index; g_return_if_fail (analysis != NULL); g_return_if_fail (analysis->font != NULL); @@ -392,22 +394,35 @@ pango_hb_shape (const char *item_text, hb_buffer_set_flags (hb_buffer, hb_buffer_flags); hb_buffer_set_invisible_glyph (hb_buffer, PANGO_GLYPH_EMPTY); + if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN) + { + const char *p = paragraph_text + item_offset + item_length; + int last_char_len = p - g_utf8_prev_char (p); + + hyphen_index = item_offset + item_length - last_char_len; + + if (log_attrs[num_chars].break_removes_preceding) + item_length -= last_char_len; + } + + /* Add pre-context */ + hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0); + if (transform == PANGO_TEXT_TRANSFORM_NONE) { - hb_buffer_add_utf8 (hb_buffer, paragraph_text, paragraph_length, item_offset, item_length); + hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset + item_length, item_offset, item_length); } else { const char 
*p; int i; - /* Add pre-context */ - hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0); - /* Transform the item text according to text transform. * Note: we assume text transforms won't cross font boundaries */ - for (p = paragraph_text + item_offset, i = 0; p < paragraph_text + item_offset + item_length; p = g_utf8_next_char (p), i++) + for (p = paragraph_text + item_offset, i = 0; + p < paragraph_text + item_offset + item_length; + p = g_utf8_next_char (p), i++) { int index = p - paragraph_text; gunichar ch = g_utf8_get_char (p); @@ -447,26 +462,23 @@ pango_hb_shape (const char *item_text, else hb_buffer_add (hb_buffer, ch, index); } - - /* Add post-context */ - hb_buffer_add_utf8 (hb_buffer, paragraph_text + item_offset + item_length, paragraph_length - (item_offset + item_length), - item_offset + item_length, 0); } + /* Add post-context */ + hb_buffer_add_utf8 (hb_buffer, paragraph_text, paragraph_length, item_offset + item_length, 0); + if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN) { /* Insert either a Unicode or ASCII hyphen. We may * want to look for script-specific hyphens here. 
*/ - const char *p = paragraph_text + item_offset + item_length; - int last_char_len = p - g_utf8_prev_char (p); hb_codepoint_t glyph; /* Note: We rely on hb_buffer_add clearing existing post-context */ if (hb_font_get_nominal_glyph (hb_font, 0x2010, &glyph)) - hb_buffer_add (hb_buffer, 0x2010, item_offset + item_length - last_char_len); + hb_buffer_add (hb_buffer, 0x2010, hyphen_index); else if (hb_font_get_nominal_glyph (hb_font, '-', &glyph)) - hb_buffer_add (hb_buffer, '-', item_offset + item_length - last_char_len); + hb_buffer_add (hb_buffer, '-', hyphen_index); } pango_font_get_features (analysis->font, features, G_N_ELEMENTS (features), &num_features); @@ -579,6 +591,7 @@ pango_shape_internal (const char *item_text, int paragraph_length, const PangoAnalysis *analysis, PangoLogAttr *log_attrs, + int num_chars, PangoGlyphString *glyphs, PangoShapeFlags flags) { @@ -606,9 +619,8 @@ pango_shape_internal (const char *item_text, pango_hb_shape (item_text, item_length, paragraph_text, paragraph_length, analysis, - log_attrs, - glyphs, - flags); + log_attrs, num_chars, + glyphs, flags); if (G_UNLIKELY (glyphs->num_glyphs == 0)) { @@ -867,7 +879,7 @@ pango_shape_with_flags (const char *item_text, { pango_shape_internal (item_text, item_length, paragraph_text, paragraph_length, - analysis, NULL, + analysis, NULL, 0, glyphs, flags); } @@ -906,7 +918,8 @@ pango_shape_item (PangoItem *item, { pango_shape_internal (paragraph_text + item->offset, item->length, paragraph_text, paragraph_length, - &item->analysis, log_attrs, + &item->analysis, + log_attrs, item->num_chars, glyphs, flags); } diff --git a/tests/breaks/eleven.expected b/tests/breaks/eleven.expected index 45c7ad01..f5b26708 100644 --- a/tests/breaks/eleven.expected +++ b/tests/breaks/eleven.expected @@ -1,6 +1,7 @@ -Text: ❤️︎︎ 👨[0x200d]🦰 👨🏿[0x200d]🦱 0️⃣ 🏴[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] 🇩🇪️ [0x0a] -Breaks: c lc lc lc lc lc c c -Whitespace: w w -Sentences: bs e b -Words: b b b bs be b b b 
-Graphemes: b b b b b b b b +Text: ❤ ️ ︎ ︎ 👨 [0x200d]🦰 👨🏿 [0x200d]🦱 0 ️ ⃣ 🏴[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] 🇩🇪 ️ [0x0a] +Breaks: c lc lc lc lc lc c c +Whitespace: w w +Sentences: bs e b +Words: b b b bs be b b b +Graphemes: b b b b b b b b +Hyphens: i i i i i i i i diff --git a/tests/breaks/fifteen.break b/tests/breaks/fifteen.break new file mode 100644 index 00000000..452d9bb5 --- /dev/null +++ b/tests/breaks/fifteen.break @@ -0,0 +1 @@ +<span insert_hyphens='false'>one</span> two-three four diff --git a/tests/breaks/fifteen.expected b/tests/breaks/fifteen.expected new file mode 100644 index 00000000..3f5e4655 --- /dev/null +++ b/tests/breaks/fifteen.expected @@ -0,0 +1,7 @@ +Text: o n e [ ] t w o - t h r e e [ ] f o [0xad] u r [0x0a] +Breaks: c c c c lc c c c lc c c c c c lc c c lc c c c +Whitespace: x x w w +Sentences: bs e b +Words: bs be bs be bs be bs be b +Graphemes: b b b b b b b b b b b b b b b b b b b b b +Hyphens: i i i i i i i i i diff --git a/tests/breaks/four.expected b/tests/breaks/four.expected index 2f29d778..e94af24a 100644 --- a/tests/breaks/four.expected +++ b/tests/breaks/four.expected @@ -1,6 +1,7 @@ -Text: ภ า ษ า ไ ท ย [ ] ห รื อ [ ] ภ า ษ า ไ ท ย ก ล า ง [ ] เ ป็ น ภ า ษ า ร า ช ก า ร แ ล ะ ภ า ษ า ป ร ะ จ ำ ช า ติ ข อ ง ป ร ะ เ ท ศ ไ ท ย [ ] ภ า ษ า ไ ท ย เ ป็ น ภ า ษ า ใ น ก ลุ่ ม ภ า ษ า ไ ท ซึ่ ง เ ป็ น ก ลุ่ ม ย่ อ ย ข อ ง ต ร ะ กู ล ภ า ษ า ข ร้ า [ ] ไ ท [ ] สั น นิ ษ ฐ า น ว่ า [ ] ภ า ษ า ใ น ต ร ะ กู ล นี้ มี ถิ่ น ก ำ เ นิ ด จ า ก ท า ง ต อ น ใ ต้ ข อ ง ป ร ะ เ ท ศ จี น [ ] แ ล ะ นั ก ภ า ษ า ศ า ส ต ร์ บ า ง ส่ ว น เ ส น อ ว่ า [ ] ภ า ษ า ไ ท ย น่ า จ ะ มี ค ว า ม เ ชื่ อ ม โ ย ง กั บ ต ร ะ กู ล ภ า ษ า อ อ ส โ ต ร [ ] เ อ เ ชี ย ติ ก [ ] ต ร ะ กู ล ภ า ษ า อ อ ส โ ต ร นี เ ซี ย น [ ] แ ล ะ ต ร ะ กู ล ภ า ษ า จี น [ ] ทิ เ บ ต [0x0a] -Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c 
c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c c -Whitespace: x x x x x x x x x x x x x w w -Sentences: bs e b -Words: bs b b b bse b b be bs b b be bs b b b bse b b bse b b b be bs b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bs b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bs b be bs b b b b b b bse b be bs b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bs b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bs b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bs b b b b bse b be bs b b b b bse b b b bse b bse b b b b b b b b be bs b b bse b b b b bse b b b bse b be bs b b b be b -Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b +Text: ภ า ษ า ไ ท ย [ ] ห ร ื อ [ ] ภ า ษ า ไ ท ย ก ล า ง [ ] เ ป ็ น ภ า ษ า ร า ช ก า ร แ ล ะ ภ า ษ า ป 
ร ะ จ ำ ช า ต ิ ข อ ง ป ร ะ เ ท ศ ไ ท ย [ ] ภ า ษ า ไ ท ย เ ป ็ น ภ า ษ า ใ น ก ล ุ ่ ม ภ า ษ า ไ ท ซ ึ ่ ง เ ป ็ น ก ล ุ ่ ม ย ่ อ ย ข อ ง ต ร ะ ก ู ล ภ า ษ า ข ร ้ า [ ] ไ ท [ ] ส ั น น ิ ษ ฐ า น ว ่ า [ ] ภ า ษ า ใ น ต ร ะ ก ู ล น ี ้ ม ี ถ ิ ่ น ก ำ เ น ิ ด จ า ก ท า ง ต อ น ใ ต ้ ข อ ง ป ร ะ เ ท ศ จ ี น [ ] แ ล ะ น ั ก ภ า ษ า ศ า ส ต ร ์ บ า ง ส ่ ว น เ ส น อ ว ่ า [ ] ภ า ษ า ไ ท ย น ่ า จ ะ ม ี ค ว า ม เ ช ื ่ อ ม โ ย ง ก ั บ ต ร ะ ก ู ล ภ า ษ า อ อ ส โ ต ร [ ] เ อ เ ช ี ย ต ิ ก [ ] ต ร ะ ก ู ล ภ า ษ า อ อ ส โ ต ร น ี เ ซ ี ย น [ ] แ ล ะ ต ร ะ ก ู ล ภ า ษ า จ ี น [ ] ท ิ เ บ ต [0x0a] +Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c c +Whitespace: x x x x x x x x x x x x x w w +Sentences: bs e b +Words: bs b b b bse b b be bs b b be bs b b b bse b b bse b b b be bs b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bs b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bs b be bs b b b b b b bse b be bs b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bs b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bs b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bs b b b b bse b be bs b b b b bse b b b 
bse b bse b b b b b b b b be bs b b bse b b b b bse b b b bse b be bs b b b be b +Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b +Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i diff --git a/tests/breaks/nine.expected b/tests/breaks/nine.expected index 957f67e5..818a78aa 100644 --- a/tests/breaks/nine.expected +++ b/tests/breaks/nine.expected @@ -1,6 +1,7 @@ -Text: मी [ ] का च [ ] खा ऊ [ ] श क तो , [ ] म ला [ ] ते [ ] दु ख त [ ] ना ह [0x0a] मैं [ ] काँ च [ ] खा [ ] स क ता [ ] हूँ , [ ] मु झे [ ] उ स [ ] से [ ] को ई [ ] पी डा [ ] न हीं [ ] हो त [0x0a] நா ன் [ ] க ண் ணா டி [ ] சா ப் பி டு வே ன் , [ ] அ த னா ல் [ ] எ ன க் கு [ ] ஒ ரு [ ] கே டு ம் [ ] வ ரா த [0x0a] ﻢ ﯾ ں [ ] ﮎ ﺎ ﻨ ﭼ [ ] ﮎ ھ ﺍ [ ] ﺲ ﮑ ﺗ ﺍ [ ] ہ ﻭ ں [ ] ﺍ ﻭ ﺭ [ ] ﻢ ﺟ ھ ے [ ] ﺖ ﮑ ﻠ ﯿ ﻓ [ ] ﻥ ہ ﯼ ں [ ] ہ ﻮ ﺘ ﯾ [ ] [0x0a] ﺰ ﻫ [ ] ﺶ ﻴ ﺸ ﻫ [ ] ﺥ ﻭ ړ ﻝ ې [ ] ﺶ ﻣ ، [ ] ﻪ ﻐ ﻫ [ ] ﻡ ﺍ [ ] ﻦ ﻫ [ ] ﺥ ﻭ ږ ﻮ ﻳ [0x0a] -Breaks: c c lc c c lc c c lc c c c c lc c c lc c lc c c c lc c c c c lc c c lc c lc c c c lc c c lc c c lc c c lc c lc c c lc c c lc c c lc c c c c c lc c c c c lc c c c c c c c lc c c c c lc c c c c lc c c lc c c c lc c c c c c c c lc c c c c lc c c c lc c c c c lc c c c lc c c c lc c c c c lc c c c c c lc c c c c lc c c c c c c c c lc c c c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c c c c -Whitespace: x x x x x x x w x x x x x x x x x x x w x x x x x x x w x x x x x x x x x x w x x x x x x x w w 
-Sentences: bs e bs e bs e bs e bs e b -Words: bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be b bs be bs be bs be bs be b -Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b +Text: म ी [ ] क ा च [ ] ख ा ऊ [ ] श क त ो , [ ] म ल ा [ ] त े [ ] द ु ख त [ ] न ा ह [0x0a] म ै ं [ ] क ा ँ च [ ] ख ा [ ] स क त ा [ ] ह ू ँ , [ ] म ु झ े [ ] उ स [ ] स े [ ] क ो ई [ ] प ी ड ा [ ] न ह ी ं [ ] ह ो त [0x0a] ந ா ன ் [ ] க ண ் ண ா ட ி [ ] ச ா ப ் ப ி ட ு வ ே ன ் , [ ] அ த ன ா ல ் [ ] எ ன க ் க ு [ ] ஒ ர ு [ ] க ே ட ு ம ் [ ] வ ர ா த [0x0a] ﻢ ﯾ ں [ ] ﮎ ﺎ ﻨ ﭼ [ ] ﮎ ھ ﺍ [ ] ﺲ ﮑ ﺗ ﺍ [ ] ہ ﻭ ں [ ] ﺍ ﻭ ﺭ [ ] ﻢ ﺟ ھ ے [ ] ﺖ ﮑ ﻠ ﯿ ﻓ [ ] ﻥ ہ ﯼ ں [ ] ہ ﻮ ﺘ ﯾ [ ] [0x0a] ﺰ ﻫ [ ] ﺶ ﻴ ﺸ ﻫ [ ] ﺥ ﻭ ړ ﻝ ې [ ] ﺶ ﻣ ، [ ] ﻪ ﻐ ﻫ [ ] ﻡ ﺍ [ ] ﻦ ﻫ [ ] ﺥ ﻭ ږ ﻮ ﻳ [0x0a] +Breaks: c c lc c c lc c c lc c c c c lc c c lc c lc c c c lc c c c c lc c c lc c lc c c c lc c c lc c c lc c c lc c lc c c lc c c lc c c lc c c c c c lc c c c c lc c c c c c c c lc c c c c lc c c c c lc c c lc c c c lc c c c c c c c lc c c c c lc c c c lc c c c c lc c c c lc c c c lc c c c c lc c c c c c lc c c c c lc c c c c c c c c lc c c c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c c c c +Whitespace: x x x x x x x w x x x x x x x x x x x w x x x x x x x w x x x x x x x x x x w x x x x x x x w w +Sentences: bs e bs e bs e bs e bs e b +Words: bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be 
bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be b bs be bs be bs be bs be b +Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b +Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i diff --git a/tests/breaks/one.expected b/tests/breaks/one.expected index bbae494d..cc257131 100644 --- a/tests/breaks/one.expected +++ b/tests/breaks/one.expected @@ -4,3 +4,4 @@ Whitespace: x x w w Sentences: bs e bs e b Words: bs be bs be bs be b bs be b Graphemes: b b b b b b b b b b b b b b b b b b b b b b +Hyphens: i i i i i i i i i i i diff --git a/tests/breaks/sixteen.break b/tests/breaks/sixteen.break new file mode 100644 index 00000000..51062618 --- /dev/null +++ b/tests/breaks/sixteen.break @@ -0,0 +1 @@ +hy‧phen|ation overload diff --git a/tests/breaks/sixteen.expected b/tests/breaks/sixteen.expected new file mode 100644 index 00000000..75f20b9f --- /dev/null +++ b/tests/breaks/sixteen.expected @@ -0,0 +1,7 @@ +Text: h y ‧ p h e n | a t i o n [ ] o v e r [0xad] l o a d [0x0a] +Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c c +Whitespace: x w w +Sentences: bs e b +Words: bs e s be bs be bs be b +Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b +Hyphens: i ri i i i ri i i i i i i i i i i i diff --git a/tests/breaks/ten.expected b/tests/breaks/ten.expected index c1f8cc35..c9f91dc8 100644 --- a/tests/breaks/ten.expected +++ b/tests/breaks/ten.expected @@ -1,6 +1,7 @@ -Text: i ක්[0x200d]ක [ ] 
a[0x200c] a் [0x0a] -Breaks: c c c lc c c c -Whitespace: x w w -Sentences: bs e b -Words: bs be bs be b -Graphemes: b b b b b b b +Text: i ක ් [0x200d] ක [ ] a [0x200c] a ் [0x0a] +Breaks: c c c lc c c c +Whitespace: x w w +Sentences: bs e b +Words: bs be bs be b +Graphemes: b b b b b b b +Hyphens: i i i i i i i diff --git a/tests/breaks/three.expected b/tests/breaks/three.expected index 7f078f4f..c2c89158 100644 --- a/tests/breaks/three.expected +++ b/tests/breaks/three.expected @@ -1,6 +1,7 @@ -Text: o n e [ ] t w o [0x2028] r e d [ ] b l u e[0x200d] g r e e n [0x0a] -Breaks: c c c c lc c c c Lc c c c lc c c c c c c c c c c -Whitespace: x w x w w -Sentences: bs e bs e b -Words: bs be bs be bs be bs be b -Graphemes: b b b b b b b b b b b b b b b b b b b b b b b +Text: o n e [ ] t w o [0x2028] r e d [ ] b l u e [0x200d] g r e e n [0x0a] +Breaks: c c c c lc c c c Lc c c c lc c c c c c c c c c c +Whitespace: x w x w w +Sentences: bs e bs e b +Words: bs be bs be bs be bs be b +Graphemes: b b b b b b b b b b b b b b b b b b b b b b b +Hyphens: i i i i i i i i i i i i i i i diff --git a/tests/breaks/two.expected b/tests/breaks/two.expected index 3ec2d948..0280c52f 100644 --- a/tests/breaks/two.expected +++ b/tests/breaks/two.expected @@ -4,3 +4,4 @@ Whitespace: w w Sentences: bs e b Words: bs e s be b Graphemes: b b b b b b b b b b b +Hyphens: i i i i i i i diff --git a/tests/layouts/valid-17.expected b/tests/layouts/valid-17.expected index 4b3192fb..a2b7d494 100644 --- a/tests/layouts/valid-17.expected +++ b/tests/layouts/valid-17.expected @@ -28,7 +28,7 @@ i=3, index=17, paragraph-start=1, dir=ltr '' --- runs -i=1, index=0, chars=13, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|' +i=1, index=0, chars=13, level=0, gravity=south, flags=4, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|' i=2, index=13, no run, line end i=3, index=13, chars=3, level=0, gravity=south, flags=0, font=OMITTED, script=latin, 
language=en-us, 'bla' i=4, index=16, no run, line end diff --git a/tests/test-break.c b/tests/test-break.c index 3fb5cdcd..f7fcf6ec 100644 --- a/tests/test-break.c +++ b/tests/test-break.c @@ -46,7 +46,7 @@ test_file (const gchar *filename, GString *string) int len2; char *p; int i; - GString *s1, *s2, *s3, *s4, *s5; + GString *s1, *s2, *s3, *s4, *s5, *s6; int m; char *test; char *text; @@ -109,6 +109,7 @@ test_file (const gchar *filename, GString *string) s3 = g_string_new ("Sentences:"); s4 = g_string_new ("Words:"); s5 = g_string_new ("Graphemes:"); + s6 = g_string_new ("Hyphens:"); g_string_append (string, "Text: "); @@ -119,6 +120,7 @@ test_file (const gchar *filename, GString *string) g_string_append_printf (s3, "%*s", (int)(m - s3->len), ""); g_string_append_printf (s4, "%*s", (int)(m - s4->len), ""); g_string_append_printf (s5, "%*s", (int)(m - s5->len), ""); + g_string_append_printf (s6, "%*s", (int)(m - s6->len), ""); g_string_append_printf (string, "%*s", (int)(m - strlen ("Text: ")), ""); for (i = 0, p = text; i < len; i++, p = g_utf8_next_char (p)) @@ -129,6 +131,7 @@ test_file (const gchar *filename, GString *string) int o = 0; int s = 0; int g = 0; + int h = 0; if (log.is_mandatory_break) { @@ -195,7 +198,18 @@ test_file (const gchar *filename, GString *string) g++; } - m = MAX (MAX (MAX (b, w), MAX (o, s)), g); + if (log.break_removes_preceding) + { + g_string_append (s6, "r"); + h++; + } + if (log.break_inserts_hyphen) + { + g_string_append (s6, "i"); + h++; + } + + m = MAX (MAX (MAX (b, w), MAX (o, s)), MAX (g, h)); g_string_append_printf (string, "%*s", m, ""); g_string_append_printf (s1, "%*s", m - b, ""); @@ -203,6 +217,7 @@ test_file (const gchar *filename, GString *string) g_string_append_printf (s3, "%*s", m - s, ""); g_string_append_printf (s4, "%*s", m - o, ""); g_string_append_printf (s5, "%*s", m - g, ""); + g_string_append_printf (s6, "%*s", m - h, ""); if (i < len - 1) { @@ -215,6 +230,7 @@ test_file (const gchar *filename, GString 
*string) g_string_append (s3, " "); g_string_append (s4, " "); g_string_append (s5, " "); + g_string_append (s6, " "); } else if (g_unichar_isgraph (ch) && !(g_unichar_type (ch) == G_UNICODE_LINE_SEPARATOR || @@ -228,6 +244,7 @@ test_file (const gchar *filename, GString *string) g_string_append (s3, " "); g_string_append (s4, " "); g_string_append (s5, " "); + g_string_append (s6, " "); } else { @@ -238,6 +255,7 @@ test_file (const gchar *filename, GString *string) g_string_append_printf (s3, "%*s", (int)strlen (str), ""); g_string_append_printf (s4, "%*s", (int)strlen (str), ""); g_string_append_printf (s5, "%*s", (int)strlen (str), ""); + g_string_append_printf (s6, "%*s", (int)strlen (str), ""); g_free (str); } } @@ -253,12 +271,15 @@ test_file (const gchar *filename, GString *string) g_string_append (string, "\n"); g_string_append_len (string, s5->str, s5->len); g_string_append (string, "\n"); + g_string_append_len (string, s6->str, s6->len); + g_string_append (string, "\n"); g_string_free (s1, TRUE); g_string_free (s2, TRUE); g_string_free (s3, TRUE); g_string_free (s4, TRUE); g_string_free (s5, TRUE); + g_string_free (s6, TRUE); g_object_unref (layout); g_free (attrs); @@ -366,9 +387,9 @@ main (int argc, char *argv[]) " l - line break s - word start\n" " c - char break e - word end\n" "\n" - "Whitespace: Sentences:\n" - " x - expandable space b - sentence boundary\n" - " w - whitespace s - sentence start\n" + "Whitespace: Sentences:\n Hyphens" + " x - expandable space b - sentence boundary i - insert hyphen\n" + " w - whitespace s - sentence start r - remove preceding\n" " e - sentence end\n"); return 0; } diff --git a/tools/gen-script-for-lang.c b/tools/gen-script-for-lang.c index b3238f85..2fb0cbc7 100644 --- a/tools/gen-script-for-lang.c +++ b/tools/gen-script-for-lang.c @@ -44,7 +44,7 @@ typedef struct { ScriptInfo scripts[MAX_SCRIPTS]; } LangInfo; -static const char *get_script_name (PangoScript script) +static const char *get_script_name (GUnicodeScript 
script) { static GEnumClass *class = NULL; GEnumValue *value; |