diff options
Diffstat (limited to 'pango')
-rw-r--r-- | pango/break.c | 124 | ||||
-rw-r--r-- | pango/pango-break.h | 6 | ||||
-rw-r--r-- | pango/pango-layout.c | 179 | ||||
-rw-r--r-- | pango/shape.c | 49 |
4 files changed, 194 insertions, 164 deletions
diff --git a/pango/break.c b/pango/break.c index c55d5f22..d348f9b8 100644 --- a/pango/break.c +++ b/pango/break.c @@ -249,6 +249,8 @@ default_break (const char *text, gint last_sentence_start = -1; gint last_non_space = -1; + gboolean prev_space_or_hyphen; + gboolean almost_done = FALSE; gboolean done = FALSE; @@ -261,6 +263,7 @@ default_break (const char *text, prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN; prev_wc = 0; prev_jamo = NO_JAMO; + prev_space_or_hyphen = FALSE; if (length == 0 || *text == '\0') { @@ -291,6 +294,8 @@ default_break (const char *text, /* Emoji extended pictographics */ gboolean is_Extended_Pictographic; + PangoScript script; + wc = next_wc; break_type = next_break_type; @@ -533,17 +538,16 @@ default_break (const char *text, prev_GB_type = GB_type; } + script = (PangoScript)g_unichar_get_script (wc); + /* ---- UAX#29 Word Boundaries ---- */ { is_word_boundary = FALSE; if (is_grapheme_boundary || G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) /* Rules WB3 and WB4 */ { - PangoScript script; WordBreakType WB_type; - script = (PangoScript)g_unichar_get_script (wc); - /* Find the WordBreakType of wc */ WB_type = WB_Other; @@ -1552,7 +1556,78 @@ default_break (const char *text, attrs[i - 1].is_white) { last_sentence_start++; } + } + + /* --- Hyphens --- */ + + { + gboolean insert_hyphens; + gboolean space_or_hyphen = FALSE; + + attrs[i].break_inserts_hyphen = FALSE; + attrs[i].break_removes_preceding = FALSE; + + switch ((int)script) + { + case PANGO_SCRIPT_COMMON: + case PANGO_SCRIPT_HAN: + case PANGO_SCRIPT_HANGUL: + case PANGO_SCRIPT_HIRAGANA: + case PANGO_SCRIPT_KATAKANA: + insert_hyphens = FALSE; + break; + default: + insert_hyphens = TRUE; + break; + } + switch ((int)type) + { + case G_UNICODE_SPACE_SEPARATOR: + case G_UNICODE_LINE_SEPARATOR: + case G_UNICODE_PARAGRAPH_SEPARATOR: + space_or_hyphen = TRUE; + break; + case G_UNICODE_CONTROL: + if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f') + space_or_hyphen = TRUE; + break; + default: + break; + } + + if (!space_or_hyphen) + { + if (wc == '-' || /* Hyphen-minus */ + wc == 0x058a || /* Armenian hyphen */ + wc == 0x1400 || /* Canadian syllabics hyphen */ + wc == 0x1806 || /* Mongolian todo hyphen */ + wc == 0x2010 || /* Hyphen */ + wc == 0x2e17 || /* Double oblique hyphen */ + wc == 0x2e40 || /* Double hyphen */ + wc == 0x30a0 || /* Katakana-Hiragana double hyphen */ + wc == 0xfe63 || /* Small hyphen-minus */ + wc == 0xff0d) /* Fullwidth hyphen-minus */ + space_or_hyphen = TRUE; + } + + if (attrs[i].is_word_boundary) + attrs[i].break_inserts_hyphen = FALSE; + else if (prev_space_or_hyphen) + attrs[i].break_inserts_hyphen = FALSE; + else if (space_or_hyphen) + attrs[i].break_inserts_hyphen = FALSE; + else + attrs[i].break_inserts_hyphen = insert_hyphens; + + if (prev_wc == 0x007C || /* Vertical Line */ + prev_wc == 0x2027) /* Hyphenation point */ + { + attrs[i].break_inserts_hyphen = TRUE; + attrs[i].break_removes_preceding = TRUE; + } + + prev_space_or_hyphen = space_or_hyphen; } prev_wc = wc; @@ -1633,22 +1708,21 @@ break_attrs (const char *text, int log_attrs_len) { PangoAttrList list; + PangoAttrList hyphens; PangoAttrIterator iter; GSList *l; _pango_attr_list_init (&list); + _pango_attr_list_init (&hyphens); + for (l = attributes; l; l = l->next) { PangoAttribute *attr = l->data; if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS) pango_attr_list_insert (&list, pango_attribute_copy (attr)); - } - - if (!_pango_attr_list_has_attributes (&list)) - { - _pango_attr_list_destroy (&list); - return FALSE; + else if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS) + pango_attr_list_insert (&hyphens, pango_attribute_copy (attr)); } _pango_attr_list_get_iterator (&list, &iter); @@ -1681,7 +1755,39 @@ break_attrs (const char *text, } while (pango_attr_iterator_next (&iter)); _pango_attr_iterator_destroy (&iter); + + _pango_attr_list_get_iterator (&hyphens, &iter); + do { + const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS); + + if (attr && ((PangoAttrInt*)attr)->value == 0) + { + int start, end; + int start_pos, end_pos; + int pos; + + pango_attr_iterator_range (&iter, &start, &end); + if (start < offset) + start_pos = 0; + else + start_pos = g_utf8_pointer_to_offset (text, text + start - offset); + if (end >= offset + length) + end_pos = log_attrs_len; + else + end_pos = g_utf8_pointer_to_offset (text, text + end - offset); + + for (pos = start_pos + 1; pos < end_pos; pos++) + { + if (!log_attrs[pos].break_removes_preceding) + log_attrs[pos].break_inserts_hyphen = FALSE; + } + } + } while (pango_attr_iterator_next (&iter)); + + _pango_attr_iterator_destroy (&iter); + _pango_attr_list_destroy (&list); + _pango_attr_list_destroy (&hyphens); return TRUE; } diff --git a/pango/pango-break.h b/pango/pango-break.h index a8e6c5b9..5d791e27 100644 --- a/pango/pango-break.h +++ b/pango/pango-break.h @@ -72,6 +72,10 @@ G_BEGIN_DECLS * This flag is particularly useful when selecting text word-by-word. This flag * implements Unicode's [Word Boundaries](http://www.unicode.org/reports/tr29/) * semantics. (Since: 1.22) + * @break_inserts_hyphen: when breaking lines before this char, insert a hyphen. + * Since: 1.50 + * @break_removes_preceding: when breaking lines before this char, remove the + * preceding char. Since 1.50 * * The `PangoLogAttr` structure stores information about the attributes of a * single character. @@ -91,6 +95,8 @@ struct _PangoLogAttr guint backspace_deletes_character : 1; guint is_expandable_space : 1; guint is_word_boundary : 1; + guint break_inserts_hyphen : 1; + guint break_removes_preceding : 1; }; PANGO_DEPRECATED_IN_1_44 diff --git a/pango/pango-layout.c b/pango/pango-layout.c index ccebc557..85f12f0a 100644 --- a/pango/pango-layout.c +++ b/pango/pango-layout.c @@ -3595,7 +3595,6 @@ struct _ParaBreakState int log_widths_offset; /* Offset into log_widths to the point corresponding * to the remaining portion of the first item */ - int *need_hyphen; /* Insert a hyphen if breaking here ? */ int line_start_index; /* Start index (byte offset) of line in layout->text */ int line_start_offset; /* Character offset of line in layout->text */ @@ -3684,140 +3683,19 @@ insert_run (PangoLayoutLine *line, state->glyphs = NULL; g_free (state->log_widths); state->log_widths = NULL; - g_free (state->need_hyphen); - state->need_hyphen = NULL; } line->runs = g_slist_prepend (line->runs, run); line->length += run_item->length; } -static void -get_need_hyphen (PangoItem *item, - const char *text, - int *need_hyphen) -{ - int i; - const char *p; - gboolean prev_space; - gboolean prev_hyphen; - PangoAttrList attrs; - PangoAttrIterator iter; - GSList *l; - - _pango_attr_list_init (&attrs); - for (l = item->analysis.extra_attrs; l; l = l->next) - { - PangoAttribute *attr = l->data; - if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS) - pango_attr_list_change (&attrs, pango_attribute_copy (attr)); - } - _pango_attr_list_get_iterator (&attrs, &iter); - - prev_space = prev_hyphen = TRUE; - - for (i = 0, p = text + item->offset; i < item->num_chars; i++, p = g_utf8_next_char (p)) - { - gunichar wc = g_utf8_get_char (p); - gboolean space; - gboolean hyphen; - int start, end, pos; - gboolean insert_hyphens = TRUE; - - pos = p - text; - do { - pango_attr_iterator_range (&iter, &start, &end); - if (end > pos) - break; - } while (pango_attr_iterator_next (&iter)); - - if (start <= pos && pos < end) - { - PangoAttribute *attr; - attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS); - if (attr) - insert_hyphens = ((PangoAttrInt*)attr)->value; - - /* Some scripts don't use hyphen.*/ - switch (item->analysis.script) - { - case PANGO_SCRIPT_COMMON: - case PANGO_SCRIPT_HAN: - case PANGO_SCRIPT_HANGUL: - case PANGO_SCRIPT_HIRAGANA: - case PANGO_SCRIPT_KATAKANA: - insert_hyphens = FALSE; - break; - default: - break; - } - } - - switch ((int)g_unichar_type (wc)) - { - case G_UNICODE_SPACE_SEPARATOR: - case G_UNICODE_LINE_SEPARATOR: - case G_UNICODE_PARAGRAPH_SEPARATOR: - space = TRUE; - break; - case G_UNICODE_CONTROL: - if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f') - space = TRUE; - else - space = FALSE; - break; - default: - space = FALSE; - break; - } - - if (wc == '-' || /* Hyphen-minus */ - wc == 0x058a || /* Armenian hyphen */ - wc == 0x1400 || /* Canadian syllabics hyphen */ - wc == 0x1806 || /* Mongolian todo hyphen */ - wc == 0x2010 || /* Hyphen */ - wc == 0x2027 || /* Hyphenation point */ - wc == 0x2e17 || /* Double oblique hyphen */ - wc == 0x2e40 || /* Double hyphen */ - wc == 0x30a0 || /* Katakana-Hiragana double hyphen */ - wc == 0xfe63 || /* Small hyphen-minus */ - wc == 0xff0d) /* Fullwidth hyphen-minus */ - hyphen = TRUE; - else - hyphen = FALSE; - - if (prev_space || space) - need_hyphen[i] = FALSE; - else if (prev_hyphen || hyphen) - need_hyphen[i] = FALSE; - else - need_hyphen[i] = insert_hyphens; - - prev_space = space; - prev_hyphen = hyphen; - } - - need_hyphen[item->num_chars - 1] = FALSE; - - _pango_attr_iterator_destroy (&iter); - _pango_attr_list_destroy (&attrs); -} - static gboolean break_needs_hyphen (PangoLayout *layout, ParaBreakState *state, int pos) { - if (state->log_widths_offset + pos == 0) - return FALSE; - - if (layout->log_attrs[state->start_offset + pos].is_word_boundary) - return FALSE; - - if (state->need_hyphen[state->log_widths_offset + pos - 1]) - return TRUE; - - return FALSE; + return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen || + layout->log_attrs[state->start_offset + pos].break_removes_preceding; } static int @@ -3843,23 +3721,56 @@ find_hyphen_width (PangoItem *item) } static int +find_char_width (PangoItem *item, + gunichar wc) +{ + hb_font_t *hb_font; + hb_codepoint_t glyph; + + if (!item->analysis.font) + return 0; + + hb_font = pango_font_get_hb_font (item->analysis.font); + if (hb_font_get_nominal_glyph (hb_font, wc, &glyph)) + return hb_font_get_glyph_h_advance (hb_font, glyph); + + return 0; +} + +static inline void +ensure_hyphen_width (ParaBreakState *state) +{ + if (state->hyphen_width < 0) + { + PangoItem *item = state->items->data; + state->hyphen_width = find_hyphen_width (item); + } +} + +static int find_break_extra_width (PangoLayout *layout, ParaBreakState *state, int pos) { /* Check whether to insert a hyphen */ - if (break_needs_hyphen (layout, state, pos)) + if (layout->log_attrs[state->start_offset + pos].break_inserts_hyphen) { - if (state->hyphen_width < 0) + ensure_hyphen_width (state); + + if (layout->log_attrs[state->start_offset + pos].break_removes_preceding) { PangoItem *item = state->items->data; - state->hyphen_width = find_hyphen_width (item); - } + gunichar wc; - return state->hyphen_width; + wc = g_utf8_get_char (g_utf8_offset_to_pointer (layout->text, state->start_offset + pos - 1)); + + return state->hyphen_width - find_char_width (item, wc); + } + else + return state->hyphen_width; } - else - return 0; + + return 0; } #if 0 @@ -3923,7 +3834,6 @@ process_item (PangoLayout *layout, state->glyphs = shape_run (line, state, item); state->log_widths = NULL; - state->need_hyphen = NULL; state->log_widths_offset = 0; processing_new_item = TRUE; @@ -3980,8 +3890,6 @@ process_item (PangoLayout *layout, PangoGlyphItem glyph_item = {item, state->glyphs}; state->log_widths = g_new (int, item->num_chars); pango_glyph_item_get_logical_widths (&glyph_item, layout->text, state->log_widths); - state->need_hyphen = g_new (int, item->num_chars); - get_need_hyphen (item, layout->text, state->need_hyphen); } retry_break: @@ -4082,8 +3990,6 @@ process_item (PangoLayout *layout, state->glyphs = NULL; g_free (state->log_widths); state->log_widths = NULL; - g_free (state->need_hyphen); - state->need_hyphen = NULL; return BREAK_NONE_FIT; } @@ -4629,7 +4535,6 @@ pango_layout_check_lines (PangoLayout *layout) state.glyphs = NULL; state.log_widths = NULL; - state.need_hyphen = NULL; /* for deterministic bug hunting's sake set everything! */ state.line_width = -1; diff --git a/pango/shape.c b/pango/shape.c index aeb9aa47..62c0f025 100644 --- a/pango/shape.c +++ b/pango/shape.c @@ -344,6 +344,7 @@ pango_hb_shape (const char *item_text, int paragraph_length, const PangoAnalysis *analysis, PangoLogAttr *log_attrs, + int num_chars, PangoGlyphString *glyphs, PangoShapeFlags flags) { @@ -362,6 +363,7 @@ pango_hb_shape (const char *item_text, unsigned int num_features = 0; PangoGlyphInfo *infos; PangoTextTransform transform; + int hyphen_index; g_return_if_fail (analysis != NULL); g_return_if_fail (analysis->font != NULL); @@ -392,22 +394,35 @@ pango_hb_shape (const char *item_text, hb_buffer_set_flags (hb_buffer, hb_buffer_flags); hb_buffer_set_invisible_glyph (hb_buffer, PANGO_GLYPH_EMPTY); + if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN) + { + const char *p = paragraph_text + item_offset + item_length; + int last_char_len = p - g_utf8_prev_char (p); + + hyphen_index = item_offset + item_length - last_char_len; + + if (log_attrs[num_chars].break_removes_preceding) + item_length -= last_char_len; + } + + /* Add pre-context */ + hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0); + if (transform == PANGO_TEXT_TRANSFORM_NONE) { - hb_buffer_add_utf8 (hb_buffer, paragraph_text, paragraph_length, item_offset, item_length); + hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset + item_length, item_offset, item_length); } else { const char *p; int i; - /* Add pre-context */ - hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0); - /* Transform the item text according to text transform. * Note: we assume text transforms won't cross font boundaries */ - for (p = paragraph_text + item_offset, i = 0; p < paragraph_text + item_offset + item_length; p = g_utf8_next_char (p), i++) + for (p = paragraph_text + item_offset, i = 0; + p < paragraph_text + item_offset + item_length; + p = g_utf8_next_char (p), i++) { int index = p - paragraph_text; gunichar ch = g_utf8_get_char (p); @@ -447,26 +462,23 @@ pango_hb_shape (const char *item_text, else hb_buffer_add (hb_buffer, ch, index); } - - /* Add post-context */ - hb_buffer_add_utf8 (hb_buffer, paragraph_text + item_offset + item_length, paragraph_length - (item_offset + item_length), - item_offset + item_length, 0); } + /* Add post-context */ + hb_buffer_add_utf8 (hb_buffer, paragraph_text, paragraph_length, item_offset + item_length, 0); + if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN) { /* Insert either a Unicode or ASCII hyphen. We may * want to look for script-specific hyphens here. */ - const char *p = paragraph_text + item_offset + item_length; - int last_char_len = p - g_utf8_prev_char (p); hb_codepoint_t glyph; /* Note: We rely on hb_buffer_add clearing existing post-context */ if (hb_font_get_nominal_glyph (hb_font, 0x2010, &glyph)) - hb_buffer_add (hb_buffer, 0x2010, item_offset + item_length - last_char_len); + hb_buffer_add (hb_buffer, 0x2010, hyphen_index); else if (hb_font_get_nominal_glyph (hb_font, '-', &glyph)) - hb_buffer_add (hb_buffer, '-', item_offset + item_length - last_char_len); + hb_buffer_add (hb_buffer, '-', hyphen_index); } pango_font_get_features (analysis->font, features, G_N_ELEMENTS (features), &num_features); @@ -579,6 +591,7 @@ pango_shape_internal (const char *item_text, int paragraph_length, const PangoAnalysis *analysis, PangoLogAttr *log_attrs, + int num_chars, PangoGlyphString *glyphs, PangoShapeFlags flags) { @@ -606,9 +619,8 @@ pango_shape_internal (const char *item_text, pango_hb_shape (item_text, item_length, paragraph_text, paragraph_length, analysis, - log_attrs, - glyphs, - flags); + log_attrs, num_chars, + glyphs, flags); if (G_UNLIKELY (glyphs->num_glyphs == 0)) { @@ -867,7 +879,7 @@ pango_shape_with_flags (const char *item_text, { pango_shape_internal (item_text, item_length, paragraph_text, paragraph_length, - analysis, NULL, + analysis, NULL, 0, glyphs, flags); } @@ -906,7 +918,8 @@ pango_shape_item (PangoItem *item, { pango_shape_internal (paragraph_text + item->offset, item->length, paragraph_text, paragraph_length, - &item->analysis, log_attrs, + &item->analysis, + log_attrs, item->num_chars, glyphs, flags); } |