summary | refs | log | tree | commit | diff
path: root/pango
diff options
context:
space:
mode:
author: Matthias Clasen <mclasen@redhat.com> 2021-08-25 04:52:39 +0000
committer: Matthias Clasen <mclasen@redhat.com> 2021-08-25 04:52:39 +0000
commit: 3aee7615e9a123ae750e49e9864bdaa4b267cdbb (patch)
tree: a6fd68a40846f41e2dfd57c14e895e6f4b78a66b /pango
parent: d3677f9f89181a254b57fe972257bd146f443899 (diff)
parent: 8fda48b39e7379348d98effb6b94ad1f83caab82 (diff)
download: pango-3aee7615e9a123ae750e49e9864bdaa4b267cdbb.tar.gz
Merge branch 'hyphen-log-attr' into 'main'
Add hyphens to log attrs
Closes #603
See merge request GNOME/pango!436
Diffstat (limited to 'pango')
-rw-r--r-- pango/break.c 124
-rw-r--r-- pango/pango-break.h 6
-rw-r--r-- pango/pango-layout.c 179
-rw-r--r-- pango/shape.c 49
4 files changed, 194 insertions, 164 deletions
diff --git a/pango/break.c b/pango/break.c
index c55d5f22..d348f9b8 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -249,6 +249,8 @@ default_break (const char *text,
gint last_sentence_start = -1;
gint last_non_space = -1;
+ gboolean prev_space_or_hyphen;
+
gboolean almost_done = FALSE;
gboolean done = FALSE;
@@ -261,6 +263,7 @@ default_break (const char *text,
prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_wc = 0;
prev_jamo = NO_JAMO;
+ prev_space_or_hyphen = FALSE;
if (length == 0 || *text == '\0')
{
@@ -291,6 +294,8 @@ default_break (const char *text,
/* Emoji extended pictographics */
gboolean is_Extended_Pictographic;
+ PangoScript script;
+
wc = next_wc;
break_type = next_break_type;
@@ -533,17 +538,16 @@ default_break (const char *text,
prev_GB_type = GB_type;
}
+ script = (PangoScript)g_unichar_get_script (wc);
+
/* ---- UAX#29 Word Boundaries ---- */
{
is_word_boundary = FALSE;
if (is_grapheme_boundary ||
G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) /* Rules WB3 and WB4 */
{
- PangoScript script;
WordBreakType WB_type;
- script = (PangoScript)g_unichar_get_script (wc);
-
/* Find the WordBreakType of wc */
WB_type = WB_Other;
@@ -1552,7 +1556,78 @@ default_break (const char *text,
attrs[i - 1].is_white) {
last_sentence_start++;
}
+ }
+
+ /* --- Hyphens --- */
+
+ {
+ gboolean insert_hyphens;
+ gboolean space_or_hyphen = FALSE;
+
+ attrs[i].break_inserts_hyphen = FALSE;
+ attrs[i].break_removes_preceding = FALSE;
+
+ switch ((int)script)
+ {
+ case PANGO_SCRIPT_COMMON:
+ case PANGO_SCRIPT_HAN:
+ case PANGO_SCRIPT_HANGUL:
+ case PANGO_SCRIPT_HIRAGANA:
+ case PANGO_SCRIPT_KATAKANA:
+ insert_hyphens = FALSE;
+ break;
+ default:
+ insert_hyphens = TRUE;
+ break;
+ }
+ switch ((int)type)
+ {
+ case G_UNICODE_SPACE_SEPARATOR:
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ space_or_hyphen = TRUE;
+ break;
+ case G_UNICODE_CONTROL:
+ if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f')
+ space_or_hyphen = TRUE;
+ break;
+ default:
+ break;
+ }
+
+ if (!space_or_hyphen)
+ {
+ if (wc == '-' || /* Hyphen-minus */
+ wc == 0x058a || /* Armenian hyphen */
+ wc == 0x1400 || /* Canadian syllabics hyphen */
+ wc == 0x1806 || /* Mongolian todo hyphen */
+ wc == 0x2010 || /* Hyphen */
+ wc == 0x2e17 || /* Double oblique hyphen */
+ wc == 0x2e40 || /* Double hyphen */
+ wc == 0x30a0 || /* Katakana-Hiragana double hyphen */
+ wc == 0xfe63 || /* Small hyphen-minus */
+ wc == 0xff0d) /* Fullwidth hyphen-minus */
+ space_or_hyphen = TRUE;
+ }
+
+ if (attrs[i].is_word_boundary)
+ attrs[i].break_inserts_hyphen = FALSE;
+ else if (prev_space_or_hyphen)
+ attrs[i].break_inserts_hyphen = FALSE;
+ else if (space_or_hyphen)
+ attrs[i].break_inserts_hyphen = FALSE;
+ else
+ attrs[i].break_inserts_hyphen = insert_hyphens;
+
+ if (prev_wc == 0x007C || /* Vertical Line */
+ prev_wc == 0x2027) /* Hyphenation point */
+ {
+ attrs[i].break_inserts_hyphen = TRUE;
+ attrs[i].break_removes_preceding = TRUE;
+ }
+
+ prev_space_or_hyphen = space_or_hyphen;
}
prev_wc = wc;
@@ -1633,22 +1708,21 @@ break_attrs (const char *text,
int log_attrs_len)
{
PangoAttrList list;
+ PangoAttrList hyphens;
PangoAttrIterator iter;
GSList *l;
_pango_attr_list_init (&list);
+ _pango_attr_list_init (&hyphens);
+
for (l = attributes; l; l = l->next)
{
PangoAttribute *attr = l->data;
if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
pango_attr_list_insert (&list, pango_attribute_copy (attr));
- }
-
- if (!_pango_attr_list_has_attributes (&list))
- {
- _pango_attr_list_destroy (&list);
- return FALSE;
+ else if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
+ pango_attr_list_insert (&hyphens, pango_attribute_copy (attr));
}
_pango_attr_list_get_iterator (&list, &iter);
@@ -1681,7 +1755,39 @@ break_attrs (const char *text,
} while (pango_attr_iterator_next (&iter));
_pango_attr_iterator_destroy (&iter);
+
+ _pango_attr_list_get_iterator (&hyphens, &iter);
+ do {
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS);
+
+ if (attr && ((PangoAttrInt*)attr)->value == 0)
+ {
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ pango_attr_iterator_range (&iter, &start, &end);
+ if (start < offset)
+ start_pos = 0;
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+ if (end >= offset + length)
+ end_pos = log_attrs_len;
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ for (pos = start_pos + 1; pos < end_pos; pos++)
+ {
+ if (!log_attrs[pos].break_removes_preceding)
+ log_attrs[pos].break_inserts_hyphen = FALSE;
+ }
+ }
+ } while (pango_attr_iterator_next (&iter));
+
+ _pango_attr_iterator_destroy (&iter);
+
_pango_attr_list_destroy (&list);
+ _pango_attr_list_destroy (&hyphens);
return TRUE;
}
diff --git a/pango/pango-break.h b/pango/pango-break.h
index a8e6c5b9..5d791e27 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -72,6 +72,10 @@ G_BEGIN_DECLS
* This flag is particularly useful when selecting text word-by-word. This flag
* implements Unicode's [Word Boundaries](http://www.unicode.org/reports/tr29/)
* semantics. (Since: 1.22)
+ * @break_inserts_hyphen: when breaking lines before this char, insert a hyphen.
+ * Since: 1.50
+ * @break_removes_preceding: when breaking lines before this char, remove the
+ * preceding char. Since: 1.50
*
* The `PangoLogAttr` structure stores information about the attributes of a
* single character.
@@ -91,6 +95,8 @@ struct _PangoLogAttr
guint backspace_deletes_character : 1;
guint is_expandable_space : 1;
guint is_word_boundary : 1;
+ guint break_inserts_hyphen : 1;
+ guint break_removes_preceding : 1;
};
PANGO_DEPRECATED_IN_1_44
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index ccebc557..85f12f0a 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -3595,7 +3595,6 @@ struct _ParaBreakState
int log_widths_offset; /* Offset into log_widths to the point corresponding
* to the remaining portion of the first item */
- int *need_hyphen; /* Insert a hyphen if breaking here ? */
int line_start_index; /* Start index (byte offset) of line in layout->text */
int line_start_offset; /* Character offset of line in layout->text */
@@ -3684,140 +3683,19 @@ insert_run (PangoLayoutLine *line,
state->glyphs = NULL;
g_free (state->log_widths);
state->log_widths = NULL;
- g_free (state->need_hyphen);
- state->need_hyphen = NULL;
}
line->runs = g_slist_prepend (line->runs, run);
line->length += run_item->length;
}
-static void
-get_need_hyphen (PangoItem *item,
- const char *text,
- int *need_hyphen)
-{
- int i;
- const char *p;
- gboolean prev_space;
- gboolean prev_hyphen;
- PangoAttrList attrs;
- PangoAttrIterator iter;
- GSList *l;
-
- _pango_attr_list_init (&attrs);
- for (l = item->analysis.extra_attrs; l; l = l->next)
- {
- PangoAttribute *attr = l->data;
- if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
- pango_attr_list_change (&attrs, pango_attribute_copy (attr));
- }
- _pango_attr_list_get_iterator (&attrs, &iter);
-
- prev_space = prev_hyphen = TRUE;
-
- for (i = 0, p = text + item->offset; i < item->num_chars; i++, p = g_utf8_next_char (p))
- {
- gunichar wc = g_utf8_get_char (p);
- gboolean space;
- gboolean hyphen;
- int start, end, pos;
- gboolean insert_hyphens = TRUE;
-
- pos = p - text;
- do {
- pango_attr_iterator_range (&iter, &start, &end);
- if (end > pos)
- break;
- } while (pango_attr_iterator_next (&iter));
-
- if (start <= pos && pos < end)
- {
- PangoAttribute *attr;
- attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS);
- if (attr)
- insert_hyphens = ((PangoAttrInt*)attr)->value;
-
- /* Some scripts don't use hyphen.*/
- switch (item->analysis.script)
- {
- case PANGO_SCRIPT_COMMON:
- case PANGO_SCRIPT_HAN:
- case PANGO_SCRIPT_HANGUL:
- case PANGO_SCRIPT_HIRAGANA:
- case PANGO_SCRIPT_KATAKANA:
- insert_hyphens = FALSE;
- break;
- default:
- break;
- }
- }
-
- switch ((int)g_unichar_type (wc))
- {
- case G_UNICODE_SPACE_SEPARATOR:
- case G_UNICODE_LINE_SEPARATOR:
- case G_UNICODE_PARAGRAPH_SEPARATOR:
- space = TRUE;
- break;
- case G_UNICODE_CONTROL:
- if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f')
- space = TRUE;
- else
- space = FALSE;
- break;
- default:
- space = FALSE;
- break;
- }
-
- if (wc == '-' || /* Hyphen-minus */
- wc == 0x058a || /* Armenian hyphen */
- wc == 0x1400 || /* Canadian syllabics hyphen */
- wc == 0x1806 || /* Mongolian todo hyphen */
- wc == 0x2010 || /* Hyphen */
- wc == 0x2027 || /* Hyphenation point */
- wc == 0x2e17 || /* Double oblique hyphen */
- wc == 0x2e40 || /* Double hyphen */
- wc == 0x30a0 || /* Katakana-Hiragana double hyphen */
- wc == 0xfe63 || /* Small hyphen-minus */
- wc == 0xff0d) /* Fullwidth hyphen-minus */
- hyphen = TRUE;
- else
- hyphen = FALSE;
-
- if (prev_space || space)
- need_hyphen[i] = FALSE;
- else if (prev_hyphen || hyphen)
- need_hyphen[i] = FALSE;
- else
- need_hyphen[i] = insert_hyphens;
-
- prev_space = space;
- prev_hyphen = hyphen;
- }
-
- need_hyphen[item->num_chars - 1] = FALSE;
-
- _pango_attr_iterator_destroy (&iter);
- _pango_attr_list_destroy (&attrs);
-}
-
static gboolean
break_needs_hyphen (PangoLayout *layout,
ParaBreakState *state,
int pos)
{
- if (state->log_widths_offset + pos == 0)
- return FALSE;
-
- if (layout->log_attrs[state->start_offset + pos].is_word_boundary)
- return FALSE;
-
- if (state->need_hyphen[state->log_widths_offset + pos - 1])
- return TRUE;
-
- return FALSE;
+ return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen ||
+ layout->log_attrs[state->start_offset + pos].break_removes_preceding;
}
static int
@@ -3843,23 +3721,56 @@ find_hyphen_width (PangoItem *item)
}
static int
+find_char_width (PangoItem *item,
+ gunichar wc)
+{
+ hb_font_t *hb_font;
+ hb_codepoint_t glyph;
+
+ if (!item->analysis.font)
+ return 0;
+
+ hb_font = pango_font_get_hb_font (item->analysis.font);
+ if (hb_font_get_nominal_glyph (hb_font, wc, &glyph))
+ return hb_font_get_glyph_h_advance (hb_font, glyph);
+
+ return 0;
+}
+
+static inline void
+ensure_hyphen_width (ParaBreakState *state)
+{
+ if (state->hyphen_width < 0)
+ {
+ PangoItem *item = state->items->data;
+ state->hyphen_width = find_hyphen_width (item);
+ }
+}
+
+static int
find_break_extra_width (PangoLayout *layout,
ParaBreakState *state,
int pos)
{
/* Check whether to insert a hyphen */
- if (break_needs_hyphen (layout, state, pos))
+ if (layout->log_attrs[state->start_offset + pos].break_inserts_hyphen)
{
- if (state->hyphen_width < 0)
+ ensure_hyphen_width (state);
+
+ if (layout->log_attrs[state->start_offset + pos].break_removes_preceding)
{
PangoItem *item = state->items->data;
- state->hyphen_width = find_hyphen_width (item);
- }
+ gunichar wc;
- return state->hyphen_width;
+ wc = g_utf8_get_char (g_utf8_offset_to_pointer (layout->text, state->start_offset + pos - 1));
+
+ return state->hyphen_width - find_char_width (item, wc);
+ }
+ else
+ return state->hyphen_width;
}
- else
- return 0;
+
+ return 0;
}
#if 0
@@ -3923,7 +3834,6 @@ process_item (PangoLayout *layout,
state->glyphs = shape_run (line, state, item);
state->log_widths = NULL;
- state->need_hyphen = NULL;
state->log_widths_offset = 0;
processing_new_item = TRUE;
@@ -3980,8 +3890,6 @@ process_item (PangoLayout *layout,
PangoGlyphItem glyph_item = {item, state->glyphs};
state->log_widths = g_new (int, item->num_chars);
pango_glyph_item_get_logical_widths (&glyph_item, layout->text, state->log_widths);
- state->need_hyphen = g_new (int, item->num_chars);
- get_need_hyphen (item, layout->text, state->need_hyphen);
}
retry_break:
@@ -4082,8 +3990,6 @@ process_item (PangoLayout *layout,
state->glyphs = NULL;
g_free (state->log_widths);
state->log_widths = NULL;
- g_free (state->need_hyphen);
- state->need_hyphen = NULL;
return BREAK_NONE_FIT;
}
@@ -4629,7 +4535,6 @@ pango_layout_check_lines (PangoLayout *layout)
state.glyphs = NULL;
state.log_widths = NULL;
- state.need_hyphen = NULL;
/* for deterministic bug hunting's sake set everything! */
state.line_width = -1;
diff --git a/pango/shape.c b/pango/shape.c
index aeb9aa47..62c0f025 100644
--- a/pango/shape.c
+++ b/pango/shape.c
@@ -344,6 +344,7 @@ pango_hb_shape (const char *item_text,
int paragraph_length,
const PangoAnalysis *analysis,
PangoLogAttr *log_attrs,
+ int num_chars,
PangoGlyphString *glyphs,
PangoShapeFlags flags)
{
@@ -362,6 +363,7 @@ pango_hb_shape (const char *item_text,
unsigned int num_features = 0;
PangoGlyphInfo *infos;
PangoTextTransform transform;
+ int hyphen_index;
g_return_if_fail (analysis != NULL);
g_return_if_fail (analysis->font != NULL);
@@ -392,22 +394,35 @@ pango_hb_shape (const char *item_text,
hb_buffer_set_flags (hb_buffer, hb_buffer_flags);
hb_buffer_set_invisible_glyph (hb_buffer, PANGO_GLYPH_EMPTY);
+ if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN)
+ {
+ const char *p = paragraph_text + item_offset + item_length;
+ int last_char_len = p - g_utf8_prev_char (p);
+
+ hyphen_index = item_offset + item_length - last_char_len;
+
+ if (log_attrs[num_chars].break_removes_preceding)
+ item_length -= last_char_len;
+ }
+
+ /* Add pre-context */
+ hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0);
+
if (transform == PANGO_TEXT_TRANSFORM_NONE)
{
- hb_buffer_add_utf8 (hb_buffer, paragraph_text, paragraph_length, item_offset, item_length);
+ hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset + item_length, item_offset, item_length);
}
else
{
const char *p;
int i;
- /* Add pre-context */
- hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0);
-
/* Transform the item text according to text transform.
* Note: we assume text transforms won't cross font boundaries
*/
- for (p = paragraph_text + item_offset, i = 0; p < paragraph_text + item_offset + item_length; p = g_utf8_next_char (p), i++)
+ for (p = paragraph_text + item_offset, i = 0;
+ p < paragraph_text + item_offset + item_length;
+ p = g_utf8_next_char (p), i++)
{
int index = p - paragraph_text;
gunichar ch = g_utf8_get_char (p);
@@ -447,26 +462,23 @@ pango_hb_shape (const char *item_text,
else
hb_buffer_add (hb_buffer, ch, index);
}
-
- /* Add post-context */
- hb_buffer_add_utf8 (hb_buffer, paragraph_text + item_offset + item_length, paragraph_length - (item_offset + item_length),
- item_offset + item_length, 0);
}
+ /* Add post-context */
+ hb_buffer_add_utf8 (hb_buffer, paragraph_text, paragraph_length, item_offset + item_length, 0);
+
if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN)
{
/* Insert either a Unicode or ASCII hyphen. We may
* want to look for script-specific hyphens here.
*/
- const char *p = paragraph_text + item_offset + item_length;
- int last_char_len = p - g_utf8_prev_char (p);
hb_codepoint_t glyph;
/* Note: We rely on hb_buffer_add clearing existing post-context */
if (hb_font_get_nominal_glyph (hb_font, 0x2010, &glyph))
- hb_buffer_add (hb_buffer, 0x2010, item_offset + item_length - last_char_len);
+ hb_buffer_add (hb_buffer, 0x2010, hyphen_index);
else if (hb_font_get_nominal_glyph (hb_font, '-', &glyph))
- hb_buffer_add (hb_buffer, '-', item_offset + item_length - last_char_len);
+ hb_buffer_add (hb_buffer, '-', hyphen_index);
}
pango_font_get_features (analysis->font, features, G_N_ELEMENTS (features), &num_features);
@@ -579,6 +591,7 @@ pango_shape_internal (const char *item_text,
int paragraph_length,
const PangoAnalysis *analysis,
PangoLogAttr *log_attrs,
+ int num_chars,
PangoGlyphString *glyphs,
PangoShapeFlags flags)
{
@@ -606,9 +619,8 @@ pango_shape_internal (const char *item_text,
pango_hb_shape (item_text, item_length,
paragraph_text, paragraph_length,
analysis,
- log_attrs,
- glyphs,
- flags);
+ log_attrs, num_chars,
+ glyphs, flags);
if (G_UNLIKELY (glyphs->num_glyphs == 0))
{
@@ -867,7 +879,7 @@ pango_shape_with_flags (const char *item_text,
{
pango_shape_internal (item_text, item_length,
paragraph_text, paragraph_length,
- analysis, NULL,
+ analysis, NULL, 0,
glyphs, flags);
}
@@ -906,7 +918,8 @@ pango_shape_item (PangoItem *item,
{
pango_shape_internal (paragraph_text + item->offset, item->length,
paragraph_text, paragraph_length,
- &item->analysis, log_attrs,
+ &item->analysis,
+ log_attrs, item->num_chars,
glyphs, flags);
}