summary | refs | log | tree | commit | diff
path: root/pango
diff options
context:
space:
mode:
author: Matthias Clasen <mclasen@redhat.com> 2021-08-24 22:41:15 -0400
committer: Matthias Clasen <mclasen@redhat.com> 2021-08-24 23:27:54 -0400
commit: 70933101e4eb4dee9a29fe0992624ae9c58adf00 (patch)
tree: e10668e5cfab76731d1b12dee4d958e39ca30b46 /pango
parent: 3d7bb496c129a0efbabc6fa436c35516b5b6cd69 (diff)
download: pango-70933101e4eb4dee9a29fe0992624ae9c58adf00.tar.gz
Add hyphens to log attrs
The code computing this is much better off in break.c, so move it there, and keep the information in the log attr array.
Diffstat (limited to 'pango')
-rw-r--r-- pango/break.c 105
-rw-r--r-- pango/pango-break.h 3
2 files changed, 105 insertions, 3 deletions
diff --git a/pango/break.c b/pango/break.c
index c55d5f22..8e1aeb56 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -249,6 +249,8 @@ default_break (const char *text,
gint last_sentence_start = -1;
gint last_non_space = -1;
+ gboolean prev_space_or_hyphen;
+
gboolean almost_done = FALSE;
gboolean done = FALSE;
@@ -261,6 +263,7 @@ default_break (const char *text,
prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_wc = 0;
prev_jamo = NO_JAMO;
+ prev_space_or_hyphen = FALSE;
if (length == 0 || *text == '\0')
{
@@ -291,6 +294,8 @@ default_break (const char *text,
/* Emoji extended pictographics */
gboolean is_Extended_Pictographic;
+ PangoScript script;
+
wc = next_wc;
break_type = next_break_type;
@@ -533,17 +538,16 @@ default_break (const char *text,
prev_GB_type = GB_type;
}
+ script = (PangoScript)g_unichar_get_script (wc);
+
/* ---- UAX#29 Word Boundaries ---- */
{
is_word_boundary = FALSE;
if (is_grapheme_boundary ||
G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) /* Rules WB3 and WB4 */
{
- PangoScript script;
WordBreakType WB_type;
- script = (PangoScript)g_unichar_get_script (wc);
-
/* Find the WordBreakType of wc */
WB_type = WB_Other;
@@ -1552,7 +1556,68 @@ default_break (const char *text,
attrs[i - 1].is_white) {
last_sentence_start++;
}
+ }
+
+ /* --- Hyphens --- */
+ {
+ gboolean insert_hyphens;
+ gboolean space_or_hyphen = FALSE;
+
+ switch ((int)script)
+ {
+ case PANGO_SCRIPT_COMMON:
+ case PANGO_SCRIPT_HAN:
+ case PANGO_SCRIPT_HANGUL:
+ case PANGO_SCRIPT_HIRAGANA:
+ case PANGO_SCRIPT_KATAKANA:
+ insert_hyphens = FALSE;
+ break;
+ default:
+ insert_hyphens = TRUE;
+ break;
+ }
+
+ switch ((int)type)
+ {
+ case G_UNICODE_SPACE_SEPARATOR:
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ space_or_hyphen = TRUE;
+ break;
+ case G_UNICODE_CONTROL:
+ if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f')
+ space_or_hyphen = TRUE;
+ break;
+ default:
+ break;
+ }
+
+ if (!space_or_hyphen)
+ {
+ if (wc == '-' || /* Hyphen-minus */
+ wc == 0x058a || /* Armenian hyphen */
+ wc == 0x1400 || /* Canadian syllabics hyphen */
+ wc == 0x1806 || /* Mongolian todo hyphen */
+ wc == 0x2010 || /* Hyphen */
+ wc == 0x2027 || /* Hyphenation point */
+ wc == 0x2e17 || /* Double oblique hyphen */
+ wc == 0x2e40 || /* Double hyphen */
+ wc == 0x30a0 || /* Katakana-Hiragana double hyphen */
+ wc == 0xfe63 || /* Small hyphen-minus */
+ wc == 0xff0d) /* Fullwidth hyphen-minus */
+ space_or_hyphen = TRUE;
+ }
+
+ if (attrs[i].is_word_boundary)
+ attrs[i].break_inserts_hyphen = FALSE;
+ else if (prev_space_or_hyphen)
+ attrs[i].break_inserts_hyphen = FALSE;
+ else if (space_or_hyphen)
+ attrs[i].break_inserts_hyphen = FALSE;
+ else
+ attrs[i].break_inserts_hyphen = insert_hyphens;
+ prev_space_or_hyphen = space_or_hyphen;
}
prev_wc = wc;
@@ -1633,16 +1698,21 @@ break_attrs (const char *text,
int log_attrs_len)
{
PangoAttrList list;
+ PangoAttrList hyphens;
PangoAttrIterator iter;
GSList *l;
_pango_attr_list_init (&list);
+ _pango_attr_list_init (&hyphens);
+
for (l = attributes; l; l = l->next)
{
PangoAttribute *attr = l->data;
if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
pango_attr_list_insert (&list, pango_attribute_copy (attr));
+ if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
+ pango_attr_list_insert (&hyphens, pango_attribute_copy (attr));
}
if (!_pango_attr_list_has_attributes (&list))
@@ -1681,7 +1751,36 @@ break_attrs (const char *text,
} while (pango_attr_iterator_next (&iter));
_pango_attr_iterator_destroy (&iter);
+
+ _pango_attr_list_get_iterator (&hyphens, &iter);
+ do {
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS);
+
+ if (attr && ((PangoAttrInt*)attr)->value == 0)
+ {
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ pango_attr_iterator_range (&iter, &start, &end);
+ if (start < offset)
+ start_pos = 0;
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+ if (end >= offset + length)
+ end_pos = log_attrs_len;
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ for (pos = start_pos + 1; pos < end_pos; pos++)
+ {
+ log_attrs[pos].break_inserts_hyphen = FALSE;
+ }
+ }
+ } while (pango_attr_iterator_next (&iter));
+
_pango_attr_list_destroy (&list);
+ _pango_attr_list_destroy (&hyphens);
return TRUE;
}
diff --git a/pango/pango-break.h b/pango/pango-break.h
index a8e6c5b9..52febd3d 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -72,6 +72,8 @@ G_BEGIN_DECLS
* This flag is particularly useful when selecting text word-by-word. This flag
* implements Unicode's [Word Boundaries](http://www.unicode.org/reports/tr29/)
* semantics. (Since: 1.22)
+ * @break_inserts_hyphen: when breaking lines before this character, insert a
+ * hyphen. (Since: 1.50)
*
* The `PangoLogAttr` structure stores information about the attributes of a
* single character.
@@ -91,6 +93,7 @@ struct _PangoLogAttr
guint backspace_deletes_character : 1;
guint is_expandable_space : 1;
guint is_word_boundary : 1;
+ guint break_inserts_hyphen : 1;
};
PANGO_DEPRECATED_IN_1_44