summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Clasen <mclasen@redhat.com>2021-08-21 23:54:03 -0400
committerMatthias Clasen <mclasen@redhat.com>2021-08-25 01:08:02 -0400
commitb614ea2b06b3c9defaceb92b6904fa8a92249abe (patch)
treec20fdc8a2dc8a85561e3a8df5bf147d3c2888cd5
parent3aee7615e9a123ae750e49e9864bdaa4b267cdbb (diff)
downloadpango-b614ea2b06b3c9defaceb92b6904fa8a92249abe.tar.gz
Add segmentation attributesbreak-tailoring
Add attributes that let us override word and sentence boundaries (and, indirectly, line breaks). Tests included.
-rw-r--r--docs/pango_markup.md6
-rw-r--r--pango/break.c410
-rw-r--r--pango/pango-attributes.c56
-rw-r--r--pango/pango-attributes.h10
-rw-r--r--pango/pango-layout.c2
-rw-r--r--pango/pango-markup.c23
-rw-r--r--tests/breaks/five.break1
-rw-r--r--tests/breaks/five.expected7
-rw-r--r--tests/breaks/fourteen.break2
-rw-r--r--tests/breaks/fourteen.expected7
-rw-r--r--tests/breaks/thirteen.break2
-rw-r--r--tests/breaks/thirteen.expected7
-rw-r--r--tests/breaks/twelve.break2
-rw-r--r--tests/breaks/twelve.expected7
-rw-r--r--tests/test-common.c2
-rw-r--r--tests/testattributes.c16
16 files changed, 509 insertions, 51 deletions
diff --git a/docs/pango_markup.md b/docs/pango_markup.md
index 3a1cc311..287bbc56 100644
--- a/docs/pango_markup.md
+++ b/docs/pango_markup.md
@@ -201,6 +201,12 @@ text_transform
'none', 'lowercase', 'uppercase' or 'capitalize'. Support for text transformation
was added in Pango 1.50.
+segment
+: Overrides word or sentence boundaries. The value can be 'word' or 'sentence',
+ to indicate that the span should be treated as a single word or sentence.
+ Overlapping segments will be split to allow this, and line breaks will be
+ adjusted accordingly. Available since Pango 1.50.
+
## Convenience Tags
`<b>`
diff --git a/pango/break.c b/pango/break.c
index d348f9b8..864ac339 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1699,64 +1699,325 @@ break_script (const char *item_text,
/* }}} */
/* {{{ Attribute-based customization */
+/* We allow customizing log attrs in two ways:
+ *
+ * - You can directly remove breaks from a range, using allow_breaks=false.
+ * We preserve the non-tailorable rules from UAX #14, so mandatory breaks
+ * and breaks after ZWS remain. We also preserve break opportunities after
+ * hyphens and visible word dividers.
+ *
+ * - You can tweak the segmentation by marking ranges as word or sentence.
+ * When doing so, we split adjacent segments to preserve alternating
+ * starts and ends. We add a line break opportunity before each word that
+ * is created in this way, and we remove line break opportunities inside
+ * the word in the same way as for a range marked as allow_breaks=false,
+ * except that we don't remove char break opportunities.
+ *
+ * Note that UAX #29 does not guarantee that words fall neatly into
+ * sentences, so we don't do extra work to enforce that.
+ */
+
+/* Removes line break opportunities from the interior of a range of
+ * log attrs, i.e. from positions start_pos + 1 up to end_pos - 1.
+ *
+ * text: the text that log_attrs describes
+ * start: byte index into text of the character at start_pos
+ * log_attrs: the attributes to modify
+ * start_pos: position (in characters) of the start of the range
+ * end_pos: position (in characters) of the end of the range
+ *
+ * Not every break is removed: mandatory breaks are left alone, and a
+ * break opportunity is put back after a hyphen or visible word divider
+ * and after a ZWS (including any spaces that follow it).
+ */
+static void
+remove_breaks_from_range (const char *text,
+ int start,
+ PangoLogAttr *log_attrs,
+ int start_pos,
+ int end_pos)
+{
+ int pos;
+ const char *p;
+ gunichar ch;
+ int bt;
+ gboolean after_zws;
+ gboolean after_hyphen;
+
+ /* Assume our range doesn't start after a hyphen or in a zws sequence */
+ after_zws = FALSE;
+ after_hyphen = FALSE;
+ /* p tracks the character at pos, so it starts one character past start */
+ for (pos = start_pos + 1, p = g_utf8_next_char (text + start);
+ pos < end_pos;
+ pos++, p = g_utf8_next_char (p))
+ {
+ /* Mandatory breaks aren't tailorable */
+ if (!log_attrs[pos].is_mandatory_break)
+ log_attrs[pos].is_line_break = FALSE;
+
+ ch = g_utf8_get_char (p);
+ bt = g_unichar_break_type (ch);
+
+ /* Hyphens and visible word dividers: after_hyphen still describes
+ * the previous character here, so the break lands after it
+ */
+ if (after_hyphen)
+ log_attrs[pos].is_line_break = TRUE;
+
+ /* The breaking hyphens are U+058A ARMENIAN HYPHEN, U+2010 HYPHEN,
+ * U+2012 FIGURE DASH and U+2013 EN DASH
+ */
+ after_hyphen = ch == 0x00ad || /* Soft Hyphen */
+ ch == 0x058A || ch == 0x2010 || /* Breaking Hyphens */
+ ch == 0x2012 || ch == 0x2013 ||
+ ch == 0x05BE || ch == 0x0F0B || /* Visible word dividers */
+ ch == 0x1361 || ch == 0x17D8 ||
+ ch == 0x17DA || ch == 0x2027 ||
+ ch == 0x007C;
+
+ /* ZWS sequence: the break goes before the first non-space after it */
+ if (after_zws && bt != G_UNICODE_BREAK_SPACE)
+ log_attrs[pos].is_line_break = TRUE;
+
+ after_zws = bt == G_UNICODE_BREAK_ZERO_WIDTH_SPACE ||
+ (bt == G_UNICODE_BREAK_SPACE && after_zws);
+ }
+}
+
static gboolean
-break_attrs (const char *text,
- int length,
- GSList *attributes,
- int offset,
- PangoLogAttr *log_attrs,
- int log_attrs_len)
+handle_allow_breaks (const char *text,
+ int length,
+ PangoAttrList *attrs,
+ int offset,
+ PangoLogAttr *log_attrs,
+ int log_attrs_len) /* Applies PANGO_ATTR_ALLOW_BREAKS attributes from attrs to log_attrs.
+ * Only attributes with a zero value have an effect: is_char_break is
+ * cleared strictly inside the range and remove_breaks_from_range() is
+ * run over it. Attribute indices are byte indices relative to offset;
+ * a range reaching outside the text is clamped to position 0 or to
+ * log_attrs_len. Returns TRUE if at least one such attribute was
+ * processed.
+ */
{
- PangoAttrList list;
- PangoAttrList hyphens;
PangoAttrIterator iter;
- GSList *l;
+ gboolean tailored = FALSE; /* becomes TRUE once a zero-valued attribute is processed */
- _pango_attr_list_init (&list);
- _pango_attr_list_init (&hyphens);
+ _pango_attr_list_get_iterator (attrs, &iter);
- for (l = attributes; l; l = l->next)
+ do
{
- PangoAttribute *attr = l->data;
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_ALLOW_BREAKS);
- if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
- pango_attr_list_insert (&list, pango_attribute_copy (attr));
- else if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
- pango_attr_list_insert (&hyphens, pango_attribute_copy (attr));
+ if (!attr)
+ continue; /* nothing here; continue still evaluates the do-while condition */
+
+ if (!((PangoAttrInt*)attr)->value) /* a non-zero value needs no work */
+ {
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ start = attr->start_index;
+ end = attr->end_index;
+ if (start < offset)
+ start_pos = 0; /* attribute begins before this text: clamp to the first position */
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+ if (end >= offset + length)
+ end_pos = log_attrs_len; /* attribute reaches the end of this text or beyond: clamp */
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ for (pos = start_pos + 1; pos < end_pos; pos++)
+ log_attrs[pos].is_char_break = FALSE; /* interior only; start_pos keeps its flag */
+
+ remove_breaks_from_range (text, MAX (start - offset, 0), log_attrs, start_pos, end_pos); /* drops line break opportunities inside the range */
+
+ tailored = TRUE;
+ }
}
+ while (pango_attr_iterator_next (&iter));
- _pango_attr_list_get_iterator (&list, &iter);
- do {
- const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_ALLOW_BREAKS);
+ _pango_attr_iterator_destroy (&iter);
- if (attr && ((PangoAttrInt*)attr)->value == 0)
- {
- int start, end;
- int start_pos, end_pos;
- int pos;
+ return tailored;
+}
- pango_attr_iterator_range (&iter, &start, &end);
- if (start < offset)
- start_pos = 0;
- else
- start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
- if (end >= offset + length)
- end_pos = log_attrs_len;
- else
- end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
- for (pos = start_pos + 1; pos < end_pos; pos++)
- {
- log_attrs[pos].is_mandatory_break = FALSE;
- log_attrs[pos].is_line_break = FALSE;
- log_attrs[pos].is_char_break = FALSE;
- }
- }
- } while (pango_attr_iterator_next (&iter));
+static gboolean
+handle_words (const char *text,
+ int length,
+ PangoAttrList *attrs,
+ int offset,
+ PangoLogAttr *log_attrs,
+ int log_attrs_len) /* Applies PANGO_ATTR_WORD attributes from attrs to log_attrs.
+ * For each attribute, the word start/end/boundary flags strictly
+ * inside its range are cleared, line breaks inside the range are
+ * removed with remove_breaks_from_range(), and every end of the range
+ * that lies inside this text is marked as a word boundary, splitting
+ * a pre-existing word that straddles it. Attribute indices are byte
+ * indices relative to offset. Returns TRUE if a range end inside this
+ * text was marked.
+ */
+{
+ PangoAttrIterator iter;
+ gboolean tailored = FALSE; /* only set where a range end falls inside this text */
+
+ _pango_attr_list_get_iterator (attrs, &iter);
+
+ do
+ {
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_WORD);
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ if (!attr)
+ continue; /* nothing here; continue still evaluates the do-while condition */
+
+ start = attr->start_index;
+ end = attr->end_index;
+ if (start < offset)
+ start_pos = 0; /* attribute begins before this text: clamp */
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+ if (end >= offset + length)
+ end_pos = log_attrs_len; /* attribute reaches the end of this text or beyond: clamp */
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ for (pos = start_pos + 1; pos < end_pos; pos++)
+ {
+ log_attrs[pos].is_word_start = FALSE;
+ log_attrs[pos].is_word_end = FALSE;
+ log_attrs[pos].is_word_boundary = FALSE;
+ }
+
+ remove_breaks_from_range (text, MAX (start - offset, 0), log_attrs,
+ start_pos, end_pos);
+
+ if (start >= offset) /* the range starts inside this text */
+ {
+ gboolean in_word = FALSE; /* TRUE if walking back meets a word start before a word end */
+ for (pos = start_pos - 1; pos >= 0; pos--)
+ {
+ if (log_attrs[pos].is_word_end)
+ break;
+ if (log_attrs[pos].is_word_start)
+ {
+ in_word = TRUE;
+ break;
+ }
+ }
+ log_attrs[start_pos].is_word_start = TRUE;
+ log_attrs[start_pos].is_word_end = in_word; /* also ends the word that was open here, if any */
+ log_attrs[start_pos].is_word_boundary = TRUE;
+
+ /* Allow line breaks before words */
+ log_attrs[start_pos].is_line_break = TRUE;
+
+ tailored = TRUE;
+ }
+
+ if (end < offset + length) /* the range ends inside this text */
+ {
+ gboolean in_word = FALSE; /* TRUE if walking forward meets a word end before a word start */
+ for (pos = end_pos + 1; pos < log_attrs_len; pos++)
+ {
+ if (log_attrs[pos].is_word_start)
+ break;
+ if (log_attrs[pos].is_word_end)
+ {
+ in_word = TRUE;
+ break;
+ }
+ }
+ log_attrs[end_pos].is_word_start = in_word; /* the rest of a split word starts here */
+ log_attrs[end_pos].is_word_end = TRUE;
+ log_attrs[end_pos].is_word_boundary = TRUE;
+
+ /* Allow line breaks before words */
+ if (in_word)
+ log_attrs[end_pos].is_line_break = TRUE;
+
+ tailored = TRUE;
+ }
+ }
+ while (pango_attr_iterator_next (&iter));
+
+ _pango_attr_iterator_destroy (&iter);
+
+ return tailored;
+}
+
+/* Applies PANGO_ATTR_SENTENCE attributes from attrs to log_attrs.
+ *
+ * For each attribute, the sentence start/end/boundary flags strictly
+ * inside its range are cleared, and every end of the range that lies
+ * inside this text is marked as a sentence boundary, splitting a
+ * pre-existing sentence that straddles it.
+ *
+ * text: the text that log_attrs describes
+ * length: length of text in bytes
+ * attrs: list holding the sentence attributes
+ * offset: byte index of the start of text, in attribute coordinates
+ * log_attrs: the attributes to modify
+ * log_attrs_len: number of elements in log_attrs
+ *
+ * Returns TRUE if any position in log_attrs was modified.
+ */
+static gboolean
+handle_sentences (const char *text,
+ int length,
+ PangoAttrList *attrs,
+ int offset,
+ PangoLogAttr *log_attrs,
+ int log_attrs_len)
+{
+ PangoAttrIterator iter;
+ gboolean tailored = FALSE;
+
+ _pango_attr_list_get_iterator (attrs, &iter);
+
+ do
+ {
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_SENTENCE);
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ if (!attr)
+ continue;
+
+ /* Convert byte indices to positions, clamping ranges that reach
+ * outside of this text
+ */
+ start = attr->start_index;
+ end = attr->end_index;
+ if (start < offset)
+ start_pos = 0;
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+ if (end >= offset + length)
+ end_pos = log_attrs_len;
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ for (pos = start_pos + 1; pos < end_pos; pos++)
+ {
+ log_attrs[pos].is_sentence_start = FALSE;
+ log_attrs[pos].is_sentence_end = FALSE;
+ log_attrs[pos].is_sentence_boundary = FALSE;
+
+ tailored = TRUE;
+ }
+ if (start >= offset)
+ {
+ /* TRUE if walking back meets a sentence start before a sentence end */
+ gboolean in_sentence = FALSE;
+ for (pos = start_pos - 1; pos >= 0; pos--)
+ {
+ if (log_attrs[pos].is_sentence_end)
+ break;
+ if (log_attrs[pos].is_sentence_start)
+ {
+ in_sentence = TRUE;
+ break;
+ }
+ }
+ log_attrs[start_pos].is_sentence_start = TRUE;
+ log_attrs[start_pos].is_sentence_end = in_sentence;
+ log_attrs[start_pos].is_sentence_boundary = TRUE;
+
+ tailored = TRUE;
+ }
+ if (end < offset + length)
+ {
+ /* TRUE if walking forward meets a sentence end before a sentence start */
+ gboolean in_sentence = FALSE;
+ /* The bound must be checked on pos, not end_pos: otherwise the
+ * scan runs past the end of log_attrs when no later sentence
+ * start or end exists
+ */
+ for (pos = end_pos + 1; pos < log_attrs_len; pos++)
+ {
+ if (log_attrs[pos].is_sentence_start)
+ break;
+ if (log_attrs[pos].is_sentence_end)
+ {
+ in_sentence = TRUE;
+ break;
+ }
+ }
+ log_attrs[end_pos].is_sentence_start = in_sentence;
+ log_attrs[end_pos].is_sentence_end = TRUE;
+ log_attrs[end_pos].is_sentence_boundary = TRUE;
+
+ tailored = TRUE;
+ }
+ }
+ while (pango_attr_iterator_next (&iter));
_pango_attr_iterator_destroy (&iter);
- _pango_attr_list_get_iterator (&hyphens, &iter);
+ return tailored;
+}
+
+static gboolean
+handle_hyphens (const char *text,
+ int length,
+ PangoAttrList *attrs,
+ int offset,
+ PangoLogAttr *log_attrs,
+ int log_attrs_len)
+{
+ PangoAttrIterator iter;
+ gboolean tailored = FALSE;
+
+ _pango_attr_list_get_iterator (attrs, &iter);
+
do {
const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS);
@@ -1779,17 +2040,72 @@ break_attrs (const char *text,
for (pos = start_pos + 1; pos < end_pos; pos++)
{
if (!log_attrs[pos].break_removes_preceding)
- log_attrs[pos].break_inserts_hyphen = FALSE;
+ {
+ log_attrs[pos].break_inserts_hyphen = FALSE;
+
+ tailored = TRUE;
+ }
}
}
} while (pango_attr_iterator_next (&iter));
_pango_attr_iterator_destroy (&iter);
- _pango_attr_list_destroy (&list);
+ return tailored;
+}
+
+/* Tailors log_attrs according to the break-related attributes found
+ * in attributes.
+ *
+ * The allow_breaks, word, sentence and insert_hyphens attributes are
+ * copied into one list per type, and the lists are then applied in
+ * that order by the matching handle_*() function.
+ *
+ * text: the text that log_attrs describes
+ * length: length of text in bytes
+ * attributes: list of PangoAttribute; other attribute types are ignored
+ * offset: byte index of the start of text, in attribute coordinates
+ * log_attrs: the attributes to modify
+ * log_attrs_len: number of elements in log_attrs
+ *
+ * Returns TRUE if any of the handlers reported a change.
+ */
+static gboolean
+break_attrs (const char *text,
+ int length,
+ GSList *attributes,
+ int offset,
+ PangoLogAttr *log_attrs,
+ int log_attrs_len)
+{
+ PangoAttrList allow_breaks;
+ PangoAttrList words;
+ PangoAttrList sentences;
+ PangoAttrList hyphens;
+ GSList *l;
+ gboolean tailored = FALSE;
+
+ _pango_attr_list_init (&allow_breaks);
+ _pango_attr_list_init (&words);
+ _pango_attr_list_init (&sentences);
+ _pango_attr_list_init (&hyphens);
+
+ for (l = attributes; l; l = l->next)
+ {
+ PangoAttribute *attr = l->data;
+
+ if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
+ pango_attr_list_insert (&allow_breaks, pango_attribute_copy (attr));
+ else if (attr->klass->type == PANGO_ATTR_WORD)
+ pango_attr_list_insert (&words, pango_attribute_copy (attr));
+ else if (attr->klass->type == PANGO_ATTR_SENTENCE)
+ pango_attr_list_insert (&sentences, pango_attribute_copy (attr));
+ else if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
+ pango_attr_list_insert (&hyphens, pango_attribute_copy (attr));
+ }
+
+ tailored |= handle_allow_breaks (text, length, &allow_breaks, offset,
+ log_attrs, log_attrs_len);
+
+ tailored |= handle_words (text, length, &words, offset,
+ log_attrs, log_attrs_len);
+
+ /* Sentence attributes live in their own list; handing the word list
+ * to this handler would leave them without any effect
+ */
+ tailored |= handle_sentences (text, length, &sentences, offset,
+ log_attrs, log_attrs_len);
+
+ tailored |= handle_hyphens (text, length, &hyphens, offset,
+ log_attrs, log_attrs_len);
+
+ _pango_attr_list_destroy (&allow_breaks);
+ _pango_attr_list_destroy (&words);
+ _pango_attr_list_destroy (&sentences);
_pango_attr_list_destroy (&hyphens);
- return TRUE;
+ return tailored;
}
/* }}} */
@@ -2033,6 +2349,6 @@ pango_get_log_attrs (const char *text,
attrs_len);
}
- /* }}} */
+/* }}} */
/* vim:set foldmethod=marker expandtab: */
diff --git a/pango/pango-attributes.c b/pango/pango-attributes.c
index 28dc4105..326234d2 100644
--- a/pango/pango-attributes.c
+++ b/pango/pango-attributes.c
@@ -1303,6 +1303,60 @@ pango_attr_show_new (PangoShowFlags flags)
}
/**
+ * pango_attr_word_new:
+ *
+ * Marks the range of the attribute as a single word.
+ *
+ * Note that this may require adjustments to word and
+ * sentence classification around the range.
+ *
+ * Return value: (transfer full): the newly allocated
+ * `PangoAttribute`, which should be freed with
+ * [method@Pango.Attribute.destroy]
+ *
+ * Since: 1.50
+ */
+PangoAttribute *
+pango_attr_word_new (void)
+{
+ static const PangoAttrClass klass = { /* one class shared by all word attributes; uses the int attribute callbacks */
+ PANGO_ATTR_WORD,
+ pango_attr_int_copy,
+ pango_attr_int_destroy,
+ pango_attr_int_equal,
+ };
+
+ return pango_attr_int_new (&klass, 0); /* the int value is always 0 */
+}
+
+/**
+ * pango_attr_sentence_new:
+ *
+ * Marks the range of the attribute as a single sentence.
+ *
+ * Note that this may require adjustments to word and
+ * sentence classification around the range.
+ *
+ * Return value: (transfer full): the newly allocated
+ * `PangoAttribute`, which should be freed with
+ * [method@Pango.Attribute.destroy]
+ *
+ * Since: 1.50
+ */
+PangoAttribute *
+pango_attr_sentence_new (void)
+{
+ static const PangoAttrClass klass = { /* one class shared by all sentence attributes; uses the int attribute callbacks */
+ PANGO_ATTR_SENTENCE,
+ pango_attr_int_copy,
+ pango_attr_int_destroy,
+ pango_attr_int_equal,
+ };
+
+ return pango_attr_int_new (&klass, 0); /* the int value is always 0 */
+}
+
+/**
* pango_attr_overline_new:
* @overline: the overline style
*
@@ -1477,6 +1531,8 @@ pango_attribute_as_int (PangoAttribute *attr)
case PANGO_ATTR_OVERLINE:
case PANGO_ATTR_ABSOLUTE_LINE_HEIGHT:
case PANGO_ATTR_TEXT_TRANSFORM:
+ case PANGO_ATTR_WORD:
+ case PANGO_ATTR_SENTENCE:
return (PangoAttrInt *)attr;
default:
diff --git a/pango/pango-attributes.h b/pango/pango-attributes.h
index 86826b62..613aa021 100644
--- a/pango/pango-attributes.h
+++ b/pango/pango-attributes.h
@@ -77,6 +77,8 @@ typedef struct _PangoAttrFontFeatures PangoAttrFontFeatures;
* @PANGO_ATTR_OVERLINE_COLOR: overline color ([struct@Pango.AttrColor]). Since 1.46
* @PANGO_ATTR_LINE_HEIGHT: line height factor ([struct@Pango.AttrFloat]). Since: 1.50
* @PANGO_ATTR_ABSOLUTE_LINE_HEIGHT: line height ([struct@Pango.AttrInt]). Since: 1.50
+ * @PANGO_ATTR_WORD: override segmentation to classify the range of the attribute as a single word ([struct@Pango.AttrInt]). Since 1.50
+ * @PANGO_ATTR_SENTENCE: override segmentation to classify the range of the attribute as a single sentence ([struct@Pango.AttrInt]). Since 1.50
*
* The `PangoAttrType` distinguishes between different types of attributes.
*
@@ -121,6 +123,8 @@ typedef enum
PANGO_ATTR_LINE_HEIGHT, /* PangoAttrFloat */
PANGO_ATTR_ABSOLUTE_LINE_HEIGHT, /* PangoAttrInt */
PANGO_ATTR_TEXT_TRANSFORM, /* PangoAttrInt */
+ PANGO_ATTR_WORD, /* PangoAttrInt */
+ PANGO_ATTR_SENTENCE, /* PangoAttrInt */
} PangoAttrType;
/**
@@ -538,6 +542,12 @@ PANGO_AVAILABLE_IN_1_38
PangoAttribute * pango_attr_background_alpha_new (guint16 alpha);
PANGO_AVAILABLE_IN_1_44
PangoAttribute * pango_attr_allow_breaks_new (gboolean allow_breaks);
+
+PANGO_AVAILABLE_IN_1_50
+PangoAttribute * pango_attr_word_new (void);
+PANGO_AVAILABLE_IN_1_50
+PangoAttribute * pango_attr_sentence_new (void);
+
PANGO_AVAILABLE_IN_1_44
PangoAttribute * pango_attr_insert_hyphens_new (gboolean insert_hyphens);
PANGO_AVAILABLE_IN_1_46
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index 85f12f0a..ad56e8f9 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -4323,6 +4323,8 @@ affects_break_or_shape (PangoAttribute *attr,
{
/* Affects breaks */
case PANGO_ATTR_ALLOW_BREAKS:
+ case PANGO_ATTR_WORD:
+ case PANGO_ATTR_SENTENCE:
/* Affects shaping */
case PANGO_ATTR_INSERT_HYPHENS:
case PANGO_ATTR_FONT_FEATURES:
diff --git a/pango/pango-markup.c b/pango/pango-markup.c
index 22064103..a9df8ed0 100644
--- a/pango/pango-markup.c
+++ b/pango/pango-markup.c
@@ -1230,6 +1230,7 @@ span_parse_func (MarkupData *md G_GNUC_UNUSED,
const char *show = NULL;
const char *line_height = NULL;
const char *text_transform = NULL;
+ const char *segment = NULL;
g_markup_parse_context_get_position (context,
&line_number, &char_number);
@@ -1297,6 +1298,7 @@ span_parse_func (MarkupData *md G_GNUC_UNUSED,
CHECK_ATTRIBUTE (strikethrough);
CHECK_ATTRIBUTE (strikethrough_color);
CHECK_ATTRIBUTE (style);
+ CHECK_ATTRIBUTE (segment);
break;
case 't':
CHECK_ATTRIBUTE (text_transform);
@@ -1712,7 +1714,7 @@ span_parse_func (MarkupData *md G_GNUC_UNUSED,
gboolean b = FALSE;
if (!span_parse_boolean ("allow_breaks", allow_breaks, &b, line_number, error))
- goto error;
+ goto error;
add_attribute (tag, pango_attr_allow_breaks_new (b));
}
@@ -1727,6 +1729,25 @@ span_parse_func (MarkupData *md G_GNUC_UNUSED,
add_attribute (tag, pango_attr_insert_hyphens_new (b));
}
+ if (G_UNLIKELY (segment))
+ {
+ if (strcmp (segment, "word") == 0)
+ add_attribute (tag, pango_attr_word_new ());
+ else if (strcmp (segment, "sentence") == 0)
+ add_attribute (tag, pango_attr_sentence_new ());
+ else
+ {
+ g_set_error (error,
+ G_MARKUP_ERROR,
+ G_MARKUP_ERROR_INVALID_CONTENT,
+ _("Value of 'segment' attribute on <span> tag on line %d "
+ "could not be parsed; should be one of 'word' or "
+ "'sentence', not '%s'"),
+ line_number, segment);
+ goto error;
+ }
+ }
+
return TRUE;
error:
diff --git a/tests/breaks/five.break b/tests/breaks/five.break
new file mode 100644
index 00000000..c9a192dd
--- /dev/null
+++ b/tests/breaks/five.break
@@ -0,0 +1 @@
+<span segment='word'>ab
cd</span>
diff --git a/tests/breaks/five.expected b/tests/breaks/five.expected
new file mode 100644
index 00000000..17ce3a58
--- /dev/null
+++ b/tests/breaks/five.expected
@@ -0,0 +1,7 @@
+Text: ⁦a⁩ ⁦b⁩ [0x2028] ⁦c⁩ ⁦d⁩ [0x0a]
+Breaks: lc c c Lc c c c
+Whitespace: w w w
+Sentences: bs e bs e b
+Words: bs be b
+Graphemes: b b b b b b b
+Hyphens: i i
diff --git a/tests/breaks/fourteen.break b/tests/breaks/fourteen.break
new file mode 100644
index 00000000..e270276d
--- /dev/null
+++ b/tests/breaks/fourteen.break
@@ -0,0 +1,2 @@
+# item-spanning line break attrs
+<span allow_breaks='n'>ab<span segment='word'>c α</span>βγ</span>
diff --git a/tests/breaks/fourteen.expected b/tests/breaks/fourteen.expected
new file mode 100644
index 00000000..c8a22617
--- /dev/null
+++ b/tests/breaks/fourteen.expected
@@ -0,0 +1,7 @@
+Text: ⁦a⁩ ⁦b⁩ ⁦c⁩ [ ] ⁦α⁩ ⁦β⁩ ⁦γ⁩ [0x0a]
+Breaks: c l l c c
+Whitespace: x w w
+Sentences: bs e b
+Words: bs bse bse be b
+Graphemes: b b b b b b b b b
+Hyphens: i i i i
diff --git a/tests/breaks/thirteen.break b/tests/breaks/thirteen.break
new file mode 100644
index 00000000..a1e5cacf
--- /dev/null
+++ b/tests/breaks/thirteen.break
@@ -0,0 +1,2 @@
+# some line breaks remain in words
+a <span segment='word'>ab​sp​ de­fg</span> b
diff --git a/tests/breaks/thirteen.expected b/tests/breaks/thirteen.expected
new file mode 100644
index 00000000..70742cff
--- /dev/null
+++ b/tests/breaks/thirteen.expected
@@ -0,0 +1,7 @@
+Text: ⁦a⁩ [ ] ⁦a⁩ ⁦b⁩ [0x200b] ⁦s⁩ ⁦p⁩ [0x200b] [ ] [ ] ⁦d⁩ ⁦e⁩ [0xad] ⁦f⁩ ⁦g⁩ [ ] ⁦b⁩ [0x0a]
+Breaks: c c lc c c lc c c c c lc c c lc c c lc c c
+Whitespace: x x x x w w
+Sentences: bs e b
+Words: bs be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i
diff --git a/tests/breaks/twelve.break b/tests/breaks/twelve.break
new file mode 100644
index 00000000..a1659be4
--- /dev/null
+++ b/tests/breaks/twelve.break
@@ -0,0 +1,2 @@
+# test line break attributes
+the file <span segment='word'>/path/</span><span segment='word'>to/</span><span segment='word'>my/</span>home is cursed.
diff --git a/tests/breaks/twelve.expected b/tests/breaks/twelve.expected
new file mode 100644
index 00000000..63e14e64
--- /dev/null
+++ b/tests/breaks/twelve.expected
@@ -0,0 +1,7 @@
+Text: ⁦t⁩ ⁦h⁩ ⁦e⁩ [ ] ⁦f⁩ ⁦i⁩ ⁦l⁩ ⁦e⁩ [ ] ⁦/⁩ ⁦p⁩ ⁦a⁩ ⁦t⁩ ⁦h⁩ ⁦/⁩ ⁦t⁩ ⁦o⁩ ⁦/⁩ ⁦m⁩ ⁦y⁩ ⁦/⁩ ⁦h⁩ ⁦o⁩ ⁦m⁩ ⁦e⁩ [ ] ⁦i⁩ ⁦s⁩ [ ] ⁦c⁩ ⁦u⁩ ⁦r⁩ ⁦s⁩ ⁦e⁩ ⁦d⁩ ⁦.⁩ [0x0a]
+Breaks: c c c c lc c c c c lc c c c c c lc c c lc c c lc c c c c lc c c lc c c c c c c c c
+Whitespace: x x x x w w
+Sentences: bs e b
+Words: bs be bs be bs bse bs bse be bs be bs be b b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i i i i i i i
diff --git a/tests/test-common.c b/tests/test-common.c
index 011b2eef..0e3719e7 100644
--- a/tests/test-common.c
+++ b/tests/test-common.c
@@ -144,6 +144,8 @@ print_attribute (PangoAttribute *attr, GString *string)
case PANGO_ATTR_SHOW:
case PANGO_ATTR_TEXT_TRANSFORM:
case PANGO_ATTR_ABSOLUTE_LINE_HEIGHT:
+ case PANGO_ATTR_WORD:
+ case PANGO_ATTR_SENTENCE:
g_string_append_printf (string, "%d", ((PangoAttrInt *)attr)->value);
break;
case PANGO_ATTR_FONT_DESC:
diff --git a/tests/testattributes.c b/tests/testattributes.c
index f950a204..b9dcc1b3 100644
--- a/tests/testattributes.c
+++ b/tests/testattributes.c
@@ -75,6 +75,8 @@ test_attributes_basic (void)
test_copy (pango_attr_text_transform_new (PANGO_TEXT_TRANSFORM_UPPERCASE));
test_copy (pango_attr_line_height_new (1.5));
test_copy (pango_attr_line_height_new_absolute (3000));
+ test_copy (pango_attr_word_new ());
+ test_copy (pango_attr_sentence_new ());
}
static void
@@ -125,7 +127,7 @@ test_binding (PangoAttribute *attr)
INVALID, LANGUAGE, STRING, INT, INT, INT, INT, SIZE, FONT_DESC, COLOR,
COLOR, INT, INT, INT, SHAPE, FLOAT, INT, INT, COLOR, COLOR, SIZE,
INT, INT, FONT_FEATURES, INT, INT, INT, INT, INT, INT, COLOR, FLOAT,
- INT, INT, INT, INT
+ INT, INT, INT, INT, INT, INT
};
switch (attr_base[attr->klass->type])
@@ -207,6 +209,8 @@ test_binding_helpers (void)
test_binding (pango_attr_text_transform_new (PANGO_TEXT_TRANSFORM_UPPERCASE));
test_binding (pango_attr_line_height_new (1.5));
test_binding (pango_attr_line_height_new_absolute (3000));
+ test_binding (pango_attr_word_new ());
+ test_binding (pango_attr_sentence_new ());
}
static void
@@ -1213,8 +1217,8 @@ test_merge2 (void)
pango_attr_list_unref (list);
}
-/* This only prints rise, size and scale, which are the
- * only relevant attributes in the test that uses this
+/* This only prints rise, size, scale, allow_breaks and line_break,
+ * which are the only relevant attributes in the tests that use this
* function.
*/
static void
@@ -1240,6 +1244,12 @@ print_tags_for_attributes (PangoAttrIterator *iter,
g_string_append_printf (s, "[%d, %d]scale=%f\n",
attr->start_index, attr->end_index,
((PangoAttrFloat*)attr)->value);
+
+ attr = pango_attr_iterator_get (iter, PANGO_ATTR_ALLOW_BREAKS);
+ if (attr)
+ g_string_append_printf (s, "[%d, %d]allow_breaks=%d\n",
+ attr->start_index, attr->end_index,
+ ((PangoAttrInt*)attr)->value);
}
static void