summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matthias Clasen <mclasen@redhat.com> 2021-08-25 04:52:39 +0000
committer Matthias Clasen <mclasen@redhat.com> 2021-08-25 04:52:39 +0000
commit 3aee7615e9a123ae750e49e9864bdaa4b267cdbb (patch)
tree a6fd68a40846f41e2dfd57c14e895e6f4b78a66b
parent d3677f9f89181a254b57fe972257bd146f443899 (diff)
parent 8fda48b39e7379348d98effb6b94ad1f83caab82 (diff)
download pango-3aee7615e9a123ae750e49e9864bdaa4b267cdbb.tar.gz
Merge branch 'hyphen-log-attr' into 'main'
Add hyphens to log attrs. Closes #603. See merge request GNOME/pango!436.
-rw-r--r-- pango/break.c 124
-rw-r--r-- pango/pango-break.h 6
-rw-r--r-- pango/pango-layout.c 179
-rw-r--r-- pango/shape.c 49
-rw-r--r-- tests/breaks/eleven.expected 13
-rw-r--r-- tests/breaks/fifteen.break 1
-rw-r--r-- tests/breaks/fifteen.expected 7
-rw-r--r-- tests/breaks/four.expected 13
-rw-r--r-- tests/breaks/nine.expected 13
-rw-r--r-- tests/breaks/one.expected 1
-rw-r--r-- tests/breaks/sixteen.break 1
-rw-r--r-- tests/breaks/sixteen.expected 7
-rw-r--r-- tests/breaks/ten.expected 13
-rw-r--r-- tests/breaks/three.expected 13
-rw-r--r-- tests/breaks/two.expected 1
-rw-r--r-- tests/layouts/valid-17.expected 2
-rw-r--r-- tests/test-break.c 31
-rw-r--r-- tools/gen-script-for-lang.c 2
18 files changed, 275 insertions, 201 deletions
diff --git a/pango/break.c b/pango/break.c
index c55d5f22..d348f9b8 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -249,6 +249,8 @@ default_break (const char *text,
gint last_sentence_start = -1;
gint last_non_space = -1;
+ gboolean prev_space_or_hyphen;
+
gboolean almost_done = FALSE;
gboolean done = FALSE;
@@ -261,6 +263,7 @@ default_break (const char *text,
prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_wc = 0;
prev_jamo = NO_JAMO;
+ prev_space_or_hyphen = FALSE;
if (length == 0 || *text == '\0')
{
@@ -291,6 +294,8 @@ default_break (const char *text,
/* Emoji extended pictographics */
gboolean is_Extended_Pictographic;
+ PangoScript script;
+
wc = next_wc;
break_type = next_break_type;
@@ -533,17 +538,16 @@ default_break (const char *text,
prev_GB_type = GB_type;
}
+ script = (PangoScript)g_unichar_get_script (wc);
+
/* ---- UAX#29 Word Boundaries ---- */
{
is_word_boundary = FALSE;
if (is_grapheme_boundary ||
G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) /* Rules WB3 and WB4 */
{
- PangoScript script;
WordBreakType WB_type;
- script = (PangoScript)g_unichar_get_script (wc);
-
/* Find the WordBreakType of wc */
WB_type = WB_Other;
@@ -1552,7 +1556,78 @@ default_break (const char *text,
attrs[i - 1].is_white) {
last_sentence_start++;
}
+ }
+
+ /* --- Hyphens --- */
+
+ {
+ gboolean insert_hyphens;
+ gboolean space_or_hyphen = FALSE;
+
+ attrs[i].break_inserts_hyphen = FALSE;
+ attrs[i].break_removes_preceding = FALSE;
+
+ switch ((int)script)
+ {
+ case PANGO_SCRIPT_COMMON:
+ case PANGO_SCRIPT_HAN:
+ case PANGO_SCRIPT_HANGUL:
+ case PANGO_SCRIPT_HIRAGANA:
+ case PANGO_SCRIPT_KATAKANA:
+ insert_hyphens = FALSE;
+ break;
+ default:
+ insert_hyphens = TRUE;
+ break;
+ }
+ switch ((int)type)
+ {
+ case G_UNICODE_SPACE_SEPARATOR:
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ space_or_hyphen = TRUE;
+ break;
+ case G_UNICODE_CONTROL:
+ if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f')
+ space_or_hyphen = TRUE;
+ break;
+ default:
+ break;
+ }
+
+ if (!space_or_hyphen)
+ {
+ if (wc == '-' || /* Hyphen-minus */
+ wc == 0x058a || /* Armenian hyphen */
+ wc == 0x1400 || /* Canadian syllabics hyphen */
+ wc == 0x1806 || /* Mongolian todo hyphen */
+ wc == 0x2010 || /* Hyphen */
+ wc == 0x2e17 || /* Double oblique hyphen */
+ wc == 0x2e40 || /* Double hyphen */
+ wc == 0x30a0 || /* Katakana-Hiragana double hyphen */
+ wc == 0xfe63 || /* Small hyphen-minus */
+ wc == 0xff0d) /* Fullwidth hyphen-minus */
+ space_or_hyphen = TRUE;
+ }
+
+ if (attrs[i].is_word_boundary)
+ attrs[i].break_inserts_hyphen = FALSE;
+ else if (prev_space_or_hyphen)
+ attrs[i].break_inserts_hyphen = FALSE;
+ else if (space_or_hyphen)
+ attrs[i].break_inserts_hyphen = FALSE;
+ else
+ attrs[i].break_inserts_hyphen = insert_hyphens;
+
+ if (prev_wc == 0x007C || /* Vertical Line */
+ prev_wc == 0x2027) /* Hyphenation point */
+ {
+ attrs[i].break_inserts_hyphen = TRUE;
+ attrs[i].break_removes_preceding = TRUE;
+ }
+
+ prev_space_or_hyphen = space_or_hyphen;
}
prev_wc = wc;
@@ -1633,22 +1708,21 @@ break_attrs (const char *text,
int log_attrs_len)
{
PangoAttrList list;
+ PangoAttrList hyphens;
PangoAttrIterator iter;
GSList *l;
_pango_attr_list_init (&list);
+ _pango_attr_list_init (&hyphens);
+
for (l = attributes; l; l = l->next)
{
PangoAttribute *attr = l->data;
if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
pango_attr_list_insert (&list, pango_attribute_copy (attr));
- }
-
- if (!_pango_attr_list_has_attributes (&list))
- {
- _pango_attr_list_destroy (&list);
- return FALSE;
+ else if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
+ pango_attr_list_insert (&hyphens, pango_attribute_copy (attr));
}
_pango_attr_list_get_iterator (&list, &iter);
@@ -1681,7 +1755,39 @@ break_attrs (const char *text,
} while (pango_attr_iterator_next (&iter));
_pango_attr_iterator_destroy (&iter);
+
+ _pango_attr_list_get_iterator (&hyphens, &iter);
+ do {
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS);
+
+ if (attr && ((PangoAttrInt*)attr)->value == 0)
+ {
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ pango_attr_iterator_range (&iter, &start, &end);
+ if (start < offset)
+ start_pos = 0;
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+ if (end >= offset + length)
+ end_pos = log_attrs_len;
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ for (pos = start_pos + 1; pos < end_pos; pos++)
+ {
+ if (!log_attrs[pos].break_removes_preceding)
+ log_attrs[pos].break_inserts_hyphen = FALSE;
+ }
+ }
+ } while (pango_attr_iterator_next (&iter));
+
+ _pango_attr_iterator_destroy (&iter);
+
_pango_attr_list_destroy (&list);
+ _pango_attr_list_destroy (&hyphens);
return TRUE;
}
diff --git a/pango/pango-break.h b/pango/pango-break.h
index a8e6c5b9..5d791e27 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -72,6 +72,10 @@ G_BEGIN_DECLS
* This flag is particularly useful when selecting text word-by-word. This flag
* implements Unicode's [Word Boundaries](http://www.unicode.org/reports/tr29/)
* semantics. (Since: 1.22)
+ * @break_inserts_hyphen: when breaking lines before this char, insert a hyphen.
+ * Since: 1.50
+ * @break_removes_preceding: when breaking lines before this char, remove the
+ * preceding char. Since: 1.50
*
* The `PangoLogAttr` structure stores information about the attributes of a
* single character.
@@ -91,6 +95,8 @@ struct _PangoLogAttr
guint backspace_deletes_character : 1;
guint is_expandable_space : 1;
guint is_word_boundary : 1;
+ guint break_inserts_hyphen : 1;
+ guint break_removes_preceding : 1;
};
PANGO_DEPRECATED_IN_1_44
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index ccebc557..85f12f0a 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -3595,7 +3595,6 @@ struct _ParaBreakState
int log_widths_offset; /* Offset into log_widths to the point corresponding
* to the remaining portion of the first item */
- int *need_hyphen; /* Insert a hyphen if breaking here ? */
int line_start_index; /* Start index (byte offset) of line in layout->text */
int line_start_offset; /* Character offset of line in layout->text */
@@ -3684,140 +3683,19 @@ insert_run (PangoLayoutLine *line,
state->glyphs = NULL;
g_free (state->log_widths);
state->log_widths = NULL;
- g_free (state->need_hyphen);
- state->need_hyphen = NULL;
}
line->runs = g_slist_prepend (line->runs, run);
line->length += run_item->length;
}
-static void
-get_need_hyphen (PangoItem *item,
- const char *text,
- int *need_hyphen)
-{
- int i;
- const char *p;
- gboolean prev_space;
- gboolean prev_hyphen;
- PangoAttrList attrs;
- PangoAttrIterator iter;
- GSList *l;
-
- _pango_attr_list_init (&attrs);
- for (l = item->analysis.extra_attrs; l; l = l->next)
- {
- PangoAttribute *attr = l->data;
- if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
- pango_attr_list_change (&attrs, pango_attribute_copy (attr));
- }
- _pango_attr_list_get_iterator (&attrs, &iter);
-
- prev_space = prev_hyphen = TRUE;
-
- for (i = 0, p = text + item->offset; i < item->num_chars; i++, p = g_utf8_next_char (p))
- {
- gunichar wc = g_utf8_get_char (p);
- gboolean space;
- gboolean hyphen;
- int start, end, pos;
- gboolean insert_hyphens = TRUE;
-
- pos = p - text;
- do {
- pango_attr_iterator_range (&iter, &start, &end);
- if (end > pos)
- break;
- } while (pango_attr_iterator_next (&iter));
-
- if (start <= pos && pos < end)
- {
- PangoAttribute *attr;
- attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS);
- if (attr)
- insert_hyphens = ((PangoAttrInt*)attr)->value;
-
- /* Some scripts don't use hyphen.*/
- switch (item->analysis.script)
- {
- case PANGO_SCRIPT_COMMON:
- case PANGO_SCRIPT_HAN:
- case PANGO_SCRIPT_HANGUL:
- case PANGO_SCRIPT_HIRAGANA:
- case PANGO_SCRIPT_KATAKANA:
- insert_hyphens = FALSE;
- break;
- default:
- break;
- }
- }
-
- switch ((int)g_unichar_type (wc))
- {
- case G_UNICODE_SPACE_SEPARATOR:
- case G_UNICODE_LINE_SEPARATOR:
- case G_UNICODE_PARAGRAPH_SEPARATOR:
- space = TRUE;
- break;
- case G_UNICODE_CONTROL:
- if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f')
- space = TRUE;
- else
- space = FALSE;
- break;
- default:
- space = FALSE;
- break;
- }
-
- if (wc == '-' || /* Hyphen-minus */
- wc == 0x058a || /* Armenian hyphen */
- wc == 0x1400 || /* Canadian syllabics hyphen */
- wc == 0x1806 || /* Mongolian todo hyphen */
- wc == 0x2010 || /* Hyphen */
- wc == 0x2027 || /* Hyphenation point */
- wc == 0x2e17 || /* Double oblique hyphen */
- wc == 0x2e40 || /* Double hyphen */
- wc == 0x30a0 || /* Katakana-Hiragana double hyphen */
- wc == 0xfe63 || /* Small hyphen-minus */
- wc == 0xff0d) /* Fullwidth hyphen-minus */
- hyphen = TRUE;
- else
- hyphen = FALSE;
-
- if (prev_space || space)
- need_hyphen[i] = FALSE;
- else if (prev_hyphen || hyphen)
- need_hyphen[i] = FALSE;
- else
- need_hyphen[i] = insert_hyphens;
-
- prev_space = space;
- prev_hyphen = hyphen;
- }
-
- need_hyphen[item->num_chars - 1] = FALSE;
-
- _pango_attr_iterator_destroy (&iter);
- _pango_attr_list_destroy (&attrs);
-}
-
static gboolean
break_needs_hyphen (PangoLayout *layout,
ParaBreakState *state,
int pos)
{
- if (state->log_widths_offset + pos == 0)
- return FALSE;
-
- if (layout->log_attrs[state->start_offset + pos].is_word_boundary)
- return FALSE;
-
- if (state->need_hyphen[state->log_widths_offset + pos - 1])
- return TRUE;
-
- return FALSE;
+ return layout->log_attrs[state->start_offset + pos].break_inserts_hyphen ||
+ layout->log_attrs[state->start_offset + pos].break_removes_preceding;
}
static int
@@ -3843,23 +3721,56 @@ find_hyphen_width (PangoItem *item)
}
static int
+find_char_width (PangoItem *item,
+ gunichar wc)
+{
+ hb_font_t *hb_font;
+ hb_codepoint_t glyph;
+
+ if (!item->analysis.font)
+ return 0;
+
+ hb_font = pango_font_get_hb_font (item->analysis.font);
+ if (hb_font_get_nominal_glyph (hb_font, wc, &glyph))
+ return hb_font_get_glyph_h_advance (hb_font, glyph);
+
+ return 0;
+}
+
+static inline void
+ensure_hyphen_width (ParaBreakState *state)
+{
+ if (state->hyphen_width < 0)
+ {
+ PangoItem *item = state->items->data;
+ state->hyphen_width = find_hyphen_width (item);
+ }
+}
+
+static int
find_break_extra_width (PangoLayout *layout,
ParaBreakState *state,
int pos)
{
/* Check whether to insert a hyphen */
- if (break_needs_hyphen (layout, state, pos))
+ if (layout->log_attrs[state->start_offset + pos].break_inserts_hyphen)
{
- if (state->hyphen_width < 0)
+ ensure_hyphen_width (state);
+
+ if (layout->log_attrs[state->start_offset + pos].break_removes_preceding)
{
PangoItem *item = state->items->data;
- state->hyphen_width = find_hyphen_width (item);
- }
+ gunichar wc;
- return state->hyphen_width;
+ wc = g_utf8_get_char (g_utf8_offset_to_pointer (layout->text, state->start_offset + pos - 1));
+
+ return state->hyphen_width - find_char_width (item, wc);
+ }
+ else
+ return state->hyphen_width;
}
- else
- return 0;
+
+ return 0;
}
#if 0
@@ -3923,7 +3834,6 @@ process_item (PangoLayout *layout,
state->glyphs = shape_run (line, state, item);
state->log_widths = NULL;
- state->need_hyphen = NULL;
state->log_widths_offset = 0;
processing_new_item = TRUE;
@@ -3980,8 +3890,6 @@ process_item (PangoLayout *layout,
PangoGlyphItem glyph_item = {item, state->glyphs};
state->log_widths = g_new (int, item->num_chars);
pango_glyph_item_get_logical_widths (&glyph_item, layout->text, state->log_widths);
- state->need_hyphen = g_new (int, item->num_chars);
- get_need_hyphen (item, layout->text, state->need_hyphen);
}
retry_break:
@@ -4082,8 +3990,6 @@ process_item (PangoLayout *layout,
state->glyphs = NULL;
g_free (state->log_widths);
state->log_widths = NULL;
- g_free (state->need_hyphen);
- state->need_hyphen = NULL;
return BREAK_NONE_FIT;
}
@@ -4629,7 +4535,6 @@ pango_layout_check_lines (PangoLayout *layout)
state.glyphs = NULL;
state.log_widths = NULL;
- state.need_hyphen = NULL;
/* for deterministic bug hunting's sake set everything! */
state.line_width = -1;
diff --git a/pango/shape.c b/pango/shape.c
index aeb9aa47..62c0f025 100644
--- a/pango/shape.c
+++ b/pango/shape.c
@@ -344,6 +344,7 @@ pango_hb_shape (const char *item_text,
int paragraph_length,
const PangoAnalysis *analysis,
PangoLogAttr *log_attrs,
+ int num_chars,
PangoGlyphString *glyphs,
PangoShapeFlags flags)
{
@@ -362,6 +363,7 @@ pango_hb_shape (const char *item_text,
unsigned int num_features = 0;
PangoGlyphInfo *infos;
PangoTextTransform transform;
+ int hyphen_index;
g_return_if_fail (analysis != NULL);
g_return_if_fail (analysis->font != NULL);
@@ -392,22 +394,35 @@ pango_hb_shape (const char *item_text,
hb_buffer_set_flags (hb_buffer, hb_buffer_flags);
hb_buffer_set_invisible_glyph (hb_buffer, PANGO_GLYPH_EMPTY);
+ if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN)
+ {
+ const char *p = paragraph_text + item_offset + item_length;
+ int last_char_len = p - g_utf8_prev_char (p);
+
+ hyphen_index = item_offset + item_length - last_char_len;
+
+ if (log_attrs[num_chars].break_removes_preceding)
+ item_length -= last_char_len;
+ }
+
+ /* Add pre-context */
+ hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0);
+
if (transform == PANGO_TEXT_TRANSFORM_NONE)
{
- hb_buffer_add_utf8 (hb_buffer, paragraph_text, paragraph_length, item_offset, item_length);
+ hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset + item_length, item_offset, item_length);
}
else
{
const char *p;
int i;
- /* Add pre-context */
- hb_buffer_add_utf8 (hb_buffer, paragraph_text, item_offset, item_offset, 0);
-
/* Transform the item text according to text transform.
* Note: we assume text transforms won't cross font boundaries
*/
- for (p = paragraph_text + item_offset, i = 0; p < paragraph_text + item_offset + item_length; p = g_utf8_next_char (p), i++)
+ for (p = paragraph_text + item_offset, i = 0;
+ p < paragraph_text + item_offset + item_length;
+ p = g_utf8_next_char (p), i++)
{
int index = p - paragraph_text;
gunichar ch = g_utf8_get_char (p);
@@ -447,26 +462,23 @@ pango_hb_shape (const char *item_text,
else
hb_buffer_add (hb_buffer, ch, index);
}
-
- /* Add post-context */
- hb_buffer_add_utf8 (hb_buffer, paragraph_text + item_offset + item_length, paragraph_length - (item_offset + item_length),
- item_offset + item_length, 0);
}
+ /* Add post-context */
+ hb_buffer_add_utf8 (hb_buffer, paragraph_text, paragraph_length, item_offset + item_length, 0);
+
if (analysis->flags & PANGO_ANALYSIS_FLAG_NEED_HYPHEN)
{
/* Insert either a Unicode or ASCII hyphen. We may
* want to look for script-specific hyphens here.
*/
- const char *p = paragraph_text + item_offset + item_length;
- int last_char_len = p - g_utf8_prev_char (p);
hb_codepoint_t glyph;
/* Note: We rely on hb_buffer_add clearing existing post-context */
if (hb_font_get_nominal_glyph (hb_font, 0x2010, &glyph))
- hb_buffer_add (hb_buffer, 0x2010, item_offset + item_length - last_char_len);
+ hb_buffer_add (hb_buffer, 0x2010, hyphen_index);
else if (hb_font_get_nominal_glyph (hb_font, '-', &glyph))
- hb_buffer_add (hb_buffer, '-', item_offset + item_length - last_char_len);
+ hb_buffer_add (hb_buffer, '-', hyphen_index);
}
pango_font_get_features (analysis->font, features, G_N_ELEMENTS (features), &num_features);
@@ -579,6 +591,7 @@ pango_shape_internal (const char *item_text,
int paragraph_length,
const PangoAnalysis *analysis,
PangoLogAttr *log_attrs,
+ int num_chars,
PangoGlyphString *glyphs,
PangoShapeFlags flags)
{
@@ -606,9 +619,8 @@ pango_shape_internal (const char *item_text,
pango_hb_shape (item_text, item_length,
paragraph_text, paragraph_length,
analysis,
- log_attrs,
- glyphs,
- flags);
+ log_attrs, num_chars,
+ glyphs, flags);
if (G_UNLIKELY (glyphs->num_glyphs == 0))
{
@@ -867,7 +879,7 @@ pango_shape_with_flags (const char *item_text,
{
pango_shape_internal (item_text, item_length,
paragraph_text, paragraph_length,
- analysis, NULL,
+ analysis, NULL, 0,
glyphs, flags);
}
@@ -906,7 +918,8 @@ pango_shape_item (PangoItem *item,
{
pango_shape_internal (paragraph_text + item->offset, item->length,
paragraph_text, paragraph_length,
- &item->analysis, log_attrs,
+ &item->analysis,
+ log_attrs, item->num_chars,
glyphs, flags);
}
diff --git a/tests/breaks/eleven.expected b/tests/breaks/eleven.expected
index 45c7ad01..f5b26708 100644
--- a/tests/breaks/eleven.expected
+++ b/tests/breaks/eleven.expected
@@ -1,6 +1,7 @@
-Text: ⁦❤⁩⁦️⁩⁦︎⁩⁦︎⁩ ⁦👨⁩[0x200d]⁦🦰⁩ ⁦👨⁩⁦🏿⁩[0x200d]⁦🦱⁩ ⁦0⁩⁦️⁩⁦⃣⁩ ⁦🏴⁩[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] ⁦🇩⁩⁦🇪⁩⁦️⁩ [0x0a]
-Breaks: c lc lc lc lc lc c c
-Whitespace: w w
-Sentences: bs e b
-Words: b b b bs be b b b
-Graphemes: b b b b b b b b
+Text: ⁦❤⁩ ⁦️⁩ ⁦︎⁩ ⁦︎⁩ ⁦👨⁩ [0x200d]⁦🦰⁩ ⁦👨⁩⁦🏿⁩ [0x200d]⁦🦱⁩ ⁦0⁩ ⁦️⁩ ⁦⃣⁩ ⁦🏴⁩[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f] ⁦🇩⁩⁦🇪⁩ ⁦️⁩ [0x0a]
+Breaks: c lc lc lc lc lc c c
+Whitespace: w w
+Sentences: bs e b
+Words: b b b bs be b b b
+Graphemes: b b b b b b b b
+Hyphens: i i i i i i i i
diff --git a/tests/breaks/fifteen.break b/tests/breaks/fifteen.break
new file mode 100644
index 00000000..452d9bb5
--- /dev/null
+++ b/tests/breaks/fifteen.break
@@ -0,0 +1 @@
+<span insert_hyphens='false'>one</span> two-three fo­ur
diff --git a/tests/breaks/fifteen.expected b/tests/breaks/fifteen.expected
new file mode 100644
index 00000000..3f5e4655
--- /dev/null
+++ b/tests/breaks/fifteen.expected
@@ -0,0 +1,7 @@
+Text: ⁦o⁩ ⁦n⁩ ⁦e⁩ [ ] ⁦t⁩ ⁦w⁩ ⁦o⁩ ⁦-⁩ ⁦t⁩ ⁦h⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩ [ ] ⁦f⁩ ⁦o⁩ [0xad] ⁦u⁩ ⁦r⁩ [0x0a]
+Breaks: c c c c lc c c c lc c c c c c lc c c lc c c c
+Whitespace: x x w w
+Sentences: bs e b
+Words: bs be bs be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i
diff --git a/tests/breaks/four.expected b/tests/breaks/four.expected
index 2f29d778..e94af24a 100644
--- a/tests/breaks/four.expected
+++ b/tests/breaks/four.expected
@@ -1,6 +1,7 @@
-Text: ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ห⁩ ⁦ร⁩⁦ื⁩ ⁦อ⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦ก⁩ ⁦ล⁩ ⁦า⁩ ⁦ง⁩ [ ] ⁦เ⁩ ⁦ป⁩⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ร⁩ ⁦า⁩ ⁦ช⁩ ⁦ก⁩ ⁦า⁩ ⁦ร⁩ ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦จ⁩ ⁦ำ⁩ ⁦ช⁩ ⁦า⁩ ⁦ต⁩⁦ิ⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦เ⁩ ⁦ป⁩⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩⁦ุ⁩⁦่⁩ ⁦ม⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ซ⁩⁦ึ⁩⁦่⁩ ⁦ง⁩ ⁦เ⁩ ⁦ป⁩⁦็⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩⁦ุ⁩⁦่⁩ ⁦ม⁩ ⁦ย⁩⁦่⁩ ⁦อ⁩ ⁦ย⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ข⁩ ⁦ร⁩⁦้⁩ ⁦า⁩ [ ] ⁦ไ⁩ ⁦ท⁩ [ ] ⁦ส⁩⁦ั⁩ ⁦น⁩ ⁦น⁩⁦ิ⁩ ⁦ษ⁩ ⁦ฐ⁩ ⁦า⁩ ⁦น⁩ ⁦ว⁩⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦น⁩⁦ี⁩⁦้⁩ ⁦ม⁩⁦ี⁩ ⁦ถ⁩⁦ิ⁩⁦่⁩ ⁦น⁩ ⁦ก⁩ ⁦ำ⁩ ⁦เ⁩ ⁦น⁩⁦ิ⁩ ⁦ด⁩ ⁦จ⁩ ⁦า⁩ ⁦ก⁩ ⁦ท⁩ ⁦า⁩ ⁦ง⁩ ⁦ต⁩ ⁦อ⁩ ⁦น⁩ ⁦ใ⁩ ⁦ต⁩⁦้⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦จ⁩⁦ี⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦น⁩⁦ั⁩ ⁦ก⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ศ⁩ ⁦า⁩ ⁦ส⁩ ⁦ต⁩ ⁦ร⁩⁦์⁩ ⁦บ⁩ ⁦า⁩ ⁦ง⁩ ⁦ส⁩⁦่⁩ ⁦ว⁩ ⁦น⁩ ⁦เ⁩ ⁦ส⁩ ⁦น⁩ ⁦อ⁩ ⁦ว⁩⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦น⁩⁦่⁩ ⁦า⁩ ⁦จ⁩ ⁦ะ⁩ ⁦ม⁩⁦ี⁩ ⁦ค⁩ ⁦ว⁩ ⁦า⁩ ⁦ม⁩ ⁦เ⁩ ⁦ช⁩⁦ื⁩⁦่⁩ ⁦อ⁩ ⁦ม⁩ ⁦โ⁩ ⁦ย⁩ ⁦ง⁩ ⁦ก⁩⁦ั⁩ ⁦บ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ [ ] ⁦เ⁩ ⁦อ⁩ ⁦เ⁩ ⁦ช⁩⁦ี⁩ ⁦ย⁩ ⁦ต⁩⁦ิ⁩ ⁦ก⁩ [ ] ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ ⁦น⁩⁦ี⁩ ⁦เ⁩ ⁦ซ⁩⁦ี⁩ ⁦ย⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦จ⁩⁦ี⁩ ⁦น⁩ [ ] ⁦ท⁩⁦ิ⁩ ⁦เ⁩ ⁦บ⁩ ⁦ต⁩ [0x0a]
-Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c c
-Whitespace: x x x x x x x x x x x x x w w
-Sentences: bs e b
-Words: bs b b b bse b b be bs b b be bs b b b bse b b bse b b b be bs b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bs b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bs b be bs b b b b b b bse b be bs b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bs b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bs b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bs b b b b bse b be bs b b b b bse b b b bse b bse b b b b b b b b be bs b b bse b b b b bse b b b bse b be bs b b b be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Text: ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ห⁩ ⁦ร⁩ ⁦ื⁩ ⁦อ⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦ก⁩ ⁦ล⁩ ⁦า⁩ ⁦ง⁩ [ ] ⁦เ⁩ ⁦ป⁩ ⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ร⁩ ⁦า⁩ ⁦ช⁩ ⁦ก⁩ ⁦า⁩ ⁦ร⁩ ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦จ⁩ ⁦ำ⁩ ⁦ช⁩ ⁦า⁩ ⁦ต⁩ ⁦ิ⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦เ⁩ ⁦ป⁩ ⁦็⁩ ⁦น⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩ ⁦ุ⁩ ⁦่⁩ ⁦ม⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ซ⁩ ⁦ึ⁩ ⁦่⁩ ⁦ง⁩ ⁦เ⁩ ⁦ป⁩ ⁦็⁩ ⁦น⁩ ⁦ก⁩ ⁦ล⁩ ⁦ุ⁩ ⁦่⁩ ⁦ม⁩ ⁦ย⁩ ⁦่⁩ ⁦อ⁩ ⁦ย⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ข⁩ ⁦ร⁩ ⁦้⁩ ⁦า⁩ [ ] ⁦ไ⁩ ⁦ท⁩ [ ] ⁦ส⁩ ⁦ั⁩ ⁦น⁩ ⁦น⁩ ⁦ิ⁩ ⁦ษ⁩ ⁦ฐ⁩ ⁦า⁩ ⁦น⁩ ⁦ว⁩ ⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ใ⁩ ⁦น⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦น⁩ ⁦ี⁩ ⁦้⁩ ⁦ม⁩ ⁦ี⁩ ⁦ถ⁩ ⁦ิ⁩ ⁦่⁩ ⁦น⁩ ⁦ก⁩ ⁦ำ⁩ ⁦เ⁩ ⁦น⁩ ⁦ิ⁩ ⁦ด⁩ ⁦จ⁩ ⁦า⁩ ⁦ก⁩ ⁦ท⁩ ⁦า⁩ ⁦ง⁩ ⁦ต⁩ ⁦อ⁩ ⁦น⁩ ⁦ใ⁩ ⁦ต⁩ ⁦้⁩ ⁦ข⁩ ⁦อ⁩ ⁦ง⁩ ⁦ป⁩ ⁦ร⁩ ⁦ะ⁩ ⁦เ⁩ ⁦ท⁩ ⁦ศ⁩ ⁦จ⁩ ⁦ี⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦น⁩ ⁦ั⁩ ⁦ก⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ศ⁩ ⁦า⁩ ⁦ส⁩ ⁦ต⁩ ⁦ร⁩ ⁦์⁩ ⁦บ⁩ ⁦า⁩ ⁦ง⁩ ⁦ส⁩ ⁦่⁩ ⁦ว⁩ ⁦น⁩ ⁦เ⁩ ⁦ส⁩ ⁦น⁩ ⁦อ⁩ ⁦ว⁩ ⁦่⁩ ⁦า⁩ [ ] ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦ไ⁩ ⁦ท⁩ ⁦ย⁩ ⁦น⁩ ⁦่⁩ ⁦า⁩ ⁦จ⁩ ⁦ะ⁩ ⁦ม⁩ ⁦ี⁩ ⁦ค⁩ ⁦ว⁩ ⁦า⁩ ⁦ม⁩ ⁦เ⁩ ⁦ช⁩ ⁦ื⁩ ⁦่⁩ ⁦อ⁩ ⁦ม⁩ ⁦โ⁩ ⁦ย⁩ ⁦ง⁩ ⁦ก⁩ ⁦ั⁩ ⁦บ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ [ ] ⁦เ⁩ ⁦อ⁩ ⁦เ⁩ ⁦ช⁩ ⁦ี⁩ ⁦ย⁩ ⁦ต⁩ ⁦ิ⁩ ⁦ก⁩ [ ] ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦อ⁩ ⁦อ⁩ ⁦ส⁩ ⁦โ⁩ ⁦ต⁩ ⁦ร⁩ ⁦น⁩ ⁦ี⁩ ⁦เ⁩ ⁦ซ⁩ ⁦ี⁩ ⁦ย⁩ ⁦น⁩ [ ] ⁦แ⁩ ⁦ล⁩ ⁦ะ⁩ ⁦ต⁩ ⁦ร⁩ ⁦ะ⁩ ⁦ก⁩ ⁦ู⁩ ⁦ล⁩ ⁦ภ⁩ ⁦า⁩ ⁦ษ⁩ ⁦า⁩ ⁦จ⁩ ⁦ี⁩ ⁦น⁩ [ ] ⁦ท⁩ ⁦ิ⁩ ⁦เ⁩ ⁦บ⁩ ⁦ต⁩ [0x0a]
+Breaks: c c c c lc c c c lc c c c lc c c c lc c c lc c c c c lc c c lc c c c lc c c c c c lc c c lc c c c lc c c c c lc c c lc c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c lc c lc c c lc c c c lc c lc c lc c c lc c c lc c c lc c c lc c c c c lc c c c lc c c c lc c c lc c c c c c c lc c c lc c c c lc c lc c c c c lc lc lc c lc c c c c lc c c lc c c lc c c lc c lc c c lc c c c c c lc c c lc c c lc c lc c c c c c c c c lc c c lc c c lc c c c lc c c lc c c c lc c c lc c lc c lc lc c c c lc c c c lc c c lc c lc c c c c lc c c c lc c lc c c c c lc c c c c lc c c lc c c c c lc c c c lc c lc c c c c c c c c c lc c c lc c c c c lc c c c lc c c lc c c c c c
+Whitespace: x x x x x x x x x x x x x w w
+Sentences: bs e b
+Words: bs b b b bse b b be bs b b be bs b b b bse b b bse b b b be bs b b bse b b b bse b b b b b bse b b bse b b b bse b b b b bse b b bse b b bse b b b b b bse b b be bs b b b bse b b bse b b bse b b b bse b bse b b bse b b b bse b bse b bse b b bse b b bse b b bse b b bse b b b b bse b b b bse b b be bs b be bs b b b b b b bse b be bs b b b bse b bse b b b b bse bse bse b bse b b b b bse b b bse b b bse b b bse b bse b b bse b b b b b bse b be bs b b bse b bse b b b b b b b b bse b b bse b b bse b b b bse b be bs b b b bse b b bse b bse b bse bse b b b bse b b b bse b b bse b bse b b b b bse b b b bse b bse b b b be bs b b b b bse b be bs b b b b bse b b b bse b bse b b b b b b b b be bs b b bse b b b b bse b b b bse b be bs b b b be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
diff --git a/tests/breaks/nine.expected b/tests/breaks/nine.expected
index 957f67e5..818a78aa 100644
--- a/tests/breaks/nine.expected
+++ b/tests/breaks/nine.expected
@@ -1,6 +1,7 @@
-Text: ⁦म⁩⁦ी⁩ [ ] ⁦क⁩⁦ा⁩ ⁦च⁩ [ ] ⁦ख⁩⁦ा⁩ ⁦ऊ⁩ [ ] ⁦श⁩ ⁦क⁩ ⁦त⁩⁦ो⁩ ⁦,⁩ [ ] ⁦म⁩ ⁦ल⁩⁦ा⁩ [ ] ⁦त⁩⁦े⁩ [ ] ⁦द⁩⁦ु⁩ ⁦ख⁩ ⁦त⁩ [ ] ⁦न⁩⁦ा⁩ ⁦ह⁩ [0x0a] ⁦म⁩⁦ै⁩⁦ं⁩ [ ] ⁦क⁩⁦ा⁩⁦ँ⁩ ⁦च⁩ [ ] ⁦ख⁩⁦ा⁩ [ ] ⁦स⁩ ⁦क⁩ ⁦त⁩⁦ा⁩ [ ] ⁦ह⁩⁦ू⁩⁦ँ⁩ ⁦,⁩ [ ] ⁦म⁩⁦ु⁩ ⁦झ⁩⁦े⁩ [ ] ⁦उ⁩ ⁦स⁩ [ ] ⁦स⁩⁦े⁩ [ ] ⁦क⁩⁦ो⁩ ⁦ई⁩ [ ] ⁦प⁩⁦ी⁩ ⁦ड⁩⁦ा⁩ [ ] ⁦न⁩ ⁦ह⁩⁦ी⁩⁦ं⁩ [ ] ⁦ह⁩⁦ो⁩ ⁦त⁩ [0x0a] ⁦ந⁩⁦ா⁩ ⁦ன⁩⁦்⁩ [ ] ⁦க⁩ ⁦ண⁩⁦்⁩ ⁦ண⁩⁦ா⁩ ⁦ட⁩⁦ி⁩ [ ] ⁦ச⁩⁦ா⁩ ⁦ப⁩⁦்⁩ ⁦ப⁩⁦ி⁩ ⁦ட⁩⁦ு⁩ ⁦வ⁩⁦ே⁩ ⁦ன⁩⁦்⁩ ⁦,⁩ [ ] ⁦அ⁩ ⁦த⁩ ⁦ன⁩⁦ா⁩ ⁦ல⁩⁦்⁩ [ ] ⁦எ⁩ ⁦ன⁩ ⁦க⁩⁦்⁩ ⁦க⁩⁦ு⁩ [ ] ⁦ஒ⁩ ⁦ர⁩⁦ு⁩ [ ] ⁦க⁩⁦ே⁩ ⁦ட⁩⁦ு⁩ ⁦ம⁩⁦்⁩ [ ] ⁦வ⁩ ⁦ர⁩⁦ா⁩ ⁦த⁩ [0x0a] ⁦ﻢ⁩ ⁦ﯾ⁩ ⁦ں⁩ [ ] ⁦ﮎ⁩ ⁦ﺎ⁩ ⁦ﻨ⁩ ⁦ﭼ⁩ [ ] ⁦ﮎ⁩ ⁦ھ⁩ ⁦ﺍ⁩ [ ] ⁦ﺲ⁩ ⁦ﮑ⁩ ⁦ﺗ⁩ ⁦ﺍ⁩ [ ] ⁦ہ⁩ ⁦ﻭ⁩ ⁦ں⁩ [ ] ⁦ﺍ⁩ ⁦ﻭ⁩ ⁦ﺭ⁩ [ ] ⁦ﻢ⁩ ⁦ﺟ⁩ ⁦ھ⁩ ⁦ے⁩ [ ] ⁦ﺖ⁩ ⁦ﮑ⁩ ⁦ﻠ⁩ ⁦ﯿ⁩ ⁦ﻓ⁩ [ ] ⁦ﻥ⁩ ⁦ہ⁩ ⁦ﯼ⁩ ⁦ں⁩ [ ] ⁦ہ⁩ ⁦ﻮ⁩ ⁦ﺘ⁩ ⁦ﯾ⁩ [ ] [0x0a] ⁦ﺰ⁩ ⁦ﻫ⁩ [ ] ⁦ﺶ⁩ ⁦ﻴ⁩ ⁦ﺸ⁩ ⁦ﻫ⁩ [ ] ⁦ﺥ⁩ ⁦ﻭ⁩ ⁦ړ⁩ ⁦ﻝ⁩ ⁦ې⁩ [ ] ⁦ﺶ⁩ ⁦ﻣ⁩ ⁦،⁩ [ ] ⁦ﻪ⁩ ⁦ﻐ⁩ ⁦ﻫ⁩ [ ] ⁦ﻡ⁩ ⁦ﺍ⁩ [ ] ⁦ﻦ⁩ ⁦ﻫ⁩ [ ] ⁦ﺥ⁩ ⁦ﻭ⁩ ⁦ږ⁩ ⁦ﻮ⁩ ⁦ﻳ⁩ [0x0a]
-Breaks: c c lc c c lc c c lc c c c c lc c c lc c lc c c c lc c c c c lc c c lc c lc c c c lc c c lc c c lc c c lc c lc c c lc c c lc c c lc c c c c c lc c c c c lc c c c c c c c lc c c c c lc c c c c lc c c lc c c c lc c c c c c c c lc c c c c lc c c c lc c c c c lc c c c lc c c c lc c c c c lc c c c c c lc c c c c lc c c c c c c c c lc c c c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c c c c
-Whitespace: x x x x x x x w x x x x x x x x x x x w x x x x x x x w x x x x x x x x x x w x x x x x x x w w
-Sentences: bs e bs e bs e bs e bs e b
-Words: bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be b bs be bs be bs be bs be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Text: ⁦म⁩ ⁦ी⁩ [ ] ⁦क⁩ ⁦ा⁩ ⁦च⁩ [ ] ⁦ख⁩ ⁦ा⁩ ⁦ऊ⁩ [ ] ⁦श⁩ ⁦क⁩ ⁦त⁩ ⁦ो⁩ ⁦,⁩ [ ] ⁦म⁩ ⁦ल⁩ ⁦ा⁩ [ ] ⁦त⁩ ⁦े⁩ [ ] ⁦द⁩ ⁦ु⁩ ⁦ख⁩ ⁦त⁩ [ ] ⁦न⁩ ⁦ा⁩ ⁦ह⁩ [0x0a] ⁦म⁩ ⁦ै⁩ ⁦ं⁩ [ ] ⁦क⁩ ⁦ा⁩ ⁦ँ⁩ ⁦च⁩ [ ] ⁦ख⁩ ⁦ा⁩ [ ] ⁦स⁩ ⁦क⁩ ⁦त⁩ ⁦ा⁩ [ ] ⁦ह⁩ ⁦ू⁩ ⁦ँ⁩ ⁦,⁩ [ ] ⁦म⁩ ⁦ु⁩ ⁦झ⁩ ⁦े⁩ [ ] ⁦उ⁩ ⁦स⁩ [ ] ⁦स⁩ ⁦े⁩ [ ] ⁦क⁩ ⁦ो⁩ ⁦ई⁩ [ ] ⁦प⁩ ⁦ी⁩ ⁦ड⁩ ⁦ा⁩ [ ] ⁦न⁩ ⁦ह⁩ ⁦ी⁩ ⁦ं⁩ [ ] ⁦ह⁩ ⁦ो⁩ ⁦त⁩ [0x0a] ⁦ந⁩ ⁦ா⁩ ⁦ன⁩ ⁦்⁩ [ ] ⁦க⁩ ⁦ண⁩ ⁦்⁩ ⁦ண⁩ ⁦ா⁩ ⁦ட⁩ ⁦ி⁩ [ ] ⁦ச⁩ ⁦ா⁩ ⁦ப⁩ ⁦்⁩ ⁦ப⁩ ⁦ி⁩ ⁦ட⁩ ⁦ு⁩ ⁦வ⁩ ⁦ே⁩ ⁦ன⁩ ⁦்⁩ ⁦,⁩ [ ] ⁦அ⁩ ⁦த⁩ ⁦ன⁩ ⁦ா⁩ ⁦ல⁩ ⁦்⁩ [ ] ⁦எ⁩ ⁦ன⁩ ⁦க⁩ ⁦்⁩ ⁦க⁩ ⁦ு⁩ [ ] ⁦ஒ⁩ ⁦ர⁩ ⁦ு⁩ [ ] ⁦க⁩ ⁦ே⁩ ⁦ட⁩ ⁦ு⁩ ⁦ம⁩ ⁦்⁩ [ ] ⁦வ⁩ ⁦ர⁩ ⁦ா⁩ ⁦த⁩ [0x0a] ⁦ﻢ⁩ ⁦ﯾ⁩ ⁦ں⁩ [ ] ⁦ﮎ⁩ ⁦ﺎ⁩ ⁦ﻨ⁩ ⁦ﭼ⁩ [ ] ⁦ﮎ⁩ ⁦ھ⁩ ⁦ﺍ⁩ [ ] ⁦ﺲ⁩ ⁦ﮑ⁩ ⁦ﺗ⁩ ⁦ﺍ⁩ [ ] ⁦ہ⁩ ⁦ﻭ⁩ ⁦ں⁩ [ ] ⁦ﺍ⁩ ⁦ﻭ⁩ ⁦ﺭ⁩ [ ] ⁦ﻢ⁩ ⁦ﺟ⁩ ⁦ھ⁩ ⁦ے⁩ [ ] ⁦ﺖ⁩ ⁦ﮑ⁩ ⁦ﻠ⁩ ⁦ﯿ⁩ ⁦ﻓ⁩ [ ] ⁦ﻥ⁩ ⁦ہ⁩ ⁦ﯼ⁩ ⁦ں⁩ [ ] ⁦ہ⁩ ⁦ﻮ⁩ ⁦ﺘ⁩ ⁦ﯾ⁩ [ ] [0x0a] ⁦ﺰ⁩ ⁦ﻫ⁩ [ ] ⁦ﺶ⁩ ⁦ﻴ⁩ ⁦ﺸ⁩ ⁦ﻫ⁩ [ ] ⁦ﺥ⁩ ⁦ﻭ⁩ ⁦ړ⁩ ⁦ﻝ⁩ ⁦ې⁩ [ ] ⁦ﺶ⁩ ⁦ﻣ⁩ ⁦،⁩ [ ] ⁦ﻪ⁩ ⁦ﻐ⁩ ⁦ﻫ⁩ [ ] ⁦ﻡ⁩ ⁦ﺍ⁩ [ ] ⁦ﻦ⁩ ⁦ﻫ⁩ [ ] ⁦ﺥ⁩ ⁦ﻭ⁩ ⁦ږ⁩ ⁦ﻮ⁩ ⁦ﻳ⁩ [0x0a]
+Breaks: c c lc c c lc c c lc c c c c lc c c lc c lc c c c lc c c c c lc c c lc c lc c c c lc c c lc c c lc c c lc c lc c c lc c c lc c c lc c c c c c lc c c c c lc c c c c c c c lc c c c c lc c c c c lc c c lc c c c lc c c c c c c c lc c c c c lc c c c lc c c c c lc c c c lc c c c lc c c c c lc c c c c c lc c c c c lc c c c c c c c c lc c c c c lc c c c c c lc c c c lc c c c lc c c lc c c lc c c c c c c
+Whitespace: x x x x x x x w x x x x x x x x x x x w x x x x x x x w x x x x x x x x x x w x x x x x x x w w
+Sentences: bs e bs e bs e bs e bs e b
+Words: bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be bs be b bs be bs be bs be bs be b bs be bs be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
diff --git a/tests/breaks/one.expected b/tests/breaks/one.expected
index bbae494d..cc257131 100644
--- a/tests/breaks/one.expected
+++ b/tests/breaks/one.expected
@@ -4,3 +4,4 @@ Whitespace: x x w w
Sentences: bs e bs e b
Words: bs be bs be bs be b bs be b
Graphemes: b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i
diff --git a/tests/breaks/sixteen.break b/tests/breaks/sixteen.break
new file mode 100644
index 00000000..51062618
--- /dev/null
+++ b/tests/breaks/sixteen.break
@@ -0,0 +1 @@
+hy‧phen|ation over­load
diff --git a/tests/breaks/sixteen.expected b/tests/breaks/sixteen.expected
new file mode 100644
index 00000000..75f20b9f
--- /dev/null
+++ b/tests/breaks/sixteen.expected
@@ -0,0 +1,7 @@
+Text: ⁦h⁩ ⁦y⁩ ⁦‧⁩ ⁦p⁩ ⁦h⁩ ⁦e⁩ ⁦n⁩ ⁦|⁩ ⁦a⁩ ⁦t⁩ ⁦i⁩ ⁦o⁩ ⁦n⁩ [ ] ⁦o⁩ ⁦v⁩ ⁦e⁩ ⁦r⁩ [0xad] ⁦l⁩ ⁦o⁩ ⁦a⁩ ⁦d⁩ [0x0a]
+Breaks: c c c lc c c c c lc c c c c c lc c c c c lc c c c c c
+Whitespace: x w w
+Sentences: bs e b
+Words: bs e s be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i ri i i i ri i i i i i i i i i i i
diff --git a/tests/breaks/ten.expected b/tests/breaks/ten.expected
index c1f8cc35..c9f91dc8 100644
--- a/tests/breaks/ten.expected
+++ b/tests/breaks/ten.expected
@@ -1,6 +1,7 @@
-Text: ⁦i⁩ ⁦ක⁩⁦්⁩[0x200d]⁦ක⁩ [ ] ⁦a⁩[0x200c] ⁦a⁩⁦்⁩ [0x0a]
-Breaks: c c c lc c c c
-Whitespace: x w w
-Sentences: bs e b
-Words: bs be bs be b
-Graphemes: b b b b b b b
+Text: ⁦i⁩ ⁦ක⁩ ⁦්⁩ [0x200d] ⁦ක⁩ [ ] ⁦a⁩ [0x200c] ⁦a⁩ ⁦்⁩ [0x0a]
+Breaks: c c c lc c c c
+Whitespace: x w w
+Sentences: bs e b
+Words: bs be bs be b
+Graphemes: b b b b b b b
+Hyphens: i i i i i i i
diff --git a/tests/breaks/three.expected b/tests/breaks/three.expected
index 7f078f4f..c2c89158 100644
--- a/tests/breaks/three.expected
+++ b/tests/breaks/three.expected
@@ -1,6 +1,7 @@
-Text: ⁦o⁩ ⁦n⁩ ⁦e⁩ [ ] ⁦t⁩ ⁦w⁩ ⁦o⁩ [0x2028] ⁦r⁩ ⁦e⁩ ⁦d⁩ [ ] ⁦b⁩ ⁦l⁩ ⁦u⁩ ⁦e⁩[0x200d] ⁦g⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩ ⁦n⁩ [0x0a]
-Breaks: c c c c lc c c c Lc c c c lc c c c c c c c c c c
-Whitespace: x w x w w
-Sentences: bs e bs e b
-Words: bs be bs be bs be bs be b
-Graphemes: b b b b b b b b b b b b b b b b b b b b b b b
+Text: ⁦o⁩ ⁦n⁩ ⁦e⁩ [ ] ⁦t⁩ ⁦w⁩ ⁦o⁩ [0x2028] ⁦r⁩ ⁦e⁩ ⁦d⁩ [ ] ⁦b⁩ ⁦l⁩ ⁦u⁩ ⁦e⁩ [0x200d] ⁦g⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩ ⁦n⁩ [0x0a]
+Breaks: c c c c lc c c c Lc c c c lc c c c c c c c c c c
+Whitespace: x w x w w
+Sentences: bs e bs e b
+Words: bs be bs be bs be bs be b
+Graphemes: b b b b b b b b b b b b b b b b b b b b b b b
+Hyphens: i i i i i i i i i i i i i i i
diff --git a/tests/breaks/two.expected b/tests/breaks/two.expected
index 3ec2d948..0280c52f 100644
--- a/tests/breaks/two.expected
+++ b/tests/breaks/two.expected
@@ -4,3 +4,4 @@ Whitespace: w w
Sentences: bs e b
Words: bs e s be b
Graphemes: b b b b b b b b b b b
+Hyphens: i i i i i i i
diff --git a/tests/layouts/valid-17.expected b/tests/layouts/valid-17.expected
index 4b3192fb..a2b7d494 100644
--- a/tests/layouts/valid-17.expected
+++ b/tests/layouts/valid-17.expected
@@ -28,7 +28,7 @@ i=3, index=17, paragraph-start=1, dir=ltr ''
--- runs
-i=1, index=0, chars=13, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|'
+i=1, index=0, chars=13, level=0, gravity=south, flags=4, font=OMITTED, script=latin, language=en-us, 'some|bla|bla|'
i=2, index=13, no run, line end
i=3, index=13, chars=3, level=0, gravity=south, flags=0, font=OMITTED, script=latin, language=en-us, 'bla'
i=4, index=16, no run, line end
diff --git a/tests/test-break.c b/tests/test-break.c
index 3fb5cdcd..f7fcf6ec 100644
--- a/tests/test-break.c
+++ b/tests/test-break.c
@@ -46,7 +46,7 @@ test_file (const gchar *filename, GString *string)
int len2;
char *p;
int i;
- GString *s1, *s2, *s3, *s4, *s5;
+ GString *s1, *s2, *s3, *s4, *s5, *s6;
int m;
char *test;
char *text;
@@ -109,6 +109,7 @@ test_file (const gchar *filename, GString *string)
s3 = g_string_new ("Sentences:");
s4 = g_string_new ("Words:");
s5 = g_string_new ("Graphemes:");
+ s6 = g_string_new ("Hyphens:");
g_string_append (string, "Text: ");
@@ -119,6 +120,7 @@ test_file (const gchar *filename, GString *string)
g_string_append_printf (s3, "%*s", (int)(m - s3->len), "");
g_string_append_printf (s4, "%*s", (int)(m - s4->len), "");
g_string_append_printf (s5, "%*s", (int)(m - s5->len), "");
+ g_string_append_printf (s6, "%*s", (int)(m - s6->len), "");
g_string_append_printf (string, "%*s", (int)(m - strlen ("Text: ")), "");
for (i = 0, p = text; i < len; i++, p = g_utf8_next_char (p))
@@ -129,6 +131,7 @@ test_file (const gchar *filename, GString *string)
int o = 0;
int s = 0;
int g = 0;
+ int h = 0;
if (log.is_mandatory_break)
{
@@ -195,7 +198,18 @@ test_file (const gchar *filename, GString *string)
g++;
}
- m = MAX (MAX (MAX (b, w), MAX (o, s)), g);
+ if (log.break_removes_preceding)
+ {
+ g_string_append (s6, "r");
+ h++;
+ }
+ if (log.break_inserts_hyphen)
+ {
+ g_string_append (s6, "i");
+ h++;
+ }
+
+ m = MAX (MAX (MAX (b, w), MAX (o, s)), MAX (g, h));
g_string_append_printf (string, "%*s", m, "");
g_string_append_printf (s1, "%*s", m - b, "");
@@ -203,6 +217,7 @@ test_file (const gchar *filename, GString *string)
g_string_append_printf (s3, "%*s", m - s, "");
g_string_append_printf (s4, "%*s", m - o, "");
g_string_append_printf (s5, "%*s", m - g, "");
+ g_string_append_printf (s6, "%*s", m - h, "");
if (i < len - 1)
{
@@ -215,6 +230,7 @@ test_file (const gchar *filename, GString *string)
g_string_append (s3, " ");
g_string_append (s4, " ");
g_string_append (s5, " ");
+ g_string_append (s6, " ");
}
else if (g_unichar_isgraph (ch) &&
!(g_unichar_type (ch) == G_UNICODE_LINE_SEPARATOR ||
@@ -228,6 +244,7 @@ test_file (const gchar *filename, GString *string)
g_string_append (s3, " ");
g_string_append (s4, " ");
g_string_append (s5, " ");
+ g_string_append (s6, " ");
}
else
{
@@ -238,6 +255,7 @@ test_file (const gchar *filename, GString *string)
g_string_append_printf (s3, "%*s", (int)strlen (str), "");
g_string_append_printf (s4, "%*s", (int)strlen (str), "");
g_string_append_printf (s5, "%*s", (int)strlen (str), "");
+ g_string_append_printf (s6, "%*s", (int)strlen (str), "");
g_free (str);
}
}
@@ -253,12 +271,15 @@ test_file (const gchar *filename, GString *string)
g_string_append (string, "\n");
g_string_append_len (string, s5->str, s5->len);
g_string_append (string, "\n");
+ g_string_append_len (string, s6->str, s6->len);
+ g_string_append (string, "\n");
g_string_free (s1, TRUE);
g_string_free (s2, TRUE);
g_string_free (s3, TRUE);
g_string_free (s4, TRUE);
g_string_free (s5, TRUE);
+ g_string_free (s6, TRUE);
g_object_unref (layout);
g_free (attrs);
@@ -366,9 +387,9 @@ main (int argc, char *argv[])
" l - line break s - word start\n"
" c - char break e - word end\n"
"\n"
- "Whitespace: Sentences:\n"
- " x - expandable space b - sentence boundary\n"
- " w - whitespace s - sentence start\n"
+ "Whitespace: Sentences: Hyphens:\n"
+ " x - expandable space b - sentence boundary i - insert hyphen\n"
+ " w - whitespace s - sentence start r - remove preceding\n"
" e - sentence end\n");
return 0;
}
diff --git a/tools/gen-script-for-lang.c b/tools/gen-script-for-lang.c
index b3238f85..2fb0cbc7 100644
--- a/tools/gen-script-for-lang.c
+++ b/tools/gen-script-for-lang.c
@@ -44,7 +44,7 @@ typedef struct {
ScriptInfo scripts[MAX_SCRIPTS];
} LangInfo;
-static const char *get_script_name (PangoScript script)
+static const char *get_script_name (GUnicodeScript script)
{
static GEnumClass *class = NULL;
GEnumValue *value;