summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matthias Clasen <mclasen@redhat.com> 2019-07-07 15:32:57 +0000
committer Matthias Clasen <mclasen@redhat.com> 2019-07-07 15:32:57 +0000
commit 068aa03ceaa55537fc0144783c6446e03314c481 (patch)
tree cf473f8e7c399bd844770165e5c82256a2840eaf
parent 6a866f134b427ad20680c74313feb2e43b08784c (diff)
parent 1e0fdcdb71e144ea6eab5d85edfd4fd0ae65a8e3 (diff)
download pango-068aa03ceaa55537fc0144783c6446e03314c481.tar.gz
Merge branch 'soft-hyphen' into 'master'
Soft hyphen

See merge request GNOME/pango!68
-rw-r--r-- pango/break.c 9
-rw-r--r-- pango/pango-break.h 5
-rw-r--r-- pango/pango-layout.c 96
-rw-r--r-- tests/breaks/one.break 1
-rw-r--r-- tests/breaks/one.expected 5
-rw-r--r-- tests/meson.build 1
-rw-r--r-- tests/test-break.c 289
7 files changed, 403 insertions, 3 deletions
diff --git a/pango/break.c b/pango/break.c
index 11f4079e..13ccbdaf 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -984,6 +984,7 @@ pango_default_break (const gchar *text,
attrs[i].is_char_break = FALSE;
attrs[i].is_line_break = FALSE;
attrs[i].is_mandatory_break = FALSE;
+ attrs[i].is_soft_hyphen = FALSE;
/* Rule LB1:
assign a line breaking class to each code point of the input. */
@@ -1364,9 +1365,15 @@ pango_default_break (const gchar *text,
case BREAK_ALLOWED:
attrs[i].is_line_break = TRUE;
- break;
+ /* fall through */
case BREAK_ALREADY_HANDLED:
+ if (attrs[i].is_line_break)
+ {
+ /* After Soft Hyphen */
+ if (prev_wc == 0x00AD)
+ attrs[i].is_soft_hyphen = TRUE;
+ }
break;
default:
diff --git a/pango/pango-break.h b/pango/pango-break.h
index 92af390b..66d81631 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -79,6 +79,9 @@ G_BEGIN_DECLS
* This flag implements Unicode's
* <ulink url="http://www.unicode.org/reports/tr29/">Word
* Boundaries</ulink> semantics. (Since: 1.22)
+ * @is_soft_hyphen: is a line break due to a Soft Hyphen (0x00AD).
+ * This indicates a position where a hyphen should be inserted
+ * if the break is taken.
*
* The #PangoLogAttr structure stores information
* about the attributes of a single character.
@@ -86,7 +89,6 @@ G_BEGIN_DECLS
struct _PangoLogAttr
{
guint is_line_break : 1; /* Can break line in front of character */
-
guint is_mandatory_break : 1; /* Must break line in front of character */
guint is_char_break : 1; /* Can break here when doing char wrap */
@@ -130,6 +132,7 @@ struct _PangoLogAttr
/* Word boundary as defined by UAX#29 */
guint is_word_boundary : 1; /* is NOT in the middle of a word */
+ guint is_soft_hyphen : 1; /* line break due to a soft hyphen */
};
/* Determine information about cluster/word/line breaks in a string
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index 1ba614e1..baf81bd4 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -3376,6 +3376,66 @@ insert_run (PangoLayoutLine *line,
line->length += run_item->length;
}
+/* Step @iter forward until the range it currently covers ends after
+ * @new_index, or until there are no further ranges to move to.
+ */
+static void
+advance_iterator_to (PangoAttrIterator *iter,
+                     int                new_index)
+{
+  for (;;)
+    {
+      int range_start, range_end;
+
+      pango_attr_iterator_range (iter, &range_start, &range_end);
+
+      /* The current range already reaches past the target: done. */
+      if (range_end > new_index)
+        return;
+
+      /* No more ranges: leave the iterator where it is. */
+      if (!pango_attr_iterator_next (iter))
+        return;
+    }
+}
+
+/* Build a run that holds a single shaped "-" glyph, to be shown where a
+ * line is broken at a soft hyphen.
+ *
+ * @layout: layout whose context and effective attributes are used
+ * @item:   not used by this function
+ * @offset: position whose attributes the hyphen picks up; also stored
+ *          as the offset of the newly created item
+ *
+ * Returns a newly allocated run; the caller owns it.
+ *
+ * NOTE(review): callers pass a position that is also used to index
+ * layout->log_attrs (one entry per character), while attribute ranges
+ * and item offsets are presumably byte indices - confirm the units for
+ * non-ASCII text.
+ */
+static PangoLayoutRun *
+create_hyphen_run (PangoLayout *layout,
+                   PangoItem   *item,
+                   int          offset)
+{
+  const char *hyphen_text = "-";
+  PangoLayoutRun *hyphen;
+  PangoAttrList *attrs;
+  PangoAttrList *run_attrs;
+  PangoAttrIterator *iter;
+  GSList *list, *l;
+  GList *items;
+
+  /* Gather the attributes in effect at @offset into a private list and
+   * stretch them over the whole hyphen text.  They must go into
+   * run_attrs, not back into attrs: attrs is the list the iterator is
+   * still walking, and its ranges refer to the layout text rather than
+   * to the one-character hyphen string.
+   */
+  run_attrs = pango_attr_list_new ();
+
+  attrs = pango_layout_get_effective_attributes (layout);
+  iter = pango_attr_list_get_iterator (attrs);
+
+  advance_iterator_to (iter, offset);
+  list = pango_attr_iterator_get_attrs (iter);
+  for (l = list; l; l = l->next)
+    {
+      PangoAttribute *attr = l->data;
+
+      attr->start_index = 0;
+      attr->end_index = G_MAXINT;
+      /* run_attrs takes ownership of attr */
+      pango_attr_list_insert (run_attrs, attr);
+    }
+  g_slist_free (list);
+
+  pango_attr_iterator_destroy (iter);
+  pango_attr_list_unref (attrs);
+
+  hyphen = g_slice_new (PangoGlyphItem);
+  hyphen->glyphs = pango_glyph_string_new ();
+  items = pango_itemize (layout->context, hyphen_text, 0, strlen (hyphen_text), run_attrs, NULL);
+  /* A lone "-" is expected to itemize into exactly one item. */
+  g_assert (items->next == NULL);
+  hyphen->item = items->data;
+  hyphen->item->offset = offset;
+  g_list_free (items);
+  pango_shape (hyphen_text, strlen (hyphen_text), &hyphen->item->analysis, hyphen->glyphs);
+
+  pango_attr_list_unref (run_attrs);
+
+  return hyphen;
+}
+
#if 0
# define DEBUG debug
void
@@ -3483,6 +3543,8 @@ process_item (PangoLayout *layout,
int break_num_chars = num_chars;
int break_width = width;
int orig_width = width;
+ int break_extra_width;
+ int hyphen_width;
gboolean retrying_with_char_breaks = FALSE;
if (processing_new_item)
@@ -3492,13 +3554,22 @@ process_item (PangoLayout *layout,
pango_glyph_item_get_logical_widths (&glyph_item, layout->text, state->log_widths);
}
+ {
+ PangoLayoutRun *run;
+
+ run = create_hyphen_run (layout, item, state->start_offset);
+ hyphen_width = pango_glyph_string_get_width (run->glyphs);
+ pango_glyph_item_free (run);
+ }
+
retry_break:
/* See how much of the item we can stuff in the line. */
width = 0;
+ break_extra_width = 0;
for (num_chars = 0; num_chars < item->num_chars; num_chars++)
{
- if (width > state->remaining_width && break_num_chars < item->num_chars)
+ if (width + break_extra_width > state->remaining_width && break_num_chars < item->num_chars)
break;
/* If there are no previous runs we have to take care to grab at least one char. */
@@ -3507,6 +3578,12 @@ process_item (PangoLayout *layout,
{
break_num_chars = num_chars;
break_width = width;
+
+ /* Check whether to insert a hyphen */
+ if (layout->log_attrs[state->start_offset + num_chars].is_soft_hyphen)
+ break_extra_width = hyphen_width;
+ else
+ break_extra_width = 0;
}
width += state->log_widths[state->log_widths_offset + num_chars];
@@ -3543,6 +3620,14 @@ process_item (PangoLayout *layout,
if (break_num_chars == item->num_chars)
{
insert_run (line, state, item, TRUE);
+ if (layout->log_attrs[state->start_offset + break_num_chars].is_soft_hyphen)
+ {
+ PangoLayoutRun *run;
+
+ run = create_hyphen_run (layout, item, state->start_offset + break_num_chars);
+ line->runs = g_slist_prepend (line->runs, run);
+ state->remaining_width -= pango_glyph_string_get_width (run->glyphs);
+ }
return BREAK_ALL_FIT;
}
@@ -3569,6 +3654,15 @@ process_item (PangoLayout *layout,
/* Shaped items should never be broken */
g_assert (!shape_set);
+ if (layout->log_attrs[state->start_offset + break_num_chars].is_soft_hyphen)
+ {
+ PangoLayoutRun *run;
+
+ run = create_hyphen_run (layout, item, state->start_offset + break_num_chars);
+ line->runs = g_slist_prepend (line->runs, run);
+ state->remaining_width -= pango_glyph_string_get_width (run->glyphs);
+ }
+
return BREAK_SOME_FIT;
}
}
diff --git a/tests/breaks/one.break b/tests/breaks/one.break
new file mode 100644
index 00000000..f30dc534
--- /dev/null
+++ b/tests/breaks/one.break
@@ -0,0 +1 @@
+abc/def ghi­jkl. Bla
diff --git a/tests/breaks/one.expected b/tests/breaks/one.expected
new file mode 100644
index 00000000..90ea8d18
--- /dev/null
+++ b/tests/breaks/one.expected
@@ -0,0 +1,5 @@
+Text: a b c / d e f [ ] g h i [0xad] j k l . [ ] B l a [0x0a]
+Breaks: c c c c lc c c c lc c c c lhc c c c c lc c c c Lc
+Whitespace: x x w w
+Words: s e s e s e s e
+Sentences: s e s e
diff --git a/tests/meson.build b/tests/meson.build
index 7df7a999..50176c3d 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -22,6 +22,7 @@ tests = [
[ 'testcolor' ],
[ 'testscript' ],
[ 'cxx-test', [ 'cxx-test.cpp' ] ],
+ [ 'test-break' ],
]
if build_pangoft2
diff --git a/tests/test-break.c b/tests/test-break.c
new file mode 100644
index 00000000..9f069c09
--- /dev/null
+++ b/tests/test-break.c
@@ -0,0 +1,289 @@
+/* Pango
+ * test-break.c: Test Pango line breaking
+ *
+ * Copyright (C) 2019 Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <glib.h>
+#include <string.h>
+#include <locale.h>
+
+#ifndef G_OS_WIN32
+#include <unistd.h>
+#endif
+
+#include "config.h"
+#include <pango/pangocairo.h>
+#include "test-common.h"
+
+
+static PangoContext *context; /* assigned once in main() */
+
+/* Dump the break attributes of the text in @filename into @string.
+ *
+ * The file is read, its log attributes are computed for language "en",
+ * and five rows are appended to @string: the text itself, then rows
+ * labelled Breaks, Whitespace, Words and Sentences that carry marker
+ * letters for each character position.
+ *
+ * If the file cannot be read, the error message is printed to stderr
+ * and @string is left untouched.
+ *
+ * NOTE(review): the padding string literals below may have had runs of
+ * spaces collapsed in this rendering of the patch - compare against the
+ * real file before relying on the exact column widths.
+ */
+static void
+test_file (const gchar *filename, GString *string)
+{
+ gchar *contents;
+ gsize length;
+ GError *error = NULL;
+ PangoLogAttr *attrs;
+ PangoLanguage *lang;
+ int len;
+ char *p;
+ int i;
+ GString *s1, *s2, *s3, *s4;
+ int m;
+
+ if (!g_file_get_contents (filename, &contents, &length, &error))
+ {
+ fprintf (stderr, "%s\n", error->message);
+ g_error_free (error);
+ return;
+ }
+
+ /* One attribute per character, plus one for the position after the last character. */
+ len = g_utf8_strlen (contents, -1) + 1;
+ attrs = g_new (PangoLogAttr, len);
+
+ lang = pango_language_from_string ("en");
+
+ pango_get_log_attrs (contents, length, -1, lang, attrs, len);
+
+ /* s1..s4 accumulate the four marker rows; the text row goes straight into string. */
+ s1 = g_string_new ("Breaks: ");
+ s2 = g_string_new ("Whitespace: ");
+ s3 = g_string_new ("Words:");
+ s4 = g_string_new ("Sentences:");
+
+ g_string_append (string, "Text: ");
+
+ /* Pad every row label to the width of the longest one so the columns line up. */
+ m = MAX (MAX (s1->len, s2->len), MAX (s3->len, s4->len));
+
+ g_string_append_printf (s1, "%*s", (int)(m - s1->len), "");
+ g_string_append_printf (s2, "%*s", (int)(m - s2->len), "");
+ g_string_append_printf (s3, "%*s", (int)(m - s3->len), "");
+ g_string_append_printf (s4, "%*s", (int)(m - s4->len), "");
+ g_string_append_printf (string, "%*s", (int)(m - strlen ("Text: ")), "");
+
+ for (i = 0, p = contents; i < len; i++, p = g_utf8_next_char (p))
+ {
+ PangoLogAttr log = attrs[i];
+ /* Number of marker letters written to each row at this position. */
+ int b = 0;
+ int w = 0;
+ int o = 0;
+ int s = 0;
+
+ /* Breaks row: L = mandatory break, l = line break, h = soft hyphen, c = char break. */
+ if (log.is_mandatory_break)
+ {
+ g_string_append (s1, "L");
+ b++;
+ }
+ else if (log.is_line_break)
+ {
+ g_string_append (s1, "l");
+ b++;
+ }
+ if (log.is_soft_hyphen)
+ {
+ g_string_append (s1, "h");
+ b++;
+ }
+ if (log.is_char_break)
+ {
+ g_string_append (s1, "c");
+ b++;
+ }
+
+ /* Whitespace row: x = expandable space, w = other white space. */
+ if (log.is_expandable_space)
+ {
+ g_string_append (s2, "x");
+ w++;
+ }
+ else if (log.is_white)
+ {
+ g_string_append (s2, "w");
+ w++;
+ }
+
+ /* Words row: s = word start, e = word end. */
+ if (log.is_word_start)
+ {
+ g_string_append (s3, "s");
+ o++;
+ }
+ if (log.is_word_end)
+ {
+ g_string_append (s3, "e");
+ o++;
+ }
+
+ /* Sentences row: s = sentence start, e = sentence end. */
+ if (log.is_sentence_start)
+ {
+ g_string_append (s4, "s");
+ s++;
+ }
+ if (log.is_sentence_end)
+ {
+ g_string_append (s4, "e");
+ s++;
+ }
+
+ /* Pad all five rows to the widest marker group at this position. */
+ m = MAX (MAX (b, w), MAX (o, s));
+
+ g_string_append_printf (string, "%*s", m, "");
+ g_string_append_printf (s1, "%*s", m - b, "");
+ g_string_append_printf (s2, "%*s", m - w, "");
+ g_string_append_printf (s3, "%*s", m - o, "");
+ g_string_append_printf (s4, "%*s", m - s, "");
+
+ /* The final attribute has no character of its own, so only markers are emitted for it. */
+ if (i < len - 1)
+ {
+ gunichar ch = g_utf8_get_char (p);
+ if (ch == 0x20)
+ {
+ /* A space is shown as "[ ]". */
+ g_string_append (string, "[ ]");
+ g_string_append (s1, " ");
+ g_string_append (s2, " ");
+ g_string_append (s3, " ");
+ g_string_append (s4, " ");
+ }
+ else if (g_unichar_isprint (ch))
+ {
+ /* Printable characters are shown as themselves. */
+ g_string_append_unichar (string, ch);
+ g_string_append (s1, " ");
+ g_string_append (s2, " ");
+ g_string_append (s3, " ");
+ g_string_append (s4, " ");
+ }
+ else
+ {
+ /* Anything else is shown as a bracketed hex code; the marker rows get matching padding. */
+ char *str = g_strdup_printf ("[%#04x]", ch);
+ g_string_append (string, str);
+ g_string_append_printf (s1, "%*s", (int)strlen (str), "");
+ g_string_append_printf (s2, "%*s", (int)strlen (str), "");
+ g_string_append_printf (s3, "%*s", (int)strlen (str), "");
+ g_string_append_printf (s4, "%*s", (int)strlen (str), "");
+ g_free (str);
+ }
+ }
+ }
+ /* Finish the text row, then append the four marker rows, one per line. */
+ g_string_append (string, "\n");
+ g_string_append_len (string, s1->str, s1->len);
+ g_string_append (string, "\n");
+ g_string_append_len (string, s2->str, s2->len);
+ g_string_append (string, "\n");
+ g_string_append_len (string, s3->str, s3->len);
+ g_string_append (string, "\n");
+ g_string_append_len (string, s4->str, s4->len);
+ g_string_append (string, "\n");
+
+ g_string_free (s1, TRUE);
+ g_string_free (s2, TRUE);
+ g_string_free (s3, TRUE);
+ g_string_free (s4, TRUE);
+
+ g_free (attrs);
+ g_free (contents);
+}
+
+/* Derive the name of the expected-output file for a test input.
+ *
+ * Everything from the last ".break" in @filename onwards is replaced by
+ * ".expected"; if @filename contains no ".break", ".expected" is simply
+ * appended.  Matching the last occurrence rather than the first keeps a
+ * directory component that happens to contain ".break" from cutting the
+ * path short.
+ *
+ * Returns a newly allocated string; free it with g_free().
+ */
+static gchar *
+get_expected_filename (const gchar *filename)
+{
+  gchar *f, *p, *expected;
+
+  f = g_strdup (filename);
+  p = g_strrstr (f, ".break");
+  if (p)
+    *p = 0;
+  expected = g_strconcat (f, ".expected", NULL);
+
+  g_free (f);
+
+  return expected;
+}
+
+/* Test callback for a single input file.
+ *
+ * @d: path of the input file, as a string
+ *
+ * Generates the attribute dump for the file and marks the test as
+ * failed when a non-empty diff against the expected output is present.
+ */
+static void
+test_break (gconstpointer d)
+{
+  const gchar *filename = d;
+  gchar *expected_file;
+  GError *error = NULL;
+  GString *dump;
+  /* Must start out NULL: it is inspected below even though nothing
+   * assigns it while the comparison is disabled.
+   */
+  gchar *diff = NULL;
+
+  expected_file = get_expected_filename (filename);
+
+  dump = g_string_sized_new (0);
+
+  test_file (filename, dump);
+
+  /* TODO: the comparison with the expected file is currently disabled:
+   *   diff = diff_with_file (expected_file, dump->str, dump->len, &error);
+   * Until it is enabled, diff stays NULL and this test cannot fail.
+   */
+  g_assert_no_error (error);
+
+  if (diff && diff[0])
+    {
+      g_printerr ("Contents don't match expected contents:\n%s", diff);
+      g_test_fail ();
+    }
+
+  /* Freed on every path, including an empty (matching) diff. */
+  g_free (diff);
+  g_string_free (dump, TRUE);
+  g_free (expected_file);
+}
+
+/* Entry point of the break test program.
+ *
+ * With a file argument, prints the attribute dump for that file and
+ * exits with 0 (handy for producing expected output).  Without one,
+ * registers a test for every entry of the "breaks" directory whose
+ * name contains "break" and runs them.
+ */
+int
+main (int argc, char *argv[])
+{
+  GDir *dir;
+  GError *error = NULL;
+  const gchar *name;
+  gchar *path;
+  int result;
+
+  g_setenv ("LC_ALL", "en_US.UTF-8", TRUE);
+  setlocale (LC_ALL, "");
+
+  g_test_init (&argc, &argv, NULL);
+
+  context = pango_context_new ();
+
+  /* allow to easily generate expected output for new test cases */
+  if (argc > 1)
+    {
+      GString *string;
+
+      string = g_string_sized_new (0);
+      test_file (argv[1], string);
+      printf ("%s", string->str);
+
+      /* Release what this path allocated before leaving. */
+      g_string_free (string, TRUE);
+      g_clear_object (&context);
+
+      return 0;
+    }
+
+  path = g_test_build_filename (G_TEST_DIST, "breaks", NULL);
+  dir = g_dir_open (path, 0, &error);
+  g_free (path);
+  g_assert_no_error (error);
+  while ((name = g_dir_read_name (dir)) != NULL)
+    {
+      if (!strstr (name, "break"))
+        continue;
+
+      path = g_strdup_printf ("/break/%s", name);
+      /* The file path handed to the test is freed by the g_free destroy callback. */
+      g_test_add_data_func_full (path, g_test_build_filename (G_TEST_DIST, "breaks", name, NULL),
+                                 test_break, g_free);
+      g_free (path);
+    }
+  g_dir_close (dir);
+
+  result = g_test_run ();
+
+  g_clear_object (&context);
+
+  return result;
+}