diff options
author | Matthias Clasen <mclasen@redhat.com> | 2019-07-07 15:32:57 +0000 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2019-07-07 15:32:57 +0000 |
commit | 068aa03ceaa55537fc0144783c6446e03314c481 (patch) | |
tree | cf473f8e7c399bd844770165e5c82256a2840eaf | |
parent | 6a866f134b427ad20680c74313feb2e43b08784c (diff) | |
parent | 1e0fdcdb71e144ea6eab5d85edfd4fd0ae65a8e3 (diff) | |
download | pango-068aa03ceaa55537fc0144783c6446e03314c481.tar.gz |
Merge branch 'soft-hyphen' into 'master'
Soft hyphen
See merge request GNOME/pango!68
-rw-r--r-- | pango/break.c | 9 |
-rw-r--r-- | pango/pango-break.h | 5 |
-rw-r--r-- | pango/pango-layout.c | 96 |
-rw-r--r-- | tests/breaks/one.break | 1 |
-rw-r--r-- | tests/breaks/one.expected | 5 |
-rw-r--r-- | tests/meson.build | 1 |
-rw-r--r-- | tests/test-break.c | 289 |
7 files changed, 403 insertions, 3 deletions
diff --git a/pango/break.c b/pango/break.c index 11f4079e..13ccbdaf 100644 --- a/pango/break.c +++ b/pango/break.c @@ -984,6 +984,7 @@ pango_default_break (const gchar *text, attrs[i].is_char_break = FALSE; attrs[i].is_line_break = FALSE; attrs[i].is_mandatory_break = FALSE; + attrs[i].is_soft_hyphen = FALSE; /* Rule LB1: assign a line breaking class to each code point of the input. */ @@ -1364,9 +1365,15 @@ pango_default_break (const gchar *text, case BREAK_ALLOWED: attrs[i].is_line_break = TRUE; - break; + /* fall through */ case BREAK_ALREADY_HANDLED: + if (attrs[i].is_line_break) + { + /* After Soft Hyphen */ + if (prev_wc == 0x00AD) + attrs[i].is_soft_hyphen = TRUE; + } break; default: diff --git a/pango/pango-break.h b/pango/pango-break.h index 92af390b..66d81631 100644 --- a/pango/pango-break.h +++ b/pango/pango-break.h @@ -79,6 +79,9 @@ G_BEGIN_DECLS * This flag implements Unicode's * <ulink url="http://www.unicode.org/reports/tr29/">Word * Boundaries</ulink> semantics. (Since: 1.22) + * @is_soft_hyphen: is a line break due to a Soft Hyphen (0x00AD). + * This indicates a position where a hyphen should be inserted + * if the break is taken. * * The #PangoLogAttr structure stores information * about the attributes of a single character. 
@@ -86,7 +89,6 @@ G_BEGIN_DECLS struct _PangoLogAttr { guint is_line_break : 1; /* Can break line in front of character */ - guint is_mandatory_break : 1; /* Must break line in front of character */ guint is_char_break : 1; /* Can break here when doing char wrap */ @@ -130,6 +132,7 @@ struct _PangoLogAttr /* Word boundary as defined by UAX#29 */ guint is_word_boundary : 1; /* is NOT in the middle of a word */ + guint is_soft_hyphen : 1; /* line break due to a soft hyphen */ }; /* Determine information about cluster/word/line breaks in a string diff --git a/pango/pango-layout.c b/pango/pango-layout.c index 1ba614e1..baf81bd4 100644 --- a/pango/pango-layout.c +++ b/pango/pango-layout.c @@ -3376,6 +3376,66 @@ insert_run (PangoLayoutLine *line, line->length += run_item->length; } +static void +advance_iterator_to (PangoAttrIterator *iter, + int new_index) +{ + int start, end; + + do + { + pango_attr_iterator_range (iter, &start, &end); + if (end > new_index) + break; + } + while (pango_attr_iterator_next (iter)); +} + +static PangoLayoutRun * +create_hyphen_run (PangoLayout *layout, + PangoItem *item, + int offset) +{ + PangoLayoutRun *hyphen; + GList *items; + const char *hyphen_text = "-"; + PangoAttrList *attrs; + PangoAttrIterator *iter; + GSList *list, *l; + PangoAttrList *run_attrs; + + run_attrs = pango_attr_list_new (); + + attrs = pango_layout_get_effective_attributes (layout); + iter = pango_attr_list_get_iterator (attrs); + + advance_iterator_to (iter, offset); + list = pango_attr_iterator_get_attrs (iter); + for (l = list; l; l = l->next) + { + PangoAttribute *attr = l->data; + attr->start_index = 0; + attr->end_index = G_MAXINT; + pango_attr_list_insert (attrs, attr); + } + g_slist_free (list); + + hyphen = g_slice_new (PangoGlyphItem); + hyphen->glyphs = pango_glyph_string_new (); + items = pango_itemize (layout->context, hyphen_text, 0, strlen (hyphen_text), attrs, NULL); + g_assert (items->next == NULL); + hyphen->item = items->data; + 
hyphen->item->offset = offset; + g_list_free (items); + pango_shape (hyphen_text, strlen (hyphen_text), &hyphen->item->analysis, hyphen->glyphs); + + pango_attr_iterator_destroy (iter); + pango_attr_list_unref (attrs); + pango_attr_list_unref (run_attrs); + + return hyphen; +} + #if 0 # define DEBUG debug void @@ -3483,6 +3543,8 @@ process_item (PangoLayout *layout, int break_num_chars = num_chars; int break_width = width; int orig_width = width; + int break_extra_width; + int hyphen_width; gboolean retrying_with_char_breaks = FALSE; if (processing_new_item) @@ -3492,13 +3554,22 @@ process_item (PangoLayout *layout, pango_glyph_item_get_logical_widths (&glyph_item, layout->text, state->log_widths); } + { + PangoLayoutRun *run; + + run = create_hyphen_run (layout, item, state->start_offset); + hyphen_width = pango_glyph_string_get_width (run->glyphs); + pango_glyph_item_free (run); + } + retry_break: /* See how much of the item we can stuff in the line. */ width = 0; + break_extra_width = 0; for (num_chars = 0; num_chars < item->num_chars; num_chars++) { - if (width > state->remaining_width && break_num_chars < item->num_chars) + if (width + break_extra_width > state->remaining_width && break_num_chars < item->num_chars) break; /* If there are no previous runs we have to take care to grab at least one char. 
*/ @@ -3507,6 +3578,12 @@ process_item (PangoLayout *layout, { break_num_chars = num_chars; break_width = width; + + /* Check whether to insert a hyphen */ + if (layout->log_attrs[state->start_offset + num_chars].is_soft_hyphen) + break_extra_width = hyphen_width; + else + break_extra_width = 0; } width += state->log_widths[state->log_widths_offset + num_chars]; @@ -3543,6 +3620,14 @@ process_item (PangoLayout *layout, if (break_num_chars == item->num_chars) { insert_run (line, state, item, TRUE); + if (layout->log_attrs[state->start_offset + break_num_chars].is_soft_hyphen) + { + PangoLayoutRun *run; + + run = create_hyphen_run (layout, item, state->start_offset + break_num_chars); + line->runs = g_slist_prepend (line->runs, run); + state->remaining_width -= pango_glyph_string_get_width (run->glyphs); + } return BREAK_ALL_FIT; } @@ -3569,6 +3654,15 @@ process_item (PangoLayout *layout, /* Shaped items should never be broken */ g_assert (!shape_set); + if (layout->log_attrs[state->start_offset + break_num_chars].is_soft_hyphen) + { + PangoLayoutRun *run; + + run = create_hyphen_run (layout, item, state->start_offset + break_num_chars); + line->runs = g_slist_prepend (line->runs, run); + state->remaining_width -= pango_glyph_string_get_width (run->glyphs); + } + return BREAK_SOME_FIT; } } diff --git a/tests/breaks/one.break b/tests/breaks/one.break new file mode 100644 index 00000000..f30dc534 --- /dev/null +++ b/tests/breaks/one.break @@ -0,0 +1 @@ +abc/def ghijkl. Bla diff --git a/tests/breaks/one.expected b/tests/breaks/one.expected new file mode 100644 index 00000000..90ea8d18 --- /dev/null +++ b/tests/breaks/one.expected @@ -0,0 +1,5 @@ +Text: a b c / d e f [ ] g h i [0xad] j k l . 
[ ] B l a [0x0a] +Breaks: c c c c lc c c c lc c c c lhc c c c c lc c c c Lc +Whitespace: x x w w +Words: s e s e s e s e +Sentences: s e s e diff --git a/tests/meson.build b/tests/meson.build index 7df7a999..50176c3d 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -22,6 +22,7 @@ tests = [ [ 'testcolor' ], [ 'testscript' ], [ 'cxx-test', [ 'cxx-test.cpp' ] ], + [ 'test-break' ], ] if build_pangoft2 diff --git a/tests/test-break.c b/tests/test-break.c new file mode 100644 index 00000000..9f069c09 --- /dev/null +++ b/tests/test-break.c @@ -0,0 +1,289 @@ +/* Pango + * test-break.c: Test Pango line breaking + * + * Copyright (C) 2019 Red Hat, Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ */ + +#include <glib.h> +#include <string.h> +#include <locale.h> + +#ifndef G_OS_WIN32 +#include <unistd.h> +#endif + +#include "config.h" +#include <pango/pangocairo.h> +#include "test-common.h" + + +static PangoContext *context; + +static void +test_file (const gchar *filename, GString *string) +{ + gchar *contents; + gsize length; + GError *error = NULL; + PangoLogAttr *attrs; + PangoLanguage *lang; + int len; + char *p; + int i; + GString *s1, *s2, *s3, *s4; + int m; + + if (!g_file_get_contents (filename, &contents, &length, &error)) + { + fprintf (stderr, "%s\n", error->message); + g_error_free (error); + return; + } + + len = g_utf8_strlen (contents, -1) + 1; + attrs = g_new (PangoLogAttr, len); + + lang = pango_language_from_string ("en"); + + pango_get_log_attrs (contents, length, -1, lang, attrs, len); + + s1 = g_string_new ("Breaks: "); + s2 = g_string_new ("Whitespace: "); + s3 = g_string_new ("Words:"); + s4 = g_string_new ("Sentences:"); + + g_string_append (string, "Text: "); + + m = MAX (MAX (s1->len, s2->len), MAX (s3->len, s4->len)); + + g_string_append_printf (s1, "%*s", (int)(m - s1->len), ""); + g_string_append_printf (s2, "%*s", (int)(m - s2->len), ""); + g_string_append_printf (s3, "%*s", (int)(m - s3->len), ""); + g_string_append_printf (s4, "%*s", (int)(m - s4->len), ""); + g_string_append_printf (string, "%*s", (int)(m - strlen ("Text: ")), ""); + + for (i = 0, p = contents; i < len; i++, p = g_utf8_next_char (p)) + { + PangoLogAttr log = attrs[i]; + int b = 0; + int w = 0; + int o = 0; + int s = 0; + + if (log.is_mandatory_break) + { + g_string_append (s1, "L"); + b++; + } + else if (log.is_line_break) + { + g_string_append (s1, "l"); + b++; + } + if (log.is_soft_hyphen) + { + g_string_append (s1, "h"); + b++; + } + if (log.is_char_break) + { + g_string_append (s1, "c"); + b++; + } + + if (log.is_expandable_space) + { + g_string_append (s2, "x"); + w++; + } + else if (log.is_white) + { + g_string_append (s2, "w"); + w++; + } + + if 
(log.is_word_start) + { + g_string_append (s3, "s"); + o++; + } + if (log.is_word_end) + { + g_string_append (s3, "e"); + o++; + } + + if (log.is_sentence_start) + { + g_string_append (s4, "s"); + s++; + } + if (log.is_sentence_end) + { + g_string_append (s4, "e"); + s++; + } + + m = MAX (MAX (b, w), MAX (o, s)); + + g_string_append_printf (string, "%*s", m, ""); + g_string_append_printf (s1, "%*s", m - b, ""); + g_string_append_printf (s2, "%*s", m - w, ""); + g_string_append_printf (s3, "%*s", m - o, ""); + g_string_append_printf (s4, "%*s", m - s, ""); + + if (i < len - 1) + { + gunichar ch = g_utf8_get_char (p); + if (ch == 0x20) + { + g_string_append (string, "[ ]"); + g_string_append (s1, " "); + g_string_append (s2, " "); + g_string_append (s3, " "); + g_string_append (s4, " "); + } + else if (g_unichar_isprint (ch)) + { + g_string_append_unichar (string, ch); + g_string_append (s1, " "); + g_string_append (s2, " "); + g_string_append (s3, " "); + g_string_append (s4, " "); + } + else + { + char *str = g_strdup_printf ("[%#04x]", ch); + g_string_append (string, str); + g_string_append_printf (s1, "%*s", (int)strlen (str), ""); + g_string_append_printf (s2, "%*s", (int)strlen (str), ""); + g_string_append_printf (s3, "%*s", (int)strlen (str), ""); + g_string_append_printf (s4, "%*s", (int)strlen (str), ""); + g_free (str); + } + } + } + g_string_append (string, "\n"); + g_string_append_len (string, s1->str, s1->len); + g_string_append (string, "\n"); + g_string_append_len (string, s2->str, s2->len); + g_string_append (string, "\n"); + g_string_append_len (string, s3->str, s3->len); + g_string_append (string, "\n"); + g_string_append_len (string, s4->str, s4->len); + g_string_append (string, "\n"); + + g_string_free (s1, TRUE); + g_string_free (s2, TRUE); + g_string_free (s3, TRUE); + g_string_free (s4, TRUE); + + g_free (attrs); + g_free (contents); +} + +static gchar * +get_expected_filename (const gchar *filename) +{ + gchar *f, *p, *expected; + + f = 
g_strdup (filename); + p = strstr (f, ".break"); + if (p) + *p = 0; + expected = g_strconcat (f, ".expected", NULL); + + g_free (f); + + return expected; +} + +static void +test_break (gconstpointer d) +{ + const gchar *filename = d; + gchar *expected_file; + GError *error = NULL; + GString *dump; + gchar *diff; + + expected_file = get_expected_filename (filename); + + dump = g_string_sized_new (0); + + test_file (filename, dump); + + //diff = diff_with_file (expected_file, dump->str, dump->len, &error); + g_assert_no_error (error); + + if (diff && diff[0]) + { + g_printerr ("Contents don't match expected contents:\n%s", diff); + g_test_fail (); + g_free (diff); + } + + g_string_free (dump, TRUE); + g_free (expected_file); +} + +int +main (int argc, char *argv[]) +{ + GDir *dir; + GError *error = NULL; + const gchar *name; + gchar *path; + + g_setenv ("LC_ALL", "en_US.UTF-8", TRUE); + setlocale (LC_ALL, ""); + + g_test_init (&argc, &argv, NULL); + + context = pango_context_new (); + + /* allow to easily generate expected output for new test cases */ + if (argc > 1) + { + GString *string; + + string = g_string_sized_new (0); + test_file (argv[1], string); + printf ("%s", string->str); + + return 0; + } + + path = g_test_build_filename (G_TEST_DIST, "breaks", NULL); + dir = g_dir_open (path, 0, &error); + g_free (path); + g_assert_no_error (error); + while ((name = g_dir_read_name (dir)) != NULL) + { + if (!strstr (name, "break")) + continue; + + path = g_strdup_printf ("/break/%s", name); + g_test_add_data_func_full (path, g_test_build_filename (G_TEST_DIST, "breaks", name, NULL), + test_break, g_free); + g_free (path); + } + g_dir_close (dir); + + return g_test_run (); +} |