From 4555b12e78f700a7d52b9ed438b8100bdd352ec8 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Sun, 9 May 2021 23:25:37 -0400 Subject: Add pango-segmentation A little utility to show text segmentation. --- utils/meson.build | 7 ++ utils/pango-segmentation.c | 182 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 utils/pango-segmentation.c (limited to 'utils') diff --git a/utils/meson.build b/utils/meson.build index b71cfe2a..12968ef7 100644 --- a/utils/meson.build +++ b/utils/meson.build @@ -80,3 +80,10 @@ if cairo_dep.found() ]) endif + +pango_segmentation = executable('pango-segmentation', + 'pango-segmentation.c', + dependencies: [ libpango_dep, libpangocairo_dep ], + include_directories: [ root_inc ], + install: false, + ) diff --git a/utils/pango-segmentation.c b/utils/pango-segmentation.c new file mode 100644 index 00000000..3e774167 --- /dev/null +++ b/utils/pango-segmentation.c @@ -0,0 +1,182 @@ +/* Pango + * pango-segmentation.c: Test Pango line breaking + * + * Copyright (C) 2021 Red Hat, Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include +#include +#include +#include + +#ifndef G_OS_WIN32 +#include +#endif + +typedef enum { + GRAPHEME, + WORD, + LINE, + SENTENCE +} BreakKind; + +static BreakKind +kind_from_string (const char *str) +{ + if (strcmp (str, "grapheme") == 0) + return GRAPHEME; + else if (strcmp (str, "word") == 0) + return WORD; + else if (strcmp (str, "line") == 0) + return LINE; + else if (strcmp (str, "sentence") == 0) + return SENTENCE; + else + { + g_printerr ("Not a segmentation: %s", str); + return 0; + } +} + +static gboolean +show_segmentation (const char *input, + BreakKind kind) +{ + GString *string; + PangoContext *context; + gsize length; + GError *error = NULL; + PangoLogAttr *attrs; + int len; + char *p; + int i; + char *text; + PangoAttrList *attributes; + PangoLayout *layout; + + context = pango_font_map_create_context (pango_cairo_font_map_get_default ()); + + string = g_string_new (""); + + length = strlen (input); + len = g_utf8_strlen (input, -1) + 1; + + pango_parse_markup (input, -1, 0, &attributes, &text, NULL, &error); + g_assert_no_error (error); + + layout = pango_layout_new (context); + pango_layout_set_text (layout, text, length); + pango_layout_set_attributes (layout, attributes); + + if (pango_layout_get_unknown_glyphs_count (layout) > 0) + { + char *msg = g_strdup_printf ("Missing glyphs - skipping. Maybe fonts are missing?"); + g_test_skip (msg); + g_free (msg); + g_object_unref (layout); + pango_attr_list_unref (attributes); + g_free (text); + return FALSE; + } + + pango_layout_get_log_attrs (layout, &attrs, &len); + + for (i = 0, p = text; i < len; i++, p = g_utf8_next_char (p)) + { + PangoLogAttr log = attrs[i]; + gboolean is_break = FALSE; + + switch (kind) + { + case GRAPHEME: + is_break = log.is_cursor_position; + break; + case WORD: + is_break = log.is_word_boundary; + break; + case LINE: + is_break = log.is_line_break; + break; + case SENTENCE: + is_break = log.is_sentence_boundary; + break; + default: + g_assert_not_reached (); + } + + if (is_break) + g_string_append (string, "|"); + + if (i < len - 1) + { + gunichar ch = g_utf8_get_char (p); + if (ch == 0x20) + g_string_append (string, " "); + else if (g_unichar_isgraph (ch) && + !(g_unichar_type (ch) == G_UNICODE_LINE_SEPARATOR || + g_unichar_type (ch) == G_UNICODE_PARAGRAPH_SEPARATOR)) + g_string_append_unichar (string, ch); + else + g_string_append_printf (string, "[%#04x]", ch); + } + } + + g_object_unref (layout); + g_free (attrs); + g_free (text); + pango_attr_list_unref (attributes); + + g_print ("%s\n", string->str); + + g_string_free (string, TRUE); + + return TRUE; +} + +int +main (int argc, char *argv[]) +{ + setlocale (LC_ALL, ""); + char *opt_kind = "grapheme"; + GOptionEntry entries[] = { + { "kind", 0, 0, G_OPTION_ARG_STRING, &opt_kind, "Kind of boundary (grapheme/word/line/sentence)", "KIND" }, + { NULL, }, + }; + GOptionContext *context; + GError *error = NULL; + + context = g_option_context_new ("TEXT"); + g_option_context_add_main_entries (context, entries, NULL); + g_option_context_set_description (context, + "Show text segmentation as determined by Pango."); + if (!g_option_context_parse (context, &argc, &argv, &error)) + { + g_printerr ("%s\n", error->message); + exit (1); + } + + if (argc < 2) + { + g_printerr ("Usage: pango-segmentation [OPTIONS…] TEXT"); + exit (1); + } + + show_segmentation (argv[1], kind_from_string (opt_kind)); + + return 0; +} -- cgit v1.2.1