summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Clasen <mclasen@redhat.com>2021-05-09 23:25:37 -0400
committerMatthias Clasen <mclasen@redhat.com>2021-05-09 23:30:22 -0400
commit4555b12e78f700a7d52b9ed438b8100bdd352ec8 (patch)
tree00945308fd5850c8d70cca66f4123fa928d103a6
parentb3ebe097a3042737165ba497dc5b08f235df8167 (diff)
downloadpango-segmentation-util.tar.gz
Add pango-segmentationsegmentation-util
A little utility to show text segmentation.
-rw-r--r--utils/meson.build7
-rw-r--r--utils/pango-segmentation.c182
2 files changed, 189 insertions, 0 deletions
diff --git a/utils/meson.build b/utils/meson.build
index b71cfe2a..12968ef7 100644
--- a/utils/meson.build
+++ b/utils/meson.build
@@ -80,3 +80,10 @@ if cairo_dep.found()
])
endif
+
+pango_segmentation = executable('pango-segmentation',
+ 'pango-segmentation.c',
+ dependencies: [ libpango_dep, libpangocairo_dep ],
+ include_directories: [ root_inc ],
+ install: false,
+ )
diff --git a/utils/pango-segmentation.c b/utils/pango-segmentation.c
new file mode 100644
index 00000000..3e774167
--- /dev/null
+++ b/utils/pango-segmentation.c
@@ -0,0 +1,182 @@
+/* Pango
+ * pango-segmentation.c: Test Pango line breaking
+ *
+ * Copyright (C) 2021 Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <glib.h>
+#include <pango/pangocairo.h>
+#include <string.h>
+#include <locale.h>
+
+#ifndef G_OS_WIN32
+#include <unistd.h>
+#endif
+
+typedef enum {
+ GRAPHEME,
+ WORD,
+ LINE,
+ SENTENCE
+} BreakKind;
+
+static BreakKind
+kind_from_string (const char *str)
+{
+ if (strcmp (str, "grapheme") == 0)
+ return GRAPHEME;
+ else if (strcmp (str, "word") == 0)
+ return WORD;
+ else if (strcmp (str, "line") == 0)
+ return LINE;
+ else if (strcmp (str, "sentence") == 0)
+ return SENTENCE;
+ else
+ {
+ g_printerr ("Not a segmentation: %s", str);
+ return 0;
+ }
+}
+
+static gboolean
+show_segmentation (const char *input,
+ BreakKind kind)
+{
+ GString *string;
+ PangoContext *context;
+ gsize length;
+ GError *error = NULL;
+ PangoLogAttr *attrs;
+ int len;
+ char *p;
+ int i;
+ char *text;
+ PangoAttrList *attributes;
+ PangoLayout *layout;
+
+ context = pango_font_map_create_context (pango_cairo_font_map_get_default ());
+
+ string = g_string_new ("");
+
+ length = strlen (input);
+ len = g_utf8_strlen (input, -1) + 1;
+
+ pango_parse_markup (input, -1, 0, &attributes, &text, NULL, &error);
+ g_assert_no_error (error);
+
+ layout = pango_layout_new (context);
+ pango_layout_set_text (layout, text, length);
+ pango_layout_set_attributes (layout, attributes);
+
+ if (pango_layout_get_unknown_glyphs_count (layout) > 0)
+ {
+ char *msg = g_strdup_printf ("Missing glyphs - skipping. Maybe fonts are missing?");
+ g_test_skip (msg);
+ g_free (msg);
+ g_object_unref (layout);
+ pango_attr_list_unref (attributes);
+ g_free (text);
+ return FALSE;
+ }
+
+ pango_layout_get_log_attrs (layout, &attrs, &len);
+
+ for (i = 0, p = text; i < len; i++, p = g_utf8_next_char (p))
+ {
+ PangoLogAttr log = attrs[i];
+ gboolean is_break = FALSE;
+
+ switch (kind)
+ {
+ case GRAPHEME:
+ is_break = log.is_cursor_position;
+ break;
+ case WORD:
+ is_break = log.is_word_boundary;
+ break;
+ case LINE:
+ is_break = log.is_line_break;
+ break;
+ case SENTENCE:
+ is_break = log.is_sentence_boundary;
+ break;
+ default:
+ g_assert_not_reached ();
+ }
+
+ if (is_break)
+ g_string_append (string, "|");
+
+ if (i < len - 1)
+ {
+ gunichar ch = g_utf8_get_char (p);
+ if (ch == 0x20)
+ g_string_append (string, " ");
+ else if (g_unichar_isgraph (ch) &&
+ !(g_unichar_type (ch) == G_UNICODE_LINE_SEPARATOR ||
+ g_unichar_type (ch) == G_UNICODE_PARAGRAPH_SEPARATOR))
+ g_string_append_unichar (string, ch);
+ else
+ g_string_append_printf (string, "[%#04x]", ch);
+ }
+ }
+
+ g_object_unref (layout);
+ g_free (attrs);
+ g_free (text);
+ pango_attr_list_unref (attributes);
+
+ g_print ("%s\n", string->str);
+
+ g_string_free (string, TRUE);
+
+ return TRUE;
+}
+
+int
+main (int argc, char *argv[])
+{
+ setlocale (LC_ALL, "");
+ char *opt_kind = "grapheme";
+ GOptionEntry entries[] = {
+ { "kind", 0, 0, G_OPTION_ARG_STRING, &opt_kind, "Kind of boundary (grapheme/word/line/sentence)", "KIND" },
+ { NULL, },
+ };
+ GOptionContext *context;
+ GError *error = NULL;
+
+ context = g_option_context_new ("TEXT");
+ g_option_context_add_main_entries (context, entries, NULL);
+ g_option_context_set_description (context,
+ "Show text segmentation as determined by Pango.");
+ if (!g_option_context_parse (context, &argc, &argv, &error))
+ {
+ g_printerr ("%s\n", error->message);
+ exit (1);
+ }
+
+ if (argc < 2)
+ {
+ g_printerr ("Usage: pango-segmentation [OPTIONS…] TEXT");
+ exit (1);
+ }
+
+ show_segmentation (argv[1], kind_from_string (opt_kind));
+
+ return 0;
+}