diff options
author | Behdad Esfahbod <behdad@gnome.org> | 2008-04-24 20:00:41 +0000 |
---|---|---|
committer | Behdad Esfahbod <behdad@src.gnome.org> | 2008-04-24 20:00:41 +0000 |
commit | f6b1fef713a035e4abcbe0d0fda54721a3560d5e (patch) | |
tree | 4e2590bc28e59fc971cc481de2920db32ee536ff /tests | |
parent | 0c950b1d5c2a51ec8d238ae1399d1f71b132fa6e (diff) | |
download | pango-f6b1fef713a035e4abcbe0d0fda54721a3560d5e.tar.gz |
Part of Bug 97545 – Make pango_default_break follow Unicode TR #29 Patch
2008-04-24 Behdad Esfahbod <behdad@gnome.org>
Part of Bug 97545 – Make pango_default_break follow Unicode TR #29
Patch from Noah Levitt
* tests/Makefile.am:
* tests/runtests.sh.in:
* tests/testboundaries_ucd.c (count_attrs), (parse_line),
(attrs_equal), (make_test_string), (do_test), (main):
Add test driver for UAX#14 and UAX#29 test data from Unicode Character
Database. Just drop the following four files in pango/tests for it to
use them:
GraphemeBreakTest.txt
LineBreakTest.txt
SentenceBreakTest.txt
WordBreakTest.txt
svn path=/trunk/; revision=2617
Diffstat (limited to 'tests')
-rw-r--r-- | tests/Makefile.am | 8 | ||||
-rwxr-xr-x | tests/runtests.sh.in | 2 | ||||
-rw-r--r-- | tests/testboundaries_ucd.c | 364 |
3 files changed, 371 insertions, 3 deletions
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f1b24a42..40c86780 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -3,7 +3,10 @@
 EXTRA_DIST = \
 	all-unicode.txt \
 	boundaries.utf8 \
-	runtests.sh
+	runtests.sh \
+	GraphemeBreakTest.txt \
+	SentenceBreakTest.txt \
+	WordBreakTest.txt
 
 CLEANFILES = pangorc
 DISTCLEANFILES = all-unicode.txt runtests.log
@@ -43,7 +46,7 @@ TESTS_ENVIRONMENT = \
 
 noinst_PROGRAMS = gen-all-unicode dump-boundaries
 
-check_PROGRAMS = testboundaries testcolor testscript
+check_PROGRAMS = testboundaries testboundaries_ucd testcolor testscript
 
 if HAVE_CAIRO
 check_PROGRAMS += testiter
@@ -54,6 +57,7 @@ endif
 
 gen_all_unicode_LDADD = $(GLIB_LIBS)
 testboundaries_LDADD = ../pango/libpango-$(PANGO_API_VERSION).la
+testboundaries_ucd_LDADD = ../pango/libpango-$(PANGO_API_VERSION).la
 testcolor_LDADD = ../pango/libpango-$(PANGO_API_VERSION).la
 testiter_LDADD = ../pango/libpango-$(PANGO_API_VERSION).la ../pango/libpangocairo-$(PANGO_API_VERSION).la
 testscript_LDADD = ../pango/libpango-$(PANGO_API_VERSION).la
diff --git a/tests/runtests.sh.in b/tests/runtests.sh.in
index 0c3466e2..a6d60007 100755
--- a/tests/runtests.sh.in
+++ b/tests/runtests.sh.in
@@ -1,7 +1,7 @@
 #! @SHELL@
 
 LOGFILE=runtests.log
-POTENTIAL_TESTS='testboundaries testcolor'
+POTENTIAL_TESTS='testboundaries testcolor testboundaries_ucd'
 
 ECHO_C='@ECHO_C@'
 ECHO_N='@ECHO_N@'
diff --git a/tests/testboundaries_ucd.c b/tests/testboundaries_ucd.c
new file mode 100644
index 00000000..7e266f88
--- /dev/null
+++ b/tests/testboundaries_ucd.c
@@ -0,0 +1,404 @@
+/* Pango
+ * testboundaries_ucd.c: Test text boundary algorithms with test data from
+ * Unicode Character Database.
+ *
+ * Copyright (C) 2003 Noah Levitt
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <pango/pango.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+
+/* Set to TRUE once any test line yields unexpected attributes; main()
+ * passes it to exit() as the process status. */
+static gboolean failed = FALSE;
+
+/* Lets a PangoLogAttr be read and written as a plain bitmask.
+ * PangoLogAttr has to be the same size as guint or this hack breaks */
+typedef union
+{
+  PangoLogAttr attr;
+  guint bits;
+}
+AttrBits;
+
+/* counts the number of multiplication and division signs up to the first
+ * '#' or null character */
+static gint
+count_attrs (gchar *line)
+{
+  gunichar ch;
+  gchar *p = line;
+  gint count = 0;
+
+  for (;;)
+    {
+      ch = g_utf8_get_char (p);
+
+      switch (ch)
+        {
+        /* MULTIPLICATION SIGN, DIVISION SIGN */
+        case 0x00d7: case 0x00f7:
+          count++;
+          break;
+
+        /* null char, NUMBER SIGN */
+        case 0x0000: case 0x0023:
+          return count;
+
+        default:
+          break;
+        }
+
+      p = g_utf8_next_char (p);
+    }
+  /* not reached */
+}
+
+/* Parses one line of a UCD break test file: signs alternating with
+ * hexadecimal code points, optionally followed by a '#' comment.
+ * DIVISION SIGN marks a boundary, MULTIPLICATION SIGN marks none.
+ *
+ * On success returns TRUE and stores in *str_return a newly allocated
+ * UTF-8 string of the code points, in *attr_return a newly allocated
+ * array with one entry per sign (entries for a DIVISION SIGN carry the
+ * bits of @bits, all others are zero) and in *num_attrs its length.
+ * The caller releases both with g_free().
+ *
+ * On a malformed line returns FALSE with nothing left allocated;
+ * *str_return and *attr_return are then NULL. */
+static gboolean
+parse_line (gchar *line,
+            AttrBits bits,
+            gchar **str_return,
+            PangoLogAttr **attr_return,
+            gint *num_attrs)
+{
+  GString *gs;
+  gunichar ch, character;
+  gchar *p, *q;
+  gint i;
+  AttrBits temp_attr;
+
+  *num_attrs = count_attrs (line);
+  *attr_return = g_new (PangoLogAttr, *num_attrs);
+  *str_return = NULL;
+
+  p = line;
+  i = 0;
+  gs = g_string_new (NULL);
+
+  for (;;)
+    {
+      temp_attr.bits = 0;
+
+      /* skip white space */
+      do
+        {
+          ch = g_utf8_get_char (p);
+          p = g_utf8_next_char (p);
+        }
+      while (g_unichar_isspace (ch));
+
+      switch (ch)
+        {
+        case 0x00f7: /* DIVISION SIGN: boundary here */
+          temp_attr.bits |= bits.bits;
+          /* fall through */
+
+        case 0x00d7: /* MULTIPLICATION SIGN: no boundary here */
+          break;
+
+        case 0x0000:
+        case 0x0023:
+          *str_return = g_string_free (gs, FALSE);
+          return TRUE;
+
+        default: /* unexpected character */
+          goto fail;
+        }
+
+      (*attr_return)[i] = temp_attr.attr;
+
+      /* skip white space */
+      do
+        {
+          ch = g_utf8_get_char (p);
+          p = g_utf8_next_char (p);
+        }
+      while (g_unichar_isspace (ch));
+      /* the loop stepped one past the first non-space character */
+      p = g_utf8_prev_char (p);
+
+      if (ch == 0x0023 || ch == 0x0000)
+        {
+          *str_return = g_string_free (gs, FALSE);
+          return TRUE;
+        }
+
+      /* the number must span 4 to 6 characters and not exceed U+10FFFF */
+      character = strtoul (p, &q, 16);
+      if (q < p + 4 || q > p + 6 || character > 0x10ffff)
+        goto fail;
+
+      p = q;
+
+      gs = g_string_append_unichar (gs, character);
+
+      i++;
+    }
+
+fail:
+  /* release the partial results so that a rejected line leaks nothing */
+  g_string_free (gs, TRUE);
+  g_free (*attr_return);
+  *attr_return = NULL;
+  return FALSE;
+}
+
+/* Returns TRUE if the first @len entries of @attrs1 and @attrs2 agree on
+ * whether any of the bits selected by @bits is set. */
+static gboolean
+attrs_equal (PangoLogAttr *attrs1,
+             PangoLogAttr *attrs2,
+             gint len,
+             AttrBits bits)
+{
+  AttrBits a, b;
+  gint i;
+
+  for (i = 0; i < len; i++)
+    {
+      a.bits = 0;
+      a.attr = attrs1[i];
+
+      b.bits = 0;
+      b.attr = attrs2[i];
+
+      /* can't do a straight comparison because the bitmask may have
+       * multiple bits set, and as long as attr&bitmask is not zero, it
+       * counts as being set (see word boundaries) */
+      if (((a.bits & bits.bits) != 0) != ((b.bits & bits.bits) != 0))
+        return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Renders @string in the notation of the test files: one sign per entry
+ * of @attrs (DIVISION SIGN if any bit of @bits is set, MULTIPLICATION SIGN
+ * otherwise) with the code points in between as upper-case hex.  Reads
+ * one more entry of @attrs than @string has characters.  Returns a newly
+ * allocated string. */
+static gchar *
+make_test_string (gchar *string,
+                  PangoLogAttr *attrs,
+                  AttrBits bits)
+{
+  GString *gs = g_string_new (NULL);
+  gint i = 0;
+  AttrBits a;
+  gchar *p = string;
+  gunichar ch;
+
+  for (;;)
+    {
+      a.bits = 0;
+      a.attr = attrs[i];
+      if ((a.bits & bits.bits) != 0)
+        gs = g_string_append_unichar (gs, 0x00f7);
+      else
+        gs = g_string_append_unichar (gs, 0x00d7);
+
+      g_string_append_c (gs, ' ');
+
+      if (*p == '\0')
+        break;
+
+      ch = g_utf8_get_char (p);
+      g_string_append_printf (gs, "%04X ", ch);
+
+      p = g_utf8_next_char (p);
+      i++;
+    }
+
+  return g_string_free (gs, FALSE);
+}
+
+/* Runs every line of the test file @filename through
+ * pango_get_log_attrs() and compares the bits selected by @bits with the
+ * boundaries the file expects.  Mismatches are reported on stderr and set
+ * the global failed flag.  A missing file only skips the test; any other
+ * I/O or parse error terminates the program.  If
+ * @fixup_broken_linebreaktest is set, the expectation for the first
+ * position of every line is cleared.  Frees @filename before returning. */
+static void
+do_test (gchar *filename,
+         AttrBits bits,
+         gboolean fixup_broken_linebreaktest)
+{
+  GIOChannel *channel;
+  GIOStatus status;
+  gchar *line;
+  gsize length, terminator_pos;
+  GError *error;
+  gchar *string;
+  PangoLogAttr *expected_attrs;
+  gint num_attrs;
+  gint i;
+
+  error = NULL;
+  channel = g_io_channel_new_file (filename, "r", &error);
+  if (!channel)
+    {
+      if (error->domain == G_FILE_ERROR && error->code == G_FILE_ERROR_NOENT)
+        {
+          g_print ("%s not found. Skipping test.\n", filename);
+          goto done;
+        }
+      else
+        {
+          g_printerr ("%s: %s\n", filename, error->message);
+          exit (1);
+        }
+    }
+
+  i = 1; /* line number, for error messages */
+  for (;;)
+    {
+      error = NULL;
+      status = g_io_channel_read_line (channel, &line, &length, &terminator_pos, &error);
+
+      switch (status)
+        {
+          case G_IO_STATUS_ERROR:
+            g_printerr ("%s: %s\n", filename, error->message);
+            exit (1);
+
+          case G_IO_STATUS_EOF:
+            goto done;
+
+          case G_IO_STATUS_AGAIN:
+            continue;
+
+          case G_IO_STATUS_NORMAL:
+            line[terminator_pos] = '\0';
+            break;
+        }
+
+      if (! parse_line (line, bits, &string, &expected_attrs, &num_attrs))
+        {
+          g_printerr ("%s: error parsing line %d: %s\n", filename, i, line);
+          exit (1);
+        }
+
+      if (num_attrs > 0)
+        {
+          PangoLogAttr *attrs = g_new (PangoLogAttr, num_attrs);
+          pango_get_log_attrs (string, -1, 0, pango_language_from_string ("C"), attrs, num_attrs);
+
+          /* LineBreakTest.txt from Unicode 5.1.0 has this bug that it says
+           * breaking is allowed at the beginning of the strings, while the
+           * algorithm says it's not. Fix that up. */
+          if (fixup_broken_linebreaktest)
+            memset (expected_attrs, 0, sizeof (expected_attrs[0]));
+
+          if (! attrs_equal (attrs, expected_attrs, num_attrs, bits))
+            {
+              gchar *str = make_test_string (string, attrs, bits);
+              gchar *comments = strchr (line, '#');
+              if (comments) /* don't print the # comment in the error message. print it separately */
+                {
+                  *comments = '\0';
+                  comments++;
+                }
+              else
+                {
+                  comments = "";
+                }
+
+              g_printerr ("%s: line %d failed\n"
+                          " expected: %s\n"
+                          " returned: %s\n"
+                          " comments: %s\n\n",
+                          filename, i, line, str, comments);
+
+              g_free (str);
+              failed = TRUE;
+            }
+          g_free (attrs);
+        }
+      g_free (string);
+      g_free (expected_attrs);
+      /* the line buffer is owned by us and must be released each time
+       * round, after the last use of line and comments above */
+      g_free (line);
+
+      i++;
+    }
+
+done:
+  if (channel)
+    g_io_channel_unref (channel);
+  if (error)
+    g_error_free (error);
+  g_free (filename);
+}
+
+/* Runs the four UCD break tests, looking for the data files in the
+ * directory named by the srcdir environment variable (default ".").
+ * Exits with the failed flag as status. */
+gint
+main (gint argc,
+      gchar **argv)
+{
+  gchar *srcdir;
+  gchar *filename;
+  AttrBits bits;
+
+  setlocale (LC_ALL, "");
+
+  srcdir = getenv ("srcdir");
+  if (!srcdir)
+    srcdir = ".";
+
+  filename = g_strdup_printf ("%s/GraphemeBreakTest.txt", srcdir);
+  bits.bits = 0;
+  bits.attr.is_cursor_position = 1;
+  do_test (filename, bits, FALSE);
+
+  filename = g_strdup_printf ("%s/WordBreakTest.txt", srcdir);
+  bits.bits = 0;
+  bits.attr.is_word_start = 1; /* either word start or end */
+  bits.attr.is_word_end = 1; /* (is this right?) */
+  do_test (filename, bits, FALSE);
+
+  filename = g_strdup_printf ("%s/SentenceBreakTest.txt", srcdir);
+  bits.bits = 0;
+  bits.attr.is_sentence_boundary = 1;
+  do_test (filename, bits, FALSE);
+
+  filename = g_strdup_printf ("%s/LineBreakTest.txt", srcdir);
+  bits.bits = 0;
+  bits.attr.is_line_break = 1;
+  bits.attr.is_mandatory_break = 1;
+  do_test (filename, bits, TRUE);
+
+  exit (failed);
+}