diff options
author | Havoc Pennington <hp@pobox.com> | 2000-12-02 07:49:56 +0000 |
---|---|---|
committer | Havoc Pennington <hp@src.gnome.org> | 2000-12-02 07:49:56 +0000 |
commit | 31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525 (patch) | |
tree | d7ed3aa9ac35017fe03d954dd6baa2ccfaf3ed30 /tests/testboundaries.c | |
parent | e9e84a3f75fbab073ce5488c0e82b3e7fc39bcda (diff) | |
download | pango-31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525.tar.gz |
delete lang engine
2000-11-30 Havoc Pennington <hp@pobox.com>
* modules/thai/thai.c: delete lang engine
* modules/tamil/tamil.c: delete lang engine
(tamil_engine_x_new): fix type tag for shape engine
* modules/indic/myanmar.c: delete lang engine
(pango_engine_x_new): fix type tag for shape engine
* modules/indic/gurmukhi.c: delete lang engine
(pango_indic_engine_x_new): fix type tag for shape engine
* modules/indic/gujarati.c: delete lang engine
(pango_indic_engine_x_new): fix type tag for shape engine
* modules/indic/devanagari.c: delete lang engine
(pango_indic_engine_x_new): fix type tag for shape engine
* modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION):
delete lang engine
* modules/indic/bengali.c: delete the lang engine
(pango_indic_engine_x_new): fix type tag for shape engine
* modules/hangul/hangul.c: delete the lang engine
(hangul_engine_x_new): fix type tag for shape engine
* modules/basic/basic.c: delete the lang engine
(basic_engine_x_new): fix type tag for shape engine
* modules/basic/basic-win32.c: delete the lang engine
(basic_engine_win32_new): this was a shape engine,
use correct type tag
* modules/basic/basic-ft2.c: delete the lang engine
* modules/arabic/arabic.c: Delete the lang engine
(arabic_engine_x_new): this is a shape
engine, not a lang engine, fix type tag
* pango/pango-layout.c (pango_layout_index_to_line_x): handle
the fact that paragraph delimiters aren't in the layout lines
(pango_layout_index_to_pos): update to handle paragraph
delimiters
* pango/break.c (pango_find_paragraph_boundary): New function
to find paragraph boundaries
* pango/pango-layout.c (get_items_log_attrs): don't separate calls
to pango_break() when directional level changes
* pango/pango-layout.h (struct _PangoLayoutLine): put start index
of the line into the struct
* pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to
reflect the fact that paragraph separators are removed from the
input text.
* pango/pango-layout.c (can_break_at): don't
special-case start of line and whitespace-following-alphabetic
here, because pango_break() already handles that properly
* tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh:
Add directory for test programs, and a script to run them all
* configure.in: Create Makefile in tests
* pango/break.c (pango_break): Try for a real implementation of
the Unicode text boundary algorithms
(pango_get_log_attrs): Allow length to be -1
* pango/pango-context.c (pango_itemize): use pango_item_new(),
assert that items added to the list are sane.
* pango/pango-layout.c (pango_layout_check_lines): Reimplement
to honor the paragraph boundaries from pango_break()
* pango/pango-layout.c (process_item): use pango_item_split() here
* pango/pango-item.c (pango_item_split): New function to split an
item into two items
Diffstat (limited to 'tests/testboundaries.c')
-rw-r--r-- | tests/testboundaries.c | 356 |
1 files changed, 356 insertions, 0 deletions
diff --git a/tests/testboundaries.c b/tests/testboundaries.c new file mode 100644 index 00000000..c05bc837 --- /dev/null +++ b/tests/testboundaries.c @@ -0,0 +1,356 @@ +/* Pango + * testboundaries.c: Test text boundary algorithms + * + * Copyright (C) 1999-2000 Red Hat Software + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> + +#include <pango/pango.h> + +#define CHFORMAT "%0#6x" + +/* FIXME for now this just tests that the breaking of some sample + * text conforms to certain rules and invariants. But eventually + * we should also have test-result pairs, i.e. a string and some + * encoding of the correct way to break the string, to check + * more precisely that things worked + */ + + +/* Keep these in sync with the same macros in break.c */ + +#define LEADING_JAMO(wc) ((wc) >= 0x1100 && (wc) <= 0x115F) +#define VOWEL_JAMO(wc) ((wc) >= 0x1160 && (wc) <= 0x11A2) +#define TRAILING_JAMO(wc) ((wc) >= 0x11A8 && (wc) <= 0x11F9) +#define JAMO(wc) ((wc) >= 0x1100 && (wc) <= 0x11FF) +/* "virama script" is just an optimization; it includes a bunch of + * scripts without viramas in them + */ +#define VIRAMA_SCRIPT(wc) ((wc) >= 0x0901 && (wc) <= 0x17FF) +#define VIRAMA(wc) ((wc) == 0x094D || \ + (wc) == 0x09CD || \ + (wc) == 0x0A4D || \ + (wc) == 0x0ACD || \ + (wc) == 0x0B4D || \ + (wc) == 0x0BCD || \ + (wc) == 0x0C4D || \ + (wc) == 0x0CCD || \ + (wc) == 0x0D4D || \ + (wc) == 0x0DCA || \ + (wc) == 0x0E3A || \ + (wc) == 0x0F84 || \ + (wc) == 0x1039 || \ + (wc) == 0x17D2) +/* Types of Japanese characters */ +#define JAPANESE(wc) ((wc) >= 0x2F00 && (wc) <= 0x30FF) +#define KANJI(wc) ((wc) >= 0x2F00 && (wc) <= 0x2FDF) +#define HIRAGANA(wc) ((wc) >= 0x3040 && (wc) <= 0x309F) +#define KATAKANA(wc) ((wc) >= 0x30A0 && (wc) <= 0x30FF) + +static int offset = 0; +static int line = 0; +static gunichar current_wc = 0; +static const char *line_start = NULL; +static const char *line_end = NULL; + +static void +fail (const char *format, + ...) +{ + char *str; + char *line_text; + + va_list args; + + va_start (args, format); + str = g_strdup_vprintf (format, args); + va_end (args); + + line_text = g_strndup (line_start, line_end - line_start); + + fprintf (stderr, "line %d offset %d char is " CHFORMAT ": %s\n (line is '%s')\n", line, offset, current_wc, str, line_text); + g_free (str); + g_free (line_text); + + exit (1); +} + +typedef void (* CharForeachFunc) (gunichar wc, + gunichar prev_wc, + gunichar next_wc, + GUnicodeType type, + GUnicodeType prev_type, + GUnicodeType next_type, + PangoLogAttr *attr, + PangoLogAttr *prev_attr, + PangoLogAttr *next_attr, + gpointer data); + +static void +log_attr_foreach (const char *text, + PangoLogAttr *attrs, + CharForeachFunc func, + gpointer data) +{ + const gchar *next = text; + gint length = strlen (text); + const gchar *end = text + length; + gint i = 0; + gunichar prev_wc; + gunichar next_wc; + GUnicodeType prev_type; + GUnicodeType next_type; + + if (next == end) + return; + + offset = 0; + line = 0; + + prev_type = (GUnicodeType) -1; + prev_wc = 0; + + next_wc = g_utf8_get_char (next); + next_type = g_unichar_type (next_wc); + + line_start = text; + line_end = text; + + while (next_wc != 0) + { + GUnicodeType type; + gunichar wc; + + wc = next_wc; + type = next_type; + + current_wc = wc; + + next = g_utf8_next_char (next); + line_end = next; + + if (next >= end) + next_wc = 0; + else + next_wc = g_utf8_get_char (next); + + if (next_wc) + next_type = g_unichar_type (next_wc); + + (* func) (wc, prev_wc, next_wc, + type, prev_type, next_type, + &attrs[i], + i != 0 ? &attrs[i-1] : NULL, + next_wc != 0 ? &attrs[i+1] : NULL, + data); + + prev_type = type; + prev_wc = wc; + ++i; + ++offset; + if (wc == '\n') + { + ++line; + offset = 0; + line_start = next; + line_end = next; + } + } +} + +static void +check_line_char (gunichar wc, + gunichar prev_wc, + gunichar next_wc, + GUnicodeType type, + GUnicodeType prev_type, + GUnicodeType next_type, + PangoLogAttr *attr, + PangoLogAttr *prev_attr, + PangoLogAttr *next_attr, + gpointer data) +{ + GUnicodeBreakType break_type; + GUnicodeBreakType prev_break_type; + + break_type = g_unichar_break_type (wc); + if (prev_wc) + prev_break_type = g_unichar_break_type (prev_wc); + else + prev_break_type = G_UNICODE_BREAK_UNKNOWN; + + if (wc == '\n') + { + if (prev_wc == '\r') + { + if (attr->is_break) + fail ("line break between \\r and \\n"); + } + + if (next_attr && !next_attr->is_break) + fail ("no line break after \\n"); + } + + if (attr->is_break && prev_wc == 0) + fail ("first char in string should not be marked as a line break"); + + if (break_type == G_UNICODE_BREAK_SPACE) + { + if (attr->is_break && prev_attr != NULL && + !attr->is_mandatory_break) + fail ("can't break lines before a space unless a mandatory break char precedes it; prev char was " CHFORMAT, prev_wc); + } + + if (attr->is_mandatory_break && !attr->is_break) + fail ("mandatory breaks must also be marked as regular breaks"); + + + + /* FIXME use the break tables from break.c to automatically + * check invariants for each cell in the table. Shouldn't + * be that hard to do. + */ + + if (break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION && + prev_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION && + attr->is_break && + !attr->is_mandatory_break) + fail ("can't break between two open punctuation chars"); + + if (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION && + prev_break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION && + attr->is_break && + !attr->is_mandatory_break) + fail ("can't break between two close punctuation chars"); + + if (break_type == G_UNICODE_BREAK_QUOTATION && + prev_break_type == G_UNICODE_BREAK_ALPHABETIC && + attr->is_break && + !attr->is_mandatory_break) + fail ("can't break letter-quotemark sequence"); +} + +static void +check_line_invariants (const char *text, + PangoLogAttr *attrs) +{ + log_attr_foreach (text, attrs, check_line_char, NULL); +} + +static void +check_word_invariants (const char *text, + PangoLogAttr *attrs) +{ + + +} + +static void +check_sentence_invariants (const char *text, + PangoLogAttr *attrs) +{ + + +} + +static void +check_grapheme_invariants (const char *text, + PangoLogAttr *attrs) +{ + + +} + +static void +print_sentences (const char *text, + PangoLogAttr *attrs) +{ + const char *p; + const char *last; + int i = 0; + + last = text; + p = text; + + while (*p) + { + if (attrs[i].is_sentence_boundary) + { + char *s = g_strndup (last, p - last); + printf ("%s\n", s); + g_free (s); + last = p; + } + + p = g_utf8_next_char (p); + ++i; + } +} + +static void +check_invariants (const char *text) +{ + int len; + PangoLogAttr *attrs; + + if (!g_utf8_validate (text, -1, NULL)) + fail ("Invalid UTF-8 in test text"); + + len = g_utf8_strlen (text, -1); + attrs = g_new0 (PangoLogAttr, len); + + pango_get_log_attrs (text, + -1, + 0, + "C", + attrs); + + check_line_invariants (text, attrs); + check_sentence_invariants (text, attrs); + check_grapheme_invariants (text, attrs); + check_word_invariants (text, attrs); + +#if 0 + print_sentences (text, attrs); +#endif + + g_free (attrs); +} + +int +main (int argc, + char **argv) +{ + gchar *text; + + if (!g_file_get_contents ("boundaries.utf8", &text, NULL, NULL)) + fail ("Couldn't open sample text file"); + + check_invariants (text); + + g_free (text); + + printf ("testboundaries passed\n"); + + return 0; +} + |