delete lang engine

2000-11-30 Havoc Pennington <hp@pobox.com> * modules/thai/thai.c: delete lang engine * modules/tamil/tamil.c: delete lang engine (tamil_engine_x_new): fix type tag for shape engine * modules/indic/myanmar.c: delete lang engine (pango_engine_x_new): fix type tag for shape engine * modules/indic/gurmukhi.c: delete lang engine (pango_indic_engine_x_new): fix type tag for shape engine * modules/indic/gujarati.c: delete lang engine (pango_indic_engine_x_new): fix type tag for shape engine * modules/indic/devanagari.c: delete lang engine (pango_indic_engine_x_new): fix type tag for shape engine * modules/indic/pango-indic-script.h (SCRIPT_ENGINE_DEFINITION): delete lang engine * modules/indic/bengali.c: delete the lang engine (pango_indic_engine_x_new): fix type tag for shape engine * modules/hangul/hangul.c: delete the lang engine (hangul_engine_x_new): fix type tag for shape engine * modules/basic/basic.c: delete the lang engine (basic_engine_x_new): fix type tag for shape engine * modules/basic/basic-win32.c: delete the lang engine (basic_engine_win32_new): this was a shape engine, use correct type tag * modules/basic/basic-ft2.c: delete the lang engine * modules/arabic/arabic.c: Delete the lang engine (arabic_engine_x_new): this is a shape engine, not a lang engine, fix type tag * pango/pango-layout.c (pango_layout_index_to_line_x): handle the fact that paragraph delimiters aren't in the layout lines (pango_layout_index_to_pos): update to handle paragraph delimiters * pango/break.c (pango_find_paragraph_boundary): New function to find paragraph boundaries * pango/pango-layout.c (get_items_log_attrs): don't separate calls to pango_break() when directional level changes * pango/pango-layout.h (struct _PangoLayoutLine): put start index of the line into the struct * pango/pango-layout.c (pango_layout_get_cursor_pos): Fixups to reflect the fact that paragraph separators are removed from the input text. * pango/pango-layout.c (can_break_at): don't special-case start of line and whitespace-following-alphabetic here, because pango_break() already handles that properly * tests/testboundaries.c, tests/Makefile.am, tests/runtests.sh: Add directory for test programs, and a script to run them all * configure.in: Create Makefile in tests * pango/break.c (pango_break): Try for a real implementation of the Unicode text boundary algorithms (pango_get_log_attrs): Allow length to be -1 * pango/pango-context.c (pango_itemize): use pango_item_new(), assert that items added to the list are sane. * pango/pango-layout.c (pango_layout_check_lines): Reimplement to honor the paragraph boundaries from pango_break() * pango/pango-layout.c (process_item): use pango_item_split() here * pango/pango-item.c (pango_item_split): New function to split an item into two items
author: Havoc Pennington <hp@pobox.com> 2000-12-02 07:49:56 +0000
committer: Havoc Pennington <hp@src.gnome.org> 2000-12-02 07:49:56 +0000
commit: 31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525 (patch)
tree: d7ed3aa9ac35017fe03d954dd6baa2ccfaf3ed30 /tests/testboundaries.c
parent: e9e84a3f75fbab073ce5488c0e82b3e7fc39bcda (diff)
download: pango-31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525.tar.gz
1 files changed, 356 insertions, 0 deletions
diff --git a/tests/testboundaries.c b/tests/testboundaries.c
new file mode 100644
index 00000000..c05bc837
--- /dev/null
+++ b/tests/testboundaries.c
@@ -0,0 +1,356 @@
+/* Pango
+ * testboundaries.c: Test text boundary algorithms
+ *
+ * Copyright (C) 1999-2000 Red Hat Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <pango/pango.h>
+
+#define CHFORMAT "%0#6x"
+
+/* FIXME for now this just tests that the breaking of some sample
+ * text conforms to certain rules and invariants. But eventually
+ * we should also have test-result pairs, i.e. a string and some
+ * encoding of the correct way to break the string, to check
+ * more precisely that things worked
+ */
+
+
+/* Keep these in sync with the same macros in break.c */
+
+#define LEADING_JAMO(wc)  ((wc) >= 0x1100 && (wc) <= 0x115F)
+#define VOWEL_JAMO(wc)    ((wc) >= 0x1160 && (wc) <= 0x11A2)
+#define TRAILING_JAMO(wc) ((wc) >= 0x11A8 && (wc) <= 0x11F9)
+#define JAMO(wc)          ((wc) >= 0x1100 && (wc) <= 0x11FF)
+/* "virama script" is just an optimization; it includes a bunch of
+ * scripts without viramas in them
+ */
+#define VIRAMA_SCRIPT(wc)        ((wc) >= 0x0901 && (wc) <= 0x17FF)
+#define VIRAMA(wc) ((wc) == 0x094D || \
+                    (wc) == 0x09CD || \
+                    (wc) == 0x0A4D || \
+                    (wc) == 0x0ACD || \
+                    (wc) == 0x0B4D || \
+                    (wc) == 0x0BCD || \
+                    (wc) == 0x0C4D || \
+                    (wc) == 0x0CCD || \
+                    (wc) == 0x0D4D || \
+                    (wc) == 0x0DCA || \
+                    (wc) == 0x0E3A || \
+                    (wc) == 0x0F84 || \
+                    (wc) == 0x1039 || \
+                    (wc) == 0x17D2)
+/* Types of Japanese characters */
+#define JAPANESE(wc) ((wc) >= 0x2F00 && (wc) <= 0x30FF)
+#define KANJI(wc)    ((wc) >= 0x2F00 && (wc) <= 0x2FDF)
+#define HIRAGANA(wc) ((wc) >= 0x3040 && (wc) <= 0x309F)
+#define KATAKANA(wc) ((wc) >= 0x30A0 && (wc) <= 0x30FF)
+
+static int offset = 0;
+static int line = 0;
+static gunichar current_wc = 0;
+static const char *line_start = NULL;
+static const char *line_end = NULL;
+
+static void
+fail (const char *format,
+      ...)
+{
+  char *str;
+  char *line_text;
+  
+  va_list args;
+
+  va_start (args, format);
+  str = g_strdup_vprintf (format, args);
+  va_end (args);
+
+  line_text = g_strndup (line_start, line_end - line_start);
+  
+  fprintf (stderr, "line %d offset %d char is " CHFORMAT ": %s\n (line is '%s')\n", line, offset, current_wc, str, line_text);
+  g_free (str);
+  g_free (line_text);
+
+  exit (1);
+}
+
+typedef void (* CharForeachFunc) (gunichar      wc,
+                                  gunichar      prev_wc,
+                                  gunichar      next_wc,
+                                  GUnicodeType  type,
+                                  GUnicodeType  prev_type,
+                                  GUnicodeType  next_type,
+                                  PangoLogAttr *attr,
+                                  PangoLogAttr *prev_attr,
+                                  PangoLogAttr *next_attr,
+                                  gpointer      data);
+
+static void
+log_attr_foreach (const char     *text,
+                  PangoLogAttr   *attrs,
+                  CharForeachFunc func,
+                  gpointer        data)
+{
+  const gchar *next = text;
+  gint length = strlen (text);
+  const gchar *end = text + length;
+  gint i = 0;
+  gunichar prev_wc;
+  gunichar next_wc;
+  GUnicodeType prev_type;
+  GUnicodeType next_type;
+  
+  if (next == end)
+    return;
+
+  offset = 0;
+  line = 0;
+  
+  prev_type = (GUnicodeType) -1;
+  prev_wc = 0;
+
+  next_wc = g_utf8_get_char (next);
+  next_type = g_unichar_type (next_wc);
+
+  line_start = text;
+  line_end = text;
+  
+  while (next_wc != 0)
+    {
+      GUnicodeType type;
+      gunichar wc;
+
+      wc = next_wc;
+      type = next_type;
+
+      current_wc = wc;
+      
+      next = g_utf8_next_char (next);
+      line_end = next;
+      
+      if (next >= end)
+        next_wc = 0;
+      else
+        next_wc = g_utf8_get_char (next);
+
+      if (next_wc)
+        next_type = g_unichar_type (next_wc);
+
+      (* func) (wc, prev_wc, next_wc,
+                type, prev_type, next_type,
+                &attrs[i],
+                i != 0 ? &attrs[i-1] : NULL,
+                next_wc != 0 ? &attrs[i+1] : NULL,
+                data);
+            
+      prev_type = type;
+      prev_wc = wc;
+      ++i;
+      ++offset;
+      if (wc == '\n')
+        {
+          ++line;
+          offset = 0;
+          line_start = next;
+          line_end = next;
+        }
+    }
+}
+
+static void
+check_line_char (gunichar      wc,
+                 gunichar      prev_wc,
+                 gunichar      next_wc,
+                 GUnicodeType  type,
+                 GUnicodeType  prev_type,
+                 GUnicodeType  next_type,
+                 PangoLogAttr *attr,
+                 PangoLogAttr *prev_attr,
+                 PangoLogAttr *next_attr,
+                 gpointer      data)
+{
+  GUnicodeBreakType break_type;
+  GUnicodeBreakType prev_break_type;
+
+  break_type = g_unichar_break_type (wc);
+  if (prev_wc)
+    prev_break_type = g_unichar_break_type (prev_wc);
+  else
+    prev_break_type = G_UNICODE_BREAK_UNKNOWN;
+  
+  if (wc == '\n')
+    {
+      if (prev_wc == '\r')
+        {
+          if (attr->is_break)
+            fail ("line break between \\r and \\n");
+        }
+      
+      if (next_attr && !next_attr->is_break)
+        fail ("no line break after \\n");
+    }
+  
+  if (attr->is_break && prev_wc == 0)
+    fail ("first char in string should not be marked as a line break");
+
+  if (break_type == G_UNICODE_BREAK_SPACE)
+    {
+      if (attr->is_break && prev_attr != NULL &&
+          !attr->is_mandatory_break)
+        fail ("can't break lines before a space unless a mandatory break char precedes it; prev char was " CHFORMAT, prev_wc);
+    }
+
+  if (attr->is_mandatory_break && !attr->is_break)
+    fail ("mandatory breaks must also be marked as regular breaks");
+
+  
+  
+  /* FIXME use the break tables from break.c to automatically
+   * check invariants for each cell in the table. Shouldn't
+   * be that hard to do.
+   */
+  
+  if (break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION &&
+      prev_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION &&
+      attr->is_break &&
+      !attr->is_mandatory_break)
+    fail ("can't break between two open punctuation chars");
+
+  if (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION &&
+      prev_break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION &&
+      attr->is_break &&
+      !attr->is_mandatory_break)
+    fail ("can't break between two close punctuation chars");
+
+  if (break_type == G_UNICODE_BREAK_QUOTATION &&
+      prev_break_type == G_UNICODE_BREAK_ALPHABETIC &&
+      attr->is_break &&
+      !attr->is_mandatory_break)
+    fail ("can't break letter-quotemark sequence");  
+}
+
+static void
+check_line_invariants (const char   *text,
+                       PangoLogAttr *attrs)
+{
+  log_attr_foreach (text, attrs, check_line_char, NULL);
+}
+
+static void
+check_word_invariants (const char   *text,
+                       PangoLogAttr *attrs)
+{
+
+
+}
+
+static void
+check_sentence_invariants (const char   *text,
+                           PangoLogAttr *attrs)
+{
+
+
+}
+
+static void
+check_grapheme_invariants (const char   *text,
+                           PangoLogAttr *attrs)
+{
+
+
+}
+
+static void
+print_sentences (const char   *text,
+                 PangoLogAttr *attrs)
+{
+  const char *p;
+  const char *last;
+  int i = 0;
+
+  last = text;
+  p = text;
+
+  while (*p)
+    {
+      if (attrs[i].is_sentence_boundary)
+        {
+          char *s = g_strndup (last, p - last);
+          printf ("%s\n", s);
+          g_free (s);
+          last = p;
+        }
+      
+      p = g_utf8_next_char (p);
+      ++i;
+    }
+}
+
+static void
+check_invariants (const char *text)
+{
+  int len;
+  PangoLogAttr *attrs;
+
+  if (!g_utf8_validate (text, -1, NULL))
+    fail ("Invalid UTF-8 in test text");
+  
+  len = g_utf8_strlen (text, -1);
+  attrs = g_new0 (PangoLogAttr, len);
+
+  pango_get_log_attrs (text,
+                       -1,
+                       0,
+                       "C",
+                       attrs);
+  
+  check_line_invariants (text, attrs);
+  check_sentence_invariants (text, attrs);
+  check_grapheme_invariants (text, attrs);
+  check_word_invariants (text, attrs);
+
+#if 0
+  print_sentences (text, attrs);
+#endif
+  
+  g_free (attrs);
+}
+
+int
+main (int    argc,
+      char **argv)
+{
+  gchar *text;
+
+  if (!g_file_get_contents ("boundaries.utf8", &text, NULL, NULL))
+    fail ("Couldn't open sample text file");
+  
+  check_invariants (text);
+
+  g_free (text);
+
+  printf ("testboundaries passed\n");
+  
+  return 0;
+}
+
author	Havoc Pennington <hp@pobox.com>	2000-12-02 07:49:56 +0000
committer	Havoc Pennington <hp@src.gnome.org>	2000-12-02 07:49:56 +0000
commit	31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525 (patch)
tree	d7ed3aa9ac35017fe03d954dd6baa2ccfaf3ed30 /tests/testboundaries.c
parent	e9e84a3f75fbab073ce5488c0e82b3e7fc39bcda (diff)
download	pango-31832c0f4bcdf3e7c69cd5b8a7ad570a7b60d525.tar.gz