summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Owen Taylor <otaylor@redhat.com> 2000-03-30 22:13:13 +0000
committer: Owen Taylor <otaylor@src.gnome.org> 2000-03-30 22:13:13 +0000
commit: c1198ebb48f2eaf3e71a5f37b4ceaae16cba84d7 (patch)
tree: 6aa116dfc63b08175102874c9945fadbb31f6b15
parent: 5b5698e8ad4fa0f0f5da0286ab34b5ba1fa8ecae (diff)
download: pango-c1198ebb48f2eaf3e71a5f37b4ceaae16cba84d7.tar.gz
New version of Arabic module from Karl Koehler, supporting:
Thu Mar 30 17:06:39 2000 Owen Taylor <otaylor@redhat.com>

* modules/arabic/*.[ch]: New version of Arabic module
from Karl Koehler, supporting:

- More extensive ligatures
- Some Hamza handling
- Vowel marks
- mule-arabic font
- LangBox font
- Persian (farsi) (needs testing)
-rw-r--r-- ChangeLog | 12
-rw-r--r-- ChangeLog.pre-1-0 | 12
-rw-r--r-- ChangeLog.pre-1-10 | 12
-rw-r--r-- ChangeLog.pre-1-2 | 12
-rw-r--r-- ChangeLog.pre-1-4 | 12
-rw-r--r-- ChangeLog.pre-1-6 | 12
-rw-r--r-- ChangeLog.pre-1-8 | 12
-rw-r--r-- modules/arabic/Makefile.am | 5
-rw-r--r-- modules/arabic/arabic-x.c | 456
-rw-r--r-- modules/arabic/arabic.c | 456
-rw-r--r-- modules/arabic/arconv.c | 699
-rw-r--r-- modules/arabic/arconv.h | 20
-rw-r--r-- modules/arabic/mulefont.c | 260
-rw-r--r-- modules/arabic/mulefont.h | 22
14 files changed, 1411 insertions, 591 deletions
diff --git a/ChangeLog b/ChangeLog
index 7a85abff..34bf6fd3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+Thu Mar 30 17:06:39 2000 Owen Taylor <otaylor@redhat.com>
+
+ * modules/arabic/*.[ch]: New version of Arabic module
+ from Karl Koehler, supporting:
+
+ - More extensive ligatures
+ - Some Hamza handling
+ - Vowel marks
+ - mule-arabic font
+ - LangBox font
+ - Persian (farsi) (needs testing)
+
Thu Mar 30 16:49:06 2000 Owen Taylor <otaylor@redhat.com>
* modules/basic/basic.c pango/utils.c (find_converter): Use
diff --git a/ChangeLog.pre-1-0 b/ChangeLog.pre-1-0
index 7a85abff..34bf6fd3 100644
--- a/ChangeLog.pre-1-0
+++ b/ChangeLog.pre-1-0
@@ -1,3 +1,15 @@
+Thu Mar 30 17:06:39 2000 Owen Taylor <otaylor@redhat.com>
+
+ * modules/arabic/*.[ch]: New version of Arabic module
+ from Karl Koehler, supporting:
+
+ - More extensive ligatures
+ - Some Hamza handling
+ - Vowel marks
+ - mule-arabic font
+ - LangBox font
+ - Persian (farsi) (needs testing)
+
Thu Mar 30 16:49:06 2000 Owen Taylor <otaylor@redhat.com>
* modules/basic/basic.c pango/utils.c (find_converter): Use
diff --git a/ChangeLog.pre-1-10 b/ChangeLog.pre-1-10
index 7a85abff..34bf6fd3 100644
--- a/ChangeLog.pre-1-10
+++ b/ChangeLog.pre-1-10
@@ -1,3 +1,15 @@
+Thu Mar 30 17:06:39 2000 Owen Taylor <otaylor@redhat.com>
+
+ * modules/arabic/*.[ch]: New version of Arabic module
+ from Karl Koehler, supporting:
+
+ - More extensive ligatures
+ - Some Hamza handling
+ - Vowel marks
+ - mule-arabic font
+ - LangBox font
+ - Persian (farsi) (needs testing)
+
Thu Mar 30 16:49:06 2000 Owen Taylor <otaylor@redhat.com>
* modules/basic/basic.c pango/utils.c (find_converter): Use
diff --git a/ChangeLog.pre-1-2 b/ChangeLog.pre-1-2
index 7a85abff..34bf6fd3 100644
--- a/ChangeLog.pre-1-2
+++ b/ChangeLog.pre-1-2
@@ -1,3 +1,15 @@
+Thu Mar 30 17:06:39 2000 Owen Taylor <otaylor@redhat.com>
+
+ * modules/arabic/*.[ch]: New version of Arabic module
+ from Karl Koehler, supporting:
+
+ - More extensive ligatures
+ - Some Hamza handling
+ - Vowel marks
+ - mule-arabic font
+ - LangBox font
+ - Persian (farsi) (needs testing)
+
Thu Mar 30 16:49:06 2000 Owen Taylor <otaylor@redhat.com>
* modules/basic/basic.c pango/utils.c (find_converter): Use
diff --git a/ChangeLog.pre-1-4 b/ChangeLog.pre-1-4
index 7a85abff..34bf6fd3 100644
--- a/ChangeLog.pre-1-4
+++ b/ChangeLog.pre-1-4
@@ -1,3 +1,15 @@
+Thu Mar 30 17:06:39 2000 Owen Taylor <otaylor@redhat.com>
+
+ * modules/arabic/*.[ch]: New version of Arabic module
+ from Karl Koehler, supporting:
+
+ - More extensive ligatures
+ - Some Hamza handling
+ - Vowel marks
+ - mule-arabic font
+ - LangBox font
+ - Persian (farsi) (needs testing)
+
Thu Mar 30 16:49:06 2000 Owen Taylor <otaylor@redhat.com>
* modules/basic/basic.c pango/utils.c (find_converter): Use
diff --git a/ChangeLog.pre-1-6 b/ChangeLog.pre-1-6
index 7a85abff..34bf6fd3 100644
--- a/ChangeLog.pre-1-6
+++ b/ChangeLog.pre-1-6
@@ -1,3 +1,15 @@
+Thu Mar 30 17:06:39 2000 Owen Taylor <otaylor@redhat.com>
+
+ * modules/arabic/*.[ch]: New version of Arabic module
+ from Karl Koehler, supporting:
+
+ - More extensive ligatures
+ - Some Hamza handling
+ - Vowel marks
+ - mule-arabic font
+ - LangBox font
+ - Persian (farsi) (needs testing)
+
Thu Mar 30 16:49:06 2000 Owen Taylor <otaylor@redhat.com>
* modules/basic/basic.c pango/utils.c (find_converter): Use
diff --git a/ChangeLog.pre-1-8 b/ChangeLog.pre-1-8
index 7a85abff..34bf6fd3 100644
--- a/ChangeLog.pre-1-8
+++ b/ChangeLog.pre-1-8
@@ -1,3 +1,15 @@
+Thu Mar 30 17:06:39 2000 Owen Taylor <otaylor@redhat.com>
+
+ * modules/arabic/*.[ch]: New version of Arabic module
+ from Karl Koehler, supporting:
+
+ - More extensive ligatures
+ - Some Hamza handling
+ - Vowel marks
+ - mule-arabic font
+ - LangBox font
+ - Persian (farsi) (needs testing)
+
Thu Mar 30 16:49:06 2000 Owen Taylor <otaylor@redhat.com>
* modules/basic/basic.c pango/utils.c (find_converter): Use
diff --git a/modules/arabic/Makefile.am b/modules/arabic/Makefile.am
index 951a3247..c614ae83 100644
--- a/modules/arabic/Makefile.am
+++ b/modules/arabic/Makefile.am
@@ -1,12 +1,15 @@
## Process this file with automake to create Makefile.in.
-noinst_LTLIBRARIES = pango-arabic.la
+moduledir = $(libdir)/pango/modules
+module_LTLIBRARIES = pango-arabic.la
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/pango/
pango_arabic_la_SOURCES = \
arabic.c \
arconv.c \
+ mulefont.c \
+ mulefont.h \
arconv.h
pango_arabic_la_LDFLAGS = -rpath $(libdir) -export-dynamic -avoid-version -module
diff --git a/modules/arabic/arabic-x.c b/modules/arabic/arabic-x.c
index 18ee4ce1..805c7bba 100644
--- a/modules/arabic/arabic-x.c
+++ b/modules/arabic/arabic-x.c
@@ -1,7 +1,8 @@
/* Pango - Arabic module
* arabic module
*
- * (C) 2000 K. Koehler <koehler@or.uni-bonn.de>
+ * (C) 2000 Karl Koehler<koehler@or.uni-bonn.de>
+ * Owen Taylor <otaylor@redhat.com>
*
*/
@@ -13,28 +14,31 @@
#include <unicode.h>
#include "arconv.h"
+#include "mulefont.h"
+
+/* #define DEBUG */
+#ifdef DEBUG
+#include <stdio.h>
+#endif
-/*
-** I hope thing's work easily ...
-*/
static PangoEngineRange arabic_range[] = {
- { 0x060B, 0x067F, "*" },
+ { 0x060B, 0x06D3, "*" } /* 0x060B, 0x06D3 */
};
static PangoEngineInfo script_engines[] = {
- {
- "ArabicScriptEngineLang",
- PANGO_ENGINE_TYPE_LANG,
- PANGO_RENDER_TYPE_NONE,
- arabic_range, G_N_ELEMENTS(arabic_range)
- },
- {
- "ArabicScriptEngineX",
- PANGO_ENGINE_TYPE_SHAPE,
- PANGO_RENDER_TYPE_X,
- arabic_range, G_N_ELEMENTS(arabic_range)
- }
+ {
+ "ArabicScriptEngineLang",
+ PANGO_ENGINE_TYPE_LANG,
+ PANGO_RENDER_TYPE_NONE,
+ arabic_range, G_N_ELEMENTS(arabic_range)
+ },
+ {
+ "ArabicScriptEngineX",
+ PANGO_ENGINE_TYPE_SHAPE,
+ PANGO_RENDER_TYPE_X,
+ arabic_range, G_N_ELEMENTS(arabic_range)
+ }
};
static gint n_script_engines = G_N_ELEMENTS (script_engines);
@@ -47,79 +51,109 @@ static gint n_script_engines = G_N_ELEMENTS (script_engines);
*/
static void
-arabic_engine_break (const char *text,
+arabic_engine_break (const char *text,
int len,
PangoAnalysis *analysis,
PangoLogAttr *attrs)
{
- /* Most of the code comes from tamil_engine_break
- * only difference is char stop based on modifiers
- */
-
- const char *cur = text;
- const char *next;
- gint i = 0;
- GUChar4 wc;
-
- while (*cur)
- {
- if (!_pango_utf8_iterate (cur, &next, &wc))
- return;
- if (cur == next)
- break;
- if ((next - text) > len)
- break;
- cur = next;
-
- attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
- attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
- attrs[i].is_char_stop = 1;
- attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
-
- i++;
- }
+ /* Most of the code comes from tamil_engine_break
+ * only difference is char stop based on modifiers
+ */
+
+ const char *cur = text;
+ const char *next;
+ gint i = 0;
+ GUChar4 wc;
+
+ while (*cur)
+ {
+ if (!_pango_utf8_iterate (cur, &next, &wc))
+ return;
+ if (cur == next)
+ break;
+ if ((next - text) > len)
+ break;
+ cur = next;
+
+ attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
+ attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
+ attrs[i].is_char_stop = 1;
+ attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
+ /* actually, is_word_stop is not correct, but simple and good enough. */
+ i++;
+ }
}
static PangoEngine *
arabic_engine_lang_new ()
{
- PangoEngineLang *result;
+ PangoEngineLang *result;
- result = g_new (PangoEngineLang, 1);
+ result = g_new (PangoEngineLang, 1);
- result->engine.id = "ArabicScriptEngine";
- result->engine.type = PANGO_ENGINE_TYPE_LANG;
- result->engine.length = sizeof (result);
- result->script_break = arabic_engine_break;
+ result->engine.id = "ArabicScriptEngine";
+ result->engine.type = PANGO_ENGINE_TYPE_LANG;
+ result->engine.length = sizeof (result);
+ result->script_break = arabic_engine_break;
- return (PangoEngine *)result;
+ return (PangoEngine *)result;
}
/*
* X window system script engine portion
*/
-static PangoXSubfont
-find_unic_font (PangoFont *font,char* charsets[])
+static int
+find_unic_font (PangoFont *font,char* charsets[],PangoXSubfont* rfonts)
{
- PangoXSubfont *subfonts;
- int *subfont_charsets;
- int n_subfonts;
- PangoXSubfont result = 0;
-
- n_subfonts = pango_x_list_subfonts (font, charsets, 1, &subfonts, &subfont_charsets);
-
- if (n_subfonts > 0)
- result = subfonts[0];
-
- g_free (subfonts);
- g_free (subfont_charsets);
-
- return result;
+ PangoXSubfont *subfonts;
+ int *subfont_charsets;
+ int n_subfonts;
+ int i;
+ int result = 0;
+
+ n_subfonts = pango_x_list_subfonts (font, charsets, 2,
+ &subfonts, &subfont_charsets);
+
+ for (i=0; i < n_subfonts; i++)
+ {
+ if ( (strcmp (charsets[subfont_charsets[i]], "mulearabic-2") == 0)
+ && arabic_muleinit(font,rfonts) )
+ {
+ result = 1; /* we know we have a mulearabic-font ... */
+ break;
+ }
+ else
+ { /* test if the font has Alif-Madda; if so assume it is ok */
+ if ( pango_x_has_glyph /* Alif-Madda */
+ (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFE81)))
+ {
+ rfonts[0] = subfonts[i];
+ result = 2;
+ }
+ if ( pango_x_has_glyph /* Shadda+Kasra */
+ (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC62)))
+ {
+ result = 3; /* extra vowels in font, hopefully */
+ }
+ if ( pango_x_has_glyph /* Lam-Min alone */
+ (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC42)))
+ {
+ result = 4; /* extra ligatures in font, hopefully */
+ }
+ break;
+ }
+ }
+
+ g_free (subfonts);
+ g_free (subfont_charsets);
+
+ return result;
}
static char *default_charset[] = {
- "iso10646-1"
+ "iso10646-1",
+ "mulearabic-2"
};
@@ -127,25 +161,36 @@ static char *default_charset[] = {
static void
set_glyph (PangoGlyphString *glyphs,
PangoFont *font, PangoXSubfont subfont,
- int i, int cluster_start, int glyph)
+ int i, int cluster_start, int glyph)
{
- PangoRectangle logical_rect;
+ PangoRectangle logical_rect;
+#ifdef DEBUG
+ if ( i < 0){
+ fprintf(stderr,"[ar] setglyph: setting glyph %x at index %i, cluster %i\n",
+ glyph,i,cluster_start);
+ raise(6);
+ } else {
+ fprintf(stderr,"[ar] setglyph: setting glyph %x at index %i, "
+ "cluster %i ( subfont %x )\n",
+ glyph,i,cluster_start,subfont);
+ }
+#endif
- glyphs->glyphs[i].glyph = PANGO_X_MAKE_GLYPH (subfont, glyph);
+ glyphs->glyphs[i].glyph = PANGO_X_MAKE_GLYPH (subfont, glyph);
- glyphs->glyphs[i].geometry.x_offset = 0;
- glyphs->glyphs[i].geometry.y_offset = 0;
-
- pango_font_get_glyph_extents (font, glyphs->glyphs[i].glyph, NULL, &logical_rect);
- glyphs->log_clusters[i] = cluster_start;
- if (arabic_isvowel(glyph))
- {
- glyphs->glyphs[i].geometry.width = 0;
- }
- else
- {
- glyphs->glyphs[i].geometry.width = logical_rect.width;
- }
+ glyphs->glyphs[i].geometry.x_offset = 0;
+ glyphs->glyphs[i].geometry.y_offset = 0;
+
+ pango_font_get_glyph_extents (font, glyphs->glyphs[i].glyph, NULL, &logical_rect);
+ glyphs->log_clusters[i] = cluster_start;
+ if (arabic_isvowel(glyph))
+ {
+ glyphs->glyphs[i].geometry.width = 0;
+ }
+ else
+ {
+ glyphs->glyphs[i].geometry.width = logical_rect.width;
+ }
}
@@ -158,106 +203,115 @@ arabic_engine_shape (PangoFont *font,
PangoAnalysis *analysis,
PangoGlyphString *glyphs)
{
- PangoXSubfont subfont;
-
- int n_chars, n_glyph;
- int i;
- const char *p;
- const char *pold;
- const char *next;
- GUChar4 *wc;
-
- g_return_if_fail (font != NULL);
- g_return_if_fail (text != NULL);
- g_return_if_fail (length >= 0);
- g_return_if_fail (analysis != NULL);
-
- /* We assume we have an unicode-font like 10x20 which containes
- ** the needed chars
- */
+ PangoXSubfont subfont;
+ PangoXSubfont arfonts[3];
+
+
+ int n_chars, n_glyph;
+ int i;
+ const char *p;
+ const char *pold;
+ const char *next;
+ GUChar4 *wc;
+ int lvl = 1;
+
+ g_return_if_fail (font != NULL);
+ g_return_if_fail (text != NULL);
+ g_return_if_fail (length >= 0);
+ g_return_if_fail (analysis != NULL);
+
+ /* We assume we have a unicode-font like 10x20 which contains
+ ** the needed chars -- or three mulearabic-coded fonts ...
+ */
- n_chars = n_glyph = unicode_strlen (text, length);
-
- if (!(subfont = find_unic_font (font, default_charset)))
- {
- PangoGlyph unknown_glyph = pango_x_get_unknown_glyph (font);
+ n_chars = n_glyph = unicode_strlen (text, length);
+
+ if (!(lvl = find_unic_font (font, default_charset,arfonts)))
+ {
+
+ PangoGlyph unknown_glyph = pango_x_get_unknown_glyph (font);
+
+ pango_glyph_string_set_size (glyphs, n_chars);
+
+ p = text;
+ for (i=0; i<n_chars; i++)
+ {
+ set_glyph (glyphs, font, arfonts[0], i,
+ p - text, unknown_glyph);
+ p = unicode_next_utf8 (p);
+ }
+ return;
+ }
+ subfont = arfonts[0];
+
+ if (analysis->level % 2 == 0)
+ {
+ /* We were called on a LTR directional run (e.g. some numbers);
+ fallback as simple as possible */
+
+ pango_glyph_string_set_size (glyphs, n_chars);
+
+ p = text;
+ for (i=0; i < n_chars; i++)
+ {
+ GUChar4 tmp_char;
- pango_glyph_string_set_size (glyphs, n_chars);
-
- p = text;
- for (i=0; i<n_chars; i++)
- {
- set_glyph (glyphs, font, subfont, i, p - text, unknown_glyph);
- p = unicode_next_utf8 (p);
- }
-
- return;
- }
-
- if (analysis->level % 2 == 0)
- {
- /* We were called on a LTR directional run (e.g. some numbers);
- fallback as simply as possible */
-
- pango_glyph_string_set_size (glyphs, n_chars);
-
- p = text;
- for (i=0; i < n_chars; i++)
- {
- GUChar4 tmp_char;
-
- _pango_utf8_iterate (p, &next, &tmp_char);
- set_glyph (glyphs, font, subfont, i, p - text, tmp_char);
- p = next;
- }
-
- return;
- }
-
- wc = (GUChar4 *)g_malloc(sizeof(GUChar4)*n_chars);
-
- p = text;
- for (i=0; i < n_chars; i++)
- {
- _pango_utf8_iterate (p, &next, &wc[n_chars - i - 1]);
- p = next;
- }
+ _pango_utf8_iterate (p, &next, &tmp_char);
+ set_glyph (glyphs, font, subfont, i, p - text, tmp_char);
+ p = next;
+ }
+
+ return;
+ }
+
+ wc = (GUChar4 *)g_malloc(sizeof(GUChar4)*n_chars);
+
+ p = text;
+ for (i=0; i < n_chars; i++)
+ {
+ _pango_utf8_iterate (p, &next, &wc[n_chars - i - 1]);
+ p = next;
+ }
- reshape(&n_glyph,wc);
-/* raise(2); */
-
- pango_glyph_string_set_size (glyphs, n_glyph);
-
- p = text;
- pold = p;
- i = n_chars-1;
- while(i >= 0)
- {
- if (wc[i] == 0)
- {
- p = unicode_next_utf8 (p);
- i--;
- }
- else
- {
- set_glyph (glyphs, font, subfont, n_glyph - 1, p - text, wc[i]);
- if ( arabic_isvowel(wc[i]))
- {
- glyphs->log_clusters[n_glyph-1] = pold - text;
- }
+ arabic_reshape(&n_glyph,wc,lvl);
+
+ pango_glyph_string_set_size (glyphs, n_glyph);
+
+ p = text;
+ pold = p;
+ i = n_chars-1;
+ while(i >= 0)
+ {
+ if (wc[i] == 0)
+ {
+ p = unicode_next_utf8 (p);
+ i--;
+ }
+ else
+ {
+ int cluster_start = arabic_isvowel (wc[i])
+ ? pold - text : p - text;
+ if ( lvl == 1 )
+ {
+#ifdef DEBUG
+ fprintf(stderr,"[ar] mule-recoding char %x",
+ wc[i]);
+#endif
+ arabic_mule_recode(&subfont,&(wc[i]),arfonts);
+ }
+
+ set_glyph(glyphs, font, subfont, n_glyph - 1,
+ cluster_start, wc[i]);
- pold = p;
- p = unicode_next_utf8 (p);
- n_glyph--;
- i--;
- };
- }
-
-/* if ((i != 0)||(n_glyph-1 != 0)){ */
-/* printf(" i= %x , n_glyph = %x "); */
-/* }; */
- g_free(wc);
+ pold = p;
+ p = unicode_next_utf8 (p);
+ n_glyph--;
+ i--;
+ }
+ }
+
+ g_free(wc);
}
@@ -265,29 +319,31 @@ static PangoCoverage *
arabic_engine_get_coverage (PangoFont *font,
const char *lang)
{
- GUChar4 i;
- PangoCoverage *result = pango_coverage_new ();
+ GUChar4 i;
+ PangoCoverage *result = pango_coverage_new ();
- for (i = 0x60B; i <= 0x67E; i++)
- pango_coverage_set (result, i, PANGO_COVERAGE_EXACT);
+ for (i = 0x60B; i <= 0x66D; i++)
+ pango_coverage_set (result, i, PANGO_COVERAGE_EXACT);
+ for (i = 0x670; i <= 0x6D3; i++)
+ pango_coverage_set (result, i, PANGO_COVERAGE_EXACT);
- return result;
+ return result;
}
static PangoEngine *
arabic_engine_x_new ()
{
- PangoEngineShape *result;
+ PangoEngineShape *result;
- result = g_new (PangoEngineShape, 1);
+ result = g_new (PangoEngineShape, 1);
- result->engine.id = "ArabicScriptEngine";
- result->engine.type = PANGO_ENGINE_TYPE_LANG;
- result->engine.length = sizeof (result);
- result->script_shape = arabic_engine_shape;
- result->get_coverage = arabic_engine_get_coverage;
+ result->engine.id = "ArabicScriptEngine";
+ result->engine.type = PANGO_ENGINE_TYPE_LANG;
+ result->engine.length = sizeof (result);
+ result->script_shape = arabic_engine_shape;
+ result->get_coverage = arabic_engine_get_coverage;
- return (PangoEngine *)result;
+ return (PangoEngine *)result;
}
@@ -300,19 +356,19 @@ arabic_engine_x_new ()
void
script_engine_list (PangoEngineInfo **engines, int *n_engines)
{
- *engines = script_engines;
- *n_engines = n_script_engines;
+ *engines = script_engines;
+ *n_engines = n_script_engines;
}
PangoEngine *
script_engine_load (const char *id)
{
- if (!strcmp (id, "ArabicScriptEngineLang"))
- return arabic_engine_lang_new ();
- else if (!strcmp (id, "ArabicScriptEngineX"))
- return arabic_engine_x_new ();
- else
- return NULL;
+ if (!strcmp (id, "ArabicScriptEngineLang"))
+ return arabic_engine_lang_new ();
+ else if (!strcmp (id, "ArabicScriptEngineX"))
+ return arabic_engine_x_new ();
+ else
+ return NULL;
}
void
diff --git a/modules/arabic/arabic.c b/modules/arabic/arabic.c
index 18ee4ce1..805c7bba 100644
--- a/modules/arabic/arabic.c
+++ b/modules/arabic/arabic.c
@@ -1,7 +1,8 @@
/* Pango - Arabic module
* arabic module
*
- * (C) 2000 K. Koehler <koehler@or.uni-bonn.de>
+ * (C) 2000 Karl Koehler<koehler@or.uni-bonn.de>
+ * Owen Taylor <otaylor@redhat.com>
*
*/
@@ -13,28 +14,31 @@
#include <unicode.h>
#include "arconv.h"
+#include "mulefont.h"
+
+/* #define DEBUG */
+#ifdef DEBUG
+#include <stdio.h>
+#endif
-/*
-** I hope thing's work easily ...
-*/
static PangoEngineRange arabic_range[] = {
- { 0x060B, 0x067F, "*" },
+ { 0x060B, 0x06D3, "*" } /* 0x060B, 0x06D3 */
};
static PangoEngineInfo script_engines[] = {
- {
- "ArabicScriptEngineLang",
- PANGO_ENGINE_TYPE_LANG,
- PANGO_RENDER_TYPE_NONE,
- arabic_range, G_N_ELEMENTS(arabic_range)
- },
- {
- "ArabicScriptEngineX",
- PANGO_ENGINE_TYPE_SHAPE,
- PANGO_RENDER_TYPE_X,
- arabic_range, G_N_ELEMENTS(arabic_range)
- }
+ {
+ "ArabicScriptEngineLang",
+ PANGO_ENGINE_TYPE_LANG,
+ PANGO_RENDER_TYPE_NONE,
+ arabic_range, G_N_ELEMENTS(arabic_range)
+ },
+ {
+ "ArabicScriptEngineX",
+ PANGO_ENGINE_TYPE_SHAPE,
+ PANGO_RENDER_TYPE_X,
+ arabic_range, G_N_ELEMENTS(arabic_range)
+ }
};
static gint n_script_engines = G_N_ELEMENTS (script_engines);
@@ -47,79 +51,109 @@ static gint n_script_engines = G_N_ELEMENTS (script_engines);
*/
static void
-arabic_engine_break (const char *text,
+arabic_engine_break (const char *text,
int len,
PangoAnalysis *analysis,
PangoLogAttr *attrs)
{
- /* Most of the code comes from tamil_engine_break
- * only difference is char stop based on modifiers
- */
-
- const char *cur = text;
- const char *next;
- gint i = 0;
- GUChar4 wc;
-
- while (*cur)
- {
- if (!_pango_utf8_iterate (cur, &next, &wc))
- return;
- if (cur == next)
- break;
- if ((next - text) > len)
- break;
- cur = next;
-
- attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
- attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
- attrs[i].is_char_stop = 1;
- attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
-
- i++;
- }
+ /* Most of the code comes from tamil_engine_break
+ * only difference is char stop based on modifiers
+ */
+
+ const char *cur = text;
+ const char *next;
+ gint i = 0;
+ GUChar4 wc;
+
+ while (*cur)
+ {
+ if (!_pango_utf8_iterate (cur, &next, &wc))
+ return;
+ if (cur == next)
+ break;
+ if ((next - text) > len)
+ break;
+ cur = next;
+
+ attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
+ attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
+ attrs[i].is_char_stop = 1;
+ attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
+ /* actually, is_word_stop is not correct, but simple and good enough. */
+ i++;
+ }
}
static PangoEngine *
arabic_engine_lang_new ()
{
- PangoEngineLang *result;
+ PangoEngineLang *result;
- result = g_new (PangoEngineLang, 1);
+ result = g_new (PangoEngineLang, 1);
- result->engine.id = "ArabicScriptEngine";
- result->engine.type = PANGO_ENGINE_TYPE_LANG;
- result->engine.length = sizeof (result);
- result->script_break = arabic_engine_break;
+ result->engine.id = "ArabicScriptEngine";
+ result->engine.type = PANGO_ENGINE_TYPE_LANG;
+ result->engine.length = sizeof (result);
+ result->script_break = arabic_engine_break;
- return (PangoEngine *)result;
+ return (PangoEngine *)result;
}
/*
* X window system script engine portion
*/
-static PangoXSubfont
-find_unic_font (PangoFont *font,char* charsets[])
+static int
+find_unic_font (PangoFont *font,char* charsets[],PangoXSubfont* rfonts)
{
- PangoXSubfont *subfonts;
- int *subfont_charsets;
- int n_subfonts;
- PangoXSubfont result = 0;
-
- n_subfonts = pango_x_list_subfonts (font, charsets, 1, &subfonts, &subfont_charsets);
-
- if (n_subfonts > 0)
- result = subfonts[0];
-
- g_free (subfonts);
- g_free (subfont_charsets);
-
- return result;
+ PangoXSubfont *subfonts;
+ int *subfont_charsets;
+ int n_subfonts;
+ int i;
+ int result = 0;
+
+ n_subfonts = pango_x_list_subfonts (font, charsets, 2,
+ &subfonts, &subfont_charsets);
+
+ for (i=0; i < n_subfonts; i++)
+ {
+ if ( (strcmp (charsets[subfont_charsets[i]], "mulearabic-2") == 0)
+ && arabic_muleinit(font,rfonts) )
+ {
+ result = 1; /* we know we have a mulearabic-font ... */
+ break;
+ }
+ else
+ { /* test if the font has Alif-Madda; if so assume it is ok */
+ if ( pango_x_has_glyph /* Alif-Madda */
+ (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFE81)))
+ {
+ rfonts[0] = subfonts[i];
+ result = 2;
+ }
+ if ( pango_x_has_glyph /* Shadda+Kasra */
+ (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC62)))
+ {
+ result = 3; /* extra vowels in font, hopefully */
+ }
+ if ( pango_x_has_glyph /* Lam-Min alone */
+ (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC42)))
+ {
+ result = 4; /* extra ligatures in font, hopefully */
+ }
+ break;
+ }
+ }
+
+ g_free (subfonts);
+ g_free (subfont_charsets);
+
+ return result;
}
static char *default_charset[] = {
- "iso10646-1"
+ "iso10646-1",
+ "mulearabic-2"
};
@@ -127,25 +161,36 @@ static char *default_charset[] = {
static void
set_glyph (PangoGlyphString *glyphs,
PangoFont *font, PangoXSubfont subfont,
- int i, int cluster_start, int glyph)
+ int i, int cluster_start, int glyph)
{
- PangoRectangle logical_rect;
+ PangoRectangle logical_rect;
+#ifdef DEBUG
+ if ( i < 0){
+ fprintf(stderr,"[ar] setglyph: setting glyph %x at index %i, cluster %i\n",
+ glyph,i,cluster_start);
+ raise(6);
+ } else {
+ fprintf(stderr,"[ar] setglyph: setting glyph %x at index %i, "
+ "cluster %i ( subfont %x )\n",
+ glyph,i,cluster_start,subfont);
+ }
+#endif
- glyphs->glyphs[i].glyph = PANGO_X_MAKE_GLYPH (subfont, glyph);
+ glyphs->glyphs[i].glyph = PANGO_X_MAKE_GLYPH (subfont, glyph);
- glyphs->glyphs[i].geometry.x_offset = 0;
- glyphs->glyphs[i].geometry.y_offset = 0;
-
- pango_font_get_glyph_extents (font, glyphs->glyphs[i].glyph, NULL, &logical_rect);
- glyphs->log_clusters[i] = cluster_start;
- if (arabic_isvowel(glyph))
- {
- glyphs->glyphs[i].geometry.width = 0;
- }
- else
- {
- glyphs->glyphs[i].geometry.width = logical_rect.width;
- }
+ glyphs->glyphs[i].geometry.x_offset = 0;
+ glyphs->glyphs[i].geometry.y_offset = 0;
+
+ pango_font_get_glyph_extents (font, glyphs->glyphs[i].glyph, NULL, &logical_rect);
+ glyphs->log_clusters[i] = cluster_start;
+ if (arabic_isvowel(glyph))
+ {
+ glyphs->glyphs[i].geometry.width = 0;
+ }
+ else
+ {
+ glyphs->glyphs[i].geometry.width = logical_rect.width;
+ }
}
@@ -158,106 +203,115 @@ arabic_engine_shape (PangoFont *font,
PangoAnalysis *analysis,
PangoGlyphString *glyphs)
{
- PangoXSubfont subfont;
-
- int n_chars, n_glyph;
- int i;
- const char *p;
- const char *pold;
- const char *next;
- GUChar4 *wc;
-
- g_return_if_fail (font != NULL);
- g_return_if_fail (text != NULL);
- g_return_if_fail (length >= 0);
- g_return_if_fail (analysis != NULL);
-
- /* We assume we have an unicode-font like 10x20 which containes
- ** the needed chars
- */
+ PangoXSubfont subfont;
+ PangoXSubfont arfonts[3];
+
+
+ int n_chars, n_glyph;
+ int i;
+ const char *p;
+ const char *pold;
+ const char *next;
+ GUChar4 *wc;
+ int lvl = 1;
+
+ g_return_if_fail (font != NULL);
+ g_return_if_fail (text != NULL);
+ g_return_if_fail (length >= 0);
+ g_return_if_fail (analysis != NULL);
+
+ /* We assume we have a unicode-font like 10x20 which contains
+ ** the needed chars -- or three mulearabic-coded fonts ...
+ */
- n_chars = n_glyph = unicode_strlen (text, length);
-
- if (!(subfont = find_unic_font (font, default_charset)))
- {
- PangoGlyph unknown_glyph = pango_x_get_unknown_glyph (font);
+ n_chars = n_glyph = unicode_strlen (text, length);
+
+ if (!(lvl = find_unic_font (font, default_charset,arfonts)))
+ {
+
+ PangoGlyph unknown_glyph = pango_x_get_unknown_glyph (font);
+
+ pango_glyph_string_set_size (glyphs, n_chars);
+
+ p = text;
+ for (i=0; i<n_chars; i++)
+ {
+ set_glyph (glyphs, font, arfonts[0], i,
+ p - text, unknown_glyph);
+ p = unicode_next_utf8 (p);
+ }
+ return;
+ }
+ subfont = arfonts[0];
+
+ if (analysis->level % 2 == 0)
+ {
+ /* We were called on a LTR directional run (e.g. some numbers);
+ fallback as simple as possible */
+
+ pango_glyph_string_set_size (glyphs, n_chars);
+
+ p = text;
+ for (i=0; i < n_chars; i++)
+ {
+ GUChar4 tmp_char;
- pango_glyph_string_set_size (glyphs, n_chars);
-
- p = text;
- for (i=0; i<n_chars; i++)
- {
- set_glyph (glyphs, font, subfont, i, p - text, unknown_glyph);
- p = unicode_next_utf8 (p);
- }
-
- return;
- }
-
- if (analysis->level % 2 == 0)
- {
- /* We were called on a LTR directional run (e.g. some numbers);
- fallback as simply as possible */
-
- pango_glyph_string_set_size (glyphs, n_chars);
-
- p = text;
- for (i=0; i < n_chars; i++)
- {
- GUChar4 tmp_char;
-
- _pango_utf8_iterate (p, &next, &tmp_char);
- set_glyph (glyphs, font, subfont, i, p - text, tmp_char);
- p = next;
- }
-
- return;
- }
-
- wc = (GUChar4 *)g_malloc(sizeof(GUChar4)*n_chars);
-
- p = text;
- for (i=0; i < n_chars; i++)
- {
- _pango_utf8_iterate (p, &next, &wc[n_chars - i - 1]);
- p = next;
- }
+ _pango_utf8_iterate (p, &next, &tmp_char);
+ set_glyph (glyphs, font, subfont, i, p - text, tmp_char);
+ p = next;
+ }
+
+ return;
+ }
+
+ wc = (GUChar4 *)g_malloc(sizeof(GUChar4)*n_chars);
+
+ p = text;
+ for (i=0; i < n_chars; i++)
+ {
+ _pango_utf8_iterate (p, &next, &wc[n_chars - i - 1]);
+ p = next;
+ }
- reshape(&n_glyph,wc);
-/* raise(2); */
-
- pango_glyph_string_set_size (glyphs, n_glyph);
-
- p = text;
- pold = p;
- i = n_chars-1;
- while(i >= 0)
- {
- if (wc[i] == 0)
- {
- p = unicode_next_utf8 (p);
- i--;
- }
- else
- {
- set_glyph (glyphs, font, subfont, n_glyph - 1, p - text, wc[i]);
- if ( arabic_isvowel(wc[i]))
- {
- glyphs->log_clusters[n_glyph-1] = pold - text;
- }
+ arabic_reshape(&n_glyph,wc,lvl);
+
+ pango_glyph_string_set_size (glyphs, n_glyph);
+
+ p = text;
+ pold = p;
+ i = n_chars-1;
+ while(i >= 0)
+ {
+ if (wc[i] == 0)
+ {
+ p = unicode_next_utf8 (p);
+ i--;
+ }
+ else
+ {
+ int cluster_start = arabic_isvowel (wc[i])
+ ? pold - text : p - text;
+ if ( lvl == 1 )
+ {
+#ifdef DEBUG
+ fprintf(stderr,"[ar] mule-recoding char %x",
+ wc[i]);
+#endif
+ arabic_mule_recode(&subfont,&(wc[i]),arfonts);
+ }
+
+ set_glyph(glyphs, font, subfont, n_glyph - 1,
+ cluster_start, wc[i]);
- pold = p;
- p = unicode_next_utf8 (p);
- n_glyph--;
- i--;
- };
- }
-
-/* if ((i != 0)||(n_glyph-1 != 0)){ */
-/* printf(" i= %x , n_glyph = %x "); */
-/* }; */
- g_free(wc);
+ pold = p;
+ p = unicode_next_utf8 (p);
+ n_glyph--;
+ i--;
+ }
+ }
+
+ g_free(wc);
}
@@ -265,29 +319,31 @@ static PangoCoverage *
arabic_engine_get_coverage (PangoFont *font,
const char *lang)
{
- GUChar4 i;
- PangoCoverage *result = pango_coverage_new ();
+ GUChar4 i;
+ PangoCoverage *result = pango_coverage_new ();
- for (i = 0x60B; i <= 0x67E; i++)
- pango_coverage_set (result, i, PANGO_COVERAGE_EXACT);
+ for (i = 0x60B; i <= 0x66D; i++)
+ pango_coverage_set (result, i, PANGO_COVERAGE_EXACT);
+ for (i = 0x670; i <= 0x6D3; i++)
+ pango_coverage_set (result, i, PANGO_COVERAGE_EXACT);
- return result;
+ return result;
}
static PangoEngine *
arabic_engine_x_new ()
{
- PangoEngineShape *result;
+ PangoEngineShape *result;
- result = g_new (PangoEngineShape, 1);
+ result = g_new (PangoEngineShape, 1);
- result->engine.id = "ArabicScriptEngine";
- result->engine.type = PANGO_ENGINE_TYPE_LANG;
- result->engine.length = sizeof (result);
- result->script_shape = arabic_engine_shape;
- result->get_coverage = arabic_engine_get_coverage;
+ result->engine.id = "ArabicScriptEngine";
+ result->engine.type = PANGO_ENGINE_TYPE_LANG;
+ result->engine.length = sizeof (result);
+ result->script_shape = arabic_engine_shape;
+ result->get_coverage = arabic_engine_get_coverage;
- return (PangoEngine *)result;
+ return (PangoEngine *)result;
}
@@ -300,19 +356,19 @@ arabic_engine_x_new ()
void
script_engine_list (PangoEngineInfo **engines, int *n_engines)
{
- *engines = script_engines;
- *n_engines = n_script_engines;
+ *engines = script_engines;
+ *n_engines = n_script_engines;
}
PangoEngine *
script_engine_load (const char *id)
{
- if (!strcmp (id, "ArabicScriptEngineLang"))
- return arabic_engine_lang_new ();
- else if (!strcmp (id, "ArabicScriptEngineX"))
- return arabic_engine_x_new ();
- else
- return NULL;
+ if (!strcmp (id, "ArabicScriptEngineLang"))
+ return arabic_engine_lang_new ();
+ else if (!strcmp (id, "ArabicScriptEngineX"))
+ return arabic_engine_x_new ();
+ else
+ return NULL;
}
void
diff --git a/modules/arabic/arconv.c b/modules/arabic/arconv.c
index c345e3a8..979ceb9d 100644
--- a/modules/arabic/arconv.c
+++ b/modules/arabic/arconv.c
@@ -1,233 +1,556 @@
+/* This is part of Pango - Arabic shaping module
+ *
+ * (C) 2000 Karl Koehler<koehler@or.uni-bonn.de>
+ *
+ * Note : The book "The Unicode Standard Version 3.0" is not very helpful
+ * regarding arabic, so I implemented this according to my own best
+ * knowledge. Bad examples from the book are 'ALEF.LAM'-ligature,
+ * ( one also says fi-ligature, not if-ligature ) and HAMZA handling.
+ * There is only _one_ letter HAMZA, and so four (!) forms of HAMZA in
+ * the basic section seem .. strange.
+ */
#include "arconv.h"
+/* #define DEBUG */
+#ifdef DEBUG
+#include <stdio.h>
+#endif
-/* This belongs to my arabic-shaping-module */
typedef struct {
- GUChar4 basechar;
- GUChar4 charstart;
- int count;
+ GUChar4 basechar;
+ GUChar4 charstart;
+ int count;
} shapestruct;
+typedef struct {
+ GUChar4 basechar;
+ GUChar4 mark1; /* has to be initialized to zero */
+ GUChar4 vowel; /* */
+ char connecttoleft;
+ signed char lignum; /* is a ligature with lignum aditional characters */
+ char numshapes;
+} charstruct;
+
/* The Unicode order is always: Standalone End Beginning Middle */
static shapestruct chartable [] =
{
- {0x621, 0xFE80,4}, /* HAMZA; handle seperately !!! */
- {0x622, 0xFE81,2}, /* ALIF MADDA */
- {0x623, 0xFE83,2}, /* ALIF HAMZA */
- {0x624, 0xFE85,2}, /* WAW HAMZA */
- {0x625, 0xFE87,2}, /* ALIF IHAMZA */
- {0x626, 0xFE89,4}, /* YA HAMZA */
- {0x627, 0xFE8D,2}, /* ALIF */
- {0x628, 0xFE8F,4}, /* BA */
- {0x629, 0xFE93,2}, /* TA MARBUTA */
- {0x62A, 0xFE95,4}, /* TA */
- {0x62B, 0xFE99,4}, /* THA */
- {0x62C, 0xFE9D,4}, /* DJIM */
- {0x62D, 0xFEA1,4}, /* HA */
- {0x62E, 0xFEA5,4}, /* CHA */
- {0x62F, 0xFEA9,2}, /* DAL */
- {0x630, 0xFEAB,2}, /* THAL */
- {0x631, 0xFEAD,2}, /* RA */
- {0x632, 0xFEAF,2}, /* ZAY */
- {0x633, 0xFEB1,4}, /* SIN */
- {0x634, 0xFEB5,4}, /* SHIN */
- {0x635, 0xFEB9,4}, /* SAAD */
- {0x636, 0xFEBD,4}, /* DAAD */
- {0x637, 0xFEC1,4}, /* .TA */
- {0x638, 0xFEC5,4}, /* .ZA */
- {0x639, 0xFEC9,4}, /* AIN */
- {0x63A, 0xFECD,4}, /* RAIN */
- {0x641, 0xFED1,4}, /* FA */
- {0x642, 0xFED5,4}, /* QAF */
- {0x643, 0xFED9,4}, /* KAF */
- {0x644, 0xFEDD,4}, /* LAM */
- {0x645, 0xFEE1,4}, /* MIM */
- {0x646, 0xFEE5,4}, /* NUN */
- {0x647, 0xFEE9,4}, /* HA */
- {0x648, 0xFEED,2}, /* WAW */
- {0x649, 0xFEEF,2}, /* ALIF MAQSORA */
- {0x64A, 0xFEF1,4}, /* YA */
- {0xFEF5, 0xFEF5,2}, /* Lam-Alif Madda */
- {0xFEF7, 0xFEF7,2}, /* Lam-Alif Hamza*/
- {0xFEF9, 0xFEF9,2}, /* Lam-Alif iHamza*/
- {0xFEFB, 0xFEFB,2} /* Lam-Alif */
-};
+ /* Rows 0x621..0x64A are contiguous so they can be indexed by
+ * (character - 0x621); all rows are sorted by basechar. */
+ {0x621, 0xFE80,4}, /* HAMZA; handle separately !!! */
+ {0x622, 0xFE81,2}, /* ALIF MADDA */
+ {0x623, 0xFE83,2}, /* ALIF HAMZA */
+ {0x624, 0xFE85,2}, /* WAW HAMZA */
+ {0x625, 0xFE87,2}, /* ALIF IHAMZA */
+ {0x626, 0xFE89,4}, /* YA HAMZA */
+ {0x627, 0xFE8D,2}, /* ALIF */
+ {0x628, 0xFE8F,4}, /* BA */
+ {0x629, 0xFE93,2}, /* TA MARBUTA */
+ {0x62A, 0xFE95,4}, /* TA */
+ {0x62B, 0xFE99,4}, /* THA */
+ {0x62C, 0xFE9D,4}, /* DJIM */
+ {0x62D, 0xFEA1,4}, /* HA */
+ {0x62E, 0xFEA5,4}, /* CHA */
+ {0x62F, 0xFEA9,2}, /* DAL */
+ {0x630, 0xFEAB,2}, /* THAL */
+ {0x631, 0xFEAD,2}, /* RA */
+ {0x632, 0xFEAF,2}, /* ZAY */
+ {0x633, 0xFEB1,4}, /* SIN */
+ {0x634, 0xFEB5,4}, /* SHIN */
+ {0x635, 0xFEB9,4}, /* SAAD */
+ {0x636, 0xFEBD,4}, /* DAAD */
+ {0x637, 0xFEC1,4}, /* .TA */
+ {0x638, 0xFEC5,4}, /* .ZA */
+ {0x639, 0xFEC9,4}, /* AIN */
+ {0x63A, 0xFECD,4}, /* RAIN */
+ {0x63B, 0x0000,0}, /* : */
+ {0x63C, 0x0000,0}, /* empty for */
+ {0x63D, 0x0000,0}, /* simple */
+ {0x63E, 0x0000,0}, /* indexing */
+ {0x63F, 0x0000,0}, /* : */
+ {0x640, 0x0640,1}, /* */
+ {0x641, 0xFED1,4}, /* FA */
+ {0x642, 0xFED5,4}, /* QAF */
+ {0x643, 0xFED9,4}, /* KAF */
+ {0x644, 0xFEDD,4}, /* LAM */
+ {0x645, 0xFEE1,4}, /* MIM */
+ {0x646, 0xFEE5,4}, /* NUN */
+ {0x647, 0xFEE9,4}, /* HA */
+ {0x648, 0xFEED,2}, /* WAW */
+ {0x649, 0xFEEF,2}, /* ALIF MAQSURA */
+ {0x64A, 0xFEF1,4}, /* YA */
+ /* The following letters are not plain arabic */
+ /* some of the coding does not preserve order ... */
+ {0x679, 0xFB66,4}, /* Urdu:TTEH */
+ {0x67B, 0xFB52,4}, /* Sindhi: */
+ {0x67E, 0xFB56,4}, /* PEH: latin compatibility */
+ {0x680, 0xFB5A,4}, /* Sindhi: BEHEH (0xFB62 belongs to 0x67F) */
+ {0x683, 0xFB76,4}, /* Sindhi: NYEH (0xFB86 belongs to 0x68E) */
+ {0x684, 0xFB72,4}, /* " */
+ {0x686, 0xFB7A,4}, /* Persian: Tcheh */
+ {0x687, 0xFB7E,4}, /* Sindhi: */
+ {0x68C, 0xFB84,2}, /* Sindhi: DAHAL */
+ {0x68D, 0xFB82,2}, /* Sindhi */
+ {0x68E, 0xFB86,2}, /* */
+ {0x691, 0xFB8C,2}, /* Urdu */
+ {0x698, 0xFB8A,2}, /* Persian: JEH */
+ {0x6A4, 0xFB6A,4}, /* VEH: latin compatibility */
+ {0x6A6, 0xFB6E,4}, /* Sindhi */
+ {0x6A9, 0xFB8E,4}, /* Persan K */
+ {0x6AA, 0xFB8E,4}, /* extrawide KAF-> Persian KAF */
+ {0x6AF, 0xFB92,4}, /* Persian: GAF */
+ {0x6B1, 0xFB9A,4}, /* Sindhi: */
+ {0x6B3, 0xFB96,4}, /* GUEH: forms are 0xFB96..0xFB99 */
+ {0x6BA, 0xFB9E,2}, /* Urdu:NUN GHUNNA */
+ {0x6BB, 0xFBA0,4}, /* Sindhi: */
+ {0x6BE, 0xFBAA,4}, /* HA special */
+ {0x6C0, 0xFBA4,2}, /* izafet: HA HAMZA */
+ {0x6C1, 0xFBA6,4}, /* Urdu: */
+ {0x6D2, 0xFBAE,2}, /* YA barree */
+ {0x6D3, 0xFBB0,2}, /* YA BARREE HAMZA */
+
+ {0xFEF5, 0xFEF5,2}, /* Lam-Alif Madda */
+ {0xFEF7, 0xFEF7,2}, /* Lam-Alif Hamza */
+ {0xFEF9, 0xFEF9,2}, /* Lam-Alif iHamza */
+ {0xFEFB, 0xFEFB,2} /* Lam-Alif */
+};
#define ALIF 0x627
#define ALIFHAMZA 0x623
#define ALIFIHAMZA 0x625
#define ALIFMADDA 0x622
#define LAM 0x644
#define HAMZA 0x621
+
+/* Hamza below ( saves Kasra and special cases ), Hamza above ( always joins ).
+ * In Unicode U+0654 is HAMZA ABOVE and U+0655 is HAMZA BELOW, so that
+ * ALIF + aHAMZA gives ALIFHAMZA (0x623) and ALIF + iHAMZA gives
+ * ALIFIHAMZA (0x625).
+ * As I don't know what sHAMZA is good for I don't handle it.
+ */
+#define iHAMZA 0x655
+#define aHAMZA 0x654
+#define sHAMZA 0x674
+
#define WAW 0x648
#define WAWHAMZA 0x624
+#define SHADDA 0x651
+#define KASRA 0x650
+#define FATHA 0x64E
+#define DAMMA 0x64F
+
#define LAM_ALIF 0xFEFB
#define LAM_ALIFHAMZA 0xFEF7
#define LAM_ALIFIHAMZA 0xFEF9
#define LAM_ALIFMADDA 0xFEF5
-#define LAM_ALIF_f 0xFEFC
-#define LAM_ALIFHAMZA_f 0xFEF8
-#define LAM_ALIFIHAMZA_f 0xFEFA
-#define LAM_ALIFMADDA_f 0xFEF6
-
-int arabic_isvowel(GUChar4 s)
+
+/* charstruct_init: reset every field of *s to zero, i.e. an empty glyph
+ * slot with no base character, no marks and no shapes.
+ */
+void
+charstruct_init(charstruct* s)
{
- if ((s >= 0x64B) && (s <= 0x653)) return 1;
- if ((s >= 0xFE70) && (s <= 0xFE7F)) return 1;
- return 0;
+  /* characters */
+  s->vowel = s->mark1 = s->basechar = 0;
+  /* bookkeeping */
+  s->numshapes = 0;
+  s->lignum = 0;
+  s->connecttoleft = 0;
+}
+
+/* copycstostring: write the shaped glyph *s into string, filling slots
+ * downwards from index *i.  Order of slots: base character, mark1 (if
+ * set), vowel (if set; replaced by 0 unless level > 1), then a 0 for each
+ * slot still counted in s->lignum.  *i is decremented once per slot
+ * written and s->lignum is counted down with it.  An empty *s (basechar
+ * == 0) writes nothing.
+ */
+void
+copycstostring(GUChar4* string,int* i,charstruct* s,int level)
+{
+  if (s->basechar == 0)
+    return;
+
+  string[(*i)--] = s->basechar;
+  s->lignum--;
+
+  if (s->mark1 != 0)
+    {
+      string[(*i)--] = s->mark1;
+      s->lignum--;
+    }
+
+  if (s->vowel != 0)
+    { /* fonts below level 2 have no vowels: leave a hole instead */
+      string[(*i)--] = (level > 1) ? s->vowel : 0;
+      s->lignum--;
+    }
+
+  /* pad the slots of the characters that were merged away */
+  for (; s->lignum > 0; s->lignum--)
+    string[(*i)--] = 0;
+
+#ifdef DEBUG
+  if (*i < -1){
+    fprintf(stderr,"you are in trouble ! i = %i, the last char is %x, "
+            "lignum = %i",
+            *i,s->basechar,s->lignum);
+  }
+#endif
}
-static GUChar4 unshape(GUChar4 s)
+/* arabic_isvowel: return 1 when s lies in one of the ranges this module
+ * treats as vowel marks -- 0x64B..0x655 (which includes the joining
+ * HAMZA marks), 0xFC5E..0xFC63 or 0xFE70..0xFE7F -- and 0 otherwise.
+ */
+int
+arabic_isvowel(GUChar4 s)
+{
+  int basic    = (s >= 0x64B)  && (s <= 0x655);
+  int combined = (s >= 0xFC5E) && (s <= 0xFC63);
+  int shaped   = (s >= 0xFE70) && (s <= 0xFE7F);
+
+  return (basic || combined || shaped) ? 1 : 0;
+}
+
+/* unshape: map an Arabic presentation form (0xFE81 .. 0xFEFC) back to the
+ * base character that owns it in chartable; any other value is returned
+ * unchanged.  The owner is the row with the largest charstart that is
+ * still <= s.  The whole table is scanned, bounded by its size, because
+ * chartable is sorted by basechar and not by charstart: placeholder rows
+ * have charstart 0 and the extended letters point into 0xFBxx.
+ * 0xFE8B/0xFE8C resolve to YA HAMZA (0x626), whose row starts at 0xFE89.
+ */
+static GUChar4
+unshape(GUChar4 s)
{
- int j = 0;
- if ( (s > 0xFE80) && ( s < 0xFEFF )){ /* arabic shaped Glyph , not HAMZA */
- while ( chartable[j+1].charstart <= s) j++;
- return chartable[j].basechar;
- } else if ((s == 0xFE8B)||(s == 0xFE8C)){
- return HAMZA;
- } else {
- return s;
- };
+  int n = sizeof(chartable) / sizeof(chartable[0]);
+  int best = -1;
+  int j;
+
+  if ((s <= 0xFE80) || (s > 0xFEFC))
+    return s; /* not an arabic shaped glyph (or the isolated HAMZA) */
+
+  for (j = 0; j < n; j++)
+    {
+      GUChar4 start = chartable[j].charstart;
+
+      if ((start < 0xFE80) || (start > s))
+        continue; /* row has no form at or below s in this block */
+      if ((best < 0) || (start > chartable[best].charstart))
+        best = j;
+    }
+
+  if (best < 0)
+    return s;
+  return chartable[best].basechar;
}
-static GUChar4 charshape(GUChar4 s,short which)
+/* charshape: return presentation form number `which` of the base
+ * character s, i.e. chartable[row].charstart + which.  HAMZA keeps its
+ * own code for which < 2 and uses 0xFE8B/0xFE8C otherwise.  A character
+ * that has no row in chartable, or whose row is a placeholder with
+ * count 0, is returned unchanged.
+ */
+static GUChar4
+charshape(GUChar4 s,short which)
{ /* which 0=alone 1=end 2=start 3=middle */
- int j = 0;
- if ( (s >= chartable[1].basechar) && ( s <= 0xFEFB )){
- /* arabic base char of Lam-Alif */
- while ( chartable[j].basechar < s) j++;
- return chartable[j].charstart+which;
- } else if (s == HAMZA){
- if (which < 2) return s;
- else return 0xFE8B+(which-2); /* The Hamza-'pod' */
- } else {
- return s;
- };
+  int j = 0;
+
+  if (s == HAMZA)
+    {
+      if (which < 2) return s;
+      else return 0xFE8B+(which-2); /* The Hamza-'pod' */
+    }
+  if ((s < chartable[1].basechar) || (s > 0xFEFB))
+    return s; /* outside the range covered by the table */
+
+  if (s <= 0x64A)
+    { /* basic character: rows 0x621..0x64A are indexed by code point */
+      j = s - chartable[0].basechar;
+    }
+  else
+    { /* special char or Lam-Alif: rows are sorted by basechar and the
+       * last row (0xFEFB) bounds the search */
+      while (chartable[j].basechar < s) j++;
+      if (chartable[j].basechar != s)
+        return s; /* no row for s (digits, punctuation, ...) */
+    }
+
+  if (chartable[j].count == 0)
+    return s; /* placeholder row without presentation forms */
+  return chartable[j].charstart+which;
}
-static short shapecount(GUChar4 s)
+/* shapecount: return the number of presentation forms of s as listed in
+ * chartable.  Vowels, characters without a row in the table and
+ * placeholder rows (count 0) all report 1, so the result is never 0 and
+ * is safe to use as a modulus.
+ */
+static short
+shapecount(GUChar4 s)
{
- int j = 0;
- if (arabic_isvowel(s)){ /* correct trailing wovels */
- return 1;
- } else if ( (s >= chartable[0].basechar) && ( s <= 0xFEFB )){
- /* arabic base char or ligature */
- while ( chartable[j].basechar < s) j++;
- return chartable[j].count;
- } else {
- return 1;
- };
+  int j = 0;
+
+  if (arabic_isvowel(s))
+    { /* correct trailing vowels */
+      return 1;
+    }
+  if ((s < chartable[0].basechar) || (s > 0xFEFB))
+    return 1; /* outside the range covered by the table */
+
+  if (s <= 0x64A)
+    { /* basic character: rows 0x621..0x64A are indexed by code point */
+      j = s - chartable[0].basechar;
+    }
+  else
+    { /* special char or ligature: rows are sorted by basechar and the
+       * last row (0xFEFB) bounds the search */
+      while (chartable[j].basechar < s) j++;
+      if (chartable[j].basechar != s)
+        return 1; /* no row for s */
+    }
+
+  if (chartable[j].count == 0)
+    return 1; /* placeholder row */
+  return chartable[j].count;
}
-void shape(int* len,GUChar4* string)
+int
+ligature(GUChar4* string,int si,int len,charstruct* oldchar)
+{ /* Try to fold string[si] into the glyph being built in *oldchar.
+   * Returns 0 when no ligature is possible (*oldchar is left untouched),
+   * 1 when a vowel or mark was attached or an empty *oldchar received
+   * its base character, and 2 when two characters were fused into one
+   * base character.  Every non-zero return also increments
+   * oldchar->lignum.  len is only used for the position tests on HAMZA.
+   */
+ int retval = 0;
+ GUChar4 newchar = string[si];
+ if (arabic_isvowel(newchar))
+ {
+ retval = 1;
+ switch(newchar)
+ {
+ case SHADDA: oldchar->mark1 = newchar; break;
+ case iHAMZA:
+ switch(oldchar->basechar)
+ {
+ case ALIF:
+ oldchar->basechar = ALIFIHAMZA;
+ retval = 2; break;
+ case LAM_ALIF:
+ oldchar->basechar = LAM_ALIFIHAMZA;
+ retval = 2; break;
+ default: oldchar->mark1 = newchar; break;
+ }
+ break;
+ case aHAMZA:
+ switch(oldchar->basechar)
+ {
+ case ALIF:
+ oldchar->basechar = ALIFHAMZA;
+ retval = 2; break;
+ case LAM_ALIF:
+ oldchar->basechar = LAM_ALIFHAMZA;
+ retval = 2; break;
+ case WAW:
+ oldchar->basechar = WAWHAMZA;
+ retval = 2; break;
+ default: /* whatever sense this may make .. */
+ oldchar->mark1 = newchar; break;
+ }
+ break;
+ case KASRA:
+ switch(oldchar->basechar)
+ {
+ case ALIFHAMZA:
+ oldchar->basechar = ALIFIHAMZA;
+ retval = 2; break;
+ case LAM_ALIFHAMZA:
+ oldchar->basechar = LAM_ALIFIHAMZA;
+ retval = 2; break;
+ default: oldchar->vowel = newchar; break;
+ }
+ break;
+ default: oldchar->vowel = newchar; break;
+ }
+ oldchar->lignum++;
+ return retval;
+ }
+ if (oldchar->vowel != 0)
+ { /* if we already joined a vowel, we can't join a Hamza */
+ return 0;
+ }
+
+ switch(oldchar->basechar)
+ {
+ case LAM:
+ switch (newchar)
+ {
+ case ALIF: oldchar->basechar = LAM_ALIF;
+ oldchar->numshapes = 2; retval = 2; break;
+ case ALIFHAMZA: oldchar->basechar = LAM_ALIFHAMZA;
+ oldchar->numshapes = 2; retval = 2; break;
+ case ALIFIHAMZA:oldchar->basechar = LAM_ALIFIHAMZA;
+ oldchar->numshapes = 2; retval = 2; break;
+ case ALIFMADDA: oldchar->basechar = LAM_ALIFMADDA ;
+ oldchar->numshapes = 2; retval = 2; break;
+ }
+ break;
+ case ALIF:
+ switch (newchar)
+ {
+ case ALIF: oldchar->basechar = ALIFMADDA; retval = 2; break;
+ case HAMZA:
+ if (si == len-2) /* HAMZA is 2nd char */
+ {
+ oldchar->basechar = ALIFHAMZA; retval = 2;
+ }
+ break;
+ }
+ break;
+ case WAW:
+ switch (newchar)
+ {
+ case HAMZA:oldchar->basechar = WAWHAMZA; retval = 2; break;
+ }
+ break;
+ case LAM_ALIF:
+ switch (newchar)
+ {
+ case HAMZA:
+ if (si == len-4) /* ! We assume the string has been split
+ into words. This is AL-A.. I hope */
+ {
+ oldchar->basechar = LAM_ALIFHAMZA; retval = 2;
+ }
+ break;
+ }
+ break;
+ case 0:
+ oldchar->basechar = newchar;
+ oldchar->numshapes = shapecount(newchar);
+ retval = 1;
+ break;
+ }
+ if (retval)
+ {
+ oldchar->lignum++;
+#ifdef DEBUG
+ fprintf(stderr,"[ar] ligature : added %x to make %x\n",
+ newchar,oldchar->basechar);
+#endif
+ }
+ return retval;
+}
+
+static void
+shape(int olen,int* len,GUChar4* string,int level)
{
- /* The string must be in visual order already.
- ** This routine does the basic arabic reshaping.
- */
- int j;
- int pcount = 1; /* #of preceding vowels , plus one */
- short nc; /* #shapes of next char */
- short sc; /* #shapes of current char */
- int prevstate = 0; /* */
- int which;
-
- GUChar4 prevchar = 0;
-
- sc = shapecount(string[(*len)-1]);
- j = (*len)-1;
- while (j >= 0){
- if (arabic_isvowel(string[j])){
- j--; continue; /* don't shape vowels */
- }
- if (string[j] == 0){
- j--; continue;
- };
- if (j > 0){
- pcount = 1;
- while ( ( j-pcount >= 0)&&
- ( (arabic_isvowel(string[j-pcount]))||
- (string[j-pcount] == 0) ))
- pcount++;
- nc = shapecount(string[j-pcount]);
- } else {
- nc = 1;
- };
-
- if (nc == 1){
- which = 0; /* end or basic */
- } else {
- which = 2; /* middle or beginning */
- };
- if ( prevstate & 2 ){ /* use end or Middle-form */
- which += 1;
-#define LIG
-#ifdef LIG
- /* test if char == Alif && prev == Lam */
- if (prevchar == LAM){
- switch(string[j]){
- case ALIF:
- (*len)--;
- string[j+1] = LAM_ALIF;
- string[j] = 0; j++; break;
- case ALIFHAMZA:
- (*len)--;
- string[j+1] = LAM_ALIFHAMZA;
- string[j] = 0; j++; break;
- case ALIFIHAMZA:
- (*len)--;
- string[j+1] = LAM_ALIFIHAMZA;
- string[j] = 0; j++; break;
- case ALIFMADDA:
- (*len)--;
- string[j+1] = LAM_ALIFMADDA;
- string[j] = 0; j++; break;
- }
- }
-#endif
- } else { /* use basic or beginning form */
-#ifdef LIG
- if ((string[j] == HAMZA)
- &&(prevchar)
- &&(!arabic_isvowel(string[j-1])) ){
- switch(prevchar){
- case ALIF:
- (*len)--;
- string[j+1] = ALIFHAMZA;
- string[j] = 0;
- j++; sc = 2; break;
- case WAW:
- (*len)--;
- string[j+1] = WAWHAMZA;
- string[j] = 0;
- j++; sc = 2; break;
- /* case LAM_ALIF: have to hande LAM_ALIF+HAMZA.*/
- /* But LAM_ALIF is two back already ... */
- }
- }
+ /* The string must be in visual order already.
+ ** This routine does the basic arabic reshaping.  It reads the string
+ ** from index olen-1 down to 0 and writes the shaped glyphs back into
+ ** the same buffer, again starting at index olen-1.
+ ** olen is the memory length, *len the number of non-null characters:
+ ** it is set to olen and reduced by one for each ligature() call that
+ ** returns 2.  level is only handed on to copycstostring().
+ */
+ charstruct oldchar,curchar;
+ int si = (olen)-1;
+ int j = (olen)-1;
+ int join;
+ int which;
+
+ *len = olen;
+ charstruct_init(&oldchar);
+ charstruct_init(&curchar);
+ while (si >= 0)
+ {
+ join = ligature(string,si,olen,&curchar);
+ if (!join)
+ { /* shape curchar */
+ int nc = shapecount(string[si]);
+ if (nc == 1)
+ {
+ which = 0; /* end or basic */
+ }
+ else
+ {
+ which = 2; /* middle or beginning */
+ }
+ if (oldchar.connecttoleft) which++;
+ which = which % (curchar.numshapes);
+ curchar.basechar = charshape(curchar.basechar,which);
+ if (curchar.numshapes > 2)
+ curchar.connecttoleft = 1;
+
+ /* get rid of oldchar */
+ copycstostring(string,&j,&oldchar,level);
+ oldchar = curchar; /* new values in oldchar */
+
+ /* init new curchar */
+ charstruct_init(&curchar);
+ curchar.basechar = string[si];
+ curchar.numshapes = nc;
+ curchar.lignum++;
+ }
+ else if ( join == 2 )
+ {
+ (*len)--;
+ }
+ si--;
+ }
+ /* Handle last char */
+
+ if (oldchar.connecttoleft)
+ which = 1;
+ else
+ which = 0;
+ which = which % (curchar.numshapes);
+ curchar.basechar = charshape(curchar.basechar,which);
+ /* get rid of oldchar */
+ copycstostring(string,&j,&oldchar,level);
+ copycstostring(string,&j,&curchar,level);
+#ifdef DEBUG
+ fprintf(stderr,"[ar] shape statistic: %i chars -> %i glyphs \n",
+ olen,*len);
#endif
- }
- which = which % sc; /* middle->end,beginning->basic */
- if (string[j] != 0){
- prevchar = string[j];
- string[j] = charshape(string[j],which);
- prevstate = which;
- sc = nc; /* no need to recalculate */
- }
- j--;
- }
}
-void reshape(int* len,GUChar4* string)
+/* doublelig: second pass over an already shaped string that replaces
+ * pairs string[si], string[si-1] by one presentation ligature.
+ *   - SHADDA combined with KASRA/FATHA/DAMMA (either order) or followed
+ *     by 0x64C/0x64D becomes one of 0xFC5E..0xFC62;
+ *   - only when level > 3: initial LAM, TA, BA or NUN followed by the
+ *     listed forms of DJEEM, .HA, CHA (and MIM for LAM).
+ * The ligature is stored in string[si], string[si-1] is set to 0 and
+ * *len is decremented.  olen is the memory length of string.
+ */
+static void
+doublelig(int olen,int* len,GUChar4* string,int level)
+{ /* Ok. We have presentation ligatures in our font. */
+ int si = (olen)-1;
+ GUChar4 lapresult;
+
+ while (si > 0)
+ {
+ lapresult = 0;
+ switch(string[si])
+ {
+ case SHADDA:
+ switch(string[si-1])
+ {
+ case KASRA: lapresult = 0xFC62; break;
+ case FATHA: lapresult = 0xFC60; break;
+ case DAMMA: lapresult = 0xFC61; break;
+ case 0x64C: lapresult = 0xFC5E; break;
+ case 0x64D: lapresult = 0xFC5F; break;
+ }
+ break;
+ case KASRA:
+ if (string[si-1]==SHADDA) lapresult = 0xFC62;
+ break;
+ case FATHA:
+ if (string[si-1]==SHADDA) lapresult = 0xFC60;
+ break;
+ case DAMMA:
+ if (string[si-1]==SHADDA) lapresult = 0xFC61;
+ break;
+ case 0xFEDF: /* LAM initial */
+ if (level > 3){
+ switch(string[si-1]){
+ case 0xFE9E : lapresult = 0xFC3F; break; /* DJEEM final*/
+ case 0xFEA0 : lapresult = 0xFCC9; break;
+ case 0xFEA2 : lapresult = 0xFC40; break; /* .HA final */
+ case 0xFEA4 : lapresult = 0xFCCA; break;
+ case 0xFEA6 : lapresult = 0xFC41; break; /* CHA final; 0xFC3F..0xFC42 are the LAM ligatures */
+ case 0xFEA8 : lapresult = 0xFCCB; break;
+ case 0xFEE2 : lapresult = 0xFC42; break; /* MIM final */
+ case 0xFEE4 : lapresult = 0xFCCC; break;
+ }
+ }
+ break;
+ case 0xFE97: /* TA initial */
+ if (level > 3){
+ switch(string[si-1]){
+ case 0xFEA0 : lapresult = 0xFCA1; break; /* DJ init */
+ case 0xFEA4 : lapresult = 0xFCA2; break; /* .HA */
+ case 0xFEA8 : lapresult = 0xFCA3; break; /* CHA */
+ }
+ }
+ break;
+ case 0xFE91: /* BA initial */
+ if (level > 3){
+ switch(string[si-1]){
+ case 0xFEA0 : lapresult = 0xFC9C; break; /* DJ init */
+ case 0xFEA4 : lapresult = 0xFC9D; break; /* .HA */
+ case 0xFEA8 : lapresult = 0xFC9E; break; /* CHA */
+ }
+ }
+ break;
+ case 0xFEE7: /* NUN initial */
+ if (level > 3) {
+ switch(string[si-1]){
+ case 0xFEA0 : lapresult = 0xFCD2; break; /* DJ init */
+ case 0xFEA4 : lapresult = 0xFCD3; break; /* .HA */
+ case 0xFEA8 : lapresult = 0xFCD4; break; /* CHA */
+ }
+ }
+ break;
+ default:
+ break;
+ } /* end switch string[si] */
+ if (lapresult != 0)
+ {
+ string[si] = lapresult; (*len)--; string[si-1] = 0x0;
+ }
+ si--;
+ }
+}
+
+/* arabic_reshape: reshape string (visual order) in place.
+ *   len    in:  length of string; out: number of non-null characters
+ *   string buffer of *len characters, rewritten in place
+ *   level  capability level of the font; the ligature pass doublelig()
+ *          is only run for level > 2
+ * An empty string (*len <= 0) is left untouched: shape() finishes with
+ * `which % curchar.numshapes`, and numshapes is still 0 from
+ * charstruct_init() when no character was read.
+ */
+void
+arabic_reshape(int* len,GUChar4* string,int level)
{
- int i;
- for ( i = 0; i < *len; i++){
- string[i] = unshape(string[i]);
- }
- shape(len,string);
+  int i;
+  int olen = *len;
+
+  if (olen <= 0)
+    return; /* nothing to shape */
+
+  for ( i = 0; i < olen; i++){
+    string[i] = unshape(string[i]);
+  }
+  shape(olen,len,string,level);
+  if (level > 2)
+    doublelig(olen,len,string,level);
}
-/* Lam-Alif beginns at F5,F7,F9,FB ( MADDA,HAMZA,IHAMZA, . respectively ) */
+
+
diff --git a/modules/arabic/arconv.h b/modules/arabic/arconv.h
index a90a0e78..6a9d7233 100644
--- a/modules/arabic/arconv.h
+++ b/modules/arabic/arconv.h
@@ -1,9 +1,25 @@
+/* pango-arabic module
+ *
+ * (C) 2000 K. Koehler <koehler@or.uni-bonn.de>
+ *
+ * general functions for arabic shaping
+ */
#ifndef __arconv_h_
#define __arconv_h_
-#include "utils.h"
+#include "../../libpango/utils.h"
-void reshape(int* len,GUChar4* string);
+/*
+ * arabic_reshape: reshapes string ( ordered in left-to-right visual order )
+ * len : before: is the length of the string
+ * after : number of non-NULL characters
+ * level: 1 : font with basic arabic characters, no vowels
+ * 2 : with vowels
+ * 3 : with composed vowels : Shadda+(Fatha,Damma,Kasra)
+ * 4 : with some extra Ligatures
+ *
+ */
+void arabic_reshape(int* len,GUChar4* string,int level);
int arabic_isvowel(GUChar4 s);
#endif
diff --git a/modules/arabic/mulefont.c b/modules/arabic/mulefont.c
new file mode 100644
index 00000000..edb440a8
--- /dev/null
+++ b/modules/arabic/mulefont.c
@@ -0,0 +1,260 @@
+/* pango-arabic module
+ *
+ * (C) 2000 K. Koehler <koehler@or.uni-bonn.de>
+ *
+ * This file provides a mapping unicode <- mulefont
+ */
+
+
+#include <stdio.h>
+#include <glib.h>
+#include "pango.h"
+#include "pangox.h"
+#include "utils.h"
+#include <unicode.h>
+
+/* #define DEBUG */
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+
+
+/* Look up the single charset `charset` on font.  When at least one
+ * subfont is reported, the first one is stored in *slot.  Both lists
+ * returned by pango_x_list_subfonts() are freed in either case.
+ * Returns non-zero when a subfont was found.
+ */
+static int
+mule_lookup(PangoFont *font,char **charset,PangoXSubfont *slot)
+{
+  PangoXSubfont *subfonts;
+  int *subfont_charsets;
+  int found;
+
+  found = pango_x_list_subfonts (font,charset,
+                                 1, &subfonts, &subfont_charsets) > 0;
+  if (found)
+    *slot = subfonts[0];
+  g_free (subfonts);
+  g_free (subfont_charsets);
+  return found;
+}
+
+/* arabic_muleinit: fill mulefonts[0..2] with the subfonts of font for the
+ * charsets "mulearabic-0", "mulearabic-1" and "mulearabic-2".
+ * Returns 1 on success and 0 as soon as "mulearabic-1" or "mulearabic-2"
+ * is missing.  A missing "mulearabic-0" is not treated as a failure;
+ * mulefonts[0] is then left unmodified.
+ */
+int
+arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts)
+{
+  static char *mule_charsets0[] = {
+    "mulearabic-0",
+  };
+
+  static char *mule_charsets1[] = {
+    "mulearabic-1",
+  };
+
+  static char *mule_charsets2[] = {
+    "mulearabic-2",
+  };
+
+  /* result deliberately ignored, see above */
+  mule_lookup (font, mule_charsets0, &mulefonts[0]);
+
+  if (!mule_lookup (font, mule_charsets1, &mulefonts[1]))
+    return 0;
+  if (!mule_lookup (font, mule_charsets2, &mulefonts[2]))
+    return 0;
+
+  return 1;
+}
+
+
+typedef struct { /* one row of charmap: where a presentation form lives in the mule fonts */
+ GUChar4 unicodechar; /* the presentation form; only compared against the input under DEBUG */
+ int fontindex; /* index into the mulefonts[] array */
+ int charindex; /* glyph number stored into *glyph by arabic_mule_recode() */
+} fontentry;
+
+
+/* charmap[k] describes the presentation form 0xFE80+k: arabic_mule_recode()
+ * indexes this table with (letter - 0xFE80), so it must hold exactly one
+ * row per code point from 0xFE80 to 0xFEFC, in order, and unicodechar
+ * must equal 0xFE80+k (checked under DEBUG).
+ */
+static fontentry charmap [] =
+{
+ { 0xFE80,1,0x2d }, /* HAMZA; handle separately !!! */
+ { 0xFE81,1,0x2e }, /* ALIF MADDA */
+ { 0xFE82,1,0x2f },
+ { 0xFE83,1,0x30 }, /* ALIF HAMZA */
+ { 0xFE84,1,0x31 },
+ { 0xFE85,1,0x32 }, /* WAW HAMZA */
+ { 0xFE86,1,0x33 },
+ { 0xFE87,1,0x34 }, /* ALIF IHAMZA */
+ { 0xFE88,1,0x35 },
+ { 0xFE89,2,0x21 }, /* YA HAMZA */
+ { 0xFE8A,2,0x22 },
+ { 0xFE8B,1,0x36 }, /* HAMZA-'pod' */
+ { 0xFE8C,1,0x37 },
+ { 0xFE8D,1,0x38 }, /* ALIF */
+ { 0xFE8E,1,0x39 },
+ { 0xFE8F,2,0x23 }, /* BA */
+ { 0xFE90,2,0x24 },
+ { 0xFE91,1,0x3A },
+ { 0xFE92,1,0x3B },
+ { 0xFE93,1,0x3C }, /* TA MARBUTA */
+ { 0xFE94,1,0x3D },
+ { 0xFE95,2,0x25 }, /* TA */
+ { 0xFE96,2,0x26 },
+ { 0xFE97,1,0x3E },
+ { 0xFE98,1,0x3F },
+ { 0xFE99,2,0x27 }, /* THA */
+ { 0xFE9A,2,0x28 },
+ { 0xFE9B,1,0x40 },
+ { 0xFE9C,1,0x41 },
+ { 0xFE9D,2,0x29 }, /* DJIM */
+ { 0xFE9E,2,0x2C },
+ { 0xFE9F,2,0x2A },
+ { 0xFEA0,2,0x2B },
+ { 0xFEA1,2,0x2D }, /* .HA */
+ { 0xFEA2,2,0x30 },
+ { 0xFEA3,2,0x2E },
+ { 0xFEA4,2,0x2F },
+ { 0xFEA5,2,0x31 }, /* CHA */
+ { 0xFEA6,2,0x34 },
+ { 0xFEA7,2,0x32 },
+ { 0xFEA8,2,0x33 },
+ { 0xFEA9,1,0x42 }, /* DAL */
+ { 0xFEAA,1,0x43 },
+ { 0xFEAB,1,0x44 }, /* THAL */
+ { 0xFEAC,1,0x45 },
+ { 0xFEAD,1,0x46 }, /* RA */
+ { 0xFEAE,1,0x47 },
+ { 0xFEAF,1,0x48 }, /* ZAY */
+ { 0xFEB0,1,0x49 },
+ { 0xFEB1,2,0x35 }, /* SIN */
+ { 0xFEB2,2,0x38 },
+ { 0xFEB3,2,0x36 },
+ { 0xFEB4,2,0x37 },
+ { 0xFEB5,2,0x39 }, /* SHIN */
+ { 0xFEB6,2,0x3C },
+ { 0xFEB7,2,0x3A },
+ { 0xFEB8,2,0x3B },
+ { 0xFEB9,2,0x3D }, /* SAAD */
+ { 0xFEBA,2,0x40 },
+ { 0xFEBB,2,0x3E },
+ { 0xFEBC,2,0x3F },
+ { 0xFEBD,2,0x41 }, /* DAAD */
+ { 0xFEBE,2,0x44 },
+ { 0xFEBF,2,0x42 },
+ { 0xFEC0,2,0x43 },
+ { 0xFEC1,2,0x45 }, /* .TA */
+ { 0xFEC2,2,0x48 },
+ { 0xFEC3,2,0x46 },
+ { 0xFEC4,2,0x47 },
+ { 0xFEC5,2,0x49 }, /* .ZA */
+ { 0xFEC6,2,0x4C },
+ { 0xFEC7,2,0x4A },
+ { 0xFEC8,2,0x4B },
+ { 0xFEC9,2,0x4D }, /* AIN */
+ { 0xFECA,2,0x4E },
+ { 0xFECB,1,0x4A },
+ { 0xFECC,1,0x4B },
+ { 0xFECD,2,0x4F }, /* RAIN */
+ { 0xFECE,2,0x50 },
+ { 0xFECF,1,0x4C },
+ { 0xFED0,1,0x4D },
+ { 0xFED1,2,0x51 }, /* FA */
+ { 0xFED2,2,0x52 },
+ { 0xFED3,1,0x4E },
+ { 0xFED4,1,0x4F },
+ { 0xFED5,2,0x53 }, /* QAF */
+ { 0xFED6,2,0x54 },
+ { 0xFED7,1,0x50 },
+ { 0xFED8,1,0x51 },
+ { 0xFED9,2,0x55 }, /* KAF */
+ { 0xFEDA,2,0x58 },
+ { 0xFEDB,2,0x56 },
+ { 0xFEDC,2,0x57 },
+ { 0xFEDD,2,0x59 }, /* LAM */
+ { 0xFEDE,2,0x5A },
+ { 0xFEDF,1,0x52 },
+ { 0xFEE0,1,0x53 },
+ { 0xFEE1,1,0x54 }, /* MIM */
+ { 0xFEE2,1,0x57 },
+ { 0xFEE3,1,0x55 },
+ { 0xFEE4,1,0x56 },
+ { 0xFEE5,2,0x5B }, /* NUN */
+ { 0xFEE6,2,0x5C },
+ { 0xFEE7,1,0x58 },
+ { 0xFEE8,1,0x59 },
+ { 0xFEE9,1,0x5A }, /* HA */
+ { 0xFEEA,1,0x5D },
+ { 0xFEEB,1,0x5B },
+ { 0xFEEC,1,0x5C },
+ { 0xFEED,1,0x5E }, /* WAW */
+ { 0xFEEE,1,0x5F },
+ { 0xFEEF,2,0x5D }, /* ALIF MAQSORA */
+ { 0xFEF0,2,0x5E },
+ { 0xFEF1,2,0x5F }, /* YA */
+ { 0xFEF2,2,0x60 },
+ { 0xFEF3,1,0x60 },
+ { 0xFEF4,1,0x61 },
+ { 0xFEF5,1,0x62 }, /* Lam-Alif Madda */
+ { 0xFEF6,2,0x61 },
+ { 0xFEF7,1,0x63 }, /* Lam-Alif Hamza*/
+ { 0xFEF8,2,0x62 },
+ { 0xFEF9,1,0x64 }, /* Lam-Alif iHamza*/
+ { 0xFEFA,2,0x63 },
+ { 0xFEFB,1,0x65 }, /* Lam-Alif */
+ { 0xFEFC,2,0x64 }
+};
+
+/* arabic_mule_recode: translate the character in *glyph into a mule font
+ * and a glyph number inside it.
+ *   0x660..0x669   -> mulefonts[0], glyph 0x21 + digit
+ *   0xFE80..0xFEFC -> looked up in charmap by (character - 0xFE80)
+ *   0x621          -> same as charmap[0]
+ *   anything else  -> glyph 0x20 in the font of charmap[1]
+ * On return *subfont holds the chosen entry of mulefonts and *glyph the
+ * glyph number.
+ */
+void
+arabic_mule_recode(PangoXSubfont* subfont,int* glyph,PangoXSubfont* mulefonts)
+{
+  const int code = *glyph;
+  const fontentry *entry;
+
+  if ((code >= 0x660)&&(code <= 0x669))
+    { /* indic numeral */
+      *subfont = mulefonts[0];
+      *glyph = code - 0x660 + 0x21;
+      return;
+    }
+
+  if ((code >= 0xFE80)&&(code <= 0xFEFC))
+    { /* Wide forms sit in the -2 font and narrow ones in the -1 font,
+       * without any regular layout, hence the lookup table.
+       */
+      entry = &charmap[code-0xFE80];
+#ifdef DEBUG
+      if (entry->unicodechar != code)
+        {
+          fprintf(stderr,"[ar] mulefont charmap table defect "
+                  "%x comes out as %x ",
+                  code,entry->unicodechar);
+        }
+#endif
+      *subfont = mulefonts[entry->fontindex];
+      *glyph = entry->charindex;
+      return;
+    }
+
+  if (code == 0x621)
+    {
+      entry = &charmap[0];
+      *subfont = mulefonts[entry->fontindex];
+      *glyph = entry->charindex;
+      return;
+    }
+
+  /* we don't have this thing -- use a space; this has to be something
+   * that does not print anything !! */
+  *subfont = mulefonts[charmap[1].fontindex];
+  *glyph = 0x20;
+}
+
+
+
+
diff --git a/modules/arabic/mulefont.h b/modules/arabic/mulefont.h
new file mode 100644
index 00000000..acdf1983
--- /dev/null
+++ b/modules/arabic/mulefont.h
@@ -0,0 +1,22 @@
+/* pango-arabic module
+ *
+ * (C) 2000 K. Koehler <koehler@or.uni-bonn.de>
+ *
+ * This file provides a mapping unicode <- mulefont
+ */
+#ifndef __mulefont_h_
+#define __mulefont_h_
+#include "pango.h"
+#include "pangox.h"
+
+/* mulefonts must be an array with at least three entries */
+
+int
+arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts);
+/* a return value of 0 means this has failed */
+
+void
+arabic_mule_recode(PangoXSubfont* subfont,int* glyph,PangoXSubfont* mulefonts);
+
+
+#endif