From 2028e281b0ba721d859833ea88bb5f303e78e95b Mon Sep 17 00:00:00 2001 From: Owen Taylor Date: Thu, 2 Nov 2000 21:22:13 +0000 Subject: New Arabic shaper from Karl Koehler. Thu Nov 2 16:21:22 2000 Owen Taylor * New Arabic shaper from Karl Koehler. --- modules/arabic/arabic-x.c | 208 ++++++++++++----------- modules/arabic/arabic.c | 208 ++++++++++++----------- modules/arabic/arconv.c | 224 ++++++++++++------------ modules/arabic/arconv.h | 17 +- modules/arabic/langboxfont.c | 58 +++---- modules/arabic/langboxfont.h | 9 +- modules/arabic/mulefont.c | 55 +++--- modules/arabic/mulefont.h | 13 +- modules/arabic/naqshfont.c | 394 ++++++++++++++++++++++++------------------- modules/arabic/naqshfont.h | 13 +- 10 files changed, 655 insertions(+), 544 deletions(-) (limited to 'modules') diff --git a/modules/arabic/arabic-x.c b/modules/arabic/arabic-x.c index 6b10fc19..e73c20c4 100644 --- a/modules/arabic/arabic-x.c +++ b/modules/arabic/arabic-x.c @@ -8,7 +8,6 @@ #include #include -#include #include "pango.h" #include "pangox.h" @@ -17,7 +16,7 @@ #include "langboxfont.h" #include "naqshfont.h" -/* #define DEBUG */ +/* #define DEBUG */ #ifdef DEBUG #include #endif @@ -58,7 +57,6 @@ arabic_engine_break (const char *text, PangoLogAttr *attrs) { /* Most of the code comes from tamil_engine_break - * only difference is char stop based on modifiers */ const char *cur = text; @@ -66,20 +64,20 @@ arabic_engine_break (const char *text, gunichar wc; while (*cur && cur - text < len) - { - wc = g_utf8_get_char (cur); - if (wc == (gunichar)-1) - break; /* FIXME: ERROR */ + { + wc = g_utf8_get_char (cur); + if (wc == (gunichar)-1) + break; /* FIXME: ERROR */ attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0; attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white; attrs[i].is_char_stop = 1; attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white; /* actually, is_word_stop in not correct, but simple and good enough. */ - + i++; cur = g_utf8_next_char (cur); - } + } } static PangoEngine * @@ -101,88 +99,104 @@ arabic_engine_lang_new () * X window system script engine portion */ -static arabic_level -find_unic_font (PangoFont *font,char* charsets[],PangoXSubfont* rfonts) +static ArabicFontInfo* +arabic_unicodeinit(PangoFont *font, PangoXSubfont subfont) +{ + ArabicFontInfo *fs = NULL; + + if (subfont != 0) + { + if ( pango_x_has_glyph /* Alif-Madda */ + (font,PANGO_X_MAKE_GLYPH(subfont,0xFE81))) + { + fs = g_new (ArabicFontInfo,1); + fs->level = ar_standard | ar_unifont; + fs->subfonts[0] = subfont; + + if ( pango_x_has_glyph /* Shadda+Kasra */ + (font,PANGO_X_MAKE_GLYPH(subfont,0xFC62))) + { + fs->level |= ar_composedtashkeel; + /* extra vowels in font, hopefully */ + } + if ( pango_x_has_glyph /* Lam-Min alone */ + (font,PANGO_X_MAKE_GLYPH(subfont,0xFC42))) + { + fs->level |= ar_lig; + /* extra ligatures in font, hopefully */ + } + } + } + return fs; +} + +static ArabicFontInfo* +find_unic_font (PangoFont *font) { - PangoXSubfont *subfonts; - int *subfont_charsets; - int n_subfonts; - int i; - int result = 0; + static char *charsets[] = { + "iso10646-1", + "iso8859-6.8x", + "mulearabic-2", + "urdunaqsh-0", +/* "symbol-0" */ + }; + + ArabicFontInfo *fs = NULL; + PangoXSubfont *subfonts; + int *subfont_charsets; + int n_subfonts; + int i; + + GQuark info_id = g_quark_from_string ("arabic-font-info"); + fs = g_object_get_qdata (G_OBJECT (font), info_id); + if (fs) return fs; n_subfonts = pango_x_list_subfonts (font, charsets, 4, &subfonts, &subfont_charsets); for (i=0; i < n_subfonts; i++) { - if ( (strcmp (charsets[subfont_charsets[i]], "mulearabic-2") == 0) - && arabic_muleinit(font,rfonts) ) + if ( !strcmp (charsets[subfont_charsets[i]], "mulearabic-2")) { - result = ar_mulefont | ar_novowel; - /* we know we have a mulearabic-font ... */ #ifdef DEBUG if (getenv("PANGO_AR_NOMULEFONT") == NULL ) #endif - break; + fs = arabic_muleinit(font); } - else if ( (strcmp (charsets[subfont_charsets[i]], "iso8859-6.8x") == 0) - && arabic_lboxinit(font,rfonts) ) + else if ( !strcmp (charsets[subfont_charsets[i]], "iso8859-6.8x")) { - result = ar_standard | ar_lboxfont; #ifdef DEBUG if (getenv("PANGO_AR_NOLBOXFONT") == NULL ) #endif - break; + fs = arabic_lboxinit(font); } - else if ( (strcmp (charsets[subfont_charsets[i]], "urdunaqsh-0") == 0) - && urdu_naqshinit(font,rfonts) ) + else if ( !strcmp (charsets[subfont_charsets[i]], "urdunaqsh-0")) { - result = ar_standard | ar_naqshfont; #ifdef DEBUG if (getenv("PANGO_AR_NONQFONT") == NULL ) #endif - break; + fs = urdu_naqshinit(font); } else - { /* test if the font has Alif-Madda; if so assume it is ok */ - if ( pango_x_has_glyph /* Alif-Madda */ - (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFE81))) - { - rfonts[0] = subfonts[i]; - result = ar_standard | ar_unifont; - } - if ( pango_x_has_glyph /* Shadda+Kasra */ - (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC62))) - { - result |= ar_composedtashkeel; - /* extra vowels in font, hopefully */ - } - if ( pango_x_has_glyph /* Lam-Min alone */ - (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC42))) - { - result |= ar_lig; - /* extra ligatures in font, hopefully */ - } + { #ifdef DEBUG if (getenv("PANGO_AR_NOUNIFONT") == NULL ) #endif - if (result) break; + fs = arabic_unicodeinit(font,subfonts[i]); } + if (fs){ + g_object_set_qdata_full (G_OBJECT (font), info_id, + fs, (GDestroyNotify)g_free); + break; + } } g_free (subfonts); g_free (subfont_charsets); - return result; + return fs; } -static char *default_charset[] = { - "iso10646-1", - "iso8859-6.8x", - "mulearabic-2", - "urdunaqsh-0", -}; - static void @@ -219,16 +233,13 @@ arabic_engine_shape (PangoFont *font, PangoAnalysis *analysis, PangoGlyphString *glyphs) { - PangoXSubfont subfont; - PangoXSubfont arfonts[3]; - - - int n_chars, n_glyph; - int i; - const char *p; - const char *pold; - gunichar *wc; - arabic_level lvl; + PangoXSubfont subfont; + int n_chars; + int i; + ArabicFontInfo *fs; + const char *p; + const char *pold; + gunichar *wc; g_return_if_fail (font != NULL); g_return_if_fail (text != NULL); @@ -236,15 +247,14 @@ arabic_engine_shape (PangoFont *font, g_return_if_fail (analysis != NULL); /* We hope there is a suitible font installed .. - */ + */ - n_chars = n_glyph = g_utf8_strlen (text, length); - - if (!(lvl = find_unic_font (font, default_charset,arfonts))) + if (! (fs = find_unic_font (font)) ) { PangoGlyph unknown_glyph = pango_x_get_unknown_glyph (font); + n_chars = g_utf8_strlen(text,length); pango_glyph_string_set_size (glyphs, n_chars); p = text; @@ -257,20 +267,13 @@ arabic_engine_shape (PangoFont *font, } return; } - subfont = arfonts[0]; - wc = (gunichar *)g_malloc(sizeof(gunichar)*n_chars); p = text; - for (i=0; i < n_chars; i++) - { - wc[n_chars - i - 1] = g_utf8_get_char (p); - p = g_utf8_next_char (p); - } - - if (analysis->level % 2 == 0) { + wc = g_utf8_to_ucs4(text,length); + n_chars = g_utf8_strlen(text,length); /* We were called on a LTR directional run (e.g. some numbers); fallback as simple as possible */ pango_glyph_string_set_size (glyphs, n_chars); @@ -278,21 +281,28 @@ arabic_engine_shape (PangoFont *font, } else { - arabic_reshape(&n_glyph,wc,lvl); - pango_glyph_string_set_size (glyphs, n_glyph); + wc = (gunichar *)g_malloc(sizeof(gunichar)* (length) ); /* length is succicient: all arabic chars use at + least 2 bytes in utf-8 encoding */ + n_chars = length; + arabic_reshape(&n_chars,text,wc,fs->level); + pango_glyph_string_set_size (glyphs, n_chars); }; p = text; pold = p; - i = n_chars-1; + i = 0; + subfont = fs->subfonts[0]; - while(i >= 0) + while(i < n_chars) { if (wc[i] == 0) { p = g_utf8_next_char (p); - i--; +#ifdef DEBUG + fprintf(stderr,"NULL-character detected in generated string.!"); +#endif + i++; } else { @@ -300,40 +310,42 @@ arabic_engine_shape (PangoFont *font, int is_vowel = arabic_isvowel(wc[i]); cluster_start = is_vowel ? pold - text : p - text; - if ( lvl & ar_mulefont ) + if ( fs->level & ar_mulefont ) { - arabic_mule_recode(&subfont,&(wc[i]),arfonts); + arabic_mule_recode(&subfont,&(wc[i]), + fs->subfonts); } - else if ( lvl & ar_lboxfont ) + else if ( fs->level & ar_lboxfont ) { - if (( i > 0 )&&(wc[i-1] == 0)) + if (( i < n_chars-1 )&&(wc[i+1] == 0)) { arabic_lbox_recode(&subfont,&(wc[i]), - &(wc[i-1]), arfonts); + &(wc[i+1]), + fs->subfonts); } else arabic_lbox_recode(&subfont,&(wc[i]),NULL, - arfonts); + fs->subfonts); } - else if ( lvl & ar_naqshfont ) + else if ( fs->level & ar_naqshfont ) { - if (( i > 0 )&&(wc[i-1] == 0)) + if (( i < n_chars-1 )&&(wc[i+1] == 0)) { urdu_naqsh_recode(&subfont,&(wc[i]), - &(wc[i-1]), arfonts); + &(wc[i+1]), + fs->subfonts); } else urdu_naqsh_recode(&subfont,&(wc[i]),NULL, - arfonts); + fs->subfonts); } - set_glyph(glyphs, font, subfont, n_glyph - 1, + set_glyph(glyphs, font, subfont, n_chars - i - 1, cluster_start, wc[i], is_vowel); pold = p; p = g_utf8_next_char (p); - n_glyph--; - i--; + i++; } } diff --git a/modules/arabic/arabic.c b/modules/arabic/arabic.c index 6b10fc19..e73c20c4 100644 --- a/modules/arabic/arabic.c +++ b/modules/arabic/arabic.c @@ -8,7 +8,6 @@ #include #include -#include #include "pango.h" #include "pangox.h" @@ -17,7 +16,7 @@ #include "langboxfont.h" #include "naqshfont.h" -/* #define DEBUG */ +/* #define DEBUG */ #ifdef DEBUG #include #endif @@ -58,7 +57,6 @@ arabic_engine_break (const char *text, PangoLogAttr *attrs) { /* Most of the code comes from tamil_engine_break - * only difference is char stop based on modifiers */ const char *cur = text; @@ -66,20 +64,20 @@ arabic_engine_break (const char *text, gunichar wc; while (*cur && cur - text < len) - { - wc = g_utf8_get_char (cur); - if (wc == (gunichar)-1) - break; /* FIXME: ERROR */ + { + wc = g_utf8_get_char (cur); + if (wc == (gunichar)-1) + break; /* FIXME: ERROR */ attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0; attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white; attrs[i].is_char_stop = 1; attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white; /* actually, is_word_stop in not correct, but simple and good enough. */ - + i++; cur = g_utf8_next_char (cur); - } + } } static PangoEngine * @@ -101,88 +99,104 @@ arabic_engine_lang_new () * X window system script engine portion */ -static arabic_level -find_unic_font (PangoFont *font,char* charsets[],PangoXSubfont* rfonts) +static ArabicFontInfo* +arabic_unicodeinit(PangoFont *font, PangoXSubfont subfont) +{ + ArabicFontInfo *fs = NULL; + + if (subfont != 0) + { + if ( pango_x_has_glyph /* Alif-Madda */ + (font,PANGO_X_MAKE_GLYPH(subfont,0xFE81))) + { + fs = g_new (ArabicFontInfo,1); + fs->level = ar_standard | ar_unifont; + fs->subfonts[0] = subfont; + + if ( pango_x_has_glyph /* Shadda+Kasra */ + (font,PANGO_X_MAKE_GLYPH(subfont,0xFC62))) + { + fs->level |= ar_composedtashkeel; + /* extra vowels in font, hopefully */ + } + if ( pango_x_has_glyph /* Lam-Min alone */ + (font,PANGO_X_MAKE_GLYPH(subfont,0xFC42))) + { + fs->level |= ar_lig; + /* extra ligatures in font, hopefully */ + } + } + } + return fs; +} + +static ArabicFontInfo* +find_unic_font (PangoFont *font) { - PangoXSubfont *subfonts; - int *subfont_charsets; - int n_subfonts; - int i; - int result = 0; + static char *charsets[] = { + "iso10646-1", + "iso8859-6.8x", + "mulearabic-2", + "urdunaqsh-0", +/* "symbol-0" */ + }; + + ArabicFontInfo *fs = NULL; + PangoXSubfont *subfonts; + int *subfont_charsets; + int n_subfonts; + int i; + + GQuark info_id = g_quark_from_string ("arabic-font-info"); + fs = g_object_get_qdata (G_OBJECT (font), info_id); + if (fs) return fs; n_subfonts = pango_x_list_subfonts (font, charsets, 4, &subfonts, &subfont_charsets); for (i=0; i < n_subfonts; i++) { - if ( (strcmp (charsets[subfont_charsets[i]], "mulearabic-2") == 0) - && arabic_muleinit(font,rfonts) ) + if ( !strcmp (charsets[subfont_charsets[i]], "mulearabic-2")) { - result = ar_mulefont | ar_novowel; - /* we know we have a mulearabic-font ... */ #ifdef DEBUG if (getenv("PANGO_AR_NOMULEFONT") == NULL ) #endif - break; + fs = arabic_muleinit(font); } - else if ( (strcmp (charsets[subfont_charsets[i]], "iso8859-6.8x") == 0) - && arabic_lboxinit(font,rfonts) ) + else if ( !strcmp (charsets[subfont_charsets[i]], "iso8859-6.8x")) { - result = ar_standard | ar_lboxfont; #ifdef DEBUG if (getenv("PANGO_AR_NOLBOXFONT") == NULL ) #endif - break; + fs = arabic_lboxinit(font); } - else if ( (strcmp (charsets[subfont_charsets[i]], "urdunaqsh-0") == 0) - && urdu_naqshinit(font,rfonts) ) + else if ( !strcmp (charsets[subfont_charsets[i]], "urdunaqsh-0")) { - result = ar_standard | ar_naqshfont; #ifdef DEBUG if (getenv("PANGO_AR_NONQFONT") == NULL ) #endif - break; + fs = urdu_naqshinit(font); } else - { /* test if the font has Alif-Madda; if so assume it is ok */ - if ( pango_x_has_glyph /* Alif-Madda */ - (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFE81))) - { - rfonts[0] = subfonts[i]; - result = ar_standard | ar_unifont; - } - if ( pango_x_has_glyph /* Shadda+Kasra */ - (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC62))) - { - result |= ar_composedtashkeel; - /* extra vowels in font, hopefully */ - } - if ( pango_x_has_glyph /* Lam-Min alone */ - (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC42))) - { - result |= ar_lig; - /* extra ligatures in font, hopefully */ - } + { #ifdef DEBUG if (getenv("PANGO_AR_NOUNIFONT") == NULL ) #endif - if (result) break; + fs = arabic_unicodeinit(font,subfonts[i]); } + if (fs){ + g_object_set_qdata_full (G_OBJECT (font), info_id, + fs, (GDestroyNotify)g_free); + break; + } } g_free (subfonts); g_free (subfont_charsets); - return result; + return fs; } -static char *default_charset[] = { - "iso10646-1", - "iso8859-6.8x", - "mulearabic-2", - "urdunaqsh-0", -}; - static void @@ -219,16 +233,13 @@ arabic_engine_shape (PangoFont *font, PangoAnalysis *analysis, PangoGlyphString *glyphs) { - PangoXSubfont subfont; - PangoXSubfont arfonts[3]; - - - int n_chars, n_glyph; - int i; - const char *p; - const char *pold; - gunichar *wc; - arabic_level lvl; + PangoXSubfont subfont; + int n_chars; + int i; + ArabicFontInfo *fs; + const char *p; + const char *pold; + gunichar *wc; g_return_if_fail (font != NULL); g_return_if_fail (text != NULL); @@ -236,15 +247,14 @@ arabic_engine_shape (PangoFont *font, g_return_if_fail (analysis != NULL); /* We hope there is a suitible font installed .. - */ + */ - n_chars = n_glyph = g_utf8_strlen (text, length); - - if (!(lvl = find_unic_font (font, default_charset,arfonts))) + if (! (fs = find_unic_font (font)) ) { PangoGlyph unknown_glyph = pango_x_get_unknown_glyph (font); + n_chars = g_utf8_strlen(text,length); pango_glyph_string_set_size (glyphs, n_chars); p = text; @@ -257,20 +267,13 @@ arabic_engine_shape (PangoFont *font, } return; } - subfont = arfonts[0]; - wc = (gunichar *)g_malloc(sizeof(gunichar)*n_chars); p = text; - for (i=0; i < n_chars; i++) - { - wc[n_chars - i - 1] = g_utf8_get_char (p); - p = g_utf8_next_char (p); - } - - if (analysis->level % 2 == 0) { + wc = g_utf8_to_ucs4(text,length); + n_chars = g_utf8_strlen(text,length); /* We were called on a LTR directional run (e.g. some numbers); fallback as simple as possible */ pango_glyph_string_set_size (glyphs, n_chars); @@ -278,21 +281,28 @@ arabic_engine_shape (PangoFont *font, } else { - arabic_reshape(&n_glyph,wc,lvl); - pango_glyph_string_set_size (glyphs, n_glyph); + wc = (gunichar *)g_malloc(sizeof(gunichar)* (length) ); /* length is succicient: all arabic chars use at + least 2 bytes in utf-8 encoding */ + n_chars = length; + arabic_reshape(&n_chars,text,wc,fs->level); + pango_glyph_string_set_size (glyphs, n_chars); }; p = text; pold = p; - i = n_chars-1; + i = 0; + subfont = fs->subfonts[0]; - while(i >= 0) + while(i < n_chars) { if (wc[i] == 0) { p = g_utf8_next_char (p); - i--; +#ifdef DEBUG + fprintf(stderr,"NULL-character detected in generated string.!"); +#endif + i++; } else { @@ -300,40 +310,42 @@ arabic_engine_shape (PangoFont *font, int is_vowel = arabic_isvowel(wc[i]); cluster_start = is_vowel ? pold - text : p - text; - if ( lvl & ar_mulefont ) + if ( fs->level & ar_mulefont ) { - arabic_mule_recode(&subfont,&(wc[i]),arfonts); + arabic_mule_recode(&subfont,&(wc[i]), + fs->subfonts); } - else if ( lvl & ar_lboxfont ) + else if ( fs->level & ar_lboxfont ) { - if (( i > 0 )&&(wc[i-1] == 0)) + if (( i < n_chars-1 )&&(wc[i+1] == 0)) { arabic_lbox_recode(&subfont,&(wc[i]), - &(wc[i-1]), arfonts); + &(wc[i+1]), + fs->subfonts); } else arabic_lbox_recode(&subfont,&(wc[i]),NULL, - arfonts); + fs->subfonts); } - else if ( lvl & ar_naqshfont ) + else if ( fs->level & ar_naqshfont ) { - if (( i > 0 )&&(wc[i-1] == 0)) + if (( i < n_chars-1 )&&(wc[i+1] == 0)) { urdu_naqsh_recode(&subfont,&(wc[i]), - &(wc[i-1]), arfonts); + &(wc[i+1]), + fs->subfonts); } else urdu_naqsh_recode(&subfont,&(wc[i]),NULL, - arfonts); + fs->subfonts); } - set_glyph(glyphs, font, subfont, n_glyph - 1, + set_glyph(glyphs, font, subfont, n_chars - i - 1, cluster_start, wc[i], is_vowel); pold = p; p = g_utf8_next_char (p); - n_glyph--; - i--; + i++; } } diff --git a/modules/arabic/arconv.c b/modules/arabic/arconv.c index 7f566e0d..c41a69be 100644 --- a/modules/arabic/arconv.c +++ b/modules/arabic/arconv.c @@ -12,7 +12,7 @@ */ #include "arconv.h" -/* #define DEBUG */ +/* #define DEBUG */ #ifdef DEBUG #include #endif @@ -36,7 +36,7 @@ typedef struct { static shapestruct chartable [] = { - {0x621, 0xFE80,4}, /* HAMZA; handle seperately !!! */ + {0x621, 0xFE80,1}, /* HAMZA; handle seperately !!! */ {0x622, 0xFE81,2}, /* ALIF MADDA */ {0x623, 0xFE83,2}, /* ALIF HAMZA */ {0x624, 0xFE85,2}, /* WAW HAMZA */ @@ -103,7 +103,7 @@ static shapestruct chartable [] = {0x6BA, 0xFB9E,2}, /* Urdu:NUN GHUNNA */ {0x6BB, 0xFBA0,4}, /* Sindhi: */ {0x6BE, 0xFBAA,4}, /* HA special */ - {0x6CC, 0xFEF1,4}, /* farsi ya */ + {0x6CC, 0xFBFC,4}, /* farsi ya */ {0x6C0, 0xFBA4,2}, /* izafet: HA HAMZA */ {0x6C1, 0xFBA6,4}, /* Urdu: */ {0x6D2, 0xFBAE,2}, /* YA barree */ @@ -127,8 +127,8 @@ static shapestruct chartable [] = /* Hamza below ( saves Kasra and special cases ), Hamza above ( always joins ). * As I don't know what sHAMZA is good for I don't handle it. */ -#define iHAMZA 0x654 -#define aHAMZA 0x655 +#define aHAMZA 0x654 +#define iHAMZA 0x655 #define sHAMZA 0x674 #define WAW 0x648 @@ -163,40 +163,33 @@ copycstostring(gunichar* string,int* i,charstruct* s,arabic_level level) { /* s is a shaped charstruct; i is the index into the string */ if (s->basechar == 0) return; - string[*i] = s->basechar; (*i)--; (s->lignum)--; + string[*i] = s->basechar; (*i)++; (s->lignum)--; if (s->mark1 != 0) { if ( !(level & ar_novowel) ) { - string[*i] = s->mark1; (*i)--; (s->lignum)--; + string[*i] = s->mark1; (*i)++; (s->lignum)--; } else { - string[*i] = 0; (*i)--; (s->lignum)--; + (s->lignum)--; } } if (s->vowel != 0) { if (! (level & ar_novowel) ) { - string[*i] = s->vowel; (*i)--; (s->lignum)--; + string[*i] = s->vowel; (*i)++; (s->lignum)--; } else - { /* vowel elimination */ - string[*i] = 0; (*i)--; (s->lignum)--; + { /* vowel elimination */ + (s->lignum)--; } } - while (s->lignum > 0 ) - { - string[*i] = 0; (*i)--; (s->lignum)--; + while (s->lignum > 0 ) + { /* NULL-insertion for Langbox-font */ + string[*i] = 0; (*i)++; (s->lignum)--; } -#ifdef DEBUG - if (*i < -1){ - fprintf(stderr,"you are in trouble ! i = %i, the last char is %x, " - "lignum = %i", - *i,s->basechar,s->lignum); - } -#endif } int @@ -249,14 +242,14 @@ charshape(gunichar s,short which) else return 0xFE8B+(which-2); /* The Hamza-'pod' */ } else if (s == 0x6CC) - { /* farsi ya --> map to Alif maqsura and Ya, depending on form */ - switch (which){ - case 0: return 0xFEEF; - case 1: return 0xFEF0; - case 2: return 0xFEF3; - case 3: return 0xFEF4; - } - } + { /* farsi ya --> map to Alif maqsura and Ya, depending on form */ + switch (which){ + case 0: return 0xFEEF; + case 1: return 0xFEF0; + case 2: return 0xFEF3; + case 3: return 0xFEF4; + } + } else { return s; @@ -288,11 +281,32 @@ shapecount(gunichar s) } } +int unligature(charstruct* curchar,arabic_level level) +{ + int result = 0; + if (level & ar_naqshfont){ + /* decompose Alif-Madda ... */ + switch(curchar->basechar){ + case ALIFHAMZA : curchar->basechar = ALIF; curchar->mark1 = aHAMZA; + result++; break; + case ALIFIHAMZA: curchar->basechar = ALIF; curchar->mark1 = iHAMZA; + result++; break; + case WAWHAMZA : curchar->basechar = WAW; curchar->mark1 = aHAMZA; + result++; break; + case ALIFMADDA :curchar->basechar = ALIF; curchar->vowel = MADDA; + result++; break; + } + } + return result; +} + int -ligature(gunichar* string,int si,int len,charstruct* oldchar) -{ /* no ligature possible --> return 0; 1 == vowel; 2 = two chars */ +ligature(gunichar newchar,charstruct* oldchar) +{ /* no ligature possible --> return 0; 1 == vowel; 2 = two chars + * 3 = Lam-Alif + */ int retval = 0; - gunichar newchar = string[si]; + if (!(oldchar->basechar)) return 0; if (arabic_isvowel(newchar)) { @@ -363,7 +377,10 @@ ligature(gunichar* string,int si,int len,charstruct* oldchar) break; default: oldchar->vowel = newchar; break; } - oldchar->lignum++; + if (retval == 1) + { + oldchar->lignum++; + } return retval; } if (oldchar->vowel != 0) @@ -390,12 +407,6 @@ ligature(gunichar* string,int si,int len,charstruct* oldchar) switch (newchar) { case ALIF: oldchar->basechar = ALIFMADDA; retval = 2; break; - case HAMZA: - if (si == len-2) /* HAMZA is 2nd char */ - { - oldchar->basechar = ALIFHAMZA; retval = 2; - } - break; } break; case WAW: @@ -404,57 +415,46 @@ ligature(gunichar* string,int si,int len,charstruct* oldchar) case HAMZA:oldchar->basechar = WAWHAMZA; retval = 2; break; } break; - case LAM_ALIF: - switch (newchar) - { - case HAMZA: - if (si == len-4) /* ! We assume the string has been split - into words. This is AL-A.. I hope */ - { - oldchar->basechar = LAM_ALIFHAMZA; retval = 2; - } - break; - } - break; case 0: oldchar->basechar = newchar; oldchar->numshapes = shapecount(newchar); retval = 1; break; } - if (retval) - { - oldchar->lignum++; -#ifdef DEBUG - fprintf(stderr,"[ar] ligature : added %x to make %x\n", - newchar,oldchar->basechar); -#endif - } return retval; } static void -shape(int olen,int* len,gunichar* string,arabic_level level) +shape(int* len,const char* text,gunichar* string,arabic_level level) { - /* The string must be in visual order already. + /* string is assumed to be empty an big enough. + ** text is the original text. ** This routine does the basic arabic reshaping. - ** olen is the memory lenght, *len the number of non-null characters. + ** *len the number of non-null characters. */ - charstruct oldchar,curchar; - int si = (olen)-1; - int j = (olen)-1; - int join; - int which; - - *len = olen; + /* Note ! we have to unshape each character first ! */ + int olen = *len; + charstruct oldchar,curchar; + /* int si = (olen)-1; */ + int j = 0; + int join; + int which; + gunichar nextletter; + const char* p = text; + + *len = 0 ; /* initialize for output */ charstruct_init(&oldchar); charstruct_init(&curchar); - while (si >= 0) + while (p < text+olen) { - join = ligature(string,si,olen,&curchar); + nextletter = g_utf8_get_char (p); + nextletter = unshape(nextletter); + + join = ligature(nextletter,&curchar); if (!join) { /* shape curchar */ - int nc = shapecount(string[si]); + int nc = shapecount(nextletter); + (*len)++; if (nc == 1) { which = 0; /* end or basic */ @@ -486,19 +486,25 @@ shape(int olen,int* len,gunichar* string,arabic_level level) /* init new curchar */ charstruct_init(&curchar); - curchar.basechar = string[si]; + curchar.basechar = nextletter; curchar.numshapes = nc; curchar.lignum++; + (*len) += unligature(&curchar,level); + } + else if ((join == 3)&&(level & ar_lboxfont)) + { /* Lam-Alif extra in langbox-font */ + (*len)++; + curchar.lignum++; + } + else if (join == 1) + { + (*len)++; } - else if ( ( join == 2 ) - ||((join == 3)&&(! (level & ar_lboxfont) )) - ||((join == 1)&&(level & ar_novowel ) - && arabic_isvowel(string[si])) ) - { /* Lam-Alif in Langbox-font is no ligature */ - /* No vowels in Mulearabic-font */ - (*len)--; + else + { + (*len) += unligature(&curchar,level); } - si--; + p = g_utf8_next_char (p); } /* Handle last char */ @@ -518,19 +524,21 @@ shape(int olen,int* len,gunichar* string,arabic_level level) } static void -doublelig(int olen,int* len,gunichar* string,arabic_level level) +doublelig(int* len,gunichar* string,arabic_level level) { /* Ok. We have presentation ligatures in our font. */ - int si = (olen)-1; + int olen = *len; + int j = 0, si = 1; gunichar lapresult; - while (si > 0) + + while (si < olen) { lapresult = 0; if ( level & ar_composedtashkeel ){ - switch(string[si]) + switch(string[j]) { case SHADDA: - switch(string[si-1]) + switch(string[si]) { case KASRA: lapresult = 0xFC62; break; case FATHA: lapresult = 0xFC60; break; @@ -540,22 +548,22 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level) } break; case KASRA: - if (string[si-1]==SHADDA) lapresult = 0xFC62; + if (string[si]==SHADDA) lapresult = 0xFC62; break; case FATHA: - if (string[si-1]==SHADDA) lapresult = 0xFC60; + if (string[si]==SHADDA) lapresult = 0xFC60; break; case DAMMA: - if (string[si-1]==SHADDA) lapresult = 0xFC61; + if (string[si]==SHADDA) lapresult = 0xFC61; break; } } if ( level & ar_lig ){ - switch(string[si]) + switch(string[j]) { case 0xFEDF: /* LAM initial */ - switch(string[si-1]){ + switch(string[si]){ case 0xFE9E : lapresult = 0xFC3F; break; /* DJEEM final*/ case 0xFEA0 : lapresult = 0xFCC9; break; case 0xFEA2 : lapresult = 0xFC40; break; /* .HA final */ @@ -567,21 +575,21 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level) } break; case 0xFE97: /* TA inital */ - switch(string[si-1]){ + switch(string[si]){ case 0xFEA0 : lapresult = 0xFCA1; break; /* DJ init */ case 0xFEA4 : lapresult = 0xFCA2; break; /* .HA */ case 0xFEA8 : lapresult = 0xFCA3; break; /* CHA */ } break; case 0xFE91: /* BA inital */ - switch(string[si-1]){ + switch(string[si]){ case 0xFEA0 : lapresult = 0xFC9C; break; /* DJ init */ case 0xFEA4 : lapresult = 0xFC9D; break; /* .HA */ case 0xFEA8 : lapresult = 0xFC9E; break; /* CHA */ } break; case 0xFEE7: /* NUN inital */ - switch(string[si-1]){ + switch(string[si]){ case 0xFEA0 : lapresult = 0xFCD2; break; /* DJ init */ case 0xFEA4 : lapresult = 0xFCD3; break; /* .HA */ case 0xFEA8 : lapresult = 0xFCD4; break; /* CHA */ @@ -589,14 +597,14 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level) break; case 0xFEE8: /* NUN medial */ - switch(string[si-1]){ + switch(string[si]){ /* missing : nun-ra : FC8A und nun-sai : FC8B */ case 0xFEAE : lapresult = 0xFC8A; break; /* nun-ra */ case 0xFEB0 : lapresult = 0xFC8B; break; /* nun-sai */ } break; case 0xFEE3: /* Mim initial */ - switch(string[si-1]){ + switch(string[si]){ case 0xFEA0 : lapresult = 0xFCCE ; break; /* DJ init */ case 0xFEA4 : lapresult = 0xFCCF ; break; /* .HA init */ case 0xFEA8 : lapresult = 0xFCD0 ; break; /* CHA init */ @@ -605,7 +613,7 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level) break; case 0xFED3: /* Fa initial */ - switch(string[si-1]){ + switch(string[si]){ case 0xFEF2 : lapresult = 0xFC32 ; break; /* fi-ligature (!) */ } break; @@ -616,23 +624,25 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level) } if (lapresult != 0) { - string[si] = lapresult; (*len)--; string[si-1] = 0x0; + string[j] = lapresult; (*len)--; + si++; /* jump over one character */ + /* we'll have to change this, too. */ + } + else + { + j++; + string[j] = string[si]; + si++; } - si--; } } void -arabic_reshape(int* len,gunichar* string,arabic_level level) +arabic_reshape(int* len,const char* text,gunichar* string,arabic_level level) { - int i; - int olen = *len; - for ( i = 0; i < *len; i++){ - string[i] = unshape(string[i]); - } - shape(olen,len,string,level); + shape(len,text ,string,level); if ( level & ( ar_composedtashkeel | ar_lig ) ) - doublelig(olen,len,string,level); + doublelig(len,string,level); } diff --git a/modules/arabic/arconv.h b/modules/arabic/arconv.h index 04fb135c..a0180e4c 100644 --- a/modules/arabic/arconv.h +++ b/modules/arabic/arconv.h @@ -9,11 +9,13 @@ #define __arconv_h_ #include +#include "pango.h" +#include "pangox.h" /* * arabic_reshape: reshapes string ( ordered left-to right visual order ) * len : before: is the length of the string - * after : number of nun-NULL characters + * after : number of non-NULL characters * */ typedef enum @@ -25,7 +27,18 @@ typedef enum ar_unifont = 0x40, ar_naqshfont = 0x80 } arabic_level; -void arabic_reshape(int* len,gunichar* string,arabic_level level); +typedef struct +{ + PangoXSubfont subfonts[3]; + arabic_level level; +} ArabicFontInfo; + +/* len : beforehand: #chars in string + * after: #chars in text + * string: original-string + * text : output-text + */ +void arabic_reshape(int* len,const char* text,gunichar* string,arabic_level level); int arabic_isvowel(gunichar s); #endif diff --git a/modules/arabic/langboxfont.c b/modules/arabic/langboxfont.c index 8a92b51b..2cf84f57 100644 --- a/modules/arabic/langboxfont.c +++ b/modules/arabic/langboxfont.c @@ -15,35 +15,33 @@ #ifdef DEBUG #include #endif +#include "langboxfont.h" -int -arabic_lboxinit(PangoFont *font,PangoXSubfont* lboxfonts) +ArabicFontInfo* +arabic_lboxinit(PangoFont *font) { static char *lbox_charsets0[] = { "iso8859-6.8x", }; - - PangoXSubfont *subfonts; - int *subfont_charsets; - int n_subfonts; + + ArabicFontInfo *fs = NULL; + PangoXSubfont *subfonts; + int *subfont_charsets; + int n_subfonts; n_subfonts = pango_x_list_subfonts (font,lbox_charsets0, 1, &subfonts, &subfont_charsets); if (n_subfonts > 0) { - lboxfonts[0] = subfonts[0]; - g_free (subfonts); - g_free (subfont_charsets); - } - else - { - g_free (subfonts); - g_free (subfont_charsets); - return 0; + fs = g_new (ArabicFontInfo,1); + fs->level = ar_standard | ar_composedtashkeel | ar_lboxfont; + fs->subfonts[0] = subfonts[0]; } - return 1; + g_free (subfonts); + g_free (subfont_charsets); + return fs; } @@ -187,26 +185,14 @@ arabic_lbox_recode(PangoXSubfont* subfont,int* glyph,int* glyph2, } else if ((letter >= 0xFE80)&&(letter <= 0xFEF4)) { -#ifdef DEBUG - if (charmap[letter-0xFE80].unicodechar != letter) - { - fprintf(stderr,"[ar] lboxfont charmap table defect " - "%x comes out as %x ", - letter,charmap[letter-0xFE80].unicodechar); - } -#endif *glyph = charmap[letter-0xFE80].charindex; } else if ((letter >= 0x64B)&&(letter <= 0x652)) { /* a vowel */ *glyph = letter - 0x64B + 0xA8; } - else if ((letter >= 0xFEF5)&&(letter <= 0xFEFC)) + else if ((letter >= 0xFEF5)&&(letter <= 0xFEFC)&&(glyph2)&&(*glyph2==0)) { /* Lam-Alif. Langbox solved the problem in their own way ... */ -#ifdef DEBUG - fprintf(stderr,"[ar] lbox-recoding chars %x", - letter); -#endif if (!(letter % 2)) { *glyph = 0xA6; @@ -233,7 +219,7 @@ arabic_lbox_recode(PangoXSubfont* subfont,int* glyph,int* glyph2, } else switch(letter) { - /* extra vowels */ + /* extra vowels */ case 0xFC5E: *glyph = 0x82; break; case 0xFC5F: *glyph = 0x83; break; case 0xFC60: *glyph = 0x84; break; @@ -244,6 +230,18 @@ arabic_lbox_recode(PangoXSubfont* subfont,int* glyph,int* glyph2, case 0x621: *glyph = charmap[0].charindex; break; /* hamza */ case 0x640: *glyph = 0xE0; break; /* tatweel */ case 0x61F: *glyph = 0xBF; break; /* question mark */ + + /* farsi ye */ + case 0xFBFC: *glyph = 0x8D; break; + case 0xFBFD: *glyph = 0xE9; break; + case 0xFBFE: *glyph = 0xFE; break; + case 0xFBFF: *glyph = 0xFE; break; + /* Gaf -- the font does not have it, but this is better than nothing */ + case 0xFB92: *glyph = 0xE3; break; + case 0xFB93: *glyph = 0xE3; break; + case 0xFB94: *glyph = 0xF9; break; + case 0xFB95: *glyph = 0x9B; break; + default: *glyph = 0x20; /* we don't have this thing -- use a space */ /* This has to be something that does not print anything !! */ diff --git a/modules/arabic/langboxfont.h b/modules/arabic/langboxfont.h index 1c43a47c..59be6e8c 100644 --- a/modules/arabic/langboxfont.h +++ b/modules/arabic/langboxfont.h @@ -8,13 +8,14 @@ #define __lboxfont_h_ #include "pango.h" #include "pangox.h" +#include "arconv.h" /* - * lboxfont must point to valid memory for this to work. + * create an arabic_fontstruct for the langbox-module + * returns: NULL on failure */ -int -arabic_lboxinit(PangoFont *font,PangoXSubfont* lboxfonts); -/* a return value of 0 means this has failed */ +ArabicFontInfo* +arabic_lboxinit(PangoFont *font); /* glyph2 is the next glyph in line; if there is none, put in NULL diff --git a/modules/arabic/mulefont.c b/modules/arabic/mulefont.c index 6bfc6b0b..084f19ac 100644 --- a/modules/arabic/mulefont.c +++ b/modules/arabic/mulefont.c @@ -11,15 +11,15 @@ #include "pango.h" #include "pangox.h" + /* #define DEBUG */ #ifdef DEBUG #include #endif +#include "mulefont.h" - - -int -arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts) +ArabicFontInfo* +arabic_muleinit(PangoFont *font) { static char *mule_charsets0[] = { "mulearabic-0", @@ -32,14 +32,19 @@ arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts) static char *mule_charsets2[] = { "mulearabic-2", }; - PangoXSubfont *subfonts; - int *subfont_charsets; - int n_subfonts; + + ArabicFontInfo *fs = NULL; + PangoXSubfont *subfonts; + int *subfont_charsets; + int n_subfonts; + PangoXSubfont mulefonts[3]; n_subfonts = pango_x_list_subfonts (font,mule_charsets0, 1, &subfonts, &subfont_charsets); if (n_subfonts > 0) - mulefonts[0] = subfonts[0]; + { + mulefonts[0] = subfonts[0]; + } g_free (subfonts); g_free (subfont_charsets); @@ -48,32 +53,28 @@ arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts) if (n_subfonts > 0) { mulefonts[1] = subfonts[0]; - g_free (subfonts); - g_free (subfont_charsets); - } - else - { - g_free (subfonts); - g_free (subfont_charsets); - return 0; } + g_free (subfonts); + g_free (subfont_charsets); n_subfonts = pango_x_list_subfonts (font,mule_charsets2, 1, &subfonts, &subfont_charsets); if (n_subfonts > 0) { mulefonts[2] = subfonts[0]; - g_free (subfonts); - g_free (subfont_charsets); } - else + g_free (subfonts); + g_free (subfont_charsets); + + if (( mulefonts[0] != 0)&&(mulefonts[1] != 0)&&(mulefonts[2] != 0)) { - g_free (subfonts); - g_free (subfont_charsets); - return 0; + fs = g_new (ArabicFontInfo,1); + fs->level = ar_novowel | ar_mulefont; + fs->subfonts[0] = mulefonts[0]; + fs->subfonts[1] = mulefonts[1]; + fs->subfonts[2] = mulefonts[2]; } - - return 1; + return fs; } @@ -274,8 +275,14 @@ arabic_mule_recode(PangoXSubfont* subfont,int* glyph,PangoXSubfont* mulefonts) case 0xFB58: *subfont = mulefonts[1]; *glyph = 0x66; break; case 0xFB59: *subfont = mulefonts[1]; *glyph = 0x67; break; /* farsi Jeh */ + case 0xFBFC: *subfont = mulefonts[2]; *glyph = 0x5D; break; + case 0xFBFD: *subfont = mulefonts[2]; *glyph = 0x5E; break; + case 0xFBFE: *subfont = mulefonts[1]; *glyph = 0x60; break; + case 0xFBFF: *subfont = mulefonts[1]; *glyph = 0x61; break; + /* extra */ case 0xFB8A: *subfont = mulefonts[1]; *glyph = 0x68; break; case 0xFB8B: *subfont = mulefonts[1]; *glyph = 0x69; break; + default: *subfont = mulefonts[1]; *glyph = 0x26; /* we don't have this thing -- use a dot */ diff --git a/modules/arabic/mulefont.h b/modules/arabic/mulefont.h index acdf1983..6851ae7d 100644 --- a/modules/arabic/mulefont.h +++ b/modules/arabic/mulefont.h @@ -8,15 +8,16 @@ #define __mulefont_h_ #include "pango.h" #include "pangox.h" +#include "arconv.h" -/* mulefonts must be an array with at least three entries */ - -int -arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts); -/* a return value of 0 means this has failed */ +/* + * create an arabic_fontstruct for the mulefont-module + * returns: NULL on failure + */ +ArabicFontInfo* +arabic_muleinit(PangoFont *font); void arabic_mule_recode(PangoXSubfont* subfont,int* glyph,PangoXSubfont* mulefonts); - #endif diff --git a/modules/arabic/naqshfont.c b/modules/arabic/naqshfont.c index c390ce4d..d4885245 100644 --- a/modules/arabic/naqshfont.c +++ b/modules/arabic/naqshfont.c @@ -1,6 +1,7 @@ /* pango-arabic module * * (C) 2000 K. Koehler + * (c) 2000 Pablo Saratxaga * * This file provides a mapping unicode <- naqshfont */ @@ -11,172 +12,305 @@ #include "pango.h" #include "pangox.h" -/* #define DEBUG */ +/* #define DEBUG */ #ifdef DEBUG #include #endif +#include "naqshfont.h" -int -urdu_naqshinit(PangoFont *font,PangoXSubfont* nqfont) +ArabicFontInfo* +urdu_naqshinit(PangoFont *font) { static char *nq_charsets0[] = { - "urdunaqsh-0", + "symbol-0", +/* "urdunaqsh-0" */ }; - PangoXSubfont *subfonts; - int *subfont_charsets; - int n_subfonts; + ArabicFontInfo *fs = NULL; + PangoXSubfont *subfonts; + int *subfont_charsets; + int n_subfonts; n_subfonts = pango_x_list_subfonts (font,nq_charsets0, 1, &subfonts, &subfont_charsets); if (n_subfonts > 0) { - nqfont[0] = subfonts[0]; - g_free (subfonts); - g_free (subfont_charsets); + fs = g_new (ArabicFontInfo,1); + fs->level = ar_standard | ar_naqshfont; + fs->subfonts[0] = subfonts[0]; } - else - { - g_free (subfonts); - g_free (subfont_charsets); - return 0; - } - - return 1; + g_free (subfonts); + g_free (subfont_charsets); + return fs; } + typedef struct { gunichar unicodechar; - int charindex; + int charindex; } fontentry; static fontentry charmap [] = -{ - { 0xFE80,0x4A }, /* HAMZA; handle seperately !!! */ - { 0xFE81,0x22 }, /* ALIF MADDA */ - { 0xFE82,0xAD }, - { 0xFE83,0x22 }, /* ALIF HAMZA */ - { 0xFE84,0xAD }, - { 0xFE85,0x46 }, /* WAW HAMZA */ - { 0xFE86,0xD2 }, - { 0xFE87,0x22 }, /* ALIF IHAMZA */ - { 0xFE88,0xAD }, - { 0xFE89,0x4C }, /* YA HAMZA */ - { 0xFE8A,0xD7 }, - - { 0xFE8B,0x6C }, /* HMAZA-'pod' */ - { 0xFE8C,0xAB }, - { 0xFE8D,0x22 }, /* ALIF */ - { 0xFE8E,0xAD }, - { 0xFE8F,0x23 }, /* BA */ +{ /* the basic arabic block comes first ... */ + { 0xFE8B,0x6C }, /* HMAZA-'pod' */ + { 0xFE8C,0xAB }, + { 0xFE8D,0x22 }, /* ALEF */ + { 0xFE8E,0xAD }, + { 0xFE8F,0x23 }, /* BEH */ { 0xFE90,0xAE }, { 0xFE91,0x4E }, { 0xFE92,0x6E }, - { 0xFE93,0x48 }, /* TA MARBUTA */ + { 0xFE93,0x48 }, /* TEH MARBUTA */ { 0xFE94,0xD5 }, - { 0xFE95,0x25 }, /* TA */ + { 0xFE95,0x25 }, /* TEH */ { 0xFE96,0xB0 }, { 0xFE97,0x50 }, { 0xFE98,0x70 }, - { 0xFE99,0x27 }, /* THA */ + { 0xFE99,0x27 }, /* THEH */ { 0xFE9A,0xB2 }, { 0xFE9B,0x52 }, { 0xFE9C,0x72 }, - { 0xFE9D,0x28 }, /* DJIM */ + { 0xFE9D,0x28 }, /* JEEM */ { 0xFE9E,0xB3 }, { 0xFE9F,0x53 }, { 0xFEA0,0x73 }, - { 0xFEA1,0x2A }, /* .HA */ + { 0xFEA1,0x2A }, /* HAH . */ { 0xFEA2,0xB5 }, { 0xFEA3,0x55 }, { 0xFEA4,0x75 }, - { 0xFEA5,0x2B }, /* CHA */ + { 0xFEA5,0x2B }, /* KHAH */ { 0xFEA6,0xB6 }, { 0xFEA7,0x56 }, { 0xFEA8,0x76 }, - { 0xFEA9,0x2C }, /* DAL */ + { 0xFEA9,0x2C }, /* DAL */ { 0xFEAA,0xB8 }, - { 0xFEAB,0x2E }, /* THAL */ + { 0xFEAB,0x2E }, /* THAL */ { 0xFEAC,0xBA }, - { 0xFEAD,0x2F }, /* RA */ + { 0xFEAD,0x2F }, /* REH */ { 0xFEAE,0xBB }, - { 0xFEAF,0x31 }, /* ZAY */ + { 0xFEAF,0x31 }, /* ZAIN (ZAY) */ { 0xFEB0,0xBD }, - - { 0xFEB1,0x33 }, /* SIN */ + { 0xFEB1,0x33 }, /* SEEN */ { 0xFEB2,0xBF }, { 0xFEB3,0x57 }, { 0xFEB4,0x77 }, - { 0xFEB5,0x34 }, /* SHIN */ + { 0xFEB5,0x34 }, /* SHEEN */ { 0xFEB2,0xC0 }, { 0xFEB3,0x58 }, { 0xFEB4,0x78 }, - { 0xFEB9,0x35 }, /* SAAD */ + { 0xFEB9,0x35 }, /* SAD */ { 0xFEBA,0xC1 }, { 0xFEBB,0x59 }, { 0xFEBC,0x79 }, - { 0xFEBD,0x36 }, /* DAAD */ + { 0xFEBD,0x36 }, /* DAD */ { 0xFEBE,0xC2 }, { 0xFEBF,0x5A }, { 0xFEC0,0x7A }, - { 0xFEC1,0x37 }, /* .TA */ + { 0xFEC1,0x37 }, /* TAH . */ { 0xFEC2,0xC3 }, { 0xFEC3,0x5B }, { 0xFEC4,0x7B }, - { 0xFEC5,0x38 }, /* .ZA */ + { 0xFEC5,0x38 }, /* ZAH . */ { 0xFEC6,0xC4 }, { 0xFEC7,0x5C }, { 0xFEC8,0x7C }, - { 0xFEC9,0x39 }, /* AIN */ + { 0xFEC9,0x39 }, /* AIN */ { 0xFECA,0xC5 }, { 0xFECB,0x5D }, { 0xFECC,0x7D }, - { 0xFECD,0x3A }, /* RAIN */ + { 0xFECD,0x3A }, /* GHAIN */ { 0xFECE,0xC6 }, { 0xFECF,0x5E }, { 0xFED0,0x7E }, - { 0xFED1,0x3B }, /* FA */ + { 0xFED1,0x3B }, /* FEH */ { 0xFED2,0xC7 }, { 0xFED3,0x5F }, { 0xFED4,0xA1 }, - - { 0xFED5,0x3D }, /* QAF */ + { 0xFED5,0x3D }, /* QAF */ { 0xFED6,0xC9 }, { 0xFED7,0x61 }, { 0xFEB8,0xA3 }, - { 0xFED9,0x3E }, /* KAF */ + { 0xFED9,0x3E }, /* KAF */ { 0xFEDA,0xCA }, { 0xFEDB,0x62 }, { 0xFEDC,0xA4 }, - { 0xFEDD,0x41 }, /* LAM */ + { 0xFEDD,0x41 }, /* LAM */ { 0xFEDE,0xCD }, { 0xFEDF,0x66 }, { 0xFEE0,0xA6 }, - { 0xFEE1,0x43 }, /* MIM */ + { 0xFEE1,0x43 }, /* MEEM */ { 0xFEE2,0xCF }, { 0xFEE3,0x67 }, { 0xFEE4,0xA7 }, - { 0xFEE5,0x44 }, /* NUN */ + { 0xFEE5,0x44 }, /* NOON */ { 0xFEE6,0xD0 }, { 0xFEE7,0x68 }, { 0xFEE8,0xA8 }, - - { 0xFEE9,0x47 }, /* HA */ + { 0xFEE9,0x47 }, /* HEH (HA) */ { 0xFEEA,0xD4 }, { 0xFEEB,0x6B }, { 0xFEEC,0xAA }, - { 0xFEED,0x46 }, /* WAW */ + { 0xFEED,0x46 }, /* WAW */ { 0xFEEE,0xD2 }, - { 0xFEEF,0x4B }, /* ALIF MAQSORA */ + { 0xFEEF,0x4B }, /* ALEF MAKSURA */ { 0xFEF0,0xD6 }, - { 0xFEF1,0x4C }, /* YA */ + { 0xFEF1,0x4C }, /* YEH (YA) */ { 0xFEF2,0xD7 }, { 0xFEF3,0x6D }, - { 0xFEF4,0xAC } + { 0xFEF4,0xAC }, + + + { 0x0020,0x20 }, /* space */ /* We ought to NOT get these ! */ + { 0x0021,0xEA }, /* ! */ + { 0x0027,0xEC }, /* ' */ + { 0x0028,0xED }, /* ( */ + { 0x0029,0xEE }, /* ) */ + { 0x002E,0xE7 }, /* . */ + { 0x003A,0xEB }, /* : */ + { 0x00A0,0xA0 }, /* non breaking space */ + + + + { 0x060C,0xE8 }, /* arabic comma */ + { 0x061F,0xE9 }, /* arabic question mark */ + { 0x0621,0x4A }, /* hamza */ + { 0x0640,0xE6 }, /* arabic tatweel */ + { 0x064B,0xF8 }, /* arabic fathatan, unshaped */ + { 0x064C,0xF7 }, /* arabic dammatan, unshaped */ + { 0x064D,0xF9 }, /* arabic kasratan, unshaped */ + { 0x064E,0xFE }, /* arabic fatha, unshaped */ + { 0x064F,0xFD }, /* araibc damma, unshaped */ + { 0x0650,0xFF }, /* arabic kasra, unshaped */ + { 0x0651,0xFC }, /* arabic shadda, unshaped */ + { 0x0652,0xA0 }, /* sukun -- non-existant in the font */ + { 0x0653,0xF3 }, /* arabic madda above, should occur in only one case: upon left-joined Alif */ + { 0x0654,0xF6 }, /* arabic hamza above, unshaped */ + { 0x0655,0xF5 }, /* arabic hamza below, unshaped */ + + /* arabic digits */ + { 0x0660,0xE5 }, /* arabic digit 0 */ + { 0x0661,0xD9 }, /* arabic digit 1 */ + { 0x0662,0xDA }, /* arabic digit 2 */ + { 0x0663,0xDB }, /* arabic digit 3 */ + { 0x0664,0xDC }, /* arabic digit 4 */ + { 0x0665,0xDE }, /* arabic digit 5 */ + { 0x0666,0xE0 }, /* arabic digit 6 */ + { 0x0667,0xE1 }, /* arabic digit 7 */ + { 0x0668,0xE3 }, /* arabic digit 8 */ + { 0x0669,0xE4 }, /* arabic digit 9 */ + { 0x0670,0xF4 }, /* arabic percent sign */ + { 0x0679,0x26 }, /* some arabic letters, unshaped */ + { 0x067E,0x24 }, + { 0x0686,0x29 }, + { 0x0688,0x2D }, + { 0x0691,0x30 }, + { 0x0698,0x32 }, + { 0x06A4,0x3C }, + { 0x06A9,0x3F }, + { 0x06AF,0x40 }, + { 0x06BA,0x45 }, + { 0x06BE,0x49 }, + { 0x06C1,0x47 }, + { 0x06CC,0x4B }, + { 0x06D2,0x4D }, + { 0x06D4,0xE7 }, + /* persian digits */ + { 0x06F0,0xE5 }, /* persian digit 0 */ + { 0x06F1,0xD9 }, /* persian digit 1 */ + { 0x06F2,0xDA }, /* persian digit 2 */ + { 0x06F3,0xDB }, /* persian digit 3 */ + { 0x06F4,0xDD }, /* persian digit 4 */ + { 0x06F5,0xDF }, /* persian digit 5 */ + { 0x06F6,0xE0 }, /* persian digit 6 */ + { 0x06F7,0xE2 }, /* persian digit 7 */ + { 0x06F8,0xE3 }, /* persian digit 8 */ + { 0x06F9,0xE4 }, /* persian digit 9 */ + + /* shaped letters & ligatures */ + { 0xFB56,0x24 }, /* PEH */ + { 0xFB57,0xAF }, + { 0xFB58,0x4F }, + { 0xFB59,0x6F }, + { 0xFB66,0x26 }, /* TTEH */ + { 0xFB67,0xB1 }, + { 0xFB68,0x51 }, + { 0xFB69,0x71 }, + { 0xFB6A,0x3C }, /* VEH */ + { 0xFB6B,0xC8 }, + { 0xFB6C,0x60 }, + { 0xFB6D,0xA2 }, + { 0xFB7A,0x29 }, /* TCHEH */ + { 0xFB7B,0xB4 }, + { 0xFB7C,0x54 }, + { 0xFB7D,0x74 }, + { 0xFB88,0x2D }, /* DDAL */ + { 0xFB89,0xB9 }, + { 0xFB8A,0x32 }, /* JEH */ + { 0xFB8B,0xBE }, + { 0xFB8C,0x30 }, /* RREH */ + { 0xFB8D,0xBC }, + { 0xFB8E,0x3F }, /* KEHEH */ + { 0xFB8F,0xCB }, + { 0xFB90,0x62 }, + { 0xFB90,0x64 }, + { 0xFB91,0xA4 }, + { 0xFB92,0x40 }, /* GAF */ + { 0xFB93,0xCC }, + { 0xFB94,0x63 }, + { 0xFB94,0x65 }, + { 0xFB95,0xA5 }, + { 0xFB9E,0x45 }, /* NOON GHUNNA */ + { 0xFB9F,0xD1 }, + { 0xFBA6,0x47 }, /* HEH GOAL */ + { 0xFBA7,0xD3 }, + { 0xFBA8,0x69 }, + { 0xFBA8,0x6A }, + { 0xFBA9,0xA9 }, + { 0xFBAA,0x49 }, /* HEH DOACHASMEE */ + { 0xFBAB,0xAA }, + { 0xFBAC,0x6B }, + { 0xFBAD,0xAA }, + { 0xFBAE,0x4D }, /* YEH BAREE */ + { 0xFBAF,0xD8 }, + { 0xFBFC,0x4B }, /* FARSI YEH */ + { 0xFBFD,0xD6 }, + { 0xFBFE,0x6D }, + { 0xFBFF,0xAC }, + { 0xFE80,0x4A }, /* HAMZA */ + { 0xFE81,0x21 }, /* ALEF WITH MADDA ABOVE */ + { 0xFE82,0xAD }, + + /* fake entries (the font doesn't provide glyphs with the hamza + * above; so the ones without hamza are used, as a best approach) */ + /* these *should never occur* */ + { 0xFE83,0x22 }, /* ALIF HAMZA */ + { 0xFE84,0xAD }, + { 0xFE85,0x46 }, /* WAW HAMZA */ + { 0xFE86,0xD2 }, + { 0xFE87,0x22 }, /* ALIF IHAMZA */ + { 0xFE88,0xAD }, + { 0xFE89,0x4C }, /* YA HAMZA */ + { 0xFE8A,0xD7 }, + + { 0xFEFB,0x42 }, /* ligature LAM+ALEF ISOLATED */ + { 0xFEFC,0xCE }, /* ligature LAM+ALEF FINAL */ + + /* I've been unable to map those font glyphs to an unicode value; + * if you can, please tell me -- Pablo Saratxaga + { 0x????,0xB7 }, + { 0x????,0xEF }, + { 0x????,0xF0 }, + { 0x????,0xF1 }, + { 0x????,0xF2 }, + { 0x????,0xFA }, + { 0x????,0xFB }, + */ + + { 0x0000,0x00 } }; @@ -185,28 +319,23 @@ urdu_naqsh_recode(PangoXSubfont* subfont,int* glyph,int* glyph2, PangoXSubfont* nqfont) { int letter=*glyph; + int i; + *subfont = nqfont[0]; - if ((letter >= 0x661)&&(letter <= 0x664)) /* indic numeral */ - { - *glyph = letter - 0x661 + 0xD9; - } - if ((letter >= 0x6F1)&&(letter <= 0x6F3)) /* indic numeral */ - { - *glyph = letter - 0x6F1 + 0xD9; - } - else if ((letter >= 0xFE80)&&(letter <= 0xFEF4)) + + if ((letter >= 0xFE8B)&&(letter <= 0xFEF4)) { - *glyph = charmap[letter-0xFE80].charindex; + *glyph = charmap[letter-0xFE8B].charindex; + } + else if (letter < 0xFF ) + { /* recoded in previous run */ + *glyph = letter; } else if ((letter >= 0xFEF5)&&(letter <= 0xFEFC)) { /* Lam-Alif. Also solved interestingly ... * At least, the Lam-Alif itself is not split ... */ -#ifdef DEBUG - fprintf(stderr,"[ar] nq-recoding chars %x", - letter); -#endif if (!(letter % 2)) { *glyph = 0xCE; @@ -224,94 +353,19 @@ urdu_naqsh_recode(PangoXSubfont* subfont,int* glyph,int* glyph2, case 0xFEF9 : case 0xFEFA : *glyph2 = 0xF5; break; /* Lam-Alif iHamza */ } - } - else switch(letter) + } + else { - case 0x665: *glyph = 0xDE; break; /* indic numerals */ - case 0x666: *glyph = 0xE0; break; - case 0x667: *glyph = 0xE1; break; - case 0x668: *glyph = 0xE3; break; - case 0x669: *glyph = 0xE4; break; - case 0x660: *glyph = 0xE5; break; - /* farsi numerals */ - case 0x6F4: *glyph = 0xDD; break; - case 0x6F5: *glyph = 0xDF; break; - case 0x6F6: *glyph = 0xE0; break; - case 0x6F7: *glyph = 0xE1; break; - case 0x6F8: *glyph = 0xE3; break; - case 0x6F9: *glyph = 0xE4; break; - case 0x6F0: *glyph = 0xE5; break; - /* tashkeel */ - case 0x64B: *glyph = 0xF8; break; - case 0x64C: *glyph = 0xF7; break; - case 0x64D: *glyph = 0xF9; break; - case 0x64E: *glyph = 0xFD; break; - case 0x64F: *glyph = 0xFE; break; - case 0x650: *glyph = 0xFF; break; - case 0x651: *glyph = 0xFC; break; - case 0x652: *glyph = 0xEC; break; - case 0x653: *glyph = 0xF3; break; - case 0x670: *glyph = 0xF4; break; - /* urdu letters */ - case 0xFB56: *glyph = 0x24; - case 0xFB57: *glyph = 0x24; - case 0xFB58: *glyph = 0x24; - case 0xFB59: *glyph = 0x24; - - case 0xFB66: *glyph = 0x26; - case 0xFB67: *glyph = 0xB1; - case 0xFB68: *glyph = 0x51; - case 0xFB69: *glyph = 0x71; - - case 0xFB7A: *glyph = 0x29; - case 0xFB7B: *glyph = 0xB4; - case 0xFB7C: *glyph = 0x54; - case 0xFB7D: *glyph = 0x74; - - case 0xFB88: *glyph = 0x2D; - case 0xFB89: *glyph = 0xB9; - - case 0xFB8C: *glyph = 0x30; - case 0xFB8D: *glyph = 0xBC; - - case 0xFB8A: *glyph = 0x32; - case 0xFB8B: *glyph = 0xBE; - - case 0xFB6A: *glyph = 0x3C; - case 0xFB6B: *glyph = 0xC8; - case 0xFB6C: *glyph = 0x60; - case 0xFB6D: *glyph = 0xA2; - - case 0xFB8E: *glyph = 0x3F; - case 0xFB8F: *glyph = 0xCB; - case 0xFB90: *glyph = 0x62; - case 0xFB91: *glyph = 0xA4; - - case 0xFB92: *glyph = 0x40; - case 0xFB93: *glyph = 0xCC; - case 0xFB94: *glyph = 0x63; - case 0xFB95: *glyph = 0xA5; - - case 0xFBAA: *glyph = 0x49; - case 0xFBAB: *glyph = 0xD4; - case 0xFBAC: *glyph = 0x6B; - case 0xFBAD: *glyph = 0xAA; - - case 0xFBAE: *glyph = 0x4D; - case 0xFBAF: *glyph = 0xD8; - - case 0xFBA6: *glyph = 0x47; - case 0xFBA7: *glyph = 0xD3; - case 0xFBA8: *glyph = 0x69; - case 0xFBA9: *glyph = 0xA9; - - - /* specials */ - case 0x621: *glyph = 0x4A; break; - case 0x640: *glyph = 0xD3; break; /* tatweel ? */ - case 0x61F: *glyph = 0xE9; break; /* Question mark */ - - default: *glyph = 0xE5; break; /* don't know what to do */ + for (i=0 ; charmap[i].unicodechar!=0x0000 ; i++) { + if (charmap[i].unicodechar == letter) { + *glyph = charmap[i].charindex; + break; + } + } + if (charmap[i].unicodechar == 0x0000) { + /* if the glyph wasn't on the table */ + *glyph = 0xE5; /* don't know what to do */ + } } } diff --git a/modules/arabic/naqshfont.h b/modules/arabic/naqshfont.h index f4f7f098..4abde1e3 100644 --- a/modules/arabic/naqshfont.h +++ b/modules/arabic/naqshfont.h @@ -2,19 +2,20 @@ * * (C) 2000 K. Koehler * - * This file provides a mapping unicode <- mulefont + * This file provides a mapping unicode <- urdu nq-font */ #ifndef __nqfont_h_ #define __nqfont_h_ #include "pango.h" #include "pangox.h" +#include "arconv.h" /* - * nqfont must point to valid memory for this to work. + * create an arabic_fontstruct for the urdu_naqshfont-module + * returns: NULL on failure */ -int -urdu_naqshinit(PangoFont *font,PangoXSubfont* nqfont); -/* a return value of 0 means this has failed */ +ArabicFontInfo* +urdu_naqshinit(PangoFont *font); /* glyph2 is the next glyph in line; if there is none, put in NULL @@ -23,3 +24,5 @@ void urdu_naqsh_recode(PangoXSubfont* subfont,int* glyph,int* glyph2, PangoXSubfont* nqfont); #endif + + -- cgit v1.2.1