summaryrefslogtreecommitdiff
path: root/modules/arabic
diff options
context:
space:
mode:
author: Owen Taylor <otaylor@redhat.com> 2000-11-02 21:22:13 +0000
committer: Owen Taylor <otaylor@src.gnome.org> 2000-11-02 21:22:13 +0000
commit: 2028e281b0ba721d859833ea88bb5f303e78e95b (patch)
tree: d920819962844b42419a5d5c2f761c759d01df0d /modules/arabic
parent: da4f30ffe78e3c79b1cce78bb06e6226cef69d61 (diff)
download: pango-2028e281b0ba721d859833ea88bb5f303e78e95b.tar.gz
New Arabic shaper from Karl Koehler.
Thu Nov 2 16:21:22 2000 Owen Taylor <otaylor@redhat.com> * New Arabic shaper from Karl Koehler.
Diffstat (limited to 'modules/arabic')
-rw-r--r-- modules/arabic/arabic-x.c 208
-rw-r--r-- modules/arabic/arabic.c 208
-rw-r--r-- modules/arabic/arconv.c 224
-rw-r--r-- modules/arabic/arconv.h 17
-rw-r--r-- modules/arabic/langboxfont.c 58
-rw-r--r-- modules/arabic/langboxfont.h 9
-rw-r--r-- modules/arabic/mulefont.c 55
-rw-r--r-- modules/arabic/mulefont.h 13
-rw-r--r-- modules/arabic/naqshfont.c 394
-rw-r--r-- modules/arabic/naqshfont.h 13
10 files changed, 655 insertions, 544 deletions
diff --git a/modules/arabic/arabic-x.c b/modules/arabic/arabic-x.c
index 6b10fc19..e73c20c4 100644
--- a/modules/arabic/arabic-x.c
+++ b/modules/arabic/arabic-x.c
@@ -8,7 +8,6 @@
#include <stdio.h>
#include <glib.h>
-#include <string.h>
#include "pango.h"
#include "pangox.h"
@@ -17,7 +16,7 @@
#include "langboxfont.h"
#include "naqshfont.h"
-/* #define DEBUG */
+/* #define DEBUG */
#ifdef DEBUG
#include <stdio.h>
#endif
@@ -58,7 +57,6 @@ arabic_engine_break (const char *text,
PangoLogAttr *attrs)
{
/* Most of the code comes from tamil_engine_break
- * only difference is char stop based on modifiers
*/
const char *cur = text;
@@ -66,20 +64,20 @@ arabic_engine_break (const char *text,
gunichar wc;
while (*cur && cur - text < len)
- {
- wc = g_utf8_get_char (cur);
- if (wc == (gunichar)-1)
- break; /* FIXME: ERROR */
+ {
+ wc = g_utf8_get_char (cur);
+ if (wc == (gunichar)-1)
+ break; /* FIXME: ERROR */
attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
attrs[i].is_char_stop = 1;
attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
/* actually, is_word_stop in not correct, but simple and good enough. */
-
+
i++;
cur = g_utf8_next_char (cur);
- }
+ }
}
static PangoEngine *
@@ -101,88 +99,104 @@ arabic_engine_lang_new ()
* X window system script engine portion
*/
-static arabic_level
-find_unic_font (PangoFont *font,char* charsets[],PangoXSubfont* rfonts)
+static ArabicFontInfo*
+arabic_unicodeinit(PangoFont *font, PangoXSubfont subfont)
+{
+ ArabicFontInfo *fs = NULL;
+
+ if (subfont != 0)
+ {
+ if ( pango_x_has_glyph /* Alif-Madda */
+ (font,PANGO_X_MAKE_GLYPH(subfont,0xFE81)))
+ {
+ fs = g_new (ArabicFontInfo,1);
+ fs->level = ar_standard | ar_unifont;
+ fs->subfonts[0] = subfont;
+
+ if ( pango_x_has_glyph /* Shadda+Kasra */
+ (font,PANGO_X_MAKE_GLYPH(subfont,0xFC62)))
+ {
+ fs->level |= ar_composedtashkeel;
+ /* extra vowels in font, hopefully */
+ }
+ if ( pango_x_has_glyph /* Lam-Min alone */
+ (font,PANGO_X_MAKE_GLYPH(subfont,0xFC42)))
+ {
+ fs->level |= ar_lig;
+ /* extra ligatures in font, hopefully */
+ }
+ }
+ }
+ return fs;
+}
+
+static ArabicFontInfo*
+find_unic_font (PangoFont *font)
{
- PangoXSubfont *subfonts;
- int *subfont_charsets;
- int n_subfonts;
- int i;
- int result = 0;
+ static char *charsets[] = {
+ "iso10646-1",
+ "iso8859-6.8x",
+ "mulearabic-2",
+ "urdunaqsh-0",
+/* "symbol-0" */
+ };
+
+ ArabicFontInfo *fs = NULL;
+ PangoXSubfont *subfonts;
+ int *subfont_charsets;
+ int n_subfonts;
+ int i;
+
+ GQuark info_id = g_quark_from_string ("arabic-font-info");
+ fs = g_object_get_qdata (G_OBJECT (font), info_id);
+ if (fs) return fs;
n_subfonts = pango_x_list_subfonts (font, charsets, 4,
&subfonts, &subfont_charsets);
for (i=0; i < n_subfonts; i++)
{
- if ( (strcmp (charsets[subfont_charsets[i]], "mulearabic-2") == 0)
- && arabic_muleinit(font,rfonts) )
+ if ( !strcmp (charsets[subfont_charsets[i]], "mulearabic-2"))
{
- result = ar_mulefont | ar_novowel;
- /* we know we have a mulearabic-font ... */
#ifdef DEBUG
if (getenv("PANGO_AR_NOMULEFONT") == NULL )
#endif
- break;
+ fs = arabic_muleinit(font);
}
- else if ( (strcmp (charsets[subfont_charsets[i]], "iso8859-6.8x") == 0)
- && arabic_lboxinit(font,rfonts) )
+ else if ( !strcmp (charsets[subfont_charsets[i]], "iso8859-6.8x"))
{
- result = ar_standard | ar_lboxfont;
#ifdef DEBUG
if (getenv("PANGO_AR_NOLBOXFONT") == NULL )
#endif
- break;
+ fs = arabic_lboxinit(font);
}
- else if ( (strcmp (charsets[subfont_charsets[i]], "urdunaqsh-0") == 0)
- && urdu_naqshinit(font,rfonts) )
+ else if ( !strcmp (charsets[subfont_charsets[i]], "urdunaqsh-0"))
{
- result = ar_standard | ar_naqshfont;
#ifdef DEBUG
if (getenv("PANGO_AR_NONQFONT") == NULL )
#endif
- break;
+ fs = urdu_naqshinit(font);
}
else
- { /* test if the font has Alif-Madda; if so assume it is ok */
- if ( pango_x_has_glyph /* Alif-Madda */
- (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFE81)))
- {
- rfonts[0] = subfonts[i];
- result = ar_standard | ar_unifont;
- }
- if ( pango_x_has_glyph /* Shadda+Kasra */
- (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC62)))
- {
- result |= ar_composedtashkeel;
- /* extra vowels in font, hopefully */
- }
- if ( pango_x_has_glyph /* Lam-Min alone */
- (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC42)))
- {
- result |= ar_lig;
- /* extra ligatures in font, hopefully */
- }
+ {
#ifdef DEBUG
if (getenv("PANGO_AR_NOUNIFONT") == NULL )
#endif
- if (result) break;
+ fs = arabic_unicodeinit(font,subfonts[i]);
}
+ if (fs){
+ g_object_set_qdata_full (G_OBJECT (font), info_id,
+ fs, (GDestroyNotify)g_free);
+ break;
+ }
}
g_free (subfonts);
g_free (subfont_charsets);
- return result;
+ return fs;
}
-static char *default_charset[] = {
- "iso10646-1",
- "iso8859-6.8x",
- "mulearabic-2",
- "urdunaqsh-0",
-};
-
static void
@@ -219,16 +233,13 @@ arabic_engine_shape (PangoFont *font,
PangoAnalysis *analysis,
PangoGlyphString *glyphs)
{
- PangoXSubfont subfont;
- PangoXSubfont arfonts[3];
-
-
- int n_chars, n_glyph;
- int i;
- const char *p;
- const char *pold;
- gunichar *wc;
- arabic_level lvl;
+ PangoXSubfont subfont;
+ int n_chars;
+ int i;
+ ArabicFontInfo *fs;
+ const char *p;
+ const char *pold;
+ gunichar *wc;
g_return_if_fail (font != NULL);
g_return_if_fail (text != NULL);
@@ -236,15 +247,14 @@ arabic_engine_shape (PangoFont *font,
g_return_if_fail (analysis != NULL);
/* We hope there is a suitible font installed ..
- */
+ */
- n_chars = n_glyph = g_utf8_strlen (text, length);
-
- if (!(lvl = find_unic_font (font, default_charset,arfonts)))
+ if (! (fs = find_unic_font (font)) )
{
PangoGlyph unknown_glyph = pango_x_get_unknown_glyph (font);
+ n_chars = g_utf8_strlen(text,length);
pango_glyph_string_set_size (glyphs, n_chars);
p = text;
@@ -257,20 +267,13 @@ arabic_engine_shape (PangoFont *font,
}
return;
}
- subfont = arfonts[0];
- wc = (gunichar *)g_malloc(sizeof(gunichar)*n_chars);
p = text;
- for (i=0; i < n_chars; i++)
- {
- wc[n_chars - i - 1] = g_utf8_get_char (p);
- p = g_utf8_next_char (p);
- }
-
-
if (analysis->level % 2 == 0)
{
+ wc = g_utf8_to_ucs4(text,length);
+ n_chars = g_utf8_strlen(text,length);
/* We were called on a LTR directional run (e.g. some numbers);
fallback as simple as possible */
pango_glyph_string_set_size (glyphs, n_chars);
@@ -278,21 +281,28 @@ arabic_engine_shape (PangoFont *font,
}
else
{
- arabic_reshape(&n_glyph,wc,lvl);
- pango_glyph_string_set_size (glyphs, n_glyph);
+ wc = (gunichar *)g_malloc(sizeof(gunichar)* (length) ); /* length is sufficient: all arabic chars use at
+ least 2 bytes in utf-8 encoding */
+ n_chars = length;
+ arabic_reshape(&n_chars,text,wc,fs->level);
+ pango_glyph_string_set_size (glyphs, n_chars);
};
p = text;
pold = p;
- i = n_chars-1;
+ i = 0;
+ subfont = fs->subfonts[0];
- while(i >= 0)
+ while(i < n_chars)
{
if (wc[i] == 0)
{
p = g_utf8_next_char (p);
- i--;
+#ifdef DEBUG
+ fprintf(stderr,"NULL-character detected in generated string.!");
+#endif
+ i++;
}
else
{
@@ -300,40 +310,42 @@ arabic_engine_shape (PangoFont *font,
int is_vowel = arabic_isvowel(wc[i]);
cluster_start = is_vowel ? pold - text : p - text;
- if ( lvl & ar_mulefont )
+ if ( fs->level & ar_mulefont )
{
- arabic_mule_recode(&subfont,&(wc[i]),arfonts);
+ arabic_mule_recode(&subfont,&(wc[i]),
+ fs->subfonts);
}
- else if ( lvl & ar_lboxfont )
+ else if ( fs->level & ar_lboxfont )
{
- if (( i > 0 )&&(wc[i-1] == 0))
+ if (( i < n_chars-1 )&&(wc[i+1] == 0))
{
arabic_lbox_recode(&subfont,&(wc[i]),
- &(wc[i-1]), arfonts);
+ &(wc[i+1]),
+ fs->subfonts);
}
else
arabic_lbox_recode(&subfont,&(wc[i]),NULL,
- arfonts);
+ fs->subfonts);
}
- else if ( lvl & ar_naqshfont )
+ else if ( fs->level & ar_naqshfont )
{
- if (( i > 0 )&&(wc[i-1] == 0))
+ if (( i < n_chars-1 )&&(wc[i+1] == 0))
{
urdu_naqsh_recode(&subfont,&(wc[i]),
- &(wc[i-1]), arfonts);
+ &(wc[i+1]),
+ fs->subfonts);
}
else
urdu_naqsh_recode(&subfont,&(wc[i]),NULL,
- arfonts);
+ fs->subfonts);
}
- set_glyph(glyphs, font, subfont, n_glyph - 1,
+ set_glyph(glyphs, font, subfont, n_chars - i - 1,
cluster_start, wc[i], is_vowel);
pold = p;
p = g_utf8_next_char (p);
- n_glyph--;
- i--;
+ i++;
}
}
diff --git a/modules/arabic/arabic.c b/modules/arabic/arabic.c
index 6b10fc19..e73c20c4 100644
--- a/modules/arabic/arabic.c
+++ b/modules/arabic/arabic.c
@@ -8,7 +8,6 @@
#include <stdio.h>
#include <glib.h>
-#include <string.h>
#include "pango.h"
#include "pangox.h"
@@ -17,7 +16,7 @@
#include "langboxfont.h"
#include "naqshfont.h"
-/* #define DEBUG */
+/* #define DEBUG */
#ifdef DEBUG
#include <stdio.h>
#endif
@@ -58,7 +57,6 @@ arabic_engine_break (const char *text,
PangoLogAttr *attrs)
{
/* Most of the code comes from tamil_engine_break
- * only difference is char stop based on modifiers
*/
const char *cur = text;
@@ -66,20 +64,20 @@ arabic_engine_break (const char *text,
gunichar wc;
while (*cur && cur - text < len)
- {
- wc = g_utf8_get_char (cur);
- if (wc == (gunichar)-1)
- break; /* FIXME: ERROR */
+ {
+ wc = g_utf8_get_char (cur);
+ if (wc == (gunichar)-1)
+ break; /* FIXME: ERROR */
attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == 'n') ? 1 : 0;
attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
attrs[i].is_char_stop = 1;
attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
/* actually, is_word_stop in not correct, but simple and good enough. */
-
+
i++;
cur = g_utf8_next_char (cur);
- }
+ }
}
static PangoEngine *
@@ -101,88 +99,104 @@ arabic_engine_lang_new ()
* X window system script engine portion
*/
-static arabic_level
-find_unic_font (PangoFont *font,char* charsets[],PangoXSubfont* rfonts)
+static ArabicFontInfo*
+arabic_unicodeinit(PangoFont *font, PangoXSubfont subfont)
+{
+ ArabicFontInfo *fs = NULL;
+
+ if (subfont != 0)
+ {
+ if ( pango_x_has_glyph /* Alif-Madda */
+ (font,PANGO_X_MAKE_GLYPH(subfont,0xFE81)))
+ {
+ fs = g_new (ArabicFontInfo,1);
+ fs->level = ar_standard | ar_unifont;
+ fs->subfonts[0] = subfont;
+
+ if ( pango_x_has_glyph /* Shadda+Kasra */
+ (font,PANGO_X_MAKE_GLYPH(subfont,0xFC62)))
+ {
+ fs->level |= ar_composedtashkeel;
+ /* extra vowels in font, hopefully */
+ }
+ if ( pango_x_has_glyph /* Lam-Min alone */
+ (font,PANGO_X_MAKE_GLYPH(subfont,0xFC42)))
+ {
+ fs->level |= ar_lig;
+ /* extra ligatures in font, hopefully */
+ }
+ }
+ }
+ return fs;
+}
+
+static ArabicFontInfo*
+find_unic_font (PangoFont *font)
{
- PangoXSubfont *subfonts;
- int *subfont_charsets;
- int n_subfonts;
- int i;
- int result = 0;
+ static char *charsets[] = {
+ "iso10646-1",
+ "iso8859-6.8x",
+ "mulearabic-2",
+ "urdunaqsh-0",
+/* "symbol-0" */
+ };
+
+ ArabicFontInfo *fs = NULL;
+ PangoXSubfont *subfonts;
+ int *subfont_charsets;
+ int n_subfonts;
+ int i;
+
+ GQuark info_id = g_quark_from_string ("arabic-font-info");
+ fs = g_object_get_qdata (G_OBJECT (font), info_id);
+ if (fs) return fs;
n_subfonts = pango_x_list_subfonts (font, charsets, 4,
&subfonts, &subfont_charsets);
for (i=0; i < n_subfonts; i++)
{
- if ( (strcmp (charsets[subfont_charsets[i]], "mulearabic-2") == 0)
- && arabic_muleinit(font,rfonts) )
+ if ( !strcmp (charsets[subfont_charsets[i]], "mulearabic-2"))
{
- result = ar_mulefont | ar_novowel;
- /* we know we have a mulearabic-font ... */
#ifdef DEBUG
if (getenv("PANGO_AR_NOMULEFONT") == NULL )
#endif
- break;
+ fs = arabic_muleinit(font);
}
- else if ( (strcmp (charsets[subfont_charsets[i]], "iso8859-6.8x") == 0)
- && arabic_lboxinit(font,rfonts) )
+ else if ( !strcmp (charsets[subfont_charsets[i]], "iso8859-6.8x"))
{
- result = ar_standard | ar_lboxfont;
#ifdef DEBUG
if (getenv("PANGO_AR_NOLBOXFONT") == NULL )
#endif
- break;
+ fs = arabic_lboxinit(font);
}
- else if ( (strcmp (charsets[subfont_charsets[i]], "urdunaqsh-0") == 0)
- && urdu_naqshinit(font,rfonts) )
+ else if ( !strcmp (charsets[subfont_charsets[i]], "urdunaqsh-0"))
{
- result = ar_standard | ar_naqshfont;
#ifdef DEBUG
if (getenv("PANGO_AR_NONQFONT") == NULL )
#endif
- break;
+ fs = urdu_naqshinit(font);
}
else
- { /* test if the font has Alif-Madda; if so assume it is ok */
- if ( pango_x_has_glyph /* Alif-Madda */
- (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFE81)))
- {
- rfonts[0] = subfonts[i];
- result = ar_standard | ar_unifont;
- }
- if ( pango_x_has_glyph /* Shadda+Kasra */
- (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC62)))
- {
- result |= ar_composedtashkeel;
- /* extra vowels in font, hopefully */
- }
- if ( pango_x_has_glyph /* Lam-Min alone */
- (font,PANGO_X_MAKE_GLYPH(subfonts[i],0xFC42)))
- {
- result |= ar_lig;
- /* extra ligatures in font, hopefully */
- }
+ {
#ifdef DEBUG
if (getenv("PANGO_AR_NOUNIFONT") == NULL )
#endif
- if (result) break;
+ fs = arabic_unicodeinit(font,subfonts[i]);
}
+ if (fs){
+ g_object_set_qdata_full (G_OBJECT (font), info_id,
+ fs, (GDestroyNotify)g_free);
+ break;
+ }
}
g_free (subfonts);
g_free (subfont_charsets);
- return result;
+ return fs;
}
-static char *default_charset[] = {
- "iso10646-1",
- "iso8859-6.8x",
- "mulearabic-2",
- "urdunaqsh-0",
-};
-
static void
@@ -219,16 +233,13 @@ arabic_engine_shape (PangoFont *font,
PangoAnalysis *analysis,
PangoGlyphString *glyphs)
{
- PangoXSubfont subfont;
- PangoXSubfont arfonts[3];
-
-
- int n_chars, n_glyph;
- int i;
- const char *p;
- const char *pold;
- gunichar *wc;
- arabic_level lvl;
+ PangoXSubfont subfont;
+ int n_chars;
+ int i;
+ ArabicFontInfo *fs;
+ const char *p;
+ const char *pold;
+ gunichar *wc;
g_return_if_fail (font != NULL);
g_return_if_fail (text != NULL);
@@ -236,15 +247,14 @@ arabic_engine_shape (PangoFont *font,
g_return_if_fail (analysis != NULL);
/* We hope there is a suitible font installed ..
- */
+ */
- n_chars = n_glyph = g_utf8_strlen (text, length);
-
- if (!(lvl = find_unic_font (font, default_charset,arfonts)))
+ if (! (fs = find_unic_font (font)) )
{
PangoGlyph unknown_glyph = pango_x_get_unknown_glyph (font);
+ n_chars = g_utf8_strlen(text,length);
pango_glyph_string_set_size (glyphs, n_chars);
p = text;
@@ -257,20 +267,13 @@ arabic_engine_shape (PangoFont *font,
}
return;
}
- subfont = arfonts[0];
- wc = (gunichar *)g_malloc(sizeof(gunichar)*n_chars);
p = text;
- for (i=0; i < n_chars; i++)
- {
- wc[n_chars - i - 1] = g_utf8_get_char (p);
- p = g_utf8_next_char (p);
- }
-
-
if (analysis->level % 2 == 0)
{
+ wc = g_utf8_to_ucs4(text,length);
+ n_chars = g_utf8_strlen(text,length);
/* We were called on a LTR directional run (e.g. some numbers);
fallback as simple as possible */
pango_glyph_string_set_size (glyphs, n_chars);
@@ -278,21 +281,28 @@ arabic_engine_shape (PangoFont *font,
}
else
{
- arabic_reshape(&n_glyph,wc,lvl);
- pango_glyph_string_set_size (glyphs, n_glyph);
+ wc = (gunichar *)g_malloc(sizeof(gunichar)* (length) ); /* length is sufficient: all arabic chars use at
+ least 2 bytes in utf-8 encoding */
+ n_chars = length;
+ arabic_reshape(&n_chars,text,wc,fs->level);
+ pango_glyph_string_set_size (glyphs, n_chars);
};
p = text;
pold = p;
- i = n_chars-1;
+ i = 0;
+ subfont = fs->subfonts[0];
- while(i >= 0)
+ while(i < n_chars)
{
if (wc[i] == 0)
{
p = g_utf8_next_char (p);
- i--;
+#ifdef DEBUG
+ fprintf(stderr,"NULL-character detected in generated string.!");
+#endif
+ i++;
}
else
{
@@ -300,40 +310,42 @@ arabic_engine_shape (PangoFont *font,
int is_vowel = arabic_isvowel(wc[i]);
cluster_start = is_vowel ? pold - text : p - text;
- if ( lvl & ar_mulefont )
+ if ( fs->level & ar_mulefont )
{
- arabic_mule_recode(&subfont,&(wc[i]),arfonts);
+ arabic_mule_recode(&subfont,&(wc[i]),
+ fs->subfonts);
}
- else if ( lvl & ar_lboxfont )
+ else if ( fs->level & ar_lboxfont )
{
- if (( i > 0 )&&(wc[i-1] == 0))
+ if (( i < n_chars-1 )&&(wc[i+1] == 0))
{
arabic_lbox_recode(&subfont,&(wc[i]),
- &(wc[i-1]), arfonts);
+ &(wc[i+1]),
+ fs->subfonts);
}
else
arabic_lbox_recode(&subfont,&(wc[i]),NULL,
- arfonts);
+ fs->subfonts);
}
- else if ( lvl & ar_naqshfont )
+ else if ( fs->level & ar_naqshfont )
{
- if (( i > 0 )&&(wc[i-1] == 0))
+ if (( i < n_chars-1 )&&(wc[i+1] == 0))
{
urdu_naqsh_recode(&subfont,&(wc[i]),
- &(wc[i-1]), arfonts);
+ &(wc[i+1]),
+ fs->subfonts);
}
else
urdu_naqsh_recode(&subfont,&(wc[i]),NULL,
- arfonts);
+ fs->subfonts);
}
- set_glyph(glyphs, font, subfont, n_glyph - 1,
+ set_glyph(glyphs, font, subfont, n_chars - i - 1,
cluster_start, wc[i], is_vowel);
pold = p;
p = g_utf8_next_char (p);
- n_glyph--;
- i--;
+ i++;
}
}
diff --git a/modules/arabic/arconv.c b/modules/arabic/arconv.c
index 7f566e0d..c41a69be 100644
--- a/modules/arabic/arconv.c
+++ b/modules/arabic/arconv.c
@@ -12,7 +12,7 @@
*/
#include "arconv.h"
-/* #define DEBUG */
+/* #define DEBUG */
#ifdef DEBUG
#include <stdio.h>
#endif
@@ -36,7 +36,7 @@ typedef struct {
static shapestruct chartable [] =
{
- {0x621, 0xFE80,4}, /* HAMZA; handle seperately !!! */
+ {0x621, 0xFE80,1}, /* HAMZA; handle seperately !!! */
{0x622, 0xFE81,2}, /* ALIF MADDA */
{0x623, 0xFE83,2}, /* ALIF HAMZA */
{0x624, 0xFE85,2}, /* WAW HAMZA */
@@ -103,7 +103,7 @@ static shapestruct chartable [] =
{0x6BA, 0xFB9E,2}, /* Urdu:NUN GHUNNA */
{0x6BB, 0xFBA0,4}, /* Sindhi: */
{0x6BE, 0xFBAA,4}, /* HA special */
- {0x6CC, 0xFEF1,4}, /* farsi ya */
+ {0x6CC, 0xFBFC,4}, /* farsi ya */
{0x6C0, 0xFBA4,2}, /* izafet: HA HAMZA */
{0x6C1, 0xFBA6,4}, /* Urdu: */
{0x6D2, 0xFBAE,2}, /* YA barree */
@@ -127,8 +127,8 @@ static shapestruct chartable [] =
/* Hamza below ( saves Kasra and special cases ), Hamza above ( always joins ).
* As I don't know what sHAMZA is good for I don't handle it.
*/
-#define iHAMZA 0x654
-#define aHAMZA 0x655
+#define aHAMZA 0x654
+#define iHAMZA 0x655
#define sHAMZA 0x674
#define WAW 0x648
@@ -163,40 +163,33 @@ copycstostring(gunichar* string,int* i,charstruct* s,arabic_level level)
{ /* s is a shaped charstruct; i is the index into the string */
if (s->basechar == 0) return;
- string[*i] = s->basechar; (*i)--; (s->lignum)--;
+ string[*i] = s->basechar; (*i)++; (s->lignum)--;
if (s->mark1 != 0)
{
if ( !(level & ar_novowel) )
{
- string[*i] = s->mark1; (*i)--; (s->lignum)--;
+ string[*i] = s->mark1; (*i)++; (s->lignum)--;
}
else
{
- string[*i] = 0; (*i)--; (s->lignum)--;
+ (s->lignum)--;
}
}
if (s->vowel != 0)
{
if (! (level & ar_novowel) )
{
- string[*i] = s->vowel; (*i)--; (s->lignum)--;
+ string[*i] = s->vowel; (*i)++; (s->lignum)--;
}
else
- { /* vowel elimination */
- string[*i] = 0; (*i)--; (s->lignum)--;
+ { /* vowel elimination */
+ (s->lignum)--;
}
}
- while (s->lignum > 0 )
- {
- string[*i] = 0; (*i)--; (s->lignum)--;
+ while (s->lignum > 0 )
+ { /* NULL-insertion for Langbox-font */
+ string[*i] = 0; (*i)++; (s->lignum)--;
}
-#ifdef DEBUG
- if (*i < -1){
- fprintf(stderr,"you are in trouble ! i = %i, the last char is %x, "
- "lignum = %i",
- *i,s->basechar,s->lignum);
- }
-#endif
}
int
@@ -249,14 +242,14 @@ charshape(gunichar s,short which)
else return 0xFE8B+(which-2); /* The Hamza-'pod' */
}
else if (s == 0x6CC)
- { /* farsi ya --> map to Alif maqsura and Ya, depending on form */
- switch (which){
- case 0: return 0xFEEF;
- case 1: return 0xFEF0;
- case 2: return 0xFEF3;
- case 3: return 0xFEF4;
- }
- }
+ { /* farsi ya --> map to Alif maqsura and Ya, depending on form */
+ switch (which){
+ case 0: return 0xFEEF;
+ case 1: return 0xFEF0;
+ case 2: return 0xFEF3;
+ case 3: return 0xFEF4;
+ }
+ }
else
{
return s;
@@ -288,11 +281,32 @@ shapecount(gunichar s)
}
}
+int unligature(charstruct* curchar,arabic_level level)
+{
+ int result = 0;
+ if (level & ar_naqshfont){
+ /* decompose Alif-Madda ... */
+ switch(curchar->basechar){
+ case ALIFHAMZA : curchar->basechar = ALIF; curchar->mark1 = aHAMZA;
+ result++; break;
+ case ALIFIHAMZA: curchar->basechar = ALIF; curchar->mark1 = iHAMZA;
+ result++; break;
+ case WAWHAMZA : curchar->basechar = WAW; curchar->mark1 = aHAMZA;
+ result++; break;
+ case ALIFMADDA :curchar->basechar = ALIF; curchar->vowel = MADDA;
+ result++; break;
+ }
+ }
+ return result;
+}
+
int
-ligature(gunichar* string,int si,int len,charstruct* oldchar)
-{ /* no ligature possible --> return 0; 1 == vowel; 2 = two chars */
+ligature(gunichar newchar,charstruct* oldchar)
+{ /* no ligature possible --> return 0; 1 == vowel; 2 = two chars
+ * 3 = Lam-Alif
+ */
int retval = 0;
- gunichar newchar = string[si];
+
if (!(oldchar->basechar)) return 0;
if (arabic_isvowel(newchar))
{
@@ -363,7 +377,10 @@ ligature(gunichar* string,int si,int len,charstruct* oldchar)
break;
default: oldchar->vowel = newchar; break;
}
- oldchar->lignum++;
+ if (retval == 1)
+ {
+ oldchar->lignum++;
+ }
return retval;
}
if (oldchar->vowel != 0)
@@ -390,12 +407,6 @@ ligature(gunichar* string,int si,int len,charstruct* oldchar)
switch (newchar)
{
case ALIF: oldchar->basechar = ALIFMADDA; retval = 2; break;
- case HAMZA:
- if (si == len-2) /* HAMZA is 2nd char */
- {
- oldchar->basechar = ALIFHAMZA; retval = 2;
- }
- break;
}
break;
case WAW:
@@ -404,57 +415,46 @@ ligature(gunichar* string,int si,int len,charstruct* oldchar)
case HAMZA:oldchar->basechar = WAWHAMZA; retval = 2; break;
}
break;
- case LAM_ALIF:
- switch (newchar)
- {
- case HAMZA:
- if (si == len-4) /* ! We assume the string has been split
- into words. This is AL-A.. I hope */
- {
- oldchar->basechar = LAM_ALIFHAMZA; retval = 2;
- }
- break;
- }
- break;
case 0:
oldchar->basechar = newchar;
oldchar->numshapes = shapecount(newchar);
retval = 1;
break;
}
- if (retval)
- {
- oldchar->lignum++;
-#ifdef DEBUG
- fprintf(stderr,"[ar] ligature : added %x to make %x\n",
- newchar,oldchar->basechar);
-#endif
- }
return retval;
}
static void
-shape(int olen,int* len,gunichar* string,arabic_level level)
+shape(int* len,const char* text,gunichar* string,arabic_level level)
{
- /* The string must be in visual order already.
+ /* string is assumed to be empty and big enough.
+ ** text is the original text.
** This routine does the basic arabic reshaping.
- ** olen is the memory lenght, *len the number of non-null characters.
+ ** *len the number of non-null characters.
*/
- charstruct oldchar,curchar;
- int si = (olen)-1;
- int j = (olen)-1;
- int join;
- int which;
-
- *len = olen;
+ /* Note ! we have to unshape each character first ! */
+ int olen = *len;
+ charstruct oldchar,curchar;
+ /* int si = (olen)-1; */
+ int j = 0;
+ int join;
+ int which;
+ gunichar nextletter;
+ const char* p = text;
+
+ *len = 0 ; /* initialize for output */
charstruct_init(&oldchar);
charstruct_init(&curchar);
- while (si >= 0)
+ while (p < text+olen)
{
- join = ligature(string,si,olen,&curchar);
+ nextletter = g_utf8_get_char (p);
+ nextletter = unshape(nextletter);
+
+ join = ligature(nextletter,&curchar);
if (!join)
{ /* shape curchar */
- int nc = shapecount(string[si]);
+ int nc = shapecount(nextletter);
+ (*len)++;
if (nc == 1)
{
which = 0; /* end or basic */
@@ -486,19 +486,25 @@ shape(int olen,int* len,gunichar* string,arabic_level level)
/* init new curchar */
charstruct_init(&curchar);
- curchar.basechar = string[si];
+ curchar.basechar = nextletter;
curchar.numshapes = nc;
curchar.lignum++;
+ (*len) += unligature(&curchar,level);
+ }
+ else if ((join == 3)&&(level & ar_lboxfont))
+ { /* Lam-Alif extra in langbox-font */
+ (*len)++;
+ curchar.lignum++;
+ }
+ else if (join == 1)
+ {
+ (*len)++;
}
- else if ( ( join == 2 )
- ||((join == 3)&&(! (level & ar_lboxfont) ))
- ||((join == 1)&&(level & ar_novowel )
- && arabic_isvowel(string[si])) )
- { /* Lam-Alif in Langbox-font is no ligature */
- /* No vowels in Mulearabic-font */
- (*len)--;
+ else
+ {
+ (*len) += unligature(&curchar,level);
}
- si--;
+ p = g_utf8_next_char (p);
}
/* Handle last char */
@@ -518,19 +524,21 @@ shape(int olen,int* len,gunichar* string,arabic_level level)
}
static void
-doublelig(int olen,int* len,gunichar* string,arabic_level level)
+doublelig(int* len,gunichar* string,arabic_level level)
{ /* Ok. We have presentation ligatures in our font. */
- int si = (olen)-1;
+ int olen = *len;
+ int j = 0, si = 1;
gunichar lapresult;
- while (si > 0)
+
+ while (si < olen)
{
lapresult = 0;
if ( level & ar_composedtashkeel ){
- switch(string[si])
+ switch(string[j])
{
case SHADDA:
- switch(string[si-1])
+ switch(string[si])
{
case KASRA: lapresult = 0xFC62; break;
case FATHA: lapresult = 0xFC60; break;
@@ -540,22 +548,22 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level)
}
break;
case KASRA:
- if (string[si-1]==SHADDA) lapresult = 0xFC62;
+ if (string[si]==SHADDA) lapresult = 0xFC62;
break;
case FATHA:
- if (string[si-1]==SHADDA) lapresult = 0xFC60;
+ if (string[si]==SHADDA) lapresult = 0xFC60;
break;
case DAMMA:
- if (string[si-1]==SHADDA) lapresult = 0xFC61;
+ if (string[si]==SHADDA) lapresult = 0xFC61;
break;
}
}
if ( level & ar_lig ){
- switch(string[si])
+ switch(string[j])
{
case 0xFEDF: /* LAM initial */
- switch(string[si-1]){
+ switch(string[si]){
case 0xFE9E : lapresult = 0xFC3F; break; /* DJEEM final*/
case 0xFEA0 : lapresult = 0xFCC9; break;
case 0xFEA2 : lapresult = 0xFC40; break; /* .HA final */
@@ -567,21 +575,21 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level)
}
break;
case 0xFE97: /* TA inital */
- switch(string[si-1]){
+ switch(string[si]){
case 0xFEA0 : lapresult = 0xFCA1; break; /* DJ init */
case 0xFEA4 : lapresult = 0xFCA2; break; /* .HA */
case 0xFEA8 : lapresult = 0xFCA3; break; /* CHA */
}
break;
case 0xFE91: /* BA inital */
- switch(string[si-1]){
+ switch(string[si]){
case 0xFEA0 : lapresult = 0xFC9C; break; /* DJ init */
case 0xFEA4 : lapresult = 0xFC9D; break; /* .HA */
case 0xFEA8 : lapresult = 0xFC9E; break; /* CHA */
}
break;
case 0xFEE7: /* NUN inital */
- switch(string[si-1]){
+ switch(string[si]){
case 0xFEA0 : lapresult = 0xFCD2; break; /* DJ init */
case 0xFEA4 : lapresult = 0xFCD3; break; /* .HA */
case 0xFEA8 : lapresult = 0xFCD4; break; /* CHA */
@@ -589,14 +597,14 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level)
break;
case 0xFEE8: /* NUN medial */
- switch(string[si-1]){
+ switch(string[si]){
/* missing : nun-ra : FC8A und nun-sai : FC8B */
case 0xFEAE : lapresult = 0xFC8A; break; /* nun-ra */
case 0xFEB0 : lapresult = 0xFC8B; break; /* nun-sai */
}
break;
case 0xFEE3: /* Mim initial */
- switch(string[si-1]){
+ switch(string[si]){
case 0xFEA0 : lapresult = 0xFCCE ; break; /* DJ init */
case 0xFEA4 : lapresult = 0xFCCF ; break; /* .HA init */
case 0xFEA8 : lapresult = 0xFCD0 ; break; /* CHA init */
@@ -605,7 +613,7 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level)
break;
case 0xFED3: /* Fa initial */
- switch(string[si-1]){
+ switch(string[si]){
case 0xFEF2 : lapresult = 0xFC32 ; break; /* fi-ligature (!) */
}
break;
@@ -616,23 +624,25 @@ doublelig(int olen,int* len,gunichar* string,arabic_level level)
}
if (lapresult != 0)
{
- string[si] = lapresult; (*len)--; string[si-1] = 0x0;
+ string[j] = lapresult; (*len)--;
+ si++; /* jump over one character */
+ /* we'll have to change this, too. */
+ }
+ else
+ {
+ j++;
+ string[j] = string[si];
+ si++;
}
- si--;
}
}
void
-arabic_reshape(int* len,gunichar* string,arabic_level level)
+arabic_reshape(int* len,const char* text,gunichar* string,arabic_level level)
{
- int i;
- int olen = *len;
- for ( i = 0; i < *len; i++){
- string[i] = unshape(string[i]);
- }
- shape(olen,len,string,level);
+ shape(len,text ,string,level);
if ( level & ( ar_composedtashkeel | ar_lig ) )
- doublelig(olen,len,string,level);
+ doublelig(len,string,level);
}
diff --git a/modules/arabic/arconv.h b/modules/arabic/arconv.h
index 04fb135c..a0180e4c 100644
--- a/modules/arabic/arconv.h
+++ b/modules/arabic/arconv.h
@@ -9,11 +9,13 @@
#define __arconv_h_
#include <glib.h>
+#include "pango.h"
+#include "pangox.h"
/*
* arabic_reshape: reshapes string ( ordered left-to right visual order )
* len : before: is the length of the string
- * after : number of nun-NULL characters
+ * after : number of non-NULL characters
*
*/
typedef enum
@@ -25,7 +27,18 @@ typedef enum
ar_unifont = 0x40, ar_naqshfont = 0x80
} arabic_level;
-void arabic_reshape(int* len,gunichar* string,arabic_level level);
+typedef struct
+{
+ PangoXSubfont subfonts[3];
+ arabic_level level;
+} ArabicFontInfo;
+
+/* len : beforehand: length of text in bytes
+ * after: number of characters in string
+ * text : the original UTF-8 input text
+ * string: output buffer that receives the reshaped characters
+ */
+void arabic_reshape(int* len,const char* text,gunichar* string,arabic_level level);
int arabic_isvowel(gunichar s);
#endif
diff --git a/modules/arabic/langboxfont.c b/modules/arabic/langboxfont.c
index 8a92b51b..2cf84f57 100644
--- a/modules/arabic/langboxfont.c
+++ b/modules/arabic/langboxfont.c
@@ -15,35 +15,33 @@
#ifdef DEBUG
#include <stdio.h>
#endif
+#include "langboxfont.h"
-int
-arabic_lboxinit(PangoFont *font,PangoXSubfont* lboxfonts)
+ArabicFontInfo*
+arabic_lboxinit(PangoFont *font)
{
static char *lbox_charsets0[] = {
"iso8859-6.8x",
};
-
- PangoXSubfont *subfonts;
- int *subfont_charsets;
- int n_subfonts;
+
+ ArabicFontInfo *fs = NULL;
+ PangoXSubfont *subfonts;
+ int *subfont_charsets;
+ int n_subfonts;
n_subfonts = pango_x_list_subfonts (font,lbox_charsets0,
1, &subfonts, &subfont_charsets);
if (n_subfonts > 0)
{
- lboxfonts[0] = subfonts[0];
- g_free (subfonts);
- g_free (subfont_charsets);
- }
- else
- {
- g_free (subfonts);
- g_free (subfont_charsets);
- return 0;
+ fs = g_new (ArabicFontInfo,1);
+ fs->level = ar_standard | ar_composedtashkeel | ar_lboxfont;
+ fs->subfonts[0] = subfonts[0];
}
- return 1;
+ g_free (subfonts);
+ g_free (subfont_charsets);
+ return fs;
}
@@ -187,26 +185,14 @@ arabic_lbox_recode(PangoXSubfont* subfont,int* glyph,int* glyph2,
}
else if ((letter >= 0xFE80)&&(letter <= 0xFEF4))
{
-#ifdef DEBUG
- if (charmap[letter-0xFE80].unicodechar != letter)
- {
- fprintf(stderr,"[ar] lboxfont charmap table defect "
- "%x comes out as %x ",
- letter,charmap[letter-0xFE80].unicodechar);
- }
-#endif
*glyph = charmap[letter-0xFE80].charindex;
}
else if ((letter >= 0x64B)&&(letter <= 0x652))
{ /* a vowel */
*glyph = letter - 0x64B + 0xA8;
}
- else if ((letter >= 0xFEF5)&&(letter <= 0xFEFC))
+ else if ((letter >= 0xFEF5)&&(letter <= 0xFEFC)&&(glyph2)&&(*glyph2==0))
{ /* Lam-Alif. Langbox solved the problem in their own way ... */
-#ifdef DEBUG
- fprintf(stderr,"[ar] lbox-recoding chars %x",
- letter);
-#endif
if (!(letter % 2))
{
*glyph = 0xA6;
@@ -233,7 +219,7 @@ arabic_lbox_recode(PangoXSubfont* subfont,int* glyph,int* glyph2,
}
else switch(letter)
{
- /* extra vowels */
+ /* extra vowels */
case 0xFC5E: *glyph = 0x82; break;
case 0xFC5F: *glyph = 0x83; break;
case 0xFC60: *glyph = 0x84; break;
@@ -244,6 +230,18 @@ arabic_lbox_recode(PangoXSubfont* subfont,int* glyph,int* glyph2,
case 0x621: *glyph = charmap[0].charindex; break; /* hamza */
case 0x640: *glyph = 0xE0; break; /* tatweel */
case 0x61F: *glyph = 0xBF; break; /* question mark */
+
+ /* farsi ye */
+ case 0xFBFC: *glyph = 0x8D; break;
+ case 0xFBFD: *glyph = 0xE9; break;
+ case 0xFBFE: *glyph = 0xFE; break;
+ case 0xFBFF: *glyph = 0xFE; break;
+ /* Gaf -- the font does not have it, but this is better than nothing */
+ case 0xFB92: *glyph = 0xE3; break;
+ case 0xFB93: *glyph = 0xE3; break;
+ case 0xFB94: *glyph = 0xF9; break;
+ case 0xFB95: *glyph = 0x9B; break;
+
default:
*glyph = 0x20; /* we don't have this thing -- use a space */
/* This has to be something that does not print anything !! */
diff --git a/modules/arabic/langboxfont.h b/modules/arabic/langboxfont.h
index 1c43a47c..59be6e8c 100644
--- a/modules/arabic/langboxfont.h
+++ b/modules/arabic/langboxfont.h
@@ -8,13 +8,14 @@
#define __lboxfont_h_
#include "pango.h"
#include "pangox.h"
+#include "arconv.h"
/*
- * lboxfont must point to valid memory for this to work.
+ * create an ArabicFontInfo for the langbox font module
+ * returns: NULL on failure
*/
-int
-arabic_lboxinit(PangoFont *font,PangoXSubfont* lboxfonts);
-/* a return value of 0 means this has failed */
+ArabicFontInfo*
+arabic_lboxinit(PangoFont *font);
/* glyph2 is the next glyph in line; if there is none, put in NULL
diff --git a/modules/arabic/mulefont.c b/modules/arabic/mulefont.c
index 6bfc6b0b..084f19ac 100644
--- a/modules/arabic/mulefont.c
+++ b/modules/arabic/mulefont.c
@@ -11,15 +11,15 @@
#include "pango.h"
#include "pangox.h"
+
/* #define DEBUG */
#ifdef DEBUG
#include <stdio.h>
#endif
+#include "mulefont.h"
-
-
-int
-arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts)
+ArabicFontInfo*
+arabic_muleinit(PangoFont *font)
{
static char *mule_charsets0[] = {
"mulearabic-0",
@@ -32,14 +32,19 @@ arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts)
static char *mule_charsets2[] = {
"mulearabic-2",
};
- PangoXSubfont *subfonts;
- int *subfont_charsets;
- int n_subfonts;
+
+ ArabicFontInfo *fs = NULL;
+ PangoXSubfont *subfonts;
+ int *subfont_charsets;
+ int n_subfonts;
+ PangoXSubfont mulefonts[3] = { 0, 0, 0 }; /* 0 = charset not found; must be initialised, every slot is tested below */
n_subfonts = pango_x_list_subfonts (font,mule_charsets0,
1, &subfonts, &subfont_charsets);
if (n_subfonts > 0)
- mulefonts[0] = subfonts[0];
+ {
+ mulefonts[0] = subfonts[0];
+ }
g_free (subfonts);
g_free (subfont_charsets);
@@ -48,32 +53,28 @@ arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts)
if (n_subfonts > 0)
{
mulefonts[1] = subfonts[0];
- g_free (subfonts);
- g_free (subfont_charsets);
- }
- else
- {
- g_free (subfonts);
- g_free (subfont_charsets);
- return 0;
}
+ g_free (subfonts);
+ g_free (subfont_charsets);
n_subfonts = pango_x_list_subfonts (font,mule_charsets2,
1, &subfonts, &subfont_charsets);
if (n_subfonts > 0)
{
mulefonts[2] = subfonts[0];
- g_free (subfonts);
- g_free (subfont_charsets);
}
- else
+ g_free (subfonts);
+ g_free (subfont_charsets);
+
+ if (( mulefonts[0] != 0)&&(mulefonts[1] != 0)&&(mulefonts[2] != 0)) /* only succeed when all three mule charsets were found */
{
- g_free (subfonts);
- g_free (subfont_charsets);
- return 0;
+ fs = g_new (ArabicFontInfo,1);
+ fs->level = ar_novowel | ar_mulefont;
+ fs->subfonts[0] = mulefonts[0];
+ fs->subfonts[1] = mulefonts[1];
+ fs->subfonts[2] = mulefonts[2];
}
-
- return 1;
+ return fs; /* NULL when any of the three charsets is missing */
}
@@ -274,8 +275,14 @@ arabic_mule_recode(PangoXSubfont* subfont,int* glyph,PangoXSubfont* mulefonts)
case 0xFB58: *subfont = mulefonts[1]; *glyph = 0x66; break;
case 0xFB59: *subfont = mulefonts[1]; *glyph = 0x67; break;
/* farsi Jeh */
+ case 0xFBFC: *subfont = mulefonts[2]; *glyph = 0x5D; break;
+ case 0xFBFD: *subfont = mulefonts[2]; *glyph = 0x5E; break;
+ case 0xFBFE: *subfont = mulefonts[1]; *glyph = 0x60; break;
+ case 0xFBFF: *subfont = mulefonts[1]; *glyph = 0x61; break;
+ /* Jeh (U+FB8A, U+FB8B); the four cases above are Farsi Yeh (U+FBFC..U+FBFF) */
case 0xFB8A: *subfont = mulefonts[1]; *glyph = 0x68; break;
case 0xFB8B: *subfont = mulefonts[1]; *glyph = 0x69; break;
+
default:
*subfont = mulefonts[1];
*glyph = 0x26; /* we don't have this thing -- use a dot */
diff --git a/modules/arabic/mulefont.h b/modules/arabic/mulefont.h
index acdf1983..6851ae7d 100644
--- a/modules/arabic/mulefont.h
+++ b/modules/arabic/mulefont.h
@@ -8,15 +8,16 @@
#define __mulefont_h_
#include "pango.h"
#include "pangox.h"
+#include "arconv.h"
-/* mulefonts must be an array with at least three entries */
-
-int
-arabic_muleinit(PangoFont *font,PangoXSubfont* mulefonts);
-/* a return value of 0 means this has failed */
+/*
+ * create an ArabicFontInfo for the mulefont-module
+ * returns: NULL on failure
+ */
+ArabicFontInfo*
+arabic_muleinit(PangoFont *font);
void
arabic_mule_recode(PangoXSubfont* subfont,int* glyph,PangoXSubfont* mulefonts);
-
#endif
diff --git a/modules/arabic/naqshfont.c b/modules/arabic/naqshfont.c
index c390ce4d..d4885245 100644
--- a/modules/arabic/naqshfont.c
+++ b/modules/arabic/naqshfont.c
@@ -1,6 +1,7 @@
/* pango-arabic module
*
* (C) 2000 K. Koehler <koehler@or.uni-bonn.de>
+ * (c) 2000 Pablo Saratxaga <pablo@mandrakesoft.com>
*
* This file provides a mapping unicode <- naqshfont
*/
@@ -11,172 +12,305 @@
#include "pango.h"
#include "pangox.h"
-/* #define DEBUG */
+/* #define DEBUG */
#ifdef DEBUG
#include <stdio.h>
#endif
+#include "naqshfont.h"
-int
-urdu_naqshinit(PangoFont *font,PangoXSubfont* nqfont)
+ArabicFontInfo*
+urdu_naqshinit(PangoFont *font)
{
static char *nq_charsets0[] = {
- "urdunaqsh-0",
+ "symbol-0",
+/* "urdunaqsh-0" */
};
- PangoXSubfont *subfonts;
- int *subfont_charsets;
- int n_subfonts;
+ ArabicFontInfo *fs = NULL;
+ PangoXSubfont *subfonts;
+ int *subfont_charsets;
+ int n_subfonts;
n_subfonts = pango_x_list_subfonts (font,nq_charsets0,
1, &subfonts, &subfont_charsets);
if (n_subfonts > 0)
{
- nqfont[0] = subfonts[0];
- g_free (subfonts);
- g_free (subfont_charsets);
+ fs = g_new (ArabicFontInfo,1);
+ fs->level = ar_standard | ar_naqshfont;
+ fs->subfonts[0] = subfonts[0];
}
- else
- {
- g_free (subfonts);
- g_free (subfont_charsets);
- return 0;
- }
-
- return 1;
+ g_free (subfonts);
+ g_free (subfont_charsets);
+ return fs;
}
+
typedef struct {
gunichar unicodechar;
- int charindex;
+ int charindex;
} fontentry;
static fontentry charmap [] =
-{
- { 0xFE80,0x4A }, /* HAMZA; handle seperately !!! */
- { 0xFE81,0x22 }, /* ALIF MADDA */
- { 0xFE82,0xAD },
- { 0xFE83,0x22 }, /* ALIF HAMZA */
- { 0xFE84,0xAD },
- { 0xFE85,0x46 }, /* WAW HAMZA */
- { 0xFE86,0xD2 },
- { 0xFE87,0x22 }, /* ALIF IHAMZA */
- { 0xFE88,0xAD },
- { 0xFE89,0x4C }, /* YA HAMZA */
- { 0xFE8A,0xD7 },
-
- { 0xFE8B,0x6C }, /* HMAZA-'pod' */
- { 0xFE8C,0xAB },
- { 0xFE8D,0x22 }, /* ALIF */
- { 0xFE8E,0xAD },
- { 0xFE8F,0x23 }, /* BA */
+{ /* the basic arabic block comes first ... */
+ { 0xFE8B,0x6C }, /* HAMZA-'pod' */
+ { 0xFE8C,0xAB },
+ { 0xFE8D,0x22 }, /* ALEF */
+ { 0xFE8E,0xAD },
+ { 0xFE8F,0x23 }, /* BEH */
{ 0xFE90,0xAE },
{ 0xFE91,0x4E },
{ 0xFE92,0x6E },
- { 0xFE93,0x48 }, /* TA MARBUTA */
+ { 0xFE93,0x48 }, /* TEH MARBUTA */
{ 0xFE94,0xD5 },
- { 0xFE95,0x25 }, /* TA */
+ { 0xFE95,0x25 }, /* TEH */
{ 0xFE96,0xB0 },
{ 0xFE97,0x50 },
{ 0xFE98,0x70 },
- { 0xFE99,0x27 }, /* THA */
+ { 0xFE99,0x27 }, /* THEH */
{ 0xFE9A,0xB2 },
{ 0xFE9B,0x52 },
{ 0xFE9C,0x72 },
- { 0xFE9D,0x28 }, /* DJIM */
+ { 0xFE9D,0x28 }, /* JEEM */
{ 0xFE9E,0xB3 },
{ 0xFE9F,0x53 },
{ 0xFEA0,0x73 },
- { 0xFEA1,0x2A }, /* .HA */
+ { 0xFEA1,0x2A }, /* HAH . */
{ 0xFEA2,0xB5 },
{ 0xFEA3,0x55 },
{ 0xFEA4,0x75 },
- { 0xFEA5,0x2B }, /* CHA */
+ { 0xFEA5,0x2B }, /* KHAH */
{ 0xFEA6,0xB6 },
{ 0xFEA7,0x56 },
{ 0xFEA8,0x76 },
- { 0xFEA9,0x2C }, /* DAL */
+ { 0xFEA9,0x2C }, /* DAL */
{ 0xFEAA,0xB8 },
- { 0xFEAB,0x2E }, /* THAL */
+ { 0xFEAB,0x2E }, /* THAL */
{ 0xFEAC,0xBA },
- { 0xFEAD,0x2F }, /* RA */
+ { 0xFEAD,0x2F }, /* REH */
{ 0xFEAE,0xBB },
- { 0xFEAF,0x31 }, /* ZAY */
+ { 0xFEAF,0x31 }, /* ZAIN (ZAY) */
{ 0xFEB0,0xBD },
-
- { 0xFEB1,0x33 }, /* SIN */
+ { 0xFEB1,0x33 }, /* SEEN */
{ 0xFEB2,0xBF },
{ 0xFEB3,0x57 },
{ 0xFEB4,0x77 },
- { 0xFEB5,0x34 }, /* SHIN */
+ { 0xFEB5,0x34 }, /* SHEEN */
{ 0xFEB2,0xC0 },
{ 0xFEB3,0x58 },
{ 0xFEB4,0x78 },
- { 0xFEB9,0x35 }, /* SAAD */
+ { 0xFEB9,0x35 }, /* SAD */
{ 0xFEBA,0xC1 },
{ 0xFEBB,0x59 },
{ 0xFEBC,0x79 },
- { 0xFEBD,0x36 }, /* DAAD */
+ { 0xFEBD,0x36 }, /* DAD */
{ 0xFEBE,0xC2 },
{ 0xFEBF,0x5A },
{ 0xFEC0,0x7A },
- { 0xFEC1,0x37 }, /* .TA */
+ { 0xFEC1,0x37 }, /* TAH . */
{ 0xFEC2,0xC3 },
{ 0xFEC3,0x5B },
{ 0xFEC4,0x7B },
- { 0xFEC5,0x38 }, /* .ZA */
+ { 0xFEC5,0x38 }, /* ZAH . */
{ 0xFEC6,0xC4 },
{ 0xFEC7,0x5C },
{ 0xFEC8,0x7C },
- { 0xFEC9,0x39 }, /* AIN */
+ { 0xFEC9,0x39 }, /* AIN */
{ 0xFECA,0xC5 },
{ 0xFECB,0x5D },
{ 0xFECC,0x7D },
- { 0xFECD,0x3A }, /* RAIN */
+ { 0xFECD,0x3A }, /* GHAIN */
{ 0xFECE,0xC6 },
{ 0xFECF,0x5E },
{ 0xFED0,0x7E },
- { 0xFED1,0x3B }, /* FA */
+ { 0xFED1,0x3B }, /* FEH */
{ 0xFED2,0xC7 },
{ 0xFED3,0x5F },
{ 0xFED4,0xA1 },
-
- { 0xFED5,0x3D }, /* QAF */
+ { 0xFED5,0x3D }, /* QAF */
{ 0xFED6,0xC9 },
{ 0xFED7,0x61 },
{ 0xFEB8,0xA3 },
- { 0xFED9,0x3E }, /* KAF */
+ { 0xFED9,0x3E }, /* KAF */
{ 0xFEDA,0xCA },
{ 0xFEDB,0x62 },
{ 0xFEDC,0xA4 },
- { 0xFEDD,0x41 }, /* LAM */
+ { 0xFEDD,0x41 }, /* LAM */
{ 0xFEDE,0xCD },
{ 0xFEDF,0x66 },
{ 0xFEE0,0xA6 },
- { 0xFEE1,0x43 }, /* MIM */
+ { 0xFEE1,0x43 }, /* MEEM */
{ 0xFEE2,0xCF },
{ 0xFEE3,0x67 },
{ 0xFEE4,0xA7 },
- { 0xFEE5,0x44 }, /* NUN */
+ { 0xFEE5,0x44 }, /* NOON */
{ 0xFEE6,0xD0 },
{ 0xFEE7,0x68 },
{ 0xFEE8,0xA8 },
-
- { 0xFEE9,0x47 }, /* HA */
+ { 0xFEE9,0x47 }, /* HEH (HA) */
{ 0xFEEA,0xD4 },
{ 0xFEEB,0x6B },
{ 0xFEEC,0xAA },
- { 0xFEED,0x46 }, /* WAW */
+ { 0xFEED,0x46 }, /* WAW */
{ 0xFEEE,0xD2 },
- { 0xFEEF,0x4B }, /* ALIF MAQSORA */
+ { 0xFEEF,0x4B }, /* ALEF MAKSURA */
{ 0xFEF0,0xD6 },
- { 0xFEF1,0x4C }, /* YA */
+ { 0xFEF1,0x4C }, /* YEH (YA) */
{ 0xFEF2,0xD7 },
{ 0xFEF3,0x6D },
- { 0xFEF4,0xAC }
+ { 0xFEF4,0xAC },
+
+
+ { 0x0020,0x20 }, /* space */ /* We ought to NOT get these ! */
+ { 0x0021,0xEA }, /* ! */
+ { 0x0027,0xEC }, /* ' */
+ { 0x0028,0xED }, /* ( */
+ { 0x0029,0xEE }, /* ) */
+ { 0x002E,0xE7 }, /* . */
+ { 0x003A,0xEB }, /* : */
+ { 0x00A0,0xA0 }, /* non breaking space */
+
+
+
+ { 0x060C,0xE8 }, /* arabic comma */
+ { 0x061F,0xE9 }, /* arabic question mark */
+ { 0x0621,0x4A }, /* hamza */
+ { 0x0640,0xE6 }, /* arabic tatweel */
+ { 0x064B,0xF8 }, /* arabic fathatan, unshaped */
+ { 0x064C,0xF7 }, /* arabic dammatan, unshaped */
+ { 0x064D,0xF9 }, /* arabic kasratan, unshaped */
+ { 0x064E,0xFE }, /* arabic fatha, unshaped */
+ { 0x064F,0xFD }, /* arabic damma, unshaped */
+ { 0x0650,0xFF }, /* arabic kasra, unshaped */
+ { 0x0651,0xFC }, /* arabic shadda, unshaped */
+ { 0x0652,0xA0 }, /* sukun -- non-existent in the font */
+ { 0x0653,0xF3 }, /* arabic madda above, should occur in only one case: upon left-joined Alif */
+ { 0x0654,0xF6 }, /* arabic hamza above, unshaped */
+ { 0x0655,0xF5 }, /* arabic hamza below, unshaped */
+
+ /* arabic digits */
+ { 0x0660,0xE5 }, /* arabic digit 0 */
+ { 0x0661,0xD9 }, /* arabic digit 1 */
+ { 0x0662,0xDA }, /* arabic digit 2 */
+ { 0x0663,0xDB }, /* arabic digit 3 */
+ { 0x0664,0xDC }, /* arabic digit 4 */
+ { 0x0665,0xDE }, /* arabic digit 5 */
+ { 0x0666,0xE0 }, /* arabic digit 6 */
+ { 0x0667,0xE1 }, /* arabic digit 7 */
+ { 0x0668,0xE3 }, /* arabic digit 8 */
+ { 0x0669,0xE4 }, /* arabic digit 9 */
+ { 0x0670,0xF4 }, /* arabic letter superscript alef (the percent sign is U+066A) */
+ { 0x0679,0x26 }, /* some arabic letters, unshaped */
+ { 0x067E,0x24 },
+ { 0x0686,0x29 },
+ { 0x0688,0x2D },
+ { 0x0691,0x30 },
+ { 0x0698,0x32 },
+ { 0x06A4,0x3C },
+ { 0x06A9,0x3F },
+ { 0x06AF,0x40 },
+ { 0x06BA,0x45 },
+ { 0x06BE,0x49 },
+ { 0x06C1,0x47 },
+ { 0x06CC,0x4B },
+ { 0x06D2,0x4D },
+ { 0x06D4,0xE7 },
+ /* persian digits */
+ { 0x06F0,0xE5 }, /* persian digit 0 */
+ { 0x06F1,0xD9 }, /* persian digit 1 */
+ { 0x06F2,0xDA }, /* persian digit 2 */
+ { 0x06F3,0xDB }, /* persian digit 3 */
+ { 0x06F4,0xDD }, /* persian digit 4 */
+ { 0x06F5,0xDF }, /* persian digit 5 */
+ { 0x06F6,0xE0 }, /* persian digit 6 */
+ { 0x06F7,0xE2 }, /* persian digit 7 */
+ { 0x06F8,0xE3 }, /* persian digit 8 */
+ { 0x06F9,0xE4 }, /* persian digit 9 */
+
+ /* shaped letters & ligatures */
+ { 0xFB56,0x24 }, /* PEH */
+ { 0xFB57,0xAF },
+ { 0xFB58,0x4F },
+ { 0xFB59,0x6F },
+ { 0xFB66,0x26 }, /* TTEH */
+ { 0xFB67,0xB1 },
+ { 0xFB68,0x51 },
+ { 0xFB69,0x71 },
+ { 0xFB6A,0x3C }, /* VEH */
+ { 0xFB6B,0xC8 },
+ { 0xFB6C,0x60 },
+ { 0xFB6D,0xA2 },
+ { 0xFB7A,0x29 }, /* TCHEH */
+ { 0xFB7B,0xB4 },
+ { 0xFB7C,0x54 },
+ { 0xFB7D,0x74 },
+ { 0xFB88,0x2D }, /* DDAL */
+ { 0xFB89,0xB9 },
+ { 0xFB8A,0x32 }, /* JEH */
+ { 0xFB8B,0xBE },
+ { 0xFB8C,0x30 }, /* RREH */
+ { 0xFB8D,0xBC },
+ { 0xFB8E,0x3F }, /* KEHEH */
+ { 0xFB8F,0xCB },
+ { 0xFB90,0x62 },
+ { 0xFB90,0x64 },
+ { 0xFB91,0xA4 },
+ { 0xFB92,0x40 }, /* GAF */
+ { 0xFB93,0xCC },
+ { 0xFB94,0x63 },
+ { 0xFB94,0x65 },
+ { 0xFB95,0xA5 },
+ { 0xFB9E,0x45 }, /* NOON GHUNNA */
+ { 0xFB9F,0xD1 },
+ { 0xFBA6,0x47 }, /* HEH GOAL */
+ { 0xFBA7,0xD3 },
+ { 0xFBA8,0x69 },
+ { 0xFBA8,0x6A },
+ { 0xFBA9,0xA9 },
+ { 0xFBAA,0x49 }, /* HEH DOACHASHMEE */
+ { 0xFBAB,0xAA },
+ { 0xFBAC,0x6B },
+ { 0xFBAD,0xAA },
+ { 0xFBAE,0x4D }, /* YEH BAREE */
+ { 0xFBAF,0xD8 },
+ { 0xFBFC,0x4B }, /* FARSI YEH */
+ { 0xFBFD,0xD6 },
+ { 0xFBFE,0x6D },
+ { 0xFBFF,0xAC },
+ { 0xFE80,0x4A }, /* HAMZA */
+ { 0xFE81,0x21 }, /* ALEF WITH MADDA ABOVE */
+ { 0xFE82,0xAD },
+
+ /* fake entries (the font doesn't provide glyphs with the hamza
+ * above; so the ones without hamza are used, as a best approach) */
+ /* these *should never occur* */
+ { 0xFE83,0x22 }, /* ALIF HAMZA */
+ { 0xFE84,0xAD },
+ { 0xFE85,0x46 }, /* WAW HAMZA */
+ { 0xFE86,0xD2 },
+ { 0xFE87,0x22 }, /* ALIF IHAMZA */
+ { 0xFE88,0xAD },
+ { 0xFE89,0x4C }, /* YA HAMZA */
+ { 0xFE8A,0xD7 },
+
+ { 0xFEFB,0x42 }, /* ligature LAM+ALEF ISOLATED */
+ { 0xFEFC,0xCE }, /* ligature LAM+ALEF FINAL */
+
+ /* I've been unable to map those font glyphs to an unicode value;
+ * if you can, please tell me -- Pablo Saratxaga <pablo@mandrakesoft.com>
+ { 0x????,0xB7 },
+ { 0x????,0xEF },
+ { 0x????,0xF0 },
+ { 0x????,0xF1 },
+ { 0x????,0xF2 },
+ { 0x????,0xFA },
+ { 0x????,0xFB },
+ */
+
+ { 0x0000,0x00 }
};
@@ -185,28 +319,23 @@ urdu_naqsh_recode(PangoXSubfont* subfont,int* glyph,int* glyph2,
PangoXSubfont* nqfont)
{
int letter=*glyph;
+ int i;
+
*subfont = nqfont[0];
- if ((letter >= 0x661)&&(letter <= 0x664)) /* indic numeral */
- {
- *glyph = letter - 0x661 + 0xD9;
- }
- if ((letter >= 0x6F1)&&(letter <= 0x6F3)) /* indic numeral */
- {
- *glyph = letter - 0x6F1 + 0xD9;
- }
- else if ((letter >= 0xFE80)&&(letter <= 0xFEF4))
+
+ if ((letter >= 0xFE8B)&&(letter <= 0xFEF4))
{
- *glyph = charmap[letter-0xFE80].charindex;
+ *glyph = charmap[letter-0xFE8B].charindex;
+ }
+ else if (letter < 0xFF )
+ { /* recoded in previous run */
+ *glyph = letter;
}
else if ((letter >= 0xFEF5)&&(letter <= 0xFEFC))
{ /* Lam-Alif. Also solved interestingly ...
* At least, the Lam-Alif itself is not split ...
*/
-#ifdef DEBUG
- fprintf(stderr,"[ar] nq-recoding chars %x",
- letter);
-#endif
if (!(letter % 2))
{
*glyph = 0xCE;
@@ -224,94 +353,19 @@ urdu_naqsh_recode(PangoXSubfont* subfont,int* glyph,int* glyph2,
case 0xFEF9 :
case 0xFEFA : *glyph2 = 0xF5; break; /* Lam-Alif iHamza */
}
- }
- else switch(letter)
+ }
+ else
{
- case 0x665: *glyph = 0xDE; break; /* indic numerals */
- case 0x666: *glyph = 0xE0; break;
- case 0x667: *glyph = 0xE1; break;
- case 0x668: *glyph = 0xE3; break;
- case 0x669: *glyph = 0xE4; break;
- case 0x660: *glyph = 0xE5; break;
- /* farsi numerals */
- case 0x6F4: *glyph = 0xDD; break;
- case 0x6F5: *glyph = 0xDF; break;
- case 0x6F6: *glyph = 0xE0; break;
- case 0x6F7: *glyph = 0xE1; break;
- case 0x6F8: *glyph = 0xE3; break;
- case 0x6F9: *glyph = 0xE4; break;
- case 0x6F0: *glyph = 0xE5; break;
- /* tashkeel */
- case 0x64B: *glyph = 0xF8; break;
- case 0x64C: *glyph = 0xF7; break;
- case 0x64D: *glyph = 0xF9; break;
- case 0x64E: *glyph = 0xFD; break;
- case 0x64F: *glyph = 0xFE; break;
- case 0x650: *glyph = 0xFF; break;
- case 0x651: *glyph = 0xFC; break;
- case 0x652: *glyph = 0xEC; break;
- case 0x653: *glyph = 0xF3; break;
- case 0x670: *glyph = 0xF4; break;
- /* urdu letters */
- case 0xFB56: *glyph = 0x24;
- case 0xFB57: *glyph = 0x24;
- case 0xFB58: *glyph = 0x24;
- case 0xFB59: *glyph = 0x24;
-
- case 0xFB66: *glyph = 0x26;
- case 0xFB67: *glyph = 0xB1;
- case 0xFB68: *glyph = 0x51;
- case 0xFB69: *glyph = 0x71;
-
- case 0xFB7A: *glyph = 0x29;
- case 0xFB7B: *glyph = 0xB4;
- case 0xFB7C: *glyph = 0x54;
- case 0xFB7D: *glyph = 0x74;
-
- case 0xFB88: *glyph = 0x2D;
- case 0xFB89: *glyph = 0xB9;
-
- case 0xFB8C: *glyph = 0x30;
- case 0xFB8D: *glyph = 0xBC;
-
- case 0xFB8A: *glyph = 0x32;
- case 0xFB8B: *glyph = 0xBE;
-
- case 0xFB6A: *glyph = 0x3C;
- case 0xFB6B: *glyph = 0xC8;
- case 0xFB6C: *glyph = 0x60;
- case 0xFB6D: *glyph = 0xA2;
-
- case 0xFB8E: *glyph = 0x3F;
- case 0xFB8F: *glyph = 0xCB;
- case 0xFB90: *glyph = 0x62;
- case 0xFB91: *glyph = 0xA4;
-
- case 0xFB92: *glyph = 0x40;
- case 0xFB93: *glyph = 0xCC;
- case 0xFB94: *glyph = 0x63;
- case 0xFB95: *glyph = 0xA5;
-
- case 0xFBAA: *glyph = 0x49;
- case 0xFBAB: *glyph = 0xD4;
- case 0xFBAC: *glyph = 0x6B;
- case 0xFBAD: *glyph = 0xAA;
-
- case 0xFBAE: *glyph = 0x4D;
- case 0xFBAF: *glyph = 0xD8;
-
- case 0xFBA6: *glyph = 0x47;
- case 0xFBA7: *glyph = 0xD3;
- case 0xFBA8: *glyph = 0x69;
- case 0xFBA9: *glyph = 0xA9;
-
-
- /* specials */
- case 0x621: *glyph = 0x4A; break;
- case 0x640: *glyph = 0xD3; break; /* tatweel ? */
- case 0x61F: *glyph = 0xE9; break; /* Question mark */
-
- default: *glyph = 0xE5; break; /* don't know what to do */
+ for (i=0 ; charmap[i].unicodechar!=0x0000 ; i++) {
+ if (charmap[i].unicodechar == letter) {
+ *glyph = charmap[i].charindex;
+ break;
+ }
+ }
+ if (charmap[i].unicodechar == 0x0000) {
+ /* if the glyph wasn't on the table */
+ *glyph = 0xE5; /* don't know what to do */
+ }
}
}
diff --git a/modules/arabic/naqshfont.h b/modules/arabic/naqshfont.h
index f4f7f098..4abde1e3 100644
--- a/modules/arabic/naqshfont.h
+++ b/modules/arabic/naqshfont.h
@@ -2,19 +2,20 @@
*
* (C) 2000 K. Koehler <koehler@or.uni-bonn.de>
*
- * This file provides a mapping unicode <- mulefont
+ * This file provides a mapping unicode <- urdu nq-font
*/
#ifndef __nqfont_h_
#define __nqfont_h_
#include "pango.h"
#include "pangox.h"
+#include "arconv.h"
/*
- * nqfont must point to valid memory for this to work.
+ * create an ArabicFontInfo for the urdu_naqshfont-module
+ * returns: NULL on failure
*/
-int
-urdu_naqshinit(PangoFont *font,PangoXSubfont* nqfont);
-/* a return value of 0 means this has failed */
+ArabicFontInfo*
+urdu_naqshinit(PangoFont *font);
/* glyph2 is the next glyph in line; if there is none, put in NULL
@@ -23,3 +24,5 @@ void
urdu_naqsh_recode(PangoXSubfont* subfont,int* glyph,int* glyph2,
PangoXSubfont* nqfont);
#endif
+
+