summaryrefslogtreecommitdiff
path: root/modules
diff options
context:
space:
mode:
author: Owen Taylor <otaylor@redhat.com> 2001-11-26 23:18:53 +0000
committer: Owen Taylor <otaylor@src.gnome.org> 2001-11-26 23:18:53 +0000
commit: e3f92b5972401c611f26f028804c4b7056f0be00 (patch)
tree: 6564f810c7235e71ffff0dd844961dc1438433e6 /modules
parent: 1170903eb4010f837197c71d2817b9933b3f9ef4 (diff)
download: pango-e3f92b5972401c611f26f028804c4b7056f0be00.tar.gz
Instead of including all-unicode.txt in the distribution, include a small program to generate it.
Mon Nov 26 18:17:00 2001 Owen Taylor <otaylor@redhat.com>

  * tests/Makefile.am tests/gen-all-unicode.c: Instead of including
    all-unicode.txt in the distribution, include a small program to
    generate it.

  * modules/hangul/hangul-x.c (render_syllable_with_ksc5601): When
    falling back to uncomposed Jamos, if we can't render a Jamo in any
    way, use the unknown glyph rather than nothing.

  * modules/hangul/hangul-x.c (hangul_engine_shape): Prevent overflows
    for invalid sequences which can be of arbitrary length. (#50086)

  * modules/arabic/arconv.c: New versions from Roozbeh Pournader.
    - Replacing the tables with automatically-generated ones
    - Using binary search instead of linear search for table lookup
    - Updating all the names with Unicode names (eg Nun -> Noon)
    - Fixed the Hamza mixup, using the Unicode rules
    - Updating missed cases (eg Superscript Alef was missing from
      arabic_isvowel)
    - Removing too-intelligent cases which made the code non-compliant
      (eg Alef with Hamza above+Kasra was changing to Alef with Hamza
      below)
    - Removing 'connecttoleft' from 'charstruct' (replacing it with a
      macro)
    - Indenting in the GNU style
Diffstat (limited to 'modules')
-rw-r--r-- modules/arabic/arconv.c 1296
-rw-r--r-- modules/hangul/hangul-x.c 47
2 files changed, 767 insertions, 576 deletions
diff --git a/modules/arabic/arconv.c b/modules/arabic/arconv.c
index e86abcbe..6980bcf3 100644
--- a/modules/arabic/arconv.c
+++ b/modules/arabic/arconv.c
@@ -1,14 +1,8 @@
/* This is part of Pango - Arabic shaping module
*
- * (C) 2000 Karl Koehler<koehler@or.uni-bonn.de>
+ * (C) 2000 Karl Koehler <koehler@or.uni-bonn.de>
+ * (C) 2001 Roozbeh Pournader <roozbeh@sharif.edu>
*
- * Note : The book "The Unicode Standard Version 3.0" is not very helpful
- * regarding arabic, so I implemented this according to my own best
- * knowledge. Bad examples from the book are 'ALEF.LAM'-ligature,
- * ( one also sais fi-ligature, not if-ligature ) and HAMZA handling.
- * There is only _one_ letter HAMZA, and so four (!) forms of HAMZA in
- * the basic section seem .. strange ( maybe I just have not understood
- * the sense of them, though ).
*/
#include "arconv.h"
@@ -17,630 +11,782 @@
#include <stdio.h>
#endif
-typedef struct {
- gunichar basechar;
- gunichar charstart;
- int count;
-} shapestruct;
-
-typedef struct {
- gunichar basechar;
- gunichar mark1; /* has to be initialized to zero */
- gunichar vowel; /* */
- char connecttoleft;
- signed char lignum; /* is a ligature with lignum aditional characters */
- char numshapes;
-} charstruct;
-
-/* The Unicode order is always: Standalone End Beginning Middle */
-
-static shapestruct chartable [] =
+typedef struct
{
- {0x621, 0xFE80,1}, /* HAMZA; handle seperately !!! */
- {0x622, 0xFE81,2}, /* ALIF MADDA */
- {0x623, 0xFE83,2}, /* ALIF HAMZA */
- {0x624, 0xFE85,2}, /* WAW HAMZA */
- {0x625, 0xFE87,2}, /* ALIF IHAMZA */
- {0x626, 0xFE89,4}, /* YA HAMZA */
- {0x627, 0xFE8D,2}, /* ALIF */
- {0x628, 0xFE8F,4}, /* BA */
- {0x629, 0xFE93,2}, /* TA MARBUTA */
- {0x62A, 0xFE95,4}, /* TA */
- {0x62B, 0xFE99,4}, /* THA */
- {0x62C, 0xFE9D,4}, /* DJIM */
- {0x62D, 0xFEA1,4}, /* HA */
- {0x62E, 0xFEA5,4}, /* CHA */
- {0x62F, 0xFEA9,2}, /* DAL */
- {0x630, 0xFEAB,2}, /* THAL */
- {0x631, 0xFEAD,2}, /* RA */
- {0x632, 0xFEAF,2}, /* ZAY */
- {0x633, 0xFEB1,4}, /* SIN */
- {0x634, 0xFEB5,4}, /* SHIN */
- {0x635, 0xFEB9,4}, /* SAAD */
- {0x636, 0xFEBD,4}, /* DAAD */
- {0x637, 0xFEC1,4}, /* .TA */
- {0x638, 0xFEC5,4}, /* .ZA */
- {0x639, 0xFEC9,4}, /* AIN */
- {0x63A, 0xFECD,4}, /* RAIN */
- {0x63B, 0x0000,0}, /* : */
- {0x63C, 0x0000,0}, /* epmty for */
- {0x63D, 0x0000,0}, /* simple */
- {0x63E, 0x0000,0}, /* indexing */
- {0x63F, 0x0000,0}, /* : */
- {0x640, 0x0640,4}, /* tatweel */
- {0x641, 0xFED1,4}, /* FA */
- {0x642, 0xFED5,4}, /* QAF */
- {0x643, 0xFED9,4}, /* KAF */
- {0x644, 0xFEDD,4}, /* LAM */
- {0x645, 0xFEE1,4}, /* MIM */
- {0x646, 0xFEE5,4}, /* NUN */
- {0x647, 0xFEE9,4}, /* HA */
- {0x648, 0xFEED,2}, /* WAW */
- {0x649, 0xFEEF,2}, /* ALIF MAQSURA */
- {0x64A, 0xFEF1,4}, /* YA */
- /* The following are letters are not plain arabic */
- /* some of the coding does not preserve order ... */
- {0x679, 0xFB66,4}, /* Urdu:TTEH */
- {0x67B, 0xFB52,4}, /* Sindhi: */
- {0x67E, 0xFB56,4}, /* PEH: latin compatibility */
- {0x680, 0xFB62,4}, /* Sindhi: */
- {0x683, 0xFB86,4}, /* " */
- {0x684, 0xFB72,4}, /* " */
- {0x686, 0xFB7A,4}, /* Persian: Tcheh */
- {0x687, 0xFB7E,4}, /* Sindhi: */
- {0x68C, 0xFB84,2}, /* Sindhi: DAHAL */
- {0x68D, 0xFB82,2}, /* Sindhi */
- {0x68E, 0xFB86,2}, /* */
- {0x691, 0xFB8C,2}, /* Urdu */
- {0x698, 0xFB8A,2}, /* Persian: JEH */
- {0x6A4, 0xFB6A,4}, /* VEH: latin compatibility */
- {0x6A6, 0xFB6E,4}, /* Sindhi */
- {0x6A9, 0xFB8E,4}, /* Persan K */
- {0x6AA, 0xFB8E,4}, /* extrawide KAF-> Persian KAF */
- {0x6AF, 0xFB92,4}, /* Persian: GAF */
- {0x6B1, 0xFB9A,4}, /* Sindhi: */
- {0x6B3, 0xFB97,4}, /* */
- {0x6BA, 0xFB9E,2}, /* Urdu:NUN GHUNNA */
- {0x6BB, 0xFBA0,4}, /* Sindhi: */
- {0x6BE, 0xFBAA,4}, /* HA special */
- {0x6CC, 0xFBFC,4}, /* farsi ya */
- {0x6C0, 0xFBA4,2}, /* izafet: HA HAMZA */
- {0x6C1, 0xFBA6,4}, /* Urdu: */
- {0x6D2, 0xFBAE,2}, /* YA barree */
- {0x6D3, 0xFBB0,2}, /* YA BARREE HAMZA */
-
- {0x200D, 0x200D,4}, /* Zero-width joiner */
- {0xFEF5, 0xFEF5,2}, /* Lam-Alif Madda */
- {0xFEF7, 0xFEF7,2}, /* Lam-Alif Hamza */
- {0xFEF9, 0xFEF9,2}, /* Lam-Alif iHamza */
- {0xFEFB, 0xFEFB,2} /* Lam-Alif */
-};
-#define ALIF 0x627
-#define ALIFHAMZA 0x623
-#define ALIFIHAMZA 0x625
-#define ALIFMADDA 0x622
-#define LAM 0x644
-#define HAMZA 0x621
-#define TATWEEL 0x640
-#define JOINER 0x200D
-
-/* Hamza below ( saves Kasra and special cases ), Hamza above ( always joins ).
- * As I don't know what sHAMZA is good for I don't handle it.
- */
-#define aHAMZA 0x654
-#define iHAMZA 0x655
-#define sHAMZA 0x674
-
-#define WAW 0x648
-#define WAWHAMZA 0x624
-
-#define SHADDA 0x651
-#define KASRA 0x650
-#define FATHA 0x64E
-#define DAMMA 0x64F
-#define MADDA 0x653
-
-
-#define LAM_ALIF 0xFEFB
-#define LAM_ALIFHAMZA 0xFEF7
-#define LAM_ALIFIHAMZA 0xFEF9
-#define LAM_ALIFMADDA 0xFEF5
+ gunichar basechar;
+ int count;
+ gunichar charshape[4];
+}
+shapestruct;
+typedef struct
+{
+ gunichar basechar;
+ gunichar mark1; /* has to be initialized to zero */
+ gunichar vowel;
+ signed char lignum; /* is a ligature with lignum aditional characters */
+ char numshapes;
+}
+charstruct;
+
+#define connects_to_left(a) ((a).numshapes > 2)
+
+/* The Unicode order is always 'isolated, final, initial, medial'. */
+
+/* *INDENT-OFF* */
+static shapestruct chartable[] = {
+ {0x0621, 1, {0xFE80}}, /* HAMZA */
+ {0x0622, 2, {0xFE81, 0xFE82}}, /* ALEF WITH MADDA ABOVE */
+ {0x0623, 2, {0xFE83, 0xFE84}}, /* ALEF WITH HAMZA ABOVE */
+ {0x0624, 2, {0xFE85, 0xFE86}}, /* WAW WITH HAMZA ABOVE */
+ {0x0625, 2, {0xFE87, 0xFE88}}, /* ALEF WITH HAMZA BELOW */
+ {0x0626, 4, {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}}, /* YEH WITH HAMZA ABOVE */
+ {0x0627, 2, {0xFE8D, 0xFE8E}}, /* ALEF */
+ {0x0628, 4, {0xFE8F, 0xFE90, 0xFE91, 0xFE92}}, /* BEH */
+ {0x0629, 2, {0xFE93, 0xFE94}}, /* TEH MARBUTA */
+ {0x062A, 4, {0xFE95, 0xFE96, 0xFE97, 0xFE98}}, /* TEH */
+ {0x062B, 4, {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}}, /* THEH */
+ {0x062C, 4, {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}}, /* JEEM */
+ {0x062D, 4, {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}}, /* HAH */
+ {0x062E, 4, {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}}, /* KHAH */
+ {0x062F, 2, {0xFEA9, 0xFEAA}}, /* DAL */
+ {0x0630, 2, {0xFEAB, 0xFEAC}}, /* THAL */
+ {0x0631, 2, {0xFEAD, 0xFEAE}}, /* REH */
+ {0x0632, 2, {0xFEAF, 0xFEB0}}, /* ZAIN */
+ {0x0633, 4, {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}}, /* SEEN */
+ {0x0634, 4, {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}}, /* SHEEN */
+ {0x0635, 4, {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}}, /* SAD */
+ {0x0636, 4, {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}}, /* DAD */
+ {0x0637, 4, {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}}, /* TAH */
+ {0x0638, 4, {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}}, /* ZAH */
+ {0x0639, 4, {0xFEC9, 0xFECA, 0xFECB, 0xFECC}}, /* AIN */
+ {0x063A, 4, {0xFECD, 0xFECE, 0xFECF, 0xFED0}}, /* GHAIN */
+ {0x0640, 4, {0x0640, 0x0640, 0x0640, 0x0640}}, /* TATWEEL */
+ {0x0641, 4, {0xFED1, 0xFED2, 0xFED3, 0xFED4}}, /* FEH */
+ {0x0642, 4, {0xFED5, 0xFED6, 0xFED7, 0xFED8}}, /* QAF */
+ {0x0643, 4, {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}}, /* KAF */
+ {0x0644, 4, {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}}, /* LAM */
+ {0x0645, 4, {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}}, /* MEEM */
+ {0x0646, 4, {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}}, /* NOON */
+ {0x0647, 4, {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}}, /* HEH */
+ {0x0648, 2, {0xFEED, 0xFEEE}}, /* WAW */
+ {0x0649, 4, {0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9}}, /* ALEF MAKSURA */
+ {0x064A, 4, {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}}, /* YEH */
+ {0x0671, 2, {0xFB50, 0xFB51}}, /* ALEF WASLA */
+ {0x0679, 4, {0xFB66, 0xFB67, 0xFB68, 0xFB69}}, /* TTEH */
+ {0x067A, 4, {0xFB5E, 0xFB5F, 0xFB60, 0xFB61}}, /* TTEHEH */
+ {0x067B, 4, {0xFB52, 0xFB53, 0xFB54, 0xFB55}}, /* BEEH */
+ {0x067E, 4, {0xFB56, 0xFB57, 0xFB58, 0xFB59}}, /* PEH */
+ {0x067F, 4, {0xFB62, 0xFB63, 0xFB64, 0xFB65}}, /* TEHEH */
+ {0x0680, 4, {0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D}}, /* BEHEH */
+ {0x0683, 4, {0xFB76, 0xFB77, 0xFB78, 0xFB79}}, /* NYEH */
+ {0x0684, 4, {0xFB72, 0xFB73, 0xFB74, 0xFB75}}, /* DYEH */
+ {0x0686, 4, {0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D}}, /* TCHEH */
+ {0x0687, 4, {0xFB7E, 0xFB7F, 0xFB80, 0xFB81}}, /* TCHEHEH */
+ {0x0688, 2, {0xFB88, 0xFB89}}, /* DDAL */
+ {0x068C, 2, {0xFB84, 0xFB85}}, /* DAHAL */
+ {0x068D, 2, {0xFB82, 0xFB83}}, /* DDAHAL */
+ {0x068E, 2, {0xFB86, 0xFB87}}, /* DUL */
+ {0x0691, 2, {0xFB8C, 0xFB8D}}, /* RREH */
+ {0x0698, 2, {0xFB8A, 0xFB8B}}, /* JEH */
+ {0x06A4, 4, {0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D}}, /* VEH */
+ {0x06A6, 4, {0xFB6E, 0xFB6F, 0xFB70, 0xFB71}}, /* PEHEH */
+ {0x06A9, 4, {0xFB8E, 0xFB8F, 0xFB90, 0xFB91}}, /* KEHEH */
+ {0x06AD, 4, {0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6}}, /* NG */
+ {0x06AF, 4, {0xFB92, 0xFB93, 0xFB94, 0xFB95}}, /* GAF */
+ {0x06B1, 4, {0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D}}, /* NGOEH */
+ {0x06B3, 4, {0xFB96, 0xFB97, 0xFB98, 0xFB99}}, /* GUEH */
+ {0x06BB, 4, {0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3}}, /* RNOON */
+ {0x06BE, 4, {0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD}}, /* HEH DOACHASHMEE */
+ {0x06C0, 2, {0xFBA4, 0xFBA5}}, /* HEH WITH YEH ABOVE */
+ {0x06C1, 4, {0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9}}, /* HEH GOAL */
+ {0x06C5, 2, {0xFBE0, 0xFBE1}}, /* KIRGHIZ OE */
+ {0x06C6, 2, {0xFBD9, 0xFBDA}}, /* OE */
+ {0x06C7, 2, {0xFBD7, 0xFBD8}}, /* U */
+ {0x06C8, 2, {0xFBDB, 0xFBDC}}, /* YU */
+ {0x06C9, 2, {0xFBE2, 0xFBE3}}, /* KIRGHIZ YU */
+ {0x06CB, 2, {0xFBDE, 0xFBDF}}, /* VE */
+ {0x06CC, 4, {0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF}}, /* FARSI YEH */
+ {0x06D0, 4, {0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7}}, /* E */
+ {0x06D2, 2, {0xFBAE, 0xFBAF}}, /* YEH BARREE */
+ {0x06D3, 2, {0xFBB0, 0xFBB1}}, /* YEH BARREE WITH HAMZA ABOVE */
+};
+/* *INDENT-ON* */
+
+static gunichar unshapetableFB[] = {
+ 0x0671, 0x0671, 0x067B, 0x067B, 0x067B, 0x067B, 0x067E, 0x067E,
+ 0x067E, 0x067E, 0x0680, 0x0680, 0x0680, 0x0680, 0x067A, 0x067A,
+ 0x067A, 0x067A, 0x067F, 0x067F, 0x067F, 0x067F, 0x0679, 0x0679,
+ 0x0679, 0x0679, 0x06A4, 0x06A4, 0x06A4, 0x06A4, 0x06A6, 0x06A6,
+ 0x06A6, 0x06A6, 0x0684, 0x0684, 0x0684, 0x0684, 0x0683, 0x0683,
+ 0x0683, 0x0683, 0x0686, 0x0686, 0x0686, 0x0686, 0x0687, 0x0687,
+ 0x0687, 0x0687, 0x068D, 0x068D, 0x068C, 0x068C, 0x068E, 0x068E,
+ 0x0688, 0x0688, 0x0698, 0x0698, 0x0691, 0x0691, 0x06A9, 0x06A9,
+ 0x06A9, 0x06A9, 0x06AF, 0x06AF, 0x06AF, 0x06AF, 0x06B3, 0x06B3,
+ 0x06B3, 0x06B3, 0x06B1, 0x06B1, 0x06B1, 0x06B1, 0x0000, 0x0000,
+ 0x06BB, 0x06BB, 0x06BB, 0x06BB, 0x06C0, 0x06C0, 0x06C1, 0x06C1,
+ 0x06C1, 0x06C1, 0x06BE, 0x06BE, 0x06BE, 0x06BE, 0x06D2, 0x06D2,
+ 0x06D3, 0x06D3, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x06AD, 0x06AD, 0x06AD, 0x06AD, 0x06C7,
+ 0x06C7, 0x06C6, 0x06C6, 0x06C8, 0x06C8, 0x0000, 0x06CB, 0x06CB,
+ 0x06C5, 0x06C5, 0x06C9, 0x06C9, 0x06D0, 0x06D0, 0x06D0, 0x06D0,
+ 0x0649, 0x0649, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x06CC, 0x06CC, 0x06CC, 0x06CC
+};
+
+static gunichar unshapetableFE[] = {
+ 0x0621, 0x0622, 0x0622, 0x0623, 0x0623, 0x0624, 0x0624, 0x0625,
+ 0x0625, 0x0626, 0x0626, 0x0626, 0x0626, 0x0627, 0x0627, 0x0628,
+ 0x0628, 0x0628, 0x0628, 0x0629, 0x0629, 0x062A, 0x062A, 0x062A,
+ 0x062A, 0x062B, 0x062B, 0x062B, 0x062B, 0x062C, 0x062C, 0x062C,
+ 0x062C, 0x062D, 0x062D, 0x062D, 0x062D, 0x062E, 0x062E, 0x062E,
+ 0x062E, 0x062F, 0x062F, 0x0630, 0x0630, 0x0631, 0x0631, 0x0632,
+ 0x0632, 0x0633, 0x0633, 0x0633, 0x0633, 0x0634, 0x0634, 0x0634,
+ 0x0634, 0x0635, 0x0635, 0x0635, 0x0635, 0x0636, 0x0636, 0x0636,
+ 0x0636, 0x0637, 0x0637, 0x0637, 0x0637, 0x0638, 0x0638, 0x0638,
+ 0x0638, 0x0639, 0x0639, 0x0639, 0x0639, 0x063A, 0x063A, 0x063A,
+ 0x063A, 0x0641, 0x0641, 0x0641, 0x0641, 0x0642, 0x0642, 0x0642,
+ 0x0642, 0x0643, 0x0643, 0x0643, 0x0643, 0x0644, 0x0644, 0x0644,
+ 0x0644, 0x0645, 0x0645, 0x0645, 0x0645, 0x0646, 0x0646, 0x0646,
+ 0x0646, 0x0647, 0x0647, 0x0647, 0x0647, 0x0648, 0x0648, 0x0649,
+ 0x0649, 0x064A, 0x064A, 0x064A, 0x064A
+};
+
+#define ALEF 0x0627
+#define ALEFHAMZA 0x0623
+#define ALEFHAMZABELOW 0x0625
+#define ALEFMADDA 0x0622
+#define LAM 0x0644
+#define HAMZA 0x0621
+#define TATWEEL 0x0640
+#define ZWJ 0x200D
+
+#define HAMZAABOVE 0x0654
+#define HAMZABELOW 0x0655
+
+#define WAWHAMZA 0x0624
+#define YEHHAMZA 0x0626
+#define WAW 0x0648
+#define ALEFMAKSURA 0x0649
+#define YEH 0x064A
+#define FARSIYEH 0x06CC
+
+#define SHADDA 0x0651
+#define KASRA 0x0650
+#define FATHA 0x064E
+#define DAMMA 0x064F
+#define MADDA 0x0653
+
+#define LAM_ALEF 0xFEFB
+#define LAM_ALEFHAMZA 0xFEF7
+#define LAM_ALEFHAMZABELOW 0xFEF9
+#define LAM_ALEFMADDA 0xFEF5
-static void
-charstruct_init(charstruct* s)
+static void
+charstruct_init (charstruct * s)
{
- s->basechar = 0;
- s->mark1 = 0;
- s->vowel = 0;
- s->connecttoleft = 0;
- s->lignum = 0;
- s->numshapes = 1;
+ s->basechar = 0;
+ s->mark1 = 0;
+ s->vowel = 0;
+ s->lignum = 0;
+ s->numshapes = 1;
}
-
+
static void
-copycstostring(gunichar* string,int* i,charstruct* s,arabic_level level)
-{ /* s is a shaped charstruct; i is the index into the string */
- if (s->basechar == 0) return;
-
- string[*i] = s->basechar; (*i)++; (s->lignum)--;
- if (s->mark1 != 0)
- {
- if ( !(level & ar_novowel) )
- {
- string[*i] = s->mark1; (*i)++; (s->lignum)--;
- }
- else
- {
- (s->lignum)--;
- }
+copycstostring (gunichar * string, int *i, charstruct * s, arabic_level level)
+/* s is a shaped charstruct; i is the index into the string */
+{
+ if (s->basechar == 0)
+ return;
+
+ string[*i] = s->basechar;
+ (*i)++;
+ (s->lignum)--;
+ if (s->mark1 != 0)
+ {
+ if (!(level & ar_novowel))
+ {
+ string[*i] = s->mark1;
+ (*i)++;
+ (s->lignum)--;
}
- if (s->vowel != 0)
- {
- if (! (level & ar_novowel) )
- {
- string[*i] = s->vowel; (*i)++; (s->lignum)--;
- }
- else
- { /* vowel elimination */
- (s->lignum)--;
- }
+ else
+ {
+ (s->lignum)--;
+ }
+ }
+ if (s->vowel != 0)
+ {
+ if (!(level & ar_novowel))
+ {
+ string[*i] = s->vowel;
+ (*i)++;
+ (s->lignum)--;
}
- while (s->lignum > 0 )
- { /* NULL-insertion for Langbox-font */
- string[*i] = 0; (*i)++; (s->lignum)--;
+ else
+ { /* vowel elimination */
+ (s->lignum)--;
}
+ }
+ while (s->lignum > 0)
+ { /* NULL-insertion for Langbox-font */
+ string[*i] = 0;
+ (*i)++;
+ (s->lignum)--;
+ }
}
-int
-arabic_isvowel(gunichar s)
-{ /* is this 'joining HAMZA' ( strange but has to be handled like a vowel )
- * Kasra, Fatha, Damma, Sukun ?
- */
- if ((s >= 0x64B) && (s <= 0x655)) return 1;
- if ((s >= 0xFC5E) && (s <= 0xFC63)) return 1;
- if ((s >= 0xFE70) && (s <= 0xFE7F)) return 1;
- return 0;
+int
+arabic_isvowel (gunichar s)
+/* is this a combining mark? */
+{
+ if (((s >= 0x064B) && (s <= 0x0655)) || (s == 0x0670))
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
}
-static gunichar
-unshape(gunichar s)
+static gunichar
+unshape (gunichar s)
{
- int j = 0;
- if ( (s > 0xFE80) && ( s < 0xFEFF ))
- { /* arabic shaped Glyph , not HAMZA */
- while ( chartable[j+1].charstart <= s) j++;
- return chartable[j].basechar;
- }
- else if ((s == 0xFE8B)||(s == 0xFE8C))
- {
- return HAMZA;
- }
- else
- {
- return s;
- }
+ gunichar r;
+
+ if ((s >= 0xFE80) && (s <= 0xFEF4)) /* Arabic Presentation Forms-B */
+ {
+ return unshapetableFE[s - 0xFE80];
+ }
+ else if ((s >= 0xFEF5) && (s <= 0xFEFC)) /* Lam+Alef ligatures */
+ {
+ return ((s % 2) ? s : (s - 1));
+ }
+ else if ((s >= 0xFB50) && (s <= 0xFBFF)) /* Arabic Presentation Forms-A */
+ {
+ return ((r = unshapetableFB[s - 0xFB50]) ? r : s);
+ }
+ else
+ {
+ return s;
+ }
}
-static gunichar
-charshape(gunichar s,short which)
-{ /* which 0=alone 1=end 2=start 3=middle */
- int j = 0;
- if ((s >= chartable[1].basechar) && (s <= 0x64A) && ( s != TATWEEL) && ( s != JOINER))
- { /* basic character */
- return chartable[s-chartable[0].basechar].charstart+which;
- }
- else if ( (s >= chartable[1].basechar) && ( s <= 0xFEFB )
- && (s != TATWEEL) && ( s != JOINER) && ( s!= 0x6CC))
- { /* special char or Lam-Alif */
- while ( chartable[j].basechar < s) j++;
- return chartable[j].charstart+which;
- }
- else if (s == HAMZA)
+static gunichar
+charshape (gunichar s, short which)
+/* which 0=isolated 1=final 2=initial 3=medial */
+{
+ int l, r, m;
+ if ((s >= 0x0621) && (s <= 0x06D3))
+ {
+ l = 0;
+ r = sizeof (chartable) / sizeof (shapestruct);
+ while (l <= r)
{
- if (which < 2) return s;
- else return 0xFE8B+(which-2); /* The Hamza-'pod' */
- }
- else if (s == 0x6CC)
- { /* farsi ya --> map to Alif maqsura and Ya, depending on form */
- switch (which){
- case 0: return 0xFEEF;
- case 1: return 0xFEF0;
- case 2: return 0xFEF3;
- case 3: return 0xFEF4;
+ m = (l + r) / 2;
+ if (s == chartable[m].basechar)
+ {
+ return chartable[m].charshape[which];
+ }
+ else if (s < chartable[m].basechar)
+ {
+ r = m - 1;
+ }
+ else
+ {
+ l = m + 1;
}
}
+ }
+ else if ((s >= 0xFEF5) && (s <= 0xFEFB))
+ { /* Lam+Alef */
+ return s + which;
+ }
- /* else */
- return s;
+ return s;
}
-static short
-shapecount(gunichar s)
+static short
+shapecount (gunichar s)
{
- int j = 0;
- if (arabic_isvowel(s))
- { /* correct trailing wovels */
- return 1;
- }
- else if ((s >= chartable[1].basechar) && ( s <= 0x64A ))
- { /* basic character */
- return chartable[s-chartable[0].basechar].count;
- }
- else if ( (s >= chartable[0].basechar) && ( s <= 0xFEFB ))
- {
- /* arabic base char or ligature */
- while ( chartable[j].basechar < s) j++;
- return chartable[j].count;
- }
- else
+ int l, r, m;
+ if ((s >= 0x0621) && (s <= 0x06D3) && !arabic_isvowel (s))
+ {
+ l = 0;
+ r = sizeof (chartable) / sizeof (shapestruct);
+ while (l <= r)
{
- return 1;
+ m = (l + r) / 2;
+ if (s == chartable[m].basechar)
+ {
+ return chartable[m].count;
+ }
+ else if (s < chartable[m].basechar)
+ {
+ r = m - 1;
+ }
+ else
+ {
+ l = m + 1;
+ }
}
+ }
+ else if (s == ZWJ)
+ {
+ return 4;
+ }
+ return 1;
}
static int
-unligature(charstruct* curchar,arabic_level level)
+unligature (charstruct * curchar, arabic_level level)
{
- int result = 0;
- if (level & ar_naqshfont){
- /* decompose Alif-Madda ... */
- switch(curchar->basechar){
- case ALIFHAMZA : curchar->basechar = ALIF; curchar->mark1 = aHAMZA;
- result++; break;
- case ALIFIHAMZA: curchar->basechar = ALIF; curchar->mark1 = iHAMZA;
- result++; break;
- case WAWHAMZA : curchar->basechar = WAW; curchar->mark1 = aHAMZA;
- result++; break;
- case ALIFMADDA :curchar->basechar = ALIF; curchar->vowel = MADDA;
- result++; break;
+ int result = 0;
+ if (level & ar_naqshfont)
+ {
+ /* decompose Alef Madda ... */
+ switch (curchar->basechar)
+ {
+ case ALEFHAMZA:
+ curchar->basechar = ALEF;
+ curchar->mark1 = HAMZAABOVE;
+ result++;
+ break;
+ case ALEFHAMZABELOW:
+ curchar->basechar = ALEF;
+ curchar->mark1 = HAMZABELOW;
+ result++;
+ break;
+ case WAWHAMZA:
+ curchar->basechar = WAW;
+ curchar->mark1 = HAMZAABOVE;
+ result++;
+ break;
+ case ALEFMADDA:
+ curchar->basechar = ALEF;
+ curchar->vowel = MADDA;
+ result++;
+ break;
}
- }
- return result;
+ }
+ return result;
}
-static int
-ligature(gunichar newchar,charstruct* oldchar)
-{ /* no ligature possible --> return 0; 1 == vowel; 2 = two chars
- * 3 = Lam-Alif
- */
- int retval = 0;
+static int
+ligature (gunichar newchar, charstruct * oldchar)
+/* 0 == no ligature possible; 1 == vowel; 2 == two chars; 3 == Lam+Alef */
+{
+ int retval = 0;
- if (!(oldchar->basechar)) return 0;
- if (arabic_isvowel(newchar))
+ if (!(oldchar->basechar))
+ return 0;
+ if (arabic_isvowel (newchar))
+ {
+ retval = 1;
+ if ((oldchar->vowel != 0) && (newchar != SHADDA))
+ {
+ retval = 2; /* we eliminate the old vowel .. */
+ }
+ switch (newchar)
{
- retval = 1;
- if ((oldchar->vowel != 0)&&(newchar != SHADDA)){
- retval = 2; /* we eliminate the old vowel .. */
+ case SHADDA:
+ if (oldchar->mark1 == 0)
+ {
+ oldchar->mark1 = SHADDA;
}
- switch(newchar)
- {
- case SHADDA:
- if (oldchar->mark1 == 0)
- {
- oldchar->mark1 = newchar;
- }
- else
- {
- return 0; /* no ligature possible */
- }
- break;
- case iHAMZA:
- switch(oldchar->basechar)
- {
- case ALIF:
- oldchar->basechar = ALIFIHAMZA;
- retval = 2; break;
- case LAM_ALIF:
- oldchar->basechar = LAM_ALIFIHAMZA;
- retval = 2; break;
- default: oldchar->mark1 = newchar; break;
- }
- break;
- case aHAMZA:
- switch(oldchar->basechar)
- {
- case ALIF:
- oldchar->basechar = ALIFHAMZA;
- retval = 2; break;
- case LAM_ALIF:
- oldchar->basechar = LAM_ALIFHAMZA;
- retval = 2; break;
- case WAW:
- oldchar->basechar = WAWHAMZA;
- retval = 2; break;
- default: /* whatever sense this may make .. */
- oldchar->mark1 = newchar; break;
- }
- break;
- case MADDA:
- switch(oldchar->basechar)
- {
- case ALIFHAMZA:
- case ALIF:
- oldchar->basechar = ALIFMADDA;
- retval = 2; break;
- }
- break;
- case KASRA:
- switch(oldchar->basechar)
- {
- case ALIFHAMZA:
- oldchar->basechar = ALIFIHAMZA;
- retval = 2; break;
- case LAM_ALIFHAMZA:
- oldchar->basechar = LAM_ALIFIHAMZA;
- retval = 2; break;
- default: oldchar->vowel = newchar; break;
- }
- break;
- default: oldchar->vowel = newchar; break;
- }
- if (retval == 1)
- {
- oldchar->lignum++;
- }
- return retval;
+ else
+ {
+ return 0; /* no ligature possible */
+ }
+ break;
+ case HAMZABELOW:
+ switch (oldchar->basechar)
+ {
+ case ALEF:
+ oldchar->basechar = ALEFHAMZABELOW;
+ retval = 2;
+ break;
+ case LAM_ALEF:
+ oldchar->basechar = LAM_ALEFHAMZABELOW;
+ retval = 2;
+ break;
+ default:
+ oldchar->mark1 = HAMZABELOW;
+ break;
+ }
+ break;
+ case HAMZAABOVE:
+ switch (oldchar->basechar)
+ {
+ case ALEF:
+ oldchar->basechar = ALEFHAMZA;
+ retval = 2;
+ break;
+ case LAM_ALEF:
+ oldchar->basechar = LAM_ALEFHAMZA;
+ retval = 2;
+ break;
+ case WAW:
+ oldchar->basechar = WAWHAMZA;
+ retval = 2;
+ break;
+ case YEH:
+ case ALEFMAKSURA:
+ case FARSIYEH:
+ oldchar->basechar = YEHHAMZA;
+ retval = 2;
+ break;
+ default: /* whatever sense this may make .. */
+ oldchar->mark1 = HAMZAABOVE;
+ break;
+ }
+ break;
+ case MADDA:
+ switch (oldchar->basechar)
+ {
+ case ALEF:
+ oldchar->basechar = ALEFMADDA;
+ retval = 2;
+ break;
+ }
+ break;
+ default:
+ oldchar->vowel = newchar;
+ break;
}
- if (oldchar->vowel != 0)
- { /* if we already joined a vowel, we can't join a Hamza */
- return 0;
+ if (retval == 1)
+ {
+ oldchar->lignum++;
}
-
- switch(oldchar->basechar)
+ return retval;
+ }
+ if (oldchar->vowel != 0)
+ { /* if we already joined a vowel, we can't join a Hamza */
+ return 0;
+ }
+
+ switch (oldchar->basechar)
+ {
+ case LAM:
+ switch (newchar)
{
- case LAM:
- switch (newchar)
- {
- case ALIF: oldchar->basechar = LAM_ALIF;
- oldchar->numshapes = 2; retval = 3; break;
- case ALIFHAMZA: oldchar->basechar = LAM_ALIFHAMZA;
- oldchar->numshapes = 2; retval = 3; break;
- case ALIFIHAMZA:oldchar->basechar = LAM_ALIFIHAMZA;
- oldchar->numshapes = 2; retval = 3; break;
- case ALIFMADDA: oldchar->basechar = LAM_ALIFMADDA ;
- oldchar->numshapes = 2; retval = 3; break;
- }
- break;
- case ALIF:
- switch (newchar)
- {
- case ALIF: oldchar->basechar = ALIFMADDA; retval = 2; break;
- }
- break;
- case WAW:
- switch (newchar)
- {
- case HAMZA:oldchar->basechar = WAWHAMZA; retval = 2; break;
- }
- break;
- case 0:
- oldchar->basechar = newchar;
- oldchar->numshapes = shapecount(newchar);
- retval = 1;
- break;
+ case ALEF:
+ oldchar->basechar = LAM_ALEF;
+ oldchar->numshapes = 2;
+ retval = 3;
+ break;
+ case ALEFHAMZA:
+ oldchar->basechar = LAM_ALEFHAMZA;
+ oldchar->numshapes = 2;
+ retval = 3;
+ break;
+ case ALEFHAMZABELOW:
+ oldchar->basechar = LAM_ALEFHAMZABELOW;
+ oldchar->numshapes = 2;
+ retval = 3;
+ break;
+ case ALEFMADDA:
+ oldchar->basechar = LAM_ALEFMADDA;
+ oldchar->numshapes = 2;
+ retval = 3;
+ break;
}
- return retval;
+ break;
+ case 0:
+ oldchar->basechar = newchar;
+ oldchar->numshapes = shapecount (newchar);
+ retval = 1;
+ break;
+ }
+ return retval;
}
-static void
-shape(long* len,const char* text,gunichar* string,arabic_level level)
+static void
+shape (long *len, const char *text, gunichar * string, arabic_level level)
{
- /* string is assumed to be empty an big enough.
- ** text is the original text.
- ** This routine does the basic arabic reshaping.
- ** *len the number of non-null characters.
- */
- /* Note ! we have to unshape each character first ! */
- int olen = *len;
- charstruct oldchar,curchar;
- /* int si = (olen)-1; */
- int j = 0;
- int join;
- int which;
- gunichar nextletter;
- const char* p = text;
-
- *len = 0 ; /* initialize for output */
- charstruct_init(&oldchar);
- charstruct_init(&curchar);
- while (p < text+olen)
+ /* string is assumed to be empty and big enough.
+ * text is the original text.
+ * This routine does the basic arabic reshaping.
+ * *len the number of non-null characters.
+ *
+ * Note: We have to unshape each character first!
+ */
+ int olen = *len;
+ charstruct oldchar, curchar;
+ int j = 0;
+ int join;
+ int which;
+ gunichar nextletter;
+ const char *p = text;
+
+ *len = 0; /* initialize for output */
+ charstruct_init (&oldchar);
+ charstruct_init (&curchar);
+ while (p < text + olen)
+ {
+ nextletter = g_utf8_get_char (p);
+ nextletter = unshape (nextletter);
+
+ join = ligature (nextletter, &curchar);
+ if (!join)
+ { /* shape curchar */
+ int nc = shapecount (nextletter);
+ (*len)++;
+ if (nc == 1)
+ {
+ which = 0; /* final or isolated */
+ }
+ else
+ {
+ which = 2; /* medial or initial */
+ }
+ if (connects_to_left (oldchar))
+ {
+ which++;
+ }
+
+ which = which % (curchar.numshapes);
+ curchar.basechar = charshape (curchar.basechar, which);
+
+ /* get rid of oldchar */
+ copycstostring (string, &j, &oldchar, level);
+ oldchar = curchar; /* new values in oldchar */
+
+ /* init new curchar */
+ charstruct_init (&curchar);
+ curchar.basechar = nextletter;
+ curchar.numshapes = nc;
+ curchar.lignum++;
+ (*len) += unligature (&curchar, level);
+ }
+ else if ((join == 3) && (level & ar_lboxfont))
+ { /* Lam+Alef extra in langbox-font */
+ (*len)++;
+ curchar.lignum++;
+ }
+ else if (join == 1)
{
- nextletter = g_utf8_get_char (p);
- nextletter = unshape(nextletter);
-
- join = ligature(nextletter,&curchar);
- if (!join)
- { /* shape curchar */
- int nc = shapecount(nextletter);
- (*len)++;
- if (nc == 1)
- {
- which = 0; /* end or basic */
- }
- else
- {
- which = 2; /* middle or beginning */
- }
- if (oldchar.connecttoleft)
- {
- which++;
- }
- else if (curchar.basechar == HAMZA)
- { /* normally, a Hamza hangs loose after an Alif.
- * Use the form Ya-Hamza if you want a Hamza
- * on a pod !
- */
- curchar.numshapes = 1;
- }
-
- which = which % (curchar.numshapes);
- curchar.basechar = charshape(curchar.basechar,which);
- if (curchar.numshapes > 2)
- curchar.connecttoleft = 1;
-
- /* get rid of oldchar */
- copycstostring(string,&j,&oldchar,level);
- oldchar = curchar; /* new vlues in oldchar */
-
- /* init new curchar */
- charstruct_init(&curchar);
- curchar.basechar = nextletter;
- curchar.numshapes = nc;
- curchar.lignum++;
- (*len) += unligature(&curchar,level);
- }
- else if ((join == 3)&&(level & ar_lboxfont))
- { /* Lam-Alif extra in langbox-font */
- (*len)++;
- curchar.lignum++;
- }
- else if (join == 1)
- {
- (*len)++;
- }
- else
- {
- (*len) += unligature(&curchar,level);
- }
- p = g_utf8_next_char (p);
+ (*len)++;
}
- /* Handle last char */
-
- if (oldchar.connecttoleft)
- which = 1;
- else
- which = 0;
- which = which % (curchar.numshapes);
- curchar.basechar = charshape(curchar.basechar,which);
- /* get rid of oldchar */
- copycstostring(string,&j,&oldchar,level);
- copycstostring(string,&j,&curchar,level);
+ else
+ {
+ (*len) += unligature (&curchar, level);
+ }
+ p = g_utf8_next_char (p);
+ }
+
+ /* Handle last char */
+ if (connects_to_left (oldchar))
+ which = 1;
+ else
+ which = 0;
+ which = which % (curchar.numshapes);
+ curchar.basechar = charshape (curchar.basechar, which);
+
+ /* get rid of oldchar */
+ copycstostring (string, &j, &oldchar, level);
+ copycstostring (string, &j, &curchar, level);
#ifdef DEBUG
- fprintf(stderr,"[ar] shape statistic: %i chars -> %i glyphs \n",
- olen,*len);
+ fprintf (stderr, "[ar] shape statistic: %i chars -> %i glyphs \n",
+ olen, *len);
#endif
}
-static void
-doublelig(long* len,gunichar* string,arabic_level level)
-{ /* Ok. We have presentation ligatures in our font. */
- int olen = *len;
- int j = 0, si = 1;
- gunichar lapresult;
-
-
- while (si < olen)
+static void
+doublelig (long *len, gunichar * string, arabic_level level)
+/* Ok. We have presentation ligatures in our font. */
+{
+ int olen = *len;
+ int j = 0, si = 1;
+ gunichar lapresult;
+
+ while (si < olen)
+ {
+ lapresult = 0;
+ if (level & ar_composedtashkeel)
{
- lapresult = 0;
- if ( level & ar_composedtashkeel ){
- switch(string[j])
- {
- case SHADDA:
- switch(string[si])
- {
- case KASRA: lapresult = 0xFC62; break;
- case FATHA: lapresult = 0xFC60; break;
- case DAMMA: lapresult = 0xFC61; break;
- case 0x64C: lapresult = 0xFC5E; break;
- case 0x64D: lapresult = 0xFC5F; break;
- }
- break;
- case KASRA:
- if (string[si]==SHADDA) lapresult = 0xFC62;
- break;
- case FATHA:
- if (string[si]==SHADDA) lapresult = 0xFC60;
- break;
- case DAMMA:
- if (string[si]==SHADDA) lapresult = 0xFC61;
- break;
- }
+ switch (string[j])
+ {
+ case SHADDA:
+ switch (string[si])
+ {
+ case KASRA:
+ lapresult = 0xFC62;
+ break;
+ case FATHA:
+ lapresult = 0xFC60;
+ break;
+ case DAMMA:
+ lapresult = 0xFC61;
+ break;
+ case 0x064C:
+ lapresult = 0xFC5E;
+ break;
+ case 0x064D:
+ lapresult = 0xFC5F;
+ break;
+ }
+ break;
+ case KASRA:
+ if (string[si] == SHADDA)
+ lapresult = 0xFC62;
+ break;
+ case FATHA:
+ if (string[si] == SHADDA)
+ lapresult = 0xFC60;
+ break;
+ case DAMMA:
+ if (string[si] == SHADDA)
+ lapresult = 0xFC61;
+ break;
}
+ }
- if ( level & ar_lig ){
- switch(string[j])
- {
- case 0xFEDF: /* LAM initial */
- switch(string[si]){
- case 0xFE9E : lapresult = 0xFC3F; break; /* DJEEM final*/
- case 0xFEA0 : lapresult = 0xFCC9; break;
- case 0xFEA2 : lapresult = 0xFC40; break; /* .HA final */
- case 0xFEA4 : lapresult = 0xFCCA; break;
- case 0xFEA6 : lapresult = 0xFCF1; break; /* CHA final */
- case 0xFEA8 : lapresult = 0xFCCB; break;
- case 0xFEE2 : lapresult = 0xFC42; break; /* MIM final */
- case 0xFEE4 : lapresult = 0xFCCC; break;
- }
- break;
- case 0xFE97: /* TA inital */
- switch(string[si]){
- case 0xFEA0 : lapresult = 0xFCA1; break; /* DJ init */
- case 0xFEA4 : lapresult = 0xFCA2; break; /* .HA */
- case 0xFEA8 : lapresult = 0xFCA3; break; /* CHA */
- }
- break;
- case 0xFE91: /* BA inital */
- switch(string[si]){
- case 0xFEA0 : lapresult = 0xFC9C; break; /* DJ init */
- case 0xFEA4 : lapresult = 0xFC9D; break; /* .HA */
- case 0xFEA8 : lapresult = 0xFC9E; break; /* CHA */
- }
- break;
- case 0xFEE7: /* NUN inital */
- switch(string[si]){
- case 0xFEA0 : lapresult = 0xFCD2; break; /* DJ init */
- case 0xFEA4 : lapresult = 0xFCD3; break; /* .HA */
- case 0xFEA8 : lapresult = 0xFCD4; break; /* CHA */
- }
- break;
-
- case 0xFEE8: /* NUN medial */
- switch(string[si]){
- /* missing : nun-ra : FC8A und nun-sai : FC8B */
- case 0xFEAE : lapresult = 0xFC8A; break; /* nun-ra */
- case 0xFEB0 : lapresult = 0xFC8B; break; /* nun-sai */
- }
- break;
- case 0xFEE3: /* Mim initial */
- switch(string[si]){
- case 0xFEA0 : lapresult = 0xFCCE ; break; /* DJ init */
- case 0xFEA4 : lapresult = 0xFCCF ; break; /* .HA init */
- case 0xFEA8 : lapresult = 0xFCD0 ; break; /* CHA init */
- case 0xFEE4 : lapresult = 0xFCD1 ; break; /* Mim init */
- }
- break;
-
- case 0xFED3: /* Fa initial */
- switch(string[si]){
- case 0xFEF2 : lapresult = 0xFC32 ; break; /* fi-ligature (!) */
- }
- break;
-
- default:
- break;
- } /* end switch string[si] */
- }
- if (lapresult != 0)
+ if (level & ar_lig)
+ {
+ switch (string[j])
+ {
+ case 0xFEDF: /* LAM initial */
+ switch (string[si])
{
- string[j] = lapresult; (*len)--;
- si++; /* jump over one character */
- /* we'll have to change this, too. */
+ case 0xFE9E:
+ lapresult = 0xFC3F;
+ break; /* JEEM final */
+ case 0xFEA0:
+ lapresult = 0xFCC9;
+ break; /* JEEM medial */
+ case 0xFEA2:
+ lapresult = 0xFC40;
+ break; /* HAH final */
+ case 0xFEA4:
+ lapresult = 0xFCCA;
+ break; /* HAH medial */
+ case 0xFEA6:
+ lapresult = 0xFC41;
+ break; /* KHAH final */
+ case 0xFEA8:
+ lapresult = 0xFCCB;
+ break; /* KHAH medial */
+ case 0xFEE2:
+ lapresult = 0xFC42;
+ break; /* MEEM final */
+ case 0xFEE4:
+ lapresult = 0xFCCC;
+ break; /* MEEM medial */
}
- else
+ break;
+ case 0xFE97: /* TEH initial */
+ switch (string[si])
{
- j++;
- string[j] = string[si];
- si++;
+ case 0xFEA0:
+ lapresult = 0xFCA1;
+ break; /* JEEM medial */
+ case 0xFEA4:
+ lapresult = 0xFCA2;
+ break; /* HAH medial */
+ case 0xFEA8:
+ lapresult = 0xFCA3;
+ break; /* KHAH medial */
}
- }
+ break;
+ case 0xFE91: /* BEH initial */
+ switch (string[si])
+ {
+ case 0xFEA0:
+ lapresult = 0xFC9C;
+ break; /* JEEM medial */
+ case 0xFEA4:
+ lapresult = 0xFC9D;
+ break; /* HAH medial */
+ case 0xFEA8:
+ lapresult = 0xFC9E;
+ break; /* KHAH medial */
+ }
+ break;
+ case 0xFEE7: /* NOON initial */
+ switch (string[si])
+ {
+ case 0xFEA0:
+ lapresult = 0xFCD2;
+ break; /* JEEM medial */
+ case 0xFEA4:
+ lapresult = 0xFCD3;
+ break; /* HAH medial */
+ case 0xFEA8:
+ lapresult = 0xFCD4;
+ break; /* KHAH medial */
+ }
+ break;
+
+ case 0xFEE8: /* NOON medial */
+ switch (string[si])
+ {
+ case 0xFEAE:
+ lapresult = 0xFC8A;
+ break; /* REH final */
+ case 0xFEB0:
+ lapresult = 0xFC8B;
+ break; /* ZAIN final */
+ }
+ break;
+ case 0xFEE3: /* MEEM initial */
+ switch (string[si])
+ {
+ case 0xFEA0:
+ lapresult = 0xFCCE;
+ break; /* JEEM medial */
+ case 0xFEA4:
+ lapresult = 0xFCCF;
+ break; /* HAH medial */
+ case 0xFEA8:
+ lapresult = 0xFCD0;
+ break; /* KHAH medial */
+ case 0xFEE4:
+ lapresult = 0xFCD1;
+ break; /* MEEM medial */
+ }
+ break;
+
+ case 0xFED3: /* FEH initial */
+ switch (string[si])
+ {
+ case 0xFEF2:
+ lapresult = 0xFC32;
+ break; /* YEH final */
+ }
+ break;
+
+ default:
+ break;
+ } /* end switch string[j] */
+ }
+ if (lapresult != 0)
+ {
+ string[j] = lapresult;
+ (*len)--;
+ si++; /* jump over one character */
+ /* we'll have to change this, too. */
+ }
+ else
+ {
+ j++;
+ string[j] = string[si];
+ si++;
+ }
+ }
}
-void
-arabic_reshape(long* len,const char* text,gunichar* string,arabic_level level)
+void
+arabic_reshape (long *len, const char *text, gunichar * string,
+ arabic_level level)
{
- shape(len,text ,string,level);
- if ( level & ( ar_composedtashkeel | ar_lig ) )
- doublelig(len,string,level);
+ shape (len, text, string, level);
+ if (level & (ar_composedtashkeel | ar_lig))
+ doublelig (len, string, level);
}
diff --git a/modules/hangul/hangul-x.c b/modules/hangul/hangul-x.c
index 951819b1..d1308b6e 100644
--- a/modules/hangul/hangul-x.c
+++ b/modules/hangul/hangul-x.c
@@ -20,6 +20,7 @@
*/
#include <glib.h>
+#include <stdlib.h>
#include <string.h>
#include "pangox.h"
@@ -71,6 +72,29 @@ set_glyph (PangoGlyphString *glyphs,
glyphs->glyphs[i].geometry.width = logical_rect.width;
}
+static void
+set_unknown_glyph (PangoGlyphString *glyphs,
+ int *n_glyphs,
+ PangoFont *font,
+ gunichar wc,
+ int cluster_offset)
+{
+ PangoRectangle logical_rect;
+ gint i = *n_glyphs;
+
+ (*n_glyphs)++;
+ pango_glyph_string_set_size (glyphs, *n_glyphs);
+
+ glyphs->glyphs[i].glyph = pango_x_get_unknown_glyph (font);
+
+ glyphs->glyphs[i].geometry.x_offset = 0;
+ glyphs->glyphs[i].geometry.y_offset = 0;
+
+ pango_font_get_glyph_extents (font, glyphs->glyphs[i].glyph, NULL, &logical_rect);
+ glyphs->glyphs[i].geometry.width = logical_rect.width;
+
+ glyphs->log_clusters[i] = cluster_offset;
+}
/*
* From 3.10 of the Unicode 2.0 Book; used for combining Jamos.
@@ -264,6 +288,8 @@ JOHAB_COMMON
else
break;
}
+ if (j == 0)
+ set_unknown_glyph (glyphs, n_glyphs, font, text[i], cluster_offset);
}
}
@@ -297,6 +323,8 @@ JOHAB_COMMON
(*n_glyphs)++;
}
}
+ if (j == 0)
+ set_unknown_glyph (glyphs, n_glyphs, font, wc, cluster_offset);
}
}
@@ -458,6 +486,8 @@ render_syllable_with_ksc5601 (PangoFont *font, PangoXSubfont subfont,
glyphs->log_clusters[*n_glyphs] = cluster_offset;
(*n_glyphs)++;
}
+ if (j == 0)
+ set_unknown_glyph (glyphs, n_glyphs, font, gindex, cluster_offset);
}
}
@@ -555,7 +585,9 @@ hangul_engine_shape (PangoFont *font,
const char *ptr;
const char *next;
int i, n_chars;
- gunichar2 jamos[4];
+ gunichar2 jamos_static[4];
+ guint jamos_max = G_N_ELEMENTS (jamos_static);
+ gunichar2 *jamos = jamos_static;
int n_jamos = 0;
int n_glyphs = 0, cluster_offset = 0;
@@ -637,6 +669,16 @@ hangul_engine_shape (PangoFont *font,
/* Clear. */
n_jamos = 0;
}
+ if (n_jamos == jamos_max)
+ {
+ gunichar2 *new_jamos;
+
+ jamos_max++;
+ new_jamos = g_new (gunichar2, jamos_max);
+ memcpy (new_jamos, jamos, n_jamos * sizeof (gunichar2));
+
+ jamos = new_jamos;
+ }
jamos[n_jamos++] = wc;
}
}
@@ -657,6 +699,9 @@ hangul_engine_shape (PangoFont *font,
cluster_offset = next - text;
n_jamos = 0;
}
+
+ if (jamos != jamos_static)
+ g_free (jamos);
}
static PangoCoverage *