From 93a115e37bfb86f6d10d2d61249bee1007ed3839 Mon Sep 17 00:00:00 2001 From: Owen Taylor Date: Thu, 3 Mar 2005 23:07:12 +0000 Subject: Split out handling of sinhala al-lakuna character from handling of Virama 2005-03-03 Owen Taylor * modules/indic/indic-ot.[ch] modules/indic/indic-ot-class-tables.c: Split out handling of sinhala al-lakuna character from handling of Virama in the state table to avoid implicit formation of conjuncts for Sinhala. (Patch from Harshula, #161981) * modules/indic/indic-fc.c modules/indic/indic-ot.h: Add a new script flag SF_PROCESS_ZWJ indicating whether zero width characters should be passed to gsub/gpos. * modules/indic/indic-ot-class-tables.c: Set SF_PROCESS_ZWJ for Sinhala. (#161981, Harshula) --- ChangeLog | 16 ++++++++++++++++ ChangeLog.pre-1-10 | 16 ++++++++++++++++ modules/indic/indic-fc.c | 9 ++++++--- modules/indic/indic-ot-class-tables.c | 35 ++++++++++++++++++++++------------- modules/indic/indic-ot.c | 21 +++++++++++---------- modules/indic/indic-ot.h | 7 ++++++- 6 files changed, 77 insertions(+), 27 deletions(-) diff --git a/ChangeLog b/ChangeLog index dffaf079..6f87f63d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +2005-03-03 Owen Taylor + + * modules/indic/indic-ot.[ch] modules/indic/indic-ot-class-tables.c: + Split out handling of sinhala al-lakuna character from + handling of Virama in the state table to avoid implicit + formation of conjuncts for Sinhala. (Patch from + Harshula, #161981) + + * modules/indic/indic-fc.c modules/indic/indic-ot.h: + Add a new script flag SF_PROCESS_ZWJ indicating + whether zero width characters should be passed to + gsub/gpos. + + * modules/indic/indic-ot-class-tables.c: Set SF_PROCESS_ZWJ + for Sinhala. 
(#161981, Harshula) + 2005-03-03 Owen Taylor * pango/pango-fontmap.c (pango_font_map_load_font): Add diff --git a/ChangeLog.pre-1-10 b/ChangeLog.pre-1-10 index dffaf079..6f87f63d 100644 --- a/ChangeLog.pre-1-10 +++ b/ChangeLog.pre-1-10 @@ -1,3 +1,19 @@ +2005-03-03 Owen Taylor + + * modules/indic/indic-ot.[ch] modules/indic/indic-ot-class-tables.c: + Split out handling of sinhala al-lakuna character from + handling of Virama in the state table to avoid implicit + formation of conjuncts for Sinhala. (Patch from + Harshula, #161981) + + * modules/indic/indic-fc.c modules/indic/indic-ot.h: + Add a new script flag SF_PROCESS_ZWJ indicating + whether zero width characters should be passed to + gsub/gpos. + + * modules/indic/indic-ot-class-tables.c: Set SF_PROCESS_ZWJ + for Sinhala. (#161981, Harshula) + 2005-03-03 Owen Taylor * pango/pango-fontmap.c (pango_font_map_load_font): Add diff --git a/modules/indic/indic-fc.c b/modules/indic/indic-fc.c index 788bc64f..b9816a21 100644 --- a/modules/indic/indic-fc.c +++ b/modules/indic/indic-fc.c @@ -250,7 +250,7 @@ get_gpos_ruleset (FT_Face face, PangoIndicInfo *indic_info) } static void -set_glyphs (PangoFont *font, FT_Face face, const gunichar *wcs, gulong *tags, glong n_glyphs, PangoOTBuffer *buffer) +set_glyphs (PangoFont *font, FT_Face face, const gunichar *wcs, gulong *tags, glong n_glyphs, PangoOTBuffer *buffer, gboolean process_zwj) { gint i; @@ -260,7 +260,8 @@ set_glyphs (PangoFont *font, FT_Face face, const gunichar *wcs, gulong *tags, gl { guint glyph; - if (ZERO_WIDTH_CHAR (wcs[i])) + if (ZERO_WIDTH_CHAR (wcs[i]) && + (!process_zwj || wcs[i] != 0x200D)) glyph = 0; else glyph = FT_Get_Char_Index (face, wcs[i]); @@ -347,7 +348,9 @@ indic_engine_shape (PangoEngineShape *engine, pango_glyph_string_set_size (glyphs, n_glyphs); buffer = pango_ot_buffer_new (fc_font); - set_glyphs(font, face, wc_out, tags, n_glyphs, buffer); + + set_glyphs(font, face, wc_out, tags, n_glyphs, buffer, + (indic_info->classTable->scriptFlags & 
SF_PROCESS_ZWJ) != 0); /* do gsub processing */ gsub_ruleset = get_gsub_ruleset (face, indic_info); diff --git a/modules/indic/indic-ot-class-tables.c b/modules/indic/indic-ot-class-tables.c index 85fac646..4f9a74b5 100644 --- a/modules/indic/indic-ot-class-tables.c +++ b/modules/indic/indic-ot-class-tables.c @@ -40,6 +40,7 @@ enum _dr = _dv | CF_MATRA_POST, _lm = _dv | CF_LENGTH_MARK, _vr = CC_VIRAMA, + _al = CC_AL_LAKUNA, /* * Split matras @@ -198,7 +199,7 @@ static IndicOTCharClass sinhCharClasses[] = _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _xx, _ct, _ct, _ct, _ct, _ct, _ct, /* 0D90 - 0D9F */ _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, /* 0DA0 - 0DAF */ _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _xx, _xx, /* 0DB0 - 0DBF */ - _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _vr, _xx, _xx, _xx, _xx, _dr, /* 0DC0 - 0DCF */ + _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _al, _xx, _xx, _xx, _xx, _dr, /* 0DC0 - 0DCF */ _dr, _dr, _da, _da, _db, _xx, _db, _xx, _dr, _dl, _s1, _dl, _s2, _s3, _s4, _dr, /* 0DD0 - 0DDF */ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0DE0 - 0DEF */ _xx, _xx, _dr, _dr, _xx /* 0DF0 - 0DF4 */ @@ -243,7 +244,7 @@ static const IndicOTSplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x #define TELU_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | 3) #define KNDA_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | 3) #define MLYM_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT) -#define SINH_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT) +#define SINH_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT | SF_PROCESS_ZWJ) /* * Indic Class Tables @@ -307,7 +308,14 @@ gboolean indic_ot_is_virama(const IndicOTClassTable *class_table, gunichar ch) { IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); - return IS_VIRAMA(char_class); + return (IS_VIRAMA(char_class) || IS_AL_LAKUNA(char_class)); +} + +gboolean 
indic_ot_is_al_lakuna(const IndicOTClassTable *class_table, gunichar ch) +{ + IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch); + + return IS_AL_LAKUNA(char_class); } gboolean indic_ot_is_nukta(const IndicOTClassTable *class_table, gunichar ch) @@ -413,16 +421,17 @@ IndicOTCharClass indic_ot_get_char_class(const IndicOTClassTable *class_table, g static const gint8 stateTable[][CC_COUNT] = { -/* xx ma mp iv ct cn nu dv vr zw */ - { 1, 1, 1, 5, 3, 2, 1, 1, 1, 1}, - {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, - {-1, 6, 1, -1, -1, -1, -1, 5, 4, -1}, - {-1, 6, 1, -1, -1, -1, 2, 5, 4, -1}, - {-1, -1, -1, -1, 3, 2, -1, -1, -1, 8}, - {-1, 6, 1, -1, -1, -1, -1, 5, -1, -1}, - {-1, 7, 1, -1, -1, -1, -1, -1, -1, -1}, - {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1}, - {-1, -1, -1, -1, 3, 2, -1, -1, -1, -1} +/* xx ma mp iv ct cn nu dv vr zw al */ + { 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, 6, 1, -1, -1, -1, -1, 5, 4, -1, -1}, + {-1, 6, 1, -1, -1, -1, 2, 5, 4, -1, 9}, + {-1, -1, -1, -1, 3, 2, -1, -1, -1, 8, -1}, + {-1, 6, 1, -1, -1, -1, -1, 5, -1, -1, -1}, + {-1, 7, 1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, 8, -1} }; diff --git a/modules/indic/indic-ot.c b/modules/indic/indic-ot.c index 159cecdc..6b484f3b 100644 --- a/modules/indic/indic-ot.c +++ b/modules/indic/indic-ot.c @@ -29,7 +29,7 @@ struct _Output gunichar fMabove; gunichar fMpost; gunichar fLengthMark; - gunichar fVirama; /* to handle virama in sinhala split matras */ + gunichar fAlLakuna; /* to handle Al-Lakuna in sinhala split matras */ glong fMatraIndex; gulong fMatraTags; gboolean fMatraWordStart; @@ -51,7 +51,7 @@ static void initOutput(Output *output, const glong *originalOffsets, gunichar *o output->fOutIndex = 0; output->fMatraTags = 0; - output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = 
output->fVirama = 0; + output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = output->fAlLakuna = 0; output->fMPreOutIndex = -1; output->fMPreFixups = mpreFixups; @@ -70,14 +70,14 @@ static void saveMatra(Output *output, gunichar matra, IndicOTCharClass matraClas output->fMpost = matra; } else if (IS_LENGTH_MARK(matraClass)) { output->fLengthMark = matra; - } else if (IS_VIRAMA(matraClass)) { - output->fVirama = matra; + } else if (IS_AL_LAKUNA(matraClass)) { + output->fAlLakuna = matra; } } static void initMatra(Output *output, guint32 matraIndex, gulong matraTags, gboolean wordStart) { - output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = output->fVirama = 0; + output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = output->fAlLakuna = 0; output->fMPreOutIndex = -1; output->fMatraIndex = matraIndex; output->fMatraTags = matraTags; @@ -165,10 +165,10 @@ static void writeLengthMark(Output *output) } } -static void writeVirama(Output *output) +static void writeAlLakuna(Output *output) { - if (output->fVirama != 0) { - writeChar(output, output->fVirama, output->fMatraIndex, output->fMatraTags); + if (output->fAlLakuna != 0) { + writeChar(output, output->fAlLakuna, output->fMatraIndex, output->fMatraTags); } } @@ -229,6 +229,7 @@ glong indic_ot_reorder(const gunichar *chars, const glong *utf8_offsets, glong c case CC_MODIFYING_MARK_POST: case CC_NUKTA: case CC_VIRAMA: + case CC_AL_LAKUNA: writeChar(&output, C_DOTTED_CIRCLE, prev, blwf_p); writeChar(&output, chars[prev], prev, blwf_p); break; @@ -240,7 +241,7 @@ glong indic_ot_reorder(const gunichar *chars, const glong *utf8_offsets, glong c writeMabove(&output); writeMpost(&output); writeLengthMark(&output); - writeVirama(&output); + writeAlLakuna(&output); break; case CC_CONSONANT: @@ -425,7 +426,7 @@ glong indic_ot_reorder(const gunichar *chars, const glong *utf8_offsets, glong c } writeLengthMark(&output); 
- writeVirama(&output); + writeAlLakuna(&output); /* write reph */ if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { diff --git a/modules/indic/indic-ot.h b/modules/indic/indic-ot.h index d6b498b3..47ec64e4 100644 --- a/modules/indic/indic-ot.h +++ b/modules/indic/indic-ot.h @@ -47,7 +47,8 @@ typedef enum CC_DEPENDENT_VOWEL = 7, CC_VIRAMA = 8, CC_ZERO_WIDTH_MARK = 9, - CC_COUNT = 10 + CC_AL_LAKUNA = 10, + CC_COUNT = 11 } IndicOTCharClassValues; /* @@ -89,6 +90,7 @@ typedef enum SF_REPH_AFTER_BELOW = 0x40000000, SF_EYELASH_RA = 0x20000000, SF_MPRE_FIXUP = 0x10000000, + SF_PROCESS_ZWJ = 0x08000000, SF_POST_BASE_LIMIT_MASK = 0x0000FFFF, SF_NO_POST_BASE_LIMIT = 0x00007FFF @@ -153,6 +155,8 @@ enum indic_glyph_property_ #define IS_VIRAMA(charClass) ((charClass & CF_CLASS_MASK) == CC_VIRAMA) +#define IS_AL_LAKUNA(charClass) ((charClass & CF_CLASS_MASK) == CC_AL_LAKUNA) + #define IS_VATTU(charClass) ((charClass & CF_VATTU) != 0) #define IS_MATRA(charClass) ((charClass & CF_CLASS_MASK) == CC_DEPENDENT_VOWEL) @@ -227,6 +231,7 @@ gboolean indic_ot_is_vm_post(const IndicOTClassTable *class_table, gunichar ch); gboolean indic_ot_is_consonant(const IndicOTClassTable *class_table, gunichar ch); gboolean indic_ot_is_reph(const IndicOTClassTable *class_table, gunichar ch); gboolean indic_ot_is_virama(const IndicOTClassTable *class_table, gunichar ch); +gboolean indic_ot_is_al_lakuna(const IndicOTClassTable *class_table, gunichar ch); gboolean indic_ot_is_nukta(const IndicOTClassTable *class_table, gunichar ch); gboolean indic_ot_is_vattu(const IndicOTClassTable *class_table, gunichar ch); gboolean indic_ot_is_matra(const IndicOTClassTable *class_table, gunichar ch); -- cgit v1.2.1