From 1705254b8e07f9ca96fff7132bda44d70df908d9 Mon Sep 17 00:00:00 2001 From: Owen Taylor Date: Fri, 25 Jul 2003 15:10:44 +0000 Subject: Port of some fixes from ICU. Fri Jul 25 11:06:14 2003 Owen Taylor * modules/indic/indic-ot.[ch]: Port of some fixes from ICU. "Fixed problems w/ backing off the front of a syllable if all consonants have post-base form... Fix tag list for base consonant so that it doesn't have 'pstf'"" Based on patch from Taneem Ahmed (#118297) --- ChangeLog | 11 ++ ChangeLog.pre-1-10 | 11 ++ ChangeLog.pre-1-4 | 11 ++ ChangeLog.pre-1-6 | 11 ++ ChangeLog.pre-1-8 | 11 ++ modules/indic/indic-ot.c | 345 +++++++++++++++++++++++------------------------ modules/indic/indic-ot.h | 2 +- 7 files changed, 226 insertions(+), 176 deletions(-) diff --git a/ChangeLog b/ChangeLog index 75849141..a2bf4ff9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Fri Jul 25 11:06:14 2003 Owen Taylor + + * modules/indic/indic-ot.[ch]: Port of some fixes from + ICU. + + "Fixed problems w/ backing off the front of a syllable if + all consonants have post-base form... Fix tag list for base + consonant so that it doesn't have 'pstf'"" + + Based on patch from Taneem Ahmed (#118297) + 2003-07-25 Noah Levitt * docs/tmpl/engines.sgml: diff --git a/ChangeLog.pre-1-10 b/ChangeLog.pre-1-10 index 75849141..a2bf4ff9 100644 --- a/ChangeLog.pre-1-10 +++ b/ChangeLog.pre-1-10 @@ -1,3 +1,14 @@ +Fri Jul 25 11:06:14 2003 Owen Taylor + + * modules/indic/indic-ot.[ch]: Port of some fixes from + ICU. + + "Fixed problems w/ backing off the front of a syllable if + all consonants have post-base form... Fix tag list for base + consonant so that it doesn't have 'pstf'"" + + Based on patch from Taneem Ahmed (#118297) + 2003-07-25 Noah Levitt * docs/tmpl/engines.sgml: diff --git a/ChangeLog.pre-1-4 b/ChangeLog.pre-1-4 index 75849141..a2bf4ff9 100644 --- a/ChangeLog.pre-1-4 +++ b/ChangeLog.pre-1-4 @@ -1,3 +1,14 @@ +Fri Jul 25 11:06:14 2003 Owen Taylor + + * modules/indic/indic-ot.[ch]: Port of some fixes from + ICU. + + "Fixed problems w/ backing off the front of a syllable if + all consonants have post-base form... Fix tag list for base + consonant so that it doesn't have 'pstf'"" + + Based on patch from Taneem Ahmed (#118297) + 2003-07-25 Noah Levitt * docs/tmpl/engines.sgml: diff --git a/ChangeLog.pre-1-6 b/ChangeLog.pre-1-6 index 75849141..a2bf4ff9 100644 --- a/ChangeLog.pre-1-6 +++ b/ChangeLog.pre-1-6 @@ -1,3 +1,14 @@ +Fri Jul 25 11:06:14 2003 Owen Taylor + + * modules/indic/indic-ot.[ch]: Port of some fixes from + ICU. + + "Fixed problems w/ backing off the front of a syllable if + all consonants have post-base form... Fix tag list for base + consonant so that it doesn't have 'pstf'"" + + Based on patch from Taneem Ahmed (#118297) + 2003-07-25 Noah Levitt * docs/tmpl/engines.sgml: diff --git a/ChangeLog.pre-1-8 b/ChangeLog.pre-1-8 index 75849141..a2bf4ff9 100644 --- a/ChangeLog.pre-1-8 +++ b/ChangeLog.pre-1-8 @@ -1,3 +1,14 @@ +Fri Jul 25 11:06:14 2003 Owen Taylor + + * modules/indic/indic-ot.[ch]: Port of some fixes from + ICU. + + "Fixed problems w/ backing off the front of a syllable if + all consonants have post-base form... Fix tag list for base + consonant so that it doesn't have 'pstf'"" + + Based on patch from Taneem Ahmed (#118297) + 2003-07-25 Noah Levitt * docs/tmpl/engines.sgml: diff --git a/modules/indic/indic-ot.c b/modules/indic/indic-ot.c index 651cfc60..99fdff4d 100644 --- a/modules/indic/indic-ot.c +++ b/modules/indic/indic-ot.c @@ -200,200 +200,195 @@ glong indic_ot_reorder(const gunichar *chars, const glong *utf8_offsets, glong c guint32 length = vmabove - prev; glong lastConsonant = vmabove - 1; glong baseLimit = prev; - glong baseConsonant, postBase; + glong baseConsonant, postBase, postBaseLimit; + gboolean seenVattu, seenBelowBaseForm, supressVattu; + glong bcSpan; /* Check for REPH at front of syllable */ if (length > 2 && indic_ot_is_reph(class_table, chars[prev]) && indic_ot_is_virama(class_table, chars[prev + 1])) { - baseLimit = prev + 2; + baseLimit += 2; /* Check for eyelash RA, if the script supports it */ if ((class_table->scriptFlags & SF_EYELASH_RA) != 0 && - chars[prev + 2] == C_SIGN_ZWJ) { + chars[baseLimit] == C_SIGN_ZWJ) { if (length > 3) { baseLimit += 1; } else { - baseLimit = prev; + baseLimit -= 2; } } } - while (lastConsonant >= baseLimit && !indic_ot_is_consonant(class_table, chars[lastConsonant])) { + while (lastConsonant > baseLimit && !indic_ot_is_consonant(class_table, chars[lastConsonant])) { lastConsonant -= 1; } baseConsonant = lastConsonant; postBase = lastConsonant + 1; - if (lastConsonant >= prev) { - glong postBaseLimit = class_table->scriptFlags & SF_POST_BASE_LIMIT_MASK; - gboolean seenVattu = false; - gboolean seenBelowBaseForm = false; - gboolean supressVattu = true; - glong bcSpan; - - while (baseConsonant >= baseLimit) { - IndicOTCharClass charClass = indic_ot_get_char_class(class_table, chars[baseConsonant]); - - if (IS_CONSONANT(charClass)) { - if (postBaseLimit == 0 || seenVattu || - (baseConsonant > baseLimit && !indic_ot_is_virama(class_table, chars[baseConsonant - 1])) || - !HAS_POST_OR_BELOW_BASE_FORM(charClass)) { - break; - } - - seenVattu = IS_VATTU(charClass); - - if (HAS_POST_BASE_FORM(charClass)) { - if (seenBelowBaseForm) { - break; - } - - postBase = baseConsonant; - } else if (HAS_BELOW_BASE_FORM(charClass)) { - seenBelowBaseForm = true; - } - - postBaseLimit -= 1; - } - - baseConsonant -= 1; - } - - if (baseConsonant < baseLimit) { - baseConsonant = baseLimit; - } - - /* Write Mpre */ - writeMpre(&output); - - /* Write eyelash RA */ - /* NOTE: baseLimit == prev + 3 iff eyelash RA present... */ - if (baseLimit == prev + 3) { - writeChar(&output, chars[prev], prev, half_p); - writeChar(&output, chars[prev + 1], prev /*+ 1*/, half_p); - writeChar(&output, chars[prev + 2], prev /*+ 2*/, half_p); - } - - /* write any pre-base consonants */ - supressVattu = true; - - for (i = baseLimit; i < baseConsonant; i += 1) { - gunichar ch = chars[i]; - gulong tag = blwf_p; - IndicOTCharClass charClass = indic_ot_get_char_class(class_table, ch); - - if (IS_CONSONANT(charClass)) { - if (IS_VATTU(charClass) && supressVattu) { - tag = nukt_p; - } - - supressVattu = IS_VATTU(charClass); - } else if (IS_VIRAMA(charClass) && chars[i + 1] == C_SIGN_ZWNJ) - { - tag = nukt_p; - } - - writeChar(&output, ch, /*i*/ prev, tag); - } - - bcSpan = baseConsonant + 1; - - if (bcSpan < vmabove && indic_ot_is_nukta(class_table, chars[bcSpan])) { - bcSpan += 1; - } - - if (baseConsonant == lastConsonant && bcSpan < vmabove && indic_ot_is_virama(class_table, chars[bcSpan])) { - bcSpan += 1; - - if (bcSpan < vmabove && chars[bcSpan] == C_SIGN_ZWNJ) { - bcSpan += 1; - } - } - - /* write base consonant */ - for (i = baseConsonant; i < bcSpan; i += 1) { - writeChar(&output, chars[i], /*i*/ prev, nukt_p); - } - - if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { - writeMbelow(&output); - writeMabove(&output); - writeMpost(&output); - } - - /* write below-base consonants */ - if (baseConsonant != lastConsonant) { - for (i = bcSpan + 1; i < postBase; i += 1) { - writeChar(&output, chars[i], /*i*/ prev, blwf_p); - } - - if (postBase > lastConsonant) { - /* write halant that was after base consonant */ - writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p); - } - } - - /* write Mbelow, Mabove */ - if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { - writeMbelow(&output); - writeMabove(&output); - } - - if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { - if (baseLimit == prev + 2) { - writeChar(&output, chars[prev], prev, rphf_p); - writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p); - } - - /* write VMabove */ - for (i = vmabove; i < vmpost; i += 1) { - writeChar(&output, chars[i], /*i*/ prev, blwf_p); - } - } - - /* write post-base consonants */ - /* FIXME: does this put the right tags on post-base consonants? */ - if (baseConsonant != lastConsonant) { - if (postBase <= lastConsonant) { - for (i = postBase; i <= lastConsonant; i += 1) { - writeChar(&output, chars[i], /*i*/ prev, nukt_p); - } - - /* write halant that was after base consonant */ - writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p); - } - - /* write the training halant, if there is one */ - if (lastConsonant < matra && indic_ot_is_virama(class_table, chars[matra])) { - writeChar(&output, chars[matra], /*matra*/ prev, nukt_p); - } - } - - /* write Mpost */ - if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { - writeMpost(&output); - } - - writeLengthMark(&output); - - /* write reph */ - if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { - if (baseLimit == prev + 2) { - writeChar(&output, chars[prev], prev, rphf_p); - writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p); - } - - /* write VMabove */ - for (i = vmabove; i < vmpost; i += 1) { - writeChar(&output, chars[i], /*i*/ prev, blwf_p); - } - } - - /* write VMpost */ - for (i = vmpost; i < syllable; i += 1) { - writeChar(&output, chars[i], /*i*/ prev, blwf_p); - } - } + postBaseLimit = class_table->scriptFlags & SF_POST_BASE_LIMIT_MASK; + seenVattu = false; + seenBelowBaseForm = false; + supressVattu = true; + + while (baseConsonant > baseLimit) { + IndicOTCharClass charClass = indic_ot_get_char_class(class_table, chars[baseConsonant]); + + if (IS_CONSONANT(charClass)) { + if (postBaseLimit == 0 || seenVattu || + (baseConsonant > baseLimit && !indic_ot_is_virama(class_table, chars[baseConsonant - 1])) || + !HAS_POST_OR_BELOW_BASE_FORM(charClass)) { + break; + } + + seenVattu = IS_VATTU(charClass); + + if (HAS_POST_BASE_FORM(charClass)) { + if (seenBelowBaseForm) { + break; + } + + postBase = baseConsonant; + } else if (HAS_BELOW_BASE_FORM(charClass)) { + seenBelowBaseForm = true; + } + + postBaseLimit -= 1; + } + + baseConsonant -= 1; + } + + /* Write Mpre */ + writeMpre(&output); + + /* Write eyelash RA */ + /* NOTE: baseLimit == prev + 3 iff eyelash RA present... */ + if (baseLimit == prev + 3) { + writeChar(&output, chars[prev], prev, half_p); + writeChar(&output, chars[prev + 1], prev /*+ 1*/, half_p); + writeChar(&output, chars[prev + 2], prev /*+ 2*/, half_p); + } + + /* write any pre-base consonants */ + supressVattu = true; + + for (i = baseLimit; i < baseConsonant; i += 1) { + gunichar ch = chars[i]; + gulong tag = blwf_p; + IndicOTCharClass charClass = indic_ot_get_char_class(class_table, ch); + + if (IS_CONSONANT(charClass)) { + if (IS_VATTU(charClass) && supressVattu) { + tag = nukt_p; + } + + supressVattu = IS_VATTU(charClass); + } else if (IS_VIRAMA(charClass) && chars[i + 1] == C_SIGN_ZWNJ) + { + tag = nukt_p; + } + + writeChar(&output, ch, /*i*/ prev, tag); + } + + bcSpan = baseConsonant + 1; + + if (bcSpan < vmabove && indic_ot_is_nukta(class_table, chars[bcSpan])) { + bcSpan += 1; + } + + if (baseConsonant == lastConsonant && bcSpan < vmabove && indic_ot_is_virama(class_table, chars[bcSpan])) { + bcSpan += 1; + + if (bcSpan < vmabove && chars[bcSpan] == C_SIGN_ZWNJ) { + bcSpan += 1; + } + } + + /* write base consonant */ + for (i = baseConsonant; i < bcSpan; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, nukt_p); + } + + if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { + writeMbelow(&output); + writeMabove(&output); + writeMpost(&output); + } + + /* write below-base consonants */ + if (baseConsonant != lastConsonant) { + for (i = bcSpan + 1; i < postBase; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, blwf_p); + } + + if (postBase > lastConsonant) { + /* write halant that was after base consonant */ + writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p); + } + } + + /* write Mbelow, Mabove */ + if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { + writeMbelow(&output); + writeMabove(&output); + } + + if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { + if (baseLimit == prev + 2) { + writeChar(&output, chars[prev], prev, rphf_p); + writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p); + } + + /* write VMabove */ + for (i = vmabove; i < vmpost; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, blwf_p); + } + } + + /* write post-base consonants */ + /* FIXME: does this put the right tags on post-base consonants? */ + if (baseConsonant != lastConsonant) { + if (postBase <= lastConsonant) { + for (i = postBase; i <= lastConsonant; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, nukt_p); + } + + /* write halant that was after base consonant */ + writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p); + } + + /* write the training halant, if there is one */ + if (lastConsonant < matra && indic_ot_is_virama(class_table, chars[matra])) { + writeChar(&output, chars[matra], /*matra*/ prev, nukt_p); + } + } + + /* write Mpost */ + if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { + writeMpost(&output); + } + + writeLengthMark(&output); + + /* write reph */ + if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { + if (baseLimit == prev + 2) { + writeChar(&output, chars[prev], prev, rphf_p); + writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p); + } + + /* write VMabove */ + for (i = vmabove; i < vmpost; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, blwf_p); + } + } + + /* write VMpost */ + for (i = vmpost; i < syllable; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, blwf_p); + } break; } diff --git a/modules/indic/indic-ot.h b/modules/indic/indic-ot.h index 2c2614e9..2e831f9a 100644 --- a/modules/indic/indic-ot.h +++ b/modules/indic/indic-ot.h @@ -133,7 +133,7 @@ enum indic_glyph_property_ rphf_p = (junk | dist), blwf_p = (junk | dist | rphf), half_p = (junk | dist | rphf | blwf), - nukt_p = (junk | dist | rphf | blwf | half) + nukt_p = (junk | dist | rphf | blwf | half | pstf) }; /* -- cgit v1.2.1