diff options
Diffstat (limited to 'trunk/modules/indic/indic-ot.c')
-rw-r--r-- | trunk/modules/indic/indic-ot.c | 535 |
1 files changed, 535 insertions, 0 deletions
diff --git a/trunk/modules/indic/indic-ot.c b/trunk/modules/indic/indic-ot.c new file mode 100644 index 00000000..ca3ca7f0 --- /dev/null +++ b/trunk/modules/indic/indic-ot.c @@ -0,0 +1,535 @@ +/* Pango + * indic-ot.c: + * + * Copyright (C) 2001, 2002 IBM Corporation. All Rights Reserved. + * Author: Eric Mader <mader@jtcsv.com> + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, and/or sell copies of the + * Software, and to permit persons to whom the Software is furnished + * to do so, provided that the above copyright notice(s) and this + * permission notice appear in all copies of the Software and that + * both the above copyright notice(s) and this permission notice + * appear in supporting documentation. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR + * ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder + * shall not be used in advertising or otherwise to promote the sale, + * use or other dealings in this Software without prior written + * authorization of the copyright holder. + */ + +#include <config.h> + +#include "indic-ot.h" +#include "mprefixups.h" +/* + * FIXME: should the IndicOutput stuff be moved + * to a separate .h and .c file just to keep the + * clutter down here? (it's not really usefull + * anyplace else, is it?) + */ +struct _Output +{ + glong fOutIndex; + + const glong *fOriginalOffsets; + + gunichar *fOutChars; + glong *fCharIndices; + gulong *fCharTags; + + gunichar fMpre; + gunichar fMbelow; + gunichar fMabove; + gunichar fMpost; + gunichar fLengthMark; + gunichar fAlLakuna; /* to handle Al-Lakuna in sinhala split matras */ + glong fMatraIndex; + gulong fMatraTags; + gboolean fMatraWordStart; + glong fMPreOutIndex; + + MPreFixups *fMPreFixups; +}; + +typedef struct _Output Output; + +static void initOutput(Output *output, const glong *originalOffsets, gunichar *outChars, glong *charIndices, gulong *charTags, MPreFixups *mpreFixups) +{ + output->fOriginalOffsets = originalOffsets; + + output->fOutChars = outChars; + output->fCharIndices = charIndices; + output->fCharTags = charTags; + + output->fOutIndex = 0; + output->fMatraTags = 0; + + output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = output->fAlLakuna = 0; + + output->fMPreOutIndex = -1; + output->fMPreFixups = mpreFixups; +} + +static void saveMatra(Output *output, gunichar matra, IndicOTCharClass matraClass) +{ + /* FIXME: check if already set, or if not a matra... */ + if (IS_M_PRE(matraClass)) { + output->fMpre = matra; + } else if (IS_M_BELOW(matraClass)) { + output->fMbelow = matra; + } else if (IS_M_ABOVE(matraClass)) { + output->fMabove = matra; + } else if (IS_M_POST(matraClass)) { + output->fMpost = matra; + } else if (IS_LENGTH_MARK(matraClass)) { + output->fLengthMark = matra; + } else if (IS_AL_LAKUNA(matraClass)) { + output->fAlLakuna = matra; + } +} + +static void initMatra(Output *output, guint32 matraIndex, gulong matraTags, gboolean wordStart) +{ + output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = output->fAlLakuna = 0; + output->fMPreOutIndex = -1; + output->fMatraIndex = matraIndex; + output->fMatraTags = matraTags; + output->fMatraWordStart = wordStart; +} + +static gboolean noteMatra(Output *output, const IndicOTClassTable *classTable, gunichar matra) +{ + IndicOTCharClass matraClass = indic_ot_get_char_class(classTable, matra); + + if (IS_MATRA(matraClass)) { + if (IS_SPLIT_MATRA(matraClass)) { + const IndicOTSplitMatra *splitMatra = indic_ot_get_split_matra(classTable, matraClass); + int i; + + for (i = 0; i < 3 && (*splitMatra)[i] != 0; i += 1) { + gunichar piece = (*splitMatra)[i]; + IndicOTCharClass pieceClass = indic_ot_get_char_class(classTable, piece); + + saveMatra(output, piece, pieceClass); + } + } else { + saveMatra(output, matra, matraClass); + } + + return TRUE; + } else + return FALSE; +} + +static void noteBaseConsonant(Output *output) +{ + if (output->fMPreFixups && output->fMPreOutIndex >= 0) { + indic_mprefixups_add(output->fMPreFixups, output->fOutIndex, output->fMPreOutIndex); + } +} + +static void swapChars(Output *output, int a, int b) +{ + if (output->fOutChars != NULL) { + gunichar temp_char; + guint32 temp_index; + gulong temp_tag; + + temp_char = output->fOutChars[output->fOutIndex + b]; + temp_index = output->fCharIndices[output->fOutIndex + b]; + temp_tag = output->fCharTags[output->fOutIndex + b]; + + output->fOutChars[output->fOutIndex + b] = output->fOutChars[output->fOutIndex + a]; + output->fCharIndices[output->fOutIndex + b] = output->fCharIndices[output->fOutIndex + a]; + output->fCharTags[output->fOutIndex + b] = pstf_p; + + output->fOutChars[output->fOutIndex + a] = temp_char; + output->fCharIndices[output->fOutIndex + a] = temp_index; + output->fCharTags[output->fOutIndex + a] = temp_tag; + } +} + +static void writeChar(Output *output, gunichar ch, guint32 charIndex, gulong charTags) +{ + if (output->fOutChars != NULL) { + output->fOutChars[output->fOutIndex] = ch; + output->fCharIndices[output->fOutIndex] = output->fOriginalOffsets[charIndex]; + output->fCharTags[output->fOutIndex] = charTags; + } + + output->fOutIndex += 1; +} + +static void writeMpre(Output *output) +{ + if (output->fMpre != 0) { + gulong tags = output->fMatraTags; + if (output->fMatraWordStart) + tags &= ~init; + + output->fMPreOutIndex = output->fOutIndex; + writeChar(output, output->fMpre, output->fMatraIndex, tags); + } +} + +static void writeMbelow(Output *output) +{ + if (output->fMbelow != 0) { + writeChar(output, output->fMbelow, output->fMatraIndex, output->fMatraTags); + } +} + +static void writeMabove(Output *output) +{ + if (output->fMabove != 0) { + writeChar(output, output->fMabove, output->fMatraIndex, output->fMatraTags); + } +} + +static void writeMpost(Output *output) +{ + if (output->fMpost != 0) { + writeChar(output, output->fMpost, output->fMatraIndex, output->fMatraTags); + } +} + +static void writeLengthMark(Output *output) +{ + if (output->fLengthMark != 0) { + writeChar(output, output->fLengthMark, output->fMatraIndex, output->fMatraTags); + } +} + +static void writeAlLakuna(Output *output) +{ + if (output->fAlLakuna != 0) { + writeChar(output, output->fAlLakuna, output->fMatraIndex, output->fMatraTags); + } +} + +static glong getOutputIndex(Output *output) +{ + return output->fOutIndex; +} + +#define false 0 +#define true 1 + +glong indic_ot_reorder(const gunichar *chars, const glong *utf8_offsets, glong char_count, const IndicOTClassTable *class_table, gunichar *out_chars, glong *char_indices, gulong *char_tags, MPreFixups **outMPreFixups) +{ + MPreFixups *mpreFixups = NULL; + Output output; + glong i, prev = 0; + gboolean last_in_word = FALSE; + + if (outMPreFixups && (class_table->scriptFlags & SF_MPRE_FIXUP)) { + mpreFixups = indic_mprefixups_new (char_count); + } + + initOutput(&output, utf8_offsets, out_chars, char_indices, char_tags, mpreFixups); + + while (prev < char_count) { + glong syllable = indic_ot_find_syllable(class_table, chars, prev, char_count); + glong matra, vmabove, vmpost = syllable; + + while (vmpost > prev && indic_ot_is_vm_post(class_table, chars[vmpost - 1])) { + vmpost -= 1; + } + + vmabove = vmpost; + while (vmabove > prev && indic_ot_is_vm_above(class_table, chars[vmabove - 1])) { + vmabove -= 1; + } + + matra = vmabove - 1; + initMatra(&output, prev, blwf_p, !last_in_word); + while (noteMatra(&output, class_table, chars[matra]) && + matra != prev) + matra--; + + last_in_word = TRUE; + switch (indic_ot_get_char_class(class_table, chars[prev]) & CF_CLASS_MASK) { + case CC_RESERVED: + last_in_word = FALSE; + /* Fall through */ + case CC_INDEPENDENT_VOWEL: + case CC_ZERO_WIDTH_MARK: + for (i = prev; i < syllable; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, blwf_p); + } + + break; + + case CC_MODIFYING_MARK_ABOVE: + case CC_MODIFYING_MARK_POST: + case CC_NUKTA: + case CC_VIRAMA: + case CC_AL_LAKUNA: + writeChar(&output, C_DOTTED_CIRCLE, prev, blwf_p); + writeChar(&output, chars[prev], prev, blwf_p); + break; + + case CC_DEPENDENT_VOWEL: + writeMpre(&output); + writeChar(&output, C_DOTTED_CIRCLE, prev, blwf_p); + writeMbelow(&output); + writeMabove(&output); + writeMpost(&output); + writeLengthMark(&output); + writeAlLakuna(&output); + break; + + case CC_CONSONANT: + case CC_CONSONANT_WITH_NUKTA: + { + guint32 length = vmabove - prev; + glong lastConsonant = vmabove - 1; + glong baseLimit = prev; + glong baseConsonant, postBase, postBaseLimit; + gboolean seenVattu, seenBelowBaseForm, supressVattu; + glong bcSpan; + + /* Check for REPH at front of syllable */ + if (length > 2 && indic_ot_is_reph(class_table, chars[prev]) && indic_ot_is_virama(class_table, chars[prev + 1])) { + baseLimit += 2; + + /* Check for eyelash RA, if the script supports it */ + if ((class_table->scriptFlags & SF_EYELASH_RA) != 0 && + chars[baseLimit] == C_SIGN_ZWJ) { + if (length > 3) { + baseLimit += 1; + } else { + baseLimit -= 2; + } + } + } + + while (lastConsonant > baseLimit && !indic_ot_is_consonant(class_table, chars[lastConsonant])) { + lastConsonant -= 1; + } + + baseConsonant = lastConsonant; + postBase = lastConsonant + 1; + + postBaseLimit = class_table->scriptFlags & SF_POST_BASE_LIMIT_MASK; + seenVattu = false; + seenBelowBaseForm = false; + supressVattu = true; + + while (baseConsonant > baseLimit) { + IndicOTCharClass charClass = indic_ot_get_char_class(class_table, chars[baseConsonant]); + + if (IS_CONSONANT(charClass)) { + if (postBaseLimit == 0 || seenVattu || + (baseConsonant > baseLimit && !indic_ot_is_virama(class_table, chars[baseConsonant - 1])) || + !HAS_POST_OR_BELOW_BASE_FORM(charClass)) { + break; + } + + seenVattu = IS_VATTU(charClass); + + if (HAS_POST_BASE_FORM(charClass)) { + if (seenBelowBaseForm) { + break; + } + + postBase = baseConsonant; + } else if (HAS_BELOW_BASE_FORM(charClass)) { + seenBelowBaseForm = true; + } + + postBaseLimit -= 1; + } + + baseConsonant -= 1; + } + + /* Write Mpre */ + writeMpre(&output); + + /* Write eyelash RA */ + /* NOTE: baseLimit == prev + 3 iff eyelash RA present... */ + if (baseLimit == prev + 3) { + writeChar(&output, chars[prev], prev, half_p); + writeChar(&output, chars[prev + 1], prev /*+ 1*/, half_p); + writeChar(&output, chars[prev + 2], prev /*+ 2*/, half_p); + } + + /* write any pre-base consonants */ + supressVattu = true; + + for (i = baseLimit; i < baseConsonant; i += 1) { + gunichar ch = chars[i]; + /* Applying blwf to the first consonant doesn't makes sense + * since the below-form follows the consonant that it is + * put under */ + gulong tag = (i == baseLimit) ? half_p : blwf_p; + IndicOTCharClass charClass = indic_ot_get_char_class(class_table, ch); + + if (IS_CONSONANT(charClass)) { + if (IS_VATTU(charClass) && supressVattu) { + tag = nukt_p; + } + else if ((i + 2 < baseConsonant) && (chars[i + 2] == C_SIGN_ZWNJ)) { + tag = nukt_p; + } + + supressVattu = IS_VATTU(charClass); + } else if (IS_VIRAMA(charClass) && chars[i + 1] == C_SIGN_ZWNJ) + { + tag = nukt_p; + } + + writeChar(&output, ch, /*i*/ prev, tag); + } + + bcSpan = baseConsonant + 1; + + if (bcSpan < vmabove && indic_ot_is_nukta(class_table, chars[bcSpan])) { + bcSpan += 1; + } + + if (baseConsonant == lastConsonant && bcSpan < vmabove && indic_ot_is_virama(class_table, chars[bcSpan])) { + bcSpan += 1; + + if (bcSpan < vmabove && chars[bcSpan] == C_SIGN_ZWNJ) { + bcSpan += 1; + } + } + + /* note the base consonant for post-GSUB fixups */ + noteBaseConsonant(&output); + + /* write base consonant */ + for (i = baseConsonant; i < bcSpan; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, nukt_p); + } + + if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { + gboolean is_for_0C48 = FALSE; + if (output.fOutChars != NULL) { /*for 0x0C48 of Telugu*/ + int t; + for (t = prev; t < syllable; t++) { + if (chars[t] == 0x0C48) { + writeMabove(&output); + writeMbelow(&output); + writeMpost(&output); + + is_for_0C48 = TRUE; + break; + } + } + } + + if (!is_for_0C48) { + writeMbelow(&output); + writeMabove(&output); + writeMpost(&output); + } + } + + /* write below-base consonants */ + if (baseConsonant != lastConsonant) { + for (i = bcSpan + 1; i < postBase; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, blwf_p); + } + + if (postBase > lastConsonant) { + /* write halant that was after base consonant */ + writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p); + } + } + + /* write Mbelow, Mabove */ + if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { + writeMbelow(&output); + writeMabove(&output); + } + + if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { + if (baseLimit == prev + 2) { + writeChar(&output, chars[prev], prev, rphf_p); + writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p); + } + + /* write VMabove */ + for (i = vmabove; i < vmpost; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, blwf_p); + } + } + + /* write post-base consonants */ + if (baseConsonant != lastConsonant) { + if (postBase <= lastConsonant) { + for (i = postBase; i <= lastConsonant; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, pstf_p); + } + + /* write halant that was after base consonant */ + writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p); + } + + /* write the training halant, if there is one */ + if (lastConsonant < matra && indic_ot_is_virama(class_table, chars[matra])) { + writeChar(&output, chars[matra], /*matra*/ prev, nukt_p); + } + } + + /* write Mpost */ + if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { + writeMpost(&output); + } + + writeLengthMark(&output); + writeAlLakuna(&output); + + /* write reph */ + if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { + if (baseLimit == prev + 2) { + writeChar(&output, chars[prev], prev, rphf_p); + writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p); + } + + /* write VMabove */ + for (i = vmabove; i < vmpost; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, blwf_p); + } + } + + /* write VMpost */ + for (i = vmpost; i < syllable; i += 1) { + writeChar(&output, chars[i], /*i*/ prev, blwf_p); + } + + break; + } + + default: + break; + } + + + prev = syllable; + } + + if (outMPreFixups) { + *outMPreFixups = mpreFixups; + } + + return getOutputIndex(&output); +} |