/* Pango * indic-ot.c: * * Copyright (C) 2001, 2002 IBM Corporation. All Rights Reserved. * Author: Eric Mader * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, and/or sell copies of the * Software, and to permit persons to whom the Software is furnished * to do so, provided that the above copyright notice(s) and this * permission notice appear in all copies of the Software and that * both the above copyright notice(s) and this permission notice * appear in supporting documentation. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR * ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. * * Except as contained in this notice, the name of a copyright holder * shall not be used in advertising or otherwise to promote the sale, * use or other dealings in this Software without prior written * authorization of the copyright holder. */ #include #include "indic-ot.h" #include "mprefixups.h" /* * FIXME: should the IndicOutput stuff be moved * to a separate .h and .c file just to keep the * clutter down here? (it's not really useful * anyplace else, is it?) 
*/ struct _Output { glong fOutIndex; const glong *fOriginalOffsets; gunichar *fOutChars; glong *fCharIndices; gulong *fCharTags; gunichar fMpre; gunichar fMbelow; gunichar fMabove; gunichar fMpost; gunichar fLengthMark; gunichar fAlLakuna; /* to handle Al-Lakuna in sinhala split matras */ glong fMatraIndex; gulong fMatraTags; gboolean fMatraWordStart; glong fMPreOutIndex; MPreFixups *fMPreFixups; }; typedef struct _Output Output; static void initOutput(Output *output, const glong *originalOffsets, gunichar *outChars, glong *charIndices, gulong *charTags, MPreFixups *mpreFixups) { output->fOriginalOffsets = originalOffsets; output->fOutChars = outChars; output->fCharIndices = charIndices; output->fCharTags = charTags; output->fOutIndex = 0; output->fMatraTags = 0; output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = output->fAlLakuna = 0; output->fMPreOutIndex = -1; output->fMPreFixups = mpreFixups; } static void saveMatra(Output *output, gunichar matra, IndicOTCharClass matraClass) { /* FIXME: check if already set, or if not a matra... 
*/ if (IS_M_PRE(matraClass)) { output->fMpre = matra; } else if (IS_M_BELOW(matraClass)) { output->fMbelow = matra; } else if (IS_M_ABOVE(matraClass)) { output->fMabove = matra; } else if (IS_M_POST(matraClass)) { output->fMpost = matra; } else if (IS_LENGTH_MARK(matraClass)) { output->fLengthMark = matra; } else if (IS_AL_LAKUNA(matraClass)) { output->fAlLakuna = matra; } } static void initMatra(Output *output, guint32 matraIndex, gulong matraTags, gboolean wordStart) { output->fMpre = output->fMbelow = output->fMabove = output->fMpost = output->fLengthMark = output->fAlLakuna = 0; output->fMPreOutIndex = -1; output->fMatraIndex = matraIndex; output->fMatraTags = matraTags; output->fMatraWordStart = wordStart; } static gboolean noteMatra(Output *output, const IndicOTClassTable *classTable, gunichar matra) { IndicOTCharClass matraClass = indic_ot_get_char_class(classTable, matra); if (IS_MATRA(matraClass)) { if (IS_SPLIT_MATRA(matraClass)) { const IndicOTSplitMatra *splitMatra = indic_ot_get_split_matra(classTable, matraClass); int i; for (i = 0; i < 3 && (*splitMatra)[i] != 0; i += 1) { gunichar piece = (*splitMatra)[i]; IndicOTCharClass pieceClass = indic_ot_get_char_class(classTable, piece); saveMatra(output, piece, pieceClass); } } else { saveMatra(output, matra, matraClass); } return TRUE; } else return FALSE; } static void noteBaseConsonant(Output *output) { if (output->fMPreFixups && output->fMPreOutIndex >= 0) { indic_mprefixups_add(output->fMPreFixups, output->fOutIndex, output->fMPreOutIndex); } } static void swapChars(Output *output, int a, int b) { if (output->fOutChars != NULL) { gunichar temp_char; guint32 temp_index; gulong temp_tag; temp_char = output->fOutChars[output->fOutIndex + b]; temp_index = output->fCharIndices[output->fOutIndex + b]; temp_tag = output->fCharTags[output->fOutIndex + b]; output->fOutChars[output->fOutIndex + b] = output->fOutChars[output->fOutIndex + a]; output->fCharIndices[output->fOutIndex + b] = 
output->fCharIndices[output->fOutIndex + a]; output->fCharTags[output->fOutIndex + b] = pstf_p; output->fOutChars[output->fOutIndex + a] = temp_char; output->fCharIndices[output->fOutIndex + a] = temp_index; output->fCharTags[output->fOutIndex + a] = temp_tag; } } static void writeChar(Output *output, gunichar ch, guint32 charIndex, gulong charTags) { if (output->fOutChars != NULL) { output->fOutChars[output->fOutIndex] = ch; output->fCharIndices[output->fOutIndex] = output->fOriginalOffsets[charIndex]; output->fCharTags[output->fOutIndex] = charTags; } output->fOutIndex += 1; } static void writeMpre(Output *output) { if (output->fMpre != 0) { gulong tags = output->fMatraTags; if (output->fMatraWordStart) tags &= ~init; output->fMPreOutIndex = output->fOutIndex; writeChar(output, output->fMpre, output->fMatraIndex, tags); } } static void writeMbelow(Output *output) { if (output->fMbelow != 0) { writeChar(output, output->fMbelow, output->fMatraIndex, output->fMatraTags); } } static void writeMabove(Output *output) { if (output->fMabove != 0) { writeChar(output, output->fMabove, output->fMatraIndex, output->fMatraTags); } } static void writeMpost(Output *output) { if (output->fMpost != 0) { writeChar(output, output->fMpost, output->fMatraIndex, output->fMatraTags); } } static void writeLengthMark(Output *output) { if (output->fLengthMark != 0) { writeChar(output, output->fLengthMark, output->fMatraIndex, output->fMatraTags); } } static void writeAlLakuna(Output *output) { if (output->fAlLakuna != 0) { writeChar(output, output->fAlLakuna, output->fMatraIndex, output->fMatraTags); } } static glong getOutputIndex(Output *output) { return output->fOutIndex; } #define false 0 #define true 1 glong indic_ot_reorder(const gunichar *chars, const glong *utf8_offsets, glong char_count, const IndicOTClassTable *class_table, gunichar *out_chars, glong *char_indices, gulong *char_tags, MPreFixups **outMPreFixups) { MPreFixups *mpreFixups = NULL; Output output; glong i, prev = 0; 
gboolean last_in_word = FALSE; if (outMPreFixups && (class_table->scriptFlags & SF_MPRE_FIXUP)) { mpreFixups = indic_mprefixups_new (char_count); } initOutput(&output, utf8_offsets, out_chars, char_indices, char_tags, mpreFixups); while (prev < char_count) { glong syllable = indic_ot_find_syllable(class_table, chars, prev, char_count); glong matra, vmabove, vmpost = syllable; while (vmpost > prev && indic_ot_is_vm_post(class_table, chars[vmpost - 1])) { vmpost -= 1; } vmabove = vmpost; while (vmabove > prev && indic_ot_is_vm_above(class_table, chars[vmabove - 1])) { vmabove -= 1; } matra = vmabove - 1; initMatra(&output, prev, blwf_p, !last_in_word); while (noteMatra(&output, class_table, chars[matra]) && matra != prev) matra--; last_in_word = TRUE; switch (indic_ot_get_char_class(class_table, chars[prev]) & CF_CLASS_MASK) { case CC_RESERVED: last_in_word = FALSE; /* Fall through */ case CC_INDEPENDENT_VOWEL: case CC_ZERO_WIDTH_MARK: for (i = prev; i < syllable; i += 1) { writeChar(&output, chars[i], /*i*/ prev, blwf_p); } break; case CC_MODIFYING_MARK_ABOVE: case CC_MODIFYING_MARK_POST: case CC_NUKTA: case CC_VIRAMA: case CC_AL_LAKUNA: writeChar(&output, C_DOTTED_CIRCLE, prev, blwf_p); writeChar(&output, chars[prev], prev, blwf_p); break; case CC_DEPENDENT_VOWEL: writeMpre(&output); writeChar(&output, C_DOTTED_CIRCLE, prev, blwf_p); writeMbelow(&output); writeMabove(&output); writeMpost(&output); writeLengthMark(&output); writeAlLakuna(&output); break; case CC_CONSONANT: case CC_CONSONANT_WITH_NUKTA: { guint32 length = vmabove - prev; glong lastConsonant = vmabove - 1; glong baseLimit = prev; glong baseConsonant, postBase, postBaseLimit; gboolean seenVattu, seenBelowBaseForm, supressVattu; glong bcSpan; /* Check for REPH at front of syllable */ if (length > 2 && indic_ot_is_reph(class_table, chars[prev]) && indic_ot_is_virama(class_table, chars[prev + 1])) { baseLimit += 2; /* Check for eyelash RA, if the script supports it */ if ((class_table->scriptFlags & 
SF_EYELASH_RA) != 0 && chars[baseLimit] == C_SIGN_ZWJ) { if (length > 3) { baseLimit += 1; } else { baseLimit -= 2; } } } while (lastConsonant > baseLimit && !indic_ot_is_consonant(class_table, chars[lastConsonant])) { lastConsonant -= 1; } baseConsonant = lastConsonant; postBase = lastConsonant + 1; postBaseLimit = class_table->scriptFlags & SF_POST_BASE_LIMIT_MASK; seenVattu = false; seenBelowBaseForm = false; supressVattu = true; while (baseConsonant > baseLimit) { IndicOTCharClass charClass = indic_ot_get_char_class(class_table, chars[baseConsonant]); if (IS_CONSONANT(charClass)) { if (postBaseLimit == 0 || seenVattu || (baseConsonant > baseLimit && !indic_ot_is_virama(class_table, chars[baseConsonant - 1])) || !HAS_POST_OR_BELOW_BASE_FORM(charClass)) { break; } seenVattu = IS_VATTU(charClass); if (HAS_POST_BASE_FORM(charClass)) { if (seenBelowBaseForm) { break; } postBase = baseConsonant; } else if (HAS_BELOW_BASE_FORM(charClass)) { seenBelowBaseForm = true; } postBaseLimit -= 1; } baseConsonant -= 1; } /* Write Mpre */ writeMpre(&output); /* Write eyelash RA */ /* NOTE: baseLimit == prev + 3 iff eyelash RA present... */ if (baseLimit == prev + 3) { writeChar(&output, chars[prev], prev, half_p); writeChar(&output, chars[prev + 1], prev /*+ 1*/, half_p); writeChar(&output, chars[prev + 2], prev /*+ 2*/, half_p); } /* write any pre-base consonants */ supressVattu = true; for (i = baseLimit; i < baseConsonant; i += 1) { gunichar ch = chars[i]; /* Applying blwf to the first consonant doesn't makes sense * since the below-form follows the consonant that it is * put under */ gulong tag = (i == baseLimit) ? 
half_p : blwf_p; IndicOTCharClass charClass = indic_ot_get_char_class(class_table, ch); if (IS_CONSONANT(charClass)) { if (IS_VATTU(charClass) && supressVattu) { tag = nukt_p; } else if ((i + 2 < baseConsonant) && (chars[i + 2] == C_SIGN_ZWNJ)) { tag = nukt_p; } supressVattu = IS_VATTU(charClass); } else if (IS_VIRAMA(charClass) && chars[i + 1] == C_SIGN_ZWNJ) { tag = nukt_p; } writeChar(&output, ch, /*i*/ prev, tag); } bcSpan = baseConsonant + 1; if (bcSpan < vmabove && indic_ot_is_nukta(class_table, chars[bcSpan])) { bcSpan += 1; } if (baseConsonant == lastConsonant && bcSpan < vmabove && indic_ot_is_virama(class_table, chars[bcSpan])) { bcSpan += 1; if (bcSpan < vmabove && chars[bcSpan] == C_SIGN_ZWNJ) { bcSpan += 1; } } /* note the base consonant for post-GSUB fixups */ noteBaseConsonant(&output); /* write base consonant */ for (i = baseConsonant; i < bcSpan; i += 1) { writeChar(&output, chars[i], /*i*/ prev, nukt_p); } /* for the special conjuction of Cons+0x0d4d+0x0d31 or Cons+0x0d4d+0x0d30 of Malayalam */ if ((baseConsonant - 2 >= 0) && (chars[baseConsonant - 1] == 0x0d4d) && ((chars[baseConsonant] == 0x0d31) || (chars[baseConsonant] == 0x0d30)) && ((chars[baseConsonant - 2] >= 0x0d15) && (chars[baseConsonant - 2] <= 0x0d39))) { swapChars (&output, -1, -3); if (mpreFixups) { if (mpreFixups->fFixupCount > 0) { mpreFixups->fFixupCount--; } } } if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { gboolean is_for_0C48 = FALSE; if (output.fOutChars != NULL) { /*for 0x0C48 of Telugu*/ int t; for (t = prev; t < syllable; t++) { if (chars[t] == 0x0C48) { writeMabove(&output); writeMbelow(&output); writeMpost(&output); is_for_0C48 = TRUE; break; } } } if (!is_for_0C48) { writeMbelow(&output); writeMabove(&output); writeMpost(&output); } } /* write below-base consonants */ if (baseConsonant != lastConsonant) { for (i = bcSpan + 1; i < postBase; i += 1) { writeChar(&output, chars[i], /*i*/ prev, blwf_p); } if (postBase > lastConsonant) { /* write halant that 
was after base consonant */ writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p); } } /* write Mbelow, Mabove */ if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { writeMbelow(&output); writeMabove(&output); } if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { if (baseLimit == prev + 2) { writeChar(&output, chars[prev], prev, rphf_p); writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p); } /* write VMabove */ for (i = vmabove; i < vmpost; i += 1) { writeChar(&output, chars[i], /*i*/ prev, blwf_p); } } /* write post-base consonants */ if (baseConsonant != lastConsonant) { if (postBase <= lastConsonant) { for (i = postBase; i <= lastConsonant; i += 1) { writeChar(&output, chars[i], /*i*/ prev, pstf_p); } /* write halant that was after base consonant */ writeChar(&output, chars[bcSpan], /*bcSpan*/ prev, blwf_p); } /* write the training halant, if there is one */ if (lastConsonant < matra && indic_ot_is_virama(class_table, chars[matra])) { writeChar(&output, chars[matra], /*matra*/ prev, nukt_p); } } /* write Mpost */ if ((class_table->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { writeMpost(&output); } writeLengthMark(&output); writeAlLakuna(&output); /* write reph */ if ((class_table->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { if (baseLimit == prev + 2) { writeChar(&output, chars[prev], prev, rphf_p); writeChar(&output, chars[prev + 1], prev /*+ 1*/, rphf_p); } /* write VMabove */ for (i = vmabove; i < vmpost; i += 1) { writeChar(&output, chars[i], /*i*/ prev, blwf_p); } } /* write VMpost */ for (i = vmpost; i < syllable; i += 1) { writeChar(&output, chars[i], /*i*/ prev, blwf_p); } break; } default: break; } prev = syllable; } if (outMPreFixups) { *outMPreFixups = mpreFixups; } return getOutputIndex(&output); }