summaryrefslogtreecommitdiff
path: root/trunk/modules/indic/indic-ot-class-tables.c
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/modules/indic/indic-ot-class-tables.c')
-rw-r--r--trunk/modules/indic/indic-ot-class-tables.c522
1 files changed, 522 insertions, 0 deletions
diff --git a/trunk/modules/indic/indic-ot-class-tables.c b/trunk/modules/indic/indic-ot-class-tables.c
new file mode 100644
index 00000000..963dcfa5
--- /dev/null
+++ b/trunk/modules/indic/indic-ot-class-tables.c
@@ -0,0 +1,522 @@
+/* Pango
+ * indic-ot-class-tables.c:
+ *
+ * Copyright (C) 2001, 2002 IBM Corporation. All Rights Reserved.
+ * Author: Eric Mader <mader@jtcsv.com>
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, and/or sell copies of the
+ * Software, and to permit persons to whom the Software is furnished
+ * to do so, provided that the above copyright notice(s) and this
+ * permission notice appear in all copies of the Software and that
+ * both the above copyright notice(s) and this permission notice
+ * appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR
+ * ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder
+ * shall not be used in advertising or otherwise to promote the sale,
+ * use or other dealings in this Software without prior written
+ * authorization of the copyright holder.
+ */
+
+#include <config.h>
+
+#include "indic-ot.h"
+
+
+/*
+ * Split matra table indices
+ */
+#define _x1 (1 << CF_INDEX_SHIFT)
+#define _x2 (2 << CF_INDEX_SHIFT)
+#define _x3 (3 << CF_INDEX_SHIFT)
+#define _x4 (4 << CF_INDEX_SHIFT)
+#define _x5 (5 << CF_INDEX_SHIFT)
+#define _x6 (6 << CF_INDEX_SHIFT)
+#define _x7 (7 << CF_INDEX_SHIFT)
+#define _x8 (8 << CF_INDEX_SHIFT)
+#define _x9 (9 << CF_INDEX_SHIFT)
+
+/*
+ * Simple classes
+ */
+#define _xx (CC_RESERVED)
+#define _ma (CC_MODIFYING_MARK_ABOVE)
+#define _mp (CC_MODIFYING_MARK_POST)
+#define _iv (CC_INDEPENDENT_VOWEL)
+#define _ct (CC_CONSONANT | CF_CONSONANT)
+#define _cn (CC_CONSONANT_WITH_NUKTA | CF_CONSONANT)
+#define _nu (CC_NUKTA)
+#define _dv (CC_DEPENDENT_VOWEL)
+#define _dl (_dv | CF_MATRA_PRE)
+#define _db (_dv | CF_MATRA_BELOW)
+#define _da (_dv | CF_MATRA_ABOVE)
+#define _dr (_dv | CF_MATRA_POST)
+#define _lm (_dv | CF_LENGTH_MARK)
+#define _vr (CC_VIRAMA)
+#define _al (CC_AL_LAKUNA)
+
+/*
+ * Split matras
+ */
+#define _s1 (_dv | _x1)
+#define _s2 (_dv | _x2)
+#define _s3 (_dv | _x3)
+#define _s4 (_dv | _x4)
+#define _s5 (_dv | _x5)
+#define _s6 (_dv | _x6)
+#define _s7 (_dv | _x7)
+#define _s8 (_dv | _x8)
+#define _s9 (_dv | _x9)
+
+/*
+ * consonants with special forms
+ * NOTE: this assumes that no consonants with nukta have
+ * special forms... (Bengali RA?)
+ */
+#define _bb (_ct | CF_BELOW_BASE)
+#define _pb (_ct | CF_POST_BASE)
+#define _vt (_bb | CF_VATTU)
+#define _rv (_vt | CF_REPH)
+#define _rp (_pb | CF_REPH)
+#define _rb (_bb | CF_REPH)
+
+
+/*
+ * Character class tables
+ */
+static const IndicOTCharClass devaCharClasses[] =
+{
+ _xx, _ma, _ma, _mp, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, /* 0900 - 090F */
+ _iv, _iv, _iv, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, /* 0910 - 091F */
+ _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _cn, _ct, _ct, _ct, _ct, _ct, _ct, /* 0920 - 092F */
+ _rv, _cn, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _nu, _xx, _dr, _dl, /* 0930 - 093F */
+ _dr, _db, _db, _db, _db, _da, _da, _da, _da, _dr, _dr, _dr, _dr, _vr, _xx, _xx, /* 0940 - 094F */
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _cn, _cn, _cn, _cn, _cn, _cn, _cn, _cn, /* 0950 - 095F */
+ _iv, _iv, _db, _db, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0960 - 096F */
+ _xx /* 0970 */
+};
+
+/* As a hack, BENGALI LETTER A (U+0985) and BENGALI LETTER E (U+098F)
+ * are marked as consonants below; this gives approximately the
+ * right behavior for the sequences "a halant ya aa" and
+ * "e halant ya aa".
+ */
+static const IndicOTCharClass bengCharClasses[] =
+{
+ _xx, _ma, _mp, _mp, _xx, _ct, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _ct, /* 0980 - 098F */
+ _iv, _xx, _xx, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, /* 0990 - 099F */
+ _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _bb, _ct, _ct, _pb, /* 09A0 - 09AF */
+ _rv, _xx, _ct, _xx, _xx, _xx, _ct, _ct, _ct, _ct, _xx, _xx, _nu, _xx, _dr, _dl, /* 09B0 - 09BF */
+ _dr, _db, _db, _db, _db, _xx, _xx, _dl, _dl, _xx, _xx, _s1, _s2, _vr, _xx, _xx, /* 09C0 - 09CF */
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _dr, _xx, _xx, _xx, _xx, _cn, _cn, _xx, _cn, /* 09D0 - 09DF */
+ _iv, _iv, _dv, _dv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 09E0 - 09EF */
+ _rv, _ct, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx /* 09F0 - 09FA */
+};
+
+static const IndicOTCharClass guruCharClasses[] =
+{
+ _xx, _ma, _ma, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _xx, _xx, _iv, /* 0A00 - 0A0F */
+ _iv, _xx, _xx, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, /* 0A10 - 0A1F */
+ _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _bb, /* 0A20 - 0A2F */
+ _vt, _xx, _ct, _cn, _xx, _bb, _cn, _xx, _ct, _bb, _xx, _xx, _nu, _xx, _dr, _dl, /* 0A30 - 0A3F */
+ _dr, _db, _db, _xx, _xx, _xx, _xx, _da, _da, _xx, _xx, _da, _da, _vr, _xx, _xx, /* 0A40 - 0A4F */
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _cn, _cn, _cn, _ct, _xx, _cn, _xx, /* 0A50 - 0A5F */
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0A60 - 0A6F */
+ _ma, _ma, _ct, _ct, _xx /* 0A70 - 0A74 */
+};
+
+static const IndicOTCharClass gujrCharClasses[] =
+{
+ _xx, _ma, _ma, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _iv, _xx, _iv, /* 0A80 - 0A8F */
+ _iv, _iv, _xx, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, /* 0A90 - 0A9F */
+ _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _ct, /* 0AA0 - 0AAF */
+ _rv, _xx, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _nu, _xx, _dr, _dl, /* 0AB0 - 0ABF */
+ _dr, _db, _db, _db, _db, _da, _xx, _da, _da, _dr, _xx, _dr, _dr, _vr, _xx, _xx, /* 0AC0 - 0ACF */
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0AD0 - 0ADF */
+ _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx /* 0AE0 - 0AEF */
+};
+
+static const IndicOTCharClass oryaCharClasses[] =
+{
+ _xx, _ma, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _iv, /* 0B00 - 0B0F */
+ _iv, _xx, _xx, _iv, _iv, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _ct, _bb, /* 0B10 - 0B1F */
+ _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _pb, /* 0B20 - 0B2F */
+ _rb, _xx, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _xx, _xx, _nu, _xx, _dr, _da, /* 0B30 - 0B3F */
+ _dr, _db, _db, _db, _xx, _xx, _xx, _dl, _s1, _xx, _xx, _s2, _s3, _vr, _xx, _xx, /* 0B40 - 0B4F */
+ _xx, _xx, _xx, _xx, _xx, _xx, _da, _dr, _xx, _xx, _xx, _xx, _cn, _cn, _xx, _pb, /* 0B50 - 0B5F */
+ _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0B60 - 0B6F */
+ _xx, _bb /* 0B70 - 0B71 */
+};
+
+static const IndicOTCharClass tamlCharClasses[] =
+{
+ _xx, _xx, _ma, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _xx, _iv, _iv, /* 0B80 - 0B8F */
+ _iv, _xx, _iv, _iv, _iv, _ct, _xx, _xx, _xx, _ct, _ct, _xx, _ct, _xx, _ct, _ct, /* 0B90 - 0B9F */
+ _xx, _xx, _xx, _ct, _ct, _xx, _xx, _xx, _ct, _ct, _ct, _xx, _xx, _xx, _ct, _ct, /* 0BA0 - 0BAF */
+ _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _ct, _xx, _xx, _xx, _xx, _dr, _dr, /* 0BB0 - 0BBF */
+ _da, _dr, _dr, _xx, _xx, _xx, _dl, _dl, _dl, _xx, _s1, _s2, _s3, _vr, _xx, _xx, /* 0BC0 - 0BCF */
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _dr, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0BD0 - 0BDF */
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0BE0 - 0BEF */
+ _xx, _xx, _xx /* 0BF0 - 0BF2 */
+};
+
+/* FIXME: Should some of the bb's be pb's? (KA, NA, MA, YA, VA, etc. (approx 13)) */
+static const IndicOTCharClass teluCharClasses[] =
+{
+ _xx, _mp, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _iv, _iv, /* 0C00 - 0C0F */
+ _iv, _xx, _iv, _iv, _iv, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, /* 0C10 - 0C1F */
+ _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _bb, /* 0C20 - 0C2F */
+ _bb, _bb, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _xx, _xx, _xx, _xx, _da, _da, /* 0C30 - 0C3F */
+ _da, _dr, _dr, _dr, _dr, _xx, _da, _da, _s1, _xx, _da, _da, _da, _vr, _xx, _xx, /* 0C40 - 0C4F */
+ _xx, _xx, _xx, _xx, _xx, _da, _db, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0C50 - 0C5F */
+ _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx /* 0C60 - 0C6F */
+};
+
+/* U+CC3 and U+CC4 are _lm here not _dr since the Kannada rendering
+ * rules want them below and to the right of the entire cluster. They
+ * aren't, strictly speaking, length marks, however.
+ *
+ * There's some information about this in:
+ *
+ * http://brahmi.sourceforge.net/docs/KannadaComputing.html
+ */
+static const IndicOTCharClass kndaCharClasses[] =
+{
+ _xx, _xx, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _iv, /* 0C80 - 0C8F */
+ _iv, _xx, _iv, _iv, _iv, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, /* 0C90 - 0C9F */
+ _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _bb, /* 0CA0 - 0CAF */
+ _rb, _ct, _bb, _bb, _xx, _bb, _bb, _bb, _bb, _bb, _xx, _xx, _xx, _xx, _dr, _da, /* 0CB0 - 0CBF */
+ _s1, _dr, _dr, _lm, _lm, _xx, _da, _s2, _s3, _xx, _s4, _s5, _da, _vr, _xx, _xx, /* 0CC0 - 0CCF */
+ _xx, _xx, _xx, _xx, _xx, _lm, _lm, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _ct, _xx, /* 0CD0 - 0CDF */
+ _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx /* 0CE0 - 0CEF */
+};
+
+/*
+ * FIXME: this is correct for old-style Malayalam (MAL) but not for reformed Malayalam (MLR)
+ * FIXME: should there be a REPH for old-style Malayalam?
+ */
+static const IndicOTCharClass mlymCharClasses[] =
+{
+ _xx, _xx, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _iv, _iv, /* 0D00 - 0D0F */
+ _iv, _xx, _iv, _iv, _iv, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, /* 0D10 - 0D1F */
+ _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _pb, /* 0D20 - 0D2F */
+ _cn, _cn, _ct, _ct, _ct, _pb, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _xx, _dr, _dr, /* 0D30 - 0D3F */
+ _dr, _dr, _dr, _dr, _xx, _xx, _dl, _dl, _dl, _xx, _s1, _s2, _s3, _vr, _xx, _xx, /* 0D40 - 0D4F */
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _dr, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0D50 - 0D5F */
+ _iv, _iv, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx /* 0D60 - 0D6F */
+};
+
+static const IndicOTCharClass sinhCharClasses[] =
+{
+ _xx, _xx, _mp, _mp, _xx, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, _iv, /* 0D80 - 0D8F */
+ _iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _xx, _ct, _ct, _ct, _ct, _ct, _ct, /* 0D90 - 0D9F */
+ _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, /* 0DA0 - 0DAF */
+ _ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _xx, _xx, /* 0DB0 - 0DBF */
+ _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _al, _xx, _xx, _xx, _xx, _dr, /* 0DC0 - 0DCF */
+ _dr, _dr, _da, _da, _db, _xx, _db, _xx, _dr, _dl, _s1, _dl, _s2, _s3, _s4, _dr, /* 0DD0 - 0DDF */
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, /* 0DE0 - 0DEF */
+ _xx, _xx, _dr, _dr, _xx /* 0DF0 - 0DF4 */
+};
+
+
+/*
+ * Split matra tables
+ */
+static const IndicOTSplitMatra bengSplitTable[] = {{0x09C7, 0x09BE}, {0x09C7, 0x09D7}};
+
+static const IndicOTSplitMatra oryaSplitTable[] = {{0x0B47, 0x0B56}, {0x0B47, 0x0B3E}, {0x0B47, 0x0B57}};
+
+static const IndicOTSplitMatra tamlSplitTable[] = {{0x0BC6, 0x0BBE}, {0x0BC7, 0x0BBE}, {0x0BC6, 0x0BD7}};
+
+static const IndicOTSplitMatra teluSplitTable[] = {{0x0C46, 0x0C56}};
+
+static const IndicOTSplitMatra kndaSplitTable[] = {{0x0CBF, 0x0CD5}, {0x0CC6, 0x0CD5}, {0x0CC6, 0x0CD6}, {0x0CC6, 0x0CC2},
+ {0x0CC6, 0x0CC2, 0x0CD5}};
+
+static const IndicOTSplitMatra mlymSplitTable[] = {{0x0D46, 0x0D3E}, {0x0D47, 0x0D3E}, {0x0D46, 0x0D57}};
+
+static const IndicOTSplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF}, {0x0DD9, 0x0DCF, 0x0DCA},
+ {0x0DD9, 0x0DDF} };
+
+
+/*
+ * Script Flags
+ */
+
+/*
+ * FIXME: post 'GSUB' reordering of MATRA_PRE's for Malayalam and Tamil
+ * FIXME: reformed Malayalam needs to reorder VATTU to before base glyph...
+ * FIXME: eyelash RA only for Devanagari??
+ */
+#define DEVA_SCRIPT_FLAGS (SF_EYELASH_RA | SF_NO_POST_BASE_LIMIT)
+#define BENG_SCRIPT_FLAGS (SF_REPH_AFTER_BELOW | SF_NO_POST_BASE_LIMIT)
+#define GURU_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT)
+#define GUJR_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT)
+#define ORYA_SCRIPT_FLAGS (SF_REPH_AFTER_BELOW | SF_NO_POST_BASE_LIMIT)
+#define TAML_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT)
+#define TELU_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | 3)
+#define KNDA_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | 3)
+#define MLYM_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT )
+#define SINH_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT | SF_PROCESS_ZWJ)
+
+/*
+ * Indic Class Tables
+ */
+/* Add a little macro to compute lastChar based on size of the charClasses * table */
+#define INDIC_OT_CLASS_TABLE_DEFINE(name, firstChar, worstCaseExpansion, scriptFlags, charClasses, splitMatraTable) \
+ const IndicOTClassTable name = {firstChar, firstChar + G_N_ELEMENTS (charClasses) - 1, \
+ worstCaseExpansion, scriptFlags, charClasses, splitMatraTable}
+INDIC_OT_CLASS_TABLE_DEFINE (deva_class_table, 0x0900, 2, DEVA_SCRIPT_FLAGS, devaCharClasses, NULL);
+INDIC_OT_CLASS_TABLE_DEFINE (beng_class_table, 0x0980, 3, BENG_SCRIPT_FLAGS, bengCharClasses, bengSplitTable);
+INDIC_OT_CLASS_TABLE_DEFINE (guru_class_table, 0x0A00, 2, GURU_SCRIPT_FLAGS, guruCharClasses, NULL);
+INDIC_OT_CLASS_TABLE_DEFINE (gujr_class_table, 0x0A80, 2, GUJR_SCRIPT_FLAGS, gujrCharClasses, NULL);
+INDIC_OT_CLASS_TABLE_DEFINE (orya_class_table, 0x0B00, 3, ORYA_SCRIPT_FLAGS, oryaCharClasses, oryaSplitTable);
+INDIC_OT_CLASS_TABLE_DEFINE (taml_class_table, 0x0B80, 3, TAML_SCRIPT_FLAGS, tamlCharClasses, tamlSplitTable);
+INDIC_OT_CLASS_TABLE_DEFINE (telu_class_table, 0x0C00, 3, TELU_SCRIPT_FLAGS, teluCharClasses, teluSplitTable);
+INDIC_OT_CLASS_TABLE_DEFINE (knda_class_table, 0x0C80, 4, KNDA_SCRIPT_FLAGS, kndaCharClasses, kndaSplitTable);
+INDIC_OT_CLASS_TABLE_DEFINE (mlym_class_table, 0x0D00, 3, MLYM_SCRIPT_FLAGS, mlymCharClasses, mlymSplitTable);
+INDIC_OT_CLASS_TABLE_DEFINE (sinh_class_table, 0x0D80, 4, SINH_SCRIPT_FLAGS, sinhCharClasses, sinhSplitTable);
+
+const IndicOTSplitMatra *indic_ot_get_split_matra(const IndicOTClassTable *class_table, IndicOTCharClass char_class)
+{
+ gint32 index = (char_class & CF_INDEX_MASK) >> CF_INDEX_SHIFT;
+
+ return &class_table->splitMatraTable[index - 1];
+}
+
+gboolean indic_ot_is_vm_above(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_VM_ABOVE(char_class);
+}
+
+gboolean indic_ot_is_vm_post(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_VM_POST(char_class);
+}
+
+gboolean indic_ot_is_consonant(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_CONSONANT(char_class);
+}
+
+gboolean indic_ot_is_reph(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_REPH(char_class);
+}
+
+gboolean indic_ot_is_virama(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return (IS_VIRAMA(char_class) || IS_AL_LAKUNA(char_class));
+}
+
+gboolean indic_ot_is_al_lakuna(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_AL_LAKUNA(char_class);
+}
+
+gboolean indic_ot_is_nukta(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_NUKTA(char_class);
+}
+
+gboolean indic_ot_is_vattu(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_VATTU(char_class);
+}
+
+gboolean indic_ot_is_matra(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_MATRA(char_class);
+}
+
+gboolean indic_ot_is_split_matra(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_SPLIT_MATRA(char_class);
+}
+
+gboolean indic_ot_is_m_pre(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_M_PRE(char_class);
+}
+
+gboolean indic_ot_is_m_below(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_M_BELOW(char_class);
+}
+
+gboolean indic_ot_is_m_above(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_M_ABOVE(char_class);
+}
+
+gboolean indic_ot_is_m_post(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_M_POST(char_class);
+}
+
+gboolean indic_ot_is_length_mark(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return IS_LENGTH_MARK(char_class);
+}
+
+gboolean indic_ot_has_post_or_below_base_form(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return HAS_POST_OR_BELOW_BASE_FORM(char_class);
+}
+
+gboolean indic_ot_has_post_base_form(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return HAS_POST_BASE_FORM(char_class);
+}
+
+gboolean indic_ot_has_below_base_form(const IndicOTClassTable *class_table, gunichar ch)
+{
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, ch);
+
+ return HAS_BELOW_BASE_FORM(char_class);
+}
+
+IndicOTCharClass indic_ot_get_char_class(const IndicOTClassTable *class_table, gunichar ch)
+{
+ if (ch == C_SIGN_ZWJ) {
+ return CF_CONSONANT | CC_ZERO_WIDTH_MARK;
+ }
+
+ if (ch == C_SIGN_ZWNJ) {
+ return CC_ZERO_WIDTH_MARK;
+ }
+
+ if (ch < class_table->firstChar || ch > class_table->lastChar) {
+ return CC_RESERVED;
+ }
+
+ return class_table->charClasses[ch - class_table->firstChar];
+}
+
+static const gint8 stateTable[][CC_COUNT] =
+{
+/* xx ma mp iv ct cn nu dv vr zw al */
+ { 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, 6, 1, -1, -1, -1, -1, 5, 4, -1, -1},
+ {-1, 6, 1, -1, -1, -1, 2, 5, 4, 10, 9},
+ {-1, -1, -1, -1, 3, 2, -1, -1, -1, 8, -1},
+ {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, 7, 1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, 8, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, 8, -1, 8}
+
+};
+
+glong indic_ot_find_syllable(const IndicOTClassTable *class_table, const gunichar *chars, glong prev, glong char_count)
+{
+ glong cursor = prev;
+ gint8 state = 0;
+
+ while (cursor < char_count) {
+ IndicOTCharClass char_class = indic_ot_get_char_class(class_table, chars[cursor]);
+
+ state = stateTable[state][char_class & CF_CLASS_MASK];
+
+ /*for the components of split matra*/
+ if ((char_count >= cursor + 3) &&
+ (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCF && chars[cursor + 2] == 0x0DCA)) { /*for 3 split matra of Sinhala*/
+ return cursor + 3;
+ }
+ else if ((char_count >= cursor + 3) &&
+ (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CC2 && chars[cursor + 2] == 0x0CD5)) { /*for 3 split matra of Kannada*/
+ return cursor + 3;
+ }
+ /*for 2 split matra*/
+ else if (char_count >= cursor + 2) {
+ /*for Bengali*/
+ if ((chars[cursor] == 0x09C7 && chars[cursor + 1] == 0x09BE) ||
+ (chars[cursor] == 0x09C7 && chars[cursor + 1] == 0x09D7) ||
+ /*for Oriya*/
+ (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B3E) ||
+ (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B56) ||
+ (chars[cursor] == 0x0B47 && chars[cursor + 1] == 0x0B57) ||
+ /*for Tamil*/
+ (chars[cursor] == 0x0BC6 && chars[cursor + 1] == 0x0BBE) ||
+ (chars[cursor] == 0x0BC6 && chars[cursor + 1] == 0x0BD7) ||
+ (chars[cursor] == 0x0BC7 && chars[cursor + 1] == 0x0BBE) ||
+ /*for Malayalam*/
+ (chars[cursor] == 0x0D46 && chars[cursor + 1] == 0x0D3E) ||
+ (chars[cursor] == 0x0D46 && chars[cursor + 1] == 0x0D57) ||
+ (chars[cursor] == 0x0D47 && chars[cursor + 1] == 0x0D3E) ||
+ /*for Sinhala*/
+ (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCA) ||
+ (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DCF) ||
+ (chars[cursor] == 0x0DD9 && chars[cursor + 1] == 0x0DDF) ||
+ (chars[cursor] == 0x0DDC && chars[cursor + 1] == 0x0DCA) ||
+ /*for Telugu*/
+ (chars[cursor] == 0x0C46 && chars[cursor + 1] == 0x0C56) ||
+ /*for Kannada*/
+ (chars[cursor] == 0x0CBF && chars[cursor + 1] == 0x0CD5) ||
+ (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CD5) ||
+ (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CD6) ||
+ (chars[cursor] == 0x0CC6 && chars[cursor + 1] == 0x0CC2) ||
+ (chars[cursor] == 0x0CCA && chars[cursor + 1] == 0x0CD5))
+ return cursor + 2;
+ }
+
+ if (state < 0) {
+ break;
+ }
+
+ cursor += 1;
+ }
+
+ return cursor;
+}
+