summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOwen Taylor <otaylor@redhat.com>2005-06-21 15:58:45 +0000
committerOwen Taylor <otaylor@src.gnome.org>2005-06-21 15:58:45 +0000
commit50d0b340d03a19ae688a3807bbe22e5ae3763bd9 (patch)
treeab763b9293bd9eea51c7813802ed1691b03e696a
parente4bdbe0e4c468c50e809bf737444af04e054cd0e (diff)
downloadpango-50d0b340d03a19ae688a3807bbe22e5ae3763bd9.tar.gz
Add a Khmer module by Jens Herden and Javier Sola.
2005-06-21 Owen Taylor <otaylor@redhat.com> * modules/khmer configure.in modules/Makefile.am modules/makefile.msc: Add a Khmer module by Jens Herden and Javier Sola.
-rw-r--r--ChangeLog6
-rw-r--r--ChangeLog.pre-1-106
-rw-r--r--configure.in6
-rw-r--r--modules/Makefile.am1
-rw-r--r--modules/khmer/Makefile.am45
-rw-r--r--modules/khmer/khmer-fc.c736
-rw-r--r--modules/makefile.msc6
7 files changed, 804 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 30eaadea..f5e3abb2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2005-06-21 Owen Taylor <otaylor@redhat.com>
+
+ * modules/khmer configure.in modules/Makefile.am
+ modules/makefile.msc: Add a Khmer module by
+ Jens Herden and Javier Sola.
+
2005-06-16 Tor Lillqvist <tml@novell.com>
* configure.in: Move the check for native Win32 a bit later, as
diff --git a/ChangeLog.pre-1-10 b/ChangeLog.pre-1-10
index 30eaadea..f5e3abb2 100644
--- a/ChangeLog.pre-1-10
+++ b/ChangeLog.pre-1-10
@@ -1,3 +1,9 @@
+2005-06-21 Owen Taylor <otaylor@redhat.com>
+
+ * modules/khmer configure.in modules/Makefile.am
+ modules/makefile.msc: Add a Khmer module by
+ Jens Herden and Javier Sola.
+
2005-06-16 Tor Lillqvist <tml@novell.com>
* configure.in: Move the check for native Win32 a bit later, as
diff --git a/configure.in b/configure.in
index 06532e16..3acd4e25 100644
--- a/configure.in
+++ b/configure.in
@@ -326,11 +326,12 @@ basic_modules="basic-fc,basic-win32,basic-x"
hangul_modules="hangul-fc"
hebrew_modules="hebrew-fc"
indic_modules="indic-fc"
+khmer_modules="khmer-fc"
syriac_modules="syriac-fc"
thai_modules="thai-fc"
tibetan_modules="tibetan-fc"
-all_modules="$arabic_modules,$basic_modules,$hangul_modules,$hebrew_modules,$indic_modules,$syriac_modules,$thai_modules,$tibetan_modules"
+all_modules="$arabic_modules,$basic_modules,$hangul_modules,$hebrew_modules,$indic_modules,$khmer_modules,$syriac_modules,$thai_modules,$tibetan_modules"
included_modules=""
if test "x$with_included_modules" != xno || test "x$with_included_modules" = x ; then
@@ -385,6 +386,8 @@ AM_CONDITIONAL(INCLUDE_HEBREW_FC,echo $included_modules | egrep '(^|,)hebrew-fc(
AM_CONDITIONAL(INCLUDE_INDIC_FC,echo $included_modules | egrep '(^|,)indic-fc($|,)' > /dev/null)
+AM_CONDITIONAL(INCLUDE_KHMER_FC,echo $included_modules | grep '(^|,)khmer-fc($|,)' > /dev/null)
+
AM_CONDITIONAL(INCLUDE_SYRIAC_FC,echo $included_modules | egrep '(^|,)syriac-fc($|,)' > /dev/null)
AM_CONDITIONAL(INCLUDE_THAI_FC,echo $included_modules | egrep '(^|,)thai-fc($|,)' > /dev/null)
@@ -646,6 +649,7 @@ modules/basic/Makefile
modules/hangul/Makefile
modules/hebrew/Makefile
modules/indic/Makefile
+modules/khmer/Makefile
modules/syriac/Makefile
modules/thai/Makefile
modules/tibetan/Makefile
diff --git a/modules/Makefile.am b/modules/Makefile.am
index f1ff5d30..39cc6ff7 100644
--- a/modules/Makefile.am
+++ b/modules/Makefile.am
@@ -6,6 +6,7 @@ SUBDIRS = \
hangul \
hebrew \
indic \
+ khmer \
syriac \
thai \
tibetan
diff --git a/modules/khmer/Makefile.am b/modules/khmer/Makefile.am
new file mode 100644
index 00000000..d9608f3b
--- /dev/null
+++ b/modules/khmer/Makefile.am
@@ -0,0 +1,45 @@
+## Process this file with automake to create Makefile.in.
+
+pangolibs = $(top_builddir)/pango/libpango-$(PANGO_API_VERSION).la $(GLIB_LIBS)
+pangoft2libs = $(top_builddir)/pango/libpangoft2-$(PANGO_API_VERSION).la $(FREETYPE_LIBS) $(pangolibs)
+
+INCLUDES = \
+ -DG_LOG_DOMAIN=\"Pango\" \
+ -DPANGO_ENABLE_ENGINE \
+ -DG_DISABLE_DEPRECATED \
+ $(PANGO_DEBUG_FLAGS) \
+ -I$(top_srcdir) \
+ -I$(top_srcdir)/pango/ \
+ $(GLIB_CFLAGS)
+
+if PLATFORM_WIN32
+no_undefined = -no-undefined
+endif
+
+moduledir = $(libdir)/pango/$(PANGO_MODULE_VERSION)/modules
+module_LTLIBRARIES =
+noinst_LTLIBRARIES =
+
+
+if HAVE_FREETYPE
+INCLUDES += $(FREETYPE_CFLAGS)
+if INCLUDE_KHMER_FC
+noinst_LTLIBRARIES += libpango-khmer-fc.la
+else
+module_LTLIBRARIES += pango-khmer-fc.la
+endif
+endif
+
+fc_sources = \
+ khmer-fc.c
+
+pango_khmer_fc_la_LDFLAGS = -export-dynamic -avoid-version -module $(no_undefined)
+pango_khmer_fc_la_LIBADD = $(pangoft2libs)
+pango_khmer_fc_la_SOURCES = $(fc_sources)
+libpango_khmer_fc_la_SOURCES = $(fc_sources)
+libpango_khmer_fc_la_CFLAGS = -DPANGO_MODULE_PREFIX=_pango_khmer_fc
+
+
+included-modules: $(noinst_LTLIBRARIES)
+
+.PHONY: included-modules
diff --git a/modules/khmer/khmer-fc.c b/modules/khmer/khmer-fc.c
new file mode 100644
index 00000000..cc6c09a8
--- /dev/null
+++ b/modules/khmer/khmer-fc.c
@@ -0,0 +1,736 @@
+/* Pango
+ * khmer-fc.c: Shaper for Khmer script
+ *
+ * Copyright (C) 2004 Open Forum of Cambodia (www.forum.org.kh / www.khmeros.info)
+ * Authors: Jens Herden <jens@khmeros.info> and Javier Sola <javier@khmeros.info>
+ *
+ * Based on code from other shapers
+ * Copyright (C) 1999-2004 Red Hat Software
+ * Author: Owen Taylor <otaylor@redhat.com>
+
+ * Partially based on Indic shaper
+ * Copyright (C) 2001, 2002 IBM Corporation
+ * Author: Eric Mader <mader@jtcsv.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <string.h>
+
+
+#include "pango-engine.h"
+#include "pango-ot.h"
+#include "pango-utils.h"
+#include "pangofc-font.h"
+
+
+#define SCRIPT_ENGINE_NAME "KhmerScriptEngineFc"
+#define RENDER_TYPE PANGO_RENDER_TYPE_FC
+
+
+typedef PangoEngineShape KhmerEngineFc;
+typedef PangoEngineShapeClass KhmerEngineFcClass ;
+
+
+static PangoEngineScriptInfo khmer_scripts[] =
+{
+ { PANGO_SCRIPT_KHMER, "*" }
+};
+
+static PangoEngineInfo script_engines[] =
+{
+ {
+ SCRIPT_ENGINE_NAME,
+ PANGO_ENGINE_TYPE_SHAPE,
+ RENDER_TYPE,
+ khmer_scripts, G_N_ELEMENTS (khmer_scripts)
+ }
+};
+
+
+/* Vocabulary
+ * Base -> A consonant or an independent vowel in its full (not subscript) form. It is the
+ * center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
+ * split vowels, signs... but there is only one base in a syllable, it has to be coded as
+ * the first character of the syllable.
+ * split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
+ * Khmer language has five of them. Khmer split vowels either have one part before the
+ * base and one after the base or they have a part before the base and a part above the base.
+ * The first part of all Khmer split vowels is the same character, identical to
+ * the glyph of Khmer dependent vowel SRA EI
+ * coeng --> modifier used in Khmer to construct coeng (subscript) consonants
+ * Differently than indian languages, the coeng modifies the consonant that follows it,
+ * not the one preceding it Each consonant has two forms, the base form and the subscript form
+ * the base form is the normal one (using the consonants code-point), the subscript form is
+ * displayed when the combination coeng + consonant is encountered.
+ * Consonant of type 1 -> A consonant which has subscript for that only occupies space under a base consonant
+ * Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO)
+ * Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
+ * Consonant shifter -> Khmer has to series of consonants. The same dependent vowel has different sounds
+ * if it is attached to a consonant of the first series or a consonant of the second series
+ * Most consonants have an equivalent in the other series, but some of theme exist only in
+ * one series (for example SA). If we want to use the consonant SA with a vowel sound that
+ * can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
+ * of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
+ * x17C9 y x17CA. TRIISAP changes a first series consonant to second series sound and
+ * MUSIKATOAN a second series consonant to have a first series vowel sound.
+ * Consonant shifter are both normally supercript marks, but, when they are followed by a
+ * superscript, they change shape and take the form of subscript dependent vowel SRA U.
+ * If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
+ * should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
+ * be placed after the coeng consonant.
+ * Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base
+ * Each vowel has its own position. Only one vowel per syllable is allowed.
+ * Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are
+ * Allowed in a syllable.
+ *
+ *
+ * order is important here! This order must be the same that is found in each horizontal
+ * line in the statetable for Khmer (see khmerStateTable) .
+ */
+enum KhmerCharClassValues
+{
+ CC_RESERVED = 0,
+ CC_CONSONANT = 1, /* Consonant of type 1 or independent vowel */
+ CC_CONSONANT2 = 2, /* Consonant of type 2 */
+ CC_CONSONANT3 = 3, /* Consonant of type 3 */
+ CC_ZERO_WIDTH_NJ_MARK = 4, /* Zero Width non joiner character (0x200C) */
+ CC_CONSONANT_SHIFTER = 5,
+ CC_ROBAT = 6, /* Khmer special diacritic accent -treated differently in state table */
+ CC_COENG = 7, /* Subscript consonant combining character */
+ CC_DEPENDENT_VOWEL = 8,
+ CC_SIGN_ABOVE = 9,
+ CC_SIGN_AFTER = 10,
+ CC_ZERO_WIDTH_J_MARK = 11, /* Zero width joiner character */
+ CC_COUNT = 12 /* This is the number of character classes */
+};
+
+
+enum KhmerCharClassFlags
+{
+ CF_CLASS_MASK = 0x0000FFFF,
+
+ CF_CONSONANT = 0x01000000, /* flag to speed up comparing */
+ CF_SPLIT_VOWEL = 0x02000000, /* flag for a split vowel -> the first part is added in front of the syllable */
+ CF_DOTTED_CIRCLE = 0x04000000, /* add a dotted circle if a character with this flag is the first in a syllable */
+ CF_COENG = 0x08000000, /* flag to speed up comparing */
+ CF_SHIFTER = 0x10000000, /* flag to speed up comparing */
+ CF_ABOVE_VOWEL = 0x20000000, /* flag to speed up comparing */
+
+ /* position flags */
+ CF_POS_BEFORE = 0x00080000,
+ CF_POS_BELOW = 0x00040000,
+ CF_POS_ABOVE = 0x00020000,
+ CF_POS_AFTER = 0x00010000,
+ CF_POS_MASK = 0x000f0000
+};
+
+
+/* Characters that get refrered to by name */
+enum KhmerChar
+{
+ C_SIGN_ZWNJ = 0x200C,
+ C_SIGN_ZWJ = 0x200D,
+ C_DOTTED_CIRCLE = 0x25CC,
+ C_RO = 0x179A,
+ C_VOWEL_AA = 0x17B6,
+ C_SIGN_NIKAHIT = 0x17C6,
+ C_VOWEL_E = 0x17C1,
+ C_COENG = 0x17D2
+};
+
+
+enum
+{
+ /* simple classes, they are used in the state table (in this file) to control the length of a syllable
+ * they are also used to know where a character should be placed (location in reference to the base character)
+ * and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
+ * indicate error in syllable construction
+ */
+ _xx = CC_RESERVED,
+ _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
+ _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,
+ _c1 = CC_CONSONANT | CF_CONSONANT,
+ _c2 = CC_CONSONANT2 | CF_CONSONANT,
+ _c3 = CC_CONSONANT3 | CF_CONSONANT,
+ _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,
+ _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,
+ _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,
+ _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,
+ _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,
+ _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,
+ _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,
+
+ /* split vowel */
+ _va = _da | CF_SPLIT_VOWEL,
+ _vr = _dr | CF_SPLIT_VOWEL
+};
+
+
+/* Character class: a character class value
+ * ORed with character class flags.
+ */
+typedef glong KhmerCharClass;
+
+
+/* Character class tables
+ * _xx character does not combine into syllable, such as numbers, puntuation marks, non-Khmer signs...
+ * _sa Sign placed above the base
+ * _sp Sign placed after the base
+ * _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
+ * _c2 Consonant of type 2 (only RO)
+ * _c3 Consonant of type 3
+ * _rb Khmer sign robat u17CC. combining mark for subscript consonants
+ * _cd Consonant-shifter
+ * _dl Dependent vowel placed before the base (left of the base)
+ * _db Dependent vowel placed below the base
+ * _da Dependent vowel placed above the base
+ * _dr Dependent vowel placed behind the base (right of the base)
+ * _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
+ * it to create a subscript consonant or independent vowel
+ * _va Khmer split vowel in wich the first part is before the base and the second one above the base
+ * _vr Khmer split vowel in wich the first part is before the base and the second one behind (right of) the base
+ */
+static const KhmerCharClass khmerCharClasses[] =
+{
+ _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
+ _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
+ _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
+ _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
+ _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
+ _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx, /* 17D0 - 17DF */
+};
+
+/* this define must reflect the range of khmerCharClasses */
+#define firstChar 0x1780
+#define lastChar 0x17df
+
+
+
+/* The stateTable is used to calculate the end (the length) of a well
+ * formed Khmer Syllable.
+ *
+ * Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
+ * CharClassValues. This coincidence of values allows the follow up of the table.
+ *
+ * Each line corresponds to a state, which does not necessarily need to be a type
+ * of component... for example, state 2 is a base, with is always a first character
+ * in the syllable, but the state could be produced a consonant of any type when
+ * it is the first character that is analysed (in ground state).
+ *
+ * Differentiating 3 types of consonants is necessary in order to
+ * forbid the use of certain combinations, such as having a second
+ * coeng after a coeng RO,
+ * The inexistent possibility of having a type 3 after another type 3 is permitted,
+ * eliminating it would very much complicate the table, and it does not create typing
+ * problems, as the case above.
+ *
+ * The table is quite complex, in order to limit the number of coeng consonants
+ * to 2 (by means of the table).
+ *
+ * There a peculiarity, as far as Unicode is concerned:
+ * - The consonant-shifter is considered in two possible different
+ * locations, the one considered in Unicode 3.0 and the one considered in
+ * Unicode 4.0. (there is a backwards compatibility problem in this standard).
+ *
+ *
+ * xx independent character, such as a number, punctuation sign or non-khmer char
+ *
+ * c1 Khmer consonant of type 1 or an independent vowel
+ * that is, a letter in which the subscript for is only under the
+ * base, not taking any space to the right or to the left
+ *
+ * c2 Khmer consonant of type 2, the coeng form takes space under
+ * and to the left of the base (only RO is of this type)
+ *
+ * c3 Khmer consonant of type 3. Its subscript form takes space under
+ * and to the right of the base.
+ *
+ * cs Khmer consonant shifter
+ *
+ * rb Khmer robat
+ *
+ * co coeng character (u17D2)
+ *
+ * dv dependent vowel (including split vowels, they are treated in the same way).
+ * even if dv is not defined above, the component that is really tested for is
+ * KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
+ *
+ * zwj Zero Width joiner
+ *
+ * zwnj Zero width non joiner
+ *
+ * sa above sign
+ *
+ * sp post sign
+ *
+ * there are lines with equal content but for an easier understanding
+ * (and maybe change in the future) we did not join them
+ */
+static const gint8 khmerStateTable[][CC_COUNT] =
+{
+/* xx c1 c2 c3 zwnj cs rb co dv sa sp zwj */
+ { 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, /* 0 - ground state */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 1 - exit state (or sign to the right of the syllable) */
+ {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, /* 2 - Base consonant */
+ {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, /* 3 - First ZWNJ before a register shifter
+ It can only be followed by a shifter or a vowel */
+ {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, /* 4 - First register shifter */
+ {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, /* 5 - Robat */
+ {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, /* 6 - First Coeng */
+ {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 7 - First consonant of type 1 after coeng */
+ {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, /* 8 - First consonant of type 2 after coeng */
+ {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 9 - First consonant or type 3 after ceong */
+ {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
+ {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
+ {-1, -1, 1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
+ {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 13 - Second register shifter */
+ {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
+ {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, /* 16 - dependent vowel */
+ {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, /* 17 - sign above */
+ {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
+ {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, /* 20 - dependent vowel after a Robat */
+};
+
+
+enum property_flags
+{
+ abvf = 0x0001,
+ pref = 0x0002,
+ pstf = 0x0004,
+ blwf = 0x0008,
+
+ pres = 0x0010,
+ blws = 0x0020,
+ abvs = 0x0040,
+ psts = 0x0080,
+ clig = 0x0100,
+
+ dist = 0x0200,
+ blwm = 0x0400,
+ abvm = 0x0800,
+ mkmk = 0x1000
+};
+
+
+enum properties
+{
+ blwf_p = /*(blwf | blws | clig | dist | blwm | mkmk)*/ (abvf | pref | pstf | pres | abvs | psts | abvm),
+ pstf_p = /*(blwf | blws | pref | pres | pstf | psts | clig | dist | blwm)*/ (abvf | abvs | abvm | mkmk),
+ abvf_p = /*(abvf | abvs | clig | dist | abvm | mkmk)*/ (pref | pstf | blwf | pres | blws | psts | blwm),
+ pref_p = /*(pref | pres | clig | dist)*/ (abvf | pstf | blwf | blws | abvs | psts | blwm | abvm | mkmk),
+ default_p = /*(pres | blws | clig | dist | abvm | blwm | mkmk)*/ (pref | blwf |abvf | pstf | abvs | psts)
+};
+
+
+/* Below we define how a character in the input string is either in the khmerCharClasses table
+ * (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear
+ * within the syllable, but are not in the table) we also get their type back, or an unknown object
+ * in which case we get _xx (CC_RESERVED) back
+ */
+static KhmerCharClass
+get_char_class (gunichar ch)
+{
+ if (ch == C_SIGN_ZWJ)
+ return CC_ZERO_WIDTH_J_MARK;
+
+ if (ch == C_SIGN_ZWNJ)
+ return CC_ZERO_WIDTH_NJ_MARK;
+
+ if (ch < firstChar || ch > lastChar)
+ return CC_RESERVED;
+
+ return khmerCharClasses[ch - firstChar];
+}
+
+
+/* Given an input string of characters and a location in which to start looking
+ * calculate, using the state table, which one is the last character of the syllable
+ * that starts in the starting position.
+ */
+static glong
+find_syllable (const gunichar *chars,
+ glong start,
+ glong char_count)
+{
+ glong cursor = start;
+ gint8 state = 0;
+ KhmerCharClass charClass;
+
+ while (cursor < char_count)
+ {
+ charClass = get_char_class (chars[cursor]) & CF_CLASS_MASK;
+ state = khmerStateTable[state][charClass];
+
+ if (state < 0)
+ break;
+
+ cursor += 1;
+ }
+
+ return cursor;
+}
+
+
+static void
+maybe_add_GSUB_feature (PangoOTRuleset *ruleset,
+ PangoOTInfo *info,
+ guint script_index,
+ PangoOTTag tag,
+ gulong property_bit)
+{
+ guint feature_index;
+
+ /* 0xffff == default language system */
+ if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GSUB,
+ tag, script_index, 0xffff, &feature_index))
+ pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GSUB, feature_index,
+ property_bit);
+}
+
+
+static void
+maybe_add_GPOS_feature (PangoOTRuleset *ruleset,
+ PangoOTInfo *info,
+ guint script_index,
+ PangoOTTag tag,
+ gulong property_bit)
+{
+ guint feature_index;
+
+ /* 0xffff == default language system */
+ if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GPOS,
+ tag, script_index, 0xffff, &feature_index))
+ pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GPOS, feature_index,
+ property_bit);
+}
+
+
+static PangoOTRuleset *
+get_ruleset (FT_Face face)
+{
+ PangoOTRuleset *ruleset;
+ static GQuark ruleset_quark = 0;
+
+ PangoOTInfo *info = pango_ot_info_get (face);
+
+ if (!ruleset_quark)
+ ruleset_quark = g_quark_from_string ("pango-khmer-ruleset");
+
+ if (!info)
+ return NULL;
+
+ ruleset = g_object_get_qdata (G_OBJECT (info), ruleset_quark);
+
+ if (!ruleset)
+ {
+ PangoOTTag khmer_tag = FT_MAKE_TAG ('k', 'h', 'm', 'r');
+ guint script_index;
+
+ ruleset = pango_ot_ruleset_new (info);
+
+ if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GSUB,
+ khmer_tag, &script_index))
+ {
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','f'), pref);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','f'), blwf);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','f'), abvf);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','f'), pstf);
+
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','s'), pres);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','s'), blws);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','s'), abvs);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','s'), psts);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('c','l','i','g'), clig);
+ }
+
+ if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GPOS,
+ khmer_tag, &script_index))
+ {
+ maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('d','i','s','t'), dist);
+ maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','m'), blwm);
+ maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','m'), abvm);
+ maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('m','k','m','k'), mkmk);
+ }
+
+ g_object_set_qdata_full (G_OBJECT (info), ruleset_quark, ruleset,
+ (GDestroyNotify)g_object_unref);
+ }
+
+ return ruleset;
+}
+
+
+static PangoGlyph
+get_index (PangoFcFont *fc_font, gunichar wc)
+{
+ PangoGlyph index = pango_fc_font_get_glyph (fc_font, wc);
+ if (!index)
+ index = pango_fc_font_get_unknown_glyph (fc_font, wc);
+ return index;
+}
+
+
+static void
+khmer_engine_shape (PangoEngineShape *engine,
+ PangoFont *font,
+ const char *text,
+ int length,
+ PangoAnalysis *analysis,
+ PangoGlyphString *glyphs)
+{
+ PangoFcFont *fc_font = PANGO_FC_FONT (font);
+ FT_Face face;
+ PangoOTBuffer *buffer;
+ glong n_chars, i;
+ gunichar *wcs;
+ const char *p;
+ glong syllable;
+ KhmerCharClass charClass;
+ glong cursor = 0;
+
+ face = pango_fc_font_lock_face (fc_font);
+ g_assert (face);
+
+ wcs = g_utf8_to_ucs4_fast (text, length, &n_chars);
+ p = text;
+
+ /* This loop only exits when we reach the end of a run, which may contain
+ * several syllables.
+ */
+ while (cursor < n_chars)
+ {
+ syllable = find_syllable (wcs, cursor, n_chars);
+
+ /* write a pre vowel or the pre part of a split vowel first
+ * and look out for coeng + ro. RO is the only vowel of type 2, and
+ * therefore the only one that requires saving space before the base.
+ */
+ glong coengRo = -1; /* There is no Coeng Ro, if found this value will change */
+ for (i = cursor; i < syllable; i += 1)
+ {
+ charClass = get_char_class (wcs[i]);
+
+ /* if a split vowel, write the pre part. In Khmer the pre part
+ * is the same for all split vowels, same glyph as pre vowel C_VOWEL_E
+ */
+ if (charClass & CF_SPLIT_VOWEL)
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_VOWEL_E), pref_p, p - text);
+ break; /* there can be only one vowel */
+ }
+
+ /* if a vowel with pos before write it out */
+ if (charClass & CF_POS_BEFORE)
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pref_p, p - text);
+ break; /* there can be only one vowel */
+ }
+
+ /* look for coeng + ro and remember position
+ * works because coeng + ro is always in front of a vowel (if there is a vowel)
+ * and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
+ * with this flag
+ */
+ if ((charClass & CF_COENG) && (i + 1 < syllable) &&
+ ((get_char_class (wcs[i + 1]) & CF_CLASS_MASK) == CC_CONSONANT2))
+ {
+ coengRo = i;
+ }
+ }
+
+ /* write coeng + ro if found */
+ if (coengRo > -1)
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_COENG), pref_p, p - text);
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_RO), pref_p, p - text);
+ }
+
+ /* shall we add a dotted circle?
+ * If in the position in which the base should be (first char in the string) there is
+ * a character that has the Dotted circle flag (a character that cannot be a base)
+ * then write a dotted circle
+ */
+ if (get_char_class (wcs[cursor]) & CF_DOTTED_CIRCLE)
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_DOTTED_CIRCLE), default_p, p - text);
+ }
+
+ /* copy what is left to the output, skipping before vowels and
+ * coeng Ro if they are present
+ */
+ for (i = cursor; i < syllable; i += 1)
+ {
+ charClass = get_char_class (wcs[i]);
+
+ /* skip a before vowel, it was already processed */
+ if (charClass & CF_POS_BEFORE)
+ {
+ p = g_utf8_next_char (p);
+ continue;
+ }
+
+ /* skip coeng + ro, it was already processed */
+ if (i == coengRo)
+ {
+ p = g_utf8_next_char (p);
+ i += 1;
+ p = g_utf8_next_char (p);
+ continue;
+ }
+
+ switch (charClass & CF_POS_MASK)
+ {
+ case CF_POS_ABOVE :
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), abvf_p, p - text);
+ break;
+
+ case CF_POS_AFTER :
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
+ break;
+
+ case CF_POS_BELOW :
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+ break;
+
+ default:
+ /* assign the correct flags to a coeng consonant
+ * Consonants of type 3 are taged as Post forms and those type 1 as below forms
+ */
+ if ((charClass & CF_COENG) && i + 1 < syllable)
+ {
+ if ((get_char_class (wcs[i + 1]) & CF_CLASS_MASK) == CC_CONSONANT3)
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
+ p = g_utf8_next_char (p);
+ i += 1;
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
+ break;
+ }
+ else
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+ p = g_utf8_next_char (p);
+ i += 1;
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+ break;
+ }
+ }
+
+ /* if a shifter is followed by an above vowel change the shifter to below form,
+ * an above vowel can have two possible positions i + 1 or i + 3
+ * (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
+ * and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
+ * different positions, right after the shifter or after a vowel (Unicode 4)
+ */
+ if ((charClass & CF_SHIFTER) && (i + 1 < syllable))
+ {
+ if (get_char_class (wcs[i + 1]) & CF_ABOVE_VOWEL)
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+ break;
+ }
+ if (i + 2 < syllable &&
+ (wcs[i + 1] == C_VOWEL_AA) &&
+ (wcs[i + 2] == C_SIGN_NIKAHIT) )
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+ break;
+ }
+ if (i + 3 < syllable && (get_char_class (wcs[i + 3]) & CF_ABOVE_VOWEL) )
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+ break;
+ }
+ if (i + 4 < syllable &&
+ (wcs[i + 3] == C_VOWEL_AA) &&
+ (wcs[i + 4] == C_SIGN_NIKAHIT) )
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+ break;
+ }
+
+ }
+
+ /* default - any other characters */
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), default_p, p - text);
+ break;
+ } /* switch */
+ p = g_utf8_next_char (p);
+ } /* for */
+
+ cursor = syllable; /* move the pointer to the start of next syllable */
+ } /* while */
+
+ /* do gsub processing */
+ PangoOTRuleset *ruleset = get_ruleset (face);
+ if (ruleset != NULL)
+ {
+ pango_ot_ruleset_substitute (ruleset, buffer);
+ pango_ot_ruleset_position (ruleset, buffer);
+ }
+
+ pango_ot_buffer_output (buffer, glyphs);
+
+ g_free (wcs);
+ pango_ot_buffer_destroy (buffer);
+
+ pango_fc_font_unlock_face (fc_font);
+}
+
+
+static void
+khmer_engine_fc_class_init (PangoEngineShapeClass *class)
+{
+ class->script_shape = khmer_engine_shape;
+}
+
+PANGO_ENGINE_SHAPE_DEFINE_TYPE (KhmerEngineFc, khmer_engine_fc,
+ khmer_engine_fc_class_init, NULL);
+
+
+void
+PANGO_MODULE_ENTRY(init) (GTypeModule *module)
+{
+ khmer_engine_fc_register_type (module);
+}
+
+
+void
+PANGO_MODULE_ENTRY(exit) (void)
+{
+}
+
+
+void
+PANGO_MODULE_ENTRY(list) (PangoEngineInfo **engines,
+ int *n_engines)
+{
+ *engines = script_engines;
+ *n_engines = G_N_ELEMENTS (script_engines);
+}
+
+
+PangoEngine *
+PANGO_MODULE_ENTRY(create) (const char *id)
+{
+ if (!strcmp (id, SCRIPT_ENGINE_NAME))
+ return g_object_new (khmer_engine_fc_type, NULL);
+ else
+ return NULL;
+}
diff --git a/modules/makefile.msc b/modules/makefile.msc
index e56edf6a..5e61f0cd 100644
--- a/modules/makefile.msc
+++ b/modules/makefile.msc
@@ -1,6 +1,6 @@
# modles for the ft2 backend, don't include 'basic'
# here it is built-in pangoft.dll, see ../pango/makefile.msc
-MODULES = arabic hangul hebrew indic syriac thai
+MODULES = arabic hangul hebrew indic syriac thai khmer
!IFNDEF MODULE
@@ -35,6 +35,10 @@ OBJECTS = hebrew-fc.obj hebrew-shaper.obj
OBJECTS = indic-fc.obj indic-ot.obj indic-ot-class-tables.obj mprefixups.obj
!ENDIF
+!IFDEF OBJ_khmer
+OBJECTS = khmer_fc.obj
+!ENDIF
+
!IFDEF OBJ_syriac
OBJECTS = syriac-fc.obj syriac-ot.obj
!ENDIF