Add a Khmer module by Jens Herden and Javier Sola.

2005-06-21 Owen Taylor <otaylor@redhat.com> * modules/khmer configure.in modules/Makefile.am modules/makefile.msc: Add a Khmer module by Jens Herden and Javier Sola.
author: Owen Taylor <otaylor@redhat.com> 2005-06-21 15:58:45 +0000
committer: Owen Taylor <otaylor@src.gnome.org> 2005-06-21 15:58:45 +0000
commit: 50d0b340d03a19ae688a3807bbe22e5ae3763bd9 (patch)
tree: ab763b9293bd9eea51c7813802ed1691b03e696a
parent: e4bdbe0e4c468c50e809bf737444af04e054cd0e (diff)
download: pango-50d0b340d03a19ae688a3807bbe22e5ae3763bd9.tar.gz
7 files changed, 804 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 30eaadea..f5e3abb2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2005-06-21  Owen Taylor  <otaylor@redhat.com>
+
+	* modules/khmer configure.in modules/Makefile.am 
+	modules/makefile.msc: Add a Khmer module by
+	Jens Herden and Javier Sola.
+
 2005-06-16  Tor Lillqvist  <tml@novell.com>
 
 	* configure.in: Move the check for native Win32 a bit later, as
diff --git a/ChangeLog.pre-1-10 b/ChangeLog.pre-1-10
index 30eaadea..f5e3abb2 100644
--- a/ChangeLog.pre-1-10
+++ b/ChangeLog.pre-1-10
@@ -1,3 +1,9 @@
+2005-06-21  Owen Taylor  <otaylor@redhat.com>
+
+	* modules/khmer configure.in modules/Makefile.am 
+	modules/makefile.msc: Add a Khmer module by
+	Jens Herden and Javier Sola.
+
 2005-06-16  Tor Lillqvist  <tml@novell.com>
 
 	* configure.in: Move the check for native Win32 a bit later, as
diff --git a/configure.in b/configure.in
index 06532e16..3acd4e25 100644
--- a/configure.in
+++ b/configure.in
@@ -326,11 +326,12 @@ basic_modules="basic-fc,basic-win32,basic-x"
 hangul_modules="hangul-fc"
 hebrew_modules="hebrew-fc"
 indic_modules="indic-fc"
+khmer_modules="khmer-fc"
 syriac_modules="syriac-fc"
 thai_modules="thai-fc"
 tibetan_modules="tibetan-fc"
 
-all_modules="$arabic_modules,$basic_modules,$hangul_modules,$hebrew_modules,$indic_modules,$syriac_modules,$thai_modules,$tibetan_modules"
+all_modules="$arabic_modules,$basic_modules,$hangul_modules,$hebrew_modules,$indic_modules,$khmer_modules,$syriac_modules,$thai_modules,$tibetan_modules"
 
 included_modules=""
 if test "x$with_included_modules" != xno || test "x$with_included_modules" = x ; then
@@ -385,6 +386,8 @@ AM_CONDITIONAL(INCLUDE_HEBREW_FC,echo $included_modules | egrep '(^|,)hebrew-fc(
 
 AM_CONDITIONAL(INCLUDE_INDIC_FC,echo $included_modules | egrep '(^|,)indic-fc($|,)' > /dev/null)
 				     
+AM_CONDITIONAL(INCLUDE_KHMER_FC,echo $included_modules | egrep '(^|,)khmer-fc($|,)' > /dev/null)
+
 AM_CONDITIONAL(INCLUDE_SYRIAC_FC,echo $included_modules | egrep '(^|,)syriac-fc($|,)' > /dev/null)
 
 AM_CONDITIONAL(INCLUDE_THAI_FC,echo $included_modules | egrep '(^|,)thai-fc($|,)' > /dev/null)
@@ -646,6 +649,7 @@ modules/basic/Makefile
 modules/hangul/Makefile
 modules/hebrew/Makefile
 modules/indic/Makefile
+modules/khmer/Makefile
 modules/syriac/Makefile
 modules/thai/Makefile
 modules/tibetan/Makefile
diff --git a/modules/Makefile.am b/modules/Makefile.am
index f1ff5d30..39cc6ff7 100644
--- a/modules/Makefile.am
+++ b/modules/Makefile.am
@@ -6,6 +6,7 @@ SUBDIRS = 		\
 	hangul 		\
 	hebrew		\
 	indic           \
+	khmer		\
 	syriac		\
 	thai		\
 	tibetan
diff --git a/modules/khmer/Makefile.am b/modules/khmer/Makefile.am
new file mode 100644
index 00000000..d9608f3b
--- /dev/null
+++ b/modules/khmer/Makefile.am
@@ -0,0 +1,45 @@
+## Process this file with automake to create Makefile.in.
+
+pangolibs = $(top_builddir)/pango/libpango-$(PANGO_API_VERSION).la $(GLIB_LIBS)
+pangoft2libs = $(top_builddir)/pango/libpangoft2-$(PANGO_API_VERSION).la $(FREETYPE_LIBS) $(pangolibs)
+
+INCLUDES =					\
+	-DG_LOG_DOMAIN=\"Pango\"		\
+	-DPANGO_ENABLE_ENGINE			\
+	-DG_DISABLE_DEPRECATED			\
+	$(PANGO_DEBUG_FLAGS)			\
+	-I$(top_srcdir)				\
+	-I$(top_srcdir)/pango/			\
+	$(GLIB_CFLAGS)
+
+if PLATFORM_WIN32
+no_undefined = -no-undefined
+endif
+
+## Directory into which the module_LTLIBRARIES below are installed.
+moduledir = $(libdir)/pango/$(PANGO_MODULE_VERSION)/modules
+## Both lists start empty and are filled in by the conditionals below.
+module_LTLIBRARIES =
+noinst_LTLIBRARIES =
+
+
+## The Khmer shaper is only built when FreeType is available.  When
+## INCLUDE_KHMER_FC is set it is built as a non-installed libtool library
+## (libpango-khmer-fc.la), otherwise as an installable module
+## (pango-khmer-fc.la).
+if HAVE_FREETYPE
+INCLUDES += $(FREETYPE_CFLAGS)
+if INCLUDE_KHMER_FC
+noinst_LTLIBRARIES += libpango-khmer-fc.la
+else
+module_LTLIBRARIES += pango-khmer-fc.la
+endif
+endif
+
+fc_sources =		 	\
+	khmer-fc.c
+
+## Settings for the loadable module (pango-khmer-fc.la).
+pango_khmer_fc_la_LDFLAGS = -export-dynamic -avoid-version -module $(no_undefined)
+pango_khmer_fc_la_LIBADD = $(pangoft2libs)
+pango_khmer_fc_la_SOURCES = $(fc_sources)
+## Settings for the non-installed library (libpango-khmer-fc.la); it is
+## compiled from the same sources with PANGO_MODULE_PREFIX defined.
+libpango_khmer_fc_la_SOURCES = $(fc_sources)
+libpango_khmer_fc_la_CFLAGS = -DPANGO_MODULE_PREFIX=_pango_khmer_fc
+
+
+included-modules: $(noinst_LTLIBRARIES)
+
+.PHONY: included-modules
diff --git a/modules/khmer/khmer-fc.c b/modules/khmer/khmer-fc.c
new file mode 100644
index 00000000..cc6c09a8
--- /dev/null
+++ b/modules/khmer/khmer-fc.c
@@ -0,0 +1,736 @@
+/* Pango
+ * khmer-fc.c: Shaper for Khmer script
+ *
+ * Copyright (C) 2004 Open Forum of Cambodia (www.forum.org.kh / www.khmeros.info)
+ * Authors: Jens Herden <jens@khmeros.info> and Javier Sola <javier@khmeros.info>
+ *
+ * Based on code from other shapers
+ * Copyright (C) 1999-2004 Red Hat Software
+ * Author: Owen Taylor <otaylor@redhat.com>
+
+ * Partially based on Indic shaper
+ * Copyright (C) 2001, 2002 IBM Corporation
+ * Author: Eric Mader <mader@jtcsv.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <string.h>
+
+
+#include "pango-engine.h"
+#include "pango-ot.h"
+#include "pango-utils.h"
+#include "pangofc-font.h"
+
+
+#define SCRIPT_ENGINE_NAME "KhmerScriptEngineFc"
+#define RENDER_TYPE PANGO_RENDER_TYPE_FC
+
+
+/* The Khmer engine adds no fields of its own: its instance and class
+ * types are plain aliases of the generic shape-engine types.
+ */
+typedef PangoEngineShape      KhmerEngineFc;
+typedef PangoEngineShapeClass KhmerEngineFcClass ;
+
+
+/* Scripts handled by this engine: only PANGO_SCRIPT_KHMER.
+ * NOTE(review): the "*" is presumably a wildcard for the language list -
+ * confirm against the PangoEngineScriptInfo documentation.
+ */
+static PangoEngineScriptInfo khmer_scripts[] =
+{
+  { PANGO_SCRIPT_KHMER, "*" }
+};
+
+/* Engine descriptions returned by the module's list entry point: a single
+ * shape engine named SCRIPT_ENGINE_NAME, with render type RENDER_TYPE,
+ * covering the scripts in khmer_scripts.
+ */
+static PangoEngineInfo script_engines[] =
+{
+  {
+    SCRIPT_ENGINE_NAME,
+    PANGO_ENGINE_TYPE_SHAPE,
+    RENDER_TYPE,
+    khmer_scripts, G_N_ELEMENTS (khmer_scripts)
+  }
+};
+
+
+/* Vocabulary
+ *     Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
+ *                     center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
+ *                     split vowels, signs... but there is only one base in a syllable, it has to be coded as
+ *                     the first character of the syllable.
+ *     split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
+ *                     Khmer language has five of them. Khmer split vowels either have one part before the
+ *                     base and one after the base or they have a part before the base and a part above the base.
+ *                     The first part of all Khmer split vowels is the same character, identical to
+ *                     the glyph of Khmer dependent vowel SRA EI
+ *     coeng -->  modifier used in Khmer to construct coeng (subscript) consonants
+ *                Differently than indian languages, the coeng modifies the consonant that follows it,
+ *                not the one preceding it  Each consonant has two forms, the base form and the subscript form
+ *                the base form is the normal one (using the consonants code-point), the subscript form is
+ *                displayed when the combination coeng + consonant is encountered.
+ *     Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
+ *     Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
+ *     Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
+ *     Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
+ *                          if it is attached to a consonant of the first series or a consonant of the second series.
+ *                          Most consonants have an equivalent in the other series, but some of them exist only in
+ *                          one series (for example SA). If we want to use the consonant SA with a vowel sound that
+ *                          can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
+ *                          of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
+ *                          (x17C9 and x17CA). TRIISAP changes a first series consonant to second series sound and
+ *                          MUSIKATOAN a second series consonant to have a first series vowel sound.
+ *                          Consonant shifters are both normally superscript marks, but, when they are followed by a
+ *                          superscript, they change shape and take the form of subscript dependent vowel SRA U.
+ *                          If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
+ *                          should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
+ *                          be placed after the coeng consonant.
+ *     Dependent vowel ->   In khmer dependent vowels can be placed above, below, before or after the base
+ *                          Each vowel has its own position. Only one vowel per syllable is allowed.
+ *     Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign are
+ *                          Allowed in a syllable.
+ *
+ *
+ *  order is important here! This order must be the same that is found in each horizontal
+ *  line in the statetable for Khmer (see khmerStateTable) .
+ */
+/* Character class values.  find_syllable() uses the class value of a
+ * character as the column index into khmerStateTable, so the numeric
+ * values must match the column order of that table and CC_COUNT must
+ * equal the number of columns.
+ */
+enum KhmerCharClassValues
+{
+  CC_RESERVED             =  0,
+  CC_CONSONANT            =  1, /* Consonant of type 1 or independent vowel */
+  CC_CONSONANT2           =  2, /* Consonant of type 2 */
+  CC_CONSONANT3           =  3, /* Consonant of type 3 */
+  CC_ZERO_WIDTH_NJ_MARK   =  4, /* Zero Width non joiner character (0x200C) */
+  CC_CONSONANT_SHIFTER    =  5,
+  CC_ROBAT                =  6, /* Khmer special diacritic accent -treated differently in state table */
+  CC_COENG                =  7, /* Subscript consonant combining character */
+  CC_DEPENDENT_VOWEL      =  8,
+  CC_SIGN_ABOVE           =  9,
+  CC_SIGN_AFTER           = 10,
+  CC_ZERO_WIDTH_J_MARK    = 11, /* Zero width joiner character */
+  CC_COUNT                = 12  /* This is the number of character classes */
+};
+
+
+/* Flag bits that are ORed on top of a KhmerCharClassValues value to form
+ * a KhmerCharClass.  CF_CLASS_MASK extracts the class value again;
+ * CF_POS_MASK covers the four position flags.
+ */
+enum KhmerCharClassFlags
+{
+  CF_CLASS_MASK    = 0x0000FFFF,
+
+  CF_CONSONANT     = 0x01000000,  /* flag to speed up comparing */
+  CF_SPLIT_VOWEL   = 0x02000000,  /* flag for a split vowel -> the first part is added in front of the syllable */
+  CF_DOTTED_CIRCLE = 0x04000000,  /* add a dotted circle if a character with this flag is the first in a syllable */
+  CF_COENG         = 0x08000000,  /* flag to speed up comparing */
+  CF_SHIFTER       = 0x10000000,  /* flag to speed up comparing */
+  CF_ABOVE_VOWEL   = 0x20000000,  /* flag to speed up comparing */
+
+  /* position flags */
+  CF_POS_BEFORE    = 0x00080000,
+  CF_POS_BELOW     = 0x00040000,
+  CF_POS_ABOVE     = 0x00020000,
+  CF_POS_AFTER     = 0x00010000,
+  CF_POS_MASK      = 0x000f0000
+};
+
+
+/* Code points of the characters that are referred to by name in this file */
+enum KhmerChar
+{
+  C_SIGN_ZWNJ     = 0x200C,
+  C_SIGN_ZWJ      = 0x200D,
+  C_DOTTED_CIRCLE = 0x25CC,
+  C_RO            = 0x179A,
+  C_VOWEL_AA      = 0x17B6,
+  C_SIGN_NIKAHIT  = 0x17C6,
+  C_VOWEL_E       = 0x17C1,
+  C_COENG         = 0x17D2
+};
+
+
+/* Short aliases used to fill the khmerCharClasses table.  Each alias is
+ * one KhmerCharClassValues value combined with KhmerCharClassFlags bits.
+ */
+enum
+{
+  /* simple classes, they are used in the state table (in this file) to control the length of a syllable
+   * they are also used to know where a character should be placed (location in reference to the base character)
+   * and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
+   * indicate error in syllable construction
+   */
+  _xx = CC_RESERVED,
+  _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
+  _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,
+  _c1 = CC_CONSONANT | CF_CONSONANT,
+  _c2 = CC_CONSONANT2 | CF_CONSONANT,
+  _c3 = CC_CONSONANT3 | CF_CONSONANT,
+  _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,
+  _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,
+  _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,
+  _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,
+  _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,
+  _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,
+  _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,
+
+  /* split vowel: same as the above / after vowel plus CF_SPLIT_VOWEL */
+  _va = _da | CF_SPLIT_VOWEL,
+  _vr = _dr | CF_SPLIT_VOWEL
+};
+
+
+/* Character class: a character class value (KhmerCharClassValues, in the
+ * bits selected by CF_CLASS_MASK) ORed with character class flags
+ * (KhmerCharClassFlags).
+ */
+typedef glong KhmerCharClass;
+
+
+/* Character class table, indexed by (code point - firstChar).
+ * _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
+ * _sa Sign placed above the base
+ * _sp Sign placed after the base
+ * _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
+ * _c2 Consonant of type 2 (only RO)
+ * _c3 Consonant of type 3
+ * _rb Khmer sign robat u17CC. combining mark for subscript consonants
+ * _cs Consonant-shifter
+ * _dl Dependent vowel placed before the base (left of the base)
+ * _db Dependent vowel placed below the base
+ * _da Dependent vowel placed above the base
+ * _dr Dependent vowel placed behind the base (right of the base)
+ * _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
+ *     it to create a subscript consonant or independent vowel
+ * _va Khmer split vowel in which the first part is before the base and the second one above the base
+ * _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
+ */
+static const KhmerCharClass khmerCharClasses[] =
+{
+  _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
+  _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
+  _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
+  _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
+  _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
+  _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx, /* 17D0 - 17DF */
+};
+
+/* These defines must reflect the range of khmerCharClasses: the first and
+ * last code point covered by the table.  get_char_class() range-checks
+ * against them and indexes the table with (ch - firstChar).
+ */
+#define firstChar 0x1780
+#define lastChar 0x17df
+
+
+
+/* The stateTable is used to calculate the end (the length) of a well
+ * formed Khmer Syllable.
+ *
+ * Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
+ * CharClassValues. This coincidence of values allows the follow up of the table.
+ *
+ * Each line corresponds to a state, which does not necessarily need to be a type
+ * of component... for example, state 2 is a base, which is always the first character
+ * in the syllable, but the state could be produced by a consonant of any type when
+ * it is the first character that is analysed (in ground state).
+ *
+ * Differentiating 3 types of consonants is necessary in order to
+ * forbid the use of certain combinations, such as having a second
+ * coeng after a coeng RO,
+ * The inexistent possibility of having a type 3 after another type 3 is permitted,
+ * eliminating it would very much complicate the table, and it does not create typing
+ * problems, as the case above.
+ *
+ * The table is quite complex, in order to limit the number of coeng consonants
+ * to 2 (by means of the table).
+ *
+ * There is a peculiarity, as far as Unicode is concerned:
+ * - The consonant-shifter is considered in two possible different
+ *   locations, the one considered in Unicode 3.0 and the one considered in
+ *   Unicode 4.0. (there is a backwards compatibility problem in this standard).
+ *
+ *
+ * xx    independent character, such as a number, punctuation sign or non-khmer char
+ *
+ * c1    Khmer consonant of type 1 or an independent vowel
+ *       that is, a letter in which the subscript for is only under the
+ *       base, not taking any space to the right or to the left
+ *
+ * c2    Khmer consonant of type 2, the coeng form takes space under
+ *       and to the left of the base (only RO is of this type)
+ *
+ * c3    Khmer consonant of type 3. Its subscript form takes space under
+ *       and to the right of the base.
+ *
+ * cs    Khmer consonant shifter
+ *
+ * rb    Khmer robat
+ *
+ * co    coeng character (u17D2)
+ *
+ * dv    dependent vowel (including split vowels, they are treated in the same way).
+ *       even if dv is not defined above, the component that is really tested for is
+ *       KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
+ *
+ * zwj   Zero Width joiner
+ *
+ * zwnj  Zero width non joiner
+ *
+ * sa    above sign
+ *
+ * sp    post sign
+ *
+ * there are lines with equal content but for an easier understanding
+ * (and maybe change in the future) we did not join them
+ */
+/* Rows are states, columns are character class values (KhmerCharClassValues).
+ * Each entry is the next state; a negative entry makes find_syllable() stop,
+ * i.e. the character does not belong to the current syllable.
+ */
+static const gint8 khmerStateTable[][CC_COUNT] =
+{
+/* xx  c1  c2  c3 zwnj cs  rb  co  dv  sa  sp zwj  */
+  { 1,  2,  2,  2,  1,  1,  1,  6,  1,  1,  1,  2}, /*  0 - ground state */
+  {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /*  1 - exit state (or sign to the right of the syllable) */
+  {-1, -1, -1, -1,  3,  4,  5,  6, 16, 17,  1, -1}, /*  2 - Base consonant */
+  {-1, -1, -1, -1, -1,  4, -1, -1, 16, -1, -1, -1}, /*  3 - First ZWNJ before a register shifter
+                                                            It can only be followed by a shifter or a vowel */
+  {-1, -1, -1, -1, 15, -1, -1,  6, 16, 17,  1, 14}, /*  4 - First register shifter */
+  {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1,  1, -1}, /*  5 - Robat */
+  {-1,  7,  8,  9, -1, -1, -1, -1, -1, -1, -1, -1}, /*  6 - First Coeng */
+  {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  7 - First consonant of type 1 after coeng */
+  {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17,  1, 14}, /*  8 - First consonant of type 2 after coeng */
+  {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  9 - First consonant of type 3 after coeng */
+  {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
+  {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
+  {-1, -1,  1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
+  {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 13 - Second register shifter */
+  {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
+  {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
+  {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17,  1, 18}, /* 16 - dependent vowel */
+  {-1, -1,  1, -1, -1, -1, -1, -1, -1, -1,  1, 18}, /* 17 - sign above */
+  {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
+  {-1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
+  {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1}, /* 20 - dependent vowel after a Robat */
+};
+
+
+/* One bit per OpenType feature.  get_ruleset() passes these as the
+ * property bit when it registers the feature of the same name: the first
+ * two groups are the GSUB features, the last group the GPOS features.
+ */
+enum property_flags
+{
+  abvf = 0x0001,
+  pref = 0x0002,
+  pstf = 0x0004,
+  blwf = 0x0008,
+
+  pres = 0x0010,
+  blws = 0x0020,
+  abvs = 0x0040,
+  psts = 0x0080,
+  clig = 0x0100,
+
+  dist = 0x0200,
+  blwm = 0x0400,
+  abvm = 0x0800,
+  mkmk = 0x1000
+};
+
+
+/* Property masks attached to each glyph added to the OT buffer in
+ * khmer_engine_shape().  For every entry the active value is exactly the
+ * set of property_flags bits that are NOT in the commented-out list.
+ * NOTE(review): this suggests a set bit means "do not apply this feature
+ * to the glyph" - confirm against pango_ot_ruleset_add_feature().
+ */
+enum properties
+{
+  blwf_p    = /*(blwf | blws | clig | dist | blwm | mkmk)*/ (abvf | pref | pstf | pres | abvs | psts | abvm),
+  pstf_p    = /*(blwf | blws | pref | pres | pstf | psts | clig | dist | blwm)*/ (abvf | abvs | abvm | mkmk),
+  abvf_p    = /*(abvf | abvs | clig | dist | abvm | mkmk)*/ (pref | pstf | blwf | pres | blws | psts | blwm),
+  pref_p    = /*(pref | pres | clig | dist)*/ (abvf | pstf | blwf | blws | abvs | psts | blwm | abvm | mkmk),
+  default_p = /*(pres | blws | clig | dist | abvm | blwm | mkmk)*/ (pref | blwf |abvf | pstf | abvs | psts)
+};
+
+
+/* Map a code point to its Khmer character class.
+ *
+ * ZWJ and ZWNJ are not part of the khmerCharClasses table but may appear
+ * inside a syllable, so they are mapped to their own class values first.
+ * Code points inside [firstChar, lastChar] are looked up in the table;
+ * everything else yields CC_RESERVED (_xx).
+ */
+static KhmerCharClass
+get_char_class (gunichar ch)
+{
+  switch (ch)
+    {
+    case C_SIGN_ZWJ:
+      return CC_ZERO_WIDTH_J_MARK;
+
+    case C_SIGN_ZWNJ:
+      return CC_ZERO_WIDTH_NJ_MARK;
+
+    default:
+      break;
+    }
+
+  if (ch >= firstChar && ch <= lastChar)
+    return khmerCharClasses[ch - firstChar];
+
+  return CC_RESERVED;
+}
+
+
+/* Find the end of the syllable that begins at chars[start].
+ *
+ * Starting in state 0, the class value of each character selects the next
+ * state from khmerStateTable.  Scanning stops at the first character that
+ * leads to a negative state, or at char_count.  The returned index is that
+ * of the first character which is not part of the syllable.
+ */
+static glong
+find_syllable (const gunichar *chars,
+               glong           start,
+               glong           char_count)
+{
+  glong pos;
+  gint8 state = 0;
+
+  for (pos = start; pos < char_count; pos++)
+    {
+      KhmerCharClass klass = get_char_class (chars[pos]) & CF_CLASS_MASK;
+
+      state = khmerStateTable[state][klass];
+      if (state < 0)
+        break;
+    }
+
+  return pos;
+}
+
+
+/* Add the GSUB feature `tag' to `ruleset' with `property_bit', but only if
+ * it is found for `script_index' in the default language system.
+ */
+static void
+maybe_add_GSUB_feature (PangoOTRuleset *ruleset,
+                        PangoOTInfo    *info,
+                        guint           script_index,
+                        PangoOTTag      tag,
+                        gulong          property_bit)
+{
+  guint found_index;
+
+  /* 0xffff == default language system */
+  if (!pango_ot_info_find_feature (info, PANGO_OT_TABLE_GSUB,
+                                   tag, script_index, 0xffff, &found_index))
+    return;
+
+  pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GSUB, found_index,
+                                property_bit);
+}
+
+
+/* Add the GPOS feature `tag' to `ruleset' with `property_bit', but only if
+ * it is found for `script_index' in the default language system.
+ */
+static void
+maybe_add_GPOS_feature (PangoOTRuleset *ruleset,
+                        PangoOTInfo    *info,
+                        guint           script_index,
+                        PangoOTTag      tag,
+                        gulong          property_bit)
+{
+  guint found_index;
+
+  /* 0xffff == default language system */
+  if (!pango_ot_info_find_feature (info, PANGO_OT_TABLE_GPOS,
+                                   tag, script_index, 0xffff, &found_index))
+    return;
+
+  pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GPOS, found_index,
+                                property_bit);
+}
+
+
+/* Return the Khmer ruleset for `face', building it on first use.
+ *
+ * The ruleset is stored as qdata on the face's PangoOTInfo under the quark
+ * "pango-khmer-ruleset", so later calls for the same info reuse it.
+ * Returns NULL when pango_ot_info_get() yields no info for the face.
+ */
+static PangoOTRuleset *
+get_ruleset (FT_Face face)
+{
+  PangoOTRuleset *ruleset;
+  static GQuark ruleset_quark = 0;
+
+  PangoOTInfo *info = pango_ot_info_get (face);
+
+  if (!ruleset_quark)
+    ruleset_quark = g_quark_from_string ("pango-khmer-ruleset");
+
+  if (!info)
+    return NULL;
+
+  ruleset = g_object_get_qdata (G_OBJECT (info), ruleset_quark);
+
+  if (!ruleset)
+    {
+      PangoOTTag khmer_tag = FT_MAKE_TAG ('k', 'h', 'm', 'r');
+      guint script_index;
+
+      ruleset = pango_ot_ruleset_new (info);
+
+      /* GSUB features are only added if the font has a 'khmr' script in GSUB */
+      if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GSUB,
+                                      khmer_tag, &script_index))
+        {
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','f'), pref);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','f'), blwf);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','f'), abvf);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','f'), pstf);
+
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','s'), pres);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','s'), blws);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','s'), abvs);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','s'), psts);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('c','l','i','g'), clig);
+        }
+
+      /* likewise for the GPOS features */
+      if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GPOS,
+                                      khmer_tag, &script_index))
+        {
+          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('d','i','s','t'), dist);
+          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','m'), blwm);
+          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','m'), abvm);
+          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('m','k','m','k'), mkmk);
+        }
+
+      /* attach the ruleset to info, with g_object_unref as its destroy notify */
+      g_object_set_qdata_full (G_OBJECT (info), ruleset_quark, ruleset,
+                                (GDestroyNotify)g_object_unref);
+    }
+
+  return ruleset;
+}
+
+
+/* Look up the glyph for `wc' in `fc_font'; when the font reports no glyph
+ * (index 0), use the font's "unknown glyph" for that character instead.
+ */
+static PangoGlyph
+get_index (PangoFcFont *fc_font, gunichar wc)
+{
+  PangoGlyph glyph = pango_fc_font_get_glyph (fc_font, wc);
+
+  if (glyph)
+    return glyph;
+
+  return pango_fc_font_get_unknown_glyph (fc_font, wc);
+}
+
+
+/* Shape one run of Khmer text.
+ *
+ * The UTF-8 run is converted to UCS-4 and split into syllables with
+ * find_syllable().  Each syllable is written to a PangoOTBuffer in display
+ * order: a pre-base vowel (or the pre part of a split vowel), then
+ * coeng + RO, then an optional dotted circle, then the remaining
+ * characters.  Every glyph is tagged with one of the *_p property masks.
+ * Finally the ruleset from get_ruleset() is applied (if there is one) and
+ * the buffer is written into `glyphs'.
+ *
+ * engine   - unused here
+ * font     - font to shape with; cast to PangoFcFont
+ * text     - UTF-8 text of the run
+ * length   - length of text, as passed to g_utf8_to_ucs4_fast()
+ * analysis - unused here
+ * glyphs   - receives the shaped glyphs via pango_ot_buffer_output()
+ */
+static void
+khmer_engine_shape (PangoEngineShape *engine,
+                    PangoFont        *font,
+                    const char       *text,
+                    int               length,
+                    PangoAnalysis    *analysis,
+                    PangoGlyphString *glyphs)
+{
+  PangoFcFont *fc_font = PANGO_FC_FONT (font);
+  FT_Face face;
+  PangoOTBuffer *buffer;
+  glong n_chars, i;
+  gunichar *wcs;
+  const char *p;
+  glong syllable;
+  KhmerCharClass charClass;
+  glong cursor = 0;
+
+  face = pango_fc_font_lock_face (fc_font);
+  g_assert (face);
+
+  /* The buffer must exist before any glyph is added to it below */
+  buffer = pango_ot_buffer_new (fc_font);
+
+  wcs = g_utf8_to_ucs4_fast (text, length, &n_chars);
+  p = text;
+
+  /* This loop only exits when we reach the end of a run, which may contain
+   * several syllables.
+   */
+  while (cursor < n_chars)
+    {
+      syllable = find_syllable (wcs, cursor, n_chars);
+
+      /* write a pre vowel or the pre part of a split vowel first
+       * and look out for coeng + ro. RO is the only consonant of type 2, and
+       * therefore the only one that requires saving space before the base.
+       */
+      glong coengRo = -1;  /* There is no Coeng Ro, if found this value will change */
+      for (i = cursor; i < syllable; i += 1)
+        {
+          charClass = get_char_class (wcs[i]);
+
+          /* if a split vowel, write the pre part. In Khmer the pre part
+           * is the same for all split vowels, same glyph as pre vowel C_VOWEL_E
+           */
+          if (charClass & CF_SPLIT_VOWEL)
+            {
+              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_VOWEL_E), pref_p, p - text);
+              break; /* there can be only one vowel */
+            }
+
+          /* if a vowel with pos before write it out */
+          if (charClass & CF_POS_BEFORE)
+            {
+              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pref_p, p - text);
+              break; /* there can be only one vowel */
+            }
+
+          /* look for coeng + ro and remember position
+           * works because coeng + ro is always in front of a vowel (if there is a vowel)
+           * and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
+           * with this flag
+           */
+          if ((charClass & CF_COENG) && (i + 1 < syllable) &&
+              ((get_char_class (wcs[i + 1]) & CF_CLASS_MASK) == CC_CONSONANT2))
+            {
+              coengRo = i;
+            }
+        }
+
+      /* write coeng + ro if found  */
+      if (coengRo > -1)
+        {
+          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_COENG), pref_p, p - text);
+          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_RO), pref_p, p - text);
+        }
+
+      /* shall we add a dotted circle?
+       * If in the position in which the base should be (first char in the string) there is
+       * a character that has the Dotted circle flag (a character that cannot be a base)
+       * then write a dotted circle
+       */
+      if (get_char_class (wcs[cursor]) & CF_DOTTED_CIRCLE)
+        {
+          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_DOTTED_CIRCLE), default_p, p - text);
+        }
+
+      /* copy what is left to the output, skipping before vowels and
+       * coeng Ro if they are present
+       */
+      for (i = cursor; i < syllable; i += 1)
+        {
+          charClass = get_char_class (wcs[i]);
+
+          /* skip a before vowel, it was already processed */
+          if (charClass & CF_POS_BEFORE)
+            {
+              p = g_utf8_next_char (p);
+              continue;
+            }
+
+          /* skip coeng + ro, it was already processed */
+          if (i == coengRo)
+            {
+              p = g_utf8_next_char (p);
+              i += 1;
+              p = g_utf8_next_char (p);
+              continue;
+            }
+
+          switch (charClass & CF_POS_MASK)
+            {
+              case CF_POS_ABOVE :
+                pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), abvf_p, p - text);
+                break;
+
+              case CF_POS_AFTER :
+                pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
+                break;
+
+              case CF_POS_BELOW :
+                pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+                break;
+
+              default:
+                  /* assign the correct flags to a coeng consonant
+                   * Consonants of type 3 are tagged as Post forms and those type 1 as below forms
+                   */
+                  if ((charClass & CF_COENG) && i + 1 < syllable)
+                    {
+                      if ((get_char_class (wcs[i + 1]) & CF_CLASS_MASK) == CC_CONSONANT3)
+                        {
+                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
+                          p = g_utf8_next_char (p);
+                          i += 1;
+                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
+                          break;
+                        }
+                      else
+                        {
+                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+                          p = g_utf8_next_char (p);
+                          i += 1;
+                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+                          break;
+                        }
+                    }
+
+                  /* if a shifter is followed by an above vowel change the shifter to below form,
+                   * an above vowel can have two possible positions i + 1 or i + 3
+                   * (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
+                   * and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
+                   * different positions, right after the shifter or after a vowel (Unicode 4)
+                   */
+                  if ((charClass & CF_SHIFTER) && (i + 1 < syllable))
+                    {
+                      if (get_char_class (wcs[i + 1]) & CF_ABOVE_VOWEL)
+                        {
+                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+                          break;
+                        }
+                      if (i + 2 < syllable &&
+                          (wcs[i + 1] == C_VOWEL_AA) &&
+                          (wcs[i + 2] == C_SIGN_NIKAHIT) )
+                        {
+                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+                          break;
+                        }
+                      if (i + 3 < syllable && (get_char_class (wcs[i + 3]) & CF_ABOVE_VOWEL) )
+                        {
+                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+                          break;
+                        }
+                      if (i + 4 < syllable &&
+                          (wcs[i + 3] == C_VOWEL_AA) &&
+                          (wcs[i + 4] == C_SIGN_NIKAHIT) )
+                        {
+                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+                          break;
+                        }
+
+                    }
+
+                  /* default - any other characters  */
+                  pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), default_p, p - text);
+                  break;
+            } /* switch */
+          p = g_utf8_next_char (p);
+        } /* for */
+
+      cursor = syllable; /* move the pointer to the start of next syllable */
+    } /* while */
+
+  /* do gsub and gpos processing; skipped when the face has no OT info */
+  PangoOTRuleset *ruleset = get_ruleset (face);
+  if (ruleset != NULL)
+    {
+      pango_ot_ruleset_substitute (ruleset, buffer);
+      pango_ot_ruleset_position (ruleset, buffer);
+    }
+
+  pango_ot_buffer_output (buffer, glyphs);
+
+  g_free (wcs);
+  pango_ot_buffer_destroy (buffer);
+
+  pango_fc_font_unlock_face (fc_font);
+}
+
+
+/* Class initializer: installs khmer_engine_shape() as the engine's
+ * script_shape method.
+ */
+static void
+khmer_engine_fc_class_init (PangoEngineShapeClass *class)
+{
+  class->script_shape = khmer_engine_shape;
+}
+
+/* NOTE(review): presumably this macro defines khmer_engine_fc_type and
+ * khmer_engine_fc_register_type(), which the module entry points below
+ * use - confirm against pango-engine.h.
+ */
+PANGO_ENGINE_SHAPE_DEFINE_TYPE (KhmerEngineFc, khmer_engine_fc,
+                                khmer_engine_fc_class_init, NULL);
+
+
+/* Module entry point "init": registers the Khmer engine type with `module'. */
+void
+PANGO_MODULE_ENTRY(init) (GTypeModule *module)
+{
+  khmer_engine_fc_register_type (module);
+}
+
+
+/* Module entry point "exit": nothing to clean up, so the body is empty. */
+void
+PANGO_MODULE_ENTRY(exit) (void)
+{
+}
+
+
+/* Module entry point "list": reports the engines provided by this module
+ * by storing the script_engines array in *engines and its element count
+ * in *n_engines.
+ */
+void
+PANGO_MODULE_ENTRY(list) (PangoEngineInfo **engines,
+                          int              *n_engines)
+{
+  *engines = script_engines;
+  *n_engines = G_N_ELEMENTS (script_engines);
+}
+
+
+/* Module entry point "create": returns a new Khmer engine object when `id'
+ * equals SCRIPT_ENGINE_NAME, and NULL for any other id.
+ */
+PangoEngine *
+PANGO_MODULE_ENTRY(create) (const char *id)
+{
+  if (strcmp (id, SCRIPT_ENGINE_NAME) != 0)
+    return NULL;
+
+  return g_object_new (khmer_engine_fc_type, NULL);
+}
diff --git a/modules/makefile.msc b/modules/makefile.msc
index e56edf6a..5e61f0cd 100644
--- a/modules/makefile.msc
+++ b/modules/makefile.msc
@@ -1,6 +1,6 @@
 # modles for the ft2 backend, don't include 'basic'
 # here it is built-in pangoft.dll, see ../pango/makefile.msc
-MODULES = arabic hangul hebrew indic syriac thai
+MODULES = arabic hangul hebrew indic syriac thai khmer
 
 !IFNDEF MODULE
 
@@ -35,6 +35,10 @@ OBJECTS = hebrew-fc.obj hebrew-shaper.obj
 OBJECTS = indic-fc.obj indic-ot.obj indic-ot-class-tables.obj mprefixups.obj 
 !ENDIF
 
+!IFDEF OBJ_khmer
+OBJECTS = khmer-fc.obj
+!ENDIF
+
 !IFDEF OBJ_syriac
 OBJECTS = syriac-fc.obj syriac-ot.obj
 !ENDIF
author	Owen Taylor <otaylor@redhat.com>	2005-06-21 15:58:45 +0000
committer	Owen Taylor <otaylor@src.gnome.org>	2005-06-21 15:58:45 +0000
commit	50d0b340d03a19ae688a3807bbe22e5ae3763bd9 (patch)
tree	ab763b9293bd9eea51c7813802ed1691b03e696a
parent	e4bdbe0e4c468c50e809bf737444af04e054cd0e (diff)
download	pango-50d0b340d03a19ae688a3807bbe22e5ae3763bd9.tar.gz