/* Pango
 * khmer-fc.c: Shaper for Khmer script
 *
 * Copyright (C) 2004 Open Forum of Cambodia (www.forum.org.kh / www.khmeros.info)
 * Authors: Jens Herden <jens@khmeros.info> and Javier Sola <javier@khmeros.info>
 *
 * Based on code from other shapers
 * Copyright (C) 1999-2004 Red Hat Software
 * Author: Owen Taylor <otaylor@redhat.com>

 * Partially based on Indic shaper
 * Copyright (C) 2001, 2002 IBM Corporation
 * Author: Eric Mader <mader@jtcsv.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * The license on the original Indic shaper code is as follows:
 *
 *  * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, and/or sell copies of the
 * Software, and to permit persons to whom the Software is furnished
 * to do so, provided that the above copyright notice(s) and this
 * permission notice appear in all copies of the Software and that
 * both the above copyright notice(s) and this permission notice
 * appear in supporting documentation.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR
 * ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 *
 * Except as contained in this notice, the name of a copyright holder
 * shall not be used in advertising or otherwise to promote the sale,
 * use or other dealings in this Software without prior written
 * authorization of the copyright holder.
 */
#include <config.h>
#include <string.h>

#include "pango-engine.h"
#include "pango-ot.h"
#include "pango-utils.h"
#include "pangofc-font.h"


#define SCRIPT_ENGINE_NAME "KhmerScriptEngineFc"
#define RENDER_TYPE PANGO_RENDER_TYPE_FC


typedef PangoEngineShape      KhmerEngineFc;
typedef PangoEngineShapeClass KhmerEngineFcClass ;


static PangoEngineScriptInfo khmer_scripts[] =
{
  { PANGO_SCRIPT_KHMER, "*" }
};

static PangoEngineInfo script_engines[] =
{
  {
    SCRIPT_ENGINE_NAME,
    PANGO_ENGINE_TYPE_SHAPE,
    RENDER_TYPE,
    khmer_scripts, G_N_ELEMENTS (khmer_scripts)
  }
};


/* Vocabulary
 *     Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
 *                     center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
 *                     split vowels, signs... but there is only one base in a syllable, it has to be coded as
 *                     the first character of the syllable.
 *     split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
 *                     Khmer language has five of them. Khmer split vowels either have one part before the
 *                     base and one after the base or they have a part before the base and a part above the base.
 *                     The first part of all Khmer split vowels is the same character, identical to
 *                     the glyph of Khmer dependent vowel SRA EI
 *     coeng -->  modifier used in Khmer to construct coeng (subscript) consonants
 *                Differently than indian languages, the coeng modifies the consonant that follows it,
 *                not the one preceding it  Each consonant has two forms, the base form and the subscript form
 *                the base form is the normal one (using the consonants code-point), the subscript form is
 *                displayed when the combination coeng + consonant is encountered.
 *     Consonant of type 1 -> A consonant which has subscript for that only occupies space under a base consonant
 *     Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO)
 *     Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
 *     Consonant shifter -> Khmer has to series of consonants. The same dependent vowel has different sounds
 *                          if it is attached to a consonant of the first series or a consonant of the second series
 *                          Most consonants have an equivalent in the other series, but some of theme exist only in
 *                          one series (for example SA). If we want to use the consonant SA with a vowel sound that
 *                          can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
 *                          of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
 *                          x17C9 y x17CA. TRIISAP changes a first series consonant to second series sound and
 *                          MUSIKATOAN a second series consonant to have a first series vowel sound.
 *                          Consonant shifter are both normally supercript marks, but, when they are followed by a
 *                          superscript, they change shape and take the form of subscript dependent vowel SRA U.
 *                          If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
 *                          should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
 *                          be placed after the coeng consonant.
 *     Dependent vowel ->   In khmer dependent vowels can be placed above, below, before or after the base
 *                          Each vowel has its own position. Only one vowel per syllable is allowed.
 *     Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign are
 *                          Allowed in a syllable.
 *
 *
 *  order is important here! This order must be the same that is found in each horizontal
 *  line in the statetable for Khmer (see khmerStateTable) .
 */
enum KhmerCharClassValues
{
  CC_RESERVED             =  0,
  CC_CONSONANT            =  1, /* Consonant of type 1 or independent vowel */
  CC_CONSONANT2           =  2, /* Consonant of type 2 */
  CC_CONSONANT3           =  3, /* Consonant of type 3 */
  CC_ZERO_WIDTH_NJ_MARK   =  4, /* Zero Width non joiner character (0x200C) */
  CC_CONSONANT_SHIFTER    =  5,
  CC_ROBAT                =  6, /* Khmer special diacritic accent -treated differently in state table */
  CC_COENG                =  7, /* Subscript consonant combining character */
  CC_DEPENDENT_VOWEL      =  8,
  CC_SIGN_ABOVE           =  9,
  CC_SIGN_AFTER           = 10,
  CC_ZERO_WIDTH_J_MARK    = 11, /* Zero width joiner character */
  CC_COUNT                = 12  /* This is the number of character classes */
};


enum KhmerCharClassFlags
{
  CF_CLASS_MASK    = 0x0000FFFF,

  CF_CONSONANT     = 0x01000000,  /* flag to speed up comparing */
  CF_SPLIT_VOWEL   = 0x02000000,  /* flag for a split vowel -> the first part is added in front of the syllable */
  CF_DOTTED_CIRCLE = 0x04000000,  /* add a dotted circle if a character with this flag is the first in a syllable */
  CF_COENG         = 0x08000000,  /* flag to speed up comparing */
  CF_SHIFTER       = 0x10000000,  /* flag to speed up comparing */
  CF_ABOVE_VOWEL   = 0x20000000,  /* flag to speed up comparing */

  /* position flags */
  CF_POS_BEFORE    = 0x00080000,
  CF_POS_BELOW     = 0x00040000,
  CF_POS_ABOVE     = 0x00020000,
  CF_POS_AFTER     = 0x00010000,
  CF_POS_MASK      = 0x000f0000
};


/* Characters that get refrered to by name */
enum KhmerChar
{
  C_SIGN_ZWNJ     = 0x200C,
  C_SIGN_ZWJ      = 0x200D,
  C_DOTTED_CIRCLE = 0x25CC,
  C_RO            = 0x179A,
  C_VOWEL_AA      = 0x17B6,
  C_SIGN_NIKAHIT  = 0x17C6,
  C_VOWEL_E       = 0x17C1,
  C_COENG         = 0x17D2
};


enum
{
  /* simple classes, they are used in the state table (in this file) to control the length of a syllable
   * they are also used to know where a character should be placed (location in reference to the base character)
   * and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
   * indicate error in syllable construction
   */
  _xx = CC_RESERVED,
  _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
  _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,
  _c1 = CC_CONSONANT | CF_CONSONANT,
  _c2 = CC_CONSONANT2 | CF_CONSONANT,
  _c3 = CC_CONSONANT3 | CF_CONSONANT,
  _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,
  _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,
  _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,
  _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,
  _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,
  _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,
  _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,

  /* split vowel */
  _va = _da | CF_SPLIT_VOWEL,
  _vr = _dr | CF_SPLIT_VOWEL
};


/* Character class: a character class value
 * ORed with character class flags.
 */
typedef glong KhmerCharClass;


/* Character class tables
 * _xx character does not combine into syllable, such as numbers, puntuation marks, non-Khmer signs...
 * _sa Sign placed above the base
 * _sp Sign placed after the base
 * _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
 * _c2 Consonant of type 2 (only RO)
 * _c3 Consonant of type 3
 * _rb Khmer sign robat u17CC. combining mark for subscript consonants
 * _cd Consonant-shifter
 * _dl Dependent vowel placed before the base (left of the base)
 * _db Dependent vowel placed below the base
 * _da Dependent vowel placed above the base
 * _dr Dependent vowel placed behind the base (right of the base)
 * _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
 *     it to create a subscript consonant or independent vowel
 * _va Khmer split vowel in wich the first part is before the base and the second one above the base
 * _vr Khmer split vowel in wich the first part is before the base and the second one behind (right of) the base
 */
static const KhmerCharClass khmerCharClasses[] =
{
  _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
  _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
  _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
  _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
  _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
  _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx, /* 17D0 - 17DF */
};

/* this define must reflect the range of khmerCharClasses */
#define firstChar 0x1780
#define lastChar 0x17df


/* The stateTable is used to calculate the end (the length) of a well
 * formed Khmer Syllable.
 *
 * Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
 * CharClassValues. This coincidence of values allows the follow up of the table.
 *
 * Each line corresponds to a state, which does not necessarily need to be a type
 * of component... for example, state 2 is a base, with is always a first character
 * in the syllable, but the state could be produced a consonant of any type when
 * it is the first character that is analysed (in ground state).
 *
 * Differentiating 3 types of consonants is necessary in order to
 * forbid the use of certain combinations, such as having a second
 * coeng after a coeng RO,
 * The inexistent possibility of having a type 3 after another type 3 is permitted,
 * eliminating it would very much complicate the table, and it does not create typing
 * problems, as the case above.
 *
 * The table is quite complex, in order to limit the number of coeng consonants
 * to 2 (by means of the table).
 *
 * There a peculiarity, as far as Unicode is concerned:
 * - The consonant-shifter is considered in two possible different
 *   locations, the one considered in Unicode 3.0 and the one considered in
 *   Unicode 4.0. (there is a backwards compatibility problem in this standard).
 *
 *
 * xx    independent character, such as a number, punctuation sign or non-khmer char
 *
 * c1    Khmer consonant of type 1 or an independent vowel
 *       that is, a letter in which the subscript for is only under the
 *       base, not taking any space to the right or to the left
 *
 * c2    Khmer consonant of type 2, the coeng form takes space under
 *       and to the left of the base (only RO is of this type)
 *
 * c3    Khmer consonant of type 3. Its subscript form takes space under
 *       and to the right of the base.
 *
 * cs    Khmer consonant shifter
 *
 * rb    Khmer robat
 *
 * co    coeng character (u17D2)
 *
 * dv    dependent vowel (including split vowels, they are treated in the same way).
 *       even if dv is not defined above, the component that is really tested for is
 *       KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
 *
 * zwj   Zero Width joiner
 *
 * zwnj  Zero width non joiner
 *
 * sa    above sign
 *
 * sp    post sign
 *
 * there are lines with equal content but for an easier understanding
 * (and maybe change in the future) we did not join them
 */
static const gint8 khmerStateTable[][CC_COUNT] =
{
/* xx  c1  c2  c3 zwnj cs  rb  co  dv  sa  sp zwj  */
  { 1,  2,  2,  2,  1,  1,  1,  6,  1,  1,  1,  2}, /*  0 - ground state */
  {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /*  1 - exit state (or sign to the right of the syllable) */
  {-1, -1, -1, -1,  3,  4,  5,  6, 16, 17,  1, -1}, /*  2 - Base consonant */
  {-1, -1, -1, -1, -1,  4, -1, -1, 16, -1, -1, -1}, /*  3 - First ZWNJ before a register shifter
                                                            It can only be followed by a shifter or a vowel */
  {-1, -1, -1, -1, 15, -1, -1,  6, 16, 17,  1, 14}, /*  4 - First register shifter */
  {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1,  1, -1}, /*  5 - Robat */
  {-1,  7,  8,  9, -1, -1, -1, -1, -1, -1, -1, -1}, /*  6 - First Coeng */
  {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  7 - First consonant of type 1 after coeng */
  {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17,  1, 14}, /*  8 - First consonant of type 2 after coeng */
  {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14}, /*  9 - First consonant or type 3 after ceong */
  {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
  {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
  {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
  {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14}, /* 13 - Second register shifter */
  {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
  {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
  {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17,  1, 18}, /* 16 - dependent vowel */
  {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, 18}, /* 17 - sign above */
  {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
  {-1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
  {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1}, /* 20 - dependent vowel after a Robat */
};


enum property_flags
{
  abvf = 0x0001,
  pref = 0x0002,
  pstf = 0x0004,
  blwf = 0x0008,

  pres = 0x0010,
  blws = 0x0020,
  abvs = 0x0040,
  psts = 0x0080,
  clig = 0x0100,

  dist = 0x0200,
  blwm = 0x0400,
  abvm = 0x0800,
  mkmk = 0x1000
};


enum properties
{
  blwf_p    = /*(blwf | blws | clig | dist | blwm | mkmk)*/ (abvf | pref | pstf | pres | abvs | psts | abvm),
  pstf_p    = /*(blwf | blws | pref | pres | pstf | psts | clig | dist | blwm)*/ (abvf | abvs | abvm | mkmk),
  abvf_p    = /*(abvf | abvs | clig | dist | abvm | mkmk)*/ (pref | pstf | blwf | pres | blws | psts | blwm),
  pref_p    = /*(pref | pres | clig | dist)*/ (abvf | pstf | blwf | blws | abvs | psts | blwm | abvm | mkmk),
  default_p = /*(pres | blws | clig | dist | abvm | blwm | mkmk)*/ (pref | blwf |abvf | pstf | abvs | psts)
};


/* Below we define how a character in the input string is either in the khmerCharClasses table
 * (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear
 * within the syllable, but are not in the table) we also get their type back, or an unknown object
 * in which case we get _xx (CC_RESERVED) back
 */
static KhmerCharClass
get_char_class (gunichar ch)
{
  if (ch == C_SIGN_ZWJ)
    return CC_ZERO_WIDTH_J_MARK;

  if (ch == C_SIGN_ZWNJ)
    return CC_ZERO_WIDTH_NJ_MARK;

  if (ch < firstChar || ch > lastChar)
    return CC_RESERVED;

  return khmerCharClasses[ch - firstChar];
}


/* Given an input string of characters and a location in which to start looking
 * calculate, using the state table, which one is the last character of the syllable
 * that starts in the starting position.
 */
static glong
find_syllable (const gunichar *chars,
	       glong           start,
	       glong           char_count)
{
  glong cursor = start;
  gint8 state = 0;
  KhmerCharClass charClass;

  while (cursor < char_count)
    {
      charClass = get_char_class (chars[cursor]) & CF_CLASS_MASK;
      state = khmerStateTable[state][charClass];

      if (state < 0)
	break;

      cursor += 1;
    }

  return cursor;
}


static void
maybe_add_GSUB_feature (PangoOTRuleset *ruleset,
                        PangoOTInfo    *info,
                        guint           script_index,
                        PangoOTTag      tag,
                        gulong          property_bit)
{
  guint feature_index;

  /* 0xffff == default language system */
  if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GSUB,
                                  tag, script_index, 0xffff, &feature_index))
    pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GSUB, feature_index,
                                  property_bit);
}


static void
maybe_add_GPOS_feature (PangoOTRuleset *ruleset,
                        PangoOTInfo    *info,
                        guint           script_index,
                        PangoOTTag      tag,
                        gulong          property_bit)
{
  guint feature_index;

  /* 0xffff == default language system */
  if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GPOS,
                                  tag, script_index, 0xffff, &feature_index))
    pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GPOS, feature_index,
                                  property_bit);
}


static PangoOTRuleset *
get_ruleset (FT_Face face)
{
  PangoOTRuleset *ruleset;
  static GQuark ruleset_quark = 0;

  PangoOTInfo *info = pango_ot_info_get (face);

  if (!ruleset_quark)
    ruleset_quark = g_quark_from_string ("pango-khmer-ruleset");

  if (!info)
    return NULL;

  ruleset = g_object_get_qdata (G_OBJECT (info), ruleset_quark);

  if (!ruleset)
    {
      PangoOTTag khmer_tag = FT_MAKE_TAG ('k', 'h', 'm', 'r');
      guint script_index;

      ruleset = pango_ot_ruleset_new (info);

      if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GSUB,
                                      khmer_tag, &script_index))
        {
          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','f'), pref);
          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','f'), blwf);
          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','f'), abvf);
          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','f'), pstf);

          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','s'), pres);
          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','s'), blws);
          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','s'), abvs);
          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','s'), psts);
          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('c','l','i','g'), clig);
        }

      if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GPOS,
                                      khmer_tag, &script_index))
        {
          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('d','i','s','t'), dist);
          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','m'), blwm);
          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','m'), abvm);
          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('m','k','m','k'), mkmk);
        }

      g_object_set_qdata_full (G_OBJECT (info), ruleset_quark, ruleset,
                                (GDestroyNotify)g_object_unref);
    }

  return ruleset;
}


static PangoGlyph
get_index (PangoFcFont *fc_font, gunichar wc)
{
  PangoGlyph index = pango_fc_font_get_glyph (fc_font, wc);
  if (!index)
    index = pango_fc_font_get_unknown_glyph (fc_font, wc);
  return index;
}


static void
khmer_engine_shape (PangoEngineShape *engine,
                    PangoFont        *font,
                    const char       *text,
                    int               length,
                    PangoAnalysis    *analysis,
                    PangoGlyphString *glyphs)
{
  PangoFcFont *fc_font;
  FT_Face face;
  PangoOTRuleset *ruleset;
  PangoOTBuffer *buffer;
  glong n_chars;
  gunichar *wcs;
  const char *p;
  int i;
  glong syllable;
  KhmerCharClass charClass;
  glong cursor = 0;

  g_return_if_fail (font != NULL);
  g_return_if_fail (text != NULL);
  g_return_if_fail (length >= 0);
  g_return_if_fail (analysis != NULL);

  fc_font = PANGO_FC_FONT (font);
  face = pango_fc_font_lock_face (fc_font);
  g_return_if_fail (face != NULL);

  buffer = pango_ot_buffer_new (fc_font);

  wcs = g_utf8_to_ucs4_fast (text, length, &n_chars);

  p = text;
  /* This loop only exits when we reach the end of a run, which may contain
   * several syllables.
   */
  while (cursor < n_chars)
    {
      /* write a pre vowel or the pre part of a split vowel first
       * and look out for coeng + ro. RO is the only vowel of type 2, and
       * therefore the only one that requires saving space before the base.
       */
      glong coengRo = -1;  /* There is no Coeng Ro, if found this value will change */

      syllable = find_syllable (wcs, cursor, n_chars);

      for (i = cursor; i < syllable; i += 1)
        {
          charClass = get_char_class (wcs[i]);

          /* if a split vowel, write the pre part. In Khmer the pre part
           * is the same for all split vowels, same glyph as pre vowel C_VOWEL_E
           */
          if (charClass & CF_SPLIT_VOWEL)
            {
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_VOWEL_E), pref_p, p - text);
              break; /* there can be only one vowel */
            }

          /* if a vowel with pos before write it out */
          if (charClass & CF_POS_BEFORE)
            {
              pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pref_p, p - text);
              break; /* there can be only one vowel */
            }

          /* look for coeng + ro and remember position
           * works because coeng + ro is always in front of a vowel (if there is a vowel)
           * and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
           * with this flag
           */
          if ((charClass & CF_COENG) && (i + 1 < syllable) &&
             ((get_char_class (wcs[i + 1]) & CF_CLASS_MASK) == CC_CONSONANT2))
            {
              coengRo = i;
            }
        }

      /* write coeng + ro if found  */
      if (coengRo > -1)
        {
          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_COENG), pref_p, p - text);
          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_RO), pref_p, p - text);
        }

      /* shall we add a dotted circle?
      * If in the position in which the base should be (first char in the string) there is
      * a character that has the Dotted circle flag (a character that cannot be a base)
      * then write a dotted circle
      */
      if (get_char_class (wcs[cursor]) & CF_DOTTED_CIRCLE)
        {
          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_DOTTED_CIRCLE), default_p, p - text);
        }

      /* copy what is left to the output, skipping before vowels and
      * coeng Ro if they are present
      */
      for (i = cursor; i < syllable; i += 1)
        {
          charClass = get_char_class (wcs[i]);

          /* skip a before vowel, it was already processed */
          if (charClass & CF_POS_BEFORE)
            {
              p = g_utf8_next_char (p);
              continue;
            }

          /* skip coeng + ro, it was already processed */
          if (i == coengRo)
            {
              p = g_utf8_next_char (p);
              i += 1;
              p = g_utf8_next_char (p);
              continue;
            }

          switch (charClass & CF_POS_MASK)
            {
              case CF_POS_ABOVE :
		pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), abvf_p, p - text);
		break;

              case CF_POS_AFTER :
		pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
		break;
		
              case CF_POS_BELOW :
		pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
		break;

              default:
                  /* assign the correct flags to a coeng consonant
                  * Consonants of type 3 are taged as Post forms and those type 1 as below forms
                  */
		if ((charClass & CF_COENG) && i + 1 < syllable)
		  {
		    if ((get_char_class (wcs[i + 1]) & CF_CLASS_MASK) == CC_CONSONANT3)
		      {
			pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
			p = g_utf8_next_char (p);
			i += 1;
			pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
			break;
		      }
		    else
		      {
			pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
			p = g_utf8_next_char (p);
			i += 1;
			pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
			break;
		      }
		  }

                  /* if a shifter is followed by an above vowel change the shifter to below form,
                  * an above vowel can have two possible positions i + 1 or i + 3
                  * (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
                  * and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
                  * different positions, right after the shifter or after a vowel (Unicode 4)
                  */
                  if ((charClass & CF_SHIFTER) && (i + 1 < syllable))
                    {
                      if (get_char_class (wcs[i + 1]) & CF_ABOVE_VOWEL)
                        {
                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                          break;
                        }
                      if (i + 2 < syllable &&
                          (wcs[i + 1] == C_VOWEL_AA) &&
                          (wcs[i + 2] == C_SIGN_NIKAHIT) )
                        {
                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                          break;
                        }
                      if (i + 3 < syllable && (get_char_class (wcs[i + 3]) & CF_ABOVE_VOWEL) )
                        {
                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                          break;
                        }
                      if (i + 4 < syllable &&
                          (wcs[i + 3] == C_VOWEL_AA) &&
                          (wcs[i + 4] == C_SIGN_NIKAHIT) )
                        {
                          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
                          break;
                        }

                    }

                  /* default - any other characters  */
                  pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), default_p, p - text);
                  break;
            } /* switch */
          p = g_utf8_next_char (p);
        } /* for */

      cursor = syllable; /* move the pointer to the start of next syllable */
    } /* while */

  /* do gsub processing */
  ruleset = get_ruleset (face);
  if (ruleset != NULL)
    {
      pango_ot_ruleset_substitute (ruleset, buffer);
      pango_ot_ruleset_position (ruleset, buffer);
    }

  pango_ot_buffer_output (buffer, glyphs);

  g_free (wcs);
  pango_ot_buffer_destroy (buffer);

  pango_fc_font_unlock_face (fc_font);
}


static void
khmer_engine_fc_class_init (PangoEngineShapeClass *class)
{
  class->script_shape = khmer_engine_shape;
}

PANGO_ENGINE_SHAPE_DEFINE_TYPE (KhmerEngineFc, khmer_engine_fc,
                                khmer_engine_fc_class_init, NULL);


void
PANGO_MODULE_ENTRY(init) (GTypeModule *module)
{
  khmer_engine_fc_register_type (module);
}


void
PANGO_MODULE_ENTRY(exit) (void)
{
}


void
PANGO_MODULE_ENTRY(list) (PangoEngineInfo **engines,
                          int              *n_engines)
{
  *engines = script_engines;
  *n_engines = G_N_ELEMENTS (script_engines);
}


PangoEngine *
PANGO_MODULE_ENTRY(create) (const char *id)
{
  if (!strcmp (id, SCRIPT_ENGINE_NAME))
    return g_object_new (khmer_engine_fc_type, NULL);
  else
    return NULL;
}