Updated Tibetan shaper from Pema Geyleg. (#313513)

2005-11-14 Behdad Esfahbod <behdad@gnome.org> Updated Tibetan shaper from Pema Geyleg. (#313513) * examples/tibetan.utf: Added. * examples/Makefile.am (EXTRA_DIST): tibetan.utf added. * modules/tibetan/tibetan-fc.c: Updated Tibetan shaper that is rewritten from scratch. Supports the number pre-combining mark, illegal sequence detection, etc.
author: Behdad Esfahbod <behdad@gnome.org> 2005-11-15 09:02:41 +0000
committer: Behdad Esfahbod <behdad@src.gnome.org> 2005-11-15 09:02:41 +0000
commit: 5356deb630664007a0d40e4a5703129c086a74d8 (patch)
tree: 3fa22311f3af2ad1e71337a83f45da41346b7936 /modules/tibetan
parent: d1b386437af58dfee74fe376adbb0db5c5da55fd (diff)
download: pango-5356deb630664007a0d40e4a5703129c086a74d8.tar.gz
1 files changed, 491 insertions, 181 deletions
diff --git a/modules/tibetan/tibetan-fc.c b/modules/tibetan/tibetan-fc.c
index 642f9f87..525736e8 100644
--- a/modules/tibetan/tibetan-fc.c
+++ b/modules/tibetan/tibetan-fc.c
@@ -1,13 +1,24 @@
 /* Pango
  * tibetan-fc.c: Shaper for Tibetan script
- * based on thai-fc.c and basic-fc.c
  *
+ * Copyright (C) 2005 DIT, Government of Bhutan <http://www.dit.gov.bt>
+ * Contact person : Pema Geyleg <pema_geyleg@druknet.bt> 
+ *
+ *  Based on code from khmer shapers developed by Jens Herden 
+ *  <jens@tibetanos.inf > and Javier Sola <javier@tibetanos.info>
+ *
+ * Based on code from other shapers
  * Copyright (C) 1999-2004 Red Hat Software
  * Author: Owen Taylor <otaylor@redhat.com>
+
+ * Partially based on Indic shaper
+ * Copyright (C) 2001, 2002 IBM Corporation
+ * Author: Eric Mader <mader@jtcsv.com>
  *
- * Copyright (C) 2004 Theppitak Karoonboonyanan <thep@linux.thai.net>
- *
- * Copyright (C) 2004 G Karunakar <karunakar@freedomink.org>
+ * The first module for Tibetan shaper was developed by Mr. Karunakar under 
+ * PanLocalization project. 
+ * Mr. Chris Fynn, Mr.Javier Sola, Mr. Namgay Thinley were involved
+ * while developing this shaper.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -23,115 +34,397 @@
  * License along with this library; if not, write to the
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 02111-1307, USA.
+ *
+ * The license on the original Indic shaper code is as follows:
+ *
+ *  Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, and/or sell copies of the
+ * Software, and to permit persons to whom the Software is furnished
+ * to do so, provided that the above copyright notice(s) and this
+ * permission notice appear in all copies of the Software and that
+ * both the above copyright notice(s) and this permission notice
+ * appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR
+ * ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder
+ * shall not be used in advertising or otherwise to promote the sale,
+ * use or other dealings in this Software without prior written
+ * authorization of the copyright holder.
  */
 
 #include <string.h>
 
-#include <glib.h>
-#include <pango-engine.h>
+
+#include "pango-engine.h"
 #include "pango-ot.h"
+#include "pango-utils.h"
 #include "pangofc-font.h"
 
-typedef PangoEngineShape	TibetanEngineFc;
-typedef PangoEngineShapeClass	TibetanEngineFcClass;
 
 #define SCRIPT_ENGINE_NAME "TibetanScriptEngineFc"
 #define RENDER_TYPE PANGO_RENDER_TYPE_FC
 
-static PangoEngineScriptInfo tibetan_scripts[] = {
+
+typedef PangoEngineShape      TibetanEngineFc;
+typedef PangoEngineShapeClass TibetanEngineFcClass ;
+
+
+static PangoEngineScriptInfo tibetan_scripts[] =
+{
   { PANGO_SCRIPT_TIBETAN, "*" }
 };
 
-static PangoEngineInfo script_engines[] = {
+static PangoEngineInfo script_engines[] =
+{
   {
     SCRIPT_ENGINE_NAME,
     PANGO_ENGINE_TYPE_SHAPE,
     RENDER_TYPE,
-    tibetan_scripts, G_N_ELEMENTS(tibetan_scripts)
+    tibetan_scripts, G_N_ELEMENTS (tibetan_scripts)
   }
 };
 
-/* GPOS tables are not present in Joyig font */
-#undef DO_GPOS
 
-static void
-maybe_add_gsub_feature (PangoOTRuleset *ruleset,
-			PangoOTInfo    *info,
-			guint           script_index,
-			PangoOTTag      feature_tag,
-			gulong          property_bit)
+
+// Vocabulary 
+//     Base ->         A consonant in its full (not subscript) form. It is the
+//                     center of the syllable; it can be surrounded by subjoined consonants, vowels,
+//                     signs... but there is only one base in a stack, and it has to be coded as
+//                     the first character of the syllable. Included here are also groups of base + subjoined
+//                     which are represented by one single code point in Unicode (e.g. 0F43), as well as other
+//                     characters that might take subjoined consonants or other combining characters.
+//     Subjoined ->    Subjoined consonants and groups of subjoined consonants which have a single code-point
+//                     to represent the group (even if each subjoined consonant is represented independently
+//                     by another code-point).
+//     Tsa Phru -->    Tsa Phru character. Bhutanese people will always place it right after the base, but sometimes,
+//                     due to "normalization", it is placed after all the subjoined consonants,
+//                     and it is also permitted there.
+//     A Chung -->     Vowel lengthening mark 0F71. It is placed after the base and any subjoined consonants but before any vowels.
+//     Precomposed Sanskrit vowels --> These are combinations of subjoined consonants + vowels that have been assigned
+//                     a given code-point (in spite of each single part of them also having a code-point).
+//                     They are avoided, and users are encouraged to use the combination of code-points that
+//                     represents the same sound instead of using these combined characters. This is included here
+//                     for compatibility with possible texts that use them (they are not in the Dzongkha keyboard).
+//     Halanta ->      The Halanta or Virama character 0F84 indicates that a consonant should not use its inherent vowel,
+//                     in spite of not having other vowels present. It is usually placed immediately after a base consonant,
+//                     but in some special cases it can also be placed after a subjoined consonant, so this is also
+//                     permitted in this algorithm. (Halanta is always displayed in Tibetan, not used as a connecting char.)
+//
+//     Subjoined vowels -> Dependent vowels (matras) placed below the base and below all subjoined consonants. There
+//                     might be as many as three subjoined vowels in a given stack (only one in general text, but up
+//                     to three for abbreviations, so they have to be permitted).
+//     Superscript vowels -> There are three superscript vowels, and they can be repeated or combined (up to three
+//                     times). They can combine with subjoined vowels, and are always coded after these.
+//     Anusvara -->    Nasalisation sign. Traditionally placed in absence of vowels, but also after vowels. In some
+//                     special cases it can be placed before a vowel, so this is also permitted.
+//     Candrabindu ->  Forms of the Anusvara with different glyphs (and different in identity) which can be placed
+//                     without vowel or after the vowel, but never before. Cannot combine with Anusvara.
+//     Stress marks -> Marks placed above or below a syllable, affecting the whole syllable. They are combining
+//                     marks, so they have to be attached to a specific stack. They are used to emphasise a syllable.
+//
+//     Digits ->       Digits are not considered as non-combining characters because there are a few characters which
+//                     combine with them, so they have to be considered independently.
+//     Digit combining marks -> dependent marks that combine with digits.
+//     
+//     TODO
+//     There are a number of characters in the CJK block that are used in Tibetan script; two of these are symbols
+//     that are used as bases for combining glyphs, and have not been encoded in Tibetan. As these characters are outside
+//     of the Tibetan block, they have not been treated in this program.
+     
+
+enum TibetanCharClassValues
 {
-  guint feature_index;
-  
-  /* 0xffff == default language system */
-  if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GSUB,
-				  feature_tag, script_index, 0xffff, &feature_index))
-    {
-      pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GSUB, feature_index,
-				    property_bit);
-    }
-}
+  			CC_RESERVED             =  0, //Non Combining Characters
+        CC_BASE                 =  1, // Base Consonants, Base Consonants with Subjoined attached in code point, Sanskrit base marks
+        CC_SUBJOINED            =  2, // Subjoined Consonats, combination of more than Subjoined Consonants in the code point
+        CC_TSA_PHRU             =  3, // Tsa-Phru character 0F39 
+        CC_A_CHUNG              =  4, // Vowel Lenthening a-chung mark 0F71
+        CC_COMP_SANSKRIT        =  5, // Precomposed Sanskrit vowels including Subjoined characters and vowels
+        CC_HALANTA              =  6, // Halanta Character 0F84
+        CC_BELOW_VOWEL          =  7, // Subjoined vowels
+        CC_ABOVE_VOWEL          =  8, // Superscript vowels
+        CC_ANUSVARA             =  9, // Tibetan sign Rjes Su Nga Ro 0F7E
+        CC_CANDRABINDU          = 10, // Tibetan sign Sna Ldan and Nyi Zla Naa Da 0F82, 0F83
+        CC_VISARGA              = 11, // Tibetan sign Rnam Bcad (0F7F)
+        CC_ABOVE_S_MARK         = 12, // Stress Marks placed above the text
+        CC_BELOW_S_MARK         = 13, // Stress Marks placed below the text
+        CC_DIGIT                = 14, // Dzongkha Digits
+        CC_PRE_DIGIT_MARK       = 15, // Mark placed before the digit
+        CC_POST_BELOW_DIGIT_M   = 16, // Mark placed below or after the digit
+        CC_COUNT                = 17  // This is the number of character classes
+};
 
-#ifdef DO_GPOS
-static void
-maybe_add_gpos_feature (PangoOTRuleset *ruleset,
-		        PangoOTInfo    *info,
-			guint           script_index,
-			PangoOTTag      feature_tag,
-			gulong          property_bit)
+
+enum TibetanCharClassFlags
 {
-  guint feature_index;
+        CF_CLASS_MASK    = 0x0000FFFF,
+
+        CF_DOTTED_CIRCLE = 0x04000000,  // add a dotted circle if a character with this flag is the first in a syllable
+        CF_DIGIT         = 0x01000000,  // flag to speed up comparaisson
+        CF_PREDIGIT      = 0x02000000,  // flag to detect pre-digit marks for reordering
+
+        // position flags
+        CF_POS_BEFORE    = 0x00080000,
+        CF_POS_BELOW     = 0x00040000,
+        CF_POS_ABOVE     = 0x00020000,
+        CF_POS_AFTER     = 0x00010000,
+        CF_POS_MASK      = 0x000f0000
+};
 
-  if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GPOS,
-				  feature_tag, script_index, 0xffff, &feature_index))
-    {
-      pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GPOS, feature_index,
-				    property_bit);
-    }
-}
-#endif
 
-static PangoOTRuleset *
-get_gsub_ruleset (FT_Face face)
+/* Characters that get referred to by name */
+enum TibetanChar
 {
-  PangoOTInfo    *info = pango_ot_info_get (face);
-  GQuark          ruleset_quark = g_quark_from_string ("tibetan-gsub-ruleset");
-  PangoOTRuleset *ruleset;
+  C_DOTTED_CIRCLE = 0x25CC,
+  C_PRE_NUMBER_MARK = 0x0F3F
+};
 
-  if (!info)
-    return NULL;
 
-  ruleset = g_object_get_qdata (G_OBJECT (info), ruleset_quark);
+enum
+{
+    // simple classes, they are used in the statetable (in this file) to control the length of a syllable
+    // they are also used to know where a character should be placed (location in reference to the base character)
+    // and also to know if a character, when independtly displayed, should be displayed with a dotted-circle to
+    // indicate error in syllable construction 
+    _xx = CC_RESERVED,                                    
+    _ba = CC_BASE,                                        
+    _sj = CC_SUBJOINED | CF_DOTTED_CIRCLE | CF_POS_BELOW, 
+    _tp = CC_TSA_PHRU  | CF_DOTTED_CIRCLE | CF_POS_ABOVE, 
+    _ac = CC_A_CHUNG |  CF_DOTTED_CIRCLE | CF_POS_BELOW,  
+    _cs = CC_COMP_SANSKRIT | CF_DOTTED_CIRCLE | CF_POS_BELOW, 
+    _ha = CC_HALANTA | CF_DOTTED_CIRCLE | CF_POS_BELOW,       
+    _bv = CC_BELOW_VOWEL | CF_DOTTED_CIRCLE | CF_POS_BELOW,  
+    _av = CC_ABOVE_VOWEL | CF_DOTTED_CIRCLE | CF_POS_ABOVE,   
+    _an = CC_ANUSVARA | CF_DOTTED_CIRCLE | CF_POS_ABOVE,      
+    _cb = CC_CANDRABINDU | CF_DOTTED_CIRCLE | CF_POS_ABOVE,   
+    _vs = CC_VISARGA | CF_DOTTED_CIRCLE| CF_POS_AFTER,        
+    _as = CC_ABOVE_S_MARK | CF_DOTTED_CIRCLE | CF_POS_ABOVE,  
+    _bs = CC_BELOW_S_MARK | CF_DOTTED_CIRCLE | CF_POS_BELOW,  
+    _di = CC_DIGIT | CF_DIGIT,                                
+    _pd = CC_PRE_DIGIT_MARK | CF_DOTTED_CIRCLE | CF_PREDIGIT | CF_POS_BEFORE , 
+    _bd = CC_POST_BELOW_DIGIT_M | CF_DOTTED_CIRCLE | CF_POS_AFTER  
+};
 
-  if (!ruleset)
-    {
-      PangoOTTag tibt_tag = FT_MAKE_TAG ('t', 'i', 'b', 't');
-      guint      script_index;
 
-      ruleset = pango_ot_ruleset_new (info);
+/* Character class: a character class value
+ * ORed with character class flags.
+ */
+typedef glong TibetanCharClass;
+//_xx Non Combining characters
+//_ba Base Consonants
+//_sj Subjoined consonants
+//_tp Tsa - phru
+//_ac A-chung, Vowel Lengthening mark
+//_cs Precomposed Sanskrit vowel + subjoined consonants
+//_ha Halanta/Virama
+//_bv Below vowel
+//_av above vowel
+//_an Anusvara
+//_cb Candrabindu
+//_vs Visaraga/Post mark
+//_as Upper Stress marks
+//_bs Lower Stress marks
+//_di Digit
+//_pd Number pre combining, Needs reordering
+//_bd Other number combining marks
+
+
+static const TibetanCharClass tibetanCharClasses[] =
+{
+  // 0    1    2    3    4    5    6    7    8    9   a     b   c    d     e   f
+    _xx, _ba, _xx, _xx, _ba, _ba, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0F00 - 0F0F 0
+    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _bd, _bd, _xx, _xx, _xx, _xx, _xx, _xx, // 0F10 - 0F1F 1
+    _di, _di, _di, _di, _di, _di, _di, _di, _di, _di, _xx, _xx, _xx, _xx, _xx, _xx, // 0F20 - 0F2F 2 
+    _xx, _xx, _xx, _xx, _xx, _bs, _xx, _bs, _xx, _tp, _xx, _xx, _xx, _xx, _bd, _pd, // 0F30 - 0F3F 3 
+    _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F40 - 0F4F 4
+    _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F50 - 0F5F 5
+    _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, _xx, // 0F60 - 0F6F 6
+    _xx, _ac, _av, _cs, _bv, _bv, _cs, _cs, _cs, _cs, _av, _av, _av, _av, _an, _vs, // 0F70 - 0F7F 7
+    _av, _cs, _cb, _cb, _ha, _xx, _as, _as, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, // 0F80 - 0F8F 8
+    _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0F90 - 0F9F 9
+    _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0FA0 - 0FAF a
+    _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, // 0FB0 - 0FBF b
+    _xx, _xx, _xx, _xx, _xx, _xx, _bs, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FC0 - 0FCF c
+    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx,// 0FD0 - 0FDF  d
+    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FE0 - 0FEF e
+    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FF0 - 0FFF f 
+};
 
-     if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GSUB,
-				     tibt_tag, &script_index))
-	{
-	  maybe_add_gsub_feature (ruleset, info, script_index, FT_MAKE_TAG ('c','c','m','p'), 0xFFFF);
-	  maybe_add_gsub_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','s'), 0xFFFF);
-	  maybe_add_gsub_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','s'), 0xFFFF);
-	}
+/* this define must reflect the range of tibetanCharClasses */
+//First Tibetan Character
+#define firstChar 0x0F00
+//Last Tibetan Character
+#define lastChar 0x0FFF
+
+// The stateTable is used to calculate the end (the length) of a well
+// formed Tibetan Stack
+//
+// Each horizontal line is ordered exactly the same way as the values in the
+// TibetanCharClassValues enum. This coincidence of values allows the follow up of the table.
+//
+// Each line corresponds to a state, which does not necessarily need to be a type
+// of component... for example, state 2 is a base, which is always the first character
+// in the stack, but the state could be produced by a consonant of any type when
+// it is the first character that is analysed (in ground state).
+
+static const gint8 tibetanStateTable[][CC_COUNT] =
+{
+    //Dzongkha state table
+    //xx  ba  sj  tp  ac  cs  ha  bv  av  an  cb  vs  as  bs  di  pd  bd
+    { 1,  2,  4,  3,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, 20, 21, 21,}, //  0 - ground state
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, //  1 - exit state (or sign to the right of the syllable)
+    {-1, -1,  4,  3,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  2 - Base consonant    
+    {-1, -1,  5, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  3 - Tsa phru after base
+    {-1, -1,  4,  6,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  4 - Subjoined consonant after base             
+    {-1, -1,  5, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  5 - Subjoined consonant after tsa phru
+    {-1, -1, -1, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  6 - Tsa phru after subjoined consonant
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, //  7 - Pre Composed Sanskrit
+    {-1, -1, -1, -1, -1, -1, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  8 - A-chung
+    {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, -1, 19, 19, -1, -1, -1,}, //  9 - Halanta
+    {-1, -1, -1, -1, -1, -1, -1, 11, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 10 - below vowel 1
+    {-1, -1, -1, -1, -1, -1, -1, 12, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 11 - below vowel 2
+    {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 12 - below vowel 3   
+    {-1, -1, -1, -1, -1, -1, -1, -1, 14, 17, 17, 18, 19, 19, -1, -1, -1,}, // 13 - Anusvara before vowel
+    {-1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 17, 18, 19, 19, -1, -1, -1,}, // 14 - above vowel 1
+    {-1, -1, -1, -1, -1, -1, -1, -1, 16, 17, 17, 18, 19, 19, -1, -1, -1,}, // 15 - above vowel 2
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 17, 18, 19, 19, -1, -1, -1,}, // 16 - above vowel 3
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 19, 19, -1, -1, -1,}, // 17 - Anusvara or Candrabindu after vowel 
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, // 18 - Visarga    
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 19 - strss mark
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 21, 21,}, // 20 - digit 
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 21 - digit mark
+};
 
-      g_object_set_qdata_full (G_OBJECT (info), ruleset_quark, ruleset,
-			       (GDestroyNotify)g_object_unref);
+
+enum property_flags
+{
+  abvf = 0x0001,
+  pref = 0x0002,
+  pstf = 0x0004,
+  blwf = 0x0008,
+
+  pres = 0x0010,
+  blws = 0x0020,
+  abvs = 0x0040,
+  psts = 0x0080,
+  clig = 0x0100,
+
+  dist = 0x0200,
+  blwm = 0x0400,
+  abvm = 0x0800,
+  mkmk = 0x1000
+};
+
+
+enum properties
+{
+  blwf_p    = /*(blwf | blws | clig | dist | blwm | mkmk)*/ (abvf | pref | pstf | pres | abvs | psts | abvm),
+  pstf_p    = /*(blwf | blws | pref | pres | pstf | psts | clig | dist | blwm)*/ (abvf | abvs | abvm | mkmk),
+  abvf_p    = /*(abvf | abvs | clig | dist | abvm | mkmk)*/ (pref | pstf | blwf | pres | blws | psts | blwm),
+  pref_p    = /*(pref | pres | clig | dist)*/ (abvf | pstf | blwf | blws | abvs | psts | blwm | abvm | mkmk),
+  default_p = /*(pres | blws | clig | dist | abvm | blwm | mkmk)*/ (pref | blwf |abvf | pstf | abvs | psts)
+};
+
+
+/* Below we define how a character in the input string is either in the tibetanCharClasses table
+ * (in which case we get its type back), or an unknown object in which case we get _xx (CC_RESERVED) back
+ */
+static TibetanCharClass
+get_char_class (gunichar ch)
+{
+
+  if (ch < firstChar || ch > lastChar)
+    return CC_RESERVED;
+
+  return tibetanCharClasses[ch - firstChar];
+}
+
+
+/* Given an input string of characters and a location in which to start looking,
+ * calculate, using the state table, where the syllable that starts in the
+ * starting position ends. The value returned is the index just past its last character.
+ */
+static glong
+find_syllable (const gunichar *chars,
+	       glong           start,
+	       glong           char_count)
+{
+  glong cursor = start;
+  gint8 state = 0;
+  TibetanCharClass charClass;
+
+  while (cursor < char_count)
+    {
+      charClass = get_char_class (chars[cursor]) & CF_CLASS_MASK;
+      state = tibetanStateTable[state][charClass];
+
+      if (state < 0)
+	break;
+
+      cursor += 1;
     }
 
-  return ruleset;
+  return cursor;
+}
+
+
+static void
+maybe_add_GSUB_feature (PangoOTRuleset *ruleset,
+                        PangoOTInfo    *info,
+                        guint           script_index,
+                        PangoOTTag      tag,
+                        gulong          property_bit)
+{
+  guint feature_index;
+
+  /* 0xffff == default language system */
+  if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GSUB,
+                                  tag, script_index, 0xffff, &feature_index))
+    pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GSUB, feature_index,
+                                  property_bit);
 }
 
+
+static void
+maybe_add_GPOS_feature (PangoOTRuleset *ruleset,
+                        PangoOTInfo    *info,
+                        guint           script_index,
+                        PangoOTTag      tag,
+                        gulong          property_bit)
+{
+  guint feature_index;
+
+  /* 0xffff == default language system */
+  if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GPOS,
+                                  tag, script_index, 0xffff, &feature_index))
+    pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GPOS, feature_index,
+                                  property_bit);
+}
+
+//Rules found in the Open type font features
 static PangoOTRuleset *
-get_gpos_ruleset (FT_Face face)
+get_ruleset (FT_Face face)
 {
-#ifdef DO_GPOS
-  PangoOTInfo    *info = pango_ot_info_get (face);
-  GQuark          ruleset_quark = g_quark_from_string ("tibetan-gpos-ruleset");
   PangoOTRuleset *ruleset;
+  static GQuark ruleset_quark = 0;
+
+  PangoOTInfo *info = pango_ot_info_get (face);
+
+  if (!ruleset_quark)
+    ruleset_quark = g_quark_from_string ("pango-tibetan-ruleset");
 
   if (!info)
     return NULL;
@@ -141,142 +434,155 @@ get_gpos_ruleset (FT_Face face)
   if (!ruleset)
     {
       PangoOTTag tibetan_tag = FT_MAKE_TAG ('t', 'i', 'b', 't');
-      guint      script_index;
+      guint script_index;
 
       ruleset = pango_ot_ruleset_new (info);
 
+      if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GSUB,
+                                      tibetan_tag, &script_index))
+        {
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','f'), pref);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','f'), blwf);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','f'), abvf);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','f'), pstf);
+
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','s'), pres);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','s'), blws);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','s'), abvs);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','s'), psts);
+          maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('c','l','i','g'), clig);
+        }
+
       if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GPOS,
-				     tibetan_tag, &script_index))
-	{
-	  maybe_add_gpos_feature (ruleset, info, script_index, FT_MAKE_TAG ('k','e','r','n'), 0xFFFF);
-	  maybe_add_gpos_feature (ruleset, info, script_index, FT_MAKE_TAG ('m','a','r','k'), 0xFFFF);
-	  maybe_add_gpos_feature (ruleset, info, script_index, FT_MAKE_TAG ('m','k','m','k'), 0xFFFF);
-	}
+                                      tibetan_tag, &script_index))
+        {
+          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('d','i','s','t'), dist);
+          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','m'), blwm);
+          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','m'), abvm);
+          maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('m','k','m','k'), mkmk);
+        }
 
       g_object_set_qdata_full (G_OBJECT (info), ruleset_quark, ruleset,
-			       (GDestroyNotify)g_object_unref);
+                                (GDestroyNotify)g_object_unref);
     }
 
   return ruleset;
-#else
-  return NULL;
-#endif
 }
 
-static void
-set_glyph (PangoFont        *font,
-           PangoGlyphString *glyphs,
-           int               i,
-           int               offset,
-           PangoGlyph        glyph)
-{
-  PangoRectangle logical_rect;
-
-  glyphs->glyphs[i].glyph = glyph;
-  
-  glyphs->glyphs[i].geometry.x_offset = 0;
-  glyphs->glyphs[i].geometry.y_offset = 0;
-
-  glyphs->log_clusters[i] = offset;
 
-  pango_font_get_glyph_extents (font, glyphs->glyphs[i].glyph, NULL, 
-				&logical_rect);
-  glyphs->glyphs[i].geometry.width = logical_rect.width;
+static PangoGlyph
+get_index (PangoFcFont *fc_font, gunichar wc)
+{
+  PangoGlyph index = pango_fc_font_get_glyph (fc_font, wc);
+  if (!index)
+    index = pango_fc_font_get_unknown_glyph (fc_font, wc);
+  return index;
 }
 
-static void 
-fallback_shape (PangoFont        *font,
-		const char       *text,
-		gint              length,
-		PangoGlyphString *glyphs)
+
+static void
+tibetan_engine_shape (PangoEngineShape *engine,
+                    PangoFont        *font,
+                    const char       *text,
+                    int               length,
+                    PangoAnalysis    *analysis,
+                    PangoGlyphString *glyphs)
 {
   PangoFcFont *fc_font = PANGO_FC_FONT (font);
+  FT_Face face;
+  PangoOTBuffer *buffer;
+  PangoOTRuleset *ruleset;
+  glong n_chars, i;
+  gunichar *wcs;
   const char *p;
-  long n_chars, i;
+  glong syllable;
+  TibetanCharClass charClass;
+  glong cursor = 0;
 
-  n_chars = g_utf8_strlen (text, length);
-  pango_glyph_string_set_size (glyphs, n_chars);
-  
-  for (i = 0, p = text; i < n_chars; i++, p = g_utf8_next_char (p))
-    {
-      gunichar wc;
-      PangoGlyph index;
+  buffer = pango_ot_buffer_new (fc_font);
 
-      wc = g_utf8_get_char (p);
+  face = pango_fc_font_lock_face (fc_font);
+  g_assert (face);
 
-      index = pango_fc_font_get_glyph (fc_font, wc);
-      if (!index)
-	index = pango_fc_font_get_unknown_glyph (fc_font, wc);
-      
-      set_glyph (font, glyphs, i, p - text, index);
-    }
-}
+  wcs = g_utf8_to_ucs4_fast (text, length, &n_chars);
+  p = text;
 
-static void
-ot_shape (PangoFont        *font,
-	  PangoOTRuleset   *gsub_ruleset,
-	  PangoOTRuleset   *gpos_ruleset,
-	  const char       *text,
-	  gint              length,
-	  PangoGlyphString *glyphs)
-{
-  PangoFcFont *fc_font = PANGO_FC_FONT(font);
-  PangoOTBuffer *buffer = pango_ot_buffer_new (fc_font);
-  const char *p;
-  
-  for (p = text; p - text < length; p = g_utf8_next_char (p))
+  /* This loop only exits when we reach the end of a run, which may contain
+   * several syllables.
+   */
+  while (cursor < n_chars)
+    {
+      syllable = find_syllable (wcs, cursor, n_chars);
+
+      /* shall we add a dotted circle?
+      * If in the position in which the base should be (first char in the string) there is
+      * a character that has the Dotted circle flag (a character that cannot be a base)
+      * then write a dotted circle
+      */
+      if (get_char_class (wcs[cursor]) & CF_DOTTED_CIRCLE)
+        {
+          pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_DOTTED_CIRCLE), default_p, p - text);
+        }
+
+      /* If it encounters a digit followed by number pre combining mark, then reorder the two characters
+      * coeng Ro if they are present 
+      */
+      for (i = cursor; i < syllable; i += 1)
+        {
+          charClass = get_char_class (wcs[i]);
+
+          if ((charClass & CF_DIGIT ) 
+              && ( get_char_class (wcs[i+1]) & CF_PREDIGIT))
+           {
+         		 pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_PRE_NUMBER_MARK), pref_p, p - text);
+         		 p = g_utf8_next_char (p);
+         		 pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pref_p, p - text);
+ 			       i += 1;          
+         } else {
+          switch (charClass & CF_POS_MASK)
+            {
+              case CF_POS_ABOVE :
+		             pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), abvf_p, p - text);
+		             break;
+
+              case CF_POS_AFTER :
+		             pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
+		             break;
+		
+              case CF_POS_BELOW :
+		             pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+		             break;
+
+              default:
+                   /* default - any other characters  */
+                  pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), default_p, p - text);
+                  break;
+            } /* switch */
+           } 
+            
+          p = g_utf8_next_char (p);
+        } /* for */
+
+      cursor = syllable; /* move the pointer to the start of next syllable */
+    } /* while */
+
+  /* do gsub processing */
+  ruleset = get_ruleset (face);
+  if (ruleset != NULL)
     {
-      gunichar wc;
-      PangoGlyph index;
-      
-      wc = g_utf8_get_char (p);
-      
-      index = pango_fc_font_get_glyph (fc_font, wc);	  
-      if (!index)
-	index = pango_fc_font_get_unknown_glyph (fc_font, wc);
-      
-      pango_ot_buffer_add_glyph (buffer, index, 0, p - text);
+      pango_ot_ruleset_substitute (ruleset, buffer);
+      pango_ot_ruleset_position (ruleset, buffer);
     }
-  
-  if (gsub_ruleset != NULL)
-    pango_ot_ruleset_substitute (gsub_ruleset, buffer);
-  
-  if (gpos_ruleset != NULL)
-    pango_ot_ruleset_position (gpos_ruleset, buffer);
-  
-  pango_ot_buffer_output (buffer, glyphs);
-  pango_ot_buffer_destroy (buffer);
-}
-
-static void
-tibetan_engine_shape (PangoEngineShape *engine,
-		      PangoFont        *font,
-		      const char       *text,
-		      int               length,
-		      PangoAnalysis    *analysis,
-		      PangoGlyphString *glyphs)
-{
-  PangoFcFont *fc_font = PANGO_FC_FONT(font);
-  PangoOTRuleset *gsub_ruleset;
-  PangoOTRuleset *gpos_ruleset;
-  FT_Face face;
 
-  g_return_if_fail (length >= 0);
+  pango_ot_buffer_output (buffer, glyphs);
 
-  face = pango_fc_font_lock_face (fc_font);
-  g_assert (face != NULL);
-  
-  gsub_ruleset = get_gsub_ruleset (face);
-  gpos_ruleset = get_gpos_ruleset (face);
-  
-  if (gsub_ruleset != NULL)
-    ot_shape (font, gsub_ruleset, gpos_ruleset, text, length, glyphs);
-  else
-    fallback_shape (font, text, length, glyphs);
+  g_free (wcs);
+  pango_ot_buffer_destroy (buffer);
 
   pango_fc_font_unlock_face (fc_font);
 }
 
+
 static void
 tibetan_engine_fc_class_init (PangoEngineShapeClass *class)
 {
@@ -284,7 +590,8 @@ tibetan_engine_fc_class_init (PangoEngineShapeClass *class)
 }
 
 PANGO_ENGINE_SHAPE_DEFINE_TYPE (TibetanEngineFc, tibetan_engine_fc,
-				tibetan_engine_fc_class_init, NULL);
+                                tibetan_engine_fc_class_init, NULL);
+
 
 void
 PANGO_MODULE_ENTRY(init) (GTypeModule *module)
@@ -292,19 +599,22 @@ PANGO_MODULE_ENTRY(init) (GTypeModule *module)
   tibetan_engine_fc_register_type (module);
 }
 
+
 void
 PANGO_MODULE_ENTRY(exit) (void)
 {
 }
 
+
 void
 PANGO_MODULE_ENTRY(list) (PangoEngineInfo **engines,
-			  int		   *n_engines)
+                          int              *n_engines)
 {
   *engines = script_engines;
   *n_engines = G_N_ELEMENTS (script_engines);
 }
 
+
 PangoEngine *
 PANGO_MODULE_ENTRY(create) (const char *id)
 {
author	Behdad Esfahbod <behdad@gnome.org>	2005-11-15 09:02:41 +0000
committer	Behdad Esfahbod <behdad@src.gnome.org>	2005-11-15 09:02:41 +0000
commit	5356deb630664007a0d40e4a5703129c086a74d8 (patch)
tree	3fa22311f3af2ad1e71337a83f45da41346b7936 /modules/tibetan
parent	d1b386437af58dfee74fe376adbb0db5c5da55fd (diff)
download	pango-5356deb630664007a0d40e4a5703129c086a74d8.tar.gz