summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBehdad Esfahbod <behdad@gnome.org>2005-11-15 09:03:48 +0000
committerBehdad Esfahbod <behdad@src.gnome.org>2005-11-15 09:03:48 +0000
commit2106b9e48714e656a1a2beafc58fed32f25780ff (patch)
treecd83df1c412ec5b652344c895a6be4a189a930c1
parentf37e6c982304445ccfbe573a323ae5df7a066f99 (diff)
downloadpango-2106b9e48714e656a1a2beafc58fed32f25780ff.tar.gz
Updated Tibetan shaper from Pema Geyleg. (#313513)
2005-11-14 Behdad Esfahbod <behdad@gnome.org> Updated Tibetan shaper from Pema Geyleg. (#313513) * examples/tibetan.utf: Added. * examples/Makefile.am (EXTRA_DIST): tibetan.utf added. * modules/tibetan/tibetan-fc.c: Updated Tibetan shaper that is rewritten from scratch. Supports the number pre-combining mark, illegal sequence detection, etc.
-rw-r--r--ChangeLog12
-rw-r--r--examples/Makefile.am1
-rw-r--r--examples/tibetan.utf12
-rw-r--r--modules/tibetan/tibetan-fc.c672
4 files changed, 516 insertions, 181 deletions
diff --git a/ChangeLog b/ChangeLog
index 4a1dd9a6..a74c46a8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,17 @@
2005-11-14 Behdad Esfahbod <behdad@gnome.org>
+ Updated Tibetan shaper from Pema Geyleg. (#313513)
+
+ * examples/tibetan.utf: Added.
+
+ * examples/Makefile.am (EXTRA_DIST): tibetan.utf added.
+
+ * modules/tibetan/tibetan-fc.c: Updated Tibetan shaper that is
+ rewritten from scratch. Supports the number pre-combining mark,
+ illegal sequence detection, etc.
+
+2005-11-14 Behdad Esfahbod <behdad@gnome.org>
+
* examples/GLASS.utf8: Added. The infamous "I Can Eat Glass" in
vairous languages. Adopted from
http://www.columbia.edu/kermit/utf8.html#glass
diff --git a/examples/Makefile.am b/examples/Makefile.am
index e79ff0fa..a830c1ec 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -112,6 +112,7 @@ EXTRA_DIST= \
dev-example.utf \
muru.utf \
syriac.utf \
+ tibetan.utf \
pangorc \
pangox.aliases
diff --git a/examples/tibetan.utf b/examples/tibetan.utf
new file mode 100644
index 00000000..83d44aab
--- /dev/null
+++ b/examples/tibetan.utf
@@ -0,0 +1,12 @@
+Regular letters འབྲུག་རྒྱལ་ཁབ་འདི་དགའ་ཏོག་ཏོ་ཡོད།
+Reordering of Digits ༡༿༢༿༣༿ Illegal ཀ༿ སྐ༿༾
+Digits with other combining marks ༡༾༢༘༤༥༦ Illegal ཀ༘ཁ༙སྐ༘༙
+Tsa phru after base ཀ༹ Illegal ཀྱུ༹ ཀི༹
+Tsa phru after subjoined སྐ༹ རྒྱ༹
+A-Chung ཀཱ ཀྲཱ གླཱ གྱཱ༹ Illegal ཀཱུ ཀཱི
+Halanta ཀ྄ ཀ྄ཱ སྐ྄ སྐྱ྄
+Vowels ཀྱུ སྐྱིུ ཀྀ ཀེ ཀོ ལྐཻ ཀཽ
+Anusvara ཀུཾ ཀིཾ ཀཾི ཀིཾ ཀཾིཾ
+Visaraga ཀཿ Illegal ཀ༵ཿ
+Lower Stress Mark ཀ༷ ཀཱ༵
+Candrabindu ཀྃ ཀིྃ Illegal ཀིྃ
diff --git a/modules/tibetan/tibetan-fc.c b/modules/tibetan/tibetan-fc.c
index 642f9f87..525736e8 100644
--- a/modules/tibetan/tibetan-fc.c
+++ b/modules/tibetan/tibetan-fc.c
@@ -1,13 +1,24 @@
/* Pango
* tibetan-fc.c: Shaper for Tibetan script
- * based on thai-fc.c and basic-fc.c
*
+ * Copyright (C) 2005 DIT, Government of Bhutan <http://www.dit.gov.bt>
+ * Contact person : Pema Geyleg <pema_geyleg@druknet.bt>
+ *
+ * Based on code from khmer shapers developed by Jens Herden
+ * <jens@tibetanos.inf > and Javier Sola <javier@tibetanos.info>
+ *
+ * Based on code from other shapers
* Copyright (C) 1999-2004 Red Hat Software
* Author: Owen Taylor <otaylor@redhat.com>
+
+ * Partially based on Indic shaper
+ * Copyright (C) 2001, 2002 IBM Corporation
+ * Author: Eric Mader <mader@jtcsv.com>
*
- * Copyright (C) 2004 Theppitak Karoonboonyanan <thep@linux.thai.net>
- *
- * Copyright (C) 2004 G Karunakar <karunakar@freedomink.org>
+ * The first module for Tibetan shaper was developed by Mr. Karunakar under
+ * PanLocalization project.
+ * Mr. Chris Fynn, Mr.Javier Sola, Mr. Namgay Thinley were involved
+ * while developing this shaper.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -23,115 +34,397 @@
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
+ *
+ * The license on the original Indic shaper code is as follows:
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, and/or sell copies of the
+ * Software, and to permit persons to whom the Software is furnished
+ * to do so, provided that the above copyright notice(s) and this
+ * permission notice appear in all copies of the Software and that
+ * both the above copyright notice(s) and this permission notice
+ * appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR
+ * ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder
+ * shall not be used in advertising or otherwise to promote the sale,
+ * use or other dealings in this Software without prior written
+ * authorization of the copyright holder.
*/
#include <string.h>
-#include <glib.h>
-#include <pango-engine.h>
+
+#include "pango-engine.h"
#include "pango-ot.h"
+#include "pango-utils.h"
#include "pangofc-font.h"
-typedef PangoEngineShape TibetanEngineFc;
-typedef PangoEngineShapeClass TibetanEngineFcClass;
#define SCRIPT_ENGINE_NAME "TibetanScriptEngineFc"
#define RENDER_TYPE PANGO_RENDER_TYPE_FC
-static PangoEngineScriptInfo tibetan_scripts[] = {
+
+typedef PangoEngineShape TibetanEngineFc;
+typedef PangoEngineShapeClass TibetanEngineFcClass ;
+
+
+static PangoEngineScriptInfo tibetan_scripts[] =
+{
{ PANGO_SCRIPT_TIBETAN, "*" }
};
-static PangoEngineInfo script_engines[] = {
+static PangoEngineInfo script_engines[] =
+{
{
SCRIPT_ENGINE_NAME,
PANGO_ENGINE_TYPE_SHAPE,
RENDER_TYPE,
- tibetan_scripts, G_N_ELEMENTS(tibetan_scripts)
+ tibetan_scripts, G_N_ELEMENTS (tibetan_scripts)
}
};
-/* GPOS tables are not present in Joyig font */
-#undef DO_GPOS
-static void
-maybe_add_gsub_feature (PangoOTRuleset *ruleset,
- PangoOTInfo *info,
- guint script_index,
- PangoOTTag feature_tag,
- gulong property_bit)
+
+// Vocabulary
+// Base -> A consonant in its full (not subscript) form. It is the
+// center of the syllable; it can be surrounded by subjoined consonants, vowels,
+// signs... but there is only one base in a stack, and it has to be coded as
+// the first character of the syllable. Included here are also groups of base + subjoined
+// which are represented by one single code point in Unicode (e.g. 0F43), as well as other characters that might take
+// subjoined consonants or other combining characters.
+// Subjoined -> Subjoined consonants and groups of subjoined consonants which have a single code-point
+// to represent the group (even if each subjoined consonant is represented independently
+// by another code-point).
+// Tsa Phru --> Tsa Phru character, Bhutanese people will always place it right after the base, but sometimes, due to
+// "normalization"
+// is placed after all the subjoined consonants, and it is also permitted there.
+// A Chung Vowel lengthening mark --> . 0F71 It is placed after the base and any subjoined consonants but before any vowels
+// Precomposed Sanskrit vowels --> These are combinations of subjoined consonants + vowels that have been assigned
+// a given code-point (in spite of each single part of them also having a code-point).
+// They are avoided, and users are encouraged to use the combination of code-points that
+// represents the same sound instead of using these combined characters. They are included here
+// for compatibility with possible texts that use them (they are not in the Dzongkha keyboard).
+// Halanta -> The Halanta or Virama character 0F84 indicates that a consonant should not use its inherent vowel,
+// in spite of not having other vowels present. It is usually placed immediately after a base consonant,
+// but in some special cases it can also be placed after a subjoined consonant, so this is also
+// permitted in this algorithm. (Halanta is always displayed in Tibetan, not used as a connecting char.)
+//
+// Subjoined vowels -> Dependent vowels (matras) placed below the base and below all subjoined consonants. There
+// might be as many as three subjoined vowels in a given stack (only one in general text, but up
+// to three for abbreviations, so they have to be permitted).
+// Superscript vowels -> There are three superscript vowels, and they can be repeated or combined (up to three
+// times). They can combine with subjoined vowels, and are always coded after these.
+// Anusvara --> Nasalisation sign. Traditionally placed in absence of vowels, but also after vowels. In some
+// special cases it can be placed before a vowel, so this is also permitted.
+// Candrabindu -> Forms of the Anusvara with different glyphs (and different in identity) which can be placed
+// without vowel or after the vowel, but never before. Cannot combine with Anusvara.
+// Stress marks -> Marks placed above or below a syllable, affecting the whole syllable. They are combining
+// marks, so they have to be attached to a specific stack. They are used to emphasise a syllable.
+//
+// Digits -> Digits are not considered as non-combining characters because there are a few characters which
+// combine with them, so they have to be considered independently.
+// Digit combining marks -> dependent marks that combine with digits.
+//
+// TODO
+// There are a number of characters in the CJK block that are used in Tibetan script; two of these symbols
+// are used as bases for combining glyphs, and have not been encoded in Tibetan. As these characters are outside
+// of the Tibetan block, they have not been treated in this program.
+
+
+enum TibetanCharClassValues
{
- guint feature_index;
-
- /* 0xffff == default language system */
- if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GSUB,
- feature_tag, script_index, 0xffff, &feature_index))
- {
- pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GSUB, feature_index,
- property_bit);
- }
-}
+ CC_RESERVED = 0, //Non Combining Characters
+ CC_BASE = 1, // Base Consonants, Base Consonants with Subjoined attached in code point, Sanskrit base marks
+ CC_SUBJOINED = 2, // Subjoined Consonats, combination of more than Subjoined Consonants in the code point
+ CC_TSA_PHRU = 3, // Tsa-Phru character 0F39
+ CC_A_CHUNG = 4, // Vowel Lenthening a-chung mark 0F71
+ CC_COMP_SANSKRIT = 5, // Precomposed Sanskrit vowels including Subjoined characters and vowels
+ CC_HALANTA = 6, // Halanta Character 0F84
+ CC_BELOW_VOWEL = 7, // Subjoined vowels
+ CC_ABOVE_VOWEL = 8, // Superscript vowels
+ CC_ANUSVARA = 9, // Tibetan sign Rjes Su Nga Ro 0F7E
+ CC_CANDRABINDU = 10, // Tibetan sign Sna Ldan and Nyi Zla Naa Da 0F82, 0F83
+ CC_VISARGA = 11, // Tibetan sign Rnam Bcad (0F7F)
+ CC_ABOVE_S_MARK = 12, // Stress Marks placed above the text
+ CC_BELOW_S_MARK = 13, // Stress Marks placed below the text
+ CC_DIGIT = 14, // Dzongkha Digits
+ CC_PRE_DIGIT_MARK = 15, // Mark placed before the digit
+ CC_POST_BELOW_DIGIT_M = 16, // Mark placed below or after the digit
+ CC_COUNT = 17 // This is the number of character classes
+};
-#ifdef DO_GPOS
-static void
-maybe_add_gpos_feature (PangoOTRuleset *ruleset,
- PangoOTInfo *info,
- guint script_index,
- PangoOTTag feature_tag,
- gulong property_bit)
+
+enum TibetanCharClassFlags
{
- guint feature_index;
+ CF_CLASS_MASK = 0x0000FFFF,
+
+ CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable
+ CF_DIGIT = 0x01000000, // flag to speed up comparaisson
+ CF_PREDIGIT = 0x02000000, // flag to detect pre-digit marks for reordering
+
+ // position flags
+ CF_POS_BEFORE = 0x00080000,
+ CF_POS_BELOW = 0x00040000,
+ CF_POS_ABOVE = 0x00020000,
+ CF_POS_AFTER = 0x00010000,
+ CF_POS_MASK = 0x000f0000
+};
- if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GPOS,
- feature_tag, script_index, 0xffff, &feature_index))
- {
- pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GPOS, feature_index,
- property_bit);
- }
-}
-#endif
-static PangoOTRuleset *
-get_gsub_ruleset (FT_Face face)
+/* Characters that get referred to by name */
+enum TibetanChar
{
- PangoOTInfo *info = pango_ot_info_get (face);
- GQuark ruleset_quark = g_quark_from_string ("tibetan-gsub-ruleset");
- PangoOTRuleset *ruleset;
+ C_DOTTED_CIRCLE = 0x25CC,
+ C_PRE_NUMBER_MARK = 0x0F3F
+};
- if (!info)
- return NULL;
- ruleset = g_object_get_qdata (G_OBJECT (info), ruleset_quark);
+enum
+{
+ // Simple classes: they are used in the state table (in this file) to control the length of a syllable;
+ // they are also used to know where a character should be placed (location in reference to the base character)
+ // and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
+ // indicate an error in syllable construction.
+ _xx = CC_RESERVED,
+ _ba = CC_BASE,
+ _sj = CC_SUBJOINED | CF_DOTTED_CIRCLE | CF_POS_BELOW,
+ _tp = CC_TSA_PHRU | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
+ _ac = CC_A_CHUNG | CF_DOTTED_CIRCLE | CF_POS_BELOW,
+ _cs = CC_COMP_SANSKRIT | CF_DOTTED_CIRCLE | CF_POS_BELOW,
+ _ha = CC_HALANTA | CF_DOTTED_CIRCLE | CF_POS_BELOW,
+ _bv = CC_BELOW_VOWEL | CF_DOTTED_CIRCLE | CF_POS_BELOW,
+ _av = CC_ABOVE_VOWEL | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
+ _an = CC_ANUSVARA | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
+ _cb = CC_CANDRABINDU | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
+ _vs = CC_VISARGA | CF_DOTTED_CIRCLE| CF_POS_AFTER,
+ _as = CC_ABOVE_S_MARK | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
+ _bs = CC_BELOW_S_MARK | CF_DOTTED_CIRCLE | CF_POS_BELOW,
+ _di = CC_DIGIT | CF_DIGIT,
+ _pd = CC_PRE_DIGIT_MARK | CF_DOTTED_CIRCLE | CF_PREDIGIT | CF_POS_BEFORE ,
+ _bd = CC_POST_BELOW_DIGIT_M | CF_DOTTED_CIRCLE | CF_POS_AFTER
+};
- if (!ruleset)
- {
- PangoOTTag tibt_tag = FT_MAKE_TAG ('t', 'i', 'b', 't');
- guint script_index;
- ruleset = pango_ot_ruleset_new (info);
+/* Character class: a character class value
+ * ORed with character class flags.
+ */
+typedef glong TibetanCharClass;
+//_xx Non Combining characters
+//_ba Base Consonants
+//_sj Subjoined consonants
+//_tp Tsa - phru
+//_ac A-chung, Vowel Lengthening mark
+//_cs Precomposed Sanskrit vowel + subjoined consonants
+//_ha Halanta/Virama
+//_bv Below vowel
+//_av above vowel
+//_an Anusvara
+//_cb Candrabindu
+//_vs Visaraga/Post mark
+//_as Upper Stress marks
+//_bs Lower Stress marks
+//_di Digit
+//_pd Number pre combining, Needs reordering
+//_bd Other number combining marks
+
+
+static const TibetanCharClass tibetanCharClasses[] =
+{
+ // 0 1 2 3 4 5 6 7 8 9 a b c d e f
+ _xx, _ba, _xx, _xx, _ba, _ba, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0F00 - 0F0F 0
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _bd, _bd, _xx, _xx, _xx, _xx, _xx, _xx, // 0F10 - 0F1F 1
+ _di, _di, _di, _di, _di, _di, _di, _di, _di, _di, _xx, _xx, _xx, _xx, _xx, _xx, // 0F20 - 0F2F 2
+ _xx, _xx, _xx, _xx, _xx, _bs, _xx, _bs, _xx, _tp, _xx, _xx, _xx, _xx, _bd, _pd, // 0F30 - 0F3F 3
+ _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F40 - 0F4F 4
+ _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F50 - 0F5F 5
+ _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, _xx, // 0F60 - 0F6F 6
+ _xx, _ac, _av, _cs, _bv, _bv, _cs, _cs, _cs, _cs, _av, _av, _av, _av, _an, _vs, // 0F70 - 0F7F 7
+ _av, _cs, _cb, _cb, _ha, _xx, _as, _as, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, // 0F80 - 0F8F 8
+ _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0F90 - 0F9F 9
+ _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0FA0 - 0FAF a
+ _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, // 0FB0 - 0FBF b
+ _xx, _xx, _xx, _xx, _xx, _xx, _bs, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FC0 - 0FCF c
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx,// 0FD0 - 0FDF d
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FE0 - 0FEF e
+ _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FF0 - 0FFF f
+};
- if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GSUB,
- tibt_tag, &script_index))
- {
- maybe_add_gsub_feature (ruleset, info, script_index, FT_MAKE_TAG ('c','c','m','p'), 0xFFFF);
- maybe_add_gsub_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','s'), 0xFFFF);
- maybe_add_gsub_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','s'), 0xFFFF);
- }
+/* this define must reflect the range of tibetanCharClasses */
+//First Tibetan Character
+#define firstChar 0x0F00
+//Last Tibetan Character
+#define lastChar 0x0FFF
+
+// The stateTable is used to calculate the end (the length) of a well
+// formed Tibetan Stack
+//
+// Each horizontal line is ordered exactly the same way as the values in the
+// TibetanCharClassValues enum. This coincidence of values allows the table to be indexed by character class.
+//
+// Each line corresponds to a state, which does not necessarily need to be a type
+// of component... for example, state 2 is a base, which is always a first character
+// in the Stack but the state could be produced by a consonant of any type when
+// it is the first character that is analysed (in ground state).
+
+static const gint8 tibetanStateTable[][CC_COUNT] =
+{
+ //Dzongkha state table
+ //xx ba sj tp ac cs ha bv av an cb vs as bs di pd bd
+ { 1, 2, 4, 3, 8, 7, 9, 10, 14, 13, 17, 18, 19, 19, 20, 21, 21,}, // 0 - ground state
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 1 - exit state (or sign to the right of the syllable)
+ {-1, -1, 4, 3, 8, 7, 9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 2 - Base consonant
+ {-1, -1, 5, -1, 8, 7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 3 - Tsa phru after base
+ {-1, -1, 4, 6, 8, 7, 9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 4 - Subjoined consonant after base
+ {-1, -1, 5, -1, 8, 7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 5 - Subjoined consonant after tsa phru
+ {-1, -1, -1, -1, 8, 7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 6 - Tsa phru after subjoined consonant
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, // 7 - Pre Composed Sanskrit
+ {-1, -1, -1, -1, -1, -1, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 8 - A-chung
+ {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, -1, 19, 19, -1, -1, -1,}, // 9 - Halanta
+ {-1, -1, -1, -1, -1, -1, -1, 11, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 10 - below vowel 1
+ {-1, -1, -1, -1, -1, -1, -1, 12, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 11 - below vowel 2
+ {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 12 - below vowel 3
+ {-1, -1, -1, -1, -1, -1, -1, -1, 14, 17, 17, 18, 19, 19, -1, -1, -1,}, // 13 - Anusvara before vowel
+ {-1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 17, 18, 19, 19, -1, -1, -1,}, // 14 - above vowel 1
+ {-1, -1, -1, -1, -1, -1, -1, -1, 16, 17, 17, 18, 19, 19, -1, -1, -1,}, // 15 - above vowel 2
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 17, 18, 19, 19, -1, -1, -1,}, // 16 - above vowel 3
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 19, 19, -1, -1, -1,}, // 17 - Anusvara or Candrabindu after vowel
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, // 18 - Visarga
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 19 - strss mark
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 21, 21,}, // 20 - digit
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 21 - digit mark
+};
- g_object_set_qdata_full (G_OBJECT (info), ruleset_quark, ruleset,
- (GDestroyNotify)g_object_unref);
+
+enum property_flags
+{
+ abvf = 0x0001,
+ pref = 0x0002,
+ pstf = 0x0004,
+ blwf = 0x0008,
+
+ pres = 0x0010,
+ blws = 0x0020,
+ abvs = 0x0040,
+ psts = 0x0080,
+ clig = 0x0100,
+
+ dist = 0x0200,
+ blwm = 0x0400,
+ abvm = 0x0800,
+ mkmk = 0x1000
+};
+
+
+enum properties
+{
+ blwf_p = /*(blwf | blws | clig | dist | blwm | mkmk)*/ (abvf | pref | pstf | pres | abvs | psts | abvm),
+ pstf_p = /*(blwf | blws | pref | pres | pstf | psts | clig | dist | blwm)*/ (abvf | abvs | abvm | mkmk),
+ abvf_p = /*(abvf | abvs | clig | dist | abvm | mkmk)*/ (pref | pstf | blwf | pres | blws | psts | blwm),
+ pref_p = /*(pref | pres | clig | dist)*/ (abvf | pstf | blwf | blws | abvs | psts | blwm | abvm | mkmk),
+ default_p = /*(pres | blws | clig | dist | abvm | blwm | mkmk)*/ (pref | blwf |abvf | pstf | abvs | psts)
+};
+
+
+/* Below we define how a character in the input string is either in the tibetanCharClasses table
+ * (in which case we get its type back), or an unknown object in which case we get _xx (CC_RESERVED) back
+ */
+static TibetanCharClass
+get_char_class (gunichar ch)
+{
+
+ if (ch < firstChar || ch > lastChar)
+ return CC_RESERVED;
+
+ return tibetanCharClasses[ch - firstChar];
+}
+
+
+/* Given an input string of characters and a location at which to start looking,
+ * calculate, using the state table, where the syllable that starts at the
+ * starting position ends. The returned index is one past the syllable's last character.
+ */
+static glong
+find_syllable (const gunichar *chars,
+ glong start,
+ glong char_count)
+{
+ glong cursor = start;
+ gint8 state = 0;
+ TibetanCharClass charClass;
+
+ while (cursor < char_count)
+ {
+ charClass = get_char_class (chars[cursor]) & CF_CLASS_MASK;
+ state = tibetanStateTable[state][charClass];
+
+ if (state < 0)
+ break;
+
+ cursor += 1;
}
- return ruleset;
+ return cursor;
+}
+
+
+static void
+maybe_add_GSUB_feature (PangoOTRuleset *ruleset,
+ PangoOTInfo *info,
+ guint script_index,
+ PangoOTTag tag,
+ gulong property_bit)
+{
+ guint feature_index;
+
+ /* 0xffff == default language system */
+ if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GSUB,
+ tag, script_index, 0xffff, &feature_index))
+ pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GSUB, feature_index,
+ property_bit);
}
+
+static void
+maybe_add_GPOS_feature (PangoOTRuleset *ruleset,
+ PangoOTInfo *info,
+ guint script_index,
+ PangoOTTag tag,
+ gulong property_bit)
+{
+ guint feature_index;
+
+ /* 0xffff == default language system */
+ if (pango_ot_info_find_feature (info, PANGO_OT_TABLE_GPOS,
+ tag, script_index, 0xffff, &feature_index))
+ pango_ot_ruleset_add_feature (ruleset, PANGO_OT_TABLE_GPOS, feature_index,
+ property_bit);
+}
+
+//Rules found in the Open type font features
static PangoOTRuleset *
-get_gpos_ruleset (FT_Face face)
+get_ruleset (FT_Face face)
{
-#ifdef DO_GPOS
- PangoOTInfo *info = pango_ot_info_get (face);
- GQuark ruleset_quark = g_quark_from_string ("tibetan-gpos-ruleset");
PangoOTRuleset *ruleset;
+ static GQuark ruleset_quark = 0;
+
+ PangoOTInfo *info = pango_ot_info_get (face);
+
+ if (!ruleset_quark)
+ ruleset_quark = g_quark_from_string ("pango-tibetan-ruleset");
if (!info)
return NULL;
@@ -141,142 +434,155 @@ get_gpos_ruleset (FT_Face face)
if (!ruleset)
{
PangoOTTag tibetan_tag = FT_MAKE_TAG ('t', 'i', 'b', 't');
- guint script_index;
+ guint script_index;
ruleset = pango_ot_ruleset_new (info);
+ if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GSUB,
+ tibetan_tag, &script_index))
+ {
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','f'), pref);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','f'), blwf);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','f'), abvf);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','f'), pstf);
+
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','r','e','s'), pres);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','s'), blws);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','s'), abvs);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('p','s','t','s'), psts);
+ maybe_add_GSUB_feature (ruleset, info, script_index, FT_MAKE_TAG ('c','l','i','g'), clig);
+ }
+
if (pango_ot_info_find_script (info, PANGO_OT_TABLE_GPOS,
- tibetan_tag, &script_index))
- {
- maybe_add_gpos_feature (ruleset, info, script_index, FT_MAKE_TAG ('k','e','r','n'), 0xFFFF);
- maybe_add_gpos_feature (ruleset, info, script_index, FT_MAKE_TAG ('m','a','r','k'), 0xFFFF);
- maybe_add_gpos_feature (ruleset, info, script_index, FT_MAKE_TAG ('m','k','m','k'), 0xFFFF);
- }
+ tibetan_tag, &script_index))
+ {
+ maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('d','i','s','t'), dist);
+ maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('b','l','w','m'), blwm);
+ maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('a','b','v','m'), abvm);
+ maybe_add_GPOS_feature (ruleset, info, script_index, FT_MAKE_TAG ('m','k','m','k'), mkmk);
+ }
g_object_set_qdata_full (G_OBJECT (info), ruleset_quark, ruleset,
- (GDestroyNotify)g_object_unref);
+ (GDestroyNotify)g_object_unref);
}
return ruleset;
-#else
- return NULL;
-#endif
}
-static void
-set_glyph (PangoFont *font,
- PangoGlyphString *glyphs,
- int i,
- int offset,
- PangoGlyph glyph)
-{
- PangoRectangle logical_rect;
-
- glyphs->glyphs[i].glyph = glyph;
-
- glyphs->glyphs[i].geometry.x_offset = 0;
- glyphs->glyphs[i].geometry.y_offset = 0;
-
- glyphs->log_clusters[i] = offset;
- pango_font_get_glyph_extents (font, glyphs->glyphs[i].glyph, NULL,
- &logical_rect);
- glyphs->glyphs[i].geometry.width = logical_rect.width;
+static PangoGlyph
+get_index (PangoFcFont *fc_font, gunichar wc)
+{
+ PangoGlyph index = pango_fc_font_get_glyph (fc_font, wc);
+ if (!index)
+ index = pango_fc_font_get_unknown_glyph (fc_font, wc);
+ return index;
}
-static void
-fallback_shape (PangoFont *font,
- const char *text,
- gint length,
- PangoGlyphString *glyphs)
+
+static void
+tibetan_engine_shape (PangoEngineShape *engine,
+ PangoFont *font,
+ const char *text,
+ int length,
+ PangoAnalysis *analysis,
+ PangoGlyphString *glyphs)
{
PangoFcFont *fc_font = PANGO_FC_FONT (font);
+ FT_Face face;
+ PangoOTBuffer *buffer;
+ PangoOTRuleset *ruleset;
+ glong n_chars, i;
+ gunichar *wcs;
const char *p;
- long n_chars, i;
+ glong syllable;
+ TibetanCharClass charClass;
+ glong cursor = 0;
- n_chars = g_utf8_strlen (text, length);
- pango_glyph_string_set_size (glyphs, n_chars);
-
- for (i = 0, p = text; i < n_chars; i++, p = g_utf8_next_char (p))
- {
- gunichar wc;
- PangoGlyph index;
+ buffer = pango_ot_buffer_new (fc_font);
- wc = g_utf8_get_char (p);
+ face = pango_fc_font_lock_face (fc_font);
+ g_assert (face);
- index = pango_fc_font_get_glyph (fc_font, wc);
- if (!index)
- index = pango_fc_font_get_unknown_glyph (fc_font, wc);
-
- set_glyph (font, glyphs, i, p - text, index);
- }
-}
+ wcs = g_utf8_to_ucs4_fast (text, length, &n_chars);
+ p = text;
-static void
-ot_shape (PangoFont *font,
- PangoOTRuleset *gsub_ruleset,
- PangoOTRuleset *gpos_ruleset,
- const char *text,
- gint length,
- PangoGlyphString *glyphs)
-{
- PangoFcFont *fc_font = PANGO_FC_FONT(font);
- PangoOTBuffer *buffer = pango_ot_buffer_new (fc_font);
- const char *p;
-
- for (p = text; p - text < length; p = g_utf8_next_char (p))
+ /* This loop only exits when we reach the end of a run, which may contain
+ * several syllables.
+ */
+ while (cursor < n_chars)
+ {
+ syllable = find_syllable (wcs, cursor, n_chars);
+
+ /* shall we add a dotted circle?
+ * If in the position in which the base should be (first char in the string) there is
+ * a character that has the Dotted circle flag (a character that cannot be a base)
+ * then write a dotted circle
+ */
+ if (get_char_class (wcs[cursor]) & CF_DOTTED_CIRCLE)
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_DOTTED_CIRCLE), default_p, p - text);
+ }
+
+ /* If a digit is followed by the number pre-combining mark, reorder the two characters:
+ * the mark is emitted first, then the digit.
+ */
+ for (i = cursor; i < syllable; i += 1)
+ {
+ charClass = get_char_class (wcs[i]);
+
+ if ((charClass & CF_DIGIT )
+ && ( get_char_class (wcs[i+1]) & CF_PREDIGIT))
+ {
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, C_PRE_NUMBER_MARK), pref_p, p - text);
+ p = g_utf8_next_char (p);
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pref_p, p - text);
+ i += 1;
+ } else {
+ switch (charClass & CF_POS_MASK)
+ {
+ case CF_POS_ABOVE :
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), abvf_p, p - text);
+ break;
+
+ case CF_POS_AFTER :
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), pstf_p, p - text);
+ break;
+
+ case CF_POS_BELOW :
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), blwf_p, p - text);
+ break;
+
+ default:
+ /* default - any other characters */
+ pango_ot_buffer_add_glyph (buffer, get_index (fc_font, wcs[i]), default_p, p - text);
+ break;
+ } /* switch */
+ }
+
+ p = g_utf8_next_char (p);
+ } /* for */
+
+ cursor = syllable; /* move the pointer to the start of next syllable */
+ } /* while */
+
+ /* do gsub processing */
+ ruleset = get_ruleset (face);
+ if (ruleset != NULL)
{
- gunichar wc;
- PangoGlyph index;
-
- wc = g_utf8_get_char (p);
-
- index = pango_fc_font_get_glyph (fc_font, wc);
- if (!index)
- index = pango_fc_font_get_unknown_glyph (fc_font, wc);
-
- pango_ot_buffer_add_glyph (buffer, index, 0, p - text);
+ pango_ot_ruleset_substitute (ruleset, buffer);
+ pango_ot_ruleset_position (ruleset, buffer);
}
-
- if (gsub_ruleset != NULL)
- pango_ot_ruleset_substitute (gsub_ruleset, buffer);
-
- if (gpos_ruleset != NULL)
- pango_ot_ruleset_position (gpos_ruleset, buffer);
-
- pango_ot_buffer_output (buffer, glyphs);
- pango_ot_buffer_destroy (buffer);
-}
-
-static void
-tibetan_engine_shape (PangoEngineShape *engine,
- PangoFont *font,
- const char *text,
- int length,
- PangoAnalysis *analysis,
- PangoGlyphString *glyphs)
-{
- PangoFcFont *fc_font = PANGO_FC_FONT(font);
- PangoOTRuleset *gsub_ruleset;
- PangoOTRuleset *gpos_ruleset;
- FT_Face face;
- g_return_if_fail (length >= 0);
+ pango_ot_buffer_output (buffer, glyphs);
- face = pango_fc_font_lock_face (fc_font);
- g_assert (face != NULL);
-
- gsub_ruleset = get_gsub_ruleset (face);
- gpos_ruleset = get_gpos_ruleset (face);
-
- if (gsub_ruleset != NULL)
- ot_shape (font, gsub_ruleset, gpos_ruleset, text, length, glyphs);
- else
- fallback_shape (font, text, length, glyphs);
+ g_free (wcs);
+ pango_ot_buffer_destroy (buffer);
pango_fc_font_unlock_face (fc_font);
}
+
static void
tibetan_engine_fc_class_init (PangoEngineShapeClass *class)
{
@@ -284,7 +590,8 @@ tibetan_engine_fc_class_init (PangoEngineShapeClass *class)
}
PANGO_ENGINE_SHAPE_DEFINE_TYPE (TibetanEngineFc, tibetan_engine_fc,
- tibetan_engine_fc_class_init, NULL);
+ tibetan_engine_fc_class_init, NULL);
+
void
PANGO_MODULE_ENTRY(init) (GTypeModule *module)
@@ -292,19 +599,22 @@ PANGO_MODULE_ENTRY(init) (GTypeModule *module)
tibetan_engine_fc_register_type (module);
}
+
void
PANGO_MODULE_ENTRY(exit) (void)
{
}
+
void
PANGO_MODULE_ENTRY(list) (PangoEngineInfo **engines,
- int *n_engines)
+ int *n_engines)
{
*engines = script_engines;
*n_engines = G_N_ELEMENTS (script_engines);
}
+
PangoEngine *
PANGO_MODULE_ENTRY(create) (const char *id)
{