1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
|
/* Pango
* indic-ot.h:
*
* Copyright (C) 2001, 2002 IBM Corporation. All Rights Reserved.
* Author: Eric Mader <mader@jtcsv.com>
*
*/
#ifndef __INDIC_OT_H__
#define __INDIC_OT_H__
#include <freetype/freetype.h>
#include <pango/pango-glyph.h>
#include <pango/pango-types.h>
#include "mprefixups.h"
G_BEGIN_DECLS
#ifdef PANGO_ENABLE_ENGINE
/* Characters that get referred to by name... */
enum
{
C_SIGN_ZWNJ = 0x200C, /* U+200C ZERO WIDTH NON-JOINER */
C_SIGN_ZWJ = 0x200D, /* U+200D ZERO WIDTH JOINER */
C_DOTTED_CIRCLE = 0x25CC /* U+25CC DOTTED CIRCLE */
};
/*
* The characters that a split matra splits into, stored as a
* fixed array of three code points.
* Unused characters will be zero.
*/
typedef gunichar IndicOTSplitMatra[3];
/*
 * Character class values.
 *
 * These are the values compared against the CF_CLASS_MASK bits of an
 * IndicOTCharClass.  The enumerators are numbered consecutively from
 * zero, so CC_COUNT equals the number of classes listed before it.
 */
/* FIXME: does this need to be a typedef? */
typedef enum
{
  CC_RESERVED = 0,              /*  0 */
  CC_MODIFYING_MARK_ABOVE,      /*  1 */
  CC_MODIFYING_MARK_POST,       /*  2 */
  CC_INDEPENDENT_VOWEL,         /*  3 */
  CC_CONSONANT,                 /*  4 */
  CC_CONSONANT_WITH_NUKTA,      /*  5 */
  CC_NUKTA,                     /*  6 */
  CC_DEPENDENT_VOWEL,           /*  7 */
  CC_VIRAMA,                    /*  8 */
  CC_ZERO_WIDTH_MARK,           /*  9 */
  CC_COUNT                      /* 10 */
} IndicOTCharClassValues;
/*
 * Character class flags.
 *
 * The single-bit flags occupy bits 22 through 31.  CF_CLASS_MASK and
 * CF_INDEX_MASK select multi-bit fields in the low 20 bits, and
 * CF_INDEX_SHIFT is the bit position of the CF_INDEX_MASK field.
 */
/* FIXME: does this need to be a typedef? */
typedef enum
{
  CF_CLASS_MASK  = 0xFFFF,
  CF_CONSONANT   = (int) (1U << 31),  /* top bit; cast keeps it an int */
  CF_REPH        = 1 << 30,
  CF_VATTU       = 1 << 29,
  CF_BELOW_BASE  = 1 << 28,
  CF_POST_BASE   = 1 << 27,
  CF_MATRA_PRE   = 1 << 26,
  CF_MATRA_BELOW = 1 << 25,
  CF_MATRA_ABOVE = 1 << 24,
  CF_MATRA_POST  = 1 << 23,
  CF_LENGTH_MARK = 1 << 22,
  CF_INDEX_MASK  = 0xF << 16,
  CF_INDEX_SHIFT = 16
} IndicOTCharClassFlags;
/*
* Character class: a character class value
* ORed with character class flags.
*
* The class value is recovered by masking with CF_CLASS_MASK
* (see the IS_* macros below); the remaining bits carry the
* IndicOTCharClassFlags.
*/
typedef glong IndicOTCharClass;
/*
 * Script flags.
 *
 * Four single-bit flags sit in bits 28 through 31.  The two remaining
 * enumerators are values for the low 16 bits: a mask, and a constant
 * that fits within that mask.
 */
typedef enum
{
  SF_MATRAS_AFTER_BASE    = (int) (1U << 31),  /* top bit; cast keeps it an int */
  SF_REPH_AFTER_BELOW     = 1 << 30,
  SF_EYELASH_RA           = 1 << 29,
  SF_MPRE_FIXUP           = 1 << 28,
  SF_POST_BASE_LIMIT_MASK = 0xFFFF,
  SF_NO_POST_BASE_LIMIT   = 0x7FFF
} IndicOTScriptFlags;
/*
 * Bit flags for the indic feature tags.
 *
 * One bit per tag, assigned in ascending order from bit 0 (nukt)
 * to bit 15 (junk).
 */
enum indic_glyph_feature_
{
  nukt = 1 << 0,
  akhn = 1 << 1,
  rphf = 1 << 2,
  blwf = 1 << 3,
  half = 1 << 4,
  pstf = 1 << 5,
  vatu = 1 << 6,
  pres = 1 << 7,
  blws = 1 << 8,
  abvs = 1 << 9,
  psts = 1 << 10,
  haln = 1 << 11,
  blwm = 1 << 12,
  abvm = 1 << 13,
  dist = 1 << 14,
  junk = 1 << 15
};
/*
 * Complement of the feature flags that
 * will be assigned to specific glyphs.
 *
 * The names follow the ICU implementation, which listed the actual
 * tags in an order such that a tag set could be selected by taking
 * the address of its first entry: &tags[0], &tags[1], &tags[2],
 * &tags[3].  Each set here carries the name of the first tag in the
 * corresponding ICU list.
 *
 * Every set is a superset of the one before it, so each is written
 * as the previous set plus the bits it adds.
 */
enum indic_glyph_property_
{
  rphf_p = junk | dist,
  blwf_p = rphf_p | rphf,
  half_p = blwf_p | blwf,
  nukt_p = half_p | half | pstf
};
/*
* Per-script character ranges
*
* Each macro expands to two comma-separated code points, a lower and
* an upper bound, for the script named by the four-letter tag at the
* end of the macro name (deva = Devanagari, beng = Bengali,
* guru = Gurmukhi, gujr = Gujarati, orya = Oriya, taml = Tamil,
* telu = Telugu, knda = Kannada, mlym = Malayalam).
* NOTE(review): presumably these initialize firstChar and lastChar of
* the matching IndicOTClassTable - confirm at the point of use.
*/
#define SCRIPT_RANGE_deva 0x0900, 0x0970
#define SCRIPT_RANGE_beng 0x0980, 0x09FA
#define SCRIPT_RANGE_guru 0x0A00, 0x0A74
#define SCRIPT_RANGE_gujr 0x0A80, 0x0AEF
#define SCRIPT_RANGE_orya 0x0B00, 0x0B70
#define SCRIPT_RANGE_taml 0x0B80, 0x0BF2
#define SCRIPT_RANGE_telu 0x0C00, 0x0C6F
#define SCRIPT_RANGE_knda 0x0C80, 0x0CEF
#define SCRIPT_RANGE_mlym 0x0D00, 0x0D6F
/*
 * Macros to test the charClass flags for various things.
 *
 * Each macro takes an IndicOTCharClass (a class value ORed with
 * CF_* flags) and evaluates to non-zero when the test holds:
 *
 *   - the "class" tests mask with CF_CLASS_MASK and compare the
 *     result against one CC_* value;
 *   - the "flag" tests check whether one or more CF_* bits are set;
 *   - IS_SPLIT_MATRA checks for a non-zero CF_INDEX_MASK field.
 *
 * The argument is parenthesized in every expansion so that a compound
 * argument such as "flags | CC_VIRAMA" is tested as a whole; without
 * the parentheses, '&' would bind to only the last operand.  The
 * argument is evaluated exactly once.
 */
#define IS_VM_ABOVE(charClass) (((charClass) & CF_CLASS_MASK) == CC_MODIFYING_MARK_ABOVE)
#define IS_VM_POST(charClass) (((charClass) & CF_CLASS_MASK) == CC_MODIFYING_MARK_POST)
#define IS_CONSONANT(charClass) (((charClass) & CF_CONSONANT) != 0)
#define IS_REPH(charClass) (((charClass) & CF_REPH) != 0)
#define IS_NUKTA(charClass) (((charClass) & CF_CLASS_MASK) == CC_NUKTA)
#define IS_VIRAMA(charClass) (((charClass) & CF_CLASS_MASK) == CC_VIRAMA)
#define IS_VATTU(charClass) (((charClass) & CF_VATTU) != 0)
#define IS_MATRA(charClass) (((charClass) & CF_CLASS_MASK) == CC_DEPENDENT_VOWEL)
#define IS_SPLIT_MATRA(charClass) (((charClass) & CF_INDEX_MASK) != 0)
#define IS_M_PRE(charClass) (((charClass) & CF_MATRA_PRE) != 0)
#define IS_M_BELOW(charClass) (((charClass) & CF_MATRA_BELOW) != 0)
#define IS_M_ABOVE(charClass) (((charClass) & CF_MATRA_ABOVE) != 0)
#define IS_M_POST(charClass) (((charClass) & CF_MATRA_POST) != 0)
#define IS_LENGTH_MARK(charClass) (((charClass) & CF_LENGTH_MARK) != 0)
#define HAS_POST_OR_BELOW_BASE_FORM(charClass) (((charClass) & (CF_POST_BASE | CF_BELOW_BASE)) != 0)
#define HAS_POST_BASE_FORM(charClass) (((charClass) & CF_POST_BASE) != 0)
#define HAS_BELOW_BASE_FORM(charClass) (((charClass) & CF_BELOW_BASE) != 0)
/*
 * Class table describing one script.
 *
 * NOTE(review): the field descriptions below are inferred from the
 * field names and types only; confirm against the code that fills in
 * and reads the tables.
 */
typedef struct _IndicOTClassTable IndicOTClassTable;

struct _IndicOTClassTable
{
  gunichar                 firstChar;           /* presumably the first code point covered */
  gunichar                 lastChar;            /* presumably the last code point covered */
  glong                    worstCaseExpansion;  /* TODO confirm units and meaning */
  IndicOTScriptFlags       scriptFlags;         /* SF_* flags for the script */
  IndicOTCharClass        *charClasses;         /* per-character classes; indexing not shown here */
  const IndicOTSplitMatra *splitMatraTable;     /* split-matra entries; indexing not shown here */
};
/*
* One class table per supported script, named by the same four-letter
* tags used in the SCRIPT_RANGE_* macros. These are declarations only;
* the tables themselves are defined in another translation unit.
*/
extern IndicOTClassTable deva_class_table;
extern IndicOTClassTable beng_class_table;
extern IndicOTClassTable guru_class_table;
extern IndicOTClassTable gujr_class_table;
extern IndicOTClassTable orya_class_table;
extern IndicOTClassTable taml_class_table;
extern IndicOTClassTable telu_class_table;
extern IndicOTClassTable knda_class_table;
extern IndicOTClassTable mlym_class_table;
/*
* Class-table lookups. Only the prototypes are visible in this header.
* NOTE(review): judging by names and signatures, the first returns the
* split-matra entry for a character class and the second returns the
* character class of ch from class_table - confirm against the
* implementation.
*/
const IndicOTSplitMatra *indic_ot_get_split_matra(const IndicOTClassTable *class_table, IndicOTCharClass char_class);
IndicOTCharClass indic_ot_get_char_class(const IndicOTClassTable *class_table, gunichar ch);
/*
* Predicates that take a character plus a class table rather than an
* already-computed character class.
* NOTE(review): the names parallel the IS_* and HAS_* macros above, so
* presumably each looks up the class of ch and applies the matching
* macro - confirm against the implementation.
*/
gboolean indic_ot_is_vm_above(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_vm_post(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_consonant(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_reph(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_virama(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_nukta(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_vattu(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_matra(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_split_matra(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_m_pre(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_m_below(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_m_above(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_m_post(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_length_mark(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_has_post_or_below_base_form(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_has_post_base_form(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_has_below_base_form(const IndicOTClassTable *class_table, gunichar ch);
/*
* Syllable scanning and reordering entry points.
* NOTE(review): parameter semantics are not documented in this header.
* From the signatures: indic_ot_find_syllable takes an input array,
* a previous position and a character count and returns a glong;
* indic_ot_reorder reads chars/utf8_offsets and writes through
* out_chars, char_indices, char_tags and outMPreFixups (MPreFixups
* comes from mprefixups.h). Required buffer sizes and the meaning of
* the return values should be confirmed against the implementation.
*/
glong indic_ot_find_syllable(const IndicOTClassTable *class_table, const gunichar *chars, glong prev, glong char_count);
glong indic_ot_reorder(const gunichar *chars, const glong *utf8_offsets, glong char_count, const IndicOTClassTable *class_table, gunichar *out_chars, glong *char_indices, gulong *char_tags, MPreFixups **outMPreFixups);
#endif /* PANGO_ENABLE_ENGINE */
G_END_DECLS
#endif /* __INDIC_OT_H__ */
|