summaryrefslogtreecommitdiff
path: root/modules/indic/indic-ot.h
blob: 3337fe0e140ba2c6a7162a34ec6ab21613ba980f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/* Pango
 * indic-ot.h:
 *
 * Copyright (C) 2001, 2002 IBM Corporation. All Rights Reserved.
 * Author: Eric Mader <mader@jtcsv.com>
 *
 */

#ifndef __INDIC_OT_H__
#define __INDIC_OT_H__

#include <pango/pango-glyph.h>
#include <pango/pango-types.h>
#include "mprefixups.h"

G_BEGIN_DECLS

#ifdef PANGO_ENABLE_ENGINE

/* Characters that get referred to by name... */
enum
{
  C_SIGN_ZWNJ     = 0x200C,  /* U+200C ZERO WIDTH NON-JOINER */
  C_SIGN_ZWJ      = 0x200D,  /* U+200D ZERO WIDTH JOINER */
  C_DOTTED_CIRCLE = 0x25CC   /* U+25CC DOTTED CIRCLE */
};

/*
 * The characters that a split matra splits into.
 * The array always has three slots; unused characters will be zero.
 */
typedef gunichar IndicOTSplitMatra[3];

/*
 * Character class values.
 *
 * These are the values found in the low 16 bits of an
 * IndicOTCharClass; the IS_VM_ABOVE, IS_VM_POST, IS_NUKTA, IS_VIRAMA
 * and IS_MATRA macros compare (charClass & CF_CLASS_MASK) against them.
 */
/* FIXME: does this need to be a typedef? */
typedef enum
{
  CC_RESERVED             = 0,
  CC_MODIFYING_MARK_ABOVE = 1,  /* tested by IS_VM_ABOVE */
  CC_MODIFYING_MARK_POST  = 2,  /* tested by IS_VM_POST */
  CC_INDEPENDENT_VOWEL    = 3,
  CC_CONSONANT            = 4,
  CC_CONSONANT_WITH_NUKTA = 5,
  CC_NUKTA                = 6,  /* tested by IS_NUKTA */
  CC_DEPENDENT_VOWEL      = 7,  /* tested by IS_MATRA */
  CC_VIRAMA               = 8,  /* tested by IS_VIRAMA */
  CC_ZERO_WIDTH_MARK      = 9,
  CC_COUNT                = 10  /* number of class values above (0..9) */
} IndicOTCharClassValues;

/*
 * Character class flags.
 *
 * Every flag lives above the 16-bit class value, so a flag can be
 * ORed with an IndicOTCharClassValues value without disturbing it.
 * Single-bit flags are written as shifts to make the bit position
 * of each one explicit.
 */
/* FIXME: does this need to be a typedef? */
typedef enum
{
  /* Bits 0..15: selects the IndicOTCharClassValues part. */
  CF_CLASS_MASK   = 0xFFFF,

  /* Bit 31 (tested by IS_CONSONANT); cast so the value fits in an int. */
  CF_CONSONANT    = (int)(1u << 31),

  /* Bits 30..27: tested by IS_REPH, IS_VATTU and the HAS_*_FORM macros. */
  CF_REPH         = 1 << 30,
  CF_VATTU        = 1 << 29,
  CF_BELOW_BASE   = 1 << 28,
  CF_POST_BASE    = 1 << 27,

  /* Bits 26..22: tested by IS_M_PRE/BELOW/ABOVE/POST and IS_LENGTH_MARK. */
  CF_MATRA_PRE    = 1 << 26,
  CF_MATRA_BELOW  = 1 << 25,
  CF_MATRA_ABOVE  = 1 << 24,
  CF_MATRA_POST   = 1 << 23,
  CF_LENGTH_MARK  = 1 << 22,

  /*
   * Bits 16..19: a four-bit index field; any non-zero value makes
   * IS_SPLIT_MATRA true.  CF_INDEX_SHIFT is the position of that field.
   */
  CF_INDEX_MASK   = 0xF << 16,
  CF_INDEX_SHIFT  = 16
} IndicOTCharClassFlags;

/*
 * Character class: a character class value (IndicOTCharClassValues,
 * in the bits covered by CF_CLASS_MASK) ORed with character class
 * flags (IndicOTCharClassFlags).
 */
typedef glong IndicOTCharClass;

/*
 * Script flags.
 *
 * Per-script bits stored in IndicOTClassTable.scriptFlags.  The top
 * four bits are individual switches; the low 16 bits form a numeric
 * field selected by SF_POST_BASE_LIMIT_MASK.
 */
typedef enum
{
  /* Bit 31; cast so the value fits in an int. */
  SF_MATRAS_AFTER_BASE    = (int)(1u << 31),
  SF_REPH_AFTER_BELOW     = 1 << 30,
  SF_EYELASH_RA           = 1 << 29,
  SF_MPRE_FIXUP           = 1 << 28,

  /* Bits 0..15: the post-base limit field. */
  SF_POST_BASE_LIMIT_MASK = 0xFFFF,
  /* NOTE(review): presumably the field value meaning "no limit" - confirm. */
  SF_NO_POST_BASE_LIMIT   = 0x7FFF
} IndicOTScriptFlags;

/*
 * Bit flags for the indic feature tags.
 *
 * One bit per tag, numbered consecutively from bit 0 (nukt) up to
 * bit 16 (init), so any set of tags can be held in one integer mask.
 */
enum indic_glyph_feature_
{
  nukt = 1 << 0,
  akhn = 1 << 1,
  rphf = 1 << 2,
  blwf = 1 << 3,
  half = 1 << 4,
  pstf = 1 << 5,
  vatu = 1 << 6,
  pres = 1 << 7,
  blws = 1 << 8,
  abvs = 1 << 9,
  psts = 1 << 10,
  haln = 1 << 11,
  blwm = 1 << 12,
  abvm = 1 << 13,
  dist = 1 << 14,
  junk = 1 << 15,
  init = 1 << 16
};

/*
 * Complement of the feature flags that
 * will be assigned to specific glyphs.
 *
 * The names follow the ICU implementation, where the actual tags
 * were listed in an order that let a tag set be assigned by taking
 * the address of its first entry: &tags[0], &tags[1], &tags[2],
 * &tags[3].  Each set here carries the name of the first tag in the
 * corresponding ICU list.
 *
 * Every set contains junk, dist and init; each later set is the
 * previous one plus the feature the previous set was named after,
 * which is how the definitions below are written.
 */
enum indic_glyph_property_
{
  rphf_p = (junk | dist | init),
  blwf_p = (rphf_p | rphf),
  half_p = (blwf_p | blwf),
  pstf_p = (half_p | half),
  nukt_p = (pstf_p | pstf)
};

/*
 * Macros to test the charClass flags for various things.
 *
 * Each macro takes an IndicOTCharClass (a class value ORed with
 * CF_* flags) and evaluates to non-zero when the test holds.
 *
 * The argument is parenthesized in every expansion so that passing
 * an expression works as expected.  Without the parentheses an
 * argument such as (a | b) would be parsed as a | (b & MASK),
 * because & binds tighter than |.
 */

/* Class-value tests: compare the low 16 bits against a CC_* value. */
#define IS_VM_ABOVE(charClass) (((charClass) & CF_CLASS_MASK) == CC_MODIFYING_MARK_ABOVE)

#define IS_VM_POST(charClass) (((charClass) & CF_CLASS_MASK) == CC_MODIFYING_MARK_POST)

/* Flag tests: true when the corresponding CF_* bit is set. */
#define IS_CONSONANT(charClass) (((charClass) & CF_CONSONANT) != 0)

#define IS_REPH(charClass) (((charClass) & CF_REPH) != 0)

#define IS_NUKTA(charClass) (((charClass) & CF_CLASS_MASK) == CC_NUKTA)

#define IS_VIRAMA(charClass) (((charClass) & CF_CLASS_MASK) == CC_VIRAMA)

#define IS_VATTU(charClass) (((charClass) & CF_VATTU) != 0)

#define IS_MATRA(charClass) (((charClass) & CF_CLASS_MASK) == CC_DEPENDENT_VOWEL)

/* True when the four-bit index field (CF_INDEX_MASK) is non-zero. */
#define IS_SPLIT_MATRA(charClass) (((charClass) & CF_INDEX_MASK) != 0)

#define IS_M_PRE(charClass) (((charClass) & CF_MATRA_PRE) != 0)

#define IS_M_BELOW(charClass) (((charClass) & CF_MATRA_BELOW) != 0)

#define IS_M_ABOVE(charClass) (((charClass) & CF_MATRA_ABOVE) != 0)

#define IS_M_POST(charClass) (((charClass) & CF_MATRA_POST) != 0)

#define IS_LENGTH_MARK(charClass) (((charClass) & CF_LENGTH_MARK) != 0)

/* True when either CF_POST_BASE or CF_BELOW_BASE is set. */
#define HAS_POST_OR_BELOW_BASE_FORM(charClass) (((charClass) & (CF_POST_BASE | CF_BELOW_BASE)) != 0)

#define HAS_POST_BASE_FORM(charClass) (((charClass) & CF_POST_BASE) != 0)

#define HAS_BELOW_BASE_FORM(charClass) (((charClass) & CF_BELOW_BASE) != 0)

/*
 * Per-script classification table.  One instance is declared for each
 * supported script (see the extern *_class_table declarations), and a
 * pointer to it is the first or fourth argument of the indic_ot_*
 * functions declared in this header.
 */
struct _IndicOTClassTable
{
  /* NOTE(review): presumably the inclusive code point range covered by
   * charClasses - confirm against indic-ot-class-tables.c. */
  gunichar	      firstChar;
  gunichar	      lastChar;
  /* NOTE(review): presumably an upper bound on how much reordering can
   * grow the text, used to size output buffers - confirm with callers. */
  glong  	      worstCaseExpansion;
  /* SF_* bits from IndicOTScriptFlags. */
  IndicOTScriptFlags  scriptFlags;
  /* Array of class value | CF_* flags entries; presumably indexed by
   * (ch - firstChar) - TODO confirm. */
  IndicOTCharClass   *charClasses;

  /* Table of split matra expansions; see indic_ot_get_split_matra(). */
  const IndicOTSplitMatra *splitMatraTable;
};

typedef struct _IndicOTClassTable IndicOTClassTable;

/*
 * Class tables, one per script; the definitions live outside this header.
 * NOTE(review): the four-letter prefixes look like the script tags for
 * Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil, Telugu, Kannada
 * and Malayalam - confirm against the table definitions.
 */
extern IndicOTClassTable deva_class_table;
extern IndicOTClassTable beng_class_table;
extern IndicOTClassTable guru_class_table;
extern IndicOTClassTable gujr_class_table;
extern IndicOTClassTable orya_class_table;
extern IndicOTClassTable taml_class_table;
extern IndicOTClassTable telu_class_table;
extern IndicOTClassTable knda_class_table;
extern IndicOTClassTable mlym_class_table;

/*
 * Look up the split matra entry for a character class in the given table.
 * NOTE(review): presumably selects the entry via the CF_INDEX_MASK field
 * of char_class - confirm against the implementation.
 */
const IndicOTSplitMatra *indic_ot_get_split_matra(const IndicOTClassTable *class_table, IndicOTCharClass char_class);

/* Return the character class (class value ORed with CF_* flags) of ch
 * according to class_table. */
IndicOTCharClass indic_ot_get_char_class(const IndicOTClassTable *class_table, gunichar ch);

/*
 * Per-character predicates.  Each takes a class table and a character
 * and returns a gboolean; the names parallel the IS_* / HAS_*_FORM macros.
 * NOTE(review): presumably each applies the like-named macro to the
 * result of indic_ot_get_char_class() - confirm against the implementation.
 */
gboolean indic_ot_is_vm_above(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_vm_post(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_consonant(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_reph(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_virama(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_nukta(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_vattu(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_matra(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_split_matra(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_m_pre(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_m_below(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_m_above(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_m_post(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_is_length_mark(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_has_post_or_below_base_form(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_has_post_base_form(const IndicOTClassTable *class_table, gunichar ch);
gboolean indic_ot_has_below_base_form(const IndicOTClassTable *class_table, gunichar ch);

/*
 * Find a syllable in chars (char_count characters), starting from prev.
 * NOTE(review): presumably returns the index just past the syllable that
 * begins at prev - confirm against the implementation.
 */
glong indic_ot_find_syllable(const IndicOTClassTable *class_table, const gunichar *chars, glong prev, glong char_count);

/*
 * Reorder char_count characters from chars using class_table.
 * chars and utf8_offsets are read-only inputs; out_chars, char_indices,
 * char_tags and outMPreFixups are non-const and so may be written.
 * NOTE(review): presumably returns the number of characters written to
 * out_chars, and the output arrays must be sized by the caller (see
 * worstCaseExpansion) - confirm against the implementation.
 */
glong indic_ot_reorder(const gunichar *chars, const glong *utf8_offsets, glong char_count, const IndicOTClassTable *class_table, gunichar *out_chars, glong *char_indices, gulong *char_tags, MPreFixups **outMPreFixups);

#endif /* PANGO_ENABLE_ENGINE */

G_END_DECLS

#endif /* __INDIC_OT_H__ */