summary | refs | log | tree | commit | diff
path: root/modules
diff options
context:
space:
mode:
author: Robert Brady <rwb197@zepler.org>, 2000-04-21 18:15:34 +0000
committer: Robert Brady <rbrady@src.gnome.org>, 2000-04-21 18:15:34 +0000
commit: 735141cc8eee680428daacd623dd9a5c2240980c (patch)
tree: a082048904c65eda989931661f2536e019b71bae /modules
parent: 799b1cfe73d7fc5676f584715b6ac6efefc6a9eb (diff)
download: pango-735141cc8eee680428daacd623dd9a5c2240980c.tar.gz
Add some more ligature mappings.
2000-04-21 Robert Brady <rwb197@zepler.org>

* modules/devanagari/dev-ligatures.h: Add some more ligature mappings.

* modules/devanagari.c: New version, now syllable-based, with proper RA handling.
Diffstat (limited to 'modules')
-rw-r--r-- modules/devanagari/dev-ligatures.h | 22
-rw-r--r-- modules/devanagari/devanagari.c | 408
2 files changed, 238 insertions, 192 deletions
diff --git a/modules/devanagari/dev-ligatures.h b/modules/devanagari/dev-ligatures.h
index 02eca1b3..c890c74d 100644
--- a/modules/devanagari/dev-ligatures.h
+++ b/modules/devanagari/dev-ligatures.h
@@ -1,3 +1,8 @@
+/* ones marked with ! weren't used by CURender.java. The
+ mappings were worked out by staring at the Unicode code
+ charts for a long time.
+ */
+
{ 0xE900, { 0x0915, VIRAMA, 0x0937 } },
{ 0xE901, { 0x091C, VIRAMA, 0x091E } },
{ 0xE902, { 0x0924, VIRAMA, 0x0924 } },
@@ -16,12 +21,29 @@
{ 0xE940, { 0x0915, VIRAMA, 0x0915 } },
{ 0xE941, { 0x0915, VIRAMA, 0x0924 } },
+{ 0xE942, { 0x0919, VIRAMA, 0x0915 } }, // !
+{ 0xE943, { 0x0919, VIRAMA, 0x0916 } }, // !
+{ 0xE944, { 0x0919, VIRAMA, 0x0917 } }, // !
+{ 0xE945, { 0x0919, VIRAMA, 0x0918 } }, // !
+{ 0xE946, { 0x091e, VIRAMA, 0x091e } }, // !
{ 0xE947, { 0x0926, VIRAMA, 0x0918 } },
+{ 0xE948, { 0x0926, VIRAMA, 0x0926 } }, // !
{ 0xE949, { 0x0926, VIRAMA, 0x0927 } },
+{ 0xE94A, { 0x0926, VIRAMA, 0x0937 } }, // !
+{ 0xE94B, { 0x0926, VIRAMA, 0x092D } }, // !
+{ 0xE94C, { 0x0926, VIRAMA, 0x092E } }, // !
{ 0xE94D, { 0x0926, VIRAMA, 0x092F } },
{ 0xE94E, { 0x0926, VIRAMA, 0x0935 } },
{ 0xE94F, { 0x091F, VIRAMA, 0x091F } },
{ 0xE950, { 0x091F, VIRAMA, 0x0920 } },
+{ 0xE951, { 0x0920, VIRAMA, 0x0920 } }, // !
{ 0xE952, { 0x0921, VIRAMA, 0x0917 } },
+{ 0xE953, { 0x0921, VIRAMA, 0x0921 } }, // !
+{ 0xE954, { 0x0921, VIRAMA, 0x0922 } }, // !
{ 0xE955, { 0x0928, VIRAMA, 0x0928 } },
+{ 0xE956, { 0x0939, VIRAMA, 0x092e } }, // !
+{ 0xE957, { 0x0939, VIRAMA, 0x092f } }, // !
+{ 0xE958, { 0x0939, VIRAMA, 0x0932 } }, // !
+{ 0xE959, { 0x0939, VIRAMA, 0x0935 } }, // !
+// { 0xE95a, { 0x0939, VIRAMA, 0x0e903 } }, // !
diff --git a/modules/devanagari/devanagari.c b/modules/devanagari/devanagari.c
index 38b4d4cd..3579a574 100644
--- a/modules/devanagari/devanagari.c
+++ b/modules/devanagari/devanagari.c
@@ -21,6 +21,7 @@
#include <glib.h>
#include <unicode.h>
+#include <stdio.h>
#include "utils.h"
#include "pango.h"
@@ -33,6 +34,11 @@
#define RA 0x930
#define JOINING_RA 0xe97f
#define REPHA 0xe97e
+#define EYELASH_RA 0xe97d
+#define RRA 0x931
+
+#define U_S 0x941
+#define UU_S 0x942
typedef struct _LigData LigData;
@@ -159,6 +165,12 @@ is_consonant (int i)
}
static int
+is_ind_vowel (int i)
+{
+ return (i >= 0x905 && i <= 0x914);
+}
+
+static int
is_nonspacing_vowel (GUChar4 c)
{
/* one that doesn't space. ie 93f and 940 don't count */
@@ -173,187 +185,29 @@ get_char (GUChar4 * chars, GUChar4 * end)
return *chars;
}
-void
-devanagari_make_ligatures (int *num, GUChar4 * chars, gint * cluster)
-{
- /* perhaps a syllable based approach would be better? */
- GUChar4 *src = chars;
- GUChar4 *start = chars;
- GUChar4 *end = chars + *num;
- gint *c_src = cluster;
- while (src < end)
- {
- int t0, t1, t2, t3, p1;
- if (chars != start)
- p1 = chars[-1];
- else
- p1 = 0;
- t0 = get_char (src, end);
- t1 = get_char (src + 1, end);
- t2 = get_char (src + 2, end);
- t3 = get_char (src + 3, end);
-
- if (!is_half_consonant (p1))
- {
- int i;
- /* This makes T.T.T.T come out OK. We need an expert in Devanagari
- * to explain what 3 and 4-consonant ligatures are supposed to
- * look like, especially when some of the adjacent characters
- * form ligatures in 2 consonant form.
- *
- * (T.T.T.T is significant as T.T forms a conjunt with a half-form
- * which looks very similar so it was producing TT (half-form),
- * joined to TT unfortunately, this was indistinguishable from
- * T.T.T )
- */
- for (i = 0; i < n_ligatures; i++)
- {
- /* handle the conjuncts */
- LigData *l = ligatures + i;
- if (t0 == l->source[0] && t1 == l->source[1]
- && t2 == l->source[2])
- {
- /* RA ligature handling magic */
- if (t2 == RA && (is_consonant (t3) || (t3 == 0x94d)))
- continue;
-
- chars[0] = l->replacement;
- src += 3;
- chars++;
-
- *cluster = *c_src;
- c_src += 3;
- cluster++;
- break;
- }
- }
- if (i != n_ligatures)
- {
- /* if we made a conjunct here, loop... */
- continue;
- }
- }
-
- if ((is_consonant (t0)) &&
- (t1 == VIRAMA) && (t2 == RA) &&
- (!is_consonant (t3)) && (t3 != 0x94d))
- {
- /* turn C vir RA to C joining-RA */
- chars[0] = *src;
- chars[1] = JOINING_RA;
-
- *cluster = *c_src;
- cluster[1] = *c_src;
-
- src += 3;
- chars += 2;
-
- c_src += 3;
- cluster += 2;
- continue;
- }
-
- /* some ligatures have half-forms. use them. */
- if ((p1 >= 0xe900 && p1 <= 0xe906) && t0 == VIRAMA && is_consonant (t1))
- {
- chars[-1] = 0xe972;
- src++;
- c_src++;
- continue;
- }
-
- /* is_ligating_consonant(t2) probably wants to
- * be is_consonant(t2), not sure. */
- if (is_ligating_consonant (t0) &&
- t1 == VIRAMA && is_ligating_consonant (t2))
- {
- chars[0] = t0 + 0xe000;
- src += 2;
- chars++;
-
- *cluster = *c_src;
- c_src += 2;
- cluster++;
- continue;
- }
-
- /* Handle Virama followed by Nukta. This suppresses the special-case
- * ligature, and just enables regular half-form building.
- *
- * Cavaet as above. */
- if (is_ligating_consonant (t0) &&
- t1 == VIRAMA && t2 == NUKTA && is_ligating_consonant (t3))
- {
- chars[0] = t0 + 0xe000;
- src += 3;
- chars++;
-
- *cluster = *c_src;
- c_src += 3;
- cluster++;
-
- continue;
- }
-
- /* convert R virama vowel to full-vowel with repha */
- if (p1 != VIRAMA &&
- !is_half_consonant (p1) &&
- t0 == RA && t1 == VIRAMA && is_comb_vowel (t2))
- {
- chars[0] = vowelsign_to_letter (t2);
- chars[1] = REPHA;
- *cluster = *c_src;
- cluster[1] = *c_src;
- chars += 2;
- cluster += 2;
-
- c_src += 3;
- src += 3;
- continue;
- }
-
- *chars = *src;
- src++;
- chars++;
-
- *cluster = *c_src;
- cluster++;
- c_src++;
- }
- *num = chars - start;
-}
-
-void
-devanagari_shift_vowels (int *num, GUChar4 * chars, gint * clusters)
+static void
+devanagari_shift_vowels (GUChar4 * chars, GUChar4 * end)
{
/* moves 0x93f (I) before consonant clusters where appropriate. */
- GUChar4 *strt = chars, *end = chars + *num;
+ GUChar4 *strt = chars;
while (chars < end)
{
if (*chars == 0x93f && chars > strt)
{
GUChar4 *bubble = chars;
int i = 1;
- /* move back one consonant, and past any half consonants */
- /* How should this interact with vowel letters and other
- * non-consonant signs? */
- /* also, should it go back past consonants that have a virama
- * attached, so as to be at the start of the syllable? */
+ /* move back TO START! */
- /* probably should go past JOINING RA as well. */
- while (bubble > strt && (i || is_half_consonant (bubble[-1])))
+ while (bubble > strt)
{
bubble[0] = bubble[-1];
bubble[-1] = 0x93f;
i = 0;
bubble--;
}
- /* XXX : if we bubble the cluster stuff here back with the
- glyph, it breaks. */
}
chars++;
- clusters++;
}
}
@@ -366,7 +220,9 @@ devanagari_convert_vowels (int *num, GUChar4 * chars)
GUChar4 *start = chars;
while (chars < end)
{
- if (chars == start && is_comb_vowel (chars[0]))
+ if ((chars == start && is_comb_vowel (chars[0])) ||
+ (chars != start && is_comb_vowel (chars[0])
+ && is_comb_vowel (chars[-1])))
{
chars[0] = vowelsign_to_letter (chars[0]);
}
@@ -374,21 +230,7 @@ devanagari_convert_vowels (int *num, GUChar4 * chars)
}
}
-void
-devanagari_remove_explicit_virama (int *num, GUChar4 * chars)
-{
- /* collapse two viramas in a row to one virama. This is defined
- * to mean 'show it with the virama, don't ligate'. */
- GUChar4 *end = chars + *num;
- while (chars < end)
- {
- if (chars[0] == VIRAMA && chars[1] == VIRAMA)
- chars[1] = 0;
- chars++;
- }
-}
-
-void
+static void
devanagari_compact (int *num, GUChar4 * chars, gint * cluster)
{
/* shuffle stuff up into the blanked out elements. */
@@ -415,6 +257,164 @@ devanagari_compact (int *num, GUChar4 * chars, gint * cluster)
*num -= (chars - dest);
}
+#if 0
+const char *foo[] =
+{
+ "k", "kh", "g", "gh", "ng",
+ "c", "ch", "j", "jh", "ny",
+ "tt", "tth", "dd", "ddh", "nn",
+ "t", "th", "d", "dh", "n", "nnn",
+ "p", "ph", "b", "bh", "m",
+
+ "y", "r", "rr", "l", "ll", "lll",
+
+ "v", "sh", "ss", "s", "h",
+
+ "-", "-", "-", "-",
+
+ "aa",
+ "i", "ii",
+ "u", "uu",
+ "[r]", "[rr]",
+ "[e]", "{e}",
+ "e", "ai",
+ "[o]", "{o}",
+ "o", "au",
+};
+
+const char *bar[] =
+{
+ "A", "AA",
+ "I", "II",
+ "U", "UU",
+ "[R]", "[RR]",
+ "[E]", "{E}",
+ "E", "AI",
+ "[O]", "{O}",
+ "O", "AU",
+};
+#endif
+
+static void
+devanagari_make_ligs (GUChar4 * start, GUChar4 * end, int *cluster)
+{
+ GUChar4 t0 = get_char (start, end);
+ GUChar4 t1 = get_char (start + 1, end);
+ GUChar4 t2 = get_char (start + 2, end);
+ GUChar4 t3 = get_char (start + 3, end);
+
+ int i, j;
+ int repha = 0, ligature = 0;
+
+ for (i = 0; i < (end - start); i++)
+ {
+ t0 = get_char (start + i, end);
+ t1 = get_char (start + 1 + i, end);
+ t2 = get_char (start + 2 + i, end);
+ t3 = get_char (start + 3 + i, end);
+
+ if (!ligature)
+ {
+ for (j = 0; j < n_ligatures; j++)
+ {
+ /* handle the conjuncts */
+ LigData *l = ligatures + j;
+ if (t0 == l->source[0] && t1 == l->source[1]
+ && t2 == l->source[2])
+ {
+ start[i + 0] = 0;
+ start[i + 1] = 0;
+ start[i + 2] = l->replacement;
+ ligature = 1;
+ break;
+ }
+ }
+ if (j != n_ligatures)
+ continue;
+ }
+
+ if ((t0 >= 0xe900 && t0 <= 0xe906) && t1 == VIRAMA
+ && is_ligating_consonant (t2))
+ {
+ start[i + 1] = start[i] + 0x70;
+ start[i] = 0;
+ continue;
+ }
+
+ if (is_consonant (t0) && t1 == VIRAMA && t2 == RA)
+ {
+ start[i + 1] = 0;
+ start[i + 2] = JOINING_RA;
+ continue;
+ }
+
+ if (t0 == RRA && t1 == VIRAMA)
+ {
+ start[i] = 0;
+ start[i + 1] = EYELASH_RA;
+ continue;
+ }
+
+ if (t0 == RA && t1 == VIRAMA && is_ligating_consonant (t2))
+ {
+
+ start[i + 0] = 0;
+ start[i + 1] = 0;
+ start[i + 2] = t2;
+ repha = 1;
+ continue;
+ }
+
+ if (is_ligating_consonant (t0) &&
+ t1 == VIRAMA && is_ligating_consonant (t2))
+ {
+ start[i + 0] = t0 + 0xe000;
+ start[i + 1] = 0;
+ start[i + 2] = t2;
+ continue;
+ }
+
+ if (t0 == RA && (t1 == U_S || t1 == UU_S))
+ {
+
+ if (t1 == U_S)
+ start[i + 1] = 0xe90e;
+
+ if (t1 == UU_S)
+ start[i + 1] = 0xe90f;
+
+ start[i] = 0;
+
+ }
+ }
+
+ for (i = 0; i < (end - start); i++)
+ {
+ t0 = get_char (start + i, end);
+ t1 = get_char (start + 1 + i, end);
+ t2 = get_char (start + 2 + i, end);
+ t3 = get_char (start + 3 + i, end);
+ }
+
+ if (repha)
+ {
+ int src = 0, dest = 0;
+ while (src < (end - start))
+ {
+ start[dest] = start[src];
+ src++;
+ if (start[dest])
+ dest++;
+ }
+ while (dest < (end - start))
+ {
+ start[dest] = 0;
+ dest++;
+ }
+ end[-1] = REPHA;
+ }
+}
+
static void
devanagari_engine_shape (PangoFont * font,
const char *text,
@@ -427,7 +427,9 @@ devanagari_engine_shape (PangoFont * font,
int lvl;
const char *p, *next;
int i;
- GUChar4 *wc;
+ GUChar4 *wc, *sb;
+ int n_syls;
+ GUChar4 **syls = g_malloc (sizeof (GUChar4 **));
g_return_if_fail (font != NULL);
g_return_if_fail (text != NULL);
@@ -466,10 +468,33 @@ devanagari_engine_shape (PangoFont * font,
}
devanagari_convert_vowels (&n_glyph, wc);
- devanagari_make_ligatures (&n_glyph, wc, glyphs->log_clusters);
- devanagari_remove_explicit_virama (&n_glyph, wc);
+
+ n_syls = 1;
+ syls[0] = wc;
+ sb = glyphs->log_clusters[0];
+ for (i = 0; i < n_chars; i++)
+ {
+ if (i && (is_consonant (wc[i]) | is_ind_vowel (wc[i]))
+ && wc[i - 1] != 0x94d)
+ {
+ syls = g_realloc (syls, ((n_syls + 2) * sizeof (GUChar4 **)));
+ syls[n_syls] = wc + i;
+ n_syls++;
+ sb = glyphs->log_clusters[i];
+ }
+ glyphs->log_clusters[i] = sb;
+ }
+ syls[n_syls] = wc + i;
+
+ for (i = 0; i < n_syls; i++)
+ {
+ devanagari_make_ligs (syls[i], syls[i + 1], glyphs->log_clusters +
+ (syls[i] - wc));
+ devanagari_shift_vowels (syls[i], syls[i + 1]);
+ }
+
devanagari_compact (&n_glyph, wc, glyphs->log_clusters);
- devanagari_shift_vowels (&n_glyph, wc, glyphs->log_clusters);
+
pango_glyph_string_set_size (glyphs, n_glyph);
for (i = 0; i < n_glyph; i++)
@@ -482,20 +507,18 @@ devanagari_engine_shape (PangoFont * font,
glyphs->glyphs[i].geometry.y_offset = 0;
glyphs->glyphs[i].geometry.width = logical_rect.width;
- if ((wc[i] == VIRAMA || wc[i] == ANUSWAR || wc[i] == CANDRA ||
- wc[i] == JOINING_RA || wc[i] == REPHA ||
- is_nonspacing_vowel (wc[i])) && i)
+ if (wc[i] == JOINING_RA || wc[i] == ANUSWAR ||
+ wc[i] == REPHA || wc[i] == VIRAMA || wc[i] == CANDRA
+ is_nonspacing_vowel (wc[i]))
{
if (wc[i] == VIRAMA)
{
glyphs->glyphs[i].geometry.x_offset =
(-glyphs->glyphs[i - 1].geometry.width / 2);
- }
- else if (is_nonspacing_vowel (wc[i]))
- {
- glyphs->glyphs[i].geometry.x_offset =
- -((glyphs->glyphs[i - 1].geometry.width) +
- (logical_rect.width)) / 2;
+
+ if (!glyphs->glyphs[i].geometry.x_offset)
+ glyphs->glyphs[i].geometry.x_offset =
+ (-glyphs->glyphs[i - 2].geometry.width / 2);
}
else
glyphs->glyphs[i].geometry.x_offset = -logical_rect.width * 2;
@@ -504,6 +527,7 @@ devanagari_engine_shape (PangoFont * font,
glyphs->log_clusters[i] = glyphs->log_clusters[i - 1];
}
}
+ g_free (syls);
}
static PangoEngine *