summary refs log tree commit diff
path: root/modules/devanagari
diff options
context:
space:
mode:
Diffstat (limited to 'modules/devanagari')
-rw-r--r-- modules/devanagari/.cvsignore | 6
-rw-r--r-- modules/devanagari/Makefile.am | 14
-rw-r--r-- modules/devanagari/dev-ligatures.h | 27
-rw-r--r-- modules/devanagari/devanagari.c | 589
4 files changed, 636 insertions, 0 deletions
diff --git a/modules/devanagari/.cvsignore b/modules/devanagari/.cvsignore
new file mode 100644
index 00000000..6e5ca7ed
--- /dev/null
+++ b/modules/devanagari/.cvsignore
@@ -0,0 +1,6 @@
+Makefile
+Makefile.in
+.deps
+.libs
+*.lo
+*.la
diff --git a/modules/devanagari/Makefile.am b/modules/devanagari/Makefile.am
new file mode 100644
index 00000000..2f3d5601
--- /dev/null
+++ b/modules/devanagari/Makefile.am
@@ -0,0 +1,14 @@
+## Process this file with automake to create Makefile.in.
+
+# The module is installed under $(libdir)/pango/modules.
+moduledir = $(libdir)/pango/modules
+module_LTLIBRARIES = pango-devanagari.la
+
+# Header search path: the top of the source tree and its pango/ directory.
+INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/pango/
+
+# dev-ligatures.h is a table fragment that devanagari.c #includes.
+pango_devanagari_la_SOURCES = \
+ devanagari.c \
+ dev-ligatures.h
+
+# Linked as an unversioned libtool module (-module, -avoid-version).
+pango_devanagari_la_LDFLAGS = -rpath $(libdir) -export-dynamic -avoid-version -module
+
+# No extra files to distribute.
+EXTRA_DIST=
diff --git a/modules/devanagari/dev-ligatures.h b/modules/devanagari/dev-ligatures.h
new file mode 100644
index 00000000..02eca1b3
--- /dev/null
+++ b/modules/devanagari/dev-ligatures.h
@@ -0,0 +1,27 @@
+/* Ligature table fragment, #included inside the initializer of the
+ * ligatures[] array in devanagari.c.  Each row is
+ *   { replacement code, { first, second, third source code point } }
+ * VIRAMA and RA are macros defined in devanagari.c.
+ */
+
+/* Replacement codes 0xE900 - 0xE906. */
+{ 0xE900, { 0x0915, VIRAMA, 0x0937 } },
+{ 0xE901, { 0x091C, VIRAMA, 0x091E } },
+{ 0xE902, { 0x0924, VIRAMA, 0x0924 } },
+{ 0xE903, { 0x0924, VIRAMA, 0x0930 } },
+{ 0xE904, { 0x0936, VIRAMA, 0x091B } },
+{ 0xE905, { 0x0936, VIRAMA, 0x0930 } },
+{ 0xE906, { 0x0936, VIRAMA, 0x0935 } },
+
+/* Consonant + VIRAMA + RA: replacement codes 0xE907 - 0xE90D. */
+{ 0xE907, { 0x0915, VIRAMA, RA } },
+{ 0xE908, { 0x091C, VIRAMA, RA } },
+{ 0xE909, { 0x095B, VIRAMA, RA } },
+{ 0xE90A, { 0x092B, VIRAMA, RA } },
+{ 0xE90B, { 0x095E, VIRAMA, RA } },
+{ 0xE90C, { 0x092A, VIRAMA, RA } },
+{ 0xE90D, { 0x0938, VIRAMA, RA } },
+
+/* Replacement codes in the 0xE940 - 0xE955 range. */
+{ 0xE940, { 0x0915, VIRAMA, 0x0915 } },
+{ 0xE941, { 0x0915, VIRAMA, 0x0924 } },
+{ 0xE947, { 0x0926, VIRAMA, 0x0918 } },
+{ 0xE949, { 0x0926, VIRAMA, 0x0927 } },
+{ 0xE94D, { 0x0926, VIRAMA, 0x092F } },
+{ 0xE94E, { 0x0926, VIRAMA, 0x0935 } },
+{ 0xE94F, { 0x091F, VIRAMA, 0x091F } },
+{ 0xE950, { 0x091F, VIRAMA, 0x0920 } },
+{ 0xE952, { 0x0921, VIRAMA, 0x0917 } },
+{ 0xE955, { 0x0928, VIRAMA, 0x0928 } },
+
diff --git a/modules/devanagari/devanagari.c b/modules/devanagari/devanagari.c
new file mode 100644
index 00000000..38b4d4cd
--- /dev/null
+++ b/modules/devanagari/devanagari.c
@@ -0,0 +1,589 @@
+/* Pango - Devanagari module
+ * devanagari.c:
+ *
+ * Copyright (C) 2000 Robert Brady <rwb197@zepler.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <string.h>
+
+#include <glib.h>
+#include <unicode.h>
+
+#include "utils.h"
+#include "pango.h"
+#include "pangox.h"
+
+/* Code points the shaping passes test for or substitute. */
+/* U+094D DEVANAGARI SIGN VIRAMA */
+#define VIRAMA 0x94d
+/* U+0901 DEVANAGARI SIGN CANDRABINDU */
+#define CANDRA 0x901
+/* U+0902 DEVANAGARI SIGN ANUSVARA */
+#define ANUSWAR 0x902
+/* U+093C DEVANAGARI SIGN NUKTA */
+#define NUKTA 0x93c
+/* U+0930 DEVANAGARI LETTER RA */
+#define RA 0x930
+/* Private-use code written after a consonant in place of "virama RA". */
+#define JOINING_RA 0xe97f
+/* Private-use code written after a vowel letter in place of a leading
+ * "RA virama". */
+#define REPHA 0xe97e
+
+/* One ligature rule: when the three codes in `source' appear in
+ * sequence, devanagari_make_ligatures() writes `replacement' instead. */
+typedef struct _LigData LigData;
+
+struct _LigData
+ {
+ int replacement;
+ int source[3];
+ };
+
+/* Ligature rules; the rows live in dev-ligatures.h. */
+static LigData ligatures[] =
+{
+#include "dev-ligatures.h"
+};
+
+/* Number of rows in ligatures[]. */
+static gint n_ligatures = G_N_ELEMENTS (ligatures);
+
+/* Charset names handed to pango_x_list_subfonts() by find_unic_font(),
+ * which passes a count of 1. */
+static char *default_charset[] =
+{
+ "iso10646-dev",
+ /* Devanagari encoded the ISO 10646 way, with the private use area
+ * (PUA) used for ligatures and half forms. */
+};
+
+/* Table about ligatures in the font. This should come from the font
+ * somehow: this needs to be co-ordinated with fonts@xfree86.org
+ * (and with whatever passes for a font working group at X.org).
+ */
+
+/* Code point range (0x900 to 0x97f) advertised for both engines in
+ * script_engines[], with "*" as the third field. */
+static PangoEngineRange devanagari_range[] =
+{
+ {0x900, 0x97f, "*"}
+};
+
+/* Engines offered by this module and returned by script_engine_list():
+ * a language engine and an X shaping engine, both registered for
+ * devanagari_range.  The id strings are the ones script_engine_load()
+ * compares against. */
+static PangoEngineInfo script_engines[] =
+{
+ {
+ "DevanagariScriptEngineLang",
+ PANGO_ENGINE_TYPE_LANG,
+ PANGO_RENDER_TYPE_NONE,
+ devanagari_range, G_N_ELEMENTS (devanagari_range)},
+ {
+ "DevanagariScriptEngineX",
+ PANGO_ENGINE_TYPE_SHAPE,
+ PANGO_RENDER_TYPE_X,
+ devanagari_range, G_N_ELEMENTS (devanagari_range)}
+};
+
+/* Number of entries in script_engines[]. */
+static gint n_script_engines = G_N_ELEMENTS (script_engines);
+
+/* Forward declaration: devanagari_engine_get_coverage() calls this
+ * before its definition appears. */
+static gboolean
+ find_unic_font (PangoFont * font, char *charsets[], PangoXSubfont * rfont);
+
+/* Build the coverage map for `font'.
+ *
+ * When find_unic_font() locates a subfont for default_charset, every
+ * code point from 0x900 up to, but not including, 0x97f is marked
+ * PANGO_COVERAGE_EXACT.  Otherwise the freshly created coverage object
+ * is returned untouched.  `lang' is not consulted.
+ *
+ * NOTE(review): devanagari_range ends at 0x97f, yet 0x97f itself is
+ * never marked here -- confirm whether that is intended.
+ */
+static PangoCoverage *
+devanagari_engine_get_coverage (PangoFont * font, const char *lang)
+{
+  PangoXSubfont subfont;
+  PangoCoverage *coverage = pango_coverage_new ();
+
+  if (find_unic_font (font, default_charset, &subfont))
+    {
+      GUChar4 ch = 0x900;
+
+      while (ch < 0x97f)
+        {
+          pango_coverage_set (coverage, ch, PANGO_COVERAGE_EXACT);
+          ch++;
+        }
+    }
+
+  return coverage;
+}
+
+/* Ask pango_x_list_subfonts() for the subfonts of `font' that match
+ * `charsets' (only one charset entry is passed).
+ *
+ * If at least one subfont is reported, the first one is stored in
+ * *rfont and 1 is returned; otherwise *rfont is left alone and 0 is
+ * returned.  Both arrays handed back by pango_x_list_subfonts() are
+ * released with g_free() before returning.
+ */
+static gboolean
+find_unic_font (PangoFont * font, char *charsets[], PangoXSubfont * rfont)
+{
+  PangoXSubfont *found_subfonts;
+  int *found_charsets;
+  int count;
+  gboolean have_font;
+
+  count = pango_x_list_subfonts (font, charsets, 1,
+                                 &found_subfonts, &found_charsets);
+
+  have_font = (count > 0);
+  if (have_font)
+    *rfont = found_subfonts[0];
+
+  g_free (found_subfonts);
+  g_free (found_charsets);
+
+  return have_font;
+}
+
+/* Non-zero when `ch' lies in 0x915..0x939.  The letters at 0x958 and
+ * above are deliberately excluded: they don't ligate in any way. */
+static int
+is_ligating_consonant (int ch)
+{
+  if (ch < 0x915)
+    return 0;
+
+  return ch <= 0x939;
+}
+
+/* Non-zero for a vowel sign that combines with its base, whether or
+ * not it takes up space: 0x93e..0x94c, plus 0x962 and 0x963. */
+static int
+is_comb_vowel (int i)
+{
+  if (i >= 0x93E && i <= 0x94c)
+    return 1;
+
+  return i == 0x962 || i == 0x963;
+}
+
+/* Map a vowel sign in 0x93e..0x94c onto the code the same distance
+ * above 0x906 (so 0x93e becomes 0x906).  Any other value is returned
+ * unchanged. */
+static int
+vowelsign_to_letter (int i)
+{
+  int in_sign_range = (i >= 0x93e && i <= 0x94c);
+
+  return in_sign_range ? i - 0x93e + 0x906 : i;
+}
+
+/* Non-zero for the private-use codes this module treats as half
+ * consonants: 0xe915..0xe939 and 0xe970..0xe976. */
+static int
+is_half_consonant (int i)
+{
+  if (i >= 0xe915 && i <= 0xe939)
+    return 1;
+
+  return i >= 0xe970 && i <= 0xe976;
+}
+
+/* Non-zero for a consonant: 0x915..0x939 or 0x958..0x95f. */
+static int
+is_consonant (int i)
+{
+  if (i >= 0x915 && i <= 0x939)
+    return 1;
+
+  return i >= 0x958 && i <= 0x95f;
+}
+
+/* Non-zero for a vowel sign that does not take up space of its own:
+ * 0x941..0x948, plus 0x962 and 0x963.  0x93f and 0x940 don't count. */
+static int
+is_nonspacing_vowel (GUChar4 c)
+{
+  if (c >= 0x941 && c <= 0x948)
+    return 1;
+
+  return c == 0x962 || c == 0x963;
+}
+
+/* Bounds-checked read: yields *chars while `chars' is still before
+ * `end', and 0 once it has reached or passed `end'. */
+static int
+get_char (GUChar4 * chars, GUChar4 * end)
+{
+  return (chars < end) ? *chars : 0;
+}
+
+/* Ligature pass: rewrites `chars' (and the parallel `cluster' array)
+ * in place, replacing consonant/virama sequences with ligature,
+ * half-form, joining-RA and repha codes.
+ *
+ * num     - in: number of entries in chars/cluster; out: number of
+ *           entries left after substitution.
+ * chars   - code points, overwritten front-to-back with the result.
+ * cluster - cluster value per entry, compacted along with chars.
+ *
+ * `src'/`c_src' are the read cursors and `chars'/`cluster' the write
+ * cursors.  Every rule writes no more entries than it consumes, so
+ * the write cursors never pass the read cursors.  The rules are tried
+ * in the order they appear and the first match wins.
+ */
+void
+devanagari_make_ligatures (int *num, GUChar4 * chars, gint * cluster)
+{
+ /* perhaps a syllable based approach would be better? */
+ GUChar4 *src = chars;
+ GUChar4 *start = chars;
+ GUChar4 *end = chars + *num;
+ gint *c_src = cluster;
+ while (src < end)
+ {
+ int t0, t1, t2, t3, p1;
+ /* p1 is the last code already written to the output, 0 if none. */
+ if (chars != start)
+ p1 = chars[-1];
+ else
+ p1 = 0;
+ /* t0..t3 are the next four input codes; get_char() yields 0 past
+ * the end of the input. */
+ t0 = get_char (src, end);
+ t1 = get_char (src + 1, end);
+ t2 = get_char (src + 2, end);
+ t3 = get_char (src + 3, end);
+
+ if (!is_half_consonant (p1))
+ {
+ int i;
+ /* This makes T.T.T.T come out OK. We need an expert in Devanagari
+ * to explain what 3 and 4-consonant ligatures are supposed to
+ * look like, especially when some of the adjacent characters
+ * form ligatures in 2 consonant form.
+ *
+ * (T.T.T.T is significant as T.T forms a conjunct with a half-form
+ * which looks very similar so it was producing TT (half-form),
+ * joined to TT unfortunately, this was indistinguishable from
+ * T.T.T )
+ */
+ for (i = 0; i < n_ligatures; i++)
+ {
+ /* handle the conjuncts */
+ LigData *l = ligatures + i;
+ if (t0 == l->source[0] && t1 == l->source[1]
+ && t2 == l->source[2])
+ {
+ /* RA ligature handling magic: skip this table entry when the
+ * RA is followed by a consonant or a virama (0x94d). */
+ if (t2 == RA && (is_consonant (t3) || (t3 == 0x94d)))
+ continue;
+
+ /* three input codes collapse into one replacement code */
+ chars[0] = l->replacement;
+ src += 3;
+ chars++;
+
+ *cluster = *c_src;
+ c_src += 3;
+ cluster++;
+ break;
+ }
+ }
+ /* i stops short of n_ligatures only when the break above ran */
+ if (i != n_ligatures)
+ {
+ /* if we made a conjunct here, loop... */
+ continue;
+ }
+ }
+
+ if ((is_consonant (t0)) &&
+ (t1 == VIRAMA) && (t2 == RA) &&
+ (!is_consonant (t3)) && (t3 != 0x94d))
+ {
+ /* turn C vir RA to C joining-RA */
+ chars[0] = *src;
+ chars[1] = JOINING_RA;
+
+ *cluster = *c_src;
+ cluster[1] = *c_src;
+
+ src += 3;
+ chars += 2;
+
+ c_src += 3;
+ cluster += 2;
+ continue;
+ }
+
+ /* some ligatures have half-forms. use them.
+ * The previous output code (one of 0xe900..0xe906) is overwritten
+ * with 0xe972 and the virama is consumed; nothing new is written. */
+ if ((p1 >= 0xe900 && p1 <= 0xe906) && t0 == VIRAMA && is_consonant (t1))
+ {
+ chars[-1] = 0xe972;
+ src++;
+ c_src++;
+ continue;
+ }
+
+ /* is_ligating_consonant(t2) probably wants to
+ * be is_consonant(t2), not sure. */
+ if (is_ligating_consonant (t0) &&
+ t1 == VIRAMA && is_ligating_consonant (t2))
+ {
+ /* consonant + virama becomes the code 0xe000 above the consonant;
+ * the following consonant is left for the next iteration */
+ chars[0] = t0 + 0xe000;
+ src += 2;
+ chars++;
+
+ *cluster = *c_src;
+ c_src += 2;
+ cluster++;
+ continue;
+ }
+
+ /* Handle Virama followed by Nukta. This suppresses the special-case
+ * ligature, and just enables regular half-form building.
+ *
+ * Caveat as above. */
+ if (is_ligating_consonant (t0) &&
+ t1 == VIRAMA && t2 == NUKTA && is_ligating_consonant (t3))
+ {
+ /* consonant, virama and nukta are all consumed here */
+ chars[0] = t0 + 0xe000;
+ src += 3;
+ chars++;
+
+ *cluster = *c_src;
+ c_src += 3;
+ cluster++;
+
+ continue;
+ }
+
+ /* convert R virama vowel to full-vowel with repha */
+ if (p1 != VIRAMA &&
+ !is_half_consonant (p1) &&
+ t0 == RA && t1 == VIRAMA && is_comb_vowel (t2))
+ {
+ chars[0] = vowelsign_to_letter (t2);
+ chars[1] = REPHA;
+ *cluster = *c_src;
+ cluster[1] = *c_src;
+ chars += 2;
+ cluster += 2;
+
+ c_src += 3;
+ src += 3;
+ continue;
+ }
+
+ /* no rule matched: copy one code and its cluster value through */
+ *chars = *src;
+ src++;
+ chars++;
+
+ *cluster = *c_src;
+ cluster++;
+ c_src++;
+ }
+ /* new length is the distance the write cursor travelled */
+ *num = chars - start;
+}
+
+/* Move each 0x93f (vowel sign I) in front of the consonant cluster it
+ * follows.
+ *
+ * For every 0x93f that is not the first entry, the sign is swapped
+ * back over the entry before it, and then over any further entries
+ * for which is_half_consonant() holds.  `clusters' is not modified
+ * and *num is only read.
+ *
+ * Open questions carried over from the original author:
+ *  - How should this interact with vowel letters and other
+ *    non-consonant signs?
+ *  - Should it go back past consonants that have a virama attached, so
+ *    as to land at the start of the syllable?
+ *  - It probably should go past JOINING RA as well.
+ *  - XXX: bubbling the cluster values back together with the glyph
+ *    breaks things, so the cluster array is left as it is.
+ */
+void
+devanagari_shift_vowels (int *num, GUChar4 * chars, gint * clusters)
+{
+  int count = *num;
+  int pos;
+
+  /* Entry 0 has nothing before it, so start looking at entry 1. */
+  for (pos = 1; pos < count; pos++)
+    {
+      GUChar4 *slot;
+
+      if (chars[pos] != 0x93f)
+        continue;
+
+      slot = chars + pos;
+      do
+        {
+          /* Swap the sign with the entry in front of it. */
+          slot[0] = slot[-1];
+          slot[-1] = 0x93f;
+          slot--;
+        }
+      while (slot > chars && is_half_consonant (slot[-1]));
+    }
+}
+
+/* Convert a matra (combining vowel sign) to the corresponding vowel
+ * letter where needed.  This is currently only done for the very first
+ * entry of the string.
+ *
+ * num   - number of entries in `chars' (only read).
+ * chars - code points; chars[0] may be rewritten in place.
+ *
+ * Only the first entry can ever qualify, so it is tested directly
+ * instead of walking the whole buffer.
+ */
+void
+devanagari_convert_vowels (int *num, GUChar4 * chars)
+{
+  if (*num > 0 && is_comb_vowel (chars[0]))
+    chars[0] = vowelsign_to_letter (chars[0]);
+}
+
+/* Collapse two viramas in a row to one virama.  A doubled virama is
+ * defined to mean 'show it with the virama, don't ligate'.
+ *
+ * num   - number of entries in `chars' (only read).
+ * chars - code points; the second virama of each pair is overwritten
+ *         with 0 and is later dropped by devanagari_compact().
+ *
+ * The scan stops one entry before the end so that the look-ahead at
+ * chars[i + 1] never reads or writes past the buffer.
+ */
+void
+devanagari_remove_explicit_virama (int *num, GUChar4 * chars)
+{
+  int i;
+
+  for (i = 0; i + 1 < *num; i++)
+    {
+      if (chars[i] == VIRAMA && chars[i + 1] == VIRAMA)
+        chars[i + 1] = 0;
+    }
+}
+
+/* Squeeze out the entries that earlier passes blanked to 0.
+ *
+ * num     - in: number of entries; out: reduced by the number of
+ *           zero entries removed.
+ * chars   - code points; non-zero entries are shuffled towards the
+ *           front, keeping their order.
+ * cluster - parallel array, shuffled the same way.
+ */
+void
+devanagari_compact (int *num, GUChar4 * chars, gint * cluster)
+{
+  int total = *num;
+  int kept = 0;
+  int dropped = 0;
+  int i;
+
+  for (i = 0; i < total; i++)
+    {
+      if (!chars[i])
+        {
+          dropped++;
+          continue;
+        }
+
+      chars[kept] = chars[i];
+      cluster[kept] = cluster[i];
+      kept++;
+    }
+
+  *num -= dropped;
+}
+
+/* Shape `length' bytes of UTF-8 `text' into `glyphs' using `font'.
+ *
+ * If find_unic_font() finds no subfont for default_charset, every
+ * character is rendered as the font's unknown glyph.  Otherwise the
+ * text is decoded into a temporary code point buffer, run through the
+ * reordering and ligature passes of this file, and each resulting code
+ * is turned into a glyph of the subfont.  Combining marks (virama,
+ * anuswar, candra, joining RA, repha and the non-spacing vowels) that
+ * are not the first glyph get a zero advance, a negative x offset and
+ * the cluster value of the preceding glyph.
+ *
+ * The temporary buffer is released before returning.
+ */
+static void
+devanagari_engine_shape (PangoFont * font,
+                         const char *text,
+                         int length,
+                         PangoAnalysis * analysis, PangoGlyphString * glyphs)
+{
+  PangoXSubfont subfont;
+
+  int n_chars, n_glyph;
+  int lvl;
+  const char *p, *next;
+  int i;
+  GUChar4 *wc;
+
+  g_return_if_fail (font != NULL);
+  g_return_if_fail (text != NULL);
+  g_return_if_fail (length >= 0);
+  g_return_if_fail (analysis != NULL);
+
+  n_chars = n_glyph = unicode_strlen (text, length);
+  lvl = find_unic_font (font, default_charset, &subfont);
+  if (!lvl)
+    {
+      /* No usable subfont: one unknown glyph per character. */
+      PangoGlyph unknown_glyph = pango_x_get_unknown_glyph (font);
+      PangoRectangle logical_rect;
+
+      pango_font_get_glyph_extents (font, unknown_glyph, NULL, &logical_rect);
+      pango_glyph_string_set_size (glyphs, n_chars);
+      p = text;
+      for (i = 0; i < n_chars; i++)
+        {
+          glyphs->glyphs[i].glyph = unknown_glyph;
+          glyphs->glyphs[i].geometry.x_offset = 0;
+          glyphs->glyphs[i].geometry.y_offset = 0;
+          glyphs->glyphs[i].geometry.width = logical_rect.width;
+          glyphs->log_clusters[i] = 0;
+
+          p = unicode_next_utf8 (p);
+        }
+      return;
+    }
+
+  /* Decode the text; each character's cluster value starts out as its
+   * byte offset into `text'. */
+  p = text;
+  wc = (GUChar4 *) g_malloc (sizeof (GUChar4) * n_chars);
+  pango_glyph_string_set_size (glyphs, n_glyph);
+  for (i = 0; i < n_chars; i++)
+    {
+      _pango_utf8_iterate (p, &next, &wc[i]);
+      glyphs->log_clusters[i] = p - text;
+      p = next;
+    }
+
+  /* The passes may shrink the buffer; n_glyph tracks the new length. */
+  devanagari_convert_vowels (&n_glyph, wc);
+  devanagari_make_ligatures (&n_glyph, wc, glyphs->log_clusters);
+  devanagari_remove_explicit_virama (&n_glyph, wc);
+  devanagari_compact (&n_glyph, wc, glyphs->log_clusters);
+  devanagari_shift_vowels (&n_glyph, wc, glyphs->log_clusters);
+  pango_glyph_string_set_size (glyphs, n_glyph);
+
+  for (i = 0; i < n_glyph; i++)
+    {
+      PangoRectangle logical_rect;
+
+      glyphs->glyphs[i].glyph = PANGO_X_MAKE_GLYPH (subfont, wc[i]);
+      pango_font_get_glyph_extents (font, glyphs->glyphs[i].glyph,
+                                    NULL, &logical_rect);
+      glyphs->glyphs[i].geometry.x_offset = 0;
+      glyphs->glyphs[i].geometry.y_offset = 0;
+      glyphs->glyphs[i].geometry.width = logical_rect.width;
+
+      /* Combining marks are drawn over the previous glyph; the first
+       * glyph (i == 0) has nothing to attach to. */
+      if ((wc[i] == VIRAMA || wc[i] == ANUSWAR || wc[i] == CANDRA ||
+           wc[i] == JOINING_RA || wc[i] == REPHA ||
+           is_nonspacing_vowel (wc[i])) && i)
+        {
+          if (wc[i] == VIRAMA)
+            {
+              glyphs->glyphs[i].geometry.x_offset =
+                (-glyphs->glyphs[i - 1].geometry.width / 2);
+            }
+          else if (is_nonspacing_vowel (wc[i]))
+            {
+              glyphs->glyphs[i].geometry.x_offset =
+                -((glyphs->glyphs[i - 1].geometry.width) +
+                  (logical_rect.width)) / 2;
+            }
+          else
+            glyphs->glyphs[i].geometry.x_offset = -logical_rect.width * 2;
+
+          glyphs->glyphs[i].geometry.width = 0;
+          glyphs->log_clusters[i] = glyphs->log_clusters[i - 1];
+        }
+    }
+
+  /* The code point buffer is private to this call. */
+  g_free (wc);
+}
+
+/* Allocate and fill in the X shaping engine.
+ *
+ * The engine is tagged PANGO_ENGINE_TYPE_SHAPE, matching the
+ * "DevanagariScriptEngineX" entry in script_engines[], and its length
+ * field records the size of the structure rather than of a pointer.
+ *
+ * Returns the new engine cast to PangoEngine *.
+ */
+static PangoEngine *
+devanagari_engine_x_new ()
+{
+  PangoEngineShape *result;
+
+  result = g_new (PangoEngineShape, 1);
+
+  result->engine.id = "DevanagariScriptEngine";
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
+  result->engine.length = sizeof (*result);
+  result->script_shape = devanagari_engine_shape;
+  result->get_coverage = devanagari_engine_get_coverage;
+
+  return (PangoEngine *) result;
+}
+
+/* Module entry point: report the engines this module provides.
+ *
+ * engines   - receives a pointer to the static script_engines[] table.
+ * n_engines - receives the number of entries in that table.
+ */
+void
+script_engine_list (PangoEngineInfo ** engines, int *n_engines)
+{
+  *n_engines = n_script_engines;
+  *engines = script_engines;
+}
+
+/* Fill in break attributes for the first `len' bytes of UTF-8 `text'.
+ *
+ * One PangoLogAttr is written per decoded character:
+ *   is_white     - the character is a space, tab or newline;
+ *   is_break     - this character or the previous one is white;
+ *   is_char_stop - always set;
+ *   is_word_stop - first character, or previous character is white.
+ *
+ * Scanning stops at a NUL byte, at `len' bytes, at a character that
+ * would extend past `len', or when _pango_utf8_iterate() fails or
+ * makes no progress.  `analysis' is not consulted.
+ */
+static void
+devanagari_engine_break (const char *text,
+                         int len,
+                         PangoAnalysis * analysis, PangoLogAttr * attrs)
+{
+  const char *cur = text;
+  const char *next;
+  gint i = 0;
+  GUChar4 wc;
+
+  /* Check the length first so that text[len] is never dereferenced. */
+  while ((cur - text) < len && *cur)
+    {
+      if (!_pango_utf8_iterate (cur, &next, &wc))
+        return;
+      if (cur == next)
+        break;
+      if ((next - text) > len)
+        break;
+      cur = next;
+
+      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == '\n') ? 1 : 0;
+      attrs[i].is_break = (i > 0 && attrs[i - 1].is_white) ||
+        attrs[i].is_white;
+      attrs[i].is_char_stop = 1;
+      attrs[i].is_word_stop = (i == 0) || attrs[i - 1].is_white;
+      /* actually, is_word_stop is not correct, but simple and good enough. */
+      i++;
+    }
+}
+
+
+/* Allocate and fill in the language engine, whose only callback is
+ * devanagari_engine_break().  The length field records the size of the
+ * structure rather than of a pointer.
+ *
+ * Returns the new engine cast to PangoEngine *.
+ */
+static PangoEngine *
+devanagari_engine_lang_new ()
+{
+  PangoEngineLang *result;
+
+  result = g_new (PangoEngineLang, 1);
+
+  result->engine.id = "DevanagariScriptEngine";
+  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  result->engine.length = sizeof (*result);
+  result->script_break = devanagari_engine_break;
+
+  return (PangoEngine *) result;
+}
+
+/* Module entry point: create the engine named by `id'.
+ *
+ * id - one of the id strings listed in script_engines[].
+ *
+ * Returns a newly allocated engine, or NULL when `id' is NULL or does
+ * not name an engine of this module.
+ */
+PangoEngine *
+script_engine_load (const char *id)
+{
+  /* strcmp() must not be handed a NULL pointer. */
+  g_return_val_if_fail (id != NULL, NULL);
+
+  if (strcmp (id, "DevanagariScriptEngineLang") == 0)
+    return devanagari_engine_lang_new ();
+
+  if (strcmp (id, "DevanagariScriptEngineX") == 0)
+    return devanagari_engine_x_new ();
+
+  return NULL;
+}
+
+/* Module entry point for unloading an engine.  Currently does nothing;
+ * in particular `engine' is not freed here. */
+void
+script_engine_unload (PangoEngine * engine)
+{
+}