summary refs log tree commit diff
path: root/modules/tamil
diff options
context:
space:
mode:
Diffstat (limited to 'modules/tamil')
-rw-r--r-- modules/tamil/Makefile.am 10
-rw-r--r-- modules/tamil/taconv.c 590
-rw-r--r-- modules/tamil/taconv.h 55
-rw-r--r-- modules/tamil/tadefs.h 132
-rw-r--r-- modules/tamil/tamil-x.c 226
-rw-r--r-- modules/tamil/tamil.c 226
6 files changed, 1239 insertions, 0 deletions
diff --git a/modules/tamil/Makefile.am b/modules/tamil/Makefile.am
new file mode 100644
index 00000000..476a14fa
--- /dev/null
+++ b/modules/tamil/Makefile.am
@@ -0,0 +1,10 @@
+## Process this file with automake to create Makefile.in.
+
+# The Tamil module is built as a libtool library that is not installed.
+noinst_LTLIBRARIES = pango-tamil.la
+
+# Headers are searched for in the libpango directory of the source tree.
+INCLUDES = -I$(top_srcdir)/libpango/
+
+# NOTE(review): tamil-x.c is added alongside these files but is not listed
+# in the sources below -- confirm that this is intended.
+pango_tamil_la_SOURCES = tamil.c taconv.c taconv.h tadefs.h
+pango_tamil_la_LDFLAGS = -rpath $(libdir) -export-dynamic -avoid-version -module
+
+EXTRA_DIST=
diff --git a/modules/tamil/taconv.c b/modules/tamil/taconv.c
new file mode 100644
index 00000000..85e15de6
--- /dev/null
+++ b/modules/tamil/taconv.c
@@ -0,0 +1,590 @@
+/* taconv.c
+ * Author: Sivaraj D (sivaraj@tamil.net)
+ * Date : 4-Jan-2000
+ */
+
+/* Warning: These routines are very inefficient.
+ */
+
+#include <stdio.h>
+#include "taconv.h"
+#include "tadefs.h"
+
+/* tsc_find_char: return the zero-based index of the first byte equal to c
+ * in the zero-terminated byte string str, or -1 when c does not occur.
+ * The terminator itself is never matched, so searching for 0 yields -1. */
+int tsc_find_char(unsigned char *str, unsigned char c)
+{
+    unsigned char *p;
+
+    for (p = str; *p != 0; p++) {
+        if (*p == c)
+            return (int)(p - str);
+    }
+    return -1;
+}
+
+/* u_find_char: return the zero-based index of the first element equal to c
+ * in the zero-terminated array of code points str, or -1 when c does not
+ * occur.  The terminating 0 is never matched. */
+int u_find_char(unsigned int *str, unsigned int c)
+{
+    unsigned int *p;
+
+    for (p = str; *p != 0; p++) {
+        if (*p == c)
+            return (int)(p - str);
+    }
+    return -1;
+}
+
+/* tsc_put_amey: store the Unicode consonant for the TSCII akarameriya mey c
+ * at uni_str[i] and return the index just past what was written.
+ * c must satisfy is_tsc_amey().  KSSA (0x87) has no code point of its own
+ * and is written as KA + puLLi + SSA, i.e. three code points. */
+static int tsc_put_amey(unsigned int *uni_str, int i, unsigned char c)
+{
+    if (c == 0x87) {
+        uni_str[i++] = 0x0B95;  /* KA */
+        uni_str[i++] = U_PULLI;
+        c = 0x84;               /* SSA; 0x87 itself is not in tsc_amey */
+    }
+    uni_str[i++] = u_amey[tsc_find_char(tsc_amey, c)];
+    return i;
+}
+
+/* tsc2uni: convert the first TSCII character of tsc_str (one to three
+ * bytes) into Unicode code points stored in uni_str.
+ *  tsc_str, num_in_left   input bytes and how many of them may be read
+ *  uni_str, num_out_left  output array and its free room; 3 is the minimum,
+ *                         4 is needed for KSSA followed by a sign or puLLi
+ *  num_tsc                set to the number of input bytes consumed
+ *  num_uni                set to the number of code points written
+ * Returns TA_SUCCESS, TA_ILL_SEQ (a displayable replacement is still
+ * written), TA_NOT_DEFINED, TA_INSUF_IN or TA_INSUF_OUT.  On the two
+ * TA_INSUF_* results nothing useful has been written.
+ */
+int tsc2uni(unsigned char *tsc_str, unsigned int *uni_str,
+            int *num_tsc, int *num_uni,
+            int num_in_left, int num_out_left)
+{
+    int i = 0, pos;
+    unsigned char c1, c2, c3;
+    unsigned int sign;
+
+    /* default is 1 char */
+    *num_tsc = 1;
+    *num_uni = 1;
+
+    if (num_in_left <= 0)
+        return TA_INSUF_IN;
+    c1 = tsc_str[0];
+    c2 = (num_in_left > 1) ? tsc_str[1] : TA_NULL;
+    c3 = (num_in_left > 2) ? tsc_str[2] : TA_NULL;
+
+    if (num_out_left < 3)
+        return TA_INSUF_OUT;    /* Need at least 3 chars */
+
+    /* us-ascii characters */
+    if (c1 < 0x80) {
+        uni_str[0] = c1;
+        return TA_SUCCESS;
+    }
+
+    /* direct one to one translation - uyirs, aaytham, quotes,
+     * copyright & numbers */
+    pos = tsc_find_char(tsc_uyir, c1);
+    if (pos >= 0) {
+        uni_str[0] = u_uyir[pos];
+        return TA_SUCCESS;
+    }
+
+    /* mey is always amey + puLLi in unicode */
+    if (is_tsc_mey(c1)) {
+        if (c1 == 0x8C) {
+            /* KSSA mey = KA + puLLi + SSA + puLLi.  0x8C is not in tsc_mey,
+             * so continue with the SSA mey, which is. */
+            if (num_out_left < 4)
+                return TA_INSUF_OUT;
+            uni_str[i++] = 0x0B95;
+            uni_str[i++] = U_PULLI;
+            c1 = 0x89;
+        }
+        uni_str[i++] = u_amey[tsc_find_char(tsc_mey, c1)];
+        uni_str[i++] = U_PULLI;
+        *num_uni = i;
+        return TA_SUCCESS;
+    }
+
+    /* ukaram = amey + umodi1 */
+    if ((c1 >= 0xCC && c1 <= 0xDB) || c1 == 0x99 || c1 == 0x9A) {
+        uni_str[i++] = u_amey[tsc_find_char(tsc_ukaram, c1)];
+        uni_str[i++] = U_UMODI1;
+        *num_uni = i;
+        return TA_SUCCESS;
+    }
+
+    /* uukaaram = amey + umodi2 */
+    if ((c1 >= 0xDC && c1 <= 0xEB) || c1 == 0x9B || c1 == 0x9C) {
+        uni_str[i++] = u_amey[tsc_find_char(tsc_uukaaram, c1)];
+        uni_str[i++] = U_UMODI2;
+        *num_uni = i;
+        return TA_SUCCESS;
+    }
+
+    /* TI and TII have their own TSCII bytes */
+    if (c1 == TSC_TI || c1 == TSC_TII) {
+        uni_str[i++] = 0x0B9F;  /* TA */
+        uni_str[i++] = (c1 == TSC_TI) ? U_KOKKI1 : U_KOKKI2;
+        *num_uni = i;
+        return TA_SUCCESS;
+    }
+
+    /* characters starting with akarameriya meys */
+    if (is_tsc_amey(c1)) {
+        i = tsc_put_amey(uni_str, i, c1);
+        switch (c2) {
+        case TSC_KAAL:   sign = U_KAAL;   break;
+        case TSC_KOKKI1: sign = U_KOKKI1; break;
+        case TSC_KOKKI2: sign = U_KOKKI2; break;
+        case TSC_UMODI1: sign = U_UMODI1; break; /* ok, I know this is not correct */
+        case TSC_UMODI2: sign = U_UMODI2; break;
+        default:         sign = 0;        break; /* bare consonant */
+        }
+        if (sign != 0) {
+            /* only KSSA (three code points so far) can run out of room */
+            if (i >= num_out_left)
+                return TA_INSUF_OUT;
+            uni_str[i++] = sign;
+            *num_tsc = 2;
+        }
+        *num_uni = i;
+        return TA_SUCCESS;
+    }
+
+    /* Vowel signs written before the consonant in TSCII:
+     *   KOMBU1 + amey [+ KAAL | AUMODI]  ekaram, okaram, aukaaram
+     *   KOMBU2 + amey [+ KAAL]           eekaaram, ookaaram
+     *   AIMODI + amey                    aikaaram
+     * Unicode stores the consonant first. */
+    if (c1 == TSC_KOMBU1 || c1 == TSC_KOMBU2 || c1 == TSC_AIMODI) {
+        if (c1 == TSC_KOMBU1)
+            sign = U_KOMBU1;
+        else if (c1 == TSC_KOMBU2)
+            sign = U_KOMBU2;
+        else
+            sign = U_AIMODI;
+
+        if (!is_tsc_amey(c2)) {
+            /* if the sequence is illegal, handle it gracefully */
+            uni_str[i++] = U_ZWSP;  /* zero width space */
+            uni_str[i++] = sign;
+            *num_uni = i;
+            return TA_ILL_SEQ;
+        }
+        if (c2 == 0x87 && num_out_left < 4)
+            return TA_INSUF_OUT;
+
+        /* A two-byte sequence at the very end of the input is complete:
+         * c3 is TA_NULL there and selects the plain sign. */
+        *num_tsc = 2;
+        if (c1 != TSC_AIMODI && c3 == TSC_KAAL) {
+            sign = (c1 == TSC_KOMBU1) ? U_OMODI1 : U_OMODI2;
+            *num_tsc = 3;
+        } else if (c1 == TSC_KOMBU1 && c3 == TSC_AUMODI) {
+            sign = U_AUMODI;
+            *num_tsc = 3;
+        }
+        i = tsc_put_amey(uni_str, i, c2);
+        uni_str[i++] = sign;
+        *num_uni = i;
+        return TA_SUCCESS;
+    }
+
+    /* It is illegal in the language for the modifiers to appear alone.
+     * However in practice, they might appear alone, for example, when
+     * teaching the language. We will precede those with a zero width
+     * space to avoid combining them improperly */
+    if (c1 == TSC_KAAL || c1 == TSC_AUMODI ||
+        c1 == TSC_KOKKI1 || c1 == TSC_KOKKI2 ||
+        c1 == TSC_UMODI1 || c1 == TSC_UMODI2) {
+        pos = tsc_find_char(tsc_modi, c1);
+        uni_str[i++] = U_ZWSP;
+        uni_str[i++] = u_modi[pos];
+        *num_uni = i;
+        return TA_ILL_SEQ;
+    }
+
+    /* These two characters were undefined in TSCII */
+    if (c1 == 0xFE || c1 == 0xFF) {
+        uni_str[0] = U_ZWSP;
+        return TA_NOT_DEFINED;
+    }
+
+    /* For everything else, display a space */
+    uni_str[0] = U_SPACE;
+    return TA_ILL_SEQ;
+}
+
+
+/* tsc_emit: store ntsc (1..3) TSCII bytes in tsc_str, record how many code
+ * points were consumed and how many bytes were written, and return
+ * TA_SUCCESS.  b1 and b2 are ignored when ntsc is too small to need them. */
+static int tsc_emit(unsigned char *tsc_str, int *num_uni, int *num_tsc,
+                    int nuni, int ntsc,
+                    unsigned char b0, unsigned char b1, unsigned char b2)
+{
+    tsc_str[0] = b0;
+    if (ntsc > 1)
+        tsc_str[1] = b1;
+    if (ntsc > 2)
+        tsc_str[2] = b2;
+    *num_uni = nuni;
+    *num_tsc = ntsc;
+    return TA_SUCCESS;
+}
+
+/* uni2tsc: convert the first character of uni_str (a consonant together
+ * with up to two following signs, or a single code point) into TSCII bytes
+ * stored in tsc_str.
+ *  uni_str, num_in_left   input code points and how many may be read
+ *  tsc_str, num_out_left  output bytes and free room (at least 3)
+ *  num_uni                set to the number of code points consumed
+ *  num_tsc                set to the number of bytes written
+ * Returns TA_SUCCESS, TA_ILL_SEQ, TA_OUT_OF_RANGE, TA_NOT_DEFINED,
+ * TA_INSUF_IN or TA_INSUF_OUT.  Except for the two TA_INSUF_* results one
+ * byte is always written; it is SPACE when no TSCII equivalent exists.
+ */
+int uni2tsc(unsigned int *uni_str, unsigned char *tsc_str,
+            int *num_uni, int *num_tsc,
+            int num_in_left, int num_out_left)
+{
+    int pos;
+    unsigned int c1, c2, c3;
+    unsigned char base;
+
+    /* default is 1 char */
+    *num_uni = 1;
+    *num_tsc = 1;
+
+    if (num_in_left <= 0)
+        return TA_INSUF_IN;
+    c1 = uni_str[0];
+    c2 = (num_in_left > 1) ? uni_str[1] : TA_NULL;
+    c3 = (num_in_left > 2) ? uni_str[2] : TA_NULL;
+
+    if (num_out_left < 3)
+        return TA_INSUF_OUT;    /* Need at least three chars */
+
+    if (c1 < 0x80) {
+        tsc_str[0] = (unsigned char)c1;
+        return TA_SUCCESS;
+    }
+
+    if (c1 < 0x0B80 || c1 > 0x0BFF) {
+        tsc_str[0] = SPACE;
+        return TA_OUT_OF_RANGE;
+    }
+
+    if (c1 < 0x0B82) {
+        /* write a byte here as well, like every other error path */
+        tsc_str[0] = SPACE;
+        return TA_ILL_SEQ;
+    }
+
+    if (c1 == 0x0B82) {
+        tsc_str[0] = SPACE;
+        return TA_SUCCESS;  /* Don't know any TAMIL SIGN ANUSVARA */
+    }
+
+    /* uyir, aaytham & numbers */
+    if ((c1 >= 0x0B83 && c1 <= 0x0B94) ||
+        (c1 >= 0x0BE7 && c1 <= 0x0BF2)) {
+        pos = u_find_char(u_uyir, c1);
+        if (pos < 0) {
+            tsc_str[0] = SPACE;
+            return TA_NOT_DEFINED;
+        }
+        tsc_str[0] = tsc_uyir[pos];
+        return TA_SUCCESS;
+    }
+
+    /* akarameriya mey, possibly followed by signs */
+    if (c1 >= 0x0B95 && c1 <= 0x0BB9) {
+        pos = u_find_char(u_amey, c1);
+        if (pos < 0) {
+            tsc_str[0] = SPACE;
+            return TA_NOT_DEFINED;
+        }
+        base = tsc_amey[pos];
+
+        switch (c2) {
+        case U_PULLI:
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 1,
+                            tsc_mey[pos], 0, 0);
+        case U_KAAL:
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 2,
+                            base, TSC_KAAL, 0);
+        case U_KOKKI1:
+            /* TI has a byte of its own */
+            if (c1 == 0x0B9F)
+                return tsc_emit(tsc_str, num_uni, num_tsc, 2, 1,
+                                TSC_TI, 0, 0);
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 2,
+                            base, TSC_KOKKI1, 0);
+        case U_KOKKI2:
+            /* TII has a byte of its own */
+            if (c1 == 0x0B9F)
+                return tsc_emit(tsc_str, num_uni, num_tsc, 2, 1,
+                                TSC_TII, 0, 0);
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 2,
+                            base, TSC_KOKKI2, 0);
+        case U_UMODI1:
+            /* If it is a grantha add a hook, otherwise
+             * we have separate chars in TSCII */
+            if (u_find_char(u_grantha, c1) < 0)
+                return tsc_emit(tsc_str, num_uni, num_tsc, 2, 1,
+                                tsc_ukaram[pos], 0, 0);
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 2,
+                            base, TSC_UMODI1, 0);
+        case U_UMODI2:
+            if (u_find_char(u_grantha, c1) < 0)
+                return tsc_emit(tsc_str, num_uni, num_tsc, 2, 1,
+                                tsc_uukaaram[pos], 0, 0);
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 2,
+                            base, TSC_UMODI2, 0);
+        case U_KOMBU1:
+            /* Unicode seems to allow double modifiers for
+             * okaram, ookaaram & aukaaram. This is
+             * somewhat unnecessary. But we will handle
+             * that condition too.
+             */
+            if (c3 == U_KAAL)
+                return tsc_emit(tsc_str, num_uni, num_tsc, 3, 3,
+                                TSC_KOMBU1, base, TSC_KAAL);
+            if (c3 == U_AUMARK)
+                return tsc_emit(tsc_str, num_uni, num_tsc, 3, 3,
+                                TSC_KOMBU1, base, TSC_AUMODI);
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 2,
+                            TSC_KOMBU1, base, 0);
+        case U_KOMBU2:
+            if (c3 == U_KAAL)
+                return tsc_emit(tsc_str, num_uni, num_tsc, 3, 3,
+                                TSC_KOMBU2, base, TSC_KAAL);
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 2,
+                            TSC_KOMBU2, base, 0);
+        case U_AIMODI:
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 2,
+                            TSC_AIMODI, base, 0);
+        case U_OMODI1:
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 3,
+                            TSC_KOMBU1, base, TSC_KAAL);
+        case U_OMODI2:
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 3,
+                            TSC_KOMBU2, base, TSC_KAAL);
+        case U_AUMODI:
+            return tsc_emit(tsc_str, num_uni, num_tsc, 2, 3,
+                            TSC_KOMBU1, base, TSC_AUMODI);
+        default:
+            /* bare consonant */
+            return tsc_emit(tsc_str, num_uni, num_tsc, 1, 1, base, 0, 0);
+        }
+    }
+
+    /* modifiers - illegal sequence */
+    if (c1 >= 0x0BBE && c1 <= 0x0BD7) {
+        pos = u_find_char(u_modi, c1);
+        if (pos < 0) {
+            tsc_str[0] = SPACE;
+            return TA_NOT_DEFINED;
+        }
+        tsc_str[0] = tsc_modi[pos];
+        return TA_ILL_SEQ;
+    }
+
+    tsc_str[0] = SPACE;
+    return TA_NOT_DEFINED;
+}
+
+/* is_tsc_uyir: TA_TRUE when c lies in the TSCII range 0xAB-0xB7,
+ * TA_FALSE otherwise. */
+int is_tsc_uyir(unsigned char c)
+{
+    return (c < 0xAB || c > 0xB7) ? TA_FALSE : TA_TRUE;
+}
+
+/* is_tsc_modi: TA_TRUE when c is 0xAA or lies in 0xA1-0xA8,
+ * TA_FALSE otherwise. */
+int is_tsc_modi(unsigned char c)
+{
+    if (c == 0xAA)
+        return TA_TRUE;
+    return (c >= 0xA1 && c <= 0xA8) ? TA_TRUE : TA_FALSE;
+}
+
+/* is_tsc_amey: TA_TRUE when c lies in 0x83-0x87 or 0xB8-0xC9,
+ * TA_FALSE otherwise. */
+int is_tsc_amey(unsigned char c)
+{
+    if (c >= 0xB8 && c <= 0xC9)
+        return TA_TRUE;
+    if (c >= 0x83 && c <= 0x87)
+        return TA_TRUE;
+    return TA_FALSE;
+}
+
+/* is_tsc_mey: TA_TRUE when c lies in 0x88-0x8C or 0xEC-0xFD,
+ * TA_FALSE otherwise. */
+int is_tsc_mey(unsigned char c)
+{
+    if (c >= 0xEC && c <= 0xFD)
+        return TA_TRUE;
+    if (c >= 0x88 && c <= 0x8C)
+        return TA_TRUE;
+    return TA_FALSE;
+}
+
+/* is_tsc_number: TA_TRUE when c is one of the TSCII number bytes
+ * (0x81, 0x8D-0x90, 0x95-0x98, 0x9D-0x9F), TA_FALSE otherwise.
+ * The first test is an equality: a ">=" there would accept every byte
+ * from 0x81 upwards and make the remaining ranges pointless. */
+int is_tsc_number(unsigned char c)
+{
+    if (c == 0x81 ||
+        (c >= 0x8D && c <= 0x90) ||
+        (c >= 0x95 && c <= 0x98) ||
+        (c >= 0x9D && c <= 0x9F))
+        return TA_TRUE;
+    else
+        return TA_FALSE;
+}
+
+/* is_uni_uyir: TA_TRUE when c is U+0B83 or lies in one of the ranges
+ * U+0B85-U+0B8A, U+0B8E-U+0B90, U+0B92-U+0B94; TA_FALSE otherwise. */
+int is_uni_uyir(unsigned int c)
+{
+    if (c == 0x0B83)
+        return TA_TRUE;
+    if (c >= 0x0B85 && c <= 0x0B8A)
+        return TA_TRUE;
+    if (c >= 0x0B8E && c <= 0x0B90)
+        return TA_TRUE;
+    if (c >= 0x0B92 && c <= 0x0B94)
+        return TA_TRUE;
+    return TA_FALSE;
+}
+
+/* is_uni_amey: TA_TRUE when c is listed in the u_amey table,
+ * TA_FALSE otherwise. */
+int is_uni_amey(unsigned int c)
+{
+    return (u_find_char(u_amey, c) >= 0) ? TA_TRUE : TA_FALSE;
+}
+
+/* is_uni_modi: TA_TRUE when c is listed in the u_modi table,
+ * TA_FALSE otherwise. */
+int is_uni_modi(unsigned int c)
+{
+    return (u_find_char(u_modi, c) >= 0) ? TA_TRUE : TA_FALSE;
+}
+
+/* is_uni_numb: TA_TRUE when c lies in U+0BE7-U+0BF2 (the range the u_uyir
+ * table uses for numbers), TA_FALSE otherwise. */
+int is_uni_numb(unsigned int c)
+{
+    if (c >= 0x0BE7 && c <= 0x0BF2)
+        return TA_TRUE;
+    else
+        return TA_FALSE;
+}
diff --git a/modules/tamil/taconv.h b/modules/tamil/taconv.h
new file mode 100644
index 00000000..dad44078
--- /dev/null
+++ b/modules/tamil/taconv.h
@@ -0,0 +1,55 @@
+/* taconv.h:
+ * Author: Sivaraj D (sivaraj@tamil.net)
+ * Date : 4-Jan-2000
+ */
+
+#ifndef TACONV_H
+#define TACONV_H
+
+/* Return codes */
+#define TA_SUCCESS 0
+#define TA_ILL_SEQ 1 /* Sequence is illegal in language */
+#define TA_INSUF_IN 2 /* No input left to convert */
+#define TA_INSUF_OUT 3 /* Need at least three chars of output room */
+#define TA_OUT_OF_RANGE 4 /* Char outside 0x00-0x7f or 0xb80-0xbff */
+#define TA_NOT_DEFINED 5 /* Within 0xb80-0xbff but not defined
+                          * by unicode */
+
+#define TA_NULL 0x00
+#define TA_TRUE 1
+#define TA_FALSE 0
+
+/* tsc2uni: Get the first TSCII Tamil character in tsc_str and convert it to
+ * the corresponding unicode character(s) in uni_str.
+ * tsc_str: TSCII string (in)
+ * uni_str: Unicode string (out)
+ * num_tsc: Number of TSCII characters processed (out)
+ * num_uni: Number of Unicode characters returned (out)
+ * num_in_left: Number of characters left in input buffer (in)
+ * num_out_left: Number of characters left in output buffer (in)
+ * Returns one of the TA_* return codes above.
+ */
+int tsc2uni(unsigned char *tsc_str, unsigned int *uni_str,
+            int *num_tsc, int *num_uni,
+            int num_in_left, int num_out_left);
+
+/* uni2tsc: Get the first Unicode character in uni_str and convert it to
+ * the corresponding TSCII Tamil character(s) in tsc_str.
+ * uni_str: Unicode string (in)
+ * tsc_str: TSCII string (out)
+ * num_uni: Number of Unicode characters processed (out)
+ * num_tsc: Number of TSCII characters returned (out)
+ * num_in_left: Number of characters left in input buffer (in)
+ * num_out_left: Number of characters left in output buffer (in)
+ * Returns one of the TA_* return codes above.
+ */
+int uni2tsc(unsigned int *uni_str, unsigned char *tsc_str,
+            int *num_uni, int *num_tsc,
+            int num_in_left, int num_out_left);
+
+int is_tsc_uyir(unsigned char c);   /* Returns 1 if c is a vowel, else 0 */
+int is_tsc_modi(unsigned char c);   /* Returns 1 if c is a modifier */
+int is_tsc_amey(unsigned char c);   /* Returns 1 if c is a akara mey */
+int is_tsc_mey(unsigned char c);    /* Returns 1 if c is a consonant */
+int is_tsc_number(unsigned char c); /* Returns 1 if c is a number */
+
+int is_uni_uyir(unsigned int c);    /* Returns 1 if c is a vowel, else 0 */
+int is_uni_modi(unsigned int c);    /* Returns 1 if c is a modifier */
+int is_uni_amey(unsigned int c);    /* Returns 1 if c is a akara mey */
+int is_uni_numb(unsigned int c);    /* Returns 1 if c is a number */
+
+#endif /* TACONV_H */
+
diff --git a/modules/tamil/tadefs.h b/modules/tamil/tadefs.h
new file mode 100644
index 00000000..5e587471
--- /dev/null
+++ b/modules/tamil/tadefs.h
@@ -0,0 +1,132 @@
+/* Author: Sivaraj D (sivaraj@tamil.net)
+ * Date : 4-Jan-2000
+ */
+
+/* Conversion tables shared by tsc2uni() and uni2tsc().
+ *
+ * The tables are parallel: entry n of a tsc_* table and entry n of the u_*
+ * table it is paired with describe the same character, and every table ends
+ * with a 0 so that tsc_find_char()/u_find_char() can search it.
+ *
+ * NOTE: this header defines objects, it does not merely declare them.
+ * The guard below keeps a second inclusion in the same translation unit
+ * from redefining them; the header must still be included from one
+ * translation unit only.
+ */
+#ifndef TADEFS_H
+#define TADEFS_H
+
+/* Defining Unicode unsigned characters */
+
+/* uyirs, aaytham, copyright, quotes and numbers; parallel to tsc_uyir */
+unsigned int
+u_uyir[] = { 0x0B85, 0x0B86, 0x0B87, 0x0B88,
+             0x0B89, 0x0B8A, 0x0B8E, 0x0B8F,
+             0x0B90, 0x0B92, 0x0B93, 0x0B94,
+             0x0B83, 0x00A9,
+             0x2018, 0x2019, 0x201c, 0x201D,
+             0x0BE7, 0x0BE8, 0x0BE9, 0x0BEA,
+             0x0BEB, 0x0BEC, 0x0BED, 0x0BEE,
+             0x0BEF, 0x0BF0, 0x0BF1, 0x0BF2,
+             0x0000 };
+
+/* akaramEriya mey; parallel to tsc_amey and tsc_mey, and in its first 18
+ * entries to tsc_ukaram and tsc_uukaaram */
+unsigned int
+u_amey[] = { 0x0B95, 0x0B99, 0x0B9A, 0x0B9E,
+             0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8,
+             0x0BAA, 0x0BAE, 0x0BAF, 0x0BB0,
+             0x0BB2, 0x0BB5, 0x0BB4, 0x0BB3,
+             0x0BB1, 0x0BA9, 0x0B9C, 0x0BB7,
+             0x0BB8, 0x0BB9, 0x0000 };
+
+/* vowel signs and puLLi; parallel to tsc_modi */
+unsigned int
+u_modi[] = { 0x0BBE, 0x0BBF, 0x0BC0, 0x0BC1,
+             0x0BC2, 0x0BC6, 0x0BC7, 0x0BC8,
+             0x0BCA, 0x0BCB, 0x0BCC, 0x0BCD,
+             0x0BD7, 0x0000 };
+
+/* grantha consonants; parallel to tsc_grantha */
+unsigned int
+u_grantha[] = { 0x0B9C, 0x0BB7, 0x0BB8, 0x0BB9,
+                0x0000 };
+
+#define U_KAAL 0x0BBE
+#define U_KOKKI1 0x0BBF
+#define U_KOKKI2 0x0BC0
+#define U_UMODI1 0x0BC1
+#define U_UMODI2 0x0BC2
+#define U_KOMBU1 0x0BC6
+#define U_KOMBU2 0x0BC7
+#define U_AIMODI 0x0BC8
+#define U_OMODI1 0x0BCA
+#define U_OMODI2 0x0BCB
+#define U_AUMODI 0x0BCC
+#define U_AUMARK 0x0BD7
+#define U_PULLI 0x0BCD
+
+#define U_SPACE 0x0020
+#define U_ZWSP 0x200B
+#define U_LSQUOT 0x2018
+#define U_RSQUOT 0x2019
+#define U_LDQUOT 0x201C
+#define U_RDQUOT 0x201D
+
+/* Defining TSCII unsigned characters - we define only those unsigned characters
+ * that are useful in Unicode */
+
+#define SPACE 0x20
+
+/* Vowel modifiers */
+#define TSC_KAAL 0xA1
+#define TSC_KOKKI1 0xA2
+#define TSC_KOKKI2 0xA3
+#define TSC_UMODI1 0xA4
+#define TSC_UMODI2 0xA5
+#define TSC_KOMBU1 0xA6
+#define TSC_KOMBU2 0xA7
+#define TSC_AIMODI 0xA8
+#define TSC_AUMODI 0xAA
+
+/* parallel to u_modi; 0xFF fills the slots of the four u_modi entries
+ * (0x0BCA-0x0BCD) that have no single TSCII modifier byte here */
+unsigned char
+tsc_modi[] = { 0xA1, 0xA2, 0xA3, 0xA4,
+               0xA5, 0xA6, 0xA7, 0xA8,
+               0xFF, 0xFF, 0xFF, 0xFF,
+               0xAA, 0x00 };
+
+/* all uyirs & aaytham: these convert directly to unicode */
+unsigned char
+tsc_uyir[] = { 0xAB, 0xAC, 0xAD, 0xAE, /* a, aa, i, ii */
+               0xAF, 0xB0, 0xB1, 0xB2, /* u, uu, e, ee */
+               0xB3, 0xB4, 0xB5, 0xB6, /* ai, o, oo, au */
+               0xB7, 0xA9,             /* aaytham, copyright*/
+               0x91, 0x92, 0x93, 0x94, /* quotes */
+               0x81, 0x8D, 0x8E, 0x8F, /* numbers */
+               0x90, 0x95, 0x96, 0x97,
+               0x98, 0x9D, 0x9E, 0x9F,
+               0x00 };
+
+/* all mey (0x8C is not listed) */
+unsigned char
+tsc_mey[] = { 0xEC, 0xED, 0xEE, 0xEF,
+              0xF0, 0xF1, 0xF2, 0xF3,
+              0xF4, 0xF5, 0xF6, 0xF7,
+              0xF8, 0xF9, 0xFA, 0xFB,
+              0xFC, 0xFD, 0x88, 0x89,
+              0x8A, 0x8B, 0x00 };
+
+/* akaramEriya mey (0x87 is not listed) */
+unsigned char
+tsc_amey[] = { 0xB8, 0xB9, 0xBA, 0xBB,
+               0xBC, 0xBD, 0xBE, 0xBF,
+               0xC0, 0xC1, 0xC2, 0xC3,
+               0xC4, 0xC5, 0xC6, 0xC7,
+               0xC8, 0xC9, 0x83, 0x84,
+               0x85, 0x86, 0x00 };
+
+
+/* ikaram, iikaaram for T */
+#define TSC_TI 0xCA
+#define TSC_TII 0xCB
+
+/* ukaram, uukaaram & grantha ameys*/
+unsigned char
+tsc_ukaram[] = { 0xCC, 0x99, 0xCD, 0x9A,
+                 0xCE, 0xCF, 0xD0, 0xD1,
+                 0xD2, 0xD3, 0xD4, 0xD5,
+                 0xD6, 0xD7, 0xD8, 0xD9,
+                 0xDA, 0xDB, 0x00 };
+
+unsigned char
+tsc_uukaaram[] = { 0xDC, 0x9B, 0xDD, 0x9C,
+                   0xDE, 0xDF, 0xE0, 0xE1,
+                   0xE2, 0xE3, 0xE4, 0xE5,
+                   0xE6, 0xE7, 0xE8, 0xE9,
+                   0xEA, 0xEB, 0x00 };
+
+unsigned char
+tsc_grantha[] = { 0x83, 0x84, 0x85, 0x86, 0x00 };
+
+#endif /* TADEFS_H */
+
diff --git a/modules/tamil/tamil-x.c b/modules/tamil/tamil-x.c
new file mode 100644
index 00000000..934bd216
--- /dev/null
+++ b/modules/tamil/tamil-x.c
@@ -0,0 +1,226 @@
+/* Pango - Tamil module
+ * tamil-x.c:
+ *
+ * Copyright (C) 2000 Sivaraj D
+ *
+ */
+
+#include <stdio.h>
+#include <glib.h>
+#include "pango.h"
+#include "pangox.h"
+#include "utils.h"
+#include "taconv.h"
+
+/* Number of elements in the array strct.  strct must be a real array, not
+ * a pointer.  The expansion is parenthesised so that it can be used inside
+ * a larger expression. */
+#define MEMBERS(strct) (sizeof(strct) / sizeof((strct)[0]))
+
+/* The one range this module registers for: 0x0b80-0x0bff.
+ * NOTE(review): the third field "*" is presumably a language wildcard --
+ * confirm against the PangoEngineRange declaration. */
+static PangoEngineRange tamil_range[] = {
+ { 0x0b80, 0x0bff, "*" },
+};
+
+/* Engines reported by script_engine_list().  The id strings are the ones
+ * script_engine_load() compares its argument against. */
+static PangoEngineInfo script_engines[] = {
+ {
+ "TamilScriptEngineLang",
+ PANGO_ENGINE_TYPE_LANG,
+ PANGO_RENDER_TYPE_NONE,
+ tamil_range, MEMBERS(tamil_range)
+ },
+ {
+ "TamilScriptEngineX",
+ PANGO_ENGINE_TYPE_SHAPE,
+ PANGO_RENDER_TYPE_X,
+ tamil_range, MEMBERS(tamil_range)
+ }
+};
+
+/* Number of entries in script_engines. */
+static gint n_script_engines = MEMBERS (script_engines);
+
+/*
+ * Language script engine
+ */
+
+/* tamil_engine_break: fill attrs[] with one entry per character of text.
+ *  text      UTF-8 text; scanning stops at a NUL byte or after len bytes
+ *  len       number of bytes of text to look at
+ *  analysis  not used here
+ *  attrs     output, one element per decoded character
+ * Most of the code comes from pango_break; the only difference is that a
+ * character for which is_uni_modi() is true is not a character stop.
+ * Returns early, leaving the remaining entries untouched, when
+ * _pango_utf8_iterate() reports failure.
+ */
+static void
+tamil_engine_break (gchar *text,
+                    gint len,
+                    PangoAnalysis *analysis,
+                    PangoLogAttr *attrs)
+{
+  gchar *cur = text;
+  gchar *next;
+  gint i = 0;
+  GUChar4 wc;
+
+  /* Check the offset before dereferencing so that text[len] is not read. */
+  while ((cur - text) < len && *cur)
+    {
+      if (!_pango_utf8_iterate (cur, &next, &wc))
+        return;
+      if (cur == next)
+        break;
+      if ((next - text) > len)
+        break;
+      cur = next;
+
+      /* white space is blank, tab or newline */
+      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == '\n') ? 1 : 0;
+      attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
+      attrs[i].is_char_stop = (is_uni_modi (wc)) ? 0 : 1;
+      attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
+
+      i++;
+    }
+}
+
+/* tamil_engine_lang_new: allocate and fill in the language engine whose
+ * script_break callback is tamil_engine_break().  The result is allocated
+ * with g_new() and returned as a PangoEngine pointer.
+ * NOTE(review): the id stored here is "TamilScriptEngine" while the engine
+ * table advertises "TamilScriptEngineLang" -- confirm which one callers
+ * expect. */
+static PangoEngine *
+tamil_engine_lang_new ()
+{
+  PangoEngineLang *result;
+
+  result = g_new (PangoEngineLang, 1);
+
+  result->engine.id = "TamilScriptEngine";
+  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  /* size of the structure, not of the pointer to it */
+  result->engine.length = sizeof (*result);
+  result->script_break = tamil_engine_break;
+
+  return (PangoEngine *)result;
+}
+
+/*
+ * X window system script engine portion
+ */
+
+/* set_glyph: fill slot i of glyphs with the glyph `glyph` of font cfont.
+ * Offsets are zero, the cluster index is i itself, and the width is the
+ * value reported by pango_x_glyph_extents() multiplied by 72.
+ *
+ * Some type of kerning support will be needed for ikaram/iikaaram, but we
+ * can live with this for the time being.
+ */
+static void
+set_glyph (PangoGlyphString *glyphs, gint i, PangoCFont *cfont, PangoGlyphIndex glyph)
+{
+  gint extent_width;
+
+  glyphs->log_clusters[i] = i;
+
+  glyphs->glyphs[i].glyph = glyph;
+  glyphs->glyphs[i].font = cfont;
+
+  pango_x_glyph_extents (&glyphs->glyphs[i],
+                         NULL, NULL, &extent_width, NULL, NULL, NULL, NULL);
+
+  glyphs->geometry[i].width = extent_width * 72;
+  glyphs->geometry[i].y_offset = 0;
+  glyphs->geometry[i].x_offset = 0;
+}
+
+/* tamil_engine_shape: turn UTF-8 text into glyphs of the "tscii-0" font.
+ *  font      font in which the "tscii-0" component font is looked up
+ *  text      UTF-8 text, length bytes long
+ *  analysis  only checked for NULL here
+ *  glyphs    output; resized to the number of glyphs produced
+ * The text is decoded to code points, uni2tsc() converts them one character
+ * at a time, and every resulting TSCII byte becomes one glyph.  Anything
+ * uni2tsc() does not convert successfully is shown as a blank.
+ */
+static void
+tamil_engine_shape (PangoFont *font,
+                    gchar *text,
+                    gint length,
+                    PangoAnalysis *analysis,
+                    PangoGlyphString *glyphs)
+{
+  int n_chars, n_glyph;
+  int i, j;
+  char *p, *next;
+  GUChar4 *wc, *uni_str;
+  int res;
+  unsigned char tsc_str[6];
+  int ntsc, nuni;
+
+  PangoCFont *tscii_font = NULL;
+
+  g_return_if_fail (font != NULL);
+  g_return_if_fail (text != NULL);
+  g_return_if_fail (length >= 0);
+  g_return_if_fail (analysis != NULL);
+
+  tscii_font = pango_x_find_cfont (font, "tscii-0");
+  /* Guard the ref the same way the unref at the end is guarded.
+   * NOTE(review): with no font found, set_glyph() is still handed NULL --
+   * confirm what pango_x_glyph_extents() does in that case. */
+  if (tscii_font)
+    pango_cfont_ref (tscii_font);
+
+  n_chars = _pango_utf8_len (text, length);
+
+  /* temporarily set the size to 3 times the number of unicode chars */
+  pango_glyph_string_set_size (glyphs, n_chars * 3);
+  wc = (GUChar4 *)g_malloc(sizeof(GUChar4)*n_chars);
+
+  p = text;
+  for (i = 0; i < n_chars; i++)
+    {
+      /* On a decoding failure `next` cannot be trusted: keep what was
+       * decoded so far and stop. */
+      if (!_pango_utf8_iterate (p, &next, &wc[i]))
+        {
+          n_chars = i;
+          break;
+        }
+      p = next;
+    }
+
+  n_glyph = 0;
+  uni_str = wc;
+
+  /* uni2tsc() always reports at least one consumed code point when input
+   * is left, so j advances on every pass. */
+  for (j = 0; j < n_chars; j += nuni)
+    {
+      res = uni2tsc (uni_str, tsc_str, &nuni, &ntsc,
+                     n_chars - j, (int) sizeof tsc_str);
+      uni_str += nuni;
+
+      /* We need to differentiate between different return codes later */
+      if (res != TA_SUCCESS)
+        {
+          set_glyph (glyphs, n_glyph, tscii_font, ' ');
+          n_glyph++;
+          continue;
+        }
+      for (i = 0; i < ntsc; i++)
+        {
+          set_glyph (glyphs, n_glyph, tscii_font, (PangoGlyphIndex) tsc_str[i]);
+          n_glyph++;
+        }
+    }
+
+  pango_glyph_string_set_size (glyphs, n_glyph);
+
+  if (tscii_font)
+    pango_cfont_unref (tscii_font);
+  g_free (wc);
+}
+
+/* tamil_engine_x_new: allocate and fill in the X shaping engine whose
+ * script_shape callback is tamil_engine_shape().  The result is allocated
+ * with g_new() and returned as a PangoEngine pointer.
+ * NOTE(review): the id stored here is "TamilScriptEngine" while the engine
+ * table advertises "TamilScriptEngineX" -- confirm which one callers
+ * expect. */
+static PangoEngine *
+tamil_engine_x_new ()
+{
+  PangoEngineShape *result;
+
+  result = g_new (PangoEngineShape, 1);
+
+  result->engine.id = "TamilScriptEngine";
+  /* a shape engine, as the engine table says for "TamilScriptEngineX" */
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
+  /* size of the structure, not of the pointer to it */
+  result->engine.length = sizeof (*result);
+  result->script_shape = tamil_engine_shape;
+
+  return (PangoEngine *)result;
+}
+
+/* The following three functions provide the public module API for
+ * Pango
+ */
+/* script_engine_list: report the engine table of this module.
+ *  engines    receives the address of the static script_engines table
+ *  n_engines  receives the number of entries in it
+ */
+void
+script_engine_list (PangoEngineInfo **engines, gint *n_engines)
+{
+  *n_engines = n_script_engines;
+  *engines = script_engines;
+}
+
+/* script_engine_load: create the engine named by id.
+ * "TamilScriptEngineLang" gives the language engine, "TamilScriptEngineX"
+ * the X shaping engine; any other id gives NULL. */
+PangoEngine *
+script_engine_load (const char *id)
+{
+  if (strcmp (id, "TamilScriptEngineLang") == 0)
+    return tamil_engine_lang_new ();
+
+  if (strcmp (id, "TamilScriptEngineX") == 0)
+    return tamil_engine_x_new ();
+
+  return NULL;
+}
+
+/* script_engine_unload: currently does nothing with engine. */
+void
+script_engine_unload (PangoEngine *engine)
+{
+ /* NOTE(review): the engines are allocated with g_new() in the *_new
+  * functions and nothing is released here -- confirm who frees them. */
+}
+
diff --git a/modules/tamil/tamil.c b/modules/tamil/tamil.c
new file mode 100644
index 00000000..934bd216
--- /dev/null
+++ b/modules/tamil/tamil.c
@@ -0,0 +1,226 @@
+/* Pango - Tamil module
+ * tamil.c:
+ *
+ * Copyright (C) 2000 Sivaraj D
+ *
+ */
+
+#include <stdio.h>
+#include <glib.h>
+#include "pango.h"
+#include "pangox.h"
+#include "utils.h"
+#include "taconv.h"
+
+/* Number of elements in the array strct.  strct must be a real array, not
+ * a pointer.  The expansion is parenthesised so that it can be used inside
+ * a larger expression. */
+#define MEMBERS(strct) (sizeof(strct) / sizeof((strct)[0]))
+
+/* The one range this module registers for: 0x0b80-0x0bff.
+ * NOTE(review): the third field "*" is presumably a language wildcard --
+ * confirm against the PangoEngineRange declaration. */
+static PangoEngineRange tamil_range[] = {
+ { 0x0b80, 0x0bff, "*" },
+};
+
+/* Engines reported by script_engine_list().  The id strings are the ones
+ * script_engine_load() compares its argument against. */
+static PangoEngineInfo script_engines[] = {
+ {
+ "TamilScriptEngineLang",
+ PANGO_ENGINE_TYPE_LANG,
+ PANGO_RENDER_TYPE_NONE,
+ tamil_range, MEMBERS(tamil_range)
+ },
+ {
+ "TamilScriptEngineX",
+ PANGO_ENGINE_TYPE_SHAPE,
+ PANGO_RENDER_TYPE_X,
+ tamil_range, MEMBERS(tamil_range)
+ }
+};
+
+/* Number of entries in script_engines. */
+static gint n_script_engines = MEMBERS (script_engines);
+
+/*
+ * Language script engine
+ */
+
+/* tamil_engine_break: fill attrs[] with one entry per character of text.
+ *  text      UTF-8 text; scanning stops at a NUL byte or after len bytes
+ *  len       number of bytes of text to look at
+ *  analysis  not used here
+ *  attrs     output, one element per decoded character
+ * Most of the code comes from pango_break; the only difference is that a
+ * character for which is_uni_modi() is true is not a character stop.
+ * Returns early, leaving the remaining entries untouched, when
+ * _pango_utf8_iterate() reports failure.
+ */
+static void
+tamil_engine_break (gchar *text,
+                    gint len,
+                    PangoAnalysis *analysis,
+                    PangoLogAttr *attrs)
+{
+  gchar *cur = text;
+  gchar *next;
+  gint i = 0;
+  GUChar4 wc;
+
+  /* Check the offset before dereferencing so that text[len] is not read. */
+  while ((cur - text) < len && *cur)
+    {
+      if (!_pango_utf8_iterate (cur, &next, &wc))
+        return;
+      if (cur == next)
+        break;
+      if ((next - text) > len)
+        break;
+      cur = next;
+
+      /* white space is blank, tab or newline */
+      attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == '\n') ? 1 : 0;
+      attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white;
+      attrs[i].is_char_stop = (is_uni_modi (wc)) ? 0 : 1;
+      attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white;
+
+      i++;
+    }
+}
+
+/* tamil_engine_lang_new: allocate and fill in the language engine whose
+ * script_break callback is tamil_engine_break().  The result is allocated
+ * with g_new() and returned as a PangoEngine pointer.
+ * NOTE(review): the id stored here is "TamilScriptEngine" while the engine
+ * table advertises "TamilScriptEngineLang" -- confirm which one callers
+ * expect. */
+static PangoEngine *
+tamil_engine_lang_new ()
+{
+  PangoEngineLang *result;
+
+  result = g_new (PangoEngineLang, 1);
+
+  result->engine.id = "TamilScriptEngine";
+  result->engine.type = PANGO_ENGINE_TYPE_LANG;
+  /* size of the structure, not of the pointer to it */
+  result->engine.length = sizeof (*result);
+  result->script_break = tamil_engine_break;
+
+  return (PangoEngine *)result;
+}
+
+/*
+ * X window system script engine portion
+ */
+
+/* set_glyph: fill slot i of glyphs with the glyph `glyph` of font cfont.
+ * Offsets are zero, the cluster index is i itself, and the width is the
+ * value reported by pango_x_glyph_extents() multiplied by 72.
+ *
+ * Some type of kerning support will be needed for ikaram/iikaaram, but we
+ * can live with this for the time being.
+ */
+static void
+set_glyph (PangoGlyphString *glyphs, gint i, PangoCFont *cfont, PangoGlyphIndex glyph)
+{
+  gint extent_width;
+
+  glyphs->log_clusters[i] = i;
+
+  glyphs->glyphs[i].glyph = glyph;
+  glyphs->glyphs[i].font = cfont;
+
+  pango_x_glyph_extents (&glyphs->glyphs[i],
+                         NULL, NULL, &extent_width, NULL, NULL, NULL, NULL);
+
+  glyphs->geometry[i].width = extent_width * 72;
+  glyphs->geometry[i].y_offset = 0;
+  glyphs->geometry[i].x_offset = 0;
+}
+
+/* tamil_engine_shape: turn UTF-8 text into glyphs of the "tscii-0" font.
+ *  font      font in which the "tscii-0" component font is looked up
+ *  text      UTF-8 text, length bytes long
+ *  analysis  only checked for NULL here
+ *  glyphs    output; resized to the number of glyphs produced
+ * The text is decoded to code points, uni2tsc() converts them one character
+ * at a time, and every resulting TSCII byte becomes one glyph.  Anything
+ * uni2tsc() does not convert successfully is shown as a blank.
+ */
+static void
+tamil_engine_shape (PangoFont *font,
+                    gchar *text,
+                    gint length,
+                    PangoAnalysis *analysis,
+                    PangoGlyphString *glyphs)
+{
+  int n_chars, n_glyph;
+  int i, j;
+  char *p, *next;
+  GUChar4 *wc, *uni_str;
+  int res;
+  unsigned char tsc_str[6];
+  int ntsc, nuni;
+
+  PangoCFont *tscii_font = NULL;
+
+  g_return_if_fail (font != NULL);
+  g_return_if_fail (text != NULL);
+  g_return_if_fail (length >= 0);
+  g_return_if_fail (analysis != NULL);
+
+  tscii_font = pango_x_find_cfont (font, "tscii-0");
+  /* Guard the ref the same way the unref at the end is guarded.
+   * NOTE(review): with no font found, set_glyph() is still handed NULL --
+   * confirm what pango_x_glyph_extents() does in that case. */
+  if (tscii_font)
+    pango_cfont_ref (tscii_font);
+
+  n_chars = _pango_utf8_len (text, length);
+
+  /* temporarily set the size to 3 times the number of unicode chars */
+  pango_glyph_string_set_size (glyphs, n_chars * 3);
+  wc = (GUChar4 *)g_malloc(sizeof(GUChar4)*n_chars);
+
+  p = text;
+  for (i = 0; i < n_chars; i++)
+    {
+      /* On a decoding failure `next` cannot be trusted: keep what was
+       * decoded so far and stop. */
+      if (!_pango_utf8_iterate (p, &next, &wc[i]))
+        {
+          n_chars = i;
+          break;
+        }
+      p = next;
+    }
+
+  n_glyph = 0;
+  uni_str = wc;
+
+  /* uni2tsc() always reports at least one consumed code point when input
+   * is left, so j advances on every pass. */
+  for (j = 0; j < n_chars; j += nuni)
+    {
+      res = uni2tsc (uni_str, tsc_str, &nuni, &ntsc,
+                     n_chars - j, (int) sizeof tsc_str);
+      uni_str += nuni;
+
+      /* We need to differentiate between different return codes later */
+      if (res != TA_SUCCESS)
+        {
+          set_glyph (glyphs, n_glyph, tscii_font, ' ');
+          n_glyph++;
+          continue;
+        }
+      for (i = 0; i < ntsc; i++)
+        {
+          set_glyph (glyphs, n_glyph, tscii_font, (PangoGlyphIndex) tsc_str[i]);
+          n_glyph++;
+        }
+    }
+
+  pango_glyph_string_set_size (glyphs, n_glyph);
+
+  if (tscii_font)
+    pango_cfont_unref (tscii_font);
+  g_free (wc);
+}
+
+/* tamil_engine_x_new: allocate and fill in the X shaping engine whose
+ * script_shape callback is tamil_engine_shape().  The result is allocated
+ * with g_new() and returned as a PangoEngine pointer.
+ * NOTE(review): the id stored here is "TamilScriptEngine" while the engine
+ * table advertises "TamilScriptEngineX" -- confirm which one callers
+ * expect. */
+static PangoEngine *
+tamil_engine_x_new ()
+{
+  PangoEngineShape *result;
+
+  result = g_new (PangoEngineShape, 1);
+
+  result->engine.id = "TamilScriptEngine";
+  /* a shape engine, as the engine table says for "TamilScriptEngineX" */
+  result->engine.type = PANGO_ENGINE_TYPE_SHAPE;
+  /* size of the structure, not of the pointer to it */
+  result->engine.length = sizeof (*result);
+  result->script_shape = tamil_engine_shape;
+
+  return (PangoEngine *)result;
+}
+
+/* The following three functions provide the public module API for
+ * Pango
+ */
+/* script_engine_list: report the engine table of this module.
+ *  engines    receives the address of the static script_engines table
+ *  n_engines  receives the number of entries in it
+ */
+void
+script_engine_list (PangoEngineInfo **engines, gint *n_engines)
+{
+  *n_engines = n_script_engines;
+  *engines = script_engines;
+}
+
+/* script_engine_load: create the engine named by id.
+ * "TamilScriptEngineLang" gives the language engine, "TamilScriptEngineX"
+ * the X shaping engine; any other id gives NULL. */
+PangoEngine *
+script_engine_load (const char *id)
+{
+  if (strcmp (id, "TamilScriptEngineLang") == 0)
+    return tamil_engine_lang_new ();
+
+  if (strcmp (id, "TamilScriptEngineX") == 0)
+    return tamil_engine_x_new ();
+
+  return NULL;
+}
+
+/* script_engine_unload: currently does nothing with engine. */
+void
+script_engine_unload (PangoEngine *engine)
+{
+ /* NOTE(review): the engines are allocated with g_new() in the *_new
+  * functions and nothing is released here -- confirm who frees them. */
+}
+