diff options
author | Owen Taylor <otaylor@src.gnome.org> | 2000-01-13 04:12:07 +0000 |
---|---|---|
committer | Owen Taylor <otaylor@src.gnome.org> | 2000-01-13 04:12:07 +0000 |
commit | 282fb5fc9f9d01c4d933a6eb6bee6ae9f0d512c7 (patch) | |
tree | 0b19c69692a2fe52a1b33d5e27c97b26a64417e3 /modules/tamil | |
parent | 66ae87f9fc45f5f564acf5014e9bd1341d052958 (diff) | |
download | pango-282fb5fc9f9d01c4d933a6eb6bee6ae9f0d512c7.tar.gz |
Initial revision
Diffstat (limited to 'modules/tamil')
-rw-r--r-- | modules/tamil/Makefile.am | 10 | ||||
-rw-r--r-- | modules/tamil/taconv.c | 590 | ||||
-rw-r--r-- | modules/tamil/taconv.h | 55 | ||||
-rw-r--r-- | modules/tamil/tadefs.h | 132 | ||||
-rw-r--r-- | modules/tamil/tamil-x.c | 226 | ||||
-rw-r--r-- | modules/tamil/tamil.c | 226 |
6 files changed, 1239 insertions, 0 deletions
diff --git a/modules/tamil/Makefile.am b/modules/tamil/Makefile.am new file mode 100644 index 00000000..476a14fa --- /dev/null +++ b/modules/tamil/Makefile.am @@ -0,0 +1,10 @@ +## Process this file with automake to create Makefile.in. + +noinst_LTLIBRARIES = pango-tamil.la + +INCLUDES = -I$(top_srcdir)/libpango/ + +pango_tamil_la_SOURCES = tamil.c taconv.c taconv.h tadefs.h +pango_tamil_la_LDFLAGS = -rpath $(libdir) -export-dynamic -avoid-version -module + +EXTRA_DIST= diff --git a/modules/tamil/taconv.c b/modules/tamil/taconv.c new file mode 100644 index 00000000..85e15de6 --- /dev/null +++ b/modules/tamil/taconv.c @@ -0,0 +1,590 @@ +/* taconv.c + * Author: Sivaraj D (sivaraj@tamil.net) + * Date : 4-Jan-2000 + */ + +/* Warning: These routines are very inefficient. + */ + +#include <stdio.h> +#include "taconv.h" +#include "tadefs.h" + +/* tsc_find_char: Search the zero-terminated byte string for the given + * character and return its zero-based position, or -1 if it is absent. + * The terminator itself can never be matched. */ +int tsc_find_char(unsigned char *str, unsigned char c) +{ + int i=0; + while(str[i]) { + if (c==str[i]) + return i; + i++; + } + return -1; /* character not found */ +} + +/* u_find_char: Search the zero-terminated string of integers for the given + * unicode character and return its zero-based position, or -1 if absent */ +int u_find_char(unsigned int *str, unsigned int c) +{ + int i=0; + while(str[i]) { + if (c==str[i]) + return i; + i++; + } + return -1; /* character not found */ +} + +/* tsc2uni: convert the first TSCII character (up to three input bytes) + * of tsc_str into unicode code points stored in uni_str; *num_tsc and + * *num_uni receive the number of bytes consumed and code points written. + * Returns one of the TA_* codes. */ +int tsc2uni(unsigned char *tsc_str, unsigned int *uni_str, + int *num_tsc, int *num_uni, + int num_in_left, int num_out_left) +{ + int i = 0, pos; + unsigned char c1, c2, c3; + + /* default is 1 char */ + *num_tsc = 1; + *num_uni = 1; + + if (num_in_left > 0) + c1 = tsc_str[0]; + else + return TA_INSUF_IN; + + /* c2/c3 are look-ahead bytes; TA_NULL when the input is exhausted */ + if (num_in_left > 1) + c2 = tsc_str[1]; + else + c2 = TA_NULL; + + if (num_in_left > 2) + c3 = tsc_str[2]; + else + c3 = TA_NULL; + + if (num_out_left < 3) + return TA_INSUF_OUT; /* Need at least 3 chars. NOTE(review): the 0x8C / 0x87 branches of this function appear to store 4 code points - confirm this bound */ + + /* us-ascii characters */ + if (c1 < 0x80) { + uni_str[i] = (int)c1; + return TA_SUCCESS; + }
+ + /* direct one to one translation - uyirs, aaytham, quotes, + * copyright & numbers */ + if((pos = tsc_find_char(tsc_uyir, c1)) >= 0 ) { + uni_str[i] = u_uyir[pos]; + return TA_SUCCESS; + } + + /* mey is always amey + puLLI in unicode */ + if (is_tsc_mey(c1)) { + if (c1 == 0x8C) { /* KSH = KA+puLLi+SHA+puLLi: four code points */ if (num_out_left < 4) return TA_INSUF_OUT; + uni_str[i++] = 0x0B95; + uni_str[i++] = 0x0BCD; c1 = 0x89; /* 0x8C is not in tsc_mey; finish as plain SH so pos is valid */ + } + pos = tsc_find_char(tsc_mey, c1); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_PULLI; + *num_uni = i+1; + return TA_SUCCESS; + } + + /* ukaram = amey + umodi1 */ + if ((c1 >= 0xCC && c1 <= 0xDB) || + c1 == 0x99 || c1 == 0x9A ) { + pos = tsc_find_char(tsc_ukaram, c1); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_UMODI1; + *num_uni = i+1; + return TA_SUCCESS; + } + + /* uukaram = amey + umodi2 */ + if ((c1 >= 0xDC && c1 <= 0xEB) || + c1 == 0x9B || c1 == 0x9C ) { + pos = tsc_find_char(tsc_uukaaram, c1); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_UMODI2; + *num_uni = i+1; + return TA_SUCCESS; + } + + /* TI */ + if (c1 == TSC_TI) { + uni_str[i++] = 0x0B9F; /* TA */ + uni_str[i] = U_KOKKI1; + *num_uni = i+1; + return TA_SUCCESS; + } + + /* TII (was tested against TSC_TI, which made this branch unreachable) */ + if (c1 == TSC_TII) { + uni_str[i++] = 0x0B9F; /* TA */ + uni_str[i] = U_KOKKI2; + *num_uni = i+1; + return TA_SUCCESS; + } + + /* characters starting with akarameriya meys */ + if (is_tsc_amey(c1)) { + if (c1 == 0x87) { /* KSHA = KA+puLLi+SHA in unicode */ if (num_out_left < 4) return TA_INSUF_OUT; /* a following modifier makes four code points */ + uni_str[i++] = 0x0B95; /* KA */ + uni_str[i++] = 0x0BCD; /* puLLi */ c1 = 0x84; /* 0x87 is not in tsc_amey; continue as plain SHA so every lookup below finds it */ + } + pos = tsc_find_char(tsc_amey, c1); + switch(c2) { + case TSC_KAAL: + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_KAAL; + *num_tsc = 2; + *num_uni = i+1; + return TA_SUCCESS; + case TSC_KOKKI1: + pos = tsc_find_char(tsc_amey, c1); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_KOKKI1; + *num_tsc = 2; + *num_uni = i+1; + return TA_SUCCESS; + case TSC_KOKKI2: + pos = tsc_find_char(tsc_amey, c1); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_KOKKI2; + *num_tsc = 2; + *num_uni = i+1; + return TA_SUCCESS; + case TSC_UMODI1: /* ok, I know this is not correct */ +
pos = tsc_find_char(tsc_amey, c1); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_UMODI1; + *num_tsc = 2; + *num_uni = i+1; + return TA_SUCCESS; + case TSC_UMODI2: + pos = tsc_find_char(tsc_amey, c1); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_UMODI2; + *num_tsc = 2; + *num_uni = i+1; + return TA_SUCCESS; + default: + pos = tsc_find_char(tsc_amey, c1); + uni_str[i] = u_amey[pos]; + *num_uni = i+1; + return TA_SUCCESS; + } + } + + /* ekaram, okaram & aukaaram: the kombu precedes its consonant in TSCII + * but the vowel sign follows it in unicode. The consonant must be one + * listed in tsc_amey: 0xB8-0xC9 or the grantha letters 0x83-0x86. + * NOTE(review): the num_in_left > 2 test rejects a kombu+consonant pair + * that ends the input; the KOMBU2 branch has no such test - confirm + * which behaviour is intended. */ + if (c1 == TSC_KOMBU1) { + if (((c2 >= 0xB8 && c2 <= 0xC9) || + (c2 >= 0x83 && c2 <= 0x86)) && + num_in_left > 2) { + pos = tsc_find_char(tsc_amey, c2); + switch(c3) { + case TSC_KAAL: + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_OMODI1; + *num_tsc = 3; + *num_uni = i+1; + return TA_SUCCESS; + case TSC_AUMODI: + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_AUMODI; + *num_tsc = 3; + *num_uni = i+1; + return TA_SUCCESS; + default: /* it is ekaram */ + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_KOMBU1; + *num_tsc = 2; + *num_uni = i+1; + return TA_SUCCESS; + } /* switch */ + } /* c2 */ + else { + /* if the sequence is illegal, handle it gracefully */ + uni_str[i++] = U_ZWSP; /* zero width space */ + uni_str[i] = U_KOMBU1; + *num_uni = i+1; + return TA_ILL_SEQ; + } /* c2 */ + } /* c1 */ + + + /* eekaaram, ookaaram (same consonant ranges as above) */ + if (c1 == TSC_KOMBU2) { + if ((c2 >= 0xB8 && c2 <= 0xC9) || + (c2 >= 0x83 && c2 <= 0x86)) { + switch(c3) { + case TSC_KAAL: + pos = tsc_find_char(tsc_amey, c2); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_OMODI2; + *num_tsc = 3; + *num_uni = i+1; + return TA_SUCCESS; + default: /* it is eekaaram */ + pos = tsc_find_char(tsc_amey, c2); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_KOMBU2; + *num_tsc = 2; + *num_uni = i+1; + return TA_SUCCESS; + } /* switch */ + } /* c2 */ + else { + /* if the sequence is illegal, handle it gracefully */ + uni_str[i++] = U_ZWSP; /* zero width space */ + uni_str[i] = U_KOMBU2; + *num_uni = i+1; + return TA_ILL_SEQ; + } /* c2 */ + } /* c1 */ + + /*
aikaaram */ + if (c1 == TSC_AIMODI) { + if ((c2 >= 0xB8 && c2 <= 0xC9) || + (c2 >= 0x93 && c2 <= 0x96)) { + pos = tsc_find_char(tsc_amey, c2); + uni_str[i++] = u_amey[pos]; + uni_str[i] = U_AIMODI; + *num_tsc = 2; + *num_uni = i+1; + return TA_SUCCESS; + } /* c2 */ + else { + /* if the sequence is illegal, handle it gracefully */ + uni_str[i++] = U_ZWSP; /* zero width space */ + uni_str[i] = U_AIMODI; + *num_uni = i+1; + return TA_ILL_SEQ; + } /* c2 */ + } /* c1 */ + + /* It is illegal in the language for the modifiers to appear alone. + * However in practice, they might appear alone, for example, when + * teaching the language. We will precede those with a zero width + * space to avoid combining them improperly */ + if (c1 == TSC_KAAL || c1 == TSC_AUMODI || + c1 == TSC_KOKKI1 || c1 == TSC_KOKKI2 || + c1 == TSC_UMODI1 || c1 == TSC_UMODI2 ) { + pos = tsc_find_char(tsc_modi, c1); + uni_str[i++] = U_ZWSP; + uni_str[i] = u_modi[pos]; + *num_uni = i+1; + return TA_ILL_SEQ; + } + + /* These two characters were undefined in TSCII */ + if (c1 == 0xFE || c1 == 0xFF ) { + uni_str[i++] = U_ZWSP; + return TA_NOT_DEFINED; + } + + /* For everything else, display a space */ + uni_str[i++] = U_SPACE; + return TA_ILL_SEQ; +} + + +int uni2tsc(unsigned int *uni_str, unsigned char *tsc_str, + int *num_uni, int *num_tsc, + int num_in_left, int num_out_left) +{ + int i = 0, pos; + unsigned int c1, c2, c3; + + /* default is 1 char */ + *num_uni = 1; + *num_tsc = 1; + + if (num_in_left > 0) + c1 = uni_str[0]; + else + return TA_INSUF_IN; + + if (num_in_left > 1) + c2 = uni_str[1]; + else + c2 = TA_NULL; + + if (num_in_left > 2) + c3 = uni_str[2]; + else + c3 = TA_NULL; + + if (num_out_left < 3) + return TA_INSUF_OUT; /* Need atleast three chars */ + + if (c1 < 0x80) { + tsc_str[i] = (char)c1; + return TA_SUCCESS; + } + + if (c1 < 0x0B80 || c1 > 0x0BFF) { + tsc_str[i] = SPACE; + return TA_OUT_OF_RANGE; + } + + if (c1 < 0x0B82) + return TA_ILL_SEQ; + + if (c1 == 0x0B82) { + tsc_str[i] = 
SPACE; + return TA_SUCCESS; /* TSCII has no TAMIL SIGN ANUSVARA; emit a space */ + } + + /* uyir, aaytham & numbers: direct table lookup, u_uyir -> tsc_uyir */ + if ((c1 >= 0x0B83 && c1 <= 0x0B94) || + (c1 >= 0x0BE7 && c1 <= 0x0BF2)) { + if ((pos = u_find_char(u_uyir, c1)) < 0) { + tsc_str[i] = SPACE; + return TA_NOT_DEFINED; + } + tsc_str[i] = tsc_uyir[pos]; + return TA_SUCCESS; + } + + /* akarameriya mey: pos is the consonant's index in u_amey; the following + * code point c2 selects which TSCII form is emitted */ + if (c1 >= 0x0B95 && c1 <= 0x0BB9) { + if ((pos = u_find_char(u_amey, c1)) < 0) { + tsc_str[i] = SPACE; + return TA_NOT_DEFINED; + } + switch(c2) { + case U_PULLI: + tsc_str[i] = tsc_mey[pos]; + *num_uni = 2; + return TA_SUCCESS; + case U_KAAL: + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_KAAL; + *num_tsc = 2; + *num_uni = 2; + return TA_SUCCESS; + case U_KOKKI1: + /* TI case: TA + ikaram is the single byte TSC_TI */ + if (c1 == 0x0B9f) { + tsc_str[i] = TSC_TI; + *num_uni = 2; + return TA_SUCCESS; + } + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_KOKKI1; + *num_tsc = 2; + *num_uni = 2; + return TA_SUCCESS; + case U_KOKKI2: + /* TII case: TA + iikaaram is the single byte TSC_TII */ + if (c1 == 0x0B9f) { + tsc_str[i] = TSC_TII; + *num_uni = 2; + return TA_SUCCESS; + } + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_KOKKI2; + *num_tsc = 2; + *num_uni = 2; + return TA_SUCCESS; + case U_UMODI1: + /* If it is a grantha add a hook, otherwise + * we have separate chars in TSCII. The grantha + * letters are the last four entries of u_amey, so + * pos stays within the 18-entry tsc_ukaram table. */ + if (u_find_char(u_grantha, c1) < 0) { + tsc_str[i] = tsc_ukaram[pos]; + *num_uni = 2; + return TA_SUCCESS; + } + else { + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_UMODI1; + *num_tsc = 2; + *num_uni = 2; + return TA_SUCCESS; + } + case U_UMODI2: + /* same split as U_UMODI1, using tsc_uukaaram */ + if (u_find_char(u_grantha, c1) < 0) { + tsc_str[i] = tsc_uukaaram[pos]; + *num_uni = 2; + return TA_SUCCESS; + } + else { + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_UMODI2; + *num_tsc = 2; + *num_uni = 2; + return TA_SUCCESS; + } + case U_KOMBU1: + /* Unicode seems to allow double modifiers for + * okaram, ookaaram & aukaaram. This is + * somewhat unnecessary. But we will handle + * that condition too.
+ * In TSCII the kombu byte is written before the consonant. + */ + switch(c3) { + case U_KAAL: + tsc_str[i++] = TSC_KOMBU1; + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_KAAL; + *num_tsc = 3; + *num_uni = 3; + return TA_SUCCESS; + case U_AUMARK: + tsc_str[i++] = TSC_KOMBU1; + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_AUMODI; + *num_tsc = 3; + *num_uni = 3; + return TA_SUCCESS; + default: + tsc_str[i++] = TSC_KOMBU1; + tsc_str[i] = tsc_amey[pos]; + *num_tsc = 2; + *num_uni = 2; + return TA_SUCCESS; + } + case U_KOMBU2: + if (c3 == U_KAAL) { + tsc_str[i++] = TSC_KOMBU2; + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_KAAL; + *num_tsc = 3; + *num_uni = 3; + return TA_SUCCESS; + } + else { + tsc_str[i++] = TSC_KOMBU2; + tsc_str[i] = tsc_amey[pos]; + *num_tsc = 2; + *num_uni = 2; + return TA_SUCCESS; + } + case U_AIMODI: + tsc_str[i++] = TSC_AIMODI; + tsc_str[i] = tsc_amey[pos]; + *num_tsc = 2; + *num_uni = 2; + return TA_SUCCESS; + case U_OMODI1: /* single unicode sign, three TSCII bytes */ + tsc_str[i++] = TSC_KOMBU1; + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_KAAL; + *num_tsc = 3; + *num_uni = 2; + return TA_SUCCESS; + case U_OMODI2: + tsc_str[i++] = TSC_KOMBU2; + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_KAAL; + *num_tsc = 3; + *num_uni = 2; + return TA_SUCCESS; + case U_AUMODI: + tsc_str[i++] = TSC_KOMBU1; + tsc_str[i++] = tsc_amey[pos]; + tsc_str[i] = TSC_AUMODI; + *num_tsc = 3; + *num_uni = 2; + return TA_SUCCESS; + default: /* bare consonant: one code point in, one byte out */ + tsc_str[i] = tsc_amey[pos]; + return TA_SUCCESS; + } + } + + /* modifiers - illegal sequence. NOTE(review): tsc_modi holds 0xFF at + * the positions of 0x0BCA-0x0BCD, so those four yield the byte 0xFF */ + if (c1 >= 0x0BBE && c1 <= 0x0BD7) { + if ((pos = u_find_char(u_modi, c1)) < 0) { + tsc_str[i] = SPACE; + return TA_NOT_DEFINED; + } + tsc_str[i] = tsc_modi[pos]; + return TA_ILL_SEQ; + } + tsc_str[i] = SPACE; + return TA_NOT_DEFINED; +} + +/* is_tsc_uyir: TA_TRUE when c lies in 0xAB..0xB7, else TA_FALSE */ +int is_tsc_uyir(unsigned char c) +{ + if (c >= 0xAB && c <= 0xB7) + return TA_TRUE; + else + return TA_FALSE; + +} + +/* is_tsc_modi: TA_TRUE when c lies in 0xA1..0xA8 or equals 0xAA */ +int is_tsc_modi(unsigned char c) +{ + if ((c >= 0xA1 && c <= 0xA8) || + c == 0xAA ) + return TA_TRUE; + else + return TA_FALSE; +} + +int is_tsc_amey(unsigned char
c) +{ + /* 0x83-0x87 are the grantha letters and KSHA, 0xB8-0xC9 the rest */ + if ((c >= 0x83 && c <= 0x87) || + (c >= 0xB8 && c <= 0xC9)) + return TA_TRUE; + else + return TA_FALSE; +} + +/* is_tsc_mey: TA_TRUE when c lies in 0x88..0x8C or 0xEC..0xFD */ +int is_tsc_mey(unsigned char c) +{ + if ((c >= 0x88 && c <= 0x8C) || + (c >= 0xEC && c <= 0xFD)) + return TA_TRUE; + else + return TA_FALSE; +} + +/* is_tsc_number: TA_TRUE for the TSCII numeral bytes listed in tsc_uyir: + * 0x81, 0x8D-0x90, 0x95-0x98 and 0x9D-0x9F. The first test used to be + * c >= 0x81, which accepted every byte from 0x81 upwards. */ +int is_tsc_number(unsigned char c) +{ + if ( c == 0x81 || + (c >= 0x8D && c <= 0x90) || + (c >= 0x95 && c <= 0x98) || + (c >= 0x9D && c <= 0x9F)) + return TA_TRUE; + else + return TA_FALSE; +} + +/* is_uni_uyir: TA_TRUE for the unicode Tamil vowels and aaytham (0x0B83) */ +int is_uni_uyir(unsigned int c) +{ + if ((c >= 0x0B85 && c <= 0x0B8A) || + (c >= 0x0B8E && c <= 0x0B90) || + (c >= 0x0B92 && c <= 0x0B94) || + (c == 0x0B83)) + return TA_TRUE; + else + return TA_FALSE; +} + +/* is_uni_amey: TA_TRUE when c is listed in u_amey */ +int is_uni_amey(unsigned int c) +{ + if (u_find_char(u_amey, c) < 0) + return TA_FALSE; + else + return TA_TRUE; +} + +/* is_uni_modi: TA_TRUE when c is listed in u_modi */ +int is_uni_modi(unsigned int c) +{ + if (u_find_char(u_modi, c) < 0) + return TA_FALSE; + else + return TA_TRUE; +} + +/* is_uni_numb: TA_TRUE when c lies in 0x0BE7..0x0BF2. The two return + * values used to be swapped, contradicting the declaration in taconv.h. */ +int is_uni_numb(unsigned int c) +{ + if ((c >= 0x0BE7 && c <= 0x0BF2)) + return TA_TRUE; + else + return TA_FALSE; +} diff --git a/modules/tamil/taconv.h b/modules/tamil/taconv.h new file mode 100644 index 00000000..dad44078 --- /dev/null +++ b/modules/tamil/taconv.h @@ -0,0 +1,55 @@ +/* taconv.h: + * Author: Sivaraj D (sivaraj@tamil.net) + * Date : 4-Jan-2000 + */ + +/* Return codes */ +#define TA_SUCCESS 0 +#define TA_ILL_SEQ 1 /* Sequence is illegal in language */ +#define TA_INSUF_IN 2 +#define TA_INSUF_OUT 3 /* Need atleast three chars */ +#define TA_OUT_OF_RANGE 4 /* Char outside 0x00-0x7f or 0xb80-0xbff */ +#define TA_NOT_DEFINED 5 /* Within 0xb80-0xbff but not defined + * by unicode*/ + +#define TA_NULL 0x00 +#define TA_TRUE 1 +#define TA_FALSE 0 + +/* tsc2uni: Get the first TSCII Tamil character in tsc_str and convert it to + * corresponding unicode character in uni_str.
+ * tsc_str: TSCII string (in) + * uni_str: Unicode string (out) + * num_tsc: Number of TSCII characters processed (out) + * num_uni: Number of Unicode characters returned (out) + * num_in_left: Number of characters left in input buffer (in) + * num_out_left: Number of characters left in output buffer (in) + */ +int tsc2uni(unsigned char *tsc_str, unsigned int *uni_str, + int *num_tsc, int *num_uni, + int num_in_left, int num_out_left); + +/* uni2tsc: Get the first Unicode character in uni_str and convert it to + * corresponding TSCII Tamil character in tsc_str. + * uni_str: Unicode string (in) + * tsc_str: TSCII string (out) + * num_uni: Number of Unicode characters processed (out) + * num_tsc: Number of TSCII characters returned (out) + * num_in_left: Number of characters left in input buffer (in) + * num_out_left: Number of characters left in output buffer (in) + */ +int uni2tsc(unsigned int *uni_str, unsigned char *tsc_str, + int *num_uni, int *num_tsc, + int num_in_left, int num_out_left); + +int is_tsc_uyir(unsigned char c); /* Returns 1 if c is a vowel, else 0 */ +int is_tsc_modi(unsigned char c); /* Returns 1 if c is a modifier */ +int is_tsc_amey(unsigned char c); /* Returns 1 if c is a akara mey */ +int is_tsc_mey(unsigned char c); /* Returns 1 if c is a consonant */ +int is_tsc_number(unsigned char c); /* Returns 1 if c is a number */ + +int is_uni_uyir(unsigned int c); /* Returns 1 if c is a vowel, else 0 */ +int is_uni_modi(unsigned int c); /* Returns 1 if c is a modifier */ +int is_uni_amey(unsigned int c); /* Returns 1 if c is a akara mey */ +int is_uni_numb(unsigned int c); /* Returns 1 if c is a number */ + diff --git a/modules/tamil/tadefs.h b/modules/tamil/tadefs.h new file mode 100644 index 00000000..5e587471 --- /dev/null +++ b/modules/tamil/tadefs.h @@ -0,0 +1,132 @@ +/* Author: Sivaraj D (sivaraj@tamil.net) + * Date : 4-Jan-2000 + */ + +/* Defining Unicode characters. Every table below ends with a zero entry, + * which the find_char helpers in taconv.c use as the terminator. */ + +unsigned int /* unicode side of the direct one-to-one table; parallel to tsc_uyir */ +u_uyir[] = { 0x0B85, 0x0B86, 0x0B87, 0x0B88, +
0x0B89, 0x0B8A, 0x0B8E, 0x0B8F, + 0x0B90, 0x0B92, 0x0B93, 0x0B94, + 0x0B83, 0x00A9, + 0x2018, 0x2019, 0x201c, 0x201D, + 0x0BE7, 0x0BE8, 0x0BE9, 0x0BEA, + 0x0BEB, 0x0BEC, 0x0BED, 0x0BEE, + 0x0BEF, 0x0BF0, 0x0BF1, 0x0BF2, + 0x0000 }; + +/* akaramEriya mey: consonants, in the same order as tsc_amey and tsc_mey; + * the last four entries are the grantha letters also listed in u_grantha */ +unsigned int +u_amey[] = { 0x0B95, 0x0B99, 0x0B9A, 0x0B9E, + 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, + 0x0BAA, 0x0BAE, 0x0BAF, 0x0BB0, + 0x0BB2, 0x0BB5, 0x0BB4, 0x0BB3, + 0x0BB1, 0x0BA9, 0x0B9C, 0x0BB7, + 0x0BB8, 0x0BB9, 0x0000 }; + +unsigned int /* vowel signs, puLLi and the AU length mark; parallel to tsc_modi */ +u_modi[] = { 0x0BBE, 0x0BBF, 0x0BC0, 0x0BC1, + 0x0BC2, 0x0BC6, 0x0BC7, 0x0BC8, + 0x0BCA, 0x0BCB, 0x0BCC, 0x0BCD, + 0x0BD7, 0x0000 }; + +unsigned int /* grantha consonants: these take a separate u/uu modifier byte in TSCII */ +u_grantha[] = { 0x0B9C, 0x0BB7, 0x0BB8, 0x0BB9, + 0x0000 }; + +#define U_KAAL 0x0BBE +#define U_KOKKI1 0x0BBF +#define U_KOKKI2 0x0BC0 +#define U_UMODI1 0x0BC1 +#define U_UMODI2 0x0BC2 +#define U_KOMBU1 0x0BC6 +#define U_KOMBU2 0x0BC7 +#define U_AIMODI 0x0BC8 +#define U_OMODI1 0x0BCA +#define U_OMODI2 0x0BCB +#define U_AUMODI 0x0BCC +#define U_AUMARK 0x0BD7 +#define U_PULLI 0x0BCD + +#define U_SPACE 0x0020 +#define U_ZWSP 0x200B +#define U_LSQUOT 0x2018 +#define U_RSQUOT 0x2019 +#define U_LDQUOT 0x201C +#define U_RDQUOT 0x201D + +/* Defining TSCII characters - we define only those characters + * that are useful in Unicode */ + +#define SPACE 0x20 + +/* Vowel modifiers */ +#define TSC_KAAL 0xA1 +#define TSC_KOKKI1 0xA2 +#define TSC_KOKKI2 0xA3 +#define TSC_UMODI1 0xA4 +#define TSC_UMODI2 0xA5 +#define TSC_KOMBU1 0xA6 +#define TSC_KOMBU2 0xA7 +#define TSC_AIMODI 0xA8 +#define TSC_AUMODI 0xAA + +unsigned char /* parallel to u_modi; presumably 0xFF marks the unicode signs (0x0BCA-0x0BCD) that have no single TSCII byte - TODO confirm */ +tsc_modi[] = { 0xA1, 0xA2, 0xA3, 0xA4, + 0xA5, 0xA6, 0xA7, 0xA8, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xAA, 0x00 }; + +/* all uyirs & aaytham: these convert directly to unicode (parallel to u_uyir) */ +unsigned char +tsc_uyir[] = { 0xAB, 0xAC, 0xAD, 0xAE, /* a, aa, i, ii */ + 0xAF, 0xB0, 0xB1, 0xB2, /* u, uu, e, ee */ + 0xB3, 0xB4, 0xB5, 0xB6, /* ai, o, oo, au */ + 0xB7, 0xA9, /* aaytham, copyright*/ + 0x91, 0x92, 0x93, 0x94, /* quotes */
+ 0x81, 0x8D, 0x8E, 0x8F, + 0x90, 0x95, 0x96, 0x97, + 0x98, 0x9D, 0x9E, 0x9F, + 0x00 }; + +/* all mey (pure consonants); parallel to u_amey */ +unsigned char +tsc_mey[] = { 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, + 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, + 0xFC, 0xFD, 0x88, 0x89, + 0x8A, 0x8B, 0x00 }; + +/* akaramEriya mey; parallel to u_amey */ +unsigned char +tsc_amey[] = { 0xB8, 0xB9, 0xBA, 0xBB, + 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, + 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0x83, 0x84, + 0x85, 0x86, 0x00 }; + + +/* ikaram, iikaaram for T */ +#define TSC_TI 0xCA +#define TSC_TII 0xCB + +/* ukaram, uukaaram & grantha ameys. The two tables below cover the first + * 18 entries of u_amey only; grantha letters have no precomposed form. */ +unsigned char +tsc_ukaram[] = { 0xCC, 0x99, 0xCD, 0x9A, + 0xCE, 0xCF, 0xD0, 0xD1, + 0xD2, 0xD3, 0xD4, 0xD5, + 0xD6, 0xD7, 0xD8, 0xD9, + 0xDA, 0xDB, 0x00 }; + +unsigned char +tsc_uukaaram[] = { 0xDC, 0x9B, 0xDD, 0x9C, + 0xDE, 0xDF, 0xE0, 0xE1, + 0xE2, 0xE3, 0xE4, 0xE5, + 0xE6, 0xE7, 0xE8, 0xE9, + 0xEA, 0xEB, 0x00 }; + +unsigned char +tsc_grantha[] = { 0x83, 0x84, 0x85, 0x86, 0x00 }; + diff --git a/modules/tamil/tamil-x.c b/modules/tamil/tamil-x.c new file mode 100644 index 00000000..934bd216 --- /dev/null +++ b/modules/tamil/tamil-x.c @@ -0,0 +1,226 @@ +/* Pango - Tamil module + * tamil-x.c: + * + * Copyright (C) 2000 Sivaraj D + * + */ + +#include <stdio.h> +#include <glib.h> +#include "pango.h" +#include "pangox.h" +#include "utils.h" +#include "taconv.h" + +/* Element count of a true array (not a pointer). Fully parenthesised so it + * is safe inside larger expressions, and divides by element 0 as usual. */ +#define MEMBERS(strct) (sizeof(strct) / sizeof((strct)[0])) + +static PangoEngineRange tamil_range[] = { + { 0x0b80, 0x0bff, "*" }, +}; + +static PangoEngineInfo script_engines[] = { + { + "TamilScriptEngineLang", + PANGO_ENGINE_TYPE_LANG, + PANGO_RENDER_TYPE_NONE, + tamil_range, MEMBERS(tamil_range) + }, + { + "TamilScriptEngineX", + PANGO_ENGINE_TYPE_SHAPE, + PANGO_RENDER_TYPE_X, + tamil_range, MEMBERS(tamil_range) + } +}; + +static gint n_script_engines = MEMBERS (script_engines); + +/* + * Language script engine + */ + +static void +tamil_engine_break (gchar *text, + gint len, + PangoAnalysis *analysis, +
PangoLogAttr *attrs) +{ +/* Most of the code comes from pango_break + * only difference is char stop based on modifiers: + * a Tamil modifier never starts a new character cell. + * One attrs[] entry is filled per decoded character. + */ + + gchar *cur = text; + gchar *next; + gint i = 0; + GUChar4 wc; + + while (*cur) + { + if (!_pango_utf8_iterate (cur, &next, &wc)) + return; + if (cur == next) + break; + if ((next - text) > len) + break; + cur = next; + + /* newline was written as the letter 'n', which made every 'n' white space */ + attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == '\n') ? 1 : 0; + attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white; + attrs[i].is_char_stop = (is_uni_modi(wc)) ? 0 : 1; + attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white; + + i++; + } +} + +/* Allocate the language engine and hook up tamil_engine_break */ +static PangoEngine * +tamil_engine_lang_new () +{ + PangoEngineLang *result; + + result = g_new (PangoEngineLang, 1); + + result->engine.id = "TamilScriptEngine"; + result->engine.type = PANGO_ENGINE_TYPE_LANG; + result->engine.length = sizeof (*result); /* size of the struct, not of the pointer */ + result->script_break = tamil_engine_break; + + return (PangoEngine *)result; +} + +/* + * X window system script engine portion + */ + +/* We will need some type of kerning support for use with ikaram/iikaaram.
+ * But we can live with this for time being. + * set_glyph fills slot i of the glyph string: font, glyph index, zero + * offsets, and a width taken from pango_x_glyph_extents scaled by 72. + */ +static void +set_glyph (PangoGlyphString *glyphs, gint i, PangoCFont *cfont, PangoGlyphIndex glyph) +{ + gint width; + + glyphs->glyphs[i].font = cfont; + glyphs->glyphs[i].glyph = glyph; + + glyphs->geometry[i].x_offset = 0; + glyphs->geometry[i].y_offset = 0; + + /* NOTE(review): i is the glyph slot, not the index of the source character - confirm this is what log_clusters expects */ + glyphs->log_clusters[i] = i; + + pango_x_glyph_extents (&glyphs->glyphs[i], + NULL, NULL, &width, NULL, NULL, NULL, NULL); + glyphs->geometry[i].width = width * 72; +} + +/* tamil_engine_shape: decode the UTF-8 text into an array of code points, + * convert it piecewise to TSCII with uni2tsc and emit one glyph of the + * "tscii-0" font per TSCII byte. A conversion that does not return + * TA_SUCCESS yields a single space glyph. */ +static void +tamil_engine_shape (PangoFont *font, + gchar *text, + gint length, + PangoAnalysis *analysis, + PangoGlyphString *glyphs) +{ + int n_chars, n_glyph; + int i, j; + char *p, *next; + GUChar4 *wc, *uni_str; + int res; + unsigned char tsc_str[6]; + int ntsc, nuni; + + PangoCFont *tscii_font = NULL; + + g_return_if_fail (font != NULL); + g_return_if_fail (text != NULL); + g_return_if_fail (length >= 0); + g_return_if_fail (analysis != NULL); + + /* NOTE(review): the reference is taken unconditionally although the unref at the end is guarded by a NULL test - confirm whether the lookup can fail */ + tscii_font = pango_x_find_cfont (font, "tscii-0"); + pango_cfont_ref (tscii_font); + + n_chars = _pango_utf8_len (text, length); + + /* temporarily set the size to 3 times the number of unicode chars: + * uni2tsc writes at most three bytes per call and consumes at least one code point */ + pango_glyph_string_set_size (glyphs, n_chars * 3); + wc = (GUChar4 *)g_malloc(sizeof(GUChar4)*n_chars); + + p = text; + for (i=0; i < n_chars; i++) + { + _pango_utf8_iterate (p, &next, &wc[i]); + p = next; + } + + n_glyph = 0; + uni_str = wc; + + /* j counts consumed code points; uni_str advances in step with it. + * NOTE(review): uni2tsc is declared to take unsigned int *, so this assumes GUChar4 has that representation */ + j = 0; + while (j < n_chars) + { + res = uni2tsc(uni_str, tsc_str, &nuni, &ntsc, n_chars - j, 6); + + uni_str = uni_str + nuni; + /* We need to differentiate between different return codes later */ + if (res != TA_SUCCESS) + { + set_glyph (glyphs, n_glyph, tscii_font, ' '); + n_glyph++; + j = j + nuni; + continue; + } + for (i = 0; i < ntsc; i++) + { + set_glyph (glyphs, n_glyph, tscii_font, (PangoGlyphIndex) tsc_str[i]); + n_glyph++; + } + j = j + nuni; + } + + /* shrink to the number of glyphs actually produced */ + pango_glyph_string_set_size (glyphs, n_glyph); + + if (tscii_font) + pango_cfont_unref (tscii_font); + g_free(wc); +} + +static
PangoEngine * +tamil_engine_x_new () +{ + PangoEngineShape *result; + + result = g_new (PangoEngineShape, 1); + + result->engine.id = "TamilScriptEngine"; + result->engine.type = PANGO_ENGINE_TYPE_SHAPE; /* was _LANG; script_engines[] lists this engine as a shape engine */ + result->engine.length = sizeof (*result); /* size of the struct, not of the pointer */ + result->script_shape = tamil_engine_shape; + + return (PangoEngine *)result; +} + +/* The following three functions provide the public module API for + * Pango + */ +void +script_engine_list (PangoEngineInfo **engines, gint *n_engines) +{ + *engines = script_engines; + *n_engines = n_script_engines; +} + +/* Returns a new engine for one of the ids in script_engines[], else NULL */ +PangoEngine * +script_engine_load (const char *id) +{ + if (!strcmp (id, "TamilScriptEngineLang")) + return tamil_engine_lang_new (); + else if (!strcmp (id, "TamilScriptEngineX")) + return tamil_engine_x_new (); + else + return NULL; +} + +void +script_engine_unload (PangoEngine *engine) +{ +} + diff --git a/modules/tamil/tamil.c b/modules/tamil/tamil.c new file mode 100644 index 00000000..934bd216 --- /dev/null +++ b/modules/tamil/tamil.c @@ -0,0 +1,226 @@ +/* Pango - Tamil module + * tamil.c: + * + * Copyright (C) 2000 Sivaraj D + * + */ + +#include <stdio.h> +#include <glib.h> +#include "pango.h" +#include "pangox.h" +#include "utils.h" +#include "taconv.h" + +/* Element count of a true array (not a pointer). Fully parenthesised so it + * is safe inside larger expressions, and divides by element 0 as usual. */ +#define MEMBERS(strct) (sizeof(strct) / sizeof((strct)[0])) + +static PangoEngineRange tamil_range[] = { + { 0x0b80, 0x0bff, "*" }, +}; + +static PangoEngineInfo script_engines[] = { + { + "TamilScriptEngineLang", + PANGO_ENGINE_TYPE_LANG, + PANGO_RENDER_TYPE_NONE, + tamil_range, MEMBERS(tamil_range) + }, + { + "TamilScriptEngineX", + PANGO_ENGINE_TYPE_SHAPE, + PANGO_RENDER_TYPE_X, + tamil_range, MEMBERS(tamil_range) + } +}; + +static gint n_script_engines = MEMBERS (script_engines); + +/* + * Language script engine + */ + +static void +tamil_engine_break (gchar *text, + gint len, + PangoAnalysis *analysis, + PangoLogAttr *attrs) +{ +/* Most of the code comes from pango_break + * only difference is char stop based on modifiers + */ + + gchar *cur = text; + gchar
*next; + gint i = 0; + GUChar4 wc; + + while (*cur) + { + if (!_pango_utf8_iterate (cur, &next, &wc)) + return; + if (cur == next) + break; + if ((next - text) > len) + break; + cur = next; + + /* newline was written as the letter 'n', which made every 'n' white space */ + attrs[i].is_white = (wc == ' ' || wc == '\t' || wc == '\n') ? 1 : 0; + attrs[i].is_break = (i > 0 && attrs[i-1].is_white) || attrs[i].is_white; + attrs[i].is_char_stop = (is_uni_modi(wc)) ? 0 : 1; + attrs[i].is_word_stop = (i == 0) || attrs[i-1].is_white; + + i++; + } +} + +/* Allocate the language engine and hook up tamil_engine_break */ +static PangoEngine * +tamil_engine_lang_new () +{ + PangoEngineLang *result; + + result = g_new (PangoEngineLang, 1); + + result->engine.id = "TamilScriptEngine"; + result->engine.type = PANGO_ENGINE_TYPE_LANG; + result->engine.length = sizeof (*result); /* size of the struct, not of the pointer */ + result->script_break = tamil_engine_break; + + return (PangoEngine *)result; +} + +/* + * X window system script engine portion + */ + +/* We will need some type of kerning support for use with ikaram/iikaaram. + * But we can live with this for time being + */ +static void +set_glyph (PangoGlyphString *glyphs, gint i, PangoCFont *cfont, PangoGlyphIndex glyph) +{ + gint width; + + glyphs->glyphs[i].font = cfont; + glyphs->glyphs[i].glyph = glyph; + + glyphs->geometry[i].x_offset = 0; + glyphs->geometry[i].y_offset = 0; + + glyphs->log_clusters[i] = i; + + pango_x_glyph_extents (&glyphs->glyphs[i], + NULL, NULL, &width, NULL, NULL, NULL, NULL); + glyphs->geometry[i].width = width * 72; +} + +static void +tamil_engine_shape (PangoFont *font, + gchar *text, + gint length, + PangoAnalysis *analysis, + PangoGlyphString *glyphs) +{ + int n_chars, n_glyph; + int i, j; + char *p, *next; + GUChar4 *wc, *uni_str; + int res; + unsigned char tsc_str[6]; + int ntsc, nuni; + + PangoCFont *tscii_font = NULL; + + g_return_if_fail (font != NULL); + g_return_if_fail (text != NULL); + g_return_if_fail (length >= 0); + g_return_if_fail (analysis != NULL); + + tscii_font = pango_x_find_cfont (font, "tscii-0"); + pango_cfont_ref (tscii_font); + + n_chars = _pango_utf8_len
(text, length); + + /* temporarily set the size to 3 times the number of unicode chars */ + pango_glyph_string_set_size (glyphs, n_chars * 3); + wc = (GUChar4 *)g_malloc(sizeof(GUChar4)*n_chars); + + p = text; + for (i=0; i < n_chars; i++) + { + _pango_utf8_iterate (p, &next, &wc[i]); + p = next; + } + + n_glyph = 0; + uni_str = wc; + + j = 0; + while (j < n_chars) + { + res = uni2tsc(uni_str, tsc_str, &nuni, &ntsc, n_chars - j, 6); + + uni_str = uni_str + nuni; + /* We need to differentiate between different return codes later */ + if (res != TA_SUCCESS) + { + set_glyph (glyphs, n_glyph, tscii_font, ' '); + n_glyph++; + j = j + nuni; + continue; + } + for (i = 0; i < ntsc; i++) + { + set_glyph (glyphs, n_glyph, tscii_font, (PangoGlyphIndex) tsc_str[i]); + n_glyph++; + } + j = j + nuni; + } + + pango_glyph_string_set_size (glyphs, n_glyph); + + if (tscii_font) + pango_cfont_unref (tscii_font); + g_free(wc); +} + +/* Allocate the X shape engine and hook up tamil_engine_shape */ +static PangoEngine * +tamil_engine_x_new () +{ + PangoEngineShape *result; + + result = g_new (PangoEngineShape, 1); + + result->engine.id = "TamilScriptEngine"; + result->engine.type = PANGO_ENGINE_TYPE_SHAPE; /* was _LANG; script_engines[] lists this engine as a shape engine */ + result->engine.length = sizeof (*result); /* size of the struct, not of the pointer */ + result->script_shape = tamil_engine_shape; + + return (PangoEngine *)result; +} + +/* The following three functions provide the public module API for + * Pango + */ +void +script_engine_list (PangoEngineInfo **engines, gint *n_engines) +{ + *engines = script_engines; + *n_engines = n_script_engines; +} + +/* Returns a new engine for one of the ids in script_engines[], else NULL */ +PangoEngine * +script_engine_load (const char *id) +{ + if (!strcmp (id, "TamilScriptEngineLang")) + return tamil_engine_lang_new (); + else if (!strcmp (id, "TamilScriptEngineX")) + return tamil_engine_x_new (); + else + return NULL; +} + +void +script_engine_unload (PangoEngine *engine) +{ +} + |