From b1e23366755fee7d93b3f706bc24337aaeef0ef3 Mon Sep 17 00:00:00 2001 From: Owen Taylor Date: Wed, 24 Sep 2003 22:38:14 +0000 Subject: Handle non-hebrew characters. Wed Sep 24 18:29:34 2003 Owen Taylor * modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster): Handle non-hebrew characters. --- ChangeLog | 6 +++++- ChangeLog.pre-1-10 | 6 +++++- ChangeLog.pre-1-4 | 6 +++++- ChangeLog.pre-1-6 | 6 +++++- ChangeLog.pre-1-8 | 6 +++++- modules/hebrew/hebrew-shaper.c | 34 +++++++++++++++++++++------------- 6 files changed, 46 insertions(+), 18 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c703651..30bf1961 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Wed Sep 24 18:29:34 2003 Owen Taylor + + * modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster): + Handle non-hebrew characters. + 2003-09-25 Matthias Clasen * pango/break.c (pango_default_break): Only reset the word @@ -5,7 +10,6 @@ start. Otherwise both 't' and 'e' are classified as word start in '123test'. (#122754, Hidetoshi Tajima) - Tue Sep 23 19:43:05 2003 Owen Taylor * pango/pango-context.c (itemize_state_add_character): Don't diff --git a/ChangeLog.pre-1-10 b/ChangeLog.pre-1-10 index 6c703651..30bf1961 100644 --- a/ChangeLog.pre-1-10 +++ b/ChangeLog.pre-1-10 @@ -1,3 +1,8 @@ +Wed Sep 24 18:29:34 2003 Owen Taylor + + * modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster): + Handle non-hebrew characters. + 2003-09-25 Matthias Clasen * pango/break.c (pango_default_break): Only reset the word @@ -5,7 +10,6 @@ start. Otherwise both 't' and 'e' are classified as word start in '123test'. (#122754, Hidetoshi Tajima) - Tue Sep 23 19:43:05 2003 Owen Taylor * pango/pango-context.c (itemize_state_add_character): Don't diff --git a/ChangeLog.pre-1-4 b/ChangeLog.pre-1-4 index 6c703651..30bf1961 100644 --- a/ChangeLog.pre-1-4 +++ b/ChangeLog.pre-1-4 @@ -1,3 +1,8 @@ +Wed Sep 24 18:29:34 2003 Owen Taylor + + * modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster): + Handle non-hebrew characters. + 2003-09-25 Matthias Clasen * pango/break.c (pango_default_break): Only reset the word @@ -5,7 +10,6 @@ start. Otherwise both 't' and 'e' are classified as word start in '123test'. (#122754, Hidetoshi Tajima) - Tue Sep 23 19:43:05 2003 Owen Taylor * pango/pango-context.c (itemize_state_add_character): Don't diff --git a/ChangeLog.pre-1-6 b/ChangeLog.pre-1-6 index 6c703651..30bf1961 100644 --- a/ChangeLog.pre-1-6 +++ b/ChangeLog.pre-1-6 @@ -1,3 +1,8 @@ +Wed Sep 24 18:29:34 2003 Owen Taylor + + * modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster): + Handle non-hebrew characters. + 2003-09-25 Matthias Clasen * pango/break.c (pango_default_break): Only reset the word @@ -5,7 +10,6 @@ start. Otherwise both 't' and 'e' are classified as word start in '123test'. (#122754, Hidetoshi Tajima) - Tue Sep 23 19:43:05 2003 Owen Taylor * pango/pango-context.c (itemize_state_add_character): Don't diff --git a/ChangeLog.pre-1-8 b/ChangeLog.pre-1-8 index 6c703651..30bf1961 100644 --- a/ChangeLog.pre-1-8 +++ b/ChangeLog.pre-1-8 @@ -1,3 +1,8 @@ +Wed Sep 24 18:29:34 2003 Owen Taylor + + * modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster): + Handle non-hebrew characters. + 2003-09-25 Matthias Clasen * pango/break.c (pango_default_break): Only reset the word @@ -5,7 +10,6 @@ start. Otherwise both 't' and 'e' are classified as word start in '123test'. (#122754, Hidetoshi Tajima) - Tue Sep 23 19:43:05 2003 Owen Taylor * pango/pango-context.c (itemize_state_add_character): Don't diff --git a/modules/hebrew/hebrew-shaper.c b/modules/hebrew/hebrew-shaper.c index 717a43d6..6e6890fa 100644 --- a/modules/hebrew/hebrew-shaper.c +++ b/modules/hebrew/hebrew-shaper.c @@ -33,7 +33,8 @@ #include "pango-engine.h" /* Wrap all characters above 0xF00 to ALEF. */ -#define ucs2iso8859_8(wc) (wc>0xF000 ? 0x11 : (unsigned int)((unsigned int)(wc) - 0x0590 + 0x10)) +#define ishebrew(wc) ((wc)>0x590 && (wc)<0x600) +#define ucs2iso8859_8(wc) ((unsigned int)((unsigned int)(wc) - 0x0590 + 0x10)) #define iso8859_8_2uni(c) ((gunichar)(c) - 0x10 + 0x0590) #define MAX_CLUSTER_CHRS 256 @@ -207,18 +208,16 @@ static const gint Unicode_shape_table[128] = { }; /* Treat all characters above 0xF000 as characters */ -#define is_char_class(wc, mask) (wc > 0xF000 \ - || char_class_table[ucs2iso8859_8 ((wc))] & (mask)) +#define is_hebrew(wc) ((wc) >= 0x590 && (wc) < 0x600) +#define is_char_class(wc, mask) (char_class_table[ucs2iso8859_8 ((wc))] & (mask)) #define is_composible(cur_wc, nxt_wc) (compose_table[char_type_table[ucs2iso8859_8 (cur_wc)]]\ [char_type_table[ucs2iso8859_8 (nxt_wc)]]) - - const char * -hebrew_shaper_get_next_cluster(const char *text, +hebrew_shaper_get_next_cluster(const char *text, gint length, - gunichar *cluster, - gint *num_chrs) + gunichar *cluster, + gint *num_chrs) { const char *p; gint n_chars = 0; @@ -229,14 +228,23 @@ hebrew_shaper_get_next_cluster(const char *text, { gunichar current = g_utf8_get_char (p); - if (n_chars == 0 || - is_composible ((gunichar)(cluster[0]), current) ) + if (!ishebrew (current) || + (n_chars == 0 && is_char_class(current, ~(NoDefine|SpacingLetter)))) + { + /* Not a legal Hebrew cluster */ + + if (n_chars == 0) + { + cluster[n_chars++] = current; + p = g_utf8_next_char (p); + } + break; + } + else if (n_chars == 0 || + is_composible (cluster[0], current)) { cluster[n_chars++] = current; p = g_utf8_next_char (p); - if (n_chars == 1 && - is_char_class(cluster[0], ~(NoDefine|SpacingLetter)) ) - break; } else break; -- cgit v1.2.1