From b1e23366755fee7d93b3f706bc24337aaeef0ef3 Mon Sep 17 00:00:00 2001
From: Owen Taylor <otaylor@redhat.com>
Date: Wed, 24 Sep 2003 22:38:14 +0000
Subject: Handle non-Hebrew characters.

Wed Sep 24 18:29:34 2003  Owen Taylor  <otaylor@redhat.com>

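        * modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster):
        Handle non-Hebrew characters: a non-Hebrew character now ends the
        current cluster, or forms a one-character cluster when it comes first.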
---
 ChangeLog                      |  6 +++++-
 ChangeLog.pre-1-10             |  6 +++++-
 ChangeLog.pre-1-4              |  6 +++++-
 ChangeLog.pre-1-6              |  6 +++++-
 ChangeLog.pre-1-8              |  6 +++++-
 modules/hebrew/hebrew-shaper.c | 34 +++++++++++++++++++++-------------
 6 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6c703651..30bf1961 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Wed Sep 24 18:29:34 2003  Owen Taylor  <otaylor@redhat.com>
+
+	* modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster):
+	Handle non-hebrew characters.
+
 2003-09-25  Matthias Clasen  <maclas@gmx.de>
 
 	* pango/break.c (pango_default_break): Only reset the word 
@@ -5,7 +10,6 @@
 	start. Otherwise both 't' and 'e' are classified as word 
 	start in '123test'.  (#122754, Hidetoshi Tajima)
 	
-
 Tue Sep 23 19:43:05 2003  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-context.c (itemize_state_add_character): Don't
diff --git a/ChangeLog.pre-1-10 b/ChangeLog.pre-1-10
index 6c703651..30bf1961 100644
--- a/ChangeLog.pre-1-10
+++ b/ChangeLog.pre-1-10
@@ -1,3 +1,8 @@
+Wed Sep 24 18:29:34 2003  Owen Taylor  <otaylor@redhat.com>
+
+	* modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster):
+	Handle non-hebrew characters.
+
 2003-09-25  Matthias Clasen  <maclas@gmx.de>
 
 	* pango/break.c (pango_default_break): Only reset the word 
@@ -5,7 +10,6 @@
 	start. Otherwise both 't' and 'e' are classified as word 
 	start in '123test'.  (#122754, Hidetoshi Tajima)
 	
-
 Tue Sep 23 19:43:05 2003  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-context.c (itemize_state_add_character): Don't
diff --git a/ChangeLog.pre-1-4 b/ChangeLog.pre-1-4
index 6c703651..30bf1961 100644
--- a/ChangeLog.pre-1-4
+++ b/ChangeLog.pre-1-4
@@ -1,3 +1,8 @@
+Wed Sep 24 18:29:34 2003  Owen Taylor  <otaylor@redhat.com>
+
+	* modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster):
+	Handle non-hebrew characters.
+
 2003-09-25  Matthias Clasen  <maclas@gmx.de>
 
 	* pango/break.c (pango_default_break): Only reset the word 
@@ -5,7 +10,6 @@
 	start. Otherwise both 't' and 'e' are classified as word 
 	start in '123test'.  (#122754, Hidetoshi Tajima)
 	
-
 Tue Sep 23 19:43:05 2003  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-context.c (itemize_state_add_character): Don't
diff --git a/ChangeLog.pre-1-6 b/ChangeLog.pre-1-6
index 6c703651..30bf1961 100644
--- a/ChangeLog.pre-1-6
+++ b/ChangeLog.pre-1-6
@@ -1,3 +1,8 @@
+Wed Sep 24 18:29:34 2003  Owen Taylor  <otaylor@redhat.com>
+
+	* modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster):
+	Handle non-hebrew characters.
+
 2003-09-25  Matthias Clasen  <maclas@gmx.de>
 
 	* pango/break.c (pango_default_break): Only reset the word 
@@ -5,7 +10,6 @@
 	start. Otherwise both 't' and 'e' are classified as word 
 	start in '123test'.  (#122754, Hidetoshi Tajima)
 	
-
 Tue Sep 23 19:43:05 2003  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-context.c (itemize_state_add_character): Don't
diff --git a/ChangeLog.pre-1-8 b/ChangeLog.pre-1-8
index 6c703651..30bf1961 100644
--- a/ChangeLog.pre-1-8
+++ b/ChangeLog.pre-1-8
@@ -1,3 +1,8 @@
+Wed Sep 24 18:29:34 2003  Owen Taylor  <otaylor@redhat.com>
+
+	* modules/hebrew/hebrew-shaper.c (hebrew_shaper_get_next_cluster):
+	Handle non-hebrew characters.
+
 2003-09-25  Matthias Clasen  <maclas@gmx.de>
 
 	* pango/break.c (pango_default_break): Only reset the word 
@@ -5,7 +10,6 @@
 	start. Otherwise both 't' and 'e' are classified as word 
 	start in '123test'.  (#122754, Hidetoshi Tajima)
 	
-
 Tue Sep 23 19:43:05 2003  Owen Taylor  <otaylor@redhat.com>
 
 	* pango/pango-context.c (itemize_state_add_character): Don't
diff --git a/modules/hebrew/hebrew-shaper.c b/modules/hebrew/hebrew-shaper.c
index 717a43d6..6e6890fa 100644
--- a/modules/hebrew/hebrew-shaper.c
+++ b/modules/hebrew/hebrew-shaper.c
@@ -33,7 +33,8 @@
 #include "pango-engine.h"
 
 /* Wrap all characters above 0xF00 to ALEF. */
-#define ucs2iso8859_8(wc)		(wc>0xF000 ? 0x11 : (unsigned int)((unsigned int)(wc) - 0x0590 + 0x10))
+#define ishebrew(wc)                    ((wc)>0x590 && (wc)<0x600)
+#define ucs2iso8859_8(wc)		((unsigned int)((unsigned int)(wc) - 0x0590 + 0x10))
 #define iso8859_8_2uni(c)		((gunichar)(c) - 0x10 + 0x0590)
 
 #define MAX_CLUSTER_CHRS	256
@@ -207,18 +208,16 @@ static const gint Unicode_shape_table[128] = {
 };
 
 /* Treat all characters above 0xF000 as characters */
-#define is_char_class(wc, mask)	(wc > 0xF000 \
-                                 || char_class_table[ucs2iso8859_8 ((wc))] & (mask))
+#define is_hebrew(wc) ((wc) >= 0x590 && (wc) < 0x600)
+#define is_char_class(wc, mask)	(char_class_table[ucs2iso8859_8 ((wc))] & (mask))
 #define	is_composible(cur_wc, nxt_wc)	(compose_table[char_type_table[ucs2iso8859_8 (cur_wc)]]\
 						      [char_type_table[ucs2iso8859_8 (nxt_wc)]])
 
-
-
 const char *
-hebrew_shaper_get_next_cluster(const char	*text,
+hebrew_shaper_get_next_cluster(const char      *text,
 			       gint		length,
-			       gunichar       *cluster,
-			       gint		*num_chrs)
+			       gunichar        *cluster,
+			       gint	       *num_chrs)
 {  
   const char *p;
   gint n_chars = 0;
@@ -229,14 +228,23 @@ hebrew_shaper_get_next_cluster(const char	*text,
     {
       gunichar current = g_utf8_get_char (p);
       
-      if (n_chars == 0 ||
-	  is_composible ((gunichar)(cluster[0]), current) )
+      if (!ishebrew (current) ||
+	  (n_chars == 0 && is_char_class(current, ~(NoDefine|SpacingLetter))))
+	{
+	  /* Not a legal Hebrew cluster */
+	  
+	  if (n_chars == 0)
+	    {
+	      cluster[n_chars++] = current;
+	      p = g_utf8_next_char (p);
+	    }
+	  break;
+	}
+      else if (n_chars == 0 ||
+	       is_composible (cluster[0], current))
 	{
 	  cluster[n_chars++] = current;
 	  p = g_utf8_next_char (p);
-	  if (n_chars == 1 &&
-	      is_char_class(cluster[0], ~(NoDefine|SpacingLetter)) )
-	      break;
 	}
       else
 	break;
-- 
cgit v1.2.1