From d494abc4bdf1fc9ad9280261f8fb1d54881355fc Mon Sep 17 00:00:00 2001
From: Chookij Vanatham <chookij@src.gnome.org>
Date: Tue, 17 Jul 2001 19:07:46 +0000
Subject: Updated the improvements of Hebrew points positioning from Dov
 Grobgeld <dov@imagic.weizmann.ac.il>

---
 modules/hebrew/hebrew-x.c | 216 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 189 insertions(+), 27 deletions(-)

(limited to 'modules')

diff --git a/modules/hebrew/hebrew-x.c b/modules/hebrew/hebrew-x.c
index ac4be81f..d0214926 100644
--- a/modules/hebrew/hebrew-x.c
+++ b/modules/hebrew/hebrew-x.c
@@ -7,6 +7,9 @@
  * Copyright (c) 1996-2000 by Sun Microsystems, Inc.
  * Author: Chookij Vanatham <Chookij.Vanatham@Eng.Sun.COM>
  *
+ * Hebrew points positioning improvements 2001
+ * Author: Dov Grobgeld <dov@imagic.weizmann.ac.il>
+ *  
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
  * License as published by the Free Software Foundation; either
@@ -41,7 +44,7 @@
 #define _ND			0
 #define _SP			1
 #define _NS			(1<<1)
-#define	_DA			(1<<2)
+#define	_DA			(1<<2)	/* only for dagesh... */
 
 #define	NoDefine		_ND
 #define	SpacingLetter		_SP
@@ -53,6 +56,16 @@
 #define	__NS			2
 #define	__DA			3
 
+/* Unicode definitions ... */
+#define UNI_VAV			0x5d5
+#define UNI_LAMED		0x5DC
+#define UNI_SHIN		0x5E9
+#define UNI_FINAL_PE		0x05E3
+#define UNI_PE			0x05E4
+#define UNI_SHIN_DOT		0x5c1
+#define UNI_SIN_DOT		0x5c2
+#define UNI_MAPIQ		0x5bc
+
 #define is_char_class(wc, mask)	(char_class_table[ucs2iso8859_8 ((wc))] & (mask))
 #define	is_composible(cur_wc, nxt_wc)	(compose_table[char_type_table[ucs2iso8859_8 (cur_wc)]]\
 						      [char_type_table[ucs2iso8859_8 (nxt_wc)]])
@@ -95,6 +108,15 @@ struct _HebrewFontInfo
   PangoXSubfont subfont;
 };
 
+/*======================================================================
+//  In the tables below all Hebrew characters are categorized to
+//  one of the following four classes:
+//
+//      non used entries              Not defined  (ND)
+//      accents, points               Non spacing  (NS)
+//      punctuation and characters    Spacing characters (SP)
+//      dagesh                        "Dagesh"    (DA)
+//----------------------------------------------------------------------*/
 static const gint char_class_table[128] = {
   /*       0,   1,   2,   3,   4,   5,   6,   7 */
 
@@ -139,6 +161,21 @@ static const gint char_type_table[128] = {
 	 __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
 };
 
+/*======================================================================
+//  The following table answers the question whether two characters
+//  are composible or not. The decision is made by looking at the
+//  char_type_table values for the first character in a cluster
+//  vs a following character. The only three combinations that
+//  are composible in Hebrew according to the table are:
+//
+//     1. a spacing character followed by non-spacing character
+//     2. a spacing character followed by a dagesh.
+//     3. a dagesh followed by a non-spacing character.
+//
+//  Note that a spacing character may be followed by several non-spacing
+//  accents, as the decision is always made on the base character of
+//  a combination.
+//----------------------------------------------------------------------*/
 static const gboolean compose_table[4][4] = {
       /* Cn */ /*     0,     1,     2,     3, */
 /* Cn-1 00 */	{ FALSE, FALSE, FALSE, FALSE },
@@ -147,9 +184,9 @@ static const gboolean compose_table[4][4] = {
   /* 30 */	{ FALSE, FALSE,  TRUE, FALSE },
 };
 
-/* Sun Hebrew Font Layout
+/* ISO 8859_8 Hebrew Font Layout. Does not include any accents.
  */
-static const gint Sun_shape_table[128] = {
+static const gint iso_8859_8_shape_table[128] = {
   0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
 
@@ -173,24 +210,25 @@ static const gint Sun_shape_table[128] = {
 /* Unicode Hebrew Font Layout
  */
 static const gint Unicode_shape_table[128] = {
-  0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
-
-  0x0000, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597,
-  0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
-  0x05A0, 0x05A1, 0x0000, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7,
-  0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF,
-  0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
-  0x05B8, 0x05B9, 0x0000, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
-  0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x0000, 0x0000, 0x0000,
-  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-
-  0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
-  0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
-  0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
-  0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-  0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x0000, 0x0000, 0x0000,
-  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+  /* 00 */    0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
+              0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
+  /* cantillation marks followed by accents */
+  /* 10 */    0x0000, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597,
+              0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
+  /* 20 */    0x05A0, 0x05A1, 0x0000, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7,
+              0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF,
+  /* 30 */    0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
+              0x05B8, 0x05B9, 0x0000, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
+  /* 40 */    0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x0000, 0x0000, 0x0000,
+              0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+
+  /* Aleph-Tav, Yiddish ligatures, and punctuation */
+  /* 50 */    0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
+              0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
+  /* 60 */    0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
+              0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+  /* 70 */    0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x0000, 0x0000, 0x0000,
+              0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 };
 
 /* Returns a structure with information we will use to rendering given the
@@ -266,7 +304,7 @@ add_glyph (HebrewFontInfo   *font_info,
 	   PangoGlyphString *glyphs, 
 	   gint              cluster_start, 
 	   PangoGlyph        glyph,
-	   gboolean          combining)
+	   gboolean          is_combining)
 {
   PangoRectangle ink_rect, logical_rect;
   gint index = glyphs->num_glyphs;
@@ -274,17 +312,19 @@ add_glyph (HebrewFontInfo   *font_info,
   pango_glyph_string_set_size (glyphs, index + 1);
   
   glyphs->glyphs[index].glyph = glyph;
-  glyphs->glyphs[index].attr.is_cluster_start = combining ? 0 : 1;
+  glyphs->glyphs[index].attr.is_cluster_start = is_combining ? 0 : 1;
   
   glyphs->log_clusters[index] = cluster_start;
 
   pango_font_get_glyph_extents (font_info->font,
 				glyphs->glyphs[index].glyph, &ink_rect, &logical_rect);
 
-  if (combining)
+  if (is_combining)
     {
       if (font_info->type == HEBREW_FONT_ISO8859_8)
 	{
+	  /* There are no accents in 8859_8 so this should never be
+	     called... Therefore I haven't even checked this. */
           glyphs->glyphs[index].geometry.width =
 		logical_rect.width + glyphs->glyphs[index - 1].geometry.width;
           if (logical_rect.width > 0)
@@ -295,9 +335,18 @@ add_glyph (HebrewFontInfo   *font_info,
 	}
       else
         {
+	  /* Unicode. Always make width of cluster according to the width
+	     of the base character and never take the punctuation into
+	     consideration.
+	   */
 	  glyphs->glyphs[index].geometry.width =
 		MAX (logical_rect.width, glyphs->glyphs[index -1].geometry.width);
+	  /* Dov's new logic... */
+	  glyphs->glyphs[index].geometry.width =  glyphs->glyphs[index -1].geometry.width;
+
 	  glyphs->glyphs[index - 1].geometry.width = 0;
+
+	  /* Here we should put in heuristics to center nikud. */
 	  glyphs->glyphs[index].geometry.x_offset = 0;
         }
     }
@@ -364,7 +413,7 @@ get_glyphs_list (HebrewFontInfo	*font_info,
 
       case HEBREW_FONT_ISO8859_8:
         return get_adjusted_glyphs_list (font_info, cluster,
-			num_chrs, glyph_lists, Sun_shape_table);
+			num_chrs, glyph_lists, iso_8859_8_shape_table);
       
       case HEBREW_FONT_ISO10646:
         return get_adjusted_glyphs_list (font_info, cluster,
@@ -389,6 +438,116 @@ add_cluster (HebrewFontInfo	*font_info,
   for (i=0; i<num_glyphs; i++)
        add_glyph (font_info, glyphs, cluster_start, glyphs_list[i],
 	    		i == 0 ? FALSE : TRUE);
+
+  /* Here the fun starts. Post process the positions of glyphs in the
+     cluster in order to make nikud look nice... The following is based
+     on lots of heuristic rules and could probably be improved. Especially
+     we could improve things considerably if we would access the rendered
+     bitmap and move nikud to avoid collisions etc.
+
+     Todo:
+     
+     * Take care of several points and accents below the characters.
+     
+     * Figure out what to do with dot inside vav if the vav does
+       not have a "roof". (Happens e.g. in Ariel).
+  */
+  if (num_glyphs > 1)
+    {
+      int i;
+      int cluster_start_idx = glyphs->num_glyphs - num_glyphs;
+      
+      if (font_info->type == HEBREW_FONT_ISO10646)
+	{
+	  PangoRectangle ink_rect, logical_rect;
+	  int base_char = glyphs_list[0] & 0x0fff;
+	  int base_ink_x_offset;
+	  int base_ink_width, base_ink_height;
+	  
+	  pango_font_get_glyph_extents (font_info->font,
+					glyphs->glyphs[cluster_start_idx].glyph, &ink_rect, &logical_rect);
+	  base_ink_x_offset = ink_rect.x;
+	  base_ink_width = ink_rect.width;
+	  base_ink_height = ink_rect.height;
+	  
+	  for (i=1; i<num_glyphs; i++)
+	    {
+	      int gl = glyphs_list[i] & 0x0fff;
+
+	      /* Check if it is a point */
+	      if (gl < 0x5B0 || gl >= 0x05D0)
+		continue;
+	      
+	      pango_font_get_glyph_extents (font_info->font,
+					    glyphs->glyphs[cluster_start_idx+i].glyph, &ink_rect, &logical_rect);
+
+	      /* The list of logical rules */
+
+	      /* Center dot of VAV */
+	      if (gl == UNI_MAPIQ && base_char == UNI_VAV)
+		{   
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset - ink_rect.x;
+
+		  /* If VAV is a vertical bar without a roof, then we
+		     need to make room for the dot by increasing the
+		     cluster width. But how can I check if that is the
+		     case??
+		  */
+		}
+
+	      /* Dot over SHIN */
+	      else if (gl == UNI_SHIN_DOT && base_char == UNI_SHIN)
+		{   
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset + base_ink_width
+		    - ink_rect.x - ink_rect.width;
+		}
+
+	      /* Dot over SIN */
+	      else if (gl == UNI_SIN_DOT && base_char == UNI_SHIN)
+		{  
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset -ink_rect.x;
+		}
+
+	      /* VOWEL DOT next to LAMED */
+	      else if (gl == UNI_SIN_DOT && base_char == UNI_LAMED)
+		{  
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset -ink_rect.x - 2*ink_rect.width;
+		}
+
+	      /* MAPIQ in PE or FINAL PE */
+	      else if (gl == UNI_MAPIQ
+		       && (base_char == UNI_PE || base_char == UNI_FINAL_PE))
+		{
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset - ink_rect.x
+		    + base_ink_width * 2/3 - ink_rect.width/2;
+
+		  /* Another option is to offset the MAPIQ in y...
+		     glyphs->glyphs[cluster_start_idx+i].geometry.y_offset
+		     -= base_ink_height/5; */
+		}
+
+	      /* VOWEL DOT next to any other character */
+	      else if (gl == UNI_SIN_DOT)
+		{   
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset -ink_rect.x;
+		}
+
+	      /* Center by default */
+	      else
+		{  
+		  glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+		    = base_ink_x_offset - ink_rect.x
+		    + base_ink_width/2 - ink_rect.width/2;
+		}
+	    }
+	}
+    }
 }
 
 static const char *
@@ -401,12 +560,15 @@ get_next_cluster(const char	*text,
   gint n_chars = 0;
   
   p = text;
-  while (p < text + length && n_chars < 3)  
+  /* What is the maximum size of a Hebrew cluster? It is certainly
+     bigger than two characters... */
+  while (p < text + length && n_chars < MAX_CLUSTER_CHRS)  
+
     {
       gunichar current = g_utf8_get_char (p);
       
       if (n_chars == 0 ||
-	  is_composible ((gunichar)(cluster[n_chars - 1]), current) )
+	  is_composible ((gunichar)(cluster[0]), current) )
 	{
 	  cluster[n_chars++] = current;
 	  p = g_utf8_next_char (p);
-- 
cgit v1.2.1