summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChookij Vanatham <chookij@src.gnome.org>2001-07-17 19:07:46 +0000
committerChookij Vanatham <chookij@src.gnome.org>2001-07-17 19:07:46 +0000
commitd494abc4bdf1fc9ad9280261f8fb1d54881355fc (patch)
tree759ad258c3511a879f32eee6f56f92372866e1b4
parent1cf950aae2902ca4f56a7e0f75ed46d719e8a4e8 (diff)
downloadpango-d494abc4bdf1fc9ad9280261f8fb1d54881355fc.tar.gz
Updated the improvements of Hebrew points positioning
from Dov Grobgeld <dov@imagic.weizmann.ac.il>
-rw-r--r--modules/hebrew/hebrew-x.c216
1 files changed, 189 insertions, 27 deletions
diff --git a/modules/hebrew/hebrew-x.c b/modules/hebrew/hebrew-x.c
index ac4be81f..d0214926 100644
--- a/modules/hebrew/hebrew-x.c
+++ b/modules/hebrew/hebrew-x.c
@@ -7,6 +7,9 @@
* Copyright (c) 1996-2000 by Sun Microsystems, Inc.
* Author: Chookij Vanatham <Chookij.Vanatham@Eng.Sun.COM>
*
+ * Hebrew points positioning improvements 2001
+ * Author: Dov Grobgeld <dov@imagic.weizmann.ac.il>
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
@@ -41,7 +44,7 @@
#define _ND 0
#define _SP 1
#define _NS (1<<1)
-#define _DA (1<<2)
+#define _DA (1<<2) /* only for dagesh... */
#define NoDefine _ND
#define SpacingLetter _SP
@@ -53,6 +56,16 @@
#define __NS 2
#define __DA 3
+/* Unicode definitions ... */
+#define UNI_VAV 0x5d5
+#define UNI_LAMED 0x5DC
+#define UNI_SHIN 0x5E9
+#define UNI_FINAL_PE 0x05E3
+#define UNI_PE 0x05E4
+#define UNI_SHIN_DOT 0x5c1
+#define UNI_SIN_DOT 0x5c2
+#define UNI_MAPIQ 0x5bc
+
#define is_char_class(wc, mask) (char_class_table[ucs2iso8859_8 ((wc))] & (mask))
#define is_composible(cur_wc, nxt_wc) (compose_table[char_type_table[ucs2iso8859_8 (cur_wc)]]\
[char_type_table[ucs2iso8859_8 (nxt_wc)]])
@@ -95,6 +108,15 @@ struct _HebrewFontInfo
PangoXSubfont subfont;
};
+/*======================================================================
+// In the tables below all Hebrew characters are categorized to
+// one of the following four classes:
+//
+// non used entries Not defined (ND)
+// accents, points Non spacing (NS)
+// punctuation and characters Spacing characters (SP)
+// dagesh "Dagesh" (DA)
+//----------------------------------------------------------------------*/
static const gint char_class_table[128] = {
/* 0, 1, 2, 3, 4, 5, 6, 7 */
@@ -139,6 +161,21 @@ static const gint char_type_table[128] = {
__ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
};
+/*======================================================================
+// The following table answers the question whether two characters
+// are composible or not. The decision is made by looking at the
+// char_type_table values for the first character in a cluster
+// vs a following character. The only three combinations that
+// are composible in Hebrew according to the table are:
+//
+// 1. a spacing character followed by non-spacing character
+// 2. a spacing character followed by a dagesh.
+// 3. a dagesh followed by a non-spacing character.
+//
+// Note that a spacing character may be followed by several non-spacing
+// accents, as the decision is always made on the base character of
+// a combination.
+//----------------------------------------------------------------------*/
static const gboolean compose_table[4][4] = {
/* Cn */ /* 0, 1, 2, 3, */
/* Cn-1 00 */ { FALSE, FALSE, FALSE, FALSE },
@@ -147,9 +184,9 @@ static const gboolean compose_table[4][4] = {
/* 30 */ { FALSE, FALSE, TRUE, FALSE },
};
-/* Sun Hebrew Font Layout
+/* ISO 8859_8 Hebrew Font Layout. Does not include any accents.
*/
-static const gint Sun_shape_table[128] = {
+static const gint iso_8859_8_shape_table[128] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -173,24 +210,25 @@ static const gint Sun_shape_table[128] = {
/* Unicode Hebrew Font Layout
*/
static const gint Unicode_shape_table[128] = {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
- 0x0000, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597,
- 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
- 0x05A0, 0x05A1, 0x0000, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7,
- 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF,
- 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
- 0x05B8, 0x05B9, 0x0000, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
- 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-
- 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
- 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
- 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
- 0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ /* 00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ /* cantillation marks followed by accents */
+ /* 10 */ 0x0000, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597,
+ 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
+ /* 20 */ 0x05A0, 0x05A1, 0x0000, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7,
+ 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF,
+ /* 30 */ 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
+ 0x05B8, 0x05B9, 0x0000, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
+ /* 40 */ 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+
+ /* Aleph-Tav, Yiddish ligatures, and punctuation */
+ /* 50 */ 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
+ 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
+ /* 60 */ 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
+ 0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ /* 70 */ 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
/* Returns a structure with information we will use to rendering given the
@@ -266,7 +304,7 @@ add_glyph (HebrewFontInfo *font_info,
PangoGlyphString *glyphs,
gint cluster_start,
PangoGlyph glyph,
- gboolean combining)
+ gboolean is_combining)
{
PangoRectangle ink_rect, logical_rect;
gint index = glyphs->num_glyphs;
@@ -274,17 +312,19 @@ add_glyph (HebrewFontInfo *font_info,
pango_glyph_string_set_size (glyphs, index + 1);
glyphs->glyphs[index].glyph = glyph;
- glyphs->glyphs[index].attr.is_cluster_start = combining ? 0 : 1;
+ glyphs->glyphs[index].attr.is_cluster_start = is_combining ? 0 : 1;
glyphs->log_clusters[index] = cluster_start;
pango_font_get_glyph_extents (font_info->font,
glyphs->glyphs[index].glyph, &ink_rect, &logical_rect);
- if (combining)
+ if (is_combining)
{
if (font_info->type == HEBREW_FONT_ISO8859_8)
{
+ /* There are no accents in 8859_8 so this should never be
+ called... Therefore I haven't even checked this. */
glyphs->glyphs[index].geometry.width =
logical_rect.width + glyphs->glyphs[index - 1].geometry.width;
if (logical_rect.width > 0)
@@ -295,9 +335,18 @@ add_glyph (HebrewFontInfo *font_info,
}
else
{
+ /* Unicode. Always make width of cluster according to the width
+ of the base character and never take the punctuation into
+ consideration.
+ */
glyphs->glyphs[index].geometry.width =
MAX (logical_rect.width, glyphs->glyphs[index -1].geometry.width);
+ /* Dov's new logic: override the MAX() width above with the base glyph's width. */
+ glyphs->glyphs[index].geometry.width = glyphs->glyphs[index -1].geometry.width;
+
glyphs->glyphs[index - 1].geometry.width = 0;
+
+ /* Here we should put in heuristics to center nikud. */
glyphs->glyphs[index].geometry.x_offset = 0;
}
}
@@ -364,7 +413,7 @@ get_glyphs_list (HebrewFontInfo *font_info,
case HEBREW_FONT_ISO8859_8:
return get_adjusted_glyphs_list (font_info, cluster,
- num_chrs, glyph_lists, Sun_shape_table);
+ num_chrs, glyph_lists, iso_8859_8_shape_table);
case HEBREW_FONT_ISO10646:
return get_adjusted_glyphs_list (font_info, cluster,
@@ -389,6 +438,116 @@ add_cluster (HebrewFontInfo *font_info,
for (i=0; i<num_glyphs; i++)
add_glyph (font_info, glyphs, cluster_start, glyphs_list[i],
i == 0 ? FALSE : TRUE);
+
+ /* Here the fun starts. Post process the positions of glyphs in the
+ cluster in order to make nikud look nice... The following is based
+ on lots of heuristic rules and could probably be improved. Especially
+ we could improve things considerably if we would access the rendered
+ bitmap and move nikud to avoid collisions etc.
+
+ Todo:
+
+ * Take care of several points and accents below the characters.
+
+ * Figure out what to do with dot inside vav if the vav does
+ not have a "roof". (Happens e.g. in Ariel).
+ */
+ if (num_glyphs > 1)
+ {
+ int i;
+ int cluster_start_idx = glyphs->num_glyphs - num_glyphs;
+
+ if (font_info->type == HEBREW_FONT_ISO10646)
+ {
+ PangoRectangle ink_rect, logical_rect;
+ int base_char = glyphs_list[0] & 0x0fff;
+ int base_ink_x_offset;
+ int base_ink_width, base_ink_height;
+
+ pango_font_get_glyph_extents (font_info->font,
+ glyphs->glyphs[cluster_start_idx].glyph, &ink_rect, &logical_rect);
+ base_ink_x_offset = ink_rect.x;
+ base_ink_width = ink_rect.width;
+ base_ink_height = ink_rect.height;
+
+ for (i=1; i<num_glyphs; i++)
+ {
+ int gl = glyphs_list[i] & 0x0fff;
+
+ /* Check if it is a point */
+ if (gl < 0x5B0 || gl >= 0x05D0)
+ continue;
+
+ pango_font_get_glyph_extents (font_info->font,
+ glyphs->glyphs[cluster_start_idx+i].glyph, &ink_rect, &logical_rect);
+
+ /* The list of logical rules */
+
+ /* Center dot of VAV */
+ if (gl == UNI_MAPIQ && base_char == UNI_VAV)
+ {
+ glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+ = base_ink_x_offset - ink_rect.x;
+
+ /* If VAV is a vertical bar without a roof, then we
+ need to make room for the dot by increasing the
+ cluster width. But how can I check if that is the
+ case??
+ */
+ }
+
+ /* Dot over SHIN */
+ else if (gl == UNI_SHIN_DOT && base_char == UNI_SHIN)
+ {
+ glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+ = base_ink_x_offset + base_ink_width
+ - ink_rect.x - ink_rect.width;
+ }
+
+ /* Dot over SIN */
+ else if (gl == UNI_SIN_DOT && base_char == UNI_SHIN)
+ {
+ glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+ = base_ink_x_offset -ink_rect.x;
+ }
+
+ /* VOWEL DOT next to LAMED. NOTE(review): tests UNI_SIN_DOT (0x5c2), not holam (0x5b9) -- confirm */
+ else if (gl == UNI_SIN_DOT && base_char == UNI_LAMED)
+ {
+ glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+ = base_ink_x_offset -ink_rect.x - 2*ink_rect.width;
+ }
+
+ /* MAPIQ in PE or FINAL PE */
+ else if (gl == UNI_MAPIQ
+ && (base_char == UNI_PE || base_char == UNI_FINAL_PE))
+ {
+ glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+ = base_ink_x_offset - ink_rect.x
+ + base_ink_width * 2/3 - ink_rect.width/2;
+
+ /* Another option is to offset the MAPIQ in y...
+ glyphs->glyphs[cluster_start_idx+i].geometry.y_offset
+ -= base_ink_height/5; */
+ }
+
+ /* VOWEL DOT next to any other character. NOTE(review): tests UNI_SIN_DOT (0x5c2), not holam (0x5b9) -- confirm */
+ else if (gl == UNI_SIN_DOT)
+ {
+ glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+ = base_ink_x_offset -ink_rect.x;
+ }
+
+ /* Center by default */
+ else
+ {
+ glyphs->glyphs[cluster_start_idx+i].geometry.x_offset
+ = base_ink_x_offset - ink_rect.x
+ + base_ink_width/2 - ink_rect.width/2;
+ }
+ }
+ }
+ }
}
static const char *
@@ -401,12 +560,15 @@ get_next_cluster(const char *text,
gint n_chars = 0;
p = text;
- while (p < text + length && n_chars < 3)
+ /* What is the maximum size of a Hebrew cluster? It is certainly
+ bigger than two characters... */
+ while (p < text + length && n_chars < MAX_CLUSTER_CHRS)
+
{
gunichar current = g_utf8_get_char (p);
if (n_chars == 0 ||
- is_composible ((gunichar)(cluster[n_chars - 1]), current) )
+ is_composible ((gunichar)(cluster[0]), current) )
{
cluster[n_chars++] = current;
p = g_utf8_next_char (p);