From d494abc4bdf1fc9ad9280261f8fb1d54881355fc Mon Sep 17 00:00:00 2001 From: Chookij Vanatham Date: Tue, 17 Jul 2001 19:07:46 +0000 Subject: Updated the improvements of Hebrew points positioning from Dov Grobgeld --- modules/hebrew/hebrew-x.c | 216 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 189 insertions(+), 27 deletions(-) (limited to 'modules') diff --git a/modules/hebrew/hebrew-x.c b/modules/hebrew/hebrew-x.c index ac4be81f..d0214926 100644 --- a/modules/hebrew/hebrew-x.c +++ b/modules/hebrew/hebrew-x.c @@ -7,6 +7,9 @@ * Copyright (c) 1996-2000 by Sun Microsystems, Inc. * Author: Chookij Vanatham * + * Hebrew points positioning improvements 2001 + * Author: Dov Grobgeld + * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either @@ -41,7 +44,7 @@ #define _ND 0 #define _SP 1 #define _NS (1<<1) -#define _DA (1<<2) +#define _DA (1<<2) /* only for dagesh... */ #define NoDefine _ND #define SpacingLetter _SP @@ -53,6 +56,16 @@ #define __NS 2 #define __DA 3 +/* Unicode definitions ... 
*/ +#define UNI_VAV 0x5d5 +#define UNI_LAMED 0x5DC +#define UNI_SHIN 0x5E9 +#define UNI_FINAL_PE 0x05E3 +#define UNI_PE 0x05E4 +#define UNI_SHIN_DOT 0x5c1 +#define UNI_SIN_DOT 0x5c2 +#define UNI_MAPIQ 0x5bc + #define is_char_class(wc, mask) (char_class_table[ucs2iso8859_8 ((wc))] & (mask)) #define is_composible(cur_wc, nxt_wc) (compose_table[char_type_table[ucs2iso8859_8 (cur_wc)]]\ [char_type_table[ucs2iso8859_8 (nxt_wc)]]) @@ -95,6 +108,15 @@ struct _HebrewFontInfo PangoXSubfont subfont; }; +/*====================================================================== +// In the tables below all Hebrew characters are categorized into +// one of the following four classes: +// +// non used entries Not defined (ND) +// accents, points Non spacing (NS) +// punctuation and characters Spacing characters (SP) +// dagesh "Dagesh" (DA) +//----------------------------------------------------------------------*/ static const gint char_class_table[128] = { /* 0, 1, 2, 3, 4, 5, 6, 7 */ @@ -139,6 +161,21 @@ static const gint char_type_table[128] = { __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND, }; +/*====================================================================== +// The following table answers the question whether two characters +// are composible or not. The decision is made by looking at the +// char_type_table values for the first character in a cluster +// vs a following character. The only three combinations that +// are composible in Hebrew according to the table are: +// +// 1. a spacing character followed by non-spacing character +// 2. a spacing character followed by a dagesh. +// 3. a dagesh followed by a non-spacing character. +// +// Note that a spacing character may be followed by several non-spacing +// accents, as the decision is always made on the base character of +// a combination. 
+//----------------------------------------------------------------------*/ static const gboolean compose_table[4][4] = { /* Cn */ /* 0, 1, 2, 3, */ /* Cn-1 00 */ { FALSE, FALSE, FALSE, FALSE }, @@ -147,9 +184,9 @@ static const gboolean compose_table[4][4] = { /* 30 */ { FALSE, FALSE, TRUE, FALSE }, }; -/* Sun Hebrew Font Layout +/* ISO 8859_8 Hebrew Font Layout. Does not include any accents. */ -static const gint Sun_shape_table[128] = { +static const gint iso_8859_8_shape_table[128] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -173,24 +210,25 @@ static const gint Sun_shape_table[128] = { /* Unicode Hebrew Font Layout */ static const gint Unicode_shape_table[128] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - - 0x0000, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597, - 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F, - 0x05A0, 0x05A1, 0x0000, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7, - 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF, - 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, - 0x05B8, 0x05B9, 0x0000, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, - 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - - 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, - 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, - 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, - 0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* cantillation marks followed by accents */ + /* 10 */ 0x0000, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597, + 
0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F, + /* 20 */ 0x05A0, 0x05A1, 0x0000, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7, + 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF, + /* 30 */ 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, + 0x05B8, 0x05B9, 0x0000, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF, + /* 40 */ 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + + /* Aleph-Tav, Yiddish ligatures, and punctuation */ + /* 50 */ 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, + 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF, + /* 60 */ 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, + 0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + /* 70 */ 0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, }; /* Returns a structure with information we will use to rendering given the @@ -266,7 +304,7 @@ add_glyph (HebrewFontInfo *font_info, PangoGlyphString *glyphs, gint cluster_start, PangoGlyph glyph, - gboolean combining) + gboolean is_combining) { PangoRectangle ink_rect, logical_rect; gint index = glyphs->num_glyphs; @@ -274,17 +312,19 @@ add_glyph (HebrewFontInfo *font_info, pango_glyph_string_set_size (glyphs, index + 1); glyphs->glyphs[index].glyph = glyph; - glyphs->glyphs[index].attr.is_cluster_start = combining ? 0 : 1; + glyphs->glyphs[index].attr.is_cluster_start = is_combining ? 0 : 1; glyphs->log_clusters[index] = cluster_start; pango_font_get_glyph_extents (font_info->font, glyphs->glyphs[index].glyph, &ink_rect, &logical_rect); - if (combining) + if (is_combining) { if (font_info->type == HEBREW_FONT_ISO8859_8) { + /* There are no accents in 8859_8 so this should never be + called... Therefore I haven't even checked this. 
*/ glyphs->glyphs[index].geometry.width = logical_rect.width + glyphs->glyphs[index - 1].geometry.width; if (logical_rect.width > 0) @@ -295,9 +335,18 @@ add_glyph (HebrewFontInfo *font_info, } else { + /* Unicode. Always make width of cluster according to the width + of the base character and never take the punctuation into + consideration. + */ glyphs->glyphs[index].geometry.width = MAX (logical_rect.width, glyphs->glyphs[index -1].geometry.width); + /* Dov's new logic... */ + glyphs->glyphs[index].geometry.width = glyphs->glyphs[index -1].geometry.width; + glyphs->glyphs[index - 1].geometry.width = 0; + + /* Here we should put in heuristics to center nikud. */ glyphs->glyphs[index].geometry.x_offset = 0; } } @@ -364,7 +413,7 @@ get_glyphs_list (HebrewFontInfo *font_info, case HEBREW_FONT_ISO8859_8: return get_adjusted_glyphs_list (font_info, cluster, - num_chrs, glyph_lists, Sun_shape_table); + num_chrs, glyph_lists, iso_8859_8_shape_table); case HEBREW_FONT_ISO10646: return get_adjusted_glyphs_list (font_info, cluster, @@ -389,6 +438,116 @@ add_cluster (HebrewFontInfo *font_info, for (i=0; i 1) + { + int i; + int cluster_start_idx = glyphs->num_glyphs - num_glyphs; + + if (font_info->type == HEBREW_FONT_ISO10646) + { + PangoRectangle ink_rect, logical_rect; + int base_char = glyphs_list[0] & 0x0fff; + int base_ink_x_offset; + int base_ink_width, base_ink_height; + + pango_font_get_glyph_extents (font_info->font, + glyphs->glyphs[cluster_start_idx].glyph, &ink_rect, &logical_rect); + base_ink_x_offset = ink_rect.x; + base_ink_width = ink_rect.width; + base_ink_height = ink_rect.height; + + for (i=1; i= 0x05D0) + continue; + + pango_font_get_glyph_extents (font_info->font, + glyphs->glyphs[cluster_start_idx+i].glyph, &ink_rect, &logical_rect); + + /* The list of logical rules */ + + /* Center dot of VAV */ + if (gl == UNI_MAPIQ && base_char == UNI_VAV) + { + glyphs->glyphs[cluster_start_idx+i].geometry.x_offset + = base_ink_x_offset - ink_rect.x; + + /* If VAV is 
a vertical bar without a roof, then we + need to make room for the dot by increasing the + cluster width. But how can I check if that is the + case?? + */ + } + + /* Dot over SHIN */ + else if (gl == UNI_SHIN_DOT && base_char == UNI_SHIN) + { + glyphs->glyphs[cluster_start_idx+i].geometry.x_offset + = base_ink_x_offset + base_ink_width + - ink_rect.x - ink_rect.width; + } + + /* Dot over SIN */ + else if (gl == UNI_SIN_DOT && base_char == UNI_SHIN) + { + glyphs->glyphs[cluster_start_idx+i].geometry.x_offset + = base_ink_x_offset -ink_rect.x; + } + + /* VOWEL DOT next to LAMED */ + else if (gl == UNI_SIN_DOT && base_char == UNI_LAMED) + { + glyphs->glyphs[cluster_start_idx+i].geometry.x_offset + = base_ink_x_offset -ink_rect.x - 2*ink_rect.width; + } + + /* MAPIQ in PE or FINAL PE */ + else if (gl == UNI_MAPIQ + && (base_char == UNI_PE || base_char == UNI_FINAL_PE)) + { + glyphs->glyphs[cluster_start_idx+i].geometry.x_offset + = base_ink_x_offset - ink_rect.x + + base_ink_width * 2/3 - ink_rect.width/2; + + /* Another option is to offset the MAPIQ in y... + glyphs->glyphs[cluster_start_idx+i].geometry.y_offset + -= base_ink_height/5; */ + } + + /* VOWEL DOT next to any other character */ + else if (gl == UNI_SIN_DOT) + { + glyphs->glyphs[cluster_start_idx+i].geometry.x_offset + = base_ink_x_offset -ink_rect.x; + } + + /* Center by default */ + else + { + glyphs->glyphs[cluster_start_idx+i].geometry.x_offset + = base_ink_x_offset - ink_rect.x + + base_ink_width/2 - ink_rect.width/2; + } + } + } + } } static const char * @@ -401,12 +560,15 @@ get_next_cluster(const char *text, gint n_chars = 0; p = text; - while (p < text + length && n_chars < 3) + /* What is the maximum size of a Hebrew cluster? It is certainly + bigger than two characters... 
*/ + while (p < text + length && n_chars < MAX_CLUSTER_CHRS) + { gunichar current = g_utf8_get_char (p); if (n_chars == 0 || - is_composible ((gunichar)(cluster[n_chars - 1]), current) ) + is_composible ((gunichar)(cluster[0]), current) ) { cluster[n_chars++] = current; p = g_utf8_next_char (p); -- cgit v1.2.1