diff options
Diffstat (limited to 'trunk/modules/hebrew/hebrew-shaper.c')
-rw-r--r-- | trunk/modules/hebrew/hebrew-shaper.c | 477 |
1 files changed, 477 insertions, 0 deletions
diff --git a/trunk/modules/hebrew/hebrew-shaper.c b/trunk/modules/hebrew/hebrew-shaper.c new file mode 100644 index 00000000..72566758 --- /dev/null +++ b/trunk/modules/hebrew/hebrew-shaper.c @@ -0,0 +1,477 @@ +/* Pango + * hebrew-shaper.c: + * + * Copyright (c) 2001 by Sun Microsystems, Inc. + * Author: Chookij Vanatham <Chookij.Vanatham@Eng.Sun.COM> + * + * Hebrew points positioning improvements 2001 + * Author: Dov Grobgeld <dov@imagic.weizmann.ac.il> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Note March 9, 2003: I fixed a crash with regards to precomposed + * characters, by wraping all of them to be considered as ALEF as + * far as consideration about composability is concerned. The rendering + * with regards to precomposed characters AND nikud comes out really + * bad though, and should be fixed, once I have more time. + */ + +#include <config.h> +#include <glib.h> +#include "pango-engine.h" +#include "hebrew-shaper.h" + +/* Wrap all characters above 0xF00 to ALEF. */ +#define ishebrew(wc) ((wc)>0x590 && (wc)<0x600) +#define ucs2iso8859_8(wc) ((unsigned int)((unsigned int)(wc) - 0x0590 + 0x10)) +#define iso8859_8_2uni(c) ((gunichar)(c) - 0x10 + 0x0590) + +#define MAX_CLUSTER_CHRS 256 + +/* Define Hebrew character classes */ +#define _ND 0 +#define _SP 1 +#define _NS (1<<1) +#define _DA (1<<2) /* only for dagesh... */ + +#define NoDefine _ND +#define SpacingLetter _SP +#define NonSpacingPunc _NS + +/* Define Hebrew character types */ +#define __ND 0 +#define __SP 1 +#define __NS 2 +#define __DA 3 + +/* Unicode definitions needed in logics below... */ +#define UNI_ALEF 0x05D0 +#define UNI_BET 0x05D1 +#define UNI_GIMMEL 0x05d2 +#define UNI_DALED 0x05D3 +#define UNI_KAF 0x05DB +#define UNI_FINAL_KAF 0x05DA +#define UNI_VAV 0x05D5 +#define UNI_YOD 0x05D9 +#define UNI_RESH 0x05E8 +#define UNI_LAMED 0x05DC +#define UNI_SHIN 0x05E9 +#define UNI_FINAL_PE 0x05E3 +#define UNI_PE 0x05E4 +#define UNI_QOF 0x05E7 +#define UNI_TAV 0x05EA +#define UNI_SHIN_DOT 0x05C1 +#define UNI_SIN_DOT 0x05C2 +#define UNI_MAPIQ 0x05BC +#define UNI_SHEVA 0x05B0 +#define UNI_HOLAM 0x05B9 +#define UNI_QUBUTS 0x05BB +#define UNI_HATAF_SEGOL 0x05B1 +#define UNI_HATAF_QAMATZ 0x05B3 +#define UNI_TSERE 0x05B5 +#define UNI_QAMATS 0x05B8 +#define UNI_QUBUTS 0x05BB + +/*====================================================================== +// In the tables below all Hebrew characters are categorized to +// one of the following four classes: +// +// non used entries Not defined (ND) +// accents, points Non spacing (NS) +// punctuation and characters Spacing characters (SP) +// dagesh "Dagesh" (DA) +//----------------------------------------------------------------------*/ +static const gint char_class_table[128] = { + /* 0, 1, 2, 3, 4, 5, 6, 7 */ + + /*00*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND, + _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND, + + /*10*/ _ND, _NS, _NS, _NS, _NS, _NS, _NS, _NS, + _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS, + /*20*/ _NS, _NS, _ND, _NS, _NS, _NS, _NS, _NS, + _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS, + /*30*/ _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS, + _NS, _NS, _ND, _NS, _DA, _NS, _SP, _NS, + /*40*/ _SP, _NS, _NS, _SP, _NS, _ND, _ND, _ND, + _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND, + /*50*/ _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP, + _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP, + /*60*/ _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP, + _SP, _SP, _SP, _ND, _ND, _ND, _ND, _ND, + /*70*/ _SP, _SP, _SP, _SP, _SP, _ND, _ND, _ND, + _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND, +}; + +static const gint char_type_table[128] = { + /* 0, 1, 2, 3, 4, 5, 6, 7 */ + + /*00*/ __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND, + __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND, + + /*10*/ __ND, __NS, __NS, __NS, __NS, __NS, __NS, __NS, + __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS, + /*20*/ __NS, __NS, __ND, __NS, __NS, __NS, __NS, __NS, + __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS, + /*30*/ __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS, + __NS, __NS, __ND, __NS, __DA, __NS, __SP, __NS, + /*40*/ __SP, __NS, __NS, __SP, __NS, __ND, __ND, __ND, + __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND, + /*50*/ __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP, + __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP, + /*60*/ __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP, + __SP, __SP, __SP, __ND, __ND, __ND, __ND, __ND, + /*70*/ __SP, __SP, __SP, __SP, __SP, __ND, __ND, __ND, + __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND, +}; + +/*====================================================================== +// The following table answers the question whether two characters +// are composible or not. The decision is made by looking at the +// char_type_table values for the first character in a cluster +// vs a following charactrer. The only three combinations that +// are composible in Hebrew according to the table are: +// +// 1. a spacing character followed by non-spacing character +// 2. a spacing character followed by a dagesh. +// 3. a dagesh followed by a non-spacing character. +// +// Note that a spacing character may be followed by several non-spacing +// accents, as the decision is always made on the base character of +// a combination. +//----------------------------------------------------------------------*/ +static const gboolean compose_table[4][4] = { + /* Cn */ /* 0, 1, 2, 3, */ +/* Cn-1 00 */ { FALSE, FALSE, FALSE, FALSE }, + /* 10 */ { FALSE, FALSE, TRUE, TRUE }, + /* 20 */ { FALSE, FALSE, FALSE, FALSE }, + /* 30 */ { FALSE, FALSE, TRUE, FALSE }, +}; + +/* Treat all characters above 0xF000 as characters */ +#define is_hebrew(wc) ((wc) >= 0x590 && (wc) < 0x600) +#define is_char_class(wc, mask) (char_class_table[ucs2iso8859_8 ((wc))] & (mask)) +#define is_composible(cur_wc, nxt_wc) (compose_table[char_type_table[ucs2iso8859_8 (cur_wc)]]\ + [char_type_table[ucs2iso8859_8 (nxt_wc)]]) + +G_CONST_RETURN char * +hebrew_shaper_get_next_cluster(const char *text, + gint length, + gunichar *cluster, + gint *num_chrs) +{ + const char *p; + gint n_chars = 0; + + p = text; + + while (p < text + length && n_chars < MAX_CLUSTER_CHRS) + { + gunichar current = g_utf8_get_char (p); + + if (!ishebrew (current) || + (n_chars == 0 && is_char_class(current, ~(NoDefine|SpacingLetter)))) + { + /* Not a legal Hebrew cluster */ + + if (n_chars == 0) + { + cluster[n_chars++] = current; + p = g_utf8_next_char (p); + } + break; + } + else if (n_chars == 0 || + is_composible (cluster[0], current)) + { + cluster[n_chars++] = current; + p = g_utf8_next_char (p); + } + else + break; + } + + *num_chrs = n_chars; + return p; +} + +void +hebrew_shaper_get_cluster_kerning(gunichar *cluster, + gint cluster_length, + PangoRectangle ink_rect[], + + /* input and output */ + gint width[], + gint x_offset[], + gint y_offset[]) +{ + int i; + int base_ink_x_offset, base_ink_y_offset, base_ink_width, base_ink_height; + gunichar base_char = cluster[0]; + + x_offset[0] = 0; + y_offset[0] = 0; + + if (cluster_length == 1) + { + /* Make lone 'vav dot' have zero width */ + if (base_char == UNI_SHIN_DOT + || base_char == UNI_SIN_DOT + || base_char == UNI_HOLAM + ) { + x_offset[0] = -ink_rect[0].x - ink_rect[0].width; + width[0] = 0; + } + + return; + } + + base_ink_x_offset = ink_rect[0].x; + base_ink_y_offset = ink_rect[0].y; + base_ink_width = ink_rect[0].width; + base_ink_height = ink_rect[0].height; + + /* Do heuristics */ + for (i=1; i<cluster_length; i++) + { + int gl = cluster[i]; + x_offset[i] = 0; + y_offset[i] = 0; + + /* Check if it is a point */ + if (gl < 0x5B0 || gl >= 0x05D0) + continue; + + /* Center dot of VAV */ + if (gl == UNI_MAPIQ && base_char == UNI_VAV) + { + x_offset[i] = base_ink_x_offset - ink_rect[i].x; + + /* If VAV is a vertical bar without a roof, then we + need to make room for the dot by increasing the + cluster width. But how can I check if that is the + case?? + */ + /* This is wild, but it does the job of differentiating + between two M$ fonts... Base the decision on the + aspect ratio of the vav... + */ + if (base_ink_height > base_ink_width * 3.5) + { + int j; + double space = 0.7; + double kern = 0.5; + + /* Shift all characters to make place for the mapiq */ + for (j=0; j<i; j++) + x_offset[j] += ink_rect[i].width*(1+space-kern); + + width[cluster_length-1] += ink_rect[i].width*(1+space-kern); + x_offset[i] -= ink_rect[i].width*(kern); + } + } + + /* Dot over SHIN */ + else if (gl == UNI_SHIN_DOT && base_char == UNI_SHIN) + { + x_offset[i] = base_ink_x_offset + base_ink_width + - ink_rect[i].x - ink_rect[i].width; + } + + /* Dot over SIN */ + else if (gl == UNI_SIN_DOT && base_char == UNI_SHIN) + { + x_offset[i] = base_ink_x_offset - ink_rect[i].x; + } + + /* VOWEL DOT above to any other character than + SHIN or VAV should stick out a bit to the left. */ + else if ((gl == UNI_SIN_DOT || gl == UNI_HOLAM) + && base_char != UNI_SHIN && base_char != UNI_VAV) + { + x_offset[i] = base_ink_x_offset -ink_rect[i].x - ink_rect[i].width * 3/ 2; + } + + /* VOWELS under resh or vav are right aligned, if they are + narrower than the characters. Otherwise they are centered. + */ + else if ((base_char == UNI_VAV + || base_char == UNI_RESH + || base_char == UNI_YOD + || base_char == UNI_DALED + ) + && ((gl >= UNI_SHEVA && gl <= UNI_QAMATS) || + gl == UNI_QUBUTS) + && ink_rect[i].width < base_ink_width + ) + { + x_offset[i] = base_ink_x_offset + base_ink_width + - ink_rect[i].x - ink_rect[i].width; + } + + /* VOWELS under FINAL KAF are offset centered and offset in + y */ + else if ((base_char == UNI_FINAL_KAF + ) + && ((gl >= UNI_SHEVA && gl <= UNI_QAMATS) || + gl == UNI_QUBUTS)) + { + /* x are at 1/3 to take into accoun the stem */ + x_offset[i] = base_ink_x_offset - ink_rect[i].x + + base_ink_width * 1/3 - ink_rect[i].width/2; + + /* Center in y */ + y_offset[i] = base_ink_y_offset - ink_rect[i].y + + base_ink_height * 1/2 - ink_rect[i].height/2; + } + + + /* MAPIQ in PE or FINAL PE */ + else if (gl == UNI_MAPIQ + && (base_char == UNI_PE || base_char == UNI_FINAL_PE)) + { + x_offset[i]= base_ink_x_offset - ink_rect[i].x + + base_ink_width * 2/3 - ink_rect[i].width/2; + + /* Another option is to offset the MAPIQ in y... + glyphs->glyphs[cluster_start_idx+i].geometry.y_offset + -= base_ink_height/5; */ + } + + /* MAPIQ in SHIN should be moved a bit to the right */ + else if (gl == UNI_MAPIQ + && base_char == UNI_SHIN) + { + x_offset[i]= base_ink_x_offset - ink_rect[i].x + + base_ink_width * 3/5 - ink_rect[i].width/2; + } + + /* MAPIQ in YUD is right aligned */ + else if (gl == UNI_MAPIQ + && base_char == UNI_YOD) + { + x_offset[i]= base_ink_x_offset - ink_rect[i].x; + + /* Lower left in y */ + y_offset[i] = base_ink_y_offset - ink_rect[i].y + + base_ink_height - ink_rect[i].height*1.75; + + if (base_ink_height > base_ink_width * 2) + { + int j; + double space = 0.7; + double kern = 0.5; + + /* Shift all cluster characters to make space for mapiq */ + for (j=0; j<i; j++) + x_offset[j] += ink_rect[i].width*(1+space-kern); + + width[cluster_length-1] += ink_rect[i].width*(1+space-kern); + } + + } + + /* VOWEL DOT next to any other character */ + else if ((gl == UNI_SIN_DOT || gl == UNI_HOLAM) + && (base_char != UNI_VAV)) + { + x_offset[i] = base_ink_x_offset -ink_rect[i].x; + } + + /* Move nikud of taf a bit ... */ + else if (base_char == UNI_TAV && gl == UNI_MAPIQ) + { + x_offset[i] = base_ink_x_offset - ink_rect[i].x + + base_ink_width * 5/8 - ink_rect[i].width/2; + } + + /* Move center dot of characters with a right stem and no + left stem. */ + else if (gl == UNI_MAPIQ && + (base_char == UNI_BET + || base_char == UNI_DALED + || base_char == UNI_KAF + || base_char == UNI_GIMMEL + )) + { + x_offset[i] = base_ink_x_offset - ink_rect[i].x + + base_ink_width * 3/8 - ink_rect[i].width/2; + } + + /* Right align wide nikud under QOF */ + else if (base_char == UNI_QOF && + ( (gl >= UNI_HATAF_SEGOL + && gl <= UNI_HATAF_QAMATZ) + || (gl >= UNI_TSERE + && gl<= UNI_QAMATS) + || (gl == UNI_QUBUTS))) + { + x_offset[i] = base_ink_x_offset + base_ink_width + - ink_rect[i].x - ink_rect[i].width; + } + + /* Center by default */ + else + { + x_offset[i] = base_ink_x_offset - ink_rect[i].x + + base_ink_width/2 - ink_rect[i].width/2; + } + } + +} + +void +hebrew_shaper_swap_range (PangoGlyphString *glyphs, + int start, + int end) +{ + int i, j; + + for (i = start, j = end - 1; i < j; i++, j--) + { + PangoGlyphInfo glyph_info; + gint log_cluster; + + glyph_info = glyphs->glyphs[i]; + glyphs->glyphs[i] = glyphs->glyphs[j]; + glyphs->glyphs[j] = glyph_info; + + log_cluster = glyphs->log_clusters[i]; + glyphs->log_clusters[i] = glyphs->log_clusters[j]; + glyphs->log_clusters[j] = log_cluster; + } +} + +void +hebrew_shaper_bidi_reorder(PangoGlyphString *glyphs) +{ + int start, end; + + /* Swap all glyphs */ + hebrew_shaper_swap_range (glyphs, 0, glyphs->num_glyphs); + + /* Now reorder glyphs within each cluster back to LTR */ + for (start = 0; start < glyphs->num_glyphs;) + { + end = start; + while (end < glyphs->num_glyphs && + glyphs->log_clusters[end] == glyphs->log_clusters[start]) + end++; + + hebrew_shaper_swap_range (glyphs, start, end); + start = end; + } +} |