/* Pango
 * hebrew-shaper.c:
 *
 * Copyright (c) 2001 by Sun Microsystems, Inc.
 * Author: Chookij Vanatham
 *
 * Hebrew points positioning improvements 2001
 * Author: Dov Grobgeld
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Note March 9, 2003: I fixed a crash with regard to precomposed
 * characters, by wrapping all of them to be considered as ALEF as
 * far as composability is concerned.  The rendering of precomposed
 * characters together with nikud still comes out really bad, though,
 * and should be fixed once I have more time.
 */

#include <glib.h>
#include <string.h>

#include "pango-engine.h"
#include "hebrew-shaper.h"

/* Wrap all characters above 0xF00 to ALEF. */
#define ishebrew(wc)        ((wc) > 0x590 && (wc) < 0x600)
#define ucs2iso8859_8(wc)   ((unsigned int)((unsigned int)(wc) - 0x0590 + 0x10))
#define iso8859_8_2uni(c)   ((gunichar)(c) - 0x10 + 0x0590)

#define MAX_CLUSTER_CHRS    256

/* Define Hebrew character classes */
#define _ND    0          /* not defined */
#define _SP    1          /* spacing letter */
#define _NS    (1<<1)     /* non-spacing point or accent */
#define _DA    (1<<2)     /* only for dagesh... */

#define NoDefine        _ND
#define SpacingLetter   _SP
#define NonSpacingPunc  _NS

/* Define Hebrew character types */
#define __ND    0
#define __SP    1
#define __NS    2
#define __DA    3
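/* For instance, ucs2iso8859_8() maps ALEF (U+05D0) to 0x05D0 - 0x0590 + 0x10
 * = 0x50, QAMATS (U+05B8) to 0x38 and DAGESH/MAPIQ (U+05BC) to 0x3C; the
 * 128-entry class and type tables below mark those indices as a spacing
 * letter (_SP/__SP), a non-spacing point (_NS/__NS) and a dagesh (_DA/__DA)
 * respectively.
 */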
/* Unicode definitions needed in logics below... */
#define UNI_ALEF          0x05D0
#define UNI_BET           0x05D1
#define UNI_GIMMEL        0x05D2
#define UNI_DALED         0x05D3
#define UNI_KAF           0x05DB
#define UNI_FINAL_KAF     0x05DA
#define UNI_VAV           0x05D5
#define UNI_YOD           0x05D9
#define UNI_RESH          0x05E8
#define UNI_LAMED         0x05DC
#define UNI_SHIN          0x05E9
#define UNI_FINAL_PE      0x05E3
#define UNI_PE            0x05E4
#define UNI_QOF           0x05E7
#define UNI_TAV           0x05EA
#define UNI_SHIN_DOT      0x05C1
#define UNI_SIN_DOT       0x05C2
#define UNI_MAPIQ         0x05BC
#define UNI_SHEVA         0x05B0
#define UNI_HOLAM         0x05B9
#define UNI_QUBUTS        0x05BB
#define UNI_HATAF_SEGOL   0x05B1
#define UNI_HATAF_QAMATZ  0x05B3
#define UNI_TSERE         0x05B5
#define UNI_QAMATS        0x05B8

/*======================================================================
//  In the tables below all Hebrew characters are categorized into
//  one of the following four classes:
//
//      non-used entries              Not defined          (ND)
//      accents, points               Non spacing          (NS)
//      punctuation and characters    Spacing characters   (SP)
//      dagesh                        "Dagesh"             (DA)
//----------------------------------------------------------------------*/
static const gint char_class_table[128] = {
  /*       0,   1,   2,   3,   4,   5,   6,   7 */
  /*00*/ _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
         _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
  /*10*/ _ND, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
         _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
  /*20*/ _NS, _NS, _ND, _NS, _NS, _NS, _NS, _NS,
         _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
  /*30*/ _NS, _NS, _NS, _NS, _NS, _NS, _NS, _NS,
         _NS, _NS, _ND, _NS, _DA, _NS, _SP, _NS,
  /*40*/ _SP, _NS, _NS, _SP, _NS, _ND, _ND, _ND,
         _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
  /*50*/ _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP,
         _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP,
  /*60*/ _SP, _SP, _SP, _SP, _SP, _SP, _SP, _SP,
         _SP, _SP, _SP, _ND, _ND, _ND, _ND, _ND,
  /*70*/ _SP, _SP, _SP, _SP, _SP, _ND, _ND, _ND,
         _ND, _ND, _ND, _ND, _ND, _ND, _ND, _ND,
};

static const gint char_type_table[128] = {
  /*        0,    1,    2,    3,    4,    5,    6,    7 */
  /*00*/ __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
         __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
  /*10*/ __ND, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
         __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
  /*20*/ __NS, __NS, __ND, __NS, __NS, __NS, __NS, __NS,
         __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
  /*30*/ __NS, __NS, __NS, __NS, __NS, __NS, __NS, __NS,
         __NS, __NS, __ND, __NS, __DA, __NS, __SP, __NS,
  /*40*/ __SP, __NS, __NS, __SP, __NS, __ND, __ND, __ND,
         __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
  /*50*/ __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP,
         __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP,
  /*60*/ __SP, __SP, __SP, __SP, __SP, __SP, __SP, __SP,
         __SP, __SP, __SP, __ND, __ND, __ND, __ND, __ND,
  /*70*/ __SP, __SP, __SP, __SP, __SP, __ND, __ND, __ND,
         __ND, __ND, __ND, __ND, __ND, __ND, __ND, __ND,
};

/*======================================================================
//  The following table answers the question whether two characters
//  are composable or not.  The decision is made by looking at the
//  char_type_table value of the first character in a cluster
//  vs that of a following character.  The only three combinations that
//  are composable in Hebrew according to the table are:
//
//    1. a spacing character followed by a non-spacing character
//    2. a spacing character followed by a dagesh
//    3. a dagesh followed by a non-spacing character
//
//  Note that a spacing character may be followed by several non-spacing
//  accents, as the decision is always made on the base character of
//  a combination.
//----------------------------------------------------------------------*/
static const gboolean compose_table[4][4] = {
      /* Cn:      0,     1,     2,     3  */
  /* Cn-1 */
  /* 00 */  { FALSE, FALSE, FALSE, FALSE },
  /* 10 */  { FALSE, FALSE, TRUE,  TRUE  },
  /* 20 */  { FALSE, FALSE, FALSE, FALSE },
  /* 30 */  { FALSE, FALSE, TRUE,  FALSE },
};

/* Treat all characters above 0xF000 as characters */
#define is_hebrew(wc)            ((wc) >= 0x590 && (wc) < 0x600)
#define is_char_class(wc, mask)  (char_class_table[ucs2iso8859_8 ((wc))] & (mask))
#define is_composible(cur_wc, nxt_wc) \
        (compose_table[char_type_table[ucs2iso8859_8 (cur_wc)]] \
                      [char_type_table[ucs2iso8859_8 (nxt_wc)]])
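/*
 * A small self-check sketch of the rules above (illustrative only, never
 * compiled into the module; the function name is hypothetical): a spacing
 * letter composes with a point or a dagesh, a dagesh composes with a point,
 * but nothing composes after a point.
 */
#if 0
static void
check_compose_rules (void)
{
  g_assert (is_composible (UNI_ALEF, UNI_QAMATS));    /* letter + point  */
  g_assert (is_composible (UNI_ALEF, UNI_MAPIQ));     /* letter + dagesh */
  g_assert (is_composible (UNI_MAPIQ, UNI_QAMATS));   /* dagesh + point  */
  g_assert (!is_composible (UNI_QAMATS, UNI_ALEF));   /* point + letter  */
  g_assert (!is_composible (UNI_QAMATS, UNI_MAPIQ));  /* point + dagesh  */
}
#endif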
G_CONST_RETURN char *
hebrew_shaper_get_next_cluster (const char  *text,
                                gint         length,
                                gunichar    *cluster,
                                gint        *num_chrs)
{
  const char *p;
  gint n_chars = 0;

  p = text;
  while (p < text + length && n_chars < MAX_CLUSTER_CHRS)
    {
      gunichar current = g_utf8_get_char (p);

      if (!ishebrew (current) ||
          (n_chars == 0 && is_char_class (current, ~(NoDefine | SpacingLetter))))
        {
          /* Not a legal Hebrew cluster */

          if (n_chars == 0)
            {
              cluster[n_chars++] = current;
              p = g_utf8_next_char (p);
            }
          break;
        }
      else if (n_chars == 0 ||
               is_composible (cluster[0], current))
        {
          cluster[n_chars++] = current;
          p = g_utf8_next_char (p);
        }
      else
        break;
    }

  *num_chrs = n_chars;
  return p;
}
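/*
 * A minimal usage sketch (illustrative only, never compiled): walking a
 * UTF-8 run cluster by cluster.  The function name example_walk_clusters
 * and the g_print() reporting are hypothetical, not part of this module.
 */
#if 0
static void
example_walk_clusters (const char *text, gint length)
{
  const char *p = text;

  while (p < text + length)
    {
      gunichar cluster[MAX_CLUSTER_CHRS];
      gint n_chars;

      /* Returns a pointer just past the cluster it consumed. */
      p = hebrew_shaper_get_next_cluster (p, length - (gint)(p - text),
                                          cluster, &n_chars);
      g_print ("cluster of %d char(s), base U+%04X\n",
               n_chars, (guint) cluster[0]);
    }
}
#endif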
void
hebrew_shaper_get_cluster_kerning (gunichar        *cluster,
                                   gint             cluster_length,
                                   PangoRectangle   ink_rect[],   /* input and output */
                                   gint             width[],
                                   gint             x_offset[],
                                   gint             y_offset[])
{
  int i;
  int base_ink_x_offset, base_ink_y_offset, base_ink_width, base_ink_height;
  gunichar base_char = cluster[0];

  x_offset[0] = 0;
  y_offset[0] = 0;

  if (cluster_length == 1)
    {
      /* Make lone 'vav dot' have zero width */
      if (base_char == UNI_SHIN_DOT
          || base_char == UNI_SIN_DOT
          || base_char == UNI_HOLAM)
        {
          x_offset[0] = -ink_rect[0].x - ink_rect[0].width;
          width[0] = 0;
        }
      return;
    }

  base_ink_x_offset = ink_rect[0].x;
  base_ink_y_offset = ink_rect[0].y;
  base_ink_width    = ink_rect[0].width;
  base_ink_height   = ink_rect[0].height;

  /* Do heuristics */
  for (i = 1; i < cluster_length; i++)
    {
      gunichar gl = cluster[i];

      x_offset[i] = 0;
      y_offset[i] = 0;

      /* Skip characters that are not points */
      if (gl < 0x05B0 || gl >= 0x05D0)
        continue;

      /* Center dot of VAV */
      if (gl == UNI_MAPIQ && base_char == UNI_VAV)
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x;

          /* If VAV is a vertical bar without a roof, then we need to
             make room for the dot by increasing the cluster width.
             But how can I check if that is the case?? */
          /* This is wild, but it does the job of differentiating
             between two M$ fonts...  Base the decision on the aspect
             ratio of the vav... */
          if (base_ink_height > base_ink_width * 3.5)
            {
              int j;
              double space = 0.7;
              double kern = 0.5;

              /* Shift all characters to make place for the mapiq,
                 then pull the mapiq back towards the vav and widen
                 the cluster accordingly. */
              for (j = 0; j < cluster_length; j++)
                x_offset[j] += base_ink_width * (space + kern);

              x_offset[i] -= base_ink_width * kern;

              width[0] += base_ink_width * (space + kern);
            }
        }

      /* Narrow points below VAV, DALED or RESH are right aligned */
      else if ((base_char == UNI_VAV
                || base_char == UNI_DALED
                || base_char == UNI_RESH)
               && ((gl >= UNI_SHEVA && gl <= UNI_QAMATS)
                   || gl == UNI_QUBUTS)
               && ink_rect[i].width < base_ink_width)
        {
          x_offset[i] = base_ink_x_offset + base_ink_width
            - ink_rect[i].x - ink_rect[i].width;
        }

      /* VOWELS under FINAL KAF are centered and offset in y */
      else if (base_char == UNI_FINAL_KAF
               && ((gl >= UNI_SHEVA && gl <= UNI_QAMATS)
                   || gl == UNI_QUBUTS))
        {
          /* x is at 1/3 to take into account the stem */
          x_offset[i] = base_ink_x_offset - ink_rect[i].x
            + base_ink_width * 1/3 - ink_rect[i].width/2;

          /* Center in y */
          y_offset[i] = base_ink_y_offset - ink_rect[i].y
            + base_ink_height * 1/2 - ink_rect[i].height/2;
        }

      /* MAPIQ in PE or FINAL PE */
      else if (gl == UNI_MAPIQ
               && (base_char == UNI_PE || base_char == UNI_FINAL_PE))
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x
            + base_ink_width * 2/3 - ink_rect[i].width/2;

          /* Another option is to offset the MAPIQ in y...
             glyphs->glyphs[cluster_start_idx+i].geometry.y_offset
               -= base_ink_height/5;
          */
        }

      /* MAPIQ in SHIN should be moved a bit to the right */
      else if (gl == UNI_MAPIQ && base_char == UNI_SHIN)
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x
            + base_ink_width * 3/5 - ink_rect[i].width/2;
        }

      /* MAPIQ in YUD is right aligned */
      else if (gl == UNI_MAPIQ && base_char == UNI_YOD)
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x;

          /* Lower left in y */
          y_offset[i] = base_ink_y_offset - ink_rect[i].y
            + base_ink_height - ink_rect[i].height*1.75;

          if (base_ink_height > base_ink_width * 2)
            {
              int j;
              double space = 0.7;
              double kern = 0.5;

              /* Shift all cluster characters to make space for the
                 mapiq, as for the vav above. */
              for (j = 0; j < cluster_length; j++)
                x_offset[j] += base_ink_width * (space + kern);

              x_offset[i] -= base_ink_width * kern;

              width[0] += base_ink_width * (space + kern);
            }
        }

      /* VOWELS under YUD are right aligned */
      else if (base_char == UNI_YOD
               && ((gl >= UNI_HATAF_SEGOL && gl <= UNI_HATAF_QAMATZ)
                   || (gl >= UNI_TSERE && gl <= UNI_QAMATS)
                   || (gl == UNI_QUBUTS)))
        {
          x_offset[i] = base_ink_x_offset + base_ink_width
            - ink_rect[i].x - ink_rect[i].width;
        }

      /* Center by default */
      else
        {
          x_offset[i] = base_ink_x_offset - ink_rect[i].x
            + base_ink_width/2 - ink_rect[i].width/2;
        }
    }
}

void
hebrew_shaper_swap_range (PangoGlyphString *glyphs,
                          int               start,
                          int               end)
{
  int i, j;

  for (i = start, j = end - 1; i < j; i++, j--)
    {
      PangoGlyphInfo glyph_info;
      gint log_cluster;

      glyph_info = glyphs->glyphs[i];
      glyphs->glyphs[i] = glyphs->glyphs[j];
      glyphs->glyphs[j] = glyph_info;

      log_cluster = glyphs->log_clusters[i];
      glyphs->log_clusters[i] = glyphs->log_clusters[j];
      glyphs->log_clusters[j] = log_cluster;
    }
}

void
hebrew_shaper_bidi_reorder (PangoGlyphString *glyphs)
{
  int start, end;

  /* Swap all glyphs */
  hebrew_shaper_swap_range (glyphs, 0, glyphs->num_glyphs);

  /* Now reorder glyphs within each cluster back to LTR */
  for (start = 0; start < glyphs->num_glyphs;)
    {
      end = start;
      while (end < glyphs->num_glyphs &&
             glyphs->log_clusters[end] == glyphs->log_clusters[start])
        end++;

      hebrew_shaper_swap_range (glyphs, start, end);
      start = end;
    }
}
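/*
 * Illustrative sketch of the reordering above (never compiled; the function
 * name and the toy offsets are hypothetical): a glyph string whose first two
 * glyphs form one cluster (log_cluster 0) and whose third glyph is a lone
 * base at byte offset 4.  After hebrew_shaper_bidi_reorder() the clusters
 * appear in reversed (visual RTL) order, but the glyphs inside each cluster
 * keep their original order, so log_clusters becomes { 4, 0, 0 }.
 */
#if 0
static void
example_bidi_reorder (void)
{
  PangoGlyphString *glyphs = pango_glyph_string_new ();

  pango_glyph_string_set_size (glyphs, 3);
  glyphs->log_clusters[0] = 0;    /* base of first cluster  */
  glyphs->log_clusters[1] = 0;    /* point in first cluster */
  glyphs->log_clusters[2] = 4;    /* second cluster         */

  hebrew_shaper_bidi_reorder (glyphs);

  g_assert (glyphs->log_clusters[0] == 4);
  g_assert (glyphs->log_clusters[1] == 0);
  g_assert (glyphs->log_clusters[2] == 0);

  pango_glyph_string_free (glyphs);
}
#endif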