From f9cae2d7edabf388e67b40962a9b1057d4e42613 Mon Sep 17 00:00:00 2001 From: Changwoo Ryu Date: Tue, 19 Dec 2006 18:05:05 +0000 Subject: Optimizing for non-old Hangul users; no more waste of decompose and 2006-12-20 Changwoo Ryu * modules/hangul/hangul-defs.h: * modules/hangul/hangul-fc.c (hangul_engine_shape): Optimizing for non-old Hangul users; no more waste of decompose and compose. (render_syllable): The string argument is now in UTF-8. Normalization is now done only when the entire sequence is equivalent to a precomposed syllable. (render_isolated_tone): Conditioned out tone mark's dummy base character drawing. --- ChangeLog | 12 +++ modules/hangul/hangul-defs.h | 15 ++- modules/hangul/hangul-fc.c | 212 +++++++++++++++++++++++++++---------------- 3 files changed, 158 insertions(+), 81 deletions(-) diff --git a/ChangeLog b/ChangeLog index b7a60cc7..64834ea5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2006-12-20 Changwoo Ryu + + * modules/hangul/hangul-defs.h: + * modules/hangul/hangul-fc.c + (hangul_engine_shape): Optimizing for non-old Hangul users; no + more waste of decompose and compose. + (render_syllable): The string argument is now in UTF-8. Normalization + is now done only when the entire sequence is equivalent to a + precomposed syllable. + (render_isolated_tone): Conditioned out tone mark's dummy base + character drawing. + 2006-12-17 Behdad Esfahbod * pango/pangocairo-render.c (pango_cairo_renderer_draw_glyphs): diff --git a/modules/hangul/hangul-defs.h b/modules/hangul/hangul-defs.h index 1c6512f8..8cef8ba8 100644 --- a/modules/hangul/hangul-defs.h +++ b/modules/hangul/hangul-defs.h @@ -1,7 +1,7 @@ /* Pango * hangul-defs.h: * - * Copyright (C) 2002-2005 Changwoo Ryu + * Copyright (C) 2002-2006 Changwoo Ryu * Author: Changwoo Ryu * * This library is free software; you can redistribute it and/or @@ -70,6 +70,19 @@ /* if a syllable has a jongseong */ #define S_HAS_T(s) (((s) - SBASE) % TCOUNT) +/* non hangul */ +#define IS_HANGUL(wc) (IS_S(wc) || IS_JAMO(wc) || IS_M(wc)) + +/* syllable boundary condition */ +#define IS_BOUNDARY(prev,next) \ + ((!IS_L(prev) && IS_S(wc)) || \ + !IS_HANGUL(next) || \ + (IS_S(prev) && S_HAS_T(prev) && IS_L(next)) || \ + (IS_T(prev) && (IS_L(next) || IS_V(next))) || \ + (IS_S(prev) && !S_HAS_T(prev) && IS_L(next)) || \ + (IS_V(prev) && IS_L(next)) || \ + IS_M(prev)) + /* composing/decomposing */ #define S_FROM_LVT(l,v,t) (SBASE + (((l) - LBASE) * VCOUNT + ((v) - VBASE)) * TCOUNT + ((t) - TBASE)) #define S_FROM_LV(l,v) (SBASE + (((l) - LBASE) * VCOUNT + ((v) - VBASE)) * TCOUNT) diff --git a/modules/hangul/hangul-fc.c b/modules/hangul/hangul-fc.c index 8b85c19a..9828ef8f 100644 --- a/modules/hangul/hangul-fc.c +++ b/modules/hangul/hangul-fc.c @@ -1,7 +1,7 @@ /* Pango * hangul-fc.c: Hangul shaper for FreeType based backends * - * Copyright (C) 2002-2004 Changwoo Ryu + * Copyright (C) 2002-2006 Changwoo Ryu * Author: Changwoo Ryu * * This library is free software; you can redistribute it and/or @@ -127,8 +127,6 @@ set_glyph_tone (PangoFont *font, PangoGlyphString *glyphs, int i, #define find_char(font,wc) \ pango_fc_font_get_glyph((PangoFcFont *)font, wc) -#define get_unknown_glyph(font,wc) \ - PANGO_GET_UNKNOWN_GLYPH ( wc) static void render_tone (PangoFont *font, gunichar tone, PangoGlyphString *glyphs, @@ -152,7 +150,7 @@ render_tone (PangoFont *font, gunichar tone, PangoGlyphString *glyphs, } else set_glyph (font, glyphs, *n_glyphs, cluster_offset, - get_unknown_glyph (font, tone)); + PANGO_GET_UNKNOWN_GLYPH (tone)); } (*n_glyphs)++; } @@ -164,6 +162,7 @@ static void render_isolated_tone (PangoFont *font, gunichar tone, PangoGlyphString *glyphs, int *n_glyphs, int cluster_offset) { +#if 0 /* FIXME: what kind of hack is it? it draws dummy glyphs. */ /* Find a base character to render the mark on */ int index = find_char (font, 0x25cc); /* DOTTED CIRCLE */ @@ -172,71 +171,151 @@ render_isolated_tone (PangoFont *font, gunichar tone, PangoGlyphString *glyphs, if (!index) index = find_char (font, ' '); /* Space */ if (!index) /* Unknown glyph box with 0000 in it */ - index = find_char (font, get_unknown_glyph (font, 0)); + index = find_char (font, PANGO_GET_UNKNOWN_GLYPH (0)); /* Add the base character */ pango_glyph_string_set_size (glyphs, *n_glyphs + 1); set_glyph (font, glyphs, *n_glyphs, cluster_offset, index); (*n_glyphs)++; +#endif - /* And the tone mrak + /* And the tone mark */ render_tone(font, tone, glyphs, n_glyphs, cluster_offset); } static void -render_syllable (PangoFont *font, gunichar *text, int length, +render_syllable (PangoFont *font, const char *str, int length, PangoGlyphString *glyphs, int *n_glyphs, int cluster_offset) { int n_prev_glyphs = *n_glyphs; int index; - gunichar wc, tone; - int i, j, composed; - - if (IS_M (text[length - 1])) + gunichar wc = 0, tone = 0, text[4]; + int i, j, composed = 0; + const char *p; + + /* Normalize it only when the entire sequence is equivalent to a + * precomposed syllable. It's usually better than prefix + * normalization both for poor-featured fonts and for smart fonts. + * I have seen no smart font which can render S+T as a syllable + * form. + */ + + if (length == 3 || length == 4) { - tone = text[length - 1]; - length--; + p = str; + text[0] = g_utf8_get_char(p); + p = g_utf8_next_char(p); + text[1] = g_utf8_get_char(p); + p = g_utf8_next_char(p); + text[2] = g_utf8_get_char(p); + + if (length == 4 && !IS_M(g_utf8_get_char(g_utf8_next_char(p)))) + goto lvt_out; /* draw the tone mark later */ + + if (IS_L_S(text[0]) && IS_V_S(text[1]) && IS_T_S(text[2])) + { + composed = 3; + wc = S_FROM_LVT(text[0], text[1], text[2]); + str = g_utf8_next_char(p); + goto normalize_out; + } } - else - tone = 0; + lvt_out: - if (length >= 3 && IS_L_S(text[0]) && IS_V_S(text[1]) && IS_T_S(text[2])) - composed = 3; - else if (length >= 2 && IS_L_S(text[0]) && IS_V_S(text[1])) - composed = 2; - else - composed = 0; + if (length == 2 || length == 3) + { + p = str; + text[0] = g_utf8_get_char(p); + p = g_utf8_next_char(p); + text[1] = g_utf8_get_char(p); + + if (length == 3 && !IS_M(g_utf8_get_char(g_utf8_next_char(p)))) + goto lv_out; /* draw the tone mark later */ + if (IS_L_S(text[0]) && IS_V_S(text[1])) + { + composed = 2; + wc = S_FROM_LV(text[0], text[1]); + str = g_utf8_next_char(p); + } + else if (IS_S(text[0] && !S_HAS_T(text[0]) && IS_T_S(text[1]))) + { + composed = 2; + wc = text[0] + (text[1] - TBASE); + str = g_utf8_next_char(p); + goto normalize_out; + } + } + lv_out: + normalize_out: if (composed) { - if (composed == 3) - wc = S_FROM_LVT(text[0], text[1], text[2]); - else - wc = S_FROM_LV(text[0], text[1]); index = find_char (font, wc); pango_glyph_string_set_size (glyphs, *n_glyphs + 1); if (!index) set_glyph (font, glyphs, *n_glyphs, cluster_offset, - get_unknown_glyph (font, wc)); + PANGO_GET_UNKNOWN_GLYPH (wc)); else set_glyph (font, glyphs, *n_glyphs, cluster_offset, index); (*n_glyphs)++; - text += composed; length -= composed; } /* Render the remaining text as uncomposed forms as a fallback. */ - for (i = 0; i < length; i++) + for (i = 0; i < length; i++, str = g_utf8_next_char(str)) { int jindex; int oldlen; - if (text[i] == LFILL || text[i] == VFILL) + wc = g_utf8_get_char(str); + + if (wc == LFILL || wc == VFILL) continue; - index = find_char (font, text[i]); + if (IS_M(wc)) + { + tone = wc; + break; + } + + if (IS_S(wc)) + { + oldlen = *n_glyphs; + + text[0] = L_FROM_S(wc); + text[1] = V_FROM_S(wc); + if (S_HAS_T(wc)) + { + text[2] = T_FROM_S(wc); + composed = 3; + } + else + composed = 2; + + for (j = 0; j < composed; j++) + { + index = find_char (font, text[j]); + if (index) + { + pango_glyph_string_set_size (glyphs, *n_glyphs + 1); + set_glyph (font, glyphs, *n_glyphs, cluster_offset, index); + (*n_glyphs)++; + } + else + goto decompose_cancel; + } + + continue; + + decompose_cancel: + /* The font doesn't have jamos. Cancel it. */ + *n_glyphs = oldlen; + pango_glyph_string_set_size (glyphs, *n_glyphs); + } + + index = find_char (font, wc); if (index) { pango_glyph_string_set_size (glyphs, *n_glyphs + 1); @@ -247,7 +326,7 @@ render_syllable (PangoFont *font, gunichar *text, int length, /* This font has no glyphs on the Hangul Jamo area! Find a fallback from the Hangul Compatibility Jamo area. */ - jindex = text[i] - LBASE; + jindex = wc - LBASE; oldlen = *n_glyphs; for (j = 0; j < 3 && (__jamo_to_ksc5601[jindex][j] != 0); j++) { @@ -259,7 +338,7 @@ render_syllable (PangoFont *font, gunichar *text, int length, *n_glyphs = oldlen; pango_glyph_string_set_size (glyphs, *n_glyphs + 1); set_glyph (font, glyphs, *n_glyphs, cluster_offset, - get_unknown_glyph (font, text[i])); + PANGO_GET_UNKNOWN_GLYPH (text[i])); (*n_glyphs)++; break; } @@ -270,11 +349,11 @@ render_syllable (PangoFont *font, gunichar *text, int length, } if (n_prev_glyphs == *n_glyphs) { - index = find_char (font, 0x3164); + index = find_char (font, 0x3164); /* U+3164 HANGUL FILLER */ pango_glyph_string_set_size (glyphs, *n_glyphs + 1); if (!index) set_glyph (font, glyphs, *n_glyphs, cluster_offset, - get_unknown_glyph (font, index)); + PANGO_GET_UNKNOWN_GLYPH (index)); else set_glyph (font, glyphs, *n_glyphs, cluster_offset, index); glyphs->log_clusters[*n_glyphs] = cluster_offset; @@ -303,7 +382,7 @@ render_basic (PangoFont *font, gunichar wc, if (index) set_glyph (font, glyphs, *n_glyphs, cluster_offset, index); else - set_glyph (font, glyphs, *n_glyphs, cluster_offset, get_unknown_glyph (font, wc)); + set_glyph (font, glyphs, *n_glyphs, cluster_offset, PANGO_GET_UNKNOWN_GLYPH (wc)); } (*n_glyphs)++; } @@ -316,16 +395,14 @@ hangul_engine_shape (PangoEngineShape *engine, const PangoAnalysis *analysis, PangoGlyphString *glyphs) { - int n_chars, n_glyphs; + int n_chars = g_utf8_strlen (text, length); + int n_glyphs; int i; const char *p, *start; - gunichar jamos_static[8]; - gint max_jamos = G_N_ELEMENTS (jamos_static); - gunichar *jamos = jamos_static; int n_jamos; + gunichar prev = 0; - n_chars = g_utf8_strlen (text, length); n_glyphs = 0; start = p = text; n_jamos = 0; @@ -337,48 +414,26 @@ hangul_engine_shape (PangoEngineShape *engine, wc = g_utf8_get_char (p); /* Check syllable boundaries. */ - if (n_jamos) + if (n_jamos && IS_BOUNDARY (prev, wc)) { - gunichar prev = jamos[n_jamos - 1]; - if ((!IS_JAMO (wc) && !IS_S (wc) && !IS_M (wc)) || - (!IS_L (prev) && IS_S (wc)) || - (IS_T (prev) && IS_L (wc)) || - (IS_V (prev) && IS_L (wc)) || - (IS_T (prev) && IS_V (wc)) || - IS_M (prev)) - { - /* Draw a syllable with these jamos. */ - render_syllable (font, jamos, n_jamos, glyphs, - &n_glyphs, start - text); - n_jamos = 0; - start = p; - } - } - - if (n_jamos >= max_jamos - 3) - { - max_jamos += 8; /* at most 3 for each syllable code (L+V+T) */ - if (jamos == jamos_static) - { - jamos = g_new (gunichar, max_jamos); - memcpy (jamos, jamos_static, n_jamos*sizeof(gunichar)); - } + if (n_jamos == 1 && IS_S (prev)) + /* common case which the most people use */ + render_basic (font, prev, glyphs, &n_glyphs, start - text); else - jamos = g_renew (gunichar, jamos, max_jamos); + /* possibly complex composition */ + render_syllable (font, start, n_jamos, glyphs, + &n_glyphs, start - text); + n_jamos = 0; + start = p; } - if (!IS_JAMO (wc) && !IS_S (wc) && !IS_M (wc)) + prev = wc; + + if (!IS_HANGUL (wc)) { render_basic (font, wc, glyphs, &n_glyphs, start - text); start = g_utf8_next_char (p); } - else if (IS_S (wc)) - { - jamos[n_jamos++] = L_FROM_S (wc); - jamos[n_jamos++] = V_FROM_S (wc); - if (S_HAS_T (wc)) - jamos[n_jamos++] = T_FROM_S (wc); - } else if (IS_M (wc) && !n_jamos) { /* Tone mark not following syllable */ @@ -386,16 +441,13 @@ hangul_engine_shape (PangoEngineShape *engine, start = g_utf8_next_char (p); } else - jamos[n_jamos++] = wc; + n_jamos++; p = g_utf8_next_char (p); } - if (n_jamos != 0) - render_syllable (font, jamos, n_jamos, glyphs, &n_glyphs, + if (n_jamos > 0) + render_syllable (font, start, n_jamos, glyphs, &n_glyphs, start - text); - - if (jamos != jamos_static) - g_free(jamos); } static void -- cgit v1.2.1