diff options
author | Federico Mena Quintero <federico@ximian.com> | 2005-11-08 04:36:11 +0000 |
---|---|---|
committer | Federico Mena Quintero <federico@src.gnome.org> | 2005-11-08 04:36:11 +0000 |
commit | f03772dc687d1b157036ebe65872bae6a5507cbf (patch) | |
tree | 96876ae035a576e75e9393f7acd4e0db061461f3 /pango/pango-script.c | |
parent | d7157323acccf40ca481aa43be8b1a71bd1f79ac (diff) | |
download | pango-f03772dc687d1b157036ebe65872bae6a5507cbf.tar.gz |
Fixes bug #320666:
2005-11-07 Federico Mena Quintero <federico@ximian.com>
Fixes bug #320666:
Instead of doing a binary search for every gunichar to map it to a
PangoScript, use a precomputed table for the first 8192 code
points (U+0000..U+1FFF). Also, remember the pango_script_table index
found by the previous lookup and start the next search from it; this
will also help CJK and the other scripts at or above U+2000.
This table also holds information on whether the characters in it
are paired characters. We can use this to avoid doing the
expensive get_pair_index() call most of the time.
Many thanks to Matthias Clasen for his suggestions for this patch.
* tools/gen-easy-scripts-table.c: New program to generate
pango_easy_scripts_table.
* tools/Makefile.am: Build gen-easy-scripts-table.
* pango/pango-easy-scripts-table.h: New file with a mapping of the
first 8192 Unicode characters to their corresponding scripts. The
table also says whether each character has a paired char or not.
* pango/Makefile.am (libpango_1_0_la_SOURCES): Add pango-easy-scripts-table.h.
* pango/pango-script-table.h: Remove everything below U+2000, and
add a note to that effect.
* pango/pango-script.c (pango_script_for_unichar_with_last_index):
New function. This is the old pango_script_for_unichar(), but it
lets the caller keep around the computed index in
pango_script_table. This works under the assumption that a
character is likely to be in the same script block as the
preceding character in a string.
(pango_script_for_unichar): For characters within the easy script
range, look the script up directly in pango_easy_scripts_table.
Otherwise, fall back to the expensive check with
pango_script_for_unichar_with_last_index().
(pango_script_iter_next): If the character is within the easy
script range, find out if it is a paired character by using
PANGO_PAIRED_CHAR_FLAG.
(struct _PangoScriptIter): Add a last_index_for_script_lookup
field. We use this to maintain the last-lookup index from
pango_script_for_unichar_with_last_index().
(pango_script_iter_next): If the character is not within the easy
script range, use pango_script_for_unichar_with_last_index(), and
store the index in the last_index_for_script_lookup field of the
PangoScriptIter.
Diffstat (limited to 'pango/pango-script.c')
-rw-r--r-- | pango/pango-script.c | 72 |
1 files changed, 54 insertions, 18 deletions
diff --git a/pango/pango-script.c b/pango/pango-script.c index 06839406..abf9efe2 100644 --- a/pango/pango-script.c +++ b/pango/pango-script.c @@ -58,6 +58,7 @@ #include "pango-script.h" #include "pango-script-table.h" +#include "pango-easy-scripts-table.h" #define PAREN_STACK_DEPTH 128 @@ -80,8 +81,37 @@ struct _PangoScriptIter ParenStackEntry paren_stack[PAREN_STACK_DEPTH]; int paren_sp; + + int last_index_for_script_lookup; }; +#define PANGO_SCRIPT_TABLE_MIDPOINT (G_N_ELEMENTS (pango_script_table) / 2) + +static PangoScript +pango_script_for_unichar_with_last_index (gunichar ch, int *last_index) +{ + int lower = 0; + int upper = G_N_ELEMENTS (pango_script_table) - 1; + int mid = *last_index; + + do + { + if (ch < pango_script_table[mid].start) + upper = mid - 1; + else if (ch >= pango_script_table[mid].start + pango_script_table[mid].chars) + lower = mid + 1; + else + { + *last_index = mid; + return pango_script_table[mid].script; + } + mid = (lower + upper) / 2; + } + while (lower <= upper); + + return PANGO_SCRIPT_COMMON; +} + /** * pango_script_for_unichar: * @ch: a unicode characters @@ -96,21 +126,12 @@ struct _PangoScriptIter PangoScript pango_script_for_unichar (gunichar ch) { - int lower = 0; - int upper = G_N_ELEMENTS (pango_script_table) - 1; + int index = PANGO_SCRIPT_TABLE_MIDPOINT; - while (lower <= upper) - { - int mid = (lower + upper) / 2; - if (ch < pango_script_table[mid].start) - upper = mid - 1; - else if (ch >= pango_script_table[mid].start + pango_script_table[mid].chars) - lower = mid + 1; - else - return pango_script_table[mid].script; - } - - return PANGO_SCRIPT_COMMON; + if (ch < PANGO_EASY_SCRIPTS_RANGE) + return pango_easy_scripts_table[ch] & PANGO_EASY_SCRIPTS_MASK; + else + return pango_script_for_unichar_with_last_index (ch, &index); } /**********************************************************************/ @@ -146,6 +167,7 @@ pango_script_iter_new (const char *text, iter->script_code = PANGO_SCRIPT_COMMON; iter->paren_sp = -1; + 
iter->last_index_for_script_lookup = PANGO_SCRIPT_TABLE_MIDPOINT; pango_script_iter_next (iter); @@ -259,15 +281,29 @@ pango_script_iter_next (PangoScriptIter *iter) start_sp = iter->paren_sp; iter->script_code = PANGO_SCRIPT_COMMON; iter->script_start = iter->script_end; - + for (; iter->script_end < iter->text_end; iter->script_end = g_utf8_next_char (iter->script_end)) { gunichar ch = g_utf8_get_char (iter->script_end); PangoScript sc; int pair_index; - - sc = pango_script_for_unichar (ch); - pair_index = get_pair_index (ch); + + if (ch < PANGO_EASY_SCRIPTS_RANGE) + { + sc = pango_easy_scripts_table[ch] & PANGO_EASY_SCRIPTS_MASK; + if ((pango_easy_scripts_table[ch] & PANGO_PAIRED_CHAR_FLAG) == 0) + pair_index = -1; + else + pair_index = get_pair_index (ch); + } + else + { + sc = pango_script_for_unichar_with_last_index (ch, &iter->last_index_for_script_lookup); + if (sc != PANGO_SCRIPT_COMMON) + pair_index = -1; + else + pair_index = get_pair_index (ch); + } /* * Paired character handling: |