summaryrefslogtreecommitdiff
path: root/pango/pango-script.c
diff options
context:
space:
mode:
author Federico Mena Quintero <federico@ximian.com> 2005-11-08 04:36:11 +0000
committer Federico Mena Quintero <federico@src.gnome.org> 2005-11-08 04:36:11 +0000
commit f03772dc687d1b157036ebe65872bae6a5507cbf (patch)
tree 96876ae035a576e75e9393f7acd4e0db061461f3 /pango/pango-script.c
parent d7157323acccf40ca481aa43be8b1a71bd1f79ac (diff)
download pango-f03772dc687d1b157036ebe65872bae6a5507cbf.tar.gz
Fixes bug #320666:
2005-11-07  Federico Mena Quintero  <federico@ximian.com>

Fixes bug #320666: Instead of doing a bsearch() for every gunichar to map it to a PangoScript, use a precomputed table for the first 8192 code points. Also, remember the last script that we computed on each invocation; this will also help CJK and the other scripts above U+2000.

This table also holds information on whether the characters in it are paired characters. We can use this to avoid doing the expensive get_pair_index() call most of the time.

Many thanks to Matthias Clasen for his suggestions for this patch.

* tools/gen-easy-scripts-table.c: New program to generate pango_easy_scripts_table.

* tools/Makefile.am: Build gen-easy-scripts-table.

* pango/pango-easy-scripts-table.h: New file with a mapping of the first 8192 Unicode characters to their corresponding scripts. The table also says whether each character has a paired char or not.

* pango/Makefile.am (libpango_1_0_la_SOURCES): Add pango-easy-scripts-table.h.

* pango/pango-script-table.h: Remove everything below U+2000, and add a note to that effect.

* pango/pango-script.c (pango_script_for_unichar_with_last_index): New function. This is the old pango_script_for_unichar(), but it lets the caller keep around the computed index in pango_script_table. This works under the assumption that a character is likely to be in the same script block as the preceding character in a string.
(pango_script_for_unichar): First, do a quick check against the pango_easy_scripts_table. Then, do the expensive check with pango_script_for_unichar_with_last_index().
(pango_script_iter_next): If the character is within the easy script range, find out if it is a paired character by using PANGO_PAIRED_CHAR_FLAG.
(struct _PangoScriptIter): Add a last_index_for_script_lookup field. We use this to maintain the last-lookup index from pango_script_for_unichar_with_last_index().
(pango_script_iter_next): If the character is not within the easy script range, use pango_script_for_unichar_with_last_index(), and store the index in the last_index_for_script_lookup field of the PangoScriptIter.
Diffstat (limited to 'pango/pango-script.c')
-rw-r--r-- pango/pango-script.c 72
1 files changed, 54 insertions, 18 deletions
diff --git a/pango/pango-script.c b/pango/pango-script.c
index 06839406..abf9efe2 100644
--- a/pango/pango-script.c
+++ b/pango/pango-script.c
@@ -58,6 +58,7 @@
#include "pango-script.h"
#include "pango-script-table.h"
+#include "pango-easy-scripts-table.h"
#define PAREN_STACK_DEPTH 128
@@ -80,8 +81,37 @@ struct _PangoScriptIter
ParenStackEntry paren_stack[PAREN_STACK_DEPTH];
int paren_sp;
+
+ int last_index_for_script_lookup;
};
+#define PANGO_SCRIPT_TABLE_MIDPOINT (G_N_ELEMENTS (pango_script_table) / 2)
+
+static PangoScript
+pango_script_for_unichar_with_last_index (gunichar ch, int *last_index)
+{
+ int lower = 0;
+ int upper = G_N_ELEMENTS (pango_script_table) - 1;
+ int mid = *last_index;
+
+ do
+ {
+ if (ch < pango_script_table[mid].start)
+ upper = mid - 1;
+ else if (ch >= pango_script_table[mid].start + pango_script_table[mid].chars)
+ lower = mid + 1;
+ else
+ {
+ *last_index = mid;
+ return pango_script_table[mid].script;
+ }
+ mid = (lower + upper) / 2;
+ }
+ while (lower <= upper);
+
+ return PANGO_SCRIPT_COMMON;
+}
+
/**
* pango_script_for_unichar:
* @ch: a unicode characters
@@ -96,21 +126,12 @@ struct _PangoScriptIter
PangoScript
pango_script_for_unichar (gunichar ch)
{
- int lower = 0;
- int upper = G_N_ELEMENTS (pango_script_table) - 1;
+ int index = PANGO_SCRIPT_TABLE_MIDPOINT;
- while (lower <= upper)
- {
- int mid = (lower + upper) / 2;
- if (ch < pango_script_table[mid].start)
- upper = mid - 1;
- else if (ch >= pango_script_table[mid].start + pango_script_table[mid].chars)
- lower = mid + 1;
- else
- return pango_script_table[mid].script;
- }
-
- return PANGO_SCRIPT_COMMON;
+ if (ch < PANGO_EASY_SCRIPTS_RANGE)
+ return pango_easy_scripts_table[ch] & PANGO_EASY_SCRIPTS_MASK;
+ else
+ return pango_script_for_unichar_with_last_index (ch, &index);
}
/**********************************************************************/
@@ -146,6 +167,7 @@ pango_script_iter_new (const char *text,
iter->script_code = PANGO_SCRIPT_COMMON;
iter->paren_sp = -1;
+ iter->last_index_for_script_lookup = PANGO_SCRIPT_TABLE_MIDPOINT;
pango_script_iter_next (iter);
@@ -259,15 +281,29 @@ pango_script_iter_next (PangoScriptIter *iter)
start_sp = iter->paren_sp;
iter->script_code = PANGO_SCRIPT_COMMON;
iter->script_start = iter->script_end;
-
+
for (; iter->script_end < iter->text_end; iter->script_end = g_utf8_next_char (iter->script_end))
{
gunichar ch = g_utf8_get_char (iter->script_end);
PangoScript sc;
int pair_index;
-
- sc = pango_script_for_unichar (ch);
- pair_index = get_pair_index (ch);
+
+ if (ch < PANGO_EASY_SCRIPTS_RANGE)
+ {
+ sc = pango_easy_scripts_table[ch] & PANGO_EASY_SCRIPTS_MASK;
+ if ((pango_easy_scripts_table[ch] & PANGO_PAIRED_CHAR_FLAG) == 0)
+ pair_index = -1;
+ else
+ pair_index = get_pair_index (ch);
+ }
+ else
+ {
+ sc = pango_script_for_unichar_with_last_index (ch, &iter->last_index_for_script_lookup);
+ if (sc != PANGO_SCRIPT_COMMON)
+ pair_index = -1;
+ else
+ pair_index = get_pair_index (ch);
+ }
/*
* Paired character handling: