diff options
author | Owen Taylor <otaylor@redhat.com> | 2003-08-03 21:57:35 +0000 |
---|---|---|
committer | Owen Taylor <otaylor@src.gnome.org> | 2003-08-03 21:57:35 +0000 |
commit | 2584212cd0976f5f95d9381e829917e7d2a10d28 (patch) | |
tree | 5773e9f6802f8316c5c7463aa55677babe67343c /pango/pango-script.c | |
parent | 95a8d1788e884b7d8d29d4171a1adc51c61e880e (diff) | |
download | pango-2584212cd0976f5f95d9381e829917e7d2a10d28.tar.gz |
Make PangoEngine{,Lang,Shape} GObjects, and use a GTypeModule-based
Sat Aug 2 23:19:16 2003 Owen Taylor <otaylor@redhat.com>
* pango/pango-engine.[ch] modules/*/*-{fc,win32,x}.c
pango/modules.c pango/break.c pango/pango-context.c
pango/pango-layout.c pango/pango-modules.h
pango/querymodules.c pango/shape.c: Make
PangoEngine{,Lang,Shape} GObjects, and use a
GTypeModule-based module-loading system closely based
on the one used for GtkIMContext and GtkThemeEngine.
* pango/pango-impl-utils.h: OK, I'm tired of typing
in get_type() functions.
* pango/pango-script.[ch] pango/pango-script-table.h
tests/testscript.c tools/gen-script-table.pl: Add port
of script-range code from ICU in preparation for future
use. (#91542)
* tools/gen-script-for-lang.c: Utility program to determine
the script for each fontconfig .orth file.
* docs/tmpl/{scripts.sgml,pango-engine-lang.sgml,
pango-engine-shape.sgml} docs/pango-sections.txt docs/pango-docs.sgml:
Redo to go along with the above changes.
* configure.in: chmod +x tests/runtests.sh
Diffstat (limited to 'pango/pango-script.c')
-rw-r--r-- | pango/pango-script.c | 343 |
1 files changed, 343 insertions, 0 deletions
diff --git a/pango/pango-script.c b/pango/pango-script.c new file mode 100644 index 00000000..39a95641 --- /dev/null +++ b/pango/pango-script.c @@ -0,0 +1,343 @@ +/* -*- mode: C; c-file-style: "gnu" -*- */ +/* Pango + * pango-script.c: Script tag handling + * + * Copyright (C) 2002 Red Hat Software + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Implementation of pango_script_iter is derived from ICU: + * + * icu/sources/common/usc_impl.c + * + ********************************************************************** + * Copyright (C) 1999-2002, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, and/or sell copies of the Software, and to permit persons + * to whom the Software is furnished to do so, provided that the above + * copyright notice(s) and this permission notice appear in all copies of + * the Software and that both the above copyright notice(s) and this + * permission notice appear in supporting documentation. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder + * shall not be used in advertising or otherwise to promote the sale, use + * or other dealings in this Software without prior written authorization + * of the copyright holder. + */ +#include <string.h> + +#include "pango-script.h" +#include "pango-script-table.h" + +#define PAREN_STACK_DEPTH 128 + +typedef struct _ParenStackEntry ParenStackEntry; + +struct _ParenStackEntry +{ + int pair_index; + PangoScript script_code; +}; + +struct _PangoScriptIter +{ + const gchar *text_start; + const gchar *text_end; + + const gchar *script_start; + const gchar *script_end; + PangoScript script_code; + + ParenStackEntry paren_stack[PAREN_STACK_DEPTH]; + int paren_sp; +}; + +/** + * pango_script_for_unichar: + * @ch: a unicode characters + * + * Looks up the #PangoScript for a particular character (as defined by + * Unicode Technical report #24). No check is made for @ch being + * valid unicode character; if you pass in invalid character, the + * result is undefined. + * + * Return value: the #PangoScript for the character. + **/ +PangoScript +pango_script_for_unichar (gunichar ch) +{ + int lower = 0; + int upper = G_N_ELEMENTS (pango_script_table) - 1; + + while (lower <= upper) + { + int mid = (lower + upper) / 2; + if (ch < pango_script_table[mid].start) + upper = mid - 1; + else if (ch >= pango_script_table[mid].start + pango_script_table[mid].chars) + lower = mid + 1; + else + return pango_script_table[mid].script; + } + + return PANGO_SCRIPT_COMMON; +} + +/**********************************************************************/ + +/** + * pango_script_iter_new: + * @text: a UTF-8 string + * @length: length of @text, or -1 if @text is NUL-terminated. + * + * Create a new #PangoScriptIter, used to break a string of + * Unicode into runs by text. No copy is made of @text, so + * the caller needs to make sure it remains valid until + * the iterator is freed with pango_script_iter_free ().x + * + * Return value: the newly created script iterator, initialized + * to point at the first range in the text. If the string is + * empty, it will point at an empty range. + **/ +PangoScriptIter * +pango_script_iter_new (const char *text, + int length) +{ + PangoScriptIter *iter = g_new (PangoScriptIter, 1); + + iter->text_start = text; + if (length >= 0) + iter->text_end = text + length; + else + iter->text_end = text + strlen (text); + + iter->script_start = text; + iter->script_end = text; + iter->script_code = PANGO_SCRIPT_COMMON; + + iter->paren_sp = -1; + + pango_script_iter_next (iter); + + return iter; +} + +/** + * pango_script_iter_free: + * @iter: a #PangoScriptIter + * + * Frees a #PangoScriptIter created with pango_script_iter_new(). + **/ +void +pango_script_iter_free (PangoScriptIter *iter) +{ + g_free (iter); +} + +/** + * pango_script_iter_get_range: + * @iter: a #PangoScriptIter + * @start: location to store start position of the range, or %NULL + * @end: location to store end position of the range, or %NULL + * @script: location to store script for range, or %NULL + * + * Gets information about the range to which @iter currently points. + * The range is the is the set of locations p where *start <= p < *end. + * (That is, it doesn't include the character stored at *end) + **/ +void +pango_script_iter_get_range (PangoScriptIter *iter, + G_CONST_RETURN char **start, + G_CONST_RETURN char **end, + PangoScript *script) +{ + if (start) + *start = iter->script_start; + if (end) + *end = iter->script_end; + if (script) + *script = iter->script_code; +} + +static const gunichar paired_chars[] = { + 0x0028, 0x0029, /* ascii paired punctuation */ + 0x003c, 0x003e, + 0x005b, 0x005d, + 0x007b, 0x007d, + 0x00ab, 0x00bb, /* guillemets */ + 0x2018, 0x2019, /* general punctuation */ + 0x201c, 0x201d, + 0x2039, 0x203a, + 0x3008, 0x3009, /* chinese paired punctuation */ + 0x300a, 0x300b, + 0x300c, 0x300d, + 0x300e, 0x300f, + 0x3010, 0x3011, + 0x3014, 0x3015, + 0x3016, 0x3017, + 0x3018, 0x3019, + 0x301a, 0x301b +}; + +int +get_pair_index (gunichar ch) +{ + int lower = 0; + int upper = G_N_ELEMENTS (paired_chars) - 1; + + while (lower <= upper) + { + int mid = (lower + upper) / 2; + + if (ch < paired_chars[mid]) + upper = mid - 1; + else if (ch > paired_chars[mid]) + lower = mid + 1; + else + return mid; + } + + return -1; +} + +#define REAL_SCRIPT(script) \ + ((script) > PANGO_SCRIPT_INHERITED) + +#define SAME_SCRIPT(script1, script2) \ + (!REAL_SCRIPT (script1) || !REAL_SCRIPT (script2) || (script1) == (script2)) + +#define IS_OPEN(pair_index) (((pair_index) & 1) == 0) + +/** + * pango_script_iter_next: + * @iter: a #PangoScriptIter + * + * Advances a #PangoScriptIter to the next range. If the iter + * is already at the end, it is left unchanged and %FALSE + * is returned. + * + * Return value: %TRUE if the iter was succesfully advanced. + **/ +gboolean +pango_script_iter_next (PangoScriptIter *iter) +{ + int start_sp; + + if (iter->script_end == iter->text_end) + return FALSE; + + start_sp = iter->paren_sp; + iter->script_code = PANGO_SCRIPT_COMMON; + iter->script_start = iter->script_end; + + for (; iter->script_end < iter->text_end; iter->script_end = g_utf8_next_char (iter->script_end)) + { + gunichar ch = g_utf8_get_char (iter->script_end); + PangoScript sc; + int pair_index; + + sc = pango_script_for_unichar (ch); + pair_index = get_pair_index (ch); + + /* + * Paired character handling: + * + * if it's an open character, push it onto the stack. + * if it's a close character, find the matching open on the + * stack, and use that script code. Any non-matching open + * characters above it on the stack will be poped. + */ + if (pair_index >= 0) + { + if (IS_OPEN (pair_index)) + { + /* + * If the paren stack is full, empty it. This + * means that deeply nested paired punctuation + * characters will be ignored, but that's an unusual + * case, and it's better to ignore them than to + * write off the end of the stack... + */ + if (++iter->paren_sp >= PAREN_STACK_DEPTH) + iter->paren_sp = 0; + + iter->paren_stack[iter->paren_sp].pair_index = pair_index; + iter->paren_stack[iter->paren_sp].script_code = iter->script_code; + } + else if (iter->paren_sp >= 0) + { + int pi = pair_index & ~1; + + while (iter->paren_sp >= 0 && iter->paren_stack[iter->paren_sp].pair_index != pi) + iter->paren_sp--; + + if (iter->paren_sp < start_sp) + start_sp = iter->paren_sp; + + if (iter->paren_sp >= 0) + sc = iter->paren_stack[iter->paren_sp].script_code; + } + } + + if (SAME_SCRIPT (iter->script_code, sc)) + { + if (!REAL_SCRIPT (iter->script_code) && REAL_SCRIPT (sc)) + { + iter->script_code = sc; + + /* + * now that we have a final script code, fix any open + * characters we pushed before we knew the script code. + */ + while (start_sp < iter->paren_sp) + iter->paren_stack[++start_sp].script_code = iter->script_code; + } + + /* + * if this character is a close paired character, + * pop it from the stack + */ + if (pair_index >= 0 && !IS_OPEN (pair_index) != 0 && iter->paren_sp >= 0) + { + iter->paren_sp--; + start_sp--; + } + } + else + { + /* Different script, we're done */ + break; + } + } + + return TRUE; +} |