summaryrefslogtreecommitdiff
path: root/pango2/pango-script.c
diff options
context:
space:
mode:
Diffstat (limited to 'pango2/pango-script.c')
-rw-r--r--pango2/pango-script.c373
1 files changed, 373 insertions, 0 deletions
diff --git a/pango2/pango-script.c b/pango2/pango-script.c
new file mode 100644
index 00000000..e65c6265
--- /dev/null
+++ b/pango2/pango-script.c
@@ -0,0 +1,373 @@
+/* Pango2
+ * pango-script.c: Script tag handling
+ *
+ * Copyright (C) 2002 Red Hat Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Implementation of pango2_script_iter is derived from ICU:
+ *
+ * icu/sources/common/usc_impl.c
+ *
+ **********************************************************************
+ * Copyright (C) 1999-2002, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, and/or sell copies of the Software, and to permit persons
+ * to whom the Software is furnished to do so, provided that the above
+ * copyright notice(s) and this permission notice appear in all copies of
+ * the Software and that both the above copyright notice(s) and this
+ * permission notice appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+ * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder
+ * shall not be used in advertising or otherwise to promote the sale, use
+ * or other dealings in this Software without prior written authorization
+ * of the copyright holder.
+ */
+
+#include "config.h"
+#include <stdlib.h>
+#include <string.h>
+
+#include "pango-script.h"
+#include "pango-script-private.h"
+
+
+/**********************************************************************/
+
+static Pango2ScriptIter *pango2_script_iter_copy (Pango2ScriptIter *iter);
+
+G_DEFINE_BOXED_TYPE (Pango2ScriptIter,
+ pango2_script_iter,
+ pango2_script_iter_copy,
+ pango2_script_iter_free)
+
+Pango2ScriptIter *
+_pango2_script_iter_init (Pango2ScriptIter *iter,
+ const char *text,
+ int length)
+{
+ iter->text_start = text;
+ if (length >= 0)
+ iter->text_end = text + length;
+ else
+ iter->text_end = text + strlen (text);
+
+ iter->script_start = text;
+ iter->script_end = text;
+ iter->script_code = G_UNICODE_SCRIPT_COMMON;
+
+ iter->paren_sp = -1;
+
+ pango2_script_iter_next (iter);
+
+ return iter;
+}
+
+/**
+ * pango2_script_iter_new:
+ * @text: a UTF-8 string
+ * @length: length of @text, or -1 if @text is nul-terminated
+ *
+ * Create a new `Pango2ScriptIter`, used to break a string of
+ * Unicode text into runs by Unicode script.
+ *
+ * No copy is made of @text, so the caller needs to make
+ * sure it remains valid until the iterator is freed with
+ * [method@Pango2.ScriptIter.free].
+ *
+ * Return value: the new script iterator, initialized
+ * to point at the first range in the text, which should be
+ * freed with [method@Pango2.ScriptIter.free]. If the string is
+ * empty, it will point at an empty range.
+ */
+Pango2ScriptIter *
+pango2_script_iter_new (const char *text,
+ int length)
+{
+ return _pango2_script_iter_init (g_slice_new (Pango2ScriptIter), text, length);
+}
+
+static Pango2ScriptIter *
+pango2_script_iter_copy (Pango2ScriptIter *iter)
+{
+ return g_slice_dup (Pango2ScriptIter, iter);
+}
+
+void
+_pango2_script_iter_fini (Pango2ScriptIter *iter)
+{
+}
+
+/**
+ * pango2_script_iter_free:
+ * @iter: a `Pango2ScriptIter`
+ *
+ * Frees a `Pango2ScriptIter`.
+ */
+void
+pango2_script_iter_free (Pango2ScriptIter *iter)
+{
+ _pango2_script_iter_fini (iter);
+ g_slice_free (Pango2ScriptIter, iter);
+}
+
+/**
+ * pango2_script_iter_get_range:
+ * @iter: a `Pango2ScriptIter`
+ * @start: (out) (optional): location to store start position of the range
+ * @end: (out) (optional): location to store end position of the range
+ * @script: (out) (optional): location to store script for range
+ *
+ * Gets information about the range to which @iter currently points.
+ *
+ * The range is the set of locations p where *start <= p < *end.
+ * (That is, it doesn't include the character stored at *end)
+ */
+void
+pango2_script_iter_get_range (Pango2ScriptIter *iter,
+ const char **start,
+ const char **end,
+ GUnicodeScript *script)
+{
+ if (start)
+ *start = iter->script_start;
+ if (end)
+ *end = iter->script_end;
+ if (script)
+ *script = iter->script_code;
+}
+
+static const gunichar paired_chars[] = {
+ 0x0028, 0x0029, /* ascii paired punctuation */
+ 0x003c, 0x003e,
+ 0x005b, 0x005d,
+ 0x007b, 0x007d,
+ 0x00ab, 0x00bb, /* guillemets */
+ 0x0f3a, 0x0f3b, /* tibetan */
+ 0x0f3c, 0x0f3d,
+ 0x169b, 0x169c, /* ogham */
+ 0x2018, 0x2019, /* general punctuation */
+ 0x201c, 0x201d,
+ 0x2039, 0x203a,
+ 0x2045, 0x2046,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x27e6, 0x27e7, /* math */
+ 0x27e8, 0x27e9,
+ 0x27ea, 0x27eb,
+ 0x27ec, 0x27ed,
+ 0x27ee, 0x27ef,
+ 0x2983, 0x2984,
+ 0x2985, 0x2986,
+ 0x2987, 0x2988,
+ 0x2989, 0x298a,
+ 0x298b, 0x298c,
+ 0x298d, 0x298e,
+ 0x298f, 0x2990,
+ 0x2991, 0x2992,
+ 0x2993, 0x2994,
+ 0x2995, 0x2996,
+ 0x2997, 0x2998,
+ 0x29fc, 0x29fd,
+ 0x2e02, 0x2e03,
+ 0x2e04, 0x2e05,
+ 0x2e09, 0x2e0a,
+ 0x2e0c, 0x2e0d,
+ 0x2e1c, 0x2e1d,
+ 0x2e20, 0x2e21,
+ 0x2e22, 0x2e23,
+ 0x2e24, 0x2e25,
+ 0x2e26, 0x2e27,
+ 0x2e28, 0x2e29,
+ 0x3008, 0x3009, /* chinese paired punctuation */
+ 0x300a, 0x300b,
+ 0x300c, 0x300d,
+ 0x300e, 0x300f,
+ 0x3010, 0x3011,
+ 0x3014, 0x3015,
+ 0x3016, 0x3017,
+ 0x3018, 0x3019,
+ 0x301a, 0x301b,
+ 0xfe59, 0xfe5a,
+ 0xfe5b, 0xfe5c,
+ 0xfe5d, 0xfe5e,
+ 0xff08, 0xff09,
+ 0xff3b, 0xff3d,
+ 0xff5b, 0xff5d,
+ 0xff5f, 0xff60,
+ 0xff62, 0xff63
+};
+
+static int
+get_pair_index (gunichar ch)
+{
+ int lower = 0;
+ int upper = G_N_ELEMENTS (paired_chars) - 1;
+
+ while (lower <= upper)
+ {
+ int mid = (lower + upper) / 2;
+
+ if (ch < paired_chars[mid])
+ upper = mid - 1;
+ else if (ch > paired_chars[mid])
+ lower = mid + 1;
+ else
+ return mid;
+ }
+
+ return -1;
+}
+
+/* duplicated in pango-language.c */
+#define REAL_SCRIPT(script) \
+ ((script) > G_UNICODE_SCRIPT_INHERITED && (script) != G_UNICODE_SCRIPT_UNKNOWN)
+
+#define SAME_SCRIPT(script1, script2) \
+ (!REAL_SCRIPT (script1) || !REAL_SCRIPT (script2) || (script1) == (script2))
+
+#define IS_OPEN(pair_index) (((pair_index) & 1) == 0)
+
+/**
+ * pango2_script_iter_next:
+ * @iter: a `Pango2ScriptIter`
+ *
+ * Advances a `Pango2ScriptIter` to the next range.
+ *
+ * If @iter is already at the end, it is left unchanged
+ * and %FALSE is returned.
+ *
+ * Return value: %TRUE if @iter was successfully advanced
+ */
+gboolean
+pango2_script_iter_next (Pango2ScriptIter *iter)
+{
+ int start_sp;
+
+ if (iter->script_end == iter->text_end)
+ return FALSE;
+
+ start_sp = iter->paren_sp;
+ iter->script_code = G_UNICODE_SCRIPT_COMMON;
+ iter->script_start = iter->script_end;
+
+ for (; iter->script_end < iter->text_end; iter->script_end = g_utf8_next_char (iter->script_end))
+ {
+ gunichar ch = g_utf8_get_char (iter->script_end);
+ GUnicodeScript sc;
+ int pair_index;
+
+ sc = g_unichar_get_script (ch);
+ if (sc != G_UNICODE_SCRIPT_COMMON)
+ pair_index = -1;
+ else
+ pair_index = get_pair_index (ch);
+
+ /*
+ * Paired character handling:
+ *
+ * if it's an open character, push it onto the stack.
+ * if it's a close character, find the matching open on the
+ * stack, and use that script code. Any non-matching open
+ * characters above it on the stack will be poped.
+ */
+ if (pair_index >= 0)
+ {
+ if (IS_OPEN (pair_index))
+ {
+ /*
+ * If the paren stack is full, empty it. This
+ * means that deeply nested paired punctuation
+ * characters will be ignored, but that's an unusual
+ * case, and it's better to ignore them than to
+ * write off the end of the stack...
+ */
+ if (++iter->paren_sp >= PAREN_STACK_DEPTH)
+ iter->paren_sp = 0;
+
+ iter->paren_stack[iter->paren_sp].pair_index = pair_index;
+ iter->paren_stack[iter->paren_sp].script_code = iter->script_code;
+ }
+ else if (iter->paren_sp >= 0)
+ {
+ int pi = pair_index & ~1;
+
+ while (iter->paren_sp >= 0 && iter->paren_stack[iter->paren_sp].pair_index != pi)
+ iter->paren_sp--;
+
+ if (iter->paren_sp < start_sp)
+ start_sp = iter->paren_sp;
+
+ if (iter->paren_sp >= 0)
+ sc = iter->paren_stack[iter->paren_sp].script_code;
+ }
+ }
+
+ if (SAME_SCRIPT (iter->script_code, sc))
+ {
+ if (!REAL_SCRIPT (iter->script_code) && REAL_SCRIPT (sc))
+ {
+ iter->script_code = sc;
+
+ /*
+ * now that we have a final script code, fix any open
+ * characters we pushed before we knew the script code.
+ */
+ while (start_sp < iter->paren_sp)
+ iter->paren_stack[++start_sp].script_code = iter->script_code;
+ }
+
+ /*
+ * if this character is a close paired character,
+ * pop it from the stack
+ */
+ if (pair_index >= 0 && !IS_OPEN (pair_index) && iter->paren_sp >= 0)
+ {
+ iter->paren_sp--;
+
+ if (iter->paren_sp < start_sp)
+ start_sp = iter->paren_sp;
+ }
+ }
+ else
+ {
+ /* Different script, we're done */
+ break;
+ }
+ }
+
+ return TRUE;
+}
+
+/**********************************************************
+ * End of code from ICU
+ **********************************************************/