summaryrefslogtreecommitdiff
path: root/pango2/break-indic.c
diff options
context:
space:
mode:
Diffstat (limited to 'pango2/break-indic.c')
-rw-r--r--pango2/break-indic.c212
1 files changed, 212 insertions, 0 deletions
diff --git a/pango2/break-indic.c b/pango2/break-indic.c
new file mode 100644
index 00000000..62ecdaab
--- /dev/null
+++ b/pango2/break-indic.c
@@ -0,0 +1,212 @@
+/* Pango2
+ * break-indic.c:
+ *
+ * Copyright (C) 2006 Red Hat Software
+ * Author: Akira TAGOH <tagoh@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+
+#include "pango-break.h"
+#include "pango-item-private.h"
+
+#define DEV_RRA 0x0931 /* 0930 + 093c */
+#define DEV_QA 0x0958 /* 0915 + 093c */
+#define DEV_YA 0x095F /* 092f + 003c */
+#define DEV_KHHA 0x0959
+#define DEV_GHHA 0x095A
+#define DEV_ZA 0x095B
+#define DEV_DDDHA 0x095C
+#define DEV_RHA 0x095D
+#define DEV_FA 0x095E
+#define DEV_YYA 0x095F
+
+/* Bengali */
+/* for split matras in all brahmi based script */
+#define BENGALI_SIGN_O 0x09CB /* 09c7 + 09be */
+#define BENGALI_SIGN_AU 0x09CC /* 09c7 + 09d7 */
+#define BENGALI_RRA 0x09DC
+#define BENGALI_RHA 0x09DD
+#define BENGALI_YYA 0x09DF
+
+/* Gurumukhi */
+#define GURUMUKHI_LLA 0x0A33
+#define GURUMUKHI_SHA 0x0A36
+#define GURUMUKHI_KHHA 0x0A59
+#define GURUMUKHI_GHHA 0x0A5A
+#define GURUMUKHI_ZA 0x0A5B
+#define GURUMUKHI_RRA 0x0A5C
+#define GURUMUKHI_FA 0x0A5E
+
+/* Oriya */
+#define ORIYA_AI 0x0B48
+#define ORIYA_O 0x0B4B
+#define ORIYA_AU 0x0B4C
+
+/* Telugu */
+#define TELUGU_EE 0x0C47
+#define TELUGU_AI 0x0C48
+
+/* Tamil */
+#define TAMIL_O 0x0BCA
+#define TAMIL_OO 0x0BCB
+#define TAMIL_AU 0x0BCC
+
+/* Kannada */
+#define KNDA_EE 0x0CC7
+#define KNDA_AI 0x0CC8
+#define KNDA_O 0x0CCA
+#define KNDA_OO 0x0CCB
+
+/* Malayalam */
+#define MLYM_O 0x0D4A
+#define MLYM_OO 0x0D4B
+#define MLYM_AU 0x0D4C
+
+#define IS_COMPOSITE_WITH_BRAHMI_NUKTA(c) ( \
+ (c >= BENGALI_RRA && c <= BENGALI_YYA) || \
+ (c >= DEV_QA && c <= DEV_YA) || (c == DEV_RRA) || (c >= DEV_KHHA && c <= DEV_YYA) || \
+ (c >= KNDA_EE && c <= KNDA_AI) ||(c >= KNDA_O && c <= KNDA_OO) || \
+ (c == TAMIL_O) || (c == TAMIL_OO) || (c == TAMIL_AU) || \
+ (c == TELUGU_EE) || (c == TELUGU_AI) || \
+ (c == ORIYA_AI) || (c == ORIYA_O) || (c == ORIYA_AU) || \
+ (c >= GURUMUKHI_KHHA && c <= GURUMUKHI_RRA) || (c == GURUMUKHI_FA)|| (c == GURUMUKHI_LLA)|| (c == GURUMUKHI_SHA) || \
+ FALSE)
+#define IS_SPLIT_MATRA_BRAHMI(c) ( \
+ (c == BENGALI_SIGN_O) || (c == BENGALI_SIGN_AU) || \
+ (c >= MLYM_O && c <= MLYM_AU) || \
+ FALSE)
+
+static void
+not_cursor_position (Pango2LogAttr *attr)
+{
+ if (!attr->is_mandatory_break)
+ {
+ attr->is_cursor_position = FALSE;
+ attr->is_char_break = FALSE;
+ attr->is_line_break = FALSE;
+ attr->is_mandatory_break = FALSE;
+ }
+}
+
+static void
+break_indic (const char *text,
+ int length,
+ const Pango2Analysis *analysis,
+ Pango2LogAttr *attrs,
+ int attrs_len G_GNUC_UNUSED)
+{
+ const char *p, *next = NULL, *next_next;
+ gunichar prev_wc, this_wc, next_wc, next_next_wc;
+ gboolean is_conjunct = FALSE;
+ int i;
+
+ for (p = text, prev_wc = 0, i = 0;
+ p != NULL && p < (text + length);
+ p = next, prev_wc = this_wc, i++)
+ {
+ this_wc = g_utf8_get_char (p);
+ next = g_utf8_next_char (p);
+
+ if (G_UNLIKELY (
+ IS_COMPOSITE_WITH_BRAHMI_NUKTA(this_wc) || IS_SPLIT_MATRA_BRAHMI(this_wc))) {
+ attrs[i+1].backspace_deletes_character = FALSE;
+ }
+
+ if (next != NULL && next < (text + length))
+ {
+ next_wc = g_utf8_get_char (next);
+ next_next = g_utf8_next_char (next);
+ }
+ else
+ {
+ next_wc = 0;
+ next_next = NULL;
+ }
+ if (next_next != NULL && next_next < (text + length))
+ next_next_wc = g_utf8_get_char (next_next);
+ else
+ next_next_wc = 0;
+
+ switch (analysis->script)
+ {
+ case G_UNICODE_SCRIPT_SINHALA:
+ /*
+ * TODO: The cursor position should be based on the state table.
+ * This is the wrong place to be doing this.
+ */
+
+ /*
+ * The cursor should treat as a single glyph:
+ * SINHALA CONS + 0x0DCA + 0x200D + SINHALA CONS
+ * SINHALA CONS + 0x200D + 0x0DCA + SINHALA CONS
+ */
+ if ((this_wc == 0x0DCA && next_wc == 0x200D)
+ || (this_wc == 0x200D && next_wc == 0x0DCA))
+ {
+ not_cursor_position(&attrs[i]);
+ not_cursor_position(&attrs[i + 1]);
+ is_conjunct = TRUE;
+ }
+ else if (is_conjunct
+ && (prev_wc == 0x200D || prev_wc == 0x0DCA)
+ && this_wc >= 0x0D9A
+ && this_wc <= 0x0DC6)
+ {
+ not_cursor_position(&attrs[i]);
+ is_conjunct = FALSE;
+ }
+ /*
+ * Consonant clusters do NOT result in implicit conjuncts
+ * in SINHALA orthography.
+ */
+ else if (!is_conjunct && prev_wc == 0x0DCA && this_wc != 0x200D)
+ {
+ attrs[i].is_cursor_position = TRUE;
+ }
+
+ break;
+
+ default:
+
+ if (prev_wc != 0 && (this_wc == 0x200D || this_wc == 0x200C))
+ {
+ not_cursor_position(&attrs[i]);
+ if (next_wc != 0)
+ {
+ not_cursor_position(&attrs[i+1]);
+ if ((next_next_wc != 0) &&
+ (next_wc == 0x09CD || /* Bengali */
+ next_wc == 0x0ACD || /* Gujarati */
+ next_wc == 0x094D || /* Hindi */
+ next_wc == 0x0CCD || /* Kannada */
+ next_wc == 0x0D4D || /* Malayalam */
+ next_wc == 0x0B4D || /* Oriya */
+ next_wc == 0x0A4D || /* Punjabi */
+ next_wc == 0x0BCD || /* Tamil */
+ next_wc == 0x0C4D)) /* Telugu */
+ {
+ not_cursor_position(&attrs[i+2]);
+ }
+ }
+ }
+
+ break;
+ }
+ }
+}