summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- pango/break.c 271
1 files changed, 130 insertions, 141 deletions
diff --git a/pango/break.c b/pango/break.c
index 9eb4a291..b8f70855 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -29,6 +29,8 @@
#include "pango-impl-utils.h"
#include <string.h>
+/* {{{ Unicode line breaking and segmentation */
+
#define PARAGRAPH_SEPARATOR 0x2029
/* See http://www.unicode.org/unicode/reports/tr14/ if you hope
@@ -1584,20 +1586,116 @@ pango_default_break (const gchar *text,
attrs[0].is_line_break = FALSE; /* Rule LB2 */
}
+/* }}} */
+/* {{{ Tailoring */
+/* {{{ Script-specific tailoring */
+
+#include "break-arabic.c"
+#include "break-indic.c"
+#include "break-thai.c"
+
static gboolean
break_script (const char *item_text,
unsigned int item_length,
const PangoAnalysis *analysis,
PangoLogAttr *attrs,
- int attrs_len);
+ int attrs_len) /* the former forward declaration becomes the definition here */
+{ /* Script-specific tailoring: dispatches on analysis->script to one of the break_* helpers included above. */
+ switch (analysis->script)
+ {
+ case PANGO_SCRIPT_ARABIC:
+ break_arabic (item_text, item_length, analysis, attrs, attrs_len);
+ break;
+
+ case PANGO_SCRIPT_DEVANAGARI:
+ case PANGO_SCRIPT_BENGALI:
+ case PANGO_SCRIPT_GURMUKHI:
+ case PANGO_SCRIPT_GUJARATI:
+ case PANGO_SCRIPT_ORIYA:
+ case PANGO_SCRIPT_TAMIL:
+ case PANGO_SCRIPT_TELUGU:
+ case PANGO_SCRIPT_KANNADA:
+ case PANGO_SCRIPT_MALAYALAM:
+ case PANGO_SCRIPT_SINHALA:
+ break_indic (item_text, item_length, analysis, attrs, attrs_len); /* one shared handler for all ten scripts listed above */
+ break;
+
+ case PANGO_SCRIPT_THAI:
+ break_thai (item_text, item_length, analysis, attrs, attrs_len);
+ break;
+ default:
+ return FALSE; /* no handler for this script: attrs were not touched by this function */
+ }
+
+ return TRUE; /* reached only when one of the script handlers above was called */
+}
+
+/* }}} */
+/* {{{ Attribute-based tailoring */
static gboolean
break_attrs (const char *text,
- int length,
+ int length,
GSList *attributes,
- int item_offset,
- PangoLogAttr *attrs,
- int attrs_len);
+ int offset,
+ PangoLogAttr *log_attrs,
+ int log_attrs_len)
+{ /* Attribute-based tailoring: clears break flags inside ranges whose PANGO_ATTR_ALLOW_BREAKS value is 0. Returns FALSE when no such attribute type is present at all, TRUE otherwise. */
+ PangoAttrList list;
+ PangoAttrIterator iter;
+ GSList *l;
+
+ _pango_attr_list_init (&list); /* local scratch list, torn down on both exit paths below */
+ for (l = attributes; l; l = l->next)
+ {
+ PangoAttribute *attr = l->data;
+
+ if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
+ pango_attr_list_insert (&list, pango_attribute_copy (attr)); /* the scratch list receives a copy, not the caller's attribute */
+ }
+
+ if (!_pango_attr_list_has_attributes (&list))
+ {
+ _pango_attr_list_destroy (&list);
+ return FALSE; /* no allow-breaks attributes were collected: nothing to tailor */
+ }
+
+ _pango_attr_list_get_iterator (&list, &iter);
+ do {
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_ALLOW_BREAKS);
+
+ if (attr && ((PangoAttrInt*)attr)->value == 0) /* only ranges whose value is 0 are processed */
+ {
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ pango_attr_iterator_range (&iter, &start, &end);
+ if (start < offset)
+ start_pos = 0; /* range begins before this text: clamp to the first position */
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset); /* index relative to offset, converted to a character offset within text */
+ if (end >= offset + length)
+ end_pos = log_attrs_len; /* range reaches or passes the end of this text: clamp to the array length */
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ for (pos = start_pos + 1; pos < end_pos; pos++) /* strictly between the two ends: start_pos and end_pos themselves keep their flags */
+ {
+ log_attrs[pos].is_mandatory_break = FALSE;
+ log_attrs[pos].is_line_break = FALSE;
+ log_attrs[pos].is_char_break = FALSE;
+ }
+ }
+ } while (pango_attr_iterator_next (&iter)); /* repeat for each subsequent iterator range */
+
+ _pango_attr_iterator_destroy (&iter);
+ _pango_attr_list_destroy (&list);
+
+ return TRUE; /* returned whenever the scratch list was non-empty, even if no range had value 0 */
+}
+
+/* }}} */
static gboolean
tailor_break (const char *text,
@@ -1622,6 +1720,9 @@ tailor_break (const char *text,
return res;
}
+/* }}} */
+/* {{{ Public API */
+
/**
* pango_break:
* @text: the text to process. Must be valid UTF-8
@@ -1641,16 +1742,16 @@ tailor_break (const char *text,
*/
void
pango_break (const gchar *text,
- gint length,
- PangoAnalysis *analysis,
- PangoLogAttr *attrs,
- int attrs_len)
+ gint length,
+ PangoAnalysis *analysis,
+ PangoLogAttr *attrs,
+ int attrs_len)
{
g_return_if_fail (analysis != NULL);
g_return_if_fail (attrs != NULL);
pango_default_break (text, length, analysis, attrs, attrs_len);
- tailor_break (text, length, analysis, -1, attrs, attrs_len);
+ tailor_break (text, length, analysis, -1, attrs, attrs_len); /* tailoring runs second, on the same attrs array passed to pango_default_break; NOTE(review): meaning of the -1 fourth argument is not visible in this hunk - confirm against tailor_break */
}
/**
@@ -1700,28 +1801,6 @@ pango_tailor_break (const char *text,
}
}
-static int
-tailor_segment (const char *range_start,
- const char *range_end,
- int chars_broken,
- PangoAnalysis *analysis,
- PangoLogAttr *log_attrs)
-{
- int chars_in_range;
- PangoLogAttr *start = log_attrs + chars_broken;
-
- chars_in_range = pango_utf8_strlen (range_start, range_end - range_start);
-
- pango_tailor_break (range_start,
- range_end - range_start,
- analysis,
- -1,
- start,
- chars_in_range + 1);
-
- return chars_in_range;
-}
-
/**
* pango_get_log_attrs:
* @text: text to process. Must be valid UTF-8
@@ -1744,11 +1823,11 @@ tailor_segment (const char *range_start,
*/
void
pango_get_log_attrs (const char *text,
- int length,
- int level,
- PangoLanguage *language,
- PangoLogAttr *log_attrs,
- int attrs_len)
+ int length,
+ int level,
+ PangoLanguage *language,
+ PangoLogAttr *log_attrs,
+ int attrs_len)
{
int chars_broken;
PangoAnalysis analysis = { NULL };
@@ -1769,119 +1848,29 @@ pango_get_log_attrs (const char *text,
{
const char *run_start, *run_end;
PangoScript script;
+ int chars_in_range;
pango_script_iter_get_range (&iter, &run_start, &run_end, &script);
analysis.script = script;
- chars_broken += tailor_segment (run_start, run_end, chars_broken, &analysis, log_attrs);
+ chars_in_range = pango_utf8_strlen (run_start, run_end - run_start);
+
+ pango_tailor_break (run_start,
+ run_end - run_start,
+ &analysis,
+ -1,
+ log_attrs + chars_broken,
+ chars_in_range + 1);
+
+ chars_broken += chars_in_range;
}
while (pango_script_iter_next (&iter));
_pango_script_iter_fini (&iter);
if (chars_broken + 1 > attrs_len)
g_warning ("pango_get_log_attrs: attrs_len should have been at least %d, but was %d. Expect corrupted memory.",
- chars_broken + 1,
- attrs_len);
-}
-
-#include "break-arabic.c"
-#include "break-indic.c"
-#include "break-thai.c"
-
-static gboolean
-break_script (const char *item_text,
- unsigned int item_length,
- const PangoAnalysis *analysis,
- PangoLogAttr *attrs,
- int attrs_len)
-{
- switch (analysis->script)
- {
- case PANGO_SCRIPT_ARABIC:
- break_arabic (item_text, item_length, analysis, attrs, attrs_len);
- break;
-
- case PANGO_SCRIPT_DEVANAGARI:
- case PANGO_SCRIPT_BENGALI:
- case PANGO_SCRIPT_GURMUKHI:
- case PANGO_SCRIPT_GUJARATI:
- case PANGO_SCRIPT_ORIYA:
- case PANGO_SCRIPT_TAMIL:
- case PANGO_SCRIPT_TELUGU:
- case PANGO_SCRIPT_KANNADA:
- case PANGO_SCRIPT_MALAYALAM:
- case PANGO_SCRIPT_SINHALA:
- break_indic (item_text, item_length, analysis, attrs, attrs_len);
- break;
-
- case PANGO_SCRIPT_THAI:
- break_thai (item_text, item_length, analysis, attrs, attrs_len);
- break;
- default:
- return FALSE;
- }
-
- return TRUE;
+ chars_broken + 1,
+ attrs_len);
}
-static gboolean
-break_attrs (const char *text,
- int length,
- GSList *attributes,
- int offset,
- PangoLogAttr *log_attrs,
- int log_attrs_len)
-{
- PangoAttrList list;
- PangoAttrIterator iter;
- GSList *l;
-
- _pango_attr_list_init (&list);
- for (l = attributes; l; l = l->next)
- {
- PangoAttribute *attr = l->data;
-
- if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
- pango_attr_list_insert (&list, pango_attribute_copy (attr));
- }
-
- if (!_pango_attr_list_has_attributes (&list))
- {
- _pango_attr_list_destroy (&list);
- return FALSE;
- }
-
- _pango_attr_list_get_iterator (&list, &iter);
- do {
- const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_ALLOW_BREAKS);
-
- if (attr && ((PangoAttrInt*)attr)->value == 0)
- {
- int start, end;
- int start_pos, end_pos;
- int pos;
-
- pango_attr_iterator_range (&iter, &start, &end);
- if (start < offset)
- start_pos = 0;
- else
- start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
- if (end >= offset + length)
- end_pos = log_attrs_len;
- else
- end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
-
- for (pos = start_pos + 1; pos < end_pos; pos++)
- {
- log_attrs[pos].is_mandatory_break = FALSE;
- log_attrs[pos].is_line_break = FALSE;
- log_attrs[pos].is_char_break = FALSE;
- }
- }
- } while (pango_attr_iterator_next (&iter));
-
- _pango_attr_iterator_destroy (&iter);
- _pango_attr_list_destroy (&list);
-
- return TRUE;
-}
+/* }}} */