diff options
-rw-r--r-- | pango/break.c | 271 |
1 files changed, 130 insertions, 141 deletions
diff --git a/pango/break.c b/pango/break.c index 9eb4a291..b8f70855 100644 --- a/pango/break.c +++ b/pango/break.c @@ -29,6 +29,8 @@ #include "pango-impl-utils.h" #include <string.h> +/* {{{ Unicode line breaking and segmentation */ + #define PARAGRAPH_SEPARATOR 0x2029 /* See http://www.unicode.org/unicode/reports/tr14/ if you hope @@ -1584,20 +1586,116 @@ pango_default_break (const gchar *text, attrs[0].is_line_break = FALSE; /* Rule LB2 */ } +/* }}} */ +/* {{{ Tailoring */ +/* {{{ Script-specific tailoring */ + +#include "break-arabic.c" +#include "break-indic.c" +#include "break-thai.c" + static gboolean break_script (const char *item_text, unsigned int item_length, const PangoAnalysis *analysis, PangoLogAttr *attrs, - int attrs_len); + int attrs_len) +{ + switch (analysis->script) + { + case PANGO_SCRIPT_ARABIC: + break_arabic (item_text, item_length, analysis, attrs, attrs_len); + break; + + case PANGO_SCRIPT_DEVANAGARI: + case PANGO_SCRIPT_BENGALI: + case PANGO_SCRIPT_GURMUKHI: + case PANGO_SCRIPT_GUJARATI: + case PANGO_SCRIPT_ORIYA: + case PANGO_SCRIPT_TAMIL: + case PANGO_SCRIPT_TELUGU: + case PANGO_SCRIPT_KANNADA: + case PANGO_SCRIPT_MALAYALAM: + case PANGO_SCRIPT_SINHALA: + break_indic (item_text, item_length, analysis, attrs, attrs_len); + break; + + case PANGO_SCRIPT_THAI: + break_thai (item_text, item_length, analysis, attrs, attrs_len); + break; + default: + return FALSE; + } + + return TRUE; +} + +/* }}} */ +/* {{{ Attribute-based tailoring */ static gboolean break_attrs (const char *text, - int length, + int length, GSList *attributes, - int item_offset, - PangoLogAttr *attrs, - int attrs_len); + int offset, + PangoLogAttr *log_attrs, + int log_attrs_len) +{ + PangoAttrList list; + PangoAttrIterator iter; + GSList *l; + + _pango_attr_list_init (&list); + for (l = attributes; l; l = l->next) + { + PangoAttribute *attr = l->data; + + if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS) + pango_attr_list_insert (&list, pango_attribute_copy (attr)); + } + + if (!_pango_attr_list_has_attributes (&list)) + { + _pango_attr_list_destroy (&list); + return FALSE; + } + + _pango_attr_list_get_iterator (&list, &iter); + do { + const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_ALLOW_BREAKS); + + if (attr && ((PangoAttrInt*)attr)->value == 0) + { + int start, end; + int start_pos, end_pos; + int pos; + + pango_attr_iterator_range (&iter, &start, &end); + if (start < offset) + start_pos = 0; + else + start_pos = g_utf8_pointer_to_offset (text, text + start - offset); + if (end >= offset + length) + end_pos = log_attrs_len; + else + end_pos = g_utf8_pointer_to_offset (text, text + end - offset); + + for (pos = start_pos + 1; pos < end_pos; pos++) + { + log_attrs[pos].is_mandatory_break = FALSE; + log_attrs[pos].is_line_break = FALSE; + log_attrs[pos].is_char_break = FALSE; + } + } + } while (pango_attr_iterator_next (&iter)); + + _pango_attr_iterator_destroy (&iter); + _pango_attr_list_destroy (&list); + + return TRUE; +} + +/* }}} */ static gboolean tailor_break (const char *text, @@ -1622,6 +1720,9 @@ tailor_break (const char *text, return res; } +/* }}} */ +/* {{{ Public API */ + /** * pango_break: * @text: the text to process. Must be valid UTF-8 @@ -1641,16 +1742,16 @@ tailor_break (const char *text, */ void pango_break (const gchar *text, - gint length, - PangoAnalysis *analysis, - PangoLogAttr *attrs, - int attrs_len) + gint length, + PangoAnalysis *analysis, + PangoLogAttr *attrs, + int attrs_len) { g_return_if_fail (analysis != NULL); g_return_if_fail (attrs != NULL); pango_default_break (text, length, analysis, attrs, attrs_len); - tailor_break (text, length, analysis, -1, attrs, attrs_len); + tailor_break (text, length, analysis, -1, attrs, attrs_len); } /** @@ -1700,28 +1801,6 @@ pango_tailor_break (const char *text, } } -static int -tailor_segment (const char *range_start, - const char *range_end, - int chars_broken, - PangoAnalysis *analysis, - PangoLogAttr *log_attrs) -{ - int chars_in_range; - PangoLogAttr *start = log_attrs + chars_broken; - - chars_in_range = pango_utf8_strlen (range_start, range_end - range_start); - - pango_tailor_break (range_start, - range_end - range_start, - analysis, - -1, - start, - chars_in_range + 1); - - return chars_in_range; -} - /** * pango_get_log_attrs: * @text: text to process. Must be valid UTF-8 @@ -1744,11 +1823,11 @@ tailor_segment (const char *range_start, */ void pango_get_log_attrs (const char *text, - int length, - int level, - PangoLanguage *language, - PangoLogAttr *log_attrs, - int attrs_len) + int length, + int level, + PangoLanguage *language, + PangoLogAttr *log_attrs, + int attrs_len) { int chars_broken; PangoAnalysis analysis = { NULL }; @@ -1769,119 +1848,29 @@ pango_get_log_attrs (const char *text, { const char *run_start, *run_end; PangoScript script; + int chars_in_range; pango_script_iter_get_range (&iter, &run_start, &run_end, &script); analysis.script = script; - chars_broken += tailor_segment (run_start, run_end, chars_broken, &analysis, log_attrs); + chars_in_range = pango_utf8_strlen (run_start, run_end - run_start); + + pango_tailor_break (run_start, + run_end - run_start, + &analysis, + -1, + log_attrs + chars_broken, + chars_in_range + 1); + + chars_broken += chars_in_range; } while (pango_script_iter_next (&iter)); _pango_script_iter_fini (&iter); if (chars_broken + 1 > attrs_len) g_warning ("pango_get_log_attrs: attrs_len should have been at least %d, but was %d. Expect corrupted memory.", - chars_broken + 1, - attrs_len); -} - -#include "break-arabic.c" -#include "break-indic.c" -#include "break-thai.c" - -static gboolean -break_script (const char *item_text, - unsigned int item_length, - const PangoAnalysis *analysis, - PangoLogAttr *attrs, - int attrs_len) -{ - switch (analysis->script) - { - case PANGO_SCRIPT_ARABIC: - break_arabic (item_text, item_length, analysis, attrs, attrs_len); - break; - - case PANGO_SCRIPT_DEVANAGARI: - case PANGO_SCRIPT_BENGALI: - case PANGO_SCRIPT_GURMUKHI: - case PANGO_SCRIPT_GUJARATI: - case PANGO_SCRIPT_ORIYA: - case PANGO_SCRIPT_TAMIL: - case PANGO_SCRIPT_TELUGU: - case PANGO_SCRIPT_KANNADA: - case PANGO_SCRIPT_MALAYALAM: - case PANGO_SCRIPT_SINHALA: - break_indic (item_text, item_length, analysis, attrs, attrs_len); - break; - - case PANGO_SCRIPT_THAI: - break_thai (item_text, item_length, analysis, attrs, attrs_len); - break; - default: - return FALSE; - } - - return TRUE; + chars_broken + 1, + attrs_len); } -static gboolean -break_attrs (const char *text, - int length, - GSList *attributes, - int offset, - PangoLogAttr *log_attrs, - int log_attrs_len) -{ - PangoAttrList list; - PangoAttrIterator iter; - GSList *l; - - _pango_attr_list_init (&list); - for (l = attributes; l; l = l->next) - { - PangoAttribute *attr = l->data; - - if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS) - pango_attr_list_insert (&list, pango_attribute_copy (attr)); - } - - if (!_pango_attr_list_has_attributes (&list)) - { - _pango_attr_list_destroy (&list); - return FALSE; - } - - _pango_attr_list_get_iterator (&list, &iter); - do { - const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_ALLOW_BREAKS); - - if (attr && ((PangoAttrInt*)attr)->value == 0) - { - int start, end; - int start_pos, end_pos; - int pos; - - pango_attr_iterator_range (&iter, &start, &end); - if (start < offset) - start_pos = 0; - else - start_pos = g_utf8_pointer_to_offset (text, text + start - offset); - if (end >= offset + length) - end_pos = log_attrs_len; - else - end_pos = g_utf8_pointer_to_offset (text, text + end - offset); - - for (pos = start_pos + 1; pos < end_pos; pos++) - { - log_attrs[pos].is_mandatory_break = FALSE; - log_attrs[pos].is_line_break = FALSE; - log_attrs[pos].is_char_break = FALSE; - } - } - } while (pango_attr_iterator_next (&iter)); - - _pango_attr_iterator_destroy (&iter); - _pango_attr_list_destroy (&list); - - return TRUE; -} +/* }}} */ |