From 801e93cfa39a450d28a35ad8a36f84faab19fd36 Mon Sep 17 00:00:00 2001
From: Matthias Clasen
Date: Sat, 21 Aug 2021 15:41:39 -0400
Subject: Add api to validate log attrs

This is based on code that previously lived in
tests/validate-test-boundaries.c

Fixes: #129
---
 pango/break.c              | 529 ++++++++++++++++++++++++++++++++++++++++++++-
 pango/pango-break.h        |  22 ++
 tests/meson.build          |   2 +-
 tests/test-break.c         |   1 -
 tests/validate-log-attrs.c | 528 ---------------------------------------------
 tests/validate-log-attrs.h |  52 -----
 6 files changed, 551 insertions(+), 583 deletions(-)
 delete mode 100644 tests/validate-log-attrs.c
 delete mode 100644 tests/validate-log-attrs.h

diff --git a/pango/break.c b/pango/break.c
index 0cb6666d..20756506 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1759,7 +1759,9 @@ remove_breaks_from_range (const char *text,
                   ch == 0x007C;
 
       /* ZWS sequence */
-      if (after_zws && bt != G_UNICODE_BREAK_SPACE)
+      if (after_zws &&
+          bt != G_UNICODE_BREAK_SPACE &&
+          bt != G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
         log_attrs[pos].is_line_break = TRUE;
 
       after_zws = bt == G_UNICODE_BREAK_ZERO_WIDTH_SPACE ||
@@ -2141,6 +2143,465 @@ tailor_break (const char *text,
   return res;
 }
 
+/* }}} */
+/* {{{ Validation */
+
+G_DEFINE_QUARK(pango-validate-error-quark, pango_validate_error)
+
+/* Callback invoked by log_attr_foreach() for every character.
+ * @attr, @prev_attr and @next_attr describe the positions before the
+ * current, the previous and the next character; @prev_attr is NULL for
+ * the first character. Returns FALSE and sets @error on a violation.
+ */
+typedef gboolean (* CharForeachFunc) (int                 pos,
+                                      gunichar            wc,
+                                      gunichar            prev_wc,
+                                      gunichar            next_wc,
+                                      GUnicodeType        type,
+                                      GUnicodeType        prev_type,
+                                      GUnicodeType        next_type,
+                                      const PangoLogAttr *attr,
+                                      const PangoLogAttr *prev_attr,
+                                      const PangoLogAttr *next_attr,
+                                      gboolean           *after_zws,
+                                      GError            **error);
+
+/* Call @func for each character of @text, handing it the character,
+ * its neighbours and the matching entries of @attrs.
+ *
+ * A negative @length means that @text is nul-terminated. The caller
+ * has to make sure that @attrs holds one more entry than @text has
+ * characters; @attrs_len is not consulted here.
+ *
+ * Returns FALSE as soon as @func does, leaving @error as @func set it.
+ */
+static gboolean
+log_attr_foreach (const char          *text,
+                  int                  length,
+                  const PangoLogAttr  *attrs,
+                  int                  attrs_len,
+                  CharForeachFunc      func,
+                  GError             **error)
+{
+  const gchar *next = text;
+  const gchar *end = length < 0 ? NULL : text + length;
+  gint i = 0;
+  gunichar prev_wc;
+  gunichar next_wc;
+  GUnicodeType prev_type;
+  GUnicodeType next_type;
+  gboolean after_zws;
+
+  if (next == end)
+    goto done;
+
+  prev_type = (GUnicodeType) -1;
+  prev_wc = 0;
+
+  next_wc = g_utf8_get_char (next);
+  next_type = g_unichar_type (next_wc);
+
+  after_zws = FALSE;
+
+  while (next_wc != 0)
+    {
+      GUnicodeType type;
+      gunichar wc;
+
+      wc = next_wc;
+      type = next_type;
+
+      next = g_utf8_next_char (next);
+
+      if (end != NULL && next >= end)
+        next_wc = 0;
+      else
+        next_wc = g_utf8_get_char (next);
+
+      if (next_wc)
+        next_type = g_unichar_type (next_wc);
+
+      if (!func (i,
+                 wc, prev_wc, next_wc,
+                 type, prev_type, next_type,
+                 &attrs[i],
+                 i != 0 ? &attrs[i - 1] : NULL,
+                 &attrs[i + 1],
+                 &after_zws,
+                 error))
+        return FALSE;
+
+      prev_type = type;
+      prev_wc = wc;
+      i++;
+    }
+
+done:
+  return TRUE;
+}
+
+/* CharForeachFunc that checks the line break flags at @attr (and, for
+ * the last character, at @next_attr) against a subset of the UAX #14
+ * line breaking rules. The rule in question is named in the error.
+ *
+ * @after_zws is state shared between calls: it is TRUE while the
+ * position follows a ZWS, possibly with spaces in between.
+ */
+static gboolean
+check_line_char (int                 pos,
+                 gunichar            wc,
+                 gunichar            prev_wc,
+                 gunichar            next_wc,
+                 GUnicodeType        type,
+                 GUnicodeType        prev_type,
+                 GUnicodeType        next_type,
+                 const PangoLogAttr *attr,
+                 const PangoLogAttr *prev_attr,
+                 const PangoLogAttr *next_attr,
+                 gboolean           *after_zws,
+                 GError            **error)
+{
+  GUnicodeBreakType break_type;
+  GUnicodeBreakType prev_break_type;
+
+  break_type = g_unichar_break_type (wc);
+
+  if (prev_wc)
+    prev_break_type = g_unichar_break_type (prev_wc);
+  else
+    prev_break_type = G_UNICODE_BREAK_UNKNOWN;
+
+  if (prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE ||
+      (prev_break_type == G_UNICODE_BREAK_SPACE && *after_zws))
+    *after_zws = TRUE;
+  else
+    *after_zws = FALSE;
+
+  if (wc == '\n' && prev_wc == '\r')
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break between \\r and \\n (LB5)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_wc == 0 && wc != 0)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break before first char (LB2)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (next_wc == 0)
+    {
+      if (!next_attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Always break after the last char (LB3)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_break_type == G_UNICODE_BREAK_MANDATORY)
+    {
+      if (!attr->is_mandatory_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Always break after hard line breaks (LB4)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+      prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+      prev_break_type == G_UNICODE_BREAK_NEXT_LINE)
+    {
+      if (!attr->is_mandatory_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Always break after CR, LF and NL (LB5)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (break_type == G_UNICODE_BREAK_MANDATORY ||
+      break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+      break_type == G_UNICODE_BREAK_LINE_FEED ||
+      break_type == G_UNICODE_BREAK_NEXT_LINE)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break before hard line breaks (LB6)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (break_type == G_UNICODE_BREAK_SPACE ||
+      break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+    {
+      if (attr->is_line_break && prev_attr != NULL &&
+          !attr->is_mandatory_break &&
+          !(next_wc && g_unichar_break_type (next_wc) == G_UNICODE_BREAK_COMBINING_MARK))
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Can't break before a space unless mandatory precedes or combining mark follows (LB7)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (break_type != G_UNICODE_BREAK_ZERO_WIDTH_SPACE &&
+      break_type != G_UNICODE_BREAK_SPACE &&
+      *after_zws)
+    {
+      if (!attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Break before a char following ZWS, even if spaces intervene (LB8)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  /* NOTE(review): this looks at the position before a ZWJ, while the
+   * message (and LB8a) is about the position after it -- confirm.
+   */
+  if (break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break after ZWJ (LB8a)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  /* TODO: check LB9 */
+
+  if (prev_break_type == G_UNICODE_BREAK_WORD_JOINER ||
+      break_type == G_UNICODE_BREAK_WORD_JOINER)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break before or after WJ (LB11)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE)
+    {
+      /* Only an actual break after GL violates LB12 */
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %#x %d: Do not break after GL (LB12)", wc, pos);
+          return FALSE;
+        }
+    }
+
+  /* internal consistency */
+
+  if (attr->is_mandatory_break && !attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %#x %d: Mandatory breaks must also be marked as regular breaks", wc, pos);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Run check_line_char() over every character of @text */
+static gboolean
+check_line_invariants (const char          *text,
+                       int                  length,
+                       const PangoLogAttr  *attrs,
+                       int                  attrs_len,
+                       GError             **error)
+{
+  return log_attr_foreach (text, length,
+                           attrs, attrs_len,
+                           check_line_char, error);
+}
+
+/* Placeholder: no grapheme invariants are checked yet */
+static gboolean
+check_grapheme_invariants (const char          *text,
+                           int                  length,
+                           const PangoLogAttr  *attrs,
+                           int                  attrs_len,
+                           GError             **error)
+{
+  return TRUE;
+}
+
+/* Check that is_word_start and is_word_end flags alternate, beginning
+ * with a start; a position may carry both flags at once.
+ */
+static gboolean
+check_word_invariants (const char          *text,
+                       int                  length,
+                       const PangoLogAttr  *attrs,
+                       int                  attrs_len,
+                       GError             **error)
+{
+  enum {
+    AFTER_START,
+    AFTER_END
+  } state = AFTER_END;
+
+  for (int i = 0; i < attrs_len; i++)
+    {
+      /* Check that word starts and ends are alternating */
+      switch (state)
+        {
+        case AFTER_END:
+          if (attrs[i].is_word_start)
+            {
+              if (attrs[i].is_word_end)
+                state = AFTER_END;
+              else
+                state = AFTER_START;
+              break;
+            }
+          if (attrs[i].is_word_end)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD,
+                           "char %d: Unexpected word end", i);
+              return FALSE;
+            }
+          break;
+
+        case AFTER_START:
+          if (attrs[i].is_word_end)
+            {
+              if (attrs[i].is_word_start)
+                state = AFTER_START;
+              else
+                state = AFTER_END;
+              break;
+            }
+          if (attrs[i].is_word_start)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD,
+                           "char %d: Unexpected word start", i);
+              return FALSE;
+            }
+          break;
+
+        default:
+          g_assert_not_reached ();
+        }
+    }
+
+  return TRUE;
+}
+
+/* Check that is_sentence_start and is_sentence_end flags alternate,
+ * beginning with a start; a position may carry both flags at once.
+ */
+static gboolean
+check_sentence_invariants (const char          *text,
+                           int                  length,
+                           const PangoLogAttr  *attrs,
+                           int                  attrs_len,
+                           GError             **error)
+{
+  enum {
+    AFTER_START,
+    AFTER_END
+  } state = AFTER_END;
+
+  for (int i = 0; i < attrs_len; i++)
+    {
+      /* Check that sentence starts and ends are alternating */
+      switch (state)
+        {
+        case AFTER_END:
+          if (attrs[i].is_sentence_start)
+            {
+              if (attrs[i].is_sentence_end)
+                state = AFTER_END;
+              else
+                state = AFTER_START;
+              break;
+            }
+          if (attrs[i].is_sentence_end)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE,
+                           "char %d: Unexpected sentence end", i);
+              return FALSE;
+            }
+          break;
+
+        case AFTER_START:
+          if (attrs[i].is_sentence_end)
+            {
+              if (attrs[i].is_sentence_start)
+                state = AFTER_START;
+              else
+                state = AFTER_END;
+              break;
+            }
+          if (attrs[i].is_sentence_start)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE,
+                           "char %d: Unexpected sentence start", i);
+              return FALSE;
+            }
+          break;
+
+        default:
+          g_assert_not_reached ();
+        }
+    }
+
+  return TRUE;
+}
+
+/* Check that every expandable space is also marked as white space */
+static gboolean
+check_space_invariants (const char          *text,
+                        int                  length,
+                        const PangoLogAttr  *log_attrs,
+                        int                  attrs_len,
+                        GError             **error)
+{
+  for (int i = 0; i < attrs_len; i++)
+    {
+      if (log_attrs[i].is_expandable_space && !log_attrs[i].is_white)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SPACE,
+                       "char %d: Expandable space must be space", i);
+          return FALSE;
+        }
+    }
+
+  return TRUE;
+}
+
 /* }}} */
 /* {{{ Public API */
 
@@ -2377,6 +2838,72
@@ pango_get_log_attrs (const char    *text,
                        attrs_len);
 }
 
+/**
+ * pango_validate_log_attrs:
+ * @text: text to which @log_attrs belong
+ * @length: length of @text in bytes
+ * @log_attrs: `PangoLogAttr` array to validate
+ * @attrs_len: length of @log_attrs
+ * @error: return location for an error
+ *
+ * Apply sanity checks to @log_attrs.
+ *
+ * This function checks some conditions that Pango
+ * relies on. It is not guaranteed to be an exhaustive
+ * validity test. Currently, it checks that
+ *
+ * - There's no break before the first char
+ * - There's a break after the last char
+ * - Mandatory breaks are line breaks
+ * - There's a mandatory break after hard line breaks
+ * - Lines aren't broken before hard line breaks
+ * - Lines aren't broken between \\r and \\n
+ * - Lines aren't broken before a space (unless the break
+ *   is mandatory, or the space precedes a combining mark)
+ * - Word starts and ends alternate
+ * - Sentence starts and ends alternate
+ * - Expandable spaces are spaces
+ *
+ * Returns: %TRUE if @log_attrs are valid
+ *
+ * Since: 1.50
+ */
+gboolean
+pango_validate_log_attrs (const char          *text,
+                          int                  length,
+                          const PangoLogAttr  *log_attrs,
+                          int                  attrs_len,
+                          GError             **error)
+{
+  int n_chars;
+
+  n_chars = g_utf8_strlen (text, length);
+  if (attrs_len != n_chars + 1)
+    {
+      g_set_error_literal (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_FAILED,
+                           "Array has wrong length");
+      return FALSE;
+    }
+
+  if (!check_line_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_grapheme_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_word_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_sentence_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_space_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  return TRUE;
+}
+
 /* }}} */
 
 /* vim:set foldmethod=marker expandtab: */
diff --git a/pango/pango-break.h b/pango/pango-break.h
index 5d791e27..b29d713a 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -137,6 +137,46 @@ void                    pango_attr_break        (const char    *text,
                                                  PangoLogAttr  *attrs,
                                                  int            attrs_len);
 
+/**
+ * PANGO_VALIDATE_ERROR:
+ *
+ * Error domain for errors set by pango_validate_log_attrs().
+ */
+#define PANGO_VALIDATE_ERROR (pango_validate_error_quark ())
+
+/**
+ * PangoValidateError:
+ * @PANGO_VALIDATE_ERROR_FAILED: The attribute array has the wrong length
+ * @PANGO_VALIDATE_ERROR_BREAK: A line break invariant is violated
+ * @PANGO_VALIDATE_ERROR_GRAPHEME: A grapheme invariant is violated
+ * @PANGO_VALIDATE_ERROR_WORD: A word invariant is violated
+ * @PANGO_VALIDATE_ERROR_SENTENCE: A sentence invariant is violated
+ * @PANGO_VALIDATE_ERROR_SPACE: A white space invariant is violated
+ *
+ * Error codes in the %PANGO_VALIDATE_ERROR domain.
+ *
+ * Since: 1.50
+ */
+typedef enum
+{
+  PANGO_VALIDATE_ERROR_FAILED,
+  PANGO_VALIDATE_ERROR_BREAK,
+  PANGO_VALIDATE_ERROR_GRAPHEME,
+  PANGO_VALIDATE_ERROR_WORD,
+  PANGO_VALIDATE_ERROR_SENTENCE,
+  PANGO_VALIDATE_ERROR_SPACE
+} PangoValidateError;
+
+PANGO_AVAILABLE_IN_1_50
+GQuark pango_validate_error_quark (void);
+
+PANGO_AVAILABLE_IN_1_50
+gboolean                pango_validate_log_attrs (const char          *text,
+                                                  int                  length,
+                                                  const PangoLogAttr  *log_attrs,
+                                                  int                  attrs_len,
+                                                  GError             **error);
+
 G_END_DECLS
 
 #endif /* __PANGO_BREAK_H__ */
diff --git a/tests/meson.build b/tests/meson.build
index 90bb94ba..4776b2c9 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -52,7 +52,7 @@ if cairo_dep.found()
     [ 'testmisc', [ 'testmisc.c' ], [ libpangocairo_dep, libpangoft2_dep, glib_dep, harfbuzz_dep ] ],
     [ 'cxx-test', [ 'cxx-test.cpp' ], [ libpangocairo_dep, gobject_dep, harfbuzz_dep ] ],
     [ 'test-harfbuzz', [ 'test-harfbuzz.c' ], [ libpangocairo_dep, gobject_dep, harfbuzz_dep ] ],
-    [ 'test-break', [ 'test-break.c', 'test-common.c', 'validate-log-attrs.c' ], [libpangocairo_dep, glib_dep, harfbuzz_dep ] ]
+    [ 'test-break', [ 'test-break.c', 'test-common.c' ], [libpangocairo_dep, glib_dep, harfbuzz_dep ] ]
   ]
 
   if pango_cairo_backends.contains('png')
diff --git a/tests/test-break.c b/tests/test-break.c
index fecca168..672d44e1 100644
--- a/tests/test-break.c
+++ b/tests/test-break.c
@@ -30,7 +30,6 @@
 #include "config.h"
 #include
 #include "test-common.h"
-#include "validate-log-attrs.h"
 
 static PangoContext *context;
 
diff --git a/tests/validate-log-attrs.c b/tests/validate-log-attrs.c
deleted file mode 100644
index 6cbdd6f9..00000000
--- a/tests/validate-log-attrs.c
+++ /dev/null
@@ -1,528 +0,0 @@
-/* Pango
- *
- *
Copyright (C) 1999 Red Hat Software - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. - */ - -#include "config.h" - -#include "validate-log-attrs.h" -#include "pango.h" -#include - -/* {{{ Validation */ - -G_DEFINE_QUARK(pango-validate-error-quark, pango_validate_error) - -typedef gboolean (* CharForeachFunc) (int pos, - gunichar wc, - gunichar prev_wc, - gunichar next_wc, - GUnicodeType type, - GUnicodeType prev_type, - GUnicodeType next_type, - const PangoLogAttr *attr, - const PangoLogAttr *prev_attr, - const PangoLogAttr *next_attr, - gboolean *after_zws, - GError **error); - -static gboolean -log_attr_foreach (const char *text, - int length, - const PangoLogAttr *attrs, - int attrs_len, - CharForeachFunc func, - GError **error) -{ - const gchar *next = text; - const gchar *end = text + length; - gint i = 0; - gunichar prev_wc; - gunichar next_wc; - GUnicodeType prev_type; - GUnicodeType next_type; - gboolean after_zws; - - if (next == end) - goto done; - - prev_type = (GUnicodeType) -1; - prev_wc = 0; - - next_wc = g_utf8_get_char (next); - next_type = g_unichar_type (next_wc); - - after_zws = FALSE; - - while (next_wc != 0) - { - GUnicodeType type; - gunichar wc; - - wc = next_wc; - type = next_type; - - next = g_utf8_next_char (next); - - if (next >= end) - 
next_wc = 0; - else - next_wc = g_utf8_get_char (next); - - if (next_wc) - next_type = g_unichar_type (next_wc); - - if (!func (i, - wc, prev_wc, next_wc, - type, prev_type, next_type, - &attrs[i], - i != 0 ? &attrs[i - 1] : NULL, - &attrs[i + 1], - &after_zws, - error)) - return FALSE; - - prev_type = type; - prev_wc = wc; - i++; - } - -done: - return TRUE; -} - -static gboolean -check_line_char (int pos, - gunichar wc, - gunichar prev_wc, - gunichar next_wc, - GUnicodeType type, - GUnicodeType prev_type, - GUnicodeType next_type, - const PangoLogAttr *attr, - const PangoLogAttr *prev_attr, - const PangoLogAttr *next_attr, - gboolean *after_zws, - GError **error) -{ - GUnicodeBreakType break_type; - GUnicodeBreakType prev_break_type; - - break_type = g_unichar_break_type (wc); - - if (prev_wc) - prev_break_type = g_unichar_break_type (prev_wc); - else - prev_break_type = G_UNICODE_BREAK_UNKNOWN; - - if (prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE || - (prev_break_type == G_UNICODE_BREAK_SPACE && *after_zws)) - *after_zws = TRUE; - else - *after_zws = FALSE; - - if (wc == '\n' && prev_wc == '\r') - { - if (attr->is_line_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Do not break between \\r and \\n (LB5)", wc, pos); - return FALSE; - } - } - - if (prev_wc == 0 && wc != 0) - { - if (attr->is_line_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Do not break before first char (LB2)", wc, pos); - return FALSE; - } - } - - if (next_wc == 0) - { - if (!next_attr->is_line_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Always break after the last char (LB3)", wc, pos); - return FALSE; - } - } - - if (prev_break_type == G_UNICODE_BREAK_MANDATORY) - { - if (!attr->is_mandatory_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Always break after hard line 
breaks (LB4)", wc, pos); - return FALSE; - } - } - - if (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN || - prev_break_type == G_UNICODE_BREAK_LINE_FEED || - prev_break_type == G_UNICODE_BREAK_NEXT_LINE) - { - if (!attr->is_mandatory_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Always break after CR, LF and NL (LB5)", wc, pos); - return FALSE; - } - } - - if (break_type == G_UNICODE_BREAK_MANDATORY || - break_type == G_UNICODE_BREAK_CARRIAGE_RETURN || - break_type == G_UNICODE_BREAK_LINE_FEED || - break_type == G_UNICODE_BREAK_NEXT_LINE) - { - if (attr->is_line_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Do not break before hard line beaks (LB6)", wc, pos); - return FALSE; - } - } - - if (break_type == G_UNICODE_BREAK_SPACE || - break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE) - { - if (attr->is_line_break && prev_attr != NULL && - !attr->is_mandatory_break && - !(next_wc && g_unichar_break_type (next_wc) == G_UNICODE_BREAK_COMBINING_MARK)) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Can't break before a space unless mandatory precedes or combining mark follows (LB7)", wc, pos); - return FALSE; - } - } - - if (break_type != G_UNICODE_BREAK_ZERO_WIDTH_SPACE && - break_type != G_UNICODE_BREAK_SPACE && - *after_zws) - { - if (!attr->is_line_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Break before a char following ZWS, even if spaces intervene (LB8)", wc, pos); - return FALSE; - } - } - - if (break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER) - { - if (attr->is_line_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Do not break after ZWJ (LB8a)", wc, pos); - return FALSE; - } - } - - /* TODO: check LB9 */ - - if (prev_break_type == G_UNICODE_BREAK_WORD_JOINER || - break_type == 
G_UNICODE_BREAK_WORD_JOINER) - { - if (attr->is_line_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Do not break before or after WJ (LB11)", wc, pos); - return FALSE; - } - } - - if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Do not break after GL (LB12)", wc, pos); - return FALSE; - } - - /* internal consistency */ - - if (attr->is_mandatory_break && !attr->is_line_break) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK, - "char %#x %d: Mandatory breaks must also be marked as regular breaks", wc, pos); - return FALSE; - } - - return TRUE; -} - -static gboolean -check_line_invariants (const char *text, - int length, - const PangoLogAttr *attrs, - int attrs_len, - GError **error) -{ - return log_attr_foreach (text, length, - attrs, attrs_len, - check_line_char, error); -} - -static gboolean -check_grapheme_invariants (const char *text, - int length, - const PangoLogAttr *attrs, - int attrs_len, - GError **error) -{ - return TRUE; -} - -static gboolean -check_word_invariants (const char *text, - int length, - const PangoLogAttr *attrs, - int attrs_len, - GError **error) -{ - enum { - AFTER_START, - AFTER_END - } state = AFTER_END; - - for (int i = 0; i < attrs_len; i++) - { - /* Check that word starts and ends are alternating */ - switch (state) - { - case AFTER_END: - if (attrs[i].is_word_start) - { - if (attrs[i].is_word_end) - state = AFTER_END; - else - state = AFTER_START; - break; - } - if (attrs[i].is_word_end) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD, - "char %d: Unexpected word end", i); - return FALSE; - } - break; - - case AFTER_START: - if (attrs[i].is_word_end) - { - if (attrs[i].is_word_start) - state = AFTER_START; - else - state = AFTER_END; - break; - } - if (attrs[i].is_word_start) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, 
PANGO_VALIDATE_ERROR_WORD, - "char %d: Unexpected word start", i); - return FALSE; - } - break; - - default: - g_assert_not_reached (); - } - - /* Check that words don't end in the middle of graphemes */ - if (attrs[i].is_word_boundary && !attrs[i].is_cursor_position) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE, - "char %d: Word ends inside a grapheme", i); - return FALSE; - } - } - - return TRUE; -} - -static gboolean -check_sentence_invariants (const char *text, - int length, - const PangoLogAttr *attrs, - int attrs_len, - GError **error) -{ - enum { - AFTER_START, - AFTER_END - } state = AFTER_END; - - for (int i = 0; i < attrs_len; i++) - { - /* Check that word starts and ends are alternating */ - switch (state) - { - case AFTER_END: - if (attrs[i].is_sentence_start) - { - if (attrs[i].is_sentence_end) - state = AFTER_END; - else - state = AFTER_START; - break; - } - if (attrs[i].is_sentence_end) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE, - "char %d: Unexpected sentence end", i); - return FALSE; - } - break; - - case AFTER_START: - if (attrs[i].is_sentence_end) - { - if (attrs[i].is_sentence_start) - state = AFTER_START; - else - state = AFTER_END; - break; - } - if (attrs[i].is_sentence_start) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE, - "char %d: Unexpected sentence start", i); - return FALSE; - } - break; - - default: - g_assert_not_reached (); - } - } - - return TRUE; -} - -static gboolean -check_space_invariants (const char *text, - int length, - const PangoLogAttr *log_attrs, - int attrs_len, - GError **error) -{ - for (int i = 0; i < attrs_len; i++) - { - if (log_attrs[i].is_expandable_space && !log_attrs[i].is_white) - { - g_set_error (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SPACE, - "char %d: Expandable space must be space", i); - return FALSE; - } - } - - return TRUE; -} - -/* }}} */ -/* {{{ Public API */ - -/* - * 
pango_validate_log_attrs: - * @text: text to which @log_attrs belong - * @length: length of @text - * @log_attrs: `PangoLogAttr` array to validate - * @attrs_len: length of @log_attrs - * - * Apply sanity checks to @log_attrs. - * - * This function checks some conditions that Pango - * relies on. It is not guaranteed to be an exhaustive - * validity test. Currentlty, it checks that - * - * - There's no break before the first char - * - Mandatory breaks are line breaks - * - Line breaks are char breaks - * - Lines aren't broken between \\r and \\n - * - Lines aren't broken before a space (unless the break - * is mandatory, or the space precedes a combining mark) - * - Lines aren't broken between two open punctuation - * or between two close punctuation characters - * - Lines aren't broken between a letter and a quotation mark - * - Word starts and ends alternate - * - Sentence starts and ends alternate - * - Expandable spaces are spaces - * - Words don't end in the middle of graphemes - * - Sentences don't end in the middle of words - * - * Returns: %TRUE if @log_attrs are valid - */ -gboolean -pango_validate_log_attrs (const char *text, - int length, - const PangoLogAttr *log_attrs, - int attrs_len, - GError **error) -{ - int n_chars; - - n_chars = g_utf8_strlen (text, length); - if (attrs_len != n_chars + 1) - { - g_set_error_literal (error, - PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_FAILED, - "Array has wrong length"); - return FALSE; - } - - if (!check_line_invariants (text, length, log_attrs, attrs_len, error)) - return FALSE; - - if (!check_grapheme_invariants (text, length, log_attrs, attrs_len, error)) - return FALSE; - - if (!check_word_invariants (text, length, log_attrs, attrs_len, error)) - return FALSE; - - if (!check_sentence_invariants (text, length, log_attrs, attrs_len, error)) - return FALSE; - - if (!check_space_invariants (text, length, log_attrs, attrs_len, error)) - return FALSE; - - return TRUE; -} - - /* }}} */ - -/* vim:set 
foldmethod=marker expandtab: */ diff --git a/tests/validate-log-attrs.h b/tests/validate-log-attrs.h deleted file mode 100644 index 22b9ae63..00000000 --- a/tests/validate-log-attrs.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Pango - * - * Copyright (C) 1999 Red Hat Software - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. - */ - -#ifndef __VALIDATE_LOG_ATTRS_H__ -#define __VALIDATE_LOG_ATTRS_H__ - -#include - -G_BEGIN_DECLS - -#include - -#define PANGO_VALIDATE_ERROR (pango_validate_error_quark ()) - -typedef enum -{ - PANGO_VALIDATE_ERROR_FAILED, - PANGO_VALIDATE_ERROR_BREAK, - PANGO_VALIDATE_ERROR_GRAPHEME, - PANGO_VALIDATE_ERROR_WORD, - PANGO_VALIDATE_ERROR_SENTENCE, - PANGO_VALIDATE_ERROR_SPACE -} PangoValidateError; - -GQuark pango_validate_error_quark (void); - -gboolean pango_validate_log_attrs (const char *text, - int length, - const PangoLogAttr *log_attrs, - int attrs_len, - GError **error); - -G_END_DECLS - -#endif /* __VALIDATE_LOG_ATTRS_H__ */ -- cgit v1.2.1