From c8d8a8ed6f782efbcbdbe90467050c0eb9fb76b3 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Wed, 29 Dec 2021 00:06:00 +0100 Subject: unilbrk: Restore deviation for (IS,AL) pair. * lib/gen-uni-tables.c (output_lbrk_rules_as_tables): Disable rule LB29. * lib/unilbrk/lbrktables.c: Regenerated. * tests/unilbrk/test-u8-possible-linebreaks.c (main): Add test case with HTML markup. * tests/unilbrk/test-u16-possible-linebreaks.c (main): Likewise. * tests/unilbrk/test-u32-possible-linebreaks.c (main): Likewise. * tests/unilbrk/test-ulc-possible-linebreaks.c (main): Likewise. --- tests/unilbrk/test-u16-possible-linebreaks.c | 19 +++++++++++++++++++ tests/unilbrk/test-u32-possible-linebreaks.c | 19 +++++++++++++++++++ tests/unilbrk/test-u8-possible-linebreaks.c | 17 ++++++++++++++++- tests/unilbrk/test-ulc-possible-linebreaks.c | 15 +++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/unilbrk/test-u16-possible-linebreaks.c b/tests/unilbrk/test-u16-possible-linebreaks.c index b27d75b2bc..c8c5c50a7a 100644 --- a/tests/unilbrk/test-u16-possible-linebreaks.c +++ b/tests/unilbrk/test-u16-possible-linebreaks.c @@ -95,5 +95,24 @@ main () free (p); } + /* Test line breaking in a string with HTML markup. */ + { + static const uint16_t input[21] = + { + '<', 'P', '>', 'S', 'o', 'm', 'e', ' ', 's', 'e', 'n', 't', + 'e', 'n', 'c', 'e', '.', '<', '/', 'P', '>' + }; + char *p = (char *) malloc (SIZEOF (input)); + size_t i; + + u16_possible_linebreaks (input, SIZEOF (input), "UTF-8", p); + for (i = 0; i < 21; i++) + { + ASSERT (p[i] == (i == 8 || i == 17 || i == 19 ? 
UC_BREAK_POSSIBLE : + UC_BREAK_PROHIBITED)); + } + free (p); + } + return 0; } diff --git a/tests/unilbrk/test-u32-possible-linebreaks.c b/tests/unilbrk/test-u32-possible-linebreaks.c index 1474ae1c27..b41f49cf2d 100644 --- a/tests/unilbrk/test-u32-possible-linebreaks.c +++ b/tests/unilbrk/test-u32-possible-linebreaks.c @@ -95,5 +95,24 @@ main () free (p); } + /* Test line breaking in a string with HTML markup. */ + { + static const uint32_t input[21] = + { + '<', 'P', '>', 'S', 'o', 'm', 'e', ' ', 's', 'e', 'n', 't', + 'e', 'n', 'c', 'e', '.', '<', '/', 'P', '>' + }; + char *p = (char *) malloc (SIZEOF (input)); + size_t i; + + u32_possible_linebreaks (input, SIZEOF (input), "UTF-8", p); + for (i = 0; i < 21; i++) + { + ASSERT (p[i] == (i == 8 || i == 17 || i == 19 ? UC_BREAK_POSSIBLE : + UC_BREAK_PROHIBITED)); + } + free (p); + } + return 0; } diff --git a/tests/unilbrk/test-u8-possible-linebreaks.c b/tests/unilbrk/test-u8-possible-linebreaks.c index c55e900696..b07464c289 100644 --- a/tests/unilbrk/test-u8-possible-linebreaks.c +++ b/tests/unilbrk/test-u8-possible-linebreaks.c @@ -82,12 +82,27 @@ main () size_t i; u8_possible_linebreaks (input, SIZEOF (input), "UTF-8", p); - for (i = 0; i < 4; i++) + for (i = 0; i < 6; i++) { ASSERT (p[i] == (i == 5 ? UC_BREAK_POSSIBLE : UC_BREAK_PROHIBITED)); } free (p); } + /* Test line breaking in a string with HTML markup. */ + { + static const uint8_t input[21] = "

<P>Some sentence.</P>

"; + char *p = (char *) malloc (SIZEOF (input)); + size_t i; + + u8_possible_linebreaks (input, SIZEOF (input), "UTF-8", p); + for (i = 0; i < 21; i++) + { + ASSERT (p[i] == (i == 8 || i == 17 || i == 19 ? UC_BREAK_POSSIBLE : + UC_BREAK_PROHIBITED)); + } + free (p); + } + return 0; } diff --git a/tests/unilbrk/test-ulc-possible-linebreaks.c b/tests/unilbrk/test-ulc-possible-linebreaks.c index 24bae4b717..f3ad7f9222 100644 --- a/tests/unilbrk/test-ulc-possible-linebreaks.c +++ b/tests/unilbrk/test-ulc-possible-linebreaks.c @@ -48,6 +48,21 @@ main () } free (p); } + + /* Test line breaking in a string with HTML markup. */ + { + static const char input[21] = "

<P>Some sentence.</P>

"; + char *p = (char *) malloc (SIZEOF (input)); + size_t i; + + ulc_possible_linebreaks (input, SIZEOF (input), "UTF-8", p); + for (i = 0; i < 21; i++) + { + ASSERT (p[i] == (i == 8 || i == 17 || i == 19 ? UC_BREAK_POSSIBLE : + UC_BREAK_PROHIBITED)); + } + free (p); + } #endif return 0; -- cgit v1.2.1