summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Bruno Haible <bruno@clisp.org> 2021-12-29 09:12:34 +0100
committer: Bruno Haible <bruno@clisp.org> 2021-12-29 09:13:12 +0100
commit: ad814f4ddc420d5a05dc489c7c8086fe29d76ecb (patch)
tree: 0c6720197123c3d41fde8ada450398917652c3b0 /lib
parent: a2c2b549a759952349baaffcaa37caa647d59e71 (diff)
download: gnulib-ad814f4ddc420d5a05dc489c7c8086fe29d76ecb.tar.gz
unilbrk: Update handling of Hebrew letter + hyphen for Unicode 10.0.0.
* lib/unilbrk/lbrktables.h (LBP_HL_BA): New enum value.
* lib/unilbrk/u8-possible-linebreaks.c (u8_possible_linebreaks): Add code for handling break-after character after Hebrew letter.
* lib/unilbrk/u16-possible-linebreaks.c (u16_possible_linebreaks): Likewise.
* lib/unilbrk/u32-possible-linebreaks.c (u32_possible_linebreaks): Likewise.
* tests/unilbrk/test-u8-possible-linebreaks.c (main): Add a test regarding break-after character after Hebrew letter.
* tests/unilbrk/test-u16-possible-linebreaks.c (main): Likewise.
* tests/unilbrk/test-u32-possible-linebreaks.c (main): Likewise.
Diffstat (limited to 'lib')
-rw-r--r-- lib/unilbrk/lbrktables.h | 4
-rw-r--r-- lib/unilbrk/u16-possible-linebreaks.c | 9
-rw-r--r-- lib/unilbrk/u32-possible-linebreaks.c | 9
-rw-r--r-- lib/unilbrk/u8-possible-linebreaks.c | 9
4 files changed, 27 insertions, 4 deletions
diff --git a/lib/unilbrk/lbrktables.h b/lib/unilbrk/lbrktables.h
index 9c8f0bb745..31ce91ac17 100644
--- a/lib/unilbrk/lbrktables.h
+++ b/lib/unilbrk/lbrktables.h
@@ -72,7 +72,9 @@ enum
LBP_ZWJ = 27, /* zero width joiner */
LBP_EB = 28, /* emoji base */
LBP_EM = 29, /* emoji modifier */
- LBP_XX = 37 /* unknown */
+ LBP_XX = 37, /* unknown */
+ /* Artificial values that exist only at runtime, not in the tables. */
+ LBP_HL_BA = 100
};
#include "lbrkprop1.h"
diff --git a/lib/unilbrk/u16-possible-linebreaks.c b/lib/unilbrk/u16-possible-linebreaks.c
index 7ad88367c7..ee11730450 100644
--- a/lib/unilbrk/u16-possible-linebreaks.c
+++ b/lib/unilbrk/u16-possible-linebreaks.c
@@ -165,6 +165,11 @@ u16_possible_linebreaks (const uint16_t *s, size_t n, const char *encoding, char
? UC_BREAK_POSSIBLE
: UC_BREAK_PROHIBITED);
}
+ else if (prev_prop == LBP_HL_BA)
+ {
+ /* (LB21a) Don't break after Hebrew + Hyphen/Break-After. */
+ *p = UC_BREAK_PROHIBITED;
+ }
else
{
switch (unilbrk_table [last_prop] [prop])
@@ -186,7 +191,9 @@ u16_possible_linebreaks (const uint16_t *s, size_t n, const char *encoding, char
seen_space = NULL;
}
- prev_prop = prop;
+ prev_prop = (prev_prop == LBP_HL && (prop == LBP_HY || prop == LBP_BA)
+ ? LBP_HL_BA
+ : prop);
}
if (prop == LBP_RI)
diff --git a/lib/unilbrk/u32-possible-linebreaks.c b/lib/unilbrk/u32-possible-linebreaks.c
index 0c84ff4f1a..d36e2db882 100644
--- a/lib/unilbrk/u32-possible-linebreaks.c
+++ b/lib/unilbrk/u32-possible-linebreaks.c
@@ -159,6 +159,11 @@ u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char
? UC_BREAK_POSSIBLE
: UC_BREAK_PROHIBITED);
}
+ else if (prev_prop == LBP_HL_BA)
+ {
+ /* (LB21a) Don't break after Hebrew + Hyphen/Break-After. */
+ *p = UC_BREAK_PROHIBITED;
+ }
else
{
switch (unilbrk_table [last_prop] [prop])
@@ -180,7 +185,9 @@ u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char
seen_space = NULL;
}
- prev_prop = prop;
+ prev_prop = (prev_prop == LBP_HL && (prop == LBP_HY || prop == LBP_BA)
+ ? LBP_HL_BA
+ : prop);
}
if (prop == LBP_RI)
diff --git a/lib/unilbrk/u8-possible-linebreaks.c b/lib/unilbrk/u8-possible-linebreaks.c
index b631ddeec6..06b914af91 100644
--- a/lib/unilbrk/u8-possible-linebreaks.c
+++ b/lib/unilbrk/u8-possible-linebreaks.c
@@ -165,6 +165,11 @@ u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, char *
? UC_BREAK_POSSIBLE
: UC_BREAK_PROHIBITED);
}
+ else if (prev_prop == LBP_HL_BA)
+ {
+ /* (LB21a) Don't break after Hebrew + Hyphen/Break-After. */
+ *p = UC_BREAK_PROHIBITED;
+ }
else
{
switch (unilbrk_table [last_prop] [prop])
@@ -186,7 +191,9 @@ u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, char *
seen_space = NULL;
}
- prev_prop = prop;
+ prev_prop = (prev_prop == LBP_HL && (prop == LBP_HY || prop == LBP_BA)
+ ? LBP_HL_BA
+ : prop);
}
if (prop == LBP_RI)