diff options
author | Bruno Haible <bruno@clisp.org> | 2021-12-29 09:12:34 +0100 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2021-12-29 09:13:12 +0100 |
commit | ad814f4ddc420d5a05dc489c7c8086fe29d76ecb (patch) | |
tree | 0c6720197123c3d41fde8ada450398917652c3b0 /lib | |
parent | a2c2b549a759952349baaffcaa37caa647d59e71 (diff) | |
download | gnulib-ad814f4ddc420d5a05dc489c7c8086fe29d76ecb.tar.gz |
unilbrk: Update handling of Hebrew letter + hyphen for Unicode 10.0.0.
* lib/unilbrk/lbrktables.h (LBP_HL_BA): New enum value.
* lib/unilbrk/u8-possible-linebreaks.c (u8_possible_linebreaks): Add
code for handling break-after character after Hebrew letter.
* lib/unilbrk/u16-possible-linebreaks.c (u16_possible_linebreaks):
Likewise.
* lib/unilbrk/u32-possible-linebreaks.c (u32_possible_linebreaks):
Likewise.
* tests/unilbrk/test-u8-possible-linebreaks.c (main): Add a test
regarding break-after character after Hebrew letter.
* tests/unilbrk/test-u16-possible-linebreaks.c (main): Likewise.
* tests/unilbrk/test-u32-possible-linebreaks.c (main): Likewise.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/unilbrk/lbrktables.h | 4 | ||||
-rw-r--r-- | lib/unilbrk/u16-possible-linebreaks.c | 9 | ||||
-rw-r--r-- | lib/unilbrk/u32-possible-linebreaks.c | 9 | ||||
-rw-r--r-- | lib/unilbrk/u8-possible-linebreaks.c | 9 |
4 files changed, 27 insertions, 4 deletions
diff --git a/lib/unilbrk/lbrktables.h b/lib/unilbrk/lbrktables.h
index 9c8f0bb745..31ce91ac17 100644
--- a/lib/unilbrk/lbrktables.h
+++ b/lib/unilbrk/lbrktables.h
@@ -72,7 +72,9 @@ enum
   LBP_ZWJ = 27, /* zero width joiner */
   LBP_EB = 28, /* emoji base */
   LBP_EM = 29, /* emoji modifier */
-  LBP_XX = 37 /* unknown */
+  LBP_XX = 37, /* unknown */
+  /* Artificial values that exist only at runtime, not in the tables. */
+  LBP_HL_BA = 100
 };
 
 #include "lbrkprop1.h"
diff --git a/lib/unilbrk/u16-possible-linebreaks.c b/lib/unilbrk/u16-possible-linebreaks.c
index 7ad88367c7..ee11730450 100644
--- a/lib/unilbrk/u16-possible-linebreaks.c
+++ b/lib/unilbrk/u16-possible-linebreaks.c
@@ -165,6 +165,11 @@ u16_possible_linebreaks (const uint16_t *s, size_t n, const char *encoding, char
                             ? UC_BREAK_POSSIBLE
                             : UC_BREAK_PROHIBITED);
                     }
+                  else if (prev_prop == LBP_HL_BA)
+                    {
+                      /* (LB21a) Don't break after Hebrew + Hyphen/Break-After. */
+                      *p = UC_BREAK_PROHIBITED;
+                    }
                   else
                     {
                       switch (unilbrk_table [last_prop] [prop])
@@ -186,7 +191,9 @@ u16_possible_linebreaks (const uint16_t *s, size_t n, const char *encoding, char
                   seen_space = NULL;
                 }
 
-              prev_prop = prop;
+              prev_prop = (prev_prop == LBP_HL && (prop == LBP_HY || prop == LBP_BA)
+                           ? LBP_HL_BA
+                           : prop);
             }
 
           if (prop == LBP_RI)
diff --git a/lib/unilbrk/u32-possible-linebreaks.c b/lib/unilbrk/u32-possible-linebreaks.c
index 0c84ff4f1a..d36e2db882 100644
--- a/lib/unilbrk/u32-possible-linebreaks.c
+++ b/lib/unilbrk/u32-possible-linebreaks.c
@@ -159,6 +159,11 @@ u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char
                             ? UC_BREAK_POSSIBLE
                             : UC_BREAK_PROHIBITED);
                     }
+                  else if (prev_prop == LBP_HL_BA)
+                    {
+                      /* (LB21a) Don't break after Hebrew + Hyphen/Break-After. */
+                      *p = UC_BREAK_PROHIBITED;
+                    }
                   else
                     {
                       switch (unilbrk_table [last_prop] [prop])
@@ -180,7 +185,9 @@ u32_possible_linebreaks (const uint32_t *s, size_t n, const char *encoding, char
                   seen_space = NULL;
                 }
 
-              prev_prop = prop;
+              prev_prop = (prev_prop == LBP_HL && (prop == LBP_HY || prop == LBP_BA)
+                           ? LBP_HL_BA
+                           : prop);
             }
 
           if (prop == LBP_RI)
diff --git a/lib/unilbrk/u8-possible-linebreaks.c b/lib/unilbrk/u8-possible-linebreaks.c
index b631ddeec6..06b914af91 100644
--- a/lib/unilbrk/u8-possible-linebreaks.c
+++ b/lib/unilbrk/u8-possible-linebreaks.c
@@ -165,6 +165,11 @@ u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, char *
                             ? UC_BREAK_POSSIBLE
                             : UC_BREAK_PROHIBITED);
                     }
+                  else if (prev_prop == LBP_HL_BA)
+                    {
+                      /* (LB21a) Don't break after Hebrew + Hyphen/Break-After. */
+                      *p = UC_BREAK_PROHIBITED;
+                    }
                   else
                     {
                       switch (unilbrk_table [last_prop] [prop])
@@ -186,7 +191,9 @@ u8_possible_linebreaks (const uint8_t *s, size_t n, const char *encoding, char *
                   seen_space = NULL;
                 }
 
-              prev_prop = prop;
+              prev_prop = (prev_prop == LBP_HL && (prop == LBP_HY || prop == LBP_BA)
+                           ? LBP_HL_BA
+                           : prop);
             }
 
           if (prop == LBP_RI)