summaryrefslogtreecommitdiff
path: root/lib/unictype.in.h
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2021-12-30 16:45:39 +0100
committer Bruno Haible <bruno@clisp.org> 2021-12-30 18:20:55 +0100
commit ecbed643ffd4e817a924a645832f73dc6a0abdd0 (patch)
tree 878cfb2fcf77bebdf7505194de8b2324f5a05428 /lib/unictype.in.h
parent ef4c53b0329bd6ce418bebbbac3fdf8b52aeb2aa (diff)
download gnulib-ecbed643ffd4e817a924a645832f73dc6a0abdd0.tar.gz
Update to Unicode 11.0.0.
* lib/gen-uni-tables.c (is_property_default_ignorable_code_point): Simplify by use of PROP_PREPENDED_CONCATENATION_MARK. (UC_JOINING_GROUP_HANIFI_ROHINGYA_PA, UC_JOINING_GROUP_HANIFI_ROHINGYA_KINNA_YA): New enum values. (fill_arabicshaping, joining_group_as_c_identifier): Recognize these joining groups. (get_lbp): Update such that unilbrk/lbrkprop.txt comes out as expected. (WBP_EB, WBP_EM, WBP_GAZ, WBP_EBG): Remove enum values. (WBP_WSS): New enum value. (get_wbp): Update such that uniwbrk/wbrkprop.txt comes out as expected. (debug_output_wbp, fill_org_wbp, debug_output_org_wbp, output_wbp): Update for changed enum values. * lib/unictype.in.h (UC_JOINING_GROUP_HANIFI_ROHINGYA_*): New enum values. * lib/unictype/joininggroup_name.h: Add the HANIFI_ROHINGYA_* joining groups. * lib/unictype/joininggroup_byname.gperf: Likewise. * lib/unigbrk.in.h: Mark 4 enum values as obsolete. * lib/unigbrk/u-grapheme-breaks.h (FUNC): Handle emoji modifier sequence according to Unicode 11.0.0. * lib/unigbrk/u8-grapheme-breaks.c: Include <stdbool.h>, unictype.h. * lib/unigbrk/u16-grapheme-breaks.c: Likewise. * lib/unigbrk/u32-grapheme-breaks.c: Likewise. * lib/unigbrk/uc-grapheme-breaks.c: Likewise. * modules/unigbrk/u8-grapheme-breaks (Depends-on): Add unictype/property-extended-pictographic, stdbool. * modules/unigbrk/u16-grapheme-breaks (Depends-on): Likewise. * modules/unigbrk/u32-grapheme-breaks (Depends-on): Likewise. * modules/unigbrk/uc-grapheme-breaks (Depends-on): Likewise. * tests/unigbrk/test-u8-grapheme-breaks.c (main): Add test for emoji modifier / ZWJ sequence. * tests/unigbrk/test-u16-grapheme-breaks.c (main): Likewise. * tests/unigbrk/test-u32-grapheme-breaks.c (main): Likewise. * tests/unigbrk/test-uc-is-grapheme-break.c: Include <stdbool.h>, unictype.h. (main): Update workaround logic to match the one in lib/unigbrk/u-grapheme-breaks.h. * modules/unigbrk/uc-is-grapheme-break-tests (Depends-on): Add unictype/property-extended-pictographic, stdbool. 
* lib/uniwbrk.in.h: Mark 4 enum values as obsolete. (WBP_WSS): New enum value. * lib/uniwbrk/u-wordbreaks.h (FUNC): Handle emoji ZWJ sequences and horizontal whitespace according to Unicode 11.0.0. * lib/uniwbrk/u8-wordbreaks.c: Include unictype.h. * lib/uniwbrk/u16-wordbreaks.c: Likewise. * lib/uniwbrk/u32-wordbreaks.c: Likewise. * lib/uniwbrk/wbrktable.c (uniwbrk_prop_index, uniwbrk_table): Add a row and column for WBP_WSS. * lib/uniwbrk/wbrktable.h (uniwbrk_prop_index, uniwbrk_table): Update declarations. * modules/uniwbrk/u8-wordbreaks (Depends-on): Add unictype/property-extended-pictographic. * modules/uniwbrk/u16-wordbreaks (Depends-on): Likewise. * modules/uniwbrk/u32-wordbreaks (Depends-on): Likewise. * tests/uniwbrk/test-u8-wordbreaks.c (main): Update expected results. * tests/uniwbrk/test-u16-wordbreaks.c (main): Likewise. * tests/uniwbrk/test-u32-wordbreaks.c (main): Likewise. * tests/uniwbrk/test-uc-wordbreaks.c (wordbreakproperty_to_string): Update. * lib/unilbrk/u8-possible-linebreaks.c (u8_possible_linebreaks_loop): Handle ZWJ according to Unicode 11.0.0. * lib/unilbrk/u16-possible-linebreaks.c (u16_possible_linebreaks_loop): Likewise. * lib/unilbrk/u32-possible-linebreaks.c (u32_possible_linebreaks_loop): Likewise. * lib/uniwidth/width.c (nonspacing_table_data, nonspacing_table_ind): Update. (uc_width): Assign width 2 to the characters 0x187ED..0x187F1, 0x1F6F9, 0x1F9E7..0x1F9FF. * tests/uniwidth/test-uc_width2.sh: Expect width 0 for the characters 0x07FD, 0x08D3, 0x09FE, 0x0C04, 0xA8FF, 0x10D24..0x10D27, 0x10F46..0x10F50, 0x110CD, 0x111C9, 0x1133B, 0x1145E, 0x1182F..0x11837, 0x11839..0x1183A, 0x11D90..0x11D91, 0x11D95, 0x11D97, 0x11EF3..0x11EF4. Expect width 2 for the characters 0x187ED..0x187F1, 0x1F6F9, 0x1F9E7..0x1F9FF. * All generated files under lib/uni* and tests/uni*: Regenerate. * tests/uniname/NameAliases.txt: Update. * tests/uniname/UnicodeData.txt: Update. * tests/uninorm/NormalizationTest.txt: Update. 
* tests/unigbrk/GraphemeBreakTest.txt: Update. * tests/uniwbrk/WordBreakTest.txt: Update. * All the affected modules: Bump required libunistring version.
Diffstat (limited to 'lib/unictype.in.h')
-rw-r--r-- lib/unictype.in.h 202
1 files changed, 102 insertions, 100 deletions
diff --git a/lib/unictype.in.h b/lib/unictype.in.h
index cb4902b5f6..8e63e070e7 100644
--- a/lib/unictype.in.h
+++ b/lib/unictype.in.h
@@ -466,106 +466,108 @@ extern int
This enumeration may be extended in the future. */
enum
{
- UC_JOINING_GROUP_NONE, /* No_Joining_Group */
- UC_JOINING_GROUP_AIN, /* Ain */
- UC_JOINING_GROUP_ALAPH, /* Alaph */
- UC_JOINING_GROUP_ALEF, /* Alef */
- UC_JOINING_GROUP_BEH, /* Beh */
- UC_JOINING_GROUP_BETH, /* Beth */
- UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE, /* Burushaski_Yeh_Barree */
- UC_JOINING_GROUP_DAL, /* Dal */
- UC_JOINING_GROUP_DALATH_RISH, /* Dalath_Rish */
- UC_JOINING_GROUP_E, /* E */
- UC_JOINING_GROUP_FARSI_YEH, /* Farsi_Yeh */
- UC_JOINING_GROUP_FE, /* Fe */
- UC_JOINING_GROUP_FEH, /* Feh */
- UC_JOINING_GROUP_FINAL_SEMKATH, /* Final_Semkath */
- UC_JOINING_GROUP_GAF, /* Gaf */
- UC_JOINING_GROUP_GAMAL, /* Gamal */
- UC_JOINING_GROUP_HAH, /* Hah */
- UC_JOINING_GROUP_HE, /* He */
- UC_JOINING_GROUP_HEH, /* Heh */
- UC_JOINING_GROUP_HEH_GOAL, /* Heh_Goal */
- UC_JOINING_GROUP_HETH, /* Heth */
- UC_JOINING_GROUP_KAF, /* Kaf */
- UC_JOINING_GROUP_KAPH, /* Kaph */
- UC_JOINING_GROUP_KHAPH, /* Khaph */
- UC_JOINING_GROUP_KNOTTED_HEH, /* Knotted_Heh */
- UC_JOINING_GROUP_LAM, /* Lam */
- UC_JOINING_GROUP_LAMADH, /* Lamadh */
- UC_JOINING_GROUP_MEEM, /* Meem */
- UC_JOINING_GROUP_MIM, /* Mim */
- UC_JOINING_GROUP_NOON, /* Noon */
- UC_JOINING_GROUP_NUN, /* Nun */
- UC_JOINING_GROUP_NYA, /* Nya */
- UC_JOINING_GROUP_PE, /* Pe */
- UC_JOINING_GROUP_QAF, /* Qaf */
- UC_JOINING_GROUP_QAPH, /* Qaph */
- UC_JOINING_GROUP_REH, /* Reh */
- UC_JOINING_GROUP_REVERSED_PE, /* Reversed_Pe */
- UC_JOINING_GROUP_SAD, /* Sad */
- UC_JOINING_GROUP_SADHE, /* Sadhe */
- UC_JOINING_GROUP_SEEN, /* Seen */
- UC_JOINING_GROUP_SEMKATH, /* Semkath */
- UC_JOINING_GROUP_SHIN, /* Shin */
- UC_JOINING_GROUP_SWASH_KAF, /* Swash_Kaf */
- UC_JOINING_GROUP_SYRIAC_WAW, /* Syriac_Waw */
- UC_JOINING_GROUP_TAH, /* Tah */
- UC_JOINING_GROUP_TAW, /* Taw */
- UC_JOINING_GROUP_TEH_MARBUTA, /* Teh_Marbuta */
- UC_JOINING_GROUP_TEH_MARBUTA_GOAL, /* Teh_Marbuta_Goal */
- UC_JOINING_GROUP_TETH, /* Teth */
- UC_JOINING_GROUP_WAW, /* Waw */
- UC_JOINING_GROUP_YEH, /* Yeh */
- UC_JOINING_GROUP_YEH_BARREE, /* Yeh_Barree */
- UC_JOINING_GROUP_YEH_WITH_TAIL, /* Yeh_With_Tail */
- UC_JOINING_GROUP_YUDH, /* Yudh */
- UC_JOINING_GROUP_YUDH_HE, /* Yudh_He */
- UC_JOINING_GROUP_ZAIN, /* Zain */
- UC_JOINING_GROUP_ZHAIN, /* Zhain */
- UC_JOINING_GROUP_ROHINGYA_YEH, /* Rohingya_Yeh */
- UC_JOINING_GROUP_STRAIGHT_WAW, /* Straight_Waw */
- UC_JOINING_GROUP_MANICHAEAN_ALEPH, /* Manichaean_Aleph */
- UC_JOINING_GROUP_MANICHAEAN_BETH, /* Manichaean_Beth */
- UC_JOINING_GROUP_MANICHAEAN_GIMEL, /* Manichaean_Gimel */
- UC_JOINING_GROUP_MANICHAEAN_DALETH, /* Manichaean_Daleth */
- UC_JOINING_GROUP_MANICHAEAN_WAW, /* Manichaean_Waw */
- UC_JOINING_GROUP_MANICHAEAN_ZAYIN, /* Manichaean_Zayin */
- UC_JOINING_GROUP_MANICHAEAN_HETH, /* Manichaean_Heth */
- UC_JOINING_GROUP_MANICHAEAN_TETH, /* Manichaean_Teth */
- UC_JOINING_GROUP_MANICHAEAN_YODH, /* Manichaean_Yodh */
- UC_JOINING_GROUP_MANICHAEAN_KAPH, /* Manichaean_Kaph */
- UC_JOINING_GROUP_MANICHAEAN_LAMEDH, /* Manichaean_Lamedh */
- UC_JOINING_GROUP_MANICHAEAN_DHAMEDH, /* Manichaean_Dhamedh */
- UC_JOINING_GROUP_MANICHAEAN_THAMEDH, /* Manichaean_Thamedh */
- UC_JOINING_GROUP_MANICHAEAN_MEM, /* Manichaean_Mem */
- UC_JOINING_GROUP_MANICHAEAN_NUN, /* Manichaean_Nun */
- UC_JOINING_GROUP_MANICHAEAN_SAMEKH, /* Manichaean_Aleph */
- UC_JOINING_GROUP_MANICHAEAN_AYIN, /* Manichaean_Ayin */
- UC_JOINING_GROUP_MANICHAEAN_PE, /* Manichaean_Pe */
- UC_JOINING_GROUP_MANICHAEAN_SADHE, /* Manichaean_Sadhe */
- UC_JOINING_GROUP_MANICHAEAN_QOPH, /* Manichaean_Qoph */
- UC_JOINING_GROUP_MANICHAEAN_RESH, /* Manichaean_Resh */
- UC_JOINING_GROUP_MANICHAEAN_TAW, /* Manichaean_Taw */
- UC_JOINING_GROUP_MANICHAEAN_ONE, /* Manichaean_One */
- UC_JOINING_GROUP_MANICHAEAN_FIVE, /* Manichaean_Five */
- UC_JOINING_GROUP_MANICHAEAN_TEN, /* Manichaean_Ten */
- UC_JOINING_GROUP_MANICHAEAN_TWENTY, /* Manichaean_Twenty */
- UC_JOINING_GROUP_MANICHAEAN_HUNDRED, /* Manichaean_Hundred */
- UC_JOINING_GROUP_AFRICAN_FEH, /* African_Feh */
- UC_JOINING_GROUP_AFRICAN_QAF, /* African_Qaf */
- UC_JOINING_GROUP_AFRICAN_NOON, /* African_Noon */
- UC_JOINING_GROUP_MALAYALAM_NGA, /* Malayalam_Nga */
- UC_JOINING_GROUP_MALAYALAM_JA, /* Malayalam_Ja */
- UC_JOINING_GROUP_MALAYALAM_NYA, /* Malayalam_Nya */
- UC_JOINING_GROUP_MALAYALAM_TTA, /* Malayalam_Tta */
- UC_JOINING_GROUP_MALAYALAM_NNA, /* Malayalam_Nna */
- UC_JOINING_GROUP_MALAYALAM_NNNA, /* Malayalam_Nnna */
- UC_JOINING_GROUP_MALAYALAM_BHA, /* Malayalam_Bha */
- UC_JOINING_GROUP_MALAYALAM_RA, /* Malayalam_Ra */
- UC_JOINING_GROUP_MALAYALAM_LLA, /* Malayalam_Lla */
- UC_JOINING_GROUP_MALAYALAM_LLLA, /* Malayalam_Llla */
- UC_JOINING_GROUP_MALAYALAM_SSA /* Malayalam_Ssa */
+ UC_JOINING_GROUP_NONE, /* No_Joining_Group */
+ UC_JOINING_GROUP_AIN, /* Ain */
+ UC_JOINING_GROUP_ALAPH, /* Alaph */
+ UC_JOINING_GROUP_ALEF, /* Alef */
+ UC_JOINING_GROUP_BEH, /* Beh */
+ UC_JOINING_GROUP_BETH, /* Beth */
+ UC_JOINING_GROUP_BURUSHASKI_YEH_BARREE, /* Burushaski_Yeh_Barree */
+ UC_JOINING_GROUP_DAL, /* Dal */
+ UC_JOINING_GROUP_DALATH_RISH, /* Dalath_Rish */
+ UC_JOINING_GROUP_E, /* E */
+ UC_JOINING_GROUP_FARSI_YEH, /* Farsi_Yeh */
+ UC_JOINING_GROUP_FE, /* Fe */
+ UC_JOINING_GROUP_FEH, /* Feh */
+ UC_JOINING_GROUP_FINAL_SEMKATH, /* Final_Semkath */
+ UC_JOINING_GROUP_GAF, /* Gaf */
+ UC_JOINING_GROUP_GAMAL, /* Gamal */
+ UC_JOINING_GROUP_HAH, /* Hah */
+ UC_JOINING_GROUP_HE, /* He */
+ UC_JOINING_GROUP_HEH, /* Heh */
+ UC_JOINING_GROUP_HEH_GOAL, /* Heh_Goal */
+ UC_JOINING_GROUP_HETH, /* Heth */
+ UC_JOINING_GROUP_KAF, /* Kaf */
+ UC_JOINING_GROUP_KAPH, /* Kaph */
+ UC_JOINING_GROUP_KHAPH, /* Khaph */
+ UC_JOINING_GROUP_KNOTTED_HEH, /* Knotted_Heh */
+ UC_JOINING_GROUP_LAM, /* Lam */
+ UC_JOINING_GROUP_LAMADH, /* Lamadh */
+ UC_JOINING_GROUP_MEEM, /* Meem */
+ UC_JOINING_GROUP_MIM, /* Mim */
+ UC_JOINING_GROUP_NOON, /* Noon */
+ UC_JOINING_GROUP_NUN, /* Nun */
+ UC_JOINING_GROUP_NYA, /* Nya */
+ UC_JOINING_GROUP_PE, /* Pe */
+ UC_JOINING_GROUP_QAF, /* Qaf */
+ UC_JOINING_GROUP_QAPH, /* Qaph */
+ UC_JOINING_GROUP_REH, /* Reh */
+ UC_JOINING_GROUP_REVERSED_PE, /* Reversed_Pe */
+ UC_JOINING_GROUP_SAD, /* Sad */
+ UC_JOINING_GROUP_SADHE, /* Sadhe */
+ UC_JOINING_GROUP_SEEN, /* Seen */
+ UC_JOINING_GROUP_SEMKATH, /* Semkath */
+ UC_JOINING_GROUP_SHIN, /* Shin */
+ UC_JOINING_GROUP_SWASH_KAF, /* Swash_Kaf */
+ UC_JOINING_GROUP_SYRIAC_WAW, /* Syriac_Waw */
+ UC_JOINING_GROUP_TAH, /* Tah */
+ UC_JOINING_GROUP_TAW, /* Taw */
+ UC_JOINING_GROUP_TEH_MARBUTA, /* Teh_Marbuta */
+ UC_JOINING_GROUP_TEH_MARBUTA_GOAL, /* Teh_Marbuta_Goal */
+ UC_JOINING_GROUP_TETH, /* Teth */
+ UC_JOINING_GROUP_WAW, /* Waw */
+ UC_JOINING_GROUP_YEH, /* Yeh */
+ UC_JOINING_GROUP_YEH_BARREE, /* Yeh_Barree */
+ UC_JOINING_GROUP_YEH_WITH_TAIL, /* Yeh_With_Tail */
+ UC_JOINING_GROUP_YUDH, /* Yudh */
+ UC_JOINING_GROUP_YUDH_HE, /* Yudh_He */
+ UC_JOINING_GROUP_ZAIN, /* Zain */
+ UC_JOINING_GROUP_ZHAIN, /* Zhain */
+ UC_JOINING_GROUP_ROHINGYA_YEH, /* Rohingya_Yeh */
+ UC_JOINING_GROUP_STRAIGHT_WAW, /* Straight_Waw */
+ UC_JOINING_GROUP_MANICHAEAN_ALEPH, /* Manichaean_Aleph */
+ UC_JOINING_GROUP_MANICHAEAN_BETH, /* Manichaean_Beth */
+ UC_JOINING_GROUP_MANICHAEAN_GIMEL, /* Manichaean_Gimel */
+ UC_JOINING_GROUP_MANICHAEAN_DALETH, /* Manichaean_Daleth */
+ UC_JOINING_GROUP_MANICHAEAN_WAW, /* Manichaean_Waw */
+ UC_JOINING_GROUP_MANICHAEAN_ZAYIN, /* Manichaean_Zayin */
+ UC_JOINING_GROUP_MANICHAEAN_HETH, /* Manichaean_Heth */
+ UC_JOINING_GROUP_MANICHAEAN_TETH, /* Manichaean_Teth */
+ UC_JOINING_GROUP_MANICHAEAN_YODH, /* Manichaean_Yodh */
+ UC_JOINING_GROUP_MANICHAEAN_KAPH, /* Manichaean_Kaph */
+ UC_JOINING_GROUP_MANICHAEAN_LAMEDH, /* Manichaean_Lamedh */
+ UC_JOINING_GROUP_MANICHAEAN_DHAMEDH, /* Manichaean_Dhamedh */
+ UC_JOINING_GROUP_MANICHAEAN_THAMEDH, /* Manichaean_Thamedh */
+ UC_JOINING_GROUP_MANICHAEAN_MEM, /* Manichaean_Mem */
+ UC_JOINING_GROUP_MANICHAEAN_NUN, /* Manichaean_Nun */
+ UC_JOINING_GROUP_MANICHAEAN_SAMEKH, /* Manichaean_Samekh */
+ UC_JOINING_GROUP_MANICHAEAN_AYIN, /* Manichaean_Ayin */
+ UC_JOINING_GROUP_MANICHAEAN_PE, /* Manichaean_Pe */
+ UC_JOINING_GROUP_MANICHAEAN_SADHE, /* Manichaean_Sadhe */
+ UC_JOINING_GROUP_MANICHAEAN_QOPH, /* Manichaean_Qoph */
+ UC_JOINING_GROUP_MANICHAEAN_RESH, /* Manichaean_Resh */
+ UC_JOINING_GROUP_MANICHAEAN_TAW, /* Manichaean_Taw */
+ UC_JOINING_GROUP_MANICHAEAN_ONE, /* Manichaean_One */
+ UC_JOINING_GROUP_MANICHAEAN_FIVE, /* Manichaean_Five */
+ UC_JOINING_GROUP_MANICHAEAN_TEN, /* Manichaean_Ten */
+ UC_JOINING_GROUP_MANICHAEAN_TWENTY, /* Manichaean_Twenty */
+ UC_JOINING_GROUP_MANICHAEAN_HUNDRED, /* Manichaean_Hundred */
+ UC_JOINING_GROUP_AFRICAN_FEH, /* African_Feh */
+ UC_JOINING_GROUP_AFRICAN_QAF, /* African_Qaf */
+ UC_JOINING_GROUP_AFRICAN_NOON, /* African_Noon */
+ UC_JOINING_GROUP_MALAYALAM_NGA, /* Malayalam_Nga */
+ UC_JOINING_GROUP_MALAYALAM_JA, /* Malayalam_Ja */
+ UC_JOINING_GROUP_MALAYALAM_NYA, /* Malayalam_Nya */
+ UC_JOINING_GROUP_MALAYALAM_TTA, /* Malayalam_Tta */
+ UC_JOINING_GROUP_MALAYALAM_NNA, /* Malayalam_Nna */
+ UC_JOINING_GROUP_MALAYALAM_NNNA, /* Malayalam_Nnna */
+ UC_JOINING_GROUP_MALAYALAM_BHA, /* Malayalam_Bha */
+ UC_JOINING_GROUP_MALAYALAM_RA, /* Malayalam_Ra */
+ UC_JOINING_GROUP_MALAYALAM_LLA, /* Malayalam_Lla */
+ UC_JOINING_GROUP_MALAYALAM_LLLA, /* Malayalam_Llla */
+ UC_JOINING_GROUP_MALAYALAM_SSA, /* Malayalam_Ssa */
+ UC_JOINING_GROUP_HANIFI_ROHINGYA_PA, /* Hanifi_Rohingya_Pa */
+ UC_JOINING_GROUP_HANIFI_ROHINGYA_KINNA_YA /* Hanifi_Rohingya_Kinna_Ya */
};
/* Return the name of a joining group. */