summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 1999-08-06 13:13:05 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 1999-08-06 13:13:05 +0000
commit 11695a7329b411687637c00af24e82ec54980447 (patch)
tree e63083f4155abe3de3d7fa7254b7994586238f3b
parent ac88732c4568487a2aa5fbad75ba393744f616ba (diff)
download perl-11695a7329b411687637c00af24e82ec54980447.tar.gz
Character class equivalence tables.
p4raw-id: //depot/cfgperl@3930
-rw-r--r-- MANIFEST 2
-rw-r--r-- lib/unicode/Eq/Latin1 16
-rw-r--r-- lib/unicode/Eq/Unicode 248
-rwxr-xr-x lib/unicode/mktables.PL 79
4 files changed, 344 insertions, 1 deletions
diff --git a/MANIFEST b/MANIFEST
index 0c0c0771f8..ddba85e3c3 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -696,6 +696,8 @@ lib/unicode/Block.pl Unicode character database
lib/unicode/Category.pl Unicode character database
lib/unicode/CombiningClass.pl Unicode character database
lib/unicode/Decomposition.pl Unicode character database
+lib/unicode/Eq/Latin1 Unicode character database
+lib/unicode/Eq/Unicode Unicode character database
lib/unicode/EthiopicSyllables.txt Unicode character database
lib/unicode/In/AlphabeticPresentationForms.pl Unicode character database
lib/unicode/In/Arabic.pl Unicode character database
diff --git a/lib/unicode/Eq/Latin1 b/lib/unicode/Eq/Latin1
new file mode 100644
index 0000000000..b184e3c472
--- /dev/null
+++ b/lib/unicode/Eq/Latin1
@@ -0,0 +1,16 @@
+0041 00C0 00C1 00C2 00C3 00C4 00C5
+0043 00C7
+0045 00C8 00C9 00CA 00CB
+0049 00CC 00CD 00CE 00CF
+004E 00D1
+004F 00D2 00D3 00D4 00D5 00D6 00D8
+0055 00D9 00DA 00DB 00DC
+0059 00DD
+0061 00E0 00E1 00E2 00E3 00E4 00E5
+0063 00E7
+0065 00E8 00E9 00EA 00EB
+0069 00EC 00ED 00EE 00EF
+006E 00F1
+006F 00F2 00F3 00F4 00F5 00F6 00F8
+0075 00F9 00FA 00FB 00FC
+0079 00FD 00FF
diff --git a/lib/unicode/Eq/Unicode b/lib/unicode/Eq/Unicode
new file mode 100644
index 0000000000..6282b3779c
--- /dev/null
+++ b/lib/unicode/Eq/Unicode
@@ -0,0 +1,248 @@
+0041 00C0 00C1 00C2 00C3 00C4 00C5 0100 0102 0104 01CD 01DE 01E0 01FA 0200 0202 1E00 1EA0 1EA2 1EA4 1EA6 1EA8 1EAA 1EAC 1EAE 1EB0 1EB2 1EB4 1EB6
+0042 0181 0182 1E02 1E04 1E06
+0043 00C7 0106 0108 010A 010C 0187 1E08
+0044 010E 0110 018A 018B 01C5 01F2 1E0A 1E0C 1E0E 1E10 1E12
+0045 00C8 00C9 00CA 00CB 0112 0114 0116 0118 011A 0204 0206 1E14 1E16 1E18 1E1A 1E1C 1EB8 1EBA 1EBC 1EBE 1EC0 1EC2 1EC4 1EC6
+0046 0191 1E1E
+0047 011C 011E 0120 0122 0193 01E4 01E6 01F4 1E20
+0048 0124 0126 1E22 1E24 1E26 1E28 1E2A
+0049 00CC 00CD 00CE 00CF 0128 012A 012C 012E 0130 0197 01CF 0208 020A 1E2C 1E2E 1EC8 1ECA
+004A 0134
+004B 0136 0198 01E8 1E30 1E32 1E34
+004C 0139 013B 013D 013F 0141 01C8 1E36 1E38 1E3A 1E3C
+004D 1E3E 1E40 1E42
+004E 00D1 0143 0145 0147 019D 01CB 1E44 1E46 1E48 1E4A
+004F 00D2 00D3 00D4 00D5 00D6 00D8 014C 014E 0150 019F 01A0 01D1 01EA 01EC 01FE 020C 020E 1E4C 1E4E 1E50 1E52 1ECC 1ECE 1ED0 1ED2 1ED4 1ED6 1ED8 1EDA 1EDC 1EDE 1EE0 1EE2
+0050 01A4 1E54 1E56
+0052 0154 0156 0158 0210 0212 1E58 1E5A 1E5C 1E5E
+0053 015A 015C 015E 0160 1E60 1E62 1E64 1E66 1E68
+0054 0162 0164 0166 01AC 01AE 1E6A 1E6C 1E6E 1E70
+0055 00D9 00DA 00DB 00DC 0168 016A 016C 016E 0170 0172 01AF 01D3 01D5 01D7 01D9 01DB 0214 0216 1E72 1E74 1E76 1E78 1E7A 1EE4 1EE6 1EE8 1EEA 1EEC 1EEE 1EF0
+0056 01B2 1E7C 1E7E
+0057 0174 1E80 1E82 1E84 1E86 1E88
+0058 1E8A 1E8C
+0059 00DD 0176 0178 01B3 1E8E 1EF2 1EF4 1EF6 1EF8
+005A 0179 017B 017D 01B5 1E90 1E92 1E94
+0061 00E0 00E1 00E2 00E3 00E4 00E5 0101 0103 0105 01CE 01DF 01E1 01FB 0201 0203 1E01 1E9A 1EA1 1EA3 1EA5 1EA7 1EA9 1EAB 1EAD 1EAF 1EB1 1EB3 1EB5 1EB7
+0062 0180 0183 0253 1E03 1E05 1E07
+0063 00E7 0107 0109 010B 010D 0188 0255 1E09
+0064 010F 0111 018C 0256 0257 1E0B 1E0D 1E0F 1E11 1E13
+0065 00E8 00E9 00EA 00EB 0113 0115 0117 0119 011B 0205 0207 1E15 1E17 1E19 1E1B 1E1D 1EB9 1EBB 1EBD 1EBF 1EC1 1EC3 1EC5 1EC7
+0066 0192 1E1F
+0067 011D 011F 0121 0123 01E5 01E7 01F5 0260 1E21
+0068 0125 0127 0266 1E23 1E25 1E27 1E29 1E2B 1E96
+0069 00EC 00ED 00EE 00EF 0129 012B 012D 012F 01D0 0209 020B 0268 1E2D 1E2F 1EC9 1ECB
+006A 0135 01F0 029D
+006B 0137 0199 01E9 1E31 1E33 1E35
+006C 013A 013C 013E 0140 0142 019A 026B 026C 026D 1E37 1E39 1E3B 1E3D
+006D 0271 1E3F 1E41 1E43
+006E 00F1 0144 0146 0148 019E 0272 0273 1E45 1E47 1E49 1E4B
+006F 00F2 00F3 00F4 00F5 00F6 00F8 014D 014F 0151 01A1 01D2 01EB 01ED 01FF 020D 020F 1E4D 1E4F 1E51 1E53 1ECD 1ECF 1ED1 1ED3 1ED5 1ED7 1ED9 1EDB 1EDD 1EDF 1EE1 1EE3
+0070 01A5 1E55 1E57
+0071 02A0
+0072 0155 0157 0159 0211 0213 027C 027D 027E 1E59 1E5B 1E5D 1E5F
+0073 015B 015D 015F 0161 0282 1E61 1E63 1E65 1E67 1E69
+0074 0163 0165 0167 01AB 01AD 0288 1E6B 1E6D 1E6F 1E71 1E97
+0075 00F9 00FA 00FB 00FC 0169 016B 016D 016F 0171 0173 01B0 01D4 01D6 01D8 01DA 01DC 0215 0217 1E73 1E75 1E77 1E79 1E7B 1EE5 1EE7 1EE9 1EEB 1EED 1EEF 1EF1
+0076 028B 1E7D 1E7F
+0077 0175 1E81 1E83 1E85 1E87 1E89 1E98
+0078 1E8B 1E8D
+0079 00FD 00FF 0177 01B4 1E8F 1E99 1EF3 1EF5 1EF7 1EF9
+007A 017A 017C 017E 01B6 0290 0291 1E91 1E93 1E95
+00C6 01E2 01FC
+00E6 01E3 01FD
+017F 1E9B
+01B7 01EE
+01F1 01C4
+01F3 01C6
+0259 025A
+025C 025D
+026F 0270
+0279 027A 027B
+0283 0286
+0292 01BA 01EF 0293
+02A3 02A5
+0391 0386 1F08 1F09 1F0A 1F0B 1F0C 1F0D 1F0E 1F0F 1F88 1F89 1F8A 1F8B 1F8C 1F8D 1F8E 1F8F 1FB8 1FB9 1FBA 1FBB 1FBC
+0395 0388 1F18 1F19 1F1A 1F1B 1F1C 1F1D 1FC8 1FC9
+0397 0389 1F28 1F29 1F2A 1F2B 1F2C 1F2D 1F2E 1F2F 1F98 1F99 1F9A 1F9B 1F9C 1F9D 1F9E 1F9F 1FCA 1FCB 1FCC
+0399 038A 03AA 1F38 1F39 1F3A 1F3B 1F3C 1F3D 1F3E 1F3F 1FD8 1FD9 1FDA 1FDB
+039F 038C 1F48 1F49 1F4A 1F4B 1F4C 1F4D 1FF8 1FF9
+03A1 1FEC
+03A5 038E 03AB 1F59 1F5B 1F5D 1F5F 1FE8 1FE9 1FEA 1FEB
+03A9 038F 1F68 1F69 1F6A 1F6B 1F6C 1F6D 1F6E 1F6F 1FA8 1FA9 1FAA 1FAB 1FAC 1FAD 1FAE 1FAF 1FFA 1FFB 1FFC
+03B1 03AC 1F00 1F01 1F02 1F03 1F04 1F05 1F06 1F07 1F70 1F71 1F80 1F81 1F82 1F83 1F84 1F85 1F86 1F87 1FB0 1FB1 1FB2 1FB3 1FB4 1FB6 1FB7
+03B5 03AD 1F10 1F11 1F12 1F13 1F14 1F15 1F72 1F73
+03B7 03AE 1F20 1F21 1F22 1F23 1F24 1F25 1F26 1F27 1F74 1F75 1F90 1F91 1F92 1F93 1F94 1F95 1F96 1F97 1FC2 1FC3 1FC4 1FC6 1FC7
+03B9 0390 03AF 03CA 1F30 1F31 1F32 1F33 1F34 1F35 1F36 1F37 1F76 1F77 1FD0 1FD1 1FD2 1FD3 1FD6 1FD7
+03BF 03CC 1F40 1F41 1F42 1F43 1F44 1F45 1F78 1F79
+03C1 1FE4 1FE5
+03C5 03B0 03CB 03CD 1F50 1F51 1F52 1F53 1F54 1F55 1F56 1F57 1F7A 1F7B 1FE0 1FE1 1FE2 1FE3 1FE6 1FE7
+03C9 03CE 1F60 1F61 1F62 1F63 1F64 1F65 1F66 1F67 1F7C 1F7D 1FA0 1FA1 1FA2 1FA3 1FA4 1FA5 1FA6 1FA7 1FF2 1FF3 1FF4 1FF6 1FF7
+0410 04D0 04D2 04D4
+0413 0490 0492 0494 04A4
+0415 04D4 04D6
+0416 0496 04C1 04DC
+0417 0498 04DE
+0418 04E2 04E4
+041A 049A 049C 049E 04C3
+041D 04A2 04A4 04C7
+041E 04E6
+041F 04A6
+0421 04AA
+0422 04AC 04B4
+0423 04EE 04F0 04F2
+0425 04B2
+0426 04B4
+0427 04B6 04B8 04F4
+042B 04F8
+0430 04D1 04D3 04D5
+0433 0491 0493 0495 04A5
+0435 04D5 04D7
+0436 0497 04C2 04DD
+0437 0499 04DF
+0438 04E3 04E5
+043A 049B 049D 049F 04C4
+043D 04A3 04A5 04C8
+043E 04E7
+043F 04A7
+0441 04AB
+0442 04AD 04B5
+0443 04EF 04F1 04F3
+0445 04B3
+0446 04B5
+0447 04B7 04B9 04F5
+044B 04F9
+0460 047C
+0461 047D
+0474 0476
+0475 0477
+04AE 04B0
+04AF 04B1
+04BC 04BE
+04BD 04BF
+04D8 04DA
+04D9 04DB
+04E8 04EA
+04E9 04EB
+0565 0587 FB14
+056B FB15
+056D FB17
+0574 FB13 FB14 FB15 FB17
+0576 FB13 FB16
+057E FB16
+0582 0587
+0627 0622 0623 0625 0672 0673 FD3C FD3D
+0628 FC05 FC06 FC07 FC08 FC09 FC0A FC6A FC6B FC6C FC6D FC6E FC6F FC9C FC9D FC9E FC9F FCA0 FCE1 FCE2 FD9E FDC2
+062A 067C 067D FC0B FC0C FC0D FC0E FC0F FC10 FC70 FC71 FC72 FC73 FC74 FC75 FCA1 FCA2 FCA3 FCA4 FCA5 FCE3 FCE4 FD50 FD51 FD52 FD53 FD54 FD55 FD56 FD57 FD9F FDA0 FDA1 FDA2 FDA3 FDA4
+062B FC11 FC12 FC13 FC14 FC76 FC77 FC78 FC79 FC7A FC7B FCA6 FCE5 FCE6
+062C FC15 FC16 FCA7 FCA8 FD01 FD02 FD1D FD1E FD58 FD59 FDA5 FDA6 FDA7 FDBE
+062D 0681 0682 0685 FC17 FC18 FCA9 FCAA FCFF FD00 FD1B FD1C FD5A FD5B FDBF
+062E FC19 FC1A FC1B FCAB FCAC FD03 FD04 FD1F FD20
+062F 0689 068A 068B 068F 0690
+0630 FC5B
+0631 0692 0693 0694 0695 0696 0697 0699 FC5C
+0633 069A 069B 069C FC1C FC1D FC1E FC1F FCAD FCAE FCAF FCB0 FCE7 FCE8 FCFB FCFC FD0E FD17 FD18 FD2A FD31 FD34 FD35 FD36 FD5C FD5D FD5E FD5F FD60 FD61 FD62 FD63 FDA8 FDC6
+0634 FCE9 FCEA FCFD FCFE FD09 FD0A FD0B FD0C FD0D FD19 FD1A FD25 FD26 FD27 FD28 FD29 FD2D FD2E FD2F FD30 FD32 FD37 FD38 FD39 FD67 FD68 FD69 FD6A FD6B FD6C FD6D FDAA
+0635 069D 069E FC20 FC21 FCB1 FCB2 FCB3 FD05 FD06 FD0F FD21 FD22 FD2B FD64 FD65 FD66 FDA9 FDC5
+0636 FC22 FC23 FC24 FC25 FCB4 FCB5 FCB6 FCB7 FD07 FD08 FD10 FD23 FD24 FD2C FD6E FD6F FD70 FDAB
+0637 069F FC26 FC27 FCB8 FCF5 FCF6 FD11 FD12 FD33 FD3A FD71 FD72 FD73 FD74
+0638 FC28 FCB9 FD3B
+0639 06A0 FC29 FC2A FCBA FCBB FCF7 FCF8 FD13 FD14 FD75 FD76 FD77 FD78 FDB6 FDC4
+063A FC2B FC2C FCBC FCBD FCF9 FCFA FD15 FD16 FD79 FD7A FD7B
+0641 06A2 06A3 06A5 FC2D FC2E FC2F FC30 FC31 FC32 FC7C FC7D FCBE FCBF FCC0 FCC1 FD7C FD7D FDC1
+0642 06A7 06A8 FC33 FC34 FC35 FC36 FC7E FC7F FCC2 FCC3 FD7E FD7F FDB2 FDB4
+0643 06AB 06AC 06AE FC37 FC38 FC39 FC3A FC3B FC3C FC3D FC3E FC80 FC81 FC82 FC83 FC84 FCC4 FCC5 FCC6 FCC7 FCC8 FCEB FCEC FDB7 FDBB FDC3
+0644 06B5 06B6 06B7 FC3F FC40 FC41 FC42 FC43 FC44 FC85 FC86 FC87 FCC9 FCCA FCCB FCCC FCCD FCED FD80 FD81 FD82 FD83 FD84 FD85 FD86 FD87 FD88 FDAC FDAD FDB5 FDBA FDBC FEF5 FEF6 FEF7 FEF8 FEF9 FEFA FEFB FEFC
+0645 FC45 FC46 FC47 FC48 FC49 FC4A FC88 FC89 FCCE FCCF FCD0 FCD1 FD89 FD8A FD8B FD8C FD8D FD8E FD8F FD92 FDB1 FDB9 FDC0
+0646 06BC 06BD FC4B FC4C FC4D FC4E FC4F FC50 FC8A FC8B FC8C FC8D FC8E FC8F FCD2 FCD3 FCD4 FCD5 FCD6 FCEE FCEF FD95 FD96 FD97 FD98 FD99 FD9A FD9B FDB3 FDB8 FDBD FDC7
+0647 06C0 FC51 FC52 FC53 FC54 FCD7 FCD8 FCD9 FD93 FD94
+0648 0624 06C4 06CA
+0649 FC5D FC90
+064A 0626 06CD 06CE 06D1 FBEA FBEB FBEC FBED FBEE FBEF FBF0 FBF1 FBF2 FBF3 FBF4 FBF5 FBF6 FBF7 FBF8 FC00 FC01 FC02 FC03 FC04 FC55 FC56 FC57 FC58 FC59 FC5A FC64 FC65 FC66 FC67 FC68 FC69 FC91 FC92 FC93 FC94 FC95 FC96 FC97 FC98 FC99 FC9A FC9B FCDA FCDB FCDC FCDD FCDE FCDF FCE0 FCF0 FCF1 FD9C FD9D FDAE FDAF FDB0
+06AF 06B0 06B2 06B4
+06C7 0677
+FBD7 FBDD
+FE8D FD3D FE81 FE83 FE87
+FE8E FD3C FE82 FE84 FE88
+FE8F FC05 FC06 FC07 FC08 FC09 FC0A
+FE90 FC6A FC6B FC6C FC6D FC6E FC6F FD9E FDC2
+FE91 FC9C FC9D FC9E FC9F FCA0
+FE92 FCE1 FCE2
+FE95 FC0B FC0C FC0D FC0E FC0F FC10
+FE96 FC70 FC71 FC72 FC73 FC74 FC75 FD51 FD9F FDA0 FDA1 FDA2 FDA3 FDA4
+FE97 FCA1 FCA2 FCA3 FCA4 FCA5 FD50 FD52 FD53 FD54 FD55 FD56 FD57
+FE98 FCE3 FCE4
+FE99 FC11 FC12 FC13 FC14
+FE9A FC76 FC77 FC78 FC79 FC7A FC7B
+FE9B FCA6
+FE9C FCE5 FCE6
+FE9D FC15 FC16 FD01 FD02
+FE9E FD1D FD1E FD58 FDA5 FDA6 FDA7 FDBE
+FE9F FCA7 FCA8 FD59
+FEA1 FC17 FC18 FCFF FD00
+FEA2 FD1B FD1C FD5A FD5B FDBF
+FEA3 FCA9 FCAA
+FEA5 FC19 FC1A FC1B FD03 FD04
+FEA6 FD1F FD20
+FEA7 FCAB FCAC
+FEAB FC5B
+FEAD FC5C
+FEB1 FC1C FC1D FC1E FC1F FCFB FCFC FD0E
+FEB2 FD17 FD18 FD2A FD5E FD5F FD62 FDA8 FDC6
+FEB3 FCAD FCAE FCAF FCB0 FD31 FD5C FD5D FD60 FD61 FD63
+FEB4 FCE7 FCE8 FD34 FD35 FD36
+FEB5 FCFD FCFE FD09 FD0A FD0B FD0C FD0D
+FEB6 FD19 FD1A FD25 FD26 FD27 FD28 FD29 FD67 FD69 FD6A FD6C FDAA
+FEB7 FD2D FD2E FD2F FD30 FD32 FD68 FD6B FD6D
+FEB8 FCE9 FCEA FD37 FD38 FD39
+FEB9 FC20 FC21 FD05 FD06 FD0F
+FEBA FD21 FD22 FD2B FD64 FD66 FDA9
+FEBB FCB1 FCB2 FCB3 FD65 FDC5
+FEBD FC22 FC23 FC24 FC25 FD07 FD08 FD10
+FEBE FD23 FD24 FD2C FD6E FD6F FDAB
+FEBF FCB4 FCB5 FCB6 FCB7 FD70
+FEC1 FC26 FC27 FCF5 FCF6
+FEC2 FD11 FD12 FD71 FD74
+FEC3 FCB8 FD33 FD72 FD73
+FEC4 FD3A
+FEC5 FC28
+FEC7 FCB9
+FEC8 FD3B
+FEC9 FC29 FC2A FCF7 FCF8
+FECA FD13 FD14 FD75 FD76 FD78 FDB6
+FECB FCBA FCBB FD77 FDC4
+FECD FC2B FC2C FCF9 FCFA
+FECE FD15 FD16 FD79 FD7A FD7B
+FECF FCBC FCBD
+FED1 FC2D FC2E FC2F FC30 FC31 FC32
+FED2 FC7C FC7D FD7C FDC1
+FED3 FCBE FCBF FCC0 FCC1 FD7D
+FED5 FC33 FC34 FC35 FC36
+FED6 FC7E FC7F FD7E FD7F FDB2
+FED7 FCC2 FCC3 FDB4
+FED9 FC37 FC38 FC39 FC3A FC3B FC3C FC3D FC3E
+FEDA FC80 FC81 FC82 FC83 FC84 FDB7 FDBB
+FEDB FCC4 FCC5 FCC6 FCC7 FCC8 FDC3
+FEDC FCEB FCEC
+FEDD FC3F FC40 FC41 FC42 FC43 FC44 FEF5 FEF7 FEF9 FEFB
+FEDE FC85 FC86 FC87 FD80 FD81 FD82 FD84 FD85 FD87 FDAC FDAD FDBC FEF6 FEF8 FEFA FEFC
+FEDF FCC9 FCCA FCCB FCCC FCCD FD83 FD86 FD88 FDB5 FDBA
+FEE0 FCED
+FEE1 FC45 FC46 FC47 FC48 FC49 FC4A
+FEE2 FC88 FC89 FD8B FDB1 FDB9 FDC0
+FEE3 FCCE FCCF FCD0 FCD1 FD89 FD8A FD8C FD8D FD8E FD8F FD92
+FEE5 FC4B FC4C FC4D FC4E FC4F FC50
+FEE6 FC8A FC8B FC8C FC8D FC8E FC8F FD96 FD97 FD99 FD9A FD9B FDB3 FDBD FDC7
+FEE7 FCD2 FCD3 FCD4 FCD5 FCD6 FD95 FD98 FDB8
+FEE8 FCEE FCEF
+FEE9 FBA4 FC51 FC52 FC53 FC54
+FEEA FBA5
+FEEB FCD7 FCD8 FCD9 FD93 FD94
+FEED FE85
+FEEE FE86
+FEEF FC5D
+FEF0 FC90
+FEF1 FBEA FBEC FBEE FBF0 FBF2 FBF4 FBF6 FC00 FC01 FC02 FC03 FC04 FC55 FC56 FC57 FC58 FC59 FC5A FE89
+FEF2 FBEB FBED FBEF FBF1 FBF3 FBF5 FBF7 FC64 FC65 FC66 FC67 FC68 FC69 FC91 FC92 FC93 FC94 FC95 FC96 FD9C FDAE FDAF FDB0 FE8A
+FEF3 FBF8 FC97 FC98 FC99 FC9A FC9B FCDA FCDB FCDC FCDD FCDE FD9D FE8B
+FEF4 FCDF FCE0 FCF0 FCF1 FE8C
diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL
index 82d83077d9..184e5ca87b 100755
--- a/lib/unicode/mktables.PL
+++ b/lib/unicode/mktables.PL
@@ -1,10 +1,15 @@
#!../../miniperl
+# Create the equivalence mappings.
+
+$UnicodeData = "UnicodeData-Latest.txt";  # data file name; opened in proplist() and again when building the Eq/ tables
+
# Note: we try to keep filenames unique within first 8 chars. Using
# subdirectories for the following helps.
mkdir "In", 0777;
mkdir "Is", 0777;
mkdir "To", 0777;
+mkdir "Eq", 0777;  # directory for the Eq/Unicode and Eq/Latin1 tables; the return value is not checked
@todo = (
# typical
@@ -221,7 +226,7 @@ sub proplist {
$split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';
}
else {
- open(UD, "UnicodeData-Latest.txt") or warn "Can't open $table: $!";
+ open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!";
$split = '($code, $name, $cat, $comb, $bid, $decomp, $dec, $dig, $num, $mir, $uni1,
$comment, $up, $down, $title) = split(/;/);';
@@ -298,3 +303,75 @@ END
}
$out;
}
+
+open(UNICODEDATA, $UnicodeData) || die "$0: $UnicodeData: $!\n";
+# Scan every UnicodeData line and queue [ code, base character name ] pairs in @base.
+while (<UNICODEDATA>) {
+ ($code, $name) = split /;/;  # only the first two ';'-separated fields are used
+# Two-way index between character names and code strings.
+ $code{$name} = $code;
+ $name{$code} = $name;
+# Derive a base name from the character name; the first pattern that matches wins.
+ if ($name =~ /^((?:LATIN|GREEK|CYRILLIC|HEBREW|BENGALI) .+? LETTER .+?) WITH /) {
+ push @base, [ $code, $1 ];  # base name: the name up to, not including, " WITH "
+ } elsif ($name =~ /^(ARABIC LETTER \w+?) WITH .+ (\w+ FORM)$/) {
+ push @base, [ $code, "$1 $2" ];  # base name: the letter plus the trailing "... FORM" words
+ } elsif ($name =~ /^(ARABIC LETTER \w+?) WITH /) {
+ push @base, [ $code, $1 ];
+# Open question from the author: is turning ligatures into character classes sound?
+ } elsif ($name =~ /^(ARABIC) LIGATURE (.+?) (WITH .+ )+(\w+ FORM)$/) {
+ my $script = $1;
+ my $base = $2;  # first component of the ligature
+ my $with = $3;  # the "WITH ..." clause(s); the captured text ends in a space
+ my $form = $4;
+ push @base, [ $code, "$script LETTER $base" ];
+ push @base, [ $code, "$script LETTER $base $form" ];
+ my @with = split(/\bWITH\s+/, $with);
+ shift @with;  # drop the empty field in front of the first WITH
+ @with = grep { ! /^ (?:ABOVE|BELOW)/ } @with;
+ foreach my $base (@with) {  # NOTE(review): each piece seems to keep its trailing space, so these names may never be found in %code -- confirm
+ push @base, [ $code, "$script LETTER $base" ];
+ push @base, [ $code, "$script LETTER $base $form" ];
+ }
+ } elsif ($name =~ /^((?:ARMENIAN|CYRILLIC) .+) LIGATURE (\w+) (\w+)$/) {
+ push @base, [ $code, "$1 LETTER $2" ];  # a two-word ligature is queued under each of its two letters
+ push @base, [ $code, "$1 LETTER $3" ];
+# Latin ligatures (ae, oe, ij, ff, fi, fl, ffi, ffl, long st, st) are ignored.
+# Hebrew Yiddish ligatures (double vav, vav yod, double yod, yod yod patah,
+# alef lamed) are ignored.
+ } else {
+ next;  # any other name contributes no pair
+ }
+
+}
+
+foreach my $pair (@base) {  # invert the [ code, base name ] pairs into %unicode: base code => [ variant codes ]
+    ($code, $base) = @$pair;
+    next unless exists $code{$base};  # no character carries the derived base name
+    push @{$unicode{$code{$base}}}, $code;
+    print "$code: $name{$code} -> $base\n";  # progress trace on STDOUT
+}
+
+# Order the base code points numerically; a plain string sort is only
+# correct while every code is exactly four hex digits wide.
+@unicode = sort { hex($a) <=> hex($b) } keys %unicode;
+
+# Eq/Unicode: one line per base character, "BASE VARIANT VARIANT ...".
+open(EQ_UNICODE, ">Eq/Unicode")
+    || die "$0: failed to open Eq/Unicode for writing: $!\n";
+foreach my $c (@unicode) {
+    print EQ_UNICODE "$c @{$unicode{$c}}\n";
+}
+# Buffered write errors only surface at close, so check its result.
+close(EQ_UNICODE) || die "$0: failed to write Eq/Unicode: $!\n";
+
+# Eq/Latin1: the same table restricted to code points 0x00..0xFF.
+open(EQ_LATIN1, ">Eq/Latin1")
+    || die "$0: failed to open Eq/Latin1 for writing: $!\n";
+foreach my $c (@unicode) {
+    last if hex($c) > 255;  # ascending order: no later base can qualify
+    my @c = grep { hex($_) <= 255 } @{$unicode{$c}};
+    print EQ_LATIN1 "$c @c\n" if @c;  # omit bases without Latin-1 variants
+}
+close(EQ_LATIN1) || die "$0: failed to write Eq/Latin1: $!\n";
+