diff options
-rw-r--r-- | MANIFEST | 3 | ||||
-rw-r--r-- | lib/unicode/EthiopicSyllables.txt | 314 | ||||
-rwxr-xr-x | lib/unicode/MakeEthiopicSyllables.PL | 63 | ||||
-rwxr-xr-x | lib/unicode/mktables.PL | 20 | ||||
-rw-r--r-- | lib/unicode/syllables.txt | 426 |
5 files changed, 447 insertions, 379 deletions
@@ -698,7 +698,6 @@ lib/unicode/CombiningClass.pl Unicode character database lib/unicode/Decomposition.pl Unicode character database lib/unicode/Eq/Latin1 Unicode character database lib/unicode/Eq/Unicode Unicode character database -lib/unicode/EthiopicSyllables.txt Unicode character database lib/unicode/In/AlphabeticPresentationForms.pl Unicode character database lib/unicode/In/Arabic.pl Unicode character database lib/unicode/In/ArabicPresentationForms-A.pl Unicode character database @@ -841,7 +840,6 @@ lib/unicode/Is/Zl.pl Unicode character database lib/unicode/Is/Zp.pl Unicode character database lib/unicode/Is/Zs.pl Unicode character database lib/unicode/JamoShort.pl Unicode character database -lib/unicode/MakeEthiopicSyllables.PL Unicode character database lib/unicode/Makefile Unicode character database lib/unicode/Name.pl Unicode character database lib/unicode/Number.pl Unicode character database @@ -859,6 +857,7 @@ lib/unicode/mktables.PL Unicode character database generator lib/unicode/names2.txt Unicode character database lib/unicode/props2.txt Unicode character database lib/unicode/readme.txt Unicode character database info +lib/unicode/syllables.txt Unicode character database lib/utf8.pm Pragma to control Unicode support lib/utf8_heavy.pl Support routines for utf8 pragma lib/validate.pl Perl library supporting wholesale file mode validation diff --git a/lib/unicode/EthiopicSyllables.txt b/lib/unicode/EthiopicSyllables.txt deleted file mode 100644 index 6d807b6052..0000000000 --- a/lib/unicode/EthiopicSyllables.txt +++ /dev/null @@ -1,314 +0,0 @@ -1200 0 -1201 1 -1202 2 -1203 3 -1204 4 -1205 5 -1206 6 -1208 0 -1209 1 -120a 2 -120b 3 -120c 4 -120d 5 -120e 6 -120f 11 -1210 0 -1211 1 -1212 2 -1213 3 -1214 4 -1215 5 -1216 6 -1217 11 -1218 0 -1219 1 -121a 2 -121b 3 -121c 4 -121d 5 -121e 6 -121f 11 -1220 0 -1221 1 -1222 2 -1223 3 -1224 4 -1225 5 -1226 6 -1227 11 -1228 0 -1229 1 -122a 2 -122b 3 -122c 4 -122d 5 -122e 6 -122f 11 -1230 0 -1231 1 -1232 2 -1233 3 
-1234 4 -1235 5 -1236 6 -1237 11 -1238 0 -1239 1 -123a 2 -123b 3 -123c 4 -123d 5 -123e 6 -123f 11 -1240 0 -1241 1 -1242 2 -1243 3 -1244 4 -1245 5 -1246 6 -1248 8 -124a 10 -124b 11 -124c 12 -124d 13 -1250 0 -1251 1 -1252 2 -1253 3 -1254 4 -1255 5 -1256 6 -1258 8 -125a 10 -125b 11 -125c 12 -125d 13 -1260 0 -1261 1 -1262 2 -1263 3 -1264 4 -1265 5 -1266 6 -1267 11 -1268 0 -1269 1 -126a 2 -126b 3 -126c 4 -126d 5 -126e 6 -126f 11 -1270 0 -1271 1 -1272 2 -1273 3 -1274 4 -1275 5 -1276 6 -1277 11 -1278 0 -1279 1 -127a 2 -127b 3 -127c 4 -127d 5 -127e 6 -127f 11 -1280 0 -1281 1 -1282 2 -1283 3 -1284 4 -1285 5 -1286 6 -1288 8 -128a 10 -128b 11 -128c 12 -128d 13 -1290 0 -1291 1 -1292 2 -1293 3 -1294 4 -1295 5 -1296 6 -1297 11 -1298 0 -1299 1 -129a 2 -129b 3 -129c 4 -129d 5 -129e 6 -129f 11 -12a0 0 -12a1 1 -12a2 2 -12a3 3 -12a4 4 -12a5 5 -12a6 6 -12a7 11 -12a8 0 -12a9 1 -12aa 2 -12ab 3 -12ac 4 -12ad 5 -12ae 6 -12b0 8 -12b2 10 -12b3 11 -12b4 12 -12b5 13 -12b8 0 -12b9 1 -12ba 2 -12bb 3 -12bc 4 -12bd 5 -12be 6 -12c0 8 -12c2 10 -12c3 11 -12c4 12 -12c5 13 -12c8 0 -12c9 1 -12ca 2 -12cb 3 -12cc 4 -12cd 5 -12ce 6 -12d0 0 -12d1 1 -12d2 2 -12d3 3 -12d4 4 -12d5 5 -12d6 6 -12d8 0 -12d9 1 -12da 2 -12db 3 -12dc 4 -12dd 5 -12de 6 -12df 11 -12e0 0 -12e1 1 -12e2 2 -12e3 3 -12e4 4 -12e5 5 -12e6 6 -12e7 11 -12e8 0 -12e9 1 -12ea 2 -12eb 3 -12ec 4 -12ed 5 -12ee 6 -12f0 0 -12f1 1 -12f2 2 -12f3 3 -12f4 4 -12f5 5 -12f6 6 -12f7 11 -12f8 0 -12f9 1 -12fa 2 -12fb 3 -12fc 4 -12fd 5 -12fe 6 -12ff 11 -1300 0 -1301 1 -1302 2 -1303 3 -1304 4 -1305 5 -1306 6 -1307 11 -1308 0 -1309 1 -130a 2 -130b 3 -130c 4 -130d 5 -130e 6 -1310 8 -1312 10 -1313 11 -1314 12 -1315 13 -1318 0 -1319 1 -131a 2 -131b 3 -131c 4 -131d 5 -131e 6 -1320 0 -1321 1 -1322 2 -1323 3 -1324 4 -1325 5 -1326 6 -1327 11 -1328 0 -1329 1 -132a 2 -132b 3 -132c 4 -132d 5 -132e 6 -132f 11 -1330 0 -1331 1 -1332 2 -1333 3 -1334 4 -1335 5 -1336 6 -1337 11 -1338 0 -1339 1 -133a 2 -133b 3 -133c 4 -133d 5 -133e 6 -133f 11 -1340 0 -1341 1 -1342 2 -1343 3 -1344 
4 -1345 5 -1346 6 -1348 0 -1349 1 -134a 2 -134b 3 -134c 4 -134d 5 -134e 6 -134f 11 -1350 0 -1351 1 -1352 2 -1353 3 -1354 4 -1355 5 -1356 6 -1357 11 diff --git a/lib/unicode/MakeEthiopicSyllables.PL b/lib/unicode/MakeEthiopicSyllables.PL deleted file mode 100755 index bccec321cf..0000000000 --- a/lib/unicode/MakeEthiopicSyllables.PL +++ /dev/null @@ -1,63 +0,0 @@ -#!../../miniperl - -# -# We use the "two rows of 8" perspective to map the syllables onto the -# expected forms as per the various asundry Ethiopic locales... -# -open (GEEZ, ">Is/Y0.pl"); -open (KAIB, ">Is/Y1.pl"); -open (SALS, ">Is/Y2.pl"); -open (RABI, ">Is/Y3.pl"); -open (HAMS, ">Is/Y4.pl"); -open (SADS, ">Is/Y5.pl"); -open (SABI, ">Is/Y6.pl"); - -open (DIQALA_GEEZ, ">Is/Y8.pl"); -open (DIQALA_SALS, ">Is/Y10.pl"); -open (DIQALA_RABI, ">Is/Y11.pl"); # which is sometimes just DIQALA -open (DIQALA_HAMS, ">Is/Y12.pl"); -open (DIQALA_SADS, ">Is/Y13.pl"); # though people outside of unicode.org - # might say DIQALA_KAIB... - -@fh = qw( - GEEZ KAIB SALS RABI HAMS SADS SABI none - DIQALA_GEEZ none DIQALA_SALS DIQALA_RABI DIQALA_HAMS DIQALA_SADS - ); - - -for $form (0..$#fh) { - $FILE = $fh[$form]; - print $FILE "return <<'END'\n" if ($FILE ne "none"); -} - - -open (ETHIOPIC, "EthiopicSyllables.txt"); -while (<ETHIOPIC>) { - ($uni, $form) = split (/\s+/ ); - $FILE = $fh[$form]; - print $FILE "$uni\n"; -} -close (ETHIOPIC); - - -for $form (0..$#fh) { - $FILE = $fh[$form]; - print $FILE "END\n" if ($FILE ne "none"); -} - -close (GEEZ); -close (KAIB); -close (SALS); -close (RABI); -close (HAMS); -close (SADS); -close (SABI); - -close (DIQALA_GEEZ); -close (DIQALA_SALS); -close (DIQALA_RABI); -close (DIQALA_HAMS); -close (DIQALA_SADS); - -symlink ( "Is/Y11.pl", "Is/Y7.pl" ); -symlink ( "Is/Y13.pl", "Is/Y9.pl" ); diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL index 41b192ba81..f54ea692c4 100755 --- a/lib/unicode/mktables.PL +++ b/lib/unicode/mktables.PL @@ -152,6 +152,21 @@ mkdir "Eq", 0777; # Jamo 
['JamoShort', '1', '$short'], + +# Syllables + + ['IsSylV', '$syl eq "V"', ''], + ['IsSylU', '$syl eq "U"', ''], + ['IsSylI', '$syl eq "I"', ''], + ['IsSylA', '$syl eq "A"', ''], + ['IsSylE', '$syl eq "E"', ''], + ['IsSylC', '$syl eq "C"', ''], + ['IsSylO', '$syl eq "O"', ''], + ['IsSylWV', '$syl eq "WV"', ''], + ['IsSylWI', '$syl eq "WI"', ''], + ['IsSylWA', '$syl eq "WA"', ''], + ['IsSylWE', '$syl eq "WE"', ''], + ['IsSylWC', '$syl eq "WC"', ''], ); # This is not written for speed... @@ -223,6 +238,11 @@ sub proplist { $split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;'; } + elsif ($table =~ /^IsSyl/) { + open(UD, "syllables.txt") or warn "Can't open syllables.txt: $!"; + + $split = '($code, $short, $syl) = split(/; */); $code =~ s/^U\+//;'; + } else { open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!"; diff --git a/lib/unicode/syllables.txt b/lib/unicode/syllables.txt new file mode 100644 index 0000000000..8d3095c4bf --- /dev/null +++ b/lib/unicode/syllables.txt @@ -0,0 +1,426 @@ +################################################################################ +# +# V: as "u" in "but" (often represented with schwa or small uppercase lambda) +# U: as "oo" in "fool" +# I: as "ea" in "meat" +# A: as "a" in "father" +# E: as "a" in "hate" +# C: the consonant form having no vowel element +# O: as "o" in "note" +# +# Vowel identifiers are assumed short, doubled identifiers are considered long +# (following Cushitic rules). Diphthong syllables are identified with "W" as +# per Ethiopic and Canadian syllabary character names. 
+# +# +# WV WVV WU WUU WI WII WA WAA WE WEE WC WO WOO +# +# V VV U UU I II A AA E EE C O OO +# +################################################################################ + +# +# Ethiopic +# +1200; HA; V +1201; HU; U +1202; HI; I +1203; HAA; A +1204; HEE; E +1205; HE; C +1206; HO; O +1208; LA; V +1209; LU; U +120A; LI; I +120B; LAA; A +120C; LEE; E +120D; LE; C +120E; LO; O +120F; LWA; WA +1210; HHA; V +1211; HHU; U +1212; HHI; I +1213; HHAA; A +1214; HHEE; E +1215; HHE; C +1216; HHO; O +1217; HHWA; WA +1218; MA; V +1219; MU; U +121A; MI; I +121B; MAA; A +121C; MEE; E +121D; ME; C +121E; MO; O +121F; MWA; WA +1220; SZA; V +1221; SZU; U +1222; SZI; I +1223; SZAA; A +1224; SZEE; E +1225; SZE; C +1226; SZO; O +1227; SZWA; WA +1228; RA; V +1229; RU; U +122A; RI; I +122B; RAA; A +122C; REE; E +122D; RE; C +122E; RO; O +122F; RWA; WA +1230; SA; V +1231; SU; U +1232; SI; I +1233; SAA; A +1234; SEE; E +1235; SE; C +1236; SO; O +1237; SWA; WA +1238; SHA; V +1239; SHU; U +123A; SHI; I +123B; SHAA; A +123C; SHEE; E +123D; SHE; C +123E; SHO; O +123F; SHWA; WA +1240; QA; V +1241; QU; U +1242; QI; I +1243; QAA; A +1244; QEE; E +1245; QE; C +1246; QO; O +1248; QWA; WV +124A; QWI; WI +124B; QWAA; WA +124C; QWEE; WE +124D; QWE; WC +1250; QHA; V +1251; QHU; U +1252; QHI; I +1253; QHAA; A +1254; QHEE; E +1255; QHE; C +1256; QHO; O +1258; QHWA; WV +125A; QHWI; WI +125B; QHWAA; WA +125C; QHWEE; WE +125D; QHWE; WC +1260; BA; V +1261; BU; U +1262; BI; I +1263; BAA; A +1264; BEE; E +1265; BE; C +1266; BO; O +1267; BWA; WA +1268; VA; V +1269; VU; U +126A; VI; I +126B; VAA; A +126C; VEE; E +126D; VE; C +126E; VO; O +126F; VWA; WA +1270; TA; V +1271; TU; U +1272; TI; I +1273; TAA; A +1274; TEE; E +1275; TE; C +1276; TO; O +1277; TWA; WA +1278; CA; V +1279; CU; U +127A; CI; I +127B; CAA; A +127C; CEE; E +127D; CE; C +127E; CO; O +127F; CWA; WA +1280; XA; V +1281; XU; U +1282; XI; I +1283; XAA; A +1284; XEE; E +1285; XE; C +1286; XO; O +1288; XWA; WV +128A; XWI; WI +128B; XWAA; WA +128C; 
XWEE; WE +128D; XWE; WC +1290; NA; V +1291; NU; U +1292; NI; I +1293; NAA; A +1294; NEE; E +1295; NE; C +1296; NO; O +1297; NWA; WA +1298; NYA; V +1299; NYU; U +129A; NYI; I +129B; NYAA; A +129C; NYEE; E +129D; NYE; C +129E; NYO; O +129F; NYWA; WA +12A0; GLOTTAL A; V +12A1; GLOTTAL U; U +12A2; GLOTTAL I; I +12A3; GLOTTAL AA; A +12A4; GLOTTAL EE; E +12A5; GLOTTAL E; C +12A6; GLOTTAL O; O +12A7; GLOTTAL WA; WA +12A8; KA; V +12A9; KU; U +12AA; KI; I +12AB; KAA; A +12AC; KEE; E +12AD; KE; C +12AE; KO; O +12B0; KWA; WV +12B2; KWI; WI +12B3; KWAA; WA +12B4; KWEE; WE +12B5; KWE; WC +12B8; KXA; V +12B9; KXU; U +12BA; KXI; I +12BB; KXAA; A +12BC; KXEE; E +12BD; KXE; C +12BE; KXO; O +12C0; KXWA; WV +12C2; KXWI; WI +12C3; KXWAA; WA +12C4; KXWEE; WE +12C5; KXWE; WC +12C8; WA; V +12C9; WU; U +12CA; WI; I +12CB; WAA; A +12CC; WEE; E +12CD; WE; C +12CE; WO; O +12D0; PHARYNGEAL A; V +12D1; PHARYNGEAL U; U +12D2; PHARYNGEAL I; I +12D3; PHARYNGEAL AA; A +12D4; PHARYNGEAL EE; E +12D5; PHARYNGEAL E; C +12D6; PHARYNGEAL O; O +12D8; ZA; V +12D9; ZU; U +12DA; ZI; I +12DB; ZAA; A +12DC; ZEE; E +12DD; ZE; C +12DE; ZO; O +12DF; ZWA; WA +12E0; ZHA; V +12E1; ZHU; U +12E2; ZHI; I +12E3; ZHAA; A +12E4; ZHEE; E +12E5; ZHE; C +12E6; ZHO; O +12E7; ZHWA; WA +12E8; YA; V +12E9; YU; U +12EA; YI; I +12EB; YAA; A +12EC; YEE; E +12ED; YE; C +12EE; YO; O +12F0; DA; V +12F1; DU; U +12F2; DI; I +12F3; DAA; A +12F4; DEE; E +12F5; DE; C +12F6; DO; O +12F7; DWA; WA +12F8; DDA; V +12F9; DDU; U +12FA; DDI; I +12FB; DDAA; A +12FC; DDEE; E +12FD; DDE; C +12FE; DDO; O +12FF; DDWA; WA +1300; JA; V +1301; JU; U +1302; JI; I +1303; JAA; A +1304; JEE; E +1305; JE; C +1306; JO; O +1307; JWA; WA +1308; GA; V +1309; GU; U +130A; GI; I +130B; GAA; A +130C; GEE; E +130D; GE; C +130E; GO; O +1310; GWA; WV +1312; GWI; WI +1313; GWAA; WA +1314; GWEE; WE +1315; GWE; WC +1318; GGA; V +1319; GGU; U +131A; GGI; I +131B; GGAA; A +131C; GGEE; E +131D; GGE; C +131E; GGO; O +1320; THA; V +1321; THU; U +1322; THI; I +1323; THAA; A 
+1324; THEE; E +1325; THE; C +1326; THO; O +1327; THWA; WA +1328; CHA; V +1329; CHU; U +132A; CHI; I +132B; CHAA; A +132C; CHEE; E +132D; CHE; C +132E; CHO; O +132F; CHWA; WA +1330; PHA; V +1331; PHU; U +1332; PHI; I +1333; PHAA; A +1334; PHEE; E +1335; PHE; C +1336; PHO; O +1337; PHWA; WA +1338; TSA; V +1339; TSU; U +133A; TSI; I +133B; TSAA; A +133C; TSEE; E +133D; TSE; C +133E; TSO; O +133F; TSWA; WA +1340; TZA; V +1341; TZU; U +1342; TZI; I +1343; TZAA; A +1344; TZEE; E +1345; TZE; C +1346; TZO; O +1348; FA; V +1349; FU; U +134A; FI; I +134B; FAA; A +134C; FEE; E +134D; FE; C +134E; FO; O +134F; FWA; WA +1350; PA; V +1351; PU; U +1352; PI; I +1353; PAA; A +1354; PEE; E +1355; PE; C +1356; PO; O +1357; PWA; WA +# +# Cherokee +# +13A0; A; A +13A1; E; E +13A2; I; I +13A3; O; O +13A4; U; U +13A5; V; V +13A6; GA; A +13A7; KA; A +13A8; GE; E +13A9; GI; I +13AA; GO; O +13AB; GU; U +13AC; GV; V +13AD; HA; A +13AE; HE; E +13AF; HI; I +13B0; HO; O +13B1; HU; U +13B2; HV; V +13B3; LA; A +13B4; LE; E +13B5; LI; I +13B6; LO; O +13B7; LU; U +13B8; LV; V +13B9; MA; A +13BA; ME; E +13BB; MI; I +13BC; MO; O +13BD; MU; U +13BE; NA; A +13BF; HNA; A +13C0; NAH; C +13C1; NE; E +13C2; NI; I +13C3; NO; O +13C4; NU; U +13C5; NV; V +13C6; QUA; A +13C7; QUE; E +13C8; QUI; I +13C9; QUO; O +13CA; QUU; U +13CB; QUV; V +13CC; SA; A +13CD; S; C +13CE; SE; E +13CF; SI; I +13D0; SO; O +13D1; SU; U +13D2; SV; V +13D3; DA; A +13D4; TA; A +13D5; DE; E +13D6; TE; E +13D7; DI; I +13D8; TI; I +13D9; DO; O +13DA; DU; U +13DB; DV; V +13DC; DLA; A +13DD; TLA; A +13DE; TLE; E +13DF; TLI; I +13E0; TLO; O +13E1; TLU; U +13E2; TLV; V +13E3; TSA; A +13E4; TSE; E +13E5; TSI; I +13E6; TSO; O +13E7; TSU; U +13E8; TSV; V +13E9; WA; A +13EA; WE; E +13EB; WI; I +13EC; WO; O +13ED; WU; U +13EE; WV; V +13EF; YA; A +13F0; YE; E +13F1; YI; I +13F2; YO; O +13F3; YU; U +13F4; YV; V |