summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST3
-rw-r--r--lib/unicode/EthiopicSyllables.txt314
-rwxr-xr-xlib/unicode/MakeEthiopicSyllables.PL63
-rwxr-xr-xlib/unicode/mktables.PL20
-rw-r--r--lib/unicode/syllables.txt426
5 files changed, 447 insertions, 379 deletions
diff --git a/MANIFEST b/MANIFEST
index ddba85e3c3..6ad17ef68a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -698,7 +698,6 @@ lib/unicode/CombiningClass.pl Unicode character database
lib/unicode/Decomposition.pl Unicode character database
lib/unicode/Eq/Latin1 Unicode character database
lib/unicode/Eq/Unicode Unicode character database
-lib/unicode/EthiopicSyllables.txt Unicode character database
lib/unicode/In/AlphabeticPresentationForms.pl Unicode character database
lib/unicode/In/Arabic.pl Unicode character database
lib/unicode/In/ArabicPresentationForms-A.pl Unicode character database
@@ -841,7 +840,6 @@ lib/unicode/Is/Zl.pl Unicode character database
lib/unicode/Is/Zp.pl Unicode character database
lib/unicode/Is/Zs.pl Unicode character database
lib/unicode/JamoShort.pl Unicode character database
-lib/unicode/MakeEthiopicSyllables.PL Unicode character database
lib/unicode/Makefile Unicode character database
lib/unicode/Name.pl Unicode character database
lib/unicode/Number.pl Unicode character database
@@ -859,6 +857,7 @@ lib/unicode/mktables.PL Unicode character database generator
lib/unicode/names2.txt Unicode character database
lib/unicode/props2.txt Unicode character database
lib/unicode/readme.txt Unicode character database info
+lib/unicode/syllables.txt Unicode character database
lib/utf8.pm Pragma to control Unicode support
lib/utf8_heavy.pl Support routines for utf8 pragma
lib/validate.pl Perl library supporting wholesale file mode validation
diff --git a/lib/unicode/EthiopicSyllables.txt b/lib/unicode/EthiopicSyllables.txt
deleted file mode 100644
index 6d807b6052..0000000000
--- a/lib/unicode/EthiopicSyllables.txt
+++ /dev/null
@@ -1,314 +0,0 @@
-1200 0
-1201 1
-1202 2
-1203 3
-1204 4
-1205 5
-1206 6
-1208 0
-1209 1
-120a 2
-120b 3
-120c 4
-120d 5
-120e 6
-120f 11
-1210 0
-1211 1
-1212 2
-1213 3
-1214 4
-1215 5
-1216 6
-1217 11
-1218 0
-1219 1
-121a 2
-121b 3
-121c 4
-121d 5
-121e 6
-121f 11
-1220 0
-1221 1
-1222 2
-1223 3
-1224 4
-1225 5
-1226 6
-1227 11
-1228 0
-1229 1
-122a 2
-122b 3
-122c 4
-122d 5
-122e 6
-122f 11
-1230 0
-1231 1
-1232 2
-1233 3
-1234 4
-1235 5
-1236 6
-1237 11
-1238 0
-1239 1
-123a 2
-123b 3
-123c 4
-123d 5
-123e 6
-123f 11
-1240 0
-1241 1
-1242 2
-1243 3
-1244 4
-1245 5
-1246 6
-1248 8
-124a 10
-124b 11
-124c 12
-124d 13
-1250 0
-1251 1
-1252 2
-1253 3
-1254 4
-1255 5
-1256 6
-1258 8
-125a 10
-125b 11
-125c 12
-125d 13
-1260 0
-1261 1
-1262 2
-1263 3
-1264 4
-1265 5
-1266 6
-1267 11
-1268 0
-1269 1
-126a 2
-126b 3
-126c 4
-126d 5
-126e 6
-126f 11
-1270 0
-1271 1
-1272 2
-1273 3
-1274 4
-1275 5
-1276 6
-1277 11
-1278 0
-1279 1
-127a 2
-127b 3
-127c 4
-127d 5
-127e 6
-127f 11
-1280 0
-1281 1
-1282 2
-1283 3
-1284 4
-1285 5
-1286 6
-1288 8
-128a 10
-128b 11
-128c 12
-128d 13
-1290 0
-1291 1
-1292 2
-1293 3
-1294 4
-1295 5
-1296 6
-1297 11
-1298 0
-1299 1
-129a 2
-129b 3
-129c 4
-129d 5
-129e 6
-129f 11
-12a0 0
-12a1 1
-12a2 2
-12a3 3
-12a4 4
-12a5 5
-12a6 6
-12a7 11
-12a8 0
-12a9 1
-12aa 2
-12ab 3
-12ac 4
-12ad 5
-12ae 6
-12b0 8
-12b2 10
-12b3 11
-12b4 12
-12b5 13
-12b8 0
-12b9 1
-12ba 2
-12bb 3
-12bc 4
-12bd 5
-12be 6
-12c0 8
-12c2 10
-12c3 11
-12c4 12
-12c5 13
-12c8 0
-12c9 1
-12ca 2
-12cb 3
-12cc 4
-12cd 5
-12ce 6
-12d0 0
-12d1 1
-12d2 2
-12d3 3
-12d4 4
-12d5 5
-12d6 6
-12d8 0
-12d9 1
-12da 2
-12db 3
-12dc 4
-12dd 5
-12de 6
-12df 11
-12e0 0
-12e1 1
-12e2 2
-12e3 3
-12e4 4
-12e5 5
-12e6 6
-12e7 11
-12e8 0
-12e9 1
-12ea 2
-12eb 3
-12ec 4
-12ed 5
-12ee 6
-12f0 0
-12f1 1
-12f2 2
-12f3 3
-12f4 4
-12f5 5
-12f6 6
-12f7 11
-12f8 0
-12f9 1
-12fa 2
-12fb 3
-12fc 4
-12fd 5
-12fe 6
-12ff 11
-1300 0
-1301 1
-1302 2
-1303 3
-1304 4
-1305 5
-1306 6
-1307 11
-1308 0
-1309 1
-130a 2
-130b 3
-130c 4
-130d 5
-130e 6
-1310 8
-1312 10
-1313 11
-1314 12
-1315 13
-1318 0
-1319 1
-131a 2
-131b 3
-131c 4
-131d 5
-131e 6
-1320 0
-1321 1
-1322 2
-1323 3
-1324 4
-1325 5
-1326 6
-1327 11
-1328 0
-1329 1
-132a 2
-132b 3
-132c 4
-132d 5
-132e 6
-132f 11
-1330 0
-1331 1
-1332 2
-1333 3
-1334 4
-1335 5
-1336 6
-1337 11
-1338 0
-1339 1
-133a 2
-133b 3
-133c 4
-133d 5
-133e 6
-133f 11
-1340 0
-1341 1
-1342 2
-1343 3
-1344 4
-1345 5
-1346 6
-1348 0
-1349 1
-134a 2
-134b 3
-134c 4
-134d 5
-134e 6
-134f 11
-1350 0
-1351 1
-1352 2
-1353 3
-1354 4
-1355 5
-1356 6
-1357 11
diff --git a/lib/unicode/MakeEthiopicSyllables.PL b/lib/unicode/MakeEthiopicSyllables.PL
deleted file mode 100755
index bccec321cf..0000000000
--- a/lib/unicode/MakeEthiopicSyllables.PL
+++ /dev/null
@@ -1,63 +0,0 @@
-#!../../miniperl
-
-#
-# We use the "two rows of 8" perspective to map the syllables onto the
-# expected forms as per the various asundry Ethiopic locales...
-#
-open (GEEZ, ">Is/Y0.pl");
-open (KAIB, ">Is/Y1.pl");
-open (SALS, ">Is/Y2.pl");
-open (RABI, ">Is/Y3.pl");
-open (HAMS, ">Is/Y4.pl");
-open (SADS, ">Is/Y5.pl");
-open (SABI, ">Is/Y6.pl");
-
-open (DIQALA_GEEZ, ">Is/Y8.pl");
-open (DIQALA_SALS, ">Is/Y10.pl");
-open (DIQALA_RABI, ">Is/Y11.pl"); # which is sometimes just DIQALA
-open (DIQALA_HAMS, ">Is/Y12.pl");
-open (DIQALA_SADS, ">Is/Y13.pl"); # though people outside of unicode.org
- # might say DIQALA_KAIB...
-
-@fh = qw(
- GEEZ KAIB SALS RABI HAMS SADS SABI none
- DIQALA_GEEZ none DIQALA_SALS DIQALA_RABI DIQALA_HAMS DIQALA_SADS
- );
-
-
-for $form (0..$#fh) {
- $FILE = $fh[$form];
- print $FILE "return <<'END'\n" if ($FILE ne "none");
-}
-
-
-open (ETHIOPIC, "EthiopicSyllables.txt");
-while (<ETHIOPIC>) {
- ($uni, $form) = split (/\s+/ );
- $FILE = $fh[$form];
- print $FILE "$uni\n";
-}
-close (ETHIOPIC);
-
-
-for $form (0..$#fh) {
- $FILE = $fh[$form];
- print $FILE "END\n" if ($FILE ne "none");
-}
-
-close (GEEZ);
-close (KAIB);
-close (SALS);
-close (RABI);
-close (HAMS);
-close (SADS);
-close (SABI);
-
-close (DIQALA_GEEZ);
-close (DIQALA_SALS);
-close (DIQALA_RABI);
-close (DIQALA_HAMS);
-close (DIQALA_SADS);
-
-symlink ( "Is/Y11.pl", "Is/Y7.pl" );
-symlink ( "Is/Y13.pl", "Is/Y9.pl" );
diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL
index 41b192ba81..f54ea692c4 100755
--- a/lib/unicode/mktables.PL
+++ b/lib/unicode/mktables.PL
@@ -152,6 +152,21 @@ mkdir "Eq", 0777;
# Jamo
['JamoShort', '1', '$short'],
+
+# Syllables: one class per code in the third field of syllables.txt; W* = diphthong
+
+ ['IsSylV', '$syl eq "V"', ''],
+ ['IsSylU', '$syl eq "U"', ''],
+ ['IsSylI', '$syl eq "I"', ''],
+ ['IsSylA', '$syl eq "A"', ''],
+ ['IsSylE', '$syl eq "E"', ''],
+ ['IsSylC', '$syl eq "C"', ''],
+ ['IsSylO', '$syl eq "O"', ''],
+ ['IsSylWV', '$syl eq "WV"', ''],
+ ['IsSylWI', '$syl eq "WI"', ''],
+ ['IsSylWA', '$syl eq "WA"', ''],
+ ['IsSylWE', '$syl eq "WE"', ''],
+ ['IsSylWC', '$syl eq "WC"', ''],
);
# This is not written for speed...
@@ -223,6 +238,11 @@ sub proplist {
$split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';
}
+ elsif ($table =~ /^IsSyl/) {
+ open(UD, "syllables.txt") or warn "Can't open syllables.txt: $!";
+ # syllables.txt fields: code; short name; vowel class ($syl, tested by IsSyl*)
+ $split = '($code, $short, $syl) = split(/; */); $code =~ s/^U\+//;';
+ }
else {
open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!";
diff --git a/lib/unicode/syllables.txt b/lib/unicode/syllables.txt
new file mode 100644
index 0000000000..8d3095c4bf
--- /dev/null
+++ b/lib/unicode/syllables.txt
@@ -0,0 +1,426 @@
+################################################################################
+#
+# V: as "u" in "but" (often represented with schwa or small uppercase lambda)
+# U: as "oo" in "fool"
+# I: as "ea" in "meat"
+# A: as "a" in "father"
+# E: as "a" in "hate"
+# C: the consonant form having no vowel element
+# O: as "o" in "note"
+#
+# Vowel identifiers are assumed short, doubled identifiers are considered long
+# (following Cushitic rules). Diphthong syllables are identified with "W" as
+# per Ethiopic and Canadian syllabary character names.
+#
+#
+# WV WVV WU WUU WI WII WA WAA WE WEE WC WO WOO
+#
+# V VV U UU I II A AA E EE C O OO
+#
+################################################################################
+
+#
+# Ethiopic
+#
+1200; HA; V
+1201; HU; U
+1202; HI; I
+1203; HAA; A
+1204; HEE; E
+1205; HE; C
+1206; HO; O
+1208; LA; V
+1209; LU; U
+120A; LI; I
+120B; LAA; A
+120C; LEE; E
+120D; LE; C
+120E; LO; O
+120F; LWA; WA
+1210; HHA; V
+1211; HHU; U
+1212; HHI; I
+1213; HHAA; A
+1214; HHEE; E
+1215; HHE; C
+1216; HHO; O
+1217; HHWA; WA
+1218; MA; V
+1219; MU; U
+121A; MI; I
+121B; MAA; A
+121C; MEE; E
+121D; ME; C
+121E; MO; O
+121F; MWA; WA
+1220; SZA; V
+1221; SZU; U
+1222; SZI; I
+1223; SZAA; A
+1224; SZEE; E
+1225; SZE; C
+1226; SZO; O
+1227; SZWA; WA
+1228; RA; V
+1229; RU; U
+122A; RI; I
+122B; RAA; A
+122C; REE; E
+122D; RE; C
+122E; RO; O
+122F; RWA; WA
+1230; SA; V
+1231; SU; U
+1232; SI; I
+1233; SAA; A
+1234; SEE; E
+1235; SE; C
+1236; SO; O
+1237; SWA; WA
+1238; SHA; V
+1239; SHU; U
+123A; SHI; I
+123B; SHAA; A
+123C; SHEE; E
+123D; SHE; C
+123E; SHO; O
+123F; SHWA; WA
+1240; QA; V
+1241; QU; U
+1242; QI; I
+1243; QAA; A
+1244; QEE; E
+1245; QE; C
+1246; QO; O
+1248; QWA; WV
+124A; QWI; WI
+124B; QWAA; WA
+124C; QWEE; WE
+124D; QWE; WC
+1250; QHA; V
+1251; QHU; U
+1252; QHI; I
+1253; QHAA; A
+1254; QHEE; E
+1255; QHE; C
+1256; QHO; O
+1258; QHWA; WV
+125A; QHWI; WI
+125B; QHWAA; WA
+125C; QHWEE; WE
+125D; QHWE; WC
+1260; BA; V
+1261; BU; U
+1262; BI; I
+1263; BAA; A
+1264; BEE; E
+1265; BE; C
+1266; BO; O
+1267; BWA; WA
+1268; VA; V
+1269; VU; U
+126A; VI; I
+126B; VAA; A
+126C; VEE; E
+126D; VE; C
+126E; VO; O
+126F; VWA; WA
+1270; TA; V
+1271; TU; U
+1272; TI; I
+1273; TAA; A
+1274; TEE; E
+1275; TE; C
+1276; TO; O
+1277; TWA; WA
+1278; CA; V
+1279; CU; U
+127A; CI; I
+127B; CAA; A
+127C; CEE; E
+127D; CE; C
+127E; CO; O
+127F; CWA; WA
+1280; XA; V
+1281; XU; U
+1282; XI; I
+1283; XAA; A
+1284; XEE; E
+1285; XE; C
+1286; XO; O
+1288; XWA; WV
+128A; XWI; WI
+128B; XWAA; WA
+128C; XWEE; WE
+128D; XWE; WC
+1290; NA; V
+1291; NU; U
+1292; NI; I
+1293; NAA; A
+1294; NEE; E
+1295; NE; C
+1296; NO; O
+1297; NWA; WA
+1298; NYA; V
+1299; NYU; U
+129A; NYI; I
+129B; NYAA; A
+129C; NYEE; E
+129D; NYE; C
+129E; NYO; O
+129F; NYWA; WA
+12A0; GLOTTAL A; V
+12A1; GLOTTAL U; U
+12A2; GLOTTAL I; I
+12A3; GLOTTAL AA; A
+12A4; GLOTTAL EE; E
+12A5; GLOTTAL E; C
+12A6; GLOTTAL O; O
+12A7; GLOTTAL WA; WA
+12A8; KA; V
+12A9; KU; U
+12AA; KI; I
+12AB; KAA; A
+12AC; KEE; E
+12AD; KE; C
+12AE; KO; O
+12B0; KWA; WV
+12B2; KWI; WI
+12B3; KWAA; WA
+12B4; KWEE; WE
+12B5; KWE; WC
+12B8; KXA; V
+12B9; KXU; U
+12BA; KXI; I
+12BB; KXAA; A
+12BC; KXEE; E
+12BD; KXE; C
+12BE; KXO; O
+12C0; KXWA; WV
+12C2; KXWI; WI
+12C3; KXWAA; WA
+12C4; KXWEE; WE
+12C5; KXWE; WC
+12C8; WA; V
+12C9; WU; U
+12CA; WI; I
+12CB; WAA; A
+12CC; WEE; E
+12CD; WE; C
+12CE; WO; O
+12D0; PHARYNGEAL A; V
+12D1; PHARYNGEAL U; U
+12D2; PHARYNGEAL I; I
+12D3; PHARYNGEAL AA; A
+12D4; PHARYNGEAL EE; E
+12D5; PHARYNGEAL E; C
+12D6; PHARYNGEAL O; O
+12D8; ZA; V
+12D9; ZU; U
+12DA; ZI; I
+12DB; ZAA; A
+12DC; ZEE; E
+12DD; ZE; C
+12DE; ZO; O
+12DF; ZWA; WA
+12E0; ZHA; V
+12E1; ZHU; U
+12E2; ZHI; I
+12E3; ZHAA; A
+12E4; ZHEE; E
+12E5; ZHE; C
+12E6; ZHO; O
+12E7; ZHWA; WA
+12E8; YA; V
+12E9; YU; U
+12EA; YI; I
+12EB; YAA; A
+12EC; YEE; E
+12ED; YE; C
+12EE; YO; O
+12F0; DA; V
+12F1; DU; U
+12F2; DI; I
+12F3; DAA; A
+12F4; DEE; E
+12F5; DE; C
+12F6; DO; O
+12F7; DWA; WA
+12F8; DDA; V
+12F9; DDU; U
+12FA; DDI; I
+12FB; DDAA; A
+12FC; DDEE; E
+12FD; DDE; C
+12FE; DDO; O
+12FF; DDWA; WA
+1300; JA; V
+1301; JU; U
+1302; JI; I
+1303; JAA; A
+1304; JEE; E
+1305; JE; C
+1306; JO; O
+1307; JWA; WA
+1308; GA; V
+1309; GU; U
+130A; GI; I
+130B; GAA; A
+130C; GEE; E
+130D; GE; C
+130E; GO; O
+1310; GWA; WV
+1312; GWI; WI
+1313; GWAA; WA
+1314; GWEE; WE
+1315; GWE; WC
+1318; GGA; V
+1319; GGU; U
+131A; GGI; I
+131B; GGAA; A
+131C; GGEE; E
+131D; GGE; C
+131E; GGO; O
+1320; THA; V
+1321; THU; U
+1322; THI; I
+1323; THAA; A
+1324; THEE; E
+1325; THE; C
+1326; THO; O
+1327; THWA; WA
+1328; CHA; V
+1329; CHU; U
+132A; CHI; I
+132B; CHAA; A
+132C; CHEE; E
+132D; CHE; C
+132E; CHO; O
+132F; CHWA; WA
+1330; PHA; V
+1331; PHU; U
+1332; PHI; I
+1333; PHAA; A
+1334; PHEE; E
+1335; PHE; C
+1336; PHO; O
+1337; PHWA; WA
+1338; TSA; V
+1339; TSU; U
+133A; TSI; I
+133B; TSAA; A
+133C; TSEE; E
+133D; TSE; C
+133E; TSO; O
+133F; TSWA; WA
+1340; TZA; V
+1341; TZU; U
+1342; TZI; I
+1343; TZAA; A
+1344; TZEE; E
+1345; TZE; C
+1346; TZO; O
+1348; FA; V
+1349; FU; U
+134A; FI; I
+134B; FAA; A
+134C; FEE; E
+134D; FE; C
+134E; FO; O
+134F; FWA; WA
+1350; PA; V
+1351; PU; U
+1352; PI; I
+1353; PAA; A
+1354; PEE; E
+1355; PE; C
+1356; PO; O
+1357; PWA; WA
+#
+# Cherokee
+#
+13A0; A; A
+13A1; E; E
+13A2; I; I
+13A3; O; O
+13A4; U; U
+13A5; V; V
+13A6; GA; A
+13A7; KA; A
+13A8; GE; E
+13A9; GI; I
+13AA; GO; O
+13AB; GU; U
+13AC; GV; V
+13AD; HA; A
+13AE; HE; E
+13AF; HI; I
+13B0; HO; O
+13B1; HU; U
+13B2; HV; V
+13B3; LA; A
+13B4; LE; E
+13B5; LI; I
+13B6; LO; O
+13B7; LU; U
+13B8; LV; V
+13B9; MA; A
+13BA; ME; E
+13BB; MI; I
+13BC; MO; O
+13BD; MU; U
+13BE; NA; A
+13BF; HNA; A
+13C0; NAH; C
+13C1; NE; E
+13C2; NI; I
+13C3; NO; O
+13C4; NU; U
+13C5; NV; V
+13C6; QUA; A
+13C7; QUE; E
+13C8; QUI; I
+13C9; QUO; O
+13CA; QUU; U
+13CB; QUV; V
+13CC; SA; A
+13CD; S; C
+13CE; SE; E
+13CF; SI; I
+13D0; SO; O
+13D1; SU; U
+13D2; SV; V
+13D3; DA; A
+13D4; TA; A
+13D5; DE; E
+13D6; TE; E
+13D7; DI; I
+13D8; TI; I
+13D9; DO; O
+13DA; DU; U
+13DB; DV; V
+13DC; DLA; A
+13DD; TLA; A
+13DE; TLE; E
+13DF; TLI; I
+13E0; TLO; O
+13E1; TLU; U
+13E2; TLV; V
+13E3; TSA; A
+13E4; TSE; E
+13E5; TSI; I
+13E6; TSO; O
+13E7; TSU; U
+13E8; TSV; V
+13E9; WA; A
+13EA; WE; E
+13EB; WI; I
+13EC; WO; O
+13ED; WU; U
+13EE; WV; V
+13EF; YA; A
+13F0; YE; E
+13F1; YI; I
+13F2; YO; O
+13F3; YU; U
+13F4; YV; V