diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-10-16 12:03:49 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-10-16 12:03:49 +0000 |
commit | 79af22e4562a90bdaa3aa9d22a11a3fa41f9c6bc (patch) | |
tree | bffa86924d8cce7481014522aaedebabd8416277 /lib/unicore | |
parent | 035d37be8456f0df63748a1c95063999b5824c09 (diff) | |
download | perl-79af22e4562a90bdaa3aa9d22a11a3fa41f9c6bc.tar.gz |
A forgotten deletion.
p4raw-id: //depot/perl@12452
Diffstat (limited to 'lib/unicore')
-rwxr-xr-x | lib/unicore/mktables.PL | 879 |
1 files changed, 0 insertions, 879 deletions
diff --git a/lib/unicore/mktables.PL b/lib/unicore/mktables.PL deleted file mode 100755 index e281edd972..0000000000 --- a/lib/unicore/mktables.PL +++ /dev/null @@ -1,879 +0,0 @@ -#!../../miniperl - -use bytes; - -$UnicodeData = "Unicode.txt"; -$SyllableData = "syllables.txt"; -$PropData = "PropList.txt"; - -my $UnicodeLastHex = '10FFFF'; - -# Note: we try to keep filenames unique within first 8 chars. Using -# subdirectories for the following helps. -mkdir "In", 0755; -mkdir "Is", 0755; -mkdir "To", 0755; - -@todo = ( -# typical - - # 005F: SPACING UNDERSCROE - ['IsWord', '$cat =~ /^[LMN]/ or $code eq "005F"', ''], - ['IsAlnum', '$cat =~ /^[LMN]/', ''], - ['IsAlpha', '$cat =~ /^[LM]/', ''], - # 0009: HORIZONTAL TABULATION - # 000A: LINE FEED - # 000B: VERTICAL TABULATION - # 000C: FORM FEED - # 000D: CARRIAGE RETURN - # 0020: SPACE - ['IsSpace', '$cat =~ /^Z/ || - $code =~ /^(0009|000A|000B|000C|000D)$/', ''], - ['IsSpacePerl', - '$cat =~ /^Z/ || - $code =~ /^(0009|000A|000C|000D)$/', ''], - ['IsBlank', '$code =~ /^(0020|0009)$/ || - $cat =~ /^Z[^lp]$/', ''], - ['IsDigit', '$cat =~ /^Nd$/', ''], - ['IsUpper', '$cat =~ /^L[ut]$/', ''], - ['IsLower', '$cat =~ /^Ll$/', ''], - ['IsASCII', '$code le "007f"', ''], - ['IsCntrl', '$cat =~ /^C/', ''], - ['IsGraph', '$cat =~ /^([LMNPS]|Co)/', ''], - ['IsPrint', '$cat =~ /^([LMNPS]|Co|Zs)/', ''], - ['IsPunct', '$cat =~ /^P/', ''], - # 003[0-9]: DIGIT ZERO..NINE, 00[46][1-6]: A..F, a..f - ['IsXDigit', '$code =~ /^00(3[0-9]|[46][1-6])$/', ''], - ['ToUpper', '$up', '$up'], - ['ToLower', '$down', '$down'], - ['ToTitle', '$title', '$title'], - ['ToDigit', '$dec ne ""', '$dec'], - -# Name - - ['Name', '$name', '$name'], - -# Category - - ['Category', '$cat', '$cat'], - -# Normative - - ['IsM', '$cat =~ /^M/', ''], # Mark - ['IsMn', '$cat eq "Mn"', ''], # Mark, Non-Spacing - ['IsMc', '$cat eq "Mc"', ''], # Mark, Combining - ['IsMe', '$cat eq "Me"', ''], # Mark, Enclosing - - ['IsN', '$cat =~ /^N/', ''], # Number - ['IsNd', '$cat eq "Nd"', ''], # Number, Decimal Digit - ['IsNo', '$cat eq "No"', ''], # Number, Other - ['IsNl', '$cat eq "Nl"', ''], # Number, Letter - - ['IsZ', '$cat =~ /^Z/', ''], # Separator - ['IsZs', '$cat eq "Zs"', ''], # Separator, Space - ['IsZl', '$cat eq "Zl"', ''], # Separator, Line - ['IsZp', '$cat eq "Zp"', ''], # Separator, Paragraph - - ['IsC', '$cat =~ /^C/', ''], # Crazy - ['IsCc', '$cat eq "Cc"', ''], # Other, Control or Format - ['IsCo', '$cat eq "Co"', ''], # Other, Private Use - ['IsCn', '$cat eq "Cn"', ''], # Other, Not Assigned - ['IsCf', '$cat eq "Cf"', ''], # Other, Format - ['IsCs', '$cat eq "Cs"', ''], # Other, Surrogate - ['IsCn', 'Unassigned Code Value',$PropData], # Other, Not Assigned - -# Informative - - ['IsL', '$cat =~ /^L/', ''], # Letter - ['IsLu', '$cat eq "Lu"', ''], # Letter, Uppercase - ['IsLl', '$cat eq "Ll"', ''], # Letter, Lowercase - ['IsLt', '$cat eq "Lt"', ''], # Letter, Titlecase - ['IsLm', '$cat eq "Lm"', ''], # Letter, Modifier - ['IsLo', '$cat eq "Lo"', ''], # Letter, Other - - ['IsP', '$cat =~ /^P/', ''], # Punctuation - ['IsPd', '$cat eq "Pd"', ''], # Punctuation, Dash - ['IsPs', '$cat eq "Ps"', ''], # Punctuation, Open - ['IsPe', '$cat eq "Pe"', ''], # Punctuation, Close - ['IsPo', '$cat eq "Po"', ''], # Punctuation, Other - ['IsPc', '$cat eq "Pc"', ''], # Punctuation, Connector - ['IsPi', '$cat eq "Pi"', ''], # Punctuation, Initial quote - ['IsPf', '$cat eq "Pf"', ''], # Punctuation, Final quote - - ['IsS', '$cat =~ /^S/', ''], # Symbol - ['IsSm', '$cat eq "Sm"', ''], # Symbol, Math - ['IsSk', '$cat eq "Sk"', ''], # Symbol, Modifier - ['IsSc', '$cat eq "Sc"', ''], # Symbol, Currency - ['IsSo', '$cat eq "So"', ''], # Symbol, Other - -# Combining class - ['CombiningClass', '$comb', '$comb'], - -# BIDIRECTIONAL PROPERTIES - - ['Bidirectional', '$bid', '$bid'], - -# Strong types: - - ['IsBidiL', '$bid eq "L"', ''], # Left-Right; Most alphabetic, - # syllabic, and logographic - # characters (e.g., CJK - # ideographs) - ['IsBidiR', '$bid eq "R"', ''], # Right-Left; Arabic, Hebrew, - # and punctuation specific to - # those scripts - - ['IsBidiLRE', '$bid eq "LRE"', ''], # Left-to-Right Embedding - ['IsBidiLRO', '$bid eq "LRO"', ''], # Left-to-Right Override - ['IsBidiAL', '$bid eq "AL"', ''], # Right-to-Left Arabic - ['IsBidiRLE', '$bid eq "RLE"', ''], # Right-to-Left Embedding - ['IsBidiRLO', '$bid eq "RLO"', ''], # Right-to-Left Override - ['IsBidiPDF', '$bid eq "PDF"', ''], # Pop Directional Format - ['IsBidiNSM', '$bid eq "NSM"', ''], # Non-Spacing Mark - ['IsBidiBN', '$bid eq "BN"', ''], # Boundary Neutral - -# Weak types: - - ['IsBidiEN','$bid eq "EN"', ''], # European Number - ['IsBidiES','$bid eq "ES"', ''], # European Number Separator - ['IsBidiET','$bid eq "ET"', ''], # European Number Terminator - ['IsBidiAN','$bid eq "AN"', ''], # Arabic Number - ['IsBidiCS','$bid eq "CS"', ''], # Common Number Separator - -# Separators: - - ['IsBidiB', '$bid eq "B"', ''], # Block Separator - ['IsBidiS', '$bid eq "S"', ''], # Segment Separator - -# Neutrals: - - ['IsBidiWS','$bid eq "WS"', ''], # Whitespace - ['IsBidiON','$bid eq "ON"', ''], # Other Neutrals ; All other - # characters: punctuation, - # symbols - -# Decomposition - - ['Decomposition', '$decomp', '$decomp'], - ['IsDecoCanon', '$decomp && $decomp !~ /^</', ''], - ['IsDecoCompat', '$decomp =~ /^</', ''], - ['IsDCfont', '$decomp =~ /^<font>/', ''], - ['IsDCnoBreak', '$decomp =~ /^<noBreak>/', ''], - ['IsDCinitial', '$decomp =~ /^<initial>/', ''], - ['IsDCmedial', '$decomp =~ /^<medial>/', ''], - ['IsDCfinal', '$decomp =~ /^<final>/', ''], - ['IsDCisolated', '$decomp =~ /^<isolated>/', ''], - ['IsDCcircle', '$decomp =~ /^<circle>/', ''], - ['IsDCsuper', '$decomp =~ /^<super>/', ''], - ['IsDCsub', '$decomp =~ /^<sub>/', ''], - ['IsDCvertical', '$decomp =~ /^<vertical>/', ''], - ['IsDCwide', '$decomp =~ /^<wide>/', ''], - ['IsDCnarrow', '$decomp =~ /^<narrow>/', ''], - ['IsDCsmall', '$decomp =~ /^<small>/', ''], - ['IsDCsquare', '$decomp =~ /^<square>/', ''], - ['IsDCfraction', '$decomp =~ /^<fraction>/', ''], - ['IsDCcompat', '$decomp =~ /^<compat>/', ''], - -# Number - - ['Number', '$num ne ""', '$num'], - -# Mirrored - - ['IsMirrored', '$mir eq "Y"', ''], - -# Arabic - - ['ArabLink', '1', '$link'], - ['ArabLnkGrp', '1', '$linkgroup'], - -# Jamo - - ['JamoShort', '1', '$short'], - -# Syllables - - syllable_defs(), - -# Line break properties - Normative - - ['IsLbrkBK','$brk eq "BK"', ''], # Mandatory Break - ['IsLbrkCR','$brk eq "CR"', ''], # Carriage Return - ['IsLbrkLF','$brk eq "LF"', ''], # Line Feed - ['IsLbrkCM','$brk eq "CM"', ''], # Attached Characters and Combining Marks - ['IsLbrkSG','$brk eq "SG"', ''], # Surrogates - ['IsLbrkGL','$brk eq "GL"', ''], # Non-breaking (Glue) - ['IsLbrkCB','$brk eq "CB"', ''], # Contingent Break Opportunity - ['IsLbrkSP','$brk eq "SP"', ''], # Space - ['IsLbrkZW','$brk eq "ZW"', ''], # Zero Width Space - -# Line break properties - Informative - ['IsLbrkXX','$brk eq "XX"', ''], # Unknown - ['IsLbrkOP','$brk eq "OP"', ''], # Opening Punctuation - ['IsLbrkCL','$brk eq "CL"', ''], # Closing Punctuation - ['IsLbrkQU','$brk eq "QU"', ''], # Ambiguous Quotation - ['IsLbrkNS','$brk eq "NS"', ''], # Non Starter - ['IsLbrkEX','$brk eq "EX"', ''], # Exclamation/Interrogation - ['IsLbrkSY','$brk eq "SY"', ''], # Symbols Allowing Breaks - ['IsLbrkIS','$brk eq "IS"', ''], # Infix Separator (Numeric) - ['IsLbrkPR','$brk eq "PR"', ''], # Prefix (Numeric) - ['IsLbrkPO','$brk eq "PO"', ''], # Postfix (Numeric) - ['IsLbrkNU','$brk eq "NU"', ''], # Numeric - ['IsLbrkAL','$brk eq "AL"', ''], # Ordinary Alphabetic and Symbol Characters - ['IsLbrkID','$brk eq "ID"', ''], # Ideographic - ['IsLbrkIN','$brk eq "IN"', ''], # Inseparable - ['IsLbrkHY','$brk eq "HY"', ''], # Hyphen - ['IsLbrkBB','$brk eq "BB"', ''], # Break Opportunity Before - ['IsLbrkBA','$brk eq "BA"', ''], # Break Opportunity After - ['IsLbrkSA','$brk eq "SA"', ''], # Complex Context (South East Asian) - ['IsLbrkAI','$brk eq "AI"', ''], # Ambiguous (Alphabetic or Ideographic) - ['IsLbrkB2','$brk eq "B2"', ''], # Break Opportunity Before and After -); - -# This is not written for speed... - -my %InIdScript; -my %InIdBlock; -my $InId = 0; - -foreach $file (@todo) { - my ($table, $wanted, $val) = @$file; - next if @ARGV and not grep { $_ eq $table } @ARGV; - print $table, "\n"; - $table =~ s/\W+//g; - if ($table =~ /^(Is|To)(.+)/) { - open(OUT, ">$1/$2.pl") or die "Can't create $1/$2.pl: $!\n"; - } - else { - open(OUT, ">$table.pl") or die "Can't create $table.pl: $!\n"; - } - print OUT <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -EOH - print OUT <<"END"; -return <<'END'; -END - print OUT proplist($table, $wanted, $val); - print OUT "END\n"; - close OUT; -} - -print "Scripts\n"; -open(UD, 'Scripts.txt') or die "Can't open Scripts.txt: $!\n"; -open(OUT, ">Scripts.pl") or die "Can't create Scripts.pl: $!\n"; -print OUT <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -EOH -print OUT <<"END"; -return <<'END'; -END - -my %Scripts; -my $ScriptsVec = ''; -my $lastlast = 0; - -while (<UD>) { - next if /^#/; - next if /^$/; - chomp; - ($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+;\s+(.+)\s+\#/i; - if ($name) { - my $InName = $name; - my $id; - unless (exists $InIdScript{$InName}) { - print "\t$InName\n"; - $id = $Scripts{$InName} = $InIdScript{$InName} = $InId++; - open(SCRIPT, ">In/$id.pl") or die "create In/$id.pl: $!\n"; - print SCRIPT <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<'END'; -EOH - close(SCRIPT); - } else { - $id = $InIdScript{$InName}; - } - $last = "" unless defined $last; - print OUT "$code\t$last\t$name\t# In/$id.pl\n"; - open(SCRIPT, ">>In/$id.pl"); - print SCRIPT <<END; -$code $last -END - close SCRIPT; - } - my $firsti = hex($code); - my $lasti = $last ? hex($last) : $firsti; - for my $i ($firsti..$lasti) { - vec($ScriptsVec, $i, 1) = 1; - } - $lastlast = $lasti if $lasti > $lastlast; - print "\t\t$code..$last\n"; -} - -for my $id (values %InIdScript) { - open(SCRIPT, ">>In/$id.pl"); - print SCRIPT <<END2; -END -END2 - close(SCRIPT); -} - -print OUT "END\n"; -close OUT; - -# Must treat blocks specially. - -exit if @ARGV and not grep { $_ eq Block } @ARGV; -print "Blocks\n"; -open(UD, 'Blocks.txt') or die "Can't open Blocks.txt: $!\n"; -open(OUT, ">Blocks.pl") or die "Can't create Blocks.pl: $!\n"; -print OUT <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -EOH -print OUT <<"END"; -return <<'END'; -END - -while (<UD>) { - next if /^#/; - next if /^$/; - chomp; - ($code, $last, $name) = /^([0-9a-f]+)\.\.([0-9a-f]+); (.+?)\s*$/i; - if ($name) { - my $InName = $name; - print "\t$InName\n"; - my $id; - # TODO: only the first one of Private Use blocks qualifies - unless (exists $InIdBlock{$InName}) { - $InIdBlock{$InName} = $InId++; - } - $id = $InIdBlock{$InName}; - open(BLOCK, ">In/$id.pl") or die "create In/$id.pl: $!\n"; - print OUT "$code\t$last\t$name\t# In/$id.pl\n"; - print BLOCK <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -EOH - print BLOCK <<"END2"; -return <<'END'; -$code $last -END -END2 - close BLOCK; - } -} - -print OUT "END\n"; -close OUT; - -# -# \p{Common} is any code point not assigned to a script -# - -my $first; - -sub flush_zero_range { - my ($i) = @_; - if (defined $first) { - my $last = $i - 1; - $last = $last == $first ? "" : sprintf("%04x", $last); - printf SCRIPT "%04x\t$last\n", $first; - printf "\t\t%04x..$last\n", $first; - undef $first; - } -} - -print "\tCommon\n"; -my $CommonId = $Scripts{Common} = $InIdScript{Common} = $InId++; -open(SCRIPT, ">In/$CommonId.pl") or die "create In/$CommonId.pl: $!\n"; -print SCRIPT <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<'END'; -EOH - -undef $first; -for my $i (0..$lastlast) { - if (vec($ScriptsVec, $i, 1)) { - defined $first && flush_zero_range($i); - } else { - $first = $i unless defined $first; - } -} -flush_zero_range($lastlast+1); -print SCRIPT "END\n"; -close(SCRIPT); - -# -# \p{Any} is 0..10FFFF (in Unicode 3.1.1) -# - -print "\tAny\n"; -my $AnyId = $Scripts{Any} = $InIdScript{Any} = $InId++; -open(SCRIPT, ">In/$AnyId.pl") or die "create In/$AnyId.pl: $!\n"; -print SCRIPT <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<END; -0000 $UnicodeLastHex -END -EOH - -my $CnVec = ''; - -open(UD, 'PropList.txt') or die "Can't open PropList.txt: $!\n"; - -my $InIdProp; -while (<UD>) { - next if /^#/; - next if /^$/; - chomp; - ($code, $last, $name) = /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s+; (\w+)\s/i; - $last = "" unless defined $last; - if ($name) { - my $InName = $name; - my $id; - unless (exists $InIdScript{$InName}) { - print "\t$InName\n"; - print PROP <<EOH if defined $InIdProp; -END -EOH - $id = $InIdProp = $InIdScript{$InName} = $InId++; - open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n"; - print PROP <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<END; -EOH - } - $id = $InIdScript{$InName}; - print PROP "\L$code\t\L$last\n"; - if ($InName eq 'Noncharacter_Code_Point') { - my $firsti = hex($code); - my $lasti = $last ? hex($last) : $firsti; - for my $i ($firsti..$lasti) { - vec($CnVec, $i, 1) = 1; - } - } - } -} -print PROP "END\n"; - -print "\tAssigned\n"; -my $AssignedId = $Scripts{Assigned} = $InIdScript{Assigned} = $InId++; -open(SCRIPT, ">In/$AssignedId.pl") or die "create In/$AssignedId.pl: $!\n"; -print SCRIPT <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<'END'; -EOH - -undef $first; -for my $i (0..hex($UnicodeLastHex)) { - if (vec($CnVec, $i, 1)) { - defined $first && flush_zero_range($i); - } else { - $first = $i unless defined $first; - } -} -flush_zero_range(hex($UnicodeLastHex)+1); -print SCRIPT "END\n"; - -# -# \p{Alphabetic} is \pL and \p{Other_Alphabetic} -# - -print "\tAlphabetic\n"; -my @Alphabetic; -push @Alphabetic, split(/\n/, do "Is/L.pl"); -push @Alphabetic, split(/\n/, do "In/$InIdScript{Other_Alphabetic}.pl"); -$id = $InIdScript{Alphabetic} = $InId++; -open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n"; -print PROP <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<END; -EOH -for (sort { hex($a) <=> hex($b) } @Alphabetic) { - print PROP "$_\n"; -} -print PROP <<EOH; -END -EOH - -# -# \p{Lowercase} is \p{Ll} and \p{Other_Lowercase} -# - -print "\tLowercase\n"; -my @Lowercase; -push @Lowercase, split(/\n/, do "Is/Ll.pl"); -push @Lowercase, split(/\n/, do "In/$InIdScript{Other_Lowercase}.pl"); -$id = $InIdScript{Lowercase} = $InId++; -open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n"; -print PROP <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<END; -EOH -for (sort { hex($a) <=> hex($b) } @Lowercase) { - print PROP "$_\n"; -} -print PROP <<EOH; -END -EOH - -# -# \p{Uppercase} is \p{Lu} and \p{Other_Uppercase} -# - -print "\tUppercase\n"; -my @Uppercase; -push @Uppercase, split(/\n/, do "Is/Lu.pl"); -push @Uppercase, split(/\n/, do "In/$InIdScript{Other_Uppercase}.pl"); -$id = $InIdScript{Uppercase} = $InId++; -open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n"; -print PROP <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<END; -EOH -for (sort { hex($a) <=> hex($b) } @Uppercase) { - print PROP "$_\n"; -} -print PROP <<EOH; -END -EOH - -# -# \p{Math} is \p{Sm} and \p{Other_Math} -# - -print "\tMath\n"; -my @Math; -push @Math, split(/\n/, do "Is/Sm.pl"); -push @Math, split(/\n/, do "In/$InIdScript{Other_Math}.pl"); -$id = $InIdScript{Math} = $InId++; -open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n"; -print PROP <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<END; -EOH -for (sort { hex($a) <=> hex($b) } @Math) { - print PROP "$_\n"; -} -print PROP <<EOH; -END -EOH - -# -# \p{L&} is \p{Ll}, \p{Lu} and \p{Lt} -# - -print "\tLampersand\n"; -my @Lampersand; -push @Lampersand, split(/\n/, do "Is/Ll.pl"); -push @Lampersand, split(/\n/, do "Is/Lu.pl"); -push @Lampersand, split(/\n/, do "Is/Lt.pl"); -$id = $InIdScript{Lampersand} = $InId++; -open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n"; -print PROP <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<END; -EOH -for (sort { hex($a) <=> hex($b) } @Lampersand) { - print PROP "$_\n"; -} -print PROP <<EOH; -END -EOH - -# -# \p{ID_Start} is \p{Ll}, \p{Lu}, \p{Lt}, \p{Lm}, \p{Lo}, and \p{Nl} -# - -print "\tID_Start\n"; -my @ID_Start; -push @ID_Start, split(/\n/, do "Is/Ll.pl"); -push @ID_Start, split(/\n/, do "Is/Lu.pl"); -push @ID_Start, split(/\n/, do "Is/Lt.pl"); -push @ID_Start, split(/\n/, do "Is/Lm.pl"); -push @ID_Start, split(/\n/, do "Is/Lo.pl"); -push @ID_Start, split(/\n/, do "Is/Nl.pl"); -$id = $InIdScript{ID_Start} = $InId++; -open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n"; -print PROP <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<END; -EOH -for (sort { hex($a) <=> hex($b) } @ID_Start) { - print PROP "$_\n"; -} -print PROP <<EOH; -END -EOH - -# -# \p{ID_Continue} is \p{ID_Start}, \p{Mn}, \p{Mc}, \p{Nd}, and \p{Pc} -# - -print "\tID_Continue\n"; -my @ID_Continue; -push @ID_Continue, split(/\n/, do "In/$InIdScript{ID_Start}.pl"); -push @ID_Continue, split(/\n/, do "Is/Mn.pl"); -push @ID_Continue, split(/\n/, do "Is/Mc.pl"); -push @ID_Continue, split(/\n/, do "Is/Nd.pl"); -push @ID_Continue, split(/\n/, do "Is/Pc.pl"); -$id = $InIdScript{ID_Continue} = $InId++; -open(PROP, ">In/$id.pl") or die "create In/$id.pl: $!\n"; -print PROP <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -return <<END; -EOH -for (sort { hex($a) <=> hex($b) } @ID_Continue) { - print PROP "$_\n"; -} -print PROP <<EOH; -END -EOH - -open(INID, ">In.pl"); - -print INID <<EOH; -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by $0 from e.g. $UnicodeData. -# Any changes made here will be lost! -%utf8::In = ( -EOH - -my %InIdScriptById = reverse %InIdScript; -my %InIdBlockById = reverse %InIdBlock; - -my @InIdScriptById = sort { $a <=> $b } keys %InIdScriptById; -my @InIdBlockById = sort { $a <=> $b } keys %InIdBlockById; - -my %InId; -my %IdIdLcName; - -for my $id (@InIdScriptById) { - my $name = $InIdScriptById{$id}; - my $lcname = lc($name); - $InId{$name} = $id; - $IdIdLcName{$lcname} = $id; -} - -for my $id (@InIdBlockById) { - my $name = $InIdBlockById{$id}; - my $lcname = lc($name); - if (exists $IdIdLcName{$lcname}) { - $InId{"$name Block"} = $id; - } else { - $InId{$name} = $id; - } - $IdIdLcName{$lcname} = $id; -} - -my @InId = sort { $InId{$a} <=> $InId{$b} } keys %InId; - -my %InIdPrefix; - -foreach my $in (@InId) { - my $inpat = $in; - $inpat =~ s/([- _])/(?:[-_]|\\s+)?/g; - my $inprefix = lc(substr($in, 0, 2)); - push @{$InIdPrefix{$inprefix}}, [ $in, $inpat ]; - printf INID "%-45s => %3d,\n", "'$in'", $InId{$in}; -} - -print INID ");\n"; - -print INID <<EOH; -%utf8::InPat = ( -EOH - -foreach my $prefix (sort keys %InIdPrefix) { - printf INID "'$prefix' => {\n"; - foreach my $ininpat (@{$InIdPrefix{$prefix}}) { - my ($in, $inpat) = @$ininpat; - printf INID "\t'$inpat' => '$in',\n"; - } - printf INID "},\n"; -} - -print INID ");\n"; - -close(INID); - -################################################## - -sub proplist { - my ($table, $wanted, $val) = @_; - my @wanted; - my $out; - my $split; - - return listFromPropFile($wanted) if $val eq $PropData; - - if ($table =~ /^Arab/) { - open(UD, "ArabShap.txt") or warn "Can't open $table: $!"; - - $split = '($code, $name, $link, $linkgroup) = split(/; */);'; - } - elsif ($table =~ /^Jamo/) { - open(UD, "Jamo.txt") or warn "Can't open $table: $!"; - - $split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;'; - } - elsif ($table =~ /^IsSyl/) { - open(UD, $SyllableData) or warn "Can't open $table: $!"; - - $split = '($code, $short, $syl) = split(/; */); $code =~ s/^U\+//;'; - } - elsif ($table =~ /^IsLbrk/) { - open(UD, "LineBrk.txt") or warn "Can't open $table: $!"; - - $split = '($code, $brk, $name) = /^([0-9a-f]+);(\w+) # (.+)/i;'; - } - else { - open(UD, $UnicodeData) or warn "Can't open $UnicodeData: $!"; - - $split = '($code, $name, $cat, $comb, $bid, $decomp, $dec, $dig, $num, $mir, $uni1, - $comment, $up, $down, $title) = split(/;/);'; - } - - if ($table =~ /^(?:To|Is)[A-Z]/) { - eval <<"END"; - while (<UD>) { - next if /^#/; - next if /^\\s/; - s/\\s+\$//; - $split - if ($wanted) { - push(\@wanted, [hex \$code, hex $val, \$name =~ /, First>\$/]); - } - } -END - die $@ if $@; - - while (@wanted) { - $beg = shift @wanted; - $last = $beg; - while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and - (not $val or $wanted[0]->[1] == $last->[1] + 1)) { - $last = shift @wanted; - } - $out .= sprintf "%04x", $beg->[0]; - if ($beg->[2]) { - $last = shift @wanted; - } - if ($beg == $last) { - $out .= "\t"; - } - else { - $out .= sprintf "\t%04x", $last->[0]; - } - $out .= sprintf "\t%04x", $beg->[1] if $val; - $out .= "\n"; - } - } - else { - eval <<"END"; - while (<UD>) { - next if /^#/; - next if /^\\s*\$/; - chop; - $split - if ($wanted) { - push(\@wanted, [hex \$code, $val, \$name =~ /, First>\$/]); - } - } -END - die $@ if $@; - - while (@wanted) { - $beg = shift @wanted; - $last = $beg; - while (@wanted and $wanted[0]->[0] == $last->[0] + 1 and - ($wanted[0]->[1] eq $last->[1])) { - $last = shift @wanted; - } - $out .= sprintf "%04x", $beg->[0]; - if ($beg->[2]) { - $last = shift @wanted; - } - if ($beg == $last) { - $out .= "\t"; - } - else { - $out .= sprintf "\t%04x", $last->[0]; - } - $out .= sprintf "\t%s\n", $beg->[1]; - } - } - $out; -} - -sub listFromPropFile { - my ($wanted) = @_; - my $out; - - open (UD, $PropData) or die "Can't open $PropData: $!\n"; - local($/) = "\n" . '*' x 43 . "\n\nProperty dump for:"; # not 42? - - <UD>; - while (<UD>) { - chomp; - if (s/0x[\d\w]+\s+\((.*?)\)// and $wanted eq $1) { - s/\(\d+ chars\)//g; - s/^\s+//mg; - s/\s+$//mg; - s/\.\./\t/g; - $out = lc $_; - last; - } - } - close (UD); - "$out\n"; -} - -sub syllable_defs { - my @defs; - my %seen; - - open (SD, $SyllableData) or die "Can't open $SyllableData: $!\n"; - while (<SD>) { - next if /^\s*(#|$)/; - s/\s+$//; - ($code, $name, $syl) = split /; */; - next unless $syl; - push (@defs, ["IsSyl$syl", qq{\$syl eq "$syl"}, '']) - unless $seen{$syl}++; - } - close (SD); - return (@defs); -} - -# eof |