summaryrefslogtreecommitdiff
path: root/lib/utf8_heavy.pl
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-10-13 20:54:17 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-10-13 20:54:17 +0000
commitd73e53025d089803adeff195eaa8b562b9f4d911 (patch)
tree68a207dbbd8fdcb1fc9466f9df6496e1e33b074e /lib/utf8_heavy.pl
parentf05db7a1ca746e197911ed297a203f4ab9d9d70c (diff)
downloadperl-d73e53025d089803adeff195eaa8b562b9f4d911.tar.gz
Rewrite mktables from scratch.
- Cleaner.
- Faster: 15-20 seconds as opposed to several minutes.
- More dynamic: the names of the various categories, such as the linebreak ones, are dynamic, not static.
- Is.pl: long names for the general category properties are now available.
- Ranges (<..., First>, <..., Last>) from the general categories work now.
- No more mktables.PL, because mktables.PL is not and never has been run to create a mktables.
- syllables.txt and Is/Syl*.pl removed: non-standard (not part of the Unicode standard), and the whole concept is being reworked (http://syllabary.sourceforge.net/); the old way wouldn't even work with the new Syllables.txt (it would result in 1000+ new categories).

p4raw-id: //depot/perl@12427
Diffstat (limited to 'lib/utf8_heavy.pl')
-rw-r--r--lib/utf8_heavy.pl93
1 files changed, 64 insertions, 29 deletions
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index ef950c6f39..bbc082b1e0 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -12,51 +12,86 @@ sub SWASHNEW {
print STDERR "SWASHNEW @_\n" if DEBUG;
- my $file;
-
if ($type and ref ${"${class}::{$type}"} eq $class) {
warn qq/Found \${"${class}::{$type}"}\n/ if DEBUG;
return ${"${class}::{$type}"}; # Already there...
}
if ($type) {
+ $type =~ s/^\s+//;
+ $type =~ s/\s+$//;
- defined %utf8::In || do "unicore/In.pl";
+ print "type = $type\n" if DEBUG;
- $type =~ s/^In(?:[-_]|\s+)?(?!herited$)//i;
- $type =~ s/\s+$//;
+ my $file;
- $type = 'Lampersand' if $type =~ /^(?:Is)?L&$/;
-
- my $inprefix = substr(lc($type), 0, 2);
- if (exists $utf8::InPat{$inprefix}) {
- my $In = $type;
- for my $k (keys %{$utf8::InPat{$inprefix}}) {
- if ($In =~ /^$k$/i) {
- $In = $utf8::InPat{$inprefix}->{$k};
- if (exists $utf8::In{$In}) {
- $file = "unicore/In/$utf8::In{$In}";
- print "inprefix = $inprefix, In = $In, k = $k, file = $file\n" if DEBUG;
- last;
+ unless (defined $file) {
+ defined %utf8::Is || do "unicore/Is.pl";
+ if ($type =~ /^(?:Is)?[- _]?([A-Z].*)$/i) {
+ my $istype = $1;
+ print "istype = $istype\n" if DEBUG;
+ unless ($list = do "unicore/Is/$istype.pl") {
+ if (exists $utf8::Is{$istype}) {
+ $file = "unicore/Is/$utf8::Is{$istype}";
+ } else {
+ my $isprefix = substr(lc($istype), 0, 2);
+ print "isprefix = $isprefix\n" if DEBUG;
+ if (exists $utf8::IsPat{$isprefix}) {
+ my $Is = $istype;
+ print "isprefix = $isprefix, Is = $Is\n" if DEBUG;
+ for my $k (keys %{$utf8::IsPat{$isprefix}}) {
+ print "isprefix = $isprefix, Is = $Is, k = $k\n" if DEBUG;
+ if ($Is =~ /^$k$/i) {
+ $file = "unicore/Is/$utf8::IsPat{$isprefix}->{$k}";
+ print "isprefix = $isprefix, Is = $Is, k = $k, file = $file\n" if DEBUG;
+ last;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ unless (defined $file) {
+ defined %utf8::In || do "unicore/In.pl";
+ $type = 'Lampersand' if $type =~ /^(?:Is)?L&$/;
+ if ($type =~ /^(?:In)?[- _]?(?!herited$)(.+)/i) {
+ my $intype = $1;
+ print "intype = $intype\n" if DEBUG;
+ if (exists $utf8::Is{$istype}) {
+ $file = "unicore/In/$utf8::In{$intype}";
+ } else {
+ my $inprefix = substr(lc($intype), 0, 2);
+ print "inprefix = $inprefix\n" if DEBUG;
+ if (exists $utf8::InPat{$inprefix}) {
+ my $In = $intype;
+ print "inprefix = $inprefix, In = $In\n" if DEBUG;
+ for my $k (keys %{$utf8::InPat{$inprefix}}) {
+ print "inprefix = $inprefix, In = $In, k = $k\n" if DEBUG;
+ if ($In =~ /^$k$/i) {
+ $file = "unicore/In/$utf8::InPat{$inprefix}->{$k}";
+ print "inprefix = $inprefix, In = $In, k = $k, file = $file\n" if DEBUG;
+ last;
+ }
+ }
+ }
}
}
}
- }
- unless (defined $file) {
- # This is separate from 'To' in preparation of Is.pl (a la In.pl).
- if ($type =~ /^Is([A-Z][A-Za-z]*)$/) {
- $file = "unicore/Is/$1";
- } elsif ((not defined $file) && $type =~ /^To([A-Z][A-Za-z]*)$/) {
- $file = "unicore/To/$1";
+ unless (defined $file) {
+ if ($type =~ /^To([A-Z][A-Za-z]+)$/) {
+ $file = "unicore/To/$1";
+ }
}
}
- }
- {
- $list ||= do "$file.pl"
- || do "unicore/Is/$type.pl"
- || croak("Can't find Unicode character property \"$type\"");
+ if (defined $file) {
+ $list = do "$file.pl";
+ }
+
+ croak("Can't find Unicode character property \"$type\"")
+ unless $list;
}
my $extras;