diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-10-13 20:54:17 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-10-13 20:54:17 +0000 |
commit | d73e53025d089803adeff195eaa8b562b9f4d911 (patch) | |
tree | 68a207dbbd8fdcb1fc9466f9df6496e1e33b074e /lib/utf8_heavy.pl | |
parent | f05db7a1ca746e197911ed297a203f4ab9d9d70c (diff) | |
download | perl-d73e53025d089803adeff195eaa8b562b9f4d911.tar.gz |
Rewrite mktables from scratch.
- Cleaner.
- Faster: 15-20 seconds as opposed to several minutes.
- More dynamic: the names of the various categories
such as the linebreak ones are dynamic, not static.
- Is.pl: long names for the general category properties
are now available.
- Ranges (<..., First>, <..., Last>) from the general
categories work now.
- No more mktables.PL, because mktables.PL is not run,
and never has been run, to create mktables.
- syllables.txt and Is/Syl*.pl removed: they are non-standard
(not part of the Unicode standard), and the whole concept is
being reworked (http://syllabary.sourceforge.net/);
the old way wouldn't even work with the new Syllables.txt
(it would result in 1000+ new categories).
p4raw-id: //depot/perl@12427
Diffstat (limited to 'lib/utf8_heavy.pl')
-rw-r--r-- | lib/utf8_heavy.pl | 93 |
1 files changed, 64 insertions, 29 deletions
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index ef950c6f39..bbc082b1e0 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -12,51 +12,86 @@ sub SWASHNEW { print STDERR "SWASHNEW @_\n" if DEBUG; - my $file; - if ($type and ref ${"${class}::{$type}"} eq $class) { warn qq/Found \${"${class}::{$type}"}\n/ if DEBUG; return ${"${class}::{$type}"}; # Already there... } if ($type) { + $type =~ s/^\s+//; + $type =~ s/\s+$//; - defined %utf8::In || do "unicore/In.pl"; + print "type = $type\n" if DEBUG; - $type =~ s/^In(?:[-_]|\s+)?(?!herited$)//i; - $type =~ s/\s+$//; + my $file; - $type = 'Lampersand' if $type =~ /^(?:Is)?L&$/; - - my $inprefix = substr(lc($type), 0, 2); - if (exists $utf8::InPat{$inprefix}) { - my $In = $type; - for my $k (keys %{$utf8::InPat{$inprefix}}) { - if ($In =~ /^$k$/i) { - $In = $utf8::InPat{$inprefix}->{$k}; - if (exists $utf8::In{$In}) { - $file = "unicore/In/$utf8::In{$In}"; - print "inprefix = $inprefix, In = $In, k = $k, file = $file\n" if DEBUG; - last; + unless (defined $file) { + defined %utf8::Is || do "unicore/Is.pl"; + if ($type =~ /^(?:Is)?[- _]?([A-Z].*)$/i) { + my $istype = $1; + print "istype = $istype\n" if DEBUG; + unless ($list = do "unicore/Is/$istype.pl") { + if (exists $utf8::Is{$istype}) { + $file = "unicore/Is/$utf8::Is{$istype}"; + } else { + my $isprefix = substr(lc($istype), 0, 2); + print "isprefix = $isprefix\n" if DEBUG; + if (exists $utf8::IsPat{$isprefix}) { + my $Is = $istype; + print "isprefix = $isprefix, Is = $Is\n" if DEBUG; + for my $k (keys %{$utf8::IsPat{$isprefix}}) { + print "isprefix = $isprefix, Is = $Is, k = $k\n" if DEBUG; + if ($Is =~ /^$k$/i) { + $file = "unicore/Is/$utf8::IsPat{$isprefix}->{$k}"; + print "isprefix = $isprefix, Is = $Is, k = $k, file = $file\n" if DEBUG; + last; + } + } + } + } + } + } + + unless (defined $file) { + defined %utf8::In || do "unicore/In.pl"; + $type = 'Lampersand' if $type =~ /^(?:Is)?L&$/; + if ($type =~ /^(?:In)?[- _]?(?!herited$)(.+)/i) { + my $intype 
= $1; + print "intype = $intype\n" if DEBUG; + if (exists $utf8::Is{$istype}) { + $file = "unicore/In/$utf8::In{$intype}"; + } else { + my $inprefix = substr(lc($intype), 0, 2); + print "inprefix = $inprefix\n" if DEBUG; + if (exists $utf8::InPat{$inprefix}) { + my $In = $intype; + print "inprefix = $inprefix, In = $In\n" if DEBUG; + for my $k (keys %{$utf8::InPat{$inprefix}}) { + print "inprefix = $inprefix, In = $In, k = $k\n" if DEBUG; + if ($In =~ /^$k$/i) { + $file = "unicore/In/$utf8::InPat{$inprefix}->{$k}"; + print "inprefix = $inprefix, In = $In, k = $k, file = $file\n" if DEBUG; + last; + } + } + } } } } - } - unless (defined $file) { - # This is separate from 'To' in preparation of Is.pl (a la In.pl). - if ($type =~ /^Is([A-Z][A-Za-z]*)$/) { - $file = "unicore/Is/$1"; - } elsif ((not defined $file) && $type =~ /^To([A-Z][A-Za-z]*)$/) { - $file = "unicore/To/$1"; + unless (defined $file) { + if ($type =~ /^To([A-Z][A-Za-z]+)$/) { + $file = "unicore/To/$1"; + } } } - } - { - $list ||= do "$file.pl" - || do "unicore/Is/$type.pl" - || croak("Can't find Unicode character property \"$type\""); + if (defined $file) { + $list = do "$file.pl"; + } + + croak("Can't find Unicode character property \"$type\"") + unless $list; } my $extras; |