From 3a2263fe90d1c0e6c8f9368f10e6672379a975a2 Mon Sep 17 00:00:00 2001 From: Rafael Garcia-Suarez Date: Tue, 10 Dec 2002 21:30:10 +0000 Subject: Integrate from the maint-5.8/ branch : changes 18219, 18236, 18242-3, 18247-8, 18253-5, 18257, 18273-6 p4raw-id: //depot/perl@18280 p4raw-branched: from //depot/maint-5.8/perl@18279 'branch in' t/op/lc_user.t p4raw-integrated: from //depot/maint-5.8/perl@18279 'copy in' lib/File/Copy.pm (@17645..) lib/utf8_heavy.pl pod/perlsec.pod (@18080..) hints/irix_6.sh (@18173..) t/uni/tr_utf8.t (@18197..) pod/perlunicode.pod (@18242..) t/op/pat.t (@18248..) t/op/split.t (@18274..) 'edit in' pod/perlguts.pod (@18242..) 'merge in' pp.c (@18126..) MANIFEST (@18234..) p4raw-integrated: from //depot/maint-5.8/perl@18254 'merge in' pod/perldiag.pod (@18234..) --- lib/utf8_heavy.pl | 56 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 15 deletions(-) (limited to 'lib/utf8_heavy.pl') diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 4c5ef27a7f..d8c46b5642 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -11,7 +11,8 @@ my %Cache; sub croak { require Carp; Carp::croak(@_) } ## -## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape +## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape. +## It's a data structure that encodes a set of Unicode characters. ## sub SWASHNEW { @@ -87,10 +88,10 @@ sub SWASHNEW { ## It could be a user-defined property. ## - my $caller = caller(1); + my $caller1 = caller(1); - if (defined $caller && $type =~ /^(?:\w+)$/) { - my $prop = $caller . "::" . ( $wasIs ? "Is" : "" ) . $type; + if (defined $caller1 && $type =~ /^(?:\w+)$/) { + my $prop = $caller1 . "::" . ( $wasIs ? "Is" : "" ) . $type; if (exists &{$prop}) { no strict 'refs'; @@ -99,10 +100,29 @@ sub SWASHNEW { } } + ## + ## See if it's a user-level "To". + ## + + my $caller0 = caller(0); + + if (defined $caller0 && $type =~ /^To(?:\w+)$/) { + my $map = $caller0 . "::" . $type; + if (exists &{$map}) { + no strict 'refs'; + + $list = &{$map}; + last GETFILE; + } + } + ## - ## Last attempt -- see if it's a "To" name (e.g. "ToLower") + ## Last attempt -- see if it's a standard "To" name + ## (e.g. "ToLower") ToTitle is used by ucfirst(). + ## The user-level way to access ToDigit() and ToFold() + ## is to use Unicode::UCD. ## - if ($type =~ /^To([A-Z][A-Za-z]+)$/) + if ($type =~ /^To(Digit|Fold|Lower|Title|Upper)$/) { $file = "unicore/To/$1.pl"; ## would like to test to see if $file actually exists.... @@ -122,7 +142,7 @@ sub SWASHNEW { ## ## If we reach here, it was due to a 'last GETFILE' above - ## (exception: user-defined properties), so we + ## (exception: user-defined properties and mappings), so we ## have a filename, so now we load it if we haven't already. ## If we have, return the cached results. The cache key is the ## file to load. @@ -162,10 +182,10 @@ sub SWASHNEW { if ($minbits < 32) { my $top = 0; - while ($list =~ /^([0-9a-fA-F]+)(?:\t([0-9a-fA-F]+)?)(?:\t([0-9a-fA-F]+))?/mg) { + while ($list =~ /^([0-9a-fA-F]+)(?:[\t]([0-9a-fA-F]+)?)(?:[ \t]([0-9a-fA-F]+))?/mg) { my $min = hex $1; - my $max = hex(defined $2 ? $2 : $1); - my $val = hex(defined $3 ? $3 : ""); + my $max = defined $2 ? hex $2 : $min; + my $val = defined $3 ? hex $3 : 0; $val += $max - $min if defined $3; $top = $val if $val > $top; } @@ -239,10 +259,15 @@ sub SWASHGET { pos $_ = 0; if ($bits > 1) { LINE: - while (/^([0-9a-fA-F]+)(?:\t([0-9a-fA-F]+)?)(?:\t([0-9a-fA-F]+))?/mg) { - my $min = hex $1; - my $max = (defined $2 ? hex $2 : $min); - my $val = hex $3; + while (/^([0-9a-fA-F]+)(?:[ \t]([0-9a-fA-F]+)?)?(?:[ \t]([0-9a-fA-F]+))?/mg) { + chomp; + my ($a, $b, $c) = ($1, $2, $3); + croak "$type: illegal mapping '$_'" + if $type =~ /^To/ && + !(defined $a && defined $c); + my $min = hex $a; + my $max = defined $b ? hex $b : $min; + my $val = defined $c ? hex $c : 0; next if $max < $start; print "$min $max $val\n" if DEBUG; if ($none) { @@ -273,8 +298,9 @@ sub SWASHGET { else { LINE: while (/^([0-9a-fA-F]+)(?:[ \t]+([0-9a-fA-F]+))?/mg) { + chomp; my $min = hex $1; - my $max = (defined $2 ? hex $2 : $min); + my $max = defined $2 ? hex $2 : $min; next if $max < $start; if ($min < $start) { $min = $start; -- cgit v1.2.1