summaryrefslogtreecommitdiff
path: root/lib/utf8_heavy.pl
diff options
context:
space:
mode:
authorRafael Garcia-Suarez <rgarciasuarez@gmail.com>2002-12-10 21:30:10 +0000
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2002-12-10 21:30:10 +0000
commit3a2263fe90d1c0e6c8f9368f10e6672379a975a2 (patch)
treef4ecc8075c4fe608fca0d50cea8273adb3179ea8 /lib/utf8_heavy.pl
parent05b465836ef698192f94eef4a60cd63313013848 (diff)
downloadperl-3a2263fe90d1c0e6c8f9368f10e6672379a975a2.tar.gz
Integrate from the maint-5.8/ branch :
changes 18219, 18236, 18242-3, 18247-8, 18253-5, 18257, 18273-6 p4raw-id: //depot/perl@18280 p4raw-branched: from //depot/maint-5.8/perl@18279 'branch in' t/op/lc_user.t p4raw-integrated: from //depot/maint-5.8/perl@18279 'copy in' lib/File/Copy.pm (@17645..) lib/utf8_heavy.pl pod/perlsec.pod (@18080..) hints/irix_6.sh (@18173..) t/uni/tr_utf8.t (@18197..) pod/perlunicode.pod (@18242..) t/op/pat.t (@18248..) t/op/split.t (@18274..) 'edit in' pod/perlguts.pod (@18242..) 'merge in' pp.c (@18126..) MANIFEST (@18234..) p4raw-integrated: from //depot/maint-5.8/perl@18254 'merge in' pod/perldiag.pod (@18234..)
Diffstat (limited to 'lib/utf8_heavy.pl')
-rw-r--r--lib/utf8_heavy.pl56
1 files changed, 41 insertions, 15 deletions
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 4c5ef27a7f..d8c46b5642 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -11,7 +11,8 @@ my %Cache;
sub croak { require Carp; Carp::croak(@_) }
##
-## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape
+## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape.
+## It's a data structure that encodes a set of Unicode characters.
##
sub SWASHNEW {
@@ -87,10 +88,10 @@ sub SWASHNEW {
## It could be a user-defined property.
##
- my $caller = caller(1);
+ my $caller1 = caller(1);
- if (defined $caller && $type =~ /^(?:\w+)$/) {
- my $prop = $caller . "::" . ( $wasIs ? "Is" : "" ) . $type;
+ if (defined $caller1 && $type =~ /^(?:\w+)$/) {
+ my $prop = $caller1 . "::" . ( $wasIs ? "Is" : "" ) . $type;
if (exists &{$prop}) {
no strict 'refs';
@@ -99,10 +100,29 @@ sub SWASHNEW {
}
}
+ ##
+ ## See if it's a user-level "To".
+ ##
+
+ my $caller0 = caller(0);
+
+ if (defined $caller0 && $type =~ /^To(?:\w+)$/) {
+ my $map = $caller0 . "::" . $type;
+ if (exists &{$map}) {
+ no strict 'refs';
+
+ $list = &{$map};
+ last GETFILE;
+ }
+ }
+
##
- ## Last attempt -- see if it's a "To" name (e.g. "ToLower")
+ ## Last attempt -- see if it's a standard "To" name
+ ## (e.g. "ToLower") ToTitle is used by ucfirst().
+ ## The user-level way to access ToDigit() and ToFold()
+ ## is to use Unicode::UCD.
##
- if ($type =~ /^To([A-Z][A-Za-z]+)$/)
+ if ($type =~ /^To(Digit|Fold|Lower|Title|Upper)$/)
{
$file = "unicore/To/$1.pl";
## would like to test to see if $file actually exists....
@@ -122,7 +142,7 @@ sub SWASHNEW {
##
## If we reach here, it was due to a 'last GETFILE' above
- ## (exception: user-defined properties), so we
+ ## (exception: user-defined properties and mappings), so we
## have a filename, so now we load it if we haven't already.
## If we have, return the cached results. The cache key is the
## file to load.
@@ -162,10 +182,10 @@ sub SWASHNEW {
if ($minbits < 32) {
my $top = 0;
- while ($list =~ /^([0-9a-fA-F]+)(?:\t([0-9a-fA-F]+)?)(?:\t([0-9a-fA-F]+))?/mg) {
+ while ($list =~ /^([0-9a-fA-F]+)(?:[\t]([0-9a-fA-F]+)?)(?:[ \t]([0-9a-fA-F]+))?/mg) {
my $min = hex $1;
- my $max = hex(defined $2 ? $2 : $1);
- my $val = hex(defined $3 ? $3 : "");
+ my $max = defined $2 ? hex $2 : $min;
+ my $val = defined $3 ? hex $3 : 0;
$val += $max - $min if defined $3;
$top = $val if $val > $top;
}
@@ -239,10 +259,15 @@ sub SWASHGET {
pos $_ = 0;
if ($bits > 1) {
LINE:
- while (/^([0-9a-fA-F]+)(?:\t([0-9a-fA-F]+)?)(?:\t([0-9a-fA-F]+))?/mg) {
- my $min = hex $1;
- my $max = (defined $2 ? hex $2 : $min);
- my $val = hex $3;
+ while (/^([0-9a-fA-F]+)(?:[ \t]([0-9a-fA-F]+)?)?(?:[ \t]([0-9a-fA-F]+))?/mg) {
+ chomp;
+ my ($a, $b, $c) = ($1, $2, $3);
+ croak "$type: illegal mapping '$_'"
+ if $type =~ /^To/ &&
+ !(defined $a && defined $c);
+ my $min = hex $a;
+ my $max = defined $b ? hex $b : $min;
+ my $val = defined $c ? hex $c : 0;
next if $max < $start;
print "$min $max $val\n" if DEBUG;
if ($none) {
@@ -273,8 +298,9 @@ sub SWASHGET {
else {
LINE:
while (/^([0-9a-fA-F]+)(?:[ \t]+([0-9a-fA-F]+))?/mg) {
+ chomp;
my $min = hex $1;
- my $max = (defined $2 ? hex $2 : $min);
+ my $max = defined $2 ? hex $2 : $min;
next if $max < $start;
if ($min < $start) {
$min = $start;