diff options
author | Karl Williamson <khw@cpan.org> | 2019-11-06 10:32:31 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2019-11-06 21:22:25 -0700 |
commit | 048bdb720dd091aa62a709b37e2e074164fd7cdc (patch) | |
tree | 3923b5d18b4af797a0b271848cd300fba9324bbf | |
parent | 29791e7dd4d3e5de7d243a10a71109b4ef20189d (diff) | |
download | perl-048bdb720dd091aa62a709b37e2e074164fd7cdc.tar.gz |
Remove lib/unicore/Heavy.pl
This file was for the use of utf8_heavy.pl. Now that utf8_heavy.pl has been
incorporated into Unicode::UCD, move the definitions from Heavy.pl to
lib/unicore/UCD.pl, which is used by Unicode::UCD. This allows removing the
explicit package qualifiers on these variables (e.g. $utf8::SwashInfo becomes $SwashInfo).
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | charclass_invlists.h | 6 | ||||
-rw-r--r-- | lib/Unicode/UCD.pm | 130 | ||||
-rw-r--r-- | lib/Unicode/UCD.t | 45 | ||||
-rw-r--r-- | lib/unicore/mktables | 295 | ||||
-rw-r--r-- | lib/unicore/uni_keywords.pl | 8 | ||||
-rw-r--r-- | regcharclass.h | 4 | ||||
-rw-r--r-- | regen/mk_invlists.pl | 32 | ||||
-rw-r--r-- | regen/mph.pl | 2 | ||||
-rw-r--r-- | t/op/utftaint.t | 2 | ||||
-rw-r--r-- | t/re/regexp.t | 2 | ||||
-rw-r--r-- | t/re/rt122747.t | 2 | ||||
-rw-r--r-- | t/re/uniprops01.t | 2 | ||||
-rw-r--r-- | t/re/uniprops02.t | 2 | ||||
-rw-r--r-- | t/re/uniprops03.t | 2 | ||||
-rw-r--r-- | t/re/uniprops04.t | 2 | ||||
-rw-r--r-- | t/re/uniprops05.t | 2 | ||||
-rw-r--r-- | t/re/uniprops06.t | 2 | ||||
-rw-r--r-- | t/re/uniprops07.t | 2 | ||||
-rw-r--r-- | t/re/uniprops08.t | 2 | ||||
-rw-r--r-- | t/re/uniprops09.t | 2 | ||||
-rw-r--r-- | t/re/uniprops10.t | 2 | ||||
-rw-r--r-- | t/run/fresh_perl.t | 2 | ||||
-rw-r--r-- | t/test.pl | 2 | ||||
-rw-r--r-- | uni_keywords.h | 8 | ||||
-rw-r--r-- | vms/descrip_mms.template | 2 | ||||
-rw-r--r-- | win32/GNUmakefile | 2 | ||||
-rw-r--r-- | win32/makefile.mk | 2 |
28 files changed, 279 insertions, 288 deletions
diff --git a/.gitignore b/.gitignore index 36683de00c..471fde5100 100644 --- a/.gitignore +++ b/.gitignore @@ -122,7 +122,6 @@ lib/perldoc.pod lib/buildcustomize.pl lib/unicore/CombiningClass.pl lib/unicore/Decomposition.pl -lib/unicore/Heavy.pl lib/unicore/Name.pl lib/unicore/Name.pm lib/unicore/TestProp.pl diff --git a/charclass_invlists.h b/charclass_invlists.h index 4d86d8680c..05573bba0b 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -395261,7 +395261,7 @@ static const U8 WB_table[23][23] = { #define MAX_FOLD_FROMS 3 /* Generated from: - * 486ed9a6bcca738e67b88da8199ebc831063808044dc1d0ea98b494ab59ee34a lib/Unicode/UCD.pm + * 64f46a4b25d29a7f952077ee277909df8599a7a803759805c865914d981671a2 lib/Unicode/UCD.pm * 5e91b649379ec79af7cfb6b09410a24557cba4c6d733cd0a2b8a78a1448736d2 lib/unicore/ArabicShaping.txt * f5feb19cd084b2b1568fbc0f94f4b4b54941406e7fb36c7570f8352fd5022dbe lib/unicore/BidiBrackets.txt * e6cbd8ffe94f2e0fbfa6695d6c06c1e72eef7d3aa93cb6329d111285198b5e62 lib/unicore/BidiMirroring.txt @@ -395307,9 +395307,9 @@ static const U8 WB_table[23][23] = { * 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt * 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt * 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt - * 74dc5134f7e509239e1b3c8af319df951d1f41f917eceae9bd113c6740a613e6 lib/unicore/mktables + * 5214f368c189077a2a748b7ef0a5300abd0d012be568d18c1bbd8bede55818ae lib/unicore/mktables * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * e9283c761c5a95e3379384ca47c13a284f08d743c2be6e5091f1152b1b6b7a37 regen/mk_PL_charclass.pl - * 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a regen/mk_invlists.pl + * 
74442760b048f85cf5e9e87c3baffc94e861ba397dda0d33f4c22b40ef7efbe6 regen/mk_invlists.pl * ex: set ro: */ diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index d66cf134f8..531a37be64 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -143,6 +143,21 @@ Note that the largest code point in Unicode is U+10FFFF. =cut +our %caseless_equivalent; +our $e_precision; +our %file_to_swash_name; +our @inline_definitions; +our %loose_property_name_of; +our %loose_property_to_file_of; +our %loose_to_file_of; +our $MAX_CP; +our %nv_floating_to_rational; +our %prop_aliases; +our %stricter_to_file_of; +our %strict_property_to_file_of; +our %SwashInfo; +our %why_deprecated; + my $v_unicode_version; # v-string. sub openunicode { @@ -367,7 +382,7 @@ my $number = qr{ ^ $sign $digits+ $ sub loose_name ($) { # Given a lowercase property or property-value name, return its # standardized version that is expected for look-up in the 'loose' hashes - # in Heavy.pl (hence, this depends on what mktables does). This squeezes + # in UCD.pl (hence, this depends on what mktables does). This squeezes # out blanks, underscores and dashes. The complication stems from the # grandfathered-in 'L_', which retains a single trailing underscore. @@ -419,7 +434,7 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; ## Called from swash_init (see utf8.c) or SWASHNEW itself. ## ## Callers of swash_init: - ## Unicode::UCD::prop_invlist + ## prop_invlist ## Unicode::UCD::prop_invmap ## ## Given a $type, our goal is to fill $list with the set of codepoint @@ -443,7 +458,7 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; ## # If a problem is found $type is returned; # Upon success, a new (or cached) blessed object is returned with - # keys TYPE, BITS, EXTRAS, LIST, and NONE with values having the + # keys TYPE, BITS, EXTRAS, LIST, and with values having the # same meanings as the input parameters. 
# SPECIALS contains a reference to any special-treatment hash in the # property. @@ -513,7 +528,7 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; } } - require "$unicore_dir/Heavy.pl"; + require "$unicore_dir/UCD.pl"; # All property names are matched caselessly my $property_and_table = CORE::lc $type; @@ -545,7 +560,7 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; $property = loose_name($property) =~ s/^is//r; # And convert to canonical form. Quit if not valid. - $property = $utf8::loose_property_name_of{$property}; + $property = $loose_property_name_of{$property}; if (! defined $property) { pop @recursed if @recursed; return $type; @@ -625,11 +640,11 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; # 5.00E-01, and look that up in the hash my $float = sprintf "%.*e", - $utf8::e_precision, + $e_precision, 0 + $parts[0]; - if (exists $utf8::nv_floating_to_rational{$float}) { - $table = $utf8::nv_floating_to_rational{$float}; + if (exists $nv_floating_to_rational{$float}) { + $table = $nv_floating_to_rational{$float}; } else { pop @recursed if @recursed; return $type; @@ -645,7 +660,7 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; print STDERR __LINE__, ": $property_and_table\n" if DEBUG; # First try stricter matching. - $file = $utf8::stricter_to_file_of{$property_and_table}; + $file = $stricter_to_file_of{$property_and_table}; # If didn't find it, try again with looser matching by editing # out the applicable characters on the rhs and looking up @@ -658,7 +673,8 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; $table = loose_name($table); $property_and_table = "$prefix$table"; print STDERR __LINE__, ": $property_and_table\n" if DEBUG; - $file = $utf8::loose_to_file_of{$property_and_table}; + $file = $loose_to_file_of{$property_and_table}; + print STDERR __LINE__, ": $property_and_table\n" if DEBUG; } # Add the constant and go fetch it in. 
@@ -669,9 +685,9 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; $invert_it = 0 + $file =~ s/!//; if ($caseless - && exists $utf8::caseless_equivalent{$property_and_table}) + && exists $caseless_equivalent{$property_and_table}) { - $file = $utf8::caseless_equivalent{$property_and_table}; + $file = $caseless_equivalent{$property_and_table}; } # The pseudo-directory '#' means that there really isn't a @@ -693,19 +709,19 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; if ($property_and_table =~ s/^to//) { # Look input up in list of properties for which we have # mapping files. First do it with the strict approach - if (defined ($file = $utf8::strict_property_to_file_of{ + if (defined ($file = $strict_property_to_file_of{ $strict_property_and_table})) { - $type = $utf8::file_to_swash_name{$file}; + $type = $file_to_swash_name{$file}; print STDERR __LINE__, ": type set to $type\n" if DEBUG; $file = "$unicore_dir/$file.pl"; last GETFILE; } elsif (defined ($file = - $utf8::loose_property_to_file_of{$property_and_table})) + $loose_property_to_file_of{$property_and_table})) { - $type = $utf8::file_to_swash_name{$file}; + $type = $file_to_swash_name{$file}; print STDERR __LINE__, ": type set to $type\n" if DEBUG; $file = "$unicore_dir/$file.pl"; @@ -713,7 +729,7 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; } # If that fails see if there is a corresponding binary # property file elsif (defined ($file = - $utf8::loose_to_file_of{$property_and_table})) + $loose_to_file_of{$property_and_table})) { # Here, there is no map file for the property we @@ -757,7 +773,7 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; # new-lines. Since it is in-line there is no advantage to # caching the result if ($file =~ s!^#/!!) { - $list = $utf8::inline_definitions[$file]; + $list = $inline_definitions[$file]; } else { # Here, we have an actual file to read in and load, but it @@ -787,7 +803,7 @@ my $numeric_re = qr! 
$integer_or_float_re | ^ -? \d+ / \d+ $ !x; my $bits = $minbits; - # mktables lists don't have extras, like '&utf8::prop', so don't need + # mktables lists don't have extras, like '&prop', so don't need # to separate them; also lists are already sorted, so don't need to do # that. if ($list && ! $list_is_from_mktables) { @@ -859,7 +875,7 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; if ($char =~ /[-+!&]/) { my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really my $subobj; - if ($c eq 'utf8') { + if ($c eq 'utf8') { # khw is unsure of this $subobj = SWASHNEW($t, "", $minbits, 0); } elsif (exists &$name) { @@ -901,10 +917,10 @@ my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; if ($file) { $Cache{$class, $file, $invert_it} = $SWASH; if ($type - && exists $utf8::SwashInfo{$type} - && exists $utf8::SwashInfo{$type}{'specials_name'}) + && exists $SwashInfo{$type} + && exists $SwashInfo{$type}{'specials_name'}) { - my $specials_name = $utf8::SwashInfo{$type}{'specials_name'}; + my $specials_name = $SwashInfo{$type}{'specials_name'}; no strict "refs"; print STDERR "\nspecials_name => $specials_name\n" if DEBUG; $SWASH->{'SPECIALS'} = \%$specials_name; @@ -982,7 +998,7 @@ sub charinfo { @CATEGORIES =_read_table("To/Gc.pl") unless @CATEGORIES; $prop{'category'} = _search(\@CATEGORIES, 0, $#CATEGORIES, $code) - // $utf8::SwashInfo{'ToGc'}{'missing'}; + // $SwashInfo{'ToGc'}{'missing'}; # Return undef if category value is 'Unassigned' or one of its synonyms return if grep { lc $_ eq 'unassigned' } prop_value_aliases('Gc', $prop{'category'}); @@ -995,7 +1011,7 @@ sub charinfo { @BIDIS =_read_table("To/Bc.pl") unless @BIDIS; $prop{'bidi'} = _search(\@BIDIS, 0, $#BIDIS, $code) - // $utf8::SwashInfo{'ToBc'}{'missing'}; + // $SwashInfo{'ToBc'}{'missing'}; # For most code points, we can just read in "unicore/Decomposition.pl", as # its contents are exactly what should be output. 
But that file doesn't @@ -1122,9 +1138,9 @@ sub _read_table ($;$) { # return takes much less memory when there are large ranges. # # This function has the side effect of setting - # $utf8::SwashInfo{$property}{'format'} to be the mktables format of the + # $SwashInfo{$property}{'format'} to be the mktables format of the # table; and - # $utf8::SwashInfo{$property}{'missing'} to be the value for all entries + # $SwashInfo{$property}{'missing'} to be the value for all entries # not listed in the table. # where $property is the Unicode property name, preceded by 'To' for map # properties., e.g., 'ToSc'. @@ -1143,11 +1159,11 @@ sub _read_table ($;$) { # Look up if this property requires adjustments, which we do below if it # does. - require "unicore/Heavy.pl"; + require "unicore/UCD.pl"; my $property = $table =~ s/\.pl//r; - $property = $utf8::file_to_swash_name{$property}; + $property = $file_to_swash_name{$property}; my $to_adjust = defined $property - && $utf8::SwashInfo{$property}{'format'} =~ / ^ a /x; + && $SwashInfo{$property}{'format'} =~ / ^ a /x; for (split /^/m, $list) { my ($start, $end, $value) = / ^ (.+?) \t (.*?) \t (.+?) @@ -1156,7 +1172,7 @@ sub _read_table ($;$) { my $decimal_start = hex $start; my $decimal_end = ($end eq "") ? $decimal_start : hex $end; $value = hex $value if $to_adjust - && $utf8::SwashInfo{$property}{'format'} eq 'ax'; + && $SwashInfo{$property}{'format'} eq 'ax'; if ($return_hash) { foreach my $i ($decimal_start .. $decimal_end) { $return{$i} = ($to_adjust) @@ -1302,7 +1318,7 @@ sub charprop ($$;$) { # extensions. But this is misleading. For now, return undef for # these, as currently documented. 
undef $map unless - exists $Unicode::UCD::prop_aliases{loose_name(lc $prop)}; + exists $prop_aliases{loose_name(lc $prop)}; } return $map; } @@ -1377,7 +1393,7 @@ sub charprops_all($) { require "unicore/UCD.pl"; - foreach my $prop (keys %Unicode::UCD::prop_aliases) { + foreach my $prop (keys %prop_aliases) { # Don't return a Perl extension. (This is the only one that # %prop_aliases has in it.) @@ -1578,7 +1594,7 @@ sub charscript { if (defined $code) { my $result = _search(\@SCRIPTS, 0, $#SCRIPTS, $code); return $result if defined $result; - return $utf8::SwashInfo{'ToSc'}{'missing'}; + return $SwashInfo{'ToSc'}{'missing'}; } elsif (exists $SCRIPTS{$arg}) { return _dclone $SCRIPTS{$arg}; } @@ -2641,14 +2657,12 @@ about (and which is documented below in L</prop_invmap()>). our %string_property_loose_to_name; our %ambiguous_names; our %loose_perlprop_to_name; -our %prop_aliases; sub prop_aliases ($) { my $prop = $_[0]; return unless defined $prop; require "unicore/UCD.pl"; - require "unicore/Heavy.pl"; # The property name may be loosely or strictly matched; we don't know yet. # But both types use lower-case. @@ -2656,7 +2670,7 @@ sub prop_aliases ($) { # It is loosely matched if its lower case isn't known to be strict. my $list_ref; - if (! exists $utf8::stricter_to_file_of{$prop}) { + if (! exists $stricter_to_file_of{$prop}) { my $loose = loose_name($prop); # There is a hash that converts from any loose name to its standard @@ -2664,7 +2678,7 @@ sub prop_aliases ($) { # as a key into another hash. The whole concept is for memory # savings, as the second hash doesn't have to have all the # combinations. Actually, there are two hashes that do the - # conversion. One is stored in Heavy.pl) for looking up properties + # conversion. One is stored in UCD.pl) for looking up properties # matchable in regexes. This function needs to access string # properties, which aren't available in regexes, so a second # conversion hash is made for them (stored in UCD.pl). 
Look in the @@ -2678,7 +2692,7 @@ sub prop_aliases ($) { else { my $retrying = 0; # bool. ? Has an initial 'is' been stripped RETRY: - if (exists $utf8::loose_property_name_of{$loose} + if (exists $loose_property_name_of{$loose} && (! $retrying || ! exists $ambiguous_names{$loose})) { @@ -2691,7 +2705,7 @@ sub prop_aliases ($) { # for the gc, script, or block properties, and the stripped # 'is' means that they mean one of those, and not one of # these - $prop = $utf8::loose_property_name_of{$loose}; + $prop = $loose_property_name_of{$loose}; } elsif (exists $loose_perlprop_to_name{$loose}) { @@ -2706,7 +2720,7 @@ sub prop_aliases ($) { $list_ref = \@list; } } - elsif (! exists $utf8::loose_to_file_of{$loose}) { + elsif (! exists $loose_to_file_of{$loose}) { # loose_to_file_of is a complete list of loose names. If not # there, the input is unknown. @@ -2964,7 +2978,7 @@ sub prop_value_aliases ($$) { # a Perl-extension All perl extensions are binary, hence are # enumerateds, which means that we know that the input unknown value # is illegal. - return if ! exists $Unicode::UCD::prop_aliases{$prop}; + return if ! exists $prop_aliases{$prop}; # Otherwise, we assume it's valid, as documented. return $value; @@ -2989,7 +3003,7 @@ sub prop_value_aliases ($$) { # %prop_value_aliases is set up so that the strict matches will appear as # if they were in loose form. Thus, if the non-loose version is legal, # we're ok, can skip the further check. - if (! exists $utf8::stricter_to_file_of{"$prop=$value"} + if (! exists $stricter_to_file_of{"$prop=$value"} # We're also ok and skip the further check if value loosely matches. 
# mktables has verified that no strict name under loose rules maps to @@ -3002,12 +3016,12 @@ sub prop_value_aliases ($$) { # 2) When the values are numeric, in which case we need to look # further, but their squeezed-out loose values will be in # %stricter_to_file_of - && exists $utf8::stricter_to_file_of{"$prop=$loose_value"}) + && exists $stricter_to_file_of{"$prop=$loose_value"}) { # The only thing that's legal loosely under strict is that can have an # underscore between digit pairs XXX while ($value =~ s/(\d)_(\d)/$1$2/g) {} - return unless exists $utf8::stricter_to_file_of{"$prop=$value"}; + return unless exists $stricter_to_file_of{"$prop=$value"}; } # Here, we know that the combination exists. Return it. @@ -3026,7 +3040,7 @@ sub prop_value_aliases ($$) { } # All 1 bits but the top one is the largest possible IV. -$Unicode::UCD::MAX_CP = (~0) >> 1; +$MAX_CP = (~0) >> 1; =pod @@ -3212,7 +3226,7 @@ sub prop_invlist ($;$) { # beyond the end of the range. no warnings 'portable'; my $end = hex $hex_end; - last if $end == $Unicode::UCD::MAX_CP; + last if $end == $MAX_CP; push @invlist, $end + 1; } else { # No end of range, is a single code point. 
@@ -3873,8 +3887,8 @@ RETRY: my %blocks; $blocks{'LIST'} = ""; $blocks{'TYPE'} = "ToBlk"; - $utf8::SwashInfo{ToBlk}{'missing'} = "No_Block"; - $utf8::SwashInfo{ToBlk}{'format'} = "s"; + $SwashInfo{ToBlk}{'missing'} = "No_Block"; + $SwashInfo{ToBlk}{'format'} = "s"; foreach my $block (@BLOCKS) { $blocks{'LIST'} .= sprintf "%x\t%x\t%s\n", @@ -3957,8 +3971,8 @@ RETRY: } # End of loop through all the names $names{'TYPE'} = "ToNa"; - $utf8::SwashInfo{ToNa}{'missing'} = ""; - $utf8::SwashInfo{ToNa}{'format'} = "n"; + $SwashInfo{ToNa}{'missing'} = ""; + $SwashInfo{ToNa}{'format'} = "n"; $swash = \%names; } elsif ($second_try =~ / ^ ( d [mt] ) $ /x) { @@ -3970,8 +3984,8 @@ RETRY: if ($second_try eq 'dt') { $decomps{'TYPE'} = "ToDt"; - $utf8::SwashInfo{'ToDt'}{'missing'} = "None"; - $utf8::SwashInfo{'ToDt'}{'format'} = "s"; + $SwashInfo{'ToDt'}{'missing'} = "None"; + $SwashInfo{'ToDt'}{'format'} = "s"; } # 'dm' is handled below, with 'nfkccf' $decomps{'LIST'} = ""; @@ -4165,8 +4179,8 @@ RETRY: $revised_swash{'SPECIALS'} = $swash->{'SPECIALS'}; $swash = \%revised_swash; - $utf8::SwashInfo{$type}{'missing'} = 0; - $utf8::SwashInfo{$type}{'format'} = 'a'; + $SwashInfo{$type}{'missing'} = 0; + $SwashInfo{$type}{'format'} = 'a'; } } @@ -4180,10 +4194,10 @@ RETRY: # All properties but binary ones should have 'missing' and 'format' # entries - $missing = $utf8::SwashInfo{$returned_prop}{'missing'}; + $missing = $SwashInfo{$returned_prop}{'missing'}; $missing = 'N' unless defined $missing; - $format = $utf8::SwashInfo{$returned_prop}{'format'}; + $format = $SwashInfo{$returned_prop}{'format'}; $format = 'b' unless defined $format; my $requires_adjustment = $format =~ /^a/; @@ -4379,7 +4393,7 @@ RETRY: # iteration will pop this, unless there is no next iteration, and # we have filled all of the Unicode code space, so check for that # and skip. 
- if ($end < $Unicode::UCD::MAX_CP) { + if ($end < $MAX_CP) { push @invlist, $end + 1; push @invmap, $missing; } diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index f8e0a7af26..327a38d70b 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -903,8 +903,6 @@ is(prop_aliases("Is_Is_Any"), undef, is(prop_aliases("ccc=vr"), undef, "prop_aliases('ccc=vr') doesn't generate a warning"); -require "unicore/Heavy.pl"; - # Keys are lists of properties. Values are defined if have been tested. my %props; @@ -995,7 +993,7 @@ while (<$props>) { # official properties. We have no way of knowing if mktables omitted a Perl # extension or not, but we do the best we can from its generated lists -foreach my $alias (sort keys %utf8::loose_to_file_of) { +foreach my $alias (sort keys %Unicode::UCD::loose_to_file_of) { next if $alias =~ /=/; my $lc_name = lc $alias; my $loose = &Unicode::UCD::loose_name($lc_name); @@ -1050,7 +1048,7 @@ for my $prop (qw(Alnum Blank Cntrl Digit Graph Print Word XDigit)) { } my $done_equals = 0; -foreach my $alias (keys %utf8::stricter_to_file_of) { +foreach my $alias (keys %Unicode::UCD::stricter_to_file_of) { if ($alias =~ /=/) { # Only test one case where there is an equals next if $done_equals; $done_equals = 1; @@ -1240,7 +1238,7 @@ while (<$propvalues>) { } # End of SKIP block # And test as best we can, the non-official pva's that mktables generates. 
-foreach my $hash (\%utf8::loose_to_file_of, \%utf8::stricter_to_file_of) { +foreach my $hash (\%Unicode::UCD::loose_to_file_of, \%Unicode::UCD::stricter_to_file_of) { foreach my $test (sort keys %$hash) { next if exists $pva_tested{$test}; # Skip if already tested @@ -1248,7 +1246,7 @@ foreach my $hash (\%utf8::loose_to_file_of, \%utf8::stricter_to_file_of) { next unless defined $value; # prop_value_aliases() requires an input # 'value' my $mod_value; - if ($hash == \%utf8::loose_to_file_of) { + if ($hash == \%Unicode::UCD::loose_to_file_of) { # Add extra characters to test loose-match rhs value $mod_value = "$extra_chars$value"; @@ -1466,7 +1464,7 @@ my %tested_invlist; # Look at everything we think that mktables tells us exists, both loose and # strict -foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of) +foreach my $set_of_tables (\%Unicode::UCD::stricter_to_file_of, \%Unicode::UCD::loose_to_file_of) { foreach my $table (sort keys %$set_of_tables) { @@ -1475,7 +1473,7 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of if (defined $value) { # If this is to be loose matched, add in characters to test that. - if ($set_of_tables == \%utf8::loose_to_file_of) { + if ($set_of_tables == \%Unicode::UCD::loose_to_file_of) { $value = "$extra_chars$value"; } else { # Strict match @@ -1497,7 +1495,7 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of # Like above, use loose if required, and insert underscores # between digits if strict. - if ($set_of_tables == \%utf8::loose_to_file_of) { + if ($set_of_tables == \%Unicode::UCD::loose_to_file_of) { $mod_table = "$extra_chars$table"; } else { @@ -1531,7 +1529,7 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of # it being an actual file to read. The file is an index in to the # array of the definitions if ($file =~ s!^#/!!) 
{ - $official = $utf8::inline_definitions[$file]; + $official = $Unicode::UCD::inline_definitions[$file]; } else { $official = do "unicore/lib/$file.pl"; @@ -1650,7 +1648,7 @@ my %tested_invmaps; # returned by the function with the tables that mktables generates. Some of # these tables are directly stored as files on disk, in either the unicore or # unicore/To directories, and most should be listed in the mktables generated -# hash %utf8::loose_property_to_file_of, with a few additional ones that this +# hash %Unicode::UCD::loose_property_to_file_of, with a few additional ones that this # handles specially. For these, the files are read in directly, massaged, and # compared with what invmap() returns. The SPECIALS hash in some of these # files overrides values in the main part of the file. @@ -1690,13 +1688,13 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { # This legacy property is otherwise unknown to Perl; so shouldn't # have any information about it already. - ok(! exists $utf8::loose_property_to_file_of{$loose_prop}, + ok(! exists $Unicode::UCD::loose_property_to_file_of{$loose_prop}, "There isn't a hash entry for file lookup of $prop"); - $utf8::loose_property_to_file_of{$loose_prop} = $base_file; + $Unicode::UCD::loose_property_to_file_of{$loose_prop} = $base_file; - ok(! exists $utf8::file_to_swash_name{$loose_prop}, + ok(! 
exists $Unicode::UCD::file_to_swash_name{$loose_prop}, "There isn't a hash entry for swash lookup of $prop"); - $utf8::file_to_swash_name{$base_file} + $Unicode::UCD::file_to_swash_name{$base_file} = $legacy_props{$prop}->{'swash_name'}; $display_prop = $prop; $is_legacy = 1; @@ -1916,8 +1914,8 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { if ($name ne 'na' && ($name eq 'blk' || defined - ($base_file = $utf8::loose_property_to_file_of{$proxy_prop}) - || exists $utf8::loose_to_file_of{$proxy_prop} + ($base_file = $Unicode::UCD::loose_property_to_file_of{$proxy_prop}) + || exists $Unicode::UCD::loose_to_file_of{$proxy_prop} || $name eq "dm")) { # In the above, blk is done unconditionally, as we need to test that @@ -1987,7 +1985,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { # work would be needed in the unlikely event that an inverted # property comes along without these characteristics if (!defined $base_file) { - $base_file = $utf8::loose_to_file_of{$proxy_prop}; + $base_file = $Unicode::UCD::loose_to_file_of{$proxy_prop}; $is_binary = ($base_file =~ s/!//) ? -1 : 1; $base_file = "lib/$base_file" unless $base_file =~ m!^#/!; } @@ -1996,7 +1994,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { # special case where the contents are in-lined with semi-colons # meaning new-lines, instead of it being an actual file to read. if ($base_file =~ s!^#/!!) { - $official = $utf8::inline_definitions[$base_file]; + $official = $Unicode::UCD::inline_definitions[$base_file]; } else { $official = do "unicore/$base_file.pl"; @@ -2034,11 +2032,11 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { # Get the format for the file, and if there are any special elements, # get a reference to them. 
- my $swash_name = $utf8::file_to_swash_name{$base_file}; + my $swash_name = $Unicode::UCD::file_to_swash_name{$base_file}; my $specials_ref; my $file_format; # The 'format' given inside the file if ($swash_name) { - $specials_ref = $utf8::SwashInfo{$swash_name}{'specials_name'}; + $specials_ref = $Unicode::UCD::SwashInfo{$swash_name}{'specials_name'}; if ($specials_ref) { # Convert from the name to the actual reference. @@ -2046,7 +2044,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { $specials_ref = \%{$specials_ref}; } - $file_format = $utf8::SwashInfo{$swash_name}{'format'}; + $file_format = $Unicode::UCD::SwashInfo{$swash_name}{'format'}; } # Leading zeros used to be used with the values in the files that give, @@ -2711,8 +2709,7 @@ if ($v_unicode_version ge v3.1.0) { # No Script property before this ok($/ eq $input_record_separator, "The record separator didn't get overridden"); -@warnings = grep { $_ !~ /Use of '.*' in \\p\{} or \\P\{} is deprecated because/ } @warnings; -if (! ok(@warnings == 0, "The only warnings generated are about deprecated properties")) { +if (! ok(@warnings == 0, "No warnings were generated")) { diag(join "\n", "The warnings are:", @warnings); } diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 78d522f7cb..5f032b4ea4 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -1321,8 +1321,7 @@ my $EXTRACTED_DIR = (-d 'extracted') ? 'extracted' : ""; my $EXTRACTED = ($EXTRACTED_DIR) ? "$EXTRACTED_DIR/" : ""; my $AUXILIARY = 'auxiliary'; -# Hashes and arrays that will eventually go into Heavy.pl for the use of -# UCD.pl for the use of UCD.pm +# Hashes and arrays that will eventually go into UCD.pl for the use of UCD.pm my %loose_to_file_of; # loosely maps table names to their respective # files my %stricter_to_file_of; # same; but for stricter mapping. @@ -1335,7 +1334,7 @@ my @inline_definitions = "V0"; # Each element gives a definition of a unique # this array. 
The 0th element is initialized to # the definition for a zero length inversion list my %file_to_swash_name; # Maps the file name to its corresponding key name - # in the hash %utf8::SwashInfo + # in the hash %Unicode::UCD::SwashInfo my %nv_floating_to_rational; # maps numeric values floating point numbers to # their rational equivalent my %loose_property_name_of; # Loosely maps (non_string) property names to @@ -6084,7 +6083,7 @@ END # utf8.c, which can't really deal with empty tables, but it can # deal with a table that matches nothing, as the inverse of 'All' # does. - push @OUT, "!utf8::All\n"; + push @OUT, "!Unicode::UCD::All\n"; } elsif ($self->name eq 'N' @@ -6102,7 +6101,7 @@ END && $range_list{$addr}->ranges > 15 && ! $annotate) # Under --annotate, want to see everything { - push @OUT, "!utf8::" . $self->property->name . "\n"; + push @OUT, "!Unicode::UCD::" . $self->property->name . "\n"; } else { my $range_size_1 = $range_size_1{$addr}; @@ -7576,7 +7575,7 @@ END # respective header and trailer my $specials_name = ""; if (@multi_code_point_maps) { - $specials_name = "utf8::ToSpec$name"; + $specials_name = "Unicode::UCD::ToSpec$name"; $pre_body .= <<END; # Some code points require special handling because their mappings are each to @@ -7617,18 +7616,18 @@ END # The name this table is to be known by, with the format of the mappings in # the main body of the table, and what all code points missing from this file # map to. 
-\$utf8::SwashInfo{'To$name'}{'format'} = '$format'; # $map_table_formats{$format} +\$Unicode::UCD::SwashInfo{'To$name'}{'format'} = '$format'; # $map_table_formats{$format} END if ($specials_name) { $return .= <<END; -\$utf8::SwashInfo{'To$name'}{'specials_name'} = '$specials_name'; # Name of hash of special mappings +\$Unicode::UCD::SwashInfo{'To$name'}{'specials_name'} = '$specials_name'; # Name of hash of special mappings END } my $default_map = $default_map{$addr}; # For $CODE_POINT default maps and using adjustments, instead the default # becomes zero. - $return .= "\$utf8::SwashInfo{'To$name'}{'missing'} = '" + $return .= "\$Unicode::UCD::SwashInfo{'To$name'}{'missing'} = '" . (($output_adjusted && $default_map eq $CODE_POINT) ? "0" : $default_map) @@ -7797,7 +7796,7 @@ use parent '-norequire', '_Base_Table'; # two tables are unrelated but equivalent, one is arbitrarily chosen as the # 'leader', and the others are 'equivalents'. This concept is useful # to minimize the number of tables written out. Only one file is used for -# any identical set of code points, with entries in Heavy.pl mapping all +# any identical set of code points, with entries in UCD.pl mapping all # the involved tables to it. # # Related tables will always be identical; we set them up to be so. Thus @@ -10046,7 +10045,7 @@ sub standardize ($) { sub UCD_name ($$) { # Returns the name that Unicode::UCD will use to find a table. XXX - # perhaps this function should be placed somewhere, like Heavy.pl so that + # perhaps this function should be placed somewhere, like UCD.pm so that # Unicode::UCD can use it directly without duplicating code that can get # out-of sync. 
@@ -16174,7 +16173,7 @@ sub register_file_for_name($$$) { # Keep a list of the deprecated properties and their filenames if ($deprecated && $complement == 0) { - $utf8::why_deprecated{$sub_filename} = $deprecated; + $Unicode::UCD::why_deprecated{$sub_filename} = $deprecated; } # And a substitute table, if any, for case-insensitive matching @@ -17666,146 +17665,6 @@ END return; } -sub make_Heavy () { - # Create and write Heavy.pl, which passes info about the tables to - # Unicode::UCD - - # Stringify structures for output - my $loose_property_name_of - = simple_dumper(\%loose_property_name_of, ' ' x 4); - chomp $loose_property_name_of; - - my $strict_property_name_of - = simple_dumper(\%strict_property_name_of, ' ' x 4); - chomp $strict_property_name_of; - - my $stricter_to_file_of = simple_dumper(\%stricter_to_file_of, ' ' x 4); - chomp $stricter_to_file_of; - - my $inline_definitions = simple_dumper(\@inline_definitions, " " x 4); - chomp $inline_definitions; - - my $loose_to_file_of = simple_dumper(\%loose_to_file_of, ' ' x 4); - chomp $loose_to_file_of; - - my $nv_floating_to_rational - = simple_dumper(\%nv_floating_to_rational, ' ' x 4); - chomp $nv_floating_to_rational; - - my $why_deprecated = simple_dumper(\%utf8::why_deprecated, ' ' x 4); - chomp $why_deprecated; - - # We set the key to the file when we associated files with tables, but we - # couldn't do the same for the value then, as we might not have the file - # for the alternate table figured out at that time. 
- foreach my $cased (keys %caseless_equivalent_to) { - my @path = $caseless_equivalent_to{$cased}->file_path; - my $path; - if ($path[0] eq "#") { # Pseudo-directory '#' - $path = join '/', @path; - } - else { # Gets rid of lib/ - $path = join '/', @path[1, -1]; - } - $caseless_equivalent_to{$cased} = $path; - } - my $caseless_equivalent_to - = simple_dumper(\%caseless_equivalent_to, ' ' x 4); - chomp $caseless_equivalent_to; - - my $loose_property_to_file_of - = simple_dumper(\%loose_property_to_file_of, ' ' x 4); - chomp $loose_property_to_file_of; - - my $strict_property_to_file_of - = simple_dumper(\%strict_property_to_file_of, ' ' x 4); - chomp $strict_property_to_file_of; - - my $file_to_swash_name = simple_dumper(\%file_to_swash_name, ' ' x 4); - chomp $file_to_swash_name; - - my @heavy = <<END; -$HEADER -$INTERNAL_ONLY_HEADER - -# This file is for the use of Unicode::UCD - -# Maps Unicode (not Perl single-form extensions) property names in loose -# standard form to their corresponding standard names -\%utf8::loose_property_name_of = ( -$loose_property_name_of -); - -# Same, but strict names -\%utf8::strict_property_name_of = ( -$strict_property_name_of -); - -# Gives the definitions (in the form of inversion lists) for those properties -# whose definitions aren't kept in files -\@utf8::inline_definitions = ( -$inline_definitions -); - -# Maps property, table to file for those using stricter matching. For paths -# whose directory is '#', the file is in the form of a numeric index into -# \@inline_definitions -\%utf8::stricter_to_file_of = ( -$stricter_to_file_of -); - -# Maps property, table to file for those using loose matching. 
For paths -# whose directory is '#', the file is in the form of a numeric index into -# \@inline_definitions -\%utf8::loose_to_file_of = ( -$loose_to_file_of -); - -# Maps floating point to fractional form -\%utf8::nv_floating_to_rational = ( -$nv_floating_to_rational -); - -# If a %e floating point number doesn't have this number of digits in it after -# the decimal point to get this close to a fraction, it isn't considered to be -# that fraction even if all the digits it does have match. -\$utf8::e_precision = $E_FLOAT_PRECISION; - -# Deprecated tables to generate a warning for. The key is the file containing -# the table, so as to avoid duplication, as many property names can map to the -# file, but we only need one entry for all of them. -\%utf8::why_deprecated = ( -$why_deprecated -); - -# A few properties have different behavior under /i matching. This maps -# those to substitute files to use under /i. -\%utf8::caseless_equivalent = ( -$caseless_equivalent_to -); - -# Property names to mapping files -\%utf8::loose_property_to_file_of = ( -$loose_property_to_file_of -); - -# Property names to mapping files -\%utf8::strict_property_to_file_of = ( -$strict_property_to_file_of -); - -# Files to the swash names within them. -\%utf8::file_to_swash_name = ( -$file_to_swash_name -); - -1; -END - - main::write("Heavy.pl", 0, \@heavy); # The 0 means no utf8. 
- return; -} - sub make_Name_pm () { # Create and write Name.pm, which contains subroutines and data to use in # conjunction with Name.pl @@ -18060,6 +17919,60 @@ sub make_UCD () { # Create and write UCD.pl, which passes info about the tables to # Unicode::UCD + # Stringify structures for output + my $loose_property_name_of + = simple_dumper(\%loose_property_name_of, ' ' x 4); + chomp $loose_property_name_of; + + my $strict_property_name_of + = simple_dumper(\%strict_property_name_of, ' ' x 4); + chomp $strict_property_name_of; + + my $stricter_to_file_of = simple_dumper(\%stricter_to_file_of, ' ' x 4); + chomp $stricter_to_file_of; + + my $inline_definitions = simple_dumper(\@inline_definitions, " " x 4); + chomp $inline_definitions; + + my $loose_to_file_of = simple_dumper(\%loose_to_file_of, ' ' x 4); + chomp $loose_to_file_of; + + my $nv_floating_to_rational + = simple_dumper(\%nv_floating_to_rational, ' ' x 4); + chomp $nv_floating_to_rational; + + my $why_deprecated = simple_dumper(\%Unicode::UCD::why_deprecated, ' ' x 4); + chomp $why_deprecated; + + # We set the key to the file when we associated files with tables, but we + # couldn't do the same for the value then, as we might not have the file + # for the alternate table figured out at that time. 
+ foreach my $cased (keys %caseless_equivalent_to) { + my @path = $caseless_equivalent_to{$cased}->file_path; + my $path; + if ($path[0] eq "#") { # Pseudo-directory '#' + $path = join '/', @path; + } + else { # Gets rid of lib/ + $path = join '/', @path[1, -1]; + } + $caseless_equivalent_to{$cased} = $path; + } + my $caseless_equivalent_to + = simple_dumper(\%caseless_equivalent_to, ' ' x 4); + chomp $caseless_equivalent_to; + + my $loose_property_to_file_of + = simple_dumper(\%loose_property_to_file_of, ' ' x 4); + chomp $loose_property_to_file_of; + + my $strict_property_to_file_of + = simple_dumper(\%strict_property_to_file_of, ' ' x 4); + chomp $strict_property_to_file_of; + + my $file_to_swash_name = simple_dumper(\%file_to_swash_name, ' ' x 4); + chomp $file_to_swash_name; + # Create a mapping from each alias of Perl single-form extensions to all # its equivalent aliases, for quick look-up. my %perlprop_to_aliases; @@ -18236,6 +18149,75 @@ $INTERNAL_ONLY_HEADER \$Unicode::UCD::HANGUL_BEGIN = $SBase_string; \$Unicode::UCD::HANGUL_COUNT = $SCount; +# Maps Unicode (not Perl single-form extensions) property names in loose +# standard form to their corresponding standard names +\%Unicode::UCD::loose_property_name_of = ( +$loose_property_name_of +); + +# Same, but strict names +\%Unicode::UCD::strict_property_name_of = ( +$strict_property_name_of +); + +# Gives the definitions (in the form of inversion lists) for those properties +# whose definitions aren't kept in files +\@Unicode::UCD::inline_definitions = ( +$inline_definitions +); + +# Maps property, table to file for those using stricter matching. For paths +# whose directory is '#', the file is in the form of a numeric index into +# \@inline_definitions +\%Unicode::UCD::stricter_to_file_of = ( +$stricter_to_file_of +); + +# Maps property, table to file for those using loose matching. 
For paths +# whose directory is '#', the file is in the form of a numeric index into +# \@inline_definitions +\%Unicode::UCD::loose_to_file_of = ( +$loose_to_file_of +); + +# Maps floating point to fractional form +\%Unicode::UCD::nv_floating_to_rational = ( +$nv_floating_to_rational +); + +# If a %e floating point number doesn't have this number of digits in it after +# the decimal point to get this close to a fraction, it isn't considered to be +# that fraction even if all the digits it does have match. +\$Unicode::UCD::e_precision = $E_FLOAT_PRECISION; + +# Deprecated tables to generate a warning for. The key is the file containing +# the table, so as to avoid duplication, as many property names can map to the +# file, but we only need one entry for all of them. +\%Unicode::UCD::why_deprecated = ( +$why_deprecated +); + +# A few properties have different behavior under /i matching. This maps +# those to substitute files to use under /i. +\%Unicode::UCD::caseless_equivalent = ( +$caseless_equivalent_to +); + +# Property names to mapping files +\%Unicode::UCD::loose_property_to_file_of = ( +$loose_property_to_file_of +); + +# Property names to mapping files +\%Unicode::UCD::strict_property_to_file_of = ( +$strict_property_to_file_of +); + +# Files to the swash names within them. +\%Unicode::UCD::file_to_swash_name = ( +$file_to_swash_name +); + # Keys are all the possible "prop=value" combinations, in loose form; values # are the standard loose name for the 'value' part of the key \%Unicode::UCD::loose_to_standard_value = ( @@ -18537,7 +18519,7 @@ sub write_all_tables() { # they will be identical, so don't have to compare tables # frequently. The tables have to have the same status to # share a file, so add this to the bucket hash. (The - # reason for this latter is that Heavy.pl associates a + # reason for this latter is that UCD.pm associates a # status with a file.) 
We don't check tables that are # inverses of others, as it would lead to some coding # complications, and checking all the regular ones should @@ -18735,10 +18717,10 @@ sub write_all_tables() { my $is_property = ($table == $property); # For very short tables, instead of writing them out to actual files, - # we in-line their inversion list definitions into Heavy.pl. The + # we in-line their inversion list definitions into UCD.pm. The # definition replaces the file name, and the special pseudo-directory # '#' is used to signal this. This significantly cuts down the number - # of files written at little extra cost to the hashes in Heavy.pl. + # of files written at little extra cost to the hashes in UCD.pm. # And it means, no run-time files to read to get the definitions. if (! $is_property && ! $annotate # For annotation, we want to explicitly show @@ -18821,8 +18803,7 @@ sub write_all_tables() { # Write out the pod file make_pod; - # And Heavy.pl, Name.pm, UCD.pl - make_Heavy; + # And Name.pm, UCD.pl make_Name_pm; make_UCD; diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index 62f7b8e2bd..45d2934ba2 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -3,7 +3,7 @@ # This file is built by regen/mk_invlists.pl from Unicode::UCD. # Any changes made here will be lost! 
-%utf8::uni_prop_ptrs_indices = ( +%Unicode::UCD::uni_prop_ptrs_indices = ( '_perl_any_folds' => 1154, '_perl_charname_begin' => 1155, '_perl_charname_continue' => 1156, @@ -1215,7 +1215,7 @@ 1; # Generated from: -# 486ed9a6bcca738e67b88da8199ebc831063808044dc1d0ea98b494ab59ee34a lib/Unicode/UCD.pm +# 64f46a4b25d29a7f952077ee277909df8599a7a803759805c865914d981671a2 lib/Unicode/UCD.pm # 5e91b649379ec79af7cfb6b09410a24557cba4c6d733cd0a2b8a78a1448736d2 lib/unicore/ArabicShaping.txt # f5feb19cd084b2b1568fbc0f94f4b4b54941406e7fb36c7570f8352fd5022dbe lib/unicore/BidiBrackets.txt # e6cbd8ffe94f2e0fbfa6695d6c06c1e72eef7d3aa93cb6329d111285198b5e62 lib/unicore/BidiMirroring.txt @@ -1261,9 +1261,9 @@ # 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt # 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt # 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt -# 74dc5134f7e509239e1b3c8af319df951d1f41f917eceae9bd113c6740a613e6 lib/unicore/mktables +# 5214f368c189077a2a748b7ef0a5300abd0d012be568d18c1bbd8bede55818ae lib/unicore/mktables # a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl # e9283c761c5a95e3379384ca47c13a284f08d743c2be6e5091f1152b1b6b7a37 regen/mk_PL_charclass.pl -# 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a regen/mk_invlists.pl +# 74442760b048f85cf5e9e87c3baffc94e861ba397dda0d33f4c22b40ef7efbe6 regen/mk_invlists.pl # ex: set ro: diff --git a/regcharclass.h b/regcharclass.h index 4d3c51d505..39cef9f506 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -1855,7 +1855,7 @@ #endif /* PERL_REGCHARCLASS_H_ */ /* Generated from: - * 486ed9a6bcca738e67b88da8199ebc831063808044dc1d0ea98b494ab59ee34a lib/Unicode/UCD.pm + * 
64f46a4b25d29a7f952077ee277909df8599a7a803759805c865914d981671a2 lib/Unicode/UCD.pm * 5e91b649379ec79af7cfb6b09410a24557cba4c6d733cd0a2b8a78a1448736d2 lib/unicore/ArabicShaping.txt * f5feb19cd084b2b1568fbc0f94f4b4b54941406e7fb36c7570f8352fd5022dbe lib/unicore/BidiBrackets.txt * e6cbd8ffe94f2e0fbfa6695d6c06c1e72eef7d3aa93cb6329d111285198b5e62 lib/unicore/BidiMirroring.txt @@ -1901,7 +1901,7 @@ * 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt * 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt * 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt - * 74dc5134f7e509239e1b3c8af319df951d1f41f917eceae9bd113c6740a613e6 lib/unicore/mktables + * 5214f368c189077a2a748b7ef0a5300abd0d012be568d18c1bbd8bede55818ae lib/unicore/mktables * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 8cffbf838b6e8ea5310e4ad2e0498ad9c1d87d4babead678081859473591317c regen/regcharclass.pl diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index 6853a64272..8136f437e7 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -13,7 +13,7 @@ use Unicode::UCD qw(prop_aliases ); require './regen/regen_lib.pl'; require './regen/charset_translations.pl'; -require './lib/unicore/Heavy.pl'; +require './lib/unicore/UCD.pl'; use re "/aa"; # This program outputs charclass_invlists.h, which contains various inversion @@ -93,8 +93,8 @@ my @a2n; my %prop_name_aliases; # Invert this hash so that for each canonical name, we get a list of things # that map to it (excluding itself) -foreach my $name (sort keys %utf8::loose_property_name_of) { - my $canonical = $utf8::loose_property_name_of{$name}; +foreach my $name (sort keys %Unicode::UCD::loose_property_name_of) { + my $canonical = 
$Unicode::UCD::loose_property_name_of{$name}; push @{$prop_name_aliases{$canonical}}, $name if $canonical ne $name; } @@ -2382,12 +2382,12 @@ my $float_e_format = qr/ ^ -? \d \. \d+ e [-+] \d+ $ /x; # 'nv=5.00e-01' => 'Nv/1_2', # # %stricter_to_file_of contains far more than just the rationals. Instead we -# use %utf8::nv_floating_to_rational which should have an entry for each +# use %Unicode::UCD::nv_floating_to_rational which should have an entry for each # nv in the former hash. my %floating_to_file_of; -foreach my $key (keys %utf8::nv_floating_to_rational) { - my $value = $utf8::nv_floating_to_rational{$key}; - $floating_to_file_of{$key} = $utf8::stricter_to_file_of{"nv=$value"}; +foreach my $key (keys %Unicode::UCD::nv_floating_to_rational) { + my $value = $Unicode::UCD::nv_floating_to_rational{$key}; + $floating_to_file_of{$key} = $Unicode::UCD::stricter_to_file_of{"nv=$value"}; } # Properties that are specified with a prop=value syntax @@ -2406,16 +2406,16 @@ foreach my $property (sort or $a =~ /!/ <=> $b =~ /!/ or length $a <=> length $b or $a cmp $b - } keys %utf8::loose_to_file_of, - keys %utf8::stricter_to_file_of, + } keys %Unicode::UCD::loose_to_file_of, + keys %Unicode::UCD::stricter_to_file_of, keys %floating_to_file_of ) { # These two hashes map properties to values that can be considered to # be checksums. If two properties have the same checksum, they have # identical entries. Otherwise they differ in some way. - my $tag = $utf8::loose_to_file_of{$property}; - $tag = $utf8::stricter_to_file_of{$property} unless defined $tag; + my $tag = $Unicode::UCD::loose_to_file_of{$property}; + $tag = $Unicode::UCD::stricter_to_file_of{$property} unless defined $tag; $tag = $floating_to_file_of{$property} unless defined $tag; # The tag may contain an '!' meaning it is identical to the one formed @@ -2452,7 +2452,7 @@ foreach my $property (sort # stand-alone properties. 
no warnings 'once'; next if $rhs eq "" && grep { $alias eq $_ } - keys %utf8::loose_property_to_file_of; + keys %Unicode::UCD::loose_property_to_file_of; my $new_entry = $alias . $rhs; push @this_entries, $new_entry; @@ -2484,9 +2484,9 @@ foreach my $property (sort # Some properties are deprecated. This hash tells us so, and the # warning message to raise if they are used. - if (exists $utf8::why_deprecated{$tag}) { + if (exists $Unicode::UCD::why_deprecated{$tag}) { $deprecated_tags{$enums{$tag}} = scalar @deprecated_messages; - push @deprecated_messages, $utf8::why_deprecated{$tag}; + push @deprecated_messages, $Unicode::UCD::why_deprecated{$tag}; } # Our sort above should have made sure that we see the @@ -3185,7 +3185,7 @@ my $uni_pl = open_new('lib/unicore/uni_keywords.pl', '>', {style => '*', by => 'regen/mk_invlists.pl', from => "Unicode::UCD"}); { - print $uni_pl "\%utf8::uni_prop_ptrs_indices = (\n"; + print $uni_pl "\%Unicode::UCD::uni_prop_ptrs_indices = (\n"; for my $name (sort keys %name_to_index) { print $uni_pl " '$name' => $name_to_index{$name},\n"; } @@ -3211,7 +3211,7 @@ my $keywords_fh = open_new('uni_keywords.h', '>', no warnings 'once'; print $keywords_fh <<"EOF"; /* The precision to use in "%.*e" formats */ -#define PL_E_FORMAT_PRECISION $utf8::e_precision +#define PL_E_FORMAT_PRECISION $Unicode::UCD::e_precision EOF diff --git a/regen/mph.pl b/regen/mph.pl index b045108435..00a36ec1a8 100644 --- a/regen/mph.pl +++ b/regen/mph.pl @@ -479,7 +479,7 @@ unless (caller) { my %hash; { no warnings; - do "../perl/lib/unicore/Heavy.pl"; + do "../perl/lib/unicore/UCD.pl"; %hash= %utf8::loose_to_file_of; } if ($ENV{MERGE_KEYS}) { diff --git a/t/op/utftaint.t b/t/op/utftaint.t index da4f842906..d8fc1f5747 100644 --- a/t/op/utftaint.t +++ b/t/op/utftaint.t @@ -143,7 +143,7 @@ for my $ary ([ascii => 'perl'], [latin1 => "\xB6"]) { SKIP: { if (is_miniperl()) { skip_if_miniperl("Unicode tables not built yet", 2) - unless eval 'require "unicore/Heavy.pl"'; + 
unless eval 'require "unicore/UCD.pl"'; } fresh_perl_is('$a = substr $^X, 0, 0; /\x{100}/i; /$a\x{100}/i || print q,ok,', 'ok', {switches => ["-T", "-l"]}, diff --git a/t/re/regexp.t b/t/re/regexp.t index 7e4f9ebac6..1d30ffbdcf 100644 --- a/t/re/regexp.t +++ b/t/re/regexp.t @@ -69,7 +69,7 @@ BEGIN { @INC = qw '../lib ../ext/re'; if (!defined &DynaLoader::boot_DynaLoader) { # miniperl print("1..0 # Skip Unicode tables not built yet\n"), exit - unless eval 'require "unicore/Heavy.pl"'; + unless eval 'require "unicore/UCD.pl"'; } # Some of the tests need a locale; which one doesn't much matter, except diff --git a/t/re/rt122747.t b/t/re/rt122747.t index c05b0610ff..c9586c289b 100644 --- a/t/re/rt122747.t +++ b/t/re/rt122747.t @@ -12,7 +12,7 @@ BEGIN { } if (is_miniperl()) { skip_all_if_miniperl("Unicode tables not built yet", 2) - unless eval 'require "unicore/Heavy.pl"'; + unless eval 'require "unicore/UCD.pl"'; } plan tests => 3; diff --git a/t/re/uniprops01.t b/t/re/uniprops01.t index 4b4231c7c6..768c609c5a 100644 --- a/t/re/uniprops01.t +++ b/t/re/uniprops01.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/re/uniprops02.t b/t/re/uniprops02.t index 8895ae9ae3..1de337e751 100644 --- a/t/re/uniprops02.t +++ b/t/re/uniprops02.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. 
if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/re/uniprops03.t b/t/re/uniprops03.t index c866407de7..54fd781503 100644 --- a/t/re/uniprops03.t +++ b/t/re/uniprops03.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/re/uniprops04.t b/t/re/uniprops04.t index 7689df0e4d..b1d206d07d 100644 --- a/t/re/uniprops04.t +++ b/t/re/uniprops04.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/re/uniprops05.t b/t/re/uniprops05.t index 0573377547..68bbaf24b7 100644 --- a/t/re/uniprops05.t +++ b/t/re/uniprops05.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. 
if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/re/uniprops06.t b/t/re/uniprops06.t index 74e6c45d4a..6a3dabf198 100644 --- a/t/re/uniprops06.t +++ b/t/re/uniprops06.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/re/uniprops07.t b/t/re/uniprops07.t index fe6795498e..97c80355ee 100644 --- a/t/re/uniprops07.t +++ b/t/re/uniprops07.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/re/uniprops08.t b/t/re/uniprops08.t index a9b412a903..52d5036b57 100644 --- a/t/re/uniprops08.t +++ b/t/re/uniprops08.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. 
if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/re/uniprops09.t b/t/re/uniprops09.t index c9b469b439..d58aafaf94 100644 --- a/t/re/uniprops09.t +++ b/t/re/uniprops09.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/re/uniprops10.t b/t/re/uniprops10.t index 0d0e1edd22..0b78d358b1 100644 --- a/t/re/uniprops10.t +++ b/t/re/uniprops10.t @@ -29,7 +29,7 @@ do '../lib/unicore/TestProp.pl'; # Since TestProp.pl explicitly exits, we will only get here if it # could not load. if (defined &DynaLoader::boot_DynaLoader # not miniperl - || eval 'require "unicore/Heavy.pl"' # or tables are built + || eval 'require "unicore/UCD.pl"' # or tables are built ) { die "Could not run lib/unicore/TestProp.pl: ", $@||$!; } diff --git a/t/run/fresh_perl.t b/t/run/fresh_perl.t index 411ff04b9c..5f0e2ce216 100644 --- a/t/run/fresh_perl.t +++ b/t/run/fresh_perl.t @@ -743,7 +743,7 @@ utf8::upgrade($_); # the original code used a UTF-8 locale (affects STDIN) /^([[:digit:]]+)/; EXPECT ######## [perl #20667] unicode regex vs non-unicode regex -# SKIP: !defined &DynaLoader::boot_DynaLoader && !eval 'require "unicore/Heavy.pl"' +# SKIP: !defined &DynaLoader::boot_DynaLoader && !eval 'require "unicore/UCD.pl"' # (skip under miniperl if Unicode tables are not built yet) $toto = 'Hello'; $toto =~ /\w/; # this line provokes the problem! 
@@ -169,7 +169,7 @@ sub skip_all_without_config { sub skip_all_without_unicode_tables { # (but only under miniperl) if (is_miniperl()) { skip_all_if_miniperl("Unicode tables not built yet") - unless eval 'require "unicore/Heavy.pl"'; + unless eval 'require "unicore/UCD.pl"'; } } diff --git a/uni_keywords.h b/uni_keywords.h index 7983d43f0a..eca8459d1d 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7238,7 +7238,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { } /* Generated from: - * 486ed9a6bcca738e67b88da8199ebc831063808044dc1d0ea98b494ab59ee34a lib/Unicode/UCD.pm + * 64f46a4b25d29a7f952077ee277909df8599a7a803759805c865914d981671a2 lib/Unicode/UCD.pm * 5e91b649379ec79af7cfb6b09410a24557cba4c6d733cd0a2b8a78a1448736d2 lib/unicore/ArabicShaping.txt * f5feb19cd084b2b1568fbc0f94f4b4b54941406e7fb36c7570f8352fd5022dbe lib/unicore/BidiBrackets.txt * e6cbd8ffe94f2e0fbfa6695d6c06c1e72eef7d3aa93cb6329d111285198b5e62 lib/unicore/BidiMirroring.txt @@ -7284,10 +7284,10 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { * 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt * 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt * 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt - * 74dc5134f7e509239e1b3c8af319df951d1f41f917eceae9bd113c6740a613e6 lib/unicore/mktables + * 5214f368c189077a2a748b7ef0a5300abd0d012be568d18c1bbd8bede55818ae lib/unicore/mktables * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * e9283c761c5a95e3379384ca47c13a284f08d743c2be6e5091f1152b1b6b7a37 regen/mk_PL_charclass.pl - * 44a3e3e2047a58e56ed8e3338ad85bedabae470dd119bf0862ca8129545ebf8a regen/mk_invlists.pl - * 
e80fb4dd6c15dc1b543793552ab5c7255a0f7b50d6ca9cce3a30a4dadf187b53 regen/mph.pl + * 74442760b048f85cf5e9e87c3baffc94e861ba397dda0d33f4c22b40ef7efbe6 regen/mk_invlists.pl + * cf1d68efb7d919d302c4005641eae8d36da6d7850816ad374b0c00b45e609f43 regen/mph.pl * ex: set ro: */ diff --git a/vms/descrip_mms.template b/vms/descrip_mms.template index 66b2bfa2fd..96de3e6bb6 100644 --- a/vms/descrip_mms.template +++ b/vms/descrip_mms.template @@ -279,7 +279,7 @@ CRTLOPTS =,$(CRTL)/Options unidatafiles = lib/unicore/Decomposition.pl lib/unicore/TestProp.pl \ lib/unicore/CombiningClass.pl lib/unicore/Name.pl \ lib/unicore/UCD.pl lib/unicore/Name.pm \ - lib/unicore/Heavy.pl lib/unicore/mktables.lst + lib/unicore/mktables.lst # Directories of Unicode data files generated by mktables unidatadirs = lib/unicore/To lib/unicore/lib diff --git a/win32/GNUmakefile b/win32/GNUmakefile index 2d67f35bd3..b6b6746bc6 100644 --- a/win32/GNUmakefile +++ b/win32/GNUmakefile @@ -950,7 +950,7 @@ endif UNIDATAFILES = ..\lib\unicore\Decomposition.pl ..\lib\unicore\TestProp.pl \ ..\lib\unicore\CombiningClass.pl ..\lib\unicore\Name.pl \ ..\lib\unicore\UCD.pl ..\lib\unicore\Name.pm \ - ..\lib\unicore\Heavy.pl ..\lib\unicore\mktables.lst + ..\lib\unicore\mktables.lst # Directories of Unicode data files generated by mktables UNIDATADIR1 = ..\lib\unicore\To diff --git a/win32/makefile.mk b/win32/makefile.mk index df37e345e7..37763483f4 100644 --- a/win32/makefile.mk +++ b/win32/makefile.mk @@ -895,7 +895,7 @@ PERLSTATIC = UNIDATAFILES = ..\lib\unicore\Decomposition.pl ..\lib\unicore\TestProp.pl \ ..\lib\unicore\CombiningClass.pl ..\lib\unicore\Name.pl \ ..\lib\unicore\UCD.pl ..\lib\unicore\Name.pm \ - ..\lib\unicore\Heavy.pl ..\lib\unicore\mktables.lst + ..\lib\unicore\mktables.lst # Directories of Unicode data files generated by mktables UNIDATADIR1 = ..\lib\unicore\To |