summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-09-03 13:47:19 -0600
committer Karl Williamson <public@khwilliamson.com> 2011-09-03 17:01:15 -0600
commit 807807b755f89001385a00934f7e5fd6c42ec6ea (patch)
tree 0e33fc2f02650b21e1c79134f898558b49664e3a
parent 046cc26cf77f76bc63fd4d206fef560054f5d298 (diff)
download perl-807807b755f89001385a00934f7e5fd6c42ec6ea.tar.gz
mktables: make_Heavy.pl smaller
During the time that mktables was being heavily revamped, Unicode proposed a number of new properties whose names began with 'Is'. Perl since 5.6 has allowed properties to have an optional 'Is' prefix. Thus there was the possibility that a new property name from Unicode would conflict with one of Perl's name extensions. Thus, the code was written to individually suppress such an extension when there was a conflict; though this would be an undesirable state of affairs, where previously working code would have to be changed due to a new Unicode release. However, it turns out that other Unicode implementors had done the same thing Perl had done, and the protests forced the new Unicode standard to remove the 'Is' from the new properties; and apparently they won't try to do this again. However, the more general case code was still in mktables, doing no harm, except enlarging significantly several of the hashes in Heavy.pl to have essentially double the entries; one for the 'Is', and one without. There have been some murmurings that Heavy.pl takes up too much memory. Further, future commits are planned that would take up space as well, and that could be made much smaller by not having the individual basis for having Is/non-Is. So, this commit takes away the individual property Is/non-Is and replaces it with a blanket approach that has it for all Unicode properties. This requires a single extra statement in utf8_heavy. This has already been added mistakenly, but harmlessly, in commit 45376db692c38e06f27b8331d036652ec5fbb3d1. Note that perl extensions still have the individual Is/non-Is. This is because the blanket approach won't work for them, as some of the 'Is' synonyms do routinely get encroached upon in the case of the Block property. It is documented that one shouldn't use the 'Is' prefix for the Block property, because future Unicode versions could break your code.
-rw-r--r-- lib/unicore/mktables 41
-rw-r--r-- lib/utf8_heavy.pl 10
2 files changed, 25 insertions, 26 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 22364d19ba..f4958ba0cb 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -11309,31 +11309,22 @@ sub finish_Unicode() {
$table->add_range($range->start, $range->end);
}
- # And add the Is_ prefix synonyms for Perl 5.6 compatibility, in which
- # all properties have this optional prefix. These do not get a
- # separate entry in the pod file, because are covered by a wild-card
- # entry
+ # For Perl 5.6 compatibility, all properties matchable in regexes can
+ # have an optional 'Is_' prefix. This is now done in utf8_heavy.pl.
+ # But warn if this creates a conflict with a (new) Unicode property
+ # name, although it appears that Unicode has made a decision never to
+ # begin a property name with 'Is_', so this shouldn't happen.
foreach my $alias ($property->aliases) {
my $Is_name = 'Is_' . $alias->name;
- if (! defined (my $pre_existing = property_ref($Is_name))) {
- $property->add_alias($Is_name,
- Pod_Entry => 0,
- Status => $alias->status,
- Externally_Ok => 0);
- }
- else {
-
- # It seemed too much work to add in these warnings when it
- # appears that Unicode has made a decision never to begin a
- # property name with 'Is_', so this shouldn't happen, but just
- # in case, it is a warning.
+ if (defined (my $pre_existing = property_ref($Is_name))) {
Carp::my_carp(<<END
-There is already an alias named $Is_name (from " . $pre_existing . "), so not
-creating this alias for $property. The generated table and pod files do not
-warn users of this conflict.
+There is already an alias named $Is_name (from " . $pre_existing . "), so
+creating one for $property won't work. This is bad news. If it is not too
+late, get Unicode to back off. Otherwise go back to the old scheme (findable
+from the git blame log for this area of the code that suppressed individual
+aliases that conflict with the new Unicode names. Proceeding anyway.
END
);
- $has_Is_conflicts++;
}
} # End of loop through aliases for this property
} # End of loop through all Unicode properties.
@@ -14270,6 +14261,16 @@ sub make_property_test_script() {
# in the set_final_comment() for Tables
my @table_aliases = $table->aliases;
my @property_aliases = $table->property->aliases;
+
+ # Every property can optionally be prefixed by 'Is_', so test
+ # that those work, by creating such a new alias for each
+ # pre-existing one.
+ push @property_aliases, map { Alias->new("Is_" . $_->name,
+ $_->loose_match,
+ $_->make_pod_entry,
+ $_->externally_ok,
+ $_->status)
+ } @property_aliases;
my $max = max(scalar @table_aliases, scalar @property_aliases);
for my $j (0 .. $max - 1) {
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 326c730eb5..dc8211cb16 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -190,12 +190,10 @@ sub loose_name ($) {
print STDERR __LINE__, ": $property\n" if DEBUG;
# Here it is the compound property=table form. The property
- # name is always loosely matched, which means remove any of
- # these:
-
- $property = loose_name($property);
-
- $property =~ s/^is//;
+ # name is always loosely matched, and always can have an
+ # optional 'is' prefix (which isn't true in the single
+ # form).
+ $property = loose_name($property) =~ s/^is//r;
# And convert to canonical form. Quit if not valid.
$property = $utf8::loose_property_name_of{$property};