diff options
Diffstat (limited to 'lib/utf8_heavy.pl')
-rw-r--r-- | lib/utf8_heavy.pl | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 699a26f1dd..84a81676f0 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -525,9 +525,10 @@ sub _loose_name ($) {
             if ($list) {
                 my $taint = substr($list,0,0); # maintain taint
 
-                # Separate the extras from the code point list, and
-                # make sure the latter are well-behaved
+                # Separate the extras from the code point list, and for
+                # user-defined properties, make sure the latter are well-behaved
                 # for downstream code.
+                if ($user_defined) {
                 my @tmp = split(/^/m, $list);
                 my %seen;
                 no warnings;
@@ -543,6 +544,23 @@ sub _loose_name ($) {
                     sort { $a->[0] <=> $b->[0] }
                     map { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] }
                     grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right
+                }
+                else {
+                    # mktables has gone to some trouble to make non-user defined
+                    # properties well-behaved, so we can skip the effort we do for
+                    # user-defined ones.  Any extras are at the very beginning of
+                    # the string.
+
+                    # This regex splits out the first lines of $list into $1 and
+                    # strips them off from $list, until we get one that begins
+                    # with a hex number, alone on the line, or followed by a tab.
+                    # Either portion may be empty.
+                    $list =~ s/ \A ( .*? )
+                                (?: \z | (?= ^ [0-9a-fA-F]+ (?: \t | $) ) )
+                              //msx;
+
+                    $extras = "$taint$1";
+                }
             }
 
             if ($none) {