summary refs log tree commit diff
path: root/lib/utf8_heavy.pl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/utf8_heavy.pl')
-rw-r--r-- lib/utf8_heavy.pl 28
1 files changed, 19 insertions, 9 deletions
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 08d901ff69..699a26f1dd 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -524,15 +524,25 @@ sub _loose_name ($) {
if ($list) {
my $taint = substr($list,0,0); # maintain taint
- my @tmp = split(/^/m, $list);
- my %seen;
- no warnings;
- $extras = join '', $taint, grep /^[^0-9a-fA-F]/, @tmp;
- $list = join '', $taint,
- map { $_->[1] }
- sort { $a->[0] <=> $b->[0] }
- map { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] }
- grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right
+
+ # Separate the extras from the code point list, and
+ # make sure the latter are well-behaved
+ # for downstream code.
+ my @tmp = split(/^/m, $list);
+ my %seen;
+ no warnings;
+
+ # The extras are anything that doesn't begin with a hex digit.
+ $extras = join '', $taint, grep /^[^0-9a-fA-F]/, @tmp;
+
+ # Remove the extras, and sort the remaining entries by the
+ # numeric value of their beginning hex digits, removing any
+ # duplicates.
+ $list = join '', $taint,
+ map { $_->[1] }
+ sort { $a->[0] <=> $b->[0] }
+ map { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] }
+ grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right
}
if ($none) {