summary refs log tree commit diff
path: root/lib/utf8_heavy.pl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/utf8_heavy.pl')
-rw-r--r-- lib/utf8_heavy.pl 22
1 files changed, 20 insertions, 2 deletions
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 699a26f1dd..84a81676f0 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -525,9 +525,10 @@ sub _loose_name ($) {
if ($list) {
my $taint = substr($list,0,0); # maintain taint
- # Separate the extras from the code point list, and
- # make sure the latter are well-behaved
+ # Separate the extras from the code point list, and for
+ # user-defined properties, make sure the latter are well-behaved
# for downstream code.
+ if ($user_defined) {
my @tmp = split(/^/m, $list);
my %seen;
no warnings;
@@ -543,6 +544,23 @@ sub _loose_name ($) {
sort { $a->[0] <=> $b->[0] }
map { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] }
grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right
+ }
+ else {
+ # mktables has gone to some trouble to make non-user defined
+ # properties well-behaved, so we can skip the effort we do for
+ # user-defined ones. Any extras are at the very beginning of
+ # the string.
+
+ # This regex splits out the first lines of $list into $1 and
+ # strips them off from $list, until we get one that begins
+ # with a hex number, alone on the line, or followed by a tab.
+ # Either portion may be empty.
+ $list =~ s/ \A ( .*? )
+ (?: \z | (?= ^ [0-9a-fA-F]+ (?: \t | $) ) )
+ //msx;
+
+ $extras = "$taint$1";
+ }
}
if ($none) {