diff options
author | Karl Williamson <khw@cpan.org> | 2016-01-28 21:31:36 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-02-03 10:34:23 -0700 |
commit | 816a3af06847abb86bc457a107817117165c72c5 (patch) | |
tree | 0f0be444ff107e9e5ca5b253f1aecc3865a7e083 /lib | |
parent | 943e349709f8085fe002fc6394302ac0ffe8bbf6 (diff) | |
download | perl-816a3af06847abb86bc457a107817117165c72c5.tar.gz |
re/uniprops: Fix EBCDIC issue
Things like qr/\s/ expect native code points, but the break test files give Unicode code points, which differ from the native ones on EBCDIC platforms.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/unicore/mktables | 13 |
1 file changed, 7 insertions, 6 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 8cb5f0fc7f..83333dcb89 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -19689,10 +19689,11 @@ my $WB_Extend_or_Format_re = eval 'qr/[\p{WB=Extend}\p{WB=Format}]/'; sub _test_break($$) { # Test various break property matches. The 2nd parameter gives the # property name. The input is a line from auxiliary/*Test.txt for the - # given property. Each such line is a sequence of code points given by - # their hex numbers, separated by the two characters defined just before - # this subroutine that indicate that either there can or cannot be a break - # between the adjacent code points. All these are tested. + # given property. Each such line is a sequence of Unicode (not native) + # code points given by their hex numbers, separated by the two characters + # defined just before this subroutine that indicate that either there can + # or cannot be a break between the adjacent code points. All these are + # tested. # # For the gcb property extra tests are made. if there isn't a break, that # means the sequence forms an extended grapheme cluster, which means that @@ -19736,8 +19737,8 @@ sub _test_break($$) { # non-breaks. next if $line[$i+1] =~ /$nobreak/; - my $lhs = chr hex $line[$i]; - my $rhs = chr hex $line[$i+2]; + my $lhs = chr utf8::unicode_to_native(hex $line[$i]); + my $rhs = chr utf8::unicode_to_native(hex $line[$i+2]); # And it only affects adjacent space characters. next if $lhs !~ /\s/u; |