diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-10-16 21:44:23 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-10-16 22:17:09 -0600 |
commit | 2430372414b6a23811d2683eed43009456f4f8ea (patch) | |
tree | f89748049b00cc82253e7c2993c1e9e45fb9a638 /lib/Unicode | |
parent | cb4e82463ea6b3435de7bb0b1973c230d010b6c3 (diff) | |
download | perl-2430372414b6a23811d2683eed43009456f4f8ea.tar.gz |
Change mktables output for some tables to use hex
This changes every table in the lib/unicore/To directory that maps from
code point to code point so that the mapped-to code point is expressed
in hexadecimal.
This allows for uniform treatment of these tables in utf8.c, and removes
the final use of strtol() in the (non-CPAN) core. strtol() should be
avoided because it is subject to locale rules, and some older libc
implementations have been buggy. It was used because Perl doesn't have
an efficient way of parsing a decimal number and advancing the parse
pointer past it; we do have such a method for hex numbers.
The input to mktables published by Unicode is also in hex, so this now
conforms to that convention.
This will also facilitate the work currently under way to read in the
tables that find the closing bracket for a given opening one.
Diffstat (limited to 'lib/Unicode')
-rw-r--r-- | lib/Unicode/UCD.pm | 8 | ||||
-rw-r--r-- | lib/Unicode/UCD.t | 11 |
2 files changed, 13 insertions, 6 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 81e671072f..14752ae2b1 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -5,7 +5,7 @@ use warnings; no warnings 'surrogate'; # surrogates can be inputs to this use charnames (); -our $VERSION = '0.54'; +our $VERSION = '0.55'; require Exporter; @@ -548,7 +548,7 @@ sub _read_table ($;$) { my $property = $table =~ s/\.pl//r; $property = $utf8::file_to_swash_name{$property}; my $to_adjust = defined $property - && $utf8::SwashInfo{$property}{'format'} eq 'a'; + && $utf8::SwashInfo{$property}{'format'} =~ / ^ a /x; for (split /^/m, $list) { my ($start, $end, $value) = / ^ (.+?) \t (.*?) \t (.+?) @@ -556,6 +556,8 @@ sub _read_table ($;$) { $ /x; my $decimal_start = hex $start; my $decimal_end = ($end eq "") ? $decimal_start : hex $end; + $value = hex $value if $to_adjust + && $utf8::SwashInfo{$property}{'format'} eq 'ax'; if ($return_hash) { foreach my $i ($decimal_start .. $decimal_end) { $return{$i} = ($to_adjust) @@ -3360,7 +3362,7 @@ RETRY: # Otherwise, convert hex formatted list entries to decimal; add a # 'Y' map for the missing value in binary properties, or # otherwise, use the input map unchanged. - $map = ($format eq 'x') + $map = ($format eq 'x' || $format eq 'ax') ? hex $map : $format eq 'b' ? 'Y' diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 0ba312e632..c21b7a9800 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -1465,7 +1465,7 @@ foreach my $prop (sort keys %props) { my ($start, $end, $value) = / ^ (.+?) \t (.*?) \t (.+?) \s* ( \# .* )? 
$ /x; $end = $start if $end eq ""; - push @list, [ hex $start, hex $end, $value ]; + push @list, [ hex $start, hex $end, hex $value ]; } # For these mappings, the file contains all the simple mappings, @@ -1523,10 +1523,10 @@ foreach my $prop (sort keys %props) { for my $element (@list) { $official .= "\n" if $official; if ($element->[1] == $element->[0]) { - $official .= sprintf "%04X\t\t%s", $element->[0], $element->[2]; + $official .= sprintf "%04X\t\t%X", $element->[0], $element->[2]; } else { - $official .= sprintf "%04X\t%04X\t%s", $element->[0], $element->[1], $element->[2]; + $official .= sprintf "%04X\t%04X\t%X", $element->[0], $element->[1], $element->[2]; } } } @@ -1646,6 +1646,11 @@ foreach my $prop (sort keys %props) { next PROPERTY; } } + elsif ($full_name =~ # These maps are in hex + /(Simple_)?(Case_Folding|(Lower|Title|Upper)case_Mapping)/) + { + $invmap_ref->[$i] = sprintf("%X", $invmap_ref->[$i]); + } elsif ($format eq 'ad' || $format eq 'ale') { # The numerics in the returned map are stored as adjusted |