diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-02-10 15:13:10 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-02-10 15:54:26 -0700 |
commit | 4f143a729773018c42dbe31ef4414f1d7fd9f9e9 (patch) | |
tree | c06833bef4d765f2558e2f11dcbdc9f1f1d4e1d5 /lib/Unicode | |
parent | d11155ec2b4e3f6cf952e2a25615aec506a8e296 (diff) | |
download | perl-4f143a729773018c42dbe31ef4414f1d7fd9f9e9.tar.gz |
Unicode::UCD::prop_invmap: Store Nv property as adjusted type
By storing this property in a form whose values must be adjusted (by adding
the offset within the range) to get the proper values, its storage size
decreases by more than half.
Diffstat (limited to 'lib/Unicode')
-rw-r--r-- | lib/Unicode/UCD.pm | 59 | ||||
-rw-r--r-- | lib/Unicode/UCD.t | 5 |
2 files changed, 39 insertions, 25 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index ceb491797c..6cefe977a0 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -1324,10 +1324,17 @@ sub _numeric { my $real = $rational[0] / $rational[1]; $real_to_rational{$real} = $value; $value = $real; - } - for my $i ($start .. $end) { - $NUMERIC{$i} = $value; + # Should only be single element, but just in case... + for my $i ($start .. $end) { + $NUMERIC{$i} = $value; + } + } + else { + # The values require adjusting, as is in 'a' format + for my $i ($start .. $end) { + $NUMERIC{$i} = $value + $i - $start; + } } } @@ -2415,43 +2422,43 @@ An example slice is: 0x00B0 0 ... -=item B<C<r>> +=item B<C<ar>> means that all the elements of the map array are either rational numbers or the string C<"NaN">, meaning "Not a Number". A rational number is either an integer, or two integers separated by a solidus (C<"/">). The second integer represents the denominator of the division implied by the solidus, and is -guaranteed not to be 0. If you want to convert them to scalar numbers, you +guaranteed not to be 0. When the element is a plain integer (without the +solidus), it may need to be adjusted to get the correct value by adding the +offset, just as other C<"a"> properties. No adjustment is needed for +fractions, as the range is guaranteed to have just a single element, and so +the offset is always 0. + +If you want to convert the returned map to entirely scalar numbers, you can use something like this: my ($invlist_ref, $invmap_ref, $format) = prop_invmap($property); - if ($format && $format eq "r") { + if ($format && $format eq "ar") { map { $_ = eval $_ } @$invmap_ref; } Here's some entries from the output of the property "Nv", which has format -C<"r">. +C<"ar">. - @numerics_ranges @numerics_maps Note + @numerics_ranges @numerics_maps Note 0x00 "NaN" - 0x30 0 DIGIT 0 - 0x31 1 - 0x32 2 - ... - 0x37 7 - 0x38 8 - 0x39 9 DIGIT 9 + 0x30 0 DIGIT 0 .. 
DIGIT 9 0x3A "NaN" - 0xB2 2 SUPERSCRIPT 2 - 0xB3 3 SUPERSCRIPT 2 + 0xB2 2 SUPERSCRIPTs 2 and 3 0xB4 "NaN" - 0xB9 1 SUPERSCRIPT 1 + 0xB9 1 SUPERSCRIPT 1 0xBA "NaN" - 0xBC 1/4 VULGAR FRACTION 1/4 - 0xBD 1/2 VULGAR FRACTION 1/2 - 0xBE 3/4 VULGAR FRACTION 3/4 + 0xBC 1/4 VULGAR FRACTION 1/4 + 0xBD 1/2 VULGAR FRACTION 1/2 + 0xBE 3/4 VULGAR FRACTION 3/4 0xBF "NaN" - 0x660 0 ARABIC-INDIC DIGIT ZERO + 0x660 0 ARABIC-INDIC DIGIT ZERO .. NINE + 0x66A "NaN" =item B<C<n>> @@ -3253,7 +3260,13 @@ RETRY: elsif ($returned_prop eq 'ToPerlDecimalDigit') { $format = 'ae'; } - elsif ($format ne 'n' && $format ne 'r') { + elsif ($returned_prop eq 'ToNv') { + + # The one property that has this format is stored as a delta, so needs + # to indicate that need to add code point to it. + $format = 'ar'; + } + elsif ($format ne 'n') { # All others are simple scalars $format = 's'; diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 4188671a80..99ffc9dbc4 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -509,6 +509,7 @@ is(num("\N{SUPERSCRIPT TWO}"), 2, 'Verify num("\N{SUPERSCRIPT TWO} == 2'); is(num("\N{ETHIOPIC NUMBER TEN THOUSAND}"), 10000, 'Verify num("\N{ETHIOPIC NUMBER TEN THOUSAND}") == 10000'); is(num("\N{NORTH INDIC FRACTION ONE HALF}"), .5, 'Verify num("\N{NORTH INDIC FRACTION ONE HALF}") == .5'); is(num("\N{U+12448}"), 9, 'Verify num("\N{U+12448}") == 9'); +is(num("\N{U+5146}"), 1000000000000, 'Verify num("\N{U+5146}") == 1000000000000'); # Create a user-defined property sub InKana {<<'END'} @@ -1259,7 +1260,7 @@ foreach my $prop (keys %props) { next PROPERTY; } } - elsif ($format =~ /^ a /x) { + elsif ($format =~ /^ a (?!r) /x) { if ($full_name eq 'Perl_Decimal_Digit') { if ($missing ne "") { fail("prop_invmap('$mod_prop')"); @@ -1891,7 +1892,7 @@ foreach my $prop (keys %props) { next PROPERTY; } } - elsif ($format eq 's' || $format eq 'r') { + elsif ($format eq 's') { # Here the map is not more or less directly from a file stored on # disk. We try a different tack. 
These should all be properties that |