diff options
author | Karl Williamson <khw@cpan.org> | 2015-02-18 12:24:42 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-02-18 12:51:34 -0700 |
commit | 40416981c6fd7d3e5668285c2762ac534f4d1e06 (patch) | |
tree | 7530697e67b8cdf55daa6c3745461a9242baa880 /lib | |
parent | 12fee290b7258f8f62e45cf0982cd8c3d4a08e11 (diff) | |
download | perl-40416981c6fd7d3e5668285c2762ac534f4d1e06.tar.gz |
Unicode::UCD: Add charprops_all() public function
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Unicode/UCD.pm | 44 | ||||
-rw-r--r-- | lib/Unicode/UCD.t | 24 | ||||
-rw-r--r-- | lib/unicore/mktables | 5 |
3 files changed, 70 insertions, 3 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index b0f770adc6..06fbfd1143 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -16,6 +16,7 @@ our @EXPORT_OK = qw(charinfo charblocks charscripts charinrange charprop + charprops_all general_categories bidi_types compexcl casefold all_casefolds casespec @@ -46,6 +47,9 @@ Unicode::UCD - Unicode character database use Unicode::UCD 'charprop'; my $value = charprop($codepoint, $property); + use Unicode::UCD 'charprops_all'; + my $all_values_hash_ref = charprops_all($codepoint); + use Unicode::UCD 'casefold'; my $casefold = casefold($codepoint); @@ -775,6 +779,46 @@ sub charprop ($$) { } } +=head2 B<charprops_all()> + + use Unicode::UCD 'charprops_all'; + + my $%properties_of_A_hash_ref = charprops_all("U+41"); + +This returns a reference to a hash whose keys are all the distinct Unicode (no +Perl extension) properties, and whose values are the respective values for +those properties for the input L</code point argument>. + +Each key is the property name in its longest, most descriptive form. The +values are what L</charprop()> would return. + +This function is expensive in time and memory. + +=cut + +sub charprops_all($) { + my $input_cp = shift; + + my $cp = _getcode($input_cp); + croak __PACKAGE__, "::charprops_all: unknown code point '$input_cp'" unless defined $cp; + + my %return; + + require "unicore/UCD.pl"; + + foreach my $prop (keys %Unicode::UCD::prop_aliases) { + + # Don't return a Perl extension. (This is the only one that + # %prop_aliases has in it.) + next if $prop eq 'perldecimaldigit'; + + # Use long name for $prop in the hash + $return{scalar prop_aliases($prop)} = charprop($cp, $prop); + } + + return \%return; +} + =head2 B<charblock()> use Unicode::UCD 'charblock'; diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index dd4072b333..ee6ec0350b 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -17,7 +17,7 @@ local $SIG{__WARN__} = sub { push @warnings, @_ }; use strict; use Test::More; -use Unicode::UCD qw(charinfo charprop); +use Unicode::UCD qw(charinfo charprop charprops_all); my $input_record_separator = 7; # Make sure Unicode::UCD isn't affected by $/ = $input_record_separator; # setting this. @@ -492,6 +492,28 @@ is(charprop(ord("9"), "nv"), 9, is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "", "Verify charprop can handle an \"\" in ae-type property"); +my $mark_props_ref = charprops_all(0x300); +is($mark_props_ref->{'Bidi_Class'}, "Nonspacing_Mark", + "Next tests are charprops_all of 0x300"); +is($mark_props_ref->{'Bidi_Mirrored'}, "No"); +is($mark_props_ref->{'Canonical_Combining_Class'}, "Above"); +is($mark_props_ref->{'Case_Folding'}, "\x{300}"); +is($mark_props_ref->{'Decomposition_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Decomposition_Type'}, "None"); +is($mark_props_ref->{'General_Category'}, "Nonspacing_Mark"); +is($mark_props_ref->{'ISO_Comment'}, ""); +is($mark_props_ref->{'Lowercase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Name'}, "COMBINING GRAVE ACCENT"); +is($mark_props_ref->{'Numeric_Type'}, "None"); +is($mark_props_ref->{'Numeric_Value'}, "NaN"); +is($mark_props_ref->{'Simple_Case_Folding'}, "\x{300}"); +is($mark_props_ref->{'Simple_Lowercase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Simple_Titlecase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Simple_Uppercase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Titlecase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Unicode_1_Name'}, "NON-SPACING GRAVE"); +is($mark_props_ref->{'Uppercase_Mapping'}, "\x{300}"); + use Unicode::UCD qw(charblocks charscripts); my $charblocks = charblocks(); diff --git a/lib/unicore/mktables b/lib/unicore/mktables index c1c29431da..4a16d83972 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -16178,9 +16178,10 @@ $zero_matches The value of any Unicode (not including Perl extensions) character property mentioned above for any single code point is available through -L<Unicode::UCD/charprop()>. +L<Unicode::UCD/charprop()>. L<Unicode::UCD/charprops_all()> returns the +values of all the Unicode properties for a given code point. -Besides this, all the Unicode character properties mentioned above +Besides these, all the Unicode character properties mentioned above (except for those marked as for internal use by Perl) are also accessible by L<Unicode::UCD/prop_invlist()>. |