diff options
Diffstat (limited to 'lib/Unicode')
-rw-r--r-- | lib/Unicode/UCD.pm | 44 | ||||
-rw-r--r-- | lib/Unicode/UCD.t | 24 |
2 files changed, 67 insertions, 1 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index b0f770adc6..06fbfd1143 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -16,6 +16,7 @@ our @EXPORT_OK = qw(charinfo charblocks charscripts charinrange charprop + charprops_all general_categories bidi_types compexcl casefold all_casefolds casespec @@ -46,6 +47,9 @@ Unicode::UCD - Unicode character database use Unicode::UCD 'charprop'; my $value = charprop($codepoint, $property); + use Unicode::UCD 'charprops_all'; + my $all_values_hash_ref = charprops_all($codepoint); + use Unicode::UCD 'casefold'; my $casefold = casefold($codepoint); @@ -775,6 +779,46 @@ sub charprop ($$) { } } +=head2 B<charprops_all()> + + use Unicode::UCD 'charprops_all'; + + my $%properties_of_A_hash_ref = charprops_all("U+41"); + +This returns a reference to a hash whose keys are all the distinct Unicode (no +Perl extension) properties, and whose values are the respective values for +those properties for the input L</code point argument>. + +Each key is the property name in its longest, most descriptive form. The +values are what L</charprop()> would return. + +This function is expensive in time and memory. + +=cut + +sub charprops_all($) { + my $input_cp = shift; + + my $cp = _getcode($input_cp); + croak __PACKAGE__, "::charprops_all: unknown code point '$input_cp'" unless defined $cp; + + my %return; + + require "unicore/UCD.pl"; + + foreach my $prop (keys %Unicode::UCD::prop_aliases) { + + # Don't return a Perl extension. (This is the only one that + # %prop_aliases has in it.) + next if $prop eq 'perldecimaldigit'; + + # Use long name for $prop in the hash + $return{scalar prop_aliases($prop)} = charprop($cp, $prop); + } + + return \%return; +} + =head2 B<charblock()> use Unicode::UCD 'charblock'; diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index dd4072b333..ee6ec0350b 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -17,7 +17,7 @@ local $SIG{__WARN__} = sub { push @warnings, @_ }; use strict; use Test::More; -use Unicode::UCD qw(charinfo charprop); +use Unicode::UCD qw(charinfo charprop charprops_all); my $input_record_separator = 7; # Make sure Unicode::UCD isn't affected by $/ = $input_record_separator; # setting this. @@ -492,6 +492,28 @@ is(charprop(ord("9"), "nv"), 9, is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "", "Verify charprop can handle an \"\" in ae-type property"); +my $mark_props_ref = charprops_all(0x300); +is($mark_props_ref->{'Bidi_Class'}, "Nonspacing_Mark", + "Next tests are charprops_all of 0x300"); +is($mark_props_ref->{'Bidi_Mirrored'}, "No"); +is($mark_props_ref->{'Canonical_Combining_Class'}, "Above"); +is($mark_props_ref->{'Case_Folding'}, "\x{300}"); +is($mark_props_ref->{'Decomposition_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Decomposition_Type'}, "None"); +is($mark_props_ref->{'General_Category'}, "Nonspacing_Mark"); +is($mark_props_ref->{'ISO_Comment'}, ""); +is($mark_props_ref->{'Lowercase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Name'}, "COMBINING GRAVE ACCENT"); +is($mark_props_ref->{'Numeric_Type'}, "None"); +is($mark_props_ref->{'Numeric_Value'}, "NaN"); +is($mark_props_ref->{'Simple_Case_Folding'}, "\x{300}"); +is($mark_props_ref->{'Simple_Lowercase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Simple_Titlecase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Simple_Uppercase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Titlecase_Mapping'}, "\x{300}"); +is($mark_props_ref->{'Unicode_1_Name'}, "NON-SPACING GRAVE"); +is($mark_props_ref->{'Uppercase_Mapping'}, "\x{300}"); + use Unicode::UCD qw(charblocks charscripts); my $charblocks = charblocks(); |