summaryrefslogtreecommitdiff
path: root/lib/Unicode
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Unicode')
-rw-r--r--lib/Unicode/UCD.pm44
-rw-r--r--lib/Unicode/UCD.t24
2 files changed, 67 insertions, 1 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index b0f770adc6..06fbfd1143 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -16,6 +16,7 @@ our @EXPORT_OK = qw(charinfo
charblocks charscripts
charinrange
charprop
+ charprops_all
general_categories bidi_types
compexcl
casefold all_casefolds casespec
@@ -46,6 +47,9 @@ Unicode::UCD - Unicode character database
use Unicode::UCD 'charprop';
my $value = charprop($codepoint, $property);
+ use Unicode::UCD 'charprops_all';
+ my $all_values_hash_ref = charprops_all($codepoint);
+
use Unicode::UCD 'casefold';
my $casefold = casefold($codepoint);
@@ -775,6 +779,46 @@ sub charprop ($$) {
}
}
+=head2 B<charprops_all()>
+
+ use Unicode::UCD 'charprops_all';
+
+ my $%properties_of_A_hash_ref = charprops_all("U+41");
+
+This returns a reference to a hash whose keys are all the distinct Unicode (no
+Perl extension) properties, and whose values are the respective values for
+those properties for the input L</code point argument>.
+
+Each key is the property name in its longest, most descriptive form. The
+values are what L</charprop()> would return.
+
+This function is expensive in time and memory.
+
+=cut
+
+sub charprops_all($) {
+ my $input_cp = shift;
+
+ my $cp = _getcode($input_cp);
+ croak __PACKAGE__, "::charprops_all: unknown code point '$input_cp'" unless defined $cp;
+
+ my %return;
+
+ require "unicore/UCD.pl";
+
+ foreach my $prop (keys %Unicode::UCD::prop_aliases) {
+
+ # Don't return a Perl extension. (This is the only one that
+ # %prop_aliases has in it.)
+ next if $prop eq 'perldecimaldigit';
+
+ # Use long name for $prop in the hash
+ $return{scalar prop_aliases($prop)} = charprop($cp, $prop);
+ }
+
+ return \%return;
+}
+
=head2 B<charblock()>
use Unicode::UCD 'charblock';
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index dd4072b333..ee6ec0350b 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -17,7 +17,7 @@ local $SIG{__WARN__} = sub { push @warnings, @_ };
use strict;
use Test::More;
-use Unicode::UCD qw(charinfo charprop);
+use Unicode::UCD qw(charinfo charprop charprops_all);
my $input_record_separator = 7; # Make sure Unicode::UCD isn't affected by
$/ = $input_record_separator; # setting this.
@@ -492,6 +492,28 @@ is(charprop(ord("9"), "nv"), 9,
is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "",
"Verify charprop can handle an \"\" in ae-type property");
+my $mark_props_ref = charprops_all(0x300);
+is($mark_props_ref->{'Bidi_Class'}, "Nonspacing_Mark",
+ "Next tests are charprops_all of 0x300");
+is($mark_props_ref->{'Bidi_Mirrored'}, "No");
+is($mark_props_ref->{'Canonical_Combining_Class'}, "Above");
+is($mark_props_ref->{'Case_Folding'}, "\x{300}");
+is($mark_props_ref->{'Decomposition_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Decomposition_Type'}, "None");
+is($mark_props_ref->{'General_Category'}, "Nonspacing_Mark");
+is($mark_props_ref->{'ISO_Comment'}, "");
+is($mark_props_ref->{'Lowercase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Name'}, "COMBINING GRAVE ACCENT");
+is($mark_props_ref->{'Numeric_Type'}, "None");
+is($mark_props_ref->{'Numeric_Value'}, "NaN");
+is($mark_props_ref->{'Simple_Case_Folding'}, "\x{300}");
+is($mark_props_ref->{'Simple_Lowercase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Simple_Titlecase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Simple_Uppercase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Titlecase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Unicode_1_Name'}, "NON-SPACING GRAVE");
+is($mark_props_ref->{'Uppercase_Mapping'}, "\x{300}");
+
use Unicode::UCD qw(charblocks charscripts);
my $charblocks = charblocks();