Unicode::UCD: Add charprops_all() public function

author: Karl Williamson <khw@cpan.org> 2015-02-18 12:24:42 -0700
committer: Karl Williamson <khw@cpan.org> 2015-02-18 12:51:34 -0700
commit: 40416981c6fd7d3e5668285c2762ac534f4d1e06 (patch)
tree: 7530697e67b8cdf55daa6c3745461a9242baa880
parent: 12fee290b7258f8f62e45cf0982cd8c3d4a08e11 (diff)
download: perl-40416981c6fd7d3e5668285c2762ac534f4d1e06.tar.gz
5 files changed, 77 insertions, 4 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index fdb94c19a6..0100dc86be 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -50101,7 +50101,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
 #endif	/* EBCDIC POSIX-BC */
 
 /* Generated from:
- * a8eba313c19fa856f367bbe7fad314704c885e24516676452256c0d4420a9c55 lib/Unicode/UCD.pm
+ * 083180df694deb1fc173361406c1a75619fb8376403db3a76dc585c1e3951eca lib/Unicode/UCD.pm
  * 827aa7ee45ca9fe09f3e0969a5a27a69ce58a6c7134548125266210018d27b49 lib/unicore/ArabicShaping.txt
  * 3748fbbe9d280a9da700bfd0c28beaaf6f32a67ec263a124fcb0a4095a30fae5 lib/unicore/BidiBrackets.txt
  * 3925329c2432fa7248b2e180cddcedb9a4f9eafbbb10ab9e105eaf833043b2fb lib/unicore/BidiMirroring.txt
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index b0f770adc6..06fbfd1143 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -16,6 +16,7 @@ our @EXPORT_OK = qw(charinfo
 		    charblocks charscripts
 		    charinrange
 		    charprop
+		    charprops_all
 		    general_categories bidi_types
 		    compexcl
 		    casefold all_casefolds casespec
@@ -46,6 +47,9 @@ Unicode::UCD - Unicode character database
     use Unicode::UCD 'charprop';
     my $value  = charprop($codepoint, $property);
 
+    use Unicode::UCD 'charprops_all';
+    my $all_values_hash_ref = charprops_all($codepoint);
+
     use Unicode::UCD 'casefold';
     my $casefold = casefold($codepoint);
 
@@ -775,6 +779,46 @@ sub charprop ($$) {
     }
 }
 
+=head2 B<charprops_all()>
+
+    use Unicode::UCD 'charprops_all';
+
+    my $properties_of_A_hash_ref = charprops_all("U+41");
+
+This returns a reference to a hash whose keys are all the distinct Unicode (no
+Perl extension) properties, and whose values are the respective values for
+those properties for the input L</code point argument>.
+
+Each key is the property name in its longest, most descriptive form.  The
+values are what L</charprop()> would return.
+
+This function is expensive in time and memory.
+
+=cut
+
+sub charprops_all($) {
+    my $input_cp = shift;
+
+    my $cp = _getcode($input_cp);   # undef means the input was not usable
+    croak __PACKAGE__, "::charprops_all: unknown code point '$input_cp'" unless defined $cp;
+
+    my %return;
+
+    require "unicore/UCD.pl";   # presumably defines %prop_aliases -- confirm
+
+    foreach my $prop (keys %Unicode::UCD::prop_aliases) {
+
+        # Skip 'perldecimaldigit':  it is a Perl extension, not a Unicode
+        # property, and is the only such entry in %prop_aliases.
+        next if $prop eq 'perldecimaldigit';
+
+        # Hash key is the long name (prop_aliases() in scalar context)
+        $return{scalar prop_aliases($prop)} = charprop($cp, $prop);
+    }
+
+    return \%return;
+}
+
 =head2 B<charblock()>
 
     use Unicode::UCD 'charblock';
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index dd4072b333..ee6ec0350b 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -17,7 +17,7 @@ local $SIG{__WARN__} = sub { push @warnings, @_  };
 use strict;
 use Test::More;
 
-use Unicode::UCD qw(charinfo charprop);
+use Unicode::UCD qw(charinfo charprop charprops_all);
 
 my $input_record_separator = 7; # Make sure Unicode::UCD isn't affected by
 $/ = $input_record_separator;   # setting this.
@@ -492,6 +492,28 @@ is(charprop(ord("9"), "nv"),     9,
 is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "",
                     "Verify charprop can handle an \"\" in ae-type property");
 
+my $mark_props_ref = charprops_all(0x300);  # U+0300 COMBINING GRAVE ACCENT
+is($mark_props_ref->{'Bidi_Class'}, "Nonspacing_Mark",
+                                    "Next tests are charprops_all of 0x300");
+is($mark_props_ref->{'Bidi_Mirrored'}, "No");
+is($mark_props_ref->{'Canonical_Combining_Class'}, "Above");
+is($mark_props_ref->{'Case_Folding'}, "\x{300}");
+is($mark_props_ref->{'Decomposition_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Decomposition_Type'}, "None");
+is($mark_props_ref->{'General_Category'}, "Nonspacing_Mark");
+is($mark_props_ref->{'ISO_Comment'}, "");
+is($mark_props_ref->{'Lowercase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Name'}, "COMBINING GRAVE ACCENT");
+is($mark_props_ref->{'Numeric_Type'}, "None");
+is($mark_props_ref->{'Numeric_Value'}, "NaN");
+is($mark_props_ref->{'Simple_Case_Folding'}, "\x{300}");
+is($mark_props_ref->{'Simple_Lowercase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Simple_Titlecase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Simple_Uppercase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Titlecase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Unicode_1_Name'}, "NON-SPACING GRAVE");
+is($mark_props_ref->{'Uppercase_Mapping'}, "\x{300}");
+
 use Unicode::UCD qw(charblocks charscripts);
 
 my $charblocks = charblocks();
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index c1c29431da..4a16d83972 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -16178,9 +16178,10 @@ $zero_matches
 
 The value of any Unicode (not including Perl extensions) character
 property mentioned above for any single code point is available through
-L<Unicode::UCD/charprop()>.
+L<Unicode::UCD/charprop()>.  L<Unicode::UCD/charprops_all()> returns the
+values of all the Unicode properties for a given code point.
 
-Besides this, all the Unicode character properties mentioned above
+Besides these, all the Unicode character properties mentioned above
 (except for those marked as for internal use by Perl) are also
 accessible by L<Unicode::UCD/prop_invlist()>.
 
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 6e01071300..1dfd2817b4 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -196,6 +196,12 @@ point.
 
 =item *
 
+A new function L<charprops_all()|Unicode::UCD/charprops_all()>
+has been added to return the values of all Unicode properties for a
+given code point.
+
+=item *
+
 A bug has been fixed so that L<propaliases()|Unicode::UCD/prop_aliases()>
 returns the correct short and long names for the Perl extensions where
 it was incorrect.
author	Karl Williamson <khw@cpan.org>	2015-02-18 12:24:42 -0700
committer	Karl Williamson <khw@cpan.org>	2015-02-18 12:51:34 -0700
commit	40416981c6fd7d3e5668285c2762ac534f4d1e06 (patch)
tree	7530697e67b8cdf55daa6c3745461a9242baa880
parent	12fee290b7258f8f62e45cf0982cd8c3d4a08e11 (diff)
download	perl-40416981c6fd7d3e5668285c2762ac534f4d1e06.tar.gz