summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2015-02-18 12:24:42 -0700
committerKarl Williamson <khw@cpan.org>2015-02-18 12:51:34 -0700
commit40416981c6fd7d3e5668285c2762ac534f4d1e06 (patch)
tree7530697e67b8cdf55daa6c3745461a9242baa880
parent12fee290b7258f8f62e45cf0982cd8c3d4a08e11 (diff)
downloadperl-40416981c6fd7d3e5668285c2762ac534f4d1e06.tar.gz
Unicode::UCD: Add charprops_all() public function
-rw-r--r--charclass_invlists.h2
-rw-r--r--lib/Unicode/UCD.pm44
-rw-r--r--lib/Unicode/UCD.t24
-rw-r--r--lib/unicore/mktables5
-rw-r--r--pod/perldelta.pod6
5 files changed, 77 insertions, 4 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index fdb94c19a6..0100dc86be 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -50101,7 +50101,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
#endif /* EBCDIC POSIX-BC */
/* Generated from:
- * a8eba313c19fa856f367bbe7fad314704c885e24516676452256c0d4420a9c55 lib/Unicode/UCD.pm
+ * 083180df694deb1fc173361406c1a75619fb8376403db3a76dc585c1e3951eca lib/Unicode/UCD.pm
* 827aa7ee45ca9fe09f3e0969a5a27a69ce58a6c7134548125266210018d27b49 lib/unicore/ArabicShaping.txt
* 3748fbbe9d280a9da700bfd0c28beaaf6f32a67ec263a124fcb0a4095a30fae5 lib/unicore/BidiBrackets.txt
* 3925329c2432fa7248b2e180cddcedb9a4f9eafbbb10ab9e105eaf833043b2fb lib/unicore/BidiMirroring.txt
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index b0f770adc6..06fbfd1143 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -16,6 +16,7 @@ our @EXPORT_OK = qw(charinfo
charblocks charscripts
charinrange
charprop
+ charprops_all
general_categories bidi_types
compexcl
casefold all_casefolds casespec
@@ -46,6 +47,9 @@ Unicode::UCD - Unicode character database
use Unicode::UCD 'charprop';
my $value = charprop($codepoint, $property);
+ use Unicode::UCD 'charprops_all';
+ my $all_values_hash_ref = charprops_all($codepoint);
+
use Unicode::UCD 'casefold';
my $casefold = casefold($codepoint);
@@ -775,6 +779,46 @@ sub charprop ($$) {
}
}
+=head2 B<charprops_all()>
+
+ use Unicode::UCD 'charprops_all';
+
+ my $%properties_of_A_hash_ref = charprops_all("U+41");
+
+This returns a reference to a hash whose keys are all the distinct Unicode (no
+Perl extension) properties, and whose values are the respective values for
+those properties for the input L</code point argument>.
+
+Each key is the property name in its longest, most descriptive form. The
+values are what L</charprop()> would return.
+
+This function is expensive in time and memory.
+
+=cut
+
+sub charprops_all($) {
+ my $input_cp = shift;
+
+ my $cp = _getcode($input_cp);
+ croak __PACKAGE__, "::charprops_all: unknown code point '$input_cp'" unless defined $cp;
+
+ my %return;
+
+ require "unicore/UCD.pl";
+
+ foreach my $prop (keys %Unicode::UCD::prop_aliases) {
+
+ # Don't return a Perl extension. (This is the only one that
+ # %prop_aliases has in it.)
+ next if $prop eq 'perldecimaldigit';
+
+ # Use long name for $prop in the hash
+ $return{scalar prop_aliases($prop)} = charprop($cp, $prop);
+ }
+
+ return \%return;
+}
+
=head2 B<charblock()>
use Unicode::UCD 'charblock';
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index dd4072b333..ee6ec0350b 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -17,7 +17,7 @@ local $SIG{__WARN__} = sub { push @warnings, @_ };
use strict;
use Test::More;
-use Unicode::UCD qw(charinfo charprop);
+use Unicode::UCD qw(charinfo charprop charprops_all);
my $input_record_separator = 7; # Make sure Unicode::UCD isn't affected by
$/ = $input_record_separator; # setting this.
@@ -492,6 +492,28 @@ is(charprop(ord("9"), "nv"), 9,
is(charprop(utf8::unicode_to_native(0xAD), "NFKC_Casefold"), "",
"Verify charprop can handle an \"\" in ae-type property");
+my $mark_props_ref = charprops_all(0x300);
+is($mark_props_ref->{'Bidi_Class'}, "Nonspacing_Mark",
+ "Next tests are charprops_all of 0x300");
+is($mark_props_ref->{'Bidi_Mirrored'}, "No");
+is($mark_props_ref->{'Canonical_Combining_Class'}, "Above");
+is($mark_props_ref->{'Case_Folding'}, "\x{300}");
+is($mark_props_ref->{'Decomposition_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Decomposition_Type'}, "None");
+is($mark_props_ref->{'General_Category'}, "Nonspacing_Mark");
+is($mark_props_ref->{'ISO_Comment'}, "");
+is($mark_props_ref->{'Lowercase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Name'}, "COMBINING GRAVE ACCENT");
+is($mark_props_ref->{'Numeric_Type'}, "None");
+is($mark_props_ref->{'Numeric_Value'}, "NaN");
+is($mark_props_ref->{'Simple_Case_Folding'}, "\x{300}");
+is($mark_props_ref->{'Simple_Lowercase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Simple_Titlecase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Simple_Uppercase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Titlecase_Mapping'}, "\x{300}");
+is($mark_props_ref->{'Unicode_1_Name'}, "NON-SPACING GRAVE");
+is($mark_props_ref->{'Uppercase_Mapping'}, "\x{300}");
+
use Unicode::UCD qw(charblocks charscripts);
my $charblocks = charblocks();
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index c1c29431da..4a16d83972 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -16178,9 +16178,10 @@ $zero_matches
The value of any Unicode (not including Perl extensions) character
property mentioned above for any single code point is available through
-L<Unicode::UCD/charprop()>.
+L<Unicode::UCD/charprop()>. L<Unicode::UCD/charprops_all()> returns the
+values of all the Unicode properties for a given code point.
-Besides this, all the Unicode character properties mentioned above
+Besides these, all the Unicode character properties mentioned above
(except for those marked as for internal use by Perl) are also
accessible by L<Unicode::UCD/prop_invlist()>.
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 6e01071300..1dfd2817b4 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -196,6 +196,12 @@ point.
=item *
+A new function L<charprop()|Unicode::UCD/charprops_all()>
+has been added to return the values of all Unicode properties for a
+given code point.
+
+=item *
+
A bug has been fixed so that L<propaliases()|Unicode::UCD/prop_aliases()>
returns the correct short and long names for the Perl extensions where
it was incorrect.