diff options
author | Karl Williamson <public@khwilliamson.com> | 2010-11-19 12:04:53 -0700 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2010-11-20 18:15:15 -0800 |
commit | 71a442a8e083048f771614ff45898af022ade6b7 (patch) | |
tree | d9401977771e09665041f8773a79efc88a7dbf28 /lib | |
parent | 98ef7649bc2d994db925c45a7d2fdce7dff098b3 (diff) | |
download | perl-71a442a8e083048f771614ff45898af022ade6b7.tar.gz |
UCD.pm: Don't use CompositionExclusions.txt
The motivation for this patch was to remove dependence of UCD on
another Unicode DB .txt file.
But the subroutine that uses it is out-of-date, now that this property,
and an even more convenient one, are accessible from the core. So the
documentation is also updated to educate people.
Instead of using the file, the routine just uses the core's access
method (a \p{Composition_Exclusion} regex property match).
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Unicode/UCD.pm | 52 | ||||
-rw-r--r-- | lib/Unicode/UCD.t | 2 |
2 files changed, 26 insertions, 28 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index eb4de2882d..522c540dd9 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -710,45 +710,43 @@ sub bidi_types { my $compexcl = compexcl(0x09dc); -This returns B<true> if the -L</code point argument> should not be produced by composition normalization, -B<AND> if that fact is not otherwise determinable from the Unicode data base. -It currently does not return B<true> if the code point has a decomposition +This routine is included for backwards compatibility, but as of Perl 5.12, for +most purposes it is probably more convenient to use one of the following +instead: + + my $compexcl = chr(0x09dc) =~ /\p{Comp_Ex}; + my $compexcl = chr(0x09dc) =~ /\p{Full_Composition_Exclusion}; + +or even + + my $compexcl = chr(0x09dc) =~ /\p{CE}; + my $compexcl = chr(0x09dc) =~ /\p{Composition_Exclusion}; + +The first two forms return B<true> if the L</code point argument> should not +be produced by composition normalization. The final two forms +additionally require that this fact not otherwise be determinable from +the Unicode data base for them to return B<true>. + +This routine behaves identically to the final two forms. That is, +it does not return B<true> if the code point has a decomposition consisting of another single code point, nor if its decomposition starts with a code point whose combining class is non-zero. Code points that meet either of these conditions should also not be produced by composition -normalization. +normalization, which is probably why you should use the +C<Full_Composition_Exclusion> property instead, as shown above. -It returns B<false> otherwise. +The routine returns B<false> otherwise. 
=cut -my %COMPEXCL; - -sub _compexcl { - unless (%COMPEXCL) { - if (openunicode(\$COMPEXCLFH, "CompositionExclusions.txt")) { - local $_; - while (<$COMPEXCLFH>) { - if (/^([0-9A-F]+)\s+\#\s+/) { - my $code = hex($1); - $COMPEXCL{$code} = undef; - } - } - close($COMPEXCLFH); - } - } -} - sub compexcl { my $arg = shift; my $code = _getcode($arg); croak __PACKAGE__, "::compexcl: unknown code '$arg'" unless defined $code; - _compexcl() unless %COMPEXCL; - - return exists $COMPEXCL{$code}; + no warnings "utf8"; # So works on surrogates and non-Unicode code points + return chr($code) =~ /\p{Composition_Exclusion}/; } =head2 B<casefold()> @@ -1233,8 +1231,6 @@ if you are wondering where one of your filehandles went, that's where. Does not yet support EBCDIC platforms. -L</compexcl()> should give a complete list of excluded code points. - =head1 AUTHOR Jarkko Hietaniemi diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 795888aa2f..ae8432cfaf 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -301,6 +301,8 @@ is(Unicode::UCD::UnicodeVersion, '6.0.0', 'UnicodeVersion'); use Unicode::UCD qw(compexcl); ok(!compexcl(0x0100), 'compexcl'); +ok(!compexcl(0xD801), 'compexcl of surrogate'); +ok(!compexcl(0x110000), 'compexcl of non-Unicode code point'); ok( compexcl(0x0958)); use Unicode::UCD qw(casefold); |