diff options
author | Karl Williamson <public@khwilliamson.com> | 2014-02-12 13:30:01 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2014-02-15 16:22:44 -0700 |
commit | 2da736a23e7189ff341f094e8b28d1a903b3aa8d (patch) | |
tree | c5974d0adff381859b3344660d0e7b661ce8e2a2 | |
parent | 5fd8fad5ea5cd316704373bc784473dd4e3865b0 (diff) | |
download | perl-2da736a23e7189ff341f094e8b28d1a903b3aa8d.tar.gz |
Deprecate uses of POSIX::isfoo()
These functions have been supplanted in more modern Perls by the POSIX
character classes in regular expressions, /[[:posix:]]/ (for example,
/[[:alnum:]]/). Their documentation has been wrong: the functions don't
handle UTF-8, and they return true on an empty string. Rather than try to
fix them, the decision has been made to deprecate them instead.
See http://markmail.org/thread/jhqcag5njmx7jpyu
This commit also updates the documentation to be accurate.
-rw-r--r-- | ext/POSIX/POSIX.xs | 19 | ||||
-rw-r--r-- | ext/POSIX/lib/POSIX.pod | 219 | ||||
-rw-r--r-- | ext/POSIX/t/is.t | 37 | ||||
-rw-r--r-- | ext/POSIX/t/posix.t | 3 | ||||
-rw-r--r-- | pod/perldelta.pod | 12 | ||||
-rw-r--r-- | pod/perldiag.pod | 5 | ||||
-rw-r--r-- | pod/perllocale.pod | 2 |
7 files changed, 231 insertions, 66 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index f8d7c24675..6a4fca4719 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -553,6 +553,8 @@ static XSPROTO(is_common); /* prototype to pass -Wmissing-prototypes */ static XSPROTO(is_common) { dXSARGS; + static PTR_TBL_t * is_common_ptr_table; + if (items != 1) croak_xs_usage(cv, "charstring"); @@ -564,6 +566,23 @@ static XSPROTO(is_common) unsigned char *e = s + len; isfunc_t isfunc = (isfunc_t) XSANY.any_dptr; + if (ckWARN_d(WARN_DEPRECATED)) { + + /* Warn exactly once for each lexical place this function is + * called. See thread at + * http://markmail.org/thread/jhqcag5njmx7jpyu */ + + if (! is_common_ptr_table) { + is_common_ptr_table = ptr_table_new(); + } + if (! ptr_table_fetch(is_common_ptr_table, PL_op)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "Calling POSIX::%"HEKf"() is deprecated", + HEKfARG(GvNAME_HEK(CvGV(cv)))); + ptr_table_store(is_common_ptr_table, PL_op, (void *) 1); + } + } + for (RETVAL = 1; RETVAL && s < e; s++) if (!isfunc(*s)) RETVAL = 0; diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod index 8f4362aa3d..237b98397a 100644 --- a/ext/POSIX/lib/POSIX.pod +++ b/ext/POSIX/lib/POSIX.pod @@ -589,20 +589,38 @@ see L<perlfunc/gmtime>. =item isalnum -This is identical to the C function, except that it can apply to a -single character or to a whole string. Note that locale settings may -affect what characters are considered C<isalnum>. Does not work on -Unicode characters code point 256 or higher. Consider using regular -expressions and the C</[[:alnum:]]/> construct instead, or possibly -the C</\w/> construct. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:alnum:]]+ $ /x>>, which you should convert to use +instead. 
The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. + +You may want to use the C<L<E<sol>\wE<sol>|perlrecharclass/Word +characters>> construct instead. =item isalpha -This is identical to the C function, except that it can apply to -a single character or to a whole string. Note that locale settings -may affect what characters are considered C<isalpha>. Does not work -on Unicode characters code point 256 or higher. Consider using regular -expressions and the C</[[:alpha:]]/> construct instead. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:alpha:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. =item isatty @@ -611,82 +629,157 @@ to a tty. Similar to the C<-t> operator, see L<perlfunc/-X>. 
=item iscntrl -This is identical to the C function, except that it can apply to -a single character or to a whole string. Note that locale settings -may affect what characters are considered C<iscntrl>. Does not work -on Unicode characters code point 256 or higher. Consider using regular -expressions and the C</[[:cntrl:]]/> construct instead. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:cntrl:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. =item isdigit -This is identical to the C function, except that it can apply to -a single character or to a whole string. Note that locale settings -may affect what characters are considered C<isdigit> (unlikely, but -still possible). Does not work on Unicode characters code point 256 -or higher. Consider using regular expressions and the C</[[:digit:]]/> -construct instead, or the C</\d/> construct. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:digit:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. 
The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. + +You may want to use the C<L<E<sol>\dE<sol>|perlrecharclass/Digits>> +construct instead. =item isgraph -This is identical to the C function, except that it can apply to -a single character or to a whole string. Note that locale settings -may affect what characters are considered C<isgraph>. Does not work -on Unicode characters code point 256 or higher. Consider using regular -expressions and the C</[[:graph:]]/> construct instead. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:graph:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. =item islower -This is identical to the C function, except that it can apply to -a single character or to a whole string. Note that locale settings -may affect what characters are considered C<islower>. Does not work -on Unicode characters code point 256 or higher. 
Consider using regular -expressions and the C</[[:lower:]]/> construct instead. Do B<not> use -C</[a-z]/>. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:lower:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. + +Do B<not> use C</[a-z]/> unless you don't care about the current locale. =item isprint -This is identical to the C function, except that it can apply to -a single character or to a whole string. Note that locale settings -may affect what characters are considered C<isprint>. Does not work -on Unicode characters code point 256 or higher. Consider using regular -expressions and the C</[[:print:]]/> construct instead. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:print:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). 
+ +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. =item ispunct -This is identical to the C function, except that it can apply to -a single character or to a whole string. Note that locale settings -may affect what characters are considered C<ispunct>. Does not work -on Unicode characters code point 256 or higher. Consider using regular -expressions and the C</[[:punct:]]/> construct instead. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:punct:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. =item isspace -This is identical to the C function, except that it can apply to -a single character or to a whole string. Note that locale settings -may affect what characters are considered C<isspace>. Does not work -on Unicode characters code point 256 or higher. Consider using regular -expressions and the C</[[:space:]]/> construct instead, or the C</\s/> -construct. (Note that C</\s/> and C</[[:space:]]/> are slightly -different in that C</[[:space:]]/> can normally match a vertical tab, -while C</\s/> does not.) +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. 
It is very similar to matching +against S<C<qr/ ^ [[:space:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. + +You may want to use the C<L<E<sol>\sE<sol>|perlrecharclass/Whitespace>> +construct instead. =item isupper -This is identical to the C function, except that it can apply to -a single character or to a whole string. Note that locale settings -may affect what characters are considered C<isupper>. Does not work -on Unicode characters code point 256 or higher. Consider using regular -expressions and the C</[[:upper:]]/> construct instead. Do B<not> use -C</[A-Z]/>. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:upper:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. 
+ +Do B<not> use C</[A-Z]/> unless you don't care about the current locale. =item isxdigit -This is identical to the C function, except that it can apply to a single -character or to a whole string. Note that locale settings may affect what -characters are considered C<isxdigit> (unlikely, but still possible). -Does not work on Unicode characters code point 256 or higher. -Consider using regular expressions and the C</[[:xdigit:]]/> -construct instead, or simply C</[0-9a-f]/i>. +Deprecated function whose use raises a warning, and which is slated to +be removed in a future Perl version. It is very similar to matching +against S<C<qr/ ^ [[:xdigit:]]+ $ /x>>, which you should convert to use +instead. The function is deprecated because 1) it doesn't handle UTF-8 +encoded strings properly; and 2) it returns C<TRUE> even if the input is +the empty string. The function return is always based on the current +locale, whereas using locale rules is optional with the regular +expression, based on pragmas in effect and pattern modifiers (see +L<perlre/Character set modifiers> and L<perlre/Which character set +modifier is in effect?>). + +The function returns C<TRUE> if the input string is empty, or if the +corresponding C function returns C<TRUE> for every byte in the string. =item kill diff --git a/ext/POSIX/t/is.t b/ext/POSIX/t/is.t index e029004b17..0ab328e2f1 100644 --- a/ext/POSIX/t/is.t +++ b/ext/POSIX/t/is.t @@ -71,7 +71,7 @@ foreach my $s (keys %classes) { # Expected number of tests is one each for every combination of a # known is<xxx> function and string listed above. -plan(tests => keys(%classes) * keys(%functions)); +plan(tests => keys(%classes) * keys(%functions) + 1); # Main test loop: Run all POSIX::is<xxx> tests on each string defined above. # Only the character classes listed for that string should return 1. 
We @@ -81,8 +81,41 @@ plan(tests => keys(%classes) * keys(%functions)); foreach my $s (sort keys %classes) { foreach my $f (sort keys %functions) { my $expected = exists $classes{$s}->{$f}; - my $actual = eval "POSIX::$f( \$s )"; + my $actual = eval "no warnings 'deprecated'; POSIX::$f( \$s )"; cmp_ok($actual, '==', $expected, "$f('$s')"); } } + +{ + my @warnings; + local $SIG {__WARN__} = sub { push @warnings, @_; }; + + foreach (0 .. 3) { + my $a; + $a =POSIX::isalnum("a"); + $a =POSIX::isalpha("a"); + $a =POSIX::iscntrl("a"); + $a =POSIX::isdigit("a"); + $a =POSIX::isgraph("a"); + $a =POSIX::islower("a"); + $a =POSIX::ispunct("a"); + $a =POSIX::isspace("a"); + $a =POSIX::isupper("a"); + $a =POSIX::isxdigit("a"); + $a =POSIX::isalnum("a"); + $a =POSIX::isalpha("a"); + $a =POSIX::iscntrl("a"); + $a =POSIX::isdigit("a"); + $a =POSIX::isgraph("a"); + $a =POSIX::islower("a"); + $a =POSIX::ispunct("a"); + $a =POSIX::isspace("a"); + $a =POSIX::isupper("a"); + $a =POSIX::isxdigit("a"); + } + + # Each of the 10 classes should warn twice, because each has 2 lexical + # calls + is(scalar @warnings, 20); +} diff --git a/ext/POSIX/t/posix.t b/ext/POSIX/t/posix.t index 37292d360f..723a2c333e 100644 --- a/ext/POSIX/t/posix.t +++ b/ext/POSIX/t/posix.t @@ -290,6 +290,8 @@ is ($result, undef, "fgets should fail"); like ($@, qr/^Use method IO::Handle::gets\(\) instead/, "check its redef message"); +{ +no warnings 'deprecated'; # Simplistic tests for the isXXX() functions (bug #16799) ok( POSIX::isalnum('1'), 'isalnum' ); ok(!POSIX::isalnum('*'), 'isalnum' ); @@ -320,6 +322,7 @@ ok( POSIX::isalnum(undef),'isalnum undef' ); # those functions should stringify their arguments ok(!POSIX::isalpha([]), 'isalpha []' ); ok( POSIX::isprint([]), 'isprint []' ); +} eval { use strict; POSIX->import("S_ISBLK"); my $x = S_ISBLK }; unlike( $@, qr/Can't use string .* as a symbol ref/, "Can import autoloaded constants" ); diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 
65c3a92d45..2854b58f9f 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -131,6 +131,18 @@ context and while it used to behave the same as setting C<$/> to the address of the references in future it may behave differently, so we have forbidden this usage. +=item * + +Use of any of these functions in the C<POSIX> module is now deprecated: +C<isalnum>, C<isalpha>, C<iscntrl>, C<isdigit>, C<isgraph>, C<islower>, +C<isprint>, C<ispunct>, C<isspace>, C<isupper>, and C<isxdigit>. The +functions are buggy and don't work on UTF-8 encoded strings. See their +entries in L<POSIX> for more information. + +A warning is raised on the first call to any of them from each place in +the code that they are called. (Hence a repeated statement in a loop +will raise just the one warning.) + =back =head2 Module removals diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 96d95ad66f..8a72c53bd5 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -550,6 +550,11 @@ which was too long, so it was truncated to the string shown. (F) A subroutine invoked from an external package via call_sv() exited by calling exit. +=item Calling POSIX::%s() is deprecated + +(D deprecated) You called a function whose use is deprecated. See +the function's name in L<POSIX> for details. + =item %s() called too early to check prototype (W prototype) You've called a function that has a prototype before the diff --git a/pod/perllocale.pod b/pod/perllocale.pod index 47fcb0abd0..7bccbf63cc 100644 --- a/pod/perllocale.pod +++ b/pod/perllocale.pod @@ -806,7 +806,7 @@ interpolation with C<\F>, C<\l>, C<\L>, C<\u>, or C<\U> in double-quoted strings and C<s///> substitutions; and case-independent regular expression pattern matching using the C<i> modifier. -Finally, C<LC_CTYPE> affects the POSIX character-class test +Finally, C<LC_CTYPE> affects the (deprecated) POSIX character-class test functions--C<POSIX::isalpha()>, C<POSIX::islower()>, and so on. 
For example, if you move from the "C" locale to a 7-bit Scandinavian one, you may find--possibly to your surprise--that "|" moves from the |