summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2014-02-12 13:30:01 -0700
committerKarl Williamson <public@khwilliamson.com>2014-02-15 16:22:44 -0700
commit2da736a23e7189ff341f094e8b28d1a903b3aa8d (patch)
treec5974d0adff381859b3344660d0e7b661ce8e2a2
parent5fd8fad5ea5cd316704373bc784473dd4e3865b0 (diff)
downloadperl-2da736a23e7189ff341f094e8b28d1a903b3aa8d.tar.gz
Deprecate uses of POSIX::isfoo()
These functions have been supplanted in more modern Perls by /[[:posix:]]/. The documentation has been wrong: the functions don't handle UTF-8, and they return true on an empty string. Rather than try to fix them, the decision has been made to deprecate them instead. See http://markmail.org/thread/jhqcag5njmx7jpyu for the discussion. This commit also updates the documentation to be accurate.
-rw-r--r--ext/POSIX/POSIX.xs19
-rw-r--r--ext/POSIX/lib/POSIX.pod219
-rw-r--r--ext/POSIX/t/is.t37
-rw-r--r--ext/POSIX/t/posix.t3
-rw-r--r--pod/perldelta.pod12
-rw-r--r--pod/perldiag.pod5
-rw-r--r--pod/perllocale.pod2
7 files changed, 231 insertions, 66 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index f8d7c24675..6a4fca4719 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -553,6 +553,8 @@ static XSPROTO(is_common); /* prototype to pass -Wmissing-prototypes */
static XSPROTO(is_common)
{
dXSARGS;
+ static PTR_TBL_t * is_common_ptr_table;
+
if (items != 1)
croak_xs_usage(cv, "charstring");
@@ -564,6 +566,23 @@ static XSPROTO(is_common)
unsigned char *e = s + len;
isfunc_t isfunc = (isfunc_t) XSANY.any_dptr;
+ if (ckWARN_d(WARN_DEPRECATED)) {
+
+ /* Warn exactly once for each lexical place this function is
+ * called. See thread at
+ * http://markmail.org/thread/jhqcag5njmx7jpyu */
+
+ if (! is_common_ptr_table) {
+ is_common_ptr_table = ptr_table_new();
+ }
+ if (! ptr_table_fetch(is_common_ptr_table, PL_op)) {
+ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+ "Calling POSIX::%"HEKf"() is deprecated",
+ HEKfARG(GvNAME_HEK(CvGV(cv))));
+ ptr_table_store(is_common_ptr_table, PL_op, (void *) 1);
+ }
+ }
+
for (RETVAL = 1; RETVAL && s < e; s++)
if (!isfunc(*s))
RETVAL = 0;
diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod
index 8f4362aa3d..237b98397a 100644
--- a/ext/POSIX/lib/POSIX.pod
+++ b/ext/POSIX/lib/POSIX.pod
@@ -589,20 +589,38 @@ see L<perlfunc/gmtime>.
=item isalnum
-This is identical to the C function, except that it can apply to a
-single character or to a whole string. Note that locale settings may
-affect what characters are considered C<isalnum>. Does not work on
-Unicode characters code point 256 or higher. Consider using regular
-expressions and the C</[[:alnum:]]/> construct instead, or possibly
-the C</\w/> construct.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:alnum:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+You may want to use the C<L<E<sol>\wE<sol>|perlrecharclass/Word
+characters>> construct instead.
=item isalpha
-This is identical to the C function, except that it can apply to
-a single character or to a whole string. Note that locale settings
-may affect what characters are considered C<isalpha>. Does not work
-on Unicode characters code point 256 or higher. Consider using regular
-expressions and the C</[[:alpha:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:alpha:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
=item isatty
@@ -611,82 +629,157 @@ to a tty. Similar to the C<-t> operator, see L<perlfunc/-X>.
=item iscntrl
-This is identical to the C function, except that it can apply to
-a single character or to a whole string. Note that locale settings
-may affect what characters are considered C<iscntrl>. Does not work
-on Unicode characters code point 256 or higher. Consider using regular
-expressions and the C</[[:cntrl:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:cntrl:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
=item isdigit
-This is identical to the C function, except that it can apply to
-a single character or to a whole string. Note that locale settings
-may affect what characters are considered C<isdigit> (unlikely, but
-still possible). Does not work on Unicode characters code point 256
-or higher. Consider using regular expressions and the C</[[:digit:]]/>
-construct instead, or the C</\d/> construct.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:digit:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+You may want to use the C<L<E<sol>\dE<sol>|perlrecharclass/Digits>>
+construct instead.
=item isgraph
-This is identical to the C function, except that it can apply to
-a single character or to a whole string. Note that locale settings
-may affect what characters are considered C<isgraph>. Does not work
-on Unicode characters code point 256 or higher. Consider using regular
-expressions and the C</[[:graph:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:graph:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
=item islower
-This is identical to the C function, except that it can apply to
-a single character or to a whole string. Note that locale settings
-may affect what characters are considered C<islower>. Does not work
-on Unicode characters code point 256 or higher. Consider using regular
-expressions and the C</[[:lower:]]/> construct instead. Do B<not> use
-C</[a-z]/>.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:lower:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+Do B<not> use C</[a-z]/> unless you don't care about the current locale.
=item isprint
-This is identical to the C function, except that it can apply to
-a single character or to a whole string. Note that locale settings
-may affect what characters are considered C<isprint>. Does not work
-on Unicode characters code point 256 or higher. Consider using regular
-expressions and the C</[[:print:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:print:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
=item ispunct
-This is identical to the C function, except that it can apply to
-a single character or to a whole string. Note that locale settings
-may affect what characters are considered C<ispunct>. Does not work
-on Unicode characters code point 256 or higher. Consider using regular
-expressions and the C</[[:punct:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:punct:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
=item isspace
-This is identical to the C function, except that it can apply to
-a single character or to a whole string. Note that locale settings
-may affect what characters are considered C<isspace>. Does not work
-on Unicode characters code point 256 or higher. Consider using regular
-expressions and the C</[[:space:]]/> construct instead, or the C</\s/>
-construct. (Note that C</\s/> and C</[[:space:]]/> are slightly
-different in that C</[[:space:]]/> can normally match a vertical tab,
-while C</\s/> does not.)
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:space:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+You may want to use the C<L<E<sol>\sE<sol>|perlrecharclass/Whitespace>>
+construct instead.
=item isupper
-This is identical to the C function, except that it can apply to
-a single character or to a whole string. Note that locale settings
-may affect what characters are considered C<isupper>. Does not work
-on Unicode characters code point 256 or higher. Consider using regular
-expressions and the C</[[:upper:]]/> construct instead. Do B<not> use
-C</[A-Z]/>.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:upper:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+Do B<not> use C</[A-Z]/> unless you don't care about the current locale.
=item isxdigit
-This is identical to the C function, except that it can apply to a single
-character or to a whole string. Note that locale settings may affect what
-characters are considered C<isxdigit> (unlikely, but still possible).
-Does not work on Unicode characters code point 256 or higher.
-Consider using regular expressions and the C</[[:xdigit:]]/>
-construct instead, or simply C</[0-9a-f]/i>.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version. It is very similar to matching
+against S<C<qr/ ^ [[:xdigit:]]+ $ /x>>, which you should convert to use
+instead. The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string. The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
=item kill
diff --git a/ext/POSIX/t/is.t b/ext/POSIX/t/is.t
index e029004b17..0ab328e2f1 100644
--- a/ext/POSIX/t/is.t
+++ b/ext/POSIX/t/is.t
@@ -71,7 +71,7 @@ foreach my $s (keys %classes) {
# Expected number of tests is one each for every combination of a
# known is<xxx> function and string listed above.
-plan(tests => keys(%classes) * keys(%functions));
+plan(tests => keys(%classes) * keys(%functions) + 1);
# Main test loop: Run all POSIX::is<xxx> tests on each string defined above.
# Only the character classes listed for that string should return 1. We
@@ -81,8 +81,41 @@ plan(tests => keys(%classes) * keys(%functions));
foreach my $s (sort keys %classes) {
foreach my $f (sort keys %functions) {
my $expected = exists $classes{$s}->{$f};
- my $actual = eval "POSIX::$f( \$s )";
+ my $actual = eval "no warnings 'deprecated'; POSIX::$f( \$s )";
cmp_ok($actual, '==', $expected, "$f('$s')");
}
}
+
+{
+ my @warnings;
+ local $SIG {__WARN__} = sub { push @warnings, @_; };
+
+ foreach (0 .. 3) {
+ my $a;
+ $a =POSIX::isalnum("a");
+ $a =POSIX::isalpha("a");
+ $a =POSIX::iscntrl("a");
+ $a =POSIX::isdigit("a");
+ $a =POSIX::isgraph("a");
+ $a =POSIX::islower("a");
+ $a =POSIX::ispunct("a");
+ $a =POSIX::isspace("a");
+ $a =POSIX::isupper("a");
+ $a =POSIX::isxdigit("a");
+ $a =POSIX::isalnum("a");
+ $a =POSIX::isalpha("a");
+ $a =POSIX::iscntrl("a");
+ $a =POSIX::isdigit("a");
+ $a =POSIX::isgraph("a");
+ $a =POSIX::islower("a");
+ $a =POSIX::ispunct("a");
+ $a =POSIX::isspace("a");
+ $a =POSIX::isupper("a");
+ $a =POSIX::isxdigit("a");
+ }
+
+ # Each of the 10 classes should warn twice, because each has 2 lexical
+ # calls
+ is(scalar @warnings, 20);
+}
diff --git a/ext/POSIX/t/posix.t b/ext/POSIX/t/posix.t
index 37292d360f..723a2c333e 100644
--- a/ext/POSIX/t/posix.t
+++ b/ext/POSIX/t/posix.t
@@ -290,6 +290,8 @@ is ($result, undef, "fgets should fail");
like ($@, qr/^Use method IO::Handle::gets\(\) instead/,
"check its redef message");
+{
+no warnings 'deprecated';
# Simplistic tests for the isXXX() functions (bug #16799)
ok( POSIX::isalnum('1'), 'isalnum' );
ok(!POSIX::isalnum('*'), 'isalnum' );
@@ -320,6 +322,7 @@ ok( POSIX::isalnum(undef),'isalnum undef' );
# those functions should stringify their arguments
ok(!POSIX::isalpha([]), 'isalpha []' );
ok( POSIX::isprint([]), 'isprint []' );
+}
eval { use strict; POSIX->import("S_ISBLK"); my $x = S_ISBLK };
unlike( $@, qr/Can't use string .* as a symbol ref/, "Can import autoloaded constants" );
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 65c3a92d45..2854b58f9f 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -131,6 +131,18 @@ context and while it used to behave the same as setting C<$/> to
the address of the references in future it may behave differently, so we
have forbidden this usage.
+=item *
+
+Use of any of these functions in the C<POSIX> module is now deprecated:
+C<isalnum>, C<isalpha>, C<iscntrl>, C<isdigit>, C<isgraph>, C<islower>,
+C<isprint>, C<ispunct>, C<isspace>, C<isupper>, and C<isxdigit>. The
+functions are buggy and don't work on UTF-8 encoded strings. See their
+entries in L<POSIX> for more information.
+
+A warning is raised on the first call to any of them from each place in
+the code where they are called. (Hence a repeated statement in a loop
+will raise just the one warning.)
+
=back
=head2 Module removals
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 96d95ad66f..8a72c53bd5 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -550,6 +550,11 @@ which was too long, so it was truncated to the string shown.
(F) A subroutine invoked from an external package via call_sv()
exited by calling exit.
+=item Calling POSIX::%s() is deprecated
+
+(D deprecated) You called a function whose use is deprecated. See
+the function's name in L<POSIX> for details.
+
=item %s() called too early to check prototype
(W prototype) You've called a function that has a prototype before the
diff --git a/pod/perllocale.pod b/pod/perllocale.pod
index 47fcb0abd0..7bccbf63cc 100644
--- a/pod/perllocale.pod
+++ b/pod/perllocale.pod
@@ -806,7 +806,7 @@ interpolation with C<\F>, C<\l>, C<\L>, C<\u>, or C<\U> in double-quoted
strings and C<s///> substitutions; and case-independent regular expression
pattern matching using the C<i> modifier.
-Finally, C<LC_CTYPE> affects the POSIX character-class test
+Finally, C<LC_CTYPE> affects the (deprecated) POSIX character-class test
functions--C<POSIX::isalpha()>, C<POSIX::islower()>, and so on. For
example, if you move from the "C" locale to a 7-bit Scandinavian one,
you may find--possibly to your surprise--that "|" moves from the