Deprecate uses of POSIX::isfoo()

These functions have been supplanted in more modern Perls by /[[:posix:]]/. Their documentation has been wrong: the functions don't handle UTF-8, and they return true on an empty string. Rather than try to fix them, the decision has been made to deprecate them instead; see http://markmail.org/thread/jhqcag5njmx7jpyu for the discussion. This commit also updates the documentation to be accurate.
author: Karl Williamson <public@khwilliamson.com> 2014-02-12 13:30:01 -0700
committer: Karl Williamson <public@khwilliamson.com> 2014-02-15 16:22:44 -0700
commit: 2da736a23e7189ff341f094e8b28d1a903b3aa8d (patch)
tree: c5974d0adff381859b3344660d0e7b661ce8e2a2 /ext
parent: 5fd8fad5ea5cd316704373bc784473dd4e3865b0 (diff)
download: perl-2da736a23e7189ff341f094e8b28d1a903b3aa8d.tar.gz
4 files changed, 213 insertions, 65 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index f8d7c24675..6a4fca4719 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -553,6 +553,8 @@ static XSPROTO(is_common); /* prototype to pass -Wmissing-prototypes */
 static XSPROTO(is_common)
 {
     dXSARGS;
+    static PTR_TBL_t * is_common_ptr_table;
+
     if (items != 1)
        croak_xs_usage(cv,  "charstring");
 
@@ -564,6 +566,23 @@ static XSPROTO(is_common)
 	unsigned char *e = s + len;
 	isfunc_t isfunc = (isfunc_t) XSANY.any_dptr;
 
+        if (ckWARN_d(WARN_DEPRECATED)) {
+
+            /* Warn exactly once for each lexical place this function is
+             * called.  See thread at
+             * http://markmail.org/thread/jhqcag5njmx7jpyu */
+
+	    if (! is_common_ptr_table) {
+               is_common_ptr_table = ptr_table_new();
+            }
+	    if (! ptr_table_fetch(is_common_ptr_table, PL_op)) {
+                Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                            "Calling POSIX::%"HEKf"() is deprecated",
+                            HEKfARG(GvNAME_HEK(CvGV(cv))));
+                ptr_table_store(is_common_ptr_table, PL_op, (void *) 1);
+            }
+        }
+
 	for (RETVAL = 1; RETVAL && s < e; s++)
 	    if (!isfunc(*s))
 		RETVAL = 0;
diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod
index 8f4362aa3d..237b98397a 100644
--- a/ext/POSIX/lib/POSIX.pod
+++ b/ext/POSIX/lib/POSIX.pod
@@ -589,20 +589,38 @@ see L<perlfunc/gmtime>.
 
 =item isalnum
 
-This is identical to the C function, except that it can apply to a
-single character or to a whole string.  Note that locale settings may
-affect what characters are considered C<isalnum>.  Does not work on
-Unicode characters code point 256 or higher.  Consider using regular
-expressions and the C</[[:alnum:]]/> construct instead, or possibly
-the C</\w/> construct.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:alnum:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+You may want to use the C<L<E<sol>\wE<sol>|perlrecharclass/Word
+characters>> construct instead.
 
 =item isalpha
 
-This is identical to the C function, except that it can apply to
-a single character or to a whole string.  Note that locale settings
-may affect what characters are considered C<isalpha>.  Does not work
-on Unicode characters code point 256 or higher.  Consider using regular
-expressions and the C</[[:alpha:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:alpha:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
 
 =item isatty
 
@@ -611,82 +629,157 @@ to a tty.  Similar to the C<-t> operator, see L<perlfunc/-X>.
 
 =item iscntrl
 
-This is identical to the C function, except that it can apply to
-a single character or to a whole string.  Note that locale settings
-may affect what characters are considered C<iscntrl>.  Does not work
-on Unicode characters code point 256 or higher.  Consider using regular
-expressions and the C</[[:cntrl:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:cntrl:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
 
 =item isdigit
 
-This is identical to the C function, except that it can apply to
-a single character or to a whole string.  Note that locale settings
-may affect what characters are considered C<isdigit> (unlikely, but
-still possible). Does not work on Unicode characters code point 256
-or higher.  Consider using regular expressions and the C</[[:digit:]]/>
-construct instead, or the C</\d/> construct.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:digit:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+You may want to use the C<L<E<sol>\dE<sol>|perlrecharclass/Digits>>
+construct instead.
 
 =item isgraph
 
-This is identical to the C function, except that it can apply to
-a single character or to a whole string.  Note that locale settings
-may affect what characters are considered C<isgraph>.  Does not work
-on Unicode characters code point 256 or higher.  Consider using regular
-expressions and the C</[[:graph:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:graph:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
 
 =item islower
 
-This is identical to the C function, except that it can apply to
-a single character or to a whole string.  Note that locale settings
-may affect what characters are considered C<islower>.  Does not work
-on Unicode characters code point 256 or higher.  Consider using regular
-expressions and the C</[[:lower:]]/> construct instead.  Do B<not> use
-C</[a-z]/>.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:lower:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+Do B<not> use C</[a-z]/> unless you don't care about the current locale.
 
 =item isprint
 
-This is identical to the C function, except that it can apply to
-a single character or to a whole string.  Note that locale settings
-may affect what characters are considered C<isprint>.  Does not work
-on Unicode characters code point 256 or higher.  Consider using regular
-expressions and the C</[[:print:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:print:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
 
 =item ispunct
 
-This is identical to the C function, except that it can apply to
-a single character or to a whole string.  Note that locale settings
-may affect what characters are considered C<ispunct>.  Does not work
-on Unicode characters code point 256 or higher.  Consider using regular
-expressions and the C</[[:punct:]]/> construct instead.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:punct:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
 
 =item isspace
 
-This is identical to the C function, except that it can apply to
-a single character or to a whole string.  Note that locale settings
-may affect what characters are considered C<isspace>.  Does not work
-on Unicode characters code point 256 or higher.  Consider using regular
-expressions and the C</[[:space:]]/> construct instead, or the C</\s/>
-construct.  (Note that C</\s/> and C</[[:space:]]/> are slightly
-different in that C</[[:space:]]/> can normally match a vertical tab,
-while C</\s/> does not.)
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:space:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+You may want to use the C<L<E<sol>\sE<sol>|perlrecharclass/Whitespace>>
+construct instead.
 
 =item isupper
 
-This is identical to the C function, except that it can apply to
-a single character or to a whole string.  Note that locale settings
-may affect what characters are considered C<isupper>.  Does not work
-on Unicode characters code point 256 or higher.  Consider using regular
-expressions and the C</[[:upper:]]/> construct instead.  Do B<not> use
-C</[A-Z]/>.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:upper:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
+
+Do B<not> use C</[A-Z]/> unless you don't care about the current locale.
 
 =item isxdigit
 
-This is identical to the C function, except that it can apply to a single
-character or to a whole string.  Note that locale settings may affect what
-characters are considered C<isxdigit> (unlikely, but still possible).
-Does not work on Unicode characters code point 256 or higher.
-Consider using regular expressions and the C</[[:xdigit:]]/>
-construct instead, or simply C</[0-9a-f]/i>.
+Deprecated function whose use raises a warning, and which is slated to
+be removed in a future Perl version.  It is very similar to matching
+against S<C<qr/ ^ [[:xdigit:]]+ $ /x>>, which you should convert to use
+instead.  The function is deprecated because 1) it doesn't handle UTF-8
+encoded strings properly; and 2) it returns C<TRUE> even if the input is
+the empty string.  The function return is always based on the current
+locale, whereas using locale rules is optional with the regular
+expression, based on pragmas in effect and pattern modifiers (see
+L<perlre/Character set modifiers> and L<perlre/Which character set
+modifier is in effect?>).
+
+The function returns C<TRUE> if the input string is empty, or if the
+corresponding C function returns C<TRUE> for every byte in the string.
 
 =item kill
 
diff --git a/ext/POSIX/t/is.t b/ext/POSIX/t/is.t
index e029004b17..0ab328e2f1 100644
--- a/ext/POSIX/t/is.t
+++ b/ext/POSIX/t/is.t
@@ -71,7 +71,7 @@ foreach my $s (keys %classes) {
 
 # Expected number of tests is one each for every combination of a
 # known is<xxx> function and string listed above.
-plan(tests => keys(%classes) * keys(%functions));
+plan(tests => keys(%classes) * keys(%functions) + 1);
 
 # Main test loop: Run all POSIX::is<xxx> tests on each string defined above.
 # Only the character classes listed for that string should return 1.  We
@@ -81,8 +81,41 @@ plan(tests => keys(%classes) * keys(%functions));
 foreach my $s (sort keys %classes) {
     foreach my $f (sort keys %functions) {
 	my $expected = exists $classes{$s}->{$f};
-	my $actual   = eval "POSIX::$f( \$s )";
+	my $actual   = eval "no warnings 'deprecated'; POSIX::$f( \$s )";
 
 	cmp_ok($actual, '==', $expected, "$f('$s')");
     }
 }
+
+{
+    my @warnings;
+    local $SIG {__WARN__} = sub { push @warnings, @_; };
+
+    foreach (0 .. 3) {
+        my $a;
+        $a =POSIX::isalnum("a");
+        $a =POSIX::isalpha("a");
+        $a =POSIX::iscntrl("a");
+        $a =POSIX::isdigit("a");
+        $a =POSIX::isgraph("a");
+        $a =POSIX::islower("a");
+        $a =POSIX::ispunct("a");
+        $a =POSIX::isspace("a");
+        $a =POSIX::isupper("a");
+        $a =POSIX::isxdigit("a");
+        $a =POSIX::isalnum("a");
+        $a =POSIX::isalpha("a");
+        $a =POSIX::iscntrl("a");
+        $a =POSIX::isdigit("a");
+        $a =POSIX::isgraph("a");
+        $a =POSIX::islower("a");
+        $a =POSIX::ispunct("a");
+        $a =POSIX::isspace("a");
+        $a =POSIX::isupper("a");
+        $a =POSIX::isxdigit("a");
+    }
+
+    # Each of the 10 classes should warn twice, because each has 2 lexical
+    # calls
+    is(scalar @warnings, 20);
+}
diff --git a/ext/POSIX/t/posix.t b/ext/POSIX/t/posix.t
index 37292d360f..723a2c333e 100644
--- a/ext/POSIX/t/posix.t
+++ b/ext/POSIX/t/posix.t
@@ -290,6 +290,8 @@ is ($result, undef, "fgets should fail");
 like ($@, qr/^Use method IO::Handle::gets\(\) instead/,
       "check its redef message");
 
+{
+no warnings 'deprecated';
 # Simplistic tests for the isXXX() functions (bug #16799)
 ok( POSIX::isalnum('1'),  'isalnum' );
 ok(!POSIX::isalnum('*'),  'isalnum' );
@@ -320,6 +322,7 @@ ok( POSIX::isalnum(undef),'isalnum undef' );
 # those functions should stringify their arguments
 ok(!POSIX::isalpha([]),   'isalpha []' );
 ok( POSIX::isprint([]),   'isprint []' );
+}
 
 eval { use strict; POSIX->import("S_ISBLK"); my $x = S_ISBLK };
 unlike( $@, qr/Can't use string .* as a symbol ref/, "Can import autoloaded constants" );
author	Karl Williamson <public@khwilliamson.com>	2014-02-12 13:30:01 -0700
committer	Karl Williamson <public@khwilliamson.com>	2014-02-15 16:22:44 -0700
commit	2da736a23e7189ff341f094e8b28d1a903b3aa8d (patch)
tree	c5974d0adff381859b3344660d0e7b661ce8e2a2 /ext
parent	5fd8fad5ea5cd316704373bc784473dd4e3865b0 (diff)
download	perl-2da736a23e7189ff341f094e8b28d1a903b3aa8d.tar.gz