Fix /[\8]/ to not match NULL; give correct warning

8 and 9 are now treated as alphas in parsing, as opposed to illegal octals. This also adds tests to verify that 1-3 digits work in char classes. I created an isOCTAL macro in case that lookup gets moved to a bit field, as I plan to do later, for speed.
author: Karl Williamson <khw@khw-desktop.(none)> 2010-07-18 12:28:14 -0600
committer: Rafael Garcia-Suarez <rgs@consttype.org> 2010-09-16 15:09:26 +0200
commit: c99e91e919b4bb89bab7829a9026ee01b1fff2a1 (patch)
tree: 5b069e6bd24b358e42e94e01b97424d5a09d9bfc
parent: eb06eac93f0120092363c6c7ba87bb7054e76844 (diff)
download: perl-c99e91e919b4bb89bab7829a9026ee01b1fff2a1.tar.gz
5 files changed, 42 insertions, 8 deletions
diff --git a/handy.h b/handy.h
index a1d753dea8..e091a9b8cd 100644
--- a/handy.h
+++ b/handy.h
@@ -455,6 +455,10 @@ whitespace.
 Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
 digit.
 
+=for apidoc Am|bool|isOCTAL|char ch
+Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
+octal digit, [0-7].
+
 =for apidoc Am|bool|isUPPER|char ch
 Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
 uppercase character.
@@ -516,6 +520,7 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
 #define isPSXSPC(c)	(isSPACE(c) || (c) == '\v')
 #define isBLANK(c)	((c) == ' ' || (c) == '\t')
 #define isDIGIT(c)	((c) >= '0' && (c) <= '9')
+#define isOCTAL(c)	((c) >= '0' && (c) <= '7')
 #ifdef EBCDIC
     /* In EBCDIC we do not do locales: therefore() isupper() is fine. */
 #   define isUPPER(c)	isupper(c)
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 08da209fc0..cb0a291d92 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -1141,9 +1141,6 @@ the left with zeros to make three digits.  For larger ordinals, either use
 C<\o{}> , or convert to someething else, such as to hex and use C<\x{}>
 instead.
 
-A backslash followed by a non-octal digit in a bracketed character class
-(C<[\8]> or C<[\9]>) will be interpreted as a NULL character and the digit.
-
 Having fewer than 3 digits may lead to a misleading warning message that says
 that what follows is ignored.  For example, C<"\128"> in the ASCII character set
 is equivalent to the two characters C<"\n8">, but the warning C<Illegal octal
diff --git a/regcomp.c b/regcomp.c
index 13f82e2496..1b4e1beb87 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -7555,8 +7555,9 @@ tryagain:
 		    case '0': case '1': case '2': case '3':case '4':
 		    case '5': case '6': case '7': case '8':case '9':
 			if (*p == '0' ||
-			  (isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) {
-                            I32 flags = 0;
+			    (isOCTAL(p[1]) && atoi(p) >= RExC_npar))
+			{
+			    I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
 			    STRLEN numlen = 3;
 			    ender = grok_oct(p, &numlen, &flags, NULL);
 			    if (ender > 0xff) {
@@ -8179,9 +8180,10 @@ parseit:
 		value = grok_bslash_c(*RExC_parse++, SIZE_ONLY);
 		break;
 	    case '0': case '1': case '2': case '3': case '4':
-	    case '5': case '6': case '7': case '8': case '9':
+	    case '5': case '6': case '7':
 		{
-		    I32 flags = 0;
+		    /* Take 1-3 octal digits */
+		    I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
 		    numlen = 3;
 		    value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
 		    RExC_parse += numlen;
@@ -8199,10 +8201,12 @@ parseit:
 		    break;
 		}
 	    default:
-		if (!SIZE_ONLY && isALPHA(value))
+		/* Allow \_ to not give an error */
+		if (!SIZE_ONLY && isALNUM(value) && value != '_') {
 		    ckWARN2reg(RExC_parse,
 			       "Unrecognized escape \\%c in character class passed through",
 			       (int)value);
+		}
 		break;
 	    }
 	} /* end of \blah */
diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp
index 6bc684568b..98280f61f4 100644
--- a/t/lib/warnings/regcomp
+++ b/t/lib/warnings/regcomp
@@ -237,3 +237,16 @@ Missing braces on \o{} in regex; marked by <-- HERE in m/[\o <-- HERE ]/ at - li
 $a = qr/[\o{}]/;
 EXPECT
 Number with no digits in regex; marked by <-- HERE in m/[\o{} <-- HERE ]/ at - line 2.
+########
+# regcomp.c [S_regclass]
+use warnings 'regexp' ;
+$a = qr/[\8\9]/;
+$a = qr/[\_\0]/; # Should have no warnings on this and the remainder of this test
+$a = qr/[\07]/;
+$a = qr/[\006]/;
+$a = qr/[\0005]/;
+no warnings 'regexp' ;
+$a = qr/[\8\9]/;
+EXPECT
+Unrecognized escape \8 in character class passed through in regex; marked by <-- HERE in m/[\8 <-- HERE \9]/ at - line 3.
+Unrecognized escape \9 in character class passed through in regex; marked by <-- HERE in m/[\8\9 <-- HERE ]/ at - line 3.
diff --git a/t/re/re_tests b/t/re/re_tests
index 36a2f4cee5..1c45583f43 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1467,4 +1467,19 @@ abc\N{def	-	c	-	\\N{NAME} must be resolved by the lexer
 [a\o{400}]	\x{100}	y	$&	\x{100}
 [a\o{1000}]	\x{200}	y	$&	\x{200}
 
+# The below was inserting a NULL into the character class.
+[\8\9]	\000	n	-	-
+[\8\9]	8	y	$&	8
+[\8\9]	9	y	$&	9
+
+# Verify that reads 1-3 octal digits, and that \_ works in char class
+[\0]	\000	y	$&	\000
+[\07]	\007	y	$&	\007
+[\07]	7\000	n	-	-
+[\006]	\006	y	$&	\006
+[\006]	6\000	n	-	-
+[\0005]	\0005	y	$&	\000
+[\0005]	5\000	y	$&	5
+[\_]	_	y	$&	_
+
 # vim: softtabstop=0 noexpandtab
author	Karl Williamson <khw@khw-desktop.(none)>	2010-07-18 12:28:14 -0600
committer	Rafael Garcia-Suarez <rgs@consttype.org>	2010-09-16 15:09:26 +0200
commit	c99e91e919b4bb89bab7829a9026ee01b1fff2a1 (patch)
tree	5b069e6bd24b358e42e94e01b97424d5a09d9bfc
parent	eb06eac93f0120092363c6c7ba87bb7054e76844 (diff)
download	perl-c99e91e919b4bb89bab7829a9026ee01b1fff2a1.tar.gz