summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@khw-desktop.(none)>2010-07-18 12:28:14 -0600
committerRafael Garcia-Suarez <rgs@consttype.org>2010-09-16 15:09:26 +0200
commitc99e91e919b4bb89bab7829a9026ee01b1fff2a1 (patch)
tree5b069e6bd24b358e42e94e01b97424d5a09d9bfc
parenteb06eac93f0120092363c6c7ba87bb7054e76844 (diff)
downloadperl-c99e91e919b4bb89bab7829a9026ee01b1fff2a1.tar.gz
Fix /[\8]/ to not match NULL; give correct warning
8 and 9 are now treated as alphas in parsing, as opposed to illegal octals. This also adds tests to verify that 1-3 digits work in char classes. I created an isOCTAL macro in case that lookup gets moved to a bit field, as I plan to do later, for speed.
-rw-r--r--handy.h5
-rw-r--r--pod/perlop.pod3
-rw-r--r--regcomp.c14
-rw-r--r--t/lib/warnings/regcomp13
-rw-r--r--t/re/re_tests15
5 files changed, 42 insertions, 8 deletions
diff --git a/handy.h b/handy.h
index a1d753dea8..e091a9b8cd 100644
--- a/handy.h
+++ b/handy.h
@@ -455,6 +455,10 @@ whitespace.
Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
digit.
+=for apidoc Am|bool|isOCTAL|char ch
+Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
+octal digit, [0-7].
+
=for apidoc Am|bool|isUPPER|char ch
Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin)
uppercase character.
@@ -516,6 +520,7 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
#define isPSXSPC(c) (isSPACE(c) || (c) == '\v')
#define isBLANK(c) ((c) == ' ' || (c) == '\t')
#define isDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define isOCTAL(c) ((c) >= '0' && (c) <= '7')
#ifdef EBCDIC
/* In EBCDIC we do not do locales: therefore isupper() is fine. */
# define isUPPER(c) isupper(c)
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 08da209fc0..cb0a291d92 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -1141,9 +1141,6 @@ the left with zeros to make three digits. For larger ordinals, either use
C<\o{}>, or convert to something else, such as to hex and use C<\x{}>
instead.
-A backslash followed by a non-octal digit in a bracketed character class
-(C<[\8]> or C<[\9]>) will be interpreted as a NULL character and the digit.
-
Having fewer than 3 digits may lead to a misleading warning message that says
that what follows is ignored. For example, C<"\128"> in the ASCII character set
is equivalent to the two characters C<"\n8">, but the warning C<Illegal octal
diff --git a/regcomp.c b/regcomp.c
index 13f82e2496..1b4e1beb87 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -7555,8 +7555,9 @@ tryagain:
case '0': case '1': case '2': case '3':case '4':
case '5': case '6': case '7': case '8':case '9':
if (*p == '0' ||
- (isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) {
- I32 flags = 0;
+ (isOCTAL(p[1]) && atoi(p) >= RExC_npar))
+ {
+ I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
STRLEN numlen = 3;
ender = grok_oct(p, &numlen, &flags, NULL);
if (ender > 0xff) {
@@ -8179,9 +8180,10 @@ parseit:
value = grok_bslash_c(*RExC_parse++, SIZE_ONLY);
break;
case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
+ case '5': case '6': case '7':
{
- I32 flags = 0;
+ /* Take 1-3 octal digits */
+ I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
numlen = 3;
value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
RExC_parse += numlen;
@@ -8199,10 +8201,12 @@ parseit:
break;
}
default:
- if (!SIZE_ONLY && isALPHA(value))
+ /* Allow \_ to not give a warning */
+ if (!SIZE_ONLY && isALNUM(value) && value != '_') {
ckWARN2reg(RExC_parse,
"Unrecognized escape \\%c in character class passed through",
(int)value);
+ }
break;
}
} /* end of \blah */
diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp
index 6bc684568b..98280f61f4 100644
--- a/t/lib/warnings/regcomp
+++ b/t/lib/warnings/regcomp
@@ -237,3 +237,16 @@ Missing braces on \o{} in regex; marked by <-- HERE in m/[\o <-- HERE ]/ at - li
$a = qr/[\o{}]/;
EXPECT
Number with no digits in regex; marked by <-- HERE in m/[\o{} <-- HERE ]/ at - line 2.
+########
+# regcomp.c [S_regclass]
+use warnings 'regexp' ;
+$a = qr/[\8\9]/;
+$a = qr/[\_\0]/; # Should have no warnings on this and the remainder of this test
+$a = qr/[\07]/;
+$a = qr/[\006]/;
+$a = qr/[\0005]/;
+no warnings 'regexp' ;
+$a = qr/[\8\9]/;
+EXPECT
+Unrecognized escape \8 in character class passed through in regex; marked by <-- HERE in m/[\8 <-- HERE \9]/ at - line 3.
+Unrecognized escape \9 in character class passed through in regex; marked by <-- HERE in m/[\8\9 <-- HERE ]/ at - line 3.
diff --git a/t/re/re_tests b/t/re/re_tests
index 36a2f4cee5..1c45583f43 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1467,4 +1467,19 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
[a\o{400}] \x{100} y $& \x{100}
[a\o{1000}] \x{200} y $& \x{200}
+# The below was inserting a NULL into the character class.
+[\8\9] \000 n - -
+[\8\9] 8 y $& 8
+[\8\9] 9 y $& 9
+
+# Verify that 1-3 octal digits are read, and that \_ works in a char class
+[\0] \000 y $& \000
+[\07] \007 y $& \007
+[\07] 7\000 n - -
+[\006] \006 y $& \006
+[\006] 6\000 n - -
+[\0005] \0005 y $& \000
+[\0005] 5\000 y $& 5
+[\_] _ y $& _
+
# vim: softtabstop=0 noexpandtab