diff options
author | Karl Williamson <khw@khw-desktop.(none)> | 2010-07-18 12:28:14 -0600 |
---|---|---|
committer | Rafael Garcia-Suarez <rgs@consttype.org> | 2010-09-16 15:09:26 +0200 |
commit | c99e91e919b4bb89bab7829a9026ee01b1fff2a1 (patch) | |
tree | 5b069e6bd24b358e42e94e01b97424d5a09d9bfc | |
parent | eb06eac93f0120092363c6c7ba87bb7054e76844 (diff) | |
download | perl-c99e91e919b4bb89bab7829a9026ee01b1fff2a1.tar.gz |
Fix /[\8]/ to not match NULL; give correct warning
8 and 9 are now treated as alphas in parsing, rather than as illegal
octal digits.
This also adds tests to verify that 1-3 digits work in char classes.
I created an isOCTAL macro in case that lookup gets moved to a bit
field, as I plan to do later, for speed.
-rw-r--r-- | handy.h | 5 | ||||
-rw-r--r-- | pod/perlop.pod | 3 | ||||
-rw-r--r-- | regcomp.c | 14 | ||||
-rw-r--r-- | t/lib/warnings/regcomp | 13 | ||||
-rw-r--r-- | t/re/re_tests | 15 |
5 files changed, 42 insertions, 8 deletions
@@ -455,6 +455,10 @@ whitespace. Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin) digit. +=for apidoc Am|bool|isOCTAL|char ch +Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin) +octal digit, [0-7]. + =for apidoc Am|bool|isUPPER|char ch Returns a boolean indicating whether the C C<char> is a US-ASCII (Basic Latin) uppercase character. @@ -516,6 +520,7 @@ patched there. The file as of this writing is cpan/Devel-PPPort/parts/inc/misc #define isPSXSPC(c) (isSPACE(c) || (c) == '\v') #define isBLANK(c) ((c) == ' ' || (c) == '\t') #define isDIGIT(c) ((c) >= '0' && (c) <= '9') +#define isOCTAL(c) ((c) >= '0' && (c) <= '7') #ifdef EBCDIC /* In EBCDIC we do not do locales: therefore() isupper() is fine. */ # define isUPPER(c) isupper(c) diff --git a/pod/perlop.pod b/pod/perlop.pod index 08da209fc0..cb0a291d92 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1141,9 +1141,6 @@ the left with zeros to make three digits. For larger ordinals, either use C<\o{}> , or convert to someething else, such as to hex and use C<\x{}> instead. -A backslash followed by a non-octal digit in a bracketed character class -(C<[\8]> or C<[\9]>) will be interpreted as a NULL character and the digit. - Having fewer than 3 digits may lead to a misleading warning message that says that what follows is ignored. 
For example, C<"\128"> in the ASCII character set is equivalent to the two characters C<"\n8">, but the warning C<Illegal octal @@ -7555,8 +7555,9 @@ tryagain: case '0': case '1': case '2': case '3':case '4': case '5': case '6': case '7': case '8':case '9': if (*p == '0' || - (isDIGIT(p[1]) && atoi(p) >= RExC_npar) ) { - I32 flags = 0; + (isOCTAL(p[1]) && atoi(p) >= RExC_npar)) + { + I32 flags = PERL_SCAN_SILENT_ILLDIGIT; STRLEN numlen = 3; ender = grok_oct(p, &numlen, &flags, NULL); if (ender > 0xff) { @@ -8179,9 +8180,10 @@ parseit: value = grok_bslash_c(*RExC_parse++, SIZE_ONLY); break; case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '5': case '6': case '7': { - I32 flags = 0; + /* Take 1-3 octal digits */ + I32 flags = PERL_SCAN_SILENT_ILLDIGIT; numlen = 3; value = grok_oct(--RExC_parse, &numlen, &flags, NULL); RExC_parse += numlen; @@ -8199,10 +8201,12 @@ parseit: break; } default: - if (!SIZE_ONLY && isALPHA(value)) + /* Allow \_ to not give an error */ + if (!SIZE_ONLY && isALNUM(value) && value != '_') { ckWARN2reg(RExC_parse, "Unrecognized escape \\%c in character class passed through", (int)value); + } break; } } /* end of \blah */ diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp index 6bc684568b..98280f61f4 100644 --- a/t/lib/warnings/regcomp +++ b/t/lib/warnings/regcomp @@ -237,3 +237,16 @@ Missing braces on \o{} in regex; marked by <-- HERE in m/[\o <-- HERE ]/ at - li $a = qr/[\o{}]/; EXPECT Number with no digits in regex; marked by <-- HERE in m/[\o{} <-- HERE ]/ at - line 2. +######## +# regcomp.c [S_regclass] +use warnings 'regexp' ; +$a = qr/[\8\9]/; +$a = qr/[\_\0]/; # Should have no warnings on this and the remainder of this test +$a = qr/[\07]/; +$a = qr/[\006]/; +$a = qr/[\0005]/; +no warnings 'regexp' ; +$a = qr/[\8\9]/; +EXPECT +Unrecognized escape \8 in character class passed through in regex; marked by <-- HERE in m/[\8 <-- HERE \9]/ at - line 3. 
+Unrecognized escape \9 in character class passed through in regex; marked by <-- HERE in m/[\8\9 <-- HERE ]/ at - line 3. diff --git a/t/re/re_tests b/t/re/re_tests index 36a2f4cee5..1c45583f43 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1467,4 +1467,19 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer [a\o{400}] \x{100} y $& \x{100} [a\o{1000}] \x{200} y $& \x{200} +# The below was inserting a NULL into the character class. +[\8\9] \000 n - - +[\8\9] 8 y $& 8 +[\8\9] 9 y $& 9 + +# Verify that reads 1-3 octal digits, and that \_ works in char class +[\0] \000 y $& \000 +[\07] \007 y $& \007 +[\07] 7\000 n - - +[\006] \006 y $& \006 +[\006] 6\000 n - - +[\0005] \0005 y $& \000 +[\0005] 5\000 y $& 5 +[\_] _ y $& _ + # vim: softtabstop=0 noexpandtab |