summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi>, 1999-10-14 10:08:44 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi>, 1999-10-14 10:08:44 +0000
commit: 73b437c8b23cd73848b265b0c5a0c71d47dc6532 (patch)
tree: 5f2e0298da733d0eeaffd2d29cb6db617d09c29a
parent: 4ad36b56fc9a36a6ae97dede5c0a0b63c849714d (diff)
download: perl-73b437c8b23cd73848b265b0c5a0c71d47dc6532.tar.gz
Warn about false ranges like \d-\w (see change #4355).
The warning message for invalid ranges (b-a) was also enhanced.

p4raw-link: @4355 on //depot/cfgperl: 1209ba901e0b2880eea69ad70613848af5543517
p4raw-id: //depot/cfgperl@4374
-rw-r--r--  pod/perldelta.pod       19
-rw-r--r--  pod/perldiag.pod         9
-rw-r--r--  regcomp.c              680
-rw-r--r--  t/op/re_tests            8
-rw-r--r--  t/pragma/warn/regcomp   68
5 files changed, 465 insertions, 319 deletions
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index ed395be00e..b4d4d217de 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -15,8 +15,8 @@ This document describes differences between the 5.005 release and this one.
=head2 Perl Source Incompatibilities
-Beware that any new warnings that have been added are B<not> considered
-incompatible changes.
+Beware that any new warnings that have been added, or old warnings that
+have been enhanced, are B<not> considered incompatible changes.
Since all new warnings must be explicitly requested via the C<-w>
switch or the C<warnings> pragma, it is ultimately the programmer's
@@ -1012,7 +1012,7 @@ change#4052
=item Data::Dumper
A C<Maxdepth> setting can be specified to avoid venturing
-too deeply into depp data structures. See L<Data::Dumper>.
+too deeply into deep data structures. See L<Data::Dumper>.
Dumping C<qr//> objects works correctly.
@@ -1342,7 +1342,7 @@ A tutorial on managing class data for object modules.
=back
-=head1 New Diagnostics
+=head1 New or Changed Diagnostics
=over 4
@@ -1561,6 +1561,13 @@ See Server error.
(F) While under the C<use filetest> pragma, switching the real and
effective uids or gids failed.
+=item false [] range "%s" in regexp
+
+(W) A character class range must start and end at a literal character, not
+another character class like C<\d> or C<[:alpha:]>. The "-" in your false
+range is interpreted as a literal "-". Consider quoting the "-" as "\-".
+See L<perlre>.
+
=item Filehandle %s opened only for output
(W) You tried to read from a filehandle opened only for writing. If you
@@ -1624,6 +1631,10 @@ by Perl or by a user-supplied handler. See L<attributes>.
The indicated attributes for a subroutine or variable were not recognized
by Perl or by a user-supplied handler. See L<attributes>.
+=item invalid [] range "%s" in regexp
+
+The offending range is now explicitly displayed.
+
=item Invalid separator character %s in attribute list
(F) Something other than a comma or whitespace was seen between the
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 8988730c8d..11758e0e88 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1380,6 +1380,13 @@ the effect of blessing the reference into the package main. This is
usually not what you want. Consider providing a default target
package, e.g. bless($ref, $p || 'MyPackage');
+=item false [] range "%s" in regexp
+
+(W) A character class range must start and end at a literal character, not
+another character class like C<\d> or C<[:alpha:]>. The "-" in your false
+range is interpreted as a literal "-". Consider quoting the "-" as "\-".
+See L<perlre>.
+
=item Fatal VMS error at %s, line %d
(P) An error peculiar to VMS. Something untoward happened in a VMS system
@@ -1680,7 +1687,7 @@ by Perl or by a user-supplied handler. See L<attributes>.
The indicated attributes for a subroutine or variable were not recognized
by Perl or by a user-supplied handler. See L<attributes>.
-=item invalid [] range in regexp
+=item invalid [] range "%s" in regexp
(F) The range specified in a character class had a minimum character
greater than the maximum character. See L<perlre>.
diff --git a/regcomp.c b/regcomp.c
index 02dca515a8..0dafdd0b9b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2300,6 +2300,7 @@ S_regclass(pTHX)
register I32 def;
I32 numlen;
I32 namedclass;
+ char *rangebegin;
s = opnd = MASK(PL_regcode);
ret = reg_node(ANYOF);
@@ -2329,6 +2330,8 @@ S_regclass(pTHX)
while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
skipcond:
namedclass = OOB_NAMEDCLASS;
+ if (!range)
+ rangebegin = PL_regcomp_parse;
value = UCHARAT(PL_regcomp_parse++);
if (value == '[')
namedclass = regpposixcc(value);
@@ -2363,258 +2366,274 @@ S_regclass(pTHX)
break;
}
}
- if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
- if (range) {
- ANYOF_BITMAP_SET(opnd, lastvalue);
- ANYOF_BITMAP_SET(opnd, '-');
- }
- switch (namedclass) {
- case ANYOF_ALNUM:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
- else {
- for (value = 0; value < 256; value++)
- if (isALNUM(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NALNUM:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
- else {
- for (value = 0; value < 256; value++)
- if (!isALNUM(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_SPACE:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
- else {
- for (value = 0; value < 256; value++)
- if (isSPACE(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NSPACE:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
- else {
- for (value = 0; value < 256; value++)
- if (!isSPACE(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_DIGIT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
- else {
- for (value = '0'; value <= '9'; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NDIGIT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
- else {
- for (value = 0; value < '0'; value++)
- ANYOF_BITMAP_SET(opnd, value);
- for (value = '9' + 1; value < 256; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NALNUMC:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
- else {
- for (value = 0; value < 256; value++)
- if (!isALNUMC(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_ALNUMC:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
- else {
- for (value = 0; value < 256; value++)
- if (isALNUMC(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_ALPHA:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
- else {
- for (value = 0; value < 256; value++)
- if (isALPHA(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NALPHA:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
- else {
- for (value = 0; value < 256; value++)
- if (!isALPHA(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_ASCII:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
- else {
- for (value = 0; value < 128; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NASCII:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
- else {
- for (value = 128; value < 256; value++)
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_CNTRL:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
- else {
- for (value = 0; value < 256; value++)
- if (isCNTRL(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- lastvalue = OOB_CHAR8;
- break;
- case ANYOF_NCNTRL:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
- else {
- for (value = 0; value < 256; value++)
- if (!isCNTRL(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_GRAPH:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
- else {
- for (value = 0; value < 256; value++)
- if (isGRAPH(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NGRAPH:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
- else {
- for (value = 0; value < 256; value++)
- if (!isGRAPH(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_LOWER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
- else {
- for (value = 0; value < 256; value++)
- if (isLOWER(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NLOWER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
- else {
- for (value = 0; value < 256; value++)
- if (!isLOWER(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_PRINT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
- else {
- for (value = 0; value < 256; value++)
- if (isPRINT(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NPRINT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
- else {
- for (value = 0; value < 256; value++)
- if (!isPRINT(value))
- ANYOF_BITMAP_SET(opnd, value);
+ if (namedclass > OOB_NAMEDCLASS) {
+ if (range) { /* a-\d, a-[:digit:] */
+ if (!SIZE_ONLY) {
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ ANYOF_BITMAP_SET(opnd, lastvalue);
+ ANYOF_BITMAP_SET(opnd, '-');
}
- break;
- case ANYOF_PUNCT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
- else {
- for (value = 0; value < 256; value++)
- if (isPUNCT(value))
+ range = 0; /* this is not a true range */
+ }
+ if (!SIZE_ONLY) {
+ switch (namedclass) {
+ case ANYOF_ALNUM:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isALNUM(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NALNUM:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isALNUM(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_SPACE:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isSPACE(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NSPACE:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isSPACE(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_DIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
+ else {
+ for (value = '0'; value <= '9'; value++)
ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NPUNCT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
- else {
- for (value = 0; value < 256; value++)
- if (!isPUNCT(value))
+ }
+ break;
+ case ANYOF_NDIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
+ else {
+ for (value = 0; value < '0'; value++)
ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_UPPER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
- else {
- for (value = 0; value < 256; value++)
- if (isUPPER(value))
+ for (value = '9' + 1; value < 256; value++)
ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_NUPPER:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
- else {
- for (value = 0; value < 256; value++)
- if (!isUPPER(value))
+ }
+ break;
+ case ANYOF_NALNUMC:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isALNUMC(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_ALNUMC:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isALNUMC(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_ALPHA:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isALPHA(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NALPHA:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isALPHA(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_ASCII:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
+ else {
+ for (value = 0; value < 128; value++)
ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- case ANYOF_XDIGIT:
- if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
- else {
- for (value = 0; value < 256; value++)
- if (isXDIGIT(value))
+ }
+ break;
+ case ANYOF_NASCII:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
+ else {
+ for (value = 128; value < 256; value++)
ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_CNTRL:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isCNTRL(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ lastvalue = OOB_CHAR8;
+ break;
+ case ANYOF_NCNTRL:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isCNTRL(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_GRAPH:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isGRAPH(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NGRAPH:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isGRAPH(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_LOWER:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isLOWER(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NLOWER:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isLOWER(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_PRINT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isPRINT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NPRINT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isPRINT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_PUNCT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isPUNCT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NPUNCT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isPUNCT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_UPPER:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isUPPER(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NUPPER:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isUPPER(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_XDIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (isXDIGIT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ case ANYOF_NXDIGIT:
+ if (LOC)
+ ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
+ else {
+ for (value = 0; value < 256; value++)
+ if (!isXDIGIT(value))
+ ANYOF_BITMAP_SET(opnd, value);
+ }
+ break;
+ default:
+ FAIL("invalid [::] class in regexp");
+ break;
}
- break;
- case ANYOF_NXDIGIT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
- else {
- for (value = 0; value < 256; value++)
- if (!isXDIGIT(value))
- ANYOF_BITMAP_SET(opnd, value);
- }
- break;
- default:
- FAIL("invalid [::] class in regexp");
- break;
+ ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
+ continue;
}
- if (LOC)
- ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
- continue;
}
- if (range && namedclass > OOB_NAMEDCLASS)
- range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
if (range) {
- if (lastvalue > value)
- FAIL("invalid [] range in regexp"); /* [b-a] */
+ if (lastvalue > value) /* b-a */ {
+ Perl_croak(aTHX_
+ "/%.127s/: invalid [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ }
range = 0;
}
else {
@@ -2622,7 +2641,18 @@ S_regclass(pTHX)
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
PL_regcomp_parse[1] != ']') {
PL_regcomp_parse++;
- range = 1;
+ if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ if (!SIZE_ONLY)
+ ANYOF_BITMAP_SET(opnd, '-');
+ } else
+ range = 1;
continue; /* do it next time */
}
}
@@ -2684,6 +2714,7 @@ S_regclassutf8(pTHX)
SV *listsv;
U8 flags = 0;
I32 namedclass;
+ char *rangebegin;
if (*PL_regcomp_parse == '^') { /* Complement of range. */
PL_regnaughty++;
@@ -2707,9 +2738,10 @@ S_regclassutf8(pTHX)
while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') {
skipcond:
namedclass = OOB_NAMEDCLASS;
+ if (!range)
+ rangebegin = PL_regcomp_parse;
value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen);
PL_regcomp_parse += numlen;
-
if (value == '[')
namedclass = regpposixcc(value);
else if (value == '\\') {
@@ -2778,73 +2810,89 @@ S_regclassutf8(pTHX)
break;
}
}
- if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
- if (range) /* [a-\d], [a-[:digit:]] */
- Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */
- "%04"UVxf"\n%002D\n", (UV)lastvalue);
- switch (namedclass) {
- case ANYOF_ALNUM:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
- case ANYOF_NALNUM:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break;
- case ANYOF_ALNUMC:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break;
- case ANYOF_NALNUMC:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break;
- case ANYOF_ALPHA:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break;
- case ANYOF_NALPHA:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break;
- case ANYOF_ASCII:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break;
- case ANYOF_NASCII:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break;
- case ANYOF_CNTRL:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break;
- case ANYOF_NCNTRL:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break;
- case ANYOF_GRAPH:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break;
- case ANYOF_NGRAPH:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break;
- case ANYOF_DIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break;
- case ANYOF_NDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break;
- case ANYOF_LOWER:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break;
- case ANYOF_NLOWER:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break;
- case ANYOF_PRINT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break;
- case ANYOF_NPRINT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break;
- case ANYOF_PUNCT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break;
- case ANYOF_NPUNCT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
- case ANYOF_SPACE:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
- case ANYOF_NSPACE:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
- case ANYOF_UPPER:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
- case ANYOF_NUPPER:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break;
- case ANYOF_XDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break;
- case ANYOF_NXDIGIT:
- Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break;
+ if (namedclass > OOB_NAMEDCLASS) {
+ if (range) { /* a-\d, a-[:digit:] */
+ if (!SIZE_ONLY) {
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ Perl_sv_catpvf(aTHX_ listsv,
+ /* 0x002D is Unicode for '-' */
+ "%04"UVxf"\n002D\n", (UV)lastvalue);
+ }
+ range = 0;
+ }
+ if (!SIZE_ONLY) {
+ switch (namedclass) {
+ case ANYOF_ALNUM:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
+ case ANYOF_NALNUM:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break;
+ case ANYOF_ALNUMC:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break;
+ case ANYOF_NALNUMC:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break;
+ case ANYOF_ALPHA:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break;
+ case ANYOF_NALPHA:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break;
+ case ANYOF_ASCII:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break;
+ case ANYOF_NASCII:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break;
+ case ANYOF_CNTRL:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break;
+ case ANYOF_NCNTRL:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break;
+ case ANYOF_GRAPH:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break;
+ case ANYOF_NGRAPH:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break;
+ case ANYOF_DIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break;
+ case ANYOF_NDIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break;
+ case ANYOF_LOWER:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break;
+ case ANYOF_NLOWER:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break;
+ case ANYOF_PRINT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break;
+ case ANYOF_NPRINT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break;
+ case ANYOF_PUNCT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break;
+ case ANYOF_NPUNCT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break;
+ case ANYOF_SPACE:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break;
+ case ANYOF_NSPACE:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break;
+ case ANYOF_UPPER:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break;
+ case ANYOF_NUPPER:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break;
+ case ANYOF_XDIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break;
+ case ANYOF_NXDIGIT:
+ Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break;
+ }
+ continue;
}
- continue;
}
- if (range && namedclass > OOB_NAMEDCLASS)
- range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
if (range) {
- if (lastvalue > value)
- FAIL("invalid [] range in regexp"); /* [b-a] */
- if (!SIZE_ONLY)
- Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", (UV)lastvalue, (UV)value);
+ if (lastvalue > value) { /* b-a */
+ Perl_croak(aTHX_
+ "/%.127s/: invalid [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ }
range = 0;
}
else {
@@ -2852,13 +2900,27 @@ S_regclassutf8(pTHX)
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
PL_regcomp_parse[1] != ']') {
PL_regcomp_parse++;
- range = 1;
+ if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */
+ if (ckWARN(WARN_UNSAFE))
+ Perl_warner(aTHX_ WARN_UNSAFE,
+ "/%.127s/: false [] range \"%*.*s\" in regexp",
+ PL_regprecomp,
+ PL_regcomp_parse - rangebegin,
+ PL_regcomp_parse - rangebegin,
+ rangebegin);
+ if (!SIZE_ONLY)
+ Perl_sv_catpvf(aTHX_ listsv,
+ /* 0x002D is Unicode for '-' */
+ "002D\n");
+ } else
+ range = 1;
continue; /* do it next time */
}
}
/* now is the next time */
if (!SIZE_ONLY)
- Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", (UV)value);
+ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
+ (UV)lastvalue, (UV)value);
range = 0;
}
diff --git a/t/op/re_tests b/t/op/re_tests
index 974bec5988..d72a0f73b2 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -45,7 +45,7 @@ a[b-d]e ace y $& ace
a[b-d] aac y $& ac
a[-b] a- y $& a-
a[b-] a- y $& a-
-a[b-a] - c - /a[b-a]/: invalid [] range in regexp
+a[b-a] - c - /a[b-a]/: invalid [] range "b-a" in regexp
a[]b - c - /a[]b/: unmatched [] in regexp
a[ - c - /a[/: unmatched [] in regexp
a] a] y $& a]
@@ -218,7 +218,7 @@ a[-]?c ac y $& ac
'a[b-d]'i AAC y $& AC
'a[-b]'i A- y $& A-
'a[b-]'i A- y $& A-
-'a[b-a]'i - c - /a[b-a]/: invalid [] range in regexp
+'a[b-a]'i - c - /a[b-a]/: invalid [] range "b-a" in regexp
'a[]b'i - c - /a[]b/: unmatched [] in regexp
'a['i - c - /a[/: unmatched [] in regexp
'a]'i A] y $& A]
@@ -736,9 +736,9 @@ foo.bart foo.bart y - -
.[X][X](.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
tt+$ xxxtt y - -
([a-\d]+) za-9z y $1 a-9
-([\d-\s]+) a0- z y $1 0-
([\d-z]+) a0-za y $1 0-z
+([\d-\s]+) a0- z y $1 0-
([a-[:digit:]]+) za-9z y $1 a-9
-([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z
([[:digit:]-z]+) =0-z= y $1 0-z
+([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z
\GX.*X aaaXbX n - -
diff --git a/t/pragma/warn/regcomp b/t/pragma/warn/regcomp
index 9c3677ee10..88909626db 100644
--- a/t/pragma/warn/regcomp
+++ b/t/pragma/warn/regcomp
@@ -15,8 +15,9 @@
Character class syntax [%c %c] belongs inside character classes [S_checkposixcc]
+ /%.127s/: false [] range \"%*.*s\" in regexp [S_regclass]
-
+ /%.127s/: false [] range \"%*.*s\" in regexp [S_regclassutf8]
__END__
# regcomp.c [S_regpiece]
@@ -73,3 +74,68 @@ Character class syntax [. .] is reserved for future extensions at - line 8.
Character class syntax [= =] is reserved for future extensions at - line 9.
Character class syntax [: :] belongs inside character classes at - line 10.
Character class [:zog:] unknown at - line 19.
+########
+# regcomp.c [S_regclass]
+$_ = "";
+use warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+no warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+EXPECT
+/[a-\d]/: false [] range "a-\d" in regexp at - line 5.
+/[\d-b]/: false [] range "\d-" in regexp at - line 6.
+/[\s-\d]/: false [] range "\s-" in regexp at - line 7.
+/[\d-\s]/: false [] range "\d-" in regexp at - line 8.
+/[a-[:digit:]]/: false [] range "a-[:digit:]" in regexp at - line 9.
+/[[:digit:]-b]/: false [] range "[:digit:]-" in regexp at - line 10.
+/[[:alpha:]-[:digit:]]/: false [] range "[:alpha:]-" in regexp at - line 11.
+/[[:digit:]-[:alpha:]]/: false [] range "[:digit:]-" in regexp at - line 12.
+########
+# regcomp.c [S_regclassutf8]
+use utf8;
+$_ = "";
+use warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+no warnings 'unsafe' ;
+/[a-b]/;
+/[a-\d]/;
+/[\d-b]/;
+/[\s-\d]/;
+/[\d-\s]/;
+/[a-[:digit:]]/;
+/[[:digit:]-b]/;
+/[[:alpha:]-[:digit:]]/;
+/[[:digit:]-[:alpha:]]/;
+EXPECT
+/[a-\d]/: false [] range "a-\d" in regexp at - line 6.
+/[\d-b]/: false [] range "\d-" in regexp at - line 7.
+/[\s-\d]/: false [] range "\s-" in regexp at - line 8.
+/[\d-\s]/: false [] range "\d-" in regexp at - line 9.
+/[a-[:digit:]]/: false [] range "a-[:digit:]" in regexp at - line 10.
+/[[:digit:]-b]/: false [] range "[:digit:]-" in regexp at - line 11.
+/[[:alpha:]-[:digit:]]/: false [] range "[:alpha:]-" in regexp at - line 12.
+/[[:digit:]-[:alpha:]]/: false [] range "[:digit:]-" in regexp at - line 13.