summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- pod/perldiag.pod 3
-rw-r--r-- pod/perlre.pod 8
-rw-r--r-- regcomp.c 21
-rw-r--r-- t/op/re_tests 10
4 files changed, 25 insertions, 17 deletions
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index d0f1be8a76..fb5c7e658f 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1681,8 +1681,7 @@ by Perl or by a user-supplied handler. See L<attributes>.
=item invalid [] range in regexp
(F) The range specified in a character class had a minimum character
-greater than the maximum character, or the range didn't start/end with
-a literal character. See L<perlre>.
+greater than the maximum character. See L<perlre>.
=item Invalid conversion in %s: "%s"
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 9a06305629..1610254da5 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -185,8 +185,9 @@ Use C<\w+> to match a string of Perl-identifier characters (which isn't
the same as matching an English word). If C<use locale> is in effect, the
list of alphabetic characters generated by C<\w> is taken from the
current locale. See L<perllocale>. You may use C<\w>, C<\W>, C<\s>, C<\S>,
-C<\d>, and C<\D> within character classes (though not as either end of
-a range). See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>.
+C<\d>, and C<\D> within character classes, but if you try to use them
+as endpoints of a range, no range is formed; the "-" is understood literally.
+See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>.
The POSIX character class syntax
@@ -940,6 +941,9 @@ at the start or end of the list, or escape it with a backslash. (The
following all specify the same class of three characters: C<[-az]>,
C<[az-]>, and C<[a\-z]>. All are different from C<[a-z]>, which
specifies a class containing twenty-six characters.)
+Also, if you try to use the character classes C<\w>, C<\W>, C<\s>,
+C<\S>, C<\d>, or C<\D> as endpoints of a range, no range is formed;
+the "-" is understood literally.
Note also that the whole range idea is rather unportable between
character sets--and even within character sets they may cause results
diff --git a/regcomp.c b/regcomp.c
index 99423e199d..02dca515a8 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2364,8 +2364,10 @@ S_regclass(pTHX)
}
}
if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
- if (range)
- FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */
+ if (range) {
+ ANYOF_BITMAP_SET(opnd, lastvalue);
+ ANYOF_BITMAP_SET(opnd, '-');
+ }
switch (namedclass) {
case ANYOF_ALNUM:
if (LOC)
@@ -2608,6 +2610,8 @@ S_regclass(pTHX)
ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
continue;
}
+ if (range && namedclass > OOB_NAMEDCLASS)
+ range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
if (range) {
if (lastvalue > value)
FAIL("invalid [] range in regexp"); /* [b-a] */
@@ -2617,8 +2621,6 @@ S_regclass(pTHX)
lastvalue = value;
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
PL_regcomp_parse[1] != ']') {
- if (namedclass > OOB_NAMEDCLASS)
- FAIL("invalid [] range in regexp"); /* [\w-a] */
PL_regcomp_parse++;
range = 1;
continue; /* do it next time */
@@ -2777,9 +2779,10 @@ S_regclassutf8(pTHX)
}
}
if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
- if (range)
- FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */
- switch (namedclass) {
+ if (range) /* [a-\d], [a-[:digit:]] */
+ Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */
+ "%04"UVxf"\n%002D\n", (UV)lastvalue);
+ switch (namedclass) {
case ANYOF_ALNUM:
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
case ANYOF_NALNUM:
@@ -2835,6 +2838,8 @@ S_regclassutf8(pTHX)
}
continue;
}
+ if (range && namedclass > OOB_NAMEDCLASS)
+ range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
if (range) {
if (lastvalue > value)
FAIL("invalid [] range in regexp"); /* [b-a] */
@@ -2846,8 +2851,6 @@ S_regclassutf8(pTHX)
lastvalue = value;
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
PL_regcomp_parse[1] != ']') {
- if (namedclass > OOB_NAMEDCLASS)
- FAIL("invalid [] range in regexp"); /* [\w-a] */
PL_regcomp_parse++;
range = 1;
continue; /* do it next time */
diff --git a/t/op/re_tests b/t/op/re_tests
index 695672da71..974bec5988 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -735,8 +735,10 @@ foo.bart foo.bart y - -
.[X](.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
.[X][X](.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
tt+$ xxxtt y - -
-[a-\w] - c - /[a-\w]/: invalid [] range in regexp
-[\w-z] - c - /[\w-z]/: invalid [] range in regexp
-[0-[:digit:]] - c - /[0-[:digit:]]/: invalid [] range in regexp
-[[:digit:]-9] - c - /[[:digit:]-9]/: invalid [] range in regexp
+([a-\d]+) za-9z y $1 a-9
+([\d-\s]+) a0- z y $1 0-
+([\d-z]+) a0-za y $1 0-z
+([a-[:digit:]]+) za-9z y $1 a-9
+([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z
+([[:digit:]-z]+) =0-z= y $1 0-z
\GX.*X aaaXbX n - -