diff options
-rw-r--r-- | pod/perldiag.pod | 3 | ||||
-rw-r--r-- | pod/perlre.pod | 8 | ||||
-rw-r--r-- | regcomp.c | 21 | ||||
-rw-r--r-- | t/op/re_tests | 10 |
4 files changed, 25 insertions, 17 deletions
diff --git a/pod/perldiag.pod b/pod/perldiag.pod index d0f1be8a76..fb5c7e658f 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1681,8 +1681,7 @@ by Perl or by a user-supplied handler. See L<attributes>. =item invalid [] range in regexp (F) The range specified in a character class had a minimum character -greater than the maximum character, or the range didn't start/end with -a literal character. See L<perlre>. +greater than the maximum character. See L<perlre>. =item Invalid conversion in %s: "%s" diff --git a/pod/perlre.pod b/pod/perlre.pod index 9a06305629..1610254da5 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -185,8 +185,9 @@ Use C<\w+> to match a string of Perl-identifier characters (which isn't the same as matching an English word). If C<use locale> is in effect, the list of alphabetic characters generated by C<\w> is taken from the current locale. See L<perllocale>. You may use C<\w>, C<\W>, C<\s>, C<\S>, -C<\d>, and C<\D> within character classes (though not as either end of -a range). See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>. +C<\d>, and C<\D> within character classes, but if you try to use them +as endpoints of a range, that's not a range; the "-" is understood literally. +See L<utf8> for details about C<\pP>, C<\PP>, and C<\X>. The POSIX character class syntax @@ -940,6 +941,9 @@ at the start or end of the list, or escape it with a backslash. (The following all specify the same class of three characters: C<[-az]>, C<[az-]>, and C<[a\-z]>. All are different from C<[a-z]>, which specifies a class containing twenty-six characters.) +Also, if you try to use the character classes C<\w>, C<\W>, C<\s>, +C<\S>, C<\d>, or C<\D> as endpoints of a range, that's not a range; +the "-" is understood literally. 
Note also that the whole range idea is rather unportable between character sets--and even within character sets they may cause results @@ -2364,8 +2364,10 @@ S_regclass(pTHX) } } if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { - if (range) - FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */ + if (range) { + ANYOF_BITMAP_SET(opnd, lastvalue); + ANYOF_BITMAP_SET(opnd, '-'); + } switch (namedclass) { case ANYOF_ALNUM: if (LOC) @@ -2608,6 +2610,8 @@ S_regclass(pTHX) ANYOF_FLAGS(opnd) |= ANYOF_CLASS; continue; } + if (range && namedclass > OOB_NAMEDCLASS) + range = 0; /* [a-\d], [a-[:digit:]], not a true range. */ if (range) { if (lastvalue > value) FAIL("invalid [] range in regexp"); /* [b-a] */ @@ -2617,8 +2621,6 @@ S_regclass(pTHX) lastvalue = value; if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && PL_regcomp_parse[1] != ']') { - if (namedclass > OOB_NAMEDCLASS) - FAIL("invalid [] range in regexp"); /* [\w-a] */ PL_regcomp_parse++; range = 1; continue; /* do it next time */ @@ -2777,9 +2779,10 @@ S_regclassutf8(pTHX) } } if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { - if (range) - FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */ - switch (namedclass) { + if (range) /* [a-\d], [a-[:digit:]] */ + Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */ + "%04"UVxf"\n%002D\n", (UV)lastvalue); + switch (namedclass) { case ANYOF_ALNUM: Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; case ANYOF_NALNUM: @@ -2835,6 +2838,8 @@ S_regclassutf8(pTHX) } continue; } + if (range && namedclass > OOB_NAMEDCLASS) + range = 0; /* [a-\d], [a-[:digit:]], not a true range. 
*/ if (range) { if (lastvalue > value) FAIL("invalid [] range in regexp"); /* [b-a] */ @@ -2846,8 +2851,6 @@ S_regclassutf8(pTHX) lastvalue = value; if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && PL_regcomp_parse[1] != ']') { - if (namedclass > OOB_NAMEDCLASS) - FAIL("invalid [] range in regexp"); /* [\w-a] */ PL_regcomp_parse++; range = 1; continue; /* do it next time */ diff --git a/t/op/re_tests b/t/op/re_tests index 695672da71..974bec5988 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -735,8 +735,10 @@ foo.bart foo.bart y - - .[X](.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - .[X][X](.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - tt+$ xxxtt y - - -[a-\w] - c - /[a-\w]/: invalid [] range in regexp -[\w-z] - c - /[\w-z]/: invalid [] range in regexp -[0-[:digit:]] - c - /[0-[:digit:]]/: invalid [] range in regexp -[[:digit:]-9] - c - /[[:digit:]-9]/: invalid [] range in regexp +([a-\d]+) za-9z y $1 a-9 +([\d-\s]+) a0- z y $1 0- +([\d-z]+) a0-za y $1 0-z +([a-[:digit:]]+) za-9z y $1 a-9 +([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z +([[:digit:]-z]+) =0-z= y $1 0-z \GX.*X aaaXbX n - - |