diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 1999-10-14 10:08:44 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 1999-10-14 10:08:44 +0000 |
commit | 73b437c8b23cd73848b265b0c5a0c71d47dc6532 (patch) | |
tree | 5f2e0298da733d0eeaffd2d29cb6db617d09c29a /regcomp.c | |
parent | 4ad36b56fc9a36a6ae97dede5c0a0b63c849714d (diff) | |
download | perl-73b437c8b23cd73848b265b0c5a0c71d47dc6532.tar.gz |
Warn about false ranges like \d-\w (see the change #4355).
The invalid ranges (b-a) warning message also enhanced.
p4raw-link: @4355 on //depot/cfgperl: 1209ba901e0b2880eea69ad70613848af5543517
p4raw-id: //depot/cfgperl@4374
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 680 |
1 files changed, 371 insertions, 309 deletions
@@ -2300,6 +2300,7 @@ S_regclass(pTHX) register I32 def; I32 numlen; I32 namedclass; + char *rangebegin; s = opnd = MASK(PL_regcode); ret = reg_node(ANYOF); @@ -2329,6 +2330,8 @@ S_regclass(pTHX) while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') { skipcond: namedclass = OOB_NAMEDCLASS; + if (!range) + rangebegin = PL_regcomp_parse; value = UCHARAT(PL_regcomp_parse++); if (value == '[') namedclass = regpposixcc(value); @@ -2363,258 +2366,274 @@ S_regclass(pTHX) break; } } - if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { - if (range) { - ANYOF_BITMAP_SET(opnd, lastvalue); - ANYOF_BITMAP_SET(opnd, '-'); - } - switch (namedclass) { - case ANYOF_ALNUM: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ALNUM); - else { - for (value = 0; value < 256; value++) - if (isALNUM(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NALNUM: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NALNUM); - else { - for (value = 0; value < 256; value++) - if (!isALNUM(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_SPACE: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_SPACE); - else { - for (value = 0; value < 256; value++) - if (isSPACE(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NSPACE: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NSPACE); - else { - for (value = 0; value < 256; value++) - if (!isSPACE(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_DIGIT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_DIGIT); - else { - for (value = '0'; value <= '9'; value++) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NDIGIT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT); - else { - for (value = 0; value < '0'; value++) - ANYOF_BITMAP_SET(opnd, value); - for (value = '9' + 1; value < 256; value++) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NALNUMC: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC); - else { - for (value = 0; value < 256; value++) - if (!isALNUMC(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_ALNUMC: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC); - else { - for (value = 0; value < 256; value++) - if (isALNUMC(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_ALPHA: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ALPHA); - else { - for (value = 0; value < 256; value++) - if (isALPHA(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NALPHA: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NALPHA); - else { - for (value = 0; value < 256; value++) - if (!isALPHA(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_ASCII: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ASCII); - else { - for (value = 0; value < 128; value++) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NASCII: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NASCII); - else { - for (value = 128; value < 256; value++) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_CNTRL: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_CNTRL); - else { - for (value = 0; value < 256; value++) - if (isCNTRL(value)) - ANYOF_BITMAP_SET(opnd, value); - } - lastvalue = OOB_CHAR8; - break; - case ANYOF_NCNTRL: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL); - else { - for (value = 0; value < 256; value++) - if (!isCNTRL(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_GRAPH: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_GRAPH); - else { - for (value = 0; value < 256; value++) - if (isGRAPH(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NGRAPH: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH); - else { - for (value = 0; value < 256; value++) - if (!isGRAPH(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_LOWER: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_LOWER); - else { - for (value = 0; value < 256; value++) - if (isLOWER(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NLOWER: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NLOWER); - else { - for (value = 0; value < 256; value++) - if (!isLOWER(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_PRINT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_PRINT); - else { - for (value = 0; value < 256; value++) - if (isPRINT(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NPRINT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NPRINT); - else { - for (value = 0; value < 256; value++) - if (!isPRINT(value)) - ANYOF_BITMAP_SET(opnd, value); + if (namedclass > OOB_NAMEDCLASS) { + if (range) { /* a-\d, a-[:digit:] */ + if (!SIZE_ONLY) { + if (ckWARN(WARN_UNSAFE)) + Perl_warner(aTHX_ WARN_UNSAFE, + "/%.127s/: false [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + ANYOF_BITMAP_SET(opnd, lastvalue); + ANYOF_BITMAP_SET(opnd, '-'); } - break; - case ANYOF_PUNCT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_PUNCT); - else { - for (value = 0; value < 256; value++) - if (isPUNCT(value)) + range = 0; /* this is not a true range */ + } + if (!SIZE_ONLY) { + switch (namedclass) { + case ANYOF_ALNUM: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_ALNUM); + else { + for (value = 0; value < 256; value++) + if (isALNUM(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NALNUM: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NALNUM); + else { + for (value = 0; value < 256; value++) + if (!isALNUM(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_SPACE: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_SPACE); + else { + for (value = 0; value < 256; value++) + if (isSPACE(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NSPACE: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NSPACE); + else { + for (value = 0; value < 256; value++) + if (!isSPACE(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_DIGIT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_DIGIT); + else { + for (value = '0'; value <= '9'; value++) ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NPUNCT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT); - else { - for (value = 0; value < 256; value++) - if (!isPUNCT(value)) + } + break; + case ANYOF_NDIGIT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT); + else { + for (value = 0; value < '0'; value++) ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_UPPER: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_UPPER); - else { - for (value = 0; value < 256; value++) - if (isUPPER(value)) + for (value = '9' + 1; value < 256; value++) ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_NUPPER: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NUPPER); - else { - for (value = 0; value < 256; value++) - if (!isUPPER(value)) + } + break; + case ANYOF_NALNUMC: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC); + else { + for (value = 0; value < 256; value++) + if (!isALNUMC(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_ALNUMC: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC); + else { + for (value = 0; value < 256; value++) + if (isALNUMC(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_ALPHA: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_ALPHA); + else { + for (value = 0; value < 256; value++) + if (isALPHA(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NALPHA: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NALPHA); + else { + for (value = 0; value < 256; value++) + if (!isALPHA(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_ASCII: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_ASCII); + else { + for (value = 0; value < 128; value++) ANYOF_BITMAP_SET(opnd, value); - } - break; - case ANYOF_XDIGIT: - if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT); - else { - for (value = 0; value < 256; value++) - if (isXDIGIT(value)) + } + break; + case ANYOF_NASCII: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NASCII); + else { + for (value = 128; value < 256; value++) ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_CNTRL: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_CNTRL); + else { + for (value = 0; value < 256; value++) + if (isCNTRL(value)) + ANYOF_BITMAP_SET(opnd, value); + } + lastvalue = OOB_CHAR8; + break; + case ANYOF_NCNTRL: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL); + else { + for (value = 0; value < 256; value++) + if (!isCNTRL(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_GRAPH: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_GRAPH); + else { + for (value = 0; value < 256; value++) + if (isGRAPH(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NGRAPH: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH); + else { + for (value = 0; value < 256; value++) + if (!isGRAPH(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_LOWER: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_LOWER); + else { + for (value = 0; value < 256; value++) + if (isLOWER(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NLOWER: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NLOWER); + else { + for (value = 0; value < 256; value++) + if (!isLOWER(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_PRINT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_PRINT); + else { + for (value = 0; value < 256; value++) + if (isPRINT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NPRINT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NPRINT); + else { + for (value = 0; value < 256; value++) + if (!isPRINT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_PUNCT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_PUNCT); + else { + for (value = 0; value < 256; value++) + if (isPUNCT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NPUNCT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT); + else { + for (value = 0; value < 256; value++) + if (!isPUNCT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_UPPER: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_UPPER); + else { + for (value = 0; value < 256; value++) + if (isUPPER(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NUPPER: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NUPPER); + else { + for (value = 0; value < 256; value++) + if (!isUPPER(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_XDIGIT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT); + else { + for (value = 0; value < 256; value++) + if (isXDIGIT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + case ANYOF_NXDIGIT: + if (LOC) + ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT); + else { + for (value = 0; value < 256; value++) + if (!isXDIGIT(value)) + ANYOF_BITMAP_SET(opnd, value); + } + break; + default: + FAIL("invalid [::] class in regexp"); + break; } - break; - case ANYOF_NXDIGIT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT); - else { - for (value = 0; value < 256; value++) - if (!isXDIGIT(value)) - ANYOF_BITMAP_SET(opnd, value); - } - break; - default: - FAIL("invalid [::] class in regexp"); - break; + ANYOF_FLAGS(opnd) |= ANYOF_CLASS; + continue; } - if (LOC) - ANYOF_FLAGS(opnd) |= ANYOF_CLASS; - continue; } - if (range && namedclass > OOB_NAMEDCLASS) - range = 0; /* [a-\d], [a-[:digit:]], not a true range. */ if (range) { - if (lastvalue > value) - FAIL("invalid [] range in regexp"); /* [b-a] */ + if (lastvalue > value) /* b-a */ { + Perl_croak(aTHX_ + "/%.127s/: invalid [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + } range = 0; } else { @@ -2622,7 +2641,18 @@ S_regclass(pTHX) if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && PL_regcomp_parse[1] != ']') { PL_regcomp_parse++; - range = 1; + if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ + if (ckWARN(WARN_UNSAFE)) + Perl_warner(aTHX_ WARN_UNSAFE, + "/%.127s/: false [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + if (!SIZE_ONLY) + ANYOF_BITMAP_SET(opnd, '-'); + } else + range = 1; continue; /* do it next time */ } } @@ -2684,6 +2714,7 @@ S_regclassutf8(pTHX) SV *listsv; U8 flags = 0; I32 namedclass; + char *rangebegin; if (*PL_regcomp_parse == '^') { /* Complement of range. */ PL_regnaughty++; @@ -2707,9 +2738,10 @@ S_regclassutf8(pTHX) while (PL_regcomp_parse < PL_regxend && *PL_regcomp_parse != ']') { skipcond: namedclass = OOB_NAMEDCLASS; + if (!range) + rangebegin = PL_regcomp_parse; value = utf8_to_uv((U8*)PL_regcomp_parse, &numlen); PL_regcomp_parse += numlen; - if (value == '[') namedclass = regpposixcc(value); else if (value == '\\') { @@ -2778,73 +2810,89 @@ S_regclassutf8(pTHX) break; } } - if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { - if (range) /* [a-\d], [a-[:digit:]] */ - Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */ - "%04"UVxf"\n%002D\n", (UV)lastvalue); - switch (namedclass) { - case ANYOF_ALNUM: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; - case ANYOF_NALNUM: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break; - case ANYOF_ALNUMC: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break; - case ANYOF_NALNUMC: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break; - case ANYOF_ALPHA: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break; - case ANYOF_NALPHA: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break; - case ANYOF_ASCII: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break; - case ANYOF_NASCII: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break; - case ANYOF_CNTRL: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break; - case ANYOF_NCNTRL: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break; - case ANYOF_GRAPH: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break; - case ANYOF_NGRAPH: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break; - case ANYOF_DIGIT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break; - case ANYOF_NDIGIT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break; - case ANYOF_LOWER: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break; - case ANYOF_NLOWER: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break; - case ANYOF_PRINT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break; - case ANYOF_NPRINT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break; - case ANYOF_PUNCT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break; - case ANYOF_NPUNCT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break; - case ANYOF_SPACE: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; - case ANYOF_NSPACE: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; - case ANYOF_UPPER: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; - case ANYOF_NUPPER: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break; - case ANYOF_XDIGIT: - Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break; - case ANYOF_NXDIGIT: - Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break; + if (namedclass > OOB_NAMEDCLASS) { + if (range) { /* a-\d, a-[:digit:] */ + if (!SIZE_ONLY) { + if (ckWARN(WARN_UNSAFE)) + Perl_warner(aTHX_ WARN_UNSAFE, + "/%.127s/: false [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + Perl_sv_catpvf(aTHX_ listsv, + /* 0x002D is Unicode for '-' */ + "%04"UVxf"\n002D\n", (UV)lastvalue); + } + range = 0; + } + if (!SIZE_ONLY) { + switch (namedclass) { + case ANYOF_ALNUM: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; + case ANYOF_NALNUM: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsWord\n"); break; + case ANYOF_ALNUMC: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlnum\n"); break; + case ANYOF_NALNUMC: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlnum\n"); break; + case ANYOF_ALPHA: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsAlpha\n"); break; + case ANYOF_NALPHA: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsAlpha\n"); break; + case ANYOF_ASCII: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsASCII\n"); break; + case ANYOF_NASCII: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsASCII\n"); break; + case ANYOF_CNTRL: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsCntrl\n"); break; + case ANYOF_NCNTRL: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsCntrl\n"); break; + case ANYOF_GRAPH: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsGraph\n"); break; + case ANYOF_NGRAPH: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsGraph\n"); break; + case ANYOF_DIGIT: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsDigit\n"); break; + case ANYOF_NDIGIT: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsDigit\n"); break; + case ANYOF_LOWER: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsLower\n"); break; + case ANYOF_NLOWER: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsLower\n"); break; + case ANYOF_PRINT: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPrint\n"); break; + case ANYOF_NPRINT: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPrint\n"); break; + case ANYOF_PUNCT: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsPunct\n"); break; + case ANYOF_NPUNCT: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsPunct\n"); break; + case ANYOF_SPACE: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsSpace\n"); break; + case ANYOF_NSPACE: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsSpace\n"); break; + case ANYOF_UPPER: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsUpper\n"); break; + case ANYOF_NUPPER: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsUpper\n"); break; + case ANYOF_XDIGIT: + Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsXDigit\n"); break; + case ANYOF_NXDIGIT: + Perl_sv_catpvf(aTHX_ listsv, "!utf8::IsXDigit\n"); break; + } + continue; } - continue; } - if (range && namedclass > OOB_NAMEDCLASS) - range = 0; /* [a-\d], [a-[:digit:]], not a true range. */ if (range) { - if (lastvalue > value) - FAIL("invalid [] range in regexp"); /* [b-a] */ - if (!SIZE_ONLY) - Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", (UV)lastvalue, (UV)value); + if (lastvalue > value) { /* b-a */ + Perl_croak(aTHX_ + "/%.127s/: invalid [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + } range = 0; } else { @@ -2852,13 +2900,27 @@ S_regclassutf8(pTHX) if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && PL_regcomp_parse[1] != ']') { PL_regcomp_parse++; - range = 1; + if (namedclass > OOB_NAMEDCLASS) { /* \w-, [:word:]- */ + if (ckWARN(WARN_UNSAFE)) + Perl_warner(aTHX_ WARN_UNSAFE, + "/%.127s/: false [] range \"%*.*s\" in regexp", + PL_regprecomp, + PL_regcomp_parse - rangebegin, + PL_regcomp_parse - rangebegin, + rangebegin); + if (!SIZE_ONLY) + Perl_sv_catpvf(aTHX_ listsv, + /* 0x002D is Unicode for '-' */ + "002D\n"); + } else + range = 1; continue; /* do it next time */ } } /* now is the next time */ if (!SIZE_ONLY) - Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n", (UV)value); + Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", + (UV)lastvalue, (UV)value); range = 0; } |