diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 1999-10-12 15:30:05 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 1999-10-12 15:30:05 +0000 |
commit | 1209ba901e0b2880eea69ad70613848af5543517 (patch) | |
tree | 5f95fbab5155907bf49140ca545292a3e8267190 /regcomp.c | |
parent | 26744161d10595c3db74919d9b07ee8f8354b06a (diff) | |
download | perl-1209ba901e0b2880eea69ad70613848af5543517.tar.gz |
Revert the parts of #3926 that outlawed character ranges
that have character classes such as \w as either endpoint.
This change re-establishes the old behavior, in which
such ranges weren't really ranges: the "-" was literal.
Moreover, this change also fixes the old behavior to be
more consistent: [\w-.] and [\s-\w] worked, but [.-\w] didn't.
Now they all work as described above. Change #3926 had outlawed
all of those.
p4raw-id: //depot/cfgperl@4355
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 21 |
1 files changed, 12 insertions, 9 deletions
@@ -2364,8 +2364,10 @@ S_regclass(pTHX) } } if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { - if (range) - FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */ + if (range) { + ANYOF_BITMAP_SET(opnd, lastvalue); + ANYOF_BITMAP_SET(opnd, '-'); + } switch (namedclass) { case ANYOF_ALNUM: if (LOC) @@ -2608,6 +2610,8 @@ S_regclass(pTHX) ANYOF_FLAGS(opnd) |= ANYOF_CLASS; continue; } + if (range && namedclass > OOB_NAMEDCLASS) + range = 0; /* [a-\d], [a-[:digit:]], not a true range. */ if (range) { if (lastvalue > value) FAIL("invalid [] range in regexp"); /* [b-a] */ @@ -2617,8 +2621,6 @@ S_regclass(pTHX) lastvalue = value; if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && PL_regcomp_parse[1] != ']') { - if (namedclass > OOB_NAMEDCLASS) - FAIL("invalid [] range in regexp"); /* [\w-a] */ PL_regcomp_parse++; range = 1; continue; /* do it next time */ @@ -2777,9 +2779,10 @@ S_regclassutf8(pTHX) } } if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) { - if (range) - FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */ - switch (namedclass) { + if (range) /* [a-\d], [a-[:digit:]] */ + Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */ + "%04"UVxf"\n%002D\n", (UV)lastvalue); + switch (namedclass) { case ANYOF_ALNUM: Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break; case ANYOF_NALNUM: @@ -2835,6 +2838,8 @@ S_regclassutf8(pTHX) } continue; } + if (range && namedclass > OOB_NAMEDCLASS) + range = 0; /* [a-\d], [a-[:digit:]], not a true range. */ if (range) { if (lastvalue > value) FAIL("invalid [] range in regexp"); /* [b-a] */ @@ -2846,8 +2851,6 @@ S_regclassutf8(pTHX) lastvalue = value; if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend && PL_regcomp_parse[1] != ']') { - if (namedclass > OOB_NAMEDCLASS) - FAIL("invalid [] range in regexp"); /* [\w-a] */ PL_regcomp_parse++; range = 1; continue; /* do it next time */ |