summary | refs | log | tree | commit | diff
path: root/regcomp.c
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 1999-10-12 15:30:05 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 1999-10-12 15:30:05 +0000
commit 1209ba901e0b2880eea69ad70613848af5543517 (patch)
tree 5f95fbab5155907bf49140ca545292a3e8267190 /regcomp.c
parent 26744161d10595c3db74919d9b07ee8f8354b06a (diff)
download perl-1209ba901e0b2880eea69ad70613848af5543517.tar.gz
Revert the parts of #3926 that outlawed character ranges
that have character classes such as \w as either endpoint. This change re-establishes the old behavior, in which such ranges were not really ranges: the "-" was taken literally. This change also makes the old behavior more consistent: previously [\w-.] and [\s-\w] worked, but [.-\w] did not. Now all three forms work as described above, whereas #3926 had outlawed all of them. p4raw-id: //depot/cfgperl@4355
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 21
1 files changed, 12 insertions, 9 deletions
diff --git a/regcomp.c b/regcomp.c
index 99423e199d..02dca515a8 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2364,8 +2364,10 @@ S_regclass(pTHX)
}
}
if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
- if (range)
- FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */
+ if (range) {
+ ANYOF_BITMAP_SET(opnd, lastvalue);
+ ANYOF_BITMAP_SET(opnd, '-');
+ }
switch (namedclass) {
case ANYOF_ALNUM:
if (LOC)
@@ -2608,6 +2610,8 @@ S_regclass(pTHX)
ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
continue;
}
+ if (range && namedclass > OOB_NAMEDCLASS)
+ range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
if (range) {
if (lastvalue > value)
FAIL("invalid [] range in regexp"); /* [b-a] */
@@ -2617,8 +2621,6 @@ S_regclass(pTHX)
lastvalue = value;
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
PL_regcomp_parse[1] != ']') {
- if (namedclass > OOB_NAMEDCLASS)
- FAIL("invalid [] range in regexp"); /* [\w-a] */
PL_regcomp_parse++;
range = 1;
continue; /* do it next time */
@@ -2777,9 +2779,10 @@ S_regclassutf8(pTHX)
}
}
if (!SIZE_ONLY && namedclass > OOB_NAMEDCLASS) {
- if (range)
- FAIL("invalid [] range in regexp"); /* [a-\w], [a-[:word:]] */
- switch (namedclass) {
+ if (range) /* [a-\d], [a-[:digit:]] */
+ Perl_sv_catpvf(aTHX_ listsv, /* 0x002D is Unicode for '-' */
+ "%04"UVxf"\n%002D\n", (UV)lastvalue);
+ switch (namedclass) {
case ANYOF_ALNUM:
Perl_sv_catpvf(aTHX_ listsv, "+utf8::IsWord\n"); break;
case ANYOF_NALNUM:
@@ -2835,6 +2838,8 @@ S_regclassutf8(pTHX)
}
continue;
}
+ if (range && namedclass > OOB_NAMEDCLASS)
+ range = 0; /* [a-\d], [a-[:digit:]], not a true range. */
if (range) {
if (lastvalue > value)
FAIL("invalid [] range in regexp"); /* [b-a] */
@@ -2846,8 +2851,6 @@ S_regclassutf8(pTHX)
lastvalue = value;
if (*PL_regcomp_parse == '-' && PL_regcomp_parse+1 < PL_regxend &&
PL_regcomp_parse[1] != ']') {
- if (namedclass > OOB_NAMEDCLASS)
- FAIL("invalid [] range in regexp"); /* [\w-a] */
PL_regcomp_parse++;
range = 1;
continue; /* do it next time */