summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorGurusamy Sarathy <gsar@cpan.org>1999-10-27 21:32:30 +0000
committerGurusamy Sarathy <gsar@cpan.org>1999-10-27 21:32:30 +0000
commitbca6c107bab5aabfb35be60303a5b31e9c9ec4a4 (patch)
tree79bf0e7823de9ee0de4c8197e29dba1118b79184 /regcomp.c
parentd543acb6a7b5779dc3f569b762833cd9da27fd50 (diff)
parent8f753cb54274cceba5865fd4e206073865f6e190 (diff)
downloadperl-bca6c107bab5aabfb35be60303a5b31e9c9ec4a4.tar.gz
integrate cfgperl contents into mainline; merge conflicts
p4raw-id: //depot/perl@4475
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c216
1 files changed, 116 insertions, 100 deletions
diff --git a/regcomp.c b/regcomp.c
index b522077e25..2d69750d8e 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -844,7 +844,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
DEBUG_r(if (!PL_colorset) reginitcolors());
DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
PL_colors[4],PL_colors[5],PL_colors[0],
- xend - exp, PL_regprecomp, PL_colors[1]));
+ (int)(xend - exp), PL_regprecomp, PL_colors[1]));
PL_regflags = pm->op_pmflags;
PL_regsawback = 0;
@@ -944,7 +944,12 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
/* Starting-point info. */
again:
- if (OP(first) == EXACT); /* Empty, get anchored substr later. */
+ if (PL_regkind[(U8)OP(first) == EXACT]) {
+ if (OP(first) == EXACT); /* Empty, get anchored substr later. */
+ else if ((OP(first) == EXACTF || OP(first) == EXACTFL)
+ && !UTF)
+ r->regstclass = first;
+ }
else if (strchr((char*)PL_simple+4,OP(first)))
r->regstclass = first;
else if (PL_regkind[(U8)OP(first)] == BOUND ||
@@ -986,8 +991,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
r->reganch |= ROPT_SKIP;
/* Scan is after the zeroth branch, first is atomic matcher. */
- DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %d\n",
- first - scan + 1));
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n",
+ (IV)(first - scan + 1)));
/*
* If there's something expensive in the r.e., find the
* longest literal string that must appear and make it the
@@ -2156,7 +2161,7 @@ S_regpposixcc(pTHX_ I32 value)
{
dTHR;
char *posixcc = 0;
- I32 namedclass = -1;
+ I32 namedclass = OOB_NAMEDCLASS;
if (value == '[' && PL_regcomp_parse + 1 < PL_regxend &&
/* I smell either [: or [= or [. -- POSIX has been here, right? */
@@ -2269,7 +2274,7 @@ S_regpposixcc(pTHX_ I32 value)
STATIC void
S_checkposixcc(pTHX)
{
- if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY &&
+ if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) &&
(*PL_regcomp_parse == ':' ||
*PL_regcomp_parse == '=' ||
*PL_regcomp_parse == '.')) {
@@ -2292,7 +2297,6 @@ STATIC regnode *
S_regclass(pTHX)
{
dTHR;
- register char *opnd, *s;
register UV value;
register I32 lastvalue = OOB_CHAR8;
register I32 range = 0;
@@ -2301,29 +2305,29 @@ S_regclass(pTHX)
I32 numlen;
I32 namedclass;
char *rangebegin;
+ bool need_class = 0;
- s = opnd = MASK(PL_regcode);
ret = reg_node(ANYOF);
- for (value = 0; value < ANYOF_SIZE; value++)
- REGC(0, s++);
+ if (SIZE_ONLY)
+ PL_regsize += ANYOF_SKIP;
+ else {
+ ret->flags = 0;
+ ANYOF_BITMAP_ZERO(ret);
+ PL_regcode += ANYOF_SKIP;
+ if (FOLD)
+ ANYOF_FLAGS(ret) |= ANYOF_FOLD;
+ if (LOC)
+ ANYOF_FLAGS(ret) |= ANYOF_LOCALE;
+ }
if (*PL_regcomp_parse == '^') { /* Complement of range. */
PL_regnaughty++;
PL_regcomp_parse++;
if (!SIZE_ONLY)
- ANYOF_FLAGS(opnd) |= ANYOF_INVERT;
- }
- if (!SIZE_ONLY) {
- PL_regcode += ANY_SKIP;
- if (FOLD)
- ANYOF_FLAGS(opnd) |= ANYOF_FOLD;
- if (LOC)
- ANYOF_FLAGS(opnd) |= ANYOF_LOCALE;
- }
- else {
- PL_regsize += ANY_SKIP;
+ ANYOF_FLAGS(ret) |= ANYOF_INVERT;
}
- checkposixcc();
+ if (!SIZE_ONLY && ckWARN(WARN_UNSAFE))
+ checkposixcc();
if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-')
goto skipcond; /* allow 1st char to be ] or - */
@@ -2374,6 +2378,9 @@ S_regclass(pTHX)
}
}
if (namedclass > OOB_NAMEDCLASS) {
+ if (!need_class && !SIZE_ONLY)
+ ANYOF_CLASS_ZERO(ret);
+ need_class = 1;
if (range) { /* a-\d, a-[:digit:] */
if (!SIZE_ONLY) {
if (ckWARN(WARN_UNSAFE))
@@ -2383,8 +2390,8 @@ S_regclass(pTHX)
PL_regcomp_parse - rangebegin,
PL_regcomp_parse - rangebegin,
rangebegin);
- ANYOF_BITMAP_SET(opnd, lastvalue);
- ANYOF_BITMAP_SET(opnd, '-');
+ ANYOF_BITMAP_SET(ret, lastvalue);
+ ANYOF_BITMAP_SET(ret, '-');
}
range = 0; /* this is not a true range */
}
@@ -2392,235 +2399,235 @@ S_regclass(pTHX)
switch (namedclass) {
case ANYOF_ALNUM:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALNUM);
+ ANYOF_CLASS_SET(ret, ANYOF_ALNUM);
else {
for (value = 0; value < 256; value++)
if (isALNUM(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NALNUM:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALNUM);
+ ANYOF_CLASS_SET(ret, ANYOF_NALNUM);
else {
for (value = 0; value < 256; value++)
if (!isALNUM(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_SPACE:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_SPACE);
+ ANYOF_CLASS_SET(ret, ANYOF_SPACE);
else {
for (value = 0; value < 256; value++)
if (isSPACE(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NSPACE:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NSPACE);
+ ANYOF_CLASS_SET(ret, ANYOF_NSPACE);
else {
for (value = 0; value < 256; value++)
if (!isSPACE(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_DIGIT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_DIGIT);
+ ANYOF_CLASS_SET(ret, ANYOF_DIGIT);
else {
for (value = '0'; value <= '9'; value++)
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NDIGIT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT);
+ ANYOF_CLASS_SET(ret, ANYOF_NDIGIT);
else {
for (value = 0; value < '0'; value++)
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
for (value = '9' + 1; value < 256; value++)
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NALNUMC:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC);
+ ANYOF_CLASS_SET(ret, ANYOF_NALNUMC);
else {
for (value = 0; value < 256; value++)
if (!isALNUMC(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_ALNUMC:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC);
+ ANYOF_CLASS_SET(ret, ANYOF_ALNUMC);
else {
for (value = 0; value < 256; value++)
if (isALNUMC(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_ALPHA:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ALPHA);
+ ANYOF_CLASS_SET(ret, ANYOF_ALPHA);
else {
for (value = 0; value < 256; value++)
if (isALPHA(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NALPHA:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NALPHA);
+ ANYOF_CLASS_SET(ret, ANYOF_NALPHA);
else {
for (value = 0; value < 256; value++)
if (!isALPHA(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_ASCII:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_ASCII);
+ ANYOF_CLASS_SET(ret, ANYOF_ASCII);
else {
for (value = 0; value < 128; value++)
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NASCII:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NASCII);
+ ANYOF_CLASS_SET(ret, ANYOF_NASCII);
else {
for (value = 128; value < 256; value++)
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_CNTRL:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_CNTRL);
+ ANYOF_CLASS_SET(ret, ANYOF_CNTRL);
else {
for (value = 0; value < 256; value++)
if (isCNTRL(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
lastvalue = OOB_CHAR8;
break;
case ANYOF_NCNTRL:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL);
+ ANYOF_CLASS_SET(ret, ANYOF_NCNTRL);
else {
for (value = 0; value < 256; value++)
if (!isCNTRL(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_GRAPH:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_GRAPH);
+ ANYOF_CLASS_SET(ret, ANYOF_GRAPH);
else {
for (value = 0; value < 256; value++)
if (isGRAPH(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NGRAPH:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH);
+ ANYOF_CLASS_SET(ret, ANYOF_NGRAPH);
else {
for (value = 0; value < 256; value++)
if (!isGRAPH(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_LOWER:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_LOWER);
+ ANYOF_CLASS_SET(ret, ANYOF_LOWER);
else {
for (value = 0; value < 256; value++)
if (isLOWER(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NLOWER:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NLOWER);
+ ANYOF_CLASS_SET(ret, ANYOF_NLOWER);
else {
for (value = 0; value < 256; value++)
if (!isLOWER(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_PRINT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_PRINT);
+ ANYOF_CLASS_SET(ret, ANYOF_PRINT);
else {
for (value = 0; value < 256; value++)
if (isPRINT(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NPRINT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NPRINT);
+ ANYOF_CLASS_SET(ret, ANYOF_NPRINT);
else {
for (value = 0; value < 256; value++)
if (!isPRINT(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_PUNCT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_PUNCT);
+ ANYOF_CLASS_SET(ret, ANYOF_PUNCT);
else {
for (value = 0; value < 256; value++)
if (isPUNCT(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NPUNCT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT);
+ ANYOF_CLASS_SET(ret, ANYOF_NPUNCT);
else {
for (value = 0; value < 256; value++)
if (!isPUNCT(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_UPPER:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_UPPER);
+ ANYOF_CLASS_SET(ret, ANYOF_UPPER);
else {
for (value = 0; value < 256; value++)
if (isUPPER(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NUPPER:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NUPPER);
+ ANYOF_CLASS_SET(ret, ANYOF_NUPPER);
else {
for (value = 0; value < 256; value++)
if (!isUPPER(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_XDIGIT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT);
+ ANYOF_CLASS_SET(ret, ANYOF_XDIGIT);
else {
for (value = 0; value < 256; value++)
if (isXDIGIT(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
case ANYOF_NXDIGIT:
if (LOC)
- ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT);
+ ANYOF_CLASS_SET(ret, ANYOF_NXDIGIT);
else {
for (value = 0; value < 256; value++)
if (!isXDIGIT(value))
- ANYOF_BITMAP_SET(opnd, value);
+ ANYOF_BITMAP_SET(ret, value);
}
break;
default:
@@ -2628,7 +2635,7 @@ S_regclass(pTHX)
break;
}
if (LOC)
- ANYOF_FLAGS(opnd) |= ANYOF_CLASS;
+ ANYOF_FLAGS(ret) |= ANYOF_CLASS;
continue;
}
}
@@ -2657,7 +2664,7 @@ S_regclass(pTHX)
PL_regcomp_parse - rangebegin,
rangebegin);
if (!SIZE_ONLY)
- ANYOF_BITMAP_SET(opnd, '-');
+ ANYOF_BITMAP_SET(ret, '-');
} else
range = 1;
continue; /* do it next time */
@@ -2673,36 +2680,42 @@ S_regclass(pTHX)
if (isLOWER(lastvalue)) {
for (i = lastvalue; i <= value; i++)
if (isLOWER(i))
- ANYOF_BITMAP_SET(opnd, i);
+ ANYOF_BITMAP_SET(ret, i);
} else {
for (i = lastvalue; i <= value; i++)
if (isUPPER(i))
- ANYOF_BITMAP_SET(opnd, i);
+ ANYOF_BITMAP_SET(ret, i);
}
}
else
#endif
for ( ; lastvalue <= value; lastvalue++)
- ANYOF_BITMAP_SET(opnd, lastvalue);
+ ANYOF_BITMAP_SET(ret, lastvalue);
}
range = 0;
}
+ if (need_class) {
+ if (SIZE_ONLY)
+ PL_regsize += ANYOF_CLASS_ADD_SKIP;
+ else
+ PL_regcode += ANYOF_CLASS_ADD_SKIP;
+ }
/* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */
if (!SIZE_ONLY &&
- (ANYOF_FLAGS(opnd) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) {
+ (ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) {
for (value = 0; value < 256; ++value) {
- if (ANYOF_BITMAP_TEST(opnd, value)) {
+ if (ANYOF_BITMAP_TEST(ret, value)) {
I32 cf = PL_fold[value];
- ANYOF_BITMAP_SET(opnd, cf);
+ ANYOF_BITMAP_SET(ret, cf);
}
}
- ANYOF_FLAGS(opnd) &= ~ANYOF_FOLD;
+ ANYOF_FLAGS(ret) &= ~ANYOF_FOLD;
}
/* optimize inverted simple patterns (e.g. [^a-z]) */
- if (!SIZE_ONLY && (ANYOF_FLAGS(opnd) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
+ if (!SIZE_ONLY && (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) {
for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
- opnd[ANYOF_BITMAP_OFFSET + value] ^= ANYOF_FLAGS_ALL;
- ANYOF_FLAGS(opnd) = 0;
+ ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL;
+ ANYOF_FLAGS(ret) = 0;
}
return ret;
}
@@ -2711,7 +2724,7 @@ STATIC regnode *
S_regclassutf8(pTHX)
{
dTHR;
- register char *opnd, *e;
+ register char *e;
register UV value;
register U32 lastvalue = OOB_UTF8;
register I32 range = 0;
@@ -2737,7 +2750,8 @@ S_regclassutf8(pTHX)
listsv = newSVpvn("# comment\n",10);
}
- checkposixcc();
+ if (!SIZE_ONLY && ckWARN(WARN_UNSAFE))
+ checkposixcc();
if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-')
goto skipcond; /* allow 1st char to be ] or - */
@@ -3173,12 +3187,12 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
if (OP(node) == OPTIMIZED)
goto after_print;
regprop(sv, node);
- PerlIO_printf(Perl_debug_log, "%4d:%*s%s", node - start,
+ PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
2*l + 1, "", SvPVX(sv));
if (next == NULL) /* Next ptr. */
PerlIO_printf(Perl_debug_log, "(0)");
else
- PerlIO_printf(Perl_debug_log, "(%d)", next - start);
+ PerlIO_printf(Perl_debug_log, "(%"IVdf")", (IV)(next - start));
(void)PerlIO_putc(Perl_debug_log, '\n');
after_print:
if (PL_regkind[(U8)op] == BRANCHJ) {
@@ -3205,7 +3219,7 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
}
else if (op == ANYOF) {
node = NEXTOPER(node);
- node += ANY_SKIP;
+ node += ANYOF_SKIP;
}
else if (PL_regkind[(U8)op] == EXACT) {
/* Literal string, where present. */
@@ -3239,21 +3253,23 @@ Perl_regdump(pTHX_ regexp *r)
/* Header fields of interest. */
if (r->anchored_substr)
- PerlIO_printf(Perl_debug_log, "anchored `%s%.*s%s'%s at %d ",
+ PerlIO_printf(Perl_debug_log,
+ "anchored `%s%.*s%s'%s at %"IVdf" ",
PL_colors[0],
- SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0),
+ (int)(SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0)),
SvPVX(r->anchored_substr),
PL_colors[1],
SvTAIL(r->anchored_substr) ? "$" : "",
- r->anchored_offset);
+ (IV)r->anchored_offset);
if (r->float_substr)
- PerlIO_printf(Perl_debug_log, "floating `%s%.*s%s'%s at %d..%u ",
+ PerlIO_printf(Perl_debug_log,
+ "floating `%s%.*s%s'%s at %"IVdf"..%"UVuf" ",
PL_colors[0],
- SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0),
+ (int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)),
SvPVX(r->float_substr),
PL_colors[1],
SvTAIL(r->float_substr) ? "$" : "",
- r->float_min_offset, r->float_max_offset);
+ (IV)r->float_min_offset, (UV)r->float_max_offset);
if (r->check_substr)
PerlIO_printf(Perl_debug_log,
r->check_substr == r->float_substr