diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 1999-10-27 21:32:30 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 1999-10-27 21:32:30 +0000 |
commit | bca6c107bab5aabfb35be60303a5b31e9c9ec4a4 (patch) | |
tree | 79bf0e7823de9ee0de4c8197e29dba1118b79184 /regcomp.c | |
parent | d543acb6a7b5779dc3f569b762833cd9da27fd50 (diff) | |
parent | 8f753cb54274cceba5865fd4e206073865f6e190 (diff) | |
download | perl-bca6c107bab5aabfb35be60303a5b31e9c9ec4a4.tar.gz |
integrate cfgperl contents into mainline; merge conflicts
p4raw-id: //depot/perl@4475
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 216 |
1 files changed, 116 insertions, 100 deletions
@@ -844,7 +844,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) DEBUG_r(if (!PL_colorset) reginitcolors()); DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n", PL_colors[4],PL_colors[5],PL_colors[0], - xend - exp, PL_regprecomp, PL_colors[1])); + (int)(xend - exp), PL_regprecomp, PL_colors[1])); PL_regflags = pm->op_pmflags; PL_regsawback = 0; @@ -944,7 +944,12 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) /* Starting-point info. */ again: - if (OP(first) == EXACT); /* Empty, get anchored substr later. */ + if (PL_regkind[(U8)OP(first) == EXACT]) { + if (OP(first) == EXACT); /* Empty, get anchored substr later. */ + else if ((OP(first) == EXACTF || OP(first) == EXACTFL) + && !UTF) + r->regstclass = first; + } else if (strchr((char*)PL_simple+4,OP(first))) r->regstclass = first; else if (PL_regkind[(U8)OP(first)] == BOUND || @@ -986,8 +991,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->reganch |= ROPT_SKIP; /* Scan is after the zeroth branch, first is atomic matcher. */ - DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %d\n", - first - scan + 1)); + DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %"IVdf"\n", + (IV)(first - scan + 1))); /* * If there's something expensive in the r.e., find the * longest literal string that must appear and make it the @@ -2156,7 +2161,7 @@ S_regpposixcc(pTHX_ I32 value) { dTHR; char *posixcc = 0; - I32 namedclass = -1; + I32 namedclass = OOB_NAMEDCLASS; if (value == '[' && PL_regcomp_parse + 1 < PL_regxend && /* I smell either [: or [= or [. -- POSIX has been here, right? */ @@ -2269,7 +2274,7 @@ S_regpposixcc(pTHX_ I32 value) STATIC void S_checkposixcc(pTHX) { - if (ckWARN(WARN_UNSAFE) && !SIZE_ONLY && + if (!SIZE_ONLY && ckWARN(WARN_UNSAFE) && (*PL_regcomp_parse == ':' || *PL_regcomp_parse == '=' || *PL_regcomp_parse == '.')) { @@ -2292,7 +2297,6 @@ STATIC regnode * S_regclass(pTHX) { dTHR; - register char *opnd, *s; register UV value; register I32 lastvalue = OOB_CHAR8; register I32 range = 0; @@ -2301,29 +2305,29 @@ S_regclass(pTHX) I32 numlen; I32 namedclass; char *rangebegin; + bool need_class = 0; - s = opnd = MASK(PL_regcode); ret = reg_node(ANYOF); - for (value = 0; value < ANYOF_SIZE; value++) - REGC(0, s++); + if (SIZE_ONLY) + PL_regsize += ANYOF_SKIP; + else { + ret->flags = 0; + ANYOF_BITMAP_ZERO(ret); + PL_regcode += ANYOF_SKIP; + if (FOLD) + ANYOF_FLAGS(ret) |= ANYOF_FOLD; + if (LOC) + ANYOF_FLAGS(ret) |= ANYOF_LOCALE; + } if (*PL_regcomp_parse == '^') { /* Complement of range. */ PL_regnaughty++; PL_regcomp_parse++; if (!SIZE_ONLY) - ANYOF_FLAGS(opnd) |= ANYOF_INVERT; - } - if (!SIZE_ONLY) { - PL_regcode += ANY_SKIP; - if (FOLD) - ANYOF_FLAGS(opnd) |= ANYOF_FOLD; - if (LOC) - ANYOF_FLAGS(opnd) |= ANYOF_LOCALE; - } - else { - PL_regsize += ANY_SKIP; + ANYOF_FLAGS(ret) |= ANYOF_INVERT; } - checkposixcc(); + if (!SIZE_ONLY && ckWARN(WARN_UNSAFE)) + checkposixcc(); if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-') goto skipcond; /* allow 1st char to be ] or - */ @@ -2374,6 +2378,9 @@ S_regclass(pTHX) } } if (namedclass > OOB_NAMEDCLASS) { + if (!need_class && !SIZE_ONLY) + ANYOF_CLASS_ZERO(ret); + need_class = 1; if (range) { /* a-\d, a-[:digit:] */ if (!SIZE_ONLY) { if (ckWARN(WARN_UNSAFE)) @@ -2383,8 +2390,8 @@ S_regclass(pTHX) PL_regcomp_parse - rangebegin, PL_regcomp_parse - rangebegin, rangebegin); - ANYOF_BITMAP_SET(opnd, lastvalue); - ANYOF_BITMAP_SET(opnd, '-'); + ANYOF_BITMAP_SET(ret, lastvalue); + ANYOF_BITMAP_SET(ret, '-'); } range = 0; /* this is not a true range */ } @@ -2392,235 +2399,235 @@ S_regclass(pTHX) switch (namedclass) { case ANYOF_ALNUM: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ALNUM); + ANYOF_CLASS_SET(ret, ANYOF_ALNUM); else { for (value = 0; value < 256; value++) if (isALNUM(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NALNUM: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NALNUM); + ANYOF_CLASS_SET(ret, ANYOF_NALNUM); else { for (value = 0; value < 256; value++) if (!isALNUM(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_SPACE: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_SPACE); + ANYOF_CLASS_SET(ret, ANYOF_SPACE); else { for (value = 0; value < 256; value++) if (isSPACE(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NSPACE: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NSPACE); + ANYOF_CLASS_SET(ret, ANYOF_NSPACE); else { for (value = 0; value < 256; value++) if (!isSPACE(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_DIGIT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_DIGIT); + ANYOF_CLASS_SET(ret, ANYOF_DIGIT); else { for (value = '0'; value <= '9'; value++) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NDIGIT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NDIGIT); + ANYOF_CLASS_SET(ret, ANYOF_NDIGIT); else { for (value = 0; value < '0'; value++) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); for (value = '9' + 1; value < 256; value++) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NALNUMC: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NALNUMC); + ANYOF_CLASS_SET(ret, ANYOF_NALNUMC); else { for (value = 0; value < 256; value++) if (!isALNUMC(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_ALNUMC: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ALNUMC); + ANYOF_CLASS_SET(ret, ANYOF_ALNUMC); else { for (value = 0; value < 256; value++) if (isALNUMC(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_ALPHA: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ALPHA); + ANYOF_CLASS_SET(ret, ANYOF_ALPHA); else { for (value = 0; value < 256; value++) if (isALPHA(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NALPHA: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NALPHA); + ANYOF_CLASS_SET(ret, ANYOF_NALPHA); else { for (value = 0; value < 256; value++) if (!isALPHA(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_ASCII: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_ASCII); + ANYOF_CLASS_SET(ret, ANYOF_ASCII); else { for (value = 0; value < 128; value++) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NASCII: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NASCII); + ANYOF_CLASS_SET(ret, ANYOF_NASCII); else { for (value = 128; value < 256; value++) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_CNTRL: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_CNTRL); + ANYOF_CLASS_SET(ret, ANYOF_CNTRL); else { for (value = 0; value < 256; value++) if (isCNTRL(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } lastvalue = OOB_CHAR8; break; case ANYOF_NCNTRL: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NCNTRL); + ANYOF_CLASS_SET(ret, ANYOF_NCNTRL); else { for (value = 0; value < 256; value++) if (!isCNTRL(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_GRAPH: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_GRAPH); + ANYOF_CLASS_SET(ret, ANYOF_GRAPH); else { for (value = 0; value < 256; value++) if (isGRAPH(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NGRAPH: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NGRAPH); + ANYOF_CLASS_SET(ret, ANYOF_NGRAPH); else { for (value = 0; value < 256; value++) if (!isGRAPH(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_LOWER: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_LOWER); + ANYOF_CLASS_SET(ret, ANYOF_LOWER); else { for (value = 0; value < 256; value++) if (isLOWER(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NLOWER: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NLOWER); + ANYOF_CLASS_SET(ret, ANYOF_NLOWER); else { for (value = 0; value < 256; value++) if (!isLOWER(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_PRINT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_PRINT); + ANYOF_CLASS_SET(ret, ANYOF_PRINT); else { for (value = 0; value < 256; value++) if (isPRINT(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NPRINT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NPRINT); + ANYOF_CLASS_SET(ret, ANYOF_NPRINT); else { for (value = 0; value < 256; value++) if (!isPRINT(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_PUNCT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_PUNCT); + ANYOF_CLASS_SET(ret, ANYOF_PUNCT); else { for (value = 0; value < 256; value++) if (isPUNCT(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NPUNCT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NPUNCT); + ANYOF_CLASS_SET(ret, ANYOF_NPUNCT); else { for (value = 0; value < 256; value++) if (!isPUNCT(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_UPPER: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_UPPER); + ANYOF_CLASS_SET(ret, ANYOF_UPPER); else { for (value = 0; value < 256; value++) if (isUPPER(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NUPPER: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NUPPER); + ANYOF_CLASS_SET(ret, ANYOF_NUPPER); else { for (value = 0; value < 256; value++) if (!isUPPER(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_XDIGIT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_XDIGIT); + ANYOF_CLASS_SET(ret, ANYOF_XDIGIT); else { for (value = 0; value < 256; value++) if (isXDIGIT(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; case ANYOF_NXDIGIT: if (LOC) - ANYOF_CLASS_SET(opnd, ANYOF_NXDIGIT); + ANYOF_CLASS_SET(ret, ANYOF_NXDIGIT); else { for (value = 0; value < 256; value++) if (!isXDIGIT(value)) - ANYOF_BITMAP_SET(opnd, value); + ANYOF_BITMAP_SET(ret, value); } break; default: @@ -2628,7 +2635,7 @@ S_regclass(pTHX) break; } if (LOC) - ANYOF_FLAGS(opnd) |= ANYOF_CLASS; + ANYOF_FLAGS(ret) |= ANYOF_CLASS; continue; } } @@ -2657,7 +2664,7 @@ S_regclass(pTHX) PL_regcomp_parse - rangebegin, rangebegin); if (!SIZE_ONLY) - ANYOF_BITMAP_SET(opnd, '-'); + ANYOF_BITMAP_SET(ret, '-'); } else range = 1; continue; /* do it next time */ @@ -2673,36 +2680,42 @@ S_regclass(pTHX) if (isLOWER(lastvalue)) { for (i = lastvalue; i <= value; i++) if (isLOWER(i)) - ANYOF_BITMAP_SET(opnd, i); + ANYOF_BITMAP_SET(ret, i); } else { for (i = lastvalue; i <= value; i++) if (isUPPER(i)) - ANYOF_BITMAP_SET(opnd, i); + ANYOF_BITMAP_SET(ret, i); } } else #endif for ( ; lastvalue <= value; lastvalue++) - ANYOF_BITMAP_SET(opnd, lastvalue); + ANYOF_BITMAP_SET(ret, lastvalue); } range = 0; } + if (need_class) { + if (SIZE_ONLY) + PL_regsize += ANYOF_CLASS_ADD_SKIP; + else + PL_regcode += ANYOF_CLASS_ADD_SKIP; + } /* optimize case-insensitive simple patterns (e.g. /[a-z]/i) */ if (!SIZE_ONLY && - (ANYOF_FLAGS(opnd) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) { + (ANYOF_FLAGS(ret) & (ANYOF_FLAGS_ALL ^ ANYOF_INVERT)) == ANYOF_FOLD) { for (value = 0; value < 256; ++value) { - if (ANYOF_BITMAP_TEST(opnd, value)) { + if (ANYOF_BITMAP_TEST(ret, value)) { I32 cf = PL_fold[value]; - ANYOF_BITMAP_SET(opnd, cf); + ANYOF_BITMAP_SET(ret, cf); } } - ANYOF_FLAGS(opnd) &= ~ANYOF_FOLD; + ANYOF_FLAGS(ret) &= ~ANYOF_FOLD; } /* optimize inverted simple patterns (e.g. [^a-z]) */ - if (!SIZE_ONLY && (ANYOF_FLAGS(opnd) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) { + if (!SIZE_ONLY && (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT) { for (value = 0; value < ANYOF_BITMAP_SIZE; ++value) - opnd[ANYOF_BITMAP_OFFSET + value] ^= ANYOF_FLAGS_ALL; - ANYOF_FLAGS(opnd) = 0; + ANYOF_BITMAP(ret)[value] ^= ANYOF_FLAGS_ALL; + ANYOF_FLAGS(ret) = 0; } return ret; } @@ -2711,7 +2724,7 @@ STATIC regnode * S_regclassutf8(pTHX) { dTHR; - register char *opnd, *e; + register char *e; register UV value; register U32 lastvalue = OOB_UTF8; register I32 range = 0; @@ -2737,7 +2750,8 @@ S_regclassutf8(pTHX) listsv = newSVpvn("# comment\n",10); } - checkposixcc(); + if (!SIZE_ONLY && ckWARN(WARN_UNSAFE)) + checkposixcc(); if (*PL_regcomp_parse == ']' || *PL_regcomp_parse == '-') goto skipcond; /* allow 1st char to be ] or - */ @@ -3173,12 +3187,12 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l) if (OP(node) == OPTIMIZED) goto after_print; regprop(sv, node); - PerlIO_printf(Perl_debug_log, "%4d:%*s%s", node - start, + PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start), 2*l + 1, "", SvPVX(sv)); if (next == NULL) /* Next ptr. */ PerlIO_printf(Perl_debug_log, "(0)"); else - PerlIO_printf(Perl_debug_log, "(%d)", next - start); + PerlIO_printf(Perl_debug_log, "(%"IVdf")", (IV)(next - start)); (void)PerlIO_putc(Perl_debug_log, '\n'); after_print: if (PL_regkind[(U8)op] == BRANCHJ) { @@ -3205,7 +3219,7 @@ S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l) } else if (op == ANYOF) { node = NEXTOPER(node); - node += ANY_SKIP; + node += ANYOF_SKIP; } else if (PL_regkind[(U8)op] == EXACT) { /* Literal string, where present. */ @@ -3239,21 +3253,23 @@ Perl_regdump(pTHX_ regexp *r) /* Header fields of interest. */ if (r->anchored_substr) - PerlIO_printf(Perl_debug_log, "anchored `%s%.*s%s'%s at %d ", + PerlIO_printf(Perl_debug_log, + "anchored `%s%.*s%s'%s at %"IVdf" ", PL_colors[0], - SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0), + (int)(SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0)), SvPVX(r->anchored_substr), PL_colors[1], SvTAIL(r->anchored_substr) ? "$" : "", - r->anchored_offset); + (IV)r->anchored_offset); if (r->float_substr) - PerlIO_printf(Perl_debug_log, "floating `%s%.*s%s'%s at %d..%u ", + PerlIO_printf(Perl_debug_log, + "floating `%s%.*s%s'%s at %"IVdf"..%"UVuf" ", PL_colors[0], - SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0), + (int)(SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0)), SvPVX(r->float_substr), PL_colors[1], SvTAIL(r->float_substr) ? "$" : "", - r->float_min_offset, r->float_max_offset); + (IV)r->float_min_offset, (UV)r->float_max_offset); if (r->check_substr) PerlIO_printf(Perl_debug_log, r->check_substr == r->float_substr |