diff options
author | Ilya Zakharevich <ilya@math.ohio-state.edu> | 1997-02-04 06:02:10 -0500 |
---|---|---|
committer | Chip Salzenberg <chip@atlantic.net> | 1997-02-11 07:29:00 +1200 |
commit | 47109bfb6ec14235ab32462f69f32cdab1822cee (patch) | |
tree | 9979887b385d897bcc7c8e24eaeaed27cf94ea8a /regcomp.c | |
parent | 360e57411903f2d16ad89bcdf4a462fc97239653 (diff) | |
download | perl-47109bfb6ec14235ab32462f69f32cdab1822cee.tar.gz |
Regexp optimizations
Subject: Re: Regexp optimizations
p5p-msgid: <199702051450.JAA13439@rio.atlantic.net>
private-msgid: <199702041102.GAA24805@monk.mps.ohio-state.edu>
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 72 |
1 files changed, 57 insertions, 15 deletions
@@ -145,6 +145,13 @@ PMOP* pm; I32 minlen = 0; I32 sawplus = 0; I32 sawopen = 0; +#define MAX_REPEAT_DEPTH 12 + struct { + char *opcode; + I32 count; + } repeat_stack[MAX_REPEAT_DEPTH]; + I32 repeat_depth = 0; + I32 repeat_count = 1; /* We start unmultiplied. */ if (exp == NULL) croak("NULL regexp argument"); @@ -289,9 +296,9 @@ PMOP* pm; char *t; first = scan; - while (OP(t = regnext(scan)) == CLOSE) + while ((t = regnext(scan)) && OP(t) == CLOSE) scan = t; - minlen += *OPERAND(first); + minlen += *OPERAND(first) * repeat_count; if (curback - backish == len) { sv_catpvn(longish, OPERAND(first)+1, *OPERAND(first)); @@ -310,22 +317,57 @@ PMOP* pm; curback += *OPERAND(first); } else if (strchr(varies,OP(scan))) { - curback = -30000; + int tcount; + char *next; + + if (repeat_depth < MAX_REPEAT_DEPTH + && ((OP(scan) == PLUS + && (tcount = 1) + && (next = NEXTOPER(scan))) + || (regkind[(U8)OP(scan)] == CURLY + && (tcount = ARG1(scan)) + && (next = NEXTOPER(scan)+4)))) + { + /* We treat (abc)+ as (abc)(abc)*. */ + + /* Mark the place to return back. */ + repeat_stack[repeat_depth].opcode = regnext(scan); + repeat_stack[repeat_depth].count = repeat_count; + repeat_depth++; + repeat_count *= tcount; + + /* Go deeper: */ + scan = next; + continue; + } + else { + curback = -30000; + len = 0; + if (SvCUR(longish) > SvCUR(longest)) { + sv_setsv(longest,longish); + backest = backish; + } + sv_setpvn(longish,"",0); + } + } + else if (strchr(simple,OP(scan))) { + curback++; + minlen += repeat_count; len = 0; if (SvCUR(longish) > SvCUR(longest)) { sv_setsv(longest,longish); backest = backish; } sv_setpvn(longish,"",0); - if (OP(scan) == PLUS && strchr(simple,OP(NEXTOPER(scan)))) - minlen++; - else if (regkind[(U8)OP(scan)] == CURLY && - strchr(simple,OP(NEXTOPER(scan)+4))) - minlen += ARG1(scan); } - else if (strchr(simple,OP(scan))) { - curback++; - minlen++; + scan = regnext(scan); + if (!scan) { /* Go up PLUS or CURLY. */ + if (!repeat_depth--) + croak("panic: re scan"); + scan = repeat_stack[repeat_depth].opcode; + repeat_count = repeat_stack[repeat_depth].count; + /* Need to submit the longest string found: */ + curback = -30000; len = 0; if (SvCUR(longish) > SvCUR(longest)) { sv_setsv(longest,longish); @@ -333,12 +375,11 @@ PMOP* pm; } sv_setpvn(longish,"",0); } - scan = regnext(scan); } /* Prefer earlier on tie, unless we can tail match latter */ - if (SvCUR(longish) + (regkind[(U8)OP(first)] == EOL) + if (SvCUR(longish) + (first && regkind[(U8)OP(first)] == EOL) > SvCUR(longest)) { sv_setsv(longest,longish); @@ -357,11 +398,12 @@ PMOP* pm; if (backest < 0) backest = -1; r->regback = backest; - if (SvCUR(longest) > !(sawstudy || regkind[(U8)OP(first)] == EOL)) + if (SvCUR(longest) > !(sawstudy || + (first && regkind[(U8)OP(first)] == EOL))) fbm_compile(r->regmust); (void)SvUPGRADE(r->regmust, SVt_PVBM); BmUSEFUL(r->regmust) = 100; - if (regkind[(U8)OP(first)] == EOL && SvCUR(longish)) + if (first && regkind[(U8)OP(first)] == EOL && SvCUR(longish)) SvTAIL_on(r->regmust); } else { |