summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Zakharevich <ilya@math.berkeley.edu>1999-07-31 01:13:38 -0400
committerGurusamy Sarathy <gsar@cpan.org>1999-08-01 21:05:54 +0000
commit82ba1be6639bfd31cc63b76f90d26dc1dafd9221 (patch)
tree552ff3741b6e83f69dea6f59d001065349acec53
parent3cfae81b38bc8edd77142113464ee12c2a2e5af0 (diff)
downloadperl-82ba1be6639bfd31cc63b76f90d26dc1dafd9221.tar.gz
More optimizations to REx engine
Message-Id: <199907311407.IAA25038@localhost.frii.com> p4raw-id: //depot/perl@3857
-rwxr-xr-xembed.pl5
-rw-r--r--embedvar.h20
-rw-r--r--ext/Thread/Thread.xs1
-rw-r--r--objXSUB.h10
-rw-r--r--perl.c1
-rw-r--r--perl.h20
-rw-r--r--proto.h4
-rw-r--r--regcomp.c71
-rw-r--r--regexec.c367
-rw-r--r--t/op/re_tests20
-rw-r--r--thrdvar.h5
11 files changed, 424 insertions, 100 deletions
diff --git a/embed.pl b/embed.pl
index 6260550c43..661a1aca86 100755
--- a/embed.pl
+++ b/embed.pl
@@ -1932,9 +1932,10 @@ s |char*|regwhite |char *|char *
s |char*|nextchar
s |regnode*|dumpuntil |regnode *start|regnode *node \
|regnode *last|SV* sv|I32 l
-s |void |scan_commit |scan_data_t *data
+s |void |scan_commit |struct scan_data_t *data
s |I32 |study_chunk |regnode **scanp|I32 *deltap \
- |regnode *last|scan_data_t *data|U32 flags
+ |regnode *last|struct scan_data_t *data \
+ |U32 flags
s |I32 |add_data |I32 n|char *s
rs |void|re_croak2 |const char* pat1|const char* pat2|...
s |I32 |regpposixcc |I32 value
diff --git a/embedvar.h b/embedvar.h
index 42d96de8bf..39bf22b734 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -63,16 +63,21 @@
#define PL_reg_eval_set (my_perl->Treg_eval_set)
#define PL_reg_flags (my_perl->Treg_flags)
#define PL_reg_ganch (my_perl->Treg_ganch)
+#define PL_reg_leftiter (my_perl->Treg_leftiter)
#define PL_reg_magic (my_perl->Treg_magic)
+#define PL_reg_maxiter (my_perl->Treg_maxiter)
#define PL_reg_oldcurpm (my_perl->Treg_oldcurpm)
#define PL_reg_oldpos (my_perl->Treg_oldpos)
#define PL_reg_oldsaved (my_perl->Treg_oldsaved)
#define PL_reg_oldsavedlen (my_perl->Treg_oldsavedlen)
+#define PL_reg_poscache (my_perl->Treg_poscache)
+#define PL_reg_poscache_size (my_perl->Treg_poscache_size)
#define PL_reg_re (my_perl->Treg_re)
#define PL_reg_start_tmp (my_perl->Treg_start_tmp)
#define PL_reg_start_tmpl (my_perl->Treg_start_tmpl)
#define PL_reg_starttry (my_perl->Treg_starttry)
#define PL_reg_sv (my_perl->Treg_sv)
+#define PL_reg_whilem_seen (my_perl->Treg_whilem_seen)
#define PL_regbol (my_perl->Tregbol)
#define PL_regcc (my_perl->Tregcc)
#define PL_regcode (my_perl->Tregcode)
@@ -193,16 +198,21 @@
#define PL_reg_eval_set (PERL_GET_INTERP->Treg_eval_set)
#define PL_reg_flags (PERL_GET_INTERP->Treg_flags)
#define PL_reg_ganch (PERL_GET_INTERP->Treg_ganch)
+#define PL_reg_leftiter (PERL_GET_INTERP->Treg_leftiter)
#define PL_reg_magic (PERL_GET_INTERP->Treg_magic)
+#define PL_reg_maxiter (PERL_GET_INTERP->Treg_maxiter)
#define PL_reg_oldcurpm (PERL_GET_INTERP->Treg_oldcurpm)
#define PL_reg_oldpos (PERL_GET_INTERP->Treg_oldpos)
#define PL_reg_oldsaved (PERL_GET_INTERP->Treg_oldsaved)
#define PL_reg_oldsavedlen (PERL_GET_INTERP->Treg_oldsavedlen)
+#define PL_reg_poscache (PERL_GET_INTERP->Treg_poscache)
+#define PL_reg_poscache_size (PERL_GET_INTERP->Treg_poscache_size)
#define PL_reg_re (PERL_GET_INTERP->Treg_re)
#define PL_reg_start_tmp (PERL_GET_INTERP->Treg_start_tmp)
#define PL_reg_start_tmpl (PERL_GET_INTERP->Treg_start_tmpl)
#define PL_reg_starttry (PERL_GET_INTERP->Treg_starttry)
#define PL_reg_sv (PERL_GET_INTERP->Treg_sv)
+#define PL_reg_whilem_seen (PERL_GET_INTERP->Treg_whilem_seen)
#define PL_regbol (PERL_GET_INTERP->Tregbol)
#define PL_regcc (PERL_GET_INTERP->Tregcc)
#define PL_regcode (PERL_GET_INTERP->Tregcode)
@@ -864,16 +874,21 @@
#define PL_Treg_eval_set PL_reg_eval_set
#define PL_Treg_flags PL_reg_flags
#define PL_Treg_ganch PL_reg_ganch
+#define PL_Treg_leftiter PL_reg_leftiter
#define PL_Treg_magic PL_reg_magic
+#define PL_Treg_maxiter PL_reg_maxiter
#define PL_Treg_oldcurpm PL_reg_oldcurpm
#define PL_Treg_oldpos PL_reg_oldpos
#define PL_Treg_oldsaved PL_reg_oldsaved
#define PL_Treg_oldsavedlen PL_reg_oldsavedlen
+#define PL_Treg_poscache PL_reg_poscache
+#define PL_Treg_poscache_size PL_reg_poscache_size
#define PL_Treg_re PL_reg_re
#define PL_Treg_start_tmp PL_reg_start_tmp
#define PL_Treg_start_tmpl PL_reg_start_tmpl
#define PL_Treg_starttry PL_reg_starttry
#define PL_Treg_sv PL_reg_sv
+#define PL_Treg_whilem_seen PL_reg_whilem_seen
#define PL_Tregbol PL_regbol
#define PL_Tregcc PL_regcc
#define PL_Tregcode PL_regcode
@@ -1005,16 +1020,21 @@
#define PL_reg_eval_set (thr->Treg_eval_set)
#define PL_reg_flags (thr->Treg_flags)
#define PL_reg_ganch (thr->Treg_ganch)
+#define PL_reg_leftiter (thr->Treg_leftiter)
#define PL_reg_magic (thr->Treg_magic)
+#define PL_reg_maxiter (thr->Treg_maxiter)
#define PL_reg_oldcurpm (thr->Treg_oldcurpm)
#define PL_reg_oldpos (thr->Treg_oldpos)
#define PL_reg_oldsaved (thr->Treg_oldsaved)
#define PL_reg_oldsavedlen (thr->Treg_oldsavedlen)
+#define PL_reg_poscache (thr->Treg_poscache)
+#define PL_reg_poscache_size (thr->Treg_poscache_size)
#define PL_reg_re (thr->Treg_re)
#define PL_reg_start_tmp (thr->Treg_start_tmp)
#define PL_reg_start_tmpl (thr->Treg_start_tmpl)
#define PL_reg_starttry (thr->Treg_starttry)
#define PL_reg_sv (thr->Treg_sv)
+#define PL_reg_whilem_seen (thr->Treg_whilem_seen)
#define PL_regbol (thr->Tregbol)
#define PL_regcc (thr->Tregcc)
#define PL_regcode (thr->Tregcode)
diff --git a/ext/Thread/Thread.xs b/ext/Thread/Thread.xs
index 4043a02e57..ad99e2c409 100644
--- a/ext/Thread/Thread.xs
+++ b/ext/Thread/Thread.xs
@@ -180,6 +180,7 @@ threadstart(void *arg)
Safefree(PL_reg_start_tmp);
SvREFCNT_dec(PL_lastscream);
SvREFCNT_dec(PL_defoutgv);
+ Safefree(PL_reg_poscache);
MUTEX_LOCK(&thr->mutex);
DEBUG_S(PerlIO_printf(PerlIO_stderr(),
diff --git a/objXSUB.h b/objXSUB.h
index 7ae62f384b..c3faf68190 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -636,8 +636,12 @@
#define PL_reg_flags (*Perl_Treg_flags_ptr(aTHXo))
#undef PL_reg_ganch
#define PL_reg_ganch (*Perl_Treg_ganch_ptr(aTHXo))
+#undef PL_reg_leftiter
+#define PL_reg_leftiter (*Perl_Treg_leftiter_ptr(aTHXo))
#undef PL_reg_magic
#define PL_reg_magic (*Perl_Treg_magic_ptr(aTHXo))
+#undef PL_reg_maxiter
+#define PL_reg_maxiter (*Perl_Treg_maxiter_ptr(aTHXo))
#undef PL_reg_oldcurpm
#define PL_reg_oldcurpm (*Perl_Treg_oldcurpm_ptr(aTHXo))
#undef PL_reg_oldpos
@@ -646,6 +650,10 @@
#define PL_reg_oldsaved (*Perl_Treg_oldsaved_ptr(aTHXo))
#undef PL_reg_oldsavedlen
#define PL_reg_oldsavedlen (*Perl_Treg_oldsavedlen_ptr(aTHXo))
+#undef PL_reg_poscache
+#define PL_reg_poscache (*Perl_Treg_poscache_ptr(aTHXo))
+#undef PL_reg_poscache_size
+#define PL_reg_poscache_size (*Perl_Treg_poscache_size_ptr(aTHXo))
#undef PL_reg_re
#define PL_reg_re (*Perl_Treg_re_ptr(aTHXo))
#undef PL_reg_start_tmp
@@ -656,6 +664,8 @@
#define PL_reg_starttry (*Perl_Treg_starttry_ptr(aTHXo))
#undef PL_reg_sv
#define PL_reg_sv (*Perl_Treg_sv_ptr(aTHXo))
+#undef PL_reg_whilem_seen
+#define PL_reg_whilem_seen (*Perl_Treg_whilem_seen_ptr(aTHXo))
#undef PL_regbol
#define PL_regbol (*Perl_Tregbol_ptr(aTHXo))
#undef PL_regcc
diff --git a/perl.c b/perl.c
index 3a3505de93..d81187989f 100644
--- a/perl.c
+++ b/perl.c
@@ -507,6 +507,7 @@ perl_destruct(pTHXx)
Safefree(PL_reg_start_tmp);
if (PL_reg_curpm)
Safefree(PL_reg_curpm);
+ Safefree(PL_reg_poscache);
Safefree(HeKEY_hek(&PL_hv_fetch_ent_mh));
Safefree(PL_op_mask);
nuke_stacks();
diff --git a/perl.h b/perl.h
index 0e43ee4b36..6891b37315 100644
--- a/perl.h
+++ b/perl.h
@@ -1727,25 +1727,7 @@ struct _sublex_info {
typedef struct magic_state MGS; /* struct magic_state defined in mg.c */
-/* Length of a variant. */
-
-typedef struct {
- I32 len_min;
- I32 len_delta;
- I32 pos_min;
- I32 pos_delta;
- SV *last_found;
- I32 last_end; /* min value, <0 unless valid. */
- I32 last_start_min;
- I32 last_start_max;
- SV **longest; /* Either &l_fixed, or &l_float. */
- SV *longest_fixed;
- I32 offset_fixed;
- SV *longest_float;
- I32 offset_float_min;
- I32 offset_float_max;
- I32 flags;
-} scan_data_t;
+struct scan_data_t; /* Used in S_* functions in regcomp.c */
typedef I32 CHECKPOINT;
diff --git a/proto.h b/proto.h
index 7bed4c7215..90b25003b7 100644
--- a/proto.h
+++ b/proto.h
@@ -868,8 +868,8 @@ STATIC void S_regtail(pTHX_ regnode *, regnode *);
STATIC char* S_regwhite(pTHX_ char *, char *);
STATIC char* S_nextchar(pTHX);
STATIC regnode* S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l);
-STATIC void S_scan_commit(pTHX_ scan_data_t *data);
-STATIC I32 S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *data, U32 flags);
+STATIC void S_scan_commit(pTHX_ struct scan_data_t *data);
+STATIC I32 S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, struct scan_data_t *data, U32 flags);
STATIC I32 S_add_data(pTHX_ I32 n, char *s);
STATIC void S_re_croak2(pTHX_ const char* pat1, const char* pat2, ...) __attribute__((noreturn));
STATIC I32 S_regpposixcc(pTHX_ I32 value);
diff --git a/regcomp.c b/regcomp.c
index 2d81da18d4..fac31e6991 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -132,12 +132,33 @@
#define SPSTART 0x4 /* Starts with * or +. */
#define TRYAGAIN 0x8 /* Weeded out a declaration. */
+/* Length of a variant. */
+
+typedef struct scan_data_t {
+ I32 len_min;
+ I32 len_delta;
+ I32 pos_min;
+ I32 pos_delta;
+ SV *last_found;
+ I32 last_end; /* min value, <0 unless valid. */
+ I32 last_start_min;
+ I32 last_start_max;
+ SV **longest; /* Either &l_fixed, or &l_float. */
+ SV *longest_fixed;
+ I32 offset_fixed;
+ SV *longest_float;
+ I32 offset_float_min;
+ I32 offset_float_max;
+ I32 flags;
+ I32 whilem_c;
+} scan_data_t;
+
/*
* Forward declarations for pregcomp()'s friends.
*/
static scan_data_t zero_scan_data = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0 };
+ 0, 0, 0, 0 };
#define SF_BEFORE_EOL (SF_BEFORE_SEOL|SF_BEFORE_MEOL)
#define SF_BEFORE_SEOL 0x1
@@ -328,6 +349,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da
num++;
data_fake.flags = 0;
+ if (data)
+ data_fake.whilem_c = data->whilem_c;
next = regnext(scan);
scan = NEXTOPER(scan);
if (code != BRANCH)
@@ -346,6 +369,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da
pars++;
if (data && (data_fake.flags & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
+ if (data)
+ data->whilem_c = data_fake.whilem_c;
if (code == SUSPEND)
break;
}
@@ -562,6 +587,16 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da
else
oscan->flags = 0;
}
+ else if (OP(oscan) == CURLYX && data && ++data->whilem_c < 16) {
+ /* This stays as CURLYX, and can put the count/of pair. */
+ /* Find WHILEM (as in regexec.c) */
+ regnode *nxt = oscan + NEXT_OFF(oscan);
+
+ if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
+ nxt += ARG(nxt);
+ PREVOPER(nxt)->flags = data->whilem_c
+ | (PL_reg_whilem_seen << 4); /* On WHILEM */
+ }
if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
pars++;
if (flags & SCF_DO_SUBSTR) {
@@ -653,6 +688,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da
regnode *nscan;
data_fake.flags = 0;
+ if (data)
+ data_fake.whilem_c = data->whilem_c;
next = regnext(scan);
nscan = NEXTOPER(NEXTOPER(scan));
minnext = study_chunk(&nscan, &deltanext, last, &data_fake, 0);
@@ -669,6 +706,8 @@ S_study_chunk(pTHX_ regnode **scanp, I32 *deltap, regnode *last, scan_data_t *da
pars++;
if (data && (data_fake.flags & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
+ if (data)
+ data->whilem_c = data_fake.whilem_c;
}
else if (OP(scan) == OPEN) {
pars++;
@@ -787,6 +826,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
I32 minlen = 0;
I32 sawplus = 0;
I32 sawopen = 0;
+ scan_data_t data;
if (exp == NULL)
FAIL("NULL regexp argument");
@@ -798,7 +838,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
PL_regprecomp = savepvn(exp, xend - exp);
DEBUG_r(if (!PL_colorset) reginitcolors());
- DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling%s RE `%s%*s%s'\n",
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
PL_colors[4],PL_colors[5],PL_colors[0],
xend - exp, PL_regprecomp, PL_colors[1]));
PL_regflags = pm->op_pmflags;
@@ -816,6 +856,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
PL_regnpar = 1;
PL_regsize = 0L;
PL_regcode = &PL_regdummy;
+ PL_reg_whilem_seen = 0;
regc((U8)REG_MAGIC, (char*)PL_regcode);
if (reg(0, &flags) == NULL) {
Safefree(PL_regprecomp);
@@ -830,6 +871,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
PL_regsize += PL_extralen;
else
PL_extralen = 0;
+ if (PL_reg_whilem_seen > 15)
+ PL_reg_whilem_seen = 15;
/* Allocate space and initialize. */
Newc(1001, r, sizeof(regexp) + (unsigned)PL_regsize * sizeof(regnode),
@@ -876,12 +919,11 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
3-units-long substrs field. */
Newz(1004, r->substrs, 1, struct reg_substr_data);
+ StructCopy(&zero_scan_data, &data, scan_data_t);
if (OP(scan) != BRANCH) { /* Only one top-level choice. */
- scan_data_t data;
I32 fake;
STRLEN longest_float_length, longest_fixed_length;
- StructCopy(&zero_scan_data, &data, scan_data_t);
first = scan;
/* Skip introductions and multiplicators >= 1. */
while ((OP(first) == OPEN && (sawopen = 1)) ||
@@ -1042,7 +1084,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
DEBUG_r(PerlIO_printf(Perl_debug_log, "\n"));
scan = r->program + 1;
- minlen = study_chunk(&scan, &fake, scan + PL_regsize, NULL, 0);
+ minlen = study_chunk(&scan, &fake, scan + PL_regsize, &data, 0);
r->check_substr = r->anchored_substr = r->float_substr = Nullsv;
}
@@ -1520,8 +1562,10 @@ S_regpiece(pTHX_ I32 *flagp)
reginsert(CURLY, ret);
}
else {
- PL_regnaughty += 4 + PL_regnaughty; /* compound interest */
- regtail(ret, reg_node(WHILEM));
+ regnode *w = reg_node(WHILEM);
+
+ w->flags = 0;
+ regtail(ret, w);
if (!SIZE_ONLY && PL_extralen) {
reginsert(LONGJMP,ret);
reginsert(NOTHING,ret);
@@ -1532,7 +1576,8 @@ S_regpiece(pTHX_ I32 *flagp)
NEXT_OFF(ret) = 3; /* Go over NOTHING to LONGJMP. */
regtail(ret, reg_node(NOTHING));
if (SIZE_ONLY)
- PL_extralen += 3;
+ PL_reg_whilem_seen++, PL_extralen += 3;
+ PL_regnaughty += 4 + PL_regnaughty; /* compound interest */
}
ret->flags = 0;
@@ -3115,16 +3160,18 @@ Perl_regdump(pTHX_ regexp *r)
/* Header fields of interest. */
if (r->anchored_substr)
- PerlIO_printf(Perl_debug_log, "anchored `%s%s%s'%s at %d ",
+ PerlIO_printf(Perl_debug_log, "anchored `%s%.*s%s'%s at %d ",
PL_colors[0],
+ SvCUR(r->anchored_substr) - (SvTAIL(r->anchored_substr)!=0),
SvPVX(r->anchored_substr),
PL_colors[1],
SvTAIL(r->anchored_substr) ? "$" : "",
r->anchored_offset);
if (r->float_substr)
- PerlIO_printf(Perl_debug_log, "floating `%s%s%s'%s at %d..%u ",
+ PerlIO_printf(Perl_debug_log, "floating `%s%.*s%s'%s at %d..%u ",
PL_colors[0],
- SvPVX(r->float_substr),
+ SvCUR(r->float_substr) - (SvTAIL(r->float_substr)!=0),
+ SvPVX(r->float_substr),
PL_colors[1],
SvTAIL(r->float_substr) ? "$" : "",
r->float_min_offset, r->float_max_offset);
@@ -3192,6 +3239,8 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
Perl_sv_catpvf(aTHX_ sv, " {%d,%d}", ARG1(o), ARG2(o));
}
+ else if (k == WHILEM && o->flags) /* Ordinal/of */
+ Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP )
Perl_sv_catpvf(aTHX_ sv, "%d", ARG(o)); /* Parenth number */
else if (k == LOGICAL)
diff --git a/regexec.c b/regexec.c
index e69c4ffd4e..b464a40e8a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -270,25 +270,33 @@ S_cache_re(pTHX_ regexp *prog)
/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
-/* If SCREAM, then sv should be compatible with strpos and strend.
+/* If SCREAM, then SvPVX(sv) should be compatible with strpos and strend.
Otherwise, only SvCUR(sv) is used to get strbeg. */
/* XXXX We assume that strpos is strbeg unless sv. */
+/* A failure to find a constant substring means that there is no need to make
+ an expensive call to REx engine, thus we celebrate a failure. Similarly,
+ finding a substring too deep into the string means that less calls to
+ regtry() should be needed. */
+
char *
Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
char *strend, U32 flags, re_scream_pos_data *data)
{
- I32 start_shift;
+ register I32 start_shift;
/* Should be nonnegative! */
- I32 end_shift;
- char *s;
+ register I32 end_shift;
+ register char *s;
+ register SV *check;
char *t;
I32 ml_anch;
+ char *tmp;
+ register char *other_last = Nullch;
DEBUG_r( if (!PL_colorset) reginitcolors() );
DEBUG_r(PerlIO_printf(Perl_debug_log,
- "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
+ "%sGuessing start of match, REx%s `%s%.60s%s%s' against `%s%.*s%s%s'...\n",
PL_colors[4],PL_colors[5],PL_colors[0],
prog->precomp,
PL_colors[1],
@@ -299,128 +307,296 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
(strend - strpos > 60 ? "..." : ""))
);
- if (prog->minlen > strend - strpos)
+ if (prog->minlen > strend - strpos) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "String too short...\n"));
goto fail;
-
- /* XXXX Move further down? */
- start_shift = prog->check_offset_min; /* okay to underestimate on CC */
- /* Should be nonnegative! */
- end_shift = prog->minlen - start_shift -
- CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
-
- if (prog->reganch & ROPT_ANCH) {
+ }
+ if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */
ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
|| ( (prog->reganch & ROPT_ANCH_BOL)
- && !PL_multiline ) );
+ && !PL_multiline ) ); /* Check after \n? */
if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
- /* Anchored... */
+ /* Substring at constant offset from beg-of-str... */
I32 slen;
if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
- && (sv && (strpos + SvCUR(sv) != strend)) )
+ && (sv && (strpos + SvCUR(sv) != strend)) ) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
goto fail;
-
+ }
PL_regeol = strend; /* Used in HOP() */
- s = (char*)HOP((U8*)strpos, prog->check_offset_min);
+ s = HOPc(strpos, prog->check_offset_min);
if (SvTAIL(prog->check_substr)) {
slen = SvCUR(prog->check_substr); /* >= 1 */
- if ( strend - s > slen || strend - s < slen - 1 ) {
- s = Nullch;
- goto finish;
- }
- if ( strend - s == slen && strend[-1] != '\n') {
- s = Nullch;
- goto finish;
+ if ( strend - s > slen || strend - s < slen - 1
+ || (strend - s == slen && strend[-1] != '\n')) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
+ goto fail_finish;
}
/* Now should match s[0..slen-2] */
slen--;
if (slen && (*SvPVX(prog->check_substr) != *s
|| (slen > 1
- && memNE(SvPVX(prog->check_substr), s, slen))))
- s = Nullch;
+ && memNE(SvPVX(prog->check_substr), s, slen)))) {
+ report_neq:
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
+ goto fail_finish;
+ }
}
else if (*SvPVX(prog->check_substr) != *s
|| ((slen = SvCUR(prog->check_substr)) > 1
&& memNE(SvPVX(prog->check_substr), s, slen)))
- s = Nullch;
- else
- s = strpos;
- goto finish;
+ goto report_neq;
+ goto success_at_start;
}
+ /* Match is anchored, but substr is not anchored wrt beg-of-str. */
s = strpos;
- if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen))
- end_shift += strend - s - prog->minlen - prog->check_offset_max;
+ start_shift = prog->check_offset_min; /* okay to underestimate on CC */
+ /* Should be nonnegative! */
+ end_shift = prog->minlen - start_shift -
+ CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
+ if (!ml_anch) {
+ I32 end = prog->check_offset_max + CHR_SVLEN(prog->check_substr)
+ - (SvTAIL(prog->check_substr) != 0);
+ I32 eshift = strend - s - end;
+
+ if (end_shift < eshift)
+ end_shift = eshift;
+ }
}
- else {
+ else { /* Can match at random position */
ml_anch = 0;
s = strpos;
+ start_shift = prog->check_offset_min; /* okay to underestimate on CC */
+ /* Should be nonnegative! */
+ end_shift = prog->minlen - start_shift -
+ CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
}
- restart:
+#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
if (end_shift < 0)
- end_shift = 0; /* can happen when strend == strpos */
+ croak("panic: end_shift");
+#endif
+
+ check = prog->check_substr;
+ restart:
+ /* Find a possible match in the region s..strend by looking for
+ the "check" substring in the region corrected by start/end_shift. */
if (flags & REXEC_SCREAM) {
- SV *c = prog->check_substr;
char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
I32 p = -1; /* Internal iterator of scream. */
I32 *pp = data ? data->scream_pos : &p;
- if (PL_screamfirst[BmRARE(c)] >= 0
- || ( BmRARE(c) == '\n'
- && (BmPREVIOUS(c) == SvCUR(c) - 1)
- && SvTAIL(c) ))
- s = screaminstr(sv, prog->check_substr,
- start_shift + (strpos - strbeg), end_shift, pp, 0);
+ if (PL_screamfirst[BmRARE(check)] >= 0
+ || ( BmRARE(check) == '\n'
+ && (BmPREVIOUS(check) == SvCUR(check) - 1)
+ && SvTAIL(check) ))
+ s = screaminstr(sv, check,
+ start_shift + (s - strbeg), end_shift, pp, 0);
else
- s = Nullch;
+ goto fail_finish;
if (data)
*data->scream_olds = s;
}
else
s = fbm_instr((unsigned char*)s + start_shift,
(unsigned char*)strend - end_shift,
- prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
+ check, PL_multiline ? FBMrf_MULTILINE : 0);
/* Update the count-of-usability, remove useless subpatterns,
unshift s. */
- finish:
- if (!s) {
- ++BmUSEFUL(prog->check_substr); /* hooray */
- goto fail; /* not present */
+
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%s %s substr `%s%.*s%s'%s%s",
+ (s ? "Found" : "Did not find"),
+ ((check == prog->anchored_substr) ? "anchored" : "floating"),
+ PL_colors[0],
+ SvCUR(check) - (SvTAIL(check)!=0), SvPVX(check),
+ PL_colors[1], (SvTAIL(check) ? "$" : ""),
+ (s ? " at offset " : "...\n") ) );
+
+ if (!s)
+ goto fail_finish;
+
+ /* Finish the diagnostic message */
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - strpos)) );
+
+ /* Got a candidate. Check MBOL anchoring, and the *other* substr.
+ Start with the other substr.
+ XXXX no SCREAM optimization yet - and a very coarse implementation
+ XXXX /ttx+/ results in anchored=`ttx', floating=`x'. floating will
+ *always* match. Probably should be marked during compile...
+ Probably it is right to do no SCREAM here...
+ */
+
+ if (prog->float_substr && prog->anchored_substr) {
+ /* Take into account the anchored substring. */
+ /* XXXX May be hopelessly wrong for UTF... */
+ if (!other_last)
+ other_last = strpos - 1;
+ if (check == prog->float_substr) {
+ char *last = s - start_shift, *last1, *last2;
+ char *s1 = s;
+
+ tmp = PL_bostr;
+ t = s - prog->check_offset_max;
+ if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
+ && (!(prog->reganch & ROPT_UTF8)
+ || (PL_bostr = strpos, /* Used in regcopmaybe() */
+ (t = reghopmaybe_c(s, -(prog->check_offset_max)))
+ && t > strpos)))
+ ;
+ else
+ t = strpos;
+ t += prog->anchored_offset;
+ if (t <= other_last)
+ t = other_last + 1;
+ PL_bostr = tmp;
+ last2 = last1 = strend - prog->minlen;
+ if (last < last1)
+ last1 = last;
+ /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
+ /* On end-of-str: see comment below. */
+ s = fbm_instr((unsigned char*)t,
+ (unsigned char*)last1 + prog->anchored_offset
+ + SvCUR(prog->anchored_substr)
+ - (SvTAIL(prog->anchored_substr)!=0),
+ prog->anchored_substr, PL_multiline ? FBMrf_MULTILINE : 0);
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%s anchored substr `%s%.*s%s'%s",
+ (s ? "Found" : "Contradicts"),
+ PL_colors[0],
+ SvCUR(prog->anchored_substr)
+ - (SvTAIL(prog->anchored_substr)!=0),
+ SvPVX(prog->anchored_substr),
+ PL_colors[1], (SvTAIL(prog->anchored_substr) ? "$" : "")));
+ if (!s) {
+ if (last1 >= last2) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ ", giving up...\n"));
+ goto fail_finish;
+ }
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ ", trying floating at offset %ld...\n",
+ (long)(s1 + 1 - strpos)));
+ PL_regeol = strend; /* Used in HOP() */
+ other_last = last1 + prog->anchored_offset;
+ s = HOPc(last, 1);
+ goto restart;
+ }
+ else {
+ DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
+ (long)(s - strpos)));
+ t = s - prog->anchored_offset;
+ other_last = s - 1;
+ if (t == strpos)
+ goto try_at_start;
+ s = s1;
+ goto try_at_offset;
+ }
+ }
+ else { /* Take into account the floating substring. */
+ char *last, *last1;
+ char *s1 = s;
+
+ t = s - start_shift;
+ last1 = last = strend - prog->minlen + prog->float_min_offset;
+ if (last - t > prog->float_max_offset)
+ last = t + prog->float_max_offset;
+ s = t + prog->float_min_offset;
+ if (s <= other_last)
+ s = other_last + 1;
+ /* XXXX It is not documented what units *_offsets are in. Assume bytes. */
+ /* fbm_instr() takes into account exact value of end-of-str
+ if the check is SvTAIL(ed). Since false positives are OK,
+ and end-of-str is not later than strend we are OK. */
+ s = fbm_instr((unsigned char*)s,
+ (unsigned char*)last + SvCUR(prog->float_substr)
+ - (SvTAIL(prog->float_substr)!=0),
+ prog->float_substr, PL_multiline ? FBMrf_MULTILINE : 0);
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%s floating substr `%s%.*s%s'%s",
+ (s ? "Found" : "Contradicts"),
+ PL_colors[0],
+ SvCUR(prog->float_substr)
+ - (SvTAIL(prog->float_substr)!=0),
+ SvPVX(prog->float_substr),
+ PL_colors[1], (SvTAIL(prog->float_substr) ? "$" : "")));
+ if (!s) {
+ if (last1 == last) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ ", giving up...\n"));
+ goto fail_finish;
+ }
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ ", trying anchored starting at offset %ld...\n",
+ (long)(s1 + 1 - strpos)));
+ other_last = last;
+ PL_regeol = strend; /* Used in HOP() */
+ s = HOPc(t, 1);
+ goto restart;
+ }
+ else {
+ DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
+ (long)(s - strpos)));
+ other_last = s - 1;
+ if (t == strpos)
+ goto try_at_start;
+ s = s1;
+ goto try_at_offset;
+ }
+ }
}
- else if (s - strpos > prog->check_offset_max &&
- ((prog->reganch & ROPT_UTF8)
- ? ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
- && t >= strpos)
- : (t = s - prog->check_offset_max) != 0) ) {
+
+ t = s - prog->check_offset_max;
+ tmp = PL_bostr;
+ if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
+ && (!(prog->reganch & ROPT_UTF8)
+ || (PL_bostr = strpos, /* Used in regcopmaybe() */
+ ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
+ && t > strpos)))) {
+ PL_bostr = tmp;
+ /* Fixed substring is found far enough so that the match
+ cannot start at strpos. */
+ try_at_offset:
if (ml_anch && t[-1] != '\n') {
- find_anchor:
- while (t < strend - end_shift - prog->minlen) {
+ find_anchor: /* Eventually fbm_*() should handle this */
+ while (t < strend - prog->minlen) {
if (*t == '\n') {
if (t < s - prog->check_offset_min) {
s = t + 1;
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
+ PL_colors[0],PL_colors[1], (long)(s - strpos)));
goto set_useful;
}
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting at offset %ld...\n",
+ PL_colors[0],PL_colors[1], (long)(t + 1 - strpos)));
s = t + 1;
goto restart;
}
t++;
}
- s = Nullch;
- goto finish;
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
+ PL_colors[0],PL_colors[1]));
+ goto fail_finish;
}
s = t;
set_useful:
- ++BmUSEFUL(prog->check_substr); /* hooray/2 */
+ ++BmUSEFUL(prog->check_substr); /* hooray/5 */
}
else {
- if (ml_anch && sv
+ PL_bostr = tmp;
+ /* The found string does not prohibit matching at beg-of-str
+ - no optimization of calling REx engine can be performed,
+ unless it was an MBOL and we are not after MBOL. */
+ try_at_start:
+ /* Even in this situation we may use MBOL flag if strpos is offset
+ wrt the start of the string. */
+ if (ml_anch && sv
&& (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
t = strpos;
goto find_anchor;
}
+ success_at_start:
if (!(prog->reganch & ROPT_NAUGHTY)
&& --BmUSEFUL(prog->check_substr) < 0
&& prog->check_substr == prog->float_substr) { /* boo */
@@ -435,11 +611,14 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
s = strpos;
}
- DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n",
- PL_colors[4],PL_colors[5], (long)(s - strpos)) );
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
+ PL_colors[4], PL_colors[5], (long)(s - strpos)) );
return s;
+
+ fail_finish: /* Substring not found */
+ BmUSEFUL(prog->check_substr) += 5; /* hooray */
fail:
- DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n",
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
PL_colors[4],PL_colors[5]));
return Nullch;
}
@@ -504,6 +683,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags = 0;
PL_reg_eval_set = 0;
+ PL_reg_maxiter = 0;
if (prog->reganch & ROPT_UTF8)
PL_reg_flags |= RF_utf8;
@@ -552,7 +732,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
DEBUG_r( if (!PL_colorset) reginitcolors() );
DEBUG_r(PerlIO_printf(Perl_debug_log,
- "%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
+ "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
PL_colors[4],PL_colors[5],PL_colors[0],
prog->precomp,
PL_colors[1],
@@ -3162,6 +3342,7 @@ S_regmatch(pTHX_ regnode *prog)
case REFF:
n = ARG(scan); /* which paren pair */
ln = PL_regstartp[n];
+ PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
if (*PL_reglastparen < n || ln == -1)
sayNO; /* Do not match unless seen CLOSEn. */
if (ln == PL_regendp[n])
@@ -3306,6 +3487,10 @@ S_regmatch(pTHX_ regnode *prog)
*PL_reglastparen = 0;
PL_reg_call_cc = &state;
PL_reginput = locinput;
+
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+
if (regmatch(re->program + 1)) {
ReREFCNT_dec(re);
regcpblow(cp);
@@ -3323,6 +3508,10 @@ S_regmatch(pTHX_ regnode *prog)
PL_regcc = state.cc;
PL_reg_re = state.re;
cache_re(PL_reg_re);
+
+ /* XXXX This is too dramatic a measure... */
+ PL_reg_maxiter = 0;
+
sayNO;
}
sw = SvTRUE(ret);
@@ -3350,6 +3539,7 @@ S_regmatch(pTHX_ regnode *prog)
sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
break;
case IFTHEN:
+ PL_reg_leftiter = PL_reg_maxiter; /* Void cache */
if (sw)
next = NEXTOPER(NEXTOPER(scan));
else {
@@ -3388,7 +3578,7 @@ S_regmatch(pTHX_ regnode *prog)
/*
* This is really hard to understand, because after we match
* what we're trying to match, we must make sure the rest of
- * the RE is going to match for sure, and to do that we have
+ * the REx is going to match for sure, and to do that we have
* to go back UP the parse tree by recursing ever deeper. And
* if it fails, we have to reset our parent's current state
* that we can try again after backing off.
@@ -3448,6 +3638,51 @@ S_regmatch(pTHX_ regnode *prog)
sayNO;
}
+ if (scan->flags) {
+ /* Check whether we already were at this position.
+ Postpone detection until we know the match is not
+ *that* much linear. */
+ if (!PL_reg_maxiter) {
+ PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
+ PL_reg_leftiter = PL_reg_maxiter;
+ }
+ if (PL_reg_leftiter-- == 0) {
+ I32 size = (PL_reg_maxiter + 7)/8;
+ if (PL_reg_poscache) {
+ if (PL_reg_poscache_size < size) {
+ Renew(PL_reg_poscache, size, char);
+ PL_reg_poscache_size = size;
+ }
+ Zero(PL_reg_poscache, size, char);
+ }
+ else {
+ PL_reg_poscache_size = size;
+ Newz(29, PL_reg_poscache, size, char);
+ }
+ DEBUG_r(
+ PerlIO_printf(Perl_debug_log,
+ "%sDetected a super-linear match, switching on caching%s...\n",
+ PL_colors[4], PL_colors[5])
+ );
+ }
+ if (PL_reg_leftiter < 0) {
+ I32 o = locinput - PL_bostr, b;
+
+ o = (scan->flags & 0xf) - 1 + o * (scan->flags>>4);
+ b = o % 8;
+ o /= 8;
+ if (PL_reg_poscache[o] & (1<<b)) {
+ DEBUG_r(
+ PerlIO_printf(Perl_debug_log,
+ "%*s already tried at this position...\n",
+ REPORT_CODE_OFF+PL_regindent*2, "")
+ );
+ sayNO;
+ }
+ PL_reg_poscache[o] |= (1<<b);
+ }
+ }
+
/* Prefer next over scan for minimal matching. */
if (cc->minmod) {
diff --git a/t/op/re_tests b/t/op/re_tests
index 34b6e29414..899b35ee83 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -715,3 +715,23 @@ round\(((?>[^()]+))\) _I(round(xs * sz),1) y $1 xs * sz
'((?x:.) )' x y $1- x -
'((?-x:.) )'x x y $1- x-
foo.bart foo.bart y - -
+'^d[x][x][x]'m abcd\ndxxx y - -
+.X(.+)+X bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - -
+.X(.+)+XX bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - -
+.XX(.+)+X bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - -
+.X(.+)+X bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
+.X(.+)+XX bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
+.XX(.+)+X bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
+.X(.+)+[X] bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - -
+.X(.+)+[X][X] bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - -
+.XX(.+)+[X] bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - -
+.X(.+)+[X] bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
+.X(.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
+.XX(.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
+.[X](.+)+[X] bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - -
+.[X](.+)+[X][X] bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - -
+.[X][X](.+)+[X] bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - -
+.[X](.+)+[X] bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
+.[X](.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
+.[X][X](.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - -
+tt+$ xxxtt y - -
diff --git a/thrdvar.h b/thrdvar.h
index 32a0c7fe9e..4434b5ddb2 100644
--- a/thrdvar.h
+++ b/thrdvar.h
@@ -143,6 +143,7 @@ PERLVAR(Tregcomp_rx, regexp *) /* from regcomp.c */
PERLVAR(Textralen, I32) /* from regcomp.c */
PERLVAR(Tcolorset, int) /* from regcomp.c */
PERLVARA(Tcolors,6, char *) /* from regcomp.c */
+PERLVAR(Treg_whilem_seen, I32) /* number of WHILEM in this expr */
PERLVAR(Treginput, char *) /* String-input pointer. */
PERLVAR(Tregbol, char *) /* Beginning of input, for ^ check. */
PERLVAR(Tregeol, char *) /* End of input, for $ check. */
@@ -172,6 +173,10 @@ PERLVARI(Treg_oldcurpm, PMOP*, NULL) /* curpm before match */
PERLVARI(Treg_curpm, PMOP*, NULL) /* curpm during match */
PERLVAR(Treg_oldsaved, char*) /* old saved substr during match */
PERLVAR(Treg_oldsavedlen, STRLEN) /* old length of saved substr during match */
+PERLVAR(Treg_maxiter, I32) /* max wait until caching pos */
+PERLVAR(Treg_leftiter, I32) /* wait until caching pos */
+PERLVARI(Treg_poscache, char *, Nullch) /* cache of pos of WHILEM */
+PERLVAR(Treg_poscache_size, STRLEN) /* size of pos cache of WHILEM */
PERLVARI(Tregcompp, regcomp_t, MEMBER_TO_FPTR(Perl_pregcomp))
/* Pointer to REx compiler */