summary refs log tree commit diff
diff options
context:
space:
mode:
author Ilya Zakharevich <ilya@math.berkeley.edu> 1999-05-24 22:42:23 -0400
committer Gurusamy Sarathy <gsar@cpan.org> 1999-05-25 09:14:51 +0000
commit cf93c79d660ae36ccc5f83d949c599473fc522ce (patch)
tree 990c1003b82a9b439144bee4075daab1ba3d13d9
parent a99f88279527854d73f6b517b869d613f02be3d1 (diff)
download perl-cf93c79d660ae36ccc5f83d949c599473fc522ce.tar.gz
REx engine improvements
Message-Id: <199905250642.CAA06208@monk.mps.ohio-state.edu>
p4raw-id: //depot/perl@3475
-rw-r--r-- embedvar.h 6
-rw-r--r-- mg.c 48
-rw-r--r-- objXSUB.h 4
-rw-r--r-- pp.c 21
-rw-r--r-- pp_ctl.c 29
-rw-r--r-- pp_hot.c 134
-rw-r--r-- regcomp.c 37
-rw-r--r-- regexec.c 191
-rw-r--r-- regexp.h 42
-rwxr-xr-x t/op/pat.t 16
-rw-r--r-- t/op/re_tests 201
-rwxr-xr-x t/op/regexp.t 9
-rw-r--r-- thrdvar.h 6
-rw-r--r-- util.c 334
14 files changed, 758 insertions, 320 deletions
diff --git a/embedvar.h b/embedvar.h
index e6dad21009..73c674caf2 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -62,6 +62,8 @@
#define PL_reg_magic (PL_curinterp->Treg_magic)
#define PL_reg_oldcurpm (PL_curinterp->Treg_oldcurpm)
#define PL_reg_oldpos (PL_curinterp->Treg_oldpos)
+#define PL_reg_oldsaved (PL_curinterp->Treg_oldsaved)
+#define PL_reg_oldsavedlen (PL_curinterp->Treg_oldsavedlen)
#define PL_reg_re (PL_curinterp->Treg_re)
#define PL_reg_start_tmp (PL_curinterp->Treg_start_tmp)
#define PL_reg_start_tmpl (PL_curinterp->Treg_start_tmpl)
@@ -453,6 +455,8 @@
#define PL_Treg_magic PL_reg_magic
#define PL_Treg_oldcurpm PL_reg_oldcurpm
#define PL_Treg_oldpos PL_reg_oldpos
+#define PL_Treg_oldsaved PL_reg_oldsaved
+#define PL_Treg_oldsavedlen PL_reg_oldsavedlen
#define PL_Treg_re PL_reg_re
#define PL_Treg_start_tmp PL_reg_start_tmp
#define PL_Treg_start_tmpl PL_reg_start_tmpl
@@ -589,6 +593,8 @@
#define PL_reg_magic (thr->Treg_magic)
#define PL_reg_oldcurpm (thr->Treg_oldcurpm)
#define PL_reg_oldpos (thr->Treg_oldpos)
+#define PL_reg_oldsaved (thr->Treg_oldsaved)
+#define PL_reg_oldsavedlen (thr->Treg_oldsavedlen)
#define PL_reg_re (thr->Treg_re)
#define PL_reg_start_tmp (thr->Treg_start_tmp)
#define PL_reg_start_tmpl (thr->Treg_start_tmpl)
diff --git a/mg.c b/mg.c
index 9183104339..adfad7d4ad 100644
--- a/mg.c
+++ b/mg.c
@@ -341,23 +341,23 @@ magic_regdatum_get(SV *sv, MAGIC *mg)
{
dTHR;
register I32 paren;
- register char *s;
+ register I32 s;
register I32 i;
register REGEXP *rx;
- char *t;
+ I32 t;
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
paren = mg->mg_len;
if (paren < 0)
return 0;
if (paren <= rx->nparens &&
- (s = rx->startp[paren]) &&
- (t = rx->endp[paren]))
+ (s = rx->startp[paren]) != -1 &&
+ (t = rx->endp[paren]) != -1)
{
if (mg->mg_obj) /* @+ */
- i = t - rx->subbeg;
+ i = t;
else /* @- */
- i = s - rx->subbeg;
+ i = s;
sv_setiv(sv,i);
}
}
@@ -378,13 +378,15 @@ magic_len(SV *sv, MAGIC *mg)
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '&':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
+ I32 s1, t1;
+
paren = atoi(mg->mg_ptr);
getparen:
if (paren <= rx->nparens &&
- (s = rx->startp[paren]) &&
- (t = rx->endp[paren]))
+ (s1 = rx->startp[paren]) != -1 &&
+ (t1 = rx->endp[paren]) != -1)
{
- i = t - s;
+ i = t1 - s1;
if (i >= 0)
return i;
}
@@ -399,8 +401,8 @@ magic_len(SV *sv, MAGIC *mg)
return 0;
case '`':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
- if ((s = rx->subbeg) && rx->startp[0]) {
- i = rx->startp[0] - s;
+ if (rx->startp[0] != -1) {
+ i = rx->startp[0];
if (i >= 0)
return i;
}
@@ -408,8 +410,8 @@ magic_len(SV *sv, MAGIC *mg)
return 0;
case '\'':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
- if (rx->subend && (s = rx->endp[0])) {
- i = rx->subend - s;
+ if (rx->endp[0] != -1) {
+ i = rx->sublen - rx->endp[0];
if (i >= 0)
return i;
}
@@ -589,6 +591,8 @@ magic_get(SV *sv, MAGIC *mg)
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '&':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
+ I32 s1, t1;
+
/*
* Pre-threads, this was paren = atoi(GvENAME((GV*)mg->mg_obj));
* XXX Does the new way break anything?
@@ -596,10 +600,11 @@ magic_get(SV *sv, MAGIC *mg)
paren = atoi(mg->mg_ptr);
getparen:
if (paren <= rx->nparens &&
- (s = rx->startp[paren]) &&
- (t = rx->endp[paren]))
+ (s1 = rx->startp[paren]) != -1 &&
+ (t1 = rx->endp[paren]) != -1)
{
- i = t - s;
+ i = t1 - s1;
+ s = rx->subbeg + s1;
getrx:
if (i >= 0) {
bool was_tainted;
@@ -607,7 +612,7 @@ magic_get(SV *sv, MAGIC *mg)
was_tainted = PL_tainted;
PL_tainted = FALSE;
}
- sv_setpvn(sv,s,i);
+ sv_setpvn(sv, s, i);
if (PL_tainting)
PL_tainted = (was_tainted || RX_MATCH_TAINTED(rx));
break;
@@ -626,8 +631,8 @@ magic_get(SV *sv, MAGIC *mg)
break;
case '`':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
- if ((s = rx->subbeg) && rx->startp[0]) {
- i = rx->startp[0] - s;
+ if ((s = rx->subbeg) && rx->startp[0] != -1) {
+ i = rx->startp[0];
goto getrx;
}
}
@@ -635,8 +640,9 @@ magic_get(SV *sv, MAGIC *mg)
break;
case '\'':
if (PL_curpm && (rx = PL_curpm->op_pmregexp)) {
- if (rx->subend && (s = rx->endp[0])) {
- i = rx->subend - s;
+ if (rx->subbeg && rx->endp[0] != -1) {
+ s = rx->subbeg + rx->endp[0];
+ i = rx->sublen - rx->endp[0];
goto getrx;
}
}
diff --git a/objXSUB.h b/objXSUB.h
index c29bc06315..658e5ce226 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -518,6 +518,10 @@
#define PL_reg_oldcurpm pPerl->PL_reg_oldcurpm
#undef PL_reg_oldpos
#define PL_reg_oldpos pPerl->PL_reg_oldpos
+#undef PL_reg_oldsaved
+#define PL_reg_oldsaved pPerl->PL_reg_oldsaved
+#undef PL_reg_oldsavedlen
+#define PL_reg_oldsavedlen pPerl->PL_reg_oldsavedlen
#undef PL_reg_re
#define PL_reg_re pPerl->PL_reg_re
#undef PL_reg_start_tmp
diff --git a/pp.c b/pp.c
index 42fd9b8de8..1b9ebdd542 100644
--- a/pp.c
+++ b/pp.c
@@ -5006,8 +5006,10 @@ PP(pp_split)
else if (rx->check_substr && !rx->nparens
&& (rx->reganch & ROPT_CHECK_ALL)
&& !(rx->reganch & ROPT_ANCH)) {
+ int tail = SvTAIL(rx->check_substr) != 0;
+
i = SvCUR(rx->check_substr);
- if (i == 1 && !SvTAIL(rx->check_substr)) {
+ if (i == 1 && !tail) {
i = *SvPVX(rx->check_substr);
while (--limit) {
/*SUPPRESS 530*/
@@ -5026,7 +5028,7 @@ PP(pp_split)
#ifndef lint
while (s < strend && --limit &&
(m=fbm_instr((unsigned char*)s, (unsigned char*)strend,
- rx->check_substr, 0)) )
+ rx->check_substr, PL_multiline ? FBMrf_MULTILINE : 0)) )
#endif
{
dstr = NEWSV(31, m-s);
@@ -5034,7 +5036,7 @@ PP(pp_split)
if (make_mortal)
sv_2mortal(dstr);
XPUSHs(dstr);
- s = m + i;
+ s = m + i - tail; /* Fake \n at the end */
}
}
}
@@ -5044,15 +5046,14 @@ PP(pp_split)
CALLREGEXEC(rx, s, strend, orig, 1, sv, NULL, 0))
{
TAINT_IF(RX_MATCH_TAINTED(rx));
- if (rx->subbase
- && rx->subbase != orig) {
+ if (RX_MATCH_COPIED(rx) && rx->subbeg != orig) {
m = s;
s = orig;
- orig = rx->subbase;
+ orig = rx->subbeg;
s = orig + (m - s);
strend = s + (strend - m);
}
- m = rx->startp[0];
+ m = rx->startp[0] + orig;
dstr = NEWSV(32, m-s);
sv_setpvn(dstr, s, m-s);
if (make_mortal)
@@ -5060,8 +5061,8 @@ PP(pp_split)
XPUSHs(dstr);
if (rx->nparens) {
for (i = 1; i <= rx->nparens; i++) {
- s = rx->startp[i];
- m = rx->endp[i];
+ s = rx->startp[i] + orig;
+ m = rx->endp[i] + orig;
if (m && s) {
dstr = NEWSV(33, m-s);
sv_setpvn(dstr, s, m-s);
@@ -5073,7 +5074,7 @@ PP(pp_split)
XPUSHs(dstr);
}
}
- s = rx->endp[0];
+ s = rx->endp[0] + orig;
}
}
diff --git a/pp_ctl.c b/pp_ctl.c
index 3e4db3b31f..a4c0247168 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -172,8 +172,8 @@ PP(pp_substcont)
if (cx->sb_once || !CALLREGEXEC(rx, s, cx->sb_strend, orig,
s == m, cx->sb_targ, NULL,
((cx->sb_rflags & REXEC_COPY_STR)
- ? REXEC_IGNOREPOS
- : (REXEC_COPY_STR|REXEC_IGNOREPOS))))
+ ? (REXEC_IGNOREPOS|REXEC_NOT_FIRST)
+ : (REXEC_COPY_STR|REXEC_IGNOREPOS|REXEC_NOT_FIRST))))
{
SV *targ = cx->sb_targ;
sv_catpvn(dstr, s, cx->sb_strend - s);
@@ -201,16 +201,16 @@ PP(pp_substcont)
RETURNOP(pm->op_next);
}
}
- if (rx->subbase && rx->subbase != orig) {
+ if (RX_MATCH_COPIED(rx) && rx->subbeg != orig) {
m = s;
s = orig;
- cx->sb_orig = orig = rx->subbase;
+ cx->sb_orig = orig = rx->subbeg;
s = orig + (m - s);
cx->sb_strend = s + (cx->sb_strend - m);
}
- cx->sb_m = m = rx->startp[0];
+ cx->sb_m = m = rx->startp[0] + orig;
sv_catpvn(dstr, s, m-s);
- cx->sb_s = rx->endp[0];
+ cx->sb_s = rx->endp[0] + orig;
cx->sb_rxtainted |= RX_MATCH_TAINTED(rx);
rxres_save(&cx->sb_rxres, rx);
RETURNOP(pm->op_pmreplstart);
@@ -231,13 +231,13 @@ rxres_save(void **rsp, REGEXP *rx)
*rsp = (void*)p;
}
- *p++ = (UV)rx->subbase;
- rx->subbase = Nullch;
+ *p++ = (UV)(RX_MATCH_COPIED(rx) ? rx->subbeg : Nullch);
+ RX_MATCH_COPIED_off(rx);
*p++ = rx->nparens;
*p++ = (UV)rx->subbeg;
- *p++ = (UV)rx->subend;
+ *p++ = (UV)rx->sublen;
for (i = 0; i <= rx->nparens; ++i) {
*p++ = (UV)rx->startp[i];
*p++ = (UV)rx->endp[i];
@@ -250,17 +250,18 @@ rxres_restore(void **rsp, REGEXP *rx)
UV *p = (UV*)*rsp;
U32 i;
- Safefree(rx->subbase);
- rx->subbase = (char*)(*p);
+ if (RX_MATCH_COPIED(rx))
+ Safefree(rx->subbeg);
+ RX_MATCH_COPIED_set(rx, *p);
*p++ = 0;
rx->nparens = *p++;
rx->subbeg = (char*)(*p++);
- rx->subend = (char*)(*p++);
+ rx->sublen = (I32)(*p++);
for (i = 0; i <= rx->nparens; ++i) {
- rx->startp[i] = (char*)(*p++);
- rx->endp[i] = (char*)(*p++);
+ rx->startp[i] = (I32)(*p++);
+ rx->endp[i] = (I32)(*p++);
}
}
diff --git a/pp_hot.c b/pp_hot.c
index 76e5e53845..599a2afe61 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -846,7 +846,9 @@ PP(pp_match)
char *strend;
I32 global;
I32 r_flags = 0;
- char *truebase;
+ char *truebase; /* Start of string, may be
+ relocated if REx engine
+ copies the string. */
register REGEXP *rx = pm->op_pmregexp;
bool rxtainted;
I32 gimme = GIMME;
@@ -888,15 +890,15 @@ PP(pp_match)
/* XXXX What part of this is needed with true \G-support? */
if (global = pm->op_pmflags & PMf_GLOBAL) {
- rx->startp[0] = 0;
+ rx->startp[0] = -1;
if (SvTYPE(TARG) >= SVt_PVMG && SvMAGIC(TARG)) {
MAGIC* mg = mg_find(TARG, 'g');
if (mg && mg->mg_len >= 0) {
if (!(rx->reganch & ROPT_GPOS_SEEN))
- rx->endp[0] = rx->startp[0] = s + mg->mg_len;
+ rx->endp[0] = rx->startp[0] = mg->mg_len;
else if (rx->reganch & ROPT_ANCH_GPOS) {
r_flags |= REXEC_IGNOREPOS;
- rx->endp[0] = rx->startp[0] = s + mg->mg_len;
+ rx->endp[0] = rx->startp[0] = mg->mg_len;
}
minmatch = (mg->mg_flags & MGf_MINMATCH);
update_minmatch = 0;
@@ -917,8 +919,8 @@ PP(pp_match)
}
play_it_again:
- if (global && rx->startp[0]) {
- t = s = rx->endp[0];
+ if (global && rx->startp[0] != -1) {
+ t = s = rx->endp[0] + truebase;
if ((s + rx->minlen) > strend)
goto nope;
if (update_minmatch++)
@@ -926,29 +928,33 @@ play_it_again:
}
if (rx->check_substr) {
if (!(rx->reganch & ROPT_NOSCAN)) { /* Floating checkstring. */
+ SV *c = rx->check_substr;
+
if (r_flags & REXEC_SCREAM) {
I32 p = -1;
char *b;
-
- if (PL_screamfirst[BmRARE(rx->check_substr)] < 0)
+
+ if (PL_screamfirst[BmRARE(c)] < 0
+ && !( BmRARE(c) == '\n' && (BmPREVIOUS(c) == SvCUR(c) - 1)
+ && SvTAIL(c) ))
goto nope;
b = (char*)HOP((U8*)s, rx->check_offset_min);
- if (!(s = screaminstr(TARG, rx->check_substr, b - s, 0, &p, 0)))
+ if (!(s = screaminstr(TARG, c, b - s, 0, &p, 0)))
goto nope;
if ((rx->reganch & ROPT_CHECK_ALL)
- && !PL_sawampersand && !SvTAIL(rx->check_substr))
+ && !PL_sawampersand && !SvTAIL(c))
goto yup;
}
else if (!(s = fbm_instr((unsigned char*)HOP((U8*)s, rx->check_offset_min),
- (unsigned char*)strend,
- rx->check_substr, 0)))
+ (unsigned char*)strend, c,
+ PL_multiline ? FBMrf_MULTILINE : 0)))
goto nope;
else if ((rx->reganch & ROPT_CHECK_ALL) && !PL_sawampersand)
goto yup;
if (s && rx->check_offset_max < s - t) {
- ++BmUSEFUL(rx->check_substr);
+ ++BmUSEFUL(c);
s = (char*)HOP((U8*)s, -rx->check_offset_max);
}
else
@@ -959,10 +965,30 @@ play_it_again:
else if (!PL_multiline) { /* Anchored near beginning of string. */
I32 slen;
char *b = (char*)HOP((U8*)s, rx->check_offset_min);
- if (*SvPVX(rx->check_substr) != *b
- || ((slen = SvCUR(rx->check_substr)) > 1
- && memNE(SvPVX(rx->check_substr), b, slen)))
- goto nope;
+
+ if (SvTAIL(rx->check_substr)) {
+ slen = SvCUR(rx->check_substr); /* >= 1 */
+
+ if ( strend - b > slen || strend - b < slen - 1 )
+ goto nope;
+ if ( strend - b == slen && strend[-1] != '\n')
+ goto nope;
+ /* Now should match b[0..slen-2] */
+ slen--;
+ if (slen && (*SvPVX(rx->check_substr) != *b
+ || (slen > 1
+ && memNE(SvPVX(rx->check_substr), b, slen))))
+ goto nope;
+ if ((rx->reganch & ROPT_CHECK_ALL) && !PL_sawampersand)
+ goto yup;
+ } else { /* Assume len > 0 */
+ if (*SvPVX(rx->check_substr) != *b
+ || ((slen = SvCUR(rx->check_substr)) > 1
+ && memNE(SvPVX(rx->check_substr), b, slen)))
+ goto nope;
+ if ((rx->reganch & ROPT_CHECK_ALL) && !PL_sawampersand)
+ goto yup;
+ }
}
if (!(rx->reganch & ROPT_NAUGHTY) && --BmUSEFUL(rx->check_substr) < 0
&& rx->check_substr == rx->float_substr) {
@@ -1000,17 +1026,17 @@ play_it_again:
for (i = !i; i <= iters; i++) {
PUSHs(sv_newmortal());
/*SUPPRESS 560*/
- if ((s = rx->startp[i]) && rx->endp[i] ) {
- len = rx->endp[i] - s;
+ if ((rx->startp[i] != -1) && rx->endp[i] != -1 ) {
+ len = rx->endp[i] - rx->startp[i];
+ s = rx->startp[i] + truebase;
sv_setpvn(*SP, s, len);
}
}
if (global) {
- truebase = rx->subbeg;
- strend = rx->subend;
- had_zerolen = (rx->startp[0] && rx->startp[0] == rx->endp[0]);
+ had_zerolen = (rx->startp[0] != -1
+ && rx->startp[0] == rx->endp[0]);
PUTBACK; /* EVAL blocks may use stack */
- r_flags |= REXEC_IGNOREPOS;
+ r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST;
goto play_it_again;
}
else if (!iters)
@@ -1027,8 +1053,8 @@ play_it_again:
sv_magic(TARG, (SV*)0, 'g', Nullch, 0);
mg = mg_find(TARG, 'g');
}
- if (rx->startp[0]) {
- mg->mg_len = rx->endp[0] - rx->subbeg;
+ if (rx->startp[0] != -1) {
+ mg->mg_len = rx->endp[0];
if (rx->startp[0] == rx->endp[0])
mg->mg_flags |= MGf_MINMATCH;
else
@@ -1047,23 +1073,29 @@ yup: /* Confirmed by check_substr */
PL_curpm = pm;
if (pm->op_pmflags & PMf_ONCE)
pm->op_pmdynflags |= PMdf_USED;
- Safefree(rx->subbase);
- rx->subbase = Nullch;
+ if (RX_MATCH_COPIED(rx))
+ Safefree(rx->subbeg);
+ RX_MATCH_COPIED_off(rx);
+ rx->subbeg = Nullch;
if (global) {
rx->subbeg = truebase;
- rx->subend = strend;
- rx->startp[0] = s;
- rx->endp[0] = s + SvCUR(rx->check_substr);
+ rx->startp[0] = s - truebase;
+ rx->endp[0] = s - truebase + SvCUR(rx->check_substr);
+ rx->sublen = strend - truebase;
goto gotcha;
- }
+ }
if (PL_sawampersand) {
- char *tmps;
+ I32 off;
- tmps = rx->subbase = savepvn(t, strend-t);
- rx->subbeg = tmps;
- rx->subend = tmps + (strend-t);
- tmps = rx->startp[0] = tmps + (s - t);
- rx->endp[0] = tmps + SvCUR(rx->check_substr);
+ rx->subbeg = savepvn(t, strend - t);
+ rx->sublen = strend - t;
+ RX_MATCH_COPIED_on(rx);
+ off = rx->startp[0] = s - t;
+ rx->endp[0] = off + SvCUR(rx->check_substr);
+ }
+ else { /* startp/endp are used by @- @+. */
+ rx->startp[0] = s - truebase;
+ rx->endp[0] = s - truebase + SvCUR(rx->check_substr);
}
LEAVE_SCOPE(oldsave);
RETPUSHYES;
@@ -1714,7 +1746,8 @@ PP(pp_subst)
}
else if (!(s = fbm_instr((unsigned char*)HOP((U8*)s, rx->check_offset_min),
(unsigned char*)strend,
- rx->check_substr, 0)))
+ rx->check_substr,
+ PL_multiline ? FBMrf_MULTILINE : 0)))
goto nope;
if (s && rx->check_offset_max < s - m) {
++BmUSEFUL(rx->check_substr);
@@ -1766,13 +1799,8 @@ PP(pp_subst)
SvSCREAM_off(TARG); /* disable possible screamer */
if (once) {
rxtainted |= RX_MATCH_TAINTED(rx);
- if (rx->subbase) {
- m = orig + (rx->startp[0] - rx->subbase);
- d = orig + (rx->endp[0] - rx->subbase);
- } else {
- m = rx->startp[0];
- d = rx->endp[0];
- }
+ m = orig + rx->startp[0];
+ d = orig + rx->endp[0];
s = orig;
if (m - s > strend - d) { /* faster to shorten from end */
if (clen) {
@@ -1815,7 +1843,7 @@ PP(pp_subst)
if (iters++ > maxiters)
DIE("Substitution loop");
rxtainted |= RX_MATCH_TAINTED(rx);
- m = rx->startp[0];
+ m = rx->startp[0] + orig;
/*SUPPRESS 560*/
if (i = m - s) {
if (s != d)
@@ -1826,9 +1854,9 @@ PP(pp_subst)
Copy(c, d, clen, char);
d += clen;
}
- s = rx->endp[0];
+ s = rx->endp[0] + orig;
} while (CALLREGEXEC(rx, s, strend, orig, s == m,
- Nullsv, NULL, 0)); /* don't match same null twice */
+ Nullsv, NULL, REXEC_NOT_FIRST)); /* don't match same null twice */
if (s != d) {
i = strend - s;
SvCUR_set(TARG, d - SvPVX(TARG) + i);
@@ -1866,21 +1894,21 @@ PP(pp_subst)
PUSHSUBST(cx);
RETURNOP(cPMOP->op_pmreplroot);
}
- r_flags |= REXEC_IGNOREPOS;
+ r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST;
do {
if (iters++ > maxiters)
DIE("Substitution loop");
rxtainted |= RX_MATCH_TAINTED(rx);
- if (rx->subbase && rx->subbase != orig) {
+ if (RX_MATCH_COPIED(rx) && rx->subbeg != orig) {
m = s;
s = orig;
- orig = rx->subbase;
+ orig = rx->subbeg;
s = orig + (m - s);
strend = s + (strend - m);
}
- m = rx->startp[0];
+ m = rx->startp[0] + orig;
sv_catpvn(dstr, s, m-s);
- s = rx->endp[0];
+ s = rx->endp[0] + orig;
if (clen)
sv_catpvn(dstr, c, clen);
if (once)
diff --git a/regcomp.c b/regcomp.c
index 34640b7b8e..a360f6abc4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -875,7 +875,8 @@ pregcomp(char *exp, char *xend, PMOP *pm)
r->refcnt = 1;
r->prelen = xend - exp;
r->precomp = PL_regprecomp;
- r->subbeg = r->subbase = NULL;
+ r->subbeg = NULL;
+ r->reganch = pm->op_pmflags & PMf_COMPILETIME;
r->nparens = PL_regnpar - 1; /* set early to validate backrefs */
r->substrs = 0; /* Useful during FAIL. */
@@ -898,7 +899,7 @@ pregcomp(char *exp, char *xend, PMOP *pm)
return(NULL);
/* Dig out information for optimizations. */
- r->reganch = pm->op_pmflags & PMf_COMPILETIME;
+ r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */
pm->op_pmflags = PL_regflags;
if (UTF)
r->reganch |= ROPT_UTF8;
@@ -998,6 +999,8 @@ pregcomp(char *exp, char *xend, PMOP *pm)
|| (data.flags & SF_FL_BEFORE_EOL
&& (!(data.flags & SF_FL_BEFORE_MEOL)
|| (PL_regflags & PMf_MULTILINE)))) {
+ int t;
+
if (SvCUR(data.longest_fixed) /* ok to leave SvCUR */
&& data.offset_fixed == data.offset_float_min
&& SvCUR(data.longest_fixed) == SvCUR(data.longest_float))
@@ -1006,12 +1009,10 @@ pregcomp(char *exp, char *xend, PMOP *pm)
r->float_substr = data.longest_float;
r->float_min_offset = data.offset_float_min;
r->float_max_offset = data.offset_float_max;
- fbm_compile(r->float_substr, 0);
- BmUSEFUL(r->float_substr) = 100;
- if (data.flags & SF_FL_BEFORE_EOL /* Cannot have SEOL and MULTI */
- && (!(data.flags & SF_FL_BEFORE_MEOL)
- || (PL_regflags & PMf_MULTILINE)))
- SvTAIL_on(r->float_substr);
+ t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */
+ && (!(data.flags & SF_FL_BEFORE_MEOL)
+ || (PL_regflags & PMf_MULTILINE)));
+ fbm_compile(r->float_substr, t ? FBMcf_TAIL : 0);
}
else {
remove_float:
@@ -1025,14 +1026,14 @@ pregcomp(char *exp, char *xend, PMOP *pm)
|| (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
&& (!(data.flags & SF_FIX_BEFORE_MEOL)
|| (PL_regflags & PMf_MULTILINE)))) {
+ int t;
+
r->anchored_substr = data.longest_fixed;
r->anchored_offset = data.offset_fixed;
- fbm_compile(r->anchored_substr, 0);
- BmUSEFUL(r->anchored_substr) = 100;
- if (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */
- && (!(data.flags & SF_FIX_BEFORE_MEOL)
- || (PL_regflags & PMf_MULTILINE)))
- SvTAIL_on(r->anchored_substr);
+ t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */
+ && (!(data.flags & SF_FIX_BEFORE_MEOL)
+ || (PL_regflags & PMf_MULTILINE)));
+ fbm_compile(r->anchored_substr, t ? FBMcf_TAIL : 0);
}
else {
r->anchored_substr = Nullsv;
@@ -1070,8 +1071,8 @@ pregcomp(char *exp, char *xend, PMOP *pm)
r->reganch |= ROPT_LOOKBEHIND_SEEN;
if (PL_regseen & REG_SEEN_EVAL)
r->reganch |= ROPT_EVAL_SEEN;
- Newz(1002, r->startp, PL_regnpar, char*);
- Newz(1002, r->endp, PL_regnpar, char*);
+ Newz(1002, r->startp, PL_regnpar, I32);
+ Newz(1002, r->endp, PL_regnpar, I32);
DEBUG_r(regdump(r));
return(r);
}
@@ -2946,8 +2947,8 @@ pregfree(struct regexp *r)
return;
if (r->precomp)
Safefree(r->precomp);
- if (r->subbase)
- Safefree(r->subbase);
+ if (RX_MATCH_COPIED(r))
+ Safefree(r->subbeg);
if (r->substrs) {
if (r->anchored_substr)
SvREFCNT_dec(r->anchored_substr);
diff --git a/regexec.c b/regexec.c
index 86317120b5..5806767d1e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -139,8 +139,8 @@ regcppush(I32 parenfloor)
SSCHECK(i + 5);
for (p = PL_regsize; p > parenfloor; p--) {
- SSPUSHPTR(PL_regendp[p]);
- SSPUSHPTR(PL_regstartp[p]);
+ SSPUSHINT(PL_regendp[p]);
+ SSPUSHINT(PL_regstartp[p]);
SSPUSHPTR(PL_reg_start_tmp[p]);
SSPUSHINT(p);
}
@@ -169,7 +169,7 @@ regcppop(void)
I32 i = SSPOPINT;
U32 paren = 0;
char *input;
- char *tmps;
+ I32 tmps;
assert(i == SAVEt_REGCONTEXT);
i = SSPOPINT;
input = (char *) SSPOPPTR;
@@ -178,16 +178,16 @@ regcppop(void)
for (i -= 3; i > 0; i -= 4) {
paren = (U32)SSPOPINT;
PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
- PL_regstartp[paren] = (char *) SSPOPPTR;
- tmps = (char*)SSPOPPTR;
+ PL_regstartp[paren] = SSPOPINT;
+ tmps = SSPOPINT;
if (paren <= *PL_reglastparen)
PL_regendp[paren] = tmps;
DEBUG_r(
PerlIO_printf(Perl_debug_log,
" restoring \\%d to %d(%d)..%d%s\n",
- paren, PL_regstartp[paren] - PL_regbol,
- PL_reg_start_tmp[paren] - PL_regbol,
- PL_regendp[paren] - PL_regbol,
+ paren, PL_regstartp[paren],
+ PL_reg_start_tmp[paren] - PL_bostr,
+ PL_regendp[paren],
(paren > *PL_reglastparen ? "(no)" : ""));
);
}
@@ -200,8 +200,8 @@ regcppop(void)
);
for (paren = *PL_reglastparen + 1; paren <= PL_regnpar; paren++) {
if (paren > PL_regsize)
- PL_regstartp[paren] = Nullch;
- PL_regendp[paren] = Nullch;
+ PL_regstartp[paren] = -1;
+ PL_regendp[paren] = -1;
}
return input;
}
@@ -266,7 +266,12 @@ STATIC void
restore_pos(void *arg)
{
dTHR;
- if (PL_reg_eval_set) {
+ if (PL_reg_eval_set) {
+ if (PL_reg_oldsaved) {
+ PL_reg_re->subbeg = PL_reg_oldsaved;
+ PL_reg_re->sublen = PL_reg_oldsavedlen;
+ RX_MATCH_COPIED_on(PL_reg_re);
+ }
PL_reg_magic->mg_len = PL_reg_oldpos;
PL_reg_eval_set = 0;
PL_curpm = PL_reg_oldcurpm;
@@ -363,9 +368,15 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
char *t;
start_shift = prog->check_offset_min; /* okay to underestimate on CC */
/* Should be nonnegative! */
- end_shift = minlen - start_shift - CHR_SVLEN(prog->check_substr);
+ end_shift = minlen - start_shift -
+ CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
if (flags & REXEC_SCREAM) {
- if (PL_screamfirst[BmRARE(prog->check_substr)] >= 0)
+ SV *c = prog->check_substr;
+
+ if (PL_screamfirst[BmRARE(c)] >= 0
+ || ( BmRARE(c) == '\n'
+ && (BmPREVIOUS(c) == SvCUR(c) - 1)
+ && SvTAIL(c) ))
s = screaminstr(sv, prog->check_substr,
start_shift + (stringarg - strbeg),
end_shift, &scream_pos, 0);
@@ -376,7 +387,7 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
else
s = fbm_instr((unsigned char*)s + start_shift,
(unsigned char*)strend - end_shift,
- prog->check_substr, 0);
+ prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
if (!s) {
++BmUSEFUL(prog->check_substr); /* hooray */
goto phooey; /* not present */
@@ -493,7 +504,8 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
I32 back_min =
prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
I32 delta = back_max - back_min;
- char *last = HOPc(strend, 0-(CHR_SVLEN(must) + back_min)); /* Cannot start after this */
+ char *last = HOPc(strend, /* Cannot start after this */
+ -(CHR_SVLEN(must) - (SvTAIL(must) != 0) + back_min));
char *last1; /* Last position checked before */
if (s > PL_bostr)
@@ -511,7 +523,8 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
end_shift, &scream_pos, 0))
: (s = fbm_instr((unsigned char*)HOP(s, back_min),
- (unsigned char*)strend, must, 0))) ) {
+ (unsigned char*)strend, must,
+ PL_multiline ? FBMrf_MULTILINE : 0))) ) {
if (HOPc(s, -back_max) > last1) {
last1 = HOPc(s, -back_min);
s = HOPc(s, -back_max);
@@ -943,17 +956,28 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
if (flags & REXEC_SCREAM) {
last = screaminstr(sv, prog->float_substr, s - strbeg,
end_shift, &scream_pos, 1); /* last one */
- if (!last) {
+ if (!last)
last = scream_olds; /* Only one occurence. */
- }
}
else {
STRLEN len;
char *little = SvPV(prog->float_substr, len);
- if (len)
- last = rninstr(s, strend, little, little + len);
- else
- last = strend; /* matching `$' */
+
+ if (SvTAIL(prog->float_substr)) {
+ if (memEQ(strend - len + 1, little, len - 1))
+ last = strend - len + 1;
+ else if (!PL_multiline)
+ last = memEQ(strend - len, little, len)
+ ? strend - len : Nullch;
+ else
+ goto find_last;
+ } else {
+ find_last:
+ if (len)
+ last = rninstr(s, strend, little, little + len);
+ else
+ last = strend; /* matching `$' */
+ }
}
if (last == NULL) goto phooey; /* Should not happen! */
dontbother = strend - last + prog->float_min_offset;
@@ -983,34 +1007,8 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
goto phooey;
got_it:
- prog->subbeg = strbeg;
- prog->subend = PL_regeol; /* strend may have been modified */
RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
- /* make sure $`, $&, $', and $digit will work later */
- if (strbeg != prog->subbase) { /* second+ //g match. */
- if (!(flags & REXEC_COPY_STR)) {
- if (prog->subbase) {
- Safefree(prog->subbase);
- prog->subbase = Nullch;
- }
- }
- else {
- I32 i = PL_regeol - startpos + (stringarg - strbeg);
- s = savepvn(strbeg, i);
- Safefree(prog->subbase);
- prog->subbase = s;
- prog->subbeg = prog->subbase;
- prog->subend = prog->subbase + i;
- s = prog->subbase + (stringarg - strbeg);
- for (i = 0; i <= prog->nparens; i++) {
- if (prog->endp[i]) {
- prog->startp[i] = s + (prog->startp[i] - startpos);
- prog->endp[i] = s + (prog->endp[i] - startpos);
- }
- }
- }
- }
if (PL_reg_eval_set) {
/* Preserve the current value of $^R */
if (oreplsv != GvSV(PL_replgv))
@@ -1019,6 +1017,26 @@ got_it:
the same. */
restore_pos(0);
}
+
+ /* make sure $`, $&, $', and $digit will work later */
+ if ( !(flags & REXEC_NOT_FIRST) ) {
+ if (RX_MATCH_COPIED(prog)) {
+ Safefree(prog->subbeg);
+ RX_MATCH_COPIED_off(prog);
+ }
+ if (flags & REXEC_COPY_STR) {
+ I32 i = PL_regeol - startpos + (stringarg - strbeg);
+
+ s = savepvn(strbeg, i);
+ prog->subbeg = s;
+ prog->sublen = i;
+ RX_MATCH_COPIED_on(prog);
+ }
+ else {
+ prog->subbeg = strbeg;
+ prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
+ }
+ }
return 1;
@@ -1036,8 +1054,8 @@ regtry(regexp *prog, char *startpos)
{
dTHR;
register I32 i;
- register char **sp;
- register char **ep;
+ register I32 *sp;
+ register I32 *ep;
CHECKPOINT lastcp;
if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) {
@@ -1080,10 +1098,20 @@ regtry(regexp *prog, char *startpos)
PL_reg_curpm->op_pmregexp = prog;
PL_reg_oldcurpm = PL_curpm;
PL_curpm = PL_reg_curpm;
+ if (RX_MATCH_COPIED(prog)) {
+ /* Here is a serious problem: we cannot rewrite subbeg,
+ since it may be needed if this match fails. Thus
+ $` inside (?{}) could fail... */
+ PL_reg_oldsaved = prog->subbeg;
+ PL_reg_oldsavedlen = prog->sublen;
+ RX_MATCH_COPIED_off(prog);
+ }
+ else
+ PL_reg_oldsaved = Nullch;
prog->subbeg = PL_bostr;
- prog->subend = PL_regeol; /* strend may have been modified */
+ prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
}
- prog->startp[0] = startpos;
+ prog->startp[0] = startpos - PL_bostr;
PL_reginput = startpos;
PL_regstartp = prog->startp;
PL_regendp = prog->endp;
@@ -1106,13 +1134,13 @@ regtry(regexp *prog, char *startpos)
ep = prog->endp;
if (prog->nparens) {
for (i = prog->nparens; i >= 1; i--) {
- *++sp = NULL;
- *++ep = NULL;
+ *++sp = -1;
+ *++ep = -1;
}
}
REGCP_SET;
if (regmatch(prog->program + 1)) {
- prog->endp[0] = PL_reginput;
+ prog->endp[0] = PL_reginput - PL_bostr;
return 1;
}
REGCP_UNWIND;
@@ -1590,15 +1618,16 @@ regmatch(regnode *prog)
case REF:
case REFF:
n = ARG(scan); /* which paren pair */
- s = PL_regstartp[n];
- if (*PL_reglastparen < n || !s)
+ ln = PL_regstartp[n];
+ if (*PL_reglastparen < n || ln == -1)
sayNO; /* Do not match unless seen CLOSEn. */
- if (s == PL_regendp[n])
+ if (ln == PL_regendp[n])
break;
+ s = PL_bostr + ln;
if (UTF && OP(scan) != REF) { /* REF can do byte comparison */
char *l = locinput;
- char *e = PL_regendp[n];
+ char *e = PL_bostr + PL_regendp[n];
/*
* Note that we can't do the "other character" lookup trick as
* in the 8-bit case (no pun intended) because in Unicode we
@@ -1635,7 +1664,7 @@ regmatch(regnode *prog)
(UCHARAT(s) != ((OP(scan) == REFF
? PL_fold : PL_fold_locale)[nextchr]))))
sayNO;
- ln = PL_regendp[n] - s;
+ ln = PL_regendp[n] - ln;
if (locinput + ln > PL_regeol)
sayNO;
if (ln > 1 && (OP(scan) == REF
@@ -1665,8 +1694,7 @@ regmatch(regnode *prog)
PL_op = (OP_4tree*)PL_regdata->data[n];
DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", PL_op) );
PL_curpad = AvARRAY((AV*)PL_regdata->data[n + 2]);
- PL_reg_magic->mg_len = locinput - PL_bostr;
- PL_regendp[0] = locinput;
+ PL_regendp[0] = PL_reg_magic->mg_len = locinput - PL_bostr;
CALLRUNOPS(); /* Scalar context. */
SPAGAIN;
@@ -1769,14 +1797,14 @@ regmatch(regnode *prog)
break;
case CLOSE:
n = ARG(scan); /* which paren pair */
- PL_regstartp[n] = PL_reg_start_tmp[n];
- PL_regendp[n] = locinput;
+ PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
+ PL_regendp[n] = locinput - PL_bostr;
if (n > *PL_reglastparen)
*PL_reglastparen = n;
break;
case GROUPP:
n = ARG(scan); /* which paren pair */
- sw = (*PL_reglastparen >= n && PL_regendp[n] != NULL);
+ sw = (*PL_reglastparen >= n && PL_regendp[n] != -1);
break;
case IFTHEN:
if (sw)
@@ -1999,7 +2027,7 @@ regmatch(regnode *prog)
sayYES;
REGCP_UNWIND;
for (n = *PL_reglastparen; n > lastparen; n--)
- PL_regendp[n] = 0;
+ PL_regendp[n] = -1;
*PL_reglastparen = n;
scan = next;
/*SUPPRESS 560*/
@@ -2073,11 +2101,12 @@ regmatch(regnode *prog)
{
if (paren) {
if (n) {
- PL_regstartp[paren] = HOPc(PL_reginput, -l);
- PL_regendp[paren] = PL_reginput;
+ PL_regstartp[paren] =
+ HOPc(PL_reginput, -l) - PL_bostr;
+ PL_regendp[paren] = PL_reginput - PL_bostr;
}
else
- PL_regendp[paren] = NULL;
+ PL_regendp[paren] = -1;
}
if (regmatch(next))
sayYES;
@@ -2134,11 +2163,11 @@ regmatch(regnode *prog)
);
if (paren) {
if (n) {
- PL_regstartp[paren] = HOPc(PL_reginput, -l);
- PL_regendp[paren] = PL_reginput;
+ PL_regstartp[paren] = HOPc(PL_reginput, -l) - PL_bostr;
+ PL_regendp[paren] = PL_reginput - PL_bostr;
}
else
- PL_regendp[paren] = NULL;
+ PL_regendp[paren] = -1;
}
if (regmatch(next))
sayYES;
@@ -2233,11 +2262,11 @@ regmatch(regnode *prog)
/* PL_reginput == locinput now */
if (paren) {
if (ln) {
- PL_regstartp[paren] = HOPc(locinput, -1);
- PL_regendp[paren] = locinput;
+ PL_regstartp[paren] = HOPc(locinput, -1) - PL_bostr;
+ PL_regendp[paren] = locinput - PL_bostr;
}
else
- PL_regendp[paren] = NULL;
+ PL_regendp[paren] = -1;
}
if (regmatch(next))
sayYES;
@@ -2256,11 +2285,11 @@ regmatch(regnode *prog)
{
if (paren) {
if (n) {
- PL_regstartp[paren] = HOPc(PL_reginput, -1);
- PL_regendp[paren] = PL_reginput;
+ PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
+ PL_regendp[paren] = PL_reginput - PL_bostr;
}
else
- PL_regendp[paren] = NULL;
+ PL_regendp[paren] = -1;
}
if (regmatch(next))
sayYES;
@@ -2293,11 +2322,11 @@ regmatch(regnode *prog)
{
if (paren && n) {
if (n) {
- PL_regstartp[paren] = HOPc(PL_reginput, -1);
- PL_regendp[paren] = PL_reginput;
+ PL_regstartp[paren] = HOPc(PL_reginput, -1) - PL_bostr;
+ PL_regendp[paren] = PL_reginput - PL_bostr;
}
else
- PL_regendp[paren] = NULL;
+ PL_regendp[paren] = -1;
}
if (regmatch(next))
sayYES;
diff --git a/regexp.h b/regexp.h
index b1170f1e06..9da5bd47e0 100644
--- a/regexp.h
+++ b/regexp.h
@@ -34,20 +34,9 @@ struct reg_substr_data {
};
typedef struct regexp {
- I32 refcnt;
- char **startp;
- char **endp;
+ I32 *startp;
+ I32 *endp;
regnode *regstclass;
- I32 minlen; /* mininum possible length of $& */
- I32 prelen; /* length of precomp */
- U32 nparens; /* number of parentheses */
- U32 lastparen; /* last paren matched */
- char *precomp; /* pre-compilation regular expression */
- char *subbase; /* saved string so \digit works forever */
- char *subbeg; /* same, but not responsible for allocation */
- char *subend; /* end of subbase */
- U32 reganch; /* Internal use only +
- Tainted information used by regexec? */
#if 0
SV *anchored_substr; /* Substring at fixed position wrt start. */
I32 anchored_offset; /* Position of it. */
@@ -60,7 +49,18 @@ typedef struct regexp {
#else
struct reg_substr_data *substrs;
#endif
+ char *precomp; /* pre-compilation regular expression */
struct reg_data *data; /* Additional data. */
+ char *subbeg; /* saved or original string
+ so \digit works forever. */
+ I32 sublen; /* Length of string pointed by subbeg */
+ I32 refcnt;
+ I32 minlen; /* minimum possible length of $& */
+ I32 prelen; /* length of precomp */
+ U32 nparens; /* number of parentheses */
+ U32 lastparen; /* last paren matched */
+ U32 reganch; /* Internal use only +
+ Tainted information used by regexec? */
regnode program[1]; /* Unwarranted chumminess with compiler. */
} regexp;
@@ -92,6 +92,7 @@ typedef struct regexp {
#define ROPT_UTF8 0x10000
#define ROPT_NAUGHTY 0x20000 /* how exponential is this pattern? */
+#define ROPT_COPY_DONE 0x40000 /* subbeg is a copy of the string */
#define RX_MATCH_TAINTED(prog) ((prog)->reganch & ROPT_TAINTED_SEEN)
#define RX_MATCH_TAINTED_on(prog) ((prog)->reganch |= ROPT_TAINTED_SEEN)
@@ -100,10 +101,25 @@ typedef struct regexp {
? RX_MATCH_TAINTED_on(prog) \
: RX_MATCH_TAINTED_off(prog))
+#define RX_MATCH_COPIED(prog) ((prog)->reganch & ROPT_COPY_DONE)
+#define RX_MATCH_COPIED_on(prog) ((prog)->reganch |= ROPT_COPY_DONE)
+#define RX_MATCH_COPIED_off(prog) ((prog)->reganch &= ~ROPT_COPY_DONE)
+#define RX_MATCH_COPIED_set(prog,t) ((t) \
+ ? RX_MATCH_COPIED_on(prog) \
+ : RX_MATCH_COPIED_off(prog))
+
#define REXEC_COPY_STR 1 /* Need to copy the string. */
#define REXEC_CHECKED 2 /* check_substr already checked. */
#define REXEC_SCREAM 4 /* use scream table. */
#define REXEC_IGNOREPOS 8 /* \G matches at start. */
+#define REXEC_NOT_FIRST 0x10 /* This is another iteration of //g. */
#define ReREFCNT_inc(re) ((re && re->refcnt++), re)
#define ReREFCNT_dec(re) pregfree(re)
+
+#define FBMcf_TAIL_DOLLAR 1
+#define FBMcf_TAIL_Z 2
+#define FBMcf_TAIL_z 4
+#define FBMcf_TAIL (FBMcf_TAIL_DOLLAR|FBMcf_TAIL_Z|FBMcf_TAIL_z)
+
+#define FBMrf_MULTILINE 1
diff --git a/t/op/pat.t b/t/op/pat.t
index b6a3a3a240..a086c12eaf 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -4,7 +4,7 @@
# the format supported by op/regexp.t. If you want to add a test
# that does fit that format, add it to op/re_tests, not here.
-print "1..186\n";
+print "1..188\n";
BEGIN {
chdir 't' if -d 't';
@@ -858,3 +858,17 @@ $test++;
print "$1\n";
$test++;
+# See if $i works inside (?{}) in the presence of saved substrings and
+# changing $_
+@a = qw(foo bar);
+@b = ();
+s/(\w)(?{push @b, $1})/,$1,/g for @a;
+
+print "# \@b='@b', expect 'f o o b a r'\nnot " unless("@b" eq "f o o b a r");
+print "ok $test\n";
+$test++;
+
+print "not " unless("@a" eq ",f,,o,,o, ,b,,a,,r,");
+print "ok $test\n";
+$test++;
+
diff --git a/t/op/re_tests b/t/op/re_tests
index ba824aeefa..466fc856c9 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -482,11 +482,204 @@ $(?<=^(a)) a y $1 a
((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x
(?<=x+)y - c - /(?<=x+)y/: variable length lookbehind not implemented
a{37,17} - c - /a{37,17}/: Can't do {n,m} with n > m
+\Z a\nb\n y $-[0] 3
+\z a\nb\n y $-[0] 4
+$ a\nb\n y $-[0] 3
+\Z b\na\n y $-[0] 3
+\z b\na\n y $-[0] 4
+$ b\na\n y $-[0] 3
+\Z b\na y $-[0] 3
+\z b\na y $-[0] 3
+$ b\na y $-[0] 3
+'\Z'm a\nb\n y $-[0] 3
+'\z'm a\nb\n y $-[0] 4
+'$'m a\nb\n y $-[0] 1
+'\Z'm b\na\n y $-[0] 3
+'\z'm b\na\n y $-[0] 4
+'$'m b\na\n y $-[0] 1
+'\Z'm b\na y $-[0] 3
+'\z'm b\na y $-[0] 3
+'$'m b\na y $-[0] 1
a\Z a\nb\n n - -
-b\Z a\nb\n y - -
-b\z a\nb\n n - -
-b\Z a\nb y - -
-b\z a\nb y - -
+a\z a\nb\n n - -
+a$ a\nb\n n - -
+a\Z b\na\n y $-[0] 2
+a\z b\na\n n - -
+a$ b\na\n y $-[0] 2
+a\Z b\na y $-[0] 2
+a\z b\na y $-[0] 2
+a$ b\na y $-[0] 2
+'a\Z'm a\nb\n bn - -
+'a\z'm a\nb\n n - -
+'a$'m a\nb\n y $-[0] 0
+'a\Z'm b\na\n y $-[0] 2
+'a\z'm b\na\n n - -
+'a$'m b\na\n y $-[0] 2
+'a\Z'm b\na y $-[0] 2
+'a\z'm b\na y $-[0] 2
+'a$'m b\na y $-[0] 2
+aa\Z aa\nb\n n - -
+aa\z aa\nb\n n - -
+aa$ aa\nb\n n - -
+aa\Z b\naa\n y $-[0] 2
+aa\z b\naa\n n - -
+aa$ b\naa\n y $-[0] 2
+aa\Z b\naa y $-[0] 2
+aa\z b\naa y $-[0] 2
+aa$ b\naa y $-[0] 2
+'aa\Z'm aa\nb\n bn - -
+'aa\z'm aa\nb\n n - -
+'aa$'m aa\nb\n y $-[0] 0
+'aa\Z'm b\naa\n y $-[0] 2
+'aa\z'm b\naa\n n - -
+'aa$'m b\naa\n y $-[0] 2
+'aa\Z'm b\naa y $-[0] 2
+'aa\z'm b\naa y $-[0] 2
+'aa$'m b\naa y $-[0] 2
+aa\Z ac\nb\n n - -
+aa\z ac\nb\n n - -
+aa$ ac\nb\n n - -
+aa\Z b\nac\n n - -
+aa\z b\nac\n n - -
+aa$ b\nac\n n - -
+aa\Z b\nac n - -
+aa\z b\nac n - -
+aa$ b\nac n - -
+'aa\Z'm ac\nb\n n - -
+'aa\z'm ac\nb\n n - -
+'aa$'m ac\nb\n n - -
+'aa\Z'm b\nac\n n - -
+'aa\z'm b\nac\n n - -
+'aa$'m b\nac\n n - -
+'aa\Z'm b\nac n - -
+'aa\z'm b\nac n - -
+'aa$'m b\nac n - -
+aa\Z ca\nb\n n - -
+aa\z ca\nb\n n - -
+aa$ ca\nb\n n - -
+aa\Z b\nca\n n - -
+aa\z b\nca\n n - -
+aa$ b\nca\n n - -
+aa\Z b\nca n - -
+aa\z b\nca n - -
+aa$ b\nca n - -
+'aa\Z'm ca\nb\n n - -
+'aa\z'm ca\nb\n n - -
+'aa$'m ca\nb\n n - -
+'aa\Z'm b\nca\n n - -
+'aa\z'm b\nca\n n - -
+'aa$'m b\nca\n n - -
+'aa\Z'm b\nca n - -
+'aa\z'm b\nca n - -
+'aa$'m b\nca n - -
+ab\Z ab\nb\n n - -
+ab\z ab\nb\n n - -
+ab$ ab\nb\n n - -
+ab\Z b\nab\n y $-[0] 2
+ab\z b\nab\n n - -
+ab$ b\nab\n y $-[0] 2
+ab\Z b\nab y $-[0] 2
+ab\z b\nab y $-[0] 2
+ab$ b\nab y $-[0] 2
+'ab\Z'm ab\nb\n bn - -
+'ab\z'm ab\nb\n n - -
+'ab$'m ab\nb\n y $-[0] 0
+'ab\Z'm b\nab\n y $-[0] 2
+'ab\z'm b\nab\n n - -
+'ab$'m b\nab\n y $-[0] 2
+'ab\Z'm b\nab y $-[0] 2
+'ab\z'm b\nab y $-[0] 2
+'ab$'m b\nab y $-[0] 2
+ab\Z ac\nb\n n - -
+ab\z ac\nb\n n - -
+ab$ ac\nb\n n - -
+ab\Z b\nac\n n - -
+ab\z b\nac\n n - -
+ab$ b\nac\n n - -
+ab\Z b\nac n - -
+ab\z b\nac n - -
+ab$ b\nac n - -
+'ab\Z'm ac\nb\n n - -
+'ab\z'm ac\nb\n n - -
+'ab$'m ac\nb\n n - -
+'ab\Z'm b\nac\n n - -
+'ab\z'm b\nac\n n - -
+'ab$'m b\nac\n n - -
+'ab\Z'm b\nac n - -
+'ab\z'm b\nac n - -
+'ab$'m b\nac n - -
+ab\Z ca\nb\n n - -
+ab\z ca\nb\n n - -
+ab$ ca\nb\n n - -
+ab\Z b\nca\n n - -
+ab\z b\nca\n n - -
+ab$ b\nca\n n - -
+ab\Z b\nca n - -
+ab\z b\nca n - -
+ab$ b\nca n - -
+'ab\Z'm ca\nb\n n - -
+'ab\z'm ca\nb\n n - -
+'ab$'m ca\nb\n n - -
+'ab\Z'm b\nca\n n - -
+'ab\z'm b\nca\n n - -
+'ab$'m b\nca\n n - -
+'ab\Z'm b\nca n - -
+'ab\z'm b\nca n - -
+'ab$'m b\nca n - -
+abb\Z abb\nb\n n - -
+abb\z abb\nb\n n - -
+abb$ abb\nb\n n - -
+abb\Z b\nabb\n y $-[0] 2
+abb\z b\nabb\n n - -
+abb$ b\nabb\n y $-[0] 2
+abb\Z b\nabb y $-[0] 2
+abb\z b\nabb y $-[0] 2
+abb$ b\nabb y $-[0] 2
+'abb\Z'm abb\nb\n bn - -
+'abb\z'm abb\nb\n n - -
+'abb$'m abb\nb\n y $-[0] 0
+'abb\Z'm b\nabb\n y $-[0] 2
+'abb\z'm b\nabb\n n - -
+'abb$'m b\nabb\n y $-[0] 2
+'abb\Z'm b\nabb y $-[0] 2
+'abb\z'm b\nabb y $-[0] 2
+'abb$'m b\nabb y $-[0] 2
+abb\Z ac\nb\n n - -
+abb\z ac\nb\n n - -
+abb$ ac\nb\n n - -
+abb\Z b\nac\n n - -
+abb\z b\nac\n n - -
+abb$ b\nac\n n - -
+abb\Z b\nac n - -
+abb\z b\nac n - -
+abb$ b\nac n - -
+'abb\Z'm ac\nb\n n - -
+'abb\z'm ac\nb\n n - -
+'abb$'m ac\nb\n n - -
+'abb\Z'm b\nac\n n - -
+'abb\z'm b\nac\n n - -
+'abb$'m b\nac\n n - -
+'abb\Z'm b\nac n - -
+'abb\z'm b\nac n - -
+'abb$'m b\nac n - -
+abb\Z ca\nb\n n - -
+abb\z ca\nb\n n - -
+abb$ ca\nb\n n - -
+abb\Z b\nca\n n - -
+abb\z b\nca\n n - -
+abb$ b\nca\n n - -
+abb\Z b\nca n - -
+abb\z b\nca n - -
+abb$ b\nca n - -
+'abb\Z'm ca\nb\n n - -
+'abb\z'm ca\nb\n n - -
+'abb$'m ca\nb\n n - -
+'abb\Z'm b\nca\n n - -
+'abb\z'm b\nca\n n - -
+'abb$'m b\nca\n n - -
+'abb\Z'm b\nca n - -
+'abb\z'm b\nca n - -
+'abb$'m b\nca n - -
(^|x)(c) ca y $2 c
a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz x n - -
a(?{$a=2;$b=3;($b)=$a})b yabz y $b 2
diff --git a/t/op/regexp.t b/t/op/regexp.t
index 98d998d9e5..66b2d1c116 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -16,6 +16,8 @@ $ENV{PERL_DESTRUCT_LEVEL} = 0 unless $ENV{PERL_DESTRUCT_LEVEL} > 3;
# y expect a match
# n expect no match
# c expect an error
+# B test exposes a known bug in Perl, should be skipped
+# b test exposes a known bug in Perl, should be skipped if noamp
#
# Columns 4 and 5 are used only if column 3 contains C<y> or C<c>.
#
@@ -62,7 +64,9 @@ while (<TESTS>) {
$subject =~ s/\\n/\n/g;
$expect =~ s/\\n/\n/g;
$expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/;
- for $study ("", "study \$subject") {
+ $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//));
+ $result =~ s/B//i unless $skip;
+ for $study ('', 'study \$subject') {
$c = $iters;
eval "$study; \$match = (\$subject =~ m$pat) while \$c--; \$got = \"$repl\";";
chomp( $err = $@ );
@@ -70,6 +74,9 @@ while (<TESTS>) {
if ($err !~ m!^\Q$expect!) { print "not ok $. (compile) $input => `$err'\n"; next TEST }
last; # no need to study a syntax error
}
+ elsif ( $skip ) {
+ print "ok $. # Skipped: not fixed yet\n"; next TEST;
+ }
elsif ($@) {
print "not ok $. $input => error `$err'\n"; next TEST;
}
diff --git a/thrdvar.h b/thrdvar.h
index 7fae131b64..dcaaccbce5 100644
--- a/thrdvar.h
+++ b/thrdvar.h
@@ -142,8 +142,8 @@ PERLVAR(Tcolors[6], char *) /* from regcomp.c */
PERLVAR(Treginput, char *) /* String-input pointer. */
PERLVAR(Tregbol, char *) /* Beginning of input, for ^ check. */
PERLVAR(Tregeol, char *) /* End of input, for $ check. */
-PERLVAR(Tregstartp, char **) /* Pointer to startp array. */
-PERLVAR(Tregendp, char **) /* Ditto for endp. */
+PERLVAR(Tregstartp, I32 *) /* Pointer to startp array. */
+PERLVAR(Tregendp, I32 *) /* Ditto for endp. */
PERLVAR(Treglastparen, U32 *) /* Similarly for lastparen. */
PERLVAR(Tregtill, char *) /* How far we are required to go. */
PERLVAR(Tregprev, char) /* char before regbol, \n if none */
@@ -166,6 +166,8 @@ PERLVAR(Treg_magic, MAGIC *) /* pos-magic of what we match */
PERLVAR(Treg_oldpos, I32) /* old pos of what we match */
PERLVARI(Treg_oldcurpm, PMOP*, NULL) /* curpm before match */
PERLVARI(Treg_curpm, PMOP*, NULL) /* curpm during match */
+PERLVAR(Treg_oldsaved, char*) /* old saved substr during match */
+PERLVAR(Treg_oldsavedlen, STRLEN) /* old length of saved substr during match */
PERLVARI(Tregcompp, regcomp_t, FUNC_NAME_TO_PTR(pregcomp))
/* Pointer to RE compiler */
diff --git a/util.c b/util.c
index 67c030b056..0c2b0523ca 100644
--- a/util.c
+++ b/util.c
@@ -889,6 +889,14 @@ mem_collxfrm(const char *s, STRLEN len, STRLEN *xlen)
#endif /* USE_LOCALE_COLLATE */
+#define FBM_TABLE_OFFSET 2 /* Number of bytes between EOS and table*/
+
+/* As a space optimization, we do not compile tables for strings of length
+ 0 and 1, and for strings of length 2 unless FBMcf_TAIL. These are
+ special-cased in fbm_instr().
+
+ If FBMcf_TAIL, the table is created as if the string has a trailing \n. */
+
void
fbm_compile(SV *sv, U32 flags /* not used yet */)
{
@@ -899,24 +907,32 @@ fbm_compile(SV *sv, U32 flags /* not used yet */)
I32 rarest = 0;
U32 frequency = 256;
+ if (flags & FBMcf_TAIL)
+ sv_catpvn(sv, "\n", 1); /* Taken into account in fbm_instr() */
s = (U8*)SvPV_force(sv, len);
(void)SvUPGRADE(sv, SVt_PVBM);
- if (len > 255 || len == 0) /* TAIL might be on on a zero-length string. */
- return; /* can't have offsets that big */
+ if (len == 0) /* TAIL might be set on a zero-length string. */
+ return;
if (len > 2) {
- Sv_Grow(sv,len + 258);
- table = (unsigned char*)(SvPVX(sv) + len + 1);
- s = table - 2;
+ I32 mlen = len;
+ unsigned char *sb;
+
+ if (mlen > 255)
+ mlen = 255;
+ Sv_Grow(sv,len + 256 + FBM_TABLE_OFFSET);
+ table = (unsigned char*)(SvPVX(sv) + len + FBM_TABLE_OFFSET);
+ s = table - 1 - FBM_TABLE_OFFSET; /* Last char */
for (i = 0; i < 256; i++) {
- table[i] = len;
+ table[i] = mlen;
}
+ table[-1] = flags; /* Not used yet */
i = 0;
- while (s >= (unsigned char*)(SvPVX(sv)))
- {
- if (table[*s] == len)
- table[*s] = i;
- s--,i++;
- }
+ sb = s - mlen;
+ while (s >= sb) {
+ if (table[*s] == mlen)
+ table[*s] = i;
+ s--, i++;
+ }
}
sv_magic(sv, Nullsv, 'B', Nullch, 0); /* deep magic */
SvVALID_on(sv);
@@ -930,119 +946,200 @@ fbm_compile(SV *sv, U32 flags /* not used yet */)
}
BmRARE(sv) = s[rarest];
BmPREVIOUS(sv) = rarest;
+ BmUSEFUL(sv) = 100; /* Initial value */
+ if (flags & FBMcf_TAIL)
+ SvTAIL_on(sv);
DEBUG_r(PerlIO_printf(Perl_debug_log, "rarest char %c at %d\n",BmRARE(sv),BmPREVIOUS(sv)));
}
+/* If SvTAIL(littlestr), it has a fake '\n' at end. */
+/* If SvTAIL is actually due to \Z or \z, this gives false positives
+ if multiline */
+
char *
fbm_instr(unsigned char *big, register unsigned char *bigend, SV *littlestr, U32 flags)
{
register unsigned char *s;
- register I32 tmp;
- register I32 littlelen;
- register unsigned char *little;
- register unsigned char *table;
- register unsigned char *olds;
- register unsigned char *oldlittle;
+ STRLEN l;
+ register unsigned char *little = (unsigned char *)SvPV(littlestr,l);
+ register STRLEN littlelen = l;
+ register I32 multiline = flags & FBMrf_MULTILINE;
+
+ if (bigend - big < littlelen) {
+ check_tail:
+ if ( SvTAIL(littlestr)
+ && (bigend - big == littlelen - 1)
+ && (littlelen == 1
+ || *big == *little && memEQ(big, little, littlelen - 1)))
+ return (char*)big;
+ return Nullch;
+ }
- if (SvTYPE(littlestr) != SVt_PVBM || !SvVALID(littlestr)) {
- STRLEN len;
- char *l = SvPV(littlestr,len);
- if (!len) {
- if (SvTAIL(littlestr)) { /* Can be only 0-len constant
- substr => we can ignore SvVALID */
- if (PL_multiline) {
- char *t = "\n";
- if ((s = (unsigned char*)ninstr((char*)big, (char*)bigend,
- t, t + len))) {
- return (char*)s;
+ if (littlelen <= 2) { /* Special-cased */
+ register char c;
+
+ if (littlelen == 1) {
+ if (SvTAIL(littlestr) && !multiline) { /* Anchor only! */
+ /* Know that bigend != big. */
+ if (bigend[-1] == '\n')
+ return (char *)(bigend - 1);
+ return (char *) bigend;
+ }
+ s = big;
+ while (s < bigend) {
+ if (*s == *little)
+ return (char *)s;
+ s++;
+ }
+ if (SvTAIL(littlestr))
+ return (char *) bigend;
+ return Nullch;
+ }
+ if (!littlelen)
+ return (char*)big; /* Cannot be SvTAIL! */
+
+ /* littlelen is 2 */
+ if (SvTAIL(littlestr) && !multiline) {
+ if (bigend[-1] == '\n' && bigend[-2] == *little)
+ return (char*)bigend - 2;
+ if (bigend[-1] == *little)
+ return (char*)bigend - 1;
+ return Nullch;
+ }
+ {
+ /* This should be better than FBM if c1 == c2, and almost
+ as good otherwise: maybe better since we do less indirection.
+ And we save a lot of memory by caching no table. */
+ register unsigned char c1 = little[0];
+ register unsigned char c2 = little[1];
+
+ s = big + 1;
+ bigend--;
+ if (c1 != c2) {
+ while (s <= bigend) {
+ if (s[0] == c2) {
+ if (s[-1] == c1)
+ return (char*)s - 1;
+ s += 2;
+ continue;
+ }
+ next_chars:
+ if (s[0] == c1) {
+ if (s == bigend)
+ goto check_1char_anchor;
+ if (s[1] == c2)
+ return (char*)s;
+ else {
+ s++;
+ goto next_chars;
+ }
}
+ else
+ s += 2;
+ }
+ goto check_1char_anchor;
+ }
+ /* Now c1 == c2 */
+ while (s <= bigend) {
+ if (s[0] == c1) {
+ if (s[-1] == c1)
+ return (char*)s - 1;
+ if (s == bigend)
+ goto check_1char_anchor;
+ if (s[1] == c1)
+ return (char*)s;
+ s += 3;
}
- if (bigend > big && bigend[-1] == '\n')
- return (char *)(bigend - 1);
else
- return (char *) bigend;
+ s += 2;
}
- return (char*)big;
}
- return ninstr((char*)big,(char*)bigend, l, l + len);
+ check_1char_anchor: /* One char and anchor! */
+ if (SvTAIL(littlestr) && (*bigend == *little))
+ return (char *)bigend; /* bigend is already decremented. */
+ return Nullch;
}
-
- littlelen = SvCUR(littlestr);
- if (SvTAIL(littlestr) && !PL_multiline) { /* tail anchored? */
- if (littlelen > bigend - big)
- return Nullch;
- little = (unsigned char*)SvPVX(littlestr);
+ if (SvTAIL(littlestr) && !multiline) { /* tail anchored? */
s = bigend - littlelen;
- if (s > big
+ if (s >= big
&& bigend[-1] == '\n'
- && s[-1] == *little && memEQ((char*)s - 1,(char*)little,littlelen))
- return (char*)s - 1; /* how sweet it is */
- else if (*s == *little && memEQ((char*)s,(char*)little,littlelen))
+ && *s == *little
+ /* Automatically of length > 2 */
+ && memEQ((char*)s + 1, (char*)little + 1, littlelen - 2))
return (char*)s; /* how sweet it is */
+ if (s[1] == *little && memEQ((char*)s + 2,(char*)little + 1,
+ littlelen - 2))
+ return (char*)s + 1; /* how sweet it is */
return Nullch;
}
- if (littlelen <= 2) {
- unsigned char c1 = (unsigned char)SvPVX(littlestr)[0];
- unsigned char c2 = (unsigned char)SvPVX(littlestr)[1];
- /* This may do extra comparisons if littlelen == 2, but this
- should be hidden in the noise since we do less indirection. */
-
- s = big;
- bigend -= littlelen;
- while (s <= bigend) {
- if (s[0] == c1
- && (littlelen == 1 || s[1] == c2)
- && (!SvTAIL(littlestr)
- || s == bigend
- || s[littlelen] == '\n')) /* Automatically multiline */
- {
+ if (SvTYPE(littlestr) != SVt_PVBM || !SvVALID(littlestr)) {
+ char *b = ninstr((char*)big,(char*)bigend,
+ (char*)little, (char*)little + littlelen);
+
+ if (!b && SvTAIL(littlestr)) { /* Automatically multiline! */
+ /* Chop \n from littlestr: */
+ s = bigend - littlelen + 1;
+ if (*s == *little && memEQ((char*)s + 1, (char*)little + 1,
+ littlelen - 2))
return (char*)s;
- }
- s++;
+ return Nullch;
}
- return Nullch;
+ return b;
}
- table = (unsigned char*)(SvPVX(littlestr) + littlelen + 1);
- if (--littlelen >= bigend - big)
- return Nullch;
- s = big + littlelen;
- oldlittle = little = table - 2;
- if (s < bigend) {
- top2:
- /*SUPPRESS 560*/
- if (tmp = table[*s]) {
+
+ { /* Do actual FBM. */
+ register unsigned char *table = little + littlelen + FBM_TABLE_OFFSET;
+ register unsigned char *oldlittle;
+
+ if (littlelen > bigend - big)
+ return Nullch;
+ --littlelen; /* Last char found by table lookup */
+
+ s = big + littlelen;
+ little += littlelen; /* last char */
+ oldlittle = little;
+ if (s < bigend) {
+ register I32 tmp;
+
+ top2:
+ /*SUPPRESS 560*/
+ if (tmp = table[*s]) {
#ifdef POINTERRIGOR
- if (bigend - s > tmp) {
+ if (bigend - s > tmp) {
+ s += tmp;
+ goto top2;
+ }
s += tmp;
- goto top2;
- }
#else
- if ((s += tmp) < bigend)
- goto top2;
-#endif
- return Nullch;
- }
- else {
- tmp = littlelen; /* less expensive than calling strncmp() */
- olds = s;
- while (tmp--) {
- if (*--s == *--little)
- continue;
- differ:
- s = olds + 1; /* here we pay the price for failure */
- little = oldlittle;
- if (s < bigend) /* fake up continue to outer loop */
+ if ((s += tmp) < bigend)
goto top2;
- return Nullch;
+#endif
+ goto check_end;
+ }
+ else { /* less expensive than calling strncmp() */
+ register unsigned char *olds = s;
+
+ tmp = littlelen;
+
+ while (tmp--) {
+ if (*--s == *--little)
+ continue;
+ differ:
+ s = olds + 1; /* here we pay the price for failure */
+ little = oldlittle;
+ if (s < bigend) /* fake up continue to outer loop */
+ goto top2;
+ goto check_end;
+ }
+ return (char *)s;
}
- if (SvTAIL(littlestr) /* automatically multiline */
- && olds + 1 != bigend
- && olds[1] != '\n')
- goto differ;
- return (char *)s;
}
+ check_end:
+ if ( s == bigend && (table[-1] & FBMcf_TAIL)
+ && memEQ(bigend - littlelen, oldlittle - littlelen, littlelen) )
+ return (char*)bigend - littlelen;
+ return Nullch;
}
- return Nullch;
}
/* start_shift, end_shift are positive quantities which give offsets
@@ -1051,10 +1148,15 @@ fbm_instr(unsigned char *big, register unsigned char *bigend, SV *littlestr, U32
old_posp is the way of communication between consequent calls if
the next call needs to find the .
The initial *old_posp should be -1.
- Note that we do not take into account SvTAIL, so it may give wrong
- positives if _ALL flag is set.
+
+ Note that we take into account SvTAIL, so one can get extra
+ optimizations if _ALL flag is set.
*/
+/* If SvTAIL is actually due to \Z or \z, this gives false positives
+ if PL_multiline. In fact if !PL_multiline the authoritative answer
+ is not supported yet. */
+
char *
screaminstr(SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift, I32 *old_posp, I32 last)
{
@@ -1071,8 +1173,18 @@ screaminstr(SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift, I32 *old_
if (*old_posp == -1
? (pos = PL_screamfirst[BmRARE(littlestr)]) < 0
- : (((pos = *old_posp), pos += PL_screamnext[pos]) == 0))
+ : (((pos = *old_posp), pos += PL_screamnext[pos]) == 0)) {
+ cant_find:
+ if ( BmRARE(littlestr) == '\n'
+ && BmPREVIOUS(littlestr) == SvCUR(littlestr) - 1) {
+ little = (unsigned char *)(SvPVX(littlestr));
+ littleend = little + SvCUR(littlestr);
+ first = *little++;
+ goto check_tail;
+ }
return Nullch;
+ }
+
little = (unsigned char *)(SvPVX(littlestr));
littleend = little + SvCUR(littlestr);
first = *little++;
@@ -1081,10 +1193,14 @@ screaminstr(SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift, I32 *old_
big = (unsigned char *)(SvPVX(bigstr));
/* The value of pos we can stop at: */
stop_pos = SvCUR(bigstr) - end_shift - (SvCUR(littlestr) - 1 - previous);
- if (previous + start_shift > stop_pos) return Nullch;
+ if (previous + start_shift > stop_pos) {
+ if (previous + start_shift == stop_pos + 1) /* A fake '\n'? */
+ goto check_tail;
+ return Nullch;
+ }
while (pos < previous + start_shift) {
if (!(pos += PL_screamnext[pos]))
- return Nullch;
+ goto cant_find;
}
#ifdef POINTERRIGOR
do {
@@ -1122,8 +1238,22 @@ screaminstr(SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift, I32 *old_
found = 1;
}
} while ( pos += PL_screamnext[pos] );
- return (last && found) ? (char *)(big+(*old_posp)) : Nullch;
+ if (last && found)
+ return (char *)(big+(*old_posp));
#endif /* POINTERRIGOR */
+ check_tail:
+ if (!SvTAIL(littlestr) || (end_shift > 0))
+ return Nullch;
+ /* Ignore the trailing "\n". This code is not microoptimized */
+ big = (unsigned char *)(SvPVX(bigstr) + SvCUR(bigstr));
+ stop_pos = littleend - little; /* Actual littlestr len */
+ if (stop_pos == 0)
+ return (char*)big;
+ big -= stop_pos;
+ if (*big == first
+ && ((stop_pos == 1) || memEQ(big + 1, little, stop_pos - 1)))
+ return (char*)big;
+ return Nullch;
}
I32