summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cop.h 6
-rw-r--r-- embedvar.h 6
-rw-r--r-- objXSUB.h 4
-rw-r--r-- pp.c 2
-rw-r--r-- pp_ctl.c 5
-rw-r--r-- pp_hot.c 42
-rw-r--r-- regexec.c 39
-rw-r--r-- regexp.h 2
-rwxr-xr-x t/op/pat.t 29
-rw-r--r-- thrdvar.h 2
10 files changed, 95 insertions, 42 deletions
diff --git a/cop.h b/cop.h
index 043ea8d6d0..6bdb594bb8 100644
--- a/cop.h
+++ b/cop.h
@@ -213,7 +213,7 @@ struct block {
struct subst {
I32 sbu_iters;
I32 sbu_maxiters;
- I32 sbu_safebase;
+ I32 sbu_rflags;
I32 sbu_oldsave;
bool sbu_once;
bool sbu_rxtainted;
@@ -228,7 +228,7 @@ struct subst {
};
#define sb_iters cx_u.cx_subst.sbu_iters
#define sb_maxiters cx_u.cx_subst.sbu_maxiters
-#define sb_safebase cx_u.cx_subst.sbu_safebase
+#define sb_rflags cx_u.cx_subst.sbu_rflags
#define sb_oldsave cx_u.cx_subst.sbu_oldsave
#define sb_once cx_u.cx_subst.sbu_once
#define sb_rxtainted cx_u.cx_subst.sbu_rxtainted
@@ -244,7 +244,7 @@ struct subst {
#define PUSHSUBST(cx) CXINC, cx = &cxstack[cxstack_ix], \
cx->sb_iters = iters, \
cx->sb_maxiters = maxiters, \
- cx->sb_safebase = safebase, \
+ cx->sb_rflags = r_flags, \
cx->sb_oldsave = oldsave, \
cx->sb_once = once, \
cx->sb_rxtainted = rxtainted, \
diff --git a/embedvar.h b/embedvar.h
index 722561853f..733347d637 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -55,9 +55,11 @@
#define PL_reg_call_cc (PL_curinterp->Treg_call_cc)
#define PL_reg_eval_set (PL_curinterp->Treg_eval_set)
#define PL_reg_flags (PL_curinterp->Treg_flags)
+#define PL_reg_ganch (PL_curinterp->Treg_ganch)
#define PL_reg_re (PL_curinterp->Treg_re)
#define PL_reg_start_tmp (PL_curinterp->Treg_start_tmp)
#define PL_reg_start_tmpl (PL_curinterp->Treg_start_tmpl)
+#define PL_reg_sv (PL_curinterp->Treg_sv)
#define PL_regbol (PL_curinterp->Tregbol)
#define PL_regcc (PL_curinterp->Tregcc)
#define PL_regcode (PL_curinterp->Tregcode)
@@ -439,9 +441,11 @@
#define PL_Treg_call_cc PL_reg_call_cc
#define PL_Treg_eval_set PL_reg_eval_set
#define PL_Treg_flags PL_reg_flags
+#define PL_Treg_ganch PL_reg_ganch
#define PL_Treg_re PL_reg_re
#define PL_Treg_start_tmp PL_reg_start_tmp
#define PL_Treg_start_tmpl PL_reg_start_tmpl
+#define PL_Treg_sv PL_reg_sv
#define PL_Tregbol PL_regbol
#define PL_Tregcc PL_regcc
#define PL_Tregcode PL_regcode
@@ -566,9 +570,11 @@
#define PL_reg_call_cc (thr->Treg_call_cc)
#define PL_reg_eval_set (thr->Treg_eval_set)
#define PL_reg_flags (thr->Treg_flags)
+#define PL_reg_ganch (thr->Treg_ganch)
#define PL_reg_re (thr->Treg_re)
#define PL_reg_start_tmp (thr->Treg_start_tmp)
#define PL_reg_start_tmpl (thr->Treg_start_tmpl)
+#define PL_reg_sv (thr->Treg_sv)
#define PL_regbol (thr->Tregbol)
#define PL_regcc (thr->Tregcc)
#define PL_regcode (thr->Tregcode)
diff --git a/objXSUB.h b/objXSUB.h
index a9820ddbbf..d4d101d68b 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -498,12 +498,16 @@
#define PL_reg_eval_set pPerl->PL_reg_eval_set
#undef PL_reg_flags
#define PL_reg_flags pPerl->PL_reg_flags
+#undef PL_reg_ganch
+#define PL_reg_ganch pPerl->PL_reg_ganch
#undef PL_reg_re
#define PL_reg_re pPerl->PL_reg_re
#undef PL_reg_start_tmp
#define PL_reg_start_tmp pPerl->PL_reg_start_tmp
#undef PL_reg_start_tmpl
#define PL_reg_start_tmpl pPerl->PL_reg_start_tmpl
+#undef PL_reg_sv
+#define PL_reg_sv pPerl->PL_reg_sv
#undef PL_regbol
#define PL_regbol pPerl->PL_regbol
#undef PL_regcc
diff --git a/pp.c b/pp.c
index 0bd4842bb3..21a5dd39fe 100644
--- a/pp.c
+++ b/pp.c
@@ -4672,7 +4672,7 @@ PP(pp_split)
else {
maxiters += (strend - s) * rx->nparens;
while (s < strend && --limit &&
- CALLREGEXEC(rx, s, strend, orig, 1, Nullsv, NULL, 0))
+ CALLREGEXEC(rx, s, strend, orig, 1, sv, NULL, 0))
{
TAINT_IF(RX_MATCH_TAINTED(rx));
if (rx->subbase
diff --git a/pp_ctl.c b/pp_ctl.c
index e488749fe0..f2cee37774 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -164,8 +164,9 @@ PP(pp_substcont)
/* Are we done */
if (cx->sb_once || !CALLREGEXEC(rx, s, cx->sb_strend, orig,
- s == m, Nullsv, NULL,
- cx->sb_safebase ? 0 : REXEC_COPY_STR))
+ s == m, Nullsv, cx->sb_targ,
+ ((cx->sb_rflags & REXEC_COPY_STR)
+ ? 0 : REXEC_COPY_STR)))
{
SV *targ = cx->sb_targ;
sv_catpvn(dstr, s, cx->sb_strend - s);
diff --git a/pp_hot.c b/pp_hot.c
index 713b1d16a8..f9ff09dce7 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -832,7 +832,7 @@ PP(pp_match)
register char *s;
char *strend;
I32 global;
- I32 safebase;
+ I32 r_flags;
char *truebase;
register REGEXP *rx = pm->op_pmregexp;
bool rxtainted;
@@ -841,7 +841,6 @@ PP(pp_match)
I32 minmatch = 0;
I32 oldsave = PL_savestack_ix;
I32 update_minmatch = 1;
- SV *screamer;
if (PL_op->op_flags & OPf_STACKED)
TARG = POPs;
@@ -871,10 +870,6 @@ PP(pp_match)
}
if (rx->minlen > len) goto failure;
- screamer = ( (SvSCREAM(TARG) && rx->check_substr
- && SvTYPE(rx->check_substr) == SVt_PVBM
- && SvVALID(rx->check_substr))
- ? TARG : Nullsv);
truebase = t = s;
if (global = pm->op_pmflags & PMf_GLOBAL) {
rx->startp[0] = 0;
@@ -887,9 +882,14 @@ PP(pp_match)
}
}
}
- safebase = ((gimme != G_ARRAY && !global && rx->nparens)
+ r_flags = ((gimme != G_ARRAY && !global && rx->nparens)
|| SvTEMP(TARG) || PL_sawampersand)
? REXEC_COPY_STR : 0;
+ if (SvSCREAM(TARG) && rx->check_substr
+ && SvTYPE(rx->check_substr) == SVt_PVBM
+ && SvVALID(rx->check_substr))
+ r_flags |= REXEC_SCREAM;
+
if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) {
SAVEINT(PL_multiline);
PL_multiline = pm->op_pmflags & PMf_MULTILINE;
@@ -905,7 +905,7 @@ play_it_again:
}
if (rx->check_substr) {
if (!(rx->reganch & ROPT_NOSCAN)) { /* Floating checkstring. */
- if ( screamer ) {
+ if (r_flags & REXEC_SCREAM) {
I32 p = -1;
char *b;
@@ -950,8 +950,7 @@ play_it_again:
rx->float_substr = Nullsv;
}
}
- if (CALLREGEXEC(rx, s, strend, truebase, minmatch,
- screamer, NULL, safebase))
+ if (CALLREGEXEC(rx, s, strend, truebase, minmatch, TARG, NULL, r_flags))
{
PL_curpm = pm;
if (pm->op_pmflags & PMf_ONCE)
@@ -1602,13 +1601,12 @@ PP(pp_subst)
bool once;
bool rxtainted;
char *orig;
- I32 safebase;
+ I32 r_flags;
register REGEXP *rx = pm->op_pmregexp;
STRLEN len;
int force_on_match = 0;
I32 oldsave = PL_savestack_ix;
I32 update_minmatch = 1;
- SV *screamer;
/* known replacement string? */
dstr = (pm->op_pmflags & PMf_CONST) ? POPs : Nullsv;
@@ -1646,12 +1644,12 @@ PP(pp_subst)
pm = PL_curpm;
rx = pm->op_pmregexp;
}
- screamer = ( (SvSCREAM(TARG) && rx->check_substr
- && SvTYPE(rx->check_substr) == SVt_PVBM
- && SvVALID(rx->check_substr))
- ? TARG : Nullsv);
- safebase = (rx->nparens || SvTEMP(TARG) || PL_sawampersand)
+ r_flags = (rx->nparens || SvTEMP(TARG) || PL_sawampersand)
? REXEC_COPY_STR : 0;
+ if (SvSCREAM(TARG) && rx->check_substr
+ && SvTYPE(rx->check_substr) == SVt_PVBM
+ && SvVALID(rx->check_substr))
+ r_flags |= REXEC_SCREAM;
if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) {
SAVEINT(PL_multiline);
PL_multiline = pm->op_pmflags & PMf_MULTILINE;
@@ -1659,7 +1657,7 @@ PP(pp_subst)
orig = m = s;
if (rx->check_substr) {
if (!(rx->reganch & ROPT_NOSCAN)) { /* It floats. */
- if (screamer) {
+ if (r_flags & REXEC_SCREAM) {
I32 p = -1;
char *b;
@@ -1706,9 +1704,9 @@ PP(pp_subst)
c = dstr ? SvPV(dstr, clen) : Nullch;
/* can do inplace substitution? */
- if (c && clen <= rx->minlen && (once || !(safebase & REXEC_COPY_STR))
+ if (c && clen <= rx->minlen && (once || !(r_flags & REXEC_COPY_STR))
&& !(rx->reganch & ROPT_LOOKBEHIND_SEEN)) {
- if (!CALLREGEXEC(rx, s, strend, orig, 0, screamer, NULL, safebase)) {
+ if (!CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL, r_flags)) {
SPAGAIN;
PUSHs(&PL_sv_no);
LEAVE_SCOPE(oldsave);
@@ -1808,7 +1806,7 @@ PP(pp_subst)
RETURN;
}
- if (CALLREGEXEC(rx, s, strend, orig, 0, screamer, NULL, safebase)) {
+ if (CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL, r_flags)) {
if (force_on_match) {
force_on_match = 0;
s = SvPV_force(TARG, len);
@@ -1842,7 +1840,7 @@ PP(pp_subst)
sv_catpvn(dstr, c, clen);
if (once)
break;
- } while (CALLREGEXEC(rx, s, strend, orig, s == m, Nullsv, NULL, safebase));
+ } while (CALLREGEXEC(rx, s, strend, orig, s == m, Nullsv, NULL, r_flags));
sv_catpvn(dstr, s, strend - s);
(void)SvOOK_off(TARG);
diff --git a/regexec.c b/regexec.c
index 841b9008c2..46833c2f55 100644
--- a/regexec.c
+++ b/regexec.c
@@ -259,13 +259,13 @@ cache_re(regexp *prog)
PL_regdata = prog->data;
PL_reg_re = prog;
}
-
+
/*
- regexec_flags - match a regexp against a string
*/
I32
regexec_flags(register regexp *prog, char *stringarg, register char *strend,
- char *strbeg, I32 minend, SV *screamer, void *data, U32 flags)
+ char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
/* strend: pointer to null at end of string */
/* strbeg: real beginning of string */
/* minend: end of match must be >=minend after stringarg. */
@@ -349,9 +349,9 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
start_shift = prog->check_offset_min; /* okay to underestimate on CC */
/* Should be nonnegative! */
end_shift = minlen - start_shift - CHR_SVLEN(prog->check_substr);
- if (screamer) {
+ if (flags & REXEC_SCREAM) {
if (PL_screamfirst[BmRARE(prog->check_substr)] >= 0)
- s = screaminstr(screamer, prog->check_substr,
+ s = screaminstr(sv, prog->check_substr,
start_shift + (stringarg - strbeg),
end_shift, &scream_pos, 0);
else
@@ -401,14 +401,23 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
(strend - startpos > 60 ? "..." : ""))
);
+ if (prog->reganch & ROPT_GPOS_SEEN) {
+ MAGIC *mg;
+ int pos = 0;
+
+ if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)
+ && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
+ pos = mg->mg_len;
+ PL_reg_ganch = startpos + pos;
+ }
+
/* Simplest case: anchored match need be tried only once. */
/* [unless only anchor is BOL and multiline is set] */
- if (prog->reganch & ROPT_ANCH) {
+ if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
if (regtry(prog, startpos))
goto got_it;
- else if (!(prog->reganch & ROPT_ANCH_GPOS) &&
- (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
- || (prog->reganch & ROPT_ANCH_MBOL)))
+ else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
+ || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
{
if (minlen)
dontbother = minlen - 1;
@@ -424,6 +433,10 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
}
}
goto phooey;
+ } else if (prog->reganch & ROPT_ANCH_GPOS) {
+ if (regtry(prog, PL_reg_ganch))
+ goto got_it;
+ goto phooey;
}
/* Messy cases: unanchored match. */
@@ -479,8 +492,8 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
dontbother = end_shift;
strend = HOPc(strend, -dontbother);
while ( (s <= last) &&
- (screamer
- ? (s = screaminstr(screamer, must, HOPc(s, back_min) - strbeg,
+ ((flags & REXEC_SCREAM)
+ ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
end_shift, &scream_pos, 0))
: (s = fbm_instr((unsigned char*)HOP(s, back_min),
(unsigned char*)strend, must, 0))) ) {
@@ -912,8 +925,8 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend,
char *last;
I32 oldpos = scream_pos;
- if (screamer) {
- last = screaminstr(screamer, prog->float_substr, s - strbeg,
+ if (flags & REXEC_SCREAM) {
+ last = screaminstr(sv, prog->float_substr, s - strbeg,
end_shift, &scream_pos, 1); /* last one */
if (!last) {
last = scream_olds; /* Only one occurence. */
@@ -1159,7 +1172,7 @@ regmatch(regnode *prog)
break;
sayNO;
case GPOS:
- if (locinput == PL_regbol)
+ if (locinput == PL_reg_ganch)
break;
sayNO;
case EOL:
diff --git a/regexp.h b/regexp.h
index 5082610f57..67410a5e7b 100644
--- a/regexp.h
+++ b/regexp.h
@@ -86,6 +86,7 @@ typedef struct regexp {
#define ROPT_LOOKBEHIND_SEEN 0x00100
#define ROPT_EVAL_SEEN 0x00200
#define ROPT_TAINTED_SEEN 0x00400
+#define ROPT_ANCH_SBOL 0x00800
/* 0xf800 of reganch is used by PMf_COMPILETIME */
@@ -101,6 +102,7 @@ typedef struct regexp {
#define REXEC_COPY_STR 1 /* Need to copy the string. */
#define REXEC_CHECKED 2 /* check_substr already checked. */
+#define REXEC_SCREAM 4 /* use scream table. */
#define ReREFCNT_inc(re) ((re && re->refcnt++), re)
#define ReREFCNT_dec(re) pregfree(re)
diff --git a/t/op/pat.t b/t/op/pat.t
index cea2267f08..12b939708a 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -4,7 +4,7 @@
# the format supported by op/regexp.t. If you want to add a test
# that does fit that format, add it to op/re_tests, not here.
-print "1..162\n";
+print "1..168\n";
BEGIN {
chdir 't' if -d 't';
@@ -692,6 +692,33 @@ print "not "
print "ok $test\n";
$test++;
+$str = 'abcde';
+pos $str = 2;
+
+print "not " if $str =~ /^\G/;
+print "ok $test\n";
+$test++;
+
+print "not " if $str =~ /^.\G/;
+print "ok $test\n";
+$test++;
+
+print "not " unless $str =~ /^..\G/;
+print "ok $test\n";
+$test++;
+
+print "not " if $str =~ /^...\G/;
+print "ok $test\n";
+$test++;
+
+print "not " unless $str =~ /.\G./ and $& eq 'bc';
+print "ok $test\n";
+$test++;
+
+print "not " unless $str =~ /\G../ and $& eq 'cd';
+print "ok $test\n";
+$test++;
+
# see if matching against temporaries (created via pp_helem()) is safe
{ foo => "ok $test\n".$^X }->{foo} =~ /^(.*)\n/g;
print "$1\n";
diff --git a/thrdvar.h b/thrdvar.h
index 93c4546594..3e71fb5634 100644
--- a/thrdvar.h
+++ b/thrdvar.h
@@ -156,6 +156,8 @@ PERLVARI(Tregindent, int, 0) /* from regexec.c */
PERLVAR(Tregcc, CURCUR *) /* from regexec.c */
PERLVAR(Treg_call_cc, struct re_cc_state *) /* from regexec.c */
PERLVAR(Treg_re, regexp *) /* from regexec.c */
+PERLVAR(Treg_ganch, char *) /* position of \G */
+PERLVAR(Treg_sv, SV *) /* what we match against */
PERLVARI(Tregcompp, regcomp_t, FUNC_NAME_TO_PTR(pregcomp))
/* Pointer to RE compiler */