From ce862d02de7e5d8ac2078735cf4bd004193e837d Mon Sep 17 00:00:00 2001 From: Ilya Zakharevich Date: Tue, 9 Jun 1998 23:51:47 -0400 Subject: Bugs with (?{}), $^R and many-to-many subst Message-Id: <199806100751.DAA05441@monk.mps.ohio-state.edu> p4raw-id: //depot/perl@1117 --- interp.sym | 1 + intrpvar.h | 1 + op.c | 20 +++++++++++++++----- op.h | 2 +- perl.c | 3 +++ regcomp.c | 5 +++++ regcomp.h | 1 + regexec.c | 44 ++++++++++++++++++++++++++------------------ regexp.h | 1 + t/op/pat.t | 20 +++++++++++++++++++- t/op/subst.t | 30 +++++++++++++++++++++++++++++- 11 files changed, 102 insertions(+), 26 deletions(-) diff --git a/interp.sym b/interp.sym index 1a13e67011..7bbb11e5fc 100644 --- a/interp.sym +++ b/interp.sym @@ -145,6 +145,7 @@ regsize regstartp regtill regxend +replgv restartop rightgv rs diff --git a/intrpvar.h b/intrpvar.h index 6ee52ca153..74c914bb29 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -68,6 +68,7 @@ PERLVAR(Iscreamfirst, I32 *) PERLVAR(Iscreamnext, I32 *) PERLVARI(Imaxscream, I32, -1) PERLVAR(Ilastscream, SV *) +PERLVAR(Ireplgv, GV *) /* shortcuts to misc objects */ PERLVAR(Ierrgv, GV *) diff --git a/op.c b/op.c index 61001a41cc..3440b1cc36 100644 --- a/op.c +++ b/op.c @@ -2099,6 +2099,7 @@ pmruntime(OP *o, OP *expr, OP *repl) { PMOP *pm; LOGOP *rcop; + I32 repl_has_vars = 0; if (o->op_type == OP_TRANS) return pmtrans(o, expr, repl); @@ -2165,13 +2166,15 @@ pmruntime(OP *o, OP *expr, OP *repl) for (curop = LINKLIST(repl); curop!=repl; curop = LINKLIST(curop)) { if (opargs[curop->op_type] & OA_DANGEROUS) { #ifdef USE_THREADS - if (curop->op_type == OP_THREADSV - && strchr("&`'123456789+", curop->op_private)) { - break; + if (curop->op_type == OP_THREADSV) { + repl_has_vars = 1; + if (strchr("&`'123456789+", curop->op_private)) { + break; } #else if (curop->op_type == OP_GV) { GV *gv = ((GVOP*)curop)->op_gv; + repl_has_vars = 1; if (strchr("&`'123456789+", *GvENAME(gv))) break; } @@ -2189,7 +2192,7 @@ pmruntime(OP *o, OP *expr, OP *repl) curop->op_type == OP_PADAV || curop->op_type == OP_PADHV || curop->op_type == OP_PADANY) { - /* is okay */ + repl_has_vars = 1; } else break; @@ -2197,12 +2200,19 @@ pmruntime(OP *o, OP *expr, OP *repl) lastop = curop; } } - if (curop == repl) { + if (curop == repl + && !(repl_has_vars + && (!pm->op_pmregexp + || pm->op_pmregexp->reganch & ROPT_EVAL_SEEN))) { pm->op_pmflags |= PMf_CONST; /* const for long enough */ pm->op_pmpermflags |= PMf_CONST; /* const for long enough */ prepend_elem(o->op_type, scalar(repl), o); } else { + if (curop == repl && !pm->op_pmregexp) { /* Has variables. */ + pm->op_pmflags |= PMf_MAYBE_CONST; + pm->op_pmpermflags |= PMf_MAYBE_CONST; + } Newz(1101, rcop, 1, LOGOP); rcop->op_type = OP_SUBSTCONT; rcop->op_ppaddr = ppaddr[OP_SUBSTCONT]; diff --git a/op.h b/op.h index 0cc6be75d2..7c60aec46e 100644 --- a/op.h +++ b/op.h @@ -189,7 +189,7 @@ struct pmop { #define PMf_TAINTMEM 0x0001 /* taint $1 etc. if target tainted */ #define PMf_ONCE 0x0002 /* use pattern only once per reset */ #define PMf_REVERSED 0x0004 /* Should be matched right->left */ -/*#define PMf_ALL 0x0008*/ /* initial constant is whole pat */ +#define PMf_MAYBE_CONST 0x0008 /* replacement contains variables */ #define PMf_SKIPWHITE 0x0010 /* skip leading whitespace for split */ #define PMf_FOLD 0x0020 /* case insensitivity */ #define PMf_CONST 0x0040 /* subst replacement is constant */ diff --git a/perl.c b/perl.c index 9d70377406..f436f44d78 100644 --- a/perl.c +++ b/perl.c @@ -476,6 +476,7 @@ perl_destruct(register PerlInterpreter *sv_interp) argvoutgv = Nullgv; stdingv = Nullgv; last_in_gv = Nullgv; + replgv = Nullgv; /* reset so print() ends up where we expect */ setdefout(Nullgv); @@ -1818,6 +1819,8 @@ init_main_stash(void) defgv = gv_fetchpv("_",TRUE, SVt_PVAV); errgv = gv_HVadd(gv_fetchpv("@", TRUE, SVt_PV)); GvMULTI_on(errgv); + replgv = gv_HVadd(gv_fetchpv("\022", TRUE, SVt_PV)); /* ^R */ + GvMULTI_on(replgv); (void)form("%240s",""); /* Preallocate temp - for immediate signals. */ sv_grow(ERRSV, 240); /* Preallocate - for immediate signals. */ sv_setpvn(ERRSV, "", 0); diff --git a/regcomp.c b/regcomp.c index 05c3a80172..ceb5a292d3 100644 --- a/regcomp.c +++ b/regcomp.c @@ -954,6 +954,8 @@ pregcomp(char *exp, char *xend, PMOP *pm) r->reganch |= ROPT_GPOS_SEEN; if (regseen & REG_SEEN_LOOKBEHIND) r->reganch |= ROPT_LOOKBEHIND_SEEN; + if (regseen & REG_SEEN_EVAL) + r->reganch |= ROPT_EVAL_SEEN; Newz(1002, r->startp, regnpar, char*); Newz(1002, r->endp, regnpar, char*); DEBUG_r(regdump(r)); @@ -1028,6 +1030,7 @@ reg(I32 paren, I32 *flagp) OP_4tree *sop, *rop; seen_zerolen++; + regseen |= REG_SEEN_EVAL; while (count && (c = *regcomp_parse)) { if (c == '\\' && regcomp_parse[1]) regcomp_parse++; @@ -2354,6 +2357,8 @@ regdump(regexp *r) if (r->reganch & ROPT_IMPLICIT) PerlIO_printf(Perl_debug_log, "implicit "); PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen); + if (r->reganch & ROPT_EVAL_SEEN) + PerlIO_printf(Perl_debug_log, "with eval "); PerlIO_printf(Perl_debug_log, "\n"); #endif /* DEBUGGING */ } diff --git a/regcomp.h b/regcomp.h index bc7977d9af..7b0a12eb3a 100644 --- a/regcomp.h +++ b/regcomp.h @@ -452,4 +452,5 @@ const static char reg_off_by_arg[] = { #define REG_SEEN_ZERO_LEN 1 #define REG_SEEN_LOOKBEHIND 2 #define REG_SEEN_GPOS 4 +#define REG_SEEN_EVAL 8 diff --git a/regexec.c b/regexec.c index ac9f37b827..5ef0313870 100644 --- a/regexec.c +++ b/regexec.c @@ -57,7 +57,10 @@ #define RF_tainted 1 /* tainted information used? */ #define RF_warned 2 /* warned about big count? */ -#define RF_evaled 4 /* Did an EVAL? */ +#define RF_evaled 4 /* Did an EVAL with setting? */ + +#define RS_init 1 /* eval environment created */ +#define RS_set 2 /* replsv value is set */ #ifndef STATIC #define STATIC static @@ -194,6 +197,7 @@ regexec_flags(register regexp *prog, char *stringarg, register char *strend, cha I32 end_shift = 0; /* Same for the end. */ I32 scream_pos = -1; /* Internal iterator of scream. */ char *scream_olds; + SV* oreplsv = GvSV(replgv); cc.cur = 0; cc.oldcc = 0; @@ -632,6 +636,12 @@ got_it: } } } + /* Preserve the current value of $^R */ + if (oreplsv != GvSV(replgv)) { + sv_setsv(oreplsv, GvSV(replgv));/* So that when GvSV(replgv) is + restored, the value remains + the same. */ + } return 1; phooey: @@ -650,6 +660,19 @@ regtry(regexp *prog, char *startpos) register char **ep; CHECKPOINT lastcp; + if ((prog->reganch & ROPT_EVAL_SEEN) && !reg_eval_set) { + reg_eval_set = RS_init; + DEBUG_r(DEBUG_s( + PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n", stack_sp - stack_base); + )); + SAVEINT(cxstack[cxstack_ix].blk_oldsp); + cxstack[cxstack_ix].blk_oldsp = stack_sp - stack_base; + /* Otherwise OP_NEXTSTATE will free whatever on stack now. */ + SAVETMPS; + /* Apparently this is not needed, judging by wantarray. */ + /* SAVEINT(cxstack[cxstack_ix].blk_gimme); + cxstack[cxstack_ix].blk_gimme = G_SCALAR; */ + } reginput = startpos; regstartp = prog->startp; regendp = prog->endp; @@ -980,22 +1003,6 @@ regmatch(regnode *prog) op = (OP_4tree*)regdata->data[n]; DEBUG_r( PerlIO_printf(Perl_debug_log, " re_eval 0x%x\n", op) ); curpad = AvARRAY((AV*)regdata->data[n + 1]); - if (!reg_eval_set) { - /* Preserve whatever is on stack now, otherwise - OP_NEXTSTATE will overwrite it. */ - SAVEINT(reg_eval_set); /* Protect against unwinding. */ - reg_eval_set = 1; - DEBUG_r(DEBUG_s( - PerlIO_printf(Perl_debug_log, " setting stack tmpbase at %i\n", stack_sp - stack_base); - )); - SAVEINT(cxstack[cxstack_ix].blk_oldsp); - cxstack[cxstack_ix].blk_oldsp = stack_sp - stack_base; - /* Otherwise OP_NEXTSTATE will free whatever on stack now. */ - SAVETMPS; - /* Apparently this is not needed, judging by wantarray. */ - /* SAVEINT(cxstack[cxstack_ix].blk_gimme); - cxstack[cxstack_ix].blk_gimme = G_SCALAR; */ - } CALLRUNOPS(); /* Scalar context. */ SPAGAIN; @@ -1005,7 +1012,8 @@ regmatch(regnode *prog) if (logical) { logical = 0; sw = SvTRUE(ret); - } + } else + sv_setsv(save_scalar(replgv), ret); op = oop; curpad = ocurpad; curcop = ocurcop; diff --git a/regexp.h b/regexp.h index cb6b0c64fe..f1301d9102 100644 --- a/regexp.h +++ b/regexp.h @@ -85,6 +85,7 @@ typedef struct regexp { #define ROPT_GPOS_SEEN 0x40 #define ROPT_CHECK_ALL 0x80 #define ROPT_LOOKBEHIND_SEEN 0x100 +#define ROPT_EVAL_SEEN 0x200 #define ROPT_TAINTED_SEEN 0x8000 diff --git a/t/op/pat.t b/t/op/pat.t index e6b90158f9..5516ce595c 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -2,7 +2,7 @@ # $RCSfile: pat.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:12 $ -print "1..104\n"; +print "1..107\n"; $x = "abc\ndef\n"; @@ -355,6 +355,24 @@ print "not " unless f(pos($x)) == 4; print "ok $test\n"; $test++; +$x = $^R = 67; +'foot' =~ /foo(?{$x = 12; 75})[t]/; +print "not " unless $^R eq '75'; +print "ok $test\n"; +$test++; + +$x = $^R = 67; +'foot' =~ /foo(?{$x = 12; 75})[xy]/; +print "not " unless $^R eq '67' and $x eq '12'; +print "ok $test\n"; +$test++; + +$x = $^R = 67; +'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; +print "not " unless $^R eq '79' and $x eq '12'; +print "ok $test\n"; +$test++; + sub must_warn_pat { my $warn_pat = shift; return sub { print "not " unless $_[0] =~ /$warn_pat/ } diff --git a/t/op/subst.t b/t/op/subst.t index 248aa71b9d..92a848fe80 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -2,7 +2,7 @@ # $RCSfile: s.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:22 $ -print "1..69\n"; +print "1..70\n"; $x = 'foo'; $_ = "x"; @@ -270,3 +270,31 @@ print $_ eq "bbcbb" ? "ok 68\n" : "not ok 68 # `$_' ne `bbcbb'\n"; # XXX TODO: Most tests above don't test return values of the ops. They should. $_ = "ab"; print (s/a/b/ == 1 ? "ok 69\n" : "not ok 69\n"); + +$_ = <<'EOL'; + $url = new URI::URL "http://www/"; die if $url eq "xXx"; +EOL +$^R = 'junk'; + +$foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' . + ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' . + ' lowercase $@%#MiXeD$@%# '; + +s{ \d+ \b [,.;]? (?{ 'digits' }) + | + [a-z]+ \b [,.;]? (?{ 'lowercase' }) + | + [A-Z]+ \b [,.;]? (?{ 'UPPERCASE' }) + | + [A-Z] [a-z]+ \b [,.;]? (?{ 'Capitalized' }) + | + [A-Za-z]+ \b [,.;]? (?{ 'MiXeD' }) + | + [A-Za-z0-9]+ \b [,.;]? (?{ 'alphanumeric' }) + | + \s+ (?{ ' ' }) + | + [^A-Za-z0-9\s]+ (?{ '$@%#' }) +}{$^R}xg; +print ($_ eq $foo ? "ok 70\n" : "not ok 70\n#'$_'\n#'$foo'\n"); + -- cgit v1.2.1