diff options
-rw-r--r-- | bytecode.pl | 2 | ||||
-rw-r--r-- | dump.c | 18 | ||||
-rw-r--r-- | embed.fnc | 4 | ||||
-rw-r--r-- | ext/B/B.pm | 4 | ||||
-rw-r--r-- | ext/B/B.xs | 20 | ||||
-rw-r--r-- | ext/B/B/Asmdata.pm | 70 | ||||
-rw-r--r-- | ext/B/B/Deparse.pm | 7 | ||||
-rw-r--r-- | ext/B/defsubs_h.PL | 6 | ||||
-rw-r--r-- | ext/B/t/concise-xs.t | 6 | ||||
-rw-r--r-- | ext/re/re.xs | 2 | ||||
-rw-r--r-- | op.c | 21 | ||||
-rw-r--r-- | op.h | 19 | ||||
-rw-r--r-- | pod/perlreguts.pod | 34 | ||||
-rw-r--r-- | pod/perltoc.pod | 4 | ||||
-rw-r--r-- | pp.c | 16 | ||||
-rw-r--r-- | pp_ctl.c | 41 | ||||
-rw-r--r-- | pp_hot.c | 47 | ||||
-rw-r--r-- | proto.h | 10 | ||||
-rw-r--r-- | regcomp.c | 21 | ||||
-rw-r--r-- | regexec.c | 7 | ||||
-rw-r--r-- | regexp.h | 6 | ||||
-rw-r--r-- | sv.c | 6 | ||||
-rw-r--r-- | toke.c | 3 |
23 files changed, 195 insertions, 179 deletions
diff --git a/bytecode.pl b/bytecode.pl index 95b5b12322..49ad8f10e1 100644 --- a/bytecode.pl +++ b/bytecode.pl @@ -314,8 +314,6 @@ op_pmreplrootgv *(SV**)&cPMOP->op_pmreplroot svindex #endif pregcomp PL_op pvcontents x op_pmflags cPMOP->op_pmflags U16 -op_pmpermflags cPMOP->op_pmpermflags U16 -op_pmdynflags cPMOP->op_pmdynflags U8 op_sv cSVOP->op_sv svindex op_padix cPADOP->op_padix PADOFFSET op_pv cPVOP->op_pv pvcontents @@ -558,20 +558,26 @@ S_pm_description(pTHX_ const PMOP *pm) const REGEXP * regex = PM_GETRE(pm); const U32 pmflags = pm->op_pmflags; - if (pm->op_pmdynflags & PMdf_USED) - sv_catpv(desc, ",USED"); - if (pm->op_pmdynflags & PMdf_TAINTED) - sv_catpv(desc, ",TAINTED"); - if (pmflags & PMf_ONCE) sv_catpv(desc, ",ONCE"); +#ifdef USE_ITHREADS + if (SvREADONLY(PL_regex_pad[pm->op_pmoffset])) + sv_catpv(desc, ":USED"); +#else + if (pmflags & PMf_USED) + sv_catpv(desc, ":USED"); +#endif + if (regex->extflags & RXf_TAINTED) + sv_catpv(desc, ",TAINTED"); + + if (regex && regex->check_substr) { if (!(regex->extflags & RXf_NOSCAN)) sv_catpv(desc, ",SCANFIRST"); if (regex->extflags & RXf_CHECK_ALL) sv_catpv(desc, ",ALL"); } - if (pmflags & PMf_SKIPWHITE) + if (regex->extflags & RXf_SKIPWHITE) sv_catpv(desc, ",SKIPWHITE"); if (pmflags & PMf_CONST) sv_catpv(desc, ",CONST"); @@ -683,8 +683,8 @@ Ap |char * |reg_stringify |NN MAGIC *mg|NULLOK STRLEN *lp|NULLOK U32 *flags|NUL #if defined(USE_ITHREADS) Ap |void* |regdupe_internal|NN const regexp* r|NN CLONE_PARAMS* param #endif -Ap |regexp*|pregcomp |NN char* exp|NN char* xend|NN PMOP* pm -Ap |regexp*|re_compile |NN char* exp|NN char* xend|NN PMOP* pm +Ap |regexp*|pregcomp |NN char* exp|NN char* xend|U32 pm_flags +Ap |regexp*|re_compile |NN char* exp|NN char* xend|U32 pm_flags Ap |char* |re_intuit_start|NN regexp* prog|NULLOK SV* sv|NN char* strpos \ |NN char* strend|U32 flags \ |NULLOK struct re_scream_pos_data_s *data diff --git a/ext/B/B.pm b/ext/B/B.pm index caccf4bfb0..533616987a 100644 --- a/ext/B/B.pm +++ b/ext/B/B.pm @@ -1047,9 +1047,7 @@ This returns the op description from the global C PL_op_desc array =item pmflags -=item pmdynflags - -=item pmpermflags +=item extflags =item precomp diff --git a/ext/B/B.xs b/ext/B/B.xs index 6fdac03042..12eb6a3309 100644 --- a/ext/B/B.xs +++ b/ext/B/B.xs @@ -988,8 +988,6 @@ LISTOP_children(o) #define PMOP_pmstash(o) o->op_pmstash #endif #define PMOP_pmflags(o) o->op_pmflags -#define PMOP_pmpermflags(o) o->op_pmpermflags -#define PMOP_pmdynflags(o) o->op_pmdynflags MODULE = B PACKAGE = B::PMOP PREFIX = PMOP_ @@ -1044,14 +1042,6 @@ U32 PMOP_pmflags(o) B::PMOP o -U32 -PMOP_pmpermflags(o) - B::PMOP o - -U8 -PMOP_pmdynflags(o) - B::PMOP o - void PMOP_precomp(o) B::PMOP o @@ -1062,6 +1052,16 @@ PMOP_precomp(o) if (rx) sv_setpvn(ST(0), rx->precomp, rx->prelen); +void +PMOP_reflags(o) + B::PMOP o + REGEXP * rx = NO_INIT + CODE: + ST(0) = sv_newmortal(); + rx = PM_GETRE(o); + if (rx) + sv_setuv(ST(0), rx->extflags); + #define SVOP_sv(o) cSVOPo->op_sv #define SVOP_gv(o) ((GV*)cSVOPo->op_sv) diff --git a/ext/B/B/Asmdata.pm b/ext/B/B/Asmdata.pm index 1cdbe13cbe..b43f7bb54a 100644 --- a/ext/B/B/Asmdata.pm +++ b/ext/B/B/Asmdata.pm @@ -137,42 +137,40 @@ $insn_data{op_pmstash} = [109, \&PUT_svindex, "GET_svindex"]; $insn_data{op_pmreplrootgv} = [110, \&PUT_svindex, "GET_svindex"]; $insn_data{pregcomp} = [111, \&PUT_pvcontents, "GET_pvcontents"]; $insn_data{op_pmflags} = [112, \&PUT_U16, "GET_U16"]; -$insn_data{op_pmpermflags} = [113, \&PUT_U16, "GET_U16"]; -$insn_data{op_pmdynflags} = [114, \&PUT_U8, "GET_U8"]; -$insn_data{op_sv} = [115, \&PUT_svindex, "GET_svindex"]; -$insn_data{op_padix} = [116, \&PUT_PADOFFSET, "GET_PADOFFSET"]; -$insn_data{op_pv} = [117, \&PUT_pvcontents, "GET_pvcontents"]; -$insn_data{op_pv_tr} = [118, \&PUT_op_tr_array, "GET_op_tr_array"]; -$insn_data{op_redoop} = [119, \&PUT_opindex, "GET_opindex"]; -$insn_data{op_nextop} = [120, \&PUT_opindex, "GET_opindex"]; -$insn_data{op_lastop} = [121, \&PUT_opindex, "GET_opindex"]; -$insn_data{cop_label} = [122, \&PUT_pvindex, "GET_pvindex"]; -$insn_data{cop_stashpv} = [123, \&PUT_pvindex, "GET_pvindex"]; -$insn_data{cop_file} = [124, \&PUT_pvindex, "GET_pvindex"]; -$insn_data{cop_stash} = [125, \&PUT_svindex, "GET_svindex"]; -$insn_data{cop_filegv} = [126, \&PUT_svindex, "GET_svindex"]; -$insn_data{cop_seq} = [127, \&PUT_U32, "GET_U32"]; -$insn_data{cop_arybase} = [128, \&PUT_I32, "GET_I32"]; -$insn_data{cop_line} = [129, \&PUT_U32, "GET_U32"]; -$insn_data{cop_warnings} = [130, \&PUT_svindex, "GET_svindex"]; -$insn_data{main_start} = [131, \&PUT_opindex, "GET_opindex"]; -$insn_data{main_root} = [132, \&PUT_opindex, "GET_opindex"]; -$insn_data{main_cv} = [133, \&PUT_svindex, "GET_svindex"]; -$insn_data{curpad} = [134, \&PUT_svindex, "GET_svindex"]; -$insn_data{push_begin} = [135, \&PUT_svindex, "GET_svindex"]; -$insn_data{push_init} = [136, \&PUT_svindex, "GET_svindex"]; -$insn_data{push_end} = [137, \&PUT_svindex, "GET_svindex"]; -$insn_data{curstash} = [138, \&PUT_svindex, "GET_svindex"]; -$insn_data{defstash} = [139, \&PUT_svindex, "GET_svindex"]; -$insn_data{data} = [140, \&PUT_U8, "GET_U8"]; -$insn_data{incav} = [141, \&PUT_svindex, "GET_svindex"]; -$insn_data{load_glob} = [142, \&PUT_svindex, "GET_svindex"]; -$insn_data{regex_padav} = [143, \&PUT_svindex, "GET_svindex"]; -$insn_data{dowarn} = [144, \&PUT_U8, "GET_U8"]; -$insn_data{comppad_name} = [145, \&PUT_svindex, "GET_svindex"]; -$insn_data{xgv_stash} = [146, \&PUT_svindex, "GET_svindex"]; -$insn_data{signal} = [147, \&PUT_strconst, "GET_strconst"]; -$insn_data{formfeed} = [148, \&PUT_svindex, "GET_svindex"]; +$insn_data{op_sv} = [113, \&PUT_svindex, "GET_svindex"]; +$insn_data{op_padix} = [114, \&PUT_PADOFFSET, "GET_PADOFFSET"]; +$insn_data{op_pv} = [115, \&PUT_pvcontents, "GET_pvcontents"]; +$insn_data{op_pv_tr} = [116, \&PUT_op_tr_array, "GET_op_tr_array"]; +$insn_data{op_redoop} = [117, \&PUT_opindex, "GET_opindex"]; +$insn_data{op_nextop} = [118, \&PUT_opindex, "GET_opindex"]; +$insn_data{op_lastop} = [119, \&PUT_opindex, "GET_opindex"]; +$insn_data{cop_label} = [120, \&PUT_pvindex, "GET_pvindex"]; +$insn_data{cop_stashpv} = [121, \&PUT_pvindex, "GET_pvindex"]; +$insn_data{cop_file} = [122, \&PUT_pvindex, "GET_pvindex"]; +$insn_data{cop_stash} = [123, \&PUT_svindex, "GET_svindex"]; +$insn_data{cop_filegv} = [124, \&PUT_svindex, "GET_svindex"]; +$insn_data{cop_seq} = [125, \&PUT_U32, "GET_U32"]; +$insn_data{cop_arybase} = [126, \&PUT_I32, "GET_I32"]; +$insn_data{cop_line} = [127, \&PUT_U32, "GET_U32"]; +$insn_data{cop_warnings} = [128, \&PUT_svindex, "GET_svindex"]; +$insn_data{main_start} = [129, \&PUT_opindex, "GET_opindex"]; +$insn_data{main_root} = [130, \&PUT_opindex, "GET_opindex"]; +$insn_data{main_cv} = [131, \&PUT_svindex, "GET_svindex"]; +$insn_data{curpad} = [132, \&PUT_svindex, "GET_svindex"]; +$insn_data{push_begin} = [133, \&PUT_svindex, "GET_svindex"]; +$insn_data{push_init} = [134, \&PUT_svindex, "GET_svindex"]; +$insn_data{push_end} = [135, \&PUT_svindex, "GET_svindex"]; +$insn_data{curstash} = [136, \&PUT_svindex, "GET_svindex"]; +$insn_data{defstash} = [137, \&PUT_svindex, "GET_svindex"]; +$insn_data{data} = [138, \&PUT_U8, "GET_U8"]; +$insn_data{incav} = [139, \&PUT_svindex, "GET_svindex"]; +$insn_data{load_glob} = [140, \&PUT_svindex, "GET_svindex"]; +$insn_data{regex_padav} = [141, \&PUT_svindex, "GET_svindex"]; +$insn_data{dowarn} = [142, \&PUT_U8, "GET_U8"]; +$insn_data{comppad_name} = [143, \&PUT_svindex, "GET_svindex"]; +$insn_data{xgv_stash} = [144, \&PUT_svindex, "GET_svindex"]; +$insn_data{signal} = [145, \&PUT_strconst, "GET_strconst"]; +$insn_data{formfeed} = [146, \&PUT_svindex, "GET_svindex"]; my ($insn_name, $insn_data); while (($insn_name, $insn_data) = each %insn_data) { diff --git a/ext/B/B/Deparse.pm b/ext/B/B/Deparse.pm index 34339cc231..f663d353a7 100644 --- a/ext/B/B/Deparse.pm +++ b/ext/B/B/Deparse.pm @@ -18,8 +18,9 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring OPpSORT_REVERSE OPpSORT_INPLACE OPpSORT_DESCEND OPpITER_REVERSED SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR SVf_FAKE SVs_RMG SVs_SMG CVf_METHOD CVf_LOCKED CVf_LVALUE CVf_ASSERTION - PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE PMf_SKIPWHITE - PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED); + PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE + PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED + RXf_SKIPWHITE); $VERSION = 0.81; use strict; use vars qw/$AUTOLOAD/; @@ -4184,7 +4185,7 @@ sub pp_split { # handle special case of split(), and split(' ') that compiles to /\s+/ $kid = $op->first; - if ($kid->flags & OPf_SPECIAL and $kid->pmflags & PMf_SKIPWHITE) { + if ( $kid->flags & OPf_SPECIAL and $kid->reflags & RXf_SKIPWHITE ) { $exprs[0] = "' '"; } diff --git a/ext/B/defsubs_h.PL b/ext/B/defsubs_h.PL index eefaa7e63f..8f943c65fc 100644 --- a/ext/B/defsubs_h.PL +++ b/ext/B/defsubs_h.PL @@ -67,13 +67,15 @@ if ($] >= 5.009) { doconst(AVf_REAL); } -foreach my $file (qw(op.h cop.h)) +foreach my $tuple (['op.h'],['cop.h'],['regexp.h','RXf_']) { + my $file = $tuple->[0]; + my $pfx = $tuple->[1] || ''; my $path = $^O eq 'MacOS' ? ":::$file" : "../../$file"; open(OPH,"$path") || die "Cannot open $path:$!"; while (<OPH>) { - doconst($1) if (/#define\s+(\w+)\s+([\(\)\|\dx]+)\s*(?:$|\/\*)/); + doconst($1) if (/#define\s+($pfx\w+)\s+([\(\)\|\dx]+)\s*(?:$|\/\*)/); } close(OPH); } diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t index b19cf59926..76b5df884b 100644 --- a/ext/B/t/concise-xs.t +++ b/ext/B/t/concise-xs.t @@ -117,7 +117,7 @@ use Getopt::Std; use Carp; use Test::More tests => ( # per-pkg tests (function ct + require_ok) 40 + 16 # Data::Dumper, Digest::MD5 - + 517 + 239 # B::Deparse, B + + 517 + 262 # B::Deparse, B + 595 + 190 # POSIX, IO::Socket + 323 * ($] > 5.009) + 17 * ($] >= 5.009003) @@ -142,7 +142,7 @@ my $testpkgs = { Data::Dumper => { XS => [qw/ bootstrap Dumpxs /], dflt => 'perl' }, B => { - dflt => 'constant', # all but 47/274 + dflt => 'constant', # all but 47/297 skip => [ 'regex_padav' ], # threaded only perl => [qw( walksymtable walkoptree_slow walkoptree_exec @@ -176,7 +176,7 @@ my $testpkgs = { OPpSORT_REVERSE OPpTARGET_MY OPpTRANS_COMPLEMENT OPpTRANS_DELETE OPpTRANS_SQUASH PMf_CONTINUE PMf_EVAL PMf_EXTENDED PMf_FOLD PMf_GLOBAL PMf_KEEP - PMf_MULTILINE PMf_ONCE PMf_SINGLELINE PMf_SKIPWHITE + PMf_MULTILINE PMf_ONCE PMf_SINGLELINE RXf_SKIPWHITE POSTFIX SVf_FAKE SVf_IOK SVf_NOK SVf_POK SVf_ROK SVpad_OUR SVs_RMG SVs_SMG SWAP_CHILDREN OPpPAD_STATE /], diff --git a/ext/re/re.xs b/ext/re/re.xs index 7b3e9fb2e9..5ab5f7c095 100644 --- a/ext/re/re.xs +++ b/ext/re/re.xs @@ -11,7 +11,7 @@ START_EXTERN_C -extern regexp* my_re_compile (pTHX_ char* exp, char* xend, PMOP* pm); +extern regexp* my_re_compile (pTHX_ char* exp, char* xend, U32 pm_flags); extern I32 my_regexec (pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags); @@ -611,6 +611,7 @@ clear_pmop: #ifdef USE_ITHREADS if(PL_regex_pad) { /* We could be in destruction */ av_push((AV*) PL_regex_pad[0],(SV*) PL_regex_pad[(cPMOPo)->op_pmoffset]); + SvREADONLY_off(PL_regex_pad[(cPMOPo)->op_pmoffset]); SvREPADTMP_on(PL_regex_pad[(cPMOPo)->op_pmoffset]); PM_SETRE(cPMOPo, (cPMOPo)->op_pmoffset); } @@ -3268,10 +3269,10 @@ Perl_newPMOP(pTHX_ I32 type, I32 flags) pmop->op_private = (U8)(0 | (flags >> 8)); if (PL_hints & HINT_RE_TAINT) - pmop->op_pmpermflags |= PMf_RETAINT; + pmop->op_pmflags |= PMf_RETAINT; if (PL_hints & HINT_LOCALE) - pmop->op_pmpermflags |= PMf_LOCALE; - pmop->op_pmflags = pmop->op_pmpermflags; + pmop->op_pmflags |= PMf_LOCALE; + #ifdef USE_ITHREADS if (av_len((AV*) PL_regex_pad[0]) > -1) { @@ -3361,6 +3362,7 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg) STRLEN plen; SV * const pat = ((SVOP*)expr)->op_sv; const char *p = SvPV_const(pat, plen); + U32 pm_flags = pm->op_pmflags & PMf_COMPILETIME; if ((o->op_flags & OPf_SPECIAL) && (plen == 1 && *p == ' ')) { U32 was_readonly = SvREADONLY(pat); @@ -3379,16 +3381,13 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg) SvFLAGS(pat) |= was_readonly; p = SvPV_const(pat, plen); - pm->op_pmflags |= PMf_SKIPWHITE; + pm_flags |= RXf_SKIPWHITE; } if (DO_UTF8(pat)) - pm->op_pmdynflags |= PMdf_UTF8; + pm_flags |= RXf_UTF8; /* FIXME - can we make this function take const char * args? */ - PM_SETRE(pm, CALLREGCOMP((char*)p, (char*)p + plen, pm)); - if (PM_GETRE(pm)->extflags & RXf_WHITE) - pm->op_pmflags |= PMf_WHITE; - else - pm->op_pmflags &= ~PMf_WHITE; + PM_SETRE(pm, CALLREGCOMP((char*)p, (char*)p + plen, pm_flags)); + #ifdef PERL_MAD op_getmad(expr,(OP*)pm,'e'); #else @@ -3481,13 +3480,11 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg) || PM_GETRE(pm)->extflags & RXf_EVAL_SEEN))) { pm->op_pmflags |= PMf_CONST; /* const for long enough */ - pm->op_pmpermflags |= PMf_CONST; /* const for long enough */ prepend_elem(o->op_type, scalar(repl), o); } else { if (curop == repl && !PM_GETRE(pm)) { /* Has variables. */ pm->op_pmflags |= PMf_MAYBE_CONST; - pm->op_pmpermflags |= PMf_MAYBE_CONST; } NewOp(1101, rcop, 1, LOGOP); rcop->op_type = OP_SUBSTCONT; @@ -327,8 +327,6 @@ struct pmop { REGEXP * op_pmregexp; /* compiled expression */ #endif U32 op_pmflags; - U32 op_pmpermflags; - U8 op_pmdynflags; #ifdef USE_ITHREADS char * op_pmstashpv; #else @@ -351,19 +349,18 @@ struct pmop { #define PM_SETRE_SAFE PM_SETRE #endif -#define PMdf_USED 0x01 /* pm has been used once already */ -#define PMdf_TAINTED 0x02 /* pm compiled from tainted pattern */ -#define PMdf_UTF8 0x04 /* pm compiled from utf8 data */ -#define PMdf_DYN_UTF8 0x08 - -#define PMdf_CMP_UTF8 (PMdf_UTF8|PMdf_DYN_UTF8) #define PMf_RETAINT 0x0001 /* taint $1 etc. if target tainted */ -#define PMf_ONCE 0x0002 /* use pattern only once per reset */ +#define PMf_ONCE 0x0002 /* match successfully only once per + reset, with related flag RXf_USED + in re->extflags holding state */ + #define PMf_UNUSED 0x0004 /* free for use */ #define PMf_MAYBE_CONST 0x0008 /* replacement contains variables */ -#define PMf_SKIPWHITE 0x0010 /* skip leading whitespace for split */ -#define PMf_WHITE 0x0020 /* pattern is \s+ */ + +#define PMf_USED 0x0010 /* PMf_ONCE has matched successfully. + Not used under threading. */ + #define PMf_CONST 0x0040 /* subst replacement is constant */ #define PMf_KEEP 0x0080 /* keep 1st runtime pattern forever */ #define PMf_GLOBAL 0x0100 /* pattern had a g modifier */ diff --git a/pod/perlreguts.pod b/pod/perlreguts.pod index d119dfe4f2..c61a9cf793 100644 --- a/pod/perlreguts.pod +++ b/pod/perlreguts.pod @@ -987,7 +987,7 @@ than the default one. Each engine is supposed to provide access to a constant structure of the following format: typedef struct regexp_engine { - regexp* (*comp) (pTHX_ char* exp, char* xend, PMOP* pm); + regexp* (*comp) (pTHX_ char* exp, char* xend, U32 pm_flags); I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags); @@ -1022,22 +1022,28 @@ The routines are as follows: =item comp - regexp* comp(char *exp, char *xend, PMOP pm); + regexp* comp(char *exp, char *xend, U32 pm_flags); Compile the pattern between exp and xend using the flags contained in pm and return a pointer to a prepared regexp structure that can perform -the match. - -The utf8'ness of the string can be found by testing - - pm->op_pmdynflags & PMdf_CMP_UTF8 - -Additional various flags reflecting the modifiers used are contained in - - pm->op_pmflags - -some of these have exact equivelents in re->extflags. See regcomp.h and op.h -for details of these values. +the match. pm flags will have the following flag bits set as determined +by the context that comp() has been called from: + + RXf_UTF8 pattern is encoded in UTF8 + RXf_PMf_LOCALE use locale + RXf_PMf_MULTILINE /m + RXf_PMf_SINGLELINE /s + RXf_PMf_FOLD /i + RXf_PMf_EXTENDED /x + RXf_PMf_KEEPCOPY /k + RXf_SKIPWHITE split ' ' or split with no args + +In general these flags should be preserved in regex->extflags after +compilation, although it is possible the regex includes constructs that +changes them. The perl engine for instance may upgrade non-utf8 strings +to utf8 if the pattern includes constructs such as C<\x{...}> that can only +match unicode values. RXf_SKIPWHITE should always be preserved verbatim +in regex->extflags. =item exec diff --git a/pod/perltoc.pod b/pod/perltoc.pod index 6536974f8c..73e53a1e57 100644 --- a/pod/perltoc.pod +++ b/pod/perltoc.pod @@ -12534,8 +12534,8 @@ children =item B::PMOP Methods -pmreplroot, pmreplstart, pmnext, pmregexp, pmflags, pmdynflags, -pmpermflags, precomp, pmoffset +pmreplroot, pmreplstart, pmnext, pmregexp, pmflags, extflags, +precomp, pmoffset =item B::SVOP METHOD @@ -4567,8 +4567,8 @@ PP(pp_split) DIE(aTHX_ "panic: pp_split"); rx = PM_GETRE(pm); - TAINT_IF((pm->op_pmflags & PMf_LOCALE) && - (pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE))); + TAINT_IF((rx->extflags & RXf_PMf_LOCALE) && + (rx->extflags & (RXf_WHITE | RXf_SKIPWHITE))); RX_MATCH_UTF8_set(rx, do_utf8); @@ -4608,12 +4608,12 @@ PP(pp_split) } base = SP - PL_stack_base; orig = s; - if (pm->op_pmflags & PMf_SKIPWHITE) { + if (rx->extflags & RXf_SKIPWHITE) { if (do_utf8) { while (*s == ' ' || is_utf8_space((U8*)s)) s += UTF8SKIP(s); } - else if (pm->op_pmflags & PMf_LOCALE) { + else if (rx->extflags & RXf_PMf_LOCALE) { while (isSPACE_LC(*s)) s++; } @@ -4622,13 +4622,13 @@ PP(pp_split) s++; } } - if (pm->op_pmflags & PMf_MULTILINE) { + if (rx->extflags & PMf_MULTILINE) { multiline = 1; } if (!limit) limit = maxiters + 2; - if (pm->op_pmflags & PMf_WHITE) { + if (rx->extflags & RXf_WHITE) { while (--limit) { m = s; /* this one uses 'm' and is a negative test */ @@ -4641,7 +4641,7 @@ PP(pp_split) else m += t; } - } else if (pm->op_pmflags & PMf_LOCALE) { + } else if (rx->extflags & RXf_PMf_LOCALE) { while (m < strend && !isSPACE_LC(*m)) ++m; } else { @@ -4668,7 +4668,7 @@ PP(pp_split) if (do_utf8) { while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) )) s += UTF8SKIP(s); - } else if (pm->op_pmflags & PMf_LOCALE) { + } else if (rx->extflags & RXf_PMf_LOCALE) { while (s < strend && isSPACE_LC(*s)) ++s; } else { @@ -76,6 +76,7 @@ PP(pp_regcomp) register PMOP *pm = (PMOP*)cLOGOP->op_other; SV *tmpstr; MAGIC *mg = NULL; + regexp * re; /* prevent recompiling under /o and ithreads. */ #if defined(USE_ITHREADS) @@ -125,14 +126,14 @@ PP(pp_regcomp) else { STRLEN len; const char *t = SvPV_const(tmpstr, len); - regexp * const re = PM_GETRE(pm); + re = PM_GETRE(pm); /* Check against the last compiled regexp. */ if (!re || !re->precomp || re->prelen != (I32)len || memNE(re->precomp, t, len)) { const regexp_engine *eng = re ? re->engine : NULL; - + U32 pm_flags = pm->op_pmflags & PMf_COMPILETIME; if (re) { ReREFCNT_dec(re); PM_SETRE(pm, NULL); /* crucial if regcomp aborts */ @@ -146,50 +147,42 @@ PP(pp_regcomp) if (PL_op->op_flags & OPf_SPECIAL) PL_reginterp_cnt = I32_MAX; /* Mark as safe. */ - pm->op_pmflags = pm->op_pmpermflags; /* reset case sensitivity */ if (DO_UTF8(tmpstr)) - pm->op_pmdynflags |= PMdf_DYN_UTF8; - else { - pm->op_pmdynflags &= ~PMdf_DYN_UTF8; - if (pm->op_pmdynflags & PMdf_UTF8) - t = (char*)bytes_to_utf8((U8*)t, &len); - } + pm_flags |= RXf_UTF8; + if (eng) - PM_SETRE(pm, CALLREGCOMP_ENG(eng,(char *)t, (char *)t + len, pm)); + PM_SETRE(pm, CALLREGCOMP_ENG(eng,(char *)t, (char *)t + len, pm_flags)); else - PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm)); - - if (!DO_UTF8(tmpstr) && (pm->op_pmdynflags & PMdf_UTF8)) - Safefree(t); + PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm_flags)); + PL_reginterp_cnt = 0; /* XXXX Be extra paranoid - needed inside tie/overload accessors. */ } } + + re = PM_GETRE(pm); #ifndef INCOMPLETE_TAINTS if (PL_tainting) { if (PL_tainted) - pm->op_pmdynflags |= PMdf_TAINTED; + re->extflags |= RXf_TAINTED; else - pm->op_pmdynflags &= ~PMdf_TAINTED; + re->extflags &= ~RXf_TAINTED; } #endif if (!PM_GETRE(pm)->prelen && PL_curpm) pm = PL_curpm; - else if (PM_GETRE(pm)->extflags & RXf_WHITE) - pm->op_pmflags |= PMf_WHITE; - else - pm->op_pmflags &= ~PMf_WHITE; - /* XXX runtime compiled output needs to move to the pad */ + +#if !defined(USE_ITHREADS) + /* can't change the optree at runtime either */ + /* PMf_KEEP is handled differently under threads to avoid these problems */ if (pm->op_pmflags & PMf_KEEP) { pm->op_private &= ~OPpRUNTIME; /* no point compiling again */ -#if !defined(USE_ITHREADS) - /* XXX can't change the optree at runtime either */ cLOGOP->op_first->op_next = PL_op->op_next; -#endif } +#endif RETURN; } @@ -1180,9 +1180,10 @@ PP(pp_qr) register PMOP * const pm = cPMOP; SV * const rv = sv_newmortal(); SV * const sv = newSVrv(rv, "Regexp"); - if (pm->op_pmdynflags & PMdf_TAINTED) + regexp *re = PM_GETRE(pm); + if (re->extflags & RXf_TAINTED) SvTAINTED_on(rv); - sv_magic(sv,(SV*)ReREFCNT_inc(PM_GETRE(pm)), PERL_MAGIC_qr,0,0); + sv_magic(sv,(SV*)ReREFCNT_inc(re), PERL_MAGIC_qr,0,0); XPUSHs(rv); RETURN; } @@ -1222,20 +1223,28 @@ PP(pp_match) if (!s) DIE(aTHX_ "panic: pp_match"); strend = s + len; - rxtainted = ((pm->op_pmdynflags & PMdf_TAINTED) || + rxtainted = ((rx->extflags & RXf_TAINTED) || (PL_tainted && (pm->op_pmflags & PMf_RETAINT))); TAINT_NOT; RX_MATCH_UTF8_set(rx, DO_UTF8(TARG)); /* PMdf_USED is set after a ?? matches once */ - if (pm->op_pmdynflags & PMdf_USED) { + if ( +#ifdef USE_ITHREADS + SvREADONLY(PL_regex_pad[pm->op_pmoffset]) +#else + pm->op_pmflags & PMf_USED +#endif + ) { failure: if (gimme == G_ARRAY) RETURN; RETPUSHNO; } + + /* empty pattern special-cased to use last successful pattern if possible */ if (!rx->prelen && PL_curpm) { pm = PL_curpm; @@ -1271,7 +1280,7 @@ PP(pp_match) match. Test for the unsafe vars will fail as well*/ if (( /* !global && */ rx->nparens) || SvTEMP(TARG) || PL_sawampersand || - (pm->op_pmflags & (PMf_EVAL|PMf_KEEPCOPY))) + (rx->extflags & (RXf_EVAL_SEEN|RXf_PMf_KEEPCOPY))) r_flags |= REXEC_COPY_STR; if (SvSCREAM(TARG)) r_flags |= REXEC_SCREAM; @@ -1294,7 +1303,7 @@ play_it_again: goto nope; if ( (rx->extflags & RXf_CHECK_ALL) && !PL_sawampersand - && !(pm->op_pmflags & PMf_KEEPCOPY) + && !(rx->extflags & RXf_PMf_KEEPCOPY) && ((rx->extflags & RXf_NOSCAN) || !((rx->extflags & RXf_INTUIT_TAIL) && (r_flags & REXEC_SCREAM))) @@ -1304,8 +1313,13 @@ play_it_again: if (CALLREGEXEC(rx, (char*)s, (char *)strend, (char*)truebase, minmatch, TARG, INT2PTR(void*, gpos), r_flags)) { PL_curpm = pm; - if (dynpm->op_pmflags & PMf_ONCE) - dynpm->op_pmdynflags |= PMdf_USED; + if (dynpm->op_pmflags & PMf_ONCE) { +#ifdef USE_ITHREADS + SvREADONLY_on(PL_regex_pad[dynpm->op_pmoffset]); +#else + dynpm->op_pmflags |= PMf_USED; +#endif + } goto gotcha; } else @@ -1401,8 +1415,13 @@ yup: /* Confirmed by INTUIT */ RX_MATCH_TAINTED_on(rx); TAINT_IF(RX_MATCH_TAINTED(rx)); PL_curpm = pm; - if (dynpm->op_pmflags & PMf_ONCE) - dynpm->op_pmdynflags |= PMdf_USED; + if (dynpm->op_pmflags & PMf_ONCE) { +#ifdef USE_ITHREADS + SvREADONLY_on(PL_regex_pad[dynpm->op_pmoffset]); +#else + dynpm->op_pmflags |= PMf_USED; +#endif + } if (RX_MATCH_COPIED(rx)) Safefree(rx->subbeg); RX_MATCH_COPIED_off(rx); @@ -1421,7 +1440,7 @@ yup: /* Confirmed by INTUIT */ rx->sublen = strend - truebase; goto gotcha; } - if (PL_sawampersand || pm->op_pmflags & PMf_KEEPCOPY) { + if (PL_sawampersand || rx->extflags & RXf_PMf_KEEPCOPY) { I32 off; #ifdef PERL_OLD_COPY_ON_WRITE if (SvIsCOW(TARG) || (SvFLAGS(TARG) & CAN_COW_MASK) == CAN_COW_FLAGS) { @@ -2035,7 +2054,7 @@ PP(pp_subst) s = SvPV_mutable(TARG, len); if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV) force_on_match = 1; - rxtainted = ((pm->op_pmdynflags & PMdf_TAINTED) || + rxtainted = ((rx->extflags & RXf_TAINTED) || (PL_tainted && (pm->op_pmflags & PMf_RETAINT))); if (PL_tainted) rxtainted |= 2; @@ -2058,7 +2077,7 @@ PP(pp_subst) rx = PM_GETRE(pm); } r_flags = (rx->nparens || SvTEMP(TARG) || PL_sawampersand - || (pm->op_pmflags & (PMf_EVAL|PMf_KEEPCOPY)) ) + || (rx->extflags & (RXf_EVAL_SEEN|RXf_PMf_KEEPCOPY)) ) ? REXEC_COPY_STR : 0; if (SvSCREAM(TARG)) r_flags |= REXEC_SCREAM; @@ -2073,7 +2092,7 @@ PP(pp_subst) /* How to do it in subst? */ /* if ( (rx->extflags & RXf_CHECK_ALL) && !PL_sawampersand - && !(pm->op_pmflags & PMf_KEEPCOPY) + && !(rx->extflags & RXf_KEEPCOPY) && ((rx->extflags & RXf_NOSCAN) || !((rx->extflags & RXf_INTUIT_TAIL) && (r_flags & REXEC_SCREAM)))) @@ -1867,15 +1867,13 @@ PERL_CALLCONV void* Perl_regdupe_internal(pTHX_ const regexp* r, CLONE_PARAMS* p __attribute__nonnull__(pTHX_2); #endif -PERL_CALLCONV regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, PMOP* pm) +PERL_CALLCONV regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, U32 pm_flags) __attribute__nonnull__(pTHX_1) - __attribute__nonnull__(pTHX_2) - __attribute__nonnull__(pTHX_3); + __attribute__nonnull__(pTHX_2); -PERL_CALLCONV regexp* Perl_re_compile(pTHX_ char* exp, char* xend, PMOP* pm) +PERL_CALLCONV regexp* Perl_re_compile(pTHX_ char* exp, char* xend, U32 pm_flags) __attribute__nonnull__(pTHX_1) - __attribute__nonnull__(pTHX_2) - __attribute__nonnull__(pTHX_3); + __attribute__nonnull__(pTHX_2); PERL_CALLCONV char* Perl_re_intuit_start(pTHX_ regexp* prog, SV* sv, char* strpos, char* strend, U32 flags, struct re_scream_pos_data_s *data) __attribute__nonnull__(pTHX_1) @@ -4017,7 +4017,7 @@ extern const struct regexp_engine my_reg_engine; #ifndef PERL_IN_XSUB_RE regexp * -Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) +Perl_pregcomp(pTHX_ char *exp, char *xend, U32 pm_flags) { dVAR; HV * const table = GvHV(PL_hintgv); @@ -4032,15 +4032,15 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n", SvIV(*ptr)); }); - return CALLREGCOMP_ENG(eng, exp, xend, pm); + return CALLREGCOMP_ENG(eng, exp, xend, pm_flags); } } - return Perl_re_compile(aTHX_ exp, xend, pm); + return Perl_re_compile(aTHX_ exp, xend, pm_flags); } #endif regexp * -Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm) +Perl_re_compile(pTHX_ char *exp, char *xend, U32 pm_flags) { dVAR; register regexp *r; @@ -4064,7 +4064,7 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm) if (exp == NULL) FAIL("NULL regexp argument"); - RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; + RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8; DEBUG_COMPILE_r({ SV *dsv= sv_newmortal(); @@ -4076,7 +4076,7 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm) redo_first_pass: RExC_precomp = exp; - RExC_flags = pm->op_pmflags; + RExC_flags = pm_flags; RExC_sawback = 0; RExC_seen = 0; @@ -4171,7 +4171,7 @@ redo_first_pass: r->engine= RE_ENGINE_PTR; r->refcnt = 1; r->prelen = xend - exp; - r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; + r->extflags = pm_flags; { bool has_k = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY); bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD); @@ -4239,7 +4239,7 @@ redo_first_pass: RExC_rxi = ri; /* Second pass: emit code. */ - RExC_flags = pm->op_pmflags; /* don't let top level (?i) bleed */ + RExC_flags = pm_flags; /* don't let top level (?i) bleed */ RExC_parse = exp; RExC_end = xend; RExC_naughty = 0; @@ -4291,8 +4291,9 @@ reStudy: #endif /* Dig out information for optimizations. */ - r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; /* Again? */ - pm->op_pmflags = RExC_flags; + r->extflags = pm_flags; /* Again? */ + /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */ + if (UTF) r->extflags |= RXf_UTF8; /* Unicode in it? */ ri->regstclass = NULL; @@ -3665,12 +3665,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) else { STRLEN len; const char * const t = SvPV_const(ret, len); - PMOP pm; + U32 pm_flags = 0; const I32 osize = PL_regsize; - Zero(&pm, 1, PMOP); - if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8; - re = CALLREGCOMP((char*)t, (char*)t + len, &pm); + if (DO_UTF8(ret)) pm_flags |= RXf_UTF8; + re = CALLREGCOMP((char*)t, (char*)t + len, pm_flags); if (!(SvFLAGS(ret) & (SVs_TEMP | SVs_PADTMP | SVf_READONLY | SVs_GMG))) @@ -112,7 +112,7 @@ typedef struct re_scream_pos_data_s * Any regex engine implementation must be able to build one of these. */ typedef struct regexp_engine { - regexp* (*comp) (pTHX_ char* exp, char* xend, PMOP* pm); + regexp* (*comp) (pTHX_ char* exp, char* xend, U32 pm_flags); I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags); @@ -149,6 +149,7 @@ typedef struct regexp_engine { #define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS) /* Flags indicating special patterns */ +#define RXf_SKIPWHITE 0x00000100 /* Pattern is for a split / / */ #define RXf_START_ONLY 0x00000200 /* Pattern is /^/ */ #define RXf_WHITE 0x00000400 /* Pattern is /\s+/ */ @@ -224,7 +225,8 @@ typedef struct regexp_engine { /* Copy and tainted info */ #define RXf_COPY_DONE 0x10000000 #define RXf_TAINTED_SEEN 0x20000000 -/* two bits here */ +#define RXf_TAINTED 0x80000000 /* this pattern is tainted */ + #define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN) #define RX_MATCH_TAINTED(prog) ((prog)->extflags & RXf_TAINTED_SEEN) @@ -7269,7 +7269,11 @@ Perl_sv_reset(pTHX_ register const char *s, HV *stash) if (mg) { PMOP *pm = (PMOP *) mg->mg_obj; while (pm) { - pm->op_pmdynflags &= ~PMdf_USED; +#ifdef USE_ITHREADS + SvREADONLY_off(PL_regex_pad[pm->op_pmoffset]); +#else + pm->op_pmflags &= ~PMf_USED; +#endif pm = pm->op_pmnext; } } @@ -10860,8 +10860,6 @@ S_scan_pat(pTHX_ char *start, I32 type) "Use of /c modifier is meaningless without /g" ); } - pm->op_pmpermflags = pm->op_pmflags; - PL_lex_op = (OP*)pm; yylval.ival = OP_MATCH; return s; @@ -10962,7 +10960,6 @@ S_scan_subst(pTHX_ char *start) PL_lex_repl = repl; } - pm->op_pmpermflags = pm->op_pmflags; PL_lex_op = (OP*)pm; yylval.ival = OP_SUBST; return s; |