summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bytecode.pl2
-rw-r--r--dump.c18
-rw-r--r--embed.fnc4
-rw-r--r--ext/B/B.pm4
-rw-r--r--ext/B/B.xs20
-rw-r--r--ext/B/B/Asmdata.pm70
-rw-r--r--ext/B/B/Deparse.pm7
-rw-r--r--ext/B/defsubs_h.PL6
-rw-r--r--ext/B/t/concise-xs.t6
-rw-r--r--ext/re/re.xs2
-rw-r--r--op.c21
-rw-r--r--op.h19
-rw-r--r--pod/perlreguts.pod34
-rw-r--r--pod/perltoc.pod4
-rw-r--r--pp.c16
-rw-r--r--pp_ctl.c41
-rw-r--r--pp_hot.c47
-rw-r--r--proto.h10
-rw-r--r--regcomp.c21
-rw-r--r--regexec.c7
-rw-r--r--regexp.h6
-rw-r--r--sv.c6
-rw-r--r--toke.c3
23 files changed, 195 insertions, 179 deletions
diff --git a/bytecode.pl b/bytecode.pl
index 95b5b12322..49ad8f10e1 100644
--- a/bytecode.pl
+++ b/bytecode.pl
@@ -314,8 +314,6 @@ op_pmreplrootgv *(SV**)&cPMOP->op_pmreplroot svindex
#endif
pregcomp PL_op pvcontents x
op_pmflags cPMOP->op_pmflags U16
-op_pmpermflags cPMOP->op_pmpermflags U16
-op_pmdynflags cPMOP->op_pmdynflags U8
op_sv cSVOP->op_sv svindex
op_padix cPADOP->op_padix PADOFFSET
op_pv cPVOP->op_pv pvcontents
diff --git a/dump.c b/dump.c
index 544f9af64b..76aec2bd42 100644
--- a/dump.c
+++ b/dump.c
@@ -558,20 +558,26 @@ S_pm_description(pTHX_ const PMOP *pm)
const REGEXP * regex = PM_GETRE(pm);
const U32 pmflags = pm->op_pmflags;
- if (pm->op_pmdynflags & PMdf_USED)
- sv_catpv(desc, ",USED");
- if (pm->op_pmdynflags & PMdf_TAINTED)
- sv_catpv(desc, ",TAINTED");
-
if (pmflags & PMf_ONCE)
sv_catpv(desc, ",ONCE");
+#ifdef USE_ITHREADS
+ if (SvREADONLY(PL_regex_pad[pm->op_pmoffset]))
+ sv_catpv(desc, ":USED");
+#else
+ if (pmflags & PMf_USED)
+ sv_catpv(desc, ":USED");
+#endif
+ if (regex->extflags & RXf_TAINTED)
+ sv_catpv(desc, ",TAINTED");
+
+
if (regex && regex->check_substr) {
if (!(regex->extflags & RXf_NOSCAN))
sv_catpv(desc, ",SCANFIRST");
if (regex->extflags & RXf_CHECK_ALL)
sv_catpv(desc, ",ALL");
}
- if (pmflags & PMf_SKIPWHITE)
+ if (regex->extflags & RXf_SKIPWHITE)
sv_catpv(desc, ",SKIPWHITE");
if (pmflags & PMf_CONST)
sv_catpv(desc, ",CONST");
diff --git a/embed.fnc b/embed.fnc
index 27fa43db2c..679b443df4 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -683,8 +683,8 @@ Ap |char * |reg_stringify |NN MAGIC *mg|NULLOK STRLEN *lp|NULLOK U32 *flags|NUL
#if defined(USE_ITHREADS)
Ap |void* |regdupe_internal|NN const regexp* r|NN CLONE_PARAMS* param
#endif
-Ap |regexp*|pregcomp |NN char* exp|NN char* xend|NN PMOP* pm
-Ap |regexp*|re_compile |NN char* exp|NN char* xend|NN PMOP* pm
+Ap |regexp*|pregcomp |NN char* exp|NN char* xend|U32 pm_flags
+Ap |regexp*|re_compile |NN char* exp|NN char* xend|U32 pm_flags
Ap |char* |re_intuit_start|NN regexp* prog|NULLOK SV* sv|NN char* strpos \
|NN char* strend|U32 flags \
|NULLOK struct re_scream_pos_data_s *data
diff --git a/ext/B/B.pm b/ext/B/B.pm
index caccf4bfb0..533616987a 100644
--- a/ext/B/B.pm
+++ b/ext/B/B.pm
@@ -1047,9 +1047,7 @@ This returns the op description from the global C PL_op_desc array
=item pmflags
-=item pmdynflags
-
-=item pmpermflags
+=item extflags
=item precomp
diff --git a/ext/B/B.xs b/ext/B/B.xs
index 6fdac03042..12eb6a3309 100644
--- a/ext/B/B.xs
+++ b/ext/B/B.xs
@@ -988,8 +988,6 @@ LISTOP_children(o)
#define PMOP_pmstash(o) o->op_pmstash
#endif
#define PMOP_pmflags(o) o->op_pmflags
-#define PMOP_pmpermflags(o) o->op_pmpermflags
-#define PMOP_pmdynflags(o) o->op_pmdynflags
MODULE = B PACKAGE = B::PMOP PREFIX = PMOP_
@@ -1044,14 +1042,6 @@ U32
PMOP_pmflags(o)
B::PMOP o
-U32
-PMOP_pmpermflags(o)
- B::PMOP o
-
-U8
-PMOP_pmdynflags(o)
- B::PMOP o
-
void
PMOP_precomp(o)
B::PMOP o
@@ -1062,6 +1052,16 @@ PMOP_precomp(o)
if (rx)
sv_setpvn(ST(0), rx->precomp, rx->prelen);
+void
+PMOP_reflags(o)
+ B::PMOP o
+ REGEXP * rx = NO_INIT
+ CODE:
+ ST(0) = sv_newmortal();
+ rx = PM_GETRE(o);
+ if (rx)
+ sv_setuv(ST(0), rx->extflags);
+
#define SVOP_sv(o) cSVOPo->op_sv
#define SVOP_gv(o) ((GV*)cSVOPo->op_sv)
diff --git a/ext/B/B/Asmdata.pm b/ext/B/B/Asmdata.pm
index 1cdbe13cbe..b43f7bb54a 100644
--- a/ext/B/B/Asmdata.pm
+++ b/ext/B/B/Asmdata.pm
@@ -137,42 +137,40 @@ $insn_data{op_pmstash} = [109, \&PUT_svindex, "GET_svindex"];
$insn_data{op_pmreplrootgv} = [110, \&PUT_svindex, "GET_svindex"];
$insn_data{pregcomp} = [111, \&PUT_pvcontents, "GET_pvcontents"];
$insn_data{op_pmflags} = [112, \&PUT_U16, "GET_U16"];
-$insn_data{op_pmpermflags} = [113, \&PUT_U16, "GET_U16"];
-$insn_data{op_pmdynflags} = [114, \&PUT_U8, "GET_U8"];
-$insn_data{op_sv} = [115, \&PUT_svindex, "GET_svindex"];
-$insn_data{op_padix} = [116, \&PUT_PADOFFSET, "GET_PADOFFSET"];
-$insn_data{op_pv} = [117, \&PUT_pvcontents, "GET_pvcontents"];
-$insn_data{op_pv_tr} = [118, \&PUT_op_tr_array, "GET_op_tr_array"];
-$insn_data{op_redoop} = [119, \&PUT_opindex, "GET_opindex"];
-$insn_data{op_nextop} = [120, \&PUT_opindex, "GET_opindex"];
-$insn_data{op_lastop} = [121, \&PUT_opindex, "GET_opindex"];
-$insn_data{cop_label} = [122, \&PUT_pvindex, "GET_pvindex"];
-$insn_data{cop_stashpv} = [123, \&PUT_pvindex, "GET_pvindex"];
-$insn_data{cop_file} = [124, \&PUT_pvindex, "GET_pvindex"];
-$insn_data{cop_stash} = [125, \&PUT_svindex, "GET_svindex"];
-$insn_data{cop_filegv} = [126, \&PUT_svindex, "GET_svindex"];
-$insn_data{cop_seq} = [127, \&PUT_U32, "GET_U32"];
-$insn_data{cop_arybase} = [128, \&PUT_I32, "GET_I32"];
-$insn_data{cop_line} = [129, \&PUT_U32, "GET_U32"];
-$insn_data{cop_warnings} = [130, \&PUT_svindex, "GET_svindex"];
-$insn_data{main_start} = [131, \&PUT_opindex, "GET_opindex"];
-$insn_data{main_root} = [132, \&PUT_opindex, "GET_opindex"];
-$insn_data{main_cv} = [133, \&PUT_svindex, "GET_svindex"];
-$insn_data{curpad} = [134, \&PUT_svindex, "GET_svindex"];
-$insn_data{push_begin} = [135, \&PUT_svindex, "GET_svindex"];
-$insn_data{push_init} = [136, \&PUT_svindex, "GET_svindex"];
-$insn_data{push_end} = [137, \&PUT_svindex, "GET_svindex"];
-$insn_data{curstash} = [138, \&PUT_svindex, "GET_svindex"];
-$insn_data{defstash} = [139, \&PUT_svindex, "GET_svindex"];
-$insn_data{data} = [140, \&PUT_U8, "GET_U8"];
-$insn_data{incav} = [141, \&PUT_svindex, "GET_svindex"];
-$insn_data{load_glob} = [142, \&PUT_svindex, "GET_svindex"];
-$insn_data{regex_padav} = [143, \&PUT_svindex, "GET_svindex"];
-$insn_data{dowarn} = [144, \&PUT_U8, "GET_U8"];
-$insn_data{comppad_name} = [145, \&PUT_svindex, "GET_svindex"];
-$insn_data{xgv_stash} = [146, \&PUT_svindex, "GET_svindex"];
-$insn_data{signal} = [147, \&PUT_strconst, "GET_strconst"];
-$insn_data{formfeed} = [148, \&PUT_svindex, "GET_svindex"];
+$insn_data{op_sv} = [113, \&PUT_svindex, "GET_svindex"];
+$insn_data{op_padix} = [114, \&PUT_PADOFFSET, "GET_PADOFFSET"];
+$insn_data{op_pv} = [115, \&PUT_pvcontents, "GET_pvcontents"];
+$insn_data{op_pv_tr} = [116, \&PUT_op_tr_array, "GET_op_tr_array"];
+$insn_data{op_redoop} = [117, \&PUT_opindex, "GET_opindex"];
+$insn_data{op_nextop} = [118, \&PUT_opindex, "GET_opindex"];
+$insn_data{op_lastop} = [119, \&PUT_opindex, "GET_opindex"];
+$insn_data{cop_label} = [120, \&PUT_pvindex, "GET_pvindex"];
+$insn_data{cop_stashpv} = [121, \&PUT_pvindex, "GET_pvindex"];
+$insn_data{cop_file} = [122, \&PUT_pvindex, "GET_pvindex"];
+$insn_data{cop_stash} = [123, \&PUT_svindex, "GET_svindex"];
+$insn_data{cop_filegv} = [124, \&PUT_svindex, "GET_svindex"];
+$insn_data{cop_seq} = [125, \&PUT_U32, "GET_U32"];
+$insn_data{cop_arybase} = [126, \&PUT_I32, "GET_I32"];
+$insn_data{cop_line} = [127, \&PUT_U32, "GET_U32"];
+$insn_data{cop_warnings} = [128, \&PUT_svindex, "GET_svindex"];
+$insn_data{main_start} = [129, \&PUT_opindex, "GET_opindex"];
+$insn_data{main_root} = [130, \&PUT_opindex, "GET_opindex"];
+$insn_data{main_cv} = [131, \&PUT_svindex, "GET_svindex"];
+$insn_data{curpad} = [132, \&PUT_svindex, "GET_svindex"];
+$insn_data{push_begin} = [133, \&PUT_svindex, "GET_svindex"];
+$insn_data{push_init} = [134, \&PUT_svindex, "GET_svindex"];
+$insn_data{push_end} = [135, \&PUT_svindex, "GET_svindex"];
+$insn_data{curstash} = [136, \&PUT_svindex, "GET_svindex"];
+$insn_data{defstash} = [137, \&PUT_svindex, "GET_svindex"];
+$insn_data{data} = [138, \&PUT_U8, "GET_U8"];
+$insn_data{incav} = [139, \&PUT_svindex, "GET_svindex"];
+$insn_data{load_glob} = [140, \&PUT_svindex, "GET_svindex"];
+$insn_data{regex_padav} = [141, \&PUT_svindex, "GET_svindex"];
+$insn_data{dowarn} = [142, \&PUT_U8, "GET_U8"];
+$insn_data{comppad_name} = [143, \&PUT_svindex, "GET_svindex"];
+$insn_data{xgv_stash} = [144, \&PUT_svindex, "GET_svindex"];
+$insn_data{signal} = [145, \&PUT_strconst, "GET_strconst"];
+$insn_data{formfeed} = [146, \&PUT_svindex, "GET_svindex"];
my ($insn_name, $insn_data);
while (($insn_name, $insn_data) = each %insn_data) {
diff --git a/ext/B/B/Deparse.pm b/ext/B/B/Deparse.pm
index 34339cc231..f663d353a7 100644
--- a/ext/B/B/Deparse.pm
+++ b/ext/B/B/Deparse.pm
@@ -18,8 +18,9 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring
OPpSORT_REVERSE OPpSORT_INPLACE OPpSORT_DESCEND OPpITER_REVERSED
SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR SVf_FAKE SVs_RMG SVs_SMG
CVf_METHOD CVf_LOCKED CVf_LVALUE CVf_ASSERTION
- PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE PMf_SKIPWHITE
- PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED);
+ PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE
+ PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED
+ RXf_SKIPWHITE);
$VERSION = 0.81;
use strict;
use vars qw/$AUTOLOAD/;
@@ -4184,7 +4185,7 @@ sub pp_split {
# handle special case of split(), and split(' ') that compiles to /\s+/
$kid = $op->first;
- if ($kid->flags & OPf_SPECIAL and $kid->pmflags & PMf_SKIPWHITE) {
+ if ( $kid->flags & OPf_SPECIAL and $kid->reflags & RXf_SKIPWHITE ) {
$exprs[0] = "' '";
}
diff --git a/ext/B/defsubs_h.PL b/ext/B/defsubs_h.PL
index eefaa7e63f..8f943c65fc 100644
--- a/ext/B/defsubs_h.PL
+++ b/ext/B/defsubs_h.PL
@@ -67,13 +67,15 @@ if ($] >= 5.009) {
doconst(AVf_REAL);
}
-foreach my $file (qw(op.h cop.h))
+foreach my $tuple (['op.h'],['cop.h'],['regexp.h','RXf_'])
{
+ my $file = $tuple->[0];
+ my $pfx = $tuple->[1] || '';
my $path = $^O eq 'MacOS' ? ":::$file" : "../../$file";
open(OPH,"$path") || die "Cannot open $path:$!";
while (<OPH>)
{
- doconst($1) if (/#define\s+(\w+)\s+([\(\)\|\dx]+)\s*(?:$|\/\*)/);
+ doconst($1) if (/#define\s+($pfx\w+)\s+([\(\)\|\dx]+)\s*(?:$|\/\*)/);
}
close(OPH);
}
diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t
index b19cf59926..76b5df884b 100644
--- a/ext/B/t/concise-xs.t
+++ b/ext/B/t/concise-xs.t
@@ -117,7 +117,7 @@ use Getopt::Std;
use Carp;
use Test::More tests => ( # per-pkg tests (function ct + require_ok)
40 + 16 # Data::Dumper, Digest::MD5
- + 517 + 239 # B::Deparse, B
+ + 517 + 262 # B::Deparse, B
+ 595 + 190 # POSIX, IO::Socket
+ 323 * ($] > 5.009)
+ 17 * ($] >= 5.009003)
@@ -142,7 +142,7 @@ my $testpkgs = {
Data::Dumper => { XS => [qw/ bootstrap Dumpxs /],
dflt => 'perl' },
B => {
- dflt => 'constant', # all but 47/274
+ dflt => 'constant', # all but 47/297
skip => [ 'regex_padav' ], # threaded only
perl => [qw(
walksymtable walkoptree_slow walkoptree_exec
@@ -176,7 +176,7 @@ my $testpkgs = {
OPpSORT_REVERSE OPpTARGET_MY OPpTRANS_COMPLEMENT
OPpTRANS_DELETE OPpTRANS_SQUASH PMf_CONTINUE
PMf_EVAL PMf_EXTENDED PMf_FOLD PMf_GLOBAL PMf_KEEP
- PMf_MULTILINE PMf_ONCE PMf_SINGLELINE PMf_SKIPWHITE
+ PMf_MULTILINE PMf_ONCE PMf_SINGLELINE RXf_SKIPWHITE
POSTFIX SVf_FAKE SVf_IOK SVf_NOK SVf_POK SVf_ROK
SVpad_OUR SVs_RMG SVs_SMG SWAP_CHILDREN OPpPAD_STATE
/],
diff --git a/ext/re/re.xs b/ext/re/re.xs
index 7b3e9fb2e9..5ab5f7c095 100644
--- a/ext/re/re.xs
+++ b/ext/re/re.xs
@@ -11,7 +11,7 @@
START_EXTERN_C
-extern regexp* my_re_compile (pTHX_ char* exp, char* xend, PMOP* pm);
+extern regexp* my_re_compile (pTHX_ char* exp, char* xend, U32 pm_flags);
extern I32 my_regexec (pTHX_ regexp* prog, char* stringarg, char* strend,
char* strbeg, I32 minend, SV* screamer,
void* data, U32 flags);
diff --git a/op.c b/op.c
index 701d660658..537322ae08 100644
--- a/op.c
+++ b/op.c
@@ -611,6 +611,7 @@ clear_pmop:
#ifdef USE_ITHREADS
if(PL_regex_pad) { /* We could be in destruction */
av_push((AV*) PL_regex_pad[0],(SV*) PL_regex_pad[(cPMOPo)->op_pmoffset]);
+ SvREADONLY_off(PL_regex_pad[(cPMOPo)->op_pmoffset]);
SvREPADTMP_on(PL_regex_pad[(cPMOPo)->op_pmoffset]);
PM_SETRE(cPMOPo, (cPMOPo)->op_pmoffset);
}
@@ -3268,10 +3269,10 @@ Perl_newPMOP(pTHX_ I32 type, I32 flags)
pmop->op_private = (U8)(0 | (flags >> 8));
if (PL_hints & HINT_RE_TAINT)
- pmop->op_pmpermflags |= PMf_RETAINT;
+ pmop->op_pmflags |= PMf_RETAINT;
if (PL_hints & HINT_LOCALE)
- pmop->op_pmpermflags |= PMf_LOCALE;
- pmop->op_pmflags = pmop->op_pmpermflags;
+ pmop->op_pmflags |= PMf_LOCALE;
+
#ifdef USE_ITHREADS
if (av_len((AV*) PL_regex_pad[0]) > -1) {
@@ -3361,6 +3362,7 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg)
STRLEN plen;
SV * const pat = ((SVOP*)expr)->op_sv;
const char *p = SvPV_const(pat, plen);
+ U32 pm_flags = pm->op_pmflags & PMf_COMPILETIME;
if ((o->op_flags & OPf_SPECIAL) && (plen == 1 && *p == ' ')) {
U32 was_readonly = SvREADONLY(pat);
@@ -3379,16 +3381,13 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg)
SvFLAGS(pat) |= was_readonly;
p = SvPV_const(pat, plen);
- pm->op_pmflags |= PMf_SKIPWHITE;
+ pm_flags |= RXf_SKIPWHITE;
}
if (DO_UTF8(pat))
- pm->op_pmdynflags |= PMdf_UTF8;
+ pm_flags |= RXf_UTF8;
/* FIXME - can we make this function take const char * args? */
- PM_SETRE(pm, CALLREGCOMP((char*)p, (char*)p + plen, pm));
- if (PM_GETRE(pm)->extflags & RXf_WHITE)
- pm->op_pmflags |= PMf_WHITE;
- else
- pm->op_pmflags &= ~PMf_WHITE;
+ PM_SETRE(pm, CALLREGCOMP((char*)p, (char*)p + plen, pm_flags));
+
#ifdef PERL_MAD
op_getmad(expr,(OP*)pm,'e');
#else
@@ -3481,13 +3480,11 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg)
|| PM_GETRE(pm)->extflags & RXf_EVAL_SEEN)))
{
pm->op_pmflags |= PMf_CONST; /* const for long enough */
- pm->op_pmpermflags |= PMf_CONST; /* const for long enough */
prepend_elem(o->op_type, scalar(repl), o);
}
else {
if (curop == repl && !PM_GETRE(pm)) { /* Has variables. */
pm->op_pmflags |= PMf_MAYBE_CONST;
- pm->op_pmpermflags |= PMf_MAYBE_CONST;
}
NewOp(1101, rcop, 1, LOGOP);
rcop->op_type = OP_SUBSTCONT;
diff --git a/op.h b/op.h
index e7f1b6db54..0586592e6a 100644
--- a/op.h
+++ b/op.h
@@ -327,8 +327,6 @@ struct pmop {
REGEXP * op_pmregexp; /* compiled expression */
#endif
U32 op_pmflags;
- U32 op_pmpermflags;
- U8 op_pmdynflags;
#ifdef USE_ITHREADS
char * op_pmstashpv;
#else
@@ -351,19 +349,18 @@ struct pmop {
#define PM_SETRE_SAFE PM_SETRE
#endif
-#define PMdf_USED 0x01 /* pm has been used once already */
-#define PMdf_TAINTED 0x02 /* pm compiled from tainted pattern */
-#define PMdf_UTF8 0x04 /* pm compiled from utf8 data */
-#define PMdf_DYN_UTF8 0x08
-
-#define PMdf_CMP_UTF8 (PMdf_UTF8|PMdf_DYN_UTF8)
#define PMf_RETAINT 0x0001 /* taint $1 etc. if target tainted */
-#define PMf_ONCE 0x0002 /* use pattern only once per reset */
+#define PMf_ONCE 0x0002 /* match successfully only once per
+ reset, with related flag RXf_USED
+ in re->extflags holding state */
+
#define PMf_UNUSED 0x0004 /* free for use */
#define PMf_MAYBE_CONST 0x0008 /* replacement contains variables */
-#define PMf_SKIPWHITE 0x0010 /* skip leading whitespace for split */
-#define PMf_WHITE 0x0020 /* pattern is \s+ */
+
+#define PMf_USED 0x0010 /* PMf_ONCE has matched successfully.
+ Not used under threading. */
+
#define PMf_CONST 0x0040 /* subst replacement is constant */
#define PMf_KEEP 0x0080 /* keep 1st runtime pattern forever */
#define PMf_GLOBAL 0x0100 /* pattern had a g modifier */
diff --git a/pod/perlreguts.pod b/pod/perlreguts.pod
index d119dfe4f2..c61a9cf793 100644
--- a/pod/perlreguts.pod
+++ b/pod/perlreguts.pod
@@ -987,7 +987,7 @@ than the default one. Each engine is supposed to provide access to
a constant structure of the following format:
typedef struct regexp_engine {
- regexp* (*comp) (pTHX_ char* exp, char* xend, PMOP* pm);
+ regexp* (*comp) (pTHX_ char* exp, char* xend, U32 pm_flags);
I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend,
char* strbeg, I32 minend, SV* screamer,
void* data, U32 flags);
@@ -1022,22 +1022,28 @@ The routines are as follows:
=item comp
- regexp* comp(char *exp, char *xend, PMOP pm);
+ regexp* comp(char *exp, char *xend, U32 pm_flags);
Compile the pattern between exp and xend using the flags contained in
pm and return a pointer to a prepared regexp structure that can perform
-the match.
-
-The utf8'ness of the string can be found by testing
-
- pm->op_pmdynflags & PMdf_CMP_UTF8
-
-Additional various flags reflecting the modifiers used are contained in
-
- pm->op_pmflags
-
-some of these have exact equivelents in re->extflags. See regcomp.h and op.h
-for details of these values.
+the match. pm flags will have the following flag bits set as determined
+by the context that comp() has been called from:
+
+ RXf_UTF8 pattern is encoded in UTF8
+ RXf_PMf_LOCALE use locale
+ RXf_PMf_MULTILINE /m
+ RXf_PMf_SINGLELINE /s
+ RXf_PMf_FOLD /i
+ RXf_PMf_EXTENDED /x
+ RXf_PMf_KEEPCOPY /k
+ RXf_SKIPWHITE split ' ' or split with no args
+
+In general these flags should be preserved in regex->extflags after
+compilation, although it is possible the regex includes constructs that
+changes them. The perl engine for instance may upgrade non-utf8 strings
+to utf8 if the pattern includes constructs such as C<\x{...}> that can only
+match unicode values. RXf_SKIPWHITE should always be preserved verbatim
+in regex->extflags.
=item exec
diff --git a/pod/perltoc.pod b/pod/perltoc.pod
index 6536974f8c..73e53a1e57 100644
--- a/pod/perltoc.pod
+++ b/pod/perltoc.pod
@@ -12534,8 +12534,8 @@ children
=item B::PMOP Methods
-pmreplroot, pmreplstart, pmnext, pmregexp, pmflags, pmdynflags,
-pmpermflags, precomp, pmoffset
+pmreplroot, pmreplstart, pmnext, pmregexp, pmflags, extflags,
+precomp, pmoffset
=item B::SVOP METHOD
diff --git a/pp.c b/pp.c
index b2aa8e6677..bec9933c49 100644
--- a/pp.c
+++ b/pp.c
@@ -4567,8 +4567,8 @@ PP(pp_split)
DIE(aTHX_ "panic: pp_split");
rx = PM_GETRE(pm);
- TAINT_IF((pm->op_pmflags & PMf_LOCALE) &&
- (pm->op_pmflags & (PMf_WHITE | PMf_SKIPWHITE)));
+ TAINT_IF((rx->extflags & RXf_PMf_LOCALE) &&
+ (rx->extflags & (RXf_WHITE | RXf_SKIPWHITE)));
RX_MATCH_UTF8_set(rx, do_utf8);
@@ -4608,12 +4608,12 @@ PP(pp_split)
}
base = SP - PL_stack_base;
orig = s;
- if (pm->op_pmflags & PMf_SKIPWHITE) {
+ if (rx->extflags & RXf_SKIPWHITE) {
if (do_utf8) {
while (*s == ' ' || is_utf8_space((U8*)s))
s += UTF8SKIP(s);
}
- else if (pm->op_pmflags & PMf_LOCALE) {
+ else if (rx->extflags & RXf_PMf_LOCALE) {
while (isSPACE_LC(*s))
s++;
}
@@ -4622,13 +4622,13 @@ PP(pp_split)
s++;
}
}
- if (pm->op_pmflags & PMf_MULTILINE) {
+ if (rx->extflags & PMf_MULTILINE) {
multiline = 1;
}
if (!limit)
limit = maxiters + 2;
- if (pm->op_pmflags & PMf_WHITE) {
+ if (rx->extflags & RXf_WHITE) {
while (--limit) {
m = s;
/* this one uses 'm' and is a negative test */
@@ -4641,7 +4641,7 @@ PP(pp_split)
else
m += t;
}
- } else if (pm->op_pmflags & PMf_LOCALE) {
+ } else if (rx->extflags & RXf_PMf_LOCALE) {
while (m < strend && !isSPACE_LC(*m))
++m;
} else {
@@ -4668,7 +4668,7 @@ PP(pp_split)
if (do_utf8) {
while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
s += UTF8SKIP(s);
- } else if (pm->op_pmflags & PMf_LOCALE) {
+ } else if (rx->extflags & RXf_PMf_LOCALE) {
while (s < strend && isSPACE_LC(*s))
++s;
} else {
diff --git a/pp_ctl.c b/pp_ctl.c
index 3d4992f118..7fd8145be8 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -76,6 +76,7 @@ PP(pp_regcomp)
register PMOP *pm = (PMOP*)cLOGOP->op_other;
SV *tmpstr;
MAGIC *mg = NULL;
+ regexp * re;
/* prevent recompiling under /o and ithreads. */
#if defined(USE_ITHREADS)
@@ -125,14 +126,14 @@ PP(pp_regcomp)
else {
STRLEN len;
const char *t = SvPV_const(tmpstr, len);
- regexp * const re = PM_GETRE(pm);
+ re = PM_GETRE(pm);
/* Check against the last compiled regexp. */
if (!re || !re->precomp || re->prelen != (I32)len ||
memNE(re->precomp, t, len))
{
const regexp_engine *eng = re ? re->engine : NULL;
-
+ U32 pm_flags = pm->op_pmflags & PMf_COMPILETIME;
if (re) {
ReREFCNT_dec(re);
PM_SETRE(pm, NULL); /* crucial if regcomp aborts */
@@ -146,50 +147,42 @@ PP(pp_regcomp)
if (PL_op->op_flags & OPf_SPECIAL)
PL_reginterp_cnt = I32_MAX; /* Mark as safe. */
- pm->op_pmflags = pm->op_pmpermflags; /* reset case sensitivity */
if (DO_UTF8(tmpstr))
- pm->op_pmdynflags |= PMdf_DYN_UTF8;
- else {
- pm->op_pmdynflags &= ~PMdf_DYN_UTF8;
- if (pm->op_pmdynflags & PMdf_UTF8)
- t = (char*)bytes_to_utf8((U8*)t, &len);
- }
+ pm_flags |= RXf_UTF8;
+
if (eng)
- PM_SETRE(pm, CALLREGCOMP_ENG(eng,(char *)t, (char *)t + len, pm));
+ PM_SETRE(pm, CALLREGCOMP_ENG(eng,(char *)t, (char *)t + len, pm_flags));
else
- PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm));
-
- if (!DO_UTF8(tmpstr) && (pm->op_pmdynflags & PMdf_UTF8))
- Safefree(t);
+ PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm_flags));
+
PL_reginterp_cnt = 0; /* XXXX Be extra paranoid - needed
inside tie/overload accessors. */
}
}
+
+ re = PM_GETRE(pm);
#ifndef INCOMPLETE_TAINTS
if (PL_tainting) {
if (PL_tainted)
- pm->op_pmdynflags |= PMdf_TAINTED;
+ re->extflags |= RXf_TAINTED;
else
- pm->op_pmdynflags &= ~PMdf_TAINTED;
+ re->extflags &= ~RXf_TAINTED;
}
#endif
if (!PM_GETRE(pm)->prelen && PL_curpm)
pm = PL_curpm;
- else if (PM_GETRE(pm)->extflags & RXf_WHITE)
- pm->op_pmflags |= PMf_WHITE;
- else
- pm->op_pmflags &= ~PMf_WHITE;
- /* XXX runtime compiled output needs to move to the pad */
+
+#if !defined(USE_ITHREADS)
+ /* can't change the optree at runtime either */
+ /* PMf_KEEP is handled differently under threads to avoid these problems */
if (pm->op_pmflags & PMf_KEEP) {
pm->op_private &= ~OPpRUNTIME; /* no point compiling again */
-#if !defined(USE_ITHREADS)
- /* XXX can't change the optree at runtime either */
cLOGOP->op_first->op_next = PL_op->op_next;
-#endif
}
+#endif
RETURN;
}
diff --git a/pp_hot.c b/pp_hot.c
index 2f2876bac9..9e47946941 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1180,9 +1180,10 @@ PP(pp_qr)
register PMOP * const pm = cPMOP;
SV * const rv = sv_newmortal();
SV * const sv = newSVrv(rv, "Regexp");
- if (pm->op_pmdynflags & PMdf_TAINTED)
+ regexp *re = PM_GETRE(pm);
+ if (re->extflags & RXf_TAINTED)
SvTAINTED_on(rv);
- sv_magic(sv,(SV*)ReREFCNT_inc(PM_GETRE(pm)), PERL_MAGIC_qr,0,0);
+ sv_magic(sv,(SV*)ReREFCNT_inc(re), PERL_MAGIC_qr,0,0);
XPUSHs(rv);
RETURN;
}
@@ -1222,20 +1223,28 @@ PP(pp_match)
if (!s)
DIE(aTHX_ "panic: pp_match");
strend = s + len;
- rxtainted = ((pm->op_pmdynflags & PMdf_TAINTED) ||
+ rxtainted = ((rx->extflags & RXf_TAINTED) ||
(PL_tainted && (pm->op_pmflags & PMf_RETAINT)));
TAINT_NOT;
RX_MATCH_UTF8_set(rx, DO_UTF8(TARG));
/* PMdf_USED is set after a ?? matches once */
- if (pm->op_pmdynflags & PMdf_USED) {
+ if (
+#ifdef USE_ITHREADS
+ SvREADONLY(PL_regex_pad[pm->op_pmoffset])
+#else
+ pm->op_pmflags & PMf_USED
+#endif
+ ) {
failure:
if (gimme == G_ARRAY)
RETURN;
RETPUSHNO;
}
+
+
/* empty pattern special-cased to use last successful pattern if possible */
if (!rx->prelen && PL_curpm) {
pm = PL_curpm;
@@ -1271,7 +1280,7 @@ PP(pp_match)
match. Test for the unsafe vars will fail as well*/
if (( /* !global && */ rx->nparens)
|| SvTEMP(TARG) || PL_sawampersand ||
- (pm->op_pmflags & (PMf_EVAL|PMf_KEEPCOPY)))
+ (rx->extflags & (RXf_EVAL_SEEN|RXf_PMf_KEEPCOPY)))
r_flags |= REXEC_COPY_STR;
if (SvSCREAM(TARG))
r_flags |= REXEC_SCREAM;
@@ -1294,7 +1303,7 @@ play_it_again:
goto nope;
if ( (rx->extflags & RXf_CHECK_ALL)
&& !PL_sawampersand
- && !(pm->op_pmflags & PMf_KEEPCOPY)
+ && !(rx->extflags & RXf_PMf_KEEPCOPY)
&& ((rx->extflags & RXf_NOSCAN)
|| !((rx->extflags & RXf_INTUIT_TAIL)
&& (r_flags & REXEC_SCREAM)))
@@ -1304,8 +1313,13 @@ play_it_again:
if (CALLREGEXEC(rx, (char*)s, (char *)strend, (char*)truebase, minmatch, TARG, INT2PTR(void*, gpos), r_flags))
{
PL_curpm = pm;
- if (dynpm->op_pmflags & PMf_ONCE)
- dynpm->op_pmdynflags |= PMdf_USED;
+ if (dynpm->op_pmflags & PMf_ONCE) {
+#ifdef USE_ITHREADS
+ SvREADONLY_on(PL_regex_pad[dynpm->op_pmoffset]);
+#else
+ dynpm->op_pmflags |= PMf_USED;
+#endif
+ }
goto gotcha;
}
else
@@ -1401,8 +1415,13 @@ yup: /* Confirmed by INTUIT */
RX_MATCH_TAINTED_on(rx);
TAINT_IF(RX_MATCH_TAINTED(rx));
PL_curpm = pm;
- if (dynpm->op_pmflags & PMf_ONCE)
- dynpm->op_pmdynflags |= PMdf_USED;
+ if (dynpm->op_pmflags & PMf_ONCE) {
+#ifdef USE_ITHREADS
+ SvREADONLY_on(PL_regex_pad[dynpm->op_pmoffset]);
+#else
+ dynpm->op_pmflags |= PMf_USED;
+#endif
+ }
if (RX_MATCH_COPIED(rx))
Safefree(rx->subbeg);
RX_MATCH_COPIED_off(rx);
@@ -1421,7 +1440,7 @@ yup: /* Confirmed by INTUIT */
rx->sublen = strend - truebase;
goto gotcha;
}
- if (PL_sawampersand || pm->op_pmflags & PMf_KEEPCOPY) {
+ if (PL_sawampersand || rx->extflags & RXf_PMf_KEEPCOPY) {
I32 off;
#ifdef PERL_OLD_COPY_ON_WRITE
if (SvIsCOW(TARG) || (SvFLAGS(TARG) & CAN_COW_MASK) == CAN_COW_FLAGS) {
@@ -2035,7 +2054,7 @@ PP(pp_subst)
s = SvPV_mutable(TARG, len);
if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV)
force_on_match = 1;
- rxtainted = ((pm->op_pmdynflags & PMdf_TAINTED) ||
+ rxtainted = ((rx->extflags & RXf_TAINTED) ||
(PL_tainted && (pm->op_pmflags & PMf_RETAINT)));
if (PL_tainted)
rxtainted |= 2;
@@ -2058,7 +2077,7 @@ PP(pp_subst)
rx = PM_GETRE(pm);
}
r_flags = (rx->nparens || SvTEMP(TARG) || PL_sawampersand
- || (pm->op_pmflags & (PMf_EVAL|PMf_KEEPCOPY)) )
+ || (rx->extflags & (RXf_EVAL_SEEN|RXf_PMf_KEEPCOPY)) )
? REXEC_COPY_STR : 0;
if (SvSCREAM(TARG))
r_flags |= REXEC_SCREAM;
@@ -2073,7 +2092,7 @@ PP(pp_subst)
/* How to do it in subst? */
/* if ( (rx->extflags & RXf_CHECK_ALL)
&& !PL_sawampersand
- && !(pm->op_pmflags & PMf_KEEPCOPY)
+ && !(rx->extflags & RXf_KEEPCOPY)
&& ((rx->extflags & RXf_NOSCAN)
|| !((rx->extflags & RXf_INTUIT_TAIL)
&& (r_flags & REXEC_SCREAM))))
diff --git a/proto.h b/proto.h
index 4e0832f906..a59cdd4738 100644
--- a/proto.h
+++ b/proto.h
@@ -1867,15 +1867,13 @@ PERL_CALLCONV void* Perl_regdupe_internal(pTHX_ const regexp* r, CLONE_PARAMS* p
__attribute__nonnull__(pTHX_2);
#endif
-PERL_CALLCONV regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, PMOP* pm)
+PERL_CALLCONV regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, U32 pm_flags)
__attribute__nonnull__(pTHX_1)
- __attribute__nonnull__(pTHX_2)
- __attribute__nonnull__(pTHX_3);
+ __attribute__nonnull__(pTHX_2);
-PERL_CALLCONV regexp* Perl_re_compile(pTHX_ char* exp, char* xend, PMOP* pm)
+PERL_CALLCONV regexp* Perl_re_compile(pTHX_ char* exp, char* xend, U32 pm_flags)
__attribute__nonnull__(pTHX_1)
- __attribute__nonnull__(pTHX_2)
- __attribute__nonnull__(pTHX_3);
+ __attribute__nonnull__(pTHX_2);
PERL_CALLCONV char* Perl_re_intuit_start(pTHX_ regexp* prog, SV* sv, char* strpos, char* strend, U32 flags, struct re_scream_pos_data_s *data)
__attribute__nonnull__(pTHX_1)
diff --git a/regcomp.c b/regcomp.c
index 3519c8de00..ae9efbf53a 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4017,7 +4017,7 @@ extern const struct regexp_engine my_reg_engine;
#ifndef PERL_IN_XSUB_RE
regexp *
-Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
+Perl_pregcomp(pTHX_ char *exp, char *xend, U32 pm_flags)
{
dVAR;
HV * const table = GvHV(PL_hintgv);
@@ -4032,15 +4032,15 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
SvIV(*ptr));
});
- return CALLREGCOMP_ENG(eng, exp, xend, pm);
+ return CALLREGCOMP_ENG(eng, exp, xend, pm_flags);
}
}
- return Perl_re_compile(aTHX_ exp, xend, pm);
+ return Perl_re_compile(aTHX_ exp, xend, pm_flags);
}
#endif
regexp *
-Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
+Perl_re_compile(pTHX_ char *exp, char *xend, U32 pm_flags)
{
dVAR;
register regexp *r;
@@ -4064,7 +4064,7 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
if (exp == NULL)
FAIL("NULL regexp argument");
- RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
+ RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8;
DEBUG_COMPILE_r({
SV *dsv= sv_newmortal();
@@ -4076,7 +4076,7 @@ Perl_re_compile(pTHX_ char *exp, char *xend, PMOP *pm)
redo_first_pass:
RExC_precomp = exp;
- RExC_flags = pm->op_pmflags;
+ RExC_flags = pm_flags;
RExC_sawback = 0;
RExC_seen = 0;
@@ -4171,7 +4171,7 @@ redo_first_pass:
r->engine= RE_ENGINE_PTR;
r->refcnt = 1;
r->prelen = xend - exp;
- r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME;
+ r->extflags = pm_flags;
{
bool has_k = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
@@ -4239,7 +4239,7 @@ redo_first_pass:
RExC_rxi = ri;
/* Second pass: emit code. */
- RExC_flags = pm->op_pmflags; /* don't let top level (?i) bleed */
+ RExC_flags = pm_flags; /* don't let top level (?i) bleed */
RExC_parse = exp;
RExC_end = xend;
RExC_naughty = 0;
@@ -4291,8 +4291,9 @@ reStudy:
#endif
/* Dig out information for optimizations. */
- r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; /* Again? */
- pm->op_pmflags = RExC_flags;
+ r->extflags = pm_flags; /* Again? */
+ /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
+
if (UTF)
r->extflags |= RXf_UTF8; /* Unicode in it? */
ri->regstclass = NULL;
diff --git a/regexec.c b/regexec.c
index c65c33b6c8..1eb7ff2859 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3665,12 +3665,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
else {
STRLEN len;
const char * const t = SvPV_const(ret, len);
- PMOP pm;
+ U32 pm_flags = 0;
const I32 osize = PL_regsize;
- Zero(&pm, 1, PMOP);
- if (DO_UTF8(ret)) pm.op_pmdynflags |= PMdf_DYN_UTF8;
- re = CALLREGCOMP((char*)t, (char*)t + len, &pm);
+ if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
+ re = CALLREGCOMP((char*)t, (char*)t + len, pm_flags);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY
| SVs_GMG)))
diff --git a/regexp.h b/regexp.h
index bb73dabb3a..fb723b3e5f 100644
--- a/regexp.h
+++ b/regexp.h
@@ -112,7 +112,7 @@ typedef struct re_scream_pos_data_s
* Any regex engine implementation must be able to build one of these.
*/
typedef struct regexp_engine {
- regexp* (*comp) (pTHX_ char* exp, char* xend, PMOP* pm);
+ regexp* (*comp) (pTHX_ char* exp, char* xend, U32 pm_flags);
I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend,
char* strbeg, I32 minend, SV* screamer,
void* data, U32 flags);
@@ -149,6 +149,7 @@ typedef struct regexp_engine {
#define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS)
/* Flags indicating special patterns */
+#define RXf_SKIPWHITE 0x00000100 /* Pattern is for a split / / */
#define RXf_START_ONLY 0x00000200 /* Pattern is /^/ */
#define RXf_WHITE 0x00000400 /* Pattern is /\s+/ */
@@ -224,7 +225,8 @@ typedef struct regexp_engine {
/* Copy and tainted info */
#define RXf_COPY_DONE 0x10000000
#define RXf_TAINTED_SEEN 0x20000000
-/* two bits here */
+#define RXf_TAINTED 0x80000000 /* this pattern is tainted */
+
#define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN)
#define RX_MATCH_TAINTED(prog) ((prog)->extflags & RXf_TAINTED_SEEN)
diff --git a/sv.c b/sv.c
index 09a1772f88..2d3af25c6e 100644
--- a/sv.c
+++ b/sv.c
@@ -7269,7 +7269,11 @@ Perl_sv_reset(pTHX_ register const char *s, HV *stash)
if (mg) {
PMOP *pm = (PMOP *) mg->mg_obj;
while (pm) {
- pm->op_pmdynflags &= ~PMdf_USED;
+#ifdef USE_ITHREADS
+ SvREADONLY_off(PL_regex_pad[pm->op_pmoffset]);
+#else
+ pm->op_pmflags &= ~PMf_USED;
+#endif
pm = pm->op_pmnext;
}
}
diff --git a/toke.c b/toke.c
index f8e11c961e..63fdbfa470 100644
--- a/toke.c
+++ b/toke.c
@@ -10860,8 +10860,6 @@ S_scan_pat(pTHX_ char *start, I32 type)
"Use of /c modifier is meaningless without /g" );
}
- pm->op_pmpermflags = pm->op_pmflags;
-
PL_lex_op = (OP*)pm;
yylval.ival = OP_MATCH;
return s;
@@ -10962,7 +10960,6 @@ S_scan_subst(pTHX_ char *start)
PL_lex_repl = repl;
}
- pm->op_pmpermflags = pm->op_pmflags;
PL_lex_op = (OP*)pm;
yylval.ival = OP_SUBST;
return s;