summaryrefslogtreecommitdiff
path: root/regexp.h
diff options
context:
space:
mode:
author: David Mitchell <davem@iabyn.com> 2013-05-19 09:38:23 +0100
committer: David Mitchell <davem@iabyn.com> 2013-06-02 22:28:51 +0100
commit: 8adc0f72b0398cece49d44d4acc0962d03543ea9 (patch)
tree: 44698d175bd412fed0431a239e971e146cac91c2 /regexp.h
parent: a46737de7eb272b1765c352593c3e3627bccdad5 (diff)
download: perl-8adc0f72b0398cece49d44d4acc0962d03543ea9.tar.gz
add regmatch_eval_state struct
Replace several PL_reg* vars with a new struct. This is part of the goal of removing all global regex state. These particular vars are used in the case of a regex with (?{}) code blocks. In this case, when the code in a block is called, various bits of state (such as $1, pos($_)) are temporarily set up, even though the match has not yet completed. This involves updating the current PL_curpm to point to a fake PMOP which points to the regex currently being executed. That regex has all its current fields that are associated with captures (such as subbeg) temporarily saved and overwritten with the current partial match results. Similarly, $_ is temporarily aliased to the current match string, and any old pos() position is saved. This saving was formerly done to the various PL_reg* vars. When the regex has finished executing (or if the code block croaks), its fields are restored to the original values. Since this can happen in a croak, it may be done using SAVEDESTRUCTOR_X() on the save stack. This precludes just moving the PL_reg* vars into the regmatch_info struct, since that is just allocated as a local var in regexec_flags(), and would have already been abandoned and possibly overwritten after the croak and longjmp, but before the SAVEDESTRUCTOR_X() action is taken. So instead we put all the vars into a new struct, and malloc that on entry to the regex engine when we know we need to copy the various fields. We save a pointer to that in the regmatch_info struct, as well as passing it to SAVEDESTRUCTOR_X(). The destructor may get called up to twice in the non-croak case: first it's called explicitly at the end of regexec_flags(), which restores subbeg etc; then again from the savestack, which just free()s the struct. In the croak case, it's called just once, and does both the restoring and the freeing.
The vars / PL_reg_state fields this commit eliminates are: re_state_eval_setup_done, PL_reg_oldsaved, PL_reg_oldsavedlen, PL_reg_oldsavedoffset, PL_reg_oldsavedcoffset, PL_reg_magic, PL_reg_oldpos, PL_nrs, PL_reg_oldcurpm.
Diffstat (limited to 'regexp.h')
-rw-r--r--regexp.h43
1 files changed, 24 insertions, 19 deletions
diff --git a/regexp.h b/regexp.h
index d1f7a01073..6ec6214d61 100644
--- a/regexp.h
+++ b/regexp.h
@@ -576,6 +576,29 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define FBMrf_MULTILINE 1
+
+/* saved state when executing a regex that contains code blocks.
+ * These need restoring at the end of the match, or on croak().
+ * These fields can't be stored in regmatch_info since the latter is
+ * allocated directly on the stack in regexec_flags(), and they need to
+ * exist during a croak() after the stack has been unwound */
+
+typedef struct {
+ regexp *rex; /* the regex whose capture-related fields are saved in this struct */
+ PMOP *curpm; /* saved PL_curpm */
+#ifdef PERL_ANY_COW
+ SV *saved_copy; /* saved saved_copy field from rex */
+#endif
+ char *subbeg; /* saved subbeg field from rex */
+ STRLEN sublen; /* saved sublen field from rex */
+ STRLEN suboffset; /* saved suboffset field from rex */
+ STRLEN subcoffset; /* saved subcoffset field from rex */
+ MAGIC *pos_magic; /* pos() magic attached to $_ */
+ I32 pos; /* the original value of pos() in pos_magic */
+ bool restored; /* we have already undone the save */
+ bool direct; /* we are calling the destructor directly */
+} regmatch_eval_state;
+
/* some basic information about the current match that is created by
* Perl_regexec_flags and then passed to regtry(), regmatch() etc */
@@ -587,6 +610,7 @@ typedef struct {
SV *sv;
char *ganch;
char *cutpoint;
+ regmatch_eval_state *eval_state; /* extra saved state for (?{}) */
bool intuit; /* re_intuit_start() is the top-level caller */
bool is_utf8_pat;
bool warned; /* we have issued a recursion warning; no need for more */
@@ -763,41 +787,22 @@ typedef struct regmatch_slab {
} regmatch_slab;
#define PL_reg_match_utf8 PL_reg_state.re_state_reg_match_utf8
-#define PL_reg_magic PL_reg_state.re_state_reg_magic
-#define PL_reg_oldpos PL_reg_state.re_state_reg_oldpos
-#define PL_reg_oldcurpm PL_reg_state.re_state_reg_oldcurpm
#define PL_reg_curpm PL_reg_state.re_state_reg_curpm
-#define PL_reg_oldsaved PL_reg_state.re_state_reg_oldsaved
-#define PL_reg_oldsavedlen PL_reg_state.re_state_reg_oldsavedlen
-#define PL_reg_oldsavedoffset PL_reg_state.re_state_reg_oldsavedoffset
-#define PL_reg_oldsavedcoffset PL_reg_state.re_state_reg_oldsavedcoffset
#define PL_reg_maxiter PL_reg_state.re_state_reg_maxiter
#define PL_reg_leftiter PL_reg_state.re_state_reg_leftiter
#define PL_reg_poscache PL_reg_state.re_state_reg_poscache
#define PL_reg_poscache_size PL_reg_state.re_state_reg_poscache_size
#define PL_reg_starttry PL_reg_state.re_state_reg_starttry
-#define PL_nrs PL_reg_state.re_state_nrs
struct re_save_state { /* fields are named by the PL_reg_* macros as PL_reg_state.<field>; presumably PL_reg_state is an instance of this struct -- TODO confirm */
- bool re_state_eval_setup_done; /* from regexec.c */
bool re_state_reg_match_utf8; /* from regexec.c */
/* Space for U8 */
- I32 re_state_reg_oldpos; /* from regexec.c */
I32 re_state_reg_maxiter; /* max wait until caching pos */
I32 re_state_reg_leftiter; /* wait until caching pos */
- MAGIC *re_state_reg_magic; /* from regexec.c */
- PMOP *re_state_reg_oldcurpm; /* from regexec.c */
PMOP *re_state_reg_curpm; /* from regexec.c */
- char *re_state_reg_oldsaved; /* old saved substr during match */
- STRLEN re_state_reg_oldsavedlen; /* old length of saved substr during match */
- STRLEN re_state_reg_oldsavedoffset; /* old offset of saved substr during match */
- STRLEN re_state_reg_oldsavedcoffset;/* old coffset of saved substr during match */
STRLEN re_state_reg_poscache_size; /* size of pos cache of WHILEM */
char *re_state_reg_poscache; /* cache of pos of WHILEM */
char *re_state_reg_starttry; /* from regexec.c */
-#ifdef PERL_ANY_COW
- SV *re_state_nrs; /* was placeholder: unused since 5.8.0 (5.7.2 patch #12027 for bug ID 20010815.012). Used to save rx->saved_copy */
-#endif
};
#define SAVESTACK_ALLOC_FOR_RE_SAVE_STATE \