summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorge Greer <greerga@m-l.org>2009-07-12 14:53:29 -0400
committerYves Orton <demerphq@gemini.(none)>2009-07-26 23:28:59 +0200
commite9105d30edfbaa7f444bc7984c9bafc8e991ad12 (patch)
tree02f4a68781026819995c672ba8325caee5661d7f
parentbd54d59dd6a665482e3984318596e9a54427f810 (diff)
downloadperl-e9105d30edfbaa7f444bc7984c9bafc8e991ad12.tar.gz
much better swap logic to support reentrancy and fix assert failure
Commit c74340f9 added backreferences as well as the idea of a ->swap regex pointer to keep track of the match offsets in case of backtracking. The problem is that when Perl re-enters the regex engine to handle utf8::SWASHNEW, the ->swap is not saved/restored/cleared so any capture from the utf8 (Perl) code could inadvertently modify the regex match data that caused the utf8 swash to get built. This change should close out RT #60508
-rw-r--r--embed.fnc1
-rw-r--r--embed.h2
-rw-r--r--ext/Devel-PPPort/parts/embed.fnc1
-rw-r--r--pod/perlreapi.pod2
-rw-r--r--pod/perlreguts.pod13
-rw-r--r--proto.h5
-rw-r--r--regcomp.c2
-rw-r--r--regexec.c46
-rw-r--r--regexp.h2
-rw-r--r--t/op/pat.t20
10 files changed, 44 insertions, 50 deletions
diff --git a/embed.fnc b/embed.fnc
index 0001d1fe76..4dc2706ac5 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1678,7 +1678,6 @@ ERsn |U8* |reghop4 |NN U8 *s|I32 off|NN const U8 *llim \
#endif
ERsn |U8* |reghopmaybe3 |NN U8 *s|I32 off|NN const U8 *lim
ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK regmatch_info *reginfo
-Es |void |swap_match_buff|NN regexp * prog
Es |void |to_utf8_substr |NN regexp * prog
Es |void |to_byte_substr |NN regexp * prog
ERs |I32 |reg_check_named_buff_matched |NN const regexp *rex \
diff --git a/embed.h b/embed.h
index e702d32a47..0ef5ff2e54 100644
--- a/embed.h
+++ b/embed.h
@@ -1468,7 +1468,6 @@
#if defined(PERL_CORE) || defined(PERL_EXT)
#define reghopmaybe3 S_reghopmaybe3
#define find_byclass S_find_byclass
-#define swap_match_buff S_swap_match_buff
#define to_utf8_substr S_to_utf8_substr
#define to_byte_substr S_to_byte_substr
#define reg_check_named_buff_matched S_reg_check_named_buff_matched
@@ -3814,7 +3813,6 @@
#if defined(PERL_CORE) || defined(PERL_EXT)
#define reghopmaybe3 S_reghopmaybe3
#define find_byclass(a,b,c,d,e) S_find_byclass(aTHX_ a,b,c,d,e)
-#define swap_match_buff(a) S_swap_match_buff(aTHX_ a)
#define to_utf8_substr(a) S_to_utf8_substr(aTHX_ a)
#define to_byte_substr(a) S_to_byte_substr(aTHX_ a)
#define reg_check_named_buff_matched(a,b) S_reg_check_named_buff_matched(aTHX_ a,b)
diff --git a/ext/Devel-PPPort/parts/embed.fnc b/ext/Devel-PPPort/parts/embed.fnc
index 68f38171c7..48cb9f3973 100644
--- a/ext/Devel-PPPort/parts/embed.fnc
+++ b/ext/Devel-PPPort/parts/embed.fnc
@@ -1677,7 +1677,6 @@ ERsn |U8* |reghop4 |NN U8 *s|I32 off|NN const U8 *llim \
#endif
ERsn |U8* |reghopmaybe3 |NN U8 *s|I32 off|NN const U8 *lim
ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK regmatch_info *reginfo
-Es |void |swap_match_buff|NN regexp * prog
Es |void |to_utf8_substr |NN regexp * prog
Es |void |to_byte_substr |NN regexp * prog
ERs |I32 |reg_check_named_buff_matched |NN const regexp *rex \
diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod
index b0d6275810..03996fd51e 100644
--- a/pod/perlreapi.pod
+++ b/pod/perlreapi.pod
@@ -598,7 +598,7 @@ engine should use something else.
=head2 C<swap>
-TODO: document
+Unused. Left in for compatibility with perl 5.10.0.
=head2 C<offs>
diff --git a/pod/perlreguts.pod b/pod/perlreguts.pod
index 204993165c..9c54ec4aac 100644
--- a/pod/perlreguts.pod
+++ b/pod/perlreguts.pod
@@ -810,13 +810,12 @@ value to other engine implementations.
=item C<swap>
-C<swap> is an extra set of startp/endp stored in a C<regexp_paren_ofs>
-struct. This is used when the last successful match was from the same pattern
-as the current pattern, so that a partial match doesn't overwrite the
-previous match's results. When this field is data filled the matching
-engine will swap buffers before every match attempt. If the match fails,
-then it swaps them back. If it's successful it leaves them. This field
-is populated on demand and is by default null.
+C<swap> formerly was an extra set of startp/endp stored in a
+C<regexp_paren_ofs> struct. This was used when the last successful match
+was from the same pattern as the current pattern, so that a partial
+match didn't overwrite the previous match's results, but it caused a
+problem with re-entrant code such as trying to build the UTF-8 swashes.
+Currently unused and left for backward compatibility with 5.10.0.
=item C<offsets>
diff --git a/proto.h b/proto.h
index b2e9e90a14..5a08e3e548 100644
--- a/proto.h
+++ b/proto.h
@@ -5434,11 +5434,6 @@ STATIC char* S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, cons
#define PERL_ARGS_ASSERT_FIND_BYCLASS \
assert(prog); assert(c); assert(s); assert(strend)
-STATIC void S_swap_match_buff(pTHX_ regexp * prog)
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_SWAP_MATCH_BUFF \
- assert(prog)
-
STATIC void S_to_utf8_substr(pTHX_ regexp * prog)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_TO_UTF8_SUBSTR \
diff --git a/regcomp.c b/regcomp.c
index 22c02c82ad..696796b796 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9414,7 +9414,6 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
if (r->saved_copy)
SvREFCNT_dec(r->saved_copy);
#endif
- Safefree(r->swap);
Safefree(r->offs);
}
@@ -9474,7 +9473,6 @@ Perl_reg_temp_copy (pTHX_ REGEXP *rx)
ret->saved_copy = NULL;
#endif
ret->mother_re = rx;
- ret->swap = NULL;
return ret_x;
}
diff --git a/regexec.c b/regexec.c
index d3dd61230b..5d31d735ff 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1734,28 +1734,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
return s;
}
-static void
-S_swap_match_buff (pTHX_ regexp *prog)
-{
- regexp_paren_pair *t;
-
- PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
-
- if (!prog->swap) {
- /* We have to be careful. If the previous successful match
- was from this regex we don't want a subsequent paritally
- successful match to clobber the old results.
- So when we detect this possibility we add a swap buffer
- to the re, and switch the buffer each match. If we fail
- we switch it back, otherwise we leave it swapped.
- */
- Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
- }
- t = prog->swap;
- prog->swap = prog->offs;
- prog->offs = t;
-}
-
/*
- regexec_flags - match a regexp against a string
@@ -1785,7 +1763,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
I32 multiline;
RXi_GET_DECL(prog,progi);
regmatch_info reginfo; /* create some info to pass to regtry etc */
- bool swap_on_fail = 0;
+ regexp_paren_pair *swap = NULL;
GET_RE_DEBUG_FLAGS_DECL;
PERL_ARGS_ASSERT_REGEXEC_FLAGS;
@@ -1863,9 +1841,16 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
reginfo.ganch = strbeg;
}
if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
- swap_on_fail = 1;
- swap_match_buff(prog); /* do we need a save destructor here for
- eval dies? */
+ /* We have to be careful. If the previous successful match
+ was from this regex we don't want a subsequent partially
+ successful match to clobber the old results.
+ So when we detect this possibility we add a swap buffer
+ to the re, and switch the buffer each match. If we fail
+ we switch it back, otherwise we leave it swapped.
+ */
+ swap = prog->offs;
+ /* do we need a save destructor here for eval dies? */
+ Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
}
if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
re_scream_pos_data d;
@@ -2164,6 +2149,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
goto phooey;
got_it:
+ Safefree(swap);
RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
if (PL_reg_eval_set)
@@ -2209,10 +2195,12 @@ phooey:
PL_colors[4], PL_colors[5]));
if (PL_reg_eval_set)
restore_pos(aTHX_ prog);
- if (swap_on_fail)
+ if (swap) {
/* we failed :-( roll it back */
- swap_match_buff(prog);
-
+ Safefree(prog->offs);
+ prog->offs = swap;
+ }
+
return 0;
}
diff --git a/regexp.h b/regexp.h
index fe671425d8..90e3406a96 100644
--- a/regexp.h
+++ b/regexp.h
@@ -88,7 +88,7 @@ typedef struct regexp_paren_pair {
/* during matching */ \
U32 lastparen; /* last open paren matched */ \
U32 lastcloseparen; /* last close paren matched */ \
- regexp_paren_pair *swap; /* Swap copy of *offs */ \
+ regexp_paren_pair *swap; /* Unused: 5.10.1 and later */ \
/* Array of offsets for (@-) and (@+) */ \
regexp_paren_pair *offs; \
/* saved or original string so \digit works forever. */ \
diff --git a/t/op/pat.t b/t/op/pat.t
index 87a05dc676..00c54902f4 100644
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -13,7 +13,7 @@ sub run_tests;
$| = 1;
-my $EXPECTED_TESTS = 4065; # Update this when adding/deleting tests.
+my $EXPECTED_TESTS = 4066; # Update this when adding/deleting tests.
BEGIN {
chdir 't' if -d 't';
@@ -4347,6 +4347,24 @@ sub run_tests {
}
}
+ # This only works under -DEBUGGING because it relies on an assert().
+ {
+ local $BugId = '60508';
+ local $Message = "Check capture offset re-entrancy of utf8 code.";
+
+ sub fswash { $_[0] =~ s/([>X])//g; }
+
+ my $k1 = "." x 4 . ">>";
+ fswash($k1);
+
+ my $k2 = "\x{f1}\x{2022}";
+ $k2 =~ s/([\360-\362])/>/g;
+ fswash($k2);
+
+ iseq($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks");
+ }
+
+
{
local $BugId = 65372; # minimal CURLYM limited to 32767 matches
my @pat = (