diff options
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | ext/re/re.pm | 10 | ||||
-rw-r--r-- | regcomp.h | 4 | ||||
-rw-r--r-- | regexec.c | 23 | ||||
-rw-r--r-- | t/op/regexp_email.t | 94 |
5 files changed, 118 insertions, 14 deletions
@@ -3616,6 +3616,7 @@ t/op/regexp_qr_embed.t See if regular expressions work with embedded qr// t/op/regexp_qr.t See if regular expressions work as qr// t/op/regexp.t See if regular expressions work t/op/regexp_trielist.t See if regular expressions work with trie optimisation +t/op/regexp_email.t See if regex recursion works by parsing email addresses t/op/regmesg.t See if one can get regular expression errors t/op/repeat.t See if x operator works t/op/re_tests Regular expressions for regexp.t diff --git a/ext/re/re.pm b/ext/re/re.pm index 4f8d4105a8..c33ca3c522 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -67,8 +67,9 @@ my %flags = ( STATE => 0x080000, OPTIMISEM => 0x100000, STACK => 0x280000, + BUFFERS => 0x400000, ); -$flags{ALL} = -1; +$flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS}); $flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE}; $flags{Extra} = $flags{EXECUTE} | $flags{COMPILE}; $flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE}; @@ -323,6 +324,11 @@ Enable debugging of start point optimisations. Turns on all "extra" debugging options. +=item BUFFERS + +Enable debugging the capture buffer storage during match. Warning, +this can potentially produce extremely large output. + =item TRIEM Enable enhanced TRIE debugging. Enhances both TRIEE @@ -373,7 +379,7 @@ These are useful shortcuts to save on the typing. =item ALL -Enable all compile and execute options at once. +Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS =item All @@ -695,6 +695,7 @@ re.pm, especially to the documentation. #define RE_DEBUG_EXTRA_OFFDEBUG 0x040000 #define RE_DEBUG_EXTRA_STATE 0x080000 #define RE_DEBUG_EXTRA_OPTIMISE 0x100000 +#define RE_DEBUG_EXTRA_BUFFERS 0x400000 /* combined */ #define RE_DEBUG_EXTRA_STACK 0x280000 @@ -732,6 +733,9 @@ re.pm, especially to the documentation. if (re_debug_flags & RE_DEBUG_EXTRA_STATE) x ) #define DEBUG_STACK_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_EXTRA_STACK) x ) +#define DEBUG_BUFFERS_r(x) DEBUG_r( \ + if (re_debug_flags & RE_DEBUG_EXTRA_BUFFERS) x ) + #define DEBUG_OPTIMISE_MORE_r(x) DEBUG_r( \ if ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \ (re_debug_flags & (RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE)) ) x ) @@ -193,7 +193,7 @@ S_regcppush(pTHX_ I32 parenfloor) SSPUSHINT(PL_regstartp[p]); SSPUSHPTR(PL_reg_start_tmp[p]); SSPUSHINT(p); - DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, + DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n", (UV)p, (IV)PL_regstartp[p], (IV)(PL_reg_start_tmp[p] - PL_bostr), @@ -263,7 +263,7 @@ S_regcppop(pTHX_ const regexp *rex) tmps = SSPOPINT; if (paren <= *PL_reglastparen) PL_regendp[paren] = tmps; - DEBUG_EXECUTE_r( + DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log, " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n", (UV)paren, (IV)PL_regstartp[paren], @@ -272,7 +272,7 @@ S_regcppop(pTHX_ const regexp *rex) (paren > *PL_reglastparen ? "(no)" : "")); ); } - DEBUG_EXECUTE_r( + DEBUG_BUFFERS_r( if (*PL_reglastparen + 1 <= rex->nparens) { PerlIO_printf(Perl_debug_log, " restoring \\%"IVdf"..\\%"IVdf" to undef\n", @@ -3568,8 +3568,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) regnode *startpoint; case GOSTART: - case GOSUB: /* /(...(?1))/ */ - if (cur_eval && cur_eval->locinput==locinput) { + case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */ + if (cur_eval && cur_eval->locinput==locinput) { if (cur_eval->u.eval.close_paren == (U32)ARG(scan)) Perl_croak(aTHX_ "Infinite recursion in regex"); if ( ++nochange_depth > max_nochange_depth ) @@ -3742,7 +3742,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) cur_curlyx = ST.prev_curlyx; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; - if ( nochange_depth > 0 ); + if ( nochange_depth ) nochange_depth--; sayYES; @@ -3760,7 +3760,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) cur_curlyx = ST.prev_curlyx; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; - if ( nochange_depth > 0 ); + if ( nochange_depth ) nochange_depth--; sayNO_SILENT; #undef ST @@ -4755,8 +4755,6 @@ NULL if (cur_eval) { /* we've just finished A in /(??{A})B/; now continue with B */ I32 tmpix; - - st->u.eval.toggle_reg_flags = cur_eval->u.eval.toggle_reg_flags; PL_reg_flags ^= st->u.eval.toggle_reg_flags; @@ -4782,9 +4780,10 @@ NULL DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n", REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval));); - if ( nochange_depth > 0 ); - nochange_depth++; - PUSH_YES_STATE_GOTO(EVAL_AB, + if ( nochange_depth ) + nochange_depth--; + + PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B); /* match B */ } diff --git a/t/op/regexp_email.t b/t/op/regexp_email.t new file mode 100644 index 0000000000..c53dd82860 --- /dev/null +++ b/t/op/regexp_email.t @@ -0,0 +1,94 @@ +#!./perl +# +# Tests to make sure the regexp engine doesn't run into limits too soon. +# + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +print "1..13\n"; + +my $email = qr { + (?(DEFINE) + (?<address> (?&mailbox) | (?&group)) + (?<mailbox> (?&name_addr) | (?&addr_spec)) + (?<name_addr> (?&display_name)? (?&angle_addr)) + (?<angle_addr> (?&CFWS)? < (?&addr_spec) > (?&CFWS)?) + (?<group> (?&display_name) : (?:(?&mailbox_list) | (?&CFWS))? ; + (?&CFWS)?) + (?<display_name> (?&phrase)) + (?<mailbox_list> (?&mailbox) (?: , (?&mailbox))*) + + (?<addr_spec> (?&local_part) \@ (?&domain)) + (?<local_part> (?&dot_atom) | (?"ed_string)) + (?<domain> (?&dot_atom) | (?&domain_literal)) + (?<domain_literal> (?&CFWS)? \[ (?: (?&FWS)? (?&dcontent))* (?&FWS)? + \] (?&CFWS)?) + (?<dcontent> (?&dtext) | (?"ed_pair)) + (?<dtext> (?&NO_WS_CTL) | [\x21-\x5a\x5e-\x7e]) + + (?<atext> (?&ALPHA) | (?&DIGIT) | [!#\$%&'*+-/=?^_`{|}~]) + (?<atom> (?&CFWS)? (?&atext)+ (?&CFWS)?) + (?<dot_atom> (?&CFWS)? (?&dot_atom_text) (?&CFWS)?) + (?<dot_atom_text> (?&atext)+ (?: \. (?&atext)+)*) + + (?<text> [\x01-\x09\x0b\x0c\x0e-\x7f]) + (?<quoted_pair> \\ (?&text)) + + (?<qtext> (?&NO_WS_CTL) | [\x21\x23-\x5b\x5d-\x7e]) + (?<qcontent> (?&qtext) | (?"ed_pair)) + (?<quoted_string> (?&CFWS)? (?&DQUOTE) (?:(?&FWS)? (?&qcontent))* + (?&FWS)? (?&DQUOTE) (?&CFWS)?) + + (?<word> (?&atom) | (?"ed_string)) + (?<phrase> (?&word)+) + + # Folding white space + (?<FWS> (?: (?&WSP)* (?&CRLF))? (?&WSP)+) + (?<ctext> (?&NO_WS_CTL) | [\x21-\x27\x2a-\x5b\x5d-\x7e]) + (?<ccontent> (?&ctext) | (?"ed_pair) | (?&comment)) + (?<comment> \( (?: (?&FWS)? (?&ccontent))* (?&FWS)? \) ) + (?<CFWS> (?: (?&FWS)? (?&comment))* + (?: (?:(?&FWS)? (?&comment)) | (?&FWS))) + + # No whitespace control + (?<NO_WS_CTL> [\x01-\x08\x0b\x0c\x0e-\x1f\x7f]) + + (?<ALPHA> [A-Za-z]) + (?<DIGIT> [0-9]) + (?<CRLF> \x0d \x0a) + (?<DQUOTE> ") + (?<WSP> [\x20\x09]) + ) + + (?&address) +}x; + +my $count = 0; + +$| = 1; +while (<DATA>) { + chomp; + next if /^#/; + print /^$email$/ ? "ok " : "not ok ", ++ $count, "\n"; +} + +# +# Acme::MetaSyntactic ++ +# +__DATA__ +Jeff_Tracy@thunderbirds.org +"Lady Penelope"@thunderbirds.org +"The\ Hood"@thunderbirds.org +fred @ flintstones.net +barney (rubble) @ flintstones.org +bammbamm (bam! bam! (bam! bam! (bam!)) bam!) @ flintstones.org +Michelangelo@[127.0.0.1] +Donatello @ [127.0.0.1] +Raphael (He as well) @ [127.0.0.1] +"Leonardo" @ [127.0.0.1] +Barbapapa <barbapapa @ barbapapa.net> +"Barba Mama" <barbamama @ [127.0.0.1]> +Barbalala (lalalalalalalala) <barbalala (Yes, her!) @ (barba) barbapapa.net> |