summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2007-02-28 18:45:33 +0100
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2007-03-01 10:54:09 +0000
commite7707071e420c5a715c0621d0428dd393503e884 (patch)
treeb100855c405f6d78baace15a380385052daaf17d
parent88431378e1356f3c89a703791b4d6c396872635a (diff)
downloadperl-e7707071e420c5a715c0621d0428dd393503e884.tar.gz
Re: New file: t/op/regexp_email.t
Message-ID: <9b18b3110702280845p7860ca08taf1aead39a178aa4@mail.gmail.com> p4raw-id: //depot/perl@30436
-rw-r--r--MANIFEST1
-rw-r--r--ext/re/re.pm10
-rw-r--r--regcomp.h4
-rw-r--r--regexec.c23
-rw-r--r--t/op/regexp_email.t94
5 files changed, 118 insertions, 14 deletions
diff --git a/MANIFEST b/MANIFEST
index 02db239151..b70220bec3 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3616,6 +3616,7 @@ t/op/regexp_qr_embed.t See if regular expressions work with embedded qr//
t/op/regexp_qr.t See if regular expressions work as qr//
t/op/regexp.t See if regular expressions work
t/op/regexp_trielist.t See if regular expressions work with trie optimisation
+t/op/regexp_email.t See if regex recursion works by parsing email addresses
t/op/regmesg.t See if one can get regular expression errors
t/op/repeat.t See if x operator works
t/op/re_tests Regular expressions for regexp.t
diff --git a/ext/re/re.pm b/ext/re/re.pm
index 4f8d4105a8..c33ca3c522 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -67,8 +67,9 @@ my %flags = (
STATE => 0x080000,
OPTIMISEM => 0x100000,
STACK => 0x280000,
+ BUFFERS => 0x400000,
);
-$flags{ALL} = -1;
+$flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS});
$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE};
$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE};
$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE};
@@ -323,6 +324,11 @@ Enable debugging of start point optimisations.
Turns on all "extra" debugging options.
+=item BUFFERS
+
+Enable debugging the capture buffer storage during match. Warning,
+this can potentially produce extremely large output.
+
=item TRIEM
Enable enhanced TRIE debugging. Enhances both TRIEE
@@ -373,7 +379,7 @@ These are useful shortcuts to save on the typing.
=item ALL
-Enable all compile and execute options at once.
+Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS
=item All
diff --git a/regcomp.h b/regcomp.h
index b07a63f781..72f415a0da 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -695,6 +695,7 @@ re.pm, especially to the documentation.
#define RE_DEBUG_EXTRA_OFFDEBUG 0x040000
#define RE_DEBUG_EXTRA_STATE 0x080000
#define RE_DEBUG_EXTRA_OPTIMISE 0x100000
+#define RE_DEBUG_EXTRA_BUFFERS 0x400000
/* combined */
#define RE_DEBUG_EXTRA_STACK 0x280000
@@ -732,6 +733,9 @@ re.pm, especially to the documentation.
if (re_debug_flags & RE_DEBUG_EXTRA_STATE) x )
#define DEBUG_STACK_r(x) DEBUG_r( \
if (re_debug_flags & RE_DEBUG_EXTRA_STACK) x )
+#define DEBUG_BUFFERS_r(x) DEBUG_r( \
+ if (re_debug_flags & RE_DEBUG_EXTRA_BUFFERS) x )
+
#define DEBUG_OPTIMISE_MORE_r(x) DEBUG_r( \
if ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \
(re_debug_flags & (RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE)) ) x )
diff --git a/regexec.c b/regexec.c
index 1ae984262b..d1f6b8ec87 100644
--- a/regexec.c
+++ b/regexec.c
@@ -193,7 +193,7 @@ S_regcppush(pTHX_ I32 parenfloor)
SSPUSHINT(PL_regstartp[p]);
SSPUSHPTR(PL_reg_start_tmp[p]);
SSPUSHINT(p);
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+ DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
" saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
(UV)p, (IV)PL_regstartp[p],
(IV)(PL_reg_start_tmp[p] - PL_bostr),
@@ -263,7 +263,7 @@ S_regcppop(pTHX_ const regexp *rex)
tmps = SSPOPINT;
if (paren <= *PL_reglastparen)
PL_regendp[paren] = tmps;
- DEBUG_EXECUTE_r(
+ DEBUG_BUFFERS_r(
PerlIO_printf(Perl_debug_log,
" restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
(UV)paren, (IV)PL_regstartp[paren],
@@ -272,7 +272,7 @@ S_regcppop(pTHX_ const regexp *rex)
(paren > *PL_reglastparen ? "(no)" : ""));
);
}
- DEBUG_EXECUTE_r(
+ DEBUG_BUFFERS_r(
if (*PL_reglastparen + 1 <= rex->nparens) {
PerlIO_printf(Perl_debug_log,
" restoring \\%"IVdf"..\\%"IVdf" to undef\n",
@@ -3568,8 +3568,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
regnode *startpoint;
case GOSTART:
- case GOSUB: /* /(...(?1))/ */
- if (cur_eval && cur_eval->locinput==locinput) {
+ case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */
+ if (cur_eval && cur_eval->locinput==locinput) {
if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
Perl_croak(aTHX_ "Infinite recursion in regex");
if ( ++nochange_depth > max_nochange_depth )
@@ -3742,7 +3742,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
cur_curlyx = ST.prev_curlyx;
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
- if ( nochange_depth > 0 );
+ if ( nochange_depth )
nochange_depth--;
sayYES;
@@ -3760,7 +3760,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
cur_curlyx = ST.prev_curlyx;
/* XXXX This is too dramatic a measure... */
PL_reg_maxiter = 0;
- if ( nochange_depth > 0 );
+ if ( nochange_depth )
nochange_depth--;
sayNO_SILENT;
#undef ST
@@ -4755,8 +4755,6 @@ NULL
if (cur_eval) {
/* we've just finished A in /(??{A})B/; now continue with B */
I32 tmpix;
-
-
st->u.eval.toggle_reg_flags
= cur_eval->u.eval.toggle_reg_flags;
PL_reg_flags ^= st->u.eval.toggle_reg_flags;
@@ -4782,9 +4780,10 @@ NULL
DEBUG_EXECUTE_r(
PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n",
REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
- if ( nochange_depth > 0 );
- nochange_depth++;
- PUSH_YES_STATE_GOTO(EVAL_AB,
+ if ( nochange_depth )
+ nochange_depth--;
+
+ PUSH_YES_STATE_GOTO(EVAL_AB,
st->u.eval.prev_eval->u.eval.B); /* match B */
}
diff --git a/t/op/regexp_email.t b/t/op/regexp_email.t
new file mode 100644
index 0000000000..c53dd82860
--- /dev/null
+++ b/t/op/regexp_email.t
@@ -0,0 +1,94 @@
+#!./perl
+#
+# Tests to make sure the regexp engine doesn't run into limits too soon.
+#
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+}
+
+print "1..13\n";
+
+my $email = qr {
+ (?(DEFINE)
+ (?<address> (?&mailbox) | (?&group))
+ (?<mailbox> (?&name_addr) | (?&addr_spec))
+ (?<name_addr> (?&display_name)? (?&angle_addr))
+ (?<angle_addr> (?&CFWS)? < (?&addr_spec) > (?&CFWS)?)
+ (?<group> (?&display_name) : (?:(?&mailbox_list) | (?&CFWS))? ;
+ (?&CFWS)?)
+ (?<display_name> (?&phrase))
+ (?<mailbox_list> (?&mailbox) (?: , (?&mailbox))*)
+
+ (?<addr_spec> (?&local_part) \@ (?&domain))
+ (?<local_part> (?&dot_atom) | (?&quoted_string))
+ (?<domain> (?&dot_atom) | (?&domain_literal))
+ (?<domain_literal> (?&CFWS)? \[ (?: (?&FWS)? (?&dcontent))* (?&FWS)?
+ \] (?&CFWS)?)
+ (?<dcontent> (?&dtext) | (?&quoted_pair))
+ (?<dtext> (?&NO_WS_CTL) | [\x21-\x5a\x5e-\x7e])
+
+ (?<atext> (?&ALPHA) | (?&DIGIT) | [!#\$%&'*+-/=?^_`{|}~])
+ (?<atom> (?&CFWS)? (?&atext)+ (?&CFWS)?)
+ (?<dot_atom> (?&CFWS)? (?&dot_atom_text) (?&CFWS)?)
+ (?<dot_atom_text> (?&atext)+ (?: \. (?&atext)+)*)
+
+ (?<text> [\x01-\x09\x0b\x0c\x0e-\x7f])
+ (?<quoted_pair> \\ (?&text))
+
+ (?<qtext> (?&NO_WS_CTL) | [\x21\x23-\x5b\x5d-\x7e])
+ (?<qcontent> (?&qtext) | (?&quoted_pair))
+ (?<quoted_string> (?&CFWS)? (?&DQUOTE) (?:(?&FWS)? (?&qcontent))*
+ (?&FWS)? (?&DQUOTE) (?&CFWS)?)
+
+ (?<word> (?&atom) | (?&quoted_string))
+ (?<phrase> (?&word)+)
+
+ # Folding white space
+ (?<FWS> (?: (?&WSP)* (?&CRLF))? (?&WSP)+)
+ (?<ctext> (?&NO_WS_CTL) | [\x21-\x27\x2a-\x5b\x5d-\x7e])
+ (?<ccontent> (?&ctext) | (?&quoted_pair) | (?&comment))
+ (?<comment> \( (?: (?&FWS)? (?&ccontent))* (?&FWS)? \) )
+ (?<CFWS> (?: (?&FWS)? (?&comment))*
+ (?: (?:(?&FWS)? (?&comment)) | (?&FWS)))
+
+ # No whitespace control
+ (?<NO_WS_CTL> [\x01-\x08\x0b\x0c\x0e-\x1f\x7f])
+
+ (?<ALPHA> [A-Za-z])
+ (?<DIGIT> [0-9])
+ (?<CRLF> \x0d \x0a)
+ (?<DQUOTE> ")
+ (?<WSP> [\x20\x09])
+ )
+
+ (?&address)
+}x;
+
+my $count = 0;
+
+$| = 1;
+while (<DATA>) {
+ chomp;
+ next if /^#/;
+ print /^$email$/ ? "ok " : "not ok ", ++ $count, "\n";
+}
+
+#
+# Acme::MetaSyntactic ++
+#
+__DATA__
+Jeff_Tracy@thunderbirds.org
+"Lady Penelope"@thunderbirds.org
+"The\ Hood"@thunderbirds.org
+fred @ flintstones.net
+barney (rubble) @ flintstones.org
+bammbamm (bam! bam! (bam! bam! (bam!)) bam!) @ flintstones.org
+Michelangelo@[127.0.0.1]
+Donatello @ [127.0.0.1]
+Raphael (He as well) @ [127.0.0.1]
+"Leonardo" @ [127.0.0.1]
+Barbapapa <barbapapa @ barbapapa.net>
+"Barba Mama" <barbamama @ [127.0.0.1]>
+Barbalala (lalalalalalalala) <barbalala (Yes, her!) @ (barba) barbapapa.net>