summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2017-02-14 13:22:58 -0700
committerKarl Williamson <khw@cpan.org>2017-02-14 13:58:03 -0700
commit7c449856260e14be9e73a4060cb86a5e2f680a65 (patch)
tree7040e1bea5a1461f81d951f5777cd3d1a031b9b2
parent5376c9c46602f4cb47b306edc76c03bc8500e61f (diff)
downloadperl-7c449856260e14be9e73a4060cb86a5e2f680a65.tar.gz
Improve handling pattern compilation errors
Perl tries to continue parsing in the face of errors for the convenience of the person running the script, so as to batch up as many errors as possible, and cut down the number of runs. Some errors will, however, have a cascading effect, resulting in the parser getting confused as to the intent. Perl currently aborts parsing if 10 errors accumulate. However, some things are reparsed as compilation continues, in particular tr///, s///, and qr//. The code that reparses has an expectation of basic sanity in what it is looking at, and so reparsing with known errors can lead to segfaults. Recent commits have tightened this up to avoid reparsing, or substitute valid stuff before reparsing. This all works, as the code won't execute until all the errors get fixed. Commit f065e1e68bf6a5541c8ceba8c9fcc6e18f51a32b changed things so that if there is an error in parsing a pattern, the whole compilation is immediately aborted. Since then, I realized it would be relatively simple to instead, skip compilation of that particular pattern, but continue on with the parsing of the program as a whole, up to the maximum number of allowed errors. And again the program will refuse to execute after compilation if there were any errors. This commit implements that, the benefit being that we don't try to reparse a pattern that failed the original parse, but can go on to find errors elsewhere in the program.
-rw-r--r--op.c5
-rw-r--r--op.h4
-rw-r--r--toke.c8
3 files changed, 13 insertions, 4 deletions
diff --git a/op.c b/op.c
index 54993b57ce..f16c6b509a 100644
--- a/op.c
+++ b/op.c
@@ -5843,6 +5843,11 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor)
rx_flags |= RXf_SPLIT;
}
+ /* Skip compiling if parser found an error for this pattern */
+ if (pm->op_pmflags & PMf_HAS_ERROR) {
+ return o;
+ }
+
if (!has_code || !eng->op_comp) {
/* compile-time simple constant pattern */
diff --git a/op.h b/op.h
index 97dea42769..5a29bfbe2c 100644
--- a/op.h
+++ b/op.h
@@ -327,6 +327,10 @@ struct pmop {
* other end instead; this preserves binary compatibility. */
#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+2)
+/* Set by the parser if it discovers an error, so the regex shouldn't be
+ * compiled */
+#define PMf_HAS_ERROR (1U<<(PMf_BASE_SHIFT+4))
+
/* 'use re "taint"' in scope: taint $1 etc. if target tainted */
#define PMf_RETAINT (1U<<(PMf_BASE_SHIFT+5))
diff --git a/toke.c b/toke.c
index b9ea8486c1..6d0ab625c0 100644
--- a/toke.c
+++ b/toke.c
@@ -5029,11 +5029,11 @@ Perl_yylex(pTHX)
s = scan_const(PL_bufptr);
- /* Quit if this was a pattern and there were errors. This prevents
- * us from trying to regex compile a broken pattern, which could
- * lead to segfaults, etc. */
+ /* Set flag if this was a pattern and there were errors. op.c will
+ * refuse to compile a pattern with this flag set. Otherwise, we
+ * could get segfaults, etc. */
if (PL_lex_inpat && PL_error_count > save_error_count) {
- yyquit();
+ ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
}
if (*s == '\\')
PL_lex_state = LEX_INTERPCASEMOD;