summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2017-02-13 19:01:46 -0700
committer Karl Williamson <khw@cpan.org> 2017-02-13 21:24:10 -0700
commit f065e1e68bf6a5541c8ceba8c9fcc6e18f51a32b (patch)
tree e5eb48b118b39cae84c6d853ed0ff80c584de72c
parent d43328d502ac91c4d98e218d0721cd5f3bcd3950 (diff)
download perl-f065e1e68bf6a5541c8ceba8c9fcc6e18f51a32b.tar.gz
Don't try to compile a pattern known to be in error
Regular expression patterns are parsed by the lexer/toker, and then compiled by the regex compiler. It is foolish to try to compile one that the parser has rejected as syntactically bad; assumptions may be violated and segfaults ensue. This commit abandons all parsing immediately if a pattern had errors in it. A better solution would be to flag this pattern as not to be compiled, and continue parsing other things so as to find the most errors in a single attempt, but I don't think it's worth the extra effort. Making this change caused some misleading error messages in the test suite to be replaced by better ones.
-rw-r--r-- t/re/re_tests 7
-rw-r--r-- toke.c 9
2 files changed, 11 insertions, 5 deletions
diff --git a/t/re/re_tests b/t/re/re_tests
index f210202657..410fceadac 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1464,13 +1464,10 @@ abc\N abc\n n
# Verify get errors. For these, we need // or else puts it in single quotes,
# and bypasses the lexer.
/\N{U+}/ - c - Invalid hexadecimal number
-# Below currently gives a misleading message
-/[\N{U+}]/ - Sc - Unmatched
-/[\N{U+}]/ - sc - Syntax error in (?[...])
+/[\N{U+}]/ - c - Invalid hexadecimal number
/abc\N{def/ - c - Missing right brace
/\N{U+4AG3}/ - c - Invalid hexadecimal number
-/[\N{U+4AG3}]/ - Sc - Unmatched
-/[\N{U+4AG3}]/ - sc - Syntax error in (?[...])
+/[\N{U+4AG3}]/ - c - Invalid hexadecimal number
# And verify that in single quotes which bypasses the lexer, the regex compiler
# figures it out.
diff --git a/toke.c b/toke.c
index 383203aecf..880ac53b7d 100644
--- a/toke.c
+++ b/toke.c
@@ -5021,7 +5021,16 @@ Perl_yylex(pTHX)
s = PL_bufend;
}
else {
+ int save_error_count = PL_error_count;
+
s = scan_const(PL_bufptr);
+
+ /* Quit if this was a pattern and there were errors. This prevents
+ * us from trying to regex compile a broken pattern, which could
+ * lead to segfaults, etc. */
+ if (PL_lex_inpat && PL_error_count > save_error_count) {
+ yyquit();
+ }
if (*s == '\\')
PL_lex_state = LEX_INTERPCASEMOD;
else