summary | refs | log | tree | commit | diff
path: root/ext/tokenizer/tokenizer.c
diff options
context:
space:
mode:
author: Alex Dowad <alexinbeijing@gmail.com> 2020-04-08 13:19:39 +0200
committer: Nikita Popov <nikita.ppv@gmail.com> 2020-04-14 11:22:23 +0200
commit: 80598f12507c5cbde04163289e4d2575f05d2a0c (patch)
tree: f5d3c26400dcb577e6c508469d1de086f5fda695 /ext/tokenizer/tokenizer.c
parent: d4471c6aaecab2f1cc467ea8e92040c73685bd21 (diff)
download: php-git-80598f12507c5cbde04163289e4d2575f05d2a0c.tar.gz
Syntax errors caused by unclosed {, [, ( mention specific location
Aside from a few very specific syntax errors for which detailed exceptions are thrown, generally PHP just emits the default error messages generated by bison on syntax error. These messages are very uninformative; they just say "Unexpected ... at line ...". This is most problematic with constructs which can span an arbitrary number of lines, such as blocks of code delimited by { }, 'if' conditions delimited by ( ), and so on. If a closing delimiter is missed, the block will run for the entire remainder of the source file (which could be thousands of lines), and then at the end, a parse error will be thrown with the dreaded words: "Unexpected end of file". Therefore, track the positions of opening and closing delimiters and ensure that they match up correctly. If any mismatch or missing delimiter is detected, immediately throw a parse error which points the user to the offending line. This is best done in the *lexer* and not in the parser. Thanks to Nikita Popov and George Peter Banyard for suggesting improvements. Fixes bug #79368. Closes GH-5364.
Diffstat (limited to 'ext/tokenizer/tokenizer.c')
-rw-r--r-- ext/tokenizer/tokenizer.c 4
1 files changed, 3 insertions, 1 deletions
diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c
index 222c3e96a3..364d70bf08 100644
--- a/ext/tokenizer/tokenizer.c
+++ b/ext/tokenizer/tokenizer.c
@@ -392,6 +392,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
array_init(return_value);
while ((token_type = lex_scan(&token, NULL))) {
+ ZEND_ASSERT(token_type != T_ERROR);
+
add_token(
return_value, token_type, zendtext, zendleng, token_line,
token_class, &interned_strings);
@@ -408,7 +410,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en
&& --need_tokens == 0
) {
/* fetch the rest into a T_INLINE_HTML */
- if (zendcursor != zendlimit) {
+ if (zendcursor < zendlimit) {
add_token(
return_value, T_INLINE_HTML, zendcursor, zendlimit - zendcursor,
token_line, token_class, &interned_strings);