summaryrefslogtreecommitdiff
path: root/Zend/zend_language_scanner.l
diff options
context:
space:
mode:
authorNikita Popov <nikita.ppv@gmail.com>2020-06-05 16:55:20 +0200
committerNikita Popov <nikita.ppv@gmail.com>2020-06-08 12:55:14 +0200
commitb03cafd19c01db57b89727ce77cc89a7d816077c (patch)
treeb8e39eeca2edf8fbb74e4b3fb7e2979470b959c0 /Zend/zend_language_scanner.l
parent08518b18b2095b8c5158e272b4fe6c339f0eb1b7 (diff)
downloadphp-git-b03cafd19c01db57b89727ce77cc89a7d816077c.tar.gz
Fix bug #77966: Cannot alias a method named "namespace"
This is a bit tricky: In this case we have "namespace as", which means that we will only recognize "namespace" as an identifier when the lookahead token is already at the "as". This means that zend_lex_tstring picks up the wrong identifier. We solve this by actually assigning the identifier as the semantic value on the parser stack -- as in almost all cases we will not actually need the identifier, this is just an (offset, size) reference, not a copy of the string. Additionally, we need to teach the lexer feedback mechanism used by tokenizer TOKEN_PARSE mode to apply feedback to something other than the very last token. To that end, we pass through the token text and check the tokens in reverse order to find the right one. Closes GH-5668.
Diffstat (limited to 'Zend/zend_language_scanner.l')
-rw-r--r--Zend/zend_language_scanner.l183
1 files changed, 100 insertions, 83 deletions
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index 8a3e88edfc..4aa024a69a 100644
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l
@@ -306,13 +306,15 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
}
}
-ZEND_API void zend_lex_tstring(zval *zv)
+ZEND_API void zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref)
{
+ char *ident = (char *) SCNG(yy_start) + ident_ref.offset;
+ size_t length = ident_ref.len;
if (SCNG(on_event)) {
- SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, SCNG(on_event_context));
+ SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, ident, length, SCNG(on_event_context));
}
- ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
+ ZVAL_STRINGL(zv, ident, length);
}
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
@@ -1334,6 +1336,11 @@ static int check_nesting_at_end()
goto emit_token_with_str; \
} while (0)
+#define RETURN_TOKEN_WITH_IDENT(_token) do { \
+ token = _token; \
+ goto emit_token_with_ident; \
+ } while (0)
+
#define RETURN_OR_SKIP_TOKEN(_token) do { \
token = _token; \
if (PARSER_MODE()) { \
@@ -1386,169 +1393,169 @@ NEWLINE ("\r"|"\n"|"\r\n")
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
<ST_IN_SCRIPTING>"exit" {
- RETURN_TOKEN(T_EXIT);
+ RETURN_TOKEN_WITH_IDENT(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
- RETURN_TOKEN(T_EXIT);
+ RETURN_TOKEN_WITH_IDENT(T_EXIT);
}
<ST_IN_SCRIPTING>"fn" {
- RETURN_TOKEN(T_FN);
+ RETURN_TOKEN_WITH_IDENT(T_FN);
}
<ST_IN_SCRIPTING>"function" {
- RETURN_TOKEN(T_FUNCTION);
+ RETURN_TOKEN_WITH_IDENT(T_FUNCTION);
}
<ST_IN_SCRIPTING>"const" {
- RETURN_TOKEN(T_CONST);
+ RETURN_TOKEN_WITH_IDENT(T_CONST);
}
<ST_IN_SCRIPTING>"return" {
- RETURN_TOKEN(T_RETURN);
+ RETURN_TOKEN_WITH_IDENT(T_RETURN);
}
<ST_IN_SCRIPTING>"yield"{WHITESPACE}"from"[^a-zA-Z0-9_\x80-\xff] {
yyless(yyleng - 1);
HANDLE_NEWLINES(yytext, yyleng);
- RETURN_TOKEN(T_YIELD_FROM);
+ RETURN_TOKEN_WITH_IDENT(T_YIELD_FROM);
}
<ST_IN_SCRIPTING>"yield" {
- RETURN_TOKEN(T_YIELD);
+ RETURN_TOKEN_WITH_IDENT(T_YIELD);
}
<ST_IN_SCRIPTING>"try" {
- RETURN_TOKEN(T_TRY);
+ RETURN_TOKEN_WITH_IDENT(T_TRY);
}
<ST_IN_SCRIPTING>"catch" {
- RETURN_TOKEN(T_CATCH);
+ RETURN_TOKEN_WITH_IDENT(T_CATCH);
}
<ST_IN_SCRIPTING>"finally" {
- RETURN_TOKEN(T_FINALLY);
+ RETURN_TOKEN_WITH_IDENT(T_FINALLY);
}
<ST_IN_SCRIPTING>"throw" {
- RETURN_TOKEN(T_THROW);
+ RETURN_TOKEN_WITH_IDENT(T_THROW);
}
<ST_IN_SCRIPTING>"if" {
- RETURN_TOKEN(T_IF);
+ RETURN_TOKEN_WITH_IDENT(T_IF);
}
<ST_IN_SCRIPTING>"elseif" {
- RETURN_TOKEN(T_ELSEIF);
+ RETURN_TOKEN_WITH_IDENT(T_ELSEIF);
}
<ST_IN_SCRIPTING>"endif" {
- RETURN_TOKEN(T_ENDIF);
+ RETURN_TOKEN_WITH_IDENT(T_ENDIF);
}
<ST_IN_SCRIPTING>"else" {
- RETURN_TOKEN(T_ELSE);
+ RETURN_TOKEN_WITH_IDENT(T_ELSE);
}
<ST_IN_SCRIPTING>"while" {
- RETURN_TOKEN(T_WHILE);
+ RETURN_TOKEN_WITH_IDENT(T_WHILE);
}
<ST_IN_SCRIPTING>"endwhile" {
- RETURN_TOKEN(T_ENDWHILE);
+ RETURN_TOKEN_WITH_IDENT(T_ENDWHILE);
}
<ST_IN_SCRIPTING>"do" {
- RETURN_TOKEN(T_DO);
+ RETURN_TOKEN_WITH_IDENT(T_DO);
}
<ST_IN_SCRIPTING>"for" {
- RETURN_TOKEN(T_FOR);
+ RETURN_TOKEN_WITH_IDENT(T_FOR);
}
<ST_IN_SCRIPTING>"endfor" {
- RETURN_TOKEN(T_ENDFOR);
+ RETURN_TOKEN_WITH_IDENT(T_ENDFOR);
}
<ST_IN_SCRIPTING>"foreach" {
- RETURN_TOKEN(T_FOREACH);
+ RETURN_TOKEN_WITH_IDENT(T_FOREACH);
}
<ST_IN_SCRIPTING>"endforeach" {
- RETURN_TOKEN(T_ENDFOREACH);
+ RETURN_TOKEN_WITH_IDENT(T_ENDFOREACH);
}
<ST_IN_SCRIPTING>"declare" {
- RETURN_TOKEN(T_DECLARE);
+ RETURN_TOKEN_WITH_IDENT(T_DECLARE);
}
<ST_IN_SCRIPTING>"enddeclare" {
- RETURN_TOKEN(T_ENDDECLARE);
+ RETURN_TOKEN_WITH_IDENT(T_ENDDECLARE);
}
<ST_IN_SCRIPTING>"instanceof" {
- RETURN_TOKEN(T_INSTANCEOF);
+ RETURN_TOKEN_WITH_IDENT(T_INSTANCEOF);
}
<ST_IN_SCRIPTING>"as" {
- RETURN_TOKEN(T_AS);
+ RETURN_TOKEN_WITH_IDENT(T_AS);
}
<ST_IN_SCRIPTING>"switch" {
- RETURN_TOKEN(T_SWITCH);
+ RETURN_TOKEN_WITH_IDENT(T_SWITCH);
}
<ST_IN_SCRIPTING>"endswitch" {
- RETURN_TOKEN(T_ENDSWITCH);
+ RETURN_TOKEN_WITH_IDENT(T_ENDSWITCH);
}
<ST_IN_SCRIPTING>"case" {
- RETURN_TOKEN(T_CASE);
+ RETURN_TOKEN_WITH_IDENT(T_CASE);
}
<ST_IN_SCRIPTING>"default" {
- RETURN_TOKEN(T_DEFAULT);
+ RETURN_TOKEN_WITH_IDENT(T_DEFAULT);
}
<ST_IN_SCRIPTING>"break" {
- RETURN_TOKEN(T_BREAK);
+ RETURN_TOKEN_WITH_IDENT(T_BREAK);
}
<ST_IN_SCRIPTING>"continue" {
- RETURN_TOKEN(T_CONTINUE);
+ RETURN_TOKEN_WITH_IDENT(T_CONTINUE);
}
<ST_IN_SCRIPTING>"goto" {
- RETURN_TOKEN(T_GOTO);
+ RETURN_TOKEN_WITH_IDENT(T_GOTO);
}
<ST_IN_SCRIPTING>"echo" {
- RETURN_TOKEN(T_ECHO);
+ RETURN_TOKEN_WITH_IDENT(T_ECHO);
}
<ST_IN_SCRIPTING>"print" {
- RETURN_TOKEN(T_PRINT);
+ RETURN_TOKEN_WITH_IDENT(T_PRINT);
}
<ST_IN_SCRIPTING>"class" {
- RETURN_TOKEN(T_CLASS);
+ RETURN_TOKEN_WITH_IDENT(T_CLASS);
}
<ST_IN_SCRIPTING>"interface" {
- RETURN_TOKEN(T_INTERFACE);
+ RETURN_TOKEN_WITH_IDENT(T_INTERFACE);
}
<ST_IN_SCRIPTING>"trait" {
- RETURN_TOKEN(T_TRAIT);
+ RETURN_TOKEN_WITH_IDENT(T_TRAIT);
}
<ST_IN_SCRIPTING>"extends" {
- RETURN_TOKEN(T_EXTENDS);
+ RETURN_TOKEN_WITH_IDENT(T_EXTENDS);
}
<ST_IN_SCRIPTING>"implements" {
- RETURN_TOKEN(T_IMPLEMENTS);
+ RETURN_TOKEN_WITH_IDENT(T_IMPLEMENTS);
}
<ST_IN_SCRIPTING>"->" {
@@ -1592,15 +1599,15 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>"new" {
- RETURN_TOKEN(T_NEW);
+ RETURN_TOKEN_WITH_IDENT(T_NEW);
}
<ST_IN_SCRIPTING>"clone" {
- RETURN_TOKEN(T_CLONE);
+ RETURN_TOKEN_WITH_IDENT(T_CLONE);
}
<ST_IN_SCRIPTING>"var" {
- RETURN_TOKEN(T_VAR);
+ RETURN_TOKEN_WITH_IDENT(T_VAR);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
@@ -1640,79 +1647,79 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>"eval" {
- RETURN_TOKEN(T_EVAL);
+ RETURN_TOKEN_WITH_IDENT(T_EVAL);
}
<ST_IN_SCRIPTING>"include" {
- RETURN_TOKEN(T_INCLUDE);
+ RETURN_TOKEN_WITH_IDENT(T_INCLUDE);
}
<ST_IN_SCRIPTING>"include_once" {
- RETURN_TOKEN(T_INCLUDE_ONCE);
+ RETURN_TOKEN_WITH_IDENT(T_INCLUDE_ONCE);
}
<ST_IN_SCRIPTING>"require" {
- RETURN_TOKEN(T_REQUIRE);
+ RETURN_TOKEN_WITH_IDENT(T_REQUIRE);
}
<ST_IN_SCRIPTING>"require_once" {
- RETURN_TOKEN(T_REQUIRE_ONCE);
+ RETURN_TOKEN_WITH_IDENT(T_REQUIRE_ONCE);
}
<ST_IN_SCRIPTING>"namespace" {
- RETURN_TOKEN(T_NAMESPACE);
+ RETURN_TOKEN_WITH_IDENT(T_NAMESPACE);
}
<ST_IN_SCRIPTING>"use" {
- RETURN_TOKEN(T_USE);
+ RETURN_TOKEN_WITH_IDENT(T_USE);
}
<ST_IN_SCRIPTING>"insteadof" {
- RETURN_TOKEN(T_INSTEADOF);
+ RETURN_TOKEN_WITH_IDENT(T_INSTEADOF);
}
<ST_IN_SCRIPTING>"global" {
- RETURN_TOKEN(T_GLOBAL);
+ RETURN_TOKEN_WITH_IDENT(T_GLOBAL);
}
<ST_IN_SCRIPTING>"isset" {
- RETURN_TOKEN(T_ISSET);
+ RETURN_TOKEN_WITH_IDENT(T_ISSET);
}
<ST_IN_SCRIPTING>"empty" {
- RETURN_TOKEN(T_EMPTY);
+ RETURN_TOKEN_WITH_IDENT(T_EMPTY);
}
<ST_IN_SCRIPTING>"__halt_compiler" {
- RETURN_TOKEN(T_HALT_COMPILER);
+ RETURN_TOKEN_WITH_IDENT(T_HALT_COMPILER);
}
<ST_IN_SCRIPTING>"static" {
- RETURN_TOKEN(T_STATIC);
+ RETURN_TOKEN_WITH_IDENT(T_STATIC);
}
<ST_IN_SCRIPTING>"abstract" {
- RETURN_TOKEN(T_ABSTRACT);
+ RETURN_TOKEN_WITH_IDENT(T_ABSTRACT);
}
<ST_IN_SCRIPTING>"final" {
- RETURN_TOKEN(T_FINAL);
+ RETURN_TOKEN_WITH_IDENT(T_FINAL);
}
<ST_IN_SCRIPTING>"private" {
- RETURN_TOKEN(T_PRIVATE);
+ RETURN_TOKEN_WITH_IDENT(T_PRIVATE);
}
<ST_IN_SCRIPTING>"protected" {
- RETURN_TOKEN(T_PROTECTED);
+ RETURN_TOKEN_WITH_IDENT(T_PROTECTED);
}
<ST_IN_SCRIPTING>"public" {
- RETURN_TOKEN(T_PUBLIC);
+ RETURN_TOKEN_WITH_IDENT(T_PUBLIC);
}
<ST_IN_SCRIPTING>"unset" {
- RETURN_TOKEN(T_UNSET);
+ RETURN_TOKEN_WITH_IDENT(T_UNSET);
}
<ST_IN_SCRIPTING>"=>" {
@@ -1720,15 +1727,15 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>"list" {
- RETURN_TOKEN(T_LIST);
+ RETURN_TOKEN_WITH_IDENT(T_LIST);
}
<ST_IN_SCRIPTING>"array" {
- RETURN_TOKEN(T_ARRAY);
+ RETURN_TOKEN_WITH_IDENT(T_ARRAY);
}
<ST_IN_SCRIPTING>"callable" {
- RETURN_TOKEN(T_CALLABLE);
+ RETURN_TOKEN_WITH_IDENT(T_CALLABLE);
}
<ST_IN_SCRIPTING>"++" {
@@ -1832,15 +1839,15 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>"OR" {
- RETURN_TOKEN(T_LOGICAL_OR);
+ RETURN_TOKEN_WITH_IDENT(T_LOGICAL_OR);
}
<ST_IN_SCRIPTING>"AND" {
- RETURN_TOKEN(T_LOGICAL_AND);
+ RETURN_TOKEN_WITH_IDENT(T_LOGICAL_AND);
}
<ST_IN_SCRIPTING>"XOR" {
- RETURN_TOKEN(T_LOGICAL_XOR);
+ RETURN_TOKEN_WITH_IDENT(T_LOGICAL_XOR);
}
<ST_IN_SCRIPTING>"<<" {
@@ -2096,35 +2103,35 @@ string:
}
<ST_IN_SCRIPTING>"__CLASS__" {
- RETURN_TOKEN(T_CLASS_C);
+ RETURN_TOKEN_WITH_IDENT(T_CLASS_C);
}
<ST_IN_SCRIPTING>"__TRAIT__" {
- RETURN_TOKEN(T_TRAIT_C);
+ RETURN_TOKEN_WITH_IDENT(T_TRAIT_C);
}
<ST_IN_SCRIPTING>"__FUNCTION__" {
- RETURN_TOKEN(T_FUNC_C);
+ RETURN_TOKEN_WITH_IDENT(T_FUNC_C);
}
<ST_IN_SCRIPTING>"__METHOD__" {
- RETURN_TOKEN(T_METHOD_C);
+ RETURN_TOKEN_WITH_IDENT(T_METHOD_C);
}
<ST_IN_SCRIPTING>"__LINE__" {
- RETURN_TOKEN(T_LINE);
+ RETURN_TOKEN_WITH_IDENT(T_LINE);
}
<ST_IN_SCRIPTING>"__FILE__" {
- RETURN_TOKEN(T_FILE);
+ RETURN_TOKEN_WITH_IDENT(T_FILE);
}
<ST_IN_SCRIPTING>"__DIR__" {
- RETURN_TOKEN(T_DIR);
+ RETURN_TOKEN_WITH_IDENT(T_DIR);
}
<ST_IN_SCRIPTING>"__NAMESPACE__" {
- RETURN_TOKEN(T_NS_C);
+ RETURN_TOKEN_WITH_IDENT(T_NS_C);
}
<SHEBANG>"#!" .* {NEWLINE} {
@@ -3013,14 +3020,24 @@ emit_token_with_val:
emit_token:
if (SCNG(on_event)) {
- SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
+ SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
+ }
+ return token;
+
+emit_token_with_ident:
+ if (PARSER_MODE()) {
+ elem->ident.offset = SCNG(yy_text) - SCNG(yy_start);
+ elem->ident.len = SCNG(yy_leng);
+ }
+ if (SCNG(on_event)) {
+ SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
}
return token;
return_whitespace:
HANDLE_NEWLINES(yytext, yyleng);
if (SCNG(on_event)) {
- SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, SCNG(on_event_context));
+ SCNG(on_event)(ON_TOKEN, T_WHITESPACE, start_line, yytext, yyleng, SCNG(on_event_context));
}
if (PARSER_MODE()) {
start_line = CG(zend_lineno);
@@ -3031,7 +3048,7 @@ return_whitespace:
skip_token:
if (SCNG(on_event)) {
- SCNG(on_event)(ON_TOKEN, token, start_line, SCNG(on_event_context));
+ SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));
}
start_line = CG(zend_lineno);
goto restart;