diff options
author | Aaron Piotrowski <aaron@trowski.com> | 2015-06-14 18:53:11 -0500 |
---|---|---|
committer | Aaron Piotrowski <aaron@trowski.com> | 2015-06-14 18:53:11 -0500 |
commit | 110e0a5a2cda3bfa7778bb871502ff2b50e59f76 (patch) | |
tree | fa736924b4befce442cc72429b4e07812856c541 /ext/tokenizer | |
parent | 5c54bf015dc4fd930394709d80665d9a731f6f99 (diff) | |
parent | 6cb7b48846d129f088b50c4a8c4b00b7dc886f5c (diff) | |
download | php-git-110e0a5a2cda3bfa7778bb871502ff2b50e59f76.tar.gz |
Merge branch 'master' into throwable-interface
# Conflicts:
# Zend/zend_language_scanner.c
# Zend/zend_language_scanner.l
# ext/simplexml/tests/SimpleXMLElement_xpath.phpt
Diffstat (limited to 'ext/tokenizer')
-rw-r--r-- | ext/tokenizer/tests/bug67395.phpt | 2 | ||||
-rw-r--r-- | ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt | 19 | ||||
-rw-r--r-- | ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt | 81 | ||||
-rw-r--r-- | ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt | 68 | ||||
-rw-r--r-- | ext/tokenizer/tests/token_get_all_error.phpt | 8 | ||||
-rw-r--r-- | ext/tokenizer/tokenizer.c | 143 |
6 files changed, 295 insertions, 26 deletions
diff --git a/ext/tokenizer/tests/bug67395.phpt b/ext/tokenizer/tests/bug67395.phpt index c9b7f3012f..8101c81edb 100644 --- a/ext/tokenizer/tests/bug67395.phpt +++ b/ext/tokenizer/tests/bug67395.phpt @@ -1,5 +1,7 @@ --TEST-- Bug 67395: token_name() does not return name for T_POW and T_POW_EQUAL token +--SKIPIF-- +<?php if (!extension_loaded("tokenizer")) print "skip"; ?> --FILE-- <?php diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt new file mode 100644 index 0000000000..b9da91502a --- /dev/null +++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt @@ -0,0 +1,19 @@ +--TEST-- +Parse errors during token_get_all() with TOKEN_PARSE flag +--SKIPIF-- +<?php if (!extension_loaded("tokenizer")) print "skip"; ?> +--FILE-- +<?php + +try { + token_get_all('<?php invalid code;', TOKEN_PARSE); +} catch (ParseError $e) { + echo $e->getMessage(), PHP_EOL; +} + +echo "Done"; + +?> +--EXPECT-- +syntax error, unexpected 'code' (T_STRING) +Done diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt new file mode 100644 index 0000000000..eb0e300964 --- /dev/null +++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt @@ -0,0 +1,81 @@ +--TEST-- +Semi reserved words support: member access +--SKIPIF-- +<?php if (!extension_loaded("tokenizer")) print "skip"; ?> +--FILE-- +<?php +$tokens = token_get_all('<?php +X::continue; +X::$continue; +$x->$continue; +X::continue(); +$x->continue(); +X::class; + +class X { + const CONTINUE = 1; + public $x = self::CONTINUE + 1; +} +', TOKEN_PARSE); + +array_walk($tokens, function($tk) { + if(is_array($tk)) { + if(($t = token_name($tk[0])) == 'T_WHITESPACE') return; + echo "L{$tk[2]}: ".$t." {$tk[1]}", PHP_EOL; + } + else echo $tk, PHP_EOL; +}); + +echo "Done"; + +?> +--EXPECTF-- +L1: T_OPEN_TAG <?php + +L2: T_STRING X +L2: T_DOUBLE_COLON :: +L2: T_STRING continue +; +L3: T_STRING X +L3: T_DOUBLE_COLON :: +L3: T_VARIABLE $continue +; +L4: T_VARIABLE $x +L4: T_OBJECT_OPERATOR -> +L4: T_VARIABLE $continue +; +L5: T_STRING X +L5: T_DOUBLE_COLON :: +L5: T_STRING continue +( +) +; +L6: T_VARIABLE $x +L6: T_OBJECT_OPERATOR -> +L6: T_STRING continue +( +) +; +L7: T_STRING X +L7: T_DOUBLE_COLON :: +L7: T_STRING class +; +L9: T_CLASS class +L9: T_STRING X +{ +L10: T_CONST const +L10: T_STRING CONTINUE += +L10: T_LNUMBER 1 +; +L11: T_PUBLIC public +L11: T_VARIABLE $x += +L11: T_STRING self +L11: T_DOUBLE_COLON :: +L11: T_STRING CONTINUE ++ +L11: T_LNUMBER 1 +; +} +Done diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt new file mode 100644 index 0000000000..3dd8e14d84 --- /dev/null +++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt @@ -0,0 +1,68 @@ +--TEST-- +Semi reserved words support: class const +--SKIPIF-- +<?php if (!extension_loaded("tokenizer")) print "skip"; ?> +--FILE-- +<?php +$tokens = token_get_all('<?php + class SomeClass { + const CONST = 1; + const CONTINUE = (self::CONST + 1); + const ARRAY = [1, self::CONTINUE => [3, 4], 5]; + } +', TOKEN_PARSE); + +array_walk($tokens, function($tk) { + if(is_array($tk)) { + if(($t = token_name($tk[0])) == 'T_WHITESPACE') return; + echo "L{$tk[2]}: ".$t." {$tk[1]}", PHP_EOL; + } + else echo $tk, PHP_EOL; +}); + +echo "Done"; + +?> +--EXPECTF-- +L1: T_OPEN_TAG <?php + +L2: T_CLASS class +L2: T_STRING SomeClass +{ +L3: T_CONST const +L3: T_STRING CONST += +L3: T_LNUMBER 1 +; +L4: T_CONST const +L4: T_STRING CONTINUE += +( +L4: T_STRING self +L4: T_DOUBLE_COLON :: +L4: T_STRING CONST ++ +L4: T_LNUMBER 1 +) +; +L5: T_CONST const +L5: T_STRING ARRAY += +[ +L5: T_LNUMBER 1 +, +L5: T_STRING self +L5: T_DOUBLE_COLON :: +L5: T_STRING CONTINUE +L5: T_DOUBLE_ARROW => +[ +L5: T_LNUMBER 3 +, +L5: T_LNUMBER 4 +] +, +L5: T_LNUMBER 5 +] +; +} +Done diff --git a/ext/tokenizer/tests/token_get_all_error.phpt b/ext/tokenizer/tests/token_get_all_error.phpt index 29e97c38c4..9ded0a1774 100644 --- a/ext/tokenizer/tests/token_get_all_error.phpt +++ b/ext/tokenizer/tests/token_get_all_error.phpt @@ -19,7 +19,7 @@ var_dump( token_get_all()); echo "-- Testing token_get_all() function with more than expected no. of arguments --\n"; $source = '<?php ?>'; $extra_arg = 10; -var_dump( token_get_all($source, $extra_arg)); +var_dump( token_get_all($source, true, $extra_arg)); echo "Done" ?> @@ -28,10 +28,10 @@ echo "Done" -- Testing token_get_all() function with zero arguments -- -Warning: token_get_all() expects exactly 1 parameter, 0 given in %s on line %d +Warning: token_get_all() expects at least 1 parameter, 0 given in %s on line 11 NULL -- Testing token_get_all() function with more than expected no. of arguments -- -Warning: token_get_all() expects exactly 1 parameter, 2 given in %s on line %d +Warning: token_get_all() expects at most 2 parameters, 3 given in %s on line 17 NULL -Done +Done
\ No newline at end of file diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c index c011894441..63405ea6cd 100644 --- a/ext/tokenizer/tokenizer.c +++ b/ext/tokenizer/tokenizer.c @@ -37,6 +37,12 @@ #define zendcursor LANG_SCNG(yy_cursor) #define zendlimit LANG_SCNG(yy_limit) +#define TOKEN_PARSE 1 + +void tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS) { + REGISTER_LONG_CONSTANT("TOKEN_PARSE", TOKEN_PARSE, CONST_CS|CONST_PERSISTENT); +} + /* {{{ arginfo */ ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1) ZEND_ARG_INFO(0, source) @@ -83,6 +89,7 @@ ZEND_GET_MODULE(tokenizer) PHP_MINIT_FUNCTION(tokenizer) { tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU); + tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU); return SUCCESS; } /* }}} */ @@ -97,19 +104,33 @@ PHP_MINFO_FUNCTION(tokenizer) } /* }}} */ -static void tokenize(zval *return_value) +static zend_bool tokenize(zval *return_value, zend_string *source) { + zval source_zval; + zend_lex_state original_lex_state; zval token; zval keyword; int token_type; zend_bool destroy; int token_line = 1; - int need_tokens = -1; // for __halt_compiler lexing. -1 = disabled + int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */ + + ZVAL_STR_COPY(&source_zval, source); + zend_save_lexical_state(&original_lex_state); + if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) { + zend_restore_lexical_state(&original_lex_state); + return 0; + } + + LANG_SCNG(yy_state) = yycINITIAL; array_init(return_value); ZVAL_NULL(&token); while ((token_type = lex_scan(&token))) { + + if(token_type == T_ERROR) break; + destroy = 1; switch (token_type) { case T_CLOSE_TAG: @@ -123,8 +144,6 @@ static void tokenize(zval *return_value) case T_DOC_COMMENT: destroy = 0; break; - case T_ERROR: - return; } if (token_type >= 256) { @@ -147,13 +166,13 @@ static void tokenize(zval *return_value) } ZVAL_NULL(&token); - // after T_HALT_COMPILER collect the next three non-dropped tokens + /* after T_HALT_COMPILER collect the next three non-dropped tokens */ if (need_tokens != -1) { if (token_type != T_WHITESPACE && token_type != T_OPEN_TAG - && token_type != T_COMMENT && token_type != T_DOC_COMMENT - && --need_tokens == 0 + && token_type != T_COMMENT && token_type != T_DOC_COMMENT + && --need_tokens == 0 ) { - // fetch the rest into a T_INLINE_HTML + /* fetch the rest into a T_INLINE_HTML */ if (zendcursor != zendlimit) { array_init(&keyword); add_next_index_long(&keyword, T_INLINE_HTML); @@ -169,34 +188,114 @@ static void tokenize(zval *return_value) token_line = CG(zend_lineno); } + + zval_dtor(&source_zval); + zend_restore_lexical_state(&original_lex_state); + + return 1; } -/* {{{ proto array token_get_all(string source) - */ -PHP_FUNCTION(token_get_all) +zval token_stream; + +void on_event(zend_php_scanner_event event, int token, int line) { - zend_string *source; - zval source_zval; - zend_lex_state original_lex_state; + zval keyword; + HashTable *tokens_ht; + zval *token_zv; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &source) == FAILURE) { - return; + switch(event) { + case ON_TOKEN: + if (token == T_ERROR || token == END) break; + if (token >= 256) { + array_init(&keyword); + add_next_index_long(&keyword, token); + add_next_index_stringl(&keyword, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng)); + add_next_index_long(&keyword, line); + add_next_index_zval(&token_stream, &keyword); + } else { + add_next_index_stringl(&token_stream, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng)); + } + break; + case ON_FEEDBACK: + tokens_ht = Z_ARRVAL(token_stream); + token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1); + if (token_zv && Z_TYPE_P(token_zv) == IS_ARRAY) { + ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token); + } + break; + case ON_STOP: + if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) { + array_init(&keyword); + add_next_index_long(&keyword, T_INLINE_HTML); + add_next_index_stringl(&keyword, + (char *)LANG_SCNG(yy_cursor), LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor)); + add_next_index_long(&keyword, CG(zend_lineno)); + add_next_index_zval(&token_stream, &keyword); + } + break; } +} + +static zend_bool tokenize_parse(zval *return_value, zend_string *source) +{ + zval source_zval; + zend_lex_state original_lex_state; + zend_bool original_in_compilation; + zend_bool success; ZVAL_STR_COPY(&source_zval, source); + + original_in_compilation = CG(in_compilation); + CG(in_compilation) = 1; zend_save_lexical_state(&original_lex_state); - if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) { - zend_restore_lexical_state(&original_lex_state); - RETURN_FALSE; - } + if ((success = (zend_prepare_string_for_scanning(&source_zval, "") == SUCCESS))) { + CG(ast) = NULL; + CG(ast_arena) = zend_arena_create(1024 * 32); + LANG_SCNG(yy_state) = yycINITIAL; + LANG_SCNG(on_event) = on_event; - LANG_SCNG(yy_state) = yycINITIAL; + array_init(&token_stream); + if((success = (zendparse() == SUCCESS))) { + ZVAL_COPY_VALUE(return_value, &token_stream); + } else { + zval_ptr_dtor(&token_stream); + } - tokenize(return_value); + zend_ast_destroy(CG(ast)); + zend_arena_destroy(CG(ast_arena)); + } + /* restore compiler and scanner global states */ zend_restore_lexical_state(&original_lex_state); + CG(in_compilation) = original_in_compilation; + zval_dtor(&source_zval); + + return success; +} + +/* }}} */ + +/* {{{ proto array token_get_all(string source) + */ +PHP_FUNCTION(token_get_all) +{ + zend_string *source; + zend_long flags = 0; + zend_bool success; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &source, &flags) == FAILURE) { + return; + } + + if (flags & TOKEN_PARSE) { + success = tokenize_parse(return_value, source); + } else { + success = tokenize(return_value, source); + } + + if (!success) RETURN_FALSE; } /* }}} */ |