diff options
author | Xinchen Hui <laruence@gmail.com> | 2015-05-26 14:17:21 +0800 |
---|---|---|
committer | Xinchen Hui <laruence@gmail.com> | 2015-05-26 14:17:21 +0800 |
commit | 49b10ee264c88edc4fc52d4ab9ae1ca843be7c1f (patch) | |
tree | 37914a2c0af1c7a5a1691d7d2973f9cf0b1263da /ext/tokenizer/tokenizer.c | |
parent | 9f1788f00db71cf06f62b02a24973ce2718efeb6 (diff) | |
parent | 770a4629432a8722bd1ad46eee9bccdd00c61e30 (diff) | |
download | php-git-49b10ee264c88edc4fc52d4ab9ae1ca843be7c1f.tar.gz |
Merge branch 'master' into merge-fastcgi
Diffstat (limited to 'ext/tokenizer/tokenizer.c')
-rw-r--r-- | ext/tokenizer/tokenizer.c | 142 |
1 files changed, 120 insertions, 22 deletions
diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c index c011894441..2a4fa90ca2 100644 --- a/ext/tokenizer/tokenizer.c +++ b/ext/tokenizer/tokenizer.c @@ -37,6 +37,12 @@ #define zendcursor LANG_SCNG(yy_cursor) #define zendlimit LANG_SCNG(yy_limit) +#define TOKEN_PARSE 1 + +void tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS) { + REGISTER_LONG_CONSTANT("TOKEN_PARSE", TOKEN_PARSE, CONST_CS|CONST_PERSISTENT); +} + /* {{{ arginfo */ ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1) ZEND_ARG_INFO(0, source) @@ -83,6 +89,7 @@ ZEND_GET_MODULE(tokenizer) PHP_MINIT_FUNCTION(tokenizer) { tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU); + tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU); return SUCCESS; } /* }}} */ @@ -97,19 +104,33 @@ PHP_MINFO_FUNCTION(tokenizer) } /* }}} */ -static void tokenize(zval *return_value) +static zend_bool tokenize(zval *return_value, zend_string *source) { + zval source_zval; + zend_lex_state original_lex_state; zval token; zval keyword; int token_type; zend_bool destroy; int token_line = 1; - int need_tokens = -1; // for __halt_compiler lexing. -1 = disabled + int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */ + + ZVAL_STR_COPY(&source_zval, source); + zend_save_lexical_state(&original_lex_state); + if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) { + zend_restore_lexical_state(&original_lex_state); + return 0; + } + + LANG_SCNG(yy_state) = yycINITIAL; array_init(return_value); ZVAL_NULL(&token); while ((token_type = lex_scan(&token))) { + + if(token_type == T_ERROR) break; + destroy = 1; switch (token_type) { case T_CLOSE_TAG: @@ -123,8 +144,6 @@ static void tokenize(zval *return_value) case T_DOC_COMMENT: destroy = 0; break; - case T_ERROR: - return; } if (token_type >= 256) { @@ -147,13 +166,13 @@ static void tokenize(zval *return_value) } ZVAL_NULL(&token); - // after T_HALT_COMPILER collect the next three non-dropped tokens + /* after T_HALT_COMPILER collect the next three non-dropped tokens */ if (need_tokens != -1) { if (token_type != T_WHITESPACE && token_type != T_OPEN_TAG - && token_type != T_COMMENT && token_type != T_DOC_COMMENT - && --need_tokens == 0 + && token_type != T_COMMENT && token_type != T_DOC_COMMENT + && --need_tokens == 0 ) { - // fetch the rest into a T_INLINE_HTML + /* fetch the rest into a T_INLINE_HTML */ if (zendcursor != zendlimit) { array_init(&keyword); add_next_index_long(&keyword, T_INLINE_HTML); @@ -169,34 +188,113 @@ static void tokenize(zval *return_value) token_line = CG(zend_lineno); } + + zval_dtor(&source_zval); + zend_restore_lexical_state(&original_lex_state); + + return 1; } -/* {{{ proto array token_get_all(string source) - */ -PHP_FUNCTION(token_get_all) +zval token_stream; + +void on_event(zend_php_scanner_event event, int token, int line) { - zend_string *source; - zval source_zval; - zend_lex_state original_lex_state; + zval keyword; + HashTable *tokens_ht; + zval *token_zv; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &source) == FAILURE) { - return; + switch(event) { + case ON_TOKEN: + if (token == T_ERROR || token == END) break; + if (token >= 256) { + array_init(&keyword); + add_next_index_long(&keyword, token); + add_next_index_stringl(&keyword, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng)); + add_next_index_long(&keyword, line); + add_next_index_zval(&token_stream, &keyword); + } else { + add_next_index_stringl(&token_stream, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng)); + } + break; + case ON_FEEDBACK: + tokens_ht = Z_ARRVAL(token_stream); + token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1); + if (token_zv && Z_TYPE_P(token_zv) == IS_ARRAY) { + ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token); + } + break; + case ON_STOP: + if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) { + array_init(&keyword); + add_next_index_long(&keyword, T_INLINE_HTML); + add_next_index_stringl(&keyword, + (char *)LANG_SCNG(yy_cursor), LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor)); + add_next_index_long(&keyword, CG(zend_lineno)); + add_next_index_zval(&token_stream, &keyword); + } + break; } +} + +static zend_bool tokenize_parse(zval *return_value, zend_string *source) +{ + zval source_zval; + zend_lex_state original_lex_state; + zend_bool original_in_compilation; + zend_bool success; ZVAL_STR_COPY(&source_zval, source); + + original_in_compilation = CG(in_compilation); + CG(in_compilation) = 1; zend_save_lexical_state(&original_lex_state); - if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) { - zend_restore_lexical_state(&original_lex_state); - RETURN_FALSE; - } + if ((success = (zend_prepare_string_for_scanning(&source_zval, "") == SUCCESS))) { + CG(ast) = NULL; + CG(ast_arena) = zend_arena_create(1024 * 32); + LANG_SCNG(yy_state) = yycINITIAL; + LANG_SCNG(on_event) = on_event; - LANG_SCNG(yy_state) = yycINITIAL; + array_init(&token_stream); + if((success = (zendparse() == SUCCESS))) { + ZVAL_ZVAL(return_value, &token_stream, 1, 0); + } + zval_dtor(&token_stream); - tokenize(return_value); + zend_ast_destroy(CG(ast)); + zend_arena_destroy(CG(ast_arena)); + } + /* restore compiler and scanner global states */ zend_restore_lexical_state(&original_lex_state); + CG(in_compilation) = original_in_compilation; + zval_dtor(&source_zval); + + return success; +} + +/* }}} */ + +/* {{{ proto array token_get_all(string source) + */ +PHP_FUNCTION(token_get_all) +{ + zend_string *source; + zend_long flags = 0; + zend_bool success; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &source, &flags) == FAILURE) { + return; + } + + if (flags & TOKEN_PARSE) { + success = tokenize_parse(return_value, source); + } else { + success = tokenize(return_value, source); + } + + if (!success) RETURN_FALSE; } /* }}} */ |