diff options
author | Sascha Schumann <sas@php.net> | 2001-07-04 15:30:21 +0000 |
---|---|---|
committer | Sascha Schumann <sas@php.net> | 2001-07-04 15:30:21 +0000 |
commit | ef13ab5750202d17a138566f6e4e9ffca84eccf5 (patch) | |
tree | 74341539bab35b3a65f44f7ae23542b2c150ef2d /ext/standard/url_scanner_ex.re | |
parent | 98b1d4bbf4ddb67919c57072aa4735c79cee055b (diff) | |
download | php-git-ef13ab5750202d17a138566f6e4e9ffca84eccf5.tar.gz |
Improve url scanner speed (up to 40% for large chunks of data)
and handle some corner cases better. The scanner has been changed
to the format as proposed in "RE2C - A More Versatile Scanner Generator"
by Cowan et al.
Diffstat (limited to 'ext/standard/url_scanner_ex.re')
-rw-r--r-- | ext/standard/url_scanner_ex.re | 159 |
1 files changed, 98 insertions, 61 deletions
diff --git a/ext/standard/url_scanner_ex.re b/ext/standard/url_scanner_ex.re index 681f08c2c0..0d95d68ae5 100644 --- a/ext/standard/url_scanner_ex.re +++ b/ext/standard/url_scanner_ex.re @@ -87,30 +87,37 @@ PHP_INI_BEGIN() STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=fakeentry", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals) PHP_INI_END() +/*!re2c +any = [\000-\377]; +N = (any\[<]); +alpha = [a-zA-Z]; +*/ + +#define YYFILL(n) goto done +#define YYCTYPE unsigned char +#define YYCURSOR p +#define YYLIMIT q +#define YYMARKER r + static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *name, smart_str *val, const char *separator) { - register const char *p, *q; + register const char *p, *q, *r; const char *bash = NULL; const char *sep = "?"; - q = url->c + url->len; - - for (p = url->c; p < q; p++) { - switch(*p) { - case ':': - smart_str_append(dest, url); - return; - case '?': - sep = separator; - break; - case '#': - bash = p; - break; - } - } + q = (p = url->c) + url->len; +scan: +/*!re2c + ":" { smart_str_append(dest, url); return; } + "?" { sep = separator; goto done; } + "#" { bash = p; goto done; } + (any\[:?#])+ { goto scan; } +*/ +done: + /* Don't modify URLs of the format "#mark" */ - if (bash - url->c == 0) { + if (bash && bash - url->c == 0) { smart_str_append(dest, url); return; } @@ -129,6 +136,12 @@ static inline void append_modified_url(smart_str *url, smart_str *dest, smart_st smart_str_appendl(dest, bash, q - bash); } +#undef YYFILL +#undef YYCTYPE +#undef YYCURSOR +#undef YYLIMIT +#undef YYMARKER + static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC) { char f = 0; @@ -146,7 +159,7 @@ static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC) } enum { - STATE_PLAIN, + STATE_PLAIN = 0, STATE_TAG, STATE_NEXT_ARG, STATE_ARG, @@ -232,68 +245,72 @@ static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size YYCURSOR = ctx->buf.c; YYLIMIT = ctx->buf.c + ctx->buf.len; -/*!re2c -any = [\000-\377]; -alpha = [a-zA-Z]; -*/ + switch (STATE) { + case STATE_PLAIN: goto state_plain; + case STATE_TAG: goto state_tag; + case STATE_NEXT_ARG: goto state_next_arg; + case STATE_ARG: goto state_arg; + case STATE_BEFORE_VAL: goto state_before_val; + case STATE_VAL: goto state_val; + } + + +state_plain_begin: + STATE = STATE_PLAIN; - while(1) { - start = YYCURSOR; - scdebug(("state %d at %s\n", STATE, YYCURSOR)); - switch(STATE) { - - case STATE_PLAIN: +state_plain: + start = YYCURSOR; /*!re2c - [<] { passthru(STD_ARGS); STATE = STATE_TAG; continue; } - (any\[<]) { passthru(STD_ARGS); continue; } + "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; } + N+ { passthru(STD_ARGS); goto state_plain; } */ - break; - - case STATE_TAG: + +state_tag: + start = YYCURSOR; /*!re2c - alpha+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); continue; } - any { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; } + alpha+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; } + any { passthru(STD_ARGS); goto state_plain_begin; } */ - break; - - case STATE_NEXT_ARG: + +state_next_arg_begin: + STATE = STATE_NEXT_ARG; + +state_next_arg: + start = YYCURSOR; /*!re2c - ">" { passthru(STD_ARGS); handle_form(STD_ARGS); STATE = STATE_PLAIN; continue; } - [ \n] { passthru(STD_ARGS); continue; } - alpha { YYCURSOR--; STATE = STATE_ARG; continue; } - any { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; } + ">" { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; } + [ \v\t\n]+ { passthru(STD_ARGS); goto state_next_arg; } + alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; } + any { passthru(STD_ARGS); goto state_plain_begin; } */ - break; - case STATE_ARG: +state_arg: + start = YYCURSOR; /*!re2c - alpha+ { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; continue; } - any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; } + alpha+ { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; } + any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; } */ - case STATE_BEFORE_VAL: +state_before_val: + start = YYCURSOR; /*!re2c - [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; continue; } - any { YYCURSOR--; STATE = STATE_NEXT_ARG; continue; } + [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; } + any { --YYCURSOR; goto state_next_arg_begin; } */ - break; - case STATE_VAL: + +state_val: + start = YYCURSOR; /*!re2c - ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); STATE = STATE_NEXT_ARG; continue; } - ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); STATE = STATE_NEXT_ARG; continue; } - (any\[ \n>"])+ { handle_val(STD_ARGS, 0, '"'); STATE = STATE_NEXT_ARG; continue; } - any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; } + ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; } + ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; } + (any\[ \n>"'])+ { handle_val(STD_ARGS, 0, '"'); goto state_next_arg_begin; } + any { passthru(STD_ARGS); goto state_next_arg_begin; } */ - break; - } - } stop: - scdebug(("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR)); - rest = YYLIMIT - start; - + scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest)); /* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */ if (rest < 0) rest = 0; @@ -301,6 +318,24 @@ stop: ctx->buf.len = rest; } +char *url_adapt_flush(size_t *newlen) +{ + char *ret = NULL; + url_adapt_state_ex_t *ctx; + BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); + + if (ctx->buf.len) { + ret = ctx->buf.c; + *newlen = ctx->buf.len; + ctx->buf.c = 0; + ctx->buf.len = 0; + } + + return ret; +} + char *url_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen) { smart_str surl = {0}; @@ -334,6 +369,8 @@ char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char mainloop(ctx, src, srclen); *newlen = ctx->result.len; + if (!ctx->result.c) + smart_str_appendl(&ctx->result, "", 0); smart_str_0(&ctx->result); ctx->result.len = 0; return ctx->result.c; |