diff options
author | Sascha Schumann <sas@php.net> | 2000-09-20 01:15:10 +0000 |
---|---|---|
committer | Sascha Schumann <sas@php.net> | 2000-09-20 01:15:10 +0000 |
commit | a2dc8a2fa5c40dbfca74d6ce72da2929277dba9a (patch) | |
tree | 2fcede60080f589a60e90de86a101d2b217672ac /ext | |
parent | ceedeb4a7e351e3582adc629f2acde8ff1182404 (diff) | |
download | php-git-a2dc8a2fa5c40dbfca74d6ce72da2929277dba9a.tar.gz |
Second rewrite of the main scanner engine. This version is cleaner and faster.
It currently handles the following tags:
<FRAME .. SRC=URL ..>
<INPUT .. TYPE=IMAGE SRC=URL ..>
<A .. HREF=URL ..>
<AREA .. HREF=URL ..>
<IMG .. SRC=URL ..>
Additional ones can be added within seconds.
Support for HTML forms has been significantly improved: the scanner
now adds proper hidden fields for the session ID.
Diffstat (limited to 'ext')
-rw-r--r-- | ext/standard/url_scanner_ex.c | 834 | ||||
-rw-r--r-- | ext/standard/url_scanner_ex.h | 23 | ||||
-rw-r--r-- | ext/standard/url_scanner_ex.re | 357 |
3 files changed, 554 insertions, 660 deletions
diff --git a/ext/standard/url_scanner_ex.c b/ext/standard/url_scanner_ex.c index 7f3f0fbf7b..84021b07f4 100644 --- a/ext/standard/url_scanner_ex.c +++ b/ext/standard/url_scanner_ex.c @@ -1,4 +1,4 @@ -/* Generated by re2c 0.5 on Tue Sep 19 22:11:37 2000 */ +/* Generated by re2c 0.5 on Wed Sep 20 03:07:32 2000 */ #line 1 "/home/sas/src/php4/ext/standard/url_scanner_ex.re" /* +----------------------------------------------------------------------+ @@ -37,8 +37,6 @@ #define url_adapt_ext url_adapt_ext_ex #define url_scanner url_scanner_ex -#define url_adapt_state url_adapt_state_ex -#define url_adapt_state_t url_adapt_state_ex_t static inline void smart_str_append(smart_str *dest, smart_str *src) { @@ -90,15 +88,14 @@ static inline void smart_str_setl(smart_str *dest, const char *src, size_t len) dest->c = (char *) src; } -static inline void smart_str_appends(smart_str *dest, const char *src) -{ - smart_str_appendl(dest, src, strlen(src)); -} +#define smart_str_appends(dest, src) smart_str_appendl(dest, src, sizeof(src)-1) +#if 0 static inline void smart_str_copys(smart_str *dest, const char *src) { smart_str_copyl(dest, src, strlen(src)); } +#endif static inline void smart_str_sets(smart_str *dest, const char *src) { @@ -107,9 +104,9 @@ static inline void smart_str_sets(smart_str *dest, const char *src) static inline void attach_url(smart_str *url, smart_str *name, smart_str *val, const char *separator) { - if (strchr(url->c, ':')) return; + if (memchr(url->c, ':', url->len)) return; - if (strchr(url->c, '?')) + if (memchr(url->c, '?', url->len)) smart_str_appendl(url, separator, 1); else smart_str_appendl(url, "?", 1); @@ -131,12 +128,14 @@ struct php_tag_arg { static struct php_tag_arg check_tag_arg[] = { TAG_ARG_ENTRY(a, href) TAG_ARG_ENTRY(area, href) - TAG_ARG_ENTRY(frame, source) + TAG_ARG_ENTRY(frame, src) TAG_ARG_ENTRY(img, src) + TAG_ARG_ENTRY(input, src) + TAG_ARG_ENTRY(form, fake_entry_for_passing_on_form_tag) {0} }; -static inline void 
tag_arg(url_adapt_state_t *ctx PLS_DC) +static inline void tag_arg(url_adapt_state_ex_t *ctx PLS_DC) { char f = 0; int i; @@ -153,134 +152,137 @@ static inline void tag_arg(url_adapt_state_t *ctx PLS_DC) smart_str_appends(&ctx->result, "\""); if (f) { - attach_url(&ctx->para, &ctx->name, &ctx->value, PG(arg_separator)); + attach_url(&ctx->val, &ctx->q_name, &ctx->q_value, PG(arg_separator)); } - smart_str_append(&ctx->result, &ctx->para); + smart_str_append(&ctx->result, &ctx->val); smart_str_appends(&ctx->result, "\""); } -#line 162 - - -#define NEXT continue - -#define COPY_ALL \ - smart_str_appendl(&ctx->result, start, YYCURSOR - start); \ - start = NULL; \ - NEXT - -#define YYFILL(n) goto finish -#define YYCTYPE unsigned char -#define YYLIMIT endptr -#define YYCURSOR cursor -#define YYMARKER marker - -#define HANDLE_FORM \ - if (ctx->tag.len == 4 && strncasecmp(ctx->tag.c, "form", ctx->tag.len) == 0) { \ - smart_str_appends(&ctx->result, "><INPUT TYPE=HIDDEN NAME=\""); \ - smart_str_append(&ctx->result, &ctx->name); \ - smart_str_appends(&ctx->result, "\" VALUE=\""); \ - smart_str_append(&ctx->result, &ctx->value); \ - smart_str_appends(&ctx->result, "\""); \ - } - -#define GO(n) ctx->state = n - enum { STATE_PLAIN, STATE_TAG, STATE_NEXT_ARG, STATE_ARG, - STATE_PARA + STATE_BEFORE_VAL, + STATE_VAL }; -static void mainloop(url_adapt_state_t *ctx, smart_str *newstuff) +#define YYFILL(n) goto stop +#define YYCTYPE char +#define YYCURSOR xp +#define YYLIMIT end +#define YYMARKER q +#define STATE ctx->state + +#define PASSTHRU() {\ + smart_str_appendl(&ctx->result, start, YYCURSOR - start); \ +} + +#define HANDLE_FORM() {\ + if (ctx->tag.len == 4 && strncasecmp(ctx->tag.c, "form", 4) == 0) {\ + smart_str_appends(&ctx->result, "<INPUT TYPE=HIDDEN NAME=\""); \ + smart_str_append(&ctx->result, &ctx->q_name); \ + smart_str_appends(&ctx->result, "\" VALUE=\""); \ + smart_str_append(&ctx->result, &ctx->q_value); \ + smart_str_appends(&ctx->result, "\">"); \ + } \ +} + +/* 
+ * HANDLE_TAG copies the HTML Tag and checks whether we + * have that tag in our table. If we might modify it, + * we continue to scan the tag, otherwise we simply copy the complete + * HTML stuff to the result buffer. + */ + +#define HANDLE_TAG() {\ + int __ok = 0; \ + int i; \ + smart_str_setl(&ctx->tag, start, YYCURSOR - start); \ + for (i = 0; check_tag_arg[i].tag; i++) { \ + if (ctx->tag.len == check_tag_arg[i].taglen \ + && strncasecmp(ctx->tag.c, check_tag_arg[i].tag, ctx->tag.len) == 0) { \ + __ok = 1; \ + break; \ + } \ + } \ + STATE = __ok ? STATE_NEXT_ARG : STATE_PLAIN; \ +} + +#define HANDLE_ARG() {\ + smart_str_setl(&ctx->arg, start, YYCURSOR - start); \ +} +#define HANDLE_VAL(quotes) {\ + smart_str_copyl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2); \ + tag_arg(ctx PLS_CC); \ +} + +/* + * Since arg/tag are read-only during the mainloop, we do not need + * to copy them. We need those variables across multiple calls + * to url_adapt() though, but they point to a private buffer. So we + * copy them before leaving the mainloop() and restore them at + * the beginning. 
+ */ + +#define MOVE_TO_CTX(X) \ + if (ctx->X.c) \ + smart_str_copyl(&ctx->c_##X, ctx->X.c, ctx->X.len); \ + else \ + smart_str_free(&ctx->c_##X) + +#define FETCH_FROM_CTX(X) \ + smart_str_setl(&ctx->X, ctx->c_##X.c, ctx->c_##X.len) + +static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen) { - char *para_start, *arg_start, *tag_start; - char *start = NULL; - char *cursor; - char *marker; - char *endptr; + char *end, *q; + char *xp; + char *start; + int rest; PLS_FETCH(); - arg_start = para_start = tag_start = NULL; - smart_str_append(&ctx->work, newstuff); - smart_str_free(&ctx->result); + FETCH_FROM_CTX(arg); + FETCH_FROM_CTX(tag); + + smart_str_appendl(&ctx->buf, newdata, newlen); + + YYCURSOR = ctx->buf.c; + YYLIMIT = ctx->buf.c + ctx->buf.len; - smart_str_setl(&ctx->arg, ctx->c_arg.c, ctx->c_arg.len); - smart_str_setl(&ctx->tag, ctx->c_tag.c, ctx->c_tag.len); +#line 254 - cursor = ctx->work.c; - endptr = ctx->work.c + ctx->work.len; - while (YYCURSOR < YYLIMIT) { + while(1) { start = YYCURSOR; - #ifdef SCANNER_DEBUG - printf("state %d:%s'\n", ctx->state, YYCURSOR); + printf("state %d at %s\n", STATE, YYCURSOR); #endif - switch (ctx->state) { - + switch(STATE) { + case STATE_PLAIN: { YYCTYPE yych; unsigned int yyaccept; - static unsigned char yybm[] = { - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 0, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 
128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, - }; goto yy0; yy1: ++YYCURSOR; yy0: if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; - if(yybm[0+yych] & 128) goto yy4; + if(yych != '<') goto yy4; yy2: yych = *++YYCURSOR; yy3: -#line 225 - { tag_start = YYCURSOR; GO(STATE_TAG); COPY_ALL;} -yy4: ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy5: if(yybm[0+yych] & 128) goto yy4; -yy6: -#line 226 - { COPY_ALL; } +#line 265 + { PASSTHRU(); STATE = STATE_TAG; continue; } +yy4: yych = *++YYCURSOR; +yy5: +#line 266 + { PASSTHRU(); continue; } } -#line 227 +#line 267 - break; - + break; + case STATE_TAG: { YYCTYPE yych; @@ -319,59 +321,84 @@ yy6: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - goto yy7; -yy8: ++YYCURSOR; -yy7: + goto yy6; +yy7: ++YYCURSOR; +yy6: if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych <= '@') goto yy11; - if(yych <= 'Z') goto yy9; - if(yych <= '`') goto yy11; - if(yych >= '{') goto yy11; -yy9: yyaccept = 0; - yych = *(YYMARKER = ++YYCURSOR); - if(yybm[0+yych] & 128) goto yy14; - if(yych == ' ') goto yy12; - if(yych == '>') goto yy12; -yy10: -#line 242 - { - YYCURSOR--; - GO(STATE_PLAIN); - tag_start = NULL; - NEXT; - } -yy11: yych = *++YYCURSOR; - goto yy10; -yy12: yych = *++YYCURSOR; -yy13: -#line 232 - { - YYCURSOR--; - arg_start = YYCURSOR; - smart_str_setl(&ctx->tag, start, YYCURSOR - start); 
-#ifdef SCANNER_DEBUG - printf("TAG(%s)\n", ctx->tag.c); -#endif - GO(STATE_NEXT_ARG); - COPY_ALL; - } -yy14: ++YYCURSOR; + if(yych <= '@') goto yy10; + if(yych <= 'Z') goto yy8; + if(yych <= '`') goto yy10; + if(yych >= '{') goto yy10; +yy8: yych = *++YYCURSOR; + goto yy13; +yy9: +#line 272 + { HANDLE_TAG() /* Sets STATE */; PASSTHRU(); continue; } +yy10: yych = *++YYCURSOR; +yy11: +#line 273 + { PASSTHRU(); continue; } +yy12: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy15: if(yybm[0+yych] & 128) goto yy14; - if(yych == ' ') goto yy12; - if(yych == '>') goto yy12; -yy16: YYCURSOR = YYMARKER; - switch(yyaccept){ - case 0: goto yy10; +yy13: if(yybm[0+yych] & 128) goto yy12; + goto yy9; +} +#line 274 + + break; + + case STATE_NEXT_ARG: +{ + YYCTYPE yych; + unsigned int yyaccept; + goto yy14; +yy15: ++YYCURSOR; +yy14: + if(YYLIMIT == YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + if(yych <= '='){ + if(yych <= '\n'){ + if(yych <= '\t') goto yy22; + goto yy18; + } else { + if(yych == ' ') goto yy18; + goto yy22; + } + } else { + if(yych <= 'Z'){ + if(yych <= '>') goto yy16; + if(yych <= '@') goto yy22; + goto yy20; + } else { + if(yych <= '`') goto yy22; + if(yych <= 'z') goto yy20; + goto yy22; + } } +yy16: yych = *++YYCURSOR; +yy17: +#line 279 + { PASSTHRU(); HANDLE_FORM(); STATE = STATE_PLAIN; continue; } +yy18: yych = *++YYCURSOR; +yy19: +#line 280 + { PASSTHRU(); continue; } +yy20: yych = *++YYCURSOR; +yy21: +#line 281 + { YYCURSOR--; STATE = STATE_ARG; continue; } +yy22: yych = *++YYCURSOR; +yy23: +#line 282 + { PASSTHRU(); continue; } } -#line 248 +#line 283 - break; + break; - case STATE_NEXT_ARG: + case STATE_ARG: { YYCTYPE yych; unsigned int yyaccept; @@ -380,18 +407,18 @@ yy16: YYCURSOR = YYMARKER; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 128, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 
0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -409,373 +436,268 @@ yy16: YYCURSOR = YYMARKER; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - goto yy17; -yy18: ++YYCURSOR; -yy17: - if(YYLIMIT == YYCURSOR) YYFILL(1); + goto yy24; +yy25: ++YYCURSOR; +yy24: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yybm[0+yych] & 128) goto yy20; - if(yych == '>') goto yy23; -yy19:yy20: ++YYCURSOR; + if(yych <= '@') goto yy28; + if(yych <= 'Z') goto yy26; + if(yych <= '`') goto yy28; + if(yych >= '{') goto yy28; +yy26: yych = *++YYCURSOR; + goto yy31; +yy27: +#line 288 + { PASSTHRU(); HANDLE_ARG(); STATE = STATE_BEFORE_VAL; continue; } +yy28: yych = *++YYCURSOR; +yy29: +#line 289 + { PASSTHRU(); STATE = STATE_NEXT_ARG; continue; } +yy30: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy21: if(yybm[0+yych] & 128) goto yy20; -yy22: -#line 253 - { - GO(STATE_ARG); - NEXT; - } -yy23: yych = *++YYCURSOR; -yy24: -#line 257 - { - HANDLE_FORM; - GO(STATE_PLAIN); - tag_start = NULL; - COPY_ALL; - } +yy31: if(yybm[0+yych] & 128) goto yy30; + goto yy27; } -#line 263 +#line 290 - break; - case STATE_ARG: - smart_str_appendl(&ctx->result, " ", 1); + case STATE_BEFORE_VAL: { YYCTYPE yych; unsigned int yyaccept; static unsigned char yybm[] = { - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 128, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 
64, 64, 64, 0, 0, 64, - 64, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 64, 64, 64, 64, 64, - 64, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, }; - goto yy25; -yy26: ++YYCURSOR; -yy25: - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yybm[0+yych] & 32) goto yy28; - if(yych == ' ') goto yy27; - if(yych <= '<') goto yy30; - if(yych >= '?') goto yy30; -yy27: -#line 281 - { - arg_start = YYCURSOR; - ctx->state--; - COPY_ALL; - } -yy28: yyaccept = 0; - 
YYMARKER = ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); + goto yy32; +yy33: ++YYCURSOR; +yy32: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; -yy29: if(yybm[0+yych] & 128) goto yy32; - if(yych <= '@'){ - if(yych <= '<') goto yy30; - if(yych <= '=') goto yy35; - if(yych <= '>') goto yy27; - } else { - if(yych <= 'Z') goto yy28; - if(yych <= '`') goto yy30; - if(yych <= 'z') goto yy28; - } -yy30: ++YYCURSOR; + if(yych == ' ') goto yy34; + if(yych == '=') goto yy36; + goto yy38; +yy34: yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + if(yych == ' ') goto yy41; + if(yych == '=') goto yy39; +yy35: +#line 295 + { YYCURSOR--; STATE = STATE_NEXT_ARG; continue; } +yy36: yych = *++YYCURSOR; + goto yy40; +yy37: +#line 294 + { PASSTHRU(); STATE = STATE_VAL; continue; } +yy38: yych = *++YYCURSOR; + goto yy35; +yy39: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy31: if(yybm[0+yych] & 64) goto yy30; - goto yy27; -yy32: ++YYCURSOR; +yy40: if(yybm[0+yych] & 128) goto yy39; + goto yy37; +yy41: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy33: if(yybm[0+yych] & 128) goto yy32; - if(yych == '=') goto yy35; -yy34: YYCURSOR = YYMARKER; +yy42: if(yych == ' ') goto yy41; + if(yych == '=') goto yy39; +yy43: YYCURSOR = YYMARKER; switch(yyaccept){ - case 0: goto yy27; + case 0: goto yy35; } -yy35: ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy36: if(yych == ' ') goto yy35; -yy37: -#line 269 - { - char *p; - - for (p = start; isalpha(*p); p++); - smart_str_setl(&ctx->arg, start, p - start); -#ifdef SCANNER_DEBUG - printf("ARG(%s)\n", ctx->arg.c); -#endif - para_start = YYCURSOR; - ctx->state++; - COPY_ALL; - } } -#line 286 +#line 296 break; - case STATE_PARA: + case STATE_VAL: { YYCTYPE yych; unsigned int yyaccept; static unsigned char yybm[] = { - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 
224, 224, - 128, 224, 32, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 0, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 0, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, - 224, 224, 224, 224, 224, 224, 224, 224, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 128, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 128, 192, 0, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 0, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 
192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 192, 192, 192, 192, 192, 192, }; - goto yy38; -yy39: ++YYCURSOR; -yy38: - if((YYLIMIT - YYCURSOR) < 4) YYFILL(4); + goto yy44; +yy45: ++YYCURSOR; +yy44: + if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - if(yych <= '"'){ - if(yych == ' ') goto yy43; - if(yych <= '!') goto yy42; + if(yych <= ' '){ + if(yych == '\n') goto yy50; + if(yych <= '\037') goto yy48; + goto yy50; } else { - if(yych <= '>'){ - if(yych <= '=') goto yy42; - goto yy43; + if(yych <= '"'){ + if(yych <= '!') goto yy48; } else { - if(yych == '^') goto yy43; - goto yy42; + if(yych == '>') goto yy50; + goto yy48; } } -yy40: yyaccept = 0; - yych = *(YYMARKER = ++YYCURSOR); - if(yych != '^') goto yy51; -yy41: -#line 315 - { - YYCURSOR--; - ctx->state = 2; - NEXT; - } -yy42: yyaccept = 0; +yy46: yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); - if(yych == '^') goto yy41; - goto yy47; -yy43: yych = *++YYCURSOR; - goto yy41; -yy44: yych = *++YYCURSOR; -yy45: + if(yybm[0+yych] & 128) goto yy53; +yy47: #line 303 - { - YYCURSOR--; - para_start = NULL; - smart_str_copyl(&ctx->para, start, YYCURSOR - start); -#ifdef SCANNER_DEBUG - printf("PARA(%s)\n", ctx->para.c); -#endif - tag_arg(ctx PLS_CC); - arg_start = YYCURSOR; - GO(STATE_NEXT_ARG); - NEXT; - } -yy46: ++YYCURSOR; + { PASSTHRU(); STATE = STATE_NEXT_ARG; continue; } +yy48: yych = *++YYCURSOR; + goto yy52; +yy49: +#line 302 + { HANDLE_VAL(0); STATE = STATE_NEXT_ARG; continue; } +yy50: yych = *++YYCURSOR; + goto yy47; +yy51: ++YYCURSOR; + if(YYLIMIT == YYCURSOR) 
YYFILL(1); + yych = *YYCURSOR; +yy52: if(yybm[0+yych] & 64) goto yy51; + goto yy49; +yy53: ++YYCURSOR; if(YYLIMIT == YYCURSOR) YYFILL(1); yych = *YYCURSOR; -yy47: if(yybm[0+yych] & 32) goto yy46; - if(yych <= ']') goto yy44; -yy48: YYCURSOR = YYMARKER; +yy54: if(yybm[0+yych] & 128) goto yy53; + if(yych <= '=') goto yy56; +yy55: YYCURSOR = YYMARKER; switch(yyaccept){ - case 1: goto yy45; - case 0: goto yy41; + case 0: goto yy47; } -yy49: yyaccept = 1; - yych = *(YYMARKER = ++YYCURSOR); - if(yych == '>') goto yy45; - if(yych == '^') goto yy45; - goto yy57; -yy50: ++YYCURSOR; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; -yy51: if(yybm[0+yych] & 64) goto yy50; - if(yych <= '!') goto yy49; - if(yych <= '"') goto yy52; - if(yych <= ']') goto yy44; - goto yy48; -yy52: yych = *++YYCURSOR; - if(yych == ' ') goto yy53; - if(yych != '>') goto yy47; -yy53: yych = *++YYCURSOR; -yy54: -#line 291 - { - YYCURSOR--; - para_start = NULL; - smart_str_copyl(&ctx->para, start + 1, YYCURSOR - start - 2); -#ifdef SCANNER_DEBUG - printf("PARA(%s)\n", ctx->para.c); -#endif - tag_arg(ctx PLS_CC); - arg_start = YYCURSOR; - GO(STATE_NEXT_ARG); - NEXT; - } -yy55: yych = *++YYCURSOR; - if(yych == ' ') goto yy58; - if(yych == '>') goto yy58; - goto yy48; -yy56: ++YYCURSOR; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; -yy57: if(yybm[0+yych] & 128) goto yy56; - if(yych <= '=') goto yy55; - goto yy48; -yy58: yych = *++YYCURSOR; - goto yy54; +yy56: yych = *++YYCURSOR; +yy57: +#line 301 + { HANDLE_VAL(1); STATE = STATE_NEXT_ARG; continue; } } -#line 320 +#line 304 break; - } } - -#define PRESERVE(s) \ - size_t n = ctx->work.len - (s - ctx->work.c); \ - memmove(ctx->work.c, s, n + 1); \ - ctx->work.len = n - -finish: - if (ctx->arg.c) - smart_str_copyl(&ctx->c_arg, ctx->arg.c, ctx->arg.len); - else - smart_str_free(&ctx->c_arg); - if (ctx->tag.c) - smart_str_copyl(&ctx->c_tag, ctx->tag.c, ctx->tag.len); - else - smart_str_free(&ctx->c_tag); - - if (ctx->state >= 2) { 
- if (para_start) { - PRESERVE(para_start); - ctx->state = 4; - } else { - if (arg_start) { PRESERVE(arg_start); } - ctx->state = 2; - } - } else if (tag_start) { - PRESERVE(tag_start); - ctx->state = 1; - } else { - ctx->state = 0; - if (start) smart_str_appendl(&ctx->result, start, YYCURSOR - start); - smart_str_free(&ctx->work); } -#ifdef SCANNER_DEBUG - if (ctx->work.c) { - printf("PRESERVING %s'\n", ctx->work.c); - } +stop: +#ifdef SCANNER_DEBUG + printf("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR); #endif + + MOVE_TO_CTX(tag); + MOVE_TO_CTX(arg); + + rest = YYLIMIT - start; + + memmove(ctx->buf.c, start, rest); + ctx->buf.c[rest] = '\0'; + ctx->buf.len = rest; } + char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char *value, size_t *newlen) { - smart_str str = {0,0}; char *ret; + url_adapt_state_ex_t *ctx; BLS_FETCH(); - smart_str_sets(&BG(url_adapt_state).name, name); - smart_str_sets(&BG(url_adapt_state).value, value); - str.c = (char *) src; - str.len = srclen; - mainloop(&BG(url_adapt_state), &str); + ctx = &BG(url_adapt_state_ex); - *newlen = BG(url_adapt_state).result.len; + smart_str_sets(&ctx->q_name, name); + smart_str_sets(&ctx->q_value, value); + mainloop(ctx, src, srclen); -#ifdef SCANNER_DEBUG - printf("(%d)NEW(%d): %s'\n", srclen, BG(url_adapt_state).result.len, BG(url_adapt_state).result.c); -#endif + *newlen = ctx->result.len; -#if 1 - ret = BG(url_adapt_state).result.c; - BG(url_adapt_state).result.c = NULL; + if (ctx->result.len == 0) { + return strdup(""); + } + ret = ctx->result.c; + ctx->result.c = NULL; + ctx->result.len = ctx->result.a = 0; return ret; -#else - return strdup(BG(url_adapt_state).result.c); -#endif } PHP_RINIT_FUNCTION(url_scanner) { + url_adapt_state_ex_t *ctx; BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); - memset(&BG(url_adapt_state), 0, sizeof(BG(url_adapt_state))); + memset(ctx, 0, sizeof(*ctx)); return SUCCESS; } 
PHP_RSHUTDOWN_FUNCTION(url_scanner) { + url_adapt_state_ex_t *ctx; BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); - smart_str_free(&BG(url_adapt_state).result); - smart_str_free(&BG(url_adapt_state).work); - smart_str_free(&BG(url_adapt_state).c_tag); - smart_str_free(&BG(url_adapt_state).c_arg); - smart_str_free(&BG(url_adapt_state).para); + smart_str_free(&ctx->result); + smart_str_free(&ctx->buf); + smart_str_free(&ctx->c_tag); + smart_str_free(&ctx->c_arg); + smart_str_free(&ctx->val); return SUCCESS; } diff --git a/ext/standard/url_scanner_ex.h b/ext/standard/url_scanner_ex.h index e207a4b431..059552b3de 100644 --- a/ext/standard/url_scanner_ex.h +++ b/ext/standard/url_scanner_ex.h @@ -30,17 +30,26 @@ typedef struct { size_t a; } smart_str; + typedef struct { - smart_str c_arg; + /* Used by the mainloop of the scanner */ + smart_str tag; /* read only */ + smart_str arg; /* read only */ + smart_str val; + smart_str buf; + + /* Dito, but they are used only for preserving data across calls */ smart_str c_tag; - smart_str arg; - smart_str tag; - smart_str para; - smart_str work; + smart_str c_arg; + + /* The result buffer */ smart_str result; + + /* The data which is appended to each relative URL */ + smart_str q_name; + smart_str q_value; + int state; - smart_str name; - smart_str value; } url_adapt_state_ex_t; #endif diff --git a/ext/standard/url_scanner_ex.re b/ext/standard/url_scanner_ex.re index ea4f24ad21..790f38ad62 100644 --- a/ext/standard/url_scanner_ex.re +++ b/ext/standard/url_scanner_ex.re @@ -35,8 +35,6 @@ #define url_adapt_ext url_adapt_ext_ex #define url_scanner url_scanner_ex -#define url_adapt_state url_adapt_state_ex -#define url_adapt_state_t url_adapt_state_ex_t static inline void smart_str_append(smart_str *dest, smart_str *src) { @@ -88,15 +86,14 @@ static inline void smart_str_setl(smart_str *dest, const char *src, size_t len) dest->c = (char *) src; } -static inline void smart_str_appends(smart_str *dest, const char *src) -{ - 
smart_str_appendl(dest, src, strlen(src)); -} +#define smart_str_appends(dest, src) smart_str_appendl(dest, src, sizeof(src)-1) +#if 0 static inline void smart_str_copys(smart_str *dest, const char *src) { smart_str_copyl(dest, src, strlen(src)); } +#endif static inline void smart_str_sets(smart_str *dest, const char *src) { @@ -105,9 +102,9 @@ static inline void smart_str_sets(smart_str *dest, const char *src) static inline void attach_url(smart_str *url, smart_str *name, smart_str *val, const char *separator) { - if (strchr(url->c, ':')) return; + if (memchr(url->c, ':', url->len)) return; - if (strchr(url->c, '?')) + if (memchr(url->c, '?', url->len)) smart_str_appendl(url, separator, 1); else smart_str_appendl(url, "?", 1); @@ -129,12 +126,14 @@ struct php_tag_arg { static struct php_tag_arg check_tag_arg[] = { TAG_ARG_ENTRY(a, href) TAG_ARG_ENTRY(area, href) - TAG_ARG_ENTRY(frame, source) + TAG_ARG_ENTRY(frame, src) TAG_ARG_ENTRY(img, src) + TAG_ARG_ENTRY(input, src) + TAG_ARG_ENTRY(form, fake_entry_for_passing_on_form_tag) {0} }; -static inline void tag_arg(url_adapt_state_t *ctx PLS_DC) +static inline void tag_arg(url_adapt_state_ex_t *ctx PLS_DC) { char f = 0; int i; @@ -151,261 +150,225 @@ static inline void tag_arg(url_adapt_state_t *ctx PLS_DC) smart_str_appends(&ctx->result, "\""); if (f) { - attach_url(&ctx->para, &ctx->name, &ctx->value, PG(arg_separator)); + attach_url(&ctx->val, &ctx->q_name, &ctx->q_value, PG(arg_separator)); } - smart_str_append(&ctx->result, &ctx->para); + smart_str_append(&ctx->result, &ctx->val); smart_str_appends(&ctx->result, "\""); } -/*!re2c -all = [\000-\377]; -*/ - -#define NEXT continue - -#define COPY_ALL \ - smart_str_appendl(&ctx->result, start, YYCURSOR - start); \ - start = NULL; \ - NEXT - -#define YYFILL(n) goto finish -#define YYCTYPE unsigned char -#define YYLIMIT endptr -#define YYCURSOR cursor -#define YYMARKER marker - -#define HANDLE_FORM \ - if (ctx->tag.len == 4 && strncasecmp(ctx->tag.c, "form", 
ctx->tag.len) == 0) { \ - smart_str_appends(&ctx->result, "><INPUT TYPE=HIDDEN NAME=\""); \ - smart_str_append(&ctx->result, &ctx->name); \ - smart_str_appends(&ctx->result, "\" VALUE=\""); \ - smart_str_append(&ctx->result, &ctx->value); \ - smart_str_appends(&ctx->result, "\""); \ - } - -#define GO(n) ctx->state = n - enum { STATE_PLAIN, STATE_TAG, STATE_NEXT_ARG, STATE_ARG, - STATE_PARA + STATE_BEFORE_VAL, + STATE_VAL }; -static void mainloop(url_adapt_state_t *ctx, smart_str *newstuff) +#define YYFILL(n) goto stop +#define YYCTYPE char +#define YYCURSOR xp +#define YYLIMIT end +#define YYMARKER q +#define STATE ctx->state + +#define PASSTHRU() {\ + smart_str_appendl(&ctx->result, start, YYCURSOR - start); \ +} + +#define HANDLE_FORM() {\ + if (ctx->tag.len == 4 && strncasecmp(ctx->tag.c, "form", 4) == 0) {\ + smart_str_appends(&ctx->result, "<INPUT TYPE=HIDDEN NAME=\""); \ + smart_str_append(&ctx->result, &ctx->q_name); \ + smart_str_appends(&ctx->result, "\" VALUE=\""); \ + smart_str_append(&ctx->result, &ctx->q_value); \ + smart_str_appends(&ctx->result, "\">"); \ + } \ +} + +/* + * HANDLE_TAG copies the HTML Tag and checks whether we + * have that tag in our table. If we might modify it, + * we continue to scan the tag, otherwise we simply copy the complete + * HTML stuff to the result buffer. + */ + +#define HANDLE_TAG() {\ + int __ok = 0; \ + int i; \ + smart_str_setl(&ctx->tag, start, YYCURSOR - start); \ + for (i = 0; check_tag_arg[i].tag; i++) { \ + if (ctx->tag.len == check_tag_arg[i].taglen \ + && strncasecmp(ctx->tag.c, check_tag_arg[i].tag, ctx->tag.len) == 0) { \ + __ok = 1; \ + break; \ + } \ + } \ + STATE = __ok ? 
STATE_NEXT_ARG : STATE_PLAIN; \ +} + +#define HANDLE_ARG() {\ + smart_str_setl(&ctx->arg, start, YYCURSOR - start); \ +} +#define HANDLE_VAL(quotes) {\ + smart_str_copyl(&ctx->val, start + quotes, YYCURSOR - start - quotes * 2); \ + tag_arg(ctx PLS_CC); \ +} + +/* + * Since arg/tag are read-only during the mainloop, we do not need + * to copy them. We need those variables across multiple calls + * to url_adapt() though, but they point to a private buffer. So we + * copy them before leaving the mainloop() and restore them at + * the beginning. + */ + +#define MOVE_TO_CTX(X) \ + if (ctx->X.c) \ + smart_str_copyl(&ctx->c_##X, ctx->X.c, ctx->X.len); \ + else \ + smart_str_free(&ctx->c_##X) + +#define FETCH_FROM_CTX(X) \ + smart_str_setl(&ctx->X, ctx->c_##X.c, ctx->c_##X.len) + +static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size_t newlen) { - char *para_start, *arg_start, *tag_start; - char *start = NULL; - char *cursor; - char *marker; - char *endptr; + char *end, *q; + char *xp; + char *start; + int rest; PLS_FETCH(); - arg_start = para_start = tag_start = NULL; - smart_str_append(&ctx->work, newstuff); - smart_str_free(&ctx->result); + FETCH_FROM_CTX(arg); + FETCH_FROM_CTX(tag); - smart_str_setl(&ctx->arg, ctx->c_arg.c, ctx->c_arg.len); - smart_str_setl(&ctx->tag, ctx->c_tag.c, ctx->c_tag.len); + smart_str_appendl(&ctx->buf, newdata, newlen); + + YYCURSOR = ctx->buf.c; + YYLIMIT = ctx->buf.c + ctx->buf.len; - cursor = ctx->work.c; - endptr = ctx->work.c + ctx->work.len; +/*!re2c +any = [\000-\377]; +alpha = [a-zA-Z]; +*/ - while (YYCURSOR < YYLIMIT) { + while(1) { start = YYCURSOR; - #ifdef SCANNER_DEBUG - printf("state %d:%s'\n", ctx->state, YYCURSOR); + printf("state %d at %s\n", STATE, YYCURSOR); #endif - switch (ctx->state) { - + switch(STATE) { + case STATE_PLAIN: /*!re2c - "<" { tag_start = YYCURSOR; GO(STATE_TAG); COPY_ALL;} - (all\[<])+ { COPY_ALL; } + [<] { PASSTHRU(); STATE = STATE_TAG; continue; } + (any\[<]) { PASSTHRU(); 
continue; } */ - break; - + break; + case STATE_TAG: /*!re2c - [a-zA-Z]+ [ >] { - YYCURSOR--; - arg_start = YYCURSOR; - smart_str_setl(&ctx->tag, start, YYCURSOR - start); -#ifdef SCANNER_DEBUG - printf("TAG(%s)\n", ctx->tag.c); -#endif - GO(STATE_NEXT_ARG); - COPY_ALL; - } - all { - YYCURSOR--; - GO(STATE_PLAIN); - tag_start = NULL; - NEXT; - } + alpha+ { HANDLE_TAG() /* Sets STATE */; PASSTHRU(); continue; } + any { PASSTHRU(); continue; } */ - break; - + break; + case STATE_NEXT_ARG: /*!re2c - [ ]+ { - GO(STATE_ARG); - NEXT; - } - ">" { - HANDLE_FORM; - GO(STATE_PLAIN); - tag_start = NULL; - COPY_ALL; - } + ">" { PASSTHRU(); HANDLE_FORM(); STATE = STATE_PLAIN; continue; } + [ \n] { PASSTHRU(); continue; } + alpha { YYCURSOR--; STATE = STATE_ARG; continue; } + any { PASSTHRU(); continue; } */ - break; + break; case STATE_ARG: - smart_str_appendl(&ctx->result, " ", 1); /*!re2c - [a-zA-Z]+ [ ]* "=" [ ]* { - char *p; + alpha+ { PASSTHRU(); HANDLE_ARG(); STATE = STATE_BEFORE_VAL; continue; } + any { PASSTHRU(); STATE = STATE_NEXT_ARG; continue; } +*/ - for (p = start; isalpha(*p); p++); - smart_str_setl(&ctx->arg, start, p - start); -#ifdef SCANNER_DEBUG - printf("ARG(%s)\n", ctx->arg.c); -#endif - para_start = YYCURSOR; - ctx->state++; - COPY_ALL; - } - (all\[ =>])* { - arg_start = YYCURSOR; - ctx->state--; - COPY_ALL; - } + case STATE_BEFORE_VAL: +/*!re2c + [ ]* "=" [ ]* { PASSTHRU(); STATE = STATE_VAL; continue; } + any { YYCURSOR--; STATE = STATE_NEXT_ARG; continue; } */ break; - case STATE_PARA: + case STATE_VAL: /*!re2c - ["] (all\[^>"])* ["] [ >] { - YYCURSOR--; - para_start = NULL; - smart_str_copyl(&ctx->para, start + 1, YYCURSOR - start - 2); -#ifdef SCANNER_DEBUG - printf("PARA(%s)\n", ctx->para.c); -#endif - tag_arg(ctx PLS_CC); - arg_start = YYCURSOR; - GO(STATE_NEXT_ARG); - NEXT; - } - (all\[^> ])+ [ >] { - YYCURSOR--; - para_start = NULL; - smart_str_copyl(&ctx->para, start, YYCURSOR - start); -#ifdef SCANNER_DEBUG - printf("PARA(%s)\n", ctx->para.c); 
-#endif - tag_arg(ctx PLS_CC); - arg_start = YYCURSOR; - GO(STATE_NEXT_ARG); - NEXT; - } - all { - YYCURSOR--; - ctx->state = 2; - NEXT; - } + ["] (any\[">])+ ["] { HANDLE_VAL(1); STATE = STATE_NEXT_ARG; continue; } + (any\[ \n>"])+ { HANDLE_VAL(0); STATE = STATE_NEXT_ARG; continue; } + any { PASSTHRU(); STATE = STATE_NEXT_ARG; continue; } */ break; - } } - -#define PRESERVE(s) \ - size_t n = ctx->work.len - (s - ctx->work.c); \ - memmove(ctx->work.c, s, n + 1); \ - ctx->work.len = n - -finish: - if (ctx->arg.c) - smart_str_copyl(&ctx->c_arg, ctx->arg.c, ctx->arg.len); - else - smart_str_free(&ctx->c_arg); - if (ctx->tag.c) - smart_str_copyl(&ctx->c_tag, ctx->tag.c, ctx->tag.len); - else - smart_str_free(&ctx->c_tag); - - if (ctx->state >= 2) { - if (para_start) { - PRESERVE(para_start); - ctx->state = 4; - } else { - if (arg_start) { PRESERVE(arg_start); } - ctx->state = 2; - } - } else if (tag_start) { - PRESERVE(tag_start); - ctx->state = 1; - } else { - ctx->state = 0; - if (start) smart_str_appendl(&ctx->result, start, YYCURSOR - start); - smart_str_free(&ctx->work); } -#ifdef SCANNER_DEBUG - if (ctx->work.c) { - printf("PRESERVING %s'\n", ctx->work.c); - } +stop: +#ifdef SCANNER_DEBUG + printf("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR); #endif + + MOVE_TO_CTX(tag); + MOVE_TO_CTX(arg); + + rest = YYLIMIT - start; + + memmove(ctx->buf.c, start, rest); + ctx->buf.c[rest] = '\0'; + ctx->buf.len = rest; } + char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char *value, size_t *newlen) { - smart_str str = {0,0}; char *ret; + url_adapt_state_ex_t *ctx; BLS_FETCH(); - smart_str_sets(&BG(url_adapt_state).name, name); - smart_str_sets(&BG(url_adapt_state).value, value); - str.c = (char *) src; - str.len = srclen; - mainloop(&BG(url_adapt_state), &str); + ctx = &BG(url_adapt_state_ex); - *newlen = BG(url_adapt_state).result.len; + smart_str_sets(&ctx->q_name, name); + 
smart_str_sets(&ctx->q_value, value); + mainloop(ctx, src, srclen); -#ifdef SCANNER_DEBUG - printf("(%d)NEW(%d): %s'\n", srclen, BG(url_adapt_state).result.len, BG(url_adapt_state).result.c); -#endif + *newlen = ctx->result.len; -#if 1 - ret = BG(url_adapt_state).result.c; - BG(url_adapt_state).result.c = NULL; + if (ctx->result.len == 0) { + return strdup(""); + } + ret = ctx->result.c; + ctx->result.c = NULL; + ctx->result.len = ctx->result.a = 0; return ret; -#else - return strdup(BG(url_adapt_state).result.c); -#endif } PHP_RINIT_FUNCTION(url_scanner) { + url_adapt_state_ex_t *ctx; BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); - memset(&BG(url_adapt_state), 0, sizeof(BG(url_adapt_state))); + memset(ctx, 0, sizeof(*ctx)); return SUCCESS; } PHP_RSHUTDOWN_FUNCTION(url_scanner) { + url_adapt_state_ex_t *ctx; BLS_FETCH(); + + ctx = &BG(url_adapt_state_ex); - smart_str_free(&BG(url_adapt_state).result); - smart_str_free(&BG(url_adapt_state).work); - smart_str_free(&BG(url_adapt_state).c_tag); - smart_str_free(&BG(url_adapt_state).c_arg); - smart_str_free(&BG(url_adapt_state).para); + smart_str_free(&ctx->result); + smart_str_free(&ctx->buf); + smart_str_free(&ctx->c_tag); + smart_str_free(&ctx->c_arg); + smart_str_free(&ctx->val); return SUCCESS; } |