summaryrefslogtreecommitdiff
path: root/ext/standard/url_scanner_ex.re
diff options
context:
space:
mode:
author Sascha Schumann <sas@php.net> 2001-07-04 15:30:21 +0000
committer Sascha Schumann <sas@php.net> 2001-07-04 15:30:21 +0000
commit ef13ab5750202d17a138566f6e4e9ffca84eccf5 (patch)
tree 74341539bab35b3a65f44f7ae23542b2c150ef2d /ext/standard/url_scanner_ex.re
parent 98b1d4bbf4ddb67919c57072aa4735c79cee055b (diff)
download php-git-ef13ab5750202d17a138566f6e4e9ffca84eccf5.tar.gz
Improve URL scanner speed (up to 40% for large chunks of data)
and handle some corner cases better. The scanner has been restructured to follow the format proposed in "RE2C - A More Versatile Scanner Generator" by Bumbulis and Cowan.
Diffstat (limited to 'ext/standard/url_scanner_ex.re')
-rw-r--r-- ext/standard/url_scanner_ex.re 159
1 files changed, 98 insertions, 61 deletions
diff --git a/ext/standard/url_scanner_ex.re b/ext/standard/url_scanner_ex.re
index 681f08c2c0..0d95d68ae5 100644
--- a/ext/standard/url_scanner_ex.re
+++ b/ext/standard/url_scanner_ex.re
@@ -87,30 +87,37 @@ PHP_INI_BEGIN()
STD_PHP_INI_ENTRY("url_rewriter.tags", "a=href,area=href,frame=src,form=fakeentry", PHP_INI_ALL, OnUpdateTags, url_adapt_state_ex, php_basic_globals, basic_globals)
PHP_INI_END()
+/*!re2c
+any = [\000-\377];
+N = (any\[<]);
+alpha = [a-zA-Z];
+*/
+
+#define YYFILL(n) goto done
+#define YYCTYPE unsigned char
+#define YYCURSOR p
+#define YYLIMIT q
+#define YYMARKER r
+
static inline void append_modified_url(smart_str *url, smart_str *dest, smart_str *name, smart_str *val, const char *separator)
{
- register const char *p, *q;
+ register const char *p, *q, *r;
const char *bash = NULL;
const char *sep = "?";
- q = url->c + url->len;
-
- for (p = url->c; p < q; p++) {
- switch(*p) {
- case ':':
- smart_str_append(dest, url);
- return;
- case '?':
- sep = separator;
- break;
- case '#':
- bash = p;
- break;
- }
- }
+ q = (p = url->c) + url->len;
+scan:
+/*!re2c
+ ":" { smart_str_append(dest, url); return; }
+ "?" { sep = separator; goto done; }
+ "#" { bash = p; goto done; }
+ (any\[:?#])+ { goto scan; }
+*/
+done:
+
/* Don't modify URLs of the format "#mark" */
- if (bash - url->c == 0) {
+ if (bash && bash - url->c == 0) {
smart_str_append(dest, url);
return;
}
@@ -129,6 +136,12 @@ static inline void append_modified_url(smart_str *url, smart_str *dest, smart_st
smart_str_appendl(dest, bash, q - bash);
}
+#undef YYFILL
+#undef YYCTYPE
+#undef YYCURSOR
+#undef YYLIMIT
+#undef YYMARKER
+
static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC)
{
char f = 0;
@@ -146,7 +159,7 @@ static inline void tag_arg(url_adapt_state_ex_t *ctx, char quote PLS_DC)
}
enum {
- STATE_PLAIN,
+ STATE_PLAIN = 0,
STATE_TAG,
STATE_NEXT_ARG,
STATE_ARG,
@@ -232,68 +245,72 @@ static inline void mainloop(url_adapt_state_ex_t *ctx, const char *newdata, size
YYCURSOR = ctx->buf.c;
YYLIMIT = ctx->buf.c + ctx->buf.len;
-/*!re2c
-any = [\000-\377];
-alpha = [a-zA-Z];
-*/
+ switch (STATE) {
+ case STATE_PLAIN: goto state_plain;
+ case STATE_TAG: goto state_tag;
+ case STATE_NEXT_ARG: goto state_next_arg;
+ case STATE_ARG: goto state_arg;
+ case STATE_BEFORE_VAL: goto state_before_val;
+ case STATE_VAL: goto state_val;
+ }
+
+
+state_plain_begin:
+ STATE = STATE_PLAIN;
- while(1) {
- start = YYCURSOR;
- scdebug(("state %d at %s\n", STATE, YYCURSOR));
- switch(STATE) {
-
- case STATE_PLAIN:
+state_plain:
+ start = YYCURSOR;
/*!re2c
- [<] { passthru(STD_ARGS); STATE = STATE_TAG; continue; }
- (any\[<]) { passthru(STD_ARGS); continue; }
+ "<" { passthru(STD_ARGS); STATE = STATE_TAG; goto state_tag; }
+ N+ { passthru(STD_ARGS); goto state_plain; }
*/
- break;
-
- case STATE_TAG:
+
+state_tag:
+ start = YYCURSOR;
/*!re2c
- alpha+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); continue; }
- any { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; }
+ alpha+ { handle_tag(STD_ARGS); /* Sets STATE */; passthru(STD_ARGS); if (STATE == STATE_PLAIN) goto state_plain; else goto state_next_arg; }
+ any { passthru(STD_ARGS); goto state_plain_begin; }
*/
- break;
-
- case STATE_NEXT_ARG:
+
+state_next_arg_begin:
+ STATE = STATE_NEXT_ARG;
+
+state_next_arg:
+ start = YYCURSOR;
/*!re2c
- ">" { passthru(STD_ARGS); handle_form(STD_ARGS); STATE = STATE_PLAIN; continue; }
- [ \n] { passthru(STD_ARGS); continue; }
- alpha { YYCURSOR--; STATE = STATE_ARG; continue; }
- any { passthru(STD_ARGS); STATE = STATE_PLAIN; continue; }
+ ">" { passthru(STD_ARGS); handle_form(STD_ARGS); goto state_plain_begin; }
+ [ \v\t\n]+ { passthru(STD_ARGS); goto state_next_arg; }
+ alpha { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
+ any { passthru(STD_ARGS); goto state_plain_begin; }
*/
- break;
- case STATE_ARG:
+state_arg:
+ start = YYCURSOR;
/*!re2c
- alpha+ { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; continue; }
- any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; }
+ alpha+ { passthru(STD_ARGS); handle_arg(STD_ARGS); STATE = STATE_BEFORE_VAL; goto state_before_val; }
+ any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; goto state_next_arg; }
*/
- case STATE_BEFORE_VAL:
+state_before_val:
+ start = YYCURSOR;
/*!re2c
- [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; continue; }
- any { YYCURSOR--; STATE = STATE_NEXT_ARG; continue; }
+ [ ]* "=" [ ]* { passthru(STD_ARGS); STATE = STATE_VAL; goto state_val; }
+ any { --YYCURSOR; goto state_next_arg_begin; }
*/
- break;
- case STATE_VAL:
+
+state_val:
+ start = YYCURSOR;
/*!re2c
- ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); STATE = STATE_NEXT_ARG; continue; }
- ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); STATE = STATE_NEXT_ARG; continue; }
- (any\[ \n>"])+ { handle_val(STD_ARGS, 0, '"'); STATE = STATE_NEXT_ARG; continue; }
- any { passthru(STD_ARGS); STATE = STATE_NEXT_ARG; continue; }
+ ["] (any\[">])* ["] { handle_val(STD_ARGS, 1, '"'); goto state_next_arg_begin; }
+ ['] (any\['>])* ['] { handle_val(STD_ARGS, 1, '\''); goto state_next_arg_begin; }
+ (any\[ \n>"'])+ { handle_val(STD_ARGS, 0, '"'); goto state_next_arg_begin; }
+ any { passthru(STD_ARGS); goto state_next_arg_begin; }
*/
- break;
- }
- }
stop:
- scdebug(("stopped in state %d at pos %d (%d:%c)\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR));
-
rest = YYLIMIT - start;
-
+ scdebug(("stopped in state %d at pos %d (%d:%c) %d\n", STATE, YYCURSOR - ctx->buf.c, *YYCURSOR, *YYCURSOR, rest));
/* XXX: Crash avoidance. Need to work with reporter to figure out what goes wrong */
if (rest < 0) rest = 0;
@@ -301,6 +318,24 @@ stop:
ctx->buf.len = rest;
}
+char *url_adapt_flush(size_t *newlen)
+{
+ char *ret = NULL;
+ url_adapt_state_ex_t *ctx;
+ BLS_FETCH();
+
+ ctx = &BG(url_adapt_state_ex);
+
+ if (ctx->buf.len) {
+ ret = ctx->buf.c;
+ *newlen = ctx->buf.len;
+ ctx->buf.c = 0;
+ ctx->buf.len = 0;
+ }
+
+ return ret;
+}
+
char *url_adapt_single_url(const char *url, size_t urllen, const char *name, const char *value, size_t *newlen)
{
smart_str surl = {0};
@@ -334,6 +369,8 @@ char *url_adapt_ext(const char *src, size_t srclen, const char *name, const char
mainloop(ctx, src, srclen);
*newlen = ctx->result.len;
+ if (!ctx->result.c)
+ smart_str_appendl(&ctx->result, "", 0);
smart_str_0(&ctx->result);
ctx->result.len = 0;
return ctx->result.c;