diff options
author | Sascha Schumann <sas@php.net> | 2001-08-03 08:49:19 +0000 |
---|---|---|
committer | Sascha Schumann <sas@php.net> | 2001-08-03 08:49:19 +0000 |
commit | 7471943c064e93cc9caafda6044e7190a762a477 (patch) | |
tree | ca64c9f5a5c7628b1b8779db366674046868a8e5 /ext/standard/string.c | |
parent | e206bd33746b3d5079b3b25468a613e917c8a520 (diff) | |
download | php-git-7471943c064e93cc9caafda6044e7190a762a477.tar.gz |
Replace our strtok implementation with a binary-safe one whose code
does not cause headaches when reading it. It also happens to be a bit quicker.
Diffstat (limited to 'ext/standard/string.c')
-rw-r--r-- | ext/standard/string.c | 102 |
1 files changed, 62 insertions, 40 deletions
diff --git a/ext/standard/string.c b/ext/standard/string.c index a2522823c2..c62d78a120 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -803,60 +803,82 @@ PHP_FUNCTION(implode) } /* }}} */ +#define STRTOK_TABLE(p) BG(strtok_table)[(unsigned char) *p] + /* {{{ proto string strtok([string str,] string token) Tokenize a string */ PHP_FUNCTION(strtok) { - zval **str, **tok; - char *token = NULL, *tokp=NULL; - char *first = NULL; - int argc; + zval **args[2]; + zval **tok, **str; + char *token; + char *token_end; + char *p; + char *pe; - argc = ZEND_NUM_ARGS(); - - if ((argc == 1 && zend_get_parameters_ex(1, &tok) == FAILURE) || - (argc == 2 && zend_get_parameters_ex(2, &str, &tok) == FAILURE) || - argc < 1 || argc > 2) { + if (ZEND_NUM_ARGS() < 1 || ZEND_NUM_ARGS() > 2 || + zend_get_parameters_array_ex(ZEND_NUM_ARGS(), args) == FAILURE) WRONG_PARAM_COUNT; - } - convert_to_string_ex(tok); - tokp = token = (*tok)->value.str.val; - - if (argc == 2) { + + switch (ZEND_NUM_ARGS()) { + case 1: + tok = args[0]; + break; + case 2: + str = args[0]; + tok = args[1]; convert_to_string_ex(str); STR_FREE(BG(strtok_string)); - BG(strtok_string) = estrndup((*str)->value.str.val,(*str)->value.str.len); - BG(strtok_pos1) = BG(strtok_string); - BG(strtok_pos2) = NULL; - } - if (BG(strtok_pos1) && *BG(strtok_pos1)) { - for ( /* NOP */ ; token && *token; token++) { - BG(strtok_pos2) = strchr(BG(strtok_pos1), (int) *token); - if (!first || (BG(strtok_pos2) && BG(strtok_pos2) < first)) { - first = BG(strtok_pos2); - } - } /* NB: token is unusable now */ + BG(strtok_last) = BG(strtok_string) = estrndup(Z_STRVAL_PP(str), Z_STRLEN_PP(str)); + BG(strtok_len) = Z_STRLEN_PP(str); + break; + } + + p = BG(strtok_last); /* Where we start to search */ + pe = BG(strtok_string) + BG(strtok_len); - BG(strtok_pos2) = first; - if (BG(strtok_pos2)) { - *BG(strtok_pos2) = '\0'; - } - RETVAL_STRING(BG(strtok_pos1),1); -#if 0 - /* skip 'token' white space for next call to strtok */ - while 
(BG(strtok_pos2) && - strchr(tokp, *(BG(strtok_pos2)+1))) { - BG(strtok_pos2)++; + if (!p || p >= pe) + RETURN_FALSE; + + convert_to_string_ex(tok); + + token = Z_STRVAL_PP(tok); + token_end = token + Z_STRLEN_PP(tok); + + while (token < token_end) + STRTOK_TABLE(token++) = 1; + + /* Skip leading delimiters */ + while (STRTOK_TABLE(p)) + if (++p >= pe) { + /* no other chars left */ + BG(strtok_last) = NULL; + RETVAL_FALSE; + goto restore; } -#endif - if (BG(strtok_pos2)) - BG(strtok_pos1) = BG(strtok_pos2) + 1; - else - BG(strtok_pos1) = NULL; + + /* We know at this place that *p is no delimiter, so skip it */ + while (++p < pe) + if (STRTOK_TABLE(p)) + goto return_token; + + if (p - BG(strtok_last)) { +return_token: + RETVAL_STRINGL(BG(strtok_last), p - BG(strtok_last), 1); + BG(strtok_last) = p + 1; } else { RETVAL_FALSE; + BG(strtok_last) = NULL; } + + /* Restore table -- usually faster then memset'ing the table + on every invocation */ +restore: + token = Z_STRVAL_PP(tok); + + while (token < token_end) + STRTOK_TABLE(token++) = 0; } /* }}} */ |