diff options
-rw-r--r-- | ChangeLog | 2 | ||||
-rw-r--r-- | ext/standard/file.c | 24 | ||||
-rw-r--r-- | ext/standard/php3_string.h | 2 | ||||
-rw-r--r-- | ext/standard/string.c | 116 |
4 files changed, 128 insertions, 16 deletions
@@ -2,6 +2,8 @@ PHP 4.0 CHANGE LOG ChangeLog ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ?? 1999, Version 4.0 Beta 3 +- Added optional allowable_tags arguments to strip_tags() and fgetss() to + allow you to specify a string of tags that are not to be stripped (Rasmus) - Upgraded var_dump() to take multiple arguments (Andrey) - Resourcified XML (Thies) - Fixed a memory leak in the Apache per-directory directives handler (Zeev) diff --git a/ext/standard/file.c b/ext/standard/file.c index ecd913856d..78922925f3 100644 --- a/ext/standard/file.c +++ b/ext/standard/file.c @@ -919,19 +919,33 @@ PHP_FUNCTION(fgetc) { /* Strip any HTML tags while reading */ -/* {{{ proto string fgetss(int fp, int length) +/* {{{ proto string fgetss(int fp, int length [, allowable_tags]) Get a line from file pointer and strip HTML tags */ PHP_FUNCTION(fgetss) { - pval *fd, *bytes; + pval *fd, *bytes, *allow=NULL; FILE *fp; int id, len, type; char *buf; int issock=0; int *sock,socketd=0; - if (ARG_COUNT(ht) != 2 || getParameters(ht, 2, &fd, &bytes) == FAILURE) { - WRONG_PARAM_COUNT; + switch(ARG_COUNT(ht)) { + case 2: + if (getParameters(ht, 2, &fd, &bytes) == FAILURE) { + RETURN_FALSE; + } + break; + case 3: + if (getParameters(ht, 3, &fd, &bytes, &allow) == FAILURE) { + RETURN_FALSE; + } + convert_to_string(allow); + break; + default: + WRONG_PARAM_COUNT; + /* NOTREACHED */ + break; } convert_to_long(fd); @@ -959,7 +973,7 @@ PHP_FUNCTION(fgetss) RETURN_FALSE; } - _php3_strip_tags(buf, fgetss_state); + /* allow is only assigned by the three-argument form; pass NULL otherwise instead of dereferencing it */ _php3_strip_tags(buf, len, fgetss_state, allow ? allow->value.str.val : NULL); RETURN_STRING(buf, 0); } /* }}} */ diff --git a/ext/standard/php3_string.h b/ext/standard/php3_string.h index 448a49daac..7d8bda4903 100644 --- a/ext/standard/php3_string.h +++ b/ext/standard/php3_string.h @@ -99,7 +99,7 @@ extern PHPAPI char *php3i_stristr(unsigned char *s, unsigned char *t); extern PHPAPI char *_php3_str_to_str(char *haystack, int length, char *needle, int needle_len, char 
*str, int str_len, int *_new_length); extern PHPAPI void _php3_trim(pval *str, pval *return_value, int mode); -extern PHPAPI void _php3_strip_tags(char *rbuf, int state); +extern PHPAPI void _php3_strip_tags(char *rbuf, int len, int state, char *allow); extern PHPAPI void _php3_char_to_str(char *str, uint len, char from, char *to, int to_len, pval *result); diff --git a/ext/standard/string.c b/ext/standard/string.c index 2e26fa46fd..066ccf4a00 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -1807,19 +1807,32 @@ PHP_FUNCTION(nl2br) } /* }}} */ -/* {{{ proto string strip_tags(string str) +/* {{{ proto string strip_tags(string str [, allowable_tags]) Strips HTML and PHP tags from a string */ PHP_FUNCTION(strip_tags) { char *buf; - pval *str; + pval *str, *allow=NULL; - if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &str) == FAILURE) { - WRONG_PARAM_COUNT; + switch(ARG_COUNT(ht)) { + case 1: + if(getParameters(ht, 1, &str)==FAILURE) { + RETURN_FALSE; + } + break; + case 2: + if(getParameters(ht, 2, &str, &allow)==FAILURE) { + RETURN_FALSE; + } + convert_to_string(allow); + break; + default: + WRONG_PARAM_COUNT; + break; } convert_to_string(str); buf = estrdup(str->value.str.val); - _php3_strip_tags(buf, 0); + /* allow is still NULL when only one argument was passed, so do not dereference it blindly */ _php3_strip_tags(buf, str->value.str.len, 0, allow ? allow->value.str.val : NULL); RETURN_STRING(buf, 0); } /* }}} */ @@ -1888,6 +1901,59 @@ PHP_FUNCTION(parse_str) } /* }}} */ +#define PHP_TAG_BUF_SIZE 1023 + +/* Check if tag is in a set of tags; returns 1 when the normalized form of tag occurs as a substring of set, 0 otherwise + * + * states: + * + * 0 start tag + * 1 first non-whitespace char seen + */ +int php_tag_find(char *tag, int len, char *set) { + char c, *n, *t; + int i=0, state=0, done=0; + char *norm = emalloc(len+2); /* at most len copied bytes plus the appended '>' and the terminating NUL */ + + n = norm; + t = tag; + c = tolower((unsigned char)*t); + /* + normalize the tag removing leading and trailing whitespace + and turn any <a whatever...> into just <a> and any </tag> + into <tag> + */ + while(i<len && !done) { + switch(c) { + case '<': + *(n++) = c; + break; + case '>': + done =1; + break; + default: 
if(!isspace((unsigned char)c)) { + if(state==0) { + state=1; + if(c!='/') *(n++) = c; + } else { + *(n++) = c; + } + } else { + if(state==1) done=1; + } + break; + } + c = tolower((unsigned char)*(++t)); i++; /* advance the counter so the i<len bound actually limits the scan */ + } + *(n++) = '>'; + *n = '\0'; + if(strstr(set,norm)) done=1; + else done=0; + efree(norm); + return done; +} + /* A simple little state-machine to strip out html and php tags State 0 is the output state, State 1 means we are inside a @@ -1898,10 +1964,14 @@ PHP_FUNCTION(parse_str) lc holds the last significant character read and br is a bracket counter. + + When an allow string is passed in we keep track of the string + in state 1 and when the tag is closed check it against the + allow string to see if we should allow it. */ -void _php3_strip_tags(char *rbuf, int state) { - char *buf, *p, *rp, c, lc; - int br; +void _php3_strip_tags(char *rbuf, int len, int state, char *allow) { + char *tbuf, *buf, *p, *tp, *rp, c, lc; + int br, i=0; buf = estrdup(rbuf); c = *buf; @@ -1909,13 +1979,21 @@ void _php3_strip_tags(char *rbuf, int state) { p = buf; rp = rbuf; br = 0; + if(allow) { + _php3_strtolower(allow); + tbuf = emalloc(PHP_TAG_BUF_SIZE+1); + tp = tbuf; + } else tp=NULL; - while (c) { /* This is not binary-safe. 
Don't see why it should be */ + while(i<len) { switch (c) { case '<': if (state == 0) { lc = '<'; state = 1; + if(allow) { + tp = tbuf; *(tp++) = '<'; /* a new tag always starts at the beginning of tbuf; tp is not rewound when state 2 is left below */ + } } break; @@ -1945,6 +2023,15 @@ void _php3_strip_tags(char *rbuf, int state) { if (state == 1) { lc = '>'; state = 0; + if(allow) { + *(tp++) = '>'; + *tp='\0'; + if(php_tag_find(tbuf, tp-tbuf, allow)) { + memcpy(rp,tbuf,tp-tbuf); + rp += tp-tbuf; + } + tp = tbuf; + } } else if (state == 2) { if (!br && lc != '\"' && *(p-1)=='?') { state = 0; @@ -1961,6 +2048,8 @@ void _php3_strip_tags(char *rbuf, int state) { } } else if (state == 0) { *(rp++) = c; + } else if (allow && state == 1) { + *(tp++) = c; if( (tp-tbuf)>=PHP_TAG_BUF_SIZE ) tp = tbuf; /* same overflow guard as the default case: keep room for the closing '>' and NUL */ } break; @@ -1975,13 +2064,20 @@ void _php3_strip_tags(char *rbuf, int state) { default: if (state == 0) { *(rp++) = c; - } + } else if(allow && state == 1) { + *(tp++) = c; + if( (tp-tbuf)>=PHP_TAG_BUF_SIZE ) { /* no buffer overflows */ + tp = tbuf; + } + } break; } c = *(++p); + i++; } *rp = '\0'; efree(buf); + if(allow) efree(tbuf); } /* |