summaryrefslogtreecommitdiff
path: root/ext/standard/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/standard/string.c')
-rw-r--r--ext/standard/string.c116
1 files changed, 106 insertions, 10 deletions
diff --git a/ext/standard/string.c b/ext/standard/string.c
index 2e26fa46fd..066ccf4a00 100644
--- a/ext/standard/string.c
+++ b/ext/standard/string.c
@@ -1807,19 +1807,32 @@ PHP_FUNCTION(nl2br)
}
/* }}} */
-/* {{{ proto string strip_tags(string str)
+/* {{{ proto string strip_tags(string str [, string allowable_tags])
Strips HTML and PHP tags from a string */
PHP_FUNCTION(strip_tags)
{
	char *buf;
-	pval *str;
+	/* allow stays NULL when the function is called with one argument */
+	pval *str, *allow=NULL;
-	if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &str) == FAILURE) {
-		WRONG_PARAM_COUNT;
+	switch(ARG_COUNT(ht)) {
+		case 1:
+			if(getParameters(ht, 1, &str)==FAILURE) {
+				RETURN_FALSE;
+			}
+			break;
+		case 2:
+			if(getParameters(ht, 2, &str, &allow)==FAILURE) {
+				RETURN_FALSE;
+			}
+			convert_to_string(allow);
+			break;
+		default:
+			WRONG_PARAM_COUNT;
+			break;
	}
	convert_to_string(str);
	buf = estrdup(str->value.str.val);
-	_php3_strip_tags(buf, 0);
+	/*
+	   buf is stripped in place, starting in state 0 (the output state).
+	   In the one-argument form there is no allow list, so hand NULL to
+	   the stripper instead of dereferencing the NULL allow pointer.
+	*/
+	_php3_strip_tags(buf, str->value.str.len, 0, allow ? allow->value.str.val : NULL);
	RETURN_STRING(buf, 0);
}
/* }}} */
@@ -1888,6 +1901,59 @@ PHP_FUNCTION(parse_str)
}
/* }}} */
+#define PHP_TAG_BUF_SIZE 1023
+
+/* Check if tag is in a set of tags
+ *
+ * tag - raw tag text, e.g. "<a href=x>" or "</a>"; only the first len
+ *       bytes are examined
+ * len - number of bytes of tag to look at
+ * set - string of allowed tags to search, e.g. "<a><b>"; the tag is
+ *       lowercased before the search, so set should be lowercase too
+ *
+ * Returns 1 if the normalized tag occurs in set (strstr match), 0 if it
+ * does not or if there is nothing to check.
+ *
+ * states:
+ *
+ * 0 start tag
+ * 1 first non-whitespace char seen
+ */
+int php_tag_find(char *tag, int len, char *set) {
+	char c, *n, *norm;
+	int i, state=0, done=0;
+
+	if(!tag || !set || len<=0) {
+		return 0;
+	}
+	/* at most len chars are copied, plus the closing '>' and the '\0' */
+	norm = emalloc(len+2);
+	n = norm;
+	/*
+	   normalize the tag removing leading and trailing whitespace
+	   and turn any <a whatever...> into just <a> and any </tag>
+	   into <tag>
+	*/
+	for(i=0; i<len && !done; i++) {
+		/* ctype functions need a value in the unsigned char range */
+		c = tolower((unsigned char)tag[i]);
+		switch(c) {
+			case '<':
+				*(n++) = c;
+				break;
+			case '>':
+				done = 1;
+				break;
+			default:
+				if(!isspace((unsigned char)c)) {
+					if(state==0) {
+						state = 1;
+						/* drop the '/' that opens a closing tag */
+						if(c!='/') *(n++) = c;
+					} else {
+						*(n++) = c;
+					}
+				} else {
+					/* whitespace after the tag name ends the name */
+					if(state==1) done = 1;
+				}
+				break;
+		}
+	}
+	*(n++) = '>';
+	*n = '\0';
+	done = strstr(set, norm) ? 1 : 0;
+	efree(norm);
+	return done;
+}
+
/* A simple little state-machine to strip out html and php tags
State 0 is the output state, State 1 means we are inside a
@@ -1898,10 +1964,14 @@ PHP_FUNCTION(parse_str)
lc holds the last significant character read and br is a bracket
counter.
+
+ When an allow string is passed in, the text of each tag is collected
+ while in state 1; when the tag is closed it is checked against the
+ allow string and copied to the output if it is allowed.
*/
-void _php3_strip_tags(char *rbuf, int state) {
- char *buf, *p, *rp, c, lc;
- int br;
+void _php3_strip_tags(char *rbuf, int len, int state, char *allow) {
+ char *tbuf, *buf, *p, *tp, *rp, c, lc;
+ int br, i=0;
buf = estrdup(rbuf);
c = *buf;
@@ -1909,13 +1979,21 @@ void _php3_strip_tags(char *rbuf, int state) {
p = buf;
rp = rbuf;
br = 0;
+ if(allow) {
+ _php3_strtolower(allow);
+ tbuf = emalloc(PHP_TAG_BUF_SIZE+1);
+ tp = tbuf;
+ } else tp=NULL;
- while (c) { /* This is not binary-safe. Don't see why it should be */
+ while(i<len) {
switch (c) {
case '<':
if (state == 0) {
lc = '<';
state = 1;
+ if(allow) {
+ *(tp++) = '<';
+ }
}
break;
@@ -1945,6 +2023,15 @@ void _php3_strip_tags(char *rbuf, int state) {
if (state == 1) {
lc = '>';
state = 0;
+ if(allow) {
+ *(tp++) = '>';
+ *tp='\0';
+ if(php_tag_find(tbuf, tp-tbuf, allow)) {
+ memcpy(rp,tbuf,tp-tbuf);
+ rp += tp-tbuf;
+ }
+ tp = tbuf;
+ }
} else if (state == 2) {
if (!br && lc != '\"' && *(p-1)=='?') {
state = 0;
@@ -1961,6 +2048,8 @@ void _php3_strip_tags(char *rbuf, int state) {
}
} else if (state == 0) {
*(rp++) = c;
+ } else if (allow && state == 1) {
+ *(tp++) = c;
}
break;
@@ -1975,13 +2064,20 @@ void _php3_strip_tags(char *rbuf, int state) {
default:
if (state == 0) {
*(rp++) = c;
- }
+ } else if(allow && state == 1) {
+ *(tp++) = c;
+ if( (tp-tbuf)>=PHP_TAG_BUF_SIZE ) { /* no buffer overflows */
+ tp = tbuf;
+ }
+ }
break;
}
c = *(++p);
+ i++;
}
*rp = '\0';
efree(buf);
+ if(allow) efree(tbuf);
}
/*