diff options
Diffstat (limited to 'ext/standard/string.c')
| -rw-r--r-- | ext/standard/string.c | 558 |
1 files changed, 415 insertions, 143 deletions
diff --git a/ext/standard/string.c b/ext/standard/string.c index bfc66735a9..1a5aecd3de 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP HTML Embedded Scripting Language Version 3.0 | +----------------------------------------------------------------------+ - | Copyright (c) 1997,1998 PHP Development Team (See Credits file) | + | Copyright (c) 1997-1999 PHP Development Team (See Credits file) | +----------------------------------------------------------------------+ | This program is free software; you can redistribute it and/or modify | | it under the terms of one of the following licenses: | @@ -24,21 +24,22 @@ | contact core@php.net. | +----------------------------------------------------------------------+ | Authors: Rasmus Lerdorf <rasmus@lerdorf.on.ca> | - | Stig Sæther Bakken <ssb@guardian.no> | + | Stig Sæther Bakken <ssb@fast.no> | | Zeev Suraski <bourbon@nevision.net.il> | +----------------------------------------------------------------------+ */ - /* $Id$ */ +/* Synced with php3 revision 1.193 1999-06-16 [ssb] */ + #include <stdio.h> #include "php.h" #include "reg.h" #include "post.h" #include "php3_string.h" -#if HAVE_SETLOCALE -#include <locale.h> +#ifdef HAVE_LOCALE_H +# include <locale.h> #endif #include "zend_execute.h" #include "php_globals.h" @@ -128,7 +129,7 @@ PHP_FUNCTION(strcasecmp) } convert_to_string(s1); convert_to_string(s2); - RETURN_LONG(strcasecmp(s1->value.str.val,s2->value.str.val)); + RETURN_LONG(zend_binary_strcasecmp(s1, s2)); } /* }}} */ @@ -162,12 +163,51 @@ PHP_FUNCTION(strcspn) } /* }}} */ +PHPAPI void _php3_trim(pval *str, pval * return_value, int mode) +/* mode 1 : trim left + mode 2 : trim right + mode 3 : trim left and right +*/ +{ + register int i; + int len = str->value.str.len; + int trimmed = 0; + char *c = str->value.str.val; + + if (mode & 1) { + for (i = 0; i < len; i++) { + if (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || + c[i] == '\t' || c[i] == '\v') { + trimmed++; + } else { + break; + } + } + len -= trimmed; + c += trimmed; + } + if (mode & 2) { + for (i = len - 1; i >= 0; i--) { + if (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || + c[i] == '\t' || c[i] == '\v') { + len--; + } else { + break; + } + } + } + RETVAL_STRINGL(c, len, 1); +} + +/* {{{ proto string rtrim(string str) + An alias for chop */ +/* }}} */ + /* {{{ proto string chop(string str) Remove trailing whitespace */ PHP_FUNCTION(chop) { pval *str; - register int i; if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &str) == FAILURE) { WRONG_PARAM_COUNT; @@ -175,17 +215,7 @@ PHP_FUNCTION(chop) convert_to_string(str); if (str->type == IS_STRING) { - int len = str->value.str.len; - char *c = str->value.str.val; - for (i = len - 1; i >= 0; i--) { - if (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || - c[i] == '\t' || c[i] == '\v') { - len--; - } else { - break; - } - } - RETVAL_STRINGL(c, len, 1); + _php3_trim(str, return_value, 2); return; } RETURN_FALSE; @@ -197,7 +227,6 @@ PHP_FUNCTION(chop) PHP_FUNCTION(trim) { pval *str; - register int i; if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &str) == FAILURE) { WRONG_PARAM_COUNT; @@ -205,28 +234,7 @@ PHP_FUNCTION(trim) convert_to_string(str); if (str->type == IS_STRING) { - int len = str->value.str.len; - int trimmed = 0; - char *c = str->value.str.val; - for (i = 0; i < len; i++) { - if (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || - c[i] == '\t' || c[i] == '\v') { - trimmed++; - } else { - break; - } - } - len-=trimmed; - c+=trimmed; - for (i = len - 1; i >= 0; i--) { - if (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || - c[i] == '\t' || c[i] == '\v') { - len--; - } else { - break; - } - } - RETVAL_STRINGL(c, len, 1); + _php3_trim(str, return_value, 3); return; } RETURN_FALSE; @@ -238,54 +246,24 @@ PHP_FUNCTION(trim) PHP_FUNCTION(ltrim) { pval *str; - register int i; if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &str) == FAILURE) { WRONG_PARAM_COUNT; } convert_to_string(str); - if (str->type == IS_STRING) { - int len = str->value.str.len; - int trimmed = 0; - char *c = str->value.str.val; - for (i = 0; i < len; i++) { - if (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || - c[i] == '\t' || c[i] == '\v') { - trimmed++; - } else { - break; - } - } - RETVAL_STRINGL(c+trimmed, len-trimmed, 1); + _php3_trim(str, return_value, 1); return; } RETURN_FALSE; } /* }}} */ -/* {{{ proto array(string separator, string str) - Split a string on string separator and return array of components */ -PHP_FUNCTION(explode) +void _php3_explode(pval *delim, pval *str, pval *return_value) { - pval *str, *delim; char *work_str, *p1, *p2; int i = 0; - if (ARG_COUNT(ht) != 2 || getParameters(ht, 2, &delim, &str) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_string(str); - convert_to_string(delim); - - if (strlen(delim->value.str.val)==0) { - /* the delimiter must be a valid C string that's at least 1 character long */ - php3_error(E_WARNING,"Empty delimiter"); - RETURN_FALSE; - } - if (array_init(return_value) == FAILURE) { - return; - } work_str = p1 = estrndup(str->value.str.val,str->value.str.len); p2 = strstr(p1, delim->value.str.val); if (p2 == NULL) { @@ -300,38 +278,46 @@ PHP_FUNCTION(explode) } efree(work_str); } -/* }}} */ -/* {{{ proto string implode(array src, string glue) - Join array elements placing glue string between items and return one string */ -PHP_FUNCTION(implode) +/* {{{ proto array explode(string separator, string str) + Split a string on string separator and return array of components */ +PHP_FUNCTION(explode) { - pval *arg1, *arg2, *delim, **tmp, *arr; - int len = 0, count = 0; - - if (ARG_COUNT(ht) != 2 || getParameters(ht, 2, &arg1, &arg2) == FAILURE) { + pval *str, *delim; + + if (ARG_COUNT(ht) != 2 || getParameters(ht, 2, &delim, &str) == FAILURE) { WRONG_PARAM_COUNT; } + convert_to_string(str); + convert_to_string(delim); - if (arg1->type == IS_ARRAY && arg2->type == IS_STRING) { - arr = arg1; - delim = arg2; - } else if (arg2->type == IS_ARRAY) { - convert_to_string(arg1); - arr = arg2; - delim = arg1; - } else { - php3_error(E_WARNING, "Bad arguments to %s()", - get_active_function_name()); + if (strlen(delim->value.str.val)==0) { + /* the delimiter must be a valid C string that's at least 1 character long */ + php3_error(E_WARNING,"Empty delimiter"); + RETURN_FALSE; + } + if (array_init(return_value) == FAILURE) { return; } + _php3_explode(delim, str, return_value); +} +/* }}} */ + +/* {{{ proto string join(array src, string glue) + An alias for implode */ +/* }}} */ + +void _php3_implode(pval *delim, pval *arr, pval *return_value) +{ + pval *tmp; + int len = 0, count = 0; /* convert everything to strings, and calculate length */ _php3_hash_internal_pointer_reset(arr->value.ht); while (_php3_hash_get_current_data(arr->value.ht, (void **) &tmp) == SUCCESS) { - convert_to_string(*tmp); - if ((*tmp)->type == IS_STRING) { - len += (*tmp)->value.str.len; + convert_to_string(tmp); + if (tmp->type == IS_STRING && tmp->value.str.val != undefined_variable_string) { + len += tmp->value.str.len; if (count>0) { len += delim->value.str.len; } @@ -346,9 +332,9 @@ PHP_FUNCTION(implode) return_value->value.str.val[len] = '\0'; _php3_hash_internal_pointer_reset(arr->value.ht); while (_php3_hash_get_current_data(arr->value.ht, (void **) &tmp) == SUCCESS) { - if ((*tmp)->type == IS_STRING) { + if (tmp->type == IS_STRING && tmp->value.str.val != undefined_variable_string) { count--; - strcat(return_value->value.str.val, (*tmp)->value.str.val); + strcat(return_value->value.str.val, tmp->value.str.val); if (count > 0) { strcat(return_value->value.str.val, delim->value.str.val); } @@ -356,12 +342,37 @@ PHP_FUNCTION(implode) _php3_hash_move_forward(arr->value.ht); } return_value->type = IS_STRING; - return_value->refcount = 1; - return_value->is_ref = 0; return_value->value.str.len = len; } + + +/* {{{ proto string implode(array src, string glue) + Join array elements placing glue string between items and return one string */ +PHP_FUNCTION(implode) +{ + pval *arg1, *arg2, *delim, *arr; + + if (ARG_COUNT(ht) != 2 || getParameters(ht, 2, &arg1, &arg2) == FAILURE) { + WRONG_PARAM_COUNT; + } + + if (arg1->type == IS_ARRAY && arg2->type == IS_STRING) { + arr = arg1; + delim = arg2; + } else if (arg2->type == IS_ARRAY) { + convert_to_string(arg1); + arr = arg2; + delim = arg1; + } else { + php3_error(E_WARNING, "Bad arguments to %s()", + get_active_function_name()); + return; + } + _php3_implode(delim, arr, return_value); +} /* }}} */ + #ifndef THREAD_SAFE char *strtok_string; #endif @@ -559,7 +570,7 @@ PHP_FUNCTION(dirname) /* }}} */ -/* case-insensitve strstr */ +/* case insensitve strstr */ PHPAPI char *php3i_stristr(unsigned char *s, unsigned char *t) { int i, j, k, l; @@ -635,7 +646,7 @@ PHP_FUNCTION(strstr) } /* }}} */ -/* {{{ proto int strpos(string haystack, string needle) +/* {{{ proto int strpos(string haystack, string needle [, int offset]) Find position of first occurrence of a string within another */ PHP_FUNCTION(strpos) { @@ -742,7 +753,8 @@ PHP_FUNCTION(strrchr) /* }}} */ static char * -_php3_chunk_split(char *src, int srclen, char *end, int endlen, int chunklen) +_php3_chunk_split(char *src, int srclen, char *end, int endlen, + int chunklen, int *destlen) { char *dest; char *p, *q; @@ -770,6 +782,9 @@ _php3_chunk_split(char *src, int srclen, char *end, int endlen, int chunklen) } *q = '\0'; + if (destlen) { + *destlen = q - dest; + } return(dest); } @@ -784,16 +799,15 @@ PHP_FUNCTION(chunk_split) char *end = "\r\n"; int endlen = 2; int chunklen = 76; + int result_len; argc = ARG_COUNT(ht); - if(!((argc == 1 && getParameters(ht, 1, &p_str) != FAILURE) || - (argc == 2 && getParameters(ht, 2, &p_str, &p_chunklen) != FAILURE) || - (argc == 3 && getParameters(ht, 3, &p_str, &p_chunklen, - &p_ending) != FAILURE))) { + if (argc < 1 || argc > 3 || + getParameters(ht, argc, &p_str, &p_chunklen, &p_ending) == FAILURE) { WRONG_PARAM_COUNT; } - + switch(argc) { case 3: convert_to_string(p_ending); @@ -812,10 +826,10 @@ PHP_FUNCTION(chunk_split) } result = _php3_chunk_split(p_str->value.str.val, p_str->value.str.len, - end, endlen, chunklen); + end, endlen, chunklen, &result_len); if(result) { - RETVAL_STRING(result, 0); + RETVAL_STRINGL(result, result_len, 0); } else { RETURN_FALSE; } @@ -997,20 +1011,42 @@ PHP_FUNCTION(ucwords) if(*(r+1)){ r++; *r=toupper((unsigned char)*r); - } else break; + } else { + break; + } } RETVAL_STRING(arg->value.str.val,1); } /* }}} */ +PHPAPI char *_php3_strtr(char *string, int len, char *str_from, + char *str_to, int trlen) +{ + int i; + unsigned char xlat[256]; + + if ((trlen < 1) || (len < 1)) { + return string; + } + + for (i = 0; i < 256; xlat[i] = i, i++); + + for (i = 0; i < trlen; i++) { + xlat[(unsigned char) str_from[i]] = str_to[i]; + } + + for (i = 0; i < len; i++) { + string[i] = xlat[(unsigned char) string[i]]; + } + + return string; +} + /* {{{ proto string strtr(string str, string from, string to) Translate characters in str using given translation tables */ PHP_FUNCTION(strtr) { /* strtr(STRING,FROM,TO) */ pval *str, *from, *to; - unsigned char xlat[256]; - unsigned char *str_from, *str_to, *string; - int i, len1, len2; if (ARG_COUNT(ht) != 3 || getParameters(ht, 3, &str, &from, &to) == FAILURE) { @@ -1020,28 +1056,12 @@ PHP_FUNCTION(strtr) convert_to_string(from); convert_to_string(to); - string = (unsigned char*) str->value.str.val; - str_from = (unsigned char*) from->value.str.val; - str_to = (unsigned char*) to->value.str.val; - - len1 = from->value.str.len; - len2 = to->value.str.len; - - if (len1 > len2) { - str_from[len2] = '\0'; - len1 = len2; - } - for (i = 0; i < 256; xlat[i] = i, i++); - - for (i = 0; i < len1; i++) { - xlat[(unsigned char) str_from[i]] = str_to[i]; - } - - for (i = 0; i < str->value.str.len; i++) { - string[i] = xlat[(unsigned char) string[i]]; - } - - RETVAL_STRING((char *)string,1); + RETVAL_STRING(_php3_strtr(str->value.str.val, + str->value.str.len, + from->value.str.val, + to->value.str.val, + MIN(from->value.str.len,to->value.str.len)), + 1); } /* }}} */ @@ -1073,6 +1093,82 @@ PHP_FUNCTION(strrev) } /* }}} */ +static void _php3_similar_str(const char *txt1, int len1, const char *txt2, + int len2, int *pos1, int *pos2, int *max) +{ + char *p, *q; + char *end1 = (char *) txt1 + len1; + char *end2 = (char *) txt2 + len2; + int l; + + *max = 0; + for (p = (char *) txt1; p < end1; p++) { + for (q = (char *) txt2; q < end2; q++) { + for (l = 0; (p + l < end1) && (q + l < end2) && (p[l] == q[l]); + l++); + if (l > *max) { + *max = l; + *pos1 = p - txt1; + *pos2 = q - txt2; + } + } + } +} + +static int _php3_similar_char(const char *txt1, int len1, + const char *txt2, int len2) +{ + int sum; + int pos1, pos2, max; + + _php3_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max); + if ((sum = max)) { + if (pos1 && pos2) + sum += _php3_similar_char(txt1, pos1, txt2, pos2); + if ((pos1 + max < len1) && (pos2 + max < len2)) + sum += _php3_similar_char(txt1 + pos1 + max, len1 - pos1 - max, + txt2 + pos2 + max, len2 - pos2 -max); + } + return sum; +} + +/* {{{ proto int similar_text(string str1, string str2 [, double percent]) + Calculates the similarity between two strings */ +PHP_FUNCTION(similar_text) +{ + pval *t1, *t2, *percent; + int ac = ARG_COUNT(ht); + int sim; + + if (ac < 2 || ac > 3 || + getParameters(ht, ac, &t1, &t2, &percent) == FAILURE) { + WRONG_PARAM_COUNT; + } + + convert_to_string(t1); + convert_to_string(t2); + if (ac > 2) { + convert_to_double(percent); + } + + if ((t1->value.str.len + t2->value.str.len) == 0) { + if(ac > 2) { + percent->value.dval = 0; + } + RETURN_LONG(0); + } + + sim = _php3_similar_char(t1->value.str.val, t1->value.str.len, + t2->value.str.val, t2->value.str.len); + + if (ac > 2) { + percent->value.dval = sim * 200.0 / (t1->value.str.len + t2->value.str.len); + } + + RETURN_LONG(sim); +} +/* }}} */ + /* be careful, this edits the string in-place */ PHPAPI void _php3_stripslashes(char *string, int *len) @@ -1223,7 +1319,7 @@ PHPAPI char *_php3_addslashes(char *str, int length, int *new_length, int should #define _isblank(c) (((((unsigned char) c)==' ' || ((unsigned char) c)=='\t')) ? 1 : 0) #define _isnewline(c) (((((unsigned char) c)=='\n' || ((unsigned char) c)=='\r')) ? 1 : 0) -static void _php3_char_to_str(char *str,uint len,char from,char *to,int to_len,pval *result) +PHPAPI void _php3_char_to_str(char *str,uint len,char from,char *to,int to_len,pval *result) { int char_count=0; char *source,*target,*tmp,*source_end=str+len, *tmp_end=NULL; @@ -1259,6 +1355,7 @@ static void _php3_char_to_str(char *str,uint len,char from,char *to,int to_len,p *target = 0; } +#if 0 /* * this is a binary safe equivalent to strnstr * note that we don't check for the end in str_to_str but here @@ -1328,6 +1425,56 @@ finish: return new; } +#else + +static char *_php3_memstr(char *s, char *c, size_t n, size_t m) +{ + char *p; + + for(p = s; (p - s) < n; p++) + if(memcmp(p, c, m) == 0) + return p; + return NULL; +} + +#define ATTCHSTR(st, sz) \ + nl += sz; \ + n = erealloc(n, nl + 1); \ + memcpy(n + no, st, sz); \ + no += sz + + +static char *_php3_str_to_str(char *a, int al, char *b, int bl, char *c, int cl, + int *newlen) +{ + char *n = NULL, *p, *q; + int nl = 0; + int no = 0; + + /* run through all occurences of b in a */ + for(p = q = a; (p = _php3_memstr(p, b, al - (p - a), bl)); q = p) { + /* attach everything between the previous occ. and this one */ + ATTCHSTR(q, p - q); + /* attach the replacement string c */ + ATTCHSTR(c, cl); + /* jump over string b in a */ + p += bl; + } + + /* anything left over ? */ + if((al - (q - a)) > 0) { + ATTCHSTR(q, al - (q - a)); + } + + if(newlen) *newlen = nl; + n[nl] = '\0'; + + return n; +} + +#undef ATTCHSTR +#endif + /* {{{ proto string str_replace(string needle, string str, string haystack) Replace all occurrences of needle in haystack with str */ PHP_FUNCTION(str_replace) @@ -1343,7 +1490,19 @@ PHP_FUNCTION(str_replace) convert_to_string(haystack); convert_to_string(needle); - convert_to_string(str); if(needle->value.str.len == 1) { _php3_char_to_str(haystack->value.str.val,haystack->value.str.len,needle->value.str.val[0],str->value.str.val, str->value.str.len ,return_value); + convert_to_string(str); + + if(haystack->value.str.len == 0) { + RETURN_STRING(empty_string,1); + } + + if(needle->value.str.len == 1) { + _php3_char_to_str(haystack->value.str.val, + haystack->value.str.len, + needle->value.str.val[0], + str->value.str.val, + str->value.str.len, + return_value); return; } @@ -1353,9 +1512,8 @@ PHP_FUNCTION(str_replace) } new = _php3_str_to_str(haystack->value.str.val, haystack->value.str.len, - needle->value.str.val, needle->value.str.len, - str->value.str.val, str->value.str.len, - &len); + needle->value.str.val, needle->value.str.len, + str->value.str.val, str->value.str.len, &len); RETURN_STRINGL(new, len, 0); } /* }}} */ @@ -1535,9 +1693,9 @@ PHP_FUNCTION(hebrev) } /* }}} */ -/* {{{ proto string hebrev(string str [, int max_chars_per_line]) +/* {{{ proto string hebrevc(string str [, int max_chars_per_line]) Convert logical Hebrew text to visual text with newline conversion */ -PHP_FUNCTION(hebrev_with_conversion) +PHP_FUNCTION(hebrevc) { _php3_hebrev(INTERNAL_FUNCTION_PARAM_PASSTHRU,1); } @@ -1545,7 +1703,7 @@ PHP_FUNCTION(hebrev_with_conversion) /* {{{ proto string nl2br(string str) Converts newlines to HTML line breaks */ -PHP_FUNCTION(newline_to_br) +PHP_FUNCTION(nl2br) { pval *str; @@ -1559,6 +1717,23 @@ PHP_FUNCTION(newline_to_br) } /* }}} */ +/* {{{ proto string strip_tags(string str) + Strips HTML and PHP tags from a string */ +PHP_FUNCTION(strip_tags) +{ + char *buf; + pval *str; + + if (ARG_COUNT(ht) != 1 || getParameters(ht, 1, &str) == FAILURE) { + WRONG_PARAM_COUNT; + } + convert_to_string(str); + buf = estrdup(str->value.str.val); + _php3_strip_tags(buf, 0); + RETURN_STRING(buf, 0); +} +/* }}} */ + /* {{{ proto string setlocale(string category, string locale) Set locale information */ PHP_FUNCTION(setlocale) @@ -1569,7 +1744,7 @@ PHP_FUNCTION(setlocale) if (ARG_COUNT(ht)!=2 || getParameters(ht, 2, &category, &locale)==FAILURE) WRONG_PARAM_COUNT; -#if HAVE_SETLOCALE +#ifdef HAVE_SETLOCALE convert_to_string(category); convert_to_string(locale); if (!strcasecmp ("LC_ALL", category->value.str.val)) @@ -1618,6 +1793,103 @@ PHP_FUNCTION(parsestr) } php3_treat_data(PARSE_STRING, res); } +/* }}} */ + +/* A simple little state-machine to strip out html and php tags + + State 0 is the output state, State 1 means we are inside a + normal html tag and state 2 means we are inside a php tag. + + The state variable is passed in to allow a function like fgetss + to maintain state across calls to the function. + + lc holds the last significant character read and br is a bracket + counter. +*/ +void _php3_strip_tags(char *rbuf, int state) { + char *buf, *p, *rp, c, lc; + int br; + + buf = estrdup(rbuf); + c = *buf; + lc = '\0'; + p = buf; + rp = rbuf; + br = 0; + + while (c) { /* This is not binary-safe. Don't see why it should be */ + switch (c) { + case '<': + if (state == 0) { + lc = '<'; + state = 1; + } + break; + + case '(': + if (state == 2) { + if (lc != '\"') { + lc = '('; + br++; + } + } else if (state == 0) { + *(rp++) = c; + } + break; + + case ')': + if (state == 2) { + if (lc != '\"') { + lc = ')'; + br--; + } + } else if (state == 0) { + *(rp++) = c; + } + break; + + case '>': + if (state == 1) { + lc = '>'; + state = 0; + } else if (state == 2) { + if (!br && lc != '\"' && *(p-1)=='?') { + state = 0; + } + } + break; + + case '\"': + if (state == 2) { + if (lc == '\"') { + lc = '\0'; + } else if (lc != '\\') { + lc = '\"'; + } + } else if (state == 0) { + *(rp++) = c; + } + break; + + case '?': + if (state==1 && *(p-1)=='<') { + br=0; + state=2; + break; + } + /* fall-through */ + + default: + if (state == 0) { + *(rp++) = c; + } + break; + } + c = *(++p); + } + *rp = '\0'; + efree(buf); +} /* * Local variables: |
