diff options
-rw-r--r-- | ext/standard/Makefile.in | 2 | ||||
-rw-r--r-- | ext/standard/basic_functions.c | 1 | ||||
-rw-r--r-- | ext/standard/levenshtein.c | 117 | ||||
-rw-r--r-- | ext/standard/php_string.h | 1 |
4 files changed, 120 insertions, 1 deletions
diff --git a/ext/standard/Makefile.in b/ext/standard/Makefile.in index 221922f52c..d83ed1a49a 100644 --- a/ext/standard/Makefile.in +++ b/ext/standard/Makefile.in @@ -7,7 +7,7 @@ LTLIBRARY_SOURCES=\ link.c mail.c math.c md5.c metaphone.c microtime.c pack.c pageinfo.c \ parsedate.c quot_print.c rand.c reg.c soundex.c string.c \ syslog.c type.c uniqid.c url.c url_scanner.c var.c output.c assert.c \ - strnatcmp.c + strnatcmp.c levenshtein.c include $(top_srcdir)/build/dynlib.mk diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c index 4a1aa32a0d..8a45900d67 100644 --- a/ext/standard/basic_functions.c +++ b/ext/standard/basic_functions.c @@ -162,6 +162,7 @@ function_entry basic_functions[] = { PHP_FE(implode, NULL) PHP_FE(setlocale, NULL) PHP_FE(soundex, NULL) + PHP_FE(levdist, NULL) PHP_FE(chr, NULL) PHP_FE(ord, NULL) PHP_FE(parse_str, NULL) diff --git a/ext/standard/levenshtein.c b/ext/standard/levenshtein.c new file mode 100644 index 0000000000..946f36466c --- /dev/null +++ b/ext/standard/levenshtein.c @@ -0,0 +1,117 @@ +/* + +----------------------------------------------------------------------+ + | PHP version 4.0 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 2.02 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available at through the world-wide-web at | + | http://www.php.net/license/2_02.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> | + +----------------------------------------------------------------------+ + */ +/* $Id$ */ + +#include "php.h" +#include <stdlib.h> +#include <errno.h> +#include <ctype.h> +#include "php_string.h" + +int calc_levdist(const char *s1, const char *s2) /* faster, but obfuscated */ +{ + register char *p1,*p2; + register int i,j,n; + int l1=0,l2=0; + char r[512]; + + /* skip equal start sequence, if any */ + while(*s1==*s2) { + if(!*s1) break; + s1++; s2++; + } + + /* if we already used up one string, then + the result is the length of the other */ + if(*s1=='\0') return strlen(s2); + if(*s2=='\0') return strlen(s1); + + /* length count */ + while(*s1++) l1++; + while(*s2++) l2++; + + /* cut of equal tail sequence, if any */ + while(*--s1 == *--s2) { + l1--; l2--; + } + + + /* reset pointers, adjust length */ + s1-=l1++; + s2-=l2++; + + /* possible dist to great? */ + if(abs(l1-l2)>=255) return -1; + + /* swap if l2 longer than l1 */ + if(l1<l2) { + (long)s1 ^= (long)s2; (long)s2 ^= (long)s1; (long)s1 ^= (long)s2; + l1 ^= l2; l2 ^= l1; l1 ^= l2; + } + + + /* fill initial row */ + n=(*s1!=*s2); + for(i=0,p1=r;i<l1;i++,*p1++=n++,p1++) {/*empty*/} + + /* calc. rowwise */ + for(j=1;j<l2;j++) { + /* init pointers and col#0 */ + p1 = r + !(j&1); + p2 = r + (j&1); + n=*p1+1; + *p2++=n;p2++; + s2++; + + /* foreach column */ + for(i=1;i<l1;i++) { + if(*p1<n) n=*p1+(*(s1+i)!=*(s2)); /* replace cheaper than delete? */ + p1++; + if(*++p1<n) n=*p1+1; /* insert cheaper then insert ? */ + *p2++=n++; /* update field and cost for next col's delete */ + p2++; + } + } + + /* return result */ + return n-1; +} + + +/* {{{ proto int levdist(string str1, string str2) + Calculate Levenshtein distance between two strings */ +PHP_FUNCTION(levdist){ + zval **str1, **str2; + int l; + + if (ARG_COUNT(ht) != 2 || zend_get_parameters_ex(2, &str1, &str2) == FAILURE) { + WRONG_PARAM_COUNT; + } + convert_to_string_ex(str1); + convert_to_string_ex(str2); + + l = calc_levdist((*str1)->value.str.val, (*str2)->value.str.val); + + if(l<0) { + php_error(E_WARNING,"levdist(): argument string(s) to long"); + } + + RETURN_LONG(l); +} +/* }}} */ + diff --git a/ext/standard/php_string.h b/ext/standard/php_string.h index e71cf8f997..537c405fe7 100644 --- a/ext/standard/php_string.h +++ b/ext/standard/php_string.h @@ -43,6 +43,7 @@ PHP_FUNCTION(chop); PHP_FUNCTION(trim); PHP_FUNCTION(ltrim); PHP_FUNCTION(soundex); +PHP_FUNCTION(levdist); PHP_FUNCTION(count_chars); PHP_FUNCTION(explode); |