summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHartmut Holzgraefe <hholzgra@php.net>2000-05-23 14:10:37 +0000
committerHartmut Holzgraefe <hholzgra@php.net>2000-05-23 14:10:37 +0000
commit0eb51100b77996efbf0a728efe55ded996814025 (patch)
tree71eb143f99970ae6ffb284b12cec19719de30975
parent6d51f4a78879d5160999f7b7ed6094bad89acdaf (diff)
downloadphp-git-0eb51100b77996efbf0a728efe55ded996814025.tar.gz
added function "int levdist(string str1, string str2)"
that will calculate the Levenshtein distance between two strings (faster and possibly more accurate than similar_text())
-rw-r--r--ext/standard/Makefile.in2
-rw-r--r--ext/standard/basic_functions.c1
-rw-r--r--ext/standard/levenshtein.c117
-rw-r--r--ext/standard/php_string.h1
4 files changed, 120 insertions, 1 deletions
diff --git a/ext/standard/Makefile.in b/ext/standard/Makefile.in
index 221922f52c..d83ed1a49a 100644
--- a/ext/standard/Makefile.in
+++ b/ext/standard/Makefile.in
@@ -7,7 +7,7 @@ LTLIBRARY_SOURCES=\
link.c mail.c math.c md5.c metaphone.c microtime.c pack.c pageinfo.c \
parsedate.c quot_print.c rand.c reg.c soundex.c string.c \
syslog.c type.c uniqid.c url.c url_scanner.c var.c output.c assert.c \
- strnatcmp.c
+ strnatcmp.c levenshtein.c
include $(top_srcdir)/build/dynlib.mk
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c
index 4a1aa32a0d..8a45900d67 100644
--- a/ext/standard/basic_functions.c
+++ b/ext/standard/basic_functions.c
@@ -162,6 +162,7 @@ function_entry basic_functions[] = {
PHP_FE(implode, NULL)
PHP_FE(setlocale, NULL)
PHP_FE(soundex, NULL)
+ PHP_FE(levdist, NULL)
PHP_FE(chr, NULL)
PHP_FE(ord, NULL)
PHP_FE(parse_str, NULL)
diff --git a/ext/standard/levenshtein.c b/ext/standard/levenshtein.c
new file mode 100644
index 0000000000..946f36466c
--- /dev/null
+++ b/ext/standard/levenshtein.c
@@ -0,0 +1,117 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP version 4.0 |
+ +----------------------------------------------------------------------+
+ | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 2.02 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available at through the world-wide-web at |
+ | http://www.php.net/license/2_02.txt. |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> |
+ +----------------------------------------------------------------------+
+ */
+/* $Id$ */
+
+#include "php.h"
+#include <stdlib.h>
+#include <errno.h>
+#include <ctype.h>
+#include "php_string.h"
+
+int calc_levdist(const char *s1, const char *s2) /* faster, but obfuscated */
+{
+ register char *p1,*p2;
+ register int i,j,n;
+ int l1=0,l2=0;
+ char r[512];
+
+ /* skip equal start sequence, if any */
+ while(*s1==*s2) {
+ if(!*s1) break;
+ s1++; s2++;
+ }
+
+ /* if we already used up one string, then
+ the result is the length of the other */
+ if(*s1=='\0') return strlen(s2);
+ if(*s2=='\0') return strlen(s1);
+
+ /* length count */
+ while(*s1++) l1++;
+ while(*s2++) l2++;
+
+ /* cut of equal tail sequence, if any */
+ while(*--s1 == *--s2) {
+ l1--; l2--;
+ }
+
+
+ /* reset pointers, adjust length */
+ s1-=l1++;
+ s2-=l2++;
+
+ /* possible dist to great? */
+ if(abs(l1-l2)>=255) return -1;
+
+ /* swap if l2 longer than l1 */
+ if(l1<l2) {
+ (long)s1 ^= (long)s2; (long)s2 ^= (long)s1; (long)s1 ^= (long)s2;
+ l1 ^= l2; l2 ^= l1; l1 ^= l2;
+ }
+
+
+ /* fill initial row */
+ n=(*s1!=*s2);
+ for(i=0,p1=r;i<l1;i++,*p1++=n++,p1++) {/*empty*/}
+
+ /* calc. rowwise */
+ for(j=1;j<l2;j++) {
+ /* init pointers and col#0 */
+ p1 = r + !(j&1);
+ p2 = r + (j&1);
+ n=*p1+1;
+ *p2++=n;p2++;
+ s2++;
+
+ /* foreach column */
+ for(i=1;i<l1;i++) {
+ if(*p1<n) n=*p1+(*(s1+i)!=*(s2)); /* replace cheaper than delete? */
+ p1++;
+ if(*++p1<n) n=*p1+1; /* insert cheaper then insert ? */
+ *p2++=n++; /* update field and cost for next col's delete */
+ p2++;
+ }
+ }
+
+ /* return result */
+ return n-1;
+}
+
+
+/* {{{ proto int levdist(string str1, string str2)
+ Calculate Levenshtein distance between two strings */
+PHP_FUNCTION(levdist){
+ zval **str1, **str2;
+ int l;
+
+ if (ARG_COUNT(ht) != 2 || zend_get_parameters_ex(2, &str1, &str2) == FAILURE) {
+ WRONG_PARAM_COUNT;
+ }
+ convert_to_string_ex(str1);
+ convert_to_string_ex(str2);
+
+ l = calc_levdist((*str1)->value.str.val, (*str2)->value.str.val);
+
+ if(l<0) {
+ php_error(E_WARNING,"levdist(): argument string(s) to long");
+ }
+
+ RETURN_LONG(l);
+}
+/* }}} */
+
diff --git a/ext/standard/php_string.h b/ext/standard/php_string.h
index e71cf8f997..537c405fe7 100644
--- a/ext/standard/php_string.h
+++ b/ext/standard/php_string.h
@@ -43,6 +43,7 @@ PHP_FUNCTION(chop);
PHP_FUNCTION(trim);
PHP_FUNCTION(ltrim);
PHP_FUNCTION(soundex);
+PHP_FUNCTION(levdist);
PHP_FUNCTION(count_chars);
PHP_FUNCTION(explode);