summaryrefslogtreecommitdiff
path: root/ext/standard/levenshtein.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/standard/levenshtein.c')
-rw-r--r--ext/standard/levenshtein.c252
1 files changed, 0 insertions, 252 deletions
diff --git a/ext/standard/levenshtein.c b/ext/standard/levenshtein.c
deleted file mode 100644
index d157cf7fab..0000000000
--- a/ext/standard/levenshtein.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- +----------------------------------------------------------------------+
- | PHP version 4.0 |
- +----------------------------------------------------------------------+
- | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group |
- +----------------------------------------------------------------------+
- | This source file is subject to version 2.02 of the PHP license, |
- | that is bundled with this package in the file LICENSE, and is |
- | available at through the world-wide-web at |
- | http://www.php.net/license/2_02.txt. |
- | If you did not receive a copy of the PHP license and are unable to |
- | obtain it through the world-wide-web, please send a note to |
- | license@php.net so we can mail you a copy immediately. |
- +----------------------------------------------------------------------+
- | Author: Hartmut Holzgraefe <hartmut@six.de> |
- +----------------------------------------------------------------------+
- */
-/* $Id$ */
-
-#include "php.h"
-#include <stdlib.h>
-#include <errno.h>
-#include <ctype.h>
-#include "php_string.h"
-
-/* faster, but obfuscated, all operations have a cost of 1 */
-static int fastest_levdist(const char *s1, const char *s2)
-{
- register char *p1,*p2;
- register int i,j,n;
- int l1=0,l2=0;
- char r[512];
- const char *tmp;
-
- /* skip equal start sequence, if any */
- while(*s1==*s2) {
- if(!*s1) break;
- s1++; s2++;
- }
-
- /* if we already used up one string, then
- the result is the length of the other */
- if(*s1=='\0') return strlen(s2);
- if(*s2=='\0') return strlen(s1);
-
- /* length count */
- while(*s1++) l1++;
- while(*s2++) l2++;
-
- /* cut of equal tail sequence, if any */
- while(*--s1 == *--s2) {
- l1--; l2--;
- }
-
-
- /* reset pointers, adjust length */
- s1-=l1++;
- s2-=l2++;
-
-
- /* possible dist to great? */
- if(abs(l1-l2)>=255) return -1;
-
- /* swap if l2 longer than l1 */
- if(l1<l2) {
- tmp=s1; s1=s2; s2=tmp;
- l1 ^= l2; l2 ^= l1; l1 ^= l2;
- }
-
-
- /* fill initial row */
- n=1;
- for(i=0,p1=r;i<l1;i++,*p1++=n++,p1++) {/*empty*/}
-
- /* calc. rowwise */
- for(j=1;j<l2;j++) {
- /* init pointers and col#0 */
- p1 = r + !(j&1);
- p2 = r + (j&1);
- n=*p1+1;
- *p2++=n;p2++;
- s2++;
-
- /* foreach column */
- for(i=1;i<l1;i++) {
- if(*p1<n) n=*p1+(*(s1+i)!=*(s2)); /* replace cheaper than delete? */
- p1++;
- if(*++p1<n) n=*p1+1; /* insert cheaper then replace ? */
- *p2++=n++; /* update field and cost for next col's delete */
- p2++;
- }
- }
-
- /* return result */
- return n-1;
-}
-
-
-static int weighted_levdist( const char *s1
- , const char *s2
- , const int cost_ins
- , const int cost_rep
- , const int cost_del
- )
-{
- register int *p1,*p2;
- register int i,j,n,c;
- int l1=0,l2=0;
- int r[512];
- const char *tmp;
-
- /* skip equal start sequence, if any */
- while(*s1==*s2) {
- if(!*s1) break;
- s1++; s2++;
- }
-
- /* if we already used up one string, then
- the result is the length of the other */
- if(*s1=='\0') return strlen(s2);
- if(*s2=='\0') return strlen(s1);
-
- /* length count */
- while(*s1++) l1++;
- while(*s2++) l2++;
-
- /* cut of equal tail sequence, if any */
- while(*--s1 == *--s2) {
- l1--; l2--;
- }
-
-
- /* reset pointers, adjust length */
- s1-=l1++;
- s2-=l2++;
-
-
- /* possible dist to great? */
- if(abs(l1-l2)>=255) return -1;
-
- /* swap if l2 longer than l1 */
- if(l1<l2) {
- tmp=s1; s1=s2; s2=tmp;
- l1 ^= l2; l2 ^= l1; l1 ^= l2;
- }
-
- if((l1==1)&&(l2==1)) {
- n= cost_del+cost_ins;
- return n<cost_rep?n:cost_rep;
- }
-
- /* fill initial row */
- n=cost_ins;
- for(i=0,p1=r;i<l1;i++,*p1++=n,p1++) {n+=cost_ins;}
-
- /* calc. rowwise */
- for(j=1;j<l2;j++) {
- /* init pointers and col#0 */
- p1 = r + !(j&1);
- p2 = r + (j&1);
- n=*p1+cost_del;
- *p2++=n;p2++;
- s2++;
-
- /* foreach column */
- for(i=1;i<l1;i++) {
- c = *p1; if(*(s1+i)!=*(s2)) c+=cost_rep;
- if(c<n) n=c; /* replace cheaper than delete? */
- p1++;
- c= *++p1+cost_ins;
- if(c<n) n=c; /* insert cheaper then replace ? */
- *p2++=n; /* update field and cost for next col's delete */
- n+=cost_del; /* update field and cost for next col's delete */
- p2++;
- }
- }
-
- /* return result */
- return n-=cost_del;
-}
-
-static int custom_levdist(char *str1,char *str2,char *callback_name)
-{
- php_error(E_WARNING,"the general Levenshtein support is not there yet");
- /* not there yet */
-
- return -1;
-}
-
-
-/* {{{ proto int levenshtein(string str1, string str2)
- Calculate Levenshtein distance between two strings */
-PHP_FUNCTION(levenshtein)
-{
- zval **str1, **str2, **cost_ins, **cost_rep, **cost_del,**callback_name;
- int distance=-1;
-
- switch(ZEND_NUM_ARGS()) {
- case 2: /* just two string: use maximum performance version */
- if (zend_get_parameters_ex(2, &str1, &str2) == FAILURE) {
- WRONG_PARAM_COUNT;
- }
- convert_to_string_ex(str1);
- convert_to_string_ex(str2);
-
- distance = fastest_levdist((*str1)->value.str.val, (*str2)->value.str.val);
- break;
-
- case 5: /* more general version: calc cost by ins/rep/del weights */
- if (zend_get_parameters_ex(5, &str1, &str2, &cost_ins, &cost_rep, &cost_del) == FAILURE) {
- WRONG_PARAM_COUNT;
- }
- convert_to_string_ex(str1);
- convert_to_string_ex(str2);
- convert_to_long_ex(cost_ins);
- convert_to_long_ex(cost_rep);
- convert_to_long_ex(cost_del);
-
- distance = weighted_levdist((*str1)->value.str.val
- , (*str2)->value.str.val
- , (*cost_ins)->value.lval
- , (*cost_rep)->value.lval
- , (*cost_del)->value.lval
- );
-
- break;
-
- case 3: /* most general version: calc cost by user-supplied function */
- if (zend_get_parameters_ex(3, &str1, &str2, &callback_name) == FAILURE) {
- WRONG_PARAM_COUNT;
- }
- convert_to_string_ex(str1);
- convert_to_string_ex(str2);
- convert_to_string_ex(callback_name);
-
- distance = custom_levdist((*str1)->value.str.val
- , (*str2)->value.str.val
- , (*callback_name)->value.str.val
- );
- break;
-
- default:
- WRONG_PARAM_COUNT;
- }
-
- if(distance<0) {
- php_error(E_WARNING,"levenshtein(): argument string(s) too long");
- }
-
- RETURN_LONG(distance);
-}
-/* }}} */