summaryrefslogtreecommitdiff
path: root/ext/standard/levenshtein.c
blob: ccb767b6d8f1ce8cb0d64681c1c75ec76b2a7b0b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*
   +----------------------------------------------------------------------+
   | PHP version 4.0                                                      |
   +----------------------------------------------------------------------+
   | Copyright (c) 1997, 1998, 1999, 2000 The PHP Group                   |
   +----------------------------------------------------------------------+
   | This source file is subject to version 2.02 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available at through the world-wide-web at                           |
   | http://www.php.net/license/2_02.txt.                                 |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Author: Hartmut Holzgraefe <hartmut@six.de>                          |
   +----------------------------------------------------------------------+
 */
/* $Id$ */

#include "php.h"
#include <stdlib.h>
#include <errno.h>
#include <ctype.h>
#include "php_string.h"

static int calc_levdist(const char *s1, const char *s2) /* faster, but obfuscated */
{
	register char *p1,*p2; 
	register int i,j,n;
	int l1=0,l2=0;
	char r[512];
	const char *tmp;

	/* skip equal start sequence, if any */
	while(*s1==*s2) {
		if(!*s1) break;
		s1++; s2++;
	}
	
	/* if we already used up one string, then
      the result is the length of the other */
	if(*s1=='\0') return strlen(s2);
	if(*s2=='\0') return strlen(s1);

	/* length count */
	while(*s1++) l1++;
	while(*s2++) l2++;
	
	/* cut of equal tail sequence, if any */
	while(*--s1 == *--s2) {
		l1--; l2--;		
	}
	

	/* reset pointers, adjust length */
	s1-=l1++;
	s2-=l2++;
	

	/* possible dist to great? */
 	if(abs(l1-l2)>=255) return -1;

	/* swap if l2 longer than l1 */
	if(l1<l2) {
		tmp=s1; s1=s2; s2=tmp;
		l1 ^= l2; l2 ^= l1; l1 ^= l2;
	}

	
	/* fill initial row */
	n=(*s1!=*s2);
	for(i=0,p1=r;i<l1;i++,*p1++=n++,p1++) {/*empty*/}
	
	/* calc. rowwise */
	for(j=1;j<l2;j++) {
		/* init pointers and col#0 */
		p1 = r + !(j&1);
		p2 = r + (j&1);
		n=*p1+1;
		*p2++=n;p2++;
		s2++;
		
		/* foreach column */
		for(i=1;i<l1;i++) {
			if(*p1<n) n=*p1+(*(s1+i)!=*(s2)); /* replace cheaper than delete? */
			p1++;
			if(*++p1<n) n=*p1+1; /* insert cheaper then insert ? */
			*p2++=n++; /* update field and cost for next col's delete */
			p2++;
		}	
	}

	/* return result */
	return n-1;
}


/* {{{ proto int levenshtein(string str1, string str2)
   Calculate Levenshtein distance between two strings */
PHP_FUNCTION(levenshtein)
{
	zval **str1, **str2;
	int l;

	if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &str1, &str2) == FAILURE) {
		WRONG_PARAM_COUNT;
	}
	convert_to_string_ex(str1);
	convert_to_string_ex(str2);
	
	l = calc_levdist((*str1)->value.str.val, (*str2)->value.str.val);

	if(l<0) {
		php_error(E_WARNING,"levenshtein(): argument string(s) too long");
	}

	RETURN_LONG(l);
}
/* }}} */