diff options
author | Bruno Haible <bruno@clisp.org> | 2008-09-14 21:26:31 +0200 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2008-09-14 21:26:31 +0200 |
commit | d7eadef6b504aec15eb2e1d3a88b2471062533d1 (patch) | |
tree | 78528f1f1e57621310c5da63ba15e983a29e0d7c | |
parent | 4facad5a3d557a73ea0bb1cf8b6e835163e1def1 (diff) | |
download | gnulib-d7eadef6b504aec15eb2e1d3a88b2471062533d1.tar.gz |
Simplify result computation.
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | lib/fstrcmp.c | 23 |
2 files changed, 19 insertions, 11 deletions
@@ -1,3 +1,10 @@ +2008-09-14 Bruno Haible <bruno@clisp.org> + + * lib/fstrcmp.c (EXTRA_CONTEXT_FIELDS): Combine xvec_edit_count and + yvec_edit_count. + (NOTE_DELETE, NOTE_INSERT): Increment the combined edit count. + (fstrcmp_bounded): Simplify result computation accordingly. + 2008-09-14 Ralf Wildenhues <Ralf.Wildenhues@gmx.de> * lib/fstrcmp.h (fstrcmp_bounded): New declaration. diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c index 86a351fa20..32aa0c2f00 100644 --- a/lib/fstrcmp.c +++ b/lib/fstrcmp.c @@ -65,11 +65,10 @@ #define EQUAL(x,y) ((x) == (y)) #define OFFSET int #define EXTRA_CONTEXT_FIELDS \ - /* The number of elements inserted or deleted. */ \ - int xvec_edit_count; \ - int yvec_edit_count; -#define NOTE_DELETE(ctxt, xoff) ctxt->xvec_edit_count++ -#define NOTE_INSERT(ctxt, yoff) ctxt->yvec_edit_count++ + /* The number of elements inserted, plus the number of elements deleted. */ \ + int edit_count; +#define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++ +#define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++ /* We don't need USE_HEURISTIC, since it is unlikely in typical uses of fstrcmp(). */ #include "diffseq.h" @@ -122,10 +121,10 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) with N edits, | yvec_length - xvec_length | <= N. (Proof by induction over N.) So, at the end, we will have - xvec_edit_count + yvec_edit_count >= | xvec_length - yvec_length |. + edit_count >= | xvec_length - yvec_length |. and hence result - = (xvec_length + yvec_length - (xvec_edit_count + yvec_edit_count)) + = (xvec_length + yvec_length - edit_count) / (xvec_length + yvec_length) <= (xvec_length + yvec_length - | yvec_length - xvec_length |) / (xvec_length + yvec_length) @@ -177,16 +176,18 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) ctxt.bdiag = ctxt.fdiag + fdiag_len; /* Now do the main comparison algorithm */ - ctxt.xvec_edit_count = 0; - ctxt.yvec_edit_count = 0; + ctxt.edit_count = 0; compareseq (0, xvec_length, 0, yvec_length, 0, &ctxt); /* The result is ((number of chars in common) / (average length of the strings)). + The numerator is + = xvec_length - (number of calls to NOTE_DELETE) + = yvec_length - (number of calls to NOTE_INSERT) + = 1/2 * (xvec_length + yvec_length - (number of edits)). This is admittedly biased towards finding that the strings are similar, however it does produce meaningful results. */ - return ((double) (xvec_length + yvec_length - - ctxt.yvec_edit_count - ctxt.xvec_edit_count) + return ((double) (xvec_length + yvec_length - ctxt.edit_count) / (xvec_length + yvec_length)); } |