summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2008-09-14 21:26:31 +0200
committerBruno Haible <bruno@clisp.org>2008-09-14 21:26:31 +0200
commitd7eadef6b504aec15eb2e1d3a88b2471062533d1 (patch)
tree78528f1f1e57621310c5da63ba15e983a29e0d7c
parent4facad5a3d557a73ea0bb1cf8b6e835163e1def1 (diff)
downloadgnulib-d7eadef6b504aec15eb2e1d3a88b2471062533d1.tar.gz
Simplify result computation.
-rw-r--r--ChangeLog7
-rw-r--r--lib/fstrcmp.c23
2 files changed, 19 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 39d568307f..4f7d9e6ab2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-09-14 Bruno Haible <bruno@clisp.org>
+
+ * lib/fstrcmp.c (EXTRA_CONTEXT_FIELDS): Combine xvec_edit_count and
+ yvec_edit_count.
+ (NOTE_DELETE, NOTE_INSERT): Increment the combined edit count.
+ (fstrcmp_bounded): Simplify result computation accordingly.
+
2008-09-14 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
* lib/fstrcmp.h (fstrcmp_bounded): New declaration.
diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c
index 86a351fa20..32aa0c2f00 100644
--- a/lib/fstrcmp.c
+++ b/lib/fstrcmp.c
@@ -65,11 +65,10 @@
#define EQUAL(x,y) ((x) == (y))
#define OFFSET int
#define EXTRA_CONTEXT_FIELDS \
- /* The number of elements inserted or deleted. */ \
- int xvec_edit_count; \
- int yvec_edit_count;
-#define NOTE_DELETE(ctxt, xoff) ctxt->xvec_edit_count++
-#define NOTE_INSERT(ctxt, yoff) ctxt->yvec_edit_count++
+ /* The number of elements inserted, plus the number of elements deleted. */ \
+ int edit_count;
+#define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++
+#define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++
/* We don't need USE_HEURISTIC, since it is unlikely in typical uses of
fstrcmp(). */
#include "diffseq.h"
@@ -122,10 +121,10 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound)
with N edits, | yvec_length - xvec_length | <= N. (Proof by
induction over N.)
So, at the end, we will have
- xvec_edit_count + yvec_edit_count >= | xvec_length - yvec_length |.
+ edit_count >= | xvec_length - yvec_length |.
and hence
result
- = (xvec_length + yvec_length - (xvec_edit_count + yvec_edit_count))
+ = (xvec_length + yvec_length - edit_count)
/ (xvec_length + yvec_length)
<= (xvec_length + yvec_length - | yvec_length - xvec_length |)
/ (xvec_length + yvec_length)
@@ -177,16 +176,18 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound)
ctxt.bdiag = ctxt.fdiag + fdiag_len;
/* Now do the main comparison algorithm */
- ctxt.xvec_edit_count = 0;
- ctxt.yvec_edit_count = 0;
+ ctxt.edit_count = 0;
compareseq (0, xvec_length, 0, yvec_length, 0,
&ctxt);
/* The result is
((number of chars in common) / (average length of the strings)).
+ The numerator is
+ = xvec_length - (number of calls to NOTE_DELETE)
+ = yvec_length - (number of calls to NOTE_INSERT)
+ = 1/2 * (xvec_length + yvec_length - (number of edits)).
This is admittedly biased towards finding that the strings are
similar, however it does produce meaningful results. */
- return ((double) (xvec_length + yvec_length
- - ctxt.yvec_edit_count - ctxt.xvec_edit_count)
+ return ((double) (xvec_length + yvec_length - ctxt.edit_count)
/ (xvec_length + yvec_length));
}