summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2006-12-18 13:21:20 +0000
committerBruno Haible <bruno@clisp.org>2006-12-18 13:21:20 +0000
commit015003ecc207067577e8dc098a34d8fbb9832c92 (patch)
tree376678daf5b2edeaf3d8e511f29efdb76a5cb667
parent9a9cbc04b5d0fb73951283d8b90ff3248a45d23c (diff)
downloadgnulib-015003ecc207067577e8dc098a34d8fbb9832c92.tar.gz
Make generic: New macro EXTRA_CONTEXT_FIELDS, NOTE_DELETE, NOTE_INSERT.
-rw-r--r--lib/analyze.c3
-rw-r--r--lib/diffseq.h21
-rw-r--r--lib/fstrcmp.c95
3 files changed, 68 insertions, 51 deletions
diff --git a/lib/analyze.c b/lib/analyze.c
index 772925b297..1832a6ebc0 100644
--- a/lib/analyze.c
+++ b/lib/analyze.c
@@ -31,6 +31,9 @@
#define ELEMENT lin
#define EQUAL(x,y) ((x) == (y))
#define OFFSET lin
+#define EXTRA_CONTEXT_FIELDS /* none */
+#define NOTE_DELETE(ctxt, xoff) files[0].changed[files[0].realindexes[xoff]] = 1
+#define NOTE_INSERT(ctxt, yoff) files[1].changed[files[1].realindexes[yoff]] = 1
#define USE_HEURISTIC 1
#include "diffseq.h"
diff --git a/lib/diffseq.h b/lib/diffseq.h
index 387578b6c6..0f9d6eb56a 100644
--- a/lib/diffseq.h
+++ b/lib/diffseq.h
@@ -18,7 +18,7 @@
/* The basic idea is to consider two vectors as similar if, when
transforming the first vector into the second vector through a
- sequence of edits (inserts and deletes of one character each),
+ sequence of edits (inserts and deletes of one element each),
this sequence is short - or equivalently, if the ordered list
of elements that are untouched by these edits is long. For a
good introduction to the subject, read about the "Levenshtein
@@ -45,6 +45,9 @@
OFFSET A signed integer type sufficient to hold the
difference between two indices. Usually
something like ssize_t.
+ EXTRA_CONTEXT_FIELDS Declarations of fields for 'struct context'.
+ NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff].
+ NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff].
USE_HEURISTIC (Optional) Define if you want to support the
heuristic for large vectors. */
@@ -70,6 +73,9 @@ struct context
const ELEMENT *xvec;
const ELEMENT *yvec;
+ /* Extra fields. */
+ EXTRA_CONTEXT_FIELDS
+
/* Vector, indexed by diagonal, containing 1 + the X coordinate of the point
furthest along the given diagonal in the forward search of the edit
matrix. */
@@ -440,10 +446,16 @@ compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
/* Handle simple cases. */
if (xoff == xlim)
while (yoff < ylim)
- files[1].changed[files[1].realindexes[yoff++]] = 1;
+ {
+ NOTE_INSERT (ctxt, yoff);
+ yoff++;
+ }
else if (yoff == ylim)
while (xoff < xlim)
- files[0].changed[files[0].realindexes[xoff++]] = 1;
+ {
+ NOTE_DELETE (ctxt, xoff);
+ xoff++;
+ }
else
{
struct partition part;
@@ -461,3 +473,6 @@ compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
#undef EQUAL
#undef OFFSET
#undef OFFSET_MAX
+#undef EXTRA_CONTEXT_FIELDS
+#undef NOTE_DELETE
+#undef NOTE_INSERT
diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c
index 217f8fc37a..721cd6d1ed 100644
--- a/lib/fstrcmp.c
+++ b/lib/fstrcmp.c
@@ -64,6 +64,12 @@
#define ELEMENT char
#define EQUAL(x,y) ((x) == (y))
#define OFFSET int
+#define EXTRA_CONTEXT_FIELDS \
+ /* The number of elements inserted or deleted. */ \
+ int xvec_edit_count; \
+ int yvec_edit_count;
+#define NOTE_DELETE(ctxt, xoff) ctxt->xvec_edit_count++
+#define NOTE_INSERT(ctxt, yoff) ctxt->yvec_edit_count++
/* We don't need USE_HEURISTIC, since it is unlikely in typical uses of
fstrcmp(). */
@@ -74,6 +80,9 @@
OFFSET A signed integer type sufficient to hold the
difference between two indices. Usually
something like ssize_t.
+ EXTRA_CONTEXT_FIELDS Declarations of fields for 'struct context'.
+ NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff].
+ NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff].
USE_HEURISTIC (Optional) Define if you want to support the
heuristic for large vectors. */
@@ -95,21 +104,13 @@
*/
struct context
{
- /*
- * Data on one input string being compared.
- */
- struct string_data
- {
- /* The string to be compared. */
- const ELEMENT *data;
-
- /* The length of the string to be compared. */
- int data_length;
-
- /* The number of elements inserted or deleted. */
- int edit_count;
- }
- string[2];
+ /* Vectors being compared. */
+ const ELEMENT *xvec;
+ const ELEMENT *yvec;
+
+ /* The number of elements inserted or deleted. */
+ int xvec_edit_count;
+ int yvec_edit_count;
/* Vector, indexed by diagonal, containing 1 + the X coordinate of the point
furthest along the given diagonal in the forward search of the edit
@@ -182,8 +183,8 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal,
{
OFFSET *const fd = ctxt->fdiag; /* Give the compiler a chance. */
OFFSET *const bd = ctxt->bdiag; /* Additional help for the compiler. */
- const ELEMENT *const xv = ctxt->string[0].data; /* Still more help for the compiler. */
- const ELEMENT *const yv = ctxt->string[1].data; /* And more and more . . . */
+ const ELEMENT *const xv = ctxt->xvec; /* Still more help for the compiler. */
+ const ELEMENT *const yv = ctxt->yvec; /* And more and more . . . */
const OFFSET dmin = xoff - ylim; /* Minimum valid diagonal. */
const OFFSET dmax = xlim - yoff; /* Maximum valid diagonal. */
const OFFSET fmid = xoff - yoff; /* Center diagonal of top-down search. */
@@ -462,8 +463,8 @@ static void
compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
bool find_minimal, struct context *ctxt)
{
- const ELEMENT *const xv = ctxt->string[0].data; /* Help the compiler. */
- const ELEMENT *const yv = ctxt->string[1].data;
+ const ELEMENT *const xv = ctxt->xvec; /* Help the compiler. */
+ const ELEMENT *const yv = ctxt->yvec;
/* Slide down the bottom initial diagonal. */
while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
@@ -481,21 +482,17 @@ compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
/* Handle simple cases. */
if (xoff == xlim)
- {
- while (yoff < ylim)
- {
- ctxt->string[1].edit_count++;
- ++yoff;
- }
- }
+ while (yoff < ylim)
+ {
+ NOTE_INSERT (ctxt, yoff);
+ yoff++;
+ }
else if (yoff == ylim)
- {
- while (xoff < xlim)
- {
- ctxt->string[0].edit_count++;
- ++xoff;
- }
- }
+ while (xoff < xlim)
+ {
+ NOTE_DELETE (ctxt, xoff);
+ xoff++;
+ }
else
{
struct partition part;
@@ -553,6 +550,8 @@ double
fstrcmp (const char *string1, const char *string2)
{
struct context ctxt;
+ int xvec_length;
+ int yvec_length;
int i;
size_t fdiag_len;
@@ -560,21 +559,21 @@ fstrcmp (const char *string1, const char *string2)
size_t bufmax;
/* set the info for each string. */
- ctxt.string[0].data = string1;
- ctxt.string[0].data_length = strlen (string1);
- ctxt.string[1].data = string2;
- ctxt.string[1].data_length = strlen (string2);
+ ctxt.xvec = string1;
+ xvec_length = strlen (string1);
+ ctxt.yvec = string2;
+ yvec_length = strlen (string2);
/* short-circuit obvious comparisons */
- if (ctxt.string[0].data_length == 0 && ctxt.string[1].data_length == 0)
+ if (xvec_length == 0 && yvec_length == 0)
return 1.0;
- if (ctxt.string[0].data_length == 0 || ctxt.string[1].data_length == 0)
+ if (xvec_length == 0 || yvec_length == 0)
return 0.0;
/* Set TOO_EXPENSIVE to be approximate square root of input size,
bounded below by 256. */
ctxt.too_expensive = 1;
- for (i = ctxt.string[0].data_length + ctxt.string[1].data_length;
+ for (i = xvec_length + yvec_length;
i != 0;
i >>= 2)
ctxt.too_expensive <<= 1;
@@ -582,7 +581,7 @@ fstrcmp (const char *string1, const char *string2)
ctxt.too_expensive = 256;
/* Allocate memory for fdiag and bdiag from a thread-local pool. */
- fdiag_len = ctxt.string[0].data_length + ctxt.string[1].data_length + 3;
+ fdiag_len = xvec_length + yvec_length + 3;
gl_once (keys_init_once, keys_init);
buffer = (int *) gl_tls_get (buffer_key);
bufmax = (size_t) (uintptr_t) gl_tls_get (bufmax_key);
@@ -600,20 +599,20 @@ fstrcmp (const char *string1, const char *string2)
gl_tls_set (buffer_key, buffer);
gl_tls_set (bufmax_key, (void *) (uintptr_t) bufmax);
}
- ctxt.fdiag = buffer + ctxt.string[1].data_length + 1;
+ ctxt.fdiag = buffer + yvec_length + 1;
ctxt.bdiag = ctxt.fdiag + fdiag_len;
/* Now do the main comparison algorithm */
- ctxt.string[0].edit_count = 0;
- ctxt.string[1].edit_count = 0;
- compareseq (0, ctxt.string[0].data_length, 0, ctxt.string[1].data_length, 0,
+ ctxt.xvec_edit_count = 0;
+ ctxt.yvec_edit_count = 0;
+ compareseq (0, xvec_length, 0, yvec_length, 0,
&ctxt);
/* The result is
((number of chars in common) / (average length of the strings)).
This is admittedly biased towards finding that the strings are
similar, however it does produce meaningful results. */
- return ((double) (ctxt.string[0].data_length + ctxt.string[1].data_length
- - ctxt.string[1].edit_count - ctxt.string[0].edit_count)
- / (ctxt.string[0].data_length + ctxt.string[1].data_length));
+ return ((double) (xvec_length + yvec_length
+ - ctxt.yvec_edit_count - ctxt.xvec_edit_count)
+ / (xvec_length + yvec_length));
}