summaryrefslogtreecommitdiff
path: root/lib/fstrcmp.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/fstrcmp.c')
-rw-r--r--lib/fstrcmp.c95
1 files changed, 47 insertions, 48 deletions
diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c
index 217f8fc37a..721cd6d1ed 100644
--- a/lib/fstrcmp.c
+++ b/lib/fstrcmp.c
@@ -64,6 +64,12 @@
#define ELEMENT char
#define EQUAL(x,y) ((x) == (y))
#define OFFSET int
+#define EXTRA_CONTEXT_FIELDS \
+ /* The number of elements inserted or deleted. */ \
+ int xvec_edit_count; \
+ int yvec_edit_count;
+#define NOTE_DELETE(ctxt, xoff) ctxt->xvec_edit_count++
+#define NOTE_INSERT(ctxt, yoff) ctxt->yvec_edit_count++
/* We don't need USE_HEURISTIC, since it is unlikely in typical uses of
fstrcmp(). */
@@ -74,6 +80,9 @@
OFFSET A signed integer type sufficient to hold the
difference between two indices. Usually
something like ssize_t.
+ EXTRA_CONTEXT_FIELDS Declarations of fields for 'struct context'.
+ NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff].
+ NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff].
USE_HEURISTIC (Optional) Define if you want to support the
heuristic for large vectors. */
@@ -95,21 +104,13 @@
*/
struct context
{
- /*
- * Data on one input string being compared.
- */
- struct string_data
- {
- /* The string to be compared. */
- const ELEMENT *data;
-
- /* The length of the string to be compared. */
- int data_length;
-
- /* The number of elements inserted or deleted. */
- int edit_count;
- }
- string[2];
+ /* Vectors being compared. */
+ const ELEMENT *xvec;
+ const ELEMENT *yvec;
+
+ /* The number of elements inserted or deleted. */
+ int xvec_edit_count;
+ int yvec_edit_count;
/* Vector, indexed by diagonal, containing 1 + the X coordinate of the point
furthest along the given diagonal in the forward search of the edit
@@ -182,8 +183,8 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal,
{
OFFSET *const fd = ctxt->fdiag; /* Give the compiler a chance. */
OFFSET *const bd = ctxt->bdiag; /* Additional help for the compiler. */
- const ELEMENT *const xv = ctxt->string[0].data; /* Still more help for the compiler. */
- const ELEMENT *const yv = ctxt->string[1].data; /* And more and more . . . */
+ const ELEMENT *const xv = ctxt->xvec; /* Still more help for the compiler. */
+ const ELEMENT *const yv = ctxt->yvec; /* And more and more . . . */
const OFFSET dmin = xoff - ylim; /* Minimum valid diagonal. */
const OFFSET dmax = xlim - yoff; /* Maximum valid diagonal. */
const OFFSET fmid = xoff - yoff; /* Center diagonal of top-down search. */
@@ -462,8 +463,8 @@ static void
compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
bool find_minimal, struct context *ctxt)
{
- const ELEMENT *const xv = ctxt->string[0].data; /* Help the compiler. */
- const ELEMENT *const yv = ctxt->string[1].data;
+ const ELEMENT *const xv = ctxt->xvec; /* Help the compiler. */
+ const ELEMENT *const yv = ctxt->yvec;
/* Slide down the bottom initial diagonal. */
while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
@@ -481,21 +482,17 @@ compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
/* Handle simple cases. */
if (xoff == xlim)
- {
- while (yoff < ylim)
- {
- ctxt->string[1].edit_count++;
- ++yoff;
- }
- }
+ while (yoff < ylim)
+ {
+ NOTE_INSERT (ctxt, yoff);
+ yoff++;
+ }
else if (yoff == ylim)
- {
- while (xoff < xlim)
- {
- ctxt->string[0].edit_count++;
- ++xoff;
- }
- }
+ while (xoff < xlim)
+ {
+ NOTE_DELETE (ctxt, xoff);
+ xoff++;
+ }
else
{
struct partition part;
@@ -553,6 +550,8 @@ double
fstrcmp (const char *string1, const char *string2)
{
struct context ctxt;
+ int xvec_length;
+ int yvec_length;
int i;
size_t fdiag_len;
@@ -560,21 +559,21 @@ fstrcmp (const char *string1, const char *string2)
size_t bufmax;
/* set the info for each string. */
- ctxt.string[0].data = string1;
- ctxt.string[0].data_length = strlen (string1);
- ctxt.string[1].data = string2;
- ctxt.string[1].data_length = strlen (string2);
+ ctxt.xvec = string1;
+ xvec_length = strlen (string1);
+ ctxt.yvec = string2;
+ yvec_length = strlen (string2);
/* short-circuit obvious comparisons */
- if (ctxt.string[0].data_length == 0 && ctxt.string[1].data_length == 0)
+ if (xvec_length == 0 && yvec_length == 0)
return 1.0;
- if (ctxt.string[0].data_length == 0 || ctxt.string[1].data_length == 0)
+ if (xvec_length == 0 || yvec_length == 0)
return 0.0;
/* Set TOO_EXPENSIVE to be approximate square root of input size,
bounded below by 256. */
ctxt.too_expensive = 1;
- for (i = ctxt.string[0].data_length + ctxt.string[1].data_length;
+ for (i = xvec_length + yvec_length;
i != 0;
i >>= 2)
ctxt.too_expensive <<= 1;
@@ -582,7 +581,7 @@ fstrcmp (const char *string1, const char *string2)
ctxt.too_expensive = 256;
/* Allocate memory for fdiag and bdiag from a thread-local pool. */
- fdiag_len = ctxt.string[0].data_length + ctxt.string[1].data_length + 3;
+ fdiag_len = xvec_length + yvec_length + 3;
gl_once (keys_init_once, keys_init);
buffer = (int *) gl_tls_get (buffer_key);
bufmax = (size_t) (uintptr_t) gl_tls_get (bufmax_key);
@@ -600,20 +599,20 @@ fstrcmp (const char *string1, const char *string2)
gl_tls_set (buffer_key, buffer);
gl_tls_set (bufmax_key, (void *) (uintptr_t) bufmax);
}
- ctxt.fdiag = buffer + ctxt.string[1].data_length + 1;
+ ctxt.fdiag = buffer + yvec_length + 1;
ctxt.bdiag = ctxt.fdiag + fdiag_len;
/* Now do the main comparison algorithm */
- ctxt.string[0].edit_count = 0;
- ctxt.string[1].edit_count = 0;
- compareseq (0, ctxt.string[0].data_length, 0, ctxt.string[1].data_length, 0,
+ ctxt.xvec_edit_count = 0;
+ ctxt.yvec_edit_count = 0;
+ compareseq (0, xvec_length, 0, yvec_length, 0,
&ctxt);
/* The result is
((number of chars in common) / (average length of the strings)).
This is admittedly biased towards finding that the strings are
similar, however it does produce meaningful results. */
- return ((double) (ctxt.string[0].data_length + ctxt.string[1].data_length
- - ctxt.string[1].edit_count - ctxt.string[0].edit_count)
- / (ctxt.string[0].data_length + ctxt.string[1].data_length));
+ return ((double) (xvec_length + yvec_length
+ - ctxt.yvec_edit_count - ctxt.xvec_edit_count)
+ / (xvec_length + yvec_length));
}