diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2015-02-07 18:09:00 -0800 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2015-02-07 18:09:53 -0800 |
commit | 1c6a3cf4ce203dd9235a9ce26437079c61b56091 (patch) | |
tree | 313a7db067a63c25385591b083574995fc9f4867 | |
parent | 59ebba1d813f624a0bd0a97450a940c289c9e9c4 (diff) | |
download | gnulib-1c6a3cf4ce203dd9235a9ce26437079c61b56091.tar.gz |
fstrcmp: don't assume strlen < INT_MAX
* lib/fstrcmp.c: Include stddef.h and stdint.h.
(uintptr_t): Remove, as we're now assuming stdint.
(OFFSET, EXTRA_CONTEXT_FIELDS, fstrcmp_bounded):
Prefer ptrdiff_t to int when the value could exceed INT_MAX
if the input string is long.
(fstrcmp_bounded): Check for size-calculation overflow. Prefer
uintptr_t to size_t when the underlying value is a pointer casted
to an unsigned integer. Avoid unnecessary 'buffer != NULL' test.
* modules/fstrcmp (Depends-on): Add stdint.
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | lib/fstrcmp.c | 65 | ||||
-rw-r--r-- | modules/fstrcmp | 1 |
3 files changed, 46 insertions, 31 deletions
@@ -1,5 +1,16 @@ 2015-02-07 Paul Eggert <eggert@cs.ucla.edu> + fstrcmp: don't assume strlen < INT_MAX + * lib/fstrcmp.c: Include stddef.h and stdint.h. + (uintptr_t): Remove, as we're now assuming stdint. + (OFFSET, EXTRA_CONTEXT_FIELDS, fstrcmp_bounded): + Prefer ptrdiff_t to int when the value could exceed INT_MAX + if the input string is long. + (fstrcmp_bounded): Check for size-calculation overflow. Prefer + uintptr_t to size_t when the underlying value is a pointer casted + to an unsigned integer. Avoid unnecessary 'buffer != NULL' test. + * modules/fstrcmp (Depends-on): Add stdint. + diffseq: prefer ptrdiff_t to ssize_t * lib/diffseq.h: In commentary, prefer ptrdiff_t to ssize_t. ptrdiff_t is the natural type for signed indexes. diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c index 70db50a2e0..d40e6ef524 100644 --- a/lib/fstrcmp.c +++ b/lib/fstrcmp.c @@ -23,7 +23,9 @@ #include <string.h> #include <stdbool.h> +#include <stddef.h> #include <stdio.h> +#include <stdint.h> #include <stdlib.h> #include <limits.h> @@ -32,20 +34,16 @@ #include "minmax.h" #include "xalloc.h" -#ifndef uintptr_t -# define uintptr_t unsigned long -#endif - #define ELEMENT char #define EQUAL(x,y) ((x) == (y)) -#define OFFSET int +#define OFFSET ptrdiff_t #define EXTRA_CONTEXT_FIELDS \ /* The number of edits beyond which the computation can be aborted. */ \ - int edit_count_limit; \ + ptrdiff_t edit_count_limit; \ /* The number of edits (= number of elements inserted, plus the number of \ elements deleted), temporarily minus edit_count_limit. */ \ - int edit_count; + ptrdiff_t edit_count; #define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++ #define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++ #define EARLY_ABORT(ctxt) ctxt->edit_count > 0 @@ -61,8 +59,8 @@ already allocated memory, store the allocated memory per thread. Free it only when the thread exits. */ -static gl_tls_key_t buffer_key; /* TLS key for a 'int *' */ -static gl_tls_key_t bufmax_key; /* TLS key for a 'size_t' */ +static gl_tls_key_t buffer_key; /* TLS key for a 'ptrdiff_t *' */ +static gl_tls_key_t bufmax_key; /* TLS key for a 'uintptr_t' */ static void keys_init (void) @@ -87,17 +85,22 @@ double fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) { struct context ctxt; - int xvec_length = strlen (string1); - int yvec_length = strlen (string2); - int i; + size_t xvec_length = strlen (string1); + size_t yvec_length = strlen (string2); + size_t length_sum = xvec_length + yvec_length; + ptrdiff_t i; - size_t fdiag_len; - int *buffer; - size_t bufmax; + ptrdiff_t fdiag_len; + ptrdiff_t *buffer; + uintptr_t bufmax; /* short-circuit obvious comparisons */ if (xvec_length == 0 || yvec_length == 0) /* Prob: 1% */ - return (xvec_length == 0 && yvec_length == 0 ? 1.0 : 0.0); + return length_sum == 0; + + if (! (xvec_length <= length_sum + && length_sum <= MIN (UINTPTR_MAX, PTRDIFF_MAX) - 3)) + xalloc_die (); if (lower_bound > 0) { @@ -117,9 +120,8 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) / (xvec_length + yvec_length) = 2 * min (xvec_length, yvec_length) / (xvec_length + yvec_length). */ - volatile double upper_bound = - (double) (2 * MIN (xvec_length, yvec_length)) - / (xvec_length + yvec_length); + ptrdiff_t length_min = MIN (xvec_length, yvec_length); + volatile double upper_bound = 2.0 * length_min / length_sum; if (upper_bound < lower_bound) /* Prob: 74% */ /* Return an arbitrary value < LOWER_BOUND. */ @@ -128,7 +130,7 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) #if CHAR_BIT <= 8 /* When X and Y are both small, avoid the overhead of setting up an array of size 256. */ - if (xvec_length + yvec_length >= 20) /* Prob: 99% */ + if (length_sum >= 20) /* Prob: 99% */ { /* Compute a less quick upper bound. Each edit is an insertion or deletion of a character, hence @@ -148,8 +150,9 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) <= (xvec_length + yvec_length - sum_C | OCC(X,C) - OCC(Y,C) |) / (xvec_length + yvec_length). */ - int occ_diff[UCHAR_MAX + 1]; /* array C -> OCC(X,C) - OCC(Y,C) */ - int sum; + ptrdiff_t occ_diff[UCHAR_MAX + 1]; /* array C -> OCC(X,C) - OCC(Y,C) */ + ptrdiff_t sum; + double dsum; /* Determine the occurrence counts in X. */ memset (occ_diff, 0, sizeof (occ_diff)); @@ -162,11 +165,12 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) sum = 0; for (i = 0; i <= UCHAR_MAX; i++) { - int d = occ_diff[i]; + ptrdiff_t d = occ_diff[i]; sum += (d >= 0 ? d : -d); } - upper_bound = 1.0 - (double) sum / (xvec_length + yvec_length); + dsum = sum; + upper_bound = 1.0 - dsum / length_sum; if (upper_bound < lower_bound) /* Prob: 66% */ /* Return an arbitrary value < LOWER_BOUND. */ @@ -180,10 +184,10 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) ctxt.yvec = string2; /* Allocate memory for fdiag and bdiag from a thread-local pool. */ - fdiag_len = xvec_length + yvec_length + 3; + fdiag_len = length_sum + 3; gl_once (keys_init_once, keys_init); - buffer = (int *) gl_tls_get (buffer_key); - bufmax = (size_t) (uintptr_t) gl_tls_get (bufmax_key); + buffer = gl_tls_get (buffer_key); + bufmax = (uintptr_t) gl_tls_get (bufmax_key); if (fdiag_len > bufmax) { /* Need more memory. */ @@ -192,9 +196,8 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) bufmax = fdiag_len; /* Calling xrealloc would be a waste: buffer's contents does not need to be preserved. */ - if (buffer != NULL) - free (buffer); - buffer = (int *) xnmalloc (bufmax, 2 * sizeof (int)); + free (buffer); + buffer = xnmalloc (bufmax, 2 * sizeof *buffer); gl_tls_set (buffer_key, buffer); gl_tls_set (bufmax_key, (void *) (uintptr_t) bufmax); } @@ -213,7 +216,7 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound) rounding errors. */ ctxt.edit_count_limit = (lower_bound < 1.0 - ? (int) ((xvec_length + yvec_length) * (1.0 - lower_bound + 0.000001)) + ? (ptrdiff_t) (length_sum * (1.0 - lower_bound + 0.000001)) : 0); /* Now do the main comparison algorithm */ diff --git a/modules/fstrcmp b/modules/fstrcmp index 1ea0f37dda..77de7a08dd 100644 --- a/modules/fstrcmp +++ b/modules/fstrcmp @@ -10,6 +10,7 @@ diffseq lock tls minmax +stdint xalloc configure.ac: |