summary | refs | log | tree | commit | diff
path: root/libguile
diff options
context:
space:
mode:
author: Rob Browning <rlb@defaultvalue.org> 2023-03-12 14:26:10 -0500
committer: Rob Browning <rlb@defaultvalue.org> 2023-03-18 13:24:43 -0500
commit: ffb95239aacf86d8dc622a438bdaacfac4a66efc (patch)
tree: d49911a0abac917a73b6ec6e0e2c0ec0ade6f426 /libguile
parent: f0df1ed0fdca7a63cffda5e53c968e814b7b06ec (diff)
download: guile-ffb95239aacf86d8dc622a438bdaacfac4a66efc.tar.gz
scm_i_utf8_string_hash: compute u8 chars not bytes
Noticed while investigating a migration to utf-8 strings. After making
changes that routed non-ascii symbol hashing through this function,
encoding-iso88597.test began intermittently failing because it would
traverse trailing garbage when u8_strnlen reported 8 chars instead of 4.

Change the scm_i_str2symbol and scm_i_str2uninterned_symbol internal hash
type to unsigned long to explicitly match the scm_i_string_hash result
type.

* libguile/hash.c (scm_i_utf8_string_hash): Call u8_mbsnlen not u8_strnlen.
* libguile/symbols.c (scm_i_str2symbol, scm_i_str2uninterned_symbol): Use
  unsigned long for scm_i_string_hash result.
* test-suite/standalone/.gitignore: Add test-hashing.
* test-suite/standalone/Makefile.am: Add test-hashing.
* test-suite/standalone/test-hashing.c: Add.
Diffstat (limited to 'libguile')
-rw-r--r--  libguile/hash.c     2
-rw-r--r--  libguile/symbols.c  4
2 files changed, 3 insertions, 3 deletions
diff --git a/libguile/hash.c b/libguile/hash.c
index c192ac2e5..5abdfe397 100644
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -185,7 +185,7 @@ scm_i_utf8_string_hash (const char *str, size_t len)
/* Invalid UTF-8; punt. */
return scm_i_string_hash (scm_from_utf8_stringn (str, len));
- length = u8_strnlen (ustr, len);
+ length = u8_mbsnlen (ustr, len);
/* Set up the internal state. */
a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + 47;
diff --git a/libguile/symbols.c b/libguile/symbols.c
index 02be7c1c4..086abf585 100644
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -239,7 +239,7 @@ static SCM
scm_i_str2symbol (SCM str)
{
SCM symbol;
- size_t raw_hash = scm_i_string_hash (str);
+ unsigned long raw_hash = scm_i_string_hash (str);
symbol = lookup_interned_symbol (str, raw_hash);
if (scm_is_true (symbol))
@@ -261,7 +261,7 @@ scm_i_str2symbol (SCM str)
static SCM
scm_i_str2uninterned_symbol (SCM str)
{
- size_t raw_hash = scm_i_string_hash (str);
+ unsigned long raw_hash = scm_i_string_hash (str);
return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED, raw_hash);
}