From ffb95239aacf86d8dc622a438bdaacfac4a66efc Mon Sep 17 00:00:00 2001
From: Rob Browning
Date: Sun, 12 Mar 2023 14:26:10 -0500
Subject: scm_i_utf8_string_hash: compute u8 chars not bytes

Noticed while investigating a migration to utf-8 strings. After making
changes that routed non-ascii symbol hashing through this function,
encoding-iso88597.test began intermittently failing because it would
traverse trailing garbage when u8_strnlen reported 8 chars instead of 4.

Change the scm_i_str2symbol and scm_i_str2uninterned_symbol internal
hash type to unsigned long to explicitly match the scm_i_string_hash
result type.

* libguile/hash.c (scm_i_utf8_string_hash): Call u8_mbsnlen not u8_strnlen.
* libguile/symbols.c (scm_i_str2symbol, scm_i_str2uninterned_symbol): Use
  unsigned long for scm_i_string_hash result.
* test-suite/standalone/.gitignore: Add test-hashing.
* test-suite/standalone/Makefile.am: Add test-hashing.
* test-suite/standalone/test-hashing.c: Add.
---
 libguile/hash.c    | 2 +-
 libguile/symbols.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'libguile')

diff --git a/libguile/hash.c b/libguile/hash.c
index c192ac2e5..5abdfe397 100644
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -185,7 +185,7 @@ scm_i_utf8_string_hash (const char *str, size_t len)
     /* Invalid UTF-8; punt. */
     return scm_i_string_hash (scm_from_utf8_stringn (str, len));
 
-  length = u8_strnlen (ustr, len);
+  length = u8_mbsnlen (ustr, len);
 
   /* Set up the internal state. */
   a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + 47;
diff --git a/libguile/symbols.c b/libguile/symbols.c
index 02be7c1c4..086abf585 100644
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -239,7 +239,7 @@ static SCM
 scm_i_str2symbol (SCM str)
 {
   SCM symbol;
-  size_t raw_hash = scm_i_string_hash (str);
+  unsigned long raw_hash = scm_i_string_hash (str);
 
   symbol = lookup_interned_symbol (str, raw_hash);
   if (scm_is_true (symbol))
@@ -261,7 +261,7 @@ scm_i_str2symbol (SCM str)
 static SCM
 scm_i_str2uninterned_symbol (SCM str)
 {
-  size_t raw_hash = scm_i_string_hash (str);
+  unsigned long raw_hash = scm_i_string_hash (str);
 
   return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED, raw_hash);
 }
-- 
cgit v1.2.1