summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mark H Weaver <mhw@netris.org> 2019-05-06 21:11:26 -0400
committer: Mark H Weaver <mhw@netris.org> 2019-05-07 04:41:30 -0400
commit: 7c2b48a6bd4b7ccd043b2e19471b498dc66a073d (patch)
tree: c853c850dc88cce21bde07283b40bf64bd54699c
parent: 91b5b1631f87067a63cb0b50df5dbfce977c18c7 (diff)
download: guile-7c2b48a6bd4b7ccd043b2e19471b498dc66a073d.tar.gz
Strings, i18n: Limit the use of alloca to approximately 8 kilobytes.

* libguile/i18n.c (SCM_MAX_ALLOCA): New macro.
(SCM_STRING_TO_U32_BUF): Accept an additional variable to remember whether we used malloc to allocate the buffer. Use malloc if the allocation size is greater than SCM_MAX_ALLOCA.
(SCM_CLEANUP_U32_BUF): New macro.
(compare_u32_strings, compare_u32_strings_ci, str_to_case): Adapt.
* libguile/strings.c (SCM_MAX_ALLOCA): New macro.
(normalize_str, unistring_escapes_to_r6rs_escapes): Use malloc if the allocation size is greater than SCM_MAX_ALLOCA.
* test-suite/tests/i18n.test, test-suite/tests/strings.test: Add tests.

-rw-r--r-- libguile/i18n.c 72
-rw-r--r-- libguile/strings.c 43
-rw-r--r-- test-suite/tests/i18n.test 17
-rw-r--r-- test-suite/tests/strings.test 12
4 files changed, 107 insertions, 37 deletions
diff --git a/libguile/i18n.c b/libguile/i18n.c
index 6c87fdaae..5e6783700 100644
--- a/libguile/i18n.c
+++ b/libguile/i18n.c
@@ -40,6 +40,10 @@
#include <unicase.h>
#include <unistr.h>
+#ifndef SCM_MAX_ALLOCA
+# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
+#endif
+
#if defined HAVE_NEWLOCALE && defined HAVE_STRCOLL_L && defined HAVE_USELOCALE
/* The GNU thread-aware locale API is documented in ``Thread-Aware Locale
Model, a Proposal'', by Ulrich Drepper:
@@ -743,23 +747,35 @@ SCM_DEFINE (scm_locale_p, "locale?", 1, 0, 0,
A similar API can be found in MzScheme starting from version 200:
http://download.plt-scheme.org/chronology/mzmr200alpha14.html . */
-#define SCM_STRING_TO_U32_BUF(s1, c_s1) \
- do \
- { \
- if (scm_i_is_narrow_string (s1)) \
- { \
- size_t i, len; \
- const char *buf = scm_i_string_chars (s1); \
- \
- len = scm_i_string_length (s1); \
- c_s1 = alloca (sizeof (scm_t_wchar) * (len + 1)); \
- \
- for (i = 0; i < len; i ++) \
- c_s1[i] = (unsigned char ) buf[i]; \
- c_s1[len] = 0; \
- } \
- else \
- c_s1 = (scm_t_wchar *) scm_i_string_wide_chars (s1); \
+#define SCM_STRING_TO_U32_BUF(str, c_str, c_str_malloc_p) \
+ do \
+ { \
+ if (scm_i_is_narrow_string (str)) \
+ { \
+ size_t i, len, bytes; \
+ const char *buf = scm_i_string_chars (str); \
+ \
+ len = scm_i_string_length (str); \
+ bytes = (len + 1) * sizeof (scm_t_wchar); \
+ c_str_malloc_p = (bytes > SCM_MAX_ALLOCA); \
+ c_str = c_str_malloc_p ? malloc (bytes) : alloca (bytes); \
+ \
+ for (i = 0; i < len; i ++) \
+ c_str[i] = (unsigned char ) buf[i]; \
+ c_str[len] = 0; \
+ } \
+ else \
+ { \
+ c_str_malloc_p = 0; \
+ c_str = (scm_t_wchar *) scm_i_string_wide_chars (str); \
+ } \
+ } while (0)
+
+#define SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p) \
+ do \
+ { \
+ if (c_str_malloc_p) \
+ free (c_str); \
} while (0)
@@ -773,10 +789,11 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
int result;
scm_t_locale c_locale;
scm_t_wchar *c_s1, *c_s2;
+ int c_s1_malloc_p, c_s2_malloc_p;
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
- SCM_STRING_TO_U32_BUF (s1, c_s1);
- SCM_STRING_TO_U32_BUF (s2, c_s2);
+ SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
+ SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
if (c_locale)
RUN_IN_LOCALE_SECTION (c_locale,
@@ -786,6 +803,9 @@ compare_u32_strings (SCM s1, SCM s2, SCM locale, const char *func_name)
result = u32_strcoll ((const scm_t_uint32 *) c_s1,
(const scm_t_uint32 *) c_s2);
+ SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
+ SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
+
scm_remember_upto_here_2 (s1, s2);
scm_remember_upto_here (locale);
return result;
@@ -828,10 +848,11 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
int result, ret = 0;
scm_t_locale c_locale;
scm_t_wchar *c_s1, *c_s2;
+ int c_s1_malloc_p, c_s2_malloc_p;
SCM_VALIDATE_OPTIONAL_LOCALE_COPY (3, locale, c_locale);
- SCM_STRING_TO_U32_BUF (s1, c_s1);
- SCM_STRING_TO_U32_BUF (s2, c_s2);
+ SCM_STRING_TO_U32_BUF (s1, c_s1, c_s1_malloc_p);
+ SCM_STRING_TO_U32_BUF (s2, c_s2, c_s2_malloc_p);
if (c_locale)
RUN_IN_LOCALE_SECTION
@@ -846,6 +867,9 @@ compare_u32_strings_ci (SCM s1, SCM s2, SCM locale, const char *func_name)
(const scm_t_uint32 *) c_s2,
&result);
+ SCM_CLEANUP_U32_BUF(c_s1, c_s1_malloc_p);
+ SCM_CLEANUP_U32_BUF(c_s2, c_s2_malloc_p);
+
if (SCM_UNLIKELY (ret != 0))
{
errno = ret;
@@ -1212,13 +1236,13 @@ str_to_case (SCM str, scm_t_locale c_locale,
scm_t_wchar *c_str, *c_buf;
scm_t_uint32 *c_convstr;
size_t len, convlen;
- int ret;
+ int ret, c_str_malloc_p;
SCM convstr;
len = scm_i_string_length (str);
if (len == 0)
return scm_nullstr;
- SCM_STRING_TO_U32_BUF (str, c_str);
+ SCM_STRING_TO_U32_BUF (str, c_str, c_str_malloc_p);
if (c_locale)
RUN_IN_LOCALE_SECTION (c_locale, ret =
@@ -1230,6 +1254,8 @@ str_to_case (SCM str, scm_t_locale c_locale,
u32_locale_tocase ((scm_t_uint32 *) c_str, len,
&c_convstr, &convlen, func);
+ SCM_CLEANUP_U32_BUF(c_str, c_str_malloc_p);
+
scm_remember_upto_here (str);
if (SCM_UNLIKELY (ret != 0))
diff --git a/libguile/strings.c b/libguile/strings.c
index 5a150278d..e5c7f87d6 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -45,6 +45,10 @@
#include "libguile/validate.h"
#include "libguile/private-options.h"
+#ifndef SCM_MAX_ALLOCA
+# define SCM_MAX_ALLOCA 4096 /* Max bytes per string to allocate via alloca */
+#endif
+
/* {Strings}
@@ -1808,6 +1812,7 @@ static void
unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
{
char *before, *after;
+ int malloc_p;
size_t i, j;
/* The worst case is if the input string contains all 4-digit hex escapes.
"\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
@@ -1815,7 +1820,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
size_t nzeros, ndigits;
before = buf;
- after = alloca (max_out_len);
+ malloc_p = (max_out_len > SCM_MAX_ALLOCA);
+ after = malloc_p ? malloc (max_out_len) : alloca (max_out_len);
i = 0;
j = 0;
while (i < *lenp)
@@ -1873,6 +1879,8 @@ unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
}
*lenp = j;
memcpy (before, after, j);
+ if (malloc_p)
+ free (after);
}
char *
@@ -2313,28 +2321,37 @@ normalize_str (SCM string, uninorm_t form)
{
SCM ret;
scm_t_uint32 *w_str;
+ scm_t_uint32 *w_norm_str;
scm_t_wchar *cbuf;
- size_t rlen, len = scm_i_string_length (string);
+ int malloc_p;
+ size_t norm_len, len = scm_i_string_length (string);
if (scm_i_is_narrow_string (string))
{
- size_t i;
+ size_t i, bytes;
const char *buf = scm_i_string_chars (string);
-
- w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
-
+
+ bytes = (len + 1) * sizeof (scm_t_wchar);
+ malloc_p = (bytes > SCM_MAX_ALLOCA);
+ w_str = malloc_p ? malloc (bytes) : alloca (bytes);
+
for (i = 0; i < len; i ++)
w_str[i] = (unsigned char) buf[i];
w_str[len] = 0;
}
- else
- w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
+ else
+ {
+ malloc_p = 0;
+ w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
+ }
- w_str = u32_normalize (form, w_str, len, NULL, &rlen);
-
- ret = scm_i_make_wide_string (rlen, &cbuf, 0);
- u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen);
- free (w_str);
+ w_norm_str = u32_normalize (form, w_str, len, NULL, &norm_len);
+
+ ret = scm_i_make_wide_string (norm_len, &cbuf, 0);
+ u32_cpy ((scm_t_uint32 *) cbuf, w_norm_str, norm_len);
+ free (w_norm_str);
+ if (malloc_p)
+ free (w_str);
scm_i_try_narrow_string (ret);
diff --git a/test-suite/tests/i18n.test b/test-suite/tests/i18n.test
index 811be7b10..427aef4f5 100644
--- a/test-suite/tests/i18n.test
+++ b/test-suite/tests/i18n.test
@@ -78,7 +78,13 @@
(pass-if "string-locale-ci<?"
(and (string-locale-ci<? "hello" "WORLD")
(string-locale-ci<? "hello" "WORLD"
- (make-locale (list LC_COLLATE) "C")))))
+ (make-locale (list LC_COLLATE) "C"))))
+ (pass-if "large strings"
+ ;; In Guile <= 2.2.4, these would overflow the C stack and crash.
+ (let ((large (make-string 4000000 #\a)))
+ (and (string-locale-ci=? large large)
+ (not (string-locale-ci<? large large))
+ (not (string-locale<? large large))))))
(define mingw?
@@ -333,6 +339,15 @@
(string=? "Hello, World" (string-locale-titlecase
"hello, world" (make-locale LC_ALL "C")))))
+ (pass-if "large strings"
+ ;; In Guile <= 2.2.4, these would overflow the C stack and crash.
+ (let ((hellos (string-join (make-list 700000 "hello")))
+ (HELLOs (string-join (make-list 700000 "HELLO")))
+ (Hellos (string-join (make-list 700000 "Hello"))))
+ (and (string=? hellos (string-locale-downcase Hellos))
+ (string=? HELLOs (string-locale-upcase Hellos))
+ (string=? Hellos (string-locale-titlecase hellos)))))
+
(pass-if "string-locale-upcase German"
(under-german-utf8-locale-or-unresolved
(lambda ()
diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test
index b404253ce..3258feb61 100644
--- a/test-suite/tests/strings.test
+++ b/test-suite/tests/strings.test
@@ -476,6 +476,18 @@
(equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69")))
;;
+;; normalizing large strings
+;;
+
+(pass-if "string-normalize-{nfd,nfc,nfkd,nfkc} on large strings"
+ ;; In Guile <= 2.2.4, these would overflow the C stack and crash.
+ (let ((large (make-string 4000000 #\a)))
+ (and (string=? large (string-normalize-nfd large))
+ (string=? large (string-normalize-nfc large))
+ (string=? large (string-normalize-nfkd large))
+ (string=? large (string-normalize-nfkc large)))))
+
+;;
;; string-utf8-length
;;