From a8a9fd0a672789f304a0ecb83240b4e6d34f1d1f Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 13 Sep 2021 14:45:23 -0600 Subject: cygwin.c: Add utf8_to_wide_extra_len() This function is like utf8_to_wide, but has an additional parameter that reserves extra space. It thus is a superset of utf8_to_wide() which changes to use it with 0 extra space needed. And there is other code that parallels utf8_to_wide() but needs extra space reserved. That code is converted to use this (new, now common) function, thus simplifying things. --- cygwin/cygwin.c | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'cygwin') diff --git a/cygwin/cygwin.c b/cygwin/cygwin.c index 9ab533f301..3678beeec3 100644 --- a/cygwin/cygwin.c +++ b/cygwin/cygwin.c @@ -176,10 +176,15 @@ wide_to_utf8(const wchar_t *wsrc) } wchar_t* -utf8_to_wide(const char *buf) +utf8_to_wide_extra_len(const char *buf, Size_t *extra_len) { + /* Return the conversion to UTF-16 of the UTF-8 string 'buf' + * (terminated by a NUL), making sure to have space for at least *extra_len + * extra (wide) characters in the result. The result must be freed by the + * caller when no longer needed */ + dTHX; - Size_t len = strlen(buf) + 1; + Size_t len = strlen(buf) + extra_len + 1; /* Max expansion factor is sizeof(wchar_t) */ Size_t wlen = sizeof(wchar_t) * len; @@ -191,9 +196,14 @@ utf8_to_wide(const char *buf) return wsrc; } +wchar_t* +utf8_to_wide(const char *buf) +{ + Size_t extra_len = 0; - return wsrc; + return utf8_to_wide_extra_len(buf, &extra_len); } + #endif /* cygwin 1.7 */ /* see also Cwd.pm */ @@ -302,27 +312,23 @@ S_convert_path_common(pTHX_ const direction_t direction) | ((direction == to_posix) ? 
CCP_WIN_W_TO_POSIX : CCP_POSIX_TO_WIN_W); - STRLEN wlen = sizeof(wchar_t)*(len + PATH_LEN_GUESS); - wchar_t *wconverted = (wchar_t *) safemalloc(sizeof(wchar_t)*len); - wchar_t *wsrc = (wchar_t *) safemalloc(wlen); - if (!IN_BYTES) { - mbstate_t mbs; - char *oldlocale; - - SETLOCALE_LOCK; + STRLEN wlen; + wchar_t *wsrc = NULL; + wchar_t *wconverted = NULL; - oldlocale = setlocale(LC_CTYPE, NULL); - setlocale(LC_CTYPE, "utf-8"); + if (!IN_BYTES) { + wlen = PATH_LEN_GUESS; + wconverted = utf8_to_wide_extra_len(src_path, &wlen); - wlen = mbsrtowcs(wconverted, (const char**)&src_path, wlen, &mbs); - if (wlen > 0) + if (wlen > 0) { + wsrc = (wchar_t *) safemalloc(wlen); err = cygwin_conv_path(what, wconverted, wsrc, wlen); - - if (oldlocale) setlocale(LC_CTYPE, oldlocale); - else setlocale(LC_CTYPE, "C"); - - SETLOCALE_UNLOCK; - } else { /* use bytes; assume already UTF-16 encoded bytestream */ + } + } + else { /* use bytes; assume already UTF-16 encoded bytestream */ + wlen = sizeof(wchar_t) * (len + PATH_LEN_GUESS); + wconverted = (wchar_t *) safemalloc(sizeof(wchar_t)*len); + wsrc = (wchar_t *) safemalloc(wlen); err = cygwin_conv_path(what, src_path, wsrc, wlen); } -- cgit v1.2.1