diff options
author | Michael Gran <spk121@yahoo.com> | 2009-08-30 15:41:56 -0700 |
---|---|---|
committer | Michael Gran <spk121@yahoo.com> | 2009-08-30 16:54:49 -0700 |
commit | fac32b518ef9f456e8f6465c00e6c6f40a1123a8 (patch) | |
tree | ade50d4935f7e3cc6143ac13d32f91ff2ad5c233 | |
parent | f84c500d2e29c619e6a989d0d11911fea414d795 (diff) | |
download | guile-fac32b518ef9f456e8f6465c00e6c6f40a1123a8.tar.gz |
Fix encoding errors with strings returned by string ports
String ports, being 8-bit, store strings using the character encoding
of the port. This fixes a bug where the default character encoding, and
not the port's encoding, was being used to convert the string port data
back to a string.
* libguile/strports.c: extra comments
(scm_strport_to_string): use port's encoding when converting port data
to a string
* libguile/strings.c (scm_i_from_stringn): renamed from scm_from_stringn
and made internal. All callers changed.
(scm_from_stringn): renamed to scm_i_from_stringn.
* libguile/strings.h: declaration for scm_i_from_stringn
-rw-r--r-- | libguile/strings.c | 15 | ||||
-rw-r--r-- | libguile/strings.h | 4 | ||||
-rw-r--r-- | libguile/strports.c | 26 |
3 files changed, 34 insertions, 11 deletions
diff --git a/libguile/strings.c b/libguile/strings.c
index 4a8390d16..3b8d15db0 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1477,15 +1477,18 @@ scm_is_string (SCM obj)
   return IS_STRING (obj);
 }
 
-static SCM
-scm_from_stringn (const char *str, size_t len, const char *encoding,
-                  scm_t_string_failed_conversion_handler handler)
+SCM
+scm_i_from_stringn (const char *str, size_t len, const char *encoding,
+                    scm_t_string_failed_conversion_handler handler)
 {
   size_t u32len, i;
   scm_t_wchar *u32;
   int wide = 0;
   SCM res;
 
+  if (len == 0)
+    return scm_nullstr;
+
   if (encoding == NULL)
     {
       /* If encoding is null, use Latin-1. */
@@ -1575,7 +1578,7 @@ scm_from_locale_stringn (const char *str, size_t len)
       hndl = SCM_FAILED_CONVERSION_ERROR;
     }
 
-  return scm_from_stringn (str, len, enc, hndl);
+  return scm_i_from_stringn (str, len, enc, hndl);
 }
 
 SCM
@@ -1590,7 +1593,7 @@ scm_from_locale_string (const char *str)
 SCM
 scm_i_from_utf8_string (const scm_t_uint8 *str)
 {
-  return scm_from_stringn ((const char *) str,
+  return scm_i_from_stringn ((const char *) str,
                            strlen ((char *) str), "UTF-8",
                            SCM_FAILED_CONVERSION_ERROR);
 }
@@ -1681,7 +1684,7 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
 }
 
 char *
-scm_to_locale_stringn (SCM str, size_t * lenp)
+scm_to_locale_stringn (SCM str, size_t *lenp)
 {
   SCM outport;
   scm_t_port *pt;
diff --git a/libguile/strings.h b/libguile/strings.h
index 2393aae91..c5219265e 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -111,6 +111,10 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
 SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
 SCM_API SCM scm_string_append (SCM args);
 
+SCM_INTERNAL SCM scm_i_from_stringn (const char *str, size_t len,
+                                     const char *encoding,
+                                     scm_t_string_failed_conversion_handler
+                                     handler);
 SCM_API SCM scm_c_make_string (size_t len, SCM chr);
 SCM_API size_t scm_c_string_length (SCM str);
 SCM_API size_t scm_c_symbol_length (SCM sym);
diff --git a/libguile/strports.c b/libguile/strports.c
index 490a15f8b..5bfeaad20 100644
--- a/libguile/strports.c
+++ b/libguile/strports.c
@@ -301,9 +301,9 @@ scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len, long modes, co
      to a locale representation for storage. But, since string ports
      rely on string functionality for their memory management, we need
      to create a new string that has the 8-bit locale representation
-     of the underlying string. This violates the guideline that the
-     internal encoding of characters in strings is in unicode
-     codepoints. */
+     of the underlying string.
+
+     locale_str is already in the locale of the port. */
   str = scm_i_make_string (str_len, &buf);
   memcpy (buf, locale_str, str_len);
 
@@ -348,13 +348,18 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
      of the underlying string. This violates the guideline that the
      internal encoding of characters in strings is in unicode
      codepoints. */
+
+  /* Ports are initialized with the thread-default values for encoding and
+     invalid sequence handling. */
   buf = scm_to_locale_stringn (str, &str_len);
   z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
   free (buf);
   return z;
 }
 
-/* create a new string from a string port's buffer. */
+/* Create a new string from a string port's buffer, converting from
+   the port's 8-bit locale-specific representation to the standard
+   string representation. */
 SCM scm_strport_to_string (SCM port)
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
@@ -363,7 +368,18 @@ SCM scm_strport_to_string (SCM port)
   if (pt->rw_active == SCM_PORT_WRITE)
     st_flush (port);
 
-  str = scm_from_locale_stringn ((char *)pt->read_buf, pt->read_buf_size);
+  if (pt->read_buf_size == 0)
+    return scm_nullstr;
+
+  if (pt->encoding == NULL)
+    {
+      char *buf;
+      str = scm_i_make_string (pt->read_buf_size, &buf);
+      memcpy (buf, pt->read_buf, pt->read_buf_size);
+    }
+  else
+    str = scm_i_from_stringn ((char *)pt->read_buf, pt->read_buf_size,
+                              pt->encoding, pt->ilseq_handler);
   scm_remember_upto_here_1 (port);
   return str;
 }