summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Gran <spk121@yahoo.com> 2009-08-30 15:41:56 -0700
committer Michael Gran <spk121@yahoo.com> 2009-08-30 16:54:49 -0700
commit fac32b518ef9f456e8f6465c00e6c6f40a1123a8 (patch)
tree ade50d4935f7e3cc6143ac13d32f91ff2ad5c233
parent f84c500d2e29c619e6a989d0d11911fea414d795 (diff)
download guile-fac32b518ef9f456e8f6465c00e6c6f40a1123a8.tar.gz
Fix encoding errors with strings returned by string ports

String ports, being 8-bit, store strings using the character encoding of the port. This fixes a bug where the default character encoding, and not the port's encoding, was being used to convert the string port data back to a string.

* libguile/strports.c: extra comments
  (scm_strport_to_string): use port's encoding when converting port data to a string

* libguile/strings.c (scm_i_from_stringn): renamed from scm_from_stringn and made internal. All callers changed.
  (scm_from_stringn): renamed to scm_i_from_stringn.

* libguile/strings.h: declaration for scm_i_from_stringn

-rw-r--r-- libguile/strings.c 15
-rw-r--r-- libguile/strings.h 4
-rw-r--r-- libguile/strports.c 26
3 files changed, 34 insertions, 11 deletions
diff --git a/libguile/strings.c b/libguile/strings.c
index 4a8390d16..3b8d15db0 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1477,15 +1477,18 @@ scm_is_string (SCM obj)
return IS_STRING (obj);
}
-static SCM
-scm_from_stringn (const char *str, size_t len, const char *encoding,
- scm_t_string_failed_conversion_handler handler)
+SCM
+scm_i_from_stringn (const char *str, size_t len, const char *encoding,
+ scm_t_string_failed_conversion_handler handler)
{
size_t u32len, i;
scm_t_wchar *u32;
int wide = 0;
SCM res;
+ if (len == 0)
+ return scm_nullstr;
+
if (encoding == NULL)
{
/* If encoding is null, use Latin-1. */
@@ -1575,7 +1578,7 @@ scm_from_locale_stringn (const char *str, size_t len)
hndl = SCM_FAILED_CONVERSION_ERROR;
}
- return scm_from_stringn (str, len, enc, hndl);
+ return scm_i_from_stringn (str, len, enc, hndl);
}
SCM
@@ -1590,7 +1593,7 @@ scm_from_locale_string (const char *str)
SCM
scm_i_from_utf8_string (const scm_t_uint8 *str)
{
- return scm_from_stringn ((const char *) str,
+ return scm_i_from_stringn ((const char *) str,
strlen ((char *) str), "UTF-8",
SCM_FAILED_CONVERSION_ERROR);
}
@@ -1681,7 +1684,7 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
}
char *
-scm_to_locale_stringn (SCM str, size_t * lenp)
+scm_to_locale_stringn (SCM str, size_t *lenp)
{
SCM outport;
scm_t_port *pt;
diff --git a/libguile/strings.h b/libguile/strings.h
index 2393aae91..c5219265e 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -111,6 +111,10 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
SCM_API SCM scm_string_append (SCM args);
+SCM_INTERNAL SCM scm_i_from_stringn (const char *str, size_t len,
+ const char *encoding,
+ scm_t_string_failed_conversion_handler
+ handler);
SCM_API SCM scm_c_make_string (size_t len, SCM chr);
SCM_API size_t scm_c_string_length (SCM str);
SCM_API size_t scm_c_symbol_length (SCM sym);
diff --git a/libguile/strports.c b/libguile/strports.c
index 490a15f8b..5bfeaad20 100644
--- a/libguile/strports.c
+++ b/libguile/strports.c
@@ -301,9 +301,9 @@ scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len, long modes, co
to a locale representation for storage. But, since string ports
rely on string functionality for their memory management, we need
to create a new string that has the 8-bit locale representation
- of the underlying string. This violates the guideline that the
- internal encoding of characters in strings is in unicode
- codepoints. */
+ of the underlying string.
+
+ locale_str is already in the locale of the port. */
str = scm_i_make_string (str_len, &buf);
memcpy (buf, locale_str, str_len);
@@ -348,13 +348,18 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
of the underlying string. This violates the guideline that the
internal encoding of characters in strings is in unicode
codepoints. */
+
+ /* Ports are initialized with the thread-default values for encoding and
+ invalid sequence handling. */
buf = scm_to_locale_stringn (str, &str_len);
z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
free (buf);
return z;
}
-/* create a new string from a string port's buffer. */
+/* Create a new string from a string port's buffer, converting from
+ the port's 8-bit locale-specific representation to the standard
+ string representation. */
SCM scm_strport_to_string (SCM port)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
@@ -363,7 +368,18 @@ SCM scm_strport_to_string (SCM port)
if (pt->rw_active == SCM_PORT_WRITE)
st_flush (port);
- str = scm_from_locale_stringn ((char *)pt->read_buf, pt->read_buf_size);
+ if (pt->read_buf_size == 0)
+ return scm_nullstr;
+
+ if (pt->encoding == NULL)
+ {
+ char *buf;
+ str = scm_i_make_string (pt->read_buf_size, &buf);
+ memcpy (buf, pt->read_buf, pt->read_buf_size);
+ }
+ else
+ str = scm_i_from_stringn ((char *)pt->read_buf, pt->read_buf_size,
+ pt->encoding, pt->ilseq_handler);
scm_remember_upto_here_1 (port);
return str;
}