diff options
author | Michael Gran <spk121@yahoo.com> | 2010-01-23 09:15:10 -0800 |
---|---|---|
committer | Michael Gran <spk121@yahoo.com> | 2010-01-23 09:21:46 -0800 |
commit | d31b95195168ded0d3300159403adb2c4917e291 (patch) | |
tree | d0c5b4f1061a4414269a159ba8c1c0f20e445384 | |
parent | 2ff9bf8522c5f8981af5fd524769733ac1e3e8de (diff) | |
download | guile-d31b95195168ded0d3300159403adb2c4917e291.tar.gz |
R6RS string escapes broken on string output
scm_to_stringn failed to do the necessary escape conversion for
R6RS hex escapes
* libguile/strings.c (unistring_escapes_to_r6rs_escapes): new function
(scm_to_stringn): use new function when r6rs hex escapes are enabled
* test-suite/tests/reader.test: new test for string display
-rw-r--r-- | libguile/strings.c | 98 | ||||
-rw-r--r-- | test-suite/tests/reader.test | 17 |
2 files changed, 102 insertions, 13 deletions
diff --git a/libguile/strings.c b/libguile/strings.c index 4ae07a2dd..eb9e3896e 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -39,6 +39,7 @@ #include "libguile/generalized-vectors.h" #include "libguile/deprecation.h" #include "libguile/validate.h" +#include "libguile/private-options.h" @@ -1596,6 +1597,80 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp) after = scm_realloc (after, j); } +/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXXXX; */ +static void +unistring_escapes_to_r6rs_escapes (char **bufp, size_t *lenp) +{ + char *before, *after; + size_t i, j; + /* The worst case is if the input string contains all 4-digit hex escapes. + "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */ + size_t max_out_len = (*lenp * 7) / 6 + 1; + size_t nzeros, ndigits; + + before = *bufp; + after = alloca (max_out_len); + i = 0; + j = 0; + while (i < *lenp) + { + if (((i <= *lenp - 6) && before[i] == '\\' && before[i + 1] == 'u') + || ((i <= *lenp - 10) && before[i] == '\\' && before[i + 1] == 'U')) + { + if (before[i + 1] == 'u') + ndigits = 4; + else if (before[i + 1] == 'U') + ndigits = 8; + else + abort (); + + /* Add the R6RS hex escape initial sequence. */ + after[j] = '\\'; + after[j + 1] = 'x'; + + /* Move string positions to the start of the hex numbers. */ + i += 2; + j += 2; + + /* Find the number of initial zeros in this hex number. */ + nzeros = 0; + while (before[i + nzeros] == '0' && nzeros < ndigits) + nzeros++; + + /* Copy the number, skipping initial zeros, and then move the string + positions. */ + if (nzeros == ndigits) + { + after[j] = '0'; + i += ndigits; + j += 1; + } + else + { + int pos; + for (pos = 0; pos < ndigits - nzeros; pos++) + after[j + pos] = tolower ((int) before[i + nzeros + pos]); + i += ndigits; + j += (ndigits - nzeros); + } + + /* Add terminating semicolon. */ + after[j] = ';'; + j++; + } + else + { + after[j] = before[i]; + i++; + j++; + } + } + *lenp = j; + before = scm_realloc (before, j); + memcpy (before, after, j); +} + + char * scm_to_locale_stringn (SCM str, size_t *lenp) { @@ -1683,26 +1758,27 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding, (enum iconv_ilseq_handler) handler, NULL, &buf, &len); - if (ret == 0 && handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) - unistring_escapes_to_guile_escapes (&buf, &len); - if (ret != 0) - scm_encoding_error (NULL, "cannot convert to output locale ~s: \"~s\"", - scm_list_2 (scm_from_locale_string (enc), str)); + scm_encoding_error (NULL, "cannot convert to output locale ~s: \"~s\"", + scm_list_2 (scm_from_locale_string (enc), str)); } else { - buf = u32_conv_to_encoding (enc, + buf = u32_conv_to_encoding (enc, (enum iconv_ilseq_handler) handler, - (scm_t_uint32 *) scm_i_string_wide_chars (str), + (scm_t_uint32 *) scm_i_string_wide_chars (str), ilen, NULL, NULL, &len); if (buf == NULL) - scm_encoding_error (NULL, "cannot convert to output locale ~s: \"~s\"", - scm_list_2 (scm_from_locale_string (enc), str)); - - if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) + scm_encoding_error (NULL, "cannot convert to output locale ~s: \"~s\"", + scm_list_2 (scm_from_locale_string (enc), str)); + } + if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) + { + if (SCM_R6RS_ESCAPES_P) + unistring_escapes_to_r6rs_escapes (&buf, &len); + else unistring_escapes_to_guile_escapes (&buf, &len); } if (lenp) diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test index f5af52c43..84c20b289 100644 --- a/test-suite/tests/reader.test +++ b/test-suite/tests/reader.test @@ -283,8 +283,7 @@ (with-input-from-string "\"\\x41;\\x0042;\\x000043;\"" read))) "ABC")) - (pass-if "write R6RS escapes" - + (pass-if "write R6RS string escapes" (let* ((s1 (apply string (map integer->char '(#x8 ; backspace #x20 ; space @@ -298,6 +297,20 @@ (lset= eqv? (string->list s2) (list #\" #\\ #\x #\8 #\; #\space #\0 #\@ #\")))) + + (pass-if "display R6RS string escapes" + (string=? + (with-read-options '(r6rs-hex-escapes) + (lambda () + (let ((pt (open-output-string)) + (s1 (apply string (map integer->char + '(#xFF #x100 #xFFF #x1000 #xFFFF #x10000))))) + (set-port-encoding! pt "ASCII") + (set-port-conversion-strategy! pt 'escape) + (display s1 pt) + (get-output-string pt)))) + "\\xff;\\x100;\\xfff;\\x1000;\\xffff;\\x10000;")) + (pass-if "one-digit hex escape" (eqv? (with-read-options '(r6rs-hex-escapes) (lambda () |