diff options
author | Michael Gran <spk121@yahoo.com> | 2009-12-27 18:53:52 -0800 |
---|---|---|
committer | Michael Gran <spk121@yahoo.com> | 2009-12-27 18:53:52 -0800 |
commit | cff6adf899dbc0336c7c017d52504f8138c89b3d (patch) | |
tree | a845bf05c3e42da296f48eeb7616715b9702be0f | |
parent | 0ca3a342d19ec89b8ae6bba0a74f0f9ecc5cf7c2 (diff) | |
download | guile-r6rs-strings.tar.gz |
Reader option to parse strings and chars using R6RS rulesr6rs-strings
Add the capability to use some of R6RS's string and character syntax,
e.g. #\xFFF and "\xFFF;" This is enabled using
(read-enable 'strict-r6rs-strings).
* libguile/print.c (PRINT_CHAR_ESCAPE): new macro
(iprin1): use new macro PRINT_CHAR_ESCAPE and add capability to print
strings according to R6RS
* libguile/chars.c (scm_r6rs_charnames, scm_r6rs_charnums, SCM_N_R6RS_CHARNAMES):
new constants of character names
(scm_i_charname): output R6RS charnames
(scm_i_charname_to_char): check for R6RS charnames
* libguile/private-options.h (SCM_R6RS_OPTIONS_P): new define
* libguile/read.c (scm_read_opts): add new option 'strict-r6rs-strings'
(SCM_READ_HEX_ESCAPE): new codeblock to parse character hex escapes
(SCM_READ_SPACE_LINE_SPACE): new codeblock to parse string backslashes
(scm_read_string): read R6RS string escapes
* test-suite/tests/chars.test: tests for R6RS character escapes
* test-suite/tests/strings.test: test for R6RS string escapes
-rw-r--r-- | libguile/chars.c | 94 | ||||
-rw-r--r-- | libguile/print.c | 103 | ||||
-rw-r--r-- | libguile/private-options.h | 8 | ||||
-rw-r--r-- | libguile/read.c | 231 | ||||
-rw-r--r-- | test-suite/tests/chars.test | 62 | ||||
-rw-r--r-- | test-suite/tests/strings.test | 64 |
6 files changed, 415 insertions, 147 deletions
diff --git a/libguile/chars.c b/libguile/chars.c index 16a2b90d6..2322db858 100644 --- a/libguile/chars.c +++ b/libguile/chars.c @@ -32,7 +32,7 @@ #include "libguile/chars.h" #include "libguile/srfi-14.h" - +#include "libguile/private-options.h" SCM_DEFINE (scm_char_p, "char?", 1, 0, 0, @@ -561,6 +561,18 @@ static const scm_t_uint32 const scm_alt_charnums[] = { #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *)) +static const char *const scm_r6rs_charnames[] = { + "nul", "alarm", "backspace", "tab", "linefeed", "newline", + "vtab", "page", "return", "esc", "space", "delete" +}; + +static const scm_t_uint32 const scm_r6rs_charnums[] = { + 0x00, 0x07, 0x08, 0x09, 0x0A, 0x0A, + 0x0B, 0x0C, 0x0D, 0x1B, 0x20, 0x7F +}; + +#define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *)) + /* Returns the string charname for a character if it exists, or NULL otherwise. */ const char * @@ -569,18 +581,25 @@ scm_i_charname (SCM chr) size_t c; scm_t_uint32 i = SCM_CHAR (chr); - for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++) - if (scm_r5rs_charnums[c] == i) - return scm_r5rs_charnames[c]; - - for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) - if (scm_C0_control_charnums[c] == i) - return scm_C0_control_charnames[c]; - - for (c = 0; c < SCM_N_ALT_CHARNAMES; c++) - if (scm_alt_charnums[c] == i) - return scm_alt_charnames[i]; - + if (!SCM_R6RS_STRINGS_P) + { + for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++) + if (scm_r5rs_charnums[c] == i) + return scm_r5rs_charnames[c]; + + for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) + if (scm_C0_control_charnums[c] == i) + return scm_C0_control_charnames[c]; + + for (c = 0; c < SCM_N_ALT_CHARNAMES; c++) + if (scm_alt_charnums[c] == i) + return scm_alt_charnames[i]; + } + else + for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++) + if (scm_r6rs_charnums[c] == i) + return scm_r6rs_charnames[c]; + return NULL; } @@ -590,25 +609,36 @@ scm_i_charname_to_char (const char *charname, size_t charname_len) { size_t c; - /* The R5RS charnames. These are supposed to be case - insensitive. */ - for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++) - if ((strlen (scm_r5rs_charnames[c]) == charname_len) - && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len))) - return SCM_MAKE_CHAR (scm_r5rs_charnums[c]); - - /* Then come the controls. These are not case sensitive. */ - for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) - if ((strlen (scm_C0_control_charnames[c]) == charname_len) - && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len))) - return SCM_MAKE_CHAR (scm_C0_control_charnums[c]); - - /* Lastly are some old names carried over for compatibility. */ - for (c = 0; c < SCM_N_ALT_CHARNAMES; c++) - if ((strlen (scm_alt_charnames[c]) == charname_len) - && (!strncasecmp (scm_alt_charnames[c], charname, charname_len))) - return SCM_MAKE_CHAR (scm_alt_charnums[c]); - + if (!SCM_R6RS_STRINGS_P) + { + /* The R5RS charnames. These are supposed to be case + insensitive. */ + for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++) + if ((strlen (scm_r5rs_charnames[c]) == charname_len) + && (!strncasecmp (scm_r5rs_charnames[c], charname, + charname_len))) + return SCM_MAKE_CHAR (scm_r5rs_charnums[c]); + + /* Then come the controls. These are not case sensitive. */ + for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) + if ((strlen (scm_C0_control_charnames[c]) == charname_len) + && (!strncasecmp (scm_C0_control_charnames[c], charname, + charname_len))) + return SCM_MAKE_CHAR (scm_C0_control_charnums[c]); + + /* Lastly are some old names carried over for compatibility. */ + for (c = 0; c < SCM_N_ALT_CHARNAMES; c++) + if ((strlen (scm_alt_charnames[c]) == charname_len) + && (!strncasecmp (scm_alt_charnames[c], charname, charname_len))) + return SCM_MAKE_CHAR (scm_alt_charnums[c]); + } + else + /* The strict R6RS charnames. These are case sensitive. */ + for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++) + if ((strlen (scm_r6rs_charnames[c]) == charname_len) + && (!strncmp (scm_r6rs_charnames[c], charname, charname_len))) + return SCM_MAKE_CHAR (scm_r6rs_charnums[c]); + return SCM_BOOL_F; } diff --git a/libguile/print.c b/libguile/print.c index d50df2d24..034aa12d1 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -408,6 +408,21 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display, g_display); static void iprin1 (SCM exp, SCM port, scm_print_state *pstate); +/* Character is graphic but unrepresentable in this port's encoding. */ +#define PRINT_CHAR_ESCAPE(i, port) \ + do \ + { \ + if (!SCM_R6RS_STRINGS_P) \ + scm_intprint (i, 8, port); \ + else \ + { \ + scm_puts ("x", port); \ + scm_intprint (i, 16, port); \ + } \ + } \ + while (0) + + void scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate) { @@ -487,7 +502,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) else /* Character is graphic but unrepresentable in this port's encoding. */ - scm_intprint (i, 8, port); + PRINT_CHAR_ESCAPE(i, port); } else { @@ -506,12 +521,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) else /* Character is graphic but unrepresentable in this port's encoding. */ - scm_intprint (i, 8, port); + PRINT_CHAR_ESCAPE (i, port); } } else /* Character is a non-graphical character. */ - scm_intprint (i, 8, port); + PRINT_CHAR_ESCAPE (i, port); } else scm_i_charprint (i, port); @@ -578,9 +593,9 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc7_string: if (SCM_WRITINGP (pstate)) { - size_t i, j, len; + size_t i, len; static char const hex[] = "0123456789abcdef"; - char buf[8]; + char buf[9]; scm_putc ('"', port); @@ -646,37 +661,59 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) { /* Character is graphic but unrepresentable in this port's encoding or is not graphic. */ - if (ch <= 0xFF) - { - buf[0] = '\\'; - buf[1] = 'x'; - buf[2] = hex[ch / 16]; - buf[3] = hex[ch % 16]; - scm_lfwrite (buf, 4, port); - } - else if (ch <= 0xFFFF) + if (!SCM_R6RS_STRINGS_P) { - buf[0] = '\\'; - buf[1] = 'u'; - buf[2] = hex[(ch & 0xF000) >> 12]; - buf[3] = hex[(ch & 0xF00) >> 8]; - buf[4] = hex[(ch & 0xF0) >> 4]; - buf[5] = hex[(ch & 0xF)]; - scm_lfwrite (buf, 6, port); - j = i + 1; + if (ch <= 0xFF) + { + buf[0] = '\\'; + buf[1] = 'x'; + buf[2] = hex[ch / 16]; + buf[3] = hex[ch % 16]; + scm_lfwrite (buf, 4, port); + } + else if (ch <= 0xFFFF) + { + buf[0] = '\\'; + buf[1] = 'u'; + buf[2] = hex[(ch & 0xF000) >> 12]; + buf[3] = hex[(ch & 0xF00) >> 8]; + buf[4] = hex[(ch & 0xF0) >> 4]; + buf[5] = hex[(ch & 0xF)]; + scm_lfwrite (buf, 6, port); + } + else if (ch > 0xFFFF) + { + buf[0] = '\\'; + buf[1] = 'U'; + buf[2] = hex[(ch & 0xF00000) >> 20]; + buf[3] = hex[(ch & 0xF0000) >> 16]; + buf[4] = hex[(ch & 0xF000) >> 12]; + buf[5] = hex[(ch & 0xF00) >> 8]; + buf[6] = hex[(ch & 0xF0) >> 4]; + buf[7] = hex[(ch & 0xF)]; + scm_lfwrite (buf, 8, port); + } } - else if (ch > 0xFFFF) + else { - buf[0] = '\\'; - buf[1] = 'U'; - buf[2] = hex[(ch & 0xF00000) >> 20]; - buf[3] = hex[(ch & 0xF0000) >> 16]; - buf[4] = hex[(ch & 0xF000) >> 12]; - buf[5] = hex[(ch & 0xF00) >> 8]; - buf[6] = hex[(ch & 0xF0) >> 4]; - buf[7] = hex[(ch & 0xF)]; - scm_lfwrite (buf, 8, port); - j = i + 1; + scm_t_wchar ch2 = ch; + + int i = 8; + buf[i] = ';'; + i --; + if (ch == 0) + buf[i--] = '0'; + else + while (ch2 > 0) + { + buf[i] = hex[ch2 & 0xF]; + ch2 >>= 4; + i --; + } + buf[i] = 'x'; + i --; + buf[i] = '\\'; + scm_lfwrite (buf + i, 9 - i, port); } } } diff --git a/libguile/private-options.h b/libguile/private-options.h index 703ca8a5b..35fa2e38e 100644 --- a/libguile/private-options.h +++ b/libguile/private-options.h @@ -94,9 +94,13 @@ SCM_API scm_t_option scm_read_opts[]; #if SCM_ENABLE_ELISP #define SCM_ELISP_VECTORS_P scm_read_opts[4].val #define SCM_ESCAPED_PARENS_P scm_read_opts[5].val -#define SCM_N_READ_OPTIONS 6 +#endif +#define SCM_R6RS_STRINGS_P scm_read_opts[6].val + +#if SCM_ENABLE_ELISP +#define SCM_N_READ_OPTIONS 7 #else -#define SCM_N_READ_OPTIONS 4 +#define SCM_N_READ_OPTIONS 5 #endif #endif /* PRIVATE_OPTIONS */ diff --git a/libguile/read.c b/libguile/read.c index da4a17433..0881e4fa6 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -29,6 +29,7 @@ #include <string.h> #include <unistd.h> #include <unicase.h> +#include <unictype.h> #include "libguile/_scm.h" #include "libguile/bytevectors.h" @@ -76,6 +77,8 @@ scm_t_option scm_read_opts[] = { { SCM_OPTION_BOOLEAN, "elisp-strings", 0, "Support `\\(' and `\\)' in strings."}, #endif + { SCM_OPTION_BOOLEAN, "strict-r6rs-strings", 0, + "Strictly follow R6RS rules for strings"}, { 0, }, }; @@ -412,6 +415,87 @@ scm_read_sexp (scm_t_wchar chr, SCM port) } #undef FUNC_NAME + +#define SCM_READ_HEX_ESCAPE(ndigits, terminator) \ + do \ + { \ + scm_t_wchar a; \ + size_t i = 0; \ + c = 0; \ + while (1) \ + { \ + if (i == ndigits) \ + break; \ + a = scm_getc (port); \ + if (a == EOF) \ + goto str_eof; \ + if (terminator \ + && (a == (scm_t_wchar) terminator) \ + && (i > 0)) \ + break; \ + if ('0' <= a && a <= '9') \ + a -= '0'; \ + else if ('A' <= a && a <= 'F') \ + a = a - 'A' + 10; \ + else if ('a' <= a && a <= 'f') \ + a = a - 'a' + 10; \ + else \ + { \ + c = a; \ + goto bad_escaped; \ + } \ + c = c * 16 + a; \ + i ++; \ + } \ + } while (0) + +#define SCM_READ_SPACE_LINE_SPACE \ + do \ + { \ + int line_ending_started = 0; \ + int line_ending_complete = 0; \ + while (1) \ + { \ + c = scm_getc (port); \ + if (c == EOF) \ + goto str_eof; \ + if (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR)) \ + { \ + if (line_ending_started) \ + line_ending_complete = 1; \ + continue; \ + } \ + else if (c == '\r') \ + { \ + if (line_ending_started) \ + { \ + line_ending_complete = 1; \ + break; \ + } \ + line_ending_started = 1; \ + continue; \ + } \ + else if ((c == '\n') \ + || (c == 0x0085) /* NEL */ \ + || (c == 0x2028)) /* LS */ \ + { \ + if (line_ending_complete) \ + break; \ + line_ending_started = 1; \ + line_ending_complete = 1; \ + continue; \ + } \ + else \ + { \ + if (!line_ending_started) \ + goto bad_escaped; \ + break; \ + } \ + } \ + } while (0) + + + static SCM scm_read_string (int chr, SCM port) #define FUNC_NAME "scm_lreadr" @@ -459,8 +543,11 @@ scm_read_string (int chr, SCM port) case '\n': continue; case '0': - c = '\0'; - break; + if (!SCM_R6RS_STRINGS_P) + { + c = '\0'; + break; + } case 'f': c = '\f'; break; @@ -479,90 +566,56 @@ scm_read_string (int chr, SCM port) case 'v': c = '\v'; break; + case 'b': + if (SCM_R6RS_STRINGS_P) + { + c = '\010'; + break; + } + case ' ': + case '\t': + case 0x00A0: /* no-break space */ + case 0x1680: /* ogham space */ + case 0x180E: /* mongolian vowel separator */ + case 0x2000: /* en quad */ + case 0x2001: /* em quad */ + case 0x2002: /* en space */ + case 0x2003: /* em space */ + case 0x2004: /* three-per-em space */ + case 0x2005: /* four-per-em space */ + case 0x2006: /* six-per-em space */ + case 0x2007: /* figure space */ + case 0x2008: /* punctuation space */ + case 0x2009: /* thin space */ + case 0x200A: /* hair space */ + case 0x202F: /* narrow no-break space */ + case 0x205F: /* medium mathematical space */ + case 0x3000: /* ideographic space */ + if (SCM_R6RS_STRINGS_P) + { + SCM_READ_SPACE_LINE_SPACE; + break; + } case 'x': { - scm_t_wchar a, b; - a = scm_getc (port); - if (a == EOF) - goto str_eof; - b = scm_getc (port); - if (b == EOF) - goto str_eof; - if ('0' <= a && a <= '9') - a -= '0'; - else if ('A' <= a && a <= 'F') - a = a - 'A' + 10; - else if ('a' <= a && a <= 'f') - a = a - 'a' + 10; - else - { - c = a; - goto bad_escaped; - } - if ('0' <= b && b <= '9') - b -= '0'; - else if ('A' <= b && b <= 'F') - b = b - 'A' + 10; - else if ('a' <= b && b <= 'f') - b = b - 'a' + 10; + if (SCM_R6RS_STRINGS_P) + SCM_READ_HEX_ESCAPE (10, ';'); else - { - c = b; - goto bad_escaped; - } - c = a * 16 + b; + SCM_READ_HEX_ESCAPE (2, '\0'); break; } case 'u': - { - scm_t_wchar a; - int i; - c = 0; - for (i = 0; i < 4; i++) - { - a = scm_getc (port); - if (a == EOF) - goto str_eof; - if ('0' <= a && a <= '9') - a -= '0'; - else if ('A' <= a && a <= 'F') - a = a - 'A' + 10; - else if ('a' <= a && a <= 'f') - a = a - 'a' + 10; - else - { - c = a; - goto bad_escaped; - } - c = c * 16 + a; - } - break; - } + if (!SCM_R6RS_STRINGS_P) + { + SCM_READ_HEX_ESCAPE (4, '\0'); + break; + } case 'U': - { - scm_t_wchar a; - int i; - c = 0; - for (i = 0; i < 6; i++) - { - a = scm_getc (port); - if (a == EOF) - goto str_eof; - if ('0' <= a && a <= '9') - a -= '0'; - else if ('A' <= a && a <= 'F') - a = a - 'A' + 10; - else if ('a' <= a && a <= 'f') - a = a - 'a' + 10; - else - { - c = a; - goto bad_escaped; - } - c = c * 16 + a; - } - break; - } + if (!SCM_R6RS_STRINGS_P) + { + SCM_READ_HEX_ESCAPE (6, '\0'); + break; + } default: bad_escaped: scm_i_input_error (FUNC_NAME, port, @@ -854,6 +907,26 @@ scm_read_character (scm_t_wchar chr, SCM port) if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2) return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1)); + if (cp == 'x' && (charname_len > 1) && SCM_R6RS_STRINGS_P) + { + SCM p; + scm_t_wchar chr; + + /* Convert from hex, skipping the initial 'x' character in CHARNAME */ + p = scm_string_to_number (scm_c_substring (charname, 1, charname_len), + scm_from_uint (16)); + if (SCM_I_INUMP (p)) + { + scm_t_wchar c = SCM_I_INUM (p); + if (SCM_IS_UNICODE_CHAR (c)) + return SCM_MAKE_CHAR (c); + else + scm_i_input_error (FUNC_NAME, port, + "out-of-range hex character escape: ~a", + scm_list_1 (charname)); + } + } + if (cp >= '0' && cp < '8') { /* Dirk:FIXME:: This type of character syntax is not R5RS diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test index cd1572feb..90114992f 100644 --- a/test-suite/tests/chars.test +++ b/test-suite/tests/chars.test @@ -295,3 +295,65 @@ (with-output-to-string (lambda () (write #\soh))) "#\\soh")))) +(with-test-prefix "r6rs char handling" + + (read-enable 'strict-r6rs-strings) + + (pass-if "#\\a" + (eqv? (read (open-input-string "#\\a")) (integer->char #x61))) + + (pass-if "#\\A" + (eqv? (with-input-from-string "#\\A" read) (integer->char #x41))) + + (pass-if "#\\(" + (eqv? (with-input-from-string "#\\(" read) (integer->char #x28))) + + (pass-if "#\\(space)" + (eqv? (with-input-from-string "#\\ " read) (integer->char #x20))) + + (pass-if "R6RS character names" + (and + (eqv? (with-input-from-string "#\\nul" read) (integer->char #x00)) + (eqv? (with-input-from-string "#\\alarm" read) (integer->char #x07)) + (eqv? (with-input-from-string "#\\backspace" read) (integer->char #x08)) + (eqv? (with-input-from-string "#\\tab" read) (integer->char #x09)) + (eqv? (with-input-from-string "#\\linefeed" read) (integer->char #x0A)) + (eqv? (with-input-from-string "#\\newline" read) (integer->char #x0A)) + (eqv? (with-input-from-string "#\\vtab" read) (integer->char #x0B)) + (eqv? (with-input-from-string "#\\page" read) (integer->char #x0C)) + (eqv? (with-input-from-string "#\\return" read) (integer->char #x0D)) + (eqv? (with-input-from-string "#\\esc" read) (integer->char #x1B)) + (eqv? (with-input-from-string "#\\space" read) (integer->char #x20)) + (eqv? (with-input-from-string "#\\delete" read) (integer->char #x7F)))) + + (pass-if-exception "R6RS bad charname" exception:unknown-character-name + (with-input-from-string "#\\blammo" read)) + + (pass-if-exception "R6RS charnames are case sensitive" + exception:unknown-character-name + (with-input-from-string "#\\Backspace" read)) + + (pass-if "one-digit hex escape" + (eqv? (with-input-from-string "#\\xA" read) (integer->char #x0A))) + + (pass-if "two-digit hex escape" + (eqv? (with-input-from-string "#\\xFF" read) (integer->char #xFF))) + + (pass-if "four-digit hex escape" + (eqv? (with-input-from-string "#\\x0001" read) (integer->char #x01))) + + (pass-if "eight-digit hex escape" + (eqv? (with-input-from-string "#\\x00006587" read) (integer->char #x6587))) + + (pass-if "write R6RS character names" + (string=? + (with-output-to-string (lambda () (write (integer->char #x07)))) + "#\\alarm")) + + (pass-if "write R6RS escapes" + (string=? + (with-output-to-string (lambda () (write (integer->char #x80)))) + "#\\x80")) + + (read-disable 'strict-r6rs-strings)) + diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test index 013c1a863..868ee1c88 100644 --- a/test-suite/tests/strings.test +++ b/test-suite/tests/strings.test @@ -18,7 +18,8 @@ ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA (define-module (test-strings) - #:use-module (test-suite lib)) + #:use-module (test-suite lib) + #:use-module (srfi srfi-1)) (define exception:read-only-string (cons 'misc-error "^string is read-only")) @@ -519,3 +520,64 @@ (string-upcase! (substring/shared str2 1 4)) (and (string=? str1 "foofoofoo") (string=? str2 "oFOOf"))))) + +(with-test-prefix "R6RS escapes" + + (read-enable 'strict-r6rs-strings) + + (pass-if-exception "non-hex char in two-digit hex-escape" + exception:illegal-escape + (with-input-from-string "\"\\x0g;\"" read)) + + (pass-if-exception "non-hex char in four-digit hex-escape" + exception:illegal-escape + (with-input-from-string "\"\\x000g;\"" read)) + + (pass-if-exception "non-hex char in six-digit hex-escape" + exception:illegal-escape + (with-input-from-string "\"\\x00000g;\"" read)) + + (pass-if-exception "no semicolon at termination of one-digit hex-escape" + exception:illegal-escape + (with-input-from-string "\"\\x0\"" read)) + + (pass-if-exception "no semicolon at termination of three-digit hex-escape" + exception:illegal-escape + (with-input-from-string "\"\\x000\"" read)) + + (pass-if "two-digit hex escape" + (eqv? (string-ref (with-input-from-string "\"--\\xff;--\"" read) 2) + (integer->char #xff))) + + (pass-if "four-digit hex escape" + (eqv? (string-ref (with-input-from-string "\"--\\x0100;--\"" read) 2) + (integer->char #x0100))) + + (pass-if "six-digit hex escape" + (eqv? (string-ref (with-input-from-string "\"--\\x010300;--\"" read) 2) + (integer->char #x010300))) + + (pass-if "escaped characters match non-escaped ASCII characters" + (string=? "ABC" + (with-input-from-string "\"\\x41;\\x0042;\\x000043;\"" read))) + + (pass-if "R6RS backslash character escapes" + (string=? (with-input-from-string "\"\\\"\\\\\"" read) + (string #\" #\\))) + + (pass-if "R6RS backslash control escapes" + (string=? (with-input-from-string "\"\\a\\b\\t\\n\\v\\f\\r\"" read) + (apply string (map integer->char '(7 8 9 10 11 12 13))))) + + (pass-if "write R6RS escapes" + (let* ((s1 (apply string + (map integer->char '(#x8 #x20 #x30 #x40)))) + (s2 (with-output-to-string + (lambda () (write s1))))) + (lset= eqv? + (string->list s2) + (list #\" #\\ #\x #\8 #\; #\space #\0 #\@ #\")))) + + (read-disable 'strict-r6rs-strings)) + + |