summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Gran <spk121@yahoo.com>2009-12-27 18:53:52 -0800
committerMichael Gran <spk121@yahoo.com>2009-12-27 18:53:52 -0800
commitcff6adf899dbc0336c7c017d52504f8138c89b3d (patch)
treea845bf05c3e42da296f48eeb7616715b9702be0f
parent0ca3a342d19ec89b8ae6bba0a74f0f9ecc5cf7c2 (diff)
downloadguile-r6rs-strings.tar.gz
Reader option to parse strings and chars using R6RS rulesr6rs-strings
Add the capability to use some of R6RS's string and character syntax, e.g. #\xFFF and "\xFFF;" This is enabled using (read-enable 'strict-r6rs-strings). * libguile/print.c (PRINT_CHAR_ESCAPE): new macro (iprin1): use new macro PRINT_CHAR_ESCAPE and add capability to print strings according to R6RS * libguile/chars.c (scm_r6rs_charnames, scm_r6rs_charnums, SCM_N_R6RS_CHARNAMES): new constants of character names (scm_i_charname): output R6RS charnames (scm_i_charname_to_char): check for R6RS charnames * libguile/private-options.h (SCM_R6RS_OPTIONS_P): new define * libguile/read.c (scm_read_opts): add new option 'strict-r6rs-strings' (SCM_READ_HEX_ESCAPE): new codeblock to parse character hex escapes (SCM_READ_SPACE_LINE_SPACE): new codeblock to parse string backslashes (scm_read_string): read R6RS string escapes * test-suite/tests/chars.test: tests for R6RS character escapes * test-suite/tests/strings.test: test for R6RS string escapes
-rw-r--r--libguile/chars.c94
-rw-r--r--libguile/print.c103
-rw-r--r--libguile/private-options.h8
-rw-r--r--libguile/read.c231
-rw-r--r--test-suite/tests/chars.test62
-rw-r--r--test-suite/tests/strings.test64
6 files changed, 415 insertions, 147 deletions
diff --git a/libguile/chars.c b/libguile/chars.c
index 16a2b90d6..2322db858 100644
--- a/libguile/chars.c
+++ b/libguile/chars.c
@@ -32,7 +32,7 @@
#include "libguile/chars.h"
#include "libguile/srfi-14.h"
-
+#include "libguile/private-options.h"
SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
@@ -561,6 +561,18 @@ static const scm_t_uint32 const scm_alt_charnums[] = {
#define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
+static const char *const scm_r6rs_charnames[] = {
+ "nul", "alarm", "backspace", "tab", "linefeed", "newline",
+ "vtab", "page", "return", "esc", "space", "delete"
+};
+
+static const scm_t_uint32 const scm_r6rs_charnums[] = {
+ 0x00, 0x07, 0x08, 0x09, 0x0A, 0x0A,
+ 0x0B, 0x0C, 0x0D, 0x1B, 0x20, 0x7F
+};
+
+#define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
+
/* Returns the string charname for a character if it exists, or NULL
otherwise. */
const char *
@@ -569,18 +581,25 @@ scm_i_charname (SCM chr)
size_t c;
scm_t_uint32 i = SCM_CHAR (chr);
- for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
- if (scm_r5rs_charnums[c] == i)
- return scm_r5rs_charnames[c];
-
- for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
- if (scm_C0_control_charnums[c] == i)
- return scm_C0_control_charnames[c];
-
- for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
- if (scm_alt_charnums[c] == i)
- return scm_alt_charnames[i];
-
+ if (!SCM_R6RS_STRINGS_P)
+ {
+ for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
+ if (scm_r5rs_charnums[c] == i)
+ return scm_r5rs_charnames[c];
+
+ for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
+ if (scm_C0_control_charnums[c] == i)
+ return scm_C0_control_charnames[c];
+
+ for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
+ if (scm_alt_charnums[c] == i)
+ return scm_alt_charnames[i];
+ }
+ else
+ for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
+ if (scm_r6rs_charnums[c] == i)
+ return scm_r6rs_charnames[c];
+
return NULL;
}
@@ -590,25 +609,36 @@ scm_i_charname_to_char (const char *charname, size_t charname_len)
{
size_t c;
- /* The R5RS charnames. These are supposed to be case
- insensitive. */
- for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
- if ((strlen (scm_r5rs_charnames[c]) == charname_len)
- && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
- return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
-
- /* Then come the controls. These are not case sensitive. */
- for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
- if ((strlen (scm_C0_control_charnames[c]) == charname_len)
- && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
- return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
-
- /* Lastly are some old names carried over for compatibility. */
- for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
- if ((strlen (scm_alt_charnames[c]) == charname_len)
- && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
- return SCM_MAKE_CHAR (scm_alt_charnums[c]);
-
+ if (!SCM_R6RS_STRINGS_P)
+ {
+ /* The R5RS charnames. These are supposed to be case
+ insensitive. */
+ for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
+ if ((strlen (scm_r5rs_charnames[c]) == charname_len)
+ && (!strncasecmp (scm_r5rs_charnames[c], charname,
+ charname_len)))
+ return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
+
+ /* Then come the controls. These are not case sensitive. */
+ for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
+ if ((strlen (scm_C0_control_charnames[c]) == charname_len)
+ && (!strncasecmp (scm_C0_control_charnames[c], charname,
+ charname_len)))
+ return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
+
+ /* Lastly are some old names carried over for compatibility. */
+ for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
+ if ((strlen (scm_alt_charnames[c]) == charname_len)
+ && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
+ return SCM_MAKE_CHAR (scm_alt_charnums[c]);
+ }
+ else
+ /* The strict R6RS charnames. These are case sensitive. */
+ for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
+ if ((strlen (scm_r6rs_charnames[c]) == charname_len)
+ && (!strncmp (scm_r6rs_charnames[c], charname, charname_len)))
+ return SCM_MAKE_CHAR (scm_r6rs_charnums[c]);
+
return SCM_BOOL_F;
}
diff --git a/libguile/print.c b/libguile/print.c
index d50df2d24..034aa12d1 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -408,6 +408,21 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display, g_display);
static void iprin1 (SCM exp, SCM port, scm_print_state *pstate);
+/* Character is graphic but unrepresentable in this port's encoding. */
+#define PRINT_CHAR_ESCAPE(i, port) \
+ do \
+ { \
+ if (!SCM_R6RS_STRINGS_P) \
+ scm_intprint (i, 8, port); \
+ else \
+ { \
+ scm_puts ("x", port); \
+ scm_intprint (i, 16, port); \
+ } \
+ } \
+ while (0)
+
+
void
scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate)
{
@@ -487,7 +502,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
else
/* Character is graphic but unrepresentable in
this port's encoding. */
- scm_intprint (i, 8, port);
+ PRINT_CHAR_ESCAPE(i, port);
}
else
{
@@ -506,12 +521,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
else
/* Character is graphic but unrepresentable in
this port's encoding. */
- scm_intprint (i, 8, port);
+ PRINT_CHAR_ESCAPE (i, port);
}
}
else
/* Character is a non-graphical character. */
- scm_intprint (i, 8, port);
+ PRINT_CHAR_ESCAPE (i, port);
}
else
scm_i_charprint (i, port);
@@ -578,9 +593,9 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
case scm_tc7_string:
if (SCM_WRITINGP (pstate))
{
- size_t i, j, len;
+ size_t i, len;
static char const hex[] = "0123456789abcdef";
- char buf[8];
+ char buf[9];
scm_putc ('"', port);
@@ -646,37 +661,59 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
{
/* Character is graphic but unrepresentable in
this port's encoding or is not graphic. */
- if (ch <= 0xFF)
- {
- buf[0] = '\\';
- buf[1] = 'x';
- buf[2] = hex[ch / 16];
- buf[3] = hex[ch % 16];
- scm_lfwrite (buf, 4, port);
- }
- else if (ch <= 0xFFFF)
+ if (!SCM_R6RS_STRINGS_P)
{
- buf[0] = '\\';
- buf[1] = 'u';
- buf[2] = hex[(ch & 0xF000) >> 12];
- buf[3] = hex[(ch & 0xF00) >> 8];
- buf[4] = hex[(ch & 0xF0) >> 4];
- buf[5] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 6, port);
- j = i + 1;
+ if (ch <= 0xFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'x';
+ buf[2] = hex[ch / 16];
+ buf[3] = hex[ch % 16];
+ scm_lfwrite (buf, 4, port);
+ }
+ else if (ch <= 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'u';
+ buf[2] = hex[(ch & 0xF000) >> 12];
+ buf[3] = hex[(ch & 0xF00) >> 8];
+ buf[4] = hex[(ch & 0xF0) >> 4];
+ buf[5] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 6, port);
+ }
+ else if (ch > 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'U';
+ buf[2] = hex[(ch & 0xF00000) >> 20];
+ buf[3] = hex[(ch & 0xF0000) >> 16];
+ buf[4] = hex[(ch & 0xF000) >> 12];
+ buf[5] = hex[(ch & 0xF00) >> 8];
+ buf[6] = hex[(ch & 0xF0) >> 4];
+ buf[7] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 8, port);
+ }
}
- else if (ch > 0xFFFF)
+ else
{
- buf[0] = '\\';
- buf[1] = 'U';
- buf[2] = hex[(ch & 0xF00000) >> 20];
- buf[3] = hex[(ch & 0xF0000) >> 16];
- buf[4] = hex[(ch & 0xF000) >> 12];
- buf[5] = hex[(ch & 0xF00) >> 8];
- buf[6] = hex[(ch & 0xF0) >> 4];
- buf[7] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 8, port);
- j = i + 1;
+ scm_t_wchar ch2 = ch;
+
+ int i = 8;
+ buf[i] = ';';
+ i --;
+ if (ch == 0)
+ buf[i--] = '0';
+ else
+ while (ch2 > 0)
+ {
+ buf[i] = hex[ch2 & 0xF];
+ ch2 >>= 4;
+ i --;
+ }
+ buf[i] = 'x';
+ i --;
+ buf[i] = '\\';
+ scm_lfwrite (buf + i, 9 - i, port);
}
}
}
diff --git a/libguile/private-options.h b/libguile/private-options.h
index 703ca8a5b..35fa2e38e 100644
--- a/libguile/private-options.h
+++ b/libguile/private-options.h
@@ -94,9 +94,13 @@ SCM_API scm_t_option scm_read_opts[];
#if SCM_ENABLE_ELISP
#define SCM_ELISP_VECTORS_P scm_read_opts[4].val
#define SCM_ESCAPED_PARENS_P scm_read_opts[5].val
-#define SCM_N_READ_OPTIONS 6
+#endif
+#define SCM_R6RS_STRINGS_P scm_read_opts[6].val
+
+#if SCM_ENABLE_ELISP
+#define SCM_N_READ_OPTIONS 7
#else
-#define SCM_N_READ_OPTIONS 4
+#define SCM_N_READ_OPTIONS 5
#endif
#endif /* PRIVATE_OPTIONS */
diff --git a/libguile/read.c b/libguile/read.c
index da4a17433..0881e4fa6 100644
--- a/libguile/read.c
+++ b/libguile/read.c
@@ -29,6 +29,7 @@
#include <string.h>
#include <unistd.h>
#include <unicase.h>
+#include <unictype.h>
#include "libguile/_scm.h"
#include "libguile/bytevectors.h"
@@ -76,6 +77,8 @@ scm_t_option scm_read_opts[] = {
{ SCM_OPTION_BOOLEAN, "elisp-strings", 0,
"Support `\\(' and `\\)' in strings."},
#endif
+ { SCM_OPTION_BOOLEAN, "strict-r6rs-strings", 0,
+ "Strictly follow R6RS rules for strings"},
{ 0, },
};
@@ -412,6 +415,87 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
}
#undef FUNC_NAME
+
+#define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
+ do \
+ { \
+ scm_t_wchar a; \
+ size_t i = 0; \
+ c = 0; \
+ while (1) \
+ { \
+ if (i == ndigits) \
+ break; \
+ a = scm_getc (port); \
+ if (a == EOF) \
+ goto str_eof; \
+ if (terminator \
+ && (a == (scm_t_wchar) terminator) \
+ && (i > 0)) \
+ break; \
+ if ('0' <= a && a <= '9') \
+ a -= '0'; \
+ else if ('A' <= a && a <= 'F') \
+ a = a - 'A' + 10; \
+ else if ('a' <= a && a <= 'f') \
+ a = a - 'a' + 10; \
+ else \
+ { \
+ c = a; \
+ goto bad_escaped; \
+ } \
+ c = c * 16 + a; \
+ i ++; \
+ } \
+ } while (0)
+
+#define SCM_READ_SPACE_LINE_SPACE \
+ do \
+ { \
+ int line_ending_started = 0; \
+ int line_ending_complete = 0; \
+ while (1) \
+ { \
+ c = scm_getc (port); \
+ if (c == EOF) \
+ goto str_eof; \
+ if (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR)) \
+ { \
+ if (line_ending_started) \
+ line_ending_complete = 1; \
+ continue; \
+ } \
+ else if (c == '\r') \
+ { \
+ if (line_ending_started) \
+ { \
+ line_ending_complete = 1; \
+ break; \
+ } \
+ line_ending_started = 1; \
+ continue; \
+ } \
+ else if ((c == '\n') \
+ || (c == 0x0085) /* NEL */ \
+ || (c == 0x2028)) /* LS */ \
+ { \
+ if (line_ending_complete) \
+ break; \
+ line_ending_started = 1; \
+ line_ending_complete = 1; \
+ continue; \
+ } \
+ else \
+ { \
+ if (!line_ending_started) \
+ goto bad_escaped; \
+ break; \
+ } \
+ } \
+ } while (0)
+
+
+
static SCM
scm_read_string (int chr, SCM port)
#define FUNC_NAME "scm_lreadr"
@@ -459,8 +543,11 @@ scm_read_string (int chr, SCM port)
case '\n':
continue;
case '0':
- c = '\0';
- break;
+ if (!SCM_R6RS_STRINGS_P)
+ {
+ c = '\0';
+ break;
+ }
case 'f':
c = '\f';
break;
@@ -479,90 +566,56 @@ scm_read_string (int chr, SCM port)
case 'v':
c = '\v';
break;
+ case 'b':
+ if (SCM_R6RS_STRINGS_P)
+ {
+ c = '\010';
+ break;
+ }
+ case ' ':
+ case '\t':
+ case 0x00A0: /* no-break space */
+ case 0x1680: /* ogham space */
+ case 0x180E: /* mongolian vowel separator */
+ case 0x2000: /* en quad */
+ case 0x2001: /* em quad */
+ case 0x2002: /* en space */
+ case 0x2003: /* em space */
+ case 0x2004: /* three-per-em space */
+ case 0x2005: /* four-per-em space */
+ case 0x2006: /* six-per-em space */
+ case 0x2007: /* figure space */
+ case 0x2008: /* punctuation space */
+ case 0x2009: /* thin space */
+ case 0x200A: /* hair space */
+ case 0x202F: /* narrow no-break space */
+ case 0x205F: /* medium mathematical space */
+ case 0x3000: /* ideographic space */
+ if (SCM_R6RS_STRINGS_P)
+ {
+ SCM_READ_SPACE_LINE_SPACE;
+ break;
+ }
case 'x':
{
- scm_t_wchar a, b;
- a = scm_getc (port);
- if (a == EOF)
- goto str_eof;
- b = scm_getc (port);
- if (b == EOF)
- goto str_eof;
- if ('0' <= a && a <= '9')
- a -= '0';
- else if ('A' <= a && a <= 'F')
- a = a - 'A' + 10;
- else if ('a' <= a && a <= 'f')
- a = a - 'a' + 10;
- else
- {
- c = a;
- goto bad_escaped;
- }
- if ('0' <= b && b <= '9')
- b -= '0';
- else if ('A' <= b && b <= 'F')
- b = b - 'A' + 10;
- else if ('a' <= b && b <= 'f')
- b = b - 'a' + 10;
+ if (SCM_R6RS_STRINGS_P)
+ SCM_READ_HEX_ESCAPE (10, ';');
else
- {
- c = b;
- goto bad_escaped;
- }
- c = a * 16 + b;
+ SCM_READ_HEX_ESCAPE (2, '\0');
break;
}
case 'u':
- {
- scm_t_wchar a;
- int i;
- c = 0;
- for (i = 0; i < 4; i++)
- {
- a = scm_getc (port);
- if (a == EOF)
- goto str_eof;
- if ('0' <= a && a <= '9')
- a -= '0';
- else if ('A' <= a && a <= 'F')
- a = a - 'A' + 10;
- else if ('a' <= a && a <= 'f')
- a = a - 'a' + 10;
- else
- {
- c = a;
- goto bad_escaped;
- }
- c = c * 16 + a;
- }
- break;
- }
+ if (!SCM_R6RS_STRINGS_P)
+ {
+ SCM_READ_HEX_ESCAPE (4, '\0');
+ break;
+ }
case 'U':
- {
- scm_t_wchar a;
- int i;
- c = 0;
- for (i = 0; i < 6; i++)
- {
- a = scm_getc (port);
- if (a == EOF)
- goto str_eof;
- if ('0' <= a && a <= '9')
- a -= '0';
- else if ('A' <= a && a <= 'F')
- a = a - 'A' + 10;
- else if ('a' <= a && a <= 'f')
- a = a - 'a' + 10;
- else
- {
- c = a;
- goto bad_escaped;
- }
- c = c * 16 + a;
- }
- break;
- }
+ if (!SCM_R6RS_STRINGS_P)
+ {
+ SCM_READ_HEX_ESCAPE (6, '\0');
+ break;
+ }
default:
bad_escaped:
scm_i_input_error (FUNC_NAME, port,
@@ -854,6 +907,26 @@ scm_read_character (scm_t_wchar chr, SCM port)
if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
+ if (cp == 'x' && (charname_len > 1) && SCM_R6RS_STRINGS_P)
+ {
+ SCM p;
+ scm_t_wchar chr;
+
+ /* Convert from hex, skipping the initial 'x' character in CHARNAME */
+ p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
+ scm_from_uint (16));
+ if (SCM_I_INUMP (p))
+ {
+ scm_t_wchar c = SCM_I_INUM (p);
+ if (SCM_IS_UNICODE_CHAR (c))
+ return SCM_MAKE_CHAR (c);
+ else
+ scm_i_input_error (FUNC_NAME, port,
+ "out-of-range hex character escape: ~a",
+ scm_list_1 (charname));
+ }
+ }
+
if (cp >= '0' && cp < '8')
{
/* Dirk:FIXME:: This type of character syntax is not R5RS
diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test
index cd1572feb..90114992f 100644
--- a/test-suite/tests/chars.test
+++ b/test-suite/tests/chars.test
@@ -295,3 +295,65 @@
(with-output-to-string (lambda () (write #\soh)))
"#\\soh"))))
+(with-test-prefix "r6rs char handling"
+
+ (read-enable 'strict-r6rs-strings)
+
+ (pass-if "#\\a"
+ (eqv? (read (open-input-string "#\\a")) (integer->char #x61)))
+
+ (pass-if "#\\A"
+ (eqv? (with-input-from-string "#\\A" read) (integer->char #x41)))
+
+ (pass-if "#\\("
+ (eqv? (with-input-from-string "#\\(" read) (integer->char #x28)))
+
+ (pass-if "#\\(space)"
+ (eqv? (with-input-from-string "#\\ " read) (integer->char #x20)))
+
+ (pass-if "R6RS character names"
+ (and
+ (eqv? (with-input-from-string "#\\nul" read) (integer->char #x00))
+ (eqv? (with-input-from-string "#\\alarm" read) (integer->char #x07))
+ (eqv? (with-input-from-string "#\\backspace" read) (integer->char #x08))
+ (eqv? (with-input-from-string "#\\tab" read) (integer->char #x09))
+ (eqv? (with-input-from-string "#\\linefeed" read) (integer->char #x0A))
+ (eqv? (with-input-from-string "#\\newline" read) (integer->char #x0A))
+ (eqv? (with-input-from-string "#\\vtab" read) (integer->char #x0B))
+ (eqv? (with-input-from-string "#\\page" read) (integer->char #x0C))
+ (eqv? (with-input-from-string "#\\return" read) (integer->char #x0D))
+ (eqv? (with-input-from-string "#\\esc" read) (integer->char #x1B))
+ (eqv? (with-input-from-string "#\\space" read) (integer->char #x20))
+ (eqv? (with-input-from-string "#\\delete" read) (integer->char #x7F))))
+
+ (pass-if-exception "R6RS bad charname" exception:unknown-character-name
+ (with-input-from-string "#\\blammo" read))
+
+ (pass-if-exception "R6RS charnames are case sensitive"
+ exception:unknown-character-name
+ (with-input-from-string "#\\Backspace" read))
+
+ (pass-if "one-digit hex escape"
+ (eqv? (with-input-from-string "#\\xA" read) (integer->char #x0A)))
+
+ (pass-if "two-digit hex escape"
+ (eqv? (with-input-from-string "#\\xFF" read) (integer->char #xFF)))
+
+ (pass-if "four-digit hex escape"
+ (eqv? (with-input-from-string "#\\x0001" read) (integer->char #x01)))
+
+ (pass-if "eight-digit hex escape"
+ (eqv? (with-input-from-string "#\\x00006587" read) (integer->char #x6587)))
+
+ (pass-if "write R6RS character names"
+ (string=?
+ (with-output-to-string (lambda () (write (integer->char #x07))))
+ "#\\alarm"))
+
+ (pass-if "write R6RS escapes"
+ (string=?
+ (with-output-to-string (lambda () (write (integer->char #x80))))
+ "#\\x80"))
+
+ (read-disable 'strict-r6rs-strings))
+
diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test
index 013c1a863..868ee1c88 100644
--- a/test-suite/tests/strings.test
+++ b/test-suite/tests/strings.test
@@ -18,7 +18,8 @@
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
(define-module (test-strings)
- #:use-module (test-suite lib))
+ #:use-module (test-suite lib)
+ #:use-module (srfi srfi-1))
(define exception:read-only-string
(cons 'misc-error "^string is read-only"))
@@ -519,3 +520,64 @@
(string-upcase! (substring/shared str2 1 4))
(and (string=? str1 "foofoofoo")
(string=? str2 "oFOOf")))))
+
+(with-test-prefix "R6RS escapes"
+
+ (read-enable 'strict-r6rs-strings)
+
+ (pass-if-exception "non-hex char in two-digit hex-escape"
+ exception:illegal-escape
+ (with-input-from-string "\"\\x0g;\"" read))
+
+ (pass-if-exception "non-hex char in four-digit hex-escape"
+ exception:illegal-escape
+ (with-input-from-string "\"\\x000g;\"" read))
+
+ (pass-if-exception "non-hex char in six-digit hex-escape"
+ exception:illegal-escape
+ (with-input-from-string "\"\\x00000g;\"" read))
+
+ (pass-if-exception "no semicolon at termination of one-digit hex-escape"
+ exception:illegal-escape
+ (with-input-from-string "\"\\x0\"" read))
+
+ (pass-if-exception "no semicolon at termination of three-digit hex-escape"
+ exception:illegal-escape
+ (with-input-from-string "\"\\x000\"" read))
+
+ (pass-if "two-digit hex escape"
+ (eqv? (string-ref (with-input-from-string "\"--\\xff;--\"" read) 2)
+ (integer->char #xff)))
+
+ (pass-if "four-digit hex escape"
+ (eqv? (string-ref (with-input-from-string "\"--\\x0100;--\"" read) 2)
+ (integer->char #x0100)))
+
+ (pass-if "six-digit hex escape"
+ (eqv? (string-ref (with-input-from-string "\"--\\x010300;--\"" read) 2)
+ (integer->char #x010300)))
+
+ (pass-if "escaped characters match non-escaped ASCII characters"
+ (string=? "ABC"
+ (with-input-from-string "\"\\x41;\\x0042;\\x000043;\"" read)))
+
+ (pass-if "R6RS backslash character escapes"
+ (string=? (with-input-from-string "\"\\\"\\\\\"" read)
+ (string #\" #\\)))
+
+ (pass-if "R6RS backslash control escapes"
+ (string=? (with-input-from-string "\"\\a\\b\\t\\n\\v\\f\\r\"" read)
+ (apply string (map integer->char '(7 8 9 10 11 12 13)))))
+
+ (pass-if "write R6RS escapes"
+ (let* ((s1 (apply string
+ (map integer->char '(#x8 #x20 #x30 #x40))))
+ (s2 (with-output-to-string
+ (lambda () (write s1)))))
+ (lset= eqv?
+ (string->list s2)
+ (list #\" #\\ #\x #\8 #\; #\space #\0 #\@ #\"))))
+
+ (read-disable 'strict-r6rs-strings))
+
+