Reader option to parse strings and chars using R6RS rulesr6rs-strings

Add the capability to use some of R6RS's string and character syntax, e.g. #\xFFF and "\xFFF;" This is enabled using (read-enable 'strict-r6rs-strings). * libguile/print.c (PRINT_CHAR_ESCAPE): new macro (iprin1): use new macro PRINT_CHAR_ESCAPE and add capability to print strings according to R6RS * libguile/chars.c (scm_r6rs_charnames, scm_r6rs_charnums, SCM_N_R6RS_CHARNAMES): new constants of character names (scm_i_charname): output R6RS charnames (scm_i_charname_to_char): check for R6RS charnames * libguile/private-options.h (SCM_R6RS_OPTIONS_P): new define * libguile/read.c (scm_read_opts): add new option 'strict-r6rs-strings' (SCM_READ_HEX_ESCAPE): new codeblock to parse character hex escapes (SCM_READ_SPACE_LINE_SPACE): new codeblock to parse string backslashes (scm_read_string): read R6RS string escapes * test-suite/tests/chars.test: tests for R6RS character escapes * test-suite/tests/strings.test: test for R6RS string escapes
author: Michael Gran <spk121@yahoo.com> 2009-12-27 18:53:52 -0800
committer: Michael Gran <spk121@yahoo.com> 2009-12-27 18:53:52 -0800
commit: cff6adf899dbc0336c7c017d52504f8138c89b3d (patch)
tree: a845bf05c3e42da296f48eeb7616715b9702be0f
parent: 0ca3a342d19ec89b8ae6bba0a74f0f9ecc5cf7c2 (diff)
download: guile-r6rs-strings.tar.gz
6 files changed, 415 insertions, 147 deletions
diff --git a/libguile/chars.c b/libguile/chars.c
index 16a2b90d6..2322db858 100644
--- a/libguile/chars.c
+++ b/libguile/chars.c
@@ -32,7 +32,7 @@
 
 #include "libguile/chars.h"
 #include "libguile/srfi-14.h"
-
+#include "libguile/private-options.h"
 
 
 SCM_DEFINE (scm_char_p, "char?", 1, 0, 0, 
@@ -561,6 +561,18 @@ static const scm_t_uint32 const scm_alt_charnums[] = {
 
 #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
 
+static const char *const scm_r6rs_charnames[] = {
+  "nul", "alarm", "backspace", "tab", "linefeed", "newline", 
+  "vtab", "page", "return", "esc", "space", "delete"
+};
+
+static const scm_t_uint32 const scm_r6rs_charnums[] = {
+  0x00, 0x07, 0x08, 0x09, 0x0A, 0x0A, 
+  0x0B, 0x0C, 0x0D, 0x1B, 0x20, 0x7F
+};
+
+#define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
+
 /* Returns the string charname for a character if it exists, or NULL
    otherwise.  */
 const char *
@@ -569,18 +581,25 @@ scm_i_charname (SCM chr)
   size_t c;
   scm_t_uint32 i = SCM_CHAR (chr);
 
-  for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
-    if (scm_r5rs_charnums[c] == i)
-      return scm_r5rs_charnames[c];
-
-  for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
-    if (scm_C0_control_charnums[c] == i)
-      return scm_C0_control_charnames[c];
-
-  for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
-    if (scm_alt_charnums[c] == i)
-      return scm_alt_charnames[i];
-
+  if (!SCM_R6RS_STRINGS_P)
+    {
+      for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
+        if (scm_r5rs_charnums[c] == i)
+          return scm_r5rs_charnames[c];
+      
+      for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
+        if (scm_C0_control_charnums[c] == i)
+          return scm_C0_control_charnames[c];
+      
+      for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
+        if (scm_alt_charnums[c] == i)
+          return scm_alt_charnames[i];
+    }
+  else
+    for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
+      if (scm_r6rs_charnums[c] == i)
+        return scm_r6rs_charnames[c];
+    
   return NULL;
 }
 
@@ -590,25 +609,36 @@ scm_i_charname_to_char (const char *charname, size_t charname_len)
 {
   size_t c;
 
-  /* The R5RS charnames.  These are supposed to be case
-     insensitive. */
-  for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
-    if ((strlen (scm_r5rs_charnames[c]) == charname_len)
-	&& (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
-      return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
-
-  /* Then come the controls.  These are not case sensitive.  */
-  for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
-    if ((strlen (scm_C0_control_charnames[c]) == charname_len)
-	&& (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
-      return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
-
-  /* Lastly are some old names carried over for compatibility.  */
-  for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
-    if ((strlen (scm_alt_charnames[c]) == charname_len)
-	&& (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
-      return SCM_MAKE_CHAR (scm_alt_charnums[c]);
-
+  if (!SCM_R6RS_STRINGS_P)
+    {
+      /* The R5RS charnames.  These are supposed to be case
+         insensitive. */
+      for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
+        if ((strlen (scm_r5rs_charnames[c]) == charname_len)
+            && (!strncasecmp (scm_r5rs_charnames[c], charname, 
+                              charname_len)))
+          return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
+      
+      /* Then come the controls.  These are not case sensitive.  */
+      for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
+        if ((strlen (scm_C0_control_charnames[c]) == charname_len)
+            && (!strncasecmp (scm_C0_control_charnames[c], charname, 
+                              charname_len)))
+          return SCM_MAKE_CHAR (scm_C0_control_charnums[c]);
+      
+      /* Lastly are some old names carried over for compatibility.  */
+      for (c = 0; c < SCM_N_ALT_CHARNAMES; c++)
+        if ((strlen (scm_alt_charnames[c]) == charname_len)
+            && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
+          return SCM_MAKE_CHAR (scm_alt_charnums[c]);
+    }
+  else
+    /* The strict R6RS charnames.  These are case sensitive. */
+    for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
+      if ((strlen (scm_r6rs_charnames[c]) == charname_len)
+          && (!strncmp (scm_r6rs_charnames[c], charname, charname_len)))
+        return SCM_MAKE_CHAR (scm_r6rs_charnums[c]);
+    
   return SCM_BOOL_F;
 }
 
diff --git a/libguile/print.c b/libguile/print.c
index d50df2d24..034aa12d1 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -408,6 +408,21 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display, g_display);
 
 static void iprin1 (SCM exp, SCM port, scm_print_state *pstate);
 
+/* Character is graphic but unrepresentable in this port's encoding.  */
+#define PRINT_CHAR_ESCAPE(i, port)                        \
+  do                                                      \
+    {                                                     \
+      if (!SCM_R6RS_STRINGS_P)                            \
+        scm_intprint (i, 8, port);                        \
+      else                                                \
+        {                                                 \
+          scm_puts ("x", port);                           \
+          scm_intprint (i, 16, port);                     \
+        }                                                 \
+    }                                                     \
+  while (0)
+
+
 void 
 scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate)
 {
@@ -487,7 +502,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
                       else
                         /* Character is graphic but unrepresentable in
                            this port's encoding.  */
-                        scm_intprint (i, 8, port);
+                        PRINT_CHAR_ESCAPE(i, port);
                     }
                   else
                     {
@@ -506,12 +521,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
                       else
                         /* Character is graphic but unrepresentable in
                            this port's encoding.  */
-                        scm_intprint (i, 8, port);
+                        PRINT_CHAR_ESCAPE (i, port);
                     }
                 }
               else
                 /* Character is a non-graphical character.  */
-                scm_intprint (i, 8, port);
+                PRINT_CHAR_ESCAPE (i, port);
 	    }
 	  else
 	    scm_i_charprint (i, port);
@@ -578,9 +593,9 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
         case scm_tc7_string:
           if (SCM_WRITINGP (pstate))
             {
-              size_t i, j, len;
+              size_t i, len;
               static char const hex[] = "0123456789abcdef";
-              char buf[8];
+              char buf[9];
 
 
               scm_putc ('"', port);
@@ -646,37 +661,59 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
                     {
                       /* Character is graphic but unrepresentable in
                          this port's encoding or is not graphic.  */
-                      if (ch <= 0xFF)
-                        {
-                          buf[0] = '\\';
-                          buf[1] = 'x';
-                          buf[2] = hex[ch / 16];
-                          buf[3] = hex[ch % 16];
-                          scm_lfwrite (buf, 4, port);
-                        }
-                      else if (ch <= 0xFFFF)
+                      if (!SCM_R6RS_STRINGS_P)
                         {
-                          buf[0] = '\\';
-                          buf[1] = 'u';
-                          buf[2] = hex[(ch & 0xF000) >> 12];
-                          buf[3] = hex[(ch & 0xF00) >> 8];
-                          buf[4] = hex[(ch & 0xF0) >> 4];
-                          buf[5] = hex[(ch & 0xF)];
-                          scm_lfwrite (buf, 6, port);
-                          j = i + 1;
+                          if (ch <= 0xFF)
+                            {
+                              buf[0] = '\\';
+                              buf[1] = 'x';
+                              buf[2] = hex[ch / 16];
+                              buf[3] = hex[ch % 16];
+                              scm_lfwrite (buf, 4, port);
+                            }
+                          else if (ch <= 0xFFFF)
+                            {
+                              buf[0] = '\\';
+                              buf[1] = 'u';
+                              buf[2] = hex[(ch & 0xF000) >> 12];
+                              buf[3] = hex[(ch & 0xF00) >> 8];
+                              buf[4] = hex[(ch & 0xF0) >> 4];
+                              buf[5] = hex[(ch & 0xF)];
+                              scm_lfwrite (buf, 6, port);
+                            }
+                          else if (ch > 0xFFFF)
+                            {
+                              buf[0] = '\\';
+                              buf[1] = 'U';
+                              buf[2] = hex[(ch & 0xF00000) >> 20];
+                              buf[3] = hex[(ch & 0xF0000) >> 16];
+                              buf[4] = hex[(ch & 0xF000) >> 12];
+                              buf[5] = hex[(ch & 0xF00) >> 8];
+                              buf[6] = hex[(ch & 0xF0) >> 4];
+                              buf[7] = hex[(ch & 0xF)];
+                              scm_lfwrite (buf, 8, port);
+                            }
                         }
-                      else if (ch > 0xFFFF)
+                      else
                         {
-                          buf[0] = '\\';
-                          buf[1] = 'U';
-                          buf[2] = hex[(ch & 0xF00000) >> 20];
-                          buf[3] = hex[(ch & 0xF0000) >> 16];
-                          buf[4] = hex[(ch & 0xF000) >> 12];
-                          buf[5] = hex[(ch & 0xF00) >> 8];
-                          buf[6] = hex[(ch & 0xF0) >> 4];
-                          buf[7] = hex[(ch & 0xF)];
-                          scm_lfwrite (buf, 8, port);
-                          j = i + 1;
+                          scm_t_wchar ch2 = ch;
+                          
+                          int i = 8;
+                          buf[i] = ';';
+                          i --;
+                          if (ch == 0)
+                            buf[i--] = '0';
+                          else
+                            while (ch2 > 0)
+                              {
+                                buf[i] = hex[ch2 & 0xF];
+                                ch2 >>= 4;
+                                i --;
+                              }
+                          buf[i] = 'x';
+                          i --;
+                          buf[i] = '\\';
+                          scm_lfwrite (buf + i, 9 - i, port);
                         }
                     }
                 }
diff --git a/libguile/private-options.h b/libguile/private-options.h
index 703ca8a5b..35fa2e38e 100644
--- a/libguile/private-options.h
+++ b/libguile/private-options.h
@@ -94,9 +94,13 @@ SCM_API scm_t_option scm_read_opts[];
 #if SCM_ENABLE_ELISP
 #define SCM_ELISP_VECTORS_P    scm_read_opts[4].val
 #define SCM_ESCAPED_PARENS_P   scm_read_opts[5].val
-#define SCM_N_READ_OPTIONS 6
+#endif
+#define SCM_R6RS_STRINGS_P     scm_read_opts[6].val
+
+#if SCM_ENABLE_ELISP
+#define SCM_N_READ_OPTIONS 7
 #else
-#define SCM_N_READ_OPTIONS 4
+#define SCM_N_READ_OPTIONS 5
 #endif
 
 #endif  /* PRIVATE_OPTIONS */ 
diff --git a/libguile/read.c b/libguile/read.c
index da4a17433..0881e4fa6 100644
--- a/libguile/read.c
+++ b/libguile/read.c
@@ -29,6 +29,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <unicase.h>
+#include <unictype.h>
 
 #include "libguile/_scm.h"
 #include "libguile/bytevectors.h"
@@ -76,6 +77,8 @@ scm_t_option scm_read_opts[] = {
   { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
     "Support `\\(' and `\\)' in strings."},
 #endif
+  { SCM_OPTION_BOOLEAN, "strict-r6rs-strings", 0,
+    "Strictly follow R6RS rules for strings"},
   { 0, },
 };
 
@@ -412,6 +415,87 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
 }
 #undef FUNC_NAME
 
+
+#define SCM_READ_HEX_ESCAPE(ndigits, terminator)        \
+  do                                                    \
+    {                                                   \
+      scm_t_wchar a;                                    \
+      size_t i = 0;                                     \
+      c = 0;                                            \
+      while (1)                                         \
+        {                                               \
+          if (i == ndigits)                             \
+            break;                                      \
+          a = scm_getc (port);                          \
+          if (a == EOF)                                 \
+            goto str_eof;                               \
+          if (terminator                                \
+              && (a == (scm_t_wchar) terminator)        \
+              && (i > 0))                               \
+            break;                                      \
+          if ('0' <= a && a <= '9')                     \
+            a -= '0';                                   \
+          else if ('A' <= a && a <= 'F')                \
+            a = a - 'A' + 10;                           \
+          else if ('a' <= a && a <= 'f')                \
+            a = a - 'a' + 10;                           \
+          else                                          \
+            {                                           \
+              c = a;                                    \
+              goto bad_escaped;                         \
+            }                                           \
+          c = c * 16 + a;                               \
+          i ++;                                         \
+        }                                               \
+    } while (0)
+
+#define SCM_READ_SPACE_LINE_SPACE                                       \
+  do                                                                    \
+    {                                                                   \
+      int line_ending_started = 0;                                      \
+      int line_ending_complete = 0;                                     \
+      while (1)                                                         \
+        {                                                               \
+          c = scm_getc (port);                                          \
+          if (c == EOF)                                                 \
+            goto str_eof;                                               \
+          if (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR)) \
+            {                                                           \
+              if (line_ending_started)                                  \
+                line_ending_complete = 1;                               \
+              continue;                                                 \
+            }                                                           \
+          else if (c == '\r')                                           \
+            {                                                           \
+              if (line_ending_started)                                  \
+                {                                                       \
+                  line_ending_complete = 1;                             \
+                  break;                                                \
+                }                                                       \
+              line_ending_started = 1;                                  \
+              continue;                                                 \
+            }                                                           \
+          else if ((c == '\n')                                          \
+                   || (c == 0x0085) /* NEL */                           \
+                   || (c == 0x2028)) /* LS */                           \
+            {                                                           \
+              if (line_ending_complete)                                 \
+                break;                                                  \
+              line_ending_started = 1;                                  \
+              line_ending_complete = 1;                                 \
+              continue;                                                 \
+            }                                                           \
+          else                                                          \
+            {                                                           \
+              if (!line_ending_started)                                 \
+                goto bad_escaped;                                       \
+              break;                                                    \
+            }                                                           \
+        }                                                               \
+    } while (0)
+  
+
+
 static SCM
 scm_read_string (int chr, SCM port)
 #define FUNC_NAME "scm_lreadr"
@@ -459,8 +543,11 @@ scm_read_string (int chr, SCM port)
             case '\n':
               continue;
             case '0':
-              c = '\0';
-              break;
+              if (!SCM_R6RS_STRINGS_P)
+                {
+                  c = '\0';
+                  break;
+                }
             case 'f':
               c = '\f';
               break;
@@ -479,90 +566,56 @@ scm_read_string (int chr, SCM port)
             case 'v':
               c = '\v';
               break;
+            case 'b':
+              if (SCM_R6RS_STRINGS_P)
+                {
+                  c = '\010';
+                  break;
+                }
+            case ' ':
+            case '\t':
+            case 0x00A0:        /* no-break space */
+            case 0x1680:        /* ogham space */
+            case 0x180E:        /* mongolian vowel separator */
+            case 0x2000:        /* en quad */
+            case 0x2001:        /* em quad */
+            case 0x2002:        /* en space */
+            case 0x2003:        /* em space */
+            case 0x2004:        /* three-per-em space */
+            case 0x2005:        /* four-per-em space */
+            case 0x2006:        /* six-per-em space */
+            case 0x2007:        /* figure space */
+            case 0x2008:        /* punctuation space */
+            case 0x2009:        /* thin space */
+            case 0x200A:        /* hair space */
+            case 0x202F:        /* narrow no-break space */
+            case 0x205F:        /* medium mathematical space */
+            case 0x3000:        /* ideographic space */
+              if (SCM_R6RS_STRINGS_P)
+                {
+                  SCM_READ_SPACE_LINE_SPACE;
+                  break;
+                }
             case 'x':
               {
-                scm_t_wchar a, b;
-                a = scm_getc (port);
-                if (a == EOF)
-                  goto str_eof;
-                b = scm_getc (port);
-                if (b == EOF)
-                  goto str_eof;
-                if ('0' <= a && a <= '9')
-                  a -= '0';
-                else if ('A' <= a && a <= 'F')
-                  a = a - 'A' + 10;
-                else if ('a' <= a && a <= 'f')
-                  a = a - 'a' + 10;
-                else
-                  {
-                    c = a;
-                    goto bad_escaped;
-                  }
-                if ('0' <= b && b <= '9')
-                  b -= '0';
-                else if ('A' <= b && b <= 'F')
-                  b = b - 'A' + 10;
-                else if ('a' <= b && b <= 'f')
-                  b = b - 'a' + 10;
+                if (SCM_R6RS_STRINGS_P)
+                  SCM_READ_HEX_ESCAPE (10, ';');
                 else
-                  {
-                    c = b;
-                    goto bad_escaped;
-                  }
-                c = a * 16 + b;
+                  SCM_READ_HEX_ESCAPE (2, '\0');
                 break;
               }
             case 'u':
-              {
-                scm_t_wchar a;
-                int i;
-                c = 0;
-                for (i = 0; i < 4; i++)
-                  {
-                    a = scm_getc (port);
-                    if (a == EOF)
-                      goto str_eof;
-                    if ('0' <= a && a <= '9')
-                      a -= '0';
-                    else if ('A' <= a && a <= 'F')
-                      a = a - 'A' + 10;
-                    else if ('a' <= a && a <= 'f')
-                      a = a - 'a' + 10;
-                    else
-                      {
-                        c = a;
-                        goto bad_escaped;
-                      }
-                    c = c * 16 + a;
-                  }
-                break;
-              }
+              if (!SCM_R6RS_STRINGS_P)
+                {
+                  SCM_READ_HEX_ESCAPE (4, '\0');
+                  break;
+                }
             case 'U':
-              {
-                scm_t_wchar a;
-                int i;
-                c = 0;
-                for (i = 0; i < 6; i++)
-                  {
-                    a = scm_getc (port);
-                    if (a == EOF)
-                      goto str_eof;
-                    if ('0' <= a && a <= '9')
-                      a -= '0';
-                    else if ('A' <= a && a <= 'F')
-                      a = a - 'A' + 10;
-                    else if ('a' <= a && a <= 'f')
-                      a = a - 'a' + 10;
-                    else
-                      {
-                        c = a;
-                        goto bad_escaped;
-                      }
-                    c = c * 16 + a;
-                  }
-                break;
-              }
+              if (!SCM_R6RS_STRINGS_P)
+                {
+                  SCM_READ_HEX_ESCAPE (6, '\0');
+                  break;
+                }
             default:
             bad_escaped:
               scm_i_input_error (FUNC_NAME, port,
@@ -854,6 +907,26 @@ scm_read_character (scm_t_wchar chr, SCM port)
   if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
     return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
 
+  if (cp == 'x' && (charname_len > 1) && SCM_R6RS_STRINGS_P)
+    {
+      SCM p;
+      scm_t_wchar chr;
+
+      /* Convert from hex, skipping the initial 'x' character in CHARNAME */
+      p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
+                                scm_from_uint (16));
+      if (SCM_I_INUMP (p))
+        {
+          scm_t_wchar c = SCM_I_INUM (p);
+          if (SCM_IS_UNICODE_CHAR (c))
+            return SCM_MAKE_CHAR (c);
+          else
+            scm_i_input_error (FUNC_NAME, port,
+                               "out-of-range hex character escape: ~a",
+                               scm_list_1 (charname));
+        }
+    }
+
   if (cp >= '0' && cp < '8')
     {
       /* Dirk:FIXME::  This type of character syntax is not R5RS
diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test
index cd1572feb..90114992f 100644
--- a/test-suite/tests/chars.test
+++ b/test-suite/tests/chars.test
@@ -295,3 +295,65 @@
        (with-output-to-string (lambda () (write #\soh)))
        "#\\soh"))))
 
+(with-test-prefix "r6rs char handling"
+
+  (read-enable 'strict-r6rs-strings)
+
+  (pass-if "#\\a"
+    (eqv? (read (open-input-string "#\\a")) (integer->char #x61)))
+
+  (pass-if "#\\A"
+    (eqv? (with-input-from-string "#\\A" read) (integer->char #x41)))
+
+  (pass-if "#\\("
+    (eqv? (with-input-from-string "#\\(" read) (integer->char #x28)))
+
+  (pass-if "#\\(space)"
+    (eqv? (with-input-from-string "#\\ " read) (integer->char #x20)))
+
+  (pass-if "R6RS character names"
+    (and
+     (eqv? (with-input-from-string "#\\nul" read)       (integer->char #x00))
+     (eqv? (with-input-from-string "#\\alarm" read)     (integer->char #x07))
+     (eqv? (with-input-from-string "#\\backspace" read) (integer->char #x08))
+     (eqv? (with-input-from-string "#\\tab" read)       (integer->char #x09))
+     (eqv? (with-input-from-string "#\\linefeed" read)  (integer->char #x0A))
+     (eqv? (with-input-from-string "#\\newline" read)   (integer->char #x0A))
+     (eqv? (with-input-from-string "#\\vtab" read)      (integer->char #x0B))
+     (eqv? (with-input-from-string "#\\page" read)      (integer->char #x0C))
+     (eqv? (with-input-from-string "#\\return" read)    (integer->char #x0D))
+     (eqv? (with-input-from-string "#\\esc" read)       (integer->char #x1B))
+     (eqv? (with-input-from-string "#\\space" read)     (integer->char #x20))
+     (eqv? (with-input-from-string "#\\delete" read)    (integer->char #x7F))))
+
+  (pass-if-exception "R6RS bad charname" exception:unknown-character-name
+    (with-input-from-string "#\\blammo" read))
+
+  (pass-if-exception "R6RS charnames are case sensitive" 
+                     exception:unknown-character-name
+    (with-input-from-string "#\\Backspace" read))
+
+  (pass-if "one-digit hex escape"
+    (eqv? (with-input-from-string "#\\xA" read) (integer->char #x0A)))
+
+  (pass-if "two-digit hex escape"
+    (eqv? (with-input-from-string "#\\xFF" read) (integer->char #xFF)))
+
+  (pass-if "four-digit hex escape"
+    (eqv? (with-input-from-string "#\\x0001" read) (integer->char #x01)))
+
+  (pass-if "eight-digit hex escape"
+    (eqv? (with-input-from-string "#\\x00006587" read) (integer->char #x6587)))
+
+  (pass-if "write R6RS character names"
+    (string=?
+     (with-output-to-string (lambda () (write (integer->char #x07))))
+     "#\\alarm"))
+
+  (pass-if "write R6RS escapes"
+    (string=?
+     (with-output-to-string (lambda () (write (integer->char #x80))))
+     "#\\x80"))
+
+  (read-disable 'strict-r6rs-strings))
+
diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test
index 013c1a863..868ee1c88 100644
--- a/test-suite/tests/strings.test
+++ b/test-suite/tests/strings.test
@@ -18,7 +18,8 @@
 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
 (define-module (test-strings)
-  #:use-module (test-suite lib))
+  #:use-module (test-suite lib)
+  #:use-module (srfi srfi-1))
 
 (define exception:read-only-string
   (cons 'misc-error "^string is read-only"))
@@ -519,3 +520,64 @@
       (string-upcase! (substring/shared str2 1 4))
       (and (string=? str1 "foofoofoo")
 	   (string=? str2 "oFOOf")))))
+
+(with-test-prefix "R6RS escapes"
+
+  (read-enable 'strict-r6rs-strings)
+
+  (pass-if-exception "non-hex char in two-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\x0g;\"" read))
+
+  (pass-if-exception "non-hex char in four-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\x000g;\"" read))
+
+  (pass-if-exception "non-hex char in six-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\x00000g;\"" read))
+
+  (pass-if-exception "no semicolon at termination of one-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\x0\"" read))
+
+  (pass-if-exception "no semicolon at termination of three-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\x000\"" read))
+
+  (pass-if "two-digit hex escape"
+    (eqv? (string-ref (with-input-from-string "\"--\\xff;--\"" read) 2)
+          (integer->char #xff)))
+
+  (pass-if "four-digit hex escape"
+    (eqv? (string-ref (with-input-from-string "\"--\\x0100;--\"" read) 2)
+          (integer->char #x0100)))
+
+  (pass-if "six-digit hex escape"
+    (eqv? (string-ref (with-input-from-string "\"--\\x010300;--\"" read) 2)
+          (integer->char #x010300)))
+
+  (pass-if "escaped characters match non-escaped ASCII characters"
+    (string=? "ABC" 
+              (with-input-from-string "\"\\x41;\\x0042;\\x000043;\"" read)))
+
+  (pass-if "R6RS backslash character escapes"
+    (string=? (with-input-from-string "\"\\\"\\\\\"" read)
+              (string #\" #\\)))
+
+  (pass-if "R6RS backslash control escapes"
+    (string=? (with-input-from-string "\"\\a\\b\\t\\n\\v\\f\\r\"" read)
+              (apply string (map integer->char '(7 8 9 10 11 12 13)))))
+
+  (pass-if "write R6RS escapes"
+    (let* ((s1 (apply string 
+                      (map integer->char '(#x8 #x20 #x30 #x40))))
+           (s2 (with-output-to-string 
+                 (lambda () (write s1)))))
+      (lset= eqv? 
+             (string->list s2)
+             (list #\" #\\ #\x #\8 #\; #\space #\0 #\@ #\"))))
+
+  (read-disable 'strict-r6rs-strings))
+
+
author	Michael Gran <spk121@yahoo.com>	2009-12-27 18:53:52 -0800
committer	Michael Gran <spk121@yahoo.com>	2009-12-27 18:53:52 -0800
commit	cff6adf899dbc0336c7c017d52504f8138c89b3d (patch)
tree	a845bf05c3e42da296f48eeb7616715b9702be0f
parent	0ca3a342d19ec89b8ae6bba0a74f0f9ecc5cf7c2 (diff)
download	guile-r6rs-strings.tar.gz