1 files changed, 209 insertions, 85 deletions
diff --git a/libgfortran/io/write.c b/libgfortran/io/write.c
index ed50e0d5705..65210bcbe1f 100644
--- a/libgfortran/io/write.c
+++ b/libgfortran/io/write.c
@@ -36,10 +36,161 @@ Boston, MA 02110-1301, USA.  */
 #include <ctype.h>
 #include <stdlib.h>
 #include <stdbool.h>
+#include <errno.h>
 #define star_fill(p, n) memset(p, '*', n)
 
 #include "write_float.def"
 
+typedef unsigned char uchar;
+
+/* Write out default char4.  */
+
+static void
+write_default_char4 (st_parameter_dt *dtp, gfc_char4_t *source,
+		     int src_len, int w_len)
+{
+  char *p;
+  int j, k = 0;
+  gfc_char4_t c;
+  uchar d;
+      
+  /* Take care of preceding blanks.  */
+  if (w_len > src_len)
+    {
+      k = w_len - src_len;
+      p = write_block (dtp, k);
+      if (p == NULL)
+	return;
+      memset (p, ' ', k);
+    }
+
+  /* Get ready to handle delimiters if needed.  */
+
+  switch (dtp->u.p.delim_status)
+    {
+    case DELIM_APOSTROPHE:
+      d = '\'';
+      break;
+    case DELIM_QUOTE:
+      d = '"';
+      break;
+    default:
+      d = ' ';
+      break;
+    }
+
+  /* Now process the remaining characters, one at a time.  */
+  for (j = k; j < src_len; j++)
+    {
+      c = source[j];
+    
+      /* Handle delimiters if any.  */
+      if (c == d && d != ' ')
+	{
+	  p = write_block (dtp, 2);
+	  if (p == NULL)
+	    return;
+	  *p++ = (uchar) c;
+	}
+      else
+	{
+	  p = write_block (dtp, 1);
+	  if (p == NULL)
+	    return;
+	}
+      *p = c > 255 ? '?' : (uchar) c;
+    }
+}
+
+
+/* Write out UTF-8 converted from char4.  */
+
+static void
+write_utf8_char4 (st_parameter_dt *dtp, gfc_char4_t *source,
+		     int src_len, int w_len)
+{
+  char *p;
+  int j, k = 0;
+  gfc_char4_t c;
+  static const uchar masks[6] =  { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+  static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
+  size_t nbytes;
+  uchar buf[6], d, *q; 
+
+  /* Take care of preceding blanks.  */
+  if (w_len > src_len)
+    {
+      k = w_len - src_len;
+      p = write_block (dtp, k);
+      if (p == NULL)
+	return;
+      memset (p, ' ', k);
+    }
+
+  /* Get ready to handle delimiters if needed.  */
+
+  switch (dtp->u.p.delim_status)
+    {
+    case DELIM_APOSTROPHE:
+      d = '\'';
+      break;
+    case DELIM_QUOTE:
+      d = '"';
+      break;
+    default:
+      d = ' ';
+      break;
+    }
+
+  /* Now process the remaining characters, one at a time.  */
+  for (j = k; j < src_len; j++)
+    {
+      c = source[j];
+      if (c < 0x80)
+	{
+	  /* Handle the delimiters if any.  */
+	  if (c == d && d != ' ')
+	    {
+	      p = write_block (dtp, 2);
+	      if (p == NULL)
+		return;
+	      *p++ = (uchar) c;
+	    }
+	  else
+	    {
+	      p = write_block (dtp, 1);
+	      if (p == NULL)
+		return;
+	    }
+	  *p = (uchar) c;
+	}
+      else
+	{
+	  /* Convert to UTF-8 sequence.  */
+	  nbytes = 1;
+	  q = &buf[6];
+
+	  do
+	    {
+	      *--q = ((c & 0x3F) | 0x80);
+	      c >>= 6;
+	      nbytes++;
+	    }
+	  while (c >= 0x3F || (c & limits[nbytes-1]));
+
+	  *--q = (c | masks[nbytes-1]);
+
+	  p = write_block (dtp, nbytes);
+	  if (p == NULL)
+	    return;
+
+	  while (q < &buf[6])
+	    *p++ = *q++;
+	}
+    }
+}
+
+
 void
 write_a (st_parameter_dt *dtp, const fnode *f, const char *source, int len)
 {
@@ -126,17 +277,16 @@ write_a (st_parameter_dt *dtp, const fnode *f, const char *source, int len)
 
 
 /* The primary difference between write_a_char4 and write_a is that we have to
-   deal with writing from the first byte of the 4-byte character and take care
-   of endianess.  This currently implements encoding="default" which means we
-   write the lowest significant byte. If the 3 most significant bytes are
-   not representable emit a '?'.  TODO: Implement encoding="UTF-8"
-   which will process all 4 bytes and translate to the encoded output.  */
+   deal with writing from the first byte of the 4-byte character and pay
+   attention to the most significant bytes.  For ENCODING="default" write the
+   lowest significant byte. If the 3 most significant bytes contain
+   non-zero values, emit a '?'.  For ENCODING="utf-8", convert the UCS-32 value
+   to the UTF-8 encoded string before writing out.  */
 
 void
 write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len)
 {
   int wlen;
-  char *p;
   gfc_char4_t *q;
 
   wlen = f->u.string.length < 0
@@ -158,6 +308,7 @@ write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len
       /* Write out any padding if needed.  */
       if (len < wlen)
 	{
+	  char *p;
 	  p = write_block (dtp, wlen - len);
 	  if (p == NULL)
 	    return;
@@ -173,19 +324,15 @@ write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len
 	      /* Write out the previously scanned characters in the string.  */
 	      if (bytes > 0)
 		{
-		  p = write_block (dtp, bytes);
-		  if (p == NULL)
-		    return;
-		  for (j = 0; j < bytes; j++)
-		    p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
+		  if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
+		    write_utf8_char4 (dtp, q, bytes, 0);
+		  else
+		    write_default_char4 (dtp, q, bytes, 0);
 		  bytes = 0;
 		}
 
 	      /* Write out the CR_LF sequence.  */ 
-	      p = write_block (dtp, 2);
-              if (p == NULL)
-                return;
-	      memcpy (p, crlf, 2);
+	      write_default_char4 (dtp, crlf, 2, 0);
 	    }
 	  else
 	    bytes++;
@@ -194,32 +341,19 @@ write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len
       /*  Write out any remaining bytes if no LF was found.  */
       if (bytes > 0)
 	{
-	  p = write_block (dtp, bytes);
-	  if (p == NULL)
-	    return;
-	  for (j = 0; j < bytes; j++)
-	    p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
+	  if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
+	    write_utf8_char4 (dtp, q, bytes, 0);
+	  else
+	    write_default_char4 (dtp, q, bytes, 0);
 	}
     }
   else
     {
 #endif
-      int j;
-      p = write_block (dtp, wlen);
-      if (p == NULL)
-	return;
-
-      if (wlen < len)
-	{
-	  for (j = 0; j < wlen; j++)
-	    p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
-	}
+      if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
+	write_utf8_char4 (dtp, q, len, wlen);
       else
-	{
-	  memset (p, ' ', wlen - len);
-	  for (j = wlen - len; j < wlen; j++)
-	    p[j] = q[j] > 255 ? '?' : (unsigned char) q[j];
-	}
+	write_default_char4 (dtp, q, len, wlen);
 #ifdef HAVE_CRLF
     }
 #endif
@@ -745,8 +879,6 @@ write_character (st_parameter_dt *dtp, const char *source, int kind, int length)
 {
   int i, extra;
   char *p, d;
-  gfc_char4_t *q;
-
 
   switch (dtp->u.p.delim_status)
     {
@@ -769,9 +901,9 @@ write_character (st_parameter_dt *dtp, const char *source, int kind, int length)
 	{
 	  extra = 2;
 
-	    for (i = 0; i < length; i++)
-	      if (source[i] == d)
-		extra++;
+	  for (i = 0; i < length; i++)
+	    if (source[i] == d)
+	      extra++;
 	}
 
       p = write_block (dtp, length + extra);
@@ -796,40 +928,24 @@ write_character (st_parameter_dt *dtp, const char *source, int kind, int length)
     }
   else
     {
-      /* We have to scan the source string looking for delimiters to determine
-	 how large the write block needs to be.  */
-      if (d == ' ')
-	extra = 0;
-      else
-	{
-	  extra = 2;
-
-	  q = (gfc_char4_t *) source;
-	  for (i = 0; i < length; i++, q++)
-	    if (*q == (gfc_char4_t) d)
-	      extra++;
-	}
-
-      p = write_block (dtp, length + extra);
-      if (p == NULL)
-	return;
-
       if (d == ' ')
 	{
-	  q = (gfc_char4_t *) source;
-	  for (i = 0; i < length; i++, q++)
-	    p[i] = *q > 255 ? '?' : (unsigned char) *q;
+	  if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
+	    write_utf8_char4 (dtp, (gfc_char4_t *) source, length, 0);
+	  else
+	    write_default_char4 (dtp, (gfc_char4_t *) source, length, 0);
 	}
       else
 	{
-	  *p++ = d;
-	  q = (gfc_char4_t *) source;
-	  for (i = 0; i < length; i++, q++)
-	    {
-	      *p++ = *q > 255 ? '?' : (unsigned char) *q;
-	      if (*q == (gfc_char4_t) d)
-		*p++ = d;
-	    }
+	  p = write_block (dtp, 1);
+	  *p = d;
+
+	  if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
+	    write_utf8_char4 (dtp, (gfc_char4_t *) source, length, 0);
+	  else
+	    write_default_char4 (dtp, (gfc_char4_t *) source, length, 0);
+
+	  p = write_block (dtp, 1);
 	  *p = d;
 	}
     }
@@ -1000,6 +1116,22 @@ list_formatted_write (st_parameter_dt *dtp, bt type, void *p, int kind,
 
 #define NML_DIGITS 20
 
+static void
+namelist_write_newline (st_parameter_dt *dtp)
+{
+  if (!is_internal_unit (dtp))
+    {
+#ifdef HAVE_CRLF
+      write_character (dtp, "\r\n", 1, 2);
+#else
+      write_character (dtp, "\n", 1, 1);
+#endif
+    }
+  else
+    write_character (dtp, " ", 1, 1);
+}
+
+
 static namelist_info *
 nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset,
 	       namelist_info * base, char * base_name)
@@ -1036,11 +1168,9 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset,
 
   if (obj->type != GFC_DTYPE_DERIVED)
     {
-#ifdef HAVE_CRLF
-      write_character (dtp, "\r\n ", 1, 3);
-#else
-      write_character (dtp, "\n ", 1, 2);
-#endif
+      namelist_write_newline (dtp);
+      write_character (dtp, " ", 1, 1);
+
       len = 0;
       if (base)
 	{
@@ -1245,11 +1375,8 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset,
 	  if (num > 5)
 	    {
 	      num = 0;
-#ifdef HAVE_CRLF
-	      write_character (dtp, "\r\n ", 1, 3);
-#else
-	      write_character (dtp, "\n ", 1, 2);
-#endif
+	      namelist_write_newline (dtp);
+	      write_character (dtp, " ", 1, 1);
 	    }
 	  rep_ctr = 1;
 	}
@@ -1276,6 +1403,7 @@ obj_loop:
   return retval;
 }
 
+
 /* This is the entry function for namelist writes.  It outputs the name
    of the namelist and iterates through the namelist by calls to
    nml_write_obj.  The call below has dummys in the arguments used in
@@ -1331,12 +1459,8 @@ namelist_write (st_parameter_dt *dtp)
 	}
     }
 
-#ifdef HAVE_CRLF
-  write_character (dtp, "  /\r\n", 1, 5);
-#else
-  write_character (dtp, "  /\n", 1, 4);
-#endif
-
+  write_character (dtp, "  /", 1, 3);
+  namelist_write_newline (dtp);
   /* Restore the original delimiter.  */
   dtp->u.p.delim_status = tmp_delim;
 }