diff options
Diffstat (limited to 'libgfortran/io/write.c')
-rw-r--r-- | libgfortran/io/write.c | 294 |
1 files changed, 209 insertions, 85 deletions
diff --git a/libgfortran/io/write.c b/libgfortran/io/write.c index ed50e0d5705..65210bcbe1f 100644 --- a/libgfortran/io/write.c +++ b/libgfortran/io/write.c @@ -36,10 +36,161 @@ Boston, MA 02110-1301, USA. */ #include <ctype.h> #include <stdlib.h> #include <stdbool.h> +#include <errno.h> #define star_fill(p, n) memset(p, '*', n) #include "write_float.def" +typedef unsigned char uchar; + +/* Write out default char4. */ + +static void +write_default_char4 (st_parameter_dt *dtp, gfc_char4_t *source, + int src_len, int w_len) +{ + char *p; + int j, k = 0; + gfc_char4_t c; + uchar d; + + /* Take care of preceding blanks. */ + if (w_len > src_len) + { + k = w_len - src_len; + p = write_block (dtp, k); + if (p == NULL) + return; + memset (p, ' ', k); + } + + /* Get ready to handle delimiters if needed. */ + + switch (dtp->u.p.delim_status) + { + case DELIM_APOSTROPHE: + d = '\''; + break; + case DELIM_QUOTE: + d = '"'; + break; + default: + d = ' '; + break; + } + + /* Now process the remaining characters, one at a time. */ + for (j = k; j < src_len; j++) + { + c = source[j]; + + /* Handle delimiters if any. */ + if (c == d && d != ' ') + { + p = write_block (dtp, 2); + if (p == NULL) + return; + *p++ = (uchar) c; + } + else + { + p = write_block (dtp, 1); + if (p == NULL) + return; + } + *p = c > 255 ? '?' : (uchar) c; + } +} + + +/* Write out UTF-8 converted from char4. */ + +static void +write_utf8_char4 (st_parameter_dt *dtp, gfc_char4_t *source, + int src_len, int w_len) +{ + char *p; + int j, k = 0; + gfc_char4_t c; + static const uchar masks[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE }; + size_t nbytes; + uchar buf[6], d, *q; + + /* Take care of preceding blanks. */ + if (w_len > src_len) + { + k = w_len - src_len; + p = write_block (dtp, k); + if (p == NULL) + return; + memset (p, ' ', k); + } + + /* Get ready to handle delimiters if needed. */ + + switch (dtp->u.p.delim_status) + { + case DELIM_APOSTROPHE: + d = '\''; + break; + case DELIM_QUOTE: + d = '"'; + break; + default: + d = ' '; + break; + } + + /* Now process the remaining characters, one at a time. */ + for (j = k; j < src_len; j++) + { + c = source[j]; + if (c < 0x80) + { + /* Handle the delimiters if any. */ + if (c == d && d != ' ') + { + p = write_block (dtp, 2); + if (p == NULL) + return; + *p++ = (uchar) c; + } + else + { + p = write_block (dtp, 1); + if (p == NULL) + return; + } + *p = (uchar) c; + } + else + { + /* Convert to UTF-8 sequence. */ + nbytes = 1; + q = &buf[6]; + + do + { + *--q = ((c & 0x3F) | 0x80); + c >>= 6; + nbytes++; + } + while (c >= 0x3F || (c & limits[nbytes-1])); + + *--q = (c | masks[nbytes-1]); + + p = write_block (dtp, nbytes); + if (p == NULL) + return; + + while (q < &buf[6]) + *p++ = *q++; + } + } +} + + void write_a (st_parameter_dt *dtp, const fnode *f, const char *source, int len) { @@ -126,17 +277,16 @@ write_a (st_parameter_dt *dtp, const fnode *f, const char *source, int len) /* The primary difference between write_a_char4 and write_a is that we have to - deal with writing from the first byte of the 4-byte character and take care - of endianess. This currently implements encoding="default" which means we - write the lowest significant byte. If the 3 most significant bytes are - not representable emit a '?'. TODO: Implement encoding="UTF-8" - which will process all 4 bytes and translate to the encoded output. */ + deal with writing from the first byte of the 4-byte character and pay + attention to the most significant bytes. For ENCODING="default" write the + lowest significant byte. If the 3 most significant bytes contain + non-zero values, emit a '?'. For ENCODING="utf-8", convert the UCS-32 value + to the UTF-8 encoded string before writing out. */ void write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len) { int wlen; - char *p; gfc_char4_t *q; wlen = f->u.string.length < 0 @@ -158,6 +308,7 @@ write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len /* Write out any padding if needed. */ if (len < wlen) { + char *p; p = write_block (dtp, wlen - len); if (p == NULL) return; @@ -173,19 +324,15 @@ write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len /* Write out the previously scanned characters in the string. */ if (bytes > 0) { - p = write_block (dtp, bytes); - if (p == NULL) - return; - for (j = 0; j < bytes; j++) - p[j] = q[j] > 255 ? '?' : (unsigned char) q[j]; + if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8) + write_utf8_char4 (dtp, q, bytes, 0); + else + write_default_char4 (dtp, q, bytes, 0); bytes = 0; } /* Write out the CR_LF sequence. */ - p = write_block (dtp, 2); - if (p == NULL) - return; - memcpy (p, crlf, 2); + write_default_char4 (dtp, crlf, 2, 0); } else bytes++; @@ -194,32 +341,19 @@ write_a_char4 (st_parameter_dt *dtp, const fnode *f, const char *source, int len /* Write out any remaining bytes if no LF was found. */ if (bytes > 0) { - p = write_block (dtp, bytes); - if (p == NULL) - return; - for (j = 0; j < bytes; j++) - p[j] = q[j] > 255 ? '?' : (unsigned char) q[j]; + if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8) + write_utf8_char4 (dtp, q, bytes, 0); + else + write_default_char4 (dtp, q, bytes, 0); } } else { #endif - int j; - p = write_block (dtp, wlen); - if (p == NULL) - return; - - if (wlen < len) - { - for (j = 0; j < wlen; j++) - p[j] = q[j] > 255 ? '?' : (unsigned char) q[j]; - } + if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8) + write_utf8_char4 (dtp, q, len, wlen); else - { - memset (p, ' ', wlen - len); - for (j = wlen - len; j < wlen; j++) - p[j] = q[j] > 255 ? '?' : (unsigned char) q[j]; - } + write_default_char4 (dtp, q, len, wlen); #ifdef HAVE_CRLF } #endif @@ -745,8 +879,6 @@ write_character (st_parameter_dt *dtp, const char *source, int kind, int length) { int i, extra; char *p, d; - gfc_char4_t *q; - switch (dtp->u.p.delim_status) { @@ -769,9 +901,9 @@ write_character (st_parameter_dt *dtp, const char *source, int kind, int length) { extra = 2; - for (i = 0; i < length; i++) - if (source[i] == d) - extra++; + for (i = 0; i < length; i++) + if (source[i] == d) + extra++; } p = write_block (dtp, length + extra); @@ -796,40 +928,24 @@ write_character (st_parameter_dt *dtp, const char *source, int kind, int length) } else { - /* We have to scan the source string looking for delimiters to determine - how large the write block needs to be. */ - if (d == ' ') - extra = 0; - else - { - extra = 2; - - q = (gfc_char4_t *) source; - for (i = 0; i < length; i++, q++) - if (*q == (gfc_char4_t) d) - extra++; - } - - p = write_block (dtp, length + extra); - if (p == NULL) - return; - if (d == ' ') { - q = (gfc_char4_t *) source; - for (i = 0; i < length; i++, q++) - p[i] = *q > 255 ? '?' : (unsigned char) *q; + if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8) + write_utf8_char4 (dtp, (gfc_char4_t *) source, length, 0); + else + write_default_char4 (dtp, (gfc_char4_t *) source, length, 0); } else { - *p++ = d; - q = (gfc_char4_t *) source; - for (i = 0; i < length; i++, q++) - { - *p++ = *q > 255 ? '?' : (unsigned char) *q; - if (*q == (gfc_char4_t) d) - *p++ = d; - } + p = write_block (dtp, 1); + *p = d; + + if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8) + write_utf8_char4 (dtp, (gfc_char4_t *) source, length, 0); + else + write_default_char4 (dtp, (gfc_char4_t *) source, length, 0); + + p = write_block (dtp, 1); *p = d; } } @@ -1000,6 +1116,22 @@ list_formatted_write (st_parameter_dt *dtp, bt type, void *p, int kind, #define NML_DIGITS 20 +static void +namelist_write_newline (st_parameter_dt *dtp) +{ + if (!is_internal_unit (dtp)) + { +#ifdef HAVE_CRLF + write_character (dtp, "\r\n", 1, 2); +#else + write_character (dtp, "\n", 1, 1); +#endif + } + else + write_character (dtp, " ", 1, 1); +} + + static namelist_info * nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset, namelist_info * base, char * base_name) @@ -1036,11 +1168,9 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset, if (obj->type != GFC_DTYPE_DERIVED) { -#ifdef HAVE_CRLF - write_character (dtp, "\r\n ", 1, 3); -#else - write_character (dtp, "\n ", 1, 2); -#endif + namelist_write_newline (dtp); + write_character (dtp, " ", 1, 1); + len = 0; if (base) { @@ -1245,11 +1375,8 @@ nml_write_obj (st_parameter_dt *dtp, namelist_info * obj, index_type offset, if (num > 5) { num = 0; -#ifdef HAVE_CRLF - write_character (dtp, "\r\n ", 1, 3); -#else - write_character (dtp, "\n ", 1, 2); -#endif + namelist_write_newline (dtp); + write_character (dtp, " ", 1, 1); } rep_ctr = 1; } @@ -1276,6 +1403,7 @@ obj_loop: return retval; } + /* This is the entry function for namelist writes. It outputs the name of the namelist and iterates through the namelist by calls to nml_write_obj. The call below has dummys in the arguments used in @@ -1331,12 +1459,8 @@ namelist_write (st_parameter_dt *dtp) } } -#ifdef HAVE_CRLF - write_character (dtp, " /\r\n", 1, 5); -#else - write_character (dtp, " /\n", 1, 4); -#endif - + write_character (dtp, " /", 1, 3); + namelist_write_newline (dtp); /* Restore the original delimiter. */ dtp->u.p.delim_status = tmp_delim; } |