diff options
author | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-03-16 17:23:21 +0000 |
---|---|---|
committer | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-03-16 17:23:21 +0000 |
commit | db42d1485c38c3442e7b62e63d45f5e5b9b66ee1 (patch) | |
tree | 3684e9dd00bb3225b0452a0b9bfd12a7840a365f /utf8.c | |
parent | 60f7a97ae5b801dc0b103022b69814f3a1161856 (diff) | |
download | perl-db42d1485c38c3442e7b62e63d45f5e5b9b66ee1.tar.gz |
EBCDIC Fixes.
p4raw-id: //depot/perlio@9180
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 31 |
1 files changed, 16 insertions, 15 deletions
@@ -668,9 +668,9 @@ is unchanged. Do nothing if C<is_utf8> points to 0. Sets C<is_utf8> to U8 * Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8) { - U8 *send; U8 *d; U8 *start = s; + U8 *send; I32 count = 0; if (!*is_utf8) @@ -679,28 +679,30 @@ Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8) /* ensure valid UTF8 and chars < 256 before converting string */ for (send = s + *len; s < send;) { U8 c = *s++; - if (!UTF8_IS_ASCII(c)) { - if (UTF8_IS_CONTINUATION(c) || s >= send || - !UTF8_IS_CONTINUATION(*s) || UTF8_IS_DOWNGRADEABLE_START(c)) + if (!UTF8_IS_ASCII(c)) { + if (UTF8_IS_DOWNGRADEABLE_START(c) && s < send && + (c = *s++) && UTF8_IS_CONTINUATION(c)) + count++; + else return start; - s++, count++; - } + } } *is_utf8 = 0; +#ifndef EBCDIC + /* Can use as-is if no high chars */ if (!count) return start; +#endif Newz(801, d, (*len) - count + 1, U8); s = start; start = d; while (s < send) { U8 c = *s++; - - if (UTF8_IS_ASCII(c)) - *d++ = c; - else - *d++ = UTF8_ACCUMULATE(c, *s++); + if (!UTF8_IS_ASCII(c)) + c = UTF8_ACCUMULATE(c, *s++); + *d++ = ASCII_TO_NATIVE(c); } *d = '\0'; *len = d - start; @@ -729,11 +731,10 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len) dst = d; while (s < send) { - if (UTF8_IS_ASCII(*s)) - *d++ = *s++; + UV uv = NATIVE_TO_ASCII(*s++); + if (UTF8_IS_ASCII(uv)) + *d++ = uv; else { - UV uv = *s++; - *d++ = UTF8_EIGHT_BIT_HI(uv); *d++ = UTF8_EIGHT_BIT_LO(uv); } |