summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: Nick Ing-Simmons <nik@tiuk.ti.com> 2001-03-16 17:23:21 +0000
committer: Nick Ing-Simmons <nik@tiuk.ti.com> 2001-03-16 17:23:21 +0000
commit: db42d1485c38c3442e7b62e63d45f5e5b9b66ee1 (patch)
tree: 3684e9dd00bb3225b0452a0b9bfd12a7840a365f /utf8.c
parent: 60f7a97ae5b801dc0b103022b69814f3a1161856 (diff)
download: perl-db42d1485c38c3442e7b62e63d45f5e5b9b66ee1.tar.gz
EBCDIC Fixes.
p4raw-id: //depot/perlio@9180
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 31
1 files changed, 16 insertions, 15 deletions
diff --git a/utf8.c b/utf8.c
index 7302bb77e2..227dcba3f3 100644
--- a/utf8.c
+++ b/utf8.c
@@ -668,9 +668,9 @@ is unchanged. Do nothing if C<is_utf8> points to 0. Sets C<is_utf8> to
U8 *
Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8)
{
- U8 *send;
U8 *d;
U8 *start = s;
+ U8 *send;
I32 count = 0;
if (!*is_utf8)
@@ -679,28 +679,30 @@ Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8)
/* ensure valid UTF8 and chars < 256 before converting string */
for (send = s + *len; s < send;) {
U8 c = *s++;
- if (!UTF8_IS_ASCII(c)) {
- if (UTF8_IS_CONTINUATION(c) || s >= send ||
- !UTF8_IS_CONTINUATION(*s) || UTF8_IS_DOWNGRADEABLE_START(c))
+ if (!UTF8_IS_ASCII(c)) {
+ if (UTF8_IS_DOWNGRADEABLE_START(c) && s < send &&
+ (c = *s++) && UTF8_IS_CONTINUATION(c))
+ count++;
+ else
return start;
- s++, count++;
- }
+ }
}
*is_utf8 = 0;
+#ifndef EBCDIC
+ /* Can use as-is if no high chars */
if (!count)
return start;
+#endif
Newz(801, d, (*len) - count + 1, U8);
s = start; start = d;
while (s < send) {
U8 c = *s++;
-
- if (UTF8_IS_ASCII(c))
- *d++ = c;
- else
- *d++ = UTF8_ACCUMULATE(c, *s++);
+ if (!UTF8_IS_ASCII(c))
+ c = UTF8_ACCUMULATE(c, *s++);
+ *d++ = ASCII_TO_NATIVE(c);
}
*d = '\0';
*len = d - start;
@@ -729,11 +731,10 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len)
dst = d;
while (s < send) {
- if (UTF8_IS_ASCII(*s))
- *d++ = *s++;
+ UV uv = NATIVE_TO_ASCII(*s++);
+ if (UTF8_IS_ASCII(uv))
+ *d++ = uv;
else {
- UV uv = *s++;
-
*d++ = UTF8_EIGHT_BIT_HI(uv);
*d++ = UTF8_EIGHT_BIT_LO(uv);
}