diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-01-30 18:18:51 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-01-30 18:18:51 +0000 |
commit | ef9edfd01e3c829c5c5b98c37662df9d1108dc9e (patch) | |
tree | 73b1b2a37f00358f6528aa3a939303c242a47b6d /utf8.c | |
parent | 2eb25c996f78f171b9e185efef03146c255a8fd3 (diff) | |
download | perl-ef9edfd01e3c829c5c5b98c37662df9d1108dc9e.tar.gz |
UTF-8 nit from Inaba Hiroto.
p4raw-id: //depot/perl@8615
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 18 |
1 files changed, 7 insertions, 11 deletions
```diff
@@ -587,10 +587,10 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len)
 
 Converts a string C<s> of length C<len> from UTF8 into byte encoding.
 Unlike <utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to
-the newly-created string, and updates C<len> to contain the new length.
-Returns the original string if no conversion occurs, C<len> and
-C<is_utf8> are unchanged. Do nothing if C<is_utf8> points to 0. Sets
-C<is_utf8> to 0 if C<s> is converted or malformed .
+the newly-created string, and updates C<len> to contain the new
+length. Returns the original string if no conversion occurs, C<len>
+is unchanged. Do nothing if C<is_utf8> points to 0. Sets C<is_utf8> to
+0 if C<s> is converted or contains all 7bit characters.
 
 =cut
 */
@@ -605,16 +605,12 @@ Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8)
     if (!*is_utf8)
         return start;
 
-    /* ensure valid UTF8 and chars < 256 before updating string */
+    /* ensure valid UTF8 and chars < 256 before converting string */
     for (send = s + *len; s < send;) {
         U8 c = *s++;
         if (!UTF8_IS_ASCII(c)) {
             if (UTF8_IS_CONTINUATION(c) || s >= send ||
-                !UTF8_IS_CONTINUATION(*s)) {
-                *is_utf8 = 0;
-                return start;
-            }
-            if ((c & 0xfc) != 0xc0)
+                !UTF8_IS_CONTINUATION(*s) || (c & 0xfc) != 0xc0)
                 return start;
             s++, count++;
         }
@@ -626,7 +622,7 @@ Perl_bytes_from_utf8(pTHX_ U8* s, STRLEN *len, bool *is_utf8)
         return start;
 
     Newz(801, d, (*len) - count + 1, U8);
-    d = s = start;
+    s = start; start = d;
     while (s < send) {
         U8 c = *s++;
         if (UTF8_IS_ASCII(c))
```