diff options
author | Simon Cozens <simon@netthink.co.uk> | 2000-06-26 04:55:45 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-06-27 02:24:00 +0000 |
commit | 1e72252ad7b8e23d1a1142285b8aa82986bd2491 (patch) | |
tree | 2339eb71d34c2aac1d9b384cb1041fce06d78252 | |
parent | 524710647511337b71f94221f2bea48278a5a629 (diff) | |
download | perl-1e72252ad7b8e23d1a1142285b8aa82986bd2491.tar.gz |
bytes<->utf8 fixes
Message-ID: <slrn8ldoih.fbd.simon@justanother.perlhacker.org>
p4raw-id: //depot/cfgperl@6242
-rwxr-xr-x | embed.pl | 2 | ||||
-rwxr-xr-x | perlapi.c | 2 | ||||
-rw-r--r-- | pod/perlapi.pod | 6 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | utf8.c | 17 |
5 files changed, 19 insertions, 10 deletions
@@ -2063,7 +2063,7 @@ Ap |U8* |utf16_to_utf8_reversed|U16* p|U8 *d|I32 bytelen
 Ap |I32 |utf8_distance |U8 *a|U8 *b
 Ap |U8* |utf8_hop |U8 *s|I32 off
 Ap |U8* |utf8_to_bytes |U8 *s|STRLEN len
-Ap |U8* |bytes_to_utf8 |U8 *s|STRLEN len
+Ap |U8* |bytes_to_utf8 |U8 *s|STRLEN *len
 Ap |UV |utf8_to_uv |U8 *s|I32* retlen
 Ap |U8* |uv_to_utf8 |U8 *d|UV uv
 p |void |vivify_defelem |SV* sv
@@ -3352,7 +3352,7 @@ Perl_utf8_to_bytes(pTHXo_ U8 *s, STRLEN len)
 
 #undef Perl_bytes_to_utf8
 U8*
-Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN len)
+Perl_bytes_to_utf8(pTHXo_ U8 *s, STRLEN *len)
 {
     return ((CPerlObj*)pPerl)->Perl_bytes_to_utf8(s, len);
 }
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index f274641029..1e488097b1 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -153,9 +153,10 @@ Found in file av.c
 =item bytes_to_utf8
 
 Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.
-Returns a pointer to the newly-created string.
+Returns a pointer to the newly-created string, and sets C<len> to
+reflect the new length.
 
- U8 * bytes_to_utf8(U8 *s, STRLEN len)
+ U8 * bytes_to_utf8(U8 *s, STRLEN *len)
 
 =for hackers
 Found in file utf8.c
@@ -2942,6 +2943,7 @@ Found in file handy.h
 
 Converts a string C<s> of length C<len> from UTF8 into ASCII encoding.
 Unlike C<bytes_to_utf8>, this over-writes the original string.
+Returns zero on failure after converting as much as possible.
 
  U8 * utf8_to_bytes(U8 *s, STRLEN len)
 
@@ -810,7 +810,7 @@ PERL_CALLCONV U8* Perl_utf16_to_utf8_reversed(pTHX_ U16* p, U8 *d, I32 bytelen);
 PERL_CALLCONV I32 Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
 PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off);
 PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN len);
-PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN len);
+PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len);
 PERL_CALLCONV UV Perl_utf8_to_uv(pTHX_ U8 *s, I32* retlen);
 PERL_CALLCONV U8* Perl_uv_to_utf8(pTHX_ U8 *d, UV uv);
 PERL_CALLCONV void Perl_vivify_defelem(pTHX_ SV* sv);
@@ -227,6 +227,7 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off)
 
 Converts a string C<s> of length C<len> from UTF8 into ASCII encoding.
 Unlike C<bytes_to_utf8>, this over-writes the original string.
+Returns zero on failure after converting as much as possible.
 
 =cut
 */
@@ -247,6 +248,10 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len)
         else {
             I32 ulen;
             UV uv = utf8_to_uv(s, &ulen);
+            if (uv > 255) {
+                *d = '\0';
+                return 0;
+            }
             s += ulen;
             *d++ = (U8)uv;
         }
@@ -256,24 +261,25 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len)
 }
 
 /*
-=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN len
+=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN *len
 
 Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.
-Returns a pointer to the newly-created string.
+Returns a pointer to the newly-created string, and sets C<len> to
+reflect the new length.
 
 =cut
 */
 
 U8*
-Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len)
+Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len)
 {
     dTHR;
     U8 *send;
     U8 *d;
     U8 *dst;
-    send = s + len;
+    send = s + (*len);
 
-    Newz(801, d, len * 2 + 1, U8);
+    Newz(801, d, (*len) * 2 + 1, U8);
     dst = d;
 
     while (s < send) {
@@ -286,6 +292,7 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len)
         }
     }
     *d = '\0';
+    *len = d-dst;
     return dst;
 }
 