diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 2000-07-11 18:21:19 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 2000-07-11 18:21:19 +0000 |
commit | 6662521eef19f96de52b97fb5fa07a85826679ee (patch) | |
tree | 3445a5d5bc8939118a426b52b520e63e7fe5035c /utf8.c | |
parent | 7fdcfa2fc732bf742f6e1c2b723ab188bf8dc010 (diff) | |
download | perl-6662521eef19f96de52b97fb5fa07a85826679ee.tar.gz |
integrate cfgperl changes#6242..6249 into mainline
p4raw-link: @6249 on //depot/cfgperl: cab27d238e930b8cddb5b1fb3260355f913b86a6
p4raw-link: @6242 on //depot/cfgperl: 1e72252ad7b8e23d1a1142285b8aa82986bd2491
p4raw-id: //depot/perl@6359
p4raw-integrated: from //depot/cfgperl@6358 'copy in'
ext/DynaLoader/DynaLoader_pm.PL (@5953..) t/lib/peek.t
(@6086..) t/lib/filefunc.t t/lib/filespec.t (@6230..)
pod/perlintern.pod (@6237..) pod/perlapi.pod utf8.c (@6242..)
p4raw-integrated: from //depot/cfgperl@6249 'copy in' lib/IPC/Open3.pm
(@5937..)
p4raw-integrated: from //depot/cfgperl@6248 'copy in' pod/perlfunc.pod
(@6206..)
p4raw-integrated: from //depot/cfgperl@6247 'ignore' lib/File/Spec.pm
(@6230..)
p4raw-integrated: from //depot/cfgperl@6244 'copy in' gv.c (@6217..)
'merge in' sv.c (@6196..)
p4raw-integrated: from //depot/cfgperl@6243 'copy in' pp_proto.h
(@6237..) 'ignore' embedvar.h perlapi.h (@6237..) 'merge in'
embed.h objXSUB.h (@6237..) embed.pl perlapi.c proto.h
(@6242..)
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 41 |
1 files changed, 36 insertions, 5 deletions
@@ -134,6 +134,30 @@ Perl_is_utf8_char(pTHX_ U8 *s)
     return len;
 }
 
+/*
+=for apidoc Am|bool_utf8_string|U8 *s|STRLEN len
+
+Returns true if first C<len> bytes of the given string form valid a UTF8
+string, false otherwise.
+
+=cut
+*/
+
+bool
+Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
+{
+    U8* x=s;
+    U8* send=s+len;
+    int c;
+    while (x < send) {
+        c = is_utf8_char(x);
+        x += c;
+        if (!c || x > send)
+            return 0;
+    }
+    return 1;
+}
+
 UV
 Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen)
 {
@@ -227,6 +251,7 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off)
 
 Converts a string C<s> of length C<len> from UTF8 into ASCII encoding.
 Unlike C<bytes_to_utf8>, this over-writes the original string.
+Returns zero on failure after converting as much as possible.
 
 =cut
 */
@@ -247,6 +272,10 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len)
         else {
             I32 ulen;
             UV uv = utf8_to_uv(s, &ulen);
+            if (uv > 255) {
+                *d = '\0';
+                return 0;
+            }
             s += ulen;
             *d++ = (U8)uv;
         }
@@ -256,24 +285,25 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len)
 }
 
 /*
-=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN len
+=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN *len
 
 Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.
-Returns a pointer to the newly-created string.
+Returns a pointer to the newly-created string, and sets C<len> to
+reflect the new length.
 
 =cut
 */
 
 U8*
-Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len)
+Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len)
 {
     dTHR;
     U8 *send;
     U8 *d;
     U8 *dst;
-    send = s + len;
+    send = s + (*len);
 
-    Newz(801, d, len * 2 + 1, U8);
+    Newz(801, d, (*len) * 2 + 1, U8);
     dst = d;
 
     while (s < send) {
@@ -286,6 +316,7 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len)
         }
     }
     *d = '\0';
+    *len = d-dst;
     return dst;
 }
 