diff options
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 41 |
1 files changed, 36 insertions, 5 deletions
@@ -134,6 +134,30 @@ Perl_is_utf8_char(pTHX_ U8 *s) return len; } +/* +=for apidoc Am|bool_utf8_string|U8 *s|STRLEN len + +Returns true if first C<len> bytes of the given string form valid a UTF8 +string, false otherwise. + +=cut +*/ + +bool +Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) +{ + U8* x=s; + U8* send=s+len; + int c; + while (x < send) { + c = is_utf8_char(x); + x += c; + if (!c || x > send) + return 0; + } + return 1; +} + UV Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen) { @@ -227,6 +251,7 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off) Converts a string C<s> of length C<len> from UTF8 into ASCII encoding. Unlike C<bytes_to_utf8>, this over-writes the original string. +Returns zero on failure after converting as much as possible. =cut */ @@ -247,6 +272,10 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len) else { I32 ulen; UV uv = utf8_to_uv(s, &ulen); + if (uv > 255) { + *d = '\0'; + return 0; + } s += ulen; *d++ = (U8)uv; } @@ -256,24 +285,25 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN len) } /* -=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN len +=for apidoc Am|U8 *|bytes_to_utf8|U8 *s|STRLEN *len Converts a string C<s> of length C<len> from ASCII into UTF8 encoding. -Returns a pointer to the newly-created string. +Returns a pointer to the newly-created string, and sets C<len> to +reflect the new length. =cut */ U8* -Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len) +Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN *len) { dTHR; U8 *send; U8 *d; U8 *dst; - send = s + len; + send = s + (*len); - Newz(801, d, len * 2 + 1, U8); + Newz(801, d, (*len) * 2 + 1, U8); dst = d; while (s < send) { @@ -286,6 +316,7 @@ Perl_bytes_to_utf8(pTHX_ U8* s, STRLEN len) } } *d = '\0'; + *len = d-dst; return dst; } |