diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-12-03 20:57:19 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-12-03 20:57:19 +0000 |
commit | ad391ad9bbfeaf73d3944b50240313a5677bcc60 (patch) | |
tree | 270b2e4acd993a5e6c9721113cd4130e6ca64f4e /utf8.c | |
parent | 801ca9cd5af7c86da741a601b4721bff3f94f9cd (diff) | |
download | perl-ad391ad9bbfeaf73d3944b50240313a5677bcc60.tar.gz |
Make uv_to_utf8() to zero-terminate its output buffer,
always use (at least) UTF8_MAXLEN + 1 U8s deep buffer.
p4raw-id: //depot/perl@7967
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 44 |
1 files changed, 26 insertions, 18 deletions
@@ -27,21 +27,24 @@ /* Unicode support */ U8 * -Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) +Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */ { if (uv < 0x80) { *d++ = uv; + *d = 0; return d; } if (uv < 0x800) { *d++ = (( uv >> 6) | 0xc0); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } if (uv < 0x10000) { *d++ = (( uv >> 12) | 0xe0); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } if (uv < 0x200000) { @@ -49,6 +52,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } if (uv < 0x4000000) { @@ -57,6 +61,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } if (uv < 0x80000000) { @@ -66,6 +71,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } #ifdef HAS_QUAD @@ -79,6 +85,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } #ifdef HAS_QUAD @@ -96,6 +103,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) *d++ = (((uv >> 12) & 0x3f) | 0x80); *d++ = (((uv >> 6) & 0x3f) | 0x80); *d++ = (( uv & 0x3f) | 0x80); + *d = 0; return d; } #endif @@ -593,7 +601,7 @@ Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen) bool Perl_is_uni_alnum(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_alnum(tmpbuf); } @@ -601,7 +609,7 @@ Perl_is_uni_alnum(pTHX_ U32 c) bool Perl_is_uni_alnumc(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_alnumc(tmpbuf); } @@ -609,7 +617,7 @@ Perl_is_uni_alnumc(pTHX_ U32 c) bool Perl_is_uni_idfirst(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_idfirst(tmpbuf); } @@ -617,7 +625,7 @@ Perl_is_uni_idfirst(pTHX_ U32 c) bool Perl_is_uni_alpha(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_alpha(tmpbuf); } @@ -625,7 +633,7 @@ Perl_is_uni_alpha(pTHX_ U32 c) bool Perl_is_uni_ascii(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_ascii(tmpbuf); } @@ -633,7 +641,7 @@ Perl_is_uni_ascii(pTHX_ U32 c) bool Perl_is_uni_space(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_space(tmpbuf); } @@ -641,7 +649,7 @@ Perl_is_uni_space(pTHX_ U32 c) bool Perl_is_uni_digit(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_digit(tmpbuf); } @@ -649,7 +657,7 @@ Perl_is_uni_digit(pTHX_ U32 c) bool Perl_is_uni_upper(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_upper(tmpbuf); } @@ -657,7 +665,7 @@ Perl_is_uni_upper(pTHX_ U32 c) bool Perl_is_uni_lower(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_lower(tmpbuf); } @@ -665,7 +673,7 @@ Perl_is_uni_lower(pTHX_ U32 c) bool Perl_is_uni_cntrl(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_cntrl(tmpbuf); } @@ -673,7 +681,7 @@ Perl_is_uni_cntrl(pTHX_ U32 c) bool Perl_is_uni_graph(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_graph(tmpbuf); } @@ -681,7 +689,7 @@ Perl_is_uni_graph(pTHX_ U32 c) bool Perl_is_uni_print(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_print(tmpbuf); } @@ -689,7 +697,7 @@ Perl_is_uni_print(pTHX_ U32 c) bool Perl_is_uni_punct(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_punct(tmpbuf); } @@ -697,7 +705,7 @@ Perl_is_uni_punct(pTHX_ U32 c) bool Perl_is_uni_xdigit(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return is_utf8_xdigit(tmpbuf); } @@ -705,7 +713,7 @@ Perl_is_uni_xdigit(pTHX_ U32 c) U32 Perl_to_uni_upper(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return to_utf8_upper(tmpbuf); } @@ -713,7 +721,7 @@ Perl_to_uni_upper(pTHX_ U32 c) U32 Perl_to_uni_title(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return to_utf8_title(tmpbuf); } @@ -721,7 +729,7 @@ Perl_to_uni_title(pTHX_ U32 c) U32 Perl_to_uni_lower(pTHX_ U32 c) { - U8 tmpbuf[UTF8_MAXLEN]; + U8 tmpbuf[UTF8_MAXLEN+1]; uv_to_utf8(tmpbuf, (UV)c); return to_utf8_lower(tmpbuf); } |