summary refs log tree commit diff
path: root/utf8.c
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2000-12-03 20:57:19 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2000-12-03 20:57:19 +0000
commit ad391ad9bbfeaf73d3944b50240313a5677bcc60 (patch)
tree 270b2e4acd993a5e6c9721113cd4130e6ca64f4e /utf8.c
parent 801ca9cd5af7c86da741a601b4721bff3f94f9cd (diff)
download perl-ad391ad9bbfeaf73d3944b50240313a5677bcc60.tar.gz
Make uv_to_utf8() zero-terminate its output buffer, and
always use a buffer that is at least UTF8_MAXLEN + 1 U8s deep. p4raw-id: //depot/perl@7967
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 44
1 files changed, 26 insertions, 18 deletions
diff --git a/utf8.c b/utf8.c
index 9e943acb10..5713d65dea 100644
--- a/utf8.c
+++ b/utf8.c
@@ -27,21 +27,24 @@
/* Unicode support */
U8 *
-Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
+Perl_uv_to_utf8(pTHX_ U8 *d, UV uv) /* the d must be UTF8_MAXLEN+1 deep */
{
if (uv < 0x80) {
*d++ = uv;
+ *d = 0;
return d;
}
if (uv < 0x800) {
*d++ = (( uv >> 6) | 0xc0);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
if (uv < 0x10000) {
*d++ = (( uv >> 12) | 0xe0);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
if (uv < 0x200000) {
@@ -49,6 +52,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
if (uv < 0x4000000) {
@@ -57,6 +61,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
if (uv < 0x80000000) {
@@ -66,6 +71,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
#ifdef HAS_QUAD
@@ -79,6 +85,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
#ifdef HAS_QUAD
@@ -96,6 +103,7 @@ Perl_uv_to_utf8(pTHX_ U8 *d, UV uv)
*d++ = (((uv >> 12) & 0x3f) | 0x80);
*d++ = (((uv >> 6) & 0x3f) | 0x80);
*d++ = (( uv & 0x3f) | 0x80);
+ *d = 0;
return d;
}
#endif
@@ -593,7 +601,7 @@ Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
bool
Perl_is_uni_alnum(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_alnum(tmpbuf);
}
@@ -601,7 +609,7 @@ Perl_is_uni_alnum(pTHX_ U32 c)
bool
Perl_is_uni_alnumc(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_alnumc(tmpbuf);
}
@@ -609,7 +617,7 @@ Perl_is_uni_alnumc(pTHX_ U32 c)
bool
Perl_is_uni_idfirst(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_idfirst(tmpbuf);
}
@@ -617,7 +625,7 @@ Perl_is_uni_idfirst(pTHX_ U32 c)
bool
Perl_is_uni_alpha(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_alpha(tmpbuf);
}
@@ -625,7 +633,7 @@ Perl_is_uni_alpha(pTHX_ U32 c)
bool
Perl_is_uni_ascii(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_ascii(tmpbuf);
}
@@ -633,7 +641,7 @@ Perl_is_uni_ascii(pTHX_ U32 c)
bool
Perl_is_uni_space(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_space(tmpbuf);
}
@@ -641,7 +649,7 @@ Perl_is_uni_space(pTHX_ U32 c)
bool
Perl_is_uni_digit(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_digit(tmpbuf);
}
@@ -649,7 +657,7 @@ Perl_is_uni_digit(pTHX_ U32 c)
bool
Perl_is_uni_upper(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_upper(tmpbuf);
}
@@ -657,7 +665,7 @@ Perl_is_uni_upper(pTHX_ U32 c)
bool
Perl_is_uni_lower(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_lower(tmpbuf);
}
@@ -665,7 +673,7 @@ Perl_is_uni_lower(pTHX_ U32 c)
bool
Perl_is_uni_cntrl(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_cntrl(tmpbuf);
}
@@ -673,7 +681,7 @@ Perl_is_uni_cntrl(pTHX_ U32 c)
bool
Perl_is_uni_graph(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_graph(tmpbuf);
}
@@ -681,7 +689,7 @@ Perl_is_uni_graph(pTHX_ U32 c)
bool
Perl_is_uni_print(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_print(tmpbuf);
}
@@ -689,7 +697,7 @@ Perl_is_uni_print(pTHX_ U32 c)
bool
Perl_is_uni_punct(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_punct(tmpbuf);
}
@@ -697,7 +705,7 @@ Perl_is_uni_punct(pTHX_ U32 c)
bool
Perl_is_uni_xdigit(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return is_utf8_xdigit(tmpbuf);
}
@@ -705,7 +713,7 @@ Perl_is_uni_xdigit(pTHX_ U32 c)
U32
Perl_to_uni_upper(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return to_utf8_upper(tmpbuf);
}
@@ -713,7 +721,7 @@ Perl_to_uni_upper(pTHX_ U32 c)
U32
Perl_to_uni_title(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return to_utf8_title(tmpbuf);
}
@@ -721,7 +729,7 @@ Perl_to_uni_title(pTHX_ U32 c)
U32
Perl_to_uni_lower(pTHX_ U32 c)
{
- U8 tmpbuf[UTF8_MAXLEN];
+ U8 tmpbuf[UTF8_MAXLEN+1];
uv_to_utf8(tmpbuf, (UV)c);
return to_utf8_lower(tmpbuf);
}