summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2012-02-11 14:20:56 -0700
committer: Karl Williamson <public@khwilliamson.com> 2012-02-11 14:35:46 -0700
commit: 768483871f7d05689a92ec84d2182a1b6e3c0516 (patch)
tree: 61f7ee908b7ff0c498a272236a3d42160038df4e /utf8.c
parent: 492a624f4a0c250e011c6b74a3403bfc885ec961 (diff)
download: perl-768483871f7d05689a92ec84d2182a1b6e3c0516.tar.gz
Deprecate is_utf8_char()
This function assumes that there is enough space in the buffer to read however many bytes are indicated by the first byte in the alleged UTF-8 encoded string. This may not be true, and so it can read beyond the buffer end. is_utf8_char_buf() should be used instead.
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 10
1 file changed, 7 insertions, 3 deletions
diff --git a/utf8.c b/utf8.c
index 2e0429e476..5c1f7c0338 100644
--- a/utf8.c
+++ b/utf8.c
@@ -355,21 +355,25 @@ Perl_is_utf8_char_buf(const U8 *buf, const U8* buf_end)
/*
=for apidoc is_utf8_char
+DEPRECATED!
+
Tests if some arbitrary number of bytes begins in a valid UTF-8
character. Note that an INVARIANT (i.e. ASCII on non-EBCDIC machines)
character is a valid UTF-8 character. The actual number of bytes in the UTF-8
character will be returned if it is valid, otherwise 0.
-WARNING: use only if you *know* that C<s> has at least either UTF8_MAXBYTES or
-UTF8SKIP(s) bytes.
+This function is deprecated due to the possibility that malformed input could
+cause reading beyond the end of the input buffer. Use C<is_utf8_char_buf>
+instead.
=cut */
+
STRLEN
Perl_is_utf8_char(const U8 *s)
{
PERL_ARGS_ASSERT_IS_UTF8_CHAR;
- /* Assumes we have enough space */
+ /* Assumes we have enough space, which is why this is deprecated */
return is_utf8_char_buf(s, s + UTF8SKIP(s));
}