diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-01-09 12:37:03 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-01-09 19:29:02 -0700 |
commit | 0876b9a01ce95023535c197900c755d5c98d616f (patch) | |
tree | 7da60e7cc7ebd37721babdc7b4466a35a2f7e596 /utf8.c | |
parent | bcd05b946b5a96ca0b2734a1042bda6af592c97b (diff) | |
download | perl-0876b9a01ce95023535c197900c755d5c98d616f.tar.gz |
Add check_utf8_print()
This new function looks for problematic code points on output, and warns if any
are found, returning FALSE as well.
What it warns about may change, so is marked as experimental.
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 48 |
1 files changed, 48 insertions, 0 deletions
@@ -2649,6 +2649,54 @@ U32 flags) return UNI_TO_NATIVE(uv); } +bool +Perl_check_utf8_print(pTHX_ register const U8* s, const STRLEN len) +{ + /* May change: warns if surrogates, non-character code points, or + * non-Unicode code points are in s which has length len. Returns TRUE if + * none found; FALSE otherwise. The only other validity check is to make + * sure that this won't exceed the string's length */ + + const U8* const e = s + len; + bool ok = TRUE; + + PERL_ARGS_ASSERT_CHECK_UTF8_PRINT; + + while (s < e) { + if (UTF8SKIP(s) > len) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), + "%s in %s", unees, PL_op ? OP_DESC(PL_op) : "print"); + return FALSE; + } + if (*s >= UTF8_FIRST_PROBLEMATIC_CODE_POINT_FIRST_BYTE) { + STRLEN char_len; + if (UTF8_IS_SUPER(s)) { + UV uv = utf8_to_uvchr(s, &char_len); + Perl_warner(aTHX_ packWARN(WARN_UTF8), + "Code point 0x%04"UVXf" is not Unicode, may not be portable", uv); + ok = FALSE; + } + else if (UTF8_IS_SURROGATE(s)) { + UV uv = utf8_to_uvchr(s, &char_len); + Perl_warner(aTHX_ packWARN(WARN_UTF8), + "Unicode surrogate U+%04"UVXf" is illegal in UTF-8", uv); + ok = FALSE; + } + else if + (UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s)) + { + UV uv = utf8_to_uvchr(s, &char_len); + Perl_warner(aTHX_ packWARN(WARN_UTF8), + "Unicode non-character U+%04"UVXf" is illegal for open interchange", uv); + ok = FALSE; + } + } + s += UTF8SKIP(s); + } + + return ok; +} + /* =for apidoc pv_uni_display |