summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-01-09 12:37:03 -0700
committerKarl Williamson <public@khwilliamson.com>2011-01-09 19:29:02 -0700
commit0876b9a01ce95023535c197900c755d5c98d616f (patch)
tree7da60e7cc7ebd37721babdc7b4466a35a2f7e596 /utf8.c
parentbcd05b946b5a96ca0b2734a1042bda6af592c97b (diff)
downloadperl-0876b9a01ce95023535c197900c755d5c98d616f.tar.gz
Add check_utf8_print()
This new function looks for problematic code points on output, and warns if any are found, returning FALSE as well. What it warns about may change, so is marked as experimental.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c48
1 files changed, 48 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index 266cb9e5a4..9cc4d8963c 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2649,6 +2649,54 @@ U32 flags)
return UNI_TO_NATIVE(uv);
}
+bool
+Perl_check_utf8_print(pTHX_ register const U8* s, const STRLEN len)
+{
+ /* May change: warns if surrogates, non-character code points, or
+ * non-Unicode code points are in s which has length len. Returns TRUE if
+ * none found; FALSE otherwise. The only other validity check is to make
+ * sure that this won't exceed the string's length */
+
+ const U8* const e = s + len;
+ bool ok = TRUE;
+
+ PERL_ARGS_ASSERT_CHECK_UTF8_PRINT;
+
+ while (s < e) {
+ if (UTF8SKIP(s) > len) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+ "%s in %s", unees, PL_op ? OP_DESC(PL_op) : "print");
+ return FALSE;
+ }
+ if (*s >= UTF8_FIRST_PROBLEMATIC_CODE_POINT_FIRST_BYTE) {
+ STRLEN char_len;
+ if (UTF8_IS_SUPER(s)) {
+ UV uv = utf8_to_uvchr(s, &char_len);
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),
+ "Code point 0x%04"UVXf" is not Unicode, may not be portable", uv);
+ ok = FALSE;
+ }
+ else if (UTF8_IS_SURROGATE(s)) {
+ UV uv = utf8_to_uvchr(s, &char_len);
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),
+ "Unicode surrogate U+%04"UVXf" is illegal in UTF-8", uv);
+ ok = FALSE;
+ }
+ else if
+ (UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s))
+ {
+ UV uv = utf8_to_uvchr(s, &char_len);
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),
+ "Unicode non-character U+%04"UVXf" is illegal for open interchange", uv);
+ ok = FALSE;
+ }
+ }
+ s += UTF8SKIP(s);
+ }
+
+ return ok;
+}
+
/*
=for apidoc pv_uni_display