summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c48
1 files changed, 48 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index 266cb9e5a4..9cc4d8963c 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2649,6 +2649,54 @@ U32 flags)
return UNI_TO_NATIVE(uv);
}
+bool
+Perl_check_utf8_print(pTHX_ register const U8* s, const STRLEN len)
+{
+ /* May change: warns if surrogates, non-character code points, or
+ * non-Unicode code points are in s which has length len. Returns TRUE if
+ * none found; FALSE otherwise. The only other validity check is to make
+ * sure that this won't exceed the string's length */
+
+ const U8* const e = s + len;
+ bool ok = TRUE;
+
+ PERL_ARGS_ASSERT_CHECK_UTF8_PRINT;
+
+ while (s < e) {
+ if (UTF8SKIP(s) > len) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+ "%s in %s", unees, PL_op ? OP_DESC(PL_op) : "print");
+ return FALSE;
+ }
+ if (*s >= UTF8_FIRST_PROBLEMATIC_CODE_POINT_FIRST_BYTE) {
+ STRLEN char_len;
+ if (UTF8_IS_SUPER(s)) {
+ UV uv = utf8_to_uvchr(s, &char_len);
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),
+ "Code point 0x%04"UVXf" is not Unicode, may not be portable", uv);
+ ok = FALSE;
+ }
+ else if (UTF8_IS_SURROGATE(s)) {
+ UV uv = utf8_to_uvchr(s, &char_len);
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),
+ "Unicode surrogate U+%04"UVXf" is illegal in UTF-8", uv);
+ ok = FALSE;
+ }
+ else if
+ (UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s))
+ {
+ UV uv = utf8_to_uvchr(s, &char_len);
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),
+ "Unicode non-character U+%04"UVXf" is illegal for open interchange", uv);
+ ok = FALSE;
+ }
+ }
+ s += UTF8SKIP(s);
+ }
+
+ return ok;
+}
+
/*
=for apidoc pv_uni_display