summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2017-08-05 21:47:08 -0600
committerKarl Williamson <khw@cpan.org>2017-08-05 22:10:05 -0600
commitd819dc506b9fbd0d9bb316e42ca5bbefdd5f1d77 (patch)
tree085700c1685076094cc365f0d6e8bddb4ae79daf
parent3b890f763b669e3019e714e174a3d3dfc5075f55 (diff)
downloadperl-d819dc506b9fbd0d9bb316e42ca5bbefdd5f1d77.tar.gz
utf8_to_uvchr() EBCDIC fix
This fixes a warning message on EBCDIC platforms. The native character set there differs from Unicode and needs special handling. I earlier tried to avoid adding an #ifdef, but the resulting warning was hard to test correctly, and that helped convince me that it would also be confusing to anyone trying to make sense of the message. So, in goes the #ifdef.
-rw-r--r--ext/XS-APItest/t/utf8_warn_base.pl2
-rw-r--r--utf8.c16
2 files changed, 12 insertions, 6 deletions
diff --git a/ext/XS-APItest/t/utf8_warn_base.pl b/ext/XS-APItest/t/utf8_warn_base.pl
index 07652e8353..0f781c4002 100644
--- a/ext/XS-APItest/t/utf8_warn_base.pl
+++ b/ext/XS-APItest/t/utf8_warn_base.pl
@@ -121,7 +121,7 @@ my @tests;
],
[ "overlong malformation, highest 2-byte",
(isASCII) ? "\xc1\xbf" : I8_to_native("\xc4\xbf"),
- (isASCII) ? 0x7F : 0x9F, # Output as U+, so no need to xlate
+ (isASCII) ? 0x7F : 0xFF,
],
[ "overlong malformation, lowest 3-byte",
(isASCII) ? "\xe0\x80\x80" : I8_to_native("\xe0\xa0\xa0"),
diff --git a/utf8.c b/utf8.c
index 93cdd66f35..c24baeb2f2 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1875,11 +1875,17 @@ Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
}
else {
U8 tmpbuf[UTF8_MAXBYTES+1];
- const U8 * const e = uvoffuni_to_utf8_flags(tmpbuf,
- uv, 0);
- const char * preface = (uv <= PERL_UNICODE_MAX)
- ? "U+"
- : "0x";
+ const U8 * const e = uvchr_to_utf8_flags(tmpbuf,
+ uv, 0);
+ /* Don't use U+ for non-Unicode code points, which
+ * includes those in the Latin1 range */
+ const char * preface = ( uv > PERL_UNICODE_MAX
+#ifdef EBCDIC
+ || uv <= 0xFF
+#endif
+ )
+ ? "0x"
+ : "U+";
message = Perl_form(aTHX_
"%s: %s (overlong; instead use %s to represent"
" %s%0*" UVXf ")",