summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-12-16 16:12:57 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-12-16 16:12:57 +0000
commit: 3d401ffb834607a79a2bd57f63376985af7c39c2 (patch)
tree: 2aa99cffcc5ff833e603fc3c1b5c1f9f09473772 /utf8.c
parent: 6bd23935f1e5b7c6f7035440f4b48e4c9659d61f (diff)
download: perl-3d401ffb834607a79a2bd57f63376985af7c39c2.tar.gz
Disallow also Unicode ranges 0xFDD0..0xFDEF and
0xFFFE..0xFFFF. Ranges 0x...FFFE..0x...FFFF in general, and characters beyond 0x10FFFF, should be disallowed too; however, some tests would need changing and, more importantly, some APIs would need remodeling, since one can easily generate such characters by bitwise complements, tr complements, or v-strings. p4raw-id: //depot/perl@13722
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 7
1 files changed, 5 insertions, 2 deletions
diff --git a/utf8.c b/utf8.c
index 517b2e37ad..0979506f76 100644
--- a/utf8.c
+++ b/utf8.c
@@ -46,12 +46,15 @@ is the recommended Unicode-aware way of saying
U8 *
Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
{
+ if (UNICODE_IS_SURROGATE(uv))
+ Perl_croak(aTHX_ "UTF-16 surrogate 0x%04"UVxf, uv);
+ else if ((uv >= 0xFDD0 && uv <= 0xFDEF) ||
+ (uv == 0xFFFE || uv == 0xFFFF))
+ Perl_croak(aTHX_ "Unicode character 0x%04"UVxf" is illegal", uv);
if (UNI_IS_INVARIANT(uv)) {
*d++ = UTF_TO_NATIVE(uv);
return d;
}
- if (UNICODE_IS_SURROGATE(uv))
- Perl_croak(aTHX_ "UTF-16 surrogate 0x%04"UVxf, uv);
#if defined(EBCDIC)
else {
STRLEN len = UNISKIP(uv);