diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-12-16 16:12:57 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-12-16 16:12:57 +0000 |
commit | 3d401ffb834607a79a2bd57f63376985af7c39c2 (patch) | |
tree | 2aa99cffcc5ff833e603fc3c1b5c1f9f09473772 /utf8.c | |
parent | 6bd23935f1e5b7c6f7035440f4b48e4c9659d61f (diff) | |
download | perl-3d401ffb834607a79a2bd57f63376985af7c39c2.tar.gz |
Also disallow the Unicode ranges 0xFDD0..0xFDEF and
0xFFFE..0xFFFF. The ranges 0x...FFFE..0x...FFFF in general,
and characters beyond 0x10FFFF, should be disallowed too;
however, some tests would need changing and, more importantly,
some APIs would need remodeling, since one can easily generate
such characters with bitwise complements, tr complements, or
v-strings.
p4raw-id: //depot/perl@13722
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 7 |
1 files changed, 5 insertions, 2 deletions
@@ -46,12 +46,15 @@ is the recommended Unicode-aware way of saying
 U8 *
 Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
 {
+    if (UNICODE_IS_SURROGATE(uv))
+        Perl_croak(aTHX_ "UTF-16 surrogate 0x%04"UVxf, uv);
+    else if ((uv >= 0xFDD0 && uv <= 0xFDEF) ||
+             (uv == 0xFFFE || uv == 0xFFFF))
+        Perl_croak(aTHX_ "Unicode character 0x%04"UVxf" is illegal", uv);
     if (UNI_IS_INVARIANT(uv)) {
         *d++ = UTF_TO_NATIVE(uv);
         return d;
     }
-    if (UNICODE_IS_SURROGATE(uv))
-        Perl_croak(aTHX_ "UTF-16 surrogate 0x%04"UVxf, uv);
 #if defined(EBCDIC)
     else {
         STRLEN len = UNISKIP(uv);