summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-12-21 00:54:49 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-12-21 00:54:49 +0000
commitb851fbc1add6c3d9fa6158884279133c311a3efc (patch)
tree87e5be7072ad6e52eb1cfd85805e3c7ba034dab6 /utf8.c
parent70b75758f8fb2cc713010bb8bc05cd986151b9b2 (diff)
downloadperl-b851fbc1add6c3d9fa6158884279133c311a3efc.tar.gz
Make using U+FDD0..U+FDEF (noncharacters since Unicode 3.1),
U+...FFFE, U+...FFFF, and characters beyond U+10FFFF (the Unicode maximum code point) warnable offenses. p4raw-id: //depot/perl@13823
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c45
1 files changed, 38 insertions, 7 deletions
diff --git a/utf8.c b/utf8.c
index 81af39735a..debfb9ceac 100644
--- a/utf8.c
+++ b/utf8.c
@@ -27,15 +27,23 @@
/* Unicode support */
/*
-=for apidoc A|U8 *|uvuni_to_utf8|U8 *d|UV uv
+=for apidoc A|U8 *|uvuni_to_utf8_flags|U8 *d|UV uv|UV flags
Adds the UTF8 representation of the Unicode codepoint C<uv> to the end
of the string C<d>; C<d> should be have at least C<UTF8_MAXLEN+1> free
bytes available. The return value is the pointer to the byte after the
end of the new character. In other words,
+ d = uvuni_to_utf8_flags(d, uv, flags);
+
+or, in most cases,
+
d = uvuni_to_utf8(d, uv);
+(which is equivalent to)
+
+ d = uvuni_to_utf8_flags(d, uv, 0);
+
is the recommended Unicode-aware way of saying
*(d++) = uv;
@@ -44,13 +52,26 @@ is the recommended Unicode-aware way of saying
*/
U8 *
-Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
+Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
{
if (ckWARN_d(WARN_UTF8)) {
- if (UNICODE_IS_SURROGATE(uv))
+ if (UNICODE_IS_SURROGATE(uv) &&
+ !(flags & UNICODE_ALLOW_SURROGATE))
Perl_warner(aTHX_ WARN_UTF8, "UTF-16 surrogate 0x%04"UVxf, uv);
- else if ((uv >= 0xFDD0 && uv <= 0xFDEF) ||
- (uv == 0xFFFE || uv == 0xFFFF))
+ else if (
+ ((uv >= 0xFDD0 && uv <= 0xFDEF &&
+ !(flags & UNICODE_ALLOW_FDD0))
+ ||
+ ((uv & 0xFFFF) == 0xFFFE &&
+ !(flags & UNICODE_ALLOW_FFFE))
+ ||
+ ((uv & 0xFFFF) == 0xFFFF &&
+ !(flags & UNICODE_ALLOW_FFFF))) &&
+ /* UNICODE_ALLOW_SUPER includes
+ * FFFEs and FFFFs beyond 0x10FFFF. */
+ ((uv <= PERL_UNICODE_MAX) ||
+ !(flags & UNICODE_ALLOW_SUPER))
+ )
Perl_warner(aTHX_ WARN_UTF8,
"Unicode character 0x%04"UVxf" is illegal", uv);
}
@@ -138,7 +159,12 @@ Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
#endif
#endif /* Loop style */
}
-
+
+U8 *
+Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
+{
+ return Perl_uvuni_to_utf8_flags(aTHX_ d, uv, 0);
+}
/*
@@ -1544,9 +1570,14 @@ is the recommended wide native character-aware way of saying
U8 *
Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv)
{
- return Perl_uvuni_to_utf8(aTHX_ d, NATIVE_TO_UNI(uv));
+ return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0);
}
+U8 *
+Perl_uvchr_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+{
+ return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), flags);
+}
/*
=for apidoc A|UV|utf8n_to_uvchr|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags