Make using U+FDD0..U+FDEF (noncharacters since Unicode 3.1),

U+...FFFE, U+...FFFF, and characters beyond U+10FFFF (the Unicode maximum code point) warnable offenses. p4raw-id: //depot/perl@13823
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-12-21 00:54:49 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-12-21 00:54:49 +0000
commit: b851fbc1add6c3d9fa6158884279133c311a3efc (patch)
tree: 87e5be7072ad6e52eb1cfd85805e3c7ba034dab6 /utf8.c
parent: 70b75758f8fb2cc713010bb8bc05cd986151b9b2 (diff)
download: perl-b851fbc1add6c3d9fa6158884279133c311a3efc.tar.gz
1 files changed, 38 insertions, 7 deletions
diff --git a/utf8.c b/utf8.c
index 81af39735a..debfb9ceac 100644
--- a/utf8.c
+++ b/utf8.c
@@ -27,15 +27,23 @@
 /* Unicode support */
 
 /*
-=for apidoc A|U8 *|uvuni_to_utf8|U8 *d|UV uv
+=for apidoc A|U8 *|uvuni_to_utf8_flags|U8 *d|UV uv|UV flags
 
 Adds the UTF8 representation of the Unicode codepoint C<uv> to the end
 of the string C<d>; C<d> should be have at least C<UTF8_MAXLEN+1> free
 bytes available. The return value is the pointer to the byte after the
 end of the new character. In other words,
 
+    d = uvuni_to_utf8_flags(d, uv, flags);
+
+or, in most cases,
+
     d = uvuni_to_utf8(d, uv);
 
+(which is equivalent to)
+
+    d = uvuni_to_utf8_flags(d, uv, 0);
+
 is the recommended Unicode-aware way of saying
 
     *(d++) = uv;
@@ -44,13 +52,26 @@ is the recommended Unicode-aware way of saying
 */
 
 U8 *
-Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
+Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
 {
     if (ckWARN_d(WARN_UTF8)) {
-	 if (UNICODE_IS_SURROGATE(uv))
+	 if (UNICODE_IS_SURROGATE(uv) &&
+	     !(flags & UNICODE_ALLOW_SURROGATE))
 	      Perl_warner(aTHX_ WARN_UTF8, "UTF-16 surrogate 0x%04"UVxf, uv);
-	 else if ((uv >= 0xFDD0 && uv <= 0xFDEF) ||
-		  (uv == 0xFFFE || uv == 0xFFFF))
+	 else if (
+		  ((uv >= 0xFDD0 && uv <= 0xFDEF &&
+		    !(flags & UNICODE_ALLOW_FDD0))
+		   ||
+		   ((uv & 0xFFFF) == 0xFFFE &&
+		    !(flags & UNICODE_ALLOW_FFFE))
+		   ||
+		   ((uv & 0xFFFF) == 0xFFFF &&
+		    !(flags & UNICODE_ALLOW_FFFF))) &&
+		  /* UNICODE_ALLOW_SUPER includes
+		   * FFFEs and FFFFs beyond 0x10FFFF. */
+		  ((uv <= PERL_UNICODE_MAX) ||
+		   !(flags & UNICODE_ALLOW_SUPER))
+		  )
 	      Perl_warner(aTHX_ WARN_UTF8,
 			 "Unicode character 0x%04"UVxf" is illegal", uv);
     }
@@ -138,7 +159,12 @@ Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
 #endif
 #endif /* Loop style */
 }
-
+ 
+U8 *
+Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
+{
+    return Perl_uvuni_to_utf8_flags(aTHX_ d, uv, 0);
+}
 
 
 /*
@@ -1544,9 +1570,14 @@ is the recommended wide native character-aware way of saying
 U8 *
 Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv)
 {
-    return Perl_uvuni_to_utf8(aTHX_ d, NATIVE_TO_UNI(uv));
+    return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0);
 }
 
+U8 *
+Perl_uvchr_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+{
+    return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), flags);
+}
 
 /*
 =for apidoc A|UV|utf8n_to_uvchr|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags
author	Jarkko Hietaniemi <jhi@iki.fi>	2001-12-21 00:54:49 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2001-12-21 00:54:49 +0000
commit	b851fbc1add6c3d9fa6158884279133c311a3efc (patch)
tree	87e5be7072ad6e52eb1cfd85805e3c7ba034dab6 /utf8.c
parent	70b75758f8fb2cc713010bb8bc05cd986151b9b2 (diff)
download	perl-b851fbc1add6c3d9fa6158884279133c311a3efc.tar.gz