summaryrefslogtreecommitdiff
path: root/utf8.h
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2016-12-10 12:51:59 -0700
committer: Karl Williamson <khw@cpan.org> 2016-12-23 16:48:35 -0700
commit: 2d532c27c843a85ae0a9743642866ef4b70d1323 (patch)
tree: 66974e06132fbeffeedd20a97fc2d685d7acfc49 /utf8.h
parent: f180b2926a9378db829862d88921feefe2460d35 (diff)
download: perl-2d532c27c843a85ae0a9743642866ef4b70d1323.tar.gz
utf8.h: Don't allow zero length malformation unless requested
The bottom-level Perl routine that decodes UTF-8 into a code point has long accepted inputs where the length is specified to be 0, returning a NUL. It considers this a malformation, which is accepted in some scenarios, but not others. In consultation with Tony Cook, we decided this really isn't a malformation, but is a bug in the calling program. Rather than calling the decode routine when there is nothing to decode, the caller should simply not call it. This commit removes the acceptance of a zero-length string from all of the canned flag combinations passed to the decode function. Code that relies on the old behavior can be converted to specify the UTF8_ALLOW_EMPTY flag explicitly, if necessary. However, the next commit will cause this to fail under DEBUGGING builds, as a step towards removing the capability altogether.
Diffstat (limited to 'utf8.h')
-rw-r--r--utf8.h16
1 files changed, 10 insertions, 6 deletions
diff --git a/utf8.h b/utf8.h
index 3da10e055c..a4cae099d8 100644
--- a/utf8.h
+++ b/utf8.h
@@ -787,12 +787,16 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
#define UTF8_WARN_ILLEGAL_INTERCHANGE \
(UTF8_WARN_ILLEGAL_C9_INTERCHANGE|UTF8_WARN_NONCHAR)
-#define UTF8_ALLOW_ANY \
- (~( UTF8_DISALLOW_ILLEGAL_INTERCHANGE|UTF8_DISALLOW_ABOVE_31_BIT \
- |UTF8_WARN_ILLEGAL_INTERCHANGE|UTF8_WARN_ABOVE_31_BIT))
-#define UTF8_ALLOW_ANYUV UTF8_ALLOW_EMPTY
-#define UTF8_ALLOW_DEFAULT (ckWARN(WARN_UTF8) ? 0 : \
- UTF8_ALLOW_ANYUV)
+#define UTF8_ALLOW_ANY ( UTF8_ALLOW_CONTINUATION \
+ |UTF8_ALLOW_NON_CONTINUATION \
+ |UTF8_ALLOW_SHORT \
+ |UTF8_ALLOW_LONG)
+
+/* Accept any Perl-extended UTF-8 that evaluates to any UV on the platform, but
+ * not anything malformed. This is the default. (Note that UVs above IV_MAX
+ * are deprecated.) */
+#define UTF8_ALLOW_ANYUV 0
+#define UTF8_ALLOW_DEFAULT UTF8_ALLOW_ANYUV
/*
=for apidoc Am|bool|UTF8_IS_SURROGATE|const U8 *s|const U8 *e