summary | refs | log | tree | commit | diff
path: root/cpan
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2016-08-19 14:07:53 -0600
committer: Karl Williamson <khw@cpan.org> 2016-08-19 14:41:00 -0600
commit: c8247c27c13d1cf152398e453793a91916d2185d (patch)
tree: 5d1ee37280e9b5e283040ccb30cf5d15f7936654 /cpan
parent: 675c73ca278d0bfeffeeb2a3f4cdea82e8b8b8c4 (diff)
download: perl-c8247c27c13d1cf152398e453793a91916d2185d.tar.gz
Encode: revert commit that introduces security holes
This reverts a portion of commit 0f33e03c7e91f63bcd07b5ddfc00101715fa1fc0, which introduces some security holes in checking for UTF-8 malformations. In particular, it allows overflow in non-strict mode, and overlongs in either mode. See the discussion at https://github.com/dankogai/p5-encode/issues/64 for details. This reversion is to make sure that we don't release even a development version with known security holes. A final disposition is still to be determined.
Diffstat (limited to 'cpan')
-rw-r--r-- cpan/Encode/Encode.xs 50
1 files changed, 12 insertions, 38 deletions
diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs
index 6b4fae9f64..222f39b2ea 100644
--- a/cpan/Encode/Encode.xs
+++ b/cpan/Encode/Encode.xs
@@ -318,39 +318,6 @@ strict_utf8(pTHX_ SV* sv)
return SvTRUE(*svp);
}
-/*
- * https://github.com/dankogai/p5-encode/pull/56#issuecomment-231959126
- */
-#ifndef UNICODE_IS_NONCHAR
-#define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE)
-#endif
-
-static UV
-convert_utf8_multi_seq(U8* s, STRLEN len, bool strict)
-{
- UV uv;
-
- if (strict && len > 4)
- return 0;
-
- uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len);
-
- len--;
- s++;
-
- while (len--) {
- if (!UTF8_IS_CONTINUATION(*s))
- return 0;
- uv = UTF8_ACCUMULATE(uv, *s);
- s++;
- }
-
- if (strict && (UNICODE_IS_SURROGATE(uv) || UNICODE_IS_NONCHAR(uv) || uv > PERL_UNICODE_MAX))
- return 0;
-
- return uv;
-}
-
static U8*
process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
bool encode, bool strict, bool stop_at_partial)
@@ -399,12 +366,19 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
goto malformed_byte;
}
- ulen = skip;
- uv = convert_utf8_multi_seq(s, skip, strict);
- if (uv == 0) {
+ uv = utf8n_to_uvuni(s, e - s, &ulen,
+ UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT :
+ UTF8_ALLOW_NONSTRICT)
+ );
+#if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */
+ if (strict && uv > PERL_UNICODE_MAX)
+ ulen = (STRLEN) -1;
+#endif
+ if (ulen == (STRLEN) -1) {
if (strict) {
- uv = convert_utf8_multi_seq(s, skip, 0);
- if (uv == 0)
+ uv = utf8n_to_uvuni(s, e - s, &ulen,
+ UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT);
+ if (ulen == (STRLEN) -1)
goto malformed_byte;
goto malformed;
}