diff options
author | Karl Williamson <khw@cpan.org> | 2016-08-19 14:07:53 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-08-19 14:41:00 -0600 |
commit | c8247c27c13d1cf152398e453793a91916d2185d (patch) | |
tree | 5d1ee37280e9b5e283040ccb30cf5d15f7936654 /cpan | |
parent | 675c73ca278d0bfeffeeb2a3f4cdea82e8b8b8c4 (diff) | |
download | perl-c8247c27c13d1cf152398e453793a91916d2185d.tar.gz |
Encode: revert commit that introduces security holes
This reverts a portion of commit
0f33e03c7e91f63bcd07b5ddfc00101715fa1fc0, which introduced some security
holes in checking for UTF-8 malformations. In particular, it allows
overflows in non-strict mode, and overlongs in either mode.
See discussion at https://github.com/dankogai/p5-encode/issues/64
This reversion is to make sure that we don't release even a development
version with known security holes. The final disposition is still to be
determined.
Diffstat (limited to 'cpan')
-rw-r--r-- | cpan/Encode/Encode.xs | 50 |
1 files changed, 12 insertions, 38 deletions
diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs index 6b4fae9f64..222f39b2ea 100644 --- a/cpan/Encode/Encode.xs +++ b/cpan/Encode/Encode.xs @@ -318,39 +318,6 @@ strict_utf8(pTHX_ SV* sv) return SvTRUE(*svp); } -/* - * https://github.com/dankogai/p5-encode/pull/56#issuecomment-231959126 - */ -#ifndef UNICODE_IS_NONCHAR -#define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE) -#endif - -static UV -convert_utf8_multi_seq(U8* s, STRLEN len, bool strict) -{ - UV uv; - - if (strict && len > 4) - return 0; - - uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len); - - len--; - s++; - - while (len--) { - if (!UTF8_IS_CONTINUATION(*s)) - return 0; - uv = UTF8_ACCUMULATE(uv, *s); - s++; - } - - if (strict && (UNICODE_IS_SURROGATE(uv) || UNICODE_IS_NONCHAR(uv) || uv > PERL_UNICODE_MAX)) - return 0; - - return uv; -} - static U8* process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, bool encode, bool strict, bool stop_at_partial) @@ -399,12 +366,19 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, goto malformed_byte; } - ulen = skip; - uv = convert_utf8_multi_seq(s, skip, strict); - if (uv == 0) { + uv = utf8n_to_uvuni(s, e - s, &ulen, + UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT : + UTF8_ALLOW_NONSTRICT) + ); +#if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */ + if (strict && uv > PERL_UNICODE_MAX) + ulen = (STRLEN) -1; +#endif + if (ulen == (STRLEN) -1) { if (strict) { - uv = convert_utf8_multi_seq(s, skip, 0); - if (uv == 0) + uv = utf8n_to_uvuni(s, e - s, &ulen, + UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT); + if (ulen == (STRLEN) -1) goto malformed_byte; goto malformed; } |