diff options
author | Karl Williamson <khw@cpan.org> | 2018-03-27 15:49:06 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-03-31 15:36:46 -0600 |
commit | a78c2fa63b9d2e6faac531a43f1ef9e09eb25d9b (patch) | |
tree | 933232109f02e62293ec490641a1a3697e447775 | |
parent | 4d3d8522adb2df78f6a8713029eb6b13b41094d9 (diff) | |
download | perl-a78c2fa63b9d2e6faac531a43f1ef9e09eb25d9b.tar.gz |
regexec.c: Check for UTF-8 fitting
We've been burned before by malformed UTF-8 causing us to read outside
the buffer bounds. Here is a case I saw during code inspection, and
it's easy to add the buffer end limit.
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | proto.h | 4 | ||||
-rw-r--r-- | regexec.c | 11 |
4 files changed, 10 insertions, 9 deletions
@@ -2543,7 +2543,7 @@ ERp |bool |_is_grapheme |NN const U8 * strbeg|NN const U8 * s|NN const U8 *stren #endif #if defined(PERL_IN_REGEXEC_C) -ERs |bool |isFOO_utf8_lc |const U8 classnum|NN const U8* character +ERs |bool |isFOO_utf8_lc |const U8 classnum|NN const U8* character|NN const U8* e ERns |char * |find_next_ascii|NN char* s|NN const char * send|const bool is_utf8 ERns |char * |find_next_non_ascii|NN char* s|NN const char * send|const bool is_utf8 ERns |U8 * |find_next_masked|NN U8 * s \ @@ -1127,7 +1127,7 @@ #define find_next_non_ascii S_find_next_non_ascii #define find_span_end S_find_span_end #define find_span_end_mask S_find_span_end_mask -#define isFOO_utf8_lc(a,b) S_isFOO_utf8_lc(aTHX_ a,b) +#define isFOO_utf8_lc(a,b,c) S_isFOO_utf8_lc(aTHX_ a,b,c) #define isGCB(a,b,c,d,e) S_isGCB(aTHX_ a,b,c,d,e) #define isLB(a,b,c,d,e,f) S_isLB(aTHX_ a,b,c,d,e,f) #define isSB(a,b,c,d,e,f) S_isSB(aTHX_ a,b,c,d,e,f) @@ -5620,10 +5620,10 @@ STATIC U8 * S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, co #define PERL_ARGS_ASSERT_FIND_SPAN_END_MASK \ assert(s); assert(send) -STATIC bool S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character) +STATIC bool S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_ISFOO_UTF8_LC \ - assert(character) + assert(character); assert(e) STATIC bool S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strbeg, const U8 * const curpos, const bool utf8_target) __attribute__warn_unused_result__; @@ -494,7 +494,7 @@ Perl_isFOO_lc(pTHX_ const U8 classnum, const U8 character) #endif STATIC bool -S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character) +S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e) { /* Returns a boolean as to whether or not the (well-formed) UTF-8-encoded * 'character' is a member of the Posix character class given by 'classnum' @@ -516,7 +516,7 @@ S_isFOO_utf8_lc(pTHX_ 
const U8 classnum, const U8* character) EIGHT_BIT_UTF8_TO_NATIVE(*character, *(character + 1))); } - _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(character, character + UTF8SKIP(character)); + _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(character, e); switch ((_char_class_number) classnum) { case _CC_ENUM_SPACE: return is_XPERLSPACE_high(character); @@ -525,7 +525,7 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character) case _CC_ENUM_VERTSPACE: return is_VERTWS_high(character); default: return _invlist_contains_cp(PL_XPosix_ptrs[classnum], - valid_utf8_to_uvchr(character, NULL)); + utf8_to_uvchr_buf(character, e, NULL)); } return FALSE; /* Things like CNTRL are always below 256 */ @@ -2789,7 +2789,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, case POSIXL: _CHECK_AND_WARN_PROBLEMATIC_LOCALE; - REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s)), + REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s, (U8 *) strend)), to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s))); break; @@ -9512,7 +9512,8 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, } else { while (hardcount < max && scan < loceol && to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(p), - (U8 *) scan))) + (U8 *) scan, + (U8 *) loceol))) { scan += UTF8SKIP(scan); hardcount++; |