diff options
author | Karl Williamson <public@khwilliamson.com> | 2014-03-12 14:11:58 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2014-03-12 14:32:36 -0600 |
commit | 8842e230bdaeef0ab4af48b7d7a9b29ee40b3890 (patch) | |
tree | bf7aae8012cfb9fd7c9072bedf942f3a18463df2 /regcharclass.h | |
parent | 5d9574c10ca04dcefc1cac5441cb321f7bb4cc37 (diff) | |
download | perl-8842e230bdaeef0ab4af48b7d7a9b29ee40b3890.tar.gz |
regcomp.c: Don't read past string-end
While auditing regcomp.c and experimenting with
Encode::_utf8_on(), I found one instance of a regen/regcharclass.pl
macro that could read beyond the end of the string if given malformed
UTF-8. Hence we convert it to use the 'safe' form. There are no other
uses of the non-safe versions, so we don't need to generate them.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- | regcharclass.h | 26 |
1 files changed, 0 insertions, 26 deletions
diff --git a/regcharclass.h b/regcharclass.h index b0f635d410..5e34ec0d91 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -723,15 +723,6 @@ \p{PatWS} */ /*** GENERATED CODE ***/ -#define is_PATWS(s,is_utf8) \ -( ( ( 0x09 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\ -: ( is_utf8 ) ? \ - ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ - : ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0x8E || ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\ -: ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ) - -/*** GENERATED CODE ***/ #define is_PATWS_safe(s,e,is_utf8) \ ( ((e) > (s)) ? \ ( ( ( 0x09 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\ @@ -745,14 +736,6 @@ : 0 ) /*** GENERATED CODE ***/ -#define is_PATWS_non_low(s,is_utf8) \ -( ( is_utf8 ) ? \ - ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \ - ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \ - : ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0x8E || ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\ -: ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ) - -/*** GENERATED CODE ***/ #define is_PATWS_non_low_safe(s,e,is_utf8) \ ( ((e) > (s)) ? \ ( (! is_utf8) ? 
\ @@ -773,15 +756,6 @@ ( 0x200F == NATIVE_TO_UNI(cp) || ( 0x200F < NATIVE_TO_UNI(cp) && \ ( 0x2028 == NATIVE_TO_UNI(cp) || 0x2029 == NATIVE_TO_UNI(cp) ) ) ) ) ) ) ) ) ) ) ) -/*** GENERATED CODE ***/ -#define is_PATWS_cp(cp) \ -( ( 0x09 <= NATIVE_TO_UNI(cp) && NATIVE_TO_UNI(cp) <= 0x0D ) || ( 0x0D < NATIVE_TO_UNI(cp) &&\ -( 0x20 == NATIVE_TO_UNI(cp) || ( 0x20 < NATIVE_TO_UNI(cp) && \ -( 0x85 == NATIVE_TO_UNI(cp) || ( 0x85 < NATIVE_TO_UNI(cp) && \ -( 0x200E == NATIVE_TO_UNI(cp) || ( 0x200E < NATIVE_TO_UNI(cp) && \ -( 0x200F == NATIVE_TO_UNI(cp) || ( 0x200F < NATIVE_TO_UNI(cp) && \ -( 0x2028 == NATIVE_TO_UNI(cp) || 0x2029 == NATIVE_TO_UNI(cp) ) ) ) ) ) ) ) ) ) ) ) - #endif /* H_REGCHARCLASS */ |