summary | refs | log | tree | commit | diff
path: root/regcharclass.h
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2014-03-12 14:11:58 -0600
committer: Karl Williamson <public@khwilliamson.com> 2014-03-12 14:32:36 -0600
commit: 8842e230bdaeef0ab4af48b7d7a9b29ee40b3890 (patch)
tree: bf7aae8012cfb9fd7c9072bedf942f3a18463df2 /regcharclass.h
parent: 5d9574c10ca04dcefc1cac5441cb321f7bb4cc37 (diff)
download: perl-8842e230bdaeef0ab4af48b7d7a9b29ee40b3890.tar.gz
regcomp.c: Don't read past string-end
In doing an audit of regcomp.c, and experimenting using Encode::_utf8_on(), I found this one instance of a regen/regcharclass.pl macro that could read beyond the end of the string if given malformed UTF-8. Hence we convert to use the 'safe' form. There are no other uses of the non-safe versions, so we don't need to generate them.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- regcharclass.h 26
1 files changed, 0 insertions, 26 deletions
diff --git a/regcharclass.h b/regcharclass.h
index b0f635d410..5e34ec0d91 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -723,15 +723,6 @@
\p{PatWS}
*/
/*** GENERATED CODE ***/
-#define is_PATWS(s,is_utf8) \
-( ( ( 0x09 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\
-: ( is_utf8 ) ? \
- ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \
- ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \
- : ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0x8E || ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\
-: ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) )
-
-/*** GENERATED CODE ***/
#define is_PATWS_safe(s,e,is_utf8) \
( ((e) > (s)) ? \
( ( ( 0x09 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\
@@ -745,14 +736,6 @@
: 0 )
/*** GENERATED CODE ***/
-#define is_PATWS_non_low(s,is_utf8) \
-( ( is_utf8 ) ? \
- ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? \
- ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 ) \
- : ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0x8E || ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\
-: ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) )
-
-/*** GENERATED CODE ***/
#define is_PATWS_non_low_safe(s,e,is_utf8) \
( ((e) > (s)) ? \
( (! is_utf8) ? \
@@ -773,15 +756,6 @@
( 0x200F == NATIVE_TO_UNI(cp) || ( 0x200F < NATIVE_TO_UNI(cp) && \
( 0x2028 == NATIVE_TO_UNI(cp) || 0x2029 == NATIVE_TO_UNI(cp) ) ) ) ) ) ) ) ) ) ) )
-/*** GENERATED CODE ***/
-#define is_PATWS_cp(cp) \
-( ( 0x09 <= NATIVE_TO_UNI(cp) && NATIVE_TO_UNI(cp) <= 0x0D ) || ( 0x0D < NATIVE_TO_UNI(cp) &&\
-( 0x20 == NATIVE_TO_UNI(cp) || ( 0x20 < NATIVE_TO_UNI(cp) && \
-( 0x85 == NATIVE_TO_UNI(cp) || ( 0x85 < NATIVE_TO_UNI(cp) && \
-( 0x200E == NATIVE_TO_UNI(cp) || ( 0x200E < NATIVE_TO_UNI(cp) && \
-( 0x200F == NATIVE_TO_UNI(cp) || ( 0x200F < NATIVE_TO_UNI(cp) && \
-( 0x2028 == NATIVE_TO_UNI(cp) || 0x2029 == NATIVE_TO_UNI(cp) ) ) ) ) ) ) ) ) ) ) )
-
#endif /* H_REGCHARCLASS */