diff options
author | Yves Orton <demerphq@gmail.com> | 2007-04-24 18:46:05 +0200 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2007-04-26 10:23:30 +0000 |
commit | 32e6a07c84b153f78f946de50870bc0ee030624f (patch) | |
tree | 80c02123a3e592ec2c9397c178cd62e38136d870 /regcharclass.h | |
parent | 0f68039566ac464bc1d4ff8f5b574153a1f6e9e9 (diff) | |
download | perl-32e6a07c84b153f78f946de50870bc0ee030624f.tar.gz |
Re: Analysis of problems with mixed encoding case insensitive matches in regex engine.
Message-ID: <9b18b3110704240746u461e4bdcl208ef7d7f9c5ef64@mail.gmail.com>
p4raw-id: //depot/perl@31081
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- | regcharclass.h | 83 |
1 files changed, 68 insertions, 15 deletions
```diff
diff --git a/regcharclass.h b/regcharclass.h
index 40d21bf5ff..8425693b0b 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -9,7 +9,7 @@
  *
  * !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
  * This file is built by Porting/regcharclass.pl.
- * (Generated at: Mon Apr 23 15:30:51 2007 GMT)
+ * (Generated at: Tue Apr 24 12:19:13 2007 GMT)
  * Any changes made here will be lost!
  */
 
@@ -105,9 +105,9 @@
 
 /*** GENERATED CODE ***/
 #define is_LNBREAK_cp(cp) \
-( (0x0A <= cp && cp <= 0x0D) || ( cp > 13 && \
-( cp == 0x85 || ( cp > 133 && \
-( cp == 0x2028 || ( cp > 8232 && \
+( (0x0A <= cp && cp <= 0x0D) ||( cp > 0x0D && \
+( cp == 0x85 ||( cp > 0x85 && \
+( cp == 0x2028 ||( cp > 0x2028 && \
 cp == 0x2029 ) ) ) ) ) )
 
 /*
@@ -227,14 +227,14 @@ cp == 0x2029 ) ) ) ) ) )
 
 /*** GENERATED CODE ***/
 #define is_HORIZWS_cp(cp) \
-( cp == 0x09 || ( cp > 9 && \
-( cp == 0x20 || ( cp > 32 && \
-( cp == 0xA0 || ( cp > 160 && \
-( cp == 0x1680 || ( cp > 5760 && \
-( cp == 0x180E || ( cp > 6158 && \
-( (0x2000 <= cp && cp <= 0x200A) || ( cp > 8202 && \
-( cp == 0x202F || ( cp > 8239 && \
-( cp == 0x205F || ( cp > 8287 && \
+( cp == 0x09 ||( cp > 0x09 && \
+( cp == 0x20 ||( cp > 0x20 && \
+( cp == 0xA0 ||( cp > 0xA0 && \
+( cp == 0x1680 ||( cp > 0x1680 && \
+( cp == 0x180E ||( cp > 0x180E && \
+( (0x2000 <= cp && cp <= 0x200A) ||( cp > 0x200A && \
+( cp == 0x202F ||( cp > 0x202F && \
+( cp == 0x205F ||( cp > 0x205F && \
 cp == 0x3000 ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*
@@ -310,9 +310,62 @@ cp == 0x3000 ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
 /*** GENERATED CODE ***/
 #define is_VERTWS_cp(cp) \
-( (0x0A <= cp && cp <= 0x0D) || ( cp > 13 && \
-( cp == 0x85 || ( cp > 133 && \
-( cp == 0x2028 || ( cp > 8232 && \
+( (0x0A <= cp && cp <= 0x0D) ||( cp > 0x0D && \
+( cp == 0x85 ||( cp > 0x85 && \
+( cp == 0x2028 ||( cp > 0x2028 && \
 cp == 0x2029 ) ) ) ) ) )
 
+/*
+ TRICKYFOLD: Problematic fold case letters.
+
+ 0x00DF # LATIN SMALL LETTER SHARP S
+ 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ 0x03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+*/
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD(s,is_utf8) \
+( (is_utf8) ? \
+ ( ( ((U8*)s)[0] == 0xC3 ) ? \
+ ( ( ((U8*)s)[1] == 0x9F ) ? 2 : 0 ) : \
+ ((( ((U8*)s)[0] == 0xCE ) && ( ((U8*)s)[1] == 0x90 || ((U8*)s)[1] == 0xB0 )) ? 2 : 0) ) :\
+ ( ((U8*)s)[0] == 0xDF ) )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_safe(s,e,is_utf8) \
+( ( (e) - (s) > 1 ) ? \
+( (is_utf8) ? \
+ ( ( ((U8*)s)[0] == 0xC3 ) ? \
+ ( ( ((U8*)s)[1] == 0x9F ) ? 2 : 0 ) : \
+ ((( ((U8*)s)[0] == 0xCE ) && ( ((U8*)s)[1] == 0x90 || ((U8*)s)[1] == 0xB0 )) ? 2 : 0) ) :\
+ ( ((U8*)s)[0] == 0xDF ) ) : \
+((( (e) - (s) > 0 ) && (!is_utf8)) ? ( ((U8*)s)[0] == 0xDF ) : 0) )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_utf8(s) \
+( ( ((U8*)s)[0] == 0xC3 ) ? \
+ ( ( ((U8*)s)[1] == 0x9F ) ? 2 : 0 ) : \
+ ((( ((U8*)s)[0] == 0xCE ) && ( ((U8*)s)[1] == 0x90 || ((U8*)s)[1] == 0xB0 )) ? 2 : 0) )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_utf8_safe(s,e) \
+( ( (e) - (s) > 1 ) ? \
+ ( ( ((U8*)s)[0] == 0xC3 ) ? \
+ ( ( ((U8*)s)[1] == 0x9F ) ? 2 : 0 ) : \
+ ((( ((U8*)s)[0] == 0xCE ) && ( ((U8*)s)[1] == 0x90 || ((U8*)s)[1] == 0xB0 )) ? 2 : 0) ) : 0 )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_latin1(s) \
+( ((U8*)s)[0] == 0xDF )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_latin1_safe(s,e) \
+( ( (e) - (s) > 0 ) ? \
+ ( ((U8*)s)[0] == 0xDF ) : 0 )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_cp(cp) \
+( cp == 0xDF ||( cp > 0xDF && \
+( cp == 0x390 ||( cp > 0x390 && \
+cp == 0x3B0 ) ) ) )
+
 /* ex: set ro: */
```