summary | refs | log | tree | commit | diff
path: root/regcharclass.h
diff options
context:
space:
mode:
author: Yves Orton <demerphq@gmail.com> 2007-04-24 18:46:05 +0200
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2007-04-26 10:23:30 +0000
commit: 32e6a07c84b153f78f946de50870bc0ee030624f (patch)
tree: 80c02123a3e592ec2c9397c178cd62e38136d870 /regcharclass.h
parent: 0f68039566ac464bc1d4ff8f5b574153a1f6e9e9 (diff)
download: perl-32e6a07c84b153f78f946de50870bc0ee030624f.tar.gz
Re: Analysis of problems with mixed encoding case insensitive matches in regex engine.
Message-ID: <9b18b3110704240746u461e4bdcl208ef7d7f9c5ef64@mail.gmail.com> p4raw-id: //depot/perl@31081
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- regcharclass.h 83
1 files changed, 68 insertions, 15 deletions
diff --git a/regcharclass.h b/regcharclass.h
index 40d21bf5ff..8425693b0b 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -9,7 +9,7 @@
*
* !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
* This file is built by Porting/regcharclass.pl.
- * (Generated at: Mon Apr 23 15:30:51 2007 GMT)
+ * (Generated at: Tue Apr 24 12:19:13 2007 GMT)
* Any changes made here will be lost!
*/
@@ -105,9 +105,9 @@
/*** GENERATED CODE ***/
#define is_LNBREAK_cp(cp) \
-( (0x0A <= cp && cp <= 0x0D) || ( cp > 13 && \
-( cp == 0x85 || ( cp > 133 && \
-( cp == 0x2028 || ( cp > 8232 && \
+( (0x0A <= cp && cp <= 0x0D) ||( cp > 0x0D && \
+( cp == 0x85 ||( cp > 0x85 && \
+( cp == 0x2028 ||( cp > 0x2028 && \
cp == 0x2029 ) ) ) ) ) )
/*
@@ -227,14 +227,14 @@ cp == 0x2029 ) ) ) ) ) )
/*** GENERATED CODE ***/
#define is_HORIZWS_cp(cp) \
-( cp == 0x09 || ( cp > 9 && \
-( cp == 0x20 || ( cp > 32 && \
-( cp == 0xA0 || ( cp > 160 && \
-( cp == 0x1680 || ( cp > 5760 && \
-( cp == 0x180E || ( cp > 6158 && \
-( (0x2000 <= cp && cp <= 0x200A) || ( cp > 8202 && \
-( cp == 0x202F || ( cp > 8239 && \
-( cp == 0x205F || ( cp > 8287 && \
+( cp == 0x09 ||( cp > 0x09 && \
+( cp == 0x20 ||( cp > 0x20 && \
+( cp == 0xA0 ||( cp > 0xA0 && \
+( cp == 0x1680 ||( cp > 0x1680 && \
+( cp == 0x180E ||( cp > 0x180E && \
+( (0x2000 <= cp && cp <= 0x200A) ||( cp > 0x200A && \
+( cp == 0x202F ||( cp > 0x202F && \
+( cp == 0x205F ||( cp > 0x205F && \
cp == 0x3000 ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
/*
@@ -310,9 +310,62 @@ cp == 0x3000 ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
/*** GENERATED CODE ***/
#define is_VERTWS_cp(cp) \
-( (0x0A <= cp && cp <= 0x0D) || ( cp > 13 && \
-( cp == 0x85 || ( cp > 133 && \
-( cp == 0x2028 || ( cp > 8232 && \
+( (0x0A <= cp && cp <= 0x0D) ||( cp > 0x0D && \
+( cp == 0x85 ||( cp > 0x85 && \
+( cp == 0x2028 ||( cp > 0x2028 && \
cp == 0x2029 ) ) ) ) ) )
+/*
+ TRICKYFOLD: Problematic fold case letters.
+
+ 0x00DF # LATIN SMALL LETTER SHARP S
+ 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ 0x03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+*/
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD(s,is_utf8) \
+( (is_utf8) ? \
+ ( ( ((U8*)s)[0] == 0xC3 ) ? \
+ ( ( ((U8*)s)[1] == 0x9F ) ? 2 : 0 ) : \
+ ((( ((U8*)s)[0] == 0xCE ) && ( ((U8*)s)[1] == 0x90 || ((U8*)s)[1] == 0xB0 )) ? 2 : 0) ) :\
+ ( ((U8*)s)[0] == 0xDF ) )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_safe(s,e,is_utf8) \
+( ( (e) - (s) > 1 ) ? \
+( (is_utf8) ? \
+ ( ( ((U8*)s)[0] == 0xC3 ) ? \
+ ( ( ((U8*)s)[1] == 0x9F ) ? 2 : 0 ) : \
+ ((( ((U8*)s)[0] == 0xCE ) && ( ((U8*)s)[1] == 0x90 || ((U8*)s)[1] == 0xB0 )) ? 2 : 0) ) :\
+ ( ((U8*)s)[0] == 0xDF ) ) : \
+((( (e) - (s) > 0 ) && (!is_utf8)) ? ( ((U8*)s)[0] == 0xDF ) : 0) )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_utf8(s) \
+( ( ((U8*)s)[0] == 0xC3 ) ? \
+ ( ( ((U8*)s)[1] == 0x9F ) ? 2 : 0 ) : \
+ ((( ((U8*)s)[0] == 0xCE ) && ( ((U8*)s)[1] == 0x90 || ((U8*)s)[1] == 0xB0 )) ? 2 : 0) )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_utf8_safe(s,e) \
+( ( (e) - (s) > 1 ) ? \
+ ( ( ((U8*)s)[0] == 0xC3 ) ? \
+ ( ( ((U8*)s)[1] == 0x9F ) ? 2 : 0 ) : \
+ ((( ((U8*)s)[0] == 0xCE ) && ( ((U8*)s)[1] == 0x90 || ((U8*)s)[1] == 0xB0 )) ? 2 : 0) ) : 0 )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_latin1(s) \
+( ((U8*)s)[0] == 0xDF )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_latin1_safe(s,e) \
+( ( (e) - (s) > 0 ) ? \
+ ( ((U8*)s)[0] == 0xDF ) : 0 )
+
+/*** GENERATED CODE ***/
+#define is_TRICKYFOLD_cp(cp) \
+( cp == 0xDF ||( cp > 0xDF && \
+( cp == 0x390 ||( cp > 0x390 && \
+cp == 0x3B0 ) ) ) )
+
/* ex: set ro: */