summaryrefslogtreecommitdiff
path: root/regcharclass.h
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2011-03-19 19:29:17 -0600
committer: Karl Williamson <public@khwilliamson.com> 2011-03-20 12:16:13 -0600
commit: 90826b5cd27738a30509f332296d8b985731d3fc (patch)
tree: 0dfdc33edd5165afb20ffa9e6a9712d56d222e31 /regcharclass.h
parent: e286af2d135c6b1b03be2bd322f22f89e1b1aa5d (diff)
download: perl-90826b5cd27738a30509f332296d8b985731d3fc.tar.gz
regcharclass: Add tricky fold characters.
The set of tricky fold characters needs to be expanded to include the other characters whose folds are the same as those of the original set. This isn't because the new ones have a length issue; it's that they would otherwise get left out of comparisons because of the special regnodes generated for the tricky ones.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- regcharclass.h 82
1 files changed, 77 insertions, 5 deletions
diff --git a/regcharclass.h b/regcharclass.h
index ea5cb99733..47d4b41925 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -361,9 +361,12 @@
/*
TRICKYFOLD: Problematic fold case letters.
- 0x00DF # LATIN1 SMALL LETTER SHARP S
+ 0x00DF # LATIN SMALL LETTER SHARP S
0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ 0x1E9E # LATIN CAPITAL LETTER SHARP S, because maps to same as 00DF
+ 0x1FD3 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA; maps same as 0390
+ 0x1FE3 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA; maps same as 03B0
*/
/*** GENERATED CODE ***/
#define is_TRICKYFOLD(s,is_utf8) \
@@ -372,12 +375,32 @@
( ( 0x9F == ((U8*)s)[1] ) ? 2 : 0 ) \
: ( 0xCE == ((U8*)s)[0] ) ? \
( ( 0x90 == ((U8*)s)[1] || 0xB0 == ((U8*)s)[1] ) ? 2 : 0 ) \
+ : ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0xBA == ((U8*)s)[1] ) ? \
+ ( ( 0x9E == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xBF == ((U8*)s)[1] ) ? \
+ ( ( 0x93 == ((U8*)s)[2] || 0xA3 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : 0 ) \
: 0 ) \
: ( 0xDF == ((U8*)s)[0] ) )
/*** GENERATED CODE ***/
#define is_TRICKYFOLD_safe(s,e,is_utf8) \
-( ((e)-(s) > 1) ? \
+( ((e)-(s) > 2) ? \
+ ( ( is_utf8 ) ? \
+ ( ( 0xC3 == ((U8*)s)[0] ) ? \
+ ( ( 0x9F == ((U8*)s)[1] ) ? 2 : 0 ) \
+ : ( 0xCE == ((U8*)s)[0] ) ? \
+ ( ( 0x90 == ((U8*)s)[1] || 0xB0 == ((U8*)s)[1] ) ? 2 : 0 ) \
+ : ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0xBA == ((U8*)s)[1] ) ? \
+ ( ( 0x9E == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( 0xBF == ((U8*)s)[1] ) ? \
+ ( ( 0x93 == ((U8*)s)[2] || 0xA3 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : 0 ) \
+ : 0 ) \
+ : ( 0xDF == ((U8*)s)[0] ) ) \
+: ((e)-(s) > 1) ? \
( ( is_utf8 ) ? \
( ( 0xC3 == ((U8*)s)[0] ) ? \
( ( 0x9F == ((U8*)s)[1] ) ? 2 : 0 ) \
@@ -395,7 +418,10 @@
#define is_TRICKYFOLD_cp(cp) \
( 0xDF == cp || ( 0xDF < cp && \
( 0x390 == cp || ( 0x390 < cp && \
-0x3B0 == cp ) ) ) )
+( 0x3B0 == cp || ( 0x3B0 < cp && \
+( 0x1E9E == cp || ( 0x1E9E < cp && \
+( 0x1FD3 == cp || ( 0x1FD3 < cp && \
+0x1FE3 == cp ) ) ) ) ) ) ) ) ) )
/*** GENERATED CODE ***/
#define what_TRICKYFOLD(s,is_utf8) \
@@ -405,12 +431,35 @@
: ( 0xCE == ((U8*)s)[0] ) ? \
( ( 0x90 == ((U8*)s)[1] ) ? 0x390 \
: ( 0xB0 == ((U8*)s)[1] ) ? 0x3B0 : 0 ) \
+ : ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0xBA == ((U8*)s)[1] ) ? \
+ ( ( 0x9E == ((U8*)s)[2] ) ? 0x1E9E : 0 ) \
+ : ( 0xBF == ((U8*)s)[1] ) ? \
+ ( ( 0x93 == ((U8*)s)[2] ) ? 0x1FD3 \
+ : ( 0xA3 == ((U8*)s)[2] ) ? 0x1FE3 : 0 ) \
+ : 0 ) \
: 0 ) \
: ( 0xDF == ((U8*)s)[0] ) ? 0xDF : 0 )
/*** GENERATED CODE ***/
#define what_TRICKYFOLD_safe(s,e,is_utf8) \
-( ((e)-(s) > 1) ? \
+( ((e)-(s) > 2) ? \
+ ( ( is_utf8 ) ? \
+ ( ( 0xC3 == ((U8*)s)[0] ) ? \
+ ( ( 0x9F == ((U8*)s)[1] ) ? 0xDF : 0 ) \
+ : ( 0xCE == ((U8*)s)[0] ) ? \
+ ( ( 0x90 == ((U8*)s)[1] ) ? 0x390 \
+ : ( 0xB0 == ((U8*)s)[1] ) ? 0x3B0 : 0 ) \
+ : ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0xBA == ((U8*)s)[1] ) ? \
+ ( ( 0x9E == ((U8*)s)[2] ) ? 0x1E9E : 0 ) \
+ : ( 0xBF == ((U8*)s)[1] ) ? \
+ ( ( 0x93 == ((U8*)s)[2] ) ? 0x1FD3 \
+ : ( 0xA3 == ((U8*)s)[2] ) ? 0x1FE3 : 0 ) \
+ : 0 ) \
+ : 0 ) \
+ : ( 0xDF == ((U8*)s)[0] ) ? 0xDF : 0 ) \
+: ((e)-(s) > 1) ? \
( ( is_utf8 ) ? \
( ( 0xC3 == ((U8*)s)[0] ) ? \
( ( 0x9F == ((U8*)s)[1] ) ? 0xDF : 0 ) \
@@ -431,12 +480,35 @@
: ( 0xCE == ((U8*)s)[0] ) ? \
( ( 0x90 == ((U8*)s)[1] ) ? len=2, 0x390 \
: ( 0xB0 == ((U8*)s)[1] ) ? len=2, 0x3B0 : 0 ) \
+ : ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0xBA == ((U8*)s)[1] ) ? \
+ ( ( 0x9E == ((U8*)s)[2] ) ? len=3, 0x1E9E : 0 ) \
+ : ( 0xBF == ((U8*)s)[1] ) ? \
+ ( ( 0x93 == ((U8*)s)[2] ) ? len=3, 0x1FD3 \
+ : ( 0xA3 == ((U8*)s)[2] ) ? len=3, 0x1FE3 : 0 ) \
+ : 0 ) \
: 0 ) \
: ( 0xDF == ((U8*)s)[0] ) ? len=1, 0xDF : 0 )
/*** GENERATED CODE ***/
#define what_len_TRICKYFOLD_safe(s,e,is_utf8,len) \
-( ((e)-(s) > 1) ? \
+( ((e)-(s) > 2) ? \
+ ( ( is_utf8 ) ? \
+ ( ( 0xC3 == ((U8*)s)[0] ) ? \
+ ( ( 0x9F == ((U8*)s)[1] ) ? len=2, 0xDF : 0 ) \
+ : ( 0xCE == ((U8*)s)[0] ) ? \
+ ( ( 0x90 == ((U8*)s)[1] ) ? len=2, 0x390 \
+ : ( 0xB0 == ((U8*)s)[1] ) ? len=2, 0x3B0 : 0 ) \
+ : ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0xBA == ((U8*)s)[1] ) ? \
+ ( ( 0x9E == ((U8*)s)[2] ) ? len=3, 0x1E9E : 0 ) \
+ : ( 0xBF == ((U8*)s)[1] ) ? \
+ ( ( 0x93 == ((U8*)s)[2] ) ? len=3, 0x1FD3 \
+ : ( 0xA3 == ((U8*)s)[2] ) ? len=3, 0x1FE3 : 0 ) \
+ : 0 ) \
+ : 0 ) \
+ : ( 0xDF == ((U8*)s)[0] ) ? len=1, 0xDF : 0 ) \
+: ((e)-(s) > 1) ? \
( ( is_utf8 ) ? \
( ( 0xC3 == ((U8*)s)[0] ) ? \
( ( 0x9F == ((U8*)s)[1] ) ? len=2, 0xDF : 0 ) \