summaryrefslogtreecommitdiff
path: root/regcharclass.h
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2012-09-02 14:31:59 -0600
committer Karl Williamson <public@khwilliamson.com> 2012-09-13 21:14:02 -0600
commit 612ead590b8b5f05e4060738540192ece946c340 (patch)
tree 9b2b95ac09aa2f976b8032e662f767483bba3e43 /regcharclass.h
parent 2e39f0c2939879925d2d13a10d4494ef3487a454 (diff)
download perl-612ead590b8b5f05e4060738540192ece946c340.tar.gz
regen/regcharclass.pl: Generate macros for \X processing
\X is implemented in regexec.c as a complicated series of property look-ups. It turns out that many of those are for just a few code points, and so can be more efficiently implemented with a macro than a swash. This generates those.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- regcharclass.h 128
1 files changed, 128 insertions, 0 deletions
diff --git a/regcharclass.h b/regcharclass.h
index 7e6a7a3bb2..91ab67806d 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -332,6 +332,134 @@
( 0x2028 == cp || ( 0x2028 < cp && \
0x2029 == cp ) ) ) ) ) )
+/*
+ GCB_L: Grapheme_Cluster_Break=L
+
+ \p{_X_GCB_L}
+
+ is_GCB_L_utf8(s) inspects up to three bytes starting at s (viewed as U8)
+ and expands to 3 when they form one of the byte sequences below, else 0:
+
+   E1 84 80..BF
+   E1 85 80..9F
+   EA A5 A0..BC
+
+ Under standard UTF-8 these sequences encode U+1100..U+115F and
+ U+A960..U+A97C.
+
+ Usage notes:
+  - The non-zero result equals the number of bytes in the matched sequence.
+  - Each later byte is read only after the earlier bytes matched, but no
+    end-of-buffer check is made; the macro takes no length or end pointer.
+  - s is expanded several times and is cast as (U8*)s without surrounding
+    parentheses, so pass a plain pointer expression with no side effects.
+*/
+/*** GENERATED CODE ***/
+#define is_GCB_L_utf8(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x84 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x9F ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xEA == ((U8*)s)[0] ) ? \
+ ( ( ( 0xA5 == ((U8*)s)[1] ) && ( 0xA0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBC ) ) ? 3 : 0 )\
+: 0 )
+
+/*
+ GCB_LV_LVT_V: Grapheme_Cluster_Break=(LV or LVT or V)
+
+ \p{_X_LV_LVT_V}
+
+ is_GCB_LV_LVT_V_utf8(s) inspects up to three bytes starting at s (viewed
+ as U8) and expands to 3 when they form one of the byte sequences below,
+ else 0:
+
+   E1 85 A0..BF
+   E1 86 80..A7
+   EA B0..BF 80..BF
+   EB 80..BF 80..BF
+   EC 80..BF 80..BF
+   ED 80..9D 80..BF
+   ED 9E 80..A3
+   ED 9E B0..BF
+   ED 9F 80..86
+
+ Under standard UTF-8 these sequences encode U+1160..U+11A7,
+ U+AC00..U+D7A3 and U+D7B0..U+D7C6.
+
+ Usage notes:
+  - The non-zero result equals the number of bytes in the matched sequence.
+  - Each later byte is read only after the earlier bytes matched, but no
+    end-of-buffer check is made; the macro takes no length or end pointer.
+  - s is expanded several times and is cast as (U8*)s without surrounding
+    parentheses, so pass a plain pointer expression with no side effects.
+*/
+/*** GENERATED CODE ***/
+#define is_GCB_LV_LVT_V_utf8(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0xA0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x86 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA7 ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xEA == ((U8*)s)[0] ) ? \
+ ( ( ( 0xB0 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) && ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+: ( 0xEB == ((U8*)s)[0] || 0xEC == ((U8*)s)[0] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) && ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+: ( 0xED == ((U8*)s)[0] ) ? \
+ ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x9D ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA3 ) || ( 0xB0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+ : ( 0x9F == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) ? 3 : 0 ) \
+ : 0 ) \
+: 0 )
+
+/*
+ GCB_Prepend: Grapheme_Cluster_Break=Prepend
+
+ \p{_X_GCB_Prepend}
+
+ is_GCB_Prepend_utf8(s) expands to the constant 0: it never matches and
+ its argument s is not evaluated at all.
+
+ NOTE(review): presumably the generator found no code points with this
+ property in the Unicode data it was run against; confirm against
+ regen/regcharclass.pl before relying on that.
+*/
+/*** GENERATED CODE ***/
+#define is_GCB_Prepend_utf8(s) \
+( 0 )
+
+/*
+ GCB_RI: Grapheme_Cluster_Break=RI
+
+ \p{_X_RI}
+
+ is_GCB_RI_utf8(s) inspects up to four bytes starting at s (viewed as U8)
+ and expands to 4 when they form the byte sequence below, else 0:
+
+   F0 9F 87 A6..BF
+
+ Under standard UTF-8 this sequence encodes U+1F1E6..U+1F1FF.
+
+ Usage notes:
+  - The non-zero result equals the number of bytes in the matched sequence.
+  - The tests are chained with &&, so each later byte is read only after
+    the earlier bytes matched, but no end-of-buffer check is made; the
+    macro takes no length or end pointer.
+  - s is expanded several times and is cast as (U8*)s without surrounding
+    parentheses, so pass a plain pointer expression with no side effects.
+*/
+/*** GENERATED CODE ***/
+#define is_GCB_RI_utf8(s) \
+( ( ( ( ( 0xF0 == ((U8*)s)[0] ) && ( 0x9F == ((U8*)s)[1] ) ) && ( 0x87 == ((U8*)s)[2] ) ) && ( 0xA6 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBF ) ) ? 4 : 0 )
+
+/*
+ GCB_SPECIAL_BEGIN: Grapheme_Cluster_Break=special_begins
+
+ \p{_X_Special_Begin}
+
+ is_GCB_SPECIAL_BEGIN_utf8(s) inspects up to four bytes starting at s
+ (viewed as U8) and expands to the length of the matched sequence (3 or 4)
+ when they form one of the byte sequences below, else 0:
+
+   E1 84..87 80..BF          -> 3
+   EA A5 A0..BC              -> 3
+   EA B0..BF 80..BF          -> 3
+   EB 80..BF 80..BF          -> 3
+   EC 80..BF 80..BF          -> 3
+   ED 80..9D 80..BF          -> 3
+   ED 9E 80..A3              -> 3
+   ED 9E B0..BF              -> 3
+   ED 9F 80..86              -> 3
+   ED 9F 8B..BB              -> 3
+   F0 9F 87 A6..BF           -> 4
+
+ Under standard UTF-8 these sequences encode U+1100..U+11FF,
+ U+A960..U+A97C, U+AC00..U+D7A3, U+D7B0..U+D7C6, U+D7CB..U+D7FB and
+ U+1F1E6..U+1F1FF.
+
+ Usage notes:
+  - Each later byte is read only after the earlier bytes matched, but no
+    end-of-buffer check is made; the macro takes no length or end pointer.
+  - s is expanded several times and is cast as (U8*)s without surrounding
+    parentheses, so pass a plain pointer expression with no side effects.
+*/
+/*** GENERATED CODE ***/
+#define is_GCB_SPECIAL_BEGIN_utf8(s) \
+( ( ( 0xE1 == ((U8*)s)[0] ) && ( 0x84 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x87 ) ) ? ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 )\
+: ( 0xEA == ((U8*)s)[0] ) ? \
+ ( ( 0xA5 == ((U8*)s)[1] ) ? \
+ ( ( 0xA0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBC ) ? 3 : 0 ) \
+ : ( 0xB0 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xEB == ((U8*)s)[0] || 0xEC == ((U8*)s)[0] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) && ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+: ( 0xED == ((U8*)s)[0] ) ? \
+ ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x9D ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA3 ) || ( 0xB0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ) ? 3 : 0 )\
+ : ( 0x9F == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) || ( 0x8B <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBB ) ) ? 3 : 0 )\
+ : 0 ) \
+: ( 0xF0 == ((U8*)s)[0] ) ? \
+ ( ( ( ( 0x9F == ((U8*)s)[1] ) && ( 0x87 == ((U8*)s)[2] ) ) && ( 0xA6 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBF ) ) ? 4 : 0 )\
+: 0 )
+
+/*
+ GCB_T: Grapheme_Cluster_Break=T
+
+ \p{_X_GCB_T}
+
+ is_GCB_T_utf8(s) inspects up to three bytes starting at s (viewed as U8)
+ and expands to 3 when they form one of the byte sequences below, else 0:
+
+   E1 86 A8..BF
+   E1 87 80..BF
+   ED 9F 8B..BB
+
+ Under standard UTF-8 these sequences encode U+11A8..U+11FF and
+ U+D7CB..U+D7FB.
+
+ Usage notes:
+  - The non-zero result equals the number of bytes in the matched sequence.
+  - Each later byte is read only after the earlier bytes matched, but no
+    end-of-buffer check is made; the macro takes no length or end pointer.
+  - s is expanded several times and is cast as (U8*)s without surrounding
+    parentheses, so pass a plain pointer expression with no side effects.
+*/
+/*** GENERATED CODE ***/
+#define is_GCB_T_utf8(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x86 == ((U8*)s)[1] ) ? \
+ ( ( 0xA8 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x87 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xED == ((U8*)s)[0] ) ? \
+ ( ( ( 0x9F == ((U8*)s)[1] ) && ( 0x8B <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBB ) ) ? 3 : 0 )\
+: 0 )
+
+/*
+ GCB_V: Grapheme_Cluster_Break=V
+
+ \p{_X_GCB_V}
+
+ is_GCB_V_utf8(s) inspects up to three bytes starting at s (viewed as U8)
+ and expands to 3 when they form one of the byte sequences below, else 0:
+
+   E1 85 A0..BF
+   E1 86 80..A7
+   ED 9E B0..BF
+   ED 9F 80..86
+
+ Under standard UTF-8 these sequences encode U+1160..U+11A7 and
+ U+D7B0..U+D7C6.
+
+ Usage notes:
+  - The non-zero result equals the number of bytes in the matched sequence.
+  - Each later byte is read only after the earlier bytes matched, but no
+    end-of-buffer check is made; the macro takes no length or end pointer.
+  - s is expanded several times and is cast as (U8*)s without surrounding
+    parentheses, so pass a plain pointer expression with no side effects.
+*/
+/*** GENERATED CODE ***/
+#define is_GCB_V_utf8(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x85 == ((U8*)s)[1] ) ? \
+ ( ( 0xA0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x86 == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA7 ) ? 3 : 0 ) \
+ : 0 ) \
+: ( 0xED == ((U8*)s)[0] ) ? \
+ ( ( 0x9E == ((U8*)s)[1] ) ? \
+ ( ( 0xB0 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBF ) ? 3 : 0 ) \
+ : ( 0x9F == ((U8*)s)[1] ) ? \
+ ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) ? 3 : 0 ) \
+ : 0 ) \
+: 0 )
+
#endif /* H_REGCHARCLASS */