summaryrefslogtreecommitdiff
path: root/regcharclass.h
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2021-06-28 15:31:39 -0600
committer Karl Williamson <khw@cpan.org> 2021-07-30 09:56:16 -0600
commit 6b28089cc4992fbc129d40723e0ce46f86abf4da (patch)
tree 68cd36a29124bd6053bce0c3255c555a602fa3f0 /regcharclass.h
parent e4b3c400285f0401e5430d34e19a49cf3f114a11 (diff)
download perl-6b28089cc4992fbc129d40723e0ce46f86abf4da.tar.gz
regcharclass.h: #defines for non-chars by UTF8 length
This creates macros that match the non-character code points grouped by the length of their UTF-8 encoding: each macro matches only those non-characters whose encoded sequence has the given length. This allows more efficient processing, which a future commit will take advantage of. The code point at which the encoded length changes depends on the platform type, and these macros keep the calling code from having to worry about that.
Diffstat (limited to 'regcharclass.h')
-rw-r--r--regcharclass.h132
1 files changed, 131 insertions, 1 deletions
diff --git a/regcharclass.h b/regcharclass.h
index a50105e990..3011f5c7ec 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -173,6 +173,48 @@
: ( ( ( ( 0xF4 == ((const U8*)s)[0] ) && ( 0x8F == ((const U8*)s)[1] ) ) && ( 0xBF == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0xBE, 0xBF) ) ) ? 4 : 0 ) : 0 )
/*
+ SHORTER_NON_CHARS: # 3 bytes
+ Yields 3 when the bytes at s are 0xEF 0xB7 0x90..0xAF or 0xEF 0xBF 0xBE..0xBF (the UTF-8 encodings of the ranges below), otherwise 0; this reading takes inRANGE_helper_ to be an inclusive range test, which is defined outside this hunk.
+ 0xFDD0 - 0xFDEF
+ 0xFFFE - 0xFFFF
+*/
+/*** GENERATED CODE ***/ /* NOTE(review): s is expanded several times, unparenthesized under the cast, and bytes [0]..[2] are read (each only after the preceding byte matched) with no length argument; callers presumably pass a plain, side-effect-free pointer with enough readable bytes -- confirm. */
+#define is_SHORTER_NON_CHARS_utf8(s) \
+( ( 0xEF == ((const U8*)s)[0] ) ? \
+ ( ( 0xB7 == ((const U8*)s)[1] ) ? \
+ ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x90, 0xAF) ) ? 3 : 0 ) \
+ : ( ( 0xBF == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0xBE, 0xBF) ) ) ? 3 : 0 )\
+: 0 )
+
+/*
+ LARGER_NON_CHARS: # 4 bytes
+ Yields 4 when the bytes at s are LEAD SECOND 0xBF 0xBE..0xBF, where LEAD/SECOND is 0xF0 with 0x9F, 0xAF or 0xBF; 0xF1..0xF3 with 0x8F, 0x9F, 0xAF or 0xBF; or 0xF4 with 0x8F (the UTF-8 encodings of the ranges below). Otherwise 0. This reading takes inRANGE_helper_ to be an inclusive range test, which is defined outside this hunk.
+ 0x1FFFE - 0x1FFFF
+ 0x2FFFE - 0x2FFFF
+ 0x3FFFE - 0x3FFFF
+ 0x4FFFE - 0x4FFFF
+ 0x5FFFE - 0x5FFFF
+ 0x6FFFE - 0x6FFFF
+ 0x7FFFE - 0x7FFFF
+ 0x8FFFE - 0x8FFFF
+ 0x9FFFE - 0x9FFFF
+ 0xAFFFE - 0xAFFFF
+ 0xBFFFE - 0xBFFFF
+ 0xCFFFE - 0xCFFFF
+ 0xDFFFE - 0xDFFFF
+ 0xEFFFE - 0xEFFFF
+ 0xFFFFE - 0xFFFFF
+ 0x10FFFE - 0x10FFFF
+*/
+/*** GENERATED CODE ***/ /* The masks fold several second-byte values into one test: (b & 0xEF) == 0xAF accepts 0xAF and 0xBF; (b & 0xCF) == 0x8F accepts 0x8F, 0x9F, 0xAF and 0xBF. NOTE(review): s is expanded several times, unparenthesized under the cast, and bytes [0]..[3] are read (each only after the preceding bytes matched) with no length argument -- confirm callers guarantee this. */
+#define is_LARGER_NON_CHARS_utf8(s) \
+( ( 0xF0 == ((const U8*)s)[0] ) ? \
+ ( ( ( ( ((const U8*)s)[1] == 0x9F || ( ( ((const U8*)s)[1] & 0xEF ) == 0xAF ) ) && ( 0xBF == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0xBE, 0xBF) ) ) ? 4 : 0 )\
+: ( inRANGE_helper_(U8, ((const U8*)s)[0], 0xF1, 0xF3) ) ? \
+ ( ( ( ( ( ((const U8*)s)[1] & 0xCF ) == 0x8F ) && ( 0xBF == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0xBE, 0xBF) ) ) ? 4 : 0 )\
+: ( ( ( ( 0xF4 == ((const U8*)s)[0] ) && ( 0x8F == ((const U8*)s)[1] ) ) && ( 0xBF == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0xBE, 0xBF) ) ) ? 4 : 0 )
+
+/*
SURROGATE: Surrogate code points
\p{_Perl_Surrogate}
@@ -1360,6 +1402,50 @@
: ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x42 == ((const U8*)s)[1] ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( 0x73 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x72, 0x73) ) ) ? 5 : 0 ) : 0 )
/*
+ SHORTER_NON_CHARS: # 4 bytes
+ Yields 4 when the four bytes at s match one of the patterns tested below, otherwise 0. NOTE(review): the byte values are presumably this platform variant's encoding of the ranges listed here; the conditional that selects this definition of the macro is outside this hunk -- confirm.
+ 0xFDD0 - 0xFDEF
+ 0xFFFE - 0xFFFF
+ 0x1FFFE - 0x1FFFF
+ 0x2FFFE - 0x2FFFF
+ 0x3FFFE - 0x3FFFF
+*/
+/*** GENERATED CODE ***/ /* Accepted patterns: 0xDD 0x73 0x55 then one of 0x57..0x59, 0x62..0x6A, 0x70..0x73; 0xDD 0x73 0x56 then 0x41..0x4A or 0x51..0x56; 0xDD 0x73 0x73 then 0x72..0x73; or 0xDF, 0xEA or 0xEC followed by 0x73 0x73 then 0x72..0x73 (taking inRANGE_helper_ as an inclusive range test). NOTE(review): s is expanded several times, unparenthesized under the cast, and bytes [0]..[3] are read with no length argument -- confirm callers guarantee this. */
+#define is_SHORTER_NON_CHARS_utf8(s) \
+( ( 0xDD == ((const U8*)s)[0] ) ? \
+ ( ( 0x73 == ((const U8*)s)[1] ) ? \
+ ( ( 0x55 == ((const U8*)s)[2] ) ? \
+ ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x73) ) ? 4 : 0 )\
+ : ( 0x56 == ((const U8*)s)[2] ) ? \
+ ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x56) ) ? 4 : 0 )\
+ : ( ( 0x73 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x72, 0x73) ) ) ? 4 : 0 )\
+ : 0 ) \
+: ( ( ( ( 0xDF == ((const U8*)s)[0] || 0xEA == ((const U8*)s)[0] || 0xEC == ((const U8*)s)[0] ) && ( 0x73 == ((const U8*)s)[1] ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x72, 0x73) ) ) ? 4 : 0 )
+
+/*
+ LARGER_NON_CHARS: # 5 bytes
+ Yields 5 when the five bytes at s match one of the patterns tested below, otherwise 0. NOTE(review): the byte values are presumably this platform variant's encoding of the ranges listed here; the conditional that selects this definition of the macro is outside this hunk -- confirm.
+ 0x4FFFE - 0x4FFFF
+ 0x5FFFE - 0x5FFFF
+ 0x6FFFE - 0x6FFFF
+ 0x7FFFE - 0x7FFFF
+ 0x8FFFE - 0x8FFFF
+ 0x9FFFE - 0x9FFFF
+ 0xAFFFE - 0xAFFFF
+ 0xBFFFE - 0xBFFFF
+ 0xCFFFE - 0xCFFFF
+ 0xDFFFE - 0xDFFFF
+ 0xEFFFE - 0xEFFFF
+ 0xFFFFE - 0xFFFFF
+ 0x10FFFE - 0x10FFFF
+*/
+/*** GENERATED CODE ***/ /* Accepted patterns: 0xED, then a second byte of 0x4A, 0x52, 0x54, 0x56, 0x58, 0x62, 0x64, 0x66, 0x68, 0x6A, 0x71 or 0x73 (each "& 0xFD" test accepts a value and that value + 2), then 0x73 0x73, then 0x72..0x73; or 0xEE 0x42 0x73 0x73 then 0x72..0x73 (taking inRANGE_helper_ as an inclusive range test). NOTE(review): s is expanded several times, unparenthesized under the cast, and bytes [0]..[4] are read with no length argument -- confirm callers guarantee this. */
+#define is_LARGER_NON_CHARS_utf8(s) \
+( ( 0xED == ((const U8*)s)[0] ) ? \
+ ( ( ( ( ( ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x52 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x54 ) || ((const U8*)s)[1] == 0x58 || ((const U8*)s)[1] == 0x62 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x64 ) || ( ( ((const U8*)s)[1] & 0xFD ) == 0x68 ) || ( ( ((const U8*)s)[1] & 0xFD ) == 0x71 ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( 0x73 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x72, 0x73) ) ) ? 5 : 0 )\
+: ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x42 == ((const U8*)s)[1] ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( 0x73 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x72, 0x73) ) ) ? 5 : 0 )
+
+/*
SURROGATE: Surrogate code points
\p{_Perl_Surrogate}
@@ -2538,6 +2624,50 @@
: ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x42 == ((const U8*)s)[1] ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( 0x72 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x71, 0x72) ) ) ? 5 : 0 ) : 0 )
/*
+ SHORTER_NON_CHARS: # 4 bytes
+ Yields 4 when the four bytes at s match one of the patterns tested below, otherwise 0. NOTE(review): the byte values are presumably this platform variant's encoding of the ranges listed here; the conditional that selects this definition of the macro is outside this hunk -- confirm.
+ 0xFDD0 - 0xFDEF
+ 0xFFFE - 0xFFFF
+ 0x1FFFE - 0x1FFFF
+ 0x2FFFE - 0x2FFFF
+ 0x3FFFE - 0x3FFFF
+*/
+/*** GENERATED CODE ***/ /* Accepted patterns: 0xDD 0x72 0x55 then one of 0x57..0x59, 0x5F, 0x62..0x6A, 0x70..0x72; 0xDD 0x72 0x56 then 0x41..0x4A or 0x51..0x56; 0xDD 0x72 0x72 then 0x71..0x72; or 0xDF, 0xEA or 0xEC followed by 0x72 0x72 then 0x71..0x72 (taking inRANGE_helper_ as an inclusive range test). NOTE(review): s is expanded several times, unparenthesized under the cast, and bytes [0]..[3] are read with no length argument -- confirm callers guarantee this. */
+#define is_SHORTER_NON_CHARS_utf8(s) \
+( ( 0xDD == ((const U8*)s)[0] ) ? \
+ ( ( 0x72 == ((const U8*)s)[1] ) ? \
+ ( ( 0x55 == ((const U8*)s)[2] ) ? \
+ ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || 0x5F == ((const U8*)s)[3] || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x72) ) ? 4 : 0 )\
+ : ( 0x56 == ((const U8*)s)[2] ) ? \
+ ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x56) ) ? 4 : 0 )\
+ : ( ( 0x72 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x71, 0x72) ) ) ? 4 : 0 )\
+ : 0 ) \
+: ( ( ( ( 0xDF == ((const U8*)s)[0] || 0xEA == ((const U8*)s)[0] || 0xEC == ((const U8*)s)[0] ) && ( 0x72 == ((const U8*)s)[1] ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x71, 0x72) ) ) ? 4 : 0 )
+
+/*
+ LARGER_NON_CHARS: # 5 bytes
+ Yields 5 when the five bytes at s match one of the patterns tested below, otherwise 0. NOTE(review): the byte values are presumably this platform variant's encoding of the ranges listed here; the conditional that selects this definition of the macro is outside this hunk -- confirm.
+ 0x4FFFE - 0x4FFFF
+ 0x5FFFE - 0x5FFFF
+ 0x6FFFE - 0x6FFFF
+ 0x7FFFE - 0x7FFFF
+ 0x8FFFE - 0x8FFFF
+ 0x9FFFE - 0x9FFFF
+ 0xAFFFE - 0xAFFFF
+ 0xBFFFE - 0xBFFFF
+ 0xCFFFE - 0xCFFFF
+ 0xDFFFE - 0xDFFFF
+ 0xEFFFE - 0xEFFFF
+ 0xFFFFE - 0xFFFFF
+ 0x10FFFE - 0x10FFFF
+*/
+/*** GENERATED CODE ***/ /* Accepted patterns: 0xED, then a second byte of 0x4A, 0x52, 0x54, 0x56, 0x58, 0x5F, 0x63, 0x65, 0x67, 0x69, 0x70 or 0x72 (each "& 0xFD" test accepts a value and that value + 2), then 0x72 0x72, then 0x71..0x72; or 0xEE 0x42 0x72 0x72 then 0x71..0x72 (taking inRANGE_helper_ as an inclusive range test). NOTE(review): s is expanded several times, unparenthesized under the cast, and bytes [0]..[4] are read with no length argument -- confirm callers guarantee this. */
+#define is_LARGER_NON_CHARS_utf8(s) \
+( ( 0xED == ((const U8*)s)[0] ) ? \
+ ( ( ( ( ( ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x52 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x54 ) || ((const U8*)s)[1] == 0x58 || ((const U8*)s)[1] == 0x5F || ((const U8*)s)[1] == 0x63 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x65 ) || ((const U8*)s)[1] == 0x69 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x70 ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( 0x72 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x71, 0x72) ) ) ? 5 : 0 )\
+: ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x42 == ((const U8*)s)[1] ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( 0x72 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x71, 0x72) ) ) ? 5 : 0 )
+
+/*
SURROGATE: Surrogate code points
\p{_Perl_Surrogate}
@@ -3617,6 +3747,6 @@
* 696e706fddd3ce8cd48c7ea91caf4c9edf5c296432d320aa7b78631f69aa9eac lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 24120d5e0c9685c442c93bc1dbea9b85ef973bf8e9474baf0e55b160c288226b regen/charset_translations.pl
- * 3635c6e564558e965018947bdab45f37d9a4fa82eb05b2694eae1a04bf7e65a3 regen/regcharclass.pl
+ * 424104324c56e77f414decea849f585b157bdd5c3daaf4dd39b39f23b9e18d85 regen/regcharclass.pl
* b2f896452d2b30da3e04800f478c60c1fd0b03d6b668689b020f1e3cf1f1cdd9 regen/regcharclass_multi_char_folds.pl
* ex: set ro: */