diff options
author | Karl Williamson <khw@cpan.org> | 2014-05-05 22:17:33 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2014-05-31 11:52:06 -0600 |
commit | 5dca92787911972e6827cbb3173c9b1f44ea8613 (patch) | |
tree | 4441de42446a50729bec14d361bc769963294449 /regcharclass.h | |
parent | 40f914fd7fc2115d5df1c2b1ecc1d960d5f0a210 (diff) | |
download | perl-5dca92787911972e6827cbb3173c9b1f44ea8613.tar.gz |
utf8.h: Use new macro type from previous commit
This allows for an efficient isUTF8_CHAR macro, which does its own
length checking, and uses the UTF8_INVARIANT macro for the first byte.
On EBCDIC systems this macro, which does a table lookup, is quite a bit
more efficient than all the branches that would normally have to be
done.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- | regcharclass.h | 21 |
1 files changed, 9 insertions, 12 deletions
diff --git a/regcharclass.h b/regcharclass.h index f4a7e083dc..ebda2f7a11 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -1009,14 +1009,13 @@ : ( ( 0x72 == ((U8*)s)[2] ) && ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x47 ) ) ? 4 : 0 ) : 0 ) /* - UTF8_CHAR: Matches legal UTF-EBCDIC encoded characters from 1 through 3 bytes + UTF8_CHAR: Matches legal UTF-EBCDIC encoded characters from 2 through 3 bytes - 0x0 - 0x3FFF + 0xA0 - 0x3FFF */ /*** GENERATED CODE ***/ #define is_UTF8_CHAR_utf8_no_length_checks(s) \ -( ( ( ( ((U8*)s)[0] & 0xC0 ) == 0x00 ) || ( ( ((U8*)s)[0] & 0xEF ) == 0x40 ) || ( ( ((U8*)s)[0] & 0xDF ) == 0x4B ) || ( ( ((U8*)s)[0] & 0xCC ) == 0x4C ) || ( ( ((U8*)s)[0] & 0xDE ) == 0x5A ) || ( ( ((U8*)s)[0] & 0xFE ) == 0x60 ) || ((U8*)s)[0] == 0x79 || ( ( ((U8*)s)[0] & 0xEF ) == 0x81 ) || ( ( ((U8*)s)[0] & 0xEE ) == 0x82 ) || ( ( ((U8*)s)[0] & 0xEC ) == 0x84 ) || ( ( ((U8*)s)[0] & 0xEE ) == 0x88 ) || ((U8*)s)[0] == 0xA1 || ( ( ((U8*)s)[0] & 0xBE ) == 0xA2 ) || ( ( ((U8*)s)[0] & 0xBC ) == 0xA4 ) || ( ( ((U8*)s)[0] & 0xFE ) == 0xA8 ) || ( ( ((U8*)s)[0] & 0xEF ) == 0xAD ) || ( ( ((U8*)s)[0] & 0xE8 ) == 0xC0 ) || ( ( ((U8*)s)[0] & 0xCE ) == 0xC8 ) || ((U8*)s)[0] == 0xE0 || ( ( ((U8*)s)[0] & 0xF8 ) == 0xF0 ) || ((U8*)s)[0] == 0xFF ) ? 1\ -: ( 0x80 == ((U8*)s)[0] || ( 0x8A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xAC ) || ( 0xAE <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xB6 ) ) ?\ +( ( 0x80 == ((U8*)s)[0] || ( 0x8A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xAC ) || ( 0xAE <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xB6 ) ) ?\ ( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFC ) == 0x70 ) ? 
2 : 0 )\ : ( ( ( ( ( ((U8*)s)[0] & 0xFC ) == 0xB8 ) || ((U8*)s)[0] == 0xBC || ( ( ((U8*)s)[0] & 0xFE ) == 0xBE ) || ( ( ((U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((U8*)s)[0] & 0xFC ) == 0xCC ) ) && ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) ) ? 3 : 0 ) @@ -1727,14 +1726,13 @@ : ( ( 0x71 == ((U8*)s)[2] ) && ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x47 ) ) ? 4 : 0 ) : 0 ) /* - UTF8_CHAR: Matches legal UTF-EBCDIC encoded characters from 1 through 3 bytes + UTF8_CHAR: Matches legal UTF-EBCDIC encoded characters from 2 through 3 bytes - 0x0 - 0x3FFF + 0xA0 - 0x3FFF */ /*** GENERATED CODE ***/ #define is_UTF8_CHAR_utf8_no_length_checks(s) \ -( ( ( ( ((U8*)s)[0] & 0xC0 ) == 0x00 ) || ( ( ((U8*)s)[0] & 0xEF ) == 0x40 ) || ( ( ((U8*)s)[0] & 0xDF ) == 0x4B ) || ( ( ((U8*)s)[0] & 0xFC ) == 0x4C ) || ( ( ((U8*)s)[0] & 0xDE ) == 0x5A ) || ( ( ((U8*)s)[0] & 0xFE ) == 0x5C ) || ((U8*)s)[0] == 0x5E || ( ( ((U8*)s)[0] & 0xFE ) == 0x60 ) || ( ( ((U8*)s)[0] & 0xEC ) == 0x6C ) || ((U8*)s)[0] == 0x79 || ( ( ((U8*)s)[0] & 0xEF ) == 0x81 ) || ( ( ((U8*)s)[0] & 0xEE ) == 0x82 ) || ( ( ((U8*)s)[0] & 0xEC ) == 0x84 ) || ( ( ((U8*)s)[0] & 0xEE ) == 0x88 ) || ((U8*)s)[0] == 0xA1 || ( ( ((U8*)s)[0] & 0xBE ) == 0xA2 ) || ( ( ((U8*)s)[0] & 0xBC ) == 0xA4 ) || ( ( ((U8*)s)[0] & 0xFE ) == 0xA8 ) || ((U8*)s)[0] == 0xB0 || ( ( ((U8*)s)[0] & 0xFE ) == 0xBA ) || ( ( ((U8*)s)[0] & 0xE8 ) == 0xC0 ) || ( ( ((U8*)s)[0] & 0xCE ) == 0xC8 ) || ((U8*)s)[0] == 0xE0 || ( ( ((U8*)s)[0] & 0xF8 ) == 0xF0 ) || ((U8*)s)[0] == 0xFF ) ? 
1\ -: ( 0x78 == ((U8*)s)[0] || 0x80 == ((U8*)s)[0] || ( 0x8A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xAF ) || ( 0xB1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xB5 ) ) ?\ +( ( 0x78 == ((U8*)s)[0] || 0x80 == ((U8*)s)[0] || ( 0x8A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xAF ) || ( 0xB1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xB5 ) ) ?\ ( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) ? 2 : 0 )\ : ( ( ( ((U8*)s)[0] == 0xB7 || ( ( ((U8*)s)[0] & 0xFE ) == 0xB8 ) || ( ( ((U8*)s)[0] & 0xFC ) == 0xBC ) || ( ( ((U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((U8*)s)[0] & 0xFC ) == 0xCC ) ) && ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ) ? 3 : 0 ) @@ -2453,14 +2451,13 @@ : ( ( 0x74 == ((U8*)s)[2] ) && ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x47 ) ) ? 
4 : 0 ) : 0 ) /* - UTF8_CHAR: Matches legal UTF-EBCDIC encoded characters from 1 through 3 bytes + UTF8_CHAR: Matches legal UTF-EBCDIC encoded characters from 2 through 3 bytes - 0x0 - 0x3FFF + 0xA0 - 0x3FFF */ /*** GENERATED CODE ***/ #define is_UTF8_CHAR_utf8_no_length_checks(s) \ -( ( ( ( ((U8*)s)[0] & 0xC0 ) == 0x00 ) || ( ( ((U8*)s)[0] & 0xEF ) == 0x40 ) || ( ( ((U8*)s)[0] & 0xCE ) == 0x4A ) || ( ( ((U8*)s)[0] & 0xCC ) == 0x4C ) || ( ( ((U8*)s)[0] & 0xFE ) == 0x60 ) || ( ( ((U8*)s)[0] & 0xAF ) == 0x81 ) || ( ( ((U8*)s)[0] & 0xEE ) == 0x82 ) || ( ( ((U8*)s)[0] & 0xEC ) == 0x84 ) || ( ( ((U8*)s)[0] & 0xEE ) == 0x88 ) || ( ( ((U8*)s)[0] & 0xFE ) == 0xA2 ) || ( ( ((U8*)s)[0] & 0xFC ) == 0xA4 ) || ( ( ((U8*)s)[0] & 0xFE ) == 0xA8 ) || ( ( ((U8*)s)[0] & 0xBF ) == 0xBB ) || ( ( ((U8*)s)[0] & 0xFE ) == 0xBC ) || ( ( ((U8*)s)[0] & 0xCE ) == 0xC2 ) || ( ( ((U8*)s)[0] & 0xCC ) == 0xC4 ) || ( ( ((U8*)s)[0] & 0xCE ) == 0xC8 ) || ( ( ((U8*)s)[0] & 0xFE ) == 0xF0 ) || ( ( ((U8*)s)[0] & 0xFD ) == 0xFD ) ) ? 1\ -: ( ( 0x8A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xA1 ) || ( 0xAA <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xB5 ) ) ?\ +( ( ( 0x8A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xA1 ) || ( 0xAA <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xB5 ) ) ?\ ( ( ( ( ((U8*)s)[1] & 0xEF ) == 0x41 ) || ( ( ((U8*)s)[1] & 0xCE ) == 0x42 ) || ( ( ((U8*)s)[1] & 0xEC ) == 0x44 ) || ( ( ((U8*)s)[1] & 0xEE ) == 0x48 ) || ( ( ((U8*)s)[1] & 0xFC ) == 0x64 ) || ( ( ((U8*)s)[1] & 0xFE ) == 0x68 ) || ( ( ((U8*)s)[1] & 0xFA ) == 0x70 ) ) ? 
2 : 0 )\ : ( ( ( ( 0xB7 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xBA ) || ( 0xBE <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xC0 ) || ( 0xCA <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xD0 ) || 0xDA == ((U8*)s)[0] ) && ( ( ( ((U8*)s)[1] & 0xEF ) == 0x41 ) || ( ( ((U8*)s)[1] & 0xCE ) == 0x42 ) || ( ( ((U8*)s)[1] & 0xEC ) == 0x44 ) || ( ( ((U8*)s)[1] & 0xEE ) == 0x48 ) || ( ( ((U8*)s)[1] & 0xFC ) == 0x64 ) || ( ( ((U8*)s)[1] & 0xFE ) == 0x68 ) || ( ( ((U8*)s)[1] & 0xFA ) == 0x70 ) ) ) && ( ( ( ((U8*)s)[2] & 0xEF ) == 0x41 ) || ( ( ((U8*)s)[2] & 0xCE ) == 0x42 ) || ( ( ((U8*)s)[2] & 0xEC ) == 0x44 ) || ( ( ((U8*)s)[2] & 0xEE ) == 0x48 ) || ( ( ((U8*)s)[2] & 0xFC ) == 0x64 ) || ( ( ((U8*)s)[2] & 0xFE ) == 0x68 ) || ( ( ((U8*)s)[2] & 0xFA ) == 0x70 ) ) ) ? 3 : 0 ) |