diff options
author | Tony Cook <tony@develop-help.com> | 2010-03-16 23:46:48 +1100 |
---|---|---|
committer | Tony Cook <tony@develop-help.com> | 2010-05-31 20:52:24 +1000 |
commit | 65ab9279784aa811d78b2903b57bc0e7947dec78 (patch) | |
tree | c8c63cd44705797a091def10c9b83a520d0a6cde /utf8.h | |
parent | e57ed4ecd4d7de38a79a316da8d657dad656f93f (diff) | |
download | perl-65ab9279784aa811d78b2903b57bc0e7947dec78.tar.gz |
handle perl extended utf8 start bytes
perl uses UTF8_IS_START() to test whether a byte is a valid start byte,
but the macro didn't take perl's extended UTF-8 range into account.
Diffstat (limited to 'utf8.h')
-rw-r--r-- | utf8.h | 4 |
1 files changed, 3 insertions, 1 deletions
@@ -104,13 +104,15 @@ As you can see, the continuation bytes all begin with C<10>, and the leading bits of the start byte tell how many bytes there are in the encoded character. +Perl's extended UTF-8 means we can have start bytes up to FF. + */ #define UNI_IS_INVARIANT(c) (((UV)c) < 0x80) /* Note that C0 and C1 are invalid in legal UTF8, so the lower bound of the * below might ought to be C2 */ -#define UTF8_IS_START(c) (((U8)c) >= 0xc0 && (((U8)c) <= 0xfd)) +#define UTF8_IS_START(c) (((U8)c) >= 0xc0) #define UTF8_IS_CONTINUATION(c) (((U8)c) >= 0x80 && (((U8)c) <= 0xbf)) #define UTF8_IS_CONTINUED(c) (((U8)c) & 0x80) #define UTF8_IS_DOWNGRADEABLE_START(c) (((U8)c & 0xfc) == 0xc0) |