diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-12-29 07:08:32 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-12-29 07:08:32 +0000 |
commit | c512ce4f7f4a9bd0f491f91cb5a15fcb65ee37d9 (patch) | |
tree | faecae30ec45ee9fe9180f727c600de58a3a50d2 /utf8.h | |
parent | a7514e1ec900a5b60cda6ed25728476973d26ae0 (diff) | |
download | perl-c512ce4f7f4a9bd0f491f91cb5a15fcb65ee37d9.tar.gz |
(Retracted by #8264) Externally: join() was still quite UTF-8-unaware.
Internally: sv_catsv() wasn't quite okay on UTF-8; it assumed
that the only cases to care about are byte+byte and byte+character.
TODO: See how well pp_concat() could be implemented in terms
of sv_catsv().
p4raw-id: //depot/perl@8248
Diffstat (limited to 'utf8.h')
-rw-r--r-- | utf8.h | 13 |
1 files changed, 8 insertions, 5 deletions
@@ -62,15 +62,18 @@ END_EXTERN_C
 
 #define UTF8_QUAD_MAX UINT64_C(0x1000000000)
 
-#define UTF8_IS_ASCII(c) ((c) < 0x80)
-#define UTF8_IS_START(c) ((c) >= 0xc0 && ((c) <= 0xfd))
-#define UTF8_IS_CONTINUATION(c) ((c) >= 0x80 && ((c) <= 0xbf))
-#define UTF8_IS_CONTINUED(c) ((c) & 0x80)
+#define UTF8_IS_ASCII(c) (((U8)c) < 0x80)
+#define UTF8_IS_START(c) (((U8)c) >= 0xc0 && (((U8)c) <= 0xfd))
+#define UTF8_IS_CONTINUATION(c) (((U8)c) >= 0x80 && (((U8)c) <= 0xbf))
+#define UTF8_IS_CONTINUED(c) (((U8)c) & 0x80)
 
-#define UTF8_CONTINUATION_MASK 0x3f
+#define UTF8_CONTINUATION_MASK ((U8)0x3f)
 #define UTF8_ACCUMULATION_SHIFT 6
 #define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK))
 
+#define UTF8_EIGHT_BIT_HI(c) ( (((U8)c)>>6) |0xc0)
+#define UTF8_EIGHT_BIT_LO(c) (((((U8)c)>>6)&0x3f)|0x80)
+
 #ifdef HAS_QUAD
 #define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \
 (uv) < 0x800 ? 2 : \ |