diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-12-08 01:19:08 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-12-08 01:19:08 +0000 |
commit | 421a8bf2e4d7253d8eb0dc22451e55b15fc6c1e2 (patch) | |
tree | 6b9c0ff622d3926302e9c26b03c5d7c90ed63526 /utf8.h | |
parent | 4dffa63e352fd05b59c46f19323b72952b04b8ce (diff) | |
download | perl-421a8bf2e4d7253d8eb0dc22451e55b15fc6c1e2.tar.gz |
Introduce macros for UTF8 decoding.
p4raw-id: //depot/perl@8028
Diffstat (limited to 'utf8.h')
-rw-r--r-- | utf8.h | 17 |
1 files changed, 16 insertions, 1 deletions
@@ -46,10 +46,26 @@ END_EXTERN_C
 #define UTF8_ALLOW_ANY		0x00ff
 #define UTF8_CHECK_ONLY		0x0100
 
+#define UNICODE_SURROGATE_FIRST		0xd800
+#define UNICODE_SURROGATE_LAST		0xdfff
+#define UNICODE_REPLACEMENT		0xfffd
+#define UNICODE_BYTER_ORDER_MARK	0xfffe
+#define UNICODE_ILLEGAL			0xffff
+
+#define UNICODE_IS_SURROGATE(c)		((c) >= UNICODE_SURROGATE_FIRST && \
+					 (c) <= UNICODE_SURROGATE_LAST)
+#define UNICODE_IS_REPLACEMENT(c)	((c) == UNICODE_REPLACEMENT)
+#define UNICODE_IS_BYTE_ORDER_MARK(c)	((c) == UNICODE_BYTER_ORDER_MARK)
+#define UNICODE_IS_ILLEGAL(c)		((c) == UNICODE_ILLEGAL)
+
 #define UTF8SKIP(s) PL_utf8skip[*(U8*)s]
 
 #define UTF8_QUAD_MAX	UINT64_C(0x1000000000)
 
+#define UTF8_IS_ASCII(c)		((c) <  0x80)
+#define UTF8_IS_START(c)		((c) >= 0xc0 && ((c) <= 0xfd))
+#define UTF8_IS_CONTINUATION(c)		((c) >= 0x80 && ((c) <= 0xbf))
+
 #ifdef HAS_QUAD
 #define UNISKIP(uv) ( (uv) < 0x80           ? 1 : \
 		      (uv) < 0x800          ? 2 : \
@@ -68,7 +84,6 @@ END_EXTERN_C
 		      (uv) < 0x80000000     ? 6 : 7 )
 #endif
 
-#define UNICODE_REPLACEMENT_CHARACTER 0xfffd
 
 /*
  * Note: we try to be careful never to call the isXXX_utf8() functions