summaryrefslogtreecommitdiff
path: root/utf8.h
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2000-12-08 01:19:08 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2000-12-08 01:19:08 +0000
commit421a8bf2e4d7253d8eb0dc22451e55b15fc6c1e2 (patch)
tree6b9c0ff622d3926302e9c26b03c5d7c90ed63526 /utf8.h
parent4dffa63e352fd05b59c46f19323b72952b04b8ce (diff)
downloadperl-421a8bf2e4d7253d8eb0dc22451e55b15fc6c1e2.tar.gz
Introduce macros for UTF8 decoding.
p4raw-id: //depot/perl@8028
Diffstat (limited to 'utf8.h')
-rw-r--r--utf8.h17
1 files changed, 16 insertions, 1 deletions
diff --git a/utf8.h b/utf8.h
index 25ddc14d09..bafdc57f97 100644
--- a/utf8.h
+++ b/utf8.h
@@ -46,10 +46,26 @@ END_EXTERN_C
#define UTF8_ALLOW_ANY 0x00ff
#define UTF8_CHECK_ONLY 0x0100
+#define UNICODE_SURROGATE_FIRST 0xd800
+#define UNICODE_SURROGATE_LAST 0xdfff
+#define UNICODE_REPLACEMENT 0xfffd
+#define UNICODE_BYTER_ORDER_MARK 0xfffe
+#define UNICODE_ILLEGAL 0xffff
+
+#define UNICODE_IS_SURROGATE(c) ((c) >= UNICODE_SURROGATE_FIRST && \
+ (c) <= UNICODE_SURROGATE_LAST)
+#define UNICODE_IS_REPLACEMENT(c) ((c) == UNICODE_REPLACEMENT)
+#define UNICODE_IS_BYTE_ORDER_MARK(c) ((c) == UNICODE_BYTER_ORDER_MARK)
+#define UNICODE_IS_ILLEGAL(c) ((c) == UNICODE_ILLEGAL)
+
#define UTF8SKIP(s) PL_utf8skip[*(U8*)s]
#define UTF8_QUAD_MAX UINT64_C(0x1000000000)
+#define UTF8_IS_ASCII(c) ((c) < 0x80)
+#define UTF8_IS_START(c) ((c) >= 0xc0 && ((c) <= 0xfd))
+#define UTF8_IS_CONTINUATION(c) ((c) >= 0x80 && ((c) <= 0xbf))
+
#ifdef HAS_QUAD
#define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \
(uv) < 0x800 ? 2 : \
@@ -68,7 +84,6 @@ END_EXTERN_C
(uv) < 0x80000000 ? 6 : 7 )
#endif
-#define UNICODE_REPLACEMENT_CHARACTER 0xfffd
/*
* Note: we try to be careful never to call the isXXX_utf8() functions