summaryrefslogtreecommitdiff
path: root/utf8.h
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-02-11 17:26:22 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-02-11 17:26:22 +0000
commit877d9f0d218f342333990f21c1962a28c01d42b0 (patch)
tree0a0432ac030a964f402885ff37e53f08ada683c0 /utf8.h
parent30edbf31611701ee49012392542f33ef851b0523 (diff)
downloadperl-877d9f0d218f342333990f21c1962a28c01d42b0.tar.gz
UTF-8 documentation.
p4raw-id: //depot/perl@8770
Diffstat (limited to 'utf8.h')
-rw-r--r--utf8.h16
1 files changed, 16 insertions, 0 deletions
diff --git a/utf8.h b/utf8.h
index 00350f2fe7..8b0c8c3bd4 100644
--- a/utf8.h
+++ b/utf8.h
@@ -64,6 +64,22 @@ END_EXTERN_C
/* 2**36 (0x1000000000), written as a 64-bit one shifted left 36 places. */
#define UTF8_QUAD_MAX (UINT64_C(1) << 36)
+/*
+
+ The following table is from Unicode 3.1.
+
+ Code Points           1st Byte  2nd Byte  3rd Byte  4th Byte
+
+ U+0000..U+007F        00..7F
+ U+0080..U+07FF        C2..DF    80..BF
+ U+0800..U+0FFF        E0        A0..BF    80..BF
+ U+1000..U+FFFF        E1..EF    80..BF    80..BF
+ U+10000..U+3FFFF      F0        90..BF    80..BF    80..BF
+ U+40000..U+FFFFF      F1..F3    80..BF    80..BF    80..BF
+ U+100000..U+10FFFF    F4        80..8F    80..BF    80..BF
+
+ */
+
/*
 * UTF8_IS_ASCII(c): true when c, converted to U8, is below 0x80.
 *
 * The argument is parenthesized before the cast so that an expression
 * argument (a ?:, an addition, ...) is converted as a whole; without the
 * parentheses the cast would bind to the first operand only.
 * c is evaluated exactly once.
 */
#define UTF8_IS_ASCII(c) (((U8)(c)) < 0x80)
/*
 * UTF8_IS_START(c): true when c, converted to U8, lies in 0xc0..0xfd
 * inclusive.  Note that this range is wider than the C2..F4 first-byte
 * range listed in the Unicode 3.1 table above.
 *
 * Written as a single unsigned range check so that c is evaluated exactly
 * once (safe for arguments such as *p++): subtracting 0xc0 and converting
 * back to U8 wraps every byte below 0xc0 to a value above 0x3d, so only
 * 0xc0..0xfd map into 0x00..0x3d.  The argument is parenthesized so that
 * expression arguments are cast as a whole.
 */
#define UTF8_IS_START(c) (((U8)(((U8)(c)) - 0xc0)) <= 0x3d)
/*
 * UTF8_IS_CONTINUATION(c): true when c, converted to U8, lies in
 * 0x80..0xbf inclusive, i.e. its top two bits are 10.
 *
 * Implemented as a mask test so that c is evaluated exactly once (safe for
 * arguments such as *p++).  The argument is parenthesized so that
 * expression arguments are cast as a whole.
 */
#define UTF8_IS_CONTINUATION(c) ((((U8)(c)) & 0xc0) == 0x80)