summaryrefslogtreecommitdiff
path: root/unicode_constants.h
diff options
context:
space:
mode:
Diffstat (limited to 'unicode_constants.h')
-rw-r--r--unicode_constants.h36
1 files changed, 36 insertions, 0 deletions
diff --git a/unicode_constants.h b/unicode_constants.h
index 02d38113ae..1c0e62e356 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -21,6 +21,30 @@
* "_TAIL" if instead it represents all but the first byte. This, and
* with no additional suffix are both string constants */
+/*
+=head1 Unicode Support
+
+=for apidoc AmU|placeholder|BOM_UTF8
+
+This is a macro that evaluates to a string constant containing the UTF-8
+bytes that encode the Unicode BYTE ORDER MARK (U+FEFF) on the platform that
+perl is compiled on. This allows code to use a mnemonic for this character
+that works on both ASCII and EBCDIC platforms.
+S<C<sizeof(BOM_UTF8) - 1>> can be used to get its length in bytes; the
+C<- 1> excludes the string constant's trailing NUL.
+
+=for apidoc AmU|placeholder|REPLACEMENT_CHARACTER_UTF8
+
+This is a macro that evaluates to a string constant containing the UTF-8
+bytes that encode the Unicode REPLACEMENT CHARACTER (U+FFFD) on the platform
+that perl is compiled on. This allows code to use a mnemonic for this
+character that works on both ASCII and EBCDIC platforms.
+S<C<sizeof(REPLACEMENT_CHARACTER_UTF8) - 1>> can be used to get its length in
+bytes; the C<- 1> excludes the string constant's trailing NUL.
+
+=cut
+*/
+
#define UNICODE_MAJOR_VERSION 9
#define UNICODE_DOT_VERSION 0
#define UNICODE_DOT_DOT_VERSION 0
@@ -45,6 +69,10 @@
# define BOM_UTF8_FIRST_BYTE 0xEF /* U+FEFF */
# define BOM_UTF8_TAIL "\xBB\xBF" /* U+FEFF */
+# define BOM_UTF8 "\xEF\xBB\xBF" /* U+FEFF */
+
+# define REPLACEMENT_CHARACTER_UTF8 "\xEF\xBF\xBD" /* U+FFFD */
+
# define NBSP_NATIVE 0xA0 /* U+00A0 */
# define NBSP_UTF8 "\xC2\xA0" /* U+00A0 */
@@ -84,6 +112,10 @@
# define BOM_UTF8_FIRST_BYTE 0xDD /* U+FEFF */
# define BOM_UTF8_TAIL "\x73\x66\x73" /* U+FEFF */
+# define BOM_UTF8 "\xDD\x73\x66\x73" /* U+FEFF */
+
+# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x73\x73\x71" /* U+FFFD */
+
# define NBSP_NATIVE 0x41 /* U+00A0 */
# define NBSP_UTF8 "\x80\x41" /* U+00A0 */
@@ -123,6 +155,10 @@
# define BOM_UTF8_FIRST_BYTE 0xDD /* U+FEFF */
# define BOM_UTF8_TAIL "\x72\x65\x72" /* U+FEFF */
+# define BOM_UTF8 "\xDD\x72\x65\x72" /* U+FEFF */
+
+# define REPLACEMENT_CHARACTER_UTF8 "\xDD\x72\x72\x70" /* U+FFFD */
+
# define NBSP_NATIVE 0x41 /* U+00A0 */
# define NBSP_UTF8 "\x78\x41" /* U+00A0 */