From 69bc4c1f86bca21cf0baeb2b4812ea97d3bf438e Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Aug 2016 21:17:49 -0600 Subject: Add C macros for UTF-8 for BOM and REPLACEMENT CHARACTER This makes it easy for module authors to write XS code that can use these characters, and be automatically portable to EBCDIC systems. --- regen/unicode_constants.pl | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'regen') diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl index baf25f1258..acdbaa3d4e 100644 --- a/regen/unicode_constants.pl +++ b/regen/unicode_constants.pl @@ -28,6 +28,30 @@ print $out_fh <<END; [... lines lost in extraction ...] +=for apidoc AmU|placeholder|BOM_UTF8 + +This is a macro that evaluates to a string constant of the UTF-8 bytes that +define the Unicode BYTE ORDER MARK (U+FEFF) for the platform that perl +is compiled on. This allows code to use a mnemonic for this character that +works on both ASCII and EBCDIC platforms. +S<C<sizeof(BOM_UTF8) - 1>> can be used to get its length in +bytes. + +=for apidoc AmU|placeholder|REPLACEMENT_CHARACTER_UTF8 + +This is a macro that evaluates to a string constant of the UTF-8 bytes that +define the Unicode REPLACEMENT CHARACTER (U+FFFD) for the platform that perl +is compiled on. This allows code to use a mnemonic for this character that +works on both ASCII and EBCDIC platforms. +S<C<sizeof(REPLACEMENT_CHARACTER_UTF8) - 1>> can be used to get its length in +bytes. + +=cut +*/ + END my $version = Unicode::UCD::UnicodeVersion(); @@ -180,6 +204,9 @@ read_only_bottom_close_and_rename($out_fh); # DATA FORMAT # +# Note that any apidoc comments you want in the file need to be added to one +# of the prints above +# # A blank line is output as-is. # Comments (lines whose first non-blank is a '#') are converted to C-style, # though empty comments are converted to blank lines. Otherwise, each line @@ -228,6 +255,10 @@ U+2010 string BOM first BOM tail +BOM string + +U+FFFD string + NBSP native NBSP string -- cgit v1.2.1