summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2022-02-13 19:23:50 -0700
committerKarl Williamson <khw@cpan.org>2022-03-19 23:17:51 -0600
commit63cd44e4d01aafda8bc32c13f34dbab0035ac382 (patch)
tree23125e021071c0696c7bf0d96fd0cf5e16a88f68
parente80ffedaa6971ee7c9654d690bc70f67b7de2669 (diff)
downloadperl-63cd44e4d01aafda8bc32c13f34dbab0035ac382.tar.gz
regen/unicode_constants.pl: Extract code into a function
This is in preparation for it to be used in multiple places in a future commit.
-rw-r--r--regen/unicode_constants.pl22
1 files changed, 21 insertions, 1 deletions
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index f2e065633c..67bc75752d 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -54,6 +54,26 @@ bytes.
END
+sub backslash_x_form($$;$) {
+ # Return a string of \xHH escapes (two upper-case hex digits each, no
+ # braces) representing the code point given as the first parameter.
+ # Despite its name, $bytes is used as a numeric code point in both
+ # branches below, not as a string of bytes.
+ #
+ # Parameters:
+ #   $bytes     the code point to convert
+ #   $charset   the character set the output is translated into; it is
+ #              handed to get_a2n() or cp_2_utfbytes()
+ #   $non_utf8  optional; if true, a single \xHH escape is returned and
+ #              the code point must not exceed 255.  If absent or false,
+ #              one \xHH escape is returned per character of the string
+ #              that cp_2_utfbytes() yields.
+ #
+ # Dies if $non_utf8 is true and the code point is above 255.

+ my ($bytes, $charset, $non_utf8) = @_;
+ if ($non_utf8) {
+ # Only code points 0..255 can be expressed as a single byte
+ die "Must be utf8 if above 255" if $bytes > 255;
+ # get_a2n() is defined elsewhere; presumably it returns a reference to
+ # an ASCII-to-native translation table for $charset -- confirm.
+ my $a2n = get_a2n($charset);
+ return sprintf "\\x%02X", $a2n->[$bytes];
+ }
+ else {
+ # cp_2_utfbytes() is defined elsewhere; presumably it returns the
+ # UTF-8-style byte string for the code point in $charset -- confirm.
+ # Each character of its result becomes one \xHH escape.
+ return join "", map { sprintf "\\x%02X", ord $_ }
+ split //, cp_2_utfbytes($bytes, $charset);
+ }
+}
+
# Split the Unicode version string into its dot-separated components.  The
# third component is optional in the pattern and defaults to 0 when absent.
my $version = Unicode::UCD::UnicodeVersion();
my ($major, $dot, $dotdot) = $version =~ / (.*?) \. (.*?) (?: \. (.*) )? $ /x;
$dotdot = 0 unless defined $dotdot;
@@ -136,7 +156,7 @@ foreach my $charset (get_supported_code_pages()) {
$str = sprintf "0x%02X", $cp; # Is a numeric constant
}
else {
- $str = join "", map { sprintf "\\x%02X", ord $_ } split //, cp_2_utfbytes($U_cp, $charset);
+ $str = backslash_x_form($U_cp, $charset);
$suffix = '_UTF8';
if (! defined $flag || $flag =~ /^ string (_skip_if_undef)? $/x) {