diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-09-02 15:58:41 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-09-13 21:14:02 -0600 |
commit | 765ec46cc8c8533a1aed8a26490b879042d7fff3 (patch) | |
tree | c98bdfea7bdf51e08ed93a1fa9778f4af2aa4eca /regen | |
parent | a0e786e579832738092a40f177fc4b66505e1f6d (diff) | |
download | perl-765ec46cc8c8533a1aed8a26490b879042d7fff3.tar.gz |
regen/unicode_constants.pl: Add name parameter
A future commit will want to use the first surrogate code point's UTF-8
value. Add this to the generated macros, and give it a name, since
there is no official one. The program has to be modified to cope with
this.
Diffstat (limited to 'regen')
-rw-r--r-- | regen/unicode_constants.pl | 14 |
1 files changed, 11 insertions, 3 deletions
```diff
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index 56e53491b1..e3d588a599 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -44,6 +44,9 @@ END
 # native indicates that the output is the code point, converted to the
 # platform's native character set if applicable
 #
+# If the code point has no official name, the desired name may be appended
+# after the flag, which will be ignored if there is an official name.
+#
 # This program is used to make it convenient to create compile time constants
 # of UTF-8, and to generate proper EBCDIC as well as ASCII without manually
 # having to figure things out.
@@ -56,7 +59,8 @@ while ( <DATA> ) {
     chomp;
     unless ($_ =~ m/ ^ ( [^\ ]* )              # Name or code point token
-                       (?: [\ ]+ ( .* ) )?     # optional flag
+                       (?: [\ ]+ ( [^ ]* ) )?  # optional flag
+                       (?: [\ ]+ ( .* ) )?     # name if unnamed; flag is required
                    /x)
     {
         die "Unexpected syntax at line $.: $_\n";
@@ -64,6 +68,7 @@ while ( <DATA> ) {
     my $name_or_cp = $1;
     my $flag = $2;
+    my $desired_name = $3;
     my $name;
     my $cp;
@@ -77,11 +82,13 @@ while ( <DATA> ) {
     }
     else {
         $cp = $name_or_cp;
-        $name = charnames::viacode("0$cp"); # viacode requires a leading zero
-                                            # to be sure that the argument is hex
+        $name = charnames::viacode("0$cp") // ""; # viacode requires a leading
+                                                  # zero to be sure that the
+                                                  # argument is hex
         die "Unknown code point '$cp' at line $.: $_\n" unless defined $cp;
     }
+    $name = $desired_name if $name eq "";
     $name =~ s/ /_/g; # The macro name can have no blanks in it
     my $str = join "", map { sprintf "\\x%02X", $_ }
@@ -128,6 +135,7 @@ __DATA__
 03C5 tail
 2010 string
+D800 first FIRST_SURROGATE
 007F native
 00DF native
```