summary | refs | log | tree | commit | diff
path: root/regen
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2012-09-02 15:58:41 -0600
committer: Karl Williamson <public@khwilliamson.com> 2012-09-13 21:14:02 -0600
commit: 765ec46cc8c8533a1aed8a26490b879042d7fff3 (patch)
tree: c98bdfea7bdf51e08ed93a1fa9778f4af2aa4eca /regen
parent: a0e786e579832738092a40f177fc4b66505e1f6d (diff)
download: perl-765ec46cc8c8533a1aed8a26490b879042d7fff3.tar.gz
regen/unicode_constants.pl: Add name parameter
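A future commit will want to use the UTF-8 value of the first surrogate code point (U+D800). Add that code point to the list from which the macros are generated, and give it a name (FIRST_SURROGATE), since it has no official one. To support this, the program's input syntax is extended so that a desired name may follow the flag on a data line; that name is used only when the code point has no official name.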
Diffstat (limited to 'regen')
-rw-r--r-- regen/unicode_constants.pl 14
1 files changed, 11 insertions, 3 deletions
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index 56e53491b1..e3d588a599 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -44,6 +44,9 @@ END
# native indicates that the output is the code point, converted to the
# platform's native character set if applicable
#
+# If the code point has no official name, the desired name may be appended
+# after the flag, which will be ignored if there is an official name.
+#
# This program is used to make it convenient to create compile time constants
# of UTF-8, and to generate proper EBCDIC as well as ASCII without manually
# having to figure things out.
@@ -56,7 +59,8 @@ while ( <DATA> ) {
chomp;
unless ($_ =~ m/ ^ ( [^\ ]* ) # Name or code point token
- (?: [\ ]+ ( .* ) )? # optional flag
+ (?: [\ ]+ ( [^ ]* ) )? # optional flag
+ (?: [\ ]+ ( .* ) )? # name if unnamed; flag is required
/x)
{
die "Unexpected syntax at line $.: $_\n";
@@ -64,6 +68,7 @@ while ( <DATA> ) {
my $name_or_cp = $1;
my $flag = $2;
+ my $desired_name = $3;
my $name;
my $cp;
@@ -77,11 +82,13 @@ while ( <DATA> ) {
}
else {
$cp = $name_or_cp;
- $name = charnames::viacode("0$cp"); # viacode requires a leading zero
- # to be sure that the argument is hex
+ $name = charnames::viacode("0$cp") // ""; # viacode requires a leading
+ # zero to be sure that the
+ # argument is hex
die "Unknown code point '$cp' at line $.: $_\n" unless defined $cp;
}
+ $name = $desired_name if $name eq "";
$name =~ s/ /_/g; # The macro name can have no blanks in it
my $str = join "", map { sprintf "\\x%02X", $_ }
@@ -128,6 +135,7 @@ __DATA__
03C5 tail
2010 string
+D800 first FIRST_SURROGATE
007F native
00DF native