summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Steve Hay <steve.m.hay@googlemail.com> 2010-09-18 21:29:05 +0100
committer Steve Hay <steve.m.hay@googlemail.com> 2010-09-18 21:29:05 +0100
commit 45a6a02cb9a19e39a4a35e0428611cf7b34fd39e (patch)
tree fc90eb0ca05adb06a5bbbd5e5496fc0ce11cd545
parent e86c8c9d6da6c53aa2b53fdfb9803aaf5264a6ad (diff)
download perl-45a6a02cb9a19e39a4a35e0428611cf7b34fd39e.tar.gz
Upgrade Encode from 2.39 to 2.40
-rw-r--r-- MANIFEST 1
-rwxr-xr-x Porting/Maintainers.pl 2
-rw-r--r-- cpan/Encode/Changes 34
-rw-r--r-- cpan/Encode/Encode.pm 36
-rw-r--r-- cpan/Encode/Encode.xs 6
-rw-r--r-- cpan/Encode/MANIFEST 1
-rw-r--r-- cpan/Encode/META.yml 4
-rw-r--r-- cpan/Encode/lib/Encode/Guess.pm 6
-rw-r--r-- cpan/Encode/lib/Encode/MIME/Header.pm 5
-rw-r--r-- cpan/Encode/lib/Encode/Unicode/UTF7.pm 8
-rw-r--r-- cpan/Encode/t/guess.t 12
-rw-r--r-- cpan/Encode/t/mime-header.t 8
-rw-r--r-- cpan/Encode/t/utf8ref.t 20
-rw-r--r-- cpan/Encode/t/utf8strict.t 5
-rw-r--r-- cpan/Encode/ucm/nextstep.ucm 98
15 files changed, 211 insertions, 35 deletions
diff --git a/MANIFEST b/MANIFEST
index c19638e6b5..c7e9e91616 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -741,6 +741,7 @@ cpan/Encode/t/piconv.t Test for piconv.t
cpan/Encode/t/rt.pl test script
cpan/Encode/t/unibench.pl benchmark script
cpan/Encode/t/Unicode.t test script
+cpan/Encode/t/utf8ref.t test script
cpan/Encode/t/utf8strict.t test script
cpan/Encode/TW/Makefile.PL Encode extension
cpan/Encode/TW/TW.pm Encode extension
diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl
index 460f845c6c..9ba90e6c77 100755
--- a/Porting/Maintainers.pl
+++ b/Porting/Maintainers.pl
@@ -493,7 +493,7 @@ use File::Glob qw(:case);
'Encode' =>
{
'MAINTAINER' => 'dankogai',
- 'DISTRIBUTION' => 'DANKOGAI/Encode-2.39.tar.gz',
+ 'DISTRIBUTION' => 'DANKOGAI/Encode-2.40.tar.gz',
'FILES' => q[cpan/Encode],
'UPSTREAM' => undef,
},
diff --git a/cpan/Encode/Changes b/cpan/Encode/Changes
index 37868a0e3a..9eba2ed060 100644
--- a/cpan/Encode/Changes
+++ b/cpan/Encode/Changes
@@ -1,6 +1,36 @@
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 2.39 2009/11/26 09:23:59 dankogai Exp dankogai $
+# $Id: Changes,v 2.40 2010/09/18 18:39:51 dankogai Exp dankogai $
+#
+$Revision: 2.40 $ $Date: 2010/09/18 18:39:51 $
+! Encode.pm Encode.xs
++ t/utf8ref.t
+ Addressed: RT#59981: find_encoding("UTF-8")->encode crashes
+ decode_utf8() is now a little faster, too.
+ http://rt.cpan.org/Ticket/Display.html?id=59981
+ http://rt.cpan.org/Ticket/Display.html?id=58541
+! lib/Encode/Unicode/UTF7.pm
+ Addressed: RT#56443 utf-8 flag is not turned off after calling
+ Encode::encode('UTF-7', $string) to encode an ascii string
+ http://rt.cpan.org/Ticket/Display.html?id=56443
+! t/utf8strict.t
+ Addressed: RT#57799
+ http://rt.cpan.org/Ticket/Display.html?id=57799
+! lib/Encode/Guess.pm
+ Addressed: RT#46080: guess_encoding documentation
+ http://rt.cpan.org/Ticket/Display.html?id=46080
+! ucm/nextstep.ucm
+ Addressed: RT#59668: nextstep encoding is broken - missing ASCII characters
+ http://rt.cpan.org/Ticket/Display.html?id=59668
+! lib/Encode/MIME/Header.pm t/mime-header.t
+ Addressed: RT#52103: Encode::MIME::Header encoded words not separated by
+ white space
+ http://rt.cpan.org/Ticket/Display.html?id=52103
+! t/guess.t lib/Encode/Guess.pm
+ Addressed: Encode: silenced a warning by from_to(..., 'Guess', ...)
+ http://coderepos.org/share/changeset/37731
+
+2.39 2009/11/26 09:23:59
! Encode.xs t/fallback.t
$utf8 = decode('utf8', $malformed, sub{ ... }) # now works!
http://rt.cpan.org/Ticket/Display.html?id=51204
@@ -8,7 +38,7 @@
$ENV{'PERL_CORE'} tricks removed since they are no longer necessary.
Message-Id: <20091116161513.GA25556@bestpractical.com>
-$Revision: 2.39 $ $Date: 2009/11/26 09:23:59 $
+2.38 2009/11/16 14:08:13
! Encode.xs
Addressed: Encode memory corruption [perl #70528]
Message-Id: <alpine.LFD.2.00.0911152328070.9483@ein.m-l.org>
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
index f1dff7820c..4e8cd3c7e5 100644
--- a/cpan/Encode/Encode.pm
+++ b/cpan/Encode/Encode.pm
@@ -1,10 +1,10 @@
#
-# $Id: Encode.pm,v 2.39 2009/11/26 09:23:48 dankogai Exp $
+# $Id: Encode.pm,v 2.40 2010/09/18 18:39:51 dankogai Exp dankogai $
#
package Encode;
use strict;
use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.39 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.40 $ =~ /(\d+)/g;
sub DEBUG () { 0 }
use XSLoader ();
XSLoader::load( __PACKAGE__, $VERSION );
@@ -203,18 +203,32 @@ sub encode_utf8($) {
return $str;
}
+my $utf8enc;
+
sub decode_utf8($;$) {
- my ( $str, $check ) = @_;
- return $str if is_utf8($str);
- if ($check) {
- return decode( "utf8", $str, $check );
- }
- else {
- return decode( "utf8", $str );
- return $str;
- }
+ my ( $octets, $check ) = @_;
+ return $octets if is_utf8($octets);
+ return undef unless defined $octets;
+ $octets .= '' if ref $octets;
+ $check ||= 0;
+ $utf8enc ||= find_encoding('utf8');
+ my $string = $utf8enc->decode( $octets, $check );
+ $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() );
+ return $string;
}
+# sub decode_utf8($;$) {
+# my ( $str, $check ) = @_;
+# return $str if is_utf8($str);
+# if ($check) {
+# return decode( "utf8", $str, $check );
+# }
+# else {
+# return decode( "utf8", $str );
+# return $str;
+# }
+# }
+
predefine_encodings(1);
#
diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs
index b2e912785f..d014d318e2 100644
--- a/cpan/Encode/Encode.xs
+++ b/cpan/Encode/Encode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Encode.xs,v 2.18 2009/11/26 09:23:59 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.19 2010/09/18 18:39:51 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
@@ -432,7 +432,7 @@ PREINIT:
CODE:
{
dSP; ENTER; SAVETMPS;
- if (src == &PL_sv_undef) src = newSV(0);
+ if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
s = (U8 *) SvPV(src, slen);
e = (U8 *) SvEND(src);
dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */
@@ -496,7 +496,7 @@ PREINIT:
CODE:
{
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
- if (src == &PL_sv_undef) src = newSV(0);
+ if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
s = (U8 *) SvPV(src, slen);
e = (U8 *) SvEND(src);
dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */
diff --git a/cpan/Encode/MANIFEST b/cpan/Encode/MANIFEST
index 48fa14353c..3fe6582a22 100644
--- a/cpan/Encode/MANIFEST
+++ b/cpan/Encode/MANIFEST
@@ -97,6 +97,7 @@ t/perlio.t test script
t/piconv.t test script
t/rt.pl even more test script
t/unibench.pl benchmark script
+t/utf8ref.t test script
t/utf8strict.t test script
ucm/8859-1.ucm Unicode Character Map
ucm/8859-10.ucm Unicode Character Map
diff --git a/cpan/Encode/META.yml b/cpan/Encode/META.yml
index 2a5c1ab762..b1757ab22a 100644
--- a/cpan/Encode/META.yml
+++ b/cpan/Encode/META.yml
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: Encode
-version: 2.39
+version: 2.40
abstract: ~
author: []
license: unknown
@@ -14,7 +14,7 @@ no_index:
directory:
- t
- inc
-generated_by: ExtUtils::MakeMaker version 6.55_02
+generated_by: ExtUtils::MakeMaker version 6.56
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: 1.4
diff --git a/cpan/Encode/lib/Encode/Guess.pm b/cpan/Encode/lib/Encode/Guess.pm
index 1ad7147e7f..9636a8ad8a 100644
--- a/cpan/Encode/lib/Encode/Guess.pm
+++ b/cpan/Encode/lib/Encode/Guess.pm
@@ -2,7 +2,7 @@ package Encode::Guess;
use strict;
use warnings;
use Encode qw(:fallbacks find_encoding);
-our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
my $Canon = 'Guess';
sub DEBUG () { 0 }
@@ -53,7 +53,7 @@ sub decode($$;$) {
require Carp;
Carp::croak($guessed);
}
- my $utf8 = $guessed->decode( $octet, $chk );
+ my $utf8 = $guessed->decode( $octet, $chk || 0 );
$_[1] = $octet if $chk;
return $utf8;
}
@@ -279,7 +279,7 @@ the internal suspects list.
my $decoder = guess_encoding($data, qw/euc-jp euc-kr euc-cn/);
die $decoder unless ref($decoder);
my $utf8 = $decoder->decode($data);
- # check only ascii and utf8
+ # check only ascii, utf8 and UTF-(16|32) with BOM
my $decoder = guess_encoding($data);
=back
diff --git a/cpan/Encode/lib/Encode/MIME/Header.pm b/cpan/Encode/lib/Encode/MIME/Header.pm
index 9728dc32d8..5f209b2bd5 100644
--- a/cpan/Encode/lib/Encode/MIME/Header.pm
+++ b/cpan/Encode/lib/Encode/MIME/Header.pm
@@ -3,7 +3,7 @@ use strict;
use warnings;
no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.11 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.12 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(find_encoding encode_utf8 decode_utf8);
use MIME::Base64;
use Carp;
@@ -127,11 +127,12 @@ sub encode($$;$) {
for my $word (@word) {
use bytes ();
if ( bytes::length($subline) + bytes::length($word) >
- $obj->{bpl} )
+ $obj->{bpl} - 1 )
{
push @subline, $subline;
$subline = '';
}
+ $subline .= ' ' if ($subline =~ /\?=$/ and $word =~ /^=\?/);
$subline .= $word;
}
$subline and push @subline, $subline;
diff --git a/cpan/Encode/lib/Encode/Unicode/UTF7.pm b/cpan/Encode/lib/Encode/Unicode/UTF7.pm
index 6ee4619585..151917cc56 100644
--- a/cpan/Encode/lib/Encode/Unicode/UTF7.pm
+++ b/cpan/Encode/lib/Encode/Unicode/UTF7.pm
@@ -1,5 +1,5 @@
#
-# $Id: UTF7.pm,v 2.4 2006/06/03 20:28:48 dankogai Exp $
+# $Id: UTF7.pm,v 2.5 2010/09/18 18:39:51 dankogai Exp dankogai $
#
package Encode::Unicode::UTF7;
use strict;
@@ -7,7 +7,7 @@ use warnings;
no warnings 'redefine';
use base qw(Encode::Encoding);
__PACKAGE__->Define('UTF-7');
-our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use MIME::Base64;
use Encode;
@@ -35,7 +35,9 @@ sub encode($$;$) {
my $bytes = '';
while ( pos($str) < $len ) {
if ( $str =~ /\G($re_asis+)/ogc ) {
- $bytes .= $1;
+ my $octets = $1;
+ utf8::downgrade($octets);
+ $bytes .= $octets;
}
elsif ( $str =~ /\G($re_encoded+)/ogsc ) {
if ( $1 eq "+" ) {
diff --git a/cpan/Encode/t/guess.t b/cpan/Encode/t/guess.t
index 37ddd5a0b1..81ab91b562 100644
--- a/cpan/Encode/t/guess.t
+++ b/cpan/Encode/t/guess.t
@@ -17,7 +17,7 @@ use File::Spec;
use Encode qw(decode encode find_encoding _utf8_off);
#use Test::More qw(no_plan);
-use Test::More tests => 30;
+use Test::More tests => 32;
use_ok("Encode::Guess");
{
no warnings;
@@ -63,6 +63,16 @@ is (decode('Guess', encode('euc-jp', $utf8on)), $utf8on, "decode('Guess')");
eval{ encode('Guess', $utf8on) };
like($@, qr/not defined/io, "no encode()");
+{
+ my $warning;
+ local $SIG{__WARN__} = sub { $warning = shift };
+ my $euc_jp = my $euc_jp_clone = encode('euc-jp', $utf8on);
+ Encode::from_to($euc_jp, 'Guess', 'euc-jp');
+ is $euc_jp_clone, $euc_jp, "from_to(..., 'Guess')";
+ ok !$warning, "no warning";
+ diag $warning if $warning;
+}
+
my %CJKT =
(
'euc-cn' => File::Spec->catfile(dirname(__FILE__), 'gb2312.utf'),
diff --git a/cpan/Encode/t/mime-header.t b/cpan/Encode/t/mime-header.t
index 47d77ca56c..246eb46647 100644
--- a/cpan/Encode/t/mime-header.t
+++ b/cpan/Encode/t/mime-header.t
@@ -1,5 +1,5 @@
#
-# $Id: mime-header.t,v 2.4 2009/01/21 22:55:07 dankogai Exp $
+# $Id: mime-header.t,v 2.5 2010/09/18 18:39:51 dankogai Exp dankogai $
# This script is written in utf8
#
BEGIN {
@@ -74,8 +74,8 @@ EOS
my $bheader =<<'EOS';
From:=?UTF-8?B?IOWwj+mjvCDlvL4g?=<dankogai@dan.co.jp>
-To: dankogai@dan.co.jp (=?UTF-8?B?5bCP6aO8?==Kogai,=?UTF-8?B?IOW8vg==?==Dan
- )
+To: dankogai@dan.co.jp (=?UTF-8?B?5bCP6aO8?==Kogai,=?UTF-8?B?IOW8vg==?==
+ Dan)
Subject:
=?UTF-8?B?IOa8ouWtl+OAgeOCq+OCv+OCq+ODiuOAgeOBsuOCieOBjOOBquOCkuWQq+OCgA==?=
=?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=
@@ -123,6 +123,6 @@ is(Encode::encode('MIME-Q', "\x{fc}"), '=?UTF-8?Q?=C3=BC?=', 'Encode latin1 char
my $rt42627 = Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0");
is(Encode::encode('MIME-Q', $rt42627),
- '=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx?==?UTF-8?Q?0?=',
+ '=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx?= =?UTF-8?Q?0?=',
'MIME-Q encoding does not truncate trailing zeros');
__END__;
diff --git a/cpan/Encode/t/utf8ref.t b/cpan/Encode/t/utf8ref.t
new file mode 100644
index 0000000000..57cfcbcc96
--- /dev/null
+++ b/cpan/Encode/t/utf8ref.t
@@ -0,0 +1,20 @@
+#
+# $Id: utf8ref.t,v 1.1 2010/09/18 18:39:51 dankogai Exp dankogai $
+#
+
+use strict;
+use warnings;
+use Encode;
+use Test::More;
+plan tests => 4;
+#plan 'no_plan';
+
+# my $a = find_encoding('ASCII');
+my $u = find_encoding('UTF-8');
+my $r = [];
+no warnings 'uninitialized';
+is encode_utf8($r), ''.$r;
+is $u->encode($r), '';
+$r = {};
+is decode_utf8($r), ''.$r;
+is $u->decode($r), '';
diff --git a/cpan/Encode/t/utf8strict.t b/cpan/Encode/t/utf8strict.t
index 37e771340f..3f362f4981 100644
--- a/cpan/Encode/t/utf8strict.t
+++ b/cpan/Encode/t/utf8strict.t
@@ -16,6 +16,7 @@ BEGIN {
exit 0;
}
# http://smontagu.damowmow.com/utf8test.html
+ # The numbers below, like 2.1.2 are test numbers on this web page
%ORD = (
0x00000080 => 0, # 2.1.2
0x00000800 => 0, # 2.1.3
@@ -44,7 +45,7 @@ BEGIN {
%SEQ = (
qq/dd 64 73 73/ => 0, # 2.3.1
qq/dd 67 41 41/ => 0, # 2.3.2
- qq/ee 42 73 73 73/ => 0, # 2.3.3
+ qq/ee 42 73 73 71/ => 0, # 2.3.3
qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG
# "3 Malformed sequences" are checked by perl.
# "4 Overlong sequences" are checked by perl.
@@ -53,7 +54,7 @@ BEGIN {
%SEQ = (
qq/ed 9f bf/ => 0, # 2.3.1
qq/ee 80 80/ => 0, # 2.3.2
- qq/f4 8f bf bf/ => 0, # 2.3.3
+ qq/f4 8f bf bd/ => 0, # 2.3.3
qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG
# "3 Malformed sequences" are checked by perl.
# "4 Overlong sequences" are checked by perl.
diff --git a/cpan/Encode/ucm/nextstep.ucm b/cpan/Encode/ucm/nextstep.ucm
index b5d3d462ff..2ec37ad296 100644
--- a/cpan/Encode/ucm/nextstep.ucm
+++ b/cpan/Encode/ucm/nextstep.ucm
@@ -1,5 +1,5 @@
#
-# $Id: nextstep.ucm,v 2.0 2004/05/16 20:55:28 dankogai Exp $
+# $Id: nextstep.ucm,v 2.1 2010/09/18 18:39:51 dankogai Exp dankogai $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/NEXT/NEXTSTEP.TXT
@@ -41,6 +41,102 @@ CHARMAP
<U001D> \x1D |0 # <control>
<U001E> \x1E |0 # <control>
<U001F> \x1F |0 # <control>
+<U0020> \x20 |0 # SPACE
+<U0021> \x21 |0 # EXCLAMATION MARK
+<U0022> \x22 |0 # QUOTATION MARK
+<U0023> \x23 |0 # NUMBER SIGN
+<U0024> \x24 |0 # DOLLAR SIGN
+<U0025> \x25 |0 # PERCENT SIGN
+<U0026> \x26 |0 # AMPERSAND
+<U0027> \x27 |0 # APOSTROPHE
+<U0028> \x28 |0 # LEFT PARENTHESIS
+<U0029> \x29 |0 # RIGHT PARENTHESIS
+<U002A> \x2A |0 # ASTERISK
+<U002B> \x2B |0 # PLUS SIGN
+<U002C> \x2C |0 # COMMA
+<U002D> \x2D |0 # HYPHEN-MINUS
+<U002E> \x2E |0 # FULL STOP
+<U002F> \x2F |0 # SOLIDUS
+<U0030> \x30 |0 # DIGIT ZERO
+<U0031> \x31 |0 # DIGIT ONE
+<U0032> \x32 |0 # DIGIT TWO
+<U0033> \x33 |0 # DIGIT THREE
+<U0034> \x34 |0 # DIGIT FOUR
+<U0035> \x35 |0 # DIGIT FIVE
+<U0036> \x36 |0 # DIGIT SIX
+<U0037> \x37 |0 # DIGIT SEVEN
+<U0038> \x38 |0 # DIGIT EIGHT
+<U0039> \x39 |0 # DIGIT NINE
+<U003A> \x3A |0 # COLON
+<U003B> \x3B |0 # SEMICOLON
+<U003C> \x3C |0 # LESS-THAN SIGN
+<U003D> \x3D |0 # EQUALS SIGN
+<U003E> \x3E |0 # GREATER-THAN SIGN
+<U003F> \x3F |0 # QUESTION MARK
+<U0040> \x40 |0 # COMMERCIAL AT
+<U0041> \x41 |0 # LATIN CAPITAL LETTER A
+<U0042> \x42 |0 # LATIN CAPITAL LETTER B
+<U0043> \x43 |0 # LATIN CAPITAL LETTER C
+<U0044> \x44 |0 # LATIN CAPITAL LETTER D
+<U0045> \x45 |0 # LATIN CAPITAL LETTER E
+<U0046> \x46 |0 # LATIN CAPITAL LETTER F
+<U0047> \x47 |0 # LATIN CAPITAL LETTER G
+<U0048> \x48 |0 # LATIN CAPITAL LETTER H
+<U0049> \x49 |0 # LATIN CAPITAL LETTER I
+<U004A> \x4A |0 # LATIN CAPITAL LETTER J
+<U004B> \x4B |0 # LATIN CAPITAL LETTER K
+<U004C> \x4C |0 # LATIN CAPITAL LETTER L
+<U004D> \x4D |0 # LATIN CAPITAL LETTER M
+<U004E> \x4E |0 # LATIN CAPITAL LETTER N
+<U004F> \x4F |0 # LATIN CAPITAL LETTER O
+<U0050> \x50 |0 # LATIN CAPITAL LETTER P
+<U0051> \x51 |0 # LATIN CAPITAL LETTER Q
+<U0052> \x52 |0 # LATIN CAPITAL LETTER R
+<U0053> \x53 |0 # LATIN CAPITAL LETTER S
+<U0054> \x54 |0 # LATIN CAPITAL LETTER T
+<U0055> \x55 |0 # LATIN CAPITAL LETTER U
+<U0056> \x56 |0 # LATIN CAPITAL LETTER V
+<U0057> \x57 |0 # LATIN CAPITAL LETTER W
+<U0058> \x58 |0 # LATIN CAPITAL LETTER X
+<U0059> \x59 |0 # LATIN CAPITAL LETTER Y
+<U005A> \x5A |0 # LATIN CAPITAL LETTER Z
+<U005B> \x5B |0 # LEFT SQUARE BRACKET
+<U005C> \x5C |0 # REVERSE SOLIDUS
+<U005D> \x5D |0 # RIGHT SQUARE BRACKET
+<U005E> \x5E |0 # CIRCUMFLEX ACCENT
+<U005F> \x5F |0 # LOW LINE
+<U0060> \x60 |0 # GRAVE ACCENT
+<U0061> \x61 |0 # LATIN SMALL LETTER A
+<U0062> \x62 |0 # LATIN SMALL LETTER B
+<U0063> \x63 |0 # LATIN SMALL LETTER C
+<U0064> \x64 |0 # LATIN SMALL LETTER D
+<U0065> \x65 |0 # LATIN SMALL LETTER E
+<U0066> \x66 |0 # LATIN SMALL LETTER F
+<U0067> \x67 |0 # LATIN SMALL LETTER G
+<U0068> \x68 |0 # LATIN SMALL LETTER H
+<U0069> \x69 |0 # LATIN SMALL LETTER I
+<U006A> \x6A |0 # LATIN SMALL LETTER J
+<U006B> \x6B |0 # LATIN SMALL LETTER K
+<U006C> \x6C |0 # LATIN SMALL LETTER L
+<U006D> \x6D |0 # LATIN SMALL LETTER M
+<U006E> \x6E |0 # LATIN SMALL LETTER N
+<U006F> \x6F |0 # LATIN SMALL LETTER O
+<U0070> \x70 |0 # LATIN SMALL LETTER P
+<U0071> \x71 |0 # LATIN SMALL LETTER Q
+<U0072> \x72 |0 # LATIN SMALL LETTER R
+<U0073> \x73 |0 # LATIN SMALL LETTER S
+<U0074> \x74 |0 # LATIN SMALL LETTER T
+<U0075> \x75 |0 # LATIN SMALL LETTER U
+<U0076> \x76 |0 # LATIN SMALL LETTER V
+<U0077> \x77 |0 # LATIN SMALL LETTER W
+<U0078> \x78 |0 # LATIN SMALL LETTER X
+<U0079> \x79 |0 # LATIN SMALL LETTER Y
+<U007A> \x7A |0 # LATIN SMALL LETTER Z
+<U007B> \x7B |0 # LEFT CURLY BRACKET
+<U007C> \x7C |0 # VERTICAL LINE
+<U007D> \x7D |0 # RIGHT CURLY BRACKET
+<U007E> \x7E |0 # TILDE
+<U007F> \x7F |0 # <control>
<U00a0> \x80 |0 # NO-BREAK SPACE
<U00a1> \xa1 |0 # INVERTED EXCLAMATION MARK
<U00a2> \xa2 |0 # CENT SIGN