summaryrefslogtreecommitdiff
path: root/ext/Encode
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2003-09-23 09:11:23 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2003-09-23 09:11:23 +0000
commite74d743733c842b1419d7531d7585c088b422f2c (patch)
treeae45a7d2190537687abba1f8dfe4defcba3a22a6 /ext/Encode
parent8e0b04c797634d42e2e4586b2684cb43288cfb04 (diff)
downloadperl-e74d743733c842b1419d7531d7585c088b422f2c.tar.gz
[perl #23979] GSM 03.38 mapping in Encoding::Byte is incorrect
Fix as much as possible - the specification is still a bit unclear. p4raw-id: //depot/perl@21331
Diffstat (limited to 'ext/Encode')
-rw-r--r--ext/Encode/MANIFEST1
-rw-r--r--ext/Encode/lib/Encode/Supported.pod20
-rw-r--r--ext/Encode/t/gsm0338.t62
-rw-r--r--ext/Encode/ucm/gsm0338.ucm336
4 files changed, 375 insertions, 44 deletions
diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST
index f219c38431..6a6aab82ab 100644
--- a/ext/Encode/MANIFEST
+++ b/ext/Encode/MANIFEST
@@ -74,6 +74,7 @@ t/fallback.t test script
t/gb2312.enc test data
t/gb2312.utf test data
t/grow.t test script
+t/gsm0338.t test script
t/guess.t test script
t/jisx0201.enc test data
t/jisx0201.utf test data
diff --git a/ext/Encode/lib/Encode/Supported.pod b/ext/Encode/lib/Encode/Supported.pod
index d09fc0a513..9280a97e72 100644
--- a/ext/Encode/lib/Encode/Supported.pod
+++ b/ext/Encode/lib/Encode/Supported.pod
@@ -176,9 +176,23 @@ For gory details, see L<http://czyborra.com/charsets/cyrillic.html>
GSM0338 is for GSM handsets. Though it shares alphanumerals with
ASCII, control character ranges and other parts are mapped very
-differently, presumably to store Greek and Cyrillic alphabets.
-This is also covered in Encode::Byte even though it is not an
-"extended ASCII" encoding.
+differently, mainly to store Greek characters. There are also escape
+sequences (starting with 0x1B) to cover e.g. the Euro sign. Some
+special cases like a trailing 0x00 byte or a lone 0x1B byte are not
+well-defined and decode() will return an empty string for them.
+One possible workaround is
+
+ $gsm =~ s/\x00\z/\x00\x00/;
+ $uni = decode("gsm0338", $gsm);
+ $uni .= "\xA0" if $gsm =~ /\x1B\z/;
+
+Note that the Encode implementation of GSM0338 does not implement the
+reuse of Latin capital letters as Greek capital letters (for example,
+the 0x5A is U+005A (LATIN CAPITAL LETTER Z), not U+0396 (GREEK CAPITAL
+LETTER ZETA)).
+
+The GSM0338 is also covered in Encode::Byte even though it is not
+an "extended ASCII" encoding.
=back
diff --git a/ext/Encode/t/gsm0338.t b/ext/Encode/t/gsm0338.t
new file mode 100644
index 0000000000..6066d7a7cb
--- /dev/null
+++ b/ext/Encode/t/gsm0338.t
@@ -0,0 +1,62 @@
+BEGIN {
+ if ($ENV{'PERL_CORE'}){ # PERL_CORE set: run from t/ and load modules from ../lib
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config; # makes %Config available for the check below
+ if ($Config{'extensions'} !~ /\bEncode\b/) { # Encode is not among the built extensions
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ $| = 1; # autoflush the currently selected output handle
+}
+
+use strict;
+use Test::More tests => 21;
+use Encode;
+
+# The specification of GSM 03.38 is not awfully clear.
+# (http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT)
+# The various combinations of 0x00 and 0x1B as leading bytes
+# are unclear, as are the semantics of those bytes as standalone
+# or as final single bytes.
+
+sub t { is(decode("gsm0338", my $t = $_[0]), $_[1]) } # decode a copy of the GSM bytes in $_[0] and compare with the expected string in $_[1]
+
+# t("\x00", "\x00"); # ??? (meaning of a standalone 0x00 is unclear)
+
+# "Round-trip".
+t("\x41", "\x41"); # LATIN CAPITAL LETTER A
+
+t("\x01", "\xA3"); # POUND SIGN
+t("\x02", "\x24"); # DOLLAR SIGN
+t("\x03", "\xA5"); # YEN SIGN
+t("\x09", "\xE7"); # LATIN SMALL LETTER C WITH CEDILLA
+
+t("\x00\x00", "\x00\x00"); # Maybe? NULL + NULL
+t("\x00\x1B", "\x40\xA0"); # Maybe? COMMERCIAL AT + NO-BREAK SPACE
+t("\x00\x41", "\x40\x41"); # COMMERCIAL AT + LATIN CAPITAL LETTER A
+
+# t("\x1B", "\x1B"); # ??? (meaning of a standalone 0x1B is unclear)
+
+# An escape with no special second byte is just an NBSP.
+t("\x1B\x41", "\xA0\x41"); # NO-BREAK SPACE + LATIN CAPITAL LETTER A
+
+t("\x1B\x00", "\xA0\x40"); # Maybe? NO-BREAK SPACE + COMMERCIAL AT
+
+# Special escape characters: 0x1B followed by one of these bytes
+# decodes to a single character instead of NBSP + character.
+t("\x1B\x0A", "\x0C"); # FORM FEED
+t("\x1B\x14", "\x5E"); # CIRCUMFLEX ACCENT
+t("\x1B\x28", "\x7B"); # LEFT CURLY BRACKET
+t("\x1B\x29", "\x7D"); # RIGHT CURLY BRACKET
+t("\x1B\x2F", "\x5C"); # REVERSE SOLIDUS
+t("\x1B\x3C", "\x5B"); # LEFT SQUARE BRACKET
+t("\x1B\x3D", "\x7E"); # TILDE
+t("\x1B\x3E", "\x5D"); # RIGHT SQUARE BRACKET
+t("\x1B\x40", "\x7C"); # VERTICAL LINE
+t("\x1B\x3C\x1B\x3E", "\x5B\x5D"); # two escapes back to back decode independently
+t("\x1B\x65", "\x{20AC}"); # EURO SIGN
+
+
+
+
diff --git a/ext/Encode/ucm/gsm0338.ucm b/ext/Encode/ucm/gsm0338.ucm
index f321fb77ce..11a379a60c 100644
--- a/ext/Encode/ucm/gsm0338.ucm
+++ b/ext/Encode/ucm/gsm0338.ucm
@@ -4,47 +4,19 @@
# ./compile -n gsm0338 -o Encode/gsm0338.ucm Encode/gsm0338.enc
<code_set_name> "gsm0338"
<mb_cur_min> 1
-<mb_cur_max> 1
+<mb_cur_max> 2
<subchar> \x3F
#
CHARMAP
-<U0040> \x00 |0 # COMMERCIAL AT
-<U00A3> \x01 |0 # POUND SIGN
-<U0024> \x02 |0 # DOLLAR SIGN
-<U00A5> \x03 |0 # YEN SIGN
-<U00E8> \x04 |0 # LATIN SMALL LETTER E WITH GRAVE
-<U00E9> \x05 |0 # LATIN SMALL LETTER E WITH ACUTE
-<U00F9> \x06 |0 # LATIN SMALL LETTER U WITH GRAVE
-<U00EC> \x07 |0 # LATIN SMALL LETTER I WITH GRAVE
-<U00F2> \x08 |0 # LATIN SMALL LETTER O WITH GRAVE
-<U00E7> \x09 |0 # LATIN SMALL LETTER C WITH CEDILLA
+<U0000><U0000> \x00\x00 |0 # NULL + NULL (?)
<U000A> \x0A |0 # LINE FEED
-<U00D8> \x0B |0 # LATIN CAPITAL LETTER O WITH STROKE
-<U00F8> \x0C |0 # LATIN SMALL LETTER O WITH STROKE
+<U000C> \x1B\x0A |0 # FORM FEED
<U000D> \x0D |0 # CARRIAGE RETURN
-<U00C5> \x0E |0 # LATIN CAPITAL LETTER A WITH RING ABOVE
-<U00E5> \x0F |0 # LATIN SMALL LETTER A WITH RING ABOVE
-<U0394> \x10 |0 # GREEK CAPITAL LETTER DELTA
-<U005F> \x11 |0 # LOW LINE
-<U03A6> \x12 |0 # GREEK CAPITAL LETTER PHI
-<U0393> \x13 |0 # GREEK CAPITAL LETTER GAMMA
-<U039B> \x14 |0 # GREEK CAPITAL LETTER LAMDA
-<U03A9> \x15 |0 # GREEK CAPITAL LETTER OMEGA
-<U03A0> \x16 |0 # GREEK CAPITAL LETTER PI
-<U03A8> \x17 |0 # GREEK CAPITAL LETTER PSI
-<U03A3> \x18 |0 # GREEK CAPITAL LETTER SIGMA
-<U0398> \x19 |0 # GREEK CAPITAL LETTER THETA
-<U039E> \x1A |0 # GREEK CAPITAL LETTER XI
-<U00A0> \x1B |0 # NO-BREAK SPACE
-<U00C6> \x1C |0 # LATIN CAPITAL LETTER AE
-<U00E6> \x1D |0 # LATIN SMALL LETTER AE
-<U00DF> \x1E |0 # LATIN SMALL LETTER SHARP S
-<U00C9> \x1F |0 # LATIN CAPITAL LETTER E WITH ACUTE
<U0020> \x20 |0 # SPACE
<U0021> \x21 |0 # EXCLAMATION MARK
<U0022> \x22 |0 # QUOTATION MARK
<U0023> \x23 |0 # NUMBER SIGN
-<U00A4> \x24 |0 # CURRENCY SIGN
+<U0024> \x02 |0 # DOLLAR SIGN
<U0025> \x25 |0 # PERCENT SIGN
<U0026> \x26 |0 # AMPERSAND
<U0027> \x27 |0 # APOSTROPHE
@@ -72,7 +44,133 @@ CHARMAP
<U003D> \x3D |0 # EQUALS SIGN
<U003E> \x3E |0 # GREATER-THAN SIGN
<U003F> \x3F |0 # QUESTION MARK
-<U00A1> \x40 |0 # INVERTED EXCLAMATION MARK
+<U0040><U000A> \x00\x0A |0 # COMMERCIAL AT + LINE FEED
+<U0040><U000D> \x00\x0D |0 # COMMERCIAL AT + CARRIAGE RETURN
+<U0040><U0020> \x00\x20 |0 # COMMERCIAL AT + SPACE
+<U0040><U0021> \x00\x21 |0 # COMMERCIAL AT + EXCLAMATION MARK
+<U0040><U0022> \x00\x22 |0 # COMMERCIAL AT + QUOTATION MARK
+<U0040><U0023> \x00\x23 |0 # COMMERCIAL AT + NUMBER SIGN
+<U0040><U0024> \x00\x02 |0 # COMMERCIAL AT + DOLLAR SIGN
+<U0040><U0025> \x00\x25 |0 # COMMERCIAL AT + PERCENT SIGN
+<U0040><U0026> \x00\x26 |0 # COMMERCIAL AT + AMPERSAND
+<U0040><U0027> \x00\x27 |0 # COMMERCIAL AT + APOSTROPHE
+<U0040><U0028> \x00\x28 |0 # COMMERCIAL AT + LEFT PARENTHESIS
+<U0040><U0029> \x00\x29 |0 # COMMERCIAL AT + RIGHT PARENTHESIS
+<U0040><U002A> \x00\x2A |0 # COMMERCIAL AT + ASTERISK
+<U0040><U002B> \x00\x2B |0 # COMMERCIAL AT + PLUS SIGN
+<U0040><U002C> \x00\x2C |0 # COMMERCIAL AT + COMMA
+<U0040><U002D> \x00\x2D |0 # COMMERCIAL AT + HYPHEN-MINUS
+<U0040><U002E> \x00\x2E |0 # COMMERCIAL AT + FULL STOP
+<U0040><U002F> \x00\x2F |0 # COMMERCIAL AT + SOLIDUS
+<U0040><U0030> \x00\x30 |0 # COMMERCIAL AT + DIGIT ZERO
+<U0040><U0031> \x00\x31 |0 # COMMERCIAL AT + DIGIT ONE
+<U0040><U0032> \x00\x32 |0 # COMMERCIAL AT + DIGIT TWO
+<U0040><U0033> \x00\x33 |0 # COMMERCIAL AT + DIGIT THREE
+<U0040><U0034> \x00\x34 |0 # COMMERCIAL AT + DIGIT FOUR
+<U0040><U0035> \x00\x35 |0 # COMMERCIAL AT + DIGIT FIVE
+<U0040><U0036> \x00\x36 |0 # COMMERCIAL AT + DIGIT SIX
+<U0040><U0037> \x00\x37 |0 # COMMERCIAL AT + DIGIT SEVEN
+<U0040><U0038> \x00\x38 |0 # COMMERCIAL AT + DIGIT EIGHT
+<U0040><U0039> \x00\x39 |0 # COMMERCIAL AT + DIGIT NINE
+<U0040><U003A> \x00\x3A |0 # COMMERCIAL AT + COLON
+<U0040><U003B> \x00\x3B |0 # COMMERCIAL AT + SEMICOLON
+<U0040><U003C> \x00\x3C |0 # COMMERCIAL AT + LESS-THAN SIGN
+<U0040><U003D> \x00\x3D |0 # COMMERCIAL AT + EQUALS SIGN
+<U0040><U003E> \x00\x3E |0 # COMMERCIAL AT + GREATER-THAN SIGN
+<U0040><U003F> \x00\x3F |0 # COMMERCIAL AT + QUESTION MARK
+<U0040><U0041> \x00\x41 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER A
+<U0040><U0042> \x00\x42 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER B
+<U0040><U0043> \x00\x43 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER C
+<U0040><U0044> \x00\x44 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER D
+<U0040><U0045> \x00\x45 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER E
+<U0040><U0046> \x00\x46 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER F
+<U0040><U0047> \x00\x47 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER G
+<U0040><U0048> \x00\x48 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER H
+<U0040><U0049> \x00\x49 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER I
+<U0040><U004A> \x00\x4A |0 # COMMERCIAL AT + LATIN CAPITAL LETTER J
+<U0040><U004B> \x00\x4B |0 # COMMERCIAL AT + LATIN CAPITAL LETTER K
+<U0040><U004C> \x00\x4C |0 # COMMERCIAL AT + LATIN CAPITAL LETTER L
+<U0040><U004D> \x00\x4D |0 # COMMERCIAL AT + LATIN CAPITAL LETTER M
+<U0040><U004E> \x00\x4E |0 # COMMERCIAL AT + LATIN CAPITAL LETTER N
+<U0040><U004F> \x00\x4F |0 # COMMERCIAL AT + LATIN CAPITAL LETTER O
+<U0040><U0050> \x00\x50 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER P
+<U0040><U0051> \x00\x51 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER Q
+<U0040><U0052> \x00\x52 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER R
+<U0040><U0053> \x00\x53 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER S
+<U0040><U0054> \x00\x54 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER T
+<U0040><U0055> \x00\x55 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER U
+<U0040><U0056> \x00\x56 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER V
+<U0040><U0057> \x00\x57 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER W
+<U0040><U0058> \x00\x58 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER X
+<U0040><U0059> \x00\x59 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER Y
+<U0040><U005A> \x00\x5A |0 # COMMERCIAL AT + LATIN CAPITAL LETTER Z
+<U0040><U005F> \x00\x11 |0 # COMMERCIAL AT + LOW LINE
+<U0040><U0061> \x00\x61 |0 # COMMERCIAL AT + LATIN SMALL LETTER A
+<U0040><U0062> \x00\x62 |0 # COMMERCIAL AT + LATIN SMALL LETTER B
+<U0040><U0063> \x00\x63 |0 # COMMERCIAL AT + LATIN SMALL LETTER C
+<U0040><U0064> \x00\x64 |0 # COMMERCIAL AT + LATIN SMALL LETTER D
+<U0040><U0065> \x00\x65 |0 # COMMERCIAL AT + LATIN SMALL LETTER E
+<U0040><U0066> \x00\x66 |0 # COMMERCIAL AT + LATIN SMALL LETTER F
+<U0040><U0067> \x00\x67 |0 # COMMERCIAL AT + LATIN SMALL LETTER G
+<U0040><U0068> \x00\x68 |0 # COMMERCIAL AT + LATIN SMALL LETTER H
+<U0040><U0069> \x00\x69 |0 # COMMERCIAL AT + LATIN SMALL LETTER I
+<U0040><U006A> \x00\x6A |0 # COMMERCIAL AT + LATIN SMALL LETTER J
+<U0040><U006B> \x00\x6B |0 # COMMERCIAL AT + LATIN SMALL LETTER K
+<U0040><U006C> \x00\x6C |0 # COMMERCIAL AT + LATIN SMALL LETTER L
+<U0040><U006D> \x00\x6D |0 # COMMERCIAL AT + LATIN SMALL LETTER M
+<U0040><U006E> \x00\x6E |0 # COMMERCIAL AT + LATIN SMALL LETTER N
+<U0040><U006F> \x00\x6F |0 # COMMERCIAL AT + LATIN SMALL LETTER O
+<U0040><U0070> \x00\x70 |0 # COMMERCIAL AT + LATIN SMALL LETTER P
+<U0040><U0071> \x00\x71 |0 # COMMERCIAL AT + LATIN SMALL LETTER Q
+<U0040><U0072> \x00\x72 |0 # COMMERCIAL AT + LATIN SMALL LETTER R
+<U0040><U0073> \x00\x73 |0 # COMMERCIAL AT + LATIN SMALL LETTER S
+<U0040><U0074> \x00\x74 |0 # COMMERCIAL AT + LATIN SMALL LETTER T
+<U0040><U0075> \x00\x75 |0 # COMMERCIAL AT + LATIN SMALL LETTER U
+<U0040><U0076> \x00\x76 |0 # COMMERCIAL AT + LATIN SMALL LETTER V
+<U0040><U0077> \x00\x77 |0 # COMMERCIAL AT + LATIN SMALL LETTER W
+<U0040><U0078> \x00\x78 |0 # COMMERCIAL AT + LATIN SMALL LETTER X
+<U0040><U0079> \x00\x79 |0 # COMMERCIAL AT + LATIN SMALL LETTER Y
+<U0040><U007A> \x00\x7A |0 # COMMERCIAL AT + LATIN SMALL LETTER Z
+<U0040><U00A0> \x00\x1B |0 # COMMERCIAL AT + NO-BREAK SPACE (?)
+<U0040><U00A1> \x00\x40 |0 # COMMERCIAL AT + INVERTED EXCLAMATION MARK
+<U0040><U00A3> \x00\x01 |0 # COMMERCIAL AT + POUND SIGN
+<U0040><U00A4> \x00\x24 |0 # COMMERCIAL AT + CURRENCY SIGN
+<U0040><U00A5> \x00\x03 |0 # COMMERCIAL AT + YEN SIGN
+<U0040><U00A7> \x00\x5F |0 # COMMERCIAL AT + SECTION SIGN
+<U0040><U00BF> \x00\x60 |0 # COMMERCIAL AT + INVERTED QUESTION MARK
+<U0040><U00C4> \x00\x5B |0 # COMMERCIAL AT + LATIN CAPITAL LETTER A WITH DIAERESIS
+<U0040><U00C5> \x00\x0E |0 # COMMERCIAL AT + LATIN CAPITAL LETTER A WITH RING ABOVE
+<U0040><U00C6> \x00\x1C |0 # COMMERCIAL AT + LATIN CAPITAL LETTER AE
+<U0040><U00C9> \x00\x1F |0 # COMMERCIAL AT + LATIN CAPITAL LETTER E WITH ACUTE
+<U0040><U00D1> \x00\x5D |0 # COMMERCIAL AT + LATIN CAPITAL LETTER N WITH TILDE
+<U0040><U00D6> \x00\x5C |0 # COMMERCIAL AT + LATIN CAPITAL LETTER O WITH DIAERESIS
+<U0040><U00D8> \x00\x0B |0 # COMMERCIAL AT + LATIN CAPITAL LETTER O WITH STROKE
+<U0040><U00DC> \x00\x5E |0 # COMMERCIAL AT + LATIN CAPITAL LETTER U WITH DIAERESIS
+<U0040><U00DF> \x00\x1E |0 # COMMERCIAL AT + LATIN SMALL LETTER SHARP S
+<U0040><U00E0> \x00\x7F |0 # COMMERCIAL AT + LATIN SMALL LETTER A WITH GRAVE
+<U0040><U00E4> \x00\x7B |0 # COMMERCIAL AT + LATIN SMALL LETTER A WITH DIAERESIS
+<U0040><U00E5> \x00\x0F |0 # COMMERCIAL AT + LATIN SMALL LETTER A WITH RING ABOVE
+<U0040><U00E6> \x00\x1D |0 # COMMERCIAL AT + LATIN SMALL LETTER AE
+<U0040><U00E7> \x00\x09 |0 # COMMERCIAL AT + LATIN SMALL LETTER C WITH CEDILLA
+<U0040><U00E8> \x00\x04 |0 # COMMERCIAL AT + LATIN SMALL LETTER E WITH GRAVE
+<U0040><U00E9> \x00\x05 |0 # COMMERCIAL AT + LATIN SMALL LETTER E WITH ACUTE
+<U0040><U00EC> \x00\x07 |0 # COMMERCIAL AT + LATIN SMALL LETTER I WITH GRAVE
+<U0040><U00F1> \x00\x7D |0 # COMMERCIAL AT + LATIN SMALL LETTER N WITH TILDE
+<U0040><U00F2> \x00\x08 |0 # COMMERCIAL AT + LATIN SMALL LETTER O WITH GRAVE
+<U0040><U00F6> \x00\x7C |0 # COMMERCIAL AT + LATIN SMALL LETTER O WITH DIAERESIS
+<U0040><U00F8> \x00\x0C |0 # COMMERCIAL AT + LATIN SMALL LETTER O WITH STROKE
+<U0040><U00F9> \x00\x06 |0 # COMMERCIAL AT + LATIN SMALL LETTER U WITH GRAVE
+<U0040><U00FC> \x00\x7E |0 # COMMERCIAL AT + LATIN SMALL LETTER U WITH DIAERESIS
+<U0040><U0393> \x00\x13 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER GAMMA
+<U0040><U0394> \x00\x10 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER DELTA
+<U0040><U0398> \x00\x19 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER THETA
+<U0040><U039B> \x00\x14 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER LAMDA
+<U0040><U039E> \x00\x1A |0 # COMMERCIAL AT + GREEK CAPITAL LETTER XI
+<U0040><U03A0> \x00\x16 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER PI
+<U0040><U03A3> \x00\x18 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER SIGMA
+<U0040><U03A6> \x00\x12 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER PHI
+<U0040><U03A8> \x00\x17 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER PSI
+<U0040><U03A9> \x00\x15 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER OMEGA
<U0041> \x41 |0 # LATIN CAPITAL LETTER A
<U0042> \x42 |0 # LATIN CAPITAL LETTER B
<U0043> \x43 |0 # LATIN CAPITAL LETTER C
@@ -99,12 +197,11 @@ CHARMAP
<U0058> \x58 |0 # LATIN CAPITAL LETTER X
<U0059> \x59 |0 # LATIN CAPITAL LETTER Y
<U005A> \x5A |0 # LATIN CAPITAL LETTER Z
-<U00C4> \x5B |0 # LATIN CAPITAL LETTER A WITH DIAERESIS
-<U00D6> \x5C |0 # LATIN CAPITAL LETTER O WITH DIAERESIS
-<U00D1> \x5D |0 # LATIN CAPITAL LETTER N WITH TILDE
-<U00DC> \x5E |0 # LATIN CAPITAL LETTER U WITH DIAERESIS
-<U00A7> \x5F |0 # SECTION SIGN
-<U00BF> \x60 |0 # INVERTED QUESTION MARK
+<U005B> \x1B\x3C |0 # LEFT SQUARE BRACKET
+<U005C> \x1B\x2F |0 # REVERSE SOLIDUS
+<U005D> \x1B\x3E |0 # RIGHT SQUARE BRACKET
+<U005E> \x1B\x14 |0 # CIRCUMFLEX ACCENT
+<U005F> \x11 |0 # LOW LINE
<U0061> \x61 |0 # LATIN SMALL LETTER A
<U0062> \x62 |0 # LATIN SMALL LETTER B
<U0063> \x63 |0 # LATIN SMALL LETTER C
@@ -131,9 +228,166 @@ CHARMAP
<U0078> \x78 |0 # LATIN SMALL LETTER X
<U0079> \x79 |0 # LATIN SMALL LETTER Y
<U007A> \x7A |0 # LATIN SMALL LETTER Z
+<U007B> \x1B\x28 |0 # LEFT CURLY BRACKET
+<U007C> \x1B\x40 |0 # VERTICAL LINE
+<U007D> \x1B\x29 |0 # RIGHT CURLY BRACKET
+<U007E> \x1B\x3D |0 # TILDE
+<U00A0><U000D> \x1B\x0D |0 # NO-BREAK SPACE + CARRIAGE RETURN
+<U00A0><U0020> \x1B\x20 |0 # NO-BREAK SPACE + SPACE
+<U00A0><U0021> \x1B\x21 |0 # NO-BREAK SPACE + EXCLAMATION MARK
+<U00A0><U0022> \x1B\x22 |0 # NO-BREAK SPACE + QUOTATION MARK
+<U00A0><U0023> \x1B\x23 |0 # NO-BREAK SPACE + NUMBER SIGN
+<U00A0><U0024> \x1B\x02 |0 # NO-BREAK SPACE + DOLLAR SIGN
+<U00A0><U0025> \x1B\x25 |0 # NO-BREAK SPACE + PERCENT SIGN
+<U00A0><U0026> \x1B\x26 |0 # NO-BREAK SPACE + AMPERSAND
+<U00A0><U0027> \x1B\x27 |0 # NO-BREAK SPACE + APOSTROPHE
+<U00A0><U002A> \x1B\x2A |0 # NO-BREAK SPACE + ASTERISK
+<U00A0><U002B> \x1B\x2B |0 # NO-BREAK SPACE + PLUS SIGN
+<U00A0><U002C> \x1B\x2C |0 # NO-BREAK SPACE + COMMA
+<U00A0><U002D> \x1B\x2D |0 # NO-BREAK SPACE + HYPHEN-MINUS
+<U00A0><U002E> \x1B\x2E |0 # NO-BREAK SPACE + FULL STOP
+<U00A0><U0030> \x1B\x30 |0 # NO-BREAK SPACE + DIGIT ZERO
+<U00A0><U0031> \x1B\x31 |0 # NO-BREAK SPACE + DIGIT ONE
+<U00A0><U0032> \x1B\x32 |0 # NO-BREAK SPACE + DIGIT TWO
+<U00A0><U0033> \x1B\x33 |0 # NO-BREAK SPACE + DIGIT THREE
+<U00A0><U0034> \x1B\x34 |0 # NO-BREAK SPACE + DIGIT FOUR
+<U00A0><U0035> \x1B\x35 |0 # NO-BREAK SPACE + DIGIT FIVE
+<U00A0><U0036> \x1B\x36 |0 # NO-BREAK SPACE + DIGIT SIX
+<U00A0><U0037> \x1B\x37 |0 # NO-BREAK SPACE + DIGIT SEVEN
+<U00A0><U0038> \x1B\x38 |0 # NO-BREAK SPACE + DIGIT EIGHT
+<U00A0><U0039> \x1B\x39 |0 # NO-BREAK SPACE + DIGIT NINE
+<U00A0><U003A> \x1B\x3A |0 # NO-BREAK SPACE + COLON
+<U00A0><U003B> \x1B\x3B |0 # NO-BREAK SPACE + SEMICOLON
+<U00A0><U003F> \x1B\x3F |0 # NO-BREAK SPACE + QUESTION MARK
+<U00A0><U0040> \x1B\x00 |0 # NO-BREAK SPACE + COMMERCIAL AT
+<U00A0><U0041> \x1B\x41 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER A
+<U00A0><U0042> \x1B\x42 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER B
+<U00A0><U0043> \x1B\x43 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER C
+<U00A0><U0044> \x1B\x44 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER D
+<U00A0><U0045> \x1B\x45 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER E
+<U00A0><U0046> \x1B\x46 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER F
+<U00A0><U0047> \x1B\x47 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER G
+<U00A0><U0048> \x1B\x48 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER H
+<U00A0><U0049> \x1B\x49 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER I
+<U00A0><U004A> \x1B\x4A |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER J
+<U00A0><U004B> \x1B\x4B |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER K
+<U00A0><U004C> \x1B\x4C |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER L
+<U00A0><U004D> \x1B\x4D |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER M
+<U00A0><U004E> \x1B\x4E |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER N
+<U00A0><U004F> \x1B\x4F |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER O
+<U00A0><U0050> \x1B\x50 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER P
+<U00A0><U0051> \x1B\x51 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER Q
+<U00A0><U0052> \x1B\x52 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER R
+<U00A0><U0053> \x1B\x53 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER S
+<U00A0><U0054> \x1B\x54 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER T
+<U00A0><U0055> \x1B\x55 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER U
+<U00A0><U0056> \x1B\x56 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER V
+<U00A0><U0057> \x1B\x57 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER W
+<U00A0><U0058> \x1B\x58 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER X
+<U00A0><U0059> \x1B\x59 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER Y
+<U00A0><U005A> \x1B\x5A |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER Z
+<U00A0><U005F> \x1B\x11 |0 # NO-BREAK SPACE + LOW LINE
+<U00A0><U0061> \x1B\x61 |0 # NO-BREAK SPACE + LATIN SMALL LETTER A
+<U00A0><U0062> \x1B\x62 |0 # NO-BREAK SPACE + LATIN SMALL LETTER B
+<U00A0><U0063> \x1B\x63 |0 # NO-BREAK SPACE + LATIN SMALL LETTER C
+<U00A0><U0064> \x1B\x64 |0 # NO-BREAK SPACE + LATIN SMALL LETTER D
+<U00A0><U0066> \x1B\x66 |0 # NO-BREAK SPACE + LATIN SMALL LETTER F
+<U00A0><U0067> \x1B\x67 |0 # NO-BREAK SPACE + LATIN SMALL LETTER G
+<U00A0><U0068> \x1B\x68 |0 # NO-BREAK SPACE + LATIN SMALL LETTER H
+<U00A0><U0069> \x1B\x69 |0 # NO-BREAK SPACE + LATIN SMALL LETTER I
+<U00A0><U006A> \x1B\x6A |0 # NO-BREAK SPACE + LATIN SMALL LETTER J
+<U00A0><U006B> \x1B\x6B |0 # NO-BREAK SPACE + LATIN SMALL LETTER K
+<U00A0><U006C> \x1B\x6C |0 # NO-BREAK SPACE + LATIN SMALL LETTER L
+<U00A0><U006D> \x1B\x6D |0 # NO-BREAK SPACE + LATIN SMALL LETTER M
+<U00A0><U006E> \x1B\x6E |0 # NO-BREAK SPACE + LATIN SMALL LETTER N
+<U00A0><U006F> \x1B\x6F |0 # NO-BREAK SPACE + LATIN SMALL LETTER O
+<U00A0><U0070> \x1B\x70 |0 # NO-BREAK SPACE + LATIN SMALL LETTER P
+<U00A0><U0071> \x1B\x71 |0 # NO-BREAK SPACE + LATIN SMALL LETTER Q
+<U00A0><U0072> \x1B\x72 |0 # NO-BREAK SPACE + LATIN SMALL LETTER R
+<U00A0><U0073> \x1B\x73 |0 # NO-BREAK SPACE + LATIN SMALL LETTER S
+<U00A0><U0074> \x1B\x74 |0 # NO-BREAK SPACE + LATIN SMALL LETTER T
+<U00A0><U0075> \x1B\x75 |0 # NO-BREAK SPACE + LATIN SMALL LETTER U
+<U00A0><U0076> \x1B\x76 |0 # NO-BREAK SPACE + LATIN SMALL LETTER V
+<U00A0><U0077> \x1B\x77 |0 # NO-BREAK SPACE + LATIN SMALL LETTER W
+<U00A0><U0078> \x1B\x78 |0 # NO-BREAK SPACE + LATIN SMALL LETTER X
+<U00A0><U0079> \x1B\x79 |0 # NO-BREAK SPACE + LATIN SMALL LETTER Y
+<U00A0><U007A> \x1B\x7A |0 # NO-BREAK SPACE + LATIN SMALL LETTER Z
+<U00A0><U00A3> \x1B\x01 |0 # NO-BREAK SPACE + POUND SIGN
+<U00A0><U00A4> \x1B\x24 |0 # NO-BREAK SPACE + CURRENCY SIGN
+<U00A0><U00A5> \x1B\x03 |0 # NO-BREAK SPACE + YEN SIGN
+<U00A0><U00A7> \x1B\x5F |0 # NO-BREAK SPACE + SECTION SIGN
+<U00A0><U00BF> \x1B\x60 |0 # NO-BREAK SPACE + INVERTED QUESTION MARK
+<U00A0><U00C4> \x1B\x5B |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER A WITH DIAERESIS
+<U00A0><U00C5> \x1B\x0E |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER A WITH RING ABOVE
+<U00A0><U00C6> \x1B\x1C |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER AE
+<U00A0><U00C9> \x1B\x1F |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER E WITH ACUTE
+<U00A0><U00D1> \x1B\x5D |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER N WITH TILDE
+<U00A0><U00D6> \x1B\x5C |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER O WITH DIAERESIS
+<U00A0><U00D8> \x1B\x0B |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER O WITH STROKE
+<U00A0><U00DC> \x1B\x5E |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER U WITH DIAERESIS
+<U00A0><U00DF> \x1B\x1E |0 # NO-BREAK SPACE + LATIN SMALL LETTER SHARP S
+<U00A0><U00E0> \x1B\x7F |0 # NO-BREAK SPACE + LATIN SMALL LETTER A WITH GRAVE
+<U00A0><U00E4> \x1B\x7B |0 # NO-BREAK SPACE + LATIN SMALL LETTER A WITH DIAERESIS
+<U00A0><U00E5> \x1B\x0F |0 # NO-BREAK SPACE + LATIN SMALL LETTER A WITH RING ABOVE
+<U00A0><U00E6> \x1B\x1D |0 # NO-BREAK SPACE + LATIN SMALL LETTER AE
+<U00A0><U00E7> \x1B\x09 |0 # NO-BREAK SPACE + LATIN SMALL LETTER C WITH CEDILLA
+<U00A0><U00E8> \x1B\x04 |0 # NO-BREAK SPACE + LATIN SMALL LETTER E WITH GRAVE
+<U00A0><U00E9> \x1B\x05 |0 # NO-BREAK SPACE + LATIN SMALL LETTER E WITH ACUTE
+<U00A0><U00EC> \x1B\x07 |0 # NO-BREAK SPACE + LATIN SMALL LETTER I WITH GRAVE
+<U00A0><U00F1> \x1B\x7D |0 # NO-BREAK SPACE + LATIN SMALL LETTER N WITH TILDE
+<U00A0><U00F2> \x1B\x08 |0 # NO-BREAK SPACE + LATIN SMALL LETTER O WITH GRAVE
+<U00A0><U00F6> \x1B\x7C |0 # NO-BREAK SPACE + LATIN SMALL LETTER O WITH DIAERESIS
+<U00A0><U00F8> \x1B\x0C |0 # NO-BREAK SPACE + LATIN SMALL LETTER O WITH STROKE
+<U00A0><U00F9> \x1B\x06 |0 # NO-BREAK SPACE + LATIN SMALL LETTER U WITH GRAVE
+<U00A0><U00FC> \x1B\x7E |0 # NO-BREAK SPACE + LATIN SMALL LETTER U WITH DIAERESIS
+<U00A0><U0393> \x1B\x13 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER GAMMA
+<U00A0><U0394> \x1B\x10 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER DELTA
+<U00A0><U0398> \x1B\x19 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER THETA
+<U00A0><U039E> \x1B\x1A |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER XI
+<U00A0><U03A0> \x1B\x16 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER PI
+<U00A0><U00A0> \x1B\x1B |0 # NO-BREAK SPACE + NO-BREAK SPACE (?)
+<U00A0><U03A3> \x1B\x18 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER SIGMA
+<U00A0><U03A6> \x1B\x12 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER PHI
+<U00A0><U03A8> \x1B\x17 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER PSI
+<U00A0><U03A9> \x1B\x15 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER OMEGA
+<U00A1> \x40 |0 # INVERTED EXCLAMATION MARK
+<U00A3> \x01 |0 # POUND SIGN
+<U00A4> \x24 |0 # CURRENCY SIGN
+<U00A5> \x03 |0 # YEN SIGN
+<U00A7> \x5F |0 # SECTION SIGN
+<U00BF> \x60 |0 # INVERTED QUESTION MARK
+<U00C4> \x5B |0 # LATIN CAPITAL LETTER A WITH DIAERESIS
+<U00C5> \x0E |0 # LATIN CAPITAL LETTER A WITH RING ABOVE
+<U00C6> \x1C |0 # LATIN CAPITAL LETTER AE
+<U00C9> \x1F |0 # LATIN CAPITAL LETTER E WITH ACUTE
+<U00D1> \x5D |0 # LATIN CAPITAL LETTER N WITH TILDE
+<U00D6> \x5C |0 # LATIN CAPITAL LETTER O WITH DIAERESIS
+<U00D8> \x0B |0 # LATIN CAPITAL LETTER O WITH STROKE
+<U00DC> \x5E |0 # LATIN CAPITAL LETTER U WITH DIAERESIS
+<U00DF> \x1E |0 # LATIN SMALL LETTER SHARP S
+<U00E0> \x7F |0 # LATIN SMALL LETTER A WITH GRAVE
<U00E4> \x7B |0 # LATIN SMALL LETTER A WITH DIAERESIS
-<U00F6> \x7C |0 # LATIN SMALL LETTER O WITH DIAERESIS
+<U00E5> \x0F |0 # LATIN SMALL LETTER A WITH RING ABOVE
+<U00E6> \x1D |0 # LATIN SMALL LETTER AE
+<U00E7> \x09 |0 # LATIN SMALL LETTER C WITH CEDILLA
+<U00E8> \x04 |0 # LATIN SMALL LETTER E WITH GRAVE
+<U00E9> \x05 |0 # LATIN SMALL LETTER E WITH ACUTE
+<U00EC> \x07 |0 # LATIN SMALL LETTER I WITH GRAVE
<U00F1> \x7D |0 # LATIN SMALL LETTER N WITH TILDE
+<U00F2> \x08 |0 # LATIN SMALL LETTER O WITH GRAVE
+<U00F6> \x7C |0 # LATIN SMALL LETTER O WITH DIAERESIS
+<U00F8> \x0C |0 # LATIN SMALL LETTER O WITH STROKE
+<U00F9> \x06 |0 # LATIN SMALL LETTER U WITH GRAVE
<U00FC> \x7E |0 # LATIN SMALL LETTER U WITH DIAERESIS
-<U00E0> \x7F |0 # LATIN SMALL LETTER A WITH GRAVE
+<U0393> \x13 |0 # GREEK CAPITAL LETTER GAMMA
+<U0394> \x10 |0 # GREEK CAPITAL LETTER DELTA
+<U0398> \x19 |0 # GREEK CAPITAL LETTER THETA
+<U039B> \x14 |0 # GREEK CAPITAL LETTER LAMDA
+<U039E> \x1A |0 # GREEK CAPITAL LETTER XI
+<U03A0> \x16 |0 # GREEK CAPITAL LETTER PI
+<U03A3> \x18 |0 # GREEK CAPITAL LETTER SIGMA
+<U03A6> \x12 |0 # GREEK CAPITAL LETTER PHI
+<U03A8> \x17 |0 # GREEK CAPITAL LETTER PSI
+<U03A9> \x15 |0 # GREEK CAPITAL LETTER OMEGA
+<U20AC> \x1B\x65 |0 # EURO SIGN
END CHARMAP