diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2003-09-23 09:11:23 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2003-09-23 09:11:23 +0000 |
commit | e74d743733c842b1419d7531d7585c088b422f2c (patch) | |
tree | ae45a7d2190537687abba1f8dfe4defcba3a22a6 /ext/Encode | |
parent | 8e0b04c797634d42e2e4586b2684cb43288cfb04 (diff) | |
download | perl-e74d743733c842b1419d7531d7585c088b422f2c.tar.gz |
[perl #23979] GSM 03.38 mapping in Encoding::Byte is incorrect
Fix as much as possible - the specification is still
a bit unclear.
p4raw-id: //depot/perl@21331
Diffstat (limited to 'ext/Encode')
-rw-r--r-- | ext/Encode/MANIFEST | 1 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/Supported.pod | 20 | ||||
-rw-r--r-- | ext/Encode/t/gsm0338.t | 62 | ||||
-rw-r--r-- | ext/Encode/ucm/gsm0338.ucm | 336 |
4 files changed, 375 insertions, 44 deletions
diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index f219c38431..6a6aab82ab 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -74,6 +74,7 @@ t/fallback.t test script t/gb2312.enc test data t/gb2312.utf test data t/grow.t test script +t/gsm0338.t test script t/guess.t test script t/jisx0201.enc test data t/jisx0201.utf test data diff --git a/ext/Encode/lib/Encode/Supported.pod b/ext/Encode/lib/Encode/Supported.pod index d09fc0a513..9280a97e72 100644 --- a/ext/Encode/lib/Encode/Supported.pod +++ b/ext/Encode/lib/Encode/Supported.pod @@ -176,9 +176,23 @@ For gory details, see L<http://czyborra.com/charsets/cyrillic.html> GSM0338 is for GSM handsets. Though it shares alphanumerals with ASCII, control character ranges and other parts are mapped very -differently, presumably to store Greek and Cyrillic alphabets. -This is also covered in Encode::Byte even though it is not an -"extended ASCII" encoding. +differently, mainly to store Greek characters. There are also escape +sequences (starting with 0x1B) to cover e.g. the Euro sign. Some +special cases like a trailing 0x00 byte or a lone 0x1B byte are not +well-defined and decode() will return an empty string for them. +One possible workaround is + + $gsm =~ s/\x00\z/\x00\x00/; + $uni = decode("gsm0338", $gsm); + $uni .= "\xA0" if $gsm =~ /\x1B\z/; + +Note that the Encode implementation of GSM0338 does not implement the +reuse of Latin capital letters as Greek capital letters (for example, +the 0x5A is U+005A (LATIN CAPITAL LETTER Z), not U+0396 (GREEK CAPITAL +LETTER ZETA)). + +The GSM0338 is also covered in Encode::Byte even though it is not +an "extended ASCII" encoding. 
=back diff --git a/ext/Encode/t/gsm0338.t b/ext/Encode/t/gsm0338.t new file mode 100644 index 0000000000..6066d7a7cb --- /dev/null +++ b/ext/Encode/t/gsm0338.t @@ -0,0 +1,62 @@ +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + $| = 1; +} + +use strict; +use Test::More tests => 21; +use Encode; + +# The specification of GSM 03.38 is not awfully clear. +# (http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT) +# The various combinations of 0x00 and 0x1B as leading bytes +# are unclear, as is the semantics of those bytes as standalone +# or as final single bytes. + +sub t { is(decode("gsm0338", my $t = $_[0]), $_[1]) } + +# t("\x00", "\x00"); # ??? + +# "Round-trip". +t("\x41", "\x41"); + +t("\x01", "\xA3"); +t("\x02", "\x24"); +t("\x03", "\xA5"); +t("\x09", "\xE7"); + +t("\x00\x00", "\x00\x00"); # Maybe? +t("\x00\x1B", "\x40\xA0"); # Maybe? +t("\x00\x41", "\x40\x41"); + +# t("\x1B", "\x1B"); # ??? + +# Escape with no special second byte is just a NBSP. +t("\x1B\x41", "\xA0\x41"); + +t("\x1B\x00", "\xA0\x40"); # Maybe? + +# Special escape characters. 
+t("\x1B\x0A", "\x0C"); +t("\x1B\x14", "\x5E"); +t("\x1B\x28", "\x7B"); +t("\x1B\x29", "\x7D"); +t("\x1B\x2F", "\x5C"); +t("\x1B\x3C", "\x5B"); +t("\x1B\x3D", "\x7E"); +t("\x1B\x3E", "\x5D"); +t("\x1B\x40", "\x7C"); +t("\x1B\x40", "\x7C"); +t("\x1B\x65", "\x{20AC}"); + + + + diff --git a/ext/Encode/ucm/gsm0338.ucm b/ext/Encode/ucm/gsm0338.ucm index f321fb77ce..11a379a60c 100644 --- a/ext/Encode/ucm/gsm0338.ucm +++ b/ext/Encode/ucm/gsm0338.ucm @@ -4,47 +4,19 @@ # ./compile -n gsm0338 -o Encode/gsm0338.ucm Encode/gsm0338.enc <code_set_name> "gsm0338" <mb_cur_min> 1 -<mb_cur_max> 1 +<mb_cur_max> 2 <subchar> \x3F # CHARMAP -<U0040> \x00 |0 # COMMERCIAL AT -<U00A3> \x01 |0 # POUND SIGN -<U0024> \x02 |0 # DOLLAR SIGN -<U00A5> \x03 |0 # YEN SIGN -<U00E8> \x04 |0 # LATIN SMALL LETTER E WITH GRAVE -<U00E9> \x05 |0 # LATIN SMALL LETTER E WITH ACUTE -<U00F9> \x06 |0 # LATIN SMALL LETTER U WITH GRAVE -<U00EC> \x07 |0 # LATIN SMALL LETTER I WITH GRAVE -<U00F2> \x08 |0 # LATIN SMALL LETTER O WITH GRAVE -<U00E7> \x09 |0 # LATIN SMALL LETTER C WITH CEDILLA +<U0000><U0000> \x00\x00 |0 # NULL + NULL (?) 
<U000A> \x0A |0 # LINE FEED -<U00D8> \x0B |0 # LATIN CAPITAL LETTER O WITH STROKE -<U00F8> \x0C |0 # LATIN SMALL LETTER O WITH STROKE +<U000C> \x1B\x0A |0 # FORM FEED <U000D> \x0D |0 # CARRIAGE RETURN -<U00C5> \x0E |0 # LATIN CAPITAL LETTER A WITH RING ABOVE -<U00E5> \x0F |0 # LATIN SMALL LETTER A WITH RING ABOVE -<U0394> \x10 |0 # GREEK CAPITAL LETTER DELTA -<U005F> \x11 |0 # LOW LINE -<U03A6> \x12 |0 # GREEK CAPITAL LETTER PHI -<U0393> \x13 |0 # GREEK CAPITAL LETTER GAMMA -<U039B> \x14 |0 # GREEK CAPITAL LETTER LAMDA -<U03A9> \x15 |0 # GREEK CAPITAL LETTER OMEGA -<U03A0> \x16 |0 # GREEK CAPITAL LETTER PI -<U03A8> \x17 |0 # GREEK CAPITAL LETTER PSI -<U03A3> \x18 |0 # GREEK CAPITAL LETTER SIGMA -<U0398> \x19 |0 # GREEK CAPITAL LETTER THETA -<U039E> \x1A |0 # GREEK CAPITAL LETTER XI -<U00A0> \x1B |0 # NO-BREAK SPACE -<U00C6> \x1C |0 # LATIN CAPITAL LETTER AE -<U00E6> \x1D |0 # LATIN SMALL LETTER AE -<U00DF> \x1E |0 # LATIN SMALL LETTER SHARP S -<U00C9> \x1F |0 # LATIN CAPITAL LETTER E WITH ACUTE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK <U0023> \x23 |0 # NUMBER SIGN -<U00A4> \x24 |0 # CURRENCY SIGN +<U0024> \x02 |0 # DOLLAR SIGN <U0025> \x25 |0 # PERCENT SIGN <U0026> \x26 |0 # AMPERSAND <U0027> \x27 |0 # APOSTROPHE @@ -72,7 +44,133 @@ CHARMAP <U003D> \x3D |0 # EQUALS SIGN <U003E> \x3E |0 # GREATER-THAN SIGN <U003F> \x3F |0 # QUESTION MARK -<U00A1> \x40 |0 # INVERTED EXCLAMATION MARK +<U0040><U000A> \x00\x0A |0 # COMMERCIAL AT + LINE FEED +<U0040><U000D> \x00\x0D |0 # COMMERCIAL AT + CARRIAGE RETURN +<U0040><U0020> \x00\x20 |0 # COMMERCIAL AT + SPACE +<U0040><U0021> \x00\x21 |0 # COMMERCIAL AT + EXCLAMATION MARK +<U0040><U0022> \x00\x22 |0 # COMMERCIAL AT + QUOTATION MARK +<U0040><U0023> \x00\x23 |0 # COMMERCIAL AT + NUMBER SIGN +<U0040><U0024> \x00\x02 |0 # COMMERCIAL AT + DOLLAR SIGN +<U0040><U0025> \x00\x25 |0 # COMMERCIAL AT + PERCENT SIGN +<U0040><U0026> \x00\x26 |0 # COMMERCIAL AT + AMPERSAND +<U0040><U0027> 
\x00\x27 |0 # COMMERCIAL AT + APOSTROPHE +<U0040><U0028> \x00\x28 |0 # COMMERCIAL AT + LEFT PARENTHESIS +<U0040><U0029> \x00\x29 |0 # COMMERCIAL AT + RIGHT PARENTHESIS +<U0040><U002A> \x00\x2A |0 # COMMERCIAL AT + ASTERISK +<U0040><U002B> \x00\x2B |0 # COMMERCIAL AT + PLUS SIGN +<U0040><U002C> \x00\x2C |0 # COMMERCIAL AT + COMMA +<U0040><U002D> \x00\x2D |0 # COMMERCIAL AT + HYPHEN-MINUS +<U0040><U002E> \x00\x2E |0 # COMMERCIAL AT + FULL STOP +<U0040><U002F> \x00\x2F |0 # COMMERCIAL AT + SOLIDUS +<U0040><U0030> \x00\x30 |0 # COMMERCIAL AT + DIGIT ZERO +<U0040><U0031> \x00\x31 |0 # COMMERCIAL AT + DIGIT ONE +<U0040><U0032> \x00\x32 |0 # COMMERCIAL AT + DIGIT TWO +<U0040><U0033> \x00\x33 |0 # COMMERCIAL AT + DIGIT THREE +<U0040><U0034> \x00\x34 |0 # COMMERCIAL AT + DIGIT FOUR +<U0040><U0035> \x00\x35 |0 # COMMERCIAL AT + DIGIT FIVE +<U0040><U0036> \x00\x36 |0 # COMMERCIAL AT + DIGIT SIX +<U0040><U0037> \x00\x37 |0 # COMMERCIAL AT + DIGIT SEVEN +<U0040><U0038> \x00\x38 |0 # COMMERCIAL AT + DIGIT EIGHT +<U0040><U0039> \x00\x39 |0 # COMMERCIAL AT + DIGIT NINE +<U0040><U003A> \x00\x3A |0 # COMMERCIAL AT + COLON +<U0040><U003B> \x00\x3B |0 # COMMERCIAL AT + SEMICOLON +<U0040><U003C> \x00\x3C |0 # COMMERCIAL AT + LESS-THAN SIGN +<U0040><U003D> \x00\x3D |0 # COMMERCIAL AT + EQUALS SIGN +<U0040><U003E> \x00\x3E |0 # COMMERCIAL AT + GREATER-THAN SIGN +<U0040><U003F> \x00\x3F |0 # COMMERCIAL AT + QUESTION MARK +<U0040><U0041> \x00\x41 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER A +<U0040><U0042> \x00\x42 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER B +<U0040><U0043> \x00\x43 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER C +<U0040><U0044> \x00\x44 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER D +<U0040><U0045> \x00\x45 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER E +<U0040><U0046> \x00\x46 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER F +<U0040><U0047> \x00\x47 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER G +<U0040><U0048> \x00\x48 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER H +<U0040><U0049> \x00\x49 
|0 # COMMERCIAL AT + LATIN CAPITAL LETTER I +<U0040><U004A> \x00\x4A |0 # COMMERCIAL AT + LATIN CAPITAL LETTER J +<U0040><U004B> \x00\x4B |0 # COMMERCIAL AT + LATIN CAPITAL LETTER K +<U0040><U004C> \x00\x4C |0 # COMMERCIAL AT + LATIN CAPITAL LETTER L +<U0040><U004D> \x00\x4D |0 # COMMERCIAL AT + LATIN CAPITAL LETTER M +<U0040><U004E> \x00\x4E |0 # COMMERCIAL AT + LATIN CAPITAL LETTER N +<U0040><U004F> \x00\x4F |0 # COMMERCIAL AT + LATIN CAPITAL LETTER O +<U0040><U0050> \x00\x50 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER P +<U0040><U0051> \x00\x51 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER Q +<U0040><U0052> \x00\x52 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER R +<U0040><U0053> \x00\x53 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER S +<U0040><U0054> \x00\x54 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER T +<U0040><U0055> \x00\x55 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER U +<U0040><U0056> \x00\x56 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER V +<U0040><U0057> \x00\x57 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER W +<U0040><U0058> \x00\x58 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER X +<U0040><U0059> \x00\x59 |0 # COMMERCIAL AT + LATIN CAPITAL LETTER Y +<U0040><U005A> \x00\x5A |0 # COMMERCIAL AT + LATIN CAPITAL LETTER Z +<U0040><U005F> \x00\x11 |0 # COMMERCIAL AT + LOW LINE +<U0040><U0061> \x00\x61 |0 # COMMERCIAL AT + LATIN SMALL LETTER A +<U0040><U0062> \x00\x62 |0 # COMMERCIAL AT + LATIN SMALL LETTER B +<U0040><U0063> \x00\x63 |0 # COMMERCIAL AT + LATIN SMALL LETTER C +<U0040><U0064> \x00\x64 |0 # COMMERCIAL AT + LATIN SMALL LETTER D +<U0040><U0065> \x00\x65 |0 # COMMERCIAL AT + LATIN SMALL LETTER E +<U0040><U0066> \x00\x66 |0 # COMMERCIAL AT + LATIN SMALL LETTER F +<U0040><U0067> \x00\x67 |0 # COMMERCIAL AT + LATIN SMALL LETTER G +<U0040><U0068> \x00\x68 |0 # COMMERCIAL AT + LATIN SMALL LETTER H +<U0040><U0069> \x00\x69 |0 # COMMERCIAL AT + LATIN SMALL LETTER I +<U0040><U006A> \x00\x6A |0 # COMMERCIAL AT + LATIN SMALL LETTER J +<U0040><U006B> \x00\x6B |0 # COMMERCIAL AT + LATIN SMALL 
LETTER K +<U0040><U006C> \x00\x6C |0 # COMMERCIAL AT + LATIN SMALL LETTER L +<U0040><U006D> \x00\x6D |0 # COMMERCIAL AT + LATIN SMALL LETTER M +<U0040><U006E> \x00\x6E |0 # COMMERCIAL AT + LATIN SMALL LETTER N +<U0040><U006F> \x00\x6F |0 # COMMERCIAL AT + LATIN SMALL LETTER O +<U0040><U0070> \x00\x70 |0 # COMMERCIAL AT + LATIN SMALL LETTER P +<U0040><U0071> \x00\x71 |0 # COMMERCIAL AT + LATIN SMALL LETTER Q +<U0040><U0072> \x00\x72 |0 # COMMERCIAL AT + LATIN SMALL LETTER R +<U0040><U0073> \x00\x73 |0 # COMMERCIAL AT + LATIN SMALL LETTER S +<U0040><U0074> \x00\x74 |0 # COMMERCIAL AT + LATIN SMALL LETTER T +<U0040><U0075> \x00\x75 |0 # COMMERCIAL AT + LATIN SMALL LETTER U +<U0040><U0076> \x00\x76 |0 # COMMERCIAL AT + LATIN SMALL LETTER V +<U0040><U0077> \x00\x77 |0 # COMMERCIAL AT + LATIN SMALL LETTER W +<U0040><U0078> \x00\x78 |0 # COMMERCIAL AT + LATIN SMALL LETTER X +<U0040><U0079> \x00\x79 |0 # COMMERCIAL AT + LATIN SMALL LETTER Y +<U0040><U007A> \x00\x7A |0 # COMMERCIAL AT + LATIN SMALL LETTER Z +<U0040><U00A0> \x00\x1B |0 # COMMERCIAL AT + NO-BREAK SPACE (?) 
+<U0040><U00A1> \x00\x40 |0 # COMMERCIAL AT + INVERTED EXCLAMATION MARK +<U0040><U00A3> \x00\x01 |0 # COMMERCIAL AT + POUND SIGN +<U0040><U00A4> \x00\x24 |0 # COMMERCIAL AT + CURRENCY SIGN +<U0040><U00A5> \x00\x03 |0 # COMMERCIAL AT + YEN SIGN +<U0040><U00A7> \x00\x5F |0 # COMMERCIAL AT + SECTION SIGN +<U0040><U00BF> \x00\x60 |0 # COMMERCIAL AT + INVERTED QUESTION MARK +<U0040><U00C4> \x00\x5B |0 # COMMERCIAL AT + LATIN CAPITAL LETTER A WITH DIAERESIS +<U0040><U00C5> \x00\x0E |0 # COMMERCIAL AT + LATIN CAPITAL LETTER A WITH RING ABOVE +<U0040><U00C6> \x00\x1C |0 # COMMERCIAL AT + LATIN CAPITAL LETTER AE +<U0040><U00C9> \x00\x1F |0 # COMMERCIAL AT + LATIN CAPITAL LETTER E WITH ACUTE +<U0040><U00D1> \x00\x5D |0 # COMMERCIAL AT + LATIN CAPITAL LETTER N WITH TILDE +<U0040><U00D6> \x00\x5C |0 # COMMERCIAL AT + LATIN CAPITAL LETTER O WITH DIAERESIS +<U0040><U00D8> \x00\x0B |0 # COMMERCIAL AT + LATIN CAPITAL LETTER O WITH STROKE +<U0040><U00DC> \x00\x5E |0 # COMMERCIAL AT + LATIN CAPITAL LETTER U WITH DIAERESIS +<U0040><U00DF> \x00\x1E |0 # COMMERCIAL AT + LATIN SMALL LETTER SHARP S +<U0040><U00E0> \x00\x7F |0 # COMMERCIAL AT + LATIN SMALL LETTER A WITH GRAVE +<U0040><U00E4> \x00\x7B |0 # COMMERCIAL AT + LATIN SMALL LETTER A WITH DIAERESIS +<U0040><U00E5> \x00\x0F |0 # COMMERCIAL AT + LATIN SMALL LETTER A WITH RING ABOVE +<U0040><U00E6> \x00\x1D |0 # COMMERCIAL AT + LATIN SMALL LETTER AE +<U0040><U00E7> \x00\x09 |0 # COMMERCIAL AT + LATIN SMALL LETTER C WITH CEDILLA +<U0040><U00E8> \x00\x04 |0 # COMMERCIAL AT + LATIN SMALL LETTER E WITH GRAVE +<U0040><U00E9> \x00\x05 |0 # COMMERCIAL AT + LATIN SMALL LETTER E WITH ACUTE +<U0040><U00EC> \x00\x07 |0 # COMMERCIAL AT + LATIN SMALL LETTER I WITH GRAVE +<U0040><U00F1> \x00\x7D |0 # COMMERCIAL AT + LATIN SMALL LETTER N WITH TILDE +<U0040><U00F2> \x00\x08 |0 # COMMERCIAL AT + LATIN SMALL LETTER O WITH GRAVE +<U0040><U00F6> \x00\x7C |0 # COMMERCIAL AT + LATIN SMALL LETTER O WITH DIAERESIS +<U0040><U00F8> \x00\x0C |0 # COMMERCIAL AT 
+ LATIN SMALL LETTER O WITH STROKE +<U0040><U00F9> \x00\x06 |0 # COMMERCIAL AT + LATIN SMALL LETTER U WITH GRAVE +<U0040><U00FC> \x00\x7E |0 # COMMERCIAL AT + LATIN SMALL LETTER U WITH DIAERESIS +<U0040><U0393> \x00\x13 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER GAMMA +<U0040><U0394> \x00\x10 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER DELTA +<U0040><U0398> \x00\x19 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER THETA +<U0040><U039B> \x00\x14 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER LAMDA +<U0040><U039E> \x00\x1A |0 # COMMERCIAL AT + GREEK CAPITAL LETTER XI +<U0040><U03A0> \x00\x16 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER PI +<U0040><U03A3> \x00\x18 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER SIGMA +<U0040><U03A6> \x00\x12 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER PHI +<U0040><U03A8> \x00\x17 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER PSI +<U0040><U03A9> \x00\x15 |0 # COMMERCIAL AT + GREEK CAPITAL LETTER OMEGA <U0041> \x41 |0 # LATIN CAPITAL LETTER A <U0042> \x42 |0 # LATIN CAPITAL LETTER B <U0043> \x43 |0 # LATIN CAPITAL LETTER C @@ -99,12 +197,11 @@ CHARMAP <U0058> \x58 |0 # LATIN CAPITAL LETTER X <U0059> \x59 |0 # LATIN CAPITAL LETTER Y <U005A> \x5A |0 # LATIN CAPITAL LETTER Z -<U00C4> \x5B |0 # LATIN CAPITAL LETTER A WITH DIAERESIS -<U00D6> \x5C |0 # LATIN CAPITAL LETTER O WITH DIAERESIS -<U00D1> \x5D |0 # LATIN CAPITAL LETTER N WITH TILDE -<U00DC> \x5E |0 # LATIN CAPITAL LETTER U WITH DIAERESIS -<U00A7> \x5F |0 # SECTION SIGN -<U00BF> \x60 |0 # INVERTED QUESTION MARK +<U005B> \x1B\x3C |0 # LEFT SQUARE BRACKET +<U005C> \x1B\x2F |0 # REVERSE SOLIDUS +<U005D> \x1B\x3E |0 # RIGHT SQUARE BRACKET +<U005E> \x1B\x14 |0 # CIRCUMFLEX ACCENT +<U005F> \x11 |0 # LOW LINE <U0061> \x61 |0 # LATIN SMALL LETTER A <U0062> \x62 |0 # LATIN SMALL LETTER B <U0063> \x63 |0 # LATIN SMALL LETTER C @@ -131,9 +228,166 @@ CHARMAP <U0078> \x78 |0 # LATIN SMALL LETTER X <U0079> \x79 |0 # LATIN SMALL LETTER Y <U007A> \x7A |0 # LATIN SMALL LETTER Z +<U007B> \x1B\x28 |0 # LEFT CURLY BRACKET +<U007C> 
\x1B\x40 |0 # VERTICAL LINE +<U007D> \x1B\x29 |0 # RIGHT CURLY BRACKET +<U007E> \x1B\x3D |0 # TILDE +<U00A0><U000D> \x1B\x0D |0 # NO-BREAK SPACE + CARRIAGE RETURN +<U00A0><U0020> \x1B\x20 |0 # NO-BREAK SPACE + SPACE +<U00A0><U0021> \x1B\x21 |0 # NO-BREAK SPACE + EXCLAMATION MARK +<U00A0><U0022> \x1B\x22 |0 # NO-BREAK SPACE + QUOTATION MARK +<U00A0><U0023> \x1B\x23 |0 # NO-BREAK SPACE + NUMBER SIGN +<U00A0><U0024> \x1B\x02 |0 # NO-BREAK SPACE + DOLLAR SIGN +<U00A0><U0025> \x1B\x25 |0 # NO-BREAK SPACE + PERCENT SIGN +<U00A0><U0026> \x1B\x26 |0 # NO-BREAK SPACE + AMPERSAND +<U00A0><U0027> \x1B\x27 |0 # NO-BREAK SPACE + APOSTROPHE +<U00A0><U002A> \x1B\x2A |0 # NO-BREAK SPACE + ASTERISK +<U00A0><U002B> \x1B\x2B |0 # NO-BREAK SPACE + PLUS SIGN +<U00A0><U002C> \x1B\x2C |0 # NO-BREAK SPACE + COMMA +<U00A0><U002D> \x1B\x2D |0 # NO-BREAK SPACE + HYPHEN-MINUS +<U00A0><U002E> \x1B\x2E |0 # NO-BREAK SPACE + FULL STOP +<U00A0><U0030> \x1B\x30 |0 # NO-BREAK SPACE + DIGIT ZERO +<U00A0><U0031> \x1B\x31 |0 # NO-BREAK SPACE + DIGIT ONE +<U00A0><U0032> \x1B\x32 |0 # NO-BREAK SPACE + DIGIT TWO +<U00A0><U0033> \x1B\x33 |0 # NO-BREAK SPACE + DIGIT THREE +<U00A0><U0034> \x1B\x34 |0 # NO-BREAK SPACE + DIGIT FOUR +<U00A0><U0035> \x1B\x35 |0 # NO-BREAK SPACE + DIGIT FIVE +<U00A0><U0036> \x1B\x36 |0 # NO-BREAK SPACE + DIGIT SIX +<U00A0><U0037> \x1B\x37 |0 # NO-BREAK SPACE + DIGIT SEVEN +<U00A0><U0038> \x1B\x38 |0 # NO-BREAK SPACE + DIGIT EIGHT +<U00A0><U0039> \x1B\x39 |0 # NO-BREAK SPACE + DIGIT NINE +<U00A0><U003A> \x1B\x3A |0 # NO-BREAK SPACE + COLON +<U00A0><U003B> \x1B\x3B |0 # NO-BREAK SPACE + SEMICOLON +<U00A0><U003F> \x1B\x3F |0 # NO-BREAK SPACE + QUESTION MARK +<U00A0><U0040> \x1B\x00 |0 # NO-BREAK SPACE + COMMERCIAL AT +<U00A0><U0041> \x1B\x41 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER A +<U00A0><U0042> \x1B\x42 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER B +<U00A0><U0043> \x1B\x43 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER C +<U00A0><U0044> \x1B\x44 |0 # NO-BREAK SPACE + LATIN 
CAPITAL LETTER D +<U00A0><U0045> \x1B\x45 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER E +<U00A0><U0046> \x1B\x46 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER F +<U00A0><U0047> \x1B\x47 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER G +<U00A0><U0048> \x1B\x48 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER H +<U00A0><U0049> \x1B\x49 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER I +<U00A0><U004A> \x1B\x4A |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER J +<U00A0><U004B> \x1B\x4B |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER K +<U00A0><U004C> \x1B\x4C |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER L +<U00A0><U004D> \x1B\x4D |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER M +<U00A0><U004E> \x1B\x4E |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER N +<U00A0><U004F> \x1B\x4F |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER O +<U00A0><U0050> \x1B\x50 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER P +<U00A0><U0051> \x1B\x51 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER Q +<U00A0><U0052> \x1B\x52 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER R +<U00A0><U0053> \x1B\x53 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER S +<U00A0><U0054> \x1B\x54 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER T +<U00A0><U0055> \x1B\x55 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER U +<U00A0><U0056> \x1B\x56 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER V +<U00A0><U0057> \x1B\x57 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER W +<U00A0><U0058> \x1B\x58 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER X +<U00A0><U0059> \x1B\x59 |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER Y +<U00A0><U005A> \x1B\x5A |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER Z +<U00A0><U005F> \x1B\x11 |0 # NO-BREAK SPACE + LOW LINE +<U00A0><U0061> \x1B\x61 |0 # NO-BREAK SPACE + LATIN SMALL LETTER A +<U00A0><U0062> \x1B\x62 |0 # NO-BREAK SPACE + LATIN SMALL LETTER B +<U00A0><U0063> \x1B\x63 |0 # NO-BREAK SPACE + LATIN SMALL LETTER C +<U00A0><U0064> \x1B\x64 |0 # NO-BREAK SPACE + LATIN SMALL LETTER D +<U00A0><U0066> \x1B\x66 |0 # NO-BREAK SPACE + LATIN SMALL LETTER F +<U00A0><U0067> \x1B\x67 |0 # NO-BREAK SPACE + 
LATIN SMALL LETTER G +<U00A0><U0068> \x1B\x68 |0 # NO-BREAK SPACE + LATIN SMALL LETTER H +<U00A0><U0069> \x1B\x69 |0 # NO-BREAK SPACE + LATIN SMALL LETTER I +<U00A0><U006A> \x1B\x6A |0 # NO-BREAK SPACE + LATIN SMALL LETTER J +<U00A0><U006B> \x1B\x6B |0 # NO-BREAK SPACE + LATIN SMALL LETTER K +<U00A0><U006C> \x1B\x6C |0 # NO-BREAK SPACE + LATIN SMALL LETTER L +<U00A0><U006D> \x1B\x6D |0 # NO-BREAK SPACE + LATIN SMALL LETTER M +<U00A0><U006E> \x1B\x6E |0 # NO-BREAK SPACE + LATIN SMALL LETTER N +<U00A0><U006F> \x1B\x6F |0 # NO-BREAK SPACE + LATIN SMALL LETTER O +<U00A0><U0070> \x1B\x70 |0 # NO-BREAK SPACE + LATIN SMALL LETTER P +<U00A0><U0071> \x1B\x71 |0 # NO-BREAK SPACE + LATIN SMALL LETTER Q +<U00A0><U0072> \x1B\x72 |0 # NO-BREAK SPACE + LATIN SMALL LETTER R +<U00A0><U0073> \x1B\x73 |0 # NO-BREAK SPACE + LATIN SMALL LETTER S +<U00A0><U0074> \x1B\x74 |0 # NO-BREAK SPACE + LATIN SMALL LETTER T +<U00A0><U0075> \x1B\x75 |0 # NO-BREAK SPACE + LATIN SMALL LETTER U +<U00A0><U0076> \x1B\x76 |0 # NO-BREAK SPACE + LATIN SMALL LETTER V +<U00A0><U0077> \x1B\x77 |0 # NO-BREAK SPACE + LATIN SMALL LETTER W +<U00A0><U0078> \x1B\x78 |0 # NO-BREAK SPACE + LATIN SMALL LETTER X +<U00A0><U0079> \x1B\x79 |0 # NO-BREAK SPACE + LATIN SMALL LETTER Y +<U00A0><U007A> \x1B\x7A |0 # NO-BREAK SPACE + LATIN SMALL LETTER Z +<U00A0><U00A3> \x1B\x01 |0 # NO-BREAK SPACE + POUND SIGN +<U00A0><U00A4> \x1B\x24 |0 # NO-BREAK SPACE + CURRENCY SIGN +<U00A0><U00A5> \x1B\x03 |0 # NO-BREAK SPACE + YEN SIGN +<U00A0><U00A7> \x1B\x5F |0 # NO-BREAK SPACE + SECTION SIGN +<U00A0><U00BF> \x1B\x60 |0 # NO-BREAK SPACE + INVERTED QUESTION MARK +<U00A0><U00C4> \x1B\x5B |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER A WITH DIAERESIS +<U00A0><U00C5> \x1B\x0E |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER A WITH RING ABOVE +<U00A0><U00C6> \x1B\x1C |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER AE +<U00A0><U00C9> \x1B\x1F |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER E WITH ACUTE +<U00A0><U00D1> \x1B\x5D |0 # NO-BREAK SPACE + LATIN 
CAPITAL LETTER N WITH TILDE +<U00A0><U00D6> \x1B\x5C |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER O WITH DIAERESIS +<U00A0><U00D8> \x1B\x0B |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER O WITH STROKE +<U00A0><U00DC> \x1B\x5E |0 # NO-BREAK SPACE + LATIN CAPITAL LETTER U WITH DIAERESIS +<U00A0><U00DF> \x1B\x1E |0 # NO-BREAK SPACE + LATIN SMALL LETTER SHARP S +<U00A0><U00E0> \x1B\x7F |0 # NO-BREAK SPACE + LATIN SMALL LETTER A WITH GRAVE +<U00A0><U00E4> \x1B\x7B |0 # NO-BREAK SPACE + LATIN SMALL LETTER A WITH DIAERESIS +<U00A0><U00E5> \x1B\x0F |0 # NO-BREAK SPACE + LATIN SMALL LETTER A WITH RING ABOVE +<U00A0><U00E6> \x1B\x1D |0 # NO-BREAK SPACE + LATIN SMALL LETTER AE +<U00A0><U00E7> \x1B\x09 |0 # NO-BREAK SPACE + LATIN SMALL LETTER C WITH CEDILLA +<U00A0><U00E8> \x1B\x04 |0 # NO-BREAK SPACE + LATIN SMALL LETTER E WITH GRAVE +<U00A0><U00E9> \x1B\x05 |0 # NO-BREAK SPACE + LATIN SMALL LETTER E WITH ACUTE +<U00A0><U00EC> \x1B\x07 |0 # NO-BREAK SPACE + LATIN SMALL LETTER I WITH GRAVE +<U00A0><U00F1> \x1B\x7D |0 # NO-BREAK SPACE + LATIN SMALL LETTER N WITH TILDE +<U00A0><U00F2> \x1B\x08 |0 # NO-BREAK SPACE + LATIN SMALL LETTER O WITH GRAVE +<U00A0><U00F6> \x1B\x7C |0 # NO-BREAK SPACE + LATIN SMALL LETTER O WITH DIAERESIS +<U00A0><U00F8> \x1B\x0C |0 # NO-BREAK SPACE + LATIN SMALL LETTER O WITH STROKE +<U00A0><U00F9> \x1B\x06 |0 # NO-BREAK SPACE + LATIN SMALL LETTER U WITH GRAVE +<U00A0><U00FC> \x1B\x7E |0 # NO-BREAK SPACE + LATIN SMALL LETTER U WITH DIAERESIS +<U00A0><U0393> \x1B\x13 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER GAMMA +<U00A0><U0394> \x1B\x10 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER DELTA +<U00A0><U0398> \x1B\x19 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER THETA +<U00A0><U039E> \x1B\x1A |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER XI +<U00A0><U03A0> \x1B\x16 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER PI +<U00A0><U00A0> \x1B\x1B |0 # NO-BREAK SPACE + NO-BREAK SPACE (?) 
+<U00A0><U03A3> \x1B\x18 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER SIGMA +<U00A0><U03A6> \x1B\x12 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER PHI +<U00A0><U03A8> \x1B\x17 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER PSI +<U00A0><U03A9> \x1B\x15 |0 # NO-BREAK SPACE + GREEK CAPITAL LETTER OMEGA +<U00A1> \x40 |0 # INVERTED EXCLAMATION MARK +<U00A3> \x01 |0 # POUND SIGN +<U00A4> \x24 |0 # CURRENCY SIGN +<U00A5> \x03 |0 # YEN SIGN +<U00A7> \x5F |0 # SECTION SIGN +<U00BF> \x60 |0 # INVERTED QUESTION MARK +<U00C4> \x5B |0 # LATIN CAPITAL LETTER A WITH DIAERESIS +<U00C5> \x0E |0 # LATIN CAPITAL LETTER A WITH RING ABOVE +<U00C6> \x1C |0 # LATIN CAPITAL LETTER AE +<U00C9> \x1F |0 # LATIN CAPITAL LETTER E WITH ACUTE +<U00D1> \x5D |0 # LATIN CAPITAL LETTER N WITH TILDE +<U00D6> \x5C |0 # LATIN CAPITAL LETTER O WITH DIAERESIS +<U00D8> \x0B |0 # LATIN CAPITAL LETTER O WITH STROKE +<U00DC> \x5E |0 # LATIN CAPITAL LETTER U WITH DIAERESIS +<U00DF> \x1E |0 # LATIN SMALL LETTER SHARP S +<U00E0> \x7F |0 # LATIN SMALL LETTER A WITH GRAVE <U00E4> \x7B |0 # LATIN SMALL LETTER A WITH DIAERESIS -<U00F6> \x7C |0 # LATIN SMALL LETTER O WITH DIAERESIS +<U00E5> \x0F |0 # LATIN SMALL LETTER A WITH RING ABOVE +<U00E6> \x1D |0 # LATIN SMALL LETTER AE +<U00E7> \x09 |0 # LATIN SMALL LETTER C WITH CEDILLA +<U00E8> \x04 |0 # LATIN SMALL LETTER E WITH GRAVE +<U00E9> \x05 |0 # LATIN SMALL LETTER E WITH ACUTE +<U00EC> \x07 |0 # LATIN SMALL LETTER I WITH GRAVE <U00F1> \x7D |0 # LATIN SMALL LETTER N WITH TILDE +<U00F2> \x08 |0 # LATIN SMALL LETTER O WITH GRAVE +<U00F6> \x7C |0 # LATIN SMALL LETTER O WITH DIAERESIS +<U00F8> \x0C |0 # LATIN SMALL LETTER O WITH STROKE +<U00F9> \x06 |0 # LATIN SMALL LETTER U WITH GRAVE <U00FC> \x7E |0 # LATIN SMALL LETTER U WITH DIAERESIS -<U00E0> \x7F |0 # LATIN SMALL LETTER A WITH GRAVE +<U0393> \x13 |0 # GREEK CAPITAL LETTER GAMMA +<U0394> \x10 |0 # GREEK CAPITAL LETTER DELTA +<U0398> \x19 |0 # GREEK CAPITAL LETTER THETA +<U039B> \x14 |0 # GREEK CAPITAL LETTER LAMDA +<U039E> 
\x1A |0 # GREEK CAPITAL LETTER XI +<U03A0> \x16 |0 # GREEK CAPITAL LETTER PI +<U03A3> \x18 |0 # GREEK CAPITAL LETTER SIGMA +<U03A6> \x12 |0 # GREEK CAPITAL LETTER PHI +<U03A8> \x17 |0 # GREEK CAPITAL LETTER PSI +<U03A9> \x15 |0 # GREEK CAPITAL LETTER OMEGA +<U20AC> \x1B\x65 |0 # EURO SIGN END CHARMAP |