diff options
author | Dan Kogai <dankogai@dan.co.jp> | 2003-05-22 03:26:26 +0900 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2003-05-21 13:01:42 +0000 |
commit | 03871ea64ce22022ff5d907fffa52338dafdb782 (patch) | |
tree | e7f1f894079fc0f2c0bbf1b043d694d3d8114df7 /ext/Encode/ucm | |
parent | 66f3f260095e79305afd2e6927af42eda76ba830 (diff) | |
download | perl-03871ea64ce22022ff5d907fffa52338dafdb782.tar.gz |
[Encode] 1.95 released
Message-Id: <4B8D9AB5-8B6E-11D7-848A-000393AE4244@dan.co.jp>
p4raw-id: //depot/perl@19578
Diffstat (limited to 'ext/Encode/ucm')
-rw-r--r-- | ext/Encode/ucm/8859-1.ucm | 129 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-10.ucm | 219 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-11.ucm | 138 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-13.ucm | 227 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-14.ucm | 191 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-15.ucm | 145 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-16.ucm | 209 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-2.ucm | 239 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-3.ucm | 181 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-4.ucm | 223 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-5.ucm | 135 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-6.ucm | 151 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-7.ucm | 141 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-8.ucm | 139 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-9.ucm | 139 |
15 files changed, 1307 insertions, 1299 deletions
diff --git a/ext/Encode/ucm/8859-1.ucm b/ext/Encode/ucm/8859-1.ucm index 6e1caba220..080424b7da 100644 --- a/ext/Encode/ucm/8859-1.ucm +++ b/ext/Encode/ucm/8859-1.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-1.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-1.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT # -# ./compile -n iso-8859-1 -o Encode/iso8859-1.ucm Encode/iso8859-1.enc <code_set_name> "iso-8859-1" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,39 +136,39 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE <U00A1> \xA1 |0 # INVERTED EXCLAMATION MARK <U00A2> \xA2 |0 # CENT SIGN diff --git a/ext/Encode/ucm/8859-10.ucm b/ext/Encode/ucm/8859-10.ucm index dcf79fec8d..eb1a80c221 100644 --- a/ext/Encode/ucm/8859-10.ucm +++ b/ext/Encode/ucm/8859-10.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-10.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-10.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-10.TXT # -# ./compile -n iso-8859-10 -o Encode/iso8859-10.ucm Encode/iso8859-10.enc <code_set_name> "iso-8859-10" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,133 +136,133 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE -<U0104> \xA1 |0 # LATIN CAPITAL LETTER A WITH OGONEK -<U0112> \xA2 |0 # LATIN CAPITAL LETTER E WITH MACRON -<U0122> \xA3 |0 # LATIN CAPITAL LETTER G WITH CEDILLA -<U012A> \xA4 |0 # LATIN CAPITAL LETTER I WITH MACRON -<U0128> \xA5 |0 # LATIN CAPITAL LETTER I WITH TILDE -<U0136> \xA6 |0 # LATIN CAPITAL LETTER K WITH CEDILLA <U00A7> \xA7 |0 # SECTION SIGN -<U013B> \xA8 |0 # LATIN CAPITAL LETTER L WITH CEDILLA -<U0110> \xA9 |0 # LATIN CAPITAL LETTER D WITH STROKE -<U0160> \xAA |0 # LATIN CAPITAL LETTER S WITH CARON -<U0166> \xAB |0 # LATIN CAPITAL LETTER T WITH STROKE -<U017D> \xAC |0 # LATIN CAPITAL LETTER Z WITH CARON <U00AD> \xAD |0 # SOFT HYPHEN -<U016A> \xAE |0 # LATIN CAPITAL LETTER U WITH MACRON -<U014A> \xAF |0 # LATIN CAPITAL LETTER ENG <U00B0> \xB0 |0 # DEGREE SIGN -<U0105> \xB1 |0 # LATIN SMALL LETTER A WITH OGONEK -<U0113> \xB2 |0 # LATIN SMALL LETTER E WITH MACRON -<U0123> \xB3 |0 # LATIN SMALL LETTER G WITH CEDILLA -<U012B> \xB4 |0 # LATIN SMALL LETTER I WITH MACRON -<U0129> \xB5 |0 # LATIN SMALL LETTER I WITH TILDE -<U0137> \xB6 |0 # LATIN SMALL LETTER K WITH CEDILLA <U00B7> \xB7 |0 # MIDDLE DOT -<U013C> \xB8 |0 # LATIN SMALL LETTER L WITH CEDILLA -<U0111> \xB9 |0 # LATIN SMALL LETTER D WITH STROKE -<U0161> \xBA |0 # LATIN SMALL LETTER S WITH CARON -<U0167> \xBB |0 # LATIN SMALL LETTER T WITH STROKE -<U017E> \xBC |0 # LATIN SMALL LETTER Z WITH CARON -<U2015> \xBD |0 # HORIZONTAL BAR -<U016B> \xBE |0 # LATIN SMALL LETTER U WITH MACRON -<U014B> \xBF |0 # LATIN SMALL LETTER ENG -<U0100> \xC0 |0 # LATIN CAPITAL LETTER A WITH MACRON <U00C1> \xC1 |0 # LATIN CAPITAL LETTER A WITH ACUTE <U00C2> \xC2 |0 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX <U00C3> \xC3 |0 # LATIN CAPITAL LETTER A WITH TILDE <U00C4> \xC4 |0 # LATIN CAPITAL LETTER A WITH DIAERESIS <U00C5> \xC5 |0 # LATIN CAPITAL LETTER A WITH RING ABOVE <U00C6> \xC6 |0 # LATIN CAPITAL LETTER AE -<U012E> \xC7 |0 # LATIN CAPITAL LETTER I WITH OGONEK -<U010C> \xC8 |0 # LATIN CAPITAL LETTER C WITH CARON <U00C9> \xC9 |0 # LATIN CAPITAL LETTER E WITH ACUTE -<U0118> \xCA |0 # LATIN CAPITAL LETTER E WITH OGONEK <U00CB> \xCB |0 # LATIN CAPITAL LETTER E WITH DIAERESIS -<U0116> \xCC |0 # LATIN CAPITAL LETTER E WITH DOT ABOVE <U00CD> \xCD |0 # LATIN CAPITAL LETTER I WITH ACUTE <U00CE> \xCE |0 # LATIN CAPITAL LETTER I WITH CIRCUMFLEX <U00CF> \xCF |0 # LATIN CAPITAL LETTER I WITH DIAERESIS <U00D0> \xD0 |0 # LATIN CAPITAL LETTER ETH -<U0145> \xD1 |0 # LATIN CAPITAL LETTER N WITH CEDILLA -<U014C> \xD2 |0 # LATIN CAPITAL LETTER O WITH MACRON <U00D3> \xD3 |0 # LATIN CAPITAL LETTER O WITH ACUTE <U00D4> \xD4 |0 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX <U00D5> \xD5 |0 # LATIN CAPITAL LETTER O WITH TILDE <U00D6> \xD6 |0 # LATIN CAPITAL LETTER O WITH DIAERESIS -<U0168> \xD7 |0 # LATIN CAPITAL LETTER U WITH TILDE <U00D8> \xD8 |0 # LATIN CAPITAL LETTER O WITH STROKE -<U0172> \xD9 |0 # LATIN CAPITAL LETTER U WITH OGONEK <U00DA> \xDA |0 # LATIN CAPITAL LETTER U WITH ACUTE <U00DB> \xDB |0 # LATIN CAPITAL LETTER U WITH CIRCUMFLEX <U00DC> \xDC |0 # LATIN CAPITAL LETTER U WITH DIAERESIS <U00DD> \xDD |0 # LATIN CAPITAL LETTER Y WITH ACUTE <U00DE> \xDE |0 # LATIN CAPITAL LETTER THORN <U00DF> \xDF |0 # LATIN SMALL LETTER SHARP S -<U0101> \xE0 |0 # LATIN SMALL LETTER A WITH MACRON <U00E1> \xE1 |0 # LATIN SMALL LETTER A WITH ACUTE <U00E2> \xE2 |0 # LATIN SMALL LETTER A WITH CIRCUMFLEX <U00E3> \xE3 |0 # LATIN SMALL LETTER A WITH TILDE <U00E4> \xE4 |0 # LATIN SMALL LETTER A WITH DIAERESIS <U00E5> \xE5 |0 # LATIN SMALL LETTER A WITH RING ABOVE <U00E6> \xE6 |0 # LATIN SMALL LETTER AE -<U012F> \xE7 |0 # LATIN SMALL LETTER I WITH OGONEK -<U010D> \xE8 |0 # LATIN SMALL LETTER C WITH CARON <U00E9> \xE9 |0 # LATIN SMALL LETTER E WITH ACUTE -<U0119> \xEA |0 # LATIN SMALL LETTER E WITH OGONEK <U00EB> \xEB |0 # LATIN SMALL LETTER E WITH DIAERESIS -<U0117> \xEC |0 # LATIN SMALL LETTER E WITH DOT ABOVE <U00ED> \xED |0 # LATIN SMALL LETTER I WITH ACUTE <U00EE> \xEE |0 # LATIN SMALL LETTER I WITH CIRCUMFLEX <U00EF> \xEF |0 # LATIN SMALL LETTER I WITH DIAERESIS <U00F0> \xF0 |0 # LATIN SMALL LETTER ETH -<U0146> \xF1 |0 # LATIN SMALL LETTER N WITH CEDILLA -<U014D> \xF2 |0 # LATIN SMALL LETTER O WITH MACRON <U00F3> \xF3 |0 # LATIN SMALL LETTER O WITH ACUTE <U00F4> \xF4 |0 # LATIN SMALL LETTER O WITH CIRCUMFLEX <U00F5> \xF5 |0 # LATIN SMALL LETTER O WITH TILDE <U00F6> \xF6 |0 # LATIN SMALL LETTER O WITH DIAERESIS -<U0169> \xF7 |0 # LATIN SMALL LETTER U WITH TILDE <U00F8> \xF8 |0 # LATIN SMALL LETTER O WITH STROKE -<U0173> \xF9 |0 # LATIN SMALL LETTER U WITH OGONEK <U00FA> \xFA |0 # LATIN SMALL LETTER U WITH ACUTE <U00FB> \xFB |0 # LATIN SMALL LETTER U WITH CIRCUMFLEX <U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS <U00FD> \xFD |0 # LATIN SMALL LETTER Y WITH ACUTE <U00FE> \xFE |0 # LATIN SMALL LETTER THORN +<U0100> \xC0 |0 # LATIN CAPITAL LETTER A WITH MACRON +<U0101> \xE0 |0 # LATIN SMALL LETTER A WITH MACRON +<U0104> \xA1 |0 # LATIN CAPITAL LETTER A WITH OGONEK +<U0105> \xB1 |0 # LATIN SMALL LETTER A WITH OGONEK +<U010C> \xC8 |0 # LATIN CAPITAL LETTER C WITH CARON +<U010D> \xE8 |0 # LATIN SMALL LETTER C WITH CARON +<U0110> \xA9 |0 # LATIN CAPITAL LETTER D WITH STROKE +<U0111> \xB9 |0 # LATIN SMALL LETTER D WITH STROKE +<U0112> \xA2 |0 # LATIN CAPITAL LETTER E WITH MACRON +<U0113> \xB2 |0 # LATIN SMALL LETTER E WITH MACRON +<U0116> \xCC |0 # LATIN CAPITAL LETTER E WITH DOT ABOVE +<U0117> \xEC |0 # LATIN SMALL LETTER E WITH DOT ABOVE +<U0118> \xCA |0 # LATIN CAPITAL LETTER E WITH OGONEK +<U0119> \xEA |0 # LATIN SMALL LETTER E WITH OGONEK +<U0122> \xA3 |0 # LATIN CAPITAL LETTER G WITH CEDILLA +<U0123> \xB3 |0 # LATIN SMALL LETTER G WITH CEDILLA +<U0128> \xA5 |0 # LATIN CAPITAL LETTER I WITH TILDE +<U0129> \xB5 |0 # LATIN SMALL LETTER I WITH TILDE +<U012A> \xA4 |0 # LATIN CAPITAL LETTER I WITH MACRON +<U012B> \xB4 |0 # LATIN SMALL LETTER I WITH MACRON +<U012E> \xC7 |0 # LATIN CAPITAL LETTER I WITH OGONEK +<U012F> \xE7 |0 # LATIN SMALL LETTER I WITH OGONEK +<U0136> \xA6 |0 # LATIN CAPITAL LETTER K WITH CEDILLA +<U0137> \xB6 |0 # LATIN SMALL LETTER K WITH CEDILLA <U0138> \xFF |0 # LATIN SMALL LETTER KRA +<U013B> \xA8 |0 # LATIN CAPITAL LETTER L WITH CEDILLA +<U013C> \xB8 |0 # LATIN SMALL LETTER L WITH CEDILLA +<U0145> \xD1 |0 # LATIN CAPITAL LETTER N WITH CEDILLA +<U0146> \xF1 |0 # LATIN SMALL LETTER N WITH CEDILLA +<U014A> \xAF |0 # LATIN CAPITAL LETTER ENG +<U014B> \xBF |0 # LATIN SMALL LETTER ENG +<U014C> \xD2 |0 # LATIN CAPITAL LETTER O WITH MACRON +<U014D> \xF2 |0 # LATIN SMALL LETTER O WITH MACRON +<U0160> \xAA |0 # LATIN CAPITAL LETTER S WITH CARON +<U0161> \xBA |0 # LATIN SMALL LETTER S WITH CARON +<U0166> \xAB |0 # LATIN CAPITAL LETTER T WITH STROKE +<U0167> \xBB |0 # LATIN SMALL LETTER T WITH STROKE +<U0168> \xD7 |0 # LATIN CAPITAL LETTER U WITH TILDE +<U0169> \xF7 |0 # LATIN SMALL LETTER U WITH TILDE +<U016A> \xAE |0 # LATIN CAPITAL LETTER U WITH MACRON +<U016B> \xBE |0 # LATIN SMALL LETTER U WITH MACRON +<U0172> \xD9 |0 # LATIN CAPITAL LETTER U WITH OGONEK +<U0173> \xF9 |0 # LATIN SMALL LETTER U WITH OGONEK +<U017D> \xAC |0 # LATIN CAPITAL LETTER Z WITH CARON +<U017E> \xBC |0 # LATIN SMALL LETTER Z WITH CARON +<U2015> \xBD |0 # HORIZONTAL BAR END CHARMAP diff --git a/ext/Encode/ucm/8859-11.ucm b/ext/Encode/ucm/8859-11.ucm index 5047598d8d..29bd1cd222 100644 --- a/ext/Encode/ucm/8859-11.ucm +++ b/ext/Encode/ucm/8859-11.ucm @@ -1,46 +1,46 @@ # -# $Id: 8859-11.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-11.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT # -# Written $Id: 8859-11.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ -# ./compile -n iso-8859-11 -o Encode/iso8859-11.ucm Encode/iso8859-11.enc <code_set_name> "iso-8859-11" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -136,39 +136,39 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE <U0E01> \xA1 |0 # THAI CHARACTER KO KAI <U0E02> \xA2 |0 # THAI CHARACTER KHO KHAI @@ -228,10 +228,6 @@ CHARMAP <U0E38> \xD8 |0 # THAI CHARACTER SARA U <U0E39> \xD9 |0 # THAI CHARACTER SARA UU <U0E3A> \xDA |0 # THAI CHARACTER PHINTHU -<U00DB> \xDB |0 # LATIN CAPITAL LETTER U WITH CIRCUMFLEX -<U00DC> \xDC |0 # LATIN CAPITAL LETTER U WITH DIAERESIS -<U00DD> \xDD |0 # LATIN CAPITAL LETTER Y WITH ACUTE -<U00DE> \xDE |0 # LATIN CAPITAL LETTER THORN <U0E3F> \xDF |0 # THAI CURRENCY SYMBOL BAHT <U0E40> \xE0 |0 # THAI CHARACTER SARA E <U0E41> \xE1 |0 # THAI CHARACTER SARA AE @@ -261,8 +257,4 @@ CHARMAP <U0E59> \xF9 |0 # THAI DIGIT NINE <U0E5A> \xFA |0 # THAI CHARACTER ANGKHANKHU <U0E5B> \xFB |0 # THAI CHARACTER KHOMUT -<U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS -<U00FD> \xFD |0 # LATIN SMALL LETTER Y WITH ACUTE -<U00FE> \xFE |0 # LATIN SMALL LETTER THORN -<U00FF> \xFF |0 # LATIN SMALL LETTER Y WITH DIAERESIS END CHARMAP diff --git a/ext/Encode/ucm/8859-13.ucm b/ext/Encode/ucm/8859-13.ucm index f3a5eb44da..ee48e6c4d9 100644 --- a/ext/Encode/ucm/8859-13.ucm +++ b/ext/Encode/ucm/8859-13.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-13.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-13.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-13.TXT # -# ./compile -n iso-8859-13 -o Encode/iso8859-13.ucm Encode/iso8859-13.enc <code_set_name> "iso-8859-13" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,133 +136,133 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE -<U201D> \xA1 |0 # RIGHT DOUBLE QUOTATION MARK <U00A2> \xA2 |0 # CENT SIGN <U00A3> \xA3 |0 # POUND SIGN <U00A4> \xA4 |0 # CURRENCY SIGN -<U201E> \xA5 |0 # DOUBLE LOW-9 QUOTATION MARK <U00A6> \xA6 |0 # BROKEN BAR <U00A7> \xA7 |0 # SECTION SIGN -<U00D8> \xA8 |0 # LATIN CAPITAL LETTER O WITH STROKE <U00A9> \xA9 |0 # COPYRIGHT SIGN -<U0156> \xAA |0 # LATIN CAPITAL LETTER R WITH CEDILLA <U00AB> \xAB |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK <U00AC> \xAC |0 # NOT SIGN <U00AD> \xAD |0 # SOFT HYPHEN <U00AE> \xAE |0 # REGISTERED SIGN -<U00C6> \xAF |0 # LATIN CAPITAL LETTER AE <U00B0> \xB0 |0 # DEGREE SIGN <U00B1> \xB1 |0 # PLUS-MINUS SIGN <U00B2> \xB2 |0 # SUPERSCRIPT TWO <U00B3> \xB3 |0 # SUPERSCRIPT THREE -<U201C> \xB4 |0 # LEFT DOUBLE QUOTATION MARK <U00B5> \xB5 |0 # MICRO SIGN <U00B6> \xB6 |0 # PILCROW SIGN <U00B7> \xB7 |0 # MIDDLE DOT -<U00F8> \xB8 |0 # LATIN SMALL LETTER O WITH STROKE <U00B9> \xB9 |0 # SUPERSCRIPT ONE -<U0157> \xBA |0 # LATIN SMALL LETTER R WITH CEDILLA <U00BB> \xBB |0 # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK <U00BC> \xBC |0 # VULGAR FRACTION ONE QUARTER <U00BD> \xBD |0 # VULGAR FRACTION ONE HALF <U00BE> \xBE |0 # VULGAR FRACTION THREE QUARTERS -<U00E6> \xBF |0 # LATIN SMALL LETTER AE -<U0104> \xC0 |0 # LATIN CAPITAL LETTER A WITH OGONEK -<U012E> \xC1 |0 # LATIN CAPITAL LETTER I WITH OGONEK -<U0100> \xC2 |0 # LATIN CAPITAL LETTER A WITH MACRON -<U0106> \xC3 |0 # LATIN CAPITAL LETTER C WITH ACUTE <U00C4> \xC4 |0 # LATIN CAPITAL LETTER A WITH DIAERESIS <U00C5> \xC5 |0 # LATIN CAPITAL LETTER A WITH RING ABOVE -<U0118> \xC6 |0 # LATIN CAPITAL LETTER E WITH OGONEK -<U0112> \xC7 |0 # LATIN CAPITAL LETTER E WITH MACRON -<U010C> \xC8 |0 # LATIN CAPITAL LETTER C WITH CARON +<U00C6> \xAF |0 # LATIN CAPITAL LETTER AE <U00C9> \xC9 |0 # LATIN CAPITAL LETTER E WITH ACUTE -<U0179> \xCA |0 # LATIN CAPITAL LETTER Z WITH ACUTE -<U0116> \xCB |0 # LATIN CAPITAL LETTER E WITH DOT ABOVE -<U0122> \xCC |0 # LATIN CAPITAL LETTER G WITH CEDILLA -<U0136> \xCD |0 # LATIN CAPITAL LETTER K WITH CEDILLA -<U012A> \xCE |0 # LATIN CAPITAL LETTER I WITH MACRON -<U013B> \xCF |0 # LATIN CAPITAL LETTER L WITH CEDILLA -<U0160> \xD0 |0 # LATIN CAPITAL LETTER S WITH CARON -<U0143> \xD1 |0 # LATIN CAPITAL LETTER N WITH ACUTE -<U0145> \xD2 |0 # LATIN CAPITAL LETTER N WITH CEDILLA <U00D3> \xD3 |0 # LATIN CAPITAL LETTER O WITH ACUTE -<U014C> \xD4 |0 # LATIN CAPITAL LETTER O WITH MACRON <U00D5> \xD5 |0 # LATIN CAPITAL LETTER O WITH TILDE <U00D6> \xD6 |0 # LATIN CAPITAL LETTER O WITH DIAERESIS <U00D7> \xD7 |0 # MULTIPLICATION SIGN -<U0172> \xD8 |0 # LATIN CAPITAL LETTER U WITH OGONEK -<U0141> \xD9 |0 # LATIN CAPITAL LETTER L WITH STROKE -<U015A> \xDA |0 # LATIN CAPITAL LETTER S WITH ACUTE -<U016A> \xDB |0 # LATIN CAPITAL LETTER U WITH MACRON +<U00D8> \xA8 |0 # LATIN CAPITAL LETTER O WITH STROKE <U00DC> \xDC |0 # LATIN CAPITAL LETTER U WITH DIAERESIS -<U017B> \xDD |0 # LATIN CAPITAL LETTER Z WITH DOT ABOVE -<U017D> \xDE |0 # LATIN CAPITAL LETTER Z WITH CARON <U00DF> \xDF |0 # LATIN SMALL LETTER SHARP S -<U0105> \xE0 |0 # LATIN SMALL LETTER A WITH OGONEK -<U012F> \xE1 |0 # LATIN SMALL LETTER I WITH OGONEK -<U0101> \xE2 |0 # LATIN SMALL LETTER A WITH MACRON -<U0107> \xE3 |0 # LATIN SMALL LETTER C WITH ACUTE <U00E4> \xE4 |0 # LATIN SMALL LETTER A WITH DIAERESIS <U00E5> \xE5 |0 # LATIN SMALL LETTER A WITH RING ABOVE -<U0119> \xE6 |0 # LATIN SMALL LETTER E WITH OGONEK -<U0113> \xE7 |0 # LATIN SMALL LETTER E WITH MACRON -<U010D> \xE8 |0 # LATIN SMALL LETTER C WITH CARON +<U00E6> \xBF |0 # LATIN SMALL LETTER AE <U00E9> \xE9 |0 # LATIN SMALL LETTER E WITH ACUTE -<U017A> \xEA |0 # LATIN SMALL LETTER Z WITH ACUTE +<U00F3> \xF3 |0 # LATIN SMALL LETTER O WITH ACUTE +<U00F5> \xF5 |0 # LATIN SMALL LETTER O WITH TILDE +<U00F6> \xF6 |0 # LATIN SMALL LETTER O WITH DIAERESIS +<U00F7> \xF7 |0 # DIVISION SIGN +<U00F8> \xB8 |0 # LATIN SMALL LETTER O WITH STROKE +<U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS +<U0100> \xC2 |0 # LATIN CAPITAL LETTER A WITH MACRON +<U0101> \xE2 |0 # LATIN SMALL LETTER A WITH MACRON +<U0104> \xC0 |0 # LATIN CAPITAL LETTER A WITH OGONEK +<U0105> \xE0 |0 # LATIN SMALL LETTER A WITH OGONEK +<U0106> \xC3 |0 # LATIN CAPITAL LETTER C WITH ACUTE +<U0107> \xE3 |0 # LATIN SMALL LETTER C WITH ACUTE +<U010C> \xC8 |0 # LATIN CAPITAL LETTER C WITH CARON +<U010D> \xE8 |0 # LATIN SMALL LETTER C WITH CARON +<U0112> \xC7 |0 # LATIN CAPITAL LETTER E WITH MACRON +<U0113> \xE7 |0 # LATIN SMALL LETTER E WITH MACRON +<U0116> \xCB |0 # LATIN CAPITAL LETTER E WITH DOT ABOVE <U0117> \xEB |0 # LATIN SMALL LETTER E WITH DOT ABOVE +<U0118> \xC6 |0 # LATIN CAPITAL LETTER E WITH OGONEK +<U0119> \xE6 |0 # LATIN SMALL LETTER E WITH OGONEK +<U0122> \xCC |0 # LATIN CAPITAL LETTER G WITH CEDILLA <U0123> \xEC |0 # LATIN SMALL LETTER G WITH CEDILLA -<U0137> \xED |0 # LATIN SMALL LETTER K WITH CEDILLA +<U012A> \xCE |0 # LATIN CAPITAL LETTER I WITH MACRON <U012B> \xEE |0 # LATIN SMALL LETTER I WITH MACRON +<U012E> \xC1 |0 # LATIN CAPITAL LETTER I WITH OGONEK +<U012F> \xE1 |0 # LATIN SMALL LETTER I WITH OGONEK +<U0136> \xCD |0 # LATIN CAPITAL LETTER K WITH CEDILLA +<U0137> \xED |0 # LATIN SMALL LETTER K WITH CEDILLA +<U013B> \xCF |0 # LATIN CAPITAL LETTER L WITH CEDILLA <U013C> \xEF |0 # LATIN SMALL LETTER L WITH CEDILLA -<U0161> \xF0 |0 # LATIN SMALL LETTER S WITH CARON +<U0141> \xD9 |0 # LATIN CAPITAL LETTER L WITH STROKE +<U0142> \xF9 |0 # LATIN SMALL LETTER L WITH STROKE +<U0143> \xD1 |0 # LATIN CAPITAL LETTER N WITH ACUTE <U0144> \xF1 |0 # LATIN SMALL LETTER N WITH ACUTE +<U0145> \xD2 |0 # LATIN CAPITAL LETTER N WITH CEDILLA <U0146> \xF2 |0 # LATIN SMALL LETTER N WITH CEDILLA -<U00F3> \xF3 |0 # LATIN SMALL LETTER O WITH ACUTE +<U014C> \xD4 |0 # LATIN CAPITAL LETTER O WITH MACRON <U014D> \xF4 |0 # LATIN SMALL LETTER O WITH MACRON -<U00F5> \xF5 |0 # LATIN SMALL LETTER O WITH TILDE -<U00F6> \xF6 |0 # LATIN SMALL LETTER O WITH DIAERESIS -<U00F7> \xF7 |0 # DIVISION SIGN -<U0173> \xF8 |0 # LATIN SMALL LETTER U WITH OGONEK -<U0142> \xF9 |0 # LATIN SMALL LETTER L WITH STROKE +<U0156> \xAA |0 # LATIN CAPITAL LETTER R WITH CEDILLA +<U0157> \xBA |0 # LATIN SMALL LETTER R WITH CEDILLA +<U015A> \xDA |0 # LATIN CAPITAL LETTER S WITH ACUTE <U015B> \xFA |0 # LATIN SMALL LETTER S WITH ACUTE +<U0160> \xD0 |0 # LATIN CAPITAL LETTER S WITH CARON +<U0161> \xF0 |0 # LATIN SMALL LETTER S WITH CARON +<U016A> \xDB |0 # LATIN CAPITAL LETTER U WITH MACRON <U016B> \xFB |0 # LATIN SMALL LETTER U WITH MACRON -<U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS +<U0172> \xD8 |0 # LATIN CAPITAL LETTER U WITH OGONEK +<U0173> \xF8 |0 # LATIN SMALL LETTER U WITH OGONEK +<U0179> \xCA |0 # LATIN CAPITAL LETTER Z WITH ACUTE +<U017A> \xEA |0 # LATIN SMALL LETTER Z WITH ACUTE +<U017B> \xDD |0 # LATIN CAPITAL LETTER Z WITH DOT ABOVE <U017C> \xFD |0 # LATIN SMALL LETTER Z WITH DOT ABOVE +<U017D> \xDE |0 # LATIN CAPITAL LETTER Z WITH CARON <U017E> \xFE |0 # LATIN SMALL LETTER Z WITH CARON <U2019> \xFF |0 # RIGHT SINGLE QUOTATION MARK +<U201C> \xB4 |0 # LEFT DOUBLE QUOTATION MARK +<U201D> \xA1 |0 # RIGHT DOUBLE QUOTATION MARK +<U201E> \xA5 |0 # DOUBLE LOW-9 QUOTATION MARK END CHARMAP diff --git a/ext/Encode/ucm/8859-14.ucm b/ext/Encode/ucm/8859-14.ucm index a427b82bd3..4e9a9d3f32 100644 --- a/ext/Encode/ucm/8859-14.ucm +++ b/ext/Encode/ucm/8859-14.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-14.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-14.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-14.TXT # -# ./compile -n iso-8859-14 -o Encode/iso8859-14.ucm Encode/iso8859-14.enc <code_set_name> "iso-8859-14" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,71 +136,46 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE -<U1E02> \xA1 |0 # LATIN CAPITAL LETTER B WITH DOT ABOVE -<U1E03> \xA2 |0 # LATIN SMALL LETTER B WITH DOT ABOVE <U00A3> \xA3 |0 # POUND SIGN -<U010A> \xA4 |0 # LATIN CAPITAL LETTER C WITH DOT ABOVE -<U010B> \xA5 |0 # LATIN SMALL LETTER C WITH DOT ABOVE -<U1E0A> \xA6 |0 # LATIN CAPITAL LETTER D WITH DOT ABOVE <U00A7> \xA7 |0 # SECTION SIGN -<U1E80> \xA8 |0 # LATIN CAPITAL LETTER W WITH GRAVE <U00A9> \xA9 |0 # COPYRIGHT SIGN -<U1E82> \xAA |0 # LATIN CAPITAL LETTER W WITH ACUTE -<U1E0B> \xAB |0 # LATIN SMALL LETTER D WITH DOT ABOVE -<U1EF2> \xAC |0 # LATIN CAPITAL LETTER Y WITH GRAVE <U00AD> \xAD |0 # SOFT HYPHEN <U00AE> \xAE |0 # REGISTERED SIGN -<U0178> \xAF |0 # LATIN CAPITAL LETTER Y WITH DIAERESIS -<U1E1E> \xB0 |0 # LATIN CAPITAL LETTER F WITH DOT ABOVE -<U1E1F> \xB1 |0 # LATIN SMALL LETTER F WITH DOT ABOVE -<U0120> \xB2 |0 # LATIN CAPITAL LETTER G WITH DOT ABOVE -<U0121> \xB3 |0 # LATIN SMALL LETTER G WITH DOT ABOVE -<U1E40> \xB4 |0 # LATIN CAPITAL LETTER M WITH DOT ABOVE -<U1E41> \xB5 |0 # LATIN SMALL LETTER M WITH DOT ABOVE <U00B6> \xB6 |0 # PILCROW SIGN -<U1E56> \xB7 |0 # LATIN CAPITAL LETTER P WITH DOT ABOVE -<U1E81> \xB8 |0 # LATIN SMALL LETTER W WITH GRAVE -<U1E57> \xB9 |0 # LATIN SMALL LETTER P WITH DOT ABOVE -<U1E83> \xBA |0 # LATIN SMALL LETTER W WITH ACUTE -<U1E60> \xBB |0 # LATIN CAPITAL LETTER S WITH DOT ABOVE -<U1EF3> \xBC |0 # LATIN SMALL LETTER Y WITH GRAVE -<U1E84> \xBD |0 # LATIN CAPITAL LETTER W WITH DIAERESIS -<U1E85> \xBE |0 # LATIN SMALL LETTER W WITH DIAERESIS -<U1E61> \xBF |0 # LATIN SMALL LETTER S WITH DOT ABOVE <U00C0> \xC0 |0 # LATIN CAPITAL LETTER A WITH GRAVE <U00C1> \xC1 |0 # LATIN CAPITAL LETTER A WITH ACUTE <U00C2> \xC2 |0 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX @@ -216,21 +192,18 @@ CHARMAP <U00CD> \xCD |0 # LATIN CAPITAL LETTER I WITH ACUTE <U00CE> \xCE |0 # LATIN CAPITAL LETTER I WITH CIRCUMFLEX <U00CF> \xCF |0 # LATIN CAPITAL LETTER I WITH DIAERESIS -<U0174> \xD0 |0 # LATIN CAPITAL LETTER W WITH CIRCUMFLEX <U00D1> \xD1 |0 # LATIN CAPITAL LETTER N WITH TILDE <U00D2> \xD2 |0 # LATIN CAPITAL LETTER O WITH GRAVE <U00D3> \xD3 |0 # LATIN CAPITAL LETTER O WITH ACUTE <U00D4> \xD4 |0 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX <U00D5> \xD5 |0 # LATIN CAPITAL LETTER O WITH TILDE <U00D6> \xD6 |0 # LATIN CAPITAL LETTER O WITH DIAERESIS -<U1E6A> \xD7 |0 # LATIN CAPITAL LETTER T WITH DOT ABOVE <U00D8> \xD8 |0 # LATIN CAPITAL LETTER O WITH STROKE <U00D9> \xD9 |0 # LATIN CAPITAL LETTER U WITH GRAVE <U00DA> \xDA |0 # LATIN CAPITAL LETTER U WITH ACUTE <U00DB> \xDB |0 # LATIN CAPITAL LETTER U WITH CIRCUMFLEX <U00DC> \xDC |0 # LATIN CAPITAL LETTER U WITH DIAERESIS <U00DD> \xDD |0 # LATIN CAPITAL LETTER Y WITH ACUTE -<U0176> \xDE |0 # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX <U00DF> \xDF |0 # LATIN SMALL LETTER SHARP S <U00E0> \xE0 |0 # LATIN SMALL LETTER A WITH GRAVE <U00E1> \xE1 |0 # LATIN SMALL LETTER A WITH ACUTE @@ -248,20 +221,48 @@ CHARMAP <U00ED> \xED |0 # LATIN SMALL LETTER I WITH ACUTE <U00EE> \xEE |0 # LATIN SMALL LETTER I WITH CIRCUMFLEX <U00EF> \xEF |0 # LATIN SMALL LETTER I WITH DIAERESIS -<U0175> \xF0 |0 # LATIN SMALL LETTER W WITH CIRCUMFLEX <U00F1> \xF1 |0 # LATIN SMALL LETTER N WITH TILDE <U00F2> \xF2 |0 # LATIN SMALL LETTER O WITH GRAVE <U00F3> \xF3 |0 # LATIN SMALL LETTER O WITH ACUTE <U00F4> \xF4 |0 # LATIN SMALL LETTER O WITH CIRCUMFLEX <U00F5> \xF5 |0 # LATIN SMALL LETTER O WITH TILDE <U00F6> \xF6 |0 # LATIN SMALL LETTER O WITH DIAERESIS -<U1E6B> \xF7 |0 # LATIN SMALL LETTER T WITH DOT ABOVE <U00F8> \xF8 |0 # LATIN SMALL LETTER O WITH STROKE <U00F9> \xF9 |0 # LATIN SMALL LETTER U WITH GRAVE <U00FA> \xFA |0 # LATIN SMALL LETTER U WITH ACUTE <U00FB> \xFB |0 # LATIN SMALL LETTER U WITH CIRCUMFLEX <U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS <U00FD> \xFD |0 # LATIN SMALL LETTER Y WITH ACUTE -<U0177> \xFE |0 # LATIN SMALL LETTER Y WITH CIRCUMFLEX <U00FF> \xFF |0 # LATIN SMALL LETTER Y WITH DIAERESIS +<U010A> \xA4 |0 # LATIN CAPITAL LETTER C WITH DOT ABOVE +<U010B> \xA5 |0 # LATIN SMALL LETTER C WITH DOT ABOVE +<U0120> \xB2 |0 # LATIN CAPITAL LETTER G WITH DOT ABOVE +<U0121> \xB3 |0 # LATIN SMALL LETTER G WITH DOT ABOVE +<U0174> \xD0 |0 # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +<U0175> \xF0 |0 # LATIN SMALL LETTER W WITH CIRCUMFLEX +<U0176> \xDE |0 # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +<U0177> \xFE |0 # LATIN SMALL LETTER Y WITH CIRCUMFLEX +<U0178> \xAF |0 # LATIN CAPITAL LETTER Y WITH DIAERESIS +<U1E02> \xA1 |0 # LATIN CAPITAL LETTER B WITH DOT ABOVE +<U1E03> \xA2 |0 # LATIN SMALL LETTER B WITH DOT ABOVE +<U1E0A> \xA6 |0 # LATIN CAPITAL LETTER D WITH DOT ABOVE +<U1E0B> \xAB |0 # LATIN SMALL LETTER D WITH DOT ABOVE +<U1E1E> \xB0 |0 # LATIN CAPITAL LETTER F WITH DOT ABOVE +<U1E1F> \xB1 |0 # LATIN SMALL LETTER F WITH DOT ABOVE +<U1E40> \xB4 |0 # LATIN CAPITAL LETTER M WITH DOT ABOVE +<U1E41> \xB5 |0 # LATIN SMALL LETTER M WITH DOT ABOVE +<U1E56> \xB7 |0 # LATIN CAPITAL LETTER P WITH DOT ABOVE +<U1E57> \xB9 |0 # LATIN SMALL LETTER P WITH DOT ABOVE +<U1E60> \xBB |0 # LATIN CAPITAL LETTER S WITH DOT ABOVE +<U1E61> \xBF |0 # LATIN SMALL LETTER S WITH DOT ABOVE +<U1E6A> \xD7 |0 # LATIN CAPITAL LETTER T WITH DOT ABOVE +<U1E6B> \xF7 |0 # LATIN SMALL LETTER T WITH DOT ABOVE +<U1E80> \xA8 |0 # LATIN CAPITAL LETTER W WITH GRAVE +<U1E81> \xB8 |0 # LATIN SMALL LETTER W WITH GRAVE +<U1E82> \xAA |0 # LATIN CAPITAL LETTER W WITH ACUTE +<U1E83> \xBA |0 # LATIN SMALL LETTER W WITH ACUTE +<U1E84> \xBD |0 # LATIN CAPITAL LETTER W WITH DIAERESIS +<U1E85> \xBE |0 # LATIN SMALL LETTER W WITH DIAERESIS +<U1EF2> \xAC |0 # LATIN CAPITAL LETTER Y WITH GRAVE +<U1EF3> \xBC |0 # LATIN SMALL LETTER Y WITH GRAVE END CHARMAP diff --git a/ext/Encode/ucm/8859-15.ucm b/ext/Encode/ucm/8859-15.ucm index 69cf924bdb..7b722049e0 100644 --- a/ext/Encode/ucm/8859-15.ucm +++ b/ext/Encode/ucm/8859-15.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-15.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-15.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-15.TXT # -# ./compile -n iso-8859-15 -o Encode/iso8859-15.ucm Encode/iso8859-15.enc <code_set_name> "iso-8859-15" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,48 +136,45 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE <U00A1> \xA1 |0 # INVERTED EXCLAMATION MARK <U00A2> \xA2 |0 # CENT SIGN <U00A3> \xA3 |0 # POUND SIGN -<U20AC> \xA4 |0 # EURO SIGN <U00A5> \xA5 |0 # YEN SIGN -<U0160> \xA6 |0 # LATIN CAPITAL LETTER S WITH CARON <U00A7> \xA7 |0 # SECTION SIGN -<U0161> \xA8 |0 # LATIN SMALL LETTER S WITH CARON <U00A9> \xA9 |0 # COPYRIGHT SIGN <U00AA> \xAA |0 # FEMININE ORDINAL INDICATOR <U00AB> \xAB |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -188,17 +186,12 @@ CHARMAP <U00B1> \xB1 |0 # PLUS-MINUS SIGN <U00B2> \xB2 |0 # SUPERSCRIPT TWO <U00B3> \xB3 |0 # SUPERSCRIPT THREE -<U017D> \xB4 |0 # LATIN CAPITAL LETTER Z WITH CARON <U00B5> \xB5 |0 # MICRO SIGN <U00B6> \xB6 |0 # PILCROW SIGN <U00B7> \xB7 |0 # MIDDLE DOT -<U017E> \xB8 |0 # LATIN SMALL LETTER Z WITH CARON <U00B9> \xB9 |0 # SUPERSCRIPT ONE <U00BA> \xBA |0 # MASCULINE ORDINAL INDICATOR <U00BB> \xBB |0 # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -<U0152> \xBC |0 # LATIN CAPITAL LIGATURE OE -<U0153> \xBD |0 # LATIN SMALL LIGATURE OE -<U0178> \xBE |0 # LATIN CAPITAL LETTER Y WITH DIAERESIS <U00BF> \xBF |0 # INVERTED QUESTION MARK <U00C0> \xC0 |0 # LATIN CAPITAL LETTER A WITH GRAVE <U00C1> \xC1 |0 # LATIN CAPITAL LETTER A WITH ACUTE @@ -264,4 +257,12 @@ CHARMAP <U00FD> \xFD |0 # LATIN SMALL LETTER Y WITH ACUTE <U00FE> \xFE |0 # LATIN SMALL LETTER THORN <U00FF> \xFF |0 # LATIN SMALL LETTER Y WITH DIAERESIS +<U0152> \xBC |0 # LATIN CAPITAL LIGATURE OE +<U0153> \xBD |0 # LATIN SMALL LIGATURE OE +<U0160> \xA6 |0 # LATIN CAPITAL LETTER S WITH CARON +<U0161> \xA8 |0 # LATIN SMALL LETTER S WITH CARON +<U0178> \xBE |0 # LATIN CAPITAL LETTER Y WITH DIAERESIS +<U017D> \xB4 |0 # LATIN CAPITAL LETTER Z WITH CARON +<U017E> \xB8 |0 # LATIN SMALL LETTER Z WITH CARON +<U20AC> \xA4 |0 # EURO SIGN END CHARMAP diff --git a/ext/Encode/ucm/8859-16.ucm b/ext/Encode/ucm/8859-16.ucm index 8cc38991a7..87563aabc4 100644 --- a/ext/Encode/ucm/8859-16.ucm +++ b/ext/Encode/ucm/8859-16.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-16.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-16.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-16.TXT # -# ./compile -n iso-8859-16 -o Encode/iso8859-16.ucm Encode/iso8859-16.enc <code_set_name> "iso-8859-16" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,77 +136,53 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE -<U0104> \xA1 |0 # LATIN CAPITAL LETTER A WITH OGONEK -<U0105> \xA2 |0 # LATIN SMALL LETTER A WITH OGONEK -<U0141> \xA3 |0 # LATIN CAPITAL LETTER L WITH STROKE -<U20AC> \xA4 |0 # EURO SIGN -<U00AB> \xA5 |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -<U0160> \xA6 |0 # LATIN CAPITAL LETTER S WITH CARON <U00A7> \xA7 |0 # SECTION SIGN -<U0161> \xA8 |0 # LATIN SMALL LETTER S WITH CARON <U00A9> \xA9 |0 # COPYRIGHT SIGN -<U0218> \xAA |0 # LATIN CAPITAL LETTER S WITH COMMA BELOW -<U201E> \xAB |0 # DOUBLE LOW-9 QUOTATION MARK -<U0179> \xAC |0 # LATIN CAPITAL LETTER Z WITH ACUTE +<U00AB> \xAB |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK <U00AD> \xAD |0 # SOFT HYPHEN -<U017A> \xAE |0 # LATIN SMALL LETTER Z WITH ACUTE -<U017B> \xAF |0 # LATIN CAPITAL LETTER Z WITH DOT ABOVE <U00B0> \xB0 |0 # DEGREE SIGN <U00B1> \xB1 |0 # PLUS-MINUS SIGN -<U010C> \xB2 |0 # LATIN CAPITAL LETTER C WITH CARON -<U0142> \xB3 |0 # LATIN SMALL LETTER L WITH STROKE -<U017D> \xB4 |0 # LATIN CAPITAL LETTER Z WITH CARON -<U201D> \xB5 |0 # RIGHT DOUBLE QUOTATION MARK <U00B6> \xB6 |0 # PILCROW SIGN <U00B7> \xB7 |0 # MIDDLE DOT -<U017E> \xB8 |0 # LATIN SMALL LETTER Z WITH CARON -<U010D> \xB9 |0 # LATIN SMALL LETTER C WITH CARON -<U0219> \xBA |0 # LATIN SMALL LETTER S WITH COMMA BELOW <U00BB> \xBB |0 # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -<U0152> \xBC |0 # LATIN CAPITAL LIGATURE OE -<U0153> \xBD |0 # LATIN SMALL LIGATURE OE -<U0178> \xBE |0 # LATIN CAPITAL LETTER Y WITH DIAERESIS -<U017C> \xBF |0 # LATIN SMALL LETTER Z WITH DOT ABOVE <U00C0> \xC0 |0 # LATIN CAPITAL LETTER A WITH GRAVE <U00C1> \xC1 |0 # LATIN CAPITAL LETTER A WITH ACUTE <U00C2> \xC2 |0 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX -<U0102> \xC3 |0 # LATIN CAPITAL LETTER A WITH BREVE <U00C4> \xC4 |0 # LATIN CAPITAL LETTER A WITH DIAERESIS -<U0106> \xC5 |0 # LATIN CAPITAL LETTER C WITH ACUTE <U00C6> \xC6 |0 # LATIN CAPITAL LETTER AE <U00C7> \xC7 |0 # LATIN CAPITAL LETTER C WITH CEDILLA <U00C8> \xC8 |0 # LATIN CAPITAL LETTER E WITH GRAVE @@ -216,28 +193,19 @@ CHARMAP <U00CD> \xCD |0 # LATIN CAPITAL LETTER I WITH ACUTE <U00CE> \xCE |0 # LATIN CAPITAL LETTER I WITH CIRCUMFLEX <U00CF> \xCF |0 # LATIN CAPITAL LETTER I WITH DIAERESIS -<U0110> \xD0 |0 # LATIN CAPITAL LETTER D WITH STROKE -<U0143> \xD1 |0 # LATIN CAPITAL LETTER N WITH ACUTE <U00D2> \xD2 |0 # LATIN CAPITAL LETTER O WITH GRAVE <U00D3> \xD3 |0 # LATIN CAPITAL LETTER O WITH ACUTE <U00D4> \xD4 |0 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX -<U0150> \xD5 |0 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE <U00D6> \xD6 |0 # LATIN CAPITAL LETTER O WITH DIAERESIS -<U015A> \xD7 |0 # LATIN CAPITAL LETTER S WITH ACUTE -<U0170> \xD8 |0 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE <U00D9> \xD9 |0 # LATIN CAPITAL LETTER U WITH GRAVE <U00DA> \xDA |0 # LATIN CAPITAL LETTER U WITH ACUTE <U00DB> \xDB |0 # LATIN CAPITAL LETTER U WITH CIRCUMFLEX <U00DC> \xDC |0 # LATIN CAPITAL LETTER U WITH DIAERESIS -<U0118> \xDD |0 # LATIN CAPITAL LETTER E WITH OGONEK -<U021A> \xDE |0 # LATIN CAPITAL LETTER T WITH COMMA BELOW <U00DF> \xDF |0 # LATIN SMALL LETTER SHARP S <U00E0> \xE0 |0 # LATIN SMALL LETTER A WITH GRAVE <U00E1> \xE1 |0 # LATIN SMALL LETTER A WITH ACUTE <U00E2> \xE2 |0 # LATIN SMALL LETTER A WITH CIRCUMFLEX -<U0103> \xE3 |0 # LATIN SMALL LETTER A WITH BREVE <U00E4> \xE4 |0 # LATIN SMALL LETTER A WITH DIAERESIS -<U0107> \xE5 |0 # LATIN SMALL LETTER C WITH ACUTE <U00E6> \xE6 |0 # LATIN SMALL LETTER AE <U00E7> \xE7 |0 # LATIN SMALL LETTER C WITH CEDILLA <U00E8> \xE8 |0 # LATIN SMALL LETTER E WITH GRAVE @@ -248,20 +216,53 @@ CHARMAP <U00ED> \xED |0 # LATIN SMALL LETTER I WITH ACUTE <U00EE> \xEE |0 # LATIN SMALL LETTER I WITH CIRCUMFLEX <U00EF> \xEF |0 # LATIN SMALL LETTER I WITH DIAERESIS -<U0111> \xF0 |0 # LATIN SMALL LETTER D WITH STROKE -<U0144> \xF1 |0 # LATIN SMALL LETTER N WITH ACUTE <U00F2> \xF2 |0 # LATIN SMALL LETTER O WITH GRAVE <U00F3> \xF3 |0 # LATIN SMALL LETTER O WITH ACUTE <U00F4> \xF4 |0 # LATIN SMALL LETTER O WITH CIRCUMFLEX -<U0151> \xF5 |0 # LATIN SMALL LETTER O WITH DOUBLE ACUTE <U00F6> \xF6 |0 # LATIN SMALL LETTER O WITH DIAERESIS -<U015B> \xF7 |0 # LATIN SMALL LETTER S WITH ACUTE -<U0171> \xF8 |0 # LATIN SMALL LETTER U WITH DOUBLE ACUTE <U00F9> \xF9 |0 # LATIN SMALL LETTER U WITH GRAVE <U00FA> \xFA |0 # LATIN SMALL LETTER U WITH ACUTE <U00FB> \xFB |0 # LATIN SMALL LETTER U WITH CIRCUMFLEX <U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS +<U00FF> \xFF |0 # LATIN SMALL LETTER Y WITH DIAERESIS +<U0102> \xC3 |0 # LATIN CAPITAL LETTER A WITH BREVE +<U0103> \xE3 |0 # LATIN SMALL LETTER A WITH BREVE +<U0104> \xA1 |0 # LATIN CAPITAL LETTER A WITH OGONEK +<U0105> \xA2 |0 # LATIN SMALL LETTER A WITH OGONEK +<U0106> \xC5 |0 # LATIN CAPITAL LETTER C WITH ACUTE +<U0107> \xE5 |0 # LATIN SMALL LETTER C WITH ACUTE +<U010C> \xB2 |0 # LATIN CAPITAL LETTER C WITH CARON +<U010D> \xB9 |0 # LATIN SMALL LETTER C WITH CARON +<U0110> \xD0 |0 # LATIN CAPITAL LETTER D WITH STROKE +<U0111> \xF0 |0 # LATIN SMALL LETTER D WITH STROKE +<U0118> \xDD |0 # LATIN CAPITAL LETTER E WITH OGONEK <U0119> \xFD |0 # LATIN SMALL LETTER E WITH OGONEK +<U0141> \xA3 |0 # LATIN CAPITAL LETTER L WITH STROKE +<U0142> \xB3 |0 # LATIN SMALL LETTER L WITH STROKE +<U0143> \xD1 |0 # LATIN CAPITAL LETTER N WITH ACUTE +<U0144> \xF1 |0 # LATIN SMALL LETTER N WITH ACUTE +<U0150> \xD5 |0 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +<U0151> \xF5 |0 # LATIN SMALL LETTER O WITH DOUBLE ACUTE +<U0152> \xBC |0 # LATIN CAPITAL LIGATURE OE +<U0153> \xBD |0 # LATIN SMALL LIGATURE OE +<U015A> \xD7 |0 # LATIN CAPITAL LETTER S WITH ACUTE +<U015B> \xF7 |0 # LATIN SMALL LETTER S WITH ACUTE +<U0160> \xA6 |0 # LATIN CAPITAL LETTER S WITH CARON +<U0161> \xA8 |0 # LATIN SMALL LETTER S WITH CARON +<U0170> \xD8 |0 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +<U0171> \xF8 |0 # LATIN SMALL LETTER U WITH DOUBLE ACUTE +<U0178> \xBE |0 # LATIN CAPITAL LETTER Y WITH DIAERESIS +<U0179> \xAC |0 # LATIN CAPITAL LETTER Z WITH ACUTE +<U017A> \xAE |0 # LATIN SMALL LETTER Z WITH ACUTE +<U017B> \xAF |0 # LATIN CAPITAL LETTER Z WITH DOT ABOVE +<U017C> \xBF |0 # LATIN SMALL LETTER Z WITH DOT ABOVE +<U017D> \xB4 |0 # LATIN CAPITAL LETTER Z WITH CARON +<U017E> \xB8 |0 # LATIN SMALL LETTER Z WITH CARON +<U0218> \xAA |0 # LATIN CAPITAL LETTER S WITH COMMA BELOW +<U0219> \xBA |0 # LATIN SMALL LETTER S WITH COMMA BELOW +<U021A> \xDE |0 # LATIN CAPITAL LETTER T WITH COMMA BELOW <U021B> \xFE |0 # LATIN SMALL LETTER T WITH COMMA BELOW -<U00FF> \xFF |0 # LATIN SMALL LETTER Y WITH DIAERESIS +<U201D> \xB5 |0 # RIGHT DOUBLE QUOTATION MARK +<U201E> \xA5 |0 # DOUBLE LOW-9 QUOTATION MARK +<U20AC> \xA4 |0 # EURO SIGN END CHARMAP diff --git a/ext/Encode/ucm/8859-2.ucm b/ext/Encode/ucm/8859-2.ucm index eeb8ee3aa7..9e2fd38356 100644 --- a/ext/Encode/ucm/8859-2.ucm +++ b/ext/Encode/ucm/8859-2.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-2.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-2.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-2.TXT # -# ./compile -n iso-8859-2 -o Encode/iso8859-2.ucm Encode/iso8859-2.enc <code_set_name> "iso-8859-2" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,133 +136,133 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE -<U0104> \xA1 |0 # LATIN CAPITAL LETTER A WITH OGONEK -<U02D8> \xA2 |0 # BREVE -<U0141> \xA3 |0 # LATIN CAPITAL LETTER L WITH STROKE <U00A4> \xA4 |0 # CURRENCY SIGN -<U013D> \xA5 |0 # LATIN CAPITAL LETTER L WITH CARON -<U015A> \xA6 |0 # LATIN CAPITAL LETTER S WITH ACUTE <U00A7> \xA7 |0 # SECTION SIGN <U00A8> \xA8 |0 # DIAERESIS -<U0160> \xA9 |0 # LATIN CAPITAL LETTER S WITH CARON -<U015E> \xAA |0 # LATIN CAPITAL LETTER S WITH CEDILLA -<U0164> \xAB |0 # LATIN CAPITAL LETTER T WITH CARON -<U0179> \xAC |0 # LATIN CAPITAL LETTER Z WITH ACUTE <U00AD> \xAD |0 # SOFT HYPHEN -<U017D> \xAE |0 # LATIN CAPITAL LETTER Z WITH CARON -<U017B> \xAF |0 # LATIN CAPITAL LETTER Z WITH DOT ABOVE <U00B0> \xB0 |0 # DEGREE SIGN -<U0105> \xB1 |0 # LATIN SMALL LETTER A WITH OGONEK -<U02DB> \xB2 |0 # OGONEK -<U0142> \xB3 |0 # LATIN SMALL LETTER L WITH STROKE <U00B4> \xB4 |0 # ACUTE ACCENT -<U013E> \xB5 |0 # LATIN SMALL LETTER L WITH CARON -<U015B> \xB6 |0 # LATIN SMALL LETTER S WITH ACUTE -<U02C7> \xB7 |0 # CARON <U00B8> \xB8 |0 # CEDILLA -<U0161> \xB9 |0 # LATIN SMALL LETTER S WITH CARON -<U015F> \xBA |0 # LATIN SMALL LETTER S WITH CEDILLA -<U0165> \xBB |0 # LATIN SMALL LETTER T WITH CARON -<U017A> \xBC |0 # LATIN SMALL LETTER Z WITH ACUTE -<U02DD> \xBD |0 # DOUBLE ACUTE ACCENT -<U017E> \xBE |0 # LATIN SMALL LETTER Z WITH CARON -<U017C> \xBF |0 # LATIN SMALL LETTER Z WITH DOT ABOVE -<U0154> \xC0 |0 # LATIN CAPITAL LETTER R WITH ACUTE <U00C1> \xC1 |0 # LATIN CAPITAL LETTER A WITH ACUTE <U00C2> \xC2 |0 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX -<U0102> \xC3 |0 # LATIN CAPITAL LETTER A WITH BREVE <U00C4> \xC4 |0 # LATIN CAPITAL LETTER A WITH DIAERESIS -<U0139> \xC5 |0 # LATIN CAPITAL LETTER L WITH ACUTE -<U0106> \xC6 |0 # LATIN CAPITAL LETTER C WITH ACUTE <U00C7> \xC7 |0 # LATIN CAPITAL LETTER C WITH CEDILLA -<U010C> \xC8 |0 # LATIN CAPITAL LETTER C WITH CARON <U00C9> \xC9 |0 # LATIN CAPITAL LETTER E WITH ACUTE -<U0118> \xCA |0 # LATIN CAPITAL LETTER E WITH OGONEK <U00CB> \xCB |0 # LATIN CAPITAL LETTER E WITH DIAERESIS -<U011A> \xCC |0 # LATIN CAPITAL LETTER E WITH CARON <U00CD> \xCD |0 # LATIN CAPITAL LETTER I WITH ACUTE <U00CE> \xCE |0 # LATIN CAPITAL LETTER I WITH CIRCUMFLEX -<U010E> \xCF |0 # LATIN CAPITAL LETTER D WITH CARON -<U0110> \xD0 |0 # LATIN CAPITAL LETTER D WITH STROKE -<U0143> \xD1 |0 # LATIN CAPITAL LETTER N WITH ACUTE -<U0147> \xD2 |0 # LATIN CAPITAL LETTER N WITH CARON <U00D3> \xD3 |0 # LATIN CAPITAL LETTER O WITH ACUTE <U00D4> \xD4 |0 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX -<U0150> \xD5 |0 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE <U00D6> \xD6 |0 # LATIN CAPITAL LETTER O WITH DIAERESIS <U00D7> \xD7 |0 # MULTIPLICATION SIGN -<U0158> \xD8 |0 # LATIN CAPITAL LETTER R WITH CARON -<U016E> \xD9 |0 # LATIN CAPITAL LETTER U WITH RING ABOVE <U00DA> \xDA |0 # LATIN CAPITAL LETTER U WITH ACUTE -<U0170> \xDB |0 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE <U00DC> \xDC |0 # LATIN CAPITAL LETTER U WITH DIAERESIS <U00DD> \xDD |0 # LATIN CAPITAL LETTER Y WITH ACUTE -<U0162> \xDE |0 # LATIN CAPITAL LETTER T WITH CEDILLA <U00DF> \xDF |0 # LATIN SMALL LETTER SHARP S -<U0155> \xE0 |0 # LATIN SMALL LETTER R WITH ACUTE <U00E1> \xE1 |0 # LATIN SMALL LETTER A WITH ACUTE <U00E2> \xE2 |0 # LATIN SMALL LETTER A WITH CIRCUMFLEX -<U0103> \xE3 |0 # LATIN SMALL LETTER A WITH BREVE <U00E4> \xE4 |0 # LATIN SMALL LETTER A WITH DIAERESIS -<U013A> \xE5 |0 # LATIN SMALL LETTER L WITH ACUTE -<U0107> \xE6 |0 # LATIN SMALL LETTER C WITH ACUTE <U00E7> \xE7 |0 # LATIN SMALL LETTER C WITH CEDILLA -<U010D> \xE8 |0 # LATIN SMALL LETTER C WITH CARON <U00E9> \xE9 |0 # LATIN SMALL LETTER E WITH ACUTE -<U0119> \xEA |0 # LATIN SMALL LETTER E WITH OGONEK <U00EB> \xEB |0 # LATIN SMALL LETTER E WITH DIAERESIS -<U011B> \xEC |0 # LATIN SMALL LETTER E WITH CARON <U00ED> \xED |0 # LATIN SMALL LETTER I WITH ACUTE <U00EE> \xEE |0 # LATIN SMALL LETTER I WITH CIRCUMFLEX -<U010F> \xEF |0 # LATIN SMALL LETTER D WITH CARON -<U0111> \xF0 |0 # LATIN SMALL LETTER D WITH STROKE -<U0144> \xF1 |0 # LATIN SMALL LETTER N WITH ACUTE -<U0148> \xF2 |0 # LATIN SMALL LETTER N WITH CARON <U00F3> \xF3 |0 # LATIN SMALL LETTER O WITH ACUTE <U00F4> \xF4 |0 # LATIN SMALL LETTER O WITH CIRCUMFLEX -<U0151> \xF5 |0 # LATIN SMALL LETTER O WITH DOUBLE ACUTE <U00F6> \xF6 |0 # LATIN SMALL LETTER O WITH DIAERESIS <U00F7> \xF7 |0 # DIVISION SIGN -<U0159> \xF8 |0 # LATIN SMALL LETTER R WITH CARON -<U016F> \xF9 |0 # LATIN SMALL LETTER U WITH RING ABOVE <U00FA> \xFA |0 # LATIN SMALL LETTER U WITH ACUTE -<U0171> \xFB |0 # LATIN SMALL LETTER U WITH DOUBLE ACUTE <U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS <U00FD> \xFD |0 # LATIN SMALL LETTER Y WITH ACUTE +<U0102> \xC3 |0 # LATIN CAPITAL LETTER A WITH BREVE +<U0103> \xE3 |0 # LATIN SMALL LETTER A WITH BREVE +<U0104> \xA1 |0 # LATIN CAPITAL LETTER A WITH OGONEK +<U0105> \xB1 |0 # LATIN SMALL LETTER A WITH OGONEK +<U0106> \xC6 |0 # LATIN CAPITAL LETTER C WITH ACUTE +<U0107> \xE6 |0 # LATIN SMALL LETTER C WITH ACUTE +<U010C> \xC8 |0 # LATIN CAPITAL LETTER C WITH CARON +<U010D> \xE8 |0 # LATIN SMALL LETTER C WITH CARON +<U010E> \xCF |0 # LATIN CAPITAL LETTER D WITH CARON +<U010F> \xEF |0 # LATIN SMALL LETTER D WITH CARON +<U0110> \xD0 |0 # LATIN CAPITAL LETTER D WITH STROKE +<U0111> \xF0 |0 # LATIN SMALL LETTER D WITH STROKE +<U0118> \xCA |0 # LATIN CAPITAL LETTER E WITH OGONEK +<U0119> \xEA |0 # LATIN SMALL LETTER E WITH OGONEK +<U011A> \xCC |0 # LATIN CAPITAL LETTER E WITH CARON +<U011B> \xEC |0 # LATIN SMALL LETTER E WITH CARON +<U0139> \xC5 |0 # LATIN CAPITAL LETTER L WITH ACUTE +<U013A> \xE5 |0 # LATIN SMALL LETTER L WITH ACUTE +<U013D> \xA5 |0 # LATIN CAPITAL LETTER L WITH CARON +<U013E> \xB5 |0 # LATIN SMALL LETTER L WITH CARON +<U0141> \xA3 |0 # LATIN CAPITAL LETTER L WITH STROKE +<U0142> \xB3 |0 # LATIN SMALL LETTER L WITH STROKE +<U0143> \xD1 |0 # LATIN CAPITAL LETTER N WITH ACUTE +<U0144> \xF1 |0 # LATIN SMALL LETTER N WITH ACUTE +<U0147> \xD2 |0 # LATIN CAPITAL LETTER N WITH CARON +<U0148> \xF2 |0 # LATIN SMALL LETTER N WITH CARON +<U0150> \xD5 |0 # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +<U0151> \xF5 |0 # LATIN SMALL LETTER O WITH DOUBLE ACUTE +<U0154> \xC0 |0 # LATIN CAPITAL LETTER R WITH ACUTE +<U0155> \xE0 |0 # LATIN SMALL LETTER R WITH ACUTE +<U0158> \xD8 |0 # LATIN CAPITAL LETTER R WITH CARON +<U0159> \xF8 |0 # LATIN SMALL LETTER R WITH CARON +<U015A> \xA6 |0 # LATIN CAPITAL LETTER S WITH ACUTE +<U015B> \xB6 |0 # LATIN SMALL LETTER S WITH ACUTE +<U015E> \xAA |0 # LATIN CAPITAL LETTER S WITH CEDILLA +<U015F> \xBA |0 # LATIN SMALL LETTER S WITH CEDILLA +<U0160> \xA9 |0 # LATIN CAPITAL LETTER S WITH CARON +<U0161> \xB9 |0 # LATIN SMALL LETTER S WITH CARON +<U0162> \xDE |0 # LATIN CAPITAL LETTER T WITH CEDILLA <U0163> \xFE |0 # LATIN SMALL LETTER T WITH CEDILLA +<U0164> \xAB |0 # LATIN CAPITAL LETTER T WITH CARON +<U0165> \xBB |0 # LATIN SMALL LETTER T WITH CARON +<U016E> \xD9 |0 # LATIN CAPITAL LETTER U WITH RING ABOVE +<U016F> \xF9 |0 # LATIN SMALL LETTER U WITH RING ABOVE +<U0170> \xDB |0 # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +<U0171> \xFB |0 # LATIN SMALL LETTER U WITH DOUBLE ACUTE +<U0179> \xAC |0 # LATIN CAPITAL LETTER Z WITH ACUTE +<U017A> \xBC |0 # LATIN SMALL LETTER Z WITH ACUTE +<U017B> \xAF |0 # LATIN CAPITAL LETTER Z WITH DOT ABOVE +<U017C> \xBF |0 # LATIN SMALL LETTER Z WITH DOT ABOVE +<U017D> \xAE |0 # LATIN CAPITAL LETTER Z WITH CARON +<U017E> \xBE |0 # LATIN SMALL LETTER Z WITH CARON +<U02C7> \xB7 |0 # CARON +<U02D8> \xA2 |0 # BREVE <U02D9> \xFF |0 # DOT ABOVE +<U02DB> \xB2 |0 # OGONEK +<U02DD> \xBD |0 # DOUBLE ACUTE ACCENT END CHARMAP diff --git a/ext/Encode/ucm/8859-3.ucm b/ext/Encode/ucm/8859-3.ucm index 1c2e80f8f5..6066eb9148 100644 --- a/ext/Encode/ucm/8859-3.ucm +++ b/ext/Encode/ucm/8859-3.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-3.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-3.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-3.TXT # -# ./compile -n iso-8859-3 -o Encode/iso8859-3.ucm Encode/iso8859-3.enc <code_set_name> "iso-8859-3" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,74 +136,57 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE -<U0126> \xA1 |0 # LATIN CAPITAL LETTER H WITH STROKE -<U02D8> \xA2 |0 # BREVE <U00A3> \xA3 |0 # POUND SIGN <U00A4> \xA4 |0 # CURRENCY SIGN -<U0124> \xA6 |0 # LATIN CAPITAL LETTER H WITH CIRCUMFLEX <U00A7> \xA7 |0 # SECTION SIGN <U00A8> \xA8 |0 # DIAERESIS -<U0130> \xA9 |0 # LATIN CAPITAL LETTER I WITH DOT ABOVE -<U015E> \xAA |0 # LATIN CAPITAL LETTER S WITH CEDILLA -<U011E> \xAB |0 # LATIN CAPITAL LETTER G WITH BREVE -<U0134> \xAC |0 # LATIN CAPITAL LETTER J WITH CIRCUMFLEX <U00AD> \xAD |0 # SOFT HYPHEN -<U017B> \xAF |0 # LATIN CAPITAL LETTER Z WITH DOT ABOVE <U00B0> \xB0 |0 # DEGREE SIGN -<U0127> \xB1 |0 # LATIN SMALL LETTER H WITH STROKE <U00B2> \xB2 |0 # SUPERSCRIPT TWO <U00B3> \xB3 |0 # SUPERSCRIPT THREE <U00B4> \xB4 |0 # ACUTE ACCENT <U00B5> \xB5 |0 # MICRO SIGN -<U0125> \xB6 |0 # LATIN SMALL LETTER H WITH CIRCUMFLEX <U00B7> \xB7 |0 # MIDDLE DOT <U00B8> \xB8 |0 # CEDILLA -<U0131> \xB9 |0 # LATIN SMALL LETTER DOTLESS I -<U015F> \xBA |0 # LATIN SMALL LETTER S WITH CEDILLA -<U011F> \xBB |0 # LATIN SMALL LETTER G WITH BREVE -<U0135> \xBC |0 # LATIN SMALL LETTER J WITH CIRCUMFLEX <U00BD> \xBD |0 # VULGAR FRACTION ONE HALF -<U017C> \xBF |0 # LATIN SMALL LETTER Z WITH DOT ABOVE <U00C0> \xC0 |0 # LATIN CAPITAL LETTER A WITH GRAVE <U00C1> \xC1 |0 # LATIN CAPITAL LETTER A WITH ACUTE <U00C2> \xC2 |0 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX <U00C4> \xC4 |0 # LATIN CAPITAL LETTER A WITH DIAERESIS -<U010A> \xC5 |0 # LATIN CAPITAL LETTER C WITH DOT ABOVE -<U0108> \xC6 |0 # LATIN CAPITAL LETTER C WITH CIRCUMFLEX <U00C7> \xC7 |0 # LATIN CAPITAL LETTER C WITH CEDILLA <U00C8> \xC8 |0 # LATIN CAPITAL LETTER E WITH GRAVE <U00C9> \xC9 |0 # LATIN CAPITAL LETTER E WITH ACUTE @@ -216,23 +200,17 @@ CHARMAP <U00D2> \xD2 |0 # LATIN CAPITAL LETTER O WITH GRAVE <U00D3> \xD3 |0 # LATIN CAPITAL LETTER O WITH ACUTE <U00D4> \xD4 |0 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX -<U0120> \xD5 |0 # LATIN CAPITAL LETTER G WITH DOT ABOVE <U00D6> \xD6 |0 # LATIN CAPITAL LETTER O WITH DIAERESIS <U00D7> \xD7 |0 # MULTIPLICATION SIGN -<U011C> \xD8 |0 # LATIN CAPITAL LETTER G WITH CIRCUMFLEX <U00D9> \xD9 |0 # LATIN CAPITAL LETTER U WITH GRAVE <U00DA> \xDA |0 # LATIN CAPITAL LETTER U WITH ACUTE <U00DB> \xDB |0 # LATIN CAPITAL LETTER U WITH CIRCUMFLEX <U00DC> \xDC |0 # LATIN CAPITAL LETTER U WITH DIAERESIS -<U016C> \xDD |0 # LATIN CAPITAL LETTER U WITH BREVE -<U015C> \xDE |0 # LATIN CAPITAL LETTER S WITH CIRCUMFLEX <U00DF> \xDF |0 # LATIN SMALL LETTER SHARP S <U00E0> \xE0 |0 # LATIN SMALL LETTER A WITH GRAVE <U00E1> \xE1 |0 # LATIN SMALL LETTER A WITH ACUTE <U00E2> \xE2 |0 # LATIN SMALL LETTER A WITH CIRCUMFLEX <U00E4> \xE4 |0 # LATIN SMALL LETTER A WITH DIAERESIS -<U010B> \xE5 |0 # LATIN SMALL LETTER C WITH DOT ABOVE -<U0109> \xE6 |0 # LATIN SMALL LETTER C WITH CIRCUMFLEX <U00E7> \xE7 |0 # LATIN SMALL LETTER C WITH CEDILLA <U00E8> \xE8 |0 # LATIN SMALL LETTER E WITH GRAVE <U00E9> \xE9 |0 # LATIN SMALL LETTER E WITH ACUTE @@ -246,15 +224,38 @@ CHARMAP <U00F2> \xF2 |0 # LATIN SMALL LETTER O WITH GRAVE <U00F3> \xF3 |0 # LATIN SMALL LETTER O WITH ACUTE <U00F4> \xF4 |0 # LATIN SMALL LETTER O WITH CIRCUMFLEX -<U0121> \xF5 |0 # LATIN SMALL LETTER G WITH DOT ABOVE <U00F6> \xF6 |0 # LATIN SMALL LETTER O WITH DIAERESIS <U00F7> \xF7 |0 # DIVISION SIGN -<U011D> \xF8 |0 # LATIN SMALL LETTER G WITH CIRCUMFLEX <U00F9> \xF9 |0 # LATIN SMALL LETTER U WITH GRAVE <U00FA> \xFA |0 # LATIN SMALL LETTER U WITH ACUTE <U00FB> \xFB |0 # LATIN SMALL LETTER U WITH CIRCUMFLEX <U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS -<U016D> \xFD |0 # LATIN SMALL LETTER U WITH BREVE +<U0108> \xC6 |0 # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +<U0109> \xE6 |0 # LATIN SMALL LETTER C WITH CIRCUMFLEX +<U010A> \xC5 |0 # LATIN CAPITAL LETTER C WITH DOT ABOVE +<U010B> \xE5 |0 # LATIN SMALL LETTER C WITH DOT ABOVE +<U011C> \xD8 |0 # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +<U011D> \xF8 |0 # LATIN SMALL LETTER G WITH CIRCUMFLEX +<U011E> \xAB |0 # LATIN CAPITAL LETTER G WITH BREVE +<U011F> \xBB |0 # LATIN SMALL LETTER G WITH BREVE +<U0120> \xD5 |0 # LATIN CAPITAL LETTER G WITH DOT ABOVE +<U0121> \xF5 |0 # LATIN SMALL LETTER G WITH DOT ABOVE +<U0124> \xA6 |0 # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +<U0125> \xB6 |0 # LATIN SMALL LETTER H WITH CIRCUMFLEX +<U0126> \xA1 |0 # LATIN CAPITAL LETTER H WITH STROKE +<U0127> \xB1 |0 # LATIN SMALL LETTER H WITH STROKE +<U0130> \xA9 |0 # LATIN CAPITAL LETTER I WITH DOT ABOVE +<U0131> \xB9 |0 # LATIN SMALL LETTER DOTLESS I +<U0134> \xAC |0 # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +<U0135> \xBC |0 # LATIN SMALL LETTER J WITH CIRCUMFLEX +<U015C> \xDE |0 # LATIN CAPITAL LETTER S WITH CIRCUMFLEX <U015D> \xFE |0 # LATIN SMALL LETTER S WITH CIRCUMFLEX +<U015E> \xAA |0 # LATIN CAPITAL LETTER S WITH CEDILLA +<U015F> \xBA |0 # LATIN SMALL LETTER S WITH CEDILLA +<U016C> \xDD |0 # LATIN CAPITAL LETTER U WITH BREVE +<U016D> \xFD |0 # LATIN SMALL LETTER U WITH BREVE +<U017B> \xAF |0 # LATIN CAPITAL LETTER Z WITH DOT ABOVE +<U017C> \xBF |0 # LATIN SMALL LETTER Z WITH DOT ABOVE +<U02D8> \xA2 |0 # BREVE <U02D9> \xFF |0 # DOT ABOVE END CHARMAP diff --git a/ext/Encode/ucm/8859-4.ucm b/ext/Encode/ucm/8859-4.ucm index 66c2b83955..901fe0607a 100644 --- a/ext/Encode/ucm/8859-4.ucm +++ b/ext/Encode/ucm/8859-4.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-4.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-4.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-4.TXT # -# ./compile -n iso-8859-4 -o Encode/iso8859-4.ucm Encode/iso8859-4.enc <code_set_name> "iso-8859-4" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,133 +136,133 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE -<U0104> \xA1 |0 # LATIN CAPITAL LETTER A WITH OGONEK -<U0138> \xA2 |0 # LATIN SMALL LETTER KRA -<U0156> \xA3 |0 # LATIN CAPITAL LETTER R WITH CEDILLA <U00A4> \xA4 |0 # CURRENCY SIGN -<U0128> \xA5 |0 # LATIN CAPITAL LETTER I WITH TILDE -<U013B> \xA6 |0 # LATIN CAPITAL LETTER L WITH CEDILLA <U00A7> \xA7 |0 # SECTION SIGN <U00A8> \xA8 |0 # DIAERESIS -<U0160> \xA9 |0 # LATIN CAPITAL LETTER S WITH CARON -<U0112> \xAA |0 # LATIN CAPITAL LETTER E WITH MACRON -<U0122> \xAB |0 # LATIN CAPITAL LETTER G WITH CEDILLA -<U0166> \xAC |0 # LATIN CAPITAL LETTER T WITH STROKE <U00AD> \xAD |0 # SOFT HYPHEN -<U017D> \xAE |0 # LATIN CAPITAL LETTER Z WITH CARON <U00AF> \xAF |0 # MACRON <U00B0> \xB0 |0 # DEGREE SIGN -<U0105> \xB1 |0 # LATIN SMALL LETTER A WITH OGONEK -<U02DB> \xB2 |0 # OGONEK -<U0157> \xB3 |0 # LATIN SMALL LETTER R WITH CEDILLA <U00B4> \xB4 |0 # ACUTE ACCENT -<U0129> \xB5 |0 # LATIN SMALL LETTER I WITH TILDE -<U013C> \xB6 |0 # LATIN SMALL LETTER L WITH CEDILLA -<U02C7> \xB7 |0 # CARON <U00B8> \xB8 |0 # CEDILLA -<U0161> \xB9 |0 # LATIN SMALL LETTER S WITH CARON -<U0113> \xBA |0 # LATIN SMALL LETTER E WITH MACRON -<U0123> \xBB |0 # LATIN SMALL LETTER G WITH CEDILLA -<U0167> \xBC |0 # LATIN SMALL LETTER T WITH STROKE -<U014A> \xBD |0 # LATIN CAPITAL LETTER ENG -<U017E> \xBE |0 # LATIN SMALL LETTER Z WITH CARON -<U014B> \xBF |0 # LATIN SMALL LETTER ENG -<U0100> \xC0 |0 # LATIN CAPITAL LETTER A WITH MACRON <U00C1> \xC1 |0 # LATIN CAPITAL LETTER A WITH ACUTE <U00C2> \xC2 |0 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX <U00C3> \xC3 |0 # LATIN CAPITAL LETTER A WITH TILDE <U00C4> \xC4 |0 # LATIN CAPITAL LETTER A WITH DIAERESIS <U00C5> \xC5 |0 # LATIN CAPITAL LETTER A WITH RING ABOVE <U00C6> \xC6 |0 # LATIN CAPITAL LETTER AE -<U012E> \xC7 |0 # LATIN CAPITAL LETTER I WITH OGONEK -<U010C> \xC8 |0 # LATIN CAPITAL LETTER C WITH CARON <U00C9> \xC9 |0 # LATIN CAPITAL LETTER E WITH ACUTE -<U0118> \xCA |0 # LATIN CAPITAL LETTER E WITH OGONEK <U00CB> \xCB |0 # LATIN CAPITAL LETTER E WITH DIAERESIS -<U0116> \xCC |0 # LATIN CAPITAL LETTER E WITH DOT ABOVE <U00CD> \xCD |0 # LATIN CAPITAL LETTER I WITH ACUTE <U00CE> \xCE |0 # LATIN CAPITAL LETTER I WITH CIRCUMFLEX -<U012A> \xCF |0 # LATIN CAPITAL LETTER I WITH MACRON -<U0110> \xD0 |0 # LATIN CAPITAL LETTER D WITH STROKE -<U0145> \xD1 |0 # LATIN CAPITAL LETTER N WITH CEDILLA -<U014C> \xD2 |0 # LATIN CAPITAL LETTER O WITH MACRON -<U0136> \xD3 |0 # LATIN CAPITAL LETTER K WITH CEDILLA <U00D4> \xD4 |0 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX <U00D5> \xD5 |0 # LATIN CAPITAL LETTER O WITH TILDE <U00D6> \xD6 |0 # LATIN CAPITAL LETTER O WITH DIAERESIS <U00D7> \xD7 |0 # MULTIPLICATION SIGN <U00D8> \xD8 |0 # LATIN CAPITAL LETTER O WITH STROKE -<U0172> \xD9 |0 # LATIN CAPITAL LETTER U WITH OGONEK <U00DA> \xDA |0 # LATIN CAPITAL LETTER U WITH ACUTE <U00DB> \xDB |0 # LATIN CAPITAL LETTER U WITH CIRCUMFLEX <U00DC> \xDC |0 # LATIN CAPITAL LETTER U WITH DIAERESIS -<U0168> \xDD |0 # LATIN CAPITAL LETTER U WITH TILDE -<U016A> \xDE |0 # LATIN CAPITAL LETTER U WITH MACRON <U00DF> \xDF |0 # LATIN SMALL LETTER SHARP S -<U0101> \xE0 |0 # LATIN SMALL LETTER A WITH MACRON <U00E1> \xE1 |0 # LATIN SMALL LETTER A WITH ACUTE <U00E2> \xE2 |0 # LATIN SMALL LETTER A WITH CIRCUMFLEX <U00E3> \xE3 |0 # LATIN SMALL LETTER A WITH TILDE <U00E4> \xE4 |0 # LATIN SMALL LETTER A WITH DIAERESIS <U00E5> \xE5 |0 # LATIN SMALL LETTER A WITH RING ABOVE <U00E6> \xE6 |0 # LATIN SMALL LETTER AE -<U012F> \xE7 |0 # LATIN SMALL LETTER I WITH OGONEK -<U010D> \xE8 |0 # LATIN SMALL LETTER C WITH CARON <U00E9> \xE9 |0 # LATIN SMALL LETTER E WITH ACUTE -<U0119> \xEA |0 # LATIN SMALL LETTER E WITH OGONEK <U00EB> \xEB |0 # LATIN SMALL LETTER E WITH DIAERESIS -<U0117> \xEC |0 # LATIN SMALL LETTER E WITH DOT ABOVE <U00ED> \xED |0 # LATIN SMALL LETTER I WITH ACUTE <U00EE> \xEE |0 # LATIN SMALL LETTER I WITH CIRCUMFLEX -<U012B> \xEF |0 # LATIN SMALL LETTER I WITH MACRON -<U0111> \xF0 |0 # LATIN SMALL LETTER D WITH STROKE -<U0146> \xF1 |0 # LATIN SMALL LETTER N WITH CEDILLA -<U014D> \xF2 |0 # LATIN SMALL LETTER O WITH MACRON -<U0137> \xF3 |0 # LATIN SMALL LETTER K WITH CEDILLA <U00F4> \xF4 |0 # LATIN SMALL LETTER O WITH CIRCUMFLEX <U00F5> \xF5 |0 # LATIN SMALL LETTER O WITH TILDE <U00F6> \xF6 |0 # LATIN SMALL LETTER O WITH DIAERESIS <U00F7> \xF7 |0 # DIVISION SIGN <U00F8> \xF8 |0 # LATIN SMALL LETTER O WITH STROKE -<U0173> \xF9 |0 # LATIN SMALL LETTER U WITH OGONEK <U00FA> \xFA |0 # LATIN SMALL LETTER U WITH ACUTE <U00FB> \xFB |0 # LATIN SMALL LETTER U WITH CIRCUMFLEX <U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS +<U0100> \xC0 |0 # LATIN CAPITAL LETTER A WITH MACRON +<U0101> \xE0 |0 # LATIN SMALL LETTER A WITH MACRON +<U0104> \xA1 |0 # LATIN CAPITAL LETTER A WITH OGONEK +<U0105> \xB1 |0 # LATIN SMALL LETTER A WITH OGONEK +<U010C> \xC8 |0 # LATIN CAPITAL LETTER C WITH CARON +<U010D> \xE8 |0 # LATIN SMALL LETTER C WITH CARON +<U0110> \xD0 |0 # LATIN CAPITAL LETTER D WITH STROKE +<U0111> \xF0 |0 # LATIN SMALL LETTER D WITH STROKE +<U0112> \xAA |0 # LATIN CAPITAL LETTER E WITH MACRON +<U0113> \xBA |0 # LATIN SMALL LETTER E WITH MACRON +<U0116> \xCC |0 # LATIN CAPITAL LETTER E WITH DOT ABOVE +<U0117> \xEC |0 # LATIN SMALL LETTER E WITH DOT ABOVE +<U0118> \xCA |0 # LATIN CAPITAL LETTER E WITH OGONEK +<U0119> \xEA |0 # LATIN SMALL LETTER E WITH OGONEK +<U0122> \xAB |0 # LATIN CAPITAL LETTER G WITH CEDILLA +<U0123> \xBB |0 # LATIN SMALL LETTER G WITH CEDILLA +<U0128> \xA5 |0 # LATIN CAPITAL LETTER I WITH TILDE +<U0129> \xB5 |0 # LATIN SMALL LETTER I WITH TILDE +<U012A> \xCF |0 # LATIN CAPITAL LETTER I WITH MACRON +<U012B> \xEF |0 # LATIN SMALL LETTER I WITH MACRON +<U012E> \xC7 |0 # LATIN CAPITAL LETTER I WITH OGONEK +<U012F> \xE7 |0 # LATIN SMALL LETTER I WITH OGONEK +<U0136> \xD3 |0 # LATIN CAPITAL LETTER K WITH CEDILLA +<U0137> \xF3 |0 # LATIN SMALL LETTER K WITH CEDILLA +<U0138> \xA2 |0 # LATIN SMALL LETTER KRA +<U013B> \xA6 |0 # LATIN CAPITAL LETTER L WITH CEDILLA +<U013C> \xB6 |0 # LATIN SMALL LETTER L WITH CEDILLA +<U0145> \xD1 |0 # LATIN CAPITAL LETTER N WITH CEDILLA +<U0146> \xF1 |0 # LATIN SMALL LETTER N WITH CEDILLA +<U014A> \xBD |0 # LATIN CAPITAL LETTER ENG +<U014B> \xBF |0 # LATIN SMALL LETTER ENG +<U014C> \xD2 |0 # LATIN CAPITAL LETTER O WITH MACRON +<U014D> \xF2 |0 # LATIN SMALL LETTER O WITH MACRON +<U0156> \xA3 |0 # LATIN CAPITAL LETTER R WITH CEDILLA +<U0157> \xB3 |0 # LATIN SMALL LETTER R WITH CEDILLA +<U0160> \xA9 |0 # LATIN CAPITAL LETTER S WITH CARON +<U0161> \xB9 |0 # LATIN SMALL LETTER S WITH CARON +<U0166> \xAC |0 # LATIN CAPITAL LETTER T WITH STROKE +<U0167> \xBC |0 # LATIN SMALL LETTER T WITH STROKE +<U0168> \xDD |0 # LATIN CAPITAL LETTER U WITH TILDE <U0169> \xFD |0 # LATIN SMALL LETTER U WITH TILDE +<U016A> \xDE |0 # LATIN CAPITAL LETTER U WITH MACRON <U016B> \xFE |0 # LATIN SMALL LETTER U WITH MACRON +<U0172> \xD9 |0 # LATIN CAPITAL LETTER U WITH OGONEK +<U0173> \xF9 |0 # LATIN SMALL LETTER U WITH OGONEK +<U017D> \xAE |0 # LATIN CAPITAL LETTER Z WITH CARON +<U017E> \xBE |0 # LATIN SMALL LETTER Z WITH CARON +<U02C7> \xB7 |0 # CARON <U02D9> \xFF |0 # DOT ABOVE +<U02DB> \xB2 |0 # OGONEK END CHARMAP diff --git a/ext/Encode/ucm/8859-5.ucm b/ext/Encode/ucm/8859-5.ucm index c3b9898dc4..f03b49494f 100644 --- a/ext/Encode/ucm/8859-5.ucm +++ b/ext/Encode/ucm/8859-5.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-5.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-5.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT # -# ./compile -n iso-8859-5 -o Encode/iso8859-5.ucm Encode/iso8859-5.enc <code_set_name> "iso-8859-5" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,40 +136,42 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE +<U00A7> \xFD |0 # SECTION SIGN +<U00AD> \xAD |0 # SOFT HYPHEN <U0401> \xA1 |0 # CYRILLIC CAPITAL LETTER IO <U0402> \xA2 |0 # CYRILLIC CAPITAL LETTER DJE <U0403> \xA3 |0 # CYRILLIC CAPITAL LETTER GJE @@ -181,7 +184,6 @@ CHARMAP <U040A> \xAA |0 # CYRILLIC CAPITAL LETTER NJE <U040B> \xAB |0 # CYRILLIC CAPITAL LETTER TSHE <U040C> \xAC |0 # CYRILLIC CAPITAL LETTER KJE -<U00AD> \xAD |0 # SOFT HYPHEN <U040E> \xAE |0 # CYRILLIC CAPITAL LETTER SHORT U <U040F> \xAF |0 # CYRILLIC CAPITAL LETTER DZHE <U0410> \xB0 |0 # CYRILLIC CAPITAL LETTER A @@ -248,7 +250,6 @@ CHARMAP <U044D> \xED |0 # CYRILLIC SMALL LETTER E <U044E> \xEE |0 # CYRILLIC SMALL LETTER YU <U044F> \xEF |0 # CYRILLIC SMALL LETTER YA -<U2116> \xF0 |0 # NUMERO SIGN <U0451> \xF1 |0 # CYRILLIC SMALL LETTER IO <U0452> \xF2 |0 # CYRILLIC SMALL LETTER DJE <U0453> \xF3 |0 # CYRILLIC SMALL LETTER GJE @@ -261,7 +262,7 @@ CHARMAP <U045A> \xFA |0 # CYRILLIC SMALL LETTER NJE <U045B> \xFB |0 # CYRILLIC SMALL LETTER TSHE <U045C> \xFC |0 # CYRILLIC SMALL LETTER KJE -<U00A7> \xFD |0 # SECTION SIGN <U045E> \xFE |0 # CYRILLIC SMALL LETTER SHORT U <U045F> \xFF |0 # CYRILLIC SMALL LETTER DZHE +<U2116> \xF0 |0 # NUMERO SIGN END CHARMAP diff --git a/ext/Encode/ucm/8859-6.ucm b/ext/Encode/ucm/8859-6.ucm index 0a970cdd47..9c79e2567d 100644 --- a/ext/Encode/ucm/8859-6.ucm +++ b/ext/Encode/ucm/8859-6.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-6.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-6.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-6.TXT # -# ./compile -n iso-8859-6 -o Encode/iso8859-6.ucm Encode/iso8859-6.enc <code_set_name> "iso-8859-6" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -56,16 +57,16 @@ CHARMAP <U002D> \x2D |0 # HYPHEN-MINUS <U002E> \x2E |0 # FULL STOP <U002F> \x2F |0 # SOLIDUS -<U0660> \x30 |0 # ARABIC-INDIC DIGIT ZERO -<U0661> \x31 |0 # ARABIC-INDIC DIGIT ONE -<U0662> \x32 |0 # ARABIC-INDIC DIGIT TWO -<U0663> \x33 |0 # ARABIC-INDIC DIGIT THREE -<U0664> \x34 |0 # ARABIC-INDIC DIGIT FOUR -<U0665> \x35 |0 # ARABIC-INDIC DIGIT FIVE -<U0666> \x36 |0 # ARABIC-INDIC DIGIT SIX -<U0667> \x37 |0 # ARABIC-INDIC DIGIT SEVEN -<U0668> \x38 |0 # ARABIC-INDIC DIGIT EIGHT -<U0669> \x39 |0 # ARABIC-INDIC DIGIT NINE +<U0030> \x30 |0 # DIGIT ZERO +<U0031> \x31 |0 # DIGIT ONE +<U0032> \x32 |0 # DIGIT TWO +<U0033> \x33 |0 # DIGIT THREE +<U0034> \x34 |0 # DIGIT FOUR +<U0035> \x35 |0 # DIGIT FIVE +<U0036> \x36 |0 # DIGIT SIX +<U0037> \x37 |0 # DIGIT SEVEN +<U0038> \x38 |0 # DIGIT EIGHT +<U0039> \x39 |0 # DIGIT NINE <U003A> \x3A |0 # COLON <U003B> \x3B |0 # SEMICOLON <U003C> \x3C |0 # LESS-THAN SIGN @@ -135,43 +136,43 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE <U00A4> \xA4 |0 # CURRENCY SIGN -<U060C> \xAC |0 # ARABIC COMMA <U00AD> \xAD |0 # SOFT HYPHEN +<U060C> \xAC |0 # ARABIC COMMA <U061B> \xBB |0 # ARABIC SEMICOLON <U061F> \xBF |0 # ARABIC QUESTION MARK <U0621> \xC1 |0 # ARABIC LETTER HAMZA diff --git a/ext/Encode/ucm/8859-7.ucm b/ext/Encode/ucm/8859-7.ucm index f92a11a198..818ef79993 100644 --- a/ext/Encode/ucm/8859-7.ucm +++ b/ext/Encode/ucm/8859-7.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-7.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-7.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT # -# ./compile -n iso-8859-7 -o Encode/iso8859-7.ucm Encode/iso8859-7.enc <code_set_name> "iso-8859-7" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,42 +136,40 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE -<U02BD> \xA1 |0 # MODIFIER LETTER REVERSED COMMA -<U02BC> \xA2 |0 # MODIFIER LETTER APOSTROPHE <U00A3> \xA3 |0 # POUND SIGN <U00A6> \xA6 |0 # BROKEN BAR <U00A7> \xA7 |0 # SECTION SIGN @@ -179,21 +178,20 @@ CHARMAP <U00AB> \xAB |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK <U00AC> \xAC |0 # NOT SIGN <U00AD> \xAD |0 # SOFT HYPHEN -<U2015> \xAF |0 # HORIZONTAL BAR <U00B0> \xB0 |0 # DEGREE SIGN <U00B1> \xB1 |0 # PLUS-MINUS SIGN <U00B2> \xB2 |0 # SUPERSCRIPT TWO <U00B3> \xB3 |0 # SUPERSCRIPT THREE +<U00B7> \xB7 |0 # MIDDLE DOT +<U00BB> \xBB |0 # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +<U00BD> \xBD |0 # VULGAR FRACTION ONE HALF <U0384> \xB4 |0 # GREEK TONOS <U0385> \xB5 |0 # GREEK DIALYTIKA TONOS <U0386> \xB6 |0 # GREEK CAPITAL LETTER ALPHA WITH TONOS -<U00B7> \xB7 |0 # MIDDLE DOT <U0388> \xB8 |0 # GREEK CAPITAL LETTER EPSILON WITH TONOS <U0389> \xB9 |0 # GREEK CAPITAL LETTER ETA WITH TONOS <U038A> \xBA |0 # GREEK CAPITAL LETTER IOTA WITH TONOS -<U00BB> \xBB |0 # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK <U038C> \xBC |0 # GREEK CAPITAL LETTER OMICRON WITH TONOS -<U00BD> \xBD |0 # VULGAR FRACTION ONE HALF <U038E> \xBE |0 # GREEK CAPITAL LETTER UPSILON WITH TONOS <U038F> \xBF |0 # GREEK CAPITAL LETTER OMEGA WITH TONOS <U0390> \xC0 |0 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS @@ -258,4 +256,7 @@ CHARMAP <U03CC> \xFC |0 # GREEK SMALL LETTER OMICRON WITH TONOS <U03CD> \xFD |0 # GREEK SMALL LETTER UPSILON WITH TONOS <U03CE> \xFE |0 # GREEK SMALL LETTER OMEGA WITH TONOS +<U2015> \xAF |0 # HORIZONTAL BAR +<U2018> \xA1 |0 # LEFT SINGLE QUOTATION MARK +<U2019> \xA2 |0 # RIGHT SINGLE QUOTATION MARK END CHARMAP diff --git a/ext/Encode/ucm/8859-8.ucm b/ext/Encode/ucm/8859-8.ucm index b29179accc..87f70548aa 100644 --- a/ext/Encode/ucm/8859-8.ucm +++ b/ext/Encode/ucm/8859-8.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-8.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-8.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-8.TXT # -# ./compile -n iso-8859-8 -o Encode/iso8859-8.ucm Encode/iso8859-8.enc <code_set_name> "iso-8859-8" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,39 +136,39 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE <U00A2> \xA2 |0 # CENT SIGN <U00A3> \xA3 |0 # POUND SIGN @@ -177,12 +178,11 @@ CHARMAP <U00A7> \xA7 |0 # SECTION SIGN <U00A8> \xA8 |0 # DIAERESIS <U00A9> \xA9 |0 # COPYRIGHT SIGN -<U00D7> \xAA |0 # MULTIPLICATION SIGN <U00AB> \xAB |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK <U00AC> \xAC |0 # NOT SIGN <U00AD> \xAD |0 # SOFT HYPHEN <U00AE> \xAE |0 # REGISTERED SIGN -<U203E> \xAF |0 # OVERLINE +<U00AF> \xAF |0 # MACRON <U00B0> \xB0 |0 # DEGREE SIGN <U00B1> \xB1 |0 # PLUS-MINUS SIGN <U00B2> \xB2 |0 # SUPERSCRIPT TWO @@ -193,12 +193,12 @@ CHARMAP <U00B7> \xB7 |0 # MIDDLE DOT <U00B8> \xB8 |0 # CEDILLA <U00B9> \xB9 |0 # SUPERSCRIPT ONE -<U00F7> \xBA |0 # DIVISION SIGN <U00BB> \xBB |0 # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK <U00BC> \xBC |0 # VULGAR FRACTION ONE QUARTER <U00BD> \xBD |0 # VULGAR FRACTION ONE HALF <U00BE> \xBE |0 # VULGAR FRACTION THREE QUARTERS -<U2017> \xDF |0 # DOUBLE LOW LINE +<U00D7> \xAA |0 # MULTIPLICATION SIGN +<U00F7> \xBA |0 # DIVISION SIGN <U05D0> \xE0 |0 # HEBREW LETTER ALEF <U05D1> \xE1 |0 # HEBREW LETTER BET <U05D2> \xE2 |0 # HEBREW LETTER GIMEL @@ -226,4 +226,7 @@ CHARMAP <U05E8> \xF8 |0 # HEBREW LETTER RESH <U05E9> \xF9 |0 # HEBREW LETTER SHIN <U05EA> \xFA |0 # HEBREW LETTER TAV +<U200E> \xFD |0 # LEFT-TO-RIGHT MARK +<U200F> \xFE |0 # RIGHT-TO-LEFT MARK +<U2017> \xDF |0 # DOUBLE LOW LINE END CHARMAP diff --git a/ext/Encode/ucm/8859-9.ucm b/ext/Encode/ucm/8859-9.ucm index c763763b1b..249dc89487 100644 --- a/ext/Encode/ucm/8859-9.ucm +++ b/ext/Encode/ucm/8859-9.ucm @@ -1,45 +1,46 @@ # -# $Id: 8859-9.ucm,v 1.0 2002/03/28 23:26:24 dankogai Exp $ +# $Id: 8859-9.ucm,v 1.1 2003/05/21 09:06:36 dankogai Exp $ +# +# Original table can be obtained at +# http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-9.TXT # -# ./compile -n iso-8859-9 -o Encode/iso8859-9.ucm Encode/iso8859-9.enc <code_set_name> "iso-8859-9" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F -# CHARMAP -<U0000> \x00 |0 # <control> -<U0001> \x01 |0 # <control> -<U0002> \x02 |0 # <control> -<U0003> \x03 |0 # <control> -<U0004> \x04 |0 # <control> -<U0005> \x05 |0 # <control> -<U0006> \x06 |0 # <control> -<U0007> \x07 |0 # <control> -<U0008> \x08 |0 # <control> -<U0009> \x09 |0 # <control> -<U000A> \x0A |0 # <control> -<U000B> \x0B |0 # <control> -<U000C> \x0C |0 # <control> -<U000D> \x0D |0 # <control> -<U000E> \x0E |0 # <control> -<U000F> \x0F |0 # <control> -<U0010> \x10 |0 # <control> -<U0011> \x11 |0 # <control> -<U0012> \x12 |0 # <control> -<U0013> \x13 |0 # <control> -<U0014> \x14 |0 # <control> -<U0015> \x15 |0 # <control> -<U0016> \x16 |0 # <control> -<U0017> \x17 |0 # <control> -<U0018> \x18 |0 # <control> -<U0019> \x19 |0 # <control> -<U001A> \x1A |0 # <control> -<U001B> \x1B |0 # <control> -<U001C> \x1C |0 # <control> -<U001D> \x1D |0 # <control> -<U001E> \x1E |0 # <control> -<U001F> \x1F |0 # <control> +<U0000> \x00 |0 # NULL +<U0001> \x01 |0 # START OF HEADING +<U0002> \x02 |0 # START OF TEXT +<U0003> \x03 |0 # END OF TEXT +<U0004> \x04 |0 # END OF TRANSMISSION +<U0005> \x05 |0 # ENQUIRY +<U0006> \x06 |0 # ACKNOWLEDGE +<U0007> \x07 |0 # BELL +<U0008> \x08 |0 # BACKSPACE +<U0009> \x09 |0 # CHARACTER TABULATION +<U000A> \x0A |0 # LINE FEED (LF) +<U000B> \x0B |0 # LINE TABULATION +<U000C> \x0C |0 # FORM FEED (FF) +<U000D> \x0D |0 # CARRIAGE RETURN (CR) +<U000E> \x0E |0 # SHIFT OUT +<U000F> \x0F |0 # SHIFT IN +<U0010> \x10 |0 # DATA LINK ESCAPE +<U0011> \x11 |0 # DEVICE CONTROL ONE +<U0012> \x12 |0 # DEVICE CONTROL TWO +<U0013> \x13 |0 # DEVICE CONTROL THREE +<U0014> \x14 |0 # DEVICE CONTROL FOUR +<U0015> \x15 |0 # NEGATIVE ACKNOWLEDGE +<U0016> \x16 |0 # SYNCHRONOUS IDLE +<U0017> \x17 |0 # END OF TRANSMISSION BLOCK +<U0018> \x18 |0 # CANCEL +<U0019> \x19 |0 # END OF MEDIUM +<U001A> \x1A |0 # SUBSTITUTE +<U001B> \x1B |0 # ESCAPE +<U001C> \x1C |0 # INFORMATION SEPARATOR FOUR +<U001D> \x1D |0 # INFORMATION SEPARATOR THREE +<U001E> \x1E |0 # INFORMATION SEPARATOR TWO +<U001F> \x1F |0 # INFORMATION SEPARATOR ONE <U0020> \x20 |0 # SPACE <U0021> \x21 |0 # EXCLAMATION MARK <U0022> \x22 |0 # QUOTATION MARK @@ -135,39 +136,39 @@ CHARMAP <U007C> \x7C |0 # VERTICAL LINE <U007D> \x7D |0 # RIGHT CURLY BRACKET <U007E> \x7E |0 # TILDE -<U007F> \x7F |0 # <control> +<U007F> \x7F |0 # DELETE <U0080> \x80 |0 # <control> <U0081> \x81 |0 # <control> -<U0082> \x82 |0 # <control> -<U0083> \x83 |0 # <control> +<U0082> \x82 |0 # BREAK PERMITTED HERE +<U0083> \x83 |0 # NO BREAK HERE <U0084> \x84 |0 # <control> -<U0085> \x85 |0 # <control> -<U0086> \x86 |0 # <control> -<U0087> \x87 |0 # <control> -<U0088> \x88 |0 # <control> -<U0089> \x89 |0 # <control> -<U008A> \x8A |0 # <control> -<U008B> \x8B |0 # <control> -<U008C> \x8C |0 # <control> -<U008D> \x8D |0 # <control> -<U008E> \x8E |0 # <control> -<U008F> \x8F |0 # <control> -<U0090> \x90 |0 # <control> -<U0091> \x91 |0 # <control> -<U0092> \x92 |0 # <control> -<U0093> \x93 |0 # <control> -<U0094> \x94 |0 # <control> -<U0095> \x95 |0 # <control> -<U0096> \x96 |0 # <control> -<U0097> \x97 |0 # <control> -<U0098> \x98 |0 # <control> +<U0085> \x85 |0 # NEXT LINE (NEL) +<U0086> \x86 |0 # START OF SELECTED AREA +<U0087> \x87 |0 # END OF SELECTED AREA +<U0088> \x88 |0 # CHARACTER TABULATION SET +<U0089> \x89 |0 # CHARACTER TABULATION WITH JUSTIFICATION +<U008A> \x8A |0 # LINE TABULATION SET +<U008B> \x8B |0 # PARTIAL LINE FORWARD +<U008C> \x8C |0 # PARTIAL LINE BACKWARD +<U008D> \x8D |0 # REVERSE LINE FEED +<U008E> \x8E |0 # SINGLE SHIFT TWO +<U008F> \x8F |0 # SINGLE SHIFT THREE +<U0090> \x90 |0 # DEVICE CONTROL STRING +<U0091> \x91 |0 # PRIVATE USE ONE +<U0092> \x92 |0 # PRIVATE USE TWO +<U0093> \x93 |0 # SET TRANSMIT STATE +<U0094> \x94 |0 # CANCEL CHARACTER +<U0095> \x95 |0 # MESSAGE WAITING +<U0096> \x96 |0 # START OF GUARDED AREA +<U0097> \x97 |0 # END OF GUARDED AREA +<U0098> \x98 |0 # START OF STRING <U0099> \x99 |0 # <control> -<U009A> \x9A |0 # <control> -<U009B> \x9B |0 # <control> -<U009C> \x9C |0 # <control> -<U009D> \x9D |0 # <control> -<U009E> \x9E |0 # <control> -<U009F> \x9F |0 # <control> +<U009A> \x9A |0 # SINGLE CHARACTER INTRODUCER +<U009B> \x9B |0 # CONTROL SEQUENCE INTRODUCER +<U009C> \x9C |0 # STRING TERMINATOR +<U009D> \x9D |0 # OPERATING SYSTEM COMMAND +<U009E> \x9E |0 # PRIVACY MESSAGE +<U009F> \x9F |0 # APPLICATION PROGRAM COMMAND <U00A0> \xA0 |0 # NO-BREAK SPACE <U00A1> \xA1 |0 # INVERTED EXCLAMATION MARK <U00A2> \xA2 |0 # CENT SIGN @@ -216,7 +217,6 @@ CHARMAP <U00CD> \xCD |0 # LATIN CAPITAL LETTER I WITH ACUTE <U00CE> \xCE |0 # LATIN CAPITAL LETTER I WITH CIRCUMFLEX <U00CF> \xCF |0 # LATIN CAPITAL LETTER I WITH DIAERESIS -<U011E> \xD0 |0 # LATIN CAPITAL LETTER G WITH BREVE <U00D1> \xD1 |0 # LATIN CAPITAL LETTER N WITH TILDE <U00D2> \xD2 |0 # LATIN CAPITAL LETTER O WITH GRAVE <U00D3> \xD3 |0 # LATIN CAPITAL LETTER O WITH ACUTE @@ -229,8 +229,6 @@ CHARMAP <U00DA> \xDA |0 # LATIN CAPITAL LETTER U WITH ACUTE <U00DB> \xDB |0 # LATIN CAPITAL LETTER U WITH CIRCUMFLEX <U00DC> \xDC |0 # LATIN CAPITAL LETTER U WITH DIAERESIS -<U0130> \xDD |0 # LATIN CAPITAL LETTER I WITH DOT ABOVE -<U015E> \xDE |0 # LATIN CAPITAL LETTER S WITH CEDILLA <U00DF> \xDF |0 # LATIN SMALL LETTER SHARP S <U00E0> \xE0 |0 # LATIN SMALL LETTER A WITH GRAVE <U00E1> \xE1 |0 # LATIN SMALL LETTER A WITH ACUTE @@ -248,7 +246,6 @@ CHARMAP <U00ED> \xED |0 # LATIN SMALL LETTER I WITH ACUTE <U00EE> \xEE |0 # LATIN SMALL LETTER I WITH CIRCUMFLEX <U00EF> \xEF |0 # LATIN SMALL LETTER I WITH DIAERESIS -<U011F> \xF0 |0 # LATIN SMALL LETTER G WITH BREVE <U00F1> \xF1 |0 # LATIN SMALL LETTER N WITH TILDE <U00F2> \xF2 |0 # LATIN SMALL LETTER O WITH GRAVE <U00F3> \xF3 |0 # LATIN SMALL LETTER O WITH ACUTE @@ -261,7 +258,11 @@ CHARMAP <U00FA> \xFA |0 # LATIN SMALL LETTER U WITH ACUTE <U00FB> \xFB |0 # LATIN SMALL LETTER U WITH CIRCUMFLEX <U00FC> \xFC |0 # LATIN SMALL LETTER U WITH DIAERESIS +<U00FF> \xFF |0 # LATIN SMALL LETTER Y WITH DIAERESIS +<U011E> \xD0 |0 # LATIN CAPITAL LETTER G WITH BREVE +<U011F> \xF0 |0 # LATIN SMALL LETTER G WITH BREVE +<U0130> \xDD |0 # LATIN CAPITAL LETTER I WITH DOT ABOVE <U0131> \xFD |0 # LATIN SMALL LETTER DOTLESS I +<U015E> \xDE |0 # LATIN CAPITAL LETTER S WITH CEDILLA <U015F> \xFE |0 # LATIN SMALL LETTER S WITH CEDILLA -<U00FF> \xFF |0 # LATIN SMALL LETTER Y WITH DIAERESIS END CHARMAP |