summary | refs | log | tree | commit | diff
path: root/enc/trans
diff options
context:
space:
mode:
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-30 05:47:01 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-30 05:47:01 +0000
commit: 6fd14ccae523c3fab7f202664bb3ef0125e80313 (patch)
tree: 9211a5886de44fdf2c012e1b39d2ed1df4cce7ba /enc/trans
parent: 5cdd7f52cc2c6cc25200e1738f53421b18698836 (diff)
download: ruby-6fd14ccae523c3fab7f202664bb3ef0125e80313.tar.gz
* enc/trans/single_byte.trans: refactoring to make it easier
  to add more transcodings (with Yoshihiro Kambayashi)

* enc/trans/iso-8859-1-tbl.rb: new file to avoid having to treat
  ISO-8859-1 as special

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@20054 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/trans')
-rw-r--r-- enc/trans/iso-8859-1-tbl.rb | 98
-rw-r--r-- enc/trans/single_byte.trans | 91
2 files changed, 135 insertions, 54 deletions
diff --git a/enc/trans/iso-8859-1-tbl.rb b/enc/trans/iso-8859-1-tbl.rb
new file mode 100644
index 0000000000..05397e6417
--- /dev/null
+++ b/enc/trans/iso-8859-1-tbl.rb
@@ -0,0 +1,98 @@
+ISO_8859_1_TO_UCS_TBL = [
+ ["A0",0xA0],
+ ["A1",0xA1],
+ ["A2",0xA2],
+ ["A3",0xA3],
+ ["A4",0xA4],
+ ["A5",0xA5],
+ ["A6",0xA6],
+ ["A7",0xA7],
+ ["A8",0xA8],
+ ["A9",0xA9],
+ ["AA",0xAA],
+ ["AB",0xAB],
+ ["AC",0xAC],
+ ["AD",0xAD],
+ ["AE",0xAE],
+ ["AF",0xAF],
+ ["B0",0xB0],
+ ["B1",0xB1],
+ ["B2",0xB2],
+ ["B3",0xB3],
+ ["B4",0xB4],
+ ["B5",0xB5],
+ ["B6",0xB6],
+ ["B7",0xB7],
+ ["B8",0xB8],
+ ["B9",0xB9],
+ ["BA",0xBA],
+ ["BB",0xBB],
+ ["BC",0xBC],
+ ["BD",0xBD],
+ ["BE",0xBE],
+ ["BF",0xBF],
+ ["C0",0xC0],
+ ["C1",0xC1],
+ ["C2",0xC2],
+ ["C3",0xC3],
+ ["C4",0xC4],
+ ["C5",0xC5],
+ ["C6",0xC6],
+ ["C7",0xC7],
+ ["C8",0xC8],
+ ["C9",0xC9],
+ ["CA",0xCA],
+ ["CB",0xCB],
+ ["CC",0xCC],
+ ["CD",0xCD],
+ ["CE",0xCE],
+ ["CF",0xCF],
+ ["D0",0xD0],
+ ["D1",0xD1],
+ ["D2",0xD2],
+ ["D3",0xD3],
+ ["D4",0xD4],
+ ["D5",0xD5],
+ ["D6",0xD6],
+ ["D7",0xD7],
+ ["D8",0xD8],
+ ["D9",0xD9],
+ ["DA",0xDA],
+ ["DB",0xDB],
+ ["DC",0xDC],
+ ["DD",0xDD],
+ ["DE",0xDE],
+ ["DF",0xDF],
+ ["E0",0xE0],
+ ["E1",0xE1],
+ ["E2",0xE2],
+ ["E3",0xE3],
+ ["E4",0xE4],
+ ["E5",0xE5],
+ ["E6",0xE6],
+ ["E7",0xE7],
+ ["E8",0xE8],
+ ["E9",0xE9],
+ ["EA",0xEA],
+ ["EB",0xEB],
+ ["EC",0xEC],
+ ["ED",0xED],
+ ["EE",0xEE],
+ ["EF",0xEF],
+ ["F0",0xF0],
+ ["F1",0xF1],
+ ["F2",0xF2],
+ ["F3",0xF3],
+ ["F4",0xF4],
+ ["F5",0xF5],
+ ["F6",0xF6],
+ ["F7",0xF7],
+ ["F8",0xF8],
+ ["F9",0xF9],
+ ["FA",0xFA],
+ ["FB",0xFB],
+ ["FC",0xFC],
+ ["FD",0xFD],
+ ["FE",0xFE],
+ ["FF",0xFF],
+]
diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans
index d445c8e130..b49bc779a1 100644
--- a/enc/trans/single_byte.trans
+++ b/enc/trans/single_byte.trans
@@ -3,38 +3,25 @@
<%
us_ascii_map = [["{00-7f}", :nomap]]
- ISO_8859_1_TO_UCS_TBL = (0x80..0xff).map {|c| ["%02X" % c, c] }
- CONTROL1_TO_UCS_TBL = (0x80..0x9f).map {|c| ["%02X" % c, c] }
-
- require 'iso-8859-2-tbl'
- require 'iso-8859-3-tbl'
- require 'iso-8859-4-tbl'
- require 'iso-8859-5-tbl'
- require 'iso-8859-6-tbl'
- require 'iso-8859-7-tbl'
- require 'iso-8859-8-tbl'
- require 'iso-8859-9-tbl'
- require 'iso-8859-10-tbl'
- require 'iso-8859-11-tbl'
- require 'iso-8859-13-tbl'
- require 'iso-8859-14-tbl'
- require 'iso-8859-15-tbl'
- require 'windows-874-tbl'
- require 'windows-1250-tbl'
- require 'windows-1251-tbl'
- require 'windows-1252-tbl'
- require 'windows-1253-tbl'
- require 'windows-1254-tbl'
- require 'windows-1255-tbl'
- require 'windows-1256-tbl'
- require 'windows-1257-tbl'
-
transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map
transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map
transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map
transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map
- def transcode_tblgen_singlebyte(name, tbl_to_ucs)
+ CONTROL1_TO_UCS_TBL = (0x80..0x9f).map {|c| ["%02X" % c, c] }
+
+ # Generate transcoding tables for single byte encoding from
+ # encoding name using table file.
+ #
+ # Conventions:
+ # name: encoding name as string, UPPER case, hyphens (e.g. 'ISO-8859-3')
+ # file name: lower case, hyphens, -tbl.rb suffix (e.g. iso-8859-3-tbl.rb)
+ # variable name: UPPER case, underscores, _TO_UCS_TBL suffix (e.g. ISO_8859_3_TO_UCS_TBL)
+ # If the name starts with "ISO-8859", the C1 control code area is added automatically.
+ def transcode_tblgen_singlebyte (name)
+ require(name.downcase + "-tbl")
+ control1_if_needed = (name =~ /^ISO-8859/) ? CONTROL1_TO_UCS_TBL : []
+ tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL")
set_valid_byte_pattern(name, '1byte')
code = ''
code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs])
@@ -43,33 +30,29 @@
code
end
- def transcode_tblgen_iso8859(name, tbl_to_ucs)
- transcode_tblgen_singlebyte(name, CONTROL1_TO_UCS_TBL + tbl_to_ucs)
- end
-
- transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL)
- transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL)
- transcode_tblgen_singlebyte("WINDOWS-874", WINDOWS_874_TO_UCS_TBL)
- transcode_tblgen_singlebyte("WINDOWS-1250", WINDOWS_1250_TO_UCS_TBL)
- transcode_tblgen_singlebyte("WINDOWS-1251", WINDOWS_1251_TO_UCS_TBL)
- transcode_tblgen_singlebyte("WINDOWS-1252", WINDOWS_1252_TO_UCS_TBL)
- transcode_tblgen_singlebyte("WINDOWS-1253", WINDOWS_1253_TO_UCS_TBL)
- transcode_tblgen_singlebyte("WINDOWS-1254", WINDOWS_1254_TO_UCS_TBL)
- transcode_tblgen_singlebyte("WINDOWS-1255", WINDOWS_1255_TO_UCS_TBL)
- transcode_tblgen_singlebyte("WINDOWS-1256", WINDOWS_1256_TO_UCS_TBL)
- transcode_tblgen_singlebyte("WINDOWS-1257", WINDOWS_1257_TO_UCS_TBL)
+ transcode_tblgen_singlebyte "ISO-8859-1"
+ transcode_tblgen_singlebyte "ISO-8859-2"
+ transcode_tblgen_singlebyte "ISO-8859-3"
+ transcode_tblgen_singlebyte "ISO-8859-4"
+ transcode_tblgen_singlebyte "ISO-8859-5"
+ transcode_tblgen_singlebyte "ISO-8859-6"
+ transcode_tblgen_singlebyte "ISO-8859-7"
+ transcode_tblgen_singlebyte "ISO-8859-8"
+ transcode_tblgen_singlebyte "ISO-8859-9"
+ transcode_tblgen_singlebyte "ISO-8859-10"
+ transcode_tblgen_singlebyte "ISO-8859-11"
+ transcode_tblgen_singlebyte "ISO-8859-13"
+ transcode_tblgen_singlebyte "ISO-8859-14"
+ transcode_tblgen_singlebyte "ISO-8859-15"
+ transcode_tblgen_singlebyte "WINDOWS-874"
+ transcode_tblgen_singlebyte "WINDOWS-1250"
+ transcode_tblgen_singlebyte "WINDOWS-1251"
+ transcode_tblgen_singlebyte "WINDOWS-1252"
+ transcode_tblgen_singlebyte "WINDOWS-1253"
+ transcode_tblgen_singlebyte "WINDOWS-1254"
+ transcode_tblgen_singlebyte "WINDOWS-1255"
+ transcode_tblgen_singlebyte "WINDOWS-1256"
+ transcode_tblgen_singlebyte "WINDOWS-1257"
%>
<%= transcode_generated_code %>