From b014f1bc021702ee2800752aa37bfe7848118f2a Mon Sep 17 00:00:00 2001 From: duerst Date: Sun, 19 Oct 2008 09:15:37 +0000 Subject: * enc/trans/single_byte.trans: adding WINDOWS-wwww encodings (wwww = 874/1250/1251/1253/1254/1255/1256/1257) (contributed by Yoshihiro Kambayashi) * enc/trans/windows-wwww-tbl.rb: 8 new files (contributed by Yoshihiro Kambayashi) * test/ruby/test_transcode.rb: added test_windows_wwww (contributed by Yoshihiro Kambayashi) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19846 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 12 ++ enc/trans/single_byte.trans | 16 +++ enc/trans/windows-1250-tbl.rb | 125 ++++++++++++++++++++ enc/trans/windows-1251-tbl.rb | 129 +++++++++++++++++++++ enc/trans/windows-1253-tbl.rb | 113 ++++++++++++++++++ enc/trans/windows-1254-tbl.rb | 123 ++++++++++++++++++++ enc/trans/windows-1255-tbl.rb | 141 +++++++++++++++++++++++ enc/trans/windows-1256-tbl.rb | 130 +++++++++++++++++++++ enc/trans/windows-1257-tbl.rb | 118 +++++++++++++++++++ enc/trans/windows-874-tbl.rb | 99 ++++++++++++++++ test/ruby/test_transcode.rb | 259 ++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 1265 insertions(+) create mode 100644 enc/trans/windows-1250-tbl.rb create mode 100644 enc/trans/windows-1251-tbl.rb create mode 100644 enc/trans/windows-1253-tbl.rb create mode 100644 enc/trans/windows-1254-tbl.rb create mode 100644 enc/trans/windows-1255-tbl.rb create mode 100644 enc/trans/windows-1256-tbl.rb create mode 100644 enc/trans/windows-1257-tbl.rb create mode 100644 enc/trans/windows-874-tbl.rb diff --git a/ChangeLog b/ChangeLog index d43b824ee9..81f32b04fc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +Sun Oct 19 18:15:15 2008 Martin Duerst + + * enc/trans/single_byte.trans: adding WINDOWS-wwww encodings + (wwww = 874/1250/1251/1253/1254/1255/1256/1257) + (contributed by Yoshihiro Kambayashi) + + * enc/trans/windows-wwww-tbl.rb: 8 new files + (contributed by Yoshihiro Kambayashi) + + * test/ruby/test_transcode.rb: added test_windows_wwww + (contributed by Yoshihiro Kambayashi) + Sun Oct 19 07:37:13 2008 Tadayoshi Funaba * lib/cmath.rb (log2,cbrt,frexp,ldexp,hypot,erf,erfc,gamma,lgamma): diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans index b303fb1e1c..d445c8e130 100644 --- a/enc/trans/single_byte.trans +++ b/enc/trans/single_byte.trans @@ -19,7 +19,15 @@ require 'iso-8859-13-tbl' require 'iso-8859-14-tbl' require 'iso-8859-15-tbl' + require 'windows-874-tbl' + require 'windows-1250-tbl' + require 'windows-1251-tbl' require 'windows-1252-tbl' + require 'windows-1253-tbl' + require 'windows-1254-tbl' + require 'windows-1255-tbl' + require 'windows-1256-tbl' + require 'windows-1257-tbl' transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map @@ -53,7 +61,15 @@ transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) + transcode_tblgen_singlebyte("WINDOWS-874", WINDOWS_874_TO_UCS_TBL) + transcode_tblgen_singlebyte("WINDOWS-1250", WINDOWS_1250_TO_UCS_TBL) + transcode_tblgen_singlebyte("WINDOWS-1251", WINDOWS_1251_TO_UCS_TBL) transcode_tblgen_singlebyte("WINDOWS-1252", WINDOWS_1252_TO_UCS_TBL) + transcode_tblgen_singlebyte("WINDOWS-1253", WINDOWS_1253_TO_UCS_TBL) + transcode_tblgen_singlebyte("WINDOWS-1254", WINDOWS_1254_TO_UCS_TBL) + transcode_tblgen_singlebyte("WINDOWS-1255", WINDOWS_1255_TO_UCS_TBL) + transcode_tblgen_singlebyte("WINDOWS-1256", WINDOWS_1256_TO_UCS_TBL) + transcode_tblgen_singlebyte("WINDOWS-1257", WINDOWS_1257_TO_UCS_TBL) %> <%= transcode_generated_code %> diff --git a/enc/trans/windows-1250-tbl.rb b/enc/trans/windows-1250-tbl.rb new file mode 100644 index 0000000000..52063e17b1 --- /dev/null +++ b/enc/trans/windows-1250-tbl.rb @@ -0,0 +1,125 @@ +WINDOWS_1250_TO_UCS_TBL = [ + ["A0",0xA0], + ["A4",0xA4], + ["A6",0xA6], + ["A7",0xA7], + ["A8",0xA8], + ["A9",0xA9], + ["AB",0xAB], + ["AC",0xAC], + ["AD",0xAD], + ["AE",0xAE], + ["B0",0xB0], + ["B1",0xB1], + ["B4",0xB4], + ["B5",0xB5], + ["B6",0xB6], + ["B7",0xB7], + ["B8",0xB8], + ["BB",0xBB], + ["C1",0xC1], + ["C2",0xC2], + ["C4",0xC4], + ["C7",0xC7], + ["C9",0xC9], + ["CB",0xCB], + ["CD",0xCD], + ["CE",0xCE], + ["D3",0xD3], + ["D4",0xD4], + ["D6",0xD6], + ["D7",0xD7], + ["DA",0xDA], + ["DC",0xDC], + ["DD",0xDD], + ["DF",0xDF], + ["E1",0xE1], + ["E2",0xE2], + ["E4",0xE4], + ["E7",0xE7], + ["E9",0xE9], + ["EB",0xEB], + ["ED",0xED], + ["EE",0xEE], + ["F3",0xF3], + ["F4",0xF4], + ["F6",0xF6], + ["F7",0xF7], + ["FA",0xFA], + ["FC",0xFC], + ["FD",0xFD], + ["C3",0x102], + ["E3",0x103], + ["A5",0x104], + ["B9",0x105], + ["C6",0x106], + ["E6",0x107], + ["C8",0x10C], + ["E8",0x10D], + ["CF",0x10E], + ["EF",0x10F], + ["D0",0x110], + ["F0",0x111], + ["CA",0x118], + ["EA",0x119], + ["CC",0x11A], + ["EC",0x11B], + ["C5",0x139], + ["E5",0x13A], + ["BC",0x13D], + ["BE",0x13E], + ["A3",0x141], + ["B3",0x142], + ["D1",0x143], + ["F1",0x144], + ["D2",0x147], + ["F2",0x148], + ["D5",0x150], + ["F5",0x151], + ["C0",0x154], + ["E0",0x155], + ["D8",0x158], + ["F8",0x159], + ["8C",0x15A], + ["9C",0x15B], + ["AA",0x15E], + ["BA",0x15F], + ["8A",0x160], + ["9A",0x161], + ["DE",0x162], + ["FE",0x163], + ["8D",0x164], + ["9D",0x165], + ["D9",0x16E], + ["F9",0x16F], + ["DB",0x170], + ["FB",0x171], + ["8F",0x179], + ["9F",0x17A], + ["AF",0x17B], + ["BF",0x17C], + ["8E",0x17D], + ["9E",0x17E], + ["A1",0x2C7], + ["A2",0x2D8], + ["FF",0x2D9], + ["B2",0x2DB], + ["BD",0x2DD], + ["96",0x2013], + ["97",0x2014], + ["91",0x2018], + ["92",0x2019], + ["82",0x201A], + ["93",0x201C], + ["94",0x201D], + ["84",0x201E], + ["86",0x2020], + ["87",0x2021], + ["95",0x2022], + ["85",0x2026], + ["89",0x2030], + ["8B",0x2039], + ["9B",0x203A], + ["80",0x20AC], + ["99",0x2122], +] \ No newline at end of file diff --git a/enc/trans/windows-1251-tbl.rb b/enc/trans/windows-1251-tbl.rb new file mode 100644 index 0000000000..870c718b72 --- /dev/null +++ b/enc/trans/windows-1251-tbl.rb @@ -0,0 +1,129 @@ +WINDOWS_1251_TO_UCS_TBL = [ + ["A0",0xA0], + ["A4",0xA4], + ["A6",0xA6], + ["A7",0xA7], + ["A9",0xA9], + ["AB",0xAB], + ["AC",0xAC], + ["AD",0xAD], + ["AE",0xAE], + ["B0",0xB0], + ["B1",0xB1], + ["B5",0xB5], + ["B6",0xB6], + ["B7",0xB7], + ["BB",0xBB], + ["A8",0x401], + ["80",0x402], + ["81",0x403], + ["AA",0x404], + ["BD",0x405], + ["B2",0x406], + ["AF",0x407], + ["A3",0x408], + ["8A",0x409], + ["8C",0x40A], + ["8E",0x40B], + ["8D",0x40C], + ["A1",0x40E], + ["8F",0x40F], + ["C0",0x410], + ["C1",0x411], + ["C2",0x412], + ["C3",0x413], + ["C4",0x414], + ["C5",0x415], + ["C6",0x416], + ["C7",0x417], + ["C8",0x418], + ["C9",0x419], + ["CA",0x41A], + ["CB",0x41B], + ["CC",0x41C], + ["CD",0x41D], + ["CE",0x41E], + ["CF",0x41F], + ["D0",0x420], + ["D1",0x421], + ["D2",0x422], + ["D3",0x423], + ["D4",0x424], + ["D5",0x425], + ["D6",0x426], + ["D7",0x427], + ["D8",0x428], + ["D9",0x429], + ["DA",0x42A], + ["DB",0x42B], + ["DC",0x42C], + ["DD",0x42D], + ["DE",0x42E], + ["DF",0x42F], + ["E0",0x430], + ["E1",0x431], + ["E2",0x432], + ["E3",0x433], + ["E4",0x434], + ["E5",0x435], + ["E6",0x436], + ["E7",0x437], + ["E8",0x438], + ["E9",0x439], + ["EA",0x43A], + ["EB",0x43B], + ["EC",0x43C], + ["ED",0x43D], + ["EE",0x43E], + ["EF",0x43F], + ["F0",0x440], + ["F1",0x441], + ["F2",0x442], + ["F3",0x443], + ["F4",0x444], + ["F5",0x445], + ["F6",0x446], + ["F7",0x447], + ["F8",0x448], + ["F9",0x449], + ["FA",0x44A], + ["FB",0x44B], + ["FC",0x44C], + ["FD",0x44D], + ["FE",0x44E], + ["FF",0x44F], + ["B8",0x451], + ["90",0x452], + ["83",0x453], + ["BA",0x454], + ["BE",0x455], + ["B3",0x456], + ["BF",0x457], + ["BC",0x458], + ["9A",0x459], + ["9C",0x45A], + ["9E",0x45B], + ["9D",0x45C], + ["A2",0x45E], + ["9F",0x45F], + ["A5",0x490], + ["B4",0x491], + ["96",0x2013], + ["97",0x2014], + ["91",0x2018], + ["92",0x2019], + ["82",0x201A], + ["93",0x201C], + ["94",0x201D], + ["84",0x201E], + ["86",0x2020], + ["87",0x2021], + ["95",0x2022], + ["85",0x2026], + ["89",0x2030], + ["8B",0x2039], + ["9B",0x203A], + ["88",0x20AC], + ["B9",0x2116], + ["99",0x2122], +] \ No newline at end of file diff --git a/enc/trans/windows-1253-tbl.rb b/enc/trans/windows-1253-tbl.rb new file mode 100644 index 0000000000..132edb60ba --- /dev/null +++ b/enc/trans/windows-1253-tbl.rb @@ -0,0 +1,113 @@ +WINDOWS_1253_TO_UCS_TBL = [ + ["A0",0xA0], + ["A3",0xA3], + ["A4",0xA4], + ["A5",0xA5], + ["A6",0xA6], + ["A7",0xA7], + ["A8",0xA8], + ["A9",0xA9], + ["AB",0xAB], + ["AC",0xAC], + ["AD",0xAD], + ["AE",0xAE], + ["B0",0xB0], + ["B1",0xB1], + ["B2",0xB2], + ["B3",0xB3], + ["B5",0xB5], + ["B6",0xB6], + ["B7",0xB7], + ["BB",0xBB], + ["BD",0xBD], + ["83",0x192], + ["B4",0x384], + ["A1",0x385], + ["A2",0x386], + ["B8",0x388], + ["B9",0x389], + ["BA",0x38A], + ["BC",0x38C], + ["BE",0x38E], + ["BF",0x38F], + ["C0",0x390], + ["C1",0x391], + ["C2",0x392], + ["C3",0x393], + ["C4",0x394], + ["C5",0x395], + ["C6",0x396], + ["C7",0x397], + ["C8",0x398], + ["C9",0x399], + ["CA",0x39A], + ["CB",0x39B], + ["CC",0x39C], + ["CD",0x39D], + ["CE",0x39E], + ["CF",0x39F], + ["D0",0x3A0], + ["D1",0x3A1], + ["D3",0x3A3], + ["D4",0x3A4], + ["D5",0x3A5], + ["D6",0x3A6], + ["D7",0x3A7], + ["D8",0x3A8], + ["D9",0x3A9], + ["DA",0x3AA], + ["DB",0x3AB], + ["DC",0x3AC], + ["DD",0x3AD], + ["DE",0x3AE], + ["DF",0x3AF], + ["E0",0x3B0], + ["E1",0x3B1], + ["E2",0x3B2], + ["E3",0x3B3], + ["E4",0x3B4], + ["E5",0x3B5], + ["E6",0x3B6], + ["E7",0x3B7], + ["E8",0x3B8], + ["E9",0x3B9], + ["EA",0x3BA], + ["EB",0x3BB], + ["EC",0x3BC], + ["ED",0x3BD], + ["EE",0x3BE], + ["EF",0x3BF], + ["F0",0x3C0], + ["F1",0x3C1], + ["F2",0x3C2], + ["F3",0x3C3], + ["F4",0x3C4], + ["F5",0x3C5], + ["F6",0x3C6], + ["F7",0x3C7], + ["F8",0x3C8], + ["F9",0x3C9], + ["FA",0x3CA], + ["FB",0x3CB], + ["FC",0x3CC], + ["FD",0x3CD], + ["FE",0x3CE], + ["96",0x2013], + ["97",0x2014], + ["AF",0x2015], + ["91",0x2018], + ["92",0x2019], + ["82",0x201A], + ["93",0x201C], + ["94",0x201D], + ["84",0x201E], + ["86",0x2020], + ["87",0x2021], + ["95",0x2022], + ["85",0x2026], + ["89",0x2030], + ["8B",0x2039], + ["9B",0x203A], + ["80",0x20AC], + ["99",0x2122], +] \ No newline at end of file diff --git a/enc/trans/windows-1254-tbl.rb b/enc/trans/windows-1254-tbl.rb new file mode 100644 index 0000000000..81a747afaa --- /dev/null +++ b/enc/trans/windows-1254-tbl.rb @@ -0,0 +1,123 @@ +WINDOWS_1254_TO_UCS_TBL = [ + ["A0",0xA0], + ["A1",0xA1], + ["A2",0xA2], + ["A3",0xA3], + ["A4",0xA4], + ["A5",0xA5], + ["A6",0xA6], + ["A7",0xA7], + ["A8",0xA8], + ["A9",0xA9], + ["AA",0xAA], + ["AB",0xAB], + ["AC",0xAC], + ["AD",0xAD], + ["AE",0xAE], + ["AF",0xAF], + ["B0",0xB0], + ["B1",0xB1], + ["B2",0xB2], + ["B3",0xB3], + ["B4",0xB4], + ["B5",0xB5], + ["B6",0xB6], + ["B7",0xB7], + ["B8",0xB8], + ["B9",0xB9], + ["BA",0xBA], + ["BB",0xBB], + ["BC",0xBC], + ["BD",0xBD], + ["BE",0xBE], + ["BF",0xBF], + ["C0",0xC0], + ["C1",0xC1], + ["C2",0xC2], + ["C3",0xC3], + ["C4",0xC4], + ["C5",0xC5], + ["C6",0xC6], + ["C7",0xC7], + ["C8",0xC8], + ["C9",0xC9], + ["CA",0xCA], + ["CB",0xCB], + ["CC",0xCC], + ["CD",0xCD], + ["CE",0xCE], + ["CF",0xCF], + ["D1",0xD1], + ["D2",0xD2], + ["D3",0xD3], + ["D4",0xD4], + ["D5",0xD5], + ["D6",0xD6], + ["D7",0xD7], + ["D8",0xD8], + ["D9",0xD9], + ["DA",0xDA], + ["DB",0xDB], + ["DC",0xDC], + ["DF",0xDF], + ["E0",0xE0], + ["E1",0xE1], + ["E2",0xE2], + ["E3",0xE3], + ["E4",0xE4], + ["E5",0xE5], + ["E6",0xE6], + ["E7",0xE7], + ["E8",0xE8], + ["E9",0xE9], + ["EA",0xEA], + ["EB",0xEB], + ["EC",0xEC], + ["ED",0xED], + ["EE",0xEE], + ["EF",0xEF], + ["F1",0xF1], + ["F2",0xF2], + ["F3",0xF3], + ["F4",0xF4], + ["F5",0xF5], + ["F6",0xF6], + ["F7",0xF7], + ["F8",0xF8], + ["F9",0xF9], + ["FA",0xFA], + ["FB",0xFB], + ["FC",0xFC], + ["FF",0xFF], + ["D0",0x11E], + ["F0",0x11F], + ["DD",0x130], + ["FD",0x131], + ["8C",0x152], + ["9C",0x153], + ["DE",0x15E], + ["FE",0x15F], + ["8A",0x160], + ["9A",0x161], + ["9F",0x178], + ["83",0x192], + ["88",0x2C6], + ["98",0x2DC], + ["96",0x2013], + ["97",0x2014], + ["91",0x2018], + ["92",0x2019], + ["82",0x201A], + ["93",0x201C], + ["94",0x201D], + ["84",0x201E], + ["86",0x2020], + ["87",0x2021], + ["95",0x2022], + ["85",0x2026], + ["89",0x2030], + ["8B",0x2039], + ["9B",0x203A], + ["80",0x20AC], + ["99",0x2122], +] \ No newline at end of file diff --git a/enc/trans/windows-1255-tbl.rb b/enc/trans/windows-1255-tbl.rb new file mode 100644 index 0000000000..9084a56a10 --- /dev/null +++ b/enc/trans/windows-1255-tbl.rb @@ -0,0 +1,141 @@ +WINDOWS_1255_TO_UCS_TBL = [ + ["A0",0xA0], + ["A1",0xA1], + ["A2",0xA2], + ["A3",0xA3], + ["A5",0xA5], + ["A6",0xA6], + ["A7",0xA7], + ["A8",0xA8], + ["A9",0xA9], + ["AB",0xAB], + ["AC",0xAC], + ["AD",0xAD], + ["AE",0xAE], + ["AF",0xAF], + ["B0",0xB0], + ["B1",0xB1], + ["B2",0xB2], + ["B3",0xB3], + ["B4",0xB4], + ["B5",0xB5], + ["B6",0xB6], + ["B7",0xB7], + ["B8",0xB8], + ["B9",0xB9], + ["BB",0xBB], + ["BC",0xBC], + ["BD",0xBD], + ["BE",0xBE], + ["BF",0xBF], + ["AA",0xD7], + ["BA",0xF7], + ["83",0x192], + ["88",0x2C6], + ["98",0x2DC], + ["C0",0x5B0], + ["C1",0x5B1], + ["C2",0x5B2], + ["C3",0x5B3], + ["C4",0x5B4], + ["C5",0x5B5], + ["C6",0x5B6], + ["C7",0x5B7], + ["C8",0x5B8], + ["C9",0x5B9], + ["CB",0x5BB], + ["CC",0x5BC], + ["CD",0x5BD], + ["CE",0x5BE], + ["CF",0x5BF], + ["D0",0x5C0], + ["D1",0x5C1], + ["D2",0x5C2], + ["D3",0x5C3], + ["E0",0x5D0], + ["E1",0x5D1], + ["E2",0x5D2], + ["E3",0x5D3], + ["E4",0x5D4], + ["E5",0x5D5], + ["E6",0x5D6], + ["E7",0x5D7], + ["E8",0x5D8], + ["E9",0x5D9], + ["EA",0x5DA], + ["EB",0x5DB], + ["EC",0x5DC], + ["ED",0x5DD], + ["EE",0x5DE], + ["EF",0x5DF], + ["F0",0x5E0], + ["F1",0x5E1], + ["F2",0x5E2], + ["F3",0x5E3], + ["F4",0x5E4], + ["F5",0x5E5], + ["F6",0x5E6], + ["F7",0x5E7], + ["F8",0x5E8], + ["F9",0x5E9], + ["FA",0x5EA], + ["D4",0x5F0], + ["D5",0x5F1], + ["D6",0x5F2], + ["D7",0x5F3], + ["D8",0x5F4], + ["FD",0x200E], + ["FE",0x200F], + ["96",0x2013], + ["97",0x2014], + ["91",0x2018], + ["92",0x2019], + ["82",0x201A], + ["93",0x201C], + ["94",0x201D], + ["84",0x201E], + ["86",0x2020], + ["87",0x2021], + ["95",0x2022], + ["85",0x2026], + ["89",0x2030], + ["8B",0x2039], + ["9B",0x203A], + ["A4",0x20AA], + ["80",0x20AC], + ["99",0x2122], + ["E9C4",0xFB1D], + ["D6C7",0xFB1F], + ["F9D1",0xFB2A], + ["F9D2",0xFB2B], + ["F9CCD1",0xFB2C], + ["F9CCD2",0xFB2D], + ["E0C7",0xFB2E], + ["E0C8",0xFB2F], + ["E0CC",0xFB30], + ["E1CC",0xFB31], + ["E2CC",0xFB32], + ["E3CC",0xFB33], + ["E4CC",0xFB34], + ["E5CC",0xFB35], + ["E6CC",0xFB36], + ["E8CC",0xFB38], + ["E9CC",0xFB39], + ["EACC",0xFB3A], + ["EBCC",0xFB3B], + ["ECCC",0xFB3C], + ["EECC",0xFB3E], + ["F0CC",0xFB40], + ["F1CC",0xFB41], + ["F3CC",0xFB43], + ["F4CC",0xFB44], + ["F6CC",0xFB46], + ["F7CC",0xFB47], + ["F8CC",0xFB48], + ["F9CC",0xFB49], + ["FACC",0xFB4A], + ["E5C9",0xFB4B], + ["E1CF",0xFB4C], + ["EBCF",0xFB4D], + ["F4CF",0xFB4E], +] \ No newline at end of file diff --git a/enc/trans/windows-1256-tbl.rb b/enc/trans/windows-1256-tbl.rb new file mode 100644 index 0000000000..25c5874fb0 --- /dev/null +++ b/enc/trans/windows-1256-tbl.rb @@ -0,0 +1,130 @@ +WINDOWS_1256_TO_UCS_TBL = [ + ["A0",0xA0], + ["A2",0xA2], + ["A3",0xA3], + ["A4",0xA4], + ["A5",0xA5], + ["A6",0xA6], + ["A7",0xA7], + ["A8",0xA8], + ["A9",0xA9], + ["AB",0xAB], + ["AC",0xAC], + ["AD",0xAD], + ["AE",0xAE], + ["AF",0xAF], + ["B0",0xB0], + ["B1",0xB1], + ["B2",0xB2], + ["B3",0xB3], + ["B4",0xB4], + ["B5",0xB5], + ["B6",0xB6], + ["B7",0xB7], + ["B8",0xB8], + ["B9",0xB9], + ["BB",0xBB], + ["BC",0xBC], + ["BD",0xBD], + ["BE",0xBE], + ["D7",0xD7], + ["E0",0xE0], + ["E2",0xE2], + ["E7",0xE7], + ["E8",0xE8], + ["E9",0xE9], + ["EA",0xEA], + ["EB",0xEB], + ["EE",0xEE], + ["EF",0xEF], + ["F4",0xF4], + ["F7",0xF7], + ["F9",0xF9], + ["FB",0xFB], + ["FC",0xFC], + ["8C",0x152], + ["9C",0x153], + ["83",0x192], + ["88",0x2C6], + ["A1",0x60C], + ["BA",0x61B], + ["BF",0x61F], + ["C1",0x621], + ["C2",0x622], + ["C3",0x623], + ["C4",0x624], + ["C5",0x625], + ["C6",0x626], + ["C7",0x627], + ["C8",0x628], + ["C9",0x629], + ["CA",0x62A], + ["CB",0x62B], + ["CC",0x62C], + ["CD",0x62D], + ["CE",0x62E], + ["CF",0x62F], + ["D0",0x630], + ["D1",0x631], + ["D2",0x632], + ["D3",0x633], + ["D4",0x634], + ["D5",0x635], + ["D6",0x636], + ["D8",0x637], + ["D9",0x638], + ["DA",0x639], + ["DB",0x63A], + ["DC",0x640], + ["DD",0x641], + ["DE",0x642], + ["DF",0x643], + ["E1",0x644], + ["E3",0x645], + ["E4",0x646], + ["E5",0x647], + ["E6",0x648], + ["EC",0x649], + ["ED",0x64A], + ["F0",0x64B], + ["F1",0x64C], + ["F2",0x64D], + ["F3",0x64E], + ["F5",0x64F], + ["F6",0x650], + ["F8",0x651], + ["FA",0x652], + ["8A",0x679], + ["81",0x67E], + ["8D",0x686], + ["8F",0x688], + ["9A",0x691], + ["8E",0x698], + ["98",0x6A9], + ["90",0x6AF], + ["9F",0x6BA], + ["AA",0x6BE], + ["C0",0x6C1], + ["FF",0x6D2], + ["9D",0x200C], + ["9E",0x200D], + ["FD",0x200E], + ["FE",0x200F], + ["96",0x2013], + ["97",0x2014], + ["91",0x2018], + ["92",0x2019], + ["82",0x201A], + ["93",0x201C], + ["94",0x201D], + ["84",0x201E], + ["86",0x2020], + ["87",0x2021], + ["95",0x2022], + ["85",0x2026], + ["89",0x2030], + ["8B",0x2039], + ["9B",0x203A], + ["80",0x20AC], + ["99",0x2122], +] \ No newline at end of file diff --git a/enc/trans/windows-1257-tbl.rb b/enc/trans/windows-1257-tbl.rb new file mode 100644 index 0000000000..9e89b2b0b5 --- /dev/null +++ b/enc/trans/windows-1257-tbl.rb @@ -0,0 +1,118 @@ +WINDOWS_1257_TO_UCS_TBL = [ + ["A0",0xA0], + ["A2",0xA2], + ["A3",0xA3], + ["A4",0xA4], + ["A6",0xA6], + ["A7",0xA7], + ["8D",0xA8], + ["A9",0xA9], + ["AB",0xAB], + ["AC",0xAC], + ["AD",0xAD], + ["AE",0xAE], + ["9D",0xAF], + ["B0",0xB0], + ["B1",0xB1], + ["B2",0xB2], + ["B3",0xB3], + ["B4",0xB4], + ["B5",0xB5], + ["B6",0xB6], + ["B7",0xB7], + ["8F",0xB8], + ["B9",0xB9], + ["BB",0xBB], + ["BC",0xBC], + ["BD",0xBD], + ["BE",0xBE], + ["C4",0xC4], + ["C5",0xC5], + ["AF",0xC6], + ["C9",0xC9], + ["D3",0xD3], + ["D5",0xD5], + ["D6",0xD6], + ["D7",0xD7], + ["A8",0xD8], + ["DC",0xDC], + ["DF",0xDF], + ["E4",0xE4], + ["E5",0xE5], + ["BF",0xE6], + ["E9",0xE9], + ["F3",0xF3], + ["F5",0xF5], + ["F6",0xF6], + ["F7",0xF7], + ["B8",0xF8], + ["FC",0xFC], + ["C2",0x100], + ["E2",0x101], + ["C0",0x104], + ["E0",0x105], + ["C3",0x106], + ["E3",0x107], + ["C8",0x10C], + ["E8",0x10D], + ["C7",0x112], + ["E7",0x113], + ["CB",0x116], + ["EB",0x117], + ["C6",0x118], + ["E6",0x119], + ["CC",0x122], + ["EC",0x123], + ["CE",0x12A], + ["EE",0x12B], + ["C1",0x12E], + ["E1",0x12F], + ["CD",0x136], + ["ED",0x137], + ["CF",0x13B], + ["EF",0x13C], + ["D9",0x141], + ["F9",0x142], + ["D1",0x143], + ["F1",0x144], + ["D2",0x145], + ["F2",0x146], + ["D4",0x14C], + ["F4",0x14D], + ["AA",0x156], + ["BA",0x157], + ["DA",0x15A], + ["FA",0x15B], + ["D0",0x160], + ["F0",0x161], + ["DB",0x16A], + ["FB",0x16B], + ["D8",0x172], + ["F8",0x173], + ["CA",0x179], + ["EA",0x17A], + ["DD",0x17B], + ["FD",0x17C], + ["DE",0x17D], + ["FE",0x17E], + ["8E",0x2C7], + ["FF",0x2D9], + ["9E",0x2DB], + ["96",0x2013], + ["97",0x2014], + ["91",0x2018], + ["92",0x2019], + ["82",0x201A], + ["93",0x201C], + ["94",0x201D], + ["84",0x201E], + ["86",0x2020], + ["87",0x2021], + ["95",0x2022], + ["85",0x2026], + ["89",0x2030], + ["8B",0x2039], + ["9B",0x203A], + ["80",0x20AC], + ["99",0x2122], +] \ No newline at end of file diff --git a/enc/trans/windows-874-tbl.rb b/enc/trans/windows-874-tbl.rb new file mode 100644 index 0000000000..0552df3d28 --- /dev/null +++ b/enc/trans/windows-874-tbl.rb @@ -0,0 +1,99 @@ +WINDOWS_874_TO_UCS_TBL = [ + ["A0",0xA0], + ["A1",0xE01], + ["A2",0xE02], + ["A3",0xE03], + ["A4",0xE04], + ["A5",0xE05], + ["A6",0xE06], + ["A7",0xE07], + ["A8",0xE08], + ["A9",0xE09], + ["AA",0xE0A], + ["AB",0xE0B], + ["AC",0xE0C], + ["AD",0xE0D], + ["AE",0xE0E], + ["AF",0xE0F], + ["B0",0xE10], + ["B1",0xE11], + ["B2",0xE12], + ["B3",0xE13], + ["B4",0xE14], + ["B5",0xE15], + ["B6",0xE16], + ["B7",0xE17], + ["B8",0xE18], + ["B9",0xE19], + ["BA",0xE1A], + ["BB",0xE1B], + ["BC",0xE1C], + ["BD",0xE1D], + ["BE",0xE1E], + ["BF",0xE1F], + ["C0",0xE20], + ["C1",0xE21], + ["C2",0xE22], + ["C3",0xE23], + ["C4",0xE24], + ["C5",0xE25], + ["C6",0xE26], + ["C7",0xE27], + ["C8",0xE28], + ["C9",0xE29], + ["CA",0xE2A], + ["CB",0xE2B], + ["CC",0xE2C], + ["CD",0xE2D], + ["CE",0xE2E], + ["CF",0xE2F], + ["D0",0xE30], + ["D1",0xE31], + ["D2",0xE32], + ["D3",0xE33], + ["D4",0xE34], + ["D5",0xE35], + ["D6",0xE36], + ["D7",0xE37], + ["D8",0xE38], + ["D9",0xE39], + ["DA",0xE3A], + ["DF",0xE3F], + ["E0",0xE40], + ["E1",0xE41], + ["E2",0xE42], + ["E3",0xE43], + ["E4",0xE44], + ["E5",0xE45], + ["E6",0xE46], + ["E7",0xE47], + ["E8",0xE48], + ["E9",0xE49], + ["EA",0xE4A], + ["EB",0xE4B], + ["EC",0xE4C], + ["ED",0xE4D], + ["EE",0xE4E], + ["EF",0xE4F], + ["F0",0xE50], + ["F1",0xE51], + ["F2",0xE52], + ["F3",0xE53], + ["F4",0xE54], + ["F5",0xE55], + ["F6",0xE56], + ["F7",0xE57], + ["F8",0xE58], + ["F9",0xE59], + ["FA",0xE5A], + ["FB",0xE5B], + ["96",0x2013], + ["97",0x2014], + ["91",0x2018], + ["92",0x2019], + ["93",0x201C], + ["94",0x201D], + ["95",0x2022], + ["85",0x2026], + ["80",0x20AC], +] \ No newline at end of file diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index d46a9ce322..5280bd41dc 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -131,6 +131,87 @@ class TestTranscode < Test::Unit::TestCase end end + def test_windows_874 + check_both_ways("\u20AC", "\x80", 'windows-874') # € + assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-874') } + assert_raise(Encoding::UndefinedConversionError) { "\x84".encode("utf-8", 'windows-874') } + check_both_ways("\u2026", "\x85", 'windows-874') # … + assert_raise(Encoding::UndefinedConversionError) { "\x86".encode("utf-8", 'windows-874') } + assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-874') } + assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-874') } + check_both_ways("\u2018", "\x91", 'windows-874') # ‘ + check_both_ways("\u2014", "\x97", 'windows-874') # — + assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-874') } + assert_raise(Encoding::UndefinedConversionError) { "\x9F".encode("utf-8", 'windows-874') } + check_both_ways("\u00A0", "\xA0", 'windows-874') # non-breaking space + check_both_ways("\u0E0F", "\xAF", 'windows-874') # ฏ + check_both_ways("\u0E10", "\xB0", 'windows-874') # ฐ + check_both_ways("\u0E1F", "\xBF", 'windows-874') # ฟ + check_both_ways("\u0E20", "\xC0", 'windows-874') # ภ + check_both_ways("\u0E2F", "\xCF", 'windows-874') # ฯ + check_both_ways("\u0E30", "\xD0", 'windows-874') # ะ + check_both_ways("\u0E3A", "\xDA", 'windows-874') # ฺ + assert_raise(Encoding::UndefinedConversionError) { "\xDB".encode("utf-8", 'windows-874') } + assert_raise(Encoding::UndefinedConversionError) { "\xDE".encode("utf-8", 'windows-874') } + check_both_ways("\u0E3F", "\xDF", 'windows-874') # ฿ + check_both_ways("\u0E40", "\xE0", 'windows-874') # เ + check_both_ways("\u0E4F", "\xEF", 'windows-874') # ๏ + check_both_ways("\u0E50", "\xF0", 'windows-874') # ๐ + check_both_ways("\u0E5B", "\xFB", 'windows-874') # ๛ + assert_raise(Encoding::UndefinedConversionError) { "\xFC".encode("utf-8", 'windows-874') } + assert_raise(Encoding::UndefinedConversionError) { "\xFF".encode("utf-8", 'windows-874') } + end + + def test_windows_1250 + check_both_ways("\u20AC", "\x80", 'windows-1250') # € + assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1250') } + check_both_ways("\u201A", "\x82", 'windows-1250') # ‚ + assert_raise(Encoding::UndefinedConversionError) { "\x83".encode("utf-8", 'windows-1250') } + check_both_ways("\u201E", "\x84", 'windows-1250') # „ + check_both_ways("\u2021", "\x87", 'windows-1250') # ‡ + assert_raise(Encoding::UndefinedConversionError) { "\x88".encode("utf-8", 'windows-1250') } + check_both_ways("\u2030", "\x89", 'windows-1250') # ‰ + check_both_ways("\u0179", "\x8F", 'windows-1250') # Ź + assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1250') } + check_both_ways("\u2018", "\x91", 'windows-1250') # ‘ + check_both_ways("\u2014", "\x97", 'windows-1250') # — + assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-1250') } + check_both_ways("\u2122", "\x99", 'windows-1250') # ™ + check_both_ways("\u00A0", "\xA0", 'windows-1250') # non-breaking space + check_both_ways("\u017B", "\xAF", 'windows-1250') # Ż + check_both_ways("\u00B0", "\xB0", 'windows-1250') # ° + check_both_ways("\u017C", "\xBF", 'windows-1250') # ż + check_both_ways("\u0154", "\xC0", 'windows-1250') # Ŕ + check_both_ways("\u010E", "\xCF", 'windows-1250') # Ď + check_both_ways("\u0110", "\xD0", 'windows-1250') # Đ + check_both_ways("\u00DF", "\xDF", 'windows-1250') # ß + check_both_ways("\u0155", "\xE0", 'windows-1250') # ŕ + check_both_ways("\u010F", "\xEF", 'windows-1250') # ď + check_both_ways("\u0111", "\xF0", 'windows-1250') # đ + check_both_ways("\u02D9", "\xFF", 'windows-1250') # ˙ + end + + def test_windows_1251 + check_both_ways("\u0402", "\x80", 'windows-1251') # Ђ + check_both_ways("\u20AC", "\x88", 'windows-1251') # € + check_both_ways("\u040F", "\x8F", 'windows-1251') # Џ + check_both_ways("\u0452", "\x90", 'windows-1251') # ђ + assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-1251') } + check_both_ways("\u045F", "\x9F", 'windows-1251') # џ + check_both_ways("\u00A0", "\xA0", 'windows-1251') # non-breaking space + check_both_ways("\u0407", "\xAF", 'windows-1251') # Ї + check_both_ways("\u00B0", "\xB0", 'windows-1251') # ° + check_both_ways("\u0457", "\xBF", 'windows-1251') # ї + check_both_ways("\u0410", "\xC0", 'windows-1251') # А + check_both_ways("\u041F", "\xCF", 'windows-1251') # П + check_both_ways("\u0420", "\xD0", 'windows-1251') # Р + check_both_ways("\u042F", "\xDF", 'windows-1251') # Я + check_both_ways("\u0430", "\xE0", 'windows-1251') # а + check_both_ways("\u043F", "\xEF", 'windows-1251') # п + check_both_ways("\u0440", "\xF0", 'windows-1251') # р + check_both_ways("\u044F", "\xFF", 'windows-1251') # я + end + def test_windows_1252 check_both_ways("\u20AC", "\x80", 'windows-1252') # € assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1252') } @@ -158,6 +239,184 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u00FF", "\xFF", 'windows-1252') # ÿ end + def test_windows_1253 + check_both_ways("\u20AC", "\x80", 'windows-1253') # € + assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1253') } + check_both_ways("\u201A", "\x82", 'windows-1253') # ‚ + check_both_ways("\u2021", "\x87", 'windows-1253') # ‡ + assert_raise(Encoding::UndefinedConversionError) { "\x88".encode("utf-8", 'windows-1253') } + check_both_ways("\u2030", "\x89", 'windows-1253') # ‰ + assert_raise(Encoding::UndefinedConversionError) { "\x8A".encode("utf-8", 'windows-1253') } + check_both_ways("\u2039", "\x8B", 'windows-1253') # ‹ + assert_raise(Encoding::UndefinedConversionError) { "\x8C".encode("utf-8", 'windows-1253') } + assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-1253') } + assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1253') } + check_both_ways("\u2018", "\x91", 'windows-1253') # ‘ + check_both_ways("\u2014", "\x97", 'windows-1253') # — + assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-1253') } + check_both_ways("\u2122", "\x99", 'windows-1253') # ™ + assert_raise(Encoding::UndefinedConversionError) { "\x9A".encode("utf-8", 'windows-1253') } + check_both_ways("\u203A", "\x9B", 'windows-1253') # › + assert_raise(Encoding::UndefinedConversionError) { "\x9C".encode("utf-8", 'windows-1253') } + assert_raise(Encoding::UndefinedConversionError) { "\x9F".encode("utf-8", 'windows-1253') } + check_both_ways("\u00A0", "\xA0", 'windows-1253') # non-breaking space + check_both_ways("\u2015", "\xAF", 'windows-1253') # ― + check_both_ways("\u00B0", "\xB0", 'windows-1253') # ° + check_both_ways("\u038F", "\xBF", 'windows-1253') # Ώ + check_both_ways("\u0390", "\xC0", 'windows-1253') # ΐ + check_both_ways("\u039F", "\xCF", 'windows-1253') # Ο + check_both_ways("\u03A0", "\xD0", 'windows-1253') # Π + check_both_ways("\u03A1", "\xD1", 'windows-1253') # Ρ + assert_raise(Encoding::UndefinedConversionError) { "\xD2".encode("utf-8", 'windows-1253') } + check_both_ways("\u03A3", "\xD3", 'windows-1253') # Σ + check_both_ways("\u03AF", "\xDF", 'windows-1253') # ί + check_both_ways("\u03B0", "\xE0", 'windows-1253') # ΰ + check_both_ways("\u03BF", "\xEF", 'windows-1253') # ο + check_both_ways("\u03C0", "\xF0", 'windows-1253') # π + check_both_ways("\u03CE", "\xFE", 'windows-1253') # ώ + assert_raise(Encoding::UndefinedConversionError) { "\xFF".encode("utf-8", 'windows-1253') } + end + + def test_windows_1254 + check_both_ways("\u20AC", "\x80", 'windows-1254') # € + assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1254') } + check_both_ways("\u201A", "\x82", 'windows-1254') # ‚ + check_both_ways("\u0152", "\x8C", 'windows-1254') # Œ + assert_raise(Encoding::UndefinedConversionError) { "\x8D".encode("utf-8", 'windows-1254') } + assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-1254') } + assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1254') } + check_both_ways("\u2018", "\x91", 'windows-1254') # ‘ + check_both_ways("\u0153", "\x9C", 'windows-1254') # œ + assert_raise(Encoding::UndefinedConversionError) { "\x9D".encode("utf-8", 'windows-1254') } + assert_raise(Encoding::UndefinedConversionError) { "\x9E".encode("utf-8", 'windows-1254') } + check_both_ways("\u0178", "\x9F", 'windows-1254') # Ÿ + check_both_ways("\u00A0", "\xA0", 'windows-1254') # non-breaking space + check_both_ways("\u00AF", "\xAF", 'windows-1254') # ¯ + check_both_ways("\u00B0", "\xB0", 'windows-1254') # ° + check_both_ways("\u00BF", "\xBF", 'windows-1254') # ¿ + check_both_ways("\u00C0", "\xC0", 'windows-1254') # À + check_both_ways("\u00CF", "\xCF", 'windows-1254') # Ï + check_both_ways("\u011E", "\xD0", 'windows-1254') # Ğ + check_both_ways("\u00DF", "\xDF", 'windows-1254') # ß + check_both_ways("\u00E0", "\xE0", 'windows-1254') # à + check_both_ways("\u00EF", "\xEF", 'windows-1254') # ï + check_both_ways("\u011F", "\xF0", 'windows-1254') # ğ + check_both_ways("\u00FF", "\xFF", 'windows-1254') # ÿ + end + + def test_windows_1255 + check_both_ways("\u20AC", "\x80", 'windows-1255') # € + assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1255') } + check_both_ways("\u201A", "\x82", 'windows-1255') # ‚ + check_both_ways("\u2030", "\x89", 'windows-1255') # ‰ + assert_raise(Encoding::UndefinedConversionError) { "\x8A".encode("utf-8", 'windows-1255') } + check_both_ways("\u2039", "\x8B", 'windows-1255') # ‹ + assert_raise(Encoding::UndefinedConversionError) { "\x8C".encode("utf-8", 'windows-1255') } + assert_raise(Encoding::UndefinedConversionError) { "\x8F".encode("utf-8", 'windows-1255') } + assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1255') } + check_both_ways("\u2018", "\x91", 'windows-1255') # ‘ + check_both_ways("\u2122", "\x99", 'windows-1255') # ™ + assert_raise(Encoding::UndefinedConversionError) { "\x9A".encode("utf-8", 'windows-1255') } + check_both_ways("\u203A", "\x9B", 'windows-1255') # › + assert_raise(Encoding::UndefinedConversionError) { "\x9C".encode("utf-8", 'windows-1255') } + assert_raise(Encoding::UndefinedConversionError) { "\x9F".encode("utf-8", 'windows-1255') } + check_both_ways("\u00A0", "\xA0", 'windows-1255') # non-breaking space + check_both_ways("\u00A1", "\xA1", 'windows-1255') # ¡ + check_both_ways("\u00D7", "\xAA", 'windows-1255') # × + check_both_ways("\u00AF", "\xAF", 'windows-1255') # ¯ + check_both_ways("\u00B0", "\xB0", 'windows-1255') # ° + check_both_ways("\u00B8", "\xB8", 'windows-1255') # ¸ + check_both_ways("\u00F7", "\xBA", 'windows-1255') # ÷ + check_both_ways("\u00BF", "\xBF", 'windows-1255') # ¿ + check_both_ways("\u05B0", "\xC0", 'windows-1255') # ְ + check_both_ways("\u05B9", "\xC9", 'windows-1255') # ֹ + assert_raise(Encoding::UndefinedConversionError) { "\xCA".encode("utf-8", 'windows-1255') } + check_both_ways("\u05BB", "\xCB", 'windows-1255') # ֻ + check_both_ways("\u05BF", "\xCF", 'windows-1255') # ֿ + check_both_ways("\u05C0", "\xD0", 'windows-1255') # ׀ + check_both_ways("\u05F3", "\xD7", 'windows-1255') # ׳ + check_both_ways("\u05F4", "\xD8", 'windows-1255') # ״ + assert_raise(Encoding::UndefinedConversionError) { "\xD9".encode("utf-8", 'windows-1255') } + assert_raise(Encoding::UndefinedConversionError) { "\xDF".encode("utf-8", 'windows-1255') } + check_both_ways("\u05D0", "\xE0", 'windows-1255') # א + check_both_ways("\u05DF", "\xEF", 'windows-1255') # ן + check_both_ways("\u05E0", "\xF0", 'windows-1255') # נ + check_both_ways("\u05EA", "\xFA", 'windows-1255') # ת + assert_raise(Encoding::UndefinedConversionError) { "\xFB".encode("utf-8", 'windows-1255') } + assert_raise(Encoding::UndefinedConversionError) { "\xFC".encode("utf-8", 'windows-1255') } + check_both_ways("\u200E", "\xFD", 'windows-1255') # left-to-right mark + check_both_ways("\u200F", "\xFE", 'windows-1255') # right-to-left mark + assert_raise(Encoding::UndefinedConversionError) { "\xFF".encode("utf-8", 'windows-1255') } + end + + def test_windows_1256 + check_both_ways("\u20AC", "\x80", 'windows-1256') # € + check_both_ways("\u0679", "\x8A", 'windows-1256') # ٹ + check_both_ways("\u0688", "\x8F", 'windows-1256') # ڈ + check_both_ways("\u06AF", "\x90", 'windows-1256') # گ + check_both_ways("\u06A9", "\x98", 'windows-1256') # ک + check_both_ways("\u0691", "\x9A", 'windows-1256') # ڑ + check_both_ways("\u06BA", "\x9F", 'windows-1256') # ں + check_both_ways("\u00A0", "\xA0", 'windows-1256') # non-breaking space + check_both_ways("\u06BE", "\xAA", 'windows-1256') # ھ + check_both_ways("\u00AF", "\xAF", 'windows-1256') # ¯ + check_both_ways("\u00B0", "\xB0", 'windows-1256') # ° + check_both_ways("\u061F", "\xBF", 'windows-1256') # ؟ + check_both_ways("\u06C1", "\xC0", 'windows-1256') # ہ + check_both_ways("\u062F", "\xCF", 'windows-1256') # د + check_both_ways("\u0630", "\xD0", 'windows-1256') # ذ + check_both_ways("\u0643", "\xDF", 'windows-1256') # ك + check_both_ways("\u00E0", "\xE0", 'windows-1256') # à + check_both_ways("\u00EF", "\xEF", 'windows-1256') # ï + check_both_ways("\u064B", "\xF0", 'windows-1256') # ًً + check_both_ways("\u06D2", "\xFF", 'windows-1256') # ے + end + + def test_windows_1257 + check_both_ways("\u20AC", "\x80", 'windows-1257') # € + assert_raise(Encoding::UndefinedConversionError) { "\x81".encode("utf-8", 'windows-1257') } + check_both_ways("\u201A", "\x82", 'windows-1257') # ‚ + assert_raise(Encoding::UndefinedConversionError) { "\x83".encode("utf-8", 'windows-1257') } + check_both_ways("\u201E", "\x84", 'windows-1257') # „ + check_both_ways("\u2021", "\x87", 'windows-1257') # ‡ + assert_raise(Encoding::UndefinedConversionError) { "\x88".encode("utf-8", 'windows-1257') } + check_both_ways("\u2030", "\x89", 'windows-1257') # ‰ + assert_raise(Encoding::UndefinedConversionError) { "\x8A".encode("utf-8", 'windows-1257') } + check_both_ways("\u2039", "\x8B", 'windows-1257') # ‹ + assert_raise(Encoding::UndefinedConversionError) { "\x8C".encode("utf-8", 'windows-1257') } + check_both_ways("\u00A8", "\x8D", 'windows-1257') # ¨ + check_both_ways("\u02C7", "\x8E", 'windows-1257') # ˇ + check_both_ways("\u00B8", "\x8F", 'windows-1257') # ¸ + assert_raise(Encoding::UndefinedConversionError) { "\x90".encode("utf-8", 'windows-1257') } + check_both_ways("\u2018", "\x91", 'windows-1257') # ‘ + check_both_ways("\u2014", "\x97", 'windows-1257') # — + assert_raise(Encoding::UndefinedConversionError) { "\x98".encode("utf-8", 'windows-1257') } + check_both_ways("\u2122", "\x99", 'windows-1257') # ™ + assert_raise(Encoding::UndefinedConversionError) { "\x9A".encode("utf-8", 'windows-1257') } + check_both_ways("\u203A", "\x9B", 'windows-1257') # › + assert_raise(Encoding::UndefinedConversionError) { "\x9C".encode("utf-8", 'windows-1257') } + check_both_ways("\u00AF", "\x9D", 'windows-1257') # ¯ + check_both_ways("\u02DB", "\x9E", 'windows-1257') # ˛ + assert_raise(Encoding::UndefinedConversionError) { "\x9F".encode("utf-8", 'windows-1257') } + check_both_ways("\u00A0", "\xA0", 'windows-1257') # non-breaking space + assert_raise(Encoding::UndefinedConversionError) { "\xA1".encode("utf-8", 'windows-1257') } + check_both_ways("\u00A2", "\xA2", 'windows-1257') # ¢ + check_both_ways("\u00A4", "\xA4", 'windows-1257') # ¤ + assert_raise(Encoding::UndefinedConversionError) { "\xA5".encode("utf-8", 'windows-1257') } + check_both_ways("\u00A6", "\xA6", 'windows-1257') # ¦ + check_both_ways("\u00C6", "\xAF", 'windows-1257') # Æ + check_both_ways("\u00B0", "\xB0", 'windows-1257') # ° + check_both_ways("\u00E6", "\xBF", 'windows-1257') # æ + check_both_ways("\u0104", "\xC0", 'windows-1257') # Ą + check_both_ways("\u013B", "\xCF", 'windows-1257') # Ļ + check_both_ways("\u0160", "\xD0", 'windows-1257') # Š + check_both_ways("\u00DF", "\xDF", 'windows-1257') # ß + check_both_ways("\u0105", "\xE0", 'windows-1257') # ą + check_both_ways("\u013C", "\xEF", 'windows-1257') # ļ + check_both_ways("\u0161", "\xF0", 'windows-1257') # š + check_both_ways("\u02D9", "\xFF", 'windows-1257') # ˙ + end + def check_utf_16_both_ways(utf8, raw) copy = raw.dup 0.step(copy.length-1, 2) { |i| copy[i+1], copy[i] = copy[i], copy[i+1] } -- cgit v1.2.1