diff options
-rw-r--r-- | mysql-test/r/ctype_latin1.result | 298 | ||||
-rw-r--r-- | mysql-test/t/ctype_latin1.test | 55 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 340 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 8 | ||||
-rw-r--r-- | strings/dump_map.c | 10 |
5 files changed, 638 insertions, 73 deletions
diff --git a/mysql-test/r/ctype_latin1.result b/mysql-test/r/ctype_latin1.result new file mode 100644 index 00000000000..a8182438ac4 --- /dev/null +++ b/mysql-test/r/ctype_latin1.result @@ -0,0 +1,298 @@ +drop table if exists t1; +SET NAMES latin1; +CREATE TABLE t1 (a char(1) character set latin1); +INSERT INTO t1 VALUES (0x00),(0x01),(0x02),(0x03),(0x04),(0x05),(0x06),(0x07); +INSERT INTO t1 VALUES (0x08),(0x09),(0x0A),(0x0B),(0x0C),(0x0D),(0x0E),(0x0F); +INSERT INTO t1 VALUES (0x10),(0x11),(0x12),(0x13),(0x14),(0x15),(0x16),(0x17); +INSERT INTO t1 VALUES (0x18),(0x19),(0x1A),(0x1B),(0x1C),(0x1D),(0x1E),(0x1F); +INSERT INTO t1 VALUES (0x20),(0x21),(0x22),(0x23),(0x24),(0x25),(0x26),(0x27); +INSERT INTO t1 VALUES (0x28),(0x29),(0x2A),(0x2B),(0x2C),(0x2D),(0x2E),(0x2F); +INSERT INTO t1 VALUES (0x30),(0x31),(0x32),(0x33),(0x34),(0x35),(0x36),(0x37); +INSERT INTO t1 VALUES (0x38),(0x39),(0x3A),(0x3B),(0x3C),(0x3D),(0x3E),(0x3F); +INSERT INTO t1 VALUES (0x40),(0x41),(0x42),(0x43),(0x44),(0x45),(0x46),(0x47); +INSERT INTO t1 VALUES (0x48),(0x49),(0x4A),(0x4B),(0x4C),(0x4D),(0x4E),(0x4F); +INSERT INTO t1 VALUES (0x50),(0x51),(0x52),(0x53),(0x54),(0x55),(0x56),(0x57); +INSERT INTO t1 VALUES (0x58),(0x59),(0x5A),(0x5B),(0x5C),(0x5D),(0x5E),(0x5F); +INSERT INTO t1 VALUES (0x60),(0x61),(0x62),(0x63),(0x64),(0x65),(0x66),(0x67); +INSERT INTO t1 VALUES (0x68),(0x69),(0x6A),(0x6B),(0x6C),(0x6D),(0x6E),(0x6F); +INSERT INTO t1 VALUES (0x70),(0x71),(0x72),(0x73),(0x74),(0x75),(0x76),(0x77); +INSERT INTO t1 VALUES (0x78),(0x79),(0x7A),(0x7B),(0x7C),(0x7D),(0x7E),(0x7F); +INSERT INTO t1 VALUES (0x80),(0x81),(0x82),(0x83),(0x84),(0x85),(0x86),(0x87); +INSERT INTO t1 VALUES (0x88),(0x89),(0x8A),(0x8B),(0x8C),(0x8D),(0x8E),(0x8F); +INSERT INTO t1 VALUES (0x90),(0x91),(0x92),(0x93),(0x94),(0x95),(0x96),(0x97); +INSERT INTO t1 VALUES (0x98),(0x99),(0x9A),(0x9B),(0x9C),(0x9D),(0x9E),(0x9F); +INSERT INTO t1 VALUES (0xA0),(0xA1),(0xA2),(0xA3),(0xA4),(0xA5),(0xA6),(0xA7); +INSERT INTO t1 
VALUES (0xA8),(0xA9),(0xAA),(0xAB),(0xAC),(0xAD),(0xAE),(0xAF); +INSERT INTO t1 VALUES (0xB0),(0xB1),(0xB2),(0xB3),(0xB4),(0xB5),(0xB6),(0xB7); +INSERT INTO t1 VALUES (0xB8),(0xB9),(0xBA),(0xBB),(0xBC),(0xBD),(0xBE),(0xBF); +INSERT INTO t1 VALUES (0xC0),(0xC1),(0xC2),(0xC3),(0xC4),(0xC5),(0xC6),(0xC7); +INSERT INTO t1 VALUES (0xC8),(0xC9),(0xCA),(0xCB),(0xCC),(0xCD),(0xCE),(0xCF); +INSERT INTO t1 VALUES (0xD0),(0xD1),(0xD2),(0xD3),(0xD4),(0xD5),(0xD6),(0xD7); +INSERT INTO t1 VALUES (0xD8),(0xD9),(0xDA),(0xDB),(0xDC),(0xDD),(0xDE),(0xDF); +INSERT INTO t1 VALUES (0xE0),(0xE1),(0xE2),(0xE3),(0xE4),(0xE5),(0xE6),(0xE7); +INSERT INTO t1 VALUES (0xE8),(0xE9),(0xEA),(0xEB),(0xEC),(0xED),(0xEE),(0xEF); +INSERT INTO t1 VALUES (0xF0),(0xF1),(0xF2),(0xF3),(0xF4),(0xF5),(0xF6),(0xF7); +INSERT INTO t1 VALUES (0xF8),(0xF9),(0xFA),(0xFB),(0xFC),(0xFD),(0xFE),(0xFF); +SELECT +hex(a), +hex(@u:=convert(a using utf8)), +hex(@l:=convert(@u using latin1)), +a=@l FROM t1; +hex(a) hex(@u:=convert(a using utf8)) hex(@l:=convert(@u using latin1)) a=@l +00 00 00 1 +01 01 01 1 +02 02 02 1 +03 03 03 1 +04 04 04 1 +05 05 05 1 +06 06 06 1 +07 07 07 1 +08 08 08 1 +09 09 09 1 +0A 0A 0A 1 +0B 0B 0B 1 +0C 0C 0C 1 +0D 0D 0D 1 +0E 0E 0E 1 +0F 0F 0F 1 +10 10 10 1 +11 11 11 1 +12 12 12 1 +13 13 13 1 +14 14 14 1 +15 15 15 1 +16 16 16 1 +17 17 17 1 +18 18 18 1 +19 19 19 1 +1A 1A 1A 1 +1B 1B 1B 1 +1C 1C 1C 1 +1D 1D 1D 1 +1E 1E 1E 1 +1F 1F 1F 1 + 1 +21 21 21 1 +22 22 22 1 +23 23 23 1 +24 24 24 1 +25 25 25 1 +26 26 26 1 +27 27 27 1 +28 28 28 1 +29 29 29 1 +2A 2A 2A 1 +2B 2B 2B 1 +2C 2C 2C 1 +2D 2D 2D 1 +2E 2E 2E 1 +2F 2F 2F 1 +30 30 30 1 +31 31 31 1 +32 32 32 1 +33 33 33 1 +34 34 34 1 +35 35 35 1 +36 36 36 1 +37 37 37 1 +38 38 38 1 +39 39 39 1 +3A 3A 3A 1 +3B 3B 3B 1 +3C 3C 3C 1 +3D 3D 3D 1 +3E 3E 3E 1 +3F 3F 3F 1 +40 40 40 1 +41 41 41 1 +42 42 42 1 +43 43 43 1 +44 44 44 1 +45 45 45 1 +46 46 46 1 +47 47 47 1 +48 48 48 1 +49 49 49 1 +4A 4A 4A 1 +4B 4B 4B 1 +4C 4C 4C 1 +4D 4D 4D 1 +4E 4E 4E 1 +4F 4F 4F 1 +50 
50 50 1 +51 51 51 1 +52 52 52 1 +53 53 53 1 +54 54 54 1 +55 55 55 1 +56 56 56 1 +57 57 57 1 +58 58 58 1 +59 59 59 1 +5A 5A 5A 1 +5B 5B 5B 1 +5C 5C 5C 1 +5D 5D 5D 1 +5E 5E 5E 1 +5F 5F 5F 1 +60 60 60 1 +61 61 61 1 +62 62 62 1 +63 63 63 1 +64 64 64 1 +65 65 65 1 +66 66 66 1 +67 67 67 1 +68 68 68 1 +69 69 69 1 +6A 6A 6A 1 +6B 6B 6B 1 +6C 6C 6C 1 +6D 6D 6D 1 +6E 6E 6E 1 +6F 6F 6F 1 +70 70 70 1 +71 71 71 1 +72 72 72 1 +73 73 73 1 +74 74 74 1 +75 75 75 1 +76 76 76 1 +77 77 77 1 +78 78 78 1 +79 79 79 1 +7A 7A 7A 1 +7B 7B 7B 1 +7C 7C 7C 1 +7D 7D 7D 1 +7E 7E 7E 1 +7F 7F 7F 1 +80 E282AC 80 1 +81 3F 3F 0 +82 E2809A 82 1 +83 C692 83 1 +84 E2809E 84 1 +85 E280A6 85 1 +86 E280A0 86 1 +87 E280A1 87 1 +88 CB86 88 1 +89 E280B0 89 1 +8A C5A0 8A 1 +8B E280B9 8B 1 +8C C592 8C 1 +8D 3F 3F 0 +8E C5BD 8E 1 +8F 3F 3F 0 +90 3F 3F 0 +91 E28098 91 1 +92 E28099 92 1 +93 E2809C 93 1 +94 E2809D 94 1 +95 E280A2 95 1 +96 E28093 96 1 +97 E28094 97 1 +98 CB9C 98 1 +99 E284A2 99 1 +9A C5A1 9A 1 +9B E280BA 9B 1 +9C C593 9C 1 +9D 3F 3F 0 +9E C5BE 9E 1 +9F C5B8 9F 1 +A0 C2A0 A0 1 +A1 C2A1 A1 1 +A2 C2A2 A2 1 +A3 C2A3 A3 1 +A4 C2A4 A4 1 +A5 C2A5 A5 1 +A6 C2A6 A6 1 +A7 C2A7 A7 1 +A8 C2A8 A8 1 +A9 C2A9 A9 1 +AA C2AA AA 1 +AB C2AB AB 1 +AC C2AC AC 1 +AD C2AD AD 1 +AE C2AE AE 1 +AF C2AF AF 1 +B0 C2B0 B0 1 +B1 C2B1 B1 1 +B2 C2B2 B2 1 +B3 C2B3 B3 1 +B4 C2B4 B4 1 +B5 C2B5 B5 1 +B6 C2B6 B6 1 +B7 C2B7 B7 1 +B8 C2B8 B8 1 +B9 C2B9 B9 1 +BA C2BA BA 1 +BB C2BB BB 1 +BC C2BC BC 1 +BD C2BD BD 1 +BE C2BE BE 1 +BF C2BF BF 1 +C0 C380 C0 1 +C1 C381 C1 1 +C2 C382 C2 1 +C3 C383 C3 1 +C4 C384 C4 1 +C5 C385 C5 1 +C6 C386 C6 1 +C7 C387 C7 1 +C8 C388 C8 1 +C9 C389 C9 1 +CA C38A CA 1 +CB C38B CB 1 +CC C38C CC 1 +CD C38D CD 1 +CE C38E CE 1 +CF C38F CF 1 +D0 C390 D0 1 +D1 C391 D1 1 +D2 C392 D2 1 +D3 C393 D3 1 +D4 C394 D4 1 +D5 C395 D5 1 +D6 C396 D6 1 +D7 C397 D7 1 +D8 C398 D8 1 +D9 C399 D9 1 +DA C39A DA 1 +DB C39B DB 1 +DC C39C DC 1 +DD C39D DD 1 +DE C39E DE 1 +DF C39F DF 1 +E0 C3A0 E0 1 +E1 C3A1 E1 1 +E2 C3A2 E2 1 +E3 C3A3 E3 1 +E4 
C3A4 E4 1 +E5 C3A5 E5 1 +E6 C3A6 E6 1 +E7 C3A7 E7 1 +E8 C3A8 E8 1 +E9 C3A9 E9 1 +EA C3AA EA 1 +EB C3AB EB 1 +EC C3AC EC 1 +ED C3AD ED 1 +EE C3AE EE 1 +EF C3AF EF 1 +F0 C3B0 F0 1 +F1 C3B1 F1 1 +F2 C3B2 F2 1 +F3 C3B3 F3 1 +F4 C3B4 F4 1 +F5 C3B5 F5 1 +F6 C3B6 F6 1 +F7 C3B7 F7 1 +F8 C3B8 F8 1 +F9 C3B9 F9 1 +FA C3BA FA 1 +FB C3BB FB 1 +FC C3BC FC 1 +FD C3BD FD 1 +FE C3BE FE 1 +FF C3BF FF 1 +DROP TABLE t1; diff --git a/mysql-test/t/ctype_latin1.test b/mysql-test/t/ctype_latin1.test new file mode 100644 index 00000000000..14062437428 --- /dev/null +++ b/mysql-test/t/ctype_latin1.test @@ -0,0 +1,55 @@ +# +# Tests with the latin1 character set +# +--disable_warnings +drop table if exists t1; +--enable_warnings + +# +# WL 1494: Treat latin1 as cp1252 for unicode conversion +# + +SET NAMES latin1; +CREATE TABLE t1 (a char(1) character set latin1); +INSERT INTO t1 VALUES (0x00),(0x01),(0x02),(0x03),(0x04),(0x05),(0x06),(0x07); +INSERT INTO t1 VALUES (0x08),(0x09),(0x0A),(0x0B),(0x0C),(0x0D),(0x0E),(0x0F); +INSERT INTO t1 VALUES (0x10),(0x11),(0x12),(0x13),(0x14),(0x15),(0x16),(0x17); +INSERT INTO t1 VALUES (0x18),(0x19),(0x1A),(0x1B),(0x1C),(0x1D),(0x1E),(0x1F); +INSERT INTO t1 VALUES (0x20),(0x21),(0x22),(0x23),(0x24),(0x25),(0x26),(0x27); +INSERT INTO t1 VALUES (0x28),(0x29),(0x2A),(0x2B),(0x2C),(0x2D),(0x2E),(0x2F); +INSERT INTO t1 VALUES (0x30),(0x31),(0x32),(0x33),(0x34),(0x35),(0x36),(0x37); +INSERT INTO t1 VALUES (0x38),(0x39),(0x3A),(0x3B),(0x3C),(0x3D),(0x3E),(0x3F); +INSERT INTO t1 VALUES (0x40),(0x41),(0x42),(0x43),(0x44),(0x45),(0x46),(0x47); +INSERT INTO t1 VALUES (0x48),(0x49),(0x4A),(0x4B),(0x4C),(0x4D),(0x4E),(0x4F); +INSERT INTO t1 VALUES (0x50),(0x51),(0x52),(0x53),(0x54),(0x55),(0x56),(0x57); +INSERT INTO t1 VALUES (0x58),(0x59),(0x5A),(0x5B),(0x5C),(0x5D),(0x5E),(0x5F); +INSERT INTO t1 VALUES (0x60),(0x61),(0x62),(0x63),(0x64),(0x65),(0x66),(0x67); +INSERT INTO t1 VALUES (0x68),(0x69),(0x6A),(0x6B),(0x6C),(0x6D),(0x6E),(0x6F); +INSERT INTO t1 VALUES 
(0x70),(0x71),(0x72),(0x73),(0x74),(0x75),(0x76),(0x77); +INSERT INTO t1 VALUES (0x78),(0x79),(0x7A),(0x7B),(0x7C),(0x7D),(0x7E),(0x7F); +INSERT INTO t1 VALUES (0x80),(0x81),(0x82),(0x83),(0x84),(0x85),(0x86),(0x87); +INSERT INTO t1 VALUES (0x88),(0x89),(0x8A),(0x8B),(0x8C),(0x8D),(0x8E),(0x8F); +INSERT INTO t1 VALUES (0x90),(0x91),(0x92),(0x93),(0x94),(0x95),(0x96),(0x97); +INSERT INTO t1 VALUES (0x98),(0x99),(0x9A),(0x9B),(0x9C),(0x9D),(0x9E),(0x9F); +INSERT INTO t1 VALUES (0xA0),(0xA1),(0xA2),(0xA3),(0xA4),(0xA5),(0xA6),(0xA7); +INSERT INTO t1 VALUES (0xA8),(0xA9),(0xAA),(0xAB),(0xAC),(0xAD),(0xAE),(0xAF); +INSERT INTO t1 VALUES (0xB0),(0xB1),(0xB2),(0xB3),(0xB4),(0xB5),(0xB6),(0xB7); +INSERT INTO t1 VALUES (0xB8),(0xB9),(0xBA),(0xBB),(0xBC),(0xBD),(0xBE),(0xBF); +INSERT INTO t1 VALUES (0xC0),(0xC1),(0xC2),(0xC3),(0xC4),(0xC5),(0xC6),(0xC7); +INSERT INTO t1 VALUES (0xC8),(0xC9),(0xCA),(0xCB),(0xCC),(0xCD),(0xCE),(0xCF); +INSERT INTO t1 VALUES (0xD0),(0xD1),(0xD2),(0xD3),(0xD4),(0xD5),(0xD6),(0xD7); +INSERT INTO t1 VALUES (0xD8),(0xD9),(0xDA),(0xDB),(0xDC),(0xDD),(0xDE),(0xDF); +INSERT INTO t1 VALUES (0xE0),(0xE1),(0xE2),(0xE3),(0xE4),(0xE5),(0xE6),(0xE7); +INSERT INTO t1 VALUES (0xE8),(0xE9),(0xEA),(0xEB),(0xEC),(0xED),(0xEE),(0xEF); +INSERT INTO t1 VALUES (0xF0),(0xF1),(0xF2),(0xF3),(0xF4),(0xF5),(0xF6),(0xF7); +INSERT INTO t1 VALUES (0xF8),(0xF9),(0xFA),(0xFB),(0xFC),(0xFD),(0xFE),(0xFF); + +# +# 0x81 0x8D 0x8F 0x90 0x9D are undefined in cp1252 +# +SELECT + hex(a), + hex(@u:=convert(a using utf8)), + hex(@l:=convert(@u using latin1)), + a=@l FROM t1; +DROP TABLE t1; diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 7a010c3bef8..520fec676b1 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -18,60 +18,6 @@ #include "m_string.h" #include "m_ctype.h" - -static uint16 latin1_uni[256]={ - 0,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007, -0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F, 
-0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017, -0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F, -0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027, -0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F, -0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037, -0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F, -0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047, -0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F, -0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057, -0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F, -0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067, -0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F, -0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077, -0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, -0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7, -0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF, -0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7, -0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF, -0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7, -0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF, -0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7, -0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF, -0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7, -0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF, -0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7, -0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF -}; - -static uchar uni_latin1[]={ -0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, -0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, -0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, -0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, 
-0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, -0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, -0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, -0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, -0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, -0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, -0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, -0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, -0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF}; - static uchar ctype_latin1[] = { 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, @@ -149,29 +95,287 @@ static uchar sort_order_latin1[] = { 68, 78, 79, 79, 79, 79, 93,247,216, 85, 85, 85, 89, 89,222,255 }; +/* + WL#1494 notes: + + We'll use cp1252 instead of iso-8859-1. + cp1252 contains printable characters in the range 0x80-0x9F. + In ISO 8859-1, these code points have no associated printable + characters. Therefore, by converting from CP1252 to ISO 8859-1, + one would lose the euro (for instance). Since most people are + unaware of the difference, and since we don't really want a + "Windows ANSI" to differ from a "Unix ANSI", we will: + + - continue to pretend the latin1 character set is ISO 8859-1 + - actually allow the storage of euro etc. 
so it's actually cp1252 +*/ + +unsigned short cs_to_uni[256]={ +0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007, +0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F, +0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017, +0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F, +0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027, +0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F, +0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037, +0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F, +0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047, +0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F, +0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057, +0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F, +0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067, +0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F, +0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077, +0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F, +0x20AC,0x0000,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021, +0x02C6,0x2030,0x0160,0x2039,0x0152,0x0000,0x017D,0x0000, +0x0000,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014, +0x02DC,0x2122,0x0161,0x203A,0x0153,0x0000,0x017E,0x0178, +0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7, +0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF, +0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7, +0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF, +0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7, +0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF, +0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7, +0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF, +0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7, +0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF, +0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7, +0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF +}; +unsigned char pl00[256]={ +0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, 
+0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, +0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, +0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, +0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37, +0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, +0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47, +0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, +0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57, +0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, +0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67, +0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, +0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77, +0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, +0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, +0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7, +0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, +0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7, +0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, +0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7, +0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, +0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7, +0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, +0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7, +0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF +}; +unsigned char pl01[256]={ +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x8C,0x9C,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x8A,0x9A,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
+0x9F,0x00,0x00,0x00,0x00,0x8E,0x9E,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x83,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +}; +unsigned char pl02[256]={ +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x88,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x98,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +}; +unsigned char pl20[256]={ +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x96,0x97,0x00,0x00,0x00, +0x91,0x92,0x82,0x00,0x93,0x94,0x84,0x00, +0x86,0x87,0x95,0x00,0x00,0x00,0x85,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x89,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x8B,0x9B,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +}; +unsigned char pl21[256]={ +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x99,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +}; +unsigned char *uni_to_cs[256]={ +pl00,pl01,pl02,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +pl20,pl21,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, 
+NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL +}; static -int my_mb_wc_latin1(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t *wc, - const unsigned char *str, - const unsigned char *end) +int my_mb_wc_latin1(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *wc, + const unsigned char *str, + const unsigned char *end __attribute__((unused))) { if (str >= end) return MY_CS_TOOFEW(0); - return ((wc[0]= latin1_uni[*str]) || (!str[0])) ? 1 : MY_CS_ILSEQ; + *wc=cs_to_uni[*str]; /* table holds 0 for bytes with no Unicode mapping; only byte 0x00 legitimately yields 0 */ + return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1; } static -int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)), - my_wc_t wc, - unsigned char *str, - unsigned char *end) +int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, + unsigned char *str, + unsigned char *end __attribute__((unused))) { + unsigned char *pl; + if (str >= end) return MY_CS_TOOSMALL; - - return ((wc < 256) && ((str[0]=uni_latin1[wc]) || (!wc))) ? 1 : MY_CS_ILUNI; + + pl= (wc > 0xFFFF) ? NULL : uni_to_cs[wc >> 8]; /* only code points up to 0xFFFF are looked up; masking the high bits would alias e.g. U+100E9 onto U+00E9 */ + str[0]= pl ? pl[wc & 0xFF] : '\0'; + return (!str[0] && wc) ? 
MY_CS_ILUNI : 1; /* a 0 byte is a valid result only for U+0000 */ } static MY_CHARSET_HANDLER my_charset_handler= @@ -212,7 +416,7 @@ CHARSET_INFO my_charset_latin1= to_lower_latin1, to_upper_latin1, sort_order_latin1, - latin1_uni, /* tab_to_uni */ + cs_to_uni, /* tab_to_uni */ NULL, /* tab_from_uni */ "","", 1, /* strxfrm_multiply */ @@ -489,7 +693,7 @@ CHARSET_INFO my_charset_latin1_german2_ci= to_lower_latin1, to_upper_latin1, sort_order_latin1_de, - latin1_uni, /* tab_to_uni */ + cs_to_uni, /* tab_to_uni */ NULL, /* tab_from_uni */ "","", 2, /* strxfrm_multiply */ @@ -513,7 +717,7 @@ CHARSET_INFO my_charset_latin1_bin= to_lower_latin1, to_upper_latin1, sort_order_latin1_de, - latin1_uni, /* tab_to_uni */ + cs_to_uni, /* tab_to_uni */ NULL, /* tab_from_uni */ "", "", diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 68cd77f96fc..5f413305c88 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4474,6 +4474,11 @@ my_wc_mb_sjis(CHARSET_INFO *cs __attribute__((unused)), if ((int) wc < 0x80) { + if (wc == 0x5c) + { + code= 0x815f; + goto mb; + } s[0]= (uchar) wc; return 1; } @@ -4486,7 +4491,8 @@ my_wc_mb_sjis(CHARSET_INFO *cs __attribute__((unused)), s[0]= code; return 1; } - + +mb: if (s+2>e) return MY_CS_TOOSMALL; diff --git a/strings/dump_map.c b/strings/dump_map.c index f999160d0d4..708d9139f3c 100644 --- a/strings/dump_map.c +++ b/strings/dump_map.c @@ -1,13 +1,15 @@ #include <stdio.h> #include <string.h> -static void print_short_array(unsigned short *a) +static void print_short_array(unsigned short *a, size_t width) { int i; printf("{\n"); for (i=0; i<=0xFF; i++) { - printf("0x%04X%s%s",(int)a[i],i<0xFF?",":"",(i+1) % 8 ? "" :"\n"); + const char *fmt= (width==4) ? "0x%04X" : "0x%02X"; + printf(fmt,(int)a[i]); + printf("%s%s",i<0xFF?",":"",(i+1) % 8 ? 
"" :"\n"); } printf("};\n"); @@ -41,7 +43,7 @@ int main(void) } printf("unsigned short cs_to_uni[256]="); - print_short_array(touni); + print_short_array(touni, 4); for (i=0;i<=0xFF;i++) { @@ -53,7 +55,7 @@ int main(void) if (fromstat[i]) { printf("unsigned char pl%02X[256]=",i); - print_short_array(fromuni+i*256); + print_short_array(fromuni+i*256, 2); } } |