diff options
-rw-r--r-- | ChangeLog | 21 | ||||
-rwxr-xr-x | maint/Builducptable | 10 | ||||
-rw-r--r-- | testdata/testinput6 | 63 | ||||
-rw-r--r-- | testdata/testoutput6 | 112 | ||||
-rw-r--r-- | ucptable.h | 50 |
5 files changed, 238 insertions, 18 deletions
@@ -44,6 +44,27 @@ Version 7.5 12-Nov-07 10. Remove two redundant lines of code that can never be obeyed (their function was moved elsewhere). + +11. The program that makes PCRE's Unicode character property table had a bug + which caused it to generate incorrect table entries for sequences of + characters that have the same character type, but are in different scripts. + It amalgamated them into a single range, with the script of the first of + them. In other words, some characters were in the wrong script. There were + thirteen such cases, affecting characters in the following ranges: + + U+002b0 - U+002c1 + U+0060c - U+0060d + U+0061e - U+0061f + U+0064b - U+0065e + U+0074d - U+0076d + U+01800 - U+01805 + U+01d00 - U+01d77 + U+01d9b - U+01dbf + U+0200b - U+0200f + U+030fc - U+030fe + U+03260 - U+0327f + U+0fb46 - U+0fbb1 + U+10450 - U+1049d Version 7.4 21-Sep-07 diff --git a/maint/Builducptable b/maint/Builducptable index 359ab08..3c317d0 100755 --- a/maint/Builducptable +++ b/maint/Builducptable @@ -8,7 +8,9 @@ # The script is rather slow because it just searches linearly through the # Scripts data in order to find the script for each character or character # range. It could be made faster by sorting that data, or something, but hey, -# it is only ever run once in a blue moon. +# it is only ever run once in a blue moon. (It's even slower after I mended the +# "forgot to check for script number before amalgamation" bug, but even so, +# the effort of improving it isn't worth it.) # Subroutine: Given a character number, return the script number. 
The # Scripts.txt file has been read into an array, keeping just the codepoints @@ -164,7 +166,8 @@ while (<IN>) } else - { + { + my($startscript) = script($cp); my($ncp) = $cp + 1; while (<IN>) { @@ -172,7 +175,8 @@ while (<IN>) last if (hex($fields[0]) != $ncp || $fields[2] ne $gc || $fields[12] ne "" || - $fields[13] ne ""); + $fields[13] ne "" || + script($ncp) != $startscript); $ncp++; } diff --git a/testdata/testinput6 b/testdata/testinput6 index 53d2b32..6bed743 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -832,4 +832,67 @@ was broken in all cases./ /(\p{Yi}{0,3}+\277)*/ +/^[\p{Arabic}]/8 + \x{60e} + \x{656} + \x{657} + \x{658} + \x{659} + \x{65a} + \x{65b} + \x{65c} + \x{65d} + \x{65e} + \x{66a} + \x{6e9} + \x{6ef} + \x{6fa} + ** Failers + \x{600} + \x{650} + \x{651} + \x{652} + \x{653} + \x{654} + \x{655} + \x{65f} + +/^\p{Cyrillic}/8 + \x{1d2b} + +/^\p{Common}/8 + \x{589} + \x{60c} + \x{61f} + \x{964} + \x{965} + \x{970} + +/^\p{Inherited}/8 + \x{64b} + \x{654} + \x{655} + \x{200c} + ** Failers + \x{64a} + \x{656} + +/^\p{Shavian}/8 + \x{10450} + \x{1047f} + +/^\p{Deseret}/8 + \x{10400} + \x{1044f} + +/^\p{Osmanya}/8 + \x{10480} + \x{1049d} + \x{104a0} + \x{104a9} + ** Failers + \x{1049e} + \x{1049f} + \x{104aa} + / End of testinput6 / diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 0a58b84..049d1a3 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -1522,4 +1522,116 @@ No match /(\p{Yi}{0,3}+\277)*/ +/^[\p{Arabic}]/8 + \x{60e} + 0: \x{60e} + \x{656} + 0: \x{656} + \x{657} + 0: \x{657} + \x{658} + 0: \x{658} + \x{659} + 0: \x{659} + \x{65a} + 0: \x{65a} + \x{65b} + 0: \x{65b} + \x{65c} + 0: \x{65c} + \x{65d} + 0: \x{65d} + \x{65e} + 0: \x{65e} + \x{66a} + 0: \x{66a} + \x{6e9} + 0: \x{6e9} + \x{6ef} + 0: \x{6ef} + \x{6fa} + 0: \x{6fa} + ** Failers +No match + \x{600} +No match + \x{650} +No match + \x{651} +No match + \x{652} +No match + \x{653} +No match + \x{654} +No match + \x{655} +No match + \x{65f} +No 
match + +/^\p{Cyrillic}/8 + \x{1d2b} + 0: \x{1d2b} + +/^\p{Common}/8 + \x{589} + 0: \x{589} + \x{60c} + 0: \x{60c} + \x{61f} + 0: \x{61f} + \x{964} + 0: \x{964} + \x{965} + 0: \x{965} + \x{970} + 0: \x{970} + +/^\p{Inherited}/8 + \x{64b} + 0: \x{64b} + \x{654} + 0: \x{654} + \x{655} + 0: \x{655} + \x{200c} + 0: \x{200c} + ** Failers +No match + \x{64a} +No match + \x{656} +No match + +/^\p{Shavian}/8 + \x{10450} + 0: \x{10450} + \x{1047f} + 0: \x{1047f} + +/^\p{Deseret}/8 + \x{10400} + 0: \x{10400} + \x{1044f} + 0: \x{1044f} + +/^\p{Osmanya}/8 + \x{10480} + 0: \x{10480} + \x{1049d} + 0: \x{1049d} + \x{104a0} + 0: \x{104a0} + \x{104a9} + 0: \x{104a9} + ** Failers +No match + \x{1049e} +No match + \x{1049f} +No match + \x{104aa} +No match + / End of testinput6 / @@ -539,7 +539,8 @@ static const cnode ucp_table[] = { { 0x21000293, 0x14000000 }, { 0x21000294, 0x1c000000 }, { 0x21800295, 0x1400001a }, - { 0x218002b0, 0x18000011 }, + { 0x218002b0, 0x18000008 }, + { 0x098002b9, 0x18000008 }, { 0x098002c2, 0x60000003 }, { 0x098002c6, 0x1800000b }, { 0x098002d2, 0x6000000d }, @@ -1039,15 +1040,18 @@ static const cnode ucp_table[] = { { 0x198005f3, 0x54000001 }, { 0x09800600, 0x04000003 }, { 0x0000060b, 0x5c000000 }, - { 0x0980060c, 0x54000001 }, + { 0x0900060c, 0x54000000 }, + { 0x0000060d, 0x54000000 }, { 0x0080060e, 0x68000001 }, { 0x00800610, 0x30000005 }, { 0x0900061b, 0x54000000 }, - { 0x0080061e, 0x54000001 }, + { 0x0000061e, 0x54000000 }, + { 0x0900061f, 0x54000000 }, { 0x00800621, 0x1c000019 }, { 0x09000640, 0x18000000 }, { 0x00800641, 0x1c000009 }, - { 0x1b80064b, 0x30000013 }, + { 0x1b80064b, 0x3000000a }, + { 0x00800656, 0x30000008 }, { 0x09800660, 0x34000009 }, { 0x0080066a, 0x54000003 }, { 0x0080066e, 0x1c000001 }, @@ -1074,7 +1078,8 @@ static const cnode ucp_table[] = { { 0x31000711, 0x30000000 }, { 0x31800712, 0x1c00001d }, { 0x31800730, 0x3000001a }, - { 0x3180074d, 0x1c000020 }, + { 0x3180074d, 0x1c000002 }, + { 0x00800750, 0x1c00001d }, { 0x37800780, 
0x1c000025 }, { 0x378007a6, 0x3000000a }, { 0x370007b1, 0x1c000000 }, @@ -1460,7 +1465,10 @@ static const cnode ucp_table[] = { { 0x1f0017dd, 0x30000000 }, { 0x1f8017e0, 0x34000009 }, { 0x1f8017f0, 0x3c000009 }, - { 0x25801800, 0x54000005 }, + { 0x25801800, 0x54000001 }, + { 0x09801802, 0x54000001 }, + { 0x25001804, 0x54000000 }, + { 0x09001805, 0x54000000 }, { 0x25001806, 0x44000000 }, { 0x25801807, 0x54000003 }, { 0x2580180b, 0x30000002 }, @@ -1513,14 +1521,20 @@ static const cnode ucp_table[] = { { 0x3d801b61, 0x68000009 }, { 0x3d801b6b, 0x30000008 }, { 0x3d801b74, 0x68000008 }, - { 0x21801d00, 0x1400002b }, - { 0x21801d2c, 0x18000035 }, - { 0x21801d62, 0x14000015 }, + { 0x21801d00, 0x14000025 }, + { 0x13801d26, 0x14000004 }, + { 0x0c001d2b, 0x14000000 }, + { 0x21801d2c, 0x18000030 }, + { 0x13801d5d, 0x18000004 }, + { 0x21801d62, 0x14000003 }, + { 0x13801d66, 0x14000004 }, + { 0x21801d6b, 0x1400000c }, { 0x0c001d78, 0x18000000 }, { 0x21801d79, 0x14000003 }, { 0x21001d7d, 0x14000ee6 }, { 0x21801d7e, 0x1400001c }, - { 0x21801d9b, 0x18000024 }, + { 0x21801d9b, 0x18000023 }, + { 0x13001dbf, 0x18000000 }, { 0x1b801dc0, 0x3000000a }, { 0x1b801dfe, 0x30000001 }, { 0x21001e00, 0x24000001 }, @@ -1982,7 +1996,9 @@ static const cnode ucp_table[] = { { 0x13001ffc, 0x2000fff7 }, { 0x13801ffd, 0x60000001 }, { 0x09802000, 0x7400000a }, - { 0x0980200b, 0x04000004 }, + { 0x0900200b, 0x04000000 }, + { 0x1b80200c, 0x04000001 }, + { 0x0980200e, 0x04000001 }, { 0x09802010, 0x44000005 }, { 0x09802016, 0x54000001 }, { 0x09002018, 0x50000000 }, @@ -2615,7 +2631,8 @@ static const cnode ucp_table[] = { { 0x090030a0, 0x44000000 }, { 0x1d8030a1, 0x1c000059 }, { 0x090030fb, 0x54000000 }, - { 0x098030fc, 0x18000002 }, + { 0x090030fc, 0x18000000 }, + { 0x1d8030fd, 0x18000001 }, { 0x1d0030ff, 0x1c000000 }, { 0x03803105, 0x1c000027 }, { 0x17803131, 0x1c00005d }, @@ -2630,7 +2647,8 @@ static const cnode ucp_table[] = { { 0x0980322a, 0x68000019 }, { 0x09003250, 0x68000000 }, { 0x09803251, 
0x3c00000e }, - { 0x17803260, 0x6800001f }, + { 0x17803260, 0x6800001d }, + { 0x0980327e, 0x68000001 }, { 0x09803280, 0x3c000009 }, { 0x0980328a, 0x68000026 }, { 0x098032b1, 0x3c00000e }, @@ -2678,7 +2696,8 @@ static const cnode ucp_table[] = { { 0x1900fb3e, 0x1c000000 }, { 0x1980fb40, 0x1c000001 }, { 0x1980fb43, 0x1c000001 }, - { 0x1980fb46, 0x1c00006b }, + { 0x1980fb46, 0x1c000009 }, + { 0x0080fb50, 0x1c000061 }, { 0x0080fbd3, 0x1c00016a }, { 0x0900fd3e, 0x58000000 }, { 0x0900fd3f, 0x48000000 }, @@ -2944,7 +2963,8 @@ static const cnode ucp_table[] = { { 0x0d01044d, 0x1400ffd8 }, { 0x0d01044e, 0x1400ffd8 }, { 0x0d01044f, 0x1400ffd8 }, - { 0x2e810450, 0x1c00004d }, + { 0x2e810450, 0x1c00002f }, + { 0x2c810480, 0x1c00001d }, { 0x2c8104a0, 0x34000009 }, { 0x0b810800, 0x1c000005 }, { 0x0b010808, 0x1c000000 }, |