diff options
author | SADAHIRO Tomoyuki <BQW10602@nifty.com> | 2006-11-05 06:53:50 +0900 |
---|---|---|
committer | H.Merijn Brand <h.m.brand@xs4all.nl> | 2006-11-04 19:15:19 +0000 |
commit | 9e08bc66da56140ed8efaea283d1b4b6053eef0b (patch) | |
tree | 499e32602b95c1343f5a56af79b647f195b4f5ff /t/uni | |
parent | 96d4b0ee18db074ad085f9a9d1710201f6a87763 (diff) | |
download | perl-9e08bc66da56140ed8efaea283d1b4b6053eef0b.tar.gz |
Re: [perl #40641] crash with unicode characters in regex comment
Message-Id: <20061104215302.3325.BQW10602@nifty.com>
p4raw-id: //depot/perl@29204
Diffstat (limited to 't/uni')
-rw-r--r-- | t/uni/greek.t | 119 | ||||
-rw-r--r-- | t/uni/latin2.t | 153 | ||||
-rw-r--r-- | t/uni/tr_utf8.t | 10 |
3 files changed, 281 insertions, 1 deletions
diff --git a/t/uni/greek.t b/t/uni/greek.t new file mode 100644 index 0000000000..a8102f3880 --- /dev/null +++ b/t/uni/greek.t @@ -0,0 +1,119 @@ +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + @INC = '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + unless (PerlIO::Layer->find('perlio')){ + print "1..0 # Skip: PerlIO required\n"; + exit 0; + } + if ($ENV{PERL_CORE_MINITEST}) { + print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n"; + exit 0; + } + $| = 1; + require './test.pl'; +} + +plan tests => 72; + +use encoding "greek"; # iso 8859-7 + +# U+0391, \xC1, \301, GREEK CAPITAL LETTER ALPHA +# U+03B1, \xE1, \341, GREEK SMALL LETTER ALPHA + +ok("\xC1" =~ /\xC1/, '\xC1 to /\xC1/'); +ok("\x{391}" =~ /\xC1/, '\x{391} to /\xC1/'); +ok("\xC1" =~ /\x{C1}/, '\xC1 to /\x{C1}/'); +ok("\x{391}" =~ /\x{C1}/, '\x{391} to /\x{C1}/'); +ok("\xC1" =~ /\301/, '\xC1 to /\301/'); +ok("\x{391}" =~ /\301/, '\x{391} to /\301/'); +ok("\xC1" =~ /\x{391}/, '\xC1 to /\x{391}/'); +ok("\x{391}" =~ /\x{391}/, '\x{391} to /\x{391}/'); + +ok("\xC1" =~ /\xC1/i, '\xC1 to /\xC1/i'); +ok("\xE1" =~ /\xC1/i, '\xE1 to /\xC1/i'); +ok("\xC1" =~ /\xE1/i, '\xC1 to /\xE1/i'); +ok("\xE1" =~ /\xE1/i, '\xE1 to /\xE1/i'); +ok("\xC1" =~ /\x{391}/i, '\xC1 to /\x{391}/i'); +ok("\xE1" =~ /\x{391}/i, '\xE1 to /\x{391}/i'); +ok("\xC1" =~ /\x{3B1}/i, '\xC1 to /\x{3B1}/i'); +ok("\xE1" =~ /\x{3B1}/i, '\xE1 to /\x{3B1}/i'); + +ok("\xC1" =~ /[\xC1]/, '\xC1 to /[\xC1]/'); +ok("\x{391}" =~ /[\xC1]/, '\x{391} to /[\xC1]/'); +ok("\xC1" =~ /[\x{C1}]/, '\xC1 to /[\x{C1}]/'); +ok("\x{391}" =~ /[\x{C1}]/, '\x{391} to /[\x{C1}]/'); +ok("\xC1" =~ /[\301]/, '\xC1 to /[\301]/'); +ok("\x{391}" =~ /[\301]/, '\x{391} to /[\301]/'); +ok("\xC1" =~ /[\x{391}]/, '\xC1 to /[\x{391}]/'); +ok("\x{391}" =~ /[\x{391}]/, '\x{391} to /[\x{391}]/'); + 
+ok("\xC1" =~ /[\xC1]/i, '\xC1 to /[\xC1]/i'); +ok("\xE1" =~ /[\xC1]/i, '\xE1 to /[\xC1]/i'); +ok("\xC1" =~ /[\xE1]/i, '\xC1 to /[\xE1]/i'); +ok("\xE1" =~ /[\xE1]/i, '\xE1 to /[\xE1]/i'); +ok("\xC1" =~ /[\x{391}]/i, '\xC1 to /[\x{391}]/i'); +ok("\xE1" =~ /[\x{391}]/i, '\xE1 to /[\x{391}]/i'); +ok("\xC1" =~ /[\x{3B1}]/i, '\xC1 to /[\x{3B1}]/i'); +ok("\xE1" =~ /[\x{3B1}]/i, '\xE1 to /[\x{3B1}]/i'); + +ok("\xC1" =~ '\xC1', '\xC1 to \'\xC1\''); +ok("\xC1" =~ '\x{C1}', '\xC1 to \'\x{C1}\''); +ok("\xC1" =~ '\301', '\xC1 to \'\301\''); +ok("\xC1" =~ '\x{391}', '\xC1 to \'\x{391}\''); +ok("\xC1" =~ '[\xC1]', '\xC1 to \'[\xC1]\''); +ok("\xC1" =~ '[\x{C1}]', '\xC1 to \'[\x{C1}]\''); +ok("\xC1" =~ '[\301]', '\xC1 to \'[\301]\''); +ok("\xC1" =~ '[\x{391}]', '\xC1 to \'[\x{391}]\''); + +ok("\xC1" =~ /ม/, '\xC1 to /<ALPHA>/'); +ok("\xE1" !~ /ม/, '\xE1 to /<ALPHA>/'); +ok("\xC1" =~ /ม/i, '\xC1 to /<ALPHA>/i'); +ok("\xE1" =~ /ม/i, '\xE1 to /<ALPHA>/i'); +ok("\xC1" =~ /[ม]/, '\xC1 to /[<ALPHA>]/'); +ok("\xE1" !~ /[ม]/, '\xE1 to /[<ALPHA>]/'); +ok("\xC1" =~ /[ม]/i, '\xC1 to /[<ALPHA>]/i'); +ok("\xE1" =~ /[ม]/i, '\xE1 to /[<ALPHA>]/i'); + +ok("\xC1\xC1" =~ /ม\xC1/, '\xC1\xC1 to /<ALPHA>\xC1/'); +ok("\xC1\xC1" =~ /\xC1ม/, '\xC1\xC1 to /\xC1<ALPHA>/'); +ok("\xC1\xC1" =~ /ม\xC1/i, '\xC1\xC1 to /<ALPHA>\xC1/i'); +ok("\xC1\xC1" =~ /\xC1ม/i, '\xC1\xC1 to /\xC1<ALPHA>/i'); +ok("\xC1\xE1" =~ /ม\xC1/i, '\xC1\xE1 to /<ALPHA>\xC1/i'); +ok("\xC1\xE1" =~ /\xC1ม/i, '\xC1\xE1 to /\xC1<ALPHA>/i'); +ok("\xE1\xE1" =~ /ม\xC1/i, '\xE1\xE1 to /<ALPHA>\xC1/i'); +ok("\xE1\xE1" =~ /\xC1ม/i, '\xE1\xE1 to /\xC1<ALPHA>/i'); + +# U+038A, \xBA, GREEK CAPITAL LETTER IOTA WITH TONOS +# U+03AF, \xDF, GREEK SMALL LETTER IOTA WITH TONOS + +ok("\x{38A}" =~ /\xBA/, '\x{38A} to /\xBA/'); +ok("\x{38A}" !~ /\xDF/, '\x{38A} to /\xDF/'); +ok("\x{38A}" =~ /\xBA/i, '\x{38A} to /\xBA/i'); +ok("\x{38A}" =~ /\xDF/i, '\x{38A} to /\xDF/i'); +ok("\x{38A}" =~ /[\xBA]/, '\x{38A} to /[\xBA]/'); +ok("\x{38A}" !~ /[\xDF]/, '\x{38A} to 
/[\xDF]/'); +ok("\x{38A}" =~ /[\xBA]/i, '\x{38A} to /[\xBA]/i'); +ok("\x{38A}" =~ /[\xDF]/i, '\x{38A} to /[\xDF]/i'); + +# \xDF is not LATIN SMALL LETTER SHARP S + +ok("SS" !~ /\xDF/i, 'SS to /\xDF/i'); +ok("Ss" !~ /\xDF/i, 'Ss to /\xDF/i'); +ok("sS" !~ /\xDF/i, 'sS to /\xDF/i'); +ok("ss" !~ /\xDF/i, 'ss to /\xDF/i'); +ok("SS" !~ /฿/i, 'SS to /<iota-tonos>/i'); +ok("Ss" !~ /฿/i, 'Ss to /<iota-tonos>/i'); +ok("sS" !~ /฿/i, 'sS to /<iota-tonos>/i'); +ok("ss" !~ /฿/i, 'ss to /<iota-tonos>/i'); + diff --git a/t/uni/latin2.t b/t/uni/latin2.t new file mode 100644 index 0000000000..08928b6039 --- /dev/null +++ b/t/uni/latin2.t @@ -0,0 +1,153 @@ +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + @INC = '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + unless (PerlIO::Layer->find('perlio')){ + print "1..0 # Skip: PerlIO required\n"; + exit 0; + } + if ($ENV{PERL_CORE_MINITEST}) { + print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n"; + exit 0; + } + $| = 1; + require './test.pl'; +} + +plan tests => 94; + +use encoding "latin2"; # iso 8859-2 + +# U+00C1, \xC1, \301, LATIN CAPITAL LETTER A WITH ACUTE +# U+0102, \xC3, \402, LATIN CAPITAL LETTER A WITH BREVE +# U+00E1, \xE1, \341, LATIN SMALL LETTER A WITH ACUTE +# U+0103, \xE3, \403, LATIN SMALL LETTER A WITH BREVE + +ok("\xC1" =~ /\xC1/, '\xC1 to /\xC1/'); +ok("\x{C1}" =~ /\x{C1}/, '\x{C1} to /\x{C1}/'); +ok("\xC3" =~ /\xC3/, '\xC3 to /\xC3/'); +ok("\x{102}" =~ /\xC3/, '\x{102} to /\xC3/'); +ok("\xC3" =~ /\x{C3}/, '\xC3 to /\x{C3}/'); +ok("\x{102}" =~ /\x{C3}/, '\x{102} to /\x{C3}/'); +ok("\xC3" =~ /\x{102}/, '\xC3 to /\x{102}/'); +ok("\x{102}" =~ /\x{102}/, '\x{102} to /\x{102}/'); + +ok("\xC1" =~ /\xC1/i, '\xC1 to /\xC1/i'); +ok("\xE1" =~ /\xC1/i, '\xE1 to /\xC1/i'); +ok("\xC1" =~ /\xE1/i, '\xC1 to /\xE1/i'); +ok("\xE1" =~ /\xE1/i, 
'\xE1 to /\xE1/i'); +ok("\x{102}" =~ /\xC3/i, '\x{102} to /\xC3/i'); +ok("\x{103}" =~ /\xC3/i, '\x{103} to /\xC3/i'); +ok("\x{102}" =~ /\xE3/i, '\x{102} to /\xE3/i'); +ok("\x{103}" =~ /\xE3/i, '\x{103} to /\xE3/i'); + +ok("\xC1" =~ /[\xC1]/, '\xC1 to /[\xC1]/'); +ok("\x{C1}" =~ /[\x{C1}]/, '\x{C1} to /[\x{C1}]/'); +ok("\xC3" =~ /[\xC3]/, '\xC3 to /[\xC3]/'); +ok("\x{102}" =~ /[\xC3]/, '\x{102} to /[\xC3]/'); +ok("\xC3" =~ /[\x{C3}]/, '\xC3 to /[\x{C3}]/'); +ok("\x{102}" =~ /[\x{C3}]/, '\x{102} to /[\x{C3}]/'); +ok("\xC3" =~ /[\x{102}]/, '\xC3 to /[\x{102}]/'); +ok("\x{102}" =~ /[\x{102}]/, '\x{102} to /[\x{102}]/'); + +ok("\xC1" =~ /[\xC1]/i, '\xC1 to /[\xC1]/i'); +ok("\xE1" =~ /[\xC1]/i, '\xE1 to /[\xC1]/i'); +ok("\xC1" =~ /[\xE1]/i, '\xC1 to /[\xE1]/i'); +ok("\xE1" =~ /[\xE1]/i, '\xE1 to /[\xE1]/i'); +ok("\x{102}" =~ /[\xC3]/i, '\x{102} to /[\xC3]/i'); +ok("\x{103}" =~ /[\xC3]/i, '\x{103} to /[\xC3]/i'); +ok("\x{102}" =~ /[\xE3]/i, '\x{102} to /[\xE3]/i'); +ok("\x{103}" =~ /[\xE3]/i, '\x{103} to /[\xE3]/i'); + +ok("\xC1" =~ '\xC1', '\xC1 to \'\xC1\''); +ok("\xC1" =~ '\x{C1}', '\xC1 to \'\x{C1}\''); +ok("\xC3" =~ '\303', '\xC3 to \'\303\''); +ok("\xC3" =~ '\x{102}', '\xC3 to \'\x{102}\''); +ok("\xC1" =~ '[\xC1]', '\xC1 to \'[\xC1]\''); +ok("\xC1" =~ '[\x{C1}]', '\xC1 to \'[\x{C1}]\''); +ok("\xC3" =~ '[\303]', '\xC3 to \'[\303]\''); +ok("\xC3" =~ '[\x{102}]', '\xC3 to \'[\x{102}]\''); + +ok("\xC1" =~ /ม/, '\xC1 to /<A-acute>/'); +ok("\xE1" !~ /ม/, '\xE1 to /<A-acute>/'); +ok("\xC1" =~ /ม/i, '\xC1 to /<A-acute>/i'); +ok("\xE1" =~ /ม/i, '\xE1 to /<A-acute>/i'); +ok("\xC1" =~ /[ม]/, '\xC1 to /[<A-acute>]/'); +ok("\xE1" !~ /[ม]/, '\xE1 to /[<A-acute>]/'); +ok("\xC1" =~ /[ม]/i, '\xC1 to /[<A-acute>]/i'); +ok("\xE1" =~ /[ม]/i, '\xE1 to /[<A-acute>]/i'); + +ok("\xC1\xC1" =~ /ม\xC1/, '\xC1\xC1 to /<A-acute>\xC1/'); +ok("\xC1\xC1" =~ /\xC1ม/, '\xC1\xC1 to /\xC1<A-acute>/'); +ok("\xC1\xC1" =~ /ม\xC1/i, '\xC1\xC1 to /<A-acute>\xC1/i'); +ok("\xC1\xC1" =~ /\xC1ม/i, '\xC1\xC1 to 
/\xC1<A-acute>/i'); +ok("\xC1\xE1" =~ /ม\xC1/i, '\xC1\xE1 to /<A-acute>\xC1/i'); +ok("\xC1\xE1" =~ /\xC1ม/i, '\xC1\xE1 to /\xC1<A-acute>/i'); +ok("\xE1\xE1" =~ /ม\xC1/i, '\xE1\xE1 to /<A-acute>\xC1/i'); +ok("\xE1\xE1" =~ /\xC1ม/i, '\xE1\xE1 to /\xC1<A-acute>/i'); + +# \xDF is LATIN SMALL LETTER SHARP S + +ok("\xDF" =~ /\xDF/, '\xDF to /\xDF/'); +ok("\xDF" =~ /\xDF/i, '\xDF to /\xDF/i'); +ok("\xDF" =~ /[\xDF]/, '\xDF to /[\xDF]/'); +ok("\xDF" =~ /[\xDF]/i, '\xDF to /[\xDF]/i'); +ok("\xDF" =~ /฿/, '\xDF to /<sharp-s>/'); +ok("\xDF" =~ /฿/i, '\xDF to /<sharp-s>/i'); +ok("\xDF" =~ /[฿]/, '\xDF to /[<sharp-s>]/'); +ok("\xDF" =~ /[฿]/i, '\xDF to /[<sharp-s>]/i'); + +ok("SS" =~ /\xDF/i, 'SS to /\xDF/i'); +ok("Ss" =~ /\xDF/i, 'Ss to /\xDF/i'); +ok("sS" =~ /\xDF/i, 'sS to /\xDF/i'); +ok("ss" =~ /\xDF/i, 'ss to /\xDF/i'); +ok("SS" =~ /฿/i, 'SS to /<sharp-s>/i'); +ok("Ss" =~ /฿/i, 'Ss to /<sharp-s>/i'); +ok("sS" =~ /฿/i, 'sS to /<sharp-s>/i'); +ok("ss" =~ /฿/i, 'ss to /<sharp-s>/i'); + +ok("\xC3" =~ /\303/, '\xC1 to /\303/'); +ok("\303" =~ /\303/, '\303 to /\303/'); +ok("\xC3" =~ /\303/i, '\xC1 to /\303/i'); +ok("\xE3" =~ /\303/i, '\xC1 to /\303/i'); +ok("\xC3" =~ /[\303]/, '\xC1 to /[\303]/'); +ok("\303" =~ /[\303]/, '\303 to /[\303]/'); +ok("\xC3" =~ /[\303]/i, '\xC1 to /[\303]/i'); +ok("\xE3" =~ /[\303]/i, '\xC1 to /[\303]/i'); + +ok("\xC3" =~ /\402/, '\xC1 to /\402/'); +ok("\402" =~ /\402/, '\402 to /\402/'); +ok("\xC3" =~ /\402/i, '\xC1 to /\402/i'); +ok("\xE3" =~ /\402/i, '\xC1 to /\402/i'); +ok("\xC3" =~ /[\402]/, '\xC1 to /[\402]/'); +ok("\402" =~ /[\402]/, '\402 to /[\402]/'); +ok("\xC3" =~ /[\402]/i, '\xC1 to /[\402]/i'); +ok("\xE3" =~ /[\402]/i, '\xC1 to /[\402]/i'); + +{ + my $re = '(?i:\xC1)'; + + ok("\xC1" =~ $re, '\xC1 to (?i:\xC1)'); + ok("\xE1" =~ $re, '\xE1 to (?i:\xC1)'); + + utf8::downgrade($re); + + ok("\xC1" =~ $re, '\xC1 to (?i:\xC1) down'); + ok("\xE1" =~ $re, '\xE1 to (?i:\xC1) down'); + + utf8::upgrade($re); + + ok("\xC1" =~ $re, '\xC1 to (?i:\xC1) 
up'); + ok("\xE1" =~ $re, '\xE1 to (?i:\xC1) up'); +} + diff --git a/t/uni/tr_utf8.t b/t/uni/tr_utf8.t index 606a84a9c0..354156a641 100644 --- a/t/uni/tr_utf8.t +++ b/t/uni/tr_utf8.t @@ -31,7 +31,7 @@ BEGIN { } use strict; -use Test::More tests => 7; +use Test::More tests => 8; use encoding 'utf8'; @@ -67,4 +67,12 @@ is($str, $hiragana, "s/// # hiragana -> katakana"); $line =~ tr/bcdeghijklmnprstvwxyz$02578/ืืฆืืขืืืืฒืงืืื ืคึผืจืกืืฐืฉืืืืฉืฑืชืฒืื/; is($line, "aืืฆืืขfืืืืฒืงืืื oืคqึผืจืกuืืฐืฉืืืืฉ1ืฑ34ืช6ืฒื9", "[perl #16843]"); } + +{ + # [perl #40641] + my $str = qq/Gebรครครครครครครครครครครครครครครครครครครครคude/; + my $reg = qr/Gebรครครครครครครครครครครครครครครครครครครครคude/; + ok($str =~ /$reg/, "[perl #40641]"); +} + __END__ |