diff options
author | Yves Orton <demerphq@gmail.com> | 2006-09-19 03:37:19 +0200 |
---|---|---|
committer | H.Merijn Brand <h.m.brand@xs4all.nl> | 2006-09-19 06:56:36 +0000 |
commit | fc8cd66c26827f6c2ee1aa00ab2d3b3c320a4a28 (patch) | |
tree | b426e51c41b332c31c05ec65e7570a4cc620f20c /t | |
parent | a7ae1e4a956bbd5ffa44d286e0591bf4c0e7c341 (diff) | |
download | perl-fc8cd66c26827f6c2ee1aa00ab2d3b3c320a4a28.tar.gz |
Re: \N{...} in regular expression [PATCH]
Message-ID: <9b18b3110609181637m796d6c16o1b2741edc5f09eb2@mail.gmail.com>
p4raw-id: //depot/perl@28868
Diffstat (limited to 't')
-rw-r--r-- | t/lib/Cname.pm | 22 | ||||
-rwxr-xr-x | t/op/pat.t | 139 |
2 files changed, 117 insertions, 44 deletions
diff --git a/t/lib/Cname.pm b/t/lib/Cname.pm new file mode 100644 index 0000000000..d4b8a9ea4d --- /dev/null +++ b/t/lib/Cname.pm @@ -0,0 +1,22 @@ +package Cname; +our $Evil='A'; + +sub translator { + my $str = shift; + if ( $str eq 'EVIL' ) { + (my $c=substr("A".$Evil,-1))++; + my $r=$Evil; + $Evil.=$c; + return $r; + } + if ( $str eq 'EMPTY-STR') { + return ""; + } + return $str; +} + +sub import { + shift; + $^H{charnames} = \&translator; +} +1; diff --git a/t/op/pat.t b/t/op/pat.t index 4ff133b619..97bad61881 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,8 +6,7 @@ $| = 1; -# please update note at bottom of file when you change this -print "1..1232\n"; +# Test counter output is generated by a BEGIN block at bottom of file BEGIN { chdir 't' if -d 't'; @@ -1286,7 +1285,7 @@ print "ok 247\n"; { # bug id 20001008.001 - my $test = 248; + $test = 248; my @x = ("stra\337e 138","stra\337e 138"); for (@x) { s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; @@ -1376,7 +1375,7 @@ print "ok 247\n"; } SKIP: { - my $test = 264; # till 575 + $test = 264; # till 575 use charnames ":full"; @@ -2032,13 +2031,13 @@ print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; } -my $test = 687; +$test = 687; # Force scalar context on the patern match -sub ok ($$) { +sub ok ($;$) { my($ok, $name) = @_; - printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name; + printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name||'unnamed'; printf "# Failed test at line %d\n", (caller)[2] unless $ok; @@ -2604,35 +2603,21 @@ print "# some Unicode properties\n"; use charnames ':full'; - print "\N{LATIN SMALL LETTER SHARP S}" =~ - /\N{LATIN SMALL LETTER SHARP S}/ ? "ok 835\n" : "not ok 835\n"; + $test= 835; - print "\N{LATIN SMALL LETTER SHARP S}" =~ - /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 836\n" : "not ok 836\n"; + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/i); - print "\N{LATIN SMALL LETTER SHARP S}" =~ - /[\N{LATIN SMALL LETTER SHARP S}]/ ? "ok 837\n" : "not ok 837\n"; + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i); - print "\N{LATIN SMALL LETTER SHARP S}" =~ - /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 838\n" : "not ok 838\n"; + ok("ss" =~ /\N{LATIN SMALL LETTER SHARP S}/i); + ok("SS" =~ /\N{LATIN SMALL LETTER SHARP S}/i); + ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i); + ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i); - print "ss" =~ - /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 839\n" : "not ok 839\n"; - - print "SS" =~ - /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 840\n" : "not ok 840\n"; - - print "ss" =~ - /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n"; - - print "SS" =~ - /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n"; - - print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ? - "ok 843\n" : "not ok 843\n"; - - print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ? - "ok 844\n" : "not ok 844\n"; + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i); } { @@ -2751,7 +2736,7 @@ print "# some Unicode properties\n"; # check utf8/non-utf8 mixtures # try to force all float/anchored check combinations my $c = "\x{100}"; - my $test = 865; + $test = 865; my $subst; for my $re ( "xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx", @@ -2790,7 +2775,7 @@ print "# some Unicode properties\n"; { print "# qr/.../x\n"; - my $test = 893; + $test = 893; my $R = qr/ A B C # D E/x; @@ -2806,7 +2791,7 @@ print "# some Unicode properties\n"; { print "# illegal Unicode properties\n"; - my $test = 896; + $test = 896; print eval { "a" =~ /\pq / } ? "not ok $test\n" : "ok $test\n"; $test++; @@ -2818,7 +2803,7 @@ print "# some Unicode properties\n"; { print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n"; # requires reuse of last successful pattern - my $test = 898; + $test = 898; $test =~ /\d/; for (0 .. 1) { my $match = ?? + 0; @@ -3039,7 +3024,7 @@ ok("A" =~ /\p{AsciiHexAndDash}/, "'A' is AsciiHexAndDash"); my $ok = $s =~ /(\x{100}{4})/; my($ord, $len) = (ord $1, length $1); print +($ok && $ord == 0x100 && $len == 4) - ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n"; + ? "ok $test\n" : "not ok $test\t# [#18179] $ok/$ord/$len\n"; ++$test; } @@ -3404,10 +3389,12 @@ ok(("foba ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i) -{ +if (!$ENV{PERL_SKIP_PSYCHO_TEST}){ my @normal=qw(these are some normal words); my $psycho=join "|",@normal,map chr $_,255..20000; ok(('these'=~/($psycho)/) && $1 eq 'these','Pyscho'); +} else { + ok(1,'Skipped Psycho'); } # [perl #36207] mixed utf8 / latin-1 and case folding @@ -3533,22 +3520,22 @@ if ($ordA == 193) { my @chars = ("A".."Z"); my $delim = ","; my $size = 32771 - 4; - my $test = ''; + my $str = ''; # create some random junk. Inefficient, but it works. for ($i = 0 ; $i < $size ; $i++) { - $test .= $chars[int(rand(@chars))]; + $str .= $chars[int(rand(@chars))]; } - $test .= ($delim x 4); + $str .= ($delim x 4); my $res; my $matched; - if ($test =~ s/^(.*?)${delim}{4}//s) { + if ($str =~ s/^(.*?)${delim}{4}//s) { $res = $1; $matched=1; } ok($matched,'pattern matches'); - ok(length($test)==0,"Empty string"); + ok(length($str)==0,"Empty string"); ok(defined($res) && length($res)==$size,"\$1 is correct size"); } @@ -3578,9 +3565,73 @@ if ($ordA == 193) { ok("A@-B" =~ /A@{-}B/x, 'interpolation of @- in /@{-}/x'); } +{ + use lib 'lib'; + use Cname; + + ok('fooB'=~/\N{foo}[\N{B}\N{b}]/,"Passthrough charname"); + $test=1233; my $handle=make_must_warn('Ignoring excess chars from'); + $handle->('q(xxWxx) =~ /[\N{WARN}]/'); + { + my $code; + my $w=""; + local $SIG{__WARN__} = sub { $w.=shift }; + eval($code=<<'EOFTEST') or die "$@\n$code\n"; + { + use warnings; + + #1234 + ok("\0" !~ /[\N{EMPTY-STR}XY]/, + "Zerolength charname in charclass doesnt match \0"); + 1; + } +EOFTEST + ok($w=~/Ignoring zero length/, + "Got expected zero length warning"); + warn $code; + + } + $handle= make_must_warn('Ignoring zero length'); + $handle->('qq(\\0) =~ /[\N{EMPTY-STR}XY]/'); + ok('AB'=~/(\N{EVIL})/ && $1 eq 'A',"Charname caching $1"); + ok('ABC'=~/(\N{EVIL})/,"Charname caching $1"); + ok('xy'=~/x\N{EMPTY-STR}y/, 'Empty string charname produces NOTHING node'); + ok(''=~/\N{EMPTY-STR}/, 'Empty string charname produces NOTHING node 2'); + +} +{ + print "# MORE LATIN SMALL LETTER SHARP S\n"; + + use charnames ':full'; + + #see also test #835 + ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i, + "unoptimized named sequence in class 1"); + ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i, + "unoptimized named sequence in class 2"); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/, + "unoptimized named sequence in class 3"); + ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i, + "unoptimized named sequence in class 4"); + + ok('aabc' !~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against aabc'); + ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc'); + ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc'); + + ok(' A B'=~/\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, + 'Intermixed named and unicode escapes 1'); + ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}"=~ + /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, + 'Intermixed named and unicode escapes 2'); + ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042} 3"=~ + /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, + 'Intermixed named and unicode escapes'); +} # Keep the following test last -- it may crash perl ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274") or print "# Unexpected outcome: should pass or crash perl\n"; -# last test 1231 +# Don't forget to update this! +BEGIN{print "1..1251\n"}; + |