Re: \N{...} in regular expression [PATCH]

Message-ID: <9b18b3110609181637m796d6c16o1b2741edc5f09eb2@mail.gmail.com> p4raw-id: //depot/perl@28868
author: Yves Orton <demerphq@gmail.com> 2006-09-19 03:37:19 +0200
committer: H.Merijn Brand <h.m.brand@xs4all.nl> 2006-09-19 06:56:36 +0000
commit: fc8cd66c26827f6c2ee1aa00ab2d3b3c320a4a28 (patch)
tree: b426e51c41b332c31c05ec65e7570a4cc620f20c /t
parent: a7ae1e4a956bbd5ffa44d286e0591bf4c0e7c341 (diff)
download: perl-fc8cd66c26827f6c2ee1aa00ab2d3b3c320a4a28.tar.gz
2 files changed, 117 insertions, 44 deletions
diff --git a/t/lib/Cname.pm b/t/lib/Cname.pm
new file mode 100644
index 0000000000..d4b8a9ea4d
--- /dev/null
+++ b/t/lib/Cname.pm
@@ -0,0 +1,22 @@
+package Cname;
+our $Evil='A';
+
+sub translator {
+    my $str = shift;
+    if ( $str eq 'EVIL' ) {
+        (my $c=substr("A".$Evil,-1))++;
+        my $r=$Evil;
+        $Evil.=$c;
+        return $r;
+    }
+    if ( $str eq 'EMPTY-STR') {
+       return "";
+    }
+    return $str;
+}
+
+sub import {
+    shift;
+    $^H{charnames} = \&translator;
+}
+1;  
diff --git a/t/op/pat.t b/t/op/pat.t
index 4ff133b619..97bad61881 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,8 +6,7 @@
 
 $| = 1;
 
-# please update note at bottom of file when you change this
-print "1..1232\n"; 
+# Test counter output is generated by a BEGIN block at bottom of file
 
 BEGIN {
     chdir 't' if -d 't';
@@ -1286,7 +1285,7 @@ print "ok 247\n";
 {
     # bug id 20001008.001
 
-    my $test = 248;
+    $test = 248;
     my @x = ("stra\337e 138","stra\337e 138");
     for (@x) {
 	s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
@@ -1376,7 +1375,7 @@ print "ok 247\n";
 }
 
 SKIP: {
-    my $test = 264; # till 575
+    $test = 264; # till 575
 
     use charnames ":full";
 
@@ -2032,13 +2031,13 @@ print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r";
 }
 
 
-my $test = 687;
+$test = 687;
 
 # Force scalar context on the patern match
-sub ok ($$) {
+sub ok ($;$) {
     my($ok, $name) = @_;
 
-    printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name;
+    printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name||'unnamed';
 
     printf "# Failed test at line %d\n", (caller)[2] unless $ok;
 
@@ -2604,35 +2603,21 @@ print "# some Unicode properties\n";
 
     use charnames ':full';
 
-    print "\N{LATIN SMALL LETTER SHARP S}" =~
-	/\N{LATIN SMALL LETTER SHARP S}/    ? "ok 835\n" : "not ok 835\n";
+    $test= 835;
 
-    print "\N{LATIN SMALL LETTER SHARP S}" =~
-	/\N{LATIN SMALL LETTER SHARP S}/i   ? "ok 836\n" : "not ok 836\n";
+    ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/);
+    ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/i);
 
-    print "\N{LATIN SMALL LETTER SHARP S}" =~
-	/[\N{LATIN SMALL LETTER SHARP S}]/  ? "ok 837\n" : "not ok 837\n";
+    ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/);
+    ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i);
 
-    print "\N{LATIN SMALL LETTER SHARP S}" =~
-	/[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 838\n" : "not ok 838\n";
+    ok("ss" =~ /\N{LATIN SMALL LETTER SHARP S}/i);
+    ok("SS" =~ /\N{LATIN SMALL LETTER SHARP S}/i);
+    ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i);
+    ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i);
 
-    print "ss" =~
-	/\N{LATIN SMALL LETTER SHARP S}/i   ? "ok 839\n" : "not ok 839\n";
-
-    print "SS" =~
-	/\N{LATIN SMALL LETTER SHARP S}/i   ? "ok 840\n" : "not ok 840\n";
-
-    print "ss" =~
-	/[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n";
-
-    print "SS" =~
-	/[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n";
-
-    print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ?
-	"ok 843\n" : "not ok 843\n";
-
-    print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ?
-	"ok 844\n" : "not ok 844\n";
+    ok("\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i);
+    ok("\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i);
 }
 
 {
@@ -2751,7 +2736,7 @@ print "# some Unicode properties\n";
     # check utf8/non-utf8 mixtures
     # try to force all float/anchored check combinations
     my $c = "\x{100}";
-    my $test = 865;
+    $test = 865;
     my $subst;
     for my $re (
 	"xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx",
@@ -2790,7 +2775,7 @@ print "# some Unicode properties\n";
 
 {
     print "# qr/.../x\n";
-    my $test = 893;
+    $test = 893;
 
     my $R = qr/ A B C # D E/x;
 
@@ -2806,7 +2791,7 @@ print "# some Unicode properties\n";
 
 {
     print "# illegal Unicode properties\n";
-    my $test = 896;
+    $test = 896;
 
     print eval { "a" =~ /\pq / }      ? "not ok $test\n" : "ok $test\n";
     $test++;
@@ -2818,7 +2803,7 @@ print "# some Unicode properties\n";
 {
     print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n";
     # requires reuse of last successful pattern
-    my $test = 898;
+    $test = 898;
     $test =~ /\d/;
     for (0 .. 1) {
 	my $match = ?? + 0;
@@ -3039,7 +3024,7 @@ ok("A" =~ /\p{AsciiHexAndDash}/, "'A' is AsciiHexAndDash");
     my $ok = $s =~ /(\x{100}{4})/;
     my($ord, $len) = (ord $1, length $1);
     print +($ok && $ord == 0x100 && $len == 4)
-	    ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n";
+	    ? "ok $test\n" : "not ok $test\t# [#18179] $ok/$ord/$len\n";
     ++$test;
 }
 
@@ -3404,10 +3389,12 @@ ok(("foba  ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i)
 
 
 
-{
+if (!$ENV{PERL_SKIP_PSYCHO_TEST}){
     my @normal=qw(these are some normal words);
     my $psycho=join "|",@normal,map chr $_,255..20000;
     ok(('these'=~/($psycho)/) && $1 eq 'these','Pyscho');
+} else {
+    ok(1,'Skipped Psycho');
 }
 
 # [perl #36207] mixed utf8 / latin-1 and case folding
@@ -3533,22 +3520,22 @@ if ($ordA == 193) {
     my @chars = ("A".."Z");
     my $delim = ",";
     my $size = 32771 - 4;
-    my $test = '';
+    my $str = '';
 
     # create some random junk. Inefficient, but it works.
     for ($i = 0 ; $i < $size ; $i++) {
-        $test .= $chars[int(rand(@chars))];
+        $str .= $chars[int(rand(@chars))];
     }
 
-    $test .= ($delim x 4);
+    $str .= ($delim x 4);
     my $res;
     my $matched;
-    if ($test =~ s/^(.*?)${delim}{4}//s) {
+    if ($str =~ s/^(.*?)${delim}{4}//s) {
         $res = $1;
         $matched=1;
     } 
     ok($matched,'pattern matches');
-    ok(length($test)==0,"Empty string");
+    ok(length($str)==0,"Empty string");
     ok(defined($res) && length($res)==$size,"\$1 is correct size");
 }
 
@@ -3578,9 +3565,73 @@ if ($ordA == 193) {
     ok("A@-B"  =~ /A@{-}B/x, 'interpolation of @- in /@{-}/x');
 }
 
+{
+    use lib 'lib';
+    use Cname;
+    
+    ok('fooB'=~/\N{foo}[\N{B}\N{b}]/,"Passthrough charname");
+    $test=1233; my $handle=make_must_warn('Ignoring excess chars from');
+    $handle->('q(xxWxx) =~ /[\N{WARN}]/');
+    {
+        my $code;
+        my $w="";
+        local $SIG{__WARN__} = sub { $w.=shift };
+        eval($code=<<'EOFTEST') or die "$@\n$code\n";
+        {
+            use warnings;
+            
+            #1234
+            ok("\0" !~ /[\N{EMPTY-STR}XY]/,
+                "Zerolength charname in charclass doesnt match \0");
+            1;
+        }
+EOFTEST
+        ok($w=~/Ignoring zero length/,
+            "Got expected zero length warning");
+        warn $code;                    
+        
+    }
+    $handle= make_must_warn('Ignoring zero length');
+    $handle->('qq(\\0) =~ /[\N{EMPTY-STR}XY]/');
+    ok('AB'=~/(\N{EVIL})/ && $1 eq 'A',"Charname caching $1");
+    ok('ABC'=~/(\N{EVIL})/,"Charname caching $1");    
+    ok('xy'=~/x\N{EMPTY-STR}y/, 'Empty string charname produces NOTHING node');
+    ok(''=~/\N{EMPTY-STR}/, 'Empty string charname produces NOTHING node 2');
+        
+}
+{
+    print "# MORE LATIN SMALL LETTER SHARP S\n";
+
+    use charnames ':full';
+
+    #see also test #835
+    ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+        "unoptimized named sequence in class 1");
+    ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+        "unoptimized named sequence in class 2");        
+    ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/,
+        "unoptimized named sequence in class 3");
+    ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i,
+        "unoptimized named sequence in class 4");        
+    
+    ok('aabc' !~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against aabc');
+    ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc');
+    ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc');
+
+    ok(' A B'=~/\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/,
+        'Intermixed named and unicode escapes 1');
+    ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}"=~
+       /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/,
+        'Intermixed named and unicode escapes 2');
+    ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042} 3"=~
+       /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/,
+        'Intermixed named and unicode escapes');     
+}
 # Keep the following test last -- it may crash perl
 
 ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274")
     or print "# Unexpected outcome: should pass or crash perl\n";
 
-# last test 1231
+# Don't forget to update this!
+BEGIN{print "1..1251\n"};
+
author	Yves Orton <demerphq@gmail.com>	2006-09-19 03:37:19 +0200
committer	H.Merijn Brand <h.m.brand@xs4all.nl>	2006-09-19 06:56:36 +0000
commit	fc8cd66c26827f6c2ee1aa00ab2d3b3c320a4a28 (patch)
tree	b426e51c41b332c31c05ec65e7570a4cc620f20c /t
parent	a7ae1e4a956bbd5ffa44d286e0591bf4c0e7c341 (diff)
download	perl-fc8cd66c26827f6c2ee1aa00ab2d3b3c320a4a28.tar.gz