#!./perl # # ex: set ts=8 sts=4 sw=4 et: # # Here we test for optimizations in the regexp engine. # We try to distinguish between "nice to have" optimizations and those # we consider essential: failure of the latter should be considered bugs, # while failure of the former should at worst be TODO. # # Format of data lines is tab-separated: pattern, minlen, anchored, floating, # other-options, comment. # - pattern will be subject to string eval as "qr{$pattern}". # - minlen is a non-negative integer. # - anchored/floating are of the form "u23:45+string". If initial "u" is # present we expect a utf8 substring, else a byte substring; subsequent # digits are the min offset; optional /:\d+/ is the max offset (not # supported for anchored; assumed undef if not present for floating); # subsequent '-' or '+' indicates if this is the substring being checked; # "string" is the substring to expect. Use "-" for the whole entry to # indicate no substring of this type. # - other-options is a comma-separated list of bare flags or option=value # strings. Those with an initial "T" mark the corresponding test TODO. # Booleans (noscan, isall, skip, implicit, anchor SBOL, anchor MBOL, # anchor GPOS) are expected false if not mentioned, expected true if # supplied as bare flags. stclass may be supplied as a pattern match # as eg "stclass=~^ANYOF". # - as a special-case, minlenret is expected to be the same as minlen # unless specified in other-options. # use strict; use warnings; use 5.010; $| = 1; BEGIN { chdir 't' if -d 't'; require './test.pl'; set_up_inc('../lib'); skip_all_if_miniperl("no dynamic loading on miniperl, no re::optimization"); } no warnings qw{ experimental }; use feature qw{ refaliasing declared_refs }; our \$TODO = \$::TODO; use re (); while (<DATA>) { chomp; if (m{^\s*(?:#|\z)}) { # skip blank/comment lines next; } my($pat, $minlen, $anchored, $floating, $other, $comment) = split /\t/; my %todo; my %opt = map { my($k, $v) = split /=/, $_, 2; ($k =~ s/^T//) ? do { $todo{$k} = $v; () } : ($k => $v); } split /,/, $other // ''; $comment = (defined $comment && length $comment) ? "$pat ($comment):" : "$pat:"; my $o = re::optimization(eval "qr{$pat}"); ok($o, "$comment compiled ok"); my $skip = $o ? undef : "could not get info for qr{$pat}"; my $test = 0; my($got, $expect) = ($o->{minlen}, $minlen); if (exists $todo{minlen}) { ++$test; $skip || ok($got >= $expect, "$comment minlen $got >= $expect"); my $todo = $todo{minlen}; local $TODO = 1; $skip || is($got, $todo, "$comment minlen $got = $todo"); } else { ++$test; $skip || is($got, $expect, "$comment minlen $got = $expect"); } ($got, $expect) = ($o->{minlenret}, $opt{minlenret} // $minlen); if (exists $todo{minlenret}) { ++$test; $skip || ok($got >= $expect, "$comment minlenret $got >= $expect"); my $todo = $todo{minlenret}; local $TODO = 1; $skip || is($got, $todo, "$comment minlenret $got = $todo"); } else { ++$test; $skip || is($got, $expect, "$comment minlenret $got = $expect"); } my($autf, $aoff, $acheck, $astr) = ($anchored =~ m{ ^ (u?) (\d*) ([-+]) (.*) \z }sx) or die "Can't parse anchored test '$anchored'"; if ($autf eq 'u') { ++$test; $skip || is($o->{anchored}, undef, "$comment no anchored"); ++$test; local $TODO = 1 if exists $todo{'anchored utf8'}; $skip || is($o->{'anchored utf8'}, $astr, "$comment got anchored utf8"); } elsif (length $astr) { ++$test; $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8"); ++$test; local $TODO = 1 if exists $todo{anchored}; $skip || is($o->{anchored}, $astr, "$comment got anchored"); } else { ++$test; $skip || is($o->{anchored}, undef, "$comment no anchored"); ++$test; $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8"); } # skip offset checks if we failed to find a string my $local_skip = ( !$skip && !defined($o->{anchored} // $o->{anchored_utf8}) ) ? 'no anchored string' : undef; if (length $aoff) { ++$test; SKIP: { skip($local_skip) if $local_skip; local $TODO = 1 if exists $todo{'anchored min offset'}; $skip || is($o->{'anchored min offset'}, $aoff, "$comment anchored min offset"); } # we don't care about anchored max: it may be set same as min or 0 } my($futf, $fmin, $fmax, $fcheck, $fstr) = ($floating =~ m{ ^ (u?) (\d*) (?: : (\d*) )? ([-+]) (.*) \z }sx) or die "Can't parse floating test '$floating'"; if ($futf eq 'u') { ++$test; $skip || is($o->{floating}, undef, "$comment no floating"); ++$test; local $TODO = 1 if exists $todo{'floating utf8'}; $skip || is($o->{'floating utf8'}, $fstr, "$comment got floating utf8"); } elsif (length $fstr) { ++$test; $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8"); ++$test; local $TODO = 1 if exists $todo{floating}; $skip || is($o->{floating}, $fstr, "$comment got floating"); } else { ++$test; $skip || is($o->{floating}, undef, "$comment no floating"); ++$test; $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8"); } # skip offset checks if we failed to find a string $local_skip = ( !$skip && !defined($o->{floating} // $o->{floating_utf8}) ) ? 'no floating string' : undef; if (length $fmin) { ++$test; SKIP: { skip($local_skip) if $local_skip; local $TODO = 1 if exists $todo{'floating min offset'}; $skip || is($o->{'floating min offset'}, $fmin, "$comment floating min offset"); } } if (defined $fmax) { ++$test; SKIP: { skip($local_skip) if $local_skip; local $TODO = 1 if exists $todo{'floating max offset'}; $skip || is($o->{'floating max offset'}, $fmax, "$comment floating max offset"); } } my $check = ($acheck eq '+') ? 'anchored' : ($fcheck eq '+') ? 'floating' : ($acheck eq '-') ? undef : 'none'; $local_skip = ( !$skip && $check && ( ($check eq 'anchored' && !defined($o->{anchored} // $o->{anchored_utf8})) || ($check eq 'floating' && !defined($o->{floating} // $o->{floating_utf8})) ) ) ? "$check not found" : undef; if (defined $check) { ++$test; SKIP: { skip($local_skip) if $local_skip; local $TODO = 1 if exists $todo{checking}; $skip || is($o->{checking}, $check, "$comment checking $check"); } } # booleans for (qw{ noscan isall skip implicit }, 'anchor SBOL', 'anchor MBOL', 'anchor GPOS' ) { my $got = $o->{$_}; my $expect = exists($opt{$_}) ? ($opt{$_} // 1) : 0; ++$test; local $TODO = 1 if exists $todo{"T$_"}; $skip || is($got, $expect ? 1 : 0, "$comment $_"); } # integer for (qw{ gofs }) { my $got = $o->{$_}; my $expect = $opt{$_} // 0; ++$test; local $TODO = 1 if exists $todo{"T$_"}; $skip || is($got, $expect || 0, "$comment $_"); } # string for (qw{ stclass }) { my $got = $o->{$_}; my $expect = $opt{$_}; my $qr = (defined($expect) && ($expect =~ s{^~}{})) ? 1 : 0; ++$test; local $TODO = 1 if exists $todo{"T$_"}; $skip || ($qr ? like($got, qr{$expect}, "$comment $_") : is($got, $expect, "$comment $_") ); } skip($skip, $test) if $skip; } done_testing(); __END__ (?:) 0 - - Tisall # various forms of anchored substring abc 3 0+abc - isall .{10}abc 13 10+abc - - (?i:)abc 3 0+abc - isall a(?:)bc 3 0+abc - isall a()bc 3 0+abc - - a(?i:)bc 3 0+abc - isall a(b)c 3 0+abc - - a((?i:b))c 3 0+abc - Tanchored a[bB]c 3 0+abc - Tanchored (?=abc) 0 0+abc - Tanchored,Tminlen=3,minlenret=0 abc|abc 3 0+abc - isall abcd|abce 4 0+abc - - acde|bcde 4 1+cde - Tanchored,stclass=~[ab] acdef|bcdeg 5 1+cde - Tanchored,stclass=~[ab] # same as above, floating .?abc 3 - 0:1+abc - .?.{10}abc 13 - 10:11+abc - .?(?i:)abc 3 - 0:1+abc - .?a(?:)bc 3 - 0:1+abc - .?a()bc 3 - 0:1+abc - .?a(?i:)bc 3 - 0:1+abc - .?a(b)c 3 - 0+abc - .?a((?i:b))c 3 - 0+abc Tfloating .?a[bB]c 3 - 0:1+abc Tfloating .?(?=abc) 0 - 0:1+abc Tfloating,Tminlen=3,minlenret=0 .?(?:abc|abc) 3 - 0:1+abc - .?(?:abcd|abce) 4 - 0:1+abc - .?(?:acde|bcde) 4 - 1:2+cde Tfloating .?(?:acdef|bcdeg) 5 - 1:2+cde Tfloating a(b){2,3}c 4 -abb 1+bbc a(b|bb)c 3 -ab 1-bc Tfloating,Tfloating min offset a(b|bb){2}c 4 -abb 1-bbc Tanchored,Tfloating,Tfloating min offset abc(*COMMIT)xyz 6 0+abc - - abc(*ACCEPT)xyz 3 0+abc - - # Must not have stclass=[x] (*ACCEPT)xyz 0 - - -