diff options
author | Yves Orton <demerphq@gmail.com> | 2023-01-08 14:16:36 +0100 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2023-01-09 22:43:38 +0100 |
commit | 6f9328ca348b95ff60b2672e0c425242e6e87419 (patch) | |
tree | 03b62b9852452d8d1540fa105db610142ce85249 | |
parent | 239136505f79898ae40cd38f706babe0f23f0993 (diff) | |
download | perl-6f9328ca348b95ff60b2672e0c425242e6e87419.tar.gz |
regcomp.pl - fixup intflags debug data to handle gaps properly
We were not handling gaps in the sequence properly, and effectively
showing the wrong flag names or missing the last flag. Now we die if there
are any collisions or if any of the PREGf defines set more than one bit.
This also adds some crude tests to validate that intflags serialization is
working properly.
Note, extflags handles more complex scenarios and seems to handle this
gracefully already, hence the reason I haven't touched it as well.
This also tweaks a comment in lexical_debug.t which part of this was
cribbed from.
-rw-r--r-- | MANIFEST | 2 | ||||
-rw-r--r-- | ext/re/re.pm | 2 | ||||
-rw-r--r-- | ext/re/t/intflags.pl | 16 | ||||
-rw-r--r-- | ext/re/t/intflags.t | 25 | ||||
-rw-r--r-- | ext/re/t/lexical_debug.t | 2 | ||||
-rw-r--r-- | regen/regcomp.pl | 23 | ||||
-rw-r--r-- | regnodes.h | 29 |
7 files changed, 79 insertions, 20 deletions
@@ -4514,6 +4514,8 @@ ext/re/re.pm re extension Perl module ext/re/re.xs re extension external subroutines ext/re/re_comp.h re extension wrapper for regcomp.h ext/re/re_top.h re extension symbol hiding header +ext/re/t/intflags.pl Program used by intflags.t +ext/re/t/intflags.t Test that intflags are serialized properly ext/re/t/lexical_debug.pl generate debug output for lexical re 'debug' ext/re/t/lexical_debug.t test that lexical re 'debug' works ext/re/t/qr.t test that qr// is a Regexp diff --git a/ext/re/re.pm b/ext/re/re.pm index cf1f7421a2..6da4e02f32 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -4,7 +4,7 @@ package re; use strict; use warnings; -our $VERSION = "0.43"; +our $VERSION = "0.44"; our @ISA = qw(Exporter); our @EXPORT_OK = qw{ is_regexp regexp_pattern diff --git a/ext/re/t/intflags.pl b/ext/re/t/intflags.pl new file mode 100644 index 0000000000..a79180681e --- /dev/null +++ b/ext/re/t/intflags.pl @@ -0,0 +1,16 @@ +use re 'Debug' => qw(DUMP FLAGS); +our $count; +my $code= '(?{$count++})'; +my @p= ( + qr/(foo)(?1)?/, + qr/\Gfoo/, + qr/.*foo/, + qr/^foo/, + qr/(foo(*THEN)bar|food)/, + qr/a.*b.*/, + qr/a{1,4}\Gfoo/, + qr/a+/, + do { use re 'eval'; qr/a$code/}, +); + +print STDERR "-OK-\n"; diff --git a/ext/re/t/intflags.t b/ext/re/t/intflags.t new file mode 100644 index 0000000000..256bf1f007 --- /dev/null +++ b/ext/re/t/intflags.t @@ -0,0 +1,25 @@ +#!./perl + +BEGIN { + require Config; + if (($Config::Config{'extensions'} !~ /\bre\b/) ){ + print "1..0 # Skip -- Perl configured without re module\n"; + exit 0; + } +} + +use strict; + +# must use a BEGIN or the prototypes wont be respected meaning +# tests could pass that shouldn't. +BEGIN { require "../../t/test.pl"; } +my $out = runperl(progfile => "t/intflags.pl", stderr => 1 ); +like($out,qr/-OK-\n/, "intflags.pl ran to completion"); + +my %seen; +foreach my $line (split /\n/, $out) { + $line=~s/^r->intflags:\s+// or next; + length($_) and $seen{$_}++ for split /\s+/, $line; +} +is(0+keys %seen,14); +done_testing; diff --git a/ext/re/t/lexical_debug.t b/ext/re/t/lexical_debug.t index b2570f0e2d..4c8b47d54f 100644 --- a/ext/re/t/lexical_debug.t +++ b/ext/re/t/lexical_debug.t @@ -11,7 +11,7 @@ BEGIN { use strict; # must use a BEGIN or the prototypes wont be respected meaning - # tests could pass that shouldn't +# tests could pass that shouldn't BEGIN { require "../../t/test.pl"; } my $out = runperl(progfile => "t/lexical_debug.pl", stderr => 1 ); diff --git a/regen/regcomp.pl b/regen/regcomp.pl index 4999cfa034..94a53dbbb6 100644 --- a/regen/regcomp.pl +++ b/regen/regcomp.pl @@ -726,8 +726,10 @@ EOP my %reverse; my $REG_INTFLAGS_NAME_SIZE= 0; my $hp= HeaderParser->new(); + my $last_val = 0; foreach my $file ("regcomp.h") { $hp->read_file($file); + my @bit_tuples; foreach my $line_info (@{$hp->lines}) { next unless $line_info->{type} eq "content" and $line_info->{sub_type} eq "#define"; @@ -745,13 +747,26 @@ EOP my $hex= $3; my $comment= $4; my $val= hex($hex); + my $bin= sprintf "%b", $val; + if ($bin=~/1.*?1/) { die "Not expecting multiple bits in PREGf" } + my $bit= length($bin) - 1 ; $comment= $comment ? " - $comment" : ""; - - printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), - $val, $define, $comment; - $REG_INTFLAGS_NAME_SIZE++; + if ($bit_tuples[$bit]) { + die "Duplicate PREGf bit '$bit': $define $val ($hex)"; + } + $bit_tuples[$bit]= [ $bit, $val, $abbr, $define, $comment ]; + } + } + foreach my $i (0..$#bit_tuples) { + my $bit_tuple= $bit_tuples[$i]; + if (!$bit_tuple) { + $bit_tuple= [ $i, 1<<$i, "", "", "*UNUSED*" ]; } + my ($bit, $val, $abbr, $define, $comment)= @$bit_tuple; + printf $out qq(\t%-30s/* (1<<%2d) - 0x%08x - %s%s */\n), + qq("$abbr",), $bit, $val, $define, $comment; } + $REG_INTFLAGS_NAME_SIZE=0+@bit_tuples; } print $out <<EOP; diff --git a/regnodes.h b/regnodes.h index 398df445df..5275da64b3 100644 --- a/regnodes.h +++ b/regnodes.h @@ -2862,24 +2862,25 @@ EXTCONST char * const PL_reg_extflags_name[] = { EXTCONST char * PL_reg_intflags_name[]; #else EXTCONST char * const PL_reg_intflags_name[] = { - "SKIP", /* 0x00000001 - PREGf_SKIP */ - "IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */ - "NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */ - "VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */ - "CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */ - "USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */ - "NOSCAN", /* 0x00000040 - PREGf_NOSCAN */ - "GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */ - "GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */ - "ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */ - "ANCH_SBOL", /* 0x00000800 - PREGf_ANCH_SBOL */ - "ANCH_GPOS", /* 0x00001000 - PREGf_ANCH_GPOS */ - "RECURSE_SEEN", /* 0x00002000 - PREGf_RECURSE_SEEN */ + "SKIP", /* (1<< 0) - 0x00000001 - PREGf_SKIP */ + "IMPLICIT", /* (1<< 1) - 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */ + "NAUGHTY", /* (1<< 2) - 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */ + "VERBARG_SEEN", /* (1<< 3) - 0x00000008 - PREGf_VERBARG_SEEN */ + "CUTGROUP_SEEN", /* (1<< 4) - 0x00000010 - PREGf_CUTGROUP_SEEN */ + "USE_RE_EVAL", /* (1<< 5) - 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */ + "NOSCAN", /* (1<< 6) - 0x00000040 - PREGf_NOSCAN */ + "", /* (1<< 7) - 0x00000080 - *UNUSED* */ + "GPOS_SEEN", /* (1<< 8) - 0x00000100 - PREGf_GPOS_SEEN */ + "GPOS_FLOAT", /* (1<< 9) - 0x00000200 - PREGf_GPOS_FLOAT */ + "ANCH_MBOL", /* (1<<10) - 0x00000400 - PREGf_ANCH_MBOL */ + "ANCH_SBOL", /* (1<<11) - 0x00000800 - PREGf_ANCH_SBOL */ + "ANCH_GPOS", /* (1<<12) - 0x00001000 - PREGf_ANCH_GPOS */ + "RECURSE_SEEN", /* (1<<13) - 0x00002000 - PREGf_RECURSE_SEEN */ }; #endif /* DOINIT */ #ifdef DEBUGGING -# define REG_INTFLAGS_NAME_SIZE 13 +# define REG_INTFLAGS_NAME_SIZE 14 #endif /* The following have no fixed length. U8 so we can do strchr() on it. */ |