summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2023-01-08 14:16:36 +0100
committerYves Orton <demerphq@gmail.com>2023-01-09 22:43:38 +0100
commit6f9328ca348b95ff60b2672e0c425242e6e87419 (patch)
tree03b62b9852452d8d1540fa105db610142ce85249
parent239136505f79898ae40cd38f706babe0f23f0993 (diff)
downloadperl-6f9328ca348b95ff60b2672e0c425242e6e87419.tar.gz
regcomp.pl - fixup intflags debug data to handle gaps properly
We were not handling gaps in the sequence properly, and effectively showing the wrong flag names or missing the last flag. Now we die if there are any collisions or if any of the PREGf defines set more than one bit. This also adds some crude tests to validate that intflags serialization is working properly. Note, extflags handles more complex scenarios and seems to handle this gracefully already, hence the reason I haven't touched it as well. This also tweaks a comment in lexical_debug.t which part of this was cribbed from.
-rw-r--r--MANIFEST2
-rw-r--r--ext/re/re.pm2
-rw-r--r--ext/re/t/intflags.pl16
-rw-r--r--ext/re/t/intflags.t25
-rw-r--r--ext/re/t/lexical_debug.t2
-rw-r--r--regen/regcomp.pl23
-rw-r--r--regnodes.h29
7 files changed, 79 insertions, 20 deletions
diff --git a/MANIFEST b/MANIFEST
index fc7ad7d12c..40174a28b4 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -4514,6 +4514,8 @@ ext/re/re.pm re extension Perl module
ext/re/re.xs re extension external subroutines
ext/re/re_comp.h re extension wrapper for regcomp.h
ext/re/re_top.h re extension symbol hiding header
+ext/re/t/intflags.pl Program used by intflags.t
+ext/re/t/intflags.t Test that intflags are serialized properly
ext/re/t/lexical_debug.pl generate debug output for lexical re 'debug'
ext/re/t/lexical_debug.t test that lexical re 'debug' works
ext/re/t/qr.t test that qr// is a Regexp
diff --git a/ext/re/re.pm b/ext/re/re.pm
index cf1f7421a2..6da4e02f32 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -4,7 +4,7 @@ package re;
use strict;
use warnings;
-our $VERSION = "0.43";
+our $VERSION = "0.44";
our @ISA = qw(Exporter);
our @EXPORT_OK = qw{
is_regexp regexp_pattern
diff --git a/ext/re/t/intflags.pl b/ext/re/t/intflags.pl
new file mode 100644
index 0000000000..a79180681e
--- /dev/null
+++ b/ext/re/t/intflags.pl
@@ -0,0 +1,16 @@
+use re 'Debug' => qw(DUMP FLAGS);
+our $count;
+my $code= '(?{$count++})';
+my @p= (
+ qr/(foo)(?1)?/,
+ qr/\Gfoo/,
+ qr/.*foo/,
+ qr/^foo/,
+ qr/(foo(*THEN)bar|food)/,
+ qr/a.*b.*/,
+ qr/a{1,4}\Gfoo/,
+ qr/a+/,
+ do { use re 'eval'; qr/a$code/},
+);
+
+print STDERR "-OK-\n";
diff --git a/ext/re/t/intflags.t b/ext/re/t/intflags.t
new file mode 100644
index 0000000000..256bf1f007
--- /dev/null
+++ b/ext/re/t/intflags.t
@@ -0,0 +1,25 @@
+#!./perl
+
+BEGIN {
+ require Config;
+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){
+ print "1..0 # Skip -- Perl configured without re module\n";
+ exit 0;
+ }
+}
+
+use strict;
+
+# must use a BEGIN or the prototypes wont be respected meaning
+# tests could pass that shouldn't.
+BEGIN { require "../../t/test.pl"; }
+my $out = runperl(progfile => "t/intflags.pl", stderr => 1 );
+like($out,qr/-OK-\n/, "intflags.pl ran to completion");
+
+my %seen;
+foreach my $line (split /\n/, $out) {
+ $line=~s/^r->intflags:\s+// or next;
+ length($_) and $seen{$_}++ for split /\s+/, $line;
+}
+is(0+keys %seen,14);
+done_testing;
diff --git a/ext/re/t/lexical_debug.t b/ext/re/t/lexical_debug.t
index b2570f0e2d..4c8b47d54f 100644
--- a/ext/re/t/lexical_debug.t
+++ b/ext/re/t/lexical_debug.t
@@ -11,7 +11,7 @@ BEGIN {
use strict;
# must use a BEGIN or the prototypes wont be respected meaning
- # tests could pass that shouldn't
+# tests could pass that shouldn't
BEGIN { require "../../t/test.pl"; }
my $out = runperl(progfile => "t/lexical_debug.pl", stderr => 1 );
diff --git a/regen/regcomp.pl b/regen/regcomp.pl
index 4999cfa034..94a53dbbb6 100644
--- a/regen/regcomp.pl
+++ b/regen/regcomp.pl
@@ -726,8 +726,10 @@ EOP
my %reverse;
my $REG_INTFLAGS_NAME_SIZE= 0;
my $hp= HeaderParser->new();
+ my $last_val = 0;
foreach my $file ("regcomp.h") {
$hp->read_file($file);
+ my @bit_tuples;
foreach my $line_info (@{$hp->lines}) {
next unless $line_info->{type} eq "content"
and $line_info->{sub_type} eq "#define";
@@ -745,13 +747,26 @@ EOP
my $hex= $3;
my $comment= $4;
my $val= hex($hex);
+ my $bin= sprintf "%b", $val;
+ if ($bin=~/1.*?1/) { die "Not expecting multiple bits in PREGf" }
+ my $bit= length($bin) - 1 ;
$comment= $comment ? " - $comment" : "";
-
- printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",),
- $val, $define, $comment;
- $REG_INTFLAGS_NAME_SIZE++;
+ if ($bit_tuples[$bit]) {
+ die "Duplicate PREGf bit '$bit': $define $val ($hex)";
+ }
+ $bit_tuples[$bit]= [ $bit, $val, $abbr, $define, $comment ];
+ }
+ }
+ foreach my $i (0..$#bit_tuples) {
+ my $bit_tuple= $bit_tuples[$i];
+ if (!$bit_tuple) {
+ $bit_tuple= [ $i, 1<<$i, "", "", "*UNUSED*" ];
}
+ my ($bit, $val, $abbr, $define, $comment)= @$bit_tuple;
+ printf $out qq(\t%-30s/* (1<<%2d) - 0x%08x - %s%s */\n),
+ qq("$abbr",), $bit, $val, $define, $comment;
}
+ $REG_INTFLAGS_NAME_SIZE=0+@bit_tuples;
}
print $out <<EOP;
diff --git a/regnodes.h b/regnodes.h
index 398df445df..5275da64b3 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -2862,24 +2862,25 @@ EXTCONST char * const PL_reg_extflags_name[] = {
EXTCONST char * PL_reg_intflags_name[];
#else
EXTCONST char * const PL_reg_intflags_name[] = {
- "SKIP", /* 0x00000001 - PREGf_SKIP */
- "IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
- "NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
- "VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */
- "CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
- "USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
- "NOSCAN", /* 0x00000040 - PREGf_NOSCAN */
- "GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */
- "GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */
- "ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */
- "ANCH_SBOL", /* 0x00000800 - PREGf_ANCH_SBOL */
- "ANCH_GPOS", /* 0x00001000 - PREGf_ANCH_GPOS */
- "RECURSE_SEEN", /* 0x00002000 - PREGf_RECURSE_SEEN */
+ "SKIP", /* (1<< 0) - 0x00000001 - PREGf_SKIP */
+ "IMPLICIT", /* (1<< 1) - 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
+ "NAUGHTY", /* (1<< 2) - 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
+ "VERBARG_SEEN", /* (1<< 3) - 0x00000008 - PREGf_VERBARG_SEEN */
+ "CUTGROUP_SEEN", /* (1<< 4) - 0x00000010 - PREGf_CUTGROUP_SEEN */
+ "USE_RE_EVAL", /* (1<< 5) - 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
+ "NOSCAN", /* (1<< 6) - 0x00000040 - PREGf_NOSCAN */
+ "", /* (1<< 7) - 0x00000080 - *UNUSED* */
+ "GPOS_SEEN", /* (1<< 8) - 0x00000100 - PREGf_GPOS_SEEN */
+ "GPOS_FLOAT", /* (1<< 9) - 0x00000200 - PREGf_GPOS_FLOAT */
+ "ANCH_MBOL", /* (1<<10) - 0x00000400 - PREGf_ANCH_MBOL */
+ "ANCH_SBOL", /* (1<<11) - 0x00000800 - PREGf_ANCH_SBOL */
+ "ANCH_GPOS", /* (1<<12) - 0x00001000 - PREGf_ANCH_GPOS */
+ "RECURSE_SEEN", /* (1<<13) - 0x00002000 - PREGf_RECURSE_SEEN */
};
#endif /* DOINIT */
#ifdef DEBUGGING
-# define REG_INTFLAGS_NAME_SIZE 13
+# define REG_INTFLAGS_NAME_SIZE 14
#endif
/* The following have no fixed length. U8 so we can do strchr() on it. */