summaryrefslogtreecommitdiff
path: root/regen
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2023-03-28 21:57:22 +0200
committerYves Orton <demerphq@gmail.com>2023-04-05 15:22:55 +0800
commit67d37c55ddffebbcbf4847408106334b9ad9a6bc (patch)
tree3ec4fed15f1eae09374bbfc4d97f27ced64155fd /regen
parent0398b69bfddc8738a4788f056a79ba9e4d44fcfb (diff)
downloadperl-67d37c55ddffebbcbf4847408106334b9ad9a6bc.tar.gz
regen/HeaderParser.pm - improved expression formatting and wrapping
Karl complained about some of the wrapping logic we use for expressions. This tweaks the rules in a number of different ways in an attempt to produce more legible expressions. For instance if we have a complex expression with different parenthesized sub expressions, then try to put each sub expression on its own line. A previous patch ensures that we put shorter sub expressions first, and this patch builds on that to put each sub expression on its own line. We also use different logic to wrap the expressions, with the end result that each line should have the same number of defined() operations on it (with the exception of the last). We also try harder to line up logical operators and defined() functions.
Diffstat (limited to 'regen')
-rw-r--r--regen/HeaderParser.pm349
1 files changed, 285 insertions, 64 deletions
diff --git a/regen/HeaderParser.pm b/regen/HeaderParser.pm
index e9bcd79068..cc38f9ea9d 100644
--- a/regen/HeaderParser.pm
+++ b/regen/HeaderParser.pm
@@ -171,6 +171,9 @@ sub indent_chars {
# we use OO to store state, etc.
sub new {
my ($class, %args)= @_;
+ $args{add_commented_expr_after} //= 10;
+ $args{max_width} //= 78;
+ $args{min_break_width} //= 70;
return bless \%args,;
}
@@ -198,7 +201,7 @@ sub _tokenize_expr {
sub _count_ops {
my ($self, $term)= @_;
- my $count = 0;
+ my $count= 0;
$count++ while $term =~ m/(?: \|\| | \&\& | \? )/gx;
return $count;
}
@@ -219,7 +222,7 @@ sub _sort_terms {
$_ =~ m/PERL_IN_/ ? 1 : 0, # 2: PERL_IN_ labeled define
$_ =~ m/^\d/ ? 1 : 0, # 3: digit
$_ =~ m/DEBUGGING/ ? 1 : 0, # 4: DEBUGGING?
- $self->_count_ops($_), # 5: Number of ops (||, && and ternary)
+ $self->_count_ops($_), # 5: Number of ops (||, &&)
]
} @_;
my %seen;
@@ -449,7 +452,18 @@ sub _parse_expr_assoc {
push @$lhs, $rhs;
}
else {
- $lhs= [ $op_token->{op}, $lhs, $rhs ];
+ my @lt= ($lhs);
+ my @rt= ($rhs);
+
+ # if we have '( a && b ) && ( c && d)'
+ # turn it into 'a && b && c && d'
+ if (@$lhs > 2 && $lhs->[0] eq $op_token->{op}) {
+ (undef,@lt)= @$lhs; # throw away op.
+ }
+ if (@$rhs > 2 && $rhs->[0] eq $op_token->{op}) {
+ (undef,@rt)= @$rhs; # throw away op.
+ }
+ $lhs= [ $op_token->{op}, @lt, @rt ];
}
}
return $lhs;
@@ -662,7 +676,8 @@ sub parse_fh {
push @lines, $line_info;
if ($do_pop) {
- $line_info->{inner_lines} = $line_info->start_line_num - $cond_line[-1]->start_line_num;
+ $line_info->{inner_lines}=
+ $line_info->start_line_num - $cond_line[-1]->start_line_num;
pop @cond;
pop @cond_line;
}
@@ -799,14 +814,218 @@ sub _line_up_frag {
return $ret;
}
-sub line_up {
- my ($self, $line, @fragments)= @_;
+sub _fixup_indent {
+ my ($self, $line)= @_;
+ my @lines= split /\n/, $line;
+ if ($lines[0]=~/^(#\s*\w+(?:\s*\/\*)?\s)(\s+)/) {
+ my $first_left_len = length $1;
+
+ while (1) {
+ my $ok = 1;
+ for (@lines) {
+ /^.{$first_left_len} /
+ or do { $ok = 0; last; };
+ }
+ if ($ok) {
+ s/^(.{$first_left_len}) /$1/ for @lines;
+ } else {
+ last;
+ }
+ }
+ }
+
+ if ($lines[0]=~/^(#\s*\w+\s+)\(/) {
+ my $len = length($1);
+ for my $idx (1..$#lines) {
+ $lines[$idx]=~s/^([ ]{$len})(\s+)(\()/$1$3$2/;
+ }
+ }
+ my $ret= join "", map { "$_\n" } @lines;
+ return $ret;
+}
+
+# this is the workhorse for _break_line_at_op().
+sub __break_line_at_op {
+ my ($self, $limit, $line, $blank_prefix)= @_;
+ my @lines= ("");
+ while (length $line) {
+ my $part;
+ if ($line =~ s/^(.*?(?:\|\||&&)\s+)//) {
+ $part= $1;
+ }
+ else {
+ $part= $line;
+ $line= "";
+ }
+ if (length($lines[-1]) + length($part) < $limit) {
+ $lines[-1] .= $part;
+ }
+ else {
+ push @lines, $blank_prefix . $part;
+ }
+ }
+ return \@lines;
+}
+
+# Break a condition line into parts, while trying to keep the last
+# token on each line being an operator like || or && or ? or : We try
+# to keep each line at $limit characters, however, we also try to
+# ensure that each line has the same number of operators on it such
+# that across all the lines there are only two counts of operators (eg,
+# we either way each line to have two operators on it, or 0, or 1 or 0,
+# or 2 or 1, and so on.) If we cannot meet this requirement we reduce
+# the limit by 1 and try again, until we meet the objective, or the
+# limit ends up at 70 chars or less.
+sub _break_line_at_op {
+ my ($self, $limit, $line, $blank_prefix)= @_;
+ my $lines;
+ while (1) {
+ $lines= $self->__break_line_at_op($limit, $line, $blank_prefix);
+ my %op_counts;
+ foreach my $line_idx (0 .. $#$lines) {
+ my $line= $lines->[$line_idx];
+ my $count= 0;
+ $count++ while $line =~ /(\|\||&&|\?|:)/g;
+ $op_counts{$count}++;
+
+ }
+ if ($limit <= $self->{min_break_width} || keys(%op_counts) <= 2) {
+ last;
+ }
+ $limit--;
+ }
+
+ s/\s*\z/\n/ for @$lines;
+ return join "", @$lines;
+}
+
+sub _max { # cant use Scalar::Util so we roll our own
+ my $max= shift;
+ $max < $_ and $max= $_ for @_;
+ return $max;
+}
+
+# take a condition, split into $type and $rest
+# wrap it, and try to line up operators and defined() functions
+# that it contains. This is rather horrible code, but it does a
+# reasonable job applying the heuristics we need to lay our the
+# conditions in a reasonable way.
+sub _wrap_and_line_up_cond {
+ my ($self, $type, $rest)= @_;
+
+ my $limit= $self->{max_width};
+
+ # extract the expression part of the line, and normalize it, we do
+ # this here even though it might be duplicative as it is possible
+ # that the caller code has munged the expression in some way, and we
+ # might want to simplify the expression first. Eg:
+ # 'defined(FOO) && (defined(BAR) && defined(BAZ))' should be turned into
+ # 'defined(FOO) && defined(BAR) && defined(BAZ)' if possible.
+ my $rest_head= "";
+ my $rest_tail= "";
+ if ($rest =~ s!(if\s+)!!) {
+ $rest_head= $1;
+ }
+ if ($rest =~ s!(\s*/\*.*?\*/)\s*\z!! || $rest =~ s!(\s*\*/\s*)\z!!) {
+ $rest_tail= $1;
+ }
+ if ($rest) {
+ $rest= $self->tidy_cond($rest);
+ $rest= $rest_head . $rest . $rest_tail;
+ }
+
+ my $l= length($type);
+ my $line= $type;
+ $line .= $rest if length($rest);
+ my $blank_prefix= " " x $l;
+
+ # at this point we have a single line with the entire expression on it
+ # if it fits on one line we are done, we can return it right away.
+ if (length($line) <= $limit) {
+ $line =~ s/\s*\z/\n/;
+ return $line;
+ }
+ my $rest_copy= $rest;
+ my @fragments;
+ my $op_pat= qr/(?:\|\||&&|[?:])/;
+
+ # does the $rest contain a parenthesized group? If it does then
+ # there are a mixture of different ops being used, as if it was all
+ # the same opcode there would not be a parenthesized group.
+ # If it does then we handle it differently, and try to put the
+ # different parts of the expression on their own line.
+ if ($rest_copy =~ /$op_pat\s*\(/) {
+ my @parts;
+ while (length $rest_copy) {
+ if ($rest_copy =~ s/^(.*?$op_pat)(\s*!?\()/$2/) {
+ push @parts, $1;
+ } else {
+ #$rest_copy=~s/^\s+//;
+ push @parts, $rest_copy;
+ last;
+ }
+ }
+ $parts[0]= $type . $parts[0];
+ $parts[$_]= $blank_prefix . $parts[$_] for 1 .. $#parts;
+ foreach my $line (@parts) {
+ if (length($line) > $limit) {
+ $line= $self->_break_line_at_op($limit, $line, $blank_prefix);
+ }
+ }
+ s/\s*\z/\n/ for @parts;
+ $line= join "", @parts;
+ @fragments= ("defined", "||");
+ }
+ else {
+ # the expression consists of just one opcode type, so we can use
+ # simpler logic to break it apart with the objective of ensuring
+ # that the lines are similarly formed with trailing operators on
+ # each line but the last.
+ @fragments= ("||", "defined");
+ $line= $self->_break_line_at_op($limit, $type . $rest, $blank_prefix);
+ }
+ # try to line up the text on different lines. We stop after
+ # the first $fragment that modifies the text. The order
+ # of fragments we try is determined above based on the type
+ # of condition this is.
+ my $pre_line= $line;
foreach my $fragment (@fragments) {
$line= $self->_line_up_frag($line, $fragment);
- last if $line ne $_[1];
+ last if $line ne $pre_line;
}
- return $line;
+
+ # if we have lined up by "defined" in _line_up_frag()
+ # then we may have " || defined(...)" type expressions
+ # convert these to " || defined(...)" as it looks better.
+ $line =~ s/( )(\|\||&&|[()?:])([ ]{2,})(!?defined)/$3$2$1$4/g;
+ $line =~ s/(\|\||&&|[()?:])[ ]{10,}/$1 /g;
+
+ # add back the line continuations. this is all pretty inefficient,
+ # but it works nicely.
+ my @lines= split /\n/, $line;
+ my $last= pop @lines;
+ my $max_len= _max(map { length $_ } @lines);
+ $_= sprintf "%*s \\\n", -$max_len, $_ for @lines;
+ $last .= "\n";
+
+ $line= join "", @lines, $last;
+
+ # remove line continuations that are inside of a comment,
+ # we may have a variable number of lines of the expression
+ # or parts of lines of the expression in a comment, so
+ # we do this last.
+ $line =~ s!/\* (.*) \*/
+ !"/*"._strip_line_cont("$1")."*/"!xsge;
+
+ return $self->_fixup_indent($line);
+}
+
+#remove line continuations from the argument.
+sub _strip_line_cont {
+ my ($string)= @_;
+ $string =~ s/\s*\\\n/\n/g;
+ return $string;
}
# Takes an array of HeaderLines objects produced by parse_fh()
@@ -818,6 +1037,7 @@ sub lines_as_str {
$post_process_content ||= $self->{post_process_content};
my $filter= $self->{filter_content};
my $last_line= "";
+
#warn $self->dd($lines);
foreach my $line_data (@$lines) {
my $line= $line_data->{line};
@@ -828,7 +1048,7 @@ sub lines_as_str {
$line =~ s/^#(\s*)/#$ind/;
}
if ($line_data->{type} eq "cond") {
- my $add_commented_expr_after = 10;
+ my $add_commented_expr_after= $self->{add_commented_expr_after};
if ($line_data->{sub_type} =~ /#(?:else|endif)/) {
my $joined= join " && ",
map { "($_)" } @{ $line_data->{cond}[-1] };
@@ -862,11 +1082,8 @@ sub lines_as_str {
$comment =~ s!^(/\*\s+)!!
and $type .= $1;
}
- my $l= length($type);
- $line= _my_wrap($type, " " x $l, $cond . $comment);
- $line =~ s/\n(?!\z)/ \\\n/g;
- $line =~ s!\\\n\s+((?:\)\s*)*)\*/!$1*/!;
- $line= $self->line_up($line, "||", "define");
+
+ $line= $self->_wrap_and_line_up_cond($type, $cond . $comment);
}
$line_data->{line}= $line;
if ($post_process_content and $line_data->{type} eq "content") {
@@ -887,7 +1104,7 @@ sub _my_wrap {
local $Text::Wrap::unexpand= 0;
local $Text::Wrap::huge= "overflow";
local $Text::Wrap::columns= 78;
- unless (length $line) { return $head };
+ unless (length $line) { return $head }
$line= wrap $head, $rest, $line;
return $line;
}
@@ -1078,7 +1295,7 @@ sub _recurse_group_content_tree {
local $self->{rgct_ret}= \@ret;
local $self->{line_by_depth}= [];
- $self->__recurse_group_content_tree($node,@path);
+ $self->__recurse_group_content_tree($node, @path);
return \@ret;
}
@@ -1098,9 +1315,14 @@ sub __recurse_group_content_tree {
if (my $cb= $self->{post_process_content}) {
$cb->($self, $_, \@path) for @{ $node->{''} };
}
- push @$ret,
- map { HeaderLine->new(%$_, cond => [@path], level => $depth, start_line_num => 0+@$ret) }
- @{ $node->{''} };
+ push @$ret, map {
+ HeaderLine->new(
+ %$_,
+ cond => [@path],
+ level => $depth,
+ start_line_num => 0 + @$ret
+ )
+ } @{ $node->{''} };
}
my %skip;
@@ -1127,15 +1349,15 @@ sub __recurse_group_content_tree {
}
my $raw= "#${ind}if $expr\n";
my $hl= HeaderLine->new(
- type => "cond",
- sub_type => "#if",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ @path, [$expr] ],
- start_line_num => 0+@$ret,
+ type => "cond",
+ sub_type => "#if",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ @path, [$expr] ],
+ start_line_num => 0 + @$ret,
);
- $self->{line_by_depth}[$depth] = 0+@$ret;
+ $self->{line_by_depth}[$depth]= 0 + @$ret;
push @$ret, $hl;
$self->__recurse_group_content_tree($kid, @path, [$expr]);
if ($node->{$not}) {
@@ -1162,14 +1384,14 @@ sub __recurse_group_content_tree {
? -2
: -1;
my $end_line= HeaderLine->new(
- type => "cond",
- sub_type => "#endif",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ @path, $last_ret->{cond}[$idx] ],
- start_line_num => 0+@$ret,
- inner_lines => @$ret - $self->{line_by_depth}[$depth],
+ type => "cond",
+ sub_type => "#endif",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ @path, $last_ret->{cond}[$idx] ],
+ start_line_num => 0 + @$ret,
+ inner_lines => @$ret - $self->{line_by_depth}[$depth],
);
undef $self->{line_by_depth}[$depth];
push @$ret, $end_line;
@@ -1177,7 +1399,6 @@ sub __recurse_group_content_tree {
return $ret;
}
-
# this handles the specific case of an else clause, detecting
# when an elif can be constructed, may recursively call itself
# to deal with if/elif/elif/else chains. Calls back into
@@ -1197,7 +1418,7 @@ sub _handle_else {
# the inverse of $k1, which we will use later.
$not_k1= $self->tidy_cond("!($k1)");
}
- my $ret = $self->{rgct_ret};
+ my $ret= $self->{rgct_ret};
if (length($k1) and !defined($k2)) {
# only one child, no payload -> elsif $k1
@@ -1215,16 +1436,16 @@ sub _handle_else {
my $raw= "#${ind}elif $sub_expr\n";
push @{ $path[-1] }, $sub_expr;
my $hl= HeaderLine->new(
- type => "cond",
- sub_type => "#elif",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ map { [@$_] } @path ],
- start_line_num => 0+@$ret,
- inner_lines => @$ret - $self->{line_by_depth}[$depth],
+ type => "cond",
+ sub_type => "#elif",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ map { [@$_] } @path ],
+ start_line_num => 0 + @$ret,
+ inner_lines => @$ret - $self->{line_by_depth}[$depth],
);
- $self->{line_by_depth}[$depth] = 0+@$ret;
+ $self->{line_by_depth}[$depth]= 0 + @$ret;
push @$ret, $hl;
$self->__recurse_group_content_tree($kid, @path);
}
@@ -1235,16 +1456,16 @@ sub _handle_else {
push @{ $path[-1] }, $k1;
my $hl= HeaderLine->new(
- type => "cond",
- sub_type => "#elif",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ map { [@$_] } @path ],
- start_line_num => 0+@$ret,
- inner_lines => @$ret - $self->{line_by_depth}[$depth],
+ type => "cond",
+ sub_type => "#elif",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ map { [@$_] } @path ],
+ start_line_num => 0 + @$ret,
+ inner_lines => @$ret - $self->{line_by_depth}[$depth],
);
- $self->{line_by_depth}[$depth] = 0+@$ret;
+ $self->{line_by_depth}[$depth]= 0 + @$ret;
push @$ret, $hl;
$self->__recurse_group_content_tree($kid->{$k1}, @path);
$path[-1][-1]= $k2;
@@ -1254,16 +1475,16 @@ sub _handle_else {
# payload, 3+ children, or 2 which are not complementary -> else
my $raw= "#${ind}else\n";
my $hl= HeaderLine->new(
- type => "cond",
- sub_type => "#else",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ map { [@$_] } @path ],
- start_line_num => 0+@$ret,
- inner_lines => @$ret - $self->{line_by_depth}[$depth],
+ type => "cond",
+ sub_type => "#else",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ map { [@$_] } @path ],
+ start_line_num => 0 + @$ret,
+ inner_lines => @$ret - $self->{line_by_depth}[$depth],
);
- $self->{line_by_depth}[$depth] = 0+@$ret;
+ $self->{line_by_depth}[$depth]= 0 + @$ret;
push @$ret, $hl;
$self->__recurse_group_content_tree($kid, @path);
}