summaryrefslogtreecommitdiff
path: root/regen
diff options
context:
space:
mode:
Diffstat (limited to 'regen')
-rw-r--r--regen/HeaderParser.pm349
1 files changed, 285 insertions, 64 deletions
diff --git a/regen/HeaderParser.pm b/regen/HeaderParser.pm
index e9bcd79068..cc38f9ea9d 100644
--- a/regen/HeaderParser.pm
+++ b/regen/HeaderParser.pm
@@ -171,6 +171,9 @@ sub indent_chars {
# we use OO to store state, etc.
sub new {
my ($class, %args)= @_;
+ $args{add_commented_expr_after} //= 10;
+ $args{max_width} //= 78;
+ $args{min_break_width} //= 70;
return bless \%args,;
}
@@ -198,7 +201,7 @@ sub _tokenize_expr {
sub _count_ops {
my ($self, $term)= @_;
- my $count = 0;
+ my $count= 0;
$count++ while $term =~ m/(?: \|\| | \&\& | \? )/gx;
return $count;
}
@@ -219,7 +222,7 @@ sub _sort_terms {
$_ =~ m/PERL_IN_/ ? 1 : 0, # 2: PERL_IN_ labeled define
$_ =~ m/^\d/ ? 1 : 0, # 3: digit
$_ =~ m/DEBUGGING/ ? 1 : 0, # 4: DEBUGGING?
- $self->_count_ops($_), # 5: Number of ops (||, && and ternary)
+ $self->_count_ops($_), # 5: Number of ops (||, &&)
]
} @_;
my %seen;
@@ -449,7 +452,18 @@ sub _parse_expr_assoc {
push @$lhs, $rhs;
}
else {
- $lhs= [ $op_token->{op}, $lhs, $rhs ];
+ my @lt= ($lhs);
+ my @rt= ($rhs);
+
+ # if we have '( a && b ) && ( c && d)'
+ # turn it into 'a && b && c && d'
+ if (@$lhs > 2 && $lhs->[0] eq $op_token->{op}) {
+ (undef,@lt)= @$lhs; # throw away op.
+ }
+ if (@$rhs > 2 && $rhs->[0] eq $op_token->{op}) {
+ (undef,@rt)= @$rhs; # throw away op.
+ }
+ $lhs= [ $op_token->{op}, @lt, @rt ];
}
}
return $lhs;
@@ -662,7 +676,8 @@ sub parse_fh {
push @lines, $line_info;
if ($do_pop) {
- $line_info->{inner_lines} = $line_info->start_line_num - $cond_line[-1]->start_line_num;
+ $line_info->{inner_lines}=
+ $line_info->start_line_num - $cond_line[-1]->start_line_num;
pop @cond;
pop @cond_line;
}
@@ -799,14 +814,218 @@ sub _line_up_frag {
return $ret;
}
-sub line_up {
- my ($self, $line, @fragments)= @_;
+sub _fixup_indent {
+ my ($self, $line)= @_;
+ my @lines= split /\n/, $line;
+ if ($lines[0]=~/^(#\s*\w+(?:\s*\/\*)?\s)(\s+)/) {
+ my $first_left_len = length $1;
+
+ while (1) {
+ my $ok = 1;
+ for (@lines) {
+ /^.{$first_left_len} /
+ or do { $ok = 0; last; };
+ }
+ if ($ok) {
+ s/^(.{$first_left_len}) /$1/ for @lines;
+ } else {
+ last;
+ }
+ }
+ }
+
+ if ($lines[0]=~/^(#\s*\w+\s+)\(/) {
+ my $len = length($1);
+ for my $idx (1..$#lines) {
+ $lines[$idx]=~s/^([ ]{$len})(\s+)(\()/$1$3$2/;
+ }
+ }
+ my $ret= join "", map { "$_\n" } @lines;
+ return $ret;
+}
+
+# this is the workhorse for _break_line_at_op().
+sub __break_line_at_op {
+ my ($self, $limit, $line, $blank_prefix)= @_;
+ my @lines= ("");
+ while (length $line) {
+ my $part;
+ if ($line =~ s/^(.*?(?:\|\||&&)\s+)//) {
+ $part= $1;
+ }
+ else {
+ $part= $line;
+ $line= "";
+ }
+ if (length($lines[-1]) + length($part) < $limit) {
+ $lines[-1] .= $part;
+ }
+ else {
+ push @lines, $blank_prefix . $part;
+ }
+ }
+ return \@lines;
+}
+
+# Break a condition line into parts, while trying to keep the last
+# token on each line being an operator like || or && or ? or : We try
+# to keep each line at $limit characters, however, we also try to
+# ensure that each line has the same number of operators on it such
+# that across all the lines there are only two counts of operators (eg,
+# we either way each line to have two operators on it, or 0, or 1 or 0,
+# or 2 or 1, and so on.) If we cannot meet this requirement we reduce
+# the limit by 1 and try again, until we meet the objective, or the
+# limit ends up at 70 chars or less.
+sub _break_line_at_op {
+ my ($self, $limit, $line, $blank_prefix)= @_;
+ my $lines;
+ while (1) {
+ $lines= $self->__break_line_at_op($limit, $line, $blank_prefix);
+ my %op_counts;
+ foreach my $line_idx (0 .. $#$lines) {
+ my $line= $lines->[$line_idx];
+ my $count= 0;
+ $count++ while $line =~ /(\|\||&&|\?|:)/g;
+ $op_counts{$count}++;
+
+ }
+ if ($limit <= $self->{min_break_width} || keys(%op_counts) <= 2) {
+ last;
+ }
+ $limit--;
+ }
+
+ s/\s*\z/\n/ for @$lines;
+ return join "", @$lines;
+}
+
+sub _max { # cant use Scalar::Util so we roll our own
+ my $max= shift;
+ $max < $_ and $max= $_ for @_;
+ return $max;
+}
+
+# take a condition, split into $type and $rest
+# wrap it, and try to line up operators and defined() functions
+# that it contains. This is rather horrible code, but it does a
+# reasonable job applying the heuristics we need to lay our the
+# conditions in a reasonable way.
+sub _wrap_and_line_up_cond {
+ my ($self, $type, $rest)= @_;
+
+ my $limit= $self->{max_width};
+
+ # extract the expression part of the line, and normalize it, we do
+ # this here even though it might be duplicative as it is possible
+ # that the caller code has munged the expression in some way, and we
+ # might want to simplify the expression first. Eg:
+ # 'defined(FOO) && (defined(BAR) && defined(BAZ))' should be turned into
+ # 'defined(FOO) && defined(BAR) && defined(BAZ)' if possible.
+ my $rest_head= "";
+ my $rest_tail= "";
+ if ($rest =~ s!(if\s+)!!) {
+ $rest_head= $1;
+ }
+ if ($rest =~ s!(\s*/\*.*?\*/)\s*\z!! || $rest =~ s!(\s*\*/\s*)\z!!) {
+ $rest_tail= $1;
+ }
+ if ($rest) {
+ $rest= $self->tidy_cond($rest);
+ $rest= $rest_head . $rest . $rest_tail;
+ }
+
+ my $l= length($type);
+ my $line= $type;
+ $line .= $rest if length($rest);
+ my $blank_prefix= " " x $l;
+
+ # at this point we have a single line with the entire expression on it
+ # if it fits on one line we are done, we can return it right away.
+ if (length($line) <= $limit) {
+ $line =~ s/\s*\z/\n/;
+ return $line;
+ }
+ my $rest_copy= $rest;
+ my @fragments;
+ my $op_pat= qr/(?:\|\||&&|[?:])/;
+
+ # does the $rest contain a parenthesized group? If it does then
+ # there are a mixture of different ops being used, as if it was all
+ # the same opcode there would not be a parenthesized group.
+ # If it does then we handle it differently, and try to put the
+ # different parts of the expression on their own line.
+ if ($rest_copy =~ /$op_pat\s*\(/) {
+ my @parts;
+ while (length $rest_copy) {
+ if ($rest_copy =~ s/^(.*?$op_pat)(\s*!?\()/$2/) {
+ push @parts, $1;
+ } else {
+ #$rest_copy=~s/^\s+//;
+ push @parts, $rest_copy;
+ last;
+ }
+ }
+ $parts[0]= $type . $parts[0];
+ $parts[$_]= $blank_prefix . $parts[$_] for 1 .. $#parts;
+ foreach my $line (@parts) {
+ if (length($line) > $limit) {
+ $line= $self->_break_line_at_op($limit, $line, $blank_prefix);
+ }
+ }
+ s/\s*\z/\n/ for @parts;
+ $line= join "", @parts;
+ @fragments= ("defined", "||");
+ }
+ else {
+ # the expression consists of just one opcode type, so we can use
+ # simpler logic to break it apart with the objective of ensuring
+ # that the lines are similarly formed with trailing operators on
+ # each line but the last.
+ @fragments= ("||", "defined");
+ $line= $self->_break_line_at_op($limit, $type . $rest, $blank_prefix);
+ }
+ # try to line up the text on different lines. We stop after
+ # the first $fragment that modifies the text. The order
+ # of fragments we try is determined above based on the type
+ # of condition this is.
+ my $pre_line= $line;
foreach my $fragment (@fragments) {
$line= $self->_line_up_frag($line, $fragment);
- last if $line ne $_[1];
+ last if $line ne $pre_line;
}
- return $line;
+
+ # if we have lined up by "defined" in _line_up_frag()
+ # then we may have " || defined(...)" type expressions
+ # convert these to " || defined(...)" as it looks better.
+ $line =~ s/( )(\|\||&&|[()?:])([ ]{2,})(!?defined)/$3$2$1$4/g;
+ $line =~ s/(\|\||&&|[()?:])[ ]{10,}/$1 /g;
+
+ # add back the line continuations. this is all pretty inefficient,
+ # but it works nicely.
+ my @lines= split /\n/, $line;
+ my $last= pop @lines;
+ my $max_len= _max(map { length $_ } @lines);
+ $_= sprintf "%*s \\\n", -$max_len, $_ for @lines;
+ $last .= "\n";
+
+ $line= join "", @lines, $last;
+
+ # remove line continuations that are inside of a comment,
+ # we may have a variable number of lines of the expression
+ # or parts of lines of the expression in a comment, so
+ # we do this last.
+ $line =~ s!/\* (.*) \*/
+ !"/*"._strip_line_cont("$1")."*/"!xsge;
+
+ return $self->_fixup_indent($line);
+}
+
+#remove line continuations from the argument.
+sub _strip_line_cont {
+ my ($string)= @_;
+ $string =~ s/\s*\\\n/\n/g;
+ return $string;
}
# Takes an array of HeaderLines objects produced by parse_fh()
@@ -818,6 +1037,7 @@ sub lines_as_str {
$post_process_content ||= $self->{post_process_content};
my $filter= $self->{filter_content};
my $last_line= "";
+
#warn $self->dd($lines);
foreach my $line_data (@$lines) {
my $line= $line_data->{line};
@@ -828,7 +1048,7 @@ sub lines_as_str {
$line =~ s/^#(\s*)/#$ind/;
}
if ($line_data->{type} eq "cond") {
- my $add_commented_expr_after = 10;
+ my $add_commented_expr_after= $self->{add_commented_expr_after};
if ($line_data->{sub_type} =~ /#(?:else|endif)/) {
my $joined= join " && ",
map { "($_)" } @{ $line_data->{cond}[-1] };
@@ -862,11 +1082,8 @@ sub lines_as_str {
$comment =~ s!^(/\*\s+)!!
and $type .= $1;
}
- my $l= length($type);
- $line= _my_wrap($type, " " x $l, $cond . $comment);
- $line =~ s/\n(?!\z)/ \\\n/g;
- $line =~ s!\\\n\s+((?:\)\s*)*)\*/!$1*/!;
- $line= $self->line_up($line, "||", "define");
+
+ $line= $self->_wrap_and_line_up_cond($type, $cond . $comment);
}
$line_data->{line}= $line;
if ($post_process_content and $line_data->{type} eq "content") {
@@ -887,7 +1104,7 @@ sub _my_wrap {
local $Text::Wrap::unexpand= 0;
local $Text::Wrap::huge= "overflow";
local $Text::Wrap::columns= 78;
- unless (length $line) { return $head };
+ unless (length $line) { return $head }
$line= wrap $head, $rest, $line;
return $line;
}
@@ -1078,7 +1295,7 @@ sub _recurse_group_content_tree {
local $self->{rgct_ret}= \@ret;
local $self->{line_by_depth}= [];
- $self->__recurse_group_content_tree($node,@path);
+ $self->__recurse_group_content_tree($node, @path);
return \@ret;
}
@@ -1098,9 +1315,14 @@ sub __recurse_group_content_tree {
if (my $cb= $self->{post_process_content}) {
$cb->($self, $_, \@path) for @{ $node->{''} };
}
- push @$ret,
- map { HeaderLine->new(%$_, cond => [@path], level => $depth, start_line_num => 0+@$ret) }
- @{ $node->{''} };
+ push @$ret, map {
+ HeaderLine->new(
+ %$_,
+ cond => [@path],
+ level => $depth,
+ start_line_num => 0 + @$ret
+ )
+ } @{ $node->{''} };
}
my %skip;
@@ -1127,15 +1349,15 @@ sub __recurse_group_content_tree {
}
my $raw= "#${ind}if $expr\n";
my $hl= HeaderLine->new(
- type => "cond",
- sub_type => "#if",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ @path, [$expr] ],
- start_line_num => 0+@$ret,
+ type => "cond",
+ sub_type => "#if",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ @path, [$expr] ],
+ start_line_num => 0 + @$ret,
);
- $self->{line_by_depth}[$depth] = 0+@$ret;
+ $self->{line_by_depth}[$depth]= 0 + @$ret;
push @$ret, $hl;
$self->__recurse_group_content_tree($kid, @path, [$expr]);
if ($node->{$not}) {
@@ -1162,14 +1384,14 @@ sub __recurse_group_content_tree {
? -2
: -1;
my $end_line= HeaderLine->new(
- type => "cond",
- sub_type => "#endif",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ @path, $last_ret->{cond}[$idx] ],
- start_line_num => 0+@$ret,
- inner_lines => @$ret - $self->{line_by_depth}[$depth],
+ type => "cond",
+ sub_type => "#endif",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ @path, $last_ret->{cond}[$idx] ],
+ start_line_num => 0 + @$ret,
+ inner_lines => @$ret - $self->{line_by_depth}[$depth],
);
undef $self->{line_by_depth}[$depth];
push @$ret, $end_line;
@@ -1177,7 +1399,6 @@ sub __recurse_group_content_tree {
return $ret;
}
-
# this handles the specific case of an else clause, detecting
# when an elif can be constructed, may recursively call itself
# to deal with if/elif/elif/else chains. Calls back into
@@ -1197,7 +1418,7 @@ sub _handle_else {
# the inverse of $k1, which we will use later.
$not_k1= $self->tidy_cond("!($k1)");
}
- my $ret = $self->{rgct_ret};
+ my $ret= $self->{rgct_ret};
if (length($k1) and !defined($k2)) {
# only one child, no payload -> elsif $k1
@@ -1215,16 +1436,16 @@ sub _handle_else {
my $raw= "#${ind}elif $sub_expr\n";
push @{ $path[-1] }, $sub_expr;
my $hl= HeaderLine->new(
- type => "cond",
- sub_type => "#elif",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ map { [@$_] } @path ],
- start_line_num => 0+@$ret,
- inner_lines => @$ret - $self->{line_by_depth}[$depth],
+ type => "cond",
+ sub_type => "#elif",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ map { [@$_] } @path ],
+ start_line_num => 0 + @$ret,
+ inner_lines => @$ret - $self->{line_by_depth}[$depth],
);
- $self->{line_by_depth}[$depth] = 0+@$ret;
+ $self->{line_by_depth}[$depth]= 0 + @$ret;
push @$ret, $hl;
$self->__recurse_group_content_tree($kid, @path);
}
@@ -1235,16 +1456,16 @@ sub _handle_else {
push @{ $path[-1] }, $k1;
my $hl= HeaderLine->new(
- type => "cond",
- sub_type => "#elif",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ map { [@$_] } @path ],
- start_line_num => 0+@$ret,
- inner_lines => @$ret - $self->{line_by_depth}[$depth],
+ type => "cond",
+ sub_type => "#elif",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ map { [@$_] } @path ],
+ start_line_num => 0 + @$ret,
+ inner_lines => @$ret - $self->{line_by_depth}[$depth],
);
- $self->{line_by_depth}[$depth] = 0+@$ret;
+ $self->{line_by_depth}[$depth]= 0 + @$ret;
push @$ret, $hl;
$self->__recurse_group_content_tree($kid->{$k1}, @path);
$path[-1][-1]= $k2;
@@ -1254,16 +1475,16 @@ sub _handle_else {
# payload, 3+ children, or 2 which are not complementary -> else
my $raw= "#${ind}else\n";
my $hl= HeaderLine->new(
- type => "cond",
- sub_type => "#else",
- raw => $raw,
- line => $raw,
- level => $depth,
- cond => [ map { [@$_] } @path ],
- start_line_num => 0+@$ret,
- inner_lines => @$ret - $self->{line_by_depth}[$depth],
+ type => "cond",
+ sub_type => "#else",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ map { [@$_] } @path ],
+ start_line_num => 0 + @$ret,
+ inner_lines => @$ret - $self->{line_by_depth}[$depth],
);
- $self->{line_by_depth}[$depth] = 0+@$ret;
+ $self->{line_by_depth}[$depth]= 0 + @$ret;
push @$ret, $hl;
$self->__recurse_group_content_tree($kid, @path);
}