summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST3
-rw-r--r--regen.pl1
-rw-r--r--regen/HeaderParser.pm1553
-rw-r--r--regen/tidy_embed.pl61
-rw-r--r--t/porting/header_parser.t517
5 files changed, 2135 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
index 8dec989978..618694d914 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5555,6 +5555,7 @@ regen/embed.pl Produces {embed,embedvar,proto}.h
regen/embed_lib.pl Reads embed.fnc and regen/opcodes
regen/feature.pl Generates feature.pm
regen/genpacksizetables.pl Generate the size tables for pack/unpack
+regen/HeaderParser.pm Module used to parse header files
regen/keywords.pl Program to write keywords.h
regen/lib_cleanup.pl Generate lib/.gitignore from MANIFEST
regen/mg_vtable.pl generate mg_vtable.h
@@ -5572,6 +5573,7 @@ regen/regcharclass_multi_char_folds.pl Generate input for regcharclass.pl
regen/regcomp.pl Builder of regnodes.h
regen/regen_lib.pl Common file routines for generator scripts
regen/scope_types.pl Regenerate scope_types.h
+regen/tidy_embed.pl Program to clean up embed.fnc manually
regen/uconfig_h.pl generate uconfig.h (requires /bin/sh)
regen/unicode_constants.pl generate unicode_constants.h
regen/warnings.pl Program to write warnings.h and lib/warnings.pm
@@ -6118,6 +6120,7 @@ t/porting/extrefs.t Check perl headers don't make extern refs
t/porting/filenames.t Check the MANIFEST for filename portability.
t/porting/FindExt.t Test win32/FindExt.pm
t/porting/globvar.t Check that globvar.sym is sane
+t/porting/header_parser.t Check that regen/HeaderParser.pm works as expected
t/porting/known_pod_issues.dat Data file for porting/podcheck.t
t/porting/libperl.t Check libperl.a sanity
t/porting/maintainers.t Test that Porting/Maintainers.pl is up to date
diff --git a/regen.pl b/regen.pl
index 3cadf85c09..5ea59e0f5c 100644
--- a/regen.pl
+++ b/regen.pl
@@ -31,4 +31,5 @@ overload.pl
reentr.pl
regcomp.pl
scope_types.pl
+tidy_embed.pl
warnings.pl
diff --git a/regen/HeaderParser.pm b/regen/HeaderParser.pm
new file mode 100644
index 0000000000..8bc2c72548
--- /dev/null
+++ b/regen/HeaderParser.pm
@@ -0,0 +1,1553 @@
+package HeaderParser;
+use strict;
+use warnings;
+
+# these are required below in BEGIN statements, we cant have a
+# hard dependency on them as they might not be available when
+# we run as part of autodoc.pl
+#
+# use Data::Dumper;
+# use Storable qw(dclone);
+#
+use Carp qw(confess);
+use Text::Tabs qw(expand unexpand);
+use Text::Wrap qw(wrap);
+
+# The style of this file is determined by:
+#
+# perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \
+# -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs \
+# -fsb='#start-no-tidy' -fse='#end-no-tidy' -cpb -bfvt=2
+
+my (
+ %unop, # unary operators and their precedence
+ %binop, # binary operators and their precedence
+ %is_right_assoc, # operators which are right associative
+ %precedence, # precedence of all operators.
+ %associative, # associative operators
+ %commutative, # commutative operators
+ %cmpop, # comparison operators
+ $unop_pat, # pattern to match unary operators
+ $binop_pat, # pattern to match binary operators
+ %op_names, # map of op to description, used in error messages
+ $tokenize_pat # a pattern which can tokenize an expression
+);
+
+BEGIN {
+ # this is initialization for the operator precedence expression parser
+ # we use for handling preprocessor conditions.
+ %op_names= (
+ '==' => 'equality',
+ '!=' => 'inequality',
+ '<<' => 'bit-shift-left',
+ '>>' => 'bit-shift-right',
+ '+' => 'addition',
+ '-' => 'subtraction',
+ '*' => 'multiplication',
+ '/' => 'division',
+ '%' => 'modulo',
+ '||' => 'logical-or', # Lowest precedence
+ '&&' => 'logical-and',
+ '|' => 'binary-or',
+ '^' => 'binary-xor',
+ '&' => 'binary-and',
+ '<' => 'less-than', # split on spaces, all with equal precedence
+ '>' => 'greater-than',
+ '<=' => 'less-than-or-equal',
+ '>=' => 'greater-than-or-equal',
+ );
+ my @cmpop= (
+ '== !=', # listed in lowest to highest precedence
+ '< > <= >=', # split on spaces, all with equal precedence
+ );
+ my @binop= (
+ '||', # Lowest precedence
+ '&&',
+ '|',
+ '^',
+ '&',
+ @cmpop, # include the numerical comparison operators.
+ '<< >>',
+ '+ -',
+ '* / %', # highest prcedence operators.
+ );
+
+ my @unop= qw( ! ~ + - );
+ %unop= map { $_ => 1 } @unop;
+ %cmpop= map { $_ => 1 } map { split /\s+/, $_ } @cmpop;
+ %binop= map { $_ => 1 } map { split /\s+/, $_ } @binop;
+
+ my $make_pat= sub {
+ my $pat= join "|", sort { length($b) <=> length($a) || $a cmp $b }
+ map quotemeta($_), @_;
+ return qr/$pat/;
+ };
+ $unop_pat= $make_pat->(@unop);
+ foreach my $ix (0 .. $#binop) {
+ my $sym= $binop[$ix];
+ $precedence{$_}= (1 + $ix) * 10 for split /\s+/, $sym;
+ }
+ $is_right_assoc{"?"}= 1;
+ $is_right_assoc{":"}= 1;
+ $precedence{"?"}= 1;
+ $precedence{":"}= 0;
+
+ $associative{$_}++
+ for qw( || && + *); # we leave '==' out so we don't reorder terms
+ $commutative{$_}++ for qw( || && + *);
+
+ $binop_pat= $make_pat->(keys %precedence);
+ $tokenize_pat= qr/
+ ^(?:
+ (?<comment> \/\*.*?\*\/ )
+ | (?<ws> \s+ )
+ | (?<term>
+ (?<literal>
+ (?<define> defined\(\w+\) )
+ | (?<func> \w+\s*\(\s*\w+(?:\s*,\s*\w+)*\s*\) )
+ | (?<const> (?:0x[a-fA-F0-9]+|\d+[LU]*|'.') )
+ | (?<sym> \w+ )
+ )
+ | (?<op> $binop_pat | $unop_pat )
+ | (?<paren> [\(\)] )
+ )
+ )
+ /xs;
+}
+
+# dump the arguments with dump. wraps loading Dumper
+# as we are executed by miniperl where Dumper isnt available
+sub dd {
+ my $self= shift;
+ local $self->{orig_content};
+ my $ret= "(dump not available)";
+ eval {
+ require Data::Dumper;
+ $ret= Data::Dumper->new(\@_)->Indent(1)->Sortkeys(1)->Useqq(1)->Dump();
+ };
+ return $ret;
+}
+
+my $has_storable;
+
+# same story here, in miniperl we use slow perl code,
+# in real perl we can use Storable and speed things up.
+BEGIN { eval "use Storable; \$has_storable=1;" }
+
+# recursively copy an AoAoA...
+sub copy_aoa {
+ my ($aoa)= @_;
+ if ($has_storable) {
+ return Storable::dclone($aoa);
+ }
+ else {
+ return _copy_aoa($aoa);
+ }
+}
+
+sub _copy_aoa {
+ my ($thing)= @_;
+ if (ref $thing) {
+ return [ map { ref($_) ? _copy_aoa($_) : $_ } @$thing ];
+ }
+ else {
+ return $thing;
+ }
+}
+
+# return the number characters that should go in between a '#' and
+# the name of a c preprocessor directive. Returns 0 spaces for level
+# 0, and 2 * ($level - 1) + 1 spaces for the rest. (1,3,5, etc)
+# This might sound weird, but consider these are tab *stops* and the
+# '#' is included in the total. which means indents of 2, 4, 6 etc.
+sub indent_chars {
+ my ($self, $level)= @_;
+ my $ind= "";
+ $ind .= " " if $level;
+ $ind .= " " x ($level - 1) if $level > 1;
+ return $ind;
+}
+
+# we use OO to store state, etc.
+sub new {
+ my ($class, %args)= @_;
+ return bless \%args,;
+}
+
+# this parses the expression into an array of tokens
+# this is somewhat crude, we could do this incrementally
+# if we wanted and avoid the overhead. but it makes it
+# easier to debug the tokenizer.
+sub _tokenize_expr {
+ my ($self, $expr)= @_;
+ delete $self->{tokens};
+ delete $self->{parse_tree};
+ $self->{original_expr}= $expr;
+
+ my @tokens;
+ while ($expr =~ s/$tokenize_pat//xs) {
+ push @tokens, {%+} if defined $+{'term'};
+ }
+ $self->{tokens}= \@tokens;
+ warn $self->dd($self) if $self->{debug};
+ if (length $expr) {
+ confess "Failed to tokenize_expr: $expr\n";
+ }
+ return \@tokens;
+}
+
+# sort terms in an expression in a way that puts things
+# in a sensible order. Anything starting with PERL_IN_
+# should be on the left in alphabetical order. Digits
+# should be on the right (eg 0), and ties are resolved
+# by stripping non-alpha-numerc, thus removing underbar
+# parens, spaces, logical operators, etc, and then by
+# lc comparison of the result.
+sub _sort_terms {
+ my $self= shift;
+ my (@terms)= map {
+ [
+ $_, # 0: raw
+ lc($_) =~ s/[^a-zA-Z0-9]//gr, # 1: "_" stripped and caseless
+ $_ =~ m/PERL_IN_/ ? 1 : 0, # 2: PERL_IN_ labeled define
+ $_ =~ m/^\d/ ? 1 : 0, # 3: digit
+ $_ =~ m/DEBUGGING/ ? 1 : 0 # 4: DEBUGGING?
+ ]
+ } @_;
+ my %seen;
+ #start-no-tidy
+ @terms= map { $seen{ $_->[0] }++ ? () : $_->[0] }
+ sort {
+ $b->[2] <=> $a->[2] || # PERL_IN before others
+ $a->[3] <=> $b->[3] || # digits after others
+ $a->[4] <=> $b->[4] || # DEBUGGING after all else
+ $a->[1] cmp $b->[1] || # stripped caseless cmp
+ lc($a->[0]) cmp lc($b->[0]) || # caseless cmp
+ $a->[0] cmp $b->[0] || # exact cmp
+ 0
+ } @terms;
+ #end-no-tidy
+ return @terms;
+}
+
+# normalize a condition expression by parsing it and then stringifying
+# the parse tree.
+sub tidy_cond {
+ my ($self, $expr)= @_;
+ my $ret= $self->{_tidy_cond_cache}{$expr} //= do {
+ $self->parse_expr($expr) if defined $expr;
+ my $text= $self->_pt_as_str();
+ $text;
+ };
+ $self->{last_expr}= $ret;
+ return $ret;
+}
+
+# convert a parse tree structure to a string recursively.
+#
+# Parse trees are currently made up of arrays, with the count
+# of items in the object determining the type of op it represents.
+# 1 argument: literal value of some sort.
+# 2 arguments: unary operator: 0 slot is the operator, 1 is a parse tree
+# : ternary: 0 slot holds '?', 1 is an array holding three
+# parse trees: cond, true, false
+# 3 arguments or more: binary operator. 0 slot is the op. 1..n are parse trees
+# : note, this is multigate for commutative operators like
+# : "+", "*", "&&" and "||", so an expr
+# : like "A && B && !C" would be represented as:
+# : [ "&&", ["A"], ["B"], [ "!",["C"] ] ]
+#
+sub _pt_as_str {
+ my ($self, $node, $parent_op, $depth)= @_;
+
+ $node ||= $self->{parse_tree}
+ or confess "No parse tree?";
+ $depth ||= 0;
+ if (@$node == 1) {
+
+ # its a literal
+ return $node->[0];
+ }
+ elsif (@$node == 2) {
+
+ # is this a ternary or an unop?
+ if ($node->[0] eq '?') {
+
+ # ternary, the three "parts" are tucked away in
+ # an array in the payload slot
+ my $expr=
+ $self->_pt_as_str($node->[1][0], "?", $depth + 1) . " ? "
+ . $self->_pt_as_str($node->[1][1], "?", $depth + 1) . " : "
+ . $self->_pt_as_str($node->[1][2], "?", $depth + 1);
+
+ # stick parens on if this is a subexpression
+ $expr= "( " . $expr . " )" if $depth;
+ return $expr;
+ }
+ else {
+ if ( $node->[0] eq "!"
+ and @{ $node->[1] } == 2
+ and $node->[1][0] eq "!")
+ {
+ # normalize away !! in expressions.
+ return $self->_pt_as_str($node->[1][1], $parent_op, $depth);
+ }
+
+ # unop - the payload is a optree
+ return $node->[0]
+ . $self->_pt_as_str($node->[1], $node->[0], $depth + 1);
+ }
+ }
+
+ # if we get here we are dealing with a binary operator
+ # the nodes are not necessarily binary, as we "collect"
+ # the terms into a list, thus: A && B && C && D -> ['&&',A,B,C,D]
+ my ($op, @terms)= @$node;
+
+ # convert the terms to strings
+ @terms= map { $self->_pt_as_str($_, $op, $depth + 1) } @terms;
+
+ # sort them to normalize the subexpression
+ my $expr=
+ join " $op ", $associative{$op}
+ ? $self->_sort_terms(@terms)
+ : @terms;
+
+ # stick parens on if this is a subexpression
+ $expr= "( " . $expr . " )" if $depth and !$cmpop{$op};
+
+ # and we are done.
+ return $expr;
+}
+
+# Returns the precedence of an operator, returns 0 if there is no token
+# or the next token is not an op, or confesss if it encounters an op it does not
+# know.
+sub _precedence {
+ my $self= shift;
+ my $token= shift // return 0;
+
+ my $op= (ref $token ? $token->{op} : $token) // return 0;
+
+ return $precedence{$op} // confess "Unknown op '$op'";
+}
+
+# entry point into parsing the tokens, checks that we actually parsed everything
+# and didnt leave anything in the token stream (possible from a malformed expression)
+# Performs some minor textual cleanups using regexes, but then does a proper parse
+# of the expression.
+sub parse_expr {
+ my ($self, $expr)= @_;
+ if (defined $expr) {
+ $expr =~ s/\s*\\\n\s*/ /g;
+ $expr =~ s/defined\s+(\w+)/defined($1)/g;
+ $self->_tokenize_expr($expr);
+ }
+ my $ret= $self->_parse_expr();
+ if (@{ $self->{tokens} }) {
+
+ # if all was well with parsing we should not get here.
+ confess "Unparsed tokens: ", $self->dd($self->{tokens});
+ }
+ $self->{parse_tree}= $ret;
+ return $ret;
+}
+
+# this is just a wrapper around _parse_expr_assoc() which handles
+# parsing an arbitrary expression.
+sub _parse_expr {
+ my ($self)= @_;
+ return $self->_parse_expr_assoc($self->_parse_expr_primary(), 1);
+}
+
+# This handles extracting from the token stream
+# - simple literals
+# - unops (assumed to be right associative)
+# - parens (which reset the precedence acceptable to the parser)
+#
+sub _parse_expr_primary {
+ my ($self)= @_;
+ my $tokens= $self->{tokens}
+ or confess "No tokens in _parse_expr_primary?";
+ my $first= $tokens->[0]
+ or confess "No primary?";
+ if ($first->{paren} and $first->{paren} eq "(") {
+ shift @$tokens;
+ my $expr= $self->_parse_expr();
+ $first= $tokens->[0];
+ if (!$first->{paren} or $first->{paren} ne ")") {
+ confess "Expecting close paren", $self->dd($tokens);
+ }
+ shift @$tokens;
+ return $expr;
+ }
+ elsif ($first->{op} and $unop{ $first->{op} }) {
+ my $op_token= shift @$tokens;
+ return [ $op_token->{op}, $self->_parse_expr_primary() ];
+ }
+ elsif (defined $first->{literal}) {
+ shift @$tokens;
+ return [ $first->{literal} ];
+ }
+ else {
+ die sprintf
+ "Unexpected token '%s', expecting literal, unary, or expression.\n",
+ $first->{term};
+ }
+}
+
+# This is the heart of the expression parser. It uses
+# a pair of nested loops to avoid excessive recursion during parsing,
+# which should be a bit faster than other strategies. It only should
+# recurse when the precedence level changes.
+sub _parse_expr_assoc {
+ my ($self, $lhs, $min_precedence)= @_;
+ my $tokens= $self->{tokens}
+ or confess "No tokens in _parse_expr_assoc";
+ my $la= $tokens->[0]; # lookahead
+ my $la_pr= $self->_precedence($la); # lookahead precedence
+ while ($la && $la_pr >= $min_precedence) {
+ my $op_token= shift @$tokens;
+ my $op_pr= $la_pr; # op precedence
+ if ($op_token->{op} eq "?") {
+ my $mid= $self->_parse_expr();
+ if (@$tokens and $tokens->[0]{op} and $tokens->[0]{op} eq ":") {
+ shift @$tokens;
+ my $tail= $self->_parse_expr();
+ return [ '?', [ $lhs, $mid, $tail ] ];
+ }
+ confess "Panic: expecting ':'", $self->dd($tokens);
+ }
+ my $rhs;
+ eval { $rhs= $self->_parse_expr_primary(); }
+ or die "Error in $op_names{$op_token->{op}} expression: $@";
+ $la= $tokens->[0];
+ $la_pr= $self->_precedence($la);
+ while (
+ $la_pr > $op_pr || # any and larger
+ ( $is_right_assoc{ $op_token->{op} }
+ and $la_pr == $op_pr) # right and equal
+ ) {
+ my $new_precedence= $op_pr + ($la_pr > $op_pr ? 1 : 0);
+ $rhs= $self->_parse_expr_assoc($rhs, $new_precedence);
+ $la= $tokens->[0];
+ $la_pr= $self->_precedence($la);
+ }
+ if ( @$lhs >= 3
+ && $lhs->[0] eq $op_token->{op}
+ && $commutative{ $op_token->{op} })
+ {
+ push @$lhs, $rhs;
+ }
+ else {
+ $lhs= [ $op_token->{op}, $lhs, $rhs ];
+ }
+ }
+ return $lhs;
+}
+
+#entry point for normalizing and if/elif statements
+#returns the line and condition in normalized form.
+sub normalize_if_elif {
+ my ($self, $line, $line_info)= @_;
+ if (my $dat= $self->{cache_normalize_if_elif}{$line}) {
+ return $dat->{line}, $dat->{cond};
+ }
+ my ($cond);
+ eval {
+ ($line, $cond)= $self->_normalize_if_elif($line);
+ 1;
+ } or die sprintf "Error at line %d\nLine %d: %s\n%s",
+ ($line_info->start_line_num()) x 2, $line, $@;
+ $self->{cache_normalize_if_elif}{$line}= { line => $line, cond => $cond };
+ return ($line, $cond);
+}
+
+#guts of the normalize_if_elif() - cleans up the line, extracts
+#the condition, and then tidies it with tidy_cond().
+sub _normalize_if_elif {
+ my ($self, $line)= @_;
+ my $nl= "";
+ $nl= $1 if $line =~ s/(\n+)\z//;
+ $line =~ s/\s+\z//;
+ my @comment;
+ push @comment, $1 while $line =~ s!\s*(/\*.*?\*/)\z!!;
+ $line =~ s/defined\s*\(\s*(\w+)\s*\)/defined($1)/g;
+ $line =~ s/!\s+defined/!defined/g;
+
+ if ($line =~ /^#((?:el)?if)(n?)def\s+(\w+)/) {
+ my $if= $1;
+ my $not= $2 ? "!" : "";
+ $line= "#$if ${not}defined($3)";
+ }
+ $line =~ s/#((?:el)?if)\s+//
+ or confess "Bad cond: $line";
+ my $if= $1;
+ $line =~ s/!\s+/!/g;
+
+ my $old_cond= $line;
+ my $cond= $self->tidy_cond($old_cond);
+
+ warn "cond - $old_cond\ncond + $cond\n"
+ if $old_cond ne $cond and $self->{debug};
+
+ $line= "#$if $cond";
+ $line .= " " . join " ", reverse @comment if @comment;
+
+ $line .= $nl;
+ return ($line, $cond);
+}
+
+# parses a text buffer as though it was a file on disk
+# calls parse_fh()
+sub parse_text {
+ my ($self, $text)= @_;
+ open my $fh, "<", \$text
+ or die "Failed to open buffer for read: $!";
+ return $self->parse_fh($fh);
+}
+
+# takes a readable filehandle and parses whatever contents is
+# returned by reading it. Returns an array of HeaderLine objects.
+# this is the main routing for parsing a header file.
+sub parse_fh {
+ my ($self, $fh)= @_;
+ my @lines;
+ my @cond;
+ my @cond_line;
+ my $last_cond;
+ my $cb= $self->{pre_process_content};
+ $self->{orig_content}= "";
+ my $line_num= 1;
+
+ while (defined(my $line= readline($fh))) {
+ my $start_line_num= $line_num++;
+ $self->{orig_content} .= $line;
+ while ($line =~ /\\\n\z/ or $line =~ m</\*(?:(?!\*/).)*\s*\z>s) {
+ defined(my $read_line= readline($fh))
+ or last;
+ $self->{orig_content} .= $read_line;
+ $line_num++;
+ $line .= $read_line;
+ }
+ while ($line =~ m!/\*(.*?)(\*/|\z)!gs) {
+ my ($inner, $tail)= ($1, $2);
+ if ($tail ne "*/") {
+ confess
+ "Unterminated comment starting at line $start_line_num\n";
+ }
+ elsif ($inner =~ m!/\*!) {
+ confess
+ "Nested/broken comment starting at line $start_line_num\n";
+ }
+ }
+
+ my $raw= $line;
+ my $type= "content";
+ my $sub_type= "text";
+ my $level= @cond;
+ my $do_pop= 0;
+ my $flat= $line;
+ $flat =~ s/\s*\\\n\s*/ /g;
+ $flat =~ s!/\*.*?\*/! !gs;
+ $flat =~ s/\s+/ /g;
+ $flat =~ s/\s+\z//;
+ $flat =~ s/^\s*#\s*/#/g;
+
+ my $line_info=
+ HeaderLine->new(start_line_num => $start_line_num, raw => $raw);
+ my $do_cond_line;
+ if ($flat =~ /^#/) {
+ if ($flat =~ m/^(#(?:el)?if)(n?)def\s+(\w+)/) {
+ my $if= $1;
+ my $not= $2 ? "!" : "";
+ my $sym= $3;
+ $flat =~
+ s/^(#(?:el)?if)(n?)def\s+(\w+)/$if ${not}defined($sym)/;
+ }
+ my $cond; # used in various expressions below
+ if ($flat =~ /^#endif/) {
+ if (!@cond) {
+ confess "Not expecting $flat";
+ }
+ $do_pop= 1;
+ $level--;
+ $type= "cond";
+ $sub_type= "#endif";
+ }
+ elsif ($flat =~ /^#if\b/) {
+ ($flat, $cond)= $self->normalize_if_elif($flat, $line_info);
+ push @cond, [$cond];
+ push @cond_line, $line_info;
+ $type= "cond";
+ $sub_type= "#if";
+ }
+ elsif ($flat =~ /^#elif\b/) {
+ if (!@cond) {
+ confess "No if for $flat";
+ }
+ $level--;
+ ($flat, $cond)= $self->normalize_if_elif($flat, $line_info);
+ $cond[-1][-1]= $self->tidy_cond("!($cond[-1][-1])");
+ $cond_line[-1]= $line_info;
+ push @{ $cond[-1] }, $cond;
+ $type= "cond";
+ $sub_type= "#elif";
+ }
+ elsif ($flat =~ /^#else\b/) {
+ if (!@cond) {
+ confess "No if for $flat";
+ }
+ $level--;
+ $cond[-1][-1]= $self->tidy_cond("!($cond[-1][-1])");
+ $cond_line[-1]= $line_info;
+ $type= "cond";
+ $sub_type= "#else";
+ }
+ elsif ($flat =~ /#undef/) {
+ $type= "content";
+ $sub_type= "#undef";
+ }
+ elsif ($flat =~ /#pragma\b/) {
+ $type= "content";
+ $sub_type= "#pragma";
+ }
+ elsif ($flat =~ /#include\b/) {
+ $type= "content";
+ $sub_type= "#include";
+ }
+ elsif ($flat =~ /#define\b/) {
+ $type= "content";
+ $sub_type= "#define";
+ }
+ elsif ($flat =~ /#error\b/) {
+ $type= "content";
+ $sub_type= "#error";
+ }
+ else {
+ confess "Do not know what to do with $line";
+ }
+ if ($type eq "cond") {
+
+ # normalize conditional lines
+ $line= $flat;
+ $last_cond= $line_info;
+ }
+ }
+ $line =~ s/\n?\z/\n/;
+
+ %$line_info= (
+ cond => copy_aoa(\@cond),
+ type => $type,
+ sub_type => $sub_type,
+ raw => $raw,
+ flat => $flat,
+ line => $line,
+ level => $level,
+ start_line_num => $start_line_num,
+ n_lines => $line_num - $start_line_num,
+ );
+
+ push @lines, $line_info;
+ if ($do_pop) {
+ pop @cond;
+ pop @cond_line;
+ }
+ if ($type eq "content" and $cb) {
+ $cb->($self, $lines[-1]);
+ }
+ }
+ if (@cond_line) {
+ my $msg= "Unterminated conditional block starting line "
+ . $cond_line[-1]->start_line_num();
+ $msg .=
+ " with last conditional operation at line "
+ . $last_cond->start_line_num()
+ if $cond_line[-1] != $last_cond;
+ confess $msg;
+ }
+ $self->{lines}= \@lines;
+ return \@lines;
+}
+
+# returns the last lines we parsed.
+sub lines { $_[0]->{lines} }
+
+# assuming a line looks like an embed.fnc entry parse it
+# and normalize it, and create and EmbedLine object from it.
+sub tidy_embed_fnc_entry {
+ my ($self, $line_data)= @_;
+ my $line= $line_data->{line};
+ return $line if $line =~ /^\s*:/;
+ return $line unless $line_data->{type} eq "content";
+ return $line unless $line =~ /\|/;
+
+ $line =~ s/\s*\\\n/ /g;
+ $line =~ s/\s+\z//;
+ ($line)= expand($line);
+ my ($flags, $ret, $name, @args)= split /\s*\|\s*/, $line;
+ for ($ret, @args) {
+ s/(\w)\*/$1 */g;
+ s/\*\s+(\w)/*$1/g;
+ s/\*const/* const/g;
+ }
+ my $head= sprintf "%-8s|%-7s", $flags, $ret;
+ $head .= sprintf "|%*s", -(31 - length($head)), $name;
+ if (@args and length($head) > 32) {
+ $head .= "\\\n";
+ $head .= " " x 32;
+ }
+ foreach my $ix (0 .. $#args) {
+ my $arg= $args[$ix];
+ $head .= "|$arg";
+ $head .= "\\\n" . (" " x 32) if $ix < $#args;
+ }
+ $line= $head . "\n";
+
+ if ($line =~ /\\\n/) {
+ my @lines= split /\s*\\\n/, $line;
+ my $len= length($lines[0]);
+ $len < length($_) and $len= length($_) for @lines;
+ $len= int(($len + 7) / 8) * 8;
+ $len= 72 if $len < 72;
+ $line= join("\\\n",
+ (map { sprintf "%*s", -$len, $_ } @lines[ 0 .. $#lines - 1 ]),
+ $lines[-1]);
+ }
+ ($line)= unexpand($line);
+
+ $line_data->{embed}= EmbedLine->new(
+ flags => $flags,
+ return_type => $ret,
+ name => $name,
+ args => \@args,
+ );
+ $line =~ s/\s+\z/\n/;
+ $line_data->{line}= $line;
+ return $line;
+}
+
+# line up the text in a multiline string by a given $fragment
+# of text, inserting whitespace in front or behind the $fragment
+# to get the text to line up. Returns the text. This is wrapped
+# by line_up() and is used to wrap long conditions and comments
+# in the generated code.
+sub _line_up_frag {
+ my ($self, $str, $fragment)= @_;
+ die "has tabs?!" if $str =~ /\t/;
+ my @lines= split /\n/, $str;
+ my $changed= 1;
+ while ($changed) {
+ $changed= 0;
+ foreach my $ix (0 .. $#lines - 1) {
+ my $f_index= 0;
+ my $n_index= 0;
+ while (1) {
+ $f_index= index($lines[$ix], $fragment, $f_index);
+ $n_index= index($lines[ $ix + 1 ], $fragment, $n_index);
+ if ($f_index == -1 or $n_index == -1) {
+ last;
+ }
+ if ($f_index < $n_index) {
+ my $f_idx= $f_index;
+ $f_idx-- while substr($lines[$ix], $f_idx, 1) ne " ";
+ substr($lines[$ix], $f_idx, 0, " " x ($n_index - $f_index));
+ $changed++;
+ last;
+ }
+ elsif ($n_index < $f_index) {
+ my $n_idx= $n_index;
+ $n_idx-- while substr($lines[ $ix + 1 ], $n_idx, 1) ne " ";
+ substr($lines[ $ix + 1 ],
+ $n_idx, 0, " " x ($f_index - $n_index));
+ $changed++;
+ last;
+ }
+ $f_index++;
+ $n_index++;
+ }
+ }
+ }
+ my $ret= join "", map { "$_\n" } @lines;
+ return $ret;
+}
+
+sub line_up {
+ my ($self, $line, @fragments)= @_;
+
+ foreach my $fragment (@fragments) {
+ $line= $self->_line_up_frag($line, $fragment);
+ last if $line ne $_[1];
+ }
+ return $line;
+}
+
+# Takes an array of HeaderLines objects produced by parse_fh()
+# or by group_content(), and turn it into a string.
+sub lines_as_str {
+ my ($self, $lines, $post_process_content)= @_;
+ $lines ||= $self->{lines};
+ my $ret;
+ $post_process_content ||= $self->{post_process_content};
+ my $filter= $self->{filter_content};
+ my $last_line= "";
+ foreach my $line_data (@$lines) {
+ my $line= $line_data->{line};
+ if ($line_data->{type} ne "content" or $line_data->{sub_type} ne "text")
+ {
+ my $level= $line_data->{level};
+ my $ind= $self->indent_chars($level);
+ $line =~ s/^#(\s*)/#$ind/;
+ }
+ if ($line_data->{type} eq "cond") {
+ if ($line_data->{sub_type} =~ /#(?:else|endif)/) {
+ my $joined= join " && ",
+ map { "($_)" } @{ $line_data->{cond}[-1] };
+ my $cond_txt= $self->tidy_cond($joined);
+ $cond_txt= "if $cond_txt" if $line_data->{sub_type} eq "#else";
+ $line =~ s!\s*\z! /* $cond_txt */\n!;
+ }
+ elsif ($line_data->{sub_type} eq "#elif") {
+ my $last_frame= $line_data->{cond}[-1];
+ my $joined= join " && ",
+ map { "($_)" } @$last_frame[ 0 .. ($#$last_frame - 1) ];
+ my $cond_txt= $self->tidy_cond($joined);
+ $line =~ s!\s*\z! /* && $cond_txt */\n!;
+ }
+ }
+ $line =~ s/\s+\z/\n/;
+ if ($last_line eq "\n" and $line eq "\n") {
+ next;
+ }
+ $last_line= $line;
+ if ($line_data->{type} eq "cond") {
+ $line =~ m!(^\s*#\s*\w+\s+)([^/].*?\s*)?(/\*.*)?\n\z!
+ or die "Failed to split cond line: $line";
+ my ($type, $cond, $comment)= ($1, $2, $3);
+ $comment //= "";
+ $cond //= "";
+ my $new_line;
+ if (!length($cond) and $comment) {
+ $comment =~ s!^(/\*\s+)!!
+ and $type .= $1;
+ }
+ my $l= length($type);
+ $line= _my_wrap($type, " " x $l, $cond . $comment);
+ $line =~ s/\n(?!\z)/ \\\n/g;
+ $line =~ s!\\\n\s+((?:\)\s*)*)\*/!$1*/!;
+ $line= $self->line_up($line, "||", "define");
+ }
+ $line_data->{line}= $line;
+ if ($post_process_content and $line_data->{type} eq "content") {
+ $post_process_content->($self, $line_data);
+ }
+ if ($filter and $line_data->{type} eq "content") {
+ $filter->($self, $line_data) or next;
+ }
+ $ret .= $line_data->{line};
+ }
+ return $ret;
+}
+
+# Text::Wrap::wrap has an odd api, so hide it behind a wrapper
+# sub which sets things up properly.
+sub _my_wrap {
+ my ($head, $rest, $line)= @_;
+ local $Text::Wrap::unexpand= 0;
+ local $Text::Wrap::huge= "overflow";
+ local $Text::Wrap::columns= 78;
+ $line= wrap $head, $rest, $line;
+ return $line;
+}
+
+# recursively extract the && expressions from a parse tree,
+# returning the result as strings.
+# if $node is not a '&&' op then it returns $node as a string,
+# otherwise it returns the string form of the arguments to the
+# '&&' op, recursively flattening any '&&' nodes that it might
+# contain.
+sub _and_clauses {
+ my ($self, $node)= @_;
+
+ my @ret;
+ if (@$node < 3 or $node->[0] ne "&&") {
+ return $self->_pt_as_str($node);
+ }
+ foreach my $idx (1 .. $#$node) {
+ push @ret, $self->_and_clauses($node->[$idx]);
+ }
+ return @ret;
+}
+
+# recursively walk the a parse tree, and return the literal
+# terms it contains, ignoring any operators in the optree.
+sub _terms {
+ my ($self, $node)= @_;
+ if (@$node == 1) {
+ return $self->_pt_as_str($node);
+ }
+ my @ret;
+ if (@$node == 2) {
+ if ($node->[0] eq "?") {
+ push @ret, map { $self->_terms($_) } @{ $node->[1] };
+ }
+ else {
+ push @ret, $self->_terms($node->[1]);
+ }
+ }
+ else {
+ foreach my $i (1 .. $#$node) {
+ push @ret, $self->_terms($node->[$i]);
+ }
+ }
+ return @ret;
+}
+
+# takes a HeaderLine "cond" AoA and flattens it into
+# a single expression, and then extracts all the and clauses
+# it contains. Thus [['defined(A)'],['defined(B)']] and
+# [['defined(A) && defined(B)']], end up as ['defined(A)','defined(B)']
+sub _flatten_cond {
+ my ($self, $cond_ary)= @_;
+
+ my $expr= join " && ", map {
+ map { "($_)" }
+ @$_
+ } @$cond_ary;
+ return [] unless $expr;
+ my $tree= $self->parse_expr($expr);
+ my %seen;
+ my @and_clause= grep { !$seen{$_}++ } $self->_and_clauses($tree);
+ return \@and_clause;
+}
+
+# Find the best path into a tree of conditions, such that
+# we reuse the maximum number of existing branches. Returning
+# two arrays, the first contain the parts of $cond_array that
+# make up the best path, in the best path order, and a second array
+# with the remaining items in the initial order they were provided.
+# Thus if we have previously stored only the path "A", "B", "C"
+# into the tree, and want to find the best path for
+# ["E","D","C","B","A"] we should return: ["A","B","C"],["E","D"],
+#
+# This used to reduce the number of conditions in the grouped content,
+# and is especially helpful with dealing with DEBUGGING related
+# functionality. It is coupled with careful control over the order
+# that we add paths and conditions to the tree.
+sub _best_path {
+ my ($self, $tree_node, $cond_array, @path)= @_;
+ my $best= \@path;
+ my $rest= $cond_array;
+ foreach my $cond (@$cond_array) {
+ if ($tree_node->{$cond}) {
+ my ($new_best, $new_rest)=
+ $self->_best_path($tree_node->{$cond},
+ [ grep $_ ne $cond, @$cond_array ],
+ @path, $cond);
+ if (@$new_best > @$best) {
+ ($best, $rest)= ($new_best, $new_rest);
+ }
+ }
+ }
+ if (@$best == @path) {
+ foreach my $cond (@$cond_array) {
+ my $not_cond= $self->tidy_cond("!($cond)");
+ if ($tree_node->{$not_cond}) {
+ $best= [ @path, $cond ];
+ $rest= [ grep $_ ne $cond, @$cond_array ];
+ last;
+ }
+ }
+ }
+ return ($best, $rest);
+}
+
+# This builds a group content tree from a set of lines. each content line in
+# the original file is added to the file based on the conditions that apply to
+# the content.
+#
+# The tree is made up of nested HoH's with keys in the HoH being normalized
+# clauses from the {cond} data in the HeaderLine objects.
+#
+# Care is taken to minimize the number of pathways and to reorder clauses to
+# reuse existing pathways and minimize the total number of conditions in the
+# file.
+#
+# The '' key of a hash contains an array of the lines that are part of the
+# condition that lead to that key. Thus lines with no conditions are in
+# @{$tree{''}}, lines with the condition "defined(A) && defined(B)" would be
+# in $tree{"defined(A)"}{"defined(B)"}{""}.
+#
+# The result of this sub is normally passed into _recurse_group_content_tree()
+# which converts it back into a set of HeaderLine objects.
+#
+sub _build_group_content_tree {
+ my ($self, $lines)= @_;
+ $lines ||= $self->{lines};
+ my $filter= $self->{filter_content};
+ my %seen_normal;
+ foreach my $line_data (@$lines) {
+ next if $line_data->{type} ne "content";
+ next if $filter and !$filter->($self, $line_data);
+ my $cond_frames= $line_data->{cond};
+ my $cond_frame= $self->_flatten_cond($cond_frames);
+ my $flat_merged= join " && ", map "($_)", @$cond_frame;
+ my $normalized;
+ if (@$cond_frame) {
+ $normalized= $self->tidy_cond($flat_merged);
+ }
+ else {
+ $normalized= $flat_merged; # empty string
+ }
+ push @{ $seen_normal{$normalized} }, $line_data;
+ }
+ my @debugging;
+ my @non_debugging;
+ foreach my $key (keys %seen_normal) {
+ if ($key =~ /DEBUGGING/) {
+ push @debugging, $key;
+ }
+ else {
+ push @non_debugging, $key;
+ }
+ }
+ @non_debugging=
+ sort { length($a) <=> length($b) || $a cmp $b } @non_debugging;
+ @debugging= sort { length($b) <=> length($a) || $a cmp $b } @debugging;
+ my %tree;
+ foreach my $normal_expr (@non_debugging, @debugging) {
+ my $all_line_data= $seen_normal{$normal_expr};
+
+ my $cond_frame=
+ (length $normal_expr)
+ ? $self->_flatten_cond([ [$normal_expr] ])
+ : [];
+ @$cond_frame= $self->_sort_terms(@$cond_frame);
+ my $node= \%tree;
+ my ($best, $rest)= $self->_best_path($node, $cond_frame);
+ die sprintf "Woah: %d %d %d", 0 + @$best, 0 + @$rest, 0 + @$cond_frame
+ unless @$best + @$rest == @$cond_frame;
+
+ foreach my $cond (@$best, @$rest) {
+ $node= $node->{$cond} ||= {};
+ }
+ push @{ $node->{''} }, @$all_line_data;
+ }
+
+ warn $self->dd(\%tree) if $self->{debug};
+ $self->{tree}= \%tree;
+ return \%tree;
+}
+
+# convert a tree of conditions constructed by _build_group_content_tree()
+# and turn it into a set of HeaderLines that represents it. Performs the
+# appropriate sets required to reconstitute an if/elif/elif/else sequence
+# by calling _handle_else().
+sub _recurse_group_content_tree {
+ my ($self, $node, @path)= @_;
+ my $depth= 0 + @path;
+ my $ind= $self->indent_chars($depth);
+ my @ret;
+ if ($node->{''}) {
+ if (my $cb= $self->{post_process_grouped_content}) {
+ $cb->($self, $node->{''}, \@path);
+ }
+ if (my $cb= $self->{post_process_content}) {
+ $cb->($self, $_, \@path) for @{ $node->{''} };
+ }
+ push @ret,
+ map { HeaderLine->new(%$_, cond => [@path], level => $depth) }
+ @{ $node->{''} };
+ }
+
+ my %skip;
+ foreach my $expr (
+ map { $_->[0] }
+ sort { $a->[1] cmp $b->[1] || $a->[0] cmp $b->[0] }
+ map { [ $_, lc($_) =~ s/[^A-Za-z0-9]+//gr ] } keys %$node
+ ) {
+ next unless length $expr; # ignore payload
+ my $not= $self->tidy_cond("!($expr)");
+ if ($skip{$expr} or ($not !~ /^!/ and $node->{$not})) {
+ next;
+ }
+ my $kid= $node->{$expr};
+ while (!$node->{$not} and keys(%$kid) == 1 and !$kid->{''}) {
+ my ($kid_key)= keys(%$kid);
+ $expr= $self->tidy_cond("($expr) && ($kid_key)");
+ $kid= $kid->{$kid_key};
+ my $new_not= $self->tidy_cond("!($expr)");
+ if ($node->{$new_not}) {
+ $not= $new_not;
+ $skip{$not}++;
+ }
+ }
+ my $raw= "#${ind}if $expr\n";
+ my $hl= HeaderLine->new(
+ type => "cond",
+ sub_type => "#if",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ @path, [$expr] ]);
+ my $ar= $self->_recurse_group_content_tree($kid, @path, [$expr]);
+ push @ret, $hl, @$ar;
+ if ($node->{$not}) {
+ $skip{$not}++;
+ my $ar=
+ $self->_handle_else($not, $node->{$not}, $ind, $depth, @path,
+ [$not]);
+ push @ret, @$ar;
+ }
+
+ # and finally the #endif
+
+ $raw= "#${ind}endif\n";
+
+ # we need to extract the condition information from the last line in @ret,
+ # as we don't know which condition we are ending here. It could be an elsif
+ # from deep in the parse tree for instance.
+ # So we need to extract the last frame from the cond structure in the last
+ # line-info in @ret.
+ # BUT if this last line is itself an #endif, then we need to take the second
+ # to last line instead, as the endif would have "popped" that frame off the
+ # condition stack.
+ my $last_ret= $ret[-1];
+ my $idx=
+ ($last_ret->{type} eq "cond" && $last_ret->{sub_type} eq "#endif")
+ ? -2
+ : -1;
+ my $end_line= HeaderLine->new(
+ type => "cond",
+ sub_type => "#endif",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ @path, $last_ret->{cond}[$idx] ]);
+ push @ret, $end_line;
+ }
+ return \@ret;
+}
+
+# this handles the specific case of an else clause, detecting
+# when an elif can be constructed, may recursively call itself
+# to deal with if/elif/elif/else chains. Calls back into
+# _recurse_group_content_tree().
+sub _handle_else {
+ my ($self, $not, $kid, $ind, $depth, @path)= @_;
+
+ # extract the first 3 keys - from this we can detect
+ # which of the three scenarios we have to handle.
+ my ($k1, $k2, $k3)=
+ sort { length($a) <=> length($b) || $a cmp $b } keys %$kid;
+ my $not_k1;
+ if (length($k1) and defined($k2) and !defined($k3)) {
+
+ # if we do not have a payload (length($k1)) and we have exactly
+ # two keys (defined($k2) and !defined($k3)) we need to compute
+ # the inverse of $k1, which we will use later.
+ $not_k1= $self->tidy_cond("!($k1)");
+ }
+ my @ret;
+ if (length($k1) and !defined($k2)) {
+
+ # only one child, no payload -> elsif $k1
+ my $sub_expr;
+ do {
+ $sub_expr=
+ !$sub_expr
+ ? $k1
+ : $self->tidy_cond("($sub_expr) && ($k1)");
+ $kid= $kid->{$k1};
+ ($k1, $k2)=
+ sort { length($a) <=> length($b) || $a cmp $b } keys %$kid;
+ } while length($k1) and !defined $k2;
+
+ my $raw= "#${ind}elif $sub_expr\n";
+ push @{ $path[-1] }, $sub_expr;
+ my $hl= HeaderLine->new(
+ type => "cond",
+ sub_type => "#elif",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ map { [@$_] } @path ]);
+ my $ar= $self->_recurse_group_content_tree($kid, @path);
+ push @ret, $hl, @$ar;
+ }
+ elsif (defined($not_k1) and $not_k1 eq $k2) {
+
+ # two children which are complementary, no payload -> elif $k1 else..
+ my $raw= "#${ind}elif $k1\n";
+
+ push @{ $path[-1] }, $k1;
+ my $hl= HeaderLine->new(
+ type => "cond",
+ sub_type => "#elif",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ map { [@$_] } @path ]);
+ my $ar= $self->_recurse_group_content_tree($kid->{$k1}, @path);
+ $path[-1][-1]= $k2;
+ my $rest= $self->_handle_else($k2, $kid->{$k2}, $ind, $depth, @path);
+ push @ret, $hl, @$ar, @$rest;
+ }
+ else {
+ # payload, 3+ children, or 2 which are not complementary -> else
+ my $raw= "#${ind}else\n";
+ my $hl= HeaderLine->new(
+ type => "cond",
+ sub_type => "#else",
+ raw => $raw,
+ line => $raw,
+ level => $depth,
+ cond => [ map { [@$_] } @path ]);
+ my $ar= $self->_recurse_group_content_tree($kid, @path);
+ push @ret, $hl, @$ar;
+ }
+ return \@ret;
+}
+
+# group the content in lines by the condition that apply to them
+# returns a set of lines representing the new structure
+sub group_content {
+ my ($self, $lines, $filter)= @_;
+ $lines ||= $self->{lines};
+ local $self->{filter_content}= $filter || $self->{filter_content};
+ my $tree= $self->_build_group_content_tree($lines);
+ return $self->_recurse_group_content_tree($tree);
+}
+
+#read a file by name - opens the file and passes the fh into parse_fh().
+sub read_file {
+ my ($self, $file_name, $callback)= @_;
+ $self= $self->new() unless ref $self;
+ $self->{last_file_name}= $file_name;
+ open my $fh, "<", $file_name
+ or confess "Failed to open '$file_name' for read: $!";
+ my $lines= $self->parse_fh($fh);
+ if ($callback) {
+ foreach my $line (@$lines) {
+ $callback->($self, $line);
+ }
+ }
+ return $self;
+}
+
+# These are utility methods for the HeaderLine objects.
+sub HeaderLine::new {
+ my ($class, %self)= @_;
+ return bless \%self, $class;
+}
+sub HeaderLine::cond { $_[0]->{cond} } # AoA
+sub HeaderLine::type { $_[0]->{type} }
+sub HeaderLine::type_is { return $_[0]->type eq $_[1] ? 1 : 0 }
+sub HeaderLine::sub_type { $_[0]->{sub_type} }
+sub HeaderLine::sub_type_is { return $_[0]->sub_type eq $_[1] ? 1 : 0 }
+sub HeaderLine::raw { $_[0]->{raw} }
+sub HeaderLine::flat { $_[0]->{flat} }
+sub HeaderLine::line { $_[0]->{line} }
+sub HeaderLine::level { $_[0]->{level} }
+sub HeaderLine::is_content { return $_[0]->type_is("content") }
+sub HeaderLine::is_cond { return $_[0]->type_is("cond") }
+sub HeaderLine::is_define { return $_[0]->sub_type_is("#define") }
+sub HeaderLine::line_num { $_[0]->{start_line_num} }
+sub HeaderLine::n_lines { $_[0]->{n_lines} }
+sub HeaderLine::embed { $_[0]->{embed} }
+*HeaderLine::start_line_num= *HeaderLine::line_num;
+
+# these are methods for EmbedLine objects
+*EmbedLine::new= *HeaderLine::new;
+sub EmbedLine::flags { $_[0]->{flags} }
+sub EmbedLine::return_type { $_[0]->{return_type} }
+sub EmbedLine::name { $_[0]->{name} }
+sub EmbedLine::args { $_[0]->{args} } # array ref
+
+1;
+
+__END__
+
+=head1 NAME
+
+HeaderParser - A minimal header file parser that can be hooked by other porting
+scripts.
+
+=head1 SYNOPSIS
+
+ my $o= HeaderParser->new();
+ my $lines= $o->parse_fh($fh);
+
+=head1 DESCRIPTION
+
+HeaderParser is a tool to parse C preprocessor header files. The tool
+understands the syntax of preprocessor conditions, and is capable of creating
+a parse tree of the expressions involved, and normalizing them as well.
+
+C preprocessor files are a bit tricky to parse properly, especially with a
+"line by line" model. There are two issues that must be dealt with:
+
+=over 4
+
+=item Line Continuations
+
+Any line ending in "\\\n" (that is backslash newline) is considered to be part
+of a longer string which continues on the next line. Processors should replace
+the "\\\n" typically with a space when converting to a "real" line.
+
+=item Comments Acting As A Line Continuation
+
+The rules for header files stipulates that C style comments are stripped
+before processing other content, this means that comments can serve as a form
+of line continuation:
+
+ #if defined(foo) /*
+ */ && defined(bar)
+
+is the same as
+
+ #if defined(foo) && defined(bar)
+
+This type of comment usage is often overlooked by people writing header file
+parsers for the first time.
+
+=item Indented pre processor directives.
+
+It is easy to forget that there may be multiple spaces between the "#"
+character and the directive. It also easy to forget that there may be spaces
+in *front* of the "#" character. Both of these cases are often overlooked.
+
+=back
+
+The main idea of this module is to provide a single framework for correctly
+parsing the content of our header files in a consistent manner. A secondary
+purpose it to make various tasks we want to do easier, such as normalizing
+content or preprocessor expressions, or just extracting the real "content" of
+the file properly.
+
+=head2 parse_fh
+
+This function parses a filehandle into a set of lines. Each line is represented by a hash
+based object which contains the following fields:
+
+ bless {
+ cond => [['defined(a)'],['defined(b)']],
+ type => "content",
+ sub_type => undef,
+ raw => $raw_content_of_line,
+ line => $normalized_content_of_line,
+ level => $level,
+ }, "HeaderLine"
+
+A "line" in this context is a logical line, and because of line continuations
+and comments may contain more than one physical line, and thus more than
+one newline, but will always include at least one and will always end with one
+(unless there is no newline at the end of the file). Thus
+
+ before /*
+ this is a comment
+ */ after \
+ and continues
+
+will be treated as a single logical line even though the content is
+spread over four lines.
+
+=over 4
+
+=item cond
+
+An array of arrays containing the normalized expressions of any C preprocessor
+conditional blocks which include the line. Each line has its own copy of the
+conditions it was operated on currently, but that may change so dont alter
+this data. The inner arrays may contain more than one element. If so then the
+line is part of an "#else" or "#elsif" and the clauses should be considered to
+be a conjuction when considering "when is this line included", however when
+considered as part of an if/elsif/else, each added clause represents the most
+recent condition. In the following you can see how:
+
+ before /* cond => [ ] */
+ #if A /* cond => [ ['A'] ] */
+ do-a /* cond => [ ['A'] ] */
+ #elif B /* cond => [ ['!A', 'B'] ] */
+ do-b /* cond => [ ['!A', 'B'] ] */
+ #else /* cond => [ ['!A', '!B'] ] */
+ do-c /* cond => [ ['!A', '!B'] ] */
+ # if D /* cond => [ ['!A', '!B'], ['D'] ] */
+ do-d /* cond => [ ['!A', '!B'], ['D'] ] */
+ # endif /* cond => [ ['!A', '!B'], ['D'] ] */
+ #endif /* cond => [ ['!A', '!B'] ] */
+ after /* cond => [ ] */
+
+So in the above we can see how the three clauses of the if produce
+a single "frame" in the cond array, but that frame "grows" and changes
+as additional else clauses are added. When an entirely new if block
+is started (D) it gets its own block. Each endif includes the clause
+it terminates.
+
+=item type
+
+This value indicates the type of the line. This may be one of the following:
+'content', 'cond', 'define', 'include' and 'error'. Several of the types
+have a sub_type.
+
+=item sub_type
+
+This value gives more detail on the type of the line where necessary.
+Not all types have a subtype.
+
+ Type | Sub Type
+ --------+----------
+ content | text
+ | include
+ | define
+ | error
+ cond | #if
+ | #elif
+ | #else
+ | #endif
+
+Note that there are no '#ifdef' or '#elifndef' or similar expressions. All
+expressions of that form are normalized into the '#if defined' form to
+simplify processing.
+
+=item raw
+
+This was the raw original text before HeaderParser performed any modifications
+to it.
+
+=item line
+
+This is the normalized and modified text after HeaderParser or any callbacks
+have processed it.
+
+=item level
+
+This is the "indent level" of a line and corresponds to the number of blocks
+that the line is within, not including any blocks that might be created by
+the line itself.
+
+ before /* level => 0 */
+ #if A /* level => 0 */
+ do-a /* level => 1 */
+ #elif B /* level => 0 */
+ do-b /* level => 1 */
+ #else /* level => 0 */
+ do-c /* level => 1 */
+ # if D /* level => 1 */
+ do-d /* level => 2 */
+ # endif /* level => 1 */
+ #endif /* level => 0 */
+ after /* level => 0 */
+
+=back
+
+parse_fh() will throw an exception if it encounters a malformed expression
+or input it cannot handle.
+
+=head2 lines_as_str
+
+This function will return a string representation of the lines it is provided.
+
+=head2 group_content
+
+This function will group the text in the file by the conditions which contain
+it. This is only useful for files where the content is essentially a list and
+where changing the order that lines are output in will not break the resulting
+file.
+
+Each content line will be grouped into a structure of nested if/else blocks
+(elif will produce a new nested block) such that the content under the control
+of a given set of normalized condition clauses are grouped together in the order
+the occurred in the file, such that each combined conditional clause is output
+only once.
+
+This means a file like this:
+
+ #if A
+ A
+ #elif K
+ AK
+ #else
+ ZA
+ #endif
+ #if B && Q
+ B
+ #endif
+ #if Q && B
+ BC
+ #endif
+ #if A
+ AD
+ #endif
+ #if !A
+ ZZ
+ #endif
+
+Will end up looking roughly like this:
+
+ #if A
+ A
+ AD
+ #else
+ ZZ
+ # if K
+ AK
+ # else
+ ZA
+ # endif
+ #endif
+ #if B && Q
+ B
+ BC
+ #endif
+
+Content at a given block level always goes before conditional clauses
+at the same nesting level.
+
+=head2 HOOKS
+
+There are severals hooks that are available, C<pre_process_content> and
+C<post_process_content>, and C<post_process_grouped_content>. All of these
+hooks will be called with the HeaderParser object as the first argument.
+The "process_content" callbacks will be called with a line hash as the second
+argument, and C<post_process_grouped_content> will be called with an
+array of line hashes for the content in that group, so that the array may be
+modified or sorted. Callbacks called from inside of C<group_content()>
+(that is C<post_process_content> and C<post_process_grouped_content> will be
+called with an additional argument containing and array specifying the actual
+conditional "path" to the content (which may differ somewhat from the data in
+a lines "cond" property).
+
+These hooks may do what they like, but generally they will modify the
+"line" property of the line hash to change the final output returned
+by C<lines_as_str()> or C<group_content()>.
+
+=head2 FORMATTING AND INDENTING
+
+Header parser tries hard to produce neat and readable output with a consistent
+style and form. For example:
+
+ #if defined(FOO)
+ # define HAS_FOO
+ # if defined(BAR)
+ # define HAS_FOO_AND_BAR
+ # else /* !defined(BAR) */
+ # define HAS_FOO_NO_BAR
+ # endif /* !defined(BAR) */
+ #endif /* defined(FOO) */
+
+HeaderParser uses two space tab stops for indenting C pre-processor
+directives. It puts the spaces between the "#" and the directive. The "#" is
+considered "part" of the indent, even though the space comes after it. This
+means the first indent level "looks" like one space, and following indents
+look like 2. This should match what a sensible editor would do with two space
+tab stops. The C<indent_chars()> method can be used to convert an indent level
+into a string that contains the appropriate number of spaces to go in between
+the "#" and the directive.
+
+When emitting "#endif", "#elif" and "#else" directives comments will be
+emitted also to show the conditions that apply. These comments may be wrapped
+to cover multiple lines. Some effort is made to get these comments to line up
+visually, but it uses heuristics which may not always produce the best result.
+
+=cut
diff --git a/regen/tidy_embed.pl b/regen/tidy_embed.pl
new file mode 100644
index 0000000000..6991b1db04
--- /dev/null
+++ b/regen/tidy_embed.pl
@@ -0,0 +1,61 @@
+use lib "regen";
+use HeaderParser;
+use strict;
+use warnings;
+
+my $parser= HeaderParser->new(
+ pre_process_content => sub {
+ my ($self,$line_data)= @_;
+ $self->tidy_embed_fnc_entry($line_data);
+ my $embed= $line_data->{embed}
+ or return;
+ },
+ _post_process_grouped_content => sub {
+ my ($self,$group_ary)= @_;
+ @{$group_ary}=
+ sort {
+ $a->{embed}{name} cmp $b->{embed}{name}
+ } @{$group_ary};
+ },
+ );
+my $tap;
+if (@ARGV and $ARGV[0] eq "--tap") {
+ $tap = shift @ARGV;
+}
+my $file= "embed.fnc";
+if (@ARGV) {
+ $file= shift @ARGV;
+}
+my $new= "$file.new";
+my $bak= "$file.bak";
+$parser->read_file($file);
+my $lines= $parser->lines;
+my @tail;
+while ($lines->[-1]{type} eq "content" and
+ ($lines->[-1]{line} eq "\n" or $lines->[-1]{line}=~/^\s*:/)
+) {
+ unshift @tail, pop @$lines;
+}
+
+my $grouped_content_ary= $parser->group_content();
+push @$grouped_content_ary, @tail;
+my $grouped_content_txt= $parser->normalized_content($grouped_content_ary);
+if ($grouped_content_txt ne $parser->{orig_content}) {
+ if ($tap) {
+ print "not ok - $0 $file\n";
+ } elsif (-t) {
+ print "Updating $file\n";
+ }
+ open my $fh,">",$new
+ or die "Failed to open '$new' for write: $!";
+ print $fh $grouped_content_txt
+ or die "Failed to print to '$new': $!";
+ close $fh
+ or die "Failed to close '$new': $!";
+ rename $file, $bak
+ or die "Couldn't move '$file' to '$bak': $!";
+ rename $new, $file
+ or die "Couldn't move embed.fnc.new to embed.fnc: $!";
+} elsif ($tap) {
+ print "ok - $0 $file\n";
+}
diff --git a/t/porting/header_parser.t b/t/porting/header_parser.t
new file mode 100644
index 0000000000..6a8975b507
--- /dev/null
+++ b/t/porting/header_parser.t
@@ -0,0 +1,517 @@
+#!./perl -w
+
+# Verify that all files generated by perl scripts are up to date.
+
+BEGIN {
+ if (-f "./TestInit.pm") {
+ push @INC, ".";
+ } elsif (-f '../TestInit.pm') {
+ push @INC, "..";
+ }
+}
+use TestInit qw(T A); # T is chdir to the top level, A makes paths absolute
+use strict;
+
+# this tests the functions in HeaderParser.pm which we use for make regen.
+
+require './t/test.pl';
+require './regen/HeaderParser.pm';
+
+skip_all_if_miniperl("needs Data::Dumper");
+
+require Data::Dumper;
+
+sub show_text {
+ my ($as_text)= @_;
+ print STDERR $as_text=~s/^/" " x 8/mger;
+}
+
+my $hp= HeaderParser->new;
+$hp->parse_text(<<~'EOF');
+ #ifdef A
+ #ifdef B
+ #define AB
+ content 1
+ #endif
+ content 2
+ #define A
+ #endif
+ /*comment
+ line */
+ #define C /* this is
+ a hidden line continuation */ D
+ EOF
+my $normal= $hp->lines_as_str();
+my $lines= $hp->lines();
+my $lines_as_str= Data::Dumper->new([$lines])->Sortkeys(1)->Useqq(1)->Indent(1)->Dump();
+is($lines_as_str,<<~'DUMP_EOF', "Simple data structure as expected") or show_text($lines_as_str);
+ $VAR1 = [
+ bless( {
+ "cond" => [
+ [
+ "defined(A)"
+ ]
+ ],
+ "flat" => "#if defined(A)",
+ "level" => 0,
+ "line" => "#if defined(A)\n",
+ "n_lines" => 1,
+ "raw" => "#ifdef A\n",
+ "start_line_num" => 1,
+ "sub_type" => "#if",
+ "type" => "cond"
+ }, 'HeaderLine' ),
+ bless( {
+ "cond" => [
+ [
+ "defined(A)"
+ ],
+ [
+ "defined(B)"
+ ]
+ ],
+ "flat" => "#if defined(B)",
+ "level" => 1,
+ "line" => "# if defined(B)\n",
+ "n_lines" => 1,
+ "raw" => "#ifdef B\n",
+ "start_line_num" => 2,
+ "sub_type" => "#if",
+ "type" => "cond"
+ }, 'HeaderLine' ),
+ bless( {
+ "cond" => [
+ [
+ "defined(A)"
+ ],
+ [
+ "defined(B)"
+ ]
+ ],
+ "flat" => "#define AB",
+ "level" => 2,
+ "line" => "# define AB\n",
+ "n_lines" => 1,
+ "raw" => "#define AB\n",
+ "start_line_num" => 3,
+ "sub_type" => "#define",
+ "type" => "content"
+ }, 'HeaderLine' ),
+ bless( {
+ "cond" => [
+ [
+ "defined(A)"
+ ],
+ [
+ "defined(B)"
+ ]
+ ],
+ "flat" => "content 1",
+ "level" => 2,
+ "line" => "content 1\n",
+ "n_lines" => 1,
+ "raw" => "content 1\n",
+ "start_line_num" => 4,
+ "sub_type" => "text",
+ "type" => "content"
+ }, 'HeaderLine' ),
+ bless( {
+ "cond" => [
+ [
+ "defined(A)"
+ ],
+ [
+ "defined(B)"
+ ]
+ ],
+ "flat" => "#endif",
+ "level" => 1,
+ "line" => "# endif /* defined(B) */\n",
+ "n_lines" => 1,
+ "raw" => "#endif\n",
+ "start_line_num" => 5,
+ "sub_type" => "#endif",
+ "type" => "cond"
+ }, 'HeaderLine' ),
+ bless( {
+ "cond" => [
+ [
+ "defined(A)"
+ ]
+ ],
+ "flat" => "content 2",
+ "level" => 1,
+ "line" => "content 2\n",
+ "n_lines" => 1,
+ "raw" => "content 2\n",
+ "start_line_num" => 6,
+ "sub_type" => "text",
+ "type" => "content"
+ }, 'HeaderLine' ),
+ bless( {
+ "cond" => [
+ [
+ "defined(A)"
+ ]
+ ],
+ "flat" => "#define A",
+ "level" => 1,
+ "line" => "# define A\n",
+ "n_lines" => 1,
+ "raw" => "#define A\n",
+ "start_line_num" => 7,
+ "sub_type" => "#define",
+ "type" => "content"
+ }, 'HeaderLine' ),
+ bless( {
+ "cond" => [
+ [
+ "defined(A)"
+ ]
+ ],
+ "flat" => "#endif",
+ "level" => 0,
+ "line" => "#endif /* defined(A) */\n",
+ "n_lines" => 1,
+ "raw" => "#endif\n",
+ "start_line_num" => 8,
+ "sub_type" => "#endif",
+ "type" => "cond"
+ }, 'HeaderLine' ),
+ bless( {
+ "cond" => [],
+ "flat" => "",
+ "level" => 0,
+ "line" => "/*comment\n line */\n",
+ "n_lines" => 2,
+ "raw" => "/*comment\n line */\n",
+ "start_line_num" => 9,
+ "sub_type" => "text",
+ "type" => "content"
+ }, 'HeaderLine' ),
+ bless( {
+ "cond" => [],
+ "flat" => "#define C D",
+ "level" => 0,
+ "line" => "#define C /* this is\n a hidden line continuation */ D\n",
+ "n_lines" => 2,
+ "raw" => "#define C /* this is\n a hidden line continuation */ D\n",
+ "start_line_num" => 11,
+ "sub_type" => "#define",
+ "type" => "content"
+ }, 'HeaderLine' )
+ ];
+ DUMP_EOF
+
+is($normal,<<~'EOF',"Normalized text as expected");
+ #if defined(A)
+ # if defined(B)
+ # define AB
+ content 1
+ # endif /* defined(B) */
+ content 2
+ # define A
+ #endif /* defined(A) */
+ /*comment
+ line */
+ #define C /* this is
+ a hidden line continuation */ D
+ EOF
+
+{
+ my @warn;
+ local $SIG{__WARN__}= sub { push @warn, $_[0]; warn $_[0] };
+ my $ok= eval {
+ HeaderParser->new()->parse_text(<<~'EOF'); 1
+ #ifdef A
+ #ifdef B
+ #endif
+ EOF
+ };
+ my $err= !$ok ? $@ : "";
+ ok(!$ok,"Should throw an error");
+ like($err,qr/Unterminated conditional block starting line 1 with last conditional operation at line 3/,
+ "Got expected error message");
+}
+{
+ my @warn;
+ local $SIG{__WARN__}= sub { push @warn, $_[0]; warn $_[0] };
+ my $ok= eval {
+ HeaderParser->new()->parse_text(<<~'EOF'); 1
+ #ifdef A
+ #ifdef B
+ #elif C
+ EOF
+ };
+ my $err= !$ok ? $@ : "";
+ ok(!$ok,"Should throw an error");
+ like($err,qr/Unterminated conditional block starting line 3/,
+ "Unterminated block detected");
+}
+{
+ my @warn;
+ local $SIG{__WARN__}= sub { push @warn, $_[0]; warn $_[0] };
+ my $ok= eval {
+ HeaderParser->new()->parse_text(<<~'EOF'); 1
+ #if 1 * * 10 > 5
+ #elifdef C
+ EOF
+ };
+ my $err= !$ok ? $@ : "";
+ ok(!$ok,"Should throw an error");
+ is($err,
+ "Error at line 1\n" .
+ "Line 1: #if 1 * * 10 > 5\n" .
+ "Error in multiplication expression: " .
+ "Unexpected token '*', expecting literal, unary, or expression.\n",
+ "Expected token error") or warn $err;
+}
+{
+ my $hp= HeaderParser->new(debug=>0);
+
+ $hp->parse_text(<<~'EOF');
+ #ifdef A
+ # ifdef B
+ # define P
+ # else
+ # define Q
+ # endif
+ # if !defined B
+ # define R
+ # else
+ # define S
+ # endif
+ #endif
+ EOF
+ my $grouped= $hp->group_content();
+ my $as_text= $hp->lines_as_str($grouped);
+ is($as_text,<<~'EOF',"inverted simple clauses get merged properly") or show_text($as_text);
+ #if defined(A)
+ # if defined(B)
+ # define P
+ # define S
+ # else /* if !defined(B) */
+ # define Q
+ # define R
+ # endif /* !defined(B) */
+ #endif /* defined(A) */
+ EOF
+}
+{
+ my $hp= HeaderParser->new(debug=>0);
+ $hp->parse_text(<<~'EOF');
+ #if defined(A) && defined(B)
+ # if (defined(C) && defined(D))
+ # define P
+ # else
+ # define Q
+ # endif
+ # if !(defined C && defined D)
+ # define R
+ # else
+ # define S
+ # endif
+ #endif
+ EOF
+ my $grouped= $hp->group_content();
+ my $as_text= $hp->lines_as_str($grouped);
+ is($as_text,<<~'EOF',"inverted complex clauses get merged properly") or show_text($as_text);
+ #if defined(A) && defined(B)
+ # if defined(C) && defined(D)
+ # define P
+ # define S
+ # else /* if !( defined(C) && defined(D) ) */
+ # define Q
+ # define R
+ # endif /* !( defined(C) && defined(D) ) */
+ #endif /* defined(A) && defined(B) */
+ EOF
+}
+{
+ my $hp= HeaderParser->new(debug=>0);
+ $hp->parse_text(<<~'EOF');
+ #if defined(A)
+ #define HAS_A
+ #elif defined(B)
+ #define HAS_B
+ #elif defined(C)
+ #define HAS_C
+ #else
+ #define HAS_D
+ #endif
+ EOF
+ my $grouped= $hp->group_content();
+ my $as_text= $hp->lines_as_str($grouped);
+ is($as_text,<<~'EOF',"test nested elif round trip") or show_text($as_text);
+ #if defined(A)
+ # define HAS_A
+ #elif defined(B) /* && !defined(A) */
+ # define HAS_B
+ #elif defined(C) /* && !defined(A) && !defined(B) */
+ # define HAS_C
+ #else /* if !defined(A) && !defined(B) && !defined(C) */
+ # define HAS_D
+ #endif /* !defined(A) && !defined(B) && !defined(C) */
+ EOF
+}
+{
+ my $hp= HeaderParser->new(debug=>0);
+ $hp->parse_text(<<~'EOF');
+ #if defined(A)
+ #define HAS_A
+ #endif
+ #if !defined(A) && defined(B)
+ #define HAS_B
+ #endif
+ #if defined(C)
+ #if !defined(A)
+ #if !defined(B)
+ #define HAS_C
+ #endif
+ #endif
+ #endif
+ #if !defined(B) && !defined(A) && !defined(C)
+ #define HAS_D
+ #endif
+ EOF
+ my $grouped= $hp->group_content();
+ my $as_text= $hp->lines_as_str($grouped);
+ is($as_text,<<~'EOF',"test elif composition from disparate statements") or show_text($as_text);
+ #if defined(A)
+ # define HAS_A
+ #elif defined(B) /* && !defined(A) */
+ # define HAS_B
+ #elif defined(C) /* && !defined(A) && !defined(B) */
+ # define HAS_C
+ #else /* if !defined(A) && !defined(B) && !defined(C) */
+ # define HAS_D
+ #endif /* !defined(A) && !defined(B) && !defined(C) */
+ EOF
+}
+{
+ my $hp= HeaderParser->new(debug=>0);
+ $hp->parse_text(<<~'EOF');
+ #if defined(A)
+ #define HAS_A
+ #endif
+ #if !defined(A)
+ #define HAS_NOT_A
+ #if !defined(C)
+ #define HAS_A_NOT_C
+ #endif
+ #endif
+ #if defined(C)
+ #define HAS_C
+ #if defined(A)
+ #define HAS_A_C
+ #endif
+ #else
+ #if defined(A)
+ #define HAS_NOT_C_A
+ #endif
+ #endif
+ EOF
+ my $grouped= $hp->group_content();
+ my $as_text= $hp->lines_as_str($grouped);
+ is($as_text,<<~'EOF',"test else composition") or show_text($as_text);
+ #if defined(A)
+ # define HAS_A
+ # if defined(C)
+ # define HAS_A_C
+ # else /* if !defined(C) */
+ # define HAS_NOT_C_A
+ # endif /* !defined(C) */
+ #else /* if !defined(A) */
+ # define HAS_NOT_A
+ # if !defined(C)
+ # define HAS_A_NOT_C
+ # endif /* !defined(C) */
+ #endif /* !defined(A) */
+ #if defined(C)
+ # define HAS_C
+ #endif /* defined(C) */
+ EOF
+}
+{
+ my $hp= HeaderParser->new(debug=>0);
+ $hp->parse_text(<<~'EOF');
+ #if !defined(A)
+ #define NOT_A1
+ #else
+ #define A1
+ #endif
+ #if !!!!defined(A)
+ #define A2
+ #else
+ #define NOT_A2
+ #endif
+ EOF
+ my $grouped= $hp->group_content();
+ my $as_text= $hp->lines_as_str($grouped);
+ is($as_text,<<~'EOF',"normalization into if/else") or show_text($as_text);
+ #if defined(A)
+ # define A1
+ # define A2
+ #else /* if !defined(A) */
+ # define NOT_A1
+ # define NOT_A2
+ #endif /* !defined(A) */
+ EOF
+}
+{
+ my $hp= HeaderParser->new(debug=>0);
+ $hp->parse_text(<<~'EOF');
+ #if !!!(defined(A) && defined(B))
+ #define NOT_A_AND_B
+ #endif
+ #if defined(A)
+ #if defined(B)
+ #define A_AND_B
+ #endif
+ #endif
+ EOF
+ my $grouped= $hp->group_content();
+ my $as_text= $hp->lines_as_str($grouped);
+ is($as_text,<<~'EOF',"normalization with complex else") or show_text($as_text);
+ #if defined(A) && defined(B)
+ # define A_AND_B
+ #else /* if !( defined(A) && defined(B) ) */
+ # define NOT_A_AND_B
+ #endif /* !( defined(A) && defined(B) ) */
+ EOF
+}
+{
+ my $hp= HeaderParser->new(debug=>0);
+ $hp->parse_text(<<~'EOF');
+ #if defined(A) && !!defined(A) && !!!!defined(A)
+ #define HAS_A
+ #endif
+ EOF
+ my $grouped= $hp->group_content();
+ my $as_text= $hp->lines_as_str($grouped);
+ is($as_text,<<~'EOF',"simplification") or show_text($as_text);
+ #if defined(A)
+ # define HAS_A
+ #endif /* defined(A) */
+ EOF
+}
+{
+ local $::TODO;
+ $::TODO= "Absorbtion not implemented yet";
+ # currently we don't handle absorbtion: (A && (A || B || C ...)) == A
+ my $hp= HeaderParser->new(debug=>0);
+ $hp->parse_text(<<~'EOF');
+ #if defined(X) && (defined(X) || defined(Y))
+ #define HAS_X
+ #endif
+ EOF
+ my $grouped= $hp->group_content();
+ my $as_text= $hp->lines_as_str($grouped);
+ is($as_text,<<~'EOF',"simplification by absorbtion"); # or show_text($as_text);
+ #if defined(X)
+ # define HAS_X
+ #endif /* defined(X) */
+ EOF
+}
+
+done_testing();