5 files changed, 2135 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
index 8dec989978..618694d914 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5555,6 +5555,7 @@ regen/embed.pl			Produces {embed,embedvar,proto}.h
 regen/embed_lib.pl		Reads embed.fnc and regen/opcodes
 regen/feature.pl		Generates feature.pm
 regen/genpacksizetables.pl	Generate the size tables for pack/unpack
+regen/HeaderParser.pm		Module used to parse header files
 regen/keywords.pl		Program to write keywords.h
 regen/lib_cleanup.pl		Generate lib/.gitignore from MANIFEST
 regen/mg_vtable.pl		generate mg_vtable.h
@@ -5572,6 +5573,7 @@ regen/regcharclass_multi_char_folds.pl	Generate input for regcharclass.pl
 regen/regcomp.pl		Builder of regnodes.h
 regen/regen_lib.pl		Common file routines for generator scripts
 regen/scope_types.pl		Regenerate scope_types.h
+regen/tidy_embed.pl		Program to clean up embed.fnc manually
 regen/uconfig_h.pl		generate uconfig.h (requires /bin/sh)
 regen/unicode_constants.pl	generate unicode_constants.h
 regen/warnings.pl		Program to write warnings.h and lib/warnings.pm
@@ -6118,6 +6120,7 @@ t/porting/extrefs.t		Check perl headers don't make extern refs
 t/porting/filenames.t		Check the MANIFEST for filename portability.
 t/porting/FindExt.t		Test win32/FindExt.pm
 t/porting/globvar.t		Check that globvar.sym is sane
+t/porting/header_parser.t	Check that regen/HeaderParser.pm works as expected
 t/porting/known_pod_issues.dat	Data file for porting/podcheck.t
 t/porting/libperl.t		Check libperl.a sanity
 t/porting/maintainers.t		Test that Porting/Maintainers.pl is up to date
diff --git a/regen.pl b/regen.pl
index 3cadf85c09..5ea59e0f5c 100644
--- a/regen.pl
+++ b/regen.pl
@@ -31,4 +31,5 @@ overload.pl
 reentr.pl
 regcomp.pl
 scope_types.pl
+tidy_embed.pl
 warnings.pl
diff --git a/regen/HeaderParser.pm b/regen/HeaderParser.pm
new file mode 100644
index 0000000000..8bc2c72548
--- /dev/null
+++ b/regen/HeaderParser.pm
@@ -0,0 +1,1553 @@
+package HeaderParser;
+use strict;
+use warnings;
+
+# these are required below in BEGIN statements, we cant have a
+# hard dependency on them as they might not be available when
+# we run as part of autodoc.pl
+#
+# use Data::Dumper;
+# use Storable qw(dclone);
+#
+use Carp       qw(confess);
+use Text::Tabs qw(expand unexpand);
+use Text::Wrap qw(wrap);
+
+# The style of this file is determined by:
+#
+# perltidy -w -ple -bbb -bbc -bbs -nolq -l=80 -noll -nola -nwls='=' \
+#   -isbc -nolc -otr -kis -ci=4 -se -sot -sct -nsbl -pt=2 -fs  \
+#   -fsb='#start-no-tidy' -fse='#end-no-tidy' -cpb -bfvt=2
+
+my (
+    %unop,              # unary operators and their precedence
+    %binop,             # binary operators and their precedence
+    %is_right_assoc,    # operators which are right associative
+    %precedence,        # precedence of all operators.
+    %associative,       # associative operators
+    %commutative,       # commutative operators
+    %cmpop,             # comparison operators
+    $unop_pat,          # pattern to match unary operators
+    $binop_pat,         # pattern to match binary operators
+    %op_names,          # map of op to description, used in error messages
+    $tokenize_pat       # a pattern which can tokenize an expression
+);
+
+BEGIN {
+    # this is initialization for the operator precedence expression parser
+    # we use for handling preprocessor conditions.
+    %op_names= (
+        '==' => 'equality',
+        '!=' => 'inequality',
+        '<<' => 'bit-shift-left',
+        '>>' => 'bit-shift-right',
+        '+'  => 'addition',
+        '-'  => 'subtraction',
+        '*'  => 'multiplication',
+        '/'  => 'division',
+        '%'  => 'modulo',
+        '||' => 'logical-or',       # Lowest precedence
+        '&&' => 'logical-and',
+        '|'  => 'binary-or',
+        '^'  => 'binary-xor',
+        '&'  => 'binary-and',
+        '<'  => 'less-than',        # split on spaces, all with equal precedence
+        '>'  => 'greater-than',
+        '<=' => 'less-than-or-equal',
+        '>=' => 'greater-than-or-equal',
+    );
+    my @cmpop= (
+        '== !=',                    # listed in lowest to highest precedence
+        '< > <= >=',                # split on spaces, all with equal precedence
+    );
+    my @binop= (
+        '||',                       # Lowest precedence
+        '&&',
+        '|',
+        '^',
+        '&',
+        @cmpop,    # include the numerical comparison operators.
+        '<< >>',
+        '+ -',
+        '* / %',    # highest prcedence operators.
+    );
+
+    my @unop= qw( ! ~ + - );
+    %unop= map  { $_ => 1 } @unop;
+    %cmpop= map { $_ => 1 } map { split /\s+/, $_ } @cmpop;
+    %binop= map { $_ => 1 } map { split /\s+/, $_ } @binop;
+
+    my $make_pat= sub {
+        my $pat= join "|", sort { length($b) <=> length($a) || $a cmp $b }
+            map quotemeta($_), @_;
+        return qr/$pat/;
+    };
+    $unop_pat= $make_pat->(@unop);
+    foreach my $ix (0 .. $#binop) {
+        my $sym= $binop[$ix];
+        $precedence{$_}= (1 + $ix) * 10 for split /\s+/, $sym;
+    }
+    $is_right_assoc{"?"}= 1;
+    $is_right_assoc{":"}= 1;
+    $precedence{"?"}= 1;
+    $precedence{":"}= 0;
+
+    $associative{$_}++
+        for qw( || && + *);    # we leave '==' out so we don't reorder terms
+    $commutative{$_}++ for qw( || && + *);
+
+    $binop_pat= $make_pat->(keys %precedence);
+    $tokenize_pat= qr/
+     ^(?:
+        (?<comment> \/\*.*?\*\/ )
+      | (?<ws>      \s+ )
+      | (?<term>
+            (?<literal>
+                (?<define> defined\(\w+\) )
+            |   (?<func>   \w+\s*\(\s*\w+(?:\s*,\s*\w+)*\s*\) )
+            |   (?<const>  (?:0x[a-fA-F0-9]+|\d+[LU]*|'.') )
+            |   (?<sym>    \w+ )
+            )
+        |   (?<op> $binop_pat | $unop_pat )
+        |   (?<paren> [\(\)] )
+        )
+      )
+    /xs;
+}
+
+# dump the arguments with dump. wraps loading Dumper
+# as we are executed by miniperl where Dumper isnt available
+sub dd {
+    my $self= shift;
+    local $self->{orig_content};
+    my $ret= "(dump not available)";
+    eval {
+        require Data::Dumper;
+        $ret= Data::Dumper->new(\@_)->Indent(1)->Sortkeys(1)->Useqq(1)->Dump();
+    };
+    return $ret;
+}
+
+my $has_storable;
+
+# same story here, in miniperl we use slow perl code,
+# in real perl we can use Storable and speed things up.
+BEGIN { eval "use Storable; \$has_storable=1;" }
+
+# recursively copy an AoAoA...
+sub copy_aoa {
+    my ($aoa)= @_;
+    if ($has_storable) {
+        return Storable::dclone($aoa);
+    }
+    else {
+        return _copy_aoa($aoa);
+    }
+}
+
+sub _copy_aoa {
+    my ($thing)= @_;
+    if (ref $thing) {
+        return [ map { ref($_) ? _copy_aoa($_) : $_ } @$thing ];
+    }
+    else {
+        return $thing;
+    }
+}
+
+# return the number characters that should go in between a '#' and
+# the name of a c preprocessor directive. Returns 0 spaces for level
+# 0, and 2 * ($level - 1) + 1 spaces for the rest. (1,3,5, etc)
+# This might sound weird, but consider these are tab *stops* and the
+# '#' is included in the total. which means indents of 2, 4, 6 etc.
+sub indent_chars {
+    my ($self, $level)= @_;
+    my $ind= "";
+    $ind .= " "                 if $level;
+    $ind .= "  " x ($level - 1) if $level > 1;
+    return $ind;
+}
+
+# we use OO to store state, etc.
+sub new {
+    my ($class, %args)= @_;
+    return bless \%args,;
+}
+
+# this parses the expression into an array of tokens
+# this is somewhat crude, we could do this incrementally
+# if we wanted and avoid the overhead. but it makes it
+# easier to debug the tokenizer.
+sub _tokenize_expr {
+    my ($self, $expr)= @_;
+    delete $self->{tokens};
+    delete $self->{parse_tree};
+    $self->{original_expr}= $expr;
+
+    my @tokens;
+    while ($expr =~ s/$tokenize_pat//xs) {
+        push @tokens, {%+} if defined $+{'term'};
+    }
+    $self->{tokens}= \@tokens;
+    warn $self->dd($self) if $self->{debug};
+    if (length $expr) {
+        confess "Failed to tokenize_expr: $expr\n";
+    }
+    return \@tokens;
+}
+
+# sort terms in an expression in a way that puts things
+# in a sensible order. Anything starting with PERL_IN_
+# should be on the left in alphabetical order. Digits
+# should be on the right (eg 0), and ties are resolved
+# by stripping non-alpha-numerc, thus removing underbar
+# parens, spaces, logical operators, etc, and then by
+# lc comparison of the result.
+sub _sort_terms {
+    my $self= shift;
+    my (@terms)= map {
+        [
+            $_,                                # 0: raw
+            lc($_) =~ s/[^a-zA-Z0-9]//gr,      # 1: "_" stripped and caseless
+            $_     =~ m/PERL_IN_/  ? 1 : 0,    # 2: PERL_IN_ labeled define
+            $_     =~ m/^\d/       ? 1 : 0,    # 3: digit
+            $_     =~ m/DEBUGGING/ ? 1 : 0     # 4: DEBUGGING?
+        ]
+    } @_;
+    my %seen;
+    #start-no-tidy
+    @terms= map { $seen{ $_->[0] }++ ? () : $_->[0] }
+        sort {
+            $b->[2] <=> $a->[2]         ||    # PERL_IN before others
+            $a->[3] <=> $b->[3]         ||    # digits after others
+            $a->[4] <=> $b->[4]         ||    # DEBUGGING after all else
+            $a->[1] cmp $b->[1]         ||    # stripped caseless cmp
+            lc($a->[0]) cmp lc($b->[0]) ||    # caseless cmp
+            $a->[0] cmp $b->[0]         ||    # exact cmp
+            0
+        } @terms;
+    #end-no-tidy
+    return @terms;
+}
+
+# normalize a condition expression by parsing it and then stringifying
+# the parse tree.
+sub tidy_cond {
+    my ($self, $expr)= @_;
+    my $ret= $self->{_tidy_cond_cache}{$expr} //= do {
+        $self->parse_expr($expr) if defined $expr;
+        my $text= $self->_pt_as_str();
+        $text;
+    };
+    $self->{last_expr}= $ret;
+    return $ret;
+}
+
+# convert a parse tree structure to a string recursively.
+#
+# Parse trees are currently made up of arrays, with the count
+# of items in the object determining the type of op it represents.
+# 1 argument:  literal value of some sort.
+# 2 arguments: unary operator: 0 slot is the operator, 1 is a parse tree
+#            : ternary: 0 slot holds '?', 1 is an array holding three
+#                       parse trees: cond, true, false
+# 3 arguments or more: binary operator. 0 slot is the op. 1..n are parse trees
+#                    : note, this is multigate for commutative operators like
+#                    : "+", "*", "&&" and "||", so an expr
+#                    : like "A && B && !C" would be represented as:
+#                    : [ "&&", ["A"], ["B"], [ "!",["C"] ] ]
+#
+sub _pt_as_str {
+    my ($self, $node, $parent_op, $depth)= @_;
+
+    $node ||= $self->{parse_tree}
+        or confess "No parse tree?";
+    $depth ||= 0;
+    if (@$node == 1) {
+
+        # its a literal
+        return $node->[0];
+    }
+    elsif (@$node == 2) {
+
+        # is this a ternary or an unop?
+        if ($node->[0] eq '?') {
+
+            # ternary, the three "parts" are tucked away in
+            # an array in the payload slot
+            my $expr=
+                  $self->_pt_as_str($node->[1][0], "?", $depth + 1) . " ? "
+                . $self->_pt_as_str($node->[1][1], "?", $depth + 1) . " : "
+                . $self->_pt_as_str($node->[1][2], "?", $depth + 1);
+
+            # stick parens on if this is a subexpression
+            $expr= "( " . $expr . " )" if $depth;
+            return $expr;
+        }
+        else {
+            if (    $node->[0] eq "!"
+                and @{ $node->[1] } == 2
+                and $node->[1][0] eq "!")
+            {
+                # normalize away !! in expressions.
+                return $self->_pt_as_str($node->[1][1], $parent_op, $depth);
+            }
+
+            # unop - the payload is a optree
+            return $node->[0]
+                . $self->_pt_as_str($node->[1], $node->[0], $depth + 1);
+        }
+    }
+
+    # if we get here we are dealing with a binary operator
+    # the nodes are not necessarily binary, as we "collect"
+    # the terms into a list, thus: A && B && C && D -> ['&&',A,B,C,D]
+    my ($op, @terms)= @$node;
+
+    # convert the terms to strings
+    @terms= map { $self->_pt_as_str($_, $op, $depth + 1) } @terms;
+
+    # sort them to normalize the subexpression
+    my $expr=
+        join " $op ", $associative{$op}
+        ? $self->_sort_terms(@terms)
+        : @terms;
+
+    # stick parens on if this is a subexpression
+    $expr= "( " . $expr . " )" if $depth and !$cmpop{$op};
+
+    # and we are done.
+    return $expr;
+}
+
+# Returns the precedence of an operator, returns 0 if there is no token
+# or the next token is not an op, or confesss if it encounters an op it does not
+# know.
+sub _precedence {
+    my $self= shift;
+    my $token= shift // return 0;
+
+    my $op= (ref $token ? $token->{op} : $token) // return 0;
+
+    return $precedence{$op} // confess "Unknown op '$op'";
+}
+
+# entry point into parsing the tokens, checks that we actually parsed everything
+# and didnt leave anything in the token stream (possible from a malformed expression)
+# Performs some minor textual cleanups using regexes, but then does a proper parse
+# of the expression.
+sub parse_expr {
+    my ($self, $expr)= @_;
+    if (defined $expr) {
+        $expr =~ s/\s*\\\n\s*/ /g;
+        $expr =~ s/defined\s+(\w+)/defined($1)/g;
+        $self->_tokenize_expr($expr);
+    }
+    my $ret= $self->_parse_expr();
+    if (@{ $self->{tokens} }) {
+
+        # if all was well with parsing we should not get here.
+        confess "Unparsed tokens: ", $self->dd($self->{tokens});
+    }
+    $self->{parse_tree}= $ret;
+    return $ret;
+}
+
+# this is just a wrapper around _parse_expr_assoc() which handles
+# parsing an arbitrary expression.
+sub _parse_expr {
+    my ($self)= @_;
+    return $self->_parse_expr_assoc($self->_parse_expr_primary(), 1);
+}
+
+# This handles extracting from the token stream
+#  - simple literals
+#  - unops (assumed to be right associative)
+#  - parens (which reset the precedence acceptable to the parser)
+#
+sub _parse_expr_primary {
+    my ($self)= @_;
+    my $tokens= $self->{tokens}
+        or confess "No tokens in _parse_expr_primary?";
+    my $first= $tokens->[0]
+        or confess "No primary?";
+    if ($first->{paren} and $first->{paren} eq "(") {
+        shift @$tokens;
+        my $expr= $self->_parse_expr();
+        $first= $tokens->[0];
+        if (!$first->{paren} or $first->{paren} ne ")") {
+            confess "Expecting close paren", $self->dd($tokens);
+        }
+        shift @$tokens;
+        return $expr;
+    }
+    elsif ($first->{op} and $unop{ $first->{op} }) {
+        my $op_token= shift @$tokens;
+        return [ $op_token->{op}, $self->_parse_expr_primary() ];
+    }
+    elsif (defined $first->{literal}) {
+        shift @$tokens;
+        return [ $first->{literal} ];
+    }
+    else {
+        die sprintf
+            "Unexpected token '%s', expecting literal, unary, or expression.\n",
+            $first->{term};
+    }
+}
+
+# This is the heart of the expression parser. It uses
+# a pair of nested loops to avoid excessive recursion during parsing,
+# which should be a bit faster than other strategies. It only should
+# recurse when the precedence level changes.
+sub _parse_expr_assoc {
+    my ($self, $lhs, $min_precedence)= @_;
+    my $tokens= $self->{tokens}
+        or confess "No tokens in _parse_expr_assoc";
+    my $la= $tokens->[0];                  # lookahead
+    my $la_pr= $self->_precedence($la);    # lookahead precedence
+    while ($la && $la_pr >= $min_precedence) {
+        my $op_token= shift @$tokens;
+        my $op_pr= $la_pr;                 # op precedence
+        if ($op_token->{op} eq "?") {
+            my $mid= $self->_parse_expr();
+            if (@$tokens and $tokens->[0]{op} and $tokens->[0]{op} eq ":") {
+                shift @$tokens;
+                my $tail= $self->_parse_expr();
+                return [ '?', [ $lhs, $mid, $tail ] ];
+            }
+            confess "Panic: expecting ':'", $self->dd($tokens);
+        }
+        my $rhs;
+        eval { $rhs= $self->_parse_expr_primary(); }
+            or die "Error in $op_names{$op_token->{op}} expression: $@";
+        $la= $tokens->[0];
+        $la_pr= $self->_precedence($la);
+        while (
+            $la_pr > $op_pr ||    # any and larger
+            (       $is_right_assoc{ $op_token->{op} }
+                and $la_pr == $op_pr)    # right and equal
+        ) {
+            my $new_precedence= $op_pr + ($la_pr > $op_pr ? 1 : 0);
+            $rhs= $self->_parse_expr_assoc($rhs, $new_precedence);
+            $la= $tokens->[0];
+            $la_pr= $self->_precedence($la);
+        }
+        if (   @$lhs >= 3
+            && $lhs->[0] eq $op_token->{op}
+            && $commutative{ $op_token->{op} })
+        {
+            push @$lhs, $rhs;
+        }
+        else {
+            $lhs= [ $op_token->{op}, $lhs, $rhs ];
+        }
+    }
+    return $lhs;
+}
+
+#entry point for normalizing and if/elif statements
+#returns the line and condition in normalized form.
+sub normalize_if_elif {
+    my ($self, $line, $line_info)= @_;
+    if (my $dat= $self->{cache_normalize_if_elif}{$line}) {
+        return $dat->{line}, $dat->{cond};
+    }
+    my ($cond);
+    eval {
+        ($line, $cond)= $self->_normalize_if_elif($line);
+        1;
+    } or die sprintf "Error at line %d\nLine %d: %s\n%s",
+        ($line_info->start_line_num()) x 2, $line, $@;
+    $self->{cache_normalize_if_elif}{$line}= { line => $line, cond => $cond };
+    return ($line, $cond);
+}
+
+#guts of the normalize_if_elif() - cleans up the line, extracts
+#the condition, and then tidies it with tidy_cond().
+sub _normalize_if_elif {
+    my ($self, $line)= @_;
+    my $nl= "";
+    $nl= $1 if $line =~ s/(\n+)\z//;
+    $line =~ s/\s+\z//;
+    my @comment;
+    push @comment, $1 while $line =~ s!\s*(/\*.*?\*/)\z!!;
+    $line =~ s/defined\s*\(\s*(\w+)\s*\)/defined($1)/g;
+    $line =~ s/!\s+defined/!defined/g;
+
+    if ($line =~ /^#((?:el)?if)(n?)def\s+(\w+)/) {
+        my $if= $1;
+        my $not= $2 ? "!" : "";
+        $line= "#$if ${not}defined($3)";
+    }
+    $line =~ s/#((?:el)?if)\s+//
+        or confess "Bad cond: $line";
+    my $if= $1;
+    $line =~ s/!\s+/!/g;
+
+    my $old_cond= $line;
+    my $cond= $self->tidy_cond($old_cond);
+
+    warn "cond - $old_cond\ncond + $cond\n"
+        if $old_cond ne $cond and $self->{debug};
+
+    $line= "#$if $cond";
+    $line .= "  " . join " ", reverse @comment if @comment;
+
+    $line .= $nl;
+    return ($line, $cond);
+}
+
+# parses a text buffer as though it was a file on disk
+# calls parse_fh()
+sub parse_text {
+    my ($self, $text)= @_;
+    open my $fh, "<", \$text
+        or die "Failed to open buffer for read: $!";
+    return $self->parse_fh($fh);
+}
+
+# takes a readable filehandle and parses whatever contents is
+# returned by reading it. Returns an array of HeaderLine objects.
+# this is the main routing for parsing a header file.
+sub parse_fh {
+    my ($self, $fh)= @_;
+    my @lines;
+    my @cond;
+    my @cond_line;
+    my $last_cond;
+    my $cb= $self->{pre_process_content};
+    $self->{orig_content}= "";
+    my $line_num= 1;
+
+    while (defined(my $line= readline($fh))) {
+        my $start_line_num= $line_num++;
+        $self->{orig_content} .= $line;
+        while ($line =~ /\\\n\z/ or $line =~ m</\*(?:(?!\*/).)*\s*\z>s) {
+            defined(my $read_line= readline($fh))
+                or last;
+            $self->{orig_content} .= $read_line;
+            $line_num++;
+            $line .= $read_line;
+        }
+        while ($line =~ m!/\*(.*?)(\*/|\z)!gs) {
+            my ($inner, $tail)= ($1, $2);
+            if ($tail ne "*/") {
+                confess
+                    "Unterminated comment starting at line $start_line_num\n";
+            }
+            elsif ($inner =~ m!/\*!) {
+                confess
+                    "Nested/broken comment starting at line $start_line_num\n";
+            }
+        }
+
+        my $raw= $line;
+        my $type= "content";
+        my $sub_type= "text";
+        my $level= @cond;
+        my $do_pop= 0;
+        my $flat= $line;
+        $flat =~ s/\s*\\\n\s*/ /g;
+        $flat =~ s!/\*.*?\*/! !gs;
+        $flat =~ s/\s+/ /g;
+        $flat =~ s/\s+\z//;
+        $flat =~ s/^\s*#\s*/#/g;
+
+        my $line_info=
+            HeaderLine->new(start_line_num => $start_line_num, raw => $raw);
+        my $do_cond_line;
+        if ($flat =~ /^#/) {
+            if ($flat =~ m/^(#(?:el)?if)(n?)def\s+(\w+)/) {
+                my $if= $1;
+                my $not= $2 ? "!" : "";
+                my $sym= $3;
+                $flat =~
+                    s/^(#(?:el)?if)(n?)def\s+(\w+)/$if ${not}defined($sym)/;
+            }
+            my $cond;    # used in various expressions below
+            if ($flat =~ /^#endif/) {
+                if (!@cond) {
+                    confess "Not expecting $flat";
+                }
+                $do_pop= 1;
+                $level--;
+                $type= "cond";
+                $sub_type= "#endif";
+            }
+            elsif ($flat =~ /^#if\b/) {
+                ($flat, $cond)= $self->normalize_if_elif($flat, $line_info);
+                push @cond,      [$cond];
+                push @cond_line, $line_info;
+                $type= "cond";
+                $sub_type= "#if";
+            }
+            elsif ($flat =~ /^#elif\b/) {
+                if (!@cond) {
+                    confess "No if for $flat";
+                }
+                $level--;
+                ($flat, $cond)= $self->normalize_if_elif($flat, $line_info);
+                $cond[-1][-1]= $self->tidy_cond("!($cond[-1][-1])");
+                $cond_line[-1]= $line_info;
+                push @{ $cond[-1] }, $cond;
+                $type= "cond";
+                $sub_type= "#elif";
+            }
+            elsif ($flat =~ /^#else\b/) {
+                if (!@cond) {
+                    confess "No if for $flat";
+                }
+                $level--;
+                $cond[-1][-1]= $self->tidy_cond("!($cond[-1][-1])");
+                $cond_line[-1]= $line_info;
+                $type= "cond";
+                $sub_type= "#else";
+            }
+            elsif ($flat =~ /#undef/) {
+                $type= "content";
+                $sub_type= "#undef";
+            }
+            elsif ($flat =~ /#pragma\b/) {
+                $type= "content";
+                $sub_type= "#pragma";
+            }
+            elsif ($flat =~ /#include\b/) {
+                $type= "content";
+                $sub_type= "#include";
+            }
+            elsif ($flat =~ /#define\b/) {
+                $type= "content";
+                $sub_type= "#define";
+            }
+            elsif ($flat =~ /#error\b/) {
+                $type= "content";
+                $sub_type= "#error";
+            }
+            else {
+                confess "Do not know what to do with $line";
+            }
+            if ($type eq "cond") {
+
+                # normalize conditional lines
+                $line= $flat;
+                $last_cond= $line_info;
+            }
+        }
+        $line =~ s/\n?\z/\n/;
+
+        %$line_info= (
+            cond           => copy_aoa(\@cond),
+            type           => $type,
+            sub_type       => $sub_type,
+            raw            => $raw,
+            flat           => $flat,
+            line           => $line,
+            level          => $level,
+            start_line_num => $start_line_num,
+            n_lines        => $line_num - $start_line_num,
+        );
+
+        push @lines, $line_info;
+        if ($do_pop) {
+            pop @cond;
+            pop @cond_line;
+        }
+        if ($type eq "content" and $cb) {
+            $cb->($self, $lines[-1]);
+        }
+    }
+    if (@cond_line) {
+        my $msg= "Unterminated conditional block starting line "
+            . $cond_line[-1]->start_line_num();
+        $msg .=
+            " with last conditional operation at line "
+            . $last_cond->start_line_num()
+            if $cond_line[-1] != $last_cond;
+        confess $msg;
+    }
+    $self->{lines}= \@lines;
+    return \@lines;
+}
+
+# returns the last lines we parsed.
+sub lines { $_[0]->{lines} }
+
+# assuming a line looks like an embed.fnc entry parse it
+# and normalize it, and create and EmbedLine object from it.
+sub tidy_embed_fnc_entry {
+    my ($self, $line_data)= @_;
+    my $line= $line_data->{line};
+    return $line if $line =~ /^\s*:/;
+    return $line unless $line_data->{type} eq "content";
+    return $line unless $line =~ /\|/;
+
+    $line =~ s/\s*\\\n/ /g;
+    $line =~ s/\s+\z//;
+    ($line)= expand($line);
+    my ($flags, $ret, $name, @args)= split /\s*\|\s*/, $line;
+    for ($ret, @args) {
+        s/(\w)\*/$1 */g;
+        s/\*\s+(\w)/*$1/g;
+        s/\*const/* const/g;
+    }
+    my $head= sprintf "%-8s|%-7s", $flags, $ret;
+    $head .= sprintf "|%*s", -(31 - length($head)), $name;
+    if (@args and length($head) > 32) {
+        $head .= "\\\n";
+        $head .= " " x 32;
+    }
+    foreach my $ix (0 .. $#args) {
+        my $arg= $args[$ix];
+        $head .= "|$arg";
+        $head .= "\\\n" . (" " x 32) if $ix < $#args;
+    }
+    $line= $head . "\n";
+
+    if ($line =~ /\\\n/) {
+        my @lines= split /\s*\\\n/, $line;
+        my $len= length($lines[0]);
+        $len < length($_) and $len= length($_) for @lines;
+        $len= int(($len + 7) / 8) * 8;
+        $len= 72 if $len < 72;
+        $line= join("\\\n",
+            (map { sprintf "%*s", -$len, $_ } @lines[ 0 .. $#lines - 1 ]),
+            $lines[-1]);
+    }
+    ($line)= unexpand($line);
+
+    $line_data->{embed}= EmbedLine->new(
+        flags       => $flags,
+        return_type => $ret,
+        name        => $name,
+        args        => \@args,
+    );
+    $line =~ s/\s+\z/\n/;
+    $line_data->{line}= $line;
+    return $line;
+}
+
+# line up the text in a multiline string by a given $fragment
+# of text, inserting whitespace in front or behind the $fragment
+# to get the text to line up. Returns the text. This is wrapped
+# by line_up() and is used to wrap long conditions and comments
+# in the generated code.
+sub _line_up_frag {
+    my ($self, $str, $fragment)= @_;
+    die "has tabs?!" if $str =~ /\t/;
+    my @lines= split /\n/, $str;
+    my $changed= 1;
+    while ($changed) {
+        $changed= 0;
+        foreach my $ix (0 .. $#lines - 1) {
+            my $f_index= 0;
+            my $n_index= 0;
+            while (1) {
+                $f_index= index($lines[$ix],       $fragment, $f_index);
+                $n_index= index($lines[ $ix + 1 ], $fragment, $n_index);
+                if ($f_index == -1 or $n_index == -1) {
+                    last;
+                }
+                if ($f_index < $n_index) {
+                    my $f_idx= $f_index;
+                    $f_idx-- while substr($lines[$ix], $f_idx, 1) ne " ";
+                    substr($lines[$ix], $f_idx, 0, " " x ($n_index - $f_index));
+                    $changed++;
+                    last;
+                }
+                elsif ($n_index < $f_index) {
+                    my $n_idx= $n_index;
+                    $n_idx-- while substr($lines[ $ix + 1 ], $n_idx, 1) ne " ";
+                    substr($lines[ $ix + 1 ],
+                        $n_idx, 0, " " x ($f_index - $n_index));
+                    $changed++;
+                    last;
+                }
+                $f_index++;
+                $n_index++;
+            }
+        }
+    }
+    my $ret= join "", map { "$_\n" } @lines;
+    return $ret;
+}
+
+sub line_up {
+    my ($self, $line, @fragments)= @_;
+
+    foreach my $fragment (@fragments) {
+        $line= $self->_line_up_frag($line, $fragment);
+        last if $line ne $_[1];
+    }
+    return $line;
+}
+
+# Takes an array of HeaderLines objects produced by parse_fh()
+# or by group_content(), and turn it into a string.
+sub lines_as_str {
+    my ($self, $lines, $post_process_content)= @_;
+    $lines ||= $self->{lines};
+    my $ret;
+    $post_process_content ||= $self->{post_process_content};
+    my $filter= $self->{filter_content};
+    my $last_line= "";
+    foreach my $line_data (@$lines) {
+        my $line= $line_data->{line};
+        if ($line_data->{type} ne "content" or $line_data->{sub_type} ne "text")
+        {
+            my $level= $line_data->{level};
+            my $ind= $self->indent_chars($level);
+            $line =~ s/^#(\s*)/#$ind/;
+        }
+        if ($line_data->{type} eq "cond") {
+            if ($line_data->{sub_type} =~ /#(?:else|endif)/) {
+                my $joined= join " && ",
+                    map { "($_)" } @{ $line_data->{cond}[-1] };
+                my $cond_txt= $self->tidy_cond($joined);
+                $cond_txt= "if $cond_txt" if $line_data->{sub_type} eq "#else";
+                $line =~ s!\s*\z! /* $cond_txt */\n!;
+            }
+            elsif ($line_data->{sub_type} eq "#elif") {
+                my $last_frame= $line_data->{cond}[-1];
+                my $joined= join " && ",
+                    map { "($_)" } @$last_frame[ 0 .. ($#$last_frame - 1) ];
+                my $cond_txt= $self->tidy_cond($joined);
+                $line =~ s!\s*\z! /* && $cond_txt */\n!;
+            }
+        }
+        $line =~ s/\s+\z/\n/;
+        if ($last_line eq "\n" and $line eq "\n") {
+            next;
+        }
+        $last_line= $line;
+        if ($line_data->{type} eq "cond") {
+            $line =~ m!(^\s*#\s*\w+\s+)([^/].*?\s*)?(/\*.*)?\n\z!
+                or die "Failed to split cond line: $line";
+            my ($type, $cond, $comment)= ($1, $2, $3);
+            $comment //= "";
+            $cond    //= "";
+            my $new_line;
+            if (!length($cond) and $comment) {
+                $comment =~ s!^(/\*\s+)!!
+                    and $type .= $1;
+            }
+            my $l= length($type);
+            $line= _my_wrap($type, " " x $l, $cond . $comment);
+            $line =~ s/\n(?!\z)/ \\\n/g;
+            $line =~ s!\\\n\s+((?:\)\s*)*)\*/!$1*/!;
+            $line= $self->line_up($line, "||", "define");
+        }
+        $line_data->{line}= $line;
+        if ($post_process_content and $line_data->{type} eq "content") {
+            $post_process_content->($self, $line_data);
+        }
+        if ($filter and $line_data->{type} eq "content") {
+            $filter->($self, $line_data) or next;
+        }
+        $ret .= $line_data->{line};
+    }
+    return $ret;
+}
+
+# Text::Wrap::wrap has an odd api, so hide it behind a wrapper
+# sub which sets things up properly.
+sub _my_wrap {
+    my ($head, $rest, $line)= @_;
+    local $Text::Wrap::unexpand= 0;
+    local $Text::Wrap::huge= "overflow";
+    local $Text::Wrap::columns= 78;
+    $line= wrap $head, $rest, $line;
+    return $line;
+}
+
+# recursively extract the && expressions from a parse tree,
+# returning the result as strings.
+# if $node is not a '&&' op then it returns $node as a string,
+# otherwise it returns the string form of the arguments to the
+# '&&' op, recursively flattening any '&&' nodes that it might
+# contain.
+sub _and_clauses {
+    my ($self, $node)= @_;
+
+    my @ret;
+    if (@$node < 3 or $node->[0] ne "&&") {
+        return $self->_pt_as_str($node);
+    }
+    foreach my $idx (1 .. $#$node) {
+        push @ret, $self->_and_clauses($node->[$idx]);
+    }
+    return @ret;
+}
+
+# recursively walk the a parse tree, and return the literal
+# terms it contains, ignoring any operators in the optree.
+sub _terms {
+    my ($self, $node)= @_;
+    if (@$node == 1) {
+        return $self->_pt_as_str($node);
+    }
+    my @ret;
+    if (@$node == 2) {
+        if ($node->[0] eq "?") {
+            push @ret, map { $self->_terms($_) } @{ $node->[1] };
+        }
+        else {
+            push @ret, $self->_terms($node->[1]);
+        }
+    }
+    else {
+        foreach my $i (1 .. $#$node) {
+            push @ret, $self->_terms($node->[$i]);
+        }
+    }
+    return @ret;
+}
+
+# takes a HeaderLine "cond" AoA and flattens it into
+# a single expression, and then extracts all the and clauses
+# it contains. Thus [['defined(A)'],['defined(B)']] and
+# [['defined(A) && defined(B)']], end up as ['defined(A)','defined(B)']
+sub _flatten_cond {
+    my ($self, $cond_ary)= @_;
+
+    my $expr= join " && ", map {
+        map { "($_)" }
+            @$_
+    } @$cond_ary;
+    return [] unless $expr;
+    my $tree= $self->parse_expr($expr);
+    my %seen;
+    my @and_clause= grep { !$seen{$_}++ } $self->_and_clauses($tree);
+    return \@and_clause;
+}
+
+# Find the best path into a tree of conditions, such that
+# we reuse the maximum number of existing branches. Returning
+# two arrays, the first contain the parts of $cond_array that
+# make up the best path, in the best path order, and a second array
+# with the remaining items in the initial order they were provided.
+# Thus if we have previously stored only the path "A", "B", "C"
+# into the tree, and want to find the best path for
+# ["E","D","C","B","A"] we should return: ["A","B","C"],["E","D"],
+#
+# This used to reduce the number of conditions in the grouped content,
+# and is especially helpful with dealing with DEBUGGING related
+# functionality. It is coupled with careful control over the order
+# that we add paths and conditions to the tree.
+sub _best_path {
+    my ($self, $tree_node, $cond_array, @path)= @_;
+    my $best= \@path;
+    my $rest= $cond_array;
+    foreach my $cond (@$cond_array) {
+        if ($tree_node->{$cond}) {
+            my ($new_best, $new_rest)=
+                $self->_best_path($tree_node->{$cond},
+                [ grep $_ ne $cond, @$cond_array ],
+                @path, $cond);
+            if (@$new_best > @$best) {
+                ($best, $rest)= ($new_best, $new_rest);
+            }
+        }
+    }
+    if (@$best == @path) {
+        foreach my $cond (@$cond_array) {
+            my $not_cond= $self->tidy_cond("!($cond)");
+            if ($tree_node->{$not_cond}) {
+                $best= [ @path, $cond ];
+                $rest= [ grep $_ ne $cond, @$cond_array ];
+                last;
+            }
+        }
+    }
+    return ($best, $rest);
+}
+
+# This builds a group content tree from a set of lines. each content line in
+# the original file is added to the file based on the conditions that apply to
+# the content.
+#
+# The tree is made up of nested HoH's with keys in the HoH being normalized
+# clauses from the {cond} data in the HeaderLine objects.
+#
+# Care is taken to minimize the number of pathways and to reorder clauses to
+# reuse existing pathways and minimize the total number of conditions in the
+# file.
+#
+# The '' key of a hash contains an array of the lines that are part of the
+# condition that lead to that key. Thus lines with no conditions are in
+# @{$tree{''}}, lines with the condition "defined(A) && defined(B)" would be
+# in $tree{"defined(A)"}{"defined(B)"}{""}.
+#
+# The result of this sub is normally passed into _recurse_group_content_tree()
+# which converts it back into a set of HeaderLine objects.
+#
+sub _build_group_content_tree {
+    my ($self, $lines)= @_;
+    $lines ||= $self->{lines};
+    my $filter= $self->{filter_content};
+    my %seen_normal;
+    foreach my $line_data (@$lines) {
+        next if $line_data->{type} ne "content";
+        next if $filter and !$filter->($self, $line_data);
+        my $cond_frames= $line_data->{cond};
+        my $cond_frame= $self->_flatten_cond($cond_frames);
+        my $flat_merged= join " && ", map "($_)", @$cond_frame;
+        my $normalized;
+        if (@$cond_frame) {
+            $normalized= $self->tidy_cond($flat_merged);
+        }
+        else {
+            $normalized= $flat_merged;    # empty string
+        }
+        push @{ $seen_normal{$normalized} }, $line_data;
+    }
+    my @debugging;
+    my @non_debugging;
+    foreach my $key (keys %seen_normal) {
+        if ($key =~ /DEBUGGING/) {
+            push @debugging, $key;
+        }
+        else {
+            push @non_debugging, $key;
+        }
+    }
+    @non_debugging=
+        sort { length($a) <=> length($b) || $a cmp $b } @non_debugging;
+    @debugging= sort { length($b) <=> length($a) || $a cmp $b } @debugging;
+    my %tree;
+    foreach my $normal_expr (@non_debugging, @debugging) {
+        my $all_line_data= $seen_normal{$normal_expr};
+
+        my $cond_frame=
+            (length $normal_expr)
+            ? $self->_flatten_cond([ [$normal_expr] ])
+            : [];
+        @$cond_frame= $self->_sort_terms(@$cond_frame);
+        my $node= \%tree;
+        my ($best, $rest)= $self->_best_path($node, $cond_frame);
+        die sprintf "Woah: %d %d %d", 0 + @$best, 0 + @$rest, 0 + @$cond_frame
+            unless @$best + @$rest == @$cond_frame;
+
+        foreach my $cond (@$best, @$rest) {
+            $node= $node->{$cond} ||= {};
+        }
+        push @{ $node->{''} }, @$all_line_data;
+    }
+
+    warn $self->dd(\%tree) if $self->{debug};
+    $self->{tree}= \%tree;
+    return \%tree;
+}
+
+# convert a tree of conditions constructed by _build_group_content_tree()
+# and turn it into a set of HeaderLines that represents it. Performs the
+# appropriate sets required to reconstitute an if/elif/elif/else sequence
+# by calling _handle_else().
+sub _recurse_group_content_tree {
+    my ($self, $node, @path)= @_;
+    my $depth= 0 + @path;
+    my $ind= $self->indent_chars($depth);
+    my @ret;
+    if ($node->{''}) {
+        if (my $cb= $self->{post_process_grouped_content}) {
+            $cb->($self, $node->{''}, \@path);
+        }
+        if (my $cb= $self->{post_process_content}) {
+            $cb->($self, $_, \@path) for @{ $node->{''} };
+        }
+        push @ret,
+            map { HeaderLine->new(%$_, cond => [@path], level => $depth) }
+            @{ $node->{''} };
+    }
+
+    my %skip;
+    foreach my $expr (
+        map  { $_->[0] }
+        sort { $a->[1] cmp $b->[1] || $a->[0] cmp $b->[0] }
+        map  { [ $_, lc($_) =~ s/[^A-Za-z0-9]+//gr ] } keys %$node
+    ) {
+        next unless length $expr;    # ignore payload
+        my $not= $self->tidy_cond("!($expr)");
+        if ($skip{$expr} or ($not !~ /^!/ and $node->{$not})) {
+            next;
+        }
+        my $kid= $node->{$expr};
+        while (!$node->{$not} and keys(%$kid) == 1 and !$kid->{''}) {
+            my ($kid_key)= keys(%$kid);
+            $expr= $self->tidy_cond("($expr) && ($kid_key)");
+            $kid= $kid->{$kid_key};
+            my $new_not= $self->tidy_cond("!($expr)");
+            if ($node->{$new_not}) {
+                $not= $new_not;
+                $skip{$not}++;
+            }
+        }
+        my $raw= "#${ind}if $expr\n";
+        my $hl= HeaderLine->new(
+            type     => "cond",
+            sub_type => "#if",
+            raw      => $raw,
+            line     => $raw,
+            level    => $depth,
+            cond     => [ @path, [$expr] ]);
+        my $ar= $self->_recurse_group_content_tree($kid, @path, [$expr]);
+        push @ret, $hl, @$ar;
+        if ($node->{$not}) {
+            $skip{$not}++;
+            my $ar=
+                $self->_handle_else($not, $node->{$not}, $ind, $depth, @path,
+                [$not]);
+            push @ret, @$ar;
+        }
+
+        # and finally the #endif
+
+        $raw= "#${ind}endif\n";
+
+        # we need to extract the condition information from the last line in @ret,
+        # as we don't know which condition we are ending here. It could be an elsif
+        # from deep in the parse tree for instance.
+        # So we need to extract the last frame from the cond structure in the last
+        # line-info in @ret.
+        # BUT if this last line is itself an #endif, then we need to take the second
+        # to last line instead, as the endif would have "popped" that frame off the
+        # condition stack.
+        my $last_ret= $ret[-1];
+        my $idx=
+            ($last_ret->{type} eq "cond" && $last_ret->{sub_type} eq "#endif")
+            ? -2
+            : -1;
+        my $end_line= HeaderLine->new(
+            type     => "cond",
+            sub_type => "#endif",
+            raw      => $raw,
+            line     => $raw,
+            level    => $depth,
+            cond     => [ @path, $last_ret->{cond}[$idx] ]);
+        push @ret, $end_line;
+    }
+    return \@ret;
+}
+
+# this handles the specific case of an else clause, detecting
+# when an elif can be constructed, may recursively call itself
+# to deal with if/elif/elif/else chains. Calls back into
+# _recurse_group_content_tree().
+sub _handle_else {
+    my ($self, $not, $kid, $ind, $depth, @path)= @_;
+
+    # extract the first 3 keys - from this we can detect
+    # which of the three scenarios we have to handle.
+    my ($k1, $k2, $k3)=
+        sort { length($a) <=> length($b) || $a cmp $b } keys %$kid;
+    my $not_k1;
+    if (length($k1) and defined($k2) and !defined($k3)) {
+
+        # if we do not have a payload (length($k1)) and we have exactly
+        # two keys (defined($k2) and !defined($k3)) we need to compute
+        # the inverse of $k1, which we will use later.
+        $not_k1= $self->tidy_cond("!($k1)");
+    }
+    my @ret;
+    if (length($k1) and !defined($k2)) {
+
+        # only one child, no payload -> elsif $k1
+        my $sub_expr;
+        do {
+            $sub_expr=
+                 !$sub_expr
+                ? $k1
+                : $self->tidy_cond("($sub_expr) && ($k1)");
+            $kid= $kid->{$k1};
+            ($k1, $k2)=
+                sort { length($a) <=> length($b) || $a cmp $b } keys %$kid;
+        } while length($k1) and !defined $k2;
+
+        my $raw= "#${ind}elif $sub_expr\n";
+        push @{ $path[-1] }, $sub_expr;
+        my $hl= HeaderLine->new(
+            type     => "cond",
+            sub_type => "#elif",
+            raw      => $raw,
+            line     => $raw,
+            level    => $depth,
+            cond     => [ map { [@$_] } @path ]);
+        my $ar= $self->_recurse_group_content_tree($kid, @path);
+        push @ret, $hl, @$ar;
+    }
+    elsif (defined($not_k1) and $not_k1 eq $k2) {
+
+        # two children which are complementary, no payload -> elif $k1 else..
+        my $raw= "#${ind}elif $k1\n";
+
+        push @{ $path[-1] }, $k1;
+        my $hl= HeaderLine->new(
+            type     => "cond",
+            sub_type => "#elif",
+            raw      => $raw,
+            line     => $raw,
+            level    => $depth,
+            cond     => [ map { [@$_] } @path ]);
+        my $ar= $self->_recurse_group_content_tree($kid->{$k1}, @path);
+        $path[-1][-1]= $k2;
+        my $rest= $self->_handle_else($k2, $kid->{$k2}, $ind, $depth, @path);
+        push @ret, $hl, @$ar, @$rest;
+    }
+    else {
+        # payload, 3+ children, or 2 which are not complementary -> else
+        my $raw= "#${ind}else\n";
+        my $hl= HeaderLine->new(
+            type     => "cond",
+            sub_type => "#else",
+            raw      => $raw,
+            line     => $raw,
+            level    => $depth,
+            cond     => [ map { [@$_] } @path ]);
+        my $ar= $self->_recurse_group_content_tree($kid, @path);
+        push @ret, $hl, @$ar;
+    }
+    return \@ret;
+}
+
+# group the content in lines by the condition that apply to them
+# returns a set of lines representing the new structure
+sub group_content {
+    my ($self, $lines, $filter)= @_;
+    $lines ||= $self->{lines};
+    local $self->{filter_content}= $filter || $self->{filter_content};
+    my $tree= $self->_build_group_content_tree($lines);
+    return $self->_recurse_group_content_tree($tree);
+}
+
+#read a file by name - opens the file and passes the fh into parse_fh().
+sub read_file {
+    my ($self, $file_name, $callback)= @_;
+    $self= $self->new() unless ref $self;
+    $self->{last_file_name}= $file_name;
+    open my $fh, "<", $file_name
+        or confess "Failed to open '$file_name' for read: $!";
+    my $lines= $self->parse_fh($fh);
+    if ($callback) {
+        foreach my $line (@$lines) {
+            $callback->($self, $line);
+        }
+    }
+    return $self;
+}
+
+# These are utility methods for the HeaderLine objects.
+sub HeaderLine::new {
+    my ($class, %self)= @_;
+    return bless \%self, $class;
+}
+sub HeaderLine::cond        { $_[0]->{cond} }                             # AoA
+sub HeaderLine::type        { $_[0]->{type} }
+sub HeaderLine::type_is     { return $_[0]->type eq $_[1] ? 1 : 0 }
+sub HeaderLine::sub_type    { $_[0]->{sub_type} }
+sub HeaderLine::sub_type_is { return $_[0]->sub_type eq $_[1] ? 1 : 0 }
+sub HeaderLine::raw         { $_[0]->{raw} }
+sub HeaderLine::flat        { $_[0]->{flat} }
+sub HeaderLine::line        { $_[0]->{line} }
+sub HeaderLine::level       { $_[0]->{level} }
+sub HeaderLine::is_content  { return $_[0]->type_is("content") }
+sub HeaderLine::is_cond     { return $_[0]->type_is("cond") }
+sub HeaderLine::is_define   { return $_[0]->sub_type_is("#define") }
+sub HeaderLine::line_num    { $_[0]->{start_line_num} }
+sub HeaderLine::n_lines     { $_[0]->{n_lines} }
+sub HeaderLine::embed       { $_[0]->{embed} }
+*HeaderLine::start_line_num= *HeaderLine::line_num;
+
+# these are methods for EmbedLine objects
+*EmbedLine::new= *HeaderLine::new;
+sub EmbedLine::flags       { $_[0]->{flags} }
+sub EmbedLine::return_type { $_[0]->{return_type} }
+sub EmbedLine::name        { $_[0]->{name} }
+sub EmbedLine::args        { $_[0]->{args} }          # array ref
+
+1;
+
+__END__
+
+=head1 NAME
+
+HeaderParser - A minimal header file parser that can be hooked by other porting
+scripts.
+
+=head1 SYNOPSIS
+
+    my $o= HeaderParser->new();
+    my $lines= $o->parse_fh($fh);
+
+=head1 DESCRIPTION
+
+HeaderParser is a tool to parse C preprocessor header files. The tool
+understands the syntax of preprocessor conditions, and is capable of creating
+a parse tree of the expressions involved, and normalizing them as well.
+
+C preprocessor files are a bit tricky to parse properly, especially with a
+"line by line" model. There are two issues that must be dealt with:
+
+=over 4
+
+=item Line Continuations
+
+Any line ending in "\\\n" (that is backslash newline) is considered to be part
+of a longer string which continues on the next line. Processors should replace
+the "\\\n" typically with a space when converting to a "real" line.
+
+=item Comments Acting As A Line Continuation
+
+The rules for header files stipulates that C style comments are stripped
+before processing other content, this means that comments can serve as a form
+of line continuation:
+
+    #if defined(foo) /*
+    */ && defined(bar)
+
+is the same as
+
+    #if defined(foo) && defined(bar)
+
+This type of comment usage is often overlooked by people writing header file
+parsers for the first time.
+
+=item Indented pre processor directives.
+
+It is easy to forget that there may be multiple spaces between the "#"
+character and the directive. It also easy to forget that there may be spaces
+in *front* of the "#" character. Both of these cases are often overlooked.
+
+=back
+
+The main idea of this module is to provide a single framework for correctly
+parsing the content of our header files in a consistent manner. A secondary
+purpose it to make various tasks we want to do easier, such as normalizing
+content or preprocessor expressions, or just extracting the real "content" of
+the file properly.
+
+=head2 parse_fh
+
+This function parses a filehandle into a set of lines.  Each line is represented by a hash
+based object which contains the following fields:
+
+    bless {
+        cond     => [['defined(a)'],['defined(b)']],
+        type     => "content",
+        sub_type => undef,
+        raw      => $raw_content_of_line,
+        line     => $normalized_content_of_line,
+        level    => $level,
+    }, "HeaderLine"
+
+A "line" in this context is a logical line, and because of line continuations
+and comments may contain more than one physical line, and thus more than
+one newline, but will always include at least one and will always end with one
+(unless there is no newline at the end of the file). Thus
+
+    before /*
+     this is a comment
+    */ after \
+    and continues
+
+will be treated as a single logical line even though the content is
+spread over four lines.
+
+=over 4
+
+=item cond
+
+An array of arrays containing the normalized expressions of any C preprocessor
+conditional blocks which include the line. Each line has its own copy of the
+conditions it was operated on currently, but that may change so dont alter
+this data. The inner arrays may contain more than one element. If so then the
+line is part of an "#else" or "#elsif" and the clauses should be considered to
+be a conjuction when considering "when is this line included", however when
+considered as part of an if/elsif/else, each added clause represents the most
+recent condition. In the following you can see how:
+
+    before          /* cond => [ ]                      */
+    #if A           /* cond => [ ['A'] ]                */
+    do-a            /* cond => [ ['A'] ]                */
+    #elif B         /* cond => [ ['!A', 'B'] ]          */
+    do-b            /* cond => [ ['!A', 'B'] ]          */
+    #else           /* cond => [ ['!A', '!B'] ]         */
+    do-c            /* cond => [ ['!A', '!B'] ]         */
+    # if D          /* cond => [ ['!A', '!B'], ['D'] ]  */
+    do-d            /* cond => [ ['!A', '!B'], ['D'] ]  */
+    # endif         /* cond => [ ['!A', '!B'], ['D'] ]  */
+    #endif          /* cond => [ ['!A', '!B'] ]         */
+    after           /* cond => [ ]                      */
+
+So in the above we can see how the three clauses of the if produce
+a single "frame" in the cond array, but that frame "grows" and changes
+as additional else clauses are added. When an entirely new if block
+is started (D) it gets its own block. Each endif includes the clause
+it terminates.
+
+=item type
+
+This value indicates the type of the line. This may be one of the following:
+'content', 'cond', 'define', 'include' and 'error'. Several of the types
+have a sub_type.
+
+=item sub_type
+
+This value gives more detail on the type of the line where necessary.
+Not all types have a subtype.
+
+    Type    | Sub Type
+    --------+----------
+    content | text
+            | include
+            | define
+            | error
+    cond    | #if
+            | #elif
+            | #else
+            | #endif
+
+Note that there are no '#ifdef' or '#elifndef' or similar expressions. All
+expressions of that form are normalized into the '#if defined' form to
+simplify processing.
+
+=item raw
+
+This was the raw original text before HeaderParser performed any modifications
+to it.
+
+=item line
+
+This is the normalized and modified text after HeaderParser or any callbacks
+have processed it.
+
+=item level
+
+This is the "indent level" of a line and corresponds to the number of blocks
+that the line is within, not including any blocks that might be created by
+the line itself.
+
+    before          /* level => 0 */
+    #if A           /* level => 0 */
+    do-a            /* level => 1 */
+    #elif B         /* level => 0 */
+    do-b            /* level => 1 */
+    #else           /* level => 0 */
+    do-c            /* level => 1 */
+    # if D          /* level => 1 */
+    do-d            /* level => 2 */
+    # endif         /* level => 1 */
+    #endif          /* level => 0 */
+    after           /* level => 0 */
+
+=back
+
+parse_fh() will throw an exception if it encounters a malformed expression
+or input it cannot handle.
+
+=head2 lines_as_str
+
+This function will return a string representation of the lines it is provided.
+
+=head2 group_content
+
+This function will group the text in the file by the conditions which contain
+it. This is only useful for files where the content is essentially a list and
+where changing the order that lines are output in will not break the resulting
+file.
+
+Each content line will be grouped into a structure of nested if/else blocks
+(elif will produce a new nested block) such that the content under the control
+of a given set of normalized condition clauses are grouped together in the order
+the occurred in the file, such that each combined conditional clause is output
+only once.
+
+This means a file like this:
+
+    #if A
+    A
+    #elif K
+    AK
+    #else
+    ZA
+    #endif
+    #if B && Q
+    B
+    #endif
+    #if Q && B
+    BC
+    #endif
+    #if A
+    AD
+    #endif
+    #if !A
+    ZZ
+    #endif
+
+Will end up looking roughly like this:
+
+    #if A
+    A
+    AD
+    #else
+    ZZ
+    # if K
+    AK
+    # else
+    ZA
+    # endif
+    #endif
+    #if B && Q
+    B
+    BC
+    #endif
+
+Content at a given block level always goes before conditional clauses
+at the same nesting level.
+
+=head2 HOOKS
+
+There are severals hooks that are available, C<pre_process_content> and
+C<post_process_content>, and C<post_process_grouped_content>. All of these
+hooks  will be called with the HeaderParser object as the first argument.
+The "process_content" callbacks will be called with a line hash as the second
+argument, and C<post_process_grouped_content> will be called with an
+array of line hashes for the content in that group, so that the array may be
+modified or sorted.  Callbacks called from inside of C<group_content()>
+(that is C<post_process_content> and C<post_process_grouped_content> will be
+called with an additional argument containing and array specifying the actual
+conditional "path" to the content  (which may differ somewhat from the data in
+a lines "cond" property).
+
+These hooks may do what they like, but generally they will modify the
+"line" property of the line hash to change the final output returned
+by C<lines_as_str()> or C<group_content()>.
+
+=head2 FORMATTING AND INDENTING
+
+Header parser tries hard to produce neat and readable output with a consistent
+style and form. For example:
+
+    #if defined(FOO)
+    # define HAS_FOO
+    # if defined(BAR)
+    #   define HAS_FOO_AND_BAR
+    # else /* !defined(BAR) */
+    #   define HAS_FOO_NO_BAR
+    # endif /* !defined(BAR) */
+    #endif /* defined(FOO) */
+
+HeaderParser uses two space tab stops for indenting C pre-processor
+directives. It puts the spaces between the "#" and the directive. The "#" is
+considered "part" of the indent, even though the space comes after it. This
+means the first indent level "looks" like one space, and following indents
+look like 2. This should match what a sensible editor would do with two space
+tab stops. The C<indent_chars()> method can be used to convert an indent level
+into a string that contains the appropriate number of spaces to go in between
+the "#" and the directive.
+
+When emitting "#endif", "#elif" and "#else" directives comments will be
+emitted also to show the conditions that apply. These comments may be wrapped
+to cover multiple lines. Some effort is made to get these comments to line up
+visually, but it uses heuristics which may not always produce the best result.
+
+=cut
diff --git a/regen/tidy_embed.pl b/regen/tidy_embed.pl
new file mode 100644
index 0000000000..6991b1db04
--- /dev/null
+++ b/regen/tidy_embed.pl
@@ -0,0 +1,61 @@
+use lib "regen";
+use HeaderParser;
+use strict;
+use warnings;
+
+my $parser= HeaderParser->new(
+        pre_process_content => sub {
+            my ($self,$line_data)= @_;
+            $self->tidy_embed_fnc_entry($line_data);
+            my $embed= $line_data->{embed}
+                or return;
+        },
+        _post_process_grouped_content => sub {
+            my ($self,$group_ary)= @_;
+            @{$group_ary}=
+                sort {
+                    $a->{embed}{name} cmp $b->{embed}{name}
+                } @{$group_ary};
+        },
+    );
+my $tap;
+if (@ARGV and $ARGV[0] eq "--tap") {
+    $tap = shift @ARGV;
+}
+my $file= "embed.fnc";
+if (@ARGV) {
+    $file= shift @ARGV;
+}
+my $new= "$file.new";
+my $bak= "$file.bak";
+$parser->read_file($file);
+my $lines= $parser->lines;
+my @tail;
+while ($lines->[-1]{type} eq "content" and
+    ($lines->[-1]{line} eq "\n" or $lines->[-1]{line}=~/^\s*:/)
+) {
+    unshift @tail, pop @$lines;
+}
+
+my $grouped_content_ary= $parser->group_content();
+push @$grouped_content_ary, @tail;
+my $grouped_content_txt= $parser->normalized_content($grouped_content_ary);
+if ($grouped_content_txt ne $parser->{orig_content}) {
+    if ($tap) {
+        print "not ok - $0 $file\n";
+    } elsif (-t) {
+        print "Updating $file\n";
+    }
+    open my $fh,">",$new
+        or die "Failed to open '$new' for write: $!";
+    print $fh $grouped_content_txt
+        or die "Failed to print to '$new': $!";
+    close $fh
+        or die "Failed to close '$new': $!";
+    rename $file, $bak
+        or die "Couldn't move '$file' to '$bak': $!";
+    rename $new, $file
+        or die "Couldn't move embed.fnc.new to embed.fnc: $!";
+} elsif ($tap) {
+    print "ok - $0 $file\n";
+}
diff --git a/t/porting/header_parser.t b/t/porting/header_parser.t
new file mode 100644
index 0000000000..6a8975b507
--- /dev/null
+++ b/t/porting/header_parser.t
@@ -0,0 +1,517 @@
+#!./perl -w
+
+# Verify that all files generated by perl scripts are up to date.
+
+BEGIN {
+    if (-f "./TestInit.pm") {
+        push @INC, ".";
+    } elsif (-f '../TestInit.pm') {
+        push @INC, "..";
+    }
+}
+use TestInit qw(T A); # T is chdir to the top level, A makes paths absolute
+use strict;
+
+# this tests the functions in HeaderParser.pm which we use for make regen.
+
+require './t/test.pl';
+require './regen/HeaderParser.pm';
+
+skip_all_if_miniperl("needs Data::Dumper");
+
+require Data::Dumper;
+
+sub show_text {
+    my ($as_text)= @_;
+    print STDERR $as_text=~s/^/" " x 8/mger;
+}
+
+my $hp= HeaderParser->new;
+$hp->parse_text(<<~'EOF');
+    #ifdef A
+    #ifdef B
+    #define AB
+    content 1
+    #endif
+    content 2
+    #define A
+    #endif
+    /*comment
+      line */
+    #define C /* this is
+                 a hidden line continuation */ D
+    EOF
+my $normal= $hp->lines_as_str();
+my $lines= $hp->lines();
+my $lines_as_str= Data::Dumper->new([$lines])->Sortkeys(1)->Useqq(1)->Indent(1)->Dump();
+is($lines_as_str,<<~'DUMP_EOF', "Simple data structure as expected") or show_text($lines_as_str);
+        $VAR1 = [
+          bless( {
+            "cond" => [
+              [
+                "defined(A)"
+              ]
+            ],
+            "flat" => "#if defined(A)",
+            "level" => 0,
+            "line" => "#if defined(A)\n",
+            "n_lines" => 1,
+            "raw" => "#ifdef A\n",
+            "start_line_num" => 1,
+            "sub_type" => "#if",
+            "type" => "cond"
+          }, 'HeaderLine' ),
+          bless( {
+            "cond" => [
+              [
+                "defined(A)"
+              ],
+              [
+                "defined(B)"
+              ]
+            ],
+            "flat" => "#if defined(B)",
+            "level" => 1,
+            "line" => "# if defined(B)\n",
+            "n_lines" => 1,
+            "raw" => "#ifdef B\n",
+            "start_line_num" => 2,
+            "sub_type" => "#if",
+            "type" => "cond"
+          }, 'HeaderLine' ),
+          bless( {
+            "cond" => [
+              [
+                "defined(A)"
+              ],
+              [
+                "defined(B)"
+              ]
+            ],
+            "flat" => "#define AB",
+            "level" => 2,
+            "line" => "#   define AB\n",
+            "n_lines" => 1,
+            "raw" => "#define AB\n",
+            "start_line_num" => 3,
+            "sub_type" => "#define",
+            "type" => "content"
+          }, 'HeaderLine' ),
+          bless( {
+            "cond" => [
+              [
+                "defined(A)"
+              ],
+              [
+                "defined(B)"
+              ]
+            ],
+            "flat" => "content 1",
+            "level" => 2,
+            "line" => "content 1\n",
+            "n_lines" => 1,
+            "raw" => "content 1\n",
+            "start_line_num" => 4,
+            "sub_type" => "text",
+            "type" => "content"
+          }, 'HeaderLine' ),
+          bless( {
+            "cond" => [
+              [
+                "defined(A)"
+              ],
+              [
+                "defined(B)"
+              ]
+            ],
+            "flat" => "#endif",
+            "level" => 1,
+            "line" => "# endif /* defined(B) */\n",
+            "n_lines" => 1,
+            "raw" => "#endif\n",
+            "start_line_num" => 5,
+            "sub_type" => "#endif",
+            "type" => "cond"
+          }, 'HeaderLine' ),
+          bless( {
+            "cond" => [
+              [
+                "defined(A)"
+              ]
+            ],
+            "flat" => "content 2",
+            "level" => 1,
+            "line" => "content 2\n",
+            "n_lines" => 1,
+            "raw" => "content 2\n",
+            "start_line_num" => 6,
+            "sub_type" => "text",
+            "type" => "content"
+          }, 'HeaderLine' ),
+          bless( {
+            "cond" => [
+              [
+                "defined(A)"
+              ]
+            ],
+            "flat" => "#define A",
+            "level" => 1,
+            "line" => "# define A\n",
+            "n_lines" => 1,
+            "raw" => "#define A\n",
+            "start_line_num" => 7,
+            "sub_type" => "#define",
+            "type" => "content"
+          }, 'HeaderLine' ),
+          bless( {
+            "cond" => [
+              [
+                "defined(A)"
+              ]
+            ],
+            "flat" => "#endif",
+            "level" => 0,
+            "line" => "#endif /* defined(A) */\n",
+            "n_lines" => 1,
+            "raw" => "#endif\n",
+            "start_line_num" => 8,
+            "sub_type" => "#endif",
+            "type" => "cond"
+          }, 'HeaderLine' ),
+          bless( {
+            "cond" => [],
+            "flat" => "",
+            "level" => 0,
+            "line" => "/*comment\n  line */\n",
+            "n_lines" => 2,
+            "raw" => "/*comment\n  line */\n",
+            "start_line_num" => 9,
+            "sub_type" => "text",
+            "type" => "content"
+          }, 'HeaderLine' ),
+          bless( {
+            "cond" => [],
+            "flat" => "#define C D",
+            "level" => 0,
+            "line" => "#define C /* this is\n             a hidden line continuation */ D\n",
+            "n_lines" => 2,
+            "raw" => "#define C /* this is\n             a hidden line continuation */ D\n",
+            "start_line_num" => 11,
+            "sub_type" => "#define",
+            "type" => "content"
+          }, 'HeaderLine' )
+        ];
+        DUMP_EOF
+
+is($normal,<<~'EOF',"Normalized text as expected");
+    #if defined(A)
+    # if defined(B)
+    #   define AB
+    content 1
+    # endif /* defined(B) */
+    content 2
+    # define A
+    #endif /* defined(A) */
+    /*comment
+      line */
+    #define C /* this is
+                 a hidden line continuation */ D
+    EOF
+
+{
+    my @warn;
+    local $SIG{__WARN__}= sub { push @warn, $_[0]; warn $_[0] };
+    my $ok= eval {
+        HeaderParser->new()->parse_text(<<~'EOF'); 1
+        #ifdef A
+        #ifdef B
+        #endif
+        EOF
+    };
+    my $err= !$ok ? $@ : "";
+    ok(!$ok,"Should throw an error");
+    like($err,qr/Unterminated conditional block starting line 1 with last conditional operation at line 3/,
+         "Got expected error message");
+}
+{
+    my @warn;
+    local $SIG{__WARN__}= sub { push @warn, $_[0]; warn $_[0] };
+    my $ok= eval {
+        HeaderParser->new()->parse_text(<<~'EOF'); 1
+        #ifdef A
+        #ifdef B
+        #elif C
+        EOF
+    };
+    my $err= !$ok ? $@ : "";
+    ok(!$ok,"Should throw an error");
+    like($err,qr/Unterminated conditional block starting line 3/,
+         "Unterminated block detected");
+}
+{
+    my @warn;
+    local $SIG{__WARN__}= sub { push @warn, $_[0]; warn $_[0] };
+    my $ok= eval {
+        HeaderParser->new()->parse_text(<<~'EOF'); 1
+        #if 1 * * 10 > 5
+        #elifdef C
+        EOF
+    };
+    my $err= !$ok ? $@ : "";
+    ok(!$ok,"Should throw an error");
+    is($err,
+       "Error at line 1\n" .
+       "Line 1: #if 1 * * 10 > 5\n" .
+       "Error in multiplication expression: " .
+       "Unexpected token '*', expecting literal, unary, or expression.\n",
+         "Expected token error") or warn $err;
+}
+{
+    my $hp= HeaderParser->new(debug=>0);
+
+    $hp->parse_text(<<~'EOF');
+        #ifdef A
+        # ifdef B
+        #   define P
+        # else
+        #   define Q
+        # endif
+        # if !defined B
+        #   define R
+        # else
+        #   define S
+        # endif
+        #endif
+        EOF
+    my $grouped= $hp->group_content();
+    my $as_text= $hp->lines_as_str($grouped);
+    is($as_text,<<~'EOF',"inverted simple clauses get merged properly") or show_text($as_text);
+        #if defined(A)
+        # if defined(B)
+        #   define P
+        #   define S
+        # else /* if !defined(B) */
+        #   define Q
+        #   define R
+        # endif /* !defined(B) */
+        #endif /* defined(A) */
+        EOF
+}
+{
+    my $hp= HeaderParser->new(debug=>0);
+    $hp->parse_text(<<~'EOF');
+        #if defined(A) && defined(B)
+        # if (defined(C) && defined(D))
+        #   define P
+        # else
+        #   define Q
+        # endif
+        # if !(defined C && defined D)
+        #   define R
+        # else
+        #   define S
+        # endif
+        #endif
+        EOF
+    my $grouped= $hp->group_content();
+    my $as_text= $hp->lines_as_str($grouped);
+    is($as_text,<<~'EOF',"inverted complex clauses get merged properly") or show_text($as_text);
+        #if defined(A) && defined(B)
+        # if defined(C) && defined(D)
+        #   define P
+        #   define S
+        # else /* if !( defined(C) && defined(D) ) */
+        #   define Q
+        #   define R
+        # endif /* !( defined(C) && defined(D) ) */
+        #endif /* defined(A) && defined(B) */
+        EOF
+}
+{
+    my $hp= HeaderParser->new(debug=>0);
+    $hp->parse_text(<<~'EOF');
+        #if defined(A)
+        #define HAS_A
+        #elif defined(B)
+        #define HAS_B
+        #elif defined(C)
+        #define HAS_C
+        #else
+        #define HAS_D
+        #endif
+        EOF
+    my $grouped= $hp->group_content();
+    my $as_text= $hp->lines_as_str($grouped);
+    is($as_text,<<~'EOF',"test nested elif round trip") or show_text($as_text);
+        #if defined(A)
+        # define HAS_A
+        #elif defined(B) /* && !defined(A) */
+        # define HAS_B
+        #elif defined(C) /* && !defined(A) && !defined(B) */
+        # define HAS_C
+        #else /* if !defined(A) && !defined(B) && !defined(C) */
+        # define HAS_D
+        #endif /* !defined(A) && !defined(B) && !defined(C) */
+        EOF
+}
+{
+    my $hp= HeaderParser->new(debug=>0);
+    $hp->parse_text(<<~'EOF');
+        #if defined(A)
+        #define HAS_A
+        #endif
+        #if !defined(A) && defined(B)
+        #define HAS_B
+        #endif
+        #if defined(C)
+        #if !defined(A)
+        #if !defined(B)
+        #define HAS_C
+        #endif
+        #endif
+        #endif
+        #if !defined(B) && !defined(A) && !defined(C)
+        #define HAS_D
+        #endif
+        EOF
+    my $grouped= $hp->group_content();
+    my $as_text= $hp->lines_as_str($grouped);
+    is($as_text,<<~'EOF',"test elif composition from disparate statements") or show_text($as_text);
+        #if defined(A)
+        # define HAS_A
+        #elif defined(B) /* && !defined(A) */
+        # define HAS_B
+        #elif defined(C) /* && !defined(A) && !defined(B) */
+        # define HAS_C
+        #else /* if !defined(A) && !defined(B) && !defined(C) */
+        # define HAS_D
+        #endif /* !defined(A) && !defined(B) && !defined(C) */
+        EOF
+}
+{
+    my $hp= HeaderParser->new(debug=>0);
+    $hp->parse_text(<<~'EOF');
+        #if defined(A)
+        #define HAS_A
+        #endif
+        #if !defined(A)
+        #define HAS_NOT_A
+        #if !defined(C)
+        #define HAS_A_NOT_C
+        #endif
+        #endif
+        #if defined(C)
+        #define HAS_C
+        #if defined(A)
+        #define HAS_A_C
+        #endif
+        #else
+        #if defined(A)
+        #define HAS_NOT_C_A
+        #endif
+        #endif
+        EOF
+    my $grouped= $hp->group_content();
+    my $as_text= $hp->lines_as_str($grouped);
+    is($as_text,<<~'EOF',"test else composition") or show_text($as_text);
+        #if defined(A)
+        # define HAS_A
+        # if defined(C)
+        #   define HAS_A_C
+        # else /* if !defined(C) */
+        #   define HAS_NOT_C_A
+        # endif /* !defined(C) */
+        #else /* if !defined(A) */
+        # define HAS_NOT_A
+        # if !defined(C)
+        #   define HAS_A_NOT_C
+        # endif /* !defined(C) */
+        #endif /* !defined(A) */
+        #if defined(C)
+        # define HAS_C
+        #endif /* defined(C) */
+        EOF
+}
+{
+    my $hp= HeaderParser->new(debug=>0);
+    $hp->parse_text(<<~'EOF');
+        #if !defined(A)
+        #define NOT_A1
+        #else
+        #define A1
+        #endif
+        #if !!!!defined(A)
+        #define A2
+        #else
+        #define NOT_A2
+        #endif
+        EOF
+    my $grouped= $hp->group_content();
+    my $as_text= $hp->lines_as_str($grouped);
+    is($as_text,<<~'EOF',"normalization into if/else") or show_text($as_text);
+        #if defined(A)
+        # define A1
+        # define A2
+        #else /* if !defined(A) */
+        # define NOT_A1
+        # define NOT_A2
+        #endif /* !defined(A) */
+        EOF
+}
+{
+    my $hp= HeaderParser->new(debug=>0);
+    $hp->parse_text(<<~'EOF');
+        #if !!!(defined(A) && defined(B))
+        #define NOT_A_AND_B
+        #endif
+        #if defined(A)
+        #if defined(B)
+        #define A_AND_B
+        #endif
+        #endif
+        EOF
+    my $grouped= $hp->group_content();
+    my $as_text= $hp->lines_as_str($grouped);
+    is($as_text,<<~'EOF',"normalization with complex else") or show_text($as_text);
+        #if defined(A) && defined(B)
+        # define A_AND_B
+        #else /* if !( defined(A) && defined(B) ) */
+        # define NOT_A_AND_B
+        #endif /* !( defined(A) && defined(B) ) */
+        EOF
+}
+{
+    my $hp= HeaderParser->new(debug=>0);
+    $hp->parse_text(<<~'EOF');
+        #if defined(A) && !!defined(A) && !!!!defined(A)
+        #define HAS_A
+        #endif
+        EOF
+    my $grouped= $hp->group_content();
+    my $as_text= $hp->lines_as_str($grouped);
+    is($as_text,<<~'EOF',"simplification") or show_text($as_text);
+        #if defined(A)
+        # define HAS_A
+        #endif /* defined(A) */
+        EOF
+}
+{
+    local $::TODO;
+    $::TODO= "Absorbtion not implemented yet";
+    # currently we don't handle absorbtion: (A && (A || B || C ...)) == A
+    my $hp= HeaderParser->new(debug=>0);
+    $hp->parse_text(<<~'EOF');
+        #if defined(X) && (defined(X) || defined(Y))
+        #define HAS_X
+        #endif
+        EOF
+    my $grouped= $hp->group_content();
+    my $as_text= $hp->lines_as_str($grouped);
+    is($as_text,<<~'EOF',"simplification by absorbtion"); # or show_text($as_text);
+        #if defined(X)
+        # define HAS_X
+        #endif /* defined(X) */
+        EOF
+}
+
+done_testing();