regen/embed.pl - switch to using HeaderParser for code generation and etc.

HeaderParser was designed to replace the old grouping code in embed.pl and regen/embed_lib.pl, which is used to generate embed.h and proto.h and embedvar.h It can handle "elif" in embed.fnc, and produces more consistently structured and formatted and readable output than the old code. It also has much better logic to dedupe expressions. Adding or changing a constraint in embed.fnc should no longer have any "action at a distance" effects on the output for other functions which were not changed or obviously affected by the change. The old code assumed that sorting the constraints applying to a given function definition was acceptable, with the result that "elif" was impossible to support properly, and creating the problem that adding a new constraint which sorted into a different position could change a large amount of the output, making it hard to verify that the change was correct. The new logic should, once the initial normalization is applied, ensure that any further changes are minimal. This patch also includes a new tool regen/normalize_embed.pl which will be run by make regen, which consistently formats embed.fnc itself, which should make maintaining the file easier, especially during code splits and reorgs. Function definitions can be lifted out, moved to the end, with new constraints, and the `make regen` will put it all back into the correct place and order. A number of tools and tests which use embed_lib.pl to load embed.fnc data have necessarily been changed to use the new HeaderParser based logic.
author: Yves Orton <demerphq@gmail.com> 2022-12-16 18:31:17 +0100
committer: Yves Orton <demerphq@gmail.com> 2022-12-24 01:17:22 +0100
commit: 69e2d40a60d6d9ad69498a1b5a45e53763ea04f3 (patch)
tree: d8601980c88b5c62ef168a85841f1e7e7810b3e3 /regen
parent: d655b7ce0d9f884dc0965633e816e31c75a953a1 (diff)
download: perl-69e2d40a60d6d9ad69498a1b5a45e53763ea04f3.tar.gz
4 files changed, 394 insertions, 359 deletions
diff --git a/regen/embed.pl b/regen/embed.pl
index 00a09b6f75..e23d756f05 100755
--- a/regen/embed.pl
+++ b/regen/embed.pl
@@ -17,7 +17,7 @@
 #
 # This script is normally invoked from regen.pl.
 
-require 5.004;	# keep this compatible, an old perl is all we may have before
+require 5.004;  # keep this compatible, an old perl is all we may have before
                 # we build the new one
 
 use strict;
@@ -29,6 +29,7 @@ BEGIN {
 }
 
 my $unflagged_pointers;
+my @az = ('a'..'z');
 
 #
 # See database of global and static function prototypes in embed.fnc
@@ -58,27 +59,48 @@ sub open_print_header {
 
     return open_new($file, '>',
                     { file => $file, style => '*', by => 'regen/embed.pl',
-                      from => ['data in embed.fnc', 'regen/embed.pl',
-                               'regen/opcodes', 'intrpvar.h', 'perlvars.h'],
+                      from => [
+                               'embed.fnc',
+                               'intrpvar.h',
+                               'perlvars.h',
+                               'regen/opcodes',
+                               'regen/embed.pl',
+                               'regen/embed_lib.pl',
+                               'regen/HeaderParser.pm',
+                           ],
                       final => "\nEdit those files and run 'make regen_headers' to effect changes.\n",
-                      copyright => [1993 .. 2009], quote => $quote });
+                      copyright => [1993 .. 2022],
+                      quote => $quote });
 }
 
-my ($embed, $core, $ext, $api) = setup_embed();
+
+sub open_buf_out {
+    $_[0] //= "";
+    open my $fh,">", \$_[0]
+        or die "Failed to open buffer: $!";
+    return $fh;
+}
 
 # generate proto.h
-{
-    my $pr = open_print_header("proto.h");
-    print $pr "START_EXTERN_C\n";
+sub generate_proto_h {
+    my ($all)= @_;
+    my $pr = open_buf_out(my $proto_buffer);
     my $ret;
 
-    foreach (@$embed) {
-        if (@$_ == 1) {
-            print $pr "$_->[0]\n";
+    foreach (@$all) {
+        if ($_->{type} ne "content") {
+            print $pr "$_->{line}";
             next;
         }
+        my $embed= $_->{embed}
+            or next;
 
-        my ($flags,$retval,$plain_func,@args) = @$_;
+        my $level= $_->{level};
+        my $ind= $level ? " " : "";
+        $ind .= "  " x ($level-1) if $level>1;
+        my $inner_ind= $ind ? "  " : " ";
+
+        my ($flags,$retval,$plain_func,$args) = @{$embed}{qw(flags return_type name args)};
         if ($flags =~ / ( [^AabCDdEefFGhIiMmNnOoPpRrSsTUuWXx;] ) /x) {
             die_at_end "flag $1 is not legal (for function $plain_func)";
         }
@@ -142,17 +164,17 @@ my ($embed, $core, $ext, $api) = setup_embed();
 
         die_at_end "For '$plain_func', M flag requires p flag"
                                             if $flags =~ /M/ && $flags !~ /p/;
-	my $C_required_flags = '[pIimbs]';
+        my $C_required_flags = '[pIimbs]';
         die_at_end
-	    "For '$plain_func', C flag requires one of $C_required_flags] flags"
-						if $flags =~ /C/
-						&& ($flags !~ /$C_required_flags/
+            "For '$plain_func', C flag requires one of $C_required_flags] flags"
+                                                if $flags =~ /C/
+                                                && ($flags !~ /$C_required_flags/
 
-						   # Notwithstanding the
-						   # above, if the name won't
-						   # clash with a user name,
-						   # it's ok.
-						&& $plain_func !~ /^[Pp]erl/);
+                                                   # Notwithstanding the
+                                                   # above, if the name won't
+                                                   # clash with a user name,
+                                                   # it's ok.
+                                                && $plain_func !~ /^[Pp]erl/);
 
         die_at_end "For '$plain_func', X flag requires one of [Iip] flags"
                                             if $flags =~ /X/ && $flags !~ /[Iip]/;
@@ -168,32 +190,33 @@ my ($embed, $core, $ext, $api) = setup_embed();
                                             if $flags =~ /I/ && $flags =~ /i/;
 
         $ret = "";
-        $ret .= "$retval\t$func(";
+        $ret .= "$retval\n";
+        $ret .= "$func(";
         if ( $has_context ) {
-            $ret .= @args ? "pTHX_ " : "pTHX";
+            $ret .= @$args ? "pTHX_ " : "pTHX";
         }
-        if (@args) {
+        if (@$args) {
             die_at_end "n flag is contradicted by having arguments"
                                                                 if $flags =~ /n/;
             my $n;
-            for my $arg ( @args ) {
+            for my $arg ( @$args ) {
                 ++$n;
-		if ($arg =~ / ^ " (.+) " $ /x) {    # Handle literal string
-		    my $name = $1;
-
-		    # Make the string a legal C identifier; 'p' is arbitrary,
-		    # and is because C reserves leading underscores
-		    $name =~ s/^\W/p/a;
-		    $name =~ s/\W/_/ag;
-
-		    $arg = "const char * const $name";
-		    die_at_end 'm flag required for "literal" argument'
-							    unless $flags =~ /m/;
-		}
-		elsif (   $args_assert_line
-		       && $arg =~ /\*/
-		       && $arg !~ /\b(NN|NULLOK)\b/ )
-		{
+                if ($arg =~ / ^ " (.+) " $ /x) {    # Handle literal string
+                    my $name = $1;
+
+                    # Make the string a legal C identifier; 'p' is arbitrary,
+                    # and is because C reserves leading underscores
+                    $name =~ s/^\W/p/a;
+                    $name =~ s/\W/_/ag;
+
+                    $arg = "const char * const $name";
+                    die_at_end 'm flag required for "literal" argument'
+                                                            unless $flags =~ /m/;
+                }
+                elsif (   $args_assert_line
+                       && $arg =~ /\*/
+                       && $arg !~ /\b(NN|NULLOK)\b/ )
+                {
                     warn "$func: $arg needs NN or NULLOK\n";
                     ++$unflagged_pointers;
                 }
@@ -215,7 +238,7 @@ my ($embed, $core, $ext, $api) = setup_embed();
                     push @names_of_nn, $1;
                 }
             }
-            $ret .= join ", ", @args;
+            $ret .= join ", ", @$args;
         }
         else {
             $ret .= "void" if !$has_context;
@@ -246,24 +269,28 @@ my ($embed, $core, $ext, $api) = setup_embed();
             push @attrs, '__attribute__visibility__("hidden")'
         }
         if( $flags =~ /f/ ) {
-            my $prefix	= $has_context ? 'pTHX_' : '';
+            my $prefix  = $has_context ? 'pTHX_' : '';
             my ($argc, $pat);
-            if ($args[-1] eq '...') {
-                $argc	= scalar @args;
-                $pat	= $argc - 1;
-                $argc	= $prefix . $argc;
+            if (!defined $args->[1]) {
+                use Data::Dumper;
+                die Dumper($_);
+            }
+            if ($args->[-1] eq '...') {
+                $argc   = scalar @$args;
+                $pat    = $argc - 1;
+                $argc   = $prefix . $argc;
             }
             else {
                 # don't check args, and guess which arg is the pattern
                 # (one of 'fmt', 'pat', 'f'),
                 $argc = 0;
-                my @fmts = grep $args[$_] =~ /\b(f|pat|fmt)$/, 0..$#args;
+                my @fmts = grep $args->[$_] =~ /\b(f|pat|fmt)$/, 0..$#$args;
                 if (@fmts != 1) {
                     die "embed.pl: '$plain_func': can't determine pattern arg\n";
                 }
                 $pat = $fmts[0] + 1;
             }
-            my $macro	= grep($_ == $pat, @nonnull)
+            my $macro   = grep($_ == $pat, @nonnull)
                                 ? '__attribute__format__'
                                 : '__attribute__format__null_ok__';
             if ($plain_func =~ /strftime/) {
@@ -274,116 +301,163 @@ my ($embed, $core, $ext, $api) = setup_embed();
                                     $prefix, $pat, $argc;
             }
         }
-        elsif ((grep { $_ eq '...' } @args) && $flags !~ /F/) {
+        elsif ((grep { $_ eq '...' } @$args) && $flags !~ /F/) {
             die_at_end "$plain_func: Function with '...' arguments must have"
                      . " f or F flag";
         }
         if ( @attrs ) {
             $ret .= "\n";
-            $ret .= join( "\n", map { "\t\t\t$_" } @attrs );
+            $ret .= join( "\n", map { (" " x 8) . $_ } @attrs );
         }
         $ret .= ";";
         $ret = "/* $ret */" if $commented_out;
 
-        $ret .= "\n#define PERL_ARGS_ASSERT_\U$plain_func\E"
-                                            if $args_assert_line || @names_of_nn;
-        $ret .= "\t\\\n\t" . join '; ', map "assert($_)", @names_of_nn
-                                                                if @names_of_nn;
+        if ($args_assert_line || @names_of_nn) {
+            $ret .= "\n#${ind}define PERL_ARGS_ASSERT_\U$plain_func\E";
+            if (@names_of_nn) {
+                $ret .= " \\\n";
+                my $def = " " x 8;
+                foreach my $ix (0..$#names_of_nn) {
+                    $def .= "assert($names_of_nn[$ix])";
+                    if ($ix == $#names_of_nn) {
+                        $def .= "\n";
+                    } elsif (length $def > 70) {
+                        $ret .= $def . "; \\\n";
+                        $def = " " x 8;
+                    } else {
+                        $def .= "; ";
+                    }
+                }
+                $ret .= $def;
+            }
+        }
+        $ret .= "\n";
+
+        $ret = "#${ind}ifndef PERL_NO_INLINE_FUNCTIONS\n$ret\n#${ind}endif"
+            if $static_inline;
+        $ret = "#${ind}ifndef NO_MATHOMS\n$ret\n#${ind}endif"
+            if $binarycompat;
 
-        $ret = "#ifndef PERL_NO_INLINE_FUNCTIONS\n$ret\n#endif" if $static_inline;
-        $ret = "#ifndef NO_MATHOMS\n$ret\n#endif" if $binarycompat;
         $ret .= @attrs ? "\n\n" : "\n";
 
         print $pr $ret;
     }
 
-    print $pr <<'EOF';
-#ifdef PERL_CORE
-#  include "pp_proto.h"
-#endif
-END_EXTERN_C
-EOF
 
-    read_only_bottom_close_and_rename($pr) if ! $error_count;
+    close $pr;
+
+    my $clean= normalize_group_content($proto_buffer);
+
+    my $fh = open_print_header("proto.h");
+    print $fh <<~"EOF";
+    START_EXTERN_C
+    $clean
+    #ifdef PERL_CORE
+    #  include "pp_proto.h"
+    #endif
+    END_EXTERN_C
+    EOF
+
+    read_only_bottom_close_and_rename($fh) if ! $error_count;
+}
+
+{
+    my $hp= HeaderParser->new();
+    sub normalize_group_content {
+        open my $in, "<", \$_[0]
+            or die "Failed to open buffer: $!";
+        $hp->parse_fh($in);
+        my $ppc= sub {
+            my ($self, $line_data)= @_;
+            # re-align defines so that the definitions line up at the 48th col
+            # as much as possible.
+            if ($line_data->{sub_type} eq "#define") {
+                $line_data->{line}=~s/^(\s*#\s*define\s+\S+?(?:\([^()]*\))?\s)(\s*)(\S+)/
+                    sprintf "%-48s%s", $1, $3/e;
+            }
+        };
+        my $clean= $hp->lines_as_str($hp->group_content(),$ppc);
+        return $clean;
+    }
+}
+
+sub normalize_and_print {
+    my ($file, $buffer)= @_;
+    my $fh = open_print_header($file);
+    print $fh normalize_group_content($buffer);
+    read_only_bottom_close_and_rename($fh);
 }
 
-die_at_end "$unflagged_pointers pointer arguments to clean up\n" if $unflagged_pointers;
 
 sub readvars {
     my ($file, $pre) = @_;
-    local (*FILE, $_);
+    my $hp= HeaderParser->new()->read_file($file);
     my %seen;
-    open(FILE, '<', $file)
-        or die "embed.pl: Can't open $file: $!\n";
-    while (<FILE>) {
-        s/[ \t]*#.*//;		# Delete comments.
-        if (/PERLVARA?I?C?\($pre,\s*(\w+)/) {
-            die_at_end "duplicate symbol $1 while processing $file line $.\n"
-                if $seen{$1}++;
+    foreach my $line_data (@{$hp->lines}) {
+        #next unless $line_data->is_content;
+        my $line= $line_data->line;
+        if ($line=~m/^\s*PERLVARA?I?C?\(\s*$pre\s*,\s*(\w+)/){
+            $seen{$1}++
+                and
+                die_at_end "duplicate symbol $1 while processing $file line "
+                       . ($line_data->start_line_num) . "\n"
         }
     }
-    close(FILE);
-    return sort keys %seen;
+    my @keys= sort { lc($a) cmp lc($b) ||
+                        $a  cmp    $b }
+              keys %seen;
+    return @keys;
 }
 
-my @intrp = readvars 'intrpvar.h','I';
-my @globvar = readvars 'perlvars.h','G';
+sub add_indent {
+    #my ($ret, $add, $width)= @_;
+    my $width= $_[2] || 48;
+    $_[0] .= " " x ($width-length($_[0])) if length($_[0])<$width;
+    $_[0] .= " " unless $_[0]=~/\s\z/;
+    if (defined $_[1]) {
+        $_[0] .= $_[1];
+    }
+    return $_[0];
+}
 
-sub hide {
-    my ($from, $to, $indent) = @_;
+sub indent_define {
+    my ($from, $to, $indent, $width) = @_;
     $indent = '' unless defined $indent;
-    my $t = int(length("$indent$from") / 8);
-    "#${indent}define $from" . "\t" x ($t < 3 ? 3 - $t : 1) . "$to\n";
+    my $ret= "#${indent}define $from";
+    add_indent($ret,"$to\n",$width);
 }
 
-sub multon ($$$) {
-    my ($sym,$pre,$ptr) = @_;
-    hide("PL_$sym", "($ptr$pre$sym)");
+sub multon {
+    my ($sym,$pre,$ptr,$ind) = @_;
+    $ind//="";
+    indent_define("PL_$sym", "($ptr$pre$sym)", $ind);
 }
 
-my $em = open_print_header('embed.h');
-
-print $em <<'END';
-/* (Doing namespace management portably in C is really gross.) */
-
-/* By defining PERL_NO_SHORT_NAMES (not done by default) the short forms
- * (like warn instead of Perl_warn) for the API are not defined.
- * Not defining the short forms is a good thing for cleaner embedding.
- * BEWARE that a bunch of macros don't have long names, so either must be
- * added or don't use them if you define this symbol */
-
-#ifndef PERL_NO_SHORT_NAMES
-
-/* Hide global symbols */
-
-END
-
-my @az = ('a'..'z');
-
 sub embed_h {
-    my ($guard, $funcs) = @_;
-    print $em "$guard\n" if $guard;
+    my ($em, $guard, $funcs) = @_;
 
     my $lines;
     foreach (@$funcs) {
-        if (@$_ == 1) {
-            my $cond = $_->[0];
-            # Indent the conditionals if we are wrapped in an #if/#endif pair.
-            $cond =~ s/#(.*)/#  $1/ if $guard;
-            $lines .= "$cond\n";
+        if ($_->{type} ne "content") {
+            $lines .= $_->{line};
             next;
         }
+        my $level= $_->{level};
+        my $embed= $_->{embed} or next;
+        my ($flags,$retval,$func,$args) = @{$embed}{qw(flags return_type name args)};
         my $ret = "";
-        my ($flags,$retval,$func,@args) = @$_;
+        my $ind= $level ? " " : "";
+        $ind .= "  " x ($level-1) if $level>1;
+        my $inner_ind= $ind ? "  " : " ";
         unless ($flags =~ /[omM]/) {
-            my $argc = scalar @args;
+            my $argc = scalar @$args;
             if ($flags =~ /T/) {
                 my $full_name = full_name($func, $flags);
-                next if $full_name eq $func;	# Don't output a no-op.
-                $ret = hide($func, $full_name);
+                next if $full_name eq $func;    # Don't output a no-op.
+                $ret = indent_define($func, $full_name, $ind);
             }
             else {
-                my $use_va_list = $argc && $args[-1] =~ /\.\.\./;
+                my $use_va_list = $argc && $args->[-1] =~ /\.\.\./;
 
                 if($use_va_list) {
                     # CPP has trouble with empty __VA_ARGS__ and comma joining,
@@ -398,11 +472,8 @@ sub embed_h {
                     $use_va_list ? ("...") : ());
                 my $replacelist = join(",", @az[0..$argc-1],
                     $use_va_list ? ("__VA_ARGS__") : ());
-
-                $ret = "#define $func($paramlist)";
-                my $t = int(length($ret) / 8);
-                $ret .=  "\t" x ($t < 4 ? 4 - $t : 1);
-                $ret .= full_name($func, $flags) . "(aTHX";
+                $ret = "#${ind}define $func($paramlist) ";
+                add_indent($ret,full_name($func, $flags) . "(aTHX");
                 $ret .= "_ " if $replacelist;
                 $ret .= $replacelist;
                 if ($flags =~ /W/) {
@@ -413,147 +484,175 @@ sub embed_h {
                     }
                 }
                 $ret .= ")\n";
-
                 if($use_va_list) {
                     # Make them available to !MULTIPLICITY or PERL_CORE
-                    $ret = "#if !defined(MULTIPLICITY) || defined(PERL_CORE)\n" .
+                    $ret = "#${ind}if !defined(MULTIPLICITY) || defined(PERL_CORE)\n" .
                            $ret .
-                           "#endif\n";
+                           "#${ind}endif\n";
                 }
             }
-            $ret = "#ifndef NO_MATHOMS\n$ret#endif\n" if $flags =~ /b/;
+            $ret = "#${ind}ifndef NO_MATHOMS\n$ret#${ind}endif\n" if $flags =~ /b/;
         }
         $lines .= $ret;
     }
-    # Prune empty #if/#endif pairs.
-    while ($lines =~ s/#\s*if[^\n]+\n#\s*endif\n//) {
+    # remove empty blocks
+    1 while $lines =~ s/^#\s*if.*\n#\s*endif.*\n//mg
+         or $lines =~ s/^(#\s*if)\s+(.*)\n#else.*\n/$1 !($2)\n/mg;
+    if ($guard) {
+        print $em "$guard /* guard */\n";
+        $lines=~s/^#(\s*)/"#".(length($1)?"  ":" ").$1/mge;
     }
-    # Merge adjacent blocks.
-    while ($lines =~ s/(#ifndef MULTIPLICITY
-[^\n]+
-)#endif
-#ifndef MULTIPLICITY
-/$1/) {
-    }
-
     print $em $lines;
     print $em "#endif\n" if $guard;
 }
 
-embed_h('', $api);
-embed_h('#if defined(PERL_CORE) || defined(PERL_EXT)', $ext);
-embed_h('#ifdef PERL_CORE', $core);
+sub generate_embed_h {
+    my ($all, $api, $ext, $core)= @_;
 
-print $em <<'END';
+    my $em= open_buf_out(my $embed_buffer);
 
-#endif	/* #ifndef PERL_NO_SHORT_NAMES */
+    print $em <<~'END';
+    /* (Doing namespace management portably in C is really gross.) */
 
-/* Compatibility stubs.  Compile extensions with -DPERL_NOCOMPAT to
-   disable them.
- */
+    /* By defining PERL_NO_SHORT_NAMES (not done by default) the short forms
+     * (like warn instead of Perl_warn) for the API are not defined.
+     * Not defining the short forms is a good thing for cleaner embedding.
+     * BEWARE that a bunch of macros don't have long names, so either must be
+     * added or don't use them if you define this symbol */
 
-#if !defined(PERL_CORE)
-#  define sv_setptrobj(rv,ptr,name)	sv_setref_iv(rv,name,PTR2IV(ptr))
-#  define sv_setptrref(rv,ptr)		sv_setref_iv(rv,NULL,PTR2IV(ptr))
-#endif
+    #ifndef PERL_NO_SHORT_NAMES
 
-#if !defined(PERL_CORE) && !defined(PERL_NOCOMPAT)
+    /* Hide global symbols */
 
-/* Compatibility for various misnamed functions.  All functions
-   in the API that begin with "perl_" (not "Perl_") take an explicit
-   interpreter context pointer.
-   The following are not like that, but since they had a "perl_"
-   prefix in previous versions, we provide compatibility macros.
- */
-#  define perl_atexit(a,b)		call_atexit(a,b)
-END
+    END
 
-foreach (@$embed) {
-    my ($flags, $retval, $func, @args) = @$_;
-    next unless $func;
-    next unless $flags =~ /O/;
+    embed_h($em, '', $api);
+    embed_h($em, '#if defined(PERL_CORE) || defined(PERL_EXT)', $ext);
+    embed_h($em, '#if defined(PERL_CORE)', $core);
 
-    my $alist = join ",", @az[0..$#args];
-    my $ret = "#  define perl_$func($alist)";
-    my $t = (length $ret) >> 3;
-    $ret .=  "\t" x ($t < 5 ? 5 - $t : 1);
-    print $em "$ret$func($alist)\n";
-}
+    print $em <<~'END';
 
-my @nocontext;
-{
-    my (%has_va, %has_nocontext);
-    foreach (@$embed) {
-        next unless @$_ > 1;
-        ++$has_va{$_->[2]} if $_->[-1] =~ /\.\.\./;
-        ++$has_nocontext{$1} if $_->[2] =~ /(.*)_nocontext/;
+    #endif      /* #ifndef PERL_NO_SHORT_NAMES */
+
+    #if !defined(PERL_CORE)
+    /* Compatibility stubs.  Compile extensions with -DPERL_NOCOMPAT to
+     * disable them.
+     */
+    #  define sv_setptrobj(rv,ptr,name) sv_setref_iv(rv,name,PTR2IV(ptr))
+    #  define sv_setptrref(rv,ptr)              sv_setref_iv(rv,NULL,PTR2IV(ptr))
+    #endif
+
+    #if !defined(PERL_CORE) && !defined(PERL_NOCOMPAT)
+
+    /* Compatibility for various misnamed functions.  All functions
+       in the API that begin with "perl_" (not "Perl_") take an explicit
+       interpreter context pointer.
+       The following are not like that, but since they had a "perl_"
+       prefix in previous versions, we provide compatibility macros.
+     */
+    #  define perl_atexit(a,b)          call_atexit(a,b)
+    END
+
+    foreach (@$all) {
+        my $embed= $_->{embed} or next;
+        my ($flags, $retval, $func, $args) = @{$embed}{qw(flags return_type name args)};
+        next unless $flags =~ /O/;
+
+        my $alist = join ",", @az[0..$#$args];
+        my $ret = "#  define perl_$func($alist) ";
+        print $em add_indent($ret,"$func($alist)\n");
     }
 
-    @nocontext = sort grep {
-        $has_nocontext{$_}
-            && !/printf/ # Not clear to me why these are skipped but they are.
-    } keys %has_va;
-}
+    my @nocontext;
+    {
+        my (%has_va, %has_nocontext);
+        foreach (@$all) {
+            my $embed= $_->{embed}
+                or next;
+            ++$has_va{$embed->{name}} if @{$embed->{args}} and $embed->{args}[-1] =~ /\.\.\./;
+            ++$has_nocontext{$1} if $embed->{name} =~ /(.*)_nocontext/;
+        }
 
-print $em <<'END';
+        @nocontext = sort grep {
+            $has_nocontext{$_}
+                && !/printf/ # Not clear to me why these are skipped but they are.
+        } keys %has_va;
+    }
 
-/* varargs functions can't be handled with CPP macros. :-(
-   This provides a set of compatibility functions that don't take
-   an extra argument but grab the context pointer using the macro
-   dTHX.
- */
-#if defined(MULTIPLICITY) && !defined(PERL_NO_SHORT_NAMES)
-END
+    print $em <<~'END';
 
-foreach (@nocontext) {
-    print $em hide($_, "Perl_${_}_nocontext", "  ");
-}
+    /* varargs functions can't be handled with CPP macros. :-(
+       This provides a set of compatibility functions that don't take
+       an extra argument but grab the context pointer using the macro
+       dTHX.
+     */
+    #if defined(MULTIPLICITY) && !defined(PERL_NO_SHORT_NAMES)
+    END
 
-print $em <<'END';
-#endif
+    foreach (@nocontext) {
+        print $em indent_define($_, "Perl_${_}_nocontext", "  ");
+    }
+
+    print $em <<~'END';
+    #endif
 
-#endif /* !defined(PERL_CORE) && !defined(PERL_NOCOMPAT) */
+    #endif /* !defined(PERL_CORE) && !defined(PERL_NOCOMPAT) */
 
-#if !defined(MULTIPLICITY)
-/* undefined symbols, point them back at the usual ones */
-END
+    #if !defined(MULTIPLICITY)
+    /* undefined symbols, point them back at the usual ones */
+    END
+
+    foreach (@nocontext) {
+        print $em indent_define("Perl_${_}_nocontext", "Perl_$_", "  ");
+    }
 
-foreach (@nocontext) {
-    print $em hide("Perl_${_}_nocontext", "Perl_$_", "  ");
+    print $em "#endif\n";
+    close $em;
+
+    normalize_and_print('embed.h',$embed_buffer)
+        unless $error_count;
 }
 
-print $em <<'END';
-#endif
-END
+sub generate_embedvar_h {
+    my $em = open_buf_out(my $embedvar_buffer);
 
-read_only_bottom_close_and_rename($em) if ! $error_count;
+    print $em "#if defined(MULTIPLICITY)\n",
+              indent_define("vTHX","aTHX"," ");
 
-$em = open_print_header('embedvar.h');
 
-print $em <<'END';
-#if defined(MULTIPLICITY)
-#  define vTHX	aTHX
-END
+    my @intrp = readvars 'intrpvar.h','I';
+    #my @globvar = readvars 'perlvars.h','G';
 
-my $sym;
 
-for $sym (@intrp) {
-    if ($sym eq 'sawampersand') {
-        print $em "#ifndef PL_sawampersand\n";
-    }
-    print $em multon($sym,'I','vTHX->');
-    if ($sym eq 'sawampersand') {
-        print $em "#endif\n";
+    for my $sym (@intrp) {
+        my $ind = " ";
+        if ($sym eq 'sawampersand') {
+            print $em "# if !defined(PL_sawampersand)\n";
+            $ind = "   ";
+        }
+        my $line = multon($sym, 'I', 'vTHX->', $ind);
+        print $em $line;
+        if ($sym eq 'sawampersand') {
+            print $em "# endif /* !defined(PL_sawampersand) */\n";
+        }
     }
-}
 
-print $em <<'END';
+    print $em "#endif       /* MULTIPLICITY */\n";
+    close $em;
 
-#endif	/* MULTIPLICITY */
-END
+    normalize_and_print('embedvar.h',$embedvar_buffer)
+        unless $error_count;
+}
 
-read_only_bottom_close_and_rename($em) if ! $error_count;
+sub update_headers {
+    my ($all, $api, $ext, $core) = setup_embed(); # see regen/embed_lib.pl
+    generate_proto_h($all);
+    die_at_end "$unflagged_pointers pointer arguments to clean up\n" if $unflagged_pointers;
+    generate_embed_h($all, $api, $ext, $core);
+    generate_embedvar_h();
+    die "$error_count errors found" if $error_count;
+}
 
-die "$error_count errors found" if $error_count;
+update_headers() unless caller;
 
 # ex: set ts=8 sts=4 sw=4 noet:
diff --git a/regen/embed_lib.pl b/regen/embed_lib.pl
index 4790153f95..2d5946a613 100644
--- a/regen/embed_lib.pl
+++ b/regen/embed_lib.pl
@@ -1,5 +1,16 @@
 #!/usr/bin/perl -w
+BEGIN {
+    for $n (qw(lib regen)) {
+        if (-e "../$n") {
+            push @INC, "../$n";
+        } elsif (-e "./$n") {
+            push @INC, "./$n";
+        }
+    }
+}
 use strict;
+use warnings;
+use HeaderParser;
 
 # read embed.fnc and regen/opcodes, needed by regen/embed.pl, makedef.pl,
 # autodoc.pl and t/porting/diag.t
@@ -7,158 +18,81 @@ use strict;
 require 5.004;	# keep this compatible, an old perl is all we may have before
                 # we build the new one
 
-# Records the current pre-processor state:
-my @state;
-# Nested structure to group functions by the pre-processor conditions that
-# control when they are compiled:
-my %groups;
-
-sub current_group {
-    my $group = \%groups;
-    # Nested #if blocks are effectively &&ed together
-    # For embed.fnc, ordering within the && isn't relevant, so we can
-    # sort them to try to group more functions together.
-    foreach (sort @state) {
-        $group->{$_} ||= {};
-        $group = $group->{$_};
-    }
-    return $group->{''} ||= [];
-}
-
-sub add_level {
-    my ($level, $indent, $wanted) = @_;
-    my $funcs = $level->{''};
-    my @entries;
-    if ($funcs) {
-        if (!defined $wanted) {
-            @entries = @$funcs;
-        } else {
-            foreach (@$funcs) {
-                if ($_->[0] =~ /[AC]/) { # 'C' is like 'A' for our purposes
-                                         # here
-                    push @entries, $_ if $wanted eq 'A';
-                } elsif ($_->[0] =~ /E/) {
-                    push @entries, $_ if $wanted eq 'E';
-                } else {
-                    push @entries, $_ if $wanted eq '';
-                }
-            }
-        }
-        @entries = sort {$a->[2] cmp $b->[2]} @entries;
-    }
-    foreach (sort grep {length $_} keys %$level) {
-        my @conditional = add_level($level->{$_}, $indent . '  ', $wanted);
-        push @entries,
-            ["#${indent}if $_"], @conditional, ["#${indent}endif"]
-                if @conditional;
-    }
-    return @entries;
-}
-
 sub setup_embed {
     my $prefix = shift || '';
-    open IN, '<', $prefix . 'embed.fnc' or die $!;
-
-    my @embed;
-    my %seen;
-    my $macro_depth = 0;
-
-    while (<IN>) {
-        chomp;
-        next if /^:/;
-        next if /^$/;
-        while (s|\\$||) {
-            $_ .= <IN>;
-            chomp;
-        }
-        s/\s+$//;
-        my @args;
-        if (/^\s*(#|$)/) {
-            @args = $_;
-        }
-        else {
-            @args = split /\s*\|\s*/, $_;
-        }
-        if (@args == 1) {
-            if ($args[0] !~ /^#\s*(?:if|ifdef|ifndef|else|endif)/) {
-                die "Illegal line $. '$args[0]' in embed.fnc";
-            }
-            $macro_depth++ if $args[0] =~/^#\s*if(n?def)?\b/;
-            $macro_depth-- if $args[0] =~/^#\s*endif\b/;
-            die "More #endif than #if in embed.fnc:$." if $macro_depth < 0;
-        }
-        else  {
-            die "Illegal line (less than 3 fields) in embed.fnc:$.: $_"
-                unless @args >= 3;
-            my $name = $args[2];
-            # only check for duplicates outside of #if's - otherwise
-            # they may be alternate definitions of the same function
-            if ($macro_depth == 0) {
-                die "Duplicate function name: '$name' in embed.fnc:$."
-                    if exists $seen{$name};
-            }
-            $seen{$name} = 1;
-        }
-
-        push @embed, \@args;
-    }
-    die "More #if than #endif by the end of embed.fnc" if $macro_depth != 0;
-
-    close IN or die "Problem reading embed.fnc: $!";
-
-    open IN, '<', $prefix . 'regen/opcodes' or die $!;
+    my $parser= HeaderParser->new(
+        pre_process_content => sub {
+            my ($self,$line_data)= @_;
+            # HeaderParser knows how to parse and normalize embed_fnc.
+            # calling this here ensures sets up the embed subpacket.
+            $self->tidy_embed_fnc_entry($line_data);
+            my $embed= $line_data->{embed}
+                or return;
+        },
+        post_process_grouped_content => sub {
+            # sort the group content by name.
+            @{$_[1]}=
+                sort {
+                    $a->{embed}{name} cmp $b->{embed}{name}
+                } @{$_[1]};
+        },
+    )->read_file($prefix . 'embed.fnc');
+    my $lines= $parser->lines();
+
+    # add the opcode checker functions automatically.
+    open my $in_fh, '<', $prefix . 'regen/opcodes' or die $!;
     {
         my %syms;
 
-        while (<IN>) {
-            chomp;
-            next unless $_;
-            next if /^#/;
-            my $check = (split /\t+/, $_)[2];
+        my $line_num = 0;
+        while (my $line= <$in_fh>) {
+            $line_num++;
+            chomp($line);
+            next unless $line;
+            next if $line=~/^#/;
+            my $check = (split /\t+/, $line)[2];
             next if $syms{$check}++;
 
             # These are all indirectly referenced by globals.c.
-            push @embed, ['pR', 'OP *', $check, 'NN OP *o'];
+            my $new= HeaderLine->new(
+                cond => [["defined(PERL_IN_GLOBALS_C) || defined(PERL_IN_OP_C) || defined(PERL_IN_PEEP_C)"]],
+                raw => "pR|OP *|$check|NN OP *o",
+                line => "pR|OP *|$check|NN OP *o",
+                type => "content",
+                level => 1,
+                start_line_num => $lines + $line_num,
+            );
+            $parser->tidy_embed_fnc_entry($new);
+            push @$lines, $new;
         }
     }
-    close IN or die "Problem reading regen/opcodes: $!";
+    close $in_fh
+        or die "Problem reading regen/opcodes: $!";
 
     # Cluster entries in embed.fnc that have the same #ifdef guards.
     # Also, split out at the top level the three classes of functions.
-    # Output structure is actually the same as input structure - an
-    # (ordered) list of array references, where the elements in the
-    # reference determine what it is - a reference to a 1-element array is a
-    # pre-processor directive, a reference to 2+ element array is a function.
-
-    my $current = current_group();
-
-    foreach (@embed) {
-        if (@$_ > 1) {
-            push @$current, $_;
-            next;
-        }
-        $_->[0] =~ s/^#\s+/#/;
-        $_->[0] =~ /^\S*/;
-        $_->[0] =~ s/^#ifdef\s+(\S+)/#if defined($1)/;
-        $_->[0] =~ s/^#ifndef\s+(\S+)/#if !defined($1)/;
-        if ($_->[0] =~ /^#if\s*(.*)/) {
-            push @state, $1;
-        } elsif ($_->[0] =~ m!^#else\s*(?:/\*.*?\*/)?$!) {
-            die "Unmatched #else in embed.fnc" unless @state;
-            $state[-1] = "!($state[-1])";
-        } elsif ($_->[0] =~ m!^#endif\s*(?:/\*.*?\*/)?$!) {
-            die "Unmatched #endif in embed.fnc" unless @state;
-            pop @state;
-        } else {
-            die "Unhandled pre-processor directive '$_->[0]' in embed.fnc";
-        }
-        $current = current_group();
-    }
-
-    return ([add_level(\%groups, '')],
-            [add_level(\%groups, '', '')],    # core
-            [add_level(\%groups, '', 'E')],   # ext
-            [add_level(\%groups, '', 'A')]);  # api
+    # The result for each group_content() calls is an arrayref containing
+    # HeaderLine objects, with the embed.fnc data prenormalized, and each
+    # conditional clause containing a sorted list of functions, with
+    # any further conditional clauses following.
+    # Note this is a normalized and relatively smart grouping, and we can
+    # handle if/elif and etc properly. At the cost of being a touch slow.
+
+    return (
+        $parser->group_content($lines,
+                sub { $_[1]->{embed} }),                # everything
+        $parser->group_content($lines,
+                sub { $_[1]->{embed} &&
+                      $_[1]->{embed}{flags}=~/[AC]/ }), # only API and private API
+        $parser->group_content($lines,
+                sub { $_[1]->{embed} &&
+                      $_[1]->{embed}{flags}!~/[AC]/ &&  # otherwise Extensions
+                      $_[1]->{embed}{flags}=~/[E]/ }),
+        $parser->group_content($lines,
+                sub { $_[1]->{embed} &&
+                      $_[1]->{embed}{flags}!~/[AC]/ &&  # everything else.
+                      $_[1]->{embed}{flags}!~/[E]/ }),
+    );
 }
 
 1;
diff --git a/regen/regen_lib.pl b/regen/regen_lib.pl
index 0eb5654e56..1d250299db 100644
--- a/regen/regen_lib.pl
+++ b/regen/regen_lib.pl
@@ -3,6 +3,7 @@ use strict;
 our (@Changed, $TAP);
 use File::Compare;
 use Symbol;
+use Carp;
 use Text::Wrap();
 
 # Common functions needed by the regen scripts
@@ -175,6 +176,7 @@ EOM
 sub read_only_bottom_close_and_rename {
     my ($fh, $sources) = @_;
     my ($name, $lang, $final_name) = @{*{$fh}}{qw(name lang final_name)};
+    confess "bad fh in read_only_bottom_close_and_rename" unless $name;
     die "No final name specified at open time for $name"
         unless $final_name;
 
diff --git a/regen/tidy_embed.pl b/regen/tidy_embed.pl
index 6991b1db04..bafe9de082 100644
--- a/regen/tidy_embed.pl
+++ b/regen/tidy_embed.pl
@@ -39,7 +39,7 @@ while ($lines->[-1]{type} eq "content" and
 
 my $grouped_content_ary= $parser->group_content();
 push @$grouped_content_ary, @tail;
-my $grouped_content_txt= $parser->normalized_content($grouped_content_ary);
+my $grouped_content_txt= $parser->lines_as_str($grouped_content_ary);
 if ($grouped_content_txt ne $parser->{orig_content}) {
     if ($tap) {
         print "not ok - $0 $file\n";
author	Yves Orton <demerphq@gmail.com>	2022-12-16 18:31:17 +0100
committer	Yves Orton <demerphq@gmail.com>	2022-12-24 01:17:22 +0100
commit	69e2d40a60d6d9ad69498a1b5a45e53763ea04f3 (patch)
tree	d8601980c88b5c62ef168a85841f1e7e7810b3e3 /regen
parent	d655b7ce0d9f884dc0965633e816e31c75a953a1 (diff)
download	perl-69e2d40a60d6d9ad69498a1b5a45e53763ea04f3.tar.gz