summaryrefslogtreecommitdiff
path: root/regen_perly.pl
blob: f3b3f59ce3b94b3bac9675cbf7b8becf7f346915 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/usr/bin/perl
#
# regen_perly.pl, DAPM 12-Feb-04
#
# Copyright (c) 2004, 2005 Larry Wall
#
# Given an input file perly.y, run bison on it and produce
# the following output files:
#
# perly.h	standard bison header file with minor doctoring of
#		#line directives plus adding a #ifdef PERL_CORE
#
# perly.tab	the parser table C definitions extracted from the bison output
#		plus an extra table generated by this script.
#
# perly.act	the action case statements extracted from the bison output
#
# Note that perly.c is *not* regenerated - this is now a static file which
# is not dependent on perly.y any more.
#
# If a filename of the form foo.y is given on the command line, then
# this is used instead as the basename for all the files mentioned
# above.
#
# Note that temporary files of the form perlytmp.h and perlytmp.c are
# created and then deleted during this process
#
# Note also that this script is intended to be run on a UNIX system;
# it may work elsewhere but no specific attempt has been made to make it
# portable.

# Abort the script with a one-line synopsis of the accepted command line.
# Never returns: the message ends in a newline, so die() prints it verbatim
# without appending a file/line suffix.
sub usage {
    my $synopsis = "usage: $0 [ -b bison_executable ] [ file.y ]\n";
    die $synopsis;
}

use warnings;
use strict;

# An optional leading "-b <executable>" selects the bison binary to run;
# both words are removed from @ARGV so only the grammar file remains.
my $bison = 'bison';
if (@ARGV >= 2 and $ARGV[0] eq '-b') {
    (undef, $bison) = splice @ARGV, 0, 2;
}

# The grammar file defaults to perly.y.
my $y_file = shift @ARGV;
$y_file = 'perly.y' unless $y_file;

# Nothing may follow the grammar file, and its name must end in ".y".
usage() if @ARGV != 0 or $y_file !~ /\.y$/;

# Every other file name is the grammar name with its ".y" suffix replaced.
my $with_suffix = sub {
    my $suffix = shift;
    (my $name = $y_file) =~ s/\.y$/$suffix/;
    return $name;
};

my $h_file    = $with_suffix->('.h');
my $act_file  = $with_suffix->('.act');
my $tab_file  = $with_suffix->('.tab');
my $tmpc_file = $with_suffix->('tmp.c');
my $tmph_file = $with_suffix->('tmp.h');

# the yytranslate[] table generated by bison is ASCII/EBCDIC sensitive

die "$0: must be run on an ASCII system\n" unless ord 'A' == 65;

# check for correct version number. The constraints are:
#  * must be >= 1.24 to avoid licensing issues.
#  * it must generate the yystos[] table. Version 1.28 doesn't generate
#    this; 1.35+ does
#  * Must produce output which is extractable by the regexes below
#  * Must produce the right values.
# These last two constraints may well be met by earlier versions, but
# I simply haven't tested them yet. If it works for you, then modify
# the test below to allow that version too. DAPM Feb 04.

# Backticks yield undef when the command could not be started at all
# (e.g. bison is not installed or the -b path is wrong).  Report that
# directly rather than letting it surface as an "uninitialized value"
# warning followed by a misleading "wrong version" message.
my $version = `$bison -V`;
die "$0: can't run '$bison -V': $!\n" unless defined $version;

unless ($version =~ /\b(1\.875[a-z]?|2\.[01])\b/) {
    die <<EOF;

You have the wrong version of bison in your path; currently 1.875
2.0 or 2.1 is required.  Try installing
    http://ftp.gnu.org/gnu/bison/bison-2.1.tar.gz
or similar.  Your bison identifies itself as:

$version
EOF
}

# creates $tmpc_file and $tmph_file
my_system("$bison -d -o $tmpc_file $y_file");

# Slurp the whole generated C file.  The three-argument open with a lexical
# handle keeps the file name from being interpreted as a mode or a pipe.
open my $c_fh, '<', $tmpc_file or die "Can't open $tmpc_file: $!\n";
my $clines = do { local $/; <$c_fh> };
die "failed to read $tmpc_file: length mismatch\n"
    unless length($clines) == -s $tmpc_file;
close $c_fh;

my ($actlines, $tablines) = extract($clines);

$tablines .= make_opval_tab($y_file, $tablines);

# Write the action and table files.  Each output is left read-only, so it
# is made writable first (the chmod fails harmlessly when the file does not
# exist yet) and locked again once it has been written successfully.
for my $output ([ $act_file, $actlines ], [ $tab_file, $tablines ]) {
    my ($name, $text) = @$output;

    chmod 0644, $name;
    open my $out_fh, '>', $name or die "can't open $name: $!\n";
    print {$out_fh} $text       or die "can't write $name: $!\n";
    # buffered write errors (e.g. disk full) only show up at close time
    close $out_fh               or die "can't close $name: $!\n";
    chmod 0444, $name;
}

unlink $tmpc_file;

# Wrap PERL_CORE round the symbol definitions. Also,  the
# C<#line 30 "perly.y"> confuses the Win32 resource compiler and the
# C<#line 188 "perlytmp.h"> gets picked up by make depend, so remove them.

open my $tmph_fh, '<', $tmph_file or die "Can't open $tmph_file: $!\n";
chmod 0644, $h_file;	# output is kept read-only; make it writable first
open my $h_fh, '>', $h_file or die "Can't open $h_file: $!\n";

my $endcore_done = 0;
while (my $line = <$tmph_fh>) {
    # open the PERL_CORE guard before the very first line of the header
    if ($. == 1) {
        print {$h_fh} "#ifdef PERL_CORE\n"
            or die "Can't write $h_file: $!\n";
    }

    # close the guard just before the first line mentioning
    # YYSTYPE_IS_DECLARED (and only once)
    if (!$endcore_done and $line =~ /YYSTYPE_IS_DECLARED/) {
        print {$h_fh} "#endif /* PERL_CORE */\n"
            or die "Can't write $h_file: $!\n";
        $endcore_done = 1;
    }

    # drop all #line directives
    next if $line =~ /^#line \d+ ".*"/;

    print {$h_fh} $line or die "Can't write $h_file: $!\n";
}
close $tmph_fh;
# buffered write errors only show up at close time
close $h_fh or die "Can't close $h_file: $!\n";
chmod 0444, $h_file;
unlink $tmph_file;

print "rebuilt:  $h_file $tab_file $act_file\n";

exit 0;


# Pull the parser tables and the action code out of the text of the
# bison-generated C file.
#
# Argument:
#   the complete contents of the generated C file, as one string
#
# Returns a two-element list, each element with a newline appended:
#   1. the action "case" statements found inside the parser's switch
#   2. the table section, from the YYFINAL define (plus the one-line
#      "YYFINAL" comment directly above it, when present) up to the end of
#      the yystos[] table
#
# Dies if either section cannot be located.  Uses the file-level variables
# $tmpc_file (only for the error messages) and $y_file (to recognise the
# #line directive that follows the first case label).
sub extract {
    my $clines = shift;

    # The whole section is captured explicitly rather than read from the
    # match variable that holds the last matched text, which slows down
    # every regex in the program on older perls.  Literal braces and the
    # star of the C comment opener are escaped so that they cannot be
    # taken for quantifiers.
    $clines =~ m@
        (
            (?:
                ^/\* \s+ YYFINAL [^\n]+ \n  # optional comment
            )?
            \# \s* define \s* YYFINAL       # first #define
            .*?                             # other defines + most tables
            yystos\[\]\s*=                  # start of last table
            .*?
            \}\s*;                          # end of last table
        )
    @xms
        or die "Can't extract tables from $tmpc_file\n";
    my $tablines = $1;

    # The actions are everything between the opening brace of the switch
    # and its closing brace; that closing brace is recognised by being
    # followed (after optional C comments) by a #line directive that
    # points back into a .c or .simple file.
    $clines =~ m@
        switch \s* \( \s* \w+ \s* \) \s* \{ \s*
        (
            case \s* \d+ \s* : \s*
            \#line [^\n]+"\Q$y_file\E"
            .*?
        )
        \}
        \s*
        (?: \s* /\* .*? \*/ \s* )*          # optional C-comments
        \s*
        (?:
            \#line[^\n]+\.c"
        |
            \#line[^\n]+\.simple"
        )
    @xms
        or die "Can't extract actions from $tmpc_file\n";
    my $actlines = $1;

    return $actlines . "\n", $tablines . "\n";
}

# Read a .y file and extract a list of all the token names and
# non-terminal names that are declared to be of type opval,
# then scan the string $tablines for the table yytname[], which lists
# the symbol names in token-index order, and use this to create a new
# table, indexed by token number, which indicates whether that token is
# of type opval.
#
# ie given
# %token <opval> A B
# %type  <opval> C D
#
# and yytname[] = { "A", "B", "C", "D", "E", "F" };
#
# then return
# static const int yy_is_opval[] = { 1, 1, 1, 1, 0, 0 }
#
# Arguments:
#   $y_file    name of the grammar file to read
#   $tablines  string containing the C definition of yytname[]
#
# Returns the C source of the yy_is_opval[] table as a string; the layout
# (whitespace and commas) of the yytname[] initialiser is kept, with each
# quoted name replaced by 1 or 0.
#
# Dies if $y_file cannot be opened or yytname[] is not found in $tablines.

sub make_opval_tab {
    my ($y_file, $tablines) = @_;

    # Collect every symbol named on a "%token <opval>" or "%type <opval>"
    # line of the grammar.
    my %tokens;
    open my $fh, '<', $y_file or die "Can't open $y_file: $!\n";
    while (my $line = <$fh>) {
        next unless $line =~ s/^%(?:token|type)\s+<opval>\s+//;
        $tokens{$_} = 1 for split ' ', $line;
    }
    close $fh;

    # Grab the body of the yytname[] initialiser.  The literal braces are
    # escaped so they cannot be taken for the start of a quantifier.
    $tablines =~ /^\Qstatic const char *const yytname[] =\E\n
            \{\n
            (.*?)
            ^\};
            /xsm
        or die "Can't extract yytname[] from table string\n";
    my $fields = $1;

    # Replace each quoted symbol name by 1 (opval) or 0 (anything else).
    $fields =~ s/"([^"]+)"/$tokens{$1}||0/ge;

    return
        "/* which symbols are of type opval */\n" .
        "static const int yy_is_opval[] =\n{\n" . $fields . "\n};\n";
}


# Run an external command via system() and die unless it succeeds.
#
# Arguments are passed straight to system(), so a single string may be
# handed to the shell while a list is executed directly.
#
# Dies with a descriptive message when the command cannot be started, is
# killed by a signal, or exits with a non-zero status; returns nothing
# otherwise.
sub my_system {
    my @command = @_;

    system(@command);
    my $status = $?;

    # The command text is passed to sprintf as an argument rather than
    # being interpolated into the format, so a '%' in it is reported
    # literally.
    if ($status == -1) {
        die "failed to execute command '@command': $!\n";
    }
    elsif ($status & 127) {
        die sprintf "command '%s' died with signal %d\n",
            "@command", $status & 127;
    }
    elsif ($status >> 8) {
        die sprintf "command '%s' exited with value %d\n",
            "@command", $status >> 8;
    }

    return;
}