summaryrefslogtreecommitdiff
path: root/make_sunver.pl
blob: 8a90b1fea0d366f7a9ec0857d5304a67ee34f389 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
#!/usr/bin/perl -w

# make_sunver.pl
#
# This script takes at least two arguments, a GNU style version script and
# a list of object and archive files, and generates a corresponding Sun
# style version script as follows:
#
# Each glob pattern, C++ mangled pattern or literal in the input script is
# matched against all global symbols in the input objects, emitting those
# that matched (or nothing if no match was found).
# A comment with the original pattern and its type is left in the output
# file to make it easy to understand the matches.
#
# It uses elfdump when present (native), GNU readelf otherwise.
# It depends on the GNU version of c++filt, since it must understand the
# GNU mangling style.

use FileHandle;
use IPC::Open2;

# Enforce C locale.  Child processes started later inherit these settings
# through %ENV.
$ENV{'LC_ALL'} = "C";
$ENV{'LANG'} = "C";

# Input version script, GNU style.  It is the first command line argument;
# whatever remains in @ARGV afterwards is the list of objects and archives.
my $symvers = shift;

# Fail early with a usable message instead of running the symbol table
# reader and the version script parser without a version script.
defined $symvers
    or die "usage: $0 version-script object-or-archive...\n";

##########
# Get all the symbols from the library, match them, and add them to a hash.

# Keys are the names of the exportable symbols found in the input objects.
my %sym_hash = ();

# List of objects and archives to process.
my @OBJECTS = ();

# List of shared objects to omit from processing.
my @SHAREDOBJS = ();

# Filter out those input archives that have corresponding shared objects to
# avoid adding all symbols matched in the archive to the output map.
#
# For every argument ending in '.a' the name with '.so' substituted is
# probed; if that file exists, the shared object name is recorded in
# @SHAREDOBJS and the archive is dropped.  Everything else is appended to
# @OBJECTS unchanged.
foreach my $file (@ARGV) {
    # Work on a copy: the loop variable aliases the @ARGV element, which is
    # still needed verbatim for the generated header.
    my $so = $file;
    if ($so =~ s/\.a$/.so/ && -e $so) {
        # Plain print, not printf: the file names are data and must not be
        # interpreted as a format string (a '%' in a path would be).
        print STDERR "omitted $file -> $so\n";
        push @SHAREDOBJS, $so;
    } else {
        push @OBJECTS, $file;
    }
}

# We need to detect and ignore hidden symbols.  Solaris nm can only detect
# this in the harder to parse default output format, and GNU nm not at all,
# so use elfdump -s in the native case and GNU readelf -s otherwise.
# GNU objdump -t cannot be used since it produces a variable number of
# columns.

# The path to elfdump.
my $elfdump = "/usr/ccs/bin/elfdump";

# Record every defined, non-hidden GLOBAL/WEAK symbol of the input objects
# as a key of %sym_hash.  The symbol tables are read with elfdump when it
# exists at the path above and with readelf (looked up via PATH) otherwise.
#
# Both tools are started with the list form of a piped open, i.e. without
# going through the shell, so object names containing blanks or shell
# metacharacters reach the tool unchanged.
if (-f $elfdump) {
    open(my $elfdump_fh, '-|', $elfdump, '-s', @OBJECTS) or die $!;

    # True while inside an archive symbol table, which is skipped entirely.
    my $skip_arsym = 0;

    while (<$elfdump_fh>) {
        chomp;

        # Ignore empty lines.
        if (/^$/) {
            # End of archive symbol table, stop skipping.
            $skip_arsym = 0 if $skip_arsym;
            next;
        }

        # Keep skipping until end of archive symbol table.
        next if ($skip_arsym);

        # Ignore object name header for individual objects and archives.
        next if (/:$/);

        # Ignore table header lines.
        next if (/^Symbol Table Section:/);
        next if (/index.*value.*size/);

        # Start of archive symbol table: start skipping.
        if (/^Symbol Table: \(archive/) {
            $skip_arsym = 1;
            next;
        }

        # Split table.  Only binding, visibility, section index and name
        # (columns 5, 6, 8 and 9) are of interest.
        my (undef, undef, undef, undef, $bind, $oth, undef, $shndx, $name)
            = split;

        # Error out for unknown input.
        die "unknown input line:\n$_" unless defined($bind);

        # Ignore local symbols.
        next if ($bind eq "LOCL");
        # Ignore hidden symbols.
        next if ($oth eq "H");
        # Ignore undefined symbols.
        next if ($shndx eq "UNDEF");
        # Error out for unhandled cases.
        if ($bind !~ /^(GLOB|WEAK)/ or $oth ne "D") {
            die "unhandled symbol:\n$_";
        }

        # Remember symbol.
        $sym_hash{$name}++;
    }

    # Closing a piped handle also reports a non-zero exit of the command.
    close $elfdump_fh or die "$elfdump error";
} else {
    open(my $readelf_fh, '-|', 'readelf', '-s', '-W', @OBJECTS) or die $!;

    # Process each symbol.
    while (<$readelf_fh>) {
        chomp;

        # Ignore empty lines.
        next if (/^$/);

        # Ignore object name header.
        next if (/^File: .*$/);

        # Ignore table header lines.
        next if (/^Symbol table.*contains.*:/);
        next if (/Num:.*Value.*Size/);

        # Split table.  Only binding, visibility, section index and name
        # (columns 5 to 8) are of interest.
        my (undef, undef, undef, undef, $bind, $vis, $ndx, $name) = split;

        # Error out for unknown input.
        die "unknown input line:\n$_" unless defined($bind);

        # Ignore local symbols.
        next if ($bind eq "LOCAL");
        # Ignore hidden symbols.
        next if ($vis eq "HIDDEN");
        # Ignore undefined symbols.
        next if ($ndx eq "UND");
        # Error out for unhandled cases.
        if ($bind !~ /^(GLOBAL|WEAK)/ or $vis ne "DEFAULT") {
            die "unhandled symbol:\n$_";
        }

        # Remember symbol.
        $sym_hash{$name}++;
    }

    # Closing a piped handle also reports a non-zero exit of the command.
    close $readelf_fh or die "readelf error";
}

##########
# The various types of glob patterns.
#
# A glob pattern that is to be applied to the demangled name: 'cxx'.
# A glob pattern that applies directly to the name in the .o files: 'glob'.
# This pattern is ignored; used for local variables (usually just '*'): 'ign'.

# The type of the current pattern.
my $glob = 'glob';

# We're currently inside `extern "C++"', which Sun ld doesn't understand.
my $in_extern = 0;

# The c++filt command to use.  This *must* be GNU c++filt; the Sun Studio
# c++filt doesn't handle the GNU mangling style.
# It can be overridden through the CXXFILT environment variable.
my $cxxfilt = $ENV{'CXXFILT'} || "c++filt";

# The current version name.
my $current_version = "";

# Was there any attempt to match a symbol to this version?
my $matches_attempted = 0;

# The number of symbols which matched this version.
my $matched_symbols = 0;

# Open the version script strictly for reading.  The three-argument form
# keeps characters such as '>' or '|' in the file name from being taken as
# an open mode.  It would, however, treat an undefined name as a request for
# an anonymous temporary file, so reject that case explicitly.
defined $symvers or die "$0: no version script given\n";
open F, '<', $symvers or die "$symvers: $!\n";

# Print information about generating this file
print "# This file was generated by make_sunver.pl.  DO NOT EDIT!\n";
print "# It was generated by:\n";
printf "# %s %s %s\n", $0, $symvers, (join ' ',@ARGV);
printf "# Omitted archives with corresponding shared libraries: %s\n",
    (join ' ', @SHAREDOBJS) if @SHAREDOBJS;
print "#\n\n";

# Translate the GNU version script read from F into a Sun style one on
# standard output, one input line at a time:
#  - structural lines (braces, version names, 'global:', 'local:', comments,
#    blank lines) are passed through, commented out with '##' where they
#    belong to an `extern "C++"' block;
#  - each pattern outside a 'local:' section is replaced by a '##' comment
#    naming the pattern and its type, followed by the sorted list of symbols
#    from %sym_hash that it matches;
#  - any other line aborts the script.

# Set once c++filt has been verified to be the GNU version, so that the
# check is run at most once, and only if a C++ pattern is actually seen.
my $cxxfilt_is_gnu = 0;

while (<F>) {
    # Lines of the form '};'
    if (/^([ \t]*)(\}[ \t]*;[ \t]*)$/) {
        $glob = 'glob';
        if ($in_extern) {
            # This closes an `extern "C++"' block, which was commented out.
            $in_extern--;
            print "$1##$2\n";
        } else {
            print;
        }
        next;
    }

    # Lines of the form '} SOME_VERSION_NAME_1.0;'
    if (/^[ \t]*\}[ \tA-Z0-9_.a-z]+;[ \t]*$/) {
        $glob = 'glob';
        # We tried to match symbols against this version, but none matched.
        # Emit dummy hidden symbol to avoid marking this version WEAK.
        if ($matches_attempted && !$matched_symbols) {
            print "  hidden:\n";
            print "    .force_WEAK_off_$current_version = DATA S0x0 V0x0;\n";
        }
        print; next;
    }

    # Comment and blank lines
    if (/^[ \t]*\#/) { print; next; }
    if (/^[ \t]*$/) { print; next; }

    # Lines of the form '{'
    # The brace is escaped: newer Perl releases warn about (and in some
    # contexts reject) an unescaped literal '{' in a pattern.
    if (/^([ \t]*)\{$/) {
        if ($in_extern) {
            print "$1##{\n";
        } else {
            print;
        }
        next;
    }

    # Lines of the form 'SOME_VERSION_NAME_1.1 {'
    if (/^([A-Z0-9_.]+)[ \t]+\{$/) {
        # Record version name.
        $current_version = $1;
        # Reset match attempts, #matched symbols for this version.
        $matches_attempted = 0;
        $matched_symbols = 0;
        print;
        next;
    }

    # Ignore 'global:'
    if (/^[ \t]*global:$/) { print; next; }

    # After 'local:', globs should be ignored, they won't be exported.
    if (/^[ \t]*local:$/) {
        $glob = 'ign';
        print;
        next;
    }

    # After 'extern "C++"', globs are C++ patterns
    if (/^([ \t]*)(extern \"C\+\+\"[ \t]*)$/) {
        $in_extern++;
        $glob = 'cxx';
        # Need to comment, Sun ld cannot handle this.
        print "$1##$2\n"; next;
    }

    # Chomp newline now we're done with passing through the input file.
    chomp;

    # Catch globs.  Note that '{}' is not allowed in globs by this script,
    # so only '*' and '[]' are available.
    if (/^([ \t]*)([^ \t;{}#]+);?[ \t]*$/) {
        my $ws = $1;
        my $ptn = $2;
        # Turn the glob into a regex by replacing '*' with '.*', '?' with '.'.
        # Keep $ptn so we can still print the original form.
        # NOTE(review): any other regex metacharacter in the pattern (e.g.
        # '.', '+', '(') is not escaped and keeps its regex meaning --
        # confirm whether that is intended.
        (my $pattern = $ptn) =~ s/\*/\.\*/g;
        $pattern =~ s/\?/\./g;

        if ($glob eq 'ign') {
            # We're in a local: * section; just continue.
            print "$_\n";
            next;
        }

        # Print the glob commented for human readers.
        print "$ws##$ptn ($glob)\n";
        # We tried to match a symbol to this version.
        $matches_attempted++;

        if ($glob eq 'glob') {
            # Match the pattern against the symbol names as found in the
            # .o files.
            my @hits = sort grep { /^$pattern$/ } keys %sym_hash;

            $matched_symbols += @hits;
            foreach my $sym (@hits) {
                print "$ws$sym;\n";
            }
        } elsif ($glob eq 'cxx') {
            my %dem_syms = ();

            # Verify that we're actually using GNU c++filt.  Other versions
            # most likely cannot handle GNU style symbol mangling.
            unless ($cxxfilt_is_gnu) {
                my $cxxout = `$cxxfilt --version 2>&1`;
                $cxxout =~ m/GNU/ or die "$0 requires GNU c++filt to function";
                $cxxfilt_is_gnu = 1;
            }

            # Talk to c++filt through a pair of file descriptors: FILTOUT
            # feeds its standard input, FILTIN reads its standard output.
            # Need to start a fresh instance per pattern, otherwise the
            # process grows to 500+ MB.
            my $pid = open2(*FILTIN, *FILTOUT, $cxxfilt) or die $!;

            # Match the pattern against the demangled form of each symbol
            # in %sym_hash, but remember the mangled name.
            foreach my $sym (keys %sym_hash) {
                printf FILTOUT "%s\n",$sym;
                my $dem = <FILTIN>;
                # End of file here means c++filt went away prematurely.
                defined $dem or die "c++filt error";
                chomp $dem;
                $dem_syms{$sym}++ if ($dem =~ /^$pattern$/);
            }

            close FILTOUT or die "c++filt error";
            close FILTIN or die "c++filt error";
            # Need to wait for the c++filt process to avoid lots of zombies.
            waitpid $pid, 0;

            foreach my $sym (sort keys(%dem_syms)) {
                $matched_symbols++;
                print "$ws$sym;\n";
            }
        } else {
            # No?  Well, then ignore it.
        }
        next;
    }
    # Important sanity check.  This script can't handle lots of formats
    # that GNU ld can, so be sure to error out if one is seen!
    die "strange line `$_'";
}
close F;