BuildCommon.pm


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407

# Written by Zack Weinberg <zackw at panix.com> in 2017, 2020, 2021.
# To the extent possible under law, Zack Weinberg has waived all
# copyright and related or neighboring rights to this work.
#
# See https://creativecommons.org/publicdomain/zero/1.0/ for further
# details.

package BuildCommon;

use v5.14;    # implicit use strict, use feature ':5.14'
use warnings FATAL => 'all';
use utf8;
use open qw(:utf8);
no  if $] >= 5.018, warnings => 'experimental::smartmatch';
no  if $] >= 5.022, warnings => 'experimental::re_strict';
use if $] >= 5.022, re       => 'strict';

use Cwd qw(realpath);
use File::Spec::Functions qw(
    catfile
    catpath
    file_name_is_absolute
    path
    splitpath
);
use FindBin ();
use POSIX   ();

our @EXPORT_OK;
use Exporter qw(import);

BEGIN {
    @EXPORT_OK = qw(
        ensure_C_locale
        ensure_empty_stdin
        error
        get_status
        get_status_and_output
        popen
        run
        sh_split
        sh_quote
        subprocess_error
        which
    );
}

#
# Utilities for dealing with subprocesses.
#

# Diagnostics: report some kind of catastrophic internal error.
# Exit code 99 tells the Automake test driver to mark a test as
# 'errored' rather than 'failed'.
sub error {    ## no critic (Subroutines::RequireArgUnpacking)
    my $msg = join q{ }, @_;
    print {*STDERR} $FindBin::Script, ': ERROR: ', $msg, "\n";
    exit 99;
}

# Like 'error', but the problem was with a subprocess, detected upon
# trying to start the program named as @_.
sub invocation_error {    ## no critic (Subroutines::RequireArgUnpacking)
    my $err = "$!";
    my $cmd = join q{ }, @_;
    error("failed to invoke $cmd: $err");
}

# Like 'error', but the problem was with a subprocess, detected upon
# termination of the program named as @_; interpret both $! and $?
# appropriately.
sub subprocess_error {    ## no critic (Subroutines::RequireArgUnpacking)
    my $syserr = $!;
    my $status = $?;
    my $cmd    = join q{ }, @_;
    if ($syserr) {
        error("system error with $cmd: $syserr");

    } elsif ($status == 0) {
        return;

    } elsif (($status & 0xFF) == 0) {
        # we wouldn't be here if the exit status was zero
        error("$cmd: exit " . ($status >> 8));

    } else {
        my $sig = ($status & 0x7F);
        # Neither Perl core nor the POSIX module exposes strsignal.
        # This is the least terrible kludge I can presently find;
        # it decodes the numbers to their <signal.h> constant names
        # (e.g. "SIGKILL" instead of "Killed" for signal 9).
        # Linear search through POSIX's hundreds of symbols is
        # acceptable because this function terminates the process,
        # so it can only ever be called once per run.
        my $signame;
        while (my ($name, $glob) = each %{'POSIX::'}) {
            if ($name =~ /^SIG(?!_|RT)/ && (${$glob} // -1) == $sig) {
                $signame = $name;
                last;
            }
        }
        $signame //= "signal $sig";
        error("$cmd: killed by $signame");
    }
}

# Split a string into words, exactly the way the Bourne shell would do
# it, with the default setting of IFS, when the string is the result
# of a variable expansion.  If any of the resulting words would be
# changed by filename expansion, throw an exception, otherwise return
# a list of the words.
#
# Note: the word splitting process does *not* look for nested
# quotation, substitutions, or operators.  For instance, if a
# shell variable was set with
#    var='"ab cd"'
# then './a.out $var' would pass two arguments to a.out:
# '"ab' and 'cd"'.
sub sh_split {
    my @words = split /[ \t\n]+/, shift;
    for my $w (@words) {
        die "sh_split: '$w' could be changed by filename expansion"
            if $w =~ / (?<! \\) [\[?*] /ax;
    }
    return @words;
}

# Quote a string, or list of strings, so that they will pass
# unmolested through the shell.  Avoids adding quotation whenever
# possible.  Algorithm copied from Python's shlex.quote.
sub sh_quote {    ## no critic (Subroutines::RequireArgUnpacking)
    my @quoted;
    for my $w (@_) {
        if ($w =~ m{[^\w@%+=:,./-]}a) {
            my $q = $w;
            $q =~ s/'/'\\''/g;
            $q =~ s/^/'/;
            $q =~ s/$/'/;
            push @quoted, $q;
        } else {
            push @quoted, $w;
        }
    }
    return wantarray ? @quoted : $quoted[0];
}

# Emit a logging message for the execution of a subprocess whose
# argument vector is @_.
sub log_execution {    ## no critic (Subroutines::RequireArgUnpacking)
    print {*STDERR} '+ ', join(q{ }, sh_quote(@_)), "\n";
    return;
}

# Run, and log execution of, a subprocess, with no I/O redirection.
# @_ should be an argument vector.
# Calls invocation_error() and/or subprocess_error() as appropriate.
# Does *not* call which(); do that yourself if you need it.
sub run {    ## no critic (Subroutines::RequireArgUnpacking)
    die 'run: no command to execute'
        if scalar(@_) == 0;
    log_execution(@_);

    my $pid = fork
        // invocation_error($_[0]);

    if ($pid == 0) {
        # child
        { exec {$_[0]} @_; };
        print {*STDERR} "exec $_[0] failed: $!\n";
        exit(127);
    }

    # parent
    waitpid $pid, 0;
    undef $!;
    subprocess_error(@_) if $?;
}

# Run, and log execution of, a subprocess.  @_ should be one of the
# open modes that creates a pipe, followed by an argument vector.
# An anonymous filehandle for the pipe is returned.
# Calls invocation_error() if open() fails.
# Does *not* call which(); do that yourself if you need it.
sub popen {
    my ($mode, @args) = @_;
    die "popen: inappropriate mode argument '$mode'"
        unless $mode eq '-|' || $mode eq '|-';
    die 'popen: no command to execute'
        if scalar(@args) == 0;

    log_execution(@args);
    open my $fh, $mode, @args
        or invocation_error($args[0]);
    return $fh;
}

# Run, and log execution of, a subprocess.  @_ should be an argument vector.
# If the subprocess exits normally (successful or unsuccessful),
# returns the exit status.
# If the subprocess could not be started because there is no such command,
# returns -1.
# Otherwise invocation_error/subprocess_error are called as appropriate.
sub get_status {
    die 'run: no command to execute'
        if scalar(@_) == 0;
    log_execution(@_);

    my $pid = fork
        // invocation_error($_[0]);

    if ($pid == 0) {
        # child
        { exec {$_[0]} @_; };
        exit(126) if $!{ENOENT};
        print {*STDERR} "exec $_[0] failed: $!\n";
        exit(127);
    }

    # parent
    waitpid $pid, 0;
    undef $!;
    if ($? == 0x7F00 || ($? & 0x7F) != 0) {
        subprocess_error(@_);
    }
    my $status = $? >> 8;
    return -1 if $status == 126;
    return $status;
}

# Run, and log execution of, a subprocess.  Capture all of its output,
# including both stdout and stderr.
# @_ should be an argument vector.
# If the subprocess exits normally (successful or unsuccessful),
# returns a list whose first element is the exit status, followed by
# all the lines of output from the subprocess (stdout and stderr are
# intermingled).
# If the subprocess could not be started because there is no such command,
# returns (-1,).
# Otherwise invocation_error/subprocess_error are called as appropriate.
sub get_status_and_output {
    die 'get_status_and_output: no command to execute'
        if scalar(@_) == 0;
    log_execution(@_);

    my $pid = open(my $fh, '-|')
        // invocation_error($_[0]);

    if ($pid == 0) {
        # child
        open(STDERR, ">&STDOUT") or do {
            print {*STDERR} "Can't dup STDOUT: $!\n";
            exit(127);
        };
        { exec {$_[0]} @_; };
        exit(126) if $!{ENOENT};
        print {*STDERR} "exec $_[0] failed: $!\n";
        exit(127);
    }

    # parent
    my @lines = <$fh>;
    close $fh or do {
        if ($! != 0 || ($? & 0x7F) != 0) {
            subprocess_error(@_);
        }
    };
    my $status = $? >> 8;
    if ($status == 127) {
        subprocess_error(@_);
    }
    if ($status == 126) {
        $status = -1;
    }
    return ($status, @lines);
}

# Force use of the C locale for this process and all subprocesses.
# This is necessary because subprocesses' output may be locale-
# dependent.  If the C.UTF-8 locale is available, it is used,
# otherwise the plain C locale.  Note that we do *not*
# 'use locale' here or anywhere else!
sub ensure_C_locale {
    use POSIX qw(setlocale LC_ALL);

    for my $k (keys %ENV) {
        if ($k eq 'LANG' || $k eq 'LANGUAGE' || $k =~ /^LC_/) {
            delete $ENV{$k};
        }
    }
    if (defined(setlocale(LC_ALL, 'C.UTF-8'))) {
        $ENV{LC_ALL} = 'C.UTF-8'; ## no critic (RequireLocalizedPunctuationVars)
    } elsif (defined(setlocale(LC_ALL, 'C'))) {
        $ENV{LC_ALL} = 'C';       ## no critic (RequireLocalizedPunctuationVars)
    } else {
        error("could not set 'C' locale: $!");
    }
    return;
}

# Close standard input at the OS level and reopen it on /dev/null.
# This ensures that no subprocesses will get stuck trying to read from
# standard input.
sub ensure_empty_stdin {
    use POSIX qw(open close dup2 O_RDONLY);
    my $fd = open('/dev/null', O_RDONLY) // die "open('/dev/null'): $!\n";
    dup2($fd, 0) // die("dup2($fd, 0): $!\n");
    close($fd);
}

# Clean up $ENV{PATH}, and return the cleaned path as a list.
sub clean_PATH {
    state @path;
    if (!@path) {
        for my $d (path()) {
            # Discard all entries that are not absolute paths.
            next unless file_name_is_absolute($d);
            # Discard all entries that are not directories, or don't
            # exist.  (This is not just for tidiness; realpath()
            # behaves unpredictably if called on a nonexistent
            # pathname.)
            next unless -d $d;
            # Resolve symlinks in all remaining entries.
            $d = realpath($d);
            # Discard duplicates.
            push @path, $d unless grep { $_ eq $d } @path;
        }
        error('nothing left after cleaning PATH')
            unless @path;

        # File::Spec knows internally whether $PATH is colon-separated
        # or semicolon-separated, but it won't tell us.  Assume it's
        # colon-separated unless the first element of $PATH has a
        # colon in it (and is therefore probably a DOS-style absolute
        # path, with a drive letter).
        my $newpath;
        if ($path[0] =~ /:/) {
            $newpath = join ';', @path;
        } else {
            $newpath = join ':', @path;
        }
        $ENV{PATH} = $newpath;    ## no critic (RequireLocalizedPunctuationVars)
    }
    return @path;
}

# Locate a program that we need.
# $_[0] is the name of the program along with any options that are
# required to use it correctly.  Split this into an argument list,
# exactly as /bin/sh would do it, and then search $PATH for the
# executable.  If we find it, return a list whose first element is
# the absolute pathname of the executable, followed by any options.
# Otherwise return an empty list.
sub which {
    my ($command) = @_;
    my @PATH = clean_PATH();

    # Split the command name from any options attached to it.
    my ($cmd, @options) = sh_split($command);
    my ($vol, $path, $file) = splitpath($cmd);

    if ($file eq 'false') {
        # Special case: the command 'false' is never considered to be
        # available.  Autoconf sets config variables like $CC and $NM to
        # 'false' if it can't find the requested tool.
        return ();

    } elsif ($file ne $cmd) {
        # $cmd was not a bare filename.  Do not do path search, but do
        # verify that $cmd exists and is executable, then convert it
        # to a canonical absolute path.
        #
        # Note: the result of realpath() is unspecified if its
        # argument does not exist, so we must test its existence
        # first.
        #
        # Note: if $file is a symlink, we must *not* resolve that
        # symlink, because that may change the name of the program,
        # which in turn may change what the program does.
        # For instance, suppose $CC is /usr/lib/ccache/cc, and this
        # 'cc' is a symlink to /usr/bin/ccache.  Resolving the symlink
        # will cause ccache to be invoked as 'ccache' instead of 'cc'
        # and it will error out because it's no longer being told
        # it's supposed to run the compiler.
        if (-f -x $cmd) {
            return (catfile(realpath(catpath($vol, $path, q{})), $file),
                @options);
        } else {
            return ();
        }

    } else {
        for my $d (@PATH) {
            my $cand = catfile($d, $cmd);
            if (-f -x $cand) {
                # @PATH came from clean_PATH, so all of the directories
                # have already been canonicalized.  If the last element
                # of $cand is a symlink, we should *not* resolve it (see
                # above).  Therefore, we do not call realpath here.
                return ($cand, @options);
            }
        }
        return ();

    }
}

1;