diff options
-rw-r--r-- | .rspec | 2 | ||||
-rw-r--r-- | lib/mixlib/shellout/unix.rb | 47 | ||||
-rw-r--r-- | spec/mixlib/shellout_spec.rb | 66 |
3 files changed, 103 insertions, 12 deletions
@@ -1 +1 @@ --cbfs +-cfs diff --git a/lib/mixlib/shellout/unix.rb b/lib/mixlib/shellout/unix.rb index 15ead60..a2518cf 100644 --- a/lib/mixlib/shellout/unix.rb +++ b/lib/mixlib/shellout/unix.rb @@ -29,13 +29,20 @@ module Mixlib # to +stdout+ and +stderr+, and saving its exit status object to +status+ # === Returns # returns +self+; +stdout+, +stderr+, +status+, and +exitstatus+ will be - # populated with results of the command + # populated with results of the command. # === Raises # * Errno::EACCES when you are not privileged to execute the command # * Errno::ENOENT when the command is not available on the system (or not # in the current $PATH) # * Chef::Exceptions::CommandTimeout when the command does not complete - # within +timeout+ seconds (default: 600s) + # within +timeout+ seconds (default: 600s). When this happens, ShellOut + # will send a TERM and then KILL to the entire process group to ensure + # that any grandchild processes are terminated. If the invocation of + # the child process spawned multiple child processes (which commonly + # happens if the command is passed as a single string to be interpreted + # by bin/sh, and bin/sh is not bash), the exit status object may not + # contain the correct exit code of the process (of course there is no + # exit code if the command is killed by SIGKILL, also). def run_command @child_pid = fork_subprocess @reaped = false @@ -51,6 +58,7 @@ module Mixlib # CHEF-3390: Marshall.load on Ruby < 1.8.7p369 also has a GC bug related # to Marshall.load, so try disabling GC first. propagate_pre_exec_failure + @child_pgid = -Process.getpgid(@child_pid) @result = nil @execution_time = 0 @@ -122,6 +130,12 @@ module Mixlib Dir.chdir(cwd) if cwd end + # Process group id of the child. Returned as a negative value so you can + # put it directly in arguments to kill, wait, etc. + def child_pgid + @child_pgid + end + def initialize_ipc @stdin_pipe, @stdout_pipe, @stderr_pipe, @process_status_pipe = IO.pipe, IO.pipe, IO.pipe, IO.pipe @process_status_pipe.last.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) @@ -263,6 +277,14 @@ module Mixlib initialize_ipc fork do + # Child processes may themselves fork off children. A common case + # is when the command is given as a single string (instead of + # command name plus Array of arguments) and /bin/sh does not + # support the "ONESHOT" optimization (where sh -c does exec without + # forking). To support cleaning up all the children, we need to + # ensure they're in a unique process group. + Process.setsid + configure_subprocess_file_descriptors clean_parent_file_descriptors @@ -302,14 +324,13 @@ module Mixlib def reap_errant_child return if attempt_reap - @terminate_reason = "Command execeded allowed execution time, killed by TERM signal." + @terminate_reason = "Command execeded allowed execution time, process terminated" logger.error("Command execeded allowed execution time, sending TERM") if logger - Process.kill(:TERM, @child_pid) + Process.kill(:TERM, child_pgid) sleep 3 - return if attempt_reap - @terminate_reason = "Command execeded allowed execution time, did not respond to TERM. Killed by KILL signal." - logger.error("Command did not exit from TERM, sending KILL") if logger - Process.kill(:KILL, @child_pid) + attempt_reap + logger.error("Command execeded allowed execution time, sending KILL") if logger + Process.kill(:KILL, child_pgid) reap # Should not hit this but it's possible if something is calling waitall @@ -323,12 +344,22 @@ module Mixlib @child_pid && !@reaped end + # Unconditionally reap the child process. This is used in scenarios where + # we can be confident the child will exit quickly, and has not spawned + # and grandchild processes. def reap results = Process.waitpid2(@child_pid) @reaped = true @status = results.last + rescue Errno::ECHILD + # When cleaning up timed-out processes, we might send SIGKILL to the + # whole process group after we've cleaned up the direct child. In that + # case the grandchildren will have been adopted by init so we can't + # reap them even if we wanted to (we don't). + nil end + # Try to reap the child process but don't block if it isn't dead yet. def attempt_reap if results = Process.waitpid2(@child_pid, Process::WNOHANG) @reaped = true diff --git a/spec/mixlib/shellout_spec.rb b/spec/mixlib/shellout_spec.rb index 71ecfef..0a6f9b2 100644 --- a/spec/mixlib/shellout_spec.rb +++ b/spec/mixlib/shellout_spec.rb @@ -826,8 +826,15 @@ describe Mixlib::ShellOut do end context 'with subprocess that takes longer than timeout' do + def ruby_wo_shell(code) + parts = %w[ruby] + parts << "--disable-gems" if ruby_19? + parts << "-e" + parts << code + end + let(:cmd) do - ruby_eval.call(<<-CODE) + ruby_wo_shell(<<-CODE) STDOUT.sync = true trap(:TERM) { puts "got term"; exit!(123) } sleep 10 @@ -849,7 +856,7 @@ describe Mixlib::ShellOut do context "and the child is unresponsive" do let(:cmd) do - ruby_eval.call(<<-CODE) + ruby_wo_shell(<<-CODE) STDOUT.sync = true trap(:TERM) { puts "nanana cant hear you" } sleep 10 @@ -877,11 +884,64 @@ describe Mixlib::ShellOut do shell_cmd.status.termsig.should == 9 log_output.string.should include("Command execeded allowed execution time, sending TERM") - log_output.string.should include("Command did not exit from TERM, sending KILL") + log_output.string.should include("Command execeded allowed execution time, sending KILL") end end end + + context "and the child process forks grandchildren" do + let(:cmd) do + ruby_wo_shell(<<-CODE) + STDOUT.sync = true + trap(:TERM) { print "got term in child\n"; exit!(123) } + fork do + trap(:TERM) { print "got term in grandchild\n"; exit!(142) } + sleep 10 + end + sleep 10 + CODE + end + + it "should TERM the wayward child and grandchild" do + # note: let blocks don't correctly memoize if an exception is raised, + # so can't use executed_cmd + lambda { shell_cmd.run_command}.should raise_error(Mixlib::ShellOut::CommandTimeout) + shell_cmd.stdout.should include("got term in child") + shell_cmd.stdout.should include("got term in grandchild") + end + + end + context "and the child process forks grandchildren that don't respond to TERM" do + let(:cmd) do + ruby_wo_shell(<<-CODE) + STDOUT.sync = true + + trap(:TERM) { print "got term in child\n"; exit!(123) } + fork do + trap(:TERM) { print "got term in grandchild\n" } + sleep 10 + end + sleep 10 + CODE + end + + it "should TERM the wayward child and grandchild, then KILL whoever is left" do + # note: let blocks don't correctly memoize if an exception is raised, + # so can't use executed_cmd + lambda { shell_cmd.run_command}.should raise_error(Mixlib::ShellOut::CommandTimeout) + + shell_cmd.stdout.should include("got term in child") + shell_cmd.stdout.should include("got term in grandchild") + + # A little janky. We get the process group id out of the command + # object, then try to kill a process in it to make sure none + # exists. Trusting the system under test like this isn't great but + # it's difficult to test otherwise. + lambda { Process.kill(:INT, shell_cmd.send(:child_pgid)) }.should raise_error(Errno::ESRCH) + end + + end end context 'with subprocess that exceeds buffersize' do |