diff options
author | danielsdeleo <dan@opscode.com> | 2013-12-06 16:44:23 -0800 |
---|---|---|
committer | danielsdeleo <dan@opscode.com> | 2013-12-09 09:58:20 -0800 |
commit | a4c67900863bf5baaa6766a325cc6ecf9eaecf7b (patch) | |
tree | 8b26b51554e3a5714b47c76d10ccd39cc4b544a0 | |
parent | b148f45428299159697344ee63713650c9083c06 (diff) | |
download | mixlib-shellout-a4c67900863bf5baaa6766a325cc6ecf9eaecf7b.tar.gz |
Always send KILL to process group
There is not a good way to reliably detect whether all grandchildren
have exited after sending a TERM to the process group; if the child has
correctly exited, grandchildren (which may be unresponsive) will have
been adopted by init (ppid == 1) so the parent cannot wait() on them. To
ensure no stuck grandchildren are left, send a KILL to the process group
after allowing the processes time to clean up.
-rw-r--r-- | lib/mixlib/shellout/unix.rb | 42 | ||||
-rw-r--r-- | spec/mixlib/shellout_spec.rb | 6 |
2 files changed, 14 insertions, 34 deletions
diff --git a/lib/mixlib/shellout/unix.rb b/lib/mixlib/shellout/unix.rb index b5e0d2d..a2518cf 100644 --- a/lib/mixlib/shellout/unix.rb +++ b/lib/mixlib/shellout/unix.rb @@ -324,15 +324,14 @@ module Mixlib def reap_errant_child return if attempt_reap - @terminate_reason = "Command execeded allowed execution time, killed by TERM signal." + @terminate_reason = "Command execeded allowed execution time, process terminated" logger.error("Command execeded allowed execution time, sending TERM") if logger Process.kill(:TERM, child_pgid) sleep 3 - return if attempt_reap_all - @terminate_reason = "Command execeded allowed execution time, did not respond to TERM. Killed by KILL signal." - logger.error("Command did not exit from TERM, sending KILL") if logger + attempt_reap + logger.error("Command execeded allowed execution time, sending KILL") if logger Process.kill(:KILL, child_pgid) - reap_all + reap # Should not hit this but it's possible if something is calling waitall # in a separate thread. @@ -352,19 +351,12 @@ module Mixlib results = Process.waitpid2(@child_pid) @reaped = true @status = results.last - end - - # Reap app processes with the same process group id as the forked child. - # This is used after a timeout when forcibly killing the child process - # and any grandchild processes it may have spawned. - def reap_all - results = [] - loop do - results = Process.waitpid2(child_pgid) - end - rescue Errno::ECHILD, Errno::ESRCH - @reaped = true - @status = results.last + rescue Errno::ECHILD + # When cleaning up timed-out processes, we might send SIGKILL to the + # whole process group after we've cleaned up the direct child. In that + # case the grandchildren will have been adopted by init so we can't + # reap them even if we wanted to (we don't). + nil end # Try to reap the child process but don't block if it isn't dead yet. @@ -377,20 +369,6 @@ module Mixlib end end - # Try to reap all the processes in the child process' process group. - # Return a false-y value if any processes in the group cannot be reaped - # without blocking (e.g., not dead yet). - def attempt_reap_all - results = [] - while results = Process.waitpid2(@child_pid, Process::WNOHANG) - @reaped = true - @status = results.last - end - nil - rescue Errno::ECHILD, Errno::ESRCH - @status - end - end end end diff --git a/spec/mixlib/shellout_spec.rb b/spec/mixlib/shellout_spec.rb index 582aef4..0a6f9b2 100644 --- a/spec/mixlib/shellout_spec.rb +++ b/spec/mixlib/shellout_spec.rb @@ -884,7 +884,7 @@ describe Mixlib::ShellOut do shell_cmd.status.termsig.should == 9 log_output.string.should include("Command execeded allowed execution time, sending TERM") - log_output.string.should include("Command did not exit from TERM, sending KILL") + log_output.string.should include("Command execeded allowed execution time, sending KILL") end end @@ -916,7 +916,8 @@ describe Mixlib::ShellOut do let(:cmd) do ruby_wo_shell(<<-CODE) STDOUT.sync = true - trap(:TERM) { print "got term in child\n" } + + trap(:TERM) { print "got term in child\n"; exit!(123) } fork do trap(:TERM) { print "got term in grandchild\n" } sleep 10 @@ -929,6 +930,7 @@ describe Mixlib::ShellOut do # note: let blocks don't correctly memoize if an exception is raised, # so can't use executed_cmd lambda { shell_cmd.run_command}.should raise_error(Mixlib::ShellOut::CommandTimeout) + shell_cmd.stdout.should include("got term in child") shell_cmd.stdout.should include("got term in grandchild") |