summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordanielsdeleo <dan@opscode.com>2013-12-06 16:44:23 -0800
committerdanielsdeleo <dan@opscode.com>2013-12-09 09:58:20 -0800
commita4c67900863bf5baaa6766a325cc6ecf9eaecf7b (patch)
tree8b26b51554e3a5714b47c76d10ccd39cc4b544a0
parentb148f45428299159697344ee63713650c9083c06 (diff)
downloadmixlib-shellout-a4c67900863bf5baaa6766a325cc6ecf9eaecf7b.tar.gz
Always send KILL to process group
There is not a good way to reliably detect whether all grandchildren have exited after sending a TERM to the process group; if the child has correctly exited, grandchildren (which may be unresponsive) will have been adopted by init (ppid == 1) so the parent cannot wait() on them. To ensure no stuck grandchildren are left, send a KILL to the process group after allowing the processes time to clean up.
-rw-r--r--lib/mixlib/shellout/unix.rb42
-rw-r--r--spec/mixlib/shellout_spec.rb6
2 files changed, 14 insertions, 34 deletions
diff --git a/lib/mixlib/shellout/unix.rb b/lib/mixlib/shellout/unix.rb
index b5e0d2d..a2518cf 100644
--- a/lib/mixlib/shellout/unix.rb
+++ b/lib/mixlib/shellout/unix.rb
@@ -324,15 +324,14 @@ module Mixlib
def reap_errant_child
return if attempt_reap
- @terminate_reason = "Command execeded allowed execution time, killed by TERM signal."
+ @terminate_reason = "Command execeded allowed execution time, process terminated"
logger.error("Command execeded allowed execution time, sending TERM") if logger
Process.kill(:TERM, child_pgid)
sleep 3
- return if attempt_reap_all
- @terminate_reason = "Command execeded allowed execution time, did not respond to TERM. Killed by KILL signal."
- logger.error("Command did not exit from TERM, sending KILL") if logger
+ attempt_reap
+ logger.error("Command execeded allowed execution time, sending KILL") if logger
Process.kill(:KILL, child_pgid)
- reap_all
+ reap
# Should not hit this but it's possible if something is calling waitall
# in a separate thread.
@@ -352,19 +351,12 @@ module Mixlib
results = Process.waitpid2(@child_pid)
@reaped = true
@status = results.last
- end
-
- # Reap app processes with the same process group id as the forked child.
- # This is used after a timeout when forcibly killing the child process
- # and any grandchild processes it may have spawned.
- def reap_all
- results = []
- loop do
- results = Process.waitpid2(child_pgid)
- end
- rescue Errno::ECHILD, Errno::ESRCH
- @reaped = true
- @status = results.last
+ rescue Errno::ECHILD
+ # When cleaning up timed-out processes, we might send SIGKILL to the
+ # whole process group after we've cleaned up the direct child. In that
+ # case the grandchildren will have been adopted by init so we can't
+ # reap them even if we wanted to (we don't).
+ nil
end
# Try to reap the child process but don't block if it isn't dead yet.
@@ -377,20 +369,6 @@ module Mixlib
end
end
- # Try to reap all the processes in the child process' process group.
- # Return a false-y value if any processes in the group cannot be reaped
- # without blocking (e.g., not dead yet).
- def attempt_reap_all
- results = []
- while results = Process.waitpid2(@child_pid, Process::WNOHANG)
- @reaped = true
- @status = results.last
- end
- nil
- rescue Errno::ECHILD, Errno::ESRCH
- @status
- end
-
end
end
end
diff --git a/spec/mixlib/shellout_spec.rb b/spec/mixlib/shellout_spec.rb
index 582aef4..0a6f9b2 100644
--- a/spec/mixlib/shellout_spec.rb
+++ b/spec/mixlib/shellout_spec.rb
@@ -884,7 +884,7 @@ describe Mixlib::ShellOut do
shell_cmd.status.termsig.should == 9
log_output.string.should include("Command execeded allowed execution time, sending TERM")
- log_output.string.should include("Command did not exit from TERM, sending KILL")
+ log_output.string.should include("Command execeded allowed execution time, sending KILL")
end
end
@@ -916,7 +916,8 @@ describe Mixlib::ShellOut do
let(:cmd) do
ruby_wo_shell(<<-CODE)
STDOUT.sync = true
- trap(:TERM) { print "got term in child\n" }
+
+ trap(:TERM) { print "got term in child\n"; exit!(123) }
fork do
trap(:TERM) { print "got term in grandchild\n" }
sleep 10
@@ -929,6 +930,7 @@ describe Mixlib::ShellOut do
# note: let blocks don't correctly memoize if an exception is raised,
# so can't use executed_cmd
lambda { shell_cmd.run_command}.should raise_error(Mixlib::ShellOut::CommandTimeout)
+
shell_cmd.stdout.should include("got term in child")
shell_cmd.stdout.should include("got term in grandchild")