summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordanielsdeleo <dan@opscode.com>2013-11-01 15:16:19 -0700
committerdanielsdeleo <dan@opscode.com>2013-11-01 15:27:32 -0700
commitbc60d9b2abfbf64c848ff2e7bdb5cb69d3d84577 (patch)
tree320dee4ea27b2d4b01e2322fc3374ac099ac9a9d
parentc34885b8247a5ff1d2a2ec2b63f41a6f7b0d0b52 (diff)
downloadmixlib-shellout-bc60d9b2abfbf64c848ff2e7bdb5cb69d3d84577.tar.gz
Force subprocess to exit after timeout
Fixes MIXLIB-16. This issue is particularly prominent when yum/rpm commands go off the deep end repeatedly, but affects any case where a process takes longer than the timeout to complete.
-rw-r--r--lib/mixlib/shellout/unix.rb69
-rw-r--r--spec/mixlib/shellout_spec.rb32
2 files changed, 82 insertions, 19 deletions
diff --git a/lib/mixlib/shellout/unix.rb b/lib/mixlib/shellout/unix.rb
index d7b8b92..f884832 100644
--- a/lib/mixlib/shellout/unix.rb
+++ b/lib/mixlib/shellout/unix.rb
@@ -57,42 +57,38 @@ module Mixlib
write_to_child_stdin
until @status
- ready = IO.select(open_pipes, nil, nil, READ_WAIT_TIME)
- unless ready
+ ready_buffers = attempt_buffer_read
+ unless ready_buffers
@execution_time += READ_WAIT_TIME
if @execution_time >= timeout && !@result
+ # kill the bad proccess
+ reap_errant_child
+ # read anything it wrote when we killed it
+ attempt_buffer_read
+ # raise
raise CommandTimeout, "command timed out:\n#{format_for_exception}"
end
end
- if ready && ready.first.include?(child_stdout)
- read_stdout_to_buffer
- end
- if ready && ready.first.include?(child_stderr)
- read_stderr_to_buffer
- end
-
unless @status
# make one more pass to get the last of the output after the
# child process dies
- if results = Process.waitpid2(@child_pid, Process::WNOHANG)
- @status = results.last
+ if attempt_reap
redo
end
end
end
+
self
rescue Errno::ENOENT
# When ENOENT happens, we can be reasonably sure that the child process
# is going to exit quickly, so we use the blocking variant of waitpid2
- Process.waitpid2(@child_pid) rescue nil
+ reap
+ raise
+ rescue CommandTimeout
raise
rescue Exception
- # For exceptions other than ENOENT, such as timeout, we can't be sure
- # how long the child process will live, so we use the non-blocking
- # variant of waitpid2. This can result in zombie processes when the
- # child later dies. See MIXLIB-16 for proposed enhancement.
- Process.waitpid2(@child_pid, Process::WNOHANG) rescue nil
+ reap_errant_child
raise
ensure
# no matter what happens, turn the GC back on, and hope whatever busted
@@ -239,6 +235,17 @@ module Mixlib
child_stdin.close # Kick things off
end
+ def attempt_buffer_read
+ ready = IO.select(open_pipes, nil, nil, READ_WAIT_TIME)
+ if ready && ready.first.include?(child_stdout)
+ read_stdout_to_buffer
+ end
+ if ready && ready.first.include?(child_stderr)
+ read_stderr_to_buffer
+ end
+ ready
+ end
+
def read_stdout_to_buffer
while chunk = child_stdout.read_nonblock(READ_SIZE)
@stdout << chunk
@@ -299,6 +306,34 @@ module Mixlib
end
end
+ def reap_errant_child
+ return if attempt_reap
+ Process.kill(:TERM, @child_pid)
+ sleep 3
+ return if attempt_reap
+ Process.kill(:KILL, @child_pid)
+ reap
+
+ # Should not hit this but it's possible if something is calling waitall
+ # in a separate thread.
+ rescue Errno::ESRCH
+ nil
+ end
+
+ def reap
+ results = Process.waitpid2(@child_pid)
+ @status = results.last
+ end
+
+
+ def attempt_reap
+ if results = Process.waitpid2(@child_pid, Process::WNOHANG)
+ @status = results.last
+ else
+ nil
+ end
+ end
+
end
end
end
diff --git a/spec/mixlib/shellout_spec.rb b/spec/mixlib/shellout_spec.rb
index 9814473..d068dfc 100644
--- a/spec/mixlib/shellout_spec.rb
+++ b/spec/mixlib/shellout_spec.rb
@@ -825,12 +825,40 @@ describe Mixlib::ShellOut do
end
context 'with subprocess that takes longer than timeout' do
- let(:cmd) { ruby_eval.call('sleep 2') }
- let(:options) { { :timeout => 0.1 } }
+ let(:cmd) do
+ ruby_eval.call(<<-CODE)
+ STDOUT.sync = true
+ trap(:TERM) { puts "got term"; exit!(123) }
+ sleep 10
+ CODE
+ end
+ let(:options) { { :timeout => 1 } }
it "should raise CommandTimeout" do
lambda { executed_cmd }.should raise_error(Mixlib::ShellOut::CommandTimeout)
end
+
+ it "should ask the process nicely to exit" do
+ lambda { executed_cmd }.should raise_error(Mixlib::ShellOut::CommandTimeout)
+ executed_cmd.stdout.should include("got term")
+ executed_cmd.exitstatus.should == 123
+ end
+
+ context "and the child is unresponsive" do
+ let(:cmd) do
+ ruby_eval.call(<<-CODE)
+ STDOUT.sync = true
+ trap(:TERM) { puts "nanana cant hear you" }
+ sleep 10
+ CODE
+ end
+
+ it "should KILL the wayward child" do
+ lambda { executed_cmd }.should raise_error(Mixlib::ShellOut::CommandTimeout)
+ executed_cmd.stdout.should include("nanana cant hear you")
+ executed_cmd.status.termsig.should == 9
+ end
+ end
end
context 'with subprocess that exceeds buffersize' do