diff options
author | danielsdeleo <dan@opscode.com> | 2013-11-01 15:16:19 -0700 |
---|---|---|
committer | danielsdeleo <dan@opscode.com> | 2013-11-01 15:27:32 -0700 |
commit | bc60d9b2abfbf64c848ff2e7bdb5cb69d3d84577 (patch) | |
tree | 320dee4ea27b2d4b01e2322fc3374ac099ac9a9d | |
parent | c34885b8247a5ff1d2a2ec2b63f41a6f7b0d0b52 (diff) | |
download | mixlib-shellout-bc60d9b2abfbf64c848ff2e7bdb5cb69d3d84577.tar.gz |
Force subprocess to exit after timeout
Fixes MIXLIB-16.
This issue is particularly prominent when yum/rpm commands go off the
deep end repeatedly, but affects any case where a process takes longer
than the timeout to complete.
-rw-r--r-- | lib/mixlib/shellout/unix.rb | 69 | ||||
-rw-r--r-- | spec/mixlib/shellout_spec.rb | 32 |
2 files changed, 82 insertions, 19 deletions
diff --git a/lib/mixlib/shellout/unix.rb b/lib/mixlib/shellout/unix.rb index d7b8b92..f884832 100644 --- a/lib/mixlib/shellout/unix.rb +++ b/lib/mixlib/shellout/unix.rb @@ -57,42 +57,38 @@ module Mixlib write_to_child_stdin until @status - ready = IO.select(open_pipes, nil, nil, READ_WAIT_TIME) - unless ready + ready_buffers = attempt_buffer_read + unless ready_buffers @execution_time += READ_WAIT_TIME if @execution_time >= timeout && !@result + # kill the bad proccess + reap_errant_child + # read anything it wrote when we killed it + attempt_buffer_read + # raise raise CommandTimeout, "command timed out:\n#{format_for_exception}" end end - if ready && ready.first.include?(child_stdout) - read_stdout_to_buffer - end - if ready && ready.first.include?(child_stderr) - read_stderr_to_buffer - end - unless @status # make one more pass to get the last of the output after the # child process dies - if results = Process.waitpid2(@child_pid, Process::WNOHANG) - @status = results.last + if attempt_reap redo end end end + self rescue Errno::ENOENT # When ENOENT happens, we can be reasonably sure that the child process # is going to exit quickly, so we use the blocking variant of waitpid2 - Process.waitpid2(@child_pid) rescue nil + reap + raise + rescue CommandTimeout raise rescue Exception - # For exceptions other than ENOENT, such as timeout, we can't be sure - # how long the child process will live, so we use the non-blocking - # variant of waitpid2. This can result in zombie processes when the - # child later dies. See MIXLIB-16 for proposed enhancement. - Process.waitpid2(@child_pid, Process::WNOHANG) rescue nil + reap_errant_child raise ensure # no matter what happens, turn the GC back on, and hope whatever busted @@ -239,6 +235,17 @@ module Mixlib child_stdin.close # Kick things off end + def attempt_buffer_read + ready = IO.select(open_pipes, nil, nil, READ_WAIT_TIME) + if ready && ready.first.include?(child_stdout) + read_stdout_to_buffer + end + if ready && ready.first.include?(child_stderr) + read_stderr_to_buffer + end + ready + end + def read_stdout_to_buffer while chunk = child_stdout.read_nonblock(READ_SIZE) @stdout << chunk @@ -299,6 +306,34 @@ module Mixlib end end + def reap_errant_child + return if attempt_reap + Process.kill(:TERM, @child_pid) + sleep 3 + return if attempt_reap + Process.kill(:KILL, @child_pid) + reap + + # Should not hit this but it's possible if something is calling waitall + # in a separate thread. + rescue Errno::ESRCH + nil + end + + def reap + results = Process.waitpid2(@child_pid) + @status = results.last + end + + + def attempt_reap + if results = Process.waitpid2(@child_pid, Process::WNOHANG) + @status = results.last + else + nil + end + end + end end end diff --git a/spec/mixlib/shellout_spec.rb b/spec/mixlib/shellout_spec.rb index 9814473..d068dfc 100644 --- a/spec/mixlib/shellout_spec.rb +++ b/spec/mixlib/shellout_spec.rb @@ -825,12 +825,40 @@ describe Mixlib::ShellOut do end context 'with subprocess that takes longer than timeout' do - let(:cmd) { ruby_eval.call('sleep 2') } - let(:options) { { :timeout => 0.1 } } + let(:cmd) do + ruby_eval.call(<<-CODE) + STDOUT.sync = true + trap(:TERM) { puts "got term"; exit!(123) } + sleep 10 + CODE + end + let(:options) { { :timeout => 1 } } it "should raise CommandTimeout" do lambda { executed_cmd }.should raise_error(Mixlib::ShellOut::CommandTimeout) end + + it "should ask the process nicely to exit" do + lambda { executed_cmd }.should raise_error(Mixlib::ShellOut::CommandTimeout) + executed_cmd.stdout.should include("got term") + executed_cmd.exitstatus.should == 123 + end + + context "and the child is unresponsive" do + let(:cmd) do + ruby_eval.call(<<-CODE) + STDOUT.sync = true + trap(:TERM) { puts "nanana cant hear you" } + sleep 10 + CODE + end + + it "should KILL the wayward child" do + lambda { executed_cmd }.should raise_error(Mixlib::ShellOut::CommandTimeout) + executed_cmd.stdout.should include("nanana cant hear you") + executed_cmd.status.termsig.should == 9 + end + end end context 'with subprocess that exceeds buffersize' do |