diff options
author | danielsdeleo <dan@opscode.com> | 2013-07-05 15:01:31 -0700 |
---|---|---|
committer | danielsdeleo <dan@opscode.com> | 2013-07-05 15:26:09 -0700 |
commit | 07ea110f8694b5b3a36fb936e7eaa46a9ce86050 (patch) | |
tree | 694bca908e5cd71b6bf66b9e07966959dc025adb /lib/chef | |
parent | 8505ed76e545af010e3d66b2882732fcfffc0084 (diff) | |
download | chef-07ea110f8694b5b3a36fb936e7eaa46a9ce86050.tar.gz |
Print debug stacktraces from within a forked worker
Fixes CHEF-4357: http://tickets.opscode.com/browse/CHEF-4357
Rescue exceptions from within a forker worker and feed them to
`Chef::Application.debug_stacktrace()`, then exit non-zero. This fixes
an issue where Chef would spew ruby stacktraces with forking enabled
(stacktraces should only be printed to console with `-l debug`).
Additionally fixes an issue where a generic exception was passed to
`debug_stacktrace`, leaving a stacktrace.out file with no useful
information.
In addition to the above fixes, includes the following improvements:
* Update the forked worker's argv0 to clearly indicate it's the worker
process, and include ppid and run start time.
* When reaping the worker process, include more useful information when
the worker exited non-successfully, such as exit code or signal that
killed the process. In particular, OOM kill (e.g., when running
resource intensive code compile, etc. on small machine) should be a
little more obvious.
Diffstat (limited to 'lib/chef')
-rw-r--r-- | lib/chef/application/client.rb | 2 | ||||
-rw-r--r-- | lib/chef/application/solo.rb | 1 | ||||
-rw-r--r-- | lib/chef/application/windows_service.rb | 1 | ||||
-rw-r--r-- | lib/chef/client.rb | 33 | ||||
-rw-r--r-- | lib/chef/exceptions.rb | 2 |
5 files changed, 29 insertions, 10 deletions
diff --git a/lib/chef/application/client.rb b/lib/chef/application/client.rb index 42a7744795..b08a795697 100644 --- a/lib/chef/application/client.rb +++ b/lib/chef/application/client.rb @@ -316,7 +316,6 @@ class Chef::Application::Client < Chef::Application rescue Exception => e if Chef::Config[:interval] Chef::Log.error("#{e.class}: #{e}") - Chef::Application.debug_stacktrace(e) Chef::Log.error("Sleeping for #{Chef::Config[:interval]} seconds before trying again") unless SELF_PIPE.empty? client_sleep Chef::Config[:interval] @@ -326,7 +325,6 @@ class Chef::Application::Client < Chef::Application end retry else - Chef::Application.debug_stacktrace(e) Chef::Application.fatal!("#{e.class}: #{e.message}", 1) end end diff --git a/lib/chef/application/solo.rb b/lib/chef/application/solo.rb index 7e316dc260..5e3fe90607 100644 --- a/lib/chef/application/solo.rb +++ b/lib/chef/application/solo.rb @@ -253,7 +253,6 @@ class Chef::Application::Solo < Chef::Application sleep Chef::Config[:interval] retry else - Chef::Application.debug_stacktrace(e) Chef::Application.fatal!("#{e.class}: #{e.message}", 1) end end diff --git a/lib/chef/application/windows_service.rb b/lib/chef/application/windows_service.rb index 5b77a44392..85a18c7a17 100644 --- a/lib/chef/application/windows_service.rb +++ b/lib/chef/application/windows_service.rb @@ -99,7 +99,6 @@ class Chef Chef::Log.error("#{e.class}: #{e}") rescue Exception => e Chef::Log.error("#{e.class}: #{e}") - Chef::Application.debug_stacktrace(e) end end end diff --git a/lib/chef/client.rb b/lib/chef/client.rb index 80c6dd5ecd..4a0db4053b 100644 --- a/lib/chef/client.rb +++ b/lib/chef/client.rb @@ -191,20 +191,40 @@ class Chef if(Chef::Config[:client_fork] && Process.respond_to?(:fork) && !Chef::Platform.windows?) Chef::Log.info "Forking chef instance to converge..." pid = fork do - Chef::Log.info "Forked instance now converging" - do_run - exit + client_solo = Chef::Config[:solo] ? "chef-solo" : "chef-client" + $0 = "#{client_solo} worker: ppid=#{Process.ppid};start=#{Time.new.strftime("%R:%S")};" + begin + Chef::Log.debug "Forked instance now converging" + do_run + rescue Exception => e + exit 1 + else + exit 0 + end end - Chef::Log.info "Fork successful. Waiting for new chef pid: #{pid}" + Chef::Log.debug "Fork successful. Waiting for new chef pid: #{pid}" result = Process.waitpid2(pid) - raise "Forked convergence run failed" unless result.last.success? - Chef::Log.info "Forked child successfully reaped (pid: #{pid})" + handle_child_exit(result) + Chef::Log.debug "Forked child successfully reaped (pid: #{pid})" true else do_run end end + def handle_child_exit(pid_and_status) + status = pid_and_status[1] + return true if status.success? + message = if status.signaled? + "Chef run process terminated by signal #{status.termsig} (#{Signal.list.invert[status.termsig]})" + else + "Chef run process exited unsuccessfully (exit code #{status.exitstatus})" + end + raise Exceptions::ChildConvergeError, message + end + + + # Configures the Chef::Cookbook::FileVendor class to fetch file from the # server or disk as appropriate, creates the run context for this run, and # sanity checks the cookbook collection. @@ -488,6 +508,7 @@ class Chef run_status.exception = e run_failed end + Chef::Application.debug_stacktrace(e) @events.run_failed(e) raise ensure diff --git a/lib/chef/exceptions.rb b/lib/chef/exceptions.rb index c0a5a185d6..99054eb254 100644 --- a/lib/chef/exceptions.rb +++ b/lib/chef/exceptions.rb @@ -160,6 +160,8 @@ class Chef # the symlink that is there either loops or points to a nonexistent file class InvalidSymlink < RuntimeError; end + class ChildConvergeError < RuntimeError; end + class MissingRole < RuntimeError NULL = Object.new |