From 07ea110f8694b5b3a36fb936e7eaa46a9ce86050 Mon Sep 17 00:00:00 2001 From: danielsdeleo Date: Fri, 5 Jul 2013 15:01:31 -0700 Subject: Print debug stacktraces from within a forked worker Fixes CHEF-4357: http://tickets.opscode.com/browse/CHEF-4357 Rescue exceptions from within a forker worker and feed them to `Chef::Application.debug_stacktrace()`, then exit non-zero. This fixes an issue where Chef would spew ruby stacktraces with forking enabled (stacktraces should only be printed to console with `-l debug`). Additionally fixes an issue where a generic exception was passed to `debug_stacktrace`, leaving a stacktrace.out file with no useful information. In addition to the above fixes, includes the following improvements: * Update the forked worker's argv0 to clearly indicate it's the worker process, and include ppid and run start time. * When reaping the worker process, include more useful information when the worker exited non-successfully, such as exit code or signal that killed the process. In particular, OOM kill (e.g., when running resource intensive code compile, etc. on small machine) should be a little more obvious. --- lib/chef/application/client.rb | 2 -- lib/chef/application/solo.rb | 1 - lib/chef/application/windows_service.rb | 1 - lib/chef/client.rb | 33 +++++++++++++++++++++++++++------ lib/chef/exceptions.rb | 2 ++ 5 files changed, 29 insertions(+), 10 deletions(-) (limited to 'lib/chef') diff --git a/lib/chef/application/client.rb b/lib/chef/application/client.rb index 42a7744795..b08a795697 100644 --- a/lib/chef/application/client.rb +++ b/lib/chef/application/client.rb @@ -316,7 +316,6 @@ class Chef::Application::Client < Chef::Application rescue Exception => e if Chef::Config[:interval] Chef::Log.error("#{e.class}: #{e}") - Chef::Application.debug_stacktrace(e) Chef::Log.error("Sleeping for #{Chef::Config[:interval]} seconds before trying again") unless SELF_PIPE.empty? client_sleep Chef::Config[:interval] @@ -326,7 +325,6 @@ class Chef::Application::Client < Chef::Application end retry else - Chef::Application.debug_stacktrace(e) Chef::Application.fatal!("#{e.class}: #{e.message}", 1) end end diff --git a/lib/chef/application/solo.rb b/lib/chef/application/solo.rb index 7e316dc260..5e3fe90607 100644 --- a/lib/chef/application/solo.rb +++ b/lib/chef/application/solo.rb @@ -253,7 +253,6 @@ class Chef::Application::Solo < Chef::Application sleep Chef::Config[:interval] retry else - Chef::Application.debug_stacktrace(e) Chef::Application.fatal!("#{e.class}: #{e.message}", 1) end end diff --git a/lib/chef/application/windows_service.rb b/lib/chef/application/windows_service.rb index 5b77a44392..85a18c7a17 100644 --- a/lib/chef/application/windows_service.rb +++ b/lib/chef/application/windows_service.rb @@ -99,7 +99,6 @@ class Chef Chef::Log.error("#{e.class}: #{e}") rescue Exception => e Chef::Log.error("#{e.class}: #{e}") - Chef::Application.debug_stacktrace(e) end end end diff --git a/lib/chef/client.rb b/lib/chef/client.rb index 80c6dd5ecd..4a0db4053b 100644 --- a/lib/chef/client.rb +++ b/lib/chef/client.rb @@ -191,20 +191,40 @@ class Chef if(Chef::Config[:client_fork] && Process.respond_to?(:fork) && !Chef::Platform.windows?) Chef::Log.info "Forking chef instance to converge..." pid = fork do - Chef::Log.info "Forked instance now converging" - do_run - exit + client_solo = Chef::Config[:solo] ? "chef-solo" : "chef-client" + $0 = "#{client_solo} worker: ppid=#{Process.ppid};start=#{Time.new.strftime("%R:%S")};" + begin + Chef::Log.debug "Forked instance now converging" + do_run + rescue Exception => e + exit 1 + else + exit 0 + end end - Chef::Log.info "Fork successful. Waiting for new chef pid: #{pid}" + Chef::Log.debug "Fork successful. Waiting for new chef pid: #{pid}" result = Process.waitpid2(pid) - raise "Forked convergence run failed" unless result.last.success? - Chef::Log.info "Forked child successfully reaped (pid: #{pid})" + handle_child_exit(result) + Chef::Log.debug "Forked child successfully reaped (pid: #{pid})" true else do_run end end + def handle_child_exit(pid_and_status) + status = pid_and_status[1] + return true if status.success? + message = if status.signaled? + "Chef run process terminated by signal #{status.termsig} (#{Signal.list.invert[status.termsig]})" + else + "Chef run process exited unsuccessfully (exit code #{status.exitstatus})" + end + raise Exceptions::ChildConvergeError, message + end + + + # Configures the Chef::Cookbook::FileVendor class to fetch file from the # server or disk as appropriate, creates the run context for this run, and # sanity checks the cookbook collection. @@ -488,6 +508,7 @@ class Chef run_status.exception = e run_failed end + Chef::Application.debug_stacktrace(e) @events.run_failed(e) raise ensure diff --git a/lib/chef/exceptions.rb b/lib/chef/exceptions.rb index c0a5a185d6..99054eb254 100644 --- a/lib/chef/exceptions.rb +++ b/lib/chef/exceptions.rb @@ -160,6 +160,8 @@ class Chef # the symlink that is there either loops or points to a nonexistent file class InvalidSymlink < RuntimeError; end + class ChildConvergeError < RuntimeError; end + class MissingRole < RuntimeError NULL = Object.new -- cgit v1.2.1