summaryrefslogtreecommitdiff
path: root/lib/chef
diff options
context:
space:
mode:
authordanielsdeleo <dan@opscode.com>2013-07-05 15:01:31 -0700
committerdanielsdeleo <dan@opscode.com>2013-07-05 15:26:09 -0700
commit07ea110f8694b5b3a36fb936e7eaa46a9ce86050 (patch)
tree694bca908e5cd71b6bf66b9e07966959dc025adb /lib/chef
parent8505ed76e545af010e3d66b2882732fcfffc0084 (diff)
downloadchef-07ea110f8694b5b3a36fb936e7eaa46a9ce86050.tar.gz
Print debug stacktraces from within a forked worker
Fixes CHEF-4357: http://tickets.opscode.com/browse/CHEF-4357 Rescue exceptions from within a forker worker and feed them to `Chef::Application.debug_stacktrace()`, then exit non-zero. This fixes an issue where Chef would spew ruby stacktraces with forking enabled (stacktraces should only be printed to console with `-l debug`). Additionally fixes an issue where a generic exception was passed to `debug_stacktrace`, leaving a stacktrace.out file with no useful information. In addition to the above fixes, includes the following improvements: * Update the forked worker's argv0 to clearly indicate it's the worker process, and include ppid and run start time. * When reaping the worker process, include more useful information when the worker exited non-successfully, such as exit code or signal that killed the process. In particular, OOM kill (e.g., when running resource intensive code compile, etc. on small machine) should be a little more obvious.
Diffstat (limited to 'lib/chef')
-rw-r--r--lib/chef/application/client.rb2
-rw-r--r--lib/chef/application/solo.rb1
-rw-r--r--lib/chef/application/windows_service.rb1
-rw-r--r--lib/chef/client.rb33
-rw-r--r--lib/chef/exceptions.rb2
5 files changed, 29 insertions, 10 deletions
diff --git a/lib/chef/application/client.rb b/lib/chef/application/client.rb
index 42a7744795..b08a795697 100644
--- a/lib/chef/application/client.rb
+++ b/lib/chef/application/client.rb
@@ -316,7 +316,6 @@ class Chef::Application::Client < Chef::Application
rescue Exception => e
if Chef::Config[:interval]
Chef::Log.error("#{e.class}: #{e}")
- Chef::Application.debug_stacktrace(e)
Chef::Log.error("Sleeping for #{Chef::Config[:interval]} seconds before trying again")
unless SELF_PIPE.empty?
client_sleep Chef::Config[:interval]
@@ -326,7 +325,6 @@ class Chef::Application::Client < Chef::Application
end
retry
else
- Chef::Application.debug_stacktrace(e)
Chef::Application.fatal!("#{e.class}: #{e.message}", 1)
end
end
diff --git a/lib/chef/application/solo.rb b/lib/chef/application/solo.rb
index 7e316dc260..5e3fe90607 100644
--- a/lib/chef/application/solo.rb
+++ b/lib/chef/application/solo.rb
@@ -253,7 +253,6 @@ class Chef::Application::Solo < Chef::Application
sleep Chef::Config[:interval]
retry
else
- Chef::Application.debug_stacktrace(e)
Chef::Application.fatal!("#{e.class}: #{e.message}", 1)
end
end
diff --git a/lib/chef/application/windows_service.rb b/lib/chef/application/windows_service.rb
index 5b77a44392..85a18c7a17 100644
--- a/lib/chef/application/windows_service.rb
+++ b/lib/chef/application/windows_service.rb
@@ -99,7 +99,6 @@ class Chef
Chef::Log.error("#{e.class}: #{e}")
rescue Exception => e
Chef::Log.error("#{e.class}: #{e}")
- Chef::Application.debug_stacktrace(e)
end
end
end
diff --git a/lib/chef/client.rb b/lib/chef/client.rb
index 80c6dd5ecd..4a0db4053b 100644
--- a/lib/chef/client.rb
+++ b/lib/chef/client.rb
@@ -191,20 +191,40 @@ class Chef
if(Chef::Config[:client_fork] && Process.respond_to?(:fork) && !Chef::Platform.windows?)
Chef::Log.info "Forking chef instance to converge..."
pid = fork do
- Chef::Log.info "Forked instance now converging"
- do_run
- exit
+ client_solo = Chef::Config[:solo] ? "chef-solo" : "chef-client"
+ $0 = "#{client_solo} worker: ppid=#{Process.ppid};start=#{Time.new.strftime("%R:%S")};"
+ begin
+ Chef::Log.debug "Forked instance now converging"
+ do_run
+ rescue Exception => e
+ exit 1
+ else
+ exit 0
+ end
end
- Chef::Log.info "Fork successful. Waiting for new chef pid: #{pid}"
+ Chef::Log.debug "Fork successful. Waiting for new chef pid: #{pid}"
result = Process.waitpid2(pid)
- raise "Forked convergence run failed" unless result.last.success?
- Chef::Log.info "Forked child successfully reaped (pid: #{pid})"
+ handle_child_exit(result)
+ Chef::Log.debug "Forked child successfully reaped (pid: #{pid})"
true
else
do_run
end
end
+ def handle_child_exit(pid_and_status)
+ status = pid_and_status[1]
+ return true if status.success?
+ message = if status.signaled?
+ "Chef run process terminated by signal #{status.termsig} (#{Signal.list.invert[status.termsig]})"
+ else
+ "Chef run process exited unsuccessfully (exit code #{status.exitstatus})"
+ end
+ raise Exceptions::ChildConvergeError, message
+ end
+
+
+
# Configures the Chef::Cookbook::FileVendor class to fetch file from the
# server or disk as appropriate, creates the run context for this run, and
# sanity checks the cookbook collection.
@@ -488,6 +508,7 @@ class Chef
run_status.exception = e
run_failed
end
+ Chef::Application.debug_stacktrace(e)
@events.run_failed(e)
raise
ensure
diff --git a/lib/chef/exceptions.rb b/lib/chef/exceptions.rb
index c0a5a185d6..99054eb254 100644
--- a/lib/chef/exceptions.rb
+++ b/lib/chef/exceptions.rb
@@ -160,6 +160,8 @@ class Chef
# the symlink that is there either loops or points to a nonexistent file
class InvalidSymlink < RuntimeError; end
+ class ChildConvergeError < RuntimeError; end
+
class MissingRole < RuntimeError
NULL = Object.new