diff options
author | Adam Leff <adam@leff.co> | 2016-06-02 15:51:04 -0400 |
---|---|---|
committer | Adam Leff <adam@leff.co> | 2016-06-02 15:51:04 -0400 |
commit | f241151f7b779cae91810c48a299f84997b89b02 (patch) | |
tree | 4dcb100d90959768d187064cf82abce38003da69 /lib | |
parent | e2f4576c0d319794e1c78aeebb5aab5305e84668 (diff) | |
parent | e3039ee388b5a5f9dd6a90f74adc9a4bcf1eec8a (diff) | |
download | chef-f241151f7b779cae91810c48a299f84997b89b02.tar.gz |
Merge pull request #4973 from chef/adamleff/data_collector
Creation of the new DataCollector reporter
Diffstat (limited to 'lib')
-rw-r--r-- | lib/chef/client.rb | 8 | ||||
-rw-r--r-- | lib/chef/data_collector.rb | 345 | ||||
-rw-r--r-- | lib/chef/data_collector/messages.rb | 125 | ||||
-rw-r--r-- | lib/chef/data_collector/messages/helpers.rb | 161 | ||||
-rw-r--r-- | lib/chef/data_collector/resource_report.rb | 84 |
5 files changed, 723 insertions, 0 deletions
diff --git a/lib/chef/client.rb b/lib/chef/client.rb index 054b284bd5..c857da1b93 100644 --- a/lib/chef/client.rb +++ b/lib/chef/client.rb @@ -45,6 +45,7 @@ require "chef/formatters/doc" require "chef/formatters/minimal" require "chef/version" require "chef/resource_reporter" +require "chef/data_collector" require "chef/audit/audit_reporter" require "chef/run_lock" require "chef/policy_builder" @@ -263,6 +264,7 @@ class Chef run_ohai register unless Chef::Config[:solo_legacy_mode] + register_data_collector_reporter load_node @@ -957,6 +959,12 @@ class Chef Chef::ReservedNames::Win32::Security.has_admin_privileges? end + + # Register the data collector reporter to send event information to the + # data collector server + def register_data_collector_reporter + events.register(Chef::DataCollector::Reporter.new) if Chef::DataCollector.register_reporter? + end end end diff --git a/lib/chef/data_collector.rb b/lib/chef/data_collector.rb new file mode 100644 index 0000000000..e852d11ab6 --- /dev/null +++ b/lib/chef/data_collector.rb @@ -0,0 +1,345 @@ +# +# Author:: Adam Leff (<adamleff@chef.io>) +# Author:: Ryan Cragun (<ryan@chef.io>) +# +# Copyright:: Copyright 2012-2016, Chef Software Inc. +# License:: Apache License, Version 2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +require "uri" +require "chef/event_dispatch/base" +require "chef/data_collector/messages" +require "chef/data_collector/resource_report" + +class Chef + + # == Chef::DataCollector + # Provides methods for determinine whether a reporter should be registered. + class DataCollector + def self.register_reporter? + Chef::Config[:data_collector][:server_url] && + !Chef::Config[:why_run] && + self.reporter_enabled_for_current_mode? + end + + def self.reporter_enabled_for_current_mode? + if Chef::Config[:solo] || Chef::Config[:local_mode] + acceptable_modes = [:solo, :both] + else + acceptable_modes = [:client, :both] + end + + acceptable_modes.include?(Chef::Config[:data_collector][:mode]) + end + + # == Chef::DataCollector::Reporter + # Provides an event handler that can be registered to report on Chef + # run data. Unlike the existing Chef::ResourceReporter event handler, + # the DataCollector handler is not tied to a Chef Server / Chef Reporting + # and exports its data through a webhook-like mechanism to a configured + # endpoint. + class Reporter < EventDispatch::Base + attr_reader :updated_resources, :status, :exception, :error_descriptions, + :expanded_run_list, :run_status, :http, :resource_count, + :current_resource_report, :enabled + + def initialize + @updated_resources = [] + @resource_count = 0 + @current_resource_loaded = nil + @error_descriptions = {} + @expanded_run_list = {} + @http = Chef::HTTP.new(data_collector_server_url) + @enabled = true + end + + # see EventDispatch::Base#run_started + # Upon receipt, we will send our run start message to the + # configured DataCollector endpoint. Depending on whether + # the user has configured raise_on_failure, if we cannot + # send the message, we will either disable the DataCollector + # Reporter for the duration of this run, or we'll raise an + # exception. + def run_started(current_run_status) + update_run_status(current_run_status) + + disable_reporter_on_error do + send_to_data_collector( + Chef::DataCollector::Messages.run_start_message(current_run_status).to_json + ) + end + end + + # see EventDispatch::Base#run_completed + # Upon receipt, we will send our run completion message to the + # configured DataCollector endpoint. + def run_completed(node) + send_run_completion(status: "success") + end + + # see EventDispatch::Base#run_failed + def run_failed(exception) + send_run_completion(status: "failure") + end + + # see EventDispatch::Base#resource_current_state_loaded + # Create a new ResourceReport instance that we'll use to track + # the state of this resource during the run. Nested resources are + # ignored as they are assumed to be an inline resource of a custom + # resource, and we only care about tracking top-level resources. + def resource_current_state_loaded(new_resource, action, current_resource) + return if nested_resource?(new_resource) + update_current_resource_report( + Chef::DataCollector::ResourceReport.new( + new_resource, + action, + current_resource + ) + ) + end + + # see EventDispatch::Base#resource_up_to_date + # Mark our ResourceReport status accordingly, and increment the total + # resource count. + def resource_up_to_date(new_resource, action) + current_resource_report.up_to_date unless nested_resource?(new_resource) + increment_resource_count + end + + # see EventDispatch::Base#resource_skipped + # Increment the total resource count. If this is a top-level resource, + # we also create a ResourceReport instance (because a skipped resource + # does not trigger the resource_current_state_loaded event), and flag + # it as skipped. + def resource_skipped(new_resource, action, conditional) + increment_resource_count + return if nested_resource?(new_resource) + + update_current_resource_report( + Chef::DataCollector::ResourceReport.new( + new_resource, + action + ) + ) + current_resource_report.skipped(conditional) + end + + # see EventDispatch::Base#resource_updated + # Flag the current ResourceReport instance as updated (as long as it's + # a top-level resource) and increment the total resource count. + def resource_updated(new_resource, action) + current_resource_report.updated unless nested_resource?(new_resource) + increment_resource_count + end + + # see EventDispatch::Base#resource_failed + # Flag the current ResourceReport as failed and supply the exception as + # long as it's a top-level resource, increment the total resource count, + # and update the run error text with the proper Formatter. + def resource_failed(new_resource, action, exception) + current_resource_report.failed(exception) unless nested_resource?(new_resource) + increment_resource_count + update_error_description( + Formatters::ErrorMapper.resource_failed( + new_resource, + action, + exception + ).for_json + ) + end + + # see EventDispatch::Base#resource_completed + # Mark the ResourceReport instance as finished (for timing details) + # and add it to the list of resources encountered during this run. + # This marks the end of this resource during this run. + def resource_completed(new_resource) + if current_resource_report && !nested_resource?(new_resource) + current_resource_report.finish + add_updated_resource(current_resource_report) + update_current_resource_report(nil) + end + end + + # see EventDispatch::Base#run_list_expanded + # The expanded run list is stored for later use by the run_completed + # event and message. + def run_list_expanded(run_list_expansion) + @expanded_run_list = run_list_expansion + end + + # see EventDispatch::Base#run_list_expand_failed + # The run error text is updated with the output of the appropriate + # formatter. + def run_list_expand_failed(node, exception) + update_error_description( + Formatters::ErrorMapper.run_list_expand_failed( + node, + exception + ).for_json + ) + end + + # see EventDispatch::Base#cookbook_resolution_failed + # The run error text is updated with the output of the appropriate + # formatter. + def cookbook_resolution_failed(expanded_run_list, exception) + update_error_description( + Formatters::ErrorMapper.cookbook_resolution_failed( + expanded_run_list, + exception + ).for_json + ) + end + + # see EventDispatch::Base#cookbook_sync_failed + # The run error text is updated with the output of the appropriate + # formatter. + def cookbook_sync_failed(cookbooks, exception) + update_error_description( + Formatters::ErrorMapper.cookbook_sync_failed( + cookbooks, + exception + ).for_json + ) + end + + private + + # + # Yields to the passed-in block (which is expected to be some interaction + # with the DataCollector endpoint). If some communication failure occurs, + # either disable any future communications to the DataCollector endpoint, or + # raise an exception (if the user has set + # Chef::Config.data_collector.raise_on_failure to true.) + # + # @param block [Proc] A ruby block to run. Ignored if a command is given. + # + def disable_reporter_on_error + yield + rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, + Errno::ECONNREFUSED, EOFError, Net::HTTPBadResponse, + Net::HTTPHeaderSyntaxError, Net::ProtocolError, OpenSSL::SSL::SSLError => e + disable_data_collector_reporter + code = if e.respond_to?(:response) && e.response.code + e.response.code.to_s + else + "Exception Code Empty" + end + + msg = "Error while reporting run start to Data Collector. " \ + "URL: #{data_collector_server_url} " \ + "Exception: #{code} -- #{e.message} " + + if Chef::Config[:data_collector][:raise_on_failure] + Chef::Log.error(msg) + raise + else + Chef::Log.warn(msg) + end + end + + def send_to_data_collector(message) + return unless data_collector_accessible? + + Chef::Log.debug("data_collector_reporter: POSTing the following message to #{data_collector_server_url}: #{message}") + http.post(nil, message, headers) + end + + # + # Send any messages to the DataCollector endpoint that are necessary to + # indicate the run has completed. Currently, two messages are sent: + # + # - An "action" message with the node object indicating it's been updated + # - An "run_converge" (i.e. RunEnd) message with details about the run, + # what resources were modified/up-to-date/skipped, etc. + # + # @param opts [Hash] Additional details about the run, such as its success/failure. + # + def send_run_completion(opts) + # If run_status is nil we probably failed before the client triggered + # the run_started callback. In this case we'll skip updating because + # we have nothing to report. + return unless run_status + + send_to_data_collector(Chef::DataCollector::Messages.node_update_message(run_status).to_json) + send_to_data_collector( + Chef::DataCollector::Messages.run_end_message( + run_status: run_status, + expanded_run_list: expanded_run_list, + total_resource_count: resource_count, + updated_resources: updated_resources, + status: opts[:status], + error_descriptions: error_descriptions + ).to_json + ) + end + + def headers + headers = { "Content-Type" => "application/json" } + + unless data_collector_token.nil? + headers["x-data-collector-token"] = data_collector_token + headers["x-data-collector-auth"] = "version=1.0" + end + + headers + end + + def data_collector_server_url + Chef::Config[:data_collector][:server_url] + end + + def data_collector_token + Chef::Config[:data_collector][:token] + end + + def increment_resource_count + @resource_count += 1 + end + + def add_updated_resource(resource_report) + @updated_resources << resource_report + end + + def disable_data_collector_reporter + @enabled = false + end + + def data_collector_accessible? + @enabled + end + + def update_run_status(run_status) + @run_status = run_status + end + + def update_current_resource_report(resource_report) + @current_resource_report = resource_report + end + + def update_error_description(discription_hash) + @error_descriptions = discription_hash + end + + # If we are getting messages about a resource while we are in the middle of + # another resource's update, we assume that the nested resource is just the + # implementation of a provider, and we want to hide it from the reporting + # output. + def nested_resource?(new_resource) + @current_resource_report && @current_resource_report.new_resource != new_resource + end + end + end +end diff --git a/lib/chef/data_collector/messages.rb b/lib/chef/data_collector/messages.rb new file mode 100644 index 0000000000..b6114a8bec --- /dev/null +++ b/lib/chef/data_collector/messages.rb @@ -0,0 +1,125 @@ +# +# Author:: Adam Leff (<adamleff@chef.io) +# Author:: Ryan Cragun (<ryan@chef.io>) +# +# Copyright:: Copyright 2012-2016, Chef Software Inc. +# License:: Apache License, Version 2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +require "json" +require "securerandom" +require_relative "messages/helpers" + +class Chef + class DataCollector + module Messages + extend Helpers + + # + # Message payload that is sent to the DataCollector server at the + # start of a Chef run. + # + # @param run_status [Chef::RunStatus] The RunStatus instance for this node/run. + # + # @return [Hash] A hash containing the run start message data. + # + def self.run_start_message(run_status) + { + "chef_server_fqdn" => chef_server_fqdn(run_status), + "entity_uuid" => node_uuid, + "id" => run_status.run_id, + "message_version" => "1.0.0", + "message_type" => "run_start", + "node_name" => run_status.node.name, + "organization_name" => organization, + "run_id" => run_status.run_id, + "source" => collector_source, + "start_time" => run_status.start_time.utc.iso8601, + } + end + + # + # Message payload that is sent to the DataCollector server at the + # end of a Chef run. + # + # @param reporter_data [Hash] Data supplied by the Reporter, such as run_status, resource counts, etc. + # + # @return [Hash] A hash containing the run end message data. + # + def self.run_end_message(reporter_data) + run_status = reporter_data[:run_status] + + message = { + "chef_server_fqdn" => chef_server_fqdn(run_status), + "entity_uuid" => node_uuid, + "expanded_run_list" => reporter_data[:expanded_run_list], + "id" => run_status.run_id, + "message_version" => "1.0.0", + "message_type" => "run_converge", + "node_name" => run_status.node.name, + "organization_name" => organization, + "resources" => reporter_data[:updated_resources].map(&:for_json), + "run_id" => run_status.run_id, + "run_list" => run_status.node.run_list.for_json, + "start_time" => run_status.start_time.utc.iso8601, + "end_time" => run_status.end_time.utc.iso8601, + "source" => collector_source, + "status" => reporter_data[:status], + "total_resource_count" => reporter_data[:total_resource_count], + "updated_resource_count" => reporter_data[:updated_resources].count, + } + + message["error"] = { + "class" => run_status.exception.class, + "message" => run_status.exception.message, + "backtrace" => run_status.exception.backtrace, + "description" => reporter_data[:error_descriptions], + } if run_status.exception + + message + end + + # + # Message payload that is sent to the DataCollector server at the + # end of a Chef run. + # + # @param run_status [Chef::RunStatus] The RunStatus instance for this node/run. + # + # @return [Hash] A hash containing the node object and related metadata. + # + def self.node_update_message(run_status) + { + "entity_name" => run_status.node.name, + "entity_type" => "node", + "entity_uuid" => node_uuid, + "id" => SecureRandom.uuid, + "message_version" => "1.1.0", + "message_type" => "action", + "organization_name" => organization, + "recorded_at" => Time.now.utc.iso8601, + "remote_hostname" => run_status.node["fqdn"], + "requestor_name" => run_status.node.name, + "requestor_type" => "client", + "run_id" => run_status.run_id, + "service_hostname" => chef_server_fqdn(run_status), + "source" => collector_source, + "task" => "update", + "user_agent" => Chef::HTTP::HTTPRequest::DEFAULT_UA, + "data" => run_status.node, + } + end + end + end +end diff --git a/lib/chef/data_collector/messages/helpers.rb b/lib/chef/data_collector/messages/helpers.rb new file mode 100644 index 0000000000..3e52f80047 --- /dev/null +++ b/lib/chef/data_collector/messages/helpers.rb @@ -0,0 +1,161 @@ +# +# Author:: Adam Leff (<adamleff@chef.io) +# Author:: Ryan Cragun (<ryan@chef.io>) +# +# Copyright:: Copyright 2012-2016, Chef Software Inc. +# License:: Apache License, Version 2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +class Chef + class DataCollector + module Messages + module Helpers + # + # Fully-qualified domain name of the Chef Server configured in Chef::Config + # If the chef_server_url cannot be parsed as a URI, the node["fqdn"] attribute + # will be returned, or "localhost" if the run_status is unavailable to us. + # + # @param run_status [Chef::RunStatus] The RunStatus object for this Chef Run. + # + # @return [String] FQDN of the configured Chef Server, or node/localhost if not found. + # + def chef_server_fqdn(run_status) + if !Chef::Config[:chef_server_url].nil? + URI(Chef::Config[:chef_server_url]).host + elsif !Chef::Config[:node_name].nil? + Chef::Config[:node_name] + else + "localhost" + end + end + + # + # The organization name the node is associated with. For Chef Solo runs, a + # user-configured organization string is returned, or the string "chef_solo" + # if such a string is not configured. + # + # @return [String] Organization to which the node is associated + # + def organization + solo_run? ? data_collector_organization : chef_server_organization + end + + # + # Returns the user-configured organization, or "chef_solo" if none is configured. + # + # This is only used when Chef is run in Solo mode. + # + # @return [String] Data-collector-specific organization used when running in Chef Solo + # + def data_collector_organization + Chef::Config[:data_collector][:organization] || "chef_solo" + end + + # + # Return the organization assumed by the configured chef_server_url. + # + # We must parse this from the Chef::Config[:chef_server_url] because a node + # has no knowledge of an organization or to which organization is belongs. + # + # If we cannot determine the organization, we return "unknown_organization" + # + # @return [String] shortname of the Chef Server organization + # + def chef_server_organization + return "unknown_organization" unless Chef::Config[:chef_server_url] + + Chef::Config[:chef_server_url].match(%r{/+organizations/+(\w+)}).nil? ? "unknown_organization" : $1 + end + + # + # The source of the data collecting during this run, used by the + # DataCollector endpoint to determine if Chef was in Solo mode or not. + # + # @return [String] "chef_solo" if in Solo mode, "chef_client" if in Client mode + # + def collector_source + solo_run? ? "chef_solo" : "chef_client" + end + + # + # If we're running in Solo (legacy) mode, or in Solo (formerly + # "Chef Client Local Mode"), we're considered to be in a "solo run". + # + # @return [Boolean] Whether we're in a solo run or not + # + def solo_run? + Chef::Config[:solo] || Chef::Config[:local_mode] + end + + # + # Returns a UUID that uniquely identifies this node for reporting reasons. + # + # The node is read in from disk if it exists, or it's generated if it does + # does not exist. + # + # @return [String] UUID for the node + # + def node_uuid + read_node_uuid || generate_node_uuid + end + + # + # Generates a UUID for the node via SecureRandom.uuid and writes out + # metadata file so the UUID persists between runs. + # + # @return [String] UUID for the node + # + def generate_node_uuid + uuid = SecureRandom.uuid + update_metadata("node_uuid", uuid) + + uuid + end + + # + # Reads in the node UUID from the node metadata file + # + # @return [String] UUID for the node + # + def read_node_uuid + metadata["node_uuid"] + end + + # + # Returns the DataCollector metadata for this node + # + # If the metadata file does not exist in the file cache path, + # an empty hash will be returned. + # + # @return [Hash] DataCollector metadata for this node + # + def metadata + JSON.load(Chef::FileCache.load(metadata_filename)) + rescue Chef::Exceptions::FileNotFound + {} + end + + def update_metadata(key, value) + metadata[key] = value + Chef::FileCache.store(metadata_filename, metadata.to_json, 0644) + end + + def metadata_filename + "data_collector_metadata.json" + end + end + end + end +end diff --git a/lib/chef/data_collector/resource_report.rb b/lib/chef/data_collector/resource_report.rb new file mode 100644 index 0000000000..1793fe2c9d --- /dev/null +++ b/lib/chef/data_collector/resource_report.rb @@ -0,0 +1,84 @@ +# +# Author:: Adam Leff (<adamleff@chef.io>) +# Author:: Ryan Cragun (<ryan@chef.io>) +# +# Copyright:: Copyright 2012-2016, Chef Software Inc. +# License:: Apache License, Version 2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +class Chef + class DataCollector + class ResourceReport + + attr_reader :action, :current_resource, :elapsed_time, :new_resource, :status + attr_accessor :conditional, :exception + + def initialize(new_resource, action, current_resource = nil) + @new_resource = new_resource + @action = action + @current_resource = current_resource + end + + def skipped(conditional) + @status = "skipped" + @conditional = conditional + end + + def updated + @status = "updated" + end + + def failed(exception) + @current_resource = nil + @status = "failed" + @exception = exception + end + + def up_to_date + @status = "up-to-date" + end + + def finish + @elapsed_time = new_resource.elapsed_time + end + + def to_hash + hash = { + "type" => new_resource.resource_name.to_sym, + "name" => new_resource.name.to_s, + "id" => new_resource.identity.to_s, + "after" => new_resource.state_for_resource_reporter, + "before" => current_resource ? current_resource.state_for_resource_reporter : {}, + "duration" => (elapsed_time * 1000).to_i.to_s, + "delta" => new_resource.respond_to?(:diff) ? new_resource.diff : "", + "result" => action.to_s, + "status" => status, + } + + if new_resource.cookbook_name + hash["cookbook_name"] = new_resource.cookbook_name + hash["cookbook_version"] = new_resource.cookbook_version.version + end + + hash["conditional"] = conditional.to_text if status == "skipped" + hash["error_message"] = exception.message unless exception.nil? + + hash + end + alias :to_h :to_hash + alias :for_json :to_hash + end + end +end |