From 26ac691a688cb569a7345d8f31a406d467240bb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Chojnacki?= Date: Tue, 4 Jul 2017 15:28:34 +0000 Subject: Instrument Unicorn with Ruby exporter --- lib/gitlab/metrics/base_sampler.rb | 94 ++++++++++++++++ lib/gitlab/metrics/connection_rack_middleware.rb | 45 ++++++++ lib/gitlab/metrics/influx_sampler.rb | 101 +++++++++++++++++ lib/gitlab/metrics/prometheus.rb | 4 +- lib/gitlab/metrics/sampler.rb | 133 ----------------------- lib/gitlab/metrics/unicorn_sampler.rb | 48 ++++++++ 6 files changed, 290 insertions(+), 135 deletions(-) create mode 100644 lib/gitlab/metrics/base_sampler.rb create mode 100644 lib/gitlab/metrics/connection_rack_middleware.rb create mode 100644 lib/gitlab/metrics/influx_sampler.rb delete mode 100644 lib/gitlab/metrics/sampler.rb create mode 100644 lib/gitlab/metrics/unicorn_sampler.rb (limited to 'lib/gitlab/metrics') diff --git a/lib/gitlab/metrics/base_sampler.rb b/lib/gitlab/metrics/base_sampler.rb new file mode 100644 index 00000000000..219accfc029 --- /dev/null +++ b/lib/gitlab/metrics/base_sampler.rb @@ -0,0 +1,94 @@ +require 'logger' +module Gitlab + module Metrics + class BaseSampler + def self.initialize_instance(*args) + raise "#{name} singleton instance already initialized" if @instance + @instance = new(*args) + at_exit(&@instance.method(:stop)) + @instance + end + + def self.instance + @instance + end + + attr_reader :running + + # interval - The sampling interval in seconds. + def initialize(interval) + interval_half = interval.to_f / 2 + + @interval = interval + @interval_steps = (-interval_half..interval_half).step(0.1).to_a + + @mutex = Mutex.new + end + + def enabled? + true + end + + def start + return unless enabled? + + @mutex.synchronize do + return if running + @running = true + + @thread = Thread.new do + sleep(sleep_interval) + + while running + safe_sample + + sleep(sleep_interval) + end + end + end + end + + def stop + @mutex.synchronize do + return unless running + + @running = false + + if @thread + @thread.wakeup if @thread.alive? + @thread.join + @thread = nil + end + end + end + + def safe_sample + sample + rescue => e + Rails.logger.warn("#{self.class}: #{e}, stopping") + stop + end + + def sample + raise NotImplementedError + end + + # Returns the sleep interval with a random adjustment. + # + # The random adjustment is put in place to ensure we: + # + # 1. Don't generate samples at the exact same interval every time (thus + # potentially missing anything that happens in between samples). + # 2. Don't sample data at the same interval two times in a row. + def sleep_interval + while step = @interval_steps.sample + if step != @last_step + @last_step = step + + return @interval + @last_step + end + end + end + end + end +end diff --git a/lib/gitlab/metrics/connection_rack_middleware.rb b/lib/gitlab/metrics/connection_rack_middleware.rb new file mode 100644 index 00000000000..b3da360be8f --- /dev/null +++ b/lib/gitlab/metrics/connection_rack_middleware.rb @@ -0,0 +1,45 @@ +module Gitlab + module Metrics + class ConnectionRackMiddleware + def initialize(app) + @app = app + end + + def self.rack_request_count + @rack_request_count ||= Gitlab::Metrics.counter(:rack_request, 'Rack request count') + end + + def self.rack_response_count + @rack_response_count ||= Gitlab::Metrics.counter(:rack_response, 'Rack response count') + end + + def self.rack_uncaught_errors_count + @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors, 'Rack connections handling uncaught errors count') + end + + def self.rack_execution_time + @rack_execution_time ||= Gitlab::Metrics.histogram(:rack_execution_time, 'Rack connection handling execution time', + {}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 1.5, 2, 2.5, 3, 5, 7, 10]) + end + + def call(env) + method = env['REQUEST_METHOD'].downcase + started = Time.now.to_f + begin + ConnectionRackMiddleware.rack_request_count.increment(method: method) + + status, headers, body = @app.call(env) + + ConnectionRackMiddleware.rack_response_count.increment(method: method, status: status) + [status, headers, body] + rescue + ConnectionRackMiddleware.rack_uncaught_errors_count.increment + raise + ensure + elapsed = Time.now.to_f - started + ConnectionRackMiddleware.rack_execution_time.observe({}, elapsed) + end + end + end + end +end diff --git a/lib/gitlab/metrics/influx_sampler.rb b/lib/gitlab/metrics/influx_sampler.rb new file mode 100644 index 00000000000..6db1dd755b7 --- /dev/null +++ b/lib/gitlab/metrics/influx_sampler.rb @@ -0,0 +1,101 @@ +module Gitlab + module Metrics + # Class that sends certain metrics to InfluxDB at a specific interval. + # + # This class is used to gather statistics that can't be directly associated + # with a transaction such as system memory usage, garbage collection + # statistics, etc. + class InfluxSampler < BaseSampler + # interval - The sampling interval in seconds. + def initialize(interval = Metrics.settings[:sample_interval]) + super(interval) + @last_step = nil + + @metrics = [] + + @last_minor_gc = Delta.new(GC.stat[:minor_gc_count]) + @last_major_gc = Delta.new(GC.stat[:major_gc_count]) + + if Gitlab::Metrics.mri? + require 'allocations' + + Allocations.start + end + end + + def sample + sample_memory_usage + sample_file_descriptors + sample_objects + sample_gc + + flush + ensure + GC::Profiler.clear + @metrics.clear + end + + def flush + Metrics.submit_metrics(@metrics.map(&:to_hash)) + end + + def sample_memory_usage + add_metric('memory_usage', value: System.memory_usage) + end + + def sample_file_descriptors + add_metric('file_descriptors', value: System.file_descriptor_count) + end + + if Metrics.mri? + def sample_objects + sample = Allocations.to_hash + counts = sample.each_with_object({}) do |(klass, count), hash| + name = klass.name + + next unless name + + hash[name] = count + end + + # Symbols aren't allocated so we'll need to add those manually. + counts['Symbol'] = Symbol.all_symbols.length + + counts.each do |name, count| + add_metric('object_counts', { count: count }, type: name) + end + end + else + def sample_objects + end + end + + def sample_gc + time = GC::Profiler.total_time * 1000.0 + stats = GC.stat.merge(total_time: time) + + # We want the difference of GC runs compared to the last sample, not the + # total amount since the process started. + stats[:minor_gc_count] = + @last_minor_gc.compared_with(stats[:minor_gc_count]) + + stats[:major_gc_count] = + @last_major_gc.compared_with(stats[:major_gc_count]) + + stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count] + + add_metric('gc_statistics', stats) + end + + def add_metric(series, values, tags = {}) + prefix = sidekiq? ? 'sidekiq_' : 'rails_' + + @metrics << Metric.new("#{prefix}#{series}", values, tags) + end + + def sidekiq? + Sidekiq.server? + end + end + end +end diff --git a/lib/gitlab/metrics/prometheus.rb b/lib/gitlab/metrics/prometheus.rb index 9d314a56e58..fb7bbc7cfc7 100644 --- a/lib/gitlab/metrics/prometheus.rb +++ b/lib/gitlab/metrics/prometheus.rb @@ -29,8 +29,8 @@ module Gitlab provide_metric(name) || registry.summary(name, docstring, base_labels) end - def gauge(name, docstring, base_labels = {}) - provide_metric(name) || registry.gauge(name, docstring, base_labels) + def gauge(name, docstring, base_labels = {}, multiprocess_mode = :all) + provide_metric(name) || registry.gauge(name, docstring, base_labels, multiprocess_mode) end def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS) diff --git a/lib/gitlab/metrics/sampler.rb b/lib/gitlab/metrics/sampler.rb deleted file mode 100644 index 0000450d9bb..00000000000 --- a/lib/gitlab/metrics/sampler.rb +++ /dev/null @@ -1,133 +0,0 @@ -module Gitlab - module Metrics - # Class that sends certain metrics to InfluxDB at a specific interval. - # - # This class is used to gather statistics that can't be directly associated - # with a transaction such as system memory usage, garbage collection - # statistics, etc. - class Sampler - # interval - The sampling interval in seconds. - def initialize(interval = Metrics.settings[:sample_interval]) - interval_half = interval.to_f / 2 - - @interval = interval - @interval_steps = (-interval_half..interval_half).step(0.1).to_a - @last_step = nil - - @metrics = [] - - @last_minor_gc = Delta.new(GC.stat[:minor_gc_count]) - @last_major_gc = Delta.new(GC.stat[:major_gc_count]) - - if Gitlab::Metrics.mri? - require 'allocations' - - Allocations.start - end - end - - def start - Thread.new do - Thread.current.abort_on_exception = true - - loop do - sleep(sleep_interval) - - sample - end - end - end - - def sample - sample_memory_usage - sample_file_descriptors - sample_objects - sample_gc - - flush - ensure - GC::Profiler.clear - @metrics.clear - end - - def flush - Metrics.submit_metrics(@metrics.map(&:to_hash)) - end - - def sample_memory_usage - add_metric('memory_usage', value: System.memory_usage) - end - - def sample_file_descriptors - add_metric('file_descriptors', value: System.file_descriptor_count) - end - - if Metrics.mri? - def sample_objects - sample = Allocations.to_hash - counts = sample.each_with_object({}) do |(klass, count), hash| - name = klass.name - - next unless name - - hash[name] = count - end - - # Symbols aren't allocated so we'll need to add those manually. - counts['Symbol'] = Symbol.all_symbols.length - - counts.each do |name, count| - add_metric('object_counts', { count: count }, type: name) - end - end - else - def sample_objects - end - end - - def sample_gc - time = GC::Profiler.total_time * 1000.0 - stats = GC.stat.merge(total_time: time) - - # We want the difference of GC runs compared to the last sample, not the - # total amount since the process started. - stats[:minor_gc_count] = - @last_minor_gc.compared_with(stats[:minor_gc_count]) - - stats[:major_gc_count] = - @last_major_gc.compared_with(stats[:major_gc_count]) - - stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count] - - add_metric('gc_statistics', stats) - end - - def add_metric(series, values, tags = {}) - prefix = sidekiq? ? 'sidekiq_' : 'rails_' - - @metrics << Metric.new("#{prefix}#{series}", values, tags) - end - - def sidekiq? - Sidekiq.server? - end - - # Returns the sleep interval with a random adjustment. - # - # The random adjustment is put in place to ensure we: - # - # 1. Don't generate samples at the exact same interval every time (thus - # potentially missing anything that happens in between samples). - # 2. Don't sample data at the same interval two times in a row. - def sleep_interval - while step = @interval_steps.sample - if step != @last_step - @last_step = step - - return @interval + @last_step - end - end - end - end - end -end diff --git a/lib/gitlab/metrics/unicorn_sampler.rb b/lib/gitlab/metrics/unicorn_sampler.rb new file mode 100644 index 00000000000..f6987252039 --- /dev/null +++ b/lib/gitlab/metrics/unicorn_sampler.rb @@ -0,0 +1,48 @@ +module Gitlab + module Metrics + class UnicornSampler < BaseSampler + def initialize(interval) + super(interval) + end + + def unicorn_active_connections + @unicorn_active_connections ||= Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max) + end + + def unicorn_queued_connections + @unicorn_queued_connections ||= Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max) + end + + def enabled? + # Raindrops::Linux.tcp_listener_stats is only present on Linux + unicorn_with_listeners? && Raindrops::Linux.respond_to?(:tcp_listener_stats) + end + + def sample + Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats| + unicorn_active_connections.set({ type: 'tcp', address: addr }, stats.active) + unicorn_queued_connections.set({ type: 'tcp', address: addr }, stats.queued) + end + + Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats| + unicorn_active_connections.set({ type: 'unix', address: addr }, stats.active) + unicorn_queued_connections.set({ type: 'unix', address: addr }, stats.queued) + end + end + + private + + def tcp_listeners + @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z}) + end + + def unix_listeners + @unix_listeners ||= Unicorn.listener_names - tcp_listeners + end + + def unicorn_with_listeners? + defined?(Unicorn) && Unicorn.listener_names.any? + end + end + end +end -- cgit v1.2.1