From 26ac691a688cb569a7345d8f31a406d467240bb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Chojnacki?= <pawel@chojnacki.ws>
Date: Tue, 4 Jul 2017 15:28:34 +0000
Subject: Instrument Unicorn with Ruby exporter

---
 lib/gitlab/metrics/base_sampler.rb               |  94 ++++++++++++++++
 lib/gitlab/metrics/connection_rack_middleware.rb |  45 ++++++++
 lib/gitlab/metrics/influx_sampler.rb             | 101 +++++++++++++++++
 lib/gitlab/metrics/prometheus.rb                 |   4 +-
 lib/gitlab/metrics/sampler.rb                    | 133 -----------------------
 lib/gitlab/metrics/unicorn_sampler.rb            |  48 ++++++++
 6 files changed, 290 insertions(+), 135 deletions(-)
 create mode 100644 lib/gitlab/metrics/base_sampler.rb
 create mode 100644 lib/gitlab/metrics/connection_rack_middleware.rb
 create mode 100644 lib/gitlab/metrics/influx_sampler.rb
 delete mode 100644 lib/gitlab/metrics/sampler.rb
 create mode 100644 lib/gitlab/metrics/unicorn_sampler.rb

(limited to 'lib/gitlab/metrics')

diff --git a/lib/gitlab/metrics/base_sampler.rb b/lib/gitlab/metrics/base_sampler.rb
new file mode 100644
index 00000000000..219accfc029
--- /dev/null
+++ b/lib/gitlab/metrics/base_sampler.rb
@@ -0,0 +1,131 @@
+require 'logger'
+module Gitlab
+  module Metrics
+    # Base class for samplers that collect metrics periodically on a
+    # background thread.
+    #
+    # Subclasses implement #sample and may override #enabled?. A single
+    # instance per class can be registered with .initialize_instance and
+    # fetched again with .instance.
+    class BaseSampler
+      # Builds the singleton instance of the receiving class and registers an
+      # at_exit hook that stops it.
+      #
+      # args - Arguments forwarded to .new.
+      #
+      # Returns the new instance. Raises RuntimeError if an instance was
+      # already initialized.
+      def self.initialize_instance(*args)
+        raise "#{name} singleton instance already initialized" if @instance
+        @instance = new(*args)
+        at_exit(&@instance.method(:stop))
+        @instance
+      end
+
+      # Returns the singleton instance, or nil if none was initialized.
+      def self.instance
+        @instance
+      end
+
+      attr_reader :running
+
+      # interval - The sampling interval in seconds.
+      def initialize(interval)
+        interval_half = interval.to_f / 2
+
+        @interval = interval
+        @interval_steps = (-interval_half..interval_half).step(0.1).to_a
+        @last_step = nil
+
+        @mutex = Mutex.new
+      end
+
+      # Whether #start may spawn the sampling thread. Always true here;
+      # subclasses override it when sampling depends on the environment.
+      def enabled?
+        true
+      end
+
+      # Starts the sampling thread unless the sampler is disabled or already
+      # running. The thread sleeps one interval before taking the first sample.
+      def start
+        return unless enabled?
+
+        @mutex.synchronize do
+          return if running
+          @running = true
+
+          @thread = Thread.new do
+            sleep(sleep_interval)
+
+            while running
+              safe_sample
+
+              # safe_sample stops the sampler when sampling fails; leave right
+              # away instead of sleeping for one more interval.
+              break unless running
+
+              sleep(sleep_interval)
+            end
+          end
+        end
+      end
+
+      # Stops sampling. When called from another thread this wakes the sampling
+      # thread up and waits for it to finish.
+      def stop
+        @mutex.synchronize do
+          return unless running
+
+          @running = false
+
+          if @thread
+            # safe_sample calls stop from inside the sampling thread. A thread
+            # that joins itself raises ThreadError, so only wake up and join
+            # the sampling thread when stop runs on a different thread.
+            unless @thread == Thread.current
+              @thread.wakeup if @thread.alive?
+              @thread.join
+            end
+
+            @thread = nil
+          end
+        end
+      end
+
+      # Runs #sample. Any StandardError is logged as a warning and stops the
+      # sampler.
+      def safe_sample
+        sample
+      rescue => e
+        Rails.logger.warn("#{self.class}: #{e}, stopping")
+        stop
+      end
+
+      # Takes one sample. Subclasses must implement this.
+      def sample
+        raise NotImplementedError
+      end
+
+      # Returns the sleep interval with a random adjustment.
+      #
+      # The random adjustment is put in place to ensure we:
+      #
+      # 1. Don't generate samples at the exact same interval every time (thus
+      #    potentially missing anything that happens in between samples).
+      # 2. Don't sample data at the same interval two times in a row.
+      def sleep_interval
+        # Very short intervals produce fewer than two candidate steps. There is
+        # then nothing to alternate between, and retrying until a different
+        # step comes up would spin forever.
+        return @interval + @interval_steps.first.to_f if @interval_steps.size < 2
+
+        step = @interval_steps.sample
+        step = @interval_steps.sample while step == @last_step
+        @last_step = step
+
+        @interval + step
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/metrics/connection_rack_middleware.rb b/lib/gitlab/metrics/connection_rack_middleware.rb
new file mode 100644
index 00000000000..b3da360be8f
--- /dev/null
+++ b/lib/gitlab/metrics/connection_rack_middleware.rb
@@ -0,0 +1,62 @@
+module Gitlab
+  module Metrics
+    # Rack middleware that records Gitlab::Metrics counters for requests,
+    # responses and uncaught errors, plus a histogram of the time spent
+    # handling each request.
+    class ConnectionRackMiddleware
+      # app - The Rack application this middleware wraps.
+      def initialize(app)
+        @app = app
+      end
+
+      # Counter of handled requests; #call labels it with the HTTP method.
+      def self.rack_request_count
+        @rack_request_count ||= Gitlab::Metrics.counter(:rack_request, 'Rack request count')
+      end
+
+      # Counter of returned responses; #call labels it with the HTTP method
+      # and the response status.
+      def self.rack_response_count
+        @rack_response_count ||= Gitlab::Metrics.counter(:rack_response, 'Rack response count')
+      end
+
+      # Counter of errors raised while calling the wrapped application.
+      def self.rack_uncaught_errors_count
+        @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors, 'Rack connections handling uncaught errors count')
+      end
+
+      # Histogram of the request handling time in seconds.
+      def self.rack_execution_time
+        @rack_execution_time ||= Gitlab::Metrics.histogram(:rack_execution_time, 'Rack connection handling execution time',
+                                                           {}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 1.5, 2, 2.5, 3, 5, 7, 10])
+      end
+
+      # Calls the wrapped application and records metrics around the call.
+      #
+      # env - The Rack environment Hash.
+      #
+      # Returns the status, headers and body returned by the application.
+      # StandardErrors raised by the application are counted and re-raised.
+      def call(env)
+        method = env['REQUEST_METHOD'].downcase
+        # Measure with the monotonic clock: wall-clock time can be adjusted
+        # while a request is running, which would skew the duration.
+        started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+        begin
+          ConnectionRackMiddleware.rack_request_count.increment(method: method)
+
+          status, headers, body = @app.call(env)
+
+          ConnectionRackMiddleware.rack_response_count.increment(method: method, status: status)
+          [status, headers, body]
+        rescue
+          ConnectionRackMiddleware.rack_uncaught_errors_count.increment
+          raise
+        ensure
+          elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
+          ConnectionRackMiddleware.rack_execution_time.observe({}, elapsed)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/metrics/influx_sampler.rb b/lib/gitlab/metrics/influx_sampler.rb
new file mode 100644
index 00000000000..6db1dd755b7
--- /dev/null
+++ b/lib/gitlab/metrics/influx_sampler.rb
@@ -0,0 +1,111 @@
+module Gitlab
+  module Metrics
+    # Sampler that submits process-wide metrics to InfluxDB at a regular
+    # interval.
+    #
+    # It gathers statistics that can't be tied to a single transaction, such
+    # as memory usage, file descriptors, object counts and garbage collection
+    # statistics.
+    class InfluxSampler < BaseSampler
+      # interval - The sampling interval in seconds.
+      def initialize(interval = Metrics.settings[:sample_interval])
+        super(interval)
+
+        @last_step = nil
+        @metrics = []
+
+        @last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
+        @last_major_gc = Delta.new(GC.stat[:major_gc_count])
+
+        return unless Gitlab::Metrics.mri?
+
+        require 'allocations'
+        Allocations.start
+      end
+
+      # Gathers all metrics and submits them. The GC profiler data and the
+      # buffered metrics are cleared afterwards, even if sampling raised.
+      def sample
+        sample_memory_usage
+        sample_file_descriptors
+        sample_objects
+        sample_gc
+
+        flush
+      ensure
+        GC::Profiler.clear
+        @metrics.clear
+      end
+
+      # Submits the buffered metrics, each converted with #to_hash.
+      def flush
+        payload = @metrics.map(&:to_hash)
+
+        Metrics.submit_metrics(payload)
+      end
+
+      def sample_memory_usage
+        add_metric('memory_usage', value: System.memory_usage)
+      end
+
+      def sample_file_descriptors
+        add_metric('file_descriptors', value: System.file_descriptor_count)
+      end
+
+      if Metrics.mri?
+        # Buffers one 'object_counts' metric per named class reported by
+        # Allocations, plus one for Symbol.
+        def sample_objects
+          counts = Allocations.to_hash.each_with_object({}) do |(klass, count), totals|
+            class_name = klass.name
+
+            # Classes without a name are skipped.
+            totals[class_name] = count if class_name
+          end
+
+          # Symbols aren't allocated so we'll need to add those manually.
+          counts['Symbol'] = Symbol.all_symbols.length
+
+          counts.each do |class_name, count|
+            add_metric('object_counts', { count: count }, type: class_name)
+          end
+        end
+      else
+        # Object counts are only sampled on MRI.
+        def sample_objects
+        end
+      end
+
+      # Buffers a 'gc_statistics' metric built from GC.stat and the GC
+      # profiler's total time.
+      def sample_gc
+        total_time = GC::Profiler.total_time * 1000.0
+        stats = GC.stat.merge(total_time: total_time)
+
+        # We want the difference of GC runs compared to the last sample, not the
+        # total amount since the process started.
+        minor_runs = @last_minor_gc.compared_with(stats[:minor_gc_count])
+        major_runs = @last_major_gc.compared_with(stats[:major_gc_count])
+
+        stats[:minor_gc_count] = minor_runs
+        stats[:major_gc_count] = major_runs
+        stats[:count] = minor_runs + major_runs
+
+        add_metric('gc_statistics', stats)
+      end
+
+      # Buffers a metric. The series name is prefixed with 'sidekiq_' when
+      # sidekiq? is true and with 'rails_' otherwise.
+      def add_metric(series, values, tags = {})
+        prefix = sidekiq? ? 'sidekiq_' : 'rails_'
+        metric = Metric.new("#{prefix}#{series}", values, tags)
+
+        @metrics << metric
+      end
+
+      def sidekiq?
+        Sidekiq.server?
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/metrics/prometheus.rb b/lib/gitlab/metrics/prometheus.rb
index 9d314a56e58..fb7bbc7cfc7 100644
--- a/lib/gitlab/metrics/prometheus.rb
+++ b/lib/gitlab/metrics/prometheus.rb
@@ -29,8 +29,8 @@ module Gitlab
         provide_metric(name) || registry.summary(name, docstring, base_labels)
       end
 
-      def gauge(name, docstring, base_labels = {})
-        provide_metric(name) || registry.gauge(name, docstring, base_labels)
+      def gauge(name, docstring, base_labels = {}, multiprocess_mode = :all)
+        provide_metric(name) || registry.gauge(name, docstring, base_labels, multiprocess_mode)
       end
 
       def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS)
diff --git a/lib/gitlab/metrics/sampler.rb b/lib/gitlab/metrics/sampler.rb
deleted file mode 100644
index 0000450d9bb..00000000000
--- a/lib/gitlab/metrics/sampler.rb
+++ /dev/null
@@ -1,133 +0,0 @@
-module Gitlab
-  module Metrics
-    # Class that sends certain metrics to InfluxDB at a specific interval.
-    #
-    # This class is used to gather statistics that can't be directly associated
-    # with a transaction such as system memory usage, garbage collection
-    # statistics, etc.
-    class Sampler
-      # interval - The sampling interval in seconds.
-      def initialize(interval = Metrics.settings[:sample_interval])
-        interval_half = interval.to_f / 2
-
-        @interval       = interval
-        @interval_steps = (-interval_half..interval_half).step(0.1).to_a
-        @last_step      = nil
-
-        @metrics = []
-
-        @last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
-        @last_major_gc = Delta.new(GC.stat[:major_gc_count])
-
-        if Gitlab::Metrics.mri?
-          require 'allocations'
-
-          Allocations.start
-        end
-      end
-
-      def start
-        Thread.new do
-          Thread.current.abort_on_exception = true
-
-          loop do
-            sleep(sleep_interval)
-
-            sample
-          end
-        end
-      end
-
-      def sample
-        sample_memory_usage
-        sample_file_descriptors
-        sample_objects
-        sample_gc
-
-        flush
-      ensure
-        GC::Profiler.clear
-        @metrics.clear
-      end
-
-      def flush
-        Metrics.submit_metrics(@metrics.map(&:to_hash))
-      end
-
-      def sample_memory_usage
-        add_metric('memory_usage', value: System.memory_usage)
-      end
-
-      def sample_file_descriptors
-        add_metric('file_descriptors', value: System.file_descriptor_count)
-      end
-
-      if Metrics.mri?
-        def sample_objects
-          sample = Allocations.to_hash
-          counts = sample.each_with_object({}) do |(klass, count), hash|
-            name = klass.name
-
-            next unless name
-
-            hash[name] = count
-          end
-
-          # Symbols aren't allocated so we'll need to add those manually.
-          counts['Symbol'] = Symbol.all_symbols.length
-
-          counts.each do |name, count|
-            add_metric('object_counts', { count: count }, type: name)
-          end
-        end
-      else
-        def sample_objects
-        end
-      end
-
-      def sample_gc
-        time  = GC::Profiler.total_time * 1000.0
-        stats = GC.stat.merge(total_time: time)
-
-        # We want the difference of GC runs compared to the last sample, not the
-        # total amount since the process started.
-        stats[:minor_gc_count] =
-          @last_minor_gc.compared_with(stats[:minor_gc_count])
-
-        stats[:major_gc_count] =
-          @last_major_gc.compared_with(stats[:major_gc_count])
-
-        stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
-
-        add_metric('gc_statistics', stats)
-      end
-
-      def add_metric(series, values, tags = {})
-        prefix = sidekiq? ? 'sidekiq_' : 'rails_'
-
-        @metrics << Metric.new("#{prefix}#{series}", values, tags)
-      end
-
-      def sidekiq?
-        Sidekiq.server?
-      end
-
-      # Returns the sleep interval with a random adjustment.
-      #
-      # The random adjustment is put in place to ensure we:
-      #
-      # 1. Don't generate samples at the exact same interval every time (thus
-      #    potentially missing anything that happens in between samples).
-      # 2. Don't sample data at the same interval two times in a row.
-      def sleep_interval
-        while step = @interval_steps.sample
-          if step != @last_step
-            @last_step = step
-
-            return @interval + @last_step
-          end
-        end
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/metrics/unicorn_sampler.rb b/lib/gitlab/metrics/unicorn_sampler.rb
new file mode 100644
index 00000000000..f6987252039
--- /dev/null
+++ b/lib/gitlab/metrics/unicorn_sampler.rb
@@ -0,0 +1,61 @@
+module Gitlab
+  module Metrics
+    # Sampler that publishes the active and queued connection counts of the
+    # Unicorn listeners as gauges.
+    class UnicornSampler < BaseSampler
+      # interval - The sampling interval in seconds.
+      def initialize(interval)
+        super
+      end
+
+      # Gauge for the number of active connections per listener.
+      def unicorn_active_connections
+        @unicorn_active_connections ||= Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max)
+      end
+
+      # Gauge for the number of queued connections per listener.
+      def unicorn_queued_connections
+        @unicorn_queued_connections ||= Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max)
+      end
+
+      # Sampling requires Unicorn with at least one listener and
+      # Raindrops::Linux.tcp_listener_stats, which is only present on Linux.
+      def enabled?
+        unicorn_with_listeners? && Raindrops::Linux.respond_to?(:tcp_listener_stats)
+      end
+
+      # Reads the TCP and UNIX listener statistics and updates both gauges.
+      def sample
+        record_stats('tcp', Raindrops::Linux.tcp_listener_stats(tcp_listeners))
+        record_stats('unix', Raindrops::Linux.unix_listener_stats(unix_listeners))
+      end
+
+      private
+
+      # Sets both gauges for every listener found in stats_by_address.
+      #
+      # type             - The value of the type label ('tcp' or 'unix').
+      # stats_by_address - Listener statistics keyed by listener address.
+      def record_stats(type, stats_by_address)
+        stats_by_address.each do |address, stats|
+          unicorn_active_connections.set({ type: type, address: address }, stats.active)
+          unicorn_queued_connections.set({ type: type, address: address }, stats.queued)
+        end
+      end
+
+      # Listener names of the form host:port.
+      def tcp_listeners
+        @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z})
+      end
+
+      # Every listener name that is not a TCP listener.
+      def unix_listeners
+        @unix_listeners ||= Unicorn.listener_names - tcp_listeners
+      end
+
+      def unicorn_with_listeners?
+        defined?(Unicorn) && Unicorn.listener_names.any?
+      end
+    end
+  end
+end
-- 
cgit v1.2.1