summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyan Cobb <rcobb@gitlab.com>2019-05-20 13:36:59 -0600
committerRyan Cobb <rcobb@gitlab.com>2019-05-20 13:36:59 -0600
commit9eeedfccbcbaa98265597a964cdf895f27fcf68b (patch)
tree0b9dd1e1ea736c938228de8ef8f8437d0a13ff9d
parent4063b7e811050c0870d782072664673e16eafdfe (diff)
downloadgitlab-ce-9eeedfccbcbaa98265597a964cdf895f27fcf68b.tar.gz
Adds ruby and unicorn instrumentation
This adds ruby and unicorn instrumentation. This was originally intended in 11.11 but due to performance concerns it was reverted. This new commit foregoes the sys-proctable gem was causing performance issues previously.
-rw-r--r--doc/administration/monitoring/prometheus/gitlab_metrics.md13
-rw-r--r--lib/gitlab/metrics/samplers/ruby_sampler.rb31
-rw-r--r--lib/gitlab/metrics/samplers/unicorn_sampler.rb32
-rw-r--r--lib/gitlab/metrics/system.rb26
-rw-r--r--spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb30
-rw-r--r--spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb25
-rw-r--r--spec/lib/gitlab/metrics/system_spec.rb24
7 files changed, 152 insertions, 29 deletions
diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md
index c243dd9edbb..48bd709a2b7 100644
--- a/doc/administration/monitoring/prometheus/gitlab_metrics.md
+++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md
@@ -43,10 +43,11 @@ The following metrics are available:
| redis_ping_latency_seconds | Gauge | 9.4 | Round trip time of the redis ping |
| user_session_logins_total | Counter | 9.4 | Counter of how many users have logged in |
| upload_file_does_not_exist | Counter | 10.7 in EE, 11.5 in CE | Number of times an upload record could not find its file |
-| failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login |
-| successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login |
-| unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) |
-| unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections |
+| failed_login_captcha_total | Gauge | 11.0 | Counter of failed CAPTCHA attempts during login |
+| successful_login_captcha_total | Gauge | 11.0 | Counter of successful CAPTCHA attempts during login |
+| unicorn_active_connections | Gauge | 11.0 | The number of active Unicorn connections (workers) |
+| unicorn_queued_connections | Gauge | 11.0 | The number of queued Unicorn connections |
+| unicorn_workers | Gauge | 11.11 | The number of Unicorn workers |
## Sidekiq Metrics available for Geo **[PREMIUM]**
@@ -100,6 +101,10 @@ Some basic Ruby runtime metrics are available:
| ruby_file_descriptors | Gauge | 11.1 | File descriptors per process |
| ruby_memory_bytes | Gauge | 11.1 | Memory usage by process |
| ruby_sampler_duration_seconds_total | Counter | 11.1 | Time spent collecting stats |
+| ruby_process_cpu_seconds_total | Gauge | 11.11 | Total amount of CPU time per process |
+| ruby_process_max_fds | Gauge | 11.11 | Maximum number of open file descriptors per process |
+| ruby_process_resident_memory_bytes | Gauge | 11.11 | Memory usage by process, measured in bytes |
+| ruby_process_start_time_seconds | Gauge | 11.11 | The elapsed time between system boot and the process started, measured in seconds |
[GC.stat]: https://ruby-doc.org/core-2.3.0/GC.html#method-c-stat
diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb
index 18a69321905..4d9c43f37e7 100644
--- a/lib/gitlab/metrics/samplers/ruby_sampler.rb
+++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb
@@ -23,25 +23,32 @@ module Gitlab
end
def init_metrics
- metrics = {}
- metrics[:sampler_duration] = ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels)
- metrics[:total_time] = ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels)
+ metrics = {
+ file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum),
+ memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum),
+ process_cpu_seconds_total: ::Gitlab::Metrics.gauge(with_prefix(:process, :cpu_seconds_total), 'Process CPU seconds total'),
+ process_max_fds: ::Gitlab::Metrics.gauge(with_prefix(:process, :max_fds), 'Process max fds'),
+ process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels, :livesum),
+ process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'),
+ sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels),
+ total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels)
+ }
+
GC.stat.keys.each do |key|
metrics[key] = ::Gitlab::Metrics.gauge(with_prefix(:gc_stat, key), to_doc_string(key), labels, :livesum)
end
- metrics[:memory_usage] = ::Gitlab::Metrics.gauge(with_prefix(:memory, :bytes), 'Memory used', labels, :livesum)
- metrics[:file_descriptors] = ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels, :livesum)
-
metrics
end
def sample
start_time = System.monotonic_time
- metrics[:memory_usage].set(labels.merge(worker_label), System.memory_usage)
metrics[:file_descriptors].set(labels.merge(worker_label), System.file_descriptor_count)
-
+ metrics[:process_cpu_seconds_total].set(labels.merge(worker_label), ::Gitlab::Metrics::System.cpu_time)
+ metrics[:process_max_fds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.max_open_file_descriptors)
+ metrics[:process_start_time_seconds].set(labels.merge(worker_label), ::Gitlab::Metrics::System.process_start_time)
+ set_memory_usage_metrics
sample_gc
metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time)
@@ -61,6 +68,14 @@ module Gitlab
metrics[:total_time].increment(labels, GC::Profiler.total_time)
end
+ def set_memory_usage_metrics
+ memory_usage = System.memory_usage
+ memory_labels = labels.merge(worker_label)
+
+ metrics[:memory_bytes].set(memory_labels, memory_usage)
+ metrics[:process_resident_memory_bytes].set(memory_labels, memory_usage)
+ end
+
def worker_label
return {} unless defined?(Unicorn::Worker)
diff --git a/lib/gitlab/metrics/samplers/unicorn_sampler.rb b/lib/gitlab/metrics/samplers/unicorn_sampler.rb
index bec64e864b3..1b7a4c79080 100644
--- a/lib/gitlab/metrics/samplers/unicorn_sampler.rb
+++ b/lib/gitlab/metrics/samplers/unicorn_sampler.rb
@@ -8,12 +8,16 @@ module Gitlab
super(interval)
end
- def unicorn_active_connections
- @unicorn_active_connections ||= ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max)
+ def metrics
+ @metrics ||= init_metrics
end
- def unicorn_queued_connections
- @unicorn_queued_connections ||= ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max)
+ def init_metrics
+ {
+ unicorn_active_connections: ::Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max),
+ unicorn_queued_connections: ::Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max),
+ unicorn_workers: ::Gitlab::Metrics.gauge(:unicorn_workers, 'Unicorn workers')
+ }
end
def enabled?
@@ -23,14 +27,13 @@ module Gitlab
def sample
Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats|
- unicorn_active_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.active)
- unicorn_queued_connections.set({ socket_type: 'tcp', socket_address: addr }, stats.queued)
+ set_unicorn_connection_metrics('tcp', addr, stats)
end
-
Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats|
- unicorn_active_connections.set({ socket_type: 'unix', socket_address: addr }, stats.active)
- unicorn_queued_connections.set({ socket_type: 'unix', socket_address: addr }, stats.queued)
+ set_unicorn_connection_metrics('unix', addr, stats)
end
+
+ metrics[:unicorn_workers].set({}, unicorn_workers_count)
end
private
@@ -39,6 +42,13 @@ module Gitlab
@tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z})
end
+ def set_unicorn_connection_metrics(type, addr, stats)
+ labels = { socket_type: type, socket_address: addr }
+
+ metrics[:unicorn_active_connections].set(labels, stats.active)
+ metrics[:unicorn_queued_connections].set(labels, stats.queued)
+ end
+
def unix_listeners
@unix_listeners ||= Unicorn.listener_names - tcp_listeners
end
@@ -46,6 +56,10 @@ module Gitlab
def unicorn_with_listeners?
defined?(Unicorn) && Unicorn.listener_names.any?
end
+
+ def unicorn_workers_count
+ `pgrep -f '[u]nicorn_rails worker.+ #{Rails.root.to_s}'`.split.count
+ end
end
end
end
diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb
index 426496855e3..d3cbd200584 100644
--- a/lib/gitlab/metrics/system.rb
+++ b/lib/gitlab/metrics/system.rb
@@ -23,6 +23,20 @@ module Gitlab
def self.file_descriptor_count
Dir.glob('/proc/self/fd/*').length
end
+
+ def self.max_open_file_descriptors
+ match = File.read('/proc/self/limits').match(/Max open files\s*(\d+)/)
+
+ return unless match && match[1]
+
+ match[1].to_i
+ end
+
+ def self.process_start_time
+ fields = File.read('/proc/self/stat').split
+
+ ( fields[21].to_i || 0 ) / clk_tck
+ end
else
def self.memory_usage
0.0
@@ -31,6 +45,14 @@ module Gitlab
def self.file_descriptor_count
0
end
+
+ def self.max_open_file_descriptors
+ 0
+ end
+
+ def self.process_start_time
+ 0
+ end
end
# THREAD_CPUTIME is not supported on OS X
@@ -59,6 +81,10 @@ module Gitlab
def self.monotonic_time
Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end
+
+ def self.clk_tck
+ @clk_tck ||= `genconf CLK_TCK`.to_i
+ end
end
end
end
diff --git a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb
index 7972ff253fe..aaf8c9fa2a0 100644
--- a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb
+++ b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb
@@ -10,17 +10,20 @@ describe Gitlab::Metrics::Samplers::RubySampler do
describe '#sample' do
it 'samples various statistics' do
- expect(Gitlab::Metrics::System).to receive(:memory_usage)
+ expect(Gitlab::Metrics::System).to receive(:cpu_time)
expect(Gitlab::Metrics::System).to receive(:file_descriptor_count)
+ expect(Gitlab::Metrics::System).to receive(:memory_usage)
+ expect(Gitlab::Metrics::System).to receive(:process_start_time)
+ expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors)
expect(sampler).to receive(:sample_gc)
sampler.sample
end
- it 'adds a metric containing the memory usage' do
+ it 'adds a metric containing the process resident memory bytes' do
expect(Gitlab::Metrics::System).to receive(:memory_usage).and_return(9000)
- expect(sampler.metrics[:memory_usage]).to receive(:set).with({}, 9000)
+ expect(sampler.metrics[:process_resident_memory_bytes]).to receive(:set).with({}, 9000)
sampler.sample
end
@@ -34,6 +37,27 @@ describe Gitlab::Metrics::Samplers::RubySampler do
sampler.sample
end
+ it 'adds a metric containing the process total cpu time' do
+ expect(Gitlab::Metrics::System).to receive(:cpu_time).and_return(0.51)
+ expect(sampler.metrics[:process_cpu_seconds_total]).to receive(:set).with({}, 0.51)
+
+ sampler.sample
+ end
+
+ it 'adds a metric containing the process start time' do
+ expect(Gitlab::Metrics::System).to receive(:process_start_time).and_return(12345)
+ expect(sampler.metrics[:process_start_time_seconds]).to receive(:set).with({}, 12345)
+
+ sampler.sample
+ end
+
+ it 'adds a metric containing the process max file descriptors' do
+ expect(Gitlab::Metrics::System).to receive(:max_open_file_descriptors).and_return(1024)
+ expect(sampler.metrics[:process_max_fds]).to receive(:set).with({}, 1024)
+
+ sampler.sample
+ end
+
it 'clears any GC profiles' do
expect(GC::Profiler).to receive(:clear)
diff --git a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb
index 4b03f3c2532..090e456644f 100644
--- a/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb
+++ b/spec/lib/gitlab/metrics/samplers/unicorn_sampler_spec.rb
@@ -39,8 +39,8 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do
it 'updates metrics type unix and with addr' do
labels = { socket_type: 'unix', socket_address: socket_address }
- expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active')
- expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued')
+ expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active')
+ expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued')
subject.sample
end
@@ -50,7 +50,6 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do
context 'unicorn listens on tcp sockets' do
let(:tcp_socket_address) { '0.0.0.0:8080' }
let(:tcp_sockets) { [tcp_socket_address] }
-
before do
allow(unicorn).to receive(:listener_names).and_return(tcp_sockets)
end
@@ -71,13 +70,29 @@ describe Gitlab::Metrics::Samplers::UnicornSampler do
it 'updates metrics type unix and with addr' do
labels = { socket_type: 'tcp', socket_address: tcp_socket_address }
- expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active')
- expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued')
+ expect(subject.metrics[:unicorn_active_connections]).to receive(:set).with(labels, 'active')
+ expect(subject.metrics[:unicorn_queued_connections]).to receive(:set).with(labels, 'queued')
subject.sample
end
end
end
+
+ context 'additional metrics' do
+ let(:unicorn_workers) { 2 }
+
+ before do
+ allow(unicorn).to receive(:listener_names).and_return([""])
+ allow(::Gitlab::Metrics::System).to receive(:cpu_time).and_return(3.14)
+ allow(subject).to receive(:unicorn_workers_count).and_return(unicorn_workers)
+ end
+
+ it "sets additional metrics" do
+ expect(subject.metrics[:unicorn_workers]).to receive(:set).with({}, unicorn_workers)
+
+ subject.sample
+ end
+ end
end
describe '#start' do
diff --git a/spec/lib/gitlab/metrics/system_spec.rb b/spec/lib/gitlab/metrics/system_spec.rb
index 14afcdf5daa..b0603d96eb2 100644
--- a/spec/lib/gitlab/metrics/system_spec.rb
+++ b/spec/lib/gitlab/metrics/system_spec.rb
@@ -13,6 +13,18 @@ describe Gitlab::Metrics::System do
expect(described_class.file_descriptor_count).to be > 0
end
end
+
+ describe '.max_open_file_descriptors' do
+ it 'returns the max allowed open file descriptors' do
+ expect(described_class.max_open_file_descriptors).to be > 0
+ end
+ end
+
+ describe '.process_start_time' do
+ it 'returns the process start time' do
+ expect(described_class.process_start_time).to be > 0
+ end
+ end
else
describe '.memory_usage' do
it 'returns 0.0' do
@@ -25,6 +37,18 @@ describe Gitlab::Metrics::System do
expect(described_class.file_descriptor_count).to eq(0)
end
end
+
+ describe '.max_open_file_descriptors' do
+ it 'returns 0' do
+ expect(described_class.max_open_file_descriptors).to eq(0)
+ end
+ end
+
+ describe 'process_start_time' do
+ it 'returns 0' do
+ expect(described_class.process_start_time).to eq(0)
+ end
+ end
end
describe '.cpu_time' do