summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAleksei Lipniagov <alipniagov@gitlab.com>2019-08-02 09:04:32 +0000
committerKamil TrzciƄski <ayufan@ayufan.eu>2019-08-02 09:04:32 +0000
commit1f9edb7c4a393a6ebe78e6f98e46515ad655cece (patch)
treea3eb7ca80f5a9d51142a91fc44f4f244f23a301f
parentebdd3a233e1e5dd252eef6c4ba5f1da311fe68e2 (diff)
downloadgitlab-ce-1f9edb7c4a393a6ebe78e6f98e46515ad655cece.tar.gz
Call `GC::Profiler.clear` only in one place
Previously, both InfluxSampler and RubySampler were relying on the `GC::Profiler.total_time` data which is the sum over the list of captured GC events. Also, both samplers asynchronously called `GC::Profiler.clear` which led to incorrect metric data because each sampler has the wrong assumption it is the only object who calls `GC::Profiler.clear` and thus could rely on the gathered results between such calls. We should ensure that `GC::Profiler.total_time` is called only in one place making it possible to rely on accumulated data between such wipes. Also, we need to track the amount of profiler reports we lost.
-rw-r--r--changelogs/unreleased/63571-fix-gc-profiler-data-being-wiped.yml5
-rw-r--r--config/initializers/peek.rb7
-rw-r--r--lib/gitlab/metrics/samplers/influx_sampler.rb22
-rw-r--r--lib/gitlab/metrics/samplers/ruby_sampler.rb23
-rw-r--r--spec/lib/gitlab/metrics/samplers/influx_sampler_spec.rb20
-rw-r--r--spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb20
6 files changed, 44 insertions, 53 deletions
diff --git a/changelogs/unreleased/63571-fix-gc-profiler-data-being-wiped.yml b/changelogs/unreleased/63571-fix-gc-profiler-data-being-wiped.yml
new file mode 100644
index 00000000000..7943d9573f3
--- /dev/null
+++ b/changelogs/unreleased/63571-fix-gc-profiler-data-being-wiped.yml
@@ -0,0 +1,5 @@
+---
+title: Fix GC::Profiler metrics fetching
+merge_request: 31331
+author:
+type: fixed
diff --git a/config/initializers/peek.rb b/config/initializers/peek.rb
index 4a2b7931f30..b6c7f1ff4fc 100644
--- a/config/initializers/peek.rb
+++ b/config/initializers/peek.rb
@@ -9,5 +9,10 @@ Peek.into Peek::Views::ActiveRecord
Peek.into Peek::Views::Gitaly
Peek.into Peek::Views::RedisDetailed
Peek.into Peek::Views::Rugged
-Peek.into Peek::Views::GC
+
+# `Peek::Views::GC` is currently disabled in production, as it runs with every request
+# even if PerformanceBar is inactive and clears `GC::Profiler` reports we need for metrics.
+# Check https://gitlab.com/gitlab-org/gitlab-ce/issues/65455
+Peek.into Peek::Views::GC if Rails.env.development?
+
Peek.into Peek::Views::Tracing if Labkit::Tracing.tracing_url_enabled?
diff --git a/lib/gitlab/metrics/samplers/influx_sampler.rb b/lib/gitlab/metrics/samplers/influx_sampler.rb
index 5138b37f83e..1eae0a7bf45 100644
--- a/lib/gitlab/metrics/samplers/influx_sampler.rb
+++ b/lib/gitlab/metrics/samplers/influx_sampler.rb
@@ -15,19 +15,14 @@ module Gitlab
@last_step = nil
@metrics = []
-
- @last_minor_gc = Delta.new(GC.stat[:minor_gc_count])
- @last_major_gc = Delta.new(GC.stat[:major_gc_count])
end
def sample
sample_memory_usage
sample_file_descriptors
- sample_gc
flush
ensure
- GC::Profiler.clear
@metrics.clear
end
@@ -43,23 +38,6 @@ module Gitlab
add_metric('file_descriptors', value: System.file_descriptor_count)
end
- def sample_gc
- time = GC::Profiler.total_time * 1000.0
- stats = GC.stat.merge(total_time: time)
-
- # We want the difference of GC runs compared to the last sample, not the
- # total amount since the process started.
- stats[:minor_gc_count] =
- @last_minor_gc.compared_with(stats[:minor_gc_count])
-
- stats[:major_gc_count] =
- @last_major_gc.compared_with(stats[:major_gc_count])
-
- stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count]
-
- add_metric('gc_statistics', stats)
- end
-
def add_metric(series, values, tags = {})
prefix = sidekiq? ? 'sidekiq_' : 'rails_'
diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb
index 1e200db0baf..3bfa3da35e0 100644
--- a/lib/gitlab/metrics/samplers/ruby_sampler.rb
+++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb
@@ -6,7 +6,11 @@ module Gitlab
module Metrics
module Samplers
class RubySampler < BaseSampler
+ GC_REPORT_BUCKETS = [0.001, 0.002, 0.005, 0.01, 0.05, 0.1, 0.5].freeze
+
def initialize(interval)
+ GC::Profiler.clear
+
metrics[:process_start_time_seconds].set(labels, Time.now.to_i)
super
@@ -37,7 +41,7 @@ module Gitlab
process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used', labels),
process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'),
sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels),
- total_time: ::Gitlab::Metrics.counter(with_prefix(:gc, :duration_seconds_total), 'Total GC time', labels)
+ gc_duration_seconds: ::Gitlab::Metrics.histogram(with_prefix(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS)
}
GC.stat.keys.each do |key|
@@ -57,20 +61,27 @@ module Gitlab
sample_gc
metrics[:sampler_duration].increment(labels, System.monotonic_time - start_time)
- ensure
- GC::Profiler.clear
end
private
def sample_gc
- # Collect generic GC stats.
+ # Observe all GC samples
+ sample_gc_reports.each do |report|
+ metrics[:gc_duration_seconds].observe(labels, report[:GC_TIME])
+ end
+
+ # Collect generic GC stats
GC.stat.each do |key, value|
metrics[key].set(labels, value)
end
+ end
- # Collect the GC time since last sample in float seconds.
- metrics[:total_time].increment(labels, GC::Profiler.total_time)
+ def sample_gc_reports
+ GC::Profiler.enable
+ GC::Profiler.raw_data
+ ensure
+ GC::Profiler.clear
end
def set_memory_usage_metrics
diff --git a/spec/lib/gitlab/metrics/samplers/influx_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/influx_sampler_spec.rb
index 81954fcf8c5..2923048f742 100644
--- a/spec/lib/gitlab/metrics/samplers/influx_sampler_spec.rb
+++ b/spec/lib/gitlab/metrics/samplers/influx_sampler_spec.rb
@@ -17,18 +17,10 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do
it 'samples various statistics' do
expect(sampler).to receive(:sample_memory_usage)
expect(sampler).to receive(:sample_file_descriptors)
- expect(sampler).to receive(:sample_gc)
expect(sampler).to receive(:flush)
sampler.sample
end
-
- it 'clears any GC profiles' do
- expect(sampler).to receive(:flush)
- expect(GC::Profiler).to receive(:clear)
-
- sampler.sample
- end
end
describe '#flush' do
@@ -67,18 +59,6 @@ describe Gitlab::Metrics::Samplers::InfluxSampler do
end
end
- describe '#sample_gc' do
- it 'adds a metric containing garbage collection statistics' do
- expect(GC::Profiler).to receive(:total_time).and_return(0.24)
-
- expect(sampler).to receive(:add_metric)
- .with(/gc_statistics/, an_instance_of(Hash))
- .and_call_original
-
- sampler.sample_gc
- end
- end
-
describe '#add_metric' do
it 'prefixes the series name for a Rails process' do
expect(sampler).to receive(:sidekiq?).and_return(false)
diff --git a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb
index 4d93b70e6e3..5005a5d9ebc 100644
--- a/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb
+++ b/spec/lib/gitlab/metrics/samplers/ruby_sampler_spec.rb
@@ -59,17 +59,29 @@ describe Gitlab::Metrics::Samplers::RubySampler do
end
it 'clears any GC profiles' do
- expect(GC::Profiler).to receive(:clear)
+ expect(GC::Profiler).to receive(:clear).at_least(:once)
sampler.sample
end
end
describe '#sample_gc' do
- it 'adds a metric containing garbage collection time statistics' do
- expect(GC::Profiler).to receive(:total_time).and_return(0.24)
+ let!(:sampler) { described_class.new(5) }
- expect(sampler.metrics[:total_time]).to receive(:increment).with({}, 0.24)
+ let(:gc_reports) { [{ GC_TIME: 0.1 }, { GC_TIME: 0.2 }, { GC_TIME: 0.3 }] }
+
+ it 're-enables GC::Profiler if needed' do
+ expect(GC::Profiler).to receive(:enable)
+
+ sampler.sample
+ end
+
+ it 'observes GC cycles time' do
+ expect(sampler).to receive(:sample_gc_reports).and_return(gc_reports)
+
+ expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.1).ordered
+ expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.2).ordered
+ expect(sampler.metrics[:gc_duration_seconds]).to receive(:observe).with({}, 0.3).ordered
sampler.sample
end