diff options
Diffstat (limited to 'lib/gitlab/metrics')
-rw-r--r-- | lib/gitlab/metrics/boot_time_tracker.rb | 34 | ||||
-rw-r--r-- | lib/gitlab/metrics/exporter/base_exporter.rb | 8 | ||||
-rw-r--r-- | lib/gitlab/metrics/exporter/web_exporter.rb | 55 | ||||
-rw-r--r-- | lib/gitlab/metrics/rails_slis.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/metrics/requests_rack_middleware.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/metrics/samplers/ruby_sampler.rb | 36 | ||||
-rw-r--r-- | lib/gitlab/metrics/system.rb | 69 |
7 files changed, 115 insertions, 91 deletions
diff --git a/lib/gitlab/metrics/boot_time_tracker.rb b/lib/gitlab/metrics/boot_time_tracker.rb new file mode 100644 index 00000000000..3e7026b8dea --- /dev/null +++ b/lib/gitlab/metrics/boot_time_tracker.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Gitlab + module Metrics + class BootTimeTracker + include Singleton + + SUPPORTED_RUNTIMES = [:puma, :sidekiq, :console].freeze + + def startup_time + @startup_time || 0 + end + + def track_boot_time!(logger: Gitlab::AppJsonLogger) + return if @startup_time + + runtime = Gitlab::Runtime.safe_identify + return unless SUPPORTED_RUNTIMES.include?(runtime) + + @startup_time = Gitlab::Metrics::System.process_runtime_elapsed_seconds + + Gitlab::Metrics.gauge( + :gitlab_rails_boot_time_seconds, 'Time elapsed for Rails primary process to finish startup' + ).set({}, @startup_time) + + logger.info(message: 'Application boot finished', runtime: runtime.to_s, duration_s: @startup_time) + end + + def reset! + @startup_time = nil + end + end + end +end diff --git a/lib/gitlab/metrics/exporter/base_exporter.rb b/lib/gitlab/metrics/exporter/base_exporter.rb index 190d3d3fd2f..2aea8d655fa 100644 --- a/lib/gitlab/metrics/exporter/base_exporter.rb +++ b/lib/gitlab/metrics/exporter/base_exporter.rb @@ -9,8 +9,10 @@ module Gitlab class BaseExporter < Daemon attr_reader :server - attr_accessor :readiness_checks - + # @param settings [Hash] SettingsLogic hash containing the `*_exporter` config + # @param log_enabled [Boolean] whether to log HTTP requests + # @param log_file [String] path to where the server log should be located + # @param gc_requests [Boolean] whether to run a major GC after each scraper request def initialize(settings, log_enabled:, log_file:, gc_requests: false, **options) super(**options) @@ -85,7 +87,7 @@ module Gitlab end def readiness_probe - ::Gitlab::HealthChecks::Probes::Collection.new(*readiness_checks) + ::Gitlab::HealthChecks::Probes::Collection.new end def liveness_probe diff --git a/lib/gitlab/metrics/exporter/web_exporter.rb b/lib/gitlab/metrics/exporter/web_exporter.rb index c05ad8ccf42..9a471c33770 100644 --- a/lib/gitlab/metrics/exporter/web_exporter.rb +++ b/lib/gitlab/metrics/exporter/web_exporter.rb @@ -4,64 +4,9 @@ module Gitlab module Metrics module Exporter class WebExporter < BaseExporter - ExporterCheck = Struct.new(:exporter) do - def readiness - Gitlab::HealthChecks::Result.new( - 'web_exporter', exporter.running) - end - - def available? - true - end - end - - RailsMetricsInitializer = Struct.new(:app) do - def call(env) - Gitlab::Metrics::RailsSlis.initialize_request_slis_if_needed! - - app.call(env) - end - end - - attr_reader :running - # This exporter is always run on master process def initialize(**options) super(Settings.monitoring.web_exporter, log_enabled: true, log_file: 'web_exporter.log', **options) - - # DEPRECATED: - # these `readiness_checks` are deprecated - # as presenting no value in a way how we run - # application: https://gitlab.com/gitlab-org/gitlab/issues/35343 - self.readiness_checks = [ - WebExporter::ExporterCheck.new(self), - Gitlab::HealthChecks::PumaCheck - ] - end - - def mark_as_not_running! - @running = false - end - - private - - def rack_app - app = super - - Rack::Builder.app do - use RailsMetricsInitializer - run app - end - end - - def start_working - @running = true - super - end - - def stop_working - mark_as_not_running! - super end end end diff --git a/lib/gitlab/metrics/rails_slis.rb b/lib/gitlab/metrics/rails_slis.rb index 8c40c0ad441..c4f305dbdc4 100644 --- a/lib/gitlab/metrics/rails_slis.rb +++ b/lib/gitlab/metrics/rails_slis.rb @@ -4,7 +4,7 @@ module Gitlab module Metrics module RailsSlis class << self - def initialize_request_slis_if_needed! + def initialize_request_slis! Gitlab::Metrics::Sli.initialize_sli(:rails_request_apdex, possible_request_labels) unless Gitlab::Metrics::Sli.initialized?(:rails_request_apdex) Gitlab::Metrics::Sli.initialize_sli(:graphql_query_apdex, possible_graphql_query_labels) unless Gitlab::Metrics::Sli.initialized?(:graphql_query_apdex) end diff --git a/lib/gitlab/metrics/requests_rack_middleware.rb b/lib/gitlab/metrics/requests_rack_middleware.rb index c143a7f5a1b..d7fe983c553 100644 --- a/lib/gitlab/metrics/requests_rack_middleware.rb +++ b/lib/gitlab/metrics/requests_rack_middleware.rb @@ -62,6 +62,8 @@ module Gitlab http_requests_total.get({ method: method, status: status, feature_category: feature_category }) end end + + Gitlab::Metrics::RailsSlis.initialize_request_slis! end def call(env) diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb index d71ee671b8d..4a3ef3711a5 100644 --- a/lib/gitlab/metrics/samplers/ruby_sampler.rb +++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb @@ -7,22 +7,20 @@ module Gitlab DEFAULT_SAMPLING_INTERVAL_SECONDS = 60 GC_REPORT_BUCKETS = [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 1].freeze - def initialize(...) + def initialize(prefix: nil, **options) + @prefix = prefix + GC::Profiler.clear metrics[:process_start_time_seconds].set(labels, Time.now.to_i) - super(...) + super(**options) end def metrics @metrics ||= init_metrics end - def with_prefix(prefix, name) - "ruby_#{prefix}_#{name}".to_sym - end - def to_doc_string(name) name.to_s.humanize end @@ -33,19 +31,19 @@ module Gitlab def init_metrics metrics = { - file_descriptors: ::Gitlab::Metrics.gauge(with_prefix(:file, :descriptors), 'File descriptors used', labels), - process_cpu_seconds_total: ::Gitlab::Metrics.gauge(with_prefix(:process, :cpu_seconds_total), 'Process CPU seconds total'), - process_max_fds: ::Gitlab::Metrics.gauge(with_prefix(:process, :max_fds), 'Process max fds'), - process_resident_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :resident_memory_bytes), 'Memory used (RSS)', labels), - process_unique_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :unique_memory_bytes), 'Memory used (USS)', labels), - process_proportional_memory_bytes: ::Gitlab::Metrics.gauge(with_prefix(:process, :proportional_memory_bytes), 'Memory used (PSS)', labels), - process_start_time_seconds: ::Gitlab::Metrics.gauge(with_prefix(:process, :start_time_seconds), 'Process start time seconds'), - sampler_duration: ::Gitlab::Metrics.counter(with_prefix(:sampler, :duration_seconds_total), 'Sampler time', labels), - gc_duration_seconds: ::Gitlab::Metrics.histogram(with_prefix(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS) + file_descriptors: ::Gitlab::Metrics.gauge(metric_name(:file, :descriptors), 'File descriptors used', labels), + process_cpu_seconds_total: ::Gitlab::Metrics.gauge(metric_name(:process, :cpu_seconds_total), 'Process CPU seconds total'), + process_max_fds: ::Gitlab::Metrics.gauge(metric_name(:process, :max_fds), 'Process max fds'), + process_resident_memory_bytes: ::Gitlab::Metrics.gauge(metric_name(:process, :resident_memory_bytes), 'Memory used (RSS)', labels), + process_unique_memory_bytes: ::Gitlab::Metrics.gauge(metric_name(:process, :unique_memory_bytes), 'Memory used (USS)', labels), + process_proportional_memory_bytes: ::Gitlab::Metrics.gauge(metric_name(:process, :proportional_memory_bytes), 'Memory used (PSS)', labels), + process_start_time_seconds: ::Gitlab::Metrics.gauge(metric_name(:process, :start_time_seconds), 'Process start time seconds'), + sampler_duration: ::Gitlab::Metrics.counter(metric_name(:sampler, :duration_seconds_total), 'Sampler time', labels), + gc_duration_seconds: ::Gitlab::Metrics.histogram(metric_name(:gc, :duration_seconds), 'GC time', labels, GC_REPORT_BUCKETS) } GC.stat.keys.each do |key| - metrics[key] = ::Gitlab::Metrics.gauge(with_prefix(:gc_stat, key), to_doc_string(key), labels) + metrics[key] = ::Gitlab::Metrics.gauge(metric_name(:gc_stat, key), to_doc_string(key), labels) end metrics @@ -65,6 +63,12 @@ module Gitlab private + def metric_name(group, metric) + name = "ruby_#{group}_#{metric}" + name = "#{@prefix}_#{name}" if @prefix.present? + name.to_sym + end + def sample_gc # Observe all GC samples sample_gc_reports.each do |report| diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb index 9bbcd1e056c..e646846face 100644 --- a/lib/gitlab/metrics/system.rb +++ b/lib/gitlab/metrics/system.rb @@ -7,6 +7,9 @@ module Gitlab # This module relies on the /proc filesystem being available. If /proc is # not available the methods of this module will be stubbed. module System + extend self + + PROC_STAT_PATH = '/proc/self/stat' PROC_STATUS_PATH = '/proc/self/status' PROC_SMAPS_ROLLUP_PATH = '/proc/self/smaps_rollup' PROC_LIMITS_PATH = '/proc/self/limits' @@ -17,7 +20,7 @@ module Gitlab RSS_PATTERN = /VmRSS:\s+(?<value>\d+)/.freeze MAX_OPEN_FILES_PATTERN = /Max open files\s*(?<value>\d+)/.freeze - def self.summary + def summary proportional_mem = memory_usage_uss_pss { version: RUBY_DESCRIPTION, @@ -32,43 +35,43 @@ module Gitlab end # Returns the current process' RSS (resident set size) in bytes. - def self.memory_usage_rss + def memory_usage_rss sum_matches(PROC_STATUS_PATH, rss: RSS_PATTERN)[:rss].kilobytes end # Returns the current process' USS/PSS (unique/proportional set size) in bytes. - def self.memory_usage_uss_pss + def memory_usage_uss_pss sum_matches(PROC_SMAPS_ROLLUP_PATH, uss: PRIVATE_PAGES_PATTERN, pss: PSS_PATTERN) .transform_values(&:kilobytes) end - def self.file_descriptor_count + def file_descriptor_count Dir.glob(PROC_FD_GLOB).length end - def self.max_open_file_descriptors + def max_open_file_descriptors sum_matches(PROC_LIMITS_PATH, max_fds: MAX_OPEN_FILES_PATTERN)[:max_fds] end - def self.cpu_time + def cpu_time Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :float_second) end # Returns the current real time in a given precision. # # Returns the time as a Float for precision = :float_second. - def self.real_time(precision = :float_second) + def real_time(precision = :float_second) Process.clock_gettime(Process::CLOCK_REALTIME, precision) end # Returns the current monotonic clock time as seconds with microseconds precision. # # Returns the time as a Float. - def self.monotonic_time + def monotonic_time Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second) end - def self.thread_cpu_time + def thread_cpu_time # Not all OS kernels are supporting `Process::CLOCK_THREAD_CPUTIME_ID` # Refer: https://gitlab.com/gitlab-org/gitlab/issues/30567#note_221765627 return unless defined?(Process::CLOCK_THREAD_CPUTIME_ID) @@ -76,33 +79,67 @@ module Gitlab Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :float_second) end - def self.thread_cpu_duration(start_time) + def thread_cpu_duration(start_time) end_time = thread_cpu_time return unless start_time && end_time end_time - start_time end + # Returns the total time the current process has been running in seconds. + def process_runtime_elapsed_seconds + # Entry 22 (1-indexed) contains the process `starttime`, see: + # https://man7.org/linux/man-pages/man5/proc.5.html + # + # This value is a fixed timestamp in clock ticks. + # To obtain an elapsed time in seconds, we divide by the number + # of ticks per second and subtract from the system uptime. + start_time_ticks = proc_stat_entries[21].to_f + clock_ticks_per_second = Etc.sysconf(Etc::SC_CLK_TCK) + uptime - (start_time_ticks / clock_ticks_per_second) + end + + private + # Given a path to a file in /proc and a hash of (metric, pattern) pairs, # sums up all values found for those patterns under the respective metric. - def self.sum_matches(proc_file, **patterns) + def sum_matches(proc_file, **patterns) results = patterns.transform_values { 0 } - begin - File.foreach(proc_file) do |line| + safe_yield_procfile(proc_file) do |io| + io.each_line do |line| patterns.each do |metric, pattern| match = line.match(pattern) value = match&.named_captures&.fetch('value', 0) results[metric] += value.to_i end end - rescue Errno::ENOENT - # This means the procfile we're reading from did not exist; - # this is safe to ignore, since we initialize each metric to 0 end results end + + def proc_stat_entries + safe_yield_procfile(PROC_STAT_PATH) do |io| + io.read.split(' ') + end || [] + end + + def safe_yield_procfile(path, &block) + File.open(path, &block) + rescue Errno::ENOENT + # This means the procfile we're reading from did not exist; + # most likely we're on Darwin. + end + + # Equivalent to reading /proc/uptime on Linux 2.6+. + # + # Returns 0 if not supported, e.g. on Darwin. + def uptime + Process.clock_gettime(Process::CLOCK_BOOTTIME) + rescue NameError + 0 + end end end end |