1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
# frozen_string_literal: true
# trigger stackprof by sending a SIGUSR2 signal
#
# Docs: https://docs.gitlab.com/ee/development/performance.html#production
module Gitlab
class StackProf
DEFAULT_FILE_PREFIX = Dir.tmpdir
DEFAULT_TIMEOUT_SEC = 30
DEFAULT_MODE = :cpu
# Sample interval as a frequency in microseconds (~100hz); appropriate for CPU profiles
DEFAULT_INTERVAL_US = 10_000
# Sample interval in event occurrences (n = every nth event); appropriate for allocation profiles
DEFAULT_INTERVAL_EVENTS = 1_000
# this is a workaround for sidekiq, which defines its own SIGUSR2 handler.
# by defering to the sidekiq startup event, we get to set up our own
# handler late enough.
# see also: https://github.com/mperham/sidekiq/pull/4653
def self.install
require 'stackprof'
require 'tmpdir'
if Gitlab::Runtime.sidekiq?
Sidekiq.configure_server do |config|
config.on :startup do
on_worker_start
end
end
else
Gitlab::Cluster::LifecycleEvents.on_worker_start do
on_worker_start
end
end
end
def self.on_worker_start
log_event('listening for SIGUSR2 signal')
# create a pipe in order to propagate signal out of the signal handler
# see also: https://cr.yp.to/docs/selfpipe.html
read, write = IO.pipe
# create a separate thread that polls for signals on the pipe.
#
# this way we do not execute in signal handler context, which
# lifts restrictions and also serializes the calls in a thread-safe
# manner.
#
# it's very similar to a goroutine and channel design.
#
# another nice benefit of this method is that we can timeout the
# IO.select call, allowing the profile to automatically stop after
# a given interval (by default 30 seconds), avoiding unbounded memory
# growth from a profile that was started and never stopped.
t = Thread.new do
timeout_s = ENV['STACKPROF_TIMEOUT_S']&.to_i || DEFAULT_TIMEOUT_SEC
current_timeout_s = nil
loop do
read.getbyte if IO.select([read], nil, nil, current_timeout_s)
if ::StackProf.running?
stackprof_file_prefix = ENV['STACKPROF_FILE_PREFIX'] || DEFAULT_FILE_PREFIX
stackprof_out_file = "#{stackprof_file_prefix}/stackprof.#{Process.pid}.#{SecureRandom.hex(6)}.profile"
log_event(
'stopping profile',
profile_filename: stackprof_out_file,
profile_timeout_s: timeout_s
)
::StackProf.stop
::StackProf.results(stackprof_out_file)
current_timeout_s = nil
else
mode = ENV['STACKPROF_MODE']&.to_sym || DEFAULT_MODE
interval = ENV['STACKPROF_INTERVAL']&.to_i
interval ||= (mode == :object ? DEFAULT_INTERVAL_EVENTS : DEFAULT_INTERVAL_US)
log_event(
'starting profile',
profile_mode: mode,
profile_interval: interval,
profile_timeout: timeout_s
)
::StackProf.start(
mode: mode,
raw: Gitlab::Utils.to_boolean(ENV['STACKPROF_RAW'] || 'true'),
interval: interval
)
current_timeout_s = timeout_s
end
end
rescue => e
log_event("stackprof failed: #{e}")
end
t.abort_on_exception = true
# in the case of puma, this will override the existing SIGUSR2 signal handler
# that can be used to trigger a restart.
#
# puma cluster has two types of restarts:
# * SIGUSR1: phased restart
# * SIGUSR2: restart
#
# phased restart is not supported in our configuration, because we use
# preload_app. this means we will always perform a normal restart.
# additionally, phased restart is not supported when sending a SIGUSR2
# directly to a puma worker (as opposed to the master process).
#
# the result is that the behaviour of SIGUSR1 and SIGUSR2 is identical in
# our configuration, and we can always use a SIGUSR1 to perform a restart.
#
# thus, it is acceptable for us to re-appropriate the SIGUSR2 signal, and
# override the puma behaviour.
#
# see also:
# * https://github.com/puma/puma/blob/master/docs/signals.md#puma-signals
# * https://github.com/phusion/unicorn/blob/master/SIGNALS
# * https://github.com/mperham/sidekiq/wiki/Signals
Signal.trap('SIGUSR2') do
write.write('.')
end
end
def self.log_event(event, labels = {})
Gitlab::AppJsonLogger.info({
event: 'stackprof',
message: event,
pid: Process.pid
}.merge(labels.compact))
end
end
end
if Gitlab::Utils.to_boolean(ENV['STACKPROF_ENABLED'].to_s)
Gitlab::StackProf.install
end
|