diff options
author | Igor Drozdov <idrozdov@gitlab.com> | 2022-04-07 22:19:09 +0400 |
---|---|---|
committer | Igor Drozdov <idrozdov@gitlab.com> | 2022-04-13 08:57:35 +0400 |
commit | cc353a57f3aa8f3f751c175ea596d2baca0b1f19 (patch) | |
tree | bc89cbdc9ec3ca400305975e83363669fe639e69 | |
parent | 9abd6d79043f90057ff9a7bf9deecb868e83c46d (diff) | |
download | gitlab-shell-cc353a57f3aa8f3f751c175ea596d2baca0b1f19.tar.gz |
Add additional metrics to gitlab-sshd
- Observe time to establish a session
- Log the duration of the successfully established connection
- Observe total time to handle the connection
- Log the duration of the successfully executed connection
- Observe the count of ssh connections
- Observe the count of failed ssh connections
-rw-r--r-- | internal/config/config_test.go | 7 | ||||
-rw-r--r-- | internal/metrics/metrics.go | 56 | ||||
-rw-r--r-- | internal/sshd/connection.go | 8 | ||||
-rw-r--r-- | internal/sshd/session.go | 3 | ||||
-rw-r--r-- | internal/sshd/session_test.go | 4 | ||||
-rw-r--r-- | internal/sshd/sshd.go | 26 |
6 files changed, 78 insertions, 26 deletions
diff --git a/internal/config/config_test.go b/internal/config/config_test.go index e422c4e..a929106 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -39,7 +39,7 @@ func TestCustomPrometheusMetrics(t *testing.T) { require.NoError(t, err) var actualNames []string - for _, m := range ms[0:6] { + for _, m := range ms[0:9] { actualNames = append(actualNames, m.GetName()) } @@ -48,8 +48,11 @@ func TestCustomPrometheusMetrics(t *testing.T) { "gitlab_shell_http_request_duration_seconds", "gitlab_shell_http_requests_total", "gitlab_shell_sshd_concurrent_limited_sessions_total", - "gitlab_shell_sshd_connection_duration_seconds", "gitlab_shell_sshd_in_flight_connections", + "gitlab_shell_sshd_session_duration_seconds", + "gitlab_shell_sshd_session_established_duration_seconds", + "gitlab_sli:shell_sshd_sessions:errors_total", + "gitlab_sli:shell_sshd_sessions:total", } require.Equal(t, expectedMetricNames, actualNames) diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index b328445..5fa5036 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -18,30 +18,42 @@ const ( httpRequestsTotalMetricName = "requests_total" httpRequestDurationSecondsMetricName = "request_duration_seconds" - sshdConnectionsInFlightName = "in_flight_connections" - sshdConnectionDuration = "connection_duration_seconds" - sshdHitMaxSessions = "concurrent_limited_sessions_total" + sshdConnectionsInFlightName = "in_flight_connections" + sshdHitMaxSessionsName = "concurrent_limited_sessions_total" + sshdSessionDurationSecondsName = "session_duration_seconds" + sshdSessionEstablishedDurationSecondsName = "session_established_duration_seconds" + + sliSshdSessionsTotalName = "gitlab_sli:shell_sshd_sessions:total" + sliSshdSessionsErrorsTotalName = "gitlab_sli:shell_sshd_sessions:errors_total" gitalyConnectionsTotalName = "connections_total" ) var ( - SshdConnectionDuration = promauto.NewHistogram( + SshdSessionDuration = promauto.NewHistogram( prometheus.HistogramOpts{ Namespace: namespace, Subsystem: sshdSubsystem, - Name: sshdConnectionDuration, + Name: sshdSessionDurationSecondsName, Help: "A histogram of latencies for connections to gitlab-shell sshd.", Buckets: []float64{ - 0.005, /* 5ms */ - 0.025, /* 25ms */ - 0.1, /* 100ms */ - 0.5, /* 500ms */ - 1.0, /* 1s */ - 10.0, /* 10s */ - 30.0, /* 30s */ - 60.0, /* 1m */ - 300.0, /* 5m */ + 5.0, /* 5s */ + 30.0, /* 30s */ + 60.0, /* 1m */ + }, + }, + ) + + SshdSessionEstablishedDuration = promauto.NewHistogram( + prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: sshdSubsystem, + Name: sshdSessionEstablishedDurationSecondsName, + Help: "A histogram of latencies until session established to gitlab-shell sshd.", + Buckets: []float64{ + 0.5, /* 5ms */ + 1.0, /* 1s */ + 5.0, /* 5s */ }, }, ) @@ -59,11 +71,25 @@ var ( prometheus.CounterOpts{ Namespace: namespace, Subsystem: sshdSubsystem, - Name: sshdHitMaxSessions, + Name: sshdHitMaxSessionsName, Help: "The number of times the concurrent sessions limit was hit in gitlab-shell sshd.", }, ) + SliSshdSessionsTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: sliSshdSessionsTotalName, + Help: "Number of SSH sessions that have been established", + }, + ) + + SliSshdSessionsErrorsTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: sliSshdSessionsErrorsTotalName, + Help: "Number of SSH sessions that have failed", + }, + ) + GitalyConnectionsTotal = promauto.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, diff --git a/internal/sshd/connection.go b/internal/sshd/connection.go index 25b082a..1312833 100644 --- a/internal/sshd/connection.go +++ b/internal/sshd/connection.go @@ -2,7 +2,6 @@ package sshd import ( "context" - "time" "golang.org/x/crypto/ssh" "golang.org/x/sync/semaphore" @@ -29,13 +28,6 @@ func newConnection(maxSessions int64, remoteAddr string) *connection { func (c *connection) handle(ctx context.Context, chans <-chan ssh.NewChannel, handler channelHandler) { ctxlog := log.WithContextFields(ctx, log.Fields{"remote_addr": c.remoteAddr}) - metrics.SshdConnectionsInFlight.Inc() - - defer func(started time.Time) { - metrics.SshdConnectionsInFlight.Dec() - metrics.SshdConnectionDuration.Observe(time.Since(started).Seconds()) - }(time.Now()) - for newChannel := range chans { ctxlog.WithField("channel_type", newChannel.ChannelType()).Info("connection: handle: new channel requested") if newChannel.ChannelType() != "session" { diff --git a/internal/sshd/session.go b/internal/sshd/session.go index ff8540b..beb529e 100644 --- a/internal/sshd/session.go +++ b/internal/sshd/session.go @@ -22,6 +22,7 @@ type session struct { channel ssh.Channel gitlabKeyId string remoteAddr string + success bool // State managed by the session execCmd string @@ -182,6 +183,8 @@ func (s *session) exit(ctx context.Context, status uint32) { log.WithContextFields(ctx, log.Fields{"exit_status": status}).Info("session: exit: exiting") req := exitStatusReq{ExitStatus: status} + s.success = status == 0 + s.channel.CloseWrite() s.channel.SendRequest("exit-status", false, ssh.Marshal(req)) } diff --git a/internal/sshd/session_test.go b/internal/sshd/session_test.go index f135825..d0cc8d4 100644 --- a/internal/sshd/session_test.go +++ b/internal/sshd/session_test.go @@ -99,6 +99,7 @@ func TestHandleExec(t *testing.T) { expectedExecCmd string sentRequestName string sentRequestPayload []byte + success bool }{ { desc: "invalid payload", @@ -111,6 +112,7 @@ func TestHandleExec(t *testing.T) { expectedExecCmd: "discover", sentRequestName: "exit-status", sentRequestPayload: ssh.Marshal(exitStatusReq{ExitStatus: 0}), + success: true, }, } @@ -130,6 +132,7 @@ func TestHandleExec(t *testing.T) { require.Equal(t, false, s.handleExec(context.Background(), r)) require.Equal(t, tc.sentRequestName, f.sentRequestName) require.Equal(t, tc.sentRequestPayload, f.sentRequestPayload) + require.Equal(t, tc.success, s.success) }) } } @@ -141,6 +144,7 @@ func TestHandleShell(t *testing.T) { errMsg string gitlabKeyId string expectedExitCode uint32 + success bool }{ { desc: "fails to parse command", diff --git a/internal/sshd/sshd.go b/internal/sshd/sshd.go index b661233..8097e9b 100644 --- a/internal/sshd/sshd.go +++ b/internal/sshd/sshd.go @@ -12,6 +12,7 @@ import ( "golang.org/x/crypto/ssh" "gitlab.com/gitlab-org/gitlab-shell/internal/config" + "gitlab.com/gitlab-org/gitlab-shell/internal/metrics" "gitlab.com/gitlab-org/labkit/correlation" "gitlab.com/gitlab-org/labkit/log" @@ -145,6 +146,20 @@ func (s *Server) getStatus() status { } func (s *Server) handleConn(ctx context.Context, nconn net.Conn) { + success := false + + metrics.SshdConnectionsInFlight.Inc() + started := time.Now() + defer func() { + metrics.SshdConnectionsInFlight.Dec() + metrics.SshdSessionDuration.Observe(time.Since(started).Seconds()) + + metrics.SliSshdSessionsTotal.Inc() + if !success { + metrics.SliSshdSessionsErrorsTotal.Inc() + } + }() + remoteAddr := nconn.RemoteAddr().String() defer s.wg.Done() @@ -172,8 +187,12 @@ func (s *Server) handleConn(ctx context.Context, nconn net.Conn) { go ssh.DiscardRequests(reqs) + var establishSessionDuration float64 conn := newConnection(s.Config.Server.ConcurrentSessionsLimit, remoteAddr) conn.handle(ctx, chans, func(ctx context.Context, channel ssh.Channel, requests <-chan *ssh.Request) { + establishSessionDuration = time.Since(started).Seconds() + metrics.SshdSessionEstablishedDuration.Observe(establishSessionDuration) + session := &session{ cfg: s.Config, channel: channel, @@ -182,9 +201,14 @@ func (s *Server) handleConn(ctx context.Context, nconn net.Conn) { } session.handle(ctx, requests) + + success = session.success }) - ctxlog.Info("server: handleConn: done") + ctxlog.WithFields(log.Fields{ + "duration_s": time.Since(started).Seconds(), + "establish_session_duration_s": establishSessionDuration, + }).Info("server: handleConn: done") } func (s *Server) initSSHConnection(ctx context.Context, nconn net.Conn) (sconn *ssh.ServerConn, chans <-chan ssh.NewChannel, reqs <-chan *ssh.Request, err error) { |