summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIgor Drozdov <idrozdov@gitlab.com>2022-04-07 22:19:09 +0400
committerIgor Drozdov <idrozdov@gitlab.com>2022-04-13 08:57:35 +0400
commitcc353a57f3aa8f3f751c175ea596d2baca0b1f19 (patch)
treebc89cbdc9ec3ca400305975e83363669fe639e69
parent9abd6d79043f90057ff9a7bf9deecb868e83c46d (diff)
downloadgitlab-shell-cc353a57f3aa8f3f751c175ea596d2baca0b1f19.tar.gz
Add additional metrics to gitlab-sshd
- Observe time to establish a session - Log the duration of the successfully established connection - Observe total time to handle the connection - Log the duration of the successfully executed connection - Observe the count of ssh connections - Observe the count of failed ssh connections
-rw-r--r--internal/config/config_test.go7
-rw-r--r--internal/metrics/metrics.go56
-rw-r--r--internal/sshd/connection.go8
-rw-r--r--internal/sshd/session.go3
-rw-r--r--internal/sshd/session_test.go4
-rw-r--r--internal/sshd/sshd.go26
6 files changed, 78 insertions, 26 deletions
diff --git a/internal/config/config_test.go b/internal/config/config_test.go
index e422c4e..a929106 100644
--- a/internal/config/config_test.go
+++ b/internal/config/config_test.go
@@ -39,7 +39,7 @@ func TestCustomPrometheusMetrics(t *testing.T) {
require.NoError(t, err)
var actualNames []string
- for _, m := range ms[0:6] {
+ for _, m := range ms[0:9] {
actualNames = append(actualNames, m.GetName())
}
@@ -48,8 +48,11 @@ func TestCustomPrometheusMetrics(t *testing.T) {
"gitlab_shell_http_request_duration_seconds",
"gitlab_shell_http_requests_total",
"gitlab_shell_sshd_concurrent_limited_sessions_total",
- "gitlab_shell_sshd_connection_duration_seconds",
"gitlab_shell_sshd_in_flight_connections",
+ "gitlab_shell_sshd_session_duration_seconds",
+ "gitlab_shell_sshd_session_established_duration_seconds",
+ "gitlab_sli:shell_sshd_sessions:errors_total",
+ "gitlab_sli:shell_sshd_sessions:total",
}
require.Equal(t, expectedMetricNames, actualNames)
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
index b328445..5fa5036 100644
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@@ -18,30 +18,42 @@ const (
httpRequestsTotalMetricName = "requests_total"
httpRequestDurationSecondsMetricName = "request_duration_seconds"
- sshdConnectionsInFlightName = "in_flight_connections"
- sshdConnectionDuration = "connection_duration_seconds"
- sshdHitMaxSessions = "concurrent_limited_sessions_total"
+ sshdConnectionsInFlightName = "in_flight_connections"
+ sshdHitMaxSessionsName = "concurrent_limited_sessions_total"
+ sshdSessionDurationSecondsName = "session_duration_seconds"
+ sshdSessionEstablishedDurationSecondsName = "session_established_duration_seconds"
+
+ sliSshdSessionsTotalName = "gitlab_sli:shell_sshd_sessions:total"
+ sliSshdSessionsErrorsTotalName = "gitlab_sli:shell_sshd_sessions:errors_total"
gitalyConnectionsTotalName = "connections_total"
)
var (
- SshdConnectionDuration = promauto.NewHistogram(
+ SshdSessionDuration = promauto.NewHistogram(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: sshdSubsystem,
- Name: sshdConnectionDuration,
+ Name: sshdSessionDurationSecondsName,
Help: "A histogram of latencies for connections to gitlab-shell sshd.",
Buckets: []float64{
- 0.005, /* 5ms */
- 0.025, /* 25ms */
- 0.1, /* 100ms */
- 0.5, /* 500ms */
- 1.0, /* 1s */
- 10.0, /* 10s */
- 30.0, /* 30s */
- 60.0, /* 1m */
- 300.0, /* 5m */
+ 5.0, /* 5s */
+ 30.0, /* 30s */
+ 60.0, /* 1m */
+ },
+ },
+ )
+
+ SshdSessionEstablishedDuration = promauto.NewHistogram(
+ prometheus.HistogramOpts{
+ Namespace: namespace,
+ Subsystem: sshdSubsystem,
+ Name: sshdSessionEstablishedDurationSecondsName,
+ Help: "A histogram of latencies until session established to gitlab-shell sshd.",
+ Buckets: []float64{
+ 0.5, /* 5ms */
+ 1.0, /* 1s */
+ 5.0, /* 5s */
},
},
)
@@ -59,11 +71,25 @@ var (
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: sshdSubsystem,
- Name: sshdHitMaxSessions,
+ Name: sshdHitMaxSessionsName,
Help: "The number of times the concurrent sessions limit was hit in gitlab-shell sshd.",
},
)
+ SliSshdSessionsTotal = promauto.NewCounter(
+ prometheus.CounterOpts{
+ Name: sliSshdSessionsTotalName,
+ Help: "Number of SSH sessions that have been established",
+ },
+ )
+
+ SliSshdSessionsErrorsTotal = promauto.NewCounter(
+ prometheus.CounterOpts{
+ Name: sliSshdSessionsErrorsTotalName,
+ Help: "Number of SSH sessions that have failed",
+ },
+ )
+
GitalyConnectionsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
diff --git a/internal/sshd/connection.go b/internal/sshd/connection.go
index 25b082a..1312833 100644
--- a/internal/sshd/connection.go
+++ b/internal/sshd/connection.go
@@ -2,7 +2,6 @@ package sshd
import (
"context"
- "time"
"golang.org/x/crypto/ssh"
"golang.org/x/sync/semaphore"
@@ -29,13 +28,6 @@ func newConnection(maxSessions int64, remoteAddr string) *connection {
func (c *connection) handle(ctx context.Context, chans <-chan ssh.NewChannel, handler channelHandler) {
ctxlog := log.WithContextFields(ctx, log.Fields{"remote_addr": c.remoteAddr})
- metrics.SshdConnectionsInFlight.Inc()
-
- defer func(started time.Time) {
- metrics.SshdConnectionsInFlight.Dec()
- metrics.SshdConnectionDuration.Observe(time.Since(started).Seconds())
- }(time.Now())
-
for newChannel := range chans {
ctxlog.WithField("channel_type", newChannel.ChannelType()).Info("connection: handle: new channel requested")
if newChannel.ChannelType() != "session" {
diff --git a/internal/sshd/session.go b/internal/sshd/session.go
index ff8540b..beb529e 100644
--- a/internal/sshd/session.go
+++ b/internal/sshd/session.go
@@ -22,6 +22,7 @@ type session struct {
channel ssh.Channel
gitlabKeyId string
remoteAddr string
+ success bool
// State managed by the session
execCmd string
@@ -182,6 +183,8 @@ func (s *session) exit(ctx context.Context, status uint32) {
log.WithContextFields(ctx, log.Fields{"exit_status": status}).Info("session: exit: exiting")
req := exitStatusReq{ExitStatus: status}
+ s.success = status == 0
+
s.channel.CloseWrite()
s.channel.SendRequest("exit-status", false, ssh.Marshal(req))
}
diff --git a/internal/sshd/session_test.go b/internal/sshd/session_test.go
index f135825..d0cc8d4 100644
--- a/internal/sshd/session_test.go
+++ b/internal/sshd/session_test.go
@@ -99,6 +99,7 @@ func TestHandleExec(t *testing.T) {
expectedExecCmd string
sentRequestName string
sentRequestPayload []byte
+ success bool
}{
{
desc: "invalid payload",
@@ -111,6 +112,7 @@ func TestHandleExec(t *testing.T) {
expectedExecCmd: "discover",
sentRequestName: "exit-status",
sentRequestPayload: ssh.Marshal(exitStatusReq{ExitStatus: 0}),
+ success: true,
},
}
@@ -130,6 +132,7 @@ func TestHandleExec(t *testing.T) {
require.Equal(t, false, s.handleExec(context.Background(), r))
require.Equal(t, tc.sentRequestName, f.sentRequestName)
require.Equal(t, tc.sentRequestPayload, f.sentRequestPayload)
+ require.Equal(t, tc.success, s.success)
})
}
}
@@ -141,6 +144,7 @@ func TestHandleShell(t *testing.T) {
errMsg string
gitlabKeyId string
expectedExitCode uint32
+ success bool
}{
{
desc: "fails to parse command",
diff --git a/internal/sshd/sshd.go b/internal/sshd/sshd.go
index b661233..8097e9b 100644
--- a/internal/sshd/sshd.go
+++ b/internal/sshd/sshd.go
@@ -12,6 +12,7 @@ import (
"golang.org/x/crypto/ssh"
"gitlab.com/gitlab-org/gitlab-shell/internal/config"
+ "gitlab.com/gitlab-org/gitlab-shell/internal/metrics"
"gitlab.com/gitlab-org/labkit/correlation"
"gitlab.com/gitlab-org/labkit/log"
@@ -145,6 +146,20 @@ func (s *Server) getStatus() status {
}
func (s *Server) handleConn(ctx context.Context, nconn net.Conn) {
+ success := false
+
+ metrics.SshdConnectionsInFlight.Inc()
+ started := time.Now()
+ defer func() {
+ metrics.SshdConnectionsInFlight.Dec()
+ metrics.SshdSessionDuration.Observe(time.Since(started).Seconds())
+
+ metrics.SliSshdSessionsTotal.Inc()
+ if !success {
+ metrics.SliSshdSessionsErrorsTotal.Inc()
+ }
+ }()
+
remoteAddr := nconn.RemoteAddr().String()
defer s.wg.Done()
@@ -172,8 +187,12 @@ func (s *Server) handleConn(ctx context.Context, nconn net.Conn) {
go ssh.DiscardRequests(reqs)
+ var establishSessionDuration float64
conn := newConnection(s.Config.Server.ConcurrentSessionsLimit, remoteAddr)
conn.handle(ctx, chans, func(ctx context.Context, channel ssh.Channel, requests <-chan *ssh.Request) {
+ establishSessionDuration = time.Since(started).Seconds()
+ metrics.SshdSessionEstablishedDuration.Observe(establishSessionDuration)
+
session := &session{
cfg: s.Config,
channel: channel,
@@ -182,9 +201,14 @@ func (s *Server) handleConn(ctx context.Context, nconn net.Conn) {
}
session.handle(ctx, requests)
+
+ success = session.success
})
- ctxlog.Info("server: handleConn: done")
+ ctxlog.WithFields(log.Fields{
+ "duration_s": time.Since(started).Seconds(),
+ "establish_session_duration_s": establishSessionDuration,
+ }).Info("server: handleConn: done")
}
func (s *Server) initSSHConnection(ctx context.Context, nconn net.Conn) (sconn *ssh.ServerConn, chans <-chan ssh.NewChannel, reqs <-chan *ssh.Request, err error) {