diff options
author | Steve Azzopardi <sazzopardi@gitlab.com> | 2023-01-02 15:50:27 +0100 |
---|---|---|
committer | Ash McKenzie <amckenzie@gitlab.com> | 2023-01-12 02:56:43 +0000 |
commit | a093c9d3cfc1ee18368ebbf828dc61c15b74540c (patch) | |
tree | 5e10805b8ecd9d582ef27766f5374e8150a16aeb | |
parent | c5b3accf0a8b2008137a90d40cfd65ba3ef2c99a (diff) | |
download | gitlab-shell-a093c9d3cfc1ee18368ebbf828dc61c15b74540c.tar.gz |
feat: retry on error
What
---
Change the default `HTTP.Client` to
`github.com/hashicorp/go-retryablehttp.Client` to get automatic retries
and exponential backoff.
We retry the request 2 times resulting in 3 attempts of sending the
request, the min retry wait is 1 second, and the maximum is 15
seconds.
Hide the retry logic behind a temporary feature flag
`FF_GITLAB_SHELL_RETRYABLE_HTTP` to easily roll this out in GitLab.com.
When we verify that this works as expected we will remove
`FF_GITLAB_SHELL_RETRYABLE_HTTP` and have the retry logic as the default
logic.
Why
---
In https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979 users
end up seeing the following errors when trying to `git-clone(1)` a
repository locally on in CI.
```shell
remote: ===============================
remote:
remote: ERROR: Internal API unreachable
remote:
remote: ================================
```
When we look at the application logs we see the following error:
```json
{ "err": "http://gitlab-webservice-git.gitlab.svc:8181/api/v4/internal/allowed":
dial tcp 10.69.184.120:8181: connect: connection refused", "msg":
"Internal API unreachable"}
```
In
https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979#note_1222670120
we've correlated these `connection refused` errors with infrastructure
events that remove the git pods that are hosting
`gitlab-webservice-git` service. We could try to make the underlying
infrastructure more reactive to these changes as suggested in
https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979#note_1225164944
but we can still end up serving bad requests.
Implementing retry logic for 5xx or other errors would allow users to
still be able to `git-clone(1)` reposirories, although it being slower.
This is espically important during CI runs so users don't have to retry
jobs themselves.
Reference: https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979
Closes: https://gitlab.com/gitlab-org/gitlab-shell/-/issues/604
Signed-off-by: Steve Azzopardi <sazzopardi@gitlab.com>
-rw-r--r-- | client/client_test.go | 51 | ||||
-rw-r--r-- | client/gitlabnet.go | 5 | ||||
-rw-r--r-- | client/httpclient.go | 19 | ||||
-rw-r--r-- | client/httpclient_test.go | 2 | ||||
-rw-r--r-- | client/testserver/testserver.go | 29 | ||||
-rw-r--r-- | go.mod | 2 | ||||
-rw-r--r-- | go.sum | 6 | ||||
-rw-r--r-- | internal/command/discover/discover_test.go | 50 | ||||
-rw-r--r-- | internal/command/healthcheck/healthcheck_test.go | 2 | ||||
-rw-r--r-- | internal/command/personalaccesstoken/personalaccesstoken_test.go | 6 | ||||
-rw-r--r-- | internal/command/twofactorrecover/twofactorrecover_test.go | 6 | ||||
-rw-r--r-- | internal/command/twofactorverify/twofactorverify_test.go | 2 | ||||
-rw-r--r-- | internal/config/config.go | 4 | ||||
-rw-r--r-- | internal/gitlabnet/lfsauthenticate/client_test.go | 2 | ||||
-rw-r--r-- | spec/gitlab_shell_discover_spec.rb | 2 |
15 files changed, 134 insertions, 54 deletions
diff --git a/client/client_test.go b/client/client_test.go index aefff33..776a0aa 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -6,6 +6,8 @@ import ( "fmt" "io" "net/http" + "net/http/httptest" + "os" "path" "strings" "testing" @@ -18,9 +20,7 @@ import ( "gitlab.com/gitlab-org/gitlab-shell/v14/internal/testhelper" ) -var ( - secret = "sssh, it's a secret" -) +var secret = "sssh, it's a secret" func TestClients(t *testing.T) { testhelper.PrepareTestRootDir(t) @@ -70,6 +70,11 @@ func TestClients(t *testing.T) { }, secret: "\n" + secret + "\n", }, + { + desc: "Retry client", + server: testserver.StartRetryHttpServer, + secret: secret, + }, } for _, tc := range testCases { @@ -297,3 +302,43 @@ func buildRequests(t *testing.T, relativeURLRoot string) []testserver.TestReques return requests } + +func TestRetryableHTTPFeatureToggle(t *testing.T) { + t.Run("retryable http off", func(t *testing.T) { + os.Setenv("FF_GITLAB_SHELL_RETRYABLE_HTTP", "0") + reqAttempts := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + reqAttempts++ + w.WriteHeader(500) + })) + defer srv.Close() + + httpClient, err := NewHTTPClientWithOpts(srv.URL, "/", "", "", 1, nil) + require.NoError(t, err) + client, err := NewGitlabNetClient("", "", "", httpClient) + require.NoError(t, err) + + _, err = client.Get(context.Background(), "/") + require.EqualError(t, err, "Internal API unreachable") + require.Equal(t, 1, reqAttempts) + }) + + t.Run("retryable http on", func(t *testing.T) { + os.Setenv("FF_GITLAB_SHELL_RETRYABLE_HTTP", "1") + reqAttempts := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + reqAttempts++ + w.WriteHeader(500) + })) + defer srv.Close() + + httpClient, err := NewHTTPClientWithOpts(srv.URL, "/", "", "", 1, nil) + require.NoError(t, err) + client, err := NewGitlabNetClient("", "", "", httpClient) + require.NoError(t, err) + + _, err = client.Get(context.Background(), "/") + require.EqualError(t, err, "Internal API unreachable") + require.Equal(t, 3, reqAttempts) + }) +} diff --git a/client/gitlabnet.go b/client/gitlabnet.go index 24c1d5f..02bef8c 100644 --- a/client/gitlabnet.go +++ b/client/gitlabnet.go @@ -11,6 +11,7 @@ import ( "time" "github.com/golang-jwt/jwt/v4" + "github.com/hashicorp/go-retryablehttp" "gitlab.com/gitlab-org/labkit/log" ) @@ -88,7 +89,7 @@ func appendPath(host string, path string) string { return strings.TrimSuffix(host, "/") + "/" + strings.TrimPrefix(path, "/") } -func newRequest(ctx context.Context, method, host, path string, data interface{}) (*http.Request, error) { +func newRequest(ctx context.Context, method, host, path string, data interface{}) (*retryablehttp.Request, error) { var jsonReader io.Reader if data != nil { jsonData, err := json.Marshal(data) @@ -99,7 +100,7 @@ func newRequest(ctx context.Context, method, host, path string, data interface{} jsonReader = bytes.NewReader(jsonData) } - request, err := http.NewRequestWithContext(ctx, method, appendPath(host, path), jsonReader) + request, err := retryablehttp.NewRequestWithContext(ctx, method, appendPath(host, path), jsonReader) if err != nil { return nil, err } diff --git a/client/httpclient.go b/client/httpclient.go index bd00b6b..47486a0 100644 --- a/client/httpclient.go +++ b/client/httpclient.go @@ -13,6 +13,7 @@ import ( "strings" "time" + "github.com/hashicorp/go-retryablehttp" "gitlab.com/gitlab-org/labkit/correlation" "gitlab.com/gitlab-org/labkit/tracing" ) @@ -25,12 +26,10 @@ const ( defaultReadTimeoutSeconds = 300 ) -var ( - ErrCafileNotFound = errors.New("cafile not found") -) +var ErrCafileNotFound = errors.New("cafile not found") type HttpClient struct { - *http.Client + *retryablehttp.Client Host string } @@ -101,10 +100,15 @@ func NewHTTPClientWithOpts(gitlabURL, gitlabRelativeURLRoot, caFile, caPath stri return nil, errors.New("unknown GitLab URL prefix") } - c := &http.Client{ - Transport: correlation.NewInstrumentedRoundTripper(tracing.NewRoundTripper(transport)), - Timeout: readTimeout(readTimeoutSeconds), + c := retryablehttp.NewClient() + c.RetryMax = 0 // Retry logic is disabled by default + if os.Getenv("FF_GITLAB_SHELL_RETRYABLE_HTTP") == "1" { // Feature toggle for enabling retries on GitLab client. + c.RetryMax = 2 + c.RetryWaitMax = 15 * time.Second } + c.Logger = nil + c.HTTPClient.Transport = correlation.NewInstrumentedRoundTripper(tracing.NewRoundTripper(transport)) + c.HTTPClient.Timeout = readTimeout(readTimeoutSeconds) client := &HttpClient{Client: c, Host: host} @@ -132,7 +136,6 @@ func buildSocketTransport(gitlabURL, gitlabRelativeURLRoot string) (*http.Transp func buildHttpsTransport(hcc httpClientCfg, gitlabURL string) (*http.Transport, string, error) { certPool, err := x509.SystemCertPool() - if err != nil { certPool = x509.NewCertPool() } diff --git a/client/httpclient_test.go b/client/httpclient_test.go index ea05a88..a5de47c 100644 --- a/client/httpclient_test.go +++ b/client/httpclient_test.go @@ -21,7 +21,7 @@ func TestReadTimeout(t *testing.T) { require.NoError(t, err) require.NotNil(t, client) - require.Equal(t, time.Duration(expectedSeconds)*time.Second, client.Client.Timeout) + require.Equal(t, time.Duration(expectedSeconds)*time.Second, client.HTTPClient.Timeout) } const ( diff --git a/client/testserver/testserver.go b/client/testserver/testserver.go index b09c7ba..105e7a0 100644 --- a/client/testserver/testserver.go +++ b/client/testserver/testserver.go @@ -59,6 +59,35 @@ func StartHttpServer(t *testing.T, handlers []TestRequestHandler) string { return server.URL } +func StartRetryHttpServer(t *testing.T, handlers []TestRequestHandler) string { + os.Setenv("FF_GITLAB_SHELL_RETRYABLE_HTTP", "1") + attempts := map[string]int{} + + retryMiddileware := func(next func(w http.ResponseWriter, r *http.Request)) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + attempts[r.URL.String()+r.Method]++ + if attempts[r.URL.String()+r.Method] == 1 { + w.WriteHeader(500) + return + } + + http.HandlerFunc(next).ServeHTTP(w, r) + }) + } + t.Helper() + + h := http.NewServeMux() + + for _, handler := range handlers { + h.Handle(handler.Path, retryMiddileware(handler.Handler)) + } + + server := httptest.NewServer(h) + t.Cleanup(func() { server.Close() }) + + return server.URL +} + func StartHttpsServer(t *testing.T, handlers []TestRequestHandler, clientCAPath string) string { t.Helper() @@ -6,6 +6,7 @@ require ( github.com/golang-jwt/jwt/v4 v4.4.1 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 + github.com/hashicorp/go-retryablehttp v0.7.1 github.com/mattn/go-shellwords v1.0.11 github.com/mikesmitty/edkey v0.0.0-20170222072505-3356ea4e686a github.com/otiai10/copy v1.4.2 @@ -48,6 +49,7 @@ require ( github.com/google/pprof v0.0.0-20210804190019-f964ff605595 // indirect github.com/google/uuid v1.3.0 // indirect github.com/googleapis/gax-go/v2 v2.2.0 // indirect + github.com/hashicorp/go-cleanhttp v0.5.1 // indirect github.com/hashicorp/yamux v0.1.1 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/kr/text v0.2.0 // indirect @@ -227,6 +227,12 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM= +github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI= +github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-retryablehttp v0.7.1 h1:sUiuQAnLlbvmExtFQs72iFW/HXeUn8Z1aJLQ4LJJbTQ= +github.com/hashicorp/go-retryablehttp v0.7.1/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE= diff --git a/internal/command/discover/discover_test.go b/internal/command/discover/discover_test.go index ccbfa7e..df9ca47 100644 --- a/internal/command/discover/discover_test.go +++ b/internal/command/discover/discover_test.go @@ -16,33 +16,31 @@ import ( "gitlab.com/gitlab-org/gitlab-shell/v14/internal/config" ) -var ( - requests = []testserver.TestRequestHandler{ - { - Path: "/api/v4/internal/discover", - Handler: func(w http.ResponseWriter, r *http.Request) { - if r.URL.Query().Get("key_id") == "1" || r.URL.Query().Get("username") == "alex-doe" { - body := map[string]interface{}{ - "id": 2, - "username": "alex-doe", - "name": "Alex Doe", - } - json.NewEncoder(w).Encode(body) - } else if r.URL.Query().Get("username") == "broken_message" { - body := map[string]string{ - "message": "Forbidden!", - } - w.WriteHeader(http.StatusForbidden) - json.NewEncoder(w).Encode(body) - } else if r.URL.Query().Get("username") == "broken" { - w.WriteHeader(http.StatusInternalServerError) - } else { - fmt.Fprint(w, "null") +var requests = []testserver.TestRequestHandler{ + { + Path: "/api/v4/internal/discover", + Handler: func(w http.ResponseWriter, r *http.Request) { + if r.URL.Query().Get("key_id") == "1" || r.URL.Query().Get("username") == "alex-doe" { + body := map[string]interface{}{ + "id": 2, + "username": "alex-doe", + "name": "Alex Doe", + } + json.NewEncoder(w).Encode(body) + } else if r.URL.Query().Get("username") == "broken_message" { + body := map[string]string{ + "message": "Forbidden!", } - }, + w.WriteHeader(http.StatusForbidden) + json.NewEncoder(w).Encode(body) + } else if r.URL.Query().Get("username") == "broken" { + w.WriteHeader(http.StatusInternalServerError) + } else { + fmt.Fprint(w, "null") + } }, - } -) + }, +} func TestExecute(t *testing.T) { url := testserver.StartSocketHttpServer(t, requests) @@ -112,7 +110,7 @@ func TestFailingExecute(t *testing.T) { { desc: "When the API fails", arguments: &commandargs.Shell{GitlabUsername: "broken"}, - expectedError: "Failed to get username: Internal API error (500)", + expectedError: "Failed to get username: Internal API unreachable", }, } diff --git a/internal/command/healthcheck/healthcheck_test.go b/internal/command/healthcheck/healthcheck_test.go index f642226..12a8444 100644 --- a/internal/command/healthcheck/healthcheck_test.go +++ b/internal/command/healthcheck/healthcheck_test.go @@ -84,5 +84,5 @@ func TestFailingAPIExecute(t *testing.T) { err := cmd.Execute(context.Background()) require.Empty(t, buffer.String()) - require.EqualError(t, err, "Internal API available: FAILED - Internal API error (500)") + require.EqualError(t, err, "Internal API available: FAILED - Internal API unreachable") } diff --git a/internal/command/personalaccesstoken/personalaccesstoken_test.go b/internal/command/personalaccesstoken/personalaccesstoken_test.go index daa5834..492f745 100644 --- a/internal/command/personalaccesstoken/personalaccesstoken_test.go +++ b/internal/command/personalaccesstoken/personalaccesstoken_test.go @@ -17,9 +17,7 @@ import ( "gitlab.com/gitlab-org/gitlab-shell/v14/internal/gitlabnet/personalaccesstoken" ) -var ( - requests []testserver.TestRequestHandler -) +var requests []testserver.TestRequestHandler func setup(t *testing.T) { requests = []testserver.TestRequestHandler{ @@ -147,7 +145,7 @@ func TestExecute(t *testing.T) { GitlabKeyId: "broken", SshArgs: []string{cmdname, "newtoken", "read_api,read_repository"}, }, - expectedError: "Internal API error (500)", + expectedError: "Internal API unreachable", }, { desc: "Without KeyID or User", diff --git a/internal/command/twofactorrecover/twofactorrecover_test.go b/internal/command/twofactorrecover/twofactorrecover_test.go index 81b4dff..7e20a06 100644 --- a/internal/command/twofactorrecover/twofactorrecover_test.go +++ b/internal/command/twofactorrecover/twofactorrecover_test.go @@ -18,9 +18,7 @@ import ( "gitlab.com/gitlab-org/gitlab-shell/v14/internal/gitlabnet/twofactorrecover" ) -var ( - requests []testserver.TestRequestHandler -) +var requests []testserver.TestRequestHandler func setup(t *testing.T) { requests = []testserver.TestRequestHandler{ @@ -99,7 +97,7 @@ func TestExecute(t *testing.T) { desc: "With API fails", arguments: &commandargs.Shell{GitlabKeyId: "broken"}, answer: "yes\n", - expectedOutput: question + errorHeader + "Internal API error (500)\n", + expectedOutput: question + errorHeader + "Internal API unreachable\n", }, { desc: "With missing arguments", diff --git a/internal/command/twofactorverify/twofactorverify_test.go b/internal/command/twofactorverify/twofactorverify_test.go index 640d3ed..213c025 100644 --- a/internal/command/twofactorverify/twofactorverify_test.go +++ b/internal/command/twofactorverify/twofactorverify_test.go @@ -136,7 +136,7 @@ func TestExecute(t *testing.T) { { desc: "With API fails", arguments: &commandargs.Shell{GitlabKeyId: "broken"}, - expectedOutput: errorHeader + "Internal API error (500)\n", + expectedOutput: errorHeader + "Internal API unreachable\n", }, { desc: "With missing arguments", diff --git a/internal/config/config.go b/internal/config/config.go index 35d8e74..7f473c5 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -134,8 +134,8 @@ func (c *Config) HttpClient() (*client.HttpClient, error) { return } - tr := client.Transport - client.Transport = metrics.NewRoundTripper(tr) + tr := client.HTTPClient.Transport + client.HTTPClient.Transport = metrics.NewRoundTripper(tr) c.httpClient = client }) diff --git a/internal/gitlabnet/lfsauthenticate/client_test.go b/internal/gitlabnet/lfsauthenticate/client_test.go index f62d8e1..01b3f84 100644 --- a/internal/gitlabnet/lfsauthenticate/client_test.go +++ b/internal/gitlabnet/lfsauthenticate/client_test.go @@ -74,7 +74,7 @@ func TestFailedRequests(t *testing.T) { { desc: "With API fails", args: &commandargs.Shell{GitlabKeyId: "broken", CommandType: commandargs.LfsAuthenticate, SshArgs: []string{"git-lfs-authenticate", repo, "download"}}, - expectedOutput: "Internal API error (500)", + expectedOutput: "Internal API unreachable", }, } diff --git a/spec/gitlab_shell_discover_spec.rb b/spec/gitlab_shell_discover_spec.rb index 225d6b9..07a9be1 100644 --- a/spec/gitlab_shell_discover_spec.rb +++ b/spec/gitlab_shell_discover_spec.rb @@ -124,7 +124,7 @@ describe 'bin/gitlab-shell' do it 'returns an error message when the API call fails without a message' do _, stderr, status = run!(["-c/usr/share/webapps/gitlab-shell/bin/gitlab-shell", "username-broken"]) - expect(stderr).to match(/Failed to get username: Internal API error \(500\)/) + expect(stderr).to match(/Failed to get username: Internal API unreachable/) expect(status).not_to be_success end end |