diff options
author | Steve Azzopardi <sazzopardi@gitlab.com> | 2023-01-02 15:50:27 +0100 |
---|---|---|
committer | Ash McKenzie <amckenzie@gitlab.com> | 2023-01-12 02:56:43 +0000 |
commit | a093c9d3cfc1ee18368ebbf828dc61c15b74540c (patch) | |
tree | 5e10805b8ecd9d582ef27766f5374e8150a16aeb /client/client_test.go | |
parent | c5b3accf0a8b2008137a90d40cfd65ba3ef2c99a (diff) | |
download | gitlab-shell-a093c9d3cfc1ee18368ebbf828dc61c15b74540c.tar.gz |
feat: retry on error
What
---
Change the default `HTTP.Client` to
`github.com/hashicorp/go-retryablehttp.Client` to get automatic retries
and exponential backoff.
We retry the request 2 times resulting in 3 attempts of sending the
request, the min retry wait is 1 second, and the maximum is 15
seconds.
Hide the retry logic behind a temporary feature flag
`FF_GITLAB_SHELL_RETRYABLE_HTTP` to easily roll this out in GitLab.com.
When we verify that this works as expected we will remove
`FF_GITLAB_SHELL_RETRYABLE_HTTP` and have the retry logic as the default
logic.
Why
---
In https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979 users
end up seeing the following errors when trying to `git-clone(1)` a
repository locally on in CI.
```shell
remote: ===============================
remote:
remote: ERROR: Internal API unreachable
remote:
remote: ================================
```
When we look at the application logs we see the following error:
```json
{ "err": "http://gitlab-webservice-git.gitlab.svc:8181/api/v4/internal/allowed":
dial tcp 10.69.184.120:8181: connect: connection refused", "msg":
"Internal API unreachable"}
```
In
https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979#note_1222670120
we've correlated these `connection refused` errors with infrastructure
events that remove the git pods that are hosting
`gitlab-webservice-git` service. We could try to make the underlying
infrastructure more reactive to these changes as suggested in
https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979#note_1225164944
but we can still end up serving bad requests.
Implementing retry logic for 5xx or other errors would allow users to
still be able to `git-clone(1)` reposirories, although it being slower.
This is espically important during CI runs so users don't have to retry
jobs themselves.
Reference: https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979
Closes: https://gitlab.com/gitlab-org/gitlab-shell/-/issues/604
Signed-off-by: Steve Azzopardi <sazzopardi@gitlab.com>
Diffstat (limited to 'client/client_test.go')
-rw-r--r-- | client/client_test.go | 51 |
1 files changed, 48 insertions, 3 deletions
diff --git a/client/client_test.go b/client/client_test.go index aefff33..776a0aa 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -6,6 +6,8 @@ import ( "fmt" "io" "net/http" + "net/http/httptest" + "os" "path" "strings" "testing" @@ -18,9 +20,7 @@ import ( "gitlab.com/gitlab-org/gitlab-shell/v14/internal/testhelper" ) -var ( - secret = "sssh, it's a secret" -) +var secret = "sssh, it's a secret" func TestClients(t *testing.T) { testhelper.PrepareTestRootDir(t) @@ -70,6 +70,11 @@ func TestClients(t *testing.T) { }, secret: "\n" + secret + "\n", }, + { + desc: "Retry client", + server: testserver.StartRetryHttpServer, + secret: secret, + }, } for _, tc := range testCases { @@ -297,3 +302,43 @@ func buildRequests(t *testing.T, relativeURLRoot string) []testserver.TestReques return requests } + +func TestRetryableHTTPFeatureToggle(t *testing.T) { + t.Run("retryable http off", func(t *testing.T) { + os.Setenv("FF_GITLAB_SHELL_RETRYABLE_HTTP", "0") + reqAttempts := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + reqAttempts++ + w.WriteHeader(500) + })) + defer srv.Close() + + httpClient, err := NewHTTPClientWithOpts(srv.URL, "/", "", "", 1, nil) + require.NoError(t, err) + client, err := NewGitlabNetClient("", "", "", httpClient) + require.NoError(t, err) + + _, err = client.Get(context.Background(), "/") + require.EqualError(t, err, "Internal API unreachable") + require.Equal(t, 1, reqAttempts) + }) + + t.Run("retryable http on", func(t *testing.T) { + os.Setenv("FF_GITLAB_SHELL_RETRYABLE_HTTP", "1") + reqAttempts := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + reqAttempts++ + w.WriteHeader(500) + })) + defer srv.Close() + + httpClient, err := NewHTTPClientWithOpts(srv.URL, "/", "", "", 1, nil) + require.NoError(t, err) + client, err := NewGitlabNetClient("", "", "", httpClient) + require.NoError(t, err) + + _, err = client.Get(context.Background(), "/") + require.EqualError(t, err, "Internal API unreachable") + require.Equal(t, 3, reqAttempts) + }) +} |