summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Azzopardi <sazzopardi@gitlab.com>2023-01-02 15:50:27 +0100
committerAsh McKenzie <amckenzie@gitlab.com>2023-01-12 02:56:43 +0000
commita093c9d3cfc1ee18368ebbf828dc61c15b74540c (patch)
tree5e10805b8ecd9d582ef27766f5374e8150a16aeb
parentc5b3accf0a8b2008137a90d40cfd65ba3ef2c99a (diff)
downloadgitlab-shell-a093c9d3cfc1ee18368ebbf828dc61c15b74540c.tar.gz
feat: retry on error
What --- Change the default `HTTP.Client` to `github.com/hashicorp/go-retryablehttp.Client` to get automatic retries and exponential backoff. We retry the request 2 times resulting in 3 attempts of sending the request, the min retry wait is 1 second, and the maximum is 15 seconds. Hide the retry logic behind a temporary feature flag `FF_GITLAB_SHELL_RETRYABLE_HTTP` to easily roll this out in GitLab.com. When we verify that this works as expected we will remove `FF_GITLAB_SHELL_RETRYABLE_HTTP` and have the retry logic as the default logic. Why --- In https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979 users end up seeing the following errors when trying to `git-clone(1)` a repository locally on in CI. ```shell remote: =============================== remote: remote: ERROR: Internal API unreachable remote: remote: ================================ ``` When we look at the application logs we see the following error: ```json { "err": "http://gitlab-webservice-git.gitlab.svc:8181/api/v4/internal/allowed": dial tcp 10.69.184.120:8181: connect: connection refused", "msg": "Internal API unreachable"} ``` In https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979#note_1222670120 we've correlated these `connection refused` errors with infrastructure events that remove the git pods that are hosting `gitlab-webservice-git` service. We could try to make the underlying infrastructure more reactive to these changes as suggested in https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979#note_1225164944 but we can still end up serving bad requests. Implementing retry logic for 5xx or other errors would allow users to still be able to `git-clone(1)` reposirories, although it being slower. This is espically important during CI runs so users don't have to retry jobs themselves. Reference: https://gitlab.com/gitlab-com/gl-infra/production/-/issues/7979 Closes: https://gitlab.com/gitlab-org/gitlab-shell/-/issues/604 Signed-off-by: Steve Azzopardi <sazzopardi@gitlab.com>
-rw-r--r--client/client_test.go51
-rw-r--r--client/gitlabnet.go5
-rw-r--r--client/httpclient.go19
-rw-r--r--client/httpclient_test.go2
-rw-r--r--client/testserver/testserver.go29
-rw-r--r--go.mod2
-rw-r--r--go.sum6
-rw-r--r--internal/command/discover/discover_test.go50
-rw-r--r--internal/command/healthcheck/healthcheck_test.go2
-rw-r--r--internal/command/personalaccesstoken/personalaccesstoken_test.go6
-rw-r--r--internal/command/twofactorrecover/twofactorrecover_test.go6
-rw-r--r--internal/command/twofactorverify/twofactorverify_test.go2
-rw-r--r--internal/config/config.go4
-rw-r--r--internal/gitlabnet/lfsauthenticate/client_test.go2
-rw-r--r--spec/gitlab_shell_discover_spec.rb2
15 files changed, 134 insertions, 54 deletions
diff --git a/client/client_test.go b/client/client_test.go
index aefff33..776a0aa 100644
--- a/client/client_test.go
+++ b/client/client_test.go
@@ -6,6 +6,8 @@ import (
"fmt"
"io"
"net/http"
+ "net/http/httptest"
+ "os"
"path"
"strings"
"testing"
@@ -18,9 +20,7 @@ import (
"gitlab.com/gitlab-org/gitlab-shell/v14/internal/testhelper"
)
-var (
- secret = "sssh, it's a secret"
-)
+var secret = "sssh, it's a secret"
func TestClients(t *testing.T) {
testhelper.PrepareTestRootDir(t)
@@ -70,6 +70,11 @@ func TestClients(t *testing.T) {
},
secret: "\n" + secret + "\n",
},
+ {
+ desc: "Retry client",
+ server: testserver.StartRetryHttpServer,
+ secret: secret,
+ },
}
for _, tc := range testCases {
@@ -297,3 +302,43 @@ func buildRequests(t *testing.T, relativeURLRoot string) []testserver.TestReques
return requests
}
+
+func TestRetryableHTTPFeatureToggle(t *testing.T) {
+ t.Run("retryable http off", func(t *testing.T) {
+ os.Setenv("FF_GITLAB_SHELL_RETRYABLE_HTTP", "0")
+ reqAttempts := 0
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ reqAttempts++
+ w.WriteHeader(500)
+ }))
+ defer srv.Close()
+
+ httpClient, err := NewHTTPClientWithOpts(srv.URL, "/", "", "", 1, nil)
+ require.NoError(t, err)
+ client, err := NewGitlabNetClient("", "", "", httpClient)
+ require.NoError(t, err)
+
+ _, err = client.Get(context.Background(), "/")
+ require.EqualError(t, err, "Internal API unreachable")
+ require.Equal(t, 1, reqAttempts)
+ })
+
+ t.Run("retryable http on", func(t *testing.T) {
+ os.Setenv("FF_GITLAB_SHELL_RETRYABLE_HTTP", "1")
+ reqAttempts := 0
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ reqAttempts++
+ w.WriteHeader(500)
+ }))
+ defer srv.Close()
+
+ httpClient, err := NewHTTPClientWithOpts(srv.URL, "/", "", "", 1, nil)
+ require.NoError(t, err)
+ client, err := NewGitlabNetClient("", "", "", httpClient)
+ require.NoError(t, err)
+
+ _, err = client.Get(context.Background(), "/")
+ require.EqualError(t, err, "Internal API unreachable")
+ require.Equal(t, 3, reqAttempts)
+ })
+}
diff --git a/client/gitlabnet.go b/client/gitlabnet.go
index 24c1d5f..02bef8c 100644
--- a/client/gitlabnet.go
+++ b/client/gitlabnet.go
@@ -11,6 +11,7 @@ import (
"time"
"github.com/golang-jwt/jwt/v4"
+ "github.com/hashicorp/go-retryablehttp"
"gitlab.com/gitlab-org/labkit/log"
)
@@ -88,7 +89,7 @@ func appendPath(host string, path string) string {
return strings.TrimSuffix(host, "/") + "/" + strings.TrimPrefix(path, "/")
}
-func newRequest(ctx context.Context, method, host, path string, data interface{}) (*http.Request, error) {
+func newRequest(ctx context.Context, method, host, path string, data interface{}) (*retryablehttp.Request, error) {
var jsonReader io.Reader
if data != nil {
jsonData, err := json.Marshal(data)
@@ -99,7 +100,7 @@ func newRequest(ctx context.Context, method, host, path string, data interface{}
jsonReader = bytes.NewReader(jsonData)
}
- request, err := http.NewRequestWithContext(ctx, method, appendPath(host, path), jsonReader)
+ request, err := retryablehttp.NewRequestWithContext(ctx, method, appendPath(host, path), jsonReader)
if err != nil {
return nil, err
}
diff --git a/client/httpclient.go b/client/httpclient.go
index bd00b6b..47486a0 100644
--- a/client/httpclient.go
+++ b/client/httpclient.go
@@ -13,6 +13,7 @@ import (
"strings"
"time"
+ "github.com/hashicorp/go-retryablehttp"
"gitlab.com/gitlab-org/labkit/correlation"
"gitlab.com/gitlab-org/labkit/tracing"
)
@@ -25,12 +26,10 @@ const (
defaultReadTimeoutSeconds = 300
)
-var (
- ErrCafileNotFound = errors.New("cafile not found")
-)
+var ErrCafileNotFound = errors.New("cafile not found")
type HttpClient struct {
- *http.Client
+ *retryablehttp.Client
Host string
}
@@ -101,10 +100,15 @@ func NewHTTPClientWithOpts(gitlabURL, gitlabRelativeURLRoot, caFile, caPath stri
return nil, errors.New("unknown GitLab URL prefix")
}
- c := &http.Client{
- Transport: correlation.NewInstrumentedRoundTripper(tracing.NewRoundTripper(transport)),
- Timeout: readTimeout(readTimeoutSeconds),
+ c := retryablehttp.NewClient()
+ c.RetryMax = 0 // Retry logic is disabled by default
+ if os.Getenv("FF_GITLAB_SHELL_RETRYABLE_HTTP") == "1" { // Feature toggle for enabling retries on GitLab client.
+ c.RetryMax = 2
+ c.RetryWaitMax = 15 * time.Second
}
+ c.Logger = nil
+ c.HTTPClient.Transport = correlation.NewInstrumentedRoundTripper(tracing.NewRoundTripper(transport))
+ c.HTTPClient.Timeout = readTimeout(readTimeoutSeconds)
client := &HttpClient{Client: c, Host: host}
@@ -132,7 +136,6 @@ func buildSocketTransport(gitlabURL, gitlabRelativeURLRoot string) (*http.Transp
func buildHttpsTransport(hcc httpClientCfg, gitlabURL string) (*http.Transport, string, error) {
certPool, err := x509.SystemCertPool()
-
if err != nil {
certPool = x509.NewCertPool()
}
diff --git a/client/httpclient_test.go b/client/httpclient_test.go
index ea05a88..a5de47c 100644
--- a/client/httpclient_test.go
+++ b/client/httpclient_test.go
@@ -21,7 +21,7 @@ func TestReadTimeout(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, client)
- require.Equal(t, time.Duration(expectedSeconds)*time.Second, client.Client.Timeout)
+ require.Equal(t, time.Duration(expectedSeconds)*time.Second, client.HTTPClient.Timeout)
}
const (
diff --git a/client/testserver/testserver.go b/client/testserver/testserver.go
index b09c7ba..105e7a0 100644
--- a/client/testserver/testserver.go
+++ b/client/testserver/testserver.go
@@ -59,6 +59,35 @@ func StartHttpServer(t *testing.T, handlers []TestRequestHandler) string {
return server.URL
}
+func StartRetryHttpServer(t *testing.T, handlers []TestRequestHandler) string {
+ os.Setenv("FF_GITLAB_SHELL_RETRYABLE_HTTP", "1")
+ attempts := map[string]int{}
+
+ retryMiddileware := func(next func(w http.ResponseWriter, r *http.Request)) http.Handler {
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ attempts[r.URL.String()+r.Method]++
+ if attempts[r.URL.String()+r.Method] == 1 {
+ w.WriteHeader(500)
+ return
+ }
+
+ http.HandlerFunc(next).ServeHTTP(w, r)
+ })
+ }
+ t.Helper()
+
+ h := http.NewServeMux()
+
+ for _, handler := range handlers {
+ h.Handle(handler.Path, retryMiddileware(handler.Handler))
+ }
+
+ server := httptest.NewServer(h)
+ t.Cleanup(func() { server.Close() })
+
+ return server.URL
+}
+
func StartHttpsServer(t *testing.T, handlers []TestRequestHandler, clientCAPath string) string {
t.Helper()
diff --git a/go.mod b/go.mod
index 23eecbb..4a1a5cf 100644
--- a/go.mod
+++ b/go.mod
@@ -6,6 +6,7 @@ require (
github.com/golang-jwt/jwt/v4 v4.4.1
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
+ github.com/hashicorp/go-retryablehttp v0.7.1
github.com/mattn/go-shellwords v1.0.11
github.com/mikesmitty/edkey v0.0.0-20170222072505-3356ea4e686a
github.com/otiai10/copy v1.4.2
@@ -48,6 +49,7 @@ require (
github.com/google/pprof v0.0.0-20210804190019-f964ff605595 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/googleapis/gax-go/v2 v2.2.0 // indirect
+ github.com/hashicorp/go-cleanhttp v0.5.1 // indirect
github.com/hashicorp/yamux v0.1.1 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/kr/text v0.2.0 // indirect
diff --git a/go.sum b/go.sum
index 29c9560..468372d 100644
--- a/go.sum
+++ b/go.sum
@@ -227,6 +227,12 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
+github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM=
+github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
+github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
+github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
+github.com/hashicorp/go-retryablehttp v0.7.1 h1:sUiuQAnLlbvmExtFQs72iFW/HXeUn8Z1aJLQ4LJJbTQ=
+github.com/hashicorp/go-retryablehttp v0.7.1/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/yamux v0.1.1 h1:yrQxtgseBDrq9Y652vSRDvsKCJKOUD+GzTS4Y0Y8pvE=
diff --git a/internal/command/discover/discover_test.go b/internal/command/discover/discover_test.go
index ccbfa7e..df9ca47 100644
--- a/internal/command/discover/discover_test.go
+++ b/internal/command/discover/discover_test.go
@@ -16,33 +16,31 @@ import (
"gitlab.com/gitlab-org/gitlab-shell/v14/internal/config"
)
-var (
- requests = []testserver.TestRequestHandler{
- {
- Path: "/api/v4/internal/discover",
- Handler: func(w http.ResponseWriter, r *http.Request) {
- if r.URL.Query().Get("key_id") == "1" || r.URL.Query().Get("username") == "alex-doe" {
- body := map[string]interface{}{
- "id": 2,
- "username": "alex-doe",
- "name": "Alex Doe",
- }
- json.NewEncoder(w).Encode(body)
- } else if r.URL.Query().Get("username") == "broken_message" {
- body := map[string]string{
- "message": "Forbidden!",
- }
- w.WriteHeader(http.StatusForbidden)
- json.NewEncoder(w).Encode(body)
- } else if r.URL.Query().Get("username") == "broken" {
- w.WriteHeader(http.StatusInternalServerError)
- } else {
- fmt.Fprint(w, "null")
+var requests = []testserver.TestRequestHandler{
+ {
+ Path: "/api/v4/internal/discover",
+ Handler: func(w http.ResponseWriter, r *http.Request) {
+ if r.URL.Query().Get("key_id") == "1" || r.URL.Query().Get("username") == "alex-doe" {
+ body := map[string]interface{}{
+ "id": 2,
+ "username": "alex-doe",
+ "name": "Alex Doe",
+ }
+ json.NewEncoder(w).Encode(body)
+ } else if r.URL.Query().Get("username") == "broken_message" {
+ body := map[string]string{
+ "message": "Forbidden!",
}
- },
+ w.WriteHeader(http.StatusForbidden)
+ json.NewEncoder(w).Encode(body)
+ } else if r.URL.Query().Get("username") == "broken" {
+ w.WriteHeader(http.StatusInternalServerError)
+ } else {
+ fmt.Fprint(w, "null")
+ }
},
- }
-)
+ },
+}
func TestExecute(t *testing.T) {
url := testserver.StartSocketHttpServer(t, requests)
@@ -112,7 +110,7 @@ func TestFailingExecute(t *testing.T) {
{
desc: "When the API fails",
arguments: &commandargs.Shell{GitlabUsername: "broken"},
- expectedError: "Failed to get username: Internal API error (500)",
+ expectedError: "Failed to get username: Internal API unreachable",
},
}
diff --git a/internal/command/healthcheck/healthcheck_test.go b/internal/command/healthcheck/healthcheck_test.go
index f642226..12a8444 100644
--- a/internal/command/healthcheck/healthcheck_test.go
+++ b/internal/command/healthcheck/healthcheck_test.go
@@ -84,5 +84,5 @@ func TestFailingAPIExecute(t *testing.T) {
err := cmd.Execute(context.Background())
require.Empty(t, buffer.String())
- require.EqualError(t, err, "Internal API available: FAILED - Internal API error (500)")
+ require.EqualError(t, err, "Internal API available: FAILED - Internal API unreachable")
}
diff --git a/internal/command/personalaccesstoken/personalaccesstoken_test.go b/internal/command/personalaccesstoken/personalaccesstoken_test.go
index daa5834..492f745 100644
--- a/internal/command/personalaccesstoken/personalaccesstoken_test.go
+++ b/internal/command/personalaccesstoken/personalaccesstoken_test.go
@@ -17,9 +17,7 @@ import (
"gitlab.com/gitlab-org/gitlab-shell/v14/internal/gitlabnet/personalaccesstoken"
)
-var (
- requests []testserver.TestRequestHandler
-)
+var requests []testserver.TestRequestHandler
func setup(t *testing.T) {
requests = []testserver.TestRequestHandler{
@@ -147,7 +145,7 @@ func TestExecute(t *testing.T) {
GitlabKeyId: "broken",
SshArgs: []string{cmdname, "newtoken", "read_api,read_repository"},
},
- expectedError: "Internal API error (500)",
+ expectedError: "Internal API unreachable",
},
{
desc: "Without KeyID or User",
diff --git a/internal/command/twofactorrecover/twofactorrecover_test.go b/internal/command/twofactorrecover/twofactorrecover_test.go
index 81b4dff..7e20a06 100644
--- a/internal/command/twofactorrecover/twofactorrecover_test.go
+++ b/internal/command/twofactorrecover/twofactorrecover_test.go
@@ -18,9 +18,7 @@ import (
"gitlab.com/gitlab-org/gitlab-shell/v14/internal/gitlabnet/twofactorrecover"
)
-var (
- requests []testserver.TestRequestHandler
-)
+var requests []testserver.TestRequestHandler
func setup(t *testing.T) {
requests = []testserver.TestRequestHandler{
@@ -99,7 +97,7 @@ func TestExecute(t *testing.T) {
desc: "With API fails",
arguments: &commandargs.Shell{GitlabKeyId: "broken"},
answer: "yes\n",
- expectedOutput: question + errorHeader + "Internal API error (500)\n",
+ expectedOutput: question + errorHeader + "Internal API unreachable\n",
},
{
desc: "With missing arguments",
diff --git a/internal/command/twofactorverify/twofactorverify_test.go b/internal/command/twofactorverify/twofactorverify_test.go
index 640d3ed..213c025 100644
--- a/internal/command/twofactorverify/twofactorverify_test.go
+++ b/internal/command/twofactorverify/twofactorverify_test.go
@@ -136,7 +136,7 @@ func TestExecute(t *testing.T) {
{
desc: "With API fails",
arguments: &commandargs.Shell{GitlabKeyId: "broken"},
- expectedOutput: errorHeader + "Internal API error (500)\n",
+ expectedOutput: errorHeader + "Internal API unreachable\n",
},
{
desc: "With missing arguments",
diff --git a/internal/config/config.go b/internal/config/config.go
index 35d8e74..7f473c5 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -134,8 +134,8 @@ func (c *Config) HttpClient() (*client.HttpClient, error) {
return
}
- tr := client.Transport
- client.Transport = metrics.NewRoundTripper(tr)
+ tr := client.HTTPClient.Transport
+ client.HTTPClient.Transport = metrics.NewRoundTripper(tr)
c.httpClient = client
})
diff --git a/internal/gitlabnet/lfsauthenticate/client_test.go b/internal/gitlabnet/lfsauthenticate/client_test.go
index f62d8e1..01b3f84 100644
--- a/internal/gitlabnet/lfsauthenticate/client_test.go
+++ b/internal/gitlabnet/lfsauthenticate/client_test.go
@@ -74,7 +74,7 @@ func TestFailedRequests(t *testing.T) {
{
desc: "With API fails",
args: &commandargs.Shell{GitlabKeyId: "broken", CommandType: commandargs.LfsAuthenticate, SshArgs: []string{"git-lfs-authenticate", repo, "download"}},
- expectedOutput: "Internal API error (500)",
+ expectedOutput: "Internal API unreachable",
},
}
diff --git a/spec/gitlab_shell_discover_spec.rb b/spec/gitlab_shell_discover_spec.rb
index 225d6b9..07a9be1 100644
--- a/spec/gitlab_shell_discover_spec.rb
+++ b/spec/gitlab_shell_discover_spec.rb
@@ -124,7 +124,7 @@ describe 'bin/gitlab-shell' do
it 'returns an error message when the API call fails without a message' do
_, stderr, status = run!(["-c/usr/share/webapps/gitlab-shell/bin/gitlab-shell", "username-broken"])
- expect(stderr).to match(/Failed to get username: Internal API error \(500\)/)
+ expect(stderr).to match(/Failed to get username: Internal API unreachable/)
expect(status).not_to be_success
end
end