author    GitLab Bot <gitlab-bot@gitlab.com>    2022-04-20 10:00:54 +0000
committer GitLab Bot <gitlab-bot@gitlab.com>    2022-04-20 10:00:54 +0000
commit    3cccd102ba543e02725d247893729e5c73b38295 (patch)
tree      f36a04ec38517f5deaaacb5acc7d949688d1e187 /workhorse
parent    205943281328046ef7b4528031b90fbda70c75ac (diff)
download  gitlab-ce-3cccd102ba543e02725d247893729e5c73b38295.tar.gz
Add latest changes from gitlab-org/gitlab@14-10-stable-ee (tag: v14.10.0-rc42)

Diffstat (limited to 'workhorse')
-rw-r--r--  workhorse/README.md                                                    |  43
-rw-r--r--  workhorse/doc/architecture/channel.md                                  | 204
-rw-r--r--  workhorse/doc/architecture/gitlab_features.md                          |  77
-rw-r--r--  workhorse/doc/channel.md                                               |  15
-rw-r--r--  workhorse/doc/development/new_features.md                              |  49
-rw-r--r--  workhorse/doc/development/tests.md                                     |  25
-rw-r--r--  workhorse/doc/operations/configuration.md                              | 221
-rw-r--r--  workhorse/doc/operations/install.md                                    |  52
-rw-r--r--  workhorse/go.mod                                                       |   2
-rw-r--r--  workhorse/go.sum                                                       |   6
-rw-r--r--  workhorse/internal/api/api.go                                          |  23
-rw-r--r--  workhorse/internal/api/api_test.go                                     |  47
-rw-r--r--  workhorse/internal/git/info-refs.go                                    |  37
-rw-r--r--  workhorse/internal/headers/content_headers.go                          |  55
-rw-r--r--  workhorse/internal/lsif_transformer/parser/errors.go                   |  30
-rw-r--r--  workhorse/internal/lsif_transformer/parser/errors_test.go              |  26
-rw-r--r--  workhorse/internal/lsif_transformer/parser/hovers.go                   |   9
-rw-r--r--  workhorse/internal/lsif_transformer/parser/ranges.go                   |   9
-rw-r--r--  workhorse/internal/lsif_transformer/parser/references.go               |   9
-rw-r--r--  workhorse/internal/proxy/proxy.go                                      |  26
-rw-r--r--  workhorse/internal/senddata/contentprocessor/contentprocessor_test.go  |  52
-rw-r--r--  workhorse/internal/upload/artifacts_upload_test.go                     |   2
-rw-r--r--  workhorse/internal/upload/destination/destination.go                   |  21
-rw-r--r--  workhorse/internal/upload/destination/objectstore/s3_session.go        |  19
-rw-r--r--  workhorse/internal/upload/destination/objectstore/s3_session_test.go   |  25
-rw-r--r--  workhorse/internal/upload/destination/upload_opts.go                   |   4
-rw-r--r--  workhorse/internal/upload/destination/upload_opts_test.go              |   4
-rw-r--r--  workhorse/internal/upstream/upstream.go                                |  31
-rw-r--r--  workhorse/internal/upstream/upstream_test.go                           |  59
-rw-r--r--  workhorse/proxy_test.go                                                |  38
-rw-r--r--  workhorse/tools.go                                                     |   3
31 files changed, 472 insertions, 751 deletions
diff --git a/workhorse/README.md b/workhorse/README.md
index c7617645b34..c57f90b4a49 100644
--- a/workhorse/README.md
+++ b/workhorse/README.md
@@ -1,35 +1,14 @@
-# GitLab Workhorse
+---
+stage: Create
+group: Source Code
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+---
-GitLab Workhorse is a smart reverse proxy for GitLab. It handles
-"large" HTTP requests such as file downloads, file uploads, Git
-push/pull and Git archive downloads.
+# Workhorse documentation
-Workhorse itself is not a feature, but there are [several features in
-GitLab](doc/architecture/gitlab_features.md) that would not work efficiently without Workhorse.
-
-## Canonical source
-
-The canonical source for Workhorse is
-[gitlab-org/gitlab/workhorse](https://gitlab.com/gitlab-org/gitlab/tree/master/workhorse).
-Prior to https://gitlab.com/groups/gitlab-org/-/epics/4826, it was
-[gitlab-org/gitlab-workhorse](https://gitlab.com/gitlab-org/gitlab-workhorse/tree/master),
-but that repository is no longer used for development.
-
-## Documentation
-
-Workhorse documentation is available in the [`doc` folder of this repository](doc/).
-
-* Architectural overview
- * [GitLab features that rely on Workhorse](doc/architecture/gitlab_features.md)
- * [Websocket channel support](doc/architecture/channel.md)
-* Operating Workhorse
- * [Source installation](doc/operations/install.md)
- * [Workhorse configuration](doc/operations/configuration.md)
-* [Contributing](CONTRIBUTING.md)
- * [Adding new features](doc/development/new_features.md)
- * [Testing your code](doc/development/tests.md)
-
-## License
-
-This code is distributed under the MIT license, see the [LICENSE](LICENSE) file.
+This document was moved to [another location](../doc/development/workhorse/index.md).
+<!-- This redirect file can be deleted after <2022-07-01>. -->
+<!-- Redirects that point to other docs in the same project expire in three months. -->
+<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
+<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
diff --git a/workhorse/doc/architecture/channel.md b/workhorse/doc/architecture/channel.md
index f7a72d0fd45..04bb0e7652f 100644
--- a/workhorse/doc/architecture/channel.md
+++ b/workhorse/doc/architecture/channel.md
@@ -1,194 +1,16 @@
-# Websocket channel support
-
-In some cases, GitLab can provide in-browser terminal access to an
-environment (which is a running server or container, onto which a
-project has been deployed), or even access to services running in CI
-through a WebSocket. Workhorse manages the WebSocket upgrade and
-long-lived connection to the websocket connection, which frees
-up GitLab to process other requests.
-
-This document outlines the architecture of these connections.
-
-## Introduction to WebSockets
-
-A websocket is an "upgraded" HTTP/1.1 request. Their purpose is to
-permit bidirectional communication between a client and a server.
-**Websockets are not HTTP**. Clients can send messages (known as
-frames) to the server at any time, and vice-versa. Client messages
-are not necessarily requests, and server messages are not necessarily
-responses. WebSocket URLs have schemes like `ws://` (unencrypted) or
-`wss://` (TLS-secured).
-
-When requesting an upgrade to WebSocket, the browser sends a HTTP/1.1
-request that looks like this:
-
-```
-GET /path.ws HTTP/1.1
-Connection: upgrade
-Upgrade: websocket
-Sec-WebSocket-Protocol: terminal.gitlab.com
-# More headers, including security measures
-```
-
-At this point, the connection is still HTTP, so this is a request and
-the server can send a normal HTTP response, including `404 Not Found`,
-`500 Internal Server Error`, etc.
-
-If the server decides to permit the upgrade, it will send a HTTP
-`101 Switching Protocols` response. From this point, the connection
-is no longer HTTP. It is a WebSocket and frames, not HTTP requests,
-will flow over it. The connection will persist until the client or
-server closes the connection.
-
-In addition to the subprotocol, individual websocket frames may
-also specify a message type - examples include `BinaryMessage`,
-`TextMessage`, `Ping`, `Pong` or `Close`. Only binary frames can
-contain arbitrary data - other frames are expected to be valid
-UTF-8 strings, in addition to any subprotocol expectations.
-
-## Browser to Workhorse
-
-Using the terminal as an example, GitLab serves a JavaScript terminal
-emulator to the browser on a URL like
-`https://gitlab.com/group/project/-/environments/1/terminal`.
-This opens a websocket connection to, e.g.,
-`wss://gitlab.com/group/project/-/environments/1/terminal.ws`,
-This endpoint doesn't exist in GitLab - only in Workhorse.
-
-When receiving the connection, Workhorse first checks that the
-client is authorized to access the requested terminal. It does
-this by performing a "preauthentication" request to GitLab.
-
-If the client has the appropriate permissions and the terminal
-exists, GitLab responds with a successful response that includes
-details of the terminal that the client should be connected to.
-Otherwise, it returns an appropriate HTTP error response.
-
-Errors are passed back to the client as HTTP responses, but if
-GitLab returns valid terminal details to Workhorse, it will
-connect to the specified terminal, upgrade the browser to a
-WebSocket, and proxy between the two connections for as long
-as the browser's credentials are valid. Workhorse will also
-send regular `PingMessage` control frames to the browser, to
-keep intervening proxies from terminating the connection
-while the browser is present.
-
-The browser must request an upgrade with a specific subprotocol:
-
-### `terminal.gitlab.com`
-
-This subprotocol considers `TextMessage` frames to be invalid.
-Control frames, such as `PingMessage` or `CloseMessage`, have
-their usual meanings.
-
-`BinaryMessage` frames sent from the browser to the server are
-arbitrary text input.
-
-`BinaryMessage` frames sent from the server to the browser are
-arbitrary text output.
-
-These frames are expected to contain ANSI text control codes
-and may be in any encoding.
-
-### `base64.terminal.gitlab.com`
+---
+stage: Create
+group: Source Code
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+redirect_to: '../../../doc/development/backend/create_source_code_be/workhorse/channel.md'
+remove_date: '2022-07-01'
+---
-This subprotocol considers `BinaryMessage` frames to be invalid.
-Control frames, such as `PingMessage` or `CloseMessage`, have
-their usual meanings.
-
-`TextMessage` frames sent from the browser to the server are
-base64-encoded arbitrary text input (so the server must
-base64-decode them before inputting them).
-
-`TextMessage` frames sent from the server to the browser are
-base64-encoded arbitrary text output (so the browser must
-base64-decode them before outputting them).
-
-In their base64-encoded form, these frames are expected to
-contain ANSI terminal control codes, and may be in any encoding.
-
-## Workhorse to GitLab
-
-Using again the terminal as an example, before upgrading the browser,
-Workhorse sends a normal HTTP request to GitLab on a URL like
-`https://gitlab.com/group/project/environments/1/terminal.ws/authorize`.
-This returns a JSON response containing details of where the
-terminal can be found, and how to connect it. In particular,
-the following details are returned in case of success:
-
-* WebSocket URL to **connect** to, e.g.: `wss://example.com/terminals/1.ws?tty=1`
-* WebSocket subprotocols to support, e.g.: `["channel.k8s.io"]`
-* Headers to send, e.g.: `Authorization: Token xxyyz..`
-* Certificate authority to verify `wss` connections with (optional)
-
-Workhorse periodically re-checks this endpoint, and if it gets an
-error response, or the details of the terminal change, it will
-terminate the websocket session.
-
-## Workhorse to the WebSocket server
-
-In GitLab, environments or CI jobs may have a deployment service (e.g.,
-`KubernetesService`) associated with them. This service knows
-where the terminals or the service for an environment may be found, and these
-details are returned to Workhorse by GitLab.
-
-These URLs are *also* WebSocket URLs, and GitLab tells Workhorse
-which subprotocols to speak over the connection, along with any
-authentication details required by the remote end.
-
-Before upgrading the browser's connection to a websocket,
-Workhorse opens a HTTP client connection, according to the
-details given to it by Workhorse, and attempts to upgrade
-that connection to a websocket. If it fails, an error
-response is sent to the browser; otherwise, the browser is
-also upgraded.
-
-Workhorse now has two websocket connections, albeit with
-differing subprotocols. It decodes incoming frames from the
-browser, re-encodes them to the channel's subprotocol, and
-sends them to the channel. Similarly, it decodes incoming
-frames from the channel, re-encodes them to the browser's
-subprotocol, and sends them to the browser.
-
-When either connection closes or enters an error state,
-Workhorse detects the error and closes the other connection,
-terminating the channel session. If the browser is the
-connection that has disconnected, Workhorse will send an ANSI
-`End of Transmission` control code (the `0x04` byte) to the
-channel, encoded according to the appropriate subprotocol.
-Workhorse will automatically reply to any websocket ping frame
-sent by the channel, to avoid being disconnected.
-
-Currently, Workhorse only supports the following subprotocols.
-Supporting new deployment services will require new subprotocols
-to be supported:
-
-### `channel.k8s.io`
-
-Used by Kubernetes, this subprotocol defines a simple multiplexed
-channel.
-
-Control frames have their usual meanings. `TextMessage` frames are
-invalid. `BinaryMessage` frames represent I/O to a specific file
-descriptor.
-
-The first byte of each `BinaryMessage` frame represents the file
-descriptor (fd) number, as a `uint8` (so the value `0x00` corresponds
-to fd 0, `STDIN`, while `0x01` corresponds to fd 1, `STDOUT`).
-
-The remaining bytes represent arbitrary data. For frames received
-from the server, they are bytes that have been received from that
-fd. For frames sent to the server, they are bytes that should be
-written to that fd.
-
-### `base64.channel.k8s.io`
-
-Also used by Kubernetes, this subprotocol defines a similar multiplexed
-channel to `channel.k8s.io`. The main differences are:
+# Websocket channel support
-* `TextMessage` frames are valid, rather than `BinaryMessage` frames.
-* The first byte of each `TextMessage` frame represents the file
- descriptor as a numeric UTF-8 character, so the character `U+0030`,
- or "0", is fd 0, STDIN).
-* The remaining bytes represent base64-encoded arbitrary data.
+This document was moved to [another location](../../../doc/development/workhorse/channel.md).
+<!-- This redirect file can be deleted after <2022-07-01>. -->
+<!-- Redirects that point to other docs in the same project expire in three months. -->
+<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
+<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
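For reference, the frame layout described in the removed channel doc above (first byte is the fd number, the remainder is the payload; the base64 variant uses a UTF-8 digit followed by base64-encoded data) can be decoded in a few lines of Go. This is a minimal sketch; `decodeK8sFrame` is a hypothetical helper, not part of Workhorse or Kubernetes:

```go
// Minimal sketch of decoding channel.k8s.io / base64.channel.k8s.io frames,
// based only on the subprotocol description above.
package main

import (
	"encoding/base64"
	"errors"
	"fmt"
)

func decodeK8sFrame(frame []byte, base64Encoded bool) (fd int, data []byte, err error) {
	if len(frame) == 0 {
		return 0, nil, errors.New("empty frame")
	}
	if base64Encoded {
		// base64.channel.k8s.io: first byte is a numeric UTF-8 character,
		// e.g. '0' (U+0030) is fd 0 (STDIN); the rest is base64 data.
		fd = int(frame[0] - '0')
		data, err = base64.StdEncoding.DecodeString(string(frame[1:]))
		return fd, data, err
	}
	// channel.k8s.io: first byte is the fd number as a uint8,
	// e.g. 0x01 is fd 1 (STDOUT); the rest is raw data.
	return int(frame[0]), frame[1:], nil
}

func main() {
	fd, data, _ := decodeK8sFrame([]byte{0x01, 'h', 'i'}, false)
	fmt.Println(fd, string(data)) // 1 hi
}
```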
diff --git a/workhorse/doc/architecture/gitlab_features.md b/workhorse/doc/architecture/gitlab_features.md
index 2d3ca78f5e8..42dd919690d 100644
--- a/workhorse/doc/architecture/gitlab_features.md
+++ b/workhorse/doc/architecture/gitlab_features.md
@@ -1,69 +1,14 @@
-# Features that rely on Workhorse
-
-Workhorse itself is not a feature, but there are several features in
-GitLab that would not work efficiently without Workhorse.
-
-To put the efficiency benefit in context, consider that in 2020Q3 on
-GitLab.com [we see][thanos] Rails application threads using on average
-about 200MB of RSS vs about 200KB for Workhorse goroutines.
-
-Examples of features that rely on Workhorse:
-
-## 1. `git clone` and `git push` over HTTP
-
-Git clone, pull and push are slow because they transfer large amounts
-of data and because each is CPU intensive on the GitLab side. Without
-Workhorse, HTTP access to Git repositories would compete with regular
-web access to the application, requiring us to run way more Rails
-application servers.
-
-## 2. CI runner long polling
+---
+stage: Create
+group: Source Code
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+---
-GitLab CI runners fetch new CI jobs by polling the GitLab server.
-Workhorse acts as a kind of "waiting room" where CI runners can sit
-and wait for new CI jobs. Because of Go's efficiency we can fit a lot
-of runners in the waiting room at little cost. Without this waiting
-room mechanism we would have to add a lot more Rails server capacity.
-
-## 3. File uploads and downloads
-
-File uploads and downloads may be slow either because the file is
-large or because the user's connection is slow. Workhorse can handle
-the slow part for Rails. This improves the efficiency of features such
-as CI artifacts, package repositories, LFS objects, etc.
-
-## 4. Websocket proxying
-
-Features such as the web terminal require a long lived connection
-between the user's web browser and a container inside GitLab that is
-not directly accessible from the internet. Dedicating a Rails
-application thread to proxying such a connection would cost much more
-memory than it costs to have Workhorse look after it.
-
-## Quick facts (how does Workhorse work)
-
-- Workhorse can handle some requests without involving Rails at all:
- for example, JavaScript files and CSS files are served straight
- from disk.
-- Workhorse can modify responses sent by Rails: for example if you use
- `send_file` in Rails then GitLab Workhorse will open the file on
- disk and send its contents as the response body to the client.
-- Workhorse can take over requests after asking permission from Rails.
- Example: handling `git clone`.
-- Workhorse can modify requests before passing them to Rails. Example:
- when handling a Git LFS upload Workhorse first asks permission from
- Rails, then it stores the request body in a tempfile, then it sends
- a modified request containing the tempfile path to Rails.
-- Workhorse can manage long-lived WebSocket connections for Rails.
- Example: handling the terminal websocket for environments.
-- Workhorse does not connect to PostgreSQL, only to Rails and (optionally) Redis.
-- We assume that all requests that reach Workhorse pass through an
- upstream proxy such as NGINX or Apache first.
-- Workhorse does not accept HTTPS connections.
-- Workhorse does not clean up idle client connections.
-- We assume that all requests to Rails pass through Workhorse.
+# Features that rely on Workhorse
-For more information see ['A brief history of GitLab Workhorse'][brief-history-blog].
+This document was moved to [another location](../../../doc/development/workhorse/gitlab_features.md).
-[thanos]: https://thanos-query.ops.gitlab.net/graph?g0.range_input=1h&g0.max_source_resolution=0s&g0.expr=sum(ruby_process_resident_memory_bytes%7Bapp%3D%22webservice%22%2Cenv%3D%22gprd%22%2Crelease%3D%22gitlab%22%7D)%20%2F%20sum(puma_max_threads%7Bapp%3D%22webservice%22%2Cenv%3D%22gprd%22%2Crelease%3D%22gitlab%22%7D)&g0.tab=1&g1.range_input=1h&g1.max_source_resolution=0s&g1.expr=sum(go_memstats_sys_bytes%7Bapp%3D%22webservice%22%2Cenv%3D%22gprd%22%2Crelease%3D%22gitlab%22%7D)%2Fsum(go_goroutines%7Bapp%3D%22webservice%22%2Cenv%3D%22gprd%22%2Crelease%3D%22gitlab%22%7D)&g1.tab=1
-[brief-history-blog]: https://about.gitlab.com/2016/04/12/a-brief-history-of-gitlab-workhorse/
+<!-- This redirect file can be deleted after <2022-07-01>. -->
+<!-- Redirects that point to other docs in the same project expire in three months. -->
+<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
+<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
diff --git a/workhorse/doc/channel.md b/workhorse/doc/channel.md
index 68553170775..86078321dff 100644
--- a/workhorse/doc/channel.md
+++ b/workhorse/doc/channel.md
@@ -1 +1,14 @@
-This file was moved to [`architecture/channel.md`](doc/architecture/channel.md).
+---
+stage: Create
+group: Source Code
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+---
+
+# Websocket channel support
+
+This document was moved to [another location](../../doc/development/workhorse/channel.md).
+
+<!-- This redirect file can be deleted after <2022-07-01>. -->
+<!-- Redirects that point to other docs in the same project expire in three months. -->
+<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
+<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
diff --git a/workhorse/doc/development/new_features.md b/workhorse/doc/development/new_features.md
index e8bbd345a40..60f0ad75539 100644
--- a/workhorse/doc/development/new_features.md
+++ b/workhorse/doc/development/new_features.md
@@ -1,41 +1,14 @@
-## Adding new features to Workhorse
+---
+stage: Create
+group: Source Code
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+---
-GitLab Workhorse is a smart reverse proxy for GitLab. It handles
-"long" HTTP requests such as file downloads, file uploads, Git
-push/pull and Git archive downloads.
+# Adding new features to Workhorse
-Workhorse itself is not a feature, but there are [several features in GitLab](https://gitlab.com/gitlab-org/gitlab/-/blob/master/workhorse/doc/architecture/gitlab_features.md) that would not work efficiently without Workhorse.
-
-At a first glance, it may look like Workhorse is just a pipeline for processing HTTP streams so that you can reduce the amount of logic in your Ruby on Rails controller, but there are good reasons to avoid treating it like that.
-
-Engineers embarking on the quest of offloading a feature to Workhorse often find that the endeavor is much higher than what originally anticipated. In part because of the new programming language (only a few engineers at GitLab are Go developers), in part because of the demanding requirements for Workhorse. Workhorse is stateless, memory and disk usage must be kept under tight control, and the request should not be slowed down in the process.
-
-## Can I add a new feature to Workhorse?
-
-We suggest to follow this route only if absolutely necessary and no other options are available.
-
-Splitting a feature between the Rails code-base and Workhorse is deliberately choosing to introduce technical debt. It adds complexity to the system and coupling between the two components.
-
-* Building features using Workhorse has a considerable complexity cost, so you should prefer designs based on Rails requests and Sidekiq jobs.
-* Even when using Rails+Sidekiq is "more work" than using Rails+Workhorse, Rails+Sidekiq is easier to maintain in the long term because Workhorse is unique to GitLab while Rails+Sidekiq is an industry standard.
-* For "global" behaviors around web requests consider using a Rack middleware instead of Workhorse.
-* Generally speaking, we should only use Rails+Workhorse if the HTTP client expects behavior that is not reasonable to implement in Rails, like "long" requests.
-
-## What is a "long" request?
-
-There is one order of magnitude between Workhorse and puma RAM usage. Having connection open for a period longer than milliseconds is a problem because of the amount of RAM it monopolizes once it reaches the Ruby on Rails controller.
-
-So far we identified two classes of "long" requests: data transfers and HTTP long polling.
-
-`git push`, `git pull`, uploading or downloading an artifact, the CI runner waiting for a new job are all good examples of long requests.
-
-With the rise of cloud-native installations, Workhorse's feature-set was extended to add object storage direct-upload, to get rid of the shared Network File System (NFS) drives.
-
-In 2020 @nolith presented at FOSDEM a talk titled [_Speed up the monolith. Building a smart reverse proxy in Go_](https://archive.fosdem.org/2020/schedule/event/speedupmonolith/).
-You can watch the recording for more details on the history of Workhorse and the NFS removal.
-
-[Uploads development documentation]( https://docs.gitlab.com/ee/development/uploads.html)
-contains the most common use-cases for adding a new type of upload and may answer all of your questions.
-
-If you still think we should add a new feature to Workhorse, please open an issue explaining **what you want to implement** and **why it can't be implemented in our ruby code-base**. Workhorse maintainers will be happy to help you assessing the situation.
+This document was moved to [another location](../../../doc/development/workhorse/new_features.md).
+<!-- This redirect file can be deleted after <2022-07-01>. -->
+<!-- Redirects that point to other docs in the same project expire in three months. -->
+<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
+<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
diff --git a/workhorse/doc/development/tests.md b/workhorse/doc/development/tests.md
index 82f74e8656b..a5eb7571cc0 100644
--- a/workhorse/doc/development/tests.md
+++ b/workhorse/doc/development/tests.md
@@ -1,17 +1,14 @@
-# Testing your code
-
-Run the tests with:
+---
+stage: Create
+group: Source Code
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+---
-```
-make clean test
-```
-
-## Coverage / what to test
+# Testing your code
-Each feature in GitLab Workhorse should have an integration test that
-verifies that the feature 'kicks in' on the right requests and leaves
-other requests unaffected. It is better to also have package-level tests
-for specific behavior but the high-level integration tests should have
-the first priority during development.
+This document was moved to [another location](../../../doc/development/workhorse/index.md).
-It is OK if a feature is only covered by integration tests.
+<!-- This redirect file can be deleted after <2022-07-01>. -->
+<!-- Redirects that point to other docs in the same project expire in three months. -->
+<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
+<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
diff --git a/workhorse/doc/operations/configuration.md b/workhorse/doc/operations/configuration.md
index 8694cf1bd82..62dc8369dfb 100644
--- a/workhorse/doc/operations/configuration.md
+++ b/workhorse/doc/operations/configuration.md
@@ -1,213 +1,14 @@
-# Workhorse configuration
-
-For historical reasons Workhorse uses both command line flags, a configuration file and environment variables.
-
-All new configuration options that get added to Workhorse should go into the configuration file.
-
-## CLI options
-
-```
- gitlab-workhorse [OPTIONS]
-
-Options:
- -apiCiLongPollingDuration duration
- Long polling duration for job requesting for runners (default 50ns)
- -apiLimit uint
- Number of API requests allowed at single time
- -apiQueueDuration duration
- Maximum queueing duration of requests (default 30s)
- -apiQueueLimit uint
- Number of API requests allowed to be queued
- -authBackend string
- Authentication/authorization backend (default "http://localhost:8080")
- -authSocket string
- Optional: Unix domain socket to dial authBackend at
- -cableBackend string
- Optional: ActionCable backend (default authBackend)
- -cableSocket string
- Optional: Unix domain socket to dial cableBackend at (default authSocket)
- -config string
- TOML file to load config from
- -developmentMode
- Allow the assets to be served from Rails app
- -documentRoot string
- Path to static files content (default "public")
- -listenAddr string
- Listen address for HTTP server (default "localhost:8181")
- -listenNetwork string
- Listen 'network' (tcp, tcp4, tcp6, unix) (default "tcp")
- -listenUmask int
- Umask for Unix socket
- -logFile string
- Log file location
- -logFormat string
- Log format to use defaults to text (text, json, structured, none) (default "text")
- -pprofListenAddr string
- pprof listening address, e.g. 'localhost:6060'
- -prometheusListenAddr string
- Prometheus listening address, e.g. 'localhost:9229'
- -proxyHeadersTimeout duration
- How long to wait for response headers when proxying the request (default 5m0s)
- -secretPath string
- File with secret key to authenticate with authBackend (default "./.gitlab_workhorse_secret")
- -version
- Print version and exit
-```
-
-The 'auth backend' refers to the GitLab Rails application. The name is
-a holdover from when GitLab Workhorse only handled Git push/pull over
-HTTP.
-
-GitLab Workhorse can listen on either a TCP or a Unix domain socket. It
-can also open a second listening TCP listening socket with the Go
-[net/http/pprof profiler server](http://golang.org/pkg/net/http/pprof/).
-
-GitLab Workhorse can listen on redis events (currently only builds/register
-for runners). This requires you to pass a valid TOML config file via
-`-config` flag.
-For regular setups it only requires the following (replacing the string
-with the actual socket)
-
-## Redis
-
-GitLab Workhorse integrates with Redis to do long polling for CI build
-requests. This is configured via two things:
-
-- Redis settings in the TOML config file
-- The `-apiCiLongPollingDuration` command line flag to control polling
- behavior for CI build requests
-
-It is OK to enable Redis in the config file but to leave CI polling
-disabled; this just results in an idle Redis pubsub connection. The
-opposite is not possible: CI long polling requires a correct Redis
-configuration.
-
-Below we discuss the options for the `[redis]` section in the config
-file.
-
-```
-[redis]
-URL = "unix:///var/run/gitlab/redis.sock"
-Password = "my_awesome_password"
-Sentinel = [ "tcp://sentinel1:23456", "tcp://sentinel2:23456" ]
-SentinelMaster = "mymaster"
-```
-
-- `URL` takes a string in the format `unix://path/to/redis.sock` or
-`tcp://host:port`.
-- `Password` is only required if your redis instance is password-protected
-- `Sentinel` is used if you are using Sentinel.
- *NOTE* that if both `Sentinel` and `URL` are given, only `Sentinel` will be used
-
-Optional fields are as follows:
-```
-[redis]
-DB = 0
-MaxIdle = 1
-MaxActive = 1
-```
-
-- `DB` is the Database to connect to. Defaults to `0`
-- `MaxIdle` is how many idle connections can be in the redis-pool at once. Defaults to 1
-- `MaxActive` is how many connections the pool can keep. Defaults to 1
-
-## Relative URL support
-
-If you are mounting GitLab at a relative URL, e.g.
-`example.com/gitlab`, then you should also use this relative URL in
-the `authBackend` setting:
-
-```
-gitlab-workhorse -authBackend http://localhost:8080/gitlab
-```
-
-## Interaction of authBackend and authSocket
-
-The interaction between `authBackend` and `authSocket` can be a bit
-confusing. It comes down to: if `authSocket` is set it overrides the
-_host_ part of `authBackend` but not the relative path.
-
-In table form:
+---
+stage: Create
+group: Source Code
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+---
-|authBackend|authSocket|Workhorse connects to?|Rails relative URL|
-|---|---|---|---|
-|unset|unset|`localhost:8080`|`/`|
-|`http://localhost:3000`|unset|`localhost:3000`|`/`|
-|`http://localhost:3000/gitlab`|unset|`localhost:3000`|`/gitlab`|
-|unset|`/path/to/socket`|`/path/to/socket`|`/`|
-|`http://localhost:3000`|`/path/to/socket`|`/path/to/socket`|`/`|
-|`http://localhost:3000/gitlab`|`/path/to/socket`|`/path/to/socket`|`/gitlab`|
-
-The same applies to `cableBackend` and `cableSocket`.
-
-## Error tracking
-
-GitLab-Workhorse supports remote error tracking with
-[Sentry](https://sentry.io). To enable this feature set the
-`GITLAB_WORKHORSE_SENTRY_DSN` environment variable.
-You can also set the `GITLAB_WORKHORSE_SENTRY_ENVIRONMENT` environment variable to
-use the Sentry environment functionality to separate staging, production and
-development.
-
-Omnibus (`/etc/gitlab/gitlab.rb`):
-
-```
-gitlab_workhorse['env'] = {
- 'GITLAB_WORKHORSE_SENTRY_DSN' => 'https://foobar'
- 'GITLAB_WORKHORSE_SENTRY_ENVIRONMENT' => 'production'
-}
-```
-
-Source installations (`/etc/default/gitlab`):
-
-```
-export GITLAB_WORKHORSE_SENTRY_DSN='https://foobar'
-export GITLAB_WORKHORSE_SENTRY_ENVIRONMENT='production'
-```
-
-## Distributed Tracing
-
-Workhorse supports distributed tracing through [LabKit][] using [OpenTracing APIs](https://opentracing.io).
-
-By default, no tracing implementation is linked into the binary, but different OpenTracing providers can be linked in using [build tags][build-tags]/[build constraints][build-tags]. This can be done by setting the `BUILD_TAGS` make variable.
-
-For more details of the supported providers, see LabKit, but as an example, for Jaeger tracing support, include the tags: `BUILD_TAGS="tracer_static tracer_static_jaeger"`.
-
-```shell
-make BUILD_TAGS="tracer_static tracer_static_jaeger"
-```
-
-Once Workhorse is compiled with an opentracing provider, the tracing configuration is configured via the `GITLAB_TRACING` environment variable.
-
-For example:
-
-```shell
-GITLAB_TRACING=opentracing://jaeger ./gitlab-workhorse
-```
-
-## Continuous Profiling
-
-Workhorse supports continuous profiling through [LabKit][] using [Stackdriver Profiler](https://cloud.google.com/profiler).
-
-By default, the Stackdriver Profiler implementation is linked in the binary using [build tags][build-tags], though it's not
-required and can be skipped.
-
-For example:
-
-```shell
-make BUILD_TAGS=""
-```
-
-Once Workhorse is compiled with Continuous Profiling, the profiler configuration can be set via `GITLAB_CONTINUOUS_PROFILING`
-environment variable.
-
-For example:
-
-```shell
-GITLAB_CONTINUOUS_PROFILING="stackdriver?service=workhorse&service_version=1.0.1&project_id=test-123 ./gitlab-workhorse"
-```
+# Workhorse configuration
-More information about see the [LabKit monitoring docs](https://gitlab.com/gitlab-org/labkit/-/blob/master/monitoring/doc.go).
+This document was moved to [another location](../../../doc/development/workhorse/configuration.md).
-[LabKit]: https://gitlab.com/gitlab-org/labkit/
-[build-tags]: https://golang.org/pkg/go/build/#hdr-Build_Constraints
+<!-- This redirect file can be deleted after <2022-07-01>. -->
+<!-- Redirects that point to other docs in the same project expire in three months. -->
+<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
+<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
diff --git a/workhorse/doc/operations/install.md b/workhorse/doc/operations/install.md
index 3bee13e2683..9f0a8212783 100644
--- a/workhorse/doc/operations/install.md
+++ b/workhorse/doc/operations/install.md
@@ -1,44 +1,14 @@
-# Installation
-
-To install GitLab Workhorse you need [Go 1.15 or
-newer](https://golang.org/dl) and [GNU
-Make](https://www.gnu.org/software/make/).
-
-To install into `/usr/local/bin` run `make install`.
-
-```
-make install
-```
-
-To install into `/foo/bin` set the PREFIX variable.
-
-```
-make install PREFIX=/foo
-```
+---
+stage: Create
+group: Source Code
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+---
-On some operating systems, such as FreeBSD, you may have to use
-`gmake` instead of `make`.
-
-*NOTE*: Some features depends on build tags, make sure to check
-[Workhorse configuration](doc/operations/configuration.md) to enable them.
-
-## Run time dependencies
-
-### Exiftool
-
-Workhorse uses [exiftool](https://www.sno.phy.queensu.ca/~phil/exiftool/) for
-removing EXIF data (which may contain sensitive information) from uploaded
-images. If you installed GitLab:
-
-- Using the Omnibus package, you're all set.
- *NOTE* that if you are using CentOS Minimal, you may need to install `perl`
- package: `yum install perl`
-- From source, make sure `exiftool` is installed:
+# Installation
- ```sh
- # Debian/Ubuntu
- sudo apt-get install libimage-exiftool-perl
+This document was moved to [another location](../../../doc/development/workhorse/index.md).
- # RHEL/CentOS
- sudo yum install perl-Image-ExifTool
- ```
+<!-- This redirect file can be deleted after <2022-07-01>. -->
+<!-- Redirects that point to other docs in the same project expire in three months. -->
+<!-- Redirects that point to docs in a different project or site (for example, link is not relative and starts with `https:`) expire in one year. -->
+<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/redirects.html -->
diff --git a/workhorse/go.mod b/workhorse/go.mod
index 3a264d41dac..83bdcd0b5bb 100644
--- a/workhorse/go.mod
+++ b/workhorse/go.mod
@@ -28,7 +28,7 @@ require (
github.com/sirupsen/logrus v1.8.1
github.com/smartystreets/goconvey v1.6.4
github.com/stretchr/testify v1.7.0
- gitlab.com/gitlab-org/gitaly/v14 v14.9.0-rc1
+ gitlab.com/gitlab-org/gitaly/v14 v14.9.0-rc5.0.20220329111719-51da8bc17059
gitlab.com/gitlab-org/golang-archive-zip v0.1.1
gitlab.com/gitlab-org/labkit v1.6.0
gocloud.dev v0.23.0
diff --git a/workhorse/go.sum b/workhorse/go.sum
index a565e93bfcc..1cb7418d3c1 100644
--- a/workhorse/go.sum
+++ b/workhorse/go.sum
@@ -606,7 +606,7 @@ github.com/lib/pq v1.10.1/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lib/pq v1.10.2/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/libgit2/git2go v0.0.0-20190104134018-ecaeb7a21d47/go.mod h1:4bKN42efkbNYMZlvDfxGDxzl066GhpvIircZDsm8Y+Y=
github.com/libgit2/git2go/v31 v31.4.12/go.mod h1:c/rkJcBcUFx6wHaT++UwNpKvIsmPNqCeQ/vzO4DrEec=
-github.com/libgit2/git2go/v33 v33.0.6/go.mod h1:KdpqkU+6+++4oHna/MIOgx4GCQ92IPCdpVRMRI80J+4=
+github.com/libgit2/git2go/v33 v33.0.9/go.mod h1:KdpqkU+6+++4oHna/MIOgx4GCQ92IPCdpVRMRI80J+4=
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20200305213919-a88bf8de3718/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20210210170715-a8dfcb80d3a7 h1:YjW+hUb8Fh2S58z4av4t/0cBMK/Q0aP48RocCFsC8yI=
@@ -886,8 +886,8 @@ github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wK
gitlab.com/gitlab-org/gitaly v1.68.0 h1:VlcJs1+PrhW7lqJUU7Fh1q8FMJujmbbivdfde/cwB98=
gitlab.com/gitlab-org/gitaly v1.68.0/go.mod h1:/pCsB918Zu5wFchZ9hLYin9WkJ2yQqdVNz0zlv5HbXg=
gitlab.com/gitlab-org/gitaly/v14 v14.0.0-rc1/go.mod h1:4Cz8tOAyueSZX5o6gYum1F/unupaOclxqETPcg4ODvQ=
-gitlab.com/gitlab-org/gitaly/v14 v14.9.0-rc1 h1:9vStRdXxcBQ8dHlVnpV28fwLOgyDkSFIpGnPqwzdTvw=
-gitlab.com/gitlab-org/gitaly/v14 v14.9.0-rc1/go.mod h1:Xk5pn6IWsejg3z2X6BRczC5QaI97PRF3GU5OrJ5Amkg=
+gitlab.com/gitlab-org/gitaly/v14 v14.9.0-rc5.0.20220329111719-51da8bc17059 h1:X7+3GQIxUpScXpIMCU5+sfpYvZyBIQ3GMlEosP7Jssw=
+gitlab.com/gitlab-org/gitaly/v14 v14.9.0-rc5.0.20220329111719-51da8bc17059/go.mod h1:uX1qhFKBDuPqATlpMcFL2dKDiX8D/tbUg7CYWx7OXt4=
gitlab.com/gitlab-org/gitlab-shell v1.9.8-0.20201117050822-3f9890ef73dc/go.mod h1:5QSTbpAHY2v0iIH5uHh2KA9w7sPUqPmnLjDApI/sv1U=
gitlab.com/gitlab-org/gitlab-shell v1.9.8-0.20210720163109-50da611814d2/go.mod h1:QWDYBwuy24qGMandtCngLRPzFgnGPg6LSNoJWPKmJMc=
gitlab.com/gitlab-org/golang-archive-zip v0.1.1 h1:35k9giivbxwF03+8A05Cm8YoxoakU8FBCj5gysjCTCE=
diff --git a/workhorse/internal/api/api.go b/workhorse/internal/api/api.go
index 7f696f70c7a..896f59a322a 100644
--- a/workhorse/internal/api/api.go
+++ b/workhorse/internal/api/api.go
@@ -64,7 +64,13 @@ func NewAPI(myURL *url.URL, version string, roundTripper http.RoundTripper) *API
}
type GeoProxyEndpointResponse struct {
- GeoProxyURL string `json:"geo_proxy_url"`
+ GeoProxyURL string `json:"geo_proxy_url"`
+ GeoProxyExtraData string `json:"geo_proxy_extra_data"`
+}
+
+type GeoProxyData struct {
+ GeoProxyURL *url.URL
+ GeoProxyExtraData string
}
type HandleFunc func(http.ResponseWriter, *http.Request, *Response)
@@ -394,7 +400,7 @@ func validResponseContentType(resp *http.Response) bool {
return helper.IsContentType(ResponseContentType, resp.Header.Get("Content-Type"))
}
-func (api *API) GetGeoProxyURL() (*url.URL, error) {
+func (api *API) GetGeoProxyData() (*GeoProxyData, error) {
geoProxyApiUrl := *api.URL
geoProxyApiUrl.Path, geoProxyApiUrl.RawPath = joinURLPath(api.URL, geoProxyEndpointPath)
geoProxyApiReq := &http.Request{
@@ -405,23 +411,26 @@ func (api *API) GetGeoProxyURL() (*url.URL, error) {
httpResponse, err := api.doRequestWithoutRedirects(geoProxyApiReq)
if err != nil {
- return nil, fmt.Errorf("GetGeoProxyURL: do request: %v", err)
+ return nil, fmt.Errorf("GetGeoProxyData: do request: %v", err)
}
defer httpResponse.Body.Close()
if httpResponse.StatusCode != http.StatusOK {
- return nil, fmt.Errorf("GetGeoProxyURL: Received HTTP status code: %v", httpResponse.StatusCode)
+ return nil, fmt.Errorf("GetGeoProxyData: Received HTTP status code: %v", httpResponse.StatusCode)
}
response := &GeoProxyEndpointResponse{}
if err := json.NewDecoder(httpResponse.Body).Decode(response); err != nil {
- return nil, fmt.Errorf("GetGeoProxyURL: decode response: %v", err)
+ return nil, fmt.Errorf("GetGeoProxyData: decode response: %v", err)
}
geoProxyURL, err := url.Parse(response.GeoProxyURL)
if err != nil {
- return nil, fmt.Errorf("GetGeoProxyURL: Could not parse Geo proxy URL: %v, err: %v", response.GeoProxyURL, err)
+ return nil, fmt.Errorf("GetGeoProxyData: Could not parse Geo proxy URL: %v, err: %v", response.GeoProxyURL, err)
}
- return geoProxyURL, nil
+ return &GeoProxyData{
+ GeoProxyURL: geoProxyURL,
+ GeoProxyExtraData: response.GeoProxyExtraData,
+ }, nil
}
diff --git a/workhorse/internal/api/api_test.go b/workhorse/internal/api/api_test.go
index b82bb55fb85..346f32b4a36 100644
--- a/workhorse/internal/api/api_test.go
+++ b/workhorse/internal/api/api_test.go
@@ -4,7 +4,6 @@ import (
"fmt"
"net/http"
"net/http/httptest"
- "net/url"
"regexp"
"testing"
@@ -18,21 +17,37 @@ import (
"gitlab.com/gitlab-org/gitlab/workhorse/internal/upstream/roundtripper"
)
-func TestGetGeoProxyURLWhenGeoSecondary(t *testing.T) {
- geoProxyURL, err := getGeoProxyURLGivenResponse(t, `{"geo_proxy_url":"http://primary"}`)
-
- require.NoError(t, err)
- require.Equal(t, "http://primary", geoProxyURL.String())
-}
-
-func TestGetGeoProxyURLWhenGeoPrimaryOrNonGeo(t *testing.T) {
- geoProxyURL, err := getGeoProxyURLGivenResponse(t, "{}")
-
- require.NoError(t, err)
- require.Equal(t, "", geoProxyURL.String())
+func TestGetGeoProxyDataForResponses(t *testing.T) {
+ testCases := []struct {
+ desc string
+ json string
+ expectedError bool
+ expectedURL string
+ expectedExtraData string
+ }{
+ {"when Geo secondary", `{"geo_proxy_url":"http://primary","geo_proxy_extra_data":"geo-data"}`, false, "http://primary", "geo-data"},
+ {"when Geo secondary with explicit null data", `{"geo_proxy_url":"http://primary","geo_proxy_extra_data":null}`, false, "http://primary", ""},
+ {"when Geo secondary without extra data", `{"geo_proxy_url":"http://primary"}`, false, "http://primary", ""},
+ {"when Geo primary or no node", `{}`, false, "", ""},
+ {"for malformed request", `non-json`, true, "", ""},
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ geoProxyData, err := getGeoProxyDataGivenResponse(t, tc.json)
+
+ if tc.expectedError {
+ require.Error(t, err)
+ } else {
+ require.NoError(t, err)
+ require.Equal(t, tc.expectedURL, geoProxyData.GeoProxyURL.String())
+ require.Equal(t, tc.expectedExtraData, geoProxyData.GeoProxyExtraData)
+ }
+ })
+ }
}
-func getGeoProxyURLGivenResponse(t *testing.T, givenInternalApiResponse string) (*url.URL, error) {
+func getGeoProxyDataGivenResponse(t *testing.T, givenInternalApiResponse string) (*GeoProxyData, error) {
t.Helper()
ts := testRailsServer(regexp.MustCompile(`/api/v4/geo/proxy`), 200, givenInternalApiResponse)
defer ts.Close()
@@ -43,9 +58,9 @@ func getGeoProxyURLGivenResponse(t *testing.T, givenInternalApiResponse string)
apiClient := NewAPI(backend, version, rt)
- geoProxyURL, err := apiClient.GetGeoProxyURL()
+ geoProxyData, err := apiClient.GetGeoProxyData()
- return geoProxyURL, err
+ return geoProxyData, err
}
func testRailsServer(url *regexp.Regexp, code int, body string) *httptest.Server {
diff --git a/workhorse/internal/git/info-refs.go b/workhorse/internal/git/info-refs.go
index 8390143b99b..b7f825839f8 100644
--- a/workhorse/internal/git/info-refs.go
+++ b/workhorse/internal/git/info-refs.go
@@ -6,6 +6,7 @@ import (
"fmt"
"io"
"net/http"
+ "sync"
"github.com/golang/gddo/httputil"
grpccodes "google.golang.org/grpc/codes"
@@ -64,21 +65,43 @@ func handleGetInfoRefsWithGitaly(ctx context.Context, responseWriter *HttpRespon
return err
}
- var w io.Writer
-
+ var w io.WriteCloser = nopCloser{responseWriter}
if encoding == "gzip" {
- gzWriter := gzip.NewWriter(responseWriter)
- w = gzWriter
- defer gzWriter.Close()
+ gzWriter := getGzWriter(responseWriter)
+ defer putGzWriter(gzWriter)
+ w = gzWriter
responseWriter.Header().Set("Content-Encoding", "gzip")
- } else {
- w = responseWriter
}
if _, err = io.Copy(w, infoRefsResponseReader); err != nil {
return err
}
+ if err := w.Close(); err != nil {
+ return err
+ }
+
return nil
}
+
+var gzipPool = &sync.Pool{New: func() interface{} {
+ // Invariant: the inner writer is io.Discard. We do not want to retain
+ // response writers of past requests in the pool.
+ return gzip.NewWriter(io.Discard)
+}}
+
+func getGzWriter(w io.Writer) *gzip.Writer {
+ gzWriter := gzipPool.Get().(*gzip.Writer)
+ gzWriter.Reset(w)
+ return gzWriter
+}
+
+func putGzWriter(w *gzip.Writer) {
+ w.Reset(io.Discard) // Maintain pool invariant
+ gzipPool.Put(w)
+}
+
+type nopCloser struct{ io.Writer }
+
+func (nc nopCloser) Close() error { return nil }
diff --git a/workhorse/internal/headers/content_headers.go b/workhorse/internal/headers/content_headers.go
index 9c33ddb8c8a..8cca3d97e82 100644
--- a/workhorse/internal/headers/content_headers.go
+++ b/workhorse/internal/headers/content_headers.go
@@ -8,28 +8,37 @@ import (
)
var (
- ImageTypeRegex = regexp.MustCompile(`^image/*`)
- SvgMimeTypeRegex = regexp.MustCompile(`^image/svg\+xml$`)
+ javaScriptTypeRegex = regexp.MustCompile(`^(text|application)\/javascript$`)
- TextTypeRegex = regexp.MustCompile(`^text/*`)
+ imageTypeRegex = regexp.MustCompile(`^image/*`)
+ svgMimeTypeRegex = regexp.MustCompile(`^image/svg\+xml$`)
- VideoTypeRegex = regexp.MustCompile(`^video/*`)
+ textTypeRegex = regexp.MustCompile(`^text/*`)
- PdfTypeRegex = regexp.MustCompile(`application\/pdf`)
+ videoTypeRegex = regexp.MustCompile(`^video/*`)
- AttachmentRegex = regexp.MustCompile(`^attachment`)
- InlineRegex = regexp.MustCompile(`^inline`)
+ pdfTypeRegex = regexp.MustCompile(`application\/pdf`)
+
+ attachmentRegex = regexp.MustCompile(`^attachment`)
+ inlineRegex = regexp.MustCompile(`^inline`)
)
// Mime types that can't be inlined. Usually subtypes of main types
-var forbiddenInlineTypes = []*regexp.Regexp{SvgMimeTypeRegex}
+var forbiddenInlineTypes = []*regexp.Regexp{svgMimeTypeRegex}
// Mime types that can be inlined. We can add global types like "image/" or
// specific types like "text/plain". If there is a specific type inside a global
// allowed type that can't be inlined we must add it to the forbiddenInlineTypes var.
// One example of this is the mime type "image". We allow all images to be
// inlined except for SVGs.
-var allowedInlineTypes = []*regexp.Regexp{ImageTypeRegex, TextTypeRegex, VideoTypeRegex, PdfTypeRegex}
+var allowedInlineTypes = []*regexp.Regexp{imageTypeRegex, textTypeRegex, videoTypeRegex, pdfTypeRegex}
+
+const (
+ svgContentType = "image/svg+xml"
+ textPlainContentType = "text/plain; charset=utf-8"
+ attachmentDispositionText = "attachment"
+ inlineDispositionText = "inline"
+)
func SafeContentHeaders(data []byte, contentDisposition string) (string, string) {
contentType := safeContentType(data)
@@ -40,16 +49,24 @@ func SafeContentHeaders(data []byte, contentDisposition string) (string, string)
func safeContentType(data []byte) string {
// Special case for svg because DetectContentType detects it as text
if svg.Is(data) {
- return "image/svg+xml"
+ return svgContentType
}
// Override any existing Content-Type header from other ResponseWriters
contentType := http.DetectContentType(data)
+ // http.DetectContentType does not support JavaScript and would only
+ // return text/plain. But for cautionary measures, just in case they start supporting
+ // it down the road and start returning application/javascript, we want to handle it now
+ // to avoid regressions.
+ if isType(contentType, javaScriptTypeRegex) {
+ return textPlainContentType
+ }
+
// If the content is text type, we set to plain, because we don't
// want to render it inline if they're html or javascript
- if isType(contentType, TextTypeRegex) {
- return "text/plain; charset=utf-8"
+ if isType(contentType, textTypeRegex) {
+ return textPlainContentType
}
return contentType
@@ -58,7 +75,7 @@ func safeContentType(data []byte) string {
func safeContentDisposition(contentType string, contentDisposition string) string {
// If the existing disposition is attachment we return that. This allow us
// to force a download from GitLab (ie: RawController)
- if AttachmentRegex.MatchString(contentDisposition) {
+ if attachmentRegex.MatchString(contentDisposition) {
return contentDisposition
}
@@ -82,11 +99,11 @@ func safeContentDisposition(contentType string, contentDisposition string) strin
func attachmentDisposition(contentDisposition string) string {
if contentDisposition == "" {
- return "attachment"
+ return attachmentDispositionText
}
- if InlineRegex.MatchString(contentDisposition) {
- return InlineRegex.ReplaceAllString(contentDisposition, "attachment")
+ if inlineRegex.MatchString(contentDisposition) {
+ return inlineRegex.ReplaceAllString(contentDisposition, attachmentDispositionText)
}
return contentDisposition
@@ -94,11 +111,11 @@ func attachmentDisposition(contentDisposition string) string {
func inlineDisposition(contentDisposition string) string {
if contentDisposition == "" {
- return "inline"
+ return inlineDispositionText
}
- if AttachmentRegex.MatchString(contentDisposition) {
- return AttachmentRegex.ReplaceAllString(contentDisposition, "inline")
+ if attachmentRegex.MatchString(contentDisposition) {
+ return attachmentRegex.ReplaceAllString(contentDisposition, inlineDispositionText)
}
return contentDisposition
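For reference, the coercion in the `safeContentType` hunk above can be reproduced with the standard library alone: `http.DetectContentType` classifies HTML as `text/html; charset=utf-8`, and the text-type check downgrades it to `text/plain; charset=utf-8` so browsers never render it inline. A minimal standalone sketch, using a simplified regex rather than Workhorse's own:

```go
// Standalone illustration of the text-type downgrade shown above;
// this is a sketch, not Workhorse code.
package main

import (
	"fmt"
	"net/http"
	"regexp"
)

var textTypeRegex = regexp.MustCompile(`^text/`)

func safeContentType(data []byte) string {
	contentType := http.DetectContentType(data)
	// HTML and JavaScript must not be served inline, so force plain text.
	if textTypeRegex.MatchString(contentType) {
		return "text/plain; charset=utf-8"
	}
	return contentType
}

func main() {
	body := []byte("<html><body>Hello world!</body></html>")
	fmt.Println(http.DetectContentType(body)) // text/html; charset=utf-8
	fmt.Println(safeContentType(body))        // text/plain; charset=utf-8
}
```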
diff --git a/workhorse/internal/lsif_transformer/parser/errors.go b/workhorse/internal/lsif_transformer/parser/errors.go
deleted file mode 100644
index 1040a789413..00000000000
--- a/workhorse/internal/lsif_transformer/parser/errors.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package parser
-
-import (
- "errors"
- "strings"
-)
-
-func combineErrors(errsOrNil ...error) error {
- var errs []error
- for _, err := range errsOrNil {
- if err != nil {
- errs = append(errs, err)
- }
- }
-
- if len(errs) == 0 {
- return nil
- }
-
- if len(errs) == 1 {
- return errs[0]
- }
-
- var msgs []string
- for _, err := range errs {
- msgs = append(msgs, err.Error())
- }
-
- return errors.New(strings.Join(msgs, "\n"))
-}
diff --git a/workhorse/internal/lsif_transformer/parser/errors_test.go b/workhorse/internal/lsif_transformer/parser/errors_test.go
deleted file mode 100644
index 31a7130d05e..00000000000
--- a/workhorse/internal/lsif_transformer/parser/errors_test.go
+++ /dev/null
@@ -1,26 +0,0 @@
-package parser
-
-import (
- "errors"
- "testing"
-
- "github.com/stretchr/testify/require"
-)
-
-type customErr struct {
- err string
-}
-
-func (e customErr) Error() string {
- return e.err
-}
-
-func TestCombineErrors(t *testing.T) {
- err := combineErrors(nil, errors.New("first"), nil, customErr{"second"})
- require.EqualError(t, err, "first\nsecond")
-
- err = customErr{"custom error"}
- require.Equal(t, err, combineErrors(nil, err, nil))
-
- require.Nil(t, combineErrors(nil, nil, nil))
-}
diff --git a/workhorse/internal/lsif_transformer/parser/hovers.go b/workhorse/internal/lsif_transformer/parser/hovers.go
index 5889d595ade..a13c7e4c5c2 100644
--- a/workhorse/internal/lsif_transformer/parser/hovers.go
+++ b/workhorse/internal/lsif_transformer/parser/hovers.go
@@ -95,10 +95,15 @@ func (h *Hovers) For(refId Id) json.RawMessage {
}
func (h *Hovers) Close() error {
- return combineErrors(
+ for _, err := range []error{
h.File.Close(),
h.Offsets.Close(),
- )
+ } {
+ if err != nil {
+ return err
+ }
+ }
+ return nil
}
func (h *Hovers) addData(line []byte) error {
diff --git a/workhorse/internal/lsif_transformer/parser/ranges.go b/workhorse/internal/lsif_transformer/parser/ranges.go
index a11a66d70ca..3786e15186e 100644
--- a/workhorse/internal/lsif_transformer/parser/ranges.go
+++ b/workhorse/internal/lsif_transformer/parser/ranges.go
@@ -130,11 +130,16 @@ func (r *Ranges) Serialize(f io.Writer, rangeIds []Id, docs map[Id]string) error
}
func (r *Ranges) Close() error {
- return combineErrors(
+ for _, err := range []error{
r.Cache.Close(),
r.References.Close(),
r.Hovers.Close(),
- )
+ } {
+ if err != nil {
+ return err
+ }
+ }
+ return nil
}
func (r *Ranges) definitionPathFor(docs map[Id]string, refId Id) string {
diff --git a/workhorse/internal/lsif_transformer/parser/references.go b/workhorse/internal/lsif_transformer/parser/references.go
index 58ff9a61c02..39c34105fd1 100644
--- a/workhorse/internal/lsif_transformer/parser/references.go
+++ b/workhorse/internal/lsif_transformer/parser/references.go
@@ -86,10 +86,15 @@ func (r *References) For(docs map[Id]string, refId Id) []SerializedReference {
}
func (r *References) Close() error {
- return combineErrors(
+ for _, err := range []error{
r.Items.Close(),
r.Offsets.Close(),
- )
+ } {
+ if err != nil {
+ return err
+ }
+ }
+ return nil
}
func (r *References) getItems(refId Id) []Item {
diff --git a/workhorse/internal/proxy/proxy.go b/workhorse/internal/proxy/proxy.go
index be161c833a9..06e2c65a6a8 100644
--- a/workhorse/internal/proxy/proxy.go
+++ b/workhorse/internal/proxy/proxy.go
@@ -19,6 +19,7 @@ type Proxy struct {
reverseProxy *httputil.ReverseProxy
AllowResponseBuffering bool
customHeaders map[string]string
+ forceTargetHostHeader bool
}
func WithCustomHeaders(customHeaders map[string]string) func(*Proxy) {
@@ -27,6 +28,12 @@ func WithCustomHeaders(customHeaders map[string]string) func(*Proxy) {
}
}
+func WithForcedTargetHostHeader() func(*Proxy) {
+ return func(proxy *Proxy) {
+ proxy.forceTargetHostHeader = true
+ }
+}
+
func NewProxy(myURL *url.URL, version string, roundTripper http.RoundTripper, options ...func(*Proxy)) *Proxy {
p := Proxy{Version: version, AllowResponseBuffering: true, customHeaders: make(map[string]string)}
@@ -43,6 +50,25 @@ func NewProxy(myURL *url.URL, version string, roundTripper http.RoundTripper, op
option(&p)
}
+ if p.forceTargetHostHeader {
+ // because of https://github.com/golang/go/issues/28168, the
+ // upstream won't receive the expected Host header unless this
+ // is forced in the Director func here
+ previousDirector := p.reverseProxy.Director
+ p.reverseProxy.Director = func(request *http.Request) {
+ previousDirector(request)
+
+ // send original host along for the upstream
+ // to know it's being proxied under a different Host
+ // (for redirects and other stuff that depends on this)
+ request.Header.Set("X-Forwarded-Host", request.Host)
+ request.Header.Set("Forwarded", fmt.Sprintf("host=%s", request.Host))
+
+ // override the Host with the target
+ request.Host = request.URL.Host
+ }
+ }
+
return &p
}
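For reference, the Director override above works around the fact that `httputil.ReverseProxy` rewrites `request.URL` but not `request.Host` (golang/go#28168). A minimal standalone sketch of the same technique, with a hypothetical backend address rather than Workhorse's configuration:

```go
// Sketch of forcing the target Host header via a Director wrapper,
// mirroring the technique in proxy.go above.
package main

import (
	"fmt"
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Hypothetical backend address, for illustration only.
	target, err := url.Parse("http://backend.internal:8080")
	if err != nil {
		log.Fatal(err)
	}

	proxy := httputil.NewSingleHostReverseProxy(target)

	previousDirector := proxy.Director
	proxy.Director = func(request *http.Request) {
		previousDirector(request)
		// Preserve the original host for the upstream, then override
		// Host with the target so the backend sees the proxied hostname.
		request.Header.Set("X-Forwarded-Host", request.Host)
		request.Header.Set("Forwarded", fmt.Sprintf("host=%s", request.Host))
		request.Host = request.URL.Host
	}

	log.Fatal(http.ListenAndServe("localhost:8181", proxy))
}
```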
diff --git a/workhorse/internal/senddata/contentprocessor/contentprocessor_test.go b/workhorse/internal/senddata/contentprocessor/contentprocessor_test.go
index 2396bb0f952..b009cda1a24 100644
--- a/workhorse/internal/senddata/contentprocessor/contentprocessor_test.go
+++ b/workhorse/internal/senddata/contentprocessor/contentprocessor_test.go
@@ -56,12 +56,18 @@ func TestSetProperContentTypeAndDisposition(t *testing.T) {
body: "<html><body>Hello world!</body></html>",
},
{
- desc: "Javascript type",
+ desc: "Javascript within HTML type",
contentType: "text/plain; charset=utf-8",
contentDisposition: "inline",
body: "<script>alert(\"foo\")</script>",
},
{
+ desc: "Javascript type",
+ contentType: "text/plain; charset=utf-8",
+ contentDisposition: "inline",
+ body: "alert(\"foo\")",
+ },
+ {
desc: "Image type",
contentType: "image/png",
contentDisposition: "inline",
@@ -170,25 +176,41 @@ func TestSetProperContentTypeAndDisposition(t *testing.T) {
}
func TestFailOverrideContentType(t *testing.T) {
- testCase := struct {
- contentType string
- body string
+ testCases := []struct {
+ desc string
+ overrideFromUpstream string
+ responseContentType string
+ body string
}{
- contentType: "text/plain; charset=utf-8",
- body: "<html><body>Hello world!</body></html>",
+ {
+ desc: "Force text/html into text/plain",
+ responseContentType: "text/plain; charset=utf-8",
+ overrideFromUpstream: "text/html; charset=utf-8",
+ body: "<html><body>Hello world!</body></html>",
+ },
+ {
+ desc: "Force application/javascript into text/plain",
+ responseContentType: "text/plain; charset=utf-8",
+ overrideFromUpstream: "application/javascript; charset=utf-8",
+ body: "alert(1);",
+ },
}
- h := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
- // We are pretending to be upstream or an inner layer of the ResponseWriter chain
- w.Header().Set(headers.GitlabWorkhorseDetectContentTypeHeader, "true")
- w.Header().Set(headers.ContentTypeHeader, "text/html; charset=utf-8")
- _, err := io.WriteString(w, testCase.body)
- require.NoError(t, err)
- })
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ h := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+ // We are pretending to be upstream or an inner layer of the ResponseWriter chain
+ w.Header().Set(headers.GitlabWorkhorseDetectContentTypeHeader, "true")
+ w.Header().Set(headers.ContentTypeHeader, tc.overrideFromUpstream)
+ _, err := io.WriteString(w, tc.body)
+ require.NoError(t, err)
+ })
- resp := makeRequest(t, h, testCase.body, "")
+ resp := makeRequest(t, h, tc.body, "")
- require.Equal(t, testCase.contentType, resp.Header.Get(headers.ContentTypeHeader))
+ require.Equal(t, tc.responseContentType, resp.Header.Get(headers.ContentTypeHeader))
+ })
+ }
}
func TestSuccessOverrideContentDispositionFromInlineToAttachment(t *testing.T) {
diff --git a/workhorse/internal/upload/artifacts_upload_test.go b/workhorse/internal/upload/artifacts_upload_test.go
index 0a9e4ef3869..96eb3810673 100644
--- a/workhorse/internal/upload/artifacts_upload_test.go
+++ b/workhorse/internal/upload/artifacts_upload_test.go
@@ -66,7 +66,7 @@ func testArtifactsUploadServer(t *testing.T, authResponse *api.Response, bodyPro
if r.Method != "POST" {
t.Fatal("Expected POST request")
}
- if opts.IsLocal() {
+ if opts.IsLocalTempFile() {
if r.FormValue("file.path") == "" {
t.Fatal("Expected file to be present")
return
diff --git a/workhorse/internal/upload/destination/destination.go b/workhorse/internal/upload/destination/destination.go
index 7a030e59a64..b18b6e22a99 100644
--- a/workhorse/internal/upload/destination/destination.go
+++ b/workhorse/internal/upload/destination/destination.go
@@ -128,9 +128,14 @@ func Upload(ctx context.Context, reader io.Reader, size int64, opts *UploadOpts)
var uploadDestination consumer
var err error
switch {
- case opts.IsLocal():
- clientMode = "local"
+ // This case means Workhorse is acting as an upload proxy for Rails and buffers files
+ // to disk in a temporary location, see:
+ // https://docs.gitlab.com/ee/development/uploads/background.html#moving-disk-buffering-to-workhorse
+ case opts.IsLocalTempFile():
+ clientMode = "local_tempfile"
uploadDestination, err = fh.newLocalFile(ctx, opts)
+ // All cases below mean we are doing a direct upload to remote i.e. object storage, see:
+ // https://docs.gitlab.com/ee/development/uploads/background.html#moving-to-object-storage-and-direct-uploads
case opts.UseWorkhorseClientEnabled() && opts.ObjectStorageConfig.IsGoCloud():
clientMode = fmt.Sprintf("go_cloud:%s", opts.ObjectStorageConfig.Provider)
p := &objectstore.GoCloudObjectParams{
@@ -141,14 +146,14 @@ func Upload(ctx context.Context, reader io.Reader, size int64, opts *UploadOpts)
}
uploadDestination, err = objectstore.NewGoCloudObject(p)
case opts.UseWorkhorseClientEnabled() && opts.ObjectStorageConfig.IsAWS() && opts.ObjectStorageConfig.IsValid():
- clientMode = "s3"
+ clientMode = "s3_client"
uploadDestination, err = objectstore.NewS3Object(
opts.RemoteTempObjectID,
opts.ObjectStorageConfig.S3Credentials,
opts.ObjectStorageConfig.S3Config,
)
case opts.IsMultipart():
- clientMode = "multipart"
+ clientMode = "s3_multipart"
uploadDestination, err = objectstore.NewMultipart(
opts.PresignedParts,
opts.PresignedCompleteMultipart,
@@ -158,7 +163,7 @@ func Upload(ctx context.Context, reader io.Reader, size int64, opts *UploadOpts)
opts.PartSize,
)
default:
- clientMode = "http"
+ clientMode = "presigned_put"
uploadDestination, err = objectstore.NewObject(
opts.PresignedPut,
opts.PresignedDelete,
@@ -195,15 +200,15 @@ func Upload(ctx context.Context, reader io.Reader, size int64, opts *UploadOpts)
logger := log.WithContextFields(ctx, log.Fields{
"copied_bytes": fh.Size,
- "is_local": opts.IsLocal(),
+ "is_local": opts.IsLocalTempFile(),
"is_multipart": opts.IsMultipart(),
- "is_remote": !opts.IsLocal(),
+ "is_remote": !opts.IsLocalTempFile(),
"remote_id": opts.RemoteID,
"temp_file_prefix": opts.TempFilePrefix,
"client_mode": clientMode,
})
- if opts.IsLocal() {
+ if opts.IsLocalTempFile() {
logger = logger.WithField("local_temp_path", opts.LocalTempPath)
} else {
logger = logger.WithField("remote_temp_object", opts.RemoteTempObjectID)
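Aside: the renamed clientMode labels correspond to the upload paths described in the comments above (disk buffering for Rails versus the various direct-to-object-storage modes). A condensed, hypothetical sketch of that mapping; fakeOpts and its fields are illustrative stand-ins, not Workhorse's real UploadOpts:

package main

import "fmt"

// fakeOpts is an illustrative stand-in for the real upload options.
type fakeOpts struct {
	LocalTempPath     string // set when Workhorse buffers to disk for Rails
	UseWorkhorseS3    bool   // set when Workhorse talks to S3 itself
	HasPresignedParts bool   // set for presigned multipart uploads
}

// clientMode mirrors the labels used in destination.go after this change.
func clientMode(o fakeOpts) string {
	switch {
	case o.LocalTempPath != "":
		return "local_tempfile" // disk buffering for Rails
	case o.UseWorkhorseS3:
		return "s3_client" // direct upload via the AWS SDK
	case o.HasPresignedParts:
		return "s3_multipart" // direct upload via presigned multipart URLs
	default:
		return "presigned_put" // direct upload via a single presigned PUT
	}
}

func main() {
	fmt.Println(clientMode(fakeOpts{LocalTempPath: "/tmp/uploads"})) // local_tempfile
	fmt.Println(clientMode(fakeOpts{UseWorkhorseS3: true}))          // s3_client
}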
diff --git a/workhorse/internal/upload/destination/objectstore/s3_session.go b/workhorse/internal/upload/destination/objectstore/s3_session.go
index a0c1f099145..aa38f18ed7a 100644
--- a/workhorse/internal/upload/destination/objectstore/s3_session.go
+++ b/workhorse/internal/upload/destination/objectstore/s3_session.go
@@ -6,6 +6,7 @@ import (
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
+ "github.com/aws/aws-sdk-go/aws/endpoints"
"github.com/aws/aws-sdk-go/aws/session"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/config"
@@ -70,7 +71,23 @@ func setupS3Session(s3Credentials config.S3Credentials, s3Config config.S3Config
}
if s3Config.Endpoint != "" {
- cfg.Endpoint = aws.String(s3Config.Endpoint)
+ // The administrator has configured an S3 endpoint override,
+ // e.g. to make use of S3 IPv6 support or S3 FIPS mode. We
+ // need to configure a custom resolver to make sure that
+ // the custom endpoint is only used for S3 API calls, and not
+ // for STS API calls.
+ s3CustomResolver := func(service, region string, optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
+ if service == endpoints.S3ServiceID {
+ return endpoints.ResolvedEndpoint{
+ URL: s3Config.Endpoint,
+ SigningRegion: region,
+ }, nil
+ }
+
+ return endpoints.DefaultResolver().EndpointFor(service, region, optFns...)
+ }
+
+ cfg.EndpointResolver = endpoints.ResolverFunc(s3CustomResolver)
}
sess, err := session.NewSession(cfg)
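Aside: the resolver above scopes the configured endpoint override to the S3 service only, so STS and other services keep their default endpoints. A standalone sketch of the same aws-sdk-go (v1) pattern, independent of Workhorse's config types; the endpoint and region values are placeholders:

package main

import (
	"fmt"
	"log"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/endpoints"
	"github.com/aws/aws-sdk-go/aws/session"
)

func main() {
	const customS3Endpoint = "https://s3.example.internal" // placeholder override

	// Only S3 calls go to the custom endpoint; every other service
	// (for example STS) falls through to the default resolver.
	resolver := func(service, region string, optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
		if service == endpoints.S3ServiceID {
			return endpoints.ResolvedEndpoint{
				URL:           customS3Endpoint,
				SigningRegion: region,
			}, nil
		}
		return endpoints.DefaultResolver().EndpointFor(service, region, optFns...)
	}

	sess, err := session.NewSession(&aws.Config{
		Region:           aws.String("us-west-2"), // placeholder region
		EndpointResolver: endpoints.ResolverFunc(resolver),
	})
	if err != nil {
		log.Fatal(err)
	}

	// ClientConfig shows the endpoint an S3 or STS client would actually use.
	fmt.Println(sess.ClientConfig(endpoints.S3ServiceID).Endpoint)  // custom endpoint
	fmt.Println(sess.ClientConfig(endpoints.StsServiceID).Endpoint) // default STS endpoint
}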
diff --git a/workhorse/internal/upload/destination/objectstore/s3_session_test.go b/workhorse/internal/upload/destination/objectstore/s3_session_test.go
index 5d57b4f9af8..4bbe38f90ec 100644
--- a/workhorse/internal/upload/destination/objectstore/s3_session_test.go
+++ b/workhorse/internal/upload/destination/objectstore/s3_session_test.go
@@ -5,6 +5,7 @@ import (
"time"
"github.com/aws/aws-sdk-go/aws"
+ "github.com/aws/aws-sdk-go/aws/endpoints"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitlab/workhorse/internal/config"
@@ -17,7 +18,9 @@ func TestS3SessionSetup(t *testing.T) {
sess, err := setupS3Session(credentials, cfg)
require.NoError(t, err)
- require.Equal(t, aws.StringValue(sess.Config.Region), "us-west-1")
+ s3Config := sess.ClientConfig(endpoints.S3ServiceID)
+ require.Equal(t, "https://s3.us-west-1.amazonaws.com", s3Config.Endpoint)
+ require.Equal(t, "us-west-1", s3Config.SigningRegion)
require.True(t, aws.BoolValue(sess.Config.S3ForcePathStyle))
require.Equal(t, len(sessionCache.sessions), 1)
@@ -29,6 +32,26 @@ func TestS3SessionSetup(t *testing.T) {
ResetS3Session(cfg)
}
+func TestS3SessionEndpointSetup(t *testing.T) {
+ credentials := config.S3Credentials{}
+ const customS3Endpoint = "https://example.com"
+ const region = "us-west-2"
+ cfg := config.S3Config{Region: region, PathStyle: true, Endpoint: customS3Endpoint}
+
+ sess, err := setupS3Session(credentials, cfg)
+ require.NoError(t, err)
+
+ // ClientConfig is what is ultimately used by an S3 client
+ s3Config := sess.ClientConfig(endpoints.S3ServiceID)
+ require.Equal(t, customS3Endpoint, s3Config.Endpoint)
+ require.Equal(t, region, s3Config.SigningRegion)
+
+ stsConfig := sess.ClientConfig(endpoints.StsServiceID)
+ require.Equal(t, "https://sts.amazonaws.com", stsConfig.Endpoint, "STS should use default endpoint")
+
+ ResetS3Session(cfg)
+}
+
func TestS3SessionExpiry(t *testing.T) {
credentials := config.S3Credentials{}
cfg := config.S3Config{Region: "us-west-1", PathStyle: true}
diff --git a/workhorse/internal/upload/destination/upload_opts.go b/workhorse/internal/upload/destination/upload_opts.go
index 750a79d7bc2..77a8927d34f 100644
--- a/workhorse/internal/upload/destination/upload_opts.go
+++ b/workhorse/internal/upload/destination/upload_opts.go
@@ -70,8 +70,8 @@ func (s *UploadOpts) UseWorkhorseClientEnabled() bool {
return s.UseWorkhorseClient && s.ObjectStorageConfig.IsValid() && s.RemoteTempObjectID != ""
}
-// IsLocal checks if the options require the writing of the file on disk
-func (s *UploadOpts) IsLocal() bool {
+// IsLocalTempFile checks if the options require the writing of a temporary file on disk
+func (s *UploadOpts) IsLocalTempFile() bool {
return s.LocalTempPath != ""
}
diff --git a/workhorse/internal/upload/destination/upload_opts_test.go b/workhorse/internal/upload/destination/upload_opts_test.go
index fde726c985d..24a372495c6 100644
--- a/workhorse/internal/upload/destination/upload_opts_test.go
+++ b/workhorse/internal/upload/destination/upload_opts_test.go
@@ -49,7 +49,7 @@ func TestUploadOptsLocalAndRemote(t *testing.T) {
PartSize: test.partSize,
}
- require.Equal(t, test.isLocal, opts.IsLocal(), "IsLocal() mismatch")
+ require.Equal(t, test.isLocal, opts.IsLocalTempFile(), "IsLocalTempFile() mismatch")
require.Equal(t, test.isMultipart, opts.IsMultipart(), "IsMultipart() mismatch")
})
}
@@ -336,7 +336,7 @@ func TestGoCloudConfig(t *testing.T) {
require.Equal(t, apiResponse.RemoteObject.ObjectStorage.GoCloudConfig, opts.ObjectStorageConfig.GoCloudConfig)
require.True(t, opts.UseWorkhorseClientEnabled())
require.Equal(t, test.valid, opts.ObjectStorageConfig.IsValid())
- require.False(t, opts.IsLocal())
+ require.False(t, opts.IsLocalTempFile())
})
}
}
diff --git a/workhorse/internal/upstream/upstream.go b/workhorse/internal/upstream/upstream.go
index c0678b1cb3e..6d107fc28cd 100644
--- a/workhorse/internal/upstream/upstream.go
+++ b/workhorse/internal/upstream/upstream.go
@@ -37,7 +37,6 @@ var (
upload.RewrittenFieldsHeader,
}
geoProxyApiPollingInterval = 10 * time.Second
- geoProxyWorkhorseHeaders = map[string]string{"Gitlab-Workhorse-Geo-Proxy": "1"}
)
type upstream struct {
@@ -48,6 +47,7 @@ type upstream struct {
CableRoundTripper http.RoundTripper
APIClient *apipkg.API
geoProxyBackend *url.URL
+ geoProxyExtraData string
geoLocalRoutes []routeEntry
geoProxyCableRoute routeEntry
geoProxyRoute routeEntry
@@ -215,34 +215,51 @@ func (u *upstream) pollGeoProxyAPI() {
// Calls /api/v4/geo/proxy and sets up routes
func (u *upstream) callGeoProxyAPI() {
- geoProxyURL, err := u.APIClient.GetGeoProxyURL()
+ geoProxyData, err := u.APIClient.GetGeoProxyData()
if err != nil {
log.WithError(err).WithFields(log.Fields{"geoProxyBackend": u.geoProxyBackend}).Error("Geo Proxy: Unable to determine Geo Proxy URL. Fallback on cached value.")
return
}
- if u.geoProxyBackend.String() != geoProxyURL.String() {
- log.WithFields(log.Fields{"oldGeoProxyURL": u.geoProxyBackend, "newGeoProxyURL": geoProxyURL}).Info("Geo Proxy: URL changed")
- u.updateGeoProxyFields(geoProxyURL)
+ hasProxyDataChanged := false
+ if u.geoProxyBackend.String() != geoProxyData.GeoProxyURL.String() {
+ log.WithFields(log.Fields{"oldGeoProxyURL": u.geoProxyBackend, "newGeoProxyURL": geoProxyData.GeoProxyURL}).Info("Geo Proxy: URL changed")
+ hasProxyDataChanged = true
+ }
+
+ if u.geoProxyExtraData != geoProxyData.GeoProxyExtraData {
+ // The extra data is usually a JWT, so it is not logged explicitly
+ log.Info("Geo Proxy: signed data changed")
+ hasProxyDataChanged = true
+ }
+
+ if hasProxyDataChanged {
+ u.updateGeoProxyFieldsFromData(geoProxyData)
}
}
-func (u *upstream) updateGeoProxyFields(geoProxyURL *url.URL) {
+func (u *upstream) updateGeoProxyFieldsFromData(geoProxyData *apipkg.GeoProxyData) {
u.mu.Lock()
defer u.mu.Unlock()
- u.geoProxyBackend = geoProxyURL
+ u.geoProxyBackend = geoProxyData.GeoProxyURL
+ u.geoProxyExtraData = geoProxyData.GeoProxyExtraData
if u.geoProxyBackend.String() == "" {
return
}
+ geoProxyWorkhorseHeaders := map[string]string{
+ "Gitlab-Workhorse-Geo-Proxy": "1",
+ "Gitlab-Workhorse-Geo-Proxy-Extra-Data": u.geoProxyExtraData,
+ }
geoProxyRoundTripper := roundtripper.NewBackendRoundTripper(u.geoProxyBackend, "", u.ProxyHeadersTimeout, u.DevelopmentMode)
geoProxyUpstream := proxypkg.NewProxy(
u.geoProxyBackend,
u.Version,
geoProxyRoundTripper,
proxypkg.WithCustomHeaders(geoProxyWorkhorseHeaders),
+ proxypkg.WithForcedTargetHostHeader(),
)
u.geoProxyCableRoute = u.wsRoute(`^/-/cable\z`, geoProxyUpstream)
u.geoProxyRoute = u.route("", "", geoProxyUpstream, withGeoProxy())
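Aside: for readers wiring this up elsewhere in the Workhorse module, a hedged usage sketch combining the two proxy options introduced in this change set, WithCustomHeaders and WithForcedTargetHostHeader. The backend URL, version string, extra-data value, and use of http.DefaultTransport are illustrative assumptions, not the values Workhorse uses:

package main

import (
	"log"
	"net/http"
	"net/url"

	proxypkg "gitlab.com/gitlab-org/gitlab/workhorse/internal/proxy"
)

func main() {
	// Placeholder Geo primary URL; Workhorse gets this from /api/v4/geo/proxy.
	backend, err := url.Parse("https://primary.example.com")
	if err != nil {
		log.Fatal(err)
	}

	headers := map[string]string{
		"Gitlab-Workhorse-Geo-Proxy":            "1",
		"Gitlab-Workhorse-Geo-Proxy-Extra-Data": "signed-extra-data", // normally a JWT from the API
	}

	// http.DefaultTransport stands in for Workhorse's backend round tripper.
	geoProxy := proxypkg.NewProxy(
		backend,
		"v0.0.0",
		http.DefaultTransport,
		proxypkg.WithCustomHeaders(headers),
		proxypkg.WithForcedTargetHostHeader(),
	)

	log.Fatal(http.ListenAndServe(":8181", geoProxy))
}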
diff --git a/workhorse/internal/upstream/upstream_test.go b/workhorse/internal/upstream/upstream_test.go
index 80e59202b69..8f054f5ccef 100644
--- a/workhorse/internal/upstream/upstream_test.go
+++ b/workhorse/internal/upstream/upstream_test.go
@@ -209,21 +209,74 @@ func TestGeoProxyFeatureEnablingAndDisabling(t *testing.T) {
runTestCases(t, ws, testCasesProxied)
}
-func TestGeoProxySetsCustomHeader(t *testing.T) {
+func TestGeoProxyUpdatesExtraDataWhenChanged(t *testing.T) {
+ var expectedGeoProxyExtraData string
+
remoteServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, "1", r.Header.Get("Gitlab-Workhorse-Geo-Proxy"), "custom proxy header")
+ require.Equal(t, expectedGeoProxyExtraData, r.Header.Get("Gitlab-Workhorse-Geo-Proxy-Extra-Data"), "custom extra data header")
w.WriteHeader(http.StatusOK)
}))
defer remoteServer.Close()
- geoProxyEndpointResponseBody := fmt.Sprintf(`{"geo_proxy_url":"%v"}`, remoteServer.URL)
+ geoProxyEndpointExtraData1 := fmt.Sprintf(`{"geo_proxy_url":"%v","geo_proxy_extra_data":"data1"}`, remoteServer.URL)
+ geoProxyEndpointExtraData2 := fmt.Sprintf(`{"geo_proxy_url":"%v","geo_proxy_extra_data":"data2"}`, remoteServer.URL)
+ geoProxyEndpointExtraData3 := fmt.Sprintf(`{"geo_proxy_url":"%v"}`, remoteServer.URL)
+ geoProxyEndpointResponseBody := geoProxyEndpointExtraData1
+ expectedGeoProxyExtraData = "data1"
+
railsServer, deferredClose := startRailsServer("Local Rails server", &geoProxyEndpointResponseBody)
defer deferredClose()
- ws, wsDeferredClose, _ := startWorkhorseServer(railsServer.URL, true)
+ ws, wsDeferredClose, waitForNextApiPoll := startWorkhorseServer(railsServer.URL, true)
defer wsDeferredClose()
http.Get(ws.URL)
+
+ // Verify that the expected header changes after the next API poll.
+ geoProxyEndpointResponseBody = geoProxyEndpointExtraData2
+ expectedGeoProxyExtraData = "data2"
+ waitForNextApiPoll()
+
+ http.Get(ws.URL)
+
+ // Validate that missing extra data results in an empty header
+ geoProxyEndpointResponseBody = geoProxyEndpointExtraData3
+ expectedGeoProxyExtraData = ""
+ waitForNextApiPoll()
+
+ http.Get(ws.URL)
+}
+
+func TestGeoProxySetsCustomHeader(t *testing.T) {
+ testCases := []struct {
+ desc string
+ json string
+ extraData string
+ }{
+ {"no extra data", `{"geo_proxy_url":"%v"}`, ""},
+ {"with extra data", `{"geo_proxy_url":"%v","geo_proxy_extra_data":"extra-geo-data"}`, "extra-geo-data"},
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.desc, func(t *testing.T) {
+ remoteServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ require.Equal(t, "1", r.Header.Get("Gitlab-Workhorse-Geo-Proxy"), "custom proxy header")
+ require.Equal(t, tc.extraData, r.Header.Get("Gitlab-Workhorse-Geo-Proxy-Extra-Data"), "custom proxy extra data header")
+ w.WriteHeader(http.StatusOK)
+ }))
+ defer remoteServer.Close()
+
+ geoProxyEndpointResponseBody := fmt.Sprintf(tc.json, remoteServer.URL)
+ railsServer, deferredClose := startRailsServer("Local Rails server", &geoProxyEndpointResponseBody)
+ defer deferredClose()
+
+ ws, wsDeferredClose, _ := startWorkhorseServer(railsServer.URL, true)
+ defer wsDeferredClose()
+
+ http.Get(ws.URL)
+ })
+ }
}
func runTestCases(t *testing.T, ws *httptest.Server, testCases []testCase) {
diff --git a/workhorse/proxy_test.go b/workhorse/proxy_test.go
index 754deea0032..02148c07522 100644
--- a/workhorse/proxy_test.go
+++ b/workhorse/proxy_test.go
@@ -7,6 +7,7 @@ import (
"net"
"net/http"
"net/http/httptest"
+ "net/url"
"regexp"
"testing"
"time"
@@ -31,10 +32,17 @@ func newProxy(url string, rt http.RoundTripper, opts ...func(*proxy.Proxy)) *pro
}
func TestProxyRequest(t *testing.T) {
- ts := testhelper.TestServerWithHandler(regexp.MustCompile(`/url/path\z`), func(w http.ResponseWriter, r *http.Request) {
+ inboundURL, err := url.Parse("https://explicitly.set.host/url/path")
+ require.NoError(t, err, "parse inbound url")
+
+ urlRegexp := regexp.MustCompile(fmt.Sprintf(`%s\z`, inboundURL.Path))
+ ts := testhelper.TestServerWithHandler(urlRegexp, func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, "POST", r.Method, "method")
require.Equal(t, "test", r.Header.Get("Custom-Header"), "custom header")
require.Equal(t, testVersion, r.Header.Get("Gitlab-Workhorse"), "version header")
+ require.Equal(t, inboundURL.Host, r.Host, "sent host header")
+ require.Empty(t, r.Header.Get("X-Forwarded-Host"), "X-Forwarded-Host header")
+ require.Empty(t, r.Header.Get("Forwarded"), "Forwarded header")
require.Regexp(
t,
@@ -52,7 +60,7 @@ func TestProxyRequest(t *testing.T) {
fmt.Fprint(w, "RESPONSE")
})
- httpRequest, err := http.NewRequest("POST", ts.URL+"/url/path", bytes.NewBufferString("REQUEST"))
+ httpRequest, err := http.NewRequest("POST", inboundURL.String(), bytes.NewBufferString("REQUEST"))
require.NoError(t, err)
httpRequest.Header.Set("Custom-Header", "test")
@@ -64,6 +72,32 @@ func TestProxyRequest(t *testing.T) {
require.Equal(t, "test", w.Header().Get("Custom-Response-Header"), "custom response header")
}
+func TestProxyWithForcedTargetHostHeader(t *testing.T) {
+ var tsUrl *url.URL
+ inboundURL, err := url.Parse("https://explicitly.set.host/url/path")
+ require.NoError(t, err, "parse upstream url")
+
+ urlRegexp := regexp.MustCompile(fmt.Sprintf(`%s\z`, inboundURL.Path))
+ ts := testhelper.TestServerWithHandler(urlRegexp, func(w http.ResponseWriter, r *http.Request) {
+ require.Equal(t, tsUrl.Host, r.Host, "upstream host header")
+ require.Equal(t, inboundURL.Host, r.Header.Get("X-Forwarded-Host"), "X-Forwarded-Host header")
+ require.Equal(t, fmt.Sprintf("host=%s", inboundURL.Host), r.Header.Get("Forwarded"), "Forwarded header")
+
+ _, err := w.Write([]byte(`ok`))
+ require.NoError(t, err, "write ok response")
+ })
+ tsUrl, err = url.Parse(ts.URL)
+ require.NoError(t, err, "parse testserver URL")
+
+ httpRequest, err := http.NewRequest("POST", inboundURL.String(), nil)
+ require.NoError(t, err)
+
+ w := httptest.NewRecorder()
+ testProxy := newProxy(ts.URL, nil, proxy.WithForcedTargetHostHeader())
+ testProxy.ServeHTTP(w, httpRequest)
+ testhelper.RequireResponseBody(t, w, "ok")
+}
+
func TestProxyWithCustomHeaders(t *testing.T) {
ts := testhelper.TestServerWithHandler(regexp.MustCompile(`/url/path\z`), func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, "value", r.Header.Get("Custom-Header"), "custom proxy header")
diff --git a/workhorse/tools.go b/workhorse/tools.go
index 9df59be349e..94b487844e1 100644
--- a/workhorse/tools.go
+++ b/workhorse/tools.go
@@ -1,4 +1,5 @@
-//+build tools
+//go:build tools
+// +build tools
package main
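Aside: this last hunk migrates the build constraint to the //go:build syntax introduced in Go 1.17 while keeping the legacy // +build line for older toolchains. A minimal tools.go sketch following the same pattern; the imported tool is illustrative, not Workhorse's actual tool list:

//go:build tools
// +build tools

// Pins developer tools in go.mod without compiling them into the binary:
// the "tools" tag is never set during normal builds, so this file is
// excluded and only its module requirements are recorded.
package main

import (
	_ "github.com/client9/misspell/cmd/misspell" // illustrative tool dependency
)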