summaryrefslogtreecommitdiff
path: root/doc/administration
diff options
context:
space:
mode:
Diffstat (limited to 'doc/administration')
-rw-r--r--doc/administration/application_settings_cache.md20
-rw-r--r--doc/administration/audit_events.md7
-rw-r--r--doc/administration/auth/README.md2
-rw-r--r--doc/administration/auth/atlassian.md2
-rw-r--r--doc/administration/auth/authentiq.md2
-rw-r--r--doc/administration/auth/cognito.md2
-rw-r--r--doc/administration/auth/crowd.md2
-rw-r--r--doc/administration/auth/jwt.md2
-rw-r--r--doc/administration/auth/ldap/index.md34
-rw-r--r--doc/administration/auth/ldap/ldap-troubleshooting.md23
-rw-r--r--doc/administration/auth/oidc.md244
-rw-r--r--doc/administration/auth/smartcard.md5
-rw-r--r--doc/administration/compliance.md3
-rw-r--r--doc/administration/consul.md26
-rw-r--r--doc/administration/database_load_balancing.md32
-rw-r--r--doc/administration/encrypted_configuration.md4
-rw-r--r--doc/administration/external_pipeline_validation.md25
-rw-r--r--doc/administration/geo/disaster_recovery/index.md22
-rw-r--r--doc/administration/geo/disaster_recovery/planned_failover.md5
-rw-r--r--doc/administration/geo/index.md122
-rw-r--r--doc/administration/geo/replication/configuration.md26
-rw-r--r--doc/administration/geo/replication/datatypes.md30
-rw-r--r--doc/administration/geo/replication/docker_registry.md12
-rw-r--r--doc/administration/geo/replication/faq.md36
-rw-r--r--doc/administration/geo/replication/geo_validation_tests.md9
-rw-r--r--doc/administration/geo/replication/location_aware_git_url.md30
-rw-r--r--doc/administration/geo/replication/multiple_servers.md20
-rw-r--r--doc/administration/geo/replication/object_storage.md18
-rw-r--r--doc/administration/geo/replication/remove_geo_node.md2
-rw-r--r--doc/administration/geo/replication/security_review.md68
-rw-r--r--doc/administration/geo/replication/troubleshooting.md7
-rw-r--r--doc/administration/geo/replication/usage.md8
-rw-r--r--doc/administration/geo/replication/using_a_geo_server.md2
-rw-r--r--doc/administration/geo/replication/version_specific_updates.md25
-rw-r--r--doc/administration/geo/setup/database.md272
-rw-r--r--doc/administration/geo/setup/index.md2
-rw-r--r--doc/administration/git_annex.md237
-rw-r--r--doc/administration/gitaly/configure_gitaly.md28
-rw-r--r--doc/administration/gitaly/index.md18
-rw-r--r--doc/administration/gitaly/praefect.md217
-rw-r--r--doc/administration/housekeeping.md24
-rw-r--r--doc/administration/img/housekeeping_settings.pngbin24754 -> 0 bytes
-rw-r--r--doc/administration/incoming_email.md79
-rw-r--r--doc/administration/index.md3
-rw-r--r--doc/administration/instance_limits.md178
-rw-r--r--doc/administration/job_artifacts.md64
-rw-r--r--doc/administration/job_logs.md98
-rw-r--r--doc/administration/lfs/img/git-annex-branches.pngbin32164 -> 0 bytes
-rw-r--r--doc/administration/maintenance_mode/index.md20
-rw-r--r--doc/administration/monitoring/performance/img/performance_bar_external_http_calls.pngbin56569 -> 0 bytes
-rw-r--r--doc/administration/monitoring/performance/img/performance_bar_frontend.pngbin34521 -> 0 bytes
-rw-r--r--doc/administration/monitoring/performance/img/performance_bar_gitaly_calls.pngbin81321 -> 0 bytes
-rw-r--r--doc/administration/monitoring/performance/img/performance_bar_redis_calls.pngbin17273 -> 0 bytes
-rw-r--r--doc/administration/monitoring/performance/img/performance_bar_request_selector_warning_expanded.pngbin19758 -> 0 bytes
-rw-r--r--doc/administration/monitoring/performance/img/performance_bar_rugged_calls.pngbin28784 -> 0 bytes
-rw-r--r--doc/administration/monitoring/performance/img/performance_bar_sql_queries.pngbin139550 -> 0 bytes
-rw-r--r--doc/administration/monitoring/performance/performance_bar.md31
-rw-r--r--doc/administration/monitoring/prometheus/gitlab_metrics.md13
-rw-r--r--doc/administration/monitoring/prometheus/postgres_exporter.md2
-rw-r--r--doc/administration/nfs.md38
-rw-r--r--doc/administration/object_storage.md2
-rw-r--r--doc/administration/operations/extra_sidekiq_processes.md6
-rw-r--r--doc/administration/operations/fast_ssh_key_lookup.md16
-rw-r--r--doc/administration/operations/filesystem_benchmarking.md12
-rw-r--r--doc/administration/operations/rails_console.md6
-rw-r--r--doc/administration/operations/ssh_certificates.md26
-rw-r--r--doc/administration/packages/container_registry.md22
-rw-r--r--doc/administration/packages/index.md6
-rw-r--r--doc/administration/pages/index.md170
-rw-r--r--doc/administration/pages/source.md2
-rw-r--r--doc/administration/polling.md8
-rw-r--r--doc/administration/postgresql/pgbouncer.md6
-rw-r--r--doc/administration/postgresql/replication_and_failover.md2
-rw-r--r--doc/administration/raketasks/ldap.md27
-rw-r--r--doc/administration/raketasks/project_import_export.md4
-rw-r--r--doc/administration/raketasks/storage.md14
-rw-r--r--doc/administration/reference_architectures/10k_users.md23
-rw-r--r--doc/administration/reference_architectures/1k_users.md12
-rw-r--r--doc/administration/reference_architectures/25k_users.md57
-rw-r--r--doc/administration/reference_architectures/2k_users.md32
-rw-r--r--doc/administration/reference_architectures/3k_users.md76
-rw-r--r--doc/administration/reference_architectures/50k_users.md57
-rw-r--r--doc/administration/reference_architectures/5k_users.md50
-rw-r--r--doc/administration/reference_architectures/troubleshooting.md4
-rw-r--r--doc/administration/reply_by_email_postfix_setup.md8
-rw-r--r--doc/administration/repository_storage_types.md6
-rw-r--r--doc/administration/restart_gitlab.md10
-rw-r--r--doc/administration/server_hooks.md7
-rw-r--r--doc/administration/terraform_state.md2
-rw-r--r--doc/administration/troubleshooting/defcon.md25
-rw-r--r--doc/administration/troubleshooting/elasticsearch.md10
-rw-r--r--doc/administration/troubleshooting/gitlab_rails_cheat_sheet.md38
-rw-r--r--doc/administration/troubleshooting/log_parsing.md39
-rw-r--r--doc/administration/troubleshooting/sidekiq.md20
-rw-r--r--doc/administration/troubleshooting/ssl.md14
-rw-r--r--doc/administration/troubleshooting/test_environments.md2
-rw-r--r--doc/administration/whats-new.md18
97 files changed, 1925 insertions, 1115 deletions
diff --git a/doc/administration/application_settings_cache.md b/doc/administration/application_settings_cache.md
index b6c09129c79..b3aac932fa8 100644
--- a/doc/administration/application_settings_cache.md
+++ b/doc/administration/application_settings_cache.md
@@ -4,16 +4,15 @@ group: Memory
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# Changing application settings cache expiry interval **(FREE SELF)**
+# Change the expiration interval for application cache **(FREE SELF)**
-Application settings are cached for 60 seconds by default which should work
-for most installations. A higher value would mean a greater delay between
-changing an application setting and noticing that change come into effect.
-A value of `0` would result in the `application_settings` table being
-loaded for every request causing extra load on Redis and/or PostgreSQL.
-It is therefore recommended to keep the value above zero.
+By default, GitLab caches application settings for 60 seconds. Occasionally,
+you may need to increase that interval to have more delay between application
+setting changes and when users notice those changes in the application.
-## Change the application settings cache expiry
+We recommend you set this value to greater than `0` seconds. Setting it to `0`
+causes the `application_settings` table to load for every request. This causes
+extra load for Redis and PostgreSQL.
To change the expiry value:
@@ -25,7 +24,8 @@ To change the expiry value:
gitlab_rails['application_settings_cache_seconds'] = 60
```
-1. Save the file, and reconfigure and restart GitLab for the changes to take effect:
+1. Save the file, and then reconfigure and restart GitLab for the changes to
+ take effect:
```shell
gitlab-ctl reconfigure
@@ -43,5 +43,5 @@ To change the expiry value:
application_settings_cache_seconds: 60
```
-1. Save the file and [restart](restart_gitlab.md#installations-from-source)
+1. Save the file, and then [restart](restart_gitlab.md#installations-from-source)
GitLab for the changes to take effect.
diff --git a/doc/administration/audit_events.md b/doc/administration/audit_events.md
index 9aa533d54f6..a7f4fc10655 100644
--- a/doc/administration/audit_events.md
+++ b/doc/administration/audit_events.md
@@ -157,6 +157,11 @@ on adding these events into GitLab:
- [Group settings and activity](https://gitlab.com/groups/gitlab-org/-/epics/475)
- [Instance-level settings and activity](https://gitlab.com/groups/gitlab-org/-/epics/476)
+Don't see the event you want in any of the epics linked above? You can use the **Audit Event
+Proposal** issue template to
+[create an issue](https://gitlab.com/gitlab-org/gitlab/-/issues/new?issuable_template=Audit%20Event%20Proposal)
+to request it.
+
### Disabled events
#### Repository push
@@ -170,7 +175,7 @@ In an upcoming release, Audit Events for Git push events will be enabled
by default. Follow our [Partitioning strategy for Audit Events epic](https://gitlab.com/groups/gitlab-org/-/epics/3206) for updates.
If you still wish to enable **Repository push** events in your instance, follow
-the steps bellow.
+the steps below.
**In Omnibus installations:**
diff --git a/doc/administration/auth/README.md b/doc/administration/auth/README.md
index ef82c556468..a072cc73c43 100644
--- a/doc/administration/auth/README.md
+++ b/doc/administration/auth/README.md
@@ -6,7 +6,7 @@ group: Access
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# GitLab authentication and authorization
+# GitLab authentication and authorization **(FREE SELF)**
GitLab integrates with the following external authentication and authorization
providers:
diff --git a/doc/administration/auth/atlassian.md b/doc/administration/auth/atlassian.md
index 9c5da558b0d..365236748b9 100644
--- a/doc/administration/auth/atlassian.md
+++ b/doc/administration/auth/atlassian.md
@@ -5,7 +5,7 @@ group: Access
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# Atlassian OmniAuth Provider
+# Atlassian OmniAuth Provider **(FREE SELF)**
To enable the Atlassian OmniAuth provider for passwordless authentication you must register an application with Atlassian.
diff --git a/doc/administration/auth/authentiq.md b/doc/administration/auth/authentiq.md
index dbc5e446287..2eab4555c85 100644
--- a/doc/administration/auth/authentiq.md
+++ b/doc/administration/auth/authentiq.md
@@ -5,7 +5,7 @@ group: Access
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# Authentiq OmniAuth Provider
+# Authentiq OmniAuth Provider **(FREE SELF)**
To enable the Authentiq OmniAuth provider for passwordless authentication you must register an application with Authentiq.
diff --git a/doc/administration/auth/cognito.md b/doc/administration/auth/cognito.md
index f264321ea6c..de5fa991abe 100644
--- a/doc/administration/auth/cognito.md
+++ b/doc/administration/auth/cognito.md
@@ -5,7 +5,7 @@ group: Access
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# Amazon Web Services Cognito
+# Amazon Web Services Cognito **(FREE SELF)**
Amazon Cognito lets you add user sign-up, sign-in, and access control to your GitLab instance.
The following documentation enables Cognito as an OAuth2 provider.
diff --git a/doc/administration/auth/crowd.md b/doc/administration/auth/crowd.md
index 440d956fc8f..f83710ef4c7 100644
--- a/doc/administration/auth/crowd.md
+++ b/doc/administration/auth/crowd.md
@@ -5,7 +5,7 @@ group: Access
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# Atlassian Crowd OmniAuth Provider
+# Atlassian Crowd OmniAuth Provider **(FREE SELF)**
Authenticate to GitLab using the Atlassian Crowd OmniAuth provider. Enabling
this provider also allows Crowd authentication for Git-over-https requests.
diff --git a/doc/administration/auth/jwt.md b/doc/administration/auth/jwt.md
index 59d00265a1b..b74b70ee8c0 100644
--- a/doc/administration/auth/jwt.md
+++ b/doc/administration/auth/jwt.md
@@ -5,7 +5,7 @@ group: Access
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# JWT OmniAuth provider
+# JWT OmniAuth provider **(FREE SELF)**
To enable the JWT OmniAuth provider, you must register your application with JWT.
JWT will provide you with a secret key for you to use.
diff --git a/doc/administration/auth/ldap/index.md b/doc/administration/auth/ldap/index.md
index 0e55efba8ae..364c7cebea3 100644
--- a/doc/administration/auth/ldap/index.md
+++ b/doc/administration/auth/ldap/index.md
@@ -5,7 +5,7 @@ group: Access
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# General LDAP Setup
+# General LDAP setup **(FREE SELF)**
GitLab integrates with LDAP to support user authentication.
@@ -29,7 +29,7 @@ stands for **Lightweight Directory Access Protocol**, which is a standard
application protocol for accessing and maintaining distributed directory
information services over an Internet Protocol (IP) network.
-## Security **(FREE SELF)**
+## Security
GitLab assumes that LDAP users:
@@ -44,7 +44,7 @@ We recommend against using LDAP integration if your LDAP users are
allowed to change their 'mail', 'email' or 'userPrincipalName' attribute on
the LDAP server or share email addresses.
-### User deletion **(FREE SELF)**
+### User deletion
If a user is deleted from the LDAP server, they are also blocked in GitLab.
Users are immediately blocked from logging in. However, there is an
@@ -56,13 +56,13 @@ immediately block all access.
GitLab Enterprise Edition Premium supports a
[configurable sync time](#adjusting-ldap-user-sync-schedule). **(PREMIUM)**
-## Git password authentication **(FREE SELF)**
+## Git password authentication
LDAP-enabled users can always authenticate with Git using their GitLab username
or email and LDAP password, even if password authentication for Git is disabled
in the application settings.
-## Enabling LDAP sign-in for existing GitLab users **(FREE SELF)**
+## Enabling LDAP sign-in for existing GitLab users
When a user signs in to GitLab with LDAP for the first time, and their LDAP
email address is the primary email address of an existing GitLab user, then
@@ -73,7 +73,7 @@ In other words, if an existing GitLab user wants to enable LDAP sign-in for
themselves, they should check that their GitLab email address matches their
LDAP email address, and then sign into GitLab via their LDAP credentials.
-## Google Secure LDAP **(FREE SELF)**
+## Google Secure LDAP
> Introduced in GitLab 11.9.
@@ -81,7 +81,7 @@ LDAP email address, and then sign into GitLab via their LDAP credentials.
LDAP service that can be configured with GitLab for authentication and group sync.
See [Google Secure LDAP](google_secure_ldap.md) for detailed configuration instructions.
-## Configuration **(FREE SELF)**
+## Configuration
To enable LDAP integration you need to add your LDAP server settings in
`/etc/gitlab/gitlab.rb` or `/home/git/gitlab/config/gitlab.yml` for Omnibus
@@ -100,7 +100,7 @@ would be on port 389. `plain` also operates on port 389. Removed values: `tls` w
LDAP users must have a set email address, regardless of whether or not it's used
to sign in.
-### Example Configurations **(FREE SELF)**
+### Example Configurations
**Omnibus Configuration**
@@ -163,7 +163,7 @@ production:
...
```
-### Basic Configuration Settings **(FREE SELF)**
+### Basic Configuration Settings
| Setting | Description | Required | Examples |
| ------- | ----------- | -------- | -------- |
@@ -190,7 +190,7 @@ Some examples of the `user_filter` field syntax:
- `'(employeeType=developer)'`
- `'(&(objectclass=user)(|(samaccountname=momo)(samaccountname=toto)))'`
-### SSL Configuration Settings **(FREE SELF)**
+### SSL Configuration Settings
| Setting | Description | Required | Examples |
| ------- | ----------- | -------- | -------- |
@@ -200,7 +200,7 @@ Some examples of the `user_filter` field syntax:
| `cert` | Client certificate | no | `'-----BEGIN CERTIFICATE----- <REDACTED> -----END CERTIFICATE -----'` |
| `key` | Client private key | no | `'-----BEGIN PRIVATE KEY----- <REDACTED> -----END PRIVATE KEY -----'` |
-### Attribute Configuration Settings **(FREE SELF)**
+### Attribute Configuration Settings
LDAP attributes that GitLab uses to create an account for the LDAP user. The specified attribute can either be the attribute name as a string (for example, `'mail'`), or an array of attribute names to try in order (for example, `['mail', 'email']`). Note that the user's LDAP sign-in is the attribute specified as `uid` above.
@@ -221,7 +221,7 @@ LDAP attributes that GitLab uses to create an account for the LDAP user. The spe
| `external_groups` | An array of CNs of groups containing users that should be considered external. Note: Not `cn=interns` or the full DN. | no | `['interns', 'contractors']` |
| `sync_ssh_keys` | The LDAP attribute containing a user's public SSH key. | no | `'sshPublicKey'` or false if not set |
-### Set up LDAP user filter **(FREE SELF)**
+### Set up LDAP user filter
If you want to limit all GitLab access to a subset of the LDAP users on your
LDAP server, the first step should be to narrow the configured `base`. However,
@@ -266,7 +266,7 @@ Support for nested members in the user filter should not be confused with
Please note that GitLab does not support the custom filter syntax used by
OmniAuth LDAP.
-#### Escaping special characters **(FREE SELF)**
+#### Escaping special characters
The `user_filter` DN can contain special characters. For example:
@@ -297,7 +297,7 @@ The `user_filter` DN can contain special characters. For example:
OU=Gitlab \28Inc\29,DC=gitlab,DC=com
```
-### Enabling LDAP username lowercase **(FREE SELF)**
+### Enabling LDAP username lowercase
Some LDAP servers, depending on their configurations, can return uppercase usernames.
This can lead to several confusing issues such as creating links or namespaces with uppercase names.
@@ -335,7 +335,7 @@ the configuration option `lowercase_usernames`. By default, this configuration o
1. [Restart GitLab](../../restart_gitlab.md#installations-from-source) for the changes to take effect.
-### Disable LDAP web sign in **(FREE SELF)**
+### Disable LDAP web sign in
It can be useful to prevent using LDAP credentials through the web UI when
an alternative such as SAML is preferred. This allows LDAP to be used for group
@@ -367,7 +367,7 @@ This does not disable [using LDAP credentials for Git access](#git-password-auth
1. [Restart GitLab](../../restart_gitlab.md#installations-from-source) for the changes to take effect.
-### Using encrypted credentials **(FREE SELF)**
+### Using encrypted credentials
Instead of having the LDAP integration credentials stored in plaintext in the configuration files, you can optionally
use an encrypted file for the LDAP credentials. To use this feature, you first need to enable
@@ -454,7 +454,7 @@ If initially your LDAP configuration looked like:
1. [Restart GitLab](../../restart_gitlab.md#installations-from-source) for the changes to take effect.
-## Encryption **(FREE SELF)**
+## Encryption
### TLS Server Authentication
diff --git a/doc/administration/auth/ldap/ldap-troubleshooting.md b/doc/administration/auth/ldap/ldap-troubleshooting.md
index 9d9c01b870f..1e6684751ed 100644
--- a/doc/administration/auth/ldap/ldap-troubleshooting.md
+++ b/doc/administration/auth/ldap/ldap-troubleshooting.md
@@ -5,7 +5,7 @@ group: Access
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# LDAP Troubleshooting for Administrators
+# LDAP Troubleshooting for Administrators **(FREE SELF)**
## Common Problems & Workflows
@@ -27,7 +27,7 @@ Could not authenticate you from Ldapmain because "Connection timed out - user sp
```
If your configured LDAP provider and/or endpoint is offline or otherwise
-unreachable by GitLab, no LDAP user will be able to authenticate and sign-in.
+unreachable by GitLab, no LDAP user is able to authenticate and sign-in.
GitLab does not cache or store credentials for LDAP users to provide authentication
during an LDAP outage.
@@ -181,14 +181,13 @@ user = User.find_by_any_email('email@example.com')
user.username
```
-This will show you which user has this email address. One of two steps will
-have to be taken here:
+This shows you which user has this email address. One of two steps must be taken here:
- To create a new GitLab user/username for this user when signing in with LDAP,
remove the secondary email to remove the conflict.
- To use an existing GitLab user/username for this user to use with LDAP,
remove this email as a secondary email and make it a primary one so GitLab
- will associate this profile to the LDAP identity.
+ associates this profile to the LDAP identity.
The user can do either of these steps [in their
profile](../../../user/profile/index.md#access-your-user-profile) or an administrator can do it.
@@ -227,7 +226,7 @@ output](#example-console-output-after-a-user-sync).
##### Example console output after a user sync **(PREMIUM SELF)**
-The output from a [manual user sync](#sync-all-users) will be very verbose, and a
+The output from a [manual user sync](#sync-all-users) is very verbose, and a
single user's successful sync can look like this:
```shell
@@ -255,8 +254,8 @@ uid: John
There's a lot here, so let's go over what could be helpful when debugging.
-First, GitLab will look for all users that have previously
-signed in with LDAP and iterate on them. Each user's sync will start with
+First, GitLab looks for all users that have previously
+signed in with LDAP and iterate on them. Each user's sync starts with
the following line that contains the user's username and email, as they
exist in GitLab now:
@@ -274,7 +273,7 @@ link between this user and the configured LDAP provider(s):
Identity Load (0.9ms) SELECT "identities".* FROM "identities" WHERE "identities"."user_id" = 20 AND (provider LIKE 'ldap%') LIMIT 1
```
-The identity object will have the DN that GitLab will use to look for the user
+The identity object has the DN that GitLab uses to look for the user
in LDAP. If the DN isn't found, the email is used instead. We can see that
this user is found in LDAP:
@@ -294,18 +293,18 @@ following message instead:
LDAP search error: No Such Object
```
-...in which case the user will be blocked:
+...in which case the user is blocked:
```shell
User Update (0.4ms) UPDATE "users" SET "state" = $1, "updated_at" = $2 WHERE "users"."id" = $3 [["state", "ldap_blocked"], ["updated_at", "2019-10-18 15:46:22.902177"], ["id", 20]]
```
-Once the user is found in LDAP the rest of the output will update the GitLab
+Once the user is found in LDAP, the rest of the output updates the GitLab
database with any changes.
#### Query a user in LDAP
-This will test that GitLab can reach out to LDAP and read a particular user.
+This tests that GitLab can reach out to LDAP and read a particular user.
It can expose potential errors connecting to and/or querying LDAP
that may seem to fail silently in the GitLab UI.
diff --git a/doc/administration/auth/oidc.md b/doc/administration/auth/oidc.md
index cde8944fadc..30ca7d94a1e 100644
--- a/doc/administration/auth/oidc.md
+++ b/doc/administration/auth/oidc.md
@@ -5,12 +5,12 @@ group: Access
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# OpenID Connect OmniAuth provider
+# OpenID Connect OmniAuth provider **(FREE SELF)**
GitLab can use [OpenID Connect](https://openid.net/specs/openid-connect-core-1_0.html) as an OmniAuth provider.
To enable the OpenID Connect OmniAuth provider, you must register your application with an OpenID Connect provider.
-The OpenID Connect will provide you with a client details and secret for you to use.
+The OpenID Connect provides you with a client's details and secret for you to use.
1. On your GitLab server, open the configuration file.
@@ -86,21 +86,21 @@ The OpenID Connect will provide you with a client details and secret for you to
and the [OpenID Connect Core 1.0 specification](https://openid.net/specs/openid-connect-core-1_0.html).
1. For the configuration above, change the values for the provider to match your OpenID Connect client setup. Use the following as a guide:
- - `<your_oidc_label>` is the label that will be displayed on the login page.
- - `<custom_provider_icon>` (optional) is the icon that will be displayed on the login page. Icons for the major social login platforms are built-in into GitLab,
+ - `<your_oidc_label>` is the label that appears on the login page.
+ - `<custom_provider_icon>` (optional) is the icon that appears on the login page. Icons for the major social login platforms are built-in into GitLab,
but can be overridden by specifying this parameter. Both local paths and absolute URLs are accepted.
- `<your_oidc_url>` (optional) is the URL that points to the OpenID Connect provider. For example, `https://example.com/auth/realms/your-realm`.
If this value is not provided, the URL is constructed from the `client_options` in the following format: `<client_options.scheme>://<client_options.host>:<client_options.port>`.
- - If `discovery` is set to `true`, the OpenID Connect provider will try to auto discover the client options using `<your_oidc_url>/.well-known/openid-configuration`. Defaults to `false`.
+ - If `discovery` is set to `true`, the OpenID Connect provider attempts to auto discover the client options using `<your_oidc_url>/.well-known/openid-configuration`. Defaults to `false`.
- `client_auth_method` (optional) specifies the method used for authenticating the client with the OpenID Connect provider.
- Supported values are:
- `basic` - HTTP Basic Authentication
- `jwt_bearer` - JWT based authentication (private key and client secret signing)
- `mtls` - Mutual TLS or X.509 certificate validation
- - Any other value will POST the client ID and secret in the request body
+ - Any other value POSTs the client ID and secret in the request body
- If not specified, defaults to `basic`.
- - `<uid_field>` (optional) is the field name from the `user_info.raw_attributes` details that will be used as `uid` value. For example, `preferred_username`.
- If this value is not provided or the field with the configured value is missing from the `user_info.raw_attributes` details, the `uid` will use the `sub` field.
+ - `<uid_field>` (optional) is the field name from the `user_info.raw_attributes` that defines the value for `uid`. For example, `preferred_username`.
+ If this value is not provided or the field with the configured value is missing from the `user_info.raw_attributes` details, the `uid` uses the `sub` field.
- `send_scope_to_token_endpoint` is `true` by default. In other words, the `scope` parameter is normally included in requests to the token endpoint.
However, if your OpenID Connect provider does not accept the `scope` parameter in such requests, set this to `false`.
- `client_options` are the OpenID Connect client-specific options. Specifically:
@@ -119,9 +119,9 @@ The OpenID Connect will provide you with a client details and secret for you to
for the changes to take effect if you installed GitLab via Omnibus or from source respectively.
On the sign in page, there should now be an OpenID Connect icon below the regular sign in form.
-Click the icon to begin the authentication process. The OpenID Connect provider will ask the user to
+Click the icon to begin the authentication process. The OpenID Connect provider asks the user to
sign in and authorize the GitLab application (if confirmation required by the client). If everything goes well, the user
-will be redirected to GitLab and will be signed in.
+is redirected to GitLab and signed in.
## Example configurations
@@ -134,25 +134,26 @@ See the [Google documentation](https://developers.google.com/identity/protocols/
for more details:
```ruby
- gitlab_rails['omniauth_providers'] = [
- {
- 'name' => 'openid_connect',
- 'label' => 'Google OpenID',
- 'args' => {
- 'name' => 'openid_connect',
- 'scope' => ['openid', 'profile', 'email'],
- 'response_type' => 'code',
- 'issuer' => 'https://accounts.google.com',
- 'client_auth_method' => 'query',
- 'discovery' => true,
- 'uid_field' => 'preferred_username',
- 'client_options' => {
- 'identifier' => '<YOUR PROJECT CLIENT ID>',
- 'secret' => '<YOUR PROJECT CLIENT SECRET>',
- 'redirect_uri' => 'https://example.com/users/auth/openid_connect/callback',
+gitlab_rails['omniauth_providers'] = [
+ {
+ 'name' => 'openid_connect',
+ 'label' => 'Google OpenID',
+ 'args' => {
+ 'name' => 'openid_connect',
+ 'scope' => ['openid', 'profile', 'email'],
+ 'response_type' => 'code',
+ 'issuer' => 'https://accounts.google.com',
+ 'client_auth_method' => 'query',
+ 'discovery' => true,
+ 'uid_field' => 'preferred_username',
+ 'client_options' => {
+ 'identifier' => '<YOUR PROJECT CLIENT ID>',
+ 'secret' => '<YOUR PROJECT CLIENT SECRET>',
+ 'redirect_uri' => 'https://example.com/users/auth/openid_connect/callback',
+ }
}
- }
- }
+ }
+]
```
### Microsoft Azure
@@ -170,30 +171,177 @@ to obtain the tenant ID, client ID, and client secret for your app.
Example Omnibus configuration block:
```ruby
- gitlab_rails['omniauth_providers'] = [
- {
- 'name' => 'openid_connect',
- 'label' => 'Azure OIDC',
- 'args' => {
- 'name' => 'openid_connect',
- 'scope' => ['openid', 'profile', 'email'],
- 'response_type' => 'code',
- 'issuer' => 'https://login.microsoftonline.com/<YOUR-TENANT-ID>/v2.0',
- 'client_auth_method' => 'query',
- 'discovery' => true,
- 'uid_field' => 'preferred_username',
- 'client_options' => {
- 'identifier' => '<YOUR APP CLIENT ID>',
- 'secret' => '<YOUR APP CLIENT SECRET>',
- 'redirect_uri' => 'https://gitlab.example.com/users/auth/openid_connect/callback'
- }
- }
- }
+gitlab_rails['omniauth_providers'] = [
+ {
+ 'name' => 'openid_connect',
+ 'label' => 'Azure OIDC',
+ 'args' => {
+ 'name' => 'openid_connect',
+ 'scope' => ['openid', 'profile', 'email'],
+ 'response_type' => 'code',
+ 'issuer' => 'https://login.microsoftonline.com/<YOUR-TENANT-ID>/v2.0',
+ 'client_auth_method' => 'query',
+ 'discovery' => true,
+ 'uid_field' => 'preferred_username',
+ 'client_options' => {
+ 'identifier' => '<YOUR APP CLIENT ID>',
+ 'secret' => '<YOUR APP CLIENT SECRET>',
+ 'redirect_uri' => 'https://gitlab.example.com/users/auth/openid_connect/callback'
+ }
+ }
+ }
+]
```
Microsoft has documented how its platform works with [the OIDC protocol](https://docs.microsoft.com/en-us/azure/active-directory/develop/v2-protocols-oidc).
-## Troubleshooting
+### Microsoft Azure Active Directory B2C
+
+While GitLab works with [Azure Active Directory B2C](https://docs.microsoft.com/en-us/azure/active-directory-b2c/overview), it requires special
+configuration to work. To get started, sign in to the [Azure Portal](https://portal.azure.com).
+For your app, you'll need the following information from Azure:
+
+- A tenant ID. You may already have one. For more information, review the
+ [Microsoft Azure Tenant](https://docs.microsoft.com/en-us/azure/active-directory/develop/quickstart-create-new-tenant) documentation.
+- A client ID and a client secret. Follow the instructions in the
+ [Microsoft tutorial](https://docs.microsoft.com/en-us/azure/active-directory-b2c/tutorial-register-applications?tabs=app-reg-ga) documentation to obtain the
+ client ID and client secret for your app.
+- The user flow or policy name. Follow the instructions in the [Microsoft tutorial](https://docs.microsoft.com/en-us/azure/active-directory-b2c/tutorial-create-user-flows?pivots=b2c-user-flow).
+
+If your GitLab domain is `gitlab.example.com`, ensure the app has the following `Redirect URI`:
+
+`https://gitlab.example.com/users/auth/openid_connect/callback`
+
+In addition, ensure that [ID tokens are enabled](https://docs.microsoft.com/en-us/azure/active-directory-b2c/tutorial-register-applications?tabs=app-reg-ga#enable-id-token-implicit-grant).
+
+Add the following API permissions to the app:
+
+1. `openid`
+1. `offline_access`
+
+#### Configure custom policies
+
+Azure B2C [offers two ways of defining the business logic for logging in a user](https://docs.microsoft.com/en-us/azure/active-directory-b2c/user-flow-overview):
+
+- [User flows](https://docs.microsoft.com/en-us/azure/active-directory-b2c/user-flow-overview#user-flows)
+- [Custom policies](https://docs.microsoft.com/en-us/azure/active-directory-b2c/user-flow-overview#custom-policies)
+
+While cumbersome to configure, custom policies are required because
+standard Azure B2C user flows [do not send the OpenID `email` claim](https://github.com/MicrosoftDocs/azure-docs/issues/16566). In
+other words, they do not work with the [`allow_single_sign_on` or `auto_link_user`
+parameters](../../integration/omniauth.md#initial-omniauth-configuration).
+With a standard Azure B2C policy, GitLab cannot create a new account or
+link to an existing one with an e-mail address.
+
+Carefully follow the instructions for [creating a custom policy](https://docs.microsoft.com/en-us/azure/active-directory-b2c/tutorial-create-user-flows?pivots=b2c-custom-policy).
+
+The Microsoft instructions use `SocialAndLocalAccounts` in the [custom policy starter pack](https://docs.microsoft.com/en-us/azure/active-directory-b2c/tutorial-create-user-flows?pivots=b2c-custom-policy#custom-policy-starter-pack),
+but `LocalAccounts` works for authenticating against local, Active Directory accounts. Before you follow the instructions to [upload the polices](https://docs.microsoft.com/en-us/azure/active-directory-b2c/tutorial-create-user-flows?pivots=b2c-custom-policy#upload-the-policies), do the following:
+
+1. To export the `email` claim, modify the `SignUpOrSignin.xml`. Replace the following line:
+
+ ```xml
+ <OutputClaim ClaimTypeReferenceId="email" />
+ ```
+
+ with:
+
+ ```xml
+ <OutputClaim ClaimTypeReferenceId="signInNames.emailAddress" PartnerClaimType="email" />
+ ```
+
+1. For OIDC discovery to work with B2C, the policy must be configured with an issuer compatible with the [OIDC
+specification](https://openid.net/specs/openid-connect-discovery-1_0.html#rfc.section.4.3).
+See the [token compatibility settings](https://docs.microsoft.com/en-us/azure/active-directory-b2c/configure-tokens?pivots=b2c-custom-policy#token-compatibility-settings).
+In `TrustFrameworkBase.xml` under `JwtIssuer`, set `IssuanceClaimPattern` to `AuthorityWithTfp`:
+
+ ```xml
+ <ClaimsProvider>
+ <DisplayName>Token Issuer</DisplayName>
+ <TechnicalProfiles>
+ <TechnicalProfile Id="JwtIssuer">
+ <DisplayName>JWT Issuer</DisplayName>
+ <Protocol Name="None" />
+ <OutputTokenFormat>JWT</OutputTokenFormat>
+ <Metadata>
+ <Item Key="IssuanceClaimPattern">AuthorityWithTfp</Item>
+ ...
+ ```
+
+1. Now [upload the policy](https://docs.microsoft.com/en-us/azure/active-directory-b2c/tutorial-create-user-flows?pivots=b2c-custom-policy#upload-the-policies). Overwrite
+the existing files if you are updating an existing policy.
+
+1. Determine the issuer URL using the sign-in policy. The issuer URL will be in the form:
+
+ ```markdown
+ https://<YOUR-DOMAIN>/tfp/<YOUR-TENANT-ID>/<YOUR-SIGN-IN-POLICY-NAME>/v2.0/
+ ```
+
+ The policy name is lowercased in the URL. For example, `B2C_1A_signup_signin`
+ policy appears as `b2c_1a_signup_sigin`.
+
+ Note that the trailing forward slash is required.
+
+1. Verify the operation of the OIDC discovery URL and issuer URL, append `.well-known/openid-configuration`
+to the issuer URL:
+
+ ```markdown
+ https://<YOUR-DOMAIN>/tfp/<YOUR-TENANT-ID>/<YOUR-SIGN-IN-POLICY-NAME>/v2.0/.well-known/openid-configuration
+ ```
+
+ For example, if `domain` is `example.b2clogin.com` and tenant ID is `fc40c736-476c-4da1-b489-ee48cee84386`, you can use `curl` and `jq` to
+extract the issuer:
+
+ ```shell
+ $ curl --silent "https://example.b2clogin.com/tfp/fc40c736-476c-4da1-b489-ee48cee84386/b2c_1a_signup_signin/v2.0/.well-known/openid-configuration" | jq .issuer
+ "https://example.b2clogin.com/tfp/fc40c736-476c-4da1-b489-ee48cee84386/b2c_1a_signup_signin/v2.0/"
+ ```
+
+1. Configure the issuer URL with the custom policy used for
+`signup_signin`. For example, this is the Omnibus configuration with a
+custom policy for `b2c_1a_signup_signin`:
+
+```ruby
+gitlab_rails['omniauth_providers'] = [
+{
+ 'name' => 'openid_connect',
+ 'label' => 'Azure B2C OIDC',
+ 'args' => {
+ 'name' => 'openid_connect',
+ 'scope' => ['openid'],
+ 'response_mode' => 'query',
+ 'response_type' => 'id_token',
+ 'issuer' => 'https://<YOUR-DOMAIN>/tfp/<YOUR-TENANT-ID>/b2c_1a_signup_signin/v2.0/',
+ 'client_auth_method' => 'query',
+ 'discovery' => true,
+ 'send_scope_to_token_endpoint' => true,
+ 'client_options' => {
+ 'identifier' => '<YOUR APP CLIENT ID>',
+ 'secret' => '<YOUR APP CLIENT SECRET>',
+ 'redirect_uri' => 'https://gitlab.example.com/users/auth/openid_connect/callback'
+ }
+ }
+}]
+```
+
+#### Troubleshooting Azure B2C
+
+- Ensure all occurrences of `yourtenant.onmicrosoft.com`, `ProxyIdentityExperienceFrameworkAppId`, and `IdentityExperienceFrameworkAppId` match your B2C tenant hostname and
+the respective client IDs in the XML policy files.
+
+- Add `https://jwt.ms` as a redirect URI to the app, and use the [custom policy tester](https://docs.microsoft.com/en-us/azure/active-directory-b2c/tutorial-create-user-flows?pivots=b2c-custom-policy#test-the-custom-policy).
+Make sure the payload includes `email` that matches the user's e-mail access.
+
+- After you enable the custom policy, users might see "Invalid username or password" after they try to sign in. This might be a configuration
+issue with the `IdentityExperienceFramework` app. See [this Microsoft comment](https://docs.microsoft.com/en-us/answers/questions/50355/unable-to-sign-on-using-custom-policy.html?childToView=122370#comment-122370)
+that suggests checking that the app manifest contains these settings:
+
+ - `"accessTokenAcceptedVersion": null`
+ - `"signInAudience": "AzureADMyOrg"`
+
+ Note that this configuration corresponds with the `Supported account types` setting used when creating the `IdentityExperienceFramework` app.
+
+## General troubleshooting
If you're having trouble, here are some tips:
diff --git a/doc/administration/auth/smartcard.md b/doc/administration/auth/smartcard.md
index 4fe69d0160e..07c29984552 100644
--- a/doc/administration/auth/smartcard.md
+++ b/doc/administration/auth/smartcard.md
@@ -108,6 +108,11 @@ attribute. As a prerequisite, you must use an LDAP server that:
- Supports the `certificateExactMatch` matching rule.
- Has the certificate stored in the `userCertificate` attribute.
+NOTE:
+Active Directory doesn't support the `certificateExactMatch` matching rule so
+[it is not supported at this time](https://gitlab.com/gitlab-org/gitlab/-/issues/327491). For
+more information, see [the relevant issue](https://gitlab.com/gitlab-org/gitlab/-/issues/328074).
+
## Configure GitLab for smartcard authentication
**For Omnibus installations**
diff --git a/doc/administration/compliance.md b/doc/administration/compliance.md
index 04369a76fb3..470dc1b4f9e 100644
--- a/doc/administration/compliance.md
+++ b/doc/administration/compliance.md
@@ -27,3 +27,6 @@ relevant compliance standards.
|**[Auditor users](auditor_users.md)**<br>Auditor users are users who are given read-only access to all projects, groups, and other resources on the GitLab instance.|Premium+||Instance|
|**[Credentials inventory](../user/admin_area/credentials_inventory.md)**<br>With a credentials inventory, GitLab administrators can keep track of the credentials used by all of the users in their GitLab instance. |Ultimate||Instance|
|**Separation of Duties using [Protected branches](../user/project/protected_branches.md#protected-branches-approval-by-code-owners) and [custom CI Configuration Paths](../ci/pipelines/settings.md#custom-cicd-configuration-path)**<br> GitLab Premium users can leverage the GitLab cross-project YAML configurations to define deployers of code and developers of code. View the [Separation of Duties Deploy Project](https://gitlab.com/guided-explorations/separation-of-duties-deploy/blob/master/README.md) and [Separation of Duties Project](https://gitlab.com/guided-explorations/separation-of-duties/blob/master/README.md) to see how to use this set up to define these roles.|Premium+|✓|Project|
+|**[Compliance frameworks](../user/project/settings/index.md#compliance-frameworks)**<br>Create a custom compliance framework at the group level to describe the type of compliance requirements any child project needs to follow. |Premium+|✓|Group|
+|**[Compliance pipelines](../user/project/settings/index.md#compliance-pipeline-configuration)**<br>Define a pipeline configuration to run for any projects with a given compliance framework.|Ultimate|✓|Group|
+|**[Compliance dashboard](../user/compliance/compliance_dashboard/index.md)**<br>Quickly get visibility into the compliance posture of your organization.|Ultimate|✓|Group|
diff --git a/doc/administration/consul.md b/doc/administration/consul.md
index 926267a414a..a748259aff0 100644
--- a/doc/administration/consul.md
+++ b/doc/administration/consul.md
@@ -83,7 +83,7 @@ curl "http://127.0.0.1:8500/v1/health/state/critical"
Consul nodes communicate using the raft protocol. If the current leader goes
offline, there needs to be a leader election. A leader node must exist to facilitate
synchronization across the cluster. If too many nodes go offline at the same time,
-the cluster will lose quorum and not elect a leader due to
+the cluster loses quorum and doesn't elect a leader due to
[broken consensus](https://www.consul.io/docs/architecture/consensus).
Consult the [troubleshooting section](#troubleshooting-consul) if the cluster is not
@@ -122,19 +122,19 @@ db-a XX.XX.X.Y:8301 alive client 0.9.0 2 gitlab_co
db-b XX.XX.X.Y:8301 alive client 0.9.0 2 gitlab_consul
```
-Ideally all nodes will have a `Status` of `alive`.
+Ideally all nodes have a `Status` of `alive`.
### Restart Consul
If it is necessary to restart Consul, it is important to do this in
a controlled manner to maintain quorum. If quorum is lost, to recover the cluster,
-you will need to follow the Consul [outage recovery](#outage-recovery) process.
+you follow the Consul [outage recovery](#outage-recovery) process.
To be safe, it's recommended that you only restart Consul in one node at a time to
ensure the cluster remains intact. For larger clusters, it is possible to restart
multiple nodes at a time. See the
[Consul consensus document](https://www.consul.io/docs/architecture/consensus#deployment-table)
-for how many failures it can tolerate. This will be the number of simultaneous
+for the number of failures it can tolerate. This will be the number of simultaneous
restarts it can sustain.
To restart Consul:
@@ -145,13 +145,13 @@ sudo gitlab-ctl restart consul
### Consul nodes unable to communicate
-By default, Consul will attempt to
+By default, Consul attempts to
[bind](https://www.consul.io/docs/agent/options#_bind) to `0.0.0.0`, but
-it will advertise the first private IP address on the node for other Consul nodes
+it advertises the first private IP address on the node for other Consul nodes
to communicate with it. If the other nodes cannot communicate with a node on
-this address, then the cluster will have a failed status.
+this address, then the cluster has a failed status.
-If you are running into this issue, you will see messages like the following in `gitlab-ctl tail consul` output:
+If you run into this issue, then messages like the following are output in `gitlab-ctl tail consul`:
```plaintext
2017-09-25_19:53:39.90821 2017/09/25 19:53:39 [WARN] raft: no known peers, aborting election
@@ -181,10 +181,10 @@ If you still see the errors, you may have to
### Consul does not start - multiple private IPs
-In case that a node has multiple private IPs, Consul will be confused as to
-which of the private addresses to advertise, and then immediately exit on start.
+If a node has multiple private IPs, Consul doesn't know about
+which of the private addresses to advertise, and then it immediately exits on start.
-You will see messages like the following in `gitlab-ctl tail consul` output:
+Messages like the following are output in `gitlab-ctl tail consul`:
```plaintext
2017-11-09_17:41:45.52876 ==> Starting Consul agent...
@@ -211,8 +211,8 @@ To fix this:
### Outage recovery
-If you lost enough Consul nodes in the cluster to break quorum, then the cluster
-is considered failed, and it will not function without manual intervention.
+If you have lost enough Consul nodes in the cluster to break quorum, then the cluster
+is considered to have failed and cannot function without manual intervention.
In that case, you can either recreate the nodes from scratch or attempt a
recover.
diff --git a/doc/administration/database_load_balancing.md b/doc/administration/database_load_balancing.md
index fe30d98b92d..bd34a82f688 100644
--- a/doc/administration/database_load_balancing.md
+++ b/doc/administration/database_load_balancing.md
@@ -39,7 +39,7 @@ at least 1 secondary in [hot standby](https://www.postgresql.org/docs/11/hot-sta
Load balancing also requires that the configured hosts **always** point to the
primary, even after a database failover. Furthermore, the additional hosts to
balance load among must **always** point to secondary databases. This means that
-you should put a load balance in front of every database, and have GitLab connect
+you should put a load balancer in front of every database, and have GitLab connect
to those load balancers.
For example, say you have a primary (`db1.gitlab.com`) and two secondaries,
@@ -161,11 +161,11 @@ records, eventually dropping this hostname from rotation if it can't resolve its
The `interval` value specifies the _minimum_ time between checks. If the A
record has a TTL greater than this value, then service discovery will honor said
TTL. For example, if the TTL of the A record is 90 seconds, then service
-discovery will wait at least 90 seconds before checking the A record again.
+discovery waits at least 90 seconds before checking the A record again.
When the list of hosts is updated, it might take a while for the old connections
to be terminated. The `disconnect_timeout` setting can be used to enforce an
-upper limit on the time it will take to terminate all old database connections.
+upper limit on the time it takes to terminate all old database connections.
Some nameservers (like [Consul](https://www.consul.io/docs/discovery/dns#udp-based-dns-queries)) can return a truncated list of hosts when
queried over UDP. To overcome this issue, you can use TCP for querying by setting
@@ -179,7 +179,7 @@ all-in-one package based installations as well as GitLab Helm chart deployments.
If you use an application server that forks, such as Unicorn, you _have to_
update your Unicorn configuration to start service discovery _after_ a fork.
-Failure to do so will lead to service discovery only running in the parent
+Failure to do so leads to service discovery only running in the parent
process. If you are using Unicorn, then you can add the following to your
Unicorn configuration file:
@@ -190,13 +190,13 @@ after_fork do |server, worker|
end
```
-This will ensure that service discovery is started in both the parent and all
+This ensures that service discovery is started in both the parent and all
child processes.
## Balancing queries
-Read-only `SELECT` queries will be balanced among all the secondary hosts.
-Everything else (including transactions) will be executed on the primary.
+Read-only `SELECT` queries balance among all the secondary hosts.
+Everything else (including transactions) executes on the primary.
Queries such as `SELECT ... FOR UPDATE` are also executed on the primary.
## Prepared statements
@@ -207,19 +207,19 @@ response timings.
## Primary sticking
-After a write has been performed, GitLab will stick to using the primary for a
-certain period of time, scoped to the user that performed the write. GitLab will
-revert back to using secondaries when they have either caught up, or after 30
+After a write has been performed, GitLab sticks to using the primary for a
+certain period of time, scoped to the user that performed the write. GitLab
+reverts back to using secondaries when they have either caught up, or after 30
seconds.
## Failover handling
-In the event of a failover or an unresponsive database, the load balancer will
-try to use the next available host. If no secondaries are available the
+In the event of a failover or an unresponsive database, the load balancer
+tries to use the next available host. If no secondaries are available the
operation is performed on the primary instead.
-In the event of a connection error being produced when writing data, the
-operation will be retried up to 3 times using an exponential back-off.
+If a connection error occurs while writing data, the
+operation is retried up to 3 times using an exponential back-off.
When using load balancing, you should be able to safely restart a database server
without it immediately leading to errors being presented to the users.
@@ -251,9 +251,9 @@ For example:
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/3526) in [GitLab Premium](https://about.gitlab.com/pricing/) 10.3.
-To prevent reading from an outdated secondary the load balancer will check if it
+To prevent reading from an outdated secondary the load balancer checks if it
is in sync with the primary. If the data is determined to be recent enough the
-secondary can be used, otherwise it will be ignored. To reduce the overhead of
+secondary is used, otherwise it is ignored. To reduce the overhead of
these checks we only perform these checks at certain intervals.
There are three configuration options that influence this behavior:
diff --git a/doc/administration/encrypted_configuration.md b/doc/administration/encrypted_configuration.md
index 508f4314694..ff48005e427 100644
--- a/doc/administration/encrypted_configuration.md
+++ b/doc/administration/encrypted_configuration.md
@@ -18,7 +18,7 @@ In order to enable the encrypted configuration settings, a new base key needs to
**Omnibus Installation**
-Starting with 13.7 the new secret is automatically generated for you, but you will need to ensure your
+Starting with 13.7 the new secret is automatically generated for you, but you need to ensure your
`/etc/gitlab/gitlab-secrets.json` contains the same values on all nodes.
**GitLab Cloud Native Helm Chart**
@@ -34,4 +34,4 @@ The new secret can be generated by running:
bundle exec rake gitlab:env:info RAILS_ENV=production GITLAB_GENERATE_ENCRYPTED_SETTINGS_KEY_BASE=true
```
-This will print general information on the GitLab instance, but will also cause the key to be generated in `<path-to-gitlab-rails>/config/secrets.yml`
+This prints general information on the GitLab instance, but also causes the key to be generated in `<path-to-gitlab-rails>/config/secrets.yml`
diff --git a/doc/administration/external_pipeline_validation.md b/doc/administration/external_pipeline_validation.md
index f8329b24d6c..89543e446ac 100644
--- a/doc/administration/external_pipeline_validation.md
+++ b/doc/administration/external_pipeline_validation.md
@@ -5,27 +5,28 @@ info: To determine the technical writer assigned to the Stage/Group associated w
type: reference, howto
---
-# External Pipeline Validation
+# External pipeline validation
You can use an external service to validate a pipeline before it's created.
WARNING:
This is an experimental feature and subject to change without notice.
-## Usage
-
GitLab sends a POST request to the external service URL with the pipeline
-data as payload. GitLab then invalidates the pipeline based on the response
-code. If there's an error or the request times out, the pipeline is not
-invalidated.
+data as payload. The response code from the external service determines if GitLab
+should accept or reject the pipeline. If the response is:
+
+- `200`, the pipeline is accepted.
+- `4XX`, the pipeline is rejected.
+- Other codes, the pipeline is accepted and logged.
-Response codes:
+If there's an error or the request times out, the pipeline is accepted.
-- `200`: Accepted
-- `4XX`: Not accepted
-- All other codes: accepted and logged
+Pipelines rejected by the external validation service aren't created, and don't
+appear in pipeline lists in the GitLab UI or API. If you create a pipeline in the
+UI that is rejected, `Pipeline cannot be run. External validation failed` is displayed.
-## Configuration
+## Configure external pipeline validation
To configure external pipeline validation, add the
[`EXTERNAL_VALIDATION_SERVICE_URL` environment variable](environment_variables.md)
@@ -35,7 +36,7 @@ By default, requests to the external service time out after five seconds. To ove
the default, set the `EXTERNAL_VALIDATION_SERVICE_TIMEOUT` environment variable to the
required number of seconds.
-## Payload Schema
+## Payload schema
```json
{
diff --git a/doc/administration/geo/disaster_recovery/index.md b/doc/administration/geo/disaster_recovery/index.md
index d1ea2978202..7c6f4a32b57 100644
--- a/doc/administration/geo/disaster_recovery/index.md
+++ b/doc/administration/geo/disaster_recovery/index.md
@@ -62,8 +62,7 @@ must disable the **primary** node.
- If you do not have SSH access to the **primary** node, take the machine offline and
prevent it from rebooting by any means at your disposal.
- Since there are many ways you may prefer to accomplish this, we will avoid a
- single recommendation. You may need to:
+ You might need to:
- Reconfigure the load balancers.
- Change DNS records (for example, point the primary DNS record to the
@@ -240,11 +239,11 @@ an external PostgreSQL database, as it can only perform changes on a **secondary
node with GitLab and the database on the same machine. As a result, a manual process is
required:
-1. Promote the replica database associated with the **secondary** site. This will
- set the database to read-write. The instructions vary depending on where your database is hosted:
+1. Promote the replica database associated with the **secondary** site. This
+ sets the database to read-write. The instructions vary depending on where your database is hosted:
- [Amazon RDS](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_ReadRepl.html#USER_ReadRepl.Promote)
- [Azure PostgreSQL](https://docs.microsoft.com/en-us/azure/postgresql/howto-read-replicas-portal#stop-replication)
- - For other external PostgreSQL databases, save the following script in you
+ - For other external PostgreSQL databases, save the following script in your
secondary node, for example `/tmp/geo_promote.sh`, and modify the connection
parameters to match your environment. Then, execute it to promote the replica:
@@ -292,7 +291,7 @@ required:
### Step 4. (Optional) Updating the primary domain DNS record
Updating the DNS records for the primary domain to point to the **secondary** node
-will prevent the need to update all references to the primary domain to the
+to prevent the need to update all references to the primary domain to the
secondary domain, like changing Git remotes and API URLs.
1. SSH into the **secondary** node and login as root:
@@ -311,7 +310,7 @@ secondary domain, like changing Git remotes and API URLs.
```
NOTE:
- Changing `external_url` won't prevent access via the old secondary URL, as
+ Changing `external_url` does not prevent access via the old secondary URL, as
long as the secondary DNS records are still intact.
1. Reconfigure the **secondary** node for the change to take effect:
@@ -326,7 +325,7 @@ secondary domain, like changing Git remotes and API URLs.
gitlab-rake geo:update_primary_node_url
```
- This command will use the changed `external_url` configuration defined
+ This command uses the changed `external_url` configuration defined
in `/etc/gitlab/gitlab.rb`.
1. For GitLab 11.11 through 12.7 only, you may need to update the **primary**
@@ -334,7 +333,7 @@ secondary domain, like changing Git remotes and API URLs.
To determine if you need to do this, search for the
`gitlab_rails["geo_node_name"]` setting in your `/etc/gitlab/gitlab.rb`
- file. If it is commented out with `#` or not found at all, then you will
+ file. If it is commented out with `#` or not found at all, then you
need to update the **primary** node's name in the database. You can search for it
like so:
@@ -444,7 +443,7 @@ and after that you also need two extra steps.
Now we need to make each **secondary** node listen to changes on the new **primary** node. To do that you need
to [initiate the replication process](../setup/database.md#step-3-initiate-the-replication-process) again but this time
-for another **primary** node. All the old replication settings will be overwritten.
+for another **primary** node. All the old replication settings are overwritten.
## Promoting a secondary Geo cluster in GitLab Cloud Native Helm Charts
@@ -479,8 +478,7 @@ must disable the **primary** site:
- If you do not have access to the **primary** Kubernetes cluster, take the cluster offline and
prevent it from coming back online by any means at your disposal.
- Since there are many ways you may prefer to accomplish this, we will avoid a
- single recommendation. You may need to:
+ You might need to:
- Reconfigure the load balancers.
- Change DNS records (for example, point the primary DNS record to the
diff --git a/doc/administration/geo/disaster_recovery/planned_failover.md b/doc/administration/geo/disaster_recovery/planned_failover.md
index 96c6482e3db..bd8467f5437 100644
--- a/doc/administration/geo/disaster_recovery/planned_failover.md
+++ b/doc/administration/geo/disaster_recovery/planned_failover.md
@@ -27,7 +27,7 @@ have a high degree of confidence in being able to perform them accurately.
## Not all data is automatically replicated
-If you are using any GitLab features that Geo [doesn't support](../index.md#limitations),
+If you are using any GitLab features that Geo [doesn't support](../replication/datatypes.md#limitations-on-replicationverification),
you must make separate provisions to ensure that the **secondary** node has an
up-to-date copy of any data associated with that feature. This may extend the
required scheduled maintenance period significantly.
@@ -40,8 +40,7 @@ final transfer inside the maintenance window) will then transfer only the
Repository-centric strategies for using `rsync` effectively can be found in the
[moving repositories](../../operations/moving_repositories.md) documentation; these strategies can
-be adapted for use with any other file-based data, such as GitLab Pages (to
-be found in `/var/opt/gitlab/gitlab-rails/shared/pages` if using Omnibus).
+be adapted for use with any other file-based data, such as [GitLab Pages](../../pages/index.md#change-storage-path).
## Preflight checks
diff --git a/doc/administration/geo/index.md b/doc/administration/geo/index.md
index f1884f297e8..780e391973c 100644
--- a/doc/administration/geo/index.md
+++ b/doc/administration/geo/index.md
@@ -22,12 +22,14 @@ Geo undergoes significant changes from release to release. Upgrades **are** supp
Fetching large repositories can take a long time for teams located far from a single GitLab instance.
-Geo provides local, read-only instances of your GitLab instances. This can reduce the time it takes
+Geo provides local, read-only sites of your GitLab instances. This can reduce the time it takes
to clone and fetch large repositories, speeding up development.
For a video introduction to Geo, see [Introduction to GitLab Geo - GitLab Features](https://www.youtube.com/watch?v=-HDLxSjEh6w).
-To make sure you're using the right version of the documentation, navigate to [the source version of this page on GitLab.com](https://gitlab.com/gitlab-org/gitlab/blob/master/doc/administration/geo/index.md) and choose the appropriate release from the **Switch branch/tag** dropdown. For example, [`v11.2.3-ee`](https://gitlab.com/gitlab-org/gitlab/blob/v11.2.3-ee/doc/administration/geo/index.md).
+To make sure you're using the right version of the documentation, navigate to [the Geo page on GitLab.com](https://gitlab.com/gitlab-org/gitlab/blob/master/doc/administration/geo/index.md) and choose the appropriate release from the **Switch branch/tag** dropdown. For example, [`v13.7.6-ee`](https://gitlab.com/gitlab-org/gitlab/-/blob/v13.7.6-ee/doc/administration/geo/index.md).
+
+Geo uses a set of defined terms that is described in the [Geo Glossary](glossary.md), please familiarize yourself with those terms.
## Use cases
@@ -35,21 +37,21 @@ Implementing Geo provides the following benefits:
- Reduce from minutes to seconds the time taken for your distributed developers to clone and fetch large repositories and projects.
- Enable all of your developers to contribute ideas and work in parallel, no matter where they are.
-- Balance the read-only load between your **primary** and **secondary** nodes.
+- Balance the read-only load between your **primary** and **secondary** sites.
In addition, it:
- Can be used for cloning and fetching projects, in addition to reading any data available in the GitLab web interface (see [limitations](#limitations)).
- Overcomes slow connections between distant offices, saving time by improving speed for distributed teams.
- Helps reducing the loading time for automated tasks, custom integrations, and internal workflows.
-- Can quickly fail over to a **secondary** node in a [disaster recovery](disaster_recovery/index.md) scenario.
-- Allows [planned failover](disaster_recovery/planned_failover.md) to a **secondary** node.
+- Can quickly fail over to a **secondary** site in a [disaster recovery](disaster_recovery/index.md) scenario.
+- Allows [planned failover](disaster_recovery/planned_failover.md) to a **secondary** site.
Geo provides:
-- Read-only **secondary** nodes: Maintain one **primary** GitLab node while still enabling read-only **secondary** nodes for each of your distributed teams.
-- Authentication system hooks: **Secondary** nodes receives all authentication data (like user accounts and logins) from the **primary** instance.
-- An intuitive UI: **Secondary** nodes use the same web interface your team has grown accustomed to. In addition, there are visual notifications that block write operations and make it clear that a user is on a **secondary** node.
+- Read-only **secondary** sites: Maintain one **primary** GitLab site while still enabling read-only **secondary** sites for each of your distributed teams.
+- Authentication system hooks: **Secondary** sites receives all authentication data (like user accounts and logins) from the **primary** instance.
+- An intuitive UI: **Secondary** sites use the same web interface your team has grown accustomed to. In addition, there are visual notifications that block write operations and make it clear that a user is on a **secondary** sites.
### Gitaly Cluster
@@ -64,16 +66,16 @@ Your Geo instance can be used for cloning and fetching projects, in addition to
When Geo is enabled, the:
-- Original instance is known as the **primary** node.
-- Replicated read-only nodes are known as **secondary** nodes.
+- Original instance is known as the **primary** site.
+- Replicated read-only sites are known as **secondary** sites.
Keep in mind that:
-- **Secondary** nodes talk to the **primary** node to:
+- **Secondary** sites talk to the **primary** site to:
- Get user data for logins (API).
- Replicate repositories, LFS Objects, and Attachments (HTTPS + JWT).
-- In GitLab Premium 10.0 and later, the **primary** node no longer talks to **secondary** nodes to notify for changes (API).
-- Pushing directly to a **secondary** node (for both HTTP and SSH, including Git LFS) was [introduced](https://about.gitlab.com/releases/2018/09/22/gitlab-11-3-released/) in [GitLab Premium](https://about.gitlab.com/pricing/#self-managed) 11.3.
+- In GitLab Premium 10.0 and later, the **primary** site no longer talks to **secondary** sites to notify for changes (API).
+- Pushing directly to a **secondary** site (for both HTTP and SSH, including Git LFS) was [introduced](https://about.gitlab.com/releases/2018/09/22/gitlab-11-3-released/) in [GitLab Premium](https://about.gitlab.com/pricing/#self-managed) 11.3.
- There are [limitations](#limitations) when using Geo.
### Architecture
@@ -84,31 +86,31 @@ The following diagram illustrates the underlying architecture of Geo.
In this diagram:
-- There is the **primary** node and the details of one **secondary** node.
-- Writes to the database can only be performed on the **primary** node. A **secondary** node receives database
+- There is the **primary** site and the details of one **secondary** site.
+- Writes to the database can only be performed on the **primary** site. A **secondary** site receives database
updates via PostgreSQL streaming replication.
- If present, the [LDAP server](#ldap) should be configured to replicate for [Disaster Recovery](disaster_recovery/index.md) scenarios.
-- A **secondary** node performs different type of synchronizations against the **primary** node, using a special
+- A **secondary** site performs different type of synchronizations against the **primary** site, using a special
authorization protected by JWT:
- Repositories are cloned/updated via Git over HTTPS.
- Attachments, LFS objects, and other files are downloaded via HTTPS using a private API endpoint.
From the perspective of a user performing Git operations:
-- The **primary** node behaves as a full read-write GitLab instance.
-- **Secondary** nodes are read-only but proxy Git push operations to the **primary** node. This makes **secondary** nodes appear to support push operations themselves.
+- The **primary** site behaves as a full read-write GitLab instance.
+- **Secondary** sites are read-only but proxy Git push operations to the **primary** site. This makes **secondary** sites appear to support push operations themselves.
To simplify the diagram, some necessary components are omitted. Note that:
- Git over SSH requires [`gitlab-shell`](https://gitlab.com/gitlab-org/gitlab-shell) and OpenSSH.
- Git over HTTPS required [`gitlab-workhorse`](https://gitlab.com/gitlab-org/gitlab-workhorse).
-Note that a **secondary** node needs two different PostgreSQL databases:
+Note that a **secondary** site needs two different PostgreSQL databases:
- A read-only database instance that streams data from the main GitLab database.
-- [Another database instance](#geo-tracking-database) used internally by the **secondary** node to record what data has been replicated.
+- [Another database instance](#geo-tracking-database) used internally by the **secondary** site to record what data has been replicated.
-In **secondary** nodes, there is an additional daemon: [Geo Log Cursor](#geo-log-cursor).
+In **secondary** sites, there is an additional daemon: [Geo Log Cursor](#geo-log-cursor).
## Requirements for running Geo
@@ -122,7 +124,7 @@ The following are required to run Geo:
- PostgreSQL 11+ with [Streaming Replication](https://wiki.postgresql.org/wiki/Streaming_Replication)
- Git 2.9+
- Git-lfs 2.4.2+ on the user side when using LFS
-- All nodes must run the same GitLab version.
+- All sites must run the same GitLab version.
Additionally, check the GitLab [minimum requirements](../../install/requirements.md),
and we recommend you use:
@@ -132,9 +134,9 @@ and we recommend you use:
### Firewall rules
-The following table lists basic ports that must be open between the **primary** and **secondary** nodes for Geo.
+The following table lists basic ports that must be open between the **primary** and **secondary** sites for Geo.
-| **Primary** node | **Secondary** node | Protocol |
+| **Primary** site | **Secondary** site | Protocol |
|:-----------------|:-------------------|:-------------|
| 80 | 80 | HTTP |
| 443 | 443 | TCP or HTTPS |
@@ -153,10 +155,10 @@ If you wish to terminate SSL at the GitLab application server instead, use TCP p
### LDAP
-We recommend that if you use LDAP on your **primary** node, you also set up secondary LDAP servers on each **secondary** node. Otherwise, users will not be able to perform Git operations over HTTP(s) on the **secondary** node using HTTP Basic Authentication. However, Git via SSH and personal access tokens will still work.
+We recommend that if you use LDAP on your **primary** site, you also set up secondary LDAP servers on each **secondary** site. Otherwise, users will not be able to perform Git operations over HTTP(s) on the **secondary** site using HTTP Basic Authentication. However, Git via SSH and personal access tokens will still work.
NOTE:
-It is possible for all **secondary** nodes to share an LDAP server, but additional latency can be an issue. Also, consider what LDAP server will be available in a [disaster recovery](disaster_recovery/index.md) scenario if a **secondary** node is promoted to be a **primary** node.
+It is possible for all **secondary** sites to share an LDAP server, but additional latency can be an issue. Also, consider what LDAP server will be available in a [disaster recovery](disaster_recovery/index.md) scenario if a **secondary** site is promoted to be a **primary** site.
Check for instructions on how to set up replication in your LDAP service. Instructions will be different depending on the software or service used. For example, OpenLDAP provides [these instructions](https://www.openldap.org/doc/admin24/replication.html).
@@ -168,18 +170,37 @@ The tracking database instance is used as metadata to control what needs to be u
- Fetch new LFS Objects.
- Fetch changes from a repository that has recently been updated.
-Because the replicated database instance is read-only, we need this additional database instance for each **secondary** node.
+Because the replicated database instance is read-only, we need this additional database instance for each **secondary** site.
### Geo Log Cursor
This daemon:
-- Reads a log of events replicated by the **primary** node to the **secondary** database instance.
+- Reads a log of events replicated by the **primary** site to the **secondary** database instance.
- Updates the Geo Tracking Database instance with changes that need to be executed.
-When something is marked to be updated in the tracking database instance, asynchronous jobs running on the **secondary** node will execute the required operations and update the state.
+When something is marked to be updated in the tracking database instance, asynchronous jobs running on the **secondary** site will execute the required operations and update the state.
+
+This new architecture allows GitLab to be resilient to connectivity issues between the sites. It doesn't matter how long the **secondary** site is disconnected from the **primary** site as it will be able to replay all the events in the correct order and become synchronized with the **primary** site again.
+
+## Limitations
+
+WARNING:
+This list of limitations only reflects the latest version of GitLab. If you are using an older version, extra limitations may be in place.
-This new architecture allows GitLab to be resilient to connectivity issues between the nodes. It doesn't matter how long the **secondary** node is disconnected from the **primary** node as it will be able to replay all the events in the correct order and become synchronized with the **primary** node again.
+- Pushing directly to a **secondary** site redirects (for HTTP) or proxies (for SSH) the request to the **primary** site instead of [handling it directly](https://gitlab.com/gitlab-org/gitlab/-/issues/1381), except when using Git over HTTP with credentials embedded within the URI. For example, `https://user:password@secondary.tld`.
+- The **primary** site has to be online for OAuth login to happen. Existing sessions and Git are not affected. Support for the **secondary** site to use an OAuth provider independent from the primary is [being planned](https://gitlab.com/gitlab-org/gitlab/-/issues/208465).
+- The installation takes multiple manual steps that together can take about an hour depending on circumstances. We are working on improving this experience. See [Omnibus GitLab issue #2978](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/2978) for details.
+- Real-time updates of issues/merge requests (for example, via long polling) doesn't work on the **secondary** site.
+- [Selective synchronization](replication/configuration.md#selective-synchronization) applies only to files and repositories. Other datasets are replicated to the **secondary** site in full, making it inappropriate for use as an access control mechanism.
+- Object pools for forked project deduplication work only on the **primary** site, and are duplicated on the **secondary** site.
+- GitLab Runners cannot register with a **secondary** site. Support for this is [planned for the future](https://gitlab.com/gitlab-org/gitlab/-/issues/3294).
+- Configuring Geo **secondary** sites to [use high-availability configurations of PostgreSQL](https://gitlab.com/groups/gitlab-org/-/epics/2536) is currently in **alpha** support.
+- [Selective synchronization](replication/configuration.md#selective-synchronization) only limits what repositories are replicated. The entire PostgreSQL data is still replicated. Selective synchronization is not built to accomodate compliance / export control use cases.
+
+### Limitations on replication/verification
+
+There is a complete list of all GitLab [data types](replication/datatypes.md) and [existing support for replication and verification](replication/datatypes.md#limitations-on-replicationverification).
## Setup instructions
@@ -187,7 +208,7 @@ For setup instructions, see [Setting up Geo](setup/index.md).
## Post-installation documentation
-After installing GitLab on the **secondary** nodes and performing the initial configuration, see the following documentation for post-installation information.
+After installing GitLab on the **secondary** site(s) and performing the initial configuration, see the following documentation for post-installation information.
### Configuring Geo
@@ -195,16 +216,16 @@ For information on configuring Geo, see [Geo configuration](replication/configur
### Updating Geo
-For information on how to update your Geo nodes to the latest GitLab version, see [Updating the Geo nodes](replication/updating_the_geo_nodes.md).
+For information on how to update your Geo site(s) to the latest GitLab version, see [Updating the Geo sites](replication/updating_the_geo_nodes.md).
### Pausing and resuming replication
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/35913) in [GitLab Premium](https://about.gitlab.com/pricing/) 13.2.
WARNING:
-In GitLab 13.2 and 13.3, promoting a secondary node to a primary while the
+In GitLab 13.2 and 13.3, promoting a secondary site to a primary while the
secondary is paused fails. Do not pause replication before promoting a
-secondary. If the node is paused, be sure to resume before promoting. This
+secondary. If the site is paused, be sure to resume before promoting. This
issue has been fixed in GitLab 13.4 and later.
WARNING:
@@ -213,7 +234,7 @@ Omnibus GitLab-managed database. External databases are currently not supported.
In some circumstances, like during [upgrades](replication/updating_the_geo_nodes.md) or a [planned failover](disaster_recovery/planned_failover.md), it is desirable to pause replication between the primary and secondary.
-Pausing and resuming replication is done via a command line tool from the secondary node where the `postgresql` service is enabled.
+Pausing and resuming replication is done via a command line tool from the a node in the secondary site where the `postgresql` service is enabled.
If `postgresql` is on a standalone database node, ensure that `gitlab.rb` on that node contains the configuration line `gitlab_rails['geo_node_name'] = 'node_name'`, where `node_name` is the same as the `geo_name_name` on the application node.
@@ -243,7 +264,7 @@ For information on using Geo in disaster recovery situations to mitigate data-lo
### Replicating the Container Registry
-For more information on how to replicate the Container Registry, see [Docker Registry for a **secondary** node](replication/docker_registry.md).
+For more information on how to replicate the Container Registry, see [Docker Registry for a **secondary** site](replication/docker_registry.md).
### Security Review
@@ -259,41 +280,22 @@ For an example of how to set up a location-aware Git remote URL with AWS Route53
### Backfill
-Once a **secondary** node is set up, it will start replicating missing data from
-the **primary** node in a process known as **backfill**. You can monitor the
-synchronization process on each Geo node from the **primary** node's **Geo Nodes**
+Once a **secondary** site is set up, it will start replicating missing data from
+the **primary** site in a process known as **backfill**. You can monitor the
+synchronization process on each Geo site from the **primary** site's **Geo Nodes**
dashboard in your browser.
Failures that happen during a backfill are scheduled to be retried at the end
of the backfill.
-## Remove Geo node
+## Remove Geo site
-For more information on removing a Geo node, see [Removing **secondary** Geo nodes](replication/remove_geo_node.md).
+For more information on removing a Geo site, see [Removing **secondary** Geo sites](replication/remove_geo_site.md).
## Disable Geo
To find out how to disable Geo, see [Disabling Geo](replication/disable_geo.md).
-## Limitations
-
-WARNING:
-This list of limitations only reflects the latest version of GitLab. If you are using an older version, extra limitations may be in place.
-
-- Pushing directly to a **secondary** node redirects (for HTTP) or proxies (for SSH) the request to the **primary** node instead of [handling it directly](https://gitlab.com/gitlab-org/gitlab/-/issues/1381), except when using Git over HTTP with credentials embedded within the URI. For example, `https://user:password@secondary.tld`.
-- The **primary** node has to be online for OAuth login to happen. Existing sessions and Git are not affected. Support for the **secondary** node to use an OAuth provider independent from the primary is [being planned](https://gitlab.com/gitlab-org/gitlab/-/issues/208465).
-- The installation takes multiple manual steps that together can take about an hour depending on circumstances. We are working on improving this experience. See [Omnibus GitLab issue #2978](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/2978) for details.
-- Real-time updates of issues/merge requests (for example, via long polling) doesn't work on the **secondary** node.
-- [Selective synchronization](replication/configuration.md#selective-synchronization) applies only to files and repositories. Other datasets are replicated to the **secondary** node in full, making it inappropriate for use as an access control mechanism.
-- Object pools for forked project deduplication work only on the **primary** node, and are duplicated on the **secondary** node.
-- GitLab Runners cannot register with a **secondary** node. Support for this is [planned for the future](https://gitlab.com/gitlab-org/gitlab/-/issues/3294).
-- Geo **secondary** nodes can not be configured to [use high-availability configurations of PostgreSQL](https://gitlab.com/groups/gitlab-org/-/epics/2536).
-- [Selective synchronization](replication/configuration.md#selective-synchronization) only limits what repositories are replicated. The entire PostgreSQL data is still replicated. Selective synchronization is not built to accomodate compliance / export control use cases.
-
-### Limitations on replication/verification
-
-There is a complete list of all GitLab [data types](replication/datatypes.md) and [existing support for replication and verification](replication/datatypes.md#limitations-on-replicationverification).
-
## Frequently Asked Questions
For answers to common questions, see the [Geo FAQ](replication/faq.md).
diff --git a/doc/administration/geo/replication/configuration.md b/doc/administration/geo/replication/configuration.md
index 7dbb0c78166..6d5f3e61ba0 100644
--- a/doc/administration/geo/replication/configuration.md
+++ b/doc/administration/geo/replication/configuration.md
@@ -24,7 +24,7 @@ You are encouraged to first read through all the steps before executing them
in your testing/production environment.
NOTE:
-**Do not** set up any custom authentication for the **secondary** nodes. This will be handled by the **primary** node.
+**Do not** set up any custom authentication for the **secondary** nodes. This is handled by the **primary** node.
Any change that requires access to the **Admin Area** needs to be done in the
**primary** node because the **secondary** node is a read-only replica.
@@ -41,7 +41,7 @@ they must be manually replicated to the **secondary** node.
sudo cat /etc/gitlab/gitlab-secrets.json
```
- This will display the secrets that need to be replicated, in JSON format.
+ This displays the secrets that need to be replicated, in JSON format.
1. SSH into the **secondary** node and login as the `root` user:
@@ -85,11 +85,11 @@ GitLab integrates with the system-installed SSH daemon, designating a user
(typically named `git`) through which all access requests are handled.
In a [Disaster Recovery](../disaster_recovery/index.md) situation, GitLab system
-administrators will promote a **secondary** node to the **primary** node. DNS records for the
+administrators promote a **secondary** node to the **primary** node. DNS records for the
**primary** domain should also be updated to point to the new **primary** node
-(previously a **secondary** node). Doing so will avoid the need to update Git remotes and API URLs.
+(previously a **secondary** node). Doing so avoids the need to update Git remotes and API URLs.
-This will cause all SSH requests to the newly promoted **primary** node to
+This causes all SSH requests to the newly promoted **primary** node to
fail due to SSH host key mismatch. To prevent this, the primary SSH host
keys must be manually replicated to the **secondary** node.
@@ -183,7 +183,7 @@ keys must be manually replicated to the **secondary** node.
sudo -i
```
-1. Edit `/etc/gitlab/gitlab.rb` and add a **unique** name for your node. You will need this in the next steps:
+1. Edit `/etc/gitlab/gitlab.rb` and add a **unique** name for your node. You need this in the next steps:
```ruby
# The unique identifier for the Geo node.
@@ -229,9 +229,9 @@ keys must be manually replicated to the **secondary** node.
gitlab-rake gitlab:geo:check
```
-Once added to the Geo administration page and restarted, the **secondary** node will automatically start
+Once added to the Geo administration page and restarted, the **secondary** node automatically starts
replicating missing data from the **primary** node in a process known as **backfill**.
-Meanwhile, the **primary** node will start to notify each **secondary** node of any changes, so
+Meanwhile, the **primary** node starts to notify each **secondary** node of any changes, so
that the **secondary** node can act on those notifications immediately.
Be sure the _secondary_ node is running and accessible. You can sign in to the
@@ -241,7 +241,7 @@ _secondary_ node with the same credentials as were used with the _primary_ node.
You can safely skip this step if your **primary** node uses a CA-issued HTTPS certificate.
-If your **primary** node is using a self-signed certificate for *HTTPS* support, you will
+If your **primary** node is using a self-signed certificate for *HTTPS* support, you
need to add that certificate to the **secondary** node's trust store. Retrieve the
certificate from the **primary** node and follow
[these instructions](https://docs.gitlab.com/omnibus/settings/ssl.html)
@@ -265,7 +265,7 @@ the _primary_ node. Visit the _secondary_ node's **Admin Area > Geo**
(`/admin/geo/nodes`) in your browser to determine if it's correctly identified
as a _secondary_ Geo node, and if Geo is enabled.
-The initial replication, or 'backfill', will probably still be in progress. You
+The initial replication, or 'backfill', is probably still in progress. You
can monitor the synchronization process on each Geo node from the **primary**
node's **Geo Nodes** dashboard in your browser.
@@ -282,12 +282,12 @@ The two most obvious issues that can become apparent in the dashboard are:
- You are using a custom certificate or custom CA (see the [troubleshooting document](troubleshooting.md)).
- The instance is firewalled (check your firewall rules).
-Please note that disabling a **secondary** node will stop the synchronization process.
+Please note that disabling a **secondary** node stops the synchronization process.
Please note that if `git_data_dirs` is customized on the **primary** node for multiple
repository shards you must duplicate the same configuration on each **secondary** node.
-Point your users to the ["Using a Geo Server" guide](using_a_geo_server.md).
+Point your users to the [Using a Geo Site guide](usage.md).
Currently, this is what is synced:
@@ -312,7 +312,7 @@ It is important to note that selective synchronization:
1. Does not hide project metadata from **secondary** nodes.
- Since Geo currently relies on PostgreSQL replication, all project metadata
gets replicated to **secondary** nodes, but repositories that have not been
- selected will be empty.
+ selected are empty.
1. Does not reduce the number of events generated for the Geo event log.
- The **primary** node generates events as long as any **secondary** nodes are present.
Selective synchronization restrictions are implemented on the **secondary** nodes,
diff --git a/doc/administration/geo/replication/datatypes.md b/doc/administration/geo/replication/datatypes.md
index 61f99257844..e2f12cbd8dc 100644
--- a/doc/administration/geo/replication/datatypes.md
+++ b/doc/administration/geo/replication/datatypes.md
@@ -205,3 +205,33 @@ successfully, you must replicate their data using some other means.
|[GitLab Pages](../../pages/index.md) | [No](https://gitlab.com/groups/gitlab-org/-/epics/589) | No | No | |
|[Dependency proxy images](../../../user/packages/dependency_proxy/index.md) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/259694) | No | No | Blocked on [Geo: Secondary Mimicry](https://gitlab.com/groups/gitlab-org/-/epics/1528). Note that replication of this cache is not needed for Disaster Recovery purposes because it can be recreated from external sources. |
|[Vulnerability Export](../../../user/application_security/vulnerability_report/#export-vulnerability-details) | [Not planned](https://gitlab.com/groups/gitlab-org/-/epics/3111) | No | Via Object Storage provider if supported. Native Geo support (Beta). | Not planned because they are ephemeral and sensitive. They can be regenerated on demand. |
+
+#### LFS object replication using the self service framework
+
+> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/276696) in GitLab 13.12.
+> - [Deployed behind a feature flag](../../../user/feature_flags.md), enabled by default.
+> - Not enabled on GitLab.com as Geo is not enabled.
+> - Recommended for production use.
+> - For GitLab self-managed instances, GitLab administrators can opt to [disable it](#enable-or-disable-lfs-object-replication-using-the-self-service-framework).
+
+There can be [risks when disabling released features](../../../user/feature_flags.md#risks-when-disabling-released-features).
+Refer to this feature's version history for more details.
+
+##### Enable or disable LFS object replication using the self service framework
+
+LFS object replication using the self service framework is under development but ready for production use. It is
+deployed behind a feature flag that is **enabled by default**.
+[GitLab administrators with access to the GitLab Rails console](../../../administration/feature_flags.md)
+can opt to disable it.
+
+To enable it:
+
+```ruby
+Feature.enable(:geo_lfs_object_replication)
+```
+
+To disable it:
+
+```ruby
+Feature.disable(:geo_lfs_object_replication)
+```
diff --git a/doc/administration/geo/replication/docker_registry.md b/doc/administration/geo/replication/docker_registry.md
index ea73614511f..a8628481ba7 100644
--- a/doc/administration/geo/replication/docker_registry.md
+++ b/doc/administration/geo/replication/docker_registry.md
@@ -24,7 +24,7 @@ integrated [Container Registry](../../packages/container_registry.md#use-object-
You can enable a storage-agnostic replication so it
can be used for cloud or local storage. Whenever a new image is pushed to the
-**primary** site, each **secondary** site will pull it to its own container
+**primary** site, each **secondary** site pulls it to its own container
repository.
To configure Docker Registry replication:
@@ -70,12 +70,12 @@ We need to make Docker Registry send notification events to the
NOTE:
If you use an external Registry (not the one integrated with GitLab), you must add
- these settings to its configuration yourself. In this case, you will also have to specify
+ these settings to its configuration yourself. In this case, you also have to specify
notification secret in `registry.notification_secret` section of
`/etc/gitlab/gitlab.rb` file.
NOTE:
- If you use GitLab HA, you will also have to specify
+ If you use GitLab HA, you also have to specify
the notification secret in `registry.notification_secret` section of
`/etc/gitlab/gitlab.rb` file for every web node.
@@ -95,11 +95,11 @@ expecting to see the Docker images replicated.
Because we need to allow the **secondary** site to communicate securely with
the **primary** site Container Registry, we need to have a single key
-pair for all the sites. The **secondary** site will use this key to
+pair for all the sites. The **secondary** site uses this key to
generate a short-lived JWT that is pull-only-capable to access the
**primary** site Container Registry.
-For each application and Sidekiq node on the **secondary** site:
+For each application and Sidekiq node on the **secondary** site:
1. SSH into the node and login as the `root` user:
@@ -126,5 +126,5 @@ For each application and Sidekiq node on the **secondary** site:
To verify Container Registry replication is working, go to **Admin Area > Geo**
(`/admin/geo/nodes`) on the **secondary** site.
-The initial replication, or "backfill", will probably still be in progress.
+The initial replication, or "backfill", is probably still in progress.
You can monitor the synchronization process on each Geo site from the **primary** site's **Geo Nodes** dashboard in your browser.
diff --git a/doc/administration/geo/replication/faq.md b/doc/administration/geo/replication/faq.md
index 73a36f5e674..a83a1c22db6 100644
--- a/doc/administration/geo/replication/faq.md
+++ b/doc/administration/geo/replication/faq.md
@@ -13,61 +13,61 @@ The requirements are listed [on the index page](../index.md#requirements-for-run
## How does Geo know which projects to sync?
-On each **secondary** node, there is a read-only replicated copy of the GitLab database.
-A **secondary** node also has a tracking database where it stores which projects have been synced.
+On each **secondary** site, there is a read-only replicated copy of the GitLab database.
+A **secondary** site also has a tracking database where it stores which projects have been synced.
Geo compares the two databases to find projects that are not yet tracked.
At the start, this tracking database is empty, so Geo will start trying to update from every project that it can see in the GitLab database.
For each project to sync:
-1. Geo will issue a `git fetch geo --mirror` to get the latest information from the **primary** node.
+1. Geo will issue a `git fetch geo --mirror` to get the latest information from the **primary** site.
If there are no changes, the sync will be fast and end quickly. Otherwise, it will pull the latest commits.
-1. The **secondary** node will update the tracking database to store the fact that it has synced projects A, B, C, etc.
+1. The **secondary** site will update the tracking database to store the fact that it has synced projects A, B, C, etc.
1. Repeat until all projects are synced.
-When someone pushes a commit to the **primary** node, it generates an event in the GitLab database that the repository has changed.
-The **secondary** node sees this event, marks the project in question as dirty, and schedules the project to be resynced.
+When someone pushes a commit to the **primary** site, it generates an event in the GitLab database that the repository has changed.
+The **secondary** site sees this event, marks the project in question as dirty, and schedules the project to be resynced.
To ensure that problems with pipelines (for example, syncs failing too many times or jobs being lost) don't permanently stop projects syncing, Geo also periodically checks the tracking database for projects that are marked as dirty. This check happens when
the number of concurrent syncs falls below `repos_max_capacity` and there are no new projects waiting to be synced.
Geo also has a checksum feature which runs a SHA256 sum across all the Git references to the SHA values.
-If the refs don't match between the **primary** node and the **secondary** node, then the **secondary** node will mark that project as dirty and try to resync it.
+If the refs don't match between the **primary** site and the **secondary** site, then the **secondary** site will mark that project as dirty and try to resync it.
So even if we have an outdated tracking database, the validation should activate and find discrepancies in the repository state and resync.
## Can I use Geo in a disaster recovery situation?
Yes, but there are limitations to what we replicate (see
-[What data is replicated to a **secondary** node?](#what-data-is-replicated-to-a-secondary-node)).
+[What data is replicated to a **secondary** site?](#what-data-is-replicated-to-a-secondary-site)).
Read the documentation for [Disaster Recovery](../disaster_recovery/index.md).
-## What data is replicated to a **secondary** node?
+## What data is replicated to a **secondary** site?
We currently replicate project repositories, LFS objects, generated
attachments / avatars and the whole database. This means user accounts,
issues, merge requests, groups, project data, etc., will be available for
query.
-## Can I `git push` to a **secondary** node?
+## Can I `git push` to a **secondary** site?
-Yes! Pushing directly to a **secondary** node (for both HTTP and SSH, including Git LFS) was [introduced](https://about.gitlab.com/releases/2018/09/22/gitlab-11-3-released/) in [GitLab Premium](https://about.gitlab.com/pricing/#self-managed) 11.3.
+Yes! Pushing directly to a **secondary** site (for both HTTP and SSH, including Git LFS) was [introduced](https://about.gitlab.com/releases/2018/09/22/gitlab-11-3-released/) in [GitLab Premium](https://about.gitlab.com/pricing/#self-managed) 11.3.
-## How long does it take to have a commit replicated to a **secondary** node?
+## How long does it take to have a commit replicated to a **secondary** site?
All replication operations are asynchronous and are queued to be dispatched. Therefore, it depends on a lot of
factors including the amount of traffic, how big your commit is, the
-connectivity between your nodes, your hardware, etc.
+connectivity between your sites, your hardware, etc.
## What if the SSH server runs at a different port?
-That's totally fine. We use HTTP(s) to fetch repository changes from the **primary** node to all **secondary** nodes.
+That's totally fine. We use HTTP(s) to fetch repository changes from the **primary** site to all **secondary** sites.
-## Is this possible to set up a Docker Registry for a **secondary** node that mirrors the one on the **primary** node?
+## Is this possible to set up a Docker Registry for a **secondary** site that mirrors the one on the **primary** site?
-Yes. See [Docker Registry for a **secondary** node](docker_registry.md).
+Yes. See [Docker Registry for a **secondary** site](docker_registry.md).
-## Can I login to a secondary node?
+## Can I login to a secondary site?
-Yes, but secondary nodes receive all authentication data (like user accounts and logins) from the primary instance. This means you will be re-directed to the primary for authentication and routed back afterwards.
+Yes, but secondary sites receive all authentication data (like user accounts and logins) from the primary instance. This means you will be re-directed to the primary for authentication and routed back afterwards.
diff --git a/doc/administration/geo/replication/geo_validation_tests.md b/doc/administration/geo/replication/geo_validation_tests.md
index 06fd3cd70be..8f67e70c9e2 100644
--- a/doc/administration/geo/replication/geo_validation_tests.md
+++ b/doc/administration/geo/replication/geo_validation_tests.md
@@ -179,6 +179,15 @@ The following are PostgreSQL upgrade validation tests we performed.
The following are additional validation tests we performed.
+### May 2021
+
+[Test failover with object storage replication enabled](https://gitlab.com/gitlab-org/gitlab/-/issues/330362):
+
+- Description: At the time of testing, Geo's object storage replication functionality was in beta. We tested that object storage replication works as intended and that the data was present on the new primary after a failover.
+- Outcome: The test was successful. Data in object storage was replicated and present after a failover.
+- Follow up issues:
+ - [Geo: Failing to replicate initial Monitoring project](https://gitlab.com/gitlab-org/gitlab/-/issues/330485)
+
### August 2020
[Test Gitaly Cluster on a Geo Deployment](https://gitlab.com/gitlab-org/gitlab/-/issues/223210):
diff --git a/doc/administration/geo/replication/location_aware_git_url.md b/doc/administration/geo/replication/location_aware_git_url.md
index 272b746015b..014ca59e571 100644
--- a/doc/administration/geo/replication/location_aware_git_url.md
+++ b/doc/administration/geo/replication/location_aware_git_url.md
@@ -8,11 +8,11 @@ type: howto
# Location-aware Git remote URL with AWS Route53 **(PREMIUM SELF)**
You can provide GitLab users with a single remote URL that automatically uses
-the Geo node closest to them. This means users don't need to update their Git
-configuration to take advantage of closer Geo nodes as they move.
+the Geo site closest to them. This means users don't need to update their Git
+configuration to take advantage of closer Geo sites as they move.
This is possible because, Git push requests can be automatically redirected
-(HTTP) or proxied (SSH) from **secondary** nodes to the **primary** node.
+(HTTP) or proxied (SSH) from **secondary** sites to the **primary** site.
Though these instructions use [AWS Route53](https://aws.amazon.com/route53/),
other services such as [Cloudflare](https://www.cloudflare.com/) could be used
@@ -20,30 +20,30 @@ as well.
NOTE:
You can also use a load balancer to distribute web UI or API traffic to
-[multiple Geo **secondary** nodes](../../../user/admin_area/geo_nodes.md#multiple-secondary-nodes-behind-a-load-balancer).
-Importantly, the **primary** node cannot yet be included. See the feature request
+[multiple Geo **secondary** sites](../../../user/admin_area/geo_nodes.md#multiple-secondary-nodes-behind-a-load-balancer).
+Importantly, the **primary** site cannot yet be included. See the feature request
[Support putting the **primary** behind a Geo node load balancer](https://gitlab.com/gitlab-org/gitlab/-/issues/10888).
## Prerequisites
In this example, we have already set up:
-- `primary.example.com` as a Geo **primary** node.
-- `secondary.example.com` as a Geo **secondary** node.
+- `primary.example.com` as a Geo **primary** site.
+- `secondary.example.com` as a Geo **secondary** site.
We will create a `git.example.com` subdomain that will automatically direct
requests:
-- From Europe to the **secondary** node.
-- From all other locations to the **primary** node.
+- From Europe to the **secondary** site.
+- From all other locations to the **primary** site.
In any case, you require:
-- A working GitLab **primary** node that is accessible at its own address.
-- A working GitLab **secondary** node.
+- A working GitLab **primary** site that is accessible at its own address.
+- A working GitLab **secondary** site.
- A Route53 Hosted Zone managing your domain.
-If you haven't yet set up a Geo _primary_ node and _secondary_ node, see the
+If you haven't yet set up a Geo _primary_ site and _secondary_ site, see the
[Geo setup instructions](../index.md#setup-instructions).
## Create a traffic policy
@@ -89,7 +89,7 @@ routing configurations.
![Created policy record](img/single_git_created_policy_record.png)
You have successfully set up a single host, e.g. `git.example.com` which
-distributes traffic to your Geo nodes by geolocation!
+distributes traffic to your Geo sites by geolocation!
## Configure Git clone URLs to use the special Git URL
@@ -114,10 +114,10 @@ You can customize the:
After following the configuration steps above, handling for Git requests is now location aware.
For requests:
-- Outside Europe, all requests are directed to the **primary** node.
+- Outside Europe, all requests are directed to the **primary** site.
- Within Europe, over:
- HTTP:
- - `git clone http://git.example.com/foo/bar.git` is directed to the **secondary** node.
+ - `git clone http://git.example.com/foo/bar.git` is directed to the **secondary** site.
- `git push` is initially directed to the **secondary**, which automatically
redirects to `primary.example.com`.
- SSH:
diff --git a/doc/administration/geo/replication/multiple_servers.md b/doc/administration/geo/replication/multiple_servers.md
index f83e0e14e54..59bb3884a02 100644
--- a/doc/administration/geo/replication/multiple_servers.md
+++ b/doc/administration/geo/replication/multiple_servers.md
@@ -53,14 +53,14 @@ It is possible to use cloud hosted services for PostgreSQL and Redis, but this i
## Prerequisites: Two working GitLab multi-node clusters
-One cluster will serve as the **primary** node. Use the
+One cluster serves as the **primary** node. Use the
[GitLab multi-node documentation](../../reference_architectures/index.md) to set this up. If
you already have a working GitLab instance that is in-use, it can be used as a
**primary**.
-The second cluster will serve as the **secondary** node. Again, use the
+The second cluster serves as the **secondary** node. Again, use the
[GitLab multi-node documentation](../../reference_architectures/index.md) to set this up.
-It's a good idea to log in and test it, however, note that its data will be
+It's a good idea to log in and test it, however, note that its data is
wiped out as part of the process of replicating from the **primary**.
## Configure the GitLab cluster to be the **primary** node
@@ -120,7 +120,7 @@ major differences:
called the "tracking database", which tracks the synchronization state of
various resources.
-Therefore, we will set up the multi-node components one-by-one, and include deviations
+Therefore, we set up the multi-node components one-by-one, and include deviations
from the normal multi-node setup. However, we highly recommend first configuring a
brand-new cluster as if it were not part of a Geo setup so that it can be
tested and verified as a working cluster. And only then should it be modified
@@ -133,7 +133,7 @@ Configure the following services, again using the non-Geo multi-node
documentation:
- [Configuring Redis for GitLab](../../redis/replication_and_failover.md#example-configuration-for-the-gitlab-application) for multiple nodes.
-- [Gitaly](../../gitaly/index.md), which will store data that is
+- [Gitaly](../../gitaly/index.md), which stores data that is
synchronized from the **primary** node.
NOTE:
@@ -143,7 +143,7 @@ recommended.
### Step 2: Configure the main read-only replica PostgreSQL database on the **secondary** node
NOTE:
-The following documentation assumes the database will be run on
+The following documentation assumes the database runs on
a single node only. Multi-node PostgreSQL on **secondary** nodes is
[not currently supported](https://gitlab.com/groups/gitlab-org/-/epics/2536).
@@ -151,7 +151,7 @@ Configure the [**secondary** database](../setup/database.md) as a read-only repl
the **primary** database. Use the following as a guide.
1. Generate an MD5 hash of the desired password for the database user that the
- GitLab application will use to access the read-replica database:
+ GitLab application uses to access the read-replica database:
Note that the username (`gitlab` by default) is incorporated into the hash.
@@ -233,13 +233,13 @@ If using an external PostgreSQL instance, refer also to
### Step 3: Configure the tracking database on the **secondary** node
NOTE:
-This documentation assumes the tracking database will be run on
+This documentation assumes the tracking database runs on
only a single machine, rather than as a PostgreSQL cluster.
Configure the tracking database.
1. Generate an MD5 hash of the desired password for the database user that the
- GitLab application will use to access the tracking database:
+ GitLab application uses to access the tracking database:
Note that the username (`gitlab_geo` by default) is incorporated into the
hash.
@@ -377,7 +377,7 @@ Make sure that current node IP is listed in `postgresql['md5_auth_cidr_addresses
After making these changes [Reconfigure GitLab](../../restart_gitlab.md#omnibus-gitlab-reconfigure) so the changes take effect.
-On the secondary the following GitLab frontend services will be enabled:
+On the secondary the following GitLab frontend services are enabled:
- `geo-logcursor`
- `gitlab-pages`
diff --git a/doc/administration/geo/replication/object_storage.md b/doc/administration/geo/replication/object_storage.md
index ad419f999b3..7dd831092a3 100644
--- a/doc/administration/geo/replication/object_storage.md
+++ b/doc/administration/geo/replication/object_storage.md
@@ -9,9 +9,9 @@ type: howto
Geo can be used in combination with Object Storage (AWS S3, or other compatible object storage).
-Currently, **secondary** nodes can use either:
+Currently, **secondary** sites can use either:
-- The same storage bucket as the **primary** node.
+- The same storage bucket as the **primary** site.
- A replicated storage bucket.
To have:
@@ -28,13 +28,13 @@ To have:
WARNING:
This is a [**beta** feature](https://about.gitlab.com/handbook/product/#beta) and is not ready yet for production use at any scale. The main limitations are a lack of testing at scale and no verification of any replicated data.
-**Secondary** nodes can replicate files stored on the **primary** node regardless of
+**Secondary** sites can replicate files stored on the **primary** site regardless of
whether they are stored on the local file system or in object storage.
To enable GitLab replication, you must:
1. Go to **Admin Area > Geo**.
-1. Press **Edit** on the **secondary** node.
+1. Press **Edit** on the **secondary** site.
1. In the **Synchronization Settings** section, find the **Allow this secondary node to replicate content on Object Storage**
checkbox to enable it.
@@ -46,7 +46,7 @@ For CI job artifacts, there is similar documentation to configure
For user uploads, there is similar documentation to configure [upload object storage](../../uploads.md#using-object-storage)
-If you want to migrate the **primary** node's files to object storage, you can
+If you want to migrate the **primary** site's files to object storage, you can
configure the **secondary** in a few ways:
- Use the exact same object storage.
@@ -57,15 +57,15 @@ configure the **secondary** in a few ways:
GitLab does not currently support the case where both:
-- The **primary** node uses local storage.
-- A **secondary** node uses object storage.
+- The **primary** site uses local storage.
+- A **secondary** site uses object storage.
## Third-party replication services
When using Amazon S3, you can use
[CRR](https://docs.aws.amazon.com/AmazonS3/latest/dev/crr.html) to
-have automatic replication between the bucket used by the **primary** node and
-the bucket used by **secondary** nodes.
+have automatic replication between the bucket used by the **primary** site and
+the bucket used by **secondary** sites.
If you are using Google Cloud Storage, consider using
[Multi-Regional Storage](https://cloud.google.com/storage/docs/storage-classes#multi-regional).
diff --git a/doc/administration/geo/replication/remove_geo_node.md b/doc/administration/geo/replication/remove_geo_node.md
index 09ea84b6c4b..697d8c6ae38 100644
--- a/doc/administration/geo/replication/remove_geo_node.md
+++ b/doc/administration/geo/replication/remove_geo_node.md
@@ -4,5 +4,5 @@ redirect_to: '../../geo/replication/remove_geo_site.md'
This document was moved to [another location](../../geo/replication/remove_geo_site.md).
-<!-- This redirect file can be deleted after 2022-04-01 -->
+<!-- This redirect file can be deleted after 2021-06-01 -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/#move-or-rename-a-page -->
diff --git a/doc/administration/geo/replication/security_review.md b/doc/administration/geo/replication/security_review.md
index abb84b95623..f84d7a2171d 100644
--- a/doc/administration/geo/replication/security_review.md
+++ b/doc/administration/geo/replication/security_review.md
@@ -36,7 +36,7 @@ from [owasp.org](https://owasp.org/).
- The GitLab model of sensitivity is centered around public vs. internal vs.
private projects. Geo replicates them all indiscriminately. "Selective sync"
exists for files and repositories (but not database content), which would permit
- only less-sensitive projects to be replicated to a **secondary** node if desired.
+ only less-sensitive projects to be replicated to a **secondary** site if desired.
- See also: [GitLab data classification policy](https://about.gitlab.com/handbook/engineering/security/data-classification-standard.html).
### What data backup and retention requirements have been defined for the application?
@@ -48,18 +48,18 @@ from [owasp.org](https://owasp.org/).
### Who are the application's end‐users?
-- **Secondary** nodes are created in regions that are distant (in terms of
- Internet latency) from the main GitLab installation (the **primary** node). They are
- intended to be used by anyone who would ordinarily use the **primary** node, who finds
- that the **secondary** node is closer to them (in terms of Internet latency).
+- **Secondary** sites are created in regions that are distant (in terms of
+ Internet latency) from the main GitLab installation (the **primary** site). They are
+ intended to be used by anyone who would ordinarily use the **primary** site, who finds
+ that the **secondary** site is closer to them (in terms of Internet latency).
### How do the end‐users interact with the application?
-- **Secondary** nodes provide all the interfaces a **primary** node does
+- **Secondary** sites provide all the interfaces a **primary** site does
(notably a HTTP/HTTPS web application, and HTTP/HTTPS or SSH Git repository
access), but is constrained to read-only activities. The principal use case is
- envisioned to be cloning Git repositories from the **secondary** node in favor of the
- **primary** node, but end-users may use the GitLab web interface to view projects,
+ envisioned to be cloning Git repositories from the **secondary** site in favor of the
+ **primary** site, but end-users may use the GitLab web interface to view projects,
issues, merge requests, snippets, etc.
### What security expectations do the end‐users have?
@@ -67,10 +67,10 @@ from [owasp.org](https://owasp.org/).
- The replication process must be secure. It would typically be unacceptable to
transmit the entire database contents or all files and repositories across the
public Internet in plaintext, for instance.
-- **Secondary** nodes must have the same access controls over its content as the
- **primary** node - unauthenticated users must not be able to gain access to privileged
- information on the **primary** node by querying the **secondary** node.
-- Attackers must not be able to impersonate the **secondary** node to the **primary** node, and
+- **Secondary** sites must have the same access controls over its content as the
+ **primary** site - unauthenticated users must not be able to gain access to privileged
+ information on the **primary** site by querying the **secondary** site.
+- Attackers must not be able to impersonate the **secondary** site to the **primary** site, and
thus gain access to privileged information.
## Administrators
@@ -86,7 +86,7 @@ from [owasp.org](https://owasp.org/).
### What administrative capabilities does the application offer?
-- **Secondary** nodes may be added, modified, or removed by users with
+- **Secondary** sites may be added, modified, or removed by users with
administrative access.
- The replication process may be controlled (start/stop) via the Sidekiq
administrative controls.
@@ -95,9 +95,9 @@ from [owasp.org](https://owasp.org/).
### What details regarding routing, switching, firewalling, and load‐balancing have been defined?
-- Geo requires the **primary** node and **secondary** node to be able to communicate with each
- other across a TCP/IP network. In particular, the **secondary** nodes must be able to
- access HTTP/HTTPS and PostgreSQL services on the **primary** node.
+- Geo requires the **primary** site and **secondary** site to be able to communicate with each
+ other across a TCP/IP network. In particular, the **secondary** sites must be able to
+ access HTTP/HTTPS and PostgreSQL services on the **primary** site.
### What core network devices support the application?
@@ -105,9 +105,9 @@ from [owasp.org](https://owasp.org/).
### What network performance requirements exist?
-- Maximum replication speeds between **primary** node and **secondary** node is limited by the
+- Maximum replication speeds between **primary** site and **secondary** site is limited by the
available bandwidth between sites. No hard requirements exist - time to complete
- replication (and ability to keep up with changes on the **primary** node) is a function
+ replication (and ability to keep up with changes on the **primary** site) is a function
of the size of the data set, tolerance for latency, and available network
capacity.
@@ -189,9 +189,9 @@ from [owasp.org](https://owasp.org/).
### How will database connection strings, encryption keys, and other sensitive components be stored, accessed, and protected from unauthorized detection?
- There are some Geo-specific values. Some are shared secrets which must be
- securely transmitted from the **primary** node to the **secondary** node at setup time. Our
- documentation recommends transmitting them from the **primary** node to the system
- administrator via SSH, and then back out to the **secondary** node in the same manner.
+ securely transmitted from the **primary** site to the **secondary** site at setup time. Our
+ documentation recommends transmitting them from the **primary** site to the system
+ administrator via SSH, and then back out to the **secondary** site in the same manner.
In particular, this includes the PostgreSQL replication credentials and a secret
key (`db_key_base`) which is used to decrypt certain columns in the database.
The `db_key_base` secret is stored unencrypted on the file system, in
@@ -205,25 +205,25 @@ from [owasp.org](https://owasp.org/).
- Data is entered via the web application exposed by GitLab itself. Some data is
also entered using system administration commands on the GitLab servers (e.g.,
`gitlab-ctl set-primary-node`).
-- **Secondary** nodes also receive inputs via PostgreSQL streaming replication from the **primary** node.
+- **Secondary** sites also receive inputs via PostgreSQL streaming replication from the **primary** site.
### What data output paths does the application support?
-- **Primary** nodes output via PostgreSQL streaming replication to the **secondary** node.
+- **Primary** sites output via PostgreSQL streaming replication to the **secondary** site.
Otherwise, principally via the web application exposed by GitLab itself, and via
SSH `git clone` operations initiated by the end-user.
### How does data flow across the application's internal components?
-- **Secondary** nodes and **primary** nodes interact via HTTP/HTTPS (secured with JSON web
+- **Secondary** sites and **primary** sites interact via HTTP/HTTPS (secured with JSON web
tokens) and via PostgreSQL streaming replication.
-- Within a **primary** node or **secondary** node, the SSOT is the file system and the database
- (including Geo tracking database on **secondary** node). The various internal components
+- Within a **primary** site or **secondary** site, the SSOT is the file system and the database
+ (including Geo tracking database on **secondary** site). The various internal components
are orchestrated to make alterations to these stores.
### What data input validation requirements have been defined?
-- **Secondary** nodes must have a faithful replication of the **primary** node's data.
+- **Secondary** sites must have a faithful replication of the **primary** site's data.
### What data does the application store and how?
@@ -231,11 +231,11 @@ from [owasp.org](https://owasp.org/).
### What data is or may need to be encrypted and what key management requirements have been defined?
-- Neither **primary** nodes or **secondary** nodes encrypt Git repository or file system data at
+- Neither **primary** sites or **secondary** sites encrypt Git repository or file system data at
rest. A subset of database columns are encrypted at rest using the `db_otp_key`.
- A static secret shared across all hosts in a GitLab deployment.
- In transit, data should be encrypted, although the application does permit
- communication to proceed unencrypted. The two main transits are the **secondary** node's
+ communication to proceed unencrypted. The two main transits are the **secondary** site's
replication process for PostgreSQL, and for Git repositories/files. Both should
be protected using TLS, with the keys for that managed via Omnibus per existing
configuration for end-user access to GitLab.
@@ -253,19 +253,19 @@ from [owasp.org](https://owasp.org/).
### What user privilege levels does the application support?
-- Geo adds one type of privilege: **secondary** nodes can access a special Geo API to
+- Geo adds one type of privilege: **secondary** sites can access a special Geo API to
download files over HTTP/HTTPS, and to clone repositories using HTTP/HTTPS.
### What user identification and authentication requirements have been defined?
-- **Secondary** nodes identify to Geo **primary** nodes via OAuth or JWT authentication
+- **Secondary** sites identify to Geo **primary** sites via OAuth or JWT authentication
based on the shared database (HTTP access) or a PostgreSQL replication user (for
database replication). The database replication also requires IP-based access
controls to be defined.
### What user authorization requirements have been defined?
-- **Secondary** nodes must only be able to *read* data. They are not currently able to mutate data on the **primary** node.
+- **Secondary** sites must only be able to *read* data. They are not currently able to mutate data on the **primary** site.
### What session management requirements have been defined?
@@ -279,9 +279,9 @@ from [owasp.org](https://owasp.org/).
### What access requirements have been defined for URI and Service calls?
-- **Secondary** nodes make many calls to the **primary** node's API. This is how file
+- **Secondary** sites make many calls to the **primary** site's API. This is how file
replication proceeds, for instance. This endpoint is only accessible with a JWT token.
-- The **primary** node also makes calls to the **secondary** node to get status information.
+- The **primary** site also makes calls to the **secondary** site to get status information.
## Application Monitoring
diff --git a/doc/administration/geo/replication/troubleshooting.md b/doc/administration/geo/replication/troubleshooting.md
index 079a3713c73..6d990fd12ba 100644
--- a/doc/administration/geo/replication/troubleshooting.md
+++ b/doc/administration/geo/replication/troubleshooting.md
@@ -558,6 +558,7 @@ to start again from scratch, there are a few steps that can help you:
mv /var/opt/gitlab/gitlab-rails/uploads /var/opt/gitlab/gitlab-rails/uploads.old
mkdir -p /var/opt/gitlab/gitlab-rails/uploads
+ gitlab-ctl start postgresql
gitlab-ctl start geo-postgresql
```
@@ -853,6 +854,12 @@ To resolve this issue:
the **primary** node using IPv4 in the `/etc/hosts` file. Alternatively, you should
[enable IPv6 on the **primary** node](https://docs.gitlab.com/omnibus/settings/nginx.html#setting-the-nginx-listen-address-or-addresses).
+### GitLab Pages return 404 errors after promoting
+
+This is due to [Pages data not being managed by Geo](datatypes.md#limitations-on-replicationverification).
+Find advice to resolve those errors in the
+[Pages administration documentation](../../../administration/pages/index.md#404-error-after-promoting-a-geo-secondary-to-a-primary-node).
+
## Fixing client errors
### Authorization errors from LFS HTTP(s) client requests
diff --git a/doc/administration/geo/replication/usage.md b/doc/administration/geo/replication/usage.md
index 2fcc0565567..1491aa3427e 100644
--- a/doc/administration/geo/replication/usage.md
+++ b/doc/administration/geo/replication/usage.md
@@ -25,3 +25,11 @@ remote: ssh://git@primary.geo/user/repo.git
remote:
Everything up-to-date
```
+
+NOTE:
+If you're using HTTPS instead of [SSH](../../../ssh/README.md) to push to the secondary,
+you can't store credentials in the URL like `user:password@URL`. Instead, you can use a
+[`.netrc` file](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html)
+for Unix-like operating systems or `_netrc` for Windows. In that case, the credentials
+will be stored as a plain text. If you're looking for a more secure way to store credentials,
+you can use [Git Credential Storage](https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage).
diff --git a/doc/administration/geo/replication/using_a_geo_server.md b/doc/administration/geo/replication/using_a_geo_server.md
index e48e750f710..f8ce72ac3f8 100644
--- a/doc/administration/geo/replication/using_a_geo_server.md
+++ b/doc/administration/geo/replication/using_a_geo_server.md
@@ -4,5 +4,5 @@ redirect_to: '../../geo/replication/usage.md'
This document was moved to [another location](../../geo/replication/usage.md).
-<!-- This redirect file can be deleted after 2022-04-01 -->
+<!-- This redirect file can be deleted after 2022-06-01 -->
<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/#move-or-rename-a-page -->
diff --git a/doc/administration/geo/replication/version_specific_updates.md b/doc/administration/geo/replication/version_specific_updates.md
index 4f0a4dc638c..4a101c52325 100644
--- a/doc/administration/geo/replication/version_specific_updates.md
+++ b/doc/administration/geo/replication/version_specific_updates.md
@@ -15,8 +15,29 @@ for updating Geo nodes.
We've detected an issue [with a column rename](https://gitlab.com/gitlab-org/gitlab/-/issues/324160)
that may prevent upgrades to GitLab 13.9.0, 13.9.1, 13.9.2 and 13.9.3.
-We are working on a patch and recommend delaying any upgrade attempt until a fixed version
-is released.
+We are working on a patch, but until a fixed version is released, you can manually complete
+the zero-downtime upgrade:
+
+1. Before running the final `sudo gitlab-rake db:migrate` command on the deploy node,
+ execute the following queries using the PostgreSQL console (or `sudo gitlab-psql`)
+ to drop the problematic triggers:
+
+ ```sql
+ drop trigger trigger_e40a6f1858e6 on application_settings;
+ drop trigger trigger_0d588df444c8 on application_settings;
+ drop trigger trigger_1572cbc9a15f on application_settings;
+ drop trigger trigger_22a39c5c25f3 on application_settings;
+ ```
+
+1. Run the final migrations:
+
+ ```shell
+ sudo gitlab-rake db:migrate
+ ```
+
+If you have already run the final `sudo gitlab-rake db:migrate` command on the deploy node and have
+encountered the [column rename issue](https://gitlab.com/gitlab-org/gitlab/-/issues/324160), you can still
+follow the previous steps to complete the update.
More details are available [in this issue](https://gitlab.com/gitlab-org/gitlab/-/issues/324160).
diff --git a/doc/administration/geo/setup/database.md b/doc/administration/geo/setup/database.md
index 9c2cc8fc62e..b87a606e349 100644
--- a/doc/administration/geo/setup/database.md
+++ b/doc/administration/geo/setup/database.md
@@ -466,15 +466,16 @@ The replication process is now complete.
[PgBouncer](https://www.pgbouncer.org/) may be used with GitLab Geo to pool
PostgreSQL connections. We recommend using PgBouncer if you use GitLab in a
high-availability configuration with a cluster of nodes supporting a Geo
-**primary** node and another cluster of nodes supporting a Geo **secondary** node. For more
-information, see [High Availability with Omnibus GitLab](../../postgresql/replication_and_failover.md).
+**primary** site and two other clusters of nodes supporting a Geo **secondary** site.
+One for the main database and the other for the tracking database. For more information,
+see [High Availability with Omnibus GitLab](../../postgresql/replication_and_failover.md).
## Patroni support
Support for Patroni is intended to replace `repmgr` as a
[highly available PostgreSQL solution](../../postgresql/replication_and_failover.md)
on the primary node, but it can also be used for PostgreSQL HA on a secondary
-site.
+site. Similar to `repmgr`, using Patroni on a secondary node is optional.
Starting with GitLab 13.5, Patroni is available for _experimental_ use with Geo
primary and secondary sites. Due to its experimental nature, Patroni support is
@@ -490,6 +491,10 @@ This experimental implementation has the following limitations:
For instructions about how to set up Patroni on the primary site, see the
[PostgreSQL replication and failover with Omnibus GitLab](../../postgresql/replication_and_failover.md#patroni) page.
+### Configuring Patroni cluster for a Geo secondary site
+
+In a Geo secondary site, the main PostgreSQL database is a read-only replica of the primary site’s PostgreSQL database.
+
If you are currently using `repmgr` on your Geo primary site, see [these instructions](#migrating-from-repmgr-to-patroni) for migrating from `repmgr` to Patroni.
A production-ready and secure setup requires at least three Consul nodes, three
@@ -498,9 +503,7 @@ configuration for the secondary site. The internal load balancer provides a sing
endpoint for connecting to the Patroni cluster's leader whenever a new leader is
elected. Be sure to use [password credentials](../../postgresql/replication_and_failover.md#database-authorization-for-patroni) and other database best practices.
-Similar to `repmgr`, using Patroni on a secondary node is optional.
-
-### Step 1. Configure Patroni permanent replication slot on the primary site
+#### Step 1. Configure Patroni permanent replication slot on the primary site
To set up database replication with Patroni on a secondary node, we need to
configure a _permanent replication slot_ on the primary node's Patroni cluster,
@@ -520,7 +523,7 @@ Leader instance**:
```ruby
consul['enable'] = true
consul['configuration'] = {
- retry_join: %w[CONSUL_PRIMARY1_IP CONSULT_PRIMARY2_IP CONSULT_PRIMARY3_IP]
+ retry_join: %w[CONSUL_PRIMARY1_IP CONSUL_PRIMARY2_IP CONSUL_PRIMARY3_IP]
}
repmgr['enable'] = false
@@ -553,7 +556,7 @@ Leader instance**:
gitlab-ctl reconfigure
```
-### Step 2. Configure the internal load balancer on the primary site
+#### Step 2. Configure the internal load balancer on the primary site
To avoid reconfiguring the Standby Leader on the secondary site whenever a new
Leader is elected on the primary site, we'll need to set up a TCP internal load
@@ -597,7 +600,65 @@ backend postgresql
Refer to your preferred Load Balancer's documentation for further guidance.
-### Step 3. Configure a Standby cluster on the secondary site
+#### Step 3. Configure a PgBouncer node on the secondary site
+
+A production-ready and highly available configuration requires at least
+three Consul nodes, a minimum of one PgBouncer node, but it’s recommended to have
+one per database node. An internal load balancer (TCP) is required when there is
+more than one PgBouncer service nodes. The internal load balancer provides a single
+endpoint for connecting to the PgBouncer cluster. For more information,
+see [High Availability with Omnibus GitLab](../../postgresql/replication_and_failover.md).
+
+Follow the minimal configuration for the PgBouncer node:
+
+1. SSH into your PgBouncer node and login as root:
+
+ ```shell
+ sudo -i
+ ```
+
+1. Edit `/etc/gitlab/gitlab.rb` and add the following:
+
+ ```ruby
+ # Disable all components except Pgbouncer and Consul agent
+ roles ['pgbouncer_role']
+
+ # PgBouncer configuration
+ pgbouncer['users'] = {
+ 'pgbouncer': {
+ password: 'PGBOUNCER_PASSWORD_HASH'
+ }
+ }
+
+ # Consul configuration
+ consul['watchers'] = %w(postgresql)
+
+ consul['configuration'] = {
+ retry_join: %w[CONSUL_SECONDARY1_IP CONSUL_SECONDARY2_IP CONSUL_SECONDARY3_IP]
+ }
+
+ consul['monitoring_service_discovery'] = true
+ ```
+
+1. Reconfigure GitLab for the changes to take effect:
+
+ ```shell
+ gitlab-ctl reconfigure
+ ```
+
+1. Create a `.pgpass` file so Consul is able to reload PgBouncer. Enter the `PLAIN_TEXT_PGBOUNCER_PASSWORD` twice when asked:
+
+ ```shell
+ gitlab-ctl write-pgpass --host 127.0.0.1 --database pgbouncer --user pgbouncer --hostuser gitlab-consul
+ ```
+
+1. Restart the PgBouncer service:
+
+ ```shell
+ gitlab-ctl restart pgbouncer
+ ```
+
+#### Step 4. Configure a Standby cluster on the secondary site
NOTE:
If you are converting a secondary site to a Patroni Cluster, you must start
@@ -619,7 +680,7 @@ For each Patroni instance on the secondary site:
consul['enable'] = true
consul['configuration'] = {
- retry_join: %w[CONSUL_SECONDARY1_IP CONSULT_SECONDARY2_IP CONSULT_SECONDARY3_IP]
+ retry_join: %w[CONSUL_SECONDARY1_IP CONSUL_SECONDARY2_IP CONSUL_SECONDARY3_IP]
}
repmgr['enable'] = false
@@ -669,14 +730,14 @@ For each Patroni instance on the secondary site:
gitlab-ctl reconfigure
```
-## Migrating from repmgr to Patroni
+### Migrating from repmgr to Patroni
1. Before migrating, it is recommended that there is no replication lag between the primary and secondary sites and that replication is paused. In GitLab 13.2 and later, you can pause and resume replication with `gitlab-ctl geo-replication-pause` and `gitlab-ctl geo-replication-resume` on a Geo secondary database node.
1. Follow the [instructions to migrate repmgr to Patroni](../../postgresql/replication_and_failover.md#switching-from-repmgr-to-patroni). When configuring Patroni on each primary site database node, add `patroni['replication_slots'] = { '<slot_name>' => 'physical' }`
to `gitlab.rb` where `<slot_name>` is the name of the replication slot for your Geo secondary. This will ensure that Patroni recognizes the replication slot as permanent and will not drop it upon restarting.
1. If database replication to the secondary was paused before migration, resume replication once Patroni is confirmed working on the primary.
-## Migrating a single PostgreSQL node to Patroni
+### Migrating a single PostgreSQL node to Patroni
Before the introduction of Patroni, Geo had no Omnibus support for HA setups on the secondary node.
@@ -685,12 +746,197 @@ With Patroni it's now possible to support that. In order to migrate the existing
1. Make sure you have a Consul cluster setup on the secondary (similar to how you set it up on the primary).
1. [Configure a permanent replication slot](#step-1-configure-patroni-permanent-replication-slot-on-the-primary-site).
1. [Configure the internal load balancer](#step-2-configure-the-internal-load-balancer-on-the-primary-site).
-1. [Configure a Standby Cluster](#step-3-configure-a-standby-cluster-on-the-secondary-site)
+1. [Configure a PgBouncer node](#step-3-configure-a-pgbouncer-node-on-the-secondary-site)
+1. [Configure a Standby Cluster](#step-4-configure-a-standby-cluster-on-the-secondary-site)
on that single node machine.
You will end up with a "Standby Cluster" with a single node. That allows you to later on add additional Patroni nodes
by following the same instructions above.
+### Configuring Patroni cluster for the tracking PostgreSQL database
+
+Secondary sites use a separate PostgreSQL installation as a tracking database to
+keep track of replication status and automatically recover from potential replication issues.
+Omnibus automatically configures a tracking database when `roles ['geo_secondary_role']` is set.
+If you want to run this database in a highly available configuration, follow the instructions below.
+
+A production-ready and secure setup requires at least three Consul nodes, three
+Patroni nodes on the secondary site secondary site. Be sure to use [password credentials](../../postgresql/replication_and_failover.md#database-authorization-for-patroni) and other database best practices.
+
+#### Step 1. Configure a PgBouncer node on the secondary site
+
+A production-ready and highly available configuration requires at least
+three Consul nodes, three PgBouncer nodes, and one internal load-balancing node.
+The internal load balancer provides a single endpoint for connecting to the
+PgBouncer cluster. For more information, see [High Availability with Omnibus GitLab](../../postgresql/replication_and_failover.md).
+
+Follow the minimal configuration for the PgBouncer node for the tracking database:
+
+1. SSH into your PgBouncer node and login as root:
+
+ ```shell
+ sudo -i
+ ```
+
+1. Edit `/etc/gitlab/gitlab.rb` and add the following:
+
+ ```ruby
+ # Disable all components except Pgbouncer and Consul agent
+ roles ['pgbouncer_role']
+
+ # PgBouncer configuration
+ pgbouncer['users'] = {
+ 'pgbouncer': {
+ password: 'PGBOUNCER_PASSWORD_HASH'
+ }
+ }
+
+ pgbouncer['databases'] = {
+ gitlabhq_geo_production: {
+ user: 'pgbouncer',
+ password: 'PGBOUNCER_PASSWORD_HASH'
+ }
+ }
+
+ # Consul configuration
+ consul['watchers'] = %w(postgresql)
+
+ consul['configuration'] = {
+ retry_join: %w[CONSUL_TRACKINGDB1_IP CONSUL_TRACKINGDB2_IP CONSUL_TRACKINGDB3_IP]
+ }
+
+ consul['monitoring_service_discovery'] = true
+
+ # GitLab database settings
+ gitlab_rails['db_database'] = 'gitlabhq_geo_production'
+ gitlab_rails['db_username'] = 'gitlab_geo'
+ ```
+
+1. Reconfigure GitLab for the changes to take effect:
+
+ ```shell
+ gitlab-ctl reconfigure
+ ```
+
+1. Create a `.pgpass` file so Consul is able to reload PgBouncer. Enter the `PLAIN_TEXT_PGBOUNCER_PASSWORD` twice when asked:
+
+ ```shell
+ gitlab-ctl write-pgpass --host 127.0.0.1 --database pgbouncer --user pgbouncer --hostuser gitlab-consul
+ ```
+
+1. Restart the PgBouncer service:
+
+ ```shell
+ gitlab-ctl restart pgbouncer
+ ```
+
+#### Step 2. Configure a Patroni cluster
+
+For each Patroni instance on the secondary site for the tracking database:
+
+1. SSH into your Patroni node and login as root:
+
+ ```shell
+ sudo -i
+ ```
+
+1. Edit `/etc/gitlab/gitlab.rb` and add the following:
+
+ ```ruby
+ # Disable all components except PostgreSQL, Patroni, and Consul
+ roles ['patroni_role']
+
+ # Consul configuration
+ consul['services'] = %w(postgresql)
+
+ consul['configuration'] = {
+ server: true,
+ retry_join: %w[CONSUL_TRACKINGDB1_IP CONSUL_TRACKINGDB2_IP CONSUL_TRACKINGDB3_IP]
+ }
+
+ # PostgreSQL configuration
+ postgresql['listen_address'] = '0.0.0.0'
+ postgresql['hot_standby'] = 'on'
+ postgresql['wal_level'] = 'replica'
+
+ postgresql['pgbouncer_user_password'] = 'PGBOUNCER_PASSWORD_HASH'
+ postgresql['sql_replication_password'] = 'POSTGRESQL_REPLICATION_PASSWORD_HASH'
+ postgresql['sql_user_password'] = 'POSTGRESQL_PASSWORD_HASH'
+
+ postgresql['md5_auth_cidr_addresses'] = [
+ 'PATRONI_TRACKINGDB1_IP/32', 'PATRONI_TRACKINGDB2_IP/32', 'PATRONI_TRACKINGDB3_IP/32', 'PATRONI_TRACKINGDB_PGBOUNCER/32',
+ # Any other instance that needs access to the database as per documentation
+ ]
+
+ # Patroni configuration
+ patroni['replication_password'] = 'PLAIN_TEXT_POSTGRESQL_REPLICATION_PASSWORD'
+ patroni['postgresql']['max_wal_senders'] = 5 # A minimum of three for one replica, plus two for each additional replica
+
+ # GitLab database settings
+ gitlab_rails['db_database'] = 'gitlabhq_geo_production'
+ gitlab_rails['db_username'] = 'gitlab_geo'
+
+ # Disable automatic database migrations
+ gitlab_rails['auto_migrate'] = false
+ ```
+
+1. Reconfigure GitLab for the changes to take effect.
+ This is required to bootstrap PostgreSQL users and settings:
+
+ ```shell
+ gitlab-ctl reconfigure
+ ```
+
+#### Step 3. Configure the tracking database on the secondary nodes
+
+For each node running the `gitlab-rails`, `sidekiq`, and `geo-logcursor` services:
+
+1. SSH into your node and login as root:
+
+ ```shell
+ sudo -i
+ ```
+
+1. Edit `/etc/gitlab/gitlab.rb` and add the following attributes. You may have other attributes set, but the following need to be set.
+
+ ```ruby
+ # Tracking database settings
+ geo_secondary['db_username'] = 'gitlab_geo'
+ geo_secondary['db_password'] = 'PLAIN_TEXT_PGBOUNCER_PASSWORD'
+ geo_secondary['db_database'] = 'gitlabhq_geo_production'
+ geo_secondary['db_host'] = 'PATRONI_TRACKINGDB_PGBOUNCER_IP'
+ geo_secondary['db_port'] = 6432
+ geo_secondary['auto_migrate'] = false
+
+ # Disable the tracking database service
+ geo_postgresql['enable'] = false
+ ```
+
+1. Reconfigure GitLab for the changes to take effect.
+
+ ```shell
+ gitlab-ctl reconfigure
+ ```
+
+1. Run the tracking database migrations:
+
+ ```shell
+ gitlab-rake geo:db:migrate
+ ```
+
+### Migrating a single tracking database node to Patroni
+
+Before the introduction of Patroni, Geo had no Omnibus support for HA setups on
+the secondary node.
+
+With Patroni, it's now possible to support that. Due to some restrictions on the
+Patroni implementation on Omnibus that do not allow us to manage two different
+clusters on the same machine, we recommend setting up a new Patroni cluster for
+the tracking database by following the same instructions above.
+
+The secondary nodes will backfill the new tracking database, and no data
+synchronization will be required.
+
## Troubleshooting
Read the [troubleshooting document](../replication/troubleshooting.md).
diff --git a/doc/administration/geo/setup/index.md b/doc/administration/geo/setup/index.md
index 5ec18e29f21..1afa4360cbc 100644
--- a/doc/administration/geo/setup/index.md
+++ b/doc/administration/geo/setup/index.md
@@ -25,7 +25,7 @@ If you installed GitLab using the Omnibus packages (highly recommended):
1. [Configure fast lookup of authorized SSH keys in the database](../../operations/fast_ssh_key_lookup.md). This step is required and needs to be done on **both** the **primary** and **secondary** nodes.
1. [Configure GitLab](../replication/configuration.md) to set the **primary** and **secondary** nodes.
1. Optional: [Configure a secondary LDAP server](../../auth/ldap/index.md) for the **secondary** node. See [notes on LDAP](../index.md#ldap).
-1. [Follow the "Using a Geo Server" guide](../replication/using_a_geo_server.md).
+1. Follow the [Using a Geo Site](../replication/usage.md) guide.
## Post-installation documentation
diff --git a/doc/administration/git_annex.md b/doc/administration/git_annex.md
index 07b7b1730e3..741b2a78b85 100644
--- a/doc/administration/git_annex.md
+++ b/doc/administration/git_annex.md
@@ -1,237 +1,8 @@
---
-stage: Create
-group: Source Code
-info: "To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments"
-type: reference, howto
-disqus_identifier: 'https://docs.gitlab.com/ee/workflow/git_annex.html'
+redirect_to: 'index.md'
---
-# Git annex
+This document was moved to [another location](index.md).
-WARNING:
-[Git Annex support was removed](https://gitlab.com/gitlab-org/gitlab/-/issues/1648)
-in GitLab 9.0. Read through the [migration guide from git-annex to Git LFS](../topics/git/lfs/migrate_from_git_annex_to_git_lfs.md).
-
-The biggest limitation of Git, compared to some older centralized version
-control systems has been the maximum size of the repositories.
-
-The general recommendation is to not have Git repositories larger than 1GB to
-preserve performance. Although GitLab has no limit (some repositories in GitLab
-are over 50GB!), we subscribe to the advice to keep repositories as small as
-you can.
-
-Not being able to version control large binaries is a big problem for many
-larger organizations.
-Videos, photos, audio, compiled binaries, and many other types of files are too
-large. As a workaround, people keep artwork-in-progress in a Dropbox folder and
-only check in the final result. This results in using outdated files, not
-having a complete history, and increases the risk of losing work.
-
-This problem is solved in GitLab Enterprise Edition by integrating the
-[git-annex](https://git-annex.branchable.com/) application.
-
-`git-annex` allows managing large binaries with Git without checking the
-contents into Git.
-You check-in only a symlink that contains the SHA-1 of the large binary. If you
-need the large binary, you can sync it from the GitLab server over `rsync`, a
-very fast file copying tool.
-
-## GitLab git-annex Configuration
-
-`git-annex` is disabled by default in GitLab. Below are the
-configuration options required to enable it.
-
-### Requirements
-
-`git-annex` needs to be installed both on the server and the client-side.
-
-For Debian-like systems (for example, Debian and Ubuntu) this can be achieved by running:
-
-```shell
-sudo apt-get update && sudo apt-get install git-annex
-```
-
-For RedHat-like systems (for example, CentOS and RHEL) this can be achieved by running:
-
-```shell
-sudo yum install epel-release && sudo yum install git-annex
-```
-
-### Configuration for Omnibus packages
-
-For Omnibus GitLab packages, only one configuration setting is needed.
-The Omnibus package internally sets the correct options in all locations.
-
-1. In `/etc/gitlab/gitlab.rb` add the following line:
-
- ```ruby
- gitlab_shell['git_annex_enabled'] = true
- ```
-
-1. Save the file and [reconfigure GitLab](restart_gitlab.md#omnibus-gitlab-reconfigure) for the changes to take effect.
-
-### Configuration for installations from source
-
-There are 2 settings to enable git-annex on your GitLab server.
-
-One is located in `config/gitlab.yml` of the GitLab repository and the other
-one is located in `config.yml` of GitLab Shell.
-
-1. In `config/gitlab.yml` add or edit the following lines:
-
- ```yaml
- gitlab_shell:
- git_annex_enabled: true
- ```
-
-1. In `config.yml` of GitLab Shell add or edit the following lines:
-
- ```yaml
- git_annex_enabled: true
- ```
-
-1. Save the files and [restart GitLab](restart_gitlab.md#installations-from-source) for the changes to take effect.
-
-## Using GitLab git-annex
-
-NOTE:
-Your Git remotes must be using the SSH protocol, not HTTP(S).
-
-Here is an example workflow of uploading a very large file and then checking it
-into your Git repository:
-
-```shell
-git clone git@example.com:group/project.git
-
-git annex init 'My Laptop' # initialize the annex project and give an optional description
-cp ~/tmp/debian.iso ./ # copy a large file into the current directory
-git annex add debian.iso # add the large file to git annex
-git commit -am "Add Debian iso" # commit the file metadata
-git annex sync --content # sync the Git repo and large file to the GitLab server
-```
-
-The output should look like this:
-
-```plaintext
-commit
-On branch master
-Your branch is ahead of 'origin/master' by 1 commit.
- (use "git push" to publish your local commits)
-nothing to commit, working tree clean
-ok
-pull origin
-remote: Counting objects: 5, done.
-remote: Compressing objects: 100% (4/4), done.
-remote: Total 5 (delta 2), reused 0 (delta 0)
-Unpacking objects: 100% (5/5), done.
-From example.com:group/project
- 497842b..5162f80 git-annex -> origin/git-annex
-ok
-(merging origin/git-annex into git-annex...)
-(recording state in git...)
-copy debian.iso (checking origin...) (to origin...)
-SHA256E-s26214400--8092b3d482fb1b7a5cf28c43bc1425c8f2d380e86869c0686c49aa7b0f086ab2.iso
- 26,214,400 100% 638.88kB/s 0:00:40 (xfr#1, to-chk=0/1)
-ok
-pull origin
-ok
-(recording state in git...)
-push origin
-Counting objects: 15, done.
-Delta compression using up to 4 threads.
-Compressing objects: 100% (13/13), done.
-Writing objects: 100% (15/15), 1.64 KiB | 0 bytes/s, done.
-Total 15 (delta 1), reused 0 (delta 0)
-To example.com:group/project.git
- * [new branch] git-annex -> synced/git-annex
- * [new branch] master -> synced/master
-ok
-```
-
-Your files can be found in the `master` branch, but more branches are created
-by the `annex sync` command.
-
-Git Annex creates a new directory at `.git/annex/` and records the
-tracked files in the `.git/config` file. The files you assign to be tracked
-with `git-annex` don't affect the existing `.git/config` records. The files
-are turned into symbolic links that point to data in `.git/annex/objects/`.
-
-The `debian.iso` file in the example contain the symbolic link:
-
-```plaintext
-.git/annex/objects/ZW/1k/SHA256E-s82701--6384039733b5035b559efd5a2e25a493ab6e09aabfd5162cc03f6f0ec238429d.png/SHA256E-s82701--6384039733b5035b559efd5a2e25a493ab6e09aabfd5162cc03f6f0ec238429d.iso
-```
-
-Use `git annex info` to retrieve the information about the local copy of your
-repository.
-
----
-
-You can download a single large file with these commands:
-
-```shell
-git clone git@gitlab.example.com:group/project.git
-
-git annex sync # sync Git branches but not the large file
-git annex get debian.iso # download the large file
-```
-
-To download all files:
-
-```shell
-git clone git@gitlab.example.com:group/project.git
-
-git annex sync --content # sync Git branches and download all the large files
-```
-
-By using `git-annex` without GitLab, anyone that can access the server can also
-access the files of all projects. GitLab Annex ensures that you can only
-access files of projects you have access to (developer, maintainer, or owner role).
-
-## How it works
-
-Internally GitLab uses [GitLab Shell](https://gitlab.com/gitlab-org/gitlab-shell) to handle SSH access and this was a great
-integration point for `git-annex`.
-There is a setting in GitLab Shell so you can disable GitLab Annex support
-if you want to.
-
-## Troubleshooting tips
-
-Differences in the version of `git-annex` on the GitLab server and on local machines
-can cause `git-annex` to raise unpredicted warnings and errors.
-
-Consult the [Annex upgrade page](https://git-annex.branchable.com/upgrades/) for more information about
-the differences between versions. You can find out which version is installed
-on your server by navigating to `https://pkgs.org/download/git-annex` and
-searching for your distribution.
-
-Although there is no general guide for `git-annex` errors, there are a few tips
-on how to go around the warnings.
-
-### `git-annex-shell: Not a git-annex or gcrypt repository`
-
-This warning can appear on the initial `git annex sync --content` and is caused
-by differences in `git-annex-shell`. You can read more about it
-[in this git-annex issue](https://git-annex.branchable.com/forum/Error_from_git-annex-shell_on_creation_of_gcrypt_special_remote/).
-
-One important thing to note is that despite the warning, the `sync` succeeds
-and the files are pushed to the GitLab repository.
-
-If you get hit by this, you can run the following command inside the repository
-that the warning was raised:
-
-```shell
-git config remote.origin.annex-ignore false
-```
-
-Consecutive runs of `git annex sync --content` **should not** produce this
-warning and the output should look like this:
-
-```plaintext
-commit ok
-pull origin
-ok
-pull origin
-ok
-push origin
-```
+<!-- This redirect file can be deleted after <2021-07-22>. -->
+<!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/#move-or-rename-a-page -->
diff --git a/doc/administration/gitaly/configure_gitaly.md b/doc/administration/gitaly/configure_gitaly.md
index 51ad376a625..c9b6fd4c510 100644
--- a/doc/administration/gitaly/configure_gitaly.md
+++ b/doc/administration/gitaly/configure_gitaly.md
@@ -378,13 +378,26 @@ server (with `gitaly_address`) unless you use
1. Edit `/etc/gitlab/gitlab.rb`:
```ruby
+ # Use the same token value configured on all Gitaly servers
+ gitlab_rails['gitaly_token'] = '<AUTH_TOKEN>'
+
git_data_dirs({
- 'default' => { 'gitaly_address' => 'tcp://gitaly1.internal:8075' },
+ 'default' => { 'gitaly_address' => 'tcp://gitaly1.internal:8075' },
'storage1' => { 'gitaly_address' => 'tcp://gitaly1.internal:8075' },
'storage2' => { 'gitaly_address' => 'tcp://gitaly2.internal:8075' },
})
```
+ Alternatively, if each Gitaly server is configured to use a different authentication token:
+
+ ```ruby
+ git_data_dirs({
+ 'default' => { 'gitaly_address' => 'tcp://gitaly1.internal:8075', 'gitaly_token' => '<AUTH_TOKEN_1>' },
+ 'storage1' => { 'gitaly_address' => 'tcp://gitaly1.internal:8075', 'gitaly_token' => '<AUTH_TOKEN_1>' },
+ 'storage2' => { 'gitaly_address' => 'tcp://gitaly2.internal:8075', 'gitaly_token' => '<AUTH_TOKEN_2>' },
+ })
+ ```
+
1. Save the file and [reconfigure GitLab](../restart_gitlab.md#omnibus-gitlab-reconfigure).
1. Run `sudo gitlab-rake gitlab:gitaly:check` on the Gitaly client (for example, the
Rails application) to confirm it can connect to Gitaly servers.
@@ -404,12 +417,15 @@ server (with `gitaly_address`) unless you use
storages:
default:
gitaly_address: tcp://gitaly1.internal:8075
+ gitaly_token: AUTH_TOKEN_1
path: /some/local/path
storage1:
gitaly_address: tcp://gitaly1.internal:8075
+ gitaly_token: AUTH_TOKEN_1
path: /some/local/path
storage2:
gitaly_address: tcp://gitaly2.internal:8075
+ gitaly_token: AUTH_TOKEN_2
path: /some/local/path
```
@@ -979,7 +995,7 @@ identical fetches. It:
The pack-objects cache is a local cache. It:
- Stores its metadata in the memory of the Gitaly process it is enabled in.
-- Stores the actual Git data it is caching in files on local storage.
+- Stores the actual Git data it is caching in files on local storage.
Using local files has the benefit that the operating system may
automatically keep parts of the pack-objects cache files in RAM,
@@ -1021,7 +1037,7 @@ we felt we cannot assume this is true everywhere.
The cache needs a directory to store its files in. This directory
should be:
-- In a filesystem with enough space. If the cache filesystem runs out of space, all
+- In a file system with enough space. If the cache file system runs out of space, all
fetches start failing.
- On a disk with enough IO bandwidth. If the cache disk runs out of IO bandwidth, all
fetches, and probably the entire server, slows down.
@@ -1036,8 +1052,8 @@ uses a unique random string as part of the cache filenames it creates. This mean
- They do not reuse another process's files.
While the default directory puts the cache files in the same
-filesystem as your repository data, this is not requirement. You can
-put the cache files on a different filesystem if that works better for
+file system as your repository data, this is not requirement. You can
+put the cache files on a different file system if that works better for
your infrastructure.
The amount of IO bandwidth required from the disk depends on:
@@ -1141,7 +1157,7 @@ The following cache metrics are available.
|Metric|Type|Labels|Description|
|:---|:---|:---|:---|
-|`gitaly_pack_objects_cache_enabled`|gauge|`dir`,`max_age`|Set to `1` when the cache is enabled via the Gitaly config file|
+|`gitaly_pack_objects_cache_enabled`|gauge|`dir`,`max_age`|Set to `1` when the cache is enabled via the Gitaly configuration file|
|`gitaly_pack_objects_cache_lookups_total`|counter|`result`|Hit/miss counter for cache lookups|
|`gitaly_pack_objects_generated_bytes_total`|counter||Number of bytes written into the cache|
|`gitaly_pack_objects_served_bytes_total`|counter||Number of bytes read from the cache|
diff --git a/doc/administration/gitaly/index.md b/doc/administration/gitaly/index.md
index 1a8e18ca2b2..3935e990590 100644
--- a/doc/administration/gitaly/index.md
+++ b/doc/administration/gitaly/index.md
@@ -62,7 +62,7 @@ Gitaly comes pre-configured with Omnibus GitLab, which is a configuration
[suitable for up to 1000 users](../reference_architectures/1k_users.md). For:
- Omnibus GitLab installations for up to 2000 users, see [specific Gitaly configuration instructions](../reference_architectures/2k_users.md#configure-gitaly).
-- Source installations or custom Gitaly installations, see [Configure Gitaly](#configure-gitaly).
+- Source installations or custom Gitaly installations, see [Configure Gitaly](configure_gitaly.md).
GitLab installations for more than 2000 users should use Gitaly Cluster.
@@ -108,9 +108,9 @@ The availability objectives for Gitaly clusters are:
Gitaly Cluster supports:
- [Strong consistency](praefect.md#strong-consistency) of the secondary replicas.
-- [Automatic failover](praefect.md#automatic-failover-and-leader-election) from the primary to the secondary.
+- [Automatic failover](praefect.md#automatic-failover-and-primary-election-strategies) from the primary to the secondary.
- Reporting of possible data loss if replication queue is non-empty.
-- Marking repositories as [read only](praefect.md#read-only-mode) if data loss is detected to prevent data inconsistencies.
+- Marking repositories as [read-only](praefect.md#read-only-mode) if data loss is detected to prevent data inconsistencies.
Follow the [Gitaly Cluster epic](https://gitlab.com/groups/gitlab-org/-/epics/1489)
for improvements including
@@ -213,7 +213,9 @@ In this example:
- Each repository is stored on one of three Gitaly storages: `storage-1`, `storage-2`,
or `storage-3`.
- Each storage is serviced by a Gitaly node.
-- The three Gitaly nodes store data in three separate hashed storage locations.
+- The three Gitaly nodes share data in three separate hashed storage locations.
+- The [replication factor](praefect.md#replication-factor) is `3`. There are three copies maintained
+ of each repository.
Generally, virtual storage with Gitaly Cluster can replace direct Gitaly storage configurations, at
the expense of additional storage needed to store each repository on multiple Gitaly nodes. The
@@ -246,10 +248,10 @@ Gitaly Cluster and [Geo](../geo/index.md) both provide redundancy. However the r
The following table outlines the major differences between Gitaly Cluster and Geo:
-| Tool | Nodes | Locations | Latency tolerance | Failover | Consistency | Provides redundancy for |
-|:---------------|:---------|:----------|:-------------------|:----------------------------------------------------------------|:-----------------------------------------|:------------------------|
-| Gitaly Cluster | Multiple | Single | Approximately 1 ms | [Automatic](praefect.md#automatic-failover-and-leader-election) | [Strong](praefect.md#strong-consistency) | Data storage in Git |
-| Geo | Multiple | Multiple | Up to one minute | [Manual](../geo/disaster_recovery/index.md) | Eventual | Entire GitLab instance |
+| Tool | Nodes | Locations | Latency tolerance | Failover | Consistency | Provides redundancy for |
+|:---------------|:---------|:----------|:-------------------|:----------------------------------------------------------------------------|:-----------------------------------------|:------------------------|
+| Gitaly Cluster | Multiple | Single | Approximately 1 ms | [Automatic](praefect.md#automatic-failover-and-primary-election-strategies) | [Strong](praefect.md#strong-consistency) | Data storage in Git |
+| Geo | Multiple | Multiple | Up to one minute | [Manual](../geo/disaster_recovery/index.md) | Eventual | Entire GitLab instance |
For more information, see:
diff --git a/doc/administration/gitaly/praefect.md b/doc/administration/gitaly/praefect.md
index 1a8df1cea92..a1733d9d6ac 100644
--- a/doc/administration/gitaly/praefect.md
+++ b/doc/administration/gitaly/praefect.md
@@ -7,9 +7,14 @@ type: reference
# Configure Gitaly Cluster **(FREE SELF)**
-In addition to Gitaly Cluster configuration instructions available as part of
-[reference architectures](../reference_architectures/index.md) for installations for more than
-2000 users, advanced configuration instructions are available below.
+Configure Gitaly Cluster using either:
+
+- The Gitaly Cluster configuration instructions available as part of
+ [reference architectures](../reference_architectures/index.md) for installations for more than
+ 2000 users.
+- The advanced configuration instructions that follow on this page.
+
+Smaller GitLab installations may need only [Gitaly itself](index.md).
## Requirements for configuring a Gitaly Cluster
@@ -869,6 +874,27 @@ Particular attention should be shown to:
repository that viewed. If the project is created, and you can see the
README file, it works!
+#### Use TCP for existing GitLab instances
+
+When adding Gitaly Cluster to an existing Gitaly instance, the existing Gitaly storage
+must use a TCP address. If `gitaly_address` is not specified, then a Unix socket is used,
+which will prevent the communication with the cluster.
+
+For example:
+
+```ruby
+git_data_dirs({
+ 'default' => { 'gitaly_address' => 'tcp://old-gitaly.internal:8075' },
+ 'cluster' => {
+ 'gitaly_address' => 'tcp://<load_balancer_server_address>:2305',
+ 'gitaly_token' => '<praefect_external_token>'
+ }
+})
+```
+
+See [Mixed Configuration](configure_gitaly.md#mixed-configuration) for further information on
+running multiple Gitaly storages.
+
### Grafana
Grafana is included with GitLab, and can be used to monitor your Praefect
@@ -1004,13 +1030,10 @@ replication factor offers better redundancy and distribution of read workload, b
in a higher storage cost. By default, Praefect replicates repositories to every storage in a
virtual storage.
-### Configure replication factors
+### Configure replication factor
WARNING:
-The feature is not production ready yet. After you set a replication factor, you can't unset it
-without manually modifying database state. Variable replication factor requires you to enable
-repository-specific primaries by configuring the `per_repository` primary election strategy. The election
-strategy is not production ready yet.
+Configurable replication factors require [repository-specific primary nodes](#repository-specific-primary-nodes) to be used.
Praefect supports configuring a replication factor on a per-repository basis, by assigning
specific storage nodes to host a repository.
@@ -1056,26 +1079,119 @@ You can configure:
current assignments: gitaly-1, gitaly-2
```
-## Automatic failover and leader election
+## Automatic failover and primary election strategies
+
+Praefect regularly checks the health of each Gitaly node. This is used to automatically fail over
+to a newly-elected primary Gitaly node if the current primary node is found to be unhealthy.
+
+We recommend using [repository-specific primary nodes](#repository-specific-primary-nodes). This is
+[planned to be the only available election strategy](https://gitlab.com/gitlab-org/gitaly/-/issues/3574)
+from GitLab 14.0.
+
+### Repository-specific primary nodes
+
+> [Introduced](https://gitlab.com/gitlab-org/gitaly/-/issues/3492) in GitLab 13.12.
+
+Gitaly Cluster supports electing repository-specific primary Gitaly nodes. Repository-specific
+Gitaly primary nodes are enabled in `/etc/gitlab/gitlab.rb` by setting
+`praefect['failover_election_strategy'] = 'per_repository'`.
+
+Praefect's [deprecated election strategies](#deprecated-election-strategies):
+
+- Elected a primary Gitaly node for each virtual storage, which was used as the primary node for
+ each repository in the virtual storage.
+- Prevented horizontal scaling of a virtual storage. The primary Gitaly node needed a replica of
+ each repository and thus became the bottleneck.
+
+The `per_repository` election strategy solves this problem by electing a primary Gitaly node separately for each
+repository. Combined with [configurable replication factors](#configure-replication-factor), you can
+horizontally scale storage capacity and distribute write load across Gitaly nodes.
+
+Primary elections are run when:
+
+- Praefect starts up.
+- The cluster's consensus of a Gitaly node's health changes.
+
+A Gitaly node is considered:
+
+- Healthy if `>=50%` Praefect nodes have successfully health checked the Gitaly node in the
+ previous ten seconds.
+- Unhealthy otherwise.
+
+During an election run, Praefect elects a new primary Gitaly node for each repository that has
+an unhealthy primary Gitaly node. The election is made:
+
+- Randomly from healthy secondary Gitaly nodes that are the most up to date.
+- Only from Gitaly nodes assigned to the host repository.
+
+If there are no healthy secondary nodes for a repository:
+
+- The unhealthy primary node is demoted and the repository is left without a primary node.
+- Operations that require a primary node fail until a primary is successfully elected.
+
+#### Migrate to repository-specific primary Gitaly nodes
+
+New Gitaly Clusters can start using the `per_repository` election strategy immediately.
+
+To migrate existing clusters:
+
+1. Praefect nodes didn't historically keep database records of every repository stored on the cluster. When
+ the `per_repository` election strategy is configured, Praefect expects to have database records of
+ each repository. A [background migration](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/2749) is
+ included in GitLab 13.6 and later to create any missing database records for repositories. Before migrating
+ you should verify the migration has run by checking Praefect's logs:
-Praefect regularly checks the health of each backend Gitaly node. This
-information can be used to automatically failover to a new primary node if the
-current primary node is found to be unhealthy.
+ Check Praefect's logs for `repository importer finished` message. The `virtual_storages` field contains
+ the names of virtual storages and whether they've had any missing database records created.
-- **PostgreSQL (recommended):** Enabled by default, and equivalent to:
- `praefect['failover_election_strategy'] = sql`. This configuration
- option allows multiple Praefect nodes to coordinate via the
- PostgreSQL database to elect a primary Gitaly node. This configuration
- causes Praefect nodes to elect a new primary, monitor its health,
- and elect a new primary if the current one has not been reachable in
- 10 seconds by a majority of the Praefect nodes.
+ For example, the `default` virtual storage has been successfully migrated:
+
+ ```json
+ {"level":"info","msg":"repository importer finished","pid":19752,"time":"2021-04-28T11:41:36.743Z","virtual_storages":{"default":true}}
+ ```
+
+ If a virtual storage has not been successfully migrated, it would have `false` next to it:
+
+ ```json
+ {"level":"info","msg":"repository importer finished","pid":19752,"time":"2021-04-28T11:41:36.743Z","virtual_storages":{"default":false}}
+ ```
+
+ The migration is ran when Praefect starts up. If the migration is unsuccessful, you can restart
+ a Praefect node to reattempt it. The migration only runs with `sql` election strategy configured.
+
+1. Running two different election strategies side by side can cause a split brain, where different
+ Praefect nodes consider repositories to have different primaries. To avoid this, shut down
+ all Praefect nodes before changing the election strategy.
+
+ Do this by running `gitlab-ctl stop praefect` on the Praefect nodes.
+
+1. On the Praefect nodes, configure the election strategy in `/etc/gitlab/gitlab.rb` with
+ `praefect['failover_election_strategy'] = 'per_repository'`.
+
+1. Finally, run `gitlab-ctl reconfigure` to reconfigure and restart the Praefect nodes.
+
+### Deprecated election strategies
+
+WARNING:
+The below election strategies are deprecated and are scheduled for removal in GitLab 14.0.
+Migrate to [repository-specific primary nodes](#repository-specific-primary-nodes).
+
+- **PostgreSQL:** Enabled by default until GitLab 14.0, and equivalent to:
+ `praefect['failover_election_strategy'] = 'sql'`.
+
+ This configuration option:
+
+ - Allows multiple Praefect nodes to coordinate via the PostgreSQL database to elect a primary
+ Gitaly node.
+ - Causes Praefect nodes to elect a new primary Gitaly node, monitor its health, and elect a new primary
+ Gitaly node if the current one is not reached within 10 seconds by a majority of the Praefect
+ nodes.
- **Memory:** Enabled by setting `praefect['failover_election_strategy'] = 'local'`
- in `/etc/gitlab/gitlab.rb` on the Praefect node. If a sufficient number of health
- checks fail for the current primary backend Gitaly node, and new primary will
- be elected. **Do not use with multiple Praefect nodes!** Using with multiple
- Praefect nodes is likely to result in a split brain.
+ in `/etc/gitlab/gitlab.rb` on the Praefect node.
-We are likely to implement support for Consul, and a cloud native, strategy in the future.
+ If a sufficient number of health checks fail for the current primary Gitaly node, a new primary is
+ elected. **Do not use with multiple Praefect nodes!** Using with multiple Praefect nodes is
+ likely to result in a split brain.
## Primary Node Failure
@@ -1285,6 +1401,13 @@ praefect['reconciliation_scheduling_interval'] = '0' # disable the feature
### Manual reconciliation
+WARNING:
+The `reconcile` sub-command is deprecated and scheduled for removal in GitLab 14.0. Use
+[automatic reconciliation](#automatic-reconciliation) instead. Manual reconciliation may
+produce excess replication jobs and is limited in functionality. Manual reconciliation does
+not work when [repository-specific primary nodes](#repository-specific-primary-nodes) are
+enabled.
+
The Praefect `reconcile` sub-command allows for the manual reconciliation between two Gitaly nodes. The
command replicates every repository on a later version on the reference storage to the target storage.
@@ -1298,6 +1421,25 @@ sudo /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.t
## Migrate to Gitaly Cluster
+Whether migrating to Gitaly Cluster because of [NFS support deprecation](index.md#nfs-deprecation-notice)
+or to move from single Gitaly nodes, the basic process involves:
+
+1. Create the required storage.
+1. Create and configure Gitaly Cluster.
+1. [Move the repositories](#move-repositories).
+
+The size of the required storage can vary between instances and depends on the set
+[replication factor](#replication-factor). The migration to Gitaly Cluster might include
+implementing repository storage redundancy.
+
+For a replication factor:
+
+- Of `1`: NFS, Gitaly, and Gitaly Cluster have roughly the same storage requirements.
+- More than `1`: The amount of required storage is `used space * replication factor`. `used space`
+ should include any planned future growth.
+
+### Move Repositories
+
To migrate to Gitaly Cluster, existing repositories stored outside Gitaly Cluster must be
moved. There is no automatic migration but the moves can be scheduled with the GitLab API.
@@ -1305,7 +1447,7 @@ GitLab repositories can be associated with projects, groups, and snippets. Each
have a separate API to schedule the respective repositories to move. To move all repositories
on a GitLab instance, each of these types must be scheduled to move for each storage.
-Each repository is made read only when the move is scheduled. The repository is not writable
+Each repository is made read-only for the duration of the move. The repository is not writable
until the move has completed.
After creating and configuring Gitaly Cluster:
@@ -1316,11 +1458,11 @@ After creating and configuring Gitaly Cluster:
so that the Gitaly Cluster receives all new projects. This stops new projects being created
on existing Gitaly nodes while the migration is in progress.
1. Schedule repository moves for:
- - [Projects](#bulk-schedule-projects).
- - [Snippets](#bulk-schedule-snippets).
- - [Groups](#bulk-schedule-groups). **(PREMIUM SELF)**
+ - [Projects](#bulk-schedule-project-moves).
+ - [Snippets](#bulk-schedule-snippet-moves).
+ - [Groups](#bulk-schedule-group-moves). **(PREMIUM SELF)**
-### Bulk schedule projects
+#### Bulk schedule project moves
1. [Schedule repository storage moves for all projects on a storage shard](../../api/project_repository_storage_moves.md#schedule-repository-storage-moves-for-all-projects-on-a-storage-shard) using the API. For example:
@@ -1353,7 +1495,7 @@ After creating and configuring Gitaly Cluster:
1. Repeat for each storage as required.
-### Bulk schedule snippets
+#### Bulk schedule snippet moves
1. [Schedule repository storage moves for all snippets on a storage shard](../../api/snippet_repository_storage_moves.md#schedule-repository-storage-moves-for-all-snippets-on-a-storage-shard) using the API. For example:
@@ -1378,7 +1520,7 @@ After creating and configuring Gitaly Cluster:
1. Repeat for each storage as required.
-### Bulk schedule groups **(PREMIUM SELF)**
+#### Bulk schedule group moves **(PREMIUM SELF)**
1. [Schedule repository storage moves for all groups on a storage shard](../../api/group_repository_storage_moves.md#schedule-repository-storage-moves-for-all-groups-on-a-storage-shard) using the API.
@@ -1421,3 +1563,16 @@ Here are common errors and potential causes:
- **GRPC::Unavailable (14:all SubCons are in TransientFailure...)**
- Praefect cannot reach one or more of its child Gitaly nodes. Try running
the Praefect connection checker to diagnose.
+
+### Determine primary Gitaly node
+
+To determine the current primary Gitaly node for a specific Praefect node:
+
+- Use the `Shard Primary Election` [Grafana chart](#grafana) on the [`Gitlab Omnibus - Praefect` dashboard](https://gitlab.com/gitlab-org/grafana-dashboards/-/blob/master/omnibus/praefect.json).
+ This is recommended.
+- If you do not have Grafana set up, use the following command on each host of each
+ Praefect node:
+
+ ```shell
+ curl localhost:9652/metrics | grep gitaly_praefect_primaries`
+ ```
diff --git a/doc/administration/housekeeping.md b/doc/administration/housekeeping.md
index 8f369a05fbf..9668b7277c2 100644
--- a/doc/administration/housekeeping.md
+++ b/doc/administration/housekeeping.md
@@ -4,9 +4,10 @@ group: Distribution
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# Housekeeping
+# Housekeeping **(FREE)**
-> [Introduced](https://gitlab.com/gitlab-org/gitlab-foss/-/issues/3041) in GitLab 8.4.
+GitLab supports and automates housekeeping tasks within your current repository,
+such as compressing file revisions and removing unreachable objects.
## Automatic housekeeping
@@ -25,8 +26,8 @@ For example in the following scenario a `git repack -d` will be executed:
- Git GC period = `200`
- Full repack period = `50`
-When the `pushes_since_gc` value is 50 a `repack -A -d --pack-kept-objects` will run, similarly when
-the `pushes_since_gc` value is 200 a `git gc` will be run.
+When the `pushes_since_gc` value is 50 a `repack -A -d --pack-kept-objects` runs, similarly when
+the `pushes_since_gc` value is 200 a `git gc` runs.
- `git gc` ([man page](https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-gc.html)) runs a number of housekeeping tasks,
such as compressing file revisions (to reduce disk space and increase performance)
@@ -34,12 +35,14 @@ the `pushes_since_gc` value is 200 a `git gc` will be run.
`git add`.
- `git repack` ([man page](https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-repack.html)) re-organize existing packs into a single, more efficient pack.
-Housekeeping will also [remove unreferenced LFS files](../raketasks/cleanup.md#remove-unreferenced-lfs-files)
+Housekeeping also [removes unreferenced LFS files](../raketasks/cleanup.md#remove-unreferenced-lfs-files)
from your project on the same schedule as the `git gc` operation, freeing up storage space for your project.
-You can find this option under your project's **Settings > General > Advanced**.
+To manually start the housekeeping process:
-![Housekeeping settings](img/housekeeping_settings.png)
+1. In your project, go to **Settings > General**.
+1. Expand the **Advanced** section.
+1. Select **Run housekeeping**.
## How housekeeping handles pool repositories
@@ -56,4 +59,9 @@ This is the current call stack by which it is invoked:
1. `Gitaly::FetchIntoObjectPoolRequest`
To manually invoke it from a Rails console, if needed, you can call `project.pool_repository.object_pool.fetch`.
-This is a potentially long-running task, though Gitaly will timeout in about 8 hours.
+This is a potentially long-running task, though Gitaly times out in about 8 hours.
+
+WARNING:
+Do not run `git prune` or `git gc` in pool repositories! This can
+cause data loss in "real" repositories that depend on the pool in
+question.
diff --git a/doc/administration/img/housekeeping_settings.png b/doc/administration/img/housekeeping_settings.png
deleted file mode 100644
index 356de51f0cc..00000000000
--- a/doc/administration/img/housekeeping_settings.png
+++ /dev/null
Binary files differ
diff --git a/doc/administration/incoming_email.md b/doc/administration/incoming_email.md
index 22cd6ca097c..9aa6bffdb98 100644
--- a/doc/administration/incoming_email.md
+++ b/doc/administration/incoming_email.md
@@ -4,7 +4,7 @@ group: Project Management
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# Incoming email
+# Incoming email **(FREE SELF)**
GitLab has several features based on receiving incoming emails:
@@ -185,8 +185,11 @@ Example for Omnibus installs:
```ruby
gitlab_rails['incoming_email_enabled'] = true
-# The email address including the `%{key}` placeholder that will be replaced to reference the item being replied to.
-# The placeholder can be omitted but if present, it must appear in the "user" part of the address (before the `@`).
+# The email address including the %{key} placeholder that will be replaced to reference the
+# item being replied to. This %{key} should be included in its entirety within the email
+# address and not replaced by another value.
+# For example: emailaddress+%{key}@gitlab.example.com.
+# The placeholder must appear in the "user" part of the address (before the `@`).
gitlab_rails['incoming_email_address'] = "incoming+%{key}@gitlab.example.com"
# Email account username
@@ -223,7 +226,7 @@ incoming_email:
# The email address including the %{key} placeholder that will be replaced to reference the
# item being replied to. This %{key} should be included in its entirety within the email
# address and not replaced by another value.
- # For example: emailadress+%{key}@gmail.com.
+ # For example: emailaddress+%{key}@gitlab.example.com.
# The placeholder must appear in the "user" part of the address (before the `@`).
address: "incoming+%{key}@gitlab.example.com"
@@ -264,8 +267,11 @@ Example for Omnibus installs:
```ruby
gitlab_rails['incoming_email_enabled'] = true
-# The email address including the `%{key}` placeholder that will be replaced to reference the item being replied to.
-# The placeholder can be omitted but if present, it must appear in the "user" part of the address (before the `@`).
+# The email address including the %{key} placeholder that will be replaced to reference the
+# item being replied to. This %{key} should be included in its entirety within the email
+# address and not replaced by another value.
+# For example: emailaddress+%{key}@gmail.com.
+# The placeholder must appear in the "user" part of the address (before the `@`).
gitlab_rails['incoming_email_address'] = "gitlab-incoming+%{key}@gmail.com"
# Email account username
@@ -299,8 +305,11 @@ Example for source installs:
incoming_email:
enabled: true
- # The email address including the `%{key}` placeholder that will be replaced to reference the item being replied to.
- # The placeholder can be omitted but if present, it must appear in the "user" part of the address (before the `@`).
+ # The email address including the %{key} placeholder that will be replaced to reference the
+ # item being replied to. This %{key} should be included in its entirety within the email
+ # address and not replaced by another value.
+ # For example: emailaddress+%{key}@gmail.com.
+ # The placeholder must appear in the "user" part of the address (before the `@`).
address: "gitlab-incoming+%{key}@gmail.com"
# Email account username
@@ -345,8 +354,11 @@ Example for Omnibus installs:
```ruby
gitlab_rails['incoming_email_enabled'] = true
-# The email address including the `%{key}` placeholder that will be replaced to reference the item being replied to.
-# The placeholder can be omitted but if present, it must appear in the "user" part of the address (before the `@`).
+# The email address including the %{key} placeholder that will be replaced to reference the
+# item being replied to. This %{key} should be included in its entirety within the email
+# address and not replaced by another value.
+# For example: emailaddress-%{key}@exchange.example.com.
+# The placeholder must appear in the "user" part of the address (before the `@`).
# Exchange does not support sub-addressing, so a catch-all mailbox must be used.
gitlab_rails['incoming_email_address'] = "incoming-%{key}@exchange.example.com"
@@ -370,8 +382,11 @@ Example for source installs:
incoming_email:
enabled: true
- # The email address including the `%{key}` placeholder that will be replaced to reference the item being replied to.
- # The placeholder can be omitted but if present, it must appear in the "user" part of the address (before the `@`).
+ # The email address including the %{key} placeholder that will be replaced to reference the
+ # item being replied to. This %{key} should be included in its entirety within the email
+ # address and not replaced by another value.
+ # For example: emailaddress-%{key}@exchange.example.com.
+ # The placeholder must appear in the "user" part of the address (before the `@`).
# Exchange does not support sub-addressing, so a catch-all mailbox must be used.
address: "incoming-%{key}@exchange.example.com"
@@ -476,9 +491,11 @@ This example for Omnibus GitLab assumes the mailbox `incoming@office365.example.
```ruby
gitlab_rails['incoming_email_enabled'] = true
-# The email address including the `%{key}` placeholder that will be replaced
-# to reference the item being replied to. The placeholder can be omitted, but if
-# present, it must appear in the "user" part of the address (before the `@`).
+# The email address including the %{key} placeholder that will be replaced to reference the
+# item being replied to. This %{key} should be included in its entirety within the email
+# address and not replaced by another value.
+# For example: emailaddress+%{key}@office365.example.com.
+# The placeholder must appear in the "user" part of the address (before the `@`).
gitlab_rails['incoming_email_address'] = "incoming+%{key}@office365.example.com"
# Email account username
@@ -501,9 +518,11 @@ This example for source installs assumes the mailbox `incoming@office365.example
incoming_email:
enabled: true
- # The email address including the `%{key}` placeholder that will be replaced
- # to reference the item being replied to. The placeholder can be omitted, but
- # if present, it must appear in the "user" part of the address (before the `@`).
+ # The email address including the %{key} placeholder that will be replaced to reference the
+ # item being replied to. This %{key} should be included in its entirety within the email
+ # address and not replaced by another value.
+ # For example: emailaddress+%{key}@office365.example.com.
+ # The placeholder must appear in the "user" part of the address (before the `@`).
address: "incoming+%{key}@office365.example.comm"
# Email account username
@@ -527,9 +546,11 @@ This example for Omnibus installs assumes the catch-all mailbox `incoming@office
```ruby
gitlab_rails['incoming_email_enabled'] = true
-# The email address including the `%{key}` placeholder that will be replaced to
-# reference the item being replied to. The placeholder can be omitted, but if present,
-# it must appear in the "user" part of the address (before the `@`).
+# The email address including the %{key} placeholder that will be replaced to reference the
+# item being replied to. This %{key} should be included in its entirety within the email
+# address and not replaced by another value.
+# For example: emailaddress-%{key}@office365.example.com.
+# The placeholder must appear in the "user" part of the address (before the `@`).
gitlab_rails['incoming_email_address'] = "incoming-%{key}@office365.example.com"
# Email account username
@@ -552,9 +573,11 @@ This example for source installs assumes the catch-all mailbox `incoming@office3
incoming_email:
enabled: true
- # The email address including the `%{key}` placeholder that will be replaced
- # to reference the item being replied to. The placeholder can be omitted, but
- # if present, it must appear in the "user" part of the address (before the `@`).
+ # The email address including the %{key} placeholder that will be replaced to reference the
+ # item being replied to. This %{key} should be included in its entirety within the email
+ # address and not replaced by another value.
+ # For example: emailaddress+%{key}@office365.example.com.
+ # The placeholder must appear in the "user" part of the address (before the `@`).
address: "incoming-%{key}@office365.example.com"
# Email account username
@@ -653,9 +676,11 @@ This example for Omnibus GitLab assumes you're using the following mailbox: `inc
```ruby
gitlab_rails['incoming_email_enabled'] = true
-# The email address including the `%{key}` placeholder that will be replaced
-# to reference the item being replied to. The placeholder can be omitted, but if
-# present, it must appear in the "user" part of the address (before the `@`).
+# The email address including the %{key} placeholder that will be replaced to reference the
+# item being replied to. This %{key} should be included in its entirety within the email
+# address and not replaced by another value.
+# For example: emailaddress+%{key}@example.onmicrosoft.com.
+# The placeholder must appear in the "user" part of the address (before the `@`).
gitlab_rails['incoming_email_address'] = "incoming+%{key}@example.onmicrosoft.com"
# Email account username
diff --git a/doc/administration/index.md b/doc/administration/index.md
index a7d81ca1543..1bc2084a23a 100644
--- a/doc/administration/index.md
+++ b/doc/administration/index.md
@@ -139,7 +139,7 @@ Learn how to install, configure, update, and maintain your GitLab instance.
- [Postfix for incoming email](reply_by_email_postfix_setup.md): Set up a
basic Postfix mail server with IMAP authentication on Ubuntu for incoming
emails.
-- [Abuse reports](../user/admin_area/abuse_reports.md): View and resolve abuse reports from your users.
+- [Abuse reports](../user/admin_area/review_abuse_reports.md): View and resolve abuse reports from your users.
- [Credentials Inventory](../user/admin_area/credentials_inventory.md): With Credentials inventory, GitLab administrators can keep track of the credentials used by their users in their GitLab self-managed instance.
## Project settings
@@ -190,7 +190,6 @@ Learn how to install, configure, update, and maintain your GitLab instance.
- [Git LFS configuration](lfs/index.md): Learn how to configure LFS for GitLab.
- [Housekeeping](housekeeping.md): Keep your Git repositories tidy and fast.
- [Configuring Git Protocol v2](git_protocol.md): Git protocol version 2 support.
-- [Manage large files with `git-annex` (Deprecated)](git_annex.md)
## Monitoring GitLab
diff --git a/doc/administration/instance_limits.md b/doc/administration/instance_limits.md
index 820006eeadf..9ea76ff6151 100644
--- a/doc/administration/instance_limits.md
+++ b/doc/administration/instance_limits.md
@@ -15,7 +15,7 @@ performance, data, or could even exhaust the allocated resources for the applica
Rate limits can be used to improve the security and durability of GitLab.
-For example, a simple script can make thousands of web requests per second. Whether malicious, apathetic, or just a bug, your application and infrastructure may not be able to cope with the load. Rate limits can help mitigate these types of attacks.
+For example, one script can make thousands of web requests per second. Whether malicious, apathetic, or just a bug, your application and infrastructure may not be able to cope with the load. Rate limits can help to mitigate these types of attacks.
Read more about [configuring rate limits](../security/rate_limits.md) in the Security documentation.
@@ -25,17 +25,17 @@ Read more about [configuring rate limits](../security/rate_limits.md) in the Sec
This setting limits the request rate to the issue creation endpoint.
-Read more on [issue creation rate limits](../user/admin_area/settings/rate_limit_on_issues_creation.md).
+Read more about [issue creation rate limits](../user/admin_area/settings/rate_limit_on_issues_creation.md).
-- **Default rate limit** - Disabled by default
+- **Default rate limit**: Disabled by default.
### By User or IP
This setting limits the request rate per user or IP.
-Read more on [User and IP rate limits](../user/admin_area/settings/user_and_ip_rate_limits.md).
+Read more about [User and IP rate limits](../user/admin_area/settings/user_and_ip_rate_limits.md).
-- **Default rate limit** - Disabled by default
+- **Default rate limit**: Disabled by default.
### By raw endpoint
@@ -43,9 +43,9 @@ Read more on [User and IP rate limits](../user/admin_area/settings/user_and_ip_r
This setting limits the request rate per endpoint.
-Read more on [raw endpoint rate limits](../user/admin_area/settings/rate_limits_on_raw_endpoints.md).
+Read more about [raw endpoint rate limits](../user/admin_area/settings/rate_limits_on_raw_endpoints.md).
-- **Default rate limit** - 300 requests per project, per commit and per file path
+- **Default rate limit**: 300 requests per project, per commit and per file path.
### By protected path
@@ -65,9 +65,18 @@ GitLab rate limits the following paths by default:
'/admin/session'
```
-Read more on [protected path rate limits](../user/admin_area/settings/protected_paths.md).
+Read more about [protected path rate limits](../user/admin_area/settings/protected_paths.md).
-- **Default rate limit** - After 10 requests, the client must wait 60 seconds before trying again
+- **Default rate limit**: After 10 requests, the client must wait 60 seconds before trying again.
+
+### Package Registry
+
+> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/57029) in GitLab 13.12.
+
+This setting limits the request rate on the Packages API per user or IP. For more information, see
+[Package Registry Rate Limits](../user/admin_area/settings/package_registry_rate_limits.md).
+
+- **Default rate limit**: Disabled by default.
### Import/Export
@@ -75,16 +84,16 @@ Read more on [protected path rate limits](../user/admin_area/settings/protected_
This setting limits the import/export actions for groups and projects.
-| Limit | Default (per minute per user) |
-| ----- | ----------------------------- |
-| Project Import | 6 |
-| Project Export | 6 |
+| Limit | Default (per minute per user) |
+|-------------------------|-------------------------------|
+| Project Import | 6 |
+| Project Export | 6 |
| Project Export Download | 1 |
-| Group Import | 6 |
-| Group Export | 6 |
-| Group Export | Download | 1 |
+| Group Import | 6 |
+| Group Export | 6 |
+| Group Export Download | 1 |
-Read more on [import/export rate limits](../user/admin_area/settings/import_export_rate_limits.md).
+Read more about [import/export rate limits](../user/admin_area/settings/import_export_rate_limits.md).
### Rack attack
@@ -92,9 +101,9 @@ This method of rate limiting is cumbersome, but has some advantages. It allows
throttling of specific paths, and is also integrated into Git and container
registry requests.
-Read more on the [Rack Attack initializer](../security/rack_attack.md) method of setting rate limits.
+Read more about the [Rack Attack initializer](../security/rack_attack.md) method of setting rate limits.
-- **Default rate limit** - Disabled
+- **Default rate limit**: Disabled.
### Member Invitations
@@ -103,15 +112,58 @@ Limit the maximum daily member invitations allowed per group hierarchy.
- GitLab.com: Free members may invite 20 members per day.
- Self-managed: Invites are not limited.
+### Webhook calls
+
+> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/61151) in GitLab 13.12.
+> - [Deployed behind a feature flag](../user/feature_flags.md), disabled by default.
+> - Disabled on GitLab.com.
+> - Not recommended for production use.
+> - To use in GitLab self-managed instances, ask a GitLab administrator to [enable it](#enable-or-disable-rate-limiting-for-webhooks). **(FREE SELF)**
+
+Limit the number of times any given webhook can be called per minute.
+This only applies to project and group webhooks.
+
+Calls over the rate limit are logged into `auth.log`.
+
+```ruby
+# If limits don't exist for the default plan, you can create one with:
+# Plan.default.create_limits!
+
+Plan.default.actual_limits.update!(web_hook_calls: 10)
+```
+
+Set the limit to `0` to disable it.
+
+- **Default rate limit**: Disabled.
+
+#### Enable or disable rate limiting for webhooks **(FREE SELF)**
+
+Rate limiting for webhooks is under development and not ready for production use. It is
+deployed behind a feature flag that is **disabled by default**.
+[GitLab administrators with access to the GitLab Rails console](../administration/feature_flags.md)
+can enable it.
+
+To enable it:
+
+```ruby
+Feature.enable(:web_hooks_rate_limit)
+```
+
+To disable it:
+
+```ruby
+Feature.disable(:web_hooks_rate_limit)
+```
+
## Gitaly concurrency limit
Clone traffic can put a large strain on your Gitaly service. To prevent such workloads from overwhelming your Gitaly server, you can set concurrency limits in Gitaly's configuration file.
-Read more on [Gitaly concurrency limits](gitaly/configure_gitaly.md#limit-rpc-concurrency).
+Read more about [Gitaly concurrency limits](gitaly/configure_gitaly.md#limit-rpc-concurrency).
-- **Default rate limit** - Disabled
+- **Default rate limit**: Disabled.
-## Number of comments per issue, merge request or commit
+## Number of comments per issue, merge request, or commit
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/22388) in GitLab 12.4.
@@ -120,7 +172,7 @@ merge request, or commit. When the limit is reached, system notes can still be
added so that the history of events is not lost, but user-submitted comments
will fail.
-- **Max limit:** 5.000 comments
+- **Max limit**: 5,000 comments.
## Size of comments and descriptions of issues, merge requests, and epics
@@ -132,7 +184,7 @@ item will not be created.
It's possible that this limit will be changed to a lower number in the future.
-- **Max size:** ~1 million characters / ~1 MB
+- **Max size**: ~1 million characters / ~1 MB.
## Size of commit titles and descriptions
@@ -152,7 +204,7 @@ The maximum number of issues loaded on the milestone overview page is 500.
When the number exceeds the limit the page displays an alert and links to a paginated
[issue list](../user/project/issues/managing_issues.md) of all issues in the milestone.
-- **Limit:** 500 issues
+- **Limit**: 500 issues.
## Number of pipelines per Git push
@@ -174,13 +226,13 @@ Activity history for projects and individuals' profiles was limited to one year
There is a limit when embedding metrics in GFM for performance reasons.
-- **Max limit:** 100 embeds
+- **Max limit**: 100 embeds.
## Number of webhooks
On GitLab.com, the [maximum number of webhooks and their size](../user/gitlab_com/index.md#webhooks) per project, and per group, is limited.
-To set this limit on a self-managed installation, where the default is `100` project webhooks and `50` group webhooks, run the following in the
+To set this limit for a self-managed installation, where the default is `100` project webhooks and `50` group webhooks, run the following in the
[GitLab Rails console](operations/rails_console.md#starting-a-rails-console-session):
```ruby
@@ -203,7 +255,7 @@ Set the limit to `0` to disable it.
The [minimum time between pull refreshes](../user/project/repository/repository_mirroring.md)
defaults to 300 seconds (5 minutes).
-To change this limit on a self-managed installation, run the following in the
+To change this limit for a self-managed installation, run the following in the
[GitLab Rails console](operations/rails_console.md#starting-a-rails-console-session):
```ruby
@@ -220,14 +272,14 @@ Plan.default.actual_limits.update!(pull_mirror_interval_seconds: 200)
GitLab ignores all incoming emails sent from auto-responders by looking for the `X-Autoreply`
header. Such emails don't create comments on issues or merge requests.
-## Amount of data sent from Sentry via Error Tracking
+## Amount of data sent from Sentry through Error Tracking
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/14926) in GitLab 12.6.
Sentry payloads sent to GitLab have a 1 MB maximum limit, both for security reasons
and to limit memory consumption.
-## Max offset allowed via REST API for offset-based pagination
+## Max offset allowed by the REST API for offset-based pagination
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/34565) in GitLab 13.0.
@@ -236,7 +288,7 @@ requested offset into the set of results. This limit is only applied to endpoint
support keyset-based pagination. More information about pagination options can be
found in the [API docs section on pagination](../api/README.md#pagination).
-To set this limit on a self-managed installation, run the following in the
+To set this limit for a self-managed installation, run the following in the
[GitLab Rails console](operations/rails_console.md#starting-a-rails-console-session):
```ruby
@@ -246,7 +298,7 @@ To set this limit on a self-managed installation, run the following in the
Plan.default.actual_limits.update!(offset_pagination_limit: 10000)
```
-- **Default offset pagination limit:** 50000
+- **Default offset pagination limit**: `50000`.
Set the limit to `0` to disable it.
@@ -272,7 +324,7 @@ will fail with a `job_activity_limit_exceeded` error.
higher installations, this limit is defined under a `default` plan that affects all
projects. This limit is disabled (`0`) by default.
-To set this limit on a self-managed installation, run the following in the
+To set this limit for a self-managed installation, run the following in the
[GitLab Rails console](operations/rails_console.md#starting-a-rails-console-session):
```ruby
@@ -295,7 +347,7 @@ too many deployments fail with a `deployments_limit_exceeded` error.
The default limit is 500 for all [GitLab self-managed and SaaS plans](https://about.gitlab.com/pricing/).
-To change the limit on a self-managed installation, change the `default` plan's limit with the following
+To change the limit for a self-managed installation, change the `default` plan's limit with the following
[GitLab Rails console](operations/rails_console.md#starting-a-rails-console-session) command:
```ruby
@@ -323,7 +375,7 @@ limit, the subscription will be considered invalid.
or higher installations, this limit is defined under a `default` plan that
affects all projects. By default, there is a limit of `2` subscriptions.
-To set this limit on a self-managed installation, run the following in the
+To set this limit for a self-managed installation, run the following in the
[GitLab Rails console](operations/rails_console.md#starting-a-rails-console-session):
```ruby
@@ -348,7 +400,7 @@ On [GitLab Premium](https://about.gitlab.com/pricing/) self-managed or
higher installations, this limit is defined under a `default` plan that affects all
projects. By default, there is a limit of `10` pipeline schedules.
-To set this limit on a self-managed installation, run the following in the
+To set this limit for a self-managed installation, run the following in the
[GitLab Rails console](operations/rails_console.md#starting-a-rails-console-session):
```ruby
@@ -426,6 +478,32 @@ installation, run the following in the [GitLab Rails console](operations/rails_c
Plan.default.actual_limits.update!(ci_max_artifact_size_junit: 10)
```
+### Number of registered runners per scope
+
+> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/321368) in GitLab 13.12.
+
+The total number of registered runners is limited at the group and project
+levels. Each time a new runner is registered, GitLab checks these limits. A
+runner's registration fails if it exceeds the limit for the scope determined by
+the runner registration token.
+
+- GitLab SaaS subscribers have different limits defined per plan, affecting all projects using that plan.
+- Self-managed GitLab Premium and Ultimate limits are defined by a default plan that affects all projects:
+
+ | Runner scope | Default value |
+ |---------------------------------------------|---------------|
+ | `ci_registered_group_runners` | 1000 |
+ | `ci_registered_project_runners` | 1000 |
+
+ To update these limits, run the following in the
+ [GitLab Rails console](operations/rails_console.md#starting-a-rails-console-session):
+
+ ```ruby
+ # Use ci_registered_group_runners or ci_registered_project_runners
+ # depending on desired scope
+ Plan.default.actual_limits.update!(ci_registered_project_runners: 100)
+ ```
+
## Instance monitoring and metrics
### Incident Management inbound alert limits
@@ -496,11 +574,11 @@ See [Environment Dashboard](../ci/environments/environments_dashboard.md#adding-
Pods and Deployments. However, data over 10 MB for a certain environment read from
Kubernetes won't be shown.
-## Merge Request reports
+## Merge request reports
Reports that go over the 20 MB limit won't be loaded. Affected reports:
-- [Merge Request security reports](../user/project/merge_requests/testing_and_reports_in_merge_requests.md#security-reports)
+- [Merge request security reports](../user/project/merge_requests/testing_and_reports_in_merge_requests.md#security-reports)
- [CI/CD parameter `artifacts:expose_as`](../ci/yaml/README.md#artifactsexpose_as)
- [Unit test reports](../ci/unit_test_reports.md)
@@ -514,8 +592,8 @@ You can set a limit on the content of repository files that are indexed in
Elasticsearch. Any files larger than this limit will not be indexed, and thus
will not be searchable.
-Setting a limit helps reduce the memory usage of the indexing processes as well
-as the overall index size. This value defaults to `1024 KiB` (1 MiB) as any
+Setting a limit helps reduce the memory usage of the indexing processes and
+the overall index size. This value defaults to `1024 KiB` (1 MiB) as any
text files larger than this likely aren't meant to be read by humans.
You must set a limit, as unlimited file sizes aren't supported. Setting this
@@ -535,8 +613,8 @@ This is applicable to all indexed data except repository files that get
indexed, which have a separate limit (see [Maximum file size
indexed](#maximum-file-size-indexed)).
-- On GitLab.com this is limited to 20000 characters
-- For self-managed installations it is unlimited by default
+- On GitLab.com, this is limited to 20,000 characters
+- For self-managed installations, this is unlimited by default
This limit can be configured for self-managed installations when [enabling
Elasticsearch](../integration/elasticsearch.md#enabling-advanced-search).
@@ -550,7 +628,7 @@ Set the limit to `0` to disable it.
## Snippets limits
-See the [documentation on Snippets settings](snippets/index.md).
+See the [documentation about Snippets settings](snippets/index.md).
## Design Management limits
@@ -587,14 +665,14 @@ More information can be found in the [Push event activities limit and bulk push
On GitLab.com, the maximum file size for a package that's uploaded to the [GitLab Package Registry](../user/packages/package_registry/index.md) varies by format:
-- Conan: 5GB
-- Generic: 5GB
-- Maven: 5GB
-- npm: 5GB
-- NuGet: 5GB
-- PyPI: 5GB
+- Conan: 5 GB
+- Generic: 5 GB
+- Maven: 5 GB
+- npm: 5 GB
+- NuGet: 5 GB
+- PyPI: 5 GB
-To set this limit on a self-managed installation, run the following in the
+To set this limit for a self-managed installation, run the following in the
[GitLab Rails console](operations/rails_console.md#starting-a-rails-console-session):
```ruby
diff --git a/doc/administration/job_artifacts.md b/doc/administration/job_artifacts.md
index bec1fa6ad05..187e98e9b43 100644
--- a/doc/administration/job_artifacts.md
+++ b/doc/administration/job_artifacts.md
@@ -5,12 +5,9 @@ info: To determine the technical writer assigned to the Stage/Group associated w
type: reference, howto
---
-# Jobs artifacts administration
+# Jobs artifacts administration **(FREE SELF)**
-> - Introduced in GitLab 8.2 and GitLab Runner 0.7.0.
-> - Starting with GitLab 8.4 and GitLab Runner 1.0, the artifacts archive format changed to `ZIP`.
-> - Starting with GitLab 8.17, builds are renamed to jobs.
-> - This is the administration documentation. For the user guide see [pipelines/job_artifacts](../ci/pipelines/job_artifacts.md).
+This is the administration documentation. For the user guide see [pipelines/job_artifacts](../ci/pipelines/job_artifacts.md).
Artifacts is a list of files and directories which are attached to a job after it
finishes. This feature is enabled by default in all GitLab installations. Keep reading
@@ -87,12 +84,7 @@ _The artifacts are stored by default in
### Using object storage
-> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/1762) in
-> [GitLab Premium](https://about.gitlab.com/pricing/) 9.4.
-> - Since version 9.5, artifacts are [browsable](../ci/pipelines/job_artifacts.md#download-job-artifacts),
-> when object storage is enabled. 9.4 lacks this feature.
-> - Since version 10.6, available in [GitLab Free](https://about.gitlab.com/pricing/).
-> - Since version 11.0, we support `direct_upload` to S3.
+> Introduced in GitLab 11.0: Support for `direct_upload` to S3.
If you don't want to use the local disk where GitLab is installed to store the
artifacts, you can use an object storage like AWS S3 instead.
@@ -118,14 +110,14 @@ This section describes the earlier configuration format.
For source installations the following settings are nested under `artifacts:` and then `object_store:`. On Omnibus GitLab installs they are prefixed by `artifacts_object_store_`.
-| Setting | Default | Description |
-|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `enabled` | `false` | Enable/disable object storage |
-| `remote_directory` | | The bucket name where Artifacts are stored |
-| `direct_upload` | `false` | Set to `true` to enable direct upload of Artifacts without the need of local shared storage. Option may be removed once we decide to support only single storage for all files. |
-| `background_upload` | `true` | Set to `false` to disable automatic upload. Option may be removed once upload is direct to S3 |
+| Setting | Default | Description |
+|---------------------|---------|-------------|
+| `enabled` | `false` | Enable or disable object storage. |
+| `remote_directory` | | The bucket name where Artifacts are stored. |
+| `direct_upload` | `false` | Set to `true` to enable direct upload of Artifacts without the need of local shared storage. Option may be removed once we decide to support only single storage for all files. |
+| `background_upload` | `true` | Set to `false` to disable automatic upload. Option may be removed once upload is direct to S3. |
| `proxy_download` | `false` | Set to `true` to enable proxying all files served. Option allows to reduce egress traffic as this allows clients to download directly from remote storage instead of proxying all data. |
-| `connection` | | Various connection options described below |
+| `connection` | | Various connection options described below. |
#### Connection settings
@@ -370,42 +362,6 @@ steps below.
If the `expire` directive is not set explicitly in your pipeline, artifacts expire per the
default artifacts expiration setting, which you can find in the [CI/CD Administration settings](../user/admin_area/settings/continuous_integration.md).
-## Validation for dependencies
-
-> Introduced in GitLab 10.3.
-
-To disable [the dependencies validation](../ci/yaml/README.md#when-a-dependent-job-fails),
-you can enable the `ci_validate_build_dependencies_override` feature flag from a Rails console.
-
-**In Omnibus installations:**
-
-1. Enter the Rails console:
-
- ```shell
- sudo gitlab-rails console
- ```
-
-1. Enable the feature flag to disable the validation:
-
- ```ruby
- Feature.enable(:ci_validate_build_dependencies_override)
- ```
-
-**In installations from source:**
-
-1. Enter the Rails console:
-
- ```shell
- cd /home/git/gitlab
- sudo -u git -H bundle exec rails console -e production
- ```
-
-1. Enable the feature flag to disable the validation:
-
- ```ruby
- Feature.enable(:ci_validate_build_dependencies_override)
- ```
-
## Set the maximum file size of the artifacts
If artifacts are enabled, you can change the maximum file size of the
diff --git a/doc/administration/job_logs.md b/doc/administration/job_logs.md
index 1afadcaf668..93f83311cad 100644
--- a/doc/administration/job_logs.md
+++ b/doc/administration/job_logs.md
@@ -5,7 +5,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
type: reference
---
-# Job logs
+# Job logs **(FREE SELF)**
> [Renamed from job traces to job logs](https://gitlab.com/gitlab-org/gitlab/-/issues/29121) in GitLab 12.5.
@@ -109,7 +109,7 @@ See "Phase 4: uploading" in [Data flow](#data-flow) to learn about the process.
If you want to avoid any local disk usage for job logs,
you can do so using one of the following options:
-- Enable the [beta incremental logging](#new-incremental-logging-architecture) feature.
+- Enable the [beta incremental logging](#incremental-logging-architecture) feature.
- Set the [job logs location](#changing-the-job-logs-local-location)
to an NFS drive.
@@ -128,16 +128,28 @@ This command permanently deletes the log files and is irreversible.
find /var/opt/gitlab/gitlab-rails/shared/artifacts -name "job.log" -mtime +60 -delete
```
-## New incremental logging architecture
+## Incremental logging architecture
-> - [Introduced](https://gitlab.com/gitlab-org/gitlab-foss/-/merge_requests/18169) in GitLab 10.4.
+> - [Deployed behind a feature flag](../user/feature_flags.md), disabled by default.
+> - Enabled on GitLab.com.
+> - [Recommended for production use](https://gitlab.com/groups/gitlab-org/-/epics/4275) in GitLab 13.6.
+> - [Recommended for production use with AWS S3](https://gitlab.com/gitlab-org/gitlab/-/issues/273498) in GitLab 13.7.
+> - To use in GitLab self-managed instances, ask a GitLab administrator to [enable it](#enable-or-disable-incremental-logging). **(FREE SELF)**
-NOTE:
-This beta feature is off by default. See below for how to [enable or disable](#enabling-incremental-logging) it.
+Job logs are sent from the GitLab Runner in chunks and cached temporarily on disk
+in `/var/opt/gitlab/gitlab-ci/builds` by Omnibus GitLab. After the job completes,
+a background job archives the job log. The log is moved to `/var/opt/gitlab/gitlab-rails/shared/artifacts/`
+by default, or to object storage if configured.
-By combining the process with object storage settings, we can completely bypass
-the local file storage. This is a useful option if GitLab is installed as
-cloud-native, for example on Kubernetes.
+In a [scaled-out architecture](reference_architectures/index.md) with Rails and Sidekiq running on more than one
+server, these two locations on the filesystem have to be shared using NFS.
+
+To eliminate both filesystem requirements:
+
+- Enable the incremental logging feature, which uses Redis instead of disk space for temporary caching of job logs.
+- Configure [object storage](job_artifacts.md#object-storage-settings) for storing archived job logs.
+
+### Technical details
The data flow is the same as described in the [data flow section](#data-flow)
with one change: _the stored path of the first two phases is different_. This incremental
@@ -159,67 +171,39 @@ Here is the detailed data flow:
1. The Sidekiq worker archives the log to object storage and cleans up the log
in Redis and a persistent store (object storage or the database).
-### Enabling incremental logging
+### Limitations
-The following commands are to be issued in a Rails console:
+- [Redis cluster is not supported](https://gitlab.com/gitlab-org/gitlab/-/issues/224171).
+- You must configure [object storage for CI/CD artifacts, logs, and builds](job_artifacts.md#object-storage-settings)
+ before you enable the feature flag. After the flag is enabled, files cannot be written
+ to disk, and there is no protection against misconfiguration.
+- There is [an epic tracking other potential limitations and improvements](https://gitlab.com/groups/gitlab-org/-/epics/3791).
-```shell
-# Omnibus GitLab
-gitlab-rails console
+### Enable or disable incremental logging **(FREE SELF)**
-# Installation from source
-cd /home/git/gitlab
-sudo -u git -H bin/rails console -e production
-```
+Incremental logging is under development, but ready for production use. It is
+deployed behind a feature flag that is **disabled by default**.
+[GitLab administrators with access to the GitLab Rails console](feature_flags.md)
+can enable it.
-**To check if incremental logging (trace) is enabled:**
+Before you enable the feature flag:
-```ruby
-Feature.enabled?(:ci_enable_live_trace)
-```
+- Review [the limitations of incremental logging](#limitations).
+- [Enable object storage](job_artifacts.md#object-storage-settings).
-**To enable incremental logging (trace):**
+To enable incremental logging:
```ruby
Feature.enable(:ci_enable_live_trace)
```
-NOTE:
-The transition period is handled gracefully. Upcoming logs are
-generated with the incremental architecture, and on-going logs stay with the
-legacy architecture, which means that on-going logs aren't forcibly
-re-generated with the incremental architecture.
+Running jobs' logs continue to be written to disk, but new jobs use
+incremental logging.
-**To disable incremental logging (trace):**
+To disable incremental logging:
```ruby
-Feature.disable('ci_enable_live_trace')
+Feature.disable(:ci_enable_live_trace)
```
-NOTE:
-The transition period is handled gracefully. Upcoming logs are generated
-with the legacy architecture, and on-going incremental logs stay with the incremental
-architecture, which means that on-going incremental logs aren't forcibly re-generated
-with the legacy architecture.
-
-### Potential implications
-
-In some cases, having data stored on Redis could incur data loss:
-
-1. **Case 1: When all data in Redis are accidentally flushed**
- - On going incremental logs could be recovered by re-sending logs (this is
- supported by all versions of GitLab Runner).
- - Finished jobs which have not archived incremental logs lose the last part
- (~128KB) of log data.
-
-1. **Case 2: When Sidekiq workers fail to archive (e.g., there was a bug that
- prevents archiving process, Sidekiq inconsistency, etc.)**
- - All log data in Redis is deleted after one week. If the
- Sidekiq workers can't finish by the expiry date, the part of log data is lost.
-
-Another issue that might arise is that it could consume all memory on the Redis
-instance. If the number of jobs is 1000, 128MB (128KB * 1000) is consumed.
-
-Also, it could pressure the database replication lag. `INSERT`s are generated to
-indicate that we have log chunk. `UPDATE`s with 128KB of data is issued once we
-receive multiple chunks.
+Running jobs continue to use incremental logging, but new jobs write to the disk.
diff --git a/doc/administration/lfs/img/git-annex-branches.png b/doc/administration/lfs/img/git-annex-branches.png
deleted file mode 100644
index 3d614f68177..00000000000
--- a/doc/administration/lfs/img/git-annex-branches.png
+++ /dev/null
Binary files differ
diff --git a/doc/administration/maintenance_mode/index.md b/doc/administration/maintenance_mode/index.md
index 437f59247f6..7fd383eabae 100644
--- a/doc/administration/maintenance_mode/index.md
+++ b/doc/administration/maintenance_mode/index.md
@@ -10,9 +10,9 @@ info: To determine the technical writer assigned to the Stage/Group associated w
Maintenance Mode allows administrators to reduce write operations to a minimum while maintenance tasks are performed. The main goal is to block all external actions that change the internal state, including the PostgreSQL database, but especially files, Git repositories, Container repositories, etc.
-Once Maintenance Mode is enabled, in-progress actions will finish relatively quickly since no new actions are coming in, and internal state changes will be minimal.
+Once Maintenance Mode is enabled, in-progress actions finish relatively quickly since no new actions are coming in, and internal state changes are minimal.
In that state, various maintenance tasks are easier, and services can be stopped completely or be
-further degraded for a much shorter period of time than might otherwise be needed, for example stopping cron jobs and draining queues should be fairly quick.
+further degraded for a much shorter period of time than might otherwise be needed. For example, stopping cron jobs and draining queues should be fairly quick.
Maintenance Mode allows most external actions that do not change internal state. On a high-level, HTTP POST, PUT, PATCH, and DELETE requests are blocked and a detailed overview of [how special cases are handled](#rest-api) is available.
@@ -74,33 +74,33 @@ In some cases, the visual feedback from an action could be misleading, for examp
### Admin functions
-Systems administrators can edit the application settings. This will allow
+Systems administrators can edit the application settings. This allows
them to disable Maintenance Mode after it's been enabled.
### Authentication
All users can log in and out of the GitLab instance but no new users can be created.
-If there are [LDAP syncs](../auth/ldap/index.md) scheduled for that time, they will fail since user creation is disabled. Similarly, [user creations based on SAML](../../integration/saml.md#general-setup) will fail.
+If there are [LDAP syncs](../auth/ldap/index.md) scheduled for that time, they fail since user creation is disabled. Similarly, [user creations based on SAML](../../integration/saml.md#general-setup) fail.
### Git actions
-All read-only Git operations will continue to work, for example
-`git clone` and `git pull`. All write operations will fail, both through the CLI and Web IDE with the error message: `Git push is not allowed because this GitLab instance is currently in (read-only) maintenance mode.`
+All read-only Git operations continue to work, for example
+`git clone` and `git pull`. All write operations fail, both through the CLI and Web IDE with the error message: `Git push is not allowed because this GitLab instance is currently in (read-only) maintenance mode.`
-If Geo is enabled, Git pushes to both primary and secondaries will fail.
+If Geo is enabled, Git pushes to both primary and secondaries fail.
### Merge requests, issues, epics
-All write actions except those mentioned above will fail. For example, a user cannot update merge requests or issues.
+All write actions except those mentioned above fail. For example, a user cannot update merge requests or issues.
### Incoming email
-Creating new issue replies, issues (including new Service Desk issues), merge requests [by email](../incoming_email.md) will fail.
+Creating new issue replies, issues (including new Service Desk issues), merge requests [by email](../incoming_email.md) fail.
### Outgoing email
-Notification emails will continue to arrive, but emails that require database writes, like resetting the password, will not arrive.
+Notification emails continue to arrive, but emails that require database writes, like resetting the password, do not arrive.
### REST API
diff --git a/doc/administration/monitoring/performance/img/performance_bar_external_http_calls.png b/doc/administration/monitoring/performance/img/performance_bar_external_http_calls.png
deleted file mode 100644
index 45f2b0956e9..00000000000
--- a/doc/administration/monitoring/performance/img/performance_bar_external_http_calls.png
+++ /dev/null
Binary files differ
diff --git a/doc/administration/monitoring/performance/img/performance_bar_frontend.png b/doc/administration/monitoring/performance/img/performance_bar_frontend.png
deleted file mode 100644
index 9cc874c883a..00000000000
--- a/doc/administration/monitoring/performance/img/performance_bar_frontend.png
+++ /dev/null
Binary files differ
diff --git a/doc/administration/monitoring/performance/img/performance_bar_gitaly_calls.png b/doc/administration/monitoring/performance/img/performance_bar_gitaly_calls.png
deleted file mode 100644
index 6caa2869d9b..00000000000
--- a/doc/administration/monitoring/performance/img/performance_bar_gitaly_calls.png
+++ /dev/null
Binary files differ
diff --git a/doc/administration/monitoring/performance/img/performance_bar_redis_calls.png b/doc/administration/monitoring/performance/img/performance_bar_redis_calls.png
deleted file mode 100644
index f2df8c794db..00000000000
--- a/doc/administration/monitoring/performance/img/performance_bar_redis_calls.png
+++ /dev/null
Binary files differ
diff --git a/doc/administration/monitoring/performance/img/performance_bar_request_selector_warning_expanded.png b/doc/administration/monitoring/performance/img/performance_bar_request_selector_warning_expanded.png
deleted file mode 100644
index 36553f513e1..00000000000
--- a/doc/administration/monitoring/performance/img/performance_bar_request_selector_warning_expanded.png
+++ /dev/null
Binary files differ
diff --git a/doc/administration/monitoring/performance/img/performance_bar_rugged_calls.png b/doc/administration/monitoring/performance/img/performance_bar_rugged_calls.png
deleted file mode 100644
index 0340ca1b2f7..00000000000
--- a/doc/administration/monitoring/performance/img/performance_bar_rugged_calls.png
+++ /dev/null
Binary files differ
diff --git a/doc/administration/monitoring/performance/img/performance_bar_sql_queries.png b/doc/administration/monitoring/performance/img/performance_bar_sql_queries.png
deleted file mode 100644
index ef74e0a3b6e..00000000000
--- a/doc/administration/monitoring/performance/img/performance_bar_sql_queries.png
+++ /dev/null
Binary files differ
diff --git a/doc/administration/monitoring/performance/performance_bar.md b/doc/administration/monitoring/performance/performance_bar.md
index f6aa60b36a1..dd43c7d6fbb 100644
--- a/doc/administration/monitoring/performance/performance_bar.md
+++ b/doc/administration/monitoring/performance/performance_bar.md
@@ -18,25 +18,20 @@ From left to right, it displays:
- **Current Host**: the current host serving the page.
- **Database queries**: the time taken (in milliseconds) and the total number
of database queries, displayed in the format `00ms / 00 (00 cached) pg`. Click to display
- a modal window with more details:
- ![SQL profiling using the Performance Bar](img/performance_bar_sql_queries.png)
+ a modal window with more details.
- **Gitaly calls**: the time taken (in milliseconds) and the total number of
[Gitaly](../../gitaly/index.md) calls. Click to display a modal window with more
- details:
- ![Gitaly profiling using the Performance Bar](img/performance_bar_gitaly_calls.png)
+ details.
- **Rugged calls**: the time taken (in milliseconds) and the total number of
[Rugged](../../nfs.md#improving-nfs-performance-with-gitlab) calls.
- Click to display a modal window with more details:
- ![Rugged profiling using the Performance Bar](img/performance_bar_rugged_calls.png)
+ Click to display a modal window with more details.
- **Redis calls**: the time taken (in milliseconds) and the total number of
- Redis calls. Click to display a modal window with more details:
- ![Redis profiling using the Performance Bar](img/performance_bar_redis_calls.png)
+ Redis calls. Click to display a modal window with more details.
- **Elasticsearch calls**: the time taken (in milliseconds) and the total number of
Elasticsearch calls. Click to display a modal window with more details.
- **External HTTP calls**: the time taken (in milliseconds) and the total
number of external calls to other systems. Click to display a modal window
- with more details
- ![External call details in the Performance Bar](img/performance_bar_external_http_calls.png)
+ with more details.
- **Load timings** of the page: if your browser supports load timings (Chromium
and Chrome) several values in milliseconds, separated by slashes.
Click to display a modal window with more details. The values, from left to right:
@@ -44,8 +39,7 @@ From left to right, it displays:
- [**First Contentful Paint**](https://web.dev/first-contentful-paint/):
Time until something was visible to the user.
- [**DomContentLoaded**](https://developers.google.com/web/fundamentals/performance/critical-rendering-path/measure-crp) Event.
- - **Total number of requests** the page loaded:
- ![Frontend requests using the Performance Bar](img/performance_bar_frontend.png)
+ - **Total number of requests** the page loaded.
- **Trace**: If Jaeger is integrated, **Trace** links to a Jaeger tracing page
with the current request's `correlation_id` included.
- **+**: A link to add a request's details to the performance bar. The request
@@ -60,21 +54,18 @@ From left to right, it displays:
## Request warnings
-Requests exceeding predefined limits display a warning **{warning}** icon and
-explanation next to the failing metric. In this example, the Gitaly call duration
-exceeded the threshold:
+Requests that exceed predefined limits display a warning **{warning}** icon and
+explanation next to the metric. In this example, the Gitaly call duration
+exceeded the threshold.
![Gitaly call duration exceeded threshold](img/performance_bar_gitaly_threshold.png)
If any requests on the current page generated warnings, the warning icon displays
-next to the **Request selector**:
+next to the **Requests** selector menu. In this selector menu, an exclamation `(!)`
+appears next to requests with warnings.
![Request selector showing two requests with warnings](img/performance_bar_request_selector_warning.png)
-Requests with warnings display `(!)` after their path in the **Request selector**:
-
-![Request selector showing dropdown](img/performance_bar_request_selector_warning_expanded.png)
-
## Enable the Performance Bar via the Admin Area
The GitLab Performance Bar is disabled by default. To enable it for a given group:
diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md
index 9f87192aab0..f29db9ead38 100644
--- a/doc/administration/monitoring/prometheus/gitlab_metrics.md
+++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md
@@ -74,8 +74,6 @@ The following metrics are available:
| `gitlab_transaction_event_import_repository_total` | Counter | 9.4 | Counter for repository imports (RepositoryImportWorker) | |
| `gitlab_transaction_event_patch_hard_limit_bytes_hit_total` | Counter | 13.9 | Counter for diff patch size limit hits | |
| `gitlab_transaction_event_push_branch_total` | Counter | 9.4 | Counter for all branch pushes | |
-| `gitlab_transaction_event_push_commit_total` | Counter | 9.4 | Counter for commits | `branch` |
-| `gitlab_transaction_event_push_tag_total` | Counter | 9.4 | Counter for tag pushes | |
| `gitlab_transaction_event_rails_exception_total` | Counter | 9.4 | Counter for number of rails exceptions | |
| `gitlab_transaction_event_receive_email_total` | Counter | 9.4 | Counter for received emails | `handler` |
| `gitlab_transaction_event_remote_mirrors_failed_total` | Counter | 10.8 | Counter for failed remote mirrors | |
@@ -129,6 +127,9 @@ The following metrics are available:
| `pipeline_graph_link_calculation_duration_seconds` | Histogram | 13.9 | Total time spent calculating links, in seconds | |
| `pipeline_graph_links_total` | Histogram | 13.9 | Number of links per graph | |
| `pipeline_graph_links_per_job_ratio` | Histogram | 13.9 | Ratio of links to job per graph | |
+| `gitlab_ci_pipeline_security_orchestration_policy_processing_duration_seconds` | Histogram | 13.12 | Time in seconds it takes to process Security Policies in CI/CD pipeline | |
+| `gitlab_ci_difference_live_vs_actual_minutes` | Histogram | 13.12 | Difference between CI minute consumption counted while jobs were running (live) vs when jobs are complete (actual). Used to enforce CI minute consumption limits on long running jobs. | `plan` |
+| `gitlab_spamcheck_request_duration_seconds` | Histogram | 13.12 | The duration for requests between Rails and the anti-spam engine | |
## Metrics controlled by a feature flag
@@ -214,11 +215,15 @@ configuration option in `gitlab.yml`. These metrics are served from the
| `geo_package_files_failed` | Gauge | 13.3 | Number of syncable package files failed to sync on secondary | `url` |
| `geo_package_files_registry` | Gauge | 13.3 | Number of package files in the registry | `url` |
| `geo_terraform_state_versions` | Gauge | 13.5 | Number of terraform state versions on primary | `url` |
-| `geo_terraform_state_versions_checksummed` | Gauge | 13.5 | Number of terraform state versions checksummed on primary | `url` |
+| `geo_terraform_state_versions_checksummed` | Gauge | 13.5 | Number of terraform state versions checksummed successfully on primary | `url` |
| `geo_terraform_state_versions_checksum_failed` | Gauge | 13.5 | Number of terraform state versions failed to calculate the checksum on primary | `url` |
+| `geo_terraform_state_versions_checksum_total` | Gauge | 13.12 | Number of terraform state versions tried to checksum on primary | `url` |
| `geo_terraform_state_versions_synced` | Gauge | 13.5 | Number of syncable terraform state versions synced on secondary | `url` |
| `geo_terraform_state_versions_failed` | Gauge | 13.5 | Number of syncable terraform state versions failed to sync on secondary | `url` |
| `geo_terraform_state_versions_registry` | Gauge | 13.5 | Number of terraform state versions in the registry | `url` |
+| `geo_terraform_state_versions_verified` | Gauge | 13.12 | Number of terraform state versions verified on secondary | `url` |
+| `geo_terraform_state_versions_verification_failed` | Gauge | 13.12 | Number of terraform state versions verifications failed on secondary | `url` |
+| `geo_terraform_state_versions_verification_total` | Gauge | 13.12 | Number of terraform state versions verifications tried on secondary | `url` |
| `global_search_bulk_cron_queue_size` | Gauge | 12.10 | Number of database records waiting to be synchronized to Elasticsearch | |
| `global_search_awaiting_indexing_queue_size` | Gauge | 13.2 | Number of database updates waiting to be synchronized to Elasticsearch while indexing is paused | |
| `geo_merge_request_diffs` | Gauge | 13.4 | Number of merge request diffs on primary | `url` |
@@ -254,7 +259,7 @@ The following metrics are available:
|:--------------------------------- |:--------- |:------------------------------------------------------------- |:-------------------------------------- |:--------------------------------------------------------- |
| `db_load_balancing_hosts` | Gauge | [12.3](https://gitlab.com/gitlab-org/gitlab/-/issues/13630) | Current number of load balancing hosts | |
| `sidekiq_load_balancing_count` | Counter | 13.11 | Sidekiq jobs using load balancing with data consistency set to :sticky or :delayed | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency`, `data_consistency`, `database_chosen` |
-
+
## Database partitioning metrics **(PREMIUM SELF)**
The following metrics are available:
diff --git a/doc/administration/monitoring/prometheus/postgres_exporter.md b/doc/administration/monitoring/prometheus/postgres_exporter.md
index 783030a9220..8a851afe35b 100644
--- a/doc/administration/monitoring/prometheus/postgres_exporter.md
+++ b/doc/administration/monitoring/prometheus/postgres_exporter.md
@@ -6,7 +6,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
# PostgreSQL Server Exporter **(FREE SELF)**
-The [PostgreSQL Server Exporter](https://github.com/wrouesnel/postgres_exporter) allows you to export various PostgreSQL metrics.
+The [PostgreSQL Server Exporter](https://github.com/prometheus-community/postgres_exporter) allows you to export various PostgreSQL metrics.
For installations from source you must install and configure it yourself.
diff --git a/doc/administration/nfs.md b/doc/administration/nfs.md
index 52948732766..c49a2c20ed2 100644
--- a/doc/administration/nfs.md
+++ b/doc/administration/nfs.md
@@ -22,8 +22,8 @@ file system performance, see
## Gitaly and NFS deprecation
WARNING:
-From GitLab 14.0, enhancements and bug fixes for NFS for Git repositories will no longer be
-considered and customer technical support will be considered out of scope.
+From GitLab 14.0, enhancements and bug fixes for NFS for Git repositories are no longer
+considered and customer technical support is considered out of scope.
[Read more about Gitaly and NFS](gitaly/index.md#nfs-deprecation-notice) and
[the correct mount options to use](#upgrade-to-gitaly-cluster-or-disable-caching-if-experiencing-data-loss).
@@ -81,8 +81,8 @@ When you define your NFS exports, we recommend you also add the following
options:
- `no_root_squash` - NFS normally changes the `root` user to `nobody`. This is
- a good security measure when NFS shares will be accessed by many different
- users. However, in this case only GitLab will use the NFS share so it
+ a good security measure when NFS shares are accessed by many different
+ users. However, in this case only GitLab uses the NFS share so it
is safe. GitLab recommends the `no_root_squash` setting because we need to
manage file permissions automatically. Without the setting you may receive
errors when the Omnibus package tries to alter permissions. Note that GitLab
@@ -137,15 +137,15 @@ NFS performance with GitLab can in some cases be improved with
[Rugged](https://github.com/libgit2/rugged).
NOTE:
-From GitLab 12.1, it will automatically be detected if Rugged can and should be used per storage.
+From GitLab 12.1, it automatically detects if Rugged can and should be used per storage.
-If you previously enabled Rugged using the feature flag, you will need to unset the feature flag by using:
+If you previously enabled Rugged using the feature flag, you need to unset the feature flag by using:
```shell
sudo gitlab-rake gitlab:features:unset_rugged
```
-If the Rugged feature flag is explicitly set to either `true` or `false`, GitLab will use the value explicitly set.
+If the Rugged feature flag is explicitly set to either `true` or `false`, GitLab uses the value explicitly set.
#### Improving NFS performance with Puma
@@ -190,7 +190,7 @@ Note there are several options that you should consider using:
| `lookupcache=positive` | Tells the NFS client to honor `positive` cache results but invalidates any `negative` cache results. Negative cache results cause problems with Git. Specifically, a `git push` can fail to register uniformly across all NFS clients. The negative cache causes the clients to 'remember' that the files did not exist previously.
| `hard` | Instead of `soft`. [Further details](#soft-mount-option).
| `cto` | `cto` is the default option, which you should use. Do not use `nocto`. [Further details](#nocto-mount-option).
-| `_netdev` | Wait to mount filesystem until network is online. See also the [`high_availability['mountpoint']`](https://docs.gitlab.com/omnibus/settings/configuration.html#only-start-omnibus-gitlab-services-after-a-given-file-system-is-mounted) option.
+| `_netdev` | Wait to mount file system until network is online. See also the [`high_availability['mountpoint']`](https://docs.gitlab.com/omnibus/settings/configuration.html#only-start-omnibus-gitlab-services-after-a-given-file-system-is-mounted) option.
#### `soft` mount option
@@ -222,7 +222,7 @@ they highlight that if the NFS client driver caches data, `soft` means there is
writes by GitLab are actually on disk.
Mount points set with the option `hard` may not perform as well, and if the
-NFS server goes down, `hard` will cause processes to hang when interacting with
+NFS server goes down, `hard` causes processes to hang when interacting with
the mount point. Use `SIGKILL` (`kill -9`) to deal with hung processes.
The `intr` option
[stopped working in the 2.6 kernel](https://access.redhat.com/solutions/157873).
@@ -260,7 +260,7 @@ mountpoint
└── uploads
```
-To do so, we'll need to configure Omnibus with the paths to each directory nested
+To do so, configure Omnibus with the paths to each directory nested
in the mount point as follows:
Mount `/gitlab-nfs` then use the following Omnibus
@@ -274,7 +274,7 @@ gitlab_ci['builds_directory'] = '/gitlab-nfs/gitlab-data/builds'
```
Run `sudo gitlab-ctl reconfigure` to start using the central location. Be aware
-that if you had existing data, you'll need to manually copy or rsync it to
+that if you had existing data, you need to manually copy or rsync it to
these new locations, and then restart GitLab.
### Bind mounts
@@ -296,21 +296,21 @@ NFS mount point is `/gitlab-nfs`. Then, add the following bind mounts in
/gitlab-nfs/gitlab-data/builds /var/opt/gitlab/gitlab-ci/builds none bind 0 0
```
-Using bind mounts will require manually making sure the data directories
+Using bind mounts requires you to manually make sure the data directories
are empty before attempting a restore. Read more about the
[restore prerequisites](../raketasks/backup_restore.md).
### Multiple NFS mounts
-When using default Omnibus configuration you will need to share 4 data locations
+When using default Omnibus configuration you need to share 4 data locations
between all GitLab cluster nodes. No other locations should be shared. The
following are the 4 locations need to be shared:
| Location | Description | Default configuration |
| -------- | ----------- | --------------------- |
-| `/var/opt/gitlab/git-data` | Git repository data. This will account for a large portion of your data | `git_data_dirs({"default" => { "path" => "/var/opt/gitlab/git-data"} })`
+| `/var/opt/gitlab/git-data` | Git repository data. This accounts for a large portion of your data | `git_data_dirs({"default" => { "path" => "/var/opt/gitlab/git-data"} })`
| `/var/opt/gitlab/gitlab-rails/uploads` | User uploaded attachments | `gitlab_rails['uploads_directory'] = '/var/opt/gitlab/gitlab-rails/uploads'`
-| `/var/opt/gitlab/gitlab-rails/shared` | Build artifacts, GitLab Pages, LFS objects, temp files, etc. If you're using LFS this may also account for a large portion of your data | `gitlab_rails['shared_path'] = '/var/opt/gitlab/gitlab-rails/shared'`
+| `/var/opt/gitlab/gitlab-rails/shared` | Build artifacts, GitLab Pages, LFS objects, temp files, and so on. If you're using LFS this may also account for a large portion of your data | `gitlab_rails['shared_path'] = '/var/opt/gitlab/gitlab-rails/shared'`
| `/var/opt/gitlab/gitlab-ci/builds` | GitLab CI/CD build traces | `gitlab_ci['builds_directory'] = '/var/opt/gitlab/gitlab-ci/builds'`
Other GitLab directories should not be shared between nodes. They contain
@@ -318,7 +318,7 @@ node-specific files and GitLab code that does not need to be shared. To ship
logs to a central location consider using remote syslog. Omnibus GitLab packages
provide configuration for [UDP log shipping](https://docs.gitlab.com/omnibus/settings/logs.html#udp-log-shipping-gitlab-enterprise-edition-only).
-Having multiple NFS mounts will require manually making sure the data directories
+Having multiple NFS mounts requires you to manually make sure the data directories
are empty before attempting a restore. Read more about the
[restore prerequisites](../raketasks/backup_restore.md).
@@ -348,7 +348,7 @@ Any `Operation not permitted` errors means you should investigate your NFS serve
## NFS in a Firewalled Environment
If the traffic between your NFS server and NFS client(s) is subject to port filtering
-by a firewall, then you will need to reconfigure that firewall to allow NFS communication.
+by a firewall, then you need to reconfigure that firewall to allow NFS communication.
[This guide from TDLP](https://tldp.org/HOWTO/NFS-HOWTO/security.html#FIREWALLS)
covers the basics of using NFS in a firewalled environment. Additionally, we encourage you to
@@ -370,7 +370,7 @@ sudo ufw allow from <client_ip_address> to any port nfs
WARNING:
From GitLab 13.0, using NFS for Git repositories is deprecated.
-As of GitLab 14.0, NFS-related issues with Gitaly will no longer be addressed. Read
+As of GitLab 14.0, NFS-related issues with Gitaly are no longer addressed. Read
more about [Gitaly and NFS deprecation](gitaly/index.md#nfs-deprecation-notice).
Customers and users have reported data loss on high-traffic repositories when using NFS for Git repositories.
@@ -391,7 +391,7 @@ The problem may be partially mitigated by adjusting caching using the following
WARNING:
The `actimeo=0` and `noac` options both result in a significant reduction in performance, possibly leading to timeouts.
You may be able to avoid timeouts and data loss using `actimeo=0` and `lookupcache=positive` _without_ `noac`, however
-we expect the performance reduction will still be significant. Upgrade to
+we expect the performance reduction is still significant. Upgrade to
[Gitaly Cluster](gitaly/praefect.md) as soon as possible.
### Avoid using cloud-based file systems
diff --git a/doc/administration/object_storage.md b/doc/administration/object_storage.md
index cbef752773b..d133e8c3721 100644
--- a/doc/administration/object_storage.md
+++ b/doc/administration/object_storage.md
@@ -595,7 +595,7 @@ with the Fog library that GitLab uses. Symptoms include an error in `production.
If you configure GitLab to use object storage for CI logs and artifacts,
you can avoid [local disk usage for job logs](job_logs.md#data-flow) by enabling
-[beta incremental logging](job_logs.md#new-incremental-logging-architecture).
+[beta incremental logging](job_logs.md#incremental-logging-architecture).
### Proxy Download
diff --git a/doc/administration/operations/extra_sidekiq_processes.md b/doc/administration/operations/extra_sidekiq_processes.md
index 88e3369e25e..6b8d6f3bf1e 100644
--- a/doc/administration/operations/extra_sidekiq_processes.md
+++ b/doc/administration/operations/extra_sidekiq_processes.md
@@ -144,8 +144,10 @@ attributes](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/workers/all_q
quickly. Can be `high`, `low`, or `throttled`. For example, the
`authorized_projects` queue is used to refresh user permissions, and
is high urgency.
-- `name` - the queue name. The other attributes are typically more useful as
- they are more general, but this is available in case a particular queue needs
+- `worker_name` - the worker name. The other attributes are typically more useful as
+ they are more general, but this is available in case a particular worker needs
+ to be selected.
+- `name` - the queue name. Similiarly, this is available in case a particular queue needs
to be selected.
- `resource_boundary` - if the queue is bound by `cpu`, `memory`, or
`unknown`. For example, the `project_export` queue is memory bound as it has
diff --git a/doc/administration/operations/fast_ssh_key_lookup.md b/doc/administration/operations/fast_ssh_key_lookup.md
index ec5bf9d6379..980db9713ee 100644
--- a/doc/administration/operations/fast_ssh_key_lookup.md
+++ b/doc/administration/operations/fast_ssh_key_lookup.md
@@ -16,7 +16,7 @@ Regular SSH operations become slow as the number of users grows because OpenSSH
searches for a key to authorize a user via a linear search. In the worst case,
such as when the user is not authorized to access GitLab, OpenSSH will scan the
entire file to search for a key. This can take significant time and disk I/O,
-which will delay users attempting to push or pull to a repository. Making
+which delays users attempting to push or pull to a repository. Making
matters worse, if users add or remove keys frequently, the operating system may
not be able to cache the `authorized_keys` file, which causes the disk to be
accessed repeatedly.
@@ -28,7 +28,7 @@ lookup of authorized SSH keys.
WARNING:
OpenSSH version 6.9+ is required because
`AuthorizedKeysCommand` must be able to accept a fingerprint. These
-instructions will break installations using older versions of OpenSSH, such as
+instructions break installations that use older versions of OpenSSH, such as
those included with CentOS 6 as of September 2017. If you want to use this
feature for CentOS 6, follow [the instructions on how to build and install a custom OpenSSH package](#compiling-a-custom-version-of-openssh-for-centos-6) before continuing.
@@ -40,9 +40,9 @@ single source of truth, [Geo](../geo/index.md) needs to be configured to perform
lookups via database lookup.
As part of [setting up Geo](../geo/index.md#setup-instructions),
-you will be required to follow the steps outlined below for both the primary and
+you are required to follow the steps outlined below for both the primary and
secondary nodes, but note that the `Write to "authorized keys" file` checkbox
-only needs to be unchecked on the primary node since it will be reflected
+only needs to be unchecked on the primary node since it is reflected
automatically on the secondary if database replication is working.
## Setting up fast lookup via GitLab Shell
@@ -91,10 +91,10 @@ as required, but that might require temporary ownership changes during `gitlab-s
WARNING:
Do not disable writes until SSH is confirmed to be working
-perfectly, because the file will quickly become out-of-date.
+perfectly; otherwise, the file quickly becomes out-of-date.
In the case of lookup failures (which are common), the `authorized_keys`
-file will still be scanned. So Git SSH performance will still be slow for many
+file is still scanned. So Git SSH performance would still be slow for many
users as long as a large file exists.
You can disable any more writes to the `authorized_keys` file by unchecking
@@ -183,8 +183,8 @@ the database. The following instructions can be used to build OpenSSH 7.5:
-rw-r--r--. 1 root root 367516 Jun 20 19:37 openssh-server-7.5p1-1.x86_64.rpm
```
-1. Install the packages. OpenSSH packages will replace `/etc/pam.d/sshd`
- with its own version, which may prevent users from logging in, so be sure
+1. Install the packages. OpenSSH packages replace `/etc/pam.d/sshd`
+ with their own versions, which may prevent users from logging in, so be sure
that the file is backed up and restored after installation:
```shell
diff --git a/doc/administration/operations/filesystem_benchmarking.md b/doc/administration/operations/filesystem_benchmarking.md
index 6119fcdae45..ffce104e1ad 100644
--- a/doc/administration/operations/filesystem_benchmarking.md
+++ b/doc/administration/operations/filesystem_benchmarking.md
@@ -8,7 +8,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
File system performance has a big impact on overall GitLab performance,
especially for actions that read or write to Git repositories. This information
-will help benchmark file system performance against known good and bad real-world
+helps benchmark file system performance against known good and bad real-world
systems.
Normally when talking about file system performance the biggest concern is
@@ -35,12 +35,12 @@ Then run the following:
fio --randrepeat=1 --ioengine=libaio --direct=1 --gtod_reduce=1 --name=test --bs=4k --iodepth=64 --readwrite=randrw --rwmixread=75 --size=4G --filename=/path/to/git-data/testfile
```
-This will create a 4GB file in `/path/to/git-data/testfile`. It performs
+This creates a 4GB file in `/path/to/git-data/testfile`. It performs
4KB reads and writes using a 75%/25% split within the file, with 64
operations running at a time. Be sure to delete the file after the test
completes.
-The output will vary depending on what version of `fio` installed. The following
+The output varies depending on what version of `fio` installed. The following
is an example output from `fio` v2.2.10 on a networked solid-state drive (SSD):
```plaintext
@@ -78,8 +78,8 @@ test but still have poor performance due to read speed and various other
factors.
The following one-line commands provide a quick benchmark for file system write and read
-performance. This will write 1,000 small files to the directory in which it is
-executed, and then read the same 1,000 files.
+performance. This writes 1,000 small files to the directory in which it is
+executed, and then reads the same 1,000 files.
1. Change into the root of the appropriate
[repository storage path](../repository_storage_paths.md).
@@ -107,7 +107,7 @@ executed, and then read the same 1,000 files.
cd ../; rm -rf test
```
-The output of the `time for ...` commands will look similar to the following. The
+The output of the `time for ...` commands resemble the following. The
important metric is the `real` time.
```shell
diff --git a/doc/administration/operations/rails_console.md b/doc/administration/operations/rails_console.md
index 1fe1ea96bff..8c366e311b8 100644
--- a/doc/administration/operations/rails_console.md
+++ b/doc/administration/operations/rails_console.md
@@ -149,17 +149,17 @@ Traceback (most recent call last):
/opt/gitlab/..../runner_command.rb:42:in `load': cannot load such file -- /tmp/helloworld.rb (LoadError)
```
-In case you encouter a similar error to this:
+In case you encounter a similar error to this:
```plaintext
-[root ~]# sudo gitlab-rails runner helloworld.rb
+[root ~]# sudo gitlab-rails runner helloworld.rb
Please specify a valid ruby command or the path of a script to run.
Run 'rails runner -h' for help.
undefined local variable or method `helloworld' for main:Object
```
-You can either move the file to the `/tmp` directory or create a new directory onwed by the user `git` and save the script in that directory as illustrated below:
+You can either move the file to the `/tmp` directory or create a new directory owned by the user `git` and save the script in that directory as illustrated below:
```shell
sudo mkdir /scripts
diff --git a/doc/administration/operations/ssh_certificates.md b/doc/administration/operations/ssh_certificates.md
index c0525cf6258..cc09ad95dce 100644
--- a/doc/administration/operations/ssh_certificates.md
+++ b/doc/administration/operations/ssh_certificates.md
@@ -110,10 +110,10 @@ Where `{KEY_ID}` is the `%i` argument passed to the script
(e.g. `aeanfjord`), and `{PRINCIPAL}` is the principal passed to it
(e.g. `sshUsers`).
-You will need to customize the `sshUsers` part of that. It should be
+You need to customize the `sshUsers` part of that. It should be
some principal that's guaranteed to be part of the key for all users
who can log in to GitLab, or you must provide a list of principals,
-one of which is going to be present for the user, e.g.:
+one of which is present for the user, e.g.:
```plaintext
[...]
@@ -122,7 +122,7 @@ one of which is going to be present for the user, e.g.:
## Principals and security
-You can supply as many principals as you want, these will be turned
+You can supply as many principals as you want, these are turned
into multiple lines of `authorized_keys` output, as described in the
`AuthorizedPrincipalsFile` documentation in `sshd_config(5)`.
@@ -130,32 +130,32 @@ Normally when using the `AuthorizedKeysCommand` with OpenSSH the
principal is some "group" that's allowed to log into that
server. However with GitLab it's only used to appease OpenSSH's
requirement for it, we effectively only care about the "key ID" being
-correct. Once that's extracted GitLab will enforce its own ACLs for
+correct. Once that's extracted GitLab enforces its own ACLs for
that user (e.g. what projects the user can access).
So it's OK to e.g. be overly generous in what you accept, since if the
-user e.g. has no access to GitLab at all it'll just error out with a
+user e.g. has no access to GitLab at all it just errors out with a
message about this being an invalid user.
## Interaction with the `authorized_keys` file
SSH certificates can be used in conjunction with the `authorized_keys`
-file, and if set up as configured above the `authorized_keys` file will
-still serve as a fallback.
+file, and if set up as configured above the `authorized_keys` file
+still serves as a fallback.
This is because if the `AuthorizedPrincipalsCommand` can't
-authenticate the user, OpenSSH will fall back on
+authenticate the user, OpenSSH falls back on
`~/.ssh/authorized_keys` (or the `AuthorizedKeysCommand`).
Therefore there may still be a reason to use the ["Fast lookup of
authorized SSH keys in the database"](fast_ssh_key_lookup.html) method
-in conjunction with this. Since you'll be using SSH certificates for
+in conjunction with this. Since you are using SSH certificates for
all your normal users, and relying on the `~/.ssh/authorized_keys`
fallback for deploy keys, if you make use of those.
But you may find that there's no reason to do that, since all your
-normal users will use the fast `AuthorizedPrincipalsCommand` path, and
-only automated deployment key access will fall back on
+normal users use the fast `AuthorizedPrincipalsCommand` path, and
+only automated deployment key access falls back on
`~/.ssh/authorized_keys`, or that you have a lot more keys for normal
users (especially if they're renewed) than you have deploy keys.
@@ -167,14 +167,14 @@ uploading an SSH public key to their profile, relying on the
currently no feature to prevent this, [but there's an open request for
adding it](https://gitlab.com/gitlab-org/gitlab/-/issues/23260).
-Such a restriction can currently be hacked in by e.g. providing a
+Such a restriction can currently be hacked in by, for example, providing a
custom `AuthorizedKeysCommand` which checks if the discovered key-ID
returned from `gitlab-shell-authorized-keys-check` is a deploy key or
not (all non-deploy keys should be refused).
## Disabling the global warning about users lacking SSH keys
-By default GitLab will show a "You won't be able to pull or push
+By default GitLab shows a "You won't be able to pull or push
project code via SSH" warning to users who have not uploaded an SSH
key to their profile.
diff --git a/doc/administration/packages/container_registry.md b/doc/administration/packages/container_registry.md
index 853a1884821..14e54513536 100644
--- a/doc/administration/packages/container_registry.md
+++ b/doc/administration/packages/container_registry.md
@@ -301,6 +301,16 @@ the Container Registry by themselves, follow the steps below.
## Configure storage for the Container Registry
+NOTE:
+For storage backends that support it, you can use object versioning to preserve, retrieve, and
+restore the non-current versions of every object stored in your buckets. However, this may result in
+higher storage usage and costs. Due to how the registry operates, image uploads are first stored in
+a temporary path and then transferred to a final location. For object storage backends, including S3
+and GCS, this transfer is achieved with a copy followed by a delete. With object versioning enabled,
+these deleted temporary upload artifacts are kept as non-current versions, therefore increasing the
+storage bucket size. To ensure that non-current versions are deleted after a given amount of time,
+you should configure an object lifecycle policy with your storage provider.
+
You can configure the Container Registry to use various storage backends by
configuring a storage driver. By default the GitLab Container Registry
is configured to use the [file system driver](#use-file-system)
@@ -937,7 +947,7 @@ To enable the read-only mode:
sudo gitlab-ctl reconfigure
```
- This command sets the Container Registry into the read only mode.
+ This command sets the Container Registry into the read-only mode.
1. Next, trigger one of the garbage collect commands:
@@ -1065,15 +1075,15 @@ If the registry fails to authenticate valid login attempts, you get the followin
```shell
# docker login gitlab.company.com:4567
Username: user
-Password:
+Password:
Error response from daemon: login attempt to https://gitlab.company.com:4567/v2/ failed with status: 401 Unauthorized
```
And more specifically, this appears in the `/var/log/gitlab/registry/current` log file:
```plaintext
-level=info msg="token signed by untrusted key with ID: "TOKE:NL6Q:7PW6:EXAM:PLET:OKEN:BG27:RCIB:D2S3:EXAM:PLET:OKEN""
-level=warning msg="error authorizing context: invalid token" go.version=go1.12.7 http.request.host="gitlab.company.com:4567" http.request.id=74613829-2655-4f96-8991-1c9fe33869b8 http.request.method=GET http.request.remoteaddr=10.72.11.20 http.request.uri="/v2/" http.request.useragent="docker/19.03.2 go/go1.12.8 git-commit/6a30dfc kernel/3.10.0-693.2.2.el7.x86_64 os/linux arch/amd64 UpstreamClient(Docker-Client/19.03.2 \(linux\))"
+level=info msg="token signed by untrusted key with ID: "TOKE:NL6Q:7PW6:EXAM:PLET:OKEN:BG27:RCIB:D2S3:EXAM:PLET:OKEN""
+level=warning msg="error authorizing context: invalid token" go.version=go1.12.7 http.request.host="gitlab.company.com:4567" http.request.id=74613829-2655-4f96-8991-1c9fe33869b8 http.request.method=GET http.request.remoteaddr=10.72.11.20 http.request.uri="/v2/" http.request.useragent="docker/19.03.2 go/go1.12.8 git-commit/6a30dfc kernel/3.10.0-693.2.2.el7.x86_64 os/linux arch/amd64 UpstreamClient(Docker-Client/19.03.2 \(linux\))"
```
GitLab uses the contents of the certificate key pair's two sides to encrypt the authentication token
@@ -1294,8 +1304,8 @@ GitLab Rails application, the Docker Registry, or something else. In this
case, since we know that since the login succeeded, we probably need to look
at the communication between the client and the Registry.
-The REST API between the Docker client and Registry is [described
-here](https://docs.docker.com/registry/spec/api/). Normally, one would just
+The REST API between the Docker client and Registry is described
+[in the Docker documentation](https://docs.docker.com/registry/spec/api/). Normally, one would just
use Wireshark or tcpdump to capture the traffic and see where things went
wrong. However, since all communications between Docker clients and servers
are done over HTTPS, it's a bit difficult to decrypt the traffic quickly even
diff --git a/doc/administration/packages/index.md b/doc/administration/packages/index.md
index 5ddf47d9b33..6440fb16fc6 100644
--- a/doc/administration/packages/index.md
+++ b/doc/administration/packages/index.md
@@ -98,6 +98,12 @@ To enable the Packages feature:
1. [Restart GitLab](../restart_gitlab.md#helm-chart-installations "How to reconfigure Helm GitLab") for the changes to take effect.
+## Rate limits
+
+When downloading packages as dependencies in downstream projects, many requests are made through the
+Packages API. You may therefore reach enforced user and IP rate limits. To address this issue, you
+can define specific rate limits for the Packages API. For more details, see [Package Registry Rate Limits](../../user/admin_area/settings/package_registry_rate_limits.md).
+
## Changing the storage path
By default, the packages are stored locally, but you can change the default
diff --git a/doc/administration/pages/index.md b/doc/administration/pages/index.md
index ae4fa086e3f..c93ae90deb6 100644
--- a/doc/administration/pages/index.md
+++ b/doc/administration/pages/index.md
@@ -5,7 +5,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
description: 'Learn how to administer GitLab Pages.'
---
-# GitLab Pages administration
+# GitLab Pages administration **(FREE SELF)**
> - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/80) in GitLab EE 8.3.
> - Custom CNAMEs with TLS support were [introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/173) in GitLab EE 8.5.
@@ -120,7 +120,7 @@ This example contains the following:
- `example.com`: The GitLab domain.
- `example.io`: The domain GitLab Pages is served from.
- `192.0.2.1`: The primary IP of your GitLab instance.
-- `192.0.2.2`: The secondary IP, which is dedicated to GitLab Pages.
+- `192.0.2.2`: The secondary IP, which is dedicated to GitLab Pages. It must be different than the primary IP.
NOTE:
You should not use the GitLab domain to serve user pages. For more information see the [security section](#security).
@@ -253,13 +253,14 @@ control over how the Pages daemon runs and serves content in your environment.
| `gitlab_server` | Server to use for authentication when access control is enabled; defaults to GitLab `external_url`. |
| `headers` | Specify any additional http headers that should be sent to the client with each response. Multiple headers can be given as an array, header and value as one string, for example `['my-header: myvalue', 'my-other-header: my-other-value']` |
| `inplace_chroot` | On [systems that don't support bind-mounts](index.md#additional-configuration-for-docker-container), this instructs GitLab Pages to `chroot` into its `pages_path` directory. Some caveats exist when using in-place `chroot`; refer to the GitLab Pages [README](https://gitlab.com/gitlab-org/gitlab-pages/blob/master/README.md#caveats) for more information. |
+| `enable_disk` | Allows the GitLab Pages daemon to serve content from disk. Shall be disabled if shared disk storage isn't available. |
| `insecure_ciphers` | Use default list of cipher suites, may contain insecure ones like 3DES and RC4. |
| `internal_gitlab_server` | Internal GitLab server address used exclusively for API requests. Useful if you want to send that traffic over an internal load balancer. Defaults to GitLab `external_url`. |
| `listen_proxy` | The addresses to listen on for reverse-proxy requests. Pages binds to these addresses' network sockets and receives incoming requests from them. Sets the value of `proxy_pass` in `$nginx-dir/conf/gitlab-pages.conf`. |
| `log_directory` | Absolute path to a log directory. |
| `log_format` | The log output format: `text` or `json`. |
| `log_verbose` | Verbose logging, true/false. |
-| `propagate_correlation_id` | Set to true (false by default) to re-use existing Correlation ID from the incoming request header `X-Request-ID` if present. If a reverse proxy sets this header, the value will be propagated in the request chain. |
+| `propagate_correlation_id` | Set to true (false by default) to re-use existing Correlation ID from the incoming request header `X-Request-ID` if present. If a reverse proxy sets this header, the value is propagated in the request chain. |
| `max_connections` | Limit on the number of concurrent connections to the HTTP, HTTPS or proxy listeners. |
| `metrics_address` | The address to listen on for metrics requests. |
| `redirect_http` | Redirect pages from HTTP to HTTPS, true/false. |
@@ -310,14 +311,12 @@ world. Custom domains are supported, but no TLS.
```ruby
pages_external_url "http://example.io"
- nginx['listen_addresses'] = ['192.0.2.1']
+ nginx['listen_addresses'] = ['192.0.2.1'] # The primary IP of the GitLab instance
pages_nginx['enable'] = false
- gitlab_pages['external_http'] = ['192.0.2.2:80', '[2001:db8::2]:80']
+ gitlab_pages['external_http'] = ['192.0.2.2:80', '[2001:db8::2]:80'] # The secondary IPs for the GitLab Pages daemon
```
- where `192.0.2.1` is the primary IP address that GitLab is listening to and
- `192.0.2.2` and `2001:db8::2` are the secondary IPs the GitLab Pages daemon
- listens on. If you don't have IPv6, you can omit the IPv6 address.
+ If you don't have IPv6, you can omit the IPv6 address.
1. [Reconfigure GitLab](../restart_gitlab.md#omnibus-gitlab-reconfigure).
@@ -342,20 +341,18 @@ world. Custom domains and TLS are supported.
```ruby
pages_external_url "https://example.io"
- nginx['listen_addresses'] = ['192.0.2.1']
+ nginx['listen_addresses'] = ['192.0.2.1'] # The primary IP of the GitLab instance
pages_nginx['enable'] = false
- gitlab_pages['external_http'] = ['192.0.2.2:80', '[2001:db8::2]:80']
- gitlab_pages['external_https'] = ['192.0.2.2:443', '[2001:db8::2]:443']
+ gitlab_pages['external_http'] = ['192.0.2.2:80', '[2001:db8::2]:80'] # The secondary IPs for the GitLab Pages daemon
+ gitlab_pages['external_https'] = ['192.0.2.2:443', '[2001:db8::2]:443'] # The secondary IPs for the GitLab Pages daemon
# Redirect pages from HTTP to HTTPS
gitlab_pages['redirect_http'] = true
```
- where `192.0.2.1` is the primary IP address that GitLab is listening to and
- `192.0.2.2` and `2001:db8::2` are the secondary IPs where the GitLab Pages daemon
- listens on. If you don't have IPv6, you can omit the IPv6 address.
+ If you don't have IPv6, you can omit the IPv6 address.
1. If you haven't named your certificate and key `example.io.crt` and `example.io.key` respectively,
-then you'll need to also add the full paths as shown below:
+then you need to also add the full paths as shown below:
```ruby
gitlab_pages['cert'] = "/etc/gitlab/ssl/example.io.crt"
@@ -468,7 +465,7 @@ To do that:
WARNING:
For self-managed installations, all public websites remain private until they are
-redeployed. This issue will be resolved by
+redeployed. Resolve this issue by
[sourcing domain configuration from the GitLab API](https://gitlab.com/gitlab-org/gitlab/-/issues/218357).
### Running behind a proxy
@@ -555,9 +552,9 @@ Follow the steps below to configure verbose logging of GitLab Pages daemon.
> [Introduced](https://gitlab.com/gitlab-org/gitlab-pages/-/merge_requests/438) in GitLab 13.10.
-Setting the `propagate_correlation_id` to true will allow installations behind a reverse proxy generate
+Setting the `propagate_correlation_id` to true allows installations behind a reverse proxy to generate
and set a correlation ID to requests sent to GitLab Pages. When a reverse proxy sets the header value `X-Request-ID`,
-the value will be propagated in the request chain.
+the value propagates in the request chain.
Users [can find the correlation ID in the logs](../troubleshooting/tracing_correlation_id.md#identify-the-correlation-id-for-a-request).
To enable the propagation of the correlation ID:
@@ -648,7 +645,7 @@ The default is 100MB.
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/16610) in GitLab 12.7.
NOTE:
-Only GitLab admin users will be able to view and override the **Maximum size of Pages** setting.
+Only GitLab admin users are able to view and override the **Maximum size of Pages** setting.
To override the global maximum pages size for a specific project:
@@ -771,7 +768,7 @@ The default value for Omnibus installations is `nil`.
If left unchanged, GitLab Pages tries to use any available source (either `gitlab` or `disk`). The
preferred source is `gitlab`, which uses [API-based configuration](#gitlab-api-based-configuration).
-On large GitLab instances, using the API-based configuration will significantly improve the pages daemon startup time, as there is no need to load all custom domains configuration into memory.
+On large GitLab instances, using the API-based configuration significantly improves the pages daemon startup time, as there is no need to load all custom domains configuration into memory.
For more details see this [blog post](https://about.gitlab.com/blog/2020/08/03/how-gitlab-pages-uses-the-gitlab-api-to-serve-content/).
@@ -839,25 +836,25 @@ Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".
Examples:
-- Increasing `gitlab_cache_expiry` will allow items to exist in the cache longer.
+- Increasing `gitlab_cache_expiry` allows items to exist in the cache longer.
This setting might be useful if the communication between GitLab Pages and GitLab Rails
is not stable.
-- Increasing `gitlab_cache_refresh` will reduce the frequency at which GitLab Pages
+- Increasing `gitlab_cache_refresh` reduces the frequency at which GitLab Pages
requests a domain's configuration from GitLab Rails. This setting might be useful
GitLab Pages generates too many requests to GitLab API and content does not change frequently.
-- Decreasing `gitlab_cache_cleanup` will remove expired items from the cache more frequently,
+- Decreasing `gitlab_cache_cleanup` removes expired items from the cache more frequently,
reducing the memory usage of your Pages node.
- Decreasing `gitlab_retrieval_timeout` allows you to stop the request to GitLab Rails
-more quickly. Increasing it will allow more time to receive a response from the API,
+more quickly. Increasing it allows more time to receive a response from the API,
useful in slow networking environments.
-- Decreasing `gitlab_retrieval_interval` will make requests to the API more frequently,
+- Decreasing `gitlab_retrieval_interval` makes requests to the API more frequently,
only when there is an error response from the API, for example a connection timeout.
-- Decreasing `gitlab_retrieval_retries` will reduce the number of times a domain's
+- Decreasing `gitlab_retrieval_retries` reduces the number of times a domain's
configuration is tried to be resolved automatically before reporting an error.
## Using object storage
@@ -868,13 +865,6 @@ configuration is tried to be resolved automatically before reporting an error.
### Object storage settings
-WARNING:
-With the following settings, Pages uses both NFS and Object Storage locations when deploying the
-site. **Do not remove the existing NFS mount used by Pages** when applying these settings. For more
-information, see the epics
-[3901](https://gitlab.com/groups/gitlab-org/-/epics/3901#how-to-test-object-storage-integration-in-beta)
-and [3910](https://gitlab.com/groups/gitlab-org/-/epics/3910).
-
The following settings are:
- Nested under `pages:` and then `object_store:` on source installations.
@@ -886,6 +876,10 @@ The following settings are:
| `remote_directory` | The name of the bucket where Pages site content is stored. | |
| `connection` | Various connection options described below. | |
+NOTE:
+If you want to stop using and disconnect the NFS server, you need to [explicitly disable
+local storage](#disable-pages-local-storage), and it's only possible after upgrading to GitLab 13.11.
+
#### S3-compatible connection settings
See [the available connection settings for different providers](../object_storage.md#connection-settings).
@@ -919,6 +913,8 @@ In Omnibus installations:
1. Save the file and [reconfigure GitLab](../restart_gitlab.md#omnibus-gitlab-reconfigure)
for the changes to take effect.
+1. [Migrate existing Pages deployments to object storage.](#migrate-pages-deployments-to-object-storage)
+
In installations from source:
1. Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following lines:
@@ -938,6 +934,8 @@ In installations from source:
1. Save the file and [restart GitLab](../restart_gitlab.md#installations-from-source)
for the changes to take effect.
+1. [Migrate existing Pages deployments to object storage.](#migrate-pages-deployments-to-object-storage)
+
## ZIP storage
In GitLab 14.0 the underlaying storage format of GitLab Pages is changing from
@@ -951,7 +949,9 @@ These ZIP archives can be stored either locally on disk storage or on the [objec
> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/59003) in GitLab 13.11.
-GitLab will [try to automatically migrate](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/54578) the old storage format to the new ZIP-based one when you upgrade to GitLab 13.11 or further.
+GitLab tries to
+[automatically migrate](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/54578)
+the old storage format to the new ZIP-based one when you upgrade to GitLab 13.11 or further.
However, some projects may fail to be migrated for different reasons.
To verify that all projects have been migrated successfully, you can manually run the migration:
@@ -996,7 +996,7 @@ If you find that migrated data is invalid, you can remove all migrated data by r
sudo gitlab-rake gitlab:pages:clean_migrated_zip_storage
```
-This will not remove any data from the legacy disk storage and the GitLab Pages daemon will automatically fallback
+This does not remove any data from the legacy disk storage and the GitLab Pages daemon automatically falls back
to using that.
### Migrate Pages deployments to object storage
@@ -1021,6 +1021,43 @@ After the migration to object storage is performed, you can choose to revert you
sudo gitlab-rake gitlab:pages:deployments:migrate_to_local
```
+### Disable Pages local storage
+
+> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/301159) in GitLab 13.11.
+
+If you use [object storage](#using-object-storage), disable local storage:
+
+1. Edit `/etc/gitlab/gitlab.rb`:
+
+ ```ruby
+ gitlab_rails['pages_local_store_enabled'] = false
+ ```
+
+1. [Reconfigure GitLab](../restart_gitlab.md#omnibus-gitlab-reconfigure) for the changes to take effect.
+
+Starting from GitLab 13.12, this setting also disables the [legacy storage](#migrate-legacy-storage-to-zip-storage), so if you were using NFS to serve Pages, you can completely disconnect from it.
+
+## Migrate GitLab Pages to 14.0
+
+In GitLab 14.0 a number of breaking changes are introduced which may require some user intervention.
+The steps below describe the best way to migrate without causing any downtime for your GitLab instance.
+
+If you run GitLab on a single server, then most likely you will not notice any problem after
+upgrading to GitLab 14.0, but it may be safer to follow the steps anyway.
+If you run GitLab on a single server, then most likely the upgrade process to 14.0 will go smoothly for you. Regardless, we recommend everyone follow the migration steps to ensure a successful upgrade.
+If at any point you run into issues, consult the [troubleshooting section](#troubleshooting).
+
+To migrate GitLab Pages to GitLab 14.0:
+
+1. If your current GitLab version is lower than 13.12, then you first need to upgrade to 13.12.
+Upgrading directly to 14.0 may cause downtime for some web-sites hosted on GitLab Pages
+until you finish the following steps.
+1. Enable the [API-based configuration](#gitlab-api-based-configuration), which
+is the default starting from GitLab 14.0. Skip this step if you're already running GitLab 14.0 or above.
+1. If you want to store your pages content in the [object storage](#using-object-storage), make sure to configure it.
+If you want to store the pages content locally or continue using an NFS server, skip this step.
+1. [Migrate legacy storage to ZIP storage.](#migrate-legacy-storage-to-zip-storage)
+
## Backup
GitLab Pages are part of the [regular backup](../../raketasks/backup_restore.md), so there is no separate backup to configure.
@@ -1120,6 +1157,35 @@ to define the explicit address that the GitLab Pages daemon should listen on:
gitlab_pages['listen_proxy'] = '127.0.0.1:8090'
```
+### Intermittent 502 errors or after a few days
+
+If you run Pages on a system that uses `systemd` and
+[`tmpfiles.d`](https://www.freedesktop.org/software/systemd/man/tmpfiles.d.html),
+you may encounter intermittent 502 errors trying to serve Pages with an error similar to:
+
+```plaintext
+dial tcp: lookup gitlab.example.com on [::1]:53: dial udp [::1]:53: connect: no route to host"
+```
+
+GitLab Pages creates a [bind mount](https://man7.org/linux/man-pages/man8/mount.8.html)
+inside `/tmp/gitlab-pages-*` that includes files like `/etc/hosts`.
+However, `systemd` may clean the `/tmp/` directory on a regular basis so the DNS
+configuration may be lost.
+
+To stop `systemd` from cleaning the Pages related content:
+
+1. Tell `tmpfiles.d` to not remove the Pages `/tmp` directory:
+
+ ```shell
+ echo 'x /tmp/gitlab-pages-*' >> /etc/tmpfiles.d/gitlab-pages-jail.conf
+ ```
+
+1. Restart GitLab Pages:
+
+ ```shell
+ sudo gitlab-ctl restart gitlab-pages
+ ```
+
### 404 error after transferring the project to a different group or user, or changing project path
If you encounter a `404 Not Found` error a Pages site after transferring a project to
@@ -1136,6 +1202,17 @@ date > /var/opt/gitlab/gitlab-rails/shared/pages/.update
If you've customized the Pages storage path, adjust the command above to use your custom path.
+### 404 error after promoting a Geo secondary to a primary node
+
+These are due to the Pages files not being among the
+[supported data types](../geo/replication/datatypes.md#limitations-on-replicationverification).
+
+It is possible to copy the subfolders and files in the [Pages path](#change-storage-path)
+to the new primary node to resolve this.
+For example, you can adapt the `rsync` strategy from the
+[moving repositories documenation](../operations/moving_repositories.md).
+Alternatively, run the CI pipelines of those projects that contain a `pages` job again.
+
### Failed to connect to the internal GitLab API
If you have enabled [API-based configuration](#gitlab-api-based-configuration) and see the following error:
@@ -1199,7 +1276,7 @@ If the wildcard DNS [prerequisite](#prerequisites) can't be met, you can still u
all projects you need to use Pages with into a single group namespace, for example `pages`.
1. Configure a [DNS entry](#dns-configuration) without the `*.`-wildcard, for example `pages.example.io`.
1. Configure `pages_external_url http://example.io/` in your `gitlab.rb` file.
- Omit the group namespace here, because it will automatically be prepended by GitLab.
+ Omit the group namespace here, because it automatically is prepended by GitLab.
### Pages daemon fails with permission denied errors
@@ -1223,3 +1300,24 @@ Once added, reconfigure with `sudo gitlab-ctl reconfigure` and restart GitLab wi
Verify that the **Callback URL**/Redirect URI in the GitLab Pages [System OAuth application](../../integration/oauth_provider.md#instance-wide-applications)
is using the protocol (HTTP or HTTPS) that `pages_external_url` is configured to use.
+
+### 500 error `cannot serve from disk`
+
+If you get a 500 response from Pages and encounter an error similar to:
+
+```plaintext
+ERRO[0145] cannot serve from disk error="gitlab: disk access is disabled via enable-disk=false" project_id=27 source_path="file:///shared/pages/@hashed/67/06/670671cd97404156226e507973f2ab8330d3022ca96e0c93bdbdb320c41adcaf/pages_deployments/14/artifacts.zip" source_type=zip
+```
+
+It means that GitLab Rails is telling GitLab Pages to serve content from a location on disk,
+however, GitLab Pages was configured to disable disk access.
+
+To enable disk access:
+
+1. Enable disk access for GitLab Pages in `/etc/gitlab/gitlab.rb`:
+
+ ```ruby
+ gitlab_pages['enable_disk'] = true
+ ```
+
+1. [Reconfigure GitLab](../restart_gitlab.md#omnibus-gitlab-reconfigure).
diff --git a/doc/administration/pages/source.md b/doc/administration/pages/source.md
index e1b54e11a1f..f1c3b515f68 100644
--- a/doc/administration/pages/source.md
+++ b/doc/administration/pages/source.md
@@ -4,7 +4,7 @@ group: Release
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# GitLab Pages administration for source installations
+# GitLab Pages administration for source installations **(FREE SELF)**
NOTE:
Before attempting to enable GitLab Pages, first make sure you have
diff --git a/doc/administration/polling.md b/doc/administration/polling.md
index cd2e8ecad60..f66df70a163 100644
--- a/doc/administration/polling.md
+++ b/doc/administration/polling.md
@@ -18,14 +18,14 @@ seconds; these are _not_ the actual values.
- 1 is the default, and recommended for most installations. (Issue notes poll
every 2 seconds, and issue titles poll every 5 seconds.)
-- 0 will disable UI polling completely. (On the next poll, clients will stop
+- 0 disables UI polling completely. (On the next poll, clients stop
polling for updates.)
-- A value greater than 1 will slow polling down. If you see issues with
+- A value greater than 1 slows polling down. If you see issues with
database load from lots of clients polling for updates, increasing the
multiplier from 1 can be a good compromise, rather than disabling polling
completely. (For example: If this is set to 2, then issue notes poll every 4
seconds, and issue titles poll every 10 seconds.)
-- A value between 0 and 1 will make the UI poll more frequently (so updates
- will show in other sessions faster), but is **not recommended**. 1 should be
+- A value between 0 and 1 makes the UI poll more frequently (so updates
+ show in other sessions faster), but is **not recommended**. 1 should be
fast enough. (For example, if this is set to 0.5, then issue notes poll every
1 second, and issue titles poll every 2.5 seconds.)
diff --git a/doc/administration/postgresql/pgbouncer.md b/doc/administration/postgresql/pgbouncer.md
index 791ca64b001..fc7da04b960 100644
--- a/doc/administration/postgresql/pgbouncer.md
+++ b/doc/administration/postgresql/pgbouncer.md
@@ -23,7 +23,7 @@ This content has been moved to a [new location](replication_and_failover.md#conf
1. Generate PGBOUNCER_USER_PASSWORD_HASH with the command `gitlab-ctl pg-password-md5 pgbouncer`
-1. Generate SQL_USER_PASSWORD_HASH with the command `gitlab-ctl pg-password-md5 gitlab`. We'll also need to enter the plaintext SQL_USER_PASSWORD later
+1. Generate SQL_USER_PASSWORD_HASH with the command `gitlab-ctl pg-password-md5 gitlab`. Enter the plaintext SQL_USER_PASSWORD later.
1. On your database node, ensure the following is set in your `/etc/gitlab/gitlab.rb`
@@ -37,7 +37,7 @@ This content has been moved to a [new location](replication_and_failover.md#conf
1. Run `gitlab-ctl reconfigure`
NOTE:
- If the database was already running, it will need to be restarted after reconfigure by running `gitlab-ctl restart postgresql`.
+ If the database was already running, it needs to be restarted after reconfigure by running `gitlab-ctl restart postgresql`.
1. On the node you are running PgBouncer on, make sure the following is set in `/etc/gitlab/gitlab.rb`
@@ -68,7 +68,7 @@ This content has been moved to a [new location](replication_and_failover.md#conf
## Backups
-Do not backup or restore GitLab through a PgBouncer connection: this will cause a GitLab outage.
+Do not backup or restore GitLab through a PgBouncer connection: it causes a GitLab outage.
[Read more about this and how to reconfigure backups](../../raketasks/backup_restore.md#backup-and-restore-for-installations-using-pgbouncer).
diff --git a/doc/administration/postgresql/replication_and_failover.md b/doc/administration/postgresql/replication_and_failover.md
index 7e25bd69539..878d2b536cb 100644
--- a/doc/administration/postgresql/replication_and_failover.md
+++ b/doc/administration/postgresql/replication_and_failover.md
@@ -120,7 +120,7 @@ Few notes on the service itself:
- The service runs under a system account, by default `gitlab-consul`.
- If you are using a different username, you will have to specify it. We
will refer to it with `CONSUL_USERNAME`,
-- There will be a database user created with read only access to the repmgr
+- There will be a database user created with read-only access to the repmgr
database
- Passwords will be stored in the following locations:
- `/etc/gitlab/gitlab.rb`: hashed
diff --git a/doc/administration/raketasks/ldap.md b/doc/administration/raketasks/ldap.md
index 531e9e89020..d7a37d1df3a 100644
--- a/doc/administration/raketasks/ldap.md
+++ b/doc/administration/raketasks/ldap.md
@@ -10,8 +10,8 @@ The following are LDAP-related Rake tasks.
## Check
-The LDAP check Rake task will test the `bind_dn` and `password` credentials
-(if configured) and will list a sample of LDAP users. This task is also
+The LDAP check Rake task tests the `bind_dn` and `password` credentials
+(if configured) and lists a sample of LDAP users. This task is also
executed as part of the `gitlab:check` task, but can run independently
using the command below.
@@ -27,7 +27,7 @@ sudo gitlab-rake gitlab:ldap:check
sudo -u git -H bundle exec rake gitlab:ldap:check RAILS_ENV=production
```
-By default, the task will return a sample of 100 LDAP users. Change this
+By default, the task returns a sample of 100 LDAP users. Change this
limit by passing a number to the check task:
```shell
@@ -36,9 +36,9 @@ rake gitlab:ldap:check[50]
## Run a group sync **(PREMIUM SELF)**
-> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/14735) in [GitLab Starter](https://about.gitlab.com/pricing/) 12.2.
+> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/14735) in GitLab 12.2.
-The following task will run a [group sync](../auth/ldap/index.md#group-sync) immediately. This is valuable
+The following task runs a [group sync](../auth/ldap/index.md#group-sync) immediately. This is valuable
when you'd like to update all configured group memberships against LDAP without
waiting for the next scheduled group sync to be run.
@@ -61,9 +61,9 @@ bundle exec rake gitlab:ldap:group_sync
## Rename a provider
-If you change the LDAP server ID in `gitlab.yml` or `gitlab.rb` you will need
-to update all user identities or users will be unable to sign in. Input the
-old and new provider and this task will update all matching identities in the
+If you change the LDAP server ID in `gitlab.yml` or `gitlab.rb` you need
+to update all user identities or users aren't able to sign in. Input the
+old and new provider and this task updates all matching identities in the
database.
`old_provider` and `new_provider` are derived from the prefix `ldap` plus the
@@ -81,9 +81,10 @@ main:
`main` is the LDAP server ID. Together, the unique provider is `ldapmain`.
-> **Warning**: If you input an incorrect new provider users will be unable
-to sign in. If this happens, run the task again with the incorrect provider
-as the `old_provider` and the correct provider as the `new_provider`.
+WARNING:
+If you input an incorrect new provider, users cannot sign in. If this happens,
+run the task again with the incorrect provider as the `old_provider` and the
+correct provider as the `new_provider`.
**Omnibus Installation**
@@ -119,7 +120,7 @@ User identities were successfully updated
### Other options
-If you do not specify an `old_provider` and `new_provider` you will be prompted
+If you do not specify an `old_provider` and `new_provider` the task prompts you
for them:
**Omnibus Installation**
@@ -141,7 +142,7 @@ What is the old provider? Ex. 'ldapmain': ldapmain
What is the new provider? Ex. 'ldapcustom': ldapmycompany
```
-This tasks also accepts the `force` environment variable which will skip the
+This task also accepts the `force` environment variable, which skips the
confirmation dialog:
```shell
diff --git a/doc/administration/raketasks/project_import_export.md b/doc/administration/raketasks/project_import_export.md
index 0fe15f2b5ba..cd6ffc957b1 100644
--- a/doc/administration/raketasks/project_import_export.md
+++ b/doc/administration/raketasks/project_import_export.md
@@ -53,5 +53,5 @@ Note the following:
- The project import option must be enabled in
application settings (`/admin/application_settings/general`) under **Import sources**, which is available
under **Admin Area > Settings > Visibility and access controls**.
-- The exports are stored in a temporary [shared directory](../../development/shared_files.md)
- and are deleted every 24 hours by a specific worker.
+- The exports are stored in a temporary directory and are deleted every
+ 24 hours by a specific worker.
diff --git a/doc/administration/raketasks/storage.md b/doc/administration/raketasks/storage.md
index 7fedcbf3c1a..5b6d4e16d8d 100644
--- a/doc/administration/raketasks/storage.md
+++ b/doc/administration/raketasks/storage.md
@@ -77,7 +77,7 @@ To have a summary and then a list of projects and their attachments using hashed
WARNING:
In GitLab 13.0, [hashed storage](../repository_storage_types.md#hashed-storage)
is enabled by default and the legacy storage is deprecated.
-Support for legacy storage will be removed in GitLab 14.0. If you're on GitLab
+GitLab 14.0 eliminates support for legacy storage. If you're on GitLab
13.0 and later, switching new projects to legacy storage is not possible.
The option to choose between hashed and legacy storage in the admin area has
been disabled.
@@ -114,25 +114,25 @@ There is a specific queue you can watch to see how long it will take to finish:
After it reaches zero, you can confirm every project has been migrated by running the commands above.
If you find it necessary, you can run this migration script again to schedule missing projects.
-Any error or warning will be logged in Sidekiq's log file.
+Any error or warning is logged in Sidekiq's log file.
If [Geo](../geo/index.md) is enabled, each project that is successfully migrated
generates an event to replicate the changes on any **secondary** nodes.
-You only need the `gitlab:storage:migrate_to_hashed` Rake task to migrate your repositories, but we have additional
-commands below that helps you inspect projects and attachments in both legacy and hashed storage.
+You only need the `gitlab:storage:migrate_to_hashed` Rake task to migrate your repositories, but there are
+[additional commands(#list-projects-and-attachments) to help you inspect projects and attachments in both legacy and hashed storage.
## Rollback from hashed storage to legacy storage
WARNING:
In GitLab 13.0, [hashed storage](../repository_storage_types.md#hashed-storage)
is enabled by default and the legacy storage is deprecated.
-Support for legacy storage will be removed in GitLab 14.0. If you're on GitLab
+GitLab 14.0 eliminates support for legacy storage. If you're on GitLab
13.0 and later, switching new projects to legacy storage is not possible.
The option to choose between hashed and legacy storage in the admin area has
been disabled.
-This task will schedule all your existing projects and associated attachments to be rolled back to the
+This task schedules all your existing projects and associated attachments to be rolled back to the
legacy storage type.
- **Omnibus installation**
@@ -161,7 +161,7 @@ On the **Queues** tab, you can watch the `hashed_storage:hashed_storage_project_
After it reaches zero, you can confirm every project has been rolled back by running the commands above.
If some projects weren't rolled back, you can run this rollback script again to schedule further rollbacks.
-Any error or warning will be logged in Sidekiq's log file.
+Any error or warning is logged in Sidekiq's log file.
If you have a Geo setup, the rollback will not be reflected automatically
on the **secondary** node. You may need to wait for a backfill operation to kick-in and remove
diff --git a/doc/administration/reference_architectures/10k_users.md b/doc/administration/reference_architectures/10k_users.md
index 97af1fe8d3c..e4a699b962c 100644
--- a/doc/administration/reference_architectures/10k_users.md
+++ b/doc/administration/reference_architectures/10k_users.md
@@ -24,8 +24,8 @@ full list of reference architectures, see
| Internal load balancing node | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
| Redis - Cache** | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
| Redis - Queues / Shared State** | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
-| Redis Sentinel - Cache** | 3 | 1 vCPU, 1.7 GB memory | `g1-small` | `t3.small` | `B1MS` |
-| Redis Sentinel - Queues / Shared State** | 3 | 1 vCPU, 1.7 GB memory | `g1-small` | `t3.small` | `B1MS` |
+| Redis Sentinel - Cache** | 3 | 1 vCPU, 3.75 GB memory | `n1-standard-1` | `c5.large` | `A1 v2` |
+| Redis Sentinel - Queues / Shared State** | 3 | 1 vCPU, 3.75 GB memory | `n1-standard-1` | `c5.large` | `A1 v2` |
| Gitaly | 3 | 16 vCPU, 60 GB memory | `n1-standard-16` | `m5.4xlarge` | `D16s v3` |
| Praefect | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
| Praefect PostgreSQL* | 1+ | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
@@ -129,7 +129,7 @@ The Google Cloud Platform (GCP) architectures were built and tested using the
CPU platform. On different hardware you may find that adjustments, either lower
or higher, are required for your CPU or node counts. For more information, see
our [Sysbench](https://github.com/akopytov/sysbench)-based
-[CPU benchmark](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
+[CPU benchmarks](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
Due to better performance and availability, for data objects (such as LFS,
uploads, or artifacts), using an [object storage service](#configure-the-object-storage)
@@ -1917,7 +1917,12 @@ To configure the Sidekiq nodes, on each one:
### Sidekiq configuration ###
#######################################
sidekiq['listen_address'] = "0.0.0.0"
- sidekiq['cluster'] = true # no need to set this after GitLab 13.0
+
+ # Set number of Sidekiq queue processes to the same number as available CPUs
+ sidekiq['queue_groups'] = ['*'] * 4
+
+ # Set number of Sidekiq threads per queue process to the recommend number of 10
+ sidekiq['max_concurrency'] = 10
#######################################
### Monitoring configuration ###
@@ -1962,7 +1967,9 @@ To configure the Sidekiq nodes, on each one:
1. [Reconfigure GitLab](../restart_gitlab.md#omnibus-gitlab-reconfigure) for the changes to take effect.
NOTE:
-You can also run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
+If you find that the environment's Sidekiq job processing is slow with long queues,
+more nodes can be added as required. You can also tune your Sidekiq nodes to
+run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
<div align="right">
<a type="button" class="btn btn-default" href="#setup-components">
@@ -2363,7 +2370,7 @@ considered and customer technical support will be considered out of scope.
As an alternative approach, you can also run select components of GitLab as Cloud Native
in Kubernetes via our official [Helm Charts](https://docs.gitlab.com/charts/).
In this setup, we support running the equivalent of GitLab Rails and Sidekiq nodes
-in a Kubernetes cluster, named Webservice and Sidekiq respectively. In addition,
+in a Kubernetes cluster, named Webservice and Sidekiq respectively. In addition,
the following other supporting services are supported: NGINX, Task Runner, Migrations,
Prometheus and Grafana.
@@ -2405,8 +2412,8 @@ services where applicable):
| Internal load balancing node | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` |
| Redis - Cache** | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` |
| Redis - Queues / Shared State** | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` |
-| Redis Sentinel - Cache** | 3 | 1 vCPU, 1.7 GB memory | `g1-small` |
-| Redis Sentinel - Queues / Shared State** | 3 | 1 vCPU, 1.7 GB memory | `g1-small` |
+| Redis Sentinel - Cache** | 3 | 1 vCPU, 3.75 GB memory | `n1-standard-1` |
+| Redis Sentinel - Queues / Shared State** | 3 | 1 vCPU, 3.75 GB memory | `n1-standard-1` |
| Gitaly | 3 | 16 vCPU, 60 GB memory | `n1-standard-16` |
| Praefect | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` |
| Praefect PostgreSQL* | 1+ | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` |
diff --git a/doc/administration/reference_architectures/1k_users.md b/doc/administration/reference_architectures/1k_users.md
index bed584e94bd..6ad4e0f05e3 100644
--- a/doc/administration/reference_architectures/1k_users.md
+++ b/doc/administration/reference_architectures/1k_users.md
@@ -17,20 +17,20 @@ many organizations .
> - **Supported users (approximate):** 1,000
> - **High Availability:** No. For a highly-available environment, you can
-> follow the [3K reference architecture](3k_users.md).
+> follow a modified [3K reference architecture](3k_users.md#supported-modifications-for-lower-user-counts-ha).
> - **Test requests per second (RPS) rates:** API: 20 RPS, Web: 2 RPS, Git (Pull): 2 RPS, Git (Push): 1 RPS
-| Users | Configuration | GCP | AWS | Azure |
-|--------------|-------------------------|----------------|-----------------|----------------|
-| Up to 500 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
-| Up to 1,000 | 8 vCPU, 7.2 GB memory | n1-highcpu-8 | `c5.2xlarge` | F8s v2 |
+| Users | Configuration | GCP | AWS | Azure |
+|--------------|-------------------------|----------------|--------------|----------|
+| Up to 500 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+| Up to 1,000 | 8 vCPU, 7.2 GB memory | `n1-highcpu-8` | `c5.2xlarge` | `F8s v2` |
The Google Cloud Platform (GCP) architectures were built and tested using the
[Intel Xeon E5 v3 (Haswell)](https://cloud.google.com/compute/docs/cpu-platforms)
CPU platform. On different hardware you may find that adjustments, either lower
or higher, are required for your CPU or node counts. For more information, see
our [Sysbench](https://github.com/akopytov/sysbench)-based
-[CPU benchmark](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
+[CPU benchmarks](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
In addition to the stated configurations, we recommend having at least 2 GB of
swap on your server, even if you currently have enough available memory. Having
diff --git a/doc/administration/reference_architectures/25k_users.md b/doc/administration/reference_architectures/25k_users.md
index 7f9f284d085..129386d6ce5 100644
--- a/doc/administration/reference_architectures/25k_users.md
+++ b/doc/administration/reference_architectures/25k_users.md
@@ -15,25 +15,31 @@ full list of reference architectures, see
> - **High Availability:** Yes ([Praefect](#configure-praefect-postgresql) needs a third-party PostgreSQL solution for HA)
> - **Test requests per second (RPS) rates:** API: 500 RPS, Web: 50 RPS, Git (Pull): 50 RPS, Git (Push): 10 RPS
-| Service | Nodes | Configuration | GCP | AWS | Azure |
-|-----------------------------------------|-------------|-------------------------|-----------------|-------------|----------|
-| External load balancing node | 1 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
-| Consul | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| PostgreSQL | 3 | 16 vCPU, 60 GB memory | n1-standard-16 | `m5.4xlarge` | D16s v3 |
-| PgBouncer | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Internal load balancing node | 1 | 4 vCPU, 3.6GB memory | n1-highcpu-4 | `c5.large` | F2s v2 |
-| Redis - Cache | 3 | 4 vCPU, 15 GB memory | n1-standard-4 | `m5.xlarge` | D4s v3 |
-| Redis - Queues / Shared State | 3 | 4 vCPU, 15 GB memory | n1-standard-4 | `m5.xlarge` | D4s v3 |
-| Redis Sentinel - Cache | 3 | 1 vCPU, 1.7 GB memory | g1-small | `t3.small` | B1MS |
-| Redis Sentinel - Queues / Shared State | 3 | 1 vCPU, 1.7 GB memory | g1-small | `t3.small` | B1MS |
-| Gitaly | 3 | 32 vCPU, 120 GB memory | n1-standard-32 | `m5.8xlarge` | D32s v3 |
-| Praefect | 3 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
-| Praefect PostgreSQL | 1+* | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Sidekiq | 4 | 4 vCPU, 15 GB memory | n1-standard-4 | `m5.xlarge` | D4s v3 |
-| GitLab Rails | 5 | 32 vCPU, 28.8 GB memory | n1-highcpu-32 | `c5.9xlarge` | F32s v2 |
-| Monitoring node | 1 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
-| Object storage | n/a | n/a | n/a | n/a | n/a |
-| NFS server | 1 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
+| Service | Nodes | Configuration | GCP | AWS | Azure |
+|------------------------------------------|-------------|-------------------------|------------------|--------------|-----------|
+| External load balancing node | 1 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+| Consul* | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| PostgreSQL* | 3 | 16 vCPU, 60 GB memory | `n1-standard-1` | `m5.4xlarge` | `D16s v3` |
+| PgBouncer* | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Internal load balancing node | 1 | 4 vCPU, 3.6GB memory | `n1-highcpu-4` | `c5.large` | `F2s v2` |
+| Redis - Cache** | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
+| Redis - Queues / Shared State** | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
+| Redis Sentinel - Cache** | 3 | 1 vCPU, 3.75 GB memory | `n1-standard-1` | `c5.large` | `A1 v2` |
+| Redis Sentinel - Queues / Shared State** | 3 | 1 vCPU, 3.75 GB memory | `n1-standard-1` | `c5.large` | `A1 v2` |
+| Gitaly | 3 | 32 vCPU, 120 GB memory | `n1-standard-32` | `m5.8xlarge` | `D32s v3` |
+| Praefect | 3 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+| Praefect PostgreSQL* | 1+ | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Sidekiq | 4 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
+| GitLab Rails | 5 | 32 vCPU, 28.8 GB memory | `n1-highcpu-32` | `c5.9xlarge` | `F32s v2` |
+| Monitoring node | 1 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+| Object storage | n/a | n/a | n/a | n/a | n/a |
+| NFS server | 1 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+
+NOTE:
+Components marked with * can be optionally run on reputable
+third party external PaaS PostgreSQL solutions. Google Cloud SQL and AWS RDS are known to work.
+Components marked with ** can be optionally run on reputable
+third party external PaaS Redis solutions. Google Memorystore and AWS Elasticache are known to work.
```plantuml
@startuml 25k
@@ -123,7 +129,7 @@ The Google Cloud Platform (GCP) architectures were built and tested using the
CPU platform. On different hardware you may find that adjustments, either lower
or higher, are required for your CPU or node counts. For more information, see
our [Sysbench](https://github.com/akopytov/sysbench)-based
-[CPU benchmark](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
+[CPU benchmarks](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
Due to better performance and availability, for data objects (such as LFS,
uploads, or artifacts), using an [object storage service](#configure-the-object-storage)
@@ -1913,7 +1919,12 @@ To configure the Sidekiq nodes, on each one:
### Sidekiq configuration ###
#######################################
sidekiq['listen_address'] = "0.0.0.0"
- sidekiq['cluster'] = true # no need to set this after GitLab 13.0
+
+ # Set number of Sidekiq queue processes to the same number as available CPUs
+ sidekiq['queue_groups'] = ['*'] * 4
+
+ # Set number of Sidekiq threads per queue process to the recommend number of 10
+ sidekiq['max_concurrency'] = 10
#######################################
### Monitoring configuration ###
@@ -1958,7 +1969,9 @@ To configure the Sidekiq nodes, on each one:
1. [Reconfigure GitLab](../restart_gitlab.md#omnibus-gitlab-reconfigure) for the changes to take effect.
NOTE:
-You can also run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
+If you find that the environment's Sidekiq job processing is slow with long queues,
+more nodes can be added as required. You can also tune your Sidekiq nodes to
+run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
<div align="right">
<a type="button" class="btn btn-default" href="#setup-components">
diff --git a/doc/administration/reference_architectures/2k_users.md b/doc/administration/reference_architectures/2k_users.md
index e5418e47ca2..69e261cfbe6 100644
--- a/doc/administration/reference_architectures/2k_users.md
+++ b/doc/administration/reference_architectures/2k_users.md
@@ -13,19 +13,25 @@ For a full list of reference architectures, see
> - **Supported users (approximate):** 2,000
> - **High Availability:** No. For a highly-available environment, you can
-> follow the [3K reference architecture](3k_users.md).
+> follow a modified [3K reference architecture](3k_users.md#supported-modifications-for-lower-user-counts-ha).
> - **Test requests per second (RPS) rates:** API: 40 RPS, Web: 4 RPS, Git (Pull): 4 RPS, Git (Push): 1 RPS
-| Service | Nodes | Configuration | GCP | AWS | Azure |
-|------------------------------------------|--------|-------------------------|----------------|--------------|---------|
-| Load balancer | 1 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| PostgreSQL | 1 | 2 vCPU, 7.5 GB memory | n1-standard-2 | `m5.large` | D2s v3 |
-| Redis | 1 | 1 vCPU, 3.75 GB memory | n1-standard-1 | `m5.large` | D2s v3 |
-| Gitaly | 1 | 4 vCPU, 15 GB memory | n1-standard-4 | `m5.xlarge` | D4s v3 |
-| GitLab Rails | 2 | 8 vCPU, 7.2 GB memory | n1-highcpu-8 | `c5.2xlarge` | F8s v2 |
-| Monitoring node | 1 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Object storage | n/a | n/a | n/a | n/a | n/a |
-| NFS server (optional, not recommended) | 1 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
+| Service | Nodes | Configuration | GCP | AWS | Azure |
+|------------------------------------------|--------|-------------------------|-----------------|--------------|----------|
+| Load balancer | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| PostgreSQL* | 1 | 2 vCPU, 7.5 GB memory | `n1-standard-2` | `m5.large` | `D2s v3` |
+| Redis** | 1 | 1 vCPU, 3.75 GB memory | `n1-standard-1` | `m5.large` | `D2s v3` |
+| Gitaly | 1 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
+| GitLab Rails | 2 | 8 vCPU, 7.2 GB memory | `n1-highcpu-8` | `c5.2xlarge` | `F8s v2` |
+| Monitoring node | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Object storage | n/a | n/a | n/a | n/a | n/a |
+| NFS server (optional, not recommended) | 1 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+
+NOTE:
+Components marked with * can be optionally run on reputable
+third party external PaaS PostgreSQL solutions. Google Cloud SQL and AWS RDS are known to work.
+Components marked with ** can be optionally run on reputable
+third party external PaaS Redis solutions. Google Memorystore and AWS Elasticache are known to work.
```plantuml
@startuml 2k
@@ -60,7 +66,7 @@ The Google Cloud Platform (GCP) architectures were built and tested using the
CPU platform. On different hardware you may find that adjustments, either lower
or higher, are required for your CPU or node counts. For more information, see
our [Sysbench](https://github.com/akopytov/sysbench)-based
-[CPU benchmark](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
+[CPU benchmarks](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
Due to better performance and availability, for data objects (such as LFS,
uploads, or artifacts), using an [object storage service](#configure-the-object-storage)
@@ -642,6 +648,8 @@ On each node perform the following:
node_exporter['listen_address'] = '0.0.0.0:9100'
gitlab_workhorse['prometheus_listen_addr'] = '0.0.0.0:9229'
sidekiq['listen_address'] = "0.0.0.0"
+ # Set number of Sidekiq threads per queue process to the recommend number of 10
+ sidekiq['max_concurrency'] = 10
puma['listen'] = '0.0.0.0'
# Add the monitoring node's IP address to the monitoring whitelist and allow it to
diff --git a/doc/administration/reference_architectures/3k_users.md b/doc/administration/reference_architectures/3k_users.md
index b8d0a98a1f1..d81f98a35d4 100644
--- a/doc/administration/reference_architectures/3k_users.md
+++ b/doc/administration/reference_architectures/3k_users.md
@@ -14,8 +14,9 @@ than 3,000 users. For fewer users, reduce the stated node sizes as needed.
If maintaining a high level of uptime for your GitLab environment isn't a
requirement, or if you don't have the expertise to maintain this sort of
-environment, we recommend using the [2,000-user reference architecture](2k_users.md)
-for your GitLab installation.
+environment, we recommend using the non-HA [2,000-user reference architecture](2k_users.md)
+for your GitLab installation. If HA is still a requirement, there's several supported
+tweaks you can make to this architecture to reduce complexity as detailed here.
For a full list of reference architectures, see
[Available reference architectures](index.md#available-reference-architectures).
@@ -24,22 +25,28 @@ For a full list of reference architectures, see
> - **High Availability:** Yes, although [Praefect](#configure-praefect-postgresql) needs a third-party PostgreSQL solution
> - **Test requests per second (RPS) rates:** API: 60 RPS, Web: 6 RPS, Git (Pull): 6 RPS, Git (Push): 1 RPS
-| Service | Nodes | Configuration | GCP | AWS | Azure |
-|--------------------------------------------|-------------|-----------------------|----------------|-------------|---------|
-| External load balancing node | 1 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Redis | 3 | 2 vCPU, 7.5 GB memory | n1-standard-2 | `m5.large` | D2s v3 |
-| Consul + Sentinel | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| PostgreSQL | 3 | 2 vCPU, 7.5 GB memory | n1-standard-2 | `m5.large` | D2s v3 |
-| PgBouncer | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Internal load balancing node | 1 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Gitaly | 3 | 4 vCPU, 15 GB memory | n1-standard-4 | `m5.xlarge` | D4s v3 |
-| Praefect | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Praefect PostgreSQL | 1+* | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Sidekiq | 4 | 2 vCPU, 7.5 GB memory | n1-standard-2 | `m5.large` | D2s v3 |
-| GitLab Rails | 3 | 8 vCPU, 7.2 GB memory | n1-highcpu-8 | `c5.2xlarge` | F8s v2 |
-| Monitoring node | 1 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Object storage | n/a | n/a | n/a | n/a | n/a |
-| NFS server (optional, not recommended) | 1 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
+| Service | Nodes | Configuration | GCP | AWS | Azure |
+|--------------------------------------------|-------------|-----------------------|-----------------|--------------|----------|
+| External load balancing node | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Redis** | 3 | 2 vCPU, 7.5 GB memory | `n1-standard-2` | `m5.large` | `D2s v3` |
+| Consul* + Sentinel** | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| PostgreSQL* | 3 | 2 vCPU, 7.5 GB memory | `n1-standard-2` | `m5.large` | `D2s v3` |
+| PgBouncer* | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Internal load balancing node | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Gitaly | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
+| Praefect | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Praefect PostgreSQL* | 1+ | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Sidekiq | 4 | 2 vCPU, 7.5 GB memory | `n1-standard-2` | `m5.large` | `D2s v3` |
+| GitLab Rails | 3 | 8 vCPU, 7.2 GB memory | `n1-highcpu-8` | `c5.2xlarge` | `F8s v2` |
+| Monitoring node | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Object storage | n/a | n/a | n/a | n/a | n/a |
+| NFS server (optional, not recommended) | 1 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+
+NOTE:
+Components marked with * can be optionally run on reputable
+third party external PaaS PostgreSQL solutions. Google Cloud SQL and AWS RDS are known to work.
+Components marked with ** can be optionally run on reputable
+third party external PaaS Redis solutions. Google Memorystore and AWS Elasticache are known to work.
```plantuml
@startuml 3k
@@ -129,7 +136,7 @@ The Google Cloud Platform (GCP) architectures were built and tested using the
CPU platform. On different hardware you may find that adjustments, either lower
or higher, are required for your CPU or node counts. For more information, see
our [Sysbench](https://github.com/akopytov/sysbench)-based
-[CPU benchmark](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
+[CPU benchmarks](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
Due to better performance and availability, for data objects (such as LFS,
uploads, or artifacts), using an [object storage service](#configure-the-object-storage)
@@ -1595,6 +1602,12 @@ To configure the Sidekiq nodes, one each one:
#######################################
sidekiq['listen_address'] = "0.0.0.0"
+ # Set number of Sidekiq queue processes to the same number as available CPUs
+ sidekiq['queue_groups'] = ['*'] * 2
+
+ # Set number of Sidekiq threads per queue process to the recommend number of 10
+ sidekiq['max_concurrency'] = 10
+
#######################################
### Monitoring configuration ###
#######################################
@@ -1650,7 +1663,9 @@ To configure the Sidekiq nodes, one each one:
```
NOTE:
-You can also run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
+If you find that the environment's Sidekiq job processing is slow with long queues,
+more nodes can be added as required. You can also tune your Sidekiq nodes to
+run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
<div align="right">
<a type="button" class="btn btn-default" href="#setup-components">
@@ -2040,6 +2055,27 @@ considered and customer technical support will be considered out of scope.
[Read more about Gitaly and NFS](../gitaly/index.md#nfs-deprecation-notice) and
[the correct mount options to use](../nfs.md#upgrade-to-gitaly-cluster-or-disable-caching-if-experiencing-data-loss).
+## Supported modifications for lower user counts (HA)
+
+The 3k GitLab reference architecture is the smallest we recommend that achieves High Availability (HA).
+However, for environments that need to serve less users but maintain HA, there's several
+supported modifications you can make to this architecture to reduce complexity and cost.
+
+It should be noted that to achieve HA with GitLab, this architecture's makeup is ultimately what is
+required. Each component has various considerations and rules to follow and this architecture
+meets all of these. Smaller versions of this architecture will be fundamentally the same,
+but with smaller performance requirements, several modifications can be considered as follows:
+
+- Lowering node specs: Depending on your user count, you can lower all suggested node specs as desired. However, it's recommended that you don't go lower than the [general requirements](../../install/requirements.md).
+- Combining select nodes: Some nodes can be combined to reduce complexity at the cost of some performance:
+ - GitLab Rails and Sidekiq: Sidekiq nodes can be removed and the component instead enabled on the GitLab Rails nodes.
+ - PostgreSQL and PgBouncer: PgBouncer nodes can be removed and the component instead enabled on PostgreSQL with the Internal Load Balancer pointing to them instead.
+- Running select components in reputable Cloud PaaS solutions: Select components of the GitLab setup can instead be run on Cloud Provider PaaS solutions. By doing this, additional dependent components can also be removed:
+ - PostgreSQL: Can be run on reputable Cloud PaaS solutions such as Google Cloud SQL or AWS RDS. In this setup, the PgBouncer and Consul nodes are no longer required:
+ - Consul may still be desired if [Prometheus](../monitoring/prometheus/index.md) auto discovery is a requirement, otherwise you would need to [manually add scrape configurations](../monitoring/prometheus/index.md#adding-custom-scrape-configurations) for all nodes.
+ - As Redis Sentinel runs on the same box as Consul in this architecture, it may need to be run on a separate box if Redis is still being run via Omnibus.
+ - Redis: Can be run on reputable Cloud PaaS solutions such as Google Memorystore and AWS Elasticache. In this setup, the Redis Sentinel is no longer required.
+
<div align="right">
<a type="button" class="btn btn-default" href="#setup-components">
Back to setup components <i class="fa fa-angle-double-up" aria-hidden="true"></i>
diff --git a/doc/administration/reference_architectures/50k_users.md b/doc/administration/reference_architectures/50k_users.md
index 183a998e89a..5cc463c953d 100644
--- a/doc/administration/reference_architectures/50k_users.md
+++ b/doc/administration/reference_architectures/50k_users.md
@@ -15,25 +15,31 @@ full list of reference architectures, see
> - **High Availability:** Yes ([Praefect](#configure-praefect-postgresql) needs a third-party PostgreSQL solution for HA)
> - **Test requests per second (RPS) rates:** API: 1000 RPS, Web: 100 RPS, Git (Pull): 100 RPS, Git (Push): 20 RPS
-| Service | Nodes | Configuration | GCP | AWS | Azure |
-|-----------------------------------------|-------------|-------------------------|-----------------|--------------|----------|
-| External load balancing node | 1 | 8 vCPU, 7.2 GB memory | n1-highcpu-8 | `c5.2xlarge` | F8s v2 |
-| Consul | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| PostgreSQL | 3 | 32 vCPU, 120 GB memory | n1-standard-32 | `m5.8xlarge` | D32s v3 |
-| PgBouncer | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Internal load balancing node | 1 | 8 vCPU, 7.2 GB memory | n1-highcpu-8 | `c5.2xlarge` | F8s v2 |
-| Redis - Cache | 3 | 4 vCPU, 15 GB memory | n1-standard-4 | `m5.xlarge` | D4s v3 |
-| Redis - Queues / Shared State | 3 | 4 vCPU, 15 GB memory | n1-standard-4 | `m5.xlarge` | D4s v3 |
-| Redis Sentinel - Cache | 3 | 1 vCPU, 1.7 GB memory | g1-small | `t3.small` | B1MS |
-| Redis Sentinel - Queues / Shared State | 3 | 1 vCPU, 1.7 GB memory | g1-small | `t3.small` | B1MS |
-| Gitaly | 3 | 64 vCPU, 240 GB memory | n1-standard-64 | `m5.16xlarge` | D64s v3 |
-| Praefect | 3 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
-| Praefect PostgreSQL | 1+* | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Sidekiq | 4 | 4 vCPU, 15 GB memory | n1-standard-4 | `m5.xlarge` | D4s v3 |
-| GitLab Rails | 12 | 32 vCPU, 28.8 GB memory | n1-highcpu-32 | `c5.9xlarge` | F32s v2 |
-| Monitoring node | 1 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
-| Object storage | n/a | n/a | n/a | n/a | n/a |
-| NFS server | 1 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
+| Service | Nodes | Configuration | GCP | AWS | Azure |
+|------------------------------------------|-------------|-------------------------|------------------|---------------|-----------|
+| External load balancing node | 1 | 8 vCPU, 7.2 GB memory | `n1-highcpu-8` | `c5.2xlarge` | `F8s v2` |
+| Consul* | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| PostgreSQL* | 3 | 32 vCPU, 120 GB memory | `n1-standard-32` | `m5.8xlarge` | `D32s v3` |
+| PgBouncer* | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Internal load balancing node | 1 | 8 vCPU, 7.2 GB memory | `n1-highcpu-8` | `c5.2xlarge` | `F8s v2` |
+| Redis - Cache** | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
+| Redis - Queues / Shared State** | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
+| Redis Sentinel - Cache** | 3 | 1 vCPU, 3.75 GB memory | `n1-standard-1` | `c5.large` | `A1 v2` |
+| Redis Sentinel - Queues / Shared State** | 3 | 1 vCPU, 3.75 GB memory | `n1-standard-1` | `c5.large` | `A1 v2` |
+| Gitaly | 3 | 64 vCPU, 240 GB memory | `n1-standard-64` | `m5.16xlarge` | `D64s v3` |
+| Praefect | 3 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+| Praefect PostgreSQL* | 1+ | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Sidekiq | 4 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
+| GitLab Rails | 12 | 32 vCPU, 28.8 GB memory | `n1-highcpu-32` | `c5.9xlarge` | `F32s v2` |
+| Monitoring node | 1 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+| Object storage | n/a | n/a | n/a | n/a | n/a |
+| NFS server | 1 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+
+NOTE:
+Components marked with * can be optionally run on reputable
+third party external PaaS PostgreSQL solutions. Google Cloud SQL and AWS RDS are known to work.
+Components marked with ** can be optionally run on reputable
+third party external PaaS Redis solutions. Google Memorystore and AWS Elasticache are known to work.
```plantuml
@startuml 50k
@@ -123,7 +129,7 @@ The Google Cloud Platform (GCP) architectures were built and tested using the
CPU platform. On different hardware you may find that adjustments, either lower
or higher, are required for your CPU or node counts. For more information, see
our [Sysbench](https://github.com/akopytov/sysbench)-based
-[CPU benchmark](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
+[CPU benchmarks](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
Due to better performance and availability, for data objects (such as LFS,
uploads, or artifacts), using an [object storage service](#configure-the-object-storage)
@@ -1920,7 +1926,12 @@ To configure the Sidekiq nodes, on each one:
### Sidekiq configuration ###
#######################################
sidekiq['listen_address'] = "0.0.0.0"
- sidekiq['cluster'] = true # no need to set this after GitLab 13.0
+
+ # Set number of Sidekiq queue processes to the same number as available CPUs
+ sidekiq['queue_groups'] = ['*'] * 4
+
+ # Set number of Sidekiq threads per queue process to the recommend number of 10
+ sidekiq['max_concurrency'] = 10
#######################################
### Monitoring configuration ###
@@ -1965,7 +1976,9 @@ To configure the Sidekiq nodes, on each one:
1. [Reconfigure GitLab](../restart_gitlab.md#omnibus-gitlab-reconfigure) for the changes to take effect.
NOTE:
-You can also run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
+If you find that the environment's Sidekiq job processing is slow with long queues,
+more nodes can be added as required. You can also tune your Sidekiq nodes to
+run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
<div align="right">
<a type="button" class="btn btn-default" href="#setup-components">
diff --git a/doc/administration/reference_architectures/5k_users.md b/doc/administration/reference_architectures/5k_users.md
index 70d02bba855..792dd7020c7 100644
--- a/doc/administration/reference_architectures/5k_users.md
+++ b/doc/administration/reference_architectures/5k_users.md
@@ -22,22 +22,28 @@ costly-to-operate environment by using the
> - **High Availability:** Yes ([Praefect](#configure-praefect-postgresql) needs a third-party PostgreSQL solution for HA)
> - **Test requests per second (RPS) rates:** API: 100 RPS, Web: 10 RPS, Git (Pull): 10 RPS, Git (Push): 2 RPS
-| Service | Nodes | Configuration | GCP | AWS | Azure |
-|--------------------------------------------|-------------|-------------------------|----------------|-------------|----------|
-| External load balancing node | 1 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Redis | 3 | 2 vCPU, 7.5 GB memory | n1-standard-2 | `m5.large` | D2s v3 |
-| Consul + Sentinel | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| PostgreSQL | 3 | 4 vCPU, 15 GB memory | n1-standard-4 | `m5.xlarge` | D4s v3 |
-| PgBouncer | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Internal load balancing node | 1 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Gitaly | 3 | 8 vCPU, 30 GB memory | n1-standard-8 | `m5.2xlarge` | D8s v3 |
-| Praefect | 3 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Praefect PostgreSQL | 1+* | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Sidekiq | 4 | 2 vCPU, 7.5 GB memory | n1-standard-2 | `m5.large` | D2s v3 |
-| GitLab Rails | 3 | 16 vCPU, 14.4 GB memory | n1-highcpu-16 | `c5.4xlarge` | F16s v2 |
-| Monitoring node | 1 | 2 vCPU, 1.8 GB memory | n1-highcpu-2 | `c5.large` | F2s v2 |
-| Object storage | n/a | n/a | n/a | n/a | n/a |
-| NFS server (optional, not recommended) | 1 | 4 vCPU, 3.6 GB memory | n1-highcpu-4 | `c5.xlarge` | F4s v2 |
+| Service | Nodes | Configuration | GCP | AWS | Azure |
+|--------------------------------------------|-------------|-------------------------|-----------------|--------------|----------|
+| External load balancing node | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Redis** | 3 | 2 vCPU, 7.5 GB memory | `n1-standard-2` | `m5.large` | `D2s v3` |
+| Consul* + Sentinel** | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| PostgreSQL* | 3 | 4 vCPU, 15 GB memory | `n1-standard-4` | `m5.xlarge` | `D4s v3` |
+| PgBouncer* | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Internal load balancing node | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Gitaly | 3 | 8 vCPU, 30 GB memory | `n1-standard-8` | `m5.2xlarge` | `D8s v3` |
+| Praefect | 3 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Praefect PostgreSQL* | 1+ | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Sidekiq | 4 | 2 vCPU, 7.5 GB memory | `n1-standard-2` | `m5.large` | `D2s v3` |
+| GitLab Rails | 3 | 16 vCPU, 14.4 GB memory | `n1-highcpu-16` | `c5.4xlarge` | `F16s v2`|
+| Monitoring node | 1 | 2 vCPU, 1.8 GB memory | `n1-highcpu-2` | `c5.large` | `F2s v2` |
+| Object storage | n/a | n/a | n/a | n/a | n/a |
+| NFS server (optional, not recommended) | 1 | 4 vCPU, 3.6 GB memory | `n1-highcpu-4` | `c5.xlarge` | `F4s v2` |
+
+NOTE:
+Components marked with * can be optionally run on reputable
+third party external PaaS PostgreSQL solutions. Google Cloud SQL and AWS RDS are known to work.
+Components marked with ** can be optionally run on reputable
+third party external PaaS Redis solutions. Google Memorystore and AWS Elasticache are known to work.
```plantuml
@startuml 5k
@@ -127,7 +133,7 @@ The Google Cloud Platform (GCP) architectures were built and tested using the
CPU platform. On different hardware you may find that adjustments, either lower
or higher, are required for your CPU or node counts. For more information, see
our [Sysbench](https://github.com/akopytov/sysbench)-based
-[CPU benchmark](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
+[CPU benchmarks](https://gitlab.com/gitlab-org/quality/performance/-/wikis/Reference-Architectures/GCP-CPU-Benchmarks).
Due to better performance and availability, for data objects (such as LFS,
uploads, or artifacts), using an [object storage service](#configure-the-object-storage)
@@ -1585,6 +1591,12 @@ To configure the Sidekiq nodes, one each one:
#######################################
sidekiq['listen_address'] = "0.0.0.0"
+ # Set number of Sidekiq queue processes to the same number as available CPUs
+ sidekiq['queue_groups'] = ['*'] * 4
+
+ # Set number of Sidekiq threads per queue process to the recommend number of 10
+ sidekiq['max_concurrency'] = 10
+
#######################################
### Monitoring configuration ###
#######################################
@@ -1639,7 +1651,9 @@ To configure the Sidekiq nodes, one each one:
```
NOTE:
-You can also run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
+If you find that the environment's Sidekiq job processing is slow with long queues,
+more nodes can be added as required. You can also tune your Sidekiq nodes to
+run [multiple Sidekiq processes](../operations/extra_sidekiq_processes.md).
<div align="right">
<a type="button" class="btn btn-default" href="#setup-components">
diff --git a/doc/administration/reference_architectures/troubleshooting.md b/doc/administration/reference_architectures/troubleshooting.md
index 60a6ee0132f..d5f57965a80 100644
--- a/doc/administration/reference_architectures/troubleshooting.md
+++ b/doc/administration/reference_architectures/troubleshooting.md
@@ -4,7 +4,7 @@ group: Distribution
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# Troubleshooting a reference architecture setup
+# Troubleshooting a reference architecture setup **(FREE SELF)**
This page serves as the troubleshooting documentation if you followed one of
the [reference architectures](index.md#reference-architectures).
@@ -33,7 +33,7 @@ The dependency on disk storage also prevents Pages being deployed using the
### Incremental logging is required for CI to use object storage
If you configure GitLab to use object storage for CI logs and artifacts,
-[you must also enable incremental logging](../job_logs.md#new-incremental-logging-architecture).
+[you must also enable incremental logging](../job_logs.md#incremental-logging-architecture).
### Proxy Download
diff --git a/doc/administration/reply_by_email_postfix_setup.md b/doc/administration/reply_by_email_postfix_setup.md
index f310ef04295..f0b9daead69 100644
--- a/doc/administration/reply_by_email_postfix_setup.md
+++ b/doc/administration/reply_by_email_postfix_setup.md
@@ -9,7 +9,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w
This document will take you through the steps of setting up a basic Postfix mail
server with IMAP authentication on Ubuntu, to be used with [incoming email](incoming_email.md).
-The instructions make the assumption that you will be using the email address `incoming@gitlab.example.com`, that is, username `incoming` on host `gitlab.example.com`. Don't forget to change it to your actual host when executing the example code snippets.
+The instructions make the assumption that you are using the email address `incoming@gitlab.example.com`, that is, username `incoming` on host `gitlab.example.com`. Don't forget to change it to your actual host when executing the example code snippets.
## Configure your server firewall
@@ -127,7 +127,7 @@ The instructions make the assumption that you will be using the email address `i
## Configure Postfix to use Maildir-style mailboxes
-Courier, which we will install later to add IMAP authentication, requires mailboxes to have the Maildir format, rather than mbox.
+Courier, which we install later to add IMAP authentication, requires mailboxes to have the Maildir format, rather than mbox.
1. Configure Postfix to use Maildir-style mailboxes:
@@ -191,7 +191,7 @@ Courier, which we will install later to add IMAP authentication, requires mailbo
imapd start
```
-1. The `courier-authdaemon` isn't started after installation. Without it, IMAP authentication will fail:
+1. The `courier-authdaemon` isn't started after installation. Without it, IMAP authentication fails:
```shell
sudo service courier-authdaemon start
@@ -213,7 +213,7 @@ Courier, which we will install later to add IMAP authentication, requires mailbo
1. Let Postfix know about the IPs that it should consider part of the LAN:
- We'll assume `192.168.1.0/24` is your local LAN. You can safely skip this step if you don't have other machines in the same local network.
+ Let's assume `192.168.1.0/24` is your local LAN. You can safely skip this step if you don't have other machines in the same local network.
```shell
sudo postconf -e "mynetworks = 127.0.0.0/8, 192.168.1.0/24"
diff --git a/doc/administration/repository_storage_types.md b/doc/administration/repository_storage_types.md
index 29e31fcb6ef..21bb11226ce 100644
--- a/doc/administration/repository_storage_types.md
+++ b/doc/administration/repository_storage_types.md
@@ -131,7 +131,7 @@ forks use the object pool for shared objects. For more information, see
[How Git object deduplication works in GitLab](../development/git_object_deduplication.md).
Objects are moved from the source project to the object pool when housekeeping is run on the source
-project. Object pool repositories are stored similarly to regular repositories:
+project. Object pool repositories are stored similarly to regular repositories in a directory called `@pools` instead of `@hashed`
```ruby
# object pool paths
@@ -139,8 +139,8 @@ project. Object pool repositories are stored similarly to regular repositories:
```
WARNING:
-Do not run `git prune` or `git gc` in object pool repositories. This can cause data loss in the
-regular repositories that depend on the object pool.
+Do not run `git prune` or `git gc` in object pool repositories, which are stored in the `@pools` directory.
+This can cause data loss in the regular repositories that depend on the object pool.
### Object storage support
diff --git a/doc/administration/restart_gitlab.md b/doc/administration/restart_gitlab.md
index f4cc98ca145..5f7f08f4ecf 100644
--- a/doc/administration/restart_gitlab.md
+++ b/doc/administration/restart_gitlab.md
@@ -27,7 +27,7 @@ GitLab Rails application (Puma) as well as the other components, like:
### Omnibus GitLab restart
-There may be times in the documentation where you will be asked to _restart_
+There may be times in the documentation where you are asked to _restart_
GitLab. In that case, you need to run the following command:
```shell
@@ -73,7 +73,7 @@ As a last resort, you can try to
### Omnibus GitLab reconfigure
-There may be times in the documentation where you will be asked to _reconfigure_
+There may be times in the documentation where you are asked to _reconfigure_
GitLab. Remember that this method applies only for the Omnibus packages.
Reconfigure Omnibus GitLab with:
@@ -86,15 +86,15 @@ Reconfiguring GitLab should occur in the event that something in its
configuration (`/etc/gitlab/gitlab.rb`) has changed.
When you run this command, [Chef](https://www.chef.io/products/chef-infra/), the underlying configuration management
-application that powers Omnibus GitLab, will make sure that all things like directories,
+application that powers Omnibus GitLab, makes sure that all things like directories,
permissions, and services are in place and in the same shape that they were
initially shipped.
-It will also restart GitLab components where needed, if any of their
+It also restarts GitLab components where needed, if any of their
configuration files have changed.
If you manually edit any files in `/var/opt/gitlab` that are managed by Chef,
-running reconfigure will revert the changes AND restart the services that
+running reconfigure reverts the changes AND restarts the services that
depend on those files.
## Installations from source
diff --git a/doc/administration/server_hooks.md b/doc/administration/server_hooks.md
index 671da505e56..f67bf676a61 100644
--- a/doc/administration/server_hooks.md
+++ b/doc/administration/server_hooks.md
@@ -70,7 +70,12 @@ Assuming the hook code is properly implemented, the hook code is executed as app
To create a Git hook that applies to all of the repositories in your instance, set a global server
hook. The default global server hook directory is in the GitLab Shell directory. Any
-hook added there applies to all repositories.
+hook added there applies to all repositories, including:
+
+- [Project and group wiki](../user/project/wiki/index.md) repositories,
+ whose storage directory names are in the format `<id>.wiki.git`.
+- [Design management](../user/project/issues/design_management.md) repositories under a
+ project, whose storage directory names are in the format `<id>.design.git`.
The default directory:
diff --git a/doc/administration/terraform_state.md b/doc/administration/terraform_state.md
index 0c01279b04c..898d98495d9 100644
--- a/doc/administration/terraform_state.md
+++ b/doc/administration/terraform_state.md
@@ -4,7 +4,7 @@ group: Configure
info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
---
-# Terraform state administration (alpha) **(FREE)**
+# Terraform state administration **(FREE)**
> [Introduced](https://gitlab.com/groups/gitlab-org/-/epics/2673) in GitLab 12.10.
diff --git a/doc/administration/troubleshooting/defcon.md b/doc/administration/troubleshooting/defcon.md
new file mode 100644
index 00000000000..09e11553d97
--- /dev/null
+++ b/doc/administration/troubleshooting/defcon.md
@@ -0,0 +1,25 @@
+---
+stage: Enablement
+group: Distribution
+info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#assignments
+type: reference
+---
+
+# Disaster Recovery
+
+This document describes a feature that allows to easily disable some important but computationally
+expensive parts of the application, in order to relieve stress on the database in an ongoing downtime.
+
+## `ci_queueing_disaster_recovery`
+
+This feature flag, if enabled temporarily disables fair scheduling on shared runners.
+This can help reduce system resource usage on the `jobs/request` endpoint
+by significantly reducing computations being performed.
+
+Side effects:
+
+- In case of a large backlog of jobs, the jobs will be processed in the order
+they were put in the system instead of balancing the jobs across many projects
+- Projects which are out of quota will be run. This affects
+only jobs that were created during the last hour, as prior jobs are canceled
+by a periodic background worker (`StuckCiJobsWorker`).
diff --git a/doc/administration/troubleshooting/elasticsearch.md b/doc/administration/troubleshooting/elasticsearch.md
index 11425d464b9..d04ce23188f 100644
--- a/doc/administration/troubleshooting/elasticsearch.md
+++ b/doc/administration/troubleshooting/elasticsearch.md
@@ -251,13 +251,13 @@ can be made. If the indices:
- Can be made, Escalate this to GitLab support.
If the issue is not with creating an empty index, the next step is to check for errors
-during the indexing of projects. If errors do occur, they will either stem from the indexing:
+during the indexing of projects. If errors do occur, they stem from either the indexing:
- On the GitLab side. You need to rectify those. If they are not
something you are familiar with, contact GitLab support for guidance.
- Within the Elasticsearch instance itself. See if the error is [documented and has a fix](../../integration/elasticsearch.md#troubleshooting). If not, speak with your Elasticsearch administrator.
-If the indexing process does not present errors, you will want to check the status of the indexed projects. You can do this via the following Rake tasks:
+If the indexing process does not present errors, check the status of the indexed projects. You can do this via the following Rake tasks:
- [`sudo gitlab-rake gitlab:elastic:index_projects_status`](../../integration/elasticsearch.md#gitlab-advanced-search-rake-tasks) (shows the overall status)
- [`sudo gitlab-rake gitlab:elastic:projects_not_indexed`](../../integration/elasticsearch.md#gitlab-advanced-search-rake-tasks) (shows specific projects that are not indexed)
@@ -290,11 +290,11 @@ If the issue is:
regarding the error(s) you are seeing. If you are unsure here, it never hurts to reach
out to GitLab support.
-Beyond that, you will want to review the error. If it is:
+Beyond that, review the error. If it is:
- Specifically from the indexer, this could be a bug/issue and should be escalated to
GitLab support.
-- An OS issue, you will want to reach out to your systems administrator.
+- An OS issue, you should reach out to your systems administrator.
- A `Faraday::TimeoutError (execution expired)` error **and** you're using a proxy,
[set a custom `gitlab_rails['env']` environment variable, called `no_proxy`](https://docs.gitlab.com/omnibus/settings/environment-variables.html)
with the IP address of your Elasticsearch host.
@@ -365,7 +365,7 @@ require contacting an Elasticsearch administrator or GitLab Support.
The best place to start while debugging issues with an Advanced Search
migration is the [`elasticsearch.log` file](../logs.md#elasticsearchlog).
-Migrations will log information while a migration is in progress and any
+Migrations log information while a migration is in progress and any
errors encountered. Apply fixes for any errors found in the log and retry
the migration.
diff --git a/doc/administration/troubleshooting/gitlab_rails_cheat_sheet.md b/doc/administration/troubleshooting/gitlab_rails_cheat_sheet.md
index 6b1cf2d1194..588be73e786 100644
--- a/doc/administration/troubleshooting/gitlab_rails_cheat_sheet.md
+++ b/doc/administration/troubleshooting/gitlab_rails_cheat_sheet.md
@@ -305,6 +305,42 @@ p.statistics.refresh!
pp p.statistics # compare with earlier values
```
+### Identify deploy keys associated with blocked and non-member users
+
+When the user who created a deploy key is blocked or removed from the project, the key
+can no longer be used to push to protected branches in a private project (see [issue #329742](https://gitlab.com/gitlab-org/gitlab/-/issues/329742)).
+The following script identifies unusable deploy keys:
+
+```ruby
+ghost_user_id = User.ghost.id
+
+DeployKeysProject.with_write_access.find_each do |deploy_key_mapping|
+ project = deploy_key_mapping.project
+ deploy_key = deploy_key_mapping.deploy_key
+ user = deploy_key.user
+
+ access_checker = Gitlab::DeployKeyAccess.new(deploy_key, container: project)
+
+ # can_push_for_ref? tests if deploy_key can push to default branch, which is likely to be protected
+ can_push = access_checker.can_do_action?(:push_code)
+ can_push_to_default = access_checker.can_push_for_ref?(project.repository.root_ref)
+
+ next if access_checker.allowed? && can_push && can_push_to_default
+
+ if user.nil? || user.id == ghost_user_id
+ username = 'none'
+ state = '-'
+ else
+ username = user.username
+ user_state = user.state
+ end
+
+ puts "Deploy key: #{deploy_key.id}, Project: #{project.full_path}, Can push?: " + (can_push ? 'YES' : 'NO') +
+ ", Can push to default branch #{project.repository.root_ref}?: " + (can_push_to_default ? 'YES' : 'NO') +
+ ", User: #{username}, User state: #{user_state}"
+end
+```
+
## Wikis
### Recreate
@@ -537,7 +573,7 @@ inactive_users.each do |user|
end
```
-### Find Max permissions for project/group
+### Find a user's max permissions for project/group
```ruby
user = User.find_by_username 'username'
diff --git a/doc/administration/troubleshooting/log_parsing.md b/doc/administration/troubleshooting/log_parsing.md
index 2900ce58940..a0f71960e14 100644
--- a/doc/administration/troubleshooting/log_parsing.md
+++ b/doc/administration/troubleshooting/log_parsing.md
@@ -201,3 +201,42 @@ grep "fatal: " /var/log/gitlab/gitaly/current | \
jq '."grpc.request.glProjectPath"' | \
sort | uniq
```
+
+### Parsing `gitlab-shell.log`
+
+For investigating Git calls via SSH, from [GitLab 12.10](https://gitlab.com/gitlab-org/gitlab-shell/-/merge_requests/367).
+
+Find the top 20 calls by project and user:
+
+```shell
+jq --raw-output --slurp '
+ map(
+ select(
+ .username != null and
+ .gl_project_path !=null
+ )
+ )
+ | group_by(.username+.gl_project_path)
+ | sort_by(-length)
+ | limit(20; .[])
+ | "count: \(length)\tuser: \(.[0].username)\tproject: \(.[0].gl_project_path)" ' \
+ /var/log/gitlab/gitlab-shell/gitlab-shell.log
+```
+
+Find the top 20 calls by project, user, and command:
+
+```shell
+jq --raw-output --slurp '
+ map(
+ select(
+ .command != null and
+ .username != null and
+ .gl_project_path !=null
+ )
+ )
+ | group_by(.username+.gl_project_path+.command)
+ | sort_by(-length)
+ | limit(20; .[])
+ | "count: \(length)\tcommand: \(.[0].command)\tuser: \(.[0].username)\tproject: \(.[0].gl_project_path)" ' \
+ /var/log/gitlab/gitlab-shell/gitlab-shell.log
+```
diff --git a/doc/administration/troubleshooting/sidekiq.md b/doc/administration/troubleshooting/sidekiq.md
index 8d19a8a163b..297a8355036 100644
--- a/doc/administration/troubleshooting/sidekiq.md
+++ b/doc/administration/troubleshooting/sidekiq.md
@@ -11,7 +11,7 @@ tasks. When things go wrong it can be difficult to troubleshoot. These
situations also tend to be high-pressure because a production system job queue
may be filling up. Users will notice when this happens because new branches
may not show up and merge requests may not be updated. The following are some
-troubleshooting steps that will help you diagnose the bottleneck.
+troubleshooting steps to help you diagnose the bottleneck.
GitLab administrators/users should consider working through these
debug steps with GitLab Support so the backtraces can be analyzed by our team.
@@ -42,7 +42,7 @@ Example log output:
When using [Sidekiq JSON logging](../logs.md#sidekiqlog),
arguments logs are limited to a maximum size of 10 kilobytes of text;
-any arguments after this limit will be discarded and replaced with a
+any arguments after this limit are discarded and replaced with a
single argument containing the string `"..."`.
You can set `SIDEKIQ_LOG_ARGUMENTS` [environment variable](https://docs.gitlab.com/omnibus/settings/environment-variables.html)
@@ -58,7 +58,7 @@ In GitLab 13.5 and earlier, set `SIDEKIQ_LOG_ARGUMENTS` to `1` to start logging
## Thread dump
-Send the Sidekiq process ID the `TTIN` signal and it will output thread
+Send the Sidekiq process ID the `TTIN` signal to output thread
backtraces in the log file.
```shell
@@ -66,7 +66,7 @@ kill -TTIN <sidekiq_pid>
```
Check in `/var/log/gitlab/sidekiq/current` or `$GITLAB_HOME/log/sidekiq.log` for
-the backtrace output. The backtraces will be lengthy and generally start with
+the backtrace output. The backtraces are lengthy and generally start with
several `WARN` level messages. Here's an example of a single thread's backtrace:
```plaintext
@@ -88,8 +88,8 @@ Move on to other troubleshooting methods if this happens.
## Process profiling with `perf`
Linux has a process profiling tool called `perf` that is helpful when a certain
-process is eating up a lot of CPU. If you see high CPU usage and Sidekiq won't
-respond to the `TTIN` signal, this is a good next step.
+process is eating up a lot of CPU. If you see high CPU usage and Sidekiq isn't
+responding to the `TTIN` signal, this is a good next step.
If `perf` is not installed on your system, install it with `apt-get` or `yum`:
@@ -134,8 +134,8 @@ corresponding Ruby code where this is happening.
`gdb` can be another effective tool for debugging Sidekiq. It gives you a little
more interactive way to look at each thread and see what's causing problems.
-Attaching to a process with `gdb` will suspends the normal operation
-of the process (Sidekiq will not process jobs while `gdb` is attached).
+Attaching to a process with `gdb` suspends the normal operation
+of the process (Sidekiq does not process jobs while `gdb` is attached).
Start by attaching to the Sidekiq PID:
@@ -285,7 +285,7 @@ end
### Remove Sidekiq jobs for given parameters (destructive)
The general method to kill jobs conditionally is the following command, which
-will remove jobs that are queued but not started. Running jobs will not be killed.
+removes jobs that are queued but not started. Running jobs can not be killed.
```ruby
queue = Sidekiq::Queue.new('<queue name>')
@@ -294,7 +294,7 @@ queue.each { |job| job.delete if <condition>}
Have a look at the section below for cancelling running jobs.
-In the method above, `<queue-name>` is the name of the queue that contains the job(s) you want to delete and `<condition>` will decide which jobs get deleted.
+In the method above, `<queue-name>` is the name of the queue that contains the job(s) you want to delete and `<condition>` decides which jobs get deleted.
Commonly, `<condition>` references the job arguments, which depend on the type of job in question. To find the arguments for a specific queue, you can have a look at the `perform` function of the related worker file, commonly found at `/app/workers/<queue-name>_worker.rb`.
diff --git a/doc/administration/troubleshooting/ssl.md b/doc/administration/troubleshooting/ssl.md
index d7bfd537eca..7c0b745f8c2 100644
--- a/doc/administration/troubleshooting/ssl.md
+++ b/doc/administration/troubleshooting/ssl.md
@@ -238,3 +238,17 @@ remote server that is serving only HTTP.
One scenario is that you're using [object storage](../object_storage.md), which
isn't served under HTTPS. GitLab is misconfigured and attempts a TLS handshake,
but the object storage will respond with plain HTTP.
+
+## `schannel: SEC_E_UNTRUSTED_ROOT`
+
+If you're on Windows and get the following error:
+
+```plaintext
+Fatal: unable to access 'https://gitlab.domain.tld/group/project.git': schannel: SEC_E_UNTRUSTED_ROOT (0x80090325) - The certificate chain was issued by an authority that is not trusted."
+```
+
+You may need to specify that Git should use OpenSSL:
+
+```shell
+git config --system http.sslbackend openssl
+```
diff --git a/doc/administration/troubleshooting/test_environments.md b/doc/administration/troubleshooting/test_environments.md
index 53e51bbfe80..16ef4f83978 100644
--- a/doc/administration/troubleshooting/test_environments.md
+++ b/doc/administration/troubleshooting/test_environments.md
@@ -92,7 +92,7 @@ gitlab_rails['omniauth_providers'] = [
#### GroupSAML for GitLab.com
-See [the GDK SAML documentation](https://gitlab.com/gitlab-org/gitlab-development-kit/blob/master/doc/howto/saml.md).
+See [the GDK SAML documentation](https://gitlab.com/gitlab-org/gitlab-development-kit/blob/main/doc/howto/saml.md).
### Elasticsearch
diff --git a/doc/administration/whats-new.md b/doc/administration/whats-new.md
index 4cbb0b854ae..a5e3a232890 100644
--- a/doc/administration/whats-new.md
+++ b/doc/administration/whats-new.md
@@ -19,11 +19,23 @@ feature list, the feature list is tailored to your subscription type:
- Features only available to self-managed installations are not shown on GitLab.com.
- Features only available on GitLab.com are not shown to self-managed installations.
-The **What's new** feature cannot be disabled, but
-[is planned](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/59011) for a future release.
-
## Self-managed installations
Due to our release post process, the content for **What's new** is not yet finalized
when a new version (`.0` release) is cut. The updated **What's new** is included
in the first patch release, such as `13.10.1`.
+
+## Configure What's new variant
+
+You can configure the What's new variant:
+
+1. Navigate to **Admin Area > Settings > Preferences**, then expand **What's new**.
+1. Choose one of the following options:
+
+ | Option | Description |
+ | ------ | ----------- |
+ | Enable What's new: All tiers | What's new presents new features from all tiers to help you keep track of all new features. |
+ | Enable What's new: Current tier only | What's new presents new features for your current subscription tier, while hiding new features not available to your subscription tier. |
+ | Disable What's new | What's new is disabled and can no longer be viewed. |
+
+1. Save your changes.