From be834a25982746ffd85252ff502df42bb88cb9d5 Mon Sep 17 00:00:00 2001 From: GitLab Bot Date: Mon, 5 Oct 2020 13:54:15 +0000 Subject: Add latest changes from gitlab-org/gitlab@13-5-stable-ee --- doc/administration/audit_events.md | 9 +- doc/administration/auth/README.md | 2 +- doc/administration/geo/disaster_recovery/index.md | 15 +- .../geo/disaster_recovery/planned_failover.md | 2 +- .../geo/disaster_recovery/promotion_runbook.md | 268 +------------------- .../runbooks/planned_failover_multi_node.md | 274 +++++++++++++++++++++ .../runbooks/planned_failover_single_node.md | 269 ++++++++++++++++++++ doc/administration/geo/index.md | 10 +- doc/administration/geo/replication/datatypes.md | 68 +++-- doc/administration/geo/replication/disable_geo.md | 2 +- doc/administration/geo/replication/faq.md | 4 + .../geo/replication/geo_validation_tests.md | 16 ++ .../geo/replication/multiple_servers.md | 2 +- .../geo/replication/troubleshooting.md | 13 +- .../geo/replication/updating_the_geo_nodes.md | 7 +- .../geo/replication/version_specific_updates.md | 4 +- doc/administration/geo/setup/database.md | 12 +- doc/administration/gitaly/index.md | 52 ++-- doc/administration/gitaly/praefect.md | 8 +- doc/administration/gitaly/reference.md | 4 +- doc/administration/housekeeping.md | 3 + doc/administration/img/export_audit_log_v13_4.png | Bin 127518 -> 46643 bytes doc/administration/incoming_email.md | 2 +- doc/administration/index.md | 8 +- doc/administration/instance_limits.md | 5 +- doc/administration/job_artifacts.md | 29 ++- doc/administration/job_logs.md | 9 + doc/administration/libravatar.md | 11 +- doc/administration/logs.md | 6 +- doc/administration/merge_request_diffs.md | 45 +++- .../gitlab_self_monitoring_project/index.md | 26 +- doc/administration/monitoring/performance/index.md | 2 +- .../monitoring/prometheus/gitlab_metrics.md | 15 +- doc/administration/monitoring/prometheus/index.md | 6 +- doc/administration/nfs.md | 19 ++ doc/administration/object_storage.md | 12 +- 
doc/administration/packages/container_registry.md | 163 +++++------- doc/administration/packages/dependency_proxy.md | 36 +-- doc/administration/packages/index.md | 50 ++-- doc/administration/pages/index.md | 10 +- doc/administration/postgresql/external.md | 6 + doc/administration/postgresql/index.md | 5 +- doc/administration/postgresql/pgbouncer.md | 3 + .../postgresql/replication_and_failover.md | 110 ++++++++- doc/administration/postgresql/standalone.md | 14 +- doc/administration/raketasks/check.md | 31 +++ doc/administration/raketasks/doctor.md | 5 +- doc/administration/raketasks/github_import.md | 5 +- doc/administration/read_only_gitlab.md | 125 ++++++++++ .../redis/replication_and_failover.md | 6 +- .../redis/replication_and_failover_external.md | 12 +- doc/administration/redis/troubleshooting.md | 6 +- .../reference_architectures/10k_users.md | 117 +++++---- .../reference_architectures/1k_users.md | 23 +- .../reference_architectures/25k_users.md | 117 +++++---- .../reference_architectures/2k_users.md | 61 ++++- .../reference_architectures/3k_users.md | 86 ++++--- .../reference_architectures/50k_users.md | 117 +++++---- .../reference_architectures/5k_users.md | 86 ++++--- .../reference_architectures/index.md | 2 +- doc/administration/reply_by_email_postfix_setup.md | 4 +- doc/administration/smime_signing_email.md | 4 +- doc/administration/snippets/index.md | 4 +- .../troubleshooting/elasticsearch.md | 6 +- .../troubleshooting/gitlab_rails_cheat_sheet.md | 4 +- .../troubleshooting/kubernetes_cheat_sheet.md | 3 +- doc/administration/troubleshooting/ssl.md | 2 +- doc/administration/uploads.md | 6 +- 68 files changed, 1642 insertions(+), 826 deletions(-) create mode 100644 doc/administration/geo/disaster_recovery/runbooks/planned_failover_multi_node.md create mode 100644 doc/administration/geo/disaster_recovery/runbooks/planned_failover_single_node.md create mode 100644 doc/administration/read_only_gitlab.md (limited to 'doc/administration') diff --git 
a/doc/administration/audit_events.md b/doc/administration/audit_events.md index 099346b2b0b..ac972e2e33e 100644 --- a/doc/administration/audit_events.md +++ b/doc/administration/audit_events.md @@ -9,7 +9,7 @@ info: To determine the technical writer assigned to the Stage/Group associated w GitLab offers a way to view the changes made within the GitLab server for owners and administrators on a [paid plan](https://about.gitlab.com/pricing/). GitLab system administrators can also take advantage of the logs located on the -filesystem. See [the logs system documentation](logs.md) for more details. +file system. See [the logs system documentation](logs.md) for more details. ## Overview @@ -108,7 +108,7 @@ Server-wide audit logging introduces the ability to observe user actions across the entire instance of your GitLab server, making it easy to understand who changed what and when for audit purposes. -To view the server-wide admin log, visit **Admin Area > Monitoring > Audit Log**. +To view the server-wide administrator log, visit **Admin Area > Monitoring > Audit Log**. In addition to the group and project events, the following user actions are also recorded: @@ -126,6 +126,7 @@ recorded: - User was added ([introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/251) in GitLab 12.8) - User was blocked via Admin Area ([introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/251) in GitLab 12.8) - User was blocked via API ([introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/25872) in GitLab 12.9) +- Failed second-factor authentication attempt ([introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/16826) in GitLab 13.5) It's possible to filter particular actions by choosing an audit data type from the filter dropdown box. You can further filter by specific group, project, or user @@ -151,7 +152,7 @@ on adding these events into GitLab: The current architecture of audit events is not prepared to receive a very high amount of records. 
It may make the user interface for your project or audit logs very busy, and the disk space consumed by the -`audit_events` PostgreSQL table will increase considerably. It's disabled by default +`audit_events` PostgreSQL table may increase considerably. It's disabled by default to prevent performance degradations on GitLab instances with very high Git write traffic. In an upcoming release, Audit Logs for Git push events will be enabled @@ -172,6 +173,7 @@ the steps bellow. ```ruby Feature.enable(:repository_push_audit_event) + ``` ## Export to CSV **(PREMIUM ONLY)** @@ -183,6 +185,7 @@ the steps bellow. CAUTION: **Warning:** This feature might not be available to you. Check the **version history** note above for details. +If available, you can enable it with a [feature flag](#enable-or-disable-audit-log-export-to-csv). Export to CSV allows customers to export the current filter view of your audit log as a CSV file, diff --git a/doc/administration/auth/README.md b/doc/administration/auth/README.md index 926a4abab7d..cf82454cfd2 100644 --- a/doc/administration/auth/README.md +++ b/doc/administration/auth/README.md @@ -18,7 +18,7 @@ providers: - [Azure](../../integration/azure.md) - [Bitbucket Cloud](../../integration/bitbucket.md) - [CAS](../../integration/cas.md) -- [Crowd](../../integration/crowd.md) +- [Crowd](crowd.md) - [Facebook](../../integration/facebook.md) - [GitHub](../../integration/github.md) - [GitLab.com](../../integration/gitlab.md) diff --git a/doc/administration/geo/disaster_recovery/index.md b/doc/administration/geo/disaster_recovery/index.md index 8862776ee1b..dc46c0756db 100644 --- a/doc/administration/geo/disaster_recovery/index.md +++ b/doc/administration/geo/disaster_recovery/index.md @@ -11,7 +11,7 @@ Geo replicates your database, your Git repositories, and few other assets. We will support and replicate more data in the future, that will enable you to failover with minimal effort, in a disaster situation. 
-See [Geo current limitations](../index.md#current-limitations) for more information. +See [Geo limitations](../index.md#limitations) for more information. CAUTION: **Warning:** Disaster recovery for multi-secondary configurations is in **Alpha**. @@ -84,8 +84,8 @@ must disable the **primary** node. single recommendation. You may need to: - Reconfigure the load balancers. - - Change DNS records (for example, point the primary DNS record to the **secondary** - node in order to stop usage of the **primary** node). + - Change DNS records (for example, point the primary DNS record to the + **secondary** node to stop usage of the **primary** node). - Stop the virtual servers. - Block traffic through a firewall. - Revoke object storage permissions from the **primary** node. @@ -129,6 +129,9 @@ Note the following when promoting a secondary: ``` 1. Promote the **secondary** node to the **primary** node. + +DANGER: **Danger:** +In GitLab 13.2 and later versions, promoting a secondary node to a primary while the secondary is paused fails. We are [investigating the issue](https://gitlab.com/gitlab-org/gitlab/-/issues/225173). Do not pause replication before promoting a secondary. If the node is paused, please resume before promoting. To promote the secondary node to primary along with preflight checks: @@ -159,6 +162,9 @@ conjunction with multiple servers, as it can only perform changes on a **secondary** with only a single machine. Instead, you must do this manually. +DANGER: **Danger:** +In GitLab 13.2 and later versions, promoting a secondary node to a primary while the secondary is paused fails. We are [investigating the issue](https://gitlab.com/gitlab-org/gitlab/-/issues/225173). Do not pause replication before promoting a secondary. If the node is paused, please resume before promoting. + 1. 
SSH in to the database node in the **secondary** and trigger PostgreSQL to promote to read-write: @@ -201,6 +207,9 @@ an external PostgreSQL database, as it can only perform changes on a **secondary node with GitLab and the database on the same machine. As a result, a manual process is required: +DANGER: **Danger:** +In GitLab 13.2 and later versions, promoting a secondary node to a primary while the secondary is paused fails. We are [investigating the issue](https://gitlab.com/gitlab-org/gitlab/-/issues/225173). Do not pause replication before promoting a secondary. If the node is paused, please resume before promoting. + 1. Promote the replica database associated with the **secondary** site. This will set the database to read-write: - Amazon RDS - [Promoting a Read Replica to Be a Standalone DB Instance](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_ReadRepl.html#USER_ReadRepl.Promote) diff --git a/doc/administration/geo/disaster_recovery/planned_failover.md b/doc/administration/geo/disaster_recovery/planned_failover.md index 9b9c386652c..1238c4d8e2a 100644 --- a/doc/administration/geo/disaster_recovery/planned_failover.md +++ b/doc/administration/geo/disaster_recovery/planned_failover.md @@ -27,7 +27,7 @@ have a high degree of confidence in being able to perform them accurately. ## Not all data is automatically replicated -If you are using any GitLab features that Geo [doesn't support](../index.md#current-limitations), +If you are using any GitLab features that Geo [doesn't support](../index.md#limitations), you must make separate provisions to ensure that the **secondary** node has an up-to-date copy of any data associated with that feature. This may extend the required scheduled maintenance period significantly. 
diff --git a/doc/administration/geo/disaster_recovery/promotion_runbook.md b/doc/administration/geo/disaster_recovery/promotion_runbook.md index fb2353513df..7eb6ef01aee 100644 --- a/doc/administration/geo/disaster_recovery/promotion_runbook.md +++ b/doc/administration/geo/disaster_recovery/promotion_runbook.md @@ -1,269 +1,5 @@ --- -stage: Enablement -group: Geo -info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#designated-technical-writers -type: howto +redirect_to: runbooks/planned_failover_single_node.md --- -CAUTION: **Caution:** -This runbook is in **alpha**. For complete, production-ready documentation, see the -[disaster recovery documentation](index.md). - -# Disaster Recovery (Geo) promotion runbooks **(PREMIUM ONLY)** - -## Geo planned failover runbook 1 - -| Component | Configuration | -| ----------- | --------------- | -| PostgreSQL | Omnibus-managed | -| Geo site | Single-node | -| Secondaries | One | - -This runbook will guide you through a planned failover of a single-node Geo site -with one secondary. The following general architecture is assumed: - -```mermaid -graph TD - subgraph main[Geo deployment] - subgraph Primary[Primary site] - Node_1[(GitLab node)] - end - subgraph Secondary1[Secondary site] - Node_2[(GitLab node)] - end - end -``` - -This guide will result in the following: - -1. An offline primary. -1. A promoted secondary that is now the new primary. - -What is not covered: - -1. Re-adding the old **primary** as a secondary. -1. Adding a new secondary. - -### Preparation - -NOTE: **Note:** -Before following any of those steps, make sure you have `root` access to the -**secondary** to promote it, since there isn't provided an automated way to -promote a Geo replica and perform a failover. - -On the **secondary** node, navigate to the **Admin Area > Geo** dashboard to -review its status. 
Replicated objects (shown in green) should be close to 100%, -and there should be no failures (shown in red). If a large proportion of -objects aren't yet replicated (shown in gray), consider giving the node more -time to complete. - -![Replication status](img/replication-status.png) - -If any objects are failing to replicate, this should be investigated before -scheduling the maintenance window. After a planned failover, anything that -failed to replicate will be **lost**. - -You can use the -[Geo status API](../../../api/geo_nodes.md#retrieve-project-sync-or-verification-failures-that-occurred-on-the-current-node) -to review failed objects and the reasons for failure. -A common cause of replication failures is the data being missing on the -**primary** node - you can resolve these failures by restoring the data from backup, -or removing references to the missing data. - -The maintenance window won't end until Geo replication and verification is -completely finished. To keep the window as short as possible, you should -ensure these processes are close to 100% as possible during active use. - -If the **secondary** node is still replicating data from the **primary** node, -follow these steps to avoid unnecessary data loss: - -1. Until a [read-only mode](https://gitlab.com/gitlab-org/gitlab/-/issues/14609) - is implemented, updates must be prevented from happening manually to the - **primary**. Note that your **secondary** node still needs read-only - access to the **primary** node during the maintenance window: - - 1. At the scheduled time, using your cloud provider or your node's firewall, block - all HTTP, HTTPS and SSH traffic to/from the **primary** node, **except** for your IP and - the **secondary** node's IP. 
- - For instance, you can run the following commands on the **primary** node: - - ```shell - sudo iptables -A INPUT -p tcp -s --destination-port 22 -j ACCEPT - sudo iptables -A INPUT -p tcp -s --destination-port 22 -j ACCEPT - sudo iptables -A INPUT --destination-port 22 -j REJECT - - sudo iptables -A INPUT -p tcp -s --destination-port 80 -j ACCEPT - sudo iptables -A INPUT -p tcp -s --destination-port 80 -j ACCEPT - sudo iptables -A INPUT --tcp-dport 80 -j REJECT - - sudo iptables -A INPUT -p tcp -s --destination-port 443 -j ACCEPT - sudo iptables -A INPUT -p tcp -s --destination-port 443 -j ACCEPT - sudo iptables -A INPUT --tcp-dport 443 -j REJECT - ``` - - From this point, users will be unable to view their data or make changes on the - **primary** node. They will also be unable to log in to the **secondary** node. - However, existing sessions will work for the remainder of the maintenance period, and - public data will be accessible throughout. - - 1. Verify the **primary** node is blocked to HTTP traffic by visiting it in browser via - another IP. The server should refuse connection. - - 1. Verify the **primary** node is blocked to Git over SSH traffic by attempting to pull an - existing Git repository with an SSH remote URL. The server should refuse - connection. - - 1. On the **primary** node, disable non-Geo periodic background jobs by navigating - to **Admin Area > Monitoring > Background Jobs > Cron**, clicking `Disable All`, - and then clicking `Enable` for the `geo_sidekiq_cron_config_worker` cron job. - This job will re-enable several other cron jobs that are essential for planned - failover to complete successfully. - -1. Finish replicating and verifying all data: - - CAUTION: **Caution:** - Not all data is automatically replicated. Read more about - [what is excluded](planned_failover.md#not-all-data-is-automatically-replicated). - - 1. 
If you are manually replicating any - [data not managed by Geo](../replication/datatypes.md#limitations-on-replicationverification), - trigger the final replication process now. - 1. On the **primary** node, navigate to **Admin Area > Monitoring > Background Jobs > Queues** - and wait for all queues except those with `geo` in the name to drop to 0. - These queues contain work that has been submitted by your users; failing over - before it is completed will cause the work to be lost. - 1. On the **primary** node, navigate to **Admin Area > Geo** and wait for the - following conditions to be true of the **secondary** node you are failing over to: - - All replication meters to each 100% replicated, 0% failures. - - All verification meters reach 100% verified, 0% failures. - - Database replication lag is 0ms. - - The Geo log cursor is up to date (0 events behind). - - 1. On the **secondary** node, navigate to **Admin Area > Monitoring > Background Jobs > Queues** - and wait for all the `geo` queues to drop to 0 queued and 0 running jobs. - 1. On the **secondary** node, use [these instructions](../../raketasks/check.md) - to verify the integrity of CI artifacts, LFS objects, and uploads in file - storage. - - At this point, your **secondary** node will contain an up-to-date copy of everything the - **primary** node has, meaning nothing will be lost when you fail over. - -1. In this final step, you need to permanently disable the **primary** node. - - CAUTION: **Caution:** - When the **primary** node goes offline, there may be data saved on the **primary** node - that has not been replicated to the **secondary** node. This data should be treated - as lost if you proceed. - - TIP: **Tip:** - If you plan to [update the **primary** domain DNS record](index.md#step-4-optional-updating-the-primary-domain-dns-record), - you may wish to lower the TTL now to speed up propagation. 
- - When performing a failover, we want to avoid a split-brain situation where - writes can occur in two different GitLab instances. So to prepare for the - failover, you must disable the **primary** node: - - - If you have SSH access to the **primary** node, stop and disable GitLab: - - ```shell - sudo gitlab-ctl stop - ``` - - Prevent GitLab from starting up again if the server unexpectedly reboots: - - ```shell - sudo systemctl disable gitlab-runsvdir - ``` - - NOTE: **Note:** - (**CentOS only**) In CentOS 6 or older, there is no easy way to prevent GitLab from being - started if the machine reboots isn't available (see [Omnibus GitLab issue #3058](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/3058)). - It may be safest to uninstall the GitLab package completely with `sudo yum remove gitlab-ee`. - - NOTE: **Note:** - (**Ubuntu 14.04 LTS**) If you are using an older version of Ubuntu - or any other distribution based on the Upstart init system, you can prevent GitLab - from starting if the machine reboots as `root` with - `initctl stop gitlab-runsvvdir && echo 'manual' > /etc/init/gitlab-runsvdir.override && initctl reload-configuration`. - - - If you do not have SSH access to the **primary** node, take the machine offline and - prevent it from rebooting. Since there are many ways you may prefer to accomplish - this, we will avoid a single recommendation. You may need to: - - - Reconfigure the load balancers. - - Change DNS records (for example, point the **primary** DNS record to the **secondary** - node in order to stop usage of the **primary** node). - - Stop the virtual servers. - - Block traffic through a firewall. - - Revoke object storage permissions from the **primary** node. - - Physically disconnect a machine. - -### Promoting the **secondary** node - -Note the following when promoting a secondary: - -- A new **secondary** should not be added at this time. 
If you want to add a new - **secondary**, do this after you have completed the entire process of promoting - the **secondary** to the **primary**. -- If you encounter an `ActiveRecord::RecordInvalid: Validation failed: Name has already been taken` - error during this process, read - [the troubleshooting advice](../replication/troubleshooting.md#fixing-errors-during-a-failover-or-when-promoting-a-secondary-to-a-primary-node). - -To promote the secondary node: - -1. SSH in to your **secondary** node and login as root: - - ```shell - sudo -i - ``` - -1. Edit `/etc/gitlab/gitlab.rb` to reflect its new status as **primary** by - removing any lines that enabled the `geo_secondary_role`: - - ```ruby - ## In pre-11.5 documentation, the role was enabled as follows. Remove this line. - geo_secondary_role['enable'] = true - - ## In 11.5+ documentation, the role was enabled as follows. Remove this line. - roles ['geo_secondary_role'] - ``` - -1. Run the following command to list out all preflight checks and automatically - check if replication and verification are complete before scheduling a planned - failover to ensure the process will go smoothly: - - ```shell - gitlab-ctl promotion-preflight-checks - ``` - -1. Promote the **secondary**: - - ```shell - gitlab-ctl promote-to-primary-node - ``` - - If you have already run the [preflight checks](planned_failover.md#preflight-checks) - or don't want to run them, you can skip them: - - ```shell - gitlab-ctl promote-to-primary-node --skip-preflight-check - ``` - - You can also promote the secondary node to primary **without any further confirmation**, even when preflight checks fail: - - ```shell - sudo gitlab-ctl promote-to-primary-node --force - ``` - -1. Verify you can connect to the newly promoted **primary** node using the URL used - previously for the **secondary** node. - - If successful, the **secondary** node has now been promoted to the **primary** node. 
- -### Next steps - -To regain geographic redundancy as quickly as possible, you should -[add a new **secondary** node](../setup/index.md). To -do that, you can re-add the old **primary** as a new secondary and bring it back -online. +This document was moved to [another location](runbooks/planned_failover_single_node.md). diff --git a/doc/administration/geo/disaster_recovery/runbooks/planned_failover_multi_node.md b/doc/administration/geo/disaster_recovery/runbooks/planned_failover_multi_node.md new file mode 100644 index 00000000000..1e3bac0b354 --- /dev/null +++ b/doc/administration/geo/disaster_recovery/runbooks/planned_failover_multi_node.md @@ -0,0 +1,274 @@ +--- +stage: Enablement +group: Geo +info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#designated-technical-writers +type: howto +--- + +CAUTION: **Caution:** +This runbook is in **alpha**. For complete, production-ready documentation, see the +[disaster recovery documentation](../index.md). + +# Disaster Recovery (Geo) promotion runbooks **(PREMIUM ONLY)** + +## Geo planned failover for a multi-node configuration + +| Component | Configuration | +|-------------|-----------------| +| PostgreSQL | Omnibus-managed | +| Geo site | Multi-node | +| Secondaries | One | + +This runbook will guide you through a planned failover of a multi-node Geo site +with one secondary. 
The following [2000 user reference architecture](../../../../administration/reference_architectures/2k_users.md) is assumed: + +```mermaid +graph TD + subgraph main[Geo deployment] + subgraph Primary[Primary site, multi-node] + Node_1[Rails node 1] + Node_2[Rails node 2] + Node_3[PostgreSQL node] + Node_4[Gitaly node] + Node_5[Redis node] + Node_6[Monitoring node] + end + subgraph Secondary[Secondary site, multi-node] + Node_7[Rails node 1] + Node_8[Rails node 2] + Node_9[PostgreSQL node] + Node_10[Gitaly node] + Node_11[Redis node] + Node_12[Monitoring node] + end + end +``` + +The load balancer node and optional NFS server are omitted for clarity. + +This guide will result in the following: + +1. An offline primary. +1. A promoted secondary that is now the new primary. + +What is not covered: + +1. Re-adding the old **primary** as a secondary. +1. Adding a new secondary. + +### Preparation + +NOTE: **Note:** +Before following any of these steps, make sure you have `root` access to the +**secondary** to promote it, since no automated way is provided to +promote a Geo replica and perform a failover. + +On the **secondary** node, navigate to the **Admin Area > Geo** dashboard to +review its status. Replicated objects (shown in green) should be close to 100%, +and there should be no failures (shown in red). If a large proportion of +objects aren't yet replicated (shown in gray), consider giving the node more +time to complete. + +![Replication status](../img/replication-status.png) + +If any objects are failing to replicate, this should be investigated before +scheduling the maintenance window. After a planned failover, anything that +failed to replicate will be **lost**. + +You can use the +[Geo status API](../../../../api/geo_nodes.md#retrieve-project-sync-or-verification-failures-that-occurred-on-the-current-node) +to review failed objects and the reasons for failure. 
+A common cause of replication failures is the data being missing on the +**primary** node - you can resolve these failures by restoring the data from backup, +or removing references to the missing data. + +The maintenance window won't end until Geo replication and verification is +completely finished. To keep the window as short as possible, you should +ensure these processes are as close to 100% as possible during active use. + +If the **secondary** node is still replicating data from the **primary** node, +follow these steps to avoid unnecessary data loss: + +1. Until a [read-only mode](https://gitlab.com/gitlab-org/gitlab/-/issues/14609) + is implemented, you must manually prevent updates from happening on the + **primary**. Note that your **secondary** node still needs read-only + access to the **primary** node during the maintenance window: + + 1. At the scheduled time, using your cloud provider or your node's firewall, block + all HTTP, HTTPS and SSH traffic to/from the **primary** node, **except** for your IP and + the **secondary** node's IP. + + For instance, you can run the following commands on the **primary** node: + + ```shell + sudo iptables -A INPUT -p tcp -s <secondary_node_ip> --destination-port 22 -j ACCEPT + sudo iptables -A INPUT -p tcp -s <your_ip> --destination-port 22 -j ACCEPT + sudo iptables -A INPUT -p tcp --destination-port 22 -j REJECT + + sudo iptables -A INPUT -p tcp -s <secondary_node_ip> --destination-port 80 -j ACCEPT + sudo iptables -A INPUT -p tcp -s <your_ip> --destination-port 80 -j ACCEPT + sudo iptables -A INPUT -p tcp --destination-port 80 -j REJECT + + sudo iptables -A INPUT -p tcp -s <secondary_node_ip> --destination-port 443 -j ACCEPT + sudo iptables -A INPUT -p tcp -s <your_ip> --destination-port 443 -j ACCEPT + sudo iptables -A INPUT -p tcp --destination-port 443 -j REJECT + ``` + + From this point, users will be unable to view their data or make changes on the + **primary** node. They will also be unable to log in to the **secondary** node. 
+ However, existing sessions will work for the remainder of the maintenance period, and + public data will be accessible throughout. + + 1. Verify the **primary** node is blocked to HTTP traffic by visiting it in a browser via + another IP. The server should refuse connection. + + 1. Verify the **primary** node is blocked to Git over SSH traffic by attempting to pull an + existing Git repository with an SSH remote URL. The server should refuse + connection. + + 1. On the **primary** node, disable non-Geo periodic background jobs by navigating + to **Admin Area > Monitoring > Background Jobs > Cron**, clicking `Disable All`, + and then clicking `Enable` for the `geo_sidekiq_cron_config_worker` cron job. + This job will re-enable several other cron jobs that are essential for planned + failover to complete successfully. + +1. Finish replicating and verifying all data: + + CAUTION: **Caution:** + Not all data is automatically replicated. Read more about + [what is excluded](../planned_failover.md#not-all-data-is-automatically-replicated). + + 1. If you are manually replicating any + [data not managed by Geo](../../replication/datatypes.md#limitations-on-replicationverification), + trigger the final replication process now. + 1. On the **primary** node, navigate to **Admin Area > Monitoring > Background Jobs > Queues** + and wait for all queues except those with `geo` in the name to drop to 0. + These queues contain work that has been submitted by your users; failing over + before it is completed will cause the work to be lost. + 1. On the **primary** node, navigate to **Admin Area > Geo** and wait for the + following conditions to be true of the **secondary** node you are failing over to: + - All replication meters reach 100% replicated, 0% failures. + - All verification meters reach 100% verified, 0% failures. + - Database replication lag is 0ms. + - The Geo log cursor is up to date (0 events behind). + + 1. 
On the **secondary** node, navigate to **Admin Area > Monitoring > Background Jobs > Queues** + and wait for all the `geo` queues to drop to 0 queued and 0 running jobs. + 1. On the **secondary** node, use [these instructions](../../../raketasks/check.md) + to verify the integrity of CI artifacts, LFS objects, and uploads in file + storage. + + At this point, your **secondary** node will contain an up-to-date copy of everything the + **primary** node has, meaning nothing will be lost when you fail over. + +1. In this final step, you need to permanently disable the **primary** node. + + CAUTION: **Caution:** + When the **primary** node goes offline, there may be data saved on the **primary** node + that has not been replicated to the **secondary** node. This data should be treated + as lost if you proceed. + + TIP: **Tip:** + If you plan to [update the **primary** domain DNS record](../index.md#step-4-optional-updating-the-primary-domain-dns-record), + you may wish to lower the TTL now to speed up propagation. + + When performing a failover, we want to avoid a split-brain situation where + writes can occur in two different GitLab instances. So to prepare for the + failover, you must disable the **primary** node: + + - If you have SSH access to the **primary** node, stop and disable GitLab: + + ```shell + sudo gitlab-ctl stop + ``` + + Prevent GitLab from starting up again if the server unexpectedly reboots: + + ```shell + sudo systemctl disable gitlab-runsvdir + ``` + + NOTE: **Note:** + (**CentOS only**) In CentOS 6 or older, there is no easy way to prevent GitLab from being + started if the machine reboots (see [Omnibus GitLab issue #3058](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/3058)). + It may be safest to uninstall the GitLab package completely with `sudo yum remove gitlab-ee`. 
+ + NOTE: **Note:** + (**Ubuntu 14.04 LTS**) If you are using an older version of Ubuntu + or any other distribution based on the Upstart init system, you can prevent GitLab + from starting if the machine reboots by running the following as `root`: + `initctl stop gitlab-runsvdir && echo 'manual' > /etc/init/gitlab-runsvdir.override && initctl reload-configuration`. + + - If you do not have SSH access to the **primary** node, take the machine offline and + prevent it from rebooting. Since there are many ways you may prefer to accomplish + this, we will avoid a single recommendation. You may need to: + + - Reconfigure the load balancers. + - Change DNS records (for example, point the **primary** DNS record to the + **secondary** node to stop using the **primary** node). + - Stop the virtual servers. + - Block traffic through a firewall. + - Revoke object storage permissions from the **primary** node. + - Physically disconnect a machine. + +### Promoting the **secondary** node + +NOTE: **Note:** +A new **secondary** should not be added at this time. If you want to add a new +**secondary**, do this after you have completed the entire process of promoting +the **secondary** to the **primary**. + +CAUTION: **Caution:** +If you encounter an `ActiveRecord::RecordInvalid: Validation failed: Name has already been taken` error during this process, read +[the troubleshooting advice](../../replication/troubleshooting.md#fixing-errors-during-a-failover-or-when-promoting-a-secondary-to-a-primary-node). + +The `gitlab-ctl promote-to-primary-node` command cannot be used yet in +conjunction with multiple servers, as it can only +perform changes on a **secondary** with only a single machine. Instead, you must +do this manually. + +DANGER: **Danger:** +In GitLab 13.2 and later versions, promoting a secondary node to a primary while the secondary is paused fails. We are [investigating the issue](https://gitlab.com/gitlab-org/gitlab/-/issues/225173). Do not pause replication before promoting a secondary. 
If the node is paused, please resume before promoting. + +1. SSH in to the PostgreSQL node in the **secondary** and trigger PostgreSQL to + promote to read-write: + + ```shell + sudo gitlab-pg-ctl promote + ``` + + In GitLab 12.8 and earlier, see [Message: `sudo: gitlab-pg-ctl: command not found`](../../replication/troubleshooting.md#message-sudo-gitlab-pg-ctl-command-not-found). + +1. Edit `/etc/gitlab/gitlab.rb` on every machine in the **secondary** to + reflect its new status as **primary** by removing any lines that enabled the + `geo_secondary_role`: + + ```ruby + ## In pre-11.5 documentation, the role was enabled as follows. Remove this line. + geo_secondary_role['enable'] = true + + ## In 11.5+ documentation, the role was enabled as follows. Remove this line. + roles ['geo_secondary_role'] + ``` + + After making these changes, [reconfigure GitLab](../../../restart_gitlab.md#omnibus-gitlab-reconfigure) on each + machine so the changes take effect. + +1. Promote the **secondary** to **primary**. SSH into a single Rails node + server and execute: + + ```shell + sudo gitlab-rake geo:set_secondary_as_primary + ``` + +1. Verify you can connect to the newly promoted **primary** using the URL used + previously for the **secondary**. + +1. Success! The **secondary** has now been promoted to **primary**. + +### Next steps + +To regain geographic redundancy as quickly as possible, you should +[add a new **secondary** node](../../setup/index.md). To +do that, you can re-add the old **primary** as a new secondary and bring it back +online. 
diff --git a/doc/administration/geo/disaster_recovery/runbooks/planned_failover_single_node.md b/doc/administration/geo/disaster_recovery/runbooks/planned_failover_single_node.md new file mode 100644 index 00000000000..5e847030077 --- /dev/null +++ b/doc/administration/geo/disaster_recovery/runbooks/planned_failover_single_node.md @@ -0,0 +1,269 @@ +--- +stage: Enablement +group: Geo +info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/engineering/ux/technical-writing/#designated-technical-writers +type: howto +--- + +CAUTION: **Caution:** +This runbook is in **alpha**. For complete, production-ready documentation, see the +[disaster recovery documentation](../index.md). + +# Disaster Recovery (Geo) promotion runbooks **(PREMIUM ONLY)** + +## Geo planned failover for a single-node configuration + +| Component | Configuration | +|-------------|-----------------| +| PostgreSQL | Omnibus-managed | +| Geo site | Single-node | +| Secondaries | One | + +This runbook will guide you through a planned failover of a single-node Geo site +with one secondary. The following general architecture is assumed: + +```mermaid +graph TD + subgraph main[Geo deployment] + subgraph Primary[Primary site] + Node_1[(GitLab node)] + end + subgraph Secondary1[Secondary site] + Node_2[(GitLab node)] + end + end +``` + +This guide will result in the following: + +1. An offline primary. +1. A promoted secondary that is now the new primary. + +What is not covered: + +1. Re-adding the old **primary** as a secondary. +1. Adding a new secondary. + +### Preparation + +NOTE: **Note:** +Before following any of these steps, make sure you have `root` access to the +**secondary** to promote it, since no automated way is provided to +promote a Geo replica and perform a failover. + +On the **secondary** node, navigate to the **Admin Area > Geo** dashboard to +review its status. 
Replicated objects (shown in green) should be close to 100%, +and there should be no failures (shown in red). If a large proportion of +objects aren't yet replicated (shown in gray), consider giving the node more +time to complete. + +![Replication status](../img/replication-status.png) + +If any objects are failing to replicate, this should be investigated before +scheduling the maintenance window. After a planned failover, anything that +failed to replicate will be **lost**. + +You can use the +[Geo status API](../../../../api/geo_nodes.md#retrieve-project-sync-or-verification-failures-that-occurred-on-the-current-node) +to review failed objects and the reasons for failure. +A common cause of replication failures is the data being missing on the +**primary** node - you can resolve these failures by restoring the data from backup, +or removing references to the missing data. + +The maintenance window won't end until Geo replication and verification are +completely finished. To keep the window as short as possible, you should +ensure these processes are as close to 100% as possible during active use. + +If the **secondary** node is still replicating data from the **primary** node, +follow these steps to avoid unnecessary data loss: + +1. Until a [read-only mode](https://gitlab.com/gitlab-org/gitlab/-/issues/14609) + is implemented, you must manually prevent updates to the + **primary**. Note that your **secondary** node still needs read-only + access to the **primary** node during the maintenance window: + + 1. At the scheduled time, using your cloud provider or your node's firewall, block + all HTTP, HTTPS and SSH traffic to/from the **primary** node, **except** for your IP and + the **secondary** node's IP. 
+ + For instance, you can run the following commands on the **primary** node: + + ```shell + sudo iptables -A INPUT -p tcp -s <secondary_node_ip> --destination-port 22 -j ACCEPT + sudo iptables -A INPUT -p tcp -s <your_ip> --destination-port 22 -j ACCEPT + sudo iptables -A INPUT -p tcp --destination-port 22 -j REJECT + + sudo iptables -A INPUT -p tcp -s <secondary_node_ip> --destination-port 80 -j ACCEPT + sudo iptables -A INPUT -p tcp -s <your_ip> --destination-port 80 -j ACCEPT + sudo iptables -A INPUT -p tcp --destination-port 80 -j REJECT + + sudo iptables -A INPUT -p tcp -s <secondary_node_ip> --destination-port 443 -j ACCEPT + sudo iptables -A INPUT -p tcp -s <your_ip> --destination-port 443 -j ACCEPT + sudo iptables -A INPUT -p tcp --destination-port 443 -j REJECT + ``` + + From this point, users will be unable to view their data or make changes on the + **primary** node. They will also be unable to log in to the **secondary** node. + However, existing sessions will work for the remainder of the maintenance period, and + public data will be accessible throughout. + + 1. Verify the **primary** node is blocked to HTTP traffic by visiting it in a browser via + another IP. The server should refuse the connection. + + 1. Verify the **primary** node is blocked to Git over SSH traffic by attempting to pull an + existing Git repository with an SSH remote URL. The server should refuse the + connection. + + 1. On the **primary** node, disable non-Geo periodic background jobs by navigating + to **Admin Area > Monitoring > Background Jobs > Cron**, clicking `Disable All`, + and then clicking `Enable` for the `geo_sidekiq_cron_config_worker` cron job. + This job will re-enable several other cron jobs that are essential for planned + failover to complete successfully. + +1. Finish replicating and verifying all data: + + CAUTION: **Caution:** + Not all data is automatically replicated. Read more about + [what is excluded](../planned_failover.md#not-all-data-is-automatically-replicated). + + 1. 
If you are manually replicating any + [data not managed by Geo](../../replication/datatypes.md#limitations-on-replicationverification), + trigger the final replication process now. + 1. On the **primary** node, navigate to **Admin Area > Monitoring > Background Jobs > Queues** + and wait for all queues except those with `geo` in the name to drop to 0. + These queues contain work that has been submitted by your users; failing over + before it is completed will cause the work to be lost. + 1. On the **primary** node, navigate to **Admin Area > Geo** and wait for the + following conditions to be true of the **secondary** node you are failing over to: + - All replication meters reach 100% replicated, 0% failures. + - All verification meters reach 100% verified, 0% failures. + - Database replication lag is 0ms. + - The Geo log cursor is up to date (0 events behind). + + 1. On the **secondary** node, navigate to **Admin Area > Monitoring > Background Jobs > Queues** + and wait for all the `geo` queues to drop to 0 queued and 0 running jobs. + 1. On the **secondary** node, use [these instructions](../../../raketasks/check.md) + to verify the integrity of CI artifacts, LFS objects, and uploads in file + storage. + + At this point, your **secondary** node will contain an up-to-date copy of everything the + **primary** node has, meaning nothing will be lost when you fail over. + +1. In this final step, you need to permanently disable the **primary** node. + + CAUTION: **Caution:** + When the **primary** node goes offline, there may be data saved on the **primary** node + that has not been replicated to the **secondary** node. This data should be treated + as lost if you proceed. + + TIP: **Tip:** + If you plan to [update the **primary** domain DNS record](../index.md#step-4-optional-updating-the-primary-domain-dns-record), + you may wish to lower the TTL now to speed up propagation. 
+ + When performing a failover, we want to avoid a split-brain situation where + writes can occur in two different GitLab instances. So to prepare for the + failover, you must disable the **primary** node: + + - If you have SSH access to the **primary** node, stop and disable GitLab: + + ```shell + sudo gitlab-ctl stop + ``` + + Prevent GitLab from starting up again if the server unexpectedly reboots: + + ```shell + sudo systemctl disable gitlab-runsvdir + ``` + + NOTE: **Note:** + (**CentOS only**) In CentOS 6 or older, there is no easy way to prevent GitLab from being + started if the machine reboots (see [Omnibus GitLab issue #3058](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/3058)). + It may be safest to uninstall the GitLab package completely with `sudo yum remove gitlab-ee`. + + NOTE: **Note:** + (**Ubuntu 14.04 LTS**) If you are using an older version of Ubuntu + or any other distribution based on the Upstart init system, you can prevent GitLab + from starting if the machine reboots by running the following as `root`: + `initctl stop gitlab-runsvdir && echo 'manual' > /etc/init/gitlab-runsvdir.override && initctl reload-configuration`. + + - If you do not have SSH access to the **primary** node, take the machine offline and + prevent it from rebooting. Since there are many ways you may prefer to accomplish + this, we will avoid a single recommendation. You may need to: + + - Reconfigure the load balancers. + - Change DNS records (for example, point the **primary** DNS record to the + **secondary** node to stop using the **primary** node). + - Stop the virtual servers. + - Block traffic through a firewall. + - Revoke object storage permissions from the **primary** node. + - Physically disconnect a machine. + +### Promoting the **secondary** node + +Note the following when promoting a secondary: + +- A new **secondary** should not be added at this time. 
If you want to add a new + **secondary**, do this after you have completed the entire process of promoting + the **secondary** to the **primary**. +- If you encounter an `ActiveRecord::RecordInvalid: Validation failed: Name has already been taken` + error during this process, read + [the troubleshooting advice](../../replication/troubleshooting.md#fixing-errors-during-a-failover-or-when-promoting-a-secondary-to-a-primary-node). + +To promote the secondary node: + +1. SSH in to your **secondary** node and log in as root: + + ```shell + sudo -i + ``` + +1. Edit `/etc/gitlab/gitlab.rb` to reflect its new status as **primary** by + removing any lines that enabled the `geo_secondary_role`: + + ```ruby + ## In pre-11.5 documentation, the role was enabled as follows. Remove this line. + geo_secondary_role['enable'] = true + + ## In 11.5+ documentation, the role was enabled as follows. Remove this line. + roles ['geo_secondary_role'] + ``` + +1. Run the following command to list out all preflight checks and automatically + check if replication and verification are complete before scheduling a planned + failover to ensure the process will go smoothly: + + ```shell + gitlab-ctl promotion-preflight-checks + ``` + +1. Promote the **secondary**: + + ```shell + gitlab-ctl promote-to-primary-node + ``` + + If you have already run the [preflight checks](../planned_failover.md#preflight-checks) + or don't want to run them, you can skip them: + + ```shell + gitlab-ctl promote-to-primary-node --skip-preflight-check + ``` + + You can also promote the secondary node to primary **without any further confirmation**, even when preflight checks fail: + + ```shell + sudo gitlab-ctl promote-to-primary-node --force + ``` + +1. Verify you can connect to the newly promoted **primary** node using the URL used + previously for the **secondary** node. + + If successful, the **secondary** node has now been promoted to the **primary** node. 
+ +### Next steps + +To regain geographic redundancy as quickly as possible, you should +[add a new **secondary** node](../../setup/index.md). To +do that, you can re-add the old **primary** as a new secondary and bring it back +online. diff --git a/doc/administration/geo/index.md b/doc/administration/geo/index.md index 6fdf213ac78..47d19c1e12c 100644 --- a/doc/administration/geo/index.md +++ b/doc/administration/geo/index.md @@ -39,7 +39,7 @@ Implementing Geo provides the following benefits: In addition, it: -- Can be used for cloning and fetching projects, in addition to reading any data available in the GitLab web interface (see [current limitations](#current-limitations)). +- Can be used for cloning and fetching projects, in addition to reading any data available in the GitLab web interface (see [limitations](#limitations)). - Overcomes slow connections between distant offices, saving time by improving speed for distributed teams. - Helps reducing the loading time for automated tasks, custom integrations, and internal workflows. - Can quickly fail over to a **secondary** node in a [disaster recovery](disaster_recovery/index.md) scenario. @@ -69,7 +69,7 @@ Keep in mind that: - Replicate repositories, LFS Objects, and Attachments (HTTPS + JWT). - Since GitLab Premium 10.0, the **primary** node no longer talks to **secondary** nodes to notify for changes (API). - Pushing directly to a **secondary** node (for both HTTP and SSH, including Git LFS) was [introduced](https://about.gitlab.com/releases/2018/09/22/gitlab-11-3-released/) in [GitLab Premium](https://about.gitlab.com/pricing/#self-managed) 11.3. -- There are [limitations](#current-limitations) in the current implementation. +- There are [limitations](#limitations) when using Geo. 
### Architecture @@ -195,6 +195,9 @@ For information on how to update your Geo nodes to the latest GitLab version, se > [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/35913) in [GitLab Premium](https://about.gitlab.com/pricing/) 13.2. +DANGER: **Danger:** +In GitLab 13.2 and later versions, promoting a secondary node to a primary while the secondary is paused fails. We are [investigating the issue](https://gitlab.com/gitlab-org/gitlab/-/issues/225173). Do not pause replication before promoting a secondary. If the node is paused, please resume before promoting. + In some circumstances, like during [upgrades](replication/updating_the_geo_nodes.md) or a [planned failover](disaster_recovery/planned_failover.md), it is desirable to pause replication between the primary and secondary. Pausing and resuming replication is done via a command line tool from the secondary node. @@ -247,7 +250,7 @@ For more information on removing a Geo node, see [Removing **secondary** Geo nod To find out how to disable Geo, see [Disabling Geo](replication/disable_geo.md). -## Current limitations +## Limitations CAUTION: **Caution:** This list of limitations only reflects the latest version of GitLab. If you are using an older version, extra limitations may be in place. @@ -261,6 +264,7 @@ This list of limitations only reflects the latest version of GitLab. If you are - Object pools for forked project deduplication work only on the **primary** node, and are duplicated on the **secondary** node. - [External merge request diffs](../merge_request_diffs.md) will not be replicated if they are on-disk, and viewing merge requests will fail. However, external MR diffs in object storage **are** supported. The default configuration (in-database) does work. - GitLab Runners cannot register with a **secondary** node. Support for this is [planned for the future](https://gitlab.com/gitlab-org/gitlab/-/issues/3294). 
+- Geo **secondary** nodes can not be configured to [use high-availability configurations of PostgreSQL](https://gitlab.com/groups/gitlab-org/-/epics/2536). ### Limitations on replication/verification diff --git a/doc/administration/geo/replication/datatypes.md b/doc/administration/geo/replication/datatypes.md index 166a724f9c1..52a2b1521a9 100644 --- a/doc/administration/geo/replication/datatypes.md +++ b/doc/administration/geo/replication/datatypes.md @@ -47,6 +47,8 @@ verification methods: | Blobs | Container registry _(object storage)_ | Geo with API/Managed/Docker API (*2*) | _Not implemented_ | | Blobs | Package registry _(filesystem)_ | Geo with API | _Not implemented_ | | Blobs | Package registry _(object storage)_ | Geo with API/Managed (*2*) | _Not implemented_ | +| Blobs | Versioned Terraform State _(filesystem)_ | Geo with API | _Not implemented_ | +| Blobs | Versioned Terraform State _(object storage)_ | Geo with API/Managed (*2*) | _Not implemented_ | - (*1*): Redis replication can be used as part of HA with Redis sentinel. It's not used between Geo nodes. - (*2*): Object storage replication can be performed by Geo or by your object storage provider/appliance @@ -160,39 +162,33 @@ replicating data from those features will cause the data to be **lost**. If you wish to use those features on a **secondary** node, or to execute a failover successfully, you must replicate their data using some other means. 
-| Feature | Replicated (added in GitLab version) | Verified (added in GitLab version) | Notes | -|:------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------|:----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------| -| Application data in PostgreSQL | **Yes** (10.2) | **Yes** (10.2) | | -| Project repository | **Yes** (10.2) | **Yes** (10.7) | | -| Project wiki repository | **Yes** (10.2) | **Yes** (10.7) | | -| Project designs repository | **Yes** (12.7) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/32467) | | -| Uploads | **Yes** (10.2) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1817) | Verified only on transfer, or manually (*1*) | -| LFS objects | **Yes** (10.2) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/8922) | Verified only on transfer, or manually (*1*). Unavailable for new LFS objects in 11.11.x and 12.0.x (*2*). 
| -| CI job artifacts (other than traces) | **Yes** (10.4) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/8923) | Verified only manually (*1*) | -| Archived traces | **Yes** (10.4) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/8923) | Verified only on transfer, or manually (*1*) | -| Personal snippets | **Yes** (10.2) | **Yes** (10.2) | | -| [Versioned snippets](../../../user/snippets.md#versioned-snippets) | [No](https://gitlab.com/groups/gitlab-org/-/epics/2809) | [No](https://gitlab.com/groups/gitlab-org/-/epics/2810) | | -| Project snippets | **Yes** (10.2) | **Yes** (10.2) | | -| Object pools for forked project deduplication | **Yes** | No | | -| [Server-side Git hooks](../../server_hooks.md) | No | No | | -| [Elasticsearch integration](../../../integration/elasticsearch.md) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/1186) | No | | -| [GitLab Pages](../../pages/index.md) | [No](https://gitlab.com/groups/gitlab-org/-/epics/589) | No | | -| [Container Registry](../../packages/container_registry.md) | **Yes** (12.3) | No | | -| [NPM Registry](../../../user/packages/npm_registry/index.md) | **Yes** (13.2) | No | Behind feature flag `geo_package_file_replication`, enabled by default | -| [Maven Repository](../../../user/packages/maven_repository/index.md) | **Yes** (13.2) | No | Behind feature flag `geo_package_file_replication`, enabled by default | -| [Conan Repository](../../../user/packages/conan_repository/index.md) | **Yes** (13.2) | No | Behind feature flag `geo_package_file_replication`, enabled by default | -| [NuGet Repository](../../../user/packages/nuget_repository/index.md) | **Yes** (13.2) | No | Behind feature flag `geo_package_file_replication`, enabled by default | -| [PyPi Repository](../../../user/packages/pypi_repository/index.md) | **Yes** (13.2) | No | Behind feature flag `geo_package_file_replication`, enabled by default | -| [Composer Repository](../../../user/packages/composer_repository/index.md) | **Yes** (13.2) | 
No | Behind feature flag `geo_package_file_replication`, enabled by default | -| [External merge request diffs](../../merge_request_diffs.md) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/33817) | No | | -| [Terraform State](../../terraform_state.md) | [No](https://gitlab.com/groups/gitlab-org/-/epics/3112)(*3*) | No | | -| [Vulnerability Export](../../../user/application_security/security_dashboard/#export-vulnerabilities) | [No](https://gitlab.com/groups/gitlab-org/-/epics/3111)(*3*) | No | | -| Content in object storage | **Yes** (12.4) | No | | - -- (*1*): The integrity can be verified manually using - [Integrity Check Rake Task](../../raketasks/check.md) on both nodes and comparing - the output between them. -- (*2*): GitLab versions 11.11.x and 12.0.x are affected by [a bug that prevents any new - LFS objects from replicating](https://gitlab.com/gitlab-org/gitlab/-/issues/32696). -- (*3*): If you are using Object Storage, the replication can be performed by the - Object Storage provider if supported. 
Please see [Geo with Object Storage](object_storage.md) +| Feature | Replicated (added in GitLab version) | Verified (added in GitLab version) | Object Storage replication (please see [Geo with Object Storage](object_storage.md)) | Notes | +|:---------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------|:-------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [Application data in PostgreSQL](../../postgresql/index.md) | **Yes** (10.2) | **Yes** (10.2) | No | | +| [Project repository](../../..//user/project/repository/) | **Yes** (10.2) | **Yes** (10.7) | No | | +| [Project wiki repository](../../../user/project/wiki/) | **Yes** (10.2) | **Yes** (10.7) | No | | +| [Uploads](../../uploads.md) | **Yes** (10.2) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1817) | No | Verified only on transfer or manually using [Integrity Check Rake Task](../../raketasks/check.md) on both nodes and comparing the output between them. | +| [LFS objects](../../lfs/index.md) | **Yes** (10.2) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/8922) | Via Object Storage provider if supported. Native Geo support (Beta). | Verified only on transfer or manually using [Integrity Check Rake Task](../../raketasks/check.md) on both nodes and comparing the output between them. GitLab versions 11.11.x and 12.0.x are affected by [a bug that prevents any new LFS objects from replicating](https://gitlab.com/gitlab-org/gitlab/-/issues/32696). 
| +| [Personal snippets](../../../user/snippets.md#personal-snippets) | **Yes** (10.2) | **Yes** (10.2) | No | | +| [Project snippets](../../../user/snippets.md#project-snippets) | **Yes** (10.2) | **Yes** (10.2) | No | | +| [CI job artifacts (other than Job Logs)](../../../ci/pipelines/job_artifacts.md) | **Yes** (10.4) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/8923) | Via Object Storage provider if supported. Native Geo support (Beta) . | Verified only manually using [Integrity Check Rake Task](../../raketasks/check.md) on both nodes and comparing the output between them | +| [Job logs](../../job_logs.md) | **Yes** (10.4) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/8923) | Via Object Storage provider if supported. Native Geo support (Beta). | Verified only on transfer or manually using [Integrity Check Rake Task](../../raketasks/check.md) on both nodes and comparing the output between them | +| [Object pools for forked project deduplication](../../../development/git_object_deduplication.md) | **Yes** | No | No | | +| [Container Registry](../../packages/container_registry.md) | **Yes** (12.3) | No | No | | +| [Content in object storage (beta)](object_storage.md) | **Yes** (12.4) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/13845) | No | | +| [Project designs repository](../../../user/project/issues/design_management.md) | **Yes** (12.7) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/32467) | Via Object Storage provider if supported. Native Geo support (Beta). | | +| [NPM Registry](../../../user/packages/npm_registry/index.md) | **Yes** (13.2) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1817) | Via Object Storage provider if supported. Native Geo support (Beta). | Behind feature flag `geo_package_file_replication`, enabled by default | +| [Maven Repository](../../../user/packages/maven_repository/index.md) | **Yes** (13.2) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1817) | Via Object Storage provider if supported. 
Native Geo support (Beta). | Behind feature flag `geo_package_file_replication`, enabled by default | +| [Conan Repository](../../../user/packages/conan_repository/index.md) | **Yes** (13.2) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1817) | Via Object Storage provider if supported. Native Geo support (Beta). | Behind feature flag `geo_package_file_replication`, enabled by default | +| [NuGet Repository](../../../user/packages/nuget_repository/index.md) | **Yes** (13.2) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1817) | Via Object Storage provider if supported. Native Geo support (Beta). | Behind feature flag `geo_package_file_replication`, enabled by default | +| [PyPi Repository](../../../user/packages/pypi_repository/index.md) | **Yes** (13.2) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1817) | Via Object Storage provider if supported. Native Geo support (Beta). | Behind feature flag `geo_package_file_replication`, enabled by default | +| [Composer Repository](../../../user/packages/composer_repository/index.md) | **Yes** (13.2) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1817) | Via Object Storage provider if supported. Native Geo support (Beta). | Behind feature flag `geo_package_file_replication`, enabled by default | +| [Versioned Terraform State](../../terraform_state.md) | **Yes** (13.5) | No | Via Object Storage provider if supported. Native Geo support (Beta). | Behind feature flag `geo_terraform_state_version_replication`, enabled by default | +| [External merge request diffs](../../merge_request_diffs.md) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/33817) | No | Via Object Storage provider if supported. Native Geo support (Beta). 
| | +| [Versioned snippets](../../../user/snippets.md#versioned-snippets) | [No](https://gitlab.com/groups/gitlab-org/-/epics/2809) | [No](https://gitlab.com/groups/gitlab-org/-/epics/2810) | No | | +| [Server-side Git hooks](../../server_hooks.md) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1867) | No | No | | +| [Elasticsearch integration](../../../integration/elasticsearch.md) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/1186) | No | No | | +| [GitLab Pages](../../pages/index.md) | [No](https://gitlab.com/groups/gitlab-org/-/epics/589) | No | No | | +| [CI Pipeline Artifacts](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/models/ci/pipeline_artifact.rb) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/238464) | No | Via Object Storage provider if supported. Native Geo support (Beta). | Persists additional artifacts after a pipeline completes | +| [Dependency proxy images](../../../user/packages/dependency_proxy/index.md) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/259694) | No | No | Blocked on [Geo: Secondary Mimicry](https://gitlab.com/groups/gitlab-org/-/epics/1528). Note that replication of this cache is not needed for Disaster Recovery purposes because it can be recreated from external sources. | +| [Vulnerability Export](../../../user/application_security/security_dashboard/#export-vulnerabilities) | [Not planned](https://gitlab.com/groups/gitlab-org/-/epics/3111) | No | Via Object Storage provider if supported. Native Geo support (Beta). | Not planned because they are ephemeral and sensitive. They can be regenerated on demand. | diff --git a/doc/administration/geo/replication/disable_geo.md b/doc/administration/geo/replication/disable_geo.md index aed8e5fc3bc..14a11d9c1e3 100644 --- a/doc/administration/geo/replication/disable_geo.md +++ b/doc/administration/geo/replication/disable_geo.md @@ -29,7 +29,7 @@ anymore on these nodes. 
You can follow our docs to [remove your secondary Geo no If the current node that you want to keep using is a secondary node, you need to first promote it to primary. You can use our steps on [how to promote a secondary node](../disaster_recovery/#step-3-promoting-a-secondary-node) -in order to do that. +to do that. ## Remove the primary node from the UI diff --git a/doc/administration/geo/replication/faq.md b/doc/administration/geo/replication/faq.md index 3892d73b465..f7f391b360e 100644 --- a/doc/administration/geo/replication/faq.md +++ b/doc/administration/geo/replication/faq.md @@ -67,3 +67,7 @@ That's totally fine. We use HTTP(s) to fetch repository changes from the **prima ## Is this possible to set up a Docker Registry for a **secondary** node that mirrors the one on the **primary** node? Yes. See [Docker Registry for a **secondary** node](docker_registry.md). + +## Can I login to a secondary node? + +Yes, but secondary nodes receive all authentication data (like user accounts and logins) from the primary instance. This means you will be re-directed to the primary for authentication and routed back afterwards. diff --git a/doc/administration/geo/replication/geo_validation_tests.md b/doc/administration/geo/replication/geo_validation_tests.md index 8247b8c6336..efd070635cb 100644 --- a/doc/administration/geo/replication/geo_validation_tests.md +++ b/doc/administration/geo/replication/geo_validation_tests.md @@ -114,6 +114,22 @@ The following are GitLab upgrade validation tests we performed. The following are PostgreSQL upgrade validation tests we performed. +### September 2020 + +[Verify PostgreSQL 12 upgrade for Geo installations](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/5454): + +- Description: With PostgreSQL 12 available as an opt-in version in GitLab 13.3, we tested upgrading + existing Geo installations from PostgreSQL 11 to 12. We also re-tested fresh installations of GitLab + with Geo after fixes were made to support PostgreSQL 12. 
These tests were done using a + [nightly build](https://packages.gitlab.com/gitlab/nightly-builds/packages/ubuntu/bionic/gitlab-ee_13.3.6+rnightly.169516.d5209202-0_amd64.deb) + of GitLab 13.4. +- Outcome: Tests were successful for Geo deployments with a single database node on the primary and secondary. + We encountered known issues with repmgr and Patroni managed PostgreSQL clusters on the Geo primary. Using + PostgreSQL 12 with a database cluster on the primary is not recommended until the issues are resolved. +- Known issues for PostgreSQL clusters: + - [Ensure Patroni detects PostgreSQL update](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/5423) + - [Allow configuring permanent replication slots in patroni](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/5628) + ### August 2020 [Verify Geo installation with PostgreSQL 12](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/5453): diff --git a/doc/administration/geo/replication/multiple_servers.md b/doc/administration/geo/replication/multiple_servers.md index cba41c375a3..9828c52ee7d 100644 --- a/doc/administration/geo/replication/multiple_servers.md +++ b/doc/administration/geo/replication/multiple_servers.md @@ -133,7 +133,7 @@ Configure the following services, again using the non-Geo multi-node documentation: - [Configuring Redis for GitLab](../../redis/replication_and_failover.md#example-configuration-for-the-gitlab-application) for multiple nodes. -- [Gitaly](../../high_availability/gitaly.md), which will store data that is +- [Gitaly](../../gitaly/index.md), which will store data that is synchronized from the **primary** node. 
NOTE: **Note:** diff --git a/doc/administration/geo/replication/troubleshooting.md b/doc/administration/geo/replication/troubleshooting.md index f6d6f39fb19..0b6ff867f11 100644 --- a/doc/administration/geo/replication/troubleshooting.md +++ b/doc/administration/geo/replication/troubleshooting.md @@ -386,6 +386,15 @@ This happens when you have added IP addresses without a subnet mask in `postgres To fix this, add the subnet mask in `/etc/gitlab/gitlab.rb` under `postgresql['md5_auth_cidr_addresses']` to respect the CIDR format (i.e. `1.2.3.4/32`). +### Message: `Found data in the gitlabhq_production database!` when running `gitlab-ctl replicate-geo-database` + +This happens if data is detected in the `projects` table. When one or more projects are detected, the operation +is aborted to prevent accidental data loss. To bypass this message, pass the `--force` option to the command. + +In GitLab 13.4, a seed project is added when GitLab is first installed. This makes it necessary to pass `--force` even +on a new Geo secondary node. There is an [issue to account for seed projects](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/5618) +when checking the database. 
+ ### Very large repositories never successfully synchronize on the **secondary** node GitLab places a timeout on all repository clones, including project imports @@ -483,8 +492,8 @@ to start again from scratch, there are a few steps that can help you: gitlab-ctl start geo-postgresql ``` - Reconfigure in order to recreate the folders and make sure permissions and ownership - are correctly + Reconfigure to recreate the folders and make sure permissions and ownership + are correct: ```shell gitlab-ctl reconfigure diff --git a/doc/administration/geo/replication/updating_the_geo_nodes.md b/doc/administration/geo/replication/updating_the_geo_nodes.md index b78aeb06ebf..1af2b8d0b88 100644 --- a/doc/administration/geo/replication/updating_the_geo_nodes.md +++ b/doc/administration/geo/replication/updating_the_geo_nodes.md @@ -21,14 +21,17 @@ Updating Geo nodes involves performing: NOTE: **Note:** These general update steps are not intended for [high-availability deployments](https://docs.gitlab.com/omnibus/update/README.html#multi-node--ha-deployment), and will cause downtime. If you want to avoid downtime, consider using [zero downtime updates](https://docs.gitlab.com/omnibus/update/README.html#zero-downtime-updates). +DANGER: **Danger:** +In GitLab 13.2 and later versions, promoting a secondary node to a primary while the secondary is paused fails. We are [investigating the issue](https://gitlab.com/gitlab-org/gitlab/-/issues/225173). Do not pause replication before promoting a secondary. If the node is paused, please resume before promoting. + To update the Geo nodes when a new GitLab version is released, update **primary** and all **secondary** nodes: 1. **Optional:** [Pause replication on each **secondary** node.](../index.md#pausing-and-resuming-replication) 1. Log into the **primary** node. -1. [Update GitLab on the **primary** node using Omnibus](https://docs.gitlab.com/omnibus/update/README.html). +1. 
[Update GitLab on the **primary** node using Omnibus's Geo-specific steps](https://docs.gitlab.com/omnibus/update/README.html#geo-deployment). 1. Log into each **secondary** node. -1. [Update GitLab on each **secondary** node using Omnibus](https://docs.gitlab.com/omnibus/update/README.html). +1. [Update GitLab on each **secondary** node using Omnibus's Geo-specific steps](https://docs.gitlab.com/omnibus/update/README.html#geo-deployment). 1. If you paused replication in step 1, [resume replication on each **secondary**](../index.md#pausing-and-resuming-replication) 1. [Test](#check-status-after-updating) **primary** and **secondary** nodes, and check version in each. diff --git a/doc/administration/geo/replication/version_specific_updates.md b/doc/administration/geo/replication/version_specific_updates.md index 1ae246e3e61..71facb808ab 100644 --- a/doc/administration/geo/replication/version_specific_updates.md +++ b/doc/administration/geo/replication/version_specific_updates.md @@ -447,8 +447,8 @@ Omnibus is the following: > **IMPORTANT**: With GitLab 9.0, the PostgreSQL version is updated to 9.6 and manual steps are -required in order to update the **secondary** nodes and keep the Streaming -Replication working. Downtime is required, so plan ahead. +required to update the **secondary** nodes and keep the Streaming Replication +working. Downtime is required, so plan ahead. The following steps apply only if you update from a 8.17 GitLab version to 9.0+. For previous versions, update to GitLab 8.17 first before attempting to diff --git a/doc/administration/geo/setup/database.md b/doc/administration/geo/setup/database.md index aefa8a0e399..09b9c71aeb7 100644 --- a/doc/administration/geo/setup/database.md +++ b/doc/administration/geo/setup/database.md @@ -17,9 +17,10 @@ NOTE: **Note:** The stages of the setup process must be completed in the documented order. 
Before attempting the steps in this stage, [complete all prior stages](../setup/index.md#using-omnibus-gitlab). -This document describes the minimal steps you have to take in order to -replicate your **primary** GitLab database to a **secondary** node's database. You may -have to change some values according to your database setup, how big it is, etc. +This document describes the minimal steps you have to take to replicate your +**primary** GitLab database to a **secondary** node's database. You may have to +change some values, based on attributes including your database's setup and +size. You are encouraged to first read through all the steps before executing them in your testing/production environment. @@ -433,6 +434,11 @@ data before running `pg_basebackup`. NOTE: **Note:** Replication slot names must only contain lowercase letters, numbers, and the underscore character. + NOTE: **Note:** + In GitLab 13.4, a seed project is added when GitLab is first installed. This makes it necessary to pass `--force` even + on a new Geo secondary node. There is an [issue to account for seed projects](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/5618) + when checking the database. + When prompted, enter the _plaintext_ password you set up for the `gitlab_replicator` user in the first step. diff --git a/doc/administration/gitaly/index.md b/doc/administration/gitaly/index.md index e6b137bac29..750e6aab687 100644 --- a/doc/administration/gitaly/index.md +++ b/doc/administration/gitaly/index.md @@ -90,7 +90,7 @@ When running Gitaly on its own server, note the following regarding GitLab versi leveraged for redundancy on block-level Git data, but only has to be mounted on the Gitaly servers. - From GitLab 11.8 to 12.2, it is possible to use Elasticsearch in a Gitaly setup that doesn't use - NFS. In order to use Elasticsearch in these versions, the + NFS. 
To use Elasticsearch in these versions, the [repository indexer](../../integration/elasticsearch.md#elasticsearch-repository-indexer) must be enabled in your GitLab configuration. - [Since GitLab 12.3](https://gitlab.com/gitlab-org/gitlab/-/issues/6481), the new indexer is @@ -382,10 +382,10 @@ if previously enabled manually. Gitaly makes the following assumptions: - Your `gitaly1.internal` Gitaly server can be reached at `gitaly1.internal:8075` from your Gitaly - clients, and that Gitaly server can read and write to `/mnt/gitlab/default` and + clients, and that Gitaly server can read, write, and set permissions on `/mnt/gitlab/default` and `/mnt/gitlab/storage1`. - Your `gitaly2.internal` Gitaly server can be reached at `gitaly2.internal:8075` from your Gitaly - clients, and that Gitaly server can read and write to `/mnt/gitlab/storage2`. + clients, and that Gitaly server can read, write, and set permissions on `/mnt/gitlab/storage2`. - Your `gitaly1.internal` and `gitaly2.internal` Gitaly servers can reach each other. You can't define Gitaly servers with some as a local Gitaly server @@ -424,17 +424,17 @@ server (with `gitaly_address`) unless you setup with special storages: default: gitaly_address: tcp://gitaly1.internal:8075 - path: /some/dummy/path + path: /some/local/path storage1: gitaly_address: tcp://gitaly1.internal:8075 - path: /some/dummy/path + path: /some/local/path storage2: gitaly_address: tcp://gitaly2.internal:8075 - path: /some/dummy/path + path: /some/local/path ``` NOTE: **Note:** - `/some/dummy/path` should be set to a local folder that exists, however no data will be stored in + `/some/local/path` should be set to a local folder that exists, however no data will be stored in this folder. This will no longer be necessary after [this issue](https://gitlab.com/gitlab-org/gitaly/-/issues/1282) is resolved. 
@@ -482,6 +482,14 @@ git_data_dirs({ 'storage1' => { 'gitaly_address' => 'tcp://gitlab.internal:8075', 'path' => '/mnt/gitlab/git-data' }, 'storage2' => { 'gitaly_address' => 'tcp://gitaly2.internal:8075' }, }) + +# Make Gitaly accept connections on all network interfaces +gitaly['listen_addr'] = "0.0.0.0:8075" + +# Or for TLS +gitaly['tls_listen_addr'] = "0.0.0.0:9999" +gitaly['certificate_path'] = "/etc/gitlab/ssl/cert.pem" +gitaly['key_path'] = "/etc/gitlab/ssl/key.pem" ``` `path` can only be included for storage shards on the local Gitaly server. @@ -532,20 +540,12 @@ corresponding to each Gitaly server must be installed on that Gitaly server. Additionally, the certificate (or its certificate authority) must be installed on all: -- Gitaly servers, including the Gitaly server using the certificate. +- Gitaly servers. - Gitaly clients that communicate with it. -The process is documented in the -[GitLab custom certificate configuration](https://docs.gitlab.com/omnibus/settings/ssl.html#install-custom-public-certificates) -and repeated below. - Note the following: -- The certificate must specify the address you use to access the Gitaly server. If you are: - - Addressing the Gitaly server by a hostname, you can either use the Common Name field for this, - or add it as a Subject Alternative Name. - - Addressing the Gitaly server by its IP address, you must add it as a Subject Alternative Name to - the certificate. [gRPC does not support using an IP address as Common Name in a certificate](https://github.com/grpc/grpc/issues/2691). +- The certificate must specify the address you use to access the Gitaly server. You must add the hostname or IP address as a Subject Alternative Name to the certificate. - You can configure Gitaly servers with both an unencrypted listening address `listen_addr` and an encrypted listening address `tls_listen_addr` at the same time. This allows you to gradually transition from unencrypted to encrypted traffic if necessary. 
@@ -631,17 +631,17 @@ To configure Gitaly with TLS: storages: default: gitaly_address: tls://gitaly1.internal:9999 - path: /some/dummy/path + path: /some/local/path storage1: gitaly_address: tls://gitaly1.internal:9999 - path: /some/dummy/path + path: /some/local/path storage2: gitaly_address: tls://gitaly2.internal:9999 - path: /some/dummy/path + path: /some/local/path ``` NOTE: **Note:** - `/some/dummy/path` should be set to a local folder that exists, however no data will be stored + `/some/local/path` should be set to a local folder that exists, however no data will be stored in this folder. This will no longer be necessary after [Gitaly issue #1282](https://gitlab.com/gitlab-org/gitaly/-/issues/1282) is resolved. @@ -1021,6 +1021,9 @@ The second facet presents the only real solution. For this, we developed ## Troubleshooting Gitaly +Check [Gitaly timeouts](../../user/admin_area/settings/gitaly_timeouts.md) when troubleshooting +Gitaly. + ### Checking versions when using standalone Gitaly servers When using standalone Gitaly servers, you must make sure they are the same version @@ -1242,13 +1245,6 @@ unset http_proxy unset https_proxy ``` -### Gitaly not listening on new address after reconfiguring - -When updating the `gitaly['listen_addr']` or `gitaly['prometheus_listen_addr']` -values, Gitaly may continue to listen on the old address after a `sudo gitlab-ctl reconfigure`. - -When this occurs, performing a `sudo gitlab-ctl restart` will resolve the issue. This will no longer be necessary after [this issue](https://gitlab.com/gitlab-org/gitaly/-/issues/2521) is resolved. 
- ### Permission denied errors appearing in Gitaly logs when accessing repositories from a standalone Gitaly server If this error occurs even though file permissions are correct, it's likely that diff --git a/doc/administration/gitaly/praefect.md b/doc/administration/gitaly/praefect.md index 876904a2093..45c077cded1 100644 --- a/doc/administration/gitaly/praefect.md +++ b/doc/administration/gitaly/praefect.md @@ -547,14 +547,14 @@ To configure Praefect with TLS: storages: default: gitaly_address: tls://praefect1.internal:3305 - path: /some/dummy/path + path: /some/local/path storage1: gitaly_address: tls://praefect2.internal:3305 - path: /some/dummy/path + path: /some/local/path ``` NOTE: **Note:** - `/some/dummy/path` should be set to a local folder that exists, however no + `/some/local/path` should be set to a local folder that exists, however no data will be stored in this folder. This will no longer be necessary after [this issue](https://gitlab.com/gitlab-org/gitaly/-/issues/1282) is resolved. @@ -993,6 +993,8 @@ information, see the [strong consistency epic](https://gitlab.com/groups/gitlab- To enable strong consistency: +- In GitLab 13.5, you must use Git v2.28.0 or higher on Gitaly nodes to enable + strong consistency. - In GitLab 13.4 and later, the strong consistency voting strategy has been improved. Instead of requiring all nodes to agree, only the primary and half of the secondaries need to agree. This strategy is enabled by default. To diff --git a/doc/administration/gitaly/reference.md b/doc/administration/gitaly/reference.md index 0c211c220d7..53001b946d8 100644 --- a/doc/administration/gitaly/reference.md +++ b/doc/administration/gitaly/reference.md @@ -138,8 +138,8 @@ Most of the time we use `git cat-file --batch` processes for that. For better performance, Gitaly can re-use these `git cat-file` processes across RPC calls. 
Previously used processes are kept around in a ["Git cat-file cache"](https://about.gitlab.com/blog/2019/07/08/git-performance-on-nfs/#enter-cat-file-cache). -In order to control how much system resources this uses, we have a maximum number -of cat-file processes that can go into the cache. +To control how many system resources this uses, we have a maximum number of +cat-file processes that can go into the cache. The default limit is 100 `cat-file`s, which constitute a pair of `git cat-file --batch` and `git cat-file --batch-check` processes. If diff --git a/doc/administration/housekeeping.md b/doc/administration/housekeeping.md index 4110f8b7646..2882b05f415 100644 --- a/doc/administration/housekeeping.md +++ b/doc/administration/housekeeping.md @@ -28,6 +28,9 @@ the `pushes_since_gc` value is 200 a `git gc` will be run. `git add`. - `git repack` ([man page](https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-repack.html)) re-organize existing packs into a single, more efficient pack. +Housekeeping will also [remove unreferenced LFS files](../raketasks/cleanup.md#remove-unreferenced-lfs-files) +from your project on the same schedule as the `git gc` operation, freeing up storage space for your project. + You can find this option under your project's **Settings > General > Advanced**. ![Housekeeping settings](img/housekeeping_settings.png) diff --git a/doc/administration/img/export_audit_log_v13_4.png b/doc/administration/img/export_audit_log_v13_4.png index 1b404b5742c..e4ba330b8a9 100644 Binary files a/doc/administration/img/export_audit_log_v13_4.png and b/doc/administration/img/export_audit_log_v13_4.png differ diff --git a/doc/administration/incoming_email.md b/doc/administration/incoming_email.md index c0c03044225..f8c1a550b67 100644 --- a/doc/administration/incoming_email.md +++ b/doc/administration/incoming_email.md @@ -90,7 +90,7 @@ Be careful when choosing the domain used for receiving incoming email.
For the sake of example, suppose your top-level company domain is `hooli.com`. All employees in your company have an email address at that domain via Google Apps, and your company's private Slack instance requires a valid `@hooli.com` -email address in order to sign up. +email address to sign up. If you also host a public-facing GitLab instance at `hooli.com` and set your incoming email domain to `hooli.com`, an attacker could abuse the "Create new diff --git a/doc/administration/index.md b/doc/administration/index.md index a6448fcf64f..076658ead0e 100644 --- a/doc/administration/index.md +++ b/doc/administration/index.md @@ -52,8 +52,10 @@ Learn how to install, configure, update, and maintain your GitLab instance. - [GitLab Pages configuration](pages/index.md): Enable and configure GitLab Pages. - [GitLab Pages configuration for GitLab source installations](pages/source.md): Enable and configure GitLab Pages on [source installations](../install/installation.md#installation-from-source). - [Uploads administration](uploads.md): Configure GitLab uploads storage. -- [Environment variables](environment_variables.md): Supported environment variables that can be used to override their defaults values in order to configure GitLab. -- [Plugins](plugins.md): With custom plugins, GitLab administrators can introduce custom integrations without modifying GitLab's source code. +- [Environment variables](environment_variables.md): Supported environment + variables that can be used to override their default values to configure + GitLab. +- [Plugins](file_hooks.md): With custom plugins, GitLab administrators can introduce custom integrations without modifying GitLab's source code. 
- [Enforcing Terms of Service](../user/admin_area/settings/terms.md) - [Third party offers](../user/admin_area/settings/third_party_offers.md) - [Compliance](compliance.md): A collection of features from across the application that you may configure to help ensure that your GitLab instance and DevOps workflow meet compliance standards. @@ -113,7 +115,7 @@ Learn how to install, configure, update, and maintain your GitLab instance. - [Kerberos authentication](../integration/kerberos.md) **(STARTER ONLY)** - See also other [authentication](../topics/authentication/index.md#gitlab-administrators) topics (for example, enforcing 2FA). - [Email users](../tools/email.md): Email GitLab users from within GitLab. **(STARTER ONLY)** -- [User Cohorts](../user/admin_area/user_cohorts.md): Display the monthly cohorts of new users and their activities over time. +- [User Cohorts](../user/admin_area/analytics/user_cohorts.md): Display the monthly cohorts of new users and their activities over time. - [Audit logs and events](audit_events.md): View the changes made within the GitLab server for: - Groups and projects. **(STARTER)** - Instances. **(PREMIUM ONLY)** diff --git a/doc/administration/instance_limits.md b/doc/administration/instance_limits.md index abd98002934..6729338e0c7 100644 --- a/doc/administration/instance_limits.md +++ b/doc/administration/instance_limits.md @@ -480,7 +480,7 @@ indexed](#maximum-file-size-indexed)). - For self-managed installations it is unlimited by default This limit can be configured for self-managed installations when [enabling -Elasticsearch](../integration/elasticsearch.md#enabling-elasticsearch). +Elasticsearch](../integration/elasticsearch.md#enabling-advanced-search). NOTE: **Note:** Set the limit to `0` to disable it. 
@@ -552,6 +552,9 @@ Plan.default.actual_limits.update!(maven_max_file_size: 100.megabytes) # For PyPI Packages Plan.default.actual_limits.update!(pypi_max_file_size: 100.megabytes) + +# For Debian Packages +Plan.default.actual_limits.update!(debian_max_file_size: 100.megabytes) ``` Set the limit to `0` to allow any file size. diff --git a/doc/administration/job_artifacts.md b/doc/administration/job_artifacts.md index 2a79923b793..fd658116289 100644 --- a/doc/administration/job_artifacts.md +++ b/doc/administration/job_artifacts.md @@ -99,8 +99,13 @@ artifacts, you can use an object storage like AWS S3 instead. This configuration relies on valid AWS credentials to be configured already. Use an object storage option like AWS S3 to store job artifacts. +If you configure GitLab to store artifacts on object storage, you may also want to +[eliminate local disk usage for job logs](job_logs.md#prevent-local-disk-usage). +In both cases, job logs are archived and moved to object storage when the job completes. + DANGER: **Danger:** -If you configure GitLab to store CI logs and artifacts on object storage, you must also enable [incremental logging](job_logs.md#new-incremental-logging-architecture). Otherwise, job logs will disappear or not be saved. +In a multi-server setup you must use one of the options to +[eliminate local disk usage for job logs](job_logs.md#prevent-local-disk-usage), or job logs could be lost. [Read more about using object storage with GitLab](object_storage.md). @@ -117,9 +122,9 @@ For source installations the following settings are nested under `artifacts:` an |---------|-------------|---------| | `enabled` | Enable/disable object storage | `false` | | `remote_directory` | The bucket name where Artifacts will be stored| | -| `direct_upload` | Set to true to enable direct upload of Artifacts without the need of local shared storage. Option may be removed once we decide to support only single storage for all files. 
| `false` | -| `background_upload` | Set to false to disable automatic upload. Option may be removed once upload is direct to S3 | `true` | -| `proxy_download` | Set to true to enable proxying all files served. Option allows to reduce egress traffic as this allows clients to download directly from remote storage instead of proxying all data | `false` | +| `direct_upload` | Set to `true` to enable direct upload of Artifacts without the need of local shared storage. Option may be removed once we decide to support only single storage for all files. | `false` | +| `background_upload` | Set to `false` to disable automatic upload. Option may be removed once upload is direct to S3 | `true` | +| `proxy_download` | Set to `true` to enable proxying all files served. Option allows to reduce egress traffic as this allows clients to download directly from remote storage instead of proxying all data | `false` | | `connection` | Various connection options described below | | #### Connection settings @@ -203,9 +208,9 @@ _The artifacts are stored by default in enabled: true object_store: enabled: true - remote_directory: "artifacts" # The bucket name + remote_directory: "artifacts" # The bucket name connection: - provider: AWS # Only AWS supported at the moment + provider: AWS # Only AWS supported at the moment aws_access_key_id: AWS_ACCESS_KEY_ID aws_secret_access_key: AWS_SECRET_ACCESS_KEY region: eu-central-1 @@ -316,9 +321,9 @@ _The uploads are stored by default in **In Omnibus installations:** -In order to migrate back to local storage: +To migrate back to local storage: -1. Set both `direct_upload` and `background_upload` to false in `gitlab.rb`, under the artifacts object storage settings. +1. Set both `direct_upload` and `background_upload` to `false` in `gitlab.rb`, under the artifacts object storage settings. 1. [Reconfigure GitLab](restart_gitlab.md#omnibus-gitlab-reconfigure). 1. Run `gitlab-rake gitlab:artifacts:migrate_to_local`. 1. 
Disable object_storage for artifacts in `gitlab.rb`: @@ -419,10 +424,10 @@ generated by [GitLab Workhorse](https://gitlab.com/gitlab-org/gitlab-workhorse). that are located in the artifacts archive itself. The metadata file is in a binary format, with additional Gzip compression. -GitLab does not extract the artifacts archive in order to save space, memory -and disk I/O. It instead inspects the metadata file which contains all the -relevant information. This is especially important when there is a lot of -artifacts, or an archive is a very large file. +To save space, memory, and disk I/O, GitLab doesn't extract the artifacts +archive. It instead inspects the metadata file, which contains all the relevant +information. This is especially important when there are a lot of artifacts, or +an archive is a very large file. When clicking on a specific file, [GitLab Workhorse](https://gitlab.com/gitlab-org/gitlab-workhorse) extracts it from the archive and the download begins. This implementation saves space, diff --git a/doc/administration/job_logs.md b/doc/administration/job_logs.md index c34035e3c0c..c89ffb8da8b 100644 --- a/doc/administration/job_logs.md +++ b/doc/administration/job_logs.md @@ -65,6 +65,15 @@ job logs are automatically migrated to it along with the other job artifacts. See "Phase 4: uploading" in [Data flow](#data-flow) to learn about the process. +## Prevent local disk usage + +If you want to avoid any local disk usage for job logs, +you can do so using one of the following options: + +- Enable the [beta incremental logging](#new-incremental-logging-architecture) feature. +- Set the [job logs location](#changing-the-job-logs-local-location) + to an NFS drive.
+ ## How to remove job logs There isn't a way to automatically expire old job logs, but it's safe to remove diff --git a/doc/administration/libravatar.md b/doc/administration/libravatar.md index beecd9e4783..428a4b97a38 100644 --- a/doc/administration/libravatar.md +++ b/doc/administration/libravatar.md @@ -72,10 +72,9 @@ Then run `sudo gitlab-ctl reconfigure` for the changes to take effect. missing images for user email addresses that are not found on the Libravatar service. -In order to use a set other than `identicon`, replace the `&d=identicon` -portion of the URL with another supported set. -For example, you can use the `retro` set, in which case the URL would look like: -`plain_url: "http://cdn.libravatar.org/avatar/%{hash}?s=%{size}&d=retro"` +To use a set other than `identicon`, replace the `&d=identicon` portion of the +URL with another supported set. For example, you can use the `retro` set, in +which case the URL would look like: `plain_url: "http://cdn.libravatar.org/avatar/%{hash}?s=%{size}&d=retro"` ## Usage examples for Microsoft Office 365 @@ -84,8 +83,8 @@ Note that this service requires a login, so this use case is most useful in a corporate installation where all users have access to Office 365. ```ruby -gitlab_rails['gravatar_plain_url'] = 'http://outlook.office365.com/owa/service.svc/s/GetPersonaPhoto?email=%{email}&size=HR120x120' -gitlab_rails['gravatar_ssl_url'] = 'https://outlook.office365.com/owa/service.svc/s/GetPersonaPhoto?email=%{email}&size=HR120x120' +gitlab_rails['gravatar_plain_url'] = 'http://outlook.office.com/owa/service.svc/s/GetPersonaPhoto?email=%{email}&size=HR120x120' +gitlab_rails['gravatar_ssl_url'] = 'https://outlook.office.com/owa/service.svc/s/GetPersonaPhoto?email=%{email}&size=HR120x120' ```