diff options
author | Micaël Bergeron <mbergeron@gitlab.com> | 2018-02-21 11:43:21 -0500 |
---|---|---|
committer | Micaël Bergeron <mbergeron@gitlab.com> | 2018-03-01 10:34:30 -0500 |
commit | 0f1d348d683fdef6c36c3b244c85e59f582ff886 (patch) | |
tree | 5558ab163c6154e76a36b6345d22deb302eacc82 | |
parent | a2f375e8f74870dcdcfa1c7886bd1c14c80a684e (diff) | |
download | gitlab-ce-0f1d348d683fdef6c36c3b244c85e59f582ff886.tar.gz |
port the object storage to CE
24 files changed, 951 insertions, 58 deletions
diff --git a/app/models/appearance.rb b/app/models/appearance.rb index dcd14c08f3c..2a6406d63c7 100644 --- a/app/models/appearance.rb +++ b/app/models/appearance.rb @@ -1,5 +1,7 @@ class Appearance < ActiveRecord::Base include CacheMarkdownField + include AfterCommitQueue + include ObjectStorage::BackgroundMove cache_markdown_field :description cache_markdown_field :new_project_guidelines diff --git a/app/models/ci/job_artifact.rb b/app/models/ci/job_artifact.rb index 2dfd8d4ef58..df57b4f65e3 100644 --- a/app/models/ci/job_artifact.rb +++ b/app/models/ci/job_artifact.rb @@ -1,6 +1,7 @@ module Ci class JobArtifact < ActiveRecord::Base include AfterCommitQueue + include ObjectStorage::BackgroundMove extend Gitlab::Ci::Model belongs_to :project @@ -8,15 +9,11 @@ module Ci before_save :set_size, if: :file_changed? - mount_uploader :file, JobArtifactUploader + scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) } - after_save if: :file_changed?, on: [:create, :update] do - run_after_commit do - file.schedule_migration_to_object_storage - end - end + mount_uploader :file, JobArtifactUploader - delegate :open, :exists?, to: :file + delegate :exists?, :open, to: :file enum file_type: { archive: 1, @@ -28,6 +25,10 @@ module Ci self.where(project: project).sum(:size) end + def local_store? + [nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store) + end + def set_size self.size = file.size end diff --git a/app/models/concerns/avatarable.rb b/app/models/concerns/avatarable.rb index d35e37935fb..4d40a2c483e 100644 --- a/app/models/concerns/avatarable.rb +++ b/app/models/concerns/avatarable.rb @@ -3,6 +3,7 @@ module Avatarable included do prepend ShadowMethods + include ObjectStorage::BackgroundMove validate :avatar_type, if: ->(user) { user.avatar.present? && user.avatar_changed? 
} validates :avatar, file_size: { maximum: 200.kilobytes.to_i } diff --git a/app/models/lfs_object.rb b/app/models/lfs_object.rb index 65c157d61ca..04c75d827e0 100644 --- a/app/models/lfs_object.rb +++ b/app/models/lfs_object.rb @@ -1,10 +1,12 @@ class LfsObject < ActiveRecord::Base - prepend EE::LfsObject include AfterCommitQueue + include ObjectStorage::BackgroundMove has_many :lfs_objects_projects, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent has_many :projects, through: :lfs_objects_projects + scope :with_files_stored_locally, -> { where(file_store: [nil, LfsObjectUploader::Store::LOCAL]) } + validates :oid, presence: true, uniqueness: true scope :with_files_stored_locally, -> { where(file_store: [nil, LfsObjectUploader::Store::LOCAL]) } @@ -21,6 +23,10 @@ class LfsObject < ActiveRecord::Base projects.exists?(project.lfs_storage_project.id) end + def local_store? + [nil, LfsObjectUploader::Store::LOCAL].include?(self.file_store) + end + def self.destroy_unreferenced joins("LEFT JOIN lfs_objects_projects ON lfs_objects_projects.lfs_object_id = #{table_name}.id") .where(lfs_objects_projects: { id: nil }) diff --git a/app/models/upload.rb b/app/models/upload.rb index 3aca452616c..cf71a7b76fc 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -9,6 +9,8 @@ class Upload < ActiveRecord::Base validates :model, presence: true validates :uploader, presence: true + scope :with_files_stored_locally, -> { where(store: [nil, ObjectStorage::Store::LOCAL]) } + before_save :calculate_checksum!, if: :foreground_checksummable? after_commit :schedule_checksum, if: :checksummable? 
@@ -34,8 +36,8 @@ class Upload < ActiveRecord::Base self.checksum = Digest::SHA256.file(absolute_path).hexdigest end - def build_uploader - uploader_class.new(model, mount_point, **uploader_context).tap do |uploader| + def build_uploader(mounted_as = nil) + uploader_class.new(model, mounted_as || mount_point).tap do |uploader| uploader.upload = self uploader.retrieve_from_store!(identifier) end @@ -52,6 +54,12 @@ class Upload < ActiveRecord::Base }.compact end + def local? + return true if store.nil? + + store == ObjectStorage::Store::LOCAL + end + private def delete_file! @@ -62,12 +70,6 @@ class Upload < ActiveRecord::Base checksum.nil? && local? && exist? end - def local? - return true if store.nil? - - store == ObjectStorage::Store::LOCAL - end - def foreground_checksummable? checksummable? && size <= CHECKSUM_THRESHOLD end diff --git a/app/uploaders/attachment_uploader.rb b/app/uploaders/attachment_uploader.rb index cd819dc9bff..11e038f9327 100644 --- a/app/uploaders/attachment_uploader.rb +++ b/app/uploaders/attachment_uploader.rb @@ -2,7 +2,6 @@ class AttachmentUploader < GitlabUploader include RecordsUploads::Concern include ObjectStorage::Concern prepend ObjectStorage::Extension::RecordsUploads - include UploaderHelper private diff --git a/app/uploaders/file_uploader.rb b/app/uploaders/file_uploader.rb index 051f1b19938..0e2da64de6a 100644 --- a/app/uploaders/file_uploader.rb +++ b/app/uploaders/file_uploader.rb @@ -15,10 +15,12 @@ class FileUploader < GitlabUploader MARKDOWN_PATTERN = %r{\!?\[.*?\]\(/uploads/(?<secret>[0-9a-f]{32})/(?<file>.*?)\)} DYNAMIC_PATH_PATTERN = %r{(?<secret>\h{32})/(?<identifier>.*)} - attr_accessor :model - after :remove, :prune_store_dir + # FileUploader do not run in a model transaction, so we can simply + # enqueue a job after the :store hook. 
+ after :store, :schedule_background_upload + def self.root File.join(options.storage_path, 'uploads') end diff --git a/app/uploaders/object_storage.rb b/app/uploaders/object_storage.rb new file mode 100644 index 00000000000..55f07967dfc --- /dev/null +++ b/app/uploaders/object_storage.rb @@ -0,0 +1,314 @@ +require 'fog/aws' +require 'carrierwave/storage/fog' + +# +# This concern should add object storage support +# to the GitlabUploader class +# +module ObjectStorage + RemoteStoreError = Class.new(StandardError) + UnknownStoreError = Class.new(StandardError) + ObjectStorageUnavailable = Class.new(StandardError) + + module Store + LOCAL = 1 + REMOTE = 2 + end + + module Extension + # this extension is the glue between the ObjectStorage::Concern and RecordsUploads::Concern + module RecordsUploads + extend ActiveSupport::Concern + + prepended do |base| + raise "#{base} must include ObjectStorage::Concern to use extensions." unless base < Concern + + base.include(::RecordsUploads::Concern) + end + + def retrieve_from_store!(identifier) + paths = store_dirs.map { |store, path| File.join(path, identifier) } + + unless current_upload_satisfies?(paths, model) + # the upload we already have isn't right, find the correct one + self.upload = uploads.find_by(model: model, path: paths) + end + + super + end + + def build_upload + super.tap do |upload| + upload.store = object_store + end + end + + def upload=(upload) + return unless upload + + self.object_store = upload.store + super + end + + def schedule_background_upload(*args) + return unless schedule_background_upload? 
+ + ObjectStorage::BackgroundMoveWorker.perform_async(self.class.name, + upload.class.to_s, + mounted_as, + upload.id) + end + + private + + def current_upload_satisfies?(paths, model) + return false unless upload + return false unless model + + paths.include?(upload.path) && + upload.model_id == model.id && + upload.model_type == model.class.base_class.sti_name + end + end + end + + # Add support for automatic background uploading after the file is stored. + # + module BackgroundMove + extend ActiveSupport::Concern + + def background_upload(mount_points = []) + return unless mount_points.any? + + run_after_commit do + mount_points.each { |mount| send(mount).schedule_background_upload } # rubocop:disable GitlabSecurity/PublicSend + end + end + + def changed_mounts + self.class.uploaders.select do |mount, uploader_class| + mounted_as = uploader_class.serialization_column(self.class, mount) + mount if send(:"#{mounted_as}_changed?") # rubocop:disable GitlabSecurity/PublicSend + end.keys + end + + included do + after_save on: [:create, :update] do + background_upload(changed_mounts) + end + end + end + + module Concern + extend ActiveSupport::Concern + + included do |base| + base.include(ObjectStorage) + + before :store, :verify_license! + after :migrate, :delete_migrated_file + end + + class_methods do + def object_store_options + options.object_store + end + + def object_store_enabled? + object_store_options.enabled + end + + def background_upload_enabled? + object_store_options.background_upload + end + + def object_store_credentials + object_store_options.connection.to_hash.deep_symbolize_keys + end + + def remote_store_path + object_store_options.remote_directory + end + + def licensed? + License.feature_available?(:object_storage) + end + + def serialization_column(model_class, mount_point) + model_class.uploader_options.dig(mount_point, :mount_on) || mount_point + end + end + + def file_storage? 
+ storage.is_a?(CarrierWave::Storage::File) + end + + def file_cache_storage? + cache_storage.is_a?(CarrierWave::Storage::File) + end + + def object_store + @object_store ||= model.try(store_serialization_column) || Store::LOCAL + end + + # rubocop:disable Gitlab/ModuleWithInstanceVariables + def object_store=(value) + @object_store = value || Store::LOCAL + @storage = storage_for(object_store) + end + # rubocop:enable Gitlab/ModuleWithInstanceVariables + + # Return true if the current file is part or the model (i.e. is mounted in the model) + # + def persist_object_store? + model.respond_to?(:"#{store_serialization_column}=") + end + + # Save the current @object_store to the model <mounted_as>_store column + def persist_object_store! + return unless persist_object_store? + + updated = model.update_column(store_serialization_column, object_store) + raise ActiveRecordError unless updated + end + + def use_file + if file_storage? + return yield path + end + + begin + cache_stored_file! + yield cache_path + ensure + cache_storage.delete_dir!(cache_path(nil)) + end + end + + def filename + super || file&.filename + end + + # + # Move the file to another store + # + # new_store: Enum (Store::LOCAL, Store::REMOTE) + # + def migrate!(new_store) + return unless object_store != new_store + return unless file + + new_file = nil + file_to_delete = file + from_object_store = object_store + self.object_store = new_store # changes the storage and file + + cache_stored_file! if file_storage? + + with_callbacks(:migrate, file_to_delete) do + with_callbacks(:store, file_to_delete) do # for #store_versions! + new_file = storage.store!(file) + persist_object_store! + self.file = new_file + end + end + + file + rescue => e + # in case of failure delete new file + new_file.delete unless new_file.nil? 
+ # revert back to the old file + self.object_store = from_object_store + self.file = file_to_delete + raise e + end + + def schedule_background_upload(*args) + return unless schedule_background_upload? + + ObjectStorage::BackgroundMoveWorker.perform_async(self.class.name, + model.class.name, + mounted_as, + model.id) + end + + def fog_directory + self.class.remote_store_path + end + + def fog_credentials + self.class.object_store_credentials + end + + def fog_public + false + end + + def delete_migrated_file(migrated_file) + migrated_file.delete if exists? + end + + def verify_license!(_file) + return if file_storage? + + raise(ObjectStorageUnavailable, 'Object Storage feature is missing') unless self.class.licensed? + end + + def exists? + file.present? + end + + def store_dir(store = nil) + store_dirs[store || object_store] + end + + def store_dirs + { + Store::LOCAL => File.join(base_dir, dynamic_segment), + Store::REMOTE => File.join(dynamic_segment) + } + end + + private + + def schedule_background_upload? + self.class.object_store_enabled? && + self.class.background_upload_enabled? && + self.class.licensed? && + self.file_storage? + end + + # this is a hack around CarrierWave. The #migrate method needs to be + # able to force the current file to the migrated file upon success. + def file=(file) + @file = file # rubocop:disable Gitlab/ModuleWithInstanceVariables + end + + def serialization_column + self.class.serialization_column(model.class, mounted_as) + end + + # Returns the column where the 'store' is saved + # defaults to 'store' + def store_serialization_column + [serialization_column, 'store'].compact.join('_').to_sym + end + + def storage + @storage ||= storage_for(object_store) + end + + def storage_for(store) + case store + when Store::REMOTE + raise 'Object Storage is not enabled' unless self.class.object_store_enabled? 
+ + CarrierWave::Storage::Fog.new(self) + when Store::LOCAL + CarrierWave::Storage::File.new(self) + else + raise UnknownStoreError + end + end + end +end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 28a5e5da037..0a7656f69f0 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -38,6 +38,9 @@ - github_importer:github_import_stage_import_pull_requests - github_importer:github_import_stage_import_repository +- object_storage:object_storage_background_move +- object_storage:object_storage_migrate_uploads + - pipeline_cache:expire_job_cache - pipeline_cache:expire_pipeline_cache - pipeline_creation:create_pipeline @@ -102,3 +105,5 @@ - update_user_activity - upload_checksum - web_hook + + diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 6cc90981cce..b0c517b6210 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -305,6 +305,12 @@ Settings.artifacts['storage_path'] = Settings.absolute(Settings.artifacts.values # Settings.artifact['path'] is deprecated, use `storage_path` instead Settings.artifacts['path'] = Settings.artifacts['storage_path'] Settings.artifacts['max_size'] ||= 100 # in megabytes +Settings.artifacts['object_store'] ||= Settingslogic.new({}) +Settings.artifacts['object_store']['enabled'] = false if Settings.artifacts['object_store']['enabled'].nil? +Settings.artifacts['object_store']['remote_directory'] ||= nil +Settings.artifacts['object_store']['background_upload'] = true if Settings.artifacts['object_store']['background_upload'].nil? +# Convert upload connection settings to use string keys, to make Fog happy +Settings.artifacts['object_store']['connection']&.deep_stringify_keys! Settings.artifacts['object_store'] ||= Settingslogic.new({}) Settings.artifacts['object_store']['enabled'] = false if Settings.artifacts['object_store']['enabled'].nil? 
diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index afea744eae6..ca1577591ba 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -69,3 +69,4 @@ - [project_migrate_hashed_storage, 1] - [storage_migrator, 1] - [pages_domain_verification, 1] + - [object_storage, 1] diff --git a/db/schema.rb b/db/schema.rb index fba0c710705..b957a67578a 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -336,6 +336,7 @@ ActiveRecord::Schema.define(version: 20180216121030) do t.datetime_with_timezone "updated_at", null: false t.datetime_with_timezone "expire_at" t.string "file" + t.integer "file_store" end add_index "ci_job_artifacts", ["expire_at", "job_id"], name: "index_ci_job_artifacts_on_expire_at_and_job_id", using: :btree diff --git a/doc/administration/geo/replication/object_storage.md b/doc/administration/geo/replication/object_storage.md new file mode 100644 index 00000000000..36d9cf7af83 --- /dev/null +++ b/doc/administration/geo/replication/object_storage.md @@ -0,0 +1,38 @@ +# Geo with Object storage + +Geo can be used in combination with Object Storage (AWS S3, or +other compatible object storage). + +## Configuration + +At this time it is required that if object storage is enabled on the +primary, it must also be enabled on the secondary. + +The secondary nodes can use the same storage bucket as the primary, or +they can use a replicated storage bucket. At this time GitLab does not +take care of content replication in object storage. + +For LFS, follow the documentation to +[set up LFS object storage](../../../workflow/lfs/lfs_administration.md#setting-up-s3-compatible-object-storage). + +For CI job artifacts, there is similar documentation to configure +[jobs artifact object storage](../../job_artifacts.md#using-object-storage) + +Complete these steps on all nodes, primary **and** secondary. 
+ +## Replication + +When using Amazon S3, you can use +[CRR](https://docs.aws.amazon.com/AmazonS3/latest/dev/crr.html) to +have automatic replication between the bucket used by the primary and +the bucket used by the secondary. + +If you are using Google Cloud Storage, consider using +[Multi-Regional Storage](https://cloud.google.com/storage/docs/storage-classes#multi-regional). +Or you can use the [Storage Transfer Service](https://cloud.google.com/storage/transfer/), +although this only supports daily synchronization. + +For manual synchronization, or scheduled by `cron`, please have a look at: + +- [`s3cmd sync`](http://s3tools.org/s3cmd-sync) +- [`gsutil rsync`](https://cloud.google.com/storage/docs/gsutil/commands/rsync) diff --git a/doc/administration/job_artifacts.md b/doc/administration/job_artifacts.md index 968630349a6..cfc7c41e077 100644 --- a/doc/administration/job_artifacts.md +++ b/doc/administration/job_artifacts.md @@ -85,41 +85,100 @@ _The artifacts are stored by default in 1. Save the file and [restart GitLab][] for the changes to take effect. +### Using object storage + +>**Notes:** +- [Introduced][ee-1762] in [GitLab Premium][eep] 9.4. +- Since version 9.5, artifacts are [browsable], when object storage is enabled. + 9.4 lacks this feature. +> Available in [GitLab Premium](https://about.gitlab.com/products/) and +[GitLab.com Silver](https://about.gitlab.com/gitlab-com/). +> Since version 10.6, available in [GitLab CE](https://about.gitlab.com/products/) + +If you don't want to use the local disk where GitLab is installed to store the +artifacts, you can use an object storage like AWS S3 instead. +This configuration relies on valid AWS credentials to be configured already. +Use an [Object storage option][os] like AWS S3 to store job artifacts. + +**In Omnibus installations:** + +_The artifacts are stored by default in +`/var/opt/gitlab/gitlab-rails/shared/artifacts`._ + +1. 
Edit `/etc/gitlab/gitlab.rb` and add the following lines by replacing with + the values you want: + + ```ruby + gitlab_rails['artifacts_enabled'] = true + gitlab_rails['artifacts_object_store_enabled'] = true + gitlab_rails['artifacts_object_store_remote_directory'] = "artifacts" + gitlab_rails['artifacts_object_store_connection'] = { + 'provider' => 'AWS', + 'region' => 'eu-central-1', + 'aws_access_key_id' => 'AWS_ACCESS_KEY_ID', + 'aws_secret_access_key' => 'AWS_SECRET_ACCESS_KEY' + } + ``` + + NOTE: For GitLab 9.4+, if you are using AWS IAM profiles, be sure to omit the + AWS access key and secret access key/value pairs. For example: + + ```ruby + gitlab_rails['artifacts_object_store_connection'] = { + 'provider' => 'AWS', + 'region' => 'eu-central-1', + 'use_iam_profile' => true + } + ``` + +1. Save the file and [reconfigure GitLab][] for the changes to take effect. +1. Migrate any existing local artifacts to the object storage: + + ```bash + gitlab-rake gitlab:artifacts:migrate + ``` + + Currently this has to be executed manually and it will allow you to + migrate the existing artifacts to the object storage, but all new + artifacts will still be stored on the local disk. In the future + you will be given an option to define a default storage artifacts for all + new files. + --- -**Using Object Store** +**In installations from source:** + +_The artifacts are stored by default in +`/home/git/gitlab/shared/artifacts`._ -The previously mentioned methods use the local disk to store artifacts. However, -there is the option to use object stores like AWS' S3. To do this, set the -`object_store` in your `gitlab.yml`. This relies on valid AWS -credentials to be configured already. +1. 
Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following + lines: ```yaml artifacts: + enabled: true + object_store: enabled: true - path: /mnt/storage/artifacts - object_store: - enabled: true - remote_directory: my-bucket-name - connection: - provider: AWS - aws_access_key_id: S3_KEY_ID - aws_secret_key_id: S3_SECRET_KEY_ID - region: eu-central-1 + remote_directory: "artifacts" # The bucket name + connection: + provider: AWS # Only AWS supported at the moment + aws_access_key_id: AWS_ACCESS_KEY_ID + aws_secret_access_key: AWS_SECRET_ACCESS_KEY + region: eu-central-1 ``` -This will allow you to migrate existing artifacts to object store, -but all new artifacts will still be stored on the local disk. -In the future you will be given an option to define a default storage artifacts -for all new files. Currently the artifacts migration has to be executed manually: +1. Save the file and [restart GitLab][] for the changes to take effect. +1. Migrate any existing local artifacts to the object storage: - ```bash - gitlab-rake gitlab:artifacts:migrate - ``` + ```bash + sudo -u git -H bundle exec rake gitlab:artifacts:migrate RAILS_ENV=production + ``` -Please note, that enabling this feature -will have the effect that artifacts are _not_ browsable anymore through the web -interface. This limitation will be removed in one of the upcoming releases. + Currently this has to be executed manually and it will allow you to + migrate the existing artifacts to the object storage, but all new + artifacts will still be stored on the local disk. In the future + you will be given an option to define a default storage artifacts for all + new files. ## Expiring artifacts @@ -223,7 +282,7 @@ When clicking on a specific file, [GitLab Workhorse] extracts it from the archive and the download begins. This implementation saves space, memory and disk I/O. 
-[reconfigure gitlab]: restart_gitlab.md "How to restart GitLab" -[restart gitlab]: restart_gitlab.md "How to restart GitLab" +[reconfigure gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure "How to reconfigure Omnibus GitLab" +[restart gitlab]: restart_gitlab.md#installations-from-source "How to restart GitLab" [gitlab workhorse]: https://gitlab.com/gitlab-org/gitlab-workhorse "GitLab Workhorse repository" -[ee-os]: https://docs.gitlab.com/ee/administration/job_artifacts.html#using-object-storage +[os]: https://docs.gitlab.com/administration/job_artifacts.html#using-object-storage diff --git a/doc/administration/raketasks/uploads/migrate.md b/doc/administration/raketasks/uploads/migrate.md new file mode 100644 index 00000000000..0cd33ffc122 --- /dev/null +++ b/doc/administration/raketasks/uploads/migrate.md @@ -0,0 +1,74 @@ +# Uploads Migrate Rake Task + +## Migrate to Object Storage + +After [configuring the object storage](../../uploads.md#using-object-storage) for GitLab's uploads, you may use this task to migrate existing uploads from the local storage to the remote storage. + +>**Note:** +All of the processing will be done in a background worker and requires **no downtime**. + +This task uses 3 parameters to find uploads to migrate. + +>**Note:** +These parameters are mainly internal to GitLab's structure; you may want to refer to the task list below instead. + +Parameter | Type | Description +--------- | ---- | ----------- +`uploader_class` | string | Type of the uploader to migrate from +`model_class` | string | Type of the model to migrate from +`mount_point` | string/symbol | Name of the model's column on which the uploader is mounted. + +This task also accepts some environment variables which you can use to override +certain values: + +Variable | Type | Description +-------- | ---- | ----------- +`BATCH` | integer | Specifies the size of the batch. Defaults to 200. 
+ +**Omnibus Installation** + +```bash +# gitlab-rake gitlab:uploads:migrate[uploader_class, model_class, mount_point] + +# Avatars +gitlab-rake "gitlab:uploads:migrate[AvatarUploader, Project, :avatar]" +gitlab-rake "gitlab:uploads:migrate[AvatarUploader, Group, :avatar]" +gitlab-rake "gitlab:uploads:migrate[AvatarUploader, User, :avatar]" + +# Attachments +gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Note, :attachment]" +gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :logo]" +gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :header_logo]" + +# Markdown +gitlab-rake "gitlab:uploads:migrate[FileUploader, Project]" +gitlab-rake "gitlab:uploads:migrate[PersonalFileUploader, Snippet]" +gitlab-rake "gitlab:uploads:migrate[NamespaceFileUploader, Snippet]" +gitlab-rake "gitlab:uploads:migrate[FileUploader, MergeRequest]" +``` + +**Source Installation** + +>**Note:** +Use `RAILS_ENV=production` for every task. + +```bash +# sudo -u git -H bundle exec rake gitlab:uploads:migrate + +# Avatars +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, Project, :avatar]" +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, Group, :avatar]" +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, User, :avatar]" + +# Attachments +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Note, :attachment]" +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :logo]" +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :header_logo]" + +# Markdown +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, Project]" +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[PersonalFileUploader, Snippet]" +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[NamespaceFileUploader, Snippet]" +sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, MergeRequest]" + 
+``` diff --git a/doc/administration/uploads.md b/doc/administration/uploads.md new file mode 100644 index 00000000000..df813e75770 --- /dev/null +++ b/doc/administration/uploads.md @@ -0,0 +1,183 @@ +# Uploads administration + +>**Notes:** +Uploads represent all user data that may be sent to GitLab as a single file. As an example, avatars and notes' attachments are uploads. Uploads are integral to GitLab functionality, and therefore cannot be disabled. + +### Using local storage + +>**Notes:** +This is the default configuration + +To change the location where the uploads are stored locally, follow the steps +below. + +--- + +**In Omnibus installations:** + +>**Notes:** +For historical reasons, uploads are stored into a base directory, which by default is `uploads/-/system`. It is strongly discouraged to change this configuration option on an existing GitLab installation. + +_The uploads are stored by default in `/var/opt/gitlab/gitlab-rails/public/uploads/-/system`._ + +1. To change the storage path for example to `/mnt/storage/uploads`, edit + `/etc/gitlab/gitlab.rb` and add the following line: + + ```ruby + gitlab_rails['uploads_storage_path'] = "/mnt/storage/" + gitlab_rails['uploads_base_dir'] = "uploads" + ``` + +1. Save the file and [reconfigure GitLab][] for the changes to take effect. + +--- + +**In installations from source:** + +_The uploads are stored by default in +`/home/git/gitlab/public/uploads/-/system`._ + +1. To change the storage path for example to `/mnt/storage/uploads`, edit + `/home/git/gitlab/config/gitlab.yml` and add or amend the following lines: + + ```yaml + uploads: + storage_path: /mnt/storage + base_dir: uploads + ``` + +1. Save the file and [restart GitLab][] for the changes to take effect. + +### Using object storage + +>**Notes:** +- [Introduced][ee-3867] in [GitLab Enterprise Edition Premium][eep] 10.5. 
+ +If you don't want to use the local disk where GitLab is installed to store the +uploads, you can use an object storage provider like AWS S3 instead. +This configuration relies on valid AWS credentials to be configured already. + +**In Omnibus installations:** + +_The uploads are stored by default in +`/var/opt/gitlab/gitlab-rails/public/uploads/-/system`._ + +1. Edit `/etc/gitlab/gitlab.rb` and add the following lines by replacing with + the values you want: + + ```ruby + gitlab_rails['uploads_object_store_enabled'] = true + gitlab_rails['uploads_object_store_remote_directory'] = "uploads" + gitlab_rails['uploads_object_store_connection'] = { + 'provider' => 'AWS', + 'region' => 'eu-central-1', + 'aws_access_key_id' => 'AWS_ACCESS_KEY_ID', + 'aws_secret_access_key' => 'AWS_SECRET_ACCESS_KEY' + } + ``` + +>**Note:** +If you are using AWS IAM profiles, be sure to omit the AWS access key and secret access key/value pairs. + + ```ruby + gitlab_rails['uploads_object_store_connection'] = { + 'provider' => 'AWS', + 'region' => 'eu-central-1', + 'use_iam_profile' => true + } + ``` + +1. Save the file and [reconfigure GitLab][] for the changes to take effect. +1. Migrate any existing local uploads to the object storage: + +>**Notes:** +This task complies with the `BATCH` environment variable to process uploads in batch (200 by default). All of the processing will be done in a background worker and requires **no downtime**. 
+ + ```bash + # gitlab-rake gitlab:uploads:migrate[uploader_class, model_class, mount_point] + + # Avatars + gitlab-rake "gitlab:uploads:migrate[AvatarUploader, Project, :avatar]" + gitlab-rake "gitlab:uploads:migrate[AvatarUploader, Group, :avatar]" + gitlab-rake "gitlab:uploads:migrate[AvatarUploader, User, :avatar]" + + # Attachments + gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Note, :attachment]" + gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :logo]" + gitlab-rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :header_logo]" + + # Markdown + gitlab-rake "gitlab:uploads:migrate[FileUploader, Project]" + gitlab-rake "gitlab:uploads:migrate[PersonalFileUploader, Snippet]" + gitlab-rake "gitlab:uploads:migrate[NamespaceFileUploader, Snippet]" + gitlab-rake "gitlab:uploads:migrate[FileUploader, MergeRequest]" + ``` + + Currently this has to be executed manually and it will allow you to + migrate the existing uploads to the object storage, but all new + uploads will still be stored on the local disk. In the future + you will be given an option to define a default storage for all + new files. + +--- + +**In installations from source:** + +_The uploads are stored by default in +`/home/git/gitlab/public/uploads/-/system`._ + +1. Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following + lines: + + ```yaml + uploads: + object_store: + enabled: true + remote_directory: "uploads" # The bucket name + connection: + provider: AWS # Only AWS supported at the moment + aws_access_key_id: AWS_ACCESS_KEY_ID + aws_secret_access_key: AWS_SECRET_ACCESS_KEY + region: eu-central-1 + ``` + +1. Save the file and [restart GitLab][] for the changes to take effect. +1. Migrate any existing local uploads to the object storage: + +>**Notes:** + +- These tasks comply with the `BATCH` environment variable to process uploads in batch (200 by default). All of the processing will be done in a background worker and requires **no downtime**. 
+ +- To migrate in production use `RAILS_ENV=production` environment variable. + + ```bash + # sudo -u git -H bundle exec rake gitlab:uploads:migrate + + # Avatars + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, Project, :avatar]" + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, Group, :avatar]" + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AvatarUploader, User, :avatar]" + + # Attachments + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Note, :attachment]" + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :logo]" + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[AttachmentUploader, Appearance, :header_logo]" + + # Markdown + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, Project]" + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[PersonalFileUploader, Snippet]" + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[NamespaceFileUploader, Snippet]" + sudo -u git -H bundle exec rake "gitlab:uploads:migrate[FileUploader, MergeRequest]" + + ``` + + Currently this has to be executed manually and it will allow you to + migrate the existing uploads to the object storage, but all new + uploads will still be stored on the local disk. In the future + you will be given an option to define a default storage for all + new files. 
+ +[reconfigure gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure "How to reconfigure Omnibus GitLab" +[restart gitlab]: restart_gitlab.md#installations-from-source "How to restart GitLab" +[eep]: https://about.gitlab.com/gitlab-ee/ "GitLab Enterprise Edition Premium" +[ee-3867]: https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/3867 diff --git a/doc/gitlab-geo/object_storage.md b/doc/gitlab-geo/object_storage.md new file mode 100644 index 00000000000..272bc13c450 --- /dev/null +++ b/doc/gitlab-geo/object_storage.md @@ -0,0 +1 @@ +This document was moved to [another location](../administration/geo/replication/object_storage.md). diff --git a/doc/raketasks/README.md b/doc/raketasks/README.md index 2f916f5dea7..90187617c41 100644 --- a/doc/raketasks/README.md +++ b/doc/raketasks/README.md @@ -14,3 +14,4 @@ comments: false - [Webhooks](web_hooks.md) - [Import](import.md) of git repositories in bulk - [Rebuild authorized_keys file](http://docs.gitlab.com/ce/raketasks/maintenance.html#rebuild-authorized_keys-file) task for administrators +- [Migrate Uploads](../administration/raketasks/uploads/migrate.md) diff --git a/doc/workflow/lfs/lfs_administration.md b/doc/workflow/lfs/lfs_administration.md index d768b73286d..fdf31a3bac7 100644 --- a/doc/workflow/lfs/lfs_administration.md +++ b/doc/workflow/lfs/lfs_administration.md @@ -5,6 +5,7 @@ Documentation on how to use Git LFS are under [Managing large binary files with ## Requirements * Git LFS is supported in GitLab starting with version 8.2. +* Support for object storage, such as AWS S3, was introduced in 10.0. * Users need to install [Git LFS client](https://git-lfs.github.com) version 1.0.1 and up. ## Configuration @@ -12,16 +13,18 @@ Documentation on how to use Git LFS are under [Managing large binary files with Git LFS objects can be large in size. By default, they are stored on the server GitLab is installed on. 
-There are two configuration options to help GitLab server administrators: +There are various configuration options to help GitLab server administrators: * Enabling/disabling Git LFS support * Changing the location of LFS object storage +* Setting up AWS S3 compatible object storage ### Omnibus packages In `/etc/gitlab/gitlab.rb`: ```ruby +# Change to true to enable lfs gitlab_rails['lfs_enabled'] = false # Optionally, change the storage path location. Defaults to @@ -35,11 +38,113 @@ gitlab_rails['lfs_storage_path'] = "/mnt/storage/lfs-objects" In `config/gitlab.yml`: ```yaml +# Change to true to enable lfs lfs: enabled: false storage_path: /mnt/storage/lfs-objects ``` +## Setting up S3 compatible object storage + +> **Note:** [Introduced][ee-2760] in [GitLab Premium][eep] 10.0. +> Available in [GitLab CE][ce] 10.6 + +It is possible to store LFS objects on remote object storage instead of on a local disk. + +This allows you to offload storage to an external AWS S3 compatible service, freeing up disk space locally. You can also host your own S3 compatible storage decoupled from GitLab, with a service such as [Minio](https://www.minio.io/). + +Object storage currently transfers files first to GitLab, and then to the object storage in a second stage. This can be done either by using a rake task to transfer existing objects, or in a background job after each file is received. + +### Object Storage Settings + +For source installations the following settings are nested under `lfs:` and then `object_store:`. On omnibus installs they are prefixed by `lfs_object_store_`. + +| Setting | Description | Default | +|---------|-------------|---------| +| `enabled` | Enable/disable object storage | `false` | +| `remote_directory` | The bucket name where LFS objects will be stored| | +| `background_upload` | Set to false to disable automatic upload. 
Option may be removed once upload is direct to S3 | `true` | +| `connection` | Various connection options described below | | + +#### S3 compatible connection settings + +The connection settings match those provided by [Fog](https://github.com/fog), and are as follows: + +| Setting | Description | Default | +|---------|-------------|---------| +| `provider` | Always `AWS` for compatible hosts | AWS | +| `aws_access_key_id` | AWS credentials, or compatible | | +| `aws_secret_access_key` | AWS credentials, or compatible | | +| `region` | AWS region | us-east-1 | +| `host` | S3 compatible host for when not using AWS, e.g. `localhost` or `storage.example.com` | s3.amazonaws.com | +| `endpoint` | Can be used when configuring an S3 compatible service such as [Minio](https://www.minio.io), by entering a URL such as `http://127.0.0.1:9000` | (optional) | +| `path_style` | Set to true to use `host/bucket_name/object` style paths instead of `bucket_name.host/object`. Leave as false for AWS S3 | false | + + +### From source + +1. Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following + lines: + + ```yaml + lfs: + enabled: true + object_store: + enabled: false + remote_directory: lfs-objects # Bucket name + connection: + provider: AWS + aws_access_key_id: 1ABCD2EFGHI34JKLM567N + aws_secret_access_key: abcdefhijklmnopQRSTUVwxyz0123456789ABCDE + region: eu-central-1 + # Use the following options to configure an AWS compatible host such as Minio + host: 'localhost' + endpoint: 'http://127.0.0.1:9000' + path_style: true + ``` + +1. Save the file and [restart GitLab][] for the changes to take effect. +1. Migrate any existing local LFS objects to the object storage: + + ```bash + sudo -u git -H bundle exec rake gitlab:lfs:migrate RAILS_ENV=production + ``` + + This will migrate existing LFS objects to object storage. New LFS objects + will be forwarded to object storage unless + `gitlab_rails['lfs_object_store_background_upload']` is set to false. 
+ +### In Omnibus + +1. Edit `/etc/gitlab/gitlab.rb` and add the following lines, replacing + the values with the ones you want: + + ```ruby + gitlab_rails['lfs_object_store_enabled'] = true + gitlab_rails['lfs_object_store_remote_directory'] = "lfs-objects" + gitlab_rails['lfs_object_store_connection'] = { + 'provider' => 'AWS', + 'region' => 'eu-central-1', + 'aws_access_key_id' => '1ABCD2EFGHI34JKLM567N', + 'aws_secret_access_key' => 'abcdefhijklmnopQRSTUVwxyz0123456789ABCDE', + # The below options configure an S3 compatible host instead of AWS + 'host' => 'localhost', + 'endpoint' => 'http://127.0.0.1:9000', + 'path_style' => true + } + ``` + +1. Save the file and [reconfigure GitLab][] for the changes to take effect. +1. Migrate any existing local LFS objects to the object storage: + + ```bash + gitlab-rake gitlab:lfs:migrate + ``` + + This will migrate existing LFS objects to object storage. New LFS objects + will be forwarded to object storage unless + `gitlab_rails['lfs_object_store_background_upload']` is set to false. + ## Storage statistics You can see the total storage used for LFS objects on groups and projects @@ -48,10 +153,13 @@ and [projects APIs](../../api/projects.md). ## Known limitations -* Currently, storing GitLab Git LFS objects on a non-local storage (like S3 buckets) - is not supported * Support for removing unreferenced LFS objects was added in 8.14 onwards. * LFS authentications via SSH was added with GitLab 8.12 * Only compatible with the GitLFS client versions 1.1.0 and up, or 1.0.2. 
* The storage statistics currently count each LFS object multiple times for every project linking to it + +[reconfigure gitlab]: ../../administration/restart_gitlab.md#omnibus-gitlab-reconfigure "How to reconfigure Omnibus GitLab" +[restart gitlab]: ../../administration/restart_gitlab.md#installations-from-source "How to restart GitLab" +[eep]: https://about.gitlab.com/products/ "GitLab Premium" +[ee-2760]: https://gitlab.com/gitlab-org/gitlab-ee/merge_requests/2760 diff --git a/spec/factories/appearances.rb b/spec/factories/appearances.rb index 5f9c57c0c8d..85db098080c 100644 --- a/spec/factories/appearances.rb +++ b/spec/factories/appearances.rb @@ -2,8 +2,21 @@ FactoryBot.define do factory :appearance do - title "MepMep" - description "This is my Community Edition instance" + title "GitLab Enterprise Edition" + description "Open source software to collaborate on code" new_project_guidelines "Custom project guidelines" end + + trait :with_logo do + logo { fixture_file_upload('spec/fixtures/dk.png') } + end + + trait :with_header_logo do + header_logo { fixture_file_upload('spec/fixtures/dk.png') } + end + + trait :with_logos do + with_logo + with_header_logo + end end diff --git a/spec/factories/uploads.rb b/spec/factories/uploads.rb index cd75cbf8adf..008d9616216 100644 --- a/spec/factories/uploads.rb +++ b/spec/factories/uploads.rb @@ -6,6 +6,7 @@ FactoryBot.define do store ObjectStorage::Store::LOCAL mount_point :avatar secret nil + store ObjectStorage::Store::LOCAL # we should build a mount agnostic upload by default transient do @@ -28,6 +29,10 @@ FactoryBot.define do secret SecureRandom.hex end + trait :object_storage do + store ObjectStorage::Store::REMOTE + end + trait :namespace_upload do model { build(:group) } path { File.join(secret, filename) } diff --git a/spec/requests/lfs_http_spec.rb b/spec/requests/lfs_http_spec.rb index 04c0114b5d6..da7dd936a76 100644 --- a/spec/requests/lfs_http_spec.rb +++ b/spec/requests/lfs_http_spec.rb @@ -683,6 +683,34 @@ 
describe 'Git LFS API and storage' do expect(json_response['objects'].first['actions']['upload']['href']).to eq("#{Gitlab.config.gitlab.url}/#{project.full_path}.git/gitlab-lfs/objects/#{sample_oid}/#{sample_size}") expect(json_response['objects'].first['actions']['upload']['header']).to eq('Authorization' => authorization) end + + ## EE-specific context + context 'and project is above the limit' do + let(:update_lfs_permissions) do + allow_any_instance_of(EE::Project).to receive_messages( + repository_and_lfs_size: 100.megabytes, + actual_size_limit: 99.megabytes) + end + + it 'responds with status 406' do + expect(response).to have_gitlab_http_status(406) + expect(json_response['message']).to eql('Your push has been rejected, because this repository has exceeded its size limit of 99 MB by 1 MB. Please contact your GitLab administrator for more information.') + end + end + + context 'and project will go over the limit' do + let(:update_lfs_permissions) do + allow_any_instance_of(EE::Project).to receive_messages( + repository_and_lfs_size: 200.megabytes, + actual_size_limit: 300.megabytes) + end + + it 'responds with status 406' do + expect(response).to have_gitlab_http_status(406) + expect(json_response['documentation_url']).to include('/help') + expect(json_response['message']).to eql('Your push has been rejected, because this repository has exceeded its size limit of 300 MB by 50 MB. Please contact your GitLab administrator for more information.') + end + end end describe 'when request is authenticated' do @@ -997,12 +1025,12 @@ describe 'Git LFS API and storage' do context 'and workhorse requests upload finalize for a new lfs object' do before do - lfs_object.destroy + allow_any_instance_of(LfsObjectUploader).to receive(:exists?) 
{ false } end context 'with object storage disabled' do it "doesn't attempt to migrate file to object storage" do - expect(ObjectStorageUploadWorker).not_to receive(:perform_async) + expect(ObjectStorage::BackgroundMoveWorker).not_to receive(:perform_async) put_finalize(with_tempfile: true) end @@ -1014,7 +1042,7 @@ describe 'Git LFS API and storage' do end it 'schedules migration of file to object storage' do - expect(ObjectStorageUploadWorker).to receive(:perform_async).with('LfsObjectUploader', 'LfsObject', :file, kind_of(Numeric)) + expect(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async).with('LfsObjectUploader', 'LfsObject', :file, kind_of(Numeric)) put_finalize(with_tempfile: true) end diff --git a/spec/uploaders/file_uploader_spec.rb b/spec/uploaders/file_uploader_spec.rb index 7373aad7796..1f268fb7cb1 100644 --- a/spec/uploaders/file_uploader_spec.rb +++ b/spec/uploaders/file_uploader_spec.rb @@ -89,6 +89,29 @@ describe FileUploader do end end + describe 'callbacks' do + describe '#prune_store_dir after :remove' do + before do + uploader.store!(fixture_file_upload('spec/fixtures/doc_sample.txt')) + end + + def store_dir + File.expand_path(uploader.store_dir, uploader.root) + end + + it 'is called' do + expect(uploader).to receive(:prune_store_dir).once + + uploader.remove! + end + + it 'prune the store directory' do + expect { uploader.remove! } + .to change { File.exist?(store_dir) }.from(true).to(false) + end + end + end + describe "#migrate!" 
do before do uploader.store!(fixture_file_upload(Rails.root.join('spec/fixtures/dk.png'))) @@ -98,4 +121,24 @@ describe FileUploader do it_behaves_like "migrates", to_store: described_class::Store::REMOTE it_behaves_like "migrates", from_store: described_class::Store::REMOTE, to_store: described_class::Store::LOCAL end + + describe '#upload=' do + let(:secret) { SecureRandom.hex } + let(:upload) { create(:upload, :issuable_upload, secret: secret, filename: 'file.txt') } + + it 'handles nil' do + expect(uploader).not_to receive(:apply_context!) + + uploader.upload = nil + end + + it 'extract the uploader context from it' do + expect(uploader).to receive(:apply_context!).with(a_hash_including(secret: secret, identifier: 'file.txt')) + + uploader.upload = upload + end + + it_behaves_like "migrates", to_store: described_class::Store::REMOTE + it_behaves_like "migrates", from_store: described_class::Store::REMOTE, to_store: described_class::Store::LOCAL + end end diff --git a/spec/uploaders/lfs_object_uploader_spec.rb b/spec/uploaders/lfs_object_uploader_spec.rb index 2e4bd008afe..fbb4a188646 100644 --- a/spec/uploaders/lfs_object_uploader_spec.rb +++ b/spec/uploaders/lfs_object_uploader_spec.rb @@ -26,7 +26,7 @@ describe LfsObjectUploader do describe 'migration to object storage' do context 'with object storage disabled' do it "is skipped" do - expect(ObjectStorageUploadWorker).not_to receive(:perform_async) + expect(ObjectStorage::BackgroundMoveWorker).not_to receive(:perform_async) lfs_object end @@ -38,7 +38,7 @@ describe LfsObjectUploader do end it 'is scheduled to run after creation' do - expect(ObjectStorageUploadWorker).to receive(:perform_async).with(described_class.name, 'LfsObject', :file, kind_of(Numeric)) + expect(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async).with(described_class.name, 'LfsObject', :file, kind_of(Numeric)) lfs_object end @@ -50,7 +50,7 @@ describe LfsObjectUploader do end it 'is skipped' do - 
expect(ObjectStorageUploadWorker).not_to receive(:perform_async) + expect(ObjectStorage::BackgroundMoveWorker).not_to receive(:perform_async) lfs_object end @@ -67,7 +67,7 @@ describe LfsObjectUploader do end it 'can store file remotely' do - allow(ObjectStorageUploadWorker).to receive(:perform_async) + allow(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async) store_file(lfs_object) |