diff options
Diffstat (limited to 'lib/gitlab')
182 files changed, 4887 insertions, 843 deletions
diff --git a/lib/gitlab/access.rb b/lib/gitlab/access.rb index b4012ebbb99..7127948cf00 100644 --- a/lib/gitlab/access.rb +++ b/lib/gitlab/access.rb @@ -58,9 +58,9 @@ module Gitlab def protection_options { "Not protected: Both developers and masters can push new commits, force push, or delete the branch." => PROTECTION_NONE, - "Protected against pushes: Developers cannot push new commits, but are allowed to accept merge requests to the branch." => PROTECTION_DEV_CAN_MERGE, - "Partially protected: Developers can push new commits, but cannot force push or delete the branch. Masters can do all of those." => PROTECTION_DEV_CAN_PUSH, - "Fully protected: Developers cannot push new commits, force push, or delete the branch. Only masters can do any of those." => PROTECTION_FULL + "Protected against pushes: Developers cannot push new commits, but are allowed to accept merge requests to the branch. Masters can push to the branch." => PROTECTION_DEV_CAN_MERGE, + "Partially protected: Both developers and masters can push new commits, but cannot force push or delete the branch." => PROTECTION_DEV_CAN_PUSH, + "Fully protected: Developers cannot push new commits, but masters can. No-one can force push or delete the branch." => PROTECTION_FULL } end diff --git a/lib/gitlab/auth.rb b/lib/gitlab/auth.rb index 87aeb76b66a..cbbc51db99e 100644 --- a/lib/gitlab/auth.rb +++ b/lib/gitlab/auth.rb @@ -1,11 +1,11 @@ module Gitlab module Auth - MissingPersonalTokenError = Class.new(StandardError) + MissingPersonalAccessTokenError = Class.new(StandardError) REGISTRY_SCOPES = [:read_registry].freeze # Scopes used for GitLab API access - API_SCOPES = [:api, :read_user].freeze + API_SCOPES = [:api, :read_user, :sudo].freeze # Scopes used for OpenID Connect OPENID_SCOPES = [:openid].freeze @@ -25,7 +25,7 @@ module Gitlab result = service_request_check(login, password, project) || build_access_token_check(login, password) || - lfs_token_check(login, password) || + lfs_token_check(login, password, project) || oauth_access_token_check(login, password) || personal_access_token_check(password) || user_with_password_for_git(login, password) || @@ -38,7 +38,7 @@ module Gitlab # If sign-in is disabled and LDAP is not configured, recommend a # personal access token on failed auth attempts - raise Gitlab::Auth::MissingPersonalTokenError + raise Gitlab::Auth::MissingPersonalAccessTokenError end def find_with_user_password(login, password) @@ -106,7 +106,7 @@ module Gitlab user = find_with_user_password(login, password) return unless user - raise Gitlab::Auth::MissingPersonalTokenError if user.two_factor_enabled? + raise Gitlab::Auth::MissingPersonalAccessTokenError if user.two_factor_enabled? Gitlab::Auth::Result.new(user, nil, :gitlab_or_ldap, full_authentication_abilities) end @@ -128,7 +128,7 @@ module Gitlab token = PersonalAccessTokensFinder.new(state: 'active').find_by(token: password) if token && valid_scoped_token?(token, available_scopes) - Gitlab::Auth::Result.new(token.user, nil, :personal_token, abilities_for_scope(token.scopes)) + Gitlab::Auth::Result.new(token.user, nil, :personal_access_token, abilities_for_scope(token.scopes)) end end @@ -146,7 +146,7 @@ module Gitlab end.flatten.uniq end - def lfs_token_check(login, password) + def lfs_token_check(login, password, project) deploy_key_matches = login.match(/\Alfs\+deploy-key-(\d+)\z/) actor = @@ -163,6 +163,8 @@ module Gitlab authentication_abilities = if token_handler.user? full_authentication_abilities + elsif token_handler.deploy_key_pushable?(project) + read_write_authentication_abilities else read_authentication_abilities end @@ -208,10 +210,15 @@ module Gitlab ] end - def full_authentication_abilities + def read_write_authentication_abilities read_authentication_abilities + [ :push_code, - :create_container_image, + :create_container_image + ] + end + + def full_authentication_abilities + read_write_authentication_abilities + [ :admin_container_image ] end @@ -226,8 +233,10 @@ module Gitlab [] end - def available_scopes - API_SCOPES + registry_scopes + def available_scopes(current_user = nil) + scopes = API_SCOPES + registry_scopes + scopes.delete(:sudo) if current_user && !current_user.admin? + scopes end # Other available scopes diff --git a/lib/gitlab/auth/request_authenticator.rb b/lib/gitlab/auth/request_authenticator.rb new file mode 100644 index 00000000000..46ec040ce92 --- /dev/null +++ b/lib/gitlab/auth/request_authenticator.rb @@ -0,0 +1,25 @@ +# Use for authentication only, in particular for Rack::Attack. +# Does not perform authorization of scopes, etc. +module Gitlab + module Auth + class RequestAuthenticator + include UserAuthFinders + + attr_reader :request + + def initialize(request) + @request = request + end + + def user + find_sessionless_user || find_user_from_warden + end + + def find_sessionless_user + find_user_from_access_token || find_user_from_rss_token + rescue Gitlab::Auth::AuthenticationError + nil + end + end + end +end diff --git a/lib/gitlab/auth/user_auth_finders.rb b/lib/gitlab/auth/user_auth_finders.rb new file mode 100644 index 00000000000..b4114a3ac96 --- /dev/null +++ b/lib/gitlab/auth/user_auth_finders.rb @@ -0,0 +1,109 @@ +module Gitlab + module Auth + # + # Exceptions + # + + AuthenticationError = Class.new(StandardError) + MissingTokenError = Class.new(AuthenticationError) + TokenNotFoundError = Class.new(AuthenticationError) + ExpiredError = Class.new(AuthenticationError) + RevokedError = Class.new(AuthenticationError) + UnauthorizedError = Class.new(AuthenticationError) + + class InsufficientScopeError < AuthenticationError + attr_reader :scopes + def initialize(scopes) + @scopes = scopes.map { |s| s.try(:name) || s } + end + end + + module UserAuthFinders + include Gitlab::Utils::StrongMemoize + + PRIVATE_TOKEN_HEADER = 'HTTP_PRIVATE_TOKEN'.freeze + PRIVATE_TOKEN_PARAM = :private_token + + # Check the Rails session for valid authentication details + def find_user_from_warden + current_request.env['warden']&.authenticate if verified_request? + end + + def find_user_from_rss_token + return unless current_request.path.ends_with?('.atom') || current_request.format.atom? + + token = current_request.params[:rss_token].presence + return unless token + + User.find_by_rss_token(token) || raise(UnauthorizedError) + end + + def find_user_from_access_token + return unless access_token + + validate_access_token! + + access_token.user || raise(UnauthorizedError) + end + + def validate_access_token!(scopes: []) + return unless access_token + + case AccessTokenValidationService.new(access_token, request: request).validate(scopes: scopes) + when AccessTokenValidationService::INSUFFICIENT_SCOPE + raise InsufficientScopeError.new(scopes) + when AccessTokenValidationService::EXPIRED + raise ExpiredError + when AccessTokenValidationService::REVOKED + raise RevokedError + end + end + + private + + def access_token + strong_memoize(:access_token) do + find_oauth_access_token || find_personal_access_token + end + end + + def find_personal_access_token + token = + current_request.params[PRIVATE_TOKEN_PARAM].presence || + current_request.env[PRIVATE_TOKEN_HEADER].presence + + return unless token + + # Expiration, revocation and scopes are verified in `validate_access_token!` + PersonalAccessToken.find_by(token: token) || raise(UnauthorizedError) + end + + def find_oauth_access_token + token = Doorkeeper::OAuth::Token.from_request(current_request, *Doorkeeper.configuration.access_token_methods) + return unless token + + # Expiration, revocation and scopes are verified in `validate_access_token!` + oauth_token = OauthAccessToken.by_token(token) + raise UnauthorizedError unless oauth_token + + oauth_token.revoke_previous_refresh_token! + oauth_token + end + + # Check if the request is GET/HEAD, or if CSRF token is valid. + def verified_request? + Gitlab::RequestForgeryProtection.verified?(current_request.env) + end + + def ensure_action_dispatch_request(request) + return request if request.is_a?(ActionDispatch::Request) + + ActionDispatch::Request.new(request.env) + end + + def current_request + @current_request ||= ensure_action_dispatch_request(request) + end + end + end +end diff --git a/lib/gitlab/background_migration/create_fork_network_memberships_range.rb b/lib/gitlab/background_migration/create_fork_network_memberships_range.rb index c88eb9783ed..67a39d28944 100644 --- a/lib/gitlab/background_migration/create_fork_network_memberships_range.rb +++ b/lib/gitlab/background_migration/create_fork_network_memberships_range.rb @@ -51,10 +51,20 @@ module Gitlab FROM projects WHERE forked_project_links.forked_from_project_id = projects.id ) + AND NOT EXISTS ( + SELECT true + FROM forked_project_links AS parent_links + WHERE parent_links.forked_to_project_id = forked_project_links.forked_from_project_id + AND NOT EXISTS ( + SELECT true + FROM projects + WHERE parent_links.forked_from_project_id = projects.id + ) + ) AND forked_project_links.id BETWEEN #{start_id} AND #{end_id} MISSING_MEMBERS - ForkNetworkMember.count_by_sql(count_sql) > 0 + ForkedProjectLink.count_by_sql(count_sql) > 0 end def log(message) diff --git a/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb b/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb new file mode 100644 index 00000000000..7e109e96e73 --- /dev/null +++ b/lib/gitlab/background_migration/populate_merge_requests_latest_merge_request_diff_id.rb @@ -0,0 +1,30 @@ +module Gitlab + module BackgroundMigration + class PopulateMergeRequestsLatestMergeRequestDiffId + BATCH_SIZE = 1_000 + + class MergeRequest < ActiveRecord::Base + self.table_name = 'merge_requests' + + include ::EachBatch + end + + def perform(start_id, stop_id) + update = ' + latest_merge_request_diff_id = ( + SELECT MAX(id) + FROM merge_request_diffs + WHERE merge_requests.id = merge_request_diffs.merge_request_id + )'.squish + + MergeRequest + .where(id: start_id..stop_id) + .where(latest_merge_request_diff_id: nil) + .each_batch(of: BATCH_SIZE) do |relation| + + relation.update_all(update) + end + end + end + end +end diff --git a/lib/gitlab/bare_repository_import/importer.rb b/lib/gitlab/bare_repository_import/importer.rb new file mode 100644 index 00000000000..196de667805 --- /dev/null +++ b/lib/gitlab/bare_repository_import/importer.rb @@ -0,0 +1,101 @@ +module Gitlab + module BareRepositoryImport + class Importer + NoAdminError = Class.new(StandardError) + + def self.execute(import_path) + import_path << '/' unless import_path.ends_with?('/') + repos_to_import = Dir.glob(import_path + '**/*.git') + + unless user = User.admins.order_id_asc.first + raise NoAdminError.new('No admin user found to import repositories') + end + + repos_to_import.each do |repo_path| + bare_repo = Gitlab::BareRepositoryImport::Repository.new(import_path, repo_path) + + if bare_repo.hashed? || bare_repo.wiki? + log " * Skipping repo #{bare_repo.repo_path}".color(:yellow) + + next + end + + log "Processing #{repo_path}".color(:yellow) + + new(user, bare_repo).create_project_if_needed + end + end + + attr_reader :user, :project_name, :bare_repo + + delegate :log, to: :class + delegate :project_name, :project_full_path, :group_path, :repo_path, :wiki_path, to: :bare_repo + + def initialize(user, bare_repo) + @user = user + @bare_repo = bare_repo + end + + def create_project_if_needed + if project = Project.find_by_full_path(project_full_path) + log " * #{project.name} (#{project_full_path}) exists" + + return project + end + + create_project + end + + private + + def create_project + group = find_or_create_groups + + project = Projects::CreateService.new(user, + name: project_name, + path: project_name, + skip_disk_validation: true, + namespace_id: group&.id).execute + + if project.persisted? && mv_repo(project) + log " * Created #{project.name} (#{project_full_path})".color(:green) + + ProjectCacheWorker.perform_async(project.id) + else + log " * Failed trying to create #{project.name} (#{project_full_path})".color(:red) + log " Errors: #{project.errors.messages}".color(:red) if project.errors.any? + end + + project + end + + def mv_repo(project) + FileUtils.mv(repo_path, File.join(project.repository_storage_path, project.disk_path + '.git')) + + if bare_repo.wiki_exists? + FileUtils.mv(wiki_path, File.join(project.repository_storage_path, project.disk_path + '.wiki.git')) + end + + true + rescue => e + log " * Failed to move repo: #{e.message}".color(:red) + + false + end + + def find_or_create_groups + return nil unless group_path.present? + + log " * Using namespace: #{group_path}" + + Groups::NestedCreateService.new(user, group_path: group_path).execute + end + + # This is called from within a rake task only used by Admins, so allow writing + # to STDOUT + def self.log(message) + puts message # rubocop:disable Rails/Output + end + end + end +end diff --git a/lib/gitlab/bare_repository_import/repository.rb b/lib/gitlab/bare_repository_import/repository.rb new file mode 100644 index 00000000000..8574ac6eb30 --- /dev/null +++ b/lib/gitlab/bare_repository_import/repository.rb @@ -0,0 +1,42 @@ +module Gitlab + module BareRepositoryImport + class Repository + attr_reader :group_path, :project_name, :repo_path + + def initialize(root_path, repo_path) + @root_path = root_path + @repo_path = repo_path + + # Split path into 'all/the/namespaces' and 'project_name' + @group_path, _, @project_name = repo_relative_path.rpartition('/') + end + + def wiki_exists? + File.exist?(wiki_path) + end + + def wiki? + @wiki ||= repo_path.end_with?('.wiki.git') + end + + def wiki_path + @wiki_path ||= repo_path.sub(/\.git$/, '.wiki.git') + end + + def hashed? + @hashed ||= group_path.start_with?('@hashed') + end + + def project_full_path + @project_full_path ||= "#{group_path}/#{project_name}" + end + + private + + def repo_relative_path + # Remove root path and `.git` at the end + repo_path[@root_path.size...-4] + end + end + end +end diff --git a/lib/gitlab/bare_repository_importer.rb b/lib/gitlab/bare_repository_importer.rb deleted file mode 100644 index 1d98d187805..00000000000 --- a/lib/gitlab/bare_repository_importer.rb +++ /dev/null @@ -1,97 +0,0 @@ -module Gitlab - class BareRepositoryImporter - NoAdminError = Class.new(StandardError) - - def self.execute - Gitlab.config.repositories.storages.each do |storage_name, repository_storage| - git_base_path = repository_storage['path'] - repos_to_import = Dir.glob(git_base_path + '/**/*.git') - - repos_to_import.each do |repo_path| - if repo_path.end_with?('.wiki.git') - log " * Skipping wiki repo" - next - end - - log "Processing #{repo_path}".color(:yellow) - - repo_relative_path = repo_path[repository_storage['path'].length..-1] - .sub(/^\//, '') # Remove leading `/` - .sub(/\.git$/, '') # Remove `.git` at the end - new(storage_name, repo_relative_path).create_project_if_needed - end - end - end - - attr_reader :storage_name, :full_path, :group_path, :project_path, :user - delegate :log, to: :class - - def initialize(storage_name, repo_path) - @storage_name = storage_name - @full_path = repo_path - - unless @user = User.admins.order_id_asc.first - raise NoAdminError.new('No admin user found to import repositories') - end - - @group_path, @project_path = File.split(repo_path) - @group_path = nil if @group_path == '.' - end - - def create_project_if_needed - if project = Project.find_by_full_path(full_path) - log " * #{project.name} (#{full_path}) exists" - return project - end - - create_project - end - - private - - def create_project - group = find_or_create_group - - project_params = { - name: project_path, - path: project_path, - repository_storage: storage_name, - namespace_id: group&.id, - skip_disk_validation: true - } - - project = Projects::CreateService.new(user, project_params).execute - - if project.persisted? - log " * Created #{project.name} (#{full_path})".color(:green) - ProjectCacheWorker.perform_async(project.id) - else - log " * Failed trying to create #{project.name} (#{full_path})".color(:red) - log " Errors: #{project.errors.messages}".color(:red) - end - - project - end - - def find_or_create_group - return nil unless group_path - - if namespace = Namespace.find_by_full_path(group_path) - log " * Namespace #{group_path} exists.".color(:green) - return namespace - end - - log " * Creating Group: #{group_path}" - Groups::NestedCreateService.new(user, group_path: group_path).execute - end - - # This is called from within a rake task only used by Admins, so allow writing - # to STDOUT - # - # rubocop:disable Rails/Output - def self.log(message) - puts message - end - # rubocop:enable Rails/Output - end -end diff --git a/lib/gitlab/bitbucket_import/importer.rb b/lib/gitlab/bitbucket_import/importer.rb index 033ecd15749..d48ae17aeaf 100644 --- a/lib/gitlab/bitbucket_import/importer.rb +++ b/lib/gitlab/bitbucket_import/importer.rb @@ -61,9 +61,9 @@ module Gitlab def import_wiki return if project.wiki.repository_exists? - path_with_namespace = "#{project.full_path}.wiki" + disk_path = project.wiki.disk_path import_url = project.import_url.sub(/\.git\z/, ".git/wiki") - gitlab_shell.import_repository(project.repository_storage_path, path_with_namespace, import_url) + gitlab_shell.import_repository(project.repository_storage_path, disk_path, import_url) rescue StandardError => e errors << { type: :wiki, errors: e.message } end diff --git a/lib/gitlab/changes_list.rb b/lib/gitlab/changes_list.rb index 5b32fca00a4..9c9e6668e6f 100644 --- a/lib/gitlab/changes_list.rb +++ b/lib/gitlab/changes_list.rb @@ -16,6 +16,7 @@ module Gitlab @changes ||= begin @raw_changes.map do |change| next if change.blank? + oldrev, newrev, ref = change.strip.split(' ') { oldrev: oldrev, newrev: newrev, ref: ref } end.compact diff --git a/lib/gitlab/checks/change_access.rb b/lib/gitlab/checks/change_access.rb index b6805230348..ef92fc5a0a0 100644 --- a/lib/gitlab/checks/change_access.rb +++ b/lib/gitlab/checks/change_access.rb @@ -12,7 +12,8 @@ module Gitlab change_existing_tags: 'You are not allowed to change existing tags on this project.', update_protected_tag: 'Protected tags cannot be updated.', delete_protected_tag: 'Protected tags cannot be deleted.', - create_protected_tag: 'You are not allowed to create this tag as it is protected.' + create_protected_tag: 'You are not allowed to create this tag as it is protected.', + lfs_objects_missing: 'LFS objects are missing. Ensure LFS is properly set up or try a manual "git lfs push --all".' }.freeze attr_reader :user_access, :project, :skip_authorization, :protocol @@ -36,6 +37,7 @@ module Gitlab push_checks branch_checks tag_checks + lfs_objects_exist_check true end @@ -136,6 +138,14 @@ module Gitlab def matching_merge_request? Checks::MatchingMergeRequest.new(@newrev, @branch_name, @project).match? end + + def lfs_objects_exist_check + lfs_check = Checks::LfsIntegrity.new(project, @newrev) + + if lfs_check.objects_missing? + raise GitAccess::UnauthorizedError, ERROR_MESSAGES[:lfs_objects_missing] + end + end end end end diff --git a/lib/gitlab/checks/lfs_integrity.rb b/lib/gitlab/checks/lfs_integrity.rb new file mode 100644 index 00000000000..f7276a380dc --- /dev/null +++ b/lib/gitlab/checks/lfs_integrity.rb @@ -0,0 +1,27 @@ +module Gitlab + module Checks + class LfsIntegrity + REV_LIST_OBJECT_LIMIT = 2_000 + + def initialize(project, newrev) + @project = project + @newrev = newrev + end + + def objects_missing? + return false unless @newrev && @project.lfs_enabled? + + new_lfs_pointers = Gitlab::Git::LfsChanges.new(@project.repository, @newrev).new_pointers(object_limit: REV_LIST_OBJECT_LIMIT) + + return false unless new_lfs_pointers.present? + + existing_count = @project.lfs_storage_project + .lfs_objects + .where(oid: new_lfs_pointers.map(&:lfs_oid)) + .count + + existing_count != new_lfs_pointers.count + end + end + end +end diff --git a/lib/gitlab/ci/build/artifacts/metadata.rb b/lib/gitlab/ci/build/artifacts/metadata.rb index a788fb3fcbc..0bbd60d8ffe 100644 --- a/lib/gitlab/ci/build/artifacts/metadata.rb +++ b/lib/gitlab/ci/build/artifacts/metadata.rb @@ -98,6 +98,7 @@ module Gitlab def read_string(gz) string_size = read_uint32(gz) return nil unless string_size + gz.read(string_size) end diff --git a/lib/gitlab/ci/build/artifacts/metadata/entry.rb b/lib/gitlab/ci/build/artifacts/metadata/entry.rb index 22941d48edf..5b2f09e03ea 100644 --- a/lib/gitlab/ci/build/artifacts/metadata/entry.rb +++ b/lib/gitlab/ci/build/artifacts/metadata/entry.rb @@ -43,6 +43,7 @@ module Gitlab def parent return nil unless has_parent? + self.class.new(@path.to_s.chomp(basename), @entries) end @@ -64,6 +65,7 @@ module Gitlab def directories(opts = {}) return [] unless directory? + dirs = children.select(&:directory?) return dirs unless has_parent? && opts[:parent] @@ -74,6 +76,7 @@ module Gitlab def files return [] unless directory? + children.select(&:file?) end diff --git a/lib/gitlab/ci/build/image.rb b/lib/gitlab/ci/build/image.rb index b88b2e36d53..c811f88f483 100644 --- a/lib/gitlab/ci/build/image.rb +++ b/lib/gitlab/ci/build/image.rb @@ -8,6 +8,7 @@ module Gitlab def from_image(job) image = Gitlab::Ci::Build::Image.new(job.options[:image]) return unless image.valid? + image end diff --git a/lib/gitlab/ci/config/entry/image.rb b/lib/gitlab/ci/config/entry/image.rb index 6555c589173..2844be80a84 100644 --- a/lib/gitlab/ci/config/entry/image.rb +++ b/lib/gitlab/ci/config/entry/image.rb @@ -37,6 +37,7 @@ module Gitlab def value return { name: @config } if string? return @config if hash? + {} end end diff --git a/lib/gitlab/ci/config/entry/validators.rb b/lib/gitlab/ci/config/entry/validators.rb index 0159179f0a9..eb606b57667 100644 --- a/lib/gitlab/ci/config/entry/validators.rb +++ b/lib/gitlab/ci/config/entry/validators.rb @@ -111,6 +111,7 @@ module Gitlab def validate_string_or_regexp(value) return false unless value.is_a?(String) return validate_regexp(value) if look_like_regexp?(value) + true end end diff --git a/lib/gitlab/ci/status/build/cancelable.rb b/lib/gitlab/ci/status/build/cancelable.rb index 8ad3e57e59d..2d9166d6bdd 100644 --- a/lib/gitlab/ci/status/build/cancelable.rb +++ b/lib/gitlab/ci/status/build/cancelable.rb @@ -8,7 +8,7 @@ module Gitlab end def action_icon - 'icon_action_cancel' + 'cancel' end def action_path diff --git a/lib/gitlab/ci/status/build/failed_allowed.rb b/lib/gitlab/ci/status/build/failed_allowed.rb index e42d3574357..dc90f398c7e 100644 --- a/lib/gitlab/ci/status/build/failed_allowed.rb +++ b/lib/gitlab/ci/status/build/failed_allowed.rb @@ -8,7 +8,7 @@ module Gitlab end def icon - 'icon_status_warning' + 'status_warning' end def group diff --git a/lib/gitlab/ci/status/build/play.rb b/lib/gitlab/ci/status/build/play.rb index c7726543599..b7b45466d3b 100644 --- a/lib/gitlab/ci/status/build/play.rb +++ b/lib/gitlab/ci/status/build/play.rb @@ -12,7 +12,7 @@ module Gitlab end def action_icon - 'icon_action_play' + 'play' end def action_title diff --git a/lib/gitlab/ci/status/build/retryable.rb b/lib/gitlab/ci/status/build/retryable.rb index 8c8fdc56d75..44ffe783e50 100644 --- a/lib/gitlab/ci/status/build/retryable.rb +++ b/lib/gitlab/ci/status/build/retryable.rb @@ -8,7 +8,7 @@ module Gitlab end def action_icon - 'icon_action_retry' + 'retry' end def action_title diff --git a/lib/gitlab/ci/status/build/stop.rb b/lib/gitlab/ci/status/build/stop.rb index d464738deaf..46e730797e4 100644 --- a/lib/gitlab/ci/status/build/stop.rb +++ b/lib/gitlab/ci/status/build/stop.rb @@ -12,7 +12,7 @@ module Gitlab end def action_icon - 'icon_action_stop' + 'stop' end def action_title diff --git a/lib/gitlab/ci/status/canceled.rb b/lib/gitlab/ci/status/canceled.rb index e5fdc1f8136..e6195a60d4f 100644 --- a/lib/gitlab/ci/status/canceled.rb +++ b/lib/gitlab/ci/status/canceled.rb @@ -11,7 +11,7 @@ module Gitlab end def icon - 'icon_status_canceled' + 'status_canceled' end def favicon diff --git a/lib/gitlab/ci/status/created.rb b/lib/gitlab/ci/status/created.rb index d188bd286a6..846f00b83dd 100644 --- a/lib/gitlab/ci/status/created.rb +++ b/lib/gitlab/ci/status/created.rb @@ -11,7 +11,7 @@ module Gitlab end def icon - 'icon_status_created' + 'status_created' end def favicon diff --git a/lib/gitlab/ci/status/failed.rb b/lib/gitlab/ci/status/failed.rb index 38e45714c22..27ce85bd3ed 100644 --- a/lib/gitlab/ci/status/failed.rb +++ b/lib/gitlab/ci/status/failed.rb @@ -11,7 +11,7 @@ module Gitlab end def icon - 'icon_status_failed' + 'status_failed' end def favicon diff --git a/lib/gitlab/ci/status/manual.rb b/lib/gitlab/ci/status/manual.rb index a4a7edadac9..fc387e2fd25 100644 --- a/lib/gitlab/ci/status/manual.rb +++ b/lib/gitlab/ci/status/manual.rb @@ -11,7 +11,7 @@ module Gitlab end def icon - 'icon_status_manual' + 'status_manual' end def favicon diff --git a/lib/gitlab/ci/status/pending.rb b/lib/gitlab/ci/status/pending.rb index 5164260b861..6780780db32 100644 --- a/lib/gitlab/ci/status/pending.rb +++ b/lib/gitlab/ci/status/pending.rb @@ -11,7 +11,7 @@ module Gitlab end def icon - 'icon_status_pending' + 'status_pending' end def favicon diff --git a/lib/gitlab/ci/status/running.rb b/lib/gitlab/ci/status/running.rb index 993937e98ca..ee13905e46d 100644 --- a/lib/gitlab/ci/status/running.rb +++ b/lib/gitlab/ci/status/running.rb @@ -11,7 +11,7 @@ module Gitlab end def icon - 'icon_status_running' + 'status_running' end def favicon diff --git a/lib/gitlab/ci/status/skipped.rb b/lib/gitlab/ci/status/skipped.rb index 0c942920b02..0dbdc4de426 100644 --- a/lib/gitlab/ci/status/skipped.rb +++ b/lib/gitlab/ci/status/skipped.rb @@ -11,7 +11,7 @@ module Gitlab end def icon - 'icon_status_skipped' + 'status_skipped' end def favicon diff --git a/lib/gitlab/ci/status/success.rb b/lib/gitlab/ci/status/success.rb index d7af98857b0..731013ec017 100644 --- a/lib/gitlab/ci/status/success.rb +++ b/lib/gitlab/ci/status/success.rb @@ -11,7 +11,7 @@ module Gitlab end def icon - 'icon_status_success' + 'status_success' end def favicon diff --git a/lib/gitlab/ci/status/success_warning.rb b/lib/gitlab/ci/status/success_warning.rb index 4d7d82e04cf..32b4cf43e48 100644 --- a/lib/gitlab/ci/status/success_warning.rb +++ b/lib/gitlab/ci/status/success_warning.rb @@ -15,7 +15,7 @@ module Gitlab end def icon - 'icon_status_warning' + 'status_warning' end def group diff --git a/lib/gitlab/daemon.rb b/lib/gitlab/daemon.rb index dfd17e35707..633de9f9776 100644 --- a/lib/gitlab/daemon.rb +++ b/lib/gitlab/daemon.rb @@ -2,6 +2,7 @@ module Gitlab class Daemon def self.initialize_instance(*args) raise "#{name} singleton instance already initialized" if @instance + @instance = new(*args) Kernel.at_exit(&@instance.method(:stop)) @instance @@ -43,7 +44,7 @@ module Gitlab if thread thread.wakeup if thread.alive? - thread.join + thread.join unless Thread.current == thread @thread = nil end end diff --git a/lib/gitlab/database.rb b/lib/gitlab/database.rb index 357f16936c6..cd7b4c043da 100644 --- a/lib/gitlab/database.rb +++ b/lib/gitlab/database.rb @@ -4,6 +4,10 @@ module Gitlab # https://www.postgresql.org/docs/9.2/static/datatype-numeric.html # http://dev.mysql.com/doc/refman/5.7/en/integer-types.html MAX_INT_VALUE = 2147483647 + # The max value between MySQL's TIMESTAMP and PostgreSQL's timestampz: + # https://www.postgresql.org/docs/9.1/static/datatype-datetime.html + # https://dev.mysql.com/doc/refman/5.7/en/datetime.html + MAX_TIMESTAMP_VALUE = Time.at((1 << 31) - 1).freeze def self.config ActiveRecord::Base.configurations[Rails.env] @@ -104,20 +108,45 @@ module Gitlab end end - def self.bulk_insert(table, rows) + # Bulk inserts a number of rows into a table, optionally returning their + # IDs. + # + # table - The name of the table to insert the rows into. + # rows - An Array of Hash instances, each mapping the columns to their + # values. + # return_ids - When set to true the return value will be an Array of IDs of + # the inserted rows, this only works on PostgreSQL. + def self.bulk_insert(table, rows, return_ids: false) return if rows.empty? keys = rows.first.keys columns = keys.map { |key| connection.quote_column_name(key) } + return_ids = false if mysql? tuples = rows.map do |row| row.values_at(*keys).map { |value| connection.quote(value) } end - connection.execute <<-EOF + sql = <<-EOF INSERT INTO #{table} (#{columns.join(', ')}) VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')} EOF + + if return_ids + sql << 'RETURNING id' + end + + result = connection.execute(sql) + + if return_ids + result.values.map { |tuple| tuple[0].to_i } + else + [] + end + end + + def self.sanitize_timestamp(timestamp) + MAX_TIMESTAMP_VALUE > timestamp ? timestamp : MAX_TIMESTAMP_VALUE.dup end # pool_size - The size of the DB pool. diff --git a/lib/gitlab/database/grant.rb b/lib/gitlab/database/grant.rb index aee3981e79a..9f76967fc77 100644 --- a/lib/gitlab/database/grant.rb +++ b/lib/gitlab/database/grant.rb @@ -6,28 +6,36 @@ module Gitlab if Database.postgresql? 'information_schema.role_table_grants' else - 'mysql.user' + 'information_schema.schema_privileges' end - def self.scope_to_current_user - if Database.postgresql? - where('grantee = user') - else - where("CONCAT(User, '@', Host) = current_user()") - end - end - # Returns true if the current user can create and execute triggers on the # given table. def self.create_and_execute_trigger?(table) priv = if Database.postgresql? where(privilege_type: 'TRIGGER', table_name: table) + .where('grantee = user') else - where(Trigger_priv: 'Y') + queries = [ + Grant.select(1) + .from('information_schema.user_privileges') + .where("PRIVILEGE_TYPE = 'SUPER'") + .where("GRANTEE = CONCAT('\\'', REPLACE(CURRENT_USER(), '@', '\\'@\\''), '\\'')"), + + Grant.select(1) + .from('information_schema.schema_privileges') + .where("PRIVILEGE_TYPE = 'TRIGGER'") + .where('TABLE_SCHEMA = ?', Gitlab::Database.database_name) + .where("GRANTEE = CONCAT('\\'', REPLACE(CURRENT_USER(), '@', '\\'@\\''), '\\'')") + ] + + union = SQL::Union.new(queries).to_sql + + Grant.from("(#{union}) privs") end - priv.scope_to_current_user.any? + priv.any? end end end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 2c35da8f1aa..c276c3566b4 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -220,6 +220,15 @@ module Gitlab # column - The name of the column to update. # value - The value for the column. # + # The `value` argument is typically a literal. To perform a computed + # update, an Arel literal can be used instead: + # + # update_value = Arel.sql('bar * baz') + # + # update_column_in_batches(:projects, :foo, update_value) do |table, query| + # query.where(table[:some_column].eq('hello')) + # end + # # Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop # determines this method to be too complex while there's no way to make it # less "complex" without introducing extra methods (which actually will diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/migration_classes.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/migration_classes.rb index 5481024db8e..7e492938eac 100644 --- a/lib/gitlab/database/rename_reserved_paths_migration/v1/migration_classes.rb +++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/migration_classes.rb @@ -68,6 +68,11 @@ module Gitlab has_one :route, as: :source self.table_name = 'projects' + HASHED_STORAGE_FEATURES = { + repository: 1, + attachments: 2 + }.freeze + def repository_storage_path Gitlab.config.repositories.storages[repository_storage]['path'] end @@ -76,6 +81,13 @@ module Gitlab def self.name 'Project' end + + def hashed_storage?(feature) + raise ArgumentError, "Invalid feature" unless HASHED_STORAGE_FEATURES.include?(feature) + return false unless respond_to?(:storage_version) + + self.storage_version && self.storage_version >= HASHED_STORAGE_FEATURES[feature] + end end end end diff --git a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb index 75a75f61953..d32616862f0 100644 --- a/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb +++ b/lib/gitlab/database/rename_reserved_paths_migration/v1/rename_projects.rb @@ -22,9 +22,11 @@ module Gitlab end def move_project_folders(project, old_full_path, new_full_path) - move_repository(project, old_full_path, new_full_path) - move_repository(project, "#{old_full_path}.wiki", "#{new_full_path}.wiki") - move_uploads(old_full_path, new_full_path) + unless project.hashed_storage?(:repository) + move_repository(project, old_full_path, new_full_path) + move_repository(project, "#{old_full_path}.wiki", "#{new_full_path}.wiki") + end + move_uploads(old_full_path, new_full_path) unless project.hashed_storage?(:attachments) move_pages(old_full_path, new_full_path) end diff --git a/lib/gitlab/diff/file.rb b/lib/gitlab/diff/file.rb index ea5891a028a..d0cfe2386ca 100644 --- a/lib/gitlab/diff/file.rb +++ b/lib/gitlab/diff/file.rb @@ -25,6 +25,10 @@ module Gitlab @repository = repository @diff_refs = diff_refs @fallback_diff_refs = fallback_diff_refs + + # Ensure items are collected in the the batch + new_blob + old_blob end def position(position_marker, position_type: :text) @@ -95,21 +99,15 @@ module Gitlab end def new_blob - return @new_blob if defined?(@new_blob) - - sha = new_content_sha - return @new_blob = nil unless sha + return unless new_content_sha - @new_blob = repository.blob_at(sha, file_path) + Blob.lazy(repository.project, new_content_sha, file_path) end def old_blob - return @old_blob if defined?(@old_blob) - - sha = old_content_sha - return @old_blob = nil unless sha + return unless old_content_sha - @old_blob = repository.blob_at(sha, old_path) + Blob.lazy(repository.project, old_content_sha, old_path) end def content_sha diff --git a/lib/gitlab/diff/file_collection/base.rb b/lib/gitlab/diff/file_collection/base.rb index 88ae65cb468..a6007ebf531 100644 --- a/lib/gitlab/diff/file_collection/base.rb +++ b/lib/gitlab/diff/file_collection/base.rb @@ -22,10 +22,7 @@ module Gitlab end def diff_files - # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37445 - Gitlab::GitalyClient.allow_n_plus_1_calls do - @diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) } - end + @diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) } end def diff_file_with_old_path(old_path) diff --git a/lib/gitlab/diff/inline_diff.rb b/lib/gitlab/diff/inline_diff.rb index 55708d42161..2d7b57120a6 100644 --- a/lib/gitlab/diff/inline_diff.rb +++ b/lib/gitlab/diff/inline_diff.rb @@ -102,6 +102,7 @@ module Gitlab new_char = b[pos] break if old_char != new_char + length += 1 end diff --git a/lib/gitlab/diff/parser.rb b/lib/gitlab/diff/parser.rb index 7dc9cc7c281..8302f30a0a2 100644 --- a/lib/gitlab/diff/parser.rb +++ b/lib/gitlab/diff/parser.rb @@ -30,6 +30,7 @@ module Gitlab line_new = line.match(/\+[0-9]*/)[0].to_i.abs rescue 0 next if line_old <= 1 && line_new <= 1 # top of file + yielder << Gitlab::Diff::Line.new(full_line, type, line_obj_index, line_old, line_new) line_obj_index += 1 next diff --git a/lib/gitlab/diff/position.rb b/lib/gitlab/diff/position.rb index ccfb908bcca..690b27cde81 100644 --- a/lib/gitlab/diff/position.rb +++ b/lib/gitlab/diff/position.rb @@ -125,6 +125,7 @@ module Gitlab def find_diff_file(repository) return unless diff_refs.complete? return unless comparison = diff_refs.compare_in(repository.project) + comparison.diffs(paths: paths, expanded: true).diff_files.first end diff --git a/lib/gitlab/ee_compat_check.rb b/lib/gitlab/ee_compat_check.rb index c4c60d1dfee..efc2e46d289 100644 --- a/lib/gitlab/ee_compat_check.rb +++ b/lib/gitlab/ee_compat_check.rb @@ -2,8 +2,8 @@ module Gitlab # Checks if a set of migrations requires downtime or not. class EeCompatCheck - DEFAULT_CE_REPO = 'https://gitlab.com/gitlab-org/gitlab-ce.git'.freeze - EE_REPO = 'https://gitlab.com/gitlab-org/gitlab-ee.git'.freeze + DEFAULT_CE_PROJECT_URL = 'https://gitlab.com/gitlab-org/gitlab-ce'.freeze + EE_REPO_URL = 'https://gitlab.com/gitlab-org/gitlab-ee.git'.freeze CHECK_DIR = Rails.root.join('ee_compat_check') IGNORED_FILES_REGEX = /(VERSION|CHANGELOG\.md:\d+)/.freeze PLEASE_READ_THIS_BANNER = %Q{ @@ -17,14 +17,16 @@ module Gitlab ============================================================\n }.freeze - attr_reader :ee_repo_dir, :patches_dir, :ce_repo, :ce_branch, :ee_branch_found - attr_reader :failed_files + attr_reader :ee_repo_dir, :patches_dir, :ce_project_url, :ce_repo_url, :ce_branch, :ee_branch_found + attr_reader :job_id, :failed_files - def initialize(branch:, ce_repo: DEFAULT_CE_REPO) + def initialize(branch:, ce_project_url: DEFAULT_CE_PROJECT_URL, job_id: nil) @ee_repo_dir = CHECK_DIR.join('ee-repo') @patches_dir = CHECK_DIR.join('patches') @ce_branch = branch - @ce_repo = ce_repo + @ce_project_url = ce_project_url + @ce_repo_url = "#{ce_project_url}.git" + @job_id = job_id end def check @@ -59,8 +61,8 @@ module Gitlab step("#{ee_repo_dir} already exists") else step( - "Cloning #{EE_REPO} into #{ee_repo_dir}", - %W[git clone --branch master --single-branch --depth=200 #{EE_REPO} #{ee_repo_dir}] + "Cloning #{EE_REPO_URL} into #{ee_repo_dir}", + %W[git clone --branch master --single-branch --depth=200 #{EE_REPO_URL} #{ee_repo_dir}] ) end end @@ -132,7 +134,7 @@ module Gitlab def check_patch(patch_path) step("Checking out master", %w[git checkout master]) step("Resetting to latest master", %w[git reset --hard origin/master]) - step("Fetching CE/#{ce_branch}", %W[git fetch #{ce_repo} #{ce_branch}]) + step("Fetching CE/#{ce_branch}", %W[git fetch #{ce_repo_url} #{ce_branch}]) step( "Checking if #{patch_path} applies cleanly to EE/master", # Don't use --check here because it can result in a 0-exit status even @@ -191,7 +193,7 @@ module Gitlab # Repository is initially cloned with a depth of 20 so we need to fetch # deeper in the case the branch has more than 20 commits on top of master fetch(branch: branch, depth: depth) - fetch(branch: 'master', depth: depth) + fetch(branch: 'master', depth: depth, remote: DEFAULT_CE_PROJECT_URL) merge_base_found? end @@ -199,10 +201,10 @@ module Gitlab raise "\n#{branch} is too far behind master, please rebase it!\n" unless success end - def fetch(branch:, depth:) + def fetch(branch:, depth:, remote: 'origin') step( "Fetching deeper...", - %W[git fetch --depth=#{depth} --prune origin +refs/heads/#{branch}:refs/remotes/origin/#{branch}] + %W[git fetch --depth=#{depth} --prune #{remote} +refs/heads/#{branch}:refs/remotes/origin/#{branch}] ) do |output, status| raise "Fetch failed: #{output}" unless status.zero? end @@ -237,7 +239,7 @@ module Gitlab end def patch_url - "https://gitlab.com/gitlab-org/gitlab-ce/-/jobs/#{ENV['CI_JOB_ID']}/artifacts/raw/ee_compat_check/patches/#{ce_patch_name}" + "#{ce_project_url}/-/jobs/#{job_id}/artifacts/raw/ee_compat_check/patches/#{ce_patch_name}" end def step(desc, cmd = nil) @@ -304,7 +306,7 @@ module Gitlab # In the EE repo $ git fetch origin $ git checkout -b #{ee_branch_prefix} origin/master - $ git fetch #{ce_repo} #{ce_branch} + $ git fetch #{ce_repo_url} #{ce_branch} $ git cherry-pick SHA # Repeat for all the commits you want to pick You can squash the `#{ce_branch}` commits into a single "Port of #{ce_branch} to EE" commit. diff --git a/lib/gitlab/email/handler/unsubscribe_handler.rb b/lib/gitlab/email/handler/unsubscribe_handler.rb index 5894384da5d..ea80e21532e 100644 --- a/lib/gitlab/email/handler/unsubscribe_handler.rb +++ b/lib/gitlab/email/handler/unsubscribe_handler.rb @@ -16,6 +16,7 @@ module Gitlab noteable = sent_notification.noteable raise NoteableNotFoundError unless noteable + noteable.unsubscribe(sent_notification.recipient) end diff --git a/lib/gitlab/fogbugz_import/client.rb b/lib/gitlab/fogbugz_import/client.rb index 2152182b37f..acb000e3e23 100644 --- a/lib/gitlab/fogbugz_import/client.rb +++ b/lib/gitlab/fogbugz_import/client.rb @@ -45,6 +45,7 @@ module Gitlab project_name = repo(project_id).name res = @api.command(:search, q: "project:'#{project_name}'", cols: 'ixPersonAssignedTo,ixPersonOpenedBy,ixPersonClosedBy,sStatus,sPriority,sCategory,fOpen,sTitle,sLatestTextSummary,dtOpened,dtClosed,dtResolved,dtLastUpdated,events') return [] unless res['cases']['count'].to_i > 0 + res['cases']['case'] end diff --git a/lib/gitlab/fogbugz_import/importer.rb b/lib/gitlab/fogbugz_import/importer.rb index 3dcee681c72..5e426b13ade 100644 --- a/lib/gitlab/fogbugz_import/importer.rb +++ b/lib/gitlab/fogbugz_import/importer.rb @@ -18,6 +18,7 @@ module Gitlab def execute return true unless repo.valid? + client = Gitlab::FogbugzImport::Client.new(token: fb_session[:token], uri: fb_session[:uri]) @cases = client.cases(@repo.id.to_i) @@ -206,6 +207,7 @@ module Gitlab def format_content(raw_content) return raw_content if raw_content.nil? + linkify_issues(escape_for_markdown(raw_content)) end diff --git a/lib/gitlab/gcp/model.rb b/lib/gitlab/gcp/model.rb deleted file mode 100644 index 195391f0e3c..00000000000 --- a/lib/gitlab/gcp/model.rb +++ /dev/null @@ -1,13 +0,0 @@ -module Gitlab - module Gcp - module Model - def table_name_prefix - "gcp_" - end - - def model_name - @model_name ||= ActiveModel::Name.new(self, nil, self.name.split("::").last) - end - end - end -end diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index a4336facee5..ddd52136bc4 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -12,6 +12,12 @@ module Gitlab # blob data should use load_all_data!. MAX_DATA_DISPLAY_SIZE = 10.megabytes + # These limits are used as a heuristic to ignore files which can't be LFS + # pointers. The format of these is described in + # https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer + LFS_POINTER_MIN_SIZE = 120.bytes + LFS_POINTER_MAX_SIZE = 200.bytes + attr_accessor :name, :path, :size, :data, :mode, :id, :commit_id, :loaded_size, :binary class << self @@ -30,16 +36,7 @@ module Gitlab if is_enabled Gitlab::GitalyClient::BlobService.new(repository).get_blob(oid: sha, limit: MAX_DATA_DISPLAY_SIZE) else - blob = repository.lookup(sha) - - next unless blob.is_a?(Rugged::Blob) - - new( - id: blob.oid, - size: blob.size, - data: blob.content(MAX_DATA_DISPLAY_SIZE), - binary: blob.binary? - ) + rugged_raw(repository, sha, limit: MAX_DATA_DISPLAY_SIZE) end end end @@ -59,10 +56,25 @@ module Gitlab end end + # Find LFS blobs given an array of sha ids + # Returns array of Gitlab::Git::Blob + # Does not guarantee blob data will be set + def batch_lfs_pointers(repository, blob_ids) + blob_ids.lazy + .select { |sha| possible_lfs_blob?(repository, sha) } + .map { |sha| rugged_raw(repository, sha, limit: LFS_POINTER_MAX_SIZE) } + .select(&:lfs_pointer?) + .force + end + def binary?(data) EncodingHelper.detect_libgit2_binary?(data) end + def size_could_be_lfs?(size) + size.between?(LFS_POINTER_MIN_SIZE, LFS_POINTER_MAX_SIZE) + end + private # Recursive search of blob id by path @@ -90,6 +102,7 @@ module Gitlab if path_arr.size > 1 return nil unless entry[:type] == :tree + path_arr.shift find_entry_by_path(repository, entry[:oid], path_arr.join('/')) else @@ -166,6 +179,31 @@ module Gitlab ) end end + rescue Rugged::ReferenceError + nil + end + + def rugged_raw(repository, sha, limit:) + blob = repository.lookup(sha) + + return unless blob.is_a?(Rugged::Blob) + + new( + id: blob.oid, + size: blob.size, + data: blob.content(limit), + binary: blob.binary? + ) + end + + # Efficient lookup to determine if object size + # and type make it a possible LFS blob without loading + # blob content into memory with repository.lookup(sha) + def possible_lfs_blob?(repository, sha) + object_header = repository.rugged.read_header(sha) + + object_header[:type] == :blob && + size_could_be_lfs?(object_header[:len]) end end @@ -226,7 +264,7 @@ module Gitlab # size # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer def lfs_pointer? - has_lfs_version_key? && lfs_oid.present? && lfs_size.present? + self.class.size_could_be_lfs?(size) && has_lfs_version_key? && lfs_oid.present? && lfs_size.present? end def lfs_oid diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb index 23ae37ff71e..d5518814483 100644 --- a/lib/gitlab/git/commit.rb +++ b/lib/gitlab/git/commit.rb @@ -73,7 +73,7 @@ module Gitlab decorate(repo, commit) if commit rescue Rugged::ReferenceError, Rugged::InvalidError, Rugged::ObjectError, Gitlab::Git::CommandError, Gitlab::Git::Repository::NoRepository, - Rugged::OdbError, Rugged::TreeError + Rugged::OdbError, Rugged::TreeError, ArgumentError nil end diff --git a/lib/gitlab/git/lfs_changes.rb b/lib/gitlab/git/lfs_changes.rb new file mode 100644 index 00000000000..732dd5d998a --- /dev/null +++ b/lib/gitlab/git/lfs_changes.rb @@ -0,0 +1,33 @@ +module Gitlab + module Git + class LfsChanges + def initialize(repository, newrev) + @repository = repository + @newrev = newrev + end + + def new_pointers(object_limit: nil, not_in: nil) + @new_pointers ||= begin + rev_list.new_objects(not_in: not_in, require_path: true) do |object_ids| + object_ids = object_ids.take(object_limit) if object_limit + + Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids) + end + end + end + + def all_pointers + rev_list.all_objects(require_path: true) do |object_ids| + Gitlab::Git::Blob.batch_lfs_pointers(@repository, object_ids) + end + end + + private + + def rev_list + ::Gitlab::Git::RevList.new(path_to_repo: @repository.path_to_repo, + newrev: @newrev) + end + end + end +end diff --git a/lib/gitlab/git/operation_service.rb b/lib/gitlab/git/operation_service.rb index ab94ba8a73a..e36d5410431 100644 --- a/lib/gitlab/git/operation_service.rb +++ b/lib/gitlab/git/operation_service.rb @@ -72,7 +72,7 @@ module Gitlab # Whenever `start_branch_name` is passed, if `branch_name` doesn't exist, # it would be created from `start_branch_name`. - # If `start_project` is passed, and the branch doesn't exist, + # If `start_repository` is passed, and the branch doesn't exist, # it would try to find the commits from it instead of current repository. def with_branch( branch_name, @@ -80,15 +80,13 @@ module Gitlab start_repository: repository, &block) - # Refactoring aid - unless start_repository.is_a?(Gitlab::Git::Repository) - raise "expected a Gitlab::Git::Repository, got #{start_repository}" - end + Gitlab::Git.check_namespace!(start_repository) + start_repository = RemoteRepository.new(start_repository) unless start_repository.is_a?(RemoteRepository) start_branch_name = nil if start_repository.empty_repo? if start_branch_name && !start_repository.branch_exists?(start_branch_name) - raise ArgumentError, "Cannot find branch #{start_branch_name} in #{start_repository.full_path}" + raise ArgumentError, "Cannot find branch #{start_branch_name} in #{start_repository.relative_path}" end update_branch_with_hooks(branch_name) do diff --git a/lib/gitlab/git/popen.rb b/lib/gitlab/git/popen.rb index b45da6020ee..d41fe78daa1 100644 --- a/lib/gitlab/git/popen.rb +++ b/lib/gitlab/git/popen.rb @@ -7,7 +7,7 @@ module Gitlab module Popen FAST_GIT_PROCESS_TIMEOUT = 15.seconds - def popen(cmd, path, vars = {}) + def popen(cmd, path, vars = {}, lazy_block: nil) unless cmd.is_a?(Array) raise "System commands must be given as an array of strings" end @@ -22,7 +22,12 @@ module Gitlab yield(stdin) if block_given? stdin.close - @cmd_output << stdout.read + if lazy_block + return lazy_block.call(stdout.lazy) + else + @cmd_output << stdout.read + end + @cmd_output << stderr.read @cmd_status = wait_thr.value.exitstatus end diff --git a/lib/gitlab/git/remote_repository.rb b/lib/gitlab/git/remote_repository.rb new file mode 100644 index 00000000000..3685aa20669 --- /dev/null +++ b/lib/gitlab/git/remote_repository.rb @@ -0,0 +1,82 @@ +module Gitlab + module Git + # + # When a Gitaly call involves two repositories instead of one we cannot + # assume that both repositories are on the same Gitaly server. In this + # case we need to make a distinction between the repository that the + # call is being made on (a Repository instance), and the "other" + # repository (a RemoteRepository instance). This is the reason why we + # have the RemoteRepository class in Gitlab::Git. + # + # When you make changes, be aware that gitaly-ruby sub-classes this + # class. + # + class RemoteRepository + attr_reader :path, :relative_path, :gitaly_repository + + def initialize(repository) + @relative_path = repository.relative_path + @gitaly_repository = repository.gitaly_repository + + # These instance variables will not be available in gitaly-ruby, where + # we have no disk access to this repository. + @repository = repository + @path = repository.path + end + + def empty_repo? + # We will override this implementation in gitaly-ruby because we cannot + # use '@repository' there. + @repository.empty_repo? + end + + def commit_id(revision) + # We will override this implementation in gitaly-ruby because we cannot + # use '@repository' there. + @repository.commit(revision)&.sha + end + + def branch_exists?(name) + # We will override this implementation in gitaly-ruby because we cannot + # use '@repository' there. + @repository.branch_exists?(name) + end + + # Compares self to a Gitlab::Git::Repository. This implementation uses + # 'self.gitaly_repository' so that it will also work in the + # GitalyRemoteRepository subclass defined in gitaly-ruby. + def same_repository?(other_repository) + gitaly_repository.storage_name == other_repository.storage && + gitaly_repository.relative_path == other_repository.relative_path + end + + def fetch_env + gitaly_ssh = File.absolute_path(File.join(Gitlab.config.gitaly.client_path, 'gitaly-ssh')) + gitaly_address = gitaly_client.address(storage) + gitaly_token = gitaly_client.token(storage) + + request = Gitaly::SSHUploadPackRequest.new(repository: gitaly_repository) + env = { + 'GITALY_ADDRESS' => gitaly_address, + 'GITALY_PAYLOAD' => request.to_json, + 'GITALY_WD' => Dir.pwd, + 'GIT_SSH_COMMAND' => "#{gitaly_ssh} upload-pack" + } + env['GITALY_TOKEN'] = gitaly_token if gitaly_token.present? + + env + end + + private + + # Must return an object that responds to 'address' and 'storage'. + def gitaly_client + Gitlab::GitalyClient + end + + def storage + gitaly_repository.storage_name + end + end + end +end diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index fc8af38d4d9..dcca20c75ef 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -6,6 +6,7 @@ require "rubygems/package" module Gitlab module Git class Repository + include Gitlab::Git::RepositoryMirroring include Gitlab::Git::Popen ALLOWED_OBJECT_DIRECTORIES_VARIABLES = %w[ @@ -57,7 +58,7 @@ module Gitlab # Rugged repo object attr_reader :rugged - attr_reader :storage, :gl_repository, :relative_path, :gitaly_resolver + attr_reader :storage, :gl_repository, :relative_path # This initializer method is only used on the client side (gitlab-ce). # Gitaly-ruby uses a different initializer. @@ -65,7 +66,6 @@ module Gitlab @storage = storage @relative_path = relative_path @gl_repository = gl_repository - @gitaly_resolver = Gitlab::GitalyClient storage_path = Gitlab.config.repositories.storages[@storage]['path'] @path = File.join(storage_path, @relative_path) @@ -104,7 +104,7 @@ module Gitlab end def exists? - Gitlab::GitalyClient.migrate(:repository_exists) do |enabled| + Gitlab::GitalyClient.migrate(:repository_exists, status: Gitlab::GitalyClient::MigrationStatus::OPT_OUT) do |enabled| if enabled gitaly_repository_client.exists? else @@ -290,13 +290,27 @@ module Gitlab end end + def batch_existence(object_ids, existing: true) + filter_method = existing ? :select : :reject + + object_ids.public_send(filter_method) do |oid| # rubocop:disable GitlabSecurity/PublicSend + rugged.exists?(oid) + end + end + # Returns an Array of branch and tag names def ref_names branch_names + tag_names end def delete_all_refs_except(prefixes) - delete_refs(*all_ref_names_except(prefixes)) + gitaly_migrate(:ref_delete_refs) do |is_enabled| + if is_enabled + gitaly_ref_client.delete_refs(except_with_prefixes: prefixes) + else + delete_refs(*all_ref_names_except(prefixes)) + end + end end # Returns an Array of all ref names, except when it's matching pattern @@ -750,13 +764,13 @@ module Gitlab end def ff_merge(user, source_sha, target_branch) - OperationService.new(user, self).with_branch(target_branch) do |our_commit| - raise ArgumentError, 'Invalid merge target' unless our_commit - - source_sha + gitaly_migrate(:operation_user_ff_branch) do |is_enabled| + if is_enabled + gitaly_ff_merge(user, source_sha, target_branch) + else + rugged_ff_merge(user, source_sha, target_branch) + end end - rescue Rugged::ReferenceError - raise ArgumentError, 'Invalid merge source' end def revert(user:, commit:, branch_name:, message:, start_branch_name:, start_repository:) @@ -890,16 +904,30 @@ module Gitlab end end - # Delete the specified remote from this repository. - def remote_delete(remote_name) + def add_remote(remote_name, url) + rugged.remotes.create(remote_name, url) + rescue Rugged::ConfigError + remote_update(remote_name, url: url) + end + + def remove_remote(remote_name) + # When a remote is deleted all its remote refs are deleted too, but in + # the case of mirrors we map its refs (that would usualy go under + # [remote_name]/) to the top level namespace. We clean the mapping so + # those don't get deleted. + if rugged.config["remote.#{remote_name}.mirror"] + rugged.config.delete("remote.#{remote_name}.fetch") + end + rugged.remotes.delete(remote_name) - nil + true + rescue Rugged::ConfigError + false end - # Add a new remote to this repository. - def remote_add(remote_name, url) - rugged.remotes.create(remote_name, url) - nil + # Returns true if a remote exists. + def remote_exists?(name) + rugged.remotes[name].present? end # Update the specified remote using the values in the +options+ hash @@ -962,6 +990,10 @@ module Gitlab @attributes.attributes(path) end + def gitattribute(path, name) + attributes(path)[name] + end + def languages(ref = nil) Gitlab::GitalyClient.migrate(:commit_languages) do |is_enabled| if is_enabled @@ -991,23 +1023,22 @@ module Gitlab def with_repo_branch_commit(start_repository, start_branch_name) Gitlab::Git.check_namespace!(start_repository) + start_repository = RemoteRepository.new(start_repository) unless start_repository.is_a?(RemoteRepository) return yield nil if start_repository.empty_repo? - if start_repository == self + if start_repository.same_repository?(self) yield commit(start_branch_name) else - start_commit = start_repository.commit(start_branch_name) + start_commit_id = start_repository.commit_id(start_branch_name) - return yield nil unless start_commit + return yield nil unless start_commit_id - sha = start_commit.sha - - if branch_commit = commit(sha) + if branch_commit = commit(start_commit_id) yield branch_commit else with_repo_tmp_commit( - start_repository, start_branch_name, sha) do |tmp_commit| + start_repository, start_branch_name, start_commit_id) do |tmp_commit| yield tmp_commit end end @@ -1026,13 +1057,12 @@ module Gitlab delete_refs(tmp_ref) if tmp_ref end - def fetch_source_branch(source_repository, source_branch, local_ref) - with_repo_branch_commit(source_repository, source_branch) do |commit| - if commit - write_ref(local_ref, commit.sha) - true + def fetch_source_branch!(source_repository, source_branch, local_ref) + Gitlab::GitalyClient.migrate(:fetch_source_branch) do |is_enabled| + if is_enabled + gitaly_repository_client.fetch_source_branch(source_repository, source_branch, local_ref) else - false + rugged_fetch_source_branch(source_repository, source_branch, local_ref) end end end @@ -1064,6 +1094,9 @@ module Gitlab end def fetch_ref(source_repository, source_ref:, target_ref:) + Gitlab::Git.check_namespace!(source_repository) + source_repository = RemoteRepository.new(source_repository) unless source_repository.is_a?(RemoteRepository) + message, status = GitalyClient.migrate(:fetch_ref) do |is_enabled| if is_enabled gitaly_fetch_ref(source_repository, source_ref: source_ref, target_ref: target_ref) @@ -1127,6 +1160,11 @@ module Gitlab Gitlab::Git::Blob.find(self, sha, path) unless Gitlab::Git.blank_ref?(sha) end + # Items should be of format [[commit_id, path], [commit_id1, path1]] + def batch_blobs(items, blob_size_limit: nil) + Gitlab::Git::Blob.batch(self, items, blob_size_limit: blob_size_limit) + end + def commit_index(user, branch_name, index, options) committer = user_to_committer(user) @@ -1169,14 +1207,25 @@ module Gitlab Gitlab::GitalyClient.migrate(method, status: status, &block) rescue GRPC::NotFound => e raise NoRepository.new(e) - rescue GRPC::BadStatus => e - raise CommandError.new(e) rescue GRPC::InvalidArgument => e raise ArgumentError.new(e) + rescue GRPC::BadStatus => e + raise CommandError.new(e) end private + def rugged_fetch_source_branch(source_repository, source_branch, local_ref) + with_repo_branch_commit(source_repository, source_branch) do |commit| + if commit + write_ref(local_ref, commit.sha) + true + else + false + end + end + end + # Gitaly note: JV: Trying to get rid of the 'filter' option so we can implement this with 'git'. def branches_filter(filter: nil, sort_by: nil) # n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37464 @@ -1352,6 +1401,7 @@ module Gitlab end return nil unless tmp_entry.type == :tree + tmp_entry = tmp_entry[dir] end end @@ -1472,6 +1522,7 @@ module Gitlab # Ref names must start with `refs/`. def rugged_ref_exists?(ref_name) raise ArgumentError, 'invalid refname' unless ref_name.start_with?('refs/') + rugged.references.exist?(ref_name) rescue Rugged::ReferenceError false @@ -1538,6 +1589,7 @@ module Gitlab Gitlab::Git::Branch.new(self, rugged_ref.name, rugged_ref.target, target_commit) rescue Rugged::ReferenceError => e raise InvalidRef.new("Branch #{ref} already exists") if e.to_s =~ /'refs\/heads\/#{ref}'/ + raise InvalidRef.new("Invalid reference #{start_point}") end @@ -1597,22 +1649,25 @@ module Gitlab end def gitaly_fetch_ref(source_repository, source_ref:, target_ref:) - gitaly_ssh = File.absolute_path(File.join(Gitlab.config.gitaly.client_path, 'gitaly-ssh')) - gitaly_address = gitaly_resolver.address(source_repository.storage) - gitaly_token = gitaly_resolver.token(source_repository.storage) - - request = Gitaly::SSHUploadPackRequest.new(repository: source_repository.gitaly_repository) - env = { - 'GITALY_ADDRESS' => gitaly_address, - 'GITALY_PAYLOAD' => request.to_json, - 'GITALY_WD' => Dir.pwd, - 'GIT_SSH_COMMAND' => "#{gitaly_ssh} upload-pack" - } - env['GITALY_TOKEN'] = gitaly_token if gitaly_token.present? - args = %W(fetch --no-tags -f ssh://gitaly/internal.git #{source_ref}:#{target_ref}) - run_git(args, env: env) + run_git(args, env: source_repository.fetch_env) + end + + def gitaly_ff_merge(user, source_sha, target_branch) + gitaly_operations_client.user_ff_branch(user, source_sha, target_branch) + rescue GRPC::FailedPrecondition => e + raise CommitError, e + end + + def rugged_ff_merge(user, source_sha, target_branch) + OperationService.new(user, self).with_branch(target_branch) do |our_commit| + raise ArgumentError, 'Invalid merge target' unless our_commit + + source_sha + end + rescue Rugged::ReferenceError + raise ArgumentError, 'Invalid merge source' end end end diff --git a/lib/gitlab/git/repository_mirroring.rb b/lib/gitlab/git/repository_mirroring.rb new file mode 100644 index 00000000000..4500482d68f --- /dev/null +++ b/lib/gitlab/git/repository_mirroring.rb @@ -0,0 +1,95 @@ +module Gitlab + module Git + module RepositoryMirroring + IMPORT_HEAD_REFS = '+refs/heads/*:refs/heads/*'.freeze + IMPORT_TAG_REFS = '+refs/tags/*:refs/tags/*'.freeze + MIRROR_REMOTE = 'mirror'.freeze + + RemoteError = Class.new(StandardError) + + def set_remote_as_mirror(remote_name) + # This is used to define repository as equivalent as "git clone --mirror" + rugged.config["remote.#{remote_name}.fetch"] = 'refs/*:refs/*' + rugged.config["remote.#{remote_name}.mirror"] = true + rugged.config["remote.#{remote_name}.prune"] = true + end + + def set_import_remote_as_mirror(remote_name) + # Add first fetch with Rugged so it does not create its own. + rugged.config["remote.#{remote_name}.fetch"] = IMPORT_HEAD_REFS + + add_remote_fetch_config(remote_name, IMPORT_TAG_REFS) + + rugged.config["remote.#{remote_name}.mirror"] = true + rugged.config["remote.#{remote_name}.prune"] = true + end + + def add_remote_fetch_config(remote_name, refspec) + run_git(%W[config --add remote.#{remote_name}.fetch #{refspec}]) + end + + def fetch_mirror(url) + add_remote(MIRROR_REMOTE, url) + set_remote_as_mirror(MIRROR_REMOTE) + fetch(MIRROR_REMOTE) + remove_remote(MIRROR_REMOTE) + end + + def remote_tags(remote) + # Each line has this format: "dc872e9fa6963f8f03da6c8f6f264d0845d6b092\trefs/tags/v1.10.0\n" + # We want to convert it to: [{ 'v1.10.0' => 'dc872e9fa6963f8f03da6c8f6f264d0845d6b092' }, ...] + list_remote_tags(remote).map do |line| + target, path = line.strip.split("\t") + + # When the remote repo does not have tags. + if target.nil? || path.nil? + Rails.logger.info "Empty or invalid list of tags for remote: #{remote}. Output: #{output}" + return [] + end + + name = path.split('/', 3).last + # We're only interested in tag references + # See: http://stackoverflow.com/questions/15472107/when-listing-git-ls-remote-why-theres-after-the-tag-name + next if name =~ /\^\{\}\Z/ + + target_commit = Gitlab::Git::Commit.find(self, target) + Gitlab::Git::Tag.new(self, name, target, target_commit) + end.compact + end + + def remote_branches(remote_name) + branches = [] + + rugged.references.each("refs/remotes/#{remote_name}/*").map do |ref| + name = ref.name.sub(/\Arefs\/remotes\/#{remote_name}\//, '') + + begin + target_commit = Gitlab::Git::Commit.find(self, ref.target) + branches << Gitlab::Git::Branch.new(self, name, ref.target, target_commit) + rescue Rugged::ReferenceError + # Omit invalid branch + end + end + + branches + end + + private + + def list_remote_tags(remote) + tag_list, exit_code, error = nil + cmd = %W(#{Gitlab.config.git.bin_path} --git-dir=#{path} ls-remote --tags #{remote}) + + Open3.popen3(*cmd) do |stdin, stdout, stderr, wait_thr| + tag_list = stdout.read + error = stderr.read + exit_code = wait_thr.value.exitstatus + end + + raise RemoteError, error unless exit_code.zero? + + tag_list.split("\n") + end + end + end +end diff --git a/lib/gitlab/git/rev_list.rb b/lib/gitlab/git/rev_list.rb index 60b2a4ec411..4974205b8fd 100644 --- a/lib/gitlab/git/rev_list.rb +++ b/lib/gitlab/git/rev_list.rb @@ -13,11 +13,32 @@ module Gitlab @path_to_repo = path_to_repo end - # This method returns an array of new references + # This method returns an array of new commit references def new_refs execute([*base_args, newrev, '--not', '--all']) end + # Finds newly added objects + # Returns an array of shas + # + # Can skip objects which do not have a path using required_path: true + # This skips commit objects and root trees, which might not be needed when + # looking for blobs + # + # When given a block it will yield objects as a lazy enumerator so + # the caller can limit work done instead of processing megabytes of data + def new_objects(require_path: nil, not_in: nil, &lazy_block) + args = [*base_args, newrev, *not_in_refs(not_in), '--objects'] + + get_objects(args, require_path: require_path, &lazy_block) + end + + def all_objects(require_path: nil, &lazy_block) + args = [*base_args, '--all', '--objects'] + + get_objects(args, require_path: require_path, &lazy_block) + end + # This methods returns an array of missed references # # Should become obsolete after https://gitlab.com/gitlab-org/gitaly/issues/348. @@ -27,6 +48,13 @@ module Gitlab private + def not_in_refs(references) + return ['--not', '--all'] unless references + return [] if references.empty? + + references.prepend('--not') + end + def execute(args) output, status = popen(args, nil, Gitlab::Git::Env.to_env_hash) @@ -37,6 +65,10 @@ module Gitlab output.split("\n") end + def lazy_execute(args, &lazy_block) + popen(args, nil, Gitlab::Git::Env.to_env_hash, lazy_block: lazy_block) + end + def base_args [ Gitlab.config.git.bin_path, @@ -44,6 +76,30 @@ module Gitlab 'rev-list' ] end + + def get_objects(args, require_path: nil) + if block_given? + lazy_execute(args) do |lazy_output| + objects = objects_from_output(lazy_output, require_path: require_path) + + yield(objects) + end + else + object_output = execute(args) + + objects_from_output(object_output, require_path: require_path) + end + end + + def objects_from_output(object_output, require_path: nil) + object_output.map do |output_line| + sha, path = output_line.split(' ', 2) + + next if require_path && path.blank? + + sha + end.reject(&:nil?) + end end end end diff --git a/lib/gitlab/git/wiki.rb b/lib/gitlab/git/wiki.rb index f01d5c96fc8..d4a53d32c28 100644 --- a/lib/gitlab/git/wiki.rb +++ b/lib/gitlab/git/wiki.rb @@ -10,6 +10,8 @@ module Gitlab end PageBlob = Struct.new(:name) + attr_reader :repository + def self.default_ref 'master' end @@ -35,51 +37,74 @@ module Gitlab end def delete_page(page_path, commit_details) - assert_type!(commit_details, CommitDetails) - - gollum_wiki.delete_page(gollum_page_by_path(page_path), commit_details.to_h) - nil + @repository.gitaly_migrate(:wiki_delete_page) do |is_enabled| + if is_enabled + gitaly_delete_page(page_path, commit_details) + gollum_wiki.clear_cache + else + gollum_delete_page(page_path, commit_details) + end + end end def update_page(page_path, title, format, content, commit_details) - assert_type!(format, Symbol) - assert_type!(commit_details, CommitDetails) - - gollum_wiki.update_page(gollum_page_by_path(page_path), title, format, content, commit_details.to_h) - nil + @repository.gitaly_migrate(:wiki_update_page) do |is_enabled| + if is_enabled + gitaly_update_page(page_path, title, format, content, commit_details) + gollum_wiki.clear_cache + else + gollum_update_page(page_path, title, format, content, commit_details) + end + end end - def pages - gollum_wiki.pages.map { |gollum_page| new_page(gollum_page) } + def pages(limit: nil) + @repository.gitaly_migrate(:wiki_get_all_pages, status: Gitlab::GitalyClient::MigrationStatus::DISABLED) do |is_enabled| + if is_enabled + gitaly_get_all_pages + else + gollum_get_all_pages(limit: limit) + end + end end def page(title:, version: nil, dir: nil) - if version - version = Gitlab::Git::Commit.find(@repository, version).id + @repository.gitaly_migrate(:wiki_find_page) do |is_enabled| + if is_enabled + gitaly_find_page(title: title, version: version, dir: dir) + else + gollum_find_page(title: title, version: version, dir: dir) + end end - - gollum_page = gollum_wiki.page(title, version, dir) - return unless gollum_page - - new_page(gollum_page) end def file(name, version) - version ||= self.class.default_ref - gollum_file = gollum_wiki.file(name, version) - return unless gollum_file - - Gitlab::Git::WikiFile.new(gollum_file) + @repository.gitaly_migrate(:wiki_find_file) do |is_enabled| + if is_enabled + gitaly_find_file(name, version) + else + gollum_find_file(name, version) + end + end end - def page_versions(page_path) + # options: + # :page - The Integer page number. + # :per_page - The number of items per page. + # :limit - Total number of items to return. + def page_versions(page_path, options = {}) current_page = gollum_page_by_path(page_path) - current_page.versions.map do |gollum_git_commit| - gollum_page = gollum_wiki.page(current_page.title, gollum_git_commit.id) - new_version(gollum_page, gollum_git_commit.id) + + commits_from_page(current_page, options).map do |gitlab_git_commit| + gollum_page = gollum_wiki.page(current_page.title, gitlab_git_commit.id) + Gitlab::Git::WikiPageVersion.new(gitlab_git_commit, gollum_page&.format) end end + def count_page_versions(page_path) + @repository.count_commits(ref: 'HEAD', path: page_path) + end + def preview_slug(title, format) # Adapted from gollum gem (Gollum::Wiki#preview_page) to avoid # using Rugged through a Gollum::Wiki instance @@ -94,6 +119,22 @@ module Gitlab private + # options: + # :page - The Integer page number. + # :per_page - The number of items per page. + # :limit - Total number of items to return. + def commits_from_page(gollum_page, options = {}) + unless options[:limit] + options[:offset] = ([1, options.delete(:page).to_i].max - 1) * Gollum::Page.per_page + options[:limit] = (options.delete(:per_page) || Gollum::Page.per_page).to_i + end + + @repository.log(ref: gollum_page.last_version.id, + path: gollum_page.path, + limit: options[:limit], + offset: options[:offset]) + end + def gollum_wiki @gollum_wiki ||= Gollum::Wiki.new(@repository.path) end @@ -110,8 +151,17 @@ module Gitlab end def new_version(gollum_page, commit_id) - commit = Gitlab::Git::Commit.find(@repository, commit_id) - Gitlab::Git::WikiPageVersion.new(commit, gollum_page&.format) + Gitlab::Git::WikiPageVersion.new(version(commit_id), gollum_page&.format) + end + + def version(commit_id) + commit_find_proc = -> { Gitlab::Git::Commit.find(@repository, commit_id) } + + if RequestStore.active? + RequestStore.fetch([:wiki_version_commit, commit_id]) { commit_find_proc.call } + else + commit_find_proc.call + end end def assert_type!(object, klass) @@ -135,9 +185,75 @@ module Gitlab raise Gitlab::Git::Wiki::DuplicatePageError, e.message end + def gollum_delete_page(page_path, commit_details) + assert_type!(commit_details, CommitDetails) + + gollum_wiki.delete_page(gollum_page_by_path(page_path), commit_details.to_h) + nil + end + + def gollum_update_page(page_path, title, format, content, commit_details) + assert_type!(format, Symbol) + assert_type!(commit_details, CommitDetails) + + gollum_wiki.update_page(gollum_page_by_path(page_path), title, format, content, commit_details.to_h) + nil + end + + def gollum_find_page(title:, version: nil, dir: nil) + if version + version = Gitlab::Git::Commit.find(@repository, version).id + end + + gollum_page = gollum_wiki.page(title, version, dir) + return unless gollum_page + + new_page(gollum_page) + end + + def gollum_find_file(name, version) + version ||= self.class.default_ref + gollum_file = gollum_wiki.file(name, version) + return unless gollum_file + + Gitlab::Git::WikiFile.new(gollum_file) + end + + def gollum_get_all_pages(limit: nil) + gollum_wiki.pages(limit: limit).map { |gollum_page| new_page(gollum_page) } + end + def gitaly_write_page(name, format, content, commit_details) gitaly_wiki_client.write_page(name, format, content, commit_details) end + + def gitaly_update_page(page_path, title, format, content, commit_details) + gitaly_wiki_client.update_page(page_path, title, format, content, commit_details) + end + + def gitaly_delete_page(page_path, commit_details) + gitaly_wiki_client.delete_page(page_path, commit_details) + end + + def gitaly_find_page(title:, version: nil, dir: nil) + wiki_page, version = gitaly_wiki_client.find_page(title: title, version: version, dir: dir) + return unless wiki_page + + Gitlab::Git::WikiPage.new(wiki_page, version) + end + + def gitaly_find_file(name, version) + wiki_file = gitaly_wiki_client.find_file(name, version) + return unless wiki_file + + Gitlab::Git::WikiFile.new(wiki_file) + end + + def gitaly_get_all_pages + gitaly_wiki_client.get_all_pages.map do |wiki_page, version| + Gitlab::Git::WikiPage.new(wiki_page, version) + end + end end end end diff --git a/lib/gitlab/gitaly_client.rb b/lib/gitlab/gitaly_client.rb index 6868be26758..572f4c892f6 100644 --- a/lib/gitlab/gitaly_client.rb +++ b/lib/gitlab/gitaly_client.rb @@ -34,10 +34,11 @@ module Gitlab private_constant :MUTEX class << self - attr_accessor :query_time + attr_accessor :query_time, :migrate_histogram end self.query_time = 0 + self.migrate_histogram = Gitlab::Metrics.histogram(:gitaly_migrate_call_duration, "Gitaly migration call execution timings") def self.stub(name, storage) MUTEX.synchronize do @@ -74,6 +75,10 @@ module Gitlab address end + def self.address_metadata(storage) + Base64.strict_encode64(JSON.dump({ storage => { 'address' => address(storage), 'token' => token(storage) } })) + end + # All Gitaly RPC call sites should use GitalyClient.call. This method # makes sure that per-request authentication headers are set. # @@ -88,18 +93,19 @@ module Gitlab # kwargs.merge(deadline: Time.now + 10) # end # - def self.call(storage, service, rpc, request) + def self.call(storage, service, rpc, request, remote_storage: nil) start = Process.clock_gettime(Process::CLOCK_MONOTONIC) enforce_gitaly_request_limits(:call) - kwargs = request_kwargs(storage) + kwargs = request_kwargs(storage, remote_storage: remote_storage) kwargs = yield(kwargs) if block_given? + stub(service, storage).__send__(rpc, request, kwargs) # rubocop:disable GitlabSecurity/PublicSend ensure self.query_time += Process.clock_gettime(Process::CLOCK_MONOTONIC) - start end - def self.request_kwargs(storage) + def self.request_kwargs(storage, remote_storage: nil) encoded_token = Base64.strict_encode64(token(storage).to_s) metadata = { 'authorization' => "Bearer #{encoded_token}", @@ -109,6 +115,7 @@ module Gitlab feature_stack = Thread.current[:gitaly_feature_stack] feature = feature_stack && feature_stack[0] metadata['call_site'] = feature.to_s if feature + metadata['gitaly-servers'] = address_metadata(remote_storage) if remote_storage { metadata: metadata } end @@ -171,8 +178,11 @@ module Gitlab feature_stack = Thread.current[:gitaly_feature_stack] ||= [] feature_stack.unshift(feature) begin + start = Process.clock_gettime(Process::CLOCK_MONOTONIC) yield is_enabled ensure + total_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start + migrate_histogram.observe({ gitaly_enabled: is_enabled, feature: feature }, total_time) feature_stack.shift Thread.current[:gitaly_feature_stack] = nil if feature_stack.empty? end diff --git a/lib/gitlab/gitaly_client/attributes_bag.rb b/lib/gitlab/gitaly_client/attributes_bag.rb new file mode 100644 index 00000000000..198a1de91c7 --- /dev/null +++ b/lib/gitlab/gitaly_client/attributes_bag.rb @@ -0,0 +1,31 @@ +module Gitlab + module GitalyClient + # This module expects an `ATTRS` const to be defined on the subclass + # See GitalyClient::WikiFile for an example + module AttributesBag + extend ActiveSupport::Concern + + included do + attr_accessor(*const_get(:ATTRS)) + end + + def initialize(params) + params = params.with_indifferent_access + + attributes.each do |attr| + instance_variable_set("@#{attr}", params[attr]) + end + end + + def ==(other) + attributes.all? do |field| + instance_variable_get("@#{field}") == other.instance_variable_get("@#{field}") + end + end + + def attributes + self.class.const_get(:ATTRS) + end + end + end +end diff --git a/lib/gitlab/gitaly_client/commit_service.rb b/lib/gitlab/gitaly_client/commit_service.rb index a2b50f2507e..da5505cb2fe 100644 --- a/lib/gitlab/gitaly_client/commit_service.rb +++ b/lib/gitlab/gitaly_client/commit_service.rb @@ -18,7 +18,7 @@ module Gitlab response = GitalyClient.call(@repository.storage, :commit_service, :list_files, request) response.flat_map do |msg| - msg.paths.map { |d| d.dup.force_encoding(Encoding::UTF_8) } + msg.paths.map { |d| EncodingHelper.encode!(d.dup) } end end diff --git a/lib/gitlab/gitaly_client/diff.rb b/lib/gitlab/gitaly_client/diff.rb index 54df6304865..d98a0ce988f 100644 --- a/lib/gitlab/gitaly_client/diff.rb +++ b/lib/gitlab/gitaly_client/diff.rb @@ -1,21 +1,9 @@ module Gitlab module GitalyClient class Diff - FIELDS = %i(from_path to_path old_mode new_mode from_id to_id patch overflow_marker collapsed).freeze + ATTRS = %i(from_path to_path old_mode new_mode from_id to_id patch overflow_marker collapsed).freeze - attr_accessor(*FIELDS) - - def initialize(params) - params.each do |key, val| - public_send(:"#{key}=", val) # rubocop:disable GitlabSecurity/PublicSend - end - end - - def ==(other) - FIELDS.all? do |field| - public_send(field) == other.public_send(field) # rubocop:disable GitlabSecurity/PublicSend - end - end + include AttributesBag end end end diff --git a/lib/gitlab/gitaly_client/diff_stitcher.rb b/lib/gitlab/gitaly_client/diff_stitcher.rb index 65d81dc5d46..da243ee2d1a 100644 --- a/lib/gitlab/gitaly_client/diff_stitcher.rb +++ b/lib/gitlab/gitaly_client/diff_stitcher.rb @@ -12,7 +12,7 @@ module Gitlab @rpc_response.each do |diff_msg| if current_diff.nil? - diff_params = diff_msg.to_h.slice(*GitalyClient::Diff::FIELDS) + diff_params = diff_msg.to_h.slice(*GitalyClient::Diff::ATTRS) # gRPC uses frozen strings by default, and we need to have an unfrozen string as it # gets processed further down the line. So we unfreeze the first chunk of the patch # in case it's the only chunk we receive for this diff. diff --git a/lib/gitlab/gitaly_client/operation_service.rb b/lib/gitlab/gitaly_client/operation_service.rb index adaf255f24b..526d44a8b77 100644 --- a/lib/gitlab/gitaly_client/operation_service.rb +++ b/lib/gitlab/gitaly_client/operation_service.rb @@ -105,6 +105,23 @@ module Gitlab ensure request_enum.close end + + def user_ff_branch(user, source_sha, target_branch) + request = Gitaly::UserFFBranchRequest.new( + repository: @gitaly_repo, + user: Gitlab::Git::User.from_gitlab(user).to_gitaly, + commit_id: source_sha, + branch: GitalyClient.encode(target_branch) + ) + + branch_update = GitalyClient.call( + @repository.storage, + :operation_service, + :user_ff_branch, + request + ).branch_update + Gitlab::Git::OperationService::BranchUpdate.from_gitaly(branch_update) + end end end end diff --git a/lib/gitlab/gitaly_client/ref_service.rb b/lib/gitlab/gitaly_client/ref_service.rb index b0c73395cb1..31b04bc2650 100644 --- a/lib/gitlab/gitaly_client/ref_service.rb +++ b/lib/gitlab/gitaly_client/ref_service.rb @@ -126,6 +126,15 @@ module Gitlab GitalyClient.call(@repository.storage, :ref_service, :delete_branch, request) end + def delete_refs(except_with_prefixes:) + request = Gitaly::DeleteRefsRequest.new( + repository: @gitaly_repo, + except_with_prefix: except_with_prefixes + ) + + GitalyClient.call(@repository.storage, :ref_service, :delete_refs, request) + end + private def consume_refs_response(response) @@ -137,6 +146,7 @@ module Gitlab enum_value = Gitaly::FindLocalBranchesRequest::SortBy.resolve(sort_by.upcase.to_sym) raise ArgumentError, "Invalid sort_by key `#{sort_by}`" unless enum_value + enum_value end diff --git a/lib/gitlab/gitaly_client/repository_service.rb b/lib/gitlab/gitaly_client/repository_service.rb index cef692d3c2a..70cb16bd810 100644 --- a/lib/gitlab/gitaly_client/repository_service.rb +++ b/lib/gitlab/gitaly_client/repository_service.rb @@ -65,6 +65,25 @@ module Gitlab response.value end + + def fetch_source_branch(source_repository, source_branch, local_ref) + request = Gitaly::FetchSourceBranchRequest.new( + repository: @gitaly_repo, + source_repository: source_repository.gitaly_repository, + source_branch: source_branch.b, + target_ref: local_ref.b + ) + + response = GitalyClient.call( + @storage, + :repository_service, + :fetch_source_branch, + request, + remote_storage: source_repository.storage + ) + + response.result + end end end end diff --git a/lib/gitlab/gitaly_client/wiki_file.rb b/lib/gitlab/gitaly_client/wiki_file.rb new file mode 100644 index 00000000000..47c60c92484 --- /dev/null +++ b/lib/gitlab/gitaly_client/wiki_file.rb @@ -0,0 +1,9 @@ +module Gitlab + module GitalyClient + class WikiFile + ATTRS = %i(name mime_type path raw_data).freeze + + include AttributesBag + end + end +end diff --git a/lib/gitlab/gitaly_client/wiki_page.rb b/lib/gitlab/gitaly_client/wiki_page.rb new file mode 100644 index 00000000000..7339468e911 --- /dev/null +++ b/lib/gitlab/gitaly_client/wiki_page.rb @@ -0,0 +1,25 @@ +module Gitlab + module GitalyClient + class WikiPage + ATTRS = %i(title format url_path path name historical raw_data).freeze + + include AttributesBag + + def initialize(params) + super + + # All gRPC strings in a response are frozen, so we get an unfrozen + # version here so appending to `raw_data` doesn't blow up. + @raw_data = @raw_data.dup + end + + def historical? + @historical + end + + def format + @format.to_sym + end + end + end +end diff --git a/lib/gitlab/gitaly_client/wiki_service.rb b/lib/gitlab/gitaly_client/wiki_service.rb index 03afcce81f0..c8f065f5881 100644 --- a/lib/gitlab/gitaly_client/wiki_service.rb +++ b/lib/gitlab/gitaly_client/wiki_service.rb @@ -15,11 +15,7 @@ module Gitlab repository: @gitaly_repo, name: GitalyClient.encode(name), format: format.to_s, - commit_details: Gitaly::WikiCommitDetails.new( - name: GitalyClient.encode(commit_details.name), - email: GitalyClient.encode(commit_details.email), - message: GitalyClient.encode(commit_details.message) - ) + commit_details: gitaly_commit_details(commit_details) ) strio = StringIO.new(content) @@ -40,6 +36,135 @@ module Gitlab raise Gitlab::Git::Wiki::DuplicatePageError, error end end + + def update_page(page_path, title, format, content, commit_details) + request = Gitaly::WikiUpdatePageRequest.new( + repository: @gitaly_repo, + page_path: GitalyClient.encode(page_path), + title: GitalyClient.encode(title), + format: format.to_s, + commit_details: gitaly_commit_details(commit_details) + ) + + strio = StringIO.new(content) + + enum = Enumerator.new do |y| + until strio.eof? + chunk = strio.read(MAX_MSG_SIZE) + request.content = GitalyClient.encode(chunk) + + y.yield request + + request = Gitaly::WikiUpdatePageRequest.new + end + end + + GitalyClient.call(@repository.storage, :wiki_service, :wiki_update_page, enum) + end + + def delete_page(page_path, commit_details) + request = Gitaly::WikiDeletePageRequest.new( + repository: @gitaly_repo, + page_path: GitalyClient.encode(page_path), + commit_details: gitaly_commit_details(commit_details) + ) + + GitalyClient.call(@repository.storage, :wiki_service, :wiki_delete_page, request) + end + + def find_page(title:, version: nil, dir: nil) + request = Gitaly::WikiFindPageRequest.new( + repository: @gitaly_repo, + title: GitalyClient.encode(title), + revision: GitalyClient.encode(version), + directory: GitalyClient.encode(dir) + ) + + response = GitalyClient.call(@repository.storage, :wiki_service, :wiki_find_page, request) + + wiki_page_from_iterator(response) + end + + def get_all_pages + request = Gitaly::WikiGetAllPagesRequest.new(repository: @gitaly_repo) + response = GitalyClient.call(@repository.storage, :wiki_service, :wiki_get_all_pages, request) + pages = [] + + loop do + page, version = wiki_page_from_iterator(response) { |message| message.end_of_page } + + break unless page && version + + pages << [page, version] + end + + pages + end + + def find_file(name, revision) + request = Gitaly::WikiFindFileRequest.new( + repository: @gitaly_repo, + name: GitalyClient.encode(name), + revision: GitalyClient.encode(revision) + ) + + response = GitalyClient.call(@repository.storage, :wiki_service, :wiki_find_file, request) + wiki_file = nil + + response.each do |message| + next unless message.name.present? + + if wiki_file + wiki_file.raw_data << message.raw_data + else + wiki_file = GitalyClient::WikiFile.new(message.to_h) + # All gRPC strings in a response are frozen, so we get + # an unfrozen version here so appending in the else clause below doesn't blow up. + wiki_file.raw_data = wiki_file.raw_data.dup + end + end + + wiki_file + end + + private + + # If a block is given and the yielded value is true, iteration will be + # stopped early at that point; else the iterator is consumed entirely. + # The iterator is traversed with `next` to allow resuming the iteration. + def wiki_page_from_iterator(iterator) + wiki_page = version = nil + + while message = iterator.next + break if block_given? && yield(message) + + page = message.page + next unless page + + if wiki_page + wiki_page.raw_data << page.raw_data + else + wiki_page = GitalyClient::WikiPage.new(page.to_h) + + version = Gitlab::Git::WikiPageVersion.new( + Gitlab::Git::Commit.decorate(@repository, page.version.commit), + page.version.format + ) + end + end + + [wiki_page, version] + rescue StopIteration + [wiki_page, version] + end + + def gitaly_commit_details(commit_details) + Gitaly::WikiCommitDetails.new( + name: GitalyClient.encode(commit_details.name), + email: GitalyClient.encode(commit_details.email), + message: GitalyClient.encode(commit_details.message) + ) + end end end end diff --git a/lib/gitlab/github_import.rb b/lib/gitlab/github_import.rb new file mode 100644 index 00000000000..d2ae4c1255e --- /dev/null +++ b/lib/gitlab/github_import.rb @@ -0,0 +1,34 @@ +module Gitlab + module GithubImport + def self.new_client_for(project, token: nil, parallel: true) + token_to_use = token || project.import_data&.credentials&.fetch(:user) + + Client.new(token_to_use, parallel: parallel) + end + + # Inserts a raw row and returns the ID of the inserted row. + # + # attributes - The attributes/columns to set. + # relation - An ActiveRecord::Relation to use for finding the ID of the row + # when using MySQL. + def self.insert_and_return_id(attributes, relation) + # We use bulk_insert here so we can bypass any queries executed by + # callbacks or validation rules, as doing this wouldn't scale when + # importing very large projects. + result = Gitlab::Database + .bulk_insert(relation.table_name, [attributes], return_ids: true) + + # MySQL doesn't support returning the IDs of a bulk insert in a way that + # is not a pain, so in this case we'll issue an extra query instead. + result.first || + relation.where(iid: attributes[:iid]).limit(1).pluck(:id).first + end + + # Returns the ID of the ghost user. + def self.ghost_user_id + key = 'github-import/ghost-user-id' + + Caching.read_integer(key) || Caching.write(key, User.select(:id).ghost.id) + end + end +end diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb new file mode 100644 index 00000000000..147597289cf --- /dev/null +++ b/lib/gitlab/github_import/bulk_importing.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module BulkImporting + # Builds and returns an Array of objects to bulk insert into the + # database. + # + # enum - An Enumerable that returns the objects to turn into database + # rows. + def build_database_rows(enum) + enum.each_with_object([]) do |(object, _), rows| + rows << build(object) unless already_imported?(object) + end + end + + # Bulk inserts the given rows into the database. + def bulk_insert(model, rows, batch_size: 100) + rows.each_slice(batch_size) do |slice| + Gitlab::Database.bulk_insert(model.table_name, slice) + end + end + end + end +end diff --git a/lib/gitlab/github_import/caching.rb b/lib/gitlab/github_import/caching.rb new file mode 100644 index 00000000000..b08f133794f --- /dev/null +++ b/lib/gitlab/github_import/caching.rb @@ -0,0 +1,151 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Caching + # The default timeout of the cache keys. + TIMEOUT = 24.hours.to_i + + WRITE_IF_GREATER_SCRIPT = <<-EOF.strip_heredoc.freeze + local key, value, ttl = KEYS[1], tonumber(ARGV[1]), ARGV[2] + local existing = tonumber(redis.call("get", key)) + + if existing == nil or value > existing then + redis.call("set", key, value) + redis.call("expire", key, ttl) + return true + else + return false + end + EOF + + # Reads a cache key. + # + # If the key exists and has a non-empty value its TTL is refreshed + # automatically. + # + # raw_key - The cache key to read. + # timeout - The new timeout of the key if the key is to be refreshed. + def self.read(raw_key, timeout: TIMEOUT) + key = cache_key_for(raw_key) + value = Redis::Cache.with { |redis| redis.get(key) } + + if value.present? + # We refresh the expiration time so frequently used keys stick + # around, removing the need for querying the database as much as + # possible. + # + # A key may be empty when we looked up a GitHub user (for example) but + # did not find a matching GitLab user. In that case we _don't_ want to + # refresh the TTL so we automatically pick up the right data when said + # user were to register themselves on the GitLab instance. + Redis::Cache.with { |redis| redis.expire(key, timeout) } + end + + value + end + + # Reads an integer from the cache, or returns nil if no value was found. + # + # See Caching.read for more information. + def self.read_integer(raw_key, timeout: TIMEOUT) + value = read(raw_key, timeout: timeout) + + value.to_i if value.present? + end + + # Sets a cache key to the given value. + # + # key - The cache key to write. + # value - The value to set. + # timeout - The time after which the cache key should expire. + def self.write(raw_key, value, timeout: TIMEOUT) + key = cache_key_for(raw_key) + + Redis::Cache.with do |redis| + redis.set(key, value, ex: timeout) + end + + value + end + + # Adds a value to a set. + # + # raw_key - The key of the set to add the value to. + # value - The value to add to the set. + # timeout - The new timeout of the key. + def self.set_add(raw_key, value, timeout: TIMEOUT) + key = cache_key_for(raw_key) + + Redis::Cache.with do |redis| + redis.multi do |m| + m.sadd(key, value) + m.expire(key, timeout) + end + end + end + + # Returns true if the given value is present in the set. + # + # raw_key - The key of the set to check. + # value - The value to check for. + def self.set_includes?(raw_key, value) + key = cache_key_for(raw_key) + + Redis::Cache.with do |redis| + redis.sismember(key, value) + end + end + + # Sets multiple keys to a given value. + # + # mapping - A Hash mapping the cache keys to their values. + # timeout - The time after which the cache key should expire. + def self.write_multiple(mapping, timeout: TIMEOUT) + Redis::Cache.with do |redis| + redis.multi do |multi| + mapping.each do |raw_key, value| + multi.set(cache_key_for(raw_key), value, ex: timeout) + end + end + end + end + + # Sets the expiration time of a key. + # + # raw_key - The key for which to change the timeout. + # timeout - The new timeout. + def self.expire(raw_key, timeout) + key = cache_key_for(raw_key) + + Redis::Cache.with do |redis| + redis.expire(key, timeout) + end + end + + # Sets a key to the given integer but only if the existing value is + # smaller than the given value. + # + # This method uses a Lua script to ensure the read and write are atomic. + # + # raw_key - The key to set. + # value - The new value for the key. + # timeout - The key timeout in seconds. + # + # Returns true when the key was overwritten, false otherwise. + def self.write_if_greater(raw_key, value, timeout: TIMEOUT) + key = cache_key_for(raw_key) + val = Redis::Cache.with do |redis| + redis + .eval(WRITE_IF_GREATER_SCRIPT, keys: [key], argv: [value, timeout]) + end + + val ? true : false + end + + def self.cache_key_for(raw_key) + "#{Redis::Cache::CACHE_NAMESPACE}:#{raw_key}" + end + end + end +end diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 0550f9695bd..5da9befa08e 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -1,147 +1,216 @@ +# frozen_string_literal: true + module Gitlab module GithubImport + # HTTP client for interacting with the GitHub API. + # + # This class is basically a fancy wrapped around Octokit while adding some + # functionality to deal with rate limiting and parallel imports. Usage is + # mostly the same as Octokit, for example: + # + # client = GithubImport::Client.new('hunter2') + # + # client.labels.each do |label| + # puts label.name + # end class Client - GITHUB_SAFE_REMAINING_REQUESTS = 100 - GITHUB_SAFE_SLEEP_TIME = 500 + attr_reader :octokit + + # A single page of data and the corresponding page number. + Page = Struct.new(:objects, :number) + + # The minimum number of requests we want to keep available. + # + # We don't use a value of 0 as multiple threads may be using the same + # token in parallel. This could result in all of them hitting the GitHub + # rate limit at once. The threshold is put in place to not hit the limit + # in most cases. + RATE_LIMIT_THRESHOLD = 50 + + # token - The GitHub API token to use. + # + # per_page - The number of objects that should be displayed per page. + # + # parallel - When set to true hitting the rate limit will result in a + # dedicated error being raised. When set to `false` we will + # instead just `sleep()` until the rate limit is reset. Setting + # this value to `true` for parallel importing is crucial as + # otherwise hitting the rate limit will result in a thread + # being blocked in a `sleep()` call for up to an hour. + def initialize(token, per_page: 100, parallel: true) + @octokit = Octokit::Client.new( + access_token: token, + per_page: per_page, + api_endpoint: api_endpoint + ) - attr_reader :access_token, :host, :api_version + @octokit.connection_options[:ssl] = { verify: verify_ssl } - def initialize(access_token, host: nil, api_version: 'v3') - @access_token = access_token - @host = host.to_s.sub(%r{/+\z}, '') - @api_version = api_version - @users = {} + @parallel = parallel + end - if access_token - ::Octokit.auto_paginate = false - end + def parallel? + @parallel end - def api - @api ||= ::Octokit::Client.new( - access_token: access_token, - api_endpoint: api_endpoint, - # If there is no config, we're connecting to github.com and we - # should verify ssl. - connection_options: { - ssl: { verify: config ? config['verify_ssl'] : true } - } - ) + # Returns the details of a GitHub user. + # + # username - The username of the user. + def user(username) + with_rate_limit { octokit.user(username) } end - def client - unless config - raise Projects::ImportService::Error, - 'OAuth configuration for GitHub missing.' - end + # Returns the details of a GitHub repository. + # + # name - The path (in the form `owner/repository`) of the repository. + def repository(name) + with_rate_limit { octokit.repo(name) } + end - @client ||= ::OAuth2::Client.new( - config.app_id, - config.app_secret, - github_options.merge(ssl: { verify: config['verify_ssl'] }) - ) + def labels(*args) + each_object(:labels, *args) end - def authorize_url(redirect_uri) - client.auth_code.authorize_url({ - redirect_uri: redirect_uri, - scope: "repo, user, user:email" - }) + def milestones(*args) + each_object(:milestones, *args) end - def get_token(code) - client.auth_code.get_token(code).token + def releases(*args) + each_object(:releases, *args) end - def method_missing(method, *args, &block) - if api.respond_to?(method) - request(method, *args, &block) - else - super(method, *args, &block) + # Fetches data from the GitHub API and yields a Page object for every page + # of data, without loading all of them into memory. + # + # method - The Octokit method to use for getting the data. + # args - Arguments to pass to the Octokit method. + # + # rubocop: disable GitlabSecurity/PublicSend + def each_page(method, *args, &block) + return to_enum(__method__, method, *args) unless block_given? + + page = + if args.last.is_a?(Hash) && args.last[:page] + args.last[:page] + else + 1 + end + + collection = with_rate_limit { octokit.public_send(method, *args) } + next_url = octokit.last_response.rels[:next] + + yield Page.new(collection, page) + + while next_url + response = with_rate_limit { next_url.get } + next_url = response.rels[:next] + + yield Page.new(response.data, page += 1) end end - def respond_to?(method) - api.respond_to?(method) || super + # Iterates over all of the objects for the given method (e.g. `:labels`). + # + # method - The method to send to Octokit for querying data. + # args - Any arguments to pass to the Octokit method. + def each_object(method, *args, &block) + return to_enum(__method__, method, *args) unless block_given? + + each_page(method, *args) do |page| + page.objects.each do |object| + yield object + end + end end - def user(login) - return nil unless login.present? - return @users[login] if @users.key?(login) + # Yields the supplied block, responding to any rate limit errors. + # + # The exact strategy used for handling rate limiting errors depends on + # whether we are running in parallel mode or not. For more information see + # `#rate_or_wait_for_rate_limit`. + def with_rate_limit + return yield unless rate_limiting_enabled? - @users[login] = api.user(login) - end + request_count_counter.increment - private + raise_or_wait_for_rate_limit unless requests_remaining? - def api_endpoint - if host.present? && api_version.present? - "#{host}/api/#{api_version}" - else - github_options[:site] + begin + yield + rescue Octokit::TooManyRequests + raise_or_wait_for_rate_limit + + # This retry will only happen when running in sequential mode as we'll + # raise an error in parallel mode. + retry end end - def config - Gitlab.config.omniauth.providers.find { |provider| provider.name == "github" } + # Returns `true` if we're still allowed to perform API calls. + def requests_remaining? + remaining_requests > RATE_LIMIT_THRESHOLD + end + + def remaining_requests + octokit.rate_limit.remaining end - def github_options - if config - config["args"]["client_options"].deep_symbolize_keys + def raise_or_wait_for_rate_limit + rate_limit_counter.increment + + if parallel? + raise RateLimitError else - OmniAuth::Strategies::GitHub.default_options[:client_options].symbolize_keys + sleep(rate_limit_resets_in) end end - def rate_limit - api.rate_limit! - # GitHub Rate Limit API returns 404 when the rate limit is - # disabled. In this case we just want to return gracefully - # instead of spitting out an error. - rescue Octokit::NotFound - nil + def rate_limit_resets_in + # We add a few seconds to the rate limit so we don't _immediately_ + # resume when the rate limit resets as this may result in us performing + # a request before GitHub has a chance to reset the limit. + octokit.rate_limit.resets_in + 5 end - def has_rate_limit? - return @has_rate_limit if defined?(@has_rate_limit) - - @has_rate_limit = rate_limit.present? + def rate_limiting_enabled? + @rate_limiting_enabled ||= api_endpoint.include?('.github.com') end - def rate_limit_exceed? - has_rate_limit? && rate_limit.remaining <= GITHUB_SAFE_REMAINING_REQUESTS + def api_endpoint + custom_api_endpoint || default_api_endpoint end - def rate_limit_sleep_time - rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME + def custom_api_endpoint + github_omniauth_provider.dig('args', 'client_options', 'site') end - def request(method, *args, &block) - sleep rate_limit_sleep_time if rate_limit_exceed? + def default_api_endpoint + OmniAuth::Strategies::GitHub.default_options[:client_options][:site] + end - data = api.__send__(method, *args) # rubocop:disable GitlabSecurity/PublicSend - return data unless data.is_a?(Array) + def verify_ssl + github_omniauth_provider.fetch('verify_ssl', true) + end - last_response = api.last_response + def github_omniauth_provider + @github_omniauth_provider ||= + Gitlab.config.omniauth.providers + .find { |provider| provider.name == 'github' } + .to_h + end - if block_given? - yield data - # api.last_response could change while we're yielding (e.g. fetching labels for each PR) - # so we cache our own last response - each_response_page(last_response, &block) - else - each_response_page(last_response) { |page| data.concat(page) } - data - end + def rate_limit_counter + @rate_limit_counter ||= Gitlab::Metrics.counter( + :github_importer_rate_limit_hits, + 'The number of times we hit the GitHub rate limit when importing projects' + ) end - def each_response_page(last_response) - while last_response.rels[:next] - sleep rate_limit_sleep_time if rate_limit_exceed? - last_response = last_response.rels[:next].get - yield last_response.data if last_response.data.is_a?(Array) - end + def request_count_counter + @request_counter ||= Gitlab::Metrics.counter( + :github_importer_request_count, + 'The number of GitHub API calls performed when importing projects' + ) end end end diff --git a/lib/gitlab/github_import/importer/diff_note_importer.rb b/lib/gitlab/github_import/importer/diff_note_importer.rb new file mode 100644 index 00000000000..8274f37d358 --- /dev/null +++ b/lib/gitlab/github_import/importer/diff_note_importer.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class DiffNoteImporter + attr_reader :note, :project, :client, :user_finder + + # note - An instance of `Gitlab::GithubImport::Representation::DiffNote`. + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + def initialize(note, project, client) + @note = note + @project = project + @client = client + @user_finder = UserFinder.new(project, client) + end + + def execute + return unless (mr_id = find_merge_request_id) + + author_id, author_found = user_finder.author_id_for(note) + + note_body = + MarkdownText.format(note.note, note.author, author_found) + + attributes = { + noteable_type: 'MergeRequest', + noteable_id: mr_id, + project_id: project.id, + author_id: author_id, + note: note_body, + system: false, + commit_id: note.commit_id, + line_code: note.line_code, + type: 'LegacyDiffNote', + created_at: note.created_at, + updated_at: note.updated_at, + st_diff: note.diff_hash.to_yaml + } + + # It's possible that during an import we'll insert tens of thousands + # of diff notes. If we were to use the Note/LegacyDiffNote model here + # we'd also have to run additional queries for both validations and + # callbacks, putting a lot of pressure on the database. + # + # To work around this we're using bulk_insert with a single row. This + # allows us to efficiently insert data (even if it's just 1 row) + # without having to use all sorts of hacks to disable callbacks. + Gitlab::Database.bulk_insert(LegacyDiffNote.table_name, [attributes]) + rescue ActiveRecord::InvalidForeignKey + # It's possible the project and the issue have been deleted since + # scheduling this job. In this case we'll just skip creating the note. + end + + # Returns the ID of the merge request this note belongs to. + def find_merge_request_id + GithubImport::IssuableFinder.new(project, note).database_id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/diff_notes_importer.rb b/lib/gitlab/github_import/importer/diff_notes_importer.rb new file mode 100644 index 00000000000..966f12c5c2f --- /dev/null +++ b/lib/gitlab/github_import/importer/diff_notes_importer.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class DiffNotesImporter + include ParallelScheduling + + def representation_class + Representation::DiffNote + end + + def importer_class + DiffNoteImporter + end + + def sidekiq_worker_class + ImportDiffNoteWorker + end + + def collection_method + :pull_requests_comments + end + + def id_for_already_imported_cache(note) + note.id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/issue_and_label_links_importer.rb b/lib/gitlab/github_import/importer/issue_and_label_links_importer.rb new file mode 100644 index 00000000000..bad064b76c8 --- /dev/null +++ b/lib/gitlab/github_import/importer/issue_and_label_links_importer.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class IssueAndLabelLinksImporter + attr_reader :issue, :project, :client + + # issue - An instance of `Gitlab::GithubImport::Representation::Issue`. + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(issue, project, client) + @issue = issue + @project = project + @client = client + end + + def execute + IssueImporter.import_if_issue(issue, project, client) + LabelLinksImporter.new(issue, project, client).execute + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb new file mode 100644 index 00000000000..31fefebf787 --- /dev/null +++ b/lib/gitlab/github_import/importer/issue_importer.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class IssueImporter + attr_reader :project, :issue, :client, :user_finder, :milestone_finder, + :issuable_finder + + # Imports an issue if it's a regular issue and not a pull request. + def self.import_if_issue(issue, project, client) + new(issue, project, client).execute unless issue.pull_request? + end + + # issue - An instance of `Gitlab::GithubImport::Representation::Issue`. + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(issue, project, client) + @issue = issue + @project = project + @client = client + @user_finder = UserFinder.new(project, client) + @milestone_finder = MilestoneFinder.new(project) + @issuable_finder = GithubImport::IssuableFinder.new(project, issue) + end + + def execute + Issue.transaction do + if (issue_id = create_issue) + create_assignees(issue_id) + issuable_finder.cache_database_id(issue_id) + end + end + end + + # Creates a new GitLab issue for the current GitHub issue. + # + # Returns the ID of the created issue as an Integer. If the issue + # couldn't be created this method will return `nil` instead. + def create_issue + author_id, author_found = user_finder.author_id_for(issue) + + description = + MarkdownText.format(issue.description, issue.author, author_found) + + attributes = { + iid: issue.iid, + title: issue.truncated_title, + author_id: author_id, + project_id: project.id, + description: description, + milestone_id: milestone_finder.id_for(issue), + state: issue.state, + created_at: issue.created_at, + updated_at: issue.updated_at + } + + GithubImport.insert_and_return_id(attributes, project.issues) + rescue ActiveRecord::InvalidForeignKey + # It's possible the project has been deleted since scheduling this + # job. In this case we'll just skip creating the issue. + end + + # Stores all issue assignees in the database. + # + # issue_id - The ID of the created issue. + def create_assignees(issue_id) + assignees = [] + + issue.assignees.each do |assignee| + if (user_id = user_finder.user_id_for(assignee)) + assignees << { issue_id: issue_id, user_id: user_id } + end + end + + Gitlab::Database.bulk_insert(IssueAssignee.table_name, assignees) + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/issues_importer.rb b/lib/gitlab/github_import/importer/issues_importer.rb new file mode 100644 index 00000000000..ac6d0666b3a --- /dev/null +++ b/lib/gitlab/github_import/importer/issues_importer.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class IssuesImporter + include ParallelScheduling + + def importer_class + IssueAndLabelLinksImporter + end + + def representation_class + Representation::Issue + end + + def sidekiq_worker_class + ImportIssueWorker + end + + def collection_method + :issues + end + + def id_for_already_imported_cache(issue) + issue.number + end + + def collection_options + { state: 'all', sort: 'created', direction: 'asc' } + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/label_links_importer.rb b/lib/gitlab/github_import/importer/label_links_importer.rb new file mode 100644 index 00000000000..2001b7e3482 --- /dev/null +++ b/lib/gitlab/github_import/importer/label_links_importer.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class LabelLinksImporter + attr_reader :issue, :project, :client, :label_finder + + # issue - An instance of `Gitlab::GithubImport::Representation::Issue` + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(issue, project, client) + @issue = issue + @project = project + @client = client + @label_finder = LabelFinder.new(project) + end + + def execute + create_labels + end + + def create_labels + time = Time.zone.now + rows = [] + target_id = find_target_id + + issue.label_names.each do |label_name| + # Although unlikely it's technically possible for an issue to be + # given a label that was created and assigned after we imported all + # the project's labels. + next unless (label_id = label_finder.id_for(label_name)) + + rows << { + label_id: label_id, + target_id: target_id, + target_type: issue.issuable_type, + created_at: time, + updated_at: time + } + end + + Gitlab::Database.bulk_insert(LabelLink.table_name, rows) + end + + def find_target_id + GithubImport::IssuableFinder.new(project, issue).database_id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/labels_importer.rb b/lib/gitlab/github_import/importer/labels_importer.rb new file mode 100644 index 00000000000..a73033d35ba --- /dev/null +++ b/lib/gitlab/github_import/importer/labels_importer.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class LabelsImporter + include BulkImporting + + attr_reader :project, :client, :existing_labels + + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + def initialize(project, client) + @project = project + @client = client + @existing_labels = project.labels.pluck(:title).to_set + end + + def execute + bulk_insert(Label, build_labels) + build_labels_cache + end + + def build_labels + build_database_rows(each_label) + end + + def already_imported?(label) + existing_labels.include?(label.name) + end + + def build_labels_cache + LabelFinder.new(project).build_cache + end + + def build(label) + time = Time.zone.now + + { + title: label.name, + color: '#' + label.color, + project_id: project.id, + type: 'ProjectLabel', + created_at: time, + updated_at: time + } + end + + def each_label + client.labels(project.import_source) + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb new file mode 100644 index 00000000000..c53480e828a --- /dev/null +++ b/lib/gitlab/github_import/importer/milestones_importer.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class MilestonesImporter + include BulkImporting + + attr_reader :project, :client, :existing_milestones + + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(project, client) + @project = project + @client = client + @existing_milestones = project.milestones.pluck(:iid).to_set + end + + def execute + bulk_insert(Milestone, build_milestones) + build_milestones_cache + end + + def build_milestones + build_database_rows(each_milestone) + end + + def already_imported?(milestone) + existing_milestones.include?(milestone.number) + end + + def build_milestones_cache + MilestoneFinder.new(project).build_cache + end + + def build(milestone) + { + iid: milestone.number, + title: milestone.title, + description: milestone.description, + project_id: project.id, + state: state_for(milestone), + created_at: milestone.created_at, + updated_at: milestone.updated_at + } + end + + def state_for(milestone) + milestone.state == 'open' ? :active : :closed + end + + def each_milestone + client.milestones(project.import_source, state: 'all') + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/note_importer.rb b/lib/gitlab/github_import/importer/note_importer.rb new file mode 100644 index 00000000000..c890f2df360 --- /dev/null +++ b/lib/gitlab/github_import/importer/note_importer.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class NoteImporter + attr_reader :note, :project, :client, :user_finder + + # note - An instance of `Gitlab::GithubImport::Representation::Note`. + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + def initialize(note, project, client) + @note = note + @project = project + @client = client + @user_finder = UserFinder.new(project, client) + end + + def execute + return unless (noteable_id = find_noteable_id) + + author_id, author_found = user_finder.author_id_for(note) + + note_body = + MarkdownText.format(note.note, note.author, author_found) + + attributes = { + noteable_type: note.noteable_type, + noteable_id: noteable_id, + project_id: project.id, + author_id: author_id, + note: note_body, + system: false, + created_at: note.created_at, + updated_at: note.updated_at + } + + # We're using bulk_insert here so we can bypass any validations and + # callbacks. Running these would result in a lot of unnecessary SQL + # queries being executed when importing large projects. + Gitlab::Database.bulk_insert(Note.table_name, [attributes]) + rescue ActiveRecord::InvalidForeignKey + # It's possible the project and the issue have been deleted since + # scheduling this job. In this case we'll just skip creating the note. + end + + # Returns the ID of the issue or merge request to create the note for. + def find_noteable_id + GithubImport::IssuableFinder.new(project, note).database_id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/notes_importer.rb b/lib/gitlab/github_import/importer/notes_importer.rb new file mode 100644 index 00000000000..5aec760ea5f --- /dev/null +++ b/lib/gitlab/github_import/importer/notes_importer.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class NotesImporter + include ParallelScheduling + + def importer_class + NoteImporter + end + + def representation_class + Representation::Note + end + + def sidekiq_worker_class + ImportNoteWorker + end + + def collection_method + :issues_comments + end + + def id_for_already_imported_cache(note) + note.id + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/pull_request_importer.rb b/lib/gitlab/github_import/importer/pull_request_importer.rb new file mode 100644 index 00000000000..49d859f9624 --- /dev/null +++ b/lib/gitlab/github_import/importer/pull_request_importer.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class PullRequestImporter + attr_reader :pull_request, :project, :client, :user_finder, + :milestone_finder, :issuable_finder + + # pull_request - An instance of + # `Gitlab::GithubImport::Representation::PullRequest`. + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(pull_request, project, client) + @pull_request = pull_request + @project = project + @client = client + @user_finder = UserFinder.new(project, client) + @milestone_finder = MilestoneFinder.new(project) + @issuable_finder = + GithubImport::IssuableFinder.new(project, pull_request) + end + + def execute + if (mr_id = create_merge_request) + issuable_finder.cache_database_id(mr_id) + end + end + + # Creates the merge request and returns its ID. + # + # This method will return `nil` if the merge request could not be + # created. + def create_merge_request + author_id, author_found = user_finder.author_id_for(pull_request) + + description = MarkdownText + .format(pull_request.description, pull_request.author, author_found) + + # This work must be wrapped in a transaction as otherwise we can leave + # behind incomplete data in the event of an error. This can then lead + # to duplicate key errors when jobs are retried. + MergeRequest.transaction do + attributes = { + iid: pull_request.iid, + title: pull_request.truncated_title, + description: description, + source_project_id: project.id, + target_project_id: project.id, + source_branch: pull_request.formatted_source_branch, + target_branch: pull_request.target_branch, + state: pull_request.state, + milestone_id: milestone_finder.id_for(pull_request), + author_id: author_id, + assignee_id: user_finder.assignee_id_for(pull_request), + created_at: pull_request.created_at, + updated_at: pull_request.updated_at + } + + # When creating merge requests there are a lot of hooks that may + # run, for many different reasons. Many of these hooks (e.g. the + # ones used for rendering Markdown) are completely unnecessary and + # may even lead to transaction timeouts. + # + # To ensure importing pull requests has a minimal impact and can + # complete in a reasonable time we bypass all the hooks by inserting + # the row and then retrieving it. We then only perform the + # additional work that is strictly necessary. + merge_request_id = GithubImport + .insert_and_return_id(attributes, project.merge_requests) + + merge_request = project.merge_requests.find(merge_request_id) + + # These fields are set so we can create the correct merge request + # diffs. + merge_request.source_branch_sha = pull_request.source_branch_sha + merge_request.target_branch_sha = pull_request.target_branch_sha + + merge_request.keep_around_commit + merge_request.merge_request_diffs.create + + merge_request.id + end + rescue ActiveRecord::InvalidForeignKey + # It's possible the project has been deleted since scheduling this + # job. In this case we'll just skip creating the merge request. + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/pull_requests_importer.rb b/lib/gitlab/github_import/importer/pull_requests_importer.rb new file mode 100644 index 00000000000..5437e32e9f1 --- /dev/null +++ b/lib/gitlab/github_import/importer/pull_requests_importer.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class PullRequestsImporter + include ParallelScheduling + + def importer_class + PullRequestImporter + end + + def representation_class + Representation::PullRequest + end + + def sidekiq_worker_class + ImportPullRequestWorker + end + + def id_for_already_imported_cache(pr) + pr.number + end + + def each_object_to_import + super do |pr| + update_repository if update_repository?(pr) + yield pr + end + end + + def update_repository + # We set this column _before_ fetching the repository, and this is + # deliberate. If we were to update this column after the fetch we may + # miss out on changes pushed during the fetch or between the fetch and + # updating the timestamp. + project.update_column(:last_repository_updated_at, Time.zone.now) + + project.repository.fetch_remote('github', forced: false) + + pname = project.path_with_namespace + + Rails.logger + .info("GitHub importer finished updating repository for #{pname}") + + repository_updates_counter.increment(project: pname) + end + + def update_repository?(pr) + last_update = project.last_repository_updated_at || project.created_at + + return false if pr.updated_at < last_update + + # PRs may be updated without there actually being new commits, thus we + # check to make sure we only re-fetch if truly necessary. + !(commit_exists?(pr.head.sha) && commit_exists?(pr.base.sha)) + end + + def commit_exists?(sha) + project.repository.lookup(sha) + true + rescue Rugged::Error + false + end + + def collection_method + :pull_requests + end + + def collection_options + { state: 'all', sort: 'created', direction: 'asc' } + end + + def repository_updates_counter + @repository_updates_counter ||= Gitlab::Metrics.counter( + :github_importer_repository_updates, + 'The number of times repositories have to be updated again' + ) + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/releases_importer.rb b/lib/gitlab/github_import/importer/releases_importer.rb new file mode 100644 index 00000000000..100f459fdcc --- /dev/null +++ b/lib/gitlab/github_import/importer/releases_importer.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class ReleasesImporter + include BulkImporting + + attr_reader :project, :client, :existing_tags + + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(project, client) + @project = project + @client = client + @existing_tags = project.releases.pluck(:tag).to_set + end + + def execute + bulk_insert(Release, build_releases) + end + + def build_releases + build_database_rows(each_release) + end + + def already_imported?(release) + existing_tags.include?(release.tag_name) + end + + def build(release) + { + tag: release.tag_name, + description: description_for(release), + created_at: release.created_at, + updated_at: release.updated_at, + project_id: project.id + } + end + + def each_release + client.releases(project.import_source) + end + + def description_for(release) + if release.body.present? + release.body + else + "Release for tag #{release.tag_name}" + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/importer/repository_importer.rb b/lib/gitlab/github_import/importer/repository_importer.rb new file mode 100644 index 00000000000..0b67fc8db73 --- /dev/null +++ b/lib/gitlab/github_import/importer/repository_importer.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Importer + class RepositoryImporter + include Gitlab::ShellAdapter + + attr_reader :project, :client + + def initialize(project, client) + @project = project + @client = client + end + + # Returns true if we should import the wiki for the project. + def import_wiki? + client.repository(project.import_source)&.has_wiki && + !project.wiki_repository_exists? + end + + # Imports the repository data. + # + # This method will return true if the data was imported successfully or + # the repository had already been imported before. + def execute + imported = + # It's possible a repository has already been imported when running + # this code, e.g. because we had to retry this job after + # `import_wiki?` raised a rate limit error. In this case we'll skip + # re-importing the main repository. + if project.repository.empty_repo? + import_repository + else + true + end + + update_clone_time if imported + + imported = import_wiki_repository if import_wiki? && imported + + imported + end + + def import_repository + project.ensure_repository + + configure_repository_remote + + project.repository.fetch_remote('github', forced: true) + + true + rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e + fail_import("Failed to import the repository: #{e.message}") + end + + def configure_repository_remote + return if project.repository.remote_exists?('github') + + project.repository.add_remote('github', project.import_url) + project.repository.set_import_remote_as_mirror('github') + + project.repository.add_remote_fetch_config( + 'github', + '+refs/pull/*/head:refs/merge-requests/*/head' + ) + end + + def import_wiki_repository + wiki_path = "#{project.disk_path}.wiki" + wiki_url = project.import_url.sub(/\.git\z/, '.wiki.git') + storage_path = project.repository_storage_path + + gitlab_shell.import_repository(storage_path, wiki_path, wiki_url) + + true + rescue Gitlab::Shell::Error => e + if e.message !~ /repository not exported/ + fail_import("Failed to import the wiki: #{e.message}") + else + true + end + end + + def update_clone_time + project.update_column(:last_repository_updated_at, Time.zone.now) + end + + def fail_import(message) + project.mark_import_as_failed(message) + false + end + end + end + end +end diff --git a/lib/gitlab/github_import/issuable_finder.rb b/lib/gitlab/github_import/issuable_finder.rb new file mode 100644 index 00000000000..211915f1d87 --- /dev/null +++ b/lib/gitlab/github_import/issuable_finder.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # IssuableFinder can be used for caching and retrieving database IDs for + # issuable objects such as issues and pull requests. By caching these IDs we + # remove the need for running a lot of database queries when importing + # GitHub projects. + class IssuableFinder + attr_reader :project, :object + + # The base cache key to use for storing/retrieving issuable IDs. + CACHE_KEY = 'github-import/issuable-finder/%{project}/%{type}/%{iid}'.freeze + + # project - An instance of `Project`. + # object - The object to look up or set a database ID for. + def initialize(project, object) + @project = project + @object = object + end + + # Returns the database ID for the object. + # + # This method will return `nil` if no ID could be found. + def database_id + val = Caching.read(cache_key) + + val.to_i if val.present? + end + + # Associates the given database ID with the current object. + # + # database_id - The ID of the corresponding database row. + def cache_database_id(database_id) + Caching.write(cache_key, database_id) + end + + private + + def cache_key + CACHE_KEY % { + project: project.id, + type: cache_key_type, + iid: cache_key_iid + } + end + + # Returns the identifier to use for cache keys. + # + # For issues and pull requests this will be "Issue" or "MergeRequest" + # respectively. For diff notes this will return "MergeRequest", for + # regular notes it will either return "Issue" or "MergeRequest" depending + # on what type of object the note belongs to. + def cache_key_type + if object.respond_to?(:issuable_type) + object.issuable_type + elsif object.respond_to?(:noteable_type) + object.noteable_type + else + raise( + TypeError, + "Instances of #{object.class} are not supported" + ) + end + end + + def cache_key_iid + if object.respond_to?(:noteable_id) + object.noteable_id + elsif object.respond_to?(:iid) + object.iid + else + raise( + TypeError, + "Instances of #{object.class} are not supported" + ) + end + end + end + end +end diff --git a/lib/gitlab/github_import/label_finder.rb b/lib/gitlab/github_import/label_finder.rb new file mode 100644 index 00000000000..9be071141db --- /dev/null +++ b/lib/gitlab/github_import/label_finder.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class LabelFinder + attr_reader :project + + # The base cache key to use for storing/retrieving label IDs. + CACHE_KEY = 'github-import/label-finder/%{project}/%{name}'.freeze + + # project - An instance of `Project`. + def initialize(project) + @project = project + end + + # Returns the label ID for the given name. + def id_for(name) + Caching.read_integer(cache_key_for(name)) + end + + def build_cache + mapping = @project + .labels + .pluck(:id, :name) + .each_with_object({}) do |(id, name), hash| + hash[cache_key_for(name)] = id + end + + Caching.write_multiple(mapping) + end + + def cache_key_for(name) + CACHE_KEY % { project: project.id, name: name } + end + end + end +end diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb new file mode 100644 index 00000000000..b25c4f7becf --- /dev/null +++ b/lib/gitlab/github_import/markdown_text.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class MarkdownText + attr_reader :text, :author, :exists + + def self.format(*args) + new(*args).to_s + end + + # text - The Markdown text as a String. + # author - An instance of `Gitlab::GithubImport::Representation::User` + # exists - Boolean that indicates the user exists in the GitLab database. + def initialize(text, author, exists = false) + @text = text + @author = author + @exists = exists + end + + def to_s + if exists + text + else + "*Created by: #{author.login}*\n\n#{text}" + end + end + end + end +end diff --git a/lib/gitlab/github_import/milestone_finder.rb b/lib/gitlab/github_import/milestone_finder.rb new file mode 100644 index 00000000000..208d15dc144 --- /dev/null +++ b/lib/gitlab/github_import/milestone_finder.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + class MilestoneFinder + attr_reader :project + + # The base cache key to use for storing/retrieving milestone IDs. + CACHE_KEY = 'github-import/milestone-finder/%{project}/%{iid}'.freeze + + # project - An instance of `Project` + def initialize(project) + @project = project + end + + # issuable - An instance of `Gitlab::GithubImport::Representation::Issue` + # or `Gitlab::GithubImport::Representation::PullRequest`. + def id_for(issuable) + return unless issuable.milestone_number + + Caching.read_integer(cache_key_for(issuable.milestone_number)) + end + + def build_cache + mapping = @project + .milestones + .pluck(:id, :iid) + .each_with_object({}) do |(id, iid), hash| + hash[cache_key_for(iid)] = id + end + + Caching.write_multiple(mapping) + end + + def cache_key_for(iid) + CACHE_KEY % { project: project.id, iid: iid } + end + end + end +end diff --git a/lib/gitlab/github_import/page_counter.rb b/lib/gitlab/github_import/page_counter.rb new file mode 100644 index 00000000000..c3db2d0b469 --- /dev/null +++ b/lib/gitlab/github_import/page_counter.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # PageCounter can be used to keep track of the last imported page of a + # collection, allowing workers to resume where they left off in the event of + # an error. + class PageCounter + attr_reader :cache_key + + # The base cache key to use for storing the last page number. + CACHE_KEY = 'github-importer/page-counter/%{project}/%{collection}'.freeze + + def initialize(project, collection) + @cache_key = CACHE_KEY % { project: project.id, collection: collection } + end + + # Sets the page number to the given value. + # + # Returns true if the page number was overwritten, false otherwise. + def set(page) + Caching.write_if_greater(cache_key, page) + end + + # Returns the current value from the cache. + def current + Caching.read_integer(cache_key) || 1 + end + end + end +end diff --git a/lib/gitlab/github_import/parallel_importer.rb b/lib/gitlab/github_import/parallel_importer.rb new file mode 100644 index 00000000000..6da11e6ef08 --- /dev/null +++ b/lib/gitlab/github_import/parallel_importer.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # The ParallelImporter schedules the importing of a GitHub project using + # Sidekiq. + class ParallelImporter + attr_reader :project + + def self.async? + true + end + + def self.imports_repository? + true + end + + def initialize(project) + @project = project + end + + def execute + jid = generate_jid + + # The original import JID is the JID of the RepositoryImportWorker job, + # which will be removed once that job completes. Reusing that JID could + # result in StuckImportJobsWorker marking the job as stuck before we get + # to running Stage::ImportRepositoryWorker. + # + # We work around this by setting the JID to a custom generated one, then + # refreshing it in the various stages whenever necessary. + Gitlab::SidekiqStatus + .set(jid, StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION) + + project.update_column(:import_jid, jid) + + Stage::ImportRepositoryWorker + .perform_async(project.id) + + true + end + + def generate_jid + "github-importer/#{project.id}" + end + end + end +end diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb new file mode 100644 index 00000000000..d4d1357f5a3 --- /dev/null +++ b/lib/gitlab/github_import/parallel_scheduling.rb @@ -0,0 +1,162 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module ParallelScheduling + attr_reader :project, :client, :page_counter, :already_imported_cache_key + + # The base cache key to use for tracking already imported objects. + ALREADY_IMPORTED_CACHE_KEY = + 'github-importer/already-imported/%{project}/%{collection}'.freeze + + # project - An instance of `Project`. + # client - An instance of `Gitlab::GithubImport::Client`. + # parallel - When set to true the objects will be imported in parallel. + def initialize(project, client, parallel: true) + @project = project + @client = client + @parallel = parallel + @page_counter = PageCounter.new(project, collection_method) + @already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY % + { project: project.id, collection: collection_method } + end + + def parallel? + @parallel + end + + def execute + retval = + if parallel? + parallel_import + else + sequential_import + end + + # Once we have completed all work we can remove our "already exists" + # cache so we don't put too much pressure on Redis. + # + # We don't immediately remove it since it's technically possible for + # other instances of this job to still run, instead we set the + # expiration time to a lower value. This prevents the other jobs from + # still scheduling duplicates while. Since all work has already been + # completed those jobs will just cycle through any remaining pages while + # not scheduling anything. + Caching.expire(already_imported_cache_key, 15.minutes.to_i) + + retval + end + + # Imports all the objects in sequence in the current thread. + def sequential_import + each_object_to_import do |object| + repr = representation_class.from_api_response(object) + + importer_class.new(repr, project, client).execute + end + end + + # Imports all objects in parallel by scheduling a Sidekiq job for every + # individual object. + def parallel_import + waiter = JobWaiter.new + + each_object_to_import do |object| + repr = representation_class.from_api_response(object) + + sidekiq_worker_class + .perform_async(project.id, repr.to_hash, waiter.key) + + waiter.jobs_remaining += 1 + end + + waiter + end + + # The method that will be called for traversing through all the objects to + # import, yielding them to the supplied block. + def each_object_to_import + repo = project.import_source + + # We inject the page number here to make sure that all importers always + # start where they left off. Simply starting over wouldn't work for + # repositories with a lot of data (e.g. tens of thousands of comments). + options = collection_options.merge(page: page_counter.current) + + client.each_page(collection_method, repo, options) do |page| + # Technically it's possible that the same work is performed multiple + # times, as Sidekiq doesn't guarantee there will ever only be one + # instance of a job. In such a scenario it's possible for one job to + # have a lower page number (e.g. 5) compared to another (e.g. 10). In + # this case we skip over all the objects until we have caught up, + # reducing the number of duplicate jobs scheduled by the provided + # block. + next unless page_counter.set(page.number) + + page.objects.each do |object| + next if already_imported?(object) + + yield object + + # We mark the object as imported immediately so we don't end up + # scheduling it multiple times. + mark_as_imported(object) + end + end + end + + # Returns true if the given object has already been imported, false + # otherwise. + # + # object - The object to check. + def already_imported?(object) + id = id_for_already_imported_cache(object) + + Caching.set_includes?(already_imported_cache_key, id) + end + + # Marks the given object as "already imported". + def mark_as_imported(object) + id = id_for_already_imported_cache(object) + + Caching.set_add(already_imported_cache_key, id) + end + + # Returns the ID to use for the cache used for checking if an object has + # already been imported or not. + # + # object - The object we may want to import. + def id_for_already_imported_cache(object) + raise NotImplementedError + end + + # The class used for converting API responses to Hashes when performing + # the import. + def representation_class + raise NotImplementedError + end + + # The class to use for importing objects when importing them sequentially. + def importer_class + raise NotImplementedError + end + + # The Sidekiq worker class used for scheduling the importing of objects in + # parallel. + def sidekiq_worker_class + raise NotImplementedError + end + + # The name of the method to call to retrieve the data to import. + def collection_method + raise NotImplementedError + end + + # Any options to be passed to the method used for retrieving the data to + # import. + def collection_options + {} + end + end + end +end diff --git a/lib/gitlab/github_import/rate_limit_error.rb b/lib/gitlab/github_import/rate_limit_error.rb new file mode 100644 index 00000000000..cc2de909c29 --- /dev/null +++ b/lib/gitlab/github_import/rate_limit_error.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # Error that will be raised when we're about to reach (or have reached) the + # GitHub API's rate limit. + RateLimitError = Class.new(StandardError) + end +end diff --git a/lib/gitlab/github_import/representation.rb b/lib/gitlab/github_import/representation.rb new file mode 100644 index 00000000000..639477ef2a2 --- /dev/null +++ b/lib/gitlab/github_import/representation.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + TIMESTAMP_KEYS = %i[created_at updated_at merged_at].freeze + + # Converts a Hash with String based keys to one that can be used by the + # various Representation classes. + # + # Example: + # + # Representation.symbolize_hash('number' => 10) # => { number: 10 } + def self.symbolize_hash(raw_hash = nil) + hash = raw_hash.deep_symbolize_keys + + TIMESTAMP_KEYS.each do |key| + hash[key] = Time.parse(hash[key]) if hash[key].is_a?(String) + end + + hash + end + end + end +end diff --git a/lib/gitlab/github_import/representation/diff_note.rb b/lib/gitlab/github_import/representation/diff_note.rb new file mode 100644 index 00000000000..bb7439a0641 --- /dev/null +++ b/lib/gitlab/github_import/representation/diff_note.rb @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class DiffNote + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :noteable_type, :noteable_id, :commit_id, :file_path, + :diff_hunk, :author, :note, :created_at, :updated_at, + :github_id + + NOTEABLE_ID_REGEX = /\/pull\/(?<iid>\d+)/i + + # Builds a diff note from a GitHub API response. + # + # note - An instance of `Sawyer::Resource` containing the note details. + def self.from_api_response(note) + matches = note.html_url.match(NOTEABLE_ID_REGEX) + + unless matches + raise( + ArgumentError, + "The note URL #{note.html_url.inspect} is not supported" + ) + end + + user = Representation::User.from_api_response(note.user) if note.user + hash = { + noteable_type: 'MergeRequest', + noteable_id: matches[:iid].to_i, + file_path: note.path, + commit_id: note.commit_id, + diff_hunk: note.diff_hunk, + author: user, + note: note.body, + created_at: note.created_at, + updated_at: note.updated_at, + github_id: note.id + } + + new(hash) + end + + # Builds a new note using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + hash = Representation.symbolize_hash(raw_hash) + hash[:author] &&= Representation::User.from_json_hash(hash[:author]) + + new(hash) + end + + # attributes - A Hash containing the raw note details. The keys of this + # Hash must be Symbols. + def initialize(attributes) + @attributes = attributes + end + + def line_code + diff_line = Gitlab::Diff::Parser.new.parse(diff_hunk.lines).to_a.last + + Gitlab::Git + .diff_line_code(file_path, diff_line.new_pos, diff_line.old_pos) + end + + # Returns a Hash that can be used to populate `notes.st_diff`, removing + # the need for requesting Git data for every diff note. + def diff_hash + { + diff: diff_hunk, + new_path: file_path, + old_path: file_path, + + # These fields are not displayed for LegacyDiffNote notes, so it + # doesn't really matter what we set them to. + a_mode: '100644', + b_mode: '100644', + new_file: false + } + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/expose_attribute.rb b/lib/gitlab/github_import/representation/expose_attribute.rb new file mode 100644 index 00000000000..c3405759631 --- /dev/null +++ b/lib/gitlab/github_import/representation/expose_attribute.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + module ExposeAttribute + extend ActiveSupport::Concern + + module ClassMethods + # Defines getter methods for the given attribute names. + # + # Example: + # + # expose_attribute :iid, :title + def expose_attribute(*names) + names.each do |name| + name = name.to_sym + + define_method(name) { attributes[name] } + end + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/issue.rb b/lib/gitlab/github_import/representation/issue.rb new file mode 100644 index 00000000000..f3071b3e2b3 --- /dev/null +++ b/lib/gitlab/github_import/representation/issue.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class Issue + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :iid, :title, :description, :milestone_number, + :created_at, :updated_at, :state, :assignees, + :label_names, :author + + # Builds an issue from a GitHub API response. + # + # issue - An instance of `Sawyer::Resource` containing the issue + # details. + def self.from_api_response(issue) + user = + if issue.user + Representation::User.from_api_response(issue.user) + end + + hash = { + iid: issue.number, + title: issue.title, + description: issue.body, + milestone_number: issue.milestone&.number, + state: issue.state == 'open' ? :opened : :closed, + assignees: issue.assignees.map do |u| + Representation::User.from_api_response(u) + end, + label_names: issue.labels.map(&:name), + author: user, + created_at: issue.created_at, + updated_at: issue.updated_at, + pull_request: issue.pull_request ? true : false + } + + new(hash) + end + + # Builds a new issue using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + hash = Representation.symbolize_hash(raw_hash) + + hash[:state] = hash[:state].to_sym + hash[:assignees].map! { |u| Representation::User.from_json_hash(u) } + hash[:author] &&= Representation::User.from_json_hash(hash[:author]) + + new(hash) + end + + # attributes - A hash containing the raw issue details. The keys of this + # Hash (and any nested hashes) must be symbols. + def initialize(attributes) + @attributes = attributes + end + + def truncated_title + title.truncate(255) + end + + def labels? + label_names && label_names.any? + end + + def pull_request? + attributes[:pull_request] + end + + def issuable_type + pull_request? ? 'MergeRequest' : 'Issue' + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/note.rb b/lib/gitlab/github_import/representation/note.rb new file mode 100644 index 00000000000..a68bc4c002f --- /dev/null +++ b/lib/gitlab/github_import/representation/note.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class Note + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :noteable_id, :noteable_type, :author, :note, + :created_at, :updated_at, :github_id + + NOTEABLE_TYPE_REGEX = /\/(?<type>(pull|issues))\/(?<iid>\d+)/i + + # Builds a note from a GitHub API response. + # + # note - An instance of `Sawyer::Resource` containing the note details. + def self.from_api_response(note) + matches = note.html_url.match(NOTEABLE_TYPE_REGEX) + + if !matches || !matches[:type] + raise( + ArgumentError, + "The note URL #{note.html_url.inspect} is not supported" + ) + end + + noteable_type = + if matches[:type] == 'pull' + 'MergeRequest' + else + 'Issue' + end + + user = Representation::User.from_api_response(note.user) if note.user + hash = { + noteable_type: noteable_type, + noteable_id: matches[:iid].to_i, + author: user, + note: note.body, + created_at: note.created_at, + updated_at: note.updated_at, + github_id: note.id + } + + new(hash) + end + + # Builds a new note using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + hash = Representation.symbolize_hash(raw_hash) + + hash[:author] &&= Representation::User.from_json_hash(hash[:author]) + + new(hash) + end + + # attributes - A Hash containing the raw note details. The keys of this + # Hash must be Symbols. + def initialize(attributes) + @attributes = attributes + end + + alias_method :issuable_type, :noteable_type + end + end + end +end diff --git a/lib/gitlab/github_import/representation/pull_request.rb b/lib/gitlab/github_import/representation/pull_request.rb new file mode 100644 index 00000000000..593b491a837 --- /dev/null +++ b/lib/gitlab/github_import/representation/pull_request.rb @@ -0,0 +1,114 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class PullRequest + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :iid, :title, :description, :source_branch, + :source_branch_sha, :target_branch, :target_branch_sha, + :milestone_number, :author, :assignee, :created_at, + :updated_at, :merged_at, :source_repository_id, + :target_repository_id, :source_repository_owner + + # Builds a PR from a GitHub API response. + # + # issue - An instance of `Sawyer::Resource` containing the PR details. + def self.from_api_response(pr) + assignee = + if pr.assignee + Representation::User.from_api_response(pr.assignee) + end + + user = Representation::User.from_api_response(pr.user) if pr.user + hash = { + iid: pr.number, + title: pr.title, + description: pr.body, + source_branch: pr.head.ref, + target_branch: pr.base.ref, + source_branch_sha: pr.head.sha, + target_branch_sha: pr.base.sha, + source_repository_id: pr.head&.repo&.id, + target_repository_id: pr.base&.repo&.id, + source_repository_owner: pr.head&.user&.login, + state: pr.state == 'open' ? :opened : :closed, + milestone_number: pr.milestone&.number, + author: user, + assignee: assignee, + created_at: pr.created_at, + updated_at: pr.updated_at, + merged_at: pr.merged_at + } + + new(hash) + end + + # Builds a new PR using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + hash = Representation.symbolize_hash(raw_hash) + + hash[:state] = hash[:state].to_sym + hash[:author] &&= Representation::User.from_json_hash(hash[:author]) + + # Assignees are optional so we only convert it from a Hash if one was + # set. + hash[:assignee] &&= Representation::User + .from_json_hash(hash[:assignee]) + + new(hash) + end + + # attributes - A Hash containing the raw PR details. The keys of this + # Hash (and any nested hashes) must be symbols. + def initialize(attributes) + @attributes = attributes + end + + def truncated_title + title.truncate(255) + end + + # Returns a formatted source branch. + # + # For cross-project pull requests the branch name will be in the format + # `owner-name:branch-name`. + def formatted_source_branch + if cross_project? && source_repository_owner + "#{source_repository_owner}:#{source_branch}" + elsif source_branch == target_branch + # Sometimes the source and target branch are the same, but GitLab + # doesn't support this. This can happen when both the user and + # source repository have been deleted, and the PR was submitted from + # the fork's master branch. + "#{source_branch}-#{iid}" + else + source_branch + end + end + + def state + if merged_at + :merged + else + attributes[:state] + end + end + + def cross_project? + return true unless source_repository_id + + source_repository_id != target_repository_id + end + + def issuable_type + 'MergeRequest' + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/to_hash.rb b/lib/gitlab/github_import/representation/to_hash.rb new file mode 100644 index 00000000000..4a0f36ab8f0 --- /dev/null +++ b/lib/gitlab/github_import/representation/to_hash.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + module ToHash + # Converts the current representation to a Hash. The keys of this Hash + # will be Symbols. + def to_hash + hash = {} + + attributes.each do |key, value| + hash[key] = convert_value_for_to_hash(value) + end + + hash + end + + def convert_value_for_to_hash(value) + if value.is_a?(Array) + value.map { |v| convert_value_for_to_hash(v) } + elsif value.respond_to?(:to_hash) + value.to_hash + else + value + end + end + end + end + end +end diff --git a/lib/gitlab/github_import/representation/user.rb b/lib/gitlab/github_import/representation/user.rb new file mode 100644 index 00000000000..e00dcfca33d --- /dev/null +++ b/lib/gitlab/github_import/representation/user.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + module Representation + class User + include ToHash + include ExposeAttribute + + attr_reader :attributes + + expose_attribute :id, :login + + # Builds a user from a GitHub API response. + # + # user - An instance of `Sawyer::Resource` containing the user details. + def self.from_api_response(user) + new(id: user.id, login: user.login) + end + + # Builds a user using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + new(Representation.symbolize_hash(raw_hash)) + end + + # attributes - A Hash containing the user details. The keys of this + # Hash (and any nested hashes) must be symbols. + def initialize(attributes) + @attributes = attributes + end + end + end + end +end diff --git a/lib/gitlab/github_import/sequential_importer.rb b/lib/gitlab/github_import/sequential_importer.rb new file mode 100644 index 00000000000..4f7324536a0 --- /dev/null +++ b/lib/gitlab/github_import/sequential_importer.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # The SequentialImporter imports a GitHub project in a single thread, + # without using Sidekiq. This makes it useful for testing purposes as well + # as Rake tasks, but it should be avoided for anything else in favour of the + # parallel importer. + class SequentialImporter + attr_reader :project, :client + + SEQUENTIAL_IMPORTERS = [ + Importer::LabelsImporter, + Importer::MilestonesImporter, + Importer::ReleasesImporter + ].freeze + + PARALLEL_IMPORTERS = [ + Importer::PullRequestsImporter, + Importer::IssuesImporter, + Importer::DiffNotesImporter, + Importer::NotesImporter + ].freeze + + # project - The project to import the data into. + # token - The token to use for the GitHub API. + def initialize(project, token: nil) + @project = project + @client = GithubImport + .new_client_for(project, token: token, parallel: false) + end + + def execute + Importer::RepositoryImporter.new(project, client).execute + + SEQUENTIAL_IMPORTERS.each do |klass| + klass.new(project, client).execute + end + + PARALLEL_IMPORTERS.each do |klass| + klass.new(project, client, parallel: false).execute + end + + project.repository.after_import + + true + end + end + end +end diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb new file mode 100644 index 00000000000..be1259662a7 --- /dev/null +++ b/lib/gitlab/github_import/user_finder.rb @@ -0,0 +1,164 @@ +# frozen_string_literal: true + +module Gitlab + module GithubImport + # Class that can be used for finding a GitLab user ID based on a GitHub user + # ID or username. + # + # Any found user IDs are cached in Redis to reduce the number of SQL queries + # executed over time. Valid keys are refreshed upon access so frequently + # used keys stick around. + # + # Lookups are cached even if no ID was found to remove the need for querying + # the database when most queries are not going to return results anyway. + class UserFinder + attr_reader :project, :client + + # The base cache key to use for caching user IDs for a given GitHub user + # ID. + ID_CACHE_KEY = 'github-import/user-finder/user-id/%s'.freeze + + # The base cache key to use for caching user IDs for a given GitHub email + # address. + ID_FOR_EMAIL_CACHE_KEY = + 'github-import/user-finder/id-for-email/%s'.freeze + + # The base cache key to use for caching the Email addresses of GitHub + # usernames. + EMAIL_FOR_USERNAME_CACHE_KEY = + 'github-import/user-finder/email-for-username/%s'.freeze + + # project - An instance of `Project` + # client - An instance of `Gitlab::GithubImport::Client` + def initialize(project, client) + @project = project + @client = client + end + + # Returns the GitLab user ID of an object's author. + # + # If the object has no author ID we'll use the ID of the GitLab ghost + # user. + def author_id_for(object) + id = + if object&.author + user_id_for(object.author) + else + GithubImport.ghost_user_id + end + + if id + [id, true] + else + [project.creator_id, false] + end + end + + # Returns the GitLab user ID of an issuable's assignee. + def assignee_id_for(issuable) + user_id_for(issuable.assignee) if issuable.assignee + end + + # Returns the GitLab user ID for a GitHub user. + # + # user - An instance of `Gitlab::GithubImport::Representation::User`. + def user_id_for(user) + find(user.id, user.login) + end + + # Returns the GitLab ID for the given GitHub ID or username. + # + # id - The ID of the GitHub user. + # username - The username of the GitHub user. + def find(id, username) + email = email_for_github_username(username) + cached, found_id = find_from_cache(id, email) + + return found_id if found_id + + # We only want to query the database if necessary. If previous lookups + # didn't yield a user ID we won't query the database again until the + # keys expire. + find_id_from_database(id, email) unless cached + end + + # Finds a user ID from the cache for a given GitHub ID or Email. + def find_from_cache(id, email = nil) + id_exists, id_for_github_id = cached_id_for_github_id(id) + + return [id_exists, id_for_github_id] if id_for_github_id + + # Just in case no Email address could be retrieved (for whatever reason) + return [false] unless email + + cached_id_for_github_email(email) + end + + # Finds a GitLab user ID from the database for a given GitHub user ID or + # Email. + def find_id_from_database(id, email) + id_for_github_id(id) || id_for_github_email(email) + end + + def email_for_github_username(username) + cache_key = EMAIL_FOR_USERNAME_CACHE_KEY % username + email = Caching.read(cache_key) + + unless email + user = client.user(username) + email = Caching.write(cache_key, user.email) if user + end + + email + end + + def cached_id_for_github_id(id) + read_id_from_cache(ID_CACHE_KEY % id) + end + + def cached_id_for_github_email(email) + read_id_from_cache(ID_FOR_EMAIL_CACHE_KEY % email) + end + + # Queries and caches the GitLab user ID for a GitHub user ID, if one was + # found. + def id_for_github_id(id) + gitlab_id = query_id_for_github_id(id) || nil + + Caching.write(ID_CACHE_KEY % id, gitlab_id) + end + + # Queries and caches the GitLab user ID for a GitHub email, if one was + # found. + def id_for_github_email(email) + gitlab_id = query_id_for_github_email(email) || nil + + Caching.write(ID_FOR_EMAIL_CACHE_KEY % email, gitlab_id) + end + + def query_id_for_github_id(id) + User.for_github_id(id).pluck(:id).first + end + + def query_id_for_github_email(email) + User.by_any_email(email).pluck(:id).first + end + + # Reads an ID from the cache. + # + # The return value is an Array with two values: + # + # 1. A boolean indicating if the key was present or not. + # 2. The ID as an Integer, or nil in case no ID could be found. + def read_id_from_cache(key) + value = Caching.read(key) + exists = !value.nil? + number = value.to_i + + # The cache key may be empty to indicate a previously looked up user for + # which we couldn't find an ID. + [exists, number.positive? ? number : nil] + end + end + end +end diff --git a/lib/gitlab/gitlab_import/client.rb b/lib/gitlab/gitlab_import/client.rb index f1007daab5d..075b3982608 100644 --- a/lib/gitlab/gitlab_import/client.rb +++ b/lib/gitlab/gitlab_import/client.rb @@ -65,6 +65,7 @@ module Gitlab y << item end break if items.empty? || items.size < per_page + page += 1 end end diff --git a/lib/gitlab/gon_helper.rb b/lib/gitlab/gon_helper.rb index 3a666c2268b..dfcdfc307b6 100644 --- a/lib/gitlab/gon_helper.rb +++ b/lib/gitlab/gon_helper.rb @@ -20,7 +20,7 @@ module Gitlab gon.gitlab_url = Gitlab.config.gitlab.url gon.revision = Gitlab::REVISION gon.gitlab_logo = ActionController::Base.helpers.asset_path('gitlab_logo.png') - gon.sprite_icons = ActionController::Base.helpers.asset_path('icons.svg') + gon.sprite_icons = IconsHelper.sprite_icon_path if current_user gon.current_user_id = current_user.id diff --git a/lib/gitlab/hook_data/issue_builder.rb b/lib/gitlab/hook_data/issue_builder.rb index de9cab80a02..e29dd0d5b0e 100644 --- a/lib/gitlab/hook_data/issue_builder.rb +++ b/lib/gitlab/hook_data/issue_builder.rb @@ -4,7 +4,6 @@ module Gitlab SAFE_HOOK_ATTRIBUTES = %i[ assignee_id author_id - branch_name closed_at confidential created_at @@ -29,6 +28,7 @@ module Gitlab SAFE_HOOK_RELATIONS = %i[ assignees labels + total_time_spent ].freeze attr_accessor :issue diff --git a/lib/gitlab/hook_data/merge_request_builder.rb b/lib/gitlab/hook_data/merge_request_builder.rb index eaef19c9d04..ae9b68eb648 100644 --- a/lib/gitlab/hook_data/merge_request_builder.rb +++ b/lib/gitlab/hook_data/merge_request_builder.rb @@ -19,7 +19,6 @@ module Gitlab merge_user_id merge_when_pipeline_succeeds milestone_id - ref_fetched source_branch source_project_id state @@ -34,6 +33,7 @@ module Gitlab SAFE_HOOK_RELATIONS = %i[ assignee labels + total_time_spent ].freeze attr_accessor :merge_request diff --git a/lib/gitlab/import_export/import_export.yml b/lib/gitlab/import_export/import_export.yml index dec8b4c5acd..263599831bf 100644 --- a/lib/gitlab/import_export/import_export.yml +++ b/lib/gitlab/import_export/import_export.yml @@ -19,6 +19,7 @@ project_tree: - milestone: - events: - :push_event_payload + - :issue_assignees - snippets: - :award_emoji - notes: @@ -53,7 +54,6 @@ project_tree: - :auto_devops - :triggers - :pipeline_schedules - - :cluster - :services - :hooks - protected_branches: @@ -62,6 +62,7 @@ project_tree: - protected_tags: - :create_access_levels - :project_feature + - :custom_attributes # Only include the following attributes for the models specified. included_attributes: @@ -113,6 +114,7 @@ excluded_attributes: - :milestone_id - :ref_fetched - :merge_jid + - :latest_merge_request_diff_id award_emoji: - :awardable_id statuses: diff --git a/lib/gitlab/import_export/importer.rb b/lib/gitlab/import_export/importer.rb index fbdd74788bc..c14646b0611 100644 --- a/lib/gitlab/import_export/importer.rb +++ b/lib/gitlab/import_export/importer.rb @@ -1,6 +1,10 @@ module Gitlab module ImportExport class Importer + def self.imports_repository? + true + end + def initialize(project) @archive_file = project.import_source @current_user = project.creator diff --git a/lib/gitlab/import_export/merge_request_parser.rb b/lib/gitlab/import_export/merge_request_parser.rb index 81a213e8321..f3d7407383c 100644 --- a/lib/gitlab/import_export/merge_request_parser.rb +++ b/lib/gitlab/import_export/merge_request_parser.rb @@ -1,7 +1,7 @@ module Gitlab module ImportExport class MergeRequestParser - FORKED_PROJECT_ID = -1 + FORKED_PROJECT_ID = nil def initialize(project, diff_head_sha, merge_request, relation_hash) @project = project @@ -26,7 +26,7 @@ module Gitlab end def fetch_ref - @project.repository.fetch_ref(@project.repository.path, @diff_head_sha, @merge_request.source_branch) + @project.repository.fetch_ref(@project.repository, source_ref: @diff_head_sha, target_ref: @merge_request.source_branch) end def branch_exists?(branch_name) diff --git a/lib/gitlab/import_export/relation_factory.rb b/lib/gitlab/import_export/relation_factory.rb index 469b230377d..2b34ceb5831 100644 --- a/lib/gitlab/import_export/relation_factory.rb +++ b/lib/gitlab/import_export/relation_factory.rb @@ -8,8 +8,6 @@ module Gitlab triggers: 'Ci::Trigger', pipeline_schedules: 'Ci::PipelineSchedule', builds: 'Ci::Build', - cluster: 'Gcp::Cluster', - clusters: 'Gcp::Cluster', hooks: 'ProjectHook', merge_access_levels: 'ProtectedBranch::MergeAccessLevel', push_access_levels: 'ProtectedBranch::PushAccessLevel', @@ -17,7 +15,8 @@ module Gitlab labels: :project_labels, priorities: :label_priorities, auto_devops: :project_auto_devops, - label: :project_label }.freeze + label: :project_label, + custom_attributes: 'ProjectCustomAttribute' }.freeze USER_REFERENCES = %w[author_id assignee_id updated_by_id user_id created_by_id last_edited_by_id merge_user_id resolved_by_id].freeze diff --git a/lib/gitlab/import_sources.rb b/lib/gitlab/import_sources.rb index 5404dc11a87..eeb03625479 100644 --- a/lib/gitlab/import_sources.rb +++ b/lib/gitlab/import_sources.rb @@ -8,14 +8,14 @@ module Gitlab ImportSource = Struct.new(:name, :title, :importer) ImportTable = [ - ImportSource.new('github', 'GitHub', Github::Import), + ImportSource.new('github', 'GitHub', Gitlab::GithubImport::ParallelImporter), ImportSource.new('bitbucket', 'Bitbucket', Gitlab::BitbucketImport::Importer), ImportSource.new('gitlab', 'GitLab.com', Gitlab::GitlabImport::Importer), ImportSource.new('google_code', 'Google Code', Gitlab::GoogleCodeImport::Importer), ImportSource.new('fogbugz', 'FogBugz', Gitlab::FogbugzImport::Importer), ImportSource.new('git', 'Repo by URL', nil), ImportSource.new('gitlab_project', 'GitLab export', Gitlab::ImportExport::Importer), - ImportSource.new('gitea', 'Gitea', Gitlab::GithubImport::Importer) + ImportSource.new('gitea', 'Gitea', Gitlab::LegacyGithubImport::Importer) ].freeze class << self diff --git a/lib/gitlab/issuable_metadata.rb b/lib/gitlab/issuable_metadata.rb index 977c05910d3..0c9de72329c 100644 --- a/lib/gitlab/issuable_metadata.rb +++ b/lib/gitlab/issuable_metadata.rb @@ -1,6 +1,14 @@ module Gitlab module IssuableMetadata def issuable_meta_data(issuable_collection, collection_type) + # ActiveRecord uses Object#extend for null relations. + if !(issuable_collection.singleton_class < ActiveRecord::NullRelation) && + issuable_collection.respond_to?(:limit_value) && + issuable_collection.limit_value.nil? + + raise 'Collection must have a limit applied for preloading meta-data' + end + # map has to be used here since using pluck or select will # throw an error when ordering issuables by priority which inserts # a new order into the collection. diff --git a/lib/gitlab/job_waiter.rb b/lib/gitlab/job_waiter.rb index 4d6bbda15f3..f654508c391 100644 --- a/lib/gitlab/job_waiter.rb +++ b/lib/gitlab/job_waiter.rb @@ -19,11 +19,13 @@ module Gitlab Gitlab::Redis::SharedState.with { |redis| redis.lpush(key, jid) } end - attr_reader :key, :jobs_remaining, :finished + attr_reader :key, :finished + attr_accessor :jobs_remaining # jobs_remaining - the number of jobs left to wait for - def initialize(jobs_remaining) - @key = "gitlab:job_waiter:#{SecureRandom.uuid}" + # key - The key of this waiter. + def initialize(jobs_remaining = 0, key = "gitlab:job_waiter:#{SecureRandom.uuid}") + @key = key @jobs_remaining = jobs_remaining @finished = [] end diff --git a/lib/gitlab/kubernetes/helm.rb b/lib/gitlab/kubernetes/helm.rb new file mode 100644 index 00000000000..7a50f07f3c5 --- /dev/null +++ b/lib/gitlab/kubernetes/helm.rb @@ -0,0 +1,96 @@ +module Gitlab + module Kubernetes + class Helm + HELM_VERSION = '2.7.0'.freeze + NAMESPACE = 'gitlab-managed-apps'.freeze + INSTALL_DEPS = <<-EOS.freeze + set -eo pipefail + apk add -U ca-certificates openssl >/dev/null + wget -q -O - https://kubernetes-helm.storage.googleapis.com/helm-v${HELM_VERSION}-linux-amd64.tar.gz | tar zxC /tmp >/dev/null + mv /tmp/linux-amd64/helm /usr/bin/ + EOS + + InstallCommand = Struct.new(:name, :install_helm, :chart) do + def pod_name + "install-#{name}" + end + end + + def initialize(kubeclient) + @kubeclient = kubeclient + @namespace = Namespace.new(NAMESPACE, kubeclient) + end + + def install(command) + @namespace.ensure_exists! + @kubeclient.create_pod(pod_resource(command)) + end + + ## + # Returns Pod phase + # + # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase + # + # values: "Pending", "Running", "Succeeded", "Failed", "Unknown" + # + def installation_status(pod_name) + @kubeclient.get_pod(pod_name, @namespace.name).status.phase + end + + def installation_log(pod_name) + @kubeclient.get_pod_log(pod_name, @namespace.name).body + end + + def delete_installation_pod!(pod_name) + @kubeclient.delete_pod(pod_name, @namespace.name) + end + + private + + def pod_resource(command) + labels = { 'gitlab.org/action': 'install', 'gitlab.org/application': command.name } + metadata = { name: command.pod_name, namespace: @namespace.name, labels: labels } + container = { + name: 'helm', + image: 'alpine:3.6', + env: generate_pod_env(command), + command: %w(/bin/sh), + args: %w(-c $(COMMAND_SCRIPT)) + } + spec = { containers: [container], restartPolicy: 'Never' } + + ::Kubeclient::Resource.new(metadata: metadata, spec: spec) + end + + def generate_pod_env(command) + { + HELM_VERSION: HELM_VERSION, + TILLER_NAMESPACE: @namespace.name, + COMMAND_SCRIPT: generate_script(command) + }.map { |key, value| { name: key, value: value } } + end + + def generate_script(command) + [ + INSTALL_DEPS, + helm_init_command(command), + helm_install_command(command) + ].join("\n") + end + + def helm_init_command(command) + if command.install_helm + 'helm init >/dev/null' + else + 'helm init --client-only >/dev/null' + end + end + + def helm_install_command(command) + return if command.chart.nil? + + "helm install #{command.chart} --name #{command.name} --namespace #{@namespace.name} >/dev/null" + end + end + end +end diff --git a/lib/gitlab/kubernetes/namespace.rb b/lib/gitlab/kubernetes/namespace.rb new file mode 100644 index 00000000000..fbbddb7bffa --- /dev/null +++ b/lib/gitlab/kubernetes/namespace.rb @@ -0,0 +1,30 @@ +module Gitlab + module Kubernetes + class Namespace + attr_accessor :name + + def initialize(name, client) + @name = name + @client = client + end + + def exists? + @client.get_namespace(name) + rescue ::KubeException => ke + raise ke unless ke.error_code == 404 + + false + end + + def create! + resource = ::Kubeclient::Resource.new(metadata: { name: name }) + + @client.create_namespace(resource) + end + + def ensure_exists! + exists? || create! + end + end + end +end diff --git a/lib/gitlab/kubernetes/pod.rb b/lib/gitlab/kubernetes/pod.rb new file mode 100644 index 00000000000..f3842cdf762 --- /dev/null +++ b/lib/gitlab/kubernetes/pod.rb @@ -0,0 +1,12 @@ +module Gitlab + module Kubernetes + module Pod + PENDING = 'Pending'.freeze + RUNNING = 'Running'.freeze + SUCCEEDED = 'Succeeded'.freeze + FAILED = 'Failed'.freeze + UNKNOWN = 'Unknown'.freeze + PHASES = [PENDING, RUNNING, SUCCEEDED, FAILED, UNKNOWN].freeze + end + end +end diff --git a/lib/gitlab/ldap/auth_hash.rb b/lib/gitlab/ldap/auth_hash.rb index 3123da17fd9..1bd0965679a 100644 --- a/lib/gitlab/ldap/auth_hash.rb +++ b/lib/gitlab/ldap/auth_hash.rb @@ -4,7 +4,7 @@ module Gitlab module LDAP class AuthHash < Gitlab::OAuth::AuthHash def uid - Gitlab::LDAP::Person.normalize_dn(super) + @uid ||= Gitlab::LDAP::Person.normalize_dn(super) end private diff --git a/lib/gitlab/ldap/authentication.rb b/lib/gitlab/ldap/authentication.rb index ed1de73f8c6..7274d1c3b43 100644 --- a/lib/gitlab/ldap/authentication.rb +++ b/lib/gitlab/ldap/authentication.rb @@ -62,6 +62,7 @@ module Gitlab def user return nil unless ldap_user + Gitlab::LDAP::User.find_by_uid_and_provider(ldap_user.dn, provider) end end diff --git a/lib/gitlab/ldap/user.rb b/lib/gitlab/ldap/user.rb index 1793097363e..3945df27eed 100644 --- a/lib/gitlab/ldap/user.rb +++ b/lib/gitlab/ldap/user.rb @@ -9,10 +9,8 @@ module Gitlab class User < Gitlab::OAuth::User class << self def find_by_uid_and_provider(uid, provider) - # LDAP distinguished name is case-insensitive - identity = ::Identity - .where(provider: provider) - .iwhere(extern_uid: uid).last + identity = ::Identity.with_extern_uid(provider, uid).take + identity && identity.user end end diff --git a/lib/gitlab/github_import/base_formatter.rb b/lib/gitlab/legacy_github_import/base_formatter.rb index f330041cc00..2f07fde406c 100644 --- a/lib/gitlab/github_import/base_formatter.rb +++ b/lib/gitlab/legacy_github_import/base_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class BaseFormatter attr_reader :client, :formatter, :project, :raw_data diff --git a/lib/gitlab/github_import/branch_formatter.rb b/lib/gitlab/legacy_github_import/branch_formatter.rb index 8aa885fb811..80fe1d67209 100644 --- a/lib/gitlab/github_import/branch_formatter.rb +++ b/lib/gitlab/legacy_github_import/branch_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class BranchFormatter < BaseFormatter delegate :repo, :sha, :ref, to: :raw_data diff --git a/lib/gitlab/legacy_github_import/client.rb b/lib/gitlab/legacy_github_import/client.rb new file mode 100644 index 00000000000..53c910d44bd --- /dev/null +++ b/lib/gitlab/legacy_github_import/client.rb @@ -0,0 +1,148 @@ +module Gitlab + module LegacyGithubImport + class Client + GITHUB_SAFE_REMAINING_REQUESTS = 100 + GITHUB_SAFE_SLEEP_TIME = 500 + + attr_reader :access_token, :host, :api_version + + def initialize(access_token, host: nil, api_version: 'v3') + @access_token = access_token + @host = host.to_s.sub(%r{/+\z}, '') + @api_version = api_version + @users = {} + + if access_token + ::Octokit.auto_paginate = false + end + end + + def api + @api ||= ::Octokit::Client.new( + access_token: access_token, + api_endpoint: api_endpoint, + # If there is no config, we're connecting to github.com and we + # should verify ssl. + connection_options: { + ssl: { verify: config ? config['verify_ssl'] : true } + } + ) + end + + def client + unless config + raise Projects::ImportService::Error, + 'OAuth configuration for GitHub missing.' + end + + @client ||= ::OAuth2::Client.new( + config.app_id, + config.app_secret, + github_options.merge(ssl: { verify: config['verify_ssl'] }) + ) + end + + def authorize_url(redirect_uri) + client.auth_code.authorize_url({ + redirect_uri: redirect_uri, + scope: "repo, user, user:email" + }) + end + + def get_token(code) + client.auth_code.get_token(code).token + end + + def method_missing(method, *args, &block) + if api.respond_to?(method) + request(method, *args, &block) + else + super(method, *args, &block) + end + end + + def respond_to?(method) + api.respond_to?(method) || super + end + + def user(login) + return nil unless login.present? + return @users[login] if @users.key?(login) + + @users[login] = api.user(login) + end + + private + + def api_endpoint + if host.present? && api_version.present? + "#{host}/api/#{api_version}" + else + github_options[:site] + end + end + + def config + Gitlab.config.omniauth.providers.find { |provider| provider.name == "github" } + end + + def github_options + if config + config["args"]["client_options"].deep_symbolize_keys + else + OmniAuth::Strategies::GitHub.default_options[:client_options].symbolize_keys + end + end + + def rate_limit + api.rate_limit! + # GitHub Rate Limit API returns 404 when the rate limit is + # disabled. In this case we just want to return gracefully + # instead of spitting out an error. + rescue Octokit::NotFound + nil + end + + def has_rate_limit? + return @has_rate_limit if defined?(@has_rate_limit) + + @has_rate_limit = rate_limit.present? + end + + def rate_limit_exceed? + has_rate_limit? && rate_limit.remaining <= GITHUB_SAFE_REMAINING_REQUESTS + end + + def rate_limit_sleep_time + rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME + end + + def request(method, *args, &block) + sleep rate_limit_sleep_time if rate_limit_exceed? + + data = api.__send__(method, *args) # rubocop:disable GitlabSecurity/PublicSend + return data unless data.is_a?(Array) + + last_response = api.last_response + + if block_given? + yield data + # api.last_response could change while we're yielding (e.g. fetching labels for each PR) + # so we cache our own last response + each_response_page(last_response, &block) + else + each_response_page(last_response) { |page| data.concat(page) } + data + end + end + + def each_response_page(last_response) + while last_response.rels[:next] + sleep rate_limit_sleep_time if rate_limit_exceed? + last_response = last_response.rels[:next].get + yield last_response.data if last_response.data.is_a?(Array) + end + end + end + end +end diff --git a/lib/gitlab/github_import/comment_formatter.rb b/lib/gitlab/legacy_github_import/comment_formatter.rb index 8911b81ec9a..d2c7a8ae9f4 100644 --- a/lib/gitlab/github_import/comment_formatter.rb +++ b/lib/gitlab/legacy_github_import/comment_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class CommentFormatter < BaseFormatter attr_writer :author_id diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/legacy_github_import/importer.rb index b8c07460ebb..4d096e5a741 100644 --- a/lib/gitlab/github_import/importer.rb +++ b/lib/gitlab/legacy_github_import/importer.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class Importer include Gitlab::ShellAdapter @@ -15,6 +15,7 @@ module Gitlab def client return @client if defined?(@client) + unless credentials raise Projects::ImportService::Error, "Unable to find project import data credentials for project ID: #{@project.id}" diff --git a/lib/gitlab/github_import/issuable_formatter.rb b/lib/gitlab/legacy_github_import/issuable_formatter.rb index 27b171d6ddb..de55382d3ad 100644 --- a/lib/gitlab/github_import/issuable_formatter.rb +++ b/lib/gitlab/legacy_github_import/issuable_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class IssuableFormatter < BaseFormatter attr_writer :assignee_id, :author_id diff --git a/lib/gitlab/github_import/issue_formatter.rb b/lib/gitlab/legacy_github_import/issue_formatter.rb index 977cd0423ba..4c8825ccf19 100644 --- a/lib/gitlab/github_import/issue_formatter.rb +++ b/lib/gitlab/legacy_github_import/issue_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class IssueFormatter < IssuableFormatter def attributes { diff --git a/lib/gitlab/github_import/label_formatter.rb b/lib/gitlab/legacy_github_import/label_formatter.rb index 211ccdc51bb..c3eed12e739 100644 --- a/lib/gitlab/github_import/label_formatter.rb +++ b/lib/gitlab/legacy_github_import/label_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class LabelFormatter < BaseFormatter def attributes { diff --git a/lib/gitlab/github_import/milestone_formatter.rb b/lib/gitlab/legacy_github_import/milestone_formatter.rb index dd782eff059..a565294384d 100644 --- a/lib/gitlab/github_import/milestone_formatter.rb +++ b/lib/gitlab/legacy_github_import/milestone_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class MilestoneFormatter < BaseFormatter def attributes { diff --git a/lib/gitlab/github_import/project_creator.rb b/lib/gitlab/legacy_github_import/project_creator.rb index a55adc9b1c8..41e7eac4d08 100644 --- a/lib/gitlab/github_import/project_creator.rb +++ b/lib/gitlab/legacy_github_import/project_creator.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class ProjectCreator include Gitlab::CurrentSettings diff --git a/lib/gitlab/github_import/pull_request_formatter.rb b/lib/gitlab/legacy_github_import/pull_request_formatter.rb index 150afa31432..94c2e99066a 100644 --- a/lib/gitlab/github_import/pull_request_formatter.rb +++ b/lib/gitlab/legacy_github_import/pull_request_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class PullRequestFormatter < IssuableFormatter delegate :user, :project, :ref, :repo, :sha, to: :source_branch, prefix: true delegate :user, :exists?, :project, :ref, :repo, :sha, :short_sha, to: :target_branch, prefix: true diff --git a/lib/gitlab/github_import/release_formatter.rb b/lib/gitlab/legacy_github_import/release_formatter.rb index 1ad702a6058..3ed9d4f76da 100644 --- a/lib/gitlab/github_import/release_formatter.rb +++ b/lib/gitlab/legacy_github_import/release_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class ReleaseFormatter < BaseFormatter def attributes { diff --git a/lib/gitlab/github_import/user_formatter.rb b/lib/gitlab/legacy_github_import/user_formatter.rb index 04c2964da20..6d8055622f1 100644 --- a/lib/gitlab/github_import/user_formatter.rb +++ b/lib/gitlab/legacy_github_import/user_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class UserFormatter attr_reader :client, :raw diff --git a/lib/gitlab/github_import/wiki_formatter.rb b/lib/gitlab/legacy_github_import/wiki_formatter.rb index ca8d96f5650..27f45875c7c 100644 --- a/lib/gitlab/github_import/wiki_formatter.rb +++ b/lib/gitlab/legacy_github_import/wiki_formatter.rb @@ -1,5 +1,5 @@ module Gitlab - module GithubImport + module LegacyGithubImport class WikiFormatter attr_reader :project diff --git a/lib/gitlab/lfs_token.rb b/lib/gitlab/lfs_token.rb index 8e57ba831c5..ead5d566871 100644 --- a/lib/gitlab/lfs_token.rb +++ b/lib/gitlab/lfs_token.rb @@ -27,6 +27,10 @@ module Gitlab end end + def deploy_key_pushable?(project) + actor.is_a?(DeployKey) && actor.can_push_to?(project) + end + def user? actor.is_a?(User) end diff --git a/lib/gitlab/metrics/background_transaction.rb b/lib/gitlab/metrics/background_transaction.rb new file mode 100644 index 00000000000..5919ebb1493 --- /dev/null +++ b/lib/gitlab/metrics/background_transaction.rb @@ -0,0 +1,14 @@ +module Gitlab + module Metrics + class BackgroundTransaction < Transaction + def initialize(worker_class) + super() + @worker_class = worker_class + end + + def labels + { controller: @worker_class.name, action: 'perform' } + end + end + end +end diff --git a/lib/gitlab/metrics/base_sampler.rb b/lib/gitlab/metrics/base_sampler.rb deleted file mode 100644 index 716d20bb91a..00000000000 --- a/lib/gitlab/metrics/base_sampler.rb +++ /dev/null @@ -1,63 +0,0 @@ -require 'logger' -module Gitlab - module Metrics - class BaseSampler < Daemon - # interval - The sampling interval in seconds. - def initialize(interval) - interval_half = interval.to_f / 2 - - @interval = interval - @interval_steps = (-interval_half..interval_half).step(0.1).to_a - - super() - end - - def safe_sample - sample - rescue => e - Rails.logger.warn("#{self.class}: #{e}, stopping") - stop - end - - def sample - raise NotImplementedError - end - - # Returns the sleep interval with a random adjustment. - # - # The random adjustment is put in place to ensure we: - # - # 1. Don't generate samples at the exact same interval every time (thus - # potentially missing anything that happens in between samples). - # 2. Don't sample data at the same interval two times in a row. - def sleep_interval - while (step = @interval_steps.sample) - if step != @last_step - @last_step = step - - return @interval + @last_step - end - end - end - - private - - attr_reader :running - - def start_working - @running = true - sleep(sleep_interval) - - while running - safe_sample - - sleep(sleep_interval) - end - end - - def stop_working - @running = false - end - end - end -end diff --git a/lib/gitlab/metrics/influx_db.rb b/lib/gitlab/metrics/influx_db.rb index 7b06bb953aa..bdf7910b7c7 100644 --- a/lib/gitlab/metrics/influx_db.rb +++ b/lib/gitlab/metrics/influx_db.rb @@ -11,6 +11,8 @@ module Gitlab settings[:enabled] || false end + # Prometheus histogram buckets used for arbitrary code measurements + EXECUTION_MEASUREMENT_BUCKETS = [0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1].freeze RAILS_ROOT = Rails.root.to_s METRICS_ROOT = Rails.root.join('lib', 'gitlab', 'metrics').to_s PATH_REGEX = /^#{RAILS_ROOT}\/?/ @@ -99,24 +101,27 @@ module Gitlab cpu_stop = System.cpu_time real_stop = Time.now.to_f - real_time = (real_stop - real_start) * 1000.0 + real_time = (real_stop - real_start) cpu_time = cpu_stop - cpu_start - trans.increment("#{name}_real_time", real_time) - trans.increment("#{name}_cpu_time", cpu_time) - trans.increment("#{name}_call_count", 1) + Gitlab::Metrics.histogram("gitlab_#{name}_real_duration_seconds".to_sym, + "Measure #{name}", + Transaction::BASE_LABELS, + EXECUTION_MEASUREMENT_BUCKETS) + .observe(trans.labels, real_time) - retval - end + Gitlab::Metrics.histogram("gitlab_#{name}_cpu_duration_seconds".to_sym, + "Measure #{name}", + Transaction::BASE_LABELS, + EXECUTION_MEASUREMENT_BUCKETS) + .observe(trans.labels, cpu_time / 1000.0) - # Adds a tag to the current transaction (if any) - # - # name - The name of the tag to add. - # value - The value of the tag. - def tag_transaction(name, value) - trans = current_transaction + # InfluxDB stores the _real_time time values as milliseconds + trans.increment("#{name}_real_time", real_time * 1000, false) + trans.increment("#{name}_cpu_time", cpu_time, false) + trans.increment("#{name}_call_count", 1, false) - trans&.add_tag(name, value) + retval end # Sets the action of the current transaction (if any) diff --git a/lib/gitlab/metrics/influx_sampler.rb b/lib/gitlab/metrics/influx_sampler.rb deleted file mode 100644 index 6db1dd755b7..00000000000 --- a/lib/gitlab/metrics/influx_sampler.rb +++ /dev/null @@ -1,101 +0,0 @@ -module Gitlab - module Metrics - # Class that sends certain metrics to InfluxDB at a specific interval. - # - # This class is used to gather statistics that can't be directly associated - # with a transaction such as system memory usage, garbage collection - # statistics, etc. - class InfluxSampler < BaseSampler - # interval - The sampling interval in seconds. - def initialize(interval = Metrics.settings[:sample_interval]) - super(interval) - @last_step = nil - - @metrics = [] - - @last_minor_gc = Delta.new(GC.stat[:minor_gc_count]) - @last_major_gc = Delta.new(GC.stat[:major_gc_count]) - - if Gitlab::Metrics.mri? - require 'allocations' - - Allocations.start - end - end - - def sample - sample_memory_usage - sample_file_descriptors - sample_objects - sample_gc - - flush - ensure - GC::Profiler.clear - @metrics.clear - end - - def flush - Metrics.submit_metrics(@metrics.map(&:to_hash)) - end - - def sample_memory_usage - add_metric('memory_usage', value: System.memory_usage) - end - - def sample_file_descriptors - add_metric('file_descriptors', value: System.file_descriptor_count) - end - - if Metrics.mri? - def sample_objects - sample = Allocations.to_hash - counts = sample.each_with_object({}) do |(klass, count), hash| - name = klass.name - - next unless name - - hash[name] = count - end - - # Symbols aren't allocated so we'll need to add those manually. - counts['Symbol'] = Symbol.all_symbols.length - - counts.each do |name, count| - add_metric('object_counts', { count: count }, type: name) - end - end - else - def sample_objects - end - end - - def sample_gc - time = GC::Profiler.total_time * 1000.0 - stats = GC.stat.merge(total_time: time) - - # We want the difference of GC runs compared to the last sample, not the - # total amount since the process started. - stats[:minor_gc_count] = - @last_minor_gc.compared_with(stats[:minor_gc_count]) - - stats[:major_gc_count] = - @last_major_gc.compared_with(stats[:major_gc_count]) - - stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count] - - add_metric('gc_statistics', stats) - end - - def add_metric(series, values, tags = {}) - prefix = sidekiq? ? 'sidekiq_' : 'rails_' - - @metrics << Metric.new("#{prefix}#{series}", values, tags) - end - - def sidekiq? - Sidekiq.server? - end - end - end -end diff --git a/lib/gitlab/metrics/instrumentation.rb b/lib/gitlab/metrics/instrumentation.rb index 6aa38542cb4..023e9963493 100644 --- a/lib/gitlab/metrics/instrumentation.rb +++ b/lib/gitlab/metrics/instrumentation.rb @@ -118,19 +118,21 @@ module Gitlab def self.instrument(type, mod, name) return unless Metrics.enabled? - name = name.to_sym + name = name.to_sym target = type == :instance ? mod : mod.singleton_class if type == :instance target = mod - label = "#{mod.name}##{name}" + method_name = "##{name}" method = mod.instance_method(name) else target = mod.singleton_class - label = "#{mod.name}.#{name}" + method_name = ".#{name}" method = mod.method(name) end + label = "#{mod.name}#{method_name}" + unless instrumented?(target) target.instance_variable_set(PROXY_IVAR, Module.new) end @@ -153,7 +155,8 @@ module Gitlab proxy_module.class_eval <<-EOF, __FILE__, __LINE__ + 1 def #{name}(#{args_signature}) if trans = Gitlab::Metrics::Instrumentation.transaction - trans.method_call_for(#{label.to_sym.inspect}).measure { super } + trans.method_call_for(#{label.to_sym.inspect}, #{mod.name.inspect}, "#{method_name}") + .measure { super } else super end diff --git a/lib/gitlab/metrics/method_call.rb b/lib/gitlab/metrics/method_call.rb index d3465e5ec19..90235095306 100644 --- a/lib/gitlab/metrics/method_call.rb +++ b/lib/gitlab/metrics/method_call.rb @@ -2,15 +2,45 @@ module Gitlab module Metrics # Class for tracking timing information about method calls class MethodCall - attr_reader :real_time, :cpu_time, :call_count + MUTEX = Mutex.new + BASE_LABELS = { module: nil, method: nil }.freeze + attr_reader :real_time, :cpu_time, :call_count, :labels + + def self.call_real_duration_histogram + return @call_real_duration_histogram if @call_real_duration_histogram + + MUTEX.synchronize do + @call_real_duration_histogram ||= Gitlab::Metrics.histogram( + :gitlab_method_call_real_duration_seconds, + 'Method calls real duration', + Transaction::BASE_LABELS.merge(BASE_LABELS), + [0.1, 0.2, 0.5, 1, 2, 5, 10] + ) + end + end + + def self.call_cpu_duration_histogram + return @call_cpu_duration_histogram if @call_cpu_duration_histogram + + MUTEX.synchronize do + @call_duration_histogram ||= Gitlab::Metrics.histogram( + :gitlab_method_call_cpu_duration_seconds, + 'Method calls cpu duration', + Transaction::BASE_LABELS.merge(BASE_LABELS), + [0.1, 0.2, 0.5, 1, 2, 5, 10] + ) + end + end # name - The full name of the method (including namespace) such as # `User#sign_in`. # - # series - The series to use for storing the data. - def initialize(name, series) + def initialize(name, module_name, method_name, transaction) + @module_name = module_name + @method_name = method_name + @transaction = transaction @name = name - @series = series + @labels = { module: @module_name, method: @method_name } @real_time = 0 @cpu_time = 0 @call_count = 0 @@ -22,21 +52,27 @@ module Gitlab start_cpu = System.cpu_time retval = yield - @real_time += System.monotonic_time - start_real - @cpu_time += System.cpu_time - start_cpu + real_time = System.monotonic_time - start_real + cpu_time = System.cpu_time - start_cpu + + @real_time += real_time + @cpu_time += cpu_time @call_count += 1 + self.class.call_real_duration_histogram.observe(@transaction.labels.merge(labels), real_time / 1000.0) + self.class.call_cpu_duration_histogram.observe(@transaction.labels.merge(labels), cpu_time / 1000.0) + retval end # Returns a Metric instance of the current method call. def to_metric Metric.new( - @series, + Instrumentation.series, { - duration: real_time, + duration: real_time, cpu_duration: cpu_time, - call_count: call_count + call_count: call_count }, method: @name ) diff --git a/lib/gitlab/metrics/prometheus.rb b/lib/gitlab/metrics/prometheus.rb index 460dab47276..09103b4ca2d 100644 --- a/lib/gitlab/metrics/prometheus.rb +++ b/lib/gitlab/metrics/prometheus.rb @@ -5,6 +5,9 @@ module Gitlab module Prometheus include Gitlab::CurrentSettings + REGISTRY_MUTEX = Mutex.new + PROVIDER_MUTEX = Mutex.new + def metrics_folder_present? multiprocess_files_dir = ::Prometheus::Client.configuration.multiprocess_files_dir @@ -20,23 +23,38 @@ module Gitlab end def registry - @registry ||= ::Prometheus::Client.registry + return @registry if @registry + + REGISTRY_MUTEX.synchronize do + @registry ||= ::Prometheus::Client.registry + end end def counter(name, docstring, base_labels = {}) - provide_metric(name) || registry.counter(name, docstring, base_labels) + safe_provide_metric(:counter, name, docstring, base_labels) end def summary(name, docstring, base_labels = {}) - provide_metric(name) || registry.summary(name, docstring, base_labels) + safe_provide_metric(:summary, name, docstring, base_labels) end def gauge(name, docstring, base_labels = {}, multiprocess_mode = :all) - provide_metric(name) || registry.gauge(name, docstring, base_labels, multiprocess_mode) + safe_provide_metric(:gauge, name, docstring, base_labels, multiprocess_mode) end def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS) - provide_metric(name) || registry.histogram(name, docstring, base_labels, buckets) + safe_provide_metric(:histogram, name, docstring, base_labels, buckets) + end + + private + + def safe_provide_metric(method, name, *args) + metric = provide_metric(name) + return metric if metric + + PROVIDER_MUTEX.synchronize do + provide_metric(name) || registry.method(method).call(name, *args) + end end def provide_metric(name) @@ -47,8 +65,6 @@ module Gitlab end end - private - def prometheus_metrics_enabled_unmemoized metrics_folder_present? && current_application_settings[:prometheus_metrics_enabled] || false end diff --git a/lib/gitlab/metrics/rack_middleware.rb b/lib/gitlab/metrics/rack_middleware.rb index adc0db1a874..2d45765df3f 100644 --- a/lib/gitlab/metrics/rack_middleware.rb +++ b/lib/gitlab/metrics/rack_middleware.rb @@ -2,20 +2,6 @@ module Gitlab module Metrics # Rack middleware for tracking Rails and Grape requests. class RackMiddleware - CONTROLLER_KEY = 'action_controller.instance'.freeze - ENDPOINT_KEY = 'api.endpoint'.freeze - CONTENT_TYPES = { - 'text/html' => :html, - 'text/plain' => :txt, - 'application/json' => :json, - 'text/js' => :js, - 'application/atom+xml' => :atom, - 'image/png' => :png, - 'image/jpeg' => :jpeg, - 'image/gif' => :gif, - 'image/svg+xml' => :svg - }.freeze - def initialize(app) @app = app end @@ -35,12 +21,6 @@ module Gitlab # Even in the event of an error we want to submit any metrics we # might've gathered up to this point. ensure - if env[CONTROLLER_KEY] - tag_controller(trans, env) - elsif env[ENDPOINT_KEY] - tag_endpoint(trans, env) - end - trans.finish end @@ -48,60 +28,19 @@ module Gitlab end def transaction_from_env(env) - trans = Transaction.new + trans = WebTransaction.new(env) - trans.set(:request_uri, filtered_path(env)) - trans.set(:request_method, env['REQUEST_METHOD']) + trans.set(:request_uri, filtered_path(env), false) + trans.set(:request_method, env['REQUEST_METHOD'], false) trans end - def tag_controller(trans, env) - controller = env[CONTROLLER_KEY] - action = "#{controller.class.name}##{controller.action_name}" - suffix = CONTENT_TYPES[controller.content_type] - - if suffix && suffix != :html - action += ".#{suffix}" - end - - trans.action = action - end - - def tag_endpoint(trans, env) - endpoint = env[ENDPOINT_KEY] - - begin - route = endpoint.route - rescue - # endpoint.route is calling env[Grape::Env::GRAPE_ROUTING_ARGS][:route_info] - # but env[Grape::Env::GRAPE_ROUTING_ARGS] is nil in the case of a 405 response - # so we're rescuing exceptions and bailing out - end - - if route - path = endpoint_paths_cache[route.request_method][route.path] - trans.action = "Grape##{route.request_method} #{path}" - end - end - private def filtered_path(env) ActionDispatch::Request.new(env).filtered_path.presence || env['REQUEST_URI'] end - - def endpoint_paths_cache - @endpoint_paths_cache ||= Hash.new do |hash, http_method| - hash[http_method] = Hash.new do |inner_hash, raw_path| - inner_hash[raw_path] = endpoint_instrumentable_path(raw_path) - end - end - end - - def endpoint_instrumentable_path(raw_path) - raw_path.sub('(.:format)', '').sub('/:version', '') - end end end end diff --git a/lib/gitlab/metrics/samplers/base_sampler.rb b/lib/gitlab/metrics/samplers/base_sampler.rb new file mode 100644 index 00000000000..37f90c4673d --- /dev/null +++ b/lib/gitlab/metrics/samplers/base_sampler.rb @@ -0,0 +1,64 @@ +require 'logger' + +module Gitlab + module Metrics + module Samplers + class BaseSampler < Daemon + # interval - The sampling interval in seconds. + def initialize(interval) + interval_half = interval.to_f / 2 + + @interval = interval + @interval_steps = (-interval_half..interval_half).step(0.1).to_a + + super() + end + + def safe_sample + sample + rescue => e + Rails.logger.warn("#{self.class}: #{e}, stopping") + stop + end + + def sample + raise NotImplementedError + end + + # Returns the sleep interval with a random adjustment. + # + # The random adjustment is put in place to ensure we: + # + # 1. Don't generate samples at the exact same interval every time (thus + # potentially missing anything that happens in between samples). + # 2. Don't sample data at the same interval two times in a row. + def sleep_interval + while step = @interval_steps.sample + if step != @last_step + @last_step = step + + return @interval + @last_step + end + end + end + + private + + attr_reader :running + + def start_working + @running = true + sleep(sleep_interval) + while running + safe_sample + sleep(sleep_interval) + end + end + + def stop_working + @running = false + end + end + end + end +end diff --git a/lib/gitlab/metrics/samplers/influx_sampler.rb b/lib/gitlab/metrics/samplers/influx_sampler.rb new file mode 100644 index 00000000000..f4f9b5ca792 --- /dev/null +++ b/lib/gitlab/metrics/samplers/influx_sampler.rb @@ -0,0 +1,103 @@ +module Gitlab + module Metrics + module Samplers + # Class that sends certain metrics to InfluxDB at a specific interval. + # + # This class is used to gather statistics that can't be directly associated + # with a transaction such as system memory usage, garbage collection + # statistics, etc. + class InfluxSampler < BaseSampler + # interval - The sampling interval in seconds. + def initialize(interval = Metrics.settings[:sample_interval]) + super(interval) + @last_step = nil + + @metrics = [] + + @last_minor_gc = Delta.new(GC.stat[:minor_gc_count]) + @last_major_gc = Delta.new(GC.stat[:major_gc_count]) + + if Gitlab::Metrics.mri? + require 'allocations' + + Allocations.start + end + end + + def sample + sample_memory_usage + sample_file_descriptors + sample_objects + sample_gc + + flush + ensure + GC::Profiler.clear + @metrics.clear + end + + def flush + Metrics.submit_metrics(@metrics.map(&:to_hash)) + end + + def sample_memory_usage + add_metric('memory_usage', value: System.memory_usage) + end + + def sample_file_descriptors + add_metric('file_descriptors', value: System.file_descriptor_count) + end + + if Metrics.mri? + def sample_objects + sample = Allocations.to_hash + counts = sample.each_with_object({}) do |(klass, count), hash| + name = klass.name + + next unless name + + hash[name] = count + end + + # Symbols aren't allocated so we'll need to add those manually. + counts['Symbol'] = Symbol.all_symbols.length + + counts.each do |name, count| + add_metric('object_counts', { count: count }, type: name) + end + end + else + def sample_objects + end + end + + def sample_gc + time = GC::Profiler.total_time * 1000.0 + stats = GC.stat.merge(total_time: time) + + # We want the difference of GC runs compared to the last sample, not the + # total amount since the process started. + stats[:minor_gc_count] = + @last_minor_gc.compared_with(stats[:minor_gc_count]) + + stats[:major_gc_count] = + @last_major_gc.compared_with(stats[:major_gc_count]) + + stats[:count] = stats[:minor_gc_count] + stats[:major_gc_count] + + add_metric('gc_statistics', stats) + end + + def add_metric(series, values, tags = {}) + prefix = sidekiq? ? 'sidekiq_' : 'rails_' + + @metrics << Metric.new("#{prefix}#{series}", values, tags) + end + + def sidekiq? + Sidekiq.server? + end + end + end + end +end diff --git a/lib/gitlab/metrics/samplers/ruby_sampler.rb b/lib/gitlab/metrics/samplers/ruby_sampler.rb new file mode 100644 index 00000000000..436a9e9550d --- /dev/null +++ b/lib/gitlab/metrics/samplers/ruby_sampler.rb @@ -0,0 +1,111 @@ +require 'prometheus/client/support/unicorn' + +module Gitlab + module Metrics + module Samplers + class RubySampler < BaseSampler + def metrics + @metrics ||= init_metrics + end + + def with_prefix(prefix, name) + "ruby_#{prefix}_#{name}".to_sym + end + + def to_doc_string(name) + name.to_s.humanize + end + + def labels + {} + end + + def initialize(interval) + super(interval) + + if Metrics.mri? + require 'allocations' + + Allocations.start + end + end + + def init_metrics + metrics = {} + metrics[:sampler_duration] = Metrics.histogram(with_prefix(:sampler_duration, :seconds), 'Sampler time', {}) + metrics[:total_time] = Metrics.gauge(with_prefix(:gc, :time_total), 'Total GC time', labels, :livesum) + GC.stat.keys.each do |key| + metrics[key] = Metrics.gauge(with_prefix(:gc, key), to_doc_string(key), labels, :livesum) + end + + metrics[:objects_total] = Metrics.gauge(with_prefix(:objects, :total), 'Objects total', labels.merge(class: nil), :livesum) + metrics[:memory_usage] = Metrics.gauge(with_prefix(:memory, :usage_total), 'Memory used total', labels, :livesum) + metrics[:file_descriptors] = Metrics.gauge(with_prefix(:file, :descriptors_total), 'File descriptors total', labels, :livesum) + + metrics + end + + def sample + start_time = System.monotonic_time + sample_gc + sample_objects + + metrics[:memory_usage].set(labels, System.memory_usage) + metrics[:file_descriptors].set(labels, System.file_descriptor_count) + + metrics[:sampler_duration].observe(labels.merge(worker_label), (System.monotonic_time - start_time) / 1000.0) + ensure + GC::Profiler.clear + end + + private + + def sample_gc + metrics[:total_time].set(labels, GC::Profiler.total_time * 1000) + + GC.stat.each do |key, value| + metrics[key].set(labels, value) + end + end + + def sample_objects + list_objects.each do |name, count| + metrics[:objects_total].set(labels.merge(class: name), count) + end + end + + if Metrics.mri? + def list_objects + sample = Allocations.to_hash + counts = sample.each_with_object({}) do |(klass, count), hash| + name = klass.name + + next unless name + + hash[name] = count + end + + # Symbols aren't allocated so we'll need to add those manually. + counts['Symbol'] = Symbol.all_symbols.length + counts + end + else + def list_objects + end + end + + def worker_label + return {} unless defined?(Unicorn::Worker) + + worker_no = ::Prometheus::Client::Support::Unicorn.worker_id + + if worker_no + { unicorn: worker_no } + else + { unicorn: 'master' } + end + end + end + end + end +end diff --git a/lib/gitlab/metrics/samplers/unicorn_sampler.rb b/lib/gitlab/metrics/samplers/unicorn_sampler.rb new file mode 100644 index 00000000000..ea325651fbb --- /dev/null +++ b/lib/gitlab/metrics/samplers/unicorn_sampler.rb @@ -0,0 +1,50 @@ +module Gitlab + module Metrics + module Samplers + class UnicornSampler < BaseSampler + def initialize(interval) + super(interval) + end + + def unicorn_active_connections + @unicorn_active_connections ||= Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max) + end + + def unicorn_queued_connections + @unicorn_queued_connections ||= Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max) + end + + def enabled? + # Raindrops::Linux.tcp_listener_stats is only present on Linux + unicorn_with_listeners? && Raindrops::Linux.respond_to?(:tcp_listener_stats) + end + + def sample + Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats| + unicorn_active_connections.set({ type: 'tcp', address: addr }, stats.active) + unicorn_queued_connections.set({ type: 'tcp', address: addr }, stats.queued) + end + + Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats| + unicorn_active_connections.set({ type: 'unix', address: addr }, stats.active) + unicorn_queued_connections.set({ type: 'unix', address: addr }, stats.queued) + end + end + + private + + def tcp_listeners + @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z}) + end + + def unix_listeners + @unix_listeners ||= Unicorn.listener_names - tcp_listeners + end + + def unicorn_with_listeners? + defined?(Unicorn) && Unicorn.listener_names.any? + end + end + end + end +end diff --git a/lib/gitlab/metrics/sidekiq_middleware.rb b/lib/gitlab/metrics/sidekiq_middleware.rb index f9dd8e41912..df4bdf16847 100644 --- a/lib/gitlab/metrics/sidekiq_middleware.rb +++ b/lib/gitlab/metrics/sidekiq_middleware.rb @@ -5,7 +5,7 @@ module Gitlab # This middleware is intended to be used as a server-side middleware. class SidekiqMiddleware def call(worker, message, queue) - trans = Transaction.new("#{worker.class.name}#perform") + trans = BackgroundTransaction.new(worker.class) begin # Old gitlad-shell messages don't provide enqueued_at/created_at attributes diff --git a/lib/gitlab/metrics/subscribers/action_view.rb b/lib/gitlab/metrics/subscribers/action_view.rb index d435a33e9c7..3da474fc1ec 100644 --- a/lib/gitlab/metrics/subscribers/action_view.rb +++ b/lib/gitlab/metrics/subscribers/action_view.rb @@ -15,10 +15,24 @@ module Gitlab private + def metric_view_rendering_duration_seconds + @metric_view_rendering_duration_seconds ||= Gitlab::Metrics.histogram( + :gitlab_view_rendering_duration_seconds, + 'View rendering time', + Transaction::BASE_LABELS.merge({ path: nil }), + [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] + ) + end + def track(event) values = values_for(event) tags = tags_for(event) + metric_view_rendering_duration_seconds.observe( + current_transaction.labels.merge(tags), + event.duration + ) + current_transaction.increment(:view_duration, event.duration) current_transaction.add_metric(SERIES, values, tags) end diff --git a/lib/gitlab/metrics/subscribers/active_record.rb b/lib/gitlab/metrics/subscribers/active_record.rb index 96cad941d5c..ead1acb8d44 100644 --- a/lib/gitlab/metrics/subscribers/active_record.rb +++ b/lib/gitlab/metrics/subscribers/active_record.rb @@ -8,8 +8,10 @@ module Gitlab def sql(event) return unless current_transaction - current_transaction.increment(:sql_duration, event.duration) - current_transaction.increment(:sql_count, 1) + metric_sql_duration_seconds.observe(current_transaction.labels, event.duration / 1000.0) + + current_transaction.increment(:sql_duration, event.duration, false) + current_transaction.increment(:sql_count, 1, false) end private @@ -17,6 +19,15 @@ module Gitlab def current_transaction Transaction.current end + + def metric_sql_duration_seconds + @metric_sql_duration_seconds ||= Gitlab::Metrics.histogram( + :gitlab_sql_duration_seconds, + 'SQL time', + Transaction::BASE_LABELS, + [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] + ) + end end end end diff --git a/lib/gitlab/metrics/subscribers/rails_cache.rb b/lib/gitlab/metrics/subscribers/rails_cache.rb index aaed2184f44..efd3c9daf79 100644 --- a/lib/gitlab/metrics/subscribers/rails_cache.rb +++ b/lib/gitlab/metrics/subscribers/rails_cache.rb @@ -7,28 +7,29 @@ module Gitlab attach_to :active_support def cache_read(event) - increment(:cache_read, event.duration) + observe(:read, event.duration) return unless current_transaction return if event.payload[:super_operation] == :fetch if event.payload[:hit] - current_transaction.increment(:cache_read_hit_count, 1) + current_transaction.increment(:cache_read_hit_count, 1, false) else - current_transaction.increment(:cache_read_miss_count, 1) + metric_cache_misses_total.increment(current_transaction.labels) + current_transaction.increment(:cache_read_miss_count, 1, false) end end def cache_write(event) - increment(:cache_write, event.duration) + observe(:write, event.duration) end def cache_delete(event) - increment(:cache_delete, event.duration) + observe(:delete, event.duration) end def cache_exist?(event) - increment(:cache_exists, event.duration) + observe(:exists, event.duration) end def cache_fetch_hit(event) @@ -40,16 +41,18 @@ module Gitlab def cache_generate(event) return unless current_transaction + metric_cache_misses_total.increment(current_transaction.labels) current_transaction.increment(:cache_read_miss_count, 1) end - def increment(key, duration) + def observe(key, duration) return unless current_transaction - current_transaction.increment(:cache_duration, duration) - current_transaction.increment(:cache_count, 1) - current_transaction.increment("#{key}_duration".to_sym, duration) - current_transaction.increment("#{key}_count".to_sym, 1) + metric_cache_operation_duration_seconds.observe(current_transaction.labels.merge({ operation: key }), duration / 1000.0) + current_transaction.increment(:cache_duration, duration, false) + current_transaction.increment(:cache_count, 1, false) + current_transaction.increment("cache_#{key}_duration".to_sym, duration, false) + current_transaction.increment("cache_#{key}_count".to_sym, 1, false) end private @@ -57,6 +60,23 @@ module Gitlab def current_transaction Transaction.current end + + def metric_cache_operation_duration_seconds + @metric_cache_operation_duration_seconds ||= Gitlab::Metrics.histogram( + :gitlab_cache_operation_duration_seconds, + 'Cache access time', + Transaction::BASE_LABELS.merge({ action: nil }), + [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] + ) + end + + def metric_cache_misses_total + @metric_cache_misses_total ||= Gitlab::Metrics.counter( + :gitlab_cache_misses_total, + 'Cache read miss', + Transaction::BASE_LABELS + ) + end end end end diff --git a/lib/gitlab/metrics/system.rb b/lib/gitlab/metrics/system.rb index aba3e0df382..c2cbd3c16a1 100644 --- a/lib/gitlab/metrics/system.rb +++ b/lib/gitlab/metrics/system.rb @@ -46,14 +46,14 @@ module Gitlab # Returns the current real time in a given precision. # - # Returns the time as a Float. + # Returns the time as a Fixnum. def self.real_time(precision = :millisecond) Process.clock_gettime(Process::CLOCK_REALTIME, precision) end # Returns the current monotonic clock time in a given precision. # - # Returns the time as a Float. + # Returns the time as a Fixnum. def self.monotonic_time(precision = :millisecond) Process.clock_gettime(Process::CLOCK_MONOTONIC, precision) end diff --git a/lib/gitlab/metrics/transaction.rb b/lib/gitlab/metrics/transaction.rb index 4f9fb1c7853..ee3afc5ffdb 100644 --- a/lib/gitlab/metrics/transaction.rb +++ b/lib/gitlab/metrics/transaction.rb @@ -2,34 +2,33 @@ module Gitlab module Metrics # Class for storing metrics information of a single transaction. class Transaction + # base labels shared among all transactions + BASE_LABELS = { controller: nil, action: nil }.freeze + THREAD_KEY = :_gitlab_metrics_transaction + METRICS_MUTEX = Mutex.new # The series to store events (e.g. Git pushes) in. EVENT_SERIES = 'events'.freeze attr_reader :tags, :values, :method, :metrics - attr_accessor :action - def self.current Thread.current[THREAD_KEY] end - # action - A String describing the action performed, usually the class - # plus method name. - def initialize(action = nil) + def initialize @metrics = [] @methods = {} - @started_at = nil + @started_at = nil @finished_at = nil @values = Hash.new(0) - @tags = {} - @action = action + @tags = {} @memory_before = 0 - @memory_after = 0 + @memory_after = 0 end def duration @@ -44,12 +43,15 @@ module Gitlab Thread.current[THREAD_KEY] = self @memory_before = System.memory_usage - @started_at = System.monotonic_time + @started_at = System.monotonic_time yield ensure @memory_after = System.memory_usage - @finished_at = System.monotonic_time + @finished_at = System.monotonic_time + + self.class.metric_transaction_duration_seconds.observe(labels, duration * 1000) + self.class.metric_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0) Thread.current[THREAD_KEY] = nil end @@ -66,33 +68,29 @@ module Gitlab # event_name - The name of the event (e.g. "git_push"). # tags - A set of tags to attach to the event. def add_event(event_name, tags = {}) - @metrics << Metric.new(EVENT_SERIES, - { count: 1 }, - { event: event_name }.merge(tags), - :event) + self.class.metric_event_counter(event_name, tags).increment(tags.merge(labels)) + @metrics << Metric.new(EVENT_SERIES, { count: 1 }, tags.merge(event: event_name), :event) end # Returns a MethodCall object for the given name. - def method_call_for(name) + def method_call_for(name, module_name, method_name) unless method = @methods[name] - @methods[name] = method = MethodCall.new(name, Instrumentation.series) + @methods[name] = method = MethodCall.new(name, module_name, method_name, self) end method end - def increment(name, value) + def increment(name, value, use_prometheus = true) + self.class.metric_transaction_counter(name).increment(labels, value) if use_prometheus @values[name] += value end - def set(name, value) + def set(name, value, use_prometheus = true) + self.class.metric_transaction_gauge(name).set(labels, value) if use_prometheus @values[name] = value end - def add_tag(key, value) - @tags[key] = value - end - def finish track_self submit @@ -117,14 +115,83 @@ module Gitlab submit_hashes = submit.map do |metric| hash = metric.to_hash - - hash[:tags][:action] ||= @action if @action && !metric.event? + hash[:tags][:action] ||= action if action && !metric.event? hash end Metrics.submit_metrics(submit_hashes) end + + def labels + BASE_LABELS + end + + # returns string describing the action performed, usually the class plus method name. + def action + "#{labels[:controller]}##{labels[:action]}" if labels && !labels.empty? + end + + def self.metric_transaction_duration_seconds + return @metric_transaction_duration_seconds if @metric_transaction_duration_seconds + + METRICS_MUTEX.synchronize do + @metric_transaction_duration_seconds ||= Gitlab::Metrics.histogram( + :gitlab_transaction_duration_seconds, + 'Transaction duration', + BASE_LABELS, + [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0] + ) + end + end + + def self.metric_transaction_allocated_memory_bytes + return @metric_transaction_allocated_memory_bytes if @metric_transaction_allocated_memory_bytes + + METRICS_MUTEX.synchronize do + @metric_transaction_allocated_memory_bytes ||= Gitlab::Metrics.histogram( + :gitlab_transaction_allocated_memory_bytes, + 'Transaction allocated memory bytes', + BASE_LABELS, + [1000, 10000, 20000, 500000, 1000000, 2000000, 5000000, 10000000, 20000000, 100000000] + ) + end + end + + def self.metric_event_counter(event_name, tags) + return @metric_event_counters[event_name] if @metric_event_counters&.has_key?(event_name) + + METRICS_MUTEX.synchronize do + @metric_event_counters ||= {} + @metric_event_counters[event_name] ||= Gitlab::Metrics.counter( + "gitlab_transaction_event_#{event_name}_total".to_sym, + "Transaction event #{event_name} counter", + tags.merge(BASE_LABELS) + ) + end + end + + def self.metric_transaction_counter(name) + return @metric_transaction_counters[name] if @metric_transaction_counters&.has_key?(name) + + METRICS_MUTEX.synchronize do + @metric_transaction_counters ||= {} + @metric_transaction_counters[name] ||= Gitlab::Metrics.counter( + "gitlab_transaction_#{name}_total".to_sym, "Transaction #{name} counter", BASE_LABELS + ) + end + end + + def self.metric_transaction_gauge(name) + return @metric_transaction_gauges[name] if @metric_transaction_gauges&.has_key?(name) + + METRICS_MUTEX.synchronize do + @metric_transaction_gauges ||= {} + @metric_transaction_gauges[name] ||= Gitlab::Metrics.gauge( + "gitlab_transaction_#{name}".to_sym, "Transaction gauge #{name}", BASE_LABELS, :livesum + ) + end + end end end end diff --git a/lib/gitlab/metrics/unicorn_sampler.rb b/lib/gitlab/metrics/unicorn_sampler.rb deleted file mode 100644 index f6987252039..00000000000 --- a/lib/gitlab/metrics/unicorn_sampler.rb +++ /dev/null @@ -1,48 +0,0 @@ -module Gitlab - module Metrics - class UnicornSampler < BaseSampler - def initialize(interval) - super(interval) - end - - def unicorn_active_connections - @unicorn_active_connections ||= Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max) - end - - def unicorn_queued_connections - @unicorn_queued_connections ||= Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max) - end - - def enabled? - # Raindrops::Linux.tcp_listener_stats is only present on Linux - unicorn_with_listeners? && Raindrops::Linux.respond_to?(:tcp_listener_stats) - end - - def sample - Raindrops::Linux.tcp_listener_stats(tcp_listeners).each do |addr, stats| - unicorn_active_connections.set({ type: 'tcp', address: addr }, stats.active) - unicorn_queued_connections.set({ type: 'tcp', address: addr }, stats.queued) - end - - Raindrops::Linux.unix_listener_stats(unix_listeners).each do |addr, stats| - unicorn_active_connections.set({ type: 'unix', address: addr }, stats.active) - unicorn_queued_connections.set({ type: 'unix', address: addr }, stats.queued) - end - end - - private - - def tcp_listeners - @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z}) - end - - def unix_listeners - @unix_listeners ||= Unicorn.listener_names - tcp_listeners - end - - def unicorn_with_listeners? - defined?(Unicorn) && Unicorn.listener_names.any? - end - end - end -end diff --git a/lib/gitlab/metrics/web_transaction.rb b/lib/gitlab/metrics/web_transaction.rb new file mode 100644 index 00000000000..89ff02a96d6 --- /dev/null +++ b/lib/gitlab/metrics/web_transaction.rb @@ -0,0 +1,82 @@ +module Gitlab + module Metrics + class WebTransaction < Transaction + CONTROLLER_KEY = 'action_controller.instance'.freeze + ENDPOINT_KEY = 'api.endpoint'.freeze + + CONTENT_TYPES = { + 'text/html' => :html, + 'text/plain' => :txt, + 'application/json' => :json, + 'text/js' => :js, + 'application/atom+xml' => :atom, + 'image/png' => :png, + 'image/jpeg' => :jpeg, + 'image/gif' => :gif, + 'image/svg+xml' => :svg + }.freeze + + def initialize(env) + super() + @env = env + end + + def labels + return @labels if @labels + + # memoize transaction labels only source env variables were present + @labels = if @env[CONTROLLER_KEY] + labels_from_controller || {} + elsif @env[ENDPOINT_KEY] + labels_from_endpoint || {} + end + + @labels || {} + end + + private + + def labels_from_controller + controller = @env[CONTROLLER_KEY] + + action = "#{controller.action_name}" + suffix = CONTENT_TYPES[controller.content_type] + + if suffix && suffix != :html + action += ".#{suffix}" + end + + { controller: controller.class.name, action: action } + end + + def labels_from_endpoint + endpoint = @env[ENDPOINT_KEY] + + begin + route = endpoint.route + rescue + # endpoint.route is calling env[Grape::Env::GRAPE_ROUTING_ARGS][:route_info] + # but env[Grape::Env::GRAPE_ROUTING_ARGS] is nil in the case of a 405 response + # so we're rescuing exceptions and bailing out + end + + if route + path = endpoint_paths_cache[route.request_method][route.path] + { controller: 'Grape', action: "#{route.request_method} #{path}" } + end + end + + def endpoint_paths_cache + @endpoint_paths_cache ||= Hash.new do |hash, http_method| + hash[http_method] = Hash.new do |inner_hash, raw_path| + inner_hash[raw_path] = endpoint_instrumentable_path(raw_path) + end + end + end + + def endpoint_instrumentable_path(raw_path) + raw_path.sub('(.:format)', '').sub('/:version', '') + end + end + end +end diff --git a/lib/gitlab/middleware/go.rb b/lib/gitlab/middleware/go.rb index f42168c720e..c6a56277922 100644 --- a/lib/gitlab/middleware/go.rb +++ b/lib/gitlab/middleware/go.rb @@ -4,6 +4,7 @@ module Gitlab module Middleware class Go include ActionView::Helpers::TagHelper + include Gitlab::CurrentSettings PROJECT_PATH_REGEX = %r{\A(#{Gitlab::PathRegex.full_namespace_route_regex}/#{Gitlab::PathRegex.project_route_regex})/}.freeze @@ -37,10 +38,19 @@ module Gitlab end def go_body(path) - project_url = URI.join(Gitlab.config.gitlab.url, path) + config = Gitlab.config + project_url = URI.join(config.gitlab.url, path) import_prefix = strip_url(project_url.to_s) - meta_tag = tag :meta, name: 'go-import', content: "#{import_prefix} git #{project_url}.git" + repository_url = if current_application_settings.enabled_git_access_protocol == 'ssh' + shell = config.gitlab_shell + port = ":#{shell.ssh_port}" unless shell.ssh_port == 22 + "ssh://#{shell.ssh_user}@#{shell.ssh_host}#{port}/#{path}.git" + else + "#{project_url}.git" + end + + meta_tag = tag :meta, name: 'go-import', content: "#{import_prefix} git #{repository_url}" head_tag = content_tag :head, meta_tag content_tag :html, head_tag end @@ -55,6 +65,7 @@ module Gitlab project_path_match = "#{path_info}/".match(PROJECT_PATH_REGEX) return unless project_path_match + path = project_path_match[1] # Go subpackages may be in the form of `namespace/project/path1/path2/../pathN`. diff --git a/lib/gitlab/middleware/rails_queue_duration.rb b/lib/gitlab/middleware/rails_queue_duration.rb index 63c3372da51..bc70b2459ef 100644 --- a/lib/gitlab/middleware/rails_queue_duration.rb +++ b/lib/gitlab/middleware/rails_queue_duration.rb @@ -14,11 +14,22 @@ module Gitlab proxy_start = env['HTTP_GITLAB_WORKHORSE_PROXY_START'].presence if trans && proxy_start # Time in milliseconds since gitlab-workhorse started the request - trans.set(:rails_queue_duration, Time.now.to_f * 1_000 - proxy_start.to_f / 1_000_000) + duration = Time.now.to_f * 1_000 - proxy_start.to_f / 1_000_000 + trans.set(:rails_queue_duration, duration) + metric_rails_queue_duration_seconds.observe(trans.labels, duration / 1_000) end @app.call(env) end + + private + + def metric_rails_queue_duration_seconds + @metric_rails_queue_duration_seconds ||= Gitlab::Metrics.histogram( + :gitlab_rails_queue_duration_seconds, + Gitlab::Metrics::Transaction::BASE_LABELS + ) + end end end end diff --git a/lib/gitlab/middleware/read_only.rb b/lib/gitlab/middleware/read_only.rb index 0de0cddcce4..c26656704d7 100644 --- a/lib/gitlab/middleware/read_only.rb +++ b/lib/gitlab/middleware/read_only.rb @@ -12,6 +12,7 @@ module Gitlab def call(env) @env = env + @route_hash = nil if disallowed_request? && Gitlab::Database.read_only? Rails.logger.debug('GitLab ReadOnly: preventing possible non read-only operation') @@ -57,7 +58,7 @@ module Gitlab end def last_visited_url - @env['HTTP_REFERER'] || rack_session['user_return_to'] || Rails.application.routes.url_helpers.root_url + @env['HTTP_REFERER'] || rack_session['user_return_to'] || Gitlab::Routing.url_helpers.root_url end def route_hash @@ -65,11 +66,7 @@ module Gitlab end def whitelisted_routes - logout_route || grack_route || @whitelisted.any? { |path| request.path.include?(path) } || lfs_route || sidekiq_route - end - - def logout_route - route_hash[:controller] == 'sessions' && route_hash[:action] == 'destroy' + grack_route || @whitelisted.any? { |path| request.path.include?(path) } || lfs_route || sidekiq_route end def sidekiq_route @@ -77,11 +74,17 @@ module Gitlab end def grack_route - request.path.end_with?('.git/git-upload-pack') + # Calling route_hash may be expensive. Only do it if we think there's a possible match + return false unless request.path.end_with?('.git/git-upload-pack') + + route_hash[:controller] == 'projects/git_http' && route_hash[:action] == 'git_upload_pack' end def lfs_route - request.path.end_with?('/info/lfs/objects/batch') + # Calling route_hash may be expensive. Only do it if we think there's a possible match + return false unless request.path.end_with?('/info/lfs/objects/batch') + + route_hash[:controller] == 'projects/lfs_api' && route_hash[:action] == 'batch' end end end diff --git a/lib/gitlab/multi_collection_paginator.rb b/lib/gitlab/multi_collection_paginator.rb index eb3c9002710..c22d0a84860 100644 --- a/lib/gitlab/multi_collection_paginator.rb +++ b/lib/gitlab/multi_collection_paginator.rb @@ -55,7 +55,9 @@ module Gitlab def first_collection_last_page_size return @first_collection_last_page_size if defined?(@first_collection_last_page_size) - @first_collection_last_page_size = paginated_first_collection(first_collection_page_count).count + @first_collection_last_page_size = paginated_first_collection(first_collection_page_count) + .except(:select) + .size end end end diff --git a/lib/gitlab/o_auth/user.rb b/lib/gitlab/o_auth/user.rb index 47c2a422387..552133234a3 100644 --- a/lib/gitlab/o_auth/user.rb +++ b/lib/gitlab/o_auth/user.rb @@ -157,7 +157,7 @@ module Gitlab end def find_by_uid_and_provider - identity = Identity.find_by(provider: auth_hash.provider, extern_uid: auth_hash.uid) + identity = Identity.with_extern_uid(auth_hash.provider, auth_hash.uid).take identity && identity.user end @@ -179,7 +179,7 @@ module Gitlab valid_username = ::Namespace.clean_path(username) uniquify = Uniquify.new - valid_username = uniquify.string(valid_username) { |s| !DynamicPathValidator.valid_user_path?(s) } + valid_username = uniquify.string(valid_username) { |s| !UserPathValidator.valid_path?(s) } name = auth_hash.name name = valid_username if name.strip.empty? diff --git a/lib/gitlab/optimistic_locking.rb b/lib/gitlab/optimistic_locking.rb index 962ff4d3985..1d9a5d1a20a 100644 --- a/lib/gitlab/optimistic_locking.rb +++ b/lib/gitlab/optimistic_locking.rb @@ -11,6 +11,7 @@ module Gitlab rescue ActiveRecord::StaleObjectError retries -= 1 raise unless retries >= 0 + subject.reload end end diff --git a/lib/gitlab/path_regex.rb b/lib/gitlab/path_regex.rb index 22f8dd669d0..9a91f8bf96a 100644 --- a/lib/gitlab/path_regex.rb +++ b/lib/gitlab/path_regex.rb @@ -112,22 +112,6 @@ module Gitlab # this would map to the activity-page of its parent. GROUP_ROUTES = %w[ - - activity - analytics - audit_events - avatar - edit - group_members - hooks - issues - labels - ldap - ldap_group_links - merge_requests - milestones - notification_setting - pipeline_quota - projects ].freeze ILLEGAL_PROJECT_PATH_WORDS = PROJECT_WILDCARD_ROUTES diff --git a/lib/gitlab/performance_bar/peek_query_tracker.rb b/lib/gitlab/performance_bar/peek_query_tracker.rb index 69e117f1da9..f2825db59ae 100644 --- a/lib/gitlab/performance_bar/peek_query_tracker.rb +++ b/lib/gitlab/performance_bar/peek_query_tracker.rb @@ -36,7 +36,7 @@ module Gitlab end def track_query(raw_query, bindings, start, finish) - duration = finish - start + duration = (finish - start) * 1000.0 query_info = { duration: duration.round(3), sql: raw_query } PEEK_DB_CLIENT.query_details << query_info diff --git a/lib/gitlab/regex.rb b/lib/gitlab/regex.rb index bd677ec4bf3..2c7b8af83f2 100644 --- a/lib/gitlab/regex.rb +++ b/lib/gitlab/regex.rb @@ -25,7 +25,7 @@ module Gitlab # See https://github.com/docker/distribution/blob/master/reference/regexp.go. # def container_repository_name_regex - @container_repository_regex ||= %r{\A[a-z0-9]+(?:[-._/][a-z0-9]+)*\Z} + @container_repository_regex ||= %r{\A[a-z0-9]+((?:[._/]|__|[-])[a-z0-9]+)*\Z} end ## diff --git a/lib/gitlab/routing.rb b/lib/gitlab/routing.rb index e57890f1143..2c994536060 100644 --- a/lib/gitlab/routing.rb +++ b/lib/gitlab/routing.rb @@ -40,5 +40,24 @@ module Gitlab def self.url_helpers @url_helpers ||= Gitlab::Application.routes.url_helpers end + + def self.redirect_legacy_paths(router, *paths) + build_redirect_path = lambda do |request, _params, path| + # Only replace the last occurence of `path`. + # + # `request.fullpath` includes the querystring + new_path = request.path.sub(%r{/#{path}(/*)(?!.*#{path})}, "/-/#{path}\\1") + new_path << "?#{request.query_string}" if request.query_string.present? + + new_path + end + + paths.each do |path| + router.match "/#{path}(/*rest)", + via: [:get, :post, :patch, :delete], + to: router.redirect { |params, request| build_redirect_path.call(request, params, path) }, + as: "legacy_#{path}_redirect" + end + end end end diff --git a/lib/gitlab/saml/user.rb b/lib/gitlab/saml/user.rb index e0a9d1dee77..d8faf7aad8c 100644 --- a/lib/gitlab/saml/user.rb +++ b/lib/gitlab/saml/user.rb @@ -28,6 +28,7 @@ module Gitlab def changed? return true unless gl_user + gl_user.changed? || gl_user.identities.any?(&:changed?) end diff --git a/lib/gitlab/shell.rb b/lib/gitlab/shell.rb index a37112ae5c4..996d213fdb4 100644 --- a/lib/gitlab/shell.rb +++ b/lib/gitlab/shell.rb @@ -101,8 +101,7 @@ module Gitlab # # Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/387 def import_repository(storage, name, url) - # Timeout should be less than 900 ideally, to prevent the memory killer - # to silently kill the process without knowing we are timing out here. + # The timeout ensures the subprocess won't hang forever cmd = [gitlab_shell_projects_path, 'import-project', storage, "#{name}.git", url, "#{Gitlab.config.gitlab_shell.git_timeout}"] gitlab_shell_fast_execute_raise_error(cmd) @@ -368,6 +367,7 @@ module Gitlab output, status = gitlab_shell_fast_execute_helper(cmd, vars) raise Error, output unless status.zero? + true end diff --git a/lib/gitlab/shell_adapter.rb b/lib/gitlab/shell_adapter.rb index fbe2a7a0d72..053dd4ab9e0 100644 --- a/lib/gitlab/shell_adapter.rb +++ b/lib/gitlab/shell_adapter.rb @@ -5,7 +5,7 @@ module Gitlab module ShellAdapter def gitlab_shell - Gitlab::Shell.new + @gitlab_shell ||= Gitlab::Shell.new end end end diff --git a/lib/gitlab/sherlock/transaction.rb b/lib/gitlab/sherlock/transaction.rb index 3489fb251b6..400a552bf99 100644 --- a/lib/gitlab/sherlock/transaction.rb +++ b/lib/gitlab/sherlock/transaction.rb @@ -89,7 +89,9 @@ module Gitlab ActiveSupport::Notifications.subscribe('sql.active_record') do |_, start, finish, _, data| next unless same_thread? - track_query(data[:sql].strip, data[:binds], start, finish) + unless data.fetch(:cached, data[:name] == 'CACHE') + track_query(data[:sql].strip, data[:binds], start, finish) + end end end diff --git a/lib/gitlab/sidekiq_middleware/memory_killer.rb b/lib/gitlab/sidekiq_middleware/memory_killer.rb index d7d24eeb37b..2bfb7caefd9 100644 --- a/lib/gitlab/sidekiq_middleware/memory_killer.rb +++ b/lib/gitlab/sidekiq_middleware/memory_killer.rb @@ -7,7 +7,6 @@ module Gitlab GRACE_TIME = (ENV['SIDEKIQ_MEMORY_KILLER_GRACE_TIME'] || 15 * 60).to_s.to_i # Wait 30 seconds for running jobs to finish during graceful shutdown SHUTDOWN_WAIT = (ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT'] || 30).to_s.to_i - SHUTDOWN_SIGNAL = (ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_SIGNAL'] || 'SIGKILL').to_s # Create a mutex used to ensure there will be only one thread waiting to # shut Sidekiq down @@ -15,6 +14,7 @@ module Gitlab def call(worker, job, queue) yield + current_rss = get_rss return unless MAX_RSS > 0 && current_rss > MAX_RSS @@ -23,32 +23,45 @@ module Gitlab # Return if another thread is already waiting to shut Sidekiq down return unless MUTEX.try_lock - Sidekiq.logger.warn "current RSS #{current_rss} exceeds maximum RSS "\ - "#{MAX_RSS}" - Sidekiq.logger.warn "this thread will shut down PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']} "\ - "in #{GRACE_TIME} seconds" - sleep(GRACE_TIME) + Sidekiq.logger.warn "Sidekiq worker PID-#{pid} current RSS #{current_rss}"\ + " exceeds maximum RSS #{MAX_RSS} after finishing job #{worker.class} JID-#{job['jid']}" + Sidekiq.logger.warn "Sidekiq worker PID-#{pid} will stop fetching new jobs in #{GRACE_TIME} seconds, and will be shut down #{SHUTDOWN_WAIT} seconds later" - Sidekiq.logger.warn "sending SIGTERM to PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']}" - Process.kill('SIGTERM', Process.pid) + # Wait `GRACE_TIME` to give the memory intensive job time to finish. + # Then, tell Sidekiq to stop fetching new jobs. + wait_and_signal(GRACE_TIME, 'SIGSTP', 'stop fetching new jobs') - Sidekiq.logger.warn "waiting #{SHUTDOWN_WAIT} seconds before sending "\ - "#{SHUTDOWN_SIGNAL} to PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']}" - sleep(SHUTDOWN_WAIT) + # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish. + # Then, tell Sidekiq to gracefully shut down by giving jobs a few more + # moments to finish, killing and requeuing them if they didn't, and + # then terminating itself. + wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down') - Sidekiq.logger.warn "sending #{SHUTDOWN_SIGNAL} to PID #{Process.pid} - Worker #{worker.class} - JID-#{job['jid']}" - Process.kill(SHUTDOWN_SIGNAL, Process.pid) + # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't. + wait_and_signal(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die') end end private def get_rss - output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{Process.pid})) + output, status = Gitlab::Popen.popen(%W(ps -o rss= -p #{pid})) return 0 unless status.zero? output.to_i end + + def wait_and_signal(time, signal, explanation) + Sidekiq.logger.warn "waiting #{time} seconds before sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})" + sleep(time) + + Sidekiq.logger.warn "sending Sidekiq worker PID-#{pid} #{signal} (#{explanation})" + Process.kill(signal, pid) + end + + def pid + Process.pid + end end end end diff --git a/lib/gitlab/string_range_marker.rb b/lib/gitlab/string_range_marker.rb index 11aeec1ebfa..f9faa134206 100644 --- a/lib/gitlab/string_range_marker.rb +++ b/lib/gitlab/string_range_marker.rb @@ -90,6 +90,7 @@ module Gitlab # Takes an array of integers, and returns an array of ranges covering the same integers def collapse_ranges(positions) return [] if positions.empty? + ranges = [] start = prev = positions[0] diff --git a/lib/gitlab/template/finders/repo_template_finder.rb b/lib/gitlab/template/finders/repo_template_finder.rb index cb7957e2af9..33f07fa0120 100644 --- a/lib/gitlab/template/finders/repo_template_finder.rb +++ b/lib/gitlab/template/finders/repo_template_finder.rb @@ -18,6 +18,7 @@ module Gitlab def read(path) blob = @repository.blob_at(@commit.id, path) if @commit raise FileNotFoundError if blob.nil? + blob.data end diff --git a/lib/gitlab/testing/request_blocker_middleware.rb b/lib/gitlab/testing/request_blocker_middleware.rb index aa67fa08577..4a8e3c2eee0 100644 --- a/lib/gitlab/testing/request_blocker_middleware.rb +++ b/lib/gitlab/testing/request_blocker_middleware.rb @@ -7,6 +7,7 @@ module Gitlab class RequestBlockerMiddleware @@num_active_requests = Concurrent::AtomicFixnum.new(0) @@block_requests = Concurrent::AtomicBoolean.new(false) + @@slow_requests = Concurrent::AtomicBoolean.new(false) # Returns the number of requests the server is currently processing. def self.num_active_requests @@ -19,9 +20,15 @@ module Gitlab @@block_requests.value = true end + # Slows down incoming requests (useful for race conditions). + def self.slow_requests! + @@slow_requests.value = true + end + # Allows the server to accept requests again. def self.allow_requests! @@block_requests.value = false + @@slow_requests.value = false end def initialize(app) @@ -33,6 +40,7 @@ module Gitlab if block_requests? block_request(env) else + sleep 0.2 if slow_requests? @app.call(env) end ensure @@ -45,6 +53,10 @@ module Gitlab @@block_requests.true? end + def slow_requests? + @@slow_requests.true? + end + def block_request(env) [503, {}, []] end diff --git a/lib/gitlab/testing/request_inspector_middleware.rb b/lib/gitlab/testing/request_inspector_middleware.rb new file mode 100644 index 00000000000..e387667480d --- /dev/null +++ b/lib/gitlab/testing/request_inspector_middleware.rb @@ -0,0 +1,71 @@ +# rubocop:disable Style/ClassVars + +module Gitlab + module Testing + class RequestInspectorMiddleware + @@log_requests = Concurrent::AtomicBoolean.new(false) + @@logged_requests = Concurrent::Array.new + @@inject_headers = Concurrent::Hash.new + + # Resets the current request log and starts logging requests + def self.log_requests!(headers = {}) + @@inject_headers.replace(headers) + @@logged_requests.replace([]) + @@log_requests.value = true + end + + # Stops logging requests + def self.stop_logging! + @@log_requests.value = false + end + + def self.requests + @@logged_requests + end + + def initialize(app) + @app = app + end + + def call(env) + return @app.call(env) unless @@log_requests.true? + + url = env['REQUEST_URI'] + env.merge! http_headers_env(@@inject_headers) if @@inject_headers.any? + request_headers = env_http_headers(env) + status, headers, body = @app.call(env) + + request = OpenStruct.new( + url: url, + status_code: status, + request_headers: request_headers, + response_headers: headers + ) + log_request request + + [status, headers, body] + end + + private + + def env_http_headers(env) + Hash[*env.select { |k, v| k.start_with? 'HTTP_' } + .collect { |k, v| [k.sub(/^HTTP_/, ''), v] } + .collect { |k, v| [k.split('_').collect(&:capitalize).join('-'), v] } + .sort + .flatten] + end + + def http_headers_env(headers) + Hash[*headers + .collect { |k, v| [k.split('-').collect(&:upcase).join('_'), v] } + .collect { |k, v| [k.prepend('HTTP_'), v] } + .flatten] + end + + def log_request(response) + @@logged_requests.push(response) + end + end + end +end diff --git a/lib/gitlab/url_blocker.rb b/lib/gitlab/url_blocker.rb index fee1a127fd7..13150ddab67 100644 --- a/lib/gitlab/url_blocker.rb +++ b/lib/gitlab/url_blocker.rb @@ -22,10 +22,12 @@ module Gitlab return true if blocked_user_or_hostname?(uri.user) return true if blocked_user_or_hostname?(uri.hostname) - server_ips = Resolv.getaddresses(uri.hostname) + server_ips = Addrinfo.getaddrinfo(uri.hostname, 80, nil, :STREAM).map(&:ip_address) return true if (blocked_ips & server_ips).any? rescue Addressable::URI::InvalidURIError return true + rescue SocketError + return false end false diff --git a/lib/gitlab/url_sanitizer.rb b/lib/gitlab/url_sanitizer.rb index 1caa791c1be..59331c827af 100644 --- a/lib/gitlab/url_sanitizer.rb +++ b/lib/gitlab/url_sanitizer.rb @@ -70,6 +70,7 @@ module Gitlab def generate_full_url return @url unless valid_credentials? + @full_url = @url.dup @full_url.password = credentials[:password] if credentials[:password].present? diff --git a/lib/gitlab/usage_data.rb b/lib/gitlab/usage_data.rb index 70a403652e7..112d4939582 100644 --- a/lib/gitlab/usage_data.rb +++ b/lib/gitlab/usage_data.rb @@ -48,9 +48,9 @@ module Gitlab deploy_keys: DeployKey.count, deployments: Deployment.count, environments: ::Environment.count, - gcp_clusters: ::Gcp::Cluster.count, - gcp_clusters_enabled: ::Gcp::Cluster.enabled.count, - gcp_clusters_disabled: ::Gcp::Cluster.disabled.count, + clusters: ::Clusters::Cluster.count, + clusters_enabled: ::Clusters::Cluster.enabled.count, + clusters_disabled: ::Clusters::Cluster.disabled.count, in_review_folder: ::Environment.in_review_folder.count, groups: Group.count, issues: Issue.count, diff --git a/lib/gitlab/utils/strong_memoize.rb b/lib/gitlab/utils/strong_memoize.rb new file mode 100644 index 00000000000..a2ac9285b56 --- /dev/null +++ b/lib/gitlab/utils/strong_memoize.rb @@ -0,0 +1,31 @@ +module Gitlab + module Utils + module StrongMemoize + # Instead of writing patterns like this: + # + # def trigger_from_token + # return @trigger if defined?(@trigger) + # + # @trigger = Ci::Trigger.find_by_token(params[:token].to_s) + # end + # + # We could write it like: + # + # def trigger_from_token + # strong_memoize(:trigger) do + # Ci::Trigger.find_by_token(params[:token].to_s) + # end + # end + # + def strong_memoize(name) + ivar_name = "@#{name}" + + if instance_variable_defined?(ivar_name) + instance_variable_get(ivar_name) + else + instance_variable_set(ivar_name, yield) + end + end + end + end +end diff --git a/lib/gitlab/visibility_level.rb b/lib/gitlab/visibility_level.rb index c60bd91ea6e..11472ce6cce 100644 --- a/lib/gitlab/visibility_level.rb +++ b/lib/gitlab/visibility_level.rb @@ -99,6 +99,7 @@ module Gitlab def level_value(level) return level.to_i if level.to_i.to_s == level.to_s && string_options.key(level.to_i) + string_options[level] || PRIVATE end diff --git a/lib/gitlab/workhorse.rb b/lib/gitlab/workhorse.rb index 58d5b0da1c4..864a9e04888 100644 --- a/lib/gitlab/workhorse.rb +++ b/lib/gitlab/workhorse.rb @@ -16,14 +16,15 @@ module Gitlab SECRET_LENGTH = 32 class << self - def git_http_ok(repository, is_wiki, user, action) + def git_http_ok(repository, is_wiki, user, action, show_all_refs: false) project = repository.project repo_path = repository.path_to_repo params = { GL_ID: Gitlab::GlId.gl_id(user), GL_REPOSITORY: Gitlab::GlRepository.gl_repository(project, is_wiki), GL_USERNAME: user&.username, - RepoPath: repo_path + RepoPath: repo_path, + ShowAllRefs: show_all_refs } server = { address: Gitlab::GitalyClient.address(project.repository_storage), @@ -173,6 +174,7 @@ module Gitlab @secret ||= begin bytes = Base64.strict_decode64(File.read(secret_path).chomp) raise "#{secret_path} does not contain #{SECRET_LENGTH} bytes" if bytes.length != SECRET_LENGTH + bytes end end |