diff options
Diffstat (limited to 'lib/gitlab')
-rw-r--r-- | lib/gitlab/access.rb | 6 | ||||
-rw-r--r-- | lib/gitlab/ci/build/artifacts/metadata/entry.rb | 244 | ||||
-rw-r--r-- | lib/gitlab/ci/build/artifacts/path.rb | 51 | ||||
-rw-r--r-- | lib/gitlab/encoding_helper.rb | 17 | ||||
-rw-r--r-- | lib/gitlab/git/blob.rb | 12 | ||||
-rw-r--r-- | lib/gitlab/git/diff.rb | 16 | ||||
-rw-r--r-- | lib/gitlab/import_export/import_export.yml | 1 | ||||
-rw-r--r-- | lib/gitlab/import_export/project_tree_restorer.rb | 79 | ||||
-rw-r--r-- | lib/gitlab/import_export/shared.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/ldap/user.rb | 4 | ||||
-rw-r--r-- | lib/gitlab/o_auth/auth_hash.rb | 17 | ||||
-rw-r--r-- | lib/gitlab/o_auth/user.rb | 32 | ||||
-rw-r--r-- | lib/gitlab/saml/user.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/url_sanitizer.rb | 19 | ||||
-rw-r--r-- | lib/gitlab/workhorse.rb | 4 |
15 files changed, 322 insertions, 184 deletions
diff --git a/lib/gitlab/access.rb b/lib/gitlab/access.rb index 4714ab18cc1..b4012ebbb99 100644 --- a/lib/gitlab/access.rb +++ b/lib/gitlab/access.rb @@ -67,10 +67,14 @@ module Gitlab def protection_values protection_options.values end + + def human_access(access) + options_with_owner.key(access) + end end def human_access - Gitlab::Access.options_with_owner.key(access_field) + Gitlab::Access.human_access(access_field) end def owner? diff --git a/lib/gitlab/ci/build/artifacts/metadata/entry.rb b/lib/gitlab/ci/build/artifacts/metadata/entry.rb index 2e073334abc..22941d48edf 100644 --- a/lib/gitlab/ci/build/artifacts/metadata/entry.rb +++ b/lib/gitlab/ci/build/artifacts/metadata/entry.rb @@ -1,129 +1,129 @@ module Gitlab - module Ci::Build::Artifacts - class Metadata - ## - # Class that represents an entry (path and metadata) to a file or - # directory in GitLab CI Build Artifacts binary file / archive - # - # This is IO-operations safe class, that does similar job to - # Ruby's Pathname but without the risk of accessing filesystem. - # - # This class is working only with UTF-8 encoded paths. - # - class Entry - attr_reader :path, :entries - attr_accessor :name - - def initialize(path, entries) - @path = path.dup.force_encoding('UTF-8') - @entries = entries - - if path.include?("\0") - raise ArgumentError, 'Path contains zero byte character!' - end + module Ci + module Build + module Artifacts + class Metadata + ## + # Class that represents an entry (path and metadata) to a file or + # directory in GitLab CI Build Artifacts binary file / archive + # + # This is IO-operations safe class, that does similar job to + # Ruby's Pathname but without the risk of accessing filesystem. + # + # This class is working only with UTF-8 encoded paths. + # + class Entry + attr_reader :entries + attr_accessor :name + + def initialize(path, entries) + @entries = entries + @path = Artifacts::Path.new(path) + end + + delegate :empty?, to: :children + + def directory? + blank_node? || @path.directory? + end + + def file? + !directory? + end + + def blob + return unless file? + + @blob ||= Blob.decorate(::Ci::ArtifactBlob.new(self), nil) + end + + def has_parent? + nodes > 0 + end + + def parent + return nil unless has_parent? + self.class.new(@path.to_s.chomp(basename), @entries) + end + + def basename + (directory? && !blank_node?) ? name + '/' : name + end + + def name + @name || @path.name + end + + def children + return [] unless directory? + return @children if @children + + child_pattern = %r{^#{Regexp.escape(@path.to_s)}[^/]+/?$} + @children = select_entries { |path| path =~ child_pattern } + end + + def directories(opts = {}) + return [] unless directory? + dirs = children.select(&:directory?) + return dirs unless has_parent? && opts[:parent] + + dotted_parent = parent + dotted_parent.name = '..' + dirs.prepend(dotted_parent) + end + + def files + return [] unless directory? + children.select(&:file?) + end + + def metadata + @entries[@path.to_s] || {} + end + + def nodes + @path.nodes + (file? ? 1 : 0) + end + + def blank_node? + @path.to_s.empty? # "" is considered to be './' + end + + def exists? + blank_node? || @entries.include?(@path.to_s) + end + + def total_size + descendant_pattern = %r{^#{Regexp.escape(@path.to_s)}} + entries.sum do |path, entry| + (entry[:size] if path =~ descendant_pattern).to_i + end + end + + def path + @path.to_s + end + + def to_s + @path.to_s + end + + def ==(other) + path == other.path && @entries == other.entries + end + + def inspect + "#{self.class.name}: #{self}" + end - unless path.valid_encoding? - raise ArgumentError, 'Path contains non-UTF-8 byte sequence!' + private + + def select_entries + selected = @entries.select { |path, _metadata| yield path } + selected.map { |path, _metadata| self.class.new(path, @entries) } + end end end - - delegate :empty?, to: :children - - def directory? - blank_node? || @path.end_with?('/') - end - - def file? - !directory? - end - - def blob - return unless file? - - @blob ||= Blob.decorate(::Ci::ArtifactBlob.new(self), nil) - end - - def has_parent? - nodes > 0 - end - - def parent - return nil unless has_parent? - self.class.new(@path.chomp(basename), @entries) - end - - def basename - (directory? && !blank_node?) ? name + '/' : name - end - - def name - @name || @path.split('/').last.to_s - end - - def children - return [] unless directory? - return @children if @children - - child_pattern = %r{^#{Regexp.escape(@path)}[^/]+/?$} - @children = select_entries { |path| path =~ child_pattern } - end - - def directories(opts = {}) - return [] unless directory? - dirs = children.select(&:directory?) - return dirs unless has_parent? && opts[:parent] - - dotted_parent = parent - dotted_parent.name = '..' - dirs.prepend(dotted_parent) - end - - def files - return [] unless directory? - children.select(&:file?) - end - - def metadata - @entries[@path] || {} - end - - def nodes - @path.count('/') + (file? ? 1 : 0) - end - - def blank_node? - @path.empty? # "" is considered to be './' - end - - def exists? - blank_node? || @entries.include?(@path) - end - - def total_size - descendant_pattern = %r{^#{Regexp.escape(@path)}} - entries.sum do |path, entry| - (entry[:size] if path =~ descendant_pattern).to_i - end - end - - def to_s - @path - end - - def ==(other) - @path == other.path && @entries == other.entries - end - - def inspect - "#{self.class.name}: #{@path}" - end - - private - - def select_entries - selected = @entries.select { |path, _metadata| yield path } - selected.map { |path, _metadata| self.class.new(path, @entries) } - end end end end diff --git a/lib/gitlab/ci/build/artifacts/path.rb b/lib/gitlab/ci/build/artifacts/path.rb new file mode 100644 index 00000000000..9cd9b36c5f8 --- /dev/null +++ b/lib/gitlab/ci/build/artifacts/path.rb @@ -0,0 +1,51 @@ +module Gitlab + module Ci + module Build + module Artifacts + class Path + def initialize(path) + @path = path.dup.force_encoding('UTF-8') + end + + def valid? + nonzero? && utf8? + end + + def directory? + @path.end_with?('/') + end + + def name + @path.split('/').last.to_s + end + + def nodes + @path.count('/') + end + + def to_s + @path.tap do |path| + unless nonzero? + raise ArgumentError, 'Path contains zero byte character!' + end + + unless utf8? + raise ArgumentError, 'Path contains non-UTF-8 byte sequence!' + end + end + end + + private + + def nonzero? + @path.exclude?("\0") + end + + def utf8? + @path.valid_encoding? + end + end + end + end + end +end diff --git a/lib/gitlab/encoding_helper.rb b/lib/gitlab/encoding_helper.rb index 8ddc91e341d..7b3483a7f96 100644 --- a/lib/gitlab/encoding_helper.rb +++ b/lib/gitlab/encoding_helper.rb @@ -22,10 +22,10 @@ module Gitlab # return message if message type is binary detect = CharlockHolmes::EncodingDetector.detect(message) - return message.force_encoding("BINARY") if detect && detect[:type] == :binary + return message.force_encoding("BINARY") if detect_binary?(message, detect) - # force detected encoding if we have sufficient confidence. if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD + # force detected encoding if we have sufficient confidence. message.force_encoding(detect[:encoding]) end @@ -36,6 +36,19 @@ module Gitlab "--broken encoding: #{encoding}" end + def detect_binary?(data, detect = nil) + detect ||= CharlockHolmes::EncodingDetector.detect(data) + detect && detect[:type] == :binary && detect[:confidence] == 100 + end + + def detect_libgit2_binary?(data) + # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks + # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15), + # which is what we use below to keep a consistent behavior. + detect = CharlockHolmes::EncodingDetector.new(8000).detect(data) + detect && detect[:type] == :binary + end + def encode_utf8(message) detect = CharlockHolmes::EncodingDetector.detect(message) if detect && detect[:encoding] diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb index 7780f4e4d4f..8d96826f6ee 100644 --- a/lib/gitlab/git/blob.rb +++ b/lib/gitlab/git/blob.rb @@ -42,14 +42,6 @@ module Gitlab end end - def binary?(data) - # EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks - # only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15), - # which is what we use below to keep a consistent behavior. - detect = CharlockHolmes::EncodingDetector.new(8000).detect(data) - detect && detect[:type] == :binary - end - # Returns an array of Blob instances, specified in blob_references as # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the # full blob contents are returned. If blob_size_limit >= 0 then each blob will @@ -65,6 +57,10 @@ module Gitlab end end + def binary?(data) + EncodingHelper.detect_libgit2_binary?(data) + end + private # Recursive search of blob id by path diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index ce3d65062e8..a23c8cf0dd1 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -116,6 +116,15 @@ module Gitlab filtered_opts end + + # Return a binary diff message like: + # + # "Binary files a/file/path and b/file/path differ\n" + # This is used when we detect that a diff is binary + # using CharlockHolmes when Rugged treats it as text. + def binary_message(old_path, new_path) + "Binary files #{old_path} and #{new_path} differ\n" + end end def initialize(raw_diff, expanded: true) @@ -190,6 +199,13 @@ module Gitlab @collapsed = true end + def json_safe_diff + return @diff unless detect_binary?(@diff) + + # the diff is binary, let's make a message for it + Diff.binary_message(@old_path, @new_path) + end + private def init_from_rugged(rugged) diff --git a/lib/gitlab/import_export/import_export.yml b/lib/gitlab/import_export/import_export.yml index 78795dd3d92..ec73846d844 100644 --- a/lib/gitlab/import_export/import_export.yml +++ b/lib/gitlab/import_export/import_export.yml @@ -116,6 +116,7 @@ excluded_attributes: statuses: - :trace - :token + - :when push_event_payload: - :event_id diff --git a/lib/gitlab/import_export/project_tree_restorer.rb b/lib/gitlab/import_export/project_tree_restorer.rb index cbc8d170936..3bc095a99a9 100644 --- a/lib/gitlab/import_export/project_tree_restorer.rb +++ b/lib/gitlab/import_export/project_tree_restorer.rb @@ -9,6 +9,8 @@ module Gitlab @user = user @shared = shared @project = project + @project_id = project.id + @saved = true end def restore @@ -22,8 +24,10 @@ module Gitlab @project_members = @tree_hash.delete('project_members') - ActiveRecord::Base.no_touching do - create_relations + ActiveRecord::Base.uncached do + ActiveRecord::Base.no_touching do + create_relations + end end rescue => e @shared.error(e) @@ -48,21 +52,24 @@ module Gitlab # the configuration yaml file too. # Finally, it updates each attribute in the newly imported project. def create_relations - saved = [] default_relation_list.each do |relation| - next unless relation.is_a?(Hash) || @tree_hash[relation.to_s].present? + if relation.is_a?(Hash) + create_sub_relations(relation, @tree_hash) + elsif @tree_hash[relation.to_s].present? + save_relation_hash(@tree_hash[relation.to_s], relation) + end + end - create_sub_relations(relation, @tree_hash) if relation.is_a?(Hash) + @saved + end - relation_key = relation.is_a?(Hash) ? relation.keys.first : relation - relation_hash_list = @tree_hash[relation_key.to_s] + def save_relation_hash(relation_hash_batch, relation_key) + relation_hash = create_relation(relation_key, relation_hash_batch) - next unless relation_hash_list + @saved = false unless restored_project.append_or_update_attribute(relation_key, relation_hash) - relation_hash = create_relation(relation_key, relation_hash_list) - saved << restored_project.append_or_update_attribute(relation_key, relation_hash) - end - saved.all? + # Restore the project again, extra query that skips holding the AR objects in memory + @restored_project = Project.find(@project_id) end def default_relation_list @@ -93,20 +100,42 @@ module Gitlab # issue, finds any subrelations such as notes, creates them and assign them back to the hash # # Recursively calls this method if the sub-relation is a hash containing more sub-relations - def create_sub_relations(relation, tree_hash) + def create_sub_relations(relation, tree_hash, save: true) relation_key = relation.keys.first.to_s return if tree_hash[relation_key].blank? - [tree_hash[relation_key]].flatten.each do |relation_item| - relation.values.flatten.each do |sub_relation| - # We just use author to get the user ID, do not attempt to create an instance. - next if sub_relation == :author + tree_array = [tree_hash[relation_key]].flatten + + # Avoid keeping a possible heavy object in memory once we are done with it + while relation_item = tree_array.shift + # The transaction at this level is less speedy than one single transaction + # But we can't have it in the upper level or GC won't get rid of the AR objects + # after we save the batch. + Project.transaction do + process_sub_relation(relation, relation_item) + + # For every subrelation that hangs from Project, save the associated records alltogether + # This effectively batches all records per subrelation item, only keeping those in memory + # We have to keep in mind that more batch granularity << Memory, but >> Slowness + if save + save_relation_hash([relation_item], relation_key) + tree_hash[relation_key].delete(relation_item) + end + end + end + + tree_hash.delete(relation_key) if save + end + + def process_sub_relation(relation, relation_item) + relation.values.flatten.each do |sub_relation| + # We just use author to get the user ID, do not attempt to create an instance. + next if sub_relation == :author - create_sub_relations(sub_relation, relation_item) if sub_relation.is_a?(Hash) + create_sub_relations(sub_relation, relation_item, save: false) if sub_relation.is_a?(Hash) - relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation) - relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank? - end + relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation) + relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank? end end @@ -121,14 +150,12 @@ module Gitlab end def create_relation(relation, relation_hash_list) - relation_type = relation.to_sym - relation_array = [relation_hash_list].flatten.map do |relation_hash| - Gitlab::ImportExport::RelationFactory.create(relation_sym: relation_type, - relation_hash: parsed_relation_hash(relation_hash, relation_type), + Gitlab::ImportExport::RelationFactory.create(relation_sym: relation.to_sym, + relation_hash: parsed_relation_hash(relation_hash, relation.to_sym), members_mapper: members_mapper, user: @user, - project: restored_project) + project: @restored_project) end.compact relation_hash_list.is_a?(Array) ? relation_array : relation_array.first diff --git a/lib/gitlab/import_export/shared.rb b/lib/gitlab/import_export/shared.rb index 5d6de8bc475..9fd0b709ef2 100644 --- a/lib/gitlab/import_export/shared.rb +++ b/lib/gitlab/import_export/shared.rb @@ -16,7 +16,7 @@ module Gitlab error_out(error.message, caller[0].dup) @errors << error.message # Debug: - Rails.logger.error(error.backtrace) + Rails.logger.error(error.backtrace.join("\n")) end private diff --git a/lib/gitlab/ldap/user.rb b/lib/gitlab/ldap/user.rb index 39180dc17d9..3bf27b37ae6 100644 --- a/lib/gitlab/ldap/user.rb +++ b/lib/gitlab/ldap/user.rb @@ -36,7 +36,7 @@ module Gitlab end def find_by_email - ::User.find_by(email: auth_hash.email.downcase) if auth_hash.has_email? + ::User.find_by(email: auth_hash.email.downcase) if auth_hash.has_attribute?(:email) end def update_user_attributes @@ -60,7 +60,7 @@ module Gitlab ldap_config.block_auto_created_users end - def sync_email_from_provider? + def sync_profile_from_provider? true end diff --git a/lib/gitlab/o_auth/auth_hash.rb b/lib/gitlab/o_auth/auth_hash.rb index 7d6911a1ab3..1f331b1e91d 100644 --- a/lib/gitlab/o_auth/auth_hash.rb +++ b/lib/gitlab/o_auth/auth_hash.rb @@ -32,8 +32,21 @@ module Gitlab @password ||= Gitlab::Utils.force_utf8(Devise.friendly_token[0, 8].downcase) end - def has_email? - get_info(:email).present? + def location + location = get_info(:address) + if location.is_a?(Hash) + [location.locality.presence, location.country.presence].compact.join(', ') + else + location + end + end + + def has_attribute?(attribute) + if attribute == :location + get_info(:address).present? + else + get_info(attribute).present? + end end private diff --git a/lib/gitlab/o_auth/user.rb b/lib/gitlab/o_auth/user.rb index e8330917e91..7704bf715e4 100644 --- a/lib/gitlab/o_auth/user.rb +++ b/lib/gitlab/o_auth/user.rb @@ -12,7 +12,7 @@ module Gitlab def initialize(auth_hash) self.auth_hash = auth_hash - update_email + update_profile if sync_profile_from_provider? end def persisted? @@ -184,20 +184,30 @@ module Gitlab } end - def sync_email_from_provider? - auth_hash.provider.to_s == Gitlab.config.omniauth.sync_email_from_provider.to_s + def sync_profile_from_provider? + providers = Gitlab.config.omniauth.sync_profile_from_provider + + if providers.is_a?(Array) + providers.include?(auth_hash.provider) + else + providers + end end - def update_email - if auth_hash.has_email? && sync_email_from_provider? - if persisted? - gl_user.skip_reconfirmation! - gl_user.email = auth_hash.email - end + def update_profile + user_synced_attributes_metadata = gl_user.user_synced_attributes_metadata || gl_user.build_user_synced_attributes_metadata - gl_user.external_email = true - gl_user.email_provider = auth_hash.provider + UserSyncedAttributesMetadata::SYNCABLE_ATTRIBUTES.each do |key| + if auth_hash.has_attribute?(key) && gl_user.sync_attribute?(key) + gl_user[key] = auth_hash.public_send(key) # rubocop:disable GitlabSecurity/PublicSend + user_synced_attributes_metadata.set_attribute_synced(key, true) + else + user_synced_attributes_metadata.set_attribute_synced(key, false) + end end + + user_synced_attributes_metadata.provider = auth_hash.provider + gl_user.user_synced_attributes_metadata = user_synced_attributes_metadata end def log diff --git a/lib/gitlab/saml/user.rb b/lib/gitlab/saml/user.rb index 8a7cc690046..0f323a9e8b2 100644 --- a/lib/gitlab/saml/user.rb +++ b/lib/gitlab/saml/user.rb @@ -40,7 +40,7 @@ module Gitlab end def find_by_email - if auth_hash.has_email? + if auth_hash.has_attribute?(:email) user = ::User.find_by(email: auth_hash.email.downcase) user.identities.new(extern_uid: auth_hash.uid, provider: auth_hash.provider) if user user diff --git a/lib/gitlab/url_sanitizer.rb b/lib/gitlab/url_sanitizer.rb index c81dc7e30d0..703adae12cb 100644 --- a/lib/gitlab/url_sanitizer.rb +++ b/lib/gitlab/url_sanitizer.rb @@ -9,7 +9,7 @@ module Gitlab end def self.valid?(url) - return false unless url + return false unless url.present? Addressable::URI.parse(url.strip) @@ -19,7 +19,12 @@ module Gitlab end def initialize(url, credentials: nil) - @url = Addressable::URI.parse(url.strip) + @url = Addressable::URI.parse(url.to_s.strip) + + %i[user password].each do |symbol| + credentials[symbol] = credentials[symbol].presence if credentials&.key?(symbol) + end + @credentials = credentials end @@ -29,13 +34,13 @@ module Gitlab def masked_url url = @url.dup - url.password = "*****" unless url.password.nil? - url.user = "*****" unless url.user.nil? + url.password = "*****" if url.password.present? + url.user = "*****" if url.user.present? url.to_s end def credentials - @credentials ||= { user: @url.user, password: @url.password } + @credentials ||= { user: @url.user.presence, password: @url.password.presence } end def full_url @@ -47,8 +52,10 @@ module Gitlab def generate_full_url return @url unless valid_credentials? @full_url = @url.dup - @full_url.user = credentials[:user] + @full_url.password = credentials[:password] + @full_url.user = credentials[:user] + @full_url end diff --git a/lib/gitlab/workhorse.rb b/lib/gitlab/workhorse.rb index e5ad9b5a40c..7a94af2f8f1 100644 --- a/lib/gitlab/workhorse.rb +++ b/lib/gitlab/workhorse.rb @@ -121,10 +121,10 @@ module Gitlab ] end - def send_artifacts_entry(build, entry) + def send_artifacts_entry(build, path) params = { 'Archive' => build.artifacts_file.path, - 'Entry' => Base64.encode64(entry.path) + 'Entry' => Base64.encode64(path.to_s) } [ |