diff options
author | Robert Speicher <rspeicher@gmail.com> | 2019-04-02 12:56:40 +0000 |
---|---|---|
committer | Robert Speicher <rspeicher@gmail.com> | 2019-04-02 12:56:40 +0000 |
commit | 4b9dbec33ce446362d617f481b35628890763bd7 (patch) | |
tree | 9dceae8f3b1a4526c5a20ad23fa0df1874cab90c /lib | |
parent | 784b1756020ba564b45cb539a538f79c138f92dd (diff) | |
parent | 69b65a6b745e74bba290787420a0017395fd7c25 (diff) | |
download | gitlab-ce-4b9dbec33ce446362d617f481b35628890763bd7.tar.gz |
Merge branch 'jarv/dev-to-gitlab-2019-04-02' into 'master'
Jarv/dev to gitlab 2019 04 02
Closes #2810
See merge request gitlab-org/gitlab-ce!26846
Diffstat (limited to 'lib')
-rw-r--r-- | lib/api/projects.rb | 8 | ||||
-rw-r--r-- | lib/gitlab/ci/build/policy/refs.rb | 4 | ||||
-rw-r--r-- | lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb | 4 | ||||
-rw-r--r-- | lib/gitlab/config/entry/legacy_validation_helpers.rb | 8 | ||||
-rw-r--r-- | lib/gitlab/config/entry/validators.rb | 14 | ||||
-rw-r--r-- | lib/gitlab/import_export/import_export.yml | 1 | ||||
-rw-r--r-- | lib/gitlab/sanitizers/exif.rb | 156 | ||||
-rw-r--r-- | lib/gitlab/untrusted_regexp.rb | 35 | ||||
-rw-r--r-- | lib/gitlab/untrusted_regexp/ruby_syntax.rb | 43 | ||||
-rw-r--r-- | lib/tasks/gitlab/uploads/sanitize.rake | 18 |
10 files changed, 237 insertions, 54 deletions
diff --git a/lib/api/projects.rb b/lib/api/projects.rb index 0f4a47677d9..57336e95041 100644 --- a/lib/api/projects.rb +++ b/lib/api/projects.rb @@ -373,11 +373,9 @@ module API desc 'Get languages in project repository' get ':id/languages' do - if user_project.repository_languages.present? - user_project.repository_languages.map { |l| [l.name, l.share] }.to_h - else - user_project.repository.languages.map { |language| language.values_at(:label, :value) }.to_h - end + ::Projects::RepositoryLanguagesService + .new(user_project, current_user) + .execute.map { |lang| [lang.name, lang.share] }.to_h end desc 'Remove a project' diff --git a/lib/gitlab/ci/build/policy/refs.rb b/lib/gitlab/ci/build/policy/refs.rb index df5f5ffc253..360424bec11 100644 --- a/lib/gitlab/ci/build/policy/refs.rb +++ b/lib/gitlab/ci/build/policy/refs.rb @@ -35,8 +35,8 @@ module Gitlab # patterns can be matched only when branch or tag is used # the pattern matching does not work for merge requests pipelines if pipeline.branch? || pipeline.tag? - if pattern.first == "/" && pattern.last == "/" - Regexp.new(pattern[1...-1]) =~ pipeline.ref + if regexp = Gitlab::UntrustedRegexp::RubySyntax.fabricate(pattern) + regexp.match?(pipeline.ref) else pattern == pipeline.ref end diff --git a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb index d7e6dacf068..2b719c9c6fc 100644 --- a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb +++ b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb @@ -13,13 +13,13 @@ module Gitlab def initialize(regexp) @value = regexp - unless Gitlab::UntrustedRegexp.valid?(@value) + unless Gitlab::UntrustedRegexp::RubySyntax.valid?(@value) raise Lexer::SyntaxError, 'Invalid regular expression!' end end def evaluate(variables = {}) - Gitlab::UntrustedRegexp.fabricate(@value) + Gitlab::UntrustedRegexp::RubySyntax.fabricate!(@value) rescue RegexpError raise Expression::RuntimeError, 'Invalid regular expression!' end diff --git a/lib/gitlab/config/entry/legacy_validation_helpers.rb b/lib/gitlab/config/entry/legacy_validation_helpers.rb index d3ab5625743..0a629075302 100644 --- a/lib/gitlab/config/entry/legacy_validation_helpers.rb +++ b/lib/gitlab/config/entry/legacy_validation_helpers.rb @@ -45,17 +45,15 @@ module Gitlab end def validate_regexp(value) - !value.nil? && Regexp.new(value.to_s) && true - rescue RegexpError, TypeError - false + Gitlab::UntrustedRegexp::RubySyntax.valid?(value) end def validate_string_or_regexp(value) return true if value.is_a?(Symbol) return false unless value.is_a?(String) - if value.first == '/' && value.last == '/' - validate_regexp(value[1...-1]) + if Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value) + validate_regexp(value) else true end diff --git a/lib/gitlab/config/entry/validators.rb b/lib/gitlab/config/entry/validators.rb index 25bfa50f829..d348e11b753 100644 --- a/lib/gitlab/config/entry/validators.rb +++ b/lib/gitlab/config/entry/validators.rb @@ -120,17 +120,13 @@ module Gitlab private - def look_like_regexp?(value) - value.is_a?(String) && value.start_with?('/') && - value.end_with?('/') + def matches_syntax?(value) + Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value) end def validate_regexp(value) - look_like_regexp?(value) && - Regexp.new(value.to_s[1...-1]) && - true - rescue RegexpError - false + matches_syntax?(value) && + Gitlab::UntrustedRegexp::RubySyntax.valid?(value) end end @@ -149,7 +145,7 @@ module Gitlab def validate_string_or_regexp(value) return false unless value.is_a?(String) - return validate_regexp(value) if look_like_regexp?(value) + return validate_regexp(value) if matches_syntax?(value) true end diff --git a/lib/gitlab/import_export/import_export.yml b/lib/gitlab/import_export/import_export.yml index 89667976217..ce268793128 100644 --- a/lib/gitlab/import_export/import_export.yml +++ b/lib/gitlab/import_export/import_export.yml @@ -118,6 +118,7 @@ excluded_attributes: - :description_html - :repository_languages - :bfg_object_map + - :detected_repository_languages - :tag_list namespaces: - :runners_token diff --git a/lib/gitlab/sanitizers/exif.rb b/lib/gitlab/sanitizers/exif.rb new file mode 100644 index 00000000000..0928ccdc324 --- /dev/null +++ b/lib/gitlab/sanitizers/exif.rb @@ -0,0 +1,156 @@ +# frozen_string_literal: true + +module Gitlab + module Sanitizers + class Exif + # these tags are not removed from the image + WHITELISTED_TAGS = %w( + ResolutionUnit + XResolution + YResolution + YCbCrSubSampling + YCbCrPositioning + BitsPerSample + ImageHeight + ImageWidth + ImageSize + Copyright + CopyrightNotice + Orientation + ).freeze + + # these tags are common in exiftool output, these + # do not contain any sensitive information, but + # we don't need to preserve them when removing + # exif tags + IGNORED_TAGS = %w( + ColorComponents + EncodingProcess + ExifByteOrder + ExifToolVersion + JFIFVersion + Directory + FileAccessDate + FileInodeChangeDate + FileModifyDate + FileName + FilePermissions + FileSize + SourceFile + Megapixels + FileType + FileTypeExtension + MIMEType + ).freeze + + ALLOWED_TAGS = WHITELISTED_TAGS + IGNORED_TAGS + EXCLUDE_PARAMS = WHITELISTED_TAGS.map { |tag| "-#{tag}" } + + attr_reader :logger + + def initialize(logger: Rails.logger) + @logger = logger + end + + # rubocop: disable CodeReuse/ActiveRecord + def batch_clean(start_id: nil, stop_id: nil, dry_run: true, sleep_time: nil) + relation = Upload.where('lower(path) like ? or lower(path) like ? or lower(path) like ?', + '%.jpg', '%.jpeg', '%.tiff') + + logger.info "running in dry run mode, no images will be rewritten" if dry_run + + find_params = { + start: start_id.present? ? start_id.to_i : nil, + finish: stop_id.present? ? stop_id.to_i : Upload.last&.id + } + + relation.find_each(find_params) do |upload| + clean(upload.build_uploader, dry_run: dry_run) + sleep sleep_time if sleep_time + rescue => err + logger.error "failed to sanitize #{upload_ref(upload)}: #{err.message}" + logger.debug err.backtrace.join("\n ") + end + end + # rubocop: enable CodeReuse/ActiveRecord + + def clean(uploader, dry_run: true) + Dir.mktmpdir('gitlab-exif') do |tmpdir| + src_path = fetch_upload_to_file(uploader, tmpdir) + + to_remove = extra_tags(src_path) + + if to_remove.empty? + logger.info "#{upload_ref(uploader.upload)}: only whitelisted tags present, skipping" + break + end + + logger.info "#{upload_ref(uploader.upload)}: found exif tags to remove: #{to_remove}" + + break if dry_run + + remove_and_store(tmpdir, src_path, uploader) + end + end + + def extra_tags(path) + exif_tags(path).keys - ALLOWED_TAGS + end + + private + + def remove_and_store(tmpdir, src_path, uploader) + exec_remove_exif!(src_path) + logger.info "#{upload_ref(uploader.upload)}: exif removed, storing" + File.open(src_path, 'r') { |f| uploader.store!(f) } + end + + def exec_remove_exif!(path) + # IPTC and XMP-iptcExt groups may keep copyright information so + # we always preserve them + cmd = ["exiftool", "-all=", "-tagsFromFile", "@", *EXCLUDE_PARAMS, "--IPTC:all", "--XMP-iptcExt:all", path] + output, status = Gitlab::Popen.popen(cmd) + + if status != 0 + raise "exiftool return code is #{status}: #{output}" + end + + if File.size(path) == 0 + raise "size of file is 0" + end + + # exiftool creates backup of the original file in filename_original + old_path = "#{path}_original" + if File.size(path) == File.size(old_path) + raise "size of sanitized file is same as original size" + end + end + + def fetch_upload_to_file(uploader, dir) + # upload is stored into the file with the original name - this filename + # is used by carrierwave when storing the file back to the storage + filename = File.join(dir, uploader.filename) + + File.open(filename, 'w') do |file| + file.binmode + file.write uploader.read + end + + filename + end + + def upload_ref(upload) + "#{upload.id}:#{upload.path}" + end + + def exif_tags(path) + cmd = ["exiftool", "-all", "-j", "-sort", "--IPTC:all", "--XMP-iptcExt:all", path] + output, status = Gitlab::Popen.popen(cmd) + + raise "failed to get exif tags: #{output}" if status != 0 + + JSON.parse(output).first + end + end + end +end diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb index ba1137313d8..14126b6ec06 100644 --- a/lib/gitlab/untrusted_regexp.rb +++ b/lib/gitlab/untrusted_regexp.rb @@ -35,6 +35,10 @@ module Gitlab matches end + def match?(text) + text.present? && scan(text).present? + end + def replace(text, rewrite) RE2.Replace(text, regexp, rewrite) end @@ -43,37 +47,6 @@ module Gitlab self.source == other.source end - # Handles regular expressions with the preferred RE2 library where possible - # via UntustedRegex. Falls back to Ruby's built-in regular expression library - # when the syntax would be invalid in RE2. - # - # One difference between these is `(?m)` multi-line mode. Ruby regex enables - # this by default, but also handles `^` and `$` differently. - # See: https://www.regular-expressions.info/modifiers.html - def self.with_fallback(pattern, multiline: false) - UntrustedRegexp.new(pattern, multiline: multiline) - rescue RegexpError - Regexp.new(pattern) - end - - def self.valid?(pattern) - !!self.fabricate(pattern) - rescue RegexpError - false - end - - def self.fabricate(pattern) - matches = pattern.match(%r{^/(?<regexp>.+)/(?<flags>[ismU]*)$}) - - raise RegexpError, 'Invalid regular expression!' if matches.nil? - - expression = matches[:regexp] - flags = matches[:flags] - expression.prepend("(?#{flags})") if flags.present? - - self.new(expression, multiline: false) - end - private attr_reader :regexp diff --git a/lib/gitlab/untrusted_regexp/ruby_syntax.rb b/lib/gitlab/untrusted_regexp/ruby_syntax.rb new file mode 100644 index 00000000000..91f300f97d0 --- /dev/null +++ b/lib/gitlab/untrusted_regexp/ruby_syntax.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +module Gitlab + class UntrustedRegexp + # This class implements support for Ruby syntax of regexps + # and converts that to RE2 representation: + # /<regexp>/<flags> + class RubySyntax + PATTERN = %r{^/(?<regexp>.+)/(?<flags>[ismU]*)$}.freeze + + # Checks if pattern matches a regexp pattern + # but does not enforce it's validity + def self.matches_syntax?(pattern) + pattern.is_a?(String) && pattern.match(PATTERN).present? + end + + # The regexp can match the pattern `/.../`, but may not be fabricatable: + # it can be invalid or incomplete: `/match ( string/` + def self.valid?(pattern) + !!self.fabricate(pattern) + end + + def self.fabricate(pattern) + self.fabricate!(pattern) + rescue RegexpError + nil + end + + def self.fabricate!(pattern) + raise RegexpError, 'Pattern is not string!' unless pattern.is_a?(String) + + matches = pattern.match(PATTERN) + raise RegexpError, 'Invalid regular expression!' if matches.nil? + + expression = matches[:regexp] + flags = matches[:flags] + expression.prepend("(?#{flags})") if flags.present? + + UntrustedRegexp.new(expression, multiline: false) + end + end + end +end diff --git a/lib/tasks/gitlab/uploads/sanitize.rake b/lib/tasks/gitlab/uploads/sanitize.rake new file mode 100644 index 00000000000..12cf5302555 --- /dev/null +++ b/lib/tasks/gitlab/uploads/sanitize.rake @@ -0,0 +1,18 @@ +namespace :gitlab do + namespace :uploads do + namespace :sanitize do + desc 'GitLab | Uploads | Remove EXIF from images.' + task :remove_exif, [:start_id, :stop_id, :dry_run, :sleep_time] => :environment do |task, args| + args.with_defaults(dry_run: 'true') + args.with_defaults(sleep_time: 0.3) + + logger = Logger.new(STDOUT) + + sanitizer = Gitlab::Sanitizers::Exif.new(logger: logger) + sanitizer.batch_clean(start_id: args.start_id, stop_id: args.stop_id, + dry_run: args.dry_run != 'false', + sleep_time: args.sleep_time.to_f) + end + end + end +end |