summary | refs | log | tree | commit | diff
path: root/lib/gitlab
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab')
-rw-r--r-- lib/gitlab/ci/build/policy/refs.rb 4
-rw-r--r-- lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb 4
-rw-r--r-- lib/gitlab/config/entry/legacy_validation_helpers.rb 8
-rw-r--r-- lib/gitlab/config/entry/validators.rb 14
-rw-r--r-- lib/gitlab/import_export/import_export.yml 1
-rw-r--r-- lib/gitlab/sanitizers/exif.rb 156
-rw-r--r-- lib/gitlab/untrusted_regexp.rb 35
-rw-r--r-- lib/gitlab/untrusted_regexp/ruby_syntax.rb 43
8 files changed, 216 insertions, 49 deletions
diff --git a/lib/gitlab/ci/build/policy/refs.rb b/lib/gitlab/ci/build/policy/refs.rb
index df5f5ffc253..360424bec11 100644
--- a/lib/gitlab/ci/build/policy/refs.rb
+++ b/lib/gitlab/ci/build/policy/refs.rb
@@ -35,8 +35,8 @@ module Gitlab
# patterns can be matched only when branch or tag is used
# the pattern matching does not work for merge requests pipelines
if pipeline.branch? || pipeline.tag?
- if pattern.first == "/" && pattern.last == "/"
- Regexp.new(pattern[1...-1]) =~ pipeline.ref
+ if regexp = Gitlab::UntrustedRegexp::RubySyntax.fabricate(pattern)
+ regexp.match?(pipeline.ref)
else
pattern == pipeline.ref
end
diff --git a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb
index d7e6dacf068..2b719c9c6fc 100644
--- a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb
+++ b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb
@@ -13,13 +13,13 @@ module Gitlab
def initialize(regexp)
@value = regexp
- unless Gitlab::UntrustedRegexp.valid?(@value)
+ unless Gitlab::UntrustedRegexp::RubySyntax.valid?(@value)
raise Lexer::SyntaxError, 'Invalid regular expression!'
end
end
def evaluate(variables = {})
- Gitlab::UntrustedRegexp.fabricate(@value)
+ Gitlab::UntrustedRegexp::RubySyntax.fabricate!(@value)
rescue RegexpError
raise Expression::RuntimeError, 'Invalid regular expression!'
end
diff --git a/lib/gitlab/config/entry/legacy_validation_helpers.rb b/lib/gitlab/config/entry/legacy_validation_helpers.rb
index d3ab5625743..0a629075302 100644
--- a/lib/gitlab/config/entry/legacy_validation_helpers.rb
+++ b/lib/gitlab/config/entry/legacy_validation_helpers.rb
@@ -45,17 +45,15 @@ module Gitlab
end
def validate_regexp(value)
- !value.nil? && Regexp.new(value.to_s) && true
- rescue RegexpError, TypeError
- false
+ Gitlab::UntrustedRegexp::RubySyntax.valid?(value)
end
def validate_string_or_regexp(value)
return true if value.is_a?(Symbol)
return false unless value.is_a?(String)
- if value.first == '/' && value.last == '/'
- validate_regexp(value[1...-1])
+ if Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value)
+ validate_regexp(value)
else
true
end
diff --git a/lib/gitlab/config/entry/validators.rb b/lib/gitlab/config/entry/validators.rb
index 25bfa50f829..d348e11b753 100644
--- a/lib/gitlab/config/entry/validators.rb
+++ b/lib/gitlab/config/entry/validators.rb
@@ -120,17 +120,13 @@ module Gitlab
private
- def look_like_regexp?(value)
- value.is_a?(String) && value.start_with?('/') &&
- value.end_with?('/')
+ def matches_syntax?(value)
+ Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value)
end
def validate_regexp(value)
- look_like_regexp?(value) &&
- Regexp.new(value.to_s[1...-1]) &&
- true
- rescue RegexpError
- false
+ matches_syntax?(value) &&
+ Gitlab::UntrustedRegexp::RubySyntax.valid?(value)
end
end
@@ -149,7 +145,7 @@ module Gitlab
def validate_string_or_regexp(value)
return false unless value.is_a?(String)
- return validate_regexp(value) if look_like_regexp?(value)
+ return validate_regexp(value) if matches_syntax?(value)
true
end
diff --git a/lib/gitlab/import_export/import_export.yml b/lib/gitlab/import_export/import_export.yml
index 89667976217..ce268793128 100644
--- a/lib/gitlab/import_export/import_export.yml
+++ b/lib/gitlab/import_export/import_export.yml
@@ -118,6 +118,7 @@ excluded_attributes:
- :description_html
- :repository_languages
- :bfg_object_map
+ - :detected_repository_languages
- :tag_list
namespaces:
- :runners_token
diff --git a/lib/gitlab/sanitizers/exif.rb b/lib/gitlab/sanitizers/exif.rb
new file mode 100644
index 00000000000..0928ccdc324
--- /dev/null
+++ b/lib/gitlab/sanitizers/exif.rb
@@ -0,0 +1,156 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Sanitizers
+ class Exif
+ # these tags are not removed from the image
+ WHITELISTED_TAGS = %w(
+ ResolutionUnit
+ XResolution
+ YResolution
+ YCbCrSubSampling
+ YCbCrPositioning
+ BitsPerSample
+ ImageHeight
+ ImageWidth
+ ImageSize
+ Copyright
+ CopyrightNotice
+ Orientation
+ ).freeze
+
+ # these tags are common in exiftool output; they
+ # do not contain any sensitive information, so they
+ # are not reported as tags to remove, but we don't
+ # need to preserve them when removing exif tags
+ IGNORED_TAGS = %w(
+ ColorComponents
+ EncodingProcess
+ ExifByteOrder
+ ExifToolVersion
+ JFIFVersion
+ Directory
+ FileAccessDate
+ FileInodeChangeDate
+ FileModifyDate
+ FileName
+ FilePermissions
+ FileSize
+ SourceFile
+ Megapixels
+ FileType
+ FileTypeExtension
+ MIMEType
+ ).freeze
+
+ ALLOWED_TAGS = WHITELISTED_TAGS + IGNORED_TAGS
+ EXCLUDE_PARAMS = WHITELISTED_TAGS.map { |tag| "-#{tag}" }
+
+ attr_reader :logger
+
+ def initialize(logger: Rails.logger)
+ @logger = logger
+ end
+
+ # rubocop: disable CodeReuse/ActiveRecord
+ def batch_clean(start_id: nil, stop_id: nil, dry_run: true, sleep_time: nil)
+ relation = Upload.where('lower(path) like ? or lower(path) like ? or lower(path) like ?',
+ '%.jpg', '%.jpeg', '%.tiff')
+
+ logger.info "running in dry run mode, no images will be rewritten" if dry_run
+
+ find_params = {
+ start: start_id.present? ? start_id.to_i : nil,
+ finish: stop_id.present? ? stop_id.to_i : Upload.last&.id
+ }
+
+ relation.find_each(find_params) do |upload|
+ clean(upload.build_uploader, dry_run: dry_run)
+ sleep sleep_time if sleep_time
+ rescue => err
+ logger.error "failed to sanitize #{upload_ref(upload)}: #{err.message}"
+ logger.debug err.backtrace.join("\n ")
+ end
+ end
+ # rubocop: enable CodeReuse/ActiveRecord
+
+ def clean(uploader, dry_run: true)
+ Dir.mktmpdir('gitlab-exif') do |tmpdir|
+ src_path = fetch_upload_to_file(uploader, tmpdir)
+
+ to_remove = extra_tags(src_path)
+
+ if to_remove.empty?
+ logger.info "#{upload_ref(uploader.upload)}: only whitelisted tags present, skipping"
+ break
+ end
+
+ logger.info "#{upload_ref(uploader.upload)}: found exif tags to remove: #{to_remove}"
+
+ break if dry_run
+
+ remove_and_store(tmpdir, src_path, uploader)
+ end
+ end
+
+ def extra_tags(path)
+ exif_tags(path).keys - ALLOWED_TAGS
+ end
+
+ private
+
+ def remove_and_store(tmpdir, src_path, uploader)
+ exec_remove_exif!(src_path)
+ logger.info "#{upload_ref(uploader.upload)}: exif removed, storing"
+ File.open(src_path, 'r') { |f| uploader.store!(f) }
+ end
+
+ def exec_remove_exif!(path)
+ # IPTC and XMP-iptcExt groups may keep copyright information so
+ # we always preserve them
+ cmd = ["exiftool", "-all=", "-tagsFromFile", "@", *EXCLUDE_PARAMS, "--IPTC:all", "--XMP-iptcExt:all", path]
+ output, status = Gitlab::Popen.popen(cmd)
+
+ if status != 0
+ raise "exiftool return code is #{status}: #{output}"
+ end
+
+ if File.size(path) == 0
+ raise "size of file is 0"
+ end
+
+ # exiftool creates a backup of the original file in "<filename>_original"
+ old_path = "#{path}_original"
+ if File.size(path) == File.size(old_path)
+ raise "size of sanitized file is same as original size"
+ end
+ end
+
+ def fetch_upload_to_file(uploader, dir)
+ # upload is stored into the file with the original name - this filename
+ # is used by carrierwave when storing the file back to the storage
+ filename = File.join(dir, uploader.filename)
+
+ File.open(filename, 'w') do |file|
+ file.binmode
+ file.write uploader.read
+ end
+
+ filename
+ end
+
+ def upload_ref(upload)
+ "#{upload.id}:#{upload.path}"
+ end
+
+ def exif_tags(path)
+ cmd = ["exiftool", "-all", "-j", "-sort", "--IPTC:all", "--XMP-iptcExt:all", path]
+ output, status = Gitlab::Popen.popen(cmd)
+
+ raise "failed to get exif tags: #{output}" if status != 0
+
+ JSON.parse(output).first
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb
index ba1137313d8..14126b6ec06 100644
--- a/lib/gitlab/untrusted_regexp.rb
+++ b/lib/gitlab/untrusted_regexp.rb
@@ -35,6 +35,10 @@ module Gitlab
matches
end
+ def match?(text)
+ text.present? && scan(text).present?
+ end
+
def replace(text, rewrite)
RE2.Replace(text, regexp, rewrite)
end
@@ -43,37 +47,6 @@ module Gitlab
self.source == other.source
end
- # Handles regular expressions with the preferred RE2 library where possible
- # via UntustedRegex. Falls back to Ruby's built-in regular expression library
- # when the syntax would be invalid in RE2.
- #
- # One difference between these is `(?m)` multi-line mode. Ruby regex enables
- # this by default, but also handles `^` and `$` differently.
- # See: https://www.regular-expressions.info/modifiers.html
- def self.with_fallback(pattern, multiline: false)
- UntrustedRegexp.new(pattern, multiline: multiline)
- rescue RegexpError
- Regexp.new(pattern)
- end
-
- def self.valid?(pattern)
- !!self.fabricate(pattern)
- rescue RegexpError
- false
- end
-
- def self.fabricate(pattern)
- matches = pattern.match(%r{^/(?<regexp>.+)/(?<flags>[ismU]*)$})
-
- raise RegexpError, 'Invalid regular expression!' if matches.nil?
-
- expression = matches[:regexp]
- flags = matches[:flags]
- expression.prepend("(?#{flags})") if flags.present?
-
- self.new(expression, multiline: false)
- end
-
private
attr_reader :regexp
diff --git a/lib/gitlab/untrusted_regexp/ruby_syntax.rb b/lib/gitlab/untrusted_regexp/ruby_syntax.rb
new file mode 100644
index 00000000000..91f300f97d0
--- /dev/null
+++ b/lib/gitlab/untrusted_regexp/ruby_syntax.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+module Gitlab
+ class UntrustedRegexp
+ # This class adds support for regexps written in Ruby literal syntax
+ # and converts them to their RE2 representation:
+ # /<regexp>/<flags>
+ class RubySyntax
+ PATTERN = %r{^/(?<regexp>.+)/(?<flags>[ismU]*)$}.freeze
+
+ # Checks whether the pattern has the /<regexp>/<flags> shape,
+ # but does not enforce its validity
+ def self.matches_syntax?(pattern)
+ pattern.is_a?(String) && pattern.match(PATTERN).present?
+ end
+
+ # A pattern can have the `/.../` shape and still not be fabricatable,
+ # because the regexp is invalid or incomplete, e.g. `/match ( string/`
+ def self.valid?(pattern)
+ !!self.fabricate(pattern)
+ end
+
+ def self.fabricate(pattern)
+ self.fabricate!(pattern)
+ rescue RegexpError
+ nil
+ end
+
+ def self.fabricate!(pattern)
+ raise RegexpError, 'Pattern is not string!' unless pattern.is_a?(String)
+
+ matches = pattern.match(PATTERN)
+ raise RegexpError, 'Invalid regular expression!' if matches.nil?
+
+ expression = matches[:regexp]
+ flags = matches[:flags]
+ expression.prepend("(?#{flags})") if flags.present?
+
+ UntrustedRegexp.new(expression, multiline: false)
+ end
+ end
+ end
+end