diff options
-rw-r--r-- | changelogs/unreleased/use-untrusted-regexp.yml | 5 | ||||
-rw-r--r-- | doc/ci/yaml/README.md | 13 | ||||
-rw-r--r-- | lib/gitlab/ci/build/policy/refs.rb | 4 | ||||
-rw-r--r-- | lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb | 4 | ||||
-rw-r--r-- | lib/gitlab/config/entry/legacy_validation_helpers.rb | 8 | ||||
-rw-r--r-- | lib/gitlab/config/entry/validators.rb | 14 | ||||
-rw-r--r-- | lib/gitlab/untrusted_regexp.rb | 35 | ||||
-rw-r--r-- | lib/gitlab/untrusted_regexp/ruby_syntax.rb | 43 | ||||
-rw-r--r-- | spec/lib/gitlab/ci/build/policy/refs_spec.rb | 13 | ||||
-rw-r--r-- | spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb | 2 | ||||
-rw-r--r-- | spec/lib/gitlab/ci/trace/stream_spec.rb | 2 | ||||
-rw-r--r-- | spec/lib/gitlab/route_map_spec.rb | 2 | ||||
-rw-r--r-- | spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb | 72 | ||||
-rw-r--r-- | spec/lib/gitlab/untrusted_regexp_spec.rb | 74 | ||||
-rw-r--r-- | spec/support/shared_examples/malicious_regexp_shared_examples.rb | 3 |
15 files changed, 198 insertions, 96 deletions
diff --git a/changelogs/unreleased/use-untrusted-regexp.yml b/changelogs/unreleased/use-untrusted-regexp.yml new file mode 100644 index 00000000000..dd7f1bcaca1 --- /dev/null +++ b/changelogs/unreleased/use-untrusted-regexp.yml @@ -0,0 +1,5 @@ +--- +title: Use UntrustedRegexp for matching refs policy +merge_request: +author: +type: security diff --git a/doc/ci/yaml/README.md b/doc/ci/yaml/README.md index aab26092811..251e2cd3d05 100644 --- a/doc/ci/yaml/README.md +++ b/doc/ci/yaml/README.md @@ -255,6 +255,19 @@ job: - branches ``` +Pattern matching is case-sensitive by default. Use `i` flag modifier, like +`/pattern/i` to make a pattern case-insensitive: + +```yaml +job: + # use regexp + only: + - /^issue-.*$/i + # use special keyword + except: + - branches +``` + In this example, `job` will run only for refs that are tagged, or if a build is explicitly requested via an API trigger or a [Pipeline Schedule][schedules]: diff --git a/lib/gitlab/ci/build/policy/refs.rb b/lib/gitlab/ci/build/policy/refs.rb index df5f5ffc253..360424bec11 100644 --- a/lib/gitlab/ci/build/policy/refs.rb +++ b/lib/gitlab/ci/build/policy/refs.rb @@ -35,8 +35,8 @@ module Gitlab # patterns can be matched only when branch or tag is used # the pattern matching does not work for merge requests pipelines if pipeline.branch? || pipeline.tag? - if pattern.first == "/" && pattern.last == "/" - Regexp.new(pattern[1...-1]) =~ pipeline.ref + if regexp = Gitlab::UntrustedRegexp::RubySyntax.fabricate(pattern) + regexp.match?(pipeline.ref) else pattern == pipeline.ref end diff --git a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb index d7e6dacf068..2b719c9c6fc 100644 --- a/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb +++ b/lib/gitlab/ci/pipeline/expression/lexeme/pattern.rb @@ -13,13 +13,13 @@ module Gitlab def initialize(regexp) @value = regexp - unless Gitlab::UntrustedRegexp.valid?(@value) + unless Gitlab::UntrustedRegexp::RubySyntax.valid?(@value) raise Lexer::SyntaxError, 'Invalid regular expression!' end end def evaluate(variables = {}) - Gitlab::UntrustedRegexp.fabricate(@value) + Gitlab::UntrustedRegexp::RubySyntax.fabricate!(@value) rescue RegexpError raise Expression::RuntimeError, 'Invalid regular expression!' end diff --git a/lib/gitlab/config/entry/legacy_validation_helpers.rb b/lib/gitlab/config/entry/legacy_validation_helpers.rb index d3ab5625743..0a629075302 100644 --- a/lib/gitlab/config/entry/legacy_validation_helpers.rb +++ b/lib/gitlab/config/entry/legacy_validation_helpers.rb @@ -45,17 +45,15 @@ module Gitlab end def validate_regexp(value) - !value.nil? && Regexp.new(value.to_s) && true - rescue RegexpError, TypeError - false + Gitlab::UntrustedRegexp::RubySyntax.valid?(value) end def validate_string_or_regexp(value) return true if value.is_a?(Symbol) return false unless value.is_a?(String) - if value.first == '/' && value.last == '/' - validate_regexp(value[1...-1]) + if Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value) + validate_regexp(value) else true end diff --git a/lib/gitlab/config/entry/validators.rb b/lib/gitlab/config/entry/validators.rb index 25bfa50f829..d348e11b753 100644 --- a/lib/gitlab/config/entry/validators.rb +++ b/lib/gitlab/config/entry/validators.rb @@ -120,17 +120,13 @@ module Gitlab private - def look_like_regexp?(value) - value.is_a?(String) && value.start_with?('/') && - value.end_with?('/') + def matches_syntax?(value) + Gitlab::UntrustedRegexp::RubySyntax.matches_syntax?(value) end def validate_regexp(value) - look_like_regexp?(value) && - Regexp.new(value.to_s[1...-1]) && - true - rescue RegexpError - false + matches_syntax?(value) && + Gitlab::UntrustedRegexp::RubySyntax.valid?(value) end end @@ -149,7 +145,7 @@ module Gitlab def validate_string_or_regexp(value) return false unless value.is_a?(String) - return validate_regexp(value) if look_like_regexp?(value) + return validate_regexp(value) if matches_syntax?(value) true end diff --git a/lib/gitlab/untrusted_regexp.rb b/lib/gitlab/untrusted_regexp.rb index ba1137313d8..14126b6ec06 100644 --- a/lib/gitlab/untrusted_regexp.rb +++ b/lib/gitlab/untrusted_regexp.rb @@ -35,6 +35,10 @@ module Gitlab matches end + def match?(text) + text.present? && scan(text).present? + end + def replace(text, rewrite) RE2.Replace(text, regexp, rewrite) end @@ -43,37 +47,6 @@ module Gitlab self.source == other.source end - # Handles regular expressions with the preferred RE2 library where possible - # via UntustedRegex. Falls back to Ruby's built-in regular expression library - # when the syntax would be invalid in RE2. - # - # One difference between these is `(?m)` multi-line mode. Ruby regex enables - # this by default, but also handles `^` and `$` differently. - # See: https://www.regular-expressions.info/modifiers.html - def self.with_fallback(pattern, multiline: false) - UntrustedRegexp.new(pattern, multiline: multiline) - rescue RegexpError - Regexp.new(pattern) - end - - def self.valid?(pattern) - !!self.fabricate(pattern) - rescue RegexpError - false - end - - def self.fabricate(pattern) - matches = pattern.match(%r{^/(?<regexp>.+)/(?<flags>[ismU]*)$}) - - raise RegexpError, 'Invalid regular expression!' if matches.nil? - - expression = matches[:regexp] - flags = matches[:flags] - expression.prepend("(?#{flags})") if flags.present? - - self.new(expression, multiline: false) - end - private attr_reader :regexp diff --git a/lib/gitlab/untrusted_regexp/ruby_syntax.rb b/lib/gitlab/untrusted_regexp/ruby_syntax.rb new file mode 100644 index 00000000000..91f300f97d0 --- /dev/null +++ b/lib/gitlab/untrusted_regexp/ruby_syntax.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +module Gitlab + class UntrustedRegexp + # This class implements support for Ruby syntax of regexps + # and converts that to RE2 representation: + # /<regexp>/<flags> + class RubySyntax + PATTERN = %r{^/(?<regexp>.+)/(?<flags>[ismU]*)$}.freeze + + # Checks if pattern matches a regexp pattern + # but does not enforce it's validity + def self.matches_syntax?(pattern) + pattern.is_a?(String) && pattern.match(PATTERN).present? + end + + # The regexp can match the pattern `/.../`, but may not be fabricatable: + # it can be invalid or incomplete: `/match ( string/` + def self.valid?(pattern) + !!self.fabricate(pattern) + end + + def self.fabricate(pattern) + self.fabricate!(pattern) + rescue RegexpError + nil + end + + def self.fabricate!(pattern) + raise RegexpError, 'Pattern is not string!' unless pattern.is_a?(String) + + matches = pattern.match(PATTERN) + raise RegexpError, 'Invalid regular expression!' if matches.nil? + + expression = matches[:regexp] + flags = matches[:flags] + expression.prepend("(?#{flags})") if flags.present? + + UntrustedRegexp.new(expression, multiline: false) + end + end + end +end diff --git a/spec/lib/gitlab/ci/build/policy/refs_spec.rb b/spec/lib/gitlab/ci/build/policy/refs_spec.rb index b4ddbf89b70..ec0450643c3 100644 --- a/spec/lib/gitlab/ci/build/policy/refs_spec.rb +++ b/spec/lib/gitlab/ci/build/policy/refs_spec.rb @@ -92,10 +92,23 @@ describe Gitlab::Ci::Build::Policy::Refs do .to be_satisfied_by(pipeline) end + it 'is satisfied when case-insensitive regexp matches pipeline ref' do + expect(described_class.new(['/DOCS-.*/i'])) + .to be_satisfied_by(pipeline) + end + it 'is not satisfied when regexp does not match pipeline ref' do expect(described_class.new(['/fix-.*/'])) .not_to be_satisfied_by(pipeline) end end + + context 'malicious regexp' do + let(:pipeline) { build_stubbed(:ci_pipeline, ref: malicious_text) } + + subject { described_class.new([malicious_regexp_ruby]) } + + include_examples 'malicious regexp' + end end end diff --git a/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb b/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb index 3ebc2e94727..cff7f57ceff 100644 --- a/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb +++ b/spec/lib/gitlab/ci/pipeline/expression/lexeme/pattern_spec.rb @@ -85,7 +85,7 @@ describe Gitlab::Ci::Pipeline::Expression::Lexeme::Pattern do end it 'raises error if evaluated regexp is not valid' do - allow(Gitlab::UntrustedRegexp).to receive(:valid?).and_return(true) + allow(Gitlab::UntrustedRegexp::RubySyntax).to receive(:valid?).and_return(true) regexp = described_class.new('/invalid ( .*/') diff --git a/spec/lib/gitlab/ci/trace/stream_spec.rb b/spec/lib/gitlab/ci/trace/stream_spec.rb index 38626f728d7..e45ea1c2528 100644 --- a/spec/lib/gitlab/ci/trace/stream_spec.rb +++ b/spec/lib/gitlab/ci/trace/stream_spec.rb @@ -414,7 +414,7 @@ describe Gitlab::Ci::Trace::Stream, :clean_gitlab_redis_cache do context 'malicious regexp' do let(:data) { malicious_text } - let(:regex) { malicious_regexp } + let(:regex) { malicious_regexp_re2 } include_examples 'malicious regexp' end diff --git a/spec/lib/gitlab/route_map_spec.rb b/spec/lib/gitlab/route_map_spec.rb index d672f7b5675..a39c774429e 100644 --- a/spec/lib/gitlab/route_map_spec.rb +++ b/spec/lib/gitlab/route_map_spec.rb @@ -60,7 +60,7 @@ describe Gitlab::RouteMap do subject do map = described_class.new(<<-"MAP".strip_heredoc) - - source: '#{malicious_regexp}' + - source: '#{malicious_regexp_re2}' public: '/' MAP diff --git a/spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb b/spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb new file mode 100644 index 00000000000..005d41580de --- /dev/null +++ b/spec/lib/gitlab/untrusted_regexp/ruby_syntax_spec.rb @@ -0,0 +1,72 @@ +require 'fast_spec_helper' +require 'support/shared_examples/malicious_regexp_shared_examples' + +describe Gitlab::UntrustedRegexp::RubySyntax do + describe '.matches_syntax?' do + it 'returns true if regexp is valid' do + expect(described_class.matches_syntax?('/some .* thing/')) + .to be true + end + + it 'returns true if regexp is invalid, but resembles regexp' do + expect(described_class.matches_syntax?('/some ( thing/')) + .to be true + end + end + + describe '.valid?' do + it 'returns true if regexp is valid' do + expect(described_class.valid?('/some .* thing/')) + .to be true + end + + it 'returns false if regexp is invalid' do + expect(described_class.valid?('/some ( thing/')) + .to be false + end + end + + describe '.fabricate' do + context 'when regexp is valid' do + it 'fabricates regexp without flags' do + expect(described_class.fabricate('/some .* thing/')).not_to be_nil + end + end + + context 'when regexp is a raw pattern' do + it 'returns error' do + expect(described_class.fabricate('some .* thing')).to be_nil + end + end + end + + describe '.fabricate!' do + context 'when regexp is using /regexp/ scheme with flags' do + it 'fabricates regexp with a single flag' do + regexp = described_class.fabricate!('/something/i') + + expect(regexp).to eq Gitlab::UntrustedRegexp.new('(?i)something') + expect(regexp.scan('SOMETHING')).to be_one + end + + it 'fabricates regexp with multiple flags' do + regexp = described_class.fabricate!('/something/im') + + expect(regexp).to eq Gitlab::UntrustedRegexp.new('(?im)something') + end + + it 'fabricates regexp without flags' do + regexp = described_class.fabricate!('/something/') + + expect(regexp).to eq Gitlab::UntrustedRegexp.new('something') + end + end + + context 'when regexp is a raw pattern' do + it 'raises an error' do + expect { described_class.fabricate!('some .* thing') } + .to raise_error(RegexpError) + end + end + end +end diff --git a/spec/lib/gitlab/untrusted_regexp_spec.rb b/spec/lib/gitlab/untrusted_regexp_spec.rb index 0a6ac0aa294..9d483f13a5e 100644 --- a/spec/lib/gitlab/untrusted_regexp_spec.rb +++ b/spec/lib/gitlab/untrusted_regexp_spec.rb @@ -2,48 +2,6 @@ require 'fast_spec_helper' require 'support/shared_examples/malicious_regexp_shared_examples' describe Gitlab::UntrustedRegexp do - describe '.valid?' do - it 'returns true if regexp is valid' do - expect(described_class.valid?('/some ( thing/')) - .to be false - end - - it 'returns true if regexp is invalid' do - expect(described_class.valid?('/some .* thing/')) - .to be true - end - end - - describe '.fabricate' do - context 'when regexp is using /regexp/ scheme with flags' do - it 'fabricates regexp with a single flag' do - regexp = described_class.fabricate('/something/i') - - expect(regexp).to eq described_class.new('(?i)something') - expect(regexp.scan('SOMETHING')).to be_one - end - - it 'fabricates regexp with multiple flags' do - regexp = described_class.fabricate('/something/im') - - expect(regexp).to eq described_class.new('(?im)something') - end - - it 'fabricates regexp without flags' do - regexp = described_class.fabricate('/something/') - - expect(regexp).to eq described_class.new('something') - end - end - - context 'when regexp is a raw pattern' do - it 'raises an error' do - expect { described_class.fabricate('some .* thing') } - .to raise_error(RegexpError) - end - end - end - describe '#initialize' do subject { described_class.new(pattern) } @@ -92,11 +50,41 @@ describe Gitlab::UntrustedRegexp do end end + describe '#match?' do + subject { described_class.new(regexp).match?(text) } + + context 'malicious regexp' do + let(:text) { malicious_text } + let(:regexp) { malicious_regexp_re2 } + + include_examples 'malicious regexp' + end + + context 'matching regexp' do + let(:regexp) { 'foo' } + let(:text) { 'foo' } + + it 'returns an array of nil matches' do + is_expected.to eq(true) + end + end + + context 'non-matching regexp' do + let(:regexp) { 'boo' } + let(:text) { 'foo' } + + it 'returns an array of nil matches' do + is_expected.to eq(false) + end + end + end + describe '#scan' do subject { described_class.new(regexp).scan(text) } + context 'malicious regexp' do let(:text) { malicious_text } - let(:regexp) { malicious_regexp } + let(:regexp) { malicious_regexp_re2 } include_examples 'malicious regexp' end diff --git a/spec/support/shared_examples/malicious_regexp_shared_examples.rb b/spec/support/shared_examples/malicious_regexp_shared_examples.rb index db69b75c0c8..a86050e2cf2 100644 --- a/spec/support/shared_examples/malicious_regexp_shared_examples.rb +++ b/spec/support/shared_examples/malicious_regexp_shared_examples.rb @@ -2,7 +2,8 @@ require 'timeout' shared_examples 'malicious regexp' do let(:malicious_text) { 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!' } - let(:malicious_regexp) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' } + let(:malicious_regexp_re2) { '(?i)^(([a-z])+.)+[A-Z]([a-z])+$' } + let(:malicious_regexp_ruby) { '/^(([a-z])+.)+[A-Z]([a-z])+$/i' } it 'takes under a second' do expect { Timeout.timeout(1) { subject } }.not_to raise_error |